原始數據:
基因組的fasta文件,兩個基因組之間blast的比對座標
scaffold1 2723 9022 contig00005 238996 1
解決方法一:
#!/usr/bin/perl -w
use strict;
use warnings;
no strict "refs";
my $usage = " perl $0 mutation-vs-wild.m8 mutation.fa wild.fa \n";
unless(@ARGV == 3){
die $usage;
}
my $aln = shift ;
my $mut = shift;
my $wild = shift;
open ALN,"<",$aln or die "can't open $aln\n";
#open MUT,"<",$mut or die "can't open $mut\n";
#open WILD,"<",$wild or die "can't open $wild\n";
my %mut = build_fa($mut);
my %wild = build_fa($wild);
coden(\%mut);
coden(\%wild);
while(<ALN>){
chomp;
my ($scf,$st1,$ed1,$ctg,$st2,$ed2) = split"\t",$_;
coden_aln($scf,$st1,$ed1);
coden_aln($ctg,$st2,$ed2);
}
my %want_mut = get_coord(\%mut);
my %want_wild = get_coord(\%wild);
get_fa_seq ("mut",\%want_mut,\%mut);
get_fa_seq ("wild",\%want_wild,\%wild);
sub build_fa {
my $file = shift @_;
my %fa;
my $tit;
open FA,"<",$file or die "can't open $file\n";
while(<FA>){
chomp;
if(/>(.*)/){
$tit = $1;
}else{
$fa{$tit} .= $_;
}
}
return %fa;
}
sub coden {
my $hash = shift @_;
foreach my $key (keys %$hash){
foreach my $i (1 .. length $hash->{$key}){
${$key}[$i] = 0;
}
}
}
sub coden_aln {
my ($scf,$st,$ed) = @_;
if($st > $ed){
($st,$ed) = ($ed,$st);
}
foreach my $i ($st .. $ed){
${$scf}[$i] =1;
}
}
sub get_coord {
my %want;
my $hash = shift @_;
my $flag;
foreach my $key (keys %$hash){
my $start;
my $end;
if($key =~/scaffold/){
$flag = "mut";
}else{
$flag = "wild";
}
foreach my $i (1.. length $hash->{$key}){
if($i == 1 && ${$key}[1] == 0){
$start = 1;
}elsif(${$key}[$i] == 0 && ${$key}[$i-1] ==1){
$start = $i;
}
if($i == length $hash->{$key} && ${$key}[$i]==0 ){
$end = $i;
}elsif(${$key}[$i] == 0 && ${$key}[$i+1] == 1){
$end = $i;
}
if($start && $end && $start <= $end ){
$want{$key}{$start} = $end;
}
}
}
open MUT_CORD,">",$flag."_coord";
foreach my $key (keys %want){
foreach my $st (keys %{$want{$key}}){
my $end = $want{$key}{$st};
print MUT_CORD $key,"\t",$st,"\t",$want{$key}{$st},"\n";
}
}
close MUT_CORD;
return %want;
}
sub get_fa_seq {
my ($flag,$want,$fa) = @_;
open MUT_FA,">",$flag."_uniq.fa";
foreach my $key (keys %$fa){
foreach my $cord (keys %{$want->{$key}}){
my $start = $cord;
my $end = $want->{$key}{$cord};
my $seq =substr ($fa->{$key},$start-1,$end-$start+1);
print MUT_FA ">$key $start->$end\n$seq\n";
}
}
close MUT_FA;
}