遠程blast的Perl腳本

問題描述

之前用blast2go太慢,自己寫了個Perl腳本進行遠程blast,可以將fasta文件拆分後並行運行。最後生成XML文件可以直接導入blast2go軟件使用。速度比直接利用blast2go-basic快些。

代碼塊

#!/usr/bin/perl
# huangliangbo 2015-3-30
# 利用該腳本進行遠程blast,生成XML格式(m7)文件;
# 需要安裝XML::SAX模塊

use v5.16;
use warnings;
use Bio::Tools::Run::RemoteBlast;
use Getopt::Long;
use File::Basename;

# Name of this script, interpolated into the usage example below.
my $basename = basename($0);

# Usage text printed for -help and whenever the command line is unusable.
my $help=<<USAGE;
Please enter parameters:
-help           print help information
-program        string [ blastp, blastn, blastx, tblastn, tblastx ;default: blastp ]
-database       string [ swissprot, nr, nt, etc... ]
-file           string [ fasta, contains one or more fasta format sequences ]
-evalue         string [ default: '1e-5' ]
-v              Integer[ Number of database sequences to show one-line descriptions for (V); default:10 ]
-b              Integer[ Number of database sequence to show alignments for (B); default:10 ]
-gi             [T/F]  [ Show GI in deflines,default:T ]

usage:
perl $basename -p program -d database -f file [-e e-value] [-b 5] [-v 5]
USAGE

# Parse the command line into %opt.
# -evalue and -gi take a string value (":s"); declared as bare flags they
# would be stored as 1 and the value the user typed would be left in @ARGV.
my %opt;
my $parsed_ok = GetOptions(
    \%opt,
    "program:s", "database:s", "file:s",
    "evalue:s",  "v:i",        "b:i",
    "gi:s",      "help",
);

if ($opt{help}) {
    say $help;
    exit;
}

# The usage text documents blastp as the default program, so apply it here
# instead of forcing the user to pass -program every time.
$opt{program} ||= 'blastp';

# A database and an input file are the options that have no default.
my @missing = grep { !defined $opt{$_} || $opt{$_} eq '' } qw(database file);

if (!$parsed_ok or @missing) {
    warn "Missing required option(s): @missing\n" if @missing;
    say $help;
    exit 1;
}

# Settings taken verbatim from the command line.
my ($prog, $db, $file) = @opt{qw(program database file)};

# Optional settings: any false value (unset, empty string, 0) is replaced
# by the default shown on the right.
my $e_val       = $opt{evalue} ? $opt{evalue} : '1e-5';
my $description = $opt{v}      ? $opt{v}      : 10;
my $alignment   = $opt{b}      ? $opt{b}      : 10;
my $gi          = $opt{gi}     ? $opt{gi}     : 'T';

# Constructor arguments for the remote BLAST client, as key/value pairs.
my @params;
push @params, '-prog',       $prog;
push @params, '-data',       $db;
push @params, '-expect',     $e_val;
push @params, '-readmethod', 'xml';

# Client object used below to submit queries and fetch their reports.
my $factory = Bio::Tools::Run::RemoteBlast->new(@params);
#$factory->retrieve_parameter('FORMAT_TYPE', 'XML');

#change a query parameter
#$Bio::Tools::Run::RemoteBlast::HEADER{'ENTREZ_QUERY'} = 'Homo sapiens [ORGN]';
#$Bio::Tools::Run::RemoteBlast::HEADER{'MATRIX_NAME'} = 'PAM30';
#$Bio::Tools::Run::RemoteBlast::HEADER{'GAPCOSTS'} = '9 1';
#$Bio::Tools::Run::RemoteBlast::HEADER{'WORD_SIZE'} = '2';

#Have to request the blast with the right amount of alignments,
#$Bio::Tools::Run::RemoteBlast::HEADER{'ALIGNMENTS'} = '1000';
#$Bio::Tools::Run::RemoteBlast::HEADER{'DESCRIPTIONS'} = '1000';

#change a retrieval parameter
# Set all retrieval parameters in one hash-slice assignment. NCBI_GI becomes
# 'no' when the -gi value contains an "f" (any case), otherwise 'yes'.
my $show_gi = 'yes';
$show_gi = 'no' if $gi =~ /f/i;

@Bio::Tools::Run::RemoteBlast::RETRIEVALHEADER{
    'DESCRIPTIONS', 'NCBI_GI', 'ALIGNMENTS', 'FORMAT_TYPE'
} = ( $description, $show_gi, $alignment, 'XML' );

#remove a parameter
#delete $Bio::Tools::Run::RemoteBlast::HEADER{'FILTER'};

# $v toggles the progress messages ("waiting...", dots) written to STDERR;
# set it to 0 to run silently.
my $v = 1;

# Every report is appended to this single file.
my $combined_file = 'blastRemoteResult.xml';

# Each report is first saved here, then appended to $combined_file and
# deleted. The name is fixed (plus the process id) instead of being derived
# from the query name: FASTA IDs such as "gi|123|ref|..." contain characters
# that are unsafe in file names and shell commands.
my $scratch_file = "blastRemoteResult.$$.tmp.xml";

# Reader that yields the sequences of the input FASTA file one at a time.
my $str = Bio::SeqIO->new(-file=>$file , -format => 'fasta' );

my $num = 1;
while (my $input = $str->next_seq()){
    # Submit one sequence, then poll until its request id(s) are finished.
    $factory->submit_blast($input);

    print STDERR "\n$num: waiting...\n" if( $v > 0 );
    $num ++;

    while ( my @rids = $factory->each_rid ) {
        foreach my $rid ( @rids ) {
            my $rc = $factory->retrieve_blast($rid);

            # A non-reference return value is treated as a status code:
            # negative means the request failed, anything else means the
            # report is not ready yet.
            if( !ref($rc) ) {
                if( $rc < 0 ) {
                    warn "\nRetrieval failed for request $rid; skipping it\n";
                    $factory->remove_rid($rid);
                    next;
                }
                print STDERR "." if ( $v > 0 );
                sleep 5;
                next;
            }

            # A reference was returned: the report is available.
            my $result = $rc->next_result();
            if ( defined $result ) {
                $factory->save_output($scratch_file);
                _append_and_remove($scratch_file, $combined_file);
            }
            else {
                warn "\nNo result could be read for request $rid; nothing saved\n";
            }
            $factory->remove_rid($rid);
        }
    }
}

# Append the contents of $source to $target, then delete $source.
# Dies with the system error message if any file operation fails.
sub _append_and_remove {
    my ($source, $target) = @_;

    open my $in,  '<',  $source or die "Cannot read $source: $!";
    open my $out, '>>', $target or die "Cannot append to $target: $!";

    while ( my $line = <$in> ) {
        print {$out} $line;
    }

    close $in;
    # Buffered write errors only surface when the handle is closed.
    close $out or die "Cannot finish writing $target: $!";

    unlink $source or warn "Cannot remove $source: $!";
    return;
}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章