1
#!/usr/local/bin/perl -w
3
# This is a working script to extract sequence ids from a blast
4
# result file and extract those sequences from a local db.
6
# This may not work if the blastdb to index is est depending on
7
# wha your machine supports
9
# $Id: blast_fetch_local.pl,v 1.1 2001/08/01 17:47:05 jason Exp $
12
use Bio::Tools::BPlite;
14
use Bio::Index::Fasta;
18
$USAGE = 'usage: blast_fetch_local.pl -b report.bls -o outseqs.fa -d /path/to/db -i /path/to/index -p pvalue';
20
my ($db, $index, $output, $blastfile,$pvalue);
27
'i|index:s' => \$index,
28
'o|output:s' => \$output,
29
'b|blast:s' => \$blastfile,
30
'p|pvalue:s' => \$pvalue);
32
die $USAGE if( ! $db || ! -r $db || ! $index || ! $output
33
|| ! $blastfile || ! -r $blastfile );
35
# skip if it is already created
36
my $indexdb = new Bio::Index::Fasta('-filename' => $index,
38
$indexdb->id_parser( \&parse_ncbi_id );
39
$indexdb->make_index($db);
43
my $seqout = new Bio::SeqIO(-file => ">$output");
44
my $indexdb = new Bio::Index::Fasta('-filename' => $index);
45
my $bplite = new Bio::Tools::BPlite( -file => $blastfile );
47
SBJCT: while( my $sbjct = $bplite->nextSbjct ) {
48
HSP: while( my $hsp = $sbjct->nextHSP ) {
49
if( $hsp->P > $pvalue) {
50
# skip Sbjcts that don't meet the minimum P value
51
print 'skipping Sbjct with a HSP with pvalue=', $hsp->P, "\n";
54
# could also accept any Subject with an HSP that has appropriate e-value
59
my ($id) = split(/\s+/, $sbjct->name);
61
my @ids = split(/\|/, $id);
63
my $seq = $indexdb->fetch($id);
65
print "could not find id '$id'\n";
67
$seqout->write_seq($seq);
73
if( $_[0] =~ /^>(\S+)/ ) {
75
my (@elements) = split(/\|/, $val);
77
my $id = shift @elements;
78
if( $id eq 'gb' || $id eq 'gi' ) {
79
$id = $id . '|' . shift @elements;