6
bp_download_query_genbank - script to query GenBank and retrieve records
10
bp_download_query_genbank --query "Neurospora[ORGN]" --db nucest -o Ncrassa_ESTs.fa --format fasta
12
bp_download_query_genbank --queryfile 'filewithquery' --db nucest -o Ncrassa_ESTs.fa --format fasta
18
-q --query query string OR
19
--queryfile file containing the query string OR
20
--gi --gis --gifile file with list of GIs to download
24
-d --db database (nucleotide [default], nucest, protein)
26
-o --out --outfile output file (results are displayed on screen otherwise)
27
-f --format sequence file output format (fasta by default)
28
-v --verbose debugging output
32
--maxids maximum number of IDs to retrieve in a set (100 at a time by default)
34
--maxdate maximum date for a record
35
--mindate minimum date for a record
36
--datetype edat or mdat (entered or modified)
38
=head1 AUTHOR
40
Jason Stajich, jason-AT-bioperl.org
48
use Bio::DB::Query::GenBank;
52
my ($queryfile,$outfile,$format,$debug,%options);
56
$options{'-maxids'} = '100';
57
$options{'-db'} = 'nucleotide'; # can be nucleotide, nucest, protein
60
'h|help' => sub { exec('perldoc', $0);
63
'v|verbose' => \$debug,
64
'f|format:s' => \$format,
65
'queryfile:s' => \$queryfile,
66
'o|out|outfile:s' => \$outfile,
67
'gi|gifile|gis:s' => \$gifile,
69
'd|db:s' => \$options{'-db'},
70
'mindate:s' => \$options{'-mindate'},
71
'maxdate:s' => \$options{'-maxdate'},
72
'reldate:s' => \$options{'-reldate'},
73
'datetype:s' => \$options{'-datetype'}, # edat or mdat
74
'maxids:i' => \$options{'-maxids'},
75
'q|query:s' => \$options{'-query'},
81
$out = Bio::SeqIO->new(-format => $format,
82
-file => ">$outfile");
84
$out = Bio::SeqIO->new(-format => $format); # write to STDOUT
88
if( $options{'-db'} eq 'protein' ) {
89
$dbh = Bio::DB::GenPept->new(-verbose => $debug);
91
$dbh = Bio::DB::GenBank->new(-verbose => $debug);
96
# Open the GI list read-only with three-argument open so that a file name
# beginning with '>' or '|' (or ending in '|') cannot change the open mode.
open( my $fh, '<', $gifile ) or die "Cannot open GI file '$gifile': $!\n";
102
my @mini_ids = splice(@ids, 0, $options{'-maxids'});
103
$query = Bio::DB::Query::GenBank->new(%options,
106
my $stream = $dbh->get_Stream_by_query($query);
107
while( my $seq = $stream->next_seq ) {
108
$out->write_seq($seq);
112
} elsif( $options{'-query'}) {
113
$query = Bio::DB::Query::GenBank->new(%options);
114
} elsif( $queryfile ) {
115
# Open the query file read-only with three-argument open so that the
# user-supplied file name cannot be interpreted as a mode or a pipe.
open( my $fh, '<', $queryfile ) or die "Cannot open query file '$queryfile': $!\n";
116
# Read from the handle opened on the query file; $queryfile holds only the
# file name, so reading <$queryfile> never returned the file's contents.
while(<$fh>) {
118
$options{'-query'} .= $_;
120
$query = Bio::DB::Query::GenBank->new(%options);
123
die("no query string or gifile\n");
125
my $stream = $dbh->get_Stream_by_query($query);
126
while( my $seq = $stream->next_seq ) {
127
$out->write_seq($seq);