2
# BioPerl module for Bio::Tools::EUtilities
4
# Please direct questions and support issues to <bioperl-l@bioperl.org>
6
# Cared for by Chris Fields
8
# Copyright Chris Fields
10
# You may distribute this module under the same terms as perl itself
12
# POD documentation - main docs before the code
16
Bio::Tools::EUtilities - NCBI eutil XML parsers
21
my $parser = Bio::Tools::EUtilities->new(
26
# or HTTP::Response object...
27
my $parser = Bio::Tools::EUtilities->new(
29
-response => $response
31
# esearch, esummary, elink
32
@ids = $parser->get_ids(); # returns array or array ref of IDs
36
$term = $parser->get_term(); # returns array or array ref of IDs
40
$db = $parser->get_database(); # returns database
42
# Query-related methods (esearch, egquery, espell data)
43
# eutil data centered on use of search terms
45
my $ct = $parser->get_count; # uses optional database for egquery count
46
my $translation = $parser->get_count;
48
my $corrected = $parser->get_corrected_query; # espell
50
while (my $gquery = $parser->next_GlobalQuery) {
51
# iterates through egquery data
54
# Info-related methods (einfo data)
55
# database-related information
57
my $desc = $parser->get_description;
58
my $update = $parser->get_last_update;
59
my $nm = $parser->get_menu_name;
60
my $ct = $parser->get_record_count;
62
while (my $field = $parser->next_FieldInfo) {
65
while (my $field = $parser->next_LinkInfo) {
69
# History methods (epost data, some data returned from elink)
70
# data which enables one to retrieve and query against user-stored
71
# information on the NCBI server
73
while (my $cookie = $parser->next_History) {
77
my @hists = $parser->get_Histories;
79
# Bio::Tools::EUtilities::Summary (esummary data)
80
# information on a specific database record
82
# retrieve nested docsum data
83
while (my $docsum = $parser->next_DocSum) {
84
print "ID:",$docsum->get_ids,"\n";
85
while (my $item = $docsum->next_Item) {
87
while (my $listitem = $docsum->next_ListItem) {
89
while (my $listitem = $docsum->next_Structure) {
96
# retrieve flattened item list per DocSum
97
while (my $docsum = $parser->next_DocSum) {
98
my @items = $docsum->get_all_DocSum_Items;
103
Parses NCBI eutils XML output for retrieving IDs and other information. Part of
104
the BioPerl EUtilities system.
106
This is a general parser for eutils XML; data from efetch is NOT parsed (this
107
requires separate format-dependent parsers). All other XML for eutils is parsed.
108
These modules can be used independently of Bio::DB::EUtilities and
109
Bio::Tools::EUtilities::EUtilParameters; if used in this way, only data present
110
in the XML will be parsed out (other bits are retrieved from a passed-in
111
Bio::Tools::EUtilities::EUtilParameters instance used while querying the
116
This module is largely complete. However there are a few holes which will
117
eventually be filled in. TranslationSets from esearch are not currently parsed,
124
User feedback is an integral part of the
125
evolution of this and other Bioperl modules. Send
126
your comments and suggestions preferably to one
127
of the Bioperl mailing lists. Your participation
130
bioperl-l@lists.open-bio.org - General discussion
131
http://www.bioperl.org/wiki/Mailing_lists - About the mailing lists
135
Please direct usage questions or support issues to the mailing list:
137
I<bioperl-l@bioperl.org>
139
rather than to the module maintainer directly. Many experienced and
140
reponsive experts will be able look at the problem and quickly
141
address it. Please include a thorough description of the problem
142
with code and data examples if at all possible.
144
=head2 Reporting Bugs
146
Report bugs to the Bioperl bug tracking system to
147
help us keep track the bugs and their resolution.
148
Bug reports can be submitted via the web.
150
https://redmine.open-bio.org/projects/bioperl/
154
Email cjfields at bioperl dot org
158
The rest of the documentation details each of the
159
object methods. Internal methods are usually
164
# Let the code begin...
166
package Bio::Tools::EUtilities;
170
use base qw(Bio::Root::IO Bio::Tools::EUtilities::EUtilDataI);
173
=head2 Constructor methods
180
Usage : my $parser = Bio::Tools::EUtilities->new(-file => 'my.xml',
181
-eutil => 'esearch');
182
Function : create Bio::Tools::EUtilities instance
183
Returns : new Bio::Tools::EUtilities instance
184
Args : -file/-fh - File or filehandle
185
-eutil - eutil parser to use (supports all but efetch)
186
-response - HTTP::Response object (optional)
193
'esearch' => 'Query',
194
'egquery' => 'Query',
199
'esummary' => 'Summary',
203
my($caller,@args) = @_;
204
my $class = ref $caller || $caller;
205
if ($class =~ m{Bio::Tools::EUtilities::(\S+)}) {
206
my ($self) = $class->SUPER::new(@args);
207
$self->_initialize(@args);
211
@param{ map { lc $_ } keys %param } = values %param; # lowercase keys
212
my $eutil = $param{'-eutil'} || $class->throw("Need eutil to make instance");
213
return unless( $class->_load_eutil_module( $DATA_MODULE{$eutil}) );
214
return "Bio::Tools::EUtilities::$DATA_MODULE{$eutil}"->new(-datatype => lc $DATA_MODULE{$eutil},
221
my ($self, @args) = @_;
222
my ($response, $pobj, $type, $eutil, $cache, $lazy) =
223
$self->_rearrange([qw(RESPONSE
231
$self->datatype($type);
232
$self->eutil($eutil);
233
# lazy parsing only implemented for elink and esummary (where returned data
234
# can be quite long). Also, no point to parsing lazily when the data is
235
# already in memory in an HTTP::Response object, so turn it off and chunk
236
# the Response object after parsing.
237
$response && $self->response($response);
238
$pobj && $self->parameter_base($pobj);
239
$self->cache_response($cache);
240
$lazy = 0 if ($response) || ($eutil ne 'elink' && $eutil ne 'esummary');
241
# setting parser to 'lazy' mode is permanent (can't reset later)
242
$self->{'_lazy'} = $lazy;
243
$self->{'_parsed'} = 0;
248
=head1 Bio::Tools::EUtilities methods
250
=head2 cache_response
252
Title : cache_response
253
Usage : $parser->cache_response(1)
254
Function : sets flag to cache response object (off by default)
255
Returns : value eval'ing to TRUE or FALSE
256
Args : value eval'ing to TRUE or FALSE
257
Note : must be set prior to any parsing run
262
my ($self, $cache) = @_;
263
if (defined $cache) {
264
$self->{'_cache_response'} = ($cache) ? 1 : 0;
266
return $self->{'_cache_response'};
272
Usage : my $response = $parser->response;
273
Function : Get/Set HTTP::Response object
274
Returns : HTTP::Response
275
Args : HTTP::Response
276
Note : to prevent object from destruction set cache_response() to TRUE
281
my ($self, $response) = @_;
283
$self->throw('Not an HTTP::Response object') unless (ref $response && $response->isa('HTTP::Response'));
284
$self->{'_response'} = $response;
286
return $self->{'_response'};
289
=head2 parameter_base
291
Title : parameter_base
292
Usage : my $response = $parser->parameter_base;
293
Function : Get/Set Bio::ParameterBaseI object (should be Bio::Tools::EUtilities::EUtilParameters)
294
Returns : Bio::Tools::EUtilities::EUtilParameters || undef
295
Args : (optional) Bio::Tools::EUtilities::EUtilParameters
296
Note : If this object is present, it may be used as a last resort for
297
some data values if parsed XML does not contain said values (for
298
instance, database, term, IDs, etc).
303
my ($self, $pb) = @_;
305
$self->throw('Not an Bio::ParameterBaseI object') unless (ref $pb && $pb->isa('Bio::ParameterBaseI'));
306
$self->warn('Not an Bio::Tools::EUtilities::EUtilParameters object; may experience some turbulence...') unless (ref $pb && $pb->isa('Bio::Tools::EUtilities::EUtilParameters'));
307
$self->{'_parameter_base'} = $pb;
309
return $self->{'_parameter_base'};
315
Usage : if ($parser->data_parsed) {...}
316
Function : returns TRUE if data has been parsed
317
Returns : value eval'ing to TRUE or FALSE
318
Args : none (set within parser)
319
Note : mainly internal method (set in case user wants to check
320
whether parser is exhausted).
325
return shift->{'_parsed'};
331
Usage : if ($parser->is_lazy) {...}
332
Function : returns TRUE if parser is set to lazy parsing mode
333
(only affects elink/esummary)
336
Note : Permanently set in constructor. Still highly experimental.
337
Don't stare directly at happy fun ball...
342
return shift->{'_lazy'};
348
Usage : $parser->parse_data
349
Function : direct call to parse data; normally implicitly called
357
'esummary' => [qw(DocSum Item)],
360
'einfo' => [qw(Field Link)],
361
'elink' => [qw(LinkSet LinkSetDb LinkSetDbHistory IdUrlSet
362
Id IdLinkSet ObjUrl Link LinkInfo)],
363
'espell' => [qw(Original Replaced)],
364
'esearch' => [qw(Id ErrorList WarningList)],
369
my $eutil = $self->eutil;
370
my $xs = XML::Simple->new();
371
my $response = $self->response ? $self->response :
372
$self->_fh ? $self->_fh :
373
$self->throw('No response or stream specified');
374
my $simple = ($eutil eq 'espell') ?
375
$xs->XMLin($self->_fix_espell($response), forcearray => $EUTIL_DATA{$eutil}) :
376
($response && $response->isa("HTTP::Response")) ?
377
$xs->XMLin($response->content, forcearray => $EUTIL_DATA{$eutil}) :
378
$xs->XMLin($response, forcearray => $EUTIL_DATA{$eutil});
380
if ($simple->{ERROR}) {
381
my $error = $simple->{ERROR};
382
$self->throw("NCBI $eutil fatal error: ".$error) unless ref $error;
384
if ($simple->{InvalidIdList}) {
385
$self->warn("NCBI $eutil error: Invalid ID List".$simple->{InvalidIdList});
388
if ($simple->{ErrorList} || $simple->{WarningList}) {
389
my @errorlist = @{ $simple->{ErrorList} } if $simple->{ErrorList};
390
my @warninglist = @{ $simple->{WarningList} } if $simple->{WarningList};
392
for my $error (@errorlist) {
393
my $messages = join("\n\t",map {"$_ [".$error->{$_}.']'}
394
grep {!ref $error->{$_}} keys %$error);
395
$err_warn .= "Error : $messages";
397
for my $warn (@warninglist) {
398
my $messages = join("\n\t",map {"$_ [".$warn->{$_}.']'}
399
grep {!ref $warn->{$_}} keys %$warn);
400
$err_warn .= "Warnings : $messages";
403
$self->warn("NCBI $eutil Errors/Warnings:\n".$err_warn)
404
# don't return as some data may still be useful
406
delete $self->{'_response'} unless $self->cache_response;
407
$self->{'_parsed'} = 1;
408
$self->_add_data($simple);
411
# implemented only for elink/esummary, still experimental
415
my $eutil = $self->eutil;
416
my $tag = $eutil eq 'elink' ? 'LinkSet' :
417
$eutil eq 'esummary' ? 'DocSum' :
418
$self->throw("Only eutil elink/esummary use parse_chunk()");
419
my $xs = XML::Simple->new();
420
if ($self->response) {
421
$self->throw("Lazy parsing not implemented for HTTP::Response data yet");
422
delete $self->{'_response'} if !$self->cache_response && $self->data_parsed;
423
} else { # has to be a file/filehandle
425
my ($chunk, $seendoc, $line);
427
while ($line = <$fh>) {
428
next unless $seendoc || $line =~ m{^<$tag>};
431
last if $line =~ m{^</$tag>};
433
if (!defined $line) {
434
$self->{'_parsed'} = 1;
438
$xs->XMLin($chunk, forcearray => $EUTIL_DATA{$eutil}, KeepRoot => 1)
448
Usage : $foo->to_string()
449
Function : converts current object to string
451
Args : (optional) simple data for text formatting
452
Note : Implemented in plugins
458
$self->parse_data if ($self->can('parse_data') && !$self->data_parsed);
459
return sprintf("%-20s:%s\n\n", 'EUtil', $self->eutil);
465
Usage : $info->print_all();
466
$info->print_all(-fh => $fh, -cb => $coderef);
467
Function : prints (dumps) all data in parser. Unless a coderef is supplied,
468
this just dumps the parser-specific to_string method to either a
472
-file : file to print to
473
-fh : filehandle to print to (cannot be used concurrently with file)
474
-cb : coderef to use in place of default print method. This is
475
passed in the parser object
476
-wrap : number of columns to wrap default text output to (def = 80)
477
Notes : only applicable for einfo. If -file or -fh are not defined,
483
my ($self, @args) = @_;
484
$self->_print_handler(@args);
487
=head1 Bio::Tools::EUtilities::EUtilDataI methods
492
Usage : $eutil->$foo->eutil
493
Function : Get/Set eutil
495
Args : string (eutil)
496
Throws : on invalid eutil
503
Usage : $type = $foo->datatype;
504
Function : Get/Set data object type
510
=head1 Methods useful for multiple eutils
515
Usage : my @ids = $parser->get_ids
516
Function : returns array of requested IDs (see Notes for more specifics)
518
Args : [conditional] not required except when running elink queries against
519
multiple databases. In case of the latter, the database name is
520
optional but recommended when retrieving IDs as the ID list will
521
be globbed together. In such cases, if a db name isn't provided a
522
warning is issued as a reminder.
523
Notes : esearch : returned ID list
524
elink : returned ID list (see Args above for caveats)
525
all others : from parameter_base->id or undef
530
my ($self, $request) = @_;
531
my $eutil = $self->eutil;
532
if ($self->is_lazy) {
533
$self->warn('get_ids() not implemented when using lazy mode');
536
$self->parse_data unless $self->data_parsed;
537
if ($eutil eq 'esearch') {
538
return $self->{'_id'} ? @{ $self->{'_id'} } : ();
539
} elsif ($eutil eq 'elink') {
542
if (ref $request eq 'CODE') {
543
push @ids, map {$_->get_ids }
544
grep { $request->($_) } $self->get_LinkSets;
548
grep {grep { $_ eq $request } @{$_->[1]}}
549
map {[[$_->get_ids], [$_->get_databases]]} $self->get_LinkSets;
552
$self->warn('Multiple database present, IDs will be globbed together')
553
if $self->get_linked_databases > 1;
554
push @ids, map {$_->get_ids } $self->get_LinkSets;
557
} elsif ($eutil eq 'esummary') {
558
unless (exists $self->{'_id'}) {
559
push @{$self->{'_id'}}, map {$_->get_id } $self->get_DocSums;
561
return @{$self->{'_id'}};
562
} elsif (my $pb = $self->parameter_base) {
564
return $ids ? @{$ids} : ();
573
Usage : my $db = $info->get_database;
574
Function : returns single database name (eutil-compatible). This is the
575
queried database. For most eutils this is straightforward. For
576
elinks (which have 'db' and 'dbfrom') this is db/dbto, for egquery,
577
it is the first db in the list (you probably want get_databases
581
Notes : egquery : first db in the query (you probably want get_databases)
582
einfo : the queried database
583
espell : the queried database
584
all others : from parameter_base->db or undef
589
return ($_[0]->get_databases)[0];
592
=head2 get_db (alias for get_database)
597
return shift->get_database;
602
Title : get_databases
603
Usage : my @dbs = $parser->get_databases
604
Function : returns list of databases
605
Returns : array of strings
607
Notes : This is guaranteed to return a list of databases. For a single
608
database use the convenience method get_db/get_database
610
egquery : list of all databases in the query
611
einfo : the queried database, or the available databases
612
espell : the queried database
613
elink : collected from each LinkSet
614
all others : from parameter_base->db or undef
619
my ($self, $db) = @_;
620
$self->parse_data unless $self->data_parsed;
621
my $eutil = $self->eutil;
623
if ($eutil eq 'einfo' || $eutil eq 'espell') {
624
@dbs = $self->{'_dbname'} ||
625
$self->{'_database'} ||
626
$self->get_available_databases;
627
} elsif ($eutil eq 'egquery') {
628
@dbs = map {$_->get_database} ($self->get_GlobalQueries);
629
} elsif ($eutil eq 'elink') {
632
@dbs = sort grep {!$tmp{$_}++}
633
map {($_->get_databases)} $self->get_LinkSets;
634
} elsif ($self->parameter_base) {
635
if ($self->parameter_base->eutil eq 'elink') {
636
@dbs = $self->parameter_base->dbfrom;
638
@dbs = $self->parameter_base->db;
644
=head2 get_dbs (alias for get_databases)
649
return shift->get_databases;
655
Usage : while (my $hist=$parser->next_History) {...}
656
Function : returns next HistoryI (if present).
657
Returns : Bio::Tools::EUtilities::HistoryI (Cookie or LinkSet)
659
Note : esearch, epost, and elink are all capable of returning data which
660
indicates search results (in the form of UIDs) is stored on the
661
remote server. Access to this data is wrapped up in simple interface
662
(HistoryI), which is implemented in two classes:
663
Bio::DB::EUtilities::History (the simplest) and
664
Bio::DB::EUtilities::LinkSet. In general, calls to epost and esearch
665
will only return a single HistoryI object (formerly known as a
666
Cookie), but calls to elink can generate many depending on the
667
number of IDs, the correspondence, etc. Hence this iterator, which
668
allows one to retrieve said data one piece at a time.
674
$self->parse_data unless $self->data_parsed;
675
$self->{'_histories_it'} = $self->generate_iterator('histories')
676
if (!exists $self->{'_histories_it'});
677
my $hist = $self->{'_histories_it'}->();
680
=head2 next_cookie (alias for next_History)
685
return shift->next_History;
690
Title : get_Histories
691
Usage : my @hists = $parser->get_Histories
692
Function : returns list of HistoryI objects.
693
Returns : list of Bio::Tools::EUtilities::HistoryI (History or LinkSet)
700
$self->parse_data unless $self->data_parsed;
701
ref $self->{'_histories'} ? return @{ $self->{'_histories'} } : return ();
704
=head1 Query-related methods
709
Usage : my $ct = $parser->get_count
710
Function : returns the count (hits for a search)
712
Args : [CONDITIONAL] string with database name - used to retrieve
713
count from specific database when using egquery
714
Notes : egquery : count for specified database (specified above)
715
esearch : count for last search
721
my ($self, $db) = @_;
722
$self->parse_data unless $self->data_parsed;
724
if ($self->datatype eq 'multidbquery') {
726
$self->warn('Must specify database to get count from');
729
my ($gq) = grep {$_->get_database eq $db} $self->get_GlobalQueries;
730
$gq && return $gq->get_count;
731
$self->warn("Unknown database $db");
734
return $self->{'_count'} || scalar($self->get_ids);
741
Usage : $st = $qd->get_term;
742
Function : retrieve the term for the global search
745
Notes : egquery : search term
747
esearch : from parameter_base->term or undef
753
my ($self, @args) = @_;
754
$self->parse_data unless $self->data_parsed;
755
$self->{'_term'} ? $self->{'_term'} :
756
$self->{'_query'} ? $self->{'_query'} :
757
$self->parameter_base ? $self->parameter_base->term :
761
=head2 get_translation_from
763
Title : get_translation_from
764
Usage : $string = $qd->get_translation_from();
765
Function: portion of the original query replaced with translated_to()
768
Note : only applicable for esearch
772
sub get_translation_from {
774
$self->parse_data unless $self->data_parsed;
775
return $self->{'_translation'}->{'From'};
778
=head2 get_translation_to
780
Title : get_translation_to
781
Usage : $string = $qd->get_translation_to();
782
Function: replaced string used in place of the original query term in translation_from()
785
Note : only applicable for esearch
789
sub get_translation_to {
791
$self->parse_data unless $self->data_parsed;
792
return $self->{'_translation'}->{'To'};
798
Usage : $start = $qd->get_retstart();
799
Function : retstart setting for the query (either set or NCBI default)
802
Notes : esearch : retstart
804
all others : from parameter_base->retstart or undef
810
$self->parse_data unless $self->data_parsed;
811
return $self->{'_retstart'};
817
Usage : $max = $qd->get_retmax();
818
Function : retmax setting for the query (either set or NCBI default)
821
Notes : esearch : retmax
823
all others : from parameter_base->retmax or undef
829
$self->parse_data unless $self->data_parsed;
830
return $self->{'_retmax'};
833
=head2 get_query_translation
835
Title : get_query_translation
836
Usage : $string = $qd->get_query_translation();
837
Function: returns the translated query used for the search (if any)
840
Notes : only applicable for esearch. This is the actual term used for
845
sub get_query_translation {
847
$self->parse_data unless $self->data_parsed;
848
return $self->{'_querytranslation'};
851
=head2 get_corrected_query
853
Title : get_corrected_query
854
Usage : my $cor = $eutil->get_corrected_query;
855
Function : retrieves the corrected query when using espell
858
Notes : only applicable for espell.
862
sub get_corrected_query {
864
$self->parse_data unless $self->data_parsed;
865
return $self->{'_correctedquery'};
868
=head2 get_replaced_terms
870
Title : get_replaced_terms
871
Usage : my $term = $eutil->get_replaced_terms
872
Function : returns array of strings replaced in the query
875
Notes : only applicable for espell
879
sub get_replaced_terms {
881
$self->parse_data unless $self->data_parsed;
882
if ($self->{'_spelledquery'} && $self->{'_spelledquery'}->{Replaced}) {
883
ref $self->{'_spelledquery'}->{Replaced} ?
884
return @{ $self->{'_spelledquery'}->{Replaced} } : return ();
888
=head2 next_GlobalQuery
890
Title : next_GlobalQuery
891
Usage : while (my $query = $eutil->next_GlobalQuery) {...}
892
Function : iterates through the queries returned from an egquery search
893
Returns : GlobalQuery object
895
Notes : only applicable for egquery
899
sub next_GlobalQuery {
901
$self->parse_data unless $self->data_parsed;
902
$self->{'_globalqueries_it'} = $self->generate_iterator('globalqueries')
903
if (!exists $self->{'_globalqueries_it'});
904
$self->{'_globalqueries_it'}->();
907
=head2 get_GlobalQueries
909
Title : get_GlobalQueries
910
Usage : @queries = $eutil->get_GlobalQueries
911
Function : returns list of GlobalQuery objects
912
Returns : array of GlobalQuery objects
914
Notes : only applicable for egquery
918
sub get_GlobalQueries {
920
$self->parse_data unless $self->data_parsed;
921
ref $self->{'_globalqueries'} ? return @{ $self->{'_globalqueries'} } : return ();
924
=head2 print_GlobalQueries
926
Title : print_GlobalQueries
927
Usage : $docsum->print_GlobalQueries();
928
$docsum->print_GlobalQueries(-fh => $fh, -callback => $coderef);
929
Function : prints item data for all global queries. The default printing
930
method is each item per DocSum is printed with relevant values if
931
present in a simple table using Text::Wrap.
934
-file : file to print to
935
-fh : filehandle to print to (cannot be used concurrently with file)
936
-cb : coderef to use in place of default print method. This is passed
937
in a GlobalQuery object;
938
-wrap : number of columns to wrap default text output to (def = 80)
939
Notes : only applicable for esummary. If -file or -fh are not defined,
944
sub print_GlobalQueries {
945
my ($self, @args) = @_;
946
$self->_print_handler(@args, -type => 'GlobalQuery');
949
=head1 Summary-related methods
954
Usage : while (my $ds = $esum->next_DocSum) {...}
955
Function : iterate through DocSum instances
956
Returns : single Bio::Tools::EUtilities::Summary::DocSum
958
Notes : only applicable for esummary
964
if(!$self->data_parsed && !$self->is_lazy) {
967
$self->{'_docsums_it'} = $self->generate_iterator('docsums')
968
if (!exists $self->{'_docsums_it'});
969
$self->{'_docsums_it'}->();
975
Usage : my @docsums = $esum->get_DocSums
976
Function : retrieve a list of DocSum instances
977
Returns : array of Bio::Tools::EUtilities::Summary::DocSum
979
Notes : only applicable for esummary
985
if ($self->is_lazy) {
986
$self->warn('get_DocSums() not implemented when using lazy mode');
989
$self->parse_data unless $self->data_parsed;
990
return ref $self->{'_docsums'} ? @{ $self->{'_docsums'} } : return ();
995
Title : print_DocSums
996
Usage : $docsum->print_DocSums();
997
$docsum->print_DocSums(-fh => $fh, -cb => $coderef);
998
Function : prints item data for all docsums. The default data is generated
999
via DocSum::to_string
1002
-file : file to print to
1003
-fh : filehandle to print to (cannot be used concurrently with file)
1004
-cb : coderef to use in place of default print method. This is passed
1006
-wrap : number of columns to wrap default text output to (def = 80)
1007
Notes : only applicable for esummary. If -file or -fh are not defined,
1013
my ($self, @args) = @_;
1014
$self->_print_handler(@args, -type => 'DocSum');
1017
=head1 Info-related methods
1019
=head2 get_available_databases
1021
Title : get_available_databases
1022
Usage : my @dbs = $info->get_available_databases
1023
Function : returns list of available eutil-compatible database names
1024
Returns : Array of strings
1026
Notes : only applicable for einfo.
1030
sub get_available_databases {
1032
$self->parse_data unless $self->data_parsed;
1033
($self->{'_available_databases'}) ?
1034
return @{($self->{'_available_databases'})} :
1038
=head2 get_record_count
1040
Title : get_record_count
1041
Usage : my $ct = $eutil->get_record_count;
1042
Function : returns database record count
1045
Notes : only applicable for einfo.
1049
sub get_record_count {
1051
$self->parse_data unless $self->data_parsed;
1052
return $self->{'_count'}
1055
=head2 get_last_update
1057
Title : get_last_update
1058
Usage : my $time = $info->get_last_update;
1059
Function : returns string containing time/date stamp for last database update
1062
Notes : only applicable for einfo.
1066
sub get_last_update {
1068
$self->parse_data unless $self->data_parsed;
1069
return $self->{'_lastupdate'}
1072
=head2 get_menu_name
1074
Title : get_menu_name
1075
Usage : my $nm = $info->get_menu_name;
1076
Function : returns string of database menu name
1079
Notes : only applicable for einfo.
1085
$self->parse_data unless $self->data_parsed;
1086
exists $self->{'_menuname'} ? return $self->{'_menuname'} :
1087
exists $self->{'_menu'} ? return $self->{'_menu'} :
1091
=head2 get_description
1093
Title : get_description
1094
Usage : my $desc = $info->get_description;
1095
Function : returns database description
1098
Notes : only applicable for einfo.
1102
sub get_description {
1104
$self->parse_data unless $self->data_parsed;
1105
return $self->{'_description'};
1108
=head2 next_FieldInfo
1110
Title : next_FieldInfo
1111
Usage : while (my $field = $info->next_FieldInfo) {...}
1112
Function : iterate through FieldInfo objects
1113
Returns : Field object
1115
Notes : only applicable for einfo. Uses callback() for filtering if defined
1120
sub next_FieldInfo {
1122
$self->parse_data unless $self->data_parsed;
1123
$self->{'_fieldinfo_it'} = $self->generate_iterator('fieldinfo')
1124
if (!exists $self->{'_fieldinfo_it'});
1125
$self->{'_fieldinfo_it'}->();
1128
=head2 get_FieldInfo
1130
Title : get_FieldInfo
1131
Usage : my @fields = $info->get_FieldInfo;
1132
Function : returns list of FieldInfo objects
1133
Returns : array (FieldInfo objects)
1135
Notes : only applicable for einfo.
1141
$self->parse_data unless $self->data_parsed;
1142
return ref $self->{'_fieldinfo'} ? @{ $self->{'_fieldinfo'} } : return ();
1145
*get_FieldInfos = \&get_FieldInfo;
1147
=head2 next_LinkInfo
1149
Title : next_LinkInfo
1150
Usage : while (my $link = $info->next_LinkInfo) {...}
1151
Function : iterate through LinkInfo objects
1152
Returns : LinkInfo object
1154
Notes : only applicable for einfo. Uses callback() for filtering if defined
1161
$self->parse_data unless $self->data_parsed;
1162
$self->{'_linkinfo_it'} = $self->generate_iterator('linkinfo')
1163
if (!exists $self->{'_linkinfo_it'});
1164
$self->{'_linkinfo_it'}->();
1169
Title : get_LinkInfo
1170
Usage : my @links = $info->get_LinkInfo;
1171
Function : returns list of LinkInfo objects
1172
Returns : array (LinkInfo objects)
1174
Notes : only applicable for einfo.
1180
$self->parse_data unless $self->data_parsed;
1181
return ref $self->{'_linkinfo'} ? @{ $self->{'_linkinfo'} } : return ();
1184
*get_LinkInfos = \&get_LinkInfo;
1186
=head2 print_FieldInfo
1188
Title : print_FieldInfo
1189
Usage : $info->print_FieldInfo();
1190
$info->print_FieldInfo(-fh => $fh, -cb => $coderef);
1191
Function : prints link data for each FieldInfo object. The default is generated
1192
via FieldInfo::to_string
1195
-file : file to print to
1196
-fh : filehandle to print to (cannot be used concurrently with file)
1197
-cb : coderef to use in place of default print method. This is
1198
passed in a FieldInfo object
1199
-wrap : number of columns to wrap default text output to (def = 80)
1200
Notes : only applicable for einfo. If -file or -fh are not defined,
1205
sub print_FieldInfo {
1206
my ($self, @args) = @_;
1207
$self->_print_handler(@args, -type => 'FieldInfo');
1210
=head2 print_LinkInfo
1212
Title : print_LinkInfo
1213
Usage : $info->print_LinkInfo();
1214
$info->print_LinkInfo(-fh => $fh, -cb => $coderef);
1215
Function : prints link data for each LinkInfo object. The default is generated
1216
via LinkInfo::to_string
1219
-file : file to print to
1220
-fh : filehandle to print to (cannot be used concurrently with file)
1221
-cb : coderef to use in place of default print method. This is passed
1222
in a LinkInfo object
1223
-wrap : number of columns to wrap default text output to (def = 80)
1224
Notes : only applicable for einfo. If -file or -fh are not defined,
1229
sub print_LinkInfo {
1230
my ($self, @args) = @_;
1231
$self->_print_handler(@args, -type => 'LinkInfo');
1234
=head1 Bio::Tools::EUtilities::Link-related methods
1238
Title : next_LinkSet
1239
Usage : while (my $ls = $eutil->next_LinkSet {...}
1240
Function : iterate through LinkSet objects
1241
Returns : LinkSet object
1243
Notes : only applicable for elink. Uses callback() for filtering if defined
1250
#$self->parse_data unless $self->data_parsed;
1251
if(!$self->data_parsed && !$self->is_lazy) {
1254
$self->{'_linksets_it'} = $self->generate_iterator('linksets')
1255
if (!exists $self->{'_linksets_it'});
1256
$self->{'_linksets_it'}->();
1261
Title : get_LinkSets
1262
Usage : my @links = $info->get_LinkSets;
1263
Function : returns list of LinkSets objects
1264
Returns : array (LinkSet objects)
1266
Notes : only applicable for elink.
1270
# add support for retrieval of data if lazy parsing is enacted
1274
if ($self->is_lazy) {
1275
$self->warn('get_LinkSets() not implemented when using lazy mode');
1278
$self->parse_data unless $self->data_parsed;
1279
return ref $self->{'_linksets'} ? @{ $self->{'_linksets'} } : return ();
1282
=head2 print_LinkSets
1284
Title : print_LinkSets
1285
Usage : $info->print_LinkSets();
1286
$info->print_LinkSets(-fh => $fh, -cb => $coderef);
1287
Function : prints link data for each LinkSet object. The default is generated
1288
via LinkSet::to_string
1291
-file : file to print to
1292
-fh : filehandle to print to (cannot be used concurrently with file)
1293
-cb : coderef to use in place of default print method. This is passed
1295
-wrap : number of columns to wrap default text output to (def = 80)
1296
Notes : only applicable for einfo. If -file or -fh are not defined,
1301
sub print_LinkSets {
1302
my ($self, @args) = @_;
1303
$self->_print_handler(@args, -type => 'LinkSet');
1306
=head2 get_linked_databases
1308
Title : get_linked_databases
1309
Usage : my @dbs = $eutil->get_linked_databases
1310
Function : returns list of databases linked to in linksets
1311
Returns : array of databases
1313
Notes : only applicable for elink. Now defers to get_databases.
1317
sub get_linked_databases {
1319
return $self->get_databases if $self->eutil eq 'elink';
1323
=head1 Iterator- and callback-related methods
1328
my %VALID_ITERATORS = (
1329
'globalqueries' => 'globalqueries',
1330
'fieldinfo' => 'fieldinfo',
1331
'fieldinfos' => 'fieldinfo',
1332
'linkinfo' => 'linkinfo',
1333
'linkinfos' => 'linkinfo',
1334
'linksets' => 'linksets',
1335
'docsums' => 'docsums',
1336
'histories' => 'histories'
1342
Usage : $esum->rewind()
1343
$esum->rewind('recursive')
1344
Function : retrieve a list of DocSum instances
1345
Returns : array of Bio::Tools::EUtilities::Summary::DocSum
1346
Args : [optional] Scalar; string ('all') to reset all iterators, or string
1347
describing the specific main object iterator to reset. The following
1348
are recognized (case-insensitive):
1350
'all' - rewind all objects and also recursively resets nested object interators
1351
(such as LinkSets and DocSums).
1352
'globalqueries' - GlobalQuery objects
1353
'fieldinfo' or 'fieldinfos' - FieldInfo objects
1354
'linkinfo' or 'linkinfos' - LinkInfo objects in this layer
1355
'linksets' - LinkSet objects
1356
'docsums' - DocSum objects
1357
'histories' - HistoryI objects (Cookies, LinkSets)
1362
my ($self, $arg) = ($_[0], lc $_[1]);
1363
my $eutil = $self->eutil;
1364
if ($self->is_lazy) {
1365
$self->warn('rewind() not implemented yet when running in lazy mode');
1369
if (exists $VALID_ITERATORS{$arg}) {
1370
delete $self->{'_'.$arg.'_it'};
1371
} elsif ($arg eq 'all') {
1372
for my $it (values %VALID_ITERATORS){
1373
delete $self->{'_'.$it.'_it'} if
1374
exists $self->{'_'.$it.'_it'};
1375
map {$_->rewind('all')} $self->get_LinkSets;
1376
map {$_->rewind('all')} $self->get_DocSums;
1381
=head2 generate_iterator
1383
Title : generate_iterator
1384
Usage : my $coderef = $esum->generate_iterator('linkinfo')
1385
Function : generates an iterator (code reference) which iterates through
1386
the relevant object indicated by the args
1387
Returns : code reference
1388
Args : [REQUIRED] Scalar; string describing the specific object to iterate.
1389
The following are currently recognized (case-insensitive):
1392
'fieldinfo' or 'fieldinfos' (the latter sounds clumsy, but I alias it JIC)
1393
'linkinfo' or 'linkinfos' (the latter sounds clumsy, but I alias it JIC)
1398
Note : This function generates a simple coderef that one can use
1399
independently of the various next_* functions (in fact, the next_*
1400
functions use lazily created iterators generated via this method,
1401
while rewind() merely deletes them so they can be regenerated on the
1404
A callback specified using callback() will be used to filter objects
1405
for any generated iterator. This behaviour is implemented for both
1406
normal and lazy iterator types and is the default. If you don't want
1407
this, make sure to reset any previously set callbacks via
1408
reset_callback() (which just deletes the code ref). Note that setting
1409
callback() also changes the behavior of the next_* functions as the
1410
iterators are generated here (as described above); this is a feature
1413
'Lazy' iterators are considered an experimental feature and may be
1414
modified in the future. A 'lazy' iterator, which loops through and
1415
returns objects as they are created (instead of creating all data
1416
instances up front, then iterating through) is returned if the
1417
parser is set to 'lazy' mode. This mode is only present for elink
1418
and esummary output as they are the two formats parsed which can
1419
generate potentially thousands of individual objects (note efetch
1420
isn't parsed, so isn't counted). Use of rewind() with these
1421
iterators is not supported for the time being as we can't guarantee
1422
you can rewind(), as this depends on whether the data source is
1423
seek()able and thus 'rewindable'. We will add rewind() support at a
1424
later time which will work for 'seekable' data or possibly cached
1425
objects via Storable or BDB.
1429
sub generate_iterator {
1430
my ($self, $obj) = @_;
1432
$self->throw('Must provide object type to iterate');
1433
} elsif (!exists $VALID_ITERATORS{$obj}) {
1434
$self->throw("Unknown object type [$obj]");
1436
my $cb = $self->callback;
1437
if ($self->is_lazy) {
1438
my $type = $self->eutil eq 'esummary' ? '_docsums' : '_linksets';
1439
$self->{$type} = [];
1441
if (!@{$self->{$type}}) {
1442
$self->parse_chunk; # fill the queue
1444
while (my $obj = shift @{$self->{$type}}) {
1446
($cb->($obj)) ? return $obj : next;
1454
my $loc = '_'.$VALID_ITERATORS{$obj};
1455
my $index = $#{$self->{$loc}};
1458
while ($current <= $index) {
1460
if (my $d = $cb->($self->{$loc}->[$current])) {
1461
return $self->{$loc}->[$current++] }
1467
return $self->{$loc}->[$current++]
1480
Usage : $parser->callback(sub {$_[0]->get_database eq 'protein'});
1481
Function : Get/set callback code ref used to filter returned data objects
1482
Returns : code ref if previously set
1483
Args : single argument:
1484
code ref - evaluates a passed object and returns true or false value
1486
'reset' - string, resets the iterator.
1487
returns upon any other args
1492
my ($self, $cb) = @_;
1494
delete $self->{'_cb'} if ($cb eq 'reset');
1495
return if ref $cb ne 'CODE';
1496
$self->{'_cb'} = $cb;
1498
return $self->{'_cb'};
1501
# Object printing methods
1504
my $DEF_HANDLER = sub {
1506
return $obj->to_string."\n";
1518
sub _print_handler {
1520
my ($file, $fh, $cb, $wrap, $type, $all) = $self->_rearrange([qw(FILE FH CB WRAP TYPE ALL)], @_);
1523
# default formatting delegates to_string
1525
$self->throw("Type $type not registered with print handler, exiting...")
1526
if !exists($HANDLER{$type});
1529
$self->throw("Callback must be a code reference") if ref $cb ne 'CODE';
1533
$self->throw("Have defined both file and filehandle; only use one!") if $file && $fh;
1534
my $io = ($file) ? Bio::Root::IO->new(-input => $file, -flush => 1) :
1535
Bio::Root::IO->new(-flush => 1); # defaults to STDOUT
1537
if ($type eq 'all') {
1538
my $string = $cb->($self);
1539
$io->_print($string) if $string;
1542
my $it = "next_$type";
1543
$self->throw("Unknown iterator method $it") unless $self->can($it);
1544
while (my $obj = $self->$it) {
1545
my $string = $cb->($obj);
1546
$io->_print($string) if $string;
1556
return shift->{'_seekable'}
1559
# fixes odd bad XML issue espell data (still present 6-24-07)
1562
my ($self, $response) = @_;
1564
my $type = ref($response);
1565
if ($type eq 'GLOB') {
1566
$temp .= $_ for <$response>;
1567
} elsif ($type eq 'HTTP::Response') {
1568
$temp = $response->content;
1570
$self->throw("Unrecognized ref type $type");
1572
if ($temp =~ m{^<html>}) {
1573
$self->throw("NCBI espell nonrecoverable error: HTML content returned")
1575
$temp =~ s{<ERROR>(.*?)<ERROR>}{<ERROR>$1</ERROR>};
1579
sub _load_eutil_module {
1580
my ($self, $class) = @_;
1582
my $module = "Bio::Tools::EUtilities::" . $class;
1585
$ok = $self->_load_module($module);
1589
$self: data module $module cannot be found
1591
For more information about the EUtilities system please see the EUtilities docs.