1
# $Id: eutils.pm,v 1.11.4.1 2006/10/02 23:10:15 sendu Exp $
3
# BioPerl module Bio::DB::Biblio::eutils.pm
5
# Cared for by Allen Day <allenday@ucla.edu>
6
# For copyright and disclaimer see below.
8
# POD documentation - main docs before the code
12
Bio::DB::Biblio::eutils - Access to PubMed's bibliographic query service
16
Do not use this object directly; it is recommended to access it and use
17
it through the I<Bio::Biblio> module:
20
my $biblio = new Bio::Biblio (-access => 'eutils');
21
$biblio->db('PMC'); #optional, default is PubMed.
25
This object contains the real implementation of a Bibliographic Query
26
Service as defined in L<Bio::DB::BiblioI>.
28
L<Bio::DB::BiblioI> is not implemented as documented in the interface,
29
particularly the find() method, which is not compatible with PubMed's
36
User feedback is an integral part of the evolution of this and other
37
Bioperl modules. Send your comments and suggestions preferably to
38
the Bioperl mailing list. Your participation is much appreciated.
40
bioperl-l@bioperl.org - General discussion
41
http://bioperl.org/wiki/Mailing_lists - About the mailing lists
45
Report bugs to the Bioperl bug tracking system to help us keep track
46
of the bugs and their resolution. Bug reports can be submitted via
49
http://bugzilla.open-bio.org/
53
Allen Day E<lt>allenday@ucla.eduE<gt>
57
Copyright (c) 2004 Allen Day, University of California, Los Angeles.
59
This module is free software; you can redistribute it and/or modify
60
it under the same terms as Perl itself.
64
This software is provided "as is" without warranty of any kind.
66
=head1 BUGS AND LIMITATIONS
72
More testing and debugging needed to ensure that returned citations
73
are properly transferred even if they contain foreign characters.
77
Maximum record count (MAX_RECORDS) returned currently hard coded to
82
Biblio retrieval methods should be more tightly integrated with
83
L<Bio::Biblio::Ref> and L<Bio::DB::MeSH>.
90
http://eutils.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html
93
http://eutils.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html
96
examples/biblio/biblio-eutils-example.pl
100
The main documentation details are to be found in
103
Here are the rest of the object methods. Interface methods first,
104
followed by internal methods.
108
# Let the code begin...
111
package Bio::DB::Biblio::eutils;
112
use vars qw($DEFAULT_URN);
118
use base qw(Bio::Biblio Bio::DB::BiblioI);
120
our $EFETCH = 'http://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi';
121
our $ESEARCH = 'http://www.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi';
122
our $MAX_RECORDS = 100_000;
124
# -----------------------------------------------------------------------------
128
Usage : my $obj = new Bio::Biblio (-access => 'eutils' ...);
129
(_initialize is internally called from this constructor)
130
Returns : 1 on success
133
This is an actual new() method (except for the real object creation
134
and its blessing which is done in the parent class Bio::Root::Root in
135
method _create_object).
137
Note that this method is called always as an I<object> method (never as
138
a I<class> method) - and that the object that calls this method may
139
already be partly initialized (from the Bio::Biblio::new method); so if you
140
need to do some tricks with the 'class invocation' you need to change
141
Bio::Biblio::new method, not this one.
146
my ($self, @args) = @_;
148
#eutils doesn't need this code, but it doesn't hurt to leave it here... -ad
150
# make a hashtable from @args
152
@param { map { lc $_ } keys %param } = values %param; # lowercase keys
154
# copy all @args into this object (overwriting what may already be
155
# there) - changing '-key' into '_key'
157
foreach my $key (keys %param) {
158
($new_key = $key) =~ s/^-/_/;
159
$self->{ $new_key } = $param { $key };
163
# set up internal data
164
$self->twig(XML::Twig->new());
166
# finally add default values for those keys that have a default value
167
# and that are not yet in the object
176
Usage : $obj->db($newval)
177
Function: specifies the database to search. valid values are:
179
pubmed, pmc, journals
181
it is also possible to add the following, and I will do
184
genome, nucleotide, protein, popset, snp, sequence, taxonomy
188
Returns : value of db (a scalar)
189
Args : on set, new value (a scalar or undef, optional)
198
my %ok = map {$_=>1} qw(pubmed pmc journals);
200
$self->{'db'} = lc($arg);
202
$self->warn("invalid db $arg, keeping value as ".$self->{'db'} || 'pubmed');
205
return $self->{'db'};
209
=head1 Methods implementing Bio::DB::BiblioI interface
211
=head2 get_collection_id
213
Title : get_collection_id
214
Usage : $id = $biblio->get_collection_id();
215
Function: returns WebEnv value from ESearch
216
Returns : ESearch WebEnv value as a string
222
sub get_collection_id {
223
return shift->collection_id();
227
return shift->count();
233
my $db = $self->db || 'pubmed';
234
$self->throw("must provide valid ID, not undef") unless defined($id);
235
my $xml = get($EFETCH.'?rettype=abstract&retmode=xml&db='.$db.'&id='.$id);
239
=head2 reset_retrieval
241
Title : reset_retrieval
242
Usage : $biblio->reset_retrieval();
243
Function: reset cursor in id list, see cursor()
250
sub reset_retrieval {
258
Usage : $xml = $biblio->get_next();
259
Function: return next record as xml
260
Returns : an xml string
269
return unless $self->has_next;
271
my $xml = $self->get_by_id( @{ $self->ids }[$self->cursor] );
272
$self->cursor( $self->cursor + 1 );
280
Usage : $xml = $biblio->get_more($more);
281
Function: returns next $more records concatenated
282
Returns : a string containing multiple xml documents
283
Args : an integer representing how many records to retrieve
289
my ($self,$more) = @_;
294
my $next = $self->get_next();
305
Usage : $has_next = $biblio->has_next();
306
Function: check to see if there are more items to be retrieved
307
Returns : 1 on true, undef on false
315
return ($self->cursor < $self->count) ? 1 : undef;
323
Usage : $biblio = $biblio->find($pubmed_query_phrase);
324
Function: perform a PubMed query using Entrez ESearch
325
Returns : a reference to the object on which the method was called
326
Args : a PubMed query phrase. See
327
http://eutils.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html
328
for help on how to construct a query.
333
my ($self,$query) = @_;
335
$query = uri_escape($query);
337
my $db = $self->db || 'pubmed';
339
my $url = $ESEARCH."?usehistory=y&db=$db&retmax=$MAX_RECORDS&term=$query";
341
my $xml = get($url) or $self->throw("couldn't retrieve results from $ESEARCH: $!");
343
$self->twig->parse($xml);
345
my @ids = map {$_->text} $self->twig->get_xpath('//IdList//Id');
349
#should we be using the ids, or the count tag?
351
my($count_element) = $self->twig->get_xpath('//Count');
352
my $count = $count_element->text();
353
$self->count(scalar(@ids));
355
my($retmax_element) = $self->twig->get_xpath('//RetMax');
356
my $retmax = $retmax_element->text();
358
my($querykey_element) = $self->twig->get_xpath('//QueryKey');
359
my $querykey = $querykey_element->text();
360
$self->query_key($querykey);
362
my($webenv_element) = $self->twig->get_xpath('//WebEnv');
363
my $webenv = $webenv_element->text();
364
$self->collection_id($webenv);
366
#initialize/reset cursor
376
Usage : @ids = $biblio->get_all_ids();
377
Function: return a list of PubMed ids resulting from call to find()
378
Returns : a list of PubMed ids, or an empty list
386
return $self->ids() if $self->ids();
393
Usage : $xml = $biblio->get_all();
394
Function: retrieve all records from query
395
Returns : a large concatenated string of PubMed xml documents
404
my $db = $self->db || 'pubmed';
406
my $xml = get($EFETCH.'?rettype=abstract&retmode=xml&db=pubmed&query_key='.
407
$self->query_key.'&WebEnv='.$self->collection_id.
408
'&retstart=1&retmax='.$MAX_RECORDS
418
Function: no-op. this is here only for interface compatibility
434
Function: no-op. this is here only for interface compatibility
446
=head2 get_vocabulary_names
448
Title : get_vocabulary_names
450
Function: no-op. this is here only for interface compatibility
451
Returns : empty arrayref
457
sub get_vocabulary_names {
465
Function: no-op. this is here only for interface compatibility
476
=head2 get_entry_description
478
Title : get_entry_description
480
Function: no-op. this is here only for interface compatibility
487
sub get_entry_description {
491
=head2 get_all_values
493
Title : get_all_values
495
Function: no-op. this is here only for interface compatibility
506
=head2 get_all_entries
508
Title : get_all_entries
510
Function: no-op. this is here only for interface compatibility
517
sub get_all_entries {
521
=head1 Internal methods unrelated to Bio::DB::BiblioI
526
Usage : $obj->cursor($newval)
527
Function: holds position in reference collection
528
Returns : value of cursor (a scalar)
529
Args : on set, new value (a scalar or undef, optional)
538
return $self->{'cursor'} = $arg if defined($arg);
539
return $self->{'cursor'};
545
Usage : $obj->twig($newval)
546
Function: holds an XML::Twig instance.
547
Returns : value of twig (a scalar)
548
Args : on set, new value (a scalar or undef, optional)
556
return $self->{'twig'} = shift if @_;
557
return $self->{'twig'};
563
Usage : $obj->ids($newval)
564
Function: store pubmed ids resulting from find() query
565
Returns : value of ids (a scalar)
566
Args : on set, new value (a scalar or undef, optional)
574
return $self->{'ids'} = shift if @_;
575
return $self->{'ids'};
580
Title : collection_id
581
Usage : $obj->collection_id($newval)
583
Returns : value of collection_id (a scalar)
584
Args : on set, new value (a scalar or undef, optional)
592
return $self->{'collection_id'} = shift if @_;
593
return $self->{'collection_id'};
599
Usage : $obj->count($newval)
601
Returns : value of count (a scalar)
602
Args : on set, new value (a scalar or undef, optional)
610
return $self->{'count'} = shift if @_;
611
return $self->{'count'};
617
Usage : $obj->query_key($newval)
618
Function: holds query_key from ESearch document
619
Returns : value of query_key (a scalar)
620
Args : on set, new value (a scalar or undef, optional)
628
return $self->{'query_key'} = shift if @_;
629
return $self->{'query_key'};