2
# BioPerl module for Bio::Tools::EUtilities::EUtilParameters
4
# Please direct questions and support issues to <bioperl-l@bioperl.org>
6
# Cared for by Chris Fields <cjfields at bioperl dot org>
8
# Copyright Chris Fields
10
# You may distribute this module under the same terms as perl itself
12
# POD documentation - main docs before the code
16
Bio::Tools::EUtilities::EUtilParameters - Manipulation of NCBI eutil-based
17
parameters for remote database requests.
21
# Bio::Tools::EUtilities::EUtilParameters implements Bio::ParameterBaseI
23
my @params = (-eutil => 'efetch',
26
email => 'me@foo.bar',
29
my $p = Bio::Tools::EUtilities::EUtilParameters->new(@params);
31
if ($p->parameters_changed) {
35
$p->set_parameters(@extra_params); # set new NCBI parameters, leaves others preset
37
$p->reset_parameters(@new_params); # reset NCBI parameters to original state
39
$p->to_string(); # get a URI-encoded string representation of the URL address
41
$p->to_request(); # get an HTTP::Request object (to pass on to LWP::UserAgent)
45
Bio::Tools::EUtilities::EUtilParameters is-a Bio::ParameterBaseI implementation
46
that allows simple manipulation of NCBI eutil parameters for CGI-based queries.
47
SOAP-based methods may be added in the future.
49
For simplicity parameters do not require dashes when passed and do not need URI
50
encoding (spaces are converted to '+', symbols encoded, etc). Also, the
51
following extra parameters can be passed to the new() constructor or via
52
set_parameters() or reset_parameters():
54
eutil - the eutil to be used. The default is 'efetch' if not set.
55
correspondence - Flag for how IDs are treated. Default is undef (none).
56
history - a Bio::Tools::EUtilities::HistoryI object. Default is undef (none).
58
At this point minimal checking is done for potential errors in parameter
59
passing, though these should be easily added in the future when necessary.
65
User feedback is an integral part of the
66
evolution of this and other Bioperl modules. Send
67
your comments and suggestions preferably to one
68
of the Bioperl mailing lists. Your participation
71
bioperl-l@lists.open-bio.org - General discussion
72
http://www.bioperl.org/wiki/Mailing_lists - About the mailing lists
76
Please direct usage questions or support issues to the mailing list:
78
I<bioperl-l@bioperl.org>
80
rather than to the module maintainer directly. Many experienced and
81
reponsive experts will be able look at the problem and quickly
82
address it. Please include a thorough description of the problem
83
with code and data examples if at all possible.
87
Report bugs to the Bioperl bug tracking system to
88
help us keep track the bugs and their resolution.
89
Bug reports can be submitted via the web.
91
https://redmine.open-bio.org/projects/bioperl/
95
Email cjfields at bioperl dot org
99
The rest of the documentation details each of the
100
object methods. Internal methods are usually
105
# Let the code begin...
107
package Bio::Tools::EUtilities::EUtilParameters;
111
use base qw(Bio::Root::Root Bio::ParameterBaseI);
116
# eutils only has one hostbase URL
118
# mode : GET or POST (HTTP::Request)
119
# location : CGI location
120
# params : allowed parameters for that eutil
124
'location' => 'einfo.fcgi',
125
'params' => [qw(db tool email)],
128
'mode' => ['POST','GET'],
129
'location' => 'epost.fcgi',
130
'params' => [qw(db retmode id tool email WebEnv query_key)],
133
'mode' => ['GET','POST'],
134
'location' => 'efetch.fcgi',
135
'params' => [qw(db retmode id retmax retstart rettype strand seq_start
136
seq_stop complexity report tool email WebEnv query_key)],
139
'mode' => ['GET','POST'],
140
'location' => 'esearch.fcgi',
141
'params' => [qw(db retmode usehistory term field reldate mindate
142
maxdate datetype retmax retstart rettype sort tool email
146
'mode' => ['GET','POST'],
147
'location' => 'esummary.fcgi',
148
'params' => [qw(db retmode id retmax retstart rettype tool email
152
'mode' => ['GET','POST'],
153
'location' => 'elink.fcgi',
154
'params' => [qw(db retmode id reldate mindate maxdate datetype term
155
dbfrom holding cmd version tool email linkname WebEnv
159
'mode' => ['GET','POST'],
160
'location' => 'egquery.fcgi',
161
'params' => [qw(term retmode tool email)],
164
'mode' => ['GET','POST'],
165
'location' => 'espell.fcgi',
166
'params' => [qw(db retmode term tool email )],
172
# generate getter/setters (will move this into individual ones at some point)
175
@PARAMS = qw(db id email retmode rettype usehistory term field tool
176
reldate mindate maxdate datetype retstart retmax sort seq_start seq_stop
177
strand complexity report dbfrom cmd holding version linkname WebEnv
179
for my $method (@PARAMS) {
182
my (\$self, \$val) = \@_;
184
if ((!defined \$self->{'_$method'}) ||
185
(defined \$self->{'_$method'} && \$self->{'_$method'} ne \$val)) {
186
\$self->{'_statechange'} = 1;
187
\$self->{'_$method'} = \$val;
190
return \$self->{'_$method'};
197
my ($class, @args) = @_;
198
my $self = $class->SUPER::new(@args);
199
my ($retmode) = $self->_rearrange(["RETMODE"],@args);
200
# order is important here, eutil must be set first so that proper error
201
# checking occurs for the later attributes
202
$self->_set_from_args(\@args,
203
-methods => [@PARAMS, qw(eutil history correspondence id_file request_mode)]);
204
$self->eutil() || $self->eutil('efetch');
205
$self->tool() || $self->tool('BioPerl');
206
# set default retmode if not explicitly set
207
$self->set_default_retmode if (!$retmode);
208
$self->{'_statechange'} = 1;
212
=head1 Bio::ParameterBaseI implemented methods
214
=head2 set_parameters
216
Title : set_parameters
217
Usage : $pobj->set_parameters(@params);
218
Function: sets the NCBI parameters listed in the hash or array
220
Args : [optional] hash or array of parameter/values.
221
Note : This sets any parameter passed but leaves previously set data alone.
222
In addition to regular eutil-specific parameters, you can set the
225
-eutil - the eUtil to be used (default 'efetch')
226
-history - pass a HistoryI-implementing object, which
227
sets the WebEnv, query_key, and possibly db and linkname
228
(the latter two only for LinkSets)
229
-correspondence - Boolean flag, set to TRUE or FALSE; indicates how
230
IDs are to be added together for elink request where
231
ID correspondence might be needed
237
my ($self, @args) = @_;
238
# allow automated resetting; must check to ensure that retmode isn't explicitly passed
239
my ($newmode,$file) = $self->_rearrange([qw(RETMODE ID_FILE)],@args);
240
$self->_set_from_args(\@args, -methods => [@PARAMS, qw(eutil correspondence history)]);
241
# set default retmode if not explicitly passed
242
$self->set_default_retmode unless $newmode;
243
$file && $self->id_file($file);
247
=head2 reset_parameters
249
Title : reset_parameters
250
Usage : resets values
251
Function: resets parameters to either undef or value in passed hash
253
Args : [optional] hash of parameter-value pairs
254
Note : This sets any parameter passed, but resets all others (deletes them).
255
In addition to regular eutil-specific parameters, you can set the
258
-eutil - the eUtil to be used (default 'efetch')
259
-history - pass a HistoryI-implementing object, which
260
sets the WebEnv, query_key, and possibly db and linkname
261
(the latter two only for LinkSets)
262
-correspondence - Boolean flag, set to TRUE or FALSE; indicates how
263
IDs are to be added together for elink request where
264
ID correspondence might be needed
269
sub reset_parameters {
270
my ($self, @args) = @_;
271
# is there a better way of doing this? probably, but this works...
272
my ($retmode,$file) = $self->_rearrange([qw(RETMODE ID_FILE)],@args);
273
map { defined $self->{"_$_"} && undef $self->{"_$_"} } (@PARAMS, qw(eutil correspondence history_cache request_cache));
274
$self->_set_from_args(\@args, -methods => [@PARAMS, qw(eutil correspondence history)]);
275
$self->eutil() || $self->eutil('efetch');
276
$self->set_default_retmode unless $retmode;
277
$file && $self->id_file($file);
278
$self->{'_statechange'} = 1;
284
Usage : $obj->carryover(qw(email tool db))
285
Function : Carries over the designated parameters when using reset_parameters()
286
Returns : a list of carried-over parameters
287
Args : An array reference of parameters to carry over, followed optionally
288
by the mode ('add' or 'delete', indicating whether to append to or
289
remove the specified values passed in). To clear all values, pass in
290
an empty array reference (the mode in this case doesn't matter).
292
In addition to the normal eUtil-specific parameters, the following
293
additional parameters are allowed:
295
-eutil - the eUtil to be used (default 'efetch')
296
-history - pass a HistoryI-implementing object, which
297
sets the WebEnv, query_key, and possibly db and linkname
298
(the latter two only for LinkSets)
299
-correspondence - Boolean flag, set to TRUE or FALSE; indicates how
300
IDs are to be added together for elink request where
301
ID correspondence might be needed
303
Default : None (no carried over parameters)
304
Status : NYI (dev in progress, carry on, nothing to see here)
309
my ($self, $params, $mode) = @_;
310
my %allowed = map {$_ => 1} (@PARAMS, qw(eutil history correspondence));
312
$self->throw("Must pass in an array ref of parameters") unless
313
ref($params) eq 'ARRAY';
315
$self->throw("Mode must be 'add' or 'delete'") unless $mode eq 'add' || $mode eq 'delete';
316
if (!scalar(@$params)) { # empty array ref
317
$self->{_carryover} = {};
319
for my $p (@$params) {
320
if (!exists $allowed{$p}) {
321
$self->warn("$p is not a recognized eutil parameter");
324
if ($mode eq 'add') {
325
$self->{_carryover}->{$p} = 1;
327
delete $self->{_carryover}->{$p} if exists
328
$self->{_carryover}->{$p};
333
sort keys %{$self->{_carryover}} || ();
336
sub _reset_except_carryover {
338
#for my $p (@PARAMS, qw(eutil correspondence history_cache request_cache)) {
339
# undef $self->{"_$p"} if defined $self->{"_$p"};
346
Usage : $obj->request_mode
347
Function : get/set the mode for the user agent to use for generating a request
348
Returns : either a preset mode (checked against the eutil) or a best-possible
349
option based upon the currently-set parameters
356
my ($self, $mode) = @_;
357
$mode = uc $mode if defined $mode;
358
my $eutil = $self->eutil;
360
my %valid = map {$_ => 1} @{$MODE{$eutil}{mode}};
361
$self->throw("Mode $mode not supported for $eutil") unless
362
exists $valid{$mode};
363
$self->{_request_mode} = $mode;
365
return $self->{_request_mode} if $self->{_request_mode};
366
# let's try to make this a bit smarter...
368
# If not explicitly set, in cases where
369
# the number of IDs is greater than 200, or the search term is longer than
370
# 200, use POST when available
372
if (scalar(@{$MODE{$eutil}{mode}}) > 1) { # allows both GET and POST
373
my ($id, $term) = ($self->id || [], $self->term || '');
374
if (ref $id eq 'ARRAY' && scalar(@$id) > 200 || CORE::length($term) > 300) {
378
# otherwise, fallback to default
379
$MODE{$eutil}{mode}[0]; # first is default
382
=head2 parameters_changed
384
Title : parameters_changed
385
Usage : if ($pobj->parameters_changed) {...}
386
Function: Returns TRUE if parameters have changed
387
Returns : Boolean (0 or 1)
388
Args : [optional] Boolean
392
sub parameters_changed {
394
$self->{'_statechange'};
397
=head2 available_parameters
399
Title : available_parameters
400
Usage : @params = $pobj->available_parameters()
401
Function: Returns a list of the available parameters
402
Returns : Array of available parameters (no values)
403
Args : [optional] A string with the eutil name (for returning eutil-specific
408
sub available_parameters {
409
my ($self, $type) = @_;
411
if ($type eq 'all') {
414
$self->throw("$type parameters not supported") if !exists $MODE{$type};
415
return @{$MODE{$type}->{params}};
419
=head2 get_parameters
421
Title : get_parameters
422
Usage : @params = $pobj->get_parameters;
423
%params = $pobj->get_parameters;
424
Function: Returns list of key/value pairs, parameter => value
425
Returns : Flattened list of key-value pairs. All key-value pairs returned,
426
though subsets can be returned based on the '-type' parameter. Data
427
originally set as an array ref are returned based on whether the
428
'-join_id' flag is set (default is the same array ref).
429
Args : -type : the eutil name (Default: returns all). Use of '-list'
431
-list : array ref of specific parameters
432
-join_ids : Boolean; join IDs based on correspondence (Default: no join)
437
my ($self, @args) = @_;
438
my ($type, $list, $join) = $self->_rearrange([qw(TYPE LIST JOIN_IDS)], @args);
439
$self->throw("Parameter list not an array ref") if $list && ref $list ne 'ARRAY';
441
my @final = $list ? grep {$self->can($_)} @{$list} : $self->available_parameters($type);
443
for my $param (@final) {
444
if ($param eq 'id' && $self->id && $join) {
446
if ($self->correspondence && $self->eutil eq 'elink') {
447
for my $id_group (@{ $id }) {
448
if (ref($id_group) eq 'ARRAY') {
449
push @p, ('id' => join(q(,), @{ $id_group }));
451
elsif (!ref($id_group)) {
452
push @p, ('id' => $id_group);
455
$self->throw("Unknown ID type: $id_group");
459
# add a check for undef
460
push @p, ref $id eq 'ARRAY' ?
461
($param => join(',', grep {defined($_)} @{ $id })):
465
elsif ($param eq 'db' && $self->db && $join) {
467
push @p, (ref $db eq 'ARRAY') ?
468
($param => join(',', @{ $db })) :
472
push @p, ($param => $self->{"_$param"}) if defined $self->{"_$param"};
478
=head1 Implementation-specific to_* methods
483
Usage : $string = $pobj->to_string;
484
Function: Returns string (URL only in this case)
485
Returns : String (URL only for now)
486
Args : [optional] 'all'; build URI::http using all parameters
487
Default : Builds based on allowed parameters (presence of history data
488
or eutil type in %MODE).
489
Note : Changes state of object. Absolute string
494
my ($self, @args) = @_;
495
# calling to_uri changes the state
496
if ($self->parameters_changed || !defined $self->{'_string_cache'}) {
497
my $string = $self->to_request(@args)->uri->as_string;
498
$self->{'_statechange'} = 0;
499
$self->{'_string_cache'} = $string;
501
return $self->{'_string_cache'};
507
Usage : $uri = $pobj->to_request;
508
Function: Returns HTTP::Request object
509
Returns : HTTP::Request
510
Args : [optional] 'all'; builds request using all parameters
511
Default : Builds based on allowed parameters (presence of history data
512
or eutil type in %MODE).
513
Note : Changes state of object (to boolean FALSE). Used for CGI-based GET/POST
514
TODO : esearch, esummary, elink now accept POST for batch submission
515
(something NCBI apparently allowed but didn't advertise). Should we
516
switch most of these to utilize POST instead, or make it dep on the
517
number of submitted IDs?
522
my ($self, $type) = @_;
523
if ($self->parameters_changed || !defined $self->{'_request_cache'}) {
524
my $eutil = $self->eutil;
525
$self->throw("No eutil set") if !$eutil;
528
my ($location, $mode) = ($MODE{$eutil}->{location}, $self->request_mode);
530
my $uri = URI->new($self->url_base_address . $location);
531
if ($mode eq 'GET') {
532
$uri->query_form($self->get_parameters(-type => $type, -join_ids => 1) );
533
$request = HTTP::Request->new($mode => $uri);
534
$self->{'_request_cache'} = $request;
535
} elsif ($mode eq 'POST') {
536
$request = HTTP::Request->new($mode => $uri->as_string);
537
$uri->query_form($self->get_parameters(-type => $type, -join_ids => 1) );
538
$request->content_type('application/x-www-form-urlencoded');
539
$request->content($uri->query);
540
$self->{'_request_cache'} = $request;
542
$self->throw("Unrecognized request mode: $mode");
544
$self->{'_statechange'} = 0;
545
$self->{'_request_cache'} = $request;
547
return $self->{'_request_cache'};
550
=head1 Implementation specific-methods
555
Usage : $p->eutil('efetch')
556
Function: gets/sets the eutil for this set of parameters
557
Returns : string (eutil)
558
Args : [optional] string (eutil)
559
Throws : '$eutil not supported' if eutil not present
560
Note : This does not reset retmode to the default if called directly.
565
my ($self, $eutil) = @_;
567
$self->throw("$eutil not supported") if !exists $MODE{$eutil};
568
if (!defined $self->{'_eutil'} || ($self->{'_eutil'} && $self->{'_eutil'} ne $eutil)) {
569
$self->{'_eutil'} = $eutil;
570
$self->{'_statechange'} = 1;
573
return $self->{'_eutil'};
579
Usage : $p->history($history);
580
Function: gets/sets the history object to be used for these parameters
581
Returns : Bio::Tools::EUtilities::HistoryI (if set)
582
Args : [optional] Bio::Tools::EUtilities::HistoryI
583
Throws : Passed something other than a Bio::Tools::EUtilities::HistoryI
584
Note : This overrides WebEnv() and query_key() settings when set. This
585
caches the last history object passed and returns like a Get/Set
590
my ($self, $history) = @_;
592
$self->throw('Not a Bio::Tools::EUtilities::HistoryI object!') if
593
!$history->isa('Bio::Tools::EUtilities::HistoryI');
594
my ($webenv, $qkey) = $history->history;
595
$self->WebEnv($webenv);
596
$self->query_key($qkey);
597
$self->{'_statechange'} = 1;
598
$self->{'_history_cache'} = $history;
600
return $self->{'_history_cache'};
603
=head2 correspondence
605
Title : correspondence
606
Usage : $p->correspondence(1);
607
Function: Sets flag for posting IDs for one-to-one correspondence
609
Args : [optional] boolean value
614
my ($self, $corr) = @_;
616
$self->{'_correspondence'} = $corr;
617
$self->{'_statechange'} = 1;
619
return $self->{'_correspondence'};
625
Usage : $p->id_file('<foo');
626
Function: convenience method; passes in file containing a list of IDs for
627
searches (one per line), sets id() to list
629
Args : either string indicating file to use, a file handle, or an IO::Handle
631
Note : use of this overrides concurrent use of the '-id' parameter when both
632
are passed. The filename is not retained, merely parsed for IDs.
637
my ($self, $file) = @_;
639
# do this in a way that allows file, fh, IO::Handle
641
$io->_initialize_io(-input => $file);
643
while (my $line = $io->_readline) {
652
=head2 url_base_address
654
Title : url_base_address
655
Usage : $address = $p->url_base_address();
656
Function: Get URL base address
658
Args : None in this implementation; the URL is fixed
663
my $HOSTBASE = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/';
665
sub url_base_address {
666
my ($self, $address) = @_;
671
=head2 set_default_retmode
673
Title : set_default_retmode
674
Usage : $p->set_default_retmode();
675
Function: sets retmode to default value specified by the eutil() and the value
676
in %NCBI_DATABASE (for efetch only) if called
683
# default retmode if one is not supplied
684
my %NCBI_DATABASE = (
686
'nucleotide' => 'text',
690
'structure' => 'text',
693
'journals' => 'text',
696
sub set_default_retmode {
698
if ($self->eutil eq 'efetch') {
699
my $db = $self->db || return; # assume retmode will be set along with db
700
my $mode = exists $NCBI_DATABASE{$db} ? $NCBI_DATABASE{$db} : 'xml';
701
$self->retmode($mode);
703
$self->retmode('xml');
710
if (!defined $self->{'_io'}) {
711
$self->{'_io'} = Bio::Root::IO->new();
713
return $self->{'_io'};