128
167
$self->entrez_url($location || $EntrezLocation );
132
=head2 get_Taxonomy_Node
134
Title : get_Taxonomy_Node
135
Usage : my $species = $db->get_Taxonomy_Node(-taxonid => $taxonid)
136
Function: Get a Bio::Taxonomy::Taxon object
137
Returns : Bio::Taxonomy::Taxon object(s) [more than one
138
Args : -taxonid => taxonomy id (to query by taxonid)
140
-name => string (to query by a taxonomy name: common name,
142
or just a single value which is the taxid.
146
sub get_Taxonomy_Node{
148
my %p = $self->entrez_params;
152
if( $params{'-taxonid'} ) {
153
$taxonid = $params{'-taxonid'};
154
} elsif( $params{'-name'} ) {
155
my @taxaids = $self->get_taxonid($params{'-name'});
157
$self->warn("Got > 1 taxid for ".$params{'-name'}. " only using the first one");
158
$taxonid = shift @taxaids;
161
$self->warn("Need to have provided either a -taxonid or -name value to get_Taxonomy_Node");
168
my $params = join($UrlParamSeparatorValue, map { "$_=".$p{$_} } keys %p);
169
my $url = sprintf("%s%s?%s",$self->entrez_url,$EntrezSummary,$params);
170
$self->debug("url is $url\n") if( $self->verbose > 0);
173
$response = $self->get($url);
176
$self->warn("Can't query website: $@");
179
my $twig = new XML::Twig;
180
$self->debug( "resp is $response\n") if( $self->verbose > 0);
181
$twig->parse($response);
182
my $root = $twig->root;
183
my $list = $root->first_child('DocSum');
185
$self->warn("Could not find any value for $taxonid");
188
my ($id) = map { $_->text } $list->children('Id');
190
my (%item) = map { $_->{'att'}->{'Name'} => $_->text } $list->children('Item');
192
if( $item{'RANK'} eq 'species') {
193
my $node = new Bio::Species(-ncbi_taxid => $id,
194
-common_name => $item{'CommonName'},
195
-division => $item{'Division'});
196
my ($genus,$species,$subspecies) = split(' ',$item{'ScientificName'},3);
197
$node->genus($species);
198
$node->species($species);
201
$self->warn(sprintf("can't create a species object for %s (%s) because it isn't a species but is a '%s' instead",$item{'ScientificName'},$item{'CommonName'}, $item{'RANK'}));
210
Usage : my $taxonid = $db->get_taxonid('Homo sapiens');
211
Function: Searches for a taxonid (typically ncbi_taxon_id)
212
based on a query string
214
Args : Array of Strings representing species/node name
220
my ($self,$query) = @_;
221
my %p = $self->entrez_params;
224
my $params = join($UrlParamSeparatorValue, map { "$_=".$p{$_} } keys %p);
225
my $url = sprintf("%s%s?%s",$self->entrez_url,$EntrezGet,$params);
228
$response = $self->get($url);
231
$self->warn("Can't query website: $@");
234
$self->debug( "response is $response\n") if( $self->verbose > 0);
236
my $twig = new XML::Twig;
237
$twig->parse($response);
238
my $root = $twig->root;
239
my $list = $root->first_child('IdList');
240
my @data = map { $_->text } $list->children('Id');
241
( wantarray ) ? @data : shift @data;
173
Usage : my $taxon = $db->get_taxon(-taxonid => $taxonid)
174
Function: Get a Bio::Taxon object from the database.
175
Returns : Bio::Taxon object
176
Args : just a single value which is the database id, OR named args:
177
-taxonid => taxonomy id (to query by taxonid)
179
-name => string (to query by a taxonomy name: common name,
180
scientific name, etc)
182
To retrieve a taxonomy node for a GI number provide the -gi option
183
with the gi number and -db with either 'nucleotide' or 'protein' to
186
-full => 1 (to force retrieval of full information - sometimes
187
minimal information about your taxon may have been
188
cached, which is normally used to save database
196
$self->throw("Need to have installed XML::Twig");
199
my %p = $self->entrez_params;
201
# convert input request to one or more ids
202
my (@taxonids, $taxonid, $want_full);
205
if ($params{'-taxonid'}) {
206
$taxonid = $params{'-taxonid'};
208
elsif ($params{'-gi'}) {
209
my $db = $params{'-db'};
210
# we're going to do all the work here and then redirect
211
# the call based on the TaxId
212
my %p = %SequenceParams;
214
if( ref($params{'-gi'}) =~ /ARRAY/i ) {
215
$p{'id'} = join(',', @{$params{'-gi'}});
217
$p{'id'} = $params{'-gi'};
219
$p{'db'} = $db if defined $db;
220
my $params = join($UrlParamSeparatorValue, map { "$_=".$p{$_} } keys %p);
221
my $url = sprintf("%s%s?%s",$self->entrez_url,$EntrezSummary,$params);
222
$self->debug("url is $url\n");
225
if (exists $DATA_CACHE->{gi_to_ids}->{$url}) {
226
@ids = @{$DATA_CACHE->{gi_to_ids}->{$url}};
229
my $response = $self->get($url);
230
if ($response->is_success) {
231
$response = $response->content;
234
$self->throw("Can't query website: ".$response->status_line);
237
$self->debug("resp is $response\n");
238
my $twig = XML::Twig->new;
239
$twig->parse($response);
240
my $root = $twig->root;
242
for my $topnode ( $root->children('DocSum') ) {
243
for my $child ( $topnode->children('Item') ) {
244
if( uc($child->{att}->{'Name'}) eq 'TAXID' ) {
245
push @ids, $child->text;
250
$DATA_CACHE->{gi_to_ids}->{$url} = \@ids;
253
return $self->get_taxon(-taxonid => \@ids);
255
elsif ($params{'-name'}) {
256
@taxonids = $self->get_taxonid($params{'-name'});
259
$self->warn("Need to have provided either a -taxonid or -name value to get_taxon");
262
if ($params{'-full'}) {
270
if (ref($taxonid) =~ /ARRAY/i ) {
271
@taxonids = @{$taxonid};
274
push(@taxonids, $taxonid) if $taxonid;
277
# return answer(s) from the cache if possible
280
foreach my $taxonid (@taxonids) {
281
$taxonid || $self->throw("In taxonids list one was undef! '@taxonids'\n");
282
if (defined $DATA_CACHE->{full_info}->{$taxonid}) {
283
push(@results, $self->_make_taxon($DATA_CACHE->{full_info}->{$taxonid}));
285
elsif (! $want_full && defined $DATA_CACHE->{minimal_info}->{$taxonid}) {
286
push(@results, $self->_make_taxon($DATA_CACHE->{minimal_info}->{$taxonid}));
289
push(@uncached, $taxonid);
294
$taxonid = join(',', @uncached);
297
$self->debug("id is $taxonid\n");
298
my $params = join($UrlParamSeparatorValue, map { "$_=".$p{$_} } keys %p);
300
my $url = sprintf("%s%s?%s",$self->entrez_url,$EntrezFetch,$params);
301
$self->debug("url is $url\n");
302
my $response = $self->get($url);
303
if ($response->is_success) {
304
$response = $response->content;
307
$self->throw("Can't query website: ".$response->status_line);
309
$self->debug("resp is $response\n");
311
my $twig = XML::Twig->new;
312
$twig->parse($response);
314
my $root = $twig->root;
315
for my $taxon ( $root->children('Taxon') ) {
316
my $taxid = $taxon->first_child_text('TaxId');
317
$self->throw("Got a result with no TaxId!") unless $taxid;
320
if (exists $DATA_CACHE->{minimal_info}->{$taxid}) {
321
$data = $DATA_CACHE->{minimal_info}->{$taxid};
324
$data->{id} = $taxid;
325
$data->{rank} = $taxon->first_child_text('Rank');
327
my $other_names = $taxon->first_child('OtherNames');
328
my @other_names = $other_names->children_text() if $other_names;
329
my $sci_name = $taxon->first_child_text('ScientificName');
330
my $orig_sci_name = $sci_name;
331
$sci_name =~ s/ \(class\)$//;
332
push(@other_names, $orig_sci_name) if $orig_sci_name ne $sci_name;
333
$data->{scientific_name} = $sci_name;
334
$data->{common_names} = \@other_names;
336
$data->{division} = $taxon->first_child_text('Division');
337
$data->{genetic_code} = $taxon->first_child('GeneticCode')->first_child_text('GCId');
338
$data->{mitochondrial_genetic_code} = $taxon->first_child('MitoGeneticCode')->first_child_text('MGCId');
339
$data->{create_date} = $taxon->first_child_text('CreateDate');
340
$data->{update_date} = $taxon->first_child_text('UpdateDate');
341
$data->{pub_date} = $taxon->first_child_text('PubDate');
343
# since we have some information about all the ancestors of our
344
# requested node, we may as well cache data for the ancestors to
345
# reduce the number of accesses to website in future
346
my $lineage_ex = $taxon->first_child('LineageEx');
347
my ($ancestor, $lineage_data, @taxa);
348
foreach my $lineage_taxon ($lineage_ex->children) {
349
my $lineage_taxid = $lineage_taxon->first_child_text('TaxId');
351
if (exists $DATA_CACHE->{minimal_info}->{$lineage_taxid} || exists $DATA_CACHE->{full_info}->{$lineage_taxid}) {
352
$lineage_data = $DATA_CACHE->{minimal_info}->{$lineage_taxid} || $DATA_CACHE->{full_info}->{$lineage_taxid};
359
$lineage_data->{id} = $lineage_taxid;
360
$lineage_data->{scientific_name} = $lineage_taxon->first_child_text('ScientificName');
361
$lineage_data->{rank} = $lineage_taxon->first_child_text('Rank');
363
$RELATIONS->{ancestors}->{$lineage_taxid} = $ancestor->{id} if $ancestor;
365
$DATA_CACHE->{minimal_info}->{$lineage_taxid} = $lineage_data;
366
} continue { $ancestor = $lineage_data; unshift(@taxa, $lineage_data); }
368
$RELATIONS->{ancestors}->{$taxid} = $ancestor->{id} if $ancestor;
370
# go through the lineage in reverse so we can remember the children
372
foreach my $lineage_data (@taxa) {
373
$RELATIONS->{children}->{$lineage_data->{id}}->{$child->{id}} = 1;
374
} continue { $child = $lineage_data; }
376
delete $DATA_CACHE->{minimal_info}->{$taxid};
377
$DATA_CACHE->{full_info}->{$taxid} = $data;
378
push(@results, $self->_make_taxon($data));
382
wantarray() ? @results : shift @results;
385
*get_Taxonomy_Node = \&get_taxon;
390
Usage : my $taxonid = $db->get_taxonids('Homo sapiens');
391
Function: Searches for a taxonid (typically ncbi_taxon_id) based on a query
392
string. Note that multiple taxonids can match to the same supplied
394
Returns : array of integer ids in list context, one of these in scalar context
395
Args : string representing taxon's name
400
my ($self,$query) = @_;
401
my %p = $self->entrez_params;
403
# queries don't work correctly with special characters, so get rid of them.
404
if ($query =~ /<.+>/) {
405
# queries with <something> will fail, so workaround by removing, doing
406
# the query, getting multiple taxonids, then picking the one id that
407
# has a parent node with a scientific_name() or common_names()
408
# case-insensitive matching to the word(s) within <>
409
$query =~ s/ <(.+?)>//;
410
my $desired_parent_name = lc($1);
412
ID: foreach my $start_id ($self->get_taxonids($query)) {
413
my $node = $self->get_taxon($start_id) || next ID;
415
# walk up the parents until we hit a node with a named rank
417
my $parent_node = $self->ancestor($node) || next ID;
418
my $parent_sci_name = $parent_node->scientific_name || next ID;
419
my @parent_common_names = $parent_node->common_names;
421
foreach my $name ($parent_sci_name, @parent_common_names) {
422
if (lc($name) eq $desired_parent_name) {
423
return wantarray() ? ($start_id) : $start_id;
427
my $parent_rank = $parent_node->rank || 'no rank';
428
if ($parent_rank ne 'no rank') {
432
$node = $parent_node;
438
$query =~ s/[\"\(\)]//g; # not an exhaustive list; these are just the ones I know cause problems
442
if (defined $DATA_CACHE->{name_to_id}->{$query}) {
443
@data = @{$DATA_CACHE->{name_to_id}->{$query}};
447
my $params = join($UrlParamSeparatorValue, map { "$_=".$p{$_} } keys %p);
448
my $url = sprintf("%s%s?%s",$self->entrez_url,$EntrezGet,$params);
449
my $response = $self->get($url);
450
if ($response->is_success) {
451
$response = $response->content;
454
$self->throw("Can't query website: ".$response->status_line);
456
$self->debug("response is $response\n");
457
my $twig = XML::Twig->new;
458
$twig->parse($response);
459
my $root = $twig->root;
460
my $list = $root->first_child('IdList');
461
@data = map { $_->text } $list->children('Id');
463
$DATA_CACHE->{name_to_id}->{$query} = [@data];
466
wantarray() ? @data : shift @data;
469
*get_taxonid = \&get_taxonids;
474
Usage : my $ancestor_taxon = $db->ancestor($taxon)
475
Function: Retrieve the ancestor taxon of a supplied Taxon from the database.
477
Note that unless the ancestor has previously been directly
478
requested with get_taxon(), the returned Taxon object will only have
479
a minimal amount of information.
482
Args : Bio::Taxon (that was retrieved from this database)
487
my ($self, $taxon) = @_;
488
$self->throw("Must supply a Bio::Taxon") unless ref($taxon) && $taxon->isa('Bio::Taxon');
489
$self->throw("The supplied Taxon must belong to this database") unless $taxon->db_handle && $taxon->db_handle eq $self;
490
my $id = $taxon->id || $self->throw("The supplied Taxon is missing its id!");
492
my $ancestor_id = $RELATIONS->{ancestors}->{$id} || return;
493
return $self->_make_taxon($DATA_CACHE->{full_info}->{$ancestor_id} || $DATA_CACHE->{minimal_info}->{$ancestor_id});
496
=head2 each_Descendent
498
Title : each_Descendent
499
Usage : my @taxa = $db->each_Descendent($taxon);
500
Function: Get all the descendents of the supplied Taxon (but not their
501
descendents, ie. not a recursive fetchall).
503
Note that this implementation is unable to return a taxon that
504
hasn't previously been directly fetched with get_taxon(), or wasn't
505
an ancestor of such a fetch.
507
Returns : Array of Bio::Taxon objects
508
Args : Bio::Taxon (that was retrieved from this database)
512
sub each_Descendent {
513
my ($self, $taxon) = @_;
514
$self->throw("Must supply a Bio::Taxon") unless ref($taxon) && $taxon->isa('Bio::Taxon');
515
$self->throw("The supplied Taxon must belong to this database") unless $taxon->db_handle && $taxon->db_handle eq $self;
516
my $id = $taxon->id || $self->throw("The supplied Taxon is missing its id!");
518
my @children_ids = keys %{$RELATIONS->{children}->{$id} || {}};
520
foreach my $child_id (@children_ids) {
521
push(@children, $self->_make_taxon($DATA_CACHE->{full_info}->{$child_id} || $DATA_CACHE->{minimal_info}->{$child_id}));
246
527
=head2 Some Get/Setter methods