1
# $Id: Taxon.pm,v 1.1 2002/11/18 22:08:33 kortsch Exp $
3
# BioPerl module for Bio::Taxonomy::Taxon
5
# Cared for by Dan Kortschak but pilfered extensively from
6
# the Bio::Tree::Node code of Jason Stajich
8
# You may distribute this module under the same terms as perl itself
10
# POD documentation - main docs before the code
14
Bio::Taxonomy::Taxon - Generic Taxonomic Entity object
18
use Bio::Taxonomy::Taxon;
19
my $taxonA = new Bio::Taxonomy::Taxon();
20
my $taxonL = new Bio::Taxonomy::Taxon();
21
my $taxonR = new Bio::Taxonomy::Taxon();
23
my $taxon = new Bio::Taxonomy::Taxon();
24
$taxon->add_Descendents($nodeL);
25
$taxon->add_Descendents($nodeR);
27
$species = $taxon->species;
31
Makes a taxonomic unit suitable for use in a taxonomic tree
35
Dan Kortschak email B<kortschak@rsbs.anu.edu.au>
39
The rest of the documentation details each of the object
40
methods. Internal methods are usually preceded with a _
47
package Bio::Taxonomy::Taxon;
48
use vars qw(@ISA $CREATIONORDER);
51
# Object preamble - inherits from Bio::Root::Object, Bio::Tree::NodeI, Bio::Species and Bio::Taxonomy
57
# import rank information from Bio::Taxonomy.pm
58
use vars qw(@RANK %RANK);
60
@ISA = qw(Bio::Root::Root Bio::Tree::NodeI);
69
Usage : my $obj = new Bio::Taxonomy::Taxon();
70
Function: Builds a new Bio::Taxonomy::Taxon object
71
Returns : Bio::Taxonomy::Taxon
72
Args : -descendents => array pointer to descendents (optional)
73
-branch_length => branch length [integer] (optional)
75
-id => unique taxon id for node (from NCBI's list preferably)
76
-rank => the taxonomic level of the node (also from NCBI)
81
my($class,@args) = @_;
83
my $self = $class->SUPER::new(@args);
84
my ($children,$branchlen,$id,$taxon,$rank,$desc) =
86
$self->_rearrange([qw(DESCENDENTS
93
$self->{'_desc'} = {};
94
defined $desc && $self->description($desc);
95
defined $taxon && $self->taxon($taxon);
96
defined $id && $self->id($id);
97
defined $branchlen && $self->branch_length($branchlen);
98
defined $rank && $self->rank($rank);
100
if( defined $children ) {
101
if( ref($children) !~ /ARRAY/i ) {
102
$self->warn("Must specify a valid ARRAY reference to initialize a Taxon's Descendents");
104
foreach my $c ( @$children ) {
105
$self->add_Descendent($c);
108
$self->_creation_id($CREATIONORDER++);
112
=head2 add_Descendent
114
Title : add_Descendent
115
Usage : $taxon->add_Descendant($taxon);
116
Function: Adds a descendent to a taxon
117
Returns : number of current descendents for this taxon
118
Args : Bio::Taxonomy::Taxon
119
boolean flag, true if you want to ignore the fact that you are
120
adding a second node with the same unique id (typically memory
121
location reference in this implementation). default is false and
122
will throw an error if you try and overwrite an existing node.
129
my ($self,$node,$ignoreoverwrite) = @_;
131
return -1 if( ! defined $node ) ;
132
if( ! $node->isa('Bio::Taxonomy::Taxon') ) {
133
$self->warn("Trying to add a Descendent who is not a Bio::Taxonomy::Taxon");
136
# do we care about order?
137
$node->{'_ancestor'} = $self;
138
if( $self->{'_desc'}->{$node->internal_id} && ! $ignoreoverwrite ) {
139
$self->throw("Going to overwrite a taxon which is $node that is already stored here, set the ignore overwrite flag (parameter 2) to true to ignore this in the future");
142
$self->{'_desc'}->{$node->internal_id} = $node; # is this safely unique - we've tested before at any rate??
144
$self->invalidate_height();
146
return scalar keys %{$self->{'_desc'}};
150
=head2 each_Descendent
152
Title : each_Descendent($sortby)
153
Usage : my @taxa = $taxon->each_Descendent;
154
Function: all the descendents for this taxon (but not their descendents
155
i.e. not a recursive fetchall)
156
Returns : Array of Bio::Taxonomy::Taxon objects
157
Args : $sortby [optional] "height", "creation" or coderef to be used
158
to sort the order of children taxa.
164
my ($self, $sortby) = @_;
166
# order can be based on branch length (and sub branchlength)
168
$sortby ||= 'height';
170
if (ref $sortby eq 'CODE') {
171
return sort $sortby values %{$self->{'_desc'}};
173
if ($sortby eq 'height') {
174
return map { $_->[0] }
175
sort { $a->[1] <=> $b->[1] ||
176
$a->[2] <=> $b->[2] }
177
map { [$_, $_->height, $_->internal_id ] }
178
values %{$self->{'_desc'}};
180
return map { $_->[0] }
181
sort { $a->[1] <=> $b->[1] }
182
map { [$_, $_->height ] }
183
values %{$self->{'_desc'}};
188
=head2 remove_Descendent
190
Title : remove_Descendent
191
Usage : $taxon->remove_Descedent($taxon_foo);
192
Function: Removes a specific taxon from being a Descendent of this taxon
194
Args : An array of Bio::taxonomy::Taxon objects which have be previously
195
passed to the add_Descendent call of this object.
199
sub remove_Descendent{
200
my ($self,@nodes) = @_;
201
foreach my $n ( @nodes ) {
202
if( $self->{'_desc'}->{$n->internal_id} ) {
203
$n->{'_ancestor'} = undef;
204
$self->{'_desc'}->{$n->internal_id}->{'_ancestor'} = undef;
205
delete $self->{'_desc'}->{$n->internal_id};
208
$self->debug(sprintf("no taxon %s (%s) listed as a descendent in this taxon %s (%s)\n",$n->id, $n,$self->id,$self));
209
$self->debug("Descendents are " . join(',', keys %{$self->{'_desc'}})."\n");
216
=head2 remove_all_Descendents
218
Title : remove_all_Descendents
219
Usage : $taxon->remove_All_Descendents()
220
Function: Cleanup the taxon's reference to descendents and reset
221
their ancestor pointers to undef, if you don't have a reference
222
to these objects after this call they will be cleanedup - so
223
a get_nodes from the Tree object would be a safe thing to do first
230
sub remove_all_Descendents{
232
# this won't cleanup the taxa themselves if you also have
233
# a copy/pointer of them (I think)...
234
while( my ($node,$val) = each %{ $self->{'_desc'} } ) {
235
$val->{'_ancestor'} = undef;
237
$self->{'_desc'} = {};
241
=head2 get_Descendents
243
Title : get_Descendents
244
Usage : my @taxa = $taxon->get_Descendents;
245
Function: Recursively fetch all the taxa and their descendents
246
*NOTE* This is different from each_Descendent
247
Returns : Array or Bio::Taxonomy::Taxon objects
252
# implemented in the interface
257
Usage : $taxon->ancestor($newval)
258
Function: Set the Ancestor
259
Returns : value of ancestor
260
Args : newvalue (optional)
265
my ($self, $value) = @_;
266
if (defined $value) {
267
$self->{'_ancestor'} = $value;
269
return $self->{'_ancestor'};
274
Title : branch_length
275
Usage : $obj->branch_length($newval)
278
Returns : value of branch_length
279
Args : newvalue (optional)
285
my ($self,$value) = @_;
286
if( defined $value) {
287
$self->{'branch_length'} = $value;
289
return $self->{'branch_length'};
295
Usage : $obj->description($newval)
298
Returns : value of description
299
Args : newvalue (optional)
305
my ($self,$value) = @_;
306
if( defined $value ) {
307
$self->{'_desc'} = $value;
309
return $self->{'_desc'};
316
Usage : $obj->rank($newval)
317
Function: Set the taxonomic rank
319
Returns : taxonomic rank of taxon
320
Args : newvalue (optional)
326
my ($self,$value) = @_;
327
if (defined $value) {
328
my $ranks=join("|",@RANK);
329
if ($value=~/$ranks/) {
330
$self->{'_rank'} = $value;
332
$self->throw("Attempted to set unknown taxonomic rank: $value.\n");
335
return $self->{'_rank'};
342
Usage : $obj->taxon($newtaxon)
343
Function: Set the name of the taxon
345
Returns : name of taxon
346
Args : newtaxon (optional)
351
# because internal taxa have names too...
353
my ($self,$value) = @_;
354
if( defined $value ) {
355
$self->{'_taxon'} = $value;
357
return $self->{'_taxon'};
364
Usage : $obj->id($newval)
367
Returns : value of id
368
Args : newvalue (optional)
374
my ($self,$value) = @_;
375
if( defined $value ) {
376
$self->{'_id'} = $value;
378
return $self->{'_id'};
385
# try to insure that everything is cleaned up
386
$self->SUPER::DESTROY();
387
if( defined $self->{'_desc'} &&
388
ref($self->{'_desc'}) =~ /ARRAY/i ) {
389
while( my ($nodeid,$node) = each %{ $self->{'_desc'} } ) {
390
$node->{'_ancestor'} = undef; # ensure no circular references
394
$self->{'_desc'} = {};
401
Usage : my $internalid = $taxon->internal_id
402
Function: Returns the internal unique id for this taxon
403
(a monotonically increasing number for this in-memory implementation
404
but could be a database determined unique id in other
412
return $_[0]->_creation_id;
419
Usage : $obj->_creation_id($newval)
420
Function: a private method signifying the internal creation order
421
Returns : value of _creation_id
422
Args : newvalue (optional)
428
my ($self,$value) = @_;
429
if( defined $value) {
430
$self->{'_creation_id'} = $value;
432
return $self->{'_creation_id'} || 0;
436
# The following methods are implemented by NodeI decorated interface
441
Usage : if( $node->is_Leaf )
442
Function: Get Leaf status
451
$rc = 1 if( ! defined $self->{'_desc'} ||
452
keys %{$self->{'_desc'}} == 0);
459
Usage : my $str = $taxon->to_string()
460
Function: For debugging, provide a taxon as a string
469
Usage : my $len = $taxon->height
470
Function: Returns the height of the tree starting at this
471
taxon. Height is the maximum branchlength.
472
Returns : The longest length (weighting branches with branch_length) to a leaf
480
return $self->{'_height'} if( defined $self->{'_height'} );
482
if( $self->is_Leaf ) {
483
if( !defined $self->branch_length ) {
484
$self->debug(sprintf("Trying to calculate height of a taxon when a taxon (%s) has an undefined branch_length",$self->id || '?' ));
487
return $self->branch_length;
490
foreach my $subnode ( $self->each_Descendent ) {
491
my $s = $subnode->height;
492
if( $s > $max ) { $max = $s; }
494
return ($self->{'_height'} = $max + ($self->branch_length || 1));
498
=head2 invalidate_height
500
Title : invalidate_height
501
Usage : private helper method
502
Function: Invalidate our cached value of the taxon's height in the tree
509
sub invalidate_height {
512
$self->{'_height'} = undef;
513
if( $self->ancestor ) {
514
$self->ancestor->invalidate_height;
521
Usage : @obj->classify()
522
Function: a method to return the classification of a species
523
Returns : name of taxon and ancestor's taxon recursively
524
Args : boolean to specify whether we want all taxa not just ranked
531
my ($self,$allnodes) = @_;
533
my @classification=($self->taxon);
536
while (defined $node->ancestor) {
537
push @classification, $node->ancestor->taxon if $allnodes==1;
538
$node=$node->ancestor;
541
return (@classification);
548
Usage : $obj->has_rank($rank)
549
Function: a method to query ancestors' rank
557
my ($self,$rank) = @_;
559
return $self if $self->rank eq $rank;
561
while (defined $self->ancestor) {
562
return $self if $self->ancestor->rank eq $rank;
563
$self=$self->ancestor;
573
Usage : $obj->has_taxon($taxon)
574
Function: a method to query ancestors' taxa
576
Args : Bio::Taxonomy::Taxon object
582
my ($self,$taxon) = @_;
585
((defined $self->id && $self->id == $taxon->id) ||
586
($self->taxon eq $taxon->taxon && $self->rank eq $taxon->rank));
588
while (defined $self->ancestor) {
590
((defined $self->id && $self->id == $taxon->id) ||
591
($self->taxon eq $taxon->taxon && $self->rank eq $taxon->rank) &&
592
($self->taxon ne 'no rank'));
593
$self=$self->ancestor;
600
=head2 distance_to_root
602
Title : distance_to_root
603
Usage : $obj->distance_to_root
604
Function: a method to query ancestors' taxa
605
Returns : number of links to root
611
sub distance_to_root {
612
my ($self,$taxon) = @_;
616
while (defined $self->ancestor) {
618
$self=$self->ancestor;
625
=head2 recent_common_ancestor
627
Title : recent_common_ancestor
628
Usage : $obj->recent_common_ancestor($taxon)
629
Function: a method to query find common ancestors
630
Returns : Bio::Taxonomy::Taxon of query or undef if no ancestor of rank
631
Args : Bio::Taxonomy::Taxon
636
sub recent_common_ancestor {
637
my ($self,$node) = @_;
639
while (defined $node->ancestor) {
640
my $common=$self->has_taxon($node);
641
return $common if defined $common;
642
$node=$node->ancestor;
651
Usage : $obj=$taxon->species;
652
Function: Returns a Bio::Species object reflecting the taxon's tree position
653
Returns : a Bio::Species object
662
if ($self->has_rank('subspecies') && $self->ancestor->rank eq 'species') {
663
$species = Bio::Species->new(-classification => $self->ancestor->classify);
664
$species->genus($self->ancestor->ancestor->taxon);
665
$species->species($self->ancestor->taxon);
666
$species->sub_species($self->taxon);
667
} elsif ($self->has_rank('species')) {
668
$species = Bio::Species->new(-classification => $self->classify);
669
$species->genus($self->ancestor->taxon);
670
$species->species($self->taxon);
672
$self->throw("Trying to create a species from a taxonomic entity without species rank. Use classify instead of species.\n");