1
# -*-Perl-*- Test Harness script for Bioperl
2
# $Id: Taxonomy.t 15178 2008-12-16 12:48:21Z sendu $
10
test_begin(-tests => 103,
11
-requires_module => 'XML::Twig');
13
use_ok('Bio::DB::Taxonomy');
14
use_ok('Bio::Tree::Tree');
17
my $temp_dir = test_output_dir();
19
# we're actually testing Bio::Taxon and Bio::DB::Taxonomy::* here, not
22
ok my $db_entrez = Bio::DB::Taxonomy->new(-source => 'entrez');
24
ok my $db_flatfile = Bio::DB::Taxonomy->new(-source => 'flatfile',
25
-directory => $temp_dir,
26
-nodesfile => test_input_file('taxdump', 'nodes.dmp'),
27
-namesfile => test_input_file('taxdump','names.dmp'),
31
foreach my $db ($db_entrez, $db_flatfile) {
33
test_skip(-tests => 38, -requires_networking => 1) if $db eq $db_entrez;
35
eval { $id = $db->get_taxonid('Homo sapiens');};
36
skip "Unable to connect to entrez database; no network or server busy?", 38 if $@;
40
# easy test on human, try out the main Taxon methods
41
ok $n = $db->get_taxon(9606);
43
is $n->object_id, $n->id;
44
is $n->ncbi_taxid, $n->id;
45
is $n->parent_id, 9605;
46
is $n->rank, 'species';
48
is $n->node_name, 'Homo sapiens';
49
is $n->scientific_name, $n->node_name;
50
is ${$n->name('scientific')}[0], $n->node_name;
52
my %common_names = map { $_ => 1 } $n->common_names;
53
is keys %common_names, 2;
54
ok exists $common_names{human};
55
ok exists $common_names{man};
57
is $n->division, 'Primates';
58
is $n->genetic_code, 1;
59
is $n->mitochondrial_genetic_code, 2;
60
# these are entrez-only, data not available in dmp files
61
if ($db eq $db_entrez) {
62
ok defined $n->pub_date;
63
ok defined $n->create_date;
64
ok defined $n->update_date;
67
# briefly test some Bio::Tree::NodeI methods
68
ok my $ancestor = $n->ancestor;
69
is $ancestor->scientific_name, 'Homo';
70
# unless set explicitly, Bio::Taxon doesn't return anything for
71
# each_Descendent; must ask the database directly
72
ok my @children = $ancestor->db_handle->each_Descendent($ancestor);
75
sleep(3) if $db eq $db_entrez;
77
# do some trickier things...
78
ok my $n2 = $db->get_Taxonomy_Node('89593');
79
is $n2->scientific_name, 'Craniata';
81
# briefly check we can use some Tree methods
82
my $tree = Bio::Tree::Tree->new();
83
is $tree->get_lca($n, $n2)->scientific_name, 'Craniata';
85
# can we actually form a Tree and use other Tree methods?
86
ok $tree = Bio::Tree::Tree->new(-node => $n);
87
is $tree->number_nodes, 30;
88
is $tree->get_nodes, 30;
89
is $tree->find_node(-rank => 'genus')->scientific_name, 'Homo';
91
# check that getting the ancestor still works now we have explitly set the
92
# ancestor by making a Tree
93
is $n->ancestor->scientific_name, 'Homo';
95
sleep(3) if $db eq $db_entrez;
97
ok $n = $db->get_Taxonomy_Node('1760');
98
is $n->scientific_name, 'Actinobacteria';
100
sleep(3) if $db eq $db_entrez;
102
# entrez isn't as good at searching as flatfile, so we have to special-case
103
my @ids = $db->get_taxonids('Chloroflexi');
104
$db eq $db_entrez ? (is @ids, 1) : (is @ids, 2);
105
$id = $db->get_taxonids('Chloroflexi (class)');
108
@ids = $db->get_taxonids('Rhodotorula');
110
@ids = $db->get_taxonids('Rhodotorula <Microbotryomycetidae>');
116
# Test the list database
117
my @ranks = qw(superkingdom class genus species);
118
my @h_lineage = ('Eukaryota', 'Mammalia', 'Homo', 'Homo sapiens');
119
my $db_list = Bio::DB::Taxonomy->new(-source => 'list', -names => \@h_lineage,
123
ok my $h_list = $db_list->get_taxon(-name => 'Homo sapiens');
124
ok my $h_flat = $db_flatfile->get_taxon(-name => 'Homo sapiens');
126
is $h_list->ancestor->scientific_name, 'Homo';
128
my @names = $h_list->common_names;
130
$h_list->common_names('woman');
131
@names = $h_list->common_names;
133
@names = $h_flat->common_names;
136
# you can switch to another database when you need more information, which also
137
# merges information in the node from the two different dbs
138
$h_list->db_handle($db_flatfile);
139
@names = $h_list->common_names;
142
# form a tree with the list lineage first, preventing a subsequent database
143
# change from giving us all those extra ranks
144
$h_list->db_handle($db_list);
145
my $ancestors_ancestor = $h_list->ancestor->ancestor;
146
is $ancestors_ancestor->scientific_name, 'Mammalia';
148
my $tree = Bio::Tree::Tree->new(-node => $h_list);
149
$h_list->db_handle($db_flatfile);
150
$ancestors_ancestor = $h_list->ancestor->ancestor;
151
is $ancestors_ancestor->scientific_name, 'Mammalia';
153
# or we can get the flatfile database's idea of the ancestors by removing
154
# ourselves from the tree
155
is $h_flat->ancestor->ancestor->scientific_name, 'Homo/Pan/Gorilla group';
156
$h_list->ancestor(undef);
157
is $h_list->ancestor->ancestor->scientific_name, 'Homo/Pan/Gorilla group';
159
# get_lca should work on nodes from different databases
161
test_skip(-tests => 5, -requires_networking => 1);
162
$h_flat = $db_flatfile->get_taxon(-name => 'Homo');
164
eval { $h_entrez = $db_entrez->get_taxon(-name => 'Homo sapiens');};
165
skip "Unable to connect to entrez database; no network or server busy?", 5 if $@;
167
ok my $tree_functions = Bio::Tree::Tree->new();
168
is $tree_functions->get_lca($h_flat, $h_entrez)->scientific_name, 'Homo';
170
# even though the species taxa for Homo sapiens from list and flat databases
171
# have the same internal id, get_lca won't work because they have different
172
# roots and descendents
173
$h_list = $db_list->get_taxon(-name => 'Homo sapiens');
174
is $h_list->ancestor->internal_id, $h_flat->internal_id;
175
ok ! $tree_functions->get_lca($h_flat, $h_list);
177
# but we can form a tree with the flat node then remove all the ranks we're
178
# not interested in and try again
179
$tree = Bio::Tree::Tree->new(-node => $h_flat);
180
$tree->splice(-keep_rank => \@ranks);
181
is $tree->get_lca($h_flat, $h_list)->scientific_name, 'Homo';
184
# ideas from taxonomy2tree.PLS that let us make nice tree, using
185
# Bio::Tree::TreeFunctionsI methods; this is a weird and trivial example just
186
# because our test flatfile database only has the full lineage of one species
188
for my $name ('Human', 'Hominidae') {
189
my $ncbi_id = $db_flatfile->get_taxonid($name);
191
my $node = $db_flatfile->get_taxon(-taxonid => $ncbi_id);
194
$tree->merge_lineage($node);
197
ok $tree = Bio::Tree::Tree->new(-node => $node);
201
is $tree->get_nodes, 30;
202
$tree->contract_linear_paths;
203
my $ids = join(",", map { $_->id } $tree->get_nodes);
204
is $ids, '131567,9606';
206
# we can recursively fetch all descendents of a taxon
208
test_skip(-tests => 1, -requires_networking => 1);
209
eval {$db_entrez->get_taxon(10090);};
210
skip "Unable to connect to entrez database; no network or server busy?", 1 if $@;
212
my $lca = $db_entrez->get_taxon(314146);
213
my @descs = $db_entrez->get_all_Descendents($lca);
218
$db_list = Bio::DB::Taxonomy->new(-source => 'list',
220
(split(/,\s+/, "cellular organisms, Eukaryota, Fungi/Metazoa group,
221
Metazoa, Eumetazoa, Bilateria, Coelomata, Protostomia, Panarthropoda,
222
Arthropoda, Mandibulata, Pancrustacea, Hexapoda, Insecta, Dicondylia,
223
Pterygota, Neoptera, Endopterygota, Diptera, Nematocera, Culicimorpha,
224
Culicoidea, Culicidae, Anophelinae, Anopheles, Anopheles, Angusticorn,
225
Anopheles, maculipennis group, maculipennis species complex, Anopheles daciae"))]);
227
my @taxonids = $db_list->get_taxonids('Anopheles');
230
# but we should still be able to merge in an incomplete lineage of a sister
231
# species and have the 'tree' remain consistent:
233
# missing 'no rank' Anopheles
234
$db_list->add_lineage(-names => [
235
(split(/,\s+/, "Anophelinae, Anopheles, Anopheles, Angusticorn,
236
maculipennis group, maculipennis species complex, Anopheles labranchiae"))]);
237
my $node = $db_list->get_taxon(-name => 'Anopheles labranchiae');
238
is $node->ancestor->ancestor->ancestor->ancestor->ancestor->ancestor->ancestor->scientific_name, 'Anophelinae';
240
# missing 'subgenus' Anopheles
241
$db_list->add_lineage(-names => [
242
(split(/,\s+/, "Anophelinae, Anopheles, Angusticorn, Anopheles,
243
maculipennis group, maculipennis species complex, Anopheles maculipennis"))]);
244
$node = $db_list->get_taxon(-name => 'Anopheles maculipennis');
245
is $node->ancestor->ancestor->ancestor->ancestor->ancestor->ancestor->ancestor->scientific_name, 'Anophelinae';
247
# missing 'no rank' Angusticorn
248
$db_list->add_lineage(-names => [
249
(split(/,\s+/, "Anophelinae, Anopheles, Anopheles, Anopheles,
250
maculipennis group, maculipennis species complex, Anopheles melanoon"))]);
251
$node = $db_list->get_taxon(-name => 'Anopheles melanoon');
252
is $node->ancestor->ancestor->ancestor->ancestor->scientific_name, 'Angusticorn';
254
@taxonids = $db_list->get_taxonids('Anopheles');