1
# -*-Perl-*- Test Harness script for Bioperl
2
# $Id: EUtilities.t 15112 2008-12-08 18:12:38Z sendu $
11
$NUMTESTS = 4; # base number of tests (those not in blocks)
13
# I have set up eutils tests to run in sections for easier test maintenance
14
# and keeping track of problematic tests. The below hash is the list of
15
# tests, each with its number of tests and a coderef.
17
# these now run very simple tests for connectivity and data sampling
18
# main tests now with the parser
21
'efetch' => {'tests' => 5,
23
'epost' => {'tests' => 11,
25
'esummary' => {'tests' => 254,
27
'esearch' => {'tests' => 13,
29
'einfo' => {'tests' => 10,
31
'elink1' => {'tests' => 8,
33
'egquery' => {'tests' => 4,
36
# Grow the plan: add every section's declared 'tests' count from %EUTILS to
# the base count already held in $NUMTESTS.
$NUMTESTS += $EUTILS{$_}->{'tests'} for (keys %EUTILS);
37
# Debug flag comes from the BIOPERLDEBUG environment variable; a false or
# missing value becomes 0.
$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
38
# this seems to work for perl 5.6 and perl 5.8
42
test_begin(-tests => $NUMTESTS,
43
-requires_modules => [qw(XML::Simple LWP::UserAgent)],
44
-requires_networking => 1,
47
# Load checks for the modules under test, one use_ok() per module.
# NOTE(review): these four calls appear to be the four "base" tests that
# $NUMTESTS starts from -- confirm against the plan setup.
use_ok('Bio::DB::EUtilities');
48
use_ok('LWP::UserAgent');
49
use_ok('Bio::Tools::EUtilities');
50
use_ok('Bio::Tools::EUtilities::EUtilParameters');
53
# NOTE : Bio::DB::EUtilities is just a specialized pipeline to get any
54
# data available via NCBI's Entrez interface, with a few convenience methods
55
# to get UIDs and other additional information. All data returned
56
# using EFetch is raw (not Bioperl objects) and is meant to be piped into
57
# other Bioperl modules at a later point for further processing
60
# Sample accessions (no use of @acc is visible in this part of the file).
my @acc = qw(MUSIGHBA1 P18584 CH402638);
63
# UIDs shared by the tests. sort() without a comparator orders them as
# strings, not numerically (so 730439 lands after 68536103); the EPost test
# compares get_ids() against a list in that same string order.
my @ids = sort qw(1621261 89318838 68536103 20807972 730439);
66
# Query term; the ESearch history test compares get_term() with it.
my $term = 'dihydroorotase AND human';
68
# Current agent and its most recent response, reassigned by each test section.
my ($eutil, $response);
70
my %dbs = (taxonomy => 1,
73
my %links = (protein_taxonomy => 1,
74
protein_nucleotide => 1,
75
protein_nucleotide_wgs => 1,
77
protein_pubmed_refseq => 1
80
# this loops through the required tests, only running what is in %EUTILS
81
for my $test (keys %EUTILS) {
82
$EUTILS{$test}->{'sub'}->();
89
$eutil = Bio::DB::EUtilities->new(
95
# EFetch, first request: the agent must be a Bio::DB::GenericWebAgent.
isa_ok($eutil, 'Bio::DB::GenericWebAgent');
96
# Trap any exception raised by the request instead of letting it end the run.
eval {$response = $eutil->get_Response; };
97
# On failure skip the 4 assertions left in this section (5 are declared for
# 'efetch' and one has already run).
skip("EFetch HTTP error: $@", 4) if $@;
98
isa_ok($response, 'HTTP::Response');
99
my $content = $response->content;
100
# The returned text must contain the expected protein description.
like($content, qr(PYRR \[Mycobacterium tuberculosis H37Rv\]),
101
'EFetch: Fasta format');
103
# reuse the EUtilities webagent
104
# Narrow the request to the second UID in @ids and ask for the 'gb' rettype.
$eutil->parameter_base->id([$ids[1]]);
105
$eutil->parameter_base->rettype('gb');
106
eval {$response = $eutil->get_Response; };
107
# Only the 2 assertions below remain if this second request fails.
skip("EFetch HTTP error: $@", 2) if $@;
108
isa_ok($response, 'HTTP::Response');
109
$content = $response->content;
110
# The record must open with a LOCUS line for NP_623143.
like($content, qr(^LOCUS\s+NP_623143),'EFetch: GenBank format');
114
# EPost->EFetch with History
118
$eutil = Bio::DB::EUtilities->new(
124
# EPost: the agent must be a Bio::DB::GenericWebAgent.
isa_ok($eutil, 'Bio::DB::GenericWebAgent');
125
# Trap any exception raised by the request instead of letting it end the run.
eval {$response = $eutil->get_Response; };
126
# On failure skip the 10 assertions left in this section (11 are declared for
# 'epost' and one has already run).
skip("EPost HTTP error: $@", 10) if $@;
127
isa_ok($response, 'HTTP::Response');
128
# Any parameters are passed in to the parser, so these should be set.
129
# Databases and IDs always default back to the submitted ones unless
130
# the data being retrieved are IDs or contain new IDs (esearch, elink)
132
is($eutil->get_database, 'protein', '$epost->get_database()');
133
# The expected list is in string-sorted order, matching how @ids is built.
is(join(',',$eutil->get_ids), '1621261,20807972,68536103,730439,89318838', '$epost->get_ids()');
135
# these are not set using epost
136
is($eutil->get_count, undef, '$epost->get_count()');
137
is($eutil->get_term, undef, '$epost->get_term()');
139
# Take the next history object from the agent and check where it came from.
my $history = $eutil->next_History;
140
is($history->eutil, 'epost', 'History->eutil()');
141
isa_ok($history, 'Bio::Tools::EUtilities::HistoryI');
143
# check the actual History
144
my ($webenv, $key) = $history->history;
145
# WebEnv must start with at least 50 non-whitespace characters; the query
# key must start with a digit.
like($webenv, qr{^\S{50}}, '$epost WebEnv');
146
like($key, qr{^\d+}, '$epost query key');
148
# can we fetch the sequences?
149
$eutil->set_parameters(
151
-history => $history,
154
# look for fasta headers
156
# Fetch the posted records through the history; trap request errors so the
# final assertion can be skipped instead of dying.
eval{ $r = $eutil->get_Response->content;};
157
# Report the trapped error text, as the first EPost skip does, so a failure
# shows its cause.
skip("EPost HTTP error: $@", 1) if $@;
158
# Count FASTA header lines (lines starting with '>') in the response;
# grep in scalar context yields the number of matches.
$t = grep m{^>.*$}, split("\n", $r);
159
is($t, 5, 'EPost to EFetch');
166
# Expected ESummary data, keyed by UID (the same five UIDs as @ids). Each UID
# maps item names to a two-element array: element 0 is the item type that the
# test loop compares with get_type_by_name()/get_type(); element 1 looks like
# the expected item value -- TODO confirm, its use is not visible in this part
# of the file.
my %docsum = (1621261=> { 'Caption' => ['String','CAB02640'],
167
'Title' => ['String','PROBABLE PYRIMIDINE OPERON REGULATORY PROTEIN PYRR '.
168
'[Mycobacterium tuberculosis H37Rv]'],
169
'Extra' => ['String','gi|1621261|emb|CAB02640.1|[1621261]'],
170
'Gi' => ['Integer','1621261'],
171
'CreateDate' => ['String','2003/11/21'],
172
'UpdateDate' => ['String','2005/04/17'],
173
'Flags' => ['Integer',''],
174
'TaxId' => ['Integer','83332'],
175
'Length' => ['Integer','193'],
176
'Status' => ['String','live'],
177
'ReplacedBy' => ['String',''],
178
'Comment' => ['String',''], },
179
20807972 => {'Caption' => ['String','NP_623143'],
180
'Title' => ['String','pyrimidine regulatory protein PyrR '.
181
'[Thermoanaerobacter tengcongensis MB4]'],
182
'Extra' => ['String','gi|20807972|ref|NP_623143.1|[20807972]'],
183
'Gi' => ['Integer','20807972'],
184
'CreateDate' => ['String','2002/05/09'],
185
'UpdateDate' => ['String','2005/12/03'],
186
'Flags' => ['Integer','512'],
187
'TaxId' => ['Integer','273068'],
188
'Length' => ['Integer','178'],
189
'Status' => ['String','live'],
190
'ReplacedBy' => ['String',''],
191
'Comment' => ['String',''], },
192
68536103 => {'Caption' => ['String','YP_250808'],
193
'Title' => ['String','putative pyrimidine operon regulatory protein '.
194
'[Corynebacterium jeikeium K411]'],
195
'Extra' => ['String','gi|68536103|ref|YP_250808.1|[68536103]'],
196
'Gi' => ['Integer','68536103'],
197
'CreateDate' => ['String','2005/07/04'],
198
'UpdateDate' => ['String','2006/03/30'],
199
'Flags' => ['Integer','512'],
200
'TaxId' => ['Integer','306537'],
201
'Length' => ['Integer','195'],
202
'Status' => ['String','live'],
203
'ReplacedBy' => ['String',''],
204
'Comment' => ['String',''], },
205
730439 => {'Caption' => ['String','P41007'],
206
'Title' => ['String','PyrR bifunctional protein '.
207
'[Includes: Pyrimidine operon regulatory protein; '.
208
'Uracil phosphoribosyltransferase (UPRTase)]'],
209
'Extra' => ['String','gi|730439|sp|P41007|PYRR_BACCL[730439]'],
210
'Gi' => ['Integer','730439'],
211
'CreateDate' => ['String','1995/02/01'],
212
'UpdateDate' => ['String','2006/07/25'],
213
'Flags' => ['Integer',''],
214
'TaxId' => ['Integer','1394'],
215
'Length' => ['Integer','179'],
216
'Status' => ['String','live'],
217
'ReplacedBy' => ['String',''],
218
'Comment' => ['String',''] },
219
89318838 => { 'Caption' => ['String','EAS10332'],
220
'Title' => ['String','Phosphoribosyltransferase '.
221
'[Mycobacterium gilvum PYR-GCK]'],
222
'Extra' => ['String','gi|89318838|gb|EAS10332.1|[89318838]'],
223
'Gi' => ['Integer','89318838'],
224
'CreateDate' => ['String','2006/03/09'],
225
'UpdateDate' => ['String','2006/03/09'],
226
'Flags' => ['Integer',''],
227
'TaxId' => ['Integer','350054'],
228
'Length' => ['Integer','193'],
229
'Status' => ['String','live'],
230
'ReplacedBy' => ['String',''],
231
'Comment' => ['String',''] } );
233
$eutil = Bio::DB::EUtilities->new(
234
-eutil => 'esummary',
238
# ESummary: the agent must be a Bio::DB::GenericWebAgent.
isa_ok($eutil, 'Bio::DB::GenericWebAgent');
240
# Trap any exception raised by the request instead of letting it end the run.
eval {$response = $eutil->get_Response; };
241
# On failure skip the 253 assertions left in this section (254 are declared
# for 'esummary' and one has already run).
skip("ESummary HTTP error:$@", 253) if $@;
242
isa_ok($response, 'HTTP::Response');
244
# Expect one document summary per submitted UID (five in total).
my @docs = $eutil->get_DocSums();
245
is(scalar(@docs), 5, '$esum->get_DocSums()');
248
while (my $ds = $eutil->next_DocSum) {
249
isa_ok($ds, 'Bio::Tools::EUtilities::Summary::DocSum');
251
my $id = $ds->get_id();
252
ok(exists($docsum{$id}), '$docsum->get_id()');
254
my %items = %{ $docsum{$id} };
256
# iterate using item names
258
for my $name ($ds->get_all_names()) {
260
my ($it) = $ds->get_Items_by_name($name);
261
ok(exists $items{$name},'DocSum Name exists');
262
is($it->get_name, $name, 'get_name(),DocSum Name');
263
is($ds->get_type_by_name($name), $items{$name}->[0],
264
'get_type_by_name() from DocSum');
265
is($it->get_type, $items{$name}->[0], 'get_type() from Item');
272
# ESearch, ESearch History
276
$eutil = Bio::DB::EUtilities->new(
283
# ESearch: the agent must be a Bio::DB::GenericWebAgent.
isa_ok($eutil, 'Bio::DB::GenericWebAgent');
284
# Trap any exception raised by the request instead of letting it end the run.
eval {$response = $eutil->get_Response; };
285
# On failure skip the 12 assertions left in this section (13 are declared for
# 'esearch' and one has already run).
skip("ESearch HTTP error:$@", 12) if $@;
286
isa_ok($response, 'HTTP::Response');
288
# Specific IDs can't be checked, but the number of IDs returned can.
289
my @esearch_ids = $eutil->get_ids;
290
is(scalar(@esearch_ids), 100, '$esearch->get_ids()');
292
# The total hit count is only bounded from below, not matched exactly.
cmp_ok($eutil->get_count, '>', 117, '$esearch->get_count()');
295
$eutil = Bio::DB::EUtilities->new(
303
# ESearch with history: trap any exception raised by the request.
eval {$response = $eutil->get_Response; };
304
# 9 assertions remain in the 'esearch' section from this point on.
skip("ESearch HTTP error:$@", 9) if $@;
305
is($eutil->eutil, 'esearch', 'eutil()');
306
is($eutil->get_database, 'protein', 'get_database()');
307
cmp_ok($eutil->get_count, '>', 117, 'get_count()');
308
is($eutil->get_term, $term, 'get_term()');
309
# get_ids() is compared with a count here rather than collected into a list.
# NOTE(review): this relies on is() evaluating its arguments in scalar
# context -- confirm for the test functions in use.
is($eutil->get_ids, 100, 'History->get_ids()');
311
my $history = $eutil->next_History;
312
isa_ok($history, 'Bio::Tools::EUtilities::HistoryI');
314
# check the actual data
315
my ($webenv, $key) = $history->history;
316
# WebEnv must start with at least 50 non-whitespace characters; the query
# key must start with a digit.
like($webenv, qr{^\S{50}}, 'WebEnv');
317
like($key, qr{^\d+}, 'query key');
319
# can we fetch the sequences?
320
$eutil->set_parameters(
322
-history => $history,
326
# look for fasta headers
328
# Fetch through the ESearch history; trap request errors so the final
# assertion can be skipped instead of dying.
eval{ $r = $eutil->get_Response->content;};
329
# This is the ESearch section, not EPost: label the failure accordingly and
# include the trapped error text, as the other ESearch skips do.
skip("ESearch HTTP error:$@", 1) if $@;
330
# Count FASTA header lines (lines starting with '>') in the response;
# grep in scalar context yields the number of matches.
$t = grep m{^>.*$}, split("\n", $r);
331
is($t, 5, 'ESearch to EFetch');
339
$eutil = Bio::DB::EUtilities->new(
343
# EInfo for a single database: the agent must be a Bio::DB::GenericWebAgent.
isa_ok($eutil, 'Bio::DB::GenericWebAgent');
344
# Trap any exception raised by the request instead of letting it end the run.
eval {$response = $eutil->get_Response; };
345
# The 'einfo' section declares 10 tests and one has already run, so 9 remain
# to be skipped; skipping 10 would run one test more than the plan allows.
skip("EInfo HTTP error:$@", 9) if $@;
346
isa_ok($response, 'HTTP::Response');
347
like($response->content, qr(<eInfoResult>), 'EInfo response');
348
# get_database() is called in list context; only its first element is checked.
is(($eutil->get_database)[0], 'protein', '$einfo->get_database()');
349
# Last-update stamp must look like YYYY/MM/DD HH:MM.
like($eutil->get_last_update, qr(\d{4}\/\d{2}\/\d{2}\s\d{2}:\d{2}),
350
'$einfo->get_last_update()');
351
# Counts are only bounded from below, not matched exactly.
cmp_ok($eutil->get_record_count, '>', 9200000, '$einfo->get_record_count()');
352
is($eutil->get_description, 'Protein sequence record', '$einfo->get_description()');
353
my @links = $eutil->get_LinkInfo;
354
my @fields = $eutil->get_FieldInfo;
355
cmp_ok(scalar(@links), '>',30, '$einfo->get_LinkInfo()');
356
cmp_ok(scalar(@fields), '>',24, '$einfo->get_FieldInfo()');
358
# all databases (list)
359
$eutil = Bio::DB::EUtilities->new(
363
# EInfo database list: trap any exception raised by the request.
eval {$response = $eutil->get_Response; };
364
# Only one assertion remains in the 'einfo' section at this point.
skip("EInfo HTTP error:$@", 1) if $@;
366
my @db = sort qw(pubmed protein nucleotide nuccore nucgss nucest structure
367
genome books cancerchromosomes cdd domains gene genomeprj gensat
368
geo gds homologene journals mesh ncbisearch nlmcatalog omia omim
369
pmc popset probe pcassay pccompound pcsubstance snp taxonomy toolkit
372
# The service must report at least as many databases as the reference list
# in @db; the names themselves are not compared.
my @einfo_dbs = sort $eutil->get_databases;
373
cmp_ok(scalar(@einfo_dbs), '>=', scalar(@db), 'All EInfo databases');
378
# ELink - normal (single ID array) - single db - ElinkData tests
382
$eutil = Bio::DB::EUtilities->new(
385
-dbfrom => 'protein',
389
# ELink: the agent must be a Bio::DB::GenericWebAgent.
isa_ok($eutil, 'Bio::DB::GenericWebAgent');
390
# Trap any exception raised by the request instead of letting it end the run.
eval {$response = $eutil->get_Response; };
391
# On failure skip the 7 assertions left in this section (8 are declared for
# 'elink1' and one has already run).
skip("ELink HTTP error:$@", 7) if $@;
392
isa_ok($response, 'HTTP::Response');
393
like($response->content, qr(<eLinkResult>), 'ELink response');
394
# Data is too volatile to test; commented out for now...
395
#my @ids2 = qw(350054 306537 273068 83332 1394);
396
# Only a lower bound on the number of IDs is checked. The description was
# missing; without it a failure is reported with no test name.
cmp_ok($eutil->get_ids, '>=', 4, '$elink->get_ids()');
397
#is_deeply([sort $eutil->get_ids], [sort @ids2],'$elink->get_ids()');
400
# Exactly one link set is expected for this request.
is($eutil->get_LinkSets, 1, '$elink->get_LinkSets()');
401
my $linkobj = $eutil->next_LinkSet;
402
isa_ok($linkobj, 'Bio::Tools::EUtilities::Link::LinkSet');
403
is($linkobj->get_dbfrom, 'protein', '$linkdata->get_dbfrom()');
404
#is_deeply([sort $linkobj->elink_queryids],
405
# [sort @ids], '$linkdata->elink_queryids()');
406
my $db = $linkobj->get_dbto;
407
is($db, 'taxonomy', '$linkdata->get_dbto()');
408
#is_deeply([sort $linkobj->get_LinkIds_by_db($db)],
409
# [sort @ids2], '$linkdata->get_LinkIds_by_db($db)');
415
$eutil = Bio::DB::EUtilities->new(
420
# EGQuery: the agent must be a Bio::DB::GenericWebAgent.
isa_ok($eutil, 'Bio::DB::GenericWebAgent');
421
# Trap any exception raised by the request instead of letting it end the run.
eval {$response = $eutil->get_Response; };
422
# On failure skip the 3 assertions left in this section (4 are declared for
# 'egquery' and one has already run).
skip("EGQuery HTTP error:$@", 3) if $@;
423
isa_ok($response, 'HTTP::Response');
424
like($response->content, qr(<eGQueryResult>), 'EGQuery response');
425
# Only a lower bound on the number of global query results is checked.
my @gq = $eutil->get_GlobalQueries;
426
cmp_ok(scalar(@gq), '>=', 30, 'get_GlobalQueries')