6
use vars qw($DEBUG $NUMTESTS $ASNOK);
7
$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
10
eval { require Test; };
16
require Bio::ASN1::EntrezGene;
21
warn "Bio::ASN1::EntrezGene not installed, skipping tests\n";
23
plan tests => ($NUMTESTS = 1003);
27
foreach ( $Test::ntest..$NUMTESTS) {
28
skip('Cannot complete entrezgene tests',1);
32
exit(0) unless $ASNOK;
36
my @species=('Homo sapiens','Mus musculus', 'Caenorhabditis elegans');
37
my @pubmed=qw(15461460
86
my %asym=(1=>['A1B', 'ABG', 'GAB', 'HYST2477', 'DKFZp686F0970'],
87
2=>['FWP007','S863-7','DKFZp779B086'], 4=>['A12M1'], 5=>['A12M2'],6=>['A12M3'],7=>['A12M4'],
88
9=>['AAC1'],10=>['AAC2'],11=>['NATP'],
89
12=>['ACT','AACT','MGC88254'],13=>['DAC'],15=>['SNAT','AA-NAT'],
91
11287=>['A1m','A2m','MAM'],
92
11298=>['Nat4','SNAT','Nat-2'],
93
11302=>['AATYK','mKIAA0641'],11303=>['Abc1'],
94
11304=>['RmP','Abcr','Abc10','D430003I15Rik'],
95
11305=>['Abc2','mKIAA1062','D2H0S1474E'],
97
171590=>['Y74C9A.3','CELK05052'],
98
171591=>['Y74C9A.2','CELK01753'],
99
171592=>['Y74C9A.4a','Y74C9A.4b','CELK08126'],
100
171593=>['Y74C9A.5','CELK09643'],
101
171594=>['Y48G1C.4','CELK05819']);
145
my @revkeys=('Entrez Gene Status','RefSeq status','Official Full Name','chromosome','cyto','Reference','dblink',
146
'ALIAS_SYMBOL','OntologyTerm','Index terms','Official Symbol','cM','Property');
149
my $eio=new Bio::SeqIO(-file=>Bio::Root::IO->catfile("t","data",
150
"entrezgene.dat"),-format=>'entrezgene', -debug=>'on',-service_record=>'yes');
152
my ($seq,$struct,$uncapt);
155
($seq,$struct,$uncapt)=$eio->next_seq;
160
ok ref($struct),'Bio::Cluster::SequenceFamily';
161
my $acc=$seq->accession_number;
164
my $org=$seq->species->binomial;
165
ok grep(/\b$org\b/,@species),1,$org;
168
ok $seq->desc if ($acc eq '1')||($acc eq '2')||($acc eq '11304');
169
ok !defined $seq->desc if ($acc eq '171592')||($acc eq '11306');
171
#Are we supposed to have this in our test?
172
ok grep(/\b$acc\b/,@ids),1;
174
my $ann=$seq->annotation();
177
#T3: ENTREZGENE STATUS TESTS
178
my @egstatus=$ann->get_Annotations('Entrez Gene Status');
179
foreach my $status (@egstatus) {
181
if ($acc==1) {ok $status->value,'live'; last STATUS;}
182
if ($acc==2) {ok $status->value,'live'; last STATUS;}
183
if ($acc==4) {ok $status->value,'discontinued'; last STATUS;}
184
if ($acc==6) {ok $status->value,'discontinued'; last STATUS;}
185
if ($acc==11288) {ok $status->value,'secondary'; last STATUS;}
186
if ($acc==11293) {ok $status->value,'secondary'; last STATUS;}
187
if ($acc==171594) {ok $status->value,'live'; last STATUS;}
191
#T4: REFSEQ STATUS TESTS
192
my @refstatus=$ann->get_Annotations('RefSeq status');
193
foreach my $status (@refstatus) {
195
if ($acc==1) {ok $status->value,'REVIEWED'; last STATUS;}
196
if ($acc==2) {ok $status->value,'REVIEWED'; last STATUS;}
197
if ($acc==3) {ok $status->value,'PROVISIONAL'; last STATUS;}
198
if ($acc==4) {ok $status->value,'WITHDRAWN'; last STATUS;}
199
if ($acc==9) {ok $status->value,'VALIDATED'; last STATUS;}
200
if ($acc==11300) {ok $status->value,''; last STATUS;}
201
if ($acc==11306) {ok $status->value,'MODEL'; last STATUS;}
202
if ($acc==11293) {ok $status->value,'secondary'; last STATUS;}
203
if ($acc==171594) {ok $status->value,'Reviewed'; last STATUS;}
208
my @ofname=$ann->get_Annotations('Official Full Name');
209
foreach my $name (@ofname) {
211
if ($acc==10) {ok $name->value,'N-acetyltransferase 2 (arylamine N-acetyltransferase)'; last STATUS;}
212
if ($acc==13) {ok $name->value,'arylacetamide deacetylase (esterase)'; last STATUS;}
213
if ($acc==14) {ok $name->value,'angio-associated, migratory cell protein'; last STATUS;}
214
if ($acc==11287) {ok $name->value,'pregnancy zone protein'; last STATUS;}
215
if ($acc==11298) {ok $name->value,'arylalkylamine N-acetyltransferase'; last STATUS;}
216
if ($acc==11304) {ok $name->value,'ATP-binding cassette, sub-family A (ABC1), member 4'; last STATUS;}
217
if ($acc==11306) {ok $name->value,'ATP-binding cassette, sub-family B (MDR/TAP), member 7'; last STATUS;}
221
#T6: CHROMOSOME TESTS
222
my @chr=$ann->get_Annotations('chromosome');
223
foreach my $chr (@chr) {
225
if ($acc==5) {ok $chr->value,1; last STATUS;}
226
if ($acc==6) {ok $chr->value,1; last STATUS;}
227
if ($acc==7) {ok $chr->value,17; last STATUS;}
228
if ($acc==11306) {ok $chr->value,'X'; last STATUS;}
229
if ($acc==11304) {ok $chr->value,3; last STATUS;}
230
if ($acc==171590) {ok $chr->value,'I'; last STATUS;}
231
if ($acc==171592) {ok $chr->value,'I'; last STATUS;}
235
#T7: GENE SYMBOL ALIAS TESTS
236
my @sym=$ann->get_Annotations('ALIAS_SYMBOL');
237
foreach my $sym (@sym) {
238
next if (($sym eq '')||!defined($sym));
239
ok grep(/\b$sym\b/,@{$asym{$acc}}),1;
242
#T8: CYTO LOCATION TESTS
243
my @map=$ann->get_Annotations('cyto');
244
foreach my $map (@map) {
247
if ($acc==10) {ok $map->value,'8p22'; last STATUS;}
248
if ($acc==11) {ok $map->value,'8p22'; last STATUS;}
249
if ($acc==13) {ok $map->value,'3q21.3-q25.2'; last STATUS;}
250
if ($acc==11306) {ok $map->value,'X C-D'; last STATUS;}
251
if ($acc==11305) {ok $map->value,'2 A2-B'; last STATUS;}
252
if ($acc==11304) {ok $map->value,'3 G1'; last STATUS;}
253
if ($acc==11303) {ok $map->value,'4 A5-B3'; last STATUS;}
257
#T9: REFERENCE NUMBER TEST
258
my @refs=$ann->get_Annotations('Reference');
259
my $refs=$#refs+1||0;
260
ok $pmed{$acc},$refs;
263
my @dblinks=$ann->get_Annotations('dblink');
264
my @keys=$ann->get_all_annotation_keys;
266
#T10: GENERIF AND OTHER DBLINK TESTS
267
my @url=qw(HGMD Ensembl KEGG Homologene);#Only validate the URL
268
foreach my $dblink (@dblinks) {
269
my $dbname=$dblink->database||'';
271
if ( $dbname eq 'generif') {#Should have ID and text
272
ok $dblink->primary_id;
273
ok $dblink->comment->text;
277
if (($dbname eq 'MIM')&&($dblink->authority)&&($dblink->authority eq 'phenotype')) {
278
ok $dblink->optional_id;
281
if ($dbname eq 'Evidence viewer') {
282
ok $dblink->url; #We may even validate the urls?
283
ok $dblink->primary_id,2;
286
if ($dbname eq 'Model maker') {
287
ok $dblink->url; #We may even validate the urls?
288
ok $dblink->primary_id,2;
291
if ($dbname eq 'AceView') {
292
ok $dblink->url; #We may even validate the urls?
293
ok $dblink->primary_id,2;
296
if (grep(/$dbname/,@url)) {
297
ok $dblink->url; #We may even validate the urls?
300
if ($dbname eq 'GDB') {
301
ok $dblink->primary_id,'GDB:119639'; #We may even validate the urls?
304
if ($dbname eq 'UniGene') {
305
ok $dblink->url; #We may even validate the urls?
306
ok $dblink->primary_id,'Hs.212838';
309
if ($dbname eq 'PharmGKB') {
310
ok $dblink->primary_id,'PA24357';
313
if ($dbname eq 'MGC') {
314
ok $dblink->url; #We may even validate the urls?
315
ok $dblink->primary_id,'BC040071';
322
#T11: SOME EXTERNAL DATABASE IDS TESTS
323
foreach my $key (@keys) {
324
next if grep(/\b$key\b/, @revkeys);
325
my @all=$ann->get_Annotations($key);
326
#Checking xref to some databases- OMIM, Wormbase and HGNC, others later
327
foreach my $pid (@all) {
329
if (($acc==8)&&($key eq 'MIM')) {ok $pid->value,'108985'; last DBID;}
330
if (($acc==9)&&($key eq 'HGNC')) {ok $pid->value,'7645'; last DBID;}
331
if (($acc==11298)&&($key eq 'MGI')) {ok $pid->value,'1328365'; last DBID;}
332
if (($acc==171593)&&($key eq 'AceView/WormGenes')) {ok $pid->value,'1A502'; last DBID;}
333
if (($acc==171594)&&($key eq 'WormBase')) {ok $pid->value,'Y48G1C.4'; last DBID;}
338
#T12: REFERENCE RECORD TEST
340
foreach my $ref (@refs) {
341
my $pmed=$ref->medline;
342
ok grep(/\b$pmed\b/,@pubmed),1;
346
#T13/14: STS Markers and Gene Ontology
347
my @syn=('MGI:707739','MPC786');
348
my @evid=qw(IEA TAS ISS);
350
$go{11305}=['5524', '16887', '5215', '8203', '6810', '16021' ,'5765'];
351
$go{11298}=['8080', '8415', '4060', '16740'];
352
$pmeds{11305}=['12466851'];
353
my @types=qw(Function Component Process);
354
if (($acc==11305)||($acc==11298)) { #Let's check just this two...
355
foreach my $ot ($ann->get_Annotations('OntologyTerm')) {
356
if (($ot->term->authority)&&($ot->term->authority eq 'STS marker')) {
358
ok $ot->name,'AI413825';
359
ok $ot->term->namespace,'UniSTS';
360
ok $ot->identifier,158928;
363
ok $ot->name,'D11Mit102';
364
ok $ot->term->namespace,'UniSTS';
365
ok $ot->identifier,126289;
366
foreach my $syn ($ot->get_synonyms) {
367
ok grep(/\b$syn\b/,@syn),1;
372
my $evid=$ot->comment;
373
$evid=~s/evidence: //i;
374
my $type=$ot->ontology->name;
375
my @ref=$ot->term->get_references;
376
my $id=$ot->identifier;
377
my $thispmed=$ref[0]->medline if (@ref);
378
ok grep(/\b$type\b/,@types),1;
379
ok grep(/\b$id\b/,@{$go{$acc}}),1;
380
ok grep(/\b$thispmed\b/,@{$pmeds{$acc}}),1 if ($thispmed);
385
#T15/16/17: GENOMIC LOCATION TESTS/SEQUENCE TYPES TESTS/CONSERVED DOMAINS TESTS
386
my @gffs=('SEQ entrezgene gene location 63548355 63556668 . + .',
387
'SEQ entrezgene genestructure 63548355 63556668 . + .',
388
'SEQ entrezgene gene location 31124733 31133046 . + .',
389
'SEQ entrezgene genestructure 31124733 31133046 . + .',
390
'SEQ entrezgene gene location 8163589 8172398 . + .',
391
'SEQ entrezgene genestructure 8163589 8172398 . + .');
392
my @contigs=$struct->get_members;
393
my @auth=('mrna','genomic','product','mrna sequence','protein');#Known types....
394
foreach my $contig (@contigs) {
395
my $stype=$contig->authority;
396
ok grep(/^$stype$/i,@auth),1;
397
if ($acc==1) {#Do just 1?
398
if (($contig->authority eq 'genomic')||($contig->authority eq 'Genomic')) {
399
foreach my $sf ($contig->get_SeqFeatures) {
400
$sf->source_tag('entrezgene');
401
my $gff=$sf->gff_string;
403
foreach my $gffstr (@gffs) {
404
if ($gffstr eq $gff) {
411
if ($contig->authority eq 'Product') {
412
ok $contig->id,'NP_570602';
413
ok $contig->accession_number,21071030;
414
foreach my $sf ($contig->get_SeqFeatures) {
415
foreach my $dblink ($sf->annotation->get_Annotations('dblink')) {
416
my $key=$dblink->{_anchor}?$dblink->{_anchor}:$dblink->optional_id;
417
my $db=$dblink->database;
418
next unless (($db =~/cdd/i)||($sf->primary_tag=~ /conserved/i));
421
($key,$desc)=split(/:/,$key);
423
$desc=~s/^\s+//;#THIS SHOULD GO IN entrezgene.pm!!!
424
ok $desc,'IGc2; Immunoglobulin C-2 Type';
425
ok $key,'smart00408';
438
#, -locuslink=>'convert');
439
#See if we can convert to locuslink
440
#T18: BACKCOMPATIBILITY TESTS
441
my @llsp =('OFFICIAL_GENE_NAME','CHR','MAP','OFFICIAL_SYMBOL');
442
my $eio_b=new Bio::SeqIO(-file=>Bio::Root::IO->catfile("t","data",
443
"entrezgene.dat"),-format=>'entrezgene', -debug=>'on',-service_record=>'yes',-locuslink=>'convert');
445
while (my $seq=$eio_b->next_seq) {
447
my $acc=$seq->accession_number;
448
ok grep(/\b$acc\b/,@ids),1;
449
my $ann=$seq->annotation;
450
last if ($acc==4);#3 is enough? and 4 does not have gene name, so....
451
foreach my $key (@llsp) {
452
my @vals=$ann->get_Annotations($key);