1
# -*-Perl-*- Test Harness script for Bioperl
2
# $Id: Genpred.t 15112 2008-12-08 18:12:38Z sendu $
10
test_begin(-tests => 185);
12
use_ok('Bio::Tools::Fgenesh');
13
use_ok('Bio::Tools::Genscan');
14
use_ok('Bio::Tools::Genemark');
15
use_ok('Bio::Tools::Glimmer');
16
use_ok('Bio::Tools::MZEF');
21
my $genscan = Bio::Tools::Genscan->new('-file' => test_input_file('genomic-seq.genscan'));
25
my $seqin = Bio::SeqIO->new('-file' => test_input_file('genomic-seq.fasta'),
26
'-format' => "fasta");
28
my $seq = $seqin->next_seq();
32
# scan through the report
35
my ($prtseq, $cds, $tr_cds);
36
while(my $gene = $genscan->next_prediction()) {
37
$gene->attach_seq($seq) if $seq;
41
$fea = ($gene->exons())[0];
42
is $fea->strand(), -1,
43
"strand match (".$fea->strand()." and -1)";
44
$fea = ($gene->poly_A_site());
45
is $fea->score(), 1.05,
46
"score match (".$fea->score()." and 1.05)";
49
$fea = ($gene->exons("Initial"))[0];
51
"strand match (".$fea->strand()." and 1)";
52
is $fea->score(), 4.46,
53
"score match (".$fea->score()." and 4.46)";
56
my @exons = $gene->exons("Initial");
58
"initial exons ".scalar(@exons);
59
$fea = ($gene->exons())[0];
60
is $fea->score(), 1.74,
61
"score match ".$fea->score();
64
$prtseq = $gene->predicted_protein()->seq();
67
$tr_cds = $cds->translate()->seq();
69
is( lc($prtseq), lc($tr_cds),
70
"predicted and extracted protein seqs match");
74
# Genscan report with no genes predicted
75
my $null_genscan = Bio::Tools::Genscan->new('-file' => test_input_file('no-genes.genscan'));
77
my $no_gene = $null_genscan->next_prediction;
78
my @exons = $no_gene->exons;
82
my $mzef = Bio::Tools::MZEF->new('-file' => test_input_file('genomic-seq.mzef'));
86
my $gene = $mzef->next_prediction();
91
my $genemark = Bio::Tools::Genemark->new('-file' => test_input_file('genemark.out'));
93
my $gmgene = $genemark->next_prediction();
94
is $gmgene->seq_id(), "Hvrn.contig8";
95
is $genemark->analysis_date(), "Thu Mar 22 10:25:00 2001";
98
my @num_exons = (1,5,2,1,9,5,3,2,3,2,1,2,7);
99
while($gmgene = $genemark->next_prediction()) {
101
my @gmexons = $gmgene->exons();
102
is scalar(@gmexons), $num_exons[$i];
105
my $gmstart = $gmexons[0]->start();
108
my $gmend = $gmexons[0]->end();
113
# Genemark testing (prokaryotic gene fragment)
114
$genemark = Bio::Tools::Genemark->new('-file' => test_input_file('genemark-fragment.out'),
115
'-seqname' => 'AAVN02000021.1');
117
$gmgene = $genemark->next_prediction();
118
is $gmgene->seq_id(), 'AAVN02000021.1','Genemark tests';
119
is $gmgene->start(), 2;
120
is $gmgene->end(), 214;
121
is $gmgene->strand(), '1';
122
my ($gmexon) = $gmgene->exons();
123
isa_ok $gmexon->location(), 'Bio::Location::Fuzzy';
124
is $gmexon->location->start_pos_type(), 'BEFORE';
125
is $gmexon->location->max_start(), 2;
126
is $gmexon->location->end_pos_type(), 'EXACT';
127
is $gmexon->location->end(), 214;
129
$gmgene = $genemark->next_prediction();
130
is $gmgene->seq_id(), 'AAVN02000021.1';
131
is $gmgene->start, 459;
132
is $gmgene->end, 596;
133
is $gmgene->strand(), '1';
134
($gmexon) = $gmgene->exons();
135
isa_ok $gmexon->location, 'Bio::Location::Fuzzy';
136
is $gmexon->location->start_pos_type(), 'EXACT';
137
is $gmexon->location->start(), 459;
138
is $gmexon->location->end_pos_type(), 'AFTER';
139
is $gmexon->location->min_end(), 596;
141
# Glimmer testing (GlimmerM)
142
my $glimmer_m = Bio::Tools::Glimmer->new('-file' => test_input_file('GlimmerM.out'));
143
$gmgene = $glimmer_m->next_prediction;
146
is($gmgene->seq_id, 'gi|23613028|ref|NC_004326.1|');
147
is($gmgene->source_tag, 'GlimmerM_3.0');
148
is($gmgene->primary_tag, 'transcript');
149
is(($gmgene->get_tag_values('Group'))[0], 'GenePrediction1');
150
my @glim_exons = $gmgene->exons;
151
is(scalar (@glim_exons), 1);
152
is($glim_exons[0]->start, 461);
153
is($glim_exons[0]->end, 523);
154
is($glim_exons[0]->strand, -1);
155
is(($glim_exons[0]->get_tag_values('Group'))[0], 'GenePrediction1');
157
@num_exons = (0,1,3,1,4,2,5,2,8,3,5);
159
while($gmgene = $glimmer_m->next_prediction()) {
161
is(($gmgene->get_tag_values('Group'))[0],"GenePrediction$i");
162
@glim_exons = $gmgene->exons();
163
is scalar(@glim_exons), $num_exons[$i];
165
is $glim_exons[1]->start, 23910;
166
is $glim_exons[1]->end, 23956;
167
is $glim_exons[1]->strand, 1;
171
# Glimmer testing (GlimmerHMM)
172
my $glimmer_hmm = Bio::Tools::Glimmer->new('-file' => test_input_file('GlimmerHMM.out'));
173
my $ghmmgene = $glimmer_hmm->next_prediction;
176
is($ghmmgene->seq_id, 'gi|23613028|ref|NC_004326.1|');
177
is($ghmmgene->source_tag, 'GlimmerHMM');
178
is($ghmmgene->primary_tag, 'transcript');
179
is($ghmmgene->exons, 1);
181
@num_exons = qw(0 1 2 4 2 2 1 1 1 2 2 2 10 4 1 1); # only first few tested
183
while ($ghmmgene = $glimmer_hmm->next_prediction) {
185
my @ghmm_exons = $ghmmgene->exons;
186
is(scalar(@ghmm_exons), $num_exons[$i]) if $i <= $#num_exons;
188
is( $ghmm_exons[1]->start, 5538 );
189
is( $ghmm_exons[1]->end, 5647 );
190
cmp_ok( $ghmm_exons[1]->strand, '>', 0 );
195
# Glimmer testing (Glimmer 2.X)
196
my $glimmer_2 = Bio::Tools::Glimmer->new('-file' => test_input_file('Glimmer2.out'),
197
'-seqname' => 'BCTDNA',
198
'-seqlength' => 29940,);
199
my $g2gene = $glimmer_2->next_prediction;
202
is($g2gene->seq_id, 'BCTDNA');
203
is($g2gene->source_tag, 'Glimmer_2.X');
204
is($g2gene->primary_tag, 'gene');
205
is($g2gene->start, 292);
206
is($g2gene->end, 1623);
207
is($g2gene->frame, 0);
208
is($g2gene->strand, 1);
211
while ($g2gene = $glimmer_2->next_prediction) {
214
is($g2gene->start, 2230);
215
is($g2gene->end, 2349);
216
is($g2gene->strand, -1);
217
is($g2gene->frame, 0);
219
isa_ok($g2gene->location, 'Bio::Location::SplitLocationI');
220
my @sublocations = $g2gene->location->sub_Location();
221
is(scalar (@sublocations), 2);
222
is($sublocations[0]->start, 29263);
223
is($sublocations[0]->end, 29940);
224
is($sublocations[1]->start, 1);
225
is($sublocations[1]->end, 9);
226
is($g2gene->strand, 1);
227
is($g2gene->frame, 0);
233
# Glimmer testing (Glimmer 3.X)
234
my $glimmer_3 = Bio::Tools::Glimmer->new('-file' => test_input_file('Glimmer3.predict'),
235
'-detail' => test_input_file('Glimmer3.detail'));
236
my $g3gene = $glimmer_3->next_prediction;
239
is($g3gene->seq_id, 'BCTDNA');
240
is($g3gene->source_tag, 'Glimmer_3.X');
241
is($g3gene->primary_tag, 'gene');
242
is($g3gene->score, '9.60');
243
isa_ok($g3gene->location, 'Bio::Location::SplitLocationI');
244
my @sublocations = $g3gene->location->sub_Location();
245
is(scalar (@sublocations), 2);
246
is($sublocations[0]->start, 29263);
247
is($sublocations[0]->end, 29940);
248
is($sublocations[1]->start, 1);
249
is($sublocations[1]->end, 9);
250
is($g3gene->frame, 0);
253
while ($g3gene = $glimmer_3->next_prediction) {
256
is($g3gene->start, 13804);
257
is($g3gene->end, 14781);
258
is($g3gene->strand, -1);
259
is($g3gene->frame, 0);
260
is($g3gene->score, '5.51');
262
my ($orfid) = $g3gene->has_tag('Group') ? $g3gene->get_tag_values('Group') : undef;
263
is($orfid, 'GenePrediction_00015');
268
# Glimmer 3.X (prokaryotic gene fragment)
269
my $glimmer_3a = Bio::Tools::Glimmer->new(
270
'-file' => test_input_file('glimmer3-fragment.predict'),
271
'-detail' => test_input_file('glimmer3-fragment.detail'),
273
my $g3gene_a = $glimmer_3a->next_prediction;
277
isa_ok $g3gene_a->location(), 'Bio::Location::Fuzzy';
278
is $g3gene_a->location->start_pos_type(), 'BEFORE';
279
is $g3gene_a->location->max_start(), 1;
280
is $g3gene_a->location->end_pos_type(), 'EXACT';
281
is $g3gene_a->location->end(), 674;
282
is $g3gene_a->frame(), 2;
284
for (1..3) { $g3gene_a = $glimmer_3a->next_prediction; }
286
isa_ok $g3gene_a->location(), 'Bio::Location::Fuzzy';
287
is $g3gene_a->location->start_pos_type(), 'EXACT';
288
is $g3gene_a->location->start(), 2677;
289
is $g3gene_a->frame(), 0;
290
is $g3gene_a->location->end_pos_type(), 'AFTER';
291
is $g3gene_a->location->min_end(), 2932;
292
is $g3gene_a->score, '5.63';
295
my $fgh = Bio::Tools::Fgenesh->new(
296
'-file' => test_input_file('fgenesh.out'),
299
my $fghgene = $fgh->next_prediction();
302
is($fghgene->seq_id, 'gi|1914348|emb|Z81551.1|');
303
is($fghgene->source_tag, 'Fgenesh');
304
is($fghgene->start(), 29);
305
is($fghgene->end(), 1869);
306
cmp_ok($fghgene->strand(), '<', 0);
309
@num_exons = (2,5,4,8);
311
while ($fghgene = $fgh->next_prediction()) {
314
my @fghexons = $fghgene->exons();
315
is(scalar(@fghexons), $num_exons[$i]);
318
cmp_ok($fghexons[0]->strand(), '>', 0);
319
is($fghexons[0]->primary_tag(), 'InitialExon');
320
is($fghexons[0]->start(), 14778);
321
is($fghexons[0]->end(), 15104);
322
cmp_ok($fghexons[3]->strand(), '>', 0);
323
is($fghexons[3]->primary_tag(), 'TerminalExon');
324
is($fghexons[3]->start(), 16988);
325
is($fghexons[3]->end(), 17212);