1
# -*-Perl-*- Test Harness script for Bioperl
10
test_begin(-tests => 362);
14
use_ok 'Bio::SeqFeature::Generic';
18
my ($feat, $str, $feat2, $pair, @sft);
20
my $DEBUG = test_debug();
22
ok $feat = Bio::SeqFeature::Generic->new(
27
is $feat->primary_tag, '';
28
is $feat->source_tag, '';
29
is $feat->display_name, '';
31
ok $feat = Bio::SeqFeature::Generic->new(
36
-source => 'internal',
37
-display_name => 'my exon feature',
44
is $feat->start, 40, 'Start of feature location';
45
is $feat->end, 80, 'End of feature location';
46
is $feat->primary_tag, 'exon', 'Primary tag';
47
is $feat->source_tag, 'internal', 'Source tag';
48
is $feat->display_name, 'my exon feature', 'Display name';
49
is $feat->phase, undef, 'Undef phase by default';
50
is $feat->phase(1), 1, 'Phase accessor returns';
51
is $feat->phase, 1, 'Phase is persistent';
53
ok $feat->gff_string();
55
ok $feat2 = Bio::SeqFeature::Generic->new(
60
-source => 'program_a',
67
is $feat2->phase, 1, 'Set phase from constructor';
70
# Test attaching a SeqFeature::Generic to a Bio::Seq or SeqFeature::Generic
72
# Make the parent sequence object
73
my $seq = Bio::Seq->new(
74
-seq => 'aaaaggggtttt',
75
-display_id => 'test',
80
ok my $sf1 = Bio::SeqFeature::Generic->new(
86
# Add the SeqFeature to the parent
87
ok $seq->add_SeqFeature($sf1);
89
# Test that it gives the correct sequence
90
is $sf1->start, 4, 'Start of first seqfeature';
91
is $sf1->end, 9, 'End of first seqfeature';
92
is $sf1->strand, 1, 'Strand of first seqfeature';
93
ok my $sf_seq1 = $sf1->seq;
94
is $sf_seq1->seq, 'aggggt', 'Sequence of first seqfeature';
96
# Make a second seqfeature on the opposite strand
97
ok my $sf2 = Bio::SeqFeature::Generic->new(
103
# Now add the PrimarySeq to the seqfeature before adding it to the parent
104
ok $sf2->attach_seq($seq->primary_seq);
105
ok $seq->add_SeqFeature($sf2);
107
# Test again that we have the correct sequence
108
is $sf2->start, 4, 'Start of second seqfeature';
109
is $sf2->end, 9, 'End of second seqfeature';
110
is $sf2->strand, -1, 'Strand of second seqfeature';
111
ok my $sf_seq2 = $sf2->seq;
112
is $sf_seq2->seq, 'acccct', 'Sequence of second seqfeature';
116
# Some tests for bug #947
118
ok my $sfeat = Bio::SeqFeature::Generic->new(-primary => 'test');
120
ok $sfeat->add_sub_SeqFeature(
121
Bio::SeqFeature::Generic->new(
129
ok $sfeat->add_sub_SeqFeature(
130
Bio::SeqFeature::Generic->new(
138
is $sfeat->start, 2, 'sfeat start for EXPAND-ED feature (bug #947)';
139
is $sfeat->end, 8, 'sfeat end for EXPAND-ED feature (bug #947)';
141
# Some tests to see if we can set a feature to start at 0
142
ok $sfeat = Bio::SeqFeature::Generic->new(-start => 0, -end => 0 );
144
ok defined $sfeat->start;
145
is $sfeat->start, 0, 'Can create feature starting and ending at 0';
146
ok defined $sfeat->end;
147
is $sfeat->end, 0, 'Can create feature starting and ending at 0';
150
# Test for bug when Locations are not created explicitly
152
ok my $feat1 = Bio::SeqFeature::Generic->new(
158
ok $feat2 = Bio::SeqFeature::Generic->new(
164
ok my $overlap = $feat1->location->union($feat2->location);
165
is $overlap->start, 1;
166
is $overlap->end, 25;
168
ok my $intersect = $feat1->location->intersection($feat2->location);
169
is $intersect->start, 10;
170
is $intersect->end, 15;
173
# Now let's test spliced_seq
174
ok my $seqio = Bio::SeqIO->new(
175
-file => test_input_file('AY095303S1.gbk'),
178
isa_ok $seqio, 'Bio::SeqIO';
179
ok my $geneseq = $seqio->next_seq;
180
isa_ok $geneseq, 'Bio::Seq';
181
ok my ($CDS) = grep { $_->primary_tag eq 'CDS' } $geneseq->get_SeqFeatures;
185
test_skip(-tests => 5,
186
-requires_modules => [qw(IO::String
188
HTTP::Request::Common)],
189
-requires_networking => 1);
191
use_ok 'Bio::DB::GenBank';
193
$db = Bio::DB::GenBank->new(-verbose=> $DEBUG);
195
my $cdsseq = $CDS->spliced_seq(-db => $db,-nosort => 1);
197
is $cdsseq->subseq(1,76),
198
'ATGCAGCCATACGCTTCCGTGAGCGGGCGATGTCTATCTAGACCAGATGCATTGCATGTGATACCGTTTGGGCGAC';
199
is $cdsseq->translate->subseq(1,100),
200
'MQPYASVSGRCLSRPDALHVIPFGRPLQAIAGRRFVRCFAKGGQPGDKKKLNVTDKLRLGNTPPTLDVLK'.
201
'APRPTDAPSAIDDAPSTSGLGLGGGVASPR';
202
# Test what happens without
203
$cdsseq = $CDS->spliced_seq(-db => $db,-nosort => 1);
204
is $cdsseq->subseq(1,76),
205
'ATGCAGCCATACGCTTCCGTGAGCGGGCGATGTCTATCTAGACCAGATGCATTGCATGTGATACCGTTTGGGCGAC';
206
is $cdsseq->translate->subseq(1,100),
207
'MQPYASVSGRCLSRPDALHVIPFGRPLQAIAGRRFVRCFAKGGQPGDKKKLNVTDKLRLGNTPPTLDVLK'.
208
'APRPTDAPSAIDDAPSTSGLGLGGGVASPR';
211
ok $seqio = Bio::SeqIO->new(
212
-file => test_input_file('AF032047.gbk'),
215
isa_ok $seqio, 'Bio::SeqIO';
216
ok $geneseq = $seqio->next_seq;
217
isa_ok $geneseq, 'Bio::Seq';
218
ok( ($CDS) = grep { $_->primary_tag eq 'CDS' } $geneseq->get_SeqFeatures );
220
test_skip(-tests => 2,
221
-requires_modules => [qw(IO::String
223
HTTP::Request::Common)],
224
-requires_networking => 1);
226
my $cdsseq = $CDS->spliced_seq( -db => $db, -nosort => 1);
227
is $cdsseq->subseq(1,70),
228
'ATGGCTCGCTTCGTGGTGGTAGCCCTGCTCGCGCTACTCTCTCTGTCTGGCCTGGAGGCTATCCAGCATG';
229
is $cdsseq->translate->seq,
230
'MARFVVVALLALLSLSGLEAIQHAPKIQVYSRHPAENGKPNFLNCYVSGFHPSDIEVDLLKNGKKIEKVE'.
231
'HSDLSFSKDWSFYLLYYTEFTPNEKDEYACRVSHVTFPTPKTVKWDRTM*';
237
ok $seqio = Bio::SeqIO->new(
238
-format => 'genbank',
239
-file => test_input_file('NC_001284.gbk')
241
isa_ok $seqio, 'Bio::SeqIO';
242
ok my $genome = $seqio->next_seq;
244
for my $cds (grep { $_->primary_tag eq 'CDS' } $genome->get_SeqFeatures) {
245
ok my $spliced = $cds->spliced_seq(-nosort => 1)->translate->seq;
246
chop $spliced; # remove stop codon
247
is $spliced, ($cds->get_tag_values('translation'))[0], 'spliced_seq translation matches expected';
251
ok my $seq = Bio::SeqIO->new(
253
-file => test_input_file('sbay_c127.fas')
256
ok my $sf = Bio::SeqFeature::Generic->new(
261
-primary => 'splicedgene'
264
ok $sf->attach_seq($seq);
267
'TTCAATGACT' => 'FNDFYSMGKS',
268
'TCAATGACTT' => 'SMTSIPWVNQ',
269
'GTTCAATGAC' => 'VQ*LLFHG*I',
272
for my $phase (-1..3) {
273
ok my $sfseq = $sf->spliced_seq(-phase => $phase);
274
ok exists $phase_check{$sfseq->subseq(1,10)};
275
is $sfseq->translate->subseq(1,10), $phase_check{$sfseq->subseq(1,10)}, 'phase check';
279
ok $sf->add_tag_value('note','n1');
280
ok $sf->add_tag_value('note','n2');
281
ok $sf->add_tag_value('comment','c1');
282
is_deeply [sort $sf->get_all_tags()], [sort qw(note comment)] , 'Tags found';
283
is_deeply [sort $sf->get_tagset_values('note')], [sort qw(n1 n2)] , 'get_tagset_values tag values found';
284
is_deeply [sort $sf->get_tagset_values(qw(note comment))], [sort qw(c1 n1 n2)] , 'get_tagset_values tag values for multiple tags found';
286
is_deeply [sort $sf->get_tag_values('note')], [sort qw(n1 n2)] , 'get_tag_values tag values found';
287
} 'get_tag_values lives with tag';
289
is_deeply [$sf->get_tagset_values('notag') ], [], 'get_tagset_values no tag values found';
290
} 'get_tagset_values lives with no tag';
291
throws_ok { $sf->get_tag_values('notag') } qr/tag value that does not exist/, 'get_tag_values throws with no tag';
293
# Circular sequence SeqFeature tests
294
$seq = Bio::SeqIO->new(
295
-format => 'genbank',
296
-file => test_input_file('PX1CG.gb')
299
ok $seq->is_circular, 'Phi-X174 genome is circular';
301
# Retrieving the spliced sequence from any split location requires spliced_seq()
305
'A' => [3981, 136, 1, 1542, 'join(3981..5386,1..136)', 'ATGGTTCGTT'],
306
'A*' => [4497, 136, 1, 1026, 'join(4497..5386,1..136)', 'ATGAAATCGC'],
307
'B' => [5075, 136, 1, 363, 'join(5075..5386,1..51)', 'ATGGAACAAC'],
310
ok my @split_sfs = grep {
311
$_->location->isa('Bio::Location::SplitLocationI')
312
} $seq->get_SeqFeatures();
314
is @split_sfs, 3, 'Only 3 split locations';
316
for my $sf (@split_sfs) {
317
isa_ok $sf->location, 'Bio::Location::SplitLocationI';
318
ok my ($tag) = $sf->get_tag_values('product');
319
my ($start, $end, $strand, $length, $ftstring, $first_ten) = @{$sf_data{$tag}};
322
is $sf->location->to_FTstring, $ftstring, 'Feature string';
323
is $sf->spliced_seq->subseq(1,10), $first_ten, 'First ten nucleotides';
324
is $sf->strand, $strand, 'Strand';
327
local $TODO = "Need to define how to deal with start, end length for circular sequences";
328
is $sf->start, $start, 'Start';
329
is $sf->end, $end, 'End';
330
is $sf->length, $length, 'Expected length';