1
# -*-Perl-*- Test Harness script for Bioperl
10
test_begin(-tests => 249);
14
use_ok('Bio::SeqFeature::Generic');
15
use_ok('Bio::SeqFeature::FeaturePair');
16
use_ok('Bio::SeqFeature::Computation');
17
use_ok('Bio::SeqFeature::Gene::Transcript');
18
use_ok('Bio::SeqFeature::Gene::UTR');
19
use_ok('Bio::SeqFeature::Gene::Exon');
20
use_ok('Bio::SeqFeature::Gene::Poly_A_site');
21
use_ok('Bio::SeqFeature::Gene::GeneStructure');
22
use_ok('Bio::Location::Fuzzy');
25
# predeclare variables for strict
26
my ($feat,$str,$feat2,$pair,$comp_obj1,$comp_obj2,@sft);
28
my $DEBUG = test_debug();
30
$feat = Bio::SeqFeature::Generic->new( -start => 40,
34
-source => 'internal',
41
is $feat->start, 40, 'start of feature location';
42
is $feat->end, 80, 'end of feature location';
43
is $feat->primary_tag, 'exon', 'primary tag';
44
is $feat->source_tag, 'internal', 'source tag';
45
is $feat->phase, undef, 'undef phase by default';
46
is $feat->phase(1), 1, 'phase accessor returns';
47
is $feat->phase, 1, 'phase is persistent';
49
$str = $feat->gff_string() || ""; # placate -w
51
$pair = Bio::SeqFeature::FeaturePair->new();
54
$feat2 = Bio::SeqFeature::Generic->new( -start => 400,
58
-source => 'program_a',
65
is $feat2->phase, 1, 'set phase from constructor';
68
$pair->feature1($feat);
69
$pair->feature2($feat2);
71
is $pair->feature1, $feat, 'feature1 of pair stored';
72
is $pair->feature2, $feat2, 'feature2 of pair stored';
73
is $pair->start, 40, 'feature start';
74
is $pair->end, 80, 'feature end';
75
is $pair->primary_tag, 'exon', 'primary tag';
76
is $pair->source_tag, 'internal', 'source tag';
77
is $pair->hstart, 400, 'hstart';
78
is $pair->hend, 440, 'hend';
79
is $pair->hprimary_tag, 'other', 'hprimary tag';
80
is $pair->hsource_tag, 'program_a', 'hsource tag';
83
is $pair->end, 440, 'inverted end';
85
# Test attaching a SeqFeature::Generic to a Bio::Seq
87
# Make the parent sequence object
88
my $seq = Bio::Seq->new(
89
-seq => 'aaaaggggtttt',
90
-display_id => 'test',
95
my $sf1 = Bio::SeqFeature::Generic->new(
101
# Add the SeqFeature to the parent
102
ok ($seq->add_SeqFeature($sf1));
104
# Test that it gives the correct sequence
105
my $sf_seq1 = $sf1->seq->seq;
106
is $sf_seq1, 'aggggt', 'seq string';
107
is $sf1->end,9, 'sf1 end';
108
is $sf1->start,4, 'sf1 start';
110
# Make a second seqfeature on the opposite strand
111
my $sf2 = Bio::SeqFeature::Generic->new(
117
# This time add the PrimarySeq to the seqfeature
118
# before adding it to the parent
119
ok ($sf2->attach_seq($seq->primary_seq));
120
$seq->add_SeqFeature($sf2);
122
# Test again that we have the correct sequence
123
my $sf_seq2 = $sf2->seq->seq;
124
is $sf_seq2, 'acccct', 'sf2';
127
#Do some tests for computation.pm
129
ok defined ( $comp_obj1 = Bio::SeqFeature::Computation->new('-start' => 1,
131
is($comp_obj1->computation_id(332),332, 'computation id');
132
ok( $comp_obj1->add_score_value('P', 33), 'score value');
134
$comp_obj2 = Bio::SeqFeature::Computation->new('-start' => 2,
136
ok ($comp_obj1->add_sub_SeqFeature($comp_obj2, 'exon') );
137
ok (@sft = $comp_obj1->all_sub_SeqFeature_types() );
138
is($sft[0], 'exon', 'sft[0] is exon');
141
ok defined ( $comp_obj1 = Bio::SeqFeature::Computation->new
143
-start => 10, -end => 100,
144
-strand => -1, -primary => 'repeat',
145
-program_name => 'GeneMark',
146
-program_date => '12-5-2000',
147
-program_version => 'x.y',
148
-database_name => 'Arabidopsis',
149
-database_date => '12-dec-2000',
150
-computation_id => 2231,
151
-score => { no_score => 334 } )
154
is ( $comp_obj1->computation_id, 2231, 'computation id' );
155
ok ( $comp_obj1->add_score_value('P', 33) );
156
is ( ($comp_obj1->each_score_value('no_score'))[0], '334', 'score value');
158
# some tests for bug #947
160
my $sfeat = Bio::SeqFeature::Generic->new(-primary => 'test');
162
$sfeat->add_sub_SeqFeature(Bio::SeqFeature::Generic->new(-start => 2,
167
$sfeat->add_sub_SeqFeature(Bio::SeqFeature::Generic->new(-start => 6,
172
is $sfeat->start, 2, 'sfeat start for EXPAND-ED feature (bug #947)';
173
is $sfeat->end, 8, 'sfeat end for EXPAND-ED feature (bug #947)';
175
# some tests to see if we can set a feature to start at 0
176
$sfeat = Bio::SeqFeature::Generic->new(-start => 0, -end => 0 );
178
ok(defined $sfeat->start);
179
is($sfeat->start,0, 'can create feature starting and ending at 0');
180
ok(defined $sfeat->end);
181
is($sfeat->end,0,'can create feature starting and ending at 0');
184
# tests for Bio::SeqFeature::Gene::* objects
185
# using information from acc: AB077698 as a guide
187
ok my $seqio = Bio::SeqIO->new(-format => 'genbank',
188
-file => test_input_file('AB077698.gb'));
189
ok my $geneseq = $seqio->next_seq();
191
ok my $gene = Bio::SeqFeature::Gene::GeneStructure->new(-primary => 'gene',
196
ok my $transcript = Bio::SeqFeature::Gene::Transcript->new(-primary => 'CDS',
201
'note' => "Cys3His CCG1-Required Encoded on BAC clone RP5-842K24 (AL050310) The human CHCR (Cys3His CCG1-Required) protein is highly related to EXP/MBNL (Y13829, NM_021038, AF401998) and MBLL (NM_005757,AF061261), which together comprise the human Muscleblind family",
203
'protein_id' => 'BAB85648.1',
206
ok my $poly_A_site1 = Bio::SeqFeature::Gene::Poly_A_site->new
207
(-primary => 'polyA_site',
211
'note' => "Encoded on BAC clone RP5-842K24 (AL050310); PolyA_site#2 used by CHCR EST clone DKFZp434G2222 (AL133625)"
214
ok my $poly_A_site2 = Bio::SeqFeature::Gene::Poly_A_site->new
215
(-primary => 'polyA_site',
219
'note' => "Encoded on BAC clone RP5-842K24 (AL050310); PolyA_site#1 used by CHCR EST clone PLACE1010202 (AK002178)",
222
ok my $fiveprimeUTR = Bio::SeqFeature::Gene::UTR->new(-primary => "utr5prime");
223
ok $fiveprimeUTR->location(Bio::Location::Fuzzy->new(-start => "<1",
225
ok my $threeprimeUTR = Bio::SeqFeature::Gene::UTR->new(-primary => "utr3prime",
229
# Did a quick est2genome against genomic DNA (this is on Chr X) to
230
# get the gene structure by hand since it is not in the file
233
ok my $exon1 = Bio::SeqFeature::Gene::Exon->new(-primary => 'exon',
236
ok $geneseq->add_SeqFeature($exon1);
238
ok $geneseq->add_SeqFeature($fiveprimeUTR);
239
ok $geneseq->add_SeqFeature($threeprimeUTR);
240
ok $geneseq->add_SeqFeature($poly_A_site1);
241
ok $geneseq->add_SeqFeature($poly_A_site2);
243
ok $transcript->add_utr($fiveprimeUTR, 'utr5prime');
244
ok $transcript->add_utr($threeprimeUTR, 'utr3prime');
246
ok $transcript->add_exon($exon1);
248
# API only supports a single poly-A site per transcript at this point
249
$transcript->poly_A_site($poly_A_site2);
250
$geneseq->add_SeqFeature($transcript);
251
$gene->add_transcript($transcript);
252
$geneseq->add_SeqFeature($gene);
254
my ($t) = $gene->transcripts(); # get 1st transcript
256
is($t->mrna->length, 1693, 'mRNA spliced length');
257
is($gene->utrs, 2, 'has 2 UTRs');
261
# Test for bug when Locations are not created explicitly
263
my $feat1 = Bio::SeqFeature::Generic->new(-start => 1,
267
$feat2 = Bio::SeqFeature::Generic->new(-start => 10,
271
my $overlap = $feat1->location->union($feat2->location);
272
is($overlap->start, 1);
273
is($overlap->end, 25);
275
my $intersect = $feat1->location->intersection($feat2->location);
276
is($intersect->start, 10);
277
is($intersect->end, 15);
280
# now let's test spliced_seq
282
isa_ok( $seqio = Bio::SeqIO->new(-file => test_input_file('AY095303S1.gbk'),
283
-format => 'genbank'), "Bio::SeqIO");
285
isa_ok( $geneseq = $seqio->next_seq(), 'Bio::Seq');
286
my ($CDS) = grep { $_->primary_tag eq 'CDS' } $geneseq->get_SeqFeatures;
290
test_skip(-tests => 5,
291
-requires_modules => [qw(IO::String
293
HTTP::Request::Common)],
294
-requires_networking => 1);
296
use_ok('Bio::DB::GenBank');
298
$db = Bio::DB::GenBank->new(-verbose=> $DEBUG);
300
my $cdsseq = $CDS->spliced_seq(-db => $db,-nosort => 1);
302
is($cdsseq->subseq(1,76),
303
'ATGCAGCCATACGCTTCCGTGAGCGGGCGATGTCTATCTAGACCAGATGCATTGCATGTGATACCGTTTGGGCGAC');
304
is($cdsseq->translate->subseq(1,100),
305
'MQPYASVSGRCLSRPDALHVIPFGRPLQAIAGRRFVRCFAKGGQPGDKKKLNVTDKLRLGNTPPTLDVLKAPRPTDAPSAIDDAPSTSGLGLGGGVASPR');
306
# test what happens without
307
$cdsseq = $CDS->spliced_seq(-db => $db,-nosort => 1);
308
is($cdsseq->subseq(1,76),
309
'ATGCAGCCATACGCTTCCGTGAGCGGGCGATGTCTATCTAGACCAGATGCATTGCATGTGATACCGTTTGGGCGAC');
310
is($cdsseq->translate->subseq(1,100),
311
'MQPYASVSGRCLSRPDALHVIPFGRPLQAIAGRRFVRCFAKGGQPGDKKKLNVTDKLRLGNTPPTLDVLKAPRPTDAPSAIDDAPSTSGLGLGGGVASPR');
314
isa_ok( $seqio = Bio::SeqIO->new(-file => test_input_file('AF032047.gbk'),
315
-format => 'genbank'), 'Bio::SeqIO');
316
isa_ok $geneseq = $seqio->next_seq(), 'Bio::Seq';
317
($CDS) = grep { $_->primary_tag eq 'CDS' } $geneseq->get_SeqFeatures;
319
test_skip(-tests => 2,
320
-requires_modules => [qw(IO::String
322
HTTP::Request::Common)],
323
-requires_networking => 1);
325
my $cdsseq = $CDS->spliced_seq( -db => $db, -nosort => 1);
326
is($cdsseq->subseq(1,70), 'ATGGCTCGCTTCGTGGTGGTAGCCCTGCTCGCGCTACTCTCTCTGTCTGGCCTGGAGGCTATCCAGCATG');
327
is($cdsseq->translate->seq, 'MARFVVVALLALLSLSGLEAIQHAPKIQVYSRHPAENGKPNFLNCYVSGFHPSDIEVDLLKNGKKIEKVEHSDLSFSKDWSFYLLYYTEFTPNEKDEYACRVSHVTFPTPKTVKWDRTM*');
333
isa_ok( $seqio = Bio::SeqIO->new(-format => 'genbank',
334
-file => test_input_file('NC_001284.gbk')),
336
my $genome = $seqio->next_seq;
338
foreach my $cds (grep { $_->primary_tag eq 'CDS' } $genome->get_SeqFeatures) {
339
my $spliced = $cds->spliced_seq(-nosort => 1)->translate->seq;
340
chop($spliced); # remove stop codon
341
is($spliced,($cds->get_tag_values('translation'))[0],
342
'spliced seq translation matches expected');
346
my $seqin = Bio::SeqIO->new(-format => 'fasta',
347
-file => test_input_file('sbay_c127.fas'));
349
my $seq = $seqin->next_seq;
351
my $sf = Bio::SeqFeature::Generic->new(-verbose => -1,
355
-primary => 'splicedgene');
357
$sf->attach_seq($seq);
360
'TTCAATGACT' => 'FNDFYSMGKS',
361
'TCAATGACTT' => 'SMTSIPWVNQ',
362
'GTTCAATGAC' => 'VQ*LLFHG*I',
365
for my $phase (-1..3) {
366
my $sfseq = $sf->spliced_seq(-phase => $phase);
367
ok exists $phase_check{$sfseq->subseq(1,10)};
368
is ($sfseq->translate->subseq(1,10), $phase_check{$sfseq->subseq(1,10)}, 'phase check');
372
$sf->add_tag_value('note','n1');
373
$sf->add_tag_value('note','n2');
374
$sf->add_tag_value('comment','c1');
375
is_deeply( [sort $sf->get_all_tags()], [sort qw(note comment)] , 'tags found');
376
is_deeply( [sort $sf->get_tagset_values('note')], [sort qw(n1 n2)] , 'get_tagset_values tag values found');
377
is_deeply( [sort $sf->get_tagset_values(qw(note comment))], [sort qw(c1 n1 n2)] , 'get_tagset_values tag values for multiple tags found');
379
is_deeply( [sort $sf->get_tag_values('note')], [sort qw(n1 n2)] , 'get_tag_values tag values found');
380
} 'get_tag_values lives with tag';
382
is_deeply( [$sf->get_tagset_values('notag') ], [], 'get_tagset_values no tag values found');
383
} 'get_tagset_values lives with no tag';
384
throws_ok { $sf->get_tag_values('notag') } qr/tag value that does not exist/, 'get_tag_values throws with no tag';
386
# circular sequence SeqFeature tests
387
$seqin = Bio::SeqIO->new(-format => 'genbank',
388
-file => test_input_file('PX1CG.gb'));
390
$seq = $seqin->next_seq;
391
ok($seq->is_circular, 'Phi-X174 genome is circular');
393
# retrieving the spliced sequence from any split location requires
398
'A' => [3981, 136, 1, 1542, 'join(3981..5386,1..136)', 'ATGGTTCGTT'],
399
'A*' => [4497, 136, 1, 1026, 'join(4497..5386,1..136)', 'ATGAAATCGC'],
400
'B' => [5075, 136, 1, 363, 'join(5075..5386,1..51)', 'ATGGAACAAC'],
403
my @split_sfs = grep {
404
$_->location->isa('Bio::Location::SplitLocationI')
405
} $seq->get_SeqFeatures();
407
is(@split_sfs, 3, 'only 3 split locations');
409
for my $sf (@split_sfs) {
410
isa_ok($sf->location, 'Bio::Location::SplitLocationI');
411
my ($tag) = $sf->get_tag_values('product');
412
my ($start, $end, $strand, $length, $ftstring, $first_ten) = @{$sf_data{$tag}};
415
is($sf->location->to_FTstring, $ftstring, 'feature string');
416
is($sf->spliced_seq->subseq(1,10), $first_ten, 'first ten nucleotides');
417
is($sf->strand, $strand, 'strand');
420
local $TODO = "Need to define how to deal with start, end length for circular sequences";
421
is($sf->start, $start, 'start');
422
is($sf->end, $end, 'end');
423
is($sf->length, $length, 'expected length');