1
# -*-Perl-*- Test Harness script for Bioperl
2
# $Id: SeqFeature.t 15112 2008-12-08 18:12:38Z sendu $
10
test_begin(-tests => 214);
14
use_ok('Bio::SeqFeature::Generic');
15
use_ok('Bio::SeqFeature::FeaturePair');
16
use_ok('Bio::SeqFeature::Computation');
17
use_ok('Bio::SeqFeature::Gene::Transcript');
18
use_ok('Bio::SeqFeature::Gene::UTR');
19
use_ok('Bio::SeqFeature::Gene::Exon');
20
use_ok('Bio::SeqFeature::Gene::Poly_A_site');
21
use_ok('Bio::SeqFeature::Gene::GeneStructure');
22
use_ok('Bio::Location::Fuzzy');
25
# predeclare variables for strict
26
my ($feat,$str,$feat2,$pair,$comp_obj1,$comp_obj2,@sft);
28
my $DEBUG = test_debug();
30
$feat = Bio::SeqFeature::Generic->new( -start => 40,
34
-source => 'internal',
41
is $feat->start, 40, 'start of feature location';
42
is $feat->end, 80, 'end of feature location';
43
is $feat->primary_tag, 'exon', 'primary tag';
44
is $feat->source_tag, 'internal', 'source tag';
46
$str = $feat->gff_string() || ""; # placate -w
48
$pair = Bio::SeqFeature::FeaturePair->new();
51
$feat2 = Bio::SeqFeature::Generic->new( -start => 400,
55
-source => 'program_a',
63
$pair->feature1($feat);
64
$pair->feature2($feat2);
66
is $pair->feature1, $feat, 'feature1 of pair stored';
67
is $pair->feature2, $feat2, 'feature2 of pair stored';
68
is $pair->start, 40, 'feature start';
69
is $pair->end, 80, 'feature end';
70
is $pair->primary_tag, 'exon', 'primary tag';
71
is $pair->source_tag, 'internal', 'source tag';
72
is $pair->hstart, 400, 'hstart';
73
is $pair->hend, 440, 'hend';
74
is $pair->hprimary_tag, 'other', 'hprimary tag';
75
is $pair->hsource_tag, 'program_a', 'hsource tag';
78
is $pair->end, 440, 'inverted end';
80
# Test attaching a SeqFeature::Generic to a Bio::Seq
82
# Make the parent sequence object
83
my $seq = Bio::Seq->new(
84
-seq => 'aaaaggggtttt',
85
-display_id => 'test',
90
my $sf1 = Bio::SeqFeature::Generic->new(
96
# Add the SeqFeature to the parent
97
ok ($seq->add_SeqFeature($sf1));
99
# Test that it gives the correct sequence
100
my $sf_seq1 = $sf1->seq->seq;
101
is $sf_seq1, 'aggggt', 'seq string';
102
is $sf1->end,9, 'sf1 end';
103
is $sf1->start,4, 'sf1 start';
105
# Make a second seqfeature on the opposite strand
106
my $sf2 = Bio::SeqFeature::Generic->new(
112
# This time add the PrimarySeq to the seqfeature
113
# before adding it to the parent
114
ok ($sf2->attach_seq($seq->primary_seq));
115
$seq->add_SeqFeature($sf2);
117
# Test again that we have the correct sequence
118
my $sf_seq2 = $sf2->seq->seq;
119
is $sf_seq2, 'acccct', 'sf2';
122
#Do some tests for computation.pm
124
ok defined ( $comp_obj1 = Bio::SeqFeature::Computation->new('-start' => 1,
126
is($comp_obj1->computation_id(332),332, 'computation id');
127
ok( $comp_obj1->add_score_value('P', 33), 'score value');
129
$comp_obj2 = Bio::SeqFeature::Computation->new('-start' => 2,
131
ok ($comp_obj1->add_sub_SeqFeature($comp_obj2, 'exon') );
132
ok (@sft = $comp_obj1->all_sub_SeqFeature_types() );
133
is($sft[0], 'exon', 'sft[0] is exon');
136
ok defined ( $comp_obj1 = Bio::SeqFeature::Computation->new
138
-start => 10, -end => 100,
139
-strand => -1, -primary => 'repeat',
140
-program_name => 'GeneMark',
141
-program_date => '12-5-2000',
142
-program_version => 'x.y',
143
-database_name => 'Arabidopsis',
144
-database_date => '12-dec-2000',
145
-computation_id => 2231,
146
-score => { no_score => 334 } )
149
is ( $comp_obj1->computation_id, 2231, 'computation id' );
150
ok ( $comp_obj1->add_score_value('P', 33) );
151
is ( ($comp_obj1->each_score_value('no_score'))[0], '334', 'score value');
153
# some tests for bug #947
155
my $sfeat = Bio::SeqFeature::Generic->new(-primary => 'test');
157
$sfeat->add_sub_SeqFeature(Bio::SeqFeature::Generic->new(-start => 2,
162
$sfeat->add_sub_SeqFeature(Bio::SeqFeature::Generic->new(-start => 6,
167
is $sfeat->start, 2, 'sfeat start for EXPAND-ED feature (bug #947)';
168
is $sfeat->end, 8, 'sfeat end for EXPAND-ED feature (bug #947)';
170
# some tests to see if we can set a feature to start at 0
171
$sfeat = Bio::SeqFeature::Generic->new(-start => 0, -end => 0 );
173
ok(defined $sfeat->start);
174
is($sfeat->start,0, 'can create feature starting and ending at 0');
175
ok(defined $sfeat->end);
176
is($sfeat->end,0,'can create feature starting and ending at 0');
179
# tests for Bio::SeqFeature::Gene::* objects
180
# using information from acc: AB077698 as a guide
182
ok my $seqio = Bio::SeqIO->new(-format => 'genbank',
183
-file => test_input_file('AB077698.gb'));
184
ok my $geneseq = $seqio->next_seq();
186
ok my $gene = Bio::SeqFeature::Gene::GeneStructure->new(-primary => 'gene',
191
ok my $transcript = Bio::SeqFeature::Gene::Transcript->new(-primary => 'CDS',
196
'note' => "Cys3His CCG1-Required Encoded on BAC clone RP5-842K24 (AL050310) The human CHCR (Cys3His CCG1-Required) protein is highly related to EXP/MBNL (Y13829, NM_021038, AF401998) and MBLL (NM_005757,AF061261), which together comprise the human Muscleblind family",
198
'protein_id' => 'BAB85648.1',
201
ok my $poly_A_site1 = Bio::SeqFeature::Gene::Poly_A_site->new
202
(-primary => 'polyA_site',
206
'note' => "Encoded on BAC clone RP5-842K24 (AL050310); PolyA_site#2 used by CHCR EST clone DKFZp434G2222 (AL133625)"
209
ok my $poly_A_site2 = Bio::SeqFeature::Gene::Poly_A_site->new
210
(-primary => 'polyA_site',
214
'note' => "Encoded on BAC clone RP5-842K24 (AL050310); PolyA_site#1 used by CHCR EST clone PLACE1010202 (AK002178)",
217
ok my $fiveprimeUTR = Bio::SeqFeature::Gene::UTR->new(-primary => "utr5prime");
218
ok $fiveprimeUTR->location(Bio::Location::Fuzzy->new(-start => "<1",
220
ok my $threeprimeUTR = Bio::SeqFeature::Gene::UTR->new(-primary => "utr3prime",
224
# Did a quick est2genome against genomic DNA (this is on Chr X) to
225
# get the gene structure by hand since it is not in the file
228
ok my $exon1 = Bio::SeqFeature::Gene::Exon->new(-primary => 'exon',
231
ok $geneseq->add_SeqFeature($exon1);
233
ok $geneseq->add_SeqFeature($fiveprimeUTR);
234
ok $geneseq->add_SeqFeature($threeprimeUTR);
235
ok $geneseq->add_SeqFeature($poly_A_site1);
236
ok $geneseq->add_SeqFeature($poly_A_site2);
238
ok $transcript->add_utr($fiveprimeUTR, 'utr5prime');
239
ok $transcript->add_utr($threeprimeUTR, 'utr3prime');
241
ok $transcript->add_exon($exon1);
243
# API only supports a single poly-A site per transcript at this point
244
$transcript->poly_A_site($poly_A_site2);
245
$geneseq->add_SeqFeature($transcript);
246
$gene->add_transcript($transcript);
247
$geneseq->add_SeqFeature($gene);
249
my ($t) = $gene->transcripts(); # get 1st transcript
251
is($t->mrna->length, 1693, 'mRNA spliced length');
252
is($gene->utrs, 2, 'has 2 UTRs');
256
# Test for bug when Locations are not created explicitly
258
my $feat1 = Bio::SeqFeature::Generic->new(-start => 1,
262
$feat2 = Bio::SeqFeature::Generic->new(-start => 10,
266
my $overlap = $feat1->location->union($feat2->location);
267
is($overlap->start, 1);
268
is($overlap->end, 25);
270
my $intersect = $feat1->location->intersection($feat2->location);
271
is($intersect->start, 10);
272
is($intersect->end, 15);
275
# now let's test spliced_seq
277
isa_ok( $seqio = Bio::SeqIO->new(-file => test_input_file('AY095303S1.gbk'),
278
-format => 'genbank'), "Bio::SeqIO");
280
isa_ok( $geneseq = $seqio->next_seq(), 'Bio::Seq');
281
my ($CDS) = grep { $_->primary_tag eq 'CDS' } $geneseq->get_SeqFeatures;
285
test_skip(-tests => 5,
286
-requires_modules => [qw(IO::String
288
HTTP::Request::Common)],
289
-requires_networking => 1);
291
use_ok('Bio::DB::GenBank');
293
$db = Bio::DB::GenBank->new(-verbose=> $DEBUG);
295
my $cdsseq = $CDS->spliced_seq(-db => $db,-nosort => 1);
297
is($cdsseq->subseq(1,76),
298
'ATGCAGCCATACGCTTCCGTGAGCGGGCGATGTCTATCTAGACCAGATGCATTGCATGTGATACCGTTTGGGCGAC');
299
is($cdsseq->translate->subseq(1,100),
300
'MQPYASVSGRCLSRPDALHVIPFGRPLQAIAGRRFVRCFAKGGQPGDKKKLNVTDKLRLGNTPPTLDVLKAPRPTDAPSAIDDAPSTSGLGLGGGVASPR');
301
# test what happens without
302
$cdsseq = $CDS->spliced_seq(-db => $db,-nosort => 1);
303
is($cdsseq->subseq(1,76),
304
'ATGCAGCCATACGCTTCCGTGAGCGGGCGATGTCTATCTAGACCAGATGCATTGCATGTGATACCGTTTGGGCGAC');
305
is($cdsseq->translate->subseq(1,100),
306
'MQPYASVSGRCLSRPDALHVIPFGRPLQAIAGRRFVRCFAKGGQPGDKKKLNVTDKLRLGNTPPTLDVLKAPRPTDAPSAIDDAPSTSGLGLGGGVASPR');
309
isa_ok( $seqio = Bio::SeqIO->new(-file => test_input_file('AF032047.gbk'),
310
-format => 'genbank'), 'Bio::SeqIO');
311
isa_ok $geneseq = $seqio->next_seq(), 'Bio::Seq';
312
($CDS) = grep { $_->primary_tag eq 'CDS' } $geneseq->get_SeqFeatures;
314
test_skip(-tests => 2,
315
-requires_modules => [qw(IO::String
317
HTTP::Request::Common)],
318
-requires_networking => 1);
320
my $cdsseq = $CDS->spliced_seq( -db => $db, -nosort => 1);
321
is($cdsseq->subseq(1,70), 'ATGGCTCGCTTCGTGGTGGTAGCCCTGCTCGCGCTACTCTCTCTGTCTGGCCTGGAGGCTATCCAGCATG');
322
is($cdsseq->translate->seq, 'MARFVVVALLALLSLSGLEAIQHAPKIQVYSRHPAENGKPNFLNCYVSGFHPSDIEVDLLKNGKKIEKVEHSDLSFSKDWSFYLLYYTEFTPNEKDEYACRVSHVTFPTPKTVKWDRTM*');
328
isa_ok( $seqio = Bio::SeqIO->new(-format => 'genbank',
329
-file => test_input_file('NC_001284.gbk')),
331
my $genome = $seqio->next_seq;
333
foreach my $cds (grep { $_->primary_tag eq 'CDS' } $genome->get_SeqFeatures) {
334
my $spliced = $cds->spliced_seq(-nosort => 1)->translate->seq;
335
chop($spliced); # remove stop codon
336
is($spliced,($cds->get_tag_values('translation'))[0],
337
'spliced seq translation matches expected');
341
my $seqin = Bio::SeqIO->new(-format => 'fasta',
342
-file => test_input_file('sbay_c127.fas'));
344
my $seq = $seqin->next_seq;
346
my $sf = Bio::SeqFeature::Generic->new(-verbose => -1,
350
-primary => 'splicedgene');
352
$sf->attach_seq($seq);
355
'TTCAATGACT' => 'FNDFYSMGKS',
356
'TCAATGACTT' => 'SMTSIPWVNQ',
357
'GTTCAATGAC' => 'VQ*LLFHG*I',
360
for my $phase (-1..3) {
361
my $sfseq = $sf->spliced_seq(-phase => $phase);
362
ok exists $phase_check{$sfseq->subseq(1,10)};
363
is ($sfseq->translate->subseq(1,10), $phase_check{$sfseq->subseq(1,10)}, 'phase check');