~ubuntu-branches/ubuntu/saucy/bioperl/saucy-proposed

« back to all changes in this revision

Viewing changes to t/SeqFeature/SeqFeature.t

  • Committer: Bazaar Package Importer
  • Author(s): Charles Plessy
  • Date: 2009-03-10 07:19:11 UTC
  • mfrom: (1.2.3 upstream)
  • Revision ID: james.westby@ubuntu.com-20090310071911-fukqzw54pyb1f0bd
Tags: 1.6.0-2
* Removed patch system (not used):
  - removed instuctions in debian/rules;
  - removed quilt from Build-Depends in debian/control.
* Re-enabled tests:
  - uncommented test command in debian/rules;
  - uncommented previously missing build-dependencies in debian/control.
  - Re-enabled tests and uncommented build-dependencies accordingly.
* Removed libmodule-build-perl and libtest-harness-perl from
  Build-Depends-Indep (provided by perl-modules).
* Better cleaning of empty directories using find -type d -empty -delete
  instead of rmdir in debian/rules (LP: #324001).

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
# -*-Perl-*- Test Harness script for Bioperl
 
2
# $Id: SeqFeature.t 15112 2008-12-08 18:12:38Z sendu $
 
3
 
 
4
use strict;
 
5
 
 
6
BEGIN { 
 
7
    use lib '.';
 
8
    use Bio::Root::Test;
 
9
    
 
10
    test_begin(-tests => 214);
 
11
        
 
12
        use_ok('Bio::Seq');
 
13
        use_ok('Bio::SeqIO');
 
14
        use_ok('Bio::SeqFeature::Generic');
 
15
        use_ok('Bio::SeqFeature::FeaturePair');
 
16
        use_ok('Bio::SeqFeature::Computation');
 
17
        use_ok('Bio::SeqFeature::Gene::Transcript');
 
18
        use_ok('Bio::SeqFeature::Gene::UTR');
 
19
        use_ok('Bio::SeqFeature::Gene::Exon');
 
20
        use_ok('Bio::SeqFeature::Gene::Poly_A_site');
 
21
        use_ok('Bio::SeqFeature::Gene::GeneStructure');
 
22
        use_ok('Bio::Location::Fuzzy');
 
23
}
 
24
 
 
25
# predeclare variables for strict
 
26
my ($feat,$str,$feat2,$pair,$comp_obj1,$comp_obj2,@sft); 
 
27
 
 
28
my $DEBUG = test_debug();
 
29
 
 
30
$feat = Bio::SeqFeature::Generic->new( -start => 40,
 
31
                                       -end => 80,
 
32
                                       -strand => 1,
 
33
                                       -primary => 'exon',
 
34
                                       -source  => 'internal',
 
35
                                       -tag => {
 
36
                                           silly => 20,
 
37
                                           new => 1
 
38
                                           }
 
39
                                       );
 
40
 
 
41
is $feat->start, 40, 'start of feature location';
 
42
is $feat->end, 80, 'end of feature location';
 
43
is $feat->primary_tag, 'exon', 'primary tag';
 
44
is $feat->source_tag, 'internal', 'source tag';
 
45
 
 
46
$str = $feat->gff_string() || ""; # placate -w
 
47
 
 
48
$pair = Bio::SeqFeature::FeaturePair->new();
 
49
ok defined $pair;
 
50
 
 
51
$feat2 = Bio::SeqFeature::Generic->new( -start => 400,
 
52
                                       -end => 440,
 
53
                                       -strand => 1,
 
54
                                       -primary => 'other',
 
55
                                       -source  => 'program_a',
 
56
                                       -tag => {
 
57
                                           silly => 20,
 
58
                                           new => 1
 
59
                                           }
 
60
                                       );
 
61
 
 
62
ok defined $feat2;
 
63
$pair->feature1($feat);
 
64
$pair->feature2($feat2);
 
65
 
 
66
is $pair->feature1, $feat, 'feature1 of pair stored';
 
67
is $pair->feature2, $feat2, 'feature2 of pair stored';
 
68
is $pair->start, 40, 'feature start';
 
69
is $pair->end, 80, 'feature end';
 
70
is $pair->primary_tag, 'exon', 'primary tag';
 
71
is $pair->source_tag, 'internal', 'source tag';
 
72
is $pair->hstart, 400, 'hstart';
 
73
is $pair->hend, 440, 'hend';
 
74
is $pair->hprimary_tag, 'other', 'hprimary tag';
 
75
is $pair->hsource_tag, 'program_a', 'hsource tag';
 
76
 
 
77
$pair->invert;
 
78
is $pair->end, 440, 'inverted end';
 
79
 
 
80
# Test attaching a SeqFeature::Generic to a Bio::Seq
 
81
{
 
82
    # Make the parent sequence object
 
83
    my $seq = Bio::Seq->new(
 
84
                            -seq          => 'aaaaggggtttt',
 
85
                            -display_id   => 'test',
 
86
                            -alphabet     => 'dna',
 
87
                            );
 
88
    
 
89
    # Make a SeqFeature
 
90
    my $sf1 = Bio::SeqFeature::Generic->new(
 
91
                                            -start    => 4,
 
92
                                            -end      => 9,
 
93
                                            -strand   => 1,
 
94
                                            );
 
95
    
 
96
    # Add the SeqFeature to the parent
 
97
    ok ($seq->add_SeqFeature($sf1));
 
98
    
 
99
    # Test that it gives the correct sequence
 
100
    my $sf_seq1 = $sf1->seq->seq;
 
101
    is $sf_seq1, 'aggggt', 'seq string';
 
102
    is $sf1->end,9, 'sf1 end';
 
103
    is $sf1->start,4, 'sf1 start';
 
104
 
 
105
    # Make a second seqfeature on the opposite strand
 
106
    my $sf2 = Bio::SeqFeature::Generic->new(
 
107
                                            -start    => 4,
 
108
                                            -end      => 9,
 
109
                                            -strand   => -1,
 
110
                                            );
 
111
    
 
112
    # This time add the PrimarySeq to the seqfeature
 
113
    # before adding it to the parent
 
114
    ok ($sf2->attach_seq($seq->primary_seq));
 
115
    $seq->add_SeqFeature($sf2);
 
116
    
 
117
    # Test again that we have the correct sequence
 
118
    my $sf_seq2 = $sf2->seq->seq;
 
119
    is $sf_seq2, 'acccct', 'sf2';
 
120
}
 
121
 
 
122
#Do some tests for computation.pm
 
123
 
 
124
ok defined ( $comp_obj1 = Bio::SeqFeature::Computation->new('-start' => 1,
 
125
                                                            '-end'   => 10) );
 
126
is($comp_obj1->computation_id(332),332, 'computation id');
 
127
ok( $comp_obj1->add_score_value('P', 33), 'score value');
 
128
{
 
129
    $comp_obj2 = Bio::SeqFeature::Computation->new('-start' => 2,
 
130
                                                   '-end'   => 10);
 
131
    ok ($comp_obj1->add_sub_SeqFeature($comp_obj2, 'exon') );
 
132
    ok (@sft = $comp_obj1->all_sub_SeqFeature_types() );
 
133
    is($sft[0], 'exon', 'sft[0] is exon');
 
134
}
 
135
 
 
136
ok defined ( $comp_obj1 = Bio::SeqFeature::Computation->new 
 
137
             (
 
138
              -start => 10, -end => 100,
 
139
              -strand => -1, -primary => 'repeat',
 
140
              -program_name => 'GeneMark',
 
141
              -program_date => '12-5-2000',
 
142
              -program_version => 'x.y',
 
143
              -database_name => 'Arabidopsis',
 
144
              -database_date => '12-dec-2000',
 
145
              -computation_id => 2231,
 
146
              -score    => { no_score => 334 } )
 
147
             );
 
148
 
 
149
is ( $comp_obj1->computation_id, 2231, 'computation id' );
 
150
ok ( $comp_obj1->add_score_value('P', 33) );
 
151
is ( ($comp_obj1->each_score_value('no_score'))[0], '334', 'score value');
 
152
 
 
153
# some tests for bug #947
 
154
 
 
155
my $sfeat = Bio::SeqFeature::Generic->new(-primary => 'test');
 
156
 
 
157
$sfeat->add_sub_SeqFeature(Bio::SeqFeature::Generic->new(-start => 2,
 
158
                                                        -end   => 4,
 
159
                                                        -primary => 'sub1'),
 
160
                           'EXPAND');
 
161
 
 
162
$sfeat->add_sub_SeqFeature(Bio::SeqFeature::Generic->new(-start => 6,
 
163
                                                        -end   => 8,
 
164
                                                        -primary => 'sub2'),
 
165
                           'EXPAND');
 
166
 
 
167
is $sfeat->start, 2, 'sfeat start for EXPAND-ED feature (bug #947)';
 
168
is $sfeat->end, 8, 'sfeat end for EXPAND-ED feature (bug #947)';
 
169
 
 
170
# some tests to see if we can set a feature to start at 0
 
171
$sfeat = Bio::SeqFeature::Generic->new(-start => 0, -end => 0 );
 
172
 
 
173
ok(defined $sfeat->start);
 
174
is($sfeat->start,0, 'can create feature starting and ending at 0');
 
175
ok(defined $sfeat->end);
 
176
is($sfeat->end,0,'can create feature starting and ending at 0');
 
177
 
 
178
 
 
179
# tests for Bio::SeqFeature::Gene::* objects
 
180
# using information from acc: AB077698 as a guide
 
181
 
 
182
ok my $seqio = Bio::SeqIO->new(-format => 'genbank',
 
183
                         -file   => test_input_file('AB077698.gb'));
 
184
ok my $geneseq = $seqio->next_seq();
 
185
 
 
186
ok my $gene = Bio::SeqFeature::Gene::GeneStructure->new(-primary => 'gene',
 
187
                                                    -start   => 1,
 
188
                                                       -end     => 2701,
 
189
                                                       -strand  => 1);
 
190
 
 
191
ok my $transcript = Bio::SeqFeature::Gene::Transcript->new(-primary => 'CDS',
 
192
                                                          -start   => 80,
 
193
                                                          -end     => 1144,
 
194
                                                          -tag     => { 
 
195
                                                              'gene' => "CHCR",
 
196
                                                              'note' => "Cys3His CCG1-Required Encoded on BAC clone RP5-842K24 (AL050310) The human CHCR (Cys3His CCG1-Required) protein is highly related to EXP/MBNL (Y13829, NM_021038, AF401998) and MBLL (NM_005757,AF061261), which together comprise the human Muscleblind family",
 
197
                                                              'codon_start' => 1,
 
198
                                                              'protein_id'  => 'BAB85648.1',
 
199
                                                          });
 
200
 
 
201
ok my $poly_A_site1 = Bio::SeqFeature::Gene::Poly_A_site->new
 
202
    (-primary => 'polyA_site',
 
203
     -start => 2660,
 
204
     -end   => 2660,
 
205
     -tag   => { 
 
206
         'note' => "Encoded on BAC clone RP5-842K24 (AL050310); PolyA_site#2 used by CHCR EST clone DKFZp434G2222 (AL133625)"
 
207
         });
 
208
 
 
209
ok my $poly_A_site2 = Bio::SeqFeature::Gene::Poly_A_site->new
 
210
    (-primary => 'polyA_site',
 
211
     -start => 1606,
 
212
     -end   => 1606,
 
213
     -tag   => { 
 
214
         'note' => "Encoded on BAC clone RP5-842K24 (AL050310); PolyA_site#1 used by CHCR EST clone PLACE1010202 (AK002178)",
 
215
     });
 
216
 
 
217
ok my $fiveprimeUTR = Bio::SeqFeature::Gene::UTR->new(-primary => "utr5prime");
 
218
ok $fiveprimeUTR->location(Bio::Location::Fuzzy->new(-start => "<1",
 
219
                                                    -end   => 79));
 
220
ok my $threeprimeUTR = Bio::SeqFeature::Gene::UTR->new(-primary => "utr3prime",
 
221
                                                      -start   => 1145,
 
222
                                                      -end     => 2659);
 
223
 
 
224
# Did a quick est2genome against genomic DNA (this is on Chr X) to
 
225
# get the gene structure by hand since it is not in the file
 
226
# --Jason
 
227
 
 
228
ok my $exon1 = Bio::SeqFeature::Gene::Exon->new(-primary => 'exon',
 
229
                                               -start => 80,
 
230
                                               -end   => 177);
 
231
ok $geneseq->add_SeqFeature($exon1);
 
232
 
 
233
ok $geneseq->add_SeqFeature($fiveprimeUTR);
 
234
ok $geneseq->add_SeqFeature($threeprimeUTR);
 
235
ok $geneseq->add_SeqFeature($poly_A_site1);
 
236
ok $geneseq->add_SeqFeature($poly_A_site2);
 
237
 
 
238
ok $transcript->add_utr($fiveprimeUTR, 'utr5prime');
 
239
ok $transcript->add_utr($threeprimeUTR, 'utr3prime');
 
240
 
 
241
ok $transcript->add_exon($exon1);
 
242
 
 
243
# API only supports a single poly-A site per transcript at this point 
 
244
$transcript->poly_A_site($poly_A_site2);
 
245
$geneseq->add_SeqFeature($transcript);
 
246
$gene->add_transcript($transcript);
 
247
$geneseq->add_SeqFeature($gene);
 
248
 
 
249
my ($t) = $gene->transcripts(); # get 1st transcript
 
250
ok(defined $t); 
 
251
is($t->mrna->length, 1693, 'mRNA spliced length');
 
252
is($gene->utrs, 2, 'has 2 UTRs');
 
253
 
 
254
 
 
255
 
 
256
# Test for bug when Locations are not created explicitly
 
257
 
 
258
my $feat1 = Bio::SeqFeature::Generic->new(-start => 1,
 
259
                                         -end   => 15,
 
260
                                         -strand=> 1);
 
261
 
 
262
$feat2 = Bio::SeqFeature::Generic->new(-start => 10,
 
263
                                         -end   => 25,
 
264
                                         -strand=> 1);
 
265
 
 
266
my $overlap = $feat1->location->union($feat2->location);
 
267
is($overlap->start, 1);
 
268
is($overlap->end,   25);
 
269
 
 
270
my $intersect = $feat1->location->intersection($feat2->location);
 
271
is($intersect->start, 10);
 
272
is($intersect->end,   15);
 
273
 
 
274
 
 
275
# now let's test spliced_seq
 
276
 
 
277
isa_ok(  $seqio = Bio::SeqIO->new(-file => test_input_file('AY095303S1.gbk'),
 
278
                                 -format  => 'genbank'), "Bio::SeqIO");
 
279
 
 
280
isa_ok( $geneseq = $seqio->next_seq(), 'Bio::Seq');
 
281
my ($CDS) = grep { $_->primary_tag eq 'CDS' } $geneseq->get_SeqFeatures;
 
282
my $db;
 
283
 
 
284
SKIP: {
 
285
        test_skip(-tests => 5,
 
286
                          -requires_modules => [qw(IO::String
 
287
                                                                           LWP::UserAgent
 
288
                                                                           HTTP::Request::Common)],
 
289
                          -requires_networking => 1);
 
290
        
 
291
        use_ok('Bio::DB::GenBank');
 
292
    
 
293
    $db = Bio::DB::GenBank->new(-verbose=> $DEBUG);
 
294
    $CDS->verbose(-1);
 
295
    my $cdsseq = $CDS->spliced_seq(-db => $db,-nosort => 1);
 
296
    
 
297
    is($cdsseq->subseq(1,76),
 
298
       'ATGCAGCCATACGCTTCCGTGAGCGGGCGATGTCTATCTAGACCAGATGCATTGCATGTGATACCGTTTGGGCGAC');
 
299
    is($cdsseq->translate->subseq(1,100), 
 
300
       'MQPYASVSGRCLSRPDALHVIPFGRPLQAIAGRRFVRCFAKGGQPGDKKKLNVTDKLRLGNTPPTLDVLKAPRPTDAPSAIDDAPSTSGLGLGGGVASPR');
 
301
    # test what happens without 
 
302
    $cdsseq = $CDS->spliced_seq(-db => $db,-nosort => 1);    
 
303
    is($cdsseq->subseq(1,76), 
 
304
       'ATGCAGCCATACGCTTCCGTGAGCGGGCGATGTCTATCTAGACCAGATGCATTGCATGTGATACCGTTTGGGCGAC');
 
305
    is($cdsseq->translate->subseq(1,100), 
 
306
       'MQPYASVSGRCLSRPDALHVIPFGRPLQAIAGRRFVRCFAKGGQPGDKKKLNVTDKLRLGNTPPTLDVLKAPRPTDAPSAIDDAPSTSGLGLGGGVASPR');
 
307
 
308
 
 
309
isa_ok(  $seqio = Bio::SeqIO->new(-file => test_input_file('AF032047.gbk'),
 
310
                                -format  => 'genbank'), 'Bio::SeqIO');
 
311
isa_ok $geneseq = $seqio->next_seq(), 'Bio::Seq';
 
312
($CDS) = grep { $_->primary_tag eq 'CDS' } $geneseq->get_SeqFeatures;
 
313
SKIP: { 
 
314
    test_skip(-tests => 2,
 
315
                          -requires_modules => [qw(IO::String
 
316
                                                                           LWP::UserAgent
 
317
                                                                           HTTP::Request::Common)],
 
318
                          -requires_networking => 1);
 
319
        
 
320
    my $cdsseq = $CDS->spliced_seq( -db => $db, -nosort => 1);
 
321
    is($cdsseq->subseq(1,70), 'ATGGCTCGCTTCGTGGTGGTAGCCCTGCTCGCGCTACTCTCTCTGTCTGGCCTGGAGGCTATCCAGCATG');
 
322
    is($cdsseq->translate->seq, 'MARFVVVALLALLSLSGLEAIQHAPKIQVYSRHPAENGKPNFLNCYVSGFHPSDIEVDLLKNGKKIEKVEHSDLSFSKDWSFYLLYYTEFTPNEKDEYACRVSHVTFPTPKTVKWDRTM*');
 
323
}
 
324
 
 
325
 
 
326
# trans-spliced 
 
327
 
 
328
isa_ok( $seqio = Bio::SeqIO->new(-format => 'genbank',
 
329
                                 -file   => test_input_file('NC_001284.gbk')), 
 
330
        'Bio::SeqIO');
 
331
my $genome = $seqio->next_seq;
 
332
 
 
333
foreach my $cds (grep { $_->primary_tag eq 'CDS' } $genome->get_SeqFeatures) {
 
334
   my $spliced = $cds->spliced_seq(-nosort => 1)->translate->seq;
 
335
   chop($spliced); # remove stop codon
 
336
   is($spliced,($cds->get_tag_values('translation'))[0],
 
337
      'spliced seq translation matches expected');
 
338
}
 
339
 
 
340
# spliced_seq phase 
 
341
my $seqin = Bio::SeqIO->new(-format => 'fasta',
 
342
                            -file   => test_input_file('sbay_c127.fas'));
 
343
 
 
344
my $seq = $seqin->next_seq;
 
345
 
 
346
my $sf = Bio::SeqFeature::Generic->new(-verbose => -1,
 
347
                                       -start => 263,
 
348
                                       -end => 721,
 
349
                                       -strand => 1,
 
350
                                       -primary => 'splicedgene');
 
351
 
 
352
$sf->attach_seq($seq);
 
353
 
 
354
my %phase_check = (
 
355
    'TTCAATGACT' => 'FNDFYSMGKS',
 
356
    'TCAATGACTT' => 'SMTSIPWVNQ',
 
357
    'GTTCAATGAC' => 'VQ*LLFHG*I',
 
358
);
 
359
 
 
360
for my $phase (-1..3) {
 
361
    my $sfseq = $sf->spliced_seq(-phase => $phase);
 
362
    ok exists $phase_check{$sfseq->subseq(1,10)};
 
363
    is ($sfseq->translate->subseq(1,10), $phase_check{$sfseq->subseq(1,10)}, 'phase check');
 
364
}
 
365