~ubuntu-branches/ubuntu/oneiric/bioperl/oneiric

« back to all changes in this revision

Viewing changes to t/SeqFeature.t

  • Committer: Bazaar Package Importer
  • Author(s): Charles Plessy
  • Date: 2009-03-10 07:19:11 UTC
  • mfrom: (3.1.2 squeeze)
  • Revision ID: james.westby@ubuntu.com-20090310071911-ever3si2bbzx1iks
Tags: 1.6.0-2
* Removed patch system (not used):
  - removed instuctions in debian/rules;
  - removed quilt from Build-Depends in debian/control.
* Re-enabled tests:
  - uncommented test command in debian/rules;
  - uncommented previously missing build-dependencies in debian/control.
  - Re-enabled tests and uncommented build-dependencies accordingly.
* Removed libmodule-build-perl and libtest-harness-perl from
  Build-Depends-Indep (provided by perl-modules).
* Better cleaning of empty directories using find -type d -empty -delete
  instead of rmdir in debian/rules (LP: #324001).

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# -*-Perl-*-
2
 
## Bioperl Test Harness Script for Modules
3
 
##
4
 
# CVS Version
5
 
# $Id: SeqFeature.t,v 1.41.4.4 2006/11/30 09:24:00 sendu Exp $
6
 
 
7
 
 
8
 
# Before `make install' is performed this script should be runnable with
9
 
# `make test'. After `make install' it should work as `perl test.t'
10
 
 
11
 
use strict;
12
 
use vars qw($NUMTESTS);
13
 
my $skipdbtests;
14
 
my $skip_all;
15
 
BEGIN { 
16
 
        # to handle systems with no installed Test module
17
 
        # we include the t dir (where a copy of Test.pm is located)
18
 
        # as a fallback
19
 
        eval { require Test; };
20
 
        if( $@ ) {
21
 
                use lib 't';
22
 
        }
23
 
        use Test;
24
 
        $NUMTESTS = 211;
25
 
        plan tests => $NUMTESTS;
26
 
 
27
 
        eval { 
28
 
                require IO::String; 
29
 
                require LWP::UserAgent;
30
 
                require HTTP::Request::Common;
31
 
                require Bio::DB::GenBank;
32
 
        };
33
 
        if( $@ ) {
34
 
                print STDERR "IO::String, LWP::UserAgent or HTTP::Request not installed - skipping DB tests...\n";
35
 
                $skipdbtests = 1;
36
 
        } else {
37
 
                $skipdbtests = 0;
38
 
        }
39
 
        eval {
40
 
                require URI::Escape;
41
 
        };
42
 
        if( $@ ) {
43
 
                print STDERR "URI::Escape not installed, so Bio::SeqFeature::Annotated not usable - skipping all tests...\n";
44
 
                $skip_all = 1;
45
 
        }
46
 
}
47
 
 
48
 
END {
49
 
        foreach ( $Test::ntest..$NUMTESTS) {
50
 
                skip('Skipping tests which need the Bio::DB::GenBank module',1);
51
 
        }
52
 
}
53
 
 
54
 
exit(0) if $skip_all;
55
 
 
56
 
use Bio::Seq;
57
 
use Bio::SeqIO;
58
 
use Bio::SeqFeature::Generic;
59
 
use Bio::SeqFeature::FeaturePair;
60
 
use Bio::SeqFeature::SimilarityPair;
61
 
use Bio::SeqFeature::Computation;
62
 
require Bio::SeqFeature::Annotated;
63
 
use Bio::SeqFeature::Gene::Transcript;
64
 
use Bio::SeqFeature::Gene::UTR;
65
 
use Bio::SeqFeature::Gene::Exon;
66
 
use Bio::SeqFeature::Gene::Poly_A_site;
67
 
use Bio::SeqFeature::Gene::GeneStructure;
68
 
 
69
 
use Bio::Location::Fuzzy;
70
 
use Env qw(BIOPERLDEUG); # for importing bioperldebug var
71
 
ok(1);
72
 
 
73
 
# predeclare variables for strict
74
 
my ($feat,$str,$feat2,$pair,$comp_obj1,$comp_obj2,@sft); 
75
 
 
76
 
 
77
 
my $verbose = 0;
78
 
 
79
 
$feat = new Bio::SeqFeature::Generic ( -start => 40,
80
 
                                       -end => 80,
81
 
                                       -strand => 1,
82
 
                                       -primary => 'exon',
83
 
                                       -source  => 'internal',
84
 
                                       -tag => {
85
 
                                           silly => 20,
86
 
                                           new => 1
87
 
                                           }
88
 
                                       );
89
 
 
90
 
ok $feat->start, 40;
91
 
 
92
 
ok $feat->end, 80;
93
 
 
94
 
ok $feat->primary_tag, 'exon';
95
 
 
96
 
ok $feat->source_tag, 'internal';
97
 
 
98
 
$str = $feat->gff_string() || ""; # placate -w
99
 
 
100
 
# we need to figure out the correct mapping of this stuff
101
 
# soon
102
 
 
103
 
#if( $str ne "SEQ\tinternal\texon\t40\t80\t1\t.\t." ) {
104
 
#    print "not ok 3\n";
105
 
#} else {
106
 
#    print "ok 3\n";
107
 
#}
108
 
 
109
 
ok(1);
110
 
 
111
 
$pair = new Bio::SeqFeature::FeaturePair();
112
 
 
113
 
ok defined $pair;
114
 
 
115
 
$feat2 = new Bio::SeqFeature::Generic ( -start => 400,
116
 
                                       -end => 440,
117
 
                                       -strand => 1,
118
 
                                       -primary => 'other',
119
 
                                       -source  => 'program_a',
120
 
                                       -tag => {
121
 
                                           silly => 20,
122
 
                                           new => 1
123
 
                                           }
124
 
                                       );
125
 
 
126
 
ok defined $feat2;
127
 
$pair->feature1($feat);
128
 
$pair->feature2($feat2);
129
 
 
130
 
ok $pair->feature1, $feat;
131
 
ok $pair->feature2, $feat2;
132
 
ok $pair->start, 40;
133
 
ok $pair->end, 80;
134
 
ok $pair->primary_tag, 'exon';
135
 
ok $pair->source_tag, 'internal';
136
 
ok $pair->hstart, 400;
137
 
ok $pair->hend, 440;
138
 
ok $pair->hprimary_tag, 'other' ;
139
 
ok $pair->hsource_tag, 'program_a';
140
 
 
141
 
$pair->invert;
142
 
ok $pair->end, 440;
143
 
 
144
 
# Test attaching a SeqFeature::Generic to a Bio::Seq
145
 
{
146
 
    # Make the parent sequence object
147
 
    my $seq = Bio::Seq->new(
148
 
        '-seq'          => 'aaaaggggtttt',
149
 
        '-display_id'   => 'test',
150
 
        '-alphabet'      => 'dna',
151
 
        );
152
 
    
153
 
    # Make a SeqFeature
154
 
    my $sf1 = Bio::SeqFeature::Generic->new(
155
 
        '-start'    => 4,
156
 
        '-end'      => 9,
157
 
        '-strand'   => 1,
158
 
        );
159
 
    
160
 
    # Add the SeqFeature to the parent
161
 
    ok ($seq->add_SeqFeature($sf1));
162
 
    
163
 
    # Test that it gives the correct sequence
164
 
    my $sf_seq1 = $sf1->seq->seq;
165
 
    ok $sf_seq1, 'aggggt';
166
 
    ok $sf1->end,9;
167
 
    ok $sf1->start,4;
168
 
 
169
 
    # Make a second seqfeature on the opposite strand
170
 
    my $sf2 = Bio::SeqFeature::Generic->new(
171
 
        '-start'    => 4,
172
 
        '-end'      => 9,
173
 
        '-strand'   => -1,
174
 
        );
175
 
    
176
 
    # This time add the PrimarySeq to the seqfeature
177
 
    # before adding it to the parent
178
 
    ok ($sf2->attach_seq($seq->primary_seq));
179
 
    $seq->add_SeqFeature($sf2);
180
 
    
181
 
    # Test again that we have the correct sequence
182
 
    my $sf_seq2 = $sf2->seq->seq;
183
 
    ok $sf_seq2, 'acccct';
184
 
}
185
 
 
186
 
#Do some tests for computation.pm
187
 
 
188
 
ok defined ( $comp_obj1 = Bio::SeqFeature::Computation->new('-start' => 1,
189
 
                                                            '-end'   => 10) );
190
 
ok ( $comp_obj1->computation_id(332),332 );
191
 
ok ( $comp_obj1->add_score_value('P', 33) );
192
 
{
193
 
    $comp_obj2 = Bio::SeqFeature::Computation->new('-start' => 2,
194
 
                                                   '-end'   => 10);
195
 
    ok ($comp_obj1->add_sub_SeqFeature($comp_obj2, 'exon') );
196
 
    ok (@sft = $comp_obj1->all_sub_SeqFeature_types() );
197
 
    ok ($sft[0], 'exon');
198
 
}
199
 
 
200
 
ok defined ( $comp_obj1 = new Bio::SeqFeature::Computation 
201
 
             (
202
 
              -start => 10, -end => 100,
203
 
              -strand => -1, -primary => 'repeat',
204
 
              -program_name => 'GeneMark',
205
 
              -program_date => '12-5-2000',
206
 
              -program_version => 'x.y',
207
 
              -database_name => 'Arabidopsis',
208
 
              -database_date => '12-dec-2000',
209
 
              -computation_id => 2231,
210
 
              -score    => { no_score => 334 } )
211
 
             );
212
 
 
213
 
ok ( $comp_obj1->computation_id, 2231 );
214
 
ok ( $comp_obj1->add_score_value('P', 33) );
215
 
ok ( ($comp_obj1->each_score_value('no_score'))[0], '334');
216
 
 
217
 
# some tests for bug #947
218
 
 
219
 
my $sfeat = new Bio::SeqFeature::Generic(-primary => 'test');
220
 
 
221
 
$sfeat->add_sub_SeqFeature(new Bio::SeqFeature::Generic(-start => 2,
222
 
                                                        -end   => 4,
223
 
                                                        -primary => 'sub1'),
224
 
                           'EXPAND');
225
 
 
226
 
$sfeat->add_sub_SeqFeature(new Bio::SeqFeature::Generic(-start => 6,
227
 
                                                        -end   => 8,
228
 
                                                        -primary => 'sub2'),
229
 
                           'EXPAND');
230
 
 
231
 
ok($sfeat->start, 2);
232
 
ok($sfeat->end, 8);
233
 
 
234
 
# some tests to see if we can set a feature to start at 0
235
 
$sfeat = new Bio::SeqFeature::Generic(-start => 0, -end => 0 );
236
 
 
237
 
ok(defined $sfeat->start);
238
 
ok($sfeat->start,0);
239
 
ok(defined $sfeat->end);
240
 
ok($sfeat->end,0);
241
 
 
242
 
 
243
 
# tests for Bio::SeqFeature::Gene::* objects
244
 
# using information from acc: AB077698 as a guide
245
 
 
246
 
ok my $seqio = new Bio::SeqIO(-format => 'genbank',
247
 
                         -file   => Bio::Root::IO->catfile("t","data","AB077698.gb"));
248
 
ok my $geneseq = $seqio->next_seq();
249
 
 
250
 
ok my $gene = new Bio::SeqFeature::Gene::GeneStructure(-primary => 'gene',
251
 
                                                    -start   => 1,
252
 
                                                    -end     => 2701,
253
 
                                                    -strand  => 1);
254
 
 
255
 
ok my $transcript = new Bio::SeqFeature::Gene::Transcript(-primary => 'CDS',
256
 
                                                       -start   => 80,
257
 
                                                       -end     => 1144,
258
 
                                                       -tag     => { 
259
 
                                                           'gene' => "CHCR",
260
 
                                                           'note' => "Cys3His CCG1-Required Encoded on BAC clone RP5-842K24 (AL050310) The human CHCR (Cys3His CCG1-Required) protein is highly related to EXP/MBNL (Y13829, NM_021038, AF401998) and MBLL (NM_005757,AF061261), which together comprise the human Muscleblind family",
261
 
                                                           'codon_start' => 1,
262
 
                                                           'protein_id'  => 'BAB85648.1',
263
 
                                                       });
264
 
 
265
 
ok my $poly_A_site1 = new Bio::SeqFeature::Gene::Poly_A_site
266
 
    (-primary => 'polyA_site',
267
 
     -start => 2660,
268
 
     -end   => 2660,
269
 
     -tag   => { 
270
 
         'note' => "Encoded on BAC clone RP5-842K24 (AL050310); PolyA_site#2 used by CHCR EST clone DKFZp434G2222 (AL133625)"
271
 
         });
272
 
 
273
 
ok my $poly_A_site2 = new Bio::SeqFeature::Gene::Poly_A_site
274
 
    (-primary => 'polyA_site',
275
 
     -start => 1606,
276
 
     -end   => 1606,
277
 
     -tag   => { 
278
 
         'note' => "Encoded on BAC clone RP5-842K24 (AL050310); PolyA_site#1 used by CHCR EST clone PLACE1010202 (AK002178)",
279
 
     });
280
 
 
281
 
ok my $fiveprimeUTR = new Bio::SeqFeature::Gene::UTR(-primary => "utr5prime");
282
 
ok $fiveprimeUTR->location(new Bio::Location::Fuzzy(-start => "<1",
283
 
                                                 -end   => 79));
284
 
ok my $threeprimeUTR = new Bio::SeqFeature::Gene::UTR(-primary => "utr3prime",
285
 
                                                   -start   => 1145,
286
 
                                                   -end     => 2659);
287
 
 
288
 
# Did a quick est2genome against genomic DNA (this is on Chr X) to
289
 
# get the gene structure by hand since it is not in the file
290
 
# --Jason
291
 
 
292
 
ok my $exon1 = new Bio::SeqFeature::Gene::Exon(-primary => 'exon',
293
 
                                               -start => 80,
294
 
                                               -end   => 177);
295
 
ok $geneseq->add_SeqFeature($exon1);
296
 
 
297
 
ok $geneseq->add_SeqFeature($fiveprimeUTR);
298
 
ok $geneseq->add_SeqFeature($threeprimeUTR);
299
 
ok $geneseq->add_SeqFeature($poly_A_site1);
300
 
ok $geneseq->add_SeqFeature($poly_A_site2);
301
 
 
302
 
ok $transcript->add_utr($fiveprimeUTR, 'utr5prime');
303
 
ok $transcript->add_utr($threeprimeUTR, 'utr3prime');
304
 
 
305
 
ok $transcript->add_exon($exon1);
306
 
 
307
 
# API only supports a single poly-A site per transcript at this point 
308
 
$transcript->poly_A_site($poly_A_site2);
309
 
$geneseq->add_SeqFeature($transcript);
310
 
$gene->add_transcript($transcript);
311
 
$geneseq->add_SeqFeature($gene);
312
 
 
313
 
my ($t) = $gene->transcripts(); # get 1st transcript
314
 
ok(defined $t); 
315
 
ok($t->mrna->length, 1693);
316
 
ok($gene->utrs, 2);
317
 
 
318
 
 
319
 
 
320
 
# Test for bug when Locations are not created explicitly
321
 
 
322
 
my $feat1 = new Bio::SeqFeature::Generic(-start => 1,
323
 
                                         -end   => 15,
324
 
                                         -strand=> 1);
325
 
 
326
 
$feat2 = new Bio::SeqFeature::Generic(-start => 10,
327
 
                                         -end   => 25,
328
 
                                         -strand=> 1);
329
 
 
330
 
my $overlap = $feat1->location->union($feat2->location);
331
 
ok($overlap->start, 1);
332
 
ok($overlap->end,   25);
333
 
 
334
 
my $intersect = $feat1->location->intersection($feat2->location);
335
 
ok($intersect->start, 10);
336
 
ok($intersect->end,   15);
337
 
 
338
 
 
339
 
# now let's test spliced_seq
340
 
 
341
 
ok  $seqio = new Bio::SeqIO(-file => Bio::Root::IO->catfile
342
 
                            (qw(t data AY095303S1.gbk)),
343
 
                            -format  => 'genbank');
344
 
 
345
 
ok $geneseq = $seqio->next_seq();
346
 
my ($CDS) = grep { $_->primary_tag eq 'CDS' } $geneseq->get_SeqFeatures;
347
 
my $db;
348
 
 
349
 
unless( $skipdbtests ) {
350
 
 $db = new Bio::DB::GenBank(-verbose=> $ENV{BIOPERLDEBUG});
351
 
 $CDS->verbose(-1);
352
 
 my $cdsseq = $CDS->spliced_seq(-db => $db,-nosort => 1);
353
 
 
354
 
 ok($cdsseq->subseq(1,60, 'ATGCAGCCATACGCTTCCGTGAGCGGGCGATGTCTATC'.
355
 
                    'TAGACCAGATGCATTGCATGTGATACCGTTTGGGCGAC'));
356
 
 ok($cdsseq->translate->subseq(1,100), 'MQPYASVSGRCLSRPDALHVIPFGRP'.
357
 
    'LQAIAGRRFVRCFAKGGQPGDKKKLNVTDKLRLGNTPPTLDVLKAPRPTDAPSAIDDAPSTSGLGLGGGVASPR');
358
 
} else {
359
 
    skip('Cannot test for remote loc with spliced_seq w/o LWP installed',1);
360
 
    skip('Cannot test for remote loc with spliced_seq w/o LWP installed',1);
361
 
 
362
 
}
363
 
ok  $seqio = new Bio::SeqIO(-file => Bio::Root::IO->catfile
364
 
                            (qw(t data AF032047.gbk)),
365
 
                            -format  => 'genbank');
366
 
ok $geneseq = $seqio->next_seq();
367
 
($CDS) = grep { $_->primary_tag eq 'CDS' } $geneseq->get_SeqFeatures;
368
 
unless ($skipdbtests ) {
369
 
    my $cdsseq = $CDS->spliced_seq( -db => $db, -nosort => 1);
370
 
    ok($cdsseq->subseq(1,60, 'ATGGCTCGCTTCGTGGTGGTAGCCCTGCTCGCGCTACTCTCTCTG'.
371
 
                       'TCTGGCCTGGAGGCTATCCAGCATG'));
372
 
    ok($cdsseq->translate->seq, 'MARFVVVALLALLSLSGLEAIQHAPKIQVYSRHPAENGKPNFL'.
373
 
       'NCYVSGFHPSDIEVDLLKNGKKIEKVEHSDLSFSKDWSFYLLYYTEFTPNEKDEYACRVSHVTFPTPKTVKWDRTM*');
374
 
} else {
375
 
    skip('Cannot test for remote loc with spliced_seq w/o LWP installed',1);
376
 
    skip('Cannot test for remote loc with spliced_seq w/o LWP installed',1);
377
 
}
378
 
 
379
 
 
380
 
# trans-spliced 
381
 
 
382
 
ok( $seqio = Bio::SeqIO->new(-format => 'genbank',
383
 
                                                                          -file   => 
384
 
                            Bio::Root::IO->catfile(qw(t data NC_001284.gbk))));
385
 
my $genome = $seqio->next_seq;
386
 
 
387
 
foreach my $cds (grep { $_->primary_tag eq 'CDS' } $genome->get_SeqFeatures) {
388
 
   my $spliced = $cds->spliced_seq(-nosort => 1)->translate->seq;
389
 
   chop($spliced); # remove stop codon
390
 
   ok($spliced,($cds->get_tag_values('translation'))[0],'spliced seq translation matches expected');
391
 
}
392
 
 
393
 
my $sfa = Bio::SeqFeature::Annotated->new(-start => 1,
394
 
                                          -end => 5,
395
 
                                          -strand => "+",
396
 
                                          -frame => 2,
397
 
                                          -phase => 2,
398
 
                                          -score => 12,
399
 
                                          -display_name => 'test.annot',
400
 
                                          -seq_id => 'test.displayname' );
401
 
 
402
 
ok (defined $sfa);
403
 
my $loc = $sfa->location;
404
 
ok $loc->isa("Bio::Location::Simple");
405
 
 
406
 
ok $sfa->display_name eq 'test.annot';
407
 
 
408
 
 
409
 
#test bsfa::from_feature
410
 
{
411
 
  my $sfg = Bio::SeqFeature::Generic->new ( -start => 400,
412
 
                                            -end => 440,
413
 
                                            -strand => 1,
414
 
                                            -primary => 'nucleotide_motif',
415
 
                                            -source  => 'program_a',
416
 
                                            -tag => {
417
 
                                                     silly => 20,
418
 
                                                     new => 1
419
 
                                                    }
420
 
                                          );
421
 
        my $sfa2;
422
 
        eval {
423
 
                $sfa2 = Bio::SeqFeature::Annotated->new(-feature => $sfg);
424
 
        };
425
 
        if ($@) {
426
 
                foreach ( $Test::ntest..$NUMTESTS ) { skip('Could not get sofa definitions from external server',1); }
427
 
                exit(0);
428
 
        }
429
 
  ok $sfa2->type->name,'nucleotide_motif';
430
 
  ok $sfa2->primary_tag, 'nucleotide_motif';
431
 
  ok $sfa2->source,'program_a';
432
 
  ok $sfa2->strand,1;
433
 
  ok $sfa2->start,400;
434
 
  ok $sfa2->end,440;
435
 
  ok $sfa2->get_Annotations('silly')->value,20;
436
 
  ok $sfa2->get_Annotations('new')->value,1;
437
 
 
438
 
  my $sfa3 = Bio::SeqFeature::Annotated->new( -start => 1,
439
 
                                              -end => 5,
440
 
                                              -strand => "+",
441
 
                                              -frame => 2,
442
 
                                              -phase => 2,
443
 
                                              -score => 12,
444
 
                                              -display_name => 'test.annot',
445
 
                                              -seq_id => 'test.displayname' );
446
 
  eval {
447
 
        $sfa3->from_feature($sfg);
448
 
  };
449
 
  if ($@) {
450
 
        foreach ( $Test::ntest..$NUMTESTS ) { skip('Could not get sofa definitions from external server',1); }
451
 
        exit(0);
452
 
  }
453
 
  ok $sfa3->type->name,'nucleotide_motif';
454
 
  ok $sfa3->primary_tag, 'nucleotide_motif';
455
 
  ok $sfa3->source,'program_a';
456
 
  ok $sfa3->strand,1;
457
 
  ok $sfa3->start,400;
458
 
  ok $sfa3->end,440;
459
 
  ok $sfa3->get_Annotations('silly')->value,20;
460
 
  ok $sfa3->get_Annotations('new')->value,1;
461
 
}