2
## Bioperl Test Harness Script for Modules
4
# Before `make install' is performed this script should be runnable with
5
# `make test'. After `make install' it should work as `perl test.t'
10
use constant TEST_COUNT => 52;
11
use constant GFF_FILE => Bio::Root::IO->catfile('t','data',
12
'seqfeaturedb','test.gff3');
15
# to handle systems with no installed Test module
16
# we include the t dir (where a copy of Test.pm is located)
18
eval { require Test; };
23
plan test => TEST_COUNT;
24
$ENV{ORACLE_HOME} ||= '/home/oracle/Home';
27
use lib "$Bin/..","$Bin/../blib/lib";
28
use Bio::DB::SeqFeature::Store;
29
use Bio::DB::SeqFeature::Store::GFF3Loader;
33
my $explanation = shift;
48
my (@f,$f,@s,$s,$seq1,$seq2);
51
@args = (-adaptor => 'memory') unless @args;
53
my $db = eval { Bio::DB::SeqFeature::Store->new(@args) };
56
fail(TEST_COUNT - 1) unless $db;
58
my $loader = eval { Bio::DB::SeqFeature::Store::GFF3Loader->new(-store=>$db) };
61
fail(TEST_COUNT - 2) unless $loader;
64
ok($loader->load(GFF_FILE));
66
# there should be one gene named 'abc-1'
67
@f = $db->get_features_by_name('abc-1');
71
# there should be three subfeatures of type "exon" and three of type "CDS"
72
ok($f->get_SeqFeatures('exon')==3);
73
ok($f->get_SeqFeatures('CDS')==3);
75
# the sequence of feature abc-1 should match the sequence of the first exon at the beginning
77
$seq2 = (sort {$a->start<=>$b->start} $f->get_SeqFeatures('exon'))[0]->seq->seq;
78
ok(substr($seq1,0,length $seq2) eq $seq2);
80
# sequence lengths should match
81
ok(length $seq1 == $f->length);
83
# if we pull out abc-1 again we should get the same object
84
($s) = $db->get_features_by_name('abc-1');
87
# we should get two objects when we ask for abc-1 using get_features_by_alias
88
# this also depends on selective subfeature indexing
89
@f = $db->get_features_by_alias('abc-1');
92
# the two features should be different
95
# test that targets are working
96
($f) = $db->get_features_by_name('match1');
100
ok($s->seq_id eq 'CEESC13F');
101
$seq1 = $s->seq->seq;
102
ok(substr($seq1,0,10) eq 'ttgcgttcgg');
104
# can we fetch subfeatures?
105
# gene3.a has the Index=1 attribute, so we should fetch it
106
($f) = $db->get_features_by_name('gene3.a');
109
# gene 3.b doesn't have an index, so we shouldn't get it
110
($f) = $db->get_features_by_name('gene3.b');
113
# test three-tiered genes
114
($f) = $db->get_features_by_name('gene3');
116
my @transcripts = $f->get_SeqFeatures;
117
ok(@transcripts == 2);
118
ok($transcripts[0]->method eq 'mRNA');
119
ok($transcripts[0]->source eq 'confirmed');
121
# test that exon #2 is shared between the two transcripts
122
my @exons1 = $transcripts[0]->get_SeqFeatures('CDS');
124
my @exons2 = $transcripts[1]->get_SeqFeatures('CDS');
125
my ($shared1) = grep {$_->display_name||'' eq 'shared_exon'} @exons1;
126
my ($shared2) = grep {$_->display_name||'' eq 'shared_exon'} @exons2;
127
ok($shared1 && $shared2);
128
ok($shared1 eq $shared2);
129
ok($shared1->primary_id eq $shared2->primary_id);
132
ok($shared1->phase == 0);
133
ok($shared1->strand eq +1);
134
ok(($f->attributes('expressed'))[0] eq 'yes');
137
my ($gene3a) = grep { $_->display_name eq 'gene3.a'} @transcripts;
138
my ($gene3b) = grep { $_->display_name eq 'gene3.b'} @transcripts;
141
ok($gene3a->Is_expressed);
142
ok(!$gene3b->Is_expressed);
144
# the representation of the 3'-UTR in the two transcripts a and b is
145
# different (not recommended but supported by the GFF3 spec). In the
146
# first case, there are two 3'UTRs existing as independent
147
# features. In the second, there is one UTR with a split location.
148
ok($gene3a->Three_prime_UTR == 2);
149
ok($gene3b->Three_prime_UTR == 1);
150
my ($utr) = $gene3b->Three_prime_UTR;
151
ok($utr->segments == 2);
152
my $location = $utr->location;
153
ok($location->isa('Bio::Location::Split'));
154
ok($location->sub_Location == 2);
156
# ok, test that queries are working properly.
157
# find all features with the attribute "expressed"
158
@f = $db->get_features_by_attribute({expressed=>'yes'});
161
# find all top-level features on Contig3 -- there should be two
162
@f = $db->get_features_by_location(-seq_id=>'Contig3');
165
# find all top-level features on Contig3 of type 'assembly_component'
166
@f = $db->features(-seq_id=>'Contig3',-type=>'assembly_component');
171
my $feature_count = @f;
172
ok($feature_count > 0);
174
my $i = $db->get_seq_stream;
178
while ($i->next_seq) { $count++ }
179
ok($feature_count == $count);
181
# regression test on bug in which get_SeqFeatures('type') did not filter inline segments
182
@f = $db->get_features_by_name('agt830.3');
183
ok(@f && !$f[0]->get_SeqFeatures('exon'));
184
ok(@f && $f[0]->get_SeqFeatures('EST_match'));
186
# regression test on bug in which the load_id disappeared
187
ok(@f && $f[0]->load_id eq 'Match2');
189
# regress on proper handling of multiple ID features
190
my ($alignment) = $db->get_features_by_name('agt830.5');
192
ok($alignment->target->start == 1 && $alignment->target->end == 654);
193
ok($alignment->get_SeqFeatures == 2);
194
my $gff3 = $alignment->gff3_string(1);
195
my @lines = split "\n",$gff3;
197
ok ("@lines" !~ /Parent=/s);
198
ok ("@lines" =~ /ID=/s);