1
# -*-Perl-*- Test Harness script for Bioperl
2
# $Id: SeqFeature.t 15416 2009-01-21 16:46:12Z cjfields $
5
use constant TEST_COUNT => 59;
11
test_begin(-tests => TEST_COUNT);
13
$ENV{ORACLE_HOME} ||= '/home/oracle/Home';
15
use_ok('Bio::DB::SeqFeature::Store');
16
use_ok('Bio::DB::SeqFeature::Store::GFF3Loader');
19
my $gff_file = test_input_file('seqfeaturedb','test.gff3');
21
my (@f,$f,@s,$s,$seq1,$seq2);
24
@args = (-adaptor => 'memory') unless @args;
27
my $db = eval { Bio::DB::SeqFeature::Store->new(@args) };
28
skip "DB load failed? Skipping all! $@", (TEST_COUNT - 2) if $@;
31
my $loader = eval { Bio::DB::SeqFeature::Store::GFF3Loader->new(-store=>$db) };
32
skip "GFF3 loader failed? Skipping all! $@", (TEST_COUNT - 3) if $@;
36
ok($loader->load($gff_file));
38
# there should be one gene named 'abc-1'
39
@f = $db->get_features_by_name('abc-1');
43
# there should be three subfeatures of type "exon" and three of type "CDS"
44
is($f->get_SeqFeatures('exon'),3);
45
is($f->get_SeqFeatures('CDS'),3);
47
# the sequence of feature abc-1 should match the sequence of the first exon at the beginning
49
$seq2 = (sort {$a->start<=>$b->start} $f->get_SeqFeatures('exon'))[0]->seq->seq;
50
is(substr($seq1,0,length $seq2),$seq2);
52
# sequence lengths should match
53
is(length $seq1, $f->length);
55
# if we pull out abc-1 again we should get the same object
56
($s) = $db->get_features_by_name('abc-1');
59
# we should get two objects when we ask for abc-1 using get_features_by_alias
60
# this also depends on selective subfeature indexing
61
@f = $db->get_features_by_alias('abc-1');
64
# the two features should be different
67
# test that targets are working
68
($f) = $db->get_features_by_name('match1');
72
ok($s->seq_id eq 'CEESC13F');
74
is(substr($seq1,0,10), 'ttgcgttcgg');
76
# can we fetch subfeatures?
77
# gene3.a has the Index=1 attribute, so we should fetch it
78
($f) = $db->get_features_by_name('gene3.a');
81
# gene 3.b doesn't have an index, so we shouldn't get it
82
($f) = $db->get_features_by_name('gene3.b');
85
# test three-tiered genes
86
($f) = $db->get_features_by_name('gene3');
88
my @transcripts = $f->get_SeqFeatures;
90
is($transcripts[0]->method,'mRNA');
91
is($transcripts[0]->source,'confirmed');
93
# test that exon #2 is shared between the two transcripts
94
my @exons1 = $transcripts[0]->get_SeqFeatures('CDS');
96
my @exons2 = $transcripts[1]->get_SeqFeatures('CDS');
97
my ($shared1) = grep {$_->display_name||'' eq 'shared_exon'} @exons1;
98
my ($shared2) = grep {$_->display_name||'' eq 'shared_exon'} @exons2;
99
ok($shared1 && $shared2);
100
is($shared1, $shared2);
101
is($shared1->primary_id, $shared2->primary_id);
104
is($shared1->phase, 0);
105
is($shared1->strand, +1);
106
is(($f->attributes('expressed'))[0], 'yes');
109
my ($gene3a) = grep { $_->display_name eq 'gene3.a'} @transcripts;
110
my ($gene3b) = grep { $_->display_name eq 'gene3.b'} @transcripts;
113
ok($gene3a->Is_expressed);
114
ok(!$gene3b->Is_expressed);
116
# the representation of the 3'-UTR in the two transcripts a and b is
117
# different (not recommended but supported by the GFF3 spec). In the
118
# first case, there are two 3'UTRs existing as independent
119
# features. In the second, there is one UTR with a split location.
120
is($gene3a->Three_prime_UTR, 2);
121
is($gene3b->Three_prime_UTR, 1);
122
my ($utr) = $gene3b->Three_prime_UTR;
123
is($utr->segments, 2);
124
my $location = $utr->location;
125
isa_ok($location, 'Bio::Location::Split');
126
is($location->sub_Location,2);
128
# ok, test that queries are working properly.
129
# find all features with the attribute "expressed"
130
@f = $db->get_features_by_attribute({expressed=>'yes'});
133
# find all top-level features on Contig3 -- there should be two
134
@f = $db->get_features_by_location(-seq_id=>'Contig3');
137
# find all top-level features on Contig3 of type 'assembly_component'
138
@f = $db->features(-seq_id=>'Contig3',-type=>'assembly_component');
143
my $feature_count = @f;
144
cmp_ok($feature_count, '>', 0);
146
my $i = $db->get_seq_stream;
150
while ($i->next_seq) { $count++ }
151
is($feature_count,$count);
153
# regression test on bug in which get_SeqFeatures('type') did not filter inline segments
154
@f = $db->get_features_by_name('agt830.3');
155
ok(@f && !$f[0]->get_SeqFeatures('exon'));
156
ok(@f && $f[0]->get_SeqFeatures('EST_match'));
158
# regression test on bug in which the load_id disappeared
159
is(@f && $f[0]->load_id, 'Match2');
161
# regress on proper handling of multiple ID features
162
my ($alignment) = $db->get_features_by_name('agt830.5');
164
is($alignment->target->start,1);
165
is($alignment->target->end, 654);
166
is($alignment->get_SeqFeatures, 2);
167
my $gff3 = $alignment->gff3_string(1);
168
my @lines = split "\n",$gff3;
170
ok("@lines" !~ /Parent=/s);
171
ok("@lines" =~ /ID=/s);
174
test_skip(-tests => 2, -excludes_os => 'mswin');
176
if (my $child = open(F,"-|")) { # parent reads from child
177
cmp_ok(scalar <F>,'>',0);
179
# The challenge is to make sure that the handle
180
# still works in the parent!
181
my @f = $db->features();
182
cmp_ok(scalar @f,'>',0);
186
my @f = $db->features();
187
my $feature_count = @f;
188
print $feature_count;
193
# test the -ignore_seqregion flag
195
# the original should have a single feature named 'Contig1'
196
my @f = $db->get_features_by_name('Contig1');
199
$db = eval { Bio::DB::SeqFeature::Store->new(@args) };
200
$loader = eval { Bio::DB::SeqFeature::Store::GFF3Loader->new(-store=>$db,
201
-ignore_seqregion=>1)
203
$loader->load($gff_file);
204
@f = $db->get_features_by_name('Contig1');