1
# -*-Perl-*- Test Harness script for Bioperl
2
# $Id: Unflattener2.t 15112 2008-12-08 18:12:38Z sendu $
10
test_begin(-tests => 12);
13
use_ok('Bio::SeqFeature::Tools::Unflattener');
16
my $verbosity = test_debug();
19
my $unflattener = Bio::SeqFeature::Tools::Unflattener->new;
20
$unflattener->verbose($verbosity);
24
# this is an arabidopsise gbk record. it has no mRNA features.
25
# it has explicit exon/intron records
27
my @path = ("ATF14F8.gbk");
30
is ($seq->accession_number, 'AL391144');
31
my @topsfs = $seq->get_SeqFeatures;
32
my @cdss = grep {$_->primary_tag eq 'CDS'} @topsfs;
33
my $n = scalar(@topsfs);
34
if( $verbosity > 0 ) {
35
warn sprintf "TOP:%d\n", scalar(@topsfs);
39
@sfs = $unflattener->unflatten_seq(-seq=>$seq,
42
@sfs = $seq->get_SeqFeatures;
43
if( $verbosity > 0 ) {
44
warn "\n\nPOST PROCESSING:\n";
46
warn sprintf "PROCESSED/TOP:%d\n", scalar(@sfs);
49
my @allsfs = $seq->get_all_SeqFeatures;
51
my @mrnas = grep {$_->primary_tag eq 'mRNA'} @allsfs;
52
if( $verbosity > 0 ) {
53
warn sprintf "ALL:%d\n", scalar(@allsfs);
54
warn sprintf "mRNAs:%d\n", scalar(@mrnas);
57
# relationship between mRNA and CDS should be one-one
63
# this is a record from FlyBase
64
# it has mRNA features, and explicit exon/intron records
66
my @path = ("AnnIX-v003.gbk");
69
my @topsfs = $seq->get_SeqFeatures;
70
if( $verbosity > 0 ) {
71
warn sprintf "TOP:%d\n", scalar(@topsfs);
75
@sfs = $unflattener->unflatten_seq(-seq=>$seq,
78
@sfs = $seq->get_SeqFeatures;
79
if( $verbosity > 0 ) {
80
warn "\n\nPOST PROCESSING:\n";
82
warn sprintf "PROCESSED/TOP:%d\n", scalar(@sfs);
85
my @exons = grep {$_->primary_tag eq 'exon'} $seq->get_all_SeqFeatures;
86
is scalar(@exons), 6; # total number of exons per splice
87
my %numberh = map {$_->get_tag_values("number") => 1} @exons;
88
my @numbers = keys %numberh;
89
if( $verbosity > 0 ) {
90
warn sprintf "DISTINCT EXONS: %d [@numbers]\n", scalar(@numbers);
92
is scalar(@numbers), 6; # distinct exons
97
# example of a BAD genbank entry
99
my @path = ("dmel_2Lchunk.gb");
100
$seq = getseq(@path);
102
my @topsfs = $seq->get_SeqFeatures;
103
if( $verbosity > 0 ) {
104
warn sprintf "TOP:%d\n", scalar(@topsfs);
109
# we EXPECT problems with this erroneous record
110
$unflattener->error_threshold(2);
111
@sfs = $unflattener->unflatten_seq(-seq=>$seq,
114
my @probs = $unflattener->get_problems;
115
$unflattener->report_problems(\*STDERR) if $verbosity > 0;
116
$unflattener->clear_problems;
117
@sfs = $seq->get_SeqFeatures;
118
if( $verbosity > 0 ) {
119
warn "\n\nPOST PROCESSING:\n";
121
warn sprintf "PROCESSED/TOP:%d\n", scalar(@sfs);
124
my @exons = grep {$_->primary_tag eq 'exon'} $seq->get_all_SeqFeatures;
125
is scalar(@exons), 2; # total number of exons per splice
126
if( $verbosity > 0 ) {
127
warn sprintf "PROBLEMS ENCOUNTERED: %d (EXPECTED: 6)\n", scalar(@probs);
129
is scalar(@probs), 6;
135
_write_hier(0, @sfs);
141
foreach my $sf (@sfs) {
143
if ($sf->has_tag('gene')) {
144
($label) = $sf->get_tag_values('gene');
146
if ($sf->has_tag('product')) {
147
($label) = $sf->get_tag_values('product');
149
if ($sf->has_tag('number')) {
150
$label = join("; ", $sf->get_tag_values('number'));
152
printf "%s%s $label\n", ' ' x $indent, $sf->primary_tag;
153
my @sub_sfs = $sf->sub_SeqFeature;
154
_write_hier($indent+1, @sub_sfs);
161
Bio::SeqIO->new('-file'=> test_input_file(@path),
162
'-format' => 'GenBank');
163
$seqio->verbose($verbosity);
165
my $seq = $seqio->next_seq();