16
15
use_ok('Bio::Location::Split');
19
my $seq = Bio::PrimarySeq->new(
20
ok my $seq = Bio::PrimarySeq->new(), 'Bare object';
21
isa_ok $seq, 'Bio::PrimarySeqI';
25
is $seq->alphabet, undef;
26
is $seq->is_circular, undef;
30
ok $seq = Bio::PrimarySeq->new( -seq => '', -nowarnonempty => 1);
33
is $seq->alphabet, undef;
37
ok $seq = Bio::PrimarySeq->new(
20
38
'-seq' => 'TTGGTGGCGTCAACT',
21
39
'-display_id' => 'new-id',
22
40
'-alphabet' => 'dna',
40
57
# make sure all methods are implemented
41
58
is $seq->authority("bioperl.org"), "bioperl.org";
42
is $seq->namespace("t"), "t";
59
is $seq->authority, "bioperl.org";
60
is $seq->namespace("t"), "t";
43
61
is $seq->namespace, "t";
44
62
is $seq->version(0), 0;
45
is $seq->lsid_string(), "bioperl.org:t:X677667";
46
is $seq->namespace_string(), "t:X677667.0";
64
is $seq->lsid_string(), "bioperl.org:t:X677667";
65
is $seq->namespace_string, "t:X677667.0";
66
is $seq->version(47), 47;
48
67
is $seq->version, 47;
49
is $seq->namespace_string(), "t:X677667.47";
50
is $seq->description(), 'Sample Bio::Seq object';
51
is $seq->display_name(), "new-id";
68
is $seq->namespace_string, "t:X677667.47";
69
is $seq->description, 'Sample Bio::Seq object';
70
is $seq->display_name, "new-id";
74
is $seq->subseq(2, 5), 'TGGT';
76
is $seq->subseq( -start => 1, -end => 15), 'TTGGTGGCGTCAACT';
53
78
my $location = Bio::Location::Simple->new(
58
is( $seq->subseq($location), 'ACCA' );
83
is $seq->subseq($location), 'ACCA';
60
85
my $splitlocation = Bio::Location::Split->new();
61
86
$splitlocation->add_sub_Location(
85
is( $seq->subseq($fuzzy), 'GGTGGC' );
110
is $seq->subseq($fuzzy), 'GGTGGC';
113
ok my $seq = Bio::PrimarySeq->new( -seq => 'TT-GTGGCGTCAACT' );
114
is $seq->subseq(2, 5, 'nogap'), 'TGT';
115
is $seq->subseq( -start => 2, -end => 5, -nogap => 1 ), 'TGT';
116
my $location = Bio::Location::Simple->new(
121
is $seq->subseq( $location, -nogap => 1), 'TGT';
123
is $seq->subseq(-start=>2, -end=>5, -replace_with=>'aa'), 'T-GT';
124
is $seq->seq, 'TaaGGCGTCAACT';
126
throws_ok { $seq->subseq(-start=>2, -end=>5, -replace_with=>'?!'); } qr/.+/;
130
ok my $seq = Bio::PrimarySeq->new( -seq => 'AACCGGTT', -is_circular => 1 );
131
is $seq->subseq( -start => 7, -end => 10 ), 'TTAA';
87
135
my $trunc = $seq->trunc( 1, 4 );
88
136
isa_ok $trunc, 'Bio::PrimarySeqI';
89
137
is $trunc->seq(), 'TTGG' or diag( "Expecting TTGG. Got " . $trunc->seq() );
91
139
$trunc = $seq->trunc($splitlocation);
92
isa_ok( $trunc, 'Bio::PrimarySeqI' );
93
is( $trunc->seq(), 'TTGGTGACGC' );
140
isa_ok $trunc, 'Bio::PrimarySeqI' ;
141
is $trunc->seq(), 'TTGGTGACGC';
95
143
$trunc = $seq->trunc($fuzzy);
96
isa_ok( $trunc, 'Bio::PrimarySeqI' );
97
is( $trunc->seq(), 'GGTGGC' );
144
isa_ok $trunc, 'Bio::PrimarySeqI';
145
is $trunc->seq(), 'GGTGGC';
99
147
my $rev = $seq->revcom();
100
isa_ok( $rev, 'Bio::PrimarySeqI' );
148
isa_ok $rev, 'Bio::PrimarySeqI';
102
150
is $rev->seq(), 'AGTTGACGCCACCAA'
103
151
or diag( 'revcom() failed, was ' . $rev->seq() );
105
153
is $rev->display_id, 'new-id';
106
is( $rev->alphabet(), 'dna', 'alphabet copied through revcom' );
154
is $rev->display_name(), 'new-id';
155
is $rev->accession_number(), 'X677667';
156
is $rev->alphabet, 'dna';
157
is $rev->description, 'Sample Bio::Seq object';
109
'all attributes of primaryseqs are not currently copied through revcoms';
110
is( $rev->namespace, 't', 'namespace copied through revcom' );
111
is( $rev->namespace_string(),
112
"t:X677667.47", 'namespace_string copied through revcom' );
113
is( $rev->is_circular(), 0, 'is_circular copied through revcom' );
162
'all attributes of primaryseqs are not currently copied through revcom()';
163
# Probably also not copied through trunc(), transcribe() and rev_transcribe()
164
is $rev->is_circular(), 0, 'is_circular copied through revcom';
165
is $rev->version, 47, 'version copied through revcom';
166
is $rev->authority, 'bioperl.org', 'authority copied through revcom';
167
is $rev->namespace, 't', 'namespace copied through revcom';
168
is $rev->namespace_string(),
169
"t:X677667.47", 'namespace_string copied through revcom';
205
261
-id => 'aliasid',
206
262
-description => 'Alias desc'
208
is( $seq->description, 'Alias desc' );
209
is( $seq->display_id, 'aliasid' );
264
is $seq->description, 'Alias desc';
265
is $seq->display_id, 'aliasid';
214
is( $seq->alphabet, 'protein', 'Alphabet' );
216
is( $seq->alphabet, 'protein' );
218
is( $seq->alphabet, 'protein' );
220
is( $seq->alphabet, 'protein' );
222
is( $seq->alphabet, 'protein' );
224
is( $seq->alphabet, 'protein' );
226
is( $seq->alphabet, 'protein' );
228
is( $seq->alphabet, 'protein' );
230
is( $seq->alphabet, 'protein' );
232
is( $seq->alphabet, 'protein' );
234
is( $seq->alphabet, 'dna' );
236
is( $seq->alphabet, 'rna' );
238
is( $seq->alphabet, 'protein' );
240
is( $seq->alphabet, 'protein' );
241
$seq->seq('AAACTYAAAAGAATTGRCGG'); # valid degenerate DNA PCR primer sequence (90% ACGTN)
242
is( $seq->alphabet, 'dna');
243
$seq->seq('AAACTYAAAKGAATTGRCGG'); # another primer previously detected as protein (85% ACGTN)
244
is( $seq->alphabet, 'dna');
245
$seq->seq('YWACTYAAAKGARTTGRCGG'); # 70% ACGTN. Everything <= 70% is considered a protein
246
is( $seq->alphabet, 'protein');
247
$seq->seq('XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'); # Bug 2438
248
is( $seq->alphabet, 'protein', 'Bug 2438');
249
$seq->seq('CAGTCXXXXXXXXXXXXXXXXXXXXXXXXXXXCAGCG');
250
is( $seq->alphabet, 'protein' );
269
ok $seq->seq('actgx');
270
is $seq->alphabet, 'protein', 'Alphabet';
271
ok $seq->seq('actge');
272
is $seq->alphabet, 'protein';
273
ok $seq->seq('actgf');
274
is $seq->alphabet, 'protein';
275
ok $seq->seq('actgi');
276
is $seq->alphabet, 'protein';
277
ok $seq->seq('actgj');
278
is $seq->alphabet, 'protein';
279
ok $seq->seq('actgl');
280
is $seq->alphabet, 'protein';
281
ok $seq->seq('actgo');
282
is $seq->alphabet, 'protein';
283
ok $seq->seq('actgp');
284
is $seq->alphabet, 'protein';
285
ok $seq->seq('actgq');
286
is $seq->alphabet, 'protein';
287
ok $seq->seq('actgz');
288
is $seq->alphabet, 'protein';
289
ok $seq->seq('actgn');
290
is $seq->alphabet, 'dna';
291
ok $seq->seq('acugn');
292
is $seq->alphabet, 'rna';
293
ok $seq->seq('bdhkm');
294
is $seq->alphabet, 'protein';
295
ok $seq->seq('rsvwx');
296
is $seq->alphabet, 'protein';
297
ok $seq->seq('AAACTYAAAAGAATTGRCGG'); # valid degenerate DNA PCR primer sequence (90% ACGTN)
298
is $seq->alphabet, 'dna';
299
ok $seq->seq('AAACTYAAAKGAATTGRCGG'); # another primer previously detected as protein (85% ACGTN)
300
is $seq->alphabet, 'dna';
301
ok $seq->seq('YWACTYAAAKGARTTGRCGG'); # 70% ACGTNWSRM. Everything <= 70% is considered a protein
302
is $seq->alphabet, 'dna';
303
ok $seq->seq('XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'); # Bug 2438
304
is $seq->alphabet, 'protein', 'Bug 2438';
305
ok $seq->seq('CAGTCXXXXXXXXXXXXXXXXXXXXXXXXXXXCAGCG');
306
is $seq->alphabet, 'protein';
307
ok $seq->seq('WTGGGGCTATGAAAAAAAAAWTTKMGMMAAAAAWTTWTKRWMRATC'); # showed up on MAKER list
308
is $seq->alphabet, 'dna';
310
ok $seq->seq('actgn', 'protein'); # accept specified alphabet, no matter what
311
is $seq->alphabet, 'protein';
312
ok $seq->seq('bdhkm', 'dna');
313
is $seq->alphabet, 'dna';
269
332
is $aa->seq, 'MLAG';
272
# test internal PrimarySeqI _find_orfs function and translate( -orf => 'longest' )
336
ok $seq = Bio::PrimarySeq->new(), 'Length method';
338
ok $seq->length(123);
339
is $seq->length, 123;
341
ok $seq = Bio::PrimarySeq->new( -seq => 'ATGCTCTAAGCAGGGTAA' );
343
ok $seq->seq('ATGCTCTAAG');
345
is $seq->seq(undef), undef;
348
ok $seq = Bio::PrimarySeq->new( -length => 123 );
349
is $seq->length, 123;
351
ok $seq = Bio::PrimarySeq->new( -seq => 'ATGCTCTAAGCAGGGTAA' );
353
ok $seq->length( $seq->length ); # save memory by removing seq
354
is $seq->seq( undef ), undef; # ... but keeping a record of length
357
ok $seq->seq('ACGT');
358
is $seq->length, 4; # manually-specified length changed when sequence is changed
360
throws_ok { $seq->length(666); } qr/.+/; # Cannot lie about length
363
# Sequence validation method
364
is $seq->validate_seq( undef ), 1;
365
is $seq->validate_seq( '' ), 1;
366
is $seq->validate_seq( 'acgt' ), 1;
367
is $seq->validate_seq( 'ACGT' ), 1;
368
is $seq->validate_seq( 'XFRH' ), 1;
369
is $seq->validate_seq( '-~' ), 1; # gap symbols
370
is $seq->validate_seq( '-.*?=~' ), 1; # other valid symbols
371
is $seq->validate_seq( '0' ), 0;
372
is $seq->validate_seq( ' ' ), 0;
373
is $seq->validate_seq( 'AAAA$' ), 0;
374
is $seq->validate_seq( 'tt&t!' ), 0;
376
throws_ok { $seq->validate_seq('tt&t!', 1); } qr/.+/;
379
# Test direct option (no sequence validation)
380
throws_ok { $seq = Bio::PrimarySeq->new(-seq => 'A\T$AGQ+T'); } qr/.+/, 'Validation';
381
ok $seq = Bio::PrimarySeq->new( -seq => 'A\T$AGQ+T', -direct => 1 );
382
is $seq->seq, 'A\T$AGQ+T';
383
throws_ok { $seq->seq('NT@/') } qr/.+/;
385
# Set a sequence by reference
386
my $string = 'AAAACCCCGGGGTTTT';
387
ok $seq = Bio::PrimarySeq->new( -ref_to_seq => \$string );
388
is $seq->seq, 'AAAACCCCGGGGTTTT';
391
# Test internal PrimarySeqI _find_orfs function and translate( -orf => 'longest' )