1
# -*-Perl-*- Test Harness script for Bioperl
10
test_begin(-tests => 194);
13
use_ok('Bio::Seq::Quality');
14
use_ok('Bio::PrimarySeq');
15
use_ok('Bio::LocatableSeq');
16
use_ok('Bio::Seq::SimulatedRead');
19
my $VERBOSE = test_debug();
21
my ($ref, $ref2, $ref3, $ref4, $ref5, $read, $errors);
23
$ref = Bio::Seq::Quality->new(-id => 'human_id',
24
-seq => 'TAAAAAAACCCC',
25
-qual => '1 2 3 4 5 6 7 8 9 10 11 12',
26
-trace => '0 5 10 15 20 25 30 35 40 45 50 55',
27
-desc => 'The human genome' );
29
$ref2 = Bio::Seq->new(-id => 'other_genome',
31
-desc => '"Secret" sequence');
33
$ref3 = Bio::PrimarySeq->new(-seq => 'ACACTGATCTAGCGTCGTGCTAGCTGACGTAGCTGAT' );
35
$ref4 = Bio::LocatableSeq->new(-id => 'a_thaliana',
36
-seq => 'CGTATTCTGAGGAGAGCTCT' );
41
ok $read = Bio::Seq::SimulatedRead->new();
42
isa_ok $read, 'Bio::Seq::SimulatedRead';
43
isa_ok $read, 'Bio::LocatableSeq';
44
isa_ok $read, 'Bio::Seq::Quality';
46
$errors->{'1'}->{'+'} = 'T';
47
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
48
is $read->reference, $ref;
50
is $read->errors->{'1'}->{'+'}->[0], 'T';
52
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -track => 0 );
55
is $read->seq, 'TAAAAAAACCCC';
57
is $read->desc, undef;
58
is $read->revcom->seq, 'GGGGTTTTTTTA';
60
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -track => 1 );
63
is $read->seq, 'TAAAAAAACCCC';
64
is join(' ',@{$read->qual}), '';
66
is $read->desc, 'reference=human_id start=1 end=12 strand=+1 description="The human genome"';
68
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -track => 1, -coord_style => 'bioperl' );
69
is $read->desc, 'reference=human_id start=1 end=12 strand=+1 description="The human genome"';
71
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -track => 1, -coord_style => 'genbank' );
72
is $read->desc, 'reference=human_id position=1..12 description="The human genome"';
74
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -qual_levels => [30, 10]);
77
is $read->seq, 'TAAAAAAACCCC';
78
is join(' ', @{$read->qual}), '30 30 30 30 30 30 30 30 30 30 30 30';
80
is $read->desc, 'reference=human_id start=1 end=12 strand=+1 description="The human genome"';
81
is $read->revcom->seq, 'GGGGTTTTTTTA';
83
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref2 );
86
is $read->seq, 'ACGTACGT';
87
is join(' ',@{$read->qual}), '';
88
is $read->desc, 'reference=other_genome start=1 end=8 strand=+1 description="\"Secret\" sequence"';
90
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref3 );
93
is $read->seq, 'ACACTGATCTAGCGTCGTGCTAGCTGACGTAGCTGAT';
94
is join(' ',@{$read->qual}), '';
95
is $read->desc, 'start=1 end=37 strand=+1';
97
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -strand => -1, -qual_levels => [30, 10]);
98
is $read->seq, 'GGGGTTTTTTTA';
99
is join(' ', @{$read->qual}), '30 30 30 30 30 30 30 30 30 30 30 30';
100
is $read->desc, 'reference=human_id start=1 end=12 strand=-1 description="The human genome"';
102
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -strand => -1, -qual_levels => [30, 10], -coord_style => 'genbank' );
103
is $read->desc, 'reference=human_id position=complement(1..12) description="The human genome"';
105
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -start => 2, -end => 8, -qual_levels => [30, 10]);
106
is $read->seq, 'AAAAAAA';
107
is join(' ', @{$read->qual}), '30 30 30 30 30 30 30';
108
is $read->desc, 'reference=human_id start=2 end=8 strand=+1 description="The human genome"';
110
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -strand => -1, -start => 2, -end => 8, -qual_levels => [30, 10]);
111
is $read->seq, 'TTTTTTT';
112
is join(' ', @{$read->qual}), '30 30 30 30 30 30 30';
113
is $read->desc, 'reference=human_id start=2 end=8 strand=-1 description="The human genome"';
116
$errors->{'6'}->{'+'} = 'GG';
117
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -strand => -1, -start => 2, -end => 8, -errors => $errors, -qual_levels => [30, 10]);
120
is $read->seq, 'TTTTTTGGT';
121
is join(' ', @{$read->qual}), '30 30 30 30 30 30 10 10 30';
122
is $read->desc, 'reference=human_id start=2 end=8 strand=-1 errors=6+G,6+G description="The human genome"';
125
$errors->{'6'}->{'+'} = 'GG';
126
$errors->{'1'}->{'%'} = 'T';
127
$errors->{'3'}->{'-'} = undef;
128
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -strand => 1, -start => 2, -end => 8, -errors => $errors, -qual_levels => [30, 10]);
131
is $read->seq, 'TAAAAGGA';
132
is join(' ', @{$read->qual}), '10 30 30 30 30 10 10 30';
133
is $read->desc, 'reference=human_id start=2 end=8 strand=+1 errors=1%T,3-,6+G,6+G description="The human genome"';
136
$errors->{'6'}->{'+'} = 'GG';
137
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors, -qual_levels => [30, 10]);
140
is $read->seq, 'TAAAAAGGAACCCC';
141
is join(' ', @{$read->qual}), '30 30 30 30 30 30 10 10 30 30 30 30 30 30';
142
is $read->desc, 'reference=human_id start=1 end=12 strand=+1 errors=6+G,6+G description="The human genome"';
144
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors, -mid => 'ACGT', -errors => $errors, -qual_levels => [30, 10]);
147
is $read->seq, 'ACGTTAGGAAAAAACCCC';
148
is join(' ', @{$read->qual}), '30 30 30 30 30 30 10 10 30 30 30 30 30 30 30 30 30 30';
149
is $read->desc, 'reference=human_id start=1 end=12 strand=+1 mid=ACGT errors=6+G,6+G description="The human genome"';
151
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -mid => 'TTTAAA', -qual_levels => [30, 10]);
154
is $read->seq, 'TTTAAATAAAAAAACCCC';
155
is join(' ', @{$read->qual}), '30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30';
156
is $read->desc, 'reference=human_id start=1 end=12 strand=+1 mid=TTTAAA description="The human genome"';
158
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -mid => '', -qual_levels => []);
161
is $read->seq, 'TAAAAAAACCCC';
162
is join(' ', @{$read->qual}), '';
163
is $read->desc, 'reference=human_id start=1 end=12 strand=+1 description="The human genome"';
169
$errors->{'6'}->{'+'} = ['G', 'G'];
170
$errors->{'1'}->{'%'} = ['A', 'G', 'T'];
171
$errors->{'3'}->{'-'} = [undef, undef];
172
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -strand => 1, -start => 2, -end => 8, -errors => $errors, -qual_levels => [30, 10]), 'redundant errors';
175
is $read->seq, 'TAAAAGGA';
176
is join(' ', @{$read->qual}), '10 30 30 30 30 10 10 30';
177
is $read->desc, 'reference=human_id start=2 end=8 strand=+1 errors=1%A,1%G,1%T,3-,3-,6+G,6+G description="The human genome"';
180
# Specifying errors() after new()
182
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -mid => '', -qual_levels => []);
185
is $read->seq, 'TAAAAAAACCCC';
186
is join(' ', @{$read->qual}), '';
187
is $read->desc, 'reference=human_id start=1 end=12 strand=+1 description="The human genome"';
190
ok $read->errors($errors), 'errors()';
193
is $read->seq, 'TAAAAAAACCCC';
194
is $read->desc, 'reference=human_id start=1 end=12 strand=+1 description="The human genome"';
197
$errors->{'6'}->{'+'} = 'GG';
198
ok $read->errors($errors);
199
is $read->seq, 'TAAAAAGGAACCCC';
202
is $read->desc, 'reference=human_id start=1 end=12 strand=+1 errors=6+G,6+G description="The human genome"';
205
# More tracking tests
207
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -mid => 'ACGT', -qual_levels => [], -coord_style => 'genbank' );
208
is $read->desc, 'reference=human_id position=1..12 mid=ACGT description="The human genome"';
210
ok $read->mid('AAAA');
211
is $read->desc, 'reference=human_id position=1..12 mid=AAAA description="The human genome"';
214
$errors->{'6'}->{'+'} = 'GG';
215
ok $read->errors($errors);
216
is $read->desc, 'reference=human_id position=1..12 mid=AAAA errors=6+G,6+G description="The human genome"';
218
ok not($read->track(0)), 'track()';
220
is $read->desc, undef;
223
is $read->desc, 'reference=human_id position=1..12 mid=AAAA errors=6+G,6+G description="The human genome"';
226
# qual_levels() method
228
ok $read = Bio::Seq::SimulatedRead->new(-verbose => $VERBOSE, );
229
ok $read->qual_levels([30, 10]), 'qual_levels()';
230
is join(' ', @{$read->qual_levels}), '30 10';
234
ok $read->reference($ref), 'reference()';
235
is $read->reference(), $ref;
239
ok $read = Bio::Seq::SimulatedRead->new(-verbose => $VERBOSE, ), 'mid()';
240
ok $read->qual_levels([30, 10]);
241
ok $read->reference($ref);
242
ok $read->mid('ACGT');
243
ok $read->mid, 'ACGT';
245
is $read->seq, 'ACGTTAAAAAAACCCC';
246
is join(' ', @{$read->qual}), '30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30';
247
is $read->desc, 'reference=human_id start=1 end=12 strand=+1 mid=ACGT description="The human genome"';
249
ok $read->mid('TTTAAA');
250
ok $read->mid, 'TTTAAA';
251
is $read->seq, 'TTTAAATAAAAAAACCCC';
252
is join(' ', @{$read->qual}), '30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30';
253
is $read->desc, 'reference=human_id start=1 end=12 strand=+1 mid=TTTAAA description="The human genome"';
256
# Edge case... mutation of the last bases of a simulated read with MID
259
$errors->{'18'}->{'%'} = 'T';
260
$read->errors($errors);
261
is $read->seq, 'TTTAAATAAAAAAACCCT';
264
# Try different BioPerl object types
266
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref ), 'Bio::Seq::Quality';
267
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref2 ), 'Bio::Seq';
268
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref3 ), 'Bio::PrimarySeq';
269
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref4 ), 'Bio::LocatableSeq';
272
# More detailed tests of the error specifications
275
$errors->{'0'}->{'-'} = undef;
276
warning_like {$read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors )}
277
qr/Positions of substitutions and deletions have to be strictly positive but got 0/;
278
is $read->seq, 'TAAAAAAACCCC';
281
$errors->{'1'}->{'-'} = undef;
282
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
283
is $read->seq, 'AAAAAAACCCC';
286
$errors->{'12'}->{'-'} = undef;
287
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
288
is $read->seq, 'TAAAAAAACCC';
291
$errors->{'13'}->{'-'} = undef;
292
warning_like {$read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors )}
293
qr/Position 13 is beyond end of read \(12 residues\)/; # there should be a warning too
294
is $read->seq, 'TAAAAAAACCCC';
297
$errors->{'0'}->{'%'} = 'G';
298
warning_like {$read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors )}
299
qr/Positions of substitutions and deletions have to be strictly positive/; # there should be a warning too
300
is $read->seq, 'TAAAAAAACCCC';
303
$errors->{'1'}->{'%'} = 'G';
304
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
305
is $read->seq, 'GAAAAAAACCCC';
308
$errors->{'12'}->{'%'} = 'G';
309
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
310
is $read->seq, 'TAAAAAAACCCG';
313
$errors->{'13'}->{'%'} = 'G';
314
warning_like { $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors ) }
315
qr/Position 13 is beyond end of read \(12 residues\)/; # there should be a warning too
316
is $read->seq, 'TAAAAAAACCCC';
319
$errors->{'0'}->{'+'} = 'A';
320
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
321
is $read->seq, 'ATAAAAAAACCCC';
324
$errors->{'1'}->{'+'} = 'A';
325
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
326
is $read->seq, 'TAAAAAAAACCCC';
329
$errors->{'12'}->{'+'} = 'A';
330
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
331
is $read->seq, 'TAAAAAAACCCCA';
334
$errors->{'13'}->{'+'} = 'A';
335
warning_like {$read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors ) }
336
qr/Position 13 is beyond end of read \(12 residues\)/; # there should be a warning too
337
is $read->seq, 'TAAAAAAACCCC';
340
$errors->{'1'}->{'%'} = 'G';
341
$errors->{'2'}->{'%'} = 'G';
342
$errors->{'3'}->{'%'} = 'G';
343
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
344
is $read->seq, 'GGGAAAAACCCC';
347
$errors->{'1'}->{'+'} = 'G';
348
$errors->{'2'}->{'+'} = 'G';
349
$errors->{'3'}->{'+'} = 'G';
350
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
351
is $read->seq, 'TGAGAGAAAAACCCC';
354
$errors->{'1'}->{'-'} = undef;
355
$errors->{'2'}->{'-'} = undef;
356
$errors->{'3'}->{'-'} = undef;
357
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
358
is $read->seq, 'AAAAACCCC';
361
$errors->{'1'}->{'+'} = 'GGG';
362
$errors->{'2'}->{'-'} = undef;
363
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
364
is $read->seq, 'TGGGAAAAAACCCC';
367
$errors->{'2'}->{'+'} = 'CC';
368
$errors->{'2'}->{'-'} = undef;
369
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
370
is $read->seq, 'TCCAAAAAACCCC';
373
$errors->{'2'}->{'%'} = 'C';
374
$errors->{'2'}->{'-'} = undef;
375
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
376
is $read->seq, 'TAAAAAACCCC';