6
test_begin( -tests => 236,
7
-requires_modules => [qw(Graph::Undirected)] );
8
use_ok('Bio::Assembly::IO');
9
use_ok('Bio::Assembly::Tools::ContigSpectrum');
6
test_begin( -tests => 239,
7
-requires_modules => [ qw(Bio::Assembly::Tools::ContigSpectrum)] );
8
use_ok 'Bio::Assembly::IO';
9
use_ok 'Bio::Assembly::Tools::ContigSpectrum';
12
13
my $in = Bio::Assembly::IO->new(
13
-file => test_input_file('contigspectrumtest.tigr'),
14
-file => test_input_file('contigspectrumtest.tigr'),
16
isa_ok($in, 'Bio::Assembly::IO');
17
isa_ok $in, 'Bio::Assembly::IO';
17
18
my $sc = $in->next_assembly;
18
isa_ok($sc, 'Bio::Assembly::Scaffold');
19
isa_ok $sc, 'Bio::Assembly::Scaffold';
20
21
# Try all the get/set methods
21
ok(my $csp = Bio::Assembly::Tools::ContigSpectrum->new, 'get/set methods');
22
isa_ok($csp, 'Bio::Assembly::Tools::ContigSpectrum');
25
ok($csp->nof_seq(123));
26
is($csp->nof_seq, 123);
27
ok($csp->nof_rep(456));
28
is($csp->nof_rep, 456);
29
ok($csp->max_size(789));
30
is($csp->max_size, 789);
31
ok($csp->nof_overlaps(111));
32
is($csp->nof_overlaps, 111);
33
ok($csp->min_overlap(50));
34
is($csp->min_overlap, 50);
35
ok($csp->avg_overlap(54.3));
36
is($csp->avg_overlap, 54.3);
37
ok($csp->min_identity(89.1));
38
is($csp->min_identity, 89.1);
39
ok($csp->avg_identity(98.7));
40
is($csp->avg_identity, 98.7);
41
ok($csp->avg_seq_len(123.456));
42
is($csp->avg_seq_len, 123.456);
43
ok($csp->eff_asm_params(1));
44
is($csp->eff_asm_params, 1);
22
# Build an empty contig spectrum and confirm the class of the returned object.
ok my $csp = Bio::Assembly::Tools::ContigSpectrum->new, 'get/set methods';
23
isa_ok $csp, 'Bio::Assembly::Tools::ContigSpectrum';
26
# Round-trip every accessor: call it with a value (the call must return a
# true value), then call it with no argument and compare the result with
# the value that was just set.
ok $csp->nof_seq(123);
27
is $csp->nof_seq, 123;
28
ok $csp->nof_rep(456);
29
is $csp->nof_rep, 456;
30
ok $csp->max_size(789);
31
is $csp->max_size, 789;
32
ok $csp->nof_overlaps(111);
33
is $csp->nof_overlaps, 111;
34
ok $csp->min_overlap(50);
35
is $csp->min_overlap, 50;
36
ok $csp->avg_overlap(54.3);
37
is $csp->avg_overlap, 54.3;
38
ok $csp->min_identity(89.1);
39
is $csp->min_identity, 89.1;
40
ok $csp->avg_identity(98.7);
41
is $csp->avg_identity, 98.7;
42
ok $csp->avg_seq_len(123.456);
43
is $csp->avg_seq_len, 123.456;
44
# eff_asm_params is set to 1 here and read back as 1.
ok $csp->eff_asm_params(1);
45
is $csp->eff_asm_params, 1;
46
47
# contig spectrum based on simple spectrum
47
ok(my $spectrum_csp = Bio::Assembly::Tools::ContigSpectrum->new, 'simple spectrum');
48
ok($spectrum_csp->spectrum({1=>1, 2=>2, 3=>3}));
49
is($spectrum_csp->eff_asm_params, 0);
50
is($spectrum_csp->nof_seq, 14);
51
is($spectrum_csp->max_size, 3);
52
is($spectrum_csp->nof_rep, 1);
53
is($spectrum_csp->nof_overlaps, 0);
54
is($spectrum_csp->min_overlap, undef);
55
is($spectrum_csp->avg_overlap, 0);
56
is($spectrum_csp->min_identity, undef);
57
is($spectrum_csp->avg_identity, 0);
58
is($spectrum_csp->avg_seq_len, 0);
59
is(scalar $spectrum_csp->assembly, 0);
61
ok(my $string = $spectrum_csp->to_string(1));
63
ok($string = $spectrum_csp->to_string(2));
64
is($string, "1\t2\t3");
65
ok($string = $spectrum_csp->to_string(3));
66
is($string, "1\n2\n3");
68
# mixed contig spectrum imported from assembly
69
ok(my $mixed_csp = Bio::Assembly::Tools::ContigSpectrum->new(
71
-eff_asm_params => 1 ), 'mixed contig spectrum');
72
is_deeply($mixed_csp->spectrum, {1=>0, 2=>3, 6=>1, 9=>1}); # [0 3 0 0 0 1 0 0 1]
73
is($mixed_csp->eff_asm_params, 1);
74
is($mixed_csp->max_size, 9);
75
is($mixed_csp->nof_rep, 1);
76
is($mixed_csp->nof_seq, 21);
77
float_is($mixed_csp->avg_seq_len, 303.81);
78
is($mixed_csp->nof_overlaps, 16);
79
is($mixed_csp->min_overlap, 35);
80
is($mixed_csp->avg_overlap, 155.875);
81
float_is($mixed_csp->min_identity, 96.8421);
82
float_is($mixed_csp->avg_identity, 98.8826);
83
is(scalar $mixed_csp->assembly, 1);
85
# dissolved contig spectrum
86
ok(my $dissolved_csp = Bio::Assembly::Tools::ContigSpectrum->new(
87
-dissolve => [$mixed_csp, 'ZZZ'] ), 'dissolved contig spectrum');
88
is_deeply($dissolved_csp->spectrum, {1=>2, 2=>1}); # [2 1]
89
is($dissolved_csp->eff_asm_params, 0);
90
is($dissolved_csp->max_size, 2);
91
is($dissolved_csp->nof_rep, 1);
92
is($dissolved_csp->nof_seq, 4);
93
float_is($dissolved_csp->avg_seq_len, 321);
94
# eff_asm_params haven't been requested
95
is($dissolved_csp->nof_overlaps, 0);
96
is($dissolved_csp->min_overlap, undef);
97
is($dissolved_csp->avg_overlap, 0);
98
is($dissolved_csp->min_identity, undef);
99
is($dissolved_csp->avg_identity, 0);
101
ok($dissolved_csp = Bio::Assembly::Tools::ContigSpectrum->new(
102
-dissolve => [$mixed_csp, 'sdsu'] ));
103
is_deeply($dissolved_csp->spectrum, {1=>3, 6=>1}); # [3 0 0 0 0 1]
104
is($dissolved_csp->eff_asm_params, 0);
105
is($dissolved_csp->max_size, 6);
106
is($dissolved_csp->nof_rep, 1);
107
is($dissolved_csp->nof_seq, 9);
108
float_is($dissolved_csp->avg_seq_len, 441.222222222222);
109
# eff_asm_params haven't been requested
110
is($dissolved_csp->nof_overlaps, 0);
111
is($dissolved_csp->min_overlap, undef);
112
is($dissolved_csp->avg_overlap, 0);
113
is($dissolved_csp->min_identity, undef);
114
is($dissolved_csp->avg_identity, 0);
116
ok($dissolved_csp = Bio::Assembly::Tools::ContigSpectrum->new(
117
-dissolve => [$mixed_csp, 'ABC'] ));
118
is_deeply($dissolved_csp->spectrum, {1=>2, 6=>1}); # [2 0 0 0 0 1]
119
is($dissolved_csp->eff_asm_params, 0);
120
is($dissolved_csp->max_size, 6);
121
is($dissolved_csp->nof_rep, 1);
122
is($dissolved_csp->nof_seq, 8);
123
is($dissolved_csp->avg_seq_len, 140.625);
124
# eff_asm_params haven't been requested
125
is($dissolved_csp->nof_overlaps, 0);
126
is($dissolved_csp->min_overlap, undef);
127
is($dissolved_csp->avg_overlap, 0);
128
is($dissolved_csp->min_identity, undef);
129
is($dissolved_csp->avg_identity, 0);
131
ok($dissolved_csp = Bio::Assembly::Tools::ContigSpectrum->new(
134
-dissolve => [$mixed_csp, 'ABC'] ));
135
is_deeply($dissolved_csp->spectrum, {1=>2, 6=>1}); # [2 0 0 0 0 1]
137
ok($dissolved_csp = Bio::Assembly::Tools::ContigSpectrum->new(
140
-dissolve => [$mixed_csp, 'ABC'] ));
141
is_deeply($dissolved_csp->spectrum, {1=>3, 5=>1}); # [3 0 0 0 1]
143
# after dissolving, the remaining assembly objects should be 3 singlets and 1 5-contig
144
my @contigs = ($dissolved_csp->assembly);
145
is(scalar @contigs, 4);
146
my @contig_ids = sort qw( 144 652_1 652_2 652_3 );
147
is_deeply( [sort map($_->id, @contigs)], \@contig_ids );
148
my @contig_sizes = sort qw( 1 1 1 5 );
149
is_deeply( [sort map($_->num_sequences, @contigs)], \@contig_sizes );
150
my @contig_isas = sort qw( Bio::Assembly::Singlet Bio::Assembly::Singlet
151
Bio::Assembly::Singlet Bio::Assembly::Contig );
152
is_deeply( [sort map(ref $_, @contigs)], \@contig_isas );
153
my @reads = ($contigs[1])->each_seq;
154
my @read_ids = sort qw(ABC|9980040 ABC|9937790 ABC|9956706 ABC|9960711 ABC|9976538);
155
is_deeply( [sort map($_->id, @reads)], \@read_ids );
157
ok($dissolved_csp = Bio::Assembly::Tools::ContigSpectrum->new(
160
-dissolve => [$mixed_csp, 'ABC'] ));
161
is_deeply($dissolved_csp->spectrum, {1=>2, 6=>1}); # [2 0 0 0 0 1]
163
ok($dissolved_csp = Bio::Assembly::Tools::ContigSpectrum->new(
166
-dissolve => [$mixed_csp, 'ABC'] ));
167
is_deeply($dissolved_csp->spectrum, {1=>2, 6=>1}); # [2 0 0 0 0 1]
169
ok($dissolved_csp = Bio::Assembly::Tools::ContigSpectrum->new(
170
-dissolve => [$mixed_csp, 'ABC'],
171
-eff_asm_params => 1 ));
172
is_deeply($dissolved_csp->spectrum, {1=>2, 6=>1}); # [2 0 0 0 0 1]
173
is($dissolved_csp->eff_asm_params, 1);
174
is($dissolved_csp->max_size, 6);
175
is($dissolved_csp->nof_rep, 1);
176
is($dissolved_csp->nof_seq, 8);
177
float_is($dissolved_csp->avg_seq_len, 140.625);
178
is($dissolved_csp->nof_overlaps, 5);
179
float_is($dissolved_csp->avg_overlap, 76.8);
180
float_is($dissolved_csp->avg_identity, 100.0);
181
# min_overlap and min_identity not explicitly specified for the dissolved csp
182
# min_overlap and min_identity are thus taken from the mixed csp
183
is($dissolved_csp->min_overlap, 35);
184
float_is($dissolved_csp->min_identity, 96.8421);
186
# cross contig spectrum
187
ok(my $cross_csp = Bio::Assembly::Tools::ContigSpectrum->new(
188
-cross => $mixed_csp), 'cross-contig spectrum');
189
is_deeply($cross_csp->spectrum, {1=>7, 2=>2, 9=>1}); # [7 2 0 0 0 0 0 0 1]
191
# assembly should have 2 2-contigs and 1 9-contig
192
@contigs = $cross_csp->assembly;
193
is(scalar @contigs, 3);
194
@contig_sizes = sort qw( 2 2 9 );
195
is_deeply( [sort map($_->num_sequences, @contigs)], \@contig_sizes );
196
@contig_isas = sort qw( Bio::Assembly::Contig Bio::Assembly::Contig Bio::Assembly::Contig);
197
is_deeply( [sort map(ref $_, @contigs)], \@contig_isas );
198
@read_ids = sort qw(sdsu|SDSU_RFPERU_006_E04.x01.phd.1 ZZZ|SDSU_RFPERU_010_B05.x01.phd.1);
199
is_deeply( [sort map($_->id, $contigs[0]->each_seq)], \@read_ids );
200
@read_ids = sort qw(sdsu|SDSU_RFPERU_013_H05.x01.phd.1 ABC|SDSU_RFPERU_005_F02.x01.phd.1);
201
is_deeply( [sort map($_->id, $contigs[1]->each_seq)], \@read_ids );
202
@read_ids = sort qw( ZZZ|9962187 ABC|9937790 ABC|9944760 ABC|9956706
203
sdsu|9986984 ABC|9960711 ABC|9970175 ABC|9976538 ABC|9980040);
204
is_deeply( [sort map($_->id, $contigs[2]->each_seq)], \@read_ids );
206
# effective assembly params
207
ok($cross_csp = Bio::Assembly::Tools::ContigSpectrum->new(
208
-cross => $mixed_csp,
209
-eff_asm_params => 1 ), 'cross-contig spectrum');
210
is_deeply($cross_csp->spectrum, {1=>7, 2=>2, 9=>1}); # [7 2 0 0 0 0 0 0 1]
211
is($cross_csp->nof_rep, 1);
212
is($cross_csp->eff_asm_params, 1);
213
is($cross_csp->max_size, 9);
214
is($cross_csp->nof_seq, 13);
215
float_is($cross_csp->avg_seq_len, 206.308);
216
is($cross_csp->nof_overlaps, 10);
217
float_is($cross_csp->avg_overlap, 76.9);
218
float_is($cross_csp->avg_identity, 99.2357);
219
## min_overlap and min_identity not explicitly specified for the cross csp
220
## min_overlap and min_identity are thus taken from the mixed csp
221
is($cross_csp->min_overlap, 35);
222
float_is($cross_csp->min_identity, 96.8421);
224
# with a specified minimum overlap and identity
225
ok($cross_csp = Bio::Assembly::Tools::ContigSpectrum->new(
226
-cross => $mixed_csp,
228
-min_identity => 98 ), 'cross-contig spectrum');
229
is_deeply($cross_csp->spectrum, {1=>3, 2=>1, 7=>1}); # [3 1 0 0 0 0 1]
230
is($cross_csp->nof_rep, 1);
231
is($cross_csp->eff_asm_params, 0);
232
is($cross_csp->max_size, 7);
233
is($cross_csp->nof_seq, 9);
234
float_is($cross_csp->avg_seq_len, 191.222);
235
is($cross_csp->min_overlap, 50);
236
float_is($cross_csp->min_identity, 98);
238
# sum of contig spectra
239
ok(my $sum_csp = Bio::Assembly::Tools::ContigSpectrum->new(-eff_asm_params=>1), 'contig spectrum sum');
240
ok($sum_csp->add($dissolved_csp));
241
ok($sum_csp->add($mixed_csp));
242
is_deeply($sum_csp->spectrum, {1=>2, 2=>3, 6=>2, 9=>1}); # [2 3 0 0 0 2 0 0 1]
243
is($sum_csp->eff_asm_params, 1);
244
is($sum_csp->max_size, 9);
245
is($sum_csp->nof_rep, 2);
246
is($sum_csp->nof_seq, 29);
247
float_is($sum_csp->avg_seq_len, 258.7934);
248
is($sum_csp->nof_overlaps, 21);
249
is($sum_csp->min_overlap, 35);
250
float_is($sum_csp->avg_overlap, 137.0476);
251
float_is($sum_csp->min_identity, 96.8421);
252
float_is($sum_csp->avg_identity, 99.1487);
253
is(scalar $sum_csp->assembly, 4);
255
# average of contig spectra
256
ok(my $avg_csp = Bio::Assembly::Tools::ContigSpectrum->new(-eff_asm_params=>1), 'average contig spectrum');
257
ok($avg_csp = $avg_csp->average([$dissolved_csp, $mixed_csp]));
258
is_deeply($avg_csp->spectrum, {1=>1, 2=>1.5, 6=>1, 9=>0.5}); # [1 1 0 0 0 1 0 0 0.5]
259
is($avg_csp->eff_asm_params, 1);
260
is($avg_csp->max_size, 9);
261
is($avg_csp->nof_rep, 2);
262
is($avg_csp->nof_seq, 14.5);
263
float_is($avg_csp->avg_seq_len, 258.7934);
264
is($avg_csp->nof_overlaps, 10.5);
265
is($avg_csp->min_overlap, 35);
266
float_is($avg_csp->avg_overlap, 137.0476);
267
float_is($avg_csp->min_identity, 96.8421);
268
float_is($avg_csp->avg_identity, 99.1487);
269
is(scalar $avg_csp->assembly, 4);
271
# drop assembly info from contig spectrum
272
ok($mixed_csp->drop_assembly(), 'drop assembly');
273
is(scalar $mixed_csp->assembly(), 0);
48
ok my $spectrum_csp = Bio::Assembly::Tools::ContigSpectrum->new, 'simple spectrum';
49
ok $spectrum_csp->spectrum({1=>1, 2=>2, 3=>3});
50
is $spectrum_csp->eff_asm_params, 0;
51
is $spectrum_csp->nof_seq, 14;
52
is $spectrum_csp->max_size, 3;
53
is $spectrum_csp->nof_rep, 1;
54
is $spectrum_csp->nof_overlaps, 0;
55
is $spectrum_csp->min_overlap, undef;
56
is $spectrum_csp->avg_overlap, 0;
57
is $spectrum_csp->min_identity, undef;
58
is $spectrum_csp->avg_identity, 0;
59
is $spectrum_csp->avg_seq_len, 0;
60
is scalar $spectrum_csp->assembly, 0;
62
ok my $string = $spectrum_csp->to_string(1);
64
ok $string = $spectrum_csp->to_string(2);
65
is $string, "1\t2\t3";
66
ok $string = $spectrum_csp->to_string(3);
67
is $string, "1\n2\n3";
278
ok($test_csp = Bio::Assembly::Tools::ContigSpectrum->new(-spectrum=>$spectrum), 'contig spectrum score');
279
is($test_csp->score, undef);
72
ok $test_csp = Bio::Assembly::Tools::ContigSpectrum->new(-spectrum=>$spectrum), 'contig spectrum score';
73
is $test_csp->score, undef;
280
74
$spectrum = {1=>120};
281
ok($test_csp = Bio::Assembly::Tools::ContigSpectrum->new(-spectrum=>$spectrum));
282
is($test_csp->score, 0);
75
ok $test_csp = Bio::Assembly::Tools::ContigSpectrum->new(-spectrum=>$spectrum);
76
is $test_csp->score, 0;
283
77
$spectrum = {120=>1};
284
ok($test_csp = Bio::Assembly::Tools::ContigSpectrum->new(-spectrum=>$spectrum));
285
is($test_csp->score, 1);
286
is($test_csp->score(240), 0.248953974895397);
78
ok $test_csp = Bio::Assembly::Tools::ContigSpectrum->new(-spectrum=>$spectrum);
79
is $test_csp->score, 1;
80
float_is $test_csp->score(240), 0.248953974895397;
287
81
$spectrum = {1=>120, 120=>1};
288
ok($test_csp = Bio::Assembly::Tools::ContigSpectrum->new(-spectrum=>$spectrum));
289
is($test_csp->score, 0.248953974895397);
291
# large contig (27 reads)
292
$in = Bio::Assembly::IO->new(
293
-file => test_input_file('27-contig_Newbler.ace'),
296
isa_ok($in, 'Bio::Assembly::IO');
297
$sc = $in->next_assembly;
298
isa_ok($sc, 'Bio::Assembly::Scaffold');
299
ok(my $large_csp = Bio::Assembly::Tools::ContigSpectrum->new(
301
-eff_asm_params => 1 ), 'large contig spectrum');
302
is(scalar $large_csp->assembly(), 1);
303
is_deeply($large_csp->spectrum, {1=>0, 27=>1});
304
is($large_csp->eff_asm_params, 1);
305
is($large_csp->max_size, 27);
306
is($large_csp->nof_rep, 1);
307
is($large_csp->nof_seq, 27);
308
float_is($large_csp->avg_seq_len, 100);
309
is($large_csp->nof_overlaps, 26);
310
is($large_csp->min_overlap, 54);
311
is($large_csp->avg_overlap, 88.7692307692308);
312
float_is($large_csp->min_identity, 33.3333);
313
float_is($large_csp->avg_identity, 74.7486);
315
ok(my $large_xcsp = Bio::Assembly::Tools::ContigSpectrum->new(
316
-cross => $large_csp,
317
-eff_asm_params => 1 ), 'large cross-contig spectrum');
318
is($large_xcsp->nof_overlaps, 26);
319
# the operation sometimes returns 88.7692307692308 and sometimes 88.8076923076923...
320
ok( $large_xcsp->avg_overlap >= 88.7692307692307 );
321
ok( $large_xcsp->avg_overlap <= 88.8076923076924 );
322
is_deeply($large_xcsp->spectrum, {1=>21, 27=>1});
324
ok( $large_xcsp = Bio::Assembly::Tools::ContigSpectrum->new(
325
-cross => $large_csp,
326
-min_overlap => 100) );
327
is_deeply($large_xcsp->spectrum, {1=>18, 2=>5, 3=>1, 7=>1});
328
my @xcontigs = ($large_xcsp->assembly);
329
is(scalar @xcontigs, 7); # the cross-1-contigs are not included
330
my @xcontig_ids = sort qw( contig00001_1 contig00001_2 contig00001_3 contig00001_4
331
contig00001_5 contig00001_6 contig00001_7 );
332
is_deeply( [sort map($_->id, @xcontigs)], \@xcontig_ids );
333
my @xcontig_sizes = sort qw( 2 2 2 2 2 3 7 );
334
is_deeply( [sort map($_->num_sequences, @xcontigs)], \@xcontig_sizes );
335
my $xcontig = $xcontigs[5];
336
is( $xcontig->get_seq_coord($xcontig->get_seq_by_name('species1635|5973'))->start, 1);
337
is( $xcontig->get_seq_coord($xcontig->get_seq_by_name('species158|7890'))->start, 1);
338
is( $xcontig->get_seq_coord($xcontig->get_seq_by_name('species2742|48'))->end, 140);
340
# one contig at a time
341
$in = Bio::Assembly::IO->new(
342
-file => test_input_file('contigspectrumtest.tigr'),
345
$sc = $in->next_assembly;
346
ok($csp = Bio::Assembly::Tools::ContigSpectrum->new(
347
-eff_asm_params => 1 ), 'one contig at a time');
348
for my $contig ($sc->all_contigs) {
349
ok($csp->assembly($contig));
82
ok $test_csp = Bio::Assembly::Tools::ContigSpectrum->new(-spectrum=>$spectrum);
83
float_is $test_csp->score, 0.248953974895397;
86
test_skip( -tests => 183, -requires_module => 'Graph::Undirected' ); #####
88
# mixed contig spectrum imported from assembly
89
ok my $mixed_csp = Bio::Assembly::Tools::ContigSpectrum->new(
91
-eff_asm_params => 1 ), 'mixed contig spectrum';
92
is_deeply $mixed_csp->spectrum, {1=>0, 2=>3, 6=>1, 9=>1}; # [0 3 0 0 0 1 0 0 1]
93
is $mixed_csp->eff_asm_params, 1;
94
is $mixed_csp->max_size, 9;
95
is $mixed_csp->nof_rep, 1;
96
is $mixed_csp->nof_seq, 21;
97
float_is $mixed_csp->avg_seq_len, 303.81;
98
is $mixed_csp->nof_overlaps, 16;
99
is $mixed_csp->min_overlap, 35;
100
float_is $mixed_csp->avg_overlap, 155.875;
101
float_is $mixed_csp->min_identity, 96.8421;
102
float_is $mixed_csp->avg_identity, 98.8826;
103
is scalar $mixed_csp->assembly, 1;
105
# dissolved contig spectrum
106
ok my $dissolved_csp = Bio::Assembly::Tools::ContigSpectrum->new(
107
-dissolve => [$mixed_csp, 'ZZZ'] ), 'dissolved contig spectrum';
108
is_deeply $dissolved_csp->spectrum, {1=>2, 2=>1}; # [2 1]
109
is $dissolved_csp->eff_asm_params, 0;
110
is $dissolved_csp->max_size, 2;
111
is $dissolved_csp->nof_rep, 1;
112
is $dissolved_csp->nof_seq, 4;
113
float_is $dissolved_csp->avg_seq_len, 321;
114
# eff_asm_params haven't been requested
115
is $dissolved_csp->nof_overlaps, 0;
116
is $dissolved_csp->min_overlap, undef;
117
is $dissolved_csp->avg_overlap, 0;
118
is $dissolved_csp->min_identity, undef;
119
is $dissolved_csp->avg_identity, 0;
121
ok $dissolved_csp = Bio::Assembly::Tools::ContigSpectrum->new(
122
-dissolve => [$mixed_csp, 'sdsu'] );
123
is_deeply $dissolved_csp->spectrum, {1=>3, 6=>1}; # [3 0 0 0 0 1]
124
is $dissolved_csp->eff_asm_params, 0;
125
is $dissolved_csp->max_size, 6;
126
is $dissolved_csp->nof_rep, 1;
127
is $dissolved_csp->nof_seq, 9;
128
float_is $dissolved_csp->avg_seq_len, 441.222222222222;
129
# eff_asm_params haven't been requested
130
is $dissolved_csp->nof_overlaps, 0;
131
is $dissolved_csp->min_overlap, undef;
132
is $dissolved_csp->avg_overlap, 0;
133
is $dissolved_csp->min_identity, undef;
134
is $dissolved_csp->avg_identity, 0;
136
ok $dissolved_csp = Bio::Assembly::Tools::ContigSpectrum->new(
137
-dissolve => [$mixed_csp, 'ABC'] );
138
is_deeply $dissolved_csp->spectrum, {1=>2, 6=>1}; # [2 0 0 0 0 1]
139
is $dissolved_csp->eff_asm_params, 0;
140
is $dissolved_csp->max_size, 6;
141
is $dissolved_csp->nof_rep, 1;
142
is $dissolved_csp->nof_seq, 8;
143
float_is $dissolved_csp->avg_seq_len, 140.625;
144
# eff_asm_params haven't been requested
145
is $dissolved_csp->nof_overlaps, 0;
146
is $dissolved_csp->min_overlap, undef;
147
is $dissolved_csp->avg_overlap, 0;
148
is $dissolved_csp->min_identity, undef;
149
is $dissolved_csp->avg_identity, 0;
151
ok $dissolved_csp = Bio::Assembly::Tools::ContigSpectrum->new(
154
-dissolve => [$mixed_csp, 'ABC'] );
155
is_deeply $dissolved_csp->spectrum, {1=>2, 6=>1}; # [2 0 0 0 0 1]
157
ok $dissolved_csp = Bio::Assembly::Tools::ContigSpectrum->new(
160
-dissolve => [$mixed_csp, 'ABC'] );
161
is_deeply $dissolved_csp->spectrum, {1=>3, 5=>1}; # [3 0 0 0 1]
163
# after dissolving, the remaining assembly objects should be 3 singlets and 1 5-contig
164
my @contigs = ($dissolved_csp->assembly);
165
is scalar @contigs, 4;
166
my @contig_ids = sort qw( 144 652_1 652_2 652_3 );
167
is_deeply [sort map($_->id, @contigs)], \@contig_ids;
168
my @contig_sizes = sort qw( 1 1 1 5 );
169
is_deeply [sort map($_->num_sequences, @contigs)], \@contig_sizes;
170
my @contig_isas = sort qw( Bio::Assembly::Singlet Bio::Assembly::Singlet
171
Bio::Assembly::Singlet Bio::Assembly::Contig );
172
is_deeply [sort map(ref $_, @contigs)], \@contig_isas;
173
my @reads = ($contigs[1])->each_seq;
174
my @read_ids = sort qw(ABC|9980040 ABC|9937790 ABC|9956706 ABC|9960711 ABC|9976538);
175
is_deeply [sort map($_->id, @reads)], \@read_ids;
177
ok $dissolved_csp = Bio::Assembly::Tools::ContigSpectrum->new(
180
-dissolve => [$mixed_csp, 'ABC'] );
181
is_deeply $dissolved_csp->spectrum, {1=>2, 6=>1}; # [2 0 0 0 0 1]
183
ok $dissolved_csp = Bio::Assembly::Tools::ContigSpectrum->new(
186
-dissolve => [$mixed_csp, 'ABC'] );
187
is_deeply $dissolved_csp->spectrum, {1=>2, 6=>1}; # [2 0 0 0 0 1]
189
ok $dissolved_csp = Bio::Assembly::Tools::ContigSpectrum->new(
190
-dissolve => [$mixed_csp, 'ABC'],
191
-eff_asm_params => 1 );
192
is_deeply $dissolved_csp->spectrum, {1=>2, 6=>1}; # [2 0 0 0 0 1]
193
is $dissolved_csp->eff_asm_params, 1;
194
is $dissolved_csp->max_size, 6;
195
is $dissolved_csp->nof_rep, 1;
196
is $dissolved_csp->nof_seq, 8;
197
float_is $dissolved_csp->avg_seq_len, 140.625;
198
is $dissolved_csp->nof_overlaps, 5;
199
float_is $dissolved_csp->avg_overlap, 76.8;
200
float_is $dissolved_csp->avg_identity, 100.0;
201
# min_overlap and min_identity not explicitly specified for the dissolved csp
202
# min_overlap and min_identity are thus taken from the mixed csp
203
is $dissolved_csp->min_overlap, 35;
204
float_is $dissolved_csp->min_identity, 96.8421;
206
# cross contig spectrum
207
ok my $cross_csp = Bio::Assembly::Tools::ContigSpectrum->new(
208
-cross => $mixed_csp), 'cross-contig spectrum';
209
is_deeply $cross_csp->spectrum, {1=>7, 2=>2, 9=>1}; # [7 2 0 0 0 0 0 0 1]
211
# assembly should have 2 2-contigs and 1 9-contig
212
@contigs = $cross_csp->assembly;
213
is scalar @contigs, 3;
214
@contig_sizes = sort qw( 2 2 9 );
215
is_deeply [sort map($_->num_sequences, @contigs)], \@contig_sizes;
216
@contig_isas = sort qw( Bio::Assembly::Contig Bio::Assembly::Contig Bio::Assembly::Contig);
217
is_deeply [sort map(ref $_, @contigs)], \@contig_isas;
218
@read_ids = sort qw(sdsu|SDSU_RFPERU_006_E04.x01.phd.1 ZZZ|SDSU_RFPERU_010_B05.x01.phd.1);
219
is_deeply [sort map($_->id, $contigs[0]->each_seq)], \@read_ids;
220
@read_ids = sort qw(sdsu|SDSU_RFPERU_013_H05.x01.phd.1 ABC|SDSU_RFPERU_005_F02.x01.phd.1);
221
is_deeply [sort map($_->id, $contigs[1]->each_seq)], \@read_ids;
222
@read_ids = sort qw( ZZZ|9962187 ABC|9937790 ABC|9944760 ABC|9956706
223
sdsu|9986984 ABC|9960711 ABC|9970175 ABC|9976538 ABC|9980040);
224
is_deeply [sort map($_->id, $contigs[2]->each_seq)], \@read_ids;
226
# effective assembly params
227
ok $cross_csp = Bio::Assembly::Tools::ContigSpectrum->new(
228
-cross => $mixed_csp,
229
-eff_asm_params => 1 ), 'cross-contig spectrum';
230
is_deeply $cross_csp->spectrum, {1=>7, 2=>2, 9=>1}; # [7 2 0 0 0 0 0 0 1]
231
is $cross_csp->nof_rep, 1;
232
is $cross_csp->eff_asm_params, 1;
233
is $cross_csp->max_size, 9;
234
is $cross_csp->nof_seq, 13;
235
float_is $cross_csp->avg_seq_len, 206.308;
236
is $cross_csp->nof_overlaps, 10;
237
float_is $cross_csp->avg_overlap, 76.9;
238
float_is $cross_csp->avg_identity, 99.2357;
239
# min_overlap and min_identity not explicitly specified for the cross csp
240
# min_overlap and min_identity are thus taken from the mixed csp
241
is $cross_csp->min_overlap, 35;
242
float_is $cross_csp->min_identity, 96.8421;
244
# with a specified minimum overlap and identity
245
ok $cross_csp = Bio::Assembly::Tools::ContigSpectrum->new(
246
-cross => $mixed_csp,
248
-min_identity => 98 ), 'cross-contig spectrum';
249
is_deeply $cross_csp->spectrum, {1=>3, 2=>1, 7=>1}; # [3 1 0 0 0 0 1]
250
is $cross_csp->nof_rep, 1;
251
is $cross_csp->eff_asm_params, 0;
252
is $cross_csp->max_size, 7;
253
is $cross_csp->nof_seq, 9;
254
float_is $cross_csp->avg_seq_len, 191.222;
255
is $cross_csp->min_overlap, 50;
256
float_is $cross_csp->min_identity, 98;
258
# sum of contig spectra
259
ok my $sum_csp = Bio::Assembly::Tools::ContigSpectrum->new(-eff_asm_params=>1), 'contig spectrum sum';
260
ok $sum_csp->add($dissolved_csp);
261
ok $sum_csp->add($mixed_csp);
262
is_deeply $sum_csp->spectrum, {1=>2, 2=>3, 6=>2, 9=>1}; # [2 3 0 0 0 2 0 0 1]
263
is $sum_csp->eff_asm_params, 1;
264
is $sum_csp->max_size, 9;
265
is $sum_csp->nof_rep, 2;
266
is $sum_csp->nof_seq, 29;
267
float_is $sum_csp->avg_seq_len, 258.7934;
268
is $sum_csp->nof_overlaps, 21;
269
is $sum_csp->min_overlap, 35;
270
float_is $sum_csp->avg_overlap, 137.0476;
271
float_is $sum_csp->min_identity, 96.8421;
272
float_is $sum_csp->avg_identity, 99.1487;
273
is scalar $sum_csp->assembly, 4;
275
# average of contig spectra
276
ok my $avg_csp = Bio::Assembly::Tools::ContigSpectrum->new(-eff_asm_params=>1), 'average contig spectrum';
277
ok $avg_csp = $avg_csp->average([$dissolved_csp, $mixed_csp]);
278
is_deeply $avg_csp->spectrum, {1=>1, 2=>1.5, 6=>1, 9=>0.5}; # [1 1 0 0 0 1 0 0 0.5]
279
is $avg_csp->eff_asm_params, 1;
280
is $avg_csp->max_size, 9;
281
is $avg_csp->nof_rep, 2;
282
is $avg_csp->nof_seq, 14.5;
283
float_is $avg_csp->avg_seq_len, 258.7934;
284
is $avg_csp->nof_overlaps, 10.5;
285
is $avg_csp->min_overlap, 35;
286
float_is $avg_csp->avg_overlap, 137.0476;
287
float_is $avg_csp->min_identity, 96.8421;
288
float_is $avg_csp->avg_identity, 99.1487;
289
is scalar $avg_csp->assembly, 4;
291
# drop assembly info from contig spectrum
292
ok $mixed_csp->drop_assembly(), 'drop assembly';
293
is scalar $mixed_csp->assembly(), 0;
295
# large contig (27 reads)
296
$in = Bio::Assembly::IO->new(
297
-file => test_input_file('27-contig_Newbler.ace'),
300
isa_ok $in, 'Bio::Assembly::IO';
301
$sc = $in->next_assembly;
302
isa_ok $sc, 'Bio::Assembly::Scaffold';
303
ok my $large_csp = Bio::Assembly::Tools::ContigSpectrum->new(
305
-eff_asm_params => 1 ), 'large contig spectrum';
306
is scalar $large_csp->assembly(), 1;
307
is_deeply $large_csp->spectrum, {1=>0, 27=>1};
308
is $large_csp->eff_asm_params, 1;
309
is $large_csp->max_size, 27;
310
is $large_csp->nof_rep, 1;
311
is $large_csp->nof_seq, 27;
312
float_is $large_csp->avg_seq_len, 100;
313
is $large_csp->nof_overlaps, 26;
314
is $large_csp->min_overlap, 54;
315
# the operation sometimes returns 88.76923... and sometimes 88.80769...
316
ok $large_csp->avg_overlap >= 88.7692;
317
ok $large_csp->avg_overlap <= 88.8077;
318
float_is $large_csp->min_identity, 33.3333;
319
cmp_ok($large_csp->avg_identity, '>=', 74.7, $large_csp->avg_identity);
320
cmp_ok($large_csp->avg_identity, '<=', 74.9, $large_csp->avg_identity);
322
ok my $large_xcsp = Bio::Assembly::Tools::ContigSpectrum->new(
323
-cross => $large_csp,
324
-eff_asm_params => 1 ), 'large cross-contig spectrum';
325
is $large_xcsp->nof_overlaps, 26;
326
cmp_ok($large_xcsp->avg_overlap, '>=', 88.7, $large_xcsp->avg_overlap);
327
cmp_ok($large_xcsp->avg_overlap, '<=', 88.9, $large_xcsp->avg_overlap);
328
is_deeply $large_xcsp->spectrum, {1=>21, 27=>1};
330
ok $large_xcsp = Bio::Assembly::Tools::ContigSpectrum->new(
331
-cross => $large_csp,
332
-min_overlap => 100);
333
is_deeply $large_xcsp->spectrum, {1=>18, 2=>5, 3=>1, 7=>1};
334
# Sort the cross-contigs by id up front, so the id comparison below does not
# depend on the order in which assembly() returns them.
my @xcontigs = sort {$a->id cmp $b->id} $large_xcsp->assembly;
335
is scalar @xcontigs, 7; # the cross-1-contigs are not included
336
my @xcontig_ids = sort qw( contig00001_1 contig00001_2 contig00001_3 contig00001_4
337
contig00001_5 contig00001_6 contig00001_7 );
338
is_deeply [map($_->id, @xcontigs)], \@xcontig_ids;
339
# Sizes are compared as lists sorted with the default (string) sort on both sides.
my @xcontig_sizes = sort qw( 2 2 2 2 2 3 7 );
340
is_deeply [sort map($_->num_sequences, @xcontigs)], \@xcontig_sizes;
342
# Examine largest cross-contig
# It is selected by sorting on num_sequences in descending order and taking
# the first element, rather than by a fixed index into @xcontigs.
343
my $xcontig = (sort {$b->num_sequences <=> $a->num_sequences} $large_xcsp->assembly)[0];
344
is $xcontig->num_sequences, 7;
345
# Check the start/end coordinates of three named reads within that contig.
is $xcontig->get_seq_coord($xcontig->get_seq_by_name('species1635|5973'))->start, 1;
346
is $xcontig->get_seq_coord($xcontig->get_seq_by_name('species158|7890'))->start, 1;
347
is $xcontig->get_seq_coord($xcontig->get_seq_by_name('species2742|48'))->end, 140;
349
# one contig at a time
350
$in = Bio::Assembly::IO->new(
351
-file => test_input_file('contigspectrumtest.tigr'),
354
$sc = $in->next_assembly;
355
ok $csp = Bio::Assembly::Tools::ContigSpectrum->new(
356
-eff_asm_params => 1 ), 'one contig at a time';
357
for my $contig ($sc->all_contigs) {
358
ok $csp->assembly($contig);
361
is scalar $csp->assembly(), 5;
362
is_deeply $csp->spectrum, {1=>0, 2=>3, 6=>1, 9=>1}; # [0 3 0 0 0 1 0 0 1]
363
is $csp->eff_asm_params, 1;
364
is $csp->max_size, 9;
366
is $csp->nof_seq, 21;
367
float_is $csp->avg_seq_len, 303.81;
368
is $csp->nof_overlaps, 16;
369
is $csp->min_overlap, 35;
370
float_is $csp->avg_overlap, 155.875;
371
float_is $csp->min_identity, 96.8421;
372
float_is $csp->avg_identity, 98.8826;
352
is(scalar $csp->assembly(), 5);
353
is_deeply($csp->spectrum, {1=>0, 2=>3, 6=>1, 9=>1}); # [0 3 0 0 0 1 0 0 1]
354
is($csp->eff_asm_params, 1);
355
is($csp->max_size, 9);
356
is($csp->nof_rep, 5);
357
is($csp->nof_seq, 21);
358
float_is($csp->avg_seq_len, 303.81);
359
is($csp->nof_overlaps, 16);
360
is($csp->min_overlap, 35);
361
is($csp->avg_overlap, 155.875);
362
float_is($csp->min_identity, 96.8421);
363
float_is($csp->avg_identity, 98.8826);