93
93
-file => test_input_file('assembly_with_singlets.ace'),
94
94
-format => 'ace-consed',
96
is( $aio->variant, 'consed' );
96
is $aio->variant, 'consed', 'consed';
97
97
ok $aio = Bio::Assembly::IO->new(
98
98
-file => test_input_file('assembly_with_singlets.ace'),
101
is( $aio->variant, 'consed' );
102
ok( $aio->variant('454') );
103
is( $aio->variant, '454' );
101
is $aio->variant, 'consed';
102
ok $aio->variant('454');
103
is $aio->variant, '454';
106
106
# Some PHRAP input
109
109
my $in = Bio::Assembly::IO->new
110
110
(-file => test_input_file('consed_project','edit_dir','test_project.phrap.out'),
112
isa_ok($in, 'Bio::Assembly::IO');
112
isa_ok $in, 'Bio::Assembly::IO';
113
113
while (my $contig = $in->next_contig) {
114
114
isa_ok($contig, 'Bio::Assembly::Contig');
117
117
$in = Bio::Assembly::IO->new
118
118
(-file => test_input_file('consed_project','edit_dir','test_project.phrap.out'),
120
isa_ok($in, 'Bio::Assembly::IO');
120
isa_ok $in, 'Bio::Assembly::IO';
123
123
local $TODO = "phrap parser doesn't include the sequence string in the sequence objects.";
125
125
eval {$sc = $in->next_assembly};
129
129
$in->verbose(-1);
130
130
$in = Bio::Assembly::IO->new
131
131
(-file => test_input_file('consed_project','edit_dir','test_project.phrap.out'),
133
ok($sc = $in->next_assembly);
134
isa_ok($sc, 'Bio::Assembly::Scaffold');
133
ok $sc = $in->next_assembly;
134
isa_ok $sc, 'Bio::Assembly::Scaffold';
145
145
is $sc->annotation->get_all_annotation_keys, 0,"no annotations in Annotation collection?";
146
146
is $sc->get_nof_contigs, 1;
147
147
is $sc->get_nof_sequences_in_contigs, 2;
148
is($sc->get_nof_singlets, 2, "get_nof_singlets");
149
is($sc->get_contig_seq_ids, 2, "get_contig_seq_ids");
150
is($sc->get_contig_ids, 1, "get_contig_ids");
151
is($sc->get_singlet_ids, 2, "get_singlet_ids");
148
is $sc->get_nof_singlets, 2, "get_nof_singlets";
149
is $sc->get_contig_seq_ids, 2, "get_contig_seq_ids";
150
is $sc->get_contig_ids, 1, "get_contig_ids";
151
is $sc->get_singlet_ids, 2, "get_singlet_ids";
153
153
my @phrap_contigs = $sc->all_contigs();
154
154
isa_ok $phrap_contigs[0], "Bio::Assembly::Contig",'the contig is a Bio::Assembly::Contig';
157
157
isa_ok $singlets[0], "Bio::Assembly::Singlet", 'the singlet is a Bio::Assembly::Singlet';
159
159
my @contig_seq_ids;
160
ok(@contig_seq_ids = $sc->get_contig_seq_ids, "get_contig_seq_ids");
161
is(@contig_seq_ids, 2);
160
ok @contig_seq_ids = $sc->get_contig_seq_ids, "get_contig_seq_ids";
161
is @contig_seq_ids, 2;
162
162
for my $contig_seq_id (@contig_seq_ids) {
163
ok (not $contig_seq_id =~ m/contig/i);
163
ok not $contig_seq_id =~ m/contig/i;
166
ok(@contig_ids = $sc->get_contig_ids, "get_contig_ids");
166
ok @contig_ids = $sc->get_contig_ids, "get_contig_ids";
168
168
for my $contig_id (@contig_ids) {
169
ok (not $contig_id =~ m/contig/i);
169
ok not $contig_id =~ m/contig/i;
172
ok(@singlet_ids = $sc->get_singlet_ids, "get_singlet_ids");
172
ok @singlet_ids = $sc->get_singlet_ids, "get_singlet_ids";
174
174
for my $singlet_id (@singlet_ids) {
175
ok (not $singlet_id =~ m/contig/i);
175
ok not $singlet_id =~ m/contig/i;
178
ok(@all_seq_ids = $sc->get_all_seq_ids, "get_all_seq_ids");
178
ok @all_seq_ids = $sc->get_all_seq_ids, "get_all_seq_ids";
179
179
for my $seq_id (@all_seq_ids) {
180
ok (not $seq_id =~ m/contig/i);
180
ok not $seq_id =~ m/contig/i;
185
185
# Testing ContigAnalysis
226
226
is $assembly->get_nof_contigs, 1;
227
227
is $assembly->get_nof_sequences_in_contigs, 2;
228
is($assembly->get_nof_singlets, 0, "get_nof_singlets");
229
is($assembly->get_contig_seq_ids, 2, "get_contig_seq_ids");
230
is($assembly->get_contig_ids, 1, "get_contig_ids");
231
is($assembly->get_singlet_ids, 0, "get_singlet_ids");
228
is $assembly->get_nof_singlets, 0, "get_nof_singlets";
229
is $assembly->get_contig_seq_ids, 2, "get_contig_seq_ids";
230
is $assembly->get_contig_ids, 1, "get_contig_ids";
231
is $assembly->get_singlet_ids, 0, "get_singlet_ids";
233
233
$aio = Bio::Assembly::IO->new(
234
234
-file => test_input_file('assembly_with_singlets.ace'),
256
256
is $test_reads[1], '5762101';
258
258
is $assembly->get_nof_sequences_in_contigs, 6;
259
is($assembly->get_nof_singlets, 33, "get_nof_singlets");
259
is $assembly->get_nof_singlets, 33, "get_nof_singlets";
260
260
@singlets = $assembly->all_singlets();
261
261
isa_ok $singlets[0], "Bio::Assembly::Contig", 'the singlet is a Bio::Assembly::Contig';
262
262
isa_ok $singlets[0], "Bio::Assembly::Singlet", 'the singlet is a Bio::Assembly::Singlet';
263
ok(@contig_seq_ids = $assembly->get_contig_seq_ids, "get_contig_seq_ids");
264
is(@contig_seq_ids, 6);
263
ok @contig_seq_ids = $assembly->get_contig_seq_ids, "get_contig_seq_ids";
264
is @contig_seq_ids, 6;
265
265
for my $contig_seq_id (@contig_seq_ids) {
266
ok (not $contig_seq_id =~ m/contig/i);
266
ok not $contig_seq_id =~ m/contig/i;
268
ok(@contig_ids = $assembly->get_contig_ids, "get_contig_ids");
268
ok @contig_ids = $assembly->get_contig_ids, "get_contig_ids";
270
270
for my $contig_id (@contig_ids) {
271
ok ($contig_id =~ m/contig/i);
271
ok $contig_id =~ m/contig/i;
273
ok(@singlet_ids = $assembly->get_singlet_ids, "get_singlet_ids");
274
is(@singlet_ids, 33);
273
ok @singlet_ids = $assembly->get_singlet_ids, "get_singlet_ids";
275
275
for my $singlet_id (@singlet_ids) {
276
ok ($singlet_id =~ m/contig/i);
276
ok $singlet_id =~ m/contig/i;
278
ok(@all_seq_ids = $assembly->get_all_seq_ids, "get_all_seq_ids");
278
ok @all_seq_ids = $assembly->get_all_seq_ids, "get_all_seq_ids";
279
279
for my $seq_id (@all_seq_ids) {
280
ok (not $seq_id =~ m/contig/i);
280
ok not $seq_id =~ m/contig/i;
282
282
is(@all_seq_ids, 39);
285
ok( $aio = Bio::Assembly::IO->new(
285
ok $aio = Bio::Assembly::IO->new(
286
286
-file=>test_input_file('singlet_w_CT.ace'),
290
290
# ACE 454 variant
291
291
$aio = Bio::Assembly::IO->new(
311
311
my $right_pad_length = 81;
312
312
my $consensus_length = $left_pad_length + $sequence_length + $right_pad_length;
313
313
my $cons_seq = $contig->get_consensus_sequence->seq;
314
is( length $cons_seq, $consensus_length );
314
is length $cons_seq, $consensus_length;
315
315
$cons_seq =~ m/^(-*).*?(-*)$/;
316
is( length $1, $left_pad_length, '454 ACE variant consensus check' );
317
is( length $2, $right_pad_length );
316
is length $1, $left_pad_length, '454 ACE variant consensus check';
317
is length $2, $right_pad_length;
318
318
my $cons_qual = $contig->get_consensus_quality->qual;
319
is( scalar @$cons_qual, $consensus_length );
319
is scalar @$cons_qual, $consensus_length;
320
320
$cons_qual = join ' ', @{$contig->get_consensus_quality->qual};
321
321
my $lpad = $left_pad_length x '0 ';
322
322
my $rpad = $right_pad_length x '0 ';
323
323
$cons_qual =~ m/^($lpad).*($rpad)$/;
327
327
# Writing ACE files
328
328
my $asm_infile = '27-contig_Newbler.ace';
376
376
my $scaf_in = $asm_in->next_assembly;
377
isa_ok($scaf_in, 'Bio::Assembly::Scaffold');
378
is($scaf_in->id, 'NoName');
379
is($scaf_in->get_nof_contigs, 13);
380
is($scaf_in->get_nof_sequences_in_contigs, 36);
381
is($scaf_in->get_nof_singlets, 1);
377
isa_ok $scaf_in, 'Bio::Assembly::Scaffold';
378
is $scaf_in->id, 'NoName';
379
is $scaf_in->get_nof_contigs, 13;
380
is $scaf_in->get_nof_sequences_in_contigs, 36;
381
is $scaf_in->get_nof_singlets, 1;
382
382
my @contigseqids = sort qw(sdsu|SDSU1_RFPERU_001_A09.x01.phd.1
383
383
sdsu|SDSU1_RFPERU_001_B03.x01.phd.1 sdsu|SDSU1_RFPERU_001_B04.x01.phd.1
384
384
sdsu|SDSU1_RFPERU_001_E04.x01.phd.1 sdsu|SDSU_RFPERU_002_A01.x01.phd.1
401
401
my @contigids = sort qw(106 144 148 17 185 2 210 36 453 500 613 668 93);
402
402
my @singletids = sort qw(123);
403
403
my @singletseqids = sort qw(asdf);
404
is_deeply([sort $scaf_in->get_contig_seq_ids], \@contigseqids);
405
is_deeply([sort $scaf_in->get_contig_ids], \@contigids );
406
is_deeply([sort $scaf_in->get_singlet_ids], \@singletids );
407
isa_ok($scaf_in->get_seq_by_id('sdsu|SDSU1_RFPERU_001_A09.x01.phd.1'),'Bio::LocatableSeq');
404
is_deeply [sort $scaf_in->get_contig_seq_ids], \@contigseqids;
405
is_deeply [sort $scaf_in->get_contig_ids], \@contigids ;
406
is_deeply [sort $scaf_in->get_singlet_ids], \@singletids ;
407
isa_ok $scaf_in->get_seq_by_id('sdsu|SDSU1_RFPERU_001_A09.x01.phd.1'),'Bio::LocatableSeq';
408
408
$contig = $scaf_in->get_contig_by_id('106');
409
isa_ok($contig,'Bio::Assembly::Contig');
409
isa_ok $contig,'Bio::Assembly::Contig';
411
411
# check Contig object SeqFeature::Collection
412
412
# should add more specific Contig tests...
413
413
my @sfs = $contig->get_features_collection->features; # 5 contig features + 2 seqfeatures
415
is($sfs[1]->seq_id(), undef); # should this be undef?
416
ok( $contig->get_features_collection->get_features_by_type('_aligned_coord:sdsu|SDSU_RFPERU_006_E04.x01.phd.1') );
417
isa_ok($scaf_in->annotation, 'Bio::AnnotationCollectionI');
418
is($scaf_in->annotation->get_all_annotation_keys, 0, "no annotations in Annotation collection?");
415
is $sfs[1]->seq_id(), undef; # should this be undef?
416
ok $contig->get_features_collection->get_features_by_type('_aligned_coord:sdsu|SDSU_RFPERU_006_E04.x01.phd.1');
417
isa_ok $scaf_in->annotation, 'Bio::AnnotationCollectionI';
418
is $scaf_in->annotation->get_all_annotation_keys, 0, "no annotations in Annotation collection?";
421
421
# Exporting an assembly
435
435
ok $aio = Bio::Assembly::IO->new( -file => test_input_file($file),
436
436
-format => 'maq' ), "init maq IO object";
437
437
ok $assembly = $aio->next_assembly, "get maq assy";
438
is( $assembly->get_nof_contigs, 11, "got all contigs");
438
is $assembly->get_nof_contigs, 11, "got all contigs";
439
439
ok open(my $tf, test_input_file($file)), "read test file as text";
440
440
my @lines = <$tf>;
441
is( $assembly->get_nof_contig_seqs, scalar @lines, "recorded all maq reads");
441
is $assembly->get_nof_contig_seqs, scalar @lines, "recorded all maq reads";
442
442
ok !$assembly->get_nof_singlets, "no singlets";
444
444
ok $aio = Bio::Assembly::IO->new( -file => test_input_file($file),
445
445
-format => 'maq' );
446
isa_ok($aio, 'Bio::Assembly::IO');
446
isa_ok $aio, 'Bio::Assembly::IO';
447
447
while (my $contig = $aio->next_contig) {
448
isa_ok($contig, 'Bio::Assembly::Contig');
448
isa_ok $contig, 'Bio::Assembly::Contig';
455
455
ok $aio = Bio::Assembly::IO->new( -file => test_input_file($file),
456
456
-format => 'maq' );
457
457
ok $assembly = $aio->next_assembly, "get maq assy";
458
isa_ok($aio, 'Bio::Assembly::IO');
461
ok(@contig_seq_ids = $assembly->get_contig_seq_ids, "get_contig_seq_ids");
462
is(@contig_seq_ids, 246);
458
isa_ok $aio, 'Bio::Assembly::IO';
461
ok @contig_seq_ids = $assembly->get_contig_seq_ids, "get_contig_seq_ids";
462
is @contig_seq_ids, 246;
463
463
for my $contig_seq_id (@contig_seq_ids) {
464
ok (not $contig_seq_id =~ m/maq_assy/i);
464
ok not $contig_seq_id =~ m/maq_assy/i;
467
ok(@contig_ids = $assembly->get_contig_ids, "get_contig_ids");
467
ok @contig_ids = $assembly->get_contig_ids, "get_contig_ids";
469
469
for my $contig_id (@contig_ids) {
470
ok ($contig_id =~ m/maq_assy/i);
470
ok $contig_id =~ m/maq_assy/i;
473
ok(@singlet_ids = $assembly->get_singlet_ids, "get_singlet_ids");
473
ok @singlet_ids = $assembly->get_singlet_ids, "get_singlet_ids";
475
475
for my $singlet_id (@singlet_ids) {
476
ok ($singlet_id =~ m/maq_assy/i);
476
ok $singlet_id =~ m/maq_assy/i;
479
ok(@all_seq_ids = $assembly->get_all_seq_ids, "get_all_seq_ids");
479
ok @all_seq_ids = $assembly->get_all_seq_ids, "get_all_seq_ids";
480
480
for my $seq_id (@all_seq_ids) {
481
ok (not $seq_id =~ m/maq_assy/i);
481
ok not $seq_id =~ m/maq_assy/i;
483
is(@all_seq_ids, 250);
483
is @all_seq_ids, 250;
485
485
ok $aio = Bio::Assembly::IO->new( -file => test_input_file($file),
486
486
-format => 'maq' );
487
487
while (my $contig = $aio->next_contig) {
488
isa_ok($contig, 'Bio::Assembly::Contig');
488
isa_ok $contig, 'Bio::Assembly::Contig';