65
65
Bioperl modules. Send your comments and suggestions preferably to the
66
66
Bioperl mailing lists. Your participation is much appreciated.
68
bioperl-l@bioperl.org - General discussion
69
http://bio.perl.org/MailList.html - About the mailing lists
68
bioperl-l@bioperl.org - General discussion
69
http://bioperl.org/wiki/Mailing_lists - About the mailing lists
71
71
=head2 Reporting Bugs
73
73
Report bugs to the Bioperl bug tracking system to help us keep track of
74
the bugs and their resolution. Bug reports can be submitted via email
74
the bugs and their resolution. Bug reports can be submitted via the web:
77
bioperl-bugs@bio.perl.org
78
http://bugzilla.bioperl.org/
76
http://bugzilla.open-bio.org/
80
78
=head1 AUTHOR - Robson Francisco de Souza
91
89
package Bio::Assembly::IO::ace;
96
use Bio::Assembly::IO;
97
93
use Bio::Assembly::Scaffold;
98
94
use Bio::Assembly::Contig;
95
use Bio::Assembly::Singlet;
99
96
use Bio::LocatableSeq;
100
97
use Bio::Annotation::SimpleValue;
101
use Bio::Seq::PrimaryQual;
98
use Bio::Seq::Quality;
102
100
use Bio::SeqFeature::Generic;
102
my $dumper = new Dumpvalue();
103
$dumper->veryCompact(1);
104
@ISA = qw(Bio::Assembly::IO);
105
use base qw(Bio::Assembly::IO);
106
107
=head1 Parser methods
271
273
$contigOBJ->add_features([ $qual_feat ], 0);
275
# Loading read description (DeScription fields)
277
# /CHEM: (\S+)/ && do {
278
# $self->{'contigs'}[$contig]{'reads'}{$read_name}{'chemistry'} = $1;
280
# /CHROMAT_FILE: (\S+)/ && do {
281
# $self->{'contigs'}[$contig]{'reads'}{$read_name}{'chromat_file'} = $1;
283
# /DIRECTION: (\w+)/ && do {
285
# if ($ori eq 'rev') { $ori = 'C' }
286
# elsif ($ori eq 'fwd') { $ori = 'U' }
287
# $self->{'contigs'}[$contig]{'reads'}{$read_name}{'strand'} = $ori;
289
# /DYE: (\S+)/ && do {
290
# $self->{'contigs'}[$contig]{'reads'}{$read_name}{'dye'} = $1;
292
# /PHD_FILE: (\S+)/ && do {
293
# $self->{'contigs'}[$contig]{'reads'}{$read_name}{'phd_file'} = $1;
295
# /TEMPLATE: (\S+)/ && do {
296
# $self->{'contigs'}[$contig]{'reads'}{$read_name}{'template'} = $1;
298
# /TIME: (\S+ \S+ \d+ \d+\:\d+\:\d+ \d+)/ && do {
299
# $self->{'contigs'}[$contig]{'reads'}{$read_name}{'phd_time'} = $1;
276
# Loading read description (DeScription fields)
277
# chad was here! easter 2004.
278
# lingering read is a locatableseq. is there a better way to do this?
279
# i am simply adding more keys to the locatableseq
281
/CHEM: (\S+)/ && do {
282
$lingering_read->{'chemistry'} = $1;
284
/CHROMAT_FILE: (\S+)/ && do {
285
$lingering_read->{'chromatfilename'} = $1;
287
/DIRECTION: (\w+)/ && do {
289
if ($ori eq 'rev') { $ori = 'C' }
290
elsif ($ori eq 'fwd') { $ori = 'U' }
291
$lingering_read->{'strand'} = $ori;
294
$lingering_read->{'dye'} = $1;
296
/PHD_FILE: (\S+)/ && do {
297
$lingering_read->{'phdfilename'} = $1;
299
/TEMPLATE: (\S+)/ && do {
300
$lingering_read->{'template'} = $1;
302
/TIME: (\S+ \S+ \d+ \d+\:\d+\:\d+ \d+)/ && do {
303
$lingering_read->{'phd_time'} = $1;
303
307
# Loading contig tags ('tags' in phrap terminology, but Bioperl calls them features)
304
308
/^CT\s*\{/ && do {
305
309
my ($contigID,$type,$source,$start,$end,$date) = split(' ',$self->_readline);
310
my %tags = (source => $source, creation_date => $date);
306
311
$contigID =~ s/^Contig//i;
307
my $extra_info = undef;
312
my $tag_type = 'extra_info';
308
313
while ($_ = $self->_readline) {
316
$tag_type = 'comment';
320
$tag_type = 'extra_info';
328
$tags{$tag_type} .= "$_";
312
331
my $contig_tag = Bio::SeqFeature::Generic->new(-start=>$start,
315
-tag=>{ 'source' => $source,
316
'creation_date' => $date,
317
'extra_info' => $extra_info
319
336
$assembly->get_contig_by_id($contigID)->add_features([ $contig_tag ],1);
359
376
} # while ($_ = $self->_readline)
378
# hmm. what about singlets?
379
my $singletsfilename = $self->file();
380
$singletsfilename =~ s/\.ace.*$/.singlets/;
381
$singletsfilename =~ s/\<//;
382
if (!-f $singletsfilename) {
383
# oh deario, no singlets here
386
# print("Opening the singletsfile (".$singletsfilename.")\n");
387
my $singlets_fh = Bio::SeqIO->new(-file => "<$singletsfilename",
390
while (my $seq = $singlets_fh->next_seq()) {
391
# $dumper->dumpValue($seq);
392
# find the name of this singlet and attempt to get the phd from phd_dir instead
393
my ($phdfilename,$chromatfilename) = qw(unset unset);
394
if ($seq->desc() =~ /PHD_FILE: (\S+)/) {
397
if ($seq->desc() =~ /CHROMAT_FILE: (\S+)/) {
398
$chromatfilename = $1;
400
(my $phdfile = $singletsfilename) =~ s/edit_dir.*//;
401
$phdfile .= "phd_dir/$phdfilename";
402
my $singlet = new Bio::Assembly::Singlet();
404
# print STDERR ("Reading singlet data from this phdfile ($phdfile)\n");
405
my $phd_fh = new Bio::SeqIO( -file => "<$phdfile", -format => 'phd');
406
my $swq = $phd_fh->next_seq();
412
$adder->{phdfilename} = $phdfilename;
413
$adder->{chromatfilename} = $chromatfilename;
414
$singlet->seq_to_singlet($adder);
415
$assembly->add_singlet($singlet);
361
417
$assembly->update_seq_list();
362
418
return $assembly;