~ubuntu-branches/ubuntu/raring/bioperl/raring

« back to all changes in this revision

Viewing changes to Bio/Assembly/IO/ace.pm

  • Committer: Bazaar Package Importer
  • Author(s): Charles Plessy
  • Date: 2008-03-18 14:44:57 UTC
  • mfrom: (4 hardy)
  • mto: This revision was merged to the branch mainline in revision 6.
  • Revision ID: james.westby@ubuntu.com-20080318144457-1jjoztrvqwf0gruk
* debian/control:
  - Removed MIA Matt Hope (dopey) from the Uploaders field.
    Thank you for your work, Matt. I hope you are doing well.
  - Downgraded some recommended package to the 'Suggests' priority,
    according to the following discussion on Upstream's mail list.
    http://bioperl.org/pipermail/bioperl-l/2008-March/027379.html
    (Closes: #448890)
* debian/copyright converted to machine-readable format.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# $Id: ace.pm,v 1.1 2002/11/04 14:38:14 heikki Exp $
 
1
# $Id: ace.pm,v 1.13.4.1 2006/10/02 23:10:12 sendu Exp $
2
2
#
3
3
## BioPerl module for Bio::Assembly::IO::ace
4
4
#
12
12
 
13
13
Bio::Assembly::IO::ace -  module to load phrap ACE files.
14
14
 
15
 
=head1 SYNOPSYS
 
15
=head1 SYNOPSIS
16
16
 
17
17
    # Building an input stream
18
18
    use Bio::Assembly::IO;
65
65
Bioperl modules. Send your comments and suggestions preferably to the
66
66
Bioperl mailing lists  Your participation is much appreciated.
67
67
 
68
 
  bioperl-l@bioperl.org                 - General discussion
69
 
  http://bio.perl.org/MailList.html     - About the mailing lists
 
68
  bioperl-l@bioperl.org                  - General discussion
 
69
  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
70
70
 
71
71
=head2 Reporting Bugs
72
72
 
73
73
Report bugs to the Bioperl bug tracking system to help us keep track
74
 
the bugs and their resolution.  Bug reports can be submitted via email
75
 
or the web:
 
74
the bugs and their resolution.  Bug reports can be submitted via the web:
76
75
 
77
 
  bioperl-bugs@bio.perl.org
78
 
  http://bugzilla.bioperl.org/
 
76
  http://bugzilla.open-bio.org/
79
77
 
80
78
=head1 AUTHOR - Robson Francisco de Souza
81
79
 
91
89
package Bio::Assembly::IO::ace;
92
90
 
93
91
use strict;
94
 
use vars qw(@ISA);
95
92
 
96
 
use Bio::Assembly::IO;
97
93
use Bio::Assembly::Scaffold;
98
94
use Bio::Assembly::Contig;
 
95
use Bio::Assembly::Singlet;
99
96
use Bio::LocatableSeq;
100
97
use Bio::Annotation::SimpleValue;
101
 
use Bio::Seq::PrimaryQual;
 
98
use Bio::Seq::Quality;
 
99
use Bio::SeqIO;
102
100
use Bio::SeqFeature::Generic;
 
101
use Dumpvalue();
 
102
my $dumper = new Dumpvalue();
 
103
$dumper->veryCompact(1);
103
104
 
104
 
@ISA = qw(Bio::Assembly::IO);
 
105
use base qw(Bio::Assembly::IO);
105
106
 
106
107
=head1 Parser methods
107
108
 
117
118
 
118
119
sub next_assembly {
119
120
    my $self = shift; # Object reference
 
121
    my $lingering_read;
120
122
    local $/="\n";
121
123
 
122
124
    # Resetting assembly data structure
155
157
            $consensus_sequence = Bio::LocatableSeq->new(-seq=>$consensus_sequence,
156
158
                                                              -start=>1,
157
159
                                                              -end=>$consensus_length,
158
 
                                                              -id=>$contigID);
 
160
                                                              -id=>"Consensus sequence for $contigID");
159
161
            $contigOBJ->set_consensus_sequence($consensus_sequence);
160
162
            $assembly->add_contig($contigOBJ);
161
163
        };
191
193
                }
192
194
            }
193
195
 
194
 
            my $qual = Bio::Seq::PrimaryQual->new(-qual=>join(" ",@quality),
195
 
                                                  -id=>$contigOBJ->id());
 
196
            my $qual = Bio::Seq::Quality->new(-qual=>join(" ",@quality),
 
197
                                              -id=>$contigOBJ->id());
196
198
            $contigOBJ->set_consensus_quality($qual);
197
199
        };
198
200
 
232
234
                                              -id=>$read_name,
233
235
                                              -primary_id=>$read_name,
234
236
                                              -alphabet=>'dna');
235
 
 
 
237
          $lingering_read = $read;
236
238
            # Adding read location and sequence to contig ("gapped consensus" coordinates)
237
239
            my $padded_start = $read_data->{$read_name}{'padded_start'};
238
240
            my $padded_end   = $padded_start + $read_data->{$read_name}{'length'} - 1;
271
273
                $contigOBJ->add_features([ $qual_feat ], 0);
272
274
            }
273
275
        };
274
 
 
275
 
        # Loading read description (DeScription fields)
276
 
#       /^DS / && do {
277
 
#           /CHEM: (\S+)/ && do {
278
 
#               $self->{'contigs'}[$contig]{'reads'}{$read_name}{'chemistry'} = $1;
279
 
#           };
280
 
#           /CHROMAT_FILE: (\S+)/ && do {
281
 
#               $self->{'contigs'}[$contig]{'reads'}{$read_name}{'chromat_file'} = $1;
282
 
#           };
283
 
#           /DIRECTION: (\w+)/ && do {
284
 
#               my $ori = $1;
285
 
#               if    ($ori eq 'rev') { $ori = 'C' }
286
 
#               elsif ($ori eq 'fwd') { $ori = 'U' }
287
 
#               $self->{'contigs'}[$contig]{'reads'}{$read_name}{'strand'} = $ori;
288
 
#           };
289
 
#           /DYE: (\S+)/ && do {
290
 
#               $self->{'contigs'}[$contig]{'reads'}{$read_name}{'dye'} = $1;
291
 
#           };
292
 
#           /PHD_FILE: (\S+)/ && do {
293
 
#               $self->{'contigs'}[$contig]{'reads'}{$read_name}{'phd_file'} = $1;
294
 
#           };
295
 
#           /TEMPLATE: (\S+)/ && do {
296
 
#               $self->{'contigs'}[$contig]{'reads'}{$read_name}{'template'} = $1;
297
 
#           };
298
 
#           /TIME: (\S+ \S+ \d+ \d+\:\d+\:\d+ \d+)/ && do {
299
 
#               $self->{'contigs'}[$contig]{'reads'}{$read_name}{'phd_time'} = $1;
300
 
#           };
301
 
#       };
 
276
             # Loading read description (DeScription fields)
 
277
          # chad was here! easter 2004.
 
278
          # lingering read is a locatableseq. is there a better way to do this?
 
279
          # i am simply adding more keys to the locatableseq
 
280
        /^DS / && do {
 
281
            /CHEM: (\S+)/ && do {
 
282
                $lingering_read->{'chemistry'} = $1;
 
283
            };
 
284
            /CHROMAT_FILE: (\S+)/ && do {
 
285
                $lingering_read->{'chromatfilename'} = $1;
 
286
            };
 
287
            /DIRECTION: (\w+)/ && do {
 
288
                my $ori = $1;
 
289
                if    ($ori eq 'rev') { $ori = 'C' }
 
290
                elsif ($ori eq 'fwd') { $ori = 'U' }
 
291
                $lingering_read->{'strand'} = $ori;
 
292
            };
 
293
            /DYE: (\S+)/ && do {
 
294
                $lingering_read->{'dye'} = $1;
 
295
            };
 
296
            /PHD_FILE: (\S+)/ && do {
 
297
                $lingering_read->{'phdfilename'} = $1;
 
298
            };
 
299
            /TEMPLATE: (\S+)/ && do {
 
300
                $lingering_read->{'template'} = $1;
 
301
            };
 
302
            /TIME: (\S+ \S+ \d+ \d+\:\d+\:\d+ \d+)/ && do {
 
303
                $lingering_read->{'phd_time'} = $1;
 
304
            };
 
305
        };
302
306
 
303
307
        # Loading contig tags ('tags' in phrap terminology, but Bioperl calls them features)
304
308
        /^CT\s*\{/ && do {
305
309
            my ($contigID,$type,$source,$start,$end,$date) = split(' ',$self->_readline);
 
310
        my %tags = (source => $source, creation_date => $date);
306
311
            $contigID =~ s/^Contig//i;
307
 
            my $extra_info = undef;
 
312
            my $tag_type = 'extra_info';
308
313
            while ($_ = $self->_readline) {
309
 
                last if (/\}/);
310
 
                $extra_info .= $_;
 
314
            if (/COMMENT\s*\{/)
 
315
            {
 
316
                $tag_type = 'comment';
 
317
            }
 
318
            elsif (/C\}/)
 
319
            {
 
320
                $tag_type = 'extra_info';
 
321
            }
 
322
            elsif (/\}/)
 
323
            {
 
324
                last;
 
325
            }
 
326
            else
 
327
            {
 
328
                $tags{$tag_type} .= "$_";
 
329
            }
311
330
            }
312
331
            my $contig_tag = Bio::SeqFeature::Generic->new(-start=>$start,
313
332
                                                           -end=>$end,
314
333
                                                           -primary=>$type,
315
 
                                                           -tag=>{ 'source' => $source,
316
 
                                                                   'creation_date' => $date,
317
 
                                                                   'extra_info' => $extra_info
318
 
                                                               });
 
334
                                                           -tag=>\%tags,
 
335
                                                               );
319
336
            $assembly->get_contig_by_id($contigID)->add_features([ $contig_tag ],1);
320
337
        };
321
338
 
358
375
 
359
376
    } # while ($_ = $self->_readline)
360
377
 
 
378
          # hmm. what about singlets?
 
379
     my $singletsfilename = $self->file();
 
380
     $singletsfilename =~ s/\.ace.*$/.singlets/;
 
381
     $singletsfilename =~ s/\<//;
 
382
     if (!-f $singletsfilename) {
 
383
               # oh deario, no singlets here
 
384
          return $assembly;
 
385
     }
 
386
     # print("Opening the singletsfile (".$singletsfilename.")\n");
 
387
     my $singlets_fh = Bio::SeqIO->new(-file   => "<$singletsfilename",
 
388
                                          -format => 'fasta');
 
389
     my $adder;
 
390
     while (my $seq = $singlets_fh->next_seq()) {
 
391
          # $dumper->dumpValue($seq);
 
392
               # find the name of this singlet and attempt to get the phd from phd_dir instead
 
393
          my ($phdfilename,$chromatfilename) = qw(unset unset);
 
394
          if ($seq->desc() =~ /PHD_FILE: (\S+)/) {
 
395
              $phdfilename = $1;
 
396
          }
 
397
          if ($seq->desc() =~ /CHROMAT_FILE: (\S+)/)  {
 
398
               $chromatfilename = $1;
 
399
          }
 
400
          (my $phdfile = $singletsfilename) =~ s/edit_dir.*//;
 
401
          $phdfile .= "phd_dir/$phdfilename";
 
402
          my $singlet = new Bio::Assembly::Singlet();
 
403
          if (-f $phdfile) {
 
404
               # print STDERR ("Reading singlet data from this phdfile ($phdfile)\n");
 
405
               my $phd_fh = new Bio::SeqIO( -file =>   "<$phdfile", -format     =>   'phd');
 
406
               my $swq = $phd_fh->next_seq();
 
407
               $adder = $swq;
 
408
          }
 
409
          else {
 
410
               $adder = $seq;
 
411
          }
 
412
          $adder->{phdfilename} = $phdfilename;
 
413
          $adder->{chromatfilename} = $chromatfilename;
 
414
          $singlet->seq_to_singlet($adder);
 
415
          $assembly->add_singlet($singlet);
 
416
     }
361
417
    $assembly->update_seq_list();
362
418
    return $assembly;
363
419
}
372
428
 
373
429
=cut
374
430
 
375
 
sub write_assemebly {
 
431
sub write_assembly {
376
432
    my $self = shift;
377
 
 
378
 
    $self->throw("Writing phrap ACE files is not implemented yet! Sorry...");
 
433
    $self->throw_not_implemented();
379
434
}
380
435
 
 
436
 
381
437
1;
382
438
 
383
439
__END__