150
my $self = $class->SUPER::new(@args);
151
$self->_initialize(@args);
152
$self->{'_tempdir'} = $self->tempdir(CLEANUP=>1);
153
my ($file, $index, $no_head, $no_sq) = $self->_rearrange([qw(FILE INDEX NO_HEAD NO_SQ)], @args);
155
$self->{'_no_head'} = $no_head;
156
$self->{'_no_sq'} = $no_sq;
158
# get the sequence so Bio::DB::Sam can work with it
161
if (-e $index && -r _ ) {
162
$refdb = ($index =~ m/\.gz[^.]*$/) ? $self->_uncompress($index) : $index;
163
my $guesser = Bio::Tools::GuessSeqFormat->new(-file=>$refdb);
164
$self->throw("'$index' is not a fasta file.")
165
unless $guesser->guess =~ m/^fasta$/;
166
} elsif ($HAVE_BOWTIE) {
167
$inspector = Bio::Tools::Run::Bowtie->new( -command => 'inspect' );
168
$refdb = $inspector->run($index);
170
$self->throw("Bio::Tools::Run::Bowtie is not available - cannot extract refdb from index.");
173
my $bam_file = $self->_make_bam($self->_bowtie_to_sam($file, $refdb));
174
my $sam = Bio::Assembly::IO->new( -file => "<$bam_file", -refdb => $refdb , -format => 'sam' );
150
my $self = $class->SUPER::new(@args);
151
$self->_initialize(@args);
152
$self->{'_tempdir'} = $self->tempdir(CLEANUP=>1);
153
my ($file, $index, $no_head, $no_sq) = $self->_rearrange([qw(FILE INDEX NO_HEAD NO_SQ)], @args);
155
$self->{'_no_head'} = $no_head;
156
$self->{'_no_sq'} = $no_sq;
158
# get the sequence so Bio::DB::Sam can work with it
161
if (-e $index && -r _ ) {
162
$refdb = ($index =~ m/\.gz[^.]*$/) ? $self->_uncompress($index) : $index;
163
my $guesser = Bio::Tools::GuessSeqFormat->new(-file=>$refdb);
164
$self->throw("'$index' is not a fasta file.")
165
unless $guesser->guess =~ m/^fasta$/;
166
} elsif ($HAVE_BOWTIE) {
167
$inspector = Bio::Tools::Run::Bowtie->new( -command => 'inspect' );
168
$refdb = $inspector->run($index);
170
$self->throw("Bio::Tools::Run::Bowtie is not available - cannot extract refdb from index.");
173
my $bam_file = $self->_make_bam($self->_bowtie_to_sam($file, $refdb));
174
my $sam = Bio::Assembly::IO->new( -file => "<$bam_file", -refdb => $refdb , -format => 'sam' );
179
179
sub _bowtie_to_sam {
180
my ($self, $file, $refdb) = @_;
182
$self->throw("'$file' does not exist or is not readable.")
183
unless ( -e $file && -r _ );
185
if ($file =~ m/\.gz[^.]*$/) {
186
$file = $self->_uncompress($file);
193
my $guesser = Bio::Tools::GuessSeqFormat->new(-file=>$file);
194
$self->throw("'$file' is not a bowtie formatted file.") unless $guesser->guess =~ m/^bowtie$/;
202
# create temp file for working
203
my ($sam_tmp_h, $sam_tmp_f) = $self->tempfile( -dir => $self->{'_tempdir'}, -suffix => '.sam' );
205
while (my $line = $self->_readline) {
207
my ($qname,$strand,$rname,$pos,$seq,$qual,$m,$details)=split("\cI",$line);
210
my $paired_f = ($qname =~ m#/[12]#) ? 0x03 : 0;
211
my $strand_f = ($strand eq '-') ? 0x10 : 0;
212
my $op_strand_f = ($strand eq '+' && $paired_f) ? 0x20 : 0;
213
my $first_f = ($qname =~ m#/1#) ? 0x40 : 0;
214
my $second_f = ($qname =~ m#/2#) ? 0x80 : 0;
215
my $flag = $paired_f | $strand_f | $op_strand_f | $first_f | $second_f;
218
my $len = length $seq;
219
die unless $len == length $qual;
220
my $cigar = $len.'M';
221
my @detail = split(',',$details);
222
my $dist = 'NM:i:'.scalar @detail;
228
my $err = ($1-$last_pos);
230
push @mismatch,($err,$2);
232
push @mismatch, $len-$last_pos;
233
@mismatch = reverse @mismatch if $strand eq '-';
234
my $mismatch = join('',('MD:Z:',@mismatch));
239
my $mpos = $mate_line[3];
240
$mate_line[7] = $pos;
241
my $isize = $mpos-$pos-$len;
242
$mate_line[8] = -$isize;
243
print $sam_tmp_h join("\t",@mate_line),"\n";
244
print $sam_tmp_h join("\t",$qname, $flag, $rname, $pos, $mapq, $cigar, $mrnm, $mpos, $isize, $seq, $qual, $mismatch, $dist),"\n";
248
@mate_line = ($qname, $flag, $rname, $pos, $mapq, $cigar, $mrnm, undef, undef, $seq, $qual, $mismatch, $dist);
255
print $sam_tmp_h join("\t",$qname, $flag, $rname, $pos, $mapq, $cigar, $mrnm, $mpos, $isize, $seq, $qual, $mismatch, $dist),"\n";
261
return $sam_tmp_f if $self->{'_no_head'};
263
my ($samh, $samf) = $self->tempfile( -dir => $self->{'_tempdir'}, -suffix => '.sam' );
268
# print sequence dictionary
269
unless ($self->{'_no_sq'}) {
270
my $db = Bio::SeqIO->new( -file => $refdb, -format => 'fasta' );
271
while ( my $seq = $db->next_seq() ) {
272
$SQ{$seq->id} = $seq->length if $SQ{$seq->id};
275
map { print $samh join("\t", ('@SQ', "SN:$_", "LN:$SQ{$_}")), "\n" } keys %SQ;
282
open($sam_tmp_h, $sam_tmp_f) or
283
$self->throw("Can not open '$sam_tmp_f' for reading: $!");
285
print $samh $_ while (<$sam_tmp_h>);
180
my ($self, $file, $refdb) = @_;
182
$self->throw("'$file' does not exist or is not readable.")
183
unless ( -e $file && -r _ );
185
if ($file =~ m/\.gz[^.]*$/) {
186
$file = $self->_uncompress($file);
193
my $guesser = Bio::Tools::GuessSeqFormat->new(-file=>$file);
194
$self->throw("'$file' is not a bowtie formatted file.") unless $guesser->guess =~ m/^bowtie$/;
202
# create temp file for working
203
my ($sam_tmp_h, $sam_tmp_f) = $self->tempfile( -dir => $self->{'_tempdir'}, -suffix => '.sam' );
205
while (my $line = $self->_readline) {
207
my ($qname,$strand,$rname,$pos,$seq,$qual,$m,$details)=split("\cI",$line);
210
my $paired_f = ($qname =~ m#/[12]#) ? 0x03 : 0;
211
my $strand_f = ($strand eq '-') ? 0x10 : 0;
212
my $op_strand_f = ($strand eq '+' && $paired_f) ? 0x20 : 0;
213
my $first_f = ($qname =~ m#/1#) ? 0x40 : 0;
214
my $second_f = ($qname =~ m#/2#) ? 0x80 : 0;
215
my $flag = $paired_f | $strand_f | $op_strand_f | $first_f | $second_f;
218
my $len = length $seq;
219
die unless $len == length $qual;
220
my $cigar = $len.'M';
221
my @detail = split(',',$details);
222
my $dist = 'NM:i:'.scalar @detail;
228
my $err = ($1-$last_pos);
230
push @mismatch,($err,$2);
232
push @mismatch, $len-$last_pos;
233
@mismatch = reverse @mismatch if $strand eq '-';
234
my $mismatch = join('',('MD:Z:',@mismatch));
239
my $mpos = $mate_line[3];
240
$mate_line[7] = $pos;
241
my $isize = $mpos-$pos-$len;
242
$mate_line[8] = -$isize;
243
print $sam_tmp_h join("\t",@mate_line),"\n";
244
print $sam_tmp_h join("\t",$qname, $flag, $rname, $pos, $mapq, $cigar, $mrnm, $mpos, $isize, $seq, $qual, $mismatch, $dist),"\n";
248
@mate_line = ($qname, $flag, $rname, $pos, $mapq, $cigar, $mrnm, undef, undef, $seq, $qual, $mismatch, $dist);
255
print $sam_tmp_h join("\t",$qname, $flag, $rname, $pos, $mapq, $cigar, $mrnm, $mpos, $isize, $seq, $qual, $mismatch, $dist),"\n";
261
return $sam_tmp_f if $self->{'_no_head'};
263
my ($samh, $samf) = $self->tempfile( -dir => $self->{'_tempdir'}, -suffix => '.sam' );
268
# print sequence dictionary
269
unless ($self->{'_no_sq'}) {
270
my $db = Bio::SeqIO->new( -file => $refdb, -format => 'fasta' );
271
while ( my $seq = $db->next_seq() ) {
272
$SQ{$seq->id} = $seq->length if $SQ{$seq->id};
275
map { print $samh join("\t", ('@SQ', "SN:$_", "LN:$SQ{$_}")), "\n" } keys %SQ;
282
open($sam_tmp_h, $sam_tmp_f) or
283
$self->throw("Can not open '$sam_tmp_f' for reading: $!");
285
print $samh $_ while (<$sam_tmp_h>);
293
293
sub _uncompress {
294
my ($self, $file) = @_;
296
unless ($HAVE_IO_UNCOMPRESS) {
297
croak( "IO::Uncompress::Gunzip not available, can't expand '$file'" );
299
my ($tfh, $tf) = $self->tempfile( -dir => $self->{'_tempdir'} );
300
my $z = IO::Uncompress::Gunzip->new($file);
301
while (my $block = $z->getline) { print $tfh $block }
294
my ($self, $file) = @_;
296
unless ($HAVE_IO_UNCOMPRESS) {
297
croak( "IO::Uncompress::Gunzip not available, can't expand '$file'" );
299
my ($tfh, $tf) = $self->tempfile( -dir => $self->{'_tempdir'} );
300
my $z = IO::Uncompress::Gunzip->new($file);
301
while (my $block = $z->getline) { print $tfh $block }
309
my ($self, $file) = @_;
311
$self->throw("'$file' does not exist or is not readable")
312
unless ( -e $file && -r _ );
314
# make a sorted bam file from a sam file input
315
my ($bamh, $bamf) = $self->tempfile( -dir => $self->{'_tempdir'}, -suffix => '.bam' );
316
my ($srth, $srtf) = $self->tempfile( -dir => $self->{'_tempdir'}, -suffix => '.srt' );
317
$_->close for ($bamh, $srth);
319
my $samt = Bio::Tools::Run::Samtools->new( -command => 'view',
323
$samt->run( -bam => $file, -out => $bamf );
325
$samt = Bio::Tools::Run::Samtools->new( -command => 'sort' );
327
$samt->run( -bam => $bamf, -pfx => $srtf);
309
my ($self, $file) = @_;
311
$self->throw("'$file' does not exist or is not readable")
312
unless ( -e $file && -r _ );
314
# make a sorted bam file from a sam file input
315
my ($bamh, $bamf) = $self->tempfile( -dir => $self->{'_tempdir'}, -suffix => '.bam' );
316
my ($srth, $srtf) = $self->tempfile( -dir => $self->{'_tempdir'}, -suffix => '.srt' );
317
$_->close for ($bamh, $srth);
319
my $samt = Bio::Tools::Run::Samtools->new( -command => 'view',
323
$samt->run( -bam => $file, -out => $bamf );
325
$samt = Bio::Tools::Run::Samtools->new( -command => 'sort' );
327
$samt->run( -bam => $bamf, -pfx => $srtf);