17
17
use Bio::SimpleAlign;
18
#you can set the name length to something other than the default 10
19
#if you use a version of phylip (hacked) that accepts ids > 10
18
#you can set the name length to something other than the default 10
19
#if you use a version of phylip (hacked) that accepts ids > 10
20
20
my $phylipstream = new Bio::AlignIO(-format => 'phylip',
23
23
# convert data from one format to another
24
24
my $gcgstream = new Bio::AlignIO(-format => 'msf',
25
-file => 't/data/cysprot1a.msf');
25
-file => 't/data/cysprot1a.msf');
27
27
while( my $aln = $gcgstream->next_aln ) {
28
$phylipstream->write_aln($aln);
28
$phylipstream->write_aln($aln);
31
# do it again with phylip sequential format format
31
# do it again with phylip sequential format format
32
32
$phylipstream->interleaved(0);
33
33
# can also initialize the object like this
34
34
$phylipstream = new Bio::AlignIO(-interleaved => 0,
38
38
$gcgstream = new Bio::AlignIO(-format => 'msf',
39
-file => 't/data/cysprot1a.msf');
39
-file => 't/data/cysprot1a.msf');
41
41
while( my $aln = $gcgstream->next_aln ) {
42
$phylipstream->write_aln($aln);
42
$phylipstream->write_aln($aln);
100
100
Function: Initialize a new L<Bio::AlignIO::phylip> reader or writer
101
101
Returns : L<Bio::AlignIO> object
102
102
Args : [specific for writing of phylip format files]
103
-idlength => integer - length of the id (will pad w/
105
-interleaved => boolean - whether or not write as interleaved
103
-idlength => integer - length of the id (will pad w/
105
-interleaved => boolean - whether or not write as interleaved
106
106
or sequential format
107
-linelength => integer of how long a sequence lines should be
107
-line_length => integer of how long a sequence lines should be
108
108
-idlinebreak => insert a line break after the sequence id
109
so that sequence starts on the next line
109
so that sequence starts on the next line
110
-flag_SI => whether or not write a "S" or "I" just after
111
the num.seq. and line len., in the first line
112
-tag_length => integer of how long the tags have to be in
113
each line between the space separator. set it
114
to 0 to have 1 tag only.
115
-wrap_sequential => boolean for whether or not sequential
116
format should be broken up or a single line
117
default is false (single line)
115
123
$self->SUPER::_initialize(@args);
117
125
my ($interleave,$linelen,$idlinebreak,
118
$idlength) = $self->_rearrange([qw(INTERLEAVED
126
$idlength, $flag_SI, $tag_length,$ws) =
127
$self->_rearrange([qw(INTERLEAVED
133
WRAP_SEQUENTIAL)],@args);
122
134
$self->interleaved(1) if( $interleave || ! defined $interleave);
123
135
$self->idlength($idlength || $DEFAULTIDLENGTH);
124
136
$self->id_linebreak(1) if( $idlinebreak );
125
137
$self->line_length($linelen) if defined $linelen && $linelen > 0;
138
$self->flag_SI(1) if ( $flag_SI );
139
$self->tag_length($tag_length) if ( $tag_length || $DEFAULTTAGLEN );
140
$self->wrap_sequential($ws ? 1 : 0);
144
159
my ($seqcount, $residuecount, %hash, $name,$str,
145
160
@names,$seqname,$start,$end,$count,$seq);
147
162
my $aln = Bio::SimpleAlign->new(-source => 'phylip');
148
$entry = $self->_readline and
163
$entry = $self->_readline and
149
164
($seqcount, $residuecount) = $entry =~ /\s*(\d+)\s+(\d+)/;
150
165
return 0 unless $seqcount and $residuecount;
152
167
# first alignment section
153
168
my $idlen = $self->idlength;
156
my $non_interleaved = ! $self->interleaved ;
171
my $interleaved = $self->interleaved;
158
172
while( $entry = $self->_readline) {
159
last if( $entry =~ /^\s?$/ && ! $non_interleaved );
173
last if( $entry =~ /^\s?$/ && $interleaved );
161
if( $entry =~ /^\s+(\d+)\s+(\d+)\s*$/) {
175
if( $entry =~ /^\s+(\d+)\s+(\d+)\s*$/) {
162
176
$self->_pushback($entry);
165
179
if( $entry =~ /^\s+(.+)$/ ) {
167
$non_interleaved = 1;
169
unless( ! $non_interleaved ) {
170
$count = scalar @names;
171
$hash{$count} .= $str;
173
$hash{$iter++} .= $str;
174
$iter = 1 if $iter > $count;
176
} elsif( $entry =~ /^(.{$idlen})\s+(.*)\s$/ ||
183
$count = scalar @names;
184
$hash{$count} .= $str;
186
} elsif( $entry =~ /^(.{$idlen})\s+(.*)\s$/ ||
177
187
$entry =~ /^(.{$idlen})(\S{$idlen}\s+.+)\s$/ # Handle weirdnes s when id is too long
181
191
$name =~ s/[\s\/]/_/g;
182
192
$name =~ s/_+$//; # remove any trailing _'s
183
194
push @names, $name;
185
196
$count = scalar @names;
186
197
$hash{$count} = $str;
198
} elsif( $interleaved ) {
199
if( $entry =~ /^(\S+)\s+(.+)/ ||
200
$entry =~ /^(.{$idlen})(.*)\s$/ ) {
203
$name =~ s/[\s\/]/_/g;
204
$name =~ s/_+$//; # remove any trailing _'s
207
$count = scalar @names;
208
$hash{$count} = $str;
210
$self->debug("unmatched line: $entry");
188
$self->throw("Not a valid interleaved PHYLIP file!") if $count > $seqcount;
213
$self->throw("Not a valid interleaved PHYLIP file!") if $count > $seqcount;
191
unless( $non_interleaved ) {
192
217
# interleaved sections
194
219
while( $entry = $self->_readline) {
196
220
# finish current entry
197
221
if($entry =~/\s*\d+\s+\d+/){
198
222
$self->_pushback($entry);
283
my $width = $self->line_length();
260
284
my ($length,$date,$name,$seq,$miss,$pad,
261
%hash,@arr,$tempcount,$index,$idlength);
285
%hash,@arr,$tempcount,$index,$idlength,$flag_SI,$line_length, $tag_length);
263
287
foreach my $aln (@aln) {
264
if( ! $aln || ! $aln->isa('Bio::Align::AlignI') ) {
288
if( ! $aln || ! $aln->isa('Bio::Align::AlignI') ) {
265
289
$self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
268
$self->throw("All sequences in the alignment must be the same length")
292
$self->throw("All sequences in the alignment must be the same length")
269
293
unless $aln->is_flush(1) ;
295
$flag_SI = $self->flag_SI();
271
296
$aln->set_displayname_flat(); # plain
272
297
$length = $aln->length();
273
$self->_print (sprintf(" %s %s\n", $aln->no_sequences, $aln->length));
299
if ($self->interleaved() ) {
300
$self->_print (sprintf(" %s %s I\n", $aln->no_sequences, $aln->length));
302
$self->_print (sprintf(" %s %s S\n", $aln->no_sequences, $aln->length));
305
$self->_print (sprintf(" %s %s\n", $aln->no_sequences, $aln->length));
275
$idlength = $self->idlength();
308
$idlength = $self->idlength();
309
$line_length = $self->line_length();
310
$tag_length = $self->tag_length();
276
311
foreach $seq ( $aln->each_seq() ) {
277
312
$name = $aln->displayname($seq->get_nse);
278
313
$name = substr($name, 0, $idlength) if length($name) > $idlength;
279
$name = sprintf("%-".$idlength."s",$name);
314
$name = sprintf("%-".$idlength."s",$name);
280
315
if( $self->interleaved() ) {
282
} elsif( $self->id_linebreak) {
317
} elsif( $self->id_linebreak) {
286
#phylip needs dashes not dots
287
my $seq = $seq->seq();
321
#phylip needs dashes not dots
322
my $seq = $seq->seq();
289
324
$hash{$name} = $seq;
290
325
push(@arr,$name);
293
328
if( $self->interleaved() ) {
294
while( $count < $length ) {
330
if ($tag_length <= $line_length) {
331
$numtags = floor($line_length/$tag_length);
332
$line_length = $tag_length*$numtags;
336
while( $count < $length ) {
296
338
# there is another block to go!
297
339
foreach $name ( @arr ) {
298
340
my $dispname = $name;
299
341
$dispname = '' if $wrapped;
300
342
$self->_print (sprintf("%".($idlength+3)."s",$dispname));
301
343
$tempcount = $count;
303
while( ($tempcount + $idlength < $length) && ($index < 5) ) {
345
$self->debug("residue count: $count\n") if ($count%100000 == 0);
346
while( ($tempcount + $tag_length < $length) &&
347
($index < $numtags) ) {
304
348
$self->_print (sprintf("%s ",substr($hash{$name},
307
$tempcount += $idlength;
351
$tempcount += $tag_length;
355
if( $index < $numtags) {
312
356
# space to print!
313
357
$self->_print (sprintf("%s ",substr($hash{$name},
315
$tempcount += $idlength;
359
$tempcount += $tag_length;
317
361
$self->_print ("\n");
319
363
$self->_print ("\n");
320
364
$count = $tempcount;
324
368
foreach $name ( @arr ) {
325
369
my $dispname = $name;
326
$dispname = '' if $wrapped;
327
$self->_print (sprintf("%s%s\n",$dispname,$hash{$name}));
370
my $line = sprintf("%s%s\n",$dispname,$hash{$name});
371
if( $self->wrap_sequential ) {
372
$line =~ s/(.{1,$width})/$1\n/g;
374
$self->_print ($line);
331
378
$self->flush if $self->_flush_on_write && defined $self->_fh;
347
394
my ($self,$value) = @_;
348
395
my $previous = $self->{'_interleaved'};
349
if( defined $value ) {
396
if( defined $value ) {
350
397
$self->{'_interleaved'} = $value;
352
399
return $previous;
405
Usage : my $flag = $obj->flag_SI
406
Function: Get/Set if the Sequential/Interleaved flag has to be shown
407
after the number of sequences and sequence length
416
my ($self,$value) = @_;
417
my $previous = $self->{'_flag_SI'};
418
if( defined $value ) {
419
$self->{'_flag_SI'} = $value;
358
Usage : my $idlength = $obj->interleaved
359
Function: Get/Set value of id length
427
Usage : my $idlength = $obj->idlength
428
Function: Get/Set value of id length
386
455
my ($self,$value) = @_;
387
456
if( defined $value) {
388
$self->{'line_length'} = $value;
390
return $self->{'line_length'} || $DEFAULTLINELEN;
457
$self->{'_line_length'} = $value;
459
return $self->{'_line_length'} || $DEFAULTLINELEN;
466
Usage : $obj->tag_length($newval)
468
Example : my $tag_length = $obj->tag_length
469
Returns : value of the length for each space-separated tag in a line
470
Args : newvalue (optional) - set to zero to have one tag per line
476
my ($self,$value) = @_;
477
if( defined $value) {
478
$self->{'_tag_length'} = $value;
480
return $self->{'_tag_length'} || $DEFAULTTAGLEN;
394
484
=head2 id_linebreak
396
486
Title : id_linebreak
397
487
Usage : $obj->id_linebreak($newval)
399
489
Returns : value of id_linebreak
400
490
Args : newvalue (optional)