1
# $Id: fasta.pm,v 1.14 2003/07/19 22:35:44 jason Exp $
1
# $Id: fasta.pm,v 1.27.4.1 2006/10/02 23:10:12 sendu Exp $
3
3
# BioPerl module for Bio::AlignIO::fasta
5
# based on the Bio::SeqIO::fasta module
6
# by Ewan Birney <birney@sanger.ac.uk>
7
# and Lincoln Stein <lstein@cshl.org>
9
# and the SimpleAlign.pm module of Ewan Birney
11
5
# Copyright Peter Schattner
13
7
# You may distribute this module under the same terms as perl itself
16
8
# POD documentation - main docs before the code
20
Bio::AlignIO::fasta - FastA MSA Sequence input/output stream
12
Bio::AlignIO::fasta - fasta MSA Sequence input/output stream
24
Do not use this module directly. Use it via the L<Bio::AlignIO> class.
16
Do not use this module directly. Use it via the L<Bio::AlignIO>
28
21
This object can transform L<Bio::SimpleAlign> objects to and from
29
fasta flat file databases. This is for the fasta sequence format NOT
30
FastA analysis program. To process the pairwise alignments from a
22
fasta flat file databases. This is for the fasta alignment format, not
23
for the FastA sequence analysis program. To process the alignments from
31
24
FastA (FastX, FastN, FastP, tFastA, etc) use the Bio::SearchIO module.
56
47
# Let the code begin...
58
49
package Bio::AlignIO::fasta;
65
@ISA = qw(Bio::AlignIO);
54
use base qw(Bio::AlignIO);
71
Usage : $aln = $stream->next_aln()
60
Usage : $aln = $stream->next_aln
72
61
Function: returns the next alignment in the stream.
73
Returns : L<Bio::Align::AlignI> object - returns 0 on end of file
62
Returns : Bio::Align::AlignI object - returns 0 on end of file
64
Args : -width => optional argument to specify the width sequence
65
will be written (60 chars by default)
67
See L<Bio::Align::AlignI>
82
my ($start,$end,$name,$seqname,$seq,$seqchar,$tempname,$tempdesc,
84
my $aln = Bio::SimpleAlign->new();
86
while(defined ($entry = $self->_readline) ) {
87
if( $entry =~ s/^>(\S+)\s*// ) {
92
# put away last name and sequence
93
if( $name =~ /(\S+)\/(\d+)-(\d+)/ ) {
100
$end = length($seqchar); #ps 9/6/00
102
# print STDERR "Going to add with $seqchar $seqname\n";
103
$seq = new Bio::LocatableSeq('-seq' =>$seqchar,
104
'-display_id' =>$seqname,
105
'-description'=>$desc,
73
my ($width) = $self->_rearrange([qw(WIDTH)],@_);
74
$self->width($width || $WIDTH);
76
my ($start, $end, $name, $seqname, $seq, $seqchar, $entry,
77
$tempname, $tempdesc, %align, $desc, $maxlen);
78
my $aln = Bio::SimpleAlign->new();
80
while (defined ($entry = $self->_readline) ) {
82
if ( $entry =~ s/^>\s*(\S+)\s*// ) {
86
if ( defined $name ) {
87
# put away last name and sequence
88
if ( $name =~ /(\S+)\/(\d+)-(\d+)/ ) {
95
$end = $self->_get_len($seqchar);
97
$seq = new Bio::LocatableSeq(
99
-display_id => $seqname,
100
-description => $desc,
105
$self->debug("Reading $seqname\n");
113
# removed redundant symbol validation
114
# this is already done in Bio::PrimarySeq
118
# Next two lines are to silence warnings that
119
# otherwise occur at EOF when using <$fh>
120
$name = "" if (!defined $name);
121
$seqchar="" if (!defined $seqchar);
123
# Put away last name and sequence
124
if ( $name =~ /(\S+)\/(\d+)-(\d+)/ ) {
131
$end = $self->_get_len($seqchar);
134
# If $end <= 0, we have either reached the end of
135
# file in <> or we have encountered some other error
141
# This logic now also reads empty lines at the
142
# end of the file. Skip this is seqchar and seqname is null
143
unless ( length($seqchar) == 0 && length($seqname) == 0 ) {
144
$seq = new Bio::LocatableSeq(-seq => $seqchar,
145
-display_id => $seqname,
146
-description => $desc,
109
150
$aln->add_seq($seq);
117
$entry =~ s/[^A-Za-z\.\-]//g;
121
# Next two lines are to silence warnings that
122
# otherwise occur at EOF when using <$fh>
124
if (!defined $name) {$name="";}
125
if (!defined $seqchar) {$seqchar="";}
127
# Put away last name and sequence
128
if( $name =~ /(\S+)\/(\d+)-(\d+)/ ) {
135
$end = length($seqchar); #ps 9/6/00
136
# $end = length($align{$name});
140
# If $end <= 0, we have either reached the end of
141
# file in <> or we have encountered some other error
143
if ($end <= 0) { undef $aln; return $aln;}
145
# This logic now also reads empty lines at the
146
# end of the file. Skip this is seqchar and seqname is null
148
if( length($seqchar) == 0 && length($seqname) == 0 ) {
151
# print STDERR "end to add with $seqchar $seqname\n";
152
$seq = new Bio::LocatableSeq('-seq' => $seqchar,
153
'-display_id' => $seqname,
154
'-description'=> $desc,
161
my $alnlen = $aln->length;
162
foreach my $seq ( $aln->each_seq ) {
163
if( $seq->length < $alnlen ) {
164
my ($diff) = ($alnlen - $seq->length);
165
$seq->seq( $seq->seq() . "-" x $diff);
151
$self->debug("Reading $seqname\n");
153
my $alnlen = $aln->length;
154
foreach my $seq ( $aln->each_seq ) {
155
if ( $seq->length < $alnlen ) {
156
my ($diff) = ($alnlen - $seq->length);
157
$seq->seq( $seq->seq() . "-" x $diff);
191
182
$self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
185
if( $self->force_displayname_flat ) {
186
$aln->set_displayname_flat(1);
194
188
foreach $rseq ( $aln->each_seq() ) {
195
189
$name = $aln->displayname($rseq->get_nse());
196
190
$seq = $rseq->seq();
197
191
$desc = $rseq->description || '';
198
192
$self->_print (">$name $desc\n") or return ;
200
194
$length = length($seq);
201
while( ($count * 60 ) < $length ) {
202
$seqsub = substr($seq,$count*60,60);
203
$self->_print ("$seqsub\n") or return ;
195
if(defined $seq && $length > 0) {
196
$seq =~ s/(.{1,$width})/$1\n/g;
208
203
$self->flush if $self->_flush_on_write && defined $self->_fh;
211
Function: determine number of alphabetic chars
213
Args : sequence string
218
my ($self,$seq) = @_;
219
$seq =~ s/[^A-Z]//gi;
220
return CORE::length($seq);
226
Usage : $obj->width($newwidth)
227
$width = $obj->width;
228
Function: Get/set width of alignment
229
Returns : integer value of width
230
Args : on set, new value (a scalar or undef, optional)
238
return $self->{'_width'} = shift if @_;
239
return $self->{'_width'} || $WIDTH;