1
# $Id: selex.pm,v 1.10 2002/10/22 07:38:26 lapp Exp $
1
# $Id: selex.pm,v 1.14.4.3 2006/10/02 23:10:12 sendu Exp $
3
3
# BioPerl module for Bio::AlignIO::selex
5
# based on the Bio::SeqIO::selex module
6
# by Ewan Birney <birney@sanger.ac.uk>
5
# based on the Bio::SeqIO::selex module
6
# by Ewan Birney <birney@ebi.ac.uk>
7
7
# and Lincoln Stein <lstein@cshl.org>
9
9
# and the SimpleAlign.pm module of Ewan Birney
24
Do not use this module directly. Use it via the L<Bio::AlignIO> class.
24
# Do not use this module directly. Use it via the L<Bio::AlignIO> class.
29
my $in = Bio::AlignIO->new(-format => 'selex',
30
-file => 't/data/testaln.selex');
31
while( my $aln = $in->next_aln ) {
33
42
=head2 Reporting Bugs
35
44
Report bugs to the Bioperl bug tracking system to help us keep track of
36
the bugs and their resolution.
37
Bug reports can be submitted via email or the web:
45
the bugs and their resolution. Bug reports can be submitted via the
39
bioperl-bugs@bio.perl.org
40
http://bugzilla.bioperl.org/
48
http://bugzilla.open-bio.org/
42
50
=head1 AUTHORS - Peter Schattner
44
52
Email: schattner@alum.mit.edu
56
Jason Stajich, jason-at-bioperl.org
79
my ($start,$end,%align,$name,$seqname,$seq,$count,%hash,%c2name, %accession, $no);
88
my ($start,$end,%align,$name,$seqname,%hash,@c2name, %accession,%desc);
80
89
my $aln = Bio::SimpleAlign->new(-source => 'selex');
82
91
# in selex format, every non-blank line that does not start
83
92
# with '#=' is an alignment segment; the '#=' lines are mark up lines.
84
93
# Of particular interest are the '#=GF <name/st-ed> AC <accession>'
85
94
# lines, which give accession numbers for each segment
87
95
while( $entry = $self->_readline) {
88
$entry =~ /^\#=GS\s+(\S+)\s+AC\s+(\S+)/ && do {
89
$accession{ $1 } = $2;
92
$entry !~ /^([^\#]\S+)\s+([A-Za-z\.\-]+)\s*/ && next;
96
if( $entry =~ /^\#=GS\s+(\S+)\s+AC\s+(\S+)/ ) {
97
$accession{ $1 } = $2;
99
} elsif( $entry =~ /^\#=GS\s+(\S+)\s+DE\s+(.+)\s*$/ ) {
101
} elsif ( $entry =~ /^([^\#]\S+)\s+([A-Za-z\.\-\*]+)\s*/ ) {
102
my ($name,$seq) = ($1,$2);
97
if( ! defined $align{$name} ) {
99
$c2name{$count} = $name;
104
if( ! defined $align{$name} ) {
107
$align{$name} .= $seq;
101
$align{$name} .= $seq;
104
110
# ok... now we can make the sequences
107
foreach $no ( sort { $a <=> $b } keys %c2name ) {
108
$name = $c2name{$no};
112
foreach my $name ( @c2name ) {
110
114
if( $name =~ /(\S+)\/(\d+)-(\d+)/ ) {
117
121
$end = length($align{$name});
119
$seq = new Bio::LocatableSeq('-seq'=>$align{$name},
124
'-accession_number' => $accession{$name},
123
my $seq = new Bio::LocatableSeq
124
('-seq' => $align{$name},
125
'-display_id' => $seqname,
128
'-description' => $desc{$name},
129
'-accession_number' => $accession{$name},
128
132
$aln->add_seq($seq);
132
135
# If $end <= 0, we have either reached the end of
133
136
# file in <> or we have encountered some other error
135
if ($end <= 0) { undef $aln;}
138
return if ($end <= 0);