20
use Bio::Align::Utilities qw(aa_to_dna_aln);
22
my $dna_aln = aa_to_dna_aln($aaaln,\%dnaseqs);
20
use Bio::Align::Utilities qw(:all);
21
# %dnaseqs is a hash of CDS sequences (spliced)
24
# Even if the protein alignments are local make sure the start/end
25
# stored in the LocatableSeq objects are to the full length protein.
26
# The CoDing Sequence that is passed in should still be the full
27
# length CDS as the nt alignment will be generated.
29
my $dna_aln = &aa_to_dna_aln($aa_aln,\%dnaseqs);
33
my $replicates = &bootstrap_replicates($aln,$count);
42
53
Bioperl modules. Send your comments and suggestions preferably to
43
54
the Bioperl mailing list. Your participation is much appreciated.
45
bioperl-l@bioperl.org - General discussion
46
http://bioperl.org/MailList.shtml - About the mailing lists
56
bioperl-l@bioperl.org - General discussion
57
http://bioperl.org/wiki/Mailing_lists - About the mailing lists
48
59
=head2 Reporting Bugs
50
61
Report bugs to the Bioperl bug tracking system to help us keep track
51
of the bugs and their resolution. Bug reports can be submitted via
62
of the bugs and their resolution. Bug reports can be submitted via the
54
bioperl-bugs@bioperl.org
55
http://bugzilla.bioperl.org/
65
http://bugzilla.open-bio.org/
57
67
=head1 AUTHOR - Jason Stajich
59
69
Email jason@bioperl.org
63
Additional contributors names and emails here
67
73
The rest of the documentation details each of the object methods.
119
129
croak('Must provide a valid Bio::Align::AlignI object as the first argument to aa_to_dna_aln, see the documentation for proper usage and the method signature');
121
131
my $alnlen = $aln->length;
122
#print "HSP length is $alnlen\n";
123
132
my $dnaalign = new Bio::SimpleAlign;
133
$aln->map_chars('\.',$GAP);
124
135
foreach my $seq ( $aln->each_seq ) {
126
my $dnaseq = $dnaseqs->{$seq->display_id} || croak("cannot find ".
128
foreach my $pos ( 1..$alnlen ) {
129
my $loc = $seq->location_from_column($pos);
131
if( !defined $loc || $loc->location_type ne 'EXACT' ) {
136
my $aa_seqstr = $seq->seq();
137
my $id = $seq->display_id;
138
my $dnaseq = $dnaseqs->{$id} || $aln->throw("cannot find ".
140
my $start_offset = ($seq->start - 1) * CODONSIZE;
142
$dnaseq = $dnaseq->seq();
143
my $dnalen = $dnaseqs->{$id}->length;
146
for( my $i = 0; $i < $alnlen; $i++ ) {
147
my $char = substr($aa_seqstr,$i + $start_offset,1);
148
if ( $char eq $GAP || $j >= $dnalen ) {
149
$nt_seqstr .= $CODONGAP;
134
# To readjust to codon boundaries
135
# end needs to be +1 so we can just multiply by CODONSIZE
138
my ($start,$end) = ((($loc->start - 1)* CODONSIZE) +1,
139
($loc->end)* CODONSIZE);
141
if( $start <=0 || $end > $dnaseq->length() ) {
142
print STDERR "start is ", $loc->start, " end is ", $loc->end, " while dnaseq length is ", $dnaseq->length(), " and start/end projected are $start,$end \n";
143
warn("codons don't seem to be matching up for $start,$end");
146
$dna = $dnaseq->subseq($start,$end);
151
$nt_seqstr .= substr($dnaseq,$j,CODONSIZE);
151
# funky looking math is to readjust to codon boundaries and deal
152
# with fact that sequence start with 1
153
my $newdna = new Bio::LocatableSeq(-display_id => $seq->id(),
154
-start => (($seq->start - 1) *
156
-end => ($seq->end * CODONSIZE),
157
-strand => $seq->strand,
155
$nt_seqstr .= $GAP x (($alnlen * 3) - length($nt_seqstr));
157
my $newdna = new Bio::LocatableSeq(-display_id => $id,
159
-start => $start_offset+1,
159
164
$dnaalign->add_seq($newdna);
161
166
return $dnaalign;
169
=head2 bootstrap_replicates
171
Title : bootstrap_replicates
172
Usage : my $alns = &bootstrap_replicates($aln,100);
173
Function: Generate a pseudo-replicate of the data by randomly
174
sampling, with replacement, the columns from an alignment for
175
the non-parametric bootstrap.
176
Returns : Arrayref of L<Bio::SimpleAlign> objects
177
Args : L<Bio::SimpleAlign> object
178
Number of replicates to generate
182
sub bootstrap_replicates {
183
my ($aln,$count) = @_;
185
my $alen = $aln->length;
187
$aln->set_displayname_flat(1);
188
for my $s ( $aln->each_seq ) {
189
push @seqs, $s->seq();
193
while( $count-- > 0 ) {
195
for($i =0; $i < $alen; $i++ ) {
196
my $index = int(rand($alen));
199
$newseqs[$c++] .= substr($_,$index,1);
202
my $newaln = Bio::SimpleAlign->new();
204
for my $s ( @newseqs ) {
206
$newaln->add_seq( Bio::LocatableSeq->new
209
-display_id => $nm[$i++],