1
# $Id: Coalescent.pm,v 1.6 2003/12/15 16:15:00 jason Exp $
3
# BioPerl module for Bio::PopGen::Simulation::Coalescent
5
# Cared for by Jason Stajich <jason-at-bioperl-dot-org>
7
# Copyright Jason Stajich
9
# You may distribute this module under the same terms as perl itself
11
# POD documentation - main docs before the code
15
Bio::PopGen::Simulation::Coalescent - A Coalescent simulation factory
19
use Bio::PopGen::Simulation::Coalescent;
21
my $sim = new Bio::PopGen::Simluation::Coalescent( -samples => \@taxonnames);
23
# or for anonymous samples
25
my $factory = new Bio::PopGen::Simluation::Coalescent( -sample_size => 6,
28
my $tree = $factory->next_tree;
30
# add 20 mutations randomly to the tree
31
$factory->add_Mutations($tree,20);
35
Builds a random tree every time next_tree is called or up to -maxcount
36
times with branch lengths and provides the ability to randomly add
37
mutations onto the tree with a probabilty proportional to the branch
40
This algorithm is based on the make_tree algorithm from Richard Hudson 1990.
42
Hudson, R. R. 1990. Gene genealogies and the coalescent
43
process. Pp. 1-44 in D. Futuyma and J. Antonovics, eds. Oxford
44
surveys in evolutionary biology. Vol. 7. Oxford University
47
This module was previously named Bio::Tree::RandomTree
53
User feedback is an integral part of the evolution of this and other
54
Bioperl modules. Send your comments and suggestions preferably to
55
the Bioperl mailing list. Your participation is much appreciated.
57
bioperl-l@bioperl.org - General discussion
58
http://bioperl.org/MailList.shtml - About the mailing lists
62
Report bugs to the Bioperl bug tracking system to help us keep track
63
of the bugs and their resolution. Bug reports can be submitted via
66
http://bugzilla.bioperl.org/
68
=head1 AUTHOR - Jason Stajich, Matthew Hahn
70
Email jason-at-bioperl-dot-org
71
Email matthew-dot-hahn-at-duke-dot-edu
75
Additional contributors names and emails here
79
The rest of the documentation details each of the object methods.
80
Internal methods are usually preceded with a _
85
# Let the code begin...
88
package Bio::PopGen::Simulation::Coalescent;
89
use vars qw(@ISA $PRECISION_DIGITS);
92
$PRECISION_DIGITS = 3; # Precision for the branchlength
94
use Bio::Factory::TreeFactoryI;
96
use Bio::Tree::AlleleNode;
97
use Bio::PopGen::Genotype;
100
@ISA = qw(Bio::Root::Root Bio::Factory::TreeFactoryI );
106
Usage : my $obj = new Bio::PopGen::Simulation::Coalescent();
107
Function: Builds a new Bio::PopGen::Simulation::Coalescent object
108
Returns : an instance of Bio::PopGen::Simulation::Coalescent
109
Args : -samples => arrayref of sample names
111
-sample_size=> number of samples (samps will get a systematic name)
112
-maxcount => [optional] maximum number of trees to provide
117
my ($class,@args) = @_;
118
my $self = $class->SUPER::new(@args);
120
$self->{'_treecounter'} = 0;
121
$self->{'_maxcount'} = 0;
122
my ($maxcount, $samps,$samplesize ) = $self->_rearrange([qw(MAXCOUNT
128
if( ! defined $samps ) {
129
if( ! defined $samplesize || $samplesize <= 0 ) {
130
$self->throw("Must specify a valid samplesize if parameter -SAMPLE is not specified");
132
foreach ( 1..$samplesize ) { push @samples, "Samp$_"; }
134
if( ref($samps) =~ /ARRAY/i ) {
135
$self->throw("Must specify a valid ARRAY reference to the parameter -SAMPLES, did you forget a leading '\\'?");
140
$self->samples(\@samples);
141
$self->sample_size(scalar @samples);
142
defined $maxcount && $self->maxcount($maxcount);
149
Usage : my $tree = $factory->next_tree
150
Function: Returns a random tree based on the initialized number of nodes
151
NOTE: if maxcount is not specified on initialization or
152
set to a valid integer, subsequent calls to next_tree will
153
continue to return random trees and never return undef
154
Returns : Bio::Tree::TreeI object
161
# If maxcount is set to something non-zero then next tree will
162
# continue to return valid trees until maxcount is reached
163
# otherwise will always return trees
164
return undef if( $self->maxcount &&
165
$self->{'_treecounter'}++ >= $self->maxcount );
166
my $size = $self->sample_size;
172
for($in=0;$in < 2*$size -1; $in++ ) {
173
push @tree, { 'nodenum' => "Node$in" };
175
# in C we would have 2 arrays
176
# an array of nodes (tree)
177
# and array of pointers to these nodes (list)
178
# and we just shuffle the list items to do the
179
# tree topology generation
180
# instead in perl, we will have a list of hashes (nodes) called @tree
181
# and a list of integers representing the indexes in tree called @list
183
for($in=0;$in < $size;$in++) {
184
$tree[$in]->{'time'} = 0;
185
$tree[$in]->{'desc1'} = undef;
186
$tree[$in]->{'desc2'} = undef;
191
# generate times for the nodes
192
for($in = $size; $in > 1; $in-- ) {
193
$t+= -2.0 * log(1 - $self->random(1)) / ( $in * ($in-1) );
194
$tree[2 * $size - $in]->{'time'} =$t;
196
# topology generation
197
for ($in = $size; $in > 1; $in-- ) {
198
my $pick = int $self->random($in);
199
my $nodeindex = $list[$pick];
200
my $swap = 2 * $size - $in;
201
$tree[$swap]->{'desc1'} = $nodeindex;
202
$list[$pick] = $list[$in-1];
203
$pick = int rand($in - 1);
204
$nodeindex = $list[$pick];
205
$tree[$swap]->{'desc2'} = $nodeindex;
206
$list[$pick] = $swap;
208
# Let's convert the hashes into nodes
211
foreach my $n ( @tree ) {
213
new Bio::Tree::AlleleNode(-id => $n->{'nodenum'},
214
-branch_length => $n->{'time'});
217
foreach my $node ( @nodes ) {
218
my $n = $tree[$ct++];
219
if( defined $n->{'desc1'} ) {
220
$node->add_Descendent($nodes[$n->{'desc1'}]);
222
if( defined $n->{'desc2'} ) {
223
$node->add_Descendent($nodes[$n->{'desc2'}]);
226
my $T = Bio::Tree::Tree->new(-root => pop @nodes );
232
Title : add_Mutations
233
Usage : $factory->add_Mutations($tree, $mutcount);
234
Function: Adds mutations to a tree via a random process weighted by
235
branch length (it is a poisson distribution
236
as part of a coalescent process)
238
Args : $tree - Bio::Tree::TreeI
239
$nummut - number of mutations
240
$precision - optional # of digits for precision
246
my ($self,$tree, $nummut,$precision) = @_;
247
$precision ||= $PRECISION_DIGITS;
248
$precision = 10**$precision;
254
my @nodes = $tree->get_nodes();
257
# Jason's somewhat simplistics way of doing a poission
258
# distribution for a fixed number of mutations
259
# build an array and put the node number in a slot
260
# representing the branch to put a mutation on
261
# but weight the number of slots per branch by the
262
# length of the branch ( ancestor's time - node time)
264
foreach my $node ( @nodes ) {
265
if( $node->ancestor ) {
266
my $len = int ( ($node->ancestor->branch_length -
267
$node->branch_length) * $precision);
269
for( my $j =0;$j < $len;$j++) {
276
if( ! $node->isa('Bio::Tree::AlleleNode') ) {
277
bless $node, 'Bio::Tree::AlleleNode'; # rebless it to the right node
279
# This let's us reset the stored genotypes so we can keep reusing the
280
# same tree topology, but throw down mutations multiple times
281
$node->reset_Genotypes;
285
die("branch len is $branchlen arraylen is $last")
286
unless ( $branchlen == $last );
288
for( my $j = 0; $j < $nummut; $j++) {
289
my $index = int(rand($branchlen));
290
my $branch = $branches[$index];
292
# We're using an infinite sites model so every new
293
# mutation is a new site
294
my $g = new Bio::PopGen::Genotype(-marker_name => "Mutation$j",
296
$nodes[$branch]->add_Genotype($g);
297
push @mutations, "Mutation$j";
298
# Let's add this mutation to all the children (push it down
299
# the branches to the tips)
300
foreach my $child ( $nodes[$branch]->get_all_Descendents ) {
301
$child->add_Genotype($g);
304
# Insure that everyone who doesn't have the mutation
305
# has the ancestral state, which is '0'
306
foreach my $node ( @nodes ) {
307
foreach my $m ( @mutations ) {
308
if( ! $node->has_Marker($m) ) {
309
my $emptyg = new Bio::PopGen::Genotype(-marker_name => $m,
311
$node->add_Genotype($emptyg);
320
Usage : $obj->maxcount($newval)
322
Returns : Maxcount value
323
Args : newvalue (optional)
329
my ($self,$value) = @_;
330
if( defined $value) {
331
if( $value =~ /^(\d+)/ ) {
332
$self->{'maxcount'} = $1;
334
$self->warn("Must specify a valid Positive integer to maxcount");
335
$self->{'maxcount'} = 0;
338
return $self->{'_maxcount'};
344
Usage : $obj->samples($newval)
347
Returns : value of samples
348
Args : newvalue (optional)
354
my ($self,$value) = @_;
355
if( defined $value) {
356
if( ref($value) !~ /ARRAY/i ) {
357
$self->warn("Must specify a valid array ref to the method 'samples'");
360
$self->{'samples'} = $value;
362
return $self->{'samples'};
369
Usage : $obj->sample_size($newval)
372
Returns : value of sample_size
373
Args : newvalue (optional)
379
my ($self,$value) = @_;
380
if( defined $value) {
381
$self->{'sample_size'} = $value;
383
return $self->{'sample_size'};
390
Usage : my $rfloat = $node->random($size)
391
Function: Generates a random number between 0 and $size
392
This is abstracted so that someone can override and provide their
393
own special RNG. This is expected to be a uniform RNG.
394
Returns : Floating point random
395
Args : $maximum size for random number (defaults to 1)
401
my ($self,$max) = @_;