2
# -*-Perl-*- (for my emacs)
9
bp_heterogeneity_test - a test for distinguishing between selection and population expansion.
13
heterogenetity_test -mut_1/--mutsyn synonymous_mut_count -mut_2/--mutnon nonsyn_mut_count -s/--smaplesize sample_size [-i/--iterations iterations] [-o/--observed observed_D] [-v/--verbose] [--silent ] [-m/--method tajimaD or fuD] [--precision]
17
This is an implementation of the Heterogenetity test as described in
18
Hahn MW, Rausher MD, and Cunningham CW. 2002. Genetics 161(1):11-20.
22
Options in brackets above are optional
24
-s or --samplesize samplesize
25
-mut_1 or --mutsyn synonymous mutation count
26
-mut_2 or --mutnon nonsynonmous mutation count
27
-i or --iterations number of iterations
28
-o or --observed observed D
29
-m or --method tajimaD or fuD for Tajima's D or Fu and Li's D
30
-v or --verbose print out extra verbose messages
31
--silent Be extra quiet
32
--precision Level of precision - specify the number of digits
35
=head2 AUTHOR Matthew Hahn E<lt>matthew.hahn-at-duke.eduE<gt>
37
For more information contact:
39
Matthew Hahn, E<lt>matthew.hahn-at-duke.eduE<gt>
40
Jason Stajich E<lt>jason-at-bioperl-dot-orgE<gt>
45
use Bio::PopGen::Simulation::Coalescent;
46
use Bio::PopGen::Statistics;
47
use Bio::PopGen::Individual;
48
use Bio::PopGen::Genotype;
51
my $mut_count_1 = 10; # synonymous
52
my $mut_count_2 = 20; # non-synonymous
55
my $observedD = undef;
58
my $precision = '4'; # Let's make the random precision between
59
# 0->1 to 1000th digits
62
's|samplesize|samp_size:i' => \$sample_size,
63
'mut_1|mutsyn:i' => \$mut_count_1,
64
'mut_2|mutnon:i' => \$mut_count_2,
65
'i|iterations:i' => \$iterations,
66
'o|obsered|observedD:f' => \$observedD,
67
'v|verbose' => \$verbose,
68
'm|method:s' => \$method,
70
'silent' => sub { $verbose = -1; },
71
'p|precision:i' => \$precision,
79
if( $method ne 'fuD' and $method ne 'tajimaD' ) {
80
die("available methods are [fu and li's D] (fuD) and Tajima's D (tajimaD)");
83
printf("sample size is %d iteration count = %d\n", $sample_size,
86
my $sim = new Bio::PopGen::Simulation::Coalescent
87
(-sample_size => $sample_size);
89
for(my $iter = 0; $iter < $iterations; $iter++ ) {
90
my $tree = $sim->next_tree($sample_size);
92
if( $mut_count_1 > 0 ) {
93
$sim->add_Mutations($tree,$mut_count_1,$precision);
95
my @leaves = $tree->get_leaf_nodes;
96
# the outgroup is just an individual with the ancestral state
98
my $outgroup = new Bio::PopGen::Individual();
99
foreach my $m ( $leaves[0]->get_marker_names ) {
100
$outgroup->add_Genotype(Bio::PopGen::Genotype->new
104
if( $method eq 'fuD' ) {
105
$f1 = Bio::PopGen::Statistics->fu_and_li_D(\@leaves,[$outgroup]);
106
} elsif( $method eq 'tajimaD' ) {
107
$f1 = Bio::PopGen::Statistics->tajima_D(\@leaves);
109
print "(mutation count = $mut_count_1) D=$f1\n"
114
if( $mut_count_2 > 0 ) {
115
$sim->add_Mutations($tree,$mut_count_2,$precision);
116
my @leaves = $tree->get_leaf_nodes;
117
# the outgroup is just an individual with the ancestral state
119
my $outgroup = new Bio::PopGen::Individual();
120
foreach my $m ( $leaves[0]->get_marker_names ) {
121
$outgroup->add_Genotype(Bio::PopGen::Genotype->new
125
if( $method eq 'fuD' ) {
126
$f2 = Bio::PopGen::Statistics->fu_and_li_D(\@leaves,[$outgroup]);
127
} elsif( $method eq 'tajimaD' ) {
128
$f2 = Bio::PopGen::Statistics->tajima_D(\@leaves);
130
print "(mutation count = $mut_count_2) D=$f2\n" if( $verbose >= 0);
133
my $deltaD = ( $f1 - $f2 );
134
push @D_distribution, $deltaD;
135
if( $iter % 10 == 0 && $iter > 0 ) {
136
print STDERR "iter = $iter\n";
140
if( defined $observedD && $iterations > 1 ) {
141
my @sortedD = sort { $a <=> $b } @D_distribution;
143
for($i = 0; $i < scalar @sortedD; $i++ ) {
144
if( $sortedD[$i] > $observedD ) {
149
printf( "index %d value=%.4f out of %d total (obs=%.4f)\n",
150
$i, $sortedD[$i], scalar @sortedD, $observedD);