3
PhyML: a program that computes maximum likelihood phylogenies from
4
DNA or AA homologous sequences.
6
Copyright (C) Stephane Guindon. Oct 2003 onward.
8
All parts of the source except where indicated are distributed under
9
the GNU public licence. See http://www.opensource.org for details.
16
//////////////////////////////////////////////////////////////
17
//////////////////////////////////////////////////////////////
19
int GEO_Main(int argc, char **argv)
21
GEO_Simulate_Estimate(argc,argv);
22
/* GEO_Estimate(argc,argv); */
26
//////////////////////////////////////////////////////////////
27
//////////////////////////////////////////////////////////////
29
int GEO_Estimate(int argc, char **argv)
49
printf("\n. Seed = %d",seed);
52
t = GEO_Make_Geo_Basic();
53
GEO_Init_Geo_Struct(t);
55
fp = Openfile(argv[1],0); /* Open tree file */
57
tree = Read_Tree_File_Phylip(fp); /* Read it */
58
Update_Ancestors(tree->n_root,tree->n_root->v[2],tree);
59
Update_Ancestors(tree->n_root,tree->n_root->v[1],tree);
60
tree->rates = RATES_Make_Rate_Struct(tree->n_otu);
61
RATES_Init_Rate_Struct(tree->rates,NULL,tree->n_otu);
65
GEO_Read_In_Landscape(argv[2],t,&ldscp,&loc_hash,tree);
67
GEO_Make_Geo_Complete(t->ldscape_sz,t->n_dim,tree->n_otu,t);
69
For(i,t->ldscape_sz*t->n_dim) t->ldscape[i] = ldscp[i];
70
For(i,tree->n_otu) t->loc[i] = loc_hash[i];
72
t->cov[0*t->n_dim+0] = t->sigma;
73
t->cov[1*t->n_dim+1] = t->sigma;
74
t->cov[0*t->n_dim+1] = 0.0;
75
t->cov[1*t->n_dim+0] = 0.0;
79
t->max_sigma = t->sigma_thresh * 2.;
81
PhyML_Printf("\n. Sigma max: %f",t->sigma_thresh);
83
GEO_Get_Locations_Beneath(t,tree);
85
probs = (phydbl *)mCalloc(tree->geo->ldscape_sz,sizeof(phydbl));
88
For(i,tree->geo->ldscape_sz) sum += tree->geo->loc_beneath[tree->n_root->num * tree->geo->ldscape_sz + i];
89
For(i,tree->geo->ldscape_sz) probs[i] = tree->geo->loc_beneath[tree->n_root->num * tree->geo->ldscape_sz + i]/sum;
90
tree->geo->loc[tree->n_root->num] = Sample_i_With_Proba_pi(probs,tree->geo->ldscape_sz);
93
GEO_Randomize_Locations(tree->n_root,t,tree);
96
GEO_Update_Occup(t,tree);
99
PhyML_Printf("\n. Init loglk: %f",tree->geo->c_lnL);
101
DR_Draw_Tree("essai.ps",tree);
112
//////////////////////////////////////////////////////////////
113
//////////////////////////////////////////////////////////////
115
int GEO_Simulate_Estimate(int argc, char **argv)
119
t_tree *tree,*ori_tree;
131
s = (char *)mCalloc(T_MAX_NAME,sizeof(char));
135
sprintf(s+strlen(s),".%d",pid);
142
printf("\n. Seed = %d",seed);
145
t = GEO_Make_Geo_Basic();
146
GEO_Init_Geo_Struct(t);
150
/* t->lbda = 0.02; */
151
/* t->sigma = 10.; */
154
t->ldscape_sz = (int)atoi(argv[1]);
156
n_tax = (int)atoi(argv[2]);
159
GEO_Make_Geo_Complete(t->ldscape_sz,t->n_dim,n_tax,t);
162
t->cov[0*t->n_dim+0] = t->sigma;
163
t->cov[1*t->n_dim+1] = t->sigma;
164
t->cov[0*t->n_dim+1] = 0.0;
165
t->cov[1*t->n_dim+0] = 0.0;
167
GEO_Simulate_Coordinates(t->ldscape_sz,t);
169
GEO_Get_Sigma_Max(t);
171
t->max_sigma = t->sigma_thresh * 2.;
173
PhyML_Printf("\n. sigma max: %f threshold: %f",t->max_sigma,t->sigma_thresh);
175
t->tau = Uni()*(t->max_tau/100.-t->min_tau*10.) + t->min_tau*10.;
176
t->lbda = EXP(Uni()*(LOG(t->max_lbda/100.)-LOG(t->min_lbda*10.)) + LOG(t->min_lbda*10.));
177
t->sigma = Uni()*(t->max_sigma-t->min_sigma) + t->min_sigma;
180
PhyML_Fprintf(fp,"\n# SigmaTrue\t SigmaThresh\t LbdaTrue\t TauTrue\txTrue\t yTrue\t xRand\t yRand\t RF\t Sigma5\t Sigma50\t Sigma95\t ProbSigmaInfThresh\t Lbda5\t Lbda50\t Lbda95\t ProbLbdaInf1\t Tau5\t Tau50\t Tau95\t X5\t X50\t X95\t Y5\t Y50\t Y95\t RandX5\t RandX50\t RandX95\t RandY5\t RandY50\t RandY95\t Mantel\t");
181
PhyML_Fprintf(fp,"\n");
183
tree = GEO_Simulate(t,n_tax);
185
ori_tree = Make_Tree_From_Scratch(tree->n_otu,NULL);
186
Copy_Tree(tree,ori_tree);
188
Random_SPRs_On_Rooted_Tree(tree);
192
Get_Bip(ori_tree->a_nodes[0],ori_tree->a_nodes[0]->v[0],ori_tree);
196
Get_Bip(tree->a_nodes[0],tree->a_nodes[0]->v[0],tree);
197
Match_Tip_Numbers(tree,ori_tree);
199
rf = (phydbl)Compare_Bip(ori_tree,tree,NO)/(tree->n_otu-3);
200
PhyML_Printf("\n. rf: %f",rf);
203
scale = EXP(Rnorm(0,0.2));
204
PhyML_Printf("\n. Scale: %f",scale);
205
For(i,2*tree->n_otu-1) tree->rates->nd_t[i] *= scale;
208
phydbl *tree_dist,*geo_dist;
211
Time_To_Branch(tree);
212
tree_dist = Dist_Btw_Tips(tree);
214
geo_dist = (phydbl *)mCalloc(tree->n_otu*tree->n_otu,sizeof(phydbl));
220
geo_dist[i*tree->n_otu+j] =
221
FABS(t->ldscape[t->loc[i]*t->n_dim+0] -
222
t->ldscape[t->loc[j]*t->n_dim+0])+
223
FABS(t->ldscape[t->loc[i]*t->n_dim+1] -
224
t->ldscape[t->loc[j]*t->n_dim+1]);
229
printf("\n. tau: %f lbda: %f sigma: %f",t->tau,t->lbda,t->sigma);
230
mantel_p_val = Mantel(tree_dist,geo_dist,tree->n_otu,tree->n_otu);
231
printf("\n. Mantel test p-value: %f",mantel_p_val);
236
rand_loc = Rand_Int(0,t->ldscape_sz-1);
238
PhyML_Printf("\n. sigma: %f\t lbda: %f\t tau:%f\t x:%f\t y:%f rand.x:%f\t rand.y:%f\t",
242
t->ldscape[t->loc[tree->n_root->num]*t->n_dim+0],
243
t->ldscape[t->loc[tree->n_root->num]*t->n_dim+1],
244
t->ldscape[rand_loc*t->n_dim+0],
245
t->ldscape[rand_loc*t->n_dim+1]);
247
PhyML_Fprintf(fp,"%f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t",
252
t->ldscape[t->loc[tree->n_root->num]*t->n_dim+0],
253
t->ldscape[t->loc[tree->n_root->num]*t->n_dim+1],
254
t->ldscape[rand_loc*t->n_dim+0],
255
t->ldscape[rand_loc*t->n_dim+1],
258
GEO_Get_Locations_Beneath(t,tree);
260
probs = (phydbl *)mCalloc(tree->geo->ldscape_sz,sizeof(phydbl));
263
For(i,tree->geo->ldscape_sz) sum += tree->geo->loc_beneath[tree->n_root->num * tree->geo->ldscape_sz + i];
264
For(i,tree->geo->ldscape_sz) probs[i] = tree->geo->loc_beneath[tree->n_root->num * tree->geo->ldscape_sz + i]/sum;
265
tree->geo->loc[tree->n_root->num] = Sample_i_With_Proba_pi(probs,tree->geo->ldscape_sz);
267
GEO_Randomize_Locations(tree->n_root,tree->geo,tree);
269
GEO_Update_Occup(t,tree);
271
PhyML_Printf("\n. Init loglk: %f",tree->geo->c_lnL);
274
res = GEO_MCMC(tree);
276
PhyML_Fprintf(fp,"%f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f \n",
278
/* Sigma5 */ Quantile(res+0*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025),
279
/* Sigma50*/ Quantile(res+0*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50),
280
/* Sigma95*/ Quantile(res+0*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975),
282
/* ProbInfThesh */ Prob(res+0*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval,t->sigma_thresh),
284
/* Lbda5 */ Quantile(res+1*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025),
285
/* Lbda50 */ Quantile(res+1*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50),
286
/* Lbda95 */ Quantile(res+1*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975),
288
/* ProbInf1 */ Prob(res+1*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval,1.0),
290
/* Tau5 */ Quantile(res+2*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025),
291
/* Tau50 */ Quantile(res+2*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50),
292
/* Tau 95 */ Quantile(res+2*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975),
294
/* X5 */ Quantile(res+4*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025),
295
/* X50 */ Quantile(res+4*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50),
296
/* X95 */ Quantile(res+4*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975),
298
/* Y5 */ Quantile(res+5*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025),
299
/* Y50 */ Quantile(res+5*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50),
300
/* Y95 */ Quantile(res+5*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975),
302
/* RandX5 */ Quantile(res+6*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025),
303
/* RandX50*/ Quantile(res+6*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50),
304
/* RandX95*/ Quantile(res+6*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975),
306
/* RandY5 */ Quantile(res+7*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025),
307
/* RandY50*/ Quantile(res+7*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50),
308
/* RandY95*/ Quantile(res+7*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975),
322
//////////////////////////////////////////////////////////////
323
//////////////////////////////////////////////////////////////
325
phydbl *GEO_MCMC(t_tree *tree)
334
tree->mcmc = MCMC_Make_MCMC_Struct();
335
MCMC_Complete_MCMC(tree->mcmc,tree);
337
tree->mcmc->io = NULL;
339
tree->mcmc->use_data = YES;
341
tree->mcmc->sample_interval = 1E+3;
342
tree->mcmc->chain_len = 1E+6;
343
tree->mcmc->chain_len_burnin = 1E+5;
344
tree->mcmc->randomize = YES;
345
tree->mcmc->norm_freq = 1E+3;
346
tree->mcmc->max_tune = 1.E+20;
347
tree->mcmc->min_tune = 1.E-10;
348
tree->mcmc->print_every = 2;
349
tree->mcmc->is_burnin = NO;
350
tree->mcmc->nd_t_digits = 1;
356
tree->mcmc->chain_len = 1.E+8;
357
tree->mcmc->sample_interval = 50;
359
MCMC_Complete_MCMC(tree->mcmc,tree);
361
GEO_Update_Occup(t,tree);
364
PhyML_Printf("\n. Init loglk: %f",t->c_lnL);
366
tree->mcmc->start_ess[tree->mcmc->num_move_geo_sigma] = YES;
367
tree->mcmc->start_ess[tree->mcmc->num_move_geo_lambda] = YES;
368
tree->mcmc->start_ess[tree->mcmc->num_move_geo_tau] = YES;
371
res = (phydbl *)mCalloc(tree->mcmc->chain_len / tree->mcmc->sample_interval * n_vars,sizeof(phydbl));
381
t->update_fmat = YES;
382
MCMC_Geo_Sigma(tree);
386
/* t->update_fmat = YES; */
387
/* MCMC_Geo_Updown_Lbda_Sigma(tree); */
388
/* t->update_fmat = NO; */
391
/* MCMC_Geo_Updown_Tau_Lbda(tree); */
392
/* MCMC_Geo_Updown_Tau_Lbda(tree); */
393
/* MCMC_Geo_Updown_Tau_Lbda(tree); */
398
/* For(i,2*tree->n_otu-1) */
400
/* if(tree->a_nodes[i]->tax == NO) */
402
/* printf("%2d ",tree->geo->loc[i]); */
407
if(tree->mcmc->run%tree->mcmc->sample_interval == 0)
409
MCMC_Copy_To_New_Param_Val(tree->mcmc,tree);
411
MCMC_Update_Effective_Sample_Size(tree->mcmc->num_move_geo_lambda,tree->mcmc,tree);
412
MCMC_Update_Effective_Sample_Size(tree->mcmc->num_move_geo_sigma,tree->mcmc,tree);
413
MCMC_Update_Effective_Sample_Size(tree->mcmc->num_move_geo_tau,tree->mcmc,tree);
415
/* printf("\n. lbda:%f,%f tau:%f,%f sigma:%f,%f", */
416
/* tree->mcmc->acc_rate[tree->mcmc->num_move_geo_lambda], */
417
/* tree->mcmc->tune_move[tree->mcmc->num_move_geo_lambda], */
418
/* tree->mcmc->acc_rate[tree->mcmc->num_move_geo_tau], */
419
/* tree->mcmc->tune_move[tree->mcmc->num_move_geo_tau], */
420
/* tree->mcmc->acc_rate[tree->mcmc->num_move_geo_sigma], */
421
/* tree->mcmc->tune_move[tree->mcmc->num_move_geo_sigma]); */
423
PhyML_Printf("\n. Run %6d Sigma: %12f [%4.0f] Lambda: %12f [%4.0f] Tau: %12f [%4.0f] LogLk: %12f x: %12f y:%12f",
427
tree->mcmc->ess[tree->mcmc->num_move_geo_sigma],
430
tree->mcmc->ess[tree->mcmc->num_move_geo_lambda],
433
tree->mcmc->ess[tree->mcmc->num_move_geo_tau],
437
t->ldscape[t->loc[tree->n_root->num]*t->n_dim+0],
438
t->ldscape[t->loc[tree->n_root->num]*t->n_dim+1]);
440
rand_loc = Rand_Int(0,t->ldscape_sz-1);
442
res[0 * tree->mcmc->chain_len / tree->mcmc->sample_interval + tree->mcmc->run / tree->mcmc->sample_interval] = t->sigma;
443
res[1 * tree->mcmc->chain_len / tree->mcmc->sample_interval + tree->mcmc->run / tree->mcmc->sample_interval] = t->lbda;
444
res[2 * tree->mcmc->chain_len / tree->mcmc->sample_interval + tree->mcmc->run / tree->mcmc->sample_interval] = t->tau;
445
res[3 * tree->mcmc->chain_len / tree->mcmc->sample_interval + tree->mcmc->run / tree->mcmc->sample_interval] = t->c_lnL;
447
res[4 * tree->mcmc->chain_len / tree->mcmc->sample_interval + tree->mcmc->run / tree->mcmc->sample_interval] = t->ldscape[t->loc[tree->n_root->num]*t->n_dim+0];
448
res[5 * tree->mcmc->chain_len / tree->mcmc->sample_interval + tree->mcmc->run / tree->mcmc->sample_interval] = t->ldscape[t->loc[tree->n_root->num]*t->n_dim+1];
450
res[6 * tree->mcmc->chain_len / tree->mcmc->sample_interval + tree->mcmc->run / tree->mcmc->sample_interval] = t->ldscape[rand_loc*t->n_dim+0];
451
res[7 * tree->mcmc->chain_len / tree->mcmc->sample_interval + tree->mcmc->run / tree->mcmc->sample_interval] = t->ldscape[rand_loc*t->n_dim+1];
456
if(tree->mcmc->ess[tree->mcmc->num_move_geo_sigma] > 200.) tree->mcmc->adjust_tuning[tree->mcmc->num_move_geo_sigma] = NO;
457
if(tree->mcmc->ess[tree->mcmc->num_move_geo_tau] > 200.) tree->mcmc->adjust_tuning[tree->mcmc->num_move_geo_tau] = NO;
458
if(tree->mcmc->ess[tree->mcmc->num_move_geo_lambda]> 200.) tree->mcmc->adjust_tuning[tree->mcmc->num_move_geo_lambda] = NO;
460
MCMC_Get_Acc_Rates(tree->mcmc);
462
if(tree->mcmc->ess[tree->mcmc->num_move_geo_sigma] > 1000. &&
463
tree->mcmc->ess[tree->mcmc->num_move_geo_tau] > 1000. &&
464
tree->mcmc->ess[tree->mcmc->num_move_geo_lambda]> 1000.) break;
468
while(tree->mcmc->run < tree->mcmc->chain_len);
474
//////////////////////////////////////////////////////////////
475
//////////////////////////////////////////////////////////////
477
t_geo *GEO_Make_Geo_Basic()
480
t = (t_geo *)mCalloc(1,sizeof(t_geo));
484
//////////////////////////////////////////////////////////////
485
//////////////////////////////////////////////////////////////
487
void GEO_Make_Geo_Complete(int ldscape_sz, int n_dim, int n_tax, t_geo *t)
491
t->f_mat = (phydbl *)mCalloc(ldscape_sz*ldscape_sz,sizeof(phydbl));
494
t->r_mat = (phydbl *)mCalloc(ldscape_sz*ldscape_sz,sizeof(phydbl));
496
// Occupation vectors: one vector for each node
497
t->occup = (int *)mCalloc((2*n_tax-1)*ldscape_sz,sizeof(int));
500
t->ldscape = (phydbl *)mCalloc((ldscape_sz*n_dim),sizeof(phydbl));
503
t->loc = (int *)mCalloc((int)(2*n_tax-1),sizeof(int));
505
// Sorted node heights
506
t->sorted_nd = (t_node **)mCalloc((int)(2*n_tax-1),sizeof(t_node *));
509
t->cov = (phydbl *)mCalloc((int)(n_dim*n_dim),sizeof(phydbl));
511
// gives the location occupied beneath each node in the tree
512
t->loc_beneath = (int *)mCalloc((int)(2*n_tax-1)*ldscape_sz,sizeof(int));
515
//////////////////////////////////////////////////////////////
516
//////////////////////////////////////////////////////////////
518
void Free_Geo(t_geo *t)
527
Free(t->loc_beneath);
530
//////////////////////////////////////////////////////////////
531
//////////////////////////////////////////////////////////////
533
// Update F matrix. Assume a diagonal covariance matrix.
534
void GEO_Update_Fmat(t_geo *t)
541
For(i,t->n_dim) t->cov[i*t->n_dim+i] = t->sigma; // Diagonal covariance matrix. Same variance in every direction
543
lognloc = LOG(t->ldscape_sz);
546
for(i=0;i<t->ldscape_sz;i++)
548
loc1 = t->ldscape + i*t->n_dim;
550
for(j=i;j<t->ldscape_sz;j++)
552
loc2 = t->ldscape + j*t->n_dim;
554
t->f_mat[i*t->ldscape_sz+j] = .0;
556
// Calculate log(f(l_i,l_j)) - log(f(l_i,l_i) - log(m)
557
For(k,t->n_dim) t->f_mat[i*t->ldscape_sz+j] += Log_Dnorm(loc2[k],loc1[k],t->cov[k*t->n_dim+k],&err);
558
t->f_mat[i*t->ldscape_sz+j] -= lognloc;
559
For(k,t->n_dim) t->f_mat[i*t->ldscape_sz+j] -= Log_Dnorm(loc1[k],loc1[k],t->cov[k*t->n_dim+k],&err);
561
// Take the exponential
562
t->f_mat[i*t->ldscape_sz+j] = EXP(t->f_mat[i*t->ldscape_sz+j]);
564
// Matrix is symmetric
565
t->f_mat[j*t->ldscape_sz+i] = t->f_mat[i*t->ldscape_sz+j];
567
/* printf("\n. f[%d,%d] = %f (1:[%f;%f] 2:[%f;%f]) sigma=%f",i,j,t->f_mat[i*t->ldscape_sz+j],loc1[0],loc1[1],loc2[0],loc2[1],SQRT(t->cov[0*t->n_dim+0])); */
572
//////////////////////////////////////////////////////////////
573
//////////////////////////////////////////////////////////////
575
// Sort node heights from oldest to youngest age.
576
void GEO_Update_Sorted_Nd(t_geo *t, t_tree *tree)
584
For(i,2*tree->n_otu-1) t->sorted_nd[i] = tree->a_nodes[i];
586
// Bubble sort of the node heights
590
For(i,2*tree->n_otu-2)
592
if(tree->rates->nd_t[t->sorted_nd[i+1]->num] < tree->rates->nd_t[t->sorted_nd[i]->num])
594
buff = t->sorted_nd[i];
595
t->sorted_nd[i] = t->sorted_nd[i+1];
596
t->sorted_nd[i+1] = buff;
605
//////////////////////////////////////////////////////////////
606
//////////////////////////////////////////////////////////////
608
// Update the set of vectors of occupation along the tree
609
void GEO_Update_Occup(t_geo *t, t_tree *tree)
614
GEO_Update_Sorted_Nd(t,tree);
616
For(i,t->ldscape_sz*(2*tree->n_otu-1)) t->occup[i] = 0;
618
t->occup[tree->n_root->num*t->ldscape_sz + t->loc[tree->n_root->num]] = 1;
620
for(i=1;i<2*tree->n_otu-1;i++)
624
t->occup[t->sorted_nd[i]->num*t->ldscape_sz + j] =
625
t->occup[t->sorted_nd[i-1]->num*t->ldscape_sz + j];
629
if(t->sorted_nd[i-1]->tax == NO)
632
if(t->sorted_nd[i-1] == tree->n_root)
634
v1 = tree->n_root->v[1];
635
v2 = tree->n_root->v[2];
641
if(t->sorted_nd[i-1]->v[j] != t->sorted_nd[i-1]->anc &&
642
t->sorted_nd[i-1]->b[j] != tree->e_root)
644
if(!v1) v1 = t->sorted_nd[i-1]->v[j];
645
else v2 = t->sorted_nd[i-1]->v[j];
651
if(t->loc[v1->num] != t->loc[t->sorted_nd[i-1]->num])
653
t->occup[t->sorted_nd[i]->num * t->ldscape_sz + t->loc[v1->num]]++;
657
t->occup[t->sorted_nd[i]->num * t->ldscape_sz + t->loc[v2->num]]++;
663
/* For(i,2*tree->n_otu-1) */
665
/* printf("\n. Node %3d: ",t->sorted_nd[i]->num); */
666
/* For(j,t->ldscape_sz) */
668
/* /\* printf("%3d [%12f;%12f] ", *\/ */
669
/* /\* t->occup[t->sorted_nd[i]->num*t->ldscape_sz + j], *\/ */
670
/* /\* t->ldscape[j*t->n_dim+0],t->ldscape[j*t->n_dim+1]); *\/ */
671
/* /\* printf("%3d ", *\/ */
672
/* /\* t->occup[t->sorted_nd[i]->num*t->ldscape_sz + j]); *\/ */
677
//////////////////////////////////////////////////////////////
678
//////////////////////////////////////////////////////////////
680
// Calculate R mat at node n
681
void GEO_Update_Rmat(t_node *n, t_geo *t, t_tree *tree)
690
lbda_j = ((t->occup[n->num*t->ldscape_sz + j]==0) ? (1.0) : (t->lbda));
691
t->r_mat[i*t->ldscape_sz+j] = t->f_mat[i*t->ldscape_sz+j] * lbda_j * t->tau;
696
//////////////////////////////////////////////////////////////
697
//////////////////////////////////////////////////////////////
699
phydbl GEO_Lk(t_geo *t, t_tree *tree)
704
int dep,arr; // departure and arrival location indices;
705
t_node *curr_n,*prev_n,*v1,*v2;
708
GEO_Update_Occup(t,tree); // Same here.
710
if(t->update_fmat == YES) GEO_Update_Fmat(t);
715
for(i=1;i<tree->n_otu-1;i++) // Consider all the time slices, from oldest to youngest.
716
// Start at first node below root
718
prev_n = t->sorted_nd[i-1]; // node just above
719
curr_n = t->sorted_nd[i]; // current node
721
GEO_Update_Rmat(curr_n,t,tree); // NOTE: don't need to do that every time. Add check later.
723
R = GEO_Total_Migration_Rate(curr_n,t); // Total migration rate calculated at node n
727
/* printf("\n. t0: %f t1: %f R: %f tau: %f sigma: %f lbda: %f x1: %f y1: %f x2: %f y2: %f log0: %d loc1: %d rij: %G fij: %G", */
728
/* tree->rates->nd_t[t->sorted_nd[i-1]->num], */
729
/* tree->rates->nd_t[t->sorted_nd[i]->num], */
734
/* t->ldscape[t->loc[tree->geo->sorted_nd[i-1]->num]*t->n_dim+0], */
735
/* t->ldscape[t->loc[tree->geo->sorted_nd[i-1]->num]*t->n_dim+1], */
736
/* t->ldscape[t->loc[tree->geo->sorted_nd[i]->num]*t->n_dim+0], */
737
/* t->ldscape[t->loc[tree->geo->sorted_nd[i]->num]*t->n_dim+1], */
738
/* t->loc[tree->geo->sorted_nd[i-1]->num], */
739
/* t->loc[tree->geo->sorted_nd[i]->num], */
740
/* t->r_mat[t->loc[tree->geo->sorted_nd[i-1]->num] * t->ldscape_sz + t->loc[tree->geo->sorted_nd[i]->num]], */
741
/* t->f_mat[t->loc[tree->geo->sorted_nd[i-1]->num] * t->ldscape_sz + t->loc[tree->geo->sorted_nd[i]->num]] */
744
/* printf("\n >> "); */
746
/* For(j,t->ldscape_sz) */
747
/* if(t->occup[t->sorted_nd[i]->num * t->ldscape_sz + j] > 0) */
748
/* printf("%f %f -- ", */
749
/* t->ldscape[j*t->n_dim+0], */
750
/* t->ldscape[j*t->n_dim+1]); */
754
/* printf("\n. %d %d (%d) %f %p %p \n",i,curr_n->num,curr_n->tax,tree->rates->nd_t[curr_n->num],curr_n->v[1],curr_n->v[2]); */
758
if(curr_n->v[j] != curr_n->anc && curr_n->b[j] != tree->e_root)
760
if(!v1) v1 = curr_n->v[j];
761
else v2 = curr_n->v[j];
764
dep = t->loc[curr_n->num]; // departure location
765
arr = // arrival location
766
(t->loc[v1->num] == t->loc[curr_n->num] ?
770
/* printf("\n%f\t%f", */
771
/* t->ldscape[arr*t->n_dim+0]-t->ldscape[dep*t->n_dim+0], */
772
/* t->ldscape[arr*t->n_dim+1]-t->ldscape[dep*t->n_dim+1]); */
774
loglk -= R * FABS(tree->rates->nd_t[curr_n->num] - tree->rates->nd_t[prev_n->num]);
775
loglk += LOG(t->r_mat[dep * t->ldscape_sz + arr]);
777
/* printf("\n <> %d %f %f %d %d v1:%d v2:%d anc:%d %d %d %d", */
779
/* R * FABS(tree->rates->nd_t[curr_n->num] - tree->rates->nd_t[prev_n->num]), */
780
/* LOG(t->r_mat[dep * t->ldscape_sz + arr]), */
781
/* dep,arr,v1->num,v2->num,curr_n->anc->num,curr_n->v[0]->num,curr_n->v[1]->num,curr_n->v[2]->num); */
785
/* printf("\n. R = %f r_mat = %f f_mat = %f dt = %f loglk = %f", */
787
/* t->r_mat[dep * t->ldscape_sz + arr], */
788
/* t->f_mat[dep * t->ldscape_sz + arr], */
789
/* FABS(t->sorted_nd[i] - t->sorted_nd[i-1]),loglk); */
796
// Likelihood for the first 'slice' (i.e., the part just below the root down to
798
GEO_Update_Rmat(tree->n_root,t,tree);
800
loglk -= LOG(t->ldscape_sz);
801
dep = t->loc[tree->n_root->num];
803
(t->loc[tree->n_root->num] != t->loc[tree->n_root->v[1]->num] ?
804
t->loc[tree->n_root->v[1]->num] :
805
t->loc[tree->n_root->v[2]->num]);
807
/* printf("\n %f %f",t->ldscape[dep],t->ldscape[arr]); */
809
loglk += LOG(t->r_mat[dep * t->ldscape_sz + arr]);
812
For(i,t->ldscape_sz) sum += t->r_mat[dep * t->ldscape_sz + i];
816
tree->geo->c_lnL = loglk;
821
//////////////////////////////////////////////////////////////
822
//////////////////////////////////////////////////////////////
824
void GEO_Init_Tloc_Tips(t_geo *t, t_tree *tree)
835
//////////////////////////////////////////////////////////////
836
//////////////////////////////////////////////////////////////
838
// Do not forget to call GEO_Update_Rmat (with node n) before calling this function
839
phydbl GEO_Total_Migration_Rate(t_node *n, t_geo *t)
851
t->r_mat[i * t->ldscape_sz + j] *
852
t->occup[n->num * t->ldscape_sz + i];
859
//////////////////////////////////////////////////////////////
860
//////////////////////////////////////////////////////////////
862
// Find the arrival location for the migration leaving from n
863
int GEO_Get_Arrival_Location(t_node *n, t_geo *t, t_tree *tree)
866
t_node *v1, *v2; // the two daughters of n
872
if(n->v[i] && n->v[i] != n->anc)
874
if(!v1) v1 = n->v[i];
879
if(t->loc[v1->num] == t->loc[v2->num]) // Migrated to the same location as that of n
881
if(t->loc[n->num] != t->loc[v1->num])
883
PhyML_Printf("\n== Error detected in location labeling.");
884
PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__);
888
return t->loc[n->num];
890
else // Migrated to a different spot
892
if((t->loc[v1->num] != t->loc[n->num]) && (t->loc[v2->num] != t->loc[n->num]))
894
PhyML_Printf("\n== Error detected in location labeling.");
895
PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__);
900
if(t->loc[v1->num] == t->loc[n->num]) return t->loc[v2->num]; // v2 gets the new location, v1 inheritates from n
901
else return t->loc[v1->num]; // v1 gets the new location, v2 inheritates from n
907
//////////////////////////////////////////////////////////////
908
//////////////////////////////////////////////////////////////
910
t_tree *GEO_Simulate(t_geo *t, int n_otu)
913
int n_branching_nodes;
914
t_node **branching_nodes; // vector of nodes available for branching out
915
phydbl *p_branch; // p_branch[i]: proba that the i-th node in branching_nodes branches out (p_x vector in the article)
916
phydbl *p_mig; // p_branch[i]: proba of migrating to location i from the location of the edge branching out (q_i vector in the article)
923
int *occup; // occupation vector. Updated as we move down the tree
930
tree = Make_Tree_From_Scratch(n_otu,NULL);
931
tree->rates = RATES_Make_Rate_Struct(tree->n_otu);
932
RATES_Init_Rate_Struct(tree->rates,NULL,tree->n_otu);
933
tree->n_root = tree->a_nodes[2*tree->n_otu-2]; // Set the root node to the last element in the list of nodes
937
For(i,2*tree->n_otu-2) tree->rates->nd_t[i] = -1.;
939
occup = (int *)mCalloc(t->ldscape_sz,sizeof(int));
943
branching_nodes = (t_node **)mCalloc(tree->n_otu,sizeof(t_node *));
944
branching_nodes[0] = tree->n_root;
945
n_branching_nodes = 1;
949
p_branch = (phydbl *)mCalloc(tree->n_otu,sizeof(phydbl ));
952
p_mig = (phydbl *)mCalloc(t->ldscape_sz,sizeof(phydbl ));
954
time = 0.0; // Time at the root node (this will be changed afterward)
956
// Sample a location uniformly for the root
957
t->loc[tree->n_root->num] = Rand_Int(0,t->ldscape_sz-1);
959
// Update the occupancy vector
960
occup[t->loc[tree->n_root->num]] = 1;
965
// total migration rate
970
t->f_mat[t->loc[tree->n_root->num]*t->ldscape_sz+i] *
971
((occup[i] == 0) ? (1.0) : (t->lbda)) *
977
// Select the node that branches out
978
hit = Sample_i_With_Proba_pi(p_branch,n_branching_nodes);
980
/* printf("\n. [%d] Select node %d (location %d)",n_branching_nodes,branching_nodes[hit]->num,t->loc[branching_nodes[hit]->num]); */
982
// Set the time for the branching node
983
tree->rates->nd_t[branching_nodes[hit]->num] = time;
986
/* printf("\n. Set its time to %f",time); */
988
// Select the destination location
989
dep = t->loc[branching_nodes[hit]->num]; // Departure point
992
For(i,t->ldscape_sz) // Total rate of migration out of departure point
995
t->f_mat[dep*t->ldscape_sz+i] *
996
((occup[i] == 0) ? (1.0) : (t->lbda)) *
1001
For(i,t->ldscape_sz) p_mig[i] /= sum;
1003
arr = Sample_i_With_Proba_pi(p_mig,t->ldscape_sz);
1005
/* printf("\n. Migrate from %d [%5.2f,%5.2f] to %d [%5.2f,%5.2f]", */
1007
/* t->ldscape[dep*t->n_dim+0], */
1008
/* t->ldscape[dep*t->n_dim+1], */
1010
/* t->ldscape[arr*t->n_dim+0], */
1011
/* t->ldscape[arr*t->n_dim+1]); */
1013
/* printf("\n%f\t%f", */
1014
/* t->ldscape[arr*t->n_dim+0]-t->ldscape[dep*t->n_dim+0], */
1015
/* t->ldscape[arr*t->n_dim+1]-t->ldscape[dep*t->n_dim+1]); */
1018
// Update vector of occupation
1021
/* printf("\n. Remove %d. Add %d and %d",branching_nodes[hit]->num,tree->a_nodes[nd_idx]->num,tree->a_nodes[nd_idx+1]->num); */
1022
// Connect two new nodes to the node undergoing a branching event
1023
tree->a_nodes[nd_idx]->v[0] = branching_nodes[hit];
1024
tree->a_nodes[nd_idx+1]->v[0] = branching_nodes[hit];
1025
branching_nodes[hit]->v[1] = tree->a_nodes[nd_idx];
1026
branching_nodes[hit]->v[2] = tree->a_nodes[nd_idx+1];
1028
// update branching_nodes vector. Element 'hit' is being replaced so that the corresponding node can no longer branch out
1029
branching_nodes[hit] = tree->a_nodes[nd_idx];
1030
branching_nodes[n_branching_nodes] = tree->a_nodes[nd_idx+1];
1032
// Update t_loc vector.
1033
t->loc[tree->a_nodes[nd_idx]->num] = dep;
1034
t->loc[tree->a_nodes[nd_idx+1]->num] = arr;
1036
// Update total migration rate
1038
For(i,t->ldscape_sz)
1042
For(j,t->ldscape_sz)
1046
t->f_mat[i*t->ldscape_sz+j] *
1047
((occup[j] == 0) ? (1.0) : (t->lbda)) *
1053
// Set the time until next branching event
1054
time = time + Rexp(R);
1056
// Update p_branch vector
1057
For(i,n_branching_nodes+1)
1059
dep = t->loc[branching_nodes[i]->num];
1061
For(j,t->ldscape_sz)
1064
t->f_mat[dep*t->ldscape_sz+j] *
1065
((occup[j] == 0) ? (1.0) : (t->lbda)) *
1068
/* printf("\n. %f %f %f %f", */
1070
/* t->f_mat[dep*t->ldscape_sz+j], */
1071
/* ((occup[j]>0) ? (t->lbda) : (1.0)), */
1074
/* printf("\n. %f ",p_branch[i]); */
1078
// Increase the number of branching nodes by one (you just added 2 new and removed 1 old)
1079
n_branching_nodes++;
1083
while(n_branching_nodes < n_otu);
1086
// Set the times at the tips
1087
For(i,2*tree->n_otu-1) if(tree->rates->nd_t[i] < 0.0) tree->rates->nd_t[i] = time;
1089
// Reverse time scale
1090
For(i,2*tree->n_otu-1) tree->rates->nd_t[i] -= time;
1091
/* For(i,2*tree->n_otu-1) tree->rates->nd_t[i] = FABS(tree->rates->nd_t[i]); */
1093
// Bubble sort to put all the tips at the top of the tree->a_nodes array
1097
For(i,2*tree->n_otu-2)
1099
if(!tree->a_nodes[i+1]->v[1] && tree->a_nodes[i]->v[1])
1101
buff_nd = tree->a_nodes[i+1];
1102
tree->a_nodes[i+1] = tree->a_nodes[i];
1103
tree->a_nodes[i] = buff_nd;
1105
buff_t = tree->rates->nd_t[i+1];
1106
tree->rates->nd_t[i+1] = tree->rates->nd_t[i];
1107
tree->rates->nd_t[i] = buff_t;
1109
buff_l = t->loc[i+1];
1110
t->loc[i+1] = t->loc[i];
1120
// The rest below is just bookeeping...
1123
For(i,2*tree->n_otu-1) tree->a_nodes[i]->num = i;
1124
For(i,2*tree->n_otu-1)
1126
if(i < tree->n_otu) tree->a_nodes[i]->tax = YES;
1127
else tree->a_nodes[i]->tax = NO;
1130
/* printf("\n++++++++++++++++++\n"); */
1131
/* For(i,2*tree->n_otu-1) */
1133
/* printf("\n. Node %3d [%p] anc:%3d v1:%3d v2:%3d time: %f", */
1134
/* tree->a_nodes[i]->num, */
1135
/* (void *)tree->a_nodes[i], */
1136
/* tree->a_nodes[i]->v[0] ? tree->a_nodes[i]->v[0]->num : -1, */
1137
/* tree->a_nodes[i]->v[1] ? tree->a_nodes[i]->v[1]->num : -1, */
1138
/* tree->a_nodes[i]->v[2] ? tree->a_nodes[i]->v[2]->num : -1, */
1139
/* tree->rates->nd_t[i]); */
1145
if(!tree->a_nodes[i]->name) tree->a_nodes[i]->name = (char *)mCalloc(T_MAX_NAME,sizeof(char));
1146
strcpy(tree->a_nodes[i]->name,"x");
1147
sprintf(tree->a_nodes[i]->name+1,"%d",i);
1150
tree->n_root->v[1]->v[0] = tree->n_root->v[2];
1151
tree->n_root->v[2]->v[0] = tree->n_root->v[1];
1153
tree->num_curr_branch_available = 0;
1154
Connect_Edges_To_Nodes_Recur(tree->a_nodes[0],tree->a_nodes[0]->v[0],tree);
1156
tree->e_root = tree->n_root->v[1]->b[0];
1158
For(i,2*tree->n_otu-3)
1160
tree->a_edges[i]->l->v = FABS(tree->rates->nd_t[tree->a_edges[i]->left->num] -
1161
tree->rates->nd_t[tree->a_edges[i]->rght->num]);
1164
tree->e_root->l->v =
1165
FABS(tree->rates->nd_t[tree->n_root->v[1]->num] -
1166
tree->rates->nd_t[tree->n_root->num]) +
1167
FABS(tree->rates->nd_t[tree->n_root->v[2]->num] -
1168
tree->rates->nd_t[tree->n_root->num]);
1170
tree->n_root->l[1] = FABS(tree->rates->nd_t[tree->n_root->v[1]->num] -
1171
tree->rates->nd_t[tree->n_root->num]);
1173
tree->n_root->l[2] = FABS(tree->rates->nd_t[tree->n_root->v[2]->num] -
1174
tree->rates->nd_t[tree->n_root->num]);
1177
FABS(tree->rates->nd_t[tree->n_root->v[2]->num] -
1178
tree->rates->nd_t[tree->n_root->num]) / tree->e_root->l->v;
1180
/* printf("\n. %s ",Write_Tree(tree,NO)); */
1182
DR_Draw_Tree("essai.ps",tree);
1184
/* For(i,tree->n_otu) */
1185
/* printf("\n. %4s %4d [%5.2f %5.2f]",tree->a_nodes[i]->name, */
1187
/* t->ldscape[t->loc[i]*t->n_dim+0], */
1188
/* t->ldscape[t->loc[i]*t->n_dim+1]); */
1191
Update_Ancestors(tree->n_root,tree->n_root->v[2],tree);
1192
Update_Ancestors(tree->n_root,tree->n_root->v[1],tree);
1194
Free(branching_nodes);
1201
//////////////////////////////////////////////////////////////
1202
//////////////////////////////////////////////////////////////
1204
// Simualte n coordinates (in 2D)
1205
void GEO_Simulate_Coordinates(int n, t_geo *t)
1214
t->ldscape[i*t->n_dim+0] = -width/2. + Uni()*width;
1215
t->ldscape[i*t->n_dim+1] = -width/2. + Uni()*width;
1219
/* t->ldscape[0*t->n_dim+0] = 0.0; */
1220
/* t->ldscape[0*t->n_dim+1] = 0.0; */
1222
/* t->ldscape[1*t->n_dim+0] = 0.1; */
1223
/* t->ldscape[1*t->n_dim+1] = 0.1; */
1227
//////////////////////////////////////////////////////////////
1228
//////////////////////////////////////////////////////////////
1230
void GEO_Optimize_Sigma(t_geo *t, t_tree *tree)
1232
Generic_Brent_Lk(&(t->sigma),
1238
GEO_Wrap_Lk,NULL,tree,NULL);
1241
//////////////////////////////////////////////////////////////
1242
//////////////////////////////////////////////////////////////
1244
void GEO_Optimize_Lambda(t_geo *t, t_tree *tree)
1246
Generic_Brent_Lk(&(t->lbda),
1252
GEO_Wrap_Lk,NULL,tree,NULL);
1255
//////////////////////////////////////////////////////////////
1256
//////////////////////////////////////////////////////////////
1258
void GEO_Optimize_Tau(t_geo *t, t_tree *tree)
1260
Generic_Brent_Lk(&(t->tau),
1266
GEO_Wrap_Lk,NULL,tree,NULL);
1269
//////////////////////////////////////////////////////////////
1270
//////////////////////////////////////////////////////////////
1272
phydbl GEO_Wrap_Lk(t_edge *b, t_tree *tree, supert_tree *stree)
1274
return GEO_Lk(tree->geo,tree);
1277
//////////////////////////////////////////////////////////////
1278
//////////////////////////////////////////////////////////////
1280
void GEO_Init_Geo_Struct(t_geo *t)
1282
t->c_lnL = UNLIKELY;
1285
t->min_sigma = 1.E-3;
1286
t->max_sigma = 1.E+3;
1287
t->sigma_thresh = t->max_sigma;
1290
t->min_lbda = 1.E-3;
1291
t->max_lbda = 1.E+3;
1302
t->update_fmat = YES;
1305
//////////////////////////////////////////////////////////////
1306
//////////////////////////////////////////////////////////////
1308
void GEO_Randomize_Locations(t_node *n, t_geo *t, t_tree *tree)
1310
if(n->tax == YES) return;
1315
phydbl *probs; // vector of probability of picking each location
1320
probs = (phydbl *)mCalloc(t->ldscape_sz,sizeof(phydbl));
1325
if(n->v[i] != n->anc && n->b[i] != tree->e_root)
1327
if(!v1) v1 = n->v[i];
1332
if(v1->tax && v2->tax)
1337
else if(v1->tax && !v2->tax && t->loc[v1->num] != t->loc[n->num])
1339
t->loc[v2->num] = t->loc[n->num];
1341
else if(v2->tax && !v1->tax && t->loc[v2->num] != t->loc[n->num])
1343
t->loc[v1->num] = t->loc[n->num];
1345
else if(v1->tax && !v2->tax && t->loc[v1->num] == t->loc[n->num])
1348
For(i,t->ldscape_sz) sum += t->loc_beneath[v2->num * t->ldscape_sz + i];
1349
For(i,t->ldscape_sz) probs[i] = t->loc_beneath[v2->num * t->ldscape_sz + i]/sum;
1351
t->loc[v2->num] = Sample_i_With_Proba_pi(probs,t->ldscape_sz);
1353
else if(v2->tax && !v1->tax && t->loc[v2->num] == t->loc[n->num])
1356
For(i,t->ldscape_sz) sum += t->loc_beneath[v1->num * t->ldscape_sz + i];
1357
For(i,t->ldscape_sz) probs[i] = t->loc_beneath[v1->num * t->ldscape_sz + i]/sum;
1359
t->loc[v1->num] = Sample_i_With_Proba_pi(probs,t->ldscape_sz);
1366
n_v1 = t->loc_beneath[v1->num * t->ldscape_sz + t->loc[n->num]];
1367
n_v2 = t->loc_beneath[v2->num * t->ldscape_sz + t->loc[n->num]];
1371
PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__);
1376
p = n_v1 / (n_v1 + n_v2);
1383
For(i,t->ldscape_sz) sum += t->loc_beneath[v2->num * t->ldscape_sz + i];
1384
For(i,t->ldscape_sz) probs[i] = t->loc_beneath[v2->num * t->ldscape_sz + i]/sum;
1386
t->loc[v2->num] = Sample_i_With_Proba_pi(probs,t->ldscape_sz);
1387
t->loc[v1->num] = t->loc[n->num];
1391
if(t->loc_beneath[v2->num * t->ldscape_sz + t->loc[n->num]] > 0)
1394
For(i,t->ldscape_sz) sum += t->loc_beneath[v1->num * t->ldscape_sz + i];
1395
For(i,t->ldscape_sz) probs[i] = t->loc_beneath[v1->num * t->ldscape_sz + i]/sum;
1397
t->loc[v1->num] = Sample_i_With_Proba_pi(probs,t->ldscape_sz);
1398
t->loc[v2->num] = t->loc[n->num];
1403
For(i,t->ldscape_sz) sum += t->loc_beneath[v2->num * t->ldscape_sz + i];
1404
For(i,t->ldscape_sz) probs[i] = t->loc_beneath[v2->num * t->ldscape_sz + i]/sum;
1406
t->loc[v2->num] = Sample_i_With_Proba_pi(probs,t->ldscape_sz);
1407
t->loc[v1->num] = t->loc[n->num];
1411
if(t->loc[v1->num] != t->loc[n->num] && t->loc[v2->num] != t->loc[n->num])
1413
PhyML_Printf("\n. %d %d %d",t->loc[v1->num],t->loc[v2->num],t->loc[n->num]);
1414
PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__);
1418
if(t->loc_beneath[v1->num * t->ldscape_sz + t->loc[v1->num]] < 1)
1420
PhyML_Printf("\n. %d %d %d",t->loc[v1->num],t->loc[v2->num],t->loc[n->num]);
1421
PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__);
1425
if(t->loc_beneath[v2->num * t->ldscape_sz + t->loc[v2->num]] < 1)
1427
PhyML_Printf("\n. %d %d %d",t->loc[v1->num],t->loc[v2->num],t->loc[n->num]);
1428
PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__);
1437
if(n->v[i] != n->anc && n->b[i] != tree->e_root)
1438
GEO_Randomize_Locations(n->v[i],t,tree);
1443
//////////////////////////////////////////////////////////////
1444
//////////////////////////////////////////////////////////////
1446
void GEO_Get_Locations_Beneath(t_geo *t, t_tree *tree)
1449
GEO_Get_Locations_Beneath_Post(tree->n_root,tree->n_root->v[1],t,tree);
1450
GEO_Get_Locations_Beneath_Post(tree->n_root,tree->n_root->v[2],t,tree);
1452
For(i,t->ldscape_sz)
1454
t->loc_beneath[tree->n_root->num*t->ldscape_sz+i] =
1455
t->loc_beneath[tree->n_root->v[1]->num*t->ldscape_sz+i] +
1456
t->loc_beneath[tree->n_root->v[2]->num*t->ldscape_sz+i];
1462
//////////////////////////////////////////////////////////////
1463
//////////////////////////////////////////////////////////////
1465
void GEO_Get_Locations_Beneath_Post(t_node *a, t_node *d, t_geo *t, t_tree *tree)
1470
t->loc_beneath[d->num*t->ldscape_sz+t->loc[d->num]] = 1;
1480
if(d->v[i] != a && d->b[i] != tree->e_root)
1482
GEO_Get_Locations_Beneath_Post(d,d->v[i],t,tree);
1490
if(d->v[i] != a && d->b[i] != tree->e_root)
1492
if(!v1) v1 = d->v[i];
1498
For(i,t->ldscape_sz)
1500
t->loc_beneath[ d->num*t->ldscape_sz+i] =
1501
t->loc_beneath[v1->num*t->ldscape_sz+i] +
1502
t->loc_beneath[v2->num*t->ldscape_sz+i] ;
1507
//////////////////////////////////////////////////////////////
1508
//////////////////////////////////////////////////////////////
1510
void GEO_Get_Sigma_Max(t_geo *t)
1513
phydbl mean_dist,inv_mean_dist;
1514
phydbl sigma_a, sigma_b, sigma_c;
1515
phydbl overlap_a, overlap_b, overlap_c;
1517
phydbl overlap_target;
1519
int n_iter,n_iter_max;
1522
overlap_target = 0.95;
1526
inv_mean_dist = -1.;
1527
For(i,t->ldscape_sz-1)
1529
for(j=i+1;j<t->ldscape_sz;j++)
1531
/* dist = POW(t->ldscape[i*t->n_dim+0] - t->ldscape[j*t->n_dim+0],1); */
1532
/* if(dist > mean_dist) mean_dist = dist; */
1533
/* dist = POW(t->ldscape[i*t->n_dim+1] - t->ldscape[j*t->n_dim+1],1); */
1534
/* if(dist > mean_dist) mean_dist = dist; */
1535
mean_dist += FABS(t->ldscape[i*t->n_dim+0] - t->ldscape[j*t->n_dim+0]);
1536
mean_dist += FABS(t->ldscape[i*t->n_dim+1] - t->ldscape[j*t->n_dim+1]);
1540
mean_dist /= t->ldscape_sz*(t->ldscape_sz-1)/2.;
1541
inv_mean_dist = 1./mean_dist;
1544
PhyML_Printf("\n. mean_dist = %f",mean_dist);
1546
sigma_a = t->min_sigma; sigma_b = 1.0; sigma_c = t->max_sigma;
1547
/* sigma_a = t->min_sigma; sigma_b = 1.0; sigma_c = 10.; */
1551
d_intersect = Inverse_Truncated_Normal(inv_mean_dist,0.0,sigma_a,0.0,mean_dist);
1553
(Pnorm(mean_dist,0.0,sigma_a) - Pnorm(d_intersect,0.0,sigma_a))/
1554
(Pnorm(mean_dist,0.0,sigma_a) - Pnorm(0.0,0.0,sigma_a)) +
1555
d_intersect / mean_dist;
1556
/* printf("\n. inter: %f %f [%f]",d_intersect,mean_dist,d_intersect / mean_dist); */
1558
d_intersect = Inverse_Truncated_Normal(inv_mean_dist,0.0,sigma_b,0.0,mean_dist);
1560
(Pnorm(mean_dist,0.0,sigma_b) - Pnorm(d_intersect,0.0,sigma_b))/
1561
(Pnorm(mean_dist,0.0,sigma_b) - Pnorm(0.0,0.0,sigma_b)) +
1562
d_intersect / mean_dist;
1563
/* printf("\n. inter: %f %f [%f]",d_intersect,mean_dist,d_intersect / mean_dist); */
1565
d_intersect = Inverse_Truncated_Normal(inv_mean_dist,0.0,sigma_c,0.0,mean_dist);
1567
(Pnorm(mean_dist,0.0,sigma_c) - Pnorm(d_intersect,0.0,sigma_c))/
1568
(Pnorm(mean_dist,0.0,sigma_c) - Pnorm(0.0,0.0,sigma_c)) +
1569
d_intersect / mean_dist;
1571
/* printf("\n. inter: %f %f [%f]",d_intersect,mean_dist,d_intersect / mean_dist); */
1573
/* printf("\n. sigma_a:%f overlap_a:%f sigma_b:%f overlap_b:%f sigma_c:%f overlap_c:%f", */
1574
/* sigma_a,overlap_a, */
1575
/* sigma_b,overlap_b, */
1576
/* sigma_c,overlap_c); */
1578
if(overlap_target > overlap_a && overlap_target < overlap_b)
1581
sigma_b = sigma_a + (sigma_c - sigma_a)/2.;
1583
else if(overlap_target > overlap_b && overlap_target < overlap_c)
1586
sigma_b = sigma_a + (sigma_c - sigma_a)/2.;
1588
else if(overlap_target < overlap_a)
1592
else if(overlap_target > overlap_c)
1600
while(sigma_c - sigma_a > eps && n_iter < n_iter_max);
1602
/* if(sigma_c - sigma_a > eps) */
1604
/* PhyML_Printf("\n== Error detected in getting maximum value of sigma."); */
1605
/* PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__); */
1610
/* PhyML_Printf("\n== Threshold for sigma: %f",sigma_b); */
1613
t->sigma_thresh = sigma_b;
1616
//////////////////////////////////////////////////////////////
1617
//////////////////////////////////////////////////////////////
1619
void MCMC_Geo_Updown_Tau_Lbda(t_tree *tree)
1621
phydbl K,mult,u,alpha,ratio;
1622
phydbl cur_lnL,new_lnL;
1623
phydbl cur_tau,new_tau;
1624
phydbl cur_lbda,new_lbda;
1626
K = tree->mcmc->tune_move[tree->mcmc->num_move_geo_updown_tau_lbda];
1627
cur_lnL = tree->geo->c_lnL;
1628
new_lnL = tree->geo->c_lnL;
1629
cur_tau = tree->geo->tau;
1630
new_tau = tree->geo->tau;
1631
cur_lbda = tree->geo->lbda;
1632
new_lbda = tree->geo->lbda;
1635
mult = EXP(K*(u-0.5));
1637
/* Multiply tau by K */
1638
new_tau = cur_tau * K;
1640
/* Divide lbda by same amount */
1641
new_lbda = cur_lbda / K;
1645
new_lbda < tree->geo->min_lbda || new_lbda > tree->geo->max_lbda ||
1646
new_tau < tree->geo->min_tau || new_tau > tree->geo->max_tau
1649
tree->mcmc->run_move[tree->mcmc->num_move_geo_updown_tau_lbda]++;
1653
tree->geo->tau = new_tau;
1654
tree->geo->lbda = new_lbda;
1656
if(tree->mcmc->use_data) new_lnL = GEO_Lk(tree->geo,tree);
1659
/* Proposal ratio: 2n-2=> number of multiplications, 1=>number of divisions */
1660
ratio += 0.0*LOG(mult); /* (1-1)*LOG(mult); */
1661
/* Likelihood density ratio */
1662
ratio += (new_lnL - cur_lnL);
1664
/* printf("\n. new_tau: %f new_lbda:%f cur_lnL:%f new_lnL:%f",new_tau,new_lbda,cur_lnL,new_lnL); */
1668
alpha = MIN(1.,ratio);
1671
if(u > alpha) /* Reject */
1673
tree->geo->tau = cur_tau;
1674
tree->geo->lbda = cur_lbda;
1675
tree->geo->c_lnL = cur_lnL;
1679
tree->mcmc->acc_move[tree->mcmc->num_move_geo_updown_tau_lbda]++;
1682
tree->mcmc->run_move[tree->mcmc->num_move_geo_updown_tau_lbda]++;
1685
//////////////////////////////////////////////////////////////
1686
//////////////////////////////////////////////////////////////
1689
void MCMC_Geo_Updown_Lbda_Sigma(t_tree *tree)
1691
phydbl K,mult,u,alpha,ratio;
1692
phydbl cur_lnL,new_lnL;
1693
phydbl cur_lbda,new_lbda;
1694
phydbl cur_sigma,new_sigma;
1696
K = tree->mcmc->tune_move[tree->mcmc->num_move_geo_updown_lbda_sigma];
1697
cur_lnL = tree->geo->c_lnL;
1698
new_lnL = tree->geo->c_lnL;
1699
cur_lbda = tree->geo->lbda;
1700
new_lbda = tree->geo->lbda;
1701
cur_sigma = tree->geo->sigma;
1702
new_sigma = tree->geo->sigma;
1705
mult = EXP(K*(u-0.5));
1707
/* Multiply lbda by K */
1708
new_lbda = cur_lbda * K;
1710
/* Divide sigma by same amount */
1711
new_sigma = cur_sigma / K;
1715
new_sigma < tree->geo->min_sigma || new_sigma > tree->geo->max_sigma ||
1716
new_lbda < tree->geo->min_lbda || new_lbda > tree->geo->max_lbda
1719
tree->mcmc->run_move[tree->mcmc->num_move_geo_updown_lbda_sigma]++;
1723
tree->geo->lbda = new_lbda;
1724
tree->geo->sigma = new_sigma;
1726
if(tree->mcmc->use_data) new_lnL = GEO_Lk(tree->geo,tree);
1729
/* Proposal ratio: 2n-2=> number of multiplications, 1=>number of divisions */
1730
ratio += 0.0*LOG(mult); /* (1-1)*LOG(mult); */
1731
/* Likelihood density ratio */
1732
ratio += (new_lnL - cur_lnL);
1734
/* printf("\n. new_lbda: %f new_sigma:%f cur_lnL:%f new_lnL:%f",new_lbda,new_sigma,cur_lnL,new_lnL); */
1738
alpha = MIN(1.,ratio);
1741
if(u > alpha) /* Reject */
1743
tree->geo->lbda = cur_lbda;
1744
tree->geo->sigma = cur_sigma;
1745
tree->geo->c_lnL = cur_lnL;
1749
tree->mcmc->acc_move[tree->mcmc->num_move_geo_updown_lbda_sigma]++;
1752
tree->mcmc->run_move[tree->mcmc->num_move_geo_updown_lbda_sigma]++;
1755
//////////////////////////////////////////////////////////////
1756
//////////////////////////////////////////////////////////////
1758
void GEO_Read_In_Landscape(char *file_name, t_geo *t, phydbl **ldscape, int **loc_hash, t_tree *tree)
1762
phydbl longitude, lattitude;
1763
int i, pos, tax,loc;
1766
PhyML_Printf("\n. Reading landscape file '%s'.\n",file_name);
1768
line = (char *)mCalloc(T_MAX_LINE,sizeof(char));
1769
s = (char *)mCalloc(T_MAX_LINE,sizeof(char));
1770
(*ldscape) = (phydbl *)mCalloc(10*t->n_dim,sizeof(phydbl));
1771
(*loc_hash) = (int *)mCalloc(tree->n_otu,sizeof(int));
1773
fp = Openfile(file_name,0);
1781
if(!fgets(line,T_MAX_LINE,fp)) break;
1783
// Read in taxon name
1787
while(line[pos] == ' ') pos++;
1791
while((line[pos] != ' ') && (line[pos] != '\n') && (line[pos] != '\t'))
1799
if(line[pos] == '\n' || line[pos] == ' ') break;
1803
if(strlen(s) > 0) For(tax,tree->n_otu) if(!strcmp(tree->a_nodes[tax]->name,s)) break;
1805
if(tax == tree->n_otu)
1807
PhyML_Printf("\n== Could not find a taxon with name '%s' in the tree provided.",s);
1808
/* PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__); */
1814
sscanf(line+pos,"%lf %lf",&longitude,&lattitude);
1816
For(loc,t->ldscape_sz)
1818
if(FABS(longitude-(*ldscape)[loc*t->n_dim+0]) < 1.E-10 &&
1819
FABS(lattitude-(*ldscape)[loc*t->n_dim+1]) < 1.E-10)
1825
if(loc == t->ldscape_sz)
1828
(*ldscape)[(t->ldscape_sz-1)*t->n_dim+0] = longitude;
1829
(*ldscape)[(t->ldscape_sz-1)*t->n_dim+1] = lattitude;
1831
printf("\n. new loc: %f %f",longitude,lattitude);
1832
if(!(t->ldscape_sz%10))
1834
(*ldscape) = (phydbl *)mRealloc((*ldscape),(t->ldscape_sz+10)*t->n_dim,sizeof(phydbl));
1838
(*loc_hash)[tax] = loc;
1843
For(tax,tree->n_otu)
1844
PhyML_Printf("\n. Taxon %30s, longitude: %12f, lattitude: %12f [%4d]",
1845
tree->a_nodes[tax]->name,
1846
(*ldscape)[(*loc_hash)[tax]*t->n_dim+0],
1847
(*ldscape)[(*loc_hash)[tax]*t->n_dim+1],
1853
//////////////////////////////////////////////////////////////
1854
//////////////////////////////////////////////////////////////
1859
//////////////////////////////////////////////////////////////
1860
//////////////////////////////////////////////////////////////
1863
//////////////////////////////////////////////////////////////
1864
//////////////////////////////////////////////////////////////