~ubuntu-branches/debian/jessie/arb/jessie

« back to all changes in this revision

Viewing changes to GDE/PHYML20130708/phyml/src/geo.c

  • Committer: Package Import Robot
  • Author(s): Elmar Pruesse, Andreas Tille, Elmar Pruesse
  • Date: 2014-09-02 15:15:06 UTC
  • mfrom: (1.1.6)
  • Revision ID: package-import@ubuntu.com-20140902151506-jihq58b3iz342wif
Tags: 6.0.2-1
[ Andreas Tille ]
* New upstream version
  Closes: #741890
* debian/upstream -> debian/upstream/metadata
* debian/control:
   - Build-Depends: added libglib2.0-dev
   - Depends: added mafft, mrbayes
* debian/rules
   - Add explicite --remove-section=.comment option to manual strip call
* cme fix dpkg-control
* arb-common.dirs: Do not create unneeded lintian dir
* Add turkish debconf translation (thanks for the patch to Mert Dirik
  <mertdirik@gmail.com>)
  Closes: #757497

[ Elmar Pruesse ]
* patches removed:
   - 10_config.makefiles.patch,
     80_no_GL.patch
       removed in favor of creating file from config.makefile.template via 
       sed in debian/control
   - 20_Makefile_main.patch
       merged upstream
   - 21_Makefiles.patch
       no longer needed
   - 30_tmpfile_CVE-2008-5378.patch: 
       merged upstream
   - 50_fix_gcc-4.8.patch:
       merged upstream
   - 40_add_libGLU.patch:
       libGLU not needed for arb_ntree)
   - 60_use_debian_packaged_raxml.patch:
       merged upstream
   - 70_hardening.patch
       merged upstream
   - 72_add_math_lib_to_linker.patch
       does not appear to be needed
* patches added:
   - 10_upstream_r12793__show_db_load_progress:
       backported patch showing progress while ARB is loading a database
       (needed as indicator/splash screen while ARB is launching)
   - 20_upstream_r12794__socket_permissions:
       backported security fix
   - 30_upstream_r12814__desktop_keywords:
       backported add keywords to desktop (fixes lintian warning)
   - 40_upstream_r12815__lintian_spelling:
       backported fix for lintian reported spelling errors
   - 50_private_nameservers
       change configuration to put nameservers into users home dirs
       (avoids need for shared writeable directory)
   - 60_use_debian_phyml
       use phyml from debian package for both interfaces in ARB
* debian/rules:
   - create config.makefile from override_dh_configure target
   - use "make tarfile" in override_dh_install
   - remove extra cleaning not needed for ARB 6
   - use "dh_install --list-missing" to avoid missing files
   - added override_dh_fixperms target
* debian/control:
   - added libarb-dev package
   - Depends: added phyml, xdg-utils
   - Suggests: removed phyml
   - fix lintian duplicate-short-description (new descriptions)
* debian/*.install:
   - "unrolled" confusing globbing to select files
   - pick files from debian/tmp
   - moved all config files to /etc/arb
* debian/arb-common.templates: updated
* scripts:
   - removed arb-add-pt-server
   - launch-wrapper: 
     - only add demo.arb to newly created $ARBUSERDATA
     - pass commandline arguments through bin/arb wrapper
   - preinst: removing old PT server index files on upgrade from 5.5*
   - postinst: set setgid on shared PT dir
* rewrote arb.1 manfile
* added file icon for ARB databases
* using upstream arb_tcp.dat

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*
 
2
 
 
3
PhyML:  a program that  computes maximum likelihood phylogenies from
 
4
DNA or AA homologous sequences.
 
5
 
 
6
Copyright (C) Stephane Guindon. Oct 2003 onward.
 
7
 
 
8
All parts of the source except where indicated are distributed under
 
9
the GNU public licence. See http://www.opensource.org for details.
 
10
 
 
11
*/
 
12
 
 
13
 
 
14
#include "geo.h"
 
15
 
 
16
//////////////////////////////////////////////////////////////
 
17
//////////////////////////////////////////////////////////////
 
18
 
 
19
int GEO_Main(int argc, char **argv)
 
20
{
 
21
  GEO_Simulate_Estimate(argc,argv);
 
22
  /* GEO_Estimate(argc,argv); */
 
23
  return(1);
 
24
}
 
25
 
 
26
//////////////////////////////////////////////////////////////
 
27
//////////////////////////////////////////////////////////////
 
28
 
 
29
int GEO_Estimate(int argc, char **argv)
 
30
{
 
31
  t_geo *t;
 
32
  int seed;
 
33
  FILE *fp;
 
34
  t_tree *tree;
 
35
  phydbl *ldscp;
 
36
  int *loc_hash;
 
37
  int i;
 
38
  phydbl *probs;
 
39
  phydbl sum;
 
40
 
 
41
  // geo ./ban
 
42
 
 
43
 
 
44
  seed = getpid();
 
45
  /* seed = 28224; */
 
46
  /* seed = 21249; */
 
47
  /* seed = 21596; */
 
48
  
 
49
  printf("\n. Seed = %d",seed);
 
50
  srand(seed);
 
51
 
 
52
  t = GEO_Make_Geo_Basic();
 
53
  GEO_Init_Geo_Struct(t);
 
54
 
 
55
  fp = Openfile(argv[1],0); /* Open tree file  */
 
56
 
 
57
  tree = Read_Tree_File_Phylip(fp); /* Read it */
 
58
  Update_Ancestors(tree->n_root,tree->n_root->v[2],tree);
 
59
  Update_Ancestors(tree->n_root,tree->n_root->v[1],tree);               
 
60
  tree->rates = RATES_Make_Rate_Struct(tree->n_otu);
 
61
  RATES_Init_Rate_Struct(tree->rates,NULL,tree->n_otu);
 
62
  Branch_To_Time(tree);
 
63
  tree->geo = t;
 
64
 
 
65
  GEO_Read_In_Landscape(argv[2],t,&ldscp,&loc_hash,tree);
 
66
  
 
67
  GEO_Make_Geo_Complete(t->ldscape_sz,t->n_dim,tree->n_otu,t);
 
68
    
 
69
  For(i,t->ldscape_sz*t->n_dim) t->ldscape[i] = ldscp[i];
 
70
  For(i,tree->n_otu) t->loc[i] = loc_hash[i];
 
71
 
 
72
  t->cov[0*t->n_dim+0] = t->sigma;
 
73
  t->cov[1*t->n_dim+1] = t->sigma;
 
74
  t->cov[0*t->n_dim+1] = 0.0;
 
75
  t->cov[1*t->n_dim+0] = 0.0;
 
76
 
 
77
  GEO_Get_Sigma_Max(t);
 
78
 
 
79
  t->max_sigma = t->sigma_thresh * 2.;
 
80
 
 
81
  PhyML_Printf("\n. Sigma max: %f",t->sigma_thresh);
 
82
 
 
83
  GEO_Get_Locations_Beneath(t,tree);
 
84
 
 
85
  probs = (phydbl *)mCalloc(tree->geo->ldscape_sz,sizeof(phydbl));
 
86
  
 
87
  sum = 0.0;
 
88
  For(i,tree->geo->ldscape_sz) sum += tree->geo->loc_beneath[tree->n_root->num * tree->geo->ldscape_sz + i];
 
89
  For(i,tree->geo->ldscape_sz) probs[i] = tree->geo->loc_beneath[tree->n_root->num * tree->geo->ldscape_sz + i]/sum;
 
90
  tree->geo->loc[tree->n_root->num] = Sample_i_With_Proba_pi(probs,tree->geo->ldscape_sz);        
 
91
  Free(probs);
 
92
 
 
93
  GEO_Randomize_Locations(tree->n_root,t,tree);
 
94
 
 
95
 
 
96
  GEO_Update_Occup(t,tree);
 
97
  GEO_Lk(t,tree);
 
98
 
 
99
  PhyML_Printf("\n. Init loglk: %f",tree->geo->c_lnL);
 
100
 
 
101
  DR_Draw_Tree("essai.ps",tree);
 
102
 
 
103
  GEO_MCMC(tree);
 
104
 
 
105
  fclose(fp);
 
106
  Free(ldscp);
 
107
  Free(loc_hash);
 
108
 
 
109
  return(1);
 
110
}
 
111
 
 
112
//////////////////////////////////////////////////////////////
 
113
//////////////////////////////////////////////////////////////
 
114
 
 
115
int GEO_Simulate_Estimate(int argc, char **argv)
 
116
{
 
117
  t_geo *t;
 
118
  int n_tax;
 
119
  t_tree *tree,*ori_tree;
 
120
  int seed;
 
121
  phydbl *res;
 
122
  FILE *fp;
 
123
  int pid;
 
124
  char *s;
 
125
  int rand_loc;
 
126
  phydbl *probs,sum;
 
127
  int i;
 
128
  phydbl mantel_p_val;
 
129
  phydbl rf;
 
130
 
 
131
  s = (char *)mCalloc(T_MAX_NAME,sizeof(char));
 
132
  
 
133
  strcpy(s,"geo.out");
 
134
  pid = getpid();
 
135
  sprintf(s+strlen(s),".%d",pid);
 
136
 
 
137
  fp = fopen(s,"w");
 
138
 
 
139
  seed = getpid();
 
140
  /* seed = 15520; */
 
141
  seed = 5023;
 
142
  printf("\n. Seed = %d",seed);
 
143
  srand(seed);
 
144
 
 
145
  t = GEO_Make_Geo_Basic();
 
146
  GEO_Init_Geo_Struct(t);
 
147
 
 
148
 
 
149
  /* t->tau        = 3.0; */
 
150
  /* t->lbda       = 0.02; */
 
151
  /* t->sigma      = 10.; */
 
152
 
 
153
 
 
154
  t->ldscape_sz = (int)atoi(argv[1]);
 
155
  t->n_dim      = 2;
 
156
  n_tax         = (int)atoi(argv[2]);
 
157
 
 
158
 
 
159
  GEO_Make_Geo_Complete(t->ldscape_sz,t->n_dim,n_tax,t);
 
160
 
 
161
  
 
162
  t->cov[0*t->n_dim+0] = t->sigma;
 
163
  t->cov[1*t->n_dim+1] = t->sigma;
 
164
  t->cov[0*t->n_dim+1] = 0.0;
 
165
  t->cov[1*t->n_dim+0] = 0.0;
 
166
  
 
167
  GEO_Simulate_Coordinates(t->ldscape_sz,t);
 
168
    
 
169
  GEO_Get_Sigma_Max(t);
 
170
  
 
171
  t->max_sigma = t->sigma_thresh * 2.;
 
172
  
 
173
  PhyML_Printf("\n. sigma max: %f threshold: %f",t->max_sigma,t->sigma_thresh);
 
174
  
 
175
  t->tau   = Uni()*(t->max_tau/100.-t->min_tau*10.)  + t->min_tau*10.;
 
176
  t->lbda  = EXP(Uni()*(LOG(t->max_lbda/100.)-LOG(t->min_lbda*10.))  + LOG(t->min_lbda*10.));
 
177
  t->sigma = Uni()*(t->max_sigma-t->min_sigma) + t->min_sigma;
 
178
 
 
179
    
 
180
  PhyML_Fprintf(fp,"\n# SigmaTrue\t SigmaThresh\t LbdaTrue\t TauTrue\txTrue\t yTrue\t xRand\t yRand\t RF\t Sigma5\t Sigma50\t Sigma95\t ProbSigmaInfThresh\t Lbda5\t Lbda50\t Lbda95\t ProbLbdaInf1\t Tau5\t Tau50\t Tau95\t X5\t X50\t X95\t Y5\t Y50\t Y95\t RandX5\t RandX50\t RandX95\t RandY5\t RandY50\t RandY95\t Mantel\t");
 
181
  PhyML_Fprintf(fp,"\n");
 
182
  
 
183
  tree = GEO_Simulate(t,n_tax);
 
184
  
 
185
  ori_tree = Make_Tree_From_Scratch(tree->n_otu,NULL);
 
186
  Copy_Tree(tree,ori_tree);
 
187
  
 
188
  Random_SPRs_On_Rooted_Tree(tree);
 
189
  
 
190
  Free_Bip(ori_tree);
 
191
  Alloc_Bip(ori_tree);  
 
192
  Get_Bip(ori_tree->a_nodes[0],ori_tree->a_nodes[0]->v[0],ori_tree);
 
193
  
 
194
  Free_Bip(tree);
 
195
  Alloc_Bip(tree);  
 
196
  Get_Bip(tree->a_nodes[0],tree->a_nodes[0]->v[0],tree);
 
197
  Match_Tip_Numbers(tree,ori_tree);
 
198
  
 
199
  rf = (phydbl)Compare_Bip(ori_tree,tree,NO)/(tree->n_otu-3);
 
200
  PhyML_Printf("\n. rf: %f",rf);
 
201
  
 
202
  phydbl scale;
 
203
  scale = EXP(Rnorm(0,0.2));
 
204
  PhyML_Printf("\n. Scale: %f",scale);
 
205
  For(i,2*tree->n_otu-1) tree->rates->nd_t[i] *= scale;
 
206
  
 
207
 
 
208
  phydbl *tree_dist,*geo_dist;
 
209
  int j;
 
210
  
 
211
  Time_To_Branch(tree);
 
212
  tree_dist = Dist_Btw_Tips(tree);
 
213
  
 
214
  geo_dist = (phydbl *)mCalloc(tree->n_otu*tree->n_otu,sizeof(phydbl));
 
215
 
 
216
  For(i,tree->n_otu)
 
217
    {
 
218
      For(j,tree->n_otu)
 
219
        {
 
220
          geo_dist[i*tree->n_otu+j] =
 
221
            FABS(t->ldscape[t->loc[i]*t->n_dim+0] -
 
222
                 t->ldscape[t->loc[j]*t->n_dim+0])+
 
223
            FABS(t->ldscape[t->loc[i]*t->n_dim+1] -
 
224
                 t->ldscape[t->loc[j]*t->n_dim+1]);
 
225
 
 
226
        }
 
227
    }
 
228
 
 
229
  printf("\n. tau: %f lbda: %f sigma: %f",t->tau,t->lbda,t->sigma);
 
230
  mantel_p_val = Mantel(tree_dist,geo_dist,tree->n_otu,tree->n_otu);
 
231
  printf("\n. Mantel test p-value: %f",mantel_p_val);
 
232
 
 
233
  Free(tree_dist);
 
234
  Free(geo_dist);
 
235
 
 
236
  rand_loc = Rand_Int(0,t->ldscape_sz-1);
 
237
 
 
238
  PhyML_Printf("\n. sigma: %f\t lbda: %f\t tau:%f\t x:%f\t y:%f rand.x:%f\t rand.y:%f\t",
 
239
               t->sigma,
 
240
               t->lbda,
 
241
               t->tau,
 
242
               t->ldscape[t->loc[tree->n_root->num]*t->n_dim+0],
 
243
               t->ldscape[t->loc[tree->n_root->num]*t->n_dim+1],
 
244
               t->ldscape[rand_loc*t->n_dim+0],
 
245
               t->ldscape[rand_loc*t->n_dim+1]);
 
246
 
 
247
  PhyML_Fprintf(fp,"%f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t",
 
248
                t->sigma,
 
249
                t->sigma_thresh,
 
250
                t->lbda,
 
251
                t->tau,
 
252
                t->ldscape[t->loc[tree->n_root->num]*t->n_dim+0],
 
253
                t->ldscape[t->loc[tree->n_root->num]*t->n_dim+1],
 
254
                t->ldscape[rand_loc*t->n_dim+0],
 
255
                t->ldscape[rand_loc*t->n_dim+1],
 
256
                rf);
 
257
 
 
258
  GEO_Get_Locations_Beneath(t,tree);
 
259
 
 
260
  probs = (phydbl *)mCalloc(tree->geo->ldscape_sz,sizeof(phydbl));
 
261
  
 
262
  sum = 0.0;
 
263
  For(i,tree->geo->ldscape_sz) sum += tree->geo->loc_beneath[tree->n_root->num * tree->geo->ldscape_sz + i];
 
264
  For(i,tree->geo->ldscape_sz) probs[i] = tree->geo->loc_beneath[tree->n_root->num * tree->geo->ldscape_sz + i]/sum;
 
265
  tree->geo->loc[tree->n_root->num] = Sample_i_With_Proba_pi(probs,tree->geo->ldscape_sz);
 
266
  Free(probs);
 
267
  GEO_Randomize_Locations(tree->n_root,tree->geo,tree);
 
268
 
 
269
  GEO_Update_Occup(t,tree);
 
270
  GEO_Lk(t,tree);
 
271
  PhyML_Printf("\n. Init loglk: %f",tree->geo->c_lnL);
 
272
 
 
273
 
 
274
  res = GEO_MCMC(tree);
 
275
 
 
276
  PhyML_Fprintf(fp,"%f\t %f\t %f\t %f\t  %f\t %f\t %f\t  %f\t  %f\t %f\t %f\t  %f\t %f\t %f\t  %f\t %f\t %f\t  %f\t %f\t %f\t  %f\t %f\t %f\t  %f \n",
 
277
 
 
278
                /* Sigma5 */ Quantile(res+0*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025),
 
279
                /* Sigma50*/ Quantile(res+0*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50),
 
280
                /* Sigma95*/ Quantile(res+0*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975),
 
281
                
 
282
                /* ProbInfThesh */ Prob(res+0*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval,t->sigma_thresh),
 
283
 
 
284
                /* Lbda5 */ Quantile(res+1*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025),
 
285
                /* Lbda50 */ Quantile(res+1*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50),
 
286
                /* Lbda95 */ Quantile(res+1*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975),
 
287
                
 
288
                /* ProbInf1 */ Prob(res+1*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval,1.0),
 
289
                
 
290
                /* Tau5 */ Quantile(res+2*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025),
 
291
                /* Tau50 */ Quantile(res+2*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50),
 
292
                /* Tau 95 */ Quantile(res+2*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975),
 
293
 
 
294
                /* X5 */ Quantile(res+4*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025),
 
295
                /* X50 */ Quantile(res+4*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50),
 
296
                /* X95 */ Quantile(res+4*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975),
 
297
 
 
298
                /* Y5 */ Quantile(res+5*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025),
 
299
                /* Y50 */ Quantile(res+5*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50),
 
300
                /* Y95 */ Quantile(res+5*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975),
 
301
 
 
302
                /* RandX5 */ Quantile(res+6*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025),
 
303
                /* RandX50*/ Quantile(res+6*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50),
 
304
                /* RandX95*/ Quantile(res+6*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975),
 
305
 
 
306
                /* RandY5 */ Quantile(res+7*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025),
 
307
                /* RandY50*/ Quantile(res+7*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50),
 
308
                /* RandY95*/ Quantile(res+7*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975),
 
309
 
 
310
                mantel_p_val
 
311
                );
 
312
  fflush(NULL);
 
313
 
 
314
  Free(s);
 
315
  Free(res);
 
316
 
 
317
  fclose(fp);
 
318
 
 
319
  return 1;
 
320
}
 
321
 
 
322
//////////////////////////////////////////////////////////////
 
323
//////////////////////////////////////////////////////////////
 
324
 
 
325
phydbl *GEO_MCMC(t_tree *tree)
 
326
{
 
327
  phydbl *res;
 
328
  int n_vars;
 
329
  int rand_loc;
 
330
  t_geo *t;
 
331
 
 
332
  t = tree->geo;
 
333
 
 
334
  tree->mcmc = MCMC_Make_MCMC_Struct();
 
335
  MCMC_Complete_MCMC(tree->mcmc,tree);
 
336
  
 
337
  tree->mcmc->io               = NULL;
 
338
  tree->mcmc->is               = NO;
 
339
  tree->mcmc->use_data         = YES;
 
340
  tree->mcmc->run              = 0;
 
341
  tree->mcmc->sample_interval  = 1E+3;
 
342
  tree->mcmc->chain_len        = 1E+6;
 
343
  tree->mcmc->chain_len_burnin = 1E+5;
 
344
  tree->mcmc->randomize        = YES;
 
345
  tree->mcmc->norm_freq        = 1E+3;
 
346
  tree->mcmc->max_tune         = 1.E+20;
 
347
  tree->mcmc->min_tune         = 1.E-10;
 
348
  tree->mcmc->print_every      = 2;
 
349
  tree->mcmc->is_burnin        = NO;
 
350
  tree->mcmc->nd_t_digits      = 1;
 
351
 
 
352
  t->tau   = 1.0;
 
353
  t->lbda  = 1.0;
 
354
  t->sigma = 1.0;
 
355
 
 
356
  tree->mcmc->chain_len = 1.E+8;
 
357
  tree->mcmc->sample_interval = 50;
 
358
  
 
359
  MCMC_Complete_MCMC(tree->mcmc,tree);
 
360
 
 
361
  GEO_Update_Occup(t,tree);
 
362
  GEO_Lk(t,tree);
 
363
 
 
364
  PhyML_Printf("\n. Init loglk: %f",t->c_lnL);
 
365
  
 
366
  tree->mcmc->start_ess[tree->mcmc->num_move_geo_sigma]  = YES;
 
367
  tree->mcmc->start_ess[tree->mcmc->num_move_geo_lambda] = YES;
 
368
  tree->mcmc->start_ess[tree->mcmc->num_move_geo_tau]    = YES;
 
369
 
 
370
  n_vars = 10;
 
371
  res = (phydbl *)mCalloc(tree->mcmc->chain_len / tree->mcmc->sample_interval * n_vars,sizeof(phydbl));
 
372
 
 
373
 
 
374
  tree->mcmc->run = 0;
 
375
  do
 
376
    {
 
377
      MCMC_Geo_Lbda(tree);
 
378
      MCMC_Geo_Tau(tree);
 
379
      MCMC_Geo_Loc(tree);
 
380
 
 
381
      t->update_fmat = YES;
 
382
      MCMC_Geo_Sigma(tree);
 
383
      t->update_fmat = NO;
 
384
 
 
385
 
 
386
      /* t->update_fmat = YES; */
 
387
      /* MCMC_Geo_Updown_Lbda_Sigma(tree); */
 
388
      /* t->update_fmat = NO; */
 
389
 
 
390
 
 
391
      /* MCMC_Geo_Updown_Tau_Lbda(tree); */
 
392
      /* MCMC_Geo_Updown_Tau_Lbda(tree); */
 
393
      /* MCMC_Geo_Updown_Tau_Lbda(tree); */
 
394
 
 
395
      
 
396
      /* printf("\n"); */
 
397
      /* int i; */
 
398
      /* For(i,2*tree->n_otu-1) */
 
399
      /*   { */
 
400
      /*     if(tree->a_nodes[i]->tax == NO) */
 
401
      /*       { */
 
402
      /*         printf("%2d ",tree->geo->loc[i]); */
 
403
      /*       } */
 
404
      /*   } */
 
405
 
 
406
 
 
407
      if(tree->mcmc->run%tree->mcmc->sample_interval == 0)
 
408
        {
 
409
          MCMC_Copy_To_New_Param_Val(tree->mcmc,tree);
 
410
          
 
411
          MCMC_Update_Effective_Sample_Size(tree->mcmc->num_move_geo_lambda,tree->mcmc,tree);
 
412
          MCMC_Update_Effective_Sample_Size(tree->mcmc->num_move_geo_sigma,tree->mcmc,tree);
 
413
          MCMC_Update_Effective_Sample_Size(tree->mcmc->num_move_geo_tau,tree->mcmc,tree);
 
414
 
 
415
          /* printf("\n. lbda:%f,%f tau:%f,%f sigma:%f,%f", */
 
416
          /*        tree->mcmc->acc_rate[tree->mcmc->num_move_geo_lambda], */
 
417
          /*        tree->mcmc->tune_move[tree->mcmc->num_move_geo_lambda], */
 
418
          /*        tree->mcmc->acc_rate[tree->mcmc->num_move_geo_tau], */
 
419
          /*        tree->mcmc->tune_move[tree->mcmc->num_move_geo_tau], */
 
420
          /*        tree->mcmc->acc_rate[tree->mcmc->num_move_geo_sigma], */
 
421
          /*        tree->mcmc->tune_move[tree->mcmc->num_move_geo_sigma]); */
 
422
                 
 
423
          PhyML_Printf("\n. Run %6d Sigma: %12f [%4.0f] Lambda: %12f [%4.0f] Tau: %12f [%4.0f] LogLk: %12f x: %12f y:%12f",
 
424
                       tree->mcmc->run,
 
425
 
 
426
                       t->sigma,
 
427
                       tree->mcmc->ess[tree->mcmc->num_move_geo_sigma],
 
428
 
 
429
                       t->lbda,
 
430
                       tree->mcmc->ess[tree->mcmc->num_move_geo_lambda],
 
431
 
 
432
                       t->tau,
 
433
                       tree->mcmc->ess[tree->mcmc->num_move_geo_tau],
 
434
 
 
435
                       t->c_lnL,
 
436
                       
 
437
                       t->ldscape[t->loc[tree->n_root->num]*t->n_dim+0],
 
438
                       t->ldscape[t->loc[tree->n_root->num]*t->n_dim+1]);
 
439
 
 
440
          rand_loc = Rand_Int(0,t->ldscape_sz-1);
 
441
 
 
442
          res[0 * tree->mcmc->chain_len / tree->mcmc->sample_interval +  tree->mcmc->run / tree->mcmc->sample_interval] = t->sigma; 
 
443
          res[1 * tree->mcmc->chain_len / tree->mcmc->sample_interval +  tree->mcmc->run / tree->mcmc->sample_interval] = t->lbda; 
 
444
          res[2 * tree->mcmc->chain_len / tree->mcmc->sample_interval +  tree->mcmc->run / tree->mcmc->sample_interval] = t->tau; 
 
445
          res[3 * tree->mcmc->chain_len / tree->mcmc->sample_interval +  tree->mcmc->run / tree->mcmc->sample_interval] = t->c_lnL; 
 
446
                    
 
447
          res[4 * tree->mcmc->chain_len / tree->mcmc->sample_interval +  tree->mcmc->run / tree->mcmc->sample_interval] = t->ldscape[t->loc[tree->n_root->num]*t->n_dim+0];
 
448
          res[5 * tree->mcmc->chain_len / tree->mcmc->sample_interval +  tree->mcmc->run / tree->mcmc->sample_interval] = t->ldscape[t->loc[tree->n_root->num]*t->n_dim+1];
 
449
 
 
450
          res[6 * tree->mcmc->chain_len / tree->mcmc->sample_interval +  tree->mcmc->run / tree->mcmc->sample_interval] = t->ldscape[rand_loc*t->n_dim+0];
 
451
          res[7 * tree->mcmc->chain_len / tree->mcmc->sample_interval +  tree->mcmc->run / tree->mcmc->sample_interval] = t->ldscape[rand_loc*t->n_dim+1];
 
452
        }
 
453
 
 
454
      tree->mcmc->run++;
 
455
 
 
456
      if(tree->mcmc->ess[tree->mcmc->num_move_geo_sigma] > 200.) tree->mcmc->adjust_tuning[tree->mcmc->num_move_geo_sigma]  = NO;
 
457
      if(tree->mcmc->ess[tree->mcmc->num_move_geo_tau]   > 200.) tree->mcmc->adjust_tuning[tree->mcmc->num_move_geo_tau]    = NO;
 
458
      if(tree->mcmc->ess[tree->mcmc->num_move_geo_lambda]> 200.) tree->mcmc->adjust_tuning[tree->mcmc->num_move_geo_lambda] = NO;
 
459
      
 
460
      MCMC_Get_Acc_Rates(tree->mcmc);
 
461
 
 
462
      if(tree->mcmc->ess[tree->mcmc->num_move_geo_sigma] > 1000. &&
 
463
         tree->mcmc->ess[tree->mcmc->num_move_geo_tau]   > 1000. &&
 
464
         tree->mcmc->ess[tree->mcmc->num_move_geo_lambda]> 1000.) break;
 
465
 
 
466
 
 
467
    }
 
468
  while(tree->mcmc->run < tree->mcmc->chain_len);
 
469
 
 
470
  return(res);
 
471
  
 
472
}
 
473
 
 
474
//////////////////////////////////////////////////////////////
 
475
//////////////////////////////////////////////////////////////
 
476
 
 
477
t_geo *GEO_Make_Geo_Basic()
 
478
{
 
479
  t_geo *t;
 
480
  t = (t_geo *)mCalloc(1,sizeof(t_geo));
 
481
  return(t);  
 
482
}
 
483
 
 
484
//////////////////////////////////////////////////////////////
 
485
//////////////////////////////////////////////////////////////
 
486
 
 
487
void GEO_Make_Geo_Complete(int ldscape_sz, int n_dim, int n_tax, t_geo *t)
 
488
{
 
489
 
 
490
  // F matrix
 
491
  t->f_mat = (phydbl *)mCalloc(ldscape_sz*ldscape_sz,sizeof(phydbl));
 
492
 
 
493
  // R matrix
 
494
  t->r_mat = (phydbl *)mCalloc(ldscape_sz*ldscape_sz,sizeof(phydbl));
 
495
 
 
496
  // Occupation vectors: one vector for each node
 
497
  t->occup = (int *)mCalloc((2*n_tax-1)*ldscape_sz,sizeof(int));
 
498
 
 
499
  // Locations
 
500
  t->ldscape = (phydbl *)mCalloc((ldscape_sz*n_dim),sizeof(phydbl));
 
501
 
 
502
  // Lineage locations
 
503
  t->loc = (int *)mCalloc((int)(2*n_tax-1),sizeof(int));
 
504
 
 
505
  // Sorted node heights
 
506
  t->sorted_nd = (t_node **)mCalloc((int)(2*n_tax-1),sizeof(t_node *));
 
507
 
 
508
  // Covariance matrix
 
509
  t->cov = (phydbl *)mCalloc((int)(n_dim*n_dim),sizeof(phydbl));
 
510
 
 
511
  // gives the location occupied beneath each node in the tree
 
512
  t->loc_beneath = (int *)mCalloc((int)(2*n_tax-1)*ldscape_sz,sizeof(int));
 
513
}
 
514
 
 
515
//////////////////////////////////////////////////////////////
 
516
//////////////////////////////////////////////////////////////
 
517
 
 
518
void Free_Geo(t_geo *t)
 
519
{
 
520
  Free(t->f_mat);
 
521
  Free(t->r_mat);
 
522
  Free(t->occup);
 
523
  Free(t->loc);
 
524
  Free(t->ldscape);
 
525
  Free(t->sorted_nd);
 
526
  Free(t->cov);
 
527
  Free(t->loc_beneath);
 
528
}
 
529
 
 
530
//////////////////////////////////////////////////////////////
 
531
//////////////////////////////////////////////////////////////
 
532
 
 
533
// Update F matrix. Assume a diagonal covariance matrix.
 
534
void GEO_Update_Fmat(t_geo *t)
 
535
{
 
536
  phydbl *loc1, *loc2;
 
537
  int i,j,k;
 
538
  int err;
 
539
  phydbl lognloc;
 
540
  
 
541
  For(i,t->n_dim) t->cov[i*t->n_dim+i] = t->sigma; // Diagonal covariance matrix. Same variance in every direction
 
542
 
 
543
  lognloc = LOG(t->ldscape_sz);
 
544
 
 
545
  // Fill in F matrix;
 
546
  for(i=0;i<t->ldscape_sz;i++)
 
547
    {
 
548
      loc1 = t->ldscape + i*t->n_dim;
 
549
 
 
550
      for(j=i;j<t->ldscape_sz;j++)
 
551
        {
 
552
          loc2 = t->ldscape + j*t->n_dim;
 
553
 
 
554
          t->f_mat[i*t->ldscape_sz+j] = .0;
 
555
 
 
556
          // Calculate log(f(l_i,l_j)) - log(f(l_i,l_i) - log(m) 
 
557
          For(k,t->n_dim) t->f_mat[i*t->ldscape_sz+j] += Log_Dnorm(loc2[k],loc1[k],t->cov[k*t->n_dim+k],&err);
 
558
          t->f_mat[i*t->ldscape_sz+j] -= lognloc;
 
559
          For(k,t->n_dim) t->f_mat[i*t->ldscape_sz+j] -= Log_Dnorm(loc1[k],loc1[k],t->cov[k*t->n_dim+k],&err);
 
560
 
 
561
          // Take the exponential
 
562
          t->f_mat[i*t->ldscape_sz+j] = EXP(t->f_mat[i*t->ldscape_sz+j]);
 
563
          
 
564
          // Matrix is symmetric
 
565
          t->f_mat[j*t->ldscape_sz+i] = t->f_mat[i*t->ldscape_sz+j];
 
566
 
 
567
          /* printf("\n. f[%d,%d] = %f (1:[%f;%f] 2:[%f;%f]) sigma=%f",i,j,t->f_mat[i*t->ldscape_sz+j],loc1[0],loc1[1],loc2[0],loc2[1],SQRT(t->cov[0*t->n_dim+0])); */
 
568
        }
 
569
    }
 
570
}
 
571
 
 
572
//////////////////////////////////////////////////////////////
 
573
//////////////////////////////////////////////////////////////
 
574
 
 
575
// Sort node heights from oldest to youngest age.
 
576
void GEO_Update_Sorted_Nd(t_geo *t, t_tree *tree)
 
577
{
 
578
  int i;
 
579
  int swap;
 
580
  t_node *buff;
 
581
 
 
582
  buff = NULL;
 
583
 
 
584
  For(i,2*tree->n_otu-1) t->sorted_nd[i] = tree->a_nodes[i];
 
585
 
 
586
  // Bubble sort of the node heights
 
587
  do
 
588
    {
 
589
      swap = NO;
 
590
      For(i,2*tree->n_otu-2) 
 
591
        {
 
592
          if(tree->rates->nd_t[t->sorted_nd[i+1]->num] < tree->rates->nd_t[t->sorted_nd[i]->num])
 
593
            {
 
594
              buff              = t->sorted_nd[i];
 
595
              t->sorted_nd[i]   = t->sorted_nd[i+1];
 
596
              t->sorted_nd[i+1] = buff;
 
597
 
 
598
              swap = YES;
 
599
            }
 
600
        }
 
601
    }
 
602
  while(swap == YES);
 
603
}
 
604
 
 
605
//////////////////////////////////////////////////////////////
 
606
//////////////////////////////////////////////////////////////
 
607
 
 
608
// Update the set of vectors of occupation along the tree
 
609
void GEO_Update_Occup(t_geo *t, t_tree *tree)
 
610
{
 
611
  int i,j;
 
612
  t_node *v1, *v2;
 
613
 
 
614
  GEO_Update_Sorted_Nd(t,tree);
 
615
 
 
616
  For(i,t->ldscape_sz*(2*tree->n_otu-1)) t->occup[i] = 0;
 
617
  
 
618
  t->occup[tree->n_root->num*t->ldscape_sz + t->loc[tree->n_root->num]] = 1;
 
619
  
 
620
  for(i=1;i<2*tree->n_otu-1;i++)
 
621
    {
 
622
      For(j,t->ldscape_sz) 
 
623
        {
 
624
          t->occup[t->sorted_nd[i]->num*t->ldscape_sz + j] = 
 
625
            t->occup[t->sorted_nd[i-1]->num*t->ldscape_sz + j];
 
626
        }
 
627
 
 
628
      
 
629
      if(t->sorted_nd[i-1]->tax == NO)
 
630
        {
 
631
          v1 = v2 = NULL;
 
632
          if(t->sorted_nd[i-1] == tree->n_root)
 
633
            {
 
634
              v1 = tree->n_root->v[1];
 
635
              v2 = tree->n_root->v[2];
 
636
            }
 
637
          else
 
638
            {
 
639
              For(j,3)
 
640
                {
 
641
                  if(t->sorted_nd[i-1]->v[j] != t->sorted_nd[i-1]->anc &&
 
642
                     t->sorted_nd[i-1]->b[j] != tree->e_root)
 
643
                    {
 
644
                      if(!v1) v1 = t->sorted_nd[i-1]->v[j];
 
645
                      else    v2 = t->sorted_nd[i-1]->v[j];
 
646
                    }
 
647
                }
 
648
            }
 
649
 
 
650
          
 
651
          if(t->loc[v1->num] != t->loc[t->sorted_nd[i-1]->num])
 
652
            {
 
653
              t->occup[t->sorted_nd[i]->num * t->ldscape_sz + t->loc[v1->num]]++;
 
654
            }
 
655
          else
 
656
            {
 
657
              t->occup[t->sorted_nd[i]->num * t->ldscape_sz + t->loc[v2->num]]++;
 
658
            }
 
659
        }
 
660
    }
 
661
 
 
662
  /* printf("\n"); */
 
663
  /* For(i,2*tree->n_otu-1) */
 
664
  /*   { */
 
665
  /*     printf("\n. Node %3d: ",t->sorted_nd[i]->num); */
 
666
  /*     For(j,t->ldscape_sz) */
 
667
  /*       { */
 
668
  /*         /\* printf("%3d [%12f;%12f]   ", *\/ */
 
669
  /*         /\*        t->occup[t->sorted_nd[i]->num*t->ldscape_sz + j], *\/ */
 
670
  /*         /\*        t->ldscape[j*t->n_dim+0],t->ldscape[j*t->n_dim+1]); *\/ */
 
671
  /*         /\* printf("%3d ", *\/ */
 
672
  /*         /\*        t->occup[t->sorted_nd[i]->num*t->ldscape_sz + j]); *\/ */
 
673
  /*       } */
 
674
  /*   } */
 
675
}
 
676
 
 
677
//////////////////////////////////////////////////////////////
 
678
//////////////////////////////////////////////////////////////
 
679
 
 
680
// Calculate R mat at node n
 
681
void GEO_Update_Rmat(t_node *n, t_geo *t, t_tree *tree)
 
682
{
 
683
  int i,j;
 
684
  phydbl lbda_j;
 
685
 
 
686
  For(i,t->ldscape_sz)
 
687
    {
 
688
      For(j,t->ldscape_sz)
 
689
        {
 
690
          lbda_j = ((t->occup[n->num*t->ldscape_sz + j]==0) ? (1.0) : (t->lbda));
 
691
          t->r_mat[i*t->ldscape_sz+j] = t->f_mat[i*t->ldscape_sz+j] * lbda_j * t->tau;          
 
692
        }
 
693
    }
 
694
}
 
695
 
 
696
//////////////////////////////////////////////////////////////
 
697
//////////////////////////////////////////////////////////////
 
698
 
 
699
phydbl GEO_Lk(t_geo *t, t_tree *tree)
 
700
{
 
701
  int i,j;
 
702
  phydbl loglk;
 
703
  phydbl R;
 
704
  int dep,arr; // departure and arrival location indices;
 
705
  t_node *curr_n,*prev_n,*v1,*v2;
 
706
  phydbl sum;
 
707
 
 
708
  GEO_Update_Occup(t,tree);     // Same here.
 
709
 
 
710
  if(t->update_fmat == YES) GEO_Update_Fmat(t);
 
711
 
 
712
  prev_n = NULL;
 
713
  curr_n = NULL;
 
714
  loglk = .0;
 
715
  for(i=1;i<tree->n_otu-1;i++) // Consider all the time slices, from oldest to youngest. 
 
716
                               // Start at first node below root
 
717
    {
 
718
      prev_n = t->sorted_nd[i-1]; // node just above
 
719
      curr_n = t->sorted_nd[i];   // current node
 
720
      
 
721
      GEO_Update_Rmat(curr_n,t,tree); // NOTE: don't need to do that every time. Add check later.
 
722
 
 
723
      R = GEO_Total_Migration_Rate(curr_n,t); // Total migration rate calculated at node n
 
724
 
 
725
      /* if(i < 2) */
 
726
      /*   { */
 
727
      /*     printf("\n. t0: %f t1: %f  R: %f tau: %f sigma: %f lbda: %f x1: %f y1: %f x2: %f y2: %f log0: %d loc1: %d rij: %G fij: %G", */
 
728
      /*            tree->rates->nd_t[t->sorted_nd[i-1]->num], */
 
729
      /*            tree->rates->nd_t[t->sorted_nd[i]->num], */
 
730
      /*            R, */
 
731
      /*            t->tau,                  */
 
732
      /*            t->lbda,                  */
 
733
      /*            t->sigma,                  */
 
734
      /*            t->ldscape[t->loc[tree->geo->sorted_nd[i-1]->num]*t->n_dim+0], */
 
735
      /*            t->ldscape[t->loc[tree->geo->sorted_nd[i-1]->num]*t->n_dim+1], */
 
736
      /*            t->ldscape[t->loc[tree->geo->sorted_nd[i]->num]*t->n_dim+0], */
 
737
      /*            t->ldscape[t->loc[tree->geo->sorted_nd[i]->num]*t->n_dim+1], */
 
738
      /*            t->loc[tree->geo->sorted_nd[i-1]->num], */
 
739
      /*            t->loc[tree->geo->sorted_nd[i]->num], */
 
740
      /*            t->r_mat[t->loc[tree->geo->sorted_nd[i-1]->num] * t->ldscape_sz + t->loc[tree->geo->sorted_nd[i]->num]], */
 
741
      /*            t->f_mat[t->loc[tree->geo->sorted_nd[i-1]->num] * t->ldscape_sz + t->loc[tree->geo->sorted_nd[i]->num]] */
 
742
      /*            );           */
 
743
          
 
744
      /*     printf("\n >> "); */
 
745
      /*     int j; */
 
746
      /*     For(j,t->ldscape_sz)  */
 
747
      /*       if(t->occup[t->sorted_nd[i]->num * t->ldscape_sz + j] > 0)  */
 
748
      /*         printf("%f %f -- ", */
 
749
      /*                t->ldscape[j*t->n_dim+0], */
 
750
      /*                t->ldscape[j*t->n_dim+1]); */
 
751
      /*   } */
 
752
 
 
753
      
 
754
      /* printf("\n. %d %d (%d) %f %p %p \n",i,curr_n->num,curr_n->tax,tree->rates->nd_t[curr_n->num],curr_n->v[1],curr_n->v[2]); */
 
755
 
 
756
      v1 = v2 = NULL;
 
757
      For(j,3) 
 
758
        if(curr_n->v[j] != curr_n->anc && curr_n->b[j] != tree->e_root)
 
759
          {
 
760
            if(!v1) v1 = curr_n->v[j];
 
761
            else    v2 = curr_n->v[j];
 
762
          }
 
763
 
 
764
      dep = t->loc[curr_n->num]; // departure location
 
765
      arr =                      // arrival location
 
766
        (t->loc[v1->num] == t->loc[curr_n->num] ? 
 
767
         t->loc[v2->num] : 
 
768
         t->loc[v1->num]);
 
769
      
 
770
      /* printf("\n%f\t%f", */
 
771
      /*        t->ldscape[arr*t->n_dim+0]-t->ldscape[dep*t->n_dim+0], */
 
772
      /*        t->ldscape[arr*t->n_dim+1]-t->ldscape[dep*t->n_dim+1]); */
 
773
 
 
774
      loglk -= R * FABS(tree->rates->nd_t[curr_n->num] - tree->rates->nd_t[prev_n->num]);
 
775
      loglk += LOG(t->r_mat[dep * t->ldscape_sz + arr]);
 
776
 
 
777
      /* printf("\n <> %d %f %f %d %d v1:%d v2:%d anc:%d %d %d %d", */
 
778
      /*        curr_n->num, */
 
779
      /*        R * FABS(tree->rates->nd_t[curr_n->num] - tree->rates->nd_t[prev_n->num]), */
 
780
      /*        LOG(t->r_mat[dep * t->ldscape_sz + arr]), */
 
781
      /*        dep,arr,v1->num,v2->num,curr_n->anc->num,curr_n->v[0]->num,curr_n->v[1]->num,curr_n->v[2]->num); */
 
782
 
 
783
      /* if(i<2) */
 
784
      /*   { */
 
785
          /* printf("\n. R = %f r_mat = %f f_mat = %f dt = %f loglk = %f", */
 
786
          /*        R, */
 
787
          /*        t->r_mat[dep * t->ldscape_sz + arr], */
 
788
          /*        t->f_mat[dep * t->ldscape_sz + arr], */
 
789
          /*        FABS(t->sorted_nd[i] - t->sorted_nd[i-1]),loglk); */
 
790
          /* Exit("\n"); */
 
791
        /* } */
 
792
 
 
793
    }
 
794
 
 
795
 
 
796
  // Likelihood for the first 'slice' (i.e., the part just below the root down to
 
797
  // the next node)
 
798
  GEO_Update_Rmat(tree->n_root,t,tree);
 
799
 
 
800
  loglk -= LOG(t->ldscape_sz); 
 
801
  dep = t->loc[tree->n_root->num];
 
802
  arr = 
 
803
    (t->loc[tree->n_root->num] != t->loc[tree->n_root->v[1]->num] ? 
 
804
     t->loc[tree->n_root->v[1]->num] :
 
805
     t->loc[tree->n_root->v[2]->num]);
 
806
  
 
807
  /* printf("\n %f %f",t->ldscape[dep],t->ldscape[arr]); */
 
808
 
 
809
  loglk += LOG(t->r_mat[dep * t->ldscape_sz + arr]);
 
810
    
 
811
  sum = .0;
 
812
  For(i,t->ldscape_sz) sum += t->r_mat[dep * t->ldscape_sz + i];
 
813
  
 
814
  loglk -= LOG(sum);
 
815
 
 
816
  tree->geo->c_lnL = loglk;
 
817
 
 
818
  return loglk;
 
819
}
 
820
 
 
821
//////////////////////////////////////////////////////////////
 
822
//////////////////////////////////////////////////////////////
 
823
 
 
824
void GEO_Init_Tloc_Tips(t_geo *t, t_tree *tree)
 
825
{
 
826
  int i;
 
827
 
 
828
  // TO DO
 
829
  For(i,tree->n_otu)
 
830
    {
 
831
      t->loc[i] = i;
 
832
    }
 
833
}
 
834
 
 
835
//////////////////////////////////////////////////////////////
 
836
//////////////////////////////////////////////////////////////
 
837
 
 
838
// Do not forget to call GEO_Update_Rmat (with node n) before calling this function
 
839
phydbl GEO_Total_Migration_Rate(t_node *n, t_geo *t)
 
840
{
 
841
  phydbl R;
 
842
  int i,j;
 
843
 
 
844
  R = .0;
 
845
 
 
846
  For(i,t->ldscape_sz)
 
847
    {
 
848
      For(j,t->ldscape_sz)
 
849
        {
 
850
          R += 
 
851
            t->r_mat[i * t->ldscape_sz + j] * 
 
852
            t->occup[n->num * t->ldscape_sz + i];
 
853
        }
 
854
    }
 
855
 
 
856
  return R;
 
857
}
 
858
 
 
859
//////////////////////////////////////////////////////////////
 
860
//////////////////////////////////////////////////////////////
 
861
 
 
862
// Find the arrival location for the migration leaving from n
 
863
int GEO_Get_Arrival_Location(t_node *n, t_geo *t, t_tree *tree)
 
864
{
 
865
  int i;
 
866
  t_node *v1, *v2; // the two daughters of n
 
867
 
 
868
  v1 = v2 = NULL;
 
869
 
 
870
  For(i,3)
 
871
    {
 
872
      if(n->v[i] && n->v[i] != n->anc)
 
873
        {
 
874
          if(!v1) v1 = n->v[i];
 
875
          else    v2 = n->v[i];
 
876
        }
 
877
    }
 
878
 
 
879
  if(t->loc[v1->num] == t->loc[v2->num]) // Migrated to the same location as that of n
 
880
    {
 
881
      if(t->loc[n->num] != t->loc[v1->num])
 
882
        {
 
883
          PhyML_Printf("\n== Error detected in location labeling.");
 
884
          PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__);
 
885
          Exit("\n");    
 
886
        }
 
887
      else
 
888
        return t->loc[n->num];
 
889
    }
 
890
  else // Migrated to a different spot
 
891
    {
 
892
      if((t->loc[v1->num] != t->loc[n->num]) && (t->loc[v2->num] != t->loc[n->num]))
 
893
        {
 
894
          PhyML_Printf("\n== Error detected in location labeling.");
 
895
          PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__);
 
896
          Exit("\n");    
 
897
        }
 
898
      else
 
899
        {
 
900
          if(t->loc[v1->num] == t->loc[n->num]) return t->loc[v2->num]; // v2 gets the new location, v1 inheritates from n
 
901
          else                                  return t->loc[v1->num]; // v1 gets the new location, v2 inheritates from n
 
902
        }
 
903
    }
 
904
  return -1;
 
905
}
 
906
 
 
907
//////////////////////////////////////////////////////////////
 
908
//////////////////////////////////////////////////////////////
 
909
 
 
910
t_tree *GEO_Simulate(t_geo *t, int n_otu)
 
911
{
 
912
  t_tree *tree;
 
913
  int n_branching_nodes;
 
914
  t_node **branching_nodes; // vector of nodes available for branching out
 
915
  phydbl *p_branch; // p_branch[i]: proba that the i-th node in branching_nodes branches out (p_x vector in the article)
 
916
  phydbl *p_mig; // p_branch[i]: proba of migrating to location i from the location of the edge branching out (q_i vector in the article)
 
917
  int hit;
 
918
  phydbl time;
 
919
  int dep, arr;
 
920
  int i,j;
 
921
  phydbl sum;
 
922
  phydbl R;
 
923
  int *occup; // occupation vector. Updated as we move down the tree
 
924
  int nd_idx;
 
925
  t_node *buff_nd;
 
926
  phydbl buff_t;
 
927
  int buff_l;
 
928
  int swap;
 
929
 
 
930
  tree = Make_Tree_From_Scratch(n_otu,NULL);
 
931
  tree->rates = RATES_Make_Rate_Struct(tree->n_otu);
 
932
  RATES_Init_Rate_Struct(tree->rates,NULL,tree->n_otu);
 
933
  tree->n_root = tree->a_nodes[2*tree->n_otu-2]; // Set the root node to the last element in the list of nodes
 
934
  tree->geo = t;
 
935
 
 
936
 
 
937
  For(i,2*tree->n_otu-2) tree->rates->nd_t[i] = -1.;
 
938
 
 
939
  occup = (int *)mCalloc(t->ldscape_sz,sizeof(int));
 
940
  
 
941
  GEO_Update_Fmat(t);
 
942
 
 
943
  branching_nodes = (t_node **)mCalloc(tree->n_otu,sizeof(t_node *));
 
944
  branching_nodes[0] = tree->n_root;
 
945
  n_branching_nodes  = 1;
 
946
  nd_idx = 0;
 
947
 
 
948
  
 
949
  p_branch = (phydbl *)mCalloc(tree->n_otu,sizeof(phydbl ));
 
950
  p_branch[0] = 1.0;
 
951
 
 
952
  p_mig = (phydbl *)mCalloc(t->ldscape_sz,sizeof(phydbl ));
 
953
 
 
954
  time = 0.0; // Time at the root node (this will be changed afterward)
 
955
 
 
956
  // Sample a location uniformly for the root
 
957
  t->loc[tree->n_root->num] = Rand_Int(0,t->ldscape_sz-1);
 
958
  
 
959
  // Update the occupancy vector
 
960
  occup[t->loc[tree->n_root->num]] = 1;
 
961
 
 
962
  dep = arr = -1;
 
963
 
 
964
 
 
965
 // total migration rate
 
966
  R = 0.0;
 
967
  For(i,t->ldscape_sz) 
 
968
    {
 
969
      R += 
 
970
        t->f_mat[t->loc[tree->n_root->num]*t->ldscape_sz+i] * 
 
971
        ((occup[i] == 0) ? (1.0) : (t->lbda)) * 
 
972
        t->tau;
 
973
    }
 
974
 
 
975
  do
 
976
    {      
 
977
      // Select the node that branches out
 
978
      hit = Sample_i_With_Proba_pi(p_branch,n_branching_nodes);
 
979
      
 
980
      /* printf("\n. [%d] Select node %d (location %d)",n_branching_nodes,branching_nodes[hit]->num,t->loc[branching_nodes[hit]->num]); */
 
981
 
 
982
      // Set the time for the branching node
 
983
      tree->rates->nd_t[branching_nodes[hit]->num] = time;
 
984
 
 
985
 
 
986
      /* printf("\n. Set its time to %f",time); */
 
987
 
 
988
      // Select the destination location
 
989
      dep = t->loc[branching_nodes[hit]->num]; // Departure point
 
990
           
 
991
      sum = .0;
 
992
      For(i,t->ldscape_sz) // Total rate of migration out of departure point
 
993
        {
 
994
          p_mig[i] = 
 
995
            t->f_mat[dep*t->ldscape_sz+i] * 
 
996
            ((occup[i] == 0) ? (1.0) : (t->lbda)) * 
 
997
            t->tau;
 
998
 
 
999
          sum += p_mig[i];
 
1000
        }      
 
1001
      For(i,t->ldscape_sz) p_mig[i] /= sum;
 
1002
 
 
1003
      arr = Sample_i_With_Proba_pi(p_mig,t->ldscape_sz);
 
1004
 
 
1005
      /* printf("\n. Migrate from %d [%5.2f,%5.2f] to %d [%5.2f,%5.2f]", */
 
1006
      /*        dep, */
 
1007
      /*        t->ldscape[dep*t->n_dim+0], */
 
1008
      /*        t->ldscape[dep*t->n_dim+1], */
 
1009
      /*        arr, */
 
1010
      /*        t->ldscape[arr*t->n_dim+0], */
 
1011
      /*        t->ldscape[arr*t->n_dim+1]); */
 
1012
 
 
1013
      /* printf("\n%f\t%f", */
 
1014
      /*        t->ldscape[arr*t->n_dim+0]-t->ldscape[dep*t->n_dim+0], */
 
1015
      /*        t->ldscape[arr*t->n_dim+1]-t->ldscape[dep*t->n_dim+1]); */
 
1016
             
 
1017
 
 
1018
      // Update vector of occupation
 
1019
      occup[arr]++;
 
1020
      
 
1021
      /* printf("\n. Remove %d. Add %d and %d",branching_nodes[hit]->num,tree->a_nodes[nd_idx]->num,tree->a_nodes[nd_idx+1]->num); */
 
1022
      // Connect two new nodes to the node undergoing a branching event
 
1023
      tree->a_nodes[nd_idx]->v[0]   = branching_nodes[hit];
 
1024
      tree->a_nodes[nd_idx+1]->v[0] = branching_nodes[hit];
 
1025
      branching_nodes[hit]->v[1] = tree->a_nodes[nd_idx];
 
1026
      branching_nodes[hit]->v[2] = tree->a_nodes[nd_idx+1];
 
1027
 
 
1028
      // update branching_nodes vector. Element 'hit' is being replaced so that the corresponding node can no longer branch out
 
1029
      branching_nodes[hit]                 = tree->a_nodes[nd_idx];
 
1030
      branching_nodes[n_branching_nodes]   = tree->a_nodes[nd_idx+1];
 
1031
 
 
1032
      // Update t_loc vector.
 
1033
      t->loc[tree->a_nodes[nd_idx]->num]   = dep;
 
1034
      t->loc[tree->a_nodes[nd_idx+1]->num] = arr;
 
1035
 
 
1036
      // Update total migration rate 
 
1037
      R = .0;
 
1038
      For(i,t->ldscape_sz)
 
1039
        {
 
1040
          if(occup[i] > 0)
 
1041
            {
 
1042
              For(j,t->ldscape_sz)
 
1043
                {
 
1044
                  R += 
 
1045
                    occup[i] *
 
1046
                    t->f_mat[i*t->ldscape_sz+j] * 
 
1047
                    ((occup[j] == 0) ? (1.0) : (t->lbda)) *
 
1048
                    t->tau;
 
1049
                }
 
1050
            }
 
1051
        }
 
1052
 
 
1053
      // Set the time until next branching event
 
1054
      time = time + Rexp(R);
 
1055
    
 
1056
      // Update p_branch vector
 
1057
      For(i,n_branching_nodes+1)
 
1058
        {
 
1059
          dep = t->loc[branching_nodes[i]->num];
 
1060
          p_branch[i] = 0.0;
 
1061
          For(j,t->ldscape_sz)
 
1062
            {
 
1063
              p_branch[i] +=
 
1064
                t->f_mat[dep*t->ldscape_sz+j] * 
 
1065
                ((occup[j] == 0) ? (1.0) : (t->lbda)) * 
 
1066
                t->tau / R;
 
1067
 
 
1068
              /* printf("\n. %f %f %f %f", */
 
1069
              /*        R, */
 
1070
              /*        t->f_mat[dep*t->ldscape_sz+j], */
 
1071
              /*        ((occup[j]>0) ? (t->lbda) : (1.0)), */
 
1072
              /*        t->tau); */
 
1073
            }
 
1074
          /* printf("\n. %f ",p_branch[i]); */
 
1075
        }
 
1076
 
 
1077
              
 
1078
      // Increase the number of branching nodes by one (you just added 2 new and removed 1 old)
 
1079
      n_branching_nodes++;
 
1080
      nd_idx += 2;
 
1081
 
 
1082
    }
 
1083
  while(n_branching_nodes < n_otu);
 
1084
 
 
1085
 
 
1086
  // Set the times at the tips
 
1087
  For(i,2*tree->n_otu-1) if(tree->rates->nd_t[i] < 0.0) tree->rates->nd_t[i] = time;
 
1088
  
 
1089
  // Reverse time scale
 
1090
  For(i,2*tree->n_otu-1) tree->rates->nd_t[i] -= time;
 
1091
  /* For(i,2*tree->n_otu-1) tree->rates->nd_t[i] = FABS(tree->rates->nd_t[i]); */
 
1092
 
 
1093
  //  Bubble sort to put all the tips at the top of the tree->a_nodes array
 
1094
  do
 
1095
    {
 
1096
      swap = NO;
 
1097
      For(i,2*tree->n_otu-2)
 
1098
        {
 
1099
          if(!tree->a_nodes[i+1]->v[1] && tree->a_nodes[i]->v[1])
 
1100
            {
 
1101
              buff_nd            = tree->a_nodes[i+1];
 
1102
              tree->a_nodes[i+1] = tree->a_nodes[i];
 
1103
              tree->a_nodes[i]   = buff_nd;
 
1104
 
 
1105
              buff_t                 = tree->rates->nd_t[i+1];
 
1106
              tree->rates->nd_t[i+1] = tree->rates->nd_t[i];
 
1107
              tree->rates->nd_t[i]   = buff_t;
 
1108
 
 
1109
              buff_l      = t->loc[i+1];
 
1110
              t->loc[i+1] = t->loc[i];
 
1111
              t->loc[i]   = buff_l;
 
1112
 
 
1113
              swap = YES;
 
1114
            }
 
1115
        }
 
1116
    }
 
1117
  while(swap == YES);
 
1118
 
 
1119
 
 
1120
  // The rest below is just bookeeping...
 
1121
 
 
1122
 
 
1123
  For(i,2*tree->n_otu-1) tree->a_nodes[i]->num = i;
 
1124
  For(i,2*tree->n_otu-1) 
 
1125
    {
 
1126
      if(i < tree->n_otu) tree->a_nodes[i]->tax = YES;
 
1127
      else                tree->a_nodes[i]->tax = NO;
 
1128
    }
 
1129
 
 
1130
  /* printf("\n++++++++++++++++++\n"); */
 
1131
  /* For(i,2*tree->n_otu-1) */
 
1132
  /*   { */
 
1133
  /*     printf("\n. Node %3d [%p] anc:%3d v1:%3d v2:%3d time: %f", */
 
1134
  /*            tree->a_nodes[i]->num, */
 
1135
  /*            (void *)tree->a_nodes[i], */
 
1136
  /*            tree->a_nodes[i]->v[0] ? tree->a_nodes[i]->v[0]->num : -1, */
 
1137
  /*            tree->a_nodes[i]->v[1] ? tree->a_nodes[i]->v[1]->num : -1, */
 
1138
  /*            tree->a_nodes[i]->v[2] ? tree->a_nodes[i]->v[2]->num : -1, */
 
1139
  /*            tree->rates->nd_t[i]);              */
 
1140
    /* } */
 
1141
 
 
1142
 
 
1143
  For(i,tree->n_otu) 
 
1144
    {
 
1145
      if(!tree->a_nodes[i]->name) tree->a_nodes[i]->name = (char *)mCalloc(T_MAX_NAME,sizeof(char));
 
1146
      strcpy(tree->a_nodes[i]->name,"x");
 
1147
      sprintf(tree->a_nodes[i]->name+1,"%d",i);      
 
1148
    }
 
1149
 
 
1150
  tree->n_root->v[1]->v[0] = tree->n_root->v[2];
 
1151
  tree->n_root->v[2]->v[0] = tree->n_root->v[1];
 
1152
  
 
1153
  tree->num_curr_branch_available = 0;
 
1154
  Connect_Edges_To_Nodes_Recur(tree->a_nodes[0],tree->a_nodes[0]->v[0],tree);
 
1155
 
 
1156
  tree->e_root = tree->n_root->v[1]->b[0];
 
1157
 
 
1158
  For(i,2*tree->n_otu-3)
 
1159
    {
 
1160
      tree->a_edges[i]->l->v = FABS(tree->rates->nd_t[tree->a_edges[i]->left->num] - 
 
1161
                                    tree->rates->nd_t[tree->a_edges[i]->rght->num]);
 
1162
    }
 
1163
 
 
1164
  tree->e_root->l->v =
 
1165
    FABS(tree->rates->nd_t[tree->n_root->v[1]->num] -
 
1166
         tree->rates->nd_t[tree->n_root->num]) +
 
1167
    FABS(tree->rates->nd_t[tree->n_root->v[2]->num] -
 
1168
         tree->rates->nd_t[tree->n_root->num]);
 
1169
  
 
1170
  tree->n_root->l[1] = FABS(tree->rates->nd_t[tree->n_root->v[1]->num] -
 
1171
                            tree->rates->nd_t[tree->n_root->num]);
 
1172
 
 
1173
  tree->n_root->l[2] = FABS(tree->rates->nd_t[tree->n_root->v[2]->num] -
 
1174
                            tree->rates->nd_t[tree->n_root->num]);
 
1175
 
 
1176
  tree->n_root_pos = 
 
1177
    FABS(tree->rates->nd_t[tree->n_root->v[2]->num] -
 
1178
         tree->rates->nd_t[tree->n_root->num]) / tree->e_root->l->v;
 
1179
 
 
1180
  /* printf("\n. %s ",Write_Tree(tree,NO)); */
 
1181
 
 
1182
  DR_Draw_Tree("essai.ps",tree);
 
1183
 
 
1184
  /* For(i,tree->n_otu) */
 
1185
  /*   printf("\n. %4s %4d [%5.2f %5.2f]",tree->a_nodes[i]->name, */
 
1186
  /*          t->loc[i], */
 
1187
  /*          t->ldscape[t->loc[i]*t->n_dim+0], */
 
1188
  /*          t->ldscape[t->loc[i]*t->n_dim+1]); */
 
1189
 
 
1190
  
 
1191
  Update_Ancestors(tree->n_root,tree->n_root->v[2],tree);
 
1192
  Update_Ancestors(tree->n_root,tree->n_root->v[1],tree);               
 
1193
 
 
1194
  Free(branching_nodes);
 
1195
  Free(p_branch);
 
1196
  Free(p_mig);
 
1197
 
 
1198
  return(tree);
 
1199
}
 
1200
 
 
1201
//////////////////////////////////////////////////////////////
 
1202
//////////////////////////////////////////////////////////////
 
1203
 
 
1204
// Simualte n coordinates (in 2D)
 
1205
void GEO_Simulate_Coordinates(int n, t_geo *t)
 
1206
{
 
1207
  int i;
 
1208
  phydbl width;
 
1209
 
 
1210
  width = 5.;
 
1211
 
 
1212
  For(i,n)
 
1213
    {
 
1214
      t->ldscape[i*t->n_dim+0] = -width/2. + Uni()*width;
 
1215
      t->ldscape[i*t->n_dim+1] = -width/2. + Uni()*width;
 
1216
    }
 
1217
 
 
1218
 
 
1219
  /* t->ldscape[0*t->n_dim+0] = 0.0; */
 
1220
  /* t->ldscape[0*t->n_dim+1] = 0.0; */
 
1221
 
 
1222
  /* t->ldscape[1*t->n_dim+0] = 0.1; */
 
1223
  /* t->ldscape[1*t->n_dim+1] = 0.1; */
 
1224
 
 
1225
}
 
1226
 
 
1227
//////////////////////////////////////////////////////////////
 
1228
//////////////////////////////////////////////////////////////
 
1229
 
 
1230
void GEO_Optimize_Sigma(t_geo *t, t_tree *tree)
 
1231
{
 
1232
  Generic_Brent_Lk(&(t->sigma),
 
1233
                   t->min_sigma,
 
1234
                   t->max_sigma,
 
1235
                   1.E-5,
 
1236
                   1000,
 
1237
                   NO,
 
1238
                   GEO_Wrap_Lk,NULL,tree,NULL);
 
1239
}
 
1240
 
 
1241
//////////////////////////////////////////////////////////////
 
1242
//////////////////////////////////////////////////////////////
 
1243
 
 
1244
void GEO_Optimize_Lambda(t_geo *t, t_tree *tree)
 
1245
{
 
1246
  Generic_Brent_Lk(&(t->lbda),
 
1247
                   t->min_lbda,
 
1248
                   t->max_lbda,
 
1249
                   1.E-5,
 
1250
                   1000,
 
1251
                   NO,
 
1252
                   GEO_Wrap_Lk,NULL,tree,NULL);
 
1253
}
 
1254
 
 
1255
//////////////////////////////////////////////////////////////
 
1256
//////////////////////////////////////////////////////////////
 
1257
 
 
1258
void GEO_Optimize_Tau(t_geo *t, t_tree *tree)
 
1259
{
 
1260
  Generic_Brent_Lk(&(t->tau),
 
1261
                   t->min_tau,
 
1262
                   t->max_tau,
 
1263
                   1.E-5,
 
1264
                   1000,
 
1265
                   NO,
 
1266
                   GEO_Wrap_Lk,NULL,tree,NULL);
 
1267
}
 
1268
 
 
1269
//////////////////////////////////////////////////////////////
 
1270
//////////////////////////////////////////////////////////////
 
1271
 
 
1272
phydbl GEO_Wrap_Lk(t_edge *b, t_tree *tree, supert_tree *stree)
 
1273
{
 
1274
  return GEO_Lk(tree->geo,tree);
 
1275
}
 
1276
 
 
1277
//////////////////////////////////////////////////////////////
 
1278
//////////////////////////////////////////////////////////////
 
1279
 
 
1280
void GEO_Init_Geo_Struct(t_geo *t)
 
1281
{
 
1282
  t->c_lnL        = UNLIKELY;
 
1283
 
 
1284
  t->sigma        = 1.0;
 
1285
  t->min_sigma    = 1.E-3;
 
1286
  t->max_sigma    = 1.E+3;
 
1287
  t->sigma_thresh = t->max_sigma;
 
1288
 
 
1289
  t->lbda         = 1.0;
 
1290
  t->min_lbda     = 1.E-3;
 
1291
  t->max_lbda     = 1.E+3;
 
1292
  
 
1293
  t->tau          = 1.0;
 
1294
  t->min_tau      = 1.E-3;
 
1295
  t->max_tau      = 1.E+3;
 
1296
 
 
1297
  t->tau          = 1.0;
 
1298
 
 
1299
  t->n_dim        = 2;
 
1300
  t->ldscape_sz   = 1;
 
1301
 
 
1302
  t->update_fmat  = YES;
 
1303
}
 
1304
 
 
1305
//////////////////////////////////////////////////////////////
 
1306
//////////////////////////////////////////////////////////////
 
1307
 
 
1308
void GEO_Randomize_Locations(t_node *n, t_geo *t, t_tree *tree)
 
1309
{
 
1310
  if(n->tax == YES) return;
 
1311
  else
 
1312
    {
 
1313
      t_node *v1, *v2;
 
1314
      int i;
 
1315
      phydbl *probs; // vector of probability of picking each location
 
1316
      phydbl sum;
 
1317
      phydbl u;
 
1318
      
 
1319
 
 
1320
      probs = (phydbl *)mCalloc(t->ldscape_sz,sizeof(phydbl));
 
1321
      
 
1322
      v1 = v2 = NULL;
 
1323
      For(i,3)
 
1324
        {
 
1325
          if(n->v[i] != n->anc && n->b[i] != tree->e_root)
 
1326
            {
 
1327
              if(!v1) v1 = n->v[i];
 
1328
              else    v2 = n->v[i];
 
1329
            }
 
1330
        }
 
1331
      
 
1332
      if(v1->tax && v2->tax)
 
1333
        {
 
1334
          Free(probs);
 
1335
          return;
 
1336
        }
 
1337
      else if(v1->tax && !v2->tax && t->loc[v1->num] != t->loc[n->num])
 
1338
        {
 
1339
          t->loc[v2->num] = t->loc[n->num];
 
1340
        }
 
1341
      else if(v2->tax && !v1->tax && t->loc[v2->num] != t->loc[n->num])
 
1342
        {
 
1343
          t->loc[v1->num] = t->loc[n->num];
 
1344
        }
 
1345
      else if(v1->tax && !v2->tax && t->loc[v1->num] == t->loc[n->num])
 
1346
        {
 
1347
          sum = 0.0;
 
1348
          For(i,t->ldscape_sz) sum += t->loc_beneath[v2->num * t->ldscape_sz + i];
 
1349
          For(i,t->ldscape_sz) probs[i] = t->loc_beneath[v2->num * t->ldscape_sz + i]/sum;
 
1350
          
 
1351
          t->loc[v2->num] = Sample_i_With_Proba_pi(probs,t->ldscape_sz);      
 
1352
        }
 
1353
      else if(v2->tax && !v1->tax && t->loc[v2->num] == t->loc[n->num])
 
1354
        {
 
1355
          sum = 0.0;
 
1356
          For(i,t->ldscape_sz) sum += t->loc_beneath[v1->num * t->ldscape_sz + i];
 
1357
          For(i,t->ldscape_sz) probs[i] = t->loc_beneath[v1->num * t->ldscape_sz + i]/sum;
 
1358
          
 
1359
          t->loc[v1->num] = Sample_i_With_Proba_pi(probs,t->ldscape_sz);      
 
1360
        }
 
1361
      else
 
1362
        {
 
1363
          int n_v1, n_v2;
 
1364
          phydbl p;
 
1365
 
 
1366
          n_v1 = t->loc_beneath[v1->num * t->ldscape_sz + t->loc[n->num]];
 
1367
          n_v2 = t->loc_beneath[v2->num * t->ldscape_sz + t->loc[n->num]];
 
1368
          
 
1369
          if(n_v1 + n_v2 < 1)
 
1370
            {
 
1371
              PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__);
 
1372
              Exit("\n");
 
1373
            }
 
1374
          
 
1375
 
 
1376
          p = n_v1 / (n_v1 + n_v2);
 
1377
 
 
1378
          u = Uni();
 
1379
 
 
1380
          if(u < p)
 
1381
            {              
 
1382
              sum = 0.0;
 
1383
              For(i,t->ldscape_sz) sum += t->loc_beneath[v2->num * t->ldscape_sz + i];
 
1384
              For(i,t->ldscape_sz) probs[i] = t->loc_beneath[v2->num * t->ldscape_sz + i]/sum;
 
1385
              
 
1386
              t->loc[v2->num] = Sample_i_With_Proba_pi(probs,t->ldscape_sz);      
 
1387
              t->loc[v1->num] = t->loc[n->num];                
 
1388
            }
 
1389
          else
 
1390
            {
 
1391
              if(t->loc_beneath[v2->num * t->ldscape_sz + t->loc[n->num]] > 0)
 
1392
                {
 
1393
                  sum = 0.0;
 
1394
                  For(i,t->ldscape_sz) sum += t->loc_beneath[v1->num * t->ldscape_sz + i];
 
1395
                  For(i,t->ldscape_sz) probs[i] = t->loc_beneath[v1->num * t->ldscape_sz + i]/sum;
 
1396
                  
 
1397
                  t->loc[v1->num] = Sample_i_With_Proba_pi(probs,t->ldscape_sz);      
 
1398
                  t->loc[v2->num] = t->loc[n->num];
 
1399
                }
 
1400
              else
 
1401
                {
 
1402
                  sum = 0.0;
 
1403
                  For(i,t->ldscape_sz) sum += t->loc_beneath[v2->num * t->ldscape_sz + i];
 
1404
                  For(i,t->ldscape_sz) probs[i] = t->loc_beneath[v2->num * t->ldscape_sz + i]/sum;
 
1405
                  
 
1406
                  t->loc[v2->num] = Sample_i_With_Proba_pi(probs,t->ldscape_sz);      
 
1407
                  t->loc[v1->num] = t->loc[n->num];                
 
1408
                }
 
1409
            }
 
1410
          
 
1411
          if(t->loc[v1->num] != t->loc[n->num] && t->loc[v2->num] != t->loc[n->num])
 
1412
            {
 
1413
              PhyML_Printf("\n. %d %d %d",t->loc[v1->num],t->loc[v2->num],t->loc[n->num]);
 
1414
              PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__);
 
1415
              Exit("\n");
 
1416
            }
 
1417
 
 
1418
          if(t->loc_beneath[v1->num * t->ldscape_sz + t->loc[v1->num]] < 1)
 
1419
            {
 
1420
              PhyML_Printf("\n. %d %d %d",t->loc[v1->num],t->loc[v2->num],t->loc[n->num]);
 
1421
              PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__);
 
1422
              Exit("\n");
 
1423
            }
 
1424
 
 
1425
          if(t->loc_beneath[v2->num * t->ldscape_sz + t->loc[v2->num]] < 1)
 
1426
            {
 
1427
              PhyML_Printf("\n. %d %d %d",t->loc[v1->num],t->loc[v2->num],t->loc[n->num]);
 
1428
              PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__);
 
1429
              Exit("\n");
 
1430
            }
 
1431
          
 
1432
        }
 
1433
      
 
1434
      Free(probs);
 
1435
      
 
1436
      For(i,3)
 
1437
        if(n->v[i] != n->anc && n->b[i] != tree->e_root) 
 
1438
          GEO_Randomize_Locations(n->v[i],t,tree);
 
1439
 
 
1440
    }
 
1441
}
 
1442
 
 
1443
//////////////////////////////////////////////////////////////
 
1444
//////////////////////////////////////////////////////////////
 
1445
 
 
1446
void GEO_Get_Locations_Beneath(t_geo *t, t_tree *tree)
 
1447
{
 
1448
  int i;
 
1449
  GEO_Get_Locations_Beneath_Post(tree->n_root,tree->n_root->v[1],t,tree);
 
1450
  GEO_Get_Locations_Beneath_Post(tree->n_root,tree->n_root->v[2],t,tree);
 
1451
 
 
1452
  For(i,t->ldscape_sz)
 
1453
    {
 
1454
      t->loc_beneath[tree->n_root->num*t->ldscape_sz+i] =
 
1455
        t->loc_beneath[tree->n_root->v[1]->num*t->ldscape_sz+i] +
 
1456
        t->loc_beneath[tree->n_root->v[2]->num*t->ldscape_sz+i];      
 
1457
    }
 
1458
 
 
1459
 
 
1460
}
 
1461
 
 
1462
//////////////////////////////////////////////////////////////
 
1463
//////////////////////////////////////////////////////////////
 
1464
 
 
1465
void GEO_Get_Locations_Beneath_Post(t_node *a, t_node *d, t_geo *t, t_tree *tree)
 
1466
{
 
1467
 
 
1468
  if(d->tax) 
 
1469
    {
 
1470
      t->loc_beneath[d->num*t->ldscape_sz+t->loc[d->num]] = 1;
 
1471
      return;
 
1472
    }
 
1473
  else
 
1474
    {
 
1475
      int i;
 
1476
      t_node *v1, *v2;
 
1477
 
 
1478
      For(i,3) 
 
1479
        {
 
1480
          if(d->v[i] != a && d->b[i] != tree->e_root)
 
1481
            {
 
1482
              GEO_Get_Locations_Beneath_Post(d,d->v[i],t,tree);
 
1483
            }
 
1484
        }
 
1485
          
 
1486
 
 
1487
      v1 = v2 = NULL;
 
1488
      For(i,3) 
 
1489
        {
 
1490
          if(d->v[i] != a && d->b[i] != tree->e_root)
 
1491
            {
 
1492
              if(!v1) v1 = d->v[i];
 
1493
              else    v2 = d->v[i];
 
1494
            }
 
1495
        }
 
1496
          
 
1497
 
 
1498
      For(i,t->ldscape_sz) 
 
1499
        {
 
1500
          t->loc_beneath[ d->num*t->ldscape_sz+i] = 
 
1501
            t->loc_beneath[v1->num*t->ldscape_sz+i] + 
 
1502
            t->loc_beneath[v2->num*t->ldscape_sz+i] ;
 
1503
        }
 
1504
    }
 
1505
}
 
1506
 
 
1507
//////////////////////////////////////////////////////////////
 
1508
//////////////////////////////////////////////////////////////
 
1509
 
 
1510
void GEO_Get_Sigma_Max(t_geo *t)
 
1511
{
 
1512
  int i,j;
 
1513
  phydbl mean_dist,inv_mean_dist;
 
1514
  phydbl sigma_a, sigma_b, sigma_c;
 
1515
  phydbl overlap_a, overlap_b, overlap_c;
 
1516
  phydbl d_intersect;
 
1517
  phydbl overlap_target;
 
1518
  phydbl eps;
 
1519
  int n_iter,n_iter_max;
 
1520
 
 
1521
  eps = 1.E-6;
 
1522
  overlap_target = 0.95;
 
1523
  n_iter_max = 100;
 
1524
 
 
1525
  mean_dist = -1.;
 
1526
  inv_mean_dist = -1.;
 
1527
  For(i,t->ldscape_sz-1)
 
1528
    {
 
1529
      for(j=i+1;j<t->ldscape_sz;j++)
 
1530
        {
 
1531
          /* dist = POW(t->ldscape[i*t->n_dim+0] - t->ldscape[j*t->n_dim+0],1); */
 
1532
          /* if(dist > mean_dist) mean_dist = dist; */
 
1533
          /* dist = POW(t->ldscape[i*t->n_dim+1] - t->ldscape[j*t->n_dim+1],1); */
 
1534
          /* if(dist > mean_dist) mean_dist = dist; */
 
1535
          mean_dist += FABS(t->ldscape[i*t->n_dim+0] - t->ldscape[j*t->n_dim+0]);
 
1536
          mean_dist += FABS(t->ldscape[i*t->n_dim+1] - t->ldscape[j*t->n_dim+1]);
 
1537
        }
 
1538
    }
 
1539
  
 
1540
  mean_dist /= t->ldscape_sz*(t->ldscape_sz-1)/2.;
 
1541
  inv_mean_dist = 1./mean_dist;
 
1542
  
 
1543
 
 
1544
  PhyML_Printf("\n. mean_dist = %f",mean_dist);
 
1545
 
 
1546
  sigma_a = t->min_sigma; sigma_b = 1.0; sigma_c = t->max_sigma;
 
1547
  /* sigma_a = t->min_sigma; sigma_b = 1.0; sigma_c = 10.; */
 
1548
  n_iter = 0;
 
1549
  do
 
1550
    {
 
1551
      d_intersect = Inverse_Truncated_Normal(inv_mean_dist,0.0,sigma_a,0.0,mean_dist);
 
1552
      overlap_a = 
 
1553
        (Pnorm(mean_dist,0.0,sigma_a) - Pnorm(d_intersect,0.0,sigma_a))/
 
1554
        (Pnorm(mean_dist,0.0,sigma_a) - Pnorm(0.0,0.0,sigma_a)) + 
 
1555
        d_intersect / mean_dist;
 
1556
      /* printf("\n. inter: %f %f [%f]",d_intersect,mean_dist,d_intersect / mean_dist); */
 
1557
 
 
1558
      d_intersect = Inverse_Truncated_Normal(inv_mean_dist,0.0,sigma_b,0.0,mean_dist);
 
1559
      overlap_b = 
 
1560
        (Pnorm(mean_dist,0.0,sigma_b) - Pnorm(d_intersect,0.0,sigma_b))/
 
1561
        (Pnorm(mean_dist,0.0,sigma_b) - Pnorm(0.0,0.0,sigma_b)) + 
 
1562
        d_intersect / mean_dist;
 
1563
      /* printf("\n. inter: %f %f [%f]",d_intersect,mean_dist,d_intersect / mean_dist); */
 
1564
      
 
1565
      d_intersect = Inverse_Truncated_Normal(inv_mean_dist,0.0,sigma_c,0.0,mean_dist);
 
1566
      overlap_c = 
 
1567
        (Pnorm(mean_dist,0.0,sigma_c) - Pnorm(d_intersect,0.0,sigma_c))/
 
1568
        (Pnorm(mean_dist,0.0,sigma_c) - Pnorm(0.0,0.0,sigma_c)) + 
 
1569
        d_intersect / mean_dist;
 
1570
 
 
1571
      /* printf("\n. inter: %f %f [%f]",d_intersect,mean_dist,d_intersect / mean_dist); */
 
1572
      
 
1573
      /* printf("\n. sigma_a:%f overlap_a:%f sigma_b:%f overlap_b:%f sigma_c:%f overlap_c:%f", */
 
1574
      /*        sigma_a,overlap_a, */
 
1575
      /*        sigma_b,overlap_b, */
 
1576
      /*        sigma_c,overlap_c); */
 
1577
 
 
1578
      if(overlap_target > overlap_a && overlap_target < overlap_b)
 
1579
        {
 
1580
          sigma_c = sigma_b;
 
1581
          sigma_b = sigma_a + (sigma_c - sigma_a)/2.;
 
1582
        }
 
1583
      else if(overlap_target > overlap_b && overlap_target < overlap_c)
 
1584
        {
 
1585
          sigma_a = sigma_b;
 
1586
          sigma_b = sigma_a + (sigma_c - sigma_a)/2.;
 
1587
        }
 
1588
      else if(overlap_target < overlap_a)
 
1589
        {
 
1590
          sigma_a /= 2.;
 
1591
        }
 
1592
      else if(overlap_target > overlap_c)
 
1593
        {
 
1594
          sigma_c *= 2.;
 
1595
        }
 
1596
 
 
1597
      n_iter++;
 
1598
 
 
1599
    }
 
1600
  while(sigma_c - sigma_a > eps && n_iter < n_iter_max);
 
1601
 
 
1602
  /* if(sigma_c - sigma_a > eps) */
 
1603
  /*   { */
 
1604
  /*     PhyML_Printf("\n== Error detected in getting maximum value of sigma."); */
 
1605
  /*     PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__); */
 
1606
  /*     Exit("\n");     */
 
1607
  /*   } */
 
1608
  /* else */
 
1609
  /*   { */
 
1610
  /*     PhyML_Printf("\n== Threshold for sigma: %f",sigma_b); */
 
1611
  /*   } */
 
1612
 
 
1613
  t->sigma_thresh = sigma_b;
 
1614
}
 
1615
 
 
1616
//////////////////////////////////////////////////////////////
 
1617
//////////////////////////////////////////////////////////////
 
1618
 
 
1619
void MCMC_Geo_Updown_Tau_Lbda(t_tree *tree)
 
1620
{
 
1621
  phydbl K,mult,u,alpha,ratio;
 
1622
  phydbl cur_lnL,new_lnL;
 
1623
  phydbl cur_tau,new_tau;
 
1624
  phydbl cur_lbda,new_lbda;
 
1625
  
 
1626
  K        = tree->mcmc->tune_move[tree->mcmc->num_move_geo_updown_tau_lbda];
 
1627
  cur_lnL  = tree->geo->c_lnL;
 
1628
  new_lnL  = tree->geo->c_lnL;
 
1629
  cur_tau  = tree->geo->tau;
 
1630
  new_tau  = tree->geo->tau;
 
1631
  cur_lbda = tree->geo->lbda;
 
1632
  new_lbda = tree->geo->lbda;
 
1633
 
 
1634
  u = Uni();
 
1635
  mult = EXP(K*(u-0.5));
 
1636
 
 
1637
  /* Multiply tau by K */
 
1638
  new_tau = cur_tau * K;
 
1639
  
 
1640
  /* Divide lbda by same amount */
 
1641
  new_lbda = cur_lbda / K;
 
1642
 
 
1643
 
 
1644
  if(
 
1645
     new_lbda < tree->geo->min_lbda || new_lbda > tree->geo->max_lbda ||
 
1646
     new_tau  < tree->geo->min_tau  || new_tau  > tree->geo->max_tau
 
1647
     )
 
1648
    {
 
1649
      tree->mcmc->run_move[tree->mcmc->num_move_geo_updown_tau_lbda]++;
 
1650
      return;
 
1651
    }
 
1652
  
 
1653
  tree->geo->tau  = new_tau;
 
1654
  tree->geo->lbda = new_lbda;
 
1655
 
 
1656
  if(tree->mcmc->use_data) new_lnL = GEO_Lk(tree->geo,tree);
 
1657
 
 
1658
  ratio = 0.0;
 
1659
  /* Proposal ratio: 2n-2=> number of multiplications, 1=>number of divisions */
 
1660
  ratio += 0.0*LOG(mult); /* (1-1)*LOG(mult); */
 
1661
  /* Likelihood density ratio */
 
1662
  ratio += (new_lnL - cur_lnL);
 
1663
 
 
1664
  /* printf("\n. new_tau: %f new_lbda:%f cur_lnL:%f new_lnL:%f",new_tau,new_lbda,cur_lnL,new_lnL); */
 
1665
 
 
1666
 
 
1667
  ratio = EXP(ratio);
 
1668
  alpha = MIN(1.,ratio);
 
1669
  u = Uni();
 
1670
  
 
1671
  if(u > alpha) /* Reject */
 
1672
    {
 
1673
      tree->geo->tau   = cur_tau;
 
1674
      tree->geo->lbda  = cur_lbda;
 
1675
      tree->geo->c_lnL = cur_lnL;
 
1676
    }
 
1677
  else
 
1678
    {
 
1679
      tree->mcmc->acc_move[tree->mcmc->num_move_geo_updown_tau_lbda]++;
 
1680
    }
 
1681
 
 
1682
  tree->mcmc->run_move[tree->mcmc->num_move_geo_updown_tau_lbda]++;
 
1683
}
 
1684
 
 
1685
//////////////////////////////////////////////////////////////
 
1686
//////////////////////////////////////////////////////////////
 
1687
 
 
1688
 
 
1689
void MCMC_Geo_Updown_Lbda_Sigma(t_tree *tree)
 
1690
{
 
1691
  phydbl K,mult,u,alpha,ratio;
 
1692
  phydbl cur_lnL,new_lnL;
 
1693
  phydbl cur_lbda,new_lbda;
 
1694
  phydbl cur_sigma,new_sigma;
 
1695
  
 
1696
  K        = tree->mcmc->tune_move[tree->mcmc->num_move_geo_updown_lbda_sigma];
 
1697
  cur_lnL  = tree->geo->c_lnL;
 
1698
  new_lnL  = tree->geo->c_lnL;
 
1699
  cur_lbda  = tree->geo->lbda;
 
1700
  new_lbda  = tree->geo->lbda;
 
1701
  cur_sigma = tree->geo->sigma;
 
1702
  new_sigma = tree->geo->sigma;
 
1703
 
 
1704
  u = Uni();
 
1705
  mult = EXP(K*(u-0.5));
 
1706
 
 
1707
  /* Multiply lbda by K */
 
1708
  new_lbda = cur_lbda * K;
 
1709
  
 
1710
  /* Divide sigma by same amount */
 
1711
  new_sigma = cur_sigma / K;
 
1712
 
 
1713
 
 
1714
  if(
 
1715
     new_sigma < tree->geo->min_sigma || new_sigma > tree->geo->max_sigma ||
 
1716
     new_lbda  < tree->geo->min_lbda  || new_lbda  > tree->geo->max_lbda
 
1717
     )
 
1718
    {
 
1719
      tree->mcmc->run_move[tree->mcmc->num_move_geo_updown_lbda_sigma]++;
 
1720
      return;
 
1721
    }
 
1722
  
 
1723
  tree->geo->lbda   = new_lbda;
 
1724
  tree->geo->sigma = new_sigma;
 
1725
 
 
1726
  if(tree->mcmc->use_data) new_lnL = GEO_Lk(tree->geo,tree);
 
1727
 
 
1728
  ratio = 0.0;
 
1729
  /* Proposal ratio: 2n-2=> number of multiplications, 1=>number of divisions */
 
1730
  ratio += 0.0*LOG(mult); /* (1-1)*LOG(mult); */
 
1731
  /* Likelihood density ratio */
 
1732
  ratio += (new_lnL - cur_lnL);
 
1733
 
 
1734
  /* printf("\n. new_lbda: %f new_sigma:%f cur_lnL:%f new_lnL:%f",new_lbda,new_sigma,cur_lnL,new_lnL); */
 
1735
 
 
1736
 
 
1737
  ratio = EXP(ratio);
 
1738
  alpha = MIN(1.,ratio);
 
1739
  u = Uni();
 
1740
  
 
1741
  if(u > alpha) /* Reject */
 
1742
    {
 
1743
      tree->geo->lbda   = cur_lbda;
 
1744
      tree->geo->sigma  = cur_sigma;
 
1745
      tree->geo->c_lnL = cur_lnL;
 
1746
    }
 
1747
  else
 
1748
    {
 
1749
      tree->mcmc->acc_move[tree->mcmc->num_move_geo_updown_lbda_sigma]++;
 
1750
    }
 
1751
 
 
1752
  tree->mcmc->run_move[tree->mcmc->num_move_geo_updown_lbda_sigma]++;
 
1753
}
 
1754
 
 
1755
//////////////////////////////////////////////////////////////
 
1756
//////////////////////////////////////////////////////////////
 
1757
 
 
1758
void GEO_Read_In_Landscape(char *file_name, t_geo *t, phydbl **ldscape, int **loc_hash, t_tree *tree)
 
1759
{
 
1760
  FILE *fp;
 
1761
  char *s,*line;
 
1762
  phydbl longitude, lattitude;
 
1763
  int i, pos, tax,loc;
 
1764
 
 
1765
  PhyML_Printf("\n");
 
1766
  PhyML_Printf("\n. Reading landscape file '%s'.\n",file_name);
 
1767
 
 
1768
  line        = (char *)mCalloc(T_MAX_LINE,sizeof(char));
 
1769
  s           = (char *)mCalloc(T_MAX_LINE,sizeof(char));
 
1770
  (*ldscape)  = (phydbl *)mCalloc(10*t->n_dim,sizeof(phydbl));
 
1771
  (*loc_hash) = (int *)mCalloc(tree->n_otu,sizeof(int));
 
1772
  
 
1773
  fp = Openfile(file_name,0);
 
1774
 
 
1775
  tax = loc = -1;
 
1776
 
 
1777
  t->ldscape_sz = 0;
 
1778
 
 
1779
  do
 
1780
    {
 
1781
      if(!fgets(line,T_MAX_LINE,fp)) break;
 
1782
 
 
1783
      // Read in taxon name
 
1784
      pos = 0;
 
1785
      do
 
1786
      {
 
1787
        while(line[pos] == ' ') pos++;
 
1788
 
 
1789
        i = 0;
 
1790
        s[0] = '\0';
 
1791
        while((line[pos] != ' ') && (line[pos] != '\n') && (line[pos] != '\t'))
 
1792
          {
 
1793
            s[i] = line[pos];
 
1794
            i++;
 
1795
            pos++;
 
1796
          }
 
1797
        s[i] = '\0';
 
1798
        
 
1799
        if(line[pos] == '\n' || line[pos] == ' ') break;
 
1800
        pos++;
 
1801
      }while(1);
 
1802
      
 
1803
      if(strlen(s) > 0) For(tax,tree->n_otu) if(!strcmp(tree->a_nodes[tax]->name,s)) break;
 
1804
 
 
1805
      if(tax == tree->n_otu)
 
1806
        {
 
1807
          PhyML_Printf("\n== Could not find a taxon with name '%s' in the tree provided.",s);
 
1808
          /* PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__); */
 
1809
          /* Exit("\n"); */
 
1810
          continue;
 
1811
        }
 
1812
 
 
1813
      
 
1814
      sscanf(line+pos,"%lf %lf",&longitude,&lattitude);
 
1815
 
 
1816
      For(loc,t->ldscape_sz)
 
1817
        {
 
1818
          if(FABS(longitude-(*ldscape)[loc*t->n_dim+0]) < 1.E-10 &&
 
1819
             FABS(lattitude-(*ldscape)[loc*t->n_dim+1]) < 1.E-10)
 
1820
            {
 
1821
              break;
 
1822
            }
 
1823
        }
 
1824
 
 
1825
      if(loc == t->ldscape_sz)
 
1826
        {
 
1827
          t->ldscape_sz++;
 
1828
          (*ldscape)[(t->ldscape_sz-1)*t->n_dim+0] = longitude;
 
1829
          (*ldscape)[(t->ldscape_sz-1)*t->n_dim+1] = lattitude;
 
1830
 
 
1831
          printf("\n. new loc: %f %f",longitude,lattitude);
 
1832
          if(!(t->ldscape_sz%10))
 
1833
            {
 
1834
              (*ldscape)  = (phydbl *)mRealloc((*ldscape),(t->ldscape_sz+10)*t->n_dim,sizeof(phydbl));
 
1835
            }
 
1836
        }
 
1837
 
 
1838
      (*loc_hash)[tax] = loc;
 
1839
      
 
1840
    }
 
1841
  while(1);
 
1842
 
 
1843
  For(tax,tree->n_otu)
 
1844
    PhyML_Printf("\n. Taxon %30s, longitude: %12f, lattitude: %12f [%4d]",
 
1845
                 tree->a_nodes[tax]->name,
 
1846
                 (*ldscape)[(*loc_hash)[tax]*t->n_dim+0],
 
1847
                 (*ldscape)[(*loc_hash)[tax]*t->n_dim+1],
 
1848
                 (*loc_hash)[tax]);
 
1849
 
 
1850
 
 
1851
}
 
1852
 
 
1853
//////////////////////////////////////////////////////////////
 
1854
//////////////////////////////////////////////////////////////
 
1855
 
 
1856
 
 
1857
 
 
1858
 
 
1859
//////////////////////////////////////////////////////////////
 
1860
//////////////////////////////////////////////////////////////
 
1861
 
 
1862
 
 
1863
//////////////////////////////////////////////////////////////
 
1864
//////////////////////////////////////////////////////////////
 
1865
 
 
1866