3
Markov chain Monte Carlo on trees (Bayesian phylogenetic analysis)
5
Ziheng YANG, December 2002
7
cc -o mcmctree -m64 -march=opteron -mtune=opteron -ansi -funsigned-char -O3 -funroll-loops -fomit-frame-pointer -finline-functions mcmctree.c tools.c -lm
8
cc -o mcmctree -march=pentiumpro -mcpu=pentiumpro -O4 -funroll-loops -fomit-frame-pointer -finline-functions mcmctree.c tools.c -lm
9
cc -o mcmctree -march=athlon -mcpu=athlon -O4 -funroll-loops -fomit-frame-pointer -finline-functions mcmctree.c tools.c -lm
11
cc -o infinitesites -DINFINITESITES -march=athlon -mcpu=athlon -Wall -O2 -funroll-loops -fomit-frame-pointer -finline-functions mcmctree.c tools.c -lm
13
cl -O2 -W3 mcmctree.c tools.c
14
cl -DHardHard -FemcmctreeHH.exe -O2 mcmctree.c tools.c
15
cl -DHardSoft -FemcmctreeHS.exe -O2 mcmctree.c tools.c
17
mcmctree <ControlFileName>
32
#define NBRANCH (NS*2-2)
33
#define NNODE (NS*2-1)
35
#define NGENE 1000 /* used for gnodes[NGENE] */
39
#define MaxNFossils 200
42
extern int noisy, NFunCall;
45
int GetOptions(char *ctlf);
46
int ReadTreeSeqs(FILE*fout);
47
int ProcessFossilInfo();
48
int ReadBlengthGH (char infile[]);
49
int GenerateBlengthGH (char infile[]);
52
int UseLocus(int locus, int copycondP, int setmodel, int setSeqName);
53
int AcceptRejectLocus(int locus, int accept);
54
void switchconPin(void);
55
int SetPGene(int igene, int _pi, int _UVRoot, int _alpha, double xcom[]);
56
int DownSptreeSetTime(int inode);
57
void getSinvDetS(double space[]);
58
int GetInitials(void);
59
int GenerateGtree(int locus);
60
void printGtree(int printBlength);
61
int SetParameters(double x[]);
62
int ConditionalPNode(int inode, int igene, double x[]);
63
double lnpData(double lnpDi[]);
64
double lnpD_locus(int locus);
65
double lnpD_locus_Approx(int locus);
66
double lnptNCgiventC(void);
68
int SetupPriorTimesFossilErrors(void);
69
double lnpriorTimes(void);
70
double lnpriorRates(void);
71
double logPriorRatioGamma(double xnew, double xold, double a, double b);
72
void copySptree(void);
73
void printSptree(void);
74
double Infinitesites (void);
75
double InfinitesitesClock (double *FixedDs);
76
int collectx (FILE* fout, double x[]);
78
int LabelOldCondP(int spnode);
79
double UpdateTimes(double *lnL, double finetune);
80
double UpdateTimesClock23(double *lnL, double finetune);
81
double UpdateRates(double *lnL, double finetune);
82
double UpdateRatesClock(double *lnL, double finetune);
83
double UpdateParameters(double *lnL, double finetune);
84
double UpdateParaRates(double finetune, double space[]);
85
double mixing(double finetune);
86
double UpdatePFossilErrors(double finetune);
87
int getPfossilerr (double postEFossil[], double nround);
88
int DescriptiveStatisticsSimpleMCMCTREE (FILE *fout, char infile[], int nbin, int nrho);
91
char *z[NS], *spname[NS], seqf[256],outf[256],treef[256],daafile[96],mcmcf[96],inBVf[96];
92
char oldconP[NNODE]; /* update conP for node? (0 yes; 1 no) */
93
int seqtype, ns, ls, ngene, posG[2],lgene[1], *pose, npatt, readpattern;
94
int np, ncode, ntime, nrate, nrgene, nalpha, npi, ncatG, print, seed;
96
int model, clock, fix_kappa, fix_omega, fix_alpha, fix_rgene, Mgene;
97
int method, icode, codonf, aaDist, NSsites;
98
double *fpatt, kappa, omega, alpha;
99
double rgene[NGENE],piG[NGENE][NCODE]; /* not used */
100
double (*plfun)(double x[],int np), freqK[NCATG], rK[NCATG], *conP, *fhK;
102
int curconP; /* curconP = 0 or 1 */
104
double *conPin[2], space[100000]; /* fixed space unsafe */
105
int conPSiteClass, NnodeScale;
106
char *nodeScale; /* nScale[ns-1] for interior nodes */
107
double *nodeScaleF; /* nScaleF[npatt] for scale factors */
111
int nbranch, nnode, root, branches[NBRANCH][2];
113
struct TREEN { /* ipop is node number in species tree */
114
int father, nson, sons[2], ibranch, ipop;
115
double branch, age, label, *conP;
116
char *nodeStr, fossil;
117
} *nodes, **gnodes, nodes_t[2*NS-1];
119
/* nodes_t[] is working space. nodes is a pointer and moves around.
120
gnodes[] holds the gene trees, subtrees constructed from the master species
121
tree. Each locus has a full set of rates (rates) for all branches on the
122
master tree, held in sptree.nodes[].rates. Branch lengths in the gene
123
tree are calculated by using those rates and the divergence times.
125
gnodes[][].label in the gene tree is used to store branch lengths estimated
126
under no clock when mcmc.usedata=2 (normal approximation to likelihood).
131
int nbranch, nnode, root, nspecies, nfossil;
132
double RootAge[4], Pfossilerr, *CcomFossilErr;
134
char name[LSPNAME+1], fossil, usefossil; /* fossil: 0, 1(L), 2(U), 3(B), 4(G) */
135
int father, nson, sons[2];
136
double age, pfossil[7]; /* parameters in fossil distribution */
137
double *rates; /* log rates for loci */
140
/* all trees are binary & rooted, with ancestors unknown. */
143
struct DATA { /* locus-specific data and tree information */
144
int ns[NGENE], ls[NGENE], npatt[NGENE], ngene, lgene[NGENE];
145
int root[NGENE+1], conP_offset[NGENE];
146
int priortime, priorrate;
147
char *z[NGENE][NS], cleandata[NGENE];
148
double *fpatt[NGENE], lnpT, lnpR, lnpDi[NGENE];
149
double pi[NGENE][NCODE];
150
double rgene[NGENE], kappa[NGENE], omega[NGENE], alpha[NGENE];
151
double birth, death, sampling;
152
double rgenegamma[2], kappagamma[2], omegagamma[2], alphagamma[2], sigma2gamma[2];
153
double pfossilerror[3]; /* (p_beta, q_beta, NminCorrect) */
154
double sigma2[NGENE]; /* sigma2[g] are the variances */
155
double *blMLE[NGENE], *Gradient[NGENE], *Hessian[NGENE];
159
struct MCMCPARAMETERS {
160
int burnin, nsample, sampfreq, usedata, saveconP, print;
162
} mcmc; /* control parameters */
165
char *models[]={"JC69","K80","F81","F84","HKY85","T92","TN93","REV"};
166
enum {JC69, K80, F81, F84, HKY85, T92, TN93, REV} MODELS;
169
double PMat[16], Cijk[64], Root[4];
170
double _rateSite=1, OldAge=999;
171
int LASTROUND=0, BayesEB, debug=0, testlnL=0, NPMat=0; /* no use for this */
173
/* for sptree.nodes[].fossil: lower, upper, bounds, gamma, inverse-gamma */
174
enum {LOWER_F=1, UPPER_F, BOUND_F, GAMMA_F, SKEWN_F, SKEWT_F, S2N_F} FOSSIL_FLAGS;
175
char *fossils[]={" ", "L", "U", "B", "G", "SN", "ST", "S2N"};
176
int npfossils[]={ 0, 4, 2, 4, 2, 3, 4, 7};
177
char *clockstr[]={"", "Global clock", "Independent rates", "Autocorrelated rates"};
178
enum {SQRT_B=1, LOG_B, JC_B} B_Transforms;
179
char *Btransform[]={"", "square root", "logarithm", "JC"};
185
int main (int argc, char *argv[])
187
char ctlf[] = "mcmctree.ctl";
189
FILE *fout, *fseed = (FILE*)gfopen("SeedUsed", "w");
191
data.priortime = 0; /* 0: BDS; 1: beta */
192
data.priorrate = 0; /* 0: LogNormal; 1: gamma */
195
com.alpha=0.; com.ncatG=1;
196
com.ncode=4; com.clock=1;
198
printf("MCMCTREE in %s\n", VerStr);
200
{ strcpy(ctlf, argv[1]); printf ("\nctlfile reset to %s.\n", ctlf); }
202
data.birth=2; data.death=1; data.sampling=0.05;
203
com.cleandata=0; mcmc.usedata=2;
204
strcpy(com.outf, "out");
205
strcpy(com.mcmcf, "mcmc.out");
209
fout=gfopen(com.outf,"w");
211
fprintf(fout, "MCMCTREE (%s) %s\n", VerStr, com.seqf);
213
com.seed = abs((2*(int)time(NULL)+1));
216
fprintf(fseed, "%d\n", com.seed); fclose(fseed);
220
if(mcmc.usedata==1) {
221
if(com.seqtype!=0) error2("usedata = 1 for nucleotides only");
225
if (com.ncatG<2 || com.ncatG>NCATG) error2("ncatG");
228
if (com.model>HKY85) error2("Only HKY or simpler models are allowed.");
229
if (com.model==JC69 || com.model==F81) { com.fix_kappa=1; com.kappa=1; }
231
else if (mcmc.usedata==2) {
234
if(com.seqtype==1) com.ncode = 61;
235
if(com.seqtype==2) com.ncode = 20;
237
else if(mcmc.usedata==3) {
238
GenerateBlengthGH("out.BV");
242
/* Do we want RootAge constraint at the root? */
243
if(sptree.RootAge[1]<=0 && sptree.nodes[sptree.root].fossil<=LOWER_F)
244
error2("set RootAge in control file when there is no upper bound on root");
245
if(data.pfossilerror[0]==0 && !sptree.nodes[sptree.root].fossil) {
246
if(sptree.RootAge[1] <= 0)
247
error2("set RootAge in control file when there is no upper bound on root");
249
sptree.nodes[sptree.root].fossil = (sptree.RootAge[0]>0 ? BOUND_F : UPPER_F);
251
sptree.nodes[sptree.root].pfossil[i] = sptree.RootAge[i];
253
printf("\nFossil calibration information used.\n");
254
for(i=sptree.nspecies; i<sptree.nspecies*2-1; i++) {
255
if((k=sptree.nodes[i].fossil) == 0) continue;
256
printf("Node %3d: %3s ( ", i+1, fossils[k]);
257
for(j=0; j<npfossils[k]; j++) {
258
printf("%6.4f", sptree.nodes[i].pfossil[j + (k==UPPER_F)]);
259
printf("%s", (j==npfossils[k]-1 ? " )\n" : ", "));
263
#if(defined INFINITESITES)
275
/* This allocates memory for conditional probabilities (conP).
276
gnodes[locus] is not allocated here but in GetGtree().
278
Conditional probabilities for internal nodes are com.conPin[2], allocated
279
according to data.ns[locus] and data.npatt[locus] at all loci. Two copies
280
of the space are allocated, hence the [2]. The copy used for the current
281
gene trees is com.conPin[com.curconP] while the copy for proposed gene trees
282
is com.conPin[!com.curconP]. data.conP_offset[locus] marks the starting
283
position in conPin[] for each locus.
285
Memory arrangement if(com.conPSiteClass=1):
286
ncode*npatt for each node, by node, by iclass, by locus
288
int locus,j,k, s1,sG=1, sfhK=0, g=data.ngene;
289
double *conP, *rates;
291
/* get mem for conP (internal nodes) */
292
if(mcmc.usedata==1) {
293
if(!com.fix_alpha && mcmc.saveconP) {
294
com.conPSiteClass=1; sG=com.ncatG;
296
data.conP_offset[0] = 0;
297
for(locus=0,com.sconP=0; locus<g; locus++) {
298
s1= com.ncode * data.npatt[locus];
299
com.sconP += sG*s1*(data.ns[locus]-1)*sizeof(double);
301
data.conP_offset[locus+1] = data.conP_offset[locus] + sG*s1*(data.ns[locus]-1);
304
if((com.conPin[0]=(double*)malloc(2*com.sconP))==NULL)
307
com.conPin[1] = com.conPin[0] + com.sconP/sizeof(double);
308
printf("\n%u bytes for conP\n", 2*com.sconP);
310
/* set gnodes[locus][].conP for tips and internal nodes */
312
for(locus=0; locus<g; locus++) {
313
conP = com.conPin[0] + data.conP_offset[locus];
314
for(j=data.ns[locus]; j<data.ns[locus]*2-1; j++,conP+=com.ncode*data.npatt[locus])
315
gnodes[locus][j].conP = conP;
316
if(!data.cleandata[locus]) {
317
/* Is this call to UseLocus still needed? Ziheng 28/12/2009 */
318
UseLocus(locus, 0, mcmc.usedata, 0);
323
for(locus=0; locus<g; locus++)
324
sfhK = max2(sfhK, data.npatt[locus]);
325
sfhK *= com.ncatG*sizeof(double);
326
if((com.fhK=(double*)realloc(com.fhK,sfhK))==NULL) error2("oom");
330
else if(mcmc.usedata==2) { /* allocate data.Gradient & data.Hessian */
331
for(locus=0,k=0; locus<data.ngene; locus++)
332
k += (2*data.ns[locus]-1)*(2*data.ns[locus]-1+2);
333
if((com.conPin[0]=(double*)malloc(k*sizeof(double)))==NULL)
335
for(j=0; j<k; j++) com.conPin[0][j]=-1;
336
for(locus=0,j=0; locus<data.ngene; locus++) {
337
data.blMLE[locus] = com.conPin[0]+j;
338
data.Gradient[locus] = com.conPin[0]+j+(2*data.ns[locus]-1);
339
data.Hessian[locus] = com.conPin[0]+j+(2*data.ns[locus]-1)*2;
340
j += (2*data.ns[locus]-1)*(2*data.ns[locus]-1+2);
344
if(com.clock>1) { /* space for rates */
345
s1 = (sptree.nspecies*2-1)*g*sizeof(double);
346
if(noisy) printf("%d bytes for rates.\n", s1);
347
if((rates=(double*)malloc(s1))==NULL) error2("oom for rates");
348
for(j=0; j<sptree.nspecies*2-1; j++)
349
sptree.nodes[j].rates = rates+g*j;
358
for(locus=0; locus<data.ngene; locus++)
363
if(mcmc.usedata==1) {
364
for(locus=0; locus<data.ngene; locus++) {
365
free(data.fpatt[locus]);
366
for(j=0;j<data.ns[locus]; j++)
367
free(data.z[locus][j]);
371
free(sptree.nodes[0].rates);
373
if(mcmc.usedata==1 && com.alpha)
378
int UseLocus (int locus, int copyconP, int setModel, int setSeqName)
381
This point nodes to the gene tree at locus gnodes[locus] and set com.z[]
382
etc. for likelihood calculation for the locus. Note that the gene tree
383
topology (gnodes[]) is never copied, but nodes[].conP are repositioned in the
384
algorithm. The pointer for root gnodes[][com.ns].conP is assumed to be the
385
start of the whole block for the locus.
386
If (copyconP && mcmc.useData), the conP for internal nodes point
387
to a fixed place (indicated by data.conP_offset[locus]) in the alternative
388
space com.conPin[!com.curconP]. Note that the conP for each locus uses the
389
correct space so that this routine can be used by all the proposal steps,
390
some of which operates on one locus and some change all loci.
392
Try to replace this with UseLocus() for B&C.
394
int i, s1 = com.ncode*data.npatt[locus], sG = (com.conPSiteClass?com.ncatG:1);
395
double *conPt=com.conPin[!com.curconP]+data.conP_offset[locus];
397
com.ns = data.ns[locus];
398
com.ls = data.ls[locus];
399
tree.root = data.root[locus];
400
tree.nnode = 2*com.ns-1;
401
nodes = gnodes[locus];
402
if(copyconP && mcmc.usedata==1) { /* this preserves the old conP. */
403
memcpy(conPt, gnodes[locus][com.ns].conP, sG*s1*(com.ns-1)*sizeof(double));
404
for(i=com.ns; i<tree.nnode; i++)
405
nodes[i].conP = conPt+(i-com.ns)*s1;
408
if(setModel && mcmc.usedata==1) {
409
com.cleandata = data.cleandata[locus];
410
for(i=0; i<com.ns; i++)
411
com.z[i] = data.z[locus][i];
412
com.npatt = com.posG[1] = data.npatt[locus];
414
com.fpatt = data.fpatt[locus];
416
/* The following is model-dependent */
417
com.kappa = data.kappa[locus];
418
com.omega = data.omega[locus];
419
com.alpha = data.alpha[locus];
421
xtoy(data.pi[locus], com.pi, com.ncode);
423
EigenTN93(com.model, com.kappa, com.kappa, com.pi, &nR, Root, Cijk);
426
DiscreteGamma (com.freqK,com.rK,com.alpha,com.alpha,com.ncatG,DGammaMean);
428
com.NnodeScale = data.NnodeScale[locus];
429
com.nodeScale = data.nodeScale[locus];
430
nS = com.NnodeScale*com.npatt * (com.conPSiteClass?com.ncatG:1);
436
for(i=0;i<com.ns;i++) com.spname[i] = sptree.nodes[nodes[i].ipop].name;
442
int AcceptRejectLocus (int locus, int accept)
444
/* This accepts or rejects the proposal at one locus.
445
This works for proposals that change one locus only.
446
After UseLocus(), gnodes[locus][ns].conP points to the alternative
447
conP space. If the proposal is accepted, this copies the newly calculated
448
conP into gnodes[locus][ns].conP. In either case, gnodes[].conP is
450
Proposals that change all loci use switchconP() to accept the proposal.
452
int i, ns=data.ns[locus], s1=com.ncode*data.npatt[locus], sG=1;
453
double *conP=com.conPin[com.curconP]+data.conP_offset[locus];
455
if(mcmc.usedata==1) {
456
if(com.conPSiteClass) sG=com.ncatG;
458
memcpy(conP, gnodes[locus][ns].conP, sG*s1*(ns-1)*sizeof(double));
459
for(i=ns; i<ns*2-1; i++)
460
gnodes[locus][i].conP = conP+(i-ns)*s1;
465
void switchconPin(void)
467
/* This reposition pointers gnodes[locus].conP to the alternative com.conPin,
468
to avoid recalculation of conditional probabilities, when a proposal is
469
accepted in steps that change all loci in one go, such as UpdateTimes()
470
and UpdateParameters().
471
Note that for site-class models (com.conPSiteClass), gnodes[].conP points
472
to the space for class 0, and the space for class 1 starts (ns-1)*ncode*npatt
473
later. Such repositioning for site classes is achieved in fx_r().
478
com.curconP =! com.curconP;
480
for(locus=0; locus<data.ngene; locus++) {
481
conP = com.conPin[com.curconP] + data.conP_offset[locus];
482
for(i=data.ns[locus]; i<data.ns[locus]*2-1; i++,conP+=com.ncode*data.npatt[locus])
483
gnodes[locus][i].conP = conP;
489
int SetPGene(int igene, int _pi, int _UVRoot, int _alpha, double xcom[])
491
/* This is not used. */
492
if(com.ngene!=1) error2("com.ngene==1?");
497
int SetParameters (double x[])
499
/* This is not used. */
503
int GetPMatBranch2 (double PMat[], double t)
505
/* This calculates the transition probability matrix.
507
double Qrates[2], T,C,A,G,Y,R, mr;
510
Qrates[0]=Qrates[1]=com.kappa;
513
PMatK80(PMat, t, com.kappa);
514
else if(com.model<=TN93) {
515
T=com.pi[0]; C=com.pi[1]; A=com.pi[2]; G=com.pi[3]; Y=T+C; R=A+G;
516
if (com.model==F84) {
517
Qrates[0]=1+com.kappa/Y; /* kappa1 */
518
Qrates[1]=1+com.kappa/R; /* kappa2 */
520
else if (com.model<=HKY85) Qrates[1]=Qrates[0];
521
mr=1/(2*T*C*Qrates[0] + 2*A*G*Qrates[1] + 2*Y*R);
522
PMatTN93(PMat, t*mr*Qrates[0], t*mr*Qrates[1], t*mr, com.pi);
529
int ConditionalPNode (int inode, int igene, double x[])
531
int n=com.ncode, i,j,k,h, ison;
534
for(i=0;i<nodes[inode].nson;i++) {
535
ison=nodes[inode].sons[i];
536
if (nodes[ison].nson>0 && !com.oldconP[ison])
537
ConditionalPNode(ison, igene, x);
539
for(i=0;i<com.npatt*n;i++) nodes[inode].conP[i] = 1;
541
for(i=0; i<nodes[inode].nson; i++) {
542
ison = nodes[inode].sons[i];
544
t = nodes[ison].branch*_rateSite;
546
printf("\nt =%12.6f ratesite = %.6f", nodes[ison].branch, _rateSite);
547
error2("blength < 0");
550
GetPMatBranch2(PMat, t);
552
if (nodes[ison].nson<1 && com.cleandata) { /* tip && clean */
553
for(h=0; h<com.npatt; h++)
555
nodes[inode].conP[h*n+j] *= PMat[j*n+com.z[ison][h]];
557
else if (nodes[ison].nson<1 && !com.cleandata) { /* tip & unclean */
558
for(h=0; h<com.npatt; h++)
560
for(k=0,t=0; k<nChara[com.z[ison][h]]; k++)
561
t += PMat[j*n+CharaMap[com.z[ison][h]][k]];
562
nodes[inode].conP[h*n+j] *= t;
566
for(h=0; h<com.npatt; h++)
568
for(k=0,t=0; k<n; k++)
569
t += PMat[j*n+k]*nodes[ison].conP[h*n+k];
570
nodes[inode].conP[h*n+j] *= t;
576
/* node scaling. Is this coded? Please check. */
577
if(com.NnodeScale && com.nodeScale[inode])
578
NodeScale(inode, 0, com.npatt);
583
double lnpD_locus (int locus)
585
/* This calculates ln{Di|Gi, Bi} using times in sptree.nodes[].age and rates.
586
Rates are in data.rgene[] if (clock==1) and in sptree.nodes[].rates if
587
(clock==0). Branch lengths in the gene tree, nodes[].branch, are calculated
588
in this routine but they may not be up-to-date before calling this routine.
589
UseLocus() is called before this routine.
594
if (mcmc.usedata==0) return(0);
595
if(com.clock==1) { /* clock */
596
for(i=0; i<tree.nnode; i++) /* age in gene tree */
597
nodes[i].age = sptree.nodes[nodes[i].ipop].age;
598
for(i=0; i<tree.nnode; i++) {
600
nodes[i].branch = (nodes[nodes[i].father].age-nodes[i].age) * data.rgene[locus];
603
else { /* independent & correlated rates */
604
for(i=0; i<tree.nnode; i++) {
605
if(i==tree.root) continue;
606
for(j=nodes[i].ipop,b=0; j!=nodes[nodes[i].father].ipop; j=dad) {
607
dad = sptree.nodes[j].father;
608
t = sptree.nodes[dad].age-sptree.nodes[j].age;
609
b += t * sptree.nodes[j].rates[locus];
615
lnL = -com.plfun(NULL, -1);
616
else if(mcmc.usedata==2)
617
lnL = lnpD_locus_Approx(locus);
622
double lnpData (double lnpDi[])
624
/* This calculates the log likelihood, the log of the probability of the data
625
given gtree[] for each locus.
626
This updates gnodes[locus][].conP for every node.
632
FOR(j,sptree.nspecies*2-1) com.oldconP[j]=0;
633
for(locus=0; locus<data.ngene; locus++) {
634
UseLocus(locus,0, mcmc.usedata, 1);
635
y = lnpD_locus(locus);
637
if(testlnL && fabs(lnpDi[locus]-y)>1e-5)
638
printf("\tlnLi %.6f != %.6f at locus %d\n", lnpDi[locus],y,locus+1);
646
double lnpD_locus_Approx (int locus)
648
/* This calculates the likelihood using the normal approxiamtion (Thorne et al.
649
1998). The branch lengths on the unrooted tree estimated without clock have
650
been read into nodes[][].label and the gradient and Hessian are in data.Gradient[]
651
& data.Hessian[]. The branch lengths predicted from the rate-evolution
652
model (that is, products of rates and times) are in nodes[].branch.
653
The tree is rooted, and the two branch lengths around the root are summed up
654
and treated as one branch length, compared with the MLE, which is stored as
655
the branch length to the first son of the root.
656
This is different from funSS_AHRS(), which has the data branch lengths in
657
nodes[].branch and calculate the predicted likelihood by multiplying nodes[].age
658
and nodes[].label (which holds the rates). Think about a redesign to avoid confusion.
660
int debug=0, i,j, ib, nb=tree.nnode-1-1; /* don't use tree.nbranch, which is not up to date. */
661
int son1=nodes[tree.root].sons[0], son2=nodes[tree.root].sons[1];
662
double lnL=0, z[NS*2-1], *g=data.Gradient[locus], *H=data.Hessian[locus], JCc=(com.ncode-1.0)/com.ncode;
664
/* construct branch lengths */
665
for(j=0; j<tree.nnode; j++) {
666
if(j==tree.root || j==son2) continue;
667
ib = nodes[j].ibranch;
669
z[ib] = nodes[son1].branch + nodes[son2].branch;
671
z[ib] = nodes[j].branch;
673
if(data.transform==SQRT_B) /* sqrt transform */
675
else if(data.transform==LOG_B) /* logarithm transform */
677
else if(data.transform==JC_B) /* JC transform */
678
z[ib] = 2*asin(sqrt( JCc - JCc*exp(-z[ib]/JCc) ));
680
z[ib] -= data.blMLE[locus][ib];
685
OutTreeN(F0,1,1); FPN(F0);
686
matout(F0, z, 1, nb);
687
matout(F0, data.blMLE[locus], 1, nb);
692
for(i=0; i<nb; i++) {
693
lnL += z[i]*z[i]*H[i*nb+i]/2;
695
lnL += z[i]*z[j]*H[i*nb+j];
701
int ReadBlengthGH (char infile[])
703
/* this reads the MLEs of branch lengths under no clock and their SEs, for
704
approximate calculation of sequence likelihood. The MLEs of branch lengths
705
are stored in data.blMLE[locus] as well as nodes[].label, and the gradient
706
and Hessian in data.Gradient[locsu][] and data.Hessian[locus][].
707
The branch length (sum of 2 branch lengths) around root in rooted tree is
708
placed on 1st son of root in rooted tree.
710
This also frees up memory for sequences.
712
FILE* fBGH = gfopen(infile,"r");
714
int locus, i, j, nb, son1, son2, leftsingle;
715
double small = 1e-20;
716
double dbu[NBRANCH], dbu2[NBRANCH], u, JCc=(com.ncode-1.0)/com.ncode, sin2u, cos2u;
719
if(noisy) printf("\nReading branch lengths, Gradient & Hessian from %s.\n", infile);
721
for(locus=0; locus<data.ngene; locus++) {
722
UseLocus(locus, 0, 0, 1);
723
printf("\nLocus %d: %d species\n", locus+1, com.ns);
726
fscanf(fBGH, "%d", &i);
728
printf("ns = %d, expecting %d", i, com.ns);
729
error2("ns not correct in ReadBlengthGH()");
731
if(ReadTreeN(fBGH, &i, &j, 0, 1))
732
error2("error when reading gene tree");
733
if(i==0) error2("gene tree should have branch lengths for error checking");
734
if((nb=tree.nbranch) != com.ns*2-3)
735
error2("nb = ns * 2 -3 ?");
738
FPN(F0); OutTreeN(F0,1,1); FPN(F0);
740
for(i=0; i<tree.nnode; i++) {
741
printf("\nnode %2d: branch %2d (%9.5f) dad %2d ", i+1, nodes[i].ibranch+1, nodes[i].branch, nodes[i].father+1);
742
for(j=0; j<nodes[i].nson; j++) printf(" %2d", nodes[i].sons[j]+1);
747
fscanf(fBGH, "%lf", &data.blMLE[locus][i]);
749
fscanf(fBGH, "%lf", &data.Gradient[locus][i]);
751
fscanf(fBGH, "%s", line);
752
if(!strstr(line,"Hessian")) error2("expecting Hessian in in.BV");
755
if(fscanf(fBGH, "%lf", &data.Hessian[locus][i*nb+j]) != 1)
756
error2("err when reading the hessian matrix in.BV");
758
UseLocus(locus, 0, 0, 1);
760
son1 = nodes[tree.root].sons[0];
761
son2 = nodes[tree.root].sons[1];
762
leftsingle = (nodes[son1].nson==0);
763
if(leftsingle) { /* Root in unrooted tree is 2nd son of root in rooted tree */
764
nodes[son1].ibranch = com.ns*2-3-1; /* last branch in unrooted tree, assuming binary tree */
765
for(i=0; i<tree.nnode; i++) {
766
if(i!=tree.root && i!=son1 && i!=son2)
767
nodes[i].ibranch -= 2;
770
else { /* Root in unrooted tree is 1st son of root in rooted tree */
771
nodes[son1].ibranch = nodes[son2].ibranch - 1;
772
for(i=0; i<tree.nnode; i++) {
773
if(i!=tree.root && i!=son1 && i!=son2)
777
nodes[tree.root].ibranch = nodes[son2].ibranch = -1;
778
for(i=0; i<tree.nnode; i++)
779
nodes[i].branch = (nodes[i].ibranch==-1 ? 0 : data.blMLE[locus][nodes[i].ibranch]);
782
FPN(F0); FPN(F0); OutTreeN(F0,1,1);
783
for(i=0; i<tree.nnode; i++) {
784
printf("\nnode %2d: branch %2d (%9.5f) dad %2d ", i+1, nodes[i].ibranch+1, nodes[i].branch, nodes[i].father+1);
785
for(j=0; j<nodes[i].nson; j++) printf(" %2d", nodes[i].sons[j]+1);
790
if(data.transform==SQRT_B) /* sqrt transform on branch lengths */
791
for(i=0; i<nb; i++) {
792
dbu[i] = 2*sqrt(data.blMLE[locus][i]);
795
else if(data.transform==LOG_B) /* logarithm transform on branch lengths */
796
for(i=0; i<nb; i++) {
797
if(data.blMLE[locus][i] < 1e-50) error2("blength should be > 0 for log transform");
798
dbu[i] = data.blMLE[locus][i];
799
dbu2[i] = dbu[i]*dbu[i];
801
else if(data.transform==JC_B) /* JC transform on branch lengths */
802
for(i=0; i<nb; i++) {
803
if(data.blMLE[locus][i] < 1e-50)
804
error2("blength should be > 0 for JC transform");
805
u = 2*asin(sqrt(JCc-JCc*exp(- data.blMLE[locus][i]/JCc )));
806
sin2u = sin(u/2); cos2u = cos(u/2);
807
dbu[i] = sin2u*cos2u/(1-sin2u*sin2u/JCc);
808
dbu2[i] = (cos2u*cos2u-sin2u*sin2u)/2/(1-sin2u*sin2u/JCc) + dbu[i]*dbu[i]/JCc;
812
if(data.transform) { /* this part is common to all transforms */
813
for(i=0; i<nb; i++) {
815
data.Hessian[locus][j*nb+i] = data.Hessian[locus][i*nb+j] *= dbu[i]*dbu[j];
816
data.Hessian[locus][i*nb+i] = data.Hessian[locus][i*nb+i] * dbu[i]*dbu[i]
817
+ data.Gradient[locus][i] * dbu2[i];
820
data.Gradient[locus][i] *= dbu[i];
823
if(data.transform==SQRT_B) /* sqrt transform on branch lengths */
825
data.blMLE[locus][i] = sqrt(data.blMLE[locus][i]);
826
else if(data.transform==LOG_B) /* logarithm transform on branch lengths */
828
data.blMLE[locus][i] = log(data.blMLE[locus][i]);
829
else if(data.transform==JC_B) /* JC transform on branch lengths */
831
data.blMLE[locus][i] = 2*asin(sqrt(JCc-JCc*exp(-data.blMLE[locus][i]/JCc )));
836
/* free up memory for sequences */
837
for(locus=0; locus<data.ngene; locus++) {
838
free(data.fpatt[locus]);
839
for(j=0;j<data.ns[locus]; j++)
840
free(data.z[locus][j]);
847
int GenerateBlengthGH (char infile[])
849
/* This generates the sequence alignment and tree files for calculating branch
850
lengths, gradient, and hessian.
851
This mimics Jeff Thorne's estbranches program.
853
FILE *fseq, *ftree, *fctl;
854
FILE *fBGH = gfopen(infile, "w");
855
char tmpseqf[32], tmptreef[32], ctlf[32], outf[32], line[10000];
859
for(locus=0; locus<data.ngene; locus++) {
860
printf("\n\n*** Locus %d ***\n", locus+1);
861
sprintf(tmpseqf, "tmp%d.txt", locus+1);
862
sprintf(tmptreef, "tmp%d.trees", locus+1);
863
sprintf(ctlf, "tmp%d.ctl", locus+1);
864
sprintf(outf, "tmp%d.out", locus+1);
865
fseq = gfopen(tmpseqf,"w");
866
ftree = gfopen(tmptreef,"w");
867
fctl = gfopen(ctlf,"w");
869
UseLocus(locus, 0, 0, 1);
870
com.cleandata = data.cleandata[locus];
871
for(i=0; i<com.ns; i++)
872
com.z[i] = data.z[locus][i];
873
com.npatt = com.posG[1]=data.npatt[locus];
875
com.fpatt = data.fpatt[locus];
880
fprintf(ftree, " 1\n\n");
881
OutTreeN(ftree, 1, 0); FPN(ftree);
883
fprintf(fctl, "seqfile = %s\n", tmpseqf);
884
fprintf(fctl, "treefile = %s\n", tmptreef);
885
fprintf(fctl, "outfile = %s\nnoisy = 3\n", outf);
887
fprintf(fctl, "seqtype = %d\n", com.seqtype);
889
fprintf(fctl, "model = %d\n", com.model);
891
fprintf(fctl, "icode = %d\n", com.icode);
892
fprintf(fctl, "fix_kappa = 0\n kappa = 2\n");
893
fprintf(fctl, "fix_omega = 0\n omega = 0.5\n");
896
fprintf(fctl, "aaRatefile = %s\n", com.daafile);
899
fprintf(fctl, "fix_alpha = 0\nalpha = 0.5\nncatG = %d\n", com.ncatG);
900
fprintf(fctl, "Small_Diff = 0.1e-6\ngetSE = 2\n");
901
fprintf(fctl, "method = %d\n", (com.alpha||com.ns<10 ? 0 : 1));
903
fclose(fseq); fclose(ftree); fclose(fctl);
905
if(com.seqtype) sprintf(line, "codeml %s", ctlf);
906
else sprintf(line, "baseml %s", ctlf);
907
printf("running %s\n", line);
910
appendfile(fBGH, "rst2");
917
int GetOptions (char *ctlf)
919
int iopt,i, nopt=27, lline=4096;
920
char line[4096],*pline, opt[32], *comment="*#";
921
char *optstr[] = {"seed", "seqfile","treefile", "outfile", "mcmcfile",
922
"seqtype", "aaRatefile", "icode", "usedata", "ndata", "model", "clock",
923
"RootAge", "fossilerror", "alpha", "ncatG", "cleandata",
924
"BDparas", "kappa_gamma", "alpha_gamma",
925
"rgene_gamma", "sigma2_gamma", "print", "burnin", "sampfreq",
926
"nsample", "finetune"};
927
double t=1, *eps=mcmc.finetune;
928
FILE *fctl=gfopen (ctlf, "r");
930
strcpy(com.inBVf, "in.BV");
932
if (noisy) printf ("\nReading options from %s..\n", ctlf);
934
if (fgets (line, lline, fctl) == NULL) break;
935
for (i=0,t=0,pline=line; i<lline&&line[i]; i++)
936
if (isalnum(line[i])) { t=1; break; }
937
else if (strchr(comment,line[i])) break;
939
sscanf (line, "%s%*s%lf", opt, &t);
940
if ((pline=strstr(line, "="))==NULL) error2 ("option file.");
942
for (iopt=0; iopt<nopt; iopt++) {
943
if (strncmp(opt, optstr[iopt], 8)==0) {
945
printf ("\n%3d %15s | %-20s %6.2f", iopt+1,optstr[iopt],opt,t);
947
case ( 0): com.seed=(int)t; break;
948
case ( 1): sscanf(pline+1, "%s", com.seqf); break;
949
case ( 2): sscanf(pline+1, "%s", com.treef); break;
950
case ( 3): sscanf(pline+1, "%s", com.outf); break;
951
case ( 4): sscanf(pline+1, "%s", com.mcmcf); break;
952
case ( 5): com.seqtype=(int)t; break;
953
case ( 6): sscanf(pline+2,"%s", com.daafile); break;
954
case ( 7): com.icode=(int)t; break;
955
case ( 8): sscanf(pline+1, "%d %s%d", &mcmc.usedata, com.inBVf, &data.transform);
956
if(mcmc.usedata==2 && strchr(com.inBVf, '*'))
957
strcpy(com.inBVf, "in.BV");
959
case ( 9): com.ndata=(int)t; break;
960
case (10): com.model=(int)t; break;
961
case (11): com.clock=(int)t; break;
963
sptree.RootAge[2] = sptree.RootAge[3] = 0.025; /* default tail probs */
964
if((strchr(line, '>') || strchr(line, '<')) && (strstr(line, "U(") || strstr(line, "B(")))
965
error2("don't mix < U B on the RootAge line");
967
if((pline=strchr(line, '>')))
968
sscanf(pline+1, "%lf", &sptree.RootAge[0]);
969
if((pline=strchr(line,'<'))) { /* RootAge[0]=0 */
970
sscanf(pline+1, "%lf", &sptree.RootAge[1]);
972
if((pline=strstr(line, "U(")))
973
sscanf(pline+2, "%lf,%lf", &sptree.RootAge[1], &sptree.RootAge[2]);
974
else if((pline=strstr(line, "B(")))
975
sscanf(pline+2, "%lf,%lf,%lf,%lf", &sptree.RootAge[0], &sptree.RootAge[1], &sptree.RootAge[2], &sptree.RootAge[3]);
978
data.pfossilerror[0] = 0.0;
979
data.pfossilerror[2] = 2; /* default: minimum 2 good fossils */
980
sscanf(pline+1, "%lf%lf%lf", data.pfossilerror, data.pfossilerror+1, data.pfossilerror+2);
982
case (14): com.alpha=t; break;
983
case (15): com.ncatG=(int)t; break;
984
case (16): com.cleandata=(int)t; break;
986
sscanf(pline+1,"%lf%lf%lf", &data.birth,&data.death,&data.sampling);
989
sscanf(pline+1,"%lf%lf", data.kappagamma, data.kappagamma+1); break;
991
sscanf(pline+1,"%lf%lf", data.alphagamma, data.alphagamma+1); break;
993
sscanf(pline+1,"%lf%lf", data.rgenegamma, data.rgenegamma+1); break;
995
sscanf(pline+1,"%lf%lf", data.sigma2gamma, data.sigma2gamma+1); break;
996
case (22): mcmc.print=(int)t; break;
997
case (23): mcmc.burnin=(int)t; break;
998
case (24): mcmc.sampfreq=(int)t; break;
999
case (25): mcmc.nsample=(int)t; break;
1001
sscanf(pline+1,"%lf%lf%lf%lf%lf%lf", eps,eps+1,eps+2,eps+3,eps+4,eps+5);
1008
{ printf ("\noption %s in %s\n", opt, ctlf); exit (-1); }
1013
if (noisy) error2("\nno ctl file..");
1016
if(com.ndata>NGENE) error2("raise NGENE?");
1017
else if(com.ndata<=0) com.ndata=1;
1018
if(com.seqtype==0 || com.seqtype==2)
1019
{ com.fix_omega=1; com.omega=0; }
1024
if(com.alpha==0) { com.fix_alpha=1; com.nalpha=0; }
1025
if(com.clock<1 || com.clock>3) error2("clock should be 1, 2, 3?");
1031
double lnpInfinitesitesClock (double t0, double FixedDs[])
1033
/* This calculates the ln of the joint pdf, which is proportional to the
1034
posterior for given root age, assuming infinite sites. Fixed distances are
1035
in FixedDs[]: d11, d12, ..., d(1,s-1), d21, d31, ..., d_g1.
1036
Note that the posterior is one dimensional, and the variable used is root age.
1038
int s=sptree.nspecies, g=data.ngene, i,j;
1041
sptree.nodes[sptree.root].age=t0;
1042
for(j=s; j<sptree.nnode; j++)
1044
sptree.nodes[j].age = t0*FixedDs[j-s]/FixedDs[0];
1046
lnp = lnpriorTimes();
1048
for(i=0; i<g; i++) {
1049
data.rgene[i] = (i==0 ? FixedDs[0]/t0 : FixedDs[s-1+i-1]/t0);
1050
lnp += (data.rgenegamma[0]-1)*log(data.rgene[i]) - data.rgenegamma[1]*data.rgene[i];
1053
lnp += (2-s)*log(FixedDs[0]) + (s-g-2)*log(t0); /* Jacobi */
1058
double InfinitesitesClock (double *FixedDs)
1060
/* This runs MCMC to calculate the posterior density of the root age
1061
when there are infinite sites at each locus. The clock is assumed, so that
1062
the posterior is one-dimensional.
1064
int i,j, ir, nround=0, nsaved=0;
1065
int s=sptree.nspecies, g=data.ngene;
1066
double t, tnew, naccept=0;
1067
double e=mcmc.finetune[0], lnp, lnpnew, lnacceptance, c, *x;
1068
double tmean, t025,t975;
1071
matout2(F0, FixedDs, 1, s-1+g-1, 8, 4);
1072
printf("\nRunning MCMC to get %d samples for t0 (root age)\n", mcmc.nsample);
1073
t=sptree.nodes[sptree.root].age;
1074
lnp = lnpInfinitesitesClock(t, FixedDs);
1075
x=(double*)malloc(max2(mcmc.nsample,(s+g)*3)*sizeof(double));
1076
if(x==NULL) error2("oom x");
1078
for(ir=-mcmc.burnin,tmean=0; ir<mcmc.nsample*mcmc.sampfreq; ir++) {
1079
if(ir==0) { nround=0; naccept=0; tmean=0; }
1080
lnacceptance = e*rnd2NormalSym(m2NormalSym);
1081
c = exp(lnacceptance);
1083
lnpnew = lnpInfinitesitesClock(tnew, FixedDs);
1084
lnacceptance += lnpnew-lnp;
1086
if(lnacceptance>=0 || rndu()<exp(lnacceptance)) {
1087
t=tnew; lnp=lnpnew; naccept++;
1090
tmean = (tmean*(nround-1)+t)/nround;
1092
if(ir>=0 && (ir+1)%mcmc.sampfreq==0)
1095
if((ir+1)%max2(mcmc.sampfreq, mcmc.sampfreq*mcmc.nsample/10000)==0)
1096
printf("\r%3.0f%% %7.2f mean t0 = %9.6f", (ir+1.)/(mcmc.nsample*mcmc.sampfreq)*100, naccept/nround,tmean);
1097
if(mcmc.sampfreq*mcmc.nsample>20 && (ir+1)%(mcmc.sampfreq*mcmc.nsample/20)==0)
1098
printf(" %s\n", printtime(timestr));
1101
qsort(x, (size_t)mcmc.nsample, sizeof(double), comparedouble);
1102
t025=x[(int)(mcmc.nsample*.025+.5)]; t975=x[(int)(mcmc.nsample*.975+.5)];
1104
/* Below x[] is used to collect the posterior means and 95% CIs */
1105
for(i=0; i<3; i++) {
1106
t = (i==0 ? tmean : (i==1 ? t025 : t975));
1107
lnpInfinitesitesClock(t, FixedDs);
1108
for(j=s; j<sptree.nnode; j++)
1109
x[i*(s+g)+(j-s)] = sptree.nodes[j].age;
1111
x[i*(s+g)+s-1+j]=data.rgene[j];
1113
printf("\nmean & 95%% CI for times\n\n");
1114
for(j=s; j<sptree.nnode; j++)
1115
printf("Node %2d: %9.5f (%9.5f, %9.5f)\n", j+1,x[j-s],x[(s+g)+j-s],x[2*(s+g)+j-s]);
1116
printf("\nmean & 95%% CI for rates\n\n");
1118
printf("gene %2d: %9.5f (%9.5f, %9.5f)\n", j+1,x[s-1+j], x[2*(s+g)+s-1+j], x[(s+g)+s-1+j]);
1120
printf("\nNote: the posterior has only one dimension.\n");
1126
double lnpInfinitesites (double FixedDs[])
1128
/* This calculates the log joint pdf, which is proportional to the posterior
1129
when there are infinite sites at each locus. The variables in the posterior
1130
are node ages, rate r0 for root son0, and mu & sigma2.
1131
FixedDs holds fixed distances locus by locus. The algorithm assumes clock=2.
1133
int s=sptree.nspecies, g=data.ngene, locus,j;
1134
int root=sptree.root, *sons=sptree.nodes[root].sons;
1135
double lnJ, lnp, b, r0; /* r0 is rate for root son0, fixed. */
1136
double t, t0=sptree.nodes[root].age - sptree.nodes[sons[0]].age;
1137
double t1=sptree.nodes[root].age - sptree.nodes[sons[1]].age;
1139
/* compute branch rates using times and branch lengths */
1140
for(locus=0; locus<g; locus++) {
1141
for(j=0; j<sptree.nnode; j++) {
1142
if(j==root || j==sons[0]) continue; /* r0 is in the posterior */
1143
t = sptree.nodes[nodes[j].father].age - sptree.nodes[j].age;
1148
b=FixedDs[locus*sptree.nnode+sons[0]];
1149
r0=sptree.nodes[sons[0]].rates[locus];
1150
sptree.nodes[j].rates[locus] = (b-r0*t0)/t1;
1151
if(r0<=0 || t1<=0 || b-r0*t0<=0) {
1152
printf("\nr0 = %.6f t1 = %.6f b-t0*t0 = %.6f", r0, t1, b-r0*t0);
1153
error2("r<=0 || t1<=0 || b-r0*t0<=0...");
1157
sptree.nodes[j].rates[locus] = FixedDs[locus*sptree.nnode+j]/t;
1161
printf("\n (age tlength) rates & branchlengths\n");
1162
for(j=0; j<sptree.nnode; j++,FPN(F0)) {
1163
t = (j==root? -1 : sptree.nodes[nodes[j].father].age - sptree.nodes[j].age);
1164
printf("%2d (%7.4f%9.4f) ", j+1,sptree.nodes[j].age, t);
1165
for(locus=0; locus<g; locus++) printf(" %9.4f", sptree.nodes[j].rates[locus]);
1167
for(locus=0; locus<g; locus++) printf(" %9.4f", FixedDs[locus*sptree.nnode+j]);
1172
lnp = lnpriorTimes() + lnpriorRates();
1173
for(j=0,lnJ=-log(t1); j<sptree.nnode; j++)
1174
if(j!=root && j!=sons[0] && j!=sons[1])
1175
lnJ -= log(sptree.nodes[nodes[j].father].age - sptree.nodes[j].age);
1182
double Infinitesites (void)
1184
/* This reads the fixed branch lengths and runs MCMC to calculate the posterior
1185
of times and rates when there are infinite sites at each locus.
1186
This is implemented for the global clock model (clock = 1) and the
1187
independent-rates model (clock = 2), but not for the correlated-rates
1189
This uses mcmc.finetune[] to propose changes.
1191
For clock=2, the MCMC samples the following variables:
1192
s-1 times, rate r0 for left root branch at each locus,
1193
mu at each locus & sigma2 at each locus.
1195
int root=sptree.root, *sons=sptree.nodes[root].sons;
1196
int lline=10000, locus, nround, ir, i,j,k, ip,np, s=sptree.nspecies,g=data.ngene;
1199
char line[10000], timestr[36];
1200
double *e=mcmc.finetune, y, ynew, yb[2], naccept[4]={0};
1201
double lnL, lnLnew, lnacceptance, c,lnc;
1202
double *x,*mx, *FixedDs,*b, maxt0,tson[2];
1203
char *FidedDf[2]={"FixedDsClock1.txt", "FixedDsClock23.txt"};
1204
FILE *fdin=gfopen(FidedDf[com.clock>1],"r"), *fmcmcout=gfopen(com.mcmcf,"w");
1206
if(com.model!=0 || com.alpha!=0) error2("use JC69 for infinite data.");
1212
printf("\nInfinite sites\n");
1213
printf("Fixed branch lengths from %s (s = %d g = %d):\n\n", FidedDf[com.clock>1], s,g);
1214
np = s-1 + g + g + g; /* times, rates, mu & sigma2 */
1215
FixedDs=(double*)malloc((g*sptree.nnode+np*2)*sizeof(double));
1216
if(FixedDs==NULL) error2("oom");
1217
x=FixedDs+g*sptree.nnode;
1219
fscanf(fdin, "%d", &i);
1220
if(i!=s) error2("wrong number of species in FixedDs.txt");
1221
fgets(line, lline, fdin);
1223
if(data.pfossilerror[0]) {
1224
puts("model of fossil errors for infinite data not tested yet.");
1226
SetupPriorTimesFossilErrors();
1229
if(com.clock==1) { /* global clock: read FixedDs[] and close file. */
1230
for(i=0; i<s-1+g-1; i++) fscanf(fdin, "%lf", &FixedDs[i]);
1232
InfinitesitesClock(FixedDs);
1236
for(locus=0,b=FixedDs,nodes=nodes_t; locus<g; locus++,b+=sptree.nnode) {
1237
ReadTreeN(fdin,&i,&i,1,1);
1238
OutTreeN(F0, 1, 1); FPN(F0);
1239
if(tree.nnode!=sptree.nnode)
1240
error2("use species tree for each locus!");
1242
b[sons[0]]=nodes[sons[0]].branch+nodes[sons[1]].branch;
1244
for(j=0; j<sptree.nnode; j++) {
1245
if(j!=root && j!=sons[0] && j!=sons[1])
1246
b[j]=nodes[j].branch;
1251
printf("\nFixed distances at %d loci\n", g);
1252
for(j=0; j<sptree.nnode; j++,FPN(F0)) {
1253
printf("node %3d ", j+1);
1254
for(locus=0; locus<g; locus++)
1255
printf(" %9.6f", FixedDs[locus*sptree.nnode+j]);
1258
for(i=0; i<g; i++) { /* reset r0 so that r0*t0<b0. GetInitial is unsafe. */
1259
y = FixedDs[i*sptree.nnode+sons[0]]/(sptree.nodes[root].age-sptree.nodes[sons[0]].age);
1260
sptree.nodes[sons[0]].rates[i] = y*rndu();
1263
for(i=0; i<np; i++) mx[i]=0;
1264
for(i=0,k=0; i<sptree.nspecies-1; i++) x[k++]=sptree.nodes[s+i].age;
1265
for(i=0; i<g; i++) x[k++]=sptree.nodes[sons[0]].rates[i];
1266
for(i=0; i<g; i++) x[k++]=data.rgene[i];
1267
for(i=0; i<g; i++) x[k++]=data.sigma2[i];
1269
lnL = lnpInfinitesites(FixedDs);
1271
printf("\nStarting MCMC (np=%d) lnp = %9.3f\nInitials: ", np, lnL);
1272
for(i=0; i<np; i++) printf(" %5.3f", x[i]);
1273
printf("\n\nparas: %d times, %d rates r0 (left daughter of root), %d mu, %d sigma2",s-1, g, g, g);
1274
printf("\nUsing finetune parameters from the control file\n");
1276
for(ir=-mcmc.burnin,nround=0; ir<mcmc.sampfreq*mcmc.nsample; ir++) {
1277
if(ir==0) { /* reset after burnin */
1278
nround=0; naccept[0]=naccept[1]=naccept[2]=naccept[3]=0; zero(mx,com.np);
1280
for(ip=0; ip<np; ip++) {
1282
if(ip<s-1) { /* times */
1283
y = sptree.nodes[s+ip].age;
1284
for(i=0;i<2;i++) tson[i]=sptree.nodes[sptree.nodes[s+ip].sons[i]].age;
1285
yb[0]=max2(tson[0], tson[1]);
1286
yb[1]=(s+ip==root?OldAge:sptree.nodes[sptree.nodes[s+ip].father].age);
1288
for(i=0; i<g; i++) {
1289
maxt0 = FixedDs[i*sptree.nnode+sons[0]]/sptree.nodes[sons[0]].rates[i];
1290
yb[1] = min2(yb[1], tson[0]+maxt0);
1293
else if(s+ip==sons[0]) {
1294
for(i=0; i<g; i++) {
1295
maxt0 = FixedDs[i*sptree.nnode+sons[0]]/sptree.nodes[sons[0]].rates[i];
1296
yb[0] = max2(yb[0], sptree.nodes[root].age-maxt0);
1299
ynew = y + e[0]*rnd2NormalSym(m2NormalSym);
1300
ynew = sptree.nodes[s+ip].age = reflect(ynew,yb[0],yb[1]);
1302
else if(ip-(s-1)<g) { /* rate r0 for root son0 for loci (r0*t0<b0) */
1303
y = sptree.nodes[root].age-sptree.nodes[sons[0]].age;
1305
printf("age error");
1307
yb[1] = FixedDs[(ip-(s-1))*sptree.nnode+sons[0]]/y;
1308
y = sptree.nodes[sons[0]].rates[ip-(s-1)];
1309
ynew = y + e[1]*rnd2NormalSym(m2NormalSym);
1310
ynew = sptree.nodes[sons[0]].rates[ip-(s-1)] = reflect(ynew,yb[0],yb[1]);
1312
else if (ip-(s-1+g)<g) { /* mu for loci */
1313
lnacceptance = e[3]*rnd2NormalSym(m2NormalSym);
1314
c=exp(lnacceptance);
1315
y = data.rgene[ip-(s-1+g)];
1316
ynew = data.rgene[ip-(s-1+g)] *= c;
1317
lnacceptance+=logPriorRatioGamma(ynew, y, data.rgenegamma[0], data.rgenegamma[1]);
1319
else { /* sigma2 for loci */
1320
lnacceptance = e[3]*rnd2NormalSym(m2NormalSym);
1321
c=exp(lnacceptance);
1322
y = data.sigma2[ip-(s-1+g*2)];
1323
ynew = data.sigma2[ip-(s-1+g*2)] *= c;
1324
lnacceptance+=logPriorRatioGamma(ynew, y, data.sigma2gamma[0], data.sigma2gamma[1]);
1327
lnLnew = lnpInfinitesites(FixedDs);
1328
lnacceptance += lnLnew-lnL;
1330
if(lnacceptance>=0 || rndu()<exp(lnacceptance)) {
1331
x[ip]=ynew; lnL=lnLnew;
1332
if(ip<s-1) naccept[0]++; /* times */
1333
else if(ip<s-1+g) naccept[1]++; /* rates */
1334
else naccept[3]++; /* mu, sigma2 */
1337
if(ip<s-1) /* times */
1338
sptree.nodes[s+ip].age = y;
1339
else if(ip-(s-1)<g) /* rate r0 for root son0 */
1340
sptree.nodes[sons[0]].rates[ip-(s-1)] = y;
1341
else if (ip-(s-1+g)<g) /* mu for loci */
1342
data.rgene[ip-(s-1+g)] = y;
1343
else /* sigma2 for loci */
1344
data.sigma2[ip-(s-1+g*2)] = y;
1348
if(MixingStep) { /* this multiples times by c and divides r and mu by c. */
1349
lnc = e[2]*rnd2NormalSym(m2NormalSym);
1351
lnacceptance = (s-1-g-g)*(lnc);
1352
for(j=s; j<sptree.nnode; j++) sptree.nodes[j].age*=c;
1353
for(i=0; i<g; i++) sptree.nodes[sons[0]].rates[i] /= c;
1354
for(i=0; i<g; i++) {
1356
ynew = data.rgene[i] /= c;
1357
lnacceptance+=logPriorRatioGamma(ynew, y, data.rgenegamma[0], data.rgenegamma[1]);
1359
lnLnew = lnpInfinitesites(FixedDs);
1360
lnacceptance += lnLnew-lnL;
1361
if(lnacceptance>=0 || rndu()<exp(lnacceptance)) {
1362
for(j=s; j<sptree.nnode; j++) x[j-s]=sptree.nodes[j].age;
1363
for(i=0; i<g; i++) x[s-1+i]=sptree.nodes[sons[0]].rates[i];
1364
for(i=0; i<g; i++) x[s-1+g+i]=data.rgene[i];
1369
for(j=sptree.nspecies; j<sptree.nnode; j++) sptree.nodes[j].age/=c;
1370
for(i=0; i<g; i++) sptree.nodes[sons[0]].rates[i] *= c;
1371
for(i=0; i<g; i++) data.rgene[i] *= c;
1375
for(i=0; i<np; i++) mx[i]=(mx[i]*(nround-1)+x[i])/nround;
1377
if((ir+1)%max2(mcmc.sampfreq, mcmc.sampfreq*mcmc.nsample/1000)==0) {
1378
printf("\r%3.0f%%", (ir+1.)/(mcmc.nsample*mcmc.sampfreq)*100.);
1379
printf(" %4.2f %4.2f %4.2f %4.2f ", naccept[0]/((s-1)*nround),naccept[1]/(g*nround),naccept[2]/nround,naccept[3]/(g*2*nround));
1382
if(np<nxpr[0]+nxpr[1]) { nxpr[0]=com.np; nxpr[1]=0; }
1383
for(j=0; j<nxpr[0]; j++) printf(" %5.3f", mx[j]);
1384
if(np>nxpr[0]+nxpr[1] && nxpr[1]) printf(" -");
1385
for(j=0; j<nxpr[1]; j++) printf(" %5.3f", mx[com.np-nxpr[1]+j]);
1386
printf(" %5.2f", lnL);
1389
fprintf(fmcmcout, "%d", ir+1);
1391
fprintf(fmcmcout, "\t%.5f", x[i]); FPN(fmcmcout);
1394
if(mcmc.sampfreq*mcmc.nsample>20 && (ir+1)%(mcmc.sampfreq*mcmc.nsample/20)==0)
1395
printf(" %s\n", printtime(timestr));
1399
printf("\nSummarizing MCMC results, time reset.\n");
1400
DescriptiveStatisticsSimpleMCMCTREE(NULL, com.mcmcf, 1, 1);
1406
int DownSptreeSetTime (int inode)
1408
/* This goes down the species tree, from the root to the tips, to specify the
1409
initial node ages. If the age of inode is not set already, it will
1411
This is called by GetInitials().
1413
int j,ison, correctionnews=0;
1415
for (j=0; j<sptree.nodes[inode].nson; j++) {
1416
ison = sptree.nodes[inode].sons[j];
1417
if(sptree.nodes[ison].nson) { /* ison is not tip */
1418
if(sptree.nodes[ison].age == -1) {
1419
sptree.nodes[ison].age = sptree.nodes[inode].age *(.6+.4*rndu());
1422
else if (sptree.nodes[ison].age > sptree.nodes[inode].age) {
1423
sptree.nodes[ison].age = sptree.nodes[inode].age *(0.95+0.5*rndu());
1426
correctionnews += DownSptreeSetTime(ison);
1429
return(correctionnews);
1434
/* This sets the initial values for starting the MCMC, and returns np, the
1435
number of parameters in the MCMC, to be collected in collectx().
1436
The routine guarantees that each node age is younger than its ancestor's age.
1437
It does not check the consistency of divergence times against the fossil
1438
constraints. As the model assumes soft bounds, any divergence times are
1439
possible, even though this means that the chain might start from a poor
1442
int np=0, i,j, g=data.ngene;
1443
double maxlower=0; /* rough age for root */
1444
double a_r=data.rgenegamma[0], b_r=data.rgenegamma[1], a,b, smallr=1e-3;
1447
com.rgene[0]=-1; /* com.rgene[] is not used. -1 to force crash */
1448
puts("\ngetting initial values to start MCMC.");
1450
/* set up rough time unit by looking at the fossil info */
1451
for(j=sptree.nspecies; j<sptree.nnode; j++) {
1452
sptree.nodes[j].age = -1;
1453
if(sptree.nodes[j].fossil == 0) continue;
1454
p = sptree.nodes[j].pfossil;
1456
if(sptree.nodes[j].fossil == LOWER_F) {
1457
sptree.nodes[j].age = p[0] * (1.1+0.2*rndu());
1458
maxlower = max2(maxlower, p[0]);
1460
else if(sptree.nodes[j].fossil == UPPER_F)
1461
sptree.nodes[j].age = p[1] * (0.6+0.4*rndu());
1462
else if(sptree.nodes[j].fossil == BOUND_F) {
1463
sptree.nodes[j].age = (p[0]+p[1])/2*(0.7+rndu()*0.6);
1464
maxlower = max2(maxlower, p[0]);
1466
else if(sptree.nodes[j].fossil == GAMMA_F) {
1467
sptree.nodes[j].age = p[0]/p[1]*(0.7+rndu()*0.6);
1468
maxlower = max2(maxlower, QuantileGamma(0.025, p[0], p[1]));
1470
else if(sptree.nodes[j].fossil == SKEWN_F || sptree.nodes[j].fossil == SKEWT_F) {
1471
d = p[2]/sqrt(1+p[2]*p[2]);
1472
a = p[0] + p[1]*d*sqrt(2/Pi);
1473
sptree.nodes[j].age = a * (0.6+0.4*rndu());
1474
maxlower = max2(maxlower, a*(1.2+2*rndu()));
1476
else if(sptree.nodes[j].fossil == S2N_F) {
1477
d = p[3]/sqrt(1+p[3]*p[3]);
1478
a = (p[1] + p[2]*d*sqrt(2/Pi)); /* mean of SN 1 */
1479
d = p[6]/sqrt(1+p[6]*p[6]);
1480
b = (p[4] + p[5]*d*sqrt(2/Pi)); /* mean of SN 2 */
1481
sptree.nodes[j].age = (p[0]*a + b) * (0.6+0.4*rndu());
1482
maxlower = max2(maxlower, (p[0]*a + b)*(1.2+2*rndu()));
1486
if(sptree.nodes[sptree.root].age == -1) {
1487
maxlower = max2(maxlower, sptree.RootAge[0]);
1488
sptree.nodes[sptree.root].age = min2(maxlower*1.5, sptree.RootAge[1]) * (.7+.6*rndu());
1490
for(i=0; i<1000; i++) {
1491
if(DownSptreeSetTime(sptree.root) == 0)
1495
error2("Starting times are unfeasible!\nTry again.");
1497
/* initial mu (mean rates) for genes */
1498
np = sptree.nspecies-1 + g;
1500
data.rgene[i] = smallr + rndgamma(a_r)/b_r; /* mu */
1502
if(com.clock>1) { /* sigma2, rates for nodes or branches */
1503
np += g + g*(sptree.nnode-1);
1505
/* sigma2 in lnrates among loci */
1507
data.sigma2[i] = rndgamma(data.sigma2gamma[0])/data.sigma2gamma[1]+smallr;
1508
/* rates at nodes */
1509
for(j=0; j<sptree.nnode; j++) {
1510
if(j==sptree.root) {
1511
for(i=0; i<g; i++) sptree.nodes[j].rates[i] = -99;
1514
for(i=0; i<g; i++) {
1515
sptree.nodes[j].rates[i] = smallr+rndgamma(a_r)/b_r;
1521
/* set up substitution parameters */
1522
if(mcmc.usedata==1) {
1524
if(com.model>=K80 && !com.fix_kappa) {
1525
data.kappa[i] = rndgamma(data.kappagamma[0])/data.kappagamma[1]+0.5;
1529
if(!com.fix_alpha) {
1530
data.alpha[i] = rndgamma(data.alphagamma[0])/data.alphagamma[1]+0.1;
1535
if(data.pfossilerror[0]) {
1536
a = data.pfossilerror[0];
1537
b = data.pfossilerror[1];
1538
sptree.Pfossilerr = a/(a+b)*(0.4+0.6*rndu());
1545
int collectx (FILE* fout, double x[])
1547
/* this collects parameters into x[] for printing and summarizing.
1548
It returns the number of parameters.
1550
clock=1: times, rates for genes, kappa, alpha
1551
clock=0: times, rates or rates by node by gene, sigma2, rho_ij, kappa, alpha
1553
int i,j, np=0, g=data.ngene;
1554
static int firsttime=1;
1556
if(firsttime && fout) fprintf(fout, "Gen");
1557
for(i=sptree.nspecies; i<sptree.nspecies*2-1; i++) {
1558
if(firsttime && fout) fprintf(fout, "\tt_n%d", i+1);
1559
x[np++] = sptree.nodes[i].age;
1561
for(i=0; i<g; i++) {
1562
if(firsttime && fout) {
1563
if(g>1) fprintf(fout, "\tmu%d", i+1);
1564
else fprintf(fout, "\tmu");
1566
x[np++] = data.rgene[i];
1569
for(i=0; i<g; i++) {
1570
if(firsttime && fout) {
1571
if(g>1) fprintf(fout, "\tsigma2_%d", i+1);
1572
else fprintf(fout, "\tsigma2");
1574
x[np++] = data.sigma2[i];
1576
for(i=0; i<g; i++) {
1577
for(j=0; j<sptree.nnode; j++) {
1578
if(j==sptree.root) continue;
1579
if(firsttime && fout) {
1580
if(g>1) fprintf(fout, "\tr_g%d_n%d", i+1, j+1);
1581
else fprintf(fout, "\tr_n%d", j+1);
1583
x[np++] = sptree.nodes[j].rates[i];
1587
if(mcmc.usedata==1) {
1589
for(i=0; i<g; i++) {
1590
if(firsttime && fout) {
1591
if(g>1) fprintf(fout, "\tkappa_%d", i+1);
1592
else fprintf(fout, "\tkappa");
1594
x[np++] = data.kappa[i];
1598
for(i=0; i<g; i++) {
1599
if(firsttime && fout) {
1600
if(g>1) fprintf(fout, "\talpha_%d", i+1);
1601
else fprintf(fout, "\talpha");
1603
x[np++] = data.alpha[i];
1606
if(data.pfossilerror[0]) {
1607
if(firsttime && fout) fprintf(fout, "\tpFossilErr");
1608
x[np++] = sptree.Pfossilerr;
1612
printf("np in collectx is %d != %d\n", np, com.np);
1613
if(!mcmc.usedata && (com.model || com.alpha)) printf("\nUse JC69 for no data");
1616
if(firsttime && fout && mcmc.usedata) fprintf(fout, "\tlnL");
1617
if(firsttime && fout) fprintf(fout, "\n");
1624
#define P0t_BDS(expmlt) (rho*(lambda-mu)/(rho*lambda +(lambda*(1-rho)-mu)*expmlt))
1626
double lnPDFkernelBDS (double t, double t1, double vt1, double lambda, double mu, double rho)
1628
/* this calculates the log of the pdf of the BDS kernel.
1630
double lnp, mlt=(mu-lambda)*t, expmlt, small=1e-20;
1633
lnp = log( (1 + rho*lambda*t1)/(t1*square(1 + rho*lambda*t)) );
1636
lnp = P0t_BDS(expmlt);
1637
lnp = log( lnp*lnp * lambda/(vt1*rho) * expmlt );
1642
double CDFkernelBDS (double t, double t1, double vt1, double lambda, double mu, double rho)
1644
/* this calculates the CDF for the kernel density from the BDS model
1646
double cdf, expmlt, small=1e-20;
1648
if(fabs(lambda - mu)<small)
1649
cdf = (1 + rho*lambda*t1)*t/(t1*(1 + rho*lambda*t));
1651
expmlt = exp((mu - lambda)*t);
1653
cdf = rho*lambda/vt1 * (1 - expmlt)/(rho*lambda + (lambda*(1 - rho) - mu)*expmlt);
1656
cdf = rho*lambda/vt1 * (expmlt - 1)/(rho*lambda*expmlt +(lambda*(1 - rho) - mu));
1663
double lnPDFkernelBeta (double t, double t1, double p, double q)
1665
/* This calculates the PDF for the beta kernel.
1666
The normalizing constant is calculated outside this routine.
1670
if(x<0 || x>1) error2("t outside of range (0, t1)");
1671
lnp = (p - 1)*log(x) + (q - 1)*log(1 - x);
1677
double lnptNCgiventC (void)
1679
/* This calculates f_BDS(tNC|tC), the conditional probability of no-calibration
1680
node ages given calibration node ages, that is, the first term in equation 3
1681
in Yang & Rannala (2006). This is called by lnpriorTimes().
1683
The beta kernel is added on 5 October 2009, specified by data.priortime=1.
1685
The routine uses sptree.nodes[].pfossil, sptree.nodes[].fossil,
1686
and lamdba, mu, rho from the birth-death process with species sampling
1687
(data.birth, data.death, data.sampling).
1688
It sorts the node ages in the species tree and then uses the
1689
birth-death prior conditional on the calibration points.
1690
t[0] is t1 in Yang & Rannala (2006).
1692
rankt[3]=1 means that the 3rd yougest age is node number ns+1. This is set up
1693
for the calibration nodes only, = -1 for non-calibration nodes. First we collect
1694
all times into t[] and sort them. Next we collect all calibration times into tc[]
1695
and sort them. Third, we find the ranks of calibration times in tc[], that is,
1696
i1, i2, ..., ic in YB06.
1698
The term for root is not in this routine. The root is excluded from the ranking.
1700
int i,j,k, n1=sptree.nspecies-1, rankprev, nfossil=0;
1701
int ranktc[MaxNFossils]; /* ranks of calibration nodes */
1702
double t[NS-1], tc[MaxNFossils], t1=sptree.nodes[sptree.root].age;
1703
double lnpt=0, expmlt=1, vt1, P0t1, cdf, cdfprev, small=1e-20;
1704
double lambda=data.birth, mu=data.death, rho=data.sampling;
1705
double p=data.birth, q=data.death, lnbeta=0;
1708
/* (A) Calculate f(tNC), joint of the non-calibration node ages.
1709
The root age is excluded, so the loop starts from j = 1.
1710
In the calculation of (eq.9)/(eq.11), (s - 2)! cancels out, as does
1711
the densities g(t_i_1) etc. in eq.11. After cancellation, only the
1712
densities of the non-calibration node ages remain in eq.9.
1714
if(data.priortime==0) { /* BDS kernel */
1715
/* vt1 is needed only if (lambda != mu) */
1716
if(fabs(lambda-mu)>small) {
1717
expmlt= exp((mu - lambda)*t1);
1718
P0t1 = P0t_BDS(expmlt);
1719
vt1 = 1 - P0t1/rho*expmlt;
1722
P0t1 = rho/(1+rho*mu*t1);
1726
for(j=1,lnpt=0; j<n1; j++) {
1727
k = sptree.nspecies+j;
1728
if(!sptree.nodes[k].usefossil) /* non-calibration node age */
1729
lnpt += lnPDFkernelBDS(sptree.nodes[k].age, t1, vt1, lambda, mu, rho);
1732
else { /* beta kernel */
1733
lnbeta = LnGamma(p) + LnGamma(q) - LnGamma(p+q);
1734
for(j=1,lnpt=0; j<n1; j++) {
1735
k = sptree.nspecies+j;
1736
if(!sptree.nodes[k].usefossil) /* non-calibration node age */
1737
lnpt += lnPDFkernelBeta(sptree.nodes[k].age, t1, p, q) + lnbeta;
1741
/* (B) Divide by f(tC), marginal of calibration node ages (eq.9/eq.11).
1742
This goes through the calibration nodes in the order of their ages,
1743
with sorted node ages stored in tc[].
1745
for(j=sptree.nspecies; j<sptree.nnode; j++) {
1746
t[j-sptree.nspecies] = sptree.nodes[j].age;
1747
if(j!=sptree.root && sptree.nodes[j].usefossil)
1748
tc[nfossil++] = sptree.nodes[j].age;
1750
if(nfossil>MaxNFossils) error2("raise MaxNFossils?");
1753
/* The only reason for sorting t[] is to construct ranktc[]. */
1754
qsort(t, (size_t)n1, sizeof(double), comparedouble);
1755
qsort(tc, (size_t)nfossil, sizeof(double), comparedouble);
1757
for(i=j=0; i<nfossil; i++) {
1758
if(i) j = ranktc[i-1]+1;
1760
if(tc[i]<t[j]) break;
1764
matout2(F0, t, 1, n1, 9, 5);
1765
matout2(F0, tc, 1, nfossil, 9, 5);
1766
for(i=0; i<nfossil; i++)
1767
printf("%9d", ranktc[i]);
1772
for(i=0,rankprev=0,cdfprev=0; i<nfossil+1; i++) {
1774
if(data.priortime==0) /* BDS kernel */
1775
cdf = CDFkernelBDS(tc[i], t1, vt1, lambda, mu, rho);
1776
else /* beta kernel */
1777
cdf = CDFBeta(tc[i]/t1, p, q, lnbeta);
1779
k = ranktc[i] - rankprev - 1;
1783
k = n1 - rankprev - 1;
1786
if(cdf <= cdfprev) error2("cdf diff<0 in lnptNCgiventC");
1787
lnpt += LnGamma(k+1.0) - k*log(cdf - cdfprev);
1789
rankprev = ranktc[i];
1792
if(debug==1) printf("\npdf = %.12f\n", exp(lnpt));
1800
/* This calculates the prior density of times at calibration nodes as specified
1801
by the fossil calibration information, the second term in equation 3 in
1802
Yang & Rannala (2006).
1805
The term for root is always calculated in this routine.
1806
If there is a fossil at root and if it is a lower bound, it is re-set to a
1809
int i,j, nfossil=0, fossil;
1810
double t, lnpt=0, a,b, thetaL,thetaR, tailL=99, tailR=99, *p, s,z, t0,P,c,A;
1813
if(sptree.nfossil==0) return(0);
1815
for(j=sptree.nspecies; j<sptree.nnode; j++) {
1816
if(j!=sptree.root && !sptree.nodes[j].usefossil)
1819
t = sptree.nodes[j].age;
1820
fossil = sptree.nodes[j].fossil;
1821
p = sptree.nodes[j].pfossil;
1822
a = sptree.nodes[j].pfossil[0];
1823
b = sptree.nodes[j].pfossil[1];
1825
if(j!=sptree.root || (sptree.nodes[j].usefossil && fossil!=LOWER_F)) {
1827
tailL = sptree.nodes[j].pfossil[3];
1828
else if(fossil==UPPER_F)
1829
tailR = sptree.nodes[j].pfossil[2];
1830
else if(fossil==BOUND_F) {
1831
tailL = sptree.nodes[j].pfossil[2];
1832
tailR = sptree.nodes[j].pfossil[3];
1835
else if(!sptree.nodes[j].usefossil) { /* root fossil absent or deleted, using RootAge */
1836
a = sptree.RootAge[0];
1837
b = sptree.RootAge[1];
1838
if(sptree.RootAge[0]>0) {
1840
tailL = sptree.RootAge[2];
1841
tailR = sptree.RootAge[3];
1845
tailR = sptree.RootAge[2];
1848
else if (fossil==LOWER_F) { /* root fossil is L. B(a,b,tailL, tailR) is used. p & c ignored. */
1850
b = sptree.RootAge[1];
1851
tailL = sptree.nodes[j].pfossil[3];
1852
tailR = (sptree.RootAge[0]>0 ? sptree.RootAge[3] : sptree.RootAge[2]);
1856
case (LOWER_F): /* truncated Cauchy C(a(1+p), s^2). tL = a. */
1857
P = sptree.nodes[j].pfossil[1];
1858
c = sptree.nodes[j].pfossil[2];
1861
A = 0.5 + 1/Pi*atan(P/c);
1864
lnpt += log((1-tailL)/(Pi*A*s*(1 + z*z)));
1868
thetaL = (1/tailL-1) / (Pi*A*c*(1 + z*z));
1869
lnpt += log(tailL*thetaL/a) + (thetaL-1)*log(t/a);
1873
/* equation (16) in Yang and Rannala (2006) */
1875
lnpt += log((1-tailR)/b);
1877
thetaR = (1-tailR)/(tailR*b);
1878
lnpt += log(tailR*thetaR)-thetaR*(t-b);
1883
lnpt += log((1-tailL-tailR)/(b-a));
1885
thetaL = (1-tailL-tailR)*a/(tailL*(b-a));
1886
lnpt += log(tailL*thetaL/a) + (thetaL-1)*log(t/a);
1889
thetaR = (1-tailL-tailR)/(tailR*(b-a));
1890
lnpt += log(tailR*thetaR) - thetaR*(t-b);
1894
lnpt += a*log(b)-b*t+(a-1)*log(t)-LnGamma(a);
1897
lnpt += log(PDFSkewN(t, p[0], p[1], p[2]));
1900
lnpt += log(PDFSkewT(t, p[0], p[1], p[2], p[3]));
1903
a = PDFSkewN(t, p[1], p[2], p[3]);
1904
b = PDFSkewN(t, p[4], p[5], p[6]);
1905
lnpt += log(p[0]*a + b);
1910
printf("lnptC: Node %3d: %3s t = %7.4f tails (%7.3f,%7.3f) ", j+1, fossils[sptree.nodes[j].fossil], t, tailL, tailR);
1911
for(i=0; i<npfossils[sptree.nodes[j].fossil]; i++)
1912
printf(" %6.4f", sptree.nodes[j].pfossil[i + (fossil==UPPER_F)]);
1913
printf(" %9.4g\n", lnpt);
1921
double getScaleFossilCombination (void);
1923
double getScaleFossilCombination (void)
1925
/* This uses Monte Carlo integration to calculate the normalizing constant for the
1926
joint prior on fossil times, when some fossils are assumed to be in error and not
1927
used. The constant is the integral of the density over the feasible region, where
1928
the times satisfy the age constraints on the tree.
1929
It is assumed that the ancestral nodes have smaller node numbers than descendent
1930
nodes, as is the case if the node numbers are assigned by ReadTreeN().
1931
nfs = nfossilused if a fossil is used for the root or nfs = nfossilused + 1
1933
CLower[] contains constants for lower bounds, to avoid duplicated computation.
1935
int nrepl=5000000, ir, i,j,k, nfs,ifs[MaxNFossils]={0}, feasible;
1936
int root=sptree.root, rootfossil = sptree.nodes[root].fossil, fossil;
1937
signed char ConstraintTab[MaxNFossils][MaxNFossils]={{0}}; /* 1: row>col; 0: irrelevant */
1938
double C, sC, accept, mt[MaxNFossils]={0}, t[MaxNFossils], *pfossil[MaxNFossils], pfossilroot[4];
1939
double importance, CLower[MaxNFossils][3];
1940
double sNormal=1, r, thetaL, thetaR, a, b, c,p,s,A,zc,zn, tailL,tailR;
1943
/* Set up constraint table */
1944
for(i=sptree.nspecies,nfs=0; i<sptree.nnode; i++) {
1945
if(i==root || sptree.nodes[i].usefossil)
1948
for(i=1; i<nfs; i++) {
1949
for(j=0; j<i; j++) {
1950
for(k=ifs[i]; k!=-1; k=sptree.nodes[k].father) {
1951
if(k == ifs[j]) { ConstraintTab[i][j] = 1; break; }
1956
for(i=0; i<nfs; i++) {
1957
printf("\n%d (%2d %s): ", i+1, ifs[i]+1, fossils[sptree.nodes[ifs[i]].fossil]);
1959
printf("%4d", ConstraintTab[i][j]);
1964
/* Set up calibration info. Save constants for lower bounds */
1966
pfossilroot[i] = sptree.nodes[root].pfossil[i];
1967
if(ifs[0] == root) { /* copy info for root into pfossilroot[] */
1968
if(!sptree.nodes[root].usefossil) { /* root fossil absent or excluded */
1970
pfossilroot[i] = sptree.RootAge[i];
1971
rootfossil = (sptree.RootAge[0]>0 ? BOUND_F : UPPER_F);
1973
else if (sptree.nodes[root].fossil==LOWER_F) { /* root fossil is lower bound */
1974
pfossilroot[1] = sptree.RootAge[1];
1975
rootfossil = BOUND_F;
1978
for(i=0; i<nfs; i++) {
1980
pfossil[i] = (j==root ? pfossilroot : sptree.nodes[j].pfossil);
1982
if(j!=root && sptree.nodes[j].fossil==LOWER_F) {
1986
tailL = pfossil[i][3];
1988
A = 0.5 + 1/Pi*atan(p/c);
1989
CLower[i][0] = (1/tailL-1) / (Pi*A*c*(1 + (p/c)*(p/c))); /* thetaCauchy */
1990
CLower[i][1] = (1/tailL-1) * a/(s*sqrt(Pi/2)); /* thetaNormal */
1991
CLower[i][2] = s/(A*c*a*sqrt(2*Pi)); /* s/Act*sqrt(2Pi) */
1996
for(ir=0,C=sC=accept=0; ir<nrepl; ir++) {
1997
for(i=0,importance=1; i<nfs; i++) {
1999
fossil = (j==root ? rootfossil : sptree.nodes[j].fossil);
2004
case(LOWER_F): /* simulating from folded normal, the importance density */
2005
tailL = pfossil[i][3];
2006
if (r < tailL) { /* left tail, CLower[i][1] has thetaNormal */
2007
thetaL = CLower[i][1];
2008
t[i] = a * pow(rndu(), 1/thetaL);
2009
importance *= CLower[i][0]/thetaL * pow(t[i]/a, CLower[i][0]-thetaL);
2014
t[i] = a + fabs(rndNormal()) * s;
2015
zc = (t[i] - a*(1+b))/(c*a);
2017
importance *= CLower[i][2] * exp(zn*zn/2) / (1+zc*zc);
2021
tailR = pfossil[i][2];
2022
if(r > tailR) { /* flat part */
2025
else { /* right tail */
2026
thetaR = (1-tailR)/(tailR*b);
2027
t[i] = b - log(rndu())/thetaR;
2031
tailL = pfossil[i][2];
2032
tailR = pfossil[i][3];
2033
if(r > tailL + tailR) /* flat part */
2034
t[i] = a + (b - a) * rndu();
2035
else if (r < tailL) { /* left tail */
2036
thetaL = (1-tailL-tailR)*a/(tailL*(b-a));
2037
t[i] = a * pow(rndu(), 1/thetaL);
2039
else { /* right tail */
2040
thetaR = (1-tailL-tailR)/(tailR*(b-a));
2041
t[i] = b - log(rndu())/thetaR;
2045
t[i] = rndgamma(a)/b;
2048
printf("\nfossil = %d (%s) not implemented.\n", fossil, fossils[fossil]);
2054
for(i=1; i<nfs; i++) {
2056
if(ConstraintTab[i][j] && t[i]>t[j])
2057
{ feasible=0; break; }
2058
if(feasible==0) break;
2063
sC += importance*importance;
2064
for(i=0; i<nfs; i++)
2065
mt[i] += t[i]*importance;
2067
if((ir+1)%100000 == 0) {
2069
s = (sC/a - C/a*C/a) / a;
2070
printf("\r%7d %.2f%% C = %.6f +- %.6f mt", ir+1, accept/(ir+1)*100, C/(ir+1), sqrt(s));
2071
for(i=0; i<nfs; i++)
2072
printf(" %6.4f", mt[i]/C);
2079
int SetupPriorTimesFossilErrors (void)
2081
/* This prepares for lnpriorTimes() under models of fossil errors. It calculates
2082
the scaling factor for the probability density of times for a given combination
2083
of the indicator variables, indicating which fossil is in error and not used.
2084
The combination in which all fossils are wrong is excluded.
2086
int is,i, icom, nused=0, it;
2087
int nMinCorrect = (int)data.pfossilerror[2];
2088
int ncomFerr = (1<<sptree.nfossil) - 1 - (nMinCorrect==2?sptree.nfossil:0);
2091
if(data.pfossilerror[0]==0 || sptree.nfossil>MaxNFossils || sptree.nfossil<2)
2092
error2("Fossil-error model is inappropriate.");
2094
sptree.CcomFossilErr = (double*)malloc(ncomFerr*sizeof(double));
2095
if(sptree.CcomFossilErr == NULL) error2("oom for CcomFossilErr");
2097
/* cycle through combinations of indicators. */
2098
for (i=0,icom=0; i < (1<<sptree.nfossil)-1; i++) {
2100
for (is=sptree.nspecies, nused=0; is<sptree.nnode; is++) {
2101
if(sptree.nodes[is].fossil) {
2102
sptree.nodes[is].usefossil = 1 - it%2;
2103
nused += sptree.nodes[is].usefossil;
2105
if(debug) printf("%2d (%2d)", sptree.nodes[is].usefossil, is+1);
2108
if(nMinCorrect==2 && nused<2) continue;
2110
if(debug) printf("\n\n********* Com %2d/%2d: %2d fossils used", icom, ncomFerr, nused);
2111
sptree.CcomFossilErr[icom-1] = getScaleFossilCombination();
2117
double lnpriorTimes (void)
2119
/* This calculates the prior density of node times in the master species tree:
2122
int nMinCorrect = (int)data.pfossilerror[2];
2123
int ncomFerr = (1<<sptree.nfossil) - 1 - (nMinCorrect==2?sptree.nfossil:0);
2124
int is, i,icom, nused, it;
2125
double pE = sptree.Pfossilerr, ln1pE=log(1-pow(pE,(double)sptree.nfossil));
2126
double lnpt=0, scaleF=-1e300, y;
2128
if(data.pfossilerror[0]==0) /* no fossil errors in model */
2129
lnpt = lnptC() + lnptNCgiventC();
2130
else { /* cycle through combinations of indicators, excluding that of all 0s. */
2131
for(i=0,icom=0; i < (1<<sptree.nfossil) - 1; i++) {
2133
for(is=sptree.nspecies, nused=0; is<sptree.nnode; is++) {
2134
if(sptree.nodes[is].fossil) {
2135
sptree.nodes[is].usefossil = 1 - it%2;
2136
nused += sptree.nodes[is].usefossil;
2140
if(nMinCorrect==2 && nused<2) continue;
2143
y = nused*log(1-pE)+(sptree.nfossil-nused)*log(pE)-ln1pE
2144
- log(sptree.CcomFossilErr[icom-1]) + lnptC() + lnptNCgiventC();
2146
if(y < scaleF + 100)
2147
lnpt += exp(y-scaleF);
2149
lnpt = lnpt*exp(scaleF-y) + 1;
2152
lnpt += exp(y-scaleF);
2154
lnpt = scaleF + log(lnpt);
2160
int getPfossilerr (double postEFossil[], double nround)
2162
/* This is modified from lnpriorTimes(), and prints out the conditonal Perror
2163
for each fossil given the times and pE.
2165
int nMinCorrect = (int)data.pfossilerror[2];
2166
int ncomFerr = (1<<sptree.nfossil) - 1 - (nMinCorrect==2?sptree.nfossil:0);
2167
int is,i,icom, k, nf=sptree.nfossil, nused, it;
2168
double pE = sptree.Pfossilerr, ln1pE=log(1-pow(pE,(double)nf));
2169
double pt, pEf[MaxNFossils]={0}, scaleF=-1e300, y;
2170
char Ef[MaxNFossils]; /* indicators of fossil errors */
2172
if(data.priortime==1)
2173
error2("beta kernel for prior time not yet implemented for model of fossil errors.");
2175
for(i=0,icom=0,pt=0; i < (1<<sptree.nfossil) - 1; i++) {
2177
for(is=sptree.nspecies, k=0, nused=0; is<sptree.nnode; is++) {
2178
if(sptree.nodes[is].fossil) {
2179
sptree.nodes[is].usefossil = 1 - it%2;
2180
nused += sptree.nodes[is].usefossil;
2181
Ef[k++] = !sptree.nodes[is].usefossil;
2185
if(nMinCorrect==2 && nused<2) continue;
2187
if(k != nf) error2("k == nf?");
2189
y = nused*log(1-pE)+(nf-nused)*log(pE)-ln1pE
2190
- log(sptree.CcomFossilErr[icom-1]) + lnptC() + lnptNCgiventC();
2192
if(y < scaleF + 200) {
2193
pt += y = exp(y-scaleF);
2195
else { /* change scaleF */
2196
pt = pt*exp(scaleF-y) + 1;
2198
pEf[k] *= exp(scaleF-y);
2203
if(Ef[k]) pEf[k] += y;
2209
postEFossil[k] = (postEFossil[k]*(nround-1) + pEf[k])/nround;
2216
int LabelOldCondP (int spnode)
2218
/* This sets com.oldconP[j]=0 if node j in the gene tree needs updating, after
2219
either rates or times have changed for spnode in the species tree. This is to
2220
avoid duplicated computation of conditional probabilities. The routine workes
2221
on the current gene tree and accounts for the fact that some species may be
2222
missing at some loci.
2223
The routine first finds spnode or its first ancestor that is present in the
2224
gene tree, then identifies that node in the genetree. This reveals the
2225
oldest node j in the gene tree that is descendent of spnode. Node j and all
2226
its ancestors in the gene tree need updating.
2228
Before calling this routine, set com.oldconP[]=1. This routine changes some
2229
com.oldconP[] into 0 but do not change any 0 to 1.
2231
The gene tree is in nodes[], as UseLocus has been called prior to this.
2232
This is called by UpdateTimes and UpdateRates.
2236
if(j>=tree.nnode || j!=nodes[j].ipop) {
2238
/* From among spnode and its ancestors in the species tree, find the
2239
first node, i, that is in genetree.
2241
for(i=spnode; i!=-1; i=sptree.nodes[i].father) {
2243
/* Find that node in genetree that is node i in species tree.
2244
Its descendent, node j, is the oldest node in gene tree that is
2245
descendent of spnode.
2247
for(j=0; j<tree.nnode && nodes[j].ipop!=i; j++) ;
2249
if(j<tree.nnode) break;
2254
for( ; ; j=nodes[j].father) {
2256
if(j==tree.root) break;
2262
double UpdateTimes (double *lnL, double finetune)
2264
/* This updates the node ages in the master species tree sptree.nodes[].age,
2266
int locus, is, i, *sons, dad;
2267
double naccept=0, t,tnew, tson[2], yb[2], y,ynew;
2268
double lnacceptance, lnLd, lnpDinew[NGENE], lnpTnew, lnpRnew=-1;
2270
for(is=sptree.nspecies; is<sptree.nnode; is++) {
2271
t = sptree.nodes[is].age;
2272
sons = sptree.nodes[is].sons;
2273
dad = sptree.nodes[is].father;
2274
tson[0] = sptree.nodes[sons[0]].age;
2275
tson[1] = sptree.nodes[sons[1]].age;
2276
y = max2(tson[0], tson[1]);
2277
yb[0] = (y>0 ? log(y) : -99);
2278
if(is != sptree.root) yb[1] = log(sptree.nodes[dad].age);
2282
ynew = y + finetune*rnd2NormalSym(m2NormalSym);
2283
ynew = reflect(ynew, yb[0], yb[1]);
2284
sptree.nodes[is].age = tnew = exp(ynew);
2285
lnacceptance = ynew - y;
2286
lnpTnew = lnpriorTimes();
2287
lnacceptance += lnpTnew - data.lnpT;
2289
lnpRnew = lnpriorRates();
2290
lnacceptance += lnpRnew - data.lnpR;
2293
for(locus=0,lnLd=0; locus<data.ngene; locus++) {
2294
UseLocus(locus, 1, mcmc.usedata, 0);
2297
for(i=0;i<sptree.nnode;i++) com.oldconP[i]=1;
2300
if(com.oldconP[tree.root])
2301
lnpDinew[locus] = data.lnpDi[locus];
2303
lnpDinew[locus] = lnpD_locus(locus);
2304
lnLd += lnpDinew[locus] - data.lnpDi[locus];
2306
lnacceptance += lnLd;
2308
if(debug==2) printf("species %2d t %8.4f %8.4f %9.2f %9.2f", is, t, tnew, lnLd, lnacceptance);
2310
if(lnacceptance>=0 || rndu()<exp(lnacceptance)) {
2312
data.lnpT = lnpTnew;
2313
for(i=0;i<data.ngene;i++)
2314
data.lnpDi[i] = lnpDinew[i];
2316
if(mcmc.usedata==1) switchconPin();
2318
data.lnpR = lnpRnew;
2319
if(debug==2) printf(" Y (%4d)\n", NPMat);
2322
sptree.nodes[is].age = t;
2323
if(debug==2) printf(" N (%4d)\n", NPMat);
2324
for(locus=0; locus<data.ngene; locus++)
2325
AcceptRejectLocus(locus, 0); /* reposition conP */
2329
return(naccept/(sptree.nspecies-1.));
2333
#if (0) /* this is not used now. */
2335
double UpdateTimesClock23 (double *lnL, double finetune)
2337
/* This updates the node ages in the master species tree sptree.nodes[].age,
2338
one by one. It simultaneously changes the rates at the three branches
2339
around the node so that the branch lengths remain the same, and there is
2340
no need to update the lnL. Sliding window on the logarithm of ages.
2342
int is, i, *sons, dad;
2343
double naccept=0, tb[2],t,tnew, tson[2], yb[2],y,ynew, rateratio[3];
2344
double lnacceptance, lnpTnew,lnpRnew;
2346
if(debug==2) puts("\nUpdateTimesClock23 ");
2348
for(is=sptree.nspecies; is<sptree.nnode; is++) {
2349
t = sptree.nodes[is].age;
2350
sons = sptree.nodes[is].sons;
2351
tson[0] = sptree.nodes[sons[0]].age;
2352
tson[1] = sptree.nodes[sons[1]].age;
2353
dad = sptree.nodes[is].father;
2354
tb[0] = max2(tson[0], tson[1]);
2355
yb[0] = (tb[0]>0 ? log(tb[0]) : -99);
2356
if(is != sptree.root) yb[1] = log(tb[1] = sptree.nodes[dad].age);
2360
ynew = y + finetune*rnd2NormalSym(m2NormalSym);
2361
ynew = reflect(ynew, yb[0], yb[1]);
2362
sptree.nodes[is].age = tnew = exp(ynew);
2363
lnacceptance = ynew - y;
2365
/* Thorne et al. (1998) equation 9. */
2366
rateratio[0] = (t-tson[0])/(tnew-tson[0]);
2367
rateratio[1] = (t-tson[1])/(tnew-tson[1]);
2368
for(i=0; i<data.ngene; i++) {
2369
sptree.nodes[sons[0]].rates[i] *= rateratio[0];
2370
sptree.nodes[sons[1]].rates[i] *= rateratio[1];
2372
lnacceptance += data.ngene*log(rateratio[0]*rateratio[1]);
2373
if(is != sptree.root) {
2374
rateratio[2] = (t-tb[1])/(tnew-tb[1]);
2375
for(i=0; i<data.ngene; i++)
2376
sptree.nodes[is].rates[i] *= rateratio[2];
2377
lnacceptance += data.ngene*log(rateratio[2]);
2380
lnpTnew = lnpriorTimes();
2381
lnacceptance += lnpTnew - data.lnpT;
2382
lnpRnew = lnpriorRates();
2383
lnacceptance += lnpRnew - data.lnpR;
2385
if(debug==2) printf("species %2d t %8.4f %8.4f", is,t,tnew);
2387
if(lnacceptance>=0 || rndu()<exp(lnacceptance)) {
2389
data.lnpT = lnpTnew;
2390
data.lnpR = lnpRnew;
2391
if(debug==2) printf(" Y (%4d)\n", NPMat);
2394
sptree.nodes[is].age = t;
2395
for(i=0; i<data.ngene; i++) {
2396
sptree.nodes[sons[0]].rates[i] /= rateratio[0];
2397
sptree.nodes[sons[1]].rates[i] /= rateratio[1];
2398
if(is!=sptree.root) sptree.nodes[is].rates[i] /= rateratio[2];
2400
if(debug==2) printf(" N (%4d)\n", NPMat);
2403
return(naccept/(sptree.nspecies-1.));
2410
void getSinvDetS (double space[])
2412
/* This uses the variance-correlation matrix data.correl[g*g] to constructs
2413
Sinv (inverse of S) and detS (|S|). It also copies the variances into
2414
data.sigma2[g]. This is called every time data.correl is updated.
2416
What restrictions should be placed on data.correl[]???
2420
int i, j, g=data.ngene;
2424
data.Sinv[i*g+i] = data.sigma2[i] = data.correl[i*g+i];
2425
for(i=0; i<g; i++) {
2427
data.Sinv[i*g+j] = data.Sinv[j*g+i]
2428
= data.correl[i*g+j]*sqrt(data.sigma2[i]*data.sigma2[j]);
2432
printf("\ncorrel & S & Sinv ");
2433
matout(F0, data.correl, g, g);
2434
matout(F0, data.Sinv, g, g);
2437
matinv (data.Sinv, g, g, space);
2438
data.detS = fabs(space[0]);
2441
matout(F0, data.Sinv, g, g);
2442
printf("|S| = %.6g\n", data.detS);
2450
double lnpriorRates (void)
2452
/* This calculates the log of the prior of rates under the two rate-drift models:
2453
the independent rates (clock=2) and the geometric Brownian motion model (clock=3).
2455
clock=2: the algorithm cycles through the branches, and add up the log
2457
clock=3: the root rate is mu or data.rgene[]. The algorithm cycles through
2458
the ancestral nodes and deals with the two daughter branches.
2460
int i, inode, dad=-1, g=data.ngene, s=sptree.nspecies, sons[2], root=sptree.root;
2461
double lnpR=-log(2*Pi)/2.0*(2*s-2)*g, t,tA,t1,t2,Tinv[4], detT;
2462
double zz, r=-1, rA,r1,r2, y1,y2;
2465
if(data.priorrate==1 && com.clock==3)
2466
error2("gamma prior for rates for clock3 not implemented yet.");
2467
else if(data.priorrate==1 && com.clock==2) { /* gamma rate prior, clock2 */
2469
for(i=0; i<g; i++) {
2470
alpha = data.rgene[i]*data.rgene[i]/data.sigma2[i];
2471
beta = data.rgene[i]/data.sigma2[i];
2472
lnpR += (alpha*log(beta) - LnGamma(alpha)) * (2*s-2);
2473
for(inode=0; inode<sptree.nnode; inode++) {
2474
if(inode==root) continue;
2475
r = sptree.nodes[inode].rates[i];
2476
lnpR += -beta*r + (alpha-1)*log(r);
2480
else if(data.priorrate ==0 && com.clock==2) { /* LN rate prior, clock2 */
2482
lnpR -= log(data.sigma2[i])/2.*(2*s-2);
2483
for(inode=0; inode<sptree.nnode; inode++) {
2484
if(inode==root) continue;
2485
for(i=0; i<g; i++) {
2486
r = sptree.nodes[inode].rates[i];
2487
zz = log(r/data.rgene[i]) + data.sigma2[i]/2;
2488
lnpR += -zz*zz/(2*data.sigma2[i]) - log(r);
2492
else if(data.priorrate ==0 && com.clock==3) { /* LN rate prior, clock3 */
2493
for(inode=0; inode<sptree.nnode; inode++) {
2494
if(sptree.nodes[inode].nson==0) continue; /* skip the tips */
2495
dad = sptree.nodes[inode].father;
2496
for(i=0; i<2; i++) sons[i] = sptree.nodes[inode].sons[i];
2497
t = sptree.nodes[inode].age;
2498
if(inode==root) tA = 0;
2499
else tA = (sptree.nodes[dad].age - t)/2;
2500
t1 = (t-sptree.nodes[sons[0]].age)/2;
2501
t2 = (t-sptree.nodes[sons[1]].age)/2;
2502
detT = t1*t2+tA*(t1+t2);
2503
Tinv[0] = (tA+t2)/detT;
2504
Tinv[1] = Tinv[2] = -tA/detT;
2505
Tinv[3] = (tA+t1)/detT;
2506
for(i=0; i<g; i++) {
2507
rA = (inode==root||dad==root ? data.rgene[i] : sptree.nodes[dad].rates[i]);
2508
r1 = sptree.nodes[sons[0]].rates[i];
2509
r2 = sptree.nodes[sons[1]].rates[i];
2510
y1 = log(r1/rA)+(tA+t1)*data.sigma2[i]/2;
2511
y2 = log(r2/rA)+(tA+t2)*data.sigma2[i]/2;
2512
zz = (y1*y1*Tinv[0]+2*y1*y2*Tinv[1]+y2*y2*Tinv[3]);
2513
lnpR -= zz/(2*data.sigma2[i]) + log(detT*square(data.sigma2[i]))/2 + log(r1*r2);
2520
double UpdateRates (double *lnL, double finetune)
2522
/* This updates rates under the rate-drift models (clock=2 or 3).
2523
It cycles through nodes in the species tree at each locus.
2525
Slight waste of computation: For every proposal to change rates, lnpriorRates()
2526
is called to calculate the prior for all rates at all loci on the whole
2527
tree, thus wasting computation, if rates are not correlated across loci.
2529
int locus, inode, j, g=data.ngene;
2530
double naccept=0, lnpRnew, lnpDinew, lnacceptance, lnLd, rold;
2531
double yb[2]={-99,99},y,ynew;
2534
return UpdateRatesClock(lnL, finetune);
2536
for(locus=0; locus<g; locus++) {
2537
for(inode=0; inode<sptree.nnode; inode++) {
2538
if(inode == sptree.root) continue;
2539
rold = sptree.nodes[inode].rates[locus];
2541
ynew = y + finetune*rnd2NormalSym(m2NormalSym);
2542
ynew = reflect(ynew, yb[0], yb[1]);
2543
sptree.nodes[inode].rates[locus] = exp(ynew);
2544
lnacceptance = ynew - y;
2546
UseLocus(locus, 1, mcmc.usedata, 0); /* copyconP=1 */
2549
FOR(j,sptree.nspecies*2-1) com.oldconP[j]=1;
2550
LabelOldCondP(inode);
2553
lnpRnew = lnpriorRates();
2554
lnacceptance += lnpRnew - data.lnpR;
2555
lnpDinew = lnpD_locus(locus);
2556
lnLd = lnpDinew - data.lnpDi[locus];
2557
lnacceptance += lnLd;
2559
if(lnacceptance>0 || rndu()<exp(lnacceptance)) {
2561
if(mcmc.usedata==1) AcceptRejectLocus(locus,1);
2563
data.lnpR = lnpRnew;
2564
data.lnpDi[locus] = lnpDinew;
2568
if(mcmc.usedata==1) AcceptRejectLocus(locus,0);
2569
sptree.nodes[inode].rates[locus] = rold;
2573
return(naccept/(g*(sptree.nnode-1)));
2577
double logPriorRatioGamma(double xnew, double xold, double a, double b)
2579
/* This calculates the log of prior ratio when x is updated from xold to xnew.
2580
x has distribution with parameters a and b.
2583
return (a-1)*log(xnew/xold) - b*(xnew-xold);
2586
double UpdateRatesClock (double *lnL, double finetune)
2588
/* This updates rates data.rgene[] under the clock by cycling through the loci.
2589
The proposal affects all branch lengths, so com.oldconP[]=0.
2590
The rate may go to 0 if a locus has no variation.
2592
int locus, j, g=data.ngene;
2593
double naccept=0, lnLd, lnpDinew, lnacceptance;
2594
double yb[2]={-99,99},y,ynew, rold;
2596
if(debug==3) puts("\nUpdateRatesClock");
2597
if(mcmc.saveconP) FOR(j,sptree.nspecies*2-1) com.oldconP[j]=0;
2598
for(locus=0; locus<g; locus++) {
2599
rold = data.rgene[locus];
2601
ynew = y + finetune*rnd2NormalSym(m2NormalSym);
2602
ynew = reflect(ynew, yb[0], yb[1]);
2603
data.rgene[locus] = exp(ynew);
2604
lnacceptance = ynew - y;
2606
UseLocus(locus, 1, mcmc.usedata, 0);
2607
lnpDinew = lnpD_locus(locus);
2608
lnLd = lnpDinew - data.lnpDi[locus]; /* likelihood ratio */
2609
lnacceptance += lnLd; /* proposal ratio */
2611
lnacceptance += logPriorRatioGamma(data.rgene[locus],rold,data.rgenegamma[0],data.rgenegamma[1]);
2614
printf("\nLocus %2d rgene %9.4f%9.4f %10.5f", locus+1, rold, data.rgene[locus], lnLd);
2616
if(lnacceptance>=0 || rndu()<exp(lnacceptance)) {
2619
data.lnpDi[locus] = lnpDinew;
2620
if(mcmc.usedata==1) AcceptRejectLocus(locus,1);
2621
if(debug==3) printf(" Y\n");
2624
data.rgene[locus] = rold;
2625
if(mcmc.usedata==1) AcceptRejectLocus(locus,0); /* reposition conP */
2627
if(debug==3) printf(" N\n");
2634
double UpdateParameters (double *lnL, double finetune)
2636
/* This updates parameters in the substitution model such as the ts/tv
2637
rate ratio for each locus.
2639
Should we update the birth-death process parameters here as well?
2642
int locus, j, ip, np=!com.fix_kappa+!com.fix_alpha;
2643
double naccept=0, lnLd,lnpDinew, lnacceptance, c=1;
2644
double yb[2]={-99,99},y,ynew, pold, pnew, *gammaprior;
2646
if(np==0) return(0);
2647
if(debug==4) puts("\nUpdateParameters");
2649
if(mcmc.saveconP) FOR(j,sptree.nspecies*2-1) com.oldconP[j]=0;
2650
for(locus=0; locus<data.ngene; locus++) {
2651
for(ip=0; ip<np; ip++) {
2652
if(ip==0 && !com.fix_kappa) { /* kappa */
2653
pold = data.kappa[locus];
2655
ynew = y + finetune*rnd2NormalSym(m2NormalSym);
2656
ynew = reflect(ynew, yb[0], yb[1]);
2657
data.kappa[locus] = pnew = exp(ynew);
2658
gammaprior = data.kappagamma;
2661
pold = data.alpha[locus];
2663
ynew = y + finetune*rnd2NormalSym(m2NormalSym);
2664
ynew = reflect(ynew, yb[0], yb[1]);
2665
data.alpha[locus] = pnew = exp(ynew);
2666
gammaprior = data.alphagamma;
2668
lnacceptance = ynew - y;
2670
UseLocus(locus, 1, mcmc.usedata, 0); /* this copies parameter from data.[] to com. */
2672
lnpDinew = lnpD_locus(locus);
2673
lnLd = lnpDinew - data.lnpDi[locus];
2674
lnacceptance += lnLd;
2675
lnacceptance += logPriorRatioGamma(pnew,pold,gammaprior[0],gammaprior[1]);
2678
printf("\nLocus %2d para%d %9.4f%9.4f %10.5f", locus+1,ip+1,pold,pnew,lnLd);
2680
if(lnacceptance >= 0 || rndu() < exp(lnacceptance)) {
2683
data.lnpDi[locus] = lnpDinew;
2684
if(mcmc.usedata==1) AcceptRejectLocus(locus,1);
2685
if(debug==4) printf(" Y\n");
2688
if(ip==0 && !com.fix_kappa)
2689
data.kappa[locus] = pold;
2691
data.alpha[locus] = pold;
2693
if(mcmc.usedata==1) AcceptRejectLocus(locus,0);
2695
if(debug==4) printf(" N\n");
2699
return(naccept/(data.ngene*np));
2703
double UpdateParaRates (double finetune, double space[])
2705
/* This updates mu (mean rate) and sigma2 (variance of lnrates) at each locus.
2706
The gamma hyperpriors are assumed to be the same across loci.
2707
The proposals in this routine do not change the likelihood.
2709
int i, ip, g=data.ngene;
2710
char *parastr[2]={"mu", "sigma2"};
2711
double lnacceptance, lnpRnew, naccept=0;
2712
double yb[2]={-99,99},y,ynew, pold, pnew, *gammaprior;
2714
if(com.clock==1) return(0);
2715
if(debug==5) puts("\nUpdateParaRates (rgene & sigma2)");
2717
for(i=0; i<g; i++) {
2718
for(ip=0; ip<2; ip++) { /* this loops through mu (rgene) and sigma2 */
2719
if(ip==0) { /* rgene (mu) */
2720
pold = data.rgene[i];
2722
ynew = y + finetune*rnd2NormalSym(m2NormalSym);
2723
ynew = reflect(ynew, yb[0], yb[1]);
2724
data.rgene[i] = pnew = exp(ynew);
2725
gammaprior = data.rgenegamma;
2728
pold = data.sigma2[i];
2730
ynew = y + finetune*rnd2NormalSym(m2NormalSym);
2731
ynew = reflect(ynew, yb[0], yb[1]);
2732
data.sigma2[i] = pnew = exp(ynew);
2733
gammaprior = data.sigma2gamma;
2736
lnacceptance = ynew - y;
2737
lnacceptance += logPriorRatioGamma(pnew,pold,gammaprior[0],gammaprior[1]);
2738
if(debug==5) printf("%-7s %2d %9.5f -> %9.5f ", parastr[ip], i, pold,pnew);
2740
lnpRnew = lnpriorRates();
2741
lnacceptance += lnpRnew-data.lnpR;
2743
if(lnacceptance>=0 || rndu()<exp(lnacceptance)) {
2745
data.lnpR = lnpRnew;
2748
if(ip==0) data.rgene[i] = pold;
2749
else data.sigma2[i] = pold;
2753
return(naccept/(g*2));
2758
double mixing (double finetune)
2760
/* If com.clock==1, this multiplies all times by c and divides all rates
2761
(mu or data.rgene[]) by c, so that the likelihood does not change.
2763
If com.clock=2 or 3, this multiplies all times by c and divide by c all rates:
2764
sptree.nodes[].rates and mu (data.rgene[]) at all loci. The likelihood is
2767
int i,j, ndivide = data.ngene;
2768
double naccept=0, a=data.rgenegamma[0], b=data.rgenegamma[1], c, lnc;
2769
double lnacceptance=0, lnpTnew, lnpRnew=-1;
2771
lnc = finetune*rnd2NormalSym(m2NormalSym);
2773
for(j=sptree.nspecies; j<sptree.nnode; j++)
2774
sptree.nodes[j].age *= c;
2776
for(j=0; j<data.ngene; j++) { /* mu */
2777
lnacceptance += (a-1)*(-lnc) - b*(data.rgene[j]/c-data.rgene[j]);
2780
if(com.clock>1) { /* rate-drift models */
2781
ndivide += data.ngene*(sptree.nspecies*2-2);
2782
for(i=0; i<sptree.nnode; i++)
2783
if(i != sptree.root)
2784
for(j=0; j<data.ngene; j++)
2785
sptree.nodes[i].rates[j] /= c;
2786
lnpRnew = lnpriorRates();
2787
lnacceptance += lnpRnew-data.lnpR;
2790
lnpTnew = lnpriorTimes();
2791
lnacceptance += lnpTnew-data.lnpT + ((sptree.nspecies-1) - ndivide)*lnc;
2793
if(lnacceptance>0 || rndu()<exp(lnacceptance)) { /* accept */
2795
data.lnpT = lnpTnew;
2796
data.lnpR = lnpRnew;
2799
for(j=sptree.nspecies; j<sptree.nnode; j++)
2800
sptree.nodes[j].age /= c;
2801
for(j=0; j<data.ngene; j++) data.rgene[j] *= c;
2803
for(i=0; i<sptree.nnode; i++)
2804
if(i != sptree.root)
2805
for(j=0; j<data.ngene; j++)
2806
sptree.nodes[i].rates[j] *= c;
2813
double UpdatePFossilErrors (double finetune)
2815
/* This updates the probability of fossil errors sptree.Pfossilerr.
2816
The proposal do not change the likelihood.
2818
double lnacceptance, lnpTnew, naccept=0, pold, pnew;
2819
double p = data.pfossilerror[0], q = data.pfossilerror[1];
2821
pold = sptree.Pfossilerr;
2822
pnew = pold + finetune*rnd2NormalSym(m2NormalSym);
2823
sptree.Pfossilerr = pnew = reflect(pnew,0,1);
2824
lnacceptance = (p-1)*log(pnew/pold) + (q-1)*log((1-pnew)/(1-pold));
2825
lnpTnew = lnpriorTimes();
2826
lnacceptance += lnpTnew-data.lnpT;
2828
if(lnacceptance>=0 || rndu()<exp(lnacceptance)) {
2830
data.lnpT = lnpTnew;
2833
sptree.Pfossilerr = pold;
2839
int DescriptiveStatisticsSimpleMCMCTREE (FILE *fout, char infile[], int nbin, int nrho)
2841
FILE *fin=gfopen(infile,"r");
2843
char *fmt=" %9.6f", *fmt1=" %9.1f", timestr[32];
2844
double *x, *mean, *median, *minx, *maxx, *x005,*x995,*x025,*x975,*x25,*x75;
2846
int lline=640000, ifields[MAXNFIELDS], Ignore1stColumn=1, ReadHeader=0;
2848
char varstr[MAXNFIELDS][32]={""};
2850
if((line=(char*)malloc(lline*sizeof(char)))==NULL) error2("oom ds");
2851
scanfile(fin, &n, &p, &ReadHeader, line, ifields);
2853
for(i=0; i<p; i++) sscanf(line+ifields[i], "%s", varstr[i]);
2855
x = (double*)malloc((p*13+p*p + p*nrho)*sizeof(double));
2856
if (x==NULL) { puts("did not get enough space."); exit(-1); }
2857
for(j=0;j<p*13+p*p + p*nrho; j++) x[j]=0;
2858
mean=x+p; median=mean+p; minx=median+p; maxx=minx+p;
2859
x005=maxx+p; x995=x005+p; x025=x995+p; x975=x025+p; x25=x975+p; x75=x25+p;
2861
for(i=0; i<n; i++) {
2862
for(j=0;j<p;j++) fscanf(fin,"%lf", &x[j]);
2863
for(j=Ignore1stColumn; j<p; j++) mean[j] += x[j]/n;
2866
y=(double*)malloc(n*sizeof(double));
2867
if(y==NULL) { printf("not enough mem for %d variables\n",n); exit(-1); }
2868
for(jj=Ignore1stColumn; jj<p; jj++) {
2870
if(ReadHeader) fgets(line, lline, fin);
2873
for(j=0; j<p; j++) fscanf(fin,"%lf", &x[j]);
2876
qsort(y, (size_t)n, sizeof(double), comparedouble);
2877
if(n%2==0) median[jj]=(y[n/2]+y[n/2+1])/2;
2878
else median[jj]=y[(n+1)/2];
2879
x005[jj] = y[(int)(n*.005)]; x995[jj] = y[(int)(n*.995)];
2880
x025[jj] = y[(int)(n*.025)]; x975[jj] = y[(int)(n*.975)];
2881
x25[jj] = y[(int)(n*.25)]; x75[jj] = y[(int)(n*.75)];
2882
printf("Summarizing... %d/%d%10s\r", jj+1, p, printtime(timestr));
2884
printf("%40s\n", "");
2887
for(j=sptree.nspecies; j<sptree.nnode; j++) {
2888
nodes[j].nodeStr = (char*)malloc(32*sizeof(char));
2889
jj = Ignore1stColumn+j-sptree.nspecies;
2890
sprintf(nodes[j].nodeStr, "%.3f-%.3f", x025[jj], x975[jj]);
2892
OutTreeN(F0,1,PrBranch|PrLabel);
2893
FPN(fout); OutTreeN(fout,1,PrBranch|PrLabel); FPN(fout);
2897
jj = Ignore1stColumn + sptree.nspecies - 1 + data.ngene * 2;
2898
for(i=0; i<data.ngene; i++) {
2899
fprintf(fout, "\nrategram locus %d:\n", i+1);
2900
for(j=0; j<sptree.nnode; j++) {
2901
if(j==sptree.root) continue;
2902
nodes[j].branch = mean[jj++];
2904
OutTreeN(fout,1,PrBranch); FPN(fout);
2909
printf("\n\nPosterior median mean & 95%% credibility interval\n\n");
2910
for (j=Ignore1stColumn; j<p; j++) {
2911
printf("%-10s", varstr[j]);
2912
printf("%7.4f %7.4f (%6.4f, %6.4f)", median[j], mean[j], x025[j],x975[j]);
2913
if(j<Ignore1stColumn + sptree.nspecies-1)
2914
printf(" (age of jeffnode %2d)", 2*sptree.nspecies-1-1-j+Ignore1stColumn);
2918
fprintf(fout, "\n\nPosterior median mean & 95%% credibility interval\n\n");
2919
for (j=Ignore1stColumn; j<p; j++) {
2920
fprintf(fout, "%-10s", varstr[j]);
2921
fprintf(fout, "%7.4f %7.4f (%6.4f, %6.4f)", median[j], mean[j], x025[j],x975[j]);
2922
if(j<sptree.nspecies-1+Ignore1stColumn)
2923
fprintf(fout, " (age of jeffnode %2d)", 2*sptree.nspecies-1-1-j+Ignore1stColumn);
2924
fprintf(fout, "\n");
2928
fclose(fin); free(x); free(y); free(line);
2929
for(j=sptree.nspecies; j<sptree.nnode; j++)
2930
free(nodes[j].nodeStr);
2935
int MCMC (FILE* fout)
2937
FILE *fmcmcout = NULL;
2938
int nsteps=4+(com.clock>1)+(data.pfossilerror[0]>0), nxpr[2]={6,2};
2939
int i,j,k, ir, g=data.ngene;
2940
double Paccept[6]={0}, lnL=0, nround=0, *x, *mx, *vx, *px, postEFossil[MaxNFossils]={0};
2945
if(mcmc.usedata!=1) mcmc.saveconP = 0;
2946
for(j=0; j<sptree.nspecies*2-1; j++)
2949
if(mcmc.usedata == 2)
2950
ReadBlengthGH(com.inBVf);
2951
if(mcmc.print) fmcmcout = gfopen(com.mcmcf,"w");
2953
printf("\n%d burnin, sampled every %d, %d samples\n",
2954
mcmc.burnin, mcmc.sampfreq, mcmc.nsample);
2955
if(mcmc.usedata) puts("Approximating posterior");
2956
else puts("Approximating prior");
2958
printf("(Settings: cleandata=%d print=%d saveconP=%d)\n",
2959
com.cleandata, mcmc.print, mcmc.saveconP);
2961
com.np = GetInitials();
2963
x=(double*)malloc(com.np*3*sizeof(double));
2964
if(x==NULL) error2("oom in mcmc");
2965
mx=x+com.np; vx=mx+com.np;
2968
printf("\aresetting times");
2971
double x0[]={1.1, 0.99, 0.88, 0.77, 0.55, 0.44, 0.22, 0.11};
2972
for(i=sptree.nspecies,k=0; i<sptree.nnode; i++)
2973
sptree.nodes[i].age=x0[k++];
2978
collectx(fmcmcout, x);
2980
if(!com.fix_kappa && !com.fix_alpha && data.ngene==2) { nxpr[0]=6; nxpr[1]=4; }
2983
if(mcmc.usedata==1) {
2984
if(!com.fix_kappa) printf("\tG(%.4f, %.4f) for kappa\n", data.kappagamma[0], data.kappagamma[1]);
2985
if(!com.fix_omega) printf("\tG(%.4f, %.4f) for omega\n", data.omegagamma[0], data.omegagamma[1]);
2986
if(!com.fix_alpha) printf("\tG(%.4f, %.4f) for alpha\n", data.alphagamma[0], data.alphagamma[1]);
2988
printf("\tG(%.4f, %.4f) for rgene\n", data.rgenegamma[0], data.rgenegamma[1]);
2989
if(com.clock>1) printf("\tG(%.4f, %.4f) for sigma2\n", data.sigma2gamma[0], data.sigma2gamma[1]);
2991
/* calculates prior for times and likelihood for each locus */
2992
if(data.pfossilerror[0])
2993
SetupPriorTimesFossilErrors();
2994
data.lnpT = lnpriorTimes();
2995
for(j=0; j<data.ngene; j++) com.rgene[j]=-1; /* com.rgene[] is not used. */
2997
data.lnpR = lnpriorRates();
2998
printf("\nInitial parameters (np = %d):\n", com.np);
2999
for(j=0;j<com.np;j++) printf(" %9.6f",x[j]); FPN(F0);
3000
lnL = lnpData(data.lnpDi);
3001
printf("\nlnL0 = %9.2f\n", lnL);
3003
printf("\nStarting MCMC (np = %d) . . .\n", com.np);
3004
printf("finetune steps (time rate mixing para RatePara?):");
3005
for(j=0; j<nsteps; j++)
3006
printf(" %7.4f",mcmc.finetune[j]);
3008
printf("\n paras: %d times, %d mu, (& kappa, alpha)\n",
3009
sptree.nspecies-1, data.ngene);
3011
printf("\n paras: %d times, %d mu, %d sigma2 (& rates, kappa, alpha)\n",
3012
sptree.nspecies-1, data.ngene, data.ngene);
3016
if(com.np<nxpr[0]+nxpr[1]) { nxpr[0]=com.np; nxpr[1]=0; }
3017
for(ir=-mcmc.burnin; ir<mcmc.sampfreq*mcmc.nsample; ir++) {
3018
if(ir==0) { /* reset after burnin */
3020
zero(Paccept,nsteps);
3024
if(mcmc.burnin>10) { /* reset finetune parameters. Do we want this? */
3025
for(j=0;j<nsteps;j++)
3026
if(Paccept[j]<.1 || Paccept[j]>.8) {
3027
mcmc.finetune[j] *= Paccept[j]/0.3;
3028
printf("finetune #%d reset to %.5f\n", j+1,mcmc.finetune[j]);
3035
Paccept[0] = (Paccept[0]*(nround-1) + UpdateTimes(&lnL, mcmc.finetune[0]))/nround;
3036
Paccept[1] = (Paccept[1]*(nround-1) + UpdateRates(&lnL, mcmc.finetune[1]))/nround;
3037
Paccept[2] = (Paccept[2]*(nround-1) + mixing(mcmc.finetune[2]))/nround;
3039
Paccept[3] = (Paccept[3]*(nround-1) + UpdateParameters(&lnL, mcmc.finetune[3]))/nround;
3041
Paccept[4] = (Paccept[4]*(nround-1) + UpdateParaRates(mcmc.finetune[4],com.space))/nround;
3043
if(data.pfossilerror[0])
3044
Paccept[5] = (Paccept[5]*(nround-1) + UpdatePFossilErrors(mcmc.finetune[5]))/nround;
3046
collectx(fmcmcout, x);
3048
for(j=0; j<com.np; j++) vx[j] += square(x[j]-mx[j]) * (nround-1.)/nround;
3049
for(j=0; j<com.np; j++) mx[j] = (mx[j]*(nround-1)+x[j])/nround;
3051
if(data.pfossilerror[0])
3052
getPfossilerr(postEFossil, nround);
3054
if(mcmc.print && ir>=0 && (ir==0 || (ir+1)%mcmc.sampfreq==0)) {
3055
fprintf(fmcmcout,"%d", ir+1);
3056
for(j=0;j<com.np; j++) fprintf(fmcmcout,"\t%.7f",x[j]);
3057
if(mcmc.usedata) fprintf(fmcmcout,"\t%.3f",lnL);
3059
if(rndu()<0.5) fflush(fout);
3061
if((ir+1)%max2(mcmc.sampfreq, mcmc.sampfreq*mcmc.nsample/10000)==0) {
3062
printf("\r%3.0f%%", (ir+1.)/(mcmc.nsample*mcmc.sampfreq)*100.);
3064
for(j=0; j<nsteps; j++)
3065
printf(" %4.2f", Paccept[j]);
3068
px = (ir >= 0 ? mx : x);
3071
FOR(j,nxpr[0]) printf(" %5.3f", px[j]);
3072
if(com.np>nxpr[0]+nxpr[1] && nxpr[1]) printf(" -");
3073
FOR(j,nxpr[1]) printf(" %5.3f", px[com.np-nxpr[1]+j]);
3074
if(mcmc.usedata) printf(" %4.1f",lnL);
3077
if(mcmc.sampfreq*mcmc.nsample>20 && (ir+1)%(mcmc.sampfreq*mcmc.nsample/20)==0) {
3078
printf(" %s\n", printtime(timestr));
3080
if(fabs(lnL-lnpData(data.lnpDi))>0.001) {
3081
printf("\n%12.6f = %12.6f?\n", lnL, lnpData(data.lnpDi));
3082
puts("lnL not right?");
3087
for(i=0; i<com.np; i++)
3088
vx[i] = sqrt(vx[i]/nround);
3089
if(mcmc.print) fclose(fmcmcout);
3091
printf("\nTime used: %s", printtime(timestr));
3092
fprintf(fout,"\nTime used: %s", printtime(timestr));
3094
fprintf(fout, "\n\nmean and S.D. of parameters using all iterations\n\n");
3095
fprintf(fout, "mean ");
3096
for(i=0; i<com.np; i++) fprintf(fout, " %9.5f", mx[i]);
3097
fprintf(fout, "\nS.D. ");
3098
for(i=0; i<com.np; i++) fprintf(fout, " %9.5f", sqrt(vx[i]));
3102
for(j=0; j<sptree.nspecies-1; j++)
3103
nodes[sptree.nspecies+j].age = mx[j];
3104
for(j=0; j<sptree.nnode; j++)
3106
nodes[j].branch = nodes[nodes[j].father].age - nodes[j].age;
3107
puts("\nSpecies tree for TreeView. Branch lengths = posterior mean times, 95% CIs = labels");
3108
FPN(F0); OutTreeN(F0,1,1); FPN(F0);
3109
fputs("\nSpecies tree for TreeView. Branch lengths = posterior mean times; 95% CIs = labels", fout);
3110
FPN(fout); OutTreeN(fout,1,PrNodeNum); FPN(fout);
3111
FPN(fout); OutTreeN(fout,1,1); FPN(fout);
3114
DescriptiveStatisticsSimpleMCMCTREE(fout, com.mcmcf, 1, 1);
3115
if(data.pfossilerror[0]) {
3116
free(sptree.CcomFossilErr);
3117
printf("\nPosterior probabilities that each fossil is in error.\n");
3118
for(i=k=0; i<sptree.nspecies*2-1; i++) {
3119
if((j=sptree.nodes[i].fossil) == 0) continue;
3120
printf("Node %2d: %s (%9.4f, %9.4f)", i+1,fossils[j],sptree.nodes[i].pfossil[0],sptree.nodes[i].pfossil[1]);
3121
printf(" %8.3f\n", postEFossil[k++]);
3127
printf("\ntime prior: %s\n", (data.priortime?"beta":"Birth-Death-Sampling"));
3128
printf("rate prior: %s\n", (data.priorrate?"gamma":"Log-Normal"));
3130
printf("%s transform is used in approx like calculation.\n", Btransform[data.transform]);
3131
printf("\nTime used: %s\n", printtime(timestr));