81
85
double mixing(double *lnL, double finetune);
82
86
double UpdatePFossilErrors(double finetune);
83
87
int getPfossilerr (double postEFossil[], double nround);
84
int DescriptiveStatisticsSimpleMCMCTREE (FILE *fout, char infile[], int nbin, int nrho);
88
int DescriptiveStatisticsSimpleMCMCTREE (FILE *fout, char infile[], int nbin);
86
90
struct CommonInfo {
87
char *z[NS], *spname[NS], seqf[256],outf[256],treef[256],daafile[96],mcmcf[96],inBVf[96];
92
char *spname[NS], seqf[256],outf[256],treef[256],daafile[96],mcmcf[96],inBVf[96];
88
93
char oldconP[NNODE]; /* update conP for node? (0 yes; 1 no) */
89
94
int seqtype, ns, ls, ngene, posG[2],lgene[1], *pose, npatt, readpattern;
90
95
int np, ncode, ntime, nrate, nrgene, nalpha, npi, ncatG, print;
915
925
int GetOptions (char *ctlf)
917
int iopt,i, nopt=28, lline=4096;
927
int iopt,i, nopt=29, lline=4096;
918
928
char line[4096],*pline, opt[32], *comment="*#";
919
929
char *optstr[] = {"seed", "seqfile","treefile", "outfile", "mcmcfile",
920
"seqtype", "aaRatefile", "icode", "usedata", "ndata", "model", "clock",
930
"seqtype", "aaRatefile", "icode", "noisy", "usedata", "ndata", "model", "clock",
921
931
"TipDate", "RootAge", "fossilerror", "alpha", "ncatG", "cleandata",
922
932
"BDparas", "kappa_gamma", "alpha_gamma",
923
933
"rgene_gamma", "sigma2_gamma", "print", "burnin", "sampfreq",
951
961
case ( 5): com.seqtype=(int)t; break;
952
962
case ( 6): sscanf(pline+2,"%s", com.daafile); break;
953
963
case ( 7): com.icode=(int)t; break;
954
case ( 8): sscanf(pline+1, "%d %s%d", &mcmc.usedata, com.inBVf, &data.transform);
964
case ( 8): noisy=(int)t; break;
965
case ( 9): sscanf(pline+1, "%d %s%d", &mcmc.usedata, com.inBVf, &data.transform);
955
966
if(mcmc.usedata==2 && strchr(com.inBVf, '*'))
956
967
strcpy(com.inBVf, "in.BV");
958
case ( 9): com.ndata=(int)t; break;
959
case (10): com.model=(int)t; break;
960
case (11): com.clock=(int)t; break;
969
case (10): com.ndata=(int)t; break;
970
case (11): com.model=(int)t; break;
971
case (12): com.clock=(int)t; break;
962
973
sscanf(pline+2,"%lf%lf", &com.TipDate, &com.TipDate_TimeUnit);
963
974
if(com.TipDate && com.TipDate_TimeUnit==0) error2("should set com.TipDate_TimeUnit");
975
data.transform = SQRT_B; /* SQRT_B, LOG_B, ARCSIN_B */
966
978
sptree.RootAge[2] = sptree.RootAge[3] = 0.025; /* default tail probs */
967
979
if((strchr(line, '>') || strchr(line, '<')) && (strstr(line, "U(") || strstr(line, "B(")))
968
980
error2("don't mix < U B on the RootAge line");
977
989
else if((pline=strstr(line, "B(")))
978
990
sscanf(pline+2, "%lf,%lf,%lf,%lf", &sptree.RootAge[0], &sptree.RootAge[1], &sptree.RootAge[2], &sptree.RootAge[3]);
981
993
data.pfossilerror[0] = 0.0;
982
data.pfossilerror[2] = 2; /* default: minimum 2 good fossils */
994
data.pfossilerror[2] = 1; /* default: minimum 2 good fossils */
983
995
sscanf(pline+1, "%lf%lf%lf", data.pfossilerror, data.pfossilerror+1, data.pfossilerror+2);
985
case (15): com.alpha=t; break;
986
case (16): com.ncatG=(int)t; break;
987
case (17): com.cleandata=(int)t; break;
997
case (16): com.alpha=t; break;
998
case (17): com.ncatG=(int)t; break;
999
case (18): com.cleandata=(int)t; break;
989
1001
sscanf(pline+1,"%lf%lf%lf%lf", &data.BDS[0],&data.BDS[1],&data.BDS[2],&data.BDS[3]);
992
1004
sscanf(pline+1,"%lf%lf", data.kappagamma, data.kappagamma+1); break;
994
1006
sscanf(pline+1,"%lf%lf", data.alphagamma, data.alphagamma+1); break;
996
1008
sscanf(pline+1,"%lf%lf", data.rgenegamma, data.rgenegamma+1); break;
998
1010
sscanf(pline+1,"%lf%lf", data.sigma2gamma, data.sigma2gamma+1); break;
999
case (23): mcmc.print=(int)t; break;
1000
case (24): mcmc.burnin=(int)t; break;
1001
case (25): mcmc.sampfreq=(int)t; break;
1002
case (26): mcmc.nsample=(int)t; break;
1011
case (24): mcmc.print=(int)t; break;
1012
case (25): mcmc.burnin=(int)t; break;
1013
case (26): mcmc.sampfreq=(int)t; break;
1014
case (27): mcmc.nsample=(int)t; break;
1004
1016
sscanf(pline+1,"%d:%lf%lf%lf%lf%lf%lf", &mcmc.resetFinetune, eps,eps+1,eps+2,eps+3,eps+4,eps+5);
1110
1127
for(j=s; j<sptree.nnode; j++)
1111
1128
x[i*(s+g)+(j-s)] = sptree.nodes[j].age;
1112
1129
for(j=0; j<g; j++)
1113
x[i*(s+g)+s-1+j]=data.rgene[j];
1115
printf("\nmean & 95%% CI for times\n\n");
1116
for(j=s; j<sptree.nnode; j++)
1117
printf("Node %2d: %9.5f (%9.5f, %9.5f)\n", j+1,x[j-s],x[(s+g)+j-s],x[2*(s+g)+j-s]);
1130
x[i*(s+g)+s-1+j] = data.rgene[j];
1132
printf("\nmean (95%% CI) CI-width for times\n\n");
1133
for(j=s; j<sptree.nnode; j++) {
1135
tU = x[2*(s+g)+j-s];
1136
printf("Node %2d: %9.6f (%9.6f, %9.6f) %9.6f\n", j+1, x[j-s], tL, tU, tU-tL);
1118
1138
printf("\nmean & 95%% CI for rates\n\n");
1119
1139
for(j=0; j<g; j++)
1120
printf("gene %2d: %9.5f (%9.5f, %9.5f)\n", j+1,x[s-1+j], x[2*(s+g)+s-1+j], x[(s+g)+s-1+j]);
1140
printf("gene %2d: %9.6f (%9.6f, %9.6f)\n", j+1,x[s-1+j], x[2*(s+g)+s-1+j], x[(s+g)+s-1+j]);
1122
1142
printf("\nNote: the posterior has only one dimension.\n");
1309
1329
yb[1] = FixedDs[(ip-(s-1))*sptree.nnode+sons[0]]/y;
1310
1330
y = sptree.nodes[sons[0]].rates[ip-(s-1)];
1311
ynew = y + e[1]*rnd2NormalSym(m2NormalSym);
1331
ynew = y + e[1]*rndBactrian(mBactrian);
1312
1332
ynew = sptree.nodes[sons[0]].rates[ip-(s-1)] = reflect(ynew,yb[0],yb[1]);
1314
1334
else if (ip-(s-1+g)<g) { /* mu for loci */
1315
lnacceptance = e[3]*rnd2NormalSym(m2NormalSym);
1335
lnacceptance = e[3]*rndBactrian(mBactrian);
1316
1336
c=exp(lnacceptance);
1317
1337
y = data.rgene[ip-(s-1+g)];
1318
1338
ynew = data.rgene[ip-(s-1+g)] *= c;
1319
lnacceptance+=logPriorRatioGamma(ynew, y, data.rgenegamma[0], data.rgenegamma[1]);
1339
lnacceptance += logPriorRatioGamma(ynew, y, data.rgenegamma[0], data.rgenegamma[1]);
1321
1341
else { /* sigma2 for loci */
1322
lnacceptance = e[3]*rnd2NormalSym(m2NormalSym);
1342
lnacceptance = e[3]*rndBactrian(mBactrian);
1323
1343
c=exp(lnacceptance);
1324
1344
y = data.sigma2[ip-(s-1+g*2)];
1325
1345
ynew = data.sigma2[ip-(s-1+g*2)] *= c;
1326
lnacceptance+=logPriorRatioGamma(ynew, y, data.sigma2gamma[0], data.sigma2gamma[1]);
1346
lnacceptance += logPriorRatioGamma(ynew, y, data.sigma2gamma[0], data.sigma2gamma[1]);
1329
1349
lnLnew = lnpInfinitesites(FixedDs);
1443
1465
possible, even though this means that the chain might start from a poor
1446
int np=0, i,j,k, dad, nchanges, g=data.ngene;
1468
int np=0, i,j,k, jj, dad, nchanges, g=data.ngene;
1447
1469
double maxlower=0; /* rough age for root */
1448
double *p=sptree.nodes[sptree.root].pfossil, smallt=(p[1]-p[0])/sptree.nspecies*2;
1470
double *p=sptree.nodes[sptree.root].pfossil, smallt=(p[1]-p[0])/(40*sqrt(sptree.nspecies));
1449
1471
double a_r=data.rgenegamma[0], b_r=data.rgenegamma[1], a,b, smallr=1e-3, d;
1472
double AgeLow[NS]={0}, tz;
1451
1474
com.rgene[0]=-1; /* com.rgene[] is not used. -1 to force crash */
1452
1475
puts("\ngetting initial values to start MCMC.");
1454
/* set up rough time unit by looking at the fossil info */
1455
1477
if(com.TipDate) { /* TipDate model */
1478
/* set up initial node ages by looking at the minimum ages at each node */
1479
if(sptree.nodes[sptree.root].fossil == BOUND_F)
1480
sptree.nodes[sptree.root].age = p[0] + (p[1]-p[0])*rndu();
1482
error2("\nthere should be something on the root age for the TipDate model\n");
1484
for(j=0; j<sptree.nspecies; j++) {
1485
tz = sptree.nodes[j].age;
1486
for(k=sptree.nodes[j].father; k!=-1; k=sptree.nodes[k].father)
1487
if(tz < AgeLow[k-sptree.nspecies])
1490
AgeLow[k-sptree.nspecies] = tz;
1492
for(j=sptree.nspecies+1; j<sptree.nnode; j++) {
1493
jj = j-sptree.nspecies;
1494
sptree.nodes[j].age = AgeLow[jj] + (sptree.nodes[sptree.nodes[j].father].age - AgeLow[jj])*(0.5+0.5*rndu());
1456
1497
for(i=0; i<1000; i++) {
1457
1498
for(j=0,nchanges=0; j<sptree.nspecies; j++) {
1458
1499
for(k=j; k!=sptree.root; k=dad) {
1459
1500
dad = sptree.nodes[k].father;
1460
1501
if(sptree.nodes[dad].age <= sptree.nodes[k].age) {
1461
sptree.nodes[dad].age = max2(sptree.nodes[k].age, smallt) * (1 + smallt*2*rndu());
1502
sptree.nodes[dad].age = max2(sptree.nodes[k].age, smallt) * (1 + smallt*0.1*rndu());
1507
if(nchanges) puts("nchanges should be 0 here??");
1466
1508
if(!nchanges) break;
1468
1510
if(sptree.nodes[sptree.root].fossil == BOUND_F) {
1469
1511
if(sptree.nodes[sptree.root].age<p[0]) {
1470
sptree.nodes[sptree.root].age = p[0] + (p[1]-p[0])*(0.2+rndu()*1.6);
1512
sptree.nodes[sptree.root].age = p[0] + (p[1]-p[0])*rndu();
1474
1517
else { /* not TipDate model */
1518
/* set up initial node ages by looking at the fossil info */
1475
1519
for(j=sptree.nspecies; j<sptree.nnode; j++) {
1476
1520
sptree.nodes[j].age = -1;
1477
1521
if(sptree.nodes[j].fossil == 0) continue;
1742
1791
r = (e*(1-c2) - (1+c2)) / (e*(1-c2) + (1+c2));
1744
1793
*p0t = (lambda + mu + psi + c1*r) / (2*lambda);
1745
qt = 2*(1-c2*c2) + e*square(1-c2) + square(1+c2)/e;
1795
logqt += log( e*e*square(1-c2) + e*2*(1-c2*c2) + square(1+c2) );
1747
/* printf("lmrp %8.6f %8.6f %7.4f %7.4f t=%7.4f p0qt= %7.4f %7.4f\n", lambda, mu, rho, psi, t, *p0t, qt); */
1799
printf("lmrp %8.4f %8.4f %7.4f %7.4f t=%7.4f p0 logqt= %7.4f %7.4e\n", lambda, mu, rho, psi, t, *p0t, logqt);
1753
double lnpriorTimesTipDate (void)
1804
double lnpriorTimesTipDate_Approach2 (void)
1755
/* Equation 6 in Stadler T. 2010. Sampling-through-time in birth-death trees.
1756
J Theor Biol 267:396-404.
1806
/* Approach 2 of Stadler & Yang (2013 Syst Biol). The BDS parameters are assumed to be fixed.
1807
This ignores terms involving y, which are constants when the BDSS parameters are fixed.
1757
1808
t1 is root age.
1759
1810
int i, m=0, n=sptree.nspecies, k=0;
1760
1811
double lambda=data.BDS[0], mu=data.BDS[1], rho=data.BDS[2], psi=data.BDS[3];
1761
double lnp=0, t, t1=sptree.nodes[sptree.root].age, p0, p1, qt, qt1, z, e;
1762
double a, b, tailL, tailR, thetaL, thetaR;
1812
double lnp=0, t1=sptree.nodes[sptree.root].age, p0, p1, logqt, logqt1, p0t1, z, e;
1764
1814
if(com.ndata>1 && com.TipDate)
1765
1815
error2("don't know how ndata works with TipDate.");
1778
1828
if(rho>0 && n<2)
1779
1829
error2("we want more than 2 extant sampled species");
1782
qt1 = p0qt_BDS(t1, lambda, mu, rho, psi, &p0);
1831
/* the numerator f(x, z | L(tmrca) = 2). */
1832
logqt1 = logqtp0_BDS_Approach2(t1, lambda, mu, rho, psi, &p0t1);
1784
1834
for(i=sptree.nspecies; i<sptree.nnode; i++) { /* loop over n+m-1 internal node ages except t1 */
1785
1835
if(i == sptree.root) continue;
1786
qt = p0qt_BDS(sptree.nodes[i].age, lambda, mu, rho, psi, &p0);
1836
logqt = logqtp0_BDS_Approach2(sptree.nodes[i].age, lambda, mu, rho, psi, &p0);
1790
/* the denominator */
1840
/* the denominator, h(tmrca, n) */
1792
1842
if(fabs(lambda-mu-psi) > 1e-20) {
1793
e = exp(-(lambda - mu - psi)*t1);
1843
e = exp(-(lambda - mu - psi)*t1); /* this causes overflow for large t */
1794
1844
z = lambda*rho + (lambda - lambda*rho - mu - psi)*e;
1795
1845
p1 = rho*square(lambda - mu - psi)*e/(z*z);
1797
printf("p1 = %.6f <=0\n", p1);
1847
printf("lnpriorTimesTipDate: p1 = %.6f <=0 (t1 = %9.5g)\n", p1, t1);
1798
1848
lnp -= log(p1*p1);
1800
1850
z = rho*lambda*(1 - e)/z;
1812
1862
else { /* rho = 0 for viruses */
1813
p0qt_BDS(t1, lambda, mu, rho, psi, &p0);
1819
if(sptree.nodes[sptree.nspecies].fossil != BOUND_F)
1820
error2("Only bounds for the root age are implemented.");
1821
lnp += lnptCalibrationDensity(t1, sptree.nodes[sptree.nspecies].fossil, sptree.nodes[sptree.nspecies].pfossil);
1863
lnp -= 2*log(1 - p0t1);
1867
if(sptree.nodes[sptree.nspecies].fossil != BOUND_F)
1868
error2("Only bounds for the root age are implemented.");
1869
lnp += lnptCalibrationDensity(t1, sptree.nodes[sptree.nspecies].fossil, sptree.nodes[sptree.nspecies].pfossil);
1874
double p01t_BDSEquation6Stadler2010 (double t, double lambda, double mu, double rho, double psi, double *p0t)
1876
/* This calculates p0t and p1t, defined in equations 1 & 2 in Stadler (2010).
1878
double c1, c2, e, r, p1t;
1880
if(fabs(lambda-mu)<1e-20 && psi==0) {
1881
r = 1/(1/rho + lambda*t);
1885
else if(fabs(lambda-mu)>1e-20 && psi==0) {
1886
e = exp((mu - lambda)*t);
1887
r = rho*(lambda-mu) / (rho*lambda + (lambda-mu-rho*lambda)*e);
1892
c1 = sqrt(square(lambda - mu - psi) + 4*lambda*psi);
1893
if(c1==0) error2("c1==0");
1894
c2 = -(lambda - mu - 2*lambda*rho - psi)/c1;
1896
r = (e*(1-c2) - (1+c2)) / (e*(1-c2) + (1+c2));
1898
*p0t = (lambda + mu + psi + c1*r) / (2*lambda);
1899
p1t = 4*rho/( 2*(1-c2*c2) + e*square(1-c2) + square(1+c2)/e );
1902
/* printf("lmrp %8.6f %8.6f %7.4f %7.4f t=%7.4f p01= %7.4f %7.4f\n", lambda, mu, rho, psi, t, *p0t, p1t); */
1907
double lnpriorTimesTipDateEquation6Stadler2010 (void)
1909
/* Equation 6 in Stadler T. 2010. Sampling-through-time in birth-death trees.
1910
J Theor Biol 267:396-404.
1914
double lambda=data.BDS[0], mu=data.BDS[1], rho=data.BDS[2], psi=data.BDS[3];
1915
double lnp=0, t, t1=sptree.nodes[sptree.root].age, p0, p1, z, e;
1916
double a, b, tailL, tailR, thetaL, thetaR;
1918
if(com.ndata>1 && com.TipDate)
1919
error2("don't know how ndata works with TipDate.");
1921
if(lambda<=0 || mu<=0 || (rho<=0 && psi<=0))
1922
error2("wrong B-D-S parameters..");
1923
for(i=0,m=0; i<sptree.nspecies; i++)
1924
m += (sptree.nodes[i].age > 0);
1925
n = sptree.nspecies - m;
1928
if(fabs(lambda-mu)<1e-20)
1929
z = 1 + 1/(rho*lambda*t1);
1931
e = exp((mu-lambda)*t1);
1932
z = (lambda*rho + (lambda-mu-lambda*rho)*e) / (rho*lambda*(1-e));
1934
lnp = (n-2)*log(z*z);
1937
p1 = p01t_BDSEquation6Stadler2010(t1, lambda, mu, rho, 0, &p0);
1938
lnp -= log((n-1)*p1*p1);
1940
/* this term is constant when the BDS parameters are fixed.
1941
lnp += (n+m-2)*log(lambda) + (k+m)*log(psi);
1944
p1 = p01t_BDSEquation6Stadler2010(t1, lambda, mu, rho, psi, &p0);
1946
for(i=sptree.nspecies; i<sptree.nnode; i++) /* loop over n+m-1 internal node ages except t1 */
1947
if(i != sptree.root)
1948
lnp += log( p01t_BDSEquation6Stadler2010(sptree.nodes[i].age, lambda, mu, rho, psi, &p0) );
1950
/* the y terms do not change when the BDS parameters are fixed.
1951
for(i=0; i<sptree.nspecies; i++) {
1952
if( (t=sptree.nodes[i].age) ) {
1953
p1 = p01t_BDSEquation6Stadler2010(t, lambda, mu, rho, psi, &p0);
1960
if(sptree.nodes[sptree.nspecies].fossil != BOUND_F)
1961
error2("Only bounds for the root age are implemented.");
1962
lnp += lnptCalibrationDensity(t1, sptree.nodes[sptree.nspecies].fossil, sptree.nodes[sptree.nspecies].pfossil);
2293
2451
/* This prepares for lnpriorTimes() under models of fossil errors. It calculates
2294
2452
the scaling factor for the probability density of times for a given combination
2295
2453
of the indicator variables, indicating which fossil is in error and not used.
2296
The combination in which all fossils are wrong is excluded.
2454
nMinCorrect is the minimum number of correct fossils.
2298
int is,i, icom, nused=0, it;
2299
int nMinCorrect = (int)data.pfossilerror[2];
2300
int ncomFerr = (1<<sptree.nfossil) - 1 - (nMinCorrect==2?sptree.nfossil:0);
2456
int nMinCorrect = (int)data.pfossilerror[2], ncomFerr, is,i, icom, nused=0, it;
2303
if(data.pfossilerror[0]==0 || sptree.nfossil>MaxNFossils || sptree.nfossil<2)
2460
if(data.pfossilerror[0]==0 || sptree.nfossil>MaxNFossils || sptree.nfossil<nMinCorrect)
2304
2461
error2("Fossil-error model is inappropriate.");
2306
sptree.CcomFossilErr = (double*)malloc(ncomFerr*sizeof(double));
2307
if(sptree.CcomFossilErr == NULL) error2("oom for CcomFossilErr");
2463
for (i=nMinCorrect,ncomFerr=0; i<=sptree.nfossil; i++) {
2464
ncomFerr += (int)( Binomial((double)sptree.nfossil, i, &t) + .1 );
2465
if(t!=0) error2("too many fossils to deal with? ");
2468
data.CcomFossilErr = (double*)malloc(ncomFerr*sizeof(double));
2469
if(data.CcomFossilErr == NULL) error2("oom for CcomFossilErr");
2309
2471
/* cycle through combinations of indicators. */
2310
for (i=0,icom=0; i < (1<<sptree.nfossil)-1; i++) {
2472
for (i=0,icom=0; i < (1<<sptree.nfossil); i++) {
2312
2474
for (is=sptree.nspecies, nused=0; is<sptree.nnode; is++) {
2313
2475
if(sptree.nodes[is].fossil) {
2317
2479
if(debug) printf("%2d (%2d)", sptree.nodes[is].usefossil, is+1);
2320
if(nMinCorrect==2 && nused<2) continue;
2322
if(debug) printf("\n\n********* Com %2d/%2d: %2d fossils used", icom, ncomFerr, nused);
2323
sptree.CcomFossilErr[icom-1] = getScaleFossilCombination();
2482
if(nused<nMinCorrect) continue;
2483
if(debug) printf("\n\n********* Combination %2d/%2d: %2d fossils used", icom+1, ncomFerr, nused);
2484
data.CcomFossilErr[icom++] = log( getScaleFossilCombination() );
2329
2491
double lnpriorTimes (void)
2331
/* This calculates the prior density of node times in the master species tree:
2493
/* This calculates the prior density of node times in the master species tree.
2494
Node ages are in sptree.nodes[].age.
2334
2496
int nMinCorrect = (int)data.pfossilerror[2];
2335
int ncomFerr = (1<<sptree.nfossil) - 1 - (nMinCorrect==2?sptree.nfossil:0);
2336
2497
int is, i,icom, nused, it;
2337
double pE = sptree.Pfossilerr, ln1pE=log(1-pow(pE,(double)sptree.nfossil));
2498
double pE = data.Pfossilerr, lnpE, ln1pE, pEconst=0, lnC, lnNC;
2338
2499
double lnpt=0, scaleF=-1e300, y;
2340
2502
if(sptree.nfossil==0 || com.TipDate) {
2342
lnpt = lnpriorTimesBDS_Approach2();
2344
lnpt = lnpriorTimesTipDate();
2347
else if(data.pfossilerror[0]==0) /* no fossil errors in model */
2348
lnpt = lnptC() + lnptNCgiventC();
2503
if(1) lnpt = lnpriorTimesBDS_Approach1();
2504
else if(0) lnpt = lnpriorTimesTipDate_Approach2();
2505
else if(0) lnpt = lnpriorTimesTipDateEquation6Stadler2010();
2507
else if(data.pfossilerror[0]==0) { /* no fossil errors in model */
2509
lnNC = lnptNCgiventC();
2512
if(debug) printf("\nftC ftNC ft = %9.5f%9.5f%9.5f", exp(lnC), exp(lnNC), exp(lnC)*exp(lnNC));
2349
2514
else { /* fossil errors: cycle through combinations of indicators, using nMinCorrect. */
2350
for(i=0,icom=0; i < (1<<sptree.nfossil) - 1; i++) {
2516
pEconst = y = pow(pE, (double)sptree.nfossil);
2517
if(y<1e-300) error2("many fossils & small pE. Rewrite the code?");
2518
for(i=1; i<nMinCorrect; i++) /* i is the number of correct fossils used */
2519
pEconst += y *= (sptree.nfossil-i+1)*(1-pE)/(i*pE);
2520
pEconst = log(1 - pEconst);
2522
lnpE=log(pE); ln1pE=log(1-pE);
2523
for(i=0,icom=0; i < (1<<sptree.nfossil); i++) { /* sum over U */
2352
2525
for(is=sptree.nspecies, nused=0; is<sptree.nnode; is++) {
2353
2526
if(sptree.nodes[is].fossil) {
2354
2527
sptree.nodes[is].usefossil = 1 - it%2;
2355
2528
nused += sptree.nodes[is].usefossil;
2359
if(nMinCorrect==2 && nused<2) continue;
2362
y = nused*log(1-pE)+(sptree.nfossil-nused)*log(pE)-ln1pE
2363
- log(sptree.CcomFossilErr[icom-1]) + lnptC() + lnptNCgiventC();
2532
if(nused<nMinCorrect) continue;
2534
lnNC = lnptNCgiventC();
2536
y = nused*ln1pE + (sptree.nfossil-nused)*lnpE - pEconst - data.CcomFossilErr[icom++]
2541
printf("\nU%d%d ftC ftNC ft = %9.5f%9.5f%9.5f", sptree.nodes[5].usefossil, sptree.nodes[6].usefossil,
2542
exp(lnC), exp(lnNC), exp(lnC)*exp(lnNC));
2365
2544
if(y < scaleF + 100)
2366
2545
lnpt += exp(y-scaleF);
2368
lnpt = lnpt*exp(scaleF-y) + 1;
2371
2550
lnpt += exp(y-scaleF);
2373
2552
lnpt = scaleF + log(lnpt);
2382
2562
for each fossil given the times and pE.
2384
2564
int nMinCorrect = (int)data.pfossilerror[2];
2385
int ncomFerr = (1<<sptree.nfossil) - 1 - (nMinCorrect==2?sptree.nfossil:0);
2386
2565
int is,i,icom, k, nf=sptree.nfossil, nused, it;
2387
double pE = sptree.Pfossilerr, ln1pE=log(1-pow(pE,(double)nf));
2388
double pt, pEf[MaxNFossils]={0}, scaleF=-1e300, y;
2389
char Ef[MaxNFossils]; /* indicators of fossil errors */
2566
double pE = data.Pfossilerr, lnpE, ln1pE, pEconst=0;
2567
double pt, pU[MaxNFossils]={0}, scaleF=-1e300, y;
2568
char U[MaxNFossils]; /* indicators of fossil errors */
2391
2570
if(data.priortime==1)
2392
2571
error2("beta kernel for prior time not yet implemented for model of fossil errors.");
2573
pEconst = y = pow(pE, (double)sptree.nfossil);
2574
for(i=1; i<nMinCorrect; i++) /* i is the number of correct fossils used */
2575
pEconst += y *= (sptree.nfossil-i+1)*(1-pE)/(i*pE);
2576
pEconst = log(1 - pEconst);
2394
for(i=0,icom=0,pt=0; i < (1<<sptree.nfossil) - 1; i++) {
2579
lnpE=log(pE); ln1pE=log(1-pE);
2580
for(i=0,icom=0,pt=0; i < (1<<sptree.nfossil); i++) { /* sum over U */
2396
2582
for(is=sptree.nspecies, k=0, nused=0; is<sptree.nnode; is++) {
2397
2583
if(sptree.nodes[is].fossil) {
2398
2584
sptree.nodes[is].usefossil = 1 - it%2;
2399
2585
nused += sptree.nodes[is].usefossil;
2400
Ef[k++] = !sptree.nodes[is].usefossil;
2586
U[k++] = !sptree.nodes[is].usefossil;
2404
if(nMinCorrect==2 && nused<2) continue;
2406
if(k != nf) error2("k == nf?");
2408
y = nused*log(1-pE)+(nf-nused)*log(pE)-ln1pE
2409
- log(sptree.CcomFossilErr[icom-1]) + lnptC() + lnptNCgiventC();
2411
if(y < scaleF + 200) {
2590
if(nused<nMinCorrect) continue;
2591
if(k != nf) error2("k != nf in getPfossilerr()?");
2593
y = nused*ln1pE+(nf-nused)*lnpE - pEconst - data.CcomFossilErr[icom++] + lnptC() + lnptNCgiventC();
2595
if(y < scaleF + 100) {
2412
2596
pt += y = exp(y-scaleF);
2598
if(U[k]) pU[k] += y;
2414
2600
else { /* change scaleF */
2415
pt = pt*exp(scaleF-y) + 1;
2417
pEf[k] *= exp(scaleF-y);
2603
for(k=0; k<nf; k++) pU[k] = U[k]; /* 1 if U[k]=1 or 0 if U[k]=0 */
2422
if(Ef[k]) pEf[k] += y;
2428
postEFossil[k] = (postEFossil[k]*(nround-1) + pEf[k])/nround;
2606
for(k=0; k<nf; k++) pU[k] /= pt;
2608
postEFossil[k] = (postEFossil[k]*(nround-1) + pU[k])/nround;
2678
2859
clock=3: the root rate is mu or data.rgene[]. The algorithm cycles through
2679
2860
the ancestral nodes and deals with the two daughter branches.
2681
int i, inode, dad=-1, g=data.ngene, s=sptree.nspecies, sons[2], root=sptree.root;
2862
int i, inode, locus, dad=-1, g=data.ngene, s=sptree.nspecies, sons[2];
2682
2863
double lnpR=-log(2*Pi)/2.0*(2*s-2)*g, t,tA,t1,t2,Tinv[4], detT;
2683
2864
double zz, r=-1, rA,r1,r2, y1,y2;
2686
if(data.priorrate==1 && com.clock==3)
2867
if(com.clock==3 && data.priorrate==1)
2687
2868
error2("gamma prior for rates for clock3 not implemented yet.");
2688
else if(data.priorrate==1 && com.clock==2) { /* gamma rate prior, clock2 */
2869
else if(com.clock==2 && data.priorrate==1) { /* clock2, gamma rate prior */
2690
for(i=0; i<g; i++) {
2691
alpha = data.rgene[i]*data.rgene[i]/data.sigma2[i];
2692
beta = data.rgene[i]/data.sigma2[i];
2693
lnpR += (alpha*log(beta) - LnGamma(alpha)) * (2*s-2);
2871
for(locus=0; locus<g; locus++) {
2872
a = data.rgene[locus]*data.rgene[locus]/data.sigma2[locus];
2873
b = data.rgene[locus]/data.sigma2[locus];
2874
lnpR += (a*log(b) - LnGamma(a)) * (2*s-2);
2694
2875
for(inode=0; inode<sptree.nnode; inode++) {
2695
if(inode==root) continue;
2696
r = sptree.nodes[inode].rates[i];
2697
lnpR += -beta*r + (alpha-1)*log(r);
2876
if(inode==sptree.root) continue;
2877
r = sptree.nodes[inode].rates[locus];
2878
lnpR += -b*r + (a-1)*log(r);
2701
else if(data.priorrate ==0 && com.clock==2) { /* LN rate prior, clock2 */
2703
lnpR -= log(data.sigma2[i])/2.*(2*s-2);
2882
else if(com.clock==2 && data.priorrate ==0) { /* clock2, LN rate prior */
2883
for(locus=0; locus<g; locus++)
2884
lnpR -= log(data.sigma2[locus])/2.*(2*s-2);
2704
2885
for(inode=0; inode<sptree.nnode; inode++) {
2705
if(inode==root) continue;
2706
for(i=0; i<g; i++) {
2707
r = sptree.nodes[inode].rates[i];
2708
zz = log(r/data.rgene[i]) + data.sigma2[i]/2;
2709
lnpR += -zz*zz/(2*data.sigma2[i]) - log(r);
2886
if(inode==sptree.root) continue;
2887
for(locus=0; locus<g; locus++) {
2888
r = sptree.nodes[inode].rates[locus];
2889
zz = log(r/data.rgene[locus]) + data.sigma2[locus]/2;
2890
lnpR += -zz*zz/(2*data.sigma2[locus]) - log(r);
2713
else if(data.priorrate ==0 && com.clock==3) { /* LN rate prior, clock3 */
2894
else if(com.clock==3 && data.priorrate ==0) { /* clock3, LN rate prior */
2714
2895
for(inode=0; inode<sptree.nnode; inode++) {
2715
2896
if(sptree.nodes[inode].nson==0) continue; /* skip the tips */
2716
2897
dad = sptree.nodes[inode].father;
2717
2898
for(i=0; i<2; i++) sons[i] = sptree.nodes[inode].sons[i];
2718
2899
t = sptree.nodes[inode].age;
2719
if(inode==root) tA = 0;
2720
else tA = (sptree.nodes[dad].age - t)/2;
2900
if(inode==sptree.root) tA = 0;
2901
else tA = (sptree.nodes[dad].age - t)/2;
2721
2902
t1 = (t-sptree.nodes[sons[0]].age)/2;
2722
2903
t2 = (t-sptree.nodes[sons[1]].age)/2;
2723
2904
detT = t1*t2+tA*(t1+t2);
2724
2905
Tinv[0] = (tA+t2)/detT;
2725
2906
Tinv[1] = Tinv[2] = -tA/detT;
2726
2907
Tinv[3] = (tA+t1)/detT;
2727
for(i=0; i<g; i++) {
2728
rA = (inode==root||dad==root ? data.rgene[i] : sptree.nodes[dad].rates[i]);
2729
r1 = sptree.nodes[sons[0]].rates[i];
2730
r2 = sptree.nodes[sons[1]].rates[i];
2731
y1 = log(r1/rA)+(tA+t1)*data.sigma2[i]/2;
2732
y2 = log(r2/rA)+(tA+t2)*data.sigma2[i]/2;
2908
for(locus=0; locus<g; locus++) {
2909
rA = (inode==sptree.root ? data.rgene[locus] : sptree.nodes[inode].rates[locus]);
2910
r1 = sptree.nodes[sons[0]].rates[locus];
2911
r2 = sptree.nodes[sons[1]].rates[locus];
2912
y1 = log(r1/rA)+(tA+t1)*data.sigma2[locus]/2;
2913
y2 = log(r2/rA)+(tA+t2)*data.sigma2[locus]/2;
2733
2914
zz = (y1*y1*Tinv[0]+2*y1*y2*Tinv[1]+y2*y2*Tinv[3]);
2734
lnpR -= zz/(2*data.sigma2[i]) + log(detT*square(data.sigma2[i]))/2 + log(r1*r2);
2915
lnpR -= zz/(2*data.sigma2[locus]) + log(detT*square(data.sigma2[locus]))/2 + log(r1*r2);
2922
double lnpriorRatioRates (int locus, int inodeChanged, double rold)
2924
/* This calculates the lnpriorRatio when one rate is changed.
2925
If inodeChanged is tip, we sum over 1 term (equation 7 in RY2007).
2926
If inodeChanged is not tip, we sum over 2 terms.
2928
double rnew=sptree.nodes[inodeChanged].rates[locus], lnpRd=0, a, b, z, znew;
2929
double zz, r=-1, rA,r1,r2, y1,y2, t,tA,t1,t2,Tinv[4], detT;
2930
int i, inode, ir, dad=-1, sons[2], OldNew;
2932
if(com.clock==2 && data.priorrate==0) { /* clock2, LN rate prior */
2933
z = log(rold/data.rgene[locus]) + data.sigma2[locus]/2;;
2934
znew = log(rnew/data.rgene[locus]) + data.sigma2[locus]/2;
2935
lnpRd = -log(rnew/rold) - (znew*znew - z*z)/(2*data.sigma2[locus]);
2937
else if (com.clock==2 && data.priorrate==1) { /* clock2, gamma rate prior */
2938
a = data.rgene[locus]*data.rgene[locus]/data.sigma2[locus];
2939
b = data.rgene[locus]/data.sigma2[locus];
2940
lnpRd = -b*(rnew-rold) + (a-1)*log(rnew/rold);
2943
for(ir=0; ir<(sptree.nodes[inodeChanged].nson==0 ? 1 : 2); ir++) {
2944
inode = (ir==0 ? sptree.nodes[inodeChanged].father : inodeChanged);
2945
dad = sptree.nodes[inode].father;
2946
for(i=0; i<2; i++) sons[i] = sptree.nodes[inode].sons[i];
2947
t = sptree.nodes[inode].age;
2948
if(inode==sptree.root) tA = 0;
2949
else tA = (sptree.nodes[dad].age - t)/2;
2950
t1 = (t-sptree.nodes[sons[0]].age)/2;
2951
t2 = (t-sptree.nodes[sons[1]].age)/2;
2952
detT = t1*t2+tA*(t1+t2);
2953
Tinv[0] = (tA+t2)/detT;
2954
Tinv[1] = Tinv[2] = -tA/detT;
2955
Tinv[3] = (tA+t1)/detT;
2956
for(OldNew=0; OldNew<2; OldNew++) { /* old rate & new rate */
2957
sptree.nodes[inodeChanged].rates[locus] = (OldNew==0 ? rold : rnew);
2958
rA = (inode==sptree.root ? data.rgene[locus] : sptree.nodes[inode].rates[locus]);
2959
r1 = sptree.nodes[sons[0]].rates[locus];
2960
r2 = sptree.nodes[sons[1]].rates[locus];
2961
y1 = log(r1/rA)+(tA+t1)*data.sigma2[locus]/2;
2962
y2 = log(r2/rA)+(tA+t2)*data.sigma2[locus]/2;
2963
zz = (y1*y1*Tinv[0]+2*y1*y2*Tinv[1]+y2*y2*Tinv[3]);
2964
zz = zz/(2*data.sigma2[locus]) + log(r1*r2);
2965
lnpRd -= (OldNew==0 ? -1 : 1) * zz;
2741
2973
double UpdateRates (double *lnL, double finetune)
2743
2975
/* This updates rates under the rate-drift models (clock=2 or 3).
3154
3384
double p = data.pfossilerror[0], q = data.pfossilerror[1];
3156
3386
if(finetune<=0) error2("steplength = 0 in UpdatePFossilErrors");
3157
pold = sptree.Pfossilerr;
3158
pnew = pold + finetune*rnd2NormalSym(m2NormalSym);
3159
sptree.Pfossilerr = pnew = reflect(pnew,0,1);
3387
pold = data.Pfossilerr;
3388
pnew = pold + finetune*rndBactrian(mBactrian);
3389
data.Pfossilerr = pnew = reflect(pnew,0,1);
3160
3390
lnacceptance = (p-1)*log(pnew/pold) + (q-1)*log((1-pnew)/(1-pold));
3161
3391
lnpTnew = lnpriorTimes();
3162
lnacceptance += lnpTnew-data.lnpT;
3392
lnacceptance += lnpTnew - data.lnpT;
3164
3394
if(lnacceptance>=0 || rndu()<exp(lnacceptance)) {
3166
3396
data.lnpT = lnpTnew;
3169
sptree.Pfossilerr = pold;
3399
data.Pfossilerr = pold;
3171
3401
return(naccept);
3175
int DescriptiveStatisticsSimpleMCMCTREE (FILE *fout, char infile[], int nbin, int nrho)
3405
int DescriptiveStatisticsSimpleMCMCTREE (FILE *fout, char infile[], int nbin)
3177
3407
FILE *fin=gfopen(infile,"r"), *fFigTree;
3178
3408
char *fmt=" %9.6f", *fmt1=" %9.1f", timestr[32], FigTreef[96]="FigTree.tre";
3179
double *x, *mean, *median, *minx, *maxx, *x005,*x995,*x025,*x975,*x25,*x75;
3409
double *x, *mean, *median, *minx, *maxx, *x005,*x995,*x025,*x975,*xHPD025,*xHPD975;
3181
3411
int n, p, i,j, jj;
3182
3412
int lline=640000, ifields[MAXNFIELDS], Ignore1stColumn=1, ReadHeader=0;
3517
3755
FPN(fout); OutTreeN(fout,1,1); FPN(fout);
3520
DescriptiveStatisticsSimpleMCMCTREE(fout, com.mcmcf, 1, 1);
3758
DescriptiveStatisticsSimpleMCMCTREE(fout, com.mcmcf, 1);
3521
3759
if(data.pfossilerror[0]) {
3522
free(sptree.CcomFossilErr);
3760
free(data.CcomFossilErr);
3523
3761
printf("\nPosterior probabilities that each fossil is in error.\n");
3524
3762
for(i=k=0; i<sptree.nspecies*2-1; i++) {
3525
3763
if( (j=sptree.nodes[i].fossil) )
3526
printf("Node %2d: %s (%9.4f, %9.4f)%9.3f\n", i+1, fossils[j],
3764
printf("Node %2d: %s (%9.4f, %9.4f) %8.4f\n", i+1, fossils[j],
3527
3765
sptree.nodes[i].pfossil[0], sptree.nodes[i].pfossil[1], postEFossil[k++]);
3529
3767
fprintf(fout, "\nPosterior probabilities that each fossil is in error.\n");
3530
3768
for(i=k=0; i<sptree.nspecies*2-1; i++) {
3531
3769
if( (j=sptree.nodes[i].fossil) )
3532
fprintf(fout, "Node %2d: %s (%9.4f, %9.4f)%9.3f\n", i+1, fossils[j],
3770
fprintf(fout, "Node %2d: %s (%9.4f, %9.4f) %8.4f\n", i+1, fossils[j],
3533
3771
sptree.nodes[i].pfossil[0], sptree.nodes[i].pfossil[1], postEFossil[k++]);