1562
2846
for(j = 0; j < rdta->sites; j++)
1564
if(undeterminedList[j + 1] == 0)
1566
switch(tr->dataVector[j + 1])
1569
fprintf(newFile, "%c", inverseMeaningPROT[tipI[j]]);
1572
fprintf(newFile, "%c", inverseMeaningDNA[tipI[j]]);
1580
fprintf(newFile, "\n");
1588
if(count && !countUndeterminedColumns)
1589
printf("An alignment file with sequence duplicates removed has already\n");
1590
if(!count && countUndeterminedColumns)
1591
printf("An alignment file with undetermined columns removed has already\n");
1592
if(count && countUndeterminedColumns)
1593
printf("An alignment file with undetermined columns and sequence duplicates removed has already\n");
1595
printf("been printed to file %s\n", noDupFile);
1597
if(count && !countUndeterminedColumns)
1598
fprintf(f, "An alignment file with sequence duplicates removed has already\n");
1599
if(!count && countUndeterminedColumns)
1600
fprintf(f, "An alignment file with undetermined columns removed has already\n");
1601
if(count && countUndeterminedColumns)
1602
fprintf(f, "An alignment file with undetermined columns and sequence duplicates removed has already\n");
1604
fprintf(f, "been printed to file %s\n", noDupFile);
1610
free(undeterminedList);
1619
static float dist(int i, int j, const int sites, const float nDouble, char **y)
1622
char *tipI = &(y[i + 1][1]);
1623
char *tipJ = &(y[j + 1][1]);
1625
for(k = 0, count = 0; k < sites; k ++)
1626
if(tipI[k] == tipJ[k])
1629
return (((float)count) * nDouble);
1632
static void distArray(int i, const int sites, const float nDouble, int n, float *ref, int *omitted, char **y)
1635
char *tipI = &(y[i + 1][1]);
1637
for(l = 0; l < n; l++)
1639
if((!omitted[l]) && (l != i))
1641
char *tipJ = &(y[l + 1][1]);
1643
for(k = 0, count = 0; k < sites; k ++)
1644
if(tipI[k] == tipJ[k])
1646
ref[l] = (((float)count) * nDouble);
1653
static int qtCompare(const void *p1, const void *p2)
1655
qtData *rc1 = (qtData *)p1;
1656
qtData *rc2 = (qtData *)p2;
1669
static qtList * clusterQT_LARGE(int n, float thres, int *ccc, rawdata *rdta)
1673
int *omitted, *current, *best;
1674
qtList *clusters = (qtList*)NULL;
1675
const float nDouble = 1.0 / (float)(rdta->sites);
1676
double t = gettime();
1677
const int sites = rdta->sites;
1682
candidates = (qtData *)malloc(sizeof(qtData) * n);
1683
clusters = (qtList *)malloc(sizeof(qtList) * n);
1684
omitted = (int*)calloc(n, sizeof(int));
1685
current = (int*)malloc(sizeof(int) * n);
1686
best = (int*)malloc(sizeof(int) * n);
1687
ref = (float*)malloc(sizeof(float) * n);
1691
for(i = 0; i < n; i++)
1696
int countCandidates = 0;
1698
current[entCount++] = i;
1701
distArray(i, sites, nDouble, n, ref, omitted, y);
1703
for(j = 0; j < n; j++)
1705
if(!omitted[j] && i != j)
1709
if((temp = ref[j]) >= thres)
1711
candidates[countCandidates].val = temp;
1712
candidates[countCandidates].number = j;
1718
if(countCandidates > 0)
1720
qsort(candidates, countCandidates, sizeof(qtData), qtCompare);
1722
for(j = 0; j < countCandidates; j++)
1726
for(k = 0; k < entCount; k++)
1727
if(dist(current[k], candidates[j].number, sites, nDouble, y) < thres)
1732
current[entCount++] = candidates[j].number;
1733
omitted[candidates[j].number] = 1;
1738
clusters[clusterCount].entries = (int *)malloc(sizeof(int) * entCount);
1739
memcpy(clusters[clusterCount].entries, current, entCount * sizeof(int));
1740
clusters[clusterCount++].count = entCount;
1744
printf("Time %f\n", gettime() - t);
1745
printf("FOUND %d Clusters\n", clusterCount);
1753
for(i = 0; i < n; i++)
1756
for(i = 0; i < clusterCount; i++)
1760
for(j = 0; j < clusters[i].count; j++)
1761
for(k = 0; k < clusters[i].count; k++)
1762
assert(dist(clusters[i].entries[j], clusters[i].entries[k],sites, nDouble, y) >= thres);
1765
for(j = 0; j < clusters[i].count; j++)
1767
check += clusters[i].entries[j];
1771
assert(ver == check);
1772
printf("Total: %d\n", total);
1777
for(i = 0; i < clusterCount; i++)
1780
int length = clusters[i].count;
1782
int *c = clusters[i].entries;
1787
for(j = 0; j < length; j++)
1792
for(k = 0; k < length; k++)
1795
avg += dist(c[j], c[k], sites, nDouble, y);
1811
for(j = 0; j < length; j++)
1820
*ccc = clusterCount;
1826
static qtList * clusterQT(float **d, int n, float thres, int *ccc)
1830
int *omitted, *current, *best;
1832
qtList *clusters = (qtList*)NULL;
1833
double t = gettime();
1835
clusters = (qtList *)malloc(sizeof(qtList) * n);
1836
omitted = (int*)calloc(n, sizeof(int));
1837
current = (int*)malloc(sizeof(int) * n);
1838
best = (int*)malloc(sizeof(int) * n);
1847
for(i = 0; i < n; i++)
1852
int *inSet = (int *)calloc(n, sizeof(int));
1853
boolean aboveThres = TRUE;
1855
current[entCount++] = i;
1863
for(j = 0; j < n; j++)
1864
if(i != j && (!omitted[j]) && (!inSet[j]) && d[i][j] > dm)
1874
for(j = 0; j < entCount && aboveThres; j++)
1875
if(d[current[j]][dmPos] < thres)
1880
current[entCount++] = dmPos;
1890
memcpy(best, current, entCount * sizeof(int));
1899
clusters[clusterCount].entries = (int *)malloc(sizeof(int) * max);
1900
memcpy(clusters[clusterCount].entries, best, max * sizeof(int));
1902
for(i = 0; i < max; i++)
1903
omitted[best[i]] = 1;
1905
clusters[clusterCount++].count = max;
1908
printf("Time %f\n", gettime() - t);
1909
printf("FOUND %d Clusters\n", clusterCount);
1915
for(i = 0; i < n; i++)
1918
for(i = 0; i < clusterCount; i++)
1920
/*printf("Cluster %d:", i);*/
1924
for(j = 0; j < clusters[i].count; j++)
1925
for(k = 0; k < clusters[i].count; k++)
1926
assert(d[clusters[i].entries[j]][clusters[i].entries[k]] >= thres);
1929
for(j = 0; j < clusters[i].count; j++)
1931
check += clusters[i].entries[j];
1932
/*printf("%d ", clusters[i].entries[j]);*/
1937
assert(ver == check);
1939
/*printf("TOTAL: %d\n", total);*/
1942
for(i = 0; i < clusterCount; i++)
1945
int length = clusters[i].count;
1947
int *c = clusters[i].entries;
1952
for(j = 0; j < length; j++)
1956
for(k = 0; k < length; k++)
1959
avg += d[c[j]][c[k]];
1968
/*printf("Cluster %d length %d avg %f\n", i, length, max);*/
1972
/*printf("Cluster %d siwtching %d <-> %d\n", i, 0, pos);*/
1978
for(j = 0; j < length; j++)
1985
*ccc = clusterCount;
1989
static void reduceBySequenceSimilarity(tree *tr, rawdata *rdta, analdef *adef)
1991
int n = tr->mxtips + 1;
1993
int *omissionList = (int *)malloc(n * sizeof(int));
1994
int *undeterminedList = (int *)malloc((rdta->sites + 1)* sizeof(int));
1995
int *modelList = (int *)malloc((rdta->sites + 1)* sizeof(int));
1996
int countNameDuplicates = 0;
1997
int countUndeterminedColumns = 0;
1998
int countOnlyGaps = 0;
1999
int modelCounter = 1;
2000
char buf[16], outName[1024];
2001
char undetermined_AA = 22;
2002
char undetermined_DNA = 15;
2004
qtList *clusters = (qtList*)NULL;
2006
int numberOfClusters = 0;
2009
strcpy(outName, workdir);
2010
strcat(outName, "RAxML_reducedList.");
2011
strcat(outName, run_id);
2014
f = fopen(infoFileName, "a");
2020
for(i = 1; i < n; i++)
2021
omissionList[i] = 0;
2023
for(i = 0; i < rdta->sites + 1; i++)
2024
undeterminedList[i] = 0;
2026
for(i = 1; i < n; i++)
2028
for(j = i + 1; j < n; j++)
2029
if(strcmp(tr->nameList[i], tr->nameList[j]) == 0)
2031
countNameDuplicates++;
2034
printf("Sequence names of taxon %d and %d are identical, they are both called %s\n", i, j, tr->nameList[i]);
2035
fprintf(f, "Sequence names of taxon %d and %d are identical, they are both called %s\n", i, j, tr->nameList[i]);
2040
if(countNameDuplicates > 0)
2044
printf("ERROR: Found %d taxa that had equal names in the alignment, exiting...\n", countNameDuplicates);
2045
fprintf(f, "ERROR: Found %d taxa that had equal names in the alignment, exiting...\n", countNameDuplicates);
2051
for(i = 1; i < n; i++)
2055
while(j <= rdta->sites)
2057
if(tr->dataVector[j] == DNA_DATA && rdta->y[i][j] != undetermined_DNA)
2059
if(tr->dataVector[j] == AA_DATA && rdta->y[i][j] != undetermined_AA)
2064
if(j == (rdta->sites + 1))
2068
printf("ERROR: Sequence %s consists entirely of undetermined values which will be treated as missing data\n", tr->nameList[i]);
2069
fprintf(f, "ERROR: Sequence %s consists entirely of undetermined values which will be treated as missing data\n", tr->nameList[i]);
2076
if(countOnlyGaps > 0)
2080
printf("ERROR: Found %d sequences that consist entirely of undetermined values, exiting...\n", countOnlyGaps);
2081
fprintf(f, "ERROR: Found %d sequences that consist entirely of undetermined values, exiting...\n", countOnlyGaps);
2087
for(i = 0; i <= rdta->sites; i++)
2090
for(i = 1; i <= rdta->sites; i++)
2096
if(tr->dataVector[i] == DNA_DATA && rdta->y[j][i] != undetermined_DNA)
2098
if(tr->dataVector[i] == AA_DATA && rdta->y[j][i] != undetermined_AA)
2105
undeterminedList[i] = 1;
2108
printf("IMPORTANT WARNING: Alignment column %d contains only undetermined values which will be treated as missing data\n", i);
2109
fprintf(f, "IMPORTANT WARNING: Alignment column %d contains only undetermined values which will be treated as missing data\n", i);
2111
countUndeterminedColumns++;
2115
if(adef->useMultipleModel)
2117
modelList[modelCounter] = tr->model[i];
2123
switch(adef->similarityFilterMode)
2130
double t = gettime();
2131
float nDouble = 1.0 / (float)(rdta->sites);
2132
int sites = rdta->sites;
2136
d = (float **)malloc(sizeof(float *) * n);
2137
for(i = 0; i < n; i++)
2138
d[i] = (float *)malloc(sizeof(float) * n);
2140
for(i = 0; i < n; i++)
2143
tipI = &(rdta->y[i + 1][1]);
2144
for(j = i + 1; j < n; j++)
2148
tipJ = &(rdta->y[j + 1][1]);
2149
for(k = 0; k < sites; k++)
2150
if(tipJ[k] == tipI[k])
2153
d[i][j] = ((float)count * nDouble);
2158
printf("DistMat %f\n", gettime() - t);
2161
clusters = clusterQT(d, n, (float)(adef->sequenceSimilarity), &numberOfClusters);
2162
printf("QT %f %d\n", gettime() - t, numberOfClusters);
2170
clusters = clusterQT_LARGE(tr->mxtips, (float)(adef->sequenceSimilarity), &numberOfClusters, rdta);
2171
printf("QT %f %d\n", gettime() - t, numberOfClusters);
2178
assoc = fopen(outName, "w");
2180
for(i = 0; i < numberOfClusters; i++)
2182
int length = clusters[i].count;
2183
int *c = clusters[i].entries;
2188
fprintf(assoc, "%s:%s", tr->nameList[c[0]], tr->nameList[c[1]]);
2189
for(j = 2; j < length; j++)
2190
fprintf(assoc, ",%s", tr->nameList[c[j]]);
2191
fprintf(assoc, "\n");
2200
if(nonTrivial > 0 || countUndeterminedColumns > 0)
2202
char noDupFile[2048];
2203
char noDupModels[2048];
2211
printf("Found %d non-trival clusters, reduction to %d sequences\n", nonTrivial, numberOfClusters);
2215
fprintf(f, "Found %d non-trival clusters, reduction to %d sequences\n", nonTrivial, numberOfClusters);
2219
if(countUndeterminedColumns > 0)
2225
printf("IMPORTANT WARNING\n");
2227
printf("Found %d %s that %s only undetermined values which will be treated as missing data.\n",
2228
countUndeterminedColumns, (countUndeterminedColumns == 1)?"column":"columns", (countUndeterminedColumns == 1)?"contains":"contain");
2229
printf("Normally these columns should be excluded from the analysis.\n\n");
2233
fprintf(f, "IMPORTANT WARNING\n");
2235
fprintf(f, "Found %d %s that %s only undetermined values which will be treated as missing data.\n",
2236
countUndeterminedColumns, (countUndeterminedColumns == 1)?"column":"columns", (countUndeterminedColumns == 1)?"contains":"contain");
2237
fprintf(f, "Normally these columns should be excluded from the analysis.\n\n");
2241
sprintf(buf, "%f", adef->sequenceSimilarity);
2243
strcpy(noDupFile, seq_file);
2244
strcat(noDupFile, ".reducedBy.");
2245
strcat(noDupFile, buf);
2248
strcpy(noDupModels, modelFileName);
2249
strcat(noDupModels, ".reducedBy.");
2250
strcat(noDupModels, buf);
2255
if(adef->useMultipleModel && !filexists(noDupModels) && countUndeterminedColumns)
2257
FILE *newFile = fopen(noDupModels, "w");
2259
printf("\nJust in case you might need it, a mixed model file with \n");
2260
printf("model assignments for undetermined columns removed is printed to file %s\n",noDupModels);
2262
fprintf(f, "\nJust in case you might need it, a mixed model file with \n");
2263
fprintf(f, "model assignments for undetermined columns removed is printed to file %s\n",noDupModels);
2266
for(i = 0; i < tr->NumberOfModels; i++)
2268
boolean modelStillExists = FALSE;
2270
for(j = 1; (j <= rdta->sites) && (!modelStillExists); j++)
2272
if(modelList[j] == i)
2273
modelStillExists = TRUE;
2276
if(modelStillExists)
2278
char *protModels[10] = {"DAYHOFF", "DCMUT", "JTT", "MTREV", "WAG", "RTREV", "CPREV", "VT", "BLOSUM62", "MTMAM"};
2283
switch(tr->partitionData[i].dataType)
2289
strcpy(AAmodel, protModels[tr->partitionData[i].protModels]);
2290
if(tr->partitionData[i].protFreqs)
2291
strcat(AAmodel, "F");
2293
fprintf(newFile, "%s, ", AAmodel);
2297
fprintf(newFile, "DNA, ");
2303
fprintf(newFile, "%s = ", tr->partitionData[i].partitionName);
2306
while(k <= rdta->sites)
2308
if(modelList[k] == i)
2311
while((modelList[k + 1] == i) && (k <= rdta->sites))
2318
fprintf(newFile, "%d", lower);
2320
fprintf(newFile, ",%d", lower);
2325
fprintf(newFile, "%d-%d", lower, upper);
2327
fprintf(newFile, ",%d-%d", lower, upper);
2333
fprintf(newFile, "\n");
2848
if(undeterminedList[j + 1] == 0)
2849
fprintf(newFile, "%c", getInverseMeaning(tr->dataVector[j + 1], tipI[j]));
2852
fprintf(newFile, "\n");
2336
2856
fclose(newFile);
2340
if(adef->useMultipleModel)
2342
printf("\n A mixed model file with model assignments for undetermined\n");
2343
printf("columns removed has already been printed to file %s\n",noDupModels);
2345
fprintf(f, "\n A mixed model file with model assignments for undetermined\n");
2346
fprintf(f, "columns removed has already been printed to file %s\n",noDupModels);
2351
if(!filexists(noDupFile))
2355
printf("Just in case you might need it, an alignment file with \n");
2356
if(nonTrivial && !countUndeterminedColumns)
2357
printf("similar sequences removed is printed to file %s\n", noDupFile);
2358
if(!nonTrivial && countUndeterminedColumns)
2359
printf("undetermined columns removed is printed to file %s\n", noDupFile);
2360
if(nonTrivial && countUndeterminedColumns)
2361
printf("similar sequences and undetermined columns removed is printed to file %s\n", noDupFile);
2363
fprintf(f, "Just in case you might need it, an alignment file with \n");
2364
if(nonTrivial && !countUndeterminedColumns)
2365
fprintf(f, "similar sequences removed is printed to file %s\n", noDupFile);
2366
if(!nonTrivial && countUndeterminedColumns)
2367
fprintf(f, "undetermined columns removed is printed to file %s\n", noDupFile);
2368
if(nonTrivial && countUndeterminedColumns)
2369
fprintf(f, "similar sequences and undetermined columns removed is printed to file %s\n", noDupFile);
2371
newFile = fopen(noDupFile, "w");
2373
fprintf(newFile, "%d %d\n", numberOfClusters, rdta->sites - countUndeterminedColumns);
2375
for(i = 0; i < numberOfClusters; i++)
2378
fprintf(newFile, "%s ", tr->nameList[clusters[i].entries[0]]);
2379
tipI = &(rdta->y[clusters[i].entries[0]][1]);
2381
for(j = 0; j < rdta->sites; j++)
2383
if(undeterminedList[j + 1] == 0)
2385
switch(tr->dataVector[j + 1])
2388
fprintf(newFile, "%c", inverseMeaningPROT[tipI[j]]);
2391
fprintf(newFile, "%c", inverseMeaningDNA[tipI[j]]);
2399
fprintf(newFile, "\n");
2405
if(nonTrivial && !countUndeterminedColumns)
2406
printf("An alignment file with similar sequences removed has already\n");
2407
if(!nonTrivial && countUndeterminedColumns)
2408
printf("An alignment file with undetermined columns removed has already\n");
2409
if(nonTrivial && countUndeterminedColumns)
2410
printf("An alignment file with undetermined columns and similar sequences removed has already\n");
2412
printf("been printed to file %s\n", noDupFile);
2414
if(nonTrivial && !countUndeterminedColumns)
2415
fprintf(f, "An alignment file with similar sequences removed has already\n");
2416
if(!nonTrivial && countUndeterminedColumns)
2417
fprintf(f, "An alignment file with undetermined columns removed has already\n");
2418
if(nonTrivial && countUndeterminedColumns)
2419
fprintf(f, "An alignment file with undetermined columns and similar sequences removed has already\n");
2421
fprintf(f, "been printed to file %s\n", noDupFile);
2860
if(count && !countUndeterminedColumns)
2861
printBothOpen("An alignment file with sequence duplicates removed has already\n");
2862
if(!count && countUndeterminedColumns)
2863
printBothOpen("An alignment file with undetermined columns removed has already\n");
2864
if(count && countUndeterminedColumns)
2865
printBothOpen("An alignment file with undetermined columns and sequence duplicates removed has already\n");
2867
printBothOpen("been printed to file %s\n", noDupFile);
2427
free(undeterminedList);
2872
rax_free(undeterminedList);
2873
rax_free(omissionList);
2874
rax_free(modelList);
2437
2883
static void generateBS(tree *tr, analdef *adef)
2441
2891
char outName[1024], buf[16];
2444
2894
assert(adef->boot != 0);
2446
2896
for(i = 0; i < adef->multipleRuns; i++)
2901
computeNextReplicate(tr, &adef->boot, (int*)NULL, (int*)NULL, FALSE, FALSE);
2451
for(j = 0; j < tr->cdta->endsite; j++)
2452
count += tr->cdta->aliaswgt[j];
2904
for(j = 0; j < tr->cdta->endsite; j++)
2905
count += tr->cdta->aliaswgt[j];
2454
2907
assert(count == tr->rdta->sites);
2456
2909
strcpy(outName, workdir);
2457
2910
strcat(outName, seq_file);
2458
2911
strcat(outName, ".BS");
3370
4017
printf(" -E specify an exclude file name, that contains a specification of alignment positions you wish to exclude.\n");
3371
4018
printf(" Format is similar to Nexus, the file shall contain entries like \"100-200 300-400\", to exclude a\n");
3372
4019
printf(" single column write, e.g., \"100-100\", if you use a mixed model, an appropriatly adapted model file\n");
3373
printf(" will be written.\n");
4020
printf(" will be written.\n");
3375
4022
printf(" -f select algorithm:\n");
3377
4024
printMinusFUsage();
4027
printf(" -F enable ML tree searches under CAT model for very large trees without switching to \n");
4028
printf(" GAMMA in the end (saves memory).\n");
4029
printf(" This option can also be used with the GAMMA models in order to avoid the thorough optimization \n");
4030
printf(" of the best-scoring ML tree in the end.\n");
4032
printf(" DEFAULT: OFF\n");
3380
4034
printf(" -g specify the file name of a multifurcating constraint tree\n");
3381
4035
printf(" this tree does not need to be comprehensive, i.e. must not contain all taxa\n");
3383
printf(" -h Display this help message.\n");
4037
printf(" -G enable the ML-based evolutionary placement algorithm heuristics\n");
4038
printf(" by specifiyng a threshold value (fraction of insertion branches to be evaluated\n");
4039
printf(" using slow insertions under ML).\n");
4041
printf(" -h Display this help message.\n");
3385
4043
printf(" -i Initial rearrangement setting for the subsequent application of topological \n");
3386
4044
printf(" changes phase\n");
3388
printf(" DEFAULT: determined by program\n");
4046
printf(" -I a posteriori bootstopping analysis. Use:\n");
4047
printf(" \"-I autoFC\" for the frequency-based criterion\n");
4048
printf(" \"-I autoMR\" for the majority-rule consensus tree criterion\n");
4049
printf(" \"-I autoMRE\" for the extended majority-rule consensus tree criterion\n");
4050
printf(" \"-I autoMRE_IGN\" for metrics similar to MRE, but include bipartitions under the threshold whether they are compatible\n");
4051
printf(" or not. This emulates MRE but is faster to compute.\n");
4052
printf(" You also need to pass a tree file containg several bootstrap replicates via \"-z\" \n");
3390
printf(" -j Specifies if checkpoints will be written by the program. If checkpoints \n");
3391
printf(" (intermediate tree topologies) shall be written by the program specify \"-j\"\n");
4054
printf(" -j Specifies that intermediate tree files shall be written to file during the standard ML and BS tree searches.\n");
3393
4056
printf(" DEFAULT: OFF\n");
4058
printf(" -J Compute majority rule consensus tree with \"-J MR\" or extended majority rule consensus tree with \"-J MRE\"\n");
4059
printf(" or strict consensus tree with \"-J STRICT\". For a custom consensus treshold >= 50%%, specify T_<NUM>, where 100 >= NUM >= 50.\n");
4060
printf(" Options \"-J STRICT_DROP\" and \"-J MR_DROP\" will execute an algorithm that identifies dropsets which contain\n");
4061
printf(" rogue taxa as proposed by Pattengale et al. in the paper \"Uncovering hidden phylogenetic consensus\".\n");
4062
printf(" You will also need to provide a tree file containing several UNROOTED trees via \"-z\"\n");
3395
4064
printf(" -k Specifies that bootstrapped trees should be printed with branch lengths.\n");
3396
4065
printf(" The bootstraps will run a bit longer, because model parameters will be optimized\n");
3397
printf(" at the end of each run. Use with CATMIX/PROTMIX or GAMMA/GAMMAI.\n");
3399
printf(" DEFAULT: OFF\n");
3401
printf(" -l Specify a threshold for sequence similarity clustering. RAxML will then print out an alignment\n");
3402
printf(" to a file called sequenceFileName.reducedBy.threshold that only contains sequences <= the\n");
3403
printf(" specified thresold that must be between 0.0 and 1.0. RAxML uses the QT-clustering algorithm \n");
3404
printf(" to perform this task. In addition, a file called RAxML_reducedList.outputFileName will be written\n");
3405
printf(" that contains clustering information.\n");
3407
printf(" DEFAULT: OFF\n");
3409
printf(" -L Same functionality as \"-l\" above, but uses a less exhasutive and thus faster clustering algorithm\n");
3410
printf(" This is intended for very large datasets with more than 20,000-30,000 sequences\n");
3412
printf(" DEFAULT: OFF\n");
3414
printf(" -m Model of Nucleotide or Amino Acid Substitution: \n");
4066
printf(" at the end of each run under GAMMA or GAMMA+P-Invar respectively.\n");
4068
printf(" DEFAULT: OFF\n");
4070
printf(" -K Specify one of the multi-state substitution models (max 32 states) implemented in RAxML.\n");
4071
printf(" Available models are: ORDERED, MK, GTR\n");
4073
printf(" DEFAULT: GTR model \n");
4075
printf(" -L Compute consensus trees labelled by IC supports and the overall TC value as proposed in Salichos and Rokas 2013.\n");
4076
printf(" Compute a majority rule consensus tree with \"-L MR\" or an extended majority rule consensus tree with \"-L MRE\".\n");
4077
printf(" For a custom consensus treshold >= 50%%, specify \"-L T_<NUM>\", where 100 >= NUM >= 50.\n");
4078
printf(" You will of course also need to provide a tree file containing several UNROOTED trees via \"-z\"!\n");
4080
printf(" -m Model of Binary (Morphological), Nucleotide, Multi-State, or Amino Acid Substitution: \n");
4082
printf(" BINARY:\n\n");
4083
printf(" \"-m BINCAT\" : Optimization of site-specific\n");
4084
printf(" evolutionary rates which are categorized into numberOfCategories distinct \n");
4085
printf(" rate categories for greater computational efficiency. Final tree might be evaluated\n");
4086
printf(" automatically under BINGAMMA, depending on the tree search option\n");
4087
printf(" \"-m BINCATI\" : Optimization of site-specific\n");
4088
printf(" evolutionary rates which are categorized into numberOfCategories distinct \n");
4089
printf(" rate categories for greater computational efficiency. Final tree might be evaluated\n");
4090
printf(" automatically under BINGAMMAI, depending on the tree search option \n");
4091
printf(" \"-m BINGAMMA\" : GAMMA model of rate \n");
4092
printf(" heterogeneity (alpha parameter will be estimated)\n");
4093
printf(" \"-m BINGAMMAI\" : Same as BINGAMMA, but with estimate of proportion of invariable sites\n");
3416
4095
printf(" NUCLEOTIDES:\n\n");
3417
printf(" \"-m GTRCAT\" : GTR + Optimization of substitution rates + Optimization of site-specific\n");
3418
printf(" evolutionary rates which are categorized into numberOfCategories distinct \n");
3419
printf(" rate categories for greater computational efficiency\n");
3420
printf(" if you do a multiple analysis with \"-#\" or \"-N\" but without bootstrapping the program\n");
3421
printf(" will use GTRMIX instead\n");
3422
printf(" \"-m GTRGAMMA\" : GTR + Optimization of substitution rates + GAMMA model of rate \n");
3423
printf(" heterogeneity (alpha parameter will be estimated)\n");
3424
printf(" \"-m GTRMIX\" : Inference of the tree under GTRCAT\n");
3425
printf(" and thereafter evaluation of the final tree topology under GTRGAMMA\n");
3426
printf(" \"-m GTRCAT_GAMMA\" : Inference of the tree with site-specific evolutionary rates.\n");
3427
printf(" However, here rates are categorized using the 4 discrete GAMMA rates.\n");
3428
printf(" Evaluation of the final tree topology under GTRGAMMA\n");
3429
printf(" \"-m GTRGAMMAI\" : Same as GTRGAMMA, but with estimate of proportion of invariable sites \n");
3430
printf(" \"-m GTRMIXI\" : Same as GTRMIX, but with estimate of proportion of invariable sites \n");
3431
printf(" \"-m GTRCAT_GAMMAI\" : Same as GTRCAT_GAMMA, but with estimate of proportion of invariable sites \n");
3433
printf(" AMINO ACIDS:\n\n");
3434
printf(" \"-m PROTCATmatrixName[F]\" : specified AA matrix + Optimization of substitution rates + Optimization of site-specific\n");
3435
printf(" evolutionary rates which are categorized into numberOfCategories distinct \n");
3436
printf(" rate categories for greater computational efficiency\n");
3437
printf(" if you do a multiple analysis with \"-#\" or \"-N\" but without bootstrapping the program\n");
3438
printf(" will use PROTMIX... instead\n");
3439
printf(" \"-m PROTGAMMAmatrixName[F]\" : specified AA matrix + Optimization of substitution rates + GAMMA model of rate \n");
3440
printf(" heterogeneity (alpha parameter will be estimated)\n");
3441
printf(" \"-m PROTMIXmatrixName[F]\" : Inference of the tree under specified AA matrix + CAT\n");
3442
printf(" and thereafter evaluation of the final tree topology under specified AA matrix + GAMMA\n");
3443
printf(" \"-m PROTCAT_GAMMAmatrixName[F]\" : Inference of the tree under specified AA matrix and site-specific evolutionary rates.\n");
3444
printf(" However, here rates are categorized using the 4 discrete GAMMA rates.\n");
3445
printf(" Evaluation of the final tree topology under specified AA matrix + GAMMA\n");
3446
printf(" \"-m PROTGAMMAImatrixName[F]\" : Same as PROTGAMMAmatrixName[F], but with estimate of proportion of invariable sites \n");
3447
printf(" \"-m PROTMIXImatrixName[F]\" : Same as PROTMIXmatrixName[F], but with estimate of proportion of invariable sites \n");
3448
printf(" \"-m PROTCAT_GAMMAImatrixName[F]\" : Same as PROTCAT_GAMMAmatrixName[F], but with estimate of proportion of invariable sites \n");
3450
printf(" Available AA substitution models: DAYHOFF, DCMUT, JTT, MTREV, WAG, RTREV, CPREV, VT, BLOSUM62, MTMAM, GTR\n");
4096
printf(" \"-m GTRCAT\" : GTR + Optimization of substitution rates + Optimization of site-specific\n");
4097
printf(" evolutionary rates which are categorized into numberOfCategories distinct \n");
4098
printf(" rate categories for greater computational efficiency. Final tree might be evaluated\n");
4099
printf(" under GTRGAMMA, depending on the tree search option\n");
4100
printf(" \"-m GTRCATI\" : GTR + Optimization of substitution rates + Optimization of site-specific\n");
4101
printf(" evolutionary rates which are categorized into numberOfCategories distinct \n");
4102
printf(" rate categories for greater computational efficiency. Final tree might be evaluated\n");
4103
printf(" under GTRGAMMAI, depending on the tree search option\n");
4104
printf(" \"-m GTRGAMMA\" : GTR + Optimization of substitution rates + GAMMA model of rate \n");
4105
printf(" heterogeneity (alpha parameter will be estimated)\n");
4106
printf(" \"-m GTRGAMMAI\" : Same as GTRGAMMA, but with estimate of proportion of invariable sites \n");
4108
printf(" MULTI-STATE:\n\n");
4109
printf(" \"-m MULTICAT\" : Optimization of site-specific\n");
4110
printf(" evolutionary rates which are categorized into numberOfCategories distinct \n");
4111
printf(" rate categories for greater computational efficiency. Final tree might be evaluated\n");
4112
printf(" automatically under MULTIGAMMA, depending on the tree search option\n");
4113
printf(" \"-m MULTICATI\" : Optimization of site-specific\n");
4114
printf(" evolutionary rates which are categorized into numberOfCategories distinct \n");
4115
printf(" rate categories for greater computational efficiency. Final tree might be evaluated\n");
4116
printf(" automatically under MULTIGAMMAI, depending on the tree search option \n");
4117
printf(" \"-m MULTIGAMMA\" : GAMMA model of rate \n");
4118
printf(" heterogeneity (alpha parameter will be estimated)\n");
4119
printf(" \"-m MULTIGAMMAI\" : Same as MULTIGAMMA, but with estimate of proportion of invariable sites\n");
4121
printf(" You can use up to 32 distinct character states to encode multi-state regions, they must be used in the following order:\n");
4122
printf(" 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V\n");
4123
printf(" i.e., if you have 6 distinct character states you would use 0, 1, 2, 3, 4, 5 to encode these.\n");
4124
printf(" The substitution model for the multi-state regions can be selected via the \"-K\" option\n");
4126
printf(" AMINO ACIDS:\n\n");
4127
printf(" \"-m PROTCATmatrixName[F]\" : specified AA matrix + Optimization of substitution rates + Optimization of site-specific\n");
4128
printf(" evolutionary rates which are categorized into numberOfCategories distinct \n");
4129
printf(" rate categories for greater computational efficiency. Final tree might be evaluated\n");
4130
printf(" automatically under PROTGAMMAmatrixName[f], depending on the tree search option\n");
4131
printf(" \"-m PROTCATImatrixName[F]\" : specified AA matrix + Optimization of substitution rates + Optimization of site-specific\n");
4132
printf(" evolutionary rates which are categorized into numberOfCategories distinct \n");
4133
printf(" rate categories for greater computational efficiency. Final tree might be evaluated\n");
4134
printf(" automatically under PROTGAMMAImatrixName[f], depending on the tree search option\n");
4135
printf(" \"-m PROTGAMMAmatrixName[F]\" : specified AA matrix + Optimization of substitution rates + GAMMA model of rate \n");
4136
printf(" heterogeneity (alpha parameter will be estimated)\n");
4137
printf(" \"-m PROTGAMMAImatrixName[F]\" : Same as PROTGAMMAmatrixName[F], but with estimate of proportion of invariable sites \n");
4139
printf(" Available AA substitution models:\n");
4146
for(i = 0; i < NUM_PROT_MODELS - 1; i++)
4148
if(i > 0 && (i % 8 == 0))
4153
printf("%s, ", protModels[i]);
4156
printf("%s\n", protModels[i]);
3451
4159
printf(" With the optional \"F\" appendix you can specify if you want to use empirical base frequencies\n");
3452
printf(" Please not that for mixed models you can in addition specify the per-gene AA model in\n");
3453
printf(" the mixed model file (see manual for details)\n");
4160
printf(" Please note that for mixed models you can in addition specify the per-gene AA model in\n");
4161
printf(" the mixed model file (see manual for details). Also note that if you estimate AA GTR parameters on a partitioned\n");
4162
printf(" dataset, they will be linked (estimated jointly) across all partitions to avoid over-parametrization\n");
3455
printf(" -M Switch on estimation of individual per-partition branch lengths. Only has effect when used in combination with \"-q\"\n");
4164
printf(" -M Switch on estimation of individual per-partition branch lengths. Only has effect when used in combination with \"-q\"\n");
3456
4165
printf(" Branch lengths for individual partitions will be printed to separate files\n");
3457
4166
printf(" A weighted average of the branch lengths is computed by using the respective partition lengths\n");
3459
printf(" DEFAULT: OFF\n");
3461
printf(" -n Specifies the name of the output file.\n");
4168
printf(" DEFAULT: OFF\n");
4170
printf(" -n Specifies the name of the output file.\n");
3463
4172
printf(" -o Specify the name of a single outgrpoup or a comma-separated list of outgroups, eg \"-o Rat\" \n");
3464
4173
printf(" or \"-o Rat,Mouse\", in case that multiple outgroups are not monophyletic the first name \n");
3465
printf(" in the list will be selected as outgroup, don't leave spaces between taxon names!\n");
3467
printf(" -q Specify the file name which contains the assignment of models to alignment\n");
3468
printf(" partitions for multiple models of substitution. For the syntax of this file\n");
3469
printf(" please consult the manual.\n");
4174
printf(" in the list will be selected as outgroup, don't leave spaces between taxon names!\n");
4176
printf(" -O Disable check for completely undetermined sequence in alignment.\n");
4177
printf(" The program will not exit with an error message when \"-O\" is specified.\n");
4179
printf(" DEFAULT: check enabled\n");
3471
4181
printf(" -p Specify a random number seed for the parsimony inferences. This allows you to reproduce your results\n");
3472
printf(" and will help me debug the program. This option HAS NO EFFECT in the parallel MPI version\n");
4182
printf(" and will help me debug the program.\n");
3474
4184
printf(" -P Specify the file name of a user-defined AA (Protein) substitution model. This file must contain\n");
3475
4185
printf(" 420 entries, the first 400 being the AA substitution rates (this must be a symmetric matrix) and the\n");
3476
4186
printf(" last 20 are the empirical base frequencies\n");
4188
printf(" -q Specify the file name which contains the assignment of models to alignment\n");
4189
printf(" partitions for multiple models of substitution. For the syntax of this file\n");
4190
printf(" please consult the manual.\n");
3478
4192
printf(" -r Specify the file name of a binary constraint tree.\n");
3479
4193
printf(" this tree does not need to be comprehensive, i.e. must not contain all taxa\n");
4195
printf(" -R Specify the file name of a binary model parameter file that has previously been generated\n");
4196
printf(" with RAxML using the -f e tree evaluation option. The file name should be: \n");
4197
printf(" RAxML_binaryModelParameters.runID\n");
3481
4199
printf(" -s Specify the name of the alignment data file in PHYLIP format\n");
4201
printf(" -S Specify the name of a secondary structure file. The file can contain \".\" for \n");
4202
printf(" alignment columns that do not form part of a stem and characters \"()<>[]{}\" to define \n");
4203
printf(" stem regions and pseudoknots\n");
3483
4205
printf(" -t Specify a user starting tree file name in Newick format\n");
3485
4207
printf(" -T PTHREADS VERSION ONLY! Specify the number of threads you want to run.\n");
3486
4208
printf(" Make sure to set \"-T\" to at most the number of CPUs you have on your machine,\n");
3487
4209
printf(" otherwise, there will be a huge performance decrease!\n");
3489
printf(" -u Specify the number of multiple BS searches per replicate\n");
3490
printf(" to obtain better ML trees for each replicate\n");
3492
printf(" DEFAULT: One ML search per BS replicate\n");
3494
printf(" -v Display version information\n");
3496
printf(" -w Name of the working directory where RAxML will write its output files\n");
3498
printf(" DEFAULT: current directory\n");
3500
printf(" -x Specify an integer number (random seed) and turn on rapid bootstrapping\n");
4211
printf(" -u use the median for the discrete approximation of the GAMMA model of rate heterogeneity\n");
4213
printf(" DEFAULT: OFF\n");
4215
printf(" -U Try to save memory by using SEV-based implementation for gap columns on large gappy alignments\n");
4216
printf(" The technique is described here: http://www.biomedcentral.com/1471-2105/12/470\n");
4217
printf(" This will only work for DNA and/or PROTEIN data and only with the SSE3 or AVX-vextorized version of the code.\n");
4219
printf(" -v Display version information\n");
4221
printf(" -V Disable rate heterogeneity among sites model and use one without rate heterogeneity instead.\n");
4222
printf(" Only works if you specify the CAT model of rate heterogeneity.\n");
4224
printf(" DEFAULT: use rate heterogeneity\n");
4226
printf(" -w FULL (!) path to the directory into which RAxML shall write its output files\n");
4228
printf(" DEFAULT: current directory\n");
4230
printf(" -W Sliding window size for leave-one-out site-specific placement bias algorithm\n");
4231
printf(" only effective when used in combination with \"-f S\" \n");
4233
printf(" DEFAULT: 100 sites\n");
4235
printf(" -x Specify an integer number (random seed) and turn on rapid bootstrapping\n");
4236
printf(" CAUTION: unlike in version 7.0.4 RAxML will conduct rapid BS replicates under \n");
4237
printf(" the model of rate heterogeneity you specified via \"-m\" and not by default under CAT\n");
4239
printf(" -X Same as the \"-y\" option below, however the parsimony search is more superficial.\n");
4240
printf(" RAxML will only do a randomized stepwise addition order parsimony tree reconstruction\n");
4241
printf(" without performing any additional SPRs.\n");
4242
printf(" This may be helpful for very broad whole-genome datasets, since this can generate topologically\n");
4243
printf(" more different starting trees.\n");
4245
printf(" DEFAULT: OFF\n");
3502
4247
printf(" -y If you want to only compute a parsimony starting tree with RAxML specify \"-y\",\n");
3503
4248
printf(" the program will exit after computation of the starting tree\n");
3505
4250
printf(" DEFAULT: OFF\n");
4252
printf(" -Y Pass a quartet grouping file name defining four groups from which to draw quartets\n");
4253
printf(" The file input format must contain 4 groups in the following form:\n");
4254
printf(" (Chicken, Human, Loach), (Cow, Carp), (Mouse, Rat, Seal), (Whale, Frog);\n");
4255
printf(" Only works in combination with -f q !\n");
3507
4257
printf(" -z Specify the file name of a file containing multiple trees e.g. from a bootstrap\n");
3508
4258
printf(" that shall be used to draw bipartition values onto a tree provided with \"-t\",\n");
3509
4259
printf(" It can also be used to compute per site log likelihoods in combination with \"-f g\"\n");
3510
4260
printf(" and to read a bunch of trees for a couple of other options (\"-f h\", \"-f m\", \"-f n\").\n");
3512
4262
printf(" -#|-N Specify the number of alternative runs on distinct starting trees\n");
3513
4263
printf(" In combination with the \"-b\" option, this will invoke a multiple boostrap analysis\n");
3514
4264
printf(" Note that \"-N\" has been added as an alternative since \"-#\" sometimes caused problems\n");
3515
printf(" with certain MPI job submission systems, since \"-#\" is often used to start comments\n");
4265
printf(" with certain MPI job submission systems, since \"-#\" is often used to start comments.\n");
4266
printf(" If you want to use the bootstopping criteria specify \"-# autoMR\" or \"-# autoMRE\" or \"-# autoMRE_IGN\"\n");
4267
printf(" for the majority-rule tree based criteria (see -I option) or \"-# autoFC\" for the frequency-based criterion.\n");
4268
printf(" Bootstopping will only work in combination with \"-x\" or \"-b\"\n");
3517
4270
printf(" DEFAULT: 1 single analysis\n");
3518
4271
printf("\n\n\n\n");
3556
4357
NumberOfThreads = 0;
4361
tr->useFastScaling = TRUE;
4362
tr->bootStopCriterion = -1;
4363
tr->wcThreshold = 0.03;
3559
4364
tr->doCutoff = TRUE;
4365
tr->secondaryStructureModel = SEC_16; /* default setting */
4366
tr->searchConvergenceCriterion = FALSE;
4367
tr->catOnly = FALSE;
4368
tr->useEpaHeuristics = FALSE;
4369
tr->fastEPAthreshold = -1.0;
4370
tr->multiStateModel = GTR_MULTI_STATE;
4371
tr->saveMemory = FALSE;
4372
tr->useGammaMedian = FALSE;
4373
tr->noRateHet = FALSE;
4374
tr->perPartitionEPA = FALSE;
4375
tr->useBrLenScaler = FALSE;
3561
4376
/********* tr inits end*************/
3565
((c = mygetopt(argc,argv,"T:E:N:u:l:x:X:z:g:r:e:a:b:c:f:i:m:t:w:s:n:o:L:B:q:#:p:vdyjhkM", &optind, &optarg))!=-1))
3568
((c = mygetopt(argc,argv,"T:E:N:u:l:x:z:g:r:e:a:b:c:f:i:m:t:w:s:n:o:L:B:P:q:#:p:vdyjhkM", &optind, &optarg))!=-1))
4380
((c = mygetopt(argc,argv,"R:T:E:N:B:L:P:S:Y:A:G:H:I:J:K:W:l:x:z:g:r:e:a:b:c:f:i:m:t:w:s:n:o:q:#:p:vudyjhkMDFQUOVCX", &optind, &optarg))!=-1))
4385
adef->useQuartetGrouping = TRUE;
4387
strcpy(quartetGroupingFileName, optarg);
4390
tr->noRateHet = TRUE;
4393
tr->useGammaMedian = TRUE;
4396
adef->checkForUndeterminedSequences = FALSE;
4399
sscanf(optarg,"%d", &(adef->slidingWindowSize));
4400
if(adef->slidingWindowSize <= 0)
4402
printf("You can't use a sliding window size smaller than 1, you specified %d\n", adef->slidingWindowSize);
4405
if(adef->slidingWindowSize <= 10)
4407
printf("You specified a small sliding window size of %d sites\n", adef->slidingWindowSize);
4408
printf("Are you sure you want to do this?\n");
4410
if(adef->slidingWindowSize >= 500)
4412
printf("You specified a large sliding window size of %d sites\n", adef->slidingWindowSize);
4413
printf("Are you sure you want to do this?\n");
4417
tr->saveMemory = TRUE;
4418
#if (!defined(__SIM_SSE3) && !defined(__AVX))
4419
printf("\nmemory saving option -U does only work with the AVX and SSE3 vectorized versions of the code\n");
4420
printf("please remove this option and execute the program again\n");
4421
printf("exiting ....\n\n");
3574
#ifdef _USE_PTHREADS
3575
sscanf(optarg,"%d", &NumberOfThreads);
3579
printf("Option -T does not have any effect with the sequential or parallel MPI version.\n");
3580
printf("It is used to specify the number of threads for the Pthreads-based parallelization\n");
3585
strcpy(proteinModelFileName, optarg);
3586
adef->userProteinModel = TRUE;
3587
parseProteinModel(adef);
4426
adef->useBinaryModelFile = TRUE;
4427
strcpy(binaryModelParamsInputFileName, optarg);
4431
const char *modelList[3] = { "ORDERED", "MK", "GTR"};
4432
const int states[3] = {ORDERED_MULTI_STATE, MK_MULTI_STATE, GTR_MULTI_STATE};
4435
sscanf(optarg, "%s", multiStateModel);
4437
for(i = 0; i < 3; i++)
4438
if(strcmp(multiStateModel, modelList[i]) == 0)
4442
tr->multiStateModel = states[i];
4445
printf("The multi-state model %s you want to use does not exist, exiting .... \n", multiStateModel);
4454
const char *modelList[21] = { "S6A", "S6B", "S6C", "S6D", "S6E", "S7A", "S7B", "S7C", "S7D", "S7E", "S7F", "S16", "S16A", "S16B", "S16C",
4455
"S16D", "S16E", "S16F", "S16I", "S16J", "S16K"};
4458
sscanf(optarg, "%s", secondaryModel);
4460
for(i = 0; i < 21; i++)
4461
if(strcmp(secondaryModel, modelList[i]) == 0)
4465
tr->secondaryStructureModel = i;
4468
printf("The secondary structure model %s you want to use does not exist, exiting .... \n", secondaryModel);
4474
sscanf(optarg,"%lf", &wcThreshold);
4475
tr->wcThreshold = wcThreshold;
4476
if(wcThreshold <= 0.0 || wcThreshold >= 1.0)
4478
printf("\nBootstrap threshold must be set to values between 0.0 and 1.0, you just set it to %f\n", wcThreshold);
4481
if(wcThreshold < 0.01 || wcThreshold > 0.05)
4483
printf("\n\nWARNING, reasonable settings for Bootstopping threshold with MR-based criteria range between 0.01 and 0.05.\n");
4484
printf("You are just setting it to %f, the most reasonable empirically determined setting is 0.03 \n\n", wcThreshold);
4488
tr->searchConvergenceCriterion = TRUE;
3590
4491
strcpy(excludeFileName, optarg);
3591
4492
adef->useExcludeFile = TRUE;
4498
tr->useEpaHeuristics = TRUE;
4500
sscanf(optarg,"%lf", &fastEPAthreshold);
4501
tr->fastEPAthreshold = fastEPAthreshold;
4503
if(fastEPAthreshold <= 0.0 || fastEPAthreshold >= 1.0)
4505
printf("\nHeuristic EPA threshold must be set to values between 0.0 and 1.0, you just set it to %f\n", fastEPAthreshold);
4508
if(fastEPAthreshold < 0.015625 || fastEPAthreshold > 0.5)
4510
printf("\n\nWARNING, reasonable settings for heuristic EPA threshold range between 0.015625 (1/64) and 0.5 (1/2).\n");
4511
printf("You are just setting it to %f\n\n", fastEPAthreshold);
4513
#ifdef _USE_PTHREADS
4514
tr->useFastScaling = FALSE;
4519
adef->readTaxaOnly = TRUE;
4520
adef->mode = BOOTSTOP_ONLY;
4521
if((sscanf(optarg,"%s", aut) > 0) && ((strcmp(aut, "autoFC") == 0) || (strcmp(aut, "autoMR") == 0) ||
4522
(strcmp(aut, "autoMRE") == 0) || (strcmp(aut, "autoMRE_IGN") == 0)))
4524
if((strcmp(aut, "autoFC") == 0))
4525
tr->bootStopCriterion = FREQUENCY_STOP;
4526
if((strcmp(aut, "autoMR") == 0))
4527
tr->bootStopCriterion = MR_STOP;
4528
if((strcmp(aut, "autoMRE") == 0))
4529
tr->bootStopCriterion = MRE_STOP;
4530
if((strcmp(aut, "autoMRE_IGN") == 0))
4531
tr->bootStopCriterion = MRE_IGN_STOP;
4536
printf("Use -I a posteriori bootstop option either as \"-I autoFC\" or \"-I autoMR\" or \"-I autoMRE\" or \"-I autoMRE_IGN\"\n");
4541
adef->readTaxaOnly = TRUE;
4542
adef->mode = CONSENSUS_ONLY;
4543
adef->calculateIC = FALSE;
4545
if((sscanf(optarg,"%s", aut) > 0) && ((strcmp(aut, "MR") == 0) || (strcmp(aut, "MRE") == 0) || (strcmp(aut, "STRICT") == 0) ||
4546
(strcmp(aut, "STRICT_DROP") == 0) || (strcmp(aut, "MR_DROP") == 0)))
4548
if((strcmp(aut, "MR") == 0))
4549
tr->consensusType = MR_CONSENSUS;
4550
if((strcmp(aut, "MR_DROP") == 0))
4552
tr->consensusType = MR_CONSENSUS;
4553
adef->leaveDropMode = TRUE;
4556
if((strcmp(aut, "MRE") == 0))
4557
tr->consensusType = MRE_CONSENSUS;
4560
if((strcmp(aut, "STRICT") == 0))
4561
tr->consensusType = STRICT_CONSENSUS;
4562
if((strcmp(aut, "STRICT_DROP") == 0))
4564
tr->consensusType = STRICT_CONSENSUS;
4565
adef->leaveDropMode = TRUE;
4570
if( (sscanf( optarg, "%s", aut) > 0) && optarg[0] == 'T' && optarg[1] == '_')
4572
tr->consensusType = USER_DEFINED;
4573
sscanf(optarg + 2,"%d", &tr->consensusUserThreshold);
4575
if(tr->consensusUserThreshold < 50 || tr->consensusUserThreshold > 100)
4577
printf("Please specify a custom threshold c, with 50 <= c <= 100\n" );
4584
printf("Use -J consensus tree option either as \"-J MR\" or \"-J MRE\" or \"-J STRICT\" or \"-J MR_DROP\" or \"-J STRICT_DROP\" or T_<NUM>, where NUM >= 50\n");
4590
adef->verboseIC = TRUE;
4593
adef->readTaxaOnly = TRUE;
4594
adef->mode = CONSENSUS_ONLY;
4595
adef->leaveDropMode = FALSE;
4596
adef->calculateIC = TRUE;
4598
if((sscanf(optarg,"%s", aut) > 0) && ((strcmp(aut, "MR") == 0) || (strcmp(aut, "MRE") == 0)))
4600
if((strcmp(aut, "MR") == 0))
4601
tr->consensusType = MR_CONSENSUS;
4603
if((strcmp(aut, "MRE") == 0))
4604
tr->consensusType = MRE_CONSENSUS;
4608
if((sscanf( optarg, "%s", aut) > 0) && optarg[0] == 'T' && optarg[1] == '_')
4610
tr->consensusType = USER_DEFINED;
4611
sscanf(optarg + 2,"%d", &tr->consensusUserThreshold);
4613
if(tr->consensusUserThreshold < 50 || tr->consensusUserThreshold > 100)
4615
printf("Please specify a custom threshold c, with 50 <= c <= 100\n" );
4622
printf("Use -L consensus tree option including IC/TC score computation either as \"-L MR\" or \"-L MRE\" or \"-L T_<NUM>\", where NUM >= 50\n");
3594
4628
adef->perGeneBranchLengths = TRUE;
3597
sscanf(optarg,"%d", &multipleBoots);
3598
adef->multiBoot = multipleBoots;
4631
strcpy(proteinModelFileName, optarg);
4632
adef->userProteinModel = TRUE;
4633
/*parseProteinModel(adef->externalAAMatrix, proteinModelFileName);*/
4636
adef->useSecondaryStructure = TRUE;
4637
strcpy(secondaryStructureFileName, optarg);
4640
#ifdef _USE_PTHREADS
4641
sscanf(optarg,"%d", &NumberOfThreads);
4645
printf("Option -T does not have any effect with the sequential or parallel MPI version.\n");
4646
printf("It is used to specify the number of threads for the Pthreads-based parallelization\n");
3601
strcpy(outgroups, optarg);
3602
parseOutgroups(outgroups, tr);
3603
adef->outgroup = TRUE;
4653
outgroups = (char*)rax_malloc(sizeof(char) * (strlen(optarg) + 1));
4654
strcpy(outgroups, optarg);
4655
parseOutgroups(outgroups, tr);
4656
rax_free(outgroups);
4657
adef->outgroup = TRUE;
3606
4661
adef->bootstrapBranchLengths = TRUE;
3609
4664
strcpy(bootStrapFile, optarg);
3613
4668
adef->randomStartingTree = TRUE;
3616
4671
strcpy(tree_file, optarg);
3617
4672
adef->grouping = TRUE;
3618
adef->restart = TRUE;
4673
adef->restart = TRUE;
3622
4677
strcpy(tree_file, optarg);
3623
4678
adef->restart = TRUE;
3624
4679
adef->constraint = TRUE;
3625
4680
constraintSet = 1;
3628
4683
sscanf(optarg,"%lf", &likelihoodEpsilon);
3629
adef->likelihoodEpsilon = likelihoodEpsilon;
4684
adef->likelihoodEpsilon = likelihoodEpsilon;
3632
4687
strcpy(modelFileName,optarg);
3633
adef->useMultipleModel = TRUE;
4688
adef->useMultipleModel = TRUE;
3636
sscanf(optarg,"%ld", &parsimonySeed);
3637
adef->parsimonySeed = parsimonySeed;
4691
sscanf(optarg,"%ld", &(adef->parsimonySeed));
4692
if(adef->parsimonySeed <= 0)
4694
printf("Parsimony seed specified via -p must be greater than zero\n");
3641
/* TODO include auto in readme */
3642
4700
if(sscanf(optarg,"%d", &multipleRuns) > 0)
3644
4702
adef->multipleRuns = multipleRuns;
3648
if((sscanf(optarg,"%s", aut) > 0) &&
3650
(strcmp(aut, "auto") == 0) ||
3651
(strcmp(aut, "Auto") == 0) ||
3652
(strcmp(aut, "AUTO") == 0) ||
3653
(strcmp(aut, "automatic") == 0) ||
3654
(strcmp(aut, "Automatic") == 0) ||
3655
(strcmp(aut, "AUTOMATIC") == 0)
4706
if((sscanf(optarg,"%s", aut) > 0) && ((strcmp(aut, "autoFC") == 0) || (strcmp(aut, "autoMR") == 0) ||
4707
(strcmp(aut, "autoMRE") == 0) || (strcmp(aut, "autoMRE_IGN") == 0)))
3659
4710
adef->bootStopping = TRUE;
3660
adef->multipleRuns = 1000;
4711
adef->multipleRuns = 1000;
4713
if((strcmp(aut, "autoFC") == 0))
4714
tr->bootStopCriterion = FREQUENCY_STOP;
4715
if((strcmp(aut, "autoMR") == 0))
4716
tr->bootStopCriterion = MR_STOP;
4717
if((strcmp(aut, "autoMRE") == 0))
4718
tr->bootStopCriterion = MRE_STOP;
4719
if((strcmp(aut, "autoMRE_IGN") == 0))
4720
tr->bootStopCriterion = MRE_IGN_STOP;
3664
4724
if(processID == 0)
3666
printf("Use -# or -N option either with an integer, e.g., -# 100 or with -# auto\n");
3667
printf("or -N 100 or -N auto respectively, note that auto will not work for the\n");
4726
printf("Use -# or -N option either with an integer, e.g., -# 100 or with -# autoFC or -# autoMR or -# autoMRE or -# autoMRE_IGN\n");
4727
printf("or -N 100 or -N autoFC or -N autoMR or -N autoMRE or -N autoMRE_IGN respectively, note that auto will not work for the\n");
3668
4728
printf("MPI-based parallel version\n");
4733
multipleRunsSet = TRUE;
4736
printVersionInfo(TRUE, (FILE*)NULL);
4739
adef->stepwiseAdditionOnly = FALSE;
3678
4740
adef->startingTreeOnly = 1;
4743
adef->stepwiseAdditionOnly = TRUE;
4744
adef->startingTreeOnly = 1;
3684
4750
adef->checkpoints = 1;
3687
4753
strcpy(weightFileName,optarg);
3688
4754
adef->useWeightFile = TRUE;
3691
sscanf(optarg,"%ld", &adef->boot);
3694
sscanf(optarg,"%ld", &adef->rapidBoot);
3696
adef->optimizeBSmodel = FALSE;
3701
sscanf(optarg,"%ld", &adef->rapidBoot);
3702
adef->optimizeBSmodel = TRUE;
4757
sscanf(optarg,"%ld", &adef->boot);
4760
printf("Bootstrap seed specified via -b must be greater than zero\n");
4766
sscanf(optarg,"%ld", &adef->rapidBoot);
4767
if(adef->rapidBoot <= 0)
4769
printf("Bootstrap seed specified via -x must be greater than zero\n");
3706
sscanf(optarg, "%d", &adef->categories);
3709
sscanf(optarg,"%lf", &sequenceSimilarity);
3710
adef->sequenceSimilarity = sequenceSimilarity;
3711
adef->mode = SEQUENCE_SIMILARITY_FILTER;
3712
adef->similarityFilterMode = SMALL_DATA;
3715
sscanf(optarg,"%lf", &sequenceSimilarity);
3716
adef->sequenceSimilarity = sequenceSimilarity;
3717
adef->mode = SEQUENCE_SIMILARITY_FILTER;
3718
adef->similarityFilterMode = LARGE_DATA;
3721
/* TODO include in readme */
3722
sscanf(optarg,"%lf", &(adef->bootstopCutoff));
3723
if(adef->bootstopCutoff <= 0.0)
3725
printf("ERROR BootstopCutoff was set to %f, but must be greater than 0.0\n",
3726
adef->bootstopCutoff);
3729
if(adef->bootstopCutoff == 0.5)
3731
printf("\n\nWARNING: BootstopCutoff was set to %f, this is equivalent to default\n", adef->bootstopCutoff);
3732
printf("Bootstopping without the \"-B\" option. Are you sure that this is \n");
3733
printf("what you want to do?\n\n");
3735
if(adef->bootstopCutoff > 0.5)
3737
printf("ERROR BootstopCutoff was set to %f, but must be smaller or equal to 0.5\n",
3738
adef->bootstopCutoff);
4775
sscanf(optarg, "%d", &adef->categories);
3743
4778
sscanf(optarg, "%c", &modelChar);
3744
4779
switch(modelChar)
4782
adef->mode = ANCESTRAL_STATES;
4783
/*adef->compressPatterns = FALSE;*/
3747
4786
adef->allInOne = TRUE;
3748
4787
adef->mode = BIG_RAPID_MODE;
3749
4788
tr->doCutoff = TRUE;
3752
adef->mode = CALC_BIPARTITIONS;
4791
adef->readTaxaOnly = TRUE;
4792
adef->mode = CALC_BIPARTITIONS;
4795
adef->mode = OPTIMIZE_BR_LEN_SCALER;
4796
adef->perGeneBranchLengths = TRUE;
4797
tr->useBrLenScaler = TRUE;
3755
4800
adef->mode = CHECK_ALIGNMENT;
4803
adef->mode = ANCESTRAL_SEQUENCE_TEST;
4804
tr->useFastScaling = FALSE;
3758
4807
adef->mode = BIG_RAPID_MODE;
3759
4808
tr->doCutoff = TRUE;
3762
adef->mode = TREE_EVALUATION;
3765
/* TODO include in readme */
3766
adef->mode = OPTIMIZE_RATES;
3767
adef->treeLength = TRUE;
4811
adef->mode = TREE_EVALUATION;
4814
adef->mode = FAST_SEARCH;
4815
adef->veryFast = TRUE;
4818
adef->mode = FAST_SEARCH;
4819
adef->veryFast = FALSE;
3770
adef->mode = OPTIMIZE_RATES;
3771
adef->computePerSiteLLs = TRUE;
4822
tr->useFastScaling = FALSE;
4823
tr->optimizeAllTrees = FALSE;
4824
adef->mode = PER_SITE_LL;
4827
tr->useFastScaling = FALSE;
4828
tr->optimizeAllTrees = TRUE;
4829
adef->mode = PER_SITE_LL;
3774
adef->mode = TREE_EVALUATION;
3775
adef->likelihoodTest = TRUE;
3778
adef->reallyThoroughBoot = TRUE;
4832
tr->optimizeAllTrees = FALSE;
4833
adef->mode = TREE_EVALUATION;
4834
adef->likelihoodTest = TRUE;
4835
tr->useFastScaling = FALSE;
4838
tr->optimizeAllTrees = TRUE;
4839
adef->mode = TREE_EVALUATION;
4840
adef->likelihoodTest = TRUE;
4841
tr->useFastScaling = FALSE;
4844
adef->readTaxaOnly = TRUE;
4845
adef->mode = CALC_BIPARTITIONS_IC;
4848
adef->mode = ROOT_TREE;
4849
adef->readTaxaOnly = TRUE;
4852
adef->mode = GENERATE_BS;
3781
4853
adef->generateBS = TRUE;
3784
/* TODO include in readme */
3785
adef->bootStopOnly = 1;
3788
/* TODO include in readme */
3789
adef->bootStopOnly = 2;
3792
adef->bootStopOnly = 3;
3795
adef->bootStopOnly = 4;
4856
adef->mode = SH_LIKE_SUPPORTS;
4857
tr->useFastScaling = FALSE;
4860
adef->readTaxaOnly = TRUE;
4861
adef->mode = COMPUTE_BIPARTITION_CORRELATION;
4864
tr->optimizeAllTrees = FALSE;
4865
adef->mode = COMPUTE_LHS;
4868
tr->optimizeAllTrees = TRUE;
4869
adef->mode = COMPUTE_LHS;
3798
4872
adef->mode = BIG_RAPID_MODE;
3799
4873
tr->doCutoff = FALSE;
3802
4876
adef->mode = PARSIMONY_ADDITION;
3805
/* TODO include in README */
3806
adef->mode = MEHRING_ALGO;
4879
adef->mode = QUARTET_CALCULATION;
3809
adef->mode = OPTIMIZE_RATES;
4882
adef->readTaxaOnly = TRUE;
4883
adef->mode = COMPUTE_RF_DISTANCE;
4886
adef->readTaxaOnly = TRUE;
4887
adef->mode = PLAUSIBILITY_CHECKER;
3812
4890
adef->mode = SPLIT_MULTI_GENE;
4893
adef->mode = EPA_SITE_SPECIFIC_BIAS;
4894
tr->useFastScaling = FALSE;
4895
adef->compressPatterns = FALSE;
3815
4898
adef->mode = BIG_RAPID_MODE;
3816
4899
tr->doCutoff = TRUE;
3817
adef->permuteTreeoptimize = TRUE;
4900
adef->permuteTreeoptimize = TRUE;
4903
adef->mode = THOROUGH_OPTIMIZATION;
3821
adef->mode = ARNDT_MODE;
3825
adef->rapidML_Addition = TRUE;
3828
adef->computeELW = TRUE;
4906
adef->mode = MORPH_CALIBRATOR;
4907
tr->useFastScaling = FALSE;
4908
adef->compressPatterns = FALSE;
4911
adef->mode = CLASSIFY_ML;
4913
tr->perPartitionEPA = FALSE;
4915
adef->compressPatterns = FALSE;
4917
#ifdef _USE_PTHREADS
4918
tr->useFastScaling = FALSE;
4923
adef->mode = CLASSIFY_ML;
4926
tr->perPartitionEPA = TRUE;
4928
adef->compressPatterns = FALSE;
4930
#ifdef _USE_PTHREADS
4931
tr->useFastScaling = FALSE;
4935
adef->mode = COMPUTE_ELW;
4936
adef->computeELW = TRUE;
4937
tr->optimizeAllTrees = FALSE;
4940
adef->mode = COMPUTE_ELW;
4941
adef->computeELW = TRUE;
4942
tr->optimizeAllTrees = TRUE;
4945
adef->mode = DISTANCE_MODE;
4946
adef->computeDistance = TRUE;
4949
adef->mode = CLASSIFY_MP;
3832
4953
if(processID == 0)
3834
4955
printf("Error select one of the following algorithms via -f :\n");
3842
4963
sscanf(optarg, "%d", &adef->initial);
3843
4964
adef->initialSet = TRUE;
3846
4967
strcpy(run_id,optarg);
4968
analyzeRunId(run_id);
3850
strcpy(workdir,optarg);
4972
strcpy(resultDir, optarg);
4973
resultDirSet = TRUE;
3853
4976
strcpy(tree_file, optarg);
3854
4977
adef->restart = TRUE;
4282
5544
static void makeFileNames(void)
4284
5546
int infoFileExists = 0;
4286
MPI_Status msgStatus;
4289
strcpy(permFileName, workdir);
5548
strcpy(verboseSplitsFileName, workdir);
5549
strcpy(permFileName, workdir);
4290
5550
strcpy(resultFileName, workdir);
4291
5551
strcpy(logFileName, workdir);
4292
5552
strcpy(checkpointFileName, workdir);
4293
5553
strcpy(infoFileName, workdir);
4294
strcpy(randomFileName, workdir);
5554
strcpy(randomFileName, workdir);
4295
5555
strcpy(bootstrapFileName, workdir);
4296
5556
strcpy(bipartitionsFileName, workdir);
5557
strcpy(bipartitionsFileNameBranchLabels, workdir);
5558
strcpy(icFileNameBranchLabels, workdir);
4297
5559
strcpy(ratesFileName, workdir);
4298
5560
strcpy(lengthFileName, workdir);
4299
5561
strcpy(lengthFileNameModel, workdir);
4300
strcpy( perSiteLLsFileName, workdir);
5562
strcpy(perSiteLLsFileName, workdir);
5563
strcpy(binaryModelParamsOutputFileName, workdir);
5565
strcat(verboseSplitsFileName, "RAxML_verboseSplits.");
4302
5566
strcat(permFileName, "RAxML_parsimonyTree.");
4303
5567
strcat(resultFileName, "RAxML_result.");
4304
5568
strcat(logFileName, "RAxML_log.");
4305
5569
strcat(checkpointFileName, "RAxML_checkpoint.");
4306
5570
strcat(infoFileName, "RAxML_info.");
4307
strcat(randomFileName, "RAxML_randomTree.");
4308
strcat(bootstrapFileName, "RAxML_bootstrap.");
5571
strcat(randomFileName, "RAxML_randomTree.");
5572
strcat(bootstrapFileName, "RAxML_bootstrap.");
4309
5573
strcat(bipartitionsFileName, "RAxML_bipartitions.");
5574
strcat(bipartitionsFileNameBranchLabels, "RAxML_bipartitionsBranchLabels.");
5575
strcat(icFileNameBranchLabels, "RAxML_IC_Score_BranchLabels.");
4310
5576
strcat(ratesFileName, "RAxML_perSiteRates.");
4311
5577
strcat(lengthFileName, "RAxML_treeLength.");
4312
5578
strcat(lengthFileNameModel, "RAxML_treeLengthModel.");
4313
strcat( perSiteLLsFileName, "RAxML_perSiteLLs.");
5579
strcat(perSiteLLsFileName, "RAxML_perSiteLLs.");
5580
strcat(binaryModelParamsOutputFileName, "RAxML_binaryModelParameters.");
5582
strcat(verboseSplitsFileName, run_id);
4315
5583
strcat(permFileName, run_id);
4316
5584
strcat(resultFileName, run_id);
4317
5585
strcat(logFileName, run_id);
4318
5586
strcat(checkpointFileName, run_id);
4319
strcat(infoFileName, run_id);
4320
strcat(randomFileName, run_id);
4321
strcat(bootstrapFileName, run_id);
5587
strcat(infoFileName, run_id);
5588
strcat(randomFileName, run_id);
5589
strcat(bootstrapFileName, run_id);
4322
5590
strcat(bipartitionsFileName, run_id);
5591
strcat(bipartitionsFileNameBranchLabels, run_id);
5592
strcat(icFileNameBranchLabels, run_id);
4323
5593
strcat(ratesFileName, run_id);
4324
5594
strcat(lengthFileName, run_id);
4325
5595
strcat(lengthFileNameModel, run_id);
4326
5596
strcat(perSiteLLsFileName, run_id);
5597
strcat(binaryModelParamsOutputFileName, run_id);
5603
strcpy(bootstrapFileNamePID, bootstrapFileName);
5604
strcat(bootstrapFileNamePID, ".PID.");
5605
sprintf(buf, "%d", processID);
5606
strcat(bootstrapFileNamePID, buf);
4328
5610
if(processID == 0)
4330
5612
infoFileExists = filexists(infoFileName);
4336
for(i = 1; i < numOfWorkers; i++)
4337
MPI_Send(&infoFileExists, 1, MPI_INT, i, FINALIZE, MPI_COMM_WORLD);
4341
5614
if(infoFileExists)
4343
5616
printf("RAxML output files with the run ID <%s> already exist \n", run_id);
4344
5617
printf("in directory %s ...... exiting\n", workdir);
4356
MPI_Recv(&infoFileExists, 1, MPI_INT, 0, FINALIZE, MPI_COMM_WORLD, &msgStatus);
4367
static void readData(analdef *adef, rawdata *rdta, cruncheddata *cdta, tree *tr)
5632
/***********************reading and initializing input ******************/
5635
/********************PRINTING various INFO **************************************/
5638
void printBaseFrequencies(tree *tr)
4369
INFILE = fopen(seq_file, "r");
4374
printf( "Could not open sequence file: %s\n", seq_file);
4377
getinput(adef, rdta, cdta, tr);
5645
for(model = 0; model < tr->NumberOfModels; model++)
5649
printBothOpen("Partition: %d with name: %s\n", model, tr->partitionData[model].partitionName);
5650
printBothOpen("Base frequencies: ");
5652
if(tr->partitionData[model].protModels == LG4 || tr->partitionData[model].protModels == LG4X)
5657
printBothOpen("\n");
5659
for(k = 0; k < 4; k++)
5661
printBothOpen("LG4 %d: ", k);
5662
for(i = 0; i < tr->partitionData[model].states; i++)
5663
printBothOpen("%1.3f ", tr->partitionData[model].frequencies_LG4[k][i]);
5664
printBothOpen("\n");
5669
for(i = 0; i < tr->partitionData[model].states; i++)
5670
printBothOpen("%1.3f ", tr->partitionData[model].frequencies[i]);
5673
printBothOpen("\n\n");
4384
/***********************reading and initializing input ******************/
4387
/********************PRINTING various INFO **************************************/
4390
5678
static void printModelAndProgramInfo(tree *tr, analdef *adef, int argc, char *argv[])
4392
5680
if(processID == 0)
4395
FILE *infoFile = fopen(infoFileName, "a");
5683
FILE *infoFile = myfopen(infoFileName, "ab");
4396
5684
char modelType[128];
4398
if(adef->useInvariant)
4399
strcpy(modelType, "GAMMA+P-Invar");
4401
strcpy(modelType, "GAMMA");
4403
printf("\n\nYou are using %s version %s released by Alexandros Stamatakis in %s\n", programName, programVersion, programDate);
4404
fprintf(infoFile, "\n\nYou are using %s version %s released by Alexandros Stamatakis in %s\n", programName, programVersion, programDate);
4406
if(adef->mode == OPTIMIZE_RATES)
4408
printf("\nAlignment has %d columns\n\n", tr->cdta->endsite);
4409
fprintf(infoFile, "\nAlignment has %d columns\n\n", tr->cdta->endsite);
4413
printf("\nAlignment has %d distinct alignment patterns\n\n", tr->cdta->endsite);
4414
fprintf(infoFile, "\nAlignment has %d distinct alignment patterns\n\n", tr->cdta->endsite);
4417
if(adef->useInvariant)
4419
printf("Found %d invariant alignment patterns that correspond to %d columns \n", tr->numberOfInvariableColumns, tr->weightOfInvariableColumns);
4420
fprintf(infoFile, "Found %d invariant alignment patterns that correspond to %d columns \n", tr->numberOfInvariableColumns, tr->weightOfInvariableColumns);
4423
printf("Proportion of gaps and completely undetermined characters in this alignment: %f\n", adef->gapyness);
4424
fprintf(infoFile, "Proportion of gaps and completely undetermined characters in this alignment: %f\n",
5686
if(!adef->readTaxaOnly)
5688
if(adef->useInvariant)
5689
strcpy(modelType, "GAMMA+P-Invar");
5691
strcpy(modelType, "GAMMA");
5694
printVersionInfo(FALSE, infoFile);
5698
if(!adef->readTaxaOnly)
5700
if(!adef->compressPatterns)
5701
printBoth(infoFile, "\nAlignment has %d columns\n\n", tr->cdta->endsite);
5703
printBoth(infoFile, "\nAlignment has %d distinct alignment patterns\n\n", tr->cdta->endsite);
5705
if(adef->useInvariant)
5706
printBoth(infoFile, "Found %d invariant alignment patterns that correspond to %d columns \n", tr->numberOfInvariableColumns, tr->weightOfInvariableColumns);
5708
printBoth(infoFile, "Proportion of gaps and completely undetermined characters in this alignment: %3.2f%s\n", 100.0 * adef->gapyness, "%");
4427
5711
switch(adef->mode)
4430
printf("Arndt-Mode\n");
4431
fprintf(infoFile, "Arndt-Mode\n");
4433
case TREE_EVALUATION :
4434
printf("\nRAxML Model Optimization up to an accuracy of %f log likelihood units\n\n",
4435
adef->likelihoodEpsilon);
4436
fprintf(infoFile, "\nRAxML Model Optimization up to an accuracy of %f log likelihood units\n\n",
4437
adef->likelihoodEpsilon);
5714
printBoth(infoFile, "\nRAxML Computation of pairwise distances\n\n");
5716
case TREE_EVALUATION :
5717
printBoth(infoFile, "\nRAxML Model Optimization up to an accuracy of %f log likelihood units\n\n", adef->likelihoodEpsilon);
4439
5719
case BIG_RAPID_MODE:
4440
5720
if(adef->rapidBoot)
4442
5722
if(adef->allInOne)
4444
printf("\nRAxML rapid bootstrapping and subsequent ML search\n\n");
4445
fprintf(infoFile, "\nRAxML rapid bootstrapping and subsequent ML search\n\n");
5723
printBoth(infoFile, "\nRAxML rapid bootstrapping and subsequent ML search\n\n");
4449
printf("\nRAxML rapid bootstrapping algorithm\n\n");
4450
fprintf(infoFile, "\nRAxML rapid bootstrapping algorithm\n\n");
5725
printBoth(infoFile, "\nRAxML rapid bootstrapping algorithm\n\n");
4455
printf("\nRAxML rapid hill-climbing mode\n\n");
4456
fprintf(infoFile, "\nRAxML rapid hill-climbing mode\n\n");
5728
printBoth(infoFile, "\nRAxML rapid hill-climbing mode\n\n");
4459
5730
case CALC_BIPARTITIONS:
4460
printf("\nRAxML Bipartition Computation: Drawing support values from trees in file %s onto tree in file %s\n\n",
4461
bootStrapFile, tree_file);
4462
fprintf(infoFile, "\nRAxML Bipartition Computation: Drawing support values from trees in file %s onto tree in file %s\n\n",
4463
bootStrapFile, tree_file);
4466
case OPTIMIZE_RATES:
4467
if(!(adef->treeLength || adef->computePerSiteLLs))
4469
printf("\nRAxML optimization of per-site evolutionary rates\n\n");
4470
fprintf(infoFile,"\nRAxML optimization of per-site evolutionary rates\n\n");
4472
if(adef->treeLength)
4474
printf("\nRAxML optimization of per-site evolutionary rates and tree-length sliding window\n\n");
4475
fprintf(infoFile,"\nRAxML optimization of per-site evolutionary rates and tree-length sliding window\n\n");
4477
if(adef->computePerSiteLLs)
4479
printf("\nRAxML computation of per-site log likelihoods\n\n");
4480
fprintf(infoFile,"\nRAxML computation of per-site log likelihoods\n\n");
5731
printBoth(infoFile, "\nRAxML Bipartition Computation: Drawing support values from trees in file %s onto tree in file %s\n\n",
5732
bootStrapFile, tree_file);
5734
case CALC_BIPARTITIONS_IC:
5735
printBoth(infoFile, "\nRAxML IC and TC score Computation: Computing IC and TC scores induced by trees in file %s w.r.t. tree in file %s\n\n",
5736
bootStrapFile, tree_file);
5739
printBoth(infoFile, "\nRAxML computation of per-site log likelihoods\n");
4484
5741
case PARSIMONY_ADDITION:
4485
printf("\nRAxML stepwise MP addition to incomplete starting tree\n\n");
4486
fprintf(infoFile,"\nRAxML stepwise MP addition to incomplete starting tree\n\n");
4490
printf("\nRAxML single-sequence position determination algorithm\n\n");
4491
fprintf(infoFile,"\nRAxML single-sequence position determination algorithm\n\n");
5742
printBoth(infoFile, "\nRAxML stepwise MP addition to incomplete starting tree\n\n");
5745
printBoth(infoFile, "\nRAxML likelihood-based placement algorithm\n\n");
5748
printBoth(infoFile, "\nRAxML parsimony-based placement algorithm\n\n");
5751
printBoth(infoFile, "\nRAxML BS replicate generation\n\n");
5754
printBoth(infoFile, "\nRAxML ELW test\n\n");
5757
printBoth(infoFile, "\nRAxML a posteriori Bootstrap convergence assessment\n\n");
5759
case CONSENSUS_ONLY:
5760
if(adef->leaveDropMode)
5761
printBoth(infoFile, "\nRAxML rogue taxa computation by Andre Aberer (HITS)\n\n");
5763
printBoth(infoFile, "\nRAxML consensus tree computation\n\n");
5766
printBoth(infoFile, "\nRAxML computation of likelihoods for a set of trees\n\n");
5768
case COMPUTE_BIPARTITION_CORRELATION:
5769
printBoth(infoFile, "\nRAxML computation of bipartition support correlation on two sets of trees\n\n");
5771
case COMPUTE_RF_DISTANCE:
5772
printBoth(infoFile, "\nRAxML computation of RF distances for all pairs of trees in a set of trees\n\n");
5774
case MORPH_CALIBRATOR:
5775
printBoth(infoFile, "\nRAxML morphological calibrator using Maximum Likelihood\n\n");
5778
printBoth(infoFile, "\nRAxML experimental very fast tree search\n\n");
5780
case SH_LIKE_SUPPORTS:
5781
printBoth(infoFile, "\nRAxML computation of SH-like support values on a given tree\n\n");
5783
case EPA_SITE_SPECIFIC_BIAS:
5784
printBoth(infoFile, "\nRAxML exprimental site-specfific phylogenetic placement bias analysis algorithm\n\n");
5786
case ANCESTRAL_STATES:
5787
printBoth(infoFile, "\nRAxML marginal ancestral state computation\n\n");
5789
case QUARTET_CALCULATION:
5790
printBoth(infoFile, "\nRAxML quartet computation\n\n");
5792
case THOROUGH_OPTIMIZATION:
5793
printBoth(infoFile, "\nRAxML thorough tree optimization\n\n");
5795
case OPTIMIZE_BR_LEN_SCALER :
5796
printBoth(infoFile, "\nRAxML Branch length scaler and other model parameter optimization up to an accuracy of %f log likelihood units\n\n", adef->likelihoodEpsilon);
5798
case ANCESTRAL_SEQUENCE_TEST:
5799
printBoth(infoFile, "\nRAxML ancestral sequence test for Jiajie\n\n");
5801
case PLAUSIBILITY_CHECKER:
5802
printBoth(infoFile, "\nRAxML large-tree plausibility-checker\n\n");
5805
printBoth(infoFile, "\nRAxML tree rooting algorithm\n\n");
4494
printf("Oups, forgot to implement mode description %d exiting\n", adef->mode);
4498
if(tr->NumberOfModels > 1)
5812
if(!adef->readTaxaOnly)
4500
5814
if(adef->perGeneBranchLengths)
4502
printf("Partitioned Data Mode: Using %d distinct models/partitions with individual per partition branch length optimization\n", tr->NumberOfModels);
4503
fprintf(infoFile, "Partitioned Data Mode: Using %d distinct models/partitions with individual per partition branch length optimization\n", tr->NumberOfModels);
4505
fprintf(infoFile, "\n\n");
4509
printf("Partitioned Data Mode: Using %d distinct models/partitions with joint branch length optimization\n",
4510
tr->NumberOfModels);
4511
fprintf(infoFile, "Partitioned Data Mode: Using %d distinct models/partitions with joint branch length optimization\n",
4512
tr->NumberOfModels);
4514
fprintf(infoFile, "\n\n");
4522
printf("\nExecuting %d rapid bootstrap inferences and thereafter a thorough ML search \n\n", adef->multipleRuns);
4523
fprintf(infoFile, "\nExecuting %d rapid bootstrap inferences and thereafter a thorough ML search \n\n", adef->multipleRuns);
4527
printf("\nExecuting %d rapid bootstrap inferences\n\n", adef->multipleRuns);
4528
fprintf(infoFile, "\nExecuting %d rapid bootstrap inferences\n\n", adef->multipleRuns);
4535
if(adef->multipleRuns > 1)
4537
printf("Executing %d non-parametric bootstrap inferences\n\n", adef->multipleRuns);
4538
fprintf(infoFile, "Executing %d non-parametric bootstrap inferences\n\n", adef->multipleRuns);
4542
printf("Executing %d non-parametric bootstrap inference\n\n", adef->multipleRuns);
4543
fprintf(infoFile, "Executing %d non-parametric bootstrap inference\n\n", adef->multipleRuns);
4548
char treeType[1024];
4551
strcpy(treeType, "user-specifed");
4554
if(adef->randomStartingTree)
4555
strcpy(treeType, "distinct complete random");
4557
strcpy(treeType, "distinct randomized MP");
4561
if(adef->multipleRuns > 1)
4563
printf("Executing %d inferences on the original alignment using %d %s trees\n\n",
4564
adef->multipleRuns, adef->multipleRuns, treeType);
4565
fprintf(infoFile, "Executing %d inferences on the original alignment using %d %s trees\n\n",
4566
adef->multipleRuns, adef->multipleRuns, treeType);
4570
printf("Executing %d inference on the original alignment using a %s tree\n\n",
4571
adef->multipleRuns, treeType);
4572
fprintf(infoFile, "Executing %d inference on the original alignment using a %s tree\n\n",
4573
adef->multipleRuns, treeType);
4578
if(tr->rateHetModel == GAMMA || tr->rateHetModel == GAMMA_I)
4580
printf( "All free model parameters will be estimated by RAxML\n");
4581
printf( "%s model of rate heteorgeneity, ML estimate of alpha-parameter\n", modelType);
4582
printf( "%s Model parameters will be estimated up to an accuracy of %2.10f Log Likelihood units\n\n",
4583
modelType, adef->likelihoodEpsilon);
4584
fprintf(infoFile, "All free model parameters will be estimated by RAxML\n");
4585
fprintf(infoFile, "%s model of rate heterogeneity, ML estimate of alpha-parameter\n", modelType);
4586
fprintf(infoFile, "%s Model parameters will be estimated up to an accuracy of %2.10f Log Likelihood units\n\n",
4587
modelType, adef->likelihoodEpsilon);
4591
if(adef->useMixedModel)
4593
printf( "All free model parameters will be estimated by RAxML\n");
4594
printf( "ML estimate of %d per site rate categories\n", adef->categories);
4595
printf( "Likelihood of final tree will be evaluated and optimized under %s\n", modelType);
4596
printf( "Final model parameters will be estimated up to an accuracy of %2.10f Log Likelihood units\n\n",
4597
adef->likelihoodEpsilon);
4599
fprintf(infoFile, "All free model parameters will be estimated by RAxML\n");
4600
fprintf(infoFile, "ML estimate of %d per site rate categories\n", adef->categories);
4601
fprintf(infoFile, "Likelihood of final tree will be evaluated and optimized under %s\n", modelType);
4602
fprintf(infoFile, "Final model parameters will be estimated up to an accuracy of %2.10f Log Likelihood units\n\n",
4603
adef->likelihoodEpsilon);
4607
printf( "Approximation of rate heterogeneity only!\n");
4608
printf( "All free model parameters will be estimated by RAxML\n");
4609
printf( "ML estimate of %d per site rate categories\n", adef->categories);
4610
printf( "WARNING: CAT-based likelihood values should NEVER be used to COMPARE trees!\n\n");
4612
fprintf(infoFile, "Approximation of rate heterogeneity only!\n");
4613
fprintf(infoFile, "All free model parameters will be estimated by RAxML\n");
4614
fprintf(infoFile, "ML estimate of %d per site rate categories\n", adef->categories);
4615
fprintf(infoFile, "WARNING: CAT-based likelihood values should NEVER be used to COMPARE trees!\n\n");
4620
for(model = 0; model < tr->NumberOfModels; model++)
4622
printf("Partition: %d\n", model);
4623
printf("Name: %s\n", tr->partitionData[model].partitionName);
4625
fprintf(infoFile, "Partition: %d\n", model);
4626
fprintf(infoFile, "Name: %s\n", tr->partitionData[model].partitionName);
4628
switch(tr->partitionData[model].dataType)
4631
printf("DataType: DNA\n");
4632
printf("Substitution Matrix: GTR\n");
4636
printf("Empirical Base Frequencies:\n");
4637
printf("pi(A): %f pi(C): %f pi(G): %f pi(T): %f",
4638
tr->frequencies_DNA[model * 4 + 0], tr->frequencies_DNA[model * 4 + 1],
4639
tr->frequencies_DNA[model * 4 + 2], tr->frequencies_DNA[model * 4 + 3]);
4643
printf("Empirical Base Frequencies will not be printed for Bootstrapping\n");
4646
fprintf(infoFile, "DataType: DNA\n");
4647
fprintf(infoFile, "Substitution Matrix: GTR\n");
4651
fprintf(infoFile, "Empirical Base Frequencies:\n");
4652
fprintf(infoFile, "pi(A): %f pi(C): %f pi(G): %f pi(T): %f",
4653
tr->frequencies_DNA[model * 4 + 0], tr->frequencies_DNA[model * 4 + 1],
4654
tr->frequencies_DNA[model * 4 + 2], tr->frequencies_DNA[model * 4 + 3]);
4658
fprintf(infoFile, "Empirical Base Frequencies will not be printed for Bootstrapping\n");
4663
char *protStrings[10] = {"DAYHOFF", "DCMUT", "JTT", "MTREV", "WAG", "RTREV", "CPREV", "VT", "BLOSUM62", "MTMAM"};
4664
char basesPROT[20] = {'A', 'R', 'N' , 'D', 'C', 'Q','E','G','H','I','L','K','M','F','P','S','T','W','Y','V'};
4666
assert(tr->partitionData[model].protModels >= 0 && tr->partitionData[model].protModels < 10);
4668
printf("DataType: AA\n");
4669
printf("Substitution Matrix: %s\n", protStrings[tr->partitionData[model].protModels]);
4670
printf("%s Base Frequencies:\n", (tr->partitionData[model].protFreqs == 1)?"Empirical":"Fixed");
4676
for(k = 0; k < 20; k++)
4678
if(k % 4 == 0 && k > 0)
4681
printf("pi(%c): %f ", basesPROT[k], tr->frequencies_AA[model * 20 + k]);
4686
printf("Base Frequencies will not be printed for Bootstrapping\n");
4689
fprintf(infoFile, "DataType: AA\n");
4690
fprintf(infoFile, "Substitution Matrix: %s\n", protStrings[tr->partitionData[model].protModels]);
4691
fprintf(infoFile, "%s Base Frequencies:\n", (tr->partitionData[model].protFreqs == 1)?"Empirical":"Fixed");
4697
for(k = 0; k < 20; k++)
4700
fprintf(infoFile, "\n");
4702
fprintf(infoFile, "pi(%c): %f ", basesPROT[k], tr->frequencies_AA[model * 20 + k]);
4707
fprintf(infoFile, "Base Frequencies will not be printed for Bootstrapping\n");
4718
fprintf(infoFile,"\n\n\n");
4722
fprintf(infoFile, "\n");
4724
fprintf(infoFile,"RAxML was called as follows:\n\n");
5815
printBoth(infoFile, "Using %d distinct models/data partitions with individual per partition branch length optimization\n\n\n", tr->NumberOfModels);
5817
printBoth(infoFile, "Using %d distinct models/data partitions with joint branch length optimization\n\n\n", tr->NumberOfModels);
5820
if(adef->mode == BIG_RAPID_MODE)
5825
printBoth(infoFile, "\nExecuting %d rapid bootstrap inferences and thereafter a thorough ML search \n\n", adef->multipleRuns);
5827
printBoth(infoFile, "\nExecuting %d rapid bootstrap inferences\n\n", adef->multipleRuns);
5832
printBoth(infoFile, "Executing %d non-parametric bootstrap inferences\n\n", adef->multipleRuns);
5835
char treeType[1024];
5838
strcpy(treeType, "user-specifed");
5841
if(adef->randomStartingTree)
5842
strcpy(treeType, "distinct complete random");
5844
strcpy(treeType, "distinct randomized MP");
5847
printBoth(infoFile, "Executing %d inferences on the original alignment using %d %s trees\n\n",
5848
adef->multipleRuns, adef->multipleRuns, treeType);
5854
if(!adef->readTaxaOnly)
5856
printBoth(infoFile, "All free model parameters will be estimated by RAxML\n");
5859
if(tr->rateHetModel == GAMMA || tr->rateHetModel == GAMMA_I)
5860
printBoth(infoFile, "%s model of rate heteorgeneity, ML estimate of alpha-parameter\n\n", modelType);
5863
printBoth(infoFile, "ML estimate of %d per site rate categories\n\n", adef->categories);
5864
if(adef->mode != CLASSIFY_ML && adef->mode != CLASSIFY_MP)
5865
printBoth(infoFile, "Likelihood of final tree will be evaluated and optimized under %s\n\n", modelType);
5868
if(adef->mode != CLASSIFY_ML && adef->mode != CLASSIFY_MP)
5869
printBoth(infoFile, "%s Model parameters will be estimated up to an accuracy of %2.10f Log Likelihood units\n\n",
5870
modelType, adef->likelihoodEpsilon);
5873
for(model = 0; model < tr->NumberOfModels; model++)
5875
printBoth(infoFile, "Partition: %d\n", model);
5876
printBoth(infoFile, "Alignment Patterns: %d\n", tr->partitionData[model].upper - tr->partitionData[model].lower);
5877
printBoth(infoFile, "Name: %s\n", tr->partitionData[model].partitionName);
5879
switch(tr->partitionData[model].dataType)
5882
printBoth(infoFile, "DataType: DNA\n");
5883
printBoth(infoFile, "Substitution Matrix: GTR\n");
5886
assert(tr->partitionData[model].protModels >= 0 && tr->partitionData[model].protModels < NUM_PROT_MODELS);
5887
printBoth(infoFile, "DataType: AA\n");
5888
if(tr->partitionData[model].protModels != PROT_FILE)
5890
printBoth(infoFile, "Substitution Matrix: %s\n", protModels[tr->partitionData[model].protModels]);
5891
printBoth(infoFile, "Using %s base frequencies\n", (tr->partitionData[model].usePredefinedProtFreqs == TRUE)?"fixed":"empirical");
5895
printBoth(infoFile, "Substitution Matrix File name: %s\n", tr->partitionData[model].proteinSubstitutionFileName);
5896
printBoth(infoFile, "Using base frequencies as provided in the model file\n");
5900
printBoth(infoFile, "DataType: BINARY/MORPHOLOGICAL\n");
5901
printBoth(infoFile, "Substitution Matrix: Uncorrected\n");
5903
case SECONDARY_DATA:
5904
printBoth(infoFile, "DataType: SECONDARY STRUCTURE\n");
5905
printBoth(infoFile, "Substitution Matrix: %s\n", secondaryModelList[tr->secondaryStructureModel]);
5907
case SECONDARY_DATA_6:
5908
printBoth(infoFile, "DataType: SECONDARY STRUCTURE 6 STATE\n");
5909
printBoth(infoFile, "Substitution Matrix: %s\n", secondaryModelList[tr->secondaryStructureModel]);
5911
case SECONDARY_DATA_7:
5912
printBoth(infoFile, "DataType: SECONDARY STRUCTURE 7 STATE\n");
5913
printBoth(infoFile, "Substitution Matrix: %s\n", secondaryModelList[tr->secondaryStructureModel]);
5916
printBoth(infoFile, "DataType: Multi-State with %d distinct states in use (maximum 32)\n",tr->partitionData[model].states);
5917
switch(tr->multiStateModel)
5919
case ORDERED_MULTI_STATE:
5920
printBoth(infoFile, "Substitution Matrix: Ordered Likelihood\n");
5922
case MK_MULTI_STATE:
5923
printBoth(infoFile, "Substitution Matrix: MK model\n");
5925
case GTR_MULTI_STATE:
5926
printBoth(infoFile, "Substitution Matrix: GTR\n");
5933
printBoth(infoFile, "DataType: Codon\n");
5938
printBoth(infoFile, "\n\n\n");
5942
printBoth(infoFile, "\n");
5944
printBoth(infoFile, "RAxML was called as follows:\n\n");
4725
5945
for(i = 0; i < argc; i++)
4726
fprintf(infoFile,"%s ", argv[i]);
4727
fprintf(infoFile,"\n\n\n");
5946
printBoth(infoFile,"%s ", argv[i]);
5947
printBoth(infoFile,"\n\n\n");
4729
5949
fclose(infoFile);
5246
6729
avgLH /= ((double)adef->multipleRuns);
5248
printf("\n\nOverall Time for %d Inferences %f\n", adef->multipleRuns, t);
5249
printf("Average Time per Inference %f\n", (double)(t/((double)adef->multipleRuns)));
5250
printf("Average Likelihood : %f\n", avgLH);
5252
printf("Best Likelihood in run number %d: likelihood %f\n\n", bestI, bestLH);
5254
if(adef->checkpoints)
5255
printf("Checkpoints written to: %s.RUN.%d.* to %d.*\n", checkpointFileName, 0, adef->multipleRuns - 1);
5258
if(adef->randomStartingTree)
5259
printf("Random starting trees written to: %s.RUN.%d to %d\n", randomFileName, 0, adef->multipleRuns - 1);
5261
printf("Parsimony starting trees written to: %s.RUN.%d to %d\n", permFileName, 0, adef->multipleRuns - 1);
5263
printf("Final trees written to: %s.RUN.%d to %d\n", resultFileName, 0, adef->multipleRuns - 1);
5264
printf("Execution Log Files written to: %s.RUN.%d to %d\n", logFileName, 0, adef->multipleRuns - 1);
5265
printf("Execution information file written to: %s\n", infoFileName);
5268
fprintf(infoFile, "\n\nOverall Time for %d Inferences %f\n", adef->multipleRuns, t);
5269
fprintf(infoFile, "Average Time per Inference %f\n", (double)(t/((double)adef->multipleRuns)));
5270
fprintf(infoFile, "Average Likelihood : %f\n", avgLH);
5271
fprintf(infoFile, "\n");
5272
fprintf(infoFile, "Best Likelihood in run number %d: likelihood %f\n\n", bestI, bestLH);
5273
if(adef->checkpoints)
5274
fprintf(infoFile, "Checkpoints written to: %s.RUN.%d.* to %d.*\n", checkpointFileName, 0, adef->multipleRuns - 1);
5277
if(adef->randomStartingTree)
5278
fprintf(infoFile, "Random starting trees written to: %s.RUN.%d to %d\n", randomFileName, 0, adef->multipleRuns - 1);
5280
fprintf(infoFile, "Parsimony starting trees written to: %s.RUN.%d to %d\n", permFileName, 0, adef->multipleRuns - 1);
5282
fprintf(infoFile, "Final trees written to: %s.RUN.%d to %d\n", resultFileName, 0, adef->multipleRuns - 1);
5283
fprintf(infoFile, "Execution Log Files written to: %s.RUN.%d to %d\n", logFileName, 0, adef->multipleRuns - 1);
5284
fprintf(infoFile, "Execution information file written to: %s\n", infoFileName);
6731
printBothOpen("\n\nOverall Time for %d Inferences %f\n", adef->multipleRuns, t);
6732
printBothOpen("Average Time per Inference %f\n", (double)(t/((double)adef->multipleRuns)));
6733
printBothOpen("Average Likelihood : %f\n", avgLH);
6734
printBothOpen("\n");
6735
printBothOpen("Best Likelihood in run number %d: likelihood %f\n\n", bestI, bestLH);
6737
if(adef->checkpoints)
6738
printBothOpen("Checkpoints written to: %s.RUN.%d.* to %d.*\n", checkpointFileName, 0, adef->multipleRuns - 1);
6741
if(adef->randomStartingTree)
6742
printBothOpen("Random starting trees written to: %s.RUN.%d to %d\n", randomFileName, 0, adef->multipleRuns - 1);
6744
printBothOpen("Parsimony starting trees written to: %s.RUN.%d to %d\n", permFileName, 0, adef->multipleRuns - 1);
6746
printBothOpen("Final trees written to: %s.RUN.%d to %d\n", resultFileName, 0, adef->multipleRuns - 1);
6747
printBothOpen("Execution Log Files written to: %s.RUN.%d to %d\n", logFileName, 0, adef->multipleRuns - 1);
6748
printBothOpen("Execution information file written to: %s\n", infoFileName);
5289
printf("\n\nOverall Time for 1 Inference %f\n", t);
5290
printf("Likelihood : %f\n", tr->likelihood);
5293
if(adef->checkpoints)
5294
printf("Checkpoints written to: %s.*\n", checkpointFileName);
5297
if(adef->randomStartingTree)
5298
printf("Random starting tree written to: %s\n", randomFileName);
5300
printf("Parsimony starting tree written to: %s\n", permFileName);
5302
printf("Final tree written to: %s\n", resultFileName);
5303
printf("Execution Log File written to: %s\n", logFileName);
5304
printf("Execution information file written to: %s\n",infoFileName);
5308
fprintf(infoFile, "\n\nOverall Time for 1 Inference %f\n", t);
5309
fprintf(infoFile, "Likelihood : %f\n", tr->likelihood);
5310
fprintf(infoFile, "\n\n");
5312
if(adef->checkpoints)
5313
fprintf(infoFile, "Checkpoints written to: %s.*\n", checkpointFileName);
5316
if(adef->randomStartingTree)
5317
fprintf(infoFile, "Random starting tree written to: %s\n", randomFileName);
5319
fprintf(infoFile, "Parsimony starting tree written to: %s\n", permFileName);
5321
fprintf(infoFile, "Final tree written to: %s\n", resultFileName);
5322
fprintf(infoFile, "Execution Log File written to: %s\n", logFileName);
5323
fprintf(infoFile, "Execution information file written to: %s\n",infoFileName);
6752
printBothOpen("\n\nOverall Time for 1 Inference %f\n", t);
6753
printBothOpen("Likelihood : %f\n", tr->likelihood);
6754
printBothOpen("\n\n");
6756
if(adef->checkpoints)
6757
printBothOpen("Checkpoints written to: %s.*\n", checkpointFileName);
6760
if(adef->randomStartingTree)
6761
printBothOpen("Random starting tree written to: %s\n", randomFileName);
6763
printBothOpen("Parsimony starting tree written to: %s\n", permFileName);
6765
printBothOpen("Final tree written to: %s\n", resultFileName);
6766
printBothOpen("Execution Log File written to: %s\n", logFileName);
6767
printBothOpen("Execution information file written to: %s\n",infoFileName);
5329
6772
case CALC_BIPARTITIONS:
5330
printf("\n\nTime for Computation of Bipartitions %f\n", t);
5331
printf("Tree with bipartitions written to file: %s\n", bipartitionsFileName);
5332
printf("Execution information file written to : %s\n",infoFileName);
5335
fprintf(infoFile, "\n\nTime for Computation of Bipartitions %f\n", t);
5336
fprintf(infoFile, "Tree with bipartitions written to file: %s\n", bipartitionsFileName);
6773
printBothOpen("\n\nTime for Computation of Bipartitions %f\n", t);
6774
printBothOpen("Tree with bipartitions written to file: %s\n", bipartitionsFileName);
6775
printBothOpen("Tree with bipartitions as branch labels written to file: %s\n", bipartitionsFileNameBranchLabels);
6776
printBothOpen("Execution information file written to : %s\n",infoFileName);
5340
case OPTIMIZE_RATES:
5341
if(! (adef->computePerSiteLLs || adef->treeLength))
5343
printf("\n\nTime for Optimization of per-site rates %f\n", t);
5344
printf("Optimized rates written to file: %s\n", ratesFileName);
5345
printf("Execution information file written to : %s\n",infoFileName);
5348
fprintf(infoFile, "\n\nTime for Optimization of per-site rates %f\n", t);
5349
fprintf(infoFile, "Optimized rates written to file: %s\n", ratesFileName);
5352
if(adef->treeLength)
5354
printf("\n\nTime for Optimization of per-site rates and sliding window tree length %f\n", t);
5355
printf("Optimized rates written to file: %s, \n sliding window data written to %s and %s\n", ratesFileName, lengthFileName, lengthFileNameModel);
5356
printf("Execution information file written to : %s\n",infoFileName);
5359
fprintf(infoFile, "\n\nTime for Optimization of per-site rates and sliding window tree length %f\n", t);
5360
fprintf(infoFile, "Optimized rates written to file: %s, \n sliding window data written to %s and %s\n", ratesFileName, lengthFileName, lengthFileNameModel);
5363
if(adef->computePerSiteLLs)
5365
printf("\n\nTime for Optimization of per-site log likelihoods %f\n", t);
5366
printf("Per-site Log Likelihoods written to File %s in Tree-Puzzle format\n", perSiteLLsFileName);
5367
printf("Execution information file written to : %s\n",infoFileName);
5370
fprintf(infoFile, "\n\nTime for Optimization of per-site log likelihoods %f\n", t);
5371
fprintf(infoFile, "Per-site Log Likelihoods written to File %s in Tree-Puzzle format\n", perSiteLLsFileName);
6778
case CALC_BIPARTITIONS_IC:
6779
printBothOpen("\n\nTime for Computation of TC and IC scores %f\n", t);
6780
printBothOpen("Tree with IC scores as branch labels written to file: %s\n", icFileNameBranchLabels);
6781
printBothOpen("Execution information file written to : %s\n",infoFileName);
6784
printBothOpen("\n\nTime for Optimization of per-site log likelihoods %f\n", t);
6785
printBothOpen("Per-site Log Likelihoods written to File %s in Tree-Puzzle format\n", perSiteLLsFileName);
6786
printBothOpen("Execution information file written to : %s\n",infoFileName);
5375
6789
case PARSIMONY_ADDITION:
5376
printf("\n\nTime for MP stepwise addition %f\n", t);
5377
printf("Execution information file written to : %s\n",infoFileName);
5378
printf("Complete parsimony tree written to: %s\n", permFileName);
5382
fprintf(infoFile, "\n\nTime for MP stepwise addition %f\n", t);
5383
fprintf(infoFile, "Complete parsimony tree written to: %s\n", permFileName);
6790
printBothOpen("\n\nTime for MP stepwise addition %f\n", t);
6791
printBothOpen("Execution information file written to : %s\n",infoFileName);
6792
printBothOpen("Complete parsimony tree written to: %s\n", permFileName);
6794
case ANCESTRAL_STATES:
6795
printBothOpen("\n\nTime for marginal ancestral state computation: %f\n\n", t);
6797
case QUARTET_CALCULATION:
6798
printBothOpen("\n\nOverall Time for quartet computation: %f\n\n", t);
6800
case THOROUGH_OPTIMIZATION:
6801
printBothOpen("\n\nTime for thorough tree optimization: %f\n\n", t);
6804
printBothOpen("\n\nTime for tree rooting: %f\n\n", t);
5397
/********************PRINTING various INFO **************************************/
5399
6814
/************************************************************************************/
5401
static void computeLHTest(tree *tr, analdef *adef, char *bootStrapFileName)
5403
int numberOfTrees = 0, i;
5405
double bestLH, currentLH, weightSum = 0.0;
5406
double *bestVector, *otherVector;
5409
bestVector = (double*)malloc(sizeof(double) * tr->cdta->endsite);
5410
otherVector = (double*)malloc(sizeof(double) * tr->cdta->endsite);
5412
for(i = 0; i < tr->cdta->endsite; i++)
5413
weightSum += (double)(tr->cdta->aliaswgt[i]);
5416
printf("Model optimization, best Tree: %f\n", tr->likelihood);
5417
bestLH = tr->likelihood;
5420
evaluateGenericInitrav(tr, tr->start);
5422
evaluateGenericVector(tr, tr->start, bestVector);
5424
INFILE = fopen(bootStrapFileName, "r");
5425
while((ch = getc(INFILE)) != EOF)
5432
printf("Found %d trees in File %s\n", numberOfTrees, bootStrapFileName);
5434
for(i = 0; i < numberOfTrees; i++)
5436
treeReadLen(INFILE, tr, adef);
5437
treeEvaluate(tr, 2);
5438
tr->start = tr->nodep[1];
5440
evaluateGenericInitrav(tr, tr->start);
5442
currentLH = tr->likelihood;
5443
if(currentLH > bestLH)
5445
printf("Better tree found %d at %f\n", i, currentLH);
5448
/*printf("Tree %d %f\n",i, tr->likelihood);*/
5450
evaluateGenericVector(tr, tr->start, otherVector);
5454
double temp, wtemp, sum, sum2, sd;
5459
for (j = 0; j < tr->cdta->endsite; j++)
5461
temp = bestVector[j] - otherVector[j];
5462
wtemp = tr->cdta->aliaswgt[j] * temp;
5464
sum2 += wtemp * temp;
5467
sd = sqrt( weightSum * (sum2 - sum*sum / weightSum)
5468
/ (weightSum - 1) );
5470
printf("Tree: %d Likelihood: %f D(LH): %f SD: %f Significantly Worse: %s\n", i, currentLH, currentLH - bestLH, sd, (sum > 1.95996 * sd) ? "Yes" : " No");
5481
static void computePerSiteLLs(tree *tr, analdef *adef, char *bootStrapFileName)
5483
int numberOfTrees = 0, i, j;
5485
double *otherVector;
5488
tlf = fopen( perSiteLLsFileName, "w");
5490
otherVector = (double*)malloc(sizeof(double) * tr->cdta->endsite);
5492
allocNodex(tr, adef);
5494
INFILE = fopen(bootStrapFileName, "r");
5495
while((ch = getc(INFILE)) != EOF)
5502
printf("Found %d trees in File %s\n", numberOfTrees, bootStrapFileName);
5504
fprintf(tlf, " %d %d\n", numberOfTrees, tr->cdta->endsite);
5506
for(i = 0; i < numberOfTrees; i++)
5508
treeReadLen(INFILE, tr, adef);
5512
treeEvaluate(tr, 2);
5514
printf("Tree %d: %f\n", i, tr->likelihood);
5516
tr->start = tr->nodep[1];
5518
evaluateGenericInitrav(tr, tr->start);
5520
evaluateGenericVector(tr, tr->start, otherVector);
5522
fprintf(tlf, "tr%d\t", i + 1);
5523
for(j = 0; j < tr->cdta->endsite; j++)
5525
fprintf(tlf, "%f ", otherVector[j]);
5535
6817
#ifdef _USE_PTHREADS
5540
static void pinThread2Cpu(int myTid)
5542
char *coreSteppingStr;
5546
coreSteppingStr = getenv("SCHEDULE");
5547
len = coreSteppingStr ? strlen(coreSteppingStr) : 0;
5553
if ((coreSteppingStr[myTid] >= '0') && (coreSteppingStr[myTid] <= '9'))
5554
myCore = coreSteppingStr[myTid] - '0';
5556
if ((coreSteppingStr[myTid] >= 'a') && (coreSteppingStr[myTid] <= 'f'))
5557
myCore = coreSteppingStr[myTid] - 'a' + 10;
5559
if ((coreSteppingStr[myTid] >= 'A') && (coreSteppingStr[myTid] <= 'F'))
5560
myCore = coreSteppingStr[myTid] - 'A' + 10;
5564
CPU_SET(myCore, &cpuMask);
5566
if (sched_setaffinity(0, sizeof(cpuMask), &cpuMask))
5568
printf("Error while scheduling Thread #%d to CPU %d\n",
5573
/* printf("Scheduled Thread #%d to logical CPU %d\n", myTid, myCore); */
5585
static void calcBounds(int tid, const int n, int start, int end, int *l, int *u)
5587
int span = end - start;
5590
/* assert(span % n == 0); */
5595
span = 1 + (span / n);
5597
*l = start + tid * span;
5606
static void strided_Bounds(int tid, int endsite, int n, int *startIndex, int *endIndex)
5608
int endsiteL = endsite - tid;
5610
if(endsiteL % n == 0)
5611
endsiteL = endsiteL / n;
5613
endsiteL = 1 + (endsiteL / n);
5616
*endIndex = endsiteL;
5619
static void collectDouble(double *dest, double *source, const int totallength, const int stride, const int offset)
5625
for(; k < totallength; i++, k += stride)
5626
dest[k] = source[i];
5632
static void strideTips(char **dest, char **source, const int totallength, const int stride,
5633
const int offset, const int mxtips, int strideLength)
5637
assert(offset < stride);
5639
for(i = 0; i < mxtips; i++)
5641
char *d = &dest[i + 1][0];
5642
char *s = &source[i + 1][0];
5644
for(k = 0, j = offset; j < totallength; j += stride, k++)
5646
assert(k < strideLength);
5653
static void strideInt(int *dest, int *source, const int totallength, const int stride, const int offset)
5658
for(i = offset, k = 0; i < totallength; i += stride, k++)
5662
static void strideDouble(double *dest, double *source, const int totallength, const int stride, const int offset)
5667
for(; i < totallength; i += stride, d++)
5672
static void stridePartitionData(tree *localTree, int tid, int n, int length)
6824
static void computeFraction(tree *localTree, int tid, int n)
6832
for(model = 0; model < localTree->NumberOfModels; model++)
6836
for(i = localTree->partitionData[model].lower; i < localTree->partitionData[model].upper; i++)
6837
if(i % (size_t)n == (size_t)tid)
6840
localTree->partitionData[model].width = width;
6846
static void threadFixModelIndices(tree *tr, tree *localTree, int tid, int n)
5679
strided_Bounds(tid, length, n, &dummy, &endsite);
5681
/* printf("%d %d\n", endsite, tid); */
5683
for(i = 0; i < localTree->NumberOfModels; i++)
5685
localTree->strided_partitionData[i].dataType = localTree->partitionData[i].dataType;
5686
localTree->strided_partitionData[i].protModels = localTree->strided_partitionData[i].protModels;
5687
localTree->strided_partitionData[i].protFreqs = localTree->strided_partitionData[i].protFreqs;
5690
if(localTree->NumberOfModels > 1)
5694
localTree->strided_partitionData[0].lower = 0;
5696
model = localTree->strided_model[0];
5701
if(localTree->strided_model[i] != model)
5703
localTree->strided_partitionData[model].upper = i;
5704
localTree->strided_partitionData[model + 1].lower = i;
5705
model = localTree->strided_model[i];
6858
for(model = 0; model < (size_t)localTree->NumberOfModels; model++)
6860
localTree->partitionData[model].lower = tr->partitionData[model].lower;
6861
localTree->partitionData[model].upper = tr->partitionData[model].upper;
6864
computeFraction(localTree, tid, n);
6866
for(model = 0, offset = 0, countOffset = 0; model < (size_t)localTree->NumberOfModels; model++)
6868
localTree->partitionData[model].sumBuffer = &localTree->sumBuffer[offset];
6869
localTree->partitionData[model].perSiteLL = &localTree->perSiteLLPtr[countOffset];
6870
localTree->partitionData[model].wgt = &localTree->wgtPtr[countOffset];
6871
localTree->partitionData[model].invariant = &localTree->invariantPtr[countOffset];
6872
localTree->partitionData[model].rateCategory = &localTree->rateCategoryPtr[countOffset];
6874
countOffset += localTree->partitionData[model].width;
6876
offset += (size_t)(tr->discreteRateCategories) * (size_t)(tr->partitionData[model].states) * (size_t)(localTree->partitionData[model].width);
6879
myLength = countOffset;
6882
/* figure in data */
6884
for(i = 0; i < (size_t)localTree->mxtips; i++)
6886
for(model = 0, offset = 0, countOffset = 0; model < (size_t)localTree->NumberOfModels; model++)
6888
localTree->partitionData[model].yVector[i+1] = &localTree->y_ptr[i * myLength + countOffset];
6889
countOffset += localTree->partitionData[model].width;
6891
assert(countOffset == myLength);
6896
for(model = 0, globalCounter = 0; model < (size_t)localTree->NumberOfModels; model++)
6898
for(localCounter = 0, i = (size_t)localTree->partitionData[model].lower; i < (size_t)localTree->partitionData[model].upper; i++)
6900
if(i % (size_t)n == (size_t)tid)
6902
localTree->partitionData[model].wgt[localCounter] = tr->cdta->aliaswgt[globalCounter];
6903
localTree->partitionData[model].invariant[localCounter] = tr->invariant[globalCounter];
6904
localTree->partitionData[model].rateCategory[localCounter] = tr->cdta->rateCategory[globalCounter];
6906
for(j = 1; j <= (size_t)localTree->mxtips; j++)
6907
localTree->partitionData[model].yVector[j][localCounter] = tr->yVector[j][globalCounter];
5711
localTree->strided_partitionData[localTree->NumberOfModels - 1].upper = endsite;
5714
for(i = 0; i < localTree->NumberOfModels; i++)
5715
printf("%d %d %d\n", tid, localTree->strided_partitionData[i].lower, localTree->strided_partitionData[i].upper);
5720
localTree->strided_partitionData[0].lower = 0;
5721
localTree->strided_partitionData[0].upper = endsite;
6915
for(model = 0; model < (size_t)localTree->NumberOfModels; model++)
6918
undetermined = getUndetermined(localTree->partitionData[model].dataType);
6921
width = localTree->partitionData[model].width;
6923
localTree->partitionData[model].gapVectorLength = ((int)width / 32) + 1;
6925
memset(localTree->partitionData[model].gapVector, 0, localTree->partitionData[model].initialGapVectorSize);
6927
for(j = 1; j <= (size_t)(localTree->mxtips); j++)
6928
for(i = 0; i < width; i++)
6929
if(localTree->partitionData[model].yVector[j][i] == undetermined)
6930
localTree->partitionData[model].gapVector[localTree->partitionData[model].gapVectorLength * j + i / 32] |= mask32[i % 32];
6935
static void initPartition(tree *tr, tree *localTree, int tid)
6939
localTree->threadID = tid;
6943
int totalLength = 0;
6945
localTree->useGammaMedian = tr->useGammaMedian;
6946
localTree->saveMemory = tr->saveMemory;
6947
localTree->innerNodes = tr->innerNodes;
6948
localTree->useFastScaling = tr->useFastScaling;
6949
localTree->perPartitionEPA = tr->perPartitionEPA;
6950
localTree->maxCategories = tr->maxCategories;
6952
localTree->originalCrunchedLength = tr->originalCrunchedLength;
6953
localTree->NumberOfModels = tr->NumberOfModels;
6954
localTree->mxtips = tr->mxtips;
6955
localTree->multiBranch = tr->multiBranch;
6957
localTree->nameList = tr->nameList;
6958
localTree->numBranches = tr->numBranches;
6959
localTree->lhs = (double*)rax_malloc(sizeof(double) * localTree->originalCrunchedLength);
6960
localTree->executeModel = (boolean*)rax_malloc(sizeof(boolean) * localTree->NumberOfModels);
6961
localTree->perPartitionLH = (double*)rax_malloc(sizeof(double) * localTree->NumberOfModels);
6962
localTree->storedPerPartitionLH = (double*)rax_malloc(sizeof(double) * localTree->NumberOfModels);
6964
localTree->fracchanges = (double*)rax_malloc(sizeof(double) * localTree->NumberOfModels);
6965
localTree->rawFracchanges = (double*)rax_malloc(sizeof(double) * localTree->NumberOfModels);
6967
localTree->partitionContributions = (double*)rax_malloc(sizeof(double) * localTree->NumberOfModels);
6969
localTree->partitionData = (pInfo*)rax_malloc(sizeof(pInfo) * localTree->NumberOfModels);
6971
/* extend for multi-branch */
6972
localTree->td[0].count = 0;
6973
localTree->td[0].ti = (traversalInfo *)rax_malloc(sizeof(traversalInfo) * localTree->mxtips);
6975
localTree->cdta = (cruncheddata*)rax_malloc(sizeof(cruncheddata));
6976
localTree->cdta->patrat = (double*)rax_malloc(sizeof(double) * localTree->originalCrunchedLength);
6977
localTree->cdta->patratStored = (double*)rax_malloc(sizeof(double) * localTree->originalCrunchedLength);
6979
localTree->discreteRateCategories = tr->discreteRateCategories;
6981
for(model = 0; model < localTree->NumberOfModels; model++)
6983
localTree->partitionData[model].numberOfCategories = tr->partitionData[model].numberOfCategories;
6984
localTree->partitionData[model].states = tr->partitionData[model].states;
6985
localTree->partitionData[model].maxTipStates = tr->partitionData[model].maxTipStates;
6986
localTree->partitionData[model].dataType = tr->partitionData[model].dataType;
6987
localTree->partitionData[model].protModels = tr->partitionData[model].protModels;
6988
localTree->partitionData[model].usePredefinedProtFreqs = tr->partitionData[model].usePredefinedProtFreqs;
6989
localTree->partitionData[model].mxtips = tr->partitionData[model].mxtips;
6990
localTree->partitionData[model].lower = tr->partitionData[model].lower;
6991
localTree->partitionData[model].upper = tr->partitionData[model].upper;
6992
localTree->executeModel[model] = TRUE;
6993
localTree->perPartitionLH[model] = 0.0;
6994
localTree->storedPerPartitionLH[model] = 0.0;
6995
totalLength += (localTree->partitionData[model].upper - localTree->partitionData[model].lower);
6998
assert(totalLength == localTree->originalCrunchedLength);
7001
for(model = 0; model < localTree->NumberOfModels; model++)
7002
localTree->partitionData[model].width = 0;
7006
static void allocNodex(tree *tr, int tid, int n)
7010
memoryRequirements = 0,
7013
computeFraction(tr, tid, n);
7015
allocPartitions(tr);
7018
for(model = 0; model < (size_t)tr->NumberOfModels; model++)
7021
width = tr->partitionData[model].width,
7026
memoryRequirements += (size_t)(tr->discreteRateCategories) * (size_t)(tr->partitionData[model].states) * width;
7028
tr->partitionData[model].gapVectorLength = ((int)width / 32) + 1;
7030
tr->partitionData[model].gapVector = (unsigned int*)rax_calloc(tr->partitionData[model].gapVectorLength * 2 * tr->mxtips, sizeof(unsigned int));
7032
tr->partitionData[model].initialGapVectorSize = tr->partitionData[model].gapVectorLength * 2 * tr->mxtips * sizeof(int);
7034
/* always multiply by 4 due to frequent switching between CAT and GAMMA in standard RAxML */
7036
tr->partitionData[model].gapColumn = (double *)rax_malloc(
7037
((size_t)(tr->innerNodes)) *
7039
((size_t)(tr->partitionData[model].states)) *
7041
for(i = 0; i < tr->innerNodes; i++)
7043
tr->partitionData[model].xVector[i] = (double*)NULL;
7044
tr->partitionData[model].expVector[i] = (int*)NULL;
7050
tr->perSiteLL = (double *)rax_malloc((size_t)tr->cdta->endsite * sizeof(double));
7051
assert(tr->perSiteLL != NULL);
7054
tr->sumBuffer = (double *)rax_malloc(memoryRequirements * sizeof(double));
7055
assert(tr->sumBuffer != NULL);
7057
tr->y_ptr = (unsigned char *)rax_malloc(myLength * (size_t)(tr->mxtips) * sizeof(unsigned char));
7058
assert(tr->y_ptr != NULL);
7060
tr->perSiteLLPtr = (double*) rax_malloc(myLength * sizeof(double));
7061
assert(tr->perSiteLLPtr != NULL);
7063
tr->wgtPtr = (int*) rax_malloc(myLength * sizeof(int));
7064
assert(tr->wgtPtr != NULL);
7066
tr->invariantPtr = (int*) rax_malloc(myLength * sizeof(int));
7067
assert(tr->invariantPtr != NULL);
7069
tr->rateCategoryPtr = (int*) rax_malloc(myLength * sizeof(int));
7070
assert(tr->rateCategoryPtr != NULL);
5727
7078
inline static void sendTraversalInfo(tree *localTree, tree *tr)
5729
/* the one below is a hack we are re-assigning the local pointer to the global one
5730
the memcpy version below is just for testing and preparing the
5731
fine-grained MPI BlueGene version */
5735
localTree->td[0] = tr->td[0];
5739
localTree->td[0].count = tr->td[0].count;
5740
memcpy(localTree->td[0].ti, tr->td[0].ti, localTree->td[0].count * sizeof(traversalInfo));
7080
localTree->td[0] = tr->td[0];
5749
static void execFunction(tree *tr, tree *localTree, const int startIndex, const int endIndex,
5750
const int parsimonyStartIndex, const int parsimonyEndIndex, int tid, int n)
7084
static void collectDouble(double *dst, double *src, tree *tr, int n, int tid)
5752
double result, dlnLdlz, d2lnLdlz2;
5753
int parsimonyResult;
7092
for(model = 0; model < tr->NumberOfModels; model++)
7094
for(i = tr->partitionData[model].lower; i < tr->partitionData[model].upper; i++)
7096
if(i % (size_t)n == (size_t)tid)
7103
static void broadcastPerSiteRates(tree *tr, tree *localTree)
7109
for(model = 0; model < localTree->NumberOfModels; model++)
7111
localTree->partitionData[model].numberOfCategories = tr->partitionData[model].numberOfCategories;
7113
for(i = 0; i < localTree->partitionData[model].numberOfCategories; i++)
7115
localTree->partitionData[model].perSiteRates[i] = tr->partitionData[model].perSiteRates[i];
7116
localTree->partitionData[model].unscaled_perSiteRates[i] = tr->partitionData[model].unscaled_perSiteRates[i];
7122
static void copyLG4(tree *localTree, tree *tr, int model, const partitionLengths *pl)
7124
if(tr->partitionData[model].protModels == LG4 || tr->partitionData[model].protModels == LG4X)
7129
for(k = 0; k < 4; k++)
7131
memcpy(localTree->partitionData[model].EIGN_LG4[k], tr->partitionData[model].EIGN_LG4[k], pl->eignLength * sizeof(double));
7132
memcpy(localTree->partitionData[model].EV_LG4[k], tr->partitionData[model].EV_LG4[k], pl->evLength * sizeof(double));
7133
memcpy(localTree->partitionData[model].EI_LG4[k], tr->partitionData[model].EI_LG4[k], pl->eiLength * sizeof(double));
7134
memcpy(localTree->partitionData[model].substRates_LG4[k], tr->partitionData[model].substRates_LG4[k], pl->substRatesLength * sizeof(double));
7135
memcpy(localTree->partitionData[model].frequencies_LG4[k], tr->partitionData[model].frequencies_LG4[k], pl->frequenciesLength * sizeof(double));
7136
memcpy(localTree->partitionData[model].tipVector_LG4[k], tr->partitionData[model].tipVector_LG4[k], pl->tipVectorLength * sizeof(double));
7141
static void execFunction(tree *tr, tree *localTree, int tid, int n)
7143
double volatile result;
5758
7154
currentJob = threadJob >> 16;
5762
/*switch(threadJob) */
5764
case THREAD_NEWVIEW:
5767
sendTraversalInfo(localTree, tr);
5769
newviewIterative(localTree, startIndex, endIndex);
5771
newviewIterative(tr, startIndex, endIndex);
5775
/*****************************************************/
7158
case THREAD_INIT_PARTITION:
7159
initPartition(tr, localTree, tid);
7161
case THREAD_ALLOC_LIKELIHOOD:
7162
allocNodex(localTree, tid, n);
7163
threadFixModelIndices(tr, localTree, tid, n);
7165
case THREAD_FIX_MODEL_INDICES:
7166
threadFixModelIndices(tr, localTree, tid, n);
5777
7168
case THREAD_EVALUATE:
5780
sendTraversalInfo(localTree, tr);
5781
result = evaluateIterative(localTree, startIndex, endIndex);
5783
result = evaluateIterative(tr, startIndex, endIndex);
5787
reductionBuffer[tid] = result;
5790
/*****************************************************/
5792
case THREAD_SUM_MAKENEWZ:
5795
sendTraversalInfo(localTree, tr);
5796
makenewzIterative(localTree, startIndex, endIndex);
5798
makenewzIterative(tr, startIndex, endIndex);
5803
/*****************************************************/
5805
case THREAD_MAKENEWZ:
5810
localTree->modelNumber = tr->modelNumber;
5811
localTree->coreLZ = tr->coreLZ;
5813
if(localTree->multiBranch)
5814
execCore(localTree, &dlnLdlz, &d2lnLdlz2, localTree->strided_partitionData[localTree->modelNumber].lower,
5815
localTree->strided_partitionData[localTree->modelNumber].upper, localTree->modelNumber);
5817
execCore(localTree, &dlnLdlz, &d2lnLdlz2, startIndex, endIndex, localTree->modelNumber);
5822
int start = tr->partitionData[tr->modelNumber].lower;
5823
int end = tr->partitionData[tr->modelNumber].upper;
5825
calcBounds(tid, n, start, end, &l, &u);
5828
execCore(tr, &dlnLdlz, &d2lnLdlz2, l, u, tr->modelNumber);
5832
execCore(tr, &dlnLdlz, &d2lnLdlz2, startIndex, endIndex, tr->modelNumber);
5838
reductionBuffer[tid] = dlnLdlz;
5839
reductionBufferTwo[tid] = d2lnLdlz2;
5842
/*****************************************************/
5844
case THREAD_SUM_MAKENEWZ_PARTITION:
5846
int u, l, start, end;
7169
sendTraversalInfo(localTree, tr);
7170
result = evaluateIterative(localTree, FALSE);
7172
if(localTree->NumberOfModels > 1)
7174
for(model = 0; model < localTree->NumberOfModels; model++)
7175
reductionBuffer[tid * localTree->NumberOfModels + model] = localTree->perPartitionLH[model];
7178
reductionBuffer[tid] = result;
7182
for(model = 0; model < localTree->NumberOfModels; model++)
7183
localTree->executeModel[model] = TRUE;
7186
case THREAD_NEWVIEW_MASKED:
7187
sendTraversalInfo(localTree, tr);
7188
memcpy(localTree->executeModel, tr->executeModel, sizeof(boolean) * localTree->NumberOfModels);
7189
newviewIterative(localTree);
7192
for(model = 0; model < localTree->NumberOfModels; model++)
7193
localTree->executeModel[model] = TRUE;
7196
case THREAD_NEWVIEW:
7197
sendTraversalInfo(localTree, tr);
7198
newviewIterative(localTree);
7200
case THREAD_MAKENEWZ_FIRST:
7203
dlnLdlz[NUM_BRANCHES],
7204
d2lnLdlz2[NUM_BRANCHES];
7206
sendTraversalInfo(localTree, tr);
7209
memcpy(localTree->coreLZ, tr->coreLZ, sizeof(double) * localTree->numBranches);
7210
memcpy(localTree->executeModel, tr->executeModel, sizeof(boolean) * localTree->NumberOfModels);
7213
makenewzIterative(localTree);
7214
execCore(localTree, dlnLdlz, d2lnLdlz2);
7216
if(!tr->multiBranch)
7218
reductionBuffer[tid] = dlnLdlz[0];
7219
reductionBufferTwo[tid] = d2lnLdlz2[0];
7223
for(i = 0; i < (size_t)localTree->NumberOfModels; i++)
7225
reductionBuffer[tid * localTree->NumberOfModels + i] = dlnLdlz[i];
7226
reductionBufferTwo[tid * localTree->NumberOfModels + i] = d2lnLdlz2[i];
7232
for(model = 0; model < localTree->NumberOfModels; model++)
7233
localTree->executeModel[model] = TRUE;
7237
case THREAD_MAKENEWZ:
7240
dlnLdlz[NUM_BRANCHES],
7241
d2lnLdlz2[NUM_BRANCHES];
7243
memcpy(localTree->coreLZ, tr->coreLZ, sizeof(double) * localTree->numBranches);
7244
memcpy(localTree->executeModel, tr->executeModel, sizeof(boolean) * localTree->NumberOfModels);
7246
execCore(localTree, dlnLdlz, d2lnLdlz2);
7248
if(!tr->multiBranch)
7250
reductionBuffer[tid] = dlnLdlz[0];
7251
reductionBufferTwo[tid] = d2lnLdlz2[0];
7255
for(i = 0; i < (size_t)localTree->NumberOfModels; i++)
7257
reductionBuffer[tid * localTree->NumberOfModels + i] = dlnLdlz[i];
7258
reductionBufferTwo[tid * localTree->NumberOfModels + i] = d2lnLdlz2[i];
7263
for(model = 0; model < localTree->NumberOfModels; model++)
7264
localTree->executeModel[model] = TRUE;
7268
case THREAD_COPY_RATES:
7271
for(model = 0; model < localTree->NumberOfModels; model++)
7273
const partitionLengths *pl = getPartitionLengths(&(tr->partitionData[model]));
7275
memcpy(localTree->partitionData[model].EIGN, tr->partitionData[model].EIGN, pl->eignLength * sizeof(double));
7276
memcpy(localTree->partitionData[model].EV, tr->partitionData[model].EV, pl->evLength * sizeof(double));
7277
memcpy(localTree->partitionData[model].EI, tr->partitionData[model].EI, pl->eiLength * sizeof(double));
7278
memcpy(localTree->partitionData[model].tipVector, tr->partitionData[model].tipVector, pl->tipVectorLength * sizeof(double));
7280
copyLG4(localTree, tr, model, pl);
7284
case THREAD_OPT_RATE:
7287
memcpy(localTree->executeModel, tr->executeModel, localTree->NumberOfModels * sizeof(boolean));
7289
for(model = 0; model < localTree->NumberOfModels; model++)
7291
const partitionLengths *pl = getPartitionLengths(&(tr->partitionData[model]));
7293
memcpy(localTree->partitionData[model].EIGN, tr->partitionData[model].EIGN, pl->eignLength * sizeof(double));
7294
memcpy(localTree->partitionData[model].EV, tr->partitionData[model].EV, pl->evLength * sizeof(double));
7295
memcpy(localTree->partitionData[model].EI, tr->partitionData[model].EI, pl->eiLength * sizeof(double));
7296
memcpy(localTree->partitionData[model].tipVector, tr->partitionData[model].tipVector, pl->tipVectorLength * sizeof(double));
7298
copyLG4(localTree, tr, model, pl);
7302
result = evaluateIterative(localTree, FALSE);
7305
if(localTree->NumberOfModels > 1)
7307
for(model = 0; model < localTree->NumberOfModels; model++)
7308
reductionBuffer[tid * localTree->NumberOfModels + model] = localTree->perPartitionLH[model];
7311
reductionBuffer[tid] = result;
7316
for(model = 0; model < localTree->NumberOfModels; model++)
7317
localTree->executeModel[model] = TRUE;
7320
case THREAD_COPY_INVAR:
7323
for(model = 0; model < localTree->NumberOfModels; model++)
7324
localTree->partitionData[model].propInvariant = tr->partitionData[model].propInvariant;
7327
case THREAD_OPT_INVAR:
7330
memcpy(localTree->executeModel, tr->executeModel, localTree->NumberOfModels * sizeof(boolean));
7331
for(model = 0; model < localTree->NumberOfModels; model++)
7332
localTree->partitionData[model].propInvariant = tr->partitionData[model].propInvariant;
7335
result = evaluateIterative(localTree, FALSE);
7337
if(localTree->NumberOfModels > 1)
7339
for(model = 0; model < localTree->NumberOfModels; model++)
7340
reductionBuffer[tid * localTree->NumberOfModels + model] = localTree->perPartitionLH[model];
7343
reductionBuffer[tid] = result;
7347
for(model = 0; model < localTree->NumberOfModels; model++)
7348
localTree->executeModel[model] = TRUE;
7351
case THREAD_COPY_ALPHA:
7354
for(model = 0; model < localTree->NumberOfModels; model++)
7356
memcpy(localTree->partitionData[model].gammaRates, tr->partitionData[model].gammaRates, sizeof(double) * 4);
7357
localTree->partitionData[model].alpha = tr->partitionData[model].alpha;
7361
case THREAD_OPT_ALPHA:
7364
memcpy(localTree->executeModel, tr->executeModel, localTree->NumberOfModels * sizeof(boolean));
7365
for(model = 0; model < localTree->NumberOfModels; model++)
7366
memcpy(localTree->partitionData[model].gammaRates, tr->partitionData[model].gammaRates, sizeof(double) * 4);
7369
result = evaluateIterative(localTree, FALSE);
7372
if(localTree->NumberOfModels > 1)
7374
for(model = 0; model < localTree->NumberOfModels; model++)
7375
reductionBuffer[tid * localTree->NumberOfModels + model] = localTree->perPartitionLH[model];
7378
reductionBuffer[tid] = result;
7382
for(model = 0; model < localTree->NumberOfModels; model++)
7383
localTree->executeModel[model] = TRUE;
7386
case THREAD_RESET_MODEL:
7389
for(model = 0; model < localTree->NumberOfModels; model++)
7391
const partitionLengths *pl = getPartitionLengths(&(tr->partitionData[model]));
7393
memcpy(localTree->partitionData[model].EIGN, tr->partitionData[model].EIGN, pl->eignLength * sizeof(double));
7394
memcpy(localTree->partitionData[model].EV, tr->partitionData[model].EV, pl->evLength * sizeof(double));
7395
memcpy(localTree->partitionData[model].EI, tr->partitionData[model].EI, pl->eiLength * sizeof(double));
7396
memcpy(localTree->partitionData[model].substRates, tr->partitionData[model].substRates, pl->substRatesLength * sizeof(double));
7397
memcpy(localTree->partitionData[model].frequencies, tr->partitionData[model].frequencies, pl->frequenciesLength * sizeof(double));
7398
memcpy(localTree->partitionData[model].tipVector, tr->partitionData[model].tipVector, pl->tipVectorLength * sizeof(double));
7400
copyLG4(localTree, tr, model, pl);
7402
memcpy(localTree->partitionData[model].gammaRates, tr->partitionData[model].gammaRates, sizeof(double) * 4);
7403
localTree->partitionData[model].alpha = tr->partitionData[model].alpha;
7404
localTree->partitionData[model].brLenScaler = tr->partitionData[model].brLenScaler;
7405
localTree->partitionData[model].propInvariant = tr->partitionData[model].propInvariant;
7409
case THREAD_COPY_INIT_MODEL:
7412
localTree->rateHetModel = tr->rateHetModel;
7414
for(model = 0; model < localTree->NumberOfModels; model++)
7416
const partitionLengths *pl = getPartitionLengths(&(tr->partitionData[model]));
7418
memcpy(localTree->partitionData[model].EIGN, tr->partitionData[model].EIGN, pl->eignLength * sizeof(double));
7419
memcpy(localTree->partitionData[model].EV, tr->partitionData[model].EV, pl->evLength * sizeof(double));
7420
memcpy(localTree->partitionData[model].EI, tr->partitionData[model].EI, pl->eiLength * sizeof(double));
7421
memcpy(localTree->partitionData[model].substRates, tr->partitionData[model].substRates, pl->substRatesLength * sizeof(double));
7422
memcpy(localTree->partitionData[model].frequencies, tr->partitionData[model].frequencies, pl->frequenciesLength * sizeof(double));
7423
memcpy(localTree->partitionData[model].tipVector, tr->partitionData[model].tipVector, pl->tipVectorLength * sizeof(double));
7425
copyLG4(localTree, tr, model, pl);
7427
memcpy(localTree->partitionData[model].weights, tr->partitionData[model].weights, sizeof(double) * 4);
7428
memcpy(localTree->partitionData[model].gammaRates, tr->partitionData[model].gammaRates, sizeof(double) * 4);
7429
localTree->partitionData[model].alpha = tr->partitionData[model].alpha;
7430
localTree->partitionData[model].brLenScaler = tr->partitionData[model].brLenScaler;
7431
localTree->partitionData[model].propInvariant = tr->partitionData[model].propInvariant;
7432
localTree->partitionData[model].lower = tr->partitionData[model].lower;
7433
localTree->partitionData[model].upper = tr->partitionData[model].upper;
7435
localTree->partitionData[model].numberOfCategories = tr->partitionData[model].numberOfCategories;
7438
memcpy(localTree->cdta->patrat, tr->cdta->patrat, localTree->originalCrunchedLength * sizeof(double));
7439
memcpy(localTree->cdta->patratStored, tr->cdta->patratStored, localTree->originalCrunchedLength * sizeof(double));
7442
for(model = 0; model < localTree->NumberOfModels; model++)
7447
for(i = localTree->partitionData[model].lower, localIndex = 0; i < localTree->partitionData[model].upper; i++)
7448
if(i % (size_t)n == (size_t)tid)
7450
localTree->partitionData[model].wgt[localIndex] = tr->cdta->aliaswgt[i];
7451
localTree->partitionData[model].invariant[localIndex] = tr->invariant[i];
7457
case THREAD_RATE_CATS:
7458
sendTraversalInfo(localTree, tr);
7461
localTree->lower_spacing = tr->lower_spacing;
7462
localTree->upper_spacing = tr->upper_spacing;
7465
optRateCatPthreads(localTree, localTree->lower_spacing, localTree->upper_spacing, localTree->lhs, n, tid);
7469
collectDouble(tr->cdta->patrat, localTree->cdta->patrat, localTree, n, tid);
7470
collectDouble(tr->cdta->patratStored, localTree->cdta->patratStored, localTree, n, tid);
7471
collectDouble(tr->lhs, localTree->lhs, localTree, n, tid);
7474
case THREAD_COPY_RATE_CATS:
7477
memcpy(localTree->cdta->patrat, tr->cdta->patrat, localTree->originalCrunchedLength * sizeof(double));
7478
memcpy(localTree->cdta->patratStored, tr->cdta->patratStored, localTree->originalCrunchedLength * sizeof(double));
7479
broadcastPerSiteRates(tr, localTree);
7482
for(model = 0; model < localTree->NumberOfModels; model++)
7484
localTree->partitionData[model].numberOfCategories = tr->partitionData[model].numberOfCategories;
7486
for(localCounter = 0, i = localTree->partitionData[model].lower; i < localTree->partitionData[model].upper; i++)
7488
if(i % (size_t)n == (size_t)tid)
7490
localTree->partitionData[model].rateCategory[localCounter] = tr->cdta->rateCategory[i];
7496
case THREAD_CAT_TO_GAMMA:
7498
localTree->rateHetModel = tr->rateHetModel;
7500
case THREAD_GAMMA_TO_CAT:
7502
localTree->rateHetModel = tr->rateHetModel;
7504
case THREAD_EVALUATE_VECTOR:
7505
sendTraversalInfo(localTree, tr);
7506
result = evaluateIterative(localTree, TRUE);
7508
if(localTree->NumberOfModels > 1)
7510
for(model = 0; model < localTree->NumberOfModels; model++)
7511
reductionBuffer[tid * localTree->NumberOfModels + model] = localTree->perPartitionLH[model];
7514
reductionBuffer[tid] = result;
7518
for(model = 0; model < localTree->NumberOfModels; model++)
7519
localTree->executeModel[model] = TRUE;
7522
for(model = 0, globalCounter = 0; model < localTree->NumberOfModels; model++)
7524
for(localCounter = 0, i = localTree->partitionData[model].lower; i < localTree->partitionData[model].upper; i++)
7526
if(i % (size_t)n == (size_t)tid)
7528
tr->perSiteLL[globalCounter] = localTree->partitionData[model].perSiteLL[localCounter];
7535
case THREAD_COPY_PARAMS:
7538
for(model = 0; model < localTree->NumberOfModels; model++)
7540
const partitionLengths *pl = getPartitionLengths(&(tr->partitionData[model]));
7542
memcpy(localTree->partitionData[model].EIGN, tr->partitionData[model].EIGN, pl->eignLength * sizeof(double));
7543
memcpy(localTree->partitionData[model].EV, tr->partitionData[model].EV, pl->evLength * sizeof(double));
7544
memcpy(localTree->partitionData[model].EI, tr->partitionData[model].EI, pl->eiLength * sizeof(double));
7545
memcpy(localTree->partitionData[model].substRates, tr->partitionData[model].substRates, pl->substRatesLength * sizeof(double));
7546
memcpy(localTree->partitionData[model].frequencies, tr->partitionData[model].frequencies, pl->frequenciesLength * sizeof(double));
7547
memcpy(localTree->partitionData[model].tipVector, tr->partitionData[model].tipVector, pl->tipVectorLength * sizeof(double));
7549
copyLG4(localTree, tr, model, pl);
7554
case THREAD_INIT_EPA:
7557
localTree->leftRootNode = tr->leftRootNode;
7558
localTree->rightRootNode = tr->rightRootNode;
7559
localTree->wasRooted = tr->wasRooted;
7560
localTree->bInf = tr->bInf;
7561
localTree->numberOfBranches = tr->numberOfBranches;
7562
localTree->contiguousVectorLength = tr->contiguousVectorLength;
7563
localTree->contiguousScalingLength = tr->contiguousScalingLength;
7564
localTree->inserts = tr->inserts;
7565
localTree->numberOfTipsForInsertion = tr->numberOfTipsForInsertion;
7566
localTree->fracchange = tr->fracchange;
7567
localTree->rawFracchange = tr->rawFracchange;
7569
memcpy(localTree->partitionContributions, tr->partitionContributions, sizeof(double) * localTree->NumberOfModels);
7571
memcpy(localTree->fracchanges, tr->fracchanges, sizeof(double) * localTree->NumberOfModels);
7573
memcpy(localTree->rawFracchanges, tr->rawFracchanges, sizeof(double) * localTree->NumberOfModels);
7576
if(localTree->perPartitionEPA)
7578
localTree->readPartition = (int *)rax_malloc(sizeof(int) * (size_t)localTree->numberOfTipsForInsertion);
7579
memcpy(localTree->readPartition, tr->readPartition, sizeof(int) * (size_t)localTree->numberOfTipsForInsertion);
7584
localTree->temporarySumBuffer = (double *)rax_malloc(sizeof(double) * localTree->contiguousVectorLength);
7585
localTree->temporaryVector = (double *)rax_malloc(sizeof(double) * localTree->contiguousVectorLength);
7587
localTree->temporaryScaling = (int *)rax_malloc(sizeof(int) * localTree->contiguousScalingLength);
7590
localTree->contiguousWgt = (int*)rax_malloc(sizeof(int) * localTree->contiguousScalingLength);
7591
localTree->contiguousInvariant = (int*)rax_malloc(sizeof(int) * localTree->contiguousScalingLength);
7594
memcpy(localTree->contiguousWgt , tr->cdta->aliaswgt, sizeof(int) * localTree->contiguousScalingLength);
7595
memcpy(localTree->contiguousInvariant , tr->invariant, sizeof(int) * localTree->contiguousScalingLength);
7598
broadcastPerSiteRates(tr, localTree);
7601
localTree->contiguousRateCategory = (int*)rax_malloc(sizeof(int) * localTree->contiguousScalingLength);
7604
memcpy(localTree->contiguousRateCategory, tr->cdta->rateCategory, sizeof(int) * localTree->contiguousScalingLength);
7606
localTree->contiguousTips = tr->yVector;
5851
/* only required for a rarely used, undocumented function */
5854
start = tr->partitionData[tr->modelNumber].lower;
5855
end = tr->partitionData[tr->modelNumber].upper;
5856
calcBounds(tid, n, start, end, &l, &u);
5858
makenewzIterativePartition(tr, l, u, tr->modelNumber);
5862
/*****************************************************/
5864
case THREAD_MAKENEWZ_PARTITION:
7609
case THREAD_GATHER_LIKELIHOOD:
5866
int u, l, start, end;
5871
/* only required for a rarely used, undocumented function */
5875
start = tr->partitionData[tr->modelNumber].lower;
5876
end = tr->partitionData[tr->modelNumber].upper;
5877
calcBounds(tid, n, start, end, &l, &u);
5879
execCorePartition(tr, &dlnLdlz, &d2lnLdlz2, l, u, tr->modelNumber);
5880
reductionBuffer[tid] = dlnLdlz;
5881
reductionBufferTwo[tid] = d2lnLdlz2;
7612
branchCounter = tr->branchCounter;
7615
*leftContigousVector = localTree->bInf[branchCounter].epa->left,
7616
*rightContigousVector = localTree->bInf[branchCounter].epa->right;
7619
*leftContigousScalingVector = localTree->bInf[branchCounter].epa->leftScaling,
7620
*rightContigousScalingVector = localTree->bInf[branchCounter].epa->rightScaling,
7621
rightNumber = localTree->bInf[branchCounter].epa->rightNodeNumber,
7622
leftNumber = localTree->bInf[branchCounter].epa->leftNodeNumber;
7625
globalColumnCount = 0,
7628
for(model = 0; model < localTree->NumberOfModels; model++)
7634
*leftStridedVector = (double *)NULL,
7635
*rightStridedVector = (double *)NULL;
7638
*leftStridedScalingVector = (int *)NULL,
7639
*rightStridedScalingVector = (int *)NULL;
7642
localColumnCount = 0,
7645
if(!isTip(leftNumber, localTree->mxtips))
7647
leftStridedVector = localTree->partitionData[model].xVector[leftNumber - localTree->mxtips - 1];
7648
leftStridedScalingVector = localTree->partitionData[model].expVector[leftNumber - localTree->mxtips - 1];
7651
if(!isTip(rightNumber, localTree->mxtips))
7653
rightStridedVector = localTree->partitionData[model].xVector[rightNumber - localTree->mxtips - 1];
7654
rightStridedScalingVector = localTree->partitionData[model].expVector[rightNumber - localTree->mxtips - 1];
7657
assert(!(isTip(leftNumber, localTree->mxtips) && isTip(rightNumber, localTree->mxtips)));
7659
blockRequirements = (size_t)(tr->discreteRateCategories) * (size_t)(tr->partitionData[model].states);
7661
for(globalColumnCount = localTree->partitionData[model].lower; globalColumnCount < localTree->partitionData[model].upper; globalColumnCount++)
7663
if(globalColumnCount % (size_t)n == (size_t)tid)
7665
if(leftStridedVector)
7667
memcpy(&leftContigousVector[globalCount], &leftStridedVector[localCount], sizeof(double) * blockRequirements);
7668
leftContigousScalingVector[globalColumnCount] = leftStridedScalingVector[localColumnCount];
7671
if(rightStridedVector)
7673
memcpy(&rightContigousVector[globalCount], &rightStridedVector[localCount], sizeof(double) * blockRequirements);
7674
rightContigousScalingVector[globalColumnCount] = rightStridedScalingVector[localColumnCount];
7678
localCount += blockRequirements;
7683
globalCount += blockRequirements;
7686
assert(localColumnCount == localTree->partitionData[model].width);
7687
assert(localCount == (localTree->partitionData[model].width * (int)blockRequirements));
7692
case THREAD_INSERT_CLASSIFY:
7693
case THREAD_INSERT_CLASSIFY_THOROUGH:
7703
pthread_mutex_lock(&mutex);
7705
if(NumberOfJobs == 0)
7709
branchNumber = localTree->numberOfBranches - NumberOfJobs;
7713
pthread_mutex_unlock(&mutex);
7719
case THREAD_INSERT_CLASSIFY:
7720
addTraverseRobIterative(localTree, branchNumber);
7722
case THREAD_INSERT_CLASSIFY_THOROUGH:
7723
testInsertThoroughIterative(localTree, branchNumber);
7733
case THREAD_PREPARE_BIPS_FOR_PRINT:
7744
pthread_mutex_lock(&mutex);
7746
if(NumberOfJobs == 0)
7750
i = tr->consensusBipLen - NumberOfJobs;
7754
pthread_mutex_unlock(&mutex);
7759
*bipA = tr->consensusBips[i] ;
7764
while(firstIndex < tr->bitVectorLength && bipA->bitVector[firstIndex] == 0 )
7768
for(j = i + 1; j < tr->consensusBipLen; j++)
7771
*bipB = tr->consensusBips[j];
7773
if(bipA->amountTips < bipB->amountTips &&
7774
issubset(bipA->bitVector, bipB->bitVector, tr->bitVectorLength, firstIndex))
7776
/* i is child of j */
7778
*elem = (List*) rax_malloc(sizeof(List));
7780
elem->value = rax_calloc(1, sizeof(int));
7782
*(int*)elem->value = i;
7784
pthread_mutex_lock(tr->mutexesForHashing[j]); /* LOCKED */
7786
tr->hasAncestor[i] = TRUE;
7788
elem->next = tr->listOfDirectChildren[j];
7789
tr->listOfDirectChildren[j] = elem;
7791
pthread_mutex_unlock(tr->mutexesForHashing[j]); /* UNLOCKED */
7793
break; /* each node has only 1 parent -> nothing more to do */
7800
case THREAD_MRE_COMPUTE:
7804
/* worker threads */
7805
boolean done = FALSE;
7806
int localEntryCount = (int) tr->h->entryCount; /* problem? */
7809
int acquiredJobs = 0;
7814
pthread_mutex_lock(&mutex) ; /* START LOCK */
7816
if( NumberOfJobs == 0 )
7822
if( localEntryCount - NumberOfJobs + tr->recommendedAmountJobs < tr->sectionEnd)
7824
/* try to acquire the recommended amount of jobs */
7825
jobId = localEntryCount - NumberOfJobs;
7826
acquiredJobs = tr->recommendedAmountJobs;
7827
NumberOfJobs -= acquiredJobs;
7830
if( localEntryCount - NumberOfJobs < (signed int)tr->sectionEnd)
7832
/* at least get one job */
7833
jobId = tr->h->entryCount - NumberOfJobs;
7838
pthread_mutex_unlock(&mutex); /* END LOCK */
7840
if(*(tr->len) >= tr->maxBips)
7844
while(acquiredJobs > 0)
7850
*currentEntry = tr->sbw[jobId];
7854
if(!((unsigned int)tr->mr_thresh < currentEntry->supportFromTreeset[0]))
7856
for(k = *(tr->len); k > 0; k--)
7858
if(! compatible(tr->sbi[k-1], currentEntry, tr->bitVectorLength))
7866
tr->bipStatus[jobId - tr->sectionEnd + tr->bipStatusLen] = MRE_POSSIBLE_CANDIDATE; /* ready to check */
7868
tr->bipStatus[jobId - tr->sectionEnd + tr->bipStatusLen] = MRE_EXCLUDED; /* can be omitted */
7878
/* check in a looping manner, if bipartitions could be added */
7889
/* get highest bip to check */
7891
while(highestToCheck < tr->bipStatusLen)
7893
/* waits busily as long as there is nothing to do */
7894
/* printf("%d is highest to check\n", highestToCheck); */
7895
if( ! tr->bipStatus[highestToCheck] )
7898
if(tr->bipStatus[highestToCheck] == MRE_POSSIBLE_CANDIDATE)
7905
if( tmpCounter >= tr->maxBips ||
7906
(highestToCheck == tr->bipStatusLen /* end of buffer that is examined */
7907
&& (unsigned int)tr->sectionEnd == tr->h->entryCount /* the end of the buffer is also the hashtable */
7908
&& tr->bipStatus[highestToCheck-1] > MRE_POSSIBLE_CANDIDATE))
7910
/* the last entry in buffer was already processed */
7911
*(tr->len) = tmpCounter; /* for the workers to finish */
7912
break; /* master says goodbye */
7915
/* reset section (resp. the buffer to be checked) */
7917
if( highestToCheck == tr->bipStatusLen)
7924
*(tr->len) = tmpCounter; /* reset counter for workers */
7925
tr->entriesOfSection = &(tr->sbw[tr->sectionEnd ]);
7927
/* find new section end: tries to find a new window
7928
size (and resp. sectionEnd) s.t. the expected
7929
amount of work for master and workers is the same.
7931
density /= tr->bipStatusLen;
7933
/* I am not entirely sure, if this makes the code really incredible faster... */
7934
max = 5 * (NumberOfThreads-1);
7936
tr->recommendedAmountJobs = (int)(max + (min - max) * density); /* recommend an amount of jobs to be calculate per thread between min and max */
7941
tmp = MAX((2 * tmpCounter * SECTION_CONSTANT / (NumberOfThreads * density)), /* the above discussed formula */
7942
NumberOfThreads * MRE_MIN_AMOUNT_JOBS_PER_THREAD ); /* we need at least a bit work */
7943
newSectionEnd = MIN(tr->sectionEnd + tmp, (int)(tr->h->entryCount));
7946
newSectionEnd = tr->h->entryCount;
7950
tr->bipStatusLen = newSectionEnd - tr->sectionEnd;
7951
rax_free(tr->bipStatus);
7952
/* printf("%d\n" ,tr->bipStatusLen); */
7953
tr->bipStatus = (int*)rax_calloc(tr->bipStatusLen, sizeof(int));
7954
tr->sectionEnd = newSectionEnd;
7958
assert( tr->bipStatus[highestToCheck] == MRE_POSSIBLE_CANDIDATE);
7960
for(i = highestToCheck; i > 0; i--) /* checking new bip */
7962
assert(tr->bipStatus[i-1] == MRE_ADDED || tr->bipStatus[i-1] == MRE_EXCLUDED);
7964
if(tr->bipStatus[i-1] == MRE_ADDED
7965
&& ! compatible(tr->entriesOfSection[i-1], tr->entriesOfSection[highestToCheck], tr->bitVectorLength))
7967
tr->bipStatus[highestToCheck] = MRE_EXCLUDED;
7972
if(i == 0) /* accepting */
7974
tr->bipStatus[highestToCheck] = MRE_ADDED;
7975
tr->sbi[tmpCounter] = tr->entriesOfSection[highestToCheck];
7983
case THREAD_NEWVIEW_ANCESTRAL:
7984
sendTraversalInfo(localTree, tr);
7985
newviewIterativeAncestral(localTree);
7987
case THREAD_GATHER_ANCESTRAL:
7990
*contigousVector = tr->ancestralStates;
7993
globalColumnCount = 0,
7996
for(model = 0; model < localTree->NumberOfModels; model++)
8004
localColumnCount = 0,
8008
*stridedVector = localTree->partitionData[model].sumBuffer;
8010
if(tr->rateHetModel == CAT)
8015
blockRequirements = (size_t)(rateHet) * (size_t)(tr->partitionData[model].states);
8017
for(globalColumnCount = localTree->partitionData[model].lower; globalColumnCount < localTree->partitionData[model].upper; globalColumnCount++)
8019
if(globalColumnCount % (size_t)n == (size_t)tid)
8021
memcpy(&contigousVector[globalCount], &stridedVector[localCount], sizeof(double) * blockRequirements);
8024
localCount += blockRequirements;
8027
globalCount += blockRequirements;
8030
assert(localColumnCount == localTree->partitionData[model].width);
8031
assert(localCount == (localTree->partitionData[model].width * (int)blockRequirements));
8035
case THREAD_OPT_SCALER:
8038
memcpy(localTree->executeModel, tr->executeModel, localTree->NumberOfModels * sizeof(boolean));
8040
for(model = 0; model < localTree->NumberOfModels; model++)
8041
localTree->partitionData[model].brLenScaler = tr->partitionData[model].brLenScaler;
8044
result = evaluateIterative(localTree, FALSE);
8046
if(localTree->NumberOfModels > 1)
8048
for(model = 0; model < localTree->NumberOfModels; model++)
8049
reductionBuffer[tid * localTree->NumberOfModels + model] = localTree->perPartitionLH[model];
8052
reductionBuffer[tid] = result;
8056
for(model = 0; model < localTree->NumberOfModels; model++)
8057
localTree->executeModel[model] = TRUE;
5885
/*****************************************************/
5887
case THREAD_NEWVIEW_PARTITION:
5890
localTree->modelNumber = tr->modelNumber;
5891
sendTraversalInfo(localTree, tr);
5893
newviewIterativePartition(localTree, localTree->strided_partitionData[localTree->modelNumber].lower,
5894
localTree->strided_partitionData[localTree->modelNumber].upper, localTree->modelNumber);
5898
int start = tr->partitionData[tr->modelNumber].lower;
5899
int end = tr->partitionData[tr->modelNumber].upper;
5900
calcBounds(tid, n, start, end, &l, &u);
5902
newviewIterativePartition(tr, l, u, tr->modelNumber);
5907
/*****************************************************/
5909
case THREAD_EVALUATE_PARTITION:
5913
localTree->modelNumber = tr->modelNumber;
5914
sendTraversalInfo(localTree, tr);
5919
result = evaluateIterativePartition(localTree, localTree->strided_partitionData[localTree->modelNumber].lower,
5920
localTree->strided_partitionData[localTree->modelNumber].upper,
5921
localTree->modelNumber);
5925
int start = tr->partitionData[tr->modelNumber].lower;
5926
int end = tr->partitionData[tr->modelNumber].upper;
5927
calcBounds(tid, n, start, end, &l, &u);
5928
result = evaluateIterativePartition(tr, l, u, tr->modelNumber);
5933
reductionBuffer[tid] = result;
5936
/*****************************************************/
5938
case THREAD_RATE_CATS:
5943
sendTraversalInfo(localTree, tr);
5944
localTree->lower_spacing = tr->lower_spacing;
5945
localTree->upper_spacing = tr->upper_spacing;
5947
optRateCat_LOCAL(localTree, startIndex, endIndex,
5948
localTree->lower_spacing, localTree->upper_spacing, localTree->strided_lhs);
5952
collectDouble(tr->cdta->patrat, localTree->strided_patrat, tr->cdta->endsite, n, tid);
5953
collectDouble(tr->cdta->patratStored, localTree->strided_patratStored, tr->cdta->endsite, n, tid);
5954
collectDouble(tr->lhs, localTree->strided_lhs, tr->cdta->endsite, n, tid);
5958
for(i = 0; i < tr->cdta->endsite; i++)
5960
optRateCat(tr, i, tr->lower_spacing, tr->upper_spacing, tr->lhs);
5965
/*****************************************************/
5967
case THREAD_NEWVIEW_PARSIMONY:
5971
sendTraversalInfo(localTree, tr);
5972
newviewParsimonyIterative(localTree, parsimonyStartIndex, parsimonyEndIndex);
5974
newviewParsimonyIterative(tr, parsimonyStartIndex, parsimonyEndIndex);
5979
/*****************************************************/
5981
case THREAD_EVALUATE_PARSIMONY:
5985
sendTraversalInfo(localTree, tr);
5986
parsimonyResult = evaluateParsimonyIterative(localTree, parsimonyStartIndex, parsimonyEndIndex);
5988
parsimonyResult = evaluateParsimonyIterative(tr, parsimonyStartIndex, parsimonyEndIndex);
5992
reductionBufferParsimony[tid] = parsimonyResult;
5996
/*****************************************************/
5998
case THREAD_EVALUATE_VECTOR:
6000
sendTraversalInfo(localTree, tr);
6001
evaluateGenericVectorIterative(localTree, startIndex, endIndex);
6003
collectDouble(tr->siteLL_Vector, localTree->strided_siteLL_Vector, tr->cdta->endsite, n, tid);
6006
/* rarely used function */
6009
evaluateGenericVectorIterative(tr, startIndex, endIndex);
6013
/*****************************************************/
6015
case THREAD_CATEGORIZE:
6021
sendTraversalInfo(localTree, tr);
6023
for(i = 0; i < localTree->NumberOfModels; i++)
6025
localTree->strided_patrat[i * 4] = localTree->gammaRates[i * 4];
6026
localTree->strided_patrat[i * 4 + 1] = localTree->gammaRates[i * 4 + 1];
6027
localTree->strided_patrat[i * 4 + 2] = localTree->gammaRates[i * 4 + 2];
6028
localTree->strided_patrat[i * 4 + 3] = localTree->gammaRates[i * 4 + 3];
6029
assert(i * 4 + 3 < localTree->originalCrunchedLength);
6032
localTree->NumberOfCategories = 4 * localTree->NumberOfModels;
6033
categorizeIterative(localTree, startIndex, endIndex);
6035
for(i = startIndex; i < endIndex; i++)
6038
temp = localTree->gammaRates[localTree->strided_rateCategory[i]];
6039
localTree->strided_wr[i] = wtemp = temp * localTree->strided_aliaswgt[i];
6040
localTree->strided_wr2[i] = temp * wtemp;
6044
categorizeIterative(tr, startIndex, endIndex);
6048
/*****************************************************/
6052
case THREAD_PREPARE_PARSIMONY:
6053
/*printf("THREAD_PREPARE_PARSIMONY\n"); */
6056
localTree->parsimonyLength = tr->parsimonyLength;
6057
memcpy(localTree->partitionData , tr->partitionData, sizeof(pInfo) * localTree->NumberOfModels);
6060
strideInt(localTree->strided_model, tr->model,
6061
localTree->originalCrunchedLength, n, tid);
6062
strideInt(localTree->strided_dataVector, tr->dataVector,
6063
localTree->originalCrunchedLength, n, tid);
6064
stridePartitionData(localTree, tid, n, localTree->parsimonyLength);
6066
strideTips(localTree->strided_yVector, tr->yVector, localTree->originalCrunchedLength, n, tid,
6067
localTree->mxtips, localTree->strideLength);
6068
strideInt(localTree->strided_aliaswgt, tr->cdta->aliaswgt,
6069
localTree->originalCrunchedLength, n, tid);
6071
localTree->mySpan = 1 + (localTree->parsimonyLength / n);
6072
localTree->parsimonyData = (parsimonyVector *)malloc(sizeof(parsimonyVector) *
6073
localTree->mxtips * localTree->mySpan);
6077
/*****************************************************/
6079
case THREAD_FINISH_PARSIMONY:
6080
free(localTree->parsimonyData);
6084
localTree->cdta->endsite = tr->cdta->endsite;
6085
memcpy(localTree->partitionData , tr->partitionData, sizeof(pInfo) * localTree->NumberOfModels);
6087
strideInt(localTree->strided_model, tr->model,
6088
localTree->originalCrunchedLength, n, tid);
6089
strideInt(localTree->strided_dataVector, tr->dataVector,
6090
localTree->originalCrunchedLength, n, tid);
6091
stridePartitionData(localTree, tid, n, localTree->cdta->endsite);
6093
strideTips(localTree->strided_yVector, tr->yVector, localTree->originalCrunchedLength, n, tid,
6094
localTree->mxtips, localTree->strideLength);
6095
strideInt(localTree->strided_aliaswgt, tr->cdta->aliaswgt,
6096
localTree->originalCrunchedLength, n, tid);
6100
/*****************************************************/
6102
case THREAD_ALLOC_LIKELIHOOD:
6103
/*printf("THREAD_ALLOC_LIKELIHOOD\n");*/
6107
localTree->likelihoodFunction = tr->likelihoodFunction;
6108
localTree->currentModel = tr->currentModel;
6109
localTree->cdta->endsite = tr->cdta->endsite;
6110
localTree->mySpan = 1 + (localTree->cdta->endsite / n);
6112
localTree->expArray = (int *)malloc(localTree->mySpan * localTree->mxtips * sizeof(int));
6114
if(localTree->mixedData)
6120
switch(localTree->currentModel)
6123
span = 20 * localTree->mySpan;
6124
localTree->sumBuffer = (double *)malloc(20 * localTree->strideLength * sizeof(double));
6127
span = 80 * localTree->mySpan;
6128
localTree->sumBuffer = (double *)malloc(80 * localTree->strideLength * sizeof(double));
6131
span = 16 * localTree->mySpan;
6132
localTree->sumBuffer = (double *)malloc(16 * localTree->strideLength * sizeof(double));
6135
span = 4 * localTree->mySpan;
6136
localTree->sumBuffer = (double *)malloc(4 * localTree->strideLength * sizeof(double));
6141
localTree->likelihoodArray = (double *)malloc(span * localTree->mxtips * sizeof(double));
6145
for(i = 0; i < localTree->mxtips; i++)
6146
localTree->xVector[i] = &(localTree->likelihoodArray[i * span]);
6150
/*****************************************************/
6152
case THREAD_FREE_LIKELIHOOD:
6153
free(localTree->expArray);
6154
free(localTree->likelihoodArray);
6155
free(localTree->sumBuffer);
6156
localTree->expArray = (int*)NULL;
6157
localTree->likelihoodArray = (double*)NULL;
6158
localTree->sumBuffer = (double*)NULL;
6161
/*****************************************************/
6163
case THREAD_COPY_REVERSIBLE:
6166
memcpy(localTree->tipVectorDNA, tr->tipVectorDNA, localTree->NumberOfModels * 64 * sizeof(double));
6167
memcpy(localTree->tipVectorAA, tr->tipVectorAA, localTree->NumberOfModels * 460 * sizeof(double));
6169
memcpy(localTree->EV_DNA, tr->EV_DNA, localTree->NumberOfModels * 16 * sizeof(double));
6170
memcpy(localTree->EV_AA, tr->EV_AA, localTree->NumberOfModels * 400 * sizeof(double));
6172
memcpy(localTree->EI_DNA, tr->EI_DNA, localTree->NumberOfModels * 12 * sizeof(double));
6173
memcpy(localTree->EI_AA, tr->EI_AA, localTree->NumberOfModels * 380 * sizeof(double));
6175
memcpy(localTree->EIGN_DNA, tr->EIGN_DNA, localTree->NumberOfModels * 3 * sizeof(double));
6176
memcpy(localTree->EIGN_AA, tr->EIGN_AA, localTree->NumberOfModels * 19 * sizeof(double));
6180
/*****************************************************/
6182
case THREAD_COPY_RATE_CATS:
6184
localTree->NumberOfCategories = tr->NumberOfCategories;
6186
strideInt(localTree->strided_rateCategory, tr->cdta->rateCategory,
6187
localTree->originalCrunchedLength, n, tid);
6189
memcpy(localTree->strided_patrat, tr->cdta->patrat, localTree->originalCrunchedLength * sizeof(double));
6191
strideDouble(localTree->strided_patratStored, tr->cdta->patratStored,
6192
localTree->originalCrunchedLength, n, tid);
6193
strideDouble(localTree->strided_wr, tr->cdta->wr,
6194
localTree->originalCrunchedLength, n, tid);
6195
strideDouble(localTree->strided_wr2, tr->cdta->wr2,
6196
localTree->originalCrunchedLength, n, tid);
6200
/*****************************************************/
6202
case THREAD_COPY_GAMMA_RATES:
6204
memcpy(localTree->gammaRates, tr->gammaRates, localTree->NumberOfModels * 4 * sizeof(double));
6207
/*****************************************************/
6209
case THREAD_COPY_INVARIANTS:
6211
memcpy(localTree->invariants, tr->invariants, localTree->NumberOfModels * sizeof(double));
6214
/*****************************************************/
6216
case THREAD_COPY_INIT_MODEL:
6217
/* printf("THREAD_COPY_INIT_MODEL\n"); */
6220
localTree->NumberOfCategories = tr->NumberOfCategories;
6221
localTree->likelihoodFunction = tr->likelihoodFunction;
6222
localTree->cdta->endsite = tr->cdta->endsite;
6224
memcpy(localTree->tipVectorDNA, tr->tipVectorDNA, localTree->NumberOfModels * 64 * sizeof(double));
6225
memcpy(localTree->tipVectorAA, tr->tipVectorAA, localTree->NumberOfModels * 460 * sizeof(double));
6227
memcpy(localTree->EV_DNA, tr->EV_DNA, localTree->NumberOfModels * 16 * sizeof(double));
6228
memcpy(localTree->EV_AA, tr->EV_AA, localTree->NumberOfModels * 400 * sizeof(double));
6230
memcpy(localTree->EI_DNA, tr->EI_DNA, localTree->NumberOfModels * 12 * sizeof(double));
6231
memcpy(localTree->EI_AA, tr->EI_AA, localTree->NumberOfModels * 380 * sizeof(double));
6233
memcpy(localTree->EIGN_DNA, tr->EIGN_DNA, localTree->NumberOfModels * 3 * sizeof(double));
6234
memcpy(localTree->EIGN_AA, tr->EIGN_AA, localTree->NumberOfModels * 19 * sizeof(double));
6236
memcpy(localTree->frequencies_DNA, tr->frequencies_DNA, localTree->NumberOfModels * 4 * sizeof(double));
6237
memcpy(localTree->frequencies_AA, tr->frequencies_AA, localTree->NumberOfModels * 20 * sizeof(double));
6239
memcpy(localTree->invariants, tr->invariants, localTree->NumberOfModels * sizeof(double));
6241
memcpy(localTree->gammaRates, tr->gammaRates, localTree->NumberOfModels * 4 * sizeof(double));
6243
memcpy(localTree->partitionData , tr->partitionData, sizeof(pInfo) * localTree->NumberOfModels);
6246
strideInt(localTree->strided_model, tr->model,
6247
localTree->originalCrunchedLength, n, tid);
6248
strideInt(localTree->strided_dataVector, tr->dataVector,
6249
localTree->originalCrunchedLength, n, tid);
6250
stridePartitionData(localTree, tid, n, localTree->cdta->endsite);
6252
strideInt(localTree->strided_rateCategory, tr->cdta->rateCategory,
6253
localTree->originalCrunchedLength, n, tid);
6255
strideInt(localTree->strided_aliaswgt, tr->cdta->aliaswgt,
6256
localTree->originalCrunchedLength, n, tid);
6259
strideInt(localTree->strided_invariant, tr->invariant,
6260
localTree->originalCrunchedLength, n, tid);
6262
memcpy(localTree->strided_patrat, tr->cdta->patrat, localTree->originalCrunchedLength * sizeof(double));
6264
strideDouble(localTree->strided_patratStored, tr->cdta->patratStored,
6265
localTree->originalCrunchedLength, n, tid);
6266
strideDouble(localTree->strided_wr, tr->cdta->wr,
6267
localTree->originalCrunchedLength, n, tid);
6268
strideDouble(localTree->strided_wr2, tr->cdta->wr2,
6269
localTree->originalCrunchedLength, n, tid);
6271
strideTips(localTree->strided_yVector, tr->yVector, localTree->originalCrunchedLength, n, tid,
6272
localTree->mxtips, localTree->strideLength);
6275
/*****************************************************/
6277
case THREAD_NEXT_REPLICATE:
6278
/* printf("THREAD_NEXT_REPLICATE\n"); */
6281
localTree->cdta->endsite = tr->cdta->endsite;
6282
memcpy(localTree->partitionData , tr->partitionData, sizeof(pInfo) * localTree->NumberOfModels);
6285
strideInt(localTree->strided_model, tr->model,
6286
localTree->originalCrunchedLength, n, tid);
6287
strideInt(localTree->strided_dataVector, tr->dataVector,
6288
localTree->originalCrunchedLength, n, tid);
6289
stridePartitionData(localTree, tid, n, localTree->cdta->endsite);
6291
strideInt(localTree->strided_aliaswgt, tr->cdta->aliaswgt,
6292
localTree->originalCrunchedLength, n, tid);
6293
strideInt(localTree->strided_rateCategory, tr->cdta->rateCategory,
6294
localTree->originalCrunchedLength, n, tid);
6296
strideDouble(localTree->strided_wr, tr->cdta->wr,
6297
localTree->originalCrunchedLength, n, tid);
6298
strideDouble(localTree->strided_wr2, tr->cdta->wr2,
6299
localTree->originalCrunchedLength, n, tid);
6301
memcpy(localTree->strided_patrat, tr->cdta->patrat, localTree->originalCrunchedLength * sizeof(double));
6303
strideTips(localTree->strided_yVector, tr->yVector, localTree->originalCrunchedLength, n, tid,
6304
localTree->mxtips, localTree->strideLength);
8060
case THREAD_COPY_LG4X_RATES:
8063
for(model = 0; model < localTree->NumberOfModels; model++)
8065
memcpy(localTree->partitionData[model].weights, tr->partitionData[model].weights, sizeof(double) * 4);
8066
memcpy(localTree->partitionData[model].gammaRates, tr->partitionData[model].gammaRates, sizeof(double) * 4);
8070
case THREAD_OPT_LG4X_RATES:
8073
memcpy(localTree->executeModel, tr->executeModel, localTree->NumberOfModels * sizeof(boolean));
8075
for(model = 0; model < localTree->NumberOfModels; model++)
8077
memcpy(localTree->partitionData[model].weights, tr->partitionData[model].weights, sizeof(double) * 4);
8078
memcpy(localTree->partitionData[model].gammaRates, tr->partitionData[model].gammaRates, sizeof(double) * 4);
8083
result = evaluateIterative(localTree, FALSE);
8085
if(localTree->NumberOfModels > 1)
8087
for(model = 0; model < localTree->NumberOfModels; model++)
8088
reductionBuffer[tid * localTree->NumberOfModels + model] = localTree->perPartitionLH[model];
8091
reductionBuffer[tid] = result;
8095
for(model = 0; model < localTree->NumberOfModels; model++)
8096
localTree->executeModel[model] = TRUE;
8100
printf("Job %d\n", currentJob);
6315
void masterBarrier(int jobType, tree *tr)
8108
void masterBarrier(int jobType, tree *tr)
6317
const int n = NumberOfThreads;
6318
int startIndex, endIndex, i, sum,
6319
parsimonyStartIndex, parsimonyEndIndex;
6322
jobCycle = !jobCycle;
8111
n = NumberOfThreads;
8117
jobCycle = !jobCycle;
6323
8118
threadJob = (jobType << 16) + jobCycle;
6327
threadJob = jobType;
6328
jobCycle = !jobCycle;
6332
strided_Bounds(0, tr->cdta->endsite, n, &startIndex, &endIndex);
6333
strided_Bounds(0, tr->parsimonyLength, n, &parsimonyStartIndex, &parsimonyEndIndex);
6335
calcBounds(0, n, 0, tr->parsimonyLength, &parsimonyStartIndex, &parsimonyEndIndex);
6336
calcBounds(0, n, 0, tr->cdta->endsite, &startIndex, &endIndex);
6339
execFunction(tr, tr, startIndex, endIndex, parsimonyStartIndex, parsimonyEndIndex, 0, n);
8120
execFunction(tr, tr, 0, n);
6343
8125
for(i = 1, sum = 1; i < n; i++)
6344
8126
sum += barrierBuffer[i];
6348
8130
for(i = 1; i < n; i++)
6349
8131
barrierBuffer[i] = 0;
6350
/*threadJob = -1; */
6356
static void allocStrides(tree *tr)
8134
#ifndef _PORTABLE_PTHREADS
8136
static void pinToCore(int tid)
8141
CPU_SET(tid, &cpuset);
6360
if(tr->numBranches < NUM_BRANCHES)
8143
if(pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset) != 0)
6362
printf("PERFORMANCE WARNING: for optimal efficiency on this dataset\n");
6363
printf("set NUM_BRANCHES to %d in file axml.h an re-compile\n", tr->numBranches);
8145
printBothOpen("\n\nThere was a problem finding a physical core for thread number %d to run on.\n", tid);
8146
printBothOpen("Probably this happend because you are trying to run more threads than you have cores available,\n");
8147
printBothOpen("which is a thing you should never ever do again, good bye .... \n\n");
6366
tr->strideLength = 1 + (tr->originalCrunchedLength / NumberOfThreads);
6368
tr->strided_y0 = (char *)malloc(tr->strideLength * tr->mxtips * sizeof(char));
6369
tr->strided_yVector = (char **)malloc((tr->mxtips + 1) * sizeof(char *));
6371
for(i = 0; i < tr->mxtips; i++)
6372
tr->strided_yVector[i + 1] = &(tr->strided_y0[tr->strideLength * i]);
6374
tr->strided_aliaswgt = (int *)malloc(sizeof(int) * tr->strideLength);
6375
tr->strided_invariant = (int *)malloc(sizeof(int) * tr->strideLength);
6376
tr->strided_model = (int *)malloc(sizeof(int) * tr->strideLength);
6377
tr->strided_rateCategory = (int *)malloc(sizeof(int) * tr->strideLength);
6378
tr->strided_dataVector = (int *)malloc(sizeof(int) * tr->strideLength);
6380
tr->strided_wr = (double *)malloc(sizeof(double) * tr->strideLength);
6381
tr->strided_wr2 = (double *)malloc(sizeof(double) * tr->strideLength);
6382
tr->strided_siteLL_Vector = (double *)malloc(sizeof(double) * tr->strideLength);
6384
/* this is a bit ugly here */
6386
tr->strided_patrat = (double *)malloc(sizeof(double) * tr->originalCrunchedLength);
6390
tr->strided_patratStored = (double *)malloc(sizeof(double) * tr->strideLength);
6391
tr->strided_lhs = (double *)malloc(sizeof(double) * tr->strideLength);
6393
tr->strided_partitionData = (pInfo*)malloc(sizeof(pInfo) * tr->NumberOfModels);
6398
8154
static void *likelihoodThread(void *tData)
6400
threadData *td = (threadData*)tData;
6402
tree *localTree = (tree *)malloc(sizeof(tree));
6405
parsimonyStartIndex,
8156
threadData *td = (threadData*)tData;
8159
*localTree = (tree *)rax_malloc(sizeof(tree));
6411
const int n = NumberOfThreads;
6412
const int tid = td->threadNumber;
6415
cruncheddata *cdta = (cruncheddata *)malloc(sizeof(cruncheddata));
8164
n = NumberOfThreads,
8165
tid = td->threadNumber;
8167
#ifndef _PORTABLE_PTHREADS
6423
localTree->expArray = (int*)NULL;
6424
localTree->likelihoodArray = (double*)NULL;
6425
localTree->sumBuffer = (double*)NULL;
6426
localTree->cdta = cdta;
6427
localTree->mixedData = tr->mixedData;
6428
localTree->NumberOfModels = tr->NumberOfModels;
6429
localTree->mxtips = tr->mxtips;
6430
localTree->originalCrunchedLength = tr->originalCrunchedLength;
6431
localTree->multiBranch = tr->multiBranch;
6432
localTree->numBranches = tr->numBranches;
6434
localTree->tipVectorDNA = (double *)malloc(localTree->NumberOfModels * 64 * sizeof(double));
6435
localTree->tipVectorAA = (double *)malloc(localTree->NumberOfModels * 460 * sizeof(double));
6437
localTree->EV_DNA = (double *)malloc(localTree->NumberOfModels * 16 * sizeof(double));
6438
localTree->EV_AA = (double *)malloc(localTree->NumberOfModels * 400 * sizeof(double));
6440
localTree->EI_DNA = (double *)malloc(localTree->NumberOfModels * 12 * sizeof(double));
6441
localTree->EI_AA = (double *)malloc(localTree->NumberOfModels * 380 * sizeof(double));
6443
localTree->EIGN_DNA = (double *)malloc(localTree->NumberOfModels * 3 * sizeof(double));
6444
localTree->EIGN_AA = (double *)malloc(localTree->NumberOfModels * 19 * sizeof(double));
6446
localTree->frequencies_DNA = (double *)malloc(localTree->NumberOfModels * 4 * sizeof(double));
6447
localTree->frequencies_AA = (double *)malloc(localTree->NumberOfModels * 20 * sizeof(double));
6449
localTree->initialRates_DNA = (double *)malloc(localTree->NumberOfModels * 5 * sizeof(double));
6450
localTree->initialRates_AA = (double *)malloc(localTree->NumberOfModels * 190 * sizeof(double));
6452
localTree->xVector = (double **)malloc(localTree->mxtips * sizeof(double *));
6454
localTree->gammaRates = (double *)malloc(localTree->NumberOfModels * 4 * sizeof(double));
6455
localTree->invariants = (double *)malloc(localTree->NumberOfModels * sizeof(double));
6456
localTree->model = (int *) malloc(localTree->originalCrunchedLength * sizeof(int));
6458
localTree->partitionData = (pInfo*)malloc(sizeof(pInfo) * localTree->NumberOfModels);
6460
localTree->td[0].count = 0;
6461
localTree->td[0].ti = (traversalInfo *)malloc(sizeof(traversalInfo) * localTree->mxtips);
6463
localTree->NumberOfCategories = tr->NumberOfCategories;
6465
allocStrides(localTree);
6470
8171
printf("\nThis is RAxML Worker Pthread Number: %d\n", tid);
6476
while (myCycle == jobCycle);
6481
8175
while (myCycle == threadJob);
6482
8176
myCycle = threadJob;
6485
strided_Bounds(tid, localTree->cdta->endsite, n, &startIndex, &endIndex);
6486
strided_Bounds(tid, localTree->parsimonyLength, n, &parsimonyStartIndex, &parsimonyEndIndex);
6488
calcBounds(tid, n, 0, tr->cdta->endsite, &startIndex, &endIndex);
6489
calcBounds(tid, n, 0, tr->parsimonyLength, &parsimonyStartIndex, &parsimonyEndIndex);
8178
execFunction(tr, localTree, tid, n);
6492
execFunction(tr, localTree, startIndex, endIndex, parsimonyStartIndex, parsimonyEndIndex, tid, n);
6494
barrierBuffer[tid] = 1;
8181
barrierBuffer[tid] = 1;
6497
8184
return (void*)NULL;
6500
8187
static void startPthreads(tree *tr)
6502
8189
pthread_t *threads;
6503
8190
pthread_attr_t attr;
6505
8192
threadData *tData;
6508
/* TODO pthread_attr_getstackaddr and pthread_attr_setstackaddr */
6512
/* threadJob = -1; */
6516
8197
printf("\nThis is the RAxML Master Pthread\n");
6518
8199
pthread_attr_init(&attr);
6519
8200
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
6521
threads = (pthread_t *)malloc(NumberOfThreads * sizeof(pthread_t));
6522
tData = (threadData *)malloc(NumberOfThreads * sizeof(threadData));
6523
reductionBuffer = (double *)malloc(sizeof(double) * NumberOfThreads);
6524
reductionBufferTwo = (double *)malloc(sizeof(double) * NumberOfThreads);
6525
reductionBufferParsimony = (int *)malloc(sizeof(int) * NumberOfThreads);
6526
barrierBuffer = (int *)malloc(sizeof(int) * NumberOfThreads);
8202
pthread_mutex_init(&mutex , (pthread_mutexattr_t *)NULL);
8204
threads = (pthread_t *)rax_malloc(NumberOfThreads * sizeof(pthread_t));
8205
tData = (threadData *)rax_malloc(NumberOfThreads * sizeof(threadData));
8208
reductionBuffer = (volatile double *)rax_malloc(sizeof(volatile double) * NumberOfThreads * tr->NumberOfModels);
8209
reductionBufferTwo = (volatile double *)rax_malloc(sizeof(volatile double) * NumberOfThreads * tr->NumberOfModels);
8210
reductionBufferThree = (volatile double *)rax_malloc(sizeof(volatile double) * NumberOfThreads * tr->NumberOfModels);
8211
reductionBufferParsimony = (volatile int *)rax_malloc(sizeof(volatile int) * NumberOfThreads);
8214
barrierBuffer = (volatile char *)rax_malloc(sizeof(volatile char) * NumberOfThreads);
6532
8216
for(t = 0; t < NumberOfThreads; t++)
6533
barrierBuffer[t] = 0;
8217
barrierBuffer[t] = 0;
8220
branchInfos = (volatile branchInfo **)rax_malloc(sizeof(volatile branchInfo *) * NumberOfThreads);
6539
8222
for(t = 1; t < NumberOfThreads; t++)
6541
8224
tData[t].tr = tr;
6557
8240
/*************************************************************************************************************************************************************/
6565
8242
static int elwCompare(const void *p1, const void *p2)
6567
8244
elw *rc1 = (elw *)p1;
6568
8245
elw *rc2 = (elw *)p2;
6570
8247
double i = rc1->weight;
6571
8248
double j = rc2->weight;
8257
static int elwCompareLikelihood(const void *p1, const void *p2)
8259
elw *rc1 = (elw *)p1;
8260
elw *rc2 = (elw *)p2;
8272
static void computeLHTest(tree *tr, analdef *adef, char *bootStrapFileName)
8283
*treeFile = getNumberOfTrees(tr, bootStrapFileName, adef);
8286
*bestVector = (double*)rax_malloc(sizeof(double) * tr->cdta->endsite);
8288
for(i = 0; i < tr->cdta->endsite; i++)
8289
weightSum += (double)(tr->cdta->aliaswgt[i]);
8291
modOpt(tr, adef, TRUE, adef->likelihoodEpsilon);
8292
printBothOpen("Model optimization, best Tree: %f\n", tr->likelihood);
8293
bestLH = tr->likelihood;
8295
evaluateGenericVector(tr, tr->start);
8296
memcpy(bestVector, tr->perSiteLL, tr->cdta->endsite * sizeof(double));
8298
for(i = 0; i < tr->numberOfTrees; i++)
8310
treeReadLen(treeFile, tr, FALSE, FALSE, FALSE, adef, TRUE, FALSE);
8313
if(tr->optimizeAllTrees)
8315
treeEvaluate(tr, 1);
8316
modOpt(tr, adef, FALSE, adef->likelihoodEpsilon);
8319
treeEvaluate(tr, 2);
8321
tr->start = tr->nodep[1];
8323
currentLH = tr->likelihood;
8325
if(currentLH > bestLH)
8326
printBothOpen("Better tree found %d at %f\n", i, currentLH);
8328
evaluateGenericVector(tr, tr->start);
8333
for (j = 0; j < tr->cdta->endsite; j++)
8335
temp = bestVector[j] - tr->perSiteLL[j];
8336
wtemp = tr->cdta->aliaswgt[j] * temp;
8338
sum2 += wtemp * temp;
8341
sd = sqrt( weightSum * (sum2 - sum*sum / weightSum) / (weightSum - 1) );
8342
/* this is for a 5% p level */
8344
printBothOpen("Tree: %d Likelihood: %f D(LH): %f SD: %f Significantly Worse: %s (5%s), %s (2%s), %s (1%s)\n",
8345
i, currentLH, currentLH - bestLH, sd,
8346
(sum > 1.95996 * sd) ? "Yes" : " No", "%",
8347
(sum > 2.326 * sd) ? "Yes" : " No", "%",
8348
(sum > 2.57583 * sd) ? "Yes" : " No", "%");
8352
rax_free(bestVector);
8357
static void computePerSiteLLs(tree *tr, analdef *adef, char *bootStrapFileName)
8363
*treeFile = getNumberOfTrees(tr, bootStrapFileName, adef),
8364
*tlf = myfopen(perSiteLLsFileName, "wb");
8367
*unsortedSites = (double*)rax_malloc(sizeof(double) * tr->rdta->sites);
8371
fprintf(tlf, " %d %d\n", tr->numberOfTrees, tr->rdta->sites);
8373
for(i = 0; i < tr->numberOfTrees; i++)
8379
treeReadLen(treeFile, tr, FALSE, FALSE, FALSE, adef, TRUE, FALSE);
8380
assert(tr->ntips == tr->mxtips);
8384
if(adef->useBinaryModelFile)
8386
readBinaryModel(tr);
8387
evaluateGenericInitrav(tr, tr->start);
8388
treeEvaluate(tr, 2);
8391
modOpt(tr, adef, TRUE, adef->likelihoodEpsilon);
8395
if(tr->optimizeAllTrees)
8397
treeEvaluate(tr, 1);
8398
modOpt(tr, adef, FALSE, adef->likelihoodEpsilon);
8401
treeEvaluate(tr, 2);
8404
tr->start = tr->nodep[1];
8406
evaluateGenericVector(tr, tr->start);
8408
printBothOpen("Tree %d: %f\n", i, tr->likelihood);
8410
fprintf(tlf, "tr%d\t", i + 1);
8412
for(j = 0; j < tr->cdta->endsite; j++)
8414
for(k = 0; k < tr->rdta->sites; k++)
8415
if(j == tr->patternPosition[k])
8416
unsortedSites[tr->columnPosition[k] - 1] = tr->perSiteLL[j];
8419
for(j = 0; j < tr->rdta->sites; j++)
8420
fprintf(tlf, "%f ", unsortedSites[j]);
8427
rax_free(unsortedSites);
8432
static double cumulativeTreeLength(tree *tr, analdef *adef)
8436
if(adef->perGeneBranchLengths)
8445
for(model = 0; model < tr->NumberOfModels; model++)
8456
tlm = treeLength(tr, model);
8458
lower = tr->partitionData[model].lower;
8459
upper = tr->partitionData[model].upper;
8461
for(i = lower; i < upper; i++)
8462
wgt += tr->cdta->aliaswgt[i];
8464
accLength += ((double)wgt) * tlm;
8468
tl = accLength / ((double)accWgt);
8472
tl = treeLength(tr, 0);
8478
static void computeAllLHs(tree *tr, analdef *adef, char *bootStrapFileName)
8490
*treeFile = getNumberOfTrees(tr, bootStrapFileName, adef),
8491
*result = myfopen(resultFileName, "wb");
8496
INFILE = getNumberOfTrees(tr, bootStrapFileName, adef);
8498
bestT = (bestlist *) rax_malloc(sizeof(bestlist));
8500
initBestTree(bestT, 1, tr->mxtips);
8502
list = (elw *)rax_malloc(sizeof(elw) * tr->numberOfTrees);
8504
for(i = 0; i < tr->numberOfTrees; i++)
8506
treeReadLen(treeFile, tr, FALSE, FALSE, FALSE, adef, TRUE, FALSE);
8512
if(adef->useBinaryModelFile)
8514
readBinaryModel(tr);
8515
evaluateGenericInitrav(tr, tr->start);
8516
treeEvaluate(tr, 2);
8519
modOpt(tr, adef, TRUE, adef->likelihoodEpsilon);
8521
printBothOpen("Model optimization on first Tree: %f\n", tr->likelihood);
8525
evaluateGenericInitrav(tr, tr->start);
8528
treeEvaluateProgressive(tr);
8529
treeEvaluateRandom(tr, 2);
8531
if(tr->optimizeAllTrees)
8533
treeEvaluate(tr, 1);
8534
modOpt(tr, adef, FALSE, adef->likelihoodEpsilon);
8537
treeEvaluate(tr, 2);
8541
list[i].lh = tr->likelihood;
8543
Tree2String(tr->tree_string, tr, tr->start->back, TRUE, TRUE, FALSE, FALSE, TRUE, adef, SUMMARIZE_LH, FALSE, FALSE, FALSE, FALSE);
8545
fprintf(result, "%s", tr->tree_string);
8547
saveBestTree(bestT, tr);
8549
if(tr->likelihood > bestLH)
8550
bestLH = tr->likelihood;
8552
printBothOpen("Tree %d Likelihood %f Tree-Length %f\n", i, tr->likelihood, cumulativeTreeLength(tr, adef));
8555
qsort(list, tr->numberOfTrees, sizeof(elw), elwCompareLikelihood);
8557
printBothOpen("\n");
8558
for(i = 0; i < tr->numberOfTrees; i++)
8559
printBothOpen("%d %f\n", list[i].tree, list[i].lh);
8561
printBothOpen("\n");
8564
recallBestTree(bestT, 1, tr);
8565
evaluateGeneric(tr, tr->start);
8566
printf("Model optimization, %f <-> %f\n", bestLH, tr->likelihood);
8567
fprintf(infoFile, "Model optimization, %f <-> %f\n", bestLH, tr->likelihood);
8568
modOpt(tr, adef, TRUE, adef->likelihoodEpsilon);
8569
treeEvaluate(tr, 2);
8570
printf("Model optimization, %f <-> %f\n", bestLH, tr->likelihood);
8571
fprintf(infoFile, "Model optimization, %f <-> %f\n", bestLH, tr->likelihood);
8574
printBothOpen("\nAll evaluated trees with branch lengths written to File: %s\n", resultFileName);
8575
printBothOpen("\nTotal execution time: %f\n", gettime() - masterTime);
6585
8585
static void computeELW(tree *tr, analdef *adef, char *bootStrapFileName)
6599
int *originalRateCategories = (int*)malloc(tr->cdta->endsite * sizeof(int));
6600
int *originalInvariant = (int*)malloc(tr->cdta->endsite * sizeof(int));
6606
infoFile = fopen(infoFileName, "a");
6608
initModel(tr, tr->rdta, tr->cdta, adef);
6609
allocNodex(tr, adef);
6611
INFILE = fopen(bootStrapFileName, "r");
6612
while((ch = getc(INFILE)) != EOF)
6619
if(numberOfTrees < 2)
6621
printf("Error, there is only one tree in file %s which you want to use to conduct an ELW test\n", bootStrapFileName);
8588
*treeFile = getNumberOfTrees(tr, bootStrapFileName, adef);
8594
*originalRateCategories = (int*)rax_malloc(tr->cdta->endsite * sizeof(int)),
8595
*originalInvariant = (int*)rax_malloc(tr->cdta->endsite * sizeof(int));
8610
initModel(tr, tr->rdta, tr->cdta, adef);
8612
if(tr->numberOfTrees < 2)
8614
printBothOpen("Error, there is only one tree in file %s which you want to use to conduct an ELW test\n", bootStrapFileName);
6626
printf("\n\nFound %d trees in File %s\n\n", numberOfTrees, bootStrapFileName);
6627
fprintf(infoFile, "\n\nFound %d trees in File %s\n\n", numberOfTrees, bootStrapFileName);
6629
bootweights = (elw *)malloc(sizeof(elw) * numberOfTrees);
6631
lhs = (double **)malloc(sizeof(double *) * numberOfTrees);
6633
for(k = 0; k < numberOfTrees; k++)
6634
lhs[k] = (double *)malloc(sizeof(double) * adef->multipleRuns);
6636
lhweights = (double **)malloc(sizeof(double *) * numberOfTrees);
6638
for(k = 0; k < numberOfTrees; k++)
6639
lhweights[k] = (double *)malloc(sizeof(double) * adef->multipleRuns);
6642
treeReadLen(INFILE, tr, adef);
6647
This is for testing only !
6648
for(i = 0; i < numberOfTrees; i++)
6650
treeReadLen(INFILE, tr, adef);
6651
treeEvaluate(tr, 2.0);
6652
bootweights[i].lh = tr->likelihood;
6657
printf("Model optimization, first Tree: %f\n", tr->likelihood);
6658
fprintf(infoFile, "Model optimization, first Tree: %f\n", tr->likelihood);
8619
bootweights = (elw *)rax_malloc(sizeof(elw) * tr->numberOfTrees);
8621
rankTest = (elw **)rax_malloc(sizeof(elw *) * adef->multipleRuns);
8623
for(k = 0; k < adef->multipleRuns; k++)
8624
rankTest[k] = (elw *)rax_malloc(sizeof(elw) * tr->numberOfTrees);
8626
lhs = (double **)rax_malloc(sizeof(double *) * tr->numberOfTrees);
8628
for(k = 0; k < tr->numberOfTrees; k++)
8629
lhs[k] = (double *)rax_calloc(adef->multipleRuns, sizeof(double));
8632
lhweights = (double **)rax_malloc(sizeof(double *) * tr->numberOfTrees);
8634
for(k = 0; k < tr->numberOfTrees; k++)
8635
lhweights[k] = (double *)rax_calloc(adef->multipleRuns, sizeof(double));
8637
/* read in the first tree and optimize ML params on it */
8639
treeReadLen(treeFile, tr, FALSE, FALSE, FALSE, adef, TRUE, FALSE);
8641
modOpt(tr, adef, TRUE, adef->likelihoodEpsilon);
8644
printBothOpen("Model optimization, first Tree: %f\n", tr->likelihood);
6661
8646
memcpy(originalRateCategories, tr->cdta->rateCategory, sizeof(int) * tr->cdta->endsite);
6662
8647
memcpy(originalInvariant, tr->invariant, sizeof(int) * tr->cdta->endsite);
6664
8649
assert(adef->boot > 0);
6665
/* this is ugly, should be passed as param to computenextreplicate() */
8651
/* TODO this is ugly, should be passed as param to computenextreplicate() */
6666
8653
startSeed = adef->boot;
6669
for(i = 0; i < numberOfTrees; i++)
6671
treeReadLen(INFILE, tr, adef);
8657
now read the trees one by one, do a couple of BS replicates and re-compute their likelihood
8661
/* loop over all trees */
8663
for(i = 0; i < tr->numberOfTrees; i++)
8666
/* read in new tree */
8668
treeReadLen(treeFile, tr, FALSE, FALSE, FALSE, adef, TRUE, FALSE);
8670
if(tr->optimizeAllTrees)
8672
treeEvaluate(tr, 1);
8673
modOpt(tr, adef, FALSE, adef->likelihoodEpsilon);
8676
treeEvaluate(tr, 2.0);
8678
printBothOpen("Original tree %d likelihood %f\n", i, tr->likelihood);
8680
if(tr->likelihood > best)
8682
best = tr->likelihood;
8685
/* reset branches to default values */
6672
8687
resetBranches(tr);
6673
adef->rapidBoot = startSeed;
8689
/* reset BS random seed, we want to use the same replicates for every tree */
8691
adef->rapidBoot = startSeed;
6675
8693
for(k = 0; k < adef->multipleRuns; k++)
6677
computeNextReplicate(tr, adef, originalRateCategories, originalInvariant);
8695
/* compute the next BS replicate, i.e., re-sample alignment columns */
8697
computeNextReplicate(tr, &adef->rapidBoot, originalRateCategories, originalInvariant, TRUE, TRUE);
8699
evaluateGenericInitrav(tr, tr->start);
8701
/* if this is the first replicate for this tree do a slightly more thorough br-len opt */
8702
/* we don't re-estimate ML model params (except branches) for every replicate to make things a bit faster */
6680
8705
treeEvaluate(tr, 2.0);
6682
treeEvaluate(tr, 0.5);
6683
/*printf("%d %d %f\n", i, k, tr->likelihood);*/
6684
lhs[i][k] = tr->likelihood;
6687
reductionCleanup(tr, adef, originalRateCategories, originalInvariant);
8707
treeEvaluate(tr, 0.5);
8709
/* store the likelihood of replicate k for tree i */
8710
lhs[i][k] = tr->likelihood;
8712
rankTest[k][i].lh = tr->likelihood;
8713
rankTest[k][i].tree = i;
8716
/* restore the original alignment to start BS procedure for the next tree */
8718
reductionCleanup(tr, originalRateCategories, originalInvariant);
8721
assert(bestIndex >= 0 && best != unlikely);
8723
printBothOpen("Best-Scoring tree is tree %d with score %f\n", bestIndex, best);
8726
/* now loop over all replicates */
6692
8728
for(k = 0; k < adef->multipleRuns; k++)
6694
double best = unlikely;
8730
/* find best score for this replicate */
6697
for(i = 0; i < numberOfTrees; i++)
8732
for(i = 0, best = unlikely; i < tr->numberOfTrees; i++)
6698
8733
if(lhs[i][k] > best)
6699
8734
best = lhs[i][k];
6701
for(i = 0; i < numberOfTrees; i++)
8736
/* compute exponential weights w.r.t. the best likelihood for replicate k */
8738
for(i = 0; i < tr->numberOfTrees; i++)
6702
8739
lhweights[i][k] = exp(lhs[i][k] - best);
6704
for(i = 0; i < numberOfTrees; i++)
8741
/* sum over all exponential weights */
8743
for(i = 0, sum = 0.0; i < tr->numberOfTrees; i++)
6705
8744
sum += lhweights[i][k];
6707
for(i = 0; i < numberOfTrees; i++)
8746
/* and normalize by the sum */
8748
for(i = 0; i < tr->numberOfTrees; i++)
6708
8749
lhweights[i][k] = lhweights[i][k] / sum;
6714
for(i = 0; i < numberOfTrees; i++)
8753
/* now loop over all trees */
8755
for(i = 0; i < tr->numberOfTrees; i++)
6718
for(k = 0; k < adef->multipleRuns; k++)
8758
/* loop to sum over all replicate weights for tree i */
8760
for(k = 0, sum = 0.0; k < adef->multipleRuns; k++)
6719
8761
sum += lhweights[i][k];
6721
bootweights[i].weight = sum / ((double)adef->multipleRuns);
8763
/* set the weight and the index of the respective tree */
8765
bootweights[i].weight = sum / ((double)adef->multipleRuns);
6722
8766
bootweights[i].tree = i;
6725
qsort(bootweights, numberOfTrees, sizeof(elw), elwCompare);
8769
/* now just sort the tree collection by weights */
8771
qsort(bootweights, tr->numberOfTrees, sizeof(elw), elwCompare);
8773
printBothOpen("Tree\t Posterior Probability \t Cumulative posterior probability\n");
8775
/* loop over the sorted array of trees and print out statistics */
8777
for(i = 0, sum = 0.0; i < tr->numberOfTrees; i++)
8779
sum += bootweights[i].weight;
8781
printBothOpen("%d\t\t %f \t\t %f\n", bootweights[i].tree, bootweights[i].weight, sum);
8788
// now compute the super-duper rank test
8790
printBothOpen("\n\nNow also computing the super-duper rank test, though I still don't\n");
8791
printBothOpen("understand what it actually means. What this thing does is to initially determine\n");
8792
printBothOpen("the best-scoring ML tree on the original alignment and then the scores of the input\n");
8793
printBothOpen("trees on the number of specified Bootstrap replicates. Then it sorts the scores of the trees\n");
8794
printBothOpen("for every bootstrap replicate and determines the rank of the best-scoring tree on every BS\n");
8795
printBothOpen("replicate. It then prints out how many positions in the sorted lists of thz BS replicates \n");
8796
printBothOpen("must be included in order for the best scoring tree to appear 95 and 99 times respectively.\n");
8797
printBothOpen("This gives some intuition about how variable the score order of the trees will be under\n");
8798
printBothOpen("slight alterations of the data.\n\n");
8800
// sort all BS replicates accodring to likelihood scores
8802
for(i = 0; i < adef->multipleRuns; i++)
8803
qsort(rankTest[i], tr->numberOfTrees, sizeof(elw), elwCompareLikelihood);
8806
// search for our best-scoring tree in every sorted array of likelihood scores
8808
for(i = 0; i < adef->multipleRuns; i++)
8810
for(k = 0; k < tr->numberOfTrees; k++)
8812
if(rankTest[i][k].tree == bestIndex)
8817
for(k = 0; k < tr->numberOfTrees; k++)
8820
countBest[k] += countBest[k - 1];
8822
printBothOpen("Number of Occurences of best-scoring tree for %d BS replicates up to position %d in sorted list: %d\n",
8823
adef->multipleRuns, k, countBest[k]);
8825
if(cutOff95 == -1 && countBest[k] <= (int)((double)adef->multipleRuns * 0.95 + 0.5))
8828
if(cutOff99 == -1 && countBest[k] <= (int)((double)adef->multipleRuns * 0.99 + 0.5))
8832
assert(countBest[k-1] == adef->multipleRuns);
8833
assert(cutOff95 >= 0 && cutOff99 >= 0);
8835
printBothOpen("\n95%s cutoff reached after including %d out of %d sorted likelihood columns\n", "%", countBest[cutOff95], adef->multipleRuns);
8837
printBothOpen("99%s cutoff reached after including %d out of %d sorted likelihood columns\n\n", "%", countBest[cutOff99], adef->multipleRuns);
8841
printBothOpen("\nTotal execution time: %f\n\n", gettime() - masterTime);
8843
rax_free(originalRateCategories);
8844
rax_free(originalInvariant);
8852
static void computeDistances(tree *tr, analdef *adef)
8854
int i, j, modelCounter;
8855
double z0[NUM_BRANCHES];
8856
double result[NUM_BRANCHES];
8858
char distanceFileName[1024];
8863
strcpy(distanceFileName, workdir);
8864
strcat(distanceFileName, "RAxML_distances.");
8865
strcat(distanceFileName, run_id);
8867
out = myfopen(distanceFileName, "wb");
8869
modOpt(tr, adef, TRUE, adef->likelihoodEpsilon);
8871
printBothOpen("\nLog Likelihood Score after parameter optimization: %f\n\n", tr->likelihood);
8872
printBothOpen("\nComputing pairwise ML-distances ...\n");
8874
for(modelCounter = 0; modelCounter < tr->NumberOfModels; modelCounter++)
8875
z0[modelCounter] = defaultz;
8879
for(i = 1; i <= tr->mxtips; i++)
8880
for(j = i + 1; j <= tr->mxtips; j++)
8884
makenewzGenericDistance(tr, 10, z0, result, i, j);
8890
for(k = 0, x = 0.0; k < tr->numBranches; k++)
8892
assert(tr->partitionContributions[k] != -1.0);
8893
assert(tr->fracchanges[k] != -1.0);
8897
x += (-log(z) * tr->fracchanges[k]) * tr->partitionContributions[k];
8905
x = -log(z) * tr->fracchange;
8908
/*printf("%s-%s \t %f\n", tr->nameList[i], tr->nameList[j], x);*/
8909
fprintf(out, "%s %s \t %f\n", tr->nameList[i], tr->nameList[j], x);
8916
printBothOpen("\nTime for pair-wise ML distance computation of %d distances: %f seconds\n",
8917
(tr->mxtips * tr->mxtips - tr->mxtips) / 2, t);
8918
printBothOpen("\nDistances written to file: %s\n", distanceFileName);
8927
static void morphologicalCalibration(tree *tr, analdef *adef)
8930
replicates = adef->multipleRuns,
8932
*significanceCounter = (int*)rax_malloc(sizeof(int) * tr->cdta->endsite);
8935
*reference = (double*)rax_malloc(sizeof(double) * tr->cdta->endsite);
8938
integerFileName[1024] = "";
8945
printBothOpen("You did not specify the number of random trees to be generated by \"-#\" !\n");
8946
printBothOpen("Automatically setting it to 100.\n");
8950
printBothOpen("Likelihood on Reference tree: %f\n\n", tr->likelihood);
8952
evaluateGenericVector(tr, tr->start);
8954
for(i = 0; i < tr->cdta->endsite; i++)
8955
significanceCounter[i] = 0;
8957
memcpy(reference, tr->perSiteLL, tr->cdta->endsite * sizeof(double));
8959
for(i = 0; i < replicates; i++)
8963
printBothOpen("Testing Random Tree [%d]\n", i);
8964
makeRandomTree(tr, adef);
8965
evaluateGenericInitrav(tr, tr->start);
8966
treeEvaluate(tr, 2);
8969
don't really need modOpt here
8970
modOpt(tr, adef, TRUE, adef->likelihoodEpsilon);
8973
evaluateGenericVector(tr, tr->start);
8976
for(k = 0; k < tr->cdta->endsite; k++)
8977
if(tr->perSiteLL[k] <= reference[k])
8978
significanceCounter[k] = significanceCounter[k] + 1;
8981
strcpy(integerFileName, workdir);
8982
strcat(integerFileName, "RAxML_weights.");
8983
strcat(integerFileName, run_id);
8985
integerFile = myfopen(integerFileName, "wb");
8987
for(i = 0; i < tr->cdta->endsite; i++)
8988
fprintf(integerFile, "%d ", significanceCounter[i]);
8990
fclose(integerFile);
8992
printBothOpen("RAxML calibrated integer weight file written to: %s\n", integerFileName);
9000
static int sortLex(const void *a, const void *b)
9009
while((aPtr[i] != '\0') && (bPtr[i] != '\0') && (aPtr[i] == bPtr[i]))
9012
if((aPtr[i] == '\0') || (bPtr[i] == '\0'))
9013
return (bPtr[i] == '\0');
9015
return (aPtr[i] > bPtr[i]);
9019
static void extractTaxaFromTopology(tree *tr, rawdata *rdta, cruncheddata *cdta, char fileName[1024])
9022
*f = myfopen(fileName, "rb");
9026
buffer[nmlngth + 2];
9034
nameList = (char**)rax_malloc(sizeof(char*) * taxaSize);
9036
while((c = fgetc(f)) != ';')
9038
if(c == '(' || c == ',')
9041
if(c == '(' || c == ',')
9052
while(c != ':' && c != ')' && c != ',');
9055
if(taxaCount == taxaSize)
9058
nameList = (char **)rax_realloc(nameList, sizeof(char*) * taxaSize, FALSE);
9061
nameList[taxaCount] = (char*)rax_malloc(sizeof(char) * (strlen(buffer) + 1));
9062
strcpy(nameList[taxaCount], buffer);
9072
/* BEGIN ensuring no taxon occurs twice */
6731
/*printf("Tree\t Posterior Probability \t Cumulative posterior probability \t Original Likelihood\n");*/
6732
printf("Tree\t Posterior Probability \t Cumulative posterior probability\n");
6733
fprintf(infoFile, "Tree\t Posterior Probability \t Cumulative posterior probability\n");
6734
for(i = 0; i < numberOfTrees; i++)
6736
sum += bootweights[i].weight;
6737
/*printf("%d\t\t %f \t\t %f \t\t\t %f\n", bootweights[i].tree, bootweights[i].weight, sum, bootweights[i].lh);*/
6738
printf("%d\t\t %f \t\t %f\n", bootweights[i].tree, bootweights[i].weight, sum);
6739
fprintf(infoFile, "%d\t\t %f \t\t %f\n", bootweights[i].tree, bootweights[i].weight, sum);
9075
**taxList = (char **)rax_malloc(sizeof(char *) * (size_t)taxaCount);
9077
for(i = 0; i < taxaCount; ++i)
9078
taxList[i] = nameList[i];
9080
qsort(taxList, taxaCount, sizeof(char**), sortLex);
9082
for(i = 1; i < taxaCount; ++i)
9083
if(strcmp(taxList[i], taxList[i-1]) == 0)
9085
printf("A taxon labelled by %s appears twice in the first tree of tree collection %s, exiting ...\n", buffer, bootStrapFile);
6743
free(originalRateCategories);
6744
free(originalInvariant);
6755
int main (int argc, char *argv[])
9094
printf("Found a total of %d taxa in first tree of tree collection %s\n", taxaCount, bootStrapFile);
9095
printf("Expecting all remaining trees in collection to have the same taxon set\n");
9097
rdta->numsp = taxaCount;
9099
tr->nameList = (char **)rax_malloc(sizeof(char *) * (taxaCount + 1));
9100
for(i = 1; i <= taxaCount; i++)
9101
tr->nameList[i] = nameList[i - 1];
9108
if (rdta->numsp < 4)
9110
printf("TOO FEW SPECIES, tree contains only %d species\n", rdta->numsp);
9114
tr->nameHash = initStringHashTable(10 * taxaCount);
9115
for(i = 1; i <= taxaCount; i++)
9116
addword(tr->nameList[i], tr->nameHash, i);
9122
static void myfwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream)
9125
bytes_written = fwrite(ptr, size, nmemb, stream);
9127
assert(bytes_written = nmemb);
9131
static void writeLG4(tree *tr, int model, int dataType, FILE *f, partitionLengths pLengths[MAX_MODEL])
9133
if(tr->partitionData[model].protModels == LG4 || tr->partitionData[model].protModels == LG4X)
9138
for(k = 0; k < 4; k++)
9140
myfwrite(tr->partitionData[model].EIGN_LG4[k], sizeof(double), pLengths[dataType].eignLength, f);
9141
myfwrite(tr->partitionData[model].EV_LG4[k], sizeof(double), pLengths[dataType].evLength, f);
9142
myfwrite(tr->partitionData[model].EI_LG4[k], sizeof(double), pLengths[dataType].eiLength, f);
9143
myfwrite(tr->partitionData[model].frequencies_LG4[k], sizeof(double), pLengths[dataType].frequenciesLength, f);
9144
myfwrite(tr->partitionData[model].tipVector_LG4[k], sizeof(double), pLengths[dataType].tipVectorLength, f);
9145
myfwrite(tr->partitionData[model].substRates_LG4[k], sizeof(double), pLengths[dataType].substRatesLength, f);
9151
void writeBinaryModel(tree *tr)
9157
*f = myfopen(binaryModelParamsOutputFileName, "w");
9161
myfwrite(tr->cdta->rateCategory, sizeof(int), tr->rdta->sites + 1, f);
9162
myfwrite(tr->cdta->patrat, sizeof(double), tr->rdta->sites + 1, f);
9163
myfwrite(tr->cdta->patratStored, sizeof(double), tr->rdta->sites + 1, f);
9165
/* partition contributions for fracchange */
9167
myfwrite(tr->partitionContributions, sizeof(double), tr->NumberOfModels, f);
9171
myfwrite(&tr->fracchange, sizeof(double), 1, f);
9172
myfwrite(tr->fracchanges, sizeof(double), (size_t)tr->NumberOfModels, f);
9174
myfwrite(&tr->rawFracchange, sizeof(double), 1, f);
9175
myfwrite(tr->rawFracchanges, sizeof(double), (size_t)tr->NumberOfModels, f);
9179
for(model = 0; model < tr->NumberOfModels; model++)
9182
dataType = tr->partitionData[model].dataType;
9184
myfwrite(tr->partitionData[model].weightExponents, sizeof(double), 4, f);
9185
myfwrite(tr->partitionData[model].weights, sizeof(double), 4, f);
9187
myfwrite(tr->partitionData[model].gammaRates, sizeof(double), 4, f);
9189
myfwrite(tr->partitionData[model].EIGN, sizeof(double), pLengths[dataType].eignLength, f);
9190
myfwrite(tr->partitionData[model].EV, sizeof(double), pLengths[dataType].evLength, f);
9191
myfwrite(tr->partitionData[model].EI, sizeof(double), pLengths[dataType].eiLength, f);
9193
myfwrite(tr->partitionData[model].frequencies, sizeof(double), pLengths[dataType].frequenciesLength, f);
9194
myfwrite(tr->partitionData[model].tipVector, sizeof(double), pLengths[dataType].tipVectorLength, f);
9195
myfwrite(tr->partitionData[model].substRates, sizeof(double), pLengths[dataType].substRatesLength, f);
9196
myfwrite(&(tr->partitionData[model].alpha), sizeof(double), 1, f);
9197
myfwrite(&(tr->partitionData[model].propInvariant), sizeof(double), 1, f);
9199
myfwrite(&(tr->partitionData[model].numberOfCategories), sizeof(int), 1, f);
9201
myfwrite(&(tr->partitionData[model].protModels), sizeof(int), 1, f);
9202
myfwrite(&(tr->partitionData[model].autoProtModels), sizeof(int), 1, f);
9204
myfwrite(tr->partitionData[model].perSiteRates, sizeof(double), tr->partitionData[model].numberOfCategories, f);
9205
myfwrite(tr->partitionData[model].unscaled_perSiteRates, sizeof(double), tr->partitionData[model].numberOfCategories, f);
9207
writeLG4(tr, model, dataType, f, pLengths);
9210
printBothOpen("\nModel parameters (binary file format) written to: %s\n", binaryModelParamsOutputFileName);
9215
static void myfread(void *ptr, size_t size, size_t nmemb, FILE *stream)
9220
bytes_read = fread(ptr, size, nmemb, stream);
9222
assert(bytes_read == nmemb);
9226
static void readLG4(tree *tr, int model, int dataType, FILE *f, partitionLengths pLengths[MAX_MODEL])
9228
if(tr->partitionData[model].protModels == LG4 || tr->partitionData[model].protModels == LG4X)
9233
for(k = 0; k < 4; k++)
9235
myfread(tr->partitionData[model].EIGN_LG4[k], sizeof(double), pLengths[dataType].eignLength, f);
9236
myfread(tr->partitionData[model].EV_LG4[k], sizeof(double), pLengths[dataType].evLength, f);
9237
myfread(tr->partitionData[model].EI_LG4[k], sizeof(double), pLengths[dataType].eiLength, f);
9238
myfread(tr->partitionData[model].frequencies_LG4[k], sizeof(double), pLengths[dataType].frequenciesLength, f);
9239
myfread(tr->partitionData[model].tipVector_LG4[k], sizeof(double), pLengths[dataType].tipVectorLength, f);
9240
myfread(tr->partitionData[model].substRates_LG4[k], sizeof(double), pLengths[dataType].substRatesLength, f);
9245
void readBinaryModel(tree *tr)
9254
printBothOpen("\nRAxML is reading a binary model file and not optimizing model params\n");
9256
f = fopen(binaryModelParamsInputFileName, "r");
9260
myfread(tr->cdta->rateCategory, sizeof(int), (size_t)(tr->rdta->sites + 1), f);
9261
myfread(tr->cdta->patrat, sizeof(double), (size_t)(tr->rdta->sites + 1), f);
9262
myfread(tr->cdta->patratStored, sizeof(double), (size_t)(tr->rdta->sites + 1), f);
9264
/* partition contributions for fracchange */
9266
myfread(tr->partitionContributions, sizeof(double), tr->NumberOfModels, f);
9270
myfread(&tr->fracchange, sizeof(double), 1, f);
9271
myfread(tr->fracchanges, sizeof(double), (size_t)tr->NumberOfModels, f);
9273
myfread(&tr->rawFracchange, sizeof(double), 1, f);
9274
myfread(tr->rawFracchanges, sizeof(double), (size_t)tr->NumberOfModels, f);
9278
for(model = 0; model < tr->NumberOfModels; model++)
9281
dataType = tr->partitionData[model].dataType;
9283
myfread(tr->partitionData[model].weightExponents, sizeof(double), 4, f);
9284
myfread(tr->partitionData[model].weights, sizeof(double), 4, f);
9286
myfread(tr->partitionData[model].gammaRates, sizeof(double), 4, f);
9288
myfread(tr->partitionData[model].EIGN, sizeof(double), (size_t)(pLengths[dataType].eignLength), f);
9289
myfread(tr->partitionData[model].EV, sizeof(double), (size_t)(pLengths[dataType].evLength), f);
9290
myfread(tr->partitionData[model].EI, sizeof(double), (size_t)(pLengths[dataType].eiLength), f);
9292
myfread(tr->partitionData[model].frequencies, sizeof(double), (size_t)(pLengths[dataType].frequenciesLength), f);
9293
myfread(tr->partitionData[model].tipVector, sizeof(double), (size_t)(pLengths[dataType].tipVectorLength), f);
9294
myfread(tr->partitionData[model].substRates, sizeof(double), (size_t)(pLengths[dataType].substRatesLength), f);
9296
myfread(&(tr->partitionData[model].alpha), sizeof(double), 1, f);
9297
myfread(&(tr->partitionData[model].propInvariant), sizeof(double), 1, f);
9299
myfread(&(tr->partitionData[model].numberOfCategories), sizeof(int), 1, f);
9301
myfread(&(tr->partitionData[model].protModels), sizeof(int), 1, f);
9302
myfread(&(tr->partitionData[model].autoProtModels), sizeof(int), 1, f);
9304
myfread(tr->partitionData[model].perSiteRates, sizeof(double), tr->partitionData[model].numberOfCategories, f);
9305
myfread(tr->partitionData[model].unscaled_perSiteRates, sizeof(double), tr->partitionData[model].numberOfCategories, f);
9307
readLG4(tr, model, dataType, f, pLengths);
9310
#ifdef _USE_PTHREADS
9311
masterBarrier(THREAD_COPY_INIT_MODEL, tr);
9312
//masterBarrier(THREAD_RESET_MODEL, tr);
9315
if(tr->rateHetModel == CAT)
9317
#ifdef _USE_PTHREADS
9318
masterBarrier(THREAD_COPY_RATE_CATS, tr);
9326
for(model = 0; model < tr->NumberOfModels; model++)
9331
for(i = tr->partitionData[model].lower; i < tr->partitionData[model].upper; i++, localCounter++)
9332
tr->partitionData[model].rateCategory[localCounter] = tr->cdta->rateCategory[i];
9344
static int iterated_bitcount(unsigned int n)
9358
static char bits_in_16bits [0x1u << 16];
9360
static void compute_bits_in_16bits(void)
9364
assert(sizeof(unsigned int) == 4);
9366
for (i = 0; i < (0x1u<<16); i++)
9367
bits_in_16bits[i] = iterated_bitcount(i);
9372
unsigned int precomputed16_bitcount (unsigned int n)
9374
/* works only for 32-bit int*/
9376
return bits_in_16bits [n & 0xffffu]
9377
+ bits_in_16bits [(n >> 16) & 0xffffu] ;
9380
/* functions to compute likelihoods on quartets */
9383
/* a parser error function */
9385
static void parseError(int c)
9387
printf("Quartet grouping parser expecting symbol: %c\n", c);
9391
/* parser for the taxon grouping format, one has to specify 4 groups in a newick-like
9392
format from which quartets (a substantially smaller number compared to ungrouped quartets)
9395
static void groupingParser(char *quartetGroupFileName, int *groups[4], int groupSize[4], tree *tr)
9398
*f = myfopen(quartetGroupFileName, "r");
9408
printf("%s\n", quartetGroupFileName);
9410
for(i = 0; i < 4; i++)
9412
groups[i] = (int*)rax_malloc(sizeof(int) * (tr->mxtips + 1));
9416
while((ch = getc(f)) != EOF)
9429
n = treeFindTipName(f, tr, FALSE);
9430
if(n <= 0 || n > tr->mxtips)
9431
printf("parsing error, raxml is expecting to read a taxon name, found \"%c\" instead\n", ch);
9432
assert(n > 0 && n <= tr->mxtips);
9434
groups[groupCounter][groupSize[groupCounter]] = n;
9435
groupSize[groupCounter] = groupSize[groupCounter] + 1;
9453
if(groupCounter == 4)
9468
printf("Error: extra char after ; %c\n", ch);
9477
assert(groupCounter == 4);
9478
assert(taxonCounter == tr->mxtips);
9480
printBothOpen("Successfully parsed quartet groups\n\n");
9482
/* print out the taxa that have been assigned to the 4 groups */
9484
for(i = 0; i < 4; i++)
9489
printBothOpen("group %d has %d members\n", i, groupSize[i]);
9491
for(j = 0; j < groupSize[i]; j++)
9492
printBothOpen("%s\n", tr->nameList[groups[i][j]]);
9494
printBothOpen("\n");
9501
static double quartetLikelihood(tree *tr, nodeptr p1, nodeptr p2, nodeptr p3, nodeptr p4, nodeptr q1, nodeptr q2)
9504
build a quartet tree, where q1 and q2 are the inner nodes and p1, p2, p3, p4
9505
are the tips of the quartet where the sequence data is located.
9507
initially set all branch lengths to the default value.
9511
for the tree and node data structure used, please see one of the last chapter's of Joe
9515
hookupDefault(q1, q2, tr->numBranches);
9517
hookupDefault(q1->next, p1, tr->numBranches);
9518
hookupDefault(q1->next->next, p2, tr->numBranches);
9520
hookupDefault(q2->next, p3, tr->numBranches);
9521
hookupDefault(q2->next->next, p4, tr->numBranches);
9523
/* now compute the likelihood vectors at the two inner nodes of the tree,
9524
here the virtual root is located between the two inner nodes q1 and q2.
9527
newviewGeneric(tr, q1);
9528
newviewGeneric(tr, q2);
9530
/* call a function that is also used for NNIs that iteratively optimizes all
9531
5 branch lengths in the tree.
9533
Note that 16 is an important tuning parameter, this integer value determines
9534
how many times we visit all branches until we give up further optimizing the branch length
9538
nniSmooth(tr, q1, 16);
9540
/* now compute the log likelihood of the tree for the virtual root located between inner nodes q1 and q2 */
9547
evaluateGeneric(tr, q1->back->next->next);
9553
newviewGeneric(tr, q1);
9554
newviewGeneric(tr, q2);
9555
evaluateGeneric(tr, q1);
9558
assert(ABS(l - tr->likelihood) < 0.00001);
9562
return (tr->likelihood);
9589
#define QUARTET_MESSAGE_SIZE sizeof(quartetResult)
9590
#define QUARTET_MESSAGE 0
9593
static void startQuartetMaster(tree *tr, FILE *f)
9596
*qr = (quartetResult *)rax_malloc(sizeof(quartetResult));
9606
assert(processID == 0);
9610
MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
9612
switch(status.MPI_TAG)
9614
case QUARTET_MESSAGE:
9615
MPI_Recv((void *)(qr), QUARTET_MESSAGE_SIZE, MPI_BYTE, status.MPI_SOURCE, QUARTET_MESSAGE, MPI_COMM_WORLD, &recvStatus);
9616
fprintf(f, "%d %d | %d %d: %f\n", qr->a1, qr->b1, qr->c1, qr->d1, qr->l1);
9617
fprintf(f, "%d %d | %d %d: %f\n", qr->a2, qr->b2, qr->c2, qr->d2, qr->l2);
9618
fprintf(f, "%d %d | %d %d: %f\n", qr->a3, qr->b3, qr->c3, qr->d3, qr->l3);
9621
MPI_Recv(&dummy, 1, MPI_INT, status.MPI_SOURCE, I_AM_DONE, MPI_COMM_WORLD, &recvStatus);
9623
if(workersDone == processes -1)
9638
static void computeAllThreeQuartets(tree *tr, nodeptr q1, nodeptr q2, int t1, int t2, int t3, int t4, FILE *f)
9640
/* set the tip nodes to different sequences
9641
with the tip indices t1, t2, t3, t4 */
9654
*qr = (quartetResult *)rax_malloc(sizeof(quartetResult));
9659
/* compute the likelihood of tree ((p1, p2), (p3, p4)) */
9661
l = quartetLikelihood(tr, p1, p2, p3, p4, q1, q2);
9663
#ifndef _QUARTET_MPI
9664
fprintf(f, "%d %d | %d %d: %f\n", p1->number, p2->number, p3->number, p4->number, l);
9666
qr->a1 = p1->number;
9667
qr->b1 = p2->number;
9668
qr->c1 = p3->number;
9669
qr->d1 = p4->number;
9672
/* second quartet */
9674
/* compute the likelihood of tree ((p1, p3), (p2, p4)) */
9676
l = quartetLikelihood(tr, p1, p3, p2, p4, q1, q2);
9678
#ifndef _QUARTET_MPI
9679
fprintf(f, "%d %d | %d %d: %f\n", p1->number, p3->number, p2->number, p4->number, l);
9681
qr->a2 = p1->number;
9682
qr->b2 = p3->number;
9683
qr->c2 = p2->number;
9684
qr->d2 = p4->number;
9689
/* compute the likelihood of tree ((p1, p4), (p2, p3)) */
9691
l = quartetLikelihood(tr, p1, p4, p2, p3, q1, q2);
9693
#ifndef _QUARTET_MPI
9694
fprintf(f, "%d %d | %d %d: %f\n", p1->number, p4->number, p2->number, p3->number, l);
9696
qr->a3 = p1->number;
9697
qr->b3 = p4->number;
9698
qr->c3 = p2->number;
9699
qr->d3 = p3->number;
9702
MPI_Send((void *)qr, QUARTET_MESSAGE_SIZE, MPI_BYTE, 0, QUARTET_MESSAGE, MPI_COMM_WORLD);
9704
assert(processID > 0);
9709
/* the three quartet options: all quartets, randomly sub-sample a certain number n of quartets,
9710
subsample all quartets from 4 pre-defined groups of quartets */
9712
#define ALL_QUARTETS 0
9713
#define RANDOM_QUARTETS 1
9714
#define GROUPED_QUARTETS 2
9718
static void computeQuartets(tree *tr, analdef *adef, rawdata *rdta, cruncheddata *cdta)
9720
/* some indices for generating quartets in an arbitrary way */
9723
flavor = ALL_QUARTETS,
9737
randomQuartets = (unsigned long int)(adef->multipleRuns),
9739
numberOfQuartets = ((unsigned long int)tr->mxtips * ((unsigned long int)tr->mxtips - 1) * ((unsigned long int)tr->mxtips - 2) * ((unsigned long int)tr->mxtips - 3)) / 24;
9741
/* use two inner nodes for building quartet trees */
9744
q1 = tr->nodep[tr->mxtips + 1],
9745
q2 = tr->nodep[tr->mxtips + 2];
9748
quartetFileName[1024];
9753
/* build output file name */
9755
strcpy(quartetFileName, workdir);
9756
strcat(quartetFileName, "RAxML_quartets.");
9757
strcat(quartetFileName, run_id);
9759
/* open output file */
9766
f = myfopen(quartetFileName, "w");
9768
/* initialize model parameters */
9770
initModel(tr, rdta, cdta, adef);
9774
if(!adef->useBinaryModelFile)
9780
/* get a starting tree: either reads in a tree or computes a randomized stepwise addition parsimony tree */
9782
getStartingTree(tr, adef);
9784
/* optimize model parameters on that comprehensive tree that can subsequently be used for qyartet building */
9786
modOpt(tr, adef, TRUE, adef->likelihoodEpsilon);
9788
printBothOpen("Time for parsing input tree or building parsimony tree and optimizing model parameters: %f\n\n", gettime() - masterTime);
9792
readBinaryModel(tr);
9794
printBothOpen("Time for reading model parameters: %f\n\n", gettime() - masterTime);
9798
/* figure out which flavor of quartets we want to compute */
9800
if(adef->useQuartetGrouping)
9802
flavor = GROUPED_QUARTETS;
9803
groupingParser(quartetGroupingFileName, groups, groupSize, tr);
9807
if(randomQuartets > numberOfQuartets)
9810
if(randomQuartets == 1)
9811
flavor = ALL_QUARTETS;
9814
fraction = (double)randomQuartets / (double)numberOfQuartets;
9815
flavor = RANDOM_QUARTETS;
9819
/* print some output on what we are doing*/
9824
printBothOpen("There are %u quartet sets for which RAxML will evaluate all %u quartet trees\n", numberOfQuartets, numberOfQuartets * 3);
9826
case RANDOM_QUARTETS:
9827
printBothOpen("There are %u quartet sets for which RAxML will randomly sub-sambple %u sets (%f per cent), i.e., compute %u quartet trees\n", numberOfQuartets, randomQuartets, 100 * fraction, randomQuartets * 3);
9829
case GROUPED_QUARTETS:
9830
printBothOpen("There are 4 quartet groups from which RAxML will evaluate all %u quartet trees\n", (unsigned int)groupSize[0] * (unsigned int)groupSize[1] * (unsigned int)groupSize[2] * (unsigned int)groupSize[3] * 3);
9836
/* print taxon name to taxon number correspondance table to output file */
9841
fprintf(f, "Taxon names and indices:\n\n");
9843
for(i = 1; i <= tr->mxtips; i++)
9845
fprintf(f, "%s %d\n", tr->nameList[i], i);
9846
assert(tr->nodep[i]->number == i);
9855
/* do a loop to generate some quartets to test.
9856
note that tip nodes/sequences in RAxML are indexed from 1,...,n
9857
and not from 0,...,n-1 as one might expect
9859
tr->mxtips is the maximum number of tips in the alignment/tree
9870
assert(randomQuartets == 1);
9872
/* compute all possible quartets */
9874
for(t1 = 1; t1 <= tr->mxtips; t1++)
9875
for(t2 = t1 + 1; t2 <= tr->mxtips; t2++)
9876
for(t3 = t2 + 1; t3 <= tr->mxtips; t3++)
9877
for(t4 = t3 + 1; t4 <= tr->mxtips; t4++)
9880
if((quartetCounter % (unsigned long int)(processes - 1)) == (unsigned long int)(processID - 1))
9882
computeAllThreeQuartets(tr, q1, q2, t1, t2, t3, t4, f);
9886
assert(quartetCounter == numberOfQuartets);
9889
case RANDOM_QUARTETS:
9891
/* randomly sub-sample a fraction of all quartets */
9893
for(t1 = 1; t1 <= tr->mxtips; t1++)
9894
for(t2 = t1 + 1; t2 <= tr->mxtips; t2++)
9895
for(t3 = t2 + 1; t3 <= tr->mxtips; t3++)
9896
for(t4 = t3 + 1; t4 <= tr->mxtips; t4++)
9899
r = randum(&adef->parsimonySeed);
9904
if((quartetCounter % (unsigned long int)(processes - 1)) == (unsigned long int)(processID - 1))
9906
computeAllThreeQuartets(tr, q1, q2, t1, t2, t3, t4, f);
9910
if(quartetCounter == randomQuartets)
9915
assert(quartetCounter == randomQuartets);
9918
case GROUPED_QUARTETS:
9920
/* compute all quartets that can be built out of the four pre-defined groups */
9922
for(t1 = 0; t1 < groupSize[0]; t1++)
9923
for(t2 = 0; t2 < groupSize[1]; t2++)
9924
for(t3 = 0; t3 < groupSize[2]; t3++)
9925
for(t4 = 0; t4 < groupSize[3]; t4++)
9934
if((quartetCounter % (unsigned long int)(processes - 1)) == (unsigned long int)(processID - 1))
9936
computeAllThreeQuartets(tr, q1, q2, i1, i2, i3, i4, f);
9940
printBothOpen("\nComputed all %u possible grouped quartets\n", quartetCounter);
9949
startQuartetMaster(tr, f);
9955
MPI_Send(&dummy, 1, MPI_INT, 0, I_AM_DONE, MPI_COMM_WORLD);
9961
printBothOpen("\nPure quartet computation time: %f secs\n", t);
9963
printBothOpen("\nAll quartets and corresponding likelihoods written to file %s\n", quartetFileName);
9971
static void thoroughTreeOptimization(tree *tr, analdef *adef, rawdata *rdta, cruncheddata *cdta)
9974
bestTreeFileName[1024];
9979
initModel(tr, rdta, cdta, adef);
9981
getStartingTree(tr, adef);
9983
modOpt(tr, adef, TRUE, adef->likelihoodEpsilon);
9986
tr->doCutoff = FALSE;
9988
printBothOpen("\nStart likelihood: %f\n\n", tr->likelihood);
9990
treeOptimizeThorough(tr, 1, 10);
9991
evaluateGenericInitrav(tr, tr->start);
9993
modOpt(tr, adef, TRUE, adef->likelihoodEpsilon);
9995
printBothOpen("End likelihood: %f\n\n", tr->likelihood);
9997
printModelParams(tr, adef);
9999
strcpy(bestTreeFileName, workdir);
10000
strcat(bestTreeFileName, "RAxML_bestTree.");
10001
strcat(bestTreeFileName, run_id);
10003
Tree2String(tr->tree_string, tr, tr->start->back, TRUE, TRUE, FALSE, FALSE, TRUE, adef, SUMMARIZE_LH, FALSE, FALSE, FALSE, FALSE);
10004
f = myfopen(bestTreeFileName, "wb");
10005
fprintf(f, "%s", tr->tree_string);
10008
printBothOpen("Best-scoring ML tree written to: %s\n\n", bestTreeFileName);
10011
static void evaluateSD(tree *tr, double bestLH, double *bestVector, double weightSum, int configuration, int i, FILE *f)
10022
evaluateGenericInitrav(tr, tr->start);
10023
evaluateGenericVector(tr, tr->start);
10025
currentLH = tr->likelihood;
10027
printBothOpen("Configuration %d Likelihood: %f\n", configuration, tr->likelihood);
10029
fprintf(f, "tr%d\t", configuration);
10031
if(currentLH > bestLH)
10032
printBothOpen("WARNING tree with ancestral sequence taxon %s has a better likelihood %f > %f than the reference tree!\n", tr->nameList[i], currentLH, bestLH);
10034
for (k = 0; k < tr->cdta->endsite; k++)
10040
temp = bestVector[k] - tr->perSiteLL[k],
10041
wtemp = tr->cdta->aliaswgt[k] * temp;
10043
for(w = 0; w < tr->cdta->aliaswgt[k]; w++)
10044
fprintf(f, "%f ", tr->perSiteLL[k]);
10047
sum2 += wtemp * temp;
10052
sd = sqrt( weightSum * (sum2 - sum * sum / weightSum) / (weightSum - 1) );
10054
printBothOpen("Ancestral Taxon: %s Likelihood: %f D(LH): %f SD: %f \nSignificantly Worse: %s (5%s), %s (2%s), %s (1%s)\n",
10055
tr->nameList[i], currentLH, currentLH - bestLH, sd,
10056
(sum > 1.95996 * sd) ? "Yes" : " No", "%",
10057
(sum > 2.326 * sd) ? "Yes" : " No", "%",
10058
(sum > 2.57583 * sd) ? "Yes" : " No", "%");
10060
printBothOpen("\n");
10063
static void ancestralSequenceTest(tree *tr)
10066
*f = myfopen(quartetGroupingFileName, "r");
10071
*candidateAncestorList = (int *)rax_calloc((tr->mxtips + 1), sizeof(int)),
10072
numberOfCandidateAncestors = 0;
10077
*bestVector = (double*)rax_malloc(sizeof(double) * tr->cdta->endsite);
10079
assert(tr->useFastScaling == FALSE);
10081
for(i = 0; i < tr->cdta->endsite; i++)
10082
weightSum += (double)(tr->cdta->aliaswgt[i]);
10084
evaluateGenericInitrav(tr, tr->start);
10085
evaluateGenericVector(tr, tr->start);
10087
bestLH = tr->likelihood;
10089
memcpy(bestVector, tr->perSiteLL, tr->cdta->endsite * sizeof(double));
10091
printBothOpen("Likelihood of reference tree: %f\n\n\n", tr->likelihood);
10093
while((ch = getc(f)) != EOF)
10102
n = treeFindTipName(f, tr, FALSE);
10104
if(n <= 0 || n > tr->mxtips)
10105
printf("parsing error, raxml is expecting to read a taxon name that is contained in the reference tree you passed!\n");
10107
assert(n > 0 && n <= tr->mxtips);
10109
candidateAncestorList[n] = 1;
10110
numberOfCandidateAncestors++;
10116
for(i = 1; i <= tr->mxtips; i++)
10118
if(candidateAncestorList[i])
10130
attachmentBranch[NUM_BRANCHES],
10131
leftBranch[NUM_BRANCHES],
10132
rightBranch[NUM_BRANCHES];
10140
strcpy(fileName, workdir);
10141
strcat(fileName, "RAxML_ancestralTest.");
10142
strcat(fileName, tr->nameList[i]);
10143
strcat(fileName, ".");
10144
strcat(fileName, run_id);
10146
f = myfopen(fileName, "w");
10148
fprintf(f, " 3 %d\n", tr->rdta->sites);
10150
assert(strcmp(tr->nameList[i], tr->nameList[p->number]) == 0);
10152
printBothOpen("Checking if %s is a candidate ancestor\n\n", tr->nameList[i]);
10153
printBothOpen("Per site log likelihoods for the three configurations will be written to file %s\n\n", fileName);
10155
memcpy(attachmentBranch, p->z, sizeof(double) * NUM_BRANCHES);
10156
memcpy(leftBranch, l->z, sizeof(double) * NUM_BRANCHES);
10157
memcpy(rightBranch, r->z, sizeof(double) * NUM_BRANCHES);
10162
for(k = 0; k < NUM_BRANCHES; k++)
10163
p->z[k] = q->z[k] = zmax;
10165
evaluateSD(tr, bestLH, bestVector, weightSum, 1, i, f);
10167
memcpy(p->z, attachmentBranch, sizeof(double) * NUM_BRANCHES);
10168
memcpy(p->back->z, attachmentBranch, sizeof(double) * NUM_BRANCHES);
10170
evaluateGenericInitrav(tr, tr->start);
10171
assert(tr->likelihood == bestLH);
10175
for(k = 0; k < NUM_BRANCHES; k++)
10177
p->z[k] = q->z[k] = zmax;
10178
l->z[k] = l->back->z[k] = zmax;
10181
evaluateSD(tr, bestLH, bestVector, weightSum, 2, i, f);
10183
memcpy(p->z, attachmentBranch, sizeof(double) * NUM_BRANCHES);
10184
memcpy(p->back->z, attachmentBranch, sizeof(double) * NUM_BRANCHES);
10185
memcpy(l->z, leftBranch, sizeof(double) * NUM_BRANCHES);
10186
memcpy(l->back->z, leftBranch, sizeof(double) * NUM_BRANCHES);
10188
evaluateGenericInitrav(tr, tr->start);
10189
assert(tr->likelihood == bestLH);
10193
for(k = 0; k < NUM_BRANCHES; k++)
10195
p->z[k] = q->z[k] = zmax;
10196
r->z[k] = r->back->z[k] = zmax;
10199
evaluateSD(tr, bestLH, bestVector, weightSum, 3, i, f);
10201
memcpy(p->z, attachmentBranch, sizeof(double) * NUM_BRANCHES);
10202
memcpy(p->back->z, attachmentBranch, sizeof(double) * NUM_BRANCHES);
10203
memcpy(r->z, rightBranch, sizeof(double) * NUM_BRANCHES);
10204
memcpy(r->back->z, rightBranch, sizeof(double) * NUM_BRANCHES);
10206
evaluateGenericInitrav(tr, tr->start);
10207
assert(tr->likelihood == bestLH);
10209
printBothOpen("\n\n");
10214
printBothOpen("good-bye\n\n");
10216
rax_free(candidateAncestorList);
10217
rax_free(bestVector);
10221
static double distancesInitial(nodeptr p, double *distances, tree *tr, boolean fullTraversal)
10223
if(isTip(p->number, tr->mxtips))
10233
if(fullTraversal || !p->x)
10239
acc += distancesInitial(q->back, distances, tr, fullTraversal);
10243
distances[p->number] = acc;
10246
p->next->next->x = 0;
10249
acc = distances[p->number];
10251
return acc + p->z[0];
10257
static void distancesNewview(nodeptr p, double *distances, tree *tr, nodeptr *rootBranch, double *minimum)
10266
if(isTip(p->number, tr->mxtips))
10270
if(!isTip(q->number, tr->mxtips))
10273
distancesInitial(q, distances, tr, FALSE);
10274
left = distances[q->number];
10277
if(left <= p->z[0])
10279
//the balanced root is in this branch
10286
diff = left - p->z[0];
10288
if(diff < *minimum)
10299
if(!isTip(q->number, tr->mxtips))
10302
distancesInitial(q, distances, tr, FALSE);
10304
left = distances[q->number];
10309
if(!isTip(p->number, tr->mxtips))
10312
distancesInitial(p, distances, tr, FALSE);
10314
right = distances[p->number];
10319
if(ABS(left - right) <= p->z[0])
10330
diff = left - (right + p->z[0]);
10332
diff = right - (left + p->z[0]);
10334
if(*minimum > diff)
10345
distancesNewview(q->back, distances, tr, rootBranch, minimum);
10351
static void printTreeRec(FILE *f, nodeptr p, tree *tr, boolean rootDescendant, boolean printBranchLabels)
10353
if(isTip(p->number, tr->mxtips))
10356
fprintf(f, "%s", tr->nameList[p->number]);
10358
fprintf(f, "%s:%f", tr->nameList[p->number], p->z[0]);
10363
printTreeRec(f, p->next->back, tr, FALSE, printBranchLabels);
10365
printTreeRec(f, p->next->next->back, tr, FALSE, printBranchLabels);
10371
if(printBranchLabels && !isTip(p->number, tr->mxtips) && !isTip(p->back->number, tr->mxtips))
10373
assert(p->support == p->back->support);
10374
fprintf(f, "):%f[%d]", p->z[0], p->support);
10377
fprintf(f, "):%f", p->z[0]);
10382
static void printTree(nodeptr p, tree *tr, double *distances, FILE *f, boolean printBranchLabels)
10387
thisBranch = p->z[0],
10394
if(!isTip(p->number, tr->mxtips))
10397
distancesInitial(p, distances, tr, FALSE);
10399
left = distances[p->number];
10404
if(!isTip(q->number, tr->mxtips))
10407
distancesInitial(q, distances, tr, FALSE);
10409
right = distances[q->number];
10414
//printf("left %f right %f thisBranch %f\n", left, right, thisBranch);
10416
if(ABS(left - right) <= thisBranch)
10420
leftRoot = (right + thisBranch - left) / 2.0;
10421
rightRoot = thisBranch - leftRoot;
10425
rightRoot = (left + thisBranch - right) / 2.0;
10426
leftRoot = thisBranch - rightRoot;
10433
leftRoot = thisBranch;
10439
rightRoot = thisBranch;
10443
//descend into right subtree and print it
10446
printTreeRec(f, p, tr, TRUE, printBranchLabels);
10448
//finished right subtree, print attachment branch of right subtree
10449
//noew descent into left subtree
10451
if(printBranchLabels && !isTip(p->number, tr->mxtips) && !isTip(q->number, tr->mxtips))
10453
assert(p->support == q->support);
10454
fprintf(f, ":%f[%d], ", leftRoot, p->support);
10457
fprintf(f, ":%f, ", leftRoot);
10458
printTreeRec(f, q, tr, TRUE, printBranchLabels);
10460
//finished left subtree, now print its branch to the root node
10463
if(printBranchLabels && !isTip(p->number, tr->mxtips) && !isTip(q->number, tr->mxtips))
10465
assert(p->support == q->support);
10466
fprintf(f, ":%f[%d]);", rightRoot, q->support);
10469
fprintf(f, ":%f);", rightRoot);
10472
static void rootTree(tree *tr, analdef *adef)
10480
*distances = (double *)rax_malloc(sizeof(double) * 2 * tr->mxtips);
10483
rootedTreeFile[1024];
10486
*f = myfopen(tree_file, "r");
10492
printBranchLabels = FALSE;
10494
for(i = 0; i < 2 * tr->mxtips; i++)
10495
distances[i] = 0.0;
10497
strcpy(rootedTreeFile, workdir);
10498
strcat(rootedTreeFile, "RAxML_rootedTree.");
10499
strcat(rootedTreeFile, run_id);
10501
treeReadLen(f, tr, TRUE, FALSE, TRUE, adef, TRUE, TRUE);
10503
if(tr->branchLabelCounter > 0)
10505
assert(tr->branchLabelCounter == (tr->ntips - 3));
10506
printBranchLabels = TRUE;
10507
printBothOpen("\nYour input tree contains branch labels, these will also be printed in the output tree ...\n\n");
10512
minimum = checkDistances = distancesInitial(tr->start->back, distances, tr, TRUE);
10514
//printf("Tree Lenght: %f\n", checkDistances);
10516
f = myfopen(rootedTreeFile, "w");
10518
distancesNewview(tr->start->back, distances, tr, &rootBranch, &minimum);
10520
printTree(rootBranch, tr, distances, f, printBranchLabels);
10524
printBothOpen("RAxML-rooted tree using subtree length-balance printed to file:\n%s\n", rootedTreeFile);
10526
rax_free(distances);
10529
int main (int argc, char *argv[])
6757
10531
rawdata *rdta;
6758
10532
cruncheddata *cdta;
6763
MPI_Init(&argc, &argv);
6764
MPI_Comm_rank(MPI_COMM_WORLD, &processID);
6765
MPI_Comm_size(MPI_COMM_WORLD, &numOfWorkers);
6767
printf("\nThis is the RAxML MPI Master process\n");
6769
printf("\nThis is the RAxML MPI Worker Process Number: %d\n", processID);
10538
countOtherModel = 0;
10540
#if (defined(_USE_PTHREADS) && !defined(_PORTABLE_PTHREADS))
10544
#if (defined(_WAYNE_MPI) || defined (_QUARTET_MPI))
10545
MPI_Init(&argc, &argv);
10546
MPI_Comm_rank(MPI_COMM_WORLD, &processID);
10547
MPI_Comm_size(MPI_COMM_WORLD, &processes);
10548
printf("\nThis is RAxML MPI Process Number: %d\n", processID);
6771
10550
processID = 0;
6774
masterTime = gettime();
6776
adef = (analdef *)malloc(sizeof(analdef));
6777
rdta = (rawdata *)malloc(sizeof(rawdata));
6778
cdta = (cruncheddata *)malloc(sizeof(cruncheddata));
6779
tr = (tree *)malloc(sizeof(tree));
6782
get_args(argc,argv, adef, tr);
6784
if(adef->model == M_PROTCAT || adef->model == M_GTRCAT)
6785
tr->rateHetModel = CAT;
10553
masterTime = gettime();
10556
globalArgv = (char **)rax_malloc(sizeof(char *) * argc);
10557
for(i = 0; i < argc; i++)
10558
globalArgv[i] = argv[i];
10562
#if ! (defined(__ppc) || defined(__powerpc__) || defined(PPC))
10565
David Defour's command
10566
_mm_setcsr( _mm_getcsr() | (_MM_FLUSH_ZERO_ON | MM_DAZ_ON));
10569
_mm_setcsr( _mm_getcsr() | _MM_FLUSH_ZERO_ON);
10573
adef = (analdef *)rax_malloc(sizeof(analdef));
10574
rdta = (rawdata *)rax_malloc(sizeof(rawdata));
10575
cdta = (cruncheddata *)rax_malloc(sizeof(cruncheddata));
10576
tr = (tree *)rax_malloc(sizeof(tree));
10578
/* initialize lookup table for fast bit counter */
10580
compute_bits_in_16bits();
10583
get_args(argc,argv, adef, tr);
10586
if(adef->readTaxaOnly)
6788
if(adef->useInvariant)
6789
tr->rateHetModel = GAMMA_I;
10588
if(adef->mode == PLAUSIBILITY_CHECKER || adef->mode == ROOT_TREE)
10589
extractTaxaFromTopology(tr, rdta, cdta, tree_file);
6791
tr->rateHetModel = GAMMA;
10591
extractTaxaFromTopology(tr, rdta, cdta, bootStrapFile);
6795
This is a very ugly numerical bug fix, that intends to avoid the unaesthetic phenomena
6796
that can occur during model param optimization due to the dependency between parameters
6797
alpha and invar which are NOT independent from each other.
6798
When using P-Invar set likelihood epsilon to a lower value!
6800
TODO-MIX this is very ugly !
10594
getinput(adef, rdta, cdta, tr);
10596
checkOutgroups(tr, adef);
10599
#if (defined(_WAYNE_MPI) || defined (_QUARTET_MPI))
10600
MPI_Barrier(MPI_COMM_WORLD);
6804
10603
if(adef->useInvariant && adef->likelihoodEpsilon > 0.001)
6805
adef->likelihoodEpsilon = 0.001;
6807
readData(adef, rdta, cdta, tr);
6809
checkOutgroups(tr, adef);
10605
printBothOpen("\nYou are using a proportion of Invariable sites estimate, although I don't\n");
10606
printBothOpen("like it. The likelihood epsilon \"-f e\" will be automatically lowered to 0.001\n");
10607
printBothOpen("to avoid unfavorable effects caused by simultaneous optimization of alpha and P-Invar\n");
10609
adef->likelihoodEpsilon = 0.001;
10614
switch back to model without secondary structure for all this
10618
if(adef->useSecondaryStructure)
10620
tr->dataVector = tr->initialDataVector;
10621
tr->partitionData = tr->initialPartitionData;
10622
tr->NumberOfModels--;
6812
10625
if(adef->useExcludeFile)
6814
10627
handleExcludeFile(tr, adef, rdta);
10632
if(!adef->readTaxaOnly && adef->mode != FAST_SEARCH && adef->mode != SH_LIKE_SUPPORTS)
10633
checkSequences(tr, rdta, adef);
6818
if(adef->mode != SEQUENCE_SIMILARITY_FILTER)
6820
checkSequences(tr, rdta, adef);
6824
reduceBySequenceSimilarity(tr, rdta, adef);
6828
10636
if(adef->mode == SPLIT_MULTI_GENE)
6830
10638
splitMultiGene(tr, rdta);
6834
10642
if(adef->mode == CHECK_ALIGNMENT)
6836
10644
printf("Alignment format can be read by RAxML \n");
6840
makeweights(adef, rdta, cdta, tr);
6841
makevalues(rdta, cdta, tr, adef);
6843
if(adef->generateBS)
10649
switch back to model with secondary structure for all this
10653
if(adef->useSecondaryStructure && !adef->readTaxaOnly)
10655
tr->dataVector = tr->extendedDataVector;
10656
tr->partitionData = tr->extendedPartitionData;
10657
tr->NumberOfModels++;
10658
/* might as well rax_free the initial structures here */
10662
if(!adef->readTaxaOnly)
10668
makeweights(adef, rdta, cdta, tr);
10669
makevalues(rdta, cdta, tr, adef);
10671
for(i = 0; i < tr->NumberOfModels; i++)
10673
if(!(tr->partitionData[i].dataType == AA_DATA || tr->partitionData[i].dataType == DNA_DATA))
10676
if(tr->partitionData[i].protModels == LG4 || tr->partitionData[i].protModels == LG4X)
10679
if(tr->partitionData[i].dataType == AA_DATA)
10681
if(tr->partitionData[i].protModels == GTR || tr->partitionData[i].protModels == GTR_UNLINKED)
10692
printf("Error: the LG4 substitution model does not work in combination with the \"-U\" memory saving flag!\n\n");
10696
if(adef->useInvariant)
10698
printf("Error: the LG4 substitution model does not work for proportion of invariavble sites estimates!\n\n");
10704
printf("Error: the LG4 substitution model does not work with the CAT model of rate heterogeneity!\n\n");
10709
if(tr->saveMemory && countNonSev > 0)
10711
printf("\nError, you want to use the SEV-based memory saving technique for large gappy datasets with missing data.\n");
10712
printf("However, this is only implelemented for DNA and protein data partitions, one of your partitions is neither DNA\n");
10713
printf("nor protein data ... exiting to prevent bad things from happening ;-) \n\n");
10719
if(countGTR > 0 && countOtherModel > 0)
10721
printf("Error, it is only allowed to conduct partitioned AA analyses\n");
10722
printf("with a GTR model of AA substitution, if not all AA partitions are assigned\n");
10723
printf("the GTR or GTR_UNLINKED model.\n\n");
10725
printf("The following partitions do not use GTR:\n");
10727
for(i = 0; i < tr->NumberOfModels; i++)
10729
if(tr->partitionData[i].dataType == AA_DATA && (tr->partitionData[i].protModels != GTR || tr->partitionData[i].protModels != GTR_UNLINKED))
10730
printf("Partition %s\n", tr->partitionData[i].partitionName);
10732
printf("exiting ...\n");
10736
if(countGTR > 0 && tr->NumberOfModels > 1)
10738
FILE *info = myfopen(infoFileName, "ab");
10740
printBoth(info, "You are using the GTR model of AA substitution!\n");
10741
printBoth(info, "GTR parameters for AA substiution will automatically be estimated\n");
10742
printBoth(info, "either jointly (GTR params will be linked) or independently (when using GTR_UNLINKED) across all partitions.\n");
10743
printBoth(info, "WARNING: you may be over-parametrizing the model!\n\n\n");
10749
if(adef->mode == CLASSIFY_ML || adef->mode == CLASSIFY_MP)
10750
tr->innerNodes = (size_t)(countTaxaInTopology() - 1);
10752
tr->innerNodes = tr->mxtips;
10755
setRateHetAndDataIncrement(tr, adef);
10757
#ifdef _USE_PTHREADS
10759
masterBarrier(THREAD_INIT_PARTITION, tr);
10760
if(!adef->readTaxaOnly)
10761
masterBarrier(THREAD_ALLOC_LIKELIHOOD, tr);
10763
if(!adef->readTaxaOnly)
10767
printModelAndProgramInfo(tr, adef, argc, argv);
10772
getStartingTree(tr, adef);
10776
if(adef->useBinaryModelFile)
10778
assert(tr->rateHetModel != CAT);
10779
readBinaryModel(tr);
10782
initModel(tr, rdta, cdta, adef);
10784
getStartingTree(tr, adef);
6845
10788
generateBS(tr, adef);
6849
#ifdef _USE_PTHREADS
6853
if(adef->computeELW)
6854
computeELW(tr, adef, bootStrapFile);
6859
initModel(tr, rdta, cdta, adef);
6861
printModelAndProgramInfo(tr, adef, argc, argv);
6863
if(adef->bootStopOnly > 0)
6865
computeBootStopOnly(tr, adef, bootStrapFile);
6872
printf("OPT_ARNDT\n");
6873
getStartingTree(tr, adef);
6874
optimizeArndt(tr, adef);
6877
getStartingTree(tr, adef);
6878
determineSequencePosition(tr, adef);
10792
computeELW(tr, adef, bootStrapFile);
10796
initModel(tr, rdta, cdta, adef);
10797
computeAllLHs(tr, adef, bootStrapFile);
10800
case COMPUTE_BIPARTITION_CORRELATION:
10801
compareBips(tr, bootStrapFile, adef);
10804
case COMPUTE_RF_DISTANCE:
10805
computeRF(tr, bootStrapFile, adef);
10808
case BOOTSTOP_ONLY:
10809
computeBootStopOnly(tr, bootStrapFile, adef);
10812
case CONSENSUS_ONLY:
10813
if(adef->leaveDropMode)
10814
computeRogueTaxa(tr, bootStrapFile, adef);
10816
computeConsensusOnly(tr, bootStrapFile, adef, adef->calculateIC);
10819
case DISTANCE_MODE:
10820
initModel(tr, rdta, cdta, adef);
10821
getStartingTree(tr, adef);
10822
computeDistances(tr, adef);
6880
10824
case PARSIMONY_ADDITION:
10825
initModel(tr, rdta, cdta, adef);
6881
10826
getStartingTree(tr, adef);
6882
printStartingTree(tr, adef, TRUE);
6884
case OPTIMIZE_RATES:
6885
if(adef->computePerSiteLLs)
6886
computePerSiteLLs(tr, adef, bootStrapFile);
6888
optimizeRatesOnly(tr, adef);
6890
case TREE_EVALUATION:
10827
printStartingTree(tr, adef, TRUE);
10830
initModel(tr, rdta, cdta, adef);
10831
computePerSiteLLs(tr, adef, bootStrapFile);
10833
case TREE_EVALUATION:
10834
initModel(tr, rdta, cdta, adef);
6891
10836
getStartingTree(tr, adef);
6892
if(adef->likelihoodTest)
10838
if(adef->likelihoodTest)
6893
10839
computeLHTest(tr, adef, bootStrapFile);
6897
printLog(tr, adef, TRUE);
6898
printResult(tr, adef, TRUE);
10842
if(adef->useBinaryModelFile)
10844
readBinaryModel(tr);
10845
evaluateGenericInitrav(tr, tr->start);
10846
treeEvaluate(tr, 2);
10850
modOpt(tr, adef, TRUE, adef->likelihoodEpsilon);
10851
writeBinaryModel(tr);
10854
printLog(tr, adef, TRUE);
10855
printResult(tr, adef, TRUE);
6901
case CALC_BIPARTITIONS:
6902
calcBipartitions(tr, adef, tree_file, bootStrapFile);
10859
case ANCESTRAL_STATES:
10860
initModel(tr, rdta, cdta, adef);
10862
getStartingTree(tr, adef);
10864
modOpt(tr, adef, TRUE, adef->likelihoodEpsilon);
10866
evaluateGenericInitrav(tr, tr->start);
10868
computeAncestralStates(tr, tr->likelihood);
10870
case QUARTET_CALCULATION:
10871
computeQuartets(tr, adef, rdta, cdta);
10873
case THOROUGH_OPTIMIZATION:
10874
thoroughTreeOptimization(tr, adef, rdta, cdta);
10876
case CALC_BIPARTITIONS:
10877
calcBipartitions(tr, adef, tree_file, bootStrapFile);
10879
case CALC_BIPARTITIONS_IC:
10880
calcBipartitions_IC(tr, adef, tree_file, bootStrapFile);
6904
10882
case BIG_RAPID_MODE:
6906
10884
doBootstrap(tr, adef, rdta, cdta);
6909
10887
if(adef->rapidBoot)
6912
doAllInOneVincent(tr, adef);
10889
initModel(tr, rdta, cdta, adef);
6914
10890
doAllInOne(tr, adef);
6918
doInference(tr, adef, rdta, cdta);
10893
doInference(tr, adef, rdta, cdta);
10896
case MORPH_CALIBRATOR:
10897
initModel(tr, rdta, cdta, adef);
10898
getStartingTree(tr, adef);
10899
evaluateGenericInitrav(tr, tr->start);
10900
modOpt(tr, adef, TRUE, adef->likelihoodEpsilon);
10901
morphologicalCalibration(tr, adef);
10904
fastSearch(tr, adef, rdta, cdta);
10906
case SH_LIKE_SUPPORTS:
10907
shSupports(tr, adef, rdta, cdta);
10909
case EPA_SITE_SPECIFIC_BIAS:
10910
initModel(tr, rdta, cdta, adef);
10911
getStartingTree(tr, adef);
10912
modOpt(tr, adef, TRUE, adef->likelihoodEpsilon);
10913
computePlacementBias(tr, adef);
10915
case OPTIMIZE_BR_LEN_SCALER:
10916
initModel(tr, rdta, cdta, adef);
10918
getStartingTree(tr, adef);
10919
evaluateGenericInitrav(tr, tr->start);
10920
modOpt(tr, adef, FALSE, adef->likelihoodEpsilon);
10922
printBothOpen("Likelihood: %f\n", tr->likelihood);
10925
case ANCESTRAL_SEQUENCE_TEST:
10926
initModel(tr, rdta, cdta, adef);
10928
getStartingTree(tr, adef);
10930
evaluateGenericInitrav(tr, tr->start);
10931
modOpt(tr, adef, FALSE, adef->likelihoodEpsilon);
10933
ancestralSequenceTest(tr);
10935
case PLAUSIBILITY_CHECKER:
10936
plausibilityChecker(tr, adef);
10940
rootTree(tr, adef);
6925
10946
finalizeInfoFile(tr, adef);
10948
#if (defined(_WAYNE_MPI) || defined (_QUARTET_MPI))
6928
10949
MPI_Finalize();