2
// File: PhylogeneticsApplicationTools.cpp
3
// Created by: Julien Dutheil
4
// Created on: Fri Oct 21 16:49 2005
5
// from old file ApplicationTools.cpp created on Sun Dec 14 09:36:26 2003
9
Copyright or Ā© or Copr. Bio++ Development Team, (November 16, 2004)
11
This software is a computer program whose purpose is to provide classes
12
for phylogenetic data analysis.
14
This software is governed by the CeCILL license under French law and
15
abiding by the rules of distribution of free software. You can use,
16
modify and/ or redistribute the software under the terms of the CeCILL
17
license as circulated by CEA, CNRS and INRIA at the following URL
18
"http://www.cecill.info".
20
As a counterpart to the access to the source code and rights to copy,
21
modify and redistribute granted by the license, users are provided only
22
with a limited warranty and the software's author, the holder of the
23
economic rights, and the successive licensors have only limited
26
In this respect, the user's attention is drawn to the risks associated
27
with loading, using, modifying and/or developing or reproducing the
28
software by the user in light of its specific status of free software,
29
that may mean that it is complicated to manipulate, and that also
30
therefore means that it is reserved for developers and experienced
31
professionals having in-depth computer knowledge. Users are therefore
32
encouraged to load and test the software's suitability as regards their
33
requirements in conditions enabling the security of their systems and/or
34
data to be ensured and, more generally, to use and operate it in the
35
same conditions as regards security.
37
The fact that you are presently reading this means that you have had
38
knowledge of the CeCILL license and that you accept its terms.
41
#include "PhylogeneticsApplicationTools.h"
42
#include "../Model.all"
43
#include "../Likelihood.all"
44
#include "../OptimizationTools.h"
46
#include "../Io/Newick.h"
47
#include "../Io/NexusIoTree.h"
48
#include "../Io/Nhx.h"
50
#include <Bpp/Io/FileTools.h>
51
#include <Bpp/Text/TextTools.h>
52
#include <Bpp/App/ApplicationTools.h>
53
#include <Bpp/Text/StringTokenizer.h>
54
#include <Bpp/Text/KeyvalTools.h>
55
#include <Bpp/Numeric/Prob.all>
56
#include <Bpp/Numeric/Function.all>
59
#include <Bpp/Seq/Alphabet/AlphabetTools.h>
60
#include <Bpp/Seq/Container/SequenceContainerTools.h>
61
#include <Bpp/Seq/App/SequenceApplicationTools.h>
73
/******************************************************************************/
75
Tree* PhylogeneticsApplicationTools::getTree(
76
map<string, string>& params,
79
bool suffixIsOptional,
80
bool verbose) throw (Exception)
82
string format = ApplicationTools::getStringParameter(prefix + "tree.format", params, "Newick", suffix, suffixIsOptional, true);
83
string treeFilePath = ApplicationTools::getAFilePath(prefix + "tree.file", params, true, true, suffix, suffixIsOptional);
86
if (format == "Newick")
87
treeReader = new Newick(true);
88
else if (format == "Nexus")
89
treeReader = new NexusIOTree();
90
else if (format == "NHX")
91
treeReader = new Nhx();
92
else throw Exception("Unknow format for tree reading: " + format);
93
Tree* tree = treeReader->read(treeFilePath);
96
if (verbose) ApplicationTools::displayResult("Tree file", treeFilePath);
100
/******************************************************************************/
102
vector<Tree*> PhylogeneticsApplicationTools::getTrees(
103
map<string, string>& params,
104
const string& prefix,
105
const string& suffix,
106
bool suffixIsOptional,
107
bool verbose) throw (Exception)
109
string format = ApplicationTools::getStringParameter(prefix + "trees.format", params, "Newick", suffix, suffixIsOptional, true);
110
string treeFilePath = ApplicationTools::getAFilePath(prefix + "trees.file", params, true, true, suffix, suffixIsOptional);
112
IMultiTree* treeReader;
113
if (format == "Newick")
114
treeReader = new Newick(true);
115
else if (format == "Nexus")
116
treeReader = new NexusIOTree();
117
else if (format == "NHX")
118
treeReader = new Nhx();
119
else throw Exception("Unknow format for tree reading: " + format);
121
treeReader->read(treeFilePath, trees);
126
ApplicationTools::displayResult("Tree file", treeFilePath);
127
ApplicationTools::displayResult("Number of trees in file", trees.size());
132
/******************************************************************************/
134
SubstitutionModel* PhylogeneticsApplicationTools::getSubstitutionModelDefaultInstance(
135
const Alphabet* alphabet,
136
const string& modelDescription,
137
map<string, string>& unparsedParameterValues,
141
bool verbose) throw (Exception)
143
SubstitutionModel* model = 0;
144
string modelName = "", left = "";
145
map<string, string> args;
147
KeyvalTools::parseProcedure(modelDescription, modelName, args);
149
bool word = ((modelName == "Word") || (modelName == "Triplet") || (modelName == "CodonNeutral")
150
|| (modelName == "CodonAsynonymous"));
152
bool wordfreq = ((modelName == "CodonAsynonymousFrequencies")
153
|| (modelName == "CodonNeutralFrequencies"));
155
// //////////////////////////////////
157
// ////////////////////////////////
159
if (modelName == "MixedModel" && allowMixed)
161
map<string, string> unparsedParameterValuesNested;
162
if (args.find("model") == args.end())
163
throw Exception("The argument 'model' is missing from MixedSubstitutionModel description");
164
string nestedModelDescription = args["model"];
165
SubstitutionModel* pSM = getSubstitutionModelDefaultInstance(alphabet,
166
nestedModelDescription,
167
unparsedParameterValuesNested,
172
map<string, DiscreteDistribution*> mdist;
173
map<string, string> unparsedParameterValuesNested2, unparsedParameterValuesNested3;
175
for (map<string, string>::iterator it = unparsedParameterValuesNested.begin();
176
it != unparsedParameterValuesNested.end(); it++)
178
if (it->second.find("(") != string::npos)
180
unparsedParameterValuesNested3.clear();
181
mdist[pSM->getParameterNameWithoutNamespace(it->first)] = getDistributionDefaultInstance(it->second, unparsedParameterValuesNested3, true);
182
for (map<string, string>::iterator it2 = unparsedParameterValuesNested3.begin();
183
it2 != unparsedParameterValuesNested3.end(); it2++)
185
unparsedParameterValuesNested2[it->first + "_" + it2->first] = it2->second;
189
unparsedParameterValuesNested2[it->first] = it->second;
192
for (map<string, string>::iterator it = unparsedParameterValuesNested2.begin();
193
it != unparsedParameterValuesNested2.end(); it++)
195
unparsedParameterValues[it->first] = it->second;
200
if (args.find("from") != args.end())
201
fi=alphabet->charToInt(args["from"]);
202
if (args.find("to") != args.end())
203
ti=alphabet->charToInt(args["to"]);
205
model = new MixtureOfASubstitutionModel(alphabet, pSM, mdist, fi, ti);
207
vector<string> v = model->getParameters().getParameterNames();
209
for (map<string, DiscreteDistribution*>::iterator it = mdist.begin();
210
it != mdist.end(); it++)
216
ApplicationTools::displayResult("Mixture Of A Substitution Model", nestedModelDescription );
218
else if (modelName == "Mixture" && allowMixed)
220
vector<string> v_nestedModelDescription;
221
vector<SubstitutionModel*> v_pSM;
222
map<string, string> unparsedParameterValuesNested;
224
if (args.find("model1") == args.end())
226
throw Exception("Missing argument 'model1' for model " + modelName + ".");
228
unsigned int nbmodels = 0;
230
while (args.find("model" + TextTools::toString(nbmodels + 1)) != args.end())
232
v_nestedModelDescription.push_back(args["model" + TextTools::toString(++nbmodels)]);
236
throw Exception("Missing nested models for model " + modelName + ".");
237
for (unsigned i = 0; i < v_nestedModelDescription.size(); i++)
239
unparsedParameterValuesNested.clear();
240
model = getSubstitutionModelDefaultInstance(alphabet, v_nestedModelDescription[i], unparsedParameterValuesNested, false, true, false, false);
241
for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
243
unparsedParameterValues[modelName + "." + TextTools::toString(i + 1) + "_" + it->first] = it->second;
245
v_pSM.push_back(model);
248
model = new MixtureOfSubstitutionModels(alphabet, v_pSM);
250
ApplicationTools::displayResult("Mixture Of Substitution Models", modelName );
253
// /////////////////////////////////
254
// / WORDS and CODONS defined by models
255
// ///////////////////////////////
259
vector<string> v_nestedModelDescription;
260
vector<SubstitutionModel*> v_pSM;
261
const WordAlphabet* pWA;
263
string s, nestedModelDescription;
264
unsigned int nbmodels;
266
if ((modelName == "Word" && !AlphabetTools::isWordAlphabet(alphabet)) ||
267
(modelName != "Word" && !AlphabetTools::isCodonAlphabet(alphabet)))
268
throw Exception("Bad alphabet type "
269
+ alphabet->getAlphabetType() + " for model " + modelName + ".");
271
pWA = dynamic_cast<const WordAlphabet*>(alphabet);
273
if (args.find("model") != args.end())
275
nestedModelDescription = args["model"];
276
if (modelName == "Word")
278
v_nestedModelDescription.push_back(nestedModelDescription);
279
nbmodels = pWA->getLength();
283
v_nestedModelDescription.push_back(nestedModelDescription);
289
if (args.find("model1") == args.end())
291
throw Exception("Missing argument 'model' or 'model1' for model " + modelName + ".");
295
while (args.find("model" + TextTools::toString(nbmodels + 1)) != args.end())
297
v_nestedModelDescription.push_back(args["model" + TextTools::toString(++nbmodels)]);
302
throw Exception("Missing nested models for model " + modelName + ".");
304
if (pWA->getLength() != nbmodels)
305
throw Exception("Bad alphabet type "
306
+ alphabet->getAlphabetType() + " for model " + modelName + ".");
308
map<string, string> unparsedParameterValuesNested;
310
if (v_nestedModelDescription.size() != nbmodels)
312
model = getSubstitutionModelDefaultInstance(pWA->getNAlphabet(0), v_nestedModelDescription[0], unparsedParameterValuesNested, false, true, false, false);
313
for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
315
unparsedParameterValues[modelName + "._" + it->first] = it->second;
317
v_pSM.push_back(model);
321
for (unsigned i = 0; i < v_nestedModelDescription.size(); i++)
323
unparsedParameterValuesNested.clear();
324
model = getSubstitutionModelDefaultInstance(pWA->getNAlphabet(i), v_nestedModelDescription[i], unparsedParameterValuesNested, false, true, false, false);
325
for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
327
unparsedParameterValues[modelName + "." + TextTools::toString(i + 1) + "_" + it->first] = it->second;
329
v_pSM.push_back(model);
333
// /////////////////////////////////
335
// ///////////////////////////////
337
if (modelName == "Word")
339
model = (v_nestedModelDescription.size() != nbmodels)
340
? new WordReversibleSubstitutionModel(v_pSM[0], nbmodels)
341
: new WordReversibleSubstitutionModel(v_pSM);
344
// /////////////////////////////////
346
// ///////////////////////////////
348
else if (modelName == "Triplet")
350
if (dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]) == 0)
351
throw Exception("Non simple NucleotideSubstitutionModel imbedded in " + modelName + " model.");
353
if (v_nestedModelDescription.size() != 3)
354
model = new TripletReversibleSubstitutionModel(
355
dynamic_cast<const CodonAlphabet*>(pWA),
356
dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]));
359
if (dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[1]) == 0 || dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[2]) == 0)
360
throw Exception("Non simple NucleotideSubstitutionModel imbedded in " + modelName + " model.");
362
model = new TripletReversibleSubstitutionModel(
363
dynamic_cast<const CodonAlphabet*>(pWA),
364
dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]),
365
dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[1]),
366
dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[2]));
370
// /////////////////////////////////
372
// ///////////////////////////////
374
else if (modelName == "CodonNeutral")
376
if (dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]) == 0)
377
throw Exception("Non simple NucleotideSubstitutionModel imbedded in " + modelName + " model.");
379
if (v_nestedModelDescription.size() != 3)
380
model = new CodonNeutralReversibleSubstitutionModel(
381
dynamic_cast<const CodonAlphabet*>(pWA),
382
dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]));
385
if (dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[1]) == 0 || dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[2]) == 0)
386
throw Exception("Non simple NucleotideSubstitutionModel imbedded in " + modelName + " model.");
388
model = new CodonNeutralReversibleSubstitutionModel(
389
dynamic_cast<const CodonAlphabet*>(pWA),
390
dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]),
391
dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[1]),
392
dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[2]));
396
// /////////////////////////////////
397
// / CODON ASYNONYMOUS
398
// ///////////////////////////////
400
else if (modelName == "CodonAsynonymous")
402
if (args.find("genetic_code") == args.end())
403
args["genetic_code"] = pWA->getAlphabetType();
405
GeneticCode* pgc = SequenceApplicationTools::getGeneticCode(dynamic_cast<const NucleicAlphabet*>(pWA->getNAlphabet(0)), args["genetic_code"]);
406
if (pgc->getSourceAlphabet()->getAlphabetType() != pWA->getAlphabetType())
407
throw Exception("Mismatch between genetic code and codon alphabet");
409
AlphabetIndex2<double> * pai2;
411
if (args.find("aadistance") == args.end())
414
pai2 = SequenceApplicationTools::getAADistance(args["aadistance"]);
416
if (dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]) == 0)
417
throw Exception("Non simple NucleotideSubstitutionModel imbedded in " + modelName + " model.");
419
if (v_nestedModelDescription.size() != 3)
420
model = new CodonAsynonymousReversibleSubstitutionModel(pgc,
421
dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]), pai2);
424
if (dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[1]) == 0 || dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[2]) == 0)
425
throw Exception("Non simple NucleotideSubstitutionModel imbedded in " + modelName + " model.");
427
model = new CodonAsynonymousReversibleSubstitutionModel(
429
dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]),
430
dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[1]),
431
dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[2]), pai2);
436
// /////////////////////////////////
437
// / CODON MODELS with FREQUENCIES
438
// ///////////////////////////////
442
if (!AlphabetTools::isCodonAlphabet(alphabet))
443
throw Exception("Alphabet should be Codon Alphabet.");
445
const CodonAlphabet* pCA = dynamic_cast<const CodonAlphabet*>(alphabet);
447
if (args.find("frequencies") == args.end())
448
throw Exception("Missing equilibrium frequencies.");
450
map<string, string> unparsedParameterValuesNested;
452
FrequenciesSet* pFS = getFrequenciesSetDefaultInstance(pCA, args["frequencies"], unparsedParameterValuesNested);
454
for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
456
unparsedParameterValues[modelName + ".freq_" + it->first] = it->second;
459
// /////////////////////////////////
460
// / CODONNEUTRALFREQUENCIES
461
// ///////////////////////////////
463
if (modelName == "CodonNeutralFrequencies")
465
model = new CodonNeutralFrequenciesReversibleSubstitutionModel(pCA, pFS);
468
modelName += args["frequencies"];
471
// /////////////////////////////////
472
// / CODONASYNONYMOUSFREQUENCIES
473
// ///////////////////////////////
475
else if (modelName == "CodonAsynonymousFrequencies")
477
if (args.find("genetic_code") == args.end())
478
args["genetic_code"] = pCA->getAlphabetType();
480
GeneticCode* pgc = SequenceApplicationTools::getGeneticCode(dynamic_cast<const NucleicAlphabet*>(pCA->getNAlphabet(0)), args["genetic_code"]);
481
if (pgc->getSourceAlphabet()->getAlphabetType() != pCA->getAlphabetType())
482
throw Exception("Mismatch between genetic code and codon alphabet");
484
AlphabetIndex2<double> * pai2;
486
if (args.find("aadistance") == args.end())
489
pai2 = SequenceApplicationTools::getAADistance(args["aadistance"]);
491
model = new CodonAsynonymousFrequenciesReversibleSubstitutionModel(pgc, pFS, pai2);
495
// //////////////////////////////////////
496
// predefined codon models
497
// //////////////////////////////////////
499
else if ((modelName == "MG94") || (modelName == "YN98") ||
500
(modelName == "GY94") || (modelName.substr(0, 5) == "YNGKP"))
502
if (!AlphabetTools::isCodonAlphabet(alphabet))
503
throw Exception("Alphabet should be Codon Alphabet.");
505
const CodonAlphabet* pCA = (const CodonAlphabet*)(alphabet);
507
if (args.find("genetic_code") == args.end())
508
args["genetic_code"] = pCA->getAlphabetType();
510
GeneticCode* pgc = SequenceApplicationTools::getGeneticCode(dynamic_cast<const NucleicAlphabet*>(pCA->getNAlphabet(0)), args["genetic_code"]);
511
if (pgc->getSourceAlphabet()->getAlphabetType() != pCA->getAlphabetType())
512
throw Exception("Mismatch between genetic code and codon alphabet");
514
string freqOpt = ApplicationTools::getStringParameter("frequencies", args, "F0");
515
map<string, string> unparsedParameterValuesNested;
516
FrequenciesSet* codonFreqs = getFrequenciesSetDefaultInstance(pCA, freqOpt, unparsedParameterValuesNested);
518
for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
520
unparsedParameterValues[modelName + ".freq_" + it->first] = it->second;
523
if (modelName == "MG94")
524
model = new MG94(pgc, codonFreqs);
525
else if (modelName == "GY94")
526
model = new GY94(pgc, codonFreqs);
527
else if ((modelName == "YN98") || (modelName == "YNGKP_M0"))
528
model = new YN98(pgc, codonFreqs);
529
else if (modelName == "YNGKP_M1")
530
model = new YNGKP_M1(pgc, codonFreqs);
531
else if (modelName == "YNGKP_M2")
532
model = new YNGKP_M2(pgc, codonFreqs);
533
else if (modelName == "YNGKP_M3")
534
if (args.find("n") == args.end())
535
model = new YNGKP_M3(pgc, codonFreqs);
537
model = new YNGKP_M3(pgc, codonFreqs, TextTools::to<unsigned int>(args["n"]));
538
else if ((modelName == "YNGKP_M7") || modelName == "YNGKP_M8")
540
if (args.find("n") == args.end())
541
throw Exception("Missing argument 'n' (number of classes) in " + modelName + " distribution");
542
unsigned int nbClasses = TextTools::to<unsigned int>(args["n"]);
544
if (modelName == "YNGKP_M7")
545
model = new YNGKP_M7(pgc, codonFreqs, nbClasses);
546
else if (modelName == "YNGKP_M8")
547
model = new YNGKP_M8(pgc, codonFreqs, nbClasses);
550
throw Exception("Unknown Codon model: " + modelName);
553
// /////////////////////////////////
555
// ///////////////////////////////
557
else if (modelName == "RE08")
560
throw Exception("PhylogeneticsApplicationTools::getSubstitutionModelDefaultInstance. No Gap model allowed here.");
562
// We have to parse the nested model first:
563
string nestedModelDescription = args["model"];
564
if (TextTools::isEmpty(nestedModelDescription))
565
throw Exception("PhylogeneticsApplicationTools::getSubstitutionModelDefaultInstance. Missing argument 'model' for model 'RE08'.");
567
ApplicationTools::displayResult("Gap model", modelName);
568
map<string, string> unparsedParameterValuesNested;
569
SubstitutionModel* nestedModel = getSubstitutionModelDefaultInstance(alphabet, nestedModelDescription, unparsedParameterValuesNested, allowCovarions, allowMixed, false, verbose);
571
// Now we create the RE08 substitution model:
572
ReversibleSubstitutionModel* tmp = dynamic_cast<ReversibleSubstitutionModel*>(nestedModel);
573
model = new RE08(tmp);
575
// Then we update the parameter set:
576
for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
578
unparsedParameterValues["RE08.model_" + it->first] = it->second;
582
// /////////////////////////////////
584
// ///////////////////////////////
586
else if (modelName == "TS98")
589
throw Exception("PhylogeneticsApplicationTools::getSubstitutionModelDefaultInstance. No Covarion model allowed here.");
591
// We have to parse the nested model first:
592
string nestedModelDescription = args["model"];
593
if (TextTools::isEmpty(nestedModelDescription))
594
throw Exception("PhylogeneticsApplicationTools::getSubstitutionModelDefaultInstance. Missing argument 'model' for model 'TS98'.");
596
ApplicationTools::displayResult("Covarion model", modelName);
597
map<string, string> unparsedParameterValuesNested;
598
SubstitutionModel* nestedModel = getSubstitutionModelDefaultInstance(alphabet, nestedModelDescription, unparsedParameterValuesNested, false, allowMixed, allowGaps, verbose);
600
// Now we create the TS98 substitution model:
601
ReversibleSubstitutionModel* tmp = dynamic_cast<ReversibleSubstitutionModel*>(nestedModel);
602
model = new TS98(tmp);
604
// Then we update the parameter set:
605
for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
607
unparsedParameterValues["TS98.model_" + it->first] = it->second;
611
// /////////////////////////////////
613
// ///////////////////////////////
615
else if (modelName == "G01")
618
throw Exception("PhylogeneticsApplicationTools::getSubstitutionModelDefaultInstance. No Covarion model allowed here.");
620
// We have to parse the nested model first:
621
string nestedModelDescription = args["model"];
622
if (TextTools::isEmpty(nestedModelDescription))
623
throw Exception("PhylogeneticsApplicationTools::getSubstitutionModelDefaultInstance. Missing argument 'model' for model 'G01'.");
624
string nestedRateDistDescription = args["rdist"];
625
if (TextTools::isEmpty(nestedRateDistDescription))
626
throw Exception("PhylogeneticsApplicationTools::getSubstitutionModelDefaultInstance. Missing argument 'rdist' for model 'G01'.");
628
ApplicationTools::displayResult("Covarion model", modelName);
630
map<string, string> unparsedParameterValuesNestedModel;
631
SubstitutionModel* nestedModel = getSubstitutionModelDefaultInstance(alphabet, nestedModelDescription, unparsedParameterValuesNestedModel, false, allowMixed, allowGaps, verbose);
632
map<string, string> unparsedParameterValuesNestedDist;
633
DiscreteDistribution* nestedRDist = getRateDistributionDefaultInstance(nestedRateDistDescription, unparsedParameterValuesNestedDist, false, verbose);
635
// Now we create the G01 substitution model:
636
ReversibleSubstitutionModel* tmp = dynamic_cast<ReversibleSubstitutionModel*>(nestedModel);
637
model = new G2001(tmp, nestedRDist);
639
// Then we update the parameter set:
640
for (map<string, string>::iterator it = unparsedParameterValuesNestedModel.begin(); it != unparsedParameterValuesNestedModel.end(); it++)
642
unparsedParameterValues["G01.model_" + it->first] = it->second;
644
for (map<string, string>::iterator it = unparsedParameterValuesNestedDist.begin(); it != unparsedParameterValuesNestedDist.end(); it++)
646
unparsedParameterValues["G01.rdist_" + it->first] = it->second;
651
// This is a 'simple' model...
652
if (AlphabetTools::isNucleicAlphabet(alphabet))
654
const NucleicAlphabet* alpha = dynamic_cast<const NucleicAlphabet*>(alphabet);
656
// /////////////////////////////////
658
// ///////////////////////////////
660
if (modelName == "GTR")
662
model = new GTR(alpha);
666
// /////////////////////////////////
668
// ///////////////////////////////
670
else if (modelName == "SSR")
672
model = new SSR(alpha);
675
// /////////////////////////////////
677
// ///////////////////////////////
679
else if (modelName == "L95")
681
model = new L95(alpha);
684
// /////////////////////////////////
686
// ///////////////////////////////
688
else if (modelName == "RN95")
690
model = new RN95(alpha);
693
// /////////////////////////////////
695
// ///////////////////////////////
697
else if (modelName == "RN95s")
699
model = new RN95s(alpha);
702
// /////////////////////////////////
704
// //////////////////////////////
706
else if (modelName == "TN93")
708
model = new TN93(alpha);
711
// /////////////////////////////////
713
// ///////////////////////////////
715
else if (modelName == "HKY85")
717
model = new HKY85(alpha);
720
// /////////////////////////////////
722
// ///////////////////////////////
724
else if (modelName == "F84")
726
model = new F84(alpha);
729
// /////////////////////////////////
731
// ///////////////////////////////
733
else if (modelName == "T92")
735
model = new T92(alpha);
738
// /////////////////////////////////
740
// ///////////////////////////////
742
else if (modelName == "K80")
744
model = new K80(alpha);
748
// /////////////////////////////////
750
// ///////////////////////////////
752
else if (modelName == "JC69")
754
model = new JCnuc(alpha);
758
throw Exception("Model '" + modelName + "' unknown.");
763
const ProteicAlphabet* alpha = dynamic_cast<const ProteicAlphabet*>(alphabet);
765
if (modelName == "JC69+F")
766
model = new JCprot(alpha, new FullProteinFrequenciesSet(alpha), true);
767
else if (modelName == "DSO78+F")
768
model = new DSO78(alpha, new FullProteinFrequenciesSet(alpha), true);
769
else if (modelName == "JTT92+F")
770
model = new JTT92(alpha, new FullProteinFrequenciesSet(alpha), true);
771
else if (modelName == "LG08+F")
772
model = new LG08(alpha, new FullProteinFrequenciesSet(alpha), true);
773
else if (modelName == "WAG01+F")
774
model = new WAG01(alpha, new FullProteinFrequenciesSet(alpha), true);
775
else if (modelName == "Empirical+F")
777
string prefix = args["name"];
778
if (TextTools::isEmpty(prefix))
779
throw Exception("'name' argument missing for user-defined substitution model.");
780
model = new UserProteinSubstitutionModel(alpha, args["file"], new FullProteinFrequenciesSet(alpha), prefix + "+F.", true);
782
else if (modelName == "JC69")
783
model = new JCprot(alpha);
784
else if (modelName == "DSO78")
785
model = new DSO78(alpha);
786
else if (modelName == "JTT92")
787
model = new JTT92(alpha);
788
else if (modelName == "LG08")
789
model = new LG08(alpha);
790
else if (modelName == "WAG01")
791
model = new WAG01(alpha);
792
else if (modelName == "LLG08_EHO")
793
model = new LLG08_EHO(alpha);
794
else if (modelName == "LLG08_EX2")
795
model = new LLG08_EX2(alpha);
796
else if (modelName == "LLG08_EX3")
797
model = new LLG08_EX3(alpha);
798
else if (modelName == "LLG08_UL2")
799
model = new LLG08_UL2(alpha);
800
else if (modelName == "LLG08_UL3")
801
model = new LLG08_UL3(alpha);
802
else if (modelName == "Empirical")
804
string prefix = args["name"];
805
if (TextTools::isEmpty(prefix))
806
throw Exception("'name' argument missing for user-defined substitution model.");
807
model = new UserProteinSubstitutionModel(alpha, args["file"], prefix);
810
throw Exception("Model '" + modelName + "' unknown.");
813
ApplicationTools::displayResult("Substitution model", modelName);
816
//Update parameter args:
817
vector<string> pnames = model->getParameters().getParameterNames();
819
for (unsigned int i = 0; i < pnames.size(); i++)
821
string name = model->getParameterNameWithoutNamespace(pnames[i]);
822
if (args.find(name) != args.end())
824
unparsedParameterValues[modelName + "." + name] = args[name];
828
// Now look if some parameters are aliased:
829
ParameterList pl = model->getIndependentParameters();
830
string pname, pval, pname2;
831
for (unsigned int i = 0; i < pl.size(); i++)
833
pname = model->getParameterNameWithoutNamespace(pl[i].getName());
835
if (args.find(pname) == args.end()) continue;
838
if ((pval.length() >= 5 && pval.substr(0, 5) == "model") ||
839
(pval.find("(") != string::npos))
842
for (unsigned int j = 0; j < pl.size() && !found; j++)
844
pname2 = model->getParameterNameWithoutNamespace(pl[j].getName());
846
//if (j == i || args.find(pname2) == args.end()) continue; Julien 03/03/2010: This extra condition prevents complicated (nested) models to work properly...
847
if (j == i) continue;
850
// This is an alias...
851
// NB: this may throw an exception if uncorrect! We leave it as is for now :s
852
model->aliasParameters(pname2, pname);
854
ApplicationTools::displayResult("Parameter alias found", pname + "->" + pname2);
858
if (!TextTools::isDecimalNumber(pval) && !found)
859
throw Exception("Incorrect parameter syntax: parameter " + pval + " was not found and can't be used as a value for parameter " + pname + ".");
862
if (args.find("useObservedFreqs") != args.end())
863
unparsedParameterValues[model->getNamespace() + "useObservedFreqs"] = args["useObservedFreqs"];
864
if (args.find("useObservedFreqs.pseudoCount") != args.end())
865
unparsedParameterValues[model->getNamespace() + "useObservedFreqs.pseudoCount"] = args["useObservedFreqs.pseudoCount"];
870
/******************************************************************************/
872
SubstitutionModel* PhylogeneticsApplicationTools::getSubstitutionModel(
873
const Alphabet* alphabet,
874
const SiteContainer* data,
875
std::map<std::string, std::string>& params,
876
const string& suffix,
877
bool suffixIsOptional,
878
bool verbose) throw (Exception)
880
string modelDescription;
881
if (AlphabetTools::isCodonAlphabet(alphabet))
882
modelDescription = ApplicationTools::getStringParameter("model", params, "CodonNeutral(model=JC69)", suffix, suffixIsOptional, verbose);
883
else if (AlphabetTools::isWordAlphabet(alphabet))
884
modelDescription = ApplicationTools::getStringParameter("model", params, "Word(model=JC69)", suffix, suffixIsOptional, verbose);
886
modelDescription = ApplicationTools::getStringParameter("model", params, "JC69", suffix, suffixIsOptional, verbose);
888
map<string, string> unparsedParameterValues;
889
SubstitutionModel* model = getSubstitutionModelDefaultInstance(alphabet, modelDescription, unparsedParameterValues, true, true, true, verbose);
890
setSubstitutionModelParametersInitialValues(model, unparsedParameterValues, data, verbose);
894
/******************************************************************************/
896
void PhylogeneticsApplicationTools::setSubstitutionModelParametersInitialValues(
897
SubstitutionModel* model,
898
std::map<std::string, std::string>& unparsedParameterValues,
899
const SiteContainer* data,
900
bool verbose) throw (Exception)
902
bool useObsFreq = ApplicationTools::getBooleanParameter(model->getNamespace() + "useObservedFreqs", unparsedParameterValues, false, "", true, false);
903
if (verbose) ApplicationTools::displayResult("Use observed frequencies for model", useObsFreq ? "yes" : "no");
904
if (useObsFreq && data != 0)
906
unsigned int psi = ApplicationTools::getParameter<unsigned int>(model->getNamespace() + "useObservedFreqs.pseudoCount", unparsedParameterValues, 0);
907
model->setFreqFromData(*data, psi);
909
ParameterList pl = model->getIndependentParameters();
910
for (unsigned int i = 0; i < pl.size(); i++)
912
AutoParameter ap(pl[i]);
913
ap.setMessageHandler(ApplicationTools::warning);
914
pl.setParameter(i, ap);
917
for (unsigned int i = 0; i < pl.size(); i++)
919
const string pName = pl[i].getName();
920
posp = pName.rfind(".");
921
if (!useObsFreq || (pName.substr(posp + 1, 5) != "theta") || unparsedParameterValues.find(pName) != unparsedParameterValues.end())
923
double value = ApplicationTools::getDoubleParameter(pName, unparsedParameterValues, pl[i].getValue());
924
pl[i].setValue(value);
927
ApplicationTools::displayResult("Parameter found", pName + "=" + TextTools::toString(pl[i].getValue()));
930
model->matchParametersValues(pl);
933
/******************************************************************************/
935
void PhylogeneticsApplicationTools::setSubstitutionModelParametersInitialValues(
936
SubstitutionModel* model,
937
std::map<std::string, std::string>& unparsedParameterValues,
938
const std::string& modelPrefix,
939
const SiteContainer* data,
940
std::map<std::string, double>& existingParams,
941
std::vector<std::string>& specificParams,
942
std::vector<std::string>& sharedParams,
943
bool verbose) throw (Exception)
945
bool useObsFreq = ApplicationTools::getBooleanParameter(model->getNamespace() + "useObservedFreqs", unparsedParameterValues, false, "", "", false);
946
if (verbose) ApplicationTools::displayResult("Use observed frequencies for model", useObsFreq ? "yes" : "no");
947
if (useObsFreq && data != 0)
949
unsigned int psi = ApplicationTools::getParameter<unsigned int>(model->getNamespace() + "useObservedFreqs.pseudoCount", unparsedParameterValues, 0);
950
model->setFreqFromData(*data, psi);
953
ParameterList pl = model->getIndependentParameters();
954
for (unsigned int i = 0; i < pl.size(); i++)
956
AutoParameter ap(pl[i]);
957
ap.setMessageHandler(ApplicationTools::warning);
958
pl.setParameter(i, ap);
961
for (unsigned int i = 0; i < pl.size(); i++)
963
const string pName = pl[i].getName();
964
unsigned int posp = model->getParameterNameWithoutNamespace(pName).rfind(".");
966
if (!useObsFreq || (model->getParameterNameWithoutNamespace(pName).substr(posp + 1, 5) != "theta") || unparsedParameterValues.find(pName) != unparsedParameterValues.end())
968
value = ApplicationTools::getStringParameter(pName, unparsedParameterValues, TextTools::toString(pl[i].getValue()));
969
if (value.size() > 5 && value.substr(0, 5) == "model")
971
if (existingParams.find(value) != existingParams.end())
973
pl[i].setValue(existingParams[value]);
974
sharedParams.push_back(value);
977
throw Exception("Error, unknown parameter" + modelPrefix + pName);
981
double value2 = TextTools::toDouble(value);
982
existingParams[modelPrefix + pName] = value2;
983
specificParams.push_back(pName);
984
pl[i].setValue(value2);
989
existingParams[modelPrefix + pName] = pl[i].getValue();
990
specificParams.push_back(pName);
993
ApplicationTools::displayResult("Parameter found", modelPrefix + pName + "=" + TextTools::toString(pl[i].getValue()));
995
model->matchParametersValues(pl);
998
/******************************************************************************/
1000
FrequenciesSet* PhylogeneticsApplicationTools::getRootFrequenciesSet(
1001
const Alphabet* alphabet,
1002
const SiteContainer* data,
1003
std::map<std::string, std::string>& params,
1004
const std::vector<double>& rateFreqs,
1005
const std::string& suffix,
1006
bool suffixIsOptional,
1007
bool verbose) throw (Exception)
1009
string freqDescription = ApplicationTools::getStringParameter("nonhomogeneous.root_freq", params, "Full(init=observed)", suffix, suffixIsOptional);
1010
if (freqDescription == "None")
1016
FrequenciesSet* freq = getFrequenciesSet(alphabet, freqDescription, data, rateFreqs, verbose);
1018
ApplicationTools::displayResult("Root frequencies ", freq->getName());
1023
/******************************************************************************/
1025
FrequenciesSet* PhylogeneticsApplicationTools::getFrequenciesSet(
1026
const Alphabet* alphabet,
1027
const std::string& freqDescription,
1028
const SiteContainer* data,
1029
const std::vector<double>& rateFreqs,
1030
bool verbose) throw (Exception)
1032
map<string, string> unparsedParameterValues;
1033
FrequenciesSet* pFS = getFrequenciesSetDefaultInstance(alphabet, freqDescription, unparsedParameterValues);
1035
// Now we set the initial frequencies according to options:
1036
if (unparsedParameterValues.find("init") != unparsedParameterValues.end())
1038
// Initialization using the "init" option
1039
string init = unparsedParameterValues["init"];
1040
if (init == "observed")
1043
throw Exception("Missing data for observed frequencies");
1044
unsigned int psc = 0;
1045
if (unparsedParameterValues.find("pseudoCount") != unparsedParameterValues.end())
1046
psc = TextTools::toInt(unparsedParameterValues["pseudoCount"]);
1048
map<int, double> freqs;
1049
SequenceContainerTools::getFrequencies(*data, freqs, psc);
1051
pFS->setFrequenciesFromMap(freqs);
1053
else if (init == "balanced")
1055
// Nothing to do here, this is the default instanciation.
1058
throw Exception("Unknown init argument");
1060
else if (unparsedParameterValues.find("values") != unparsedParameterValues.end())
1062
// Initialization using the "values" argument
1063
vector<double> frequencies;
1064
string rf = unparsedParameterValues["values"];
1065
StringTokenizer strtok(rf.substr(1, rf.length() - 2), ",");
1066
while (strtok.hasMoreToken())
1067
frequencies.push_back(TextTools::toDouble(strtok.nextToken()));
1068
pFS->setFrequencies(frequencies);
1072
// Explicit initialization of each parameter
1073
ParameterList pl = pFS->getParameters();
1075
for (unsigned int i = 0; i < pl.size(); i++)
1077
AutoParameter ap(pl[i]);
1079
ap.setMessageHandler(ApplicationTools::warning);
1080
pl.setParameter(i, ap);
1083
for (unsigned int i = 0; i < pl.size(); i++)
1085
const string pName = pl[i].getName();
1086
double value = ApplicationTools::getDoubleParameter(pName, unparsedParameterValues, pl[i].getValue());
1087
pl[i].setValue(value);
1089
ApplicationTools::displayResult("Parameter found", pName + "=" + TextTools::toString(pl[i].getValue()));
1092
pFS->matchParametersValues(pl);
1095
// /////// To be changed for input normalization
1096
if (rateFreqs.size() > 0)
1098
pFS = new MarkovModulatedFrequenciesSet(pFS, rateFreqs);
1104
/******************************************************************************/
1107
FrequenciesSet* PhylogeneticsApplicationTools::getFrequenciesSetDefaultInstance(
1108
const Alphabet* alphabet,
1109
const std::string& freqDescription,
1110
std::map<std::string, std::string>& unparsedParameterValues) throw (Exception)
1113
map<string, string> args;
1114
KeyvalTools::parseProcedure(freqDescription, freqName, args);
1115
FrequenciesSet* pFS;
1117
if (freqName.substr(0, 4) == "Full")
1119
if (AlphabetTools::isNucleicAlphabet(alphabet))
1121
pFS = new FullNucleotideFrequenciesSet(dynamic_cast<const NucleicAlphabet*>(alphabet));
1123
else if (AlphabetTools::isProteicAlphabet(alphabet))
1125
pFS = new FullProteinFrequenciesSet(dynamic_cast<const ProteicAlphabet*>(alphabet));
1127
else if (AlphabetTools::isCodonAlphabet(alphabet))
1129
pFS = new FullCodonFrequenciesSet(dynamic_cast<const CodonAlphabet*>(alphabet));
1133
pFS = new FullFrequenciesSet(alphabet);
1136
// Update parameter values:
1137
if (args.find("theta") != args.end())
1138
unparsedParameterValues["Full.theta"] = args["theta"];
1139
for (unsigned int i = 1; i < alphabet->getSize(); i++)
1141
if (args.find("theta" + TextTools::toString(i) ) != args.end())
1143
unparsedParameterValues["Full.theta" + TextTools::toString(i) ] = args["theta" + TextTools::toString(i) ];
1147
else if (freqName == "GC")
1149
if (!AlphabetTools::isNucleicAlphabet(alphabet))
1150
throw Exception("Error, unvalid frequencies " + freqName + " with non-nucleic alphabet.");
1152
pFS = new GCFrequenciesSet(dynamic_cast<const NucleicAlphabet*>(alphabet));
1154
if (args.find("theta") != args.end())
1155
unparsedParameterValues["GC.theta"] = args["theta"];
1159
else if (freqName == "Word")
1161
if (!AlphabetTools::isWordAlphabet(alphabet))
1162
throw Exception("PhylogeneticsApplicationTools::getFrequenciesSetDefaultInstance.\n\t Bad alphabet type "
1163
+ alphabet->getAlphabetType() + " for frequenciesset " + freqName + ".");
1165
const WordAlphabet* pWA = dynamic_cast<const WordAlphabet*>(alphabet);
1167
if (args.find("frequency") != args.end())
1169
string sAFS = args["frequency"];
1171
unsigned int nbfreq = pWA->getLength();
1172
FrequenciesSet* pFS2;
1174
for (unsigned i = 0; i < nbfreq; i++)
1176
st += TextTools::toString(i + 1);
1179
map<string, string> unparsedParameterValuesNested;
1180
pFS2 = getFrequenciesSetDefaultInstance(pWA->getNAlphabet(0), sAFS, unparsedParameterValuesNested);
1181
for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
1183
unparsedParameterValues["Word." + st + "_" + it->first] = it->second;
1185
pFS = new WordFromUniqueFrequenciesSet(pWA, pFS2);
1189
if (args.find("frequency1") == args.end())
1190
throw Exception("PhylogeneticsApplicationTools::getFrequenciesSetDefaultInstance. Missing argument 'frequency' or 'frequency1' for frequencies set 'Word'.");
1191
vector<string> v_sAFS;
1192
vector<FrequenciesSet*> v_AFS;
1193
unsigned int nbfreq = 1;
1195
while (args.find("frequency" + TextTools::toString(nbfreq)) != args.end())
1197
v_sAFS.push_back(args["frequency" + TextTools::toString(nbfreq++)]);
1200
if (v_sAFS.size() != pWA->getLength())
1201
throw Exception("PhylogeneticsApplicationTools::getFrequenciesSetDefaultInstance. Number of frequencies (" + TextTools::toString(v_sAFS.size()) + ") does not match length of the words (" + TextTools::toString(pWA->getLength()) + ")");
1203
map<string, string> unparsedParameterValuesNested;
1204
for (unsigned i = 0; i < v_sAFS.size(); i++)
1206
unparsedParameterValuesNested.clear();
1207
pFS = getFrequenciesSetDefaultInstance(pWA->getNAlphabet(i), v_sAFS[i], unparsedParameterValuesNested);
1208
for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
1210
unparsedParameterValues["Word." + TextTools::toString(i + 1) + "_" + it->first] = it->second;
1212
v_AFS.push_back(pFS);
1215
pFS = new WordFromIndependentFrequenciesSet(pWA, v_AFS);
1218
// INDEPENDENT CODON
1219
else if (freqName == "Codon")
1221
if (!AlphabetTools::isCodonAlphabet(alphabet))
1222
throw Exception("PhylogeneticsApplicationTools::getFrequenciesSetDefaultInstance.\n\t Bad alphabet type "
1223
+ alphabet->getAlphabetType() + " for frequenciesset " + freqName + ".");
1225
const CodonAlphabet* pWA = dynamic_cast<const CodonAlphabet*>(alphabet);
1227
if (args.find("frequency") != args.end())
1229
string sAFS = args["frequency"];
1231
unsigned int nbfreq = pWA->getLength();
1232
FrequenciesSet* pFS2;
1234
for (unsigned i = 0; i < nbfreq; i++)
1236
st += TextTools::toString(i+1);
1239
map<string, string> unparsedParameterValuesNested;
1240
pFS2 = getFrequenciesSetDefaultInstance(pWA->getNAlphabet(0), sAFS, unparsedParameterValuesNested);
1241
for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
1243
unparsedParameterValues["Codon." + st + "_" + it->first] = it->second;
1245
pFS = new CodonFromUniqueFrequenciesSet(pWA,pFS2);
1249
if (args.find("frequency1") == args.end())
1250
throw Exception("PhylogeneticsApplicationTools::getFrequenciesSetDefaultInstance. Missing argument 'frequency' or 'frequency1' for frequencies set 'Codon'.");
1251
vector<string> v_sAFS;
1252
vector<FrequenciesSet*> v_AFS;
1253
unsigned int nbfreq = 1;
1255
while (args.find("frequency" + TextTools::toString(nbfreq)) != args.end())
1257
v_sAFS.push_back(args["frequency" + TextTools::toString(nbfreq++)]);
1260
if (v_sAFS.size() != 3)
1261
throw Exception("PhylogeneticsApplicationTools::getFrequenciesSetDefaultInstance. Number of frequencies (" + TextTools::toString(v_sAFS.size()) + ") is not three");
1263
map<string, string> unparsedParameterValuesNested;
1264
for (unsigned i = 0; i < v_sAFS.size(); i++)
1266
unparsedParameterValuesNested.clear();
1267
pFS = getFrequenciesSetDefaultInstance(pWA->getNAlphabet(i), v_sAFS[i], unparsedParameterValuesNested);
1268
for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
1270
unparsedParameterValues["Codon." + TextTools::toString(i+1) + "_" + it->first] = it->second;
1272
v_AFS.push_back(pFS);
1275
pFS = new CodonFromIndependentFrequenciesSet(pWA, v_AFS);
1278
else if (AlphabetTools::isCodonAlphabet(alphabet))
1282
if (freqName == "F0")
1283
opt = FrequenciesSet::F0;
1284
else if (freqName == "F1X4")
1285
opt = FrequenciesSet::F1X4;
1286
else if (freqName == "F3X4")
1287
opt = FrequenciesSet::F3X4;
1288
else if (freqName == "F61")
1289
opt = FrequenciesSet::F61;
1292
pFS = FrequenciesSet::getFrequenciesSetForCodons(opt, *dynamic_cast<const CodonAlphabet*>(alphabet));
1294
throw Exception("Unknown frequency option: " + freqName);
1297
throw Exception("Unknown frequency option: " + freqName);
1299
// Forward arguments:
1300
if (args.find("init") != args.end())
1301
unparsedParameterValues["init"] = args["init"];
1302
if (args.find("pseudoCount") != args.end())
1303
unparsedParameterValues["pseudoCount"] = args["pseudoCount"];
1304
if (args.find("values") != args.end())
1305
unparsedParameterValues["values"] = args["values"];
1311
/******************************************************************************/
1313
SubstitutionModelSet* PhylogeneticsApplicationTools::getSubstitutionModelSet(
1314
const Alphabet* alphabet,
1315
const SiteContainer* data,
1316
map<string, string>& params,
1317
const string& suffix,
1318
bool suffixIsOptional,
1319
bool verbose) throw (Exception)
1321
if (!ApplicationTools::parameterExists("nonhomogeneous.number_of_models", params))
1322
throw Exception("You must specify this parameter: nonhomogeneous.number_of_models .");
1323
unsigned int nbModels = ApplicationTools::getParameter<unsigned int>("nonhomogeneous.number_of_models", params, 1, suffix, suffixIsOptional, false);
1325
throw Exception("The number of models can't be 0 !");
1327
if (verbose) ApplicationTools::displayResult("Number of distinct models", TextTools::toString(nbModels));
1330
// ///////////////////////////////////////////
1331
// Build a new model set object:
1333
vector<double> rateFreqs;
1335
if (AlphabetTools::isCodonAlphabet(alphabet))
1336
tmpDesc = ApplicationTools::getStringParameter("model1", params, "CodonNeutral(model=JC69)", suffix, suffixIsOptional, verbose);
1337
else if (AlphabetTools::isWordAlphabet(alphabet))
1338
tmpDesc = ApplicationTools::getStringParameter("model1", params, "Word(model=JC69)", suffix, suffixIsOptional, verbose);
1340
tmpDesc = ApplicationTools::getStringParameter("model1", params, "JC69", suffix, suffixIsOptional, verbose);
1343
map<string, string> tmpUnparsedParameterValues;
1344
auto_ptr<SubstitutionModel> tmp(getSubstitutionModelDefaultInstance(alphabet, tmpDesc, tmpUnparsedParameterValues, true, true, true, 0));
1345
if (tmp->getNumberOfStates() != alphabet->getSize())
1347
// Markov-Modulated Markov Model...
1348
unsigned int n = (unsigned int)(tmp->getNumberOfStates() / alphabet->getSize());
1349
rateFreqs = vector<double>(n, 1. / (double)n); // Equal rates assumed for now, may be changed later (actually, in the most general case,
1352
// ////////////////////////////////////
1353
// Deal with root frequencies
1355
bool stationarity = ApplicationTools::getBooleanParameter("nonhomogeneous.stationarity", params, false, "", false, false);
1356
FrequenciesSet* rootFrequencies = 0;
1359
rootFrequencies = getRootFrequenciesSet(alphabet, data, params, rateFreqs, suffix, suffixIsOptional, verbose);
1360
stationarity = !rootFrequencies;
1362
ApplicationTools::displayBooleanResult("Stationarity assumed", stationarity);
1364
SubstitutionModelSet* modelSet = stationarity ?
1365
new SubstitutionModelSet(alphabet, true) : //Stationarity assumed.
1366
new SubstitutionModelSet(alphabet, rootFrequencies);
1368
// //////////////////////////////////////
1369
// Now parse all models:
1371
map<string, double> existingParameters;
1373
for (unsigned int i = 0; i < nbModels; i++)
1375
string prefix = "model" + TextTools::toString(i + 1);
1377
if (AlphabetTools::isCodonAlphabet(alphabet))
1378
modelDesc = ApplicationTools::getStringParameter(prefix, params, "CodonNeutral(model=JC69)", suffix, suffixIsOptional, verbose);
1380
if (AlphabetTools::isWordAlphabet(alphabet))
1381
modelDesc = ApplicationTools::getStringParameter(prefix, params, "Word(model=JC69)", suffix, suffixIsOptional, verbose);
1383
modelDesc = ApplicationTools::getStringParameter(prefix, params, "JC69", suffix, suffixIsOptional, verbose);
1386
map<string, string> unparsedParameterValues;
1387
SubstitutionModel* model = getSubstitutionModelDefaultInstance(alphabet, modelDesc, unparsedParameterValues, true, true, true, verbose);
1390
vector<string> specificParameters, sharedParameters;
1391
setSubstitutionModelParametersInitialValues(model,
1392
unparsedParameterValues, prefix, data,
1393
existingParameters, specificParameters, sharedParameters,
1395
vector<int> nodesId = ApplicationTools::getVectorParameter<int>(prefix + "nodes_id", params, ',', ':', TextTools::toString(i), suffix, suffixIsOptional, true);
1396
if (verbose) ApplicationTools::displayResult("Model" + TextTools::toString(i + 1) + " is associated to", TextTools::toString(nodesId.size()) + " node(s).");
1397
// Add model and specific parameters:
1398
modelSet->addModel(model, nodesId, specificParameters);
1399
// Now set shared parameters:
1400
for (unsigned int j = 0; j < sharedParameters.size(); j++)
1402
string pName = sharedParameters[j];
1403
string::size_type index = pName.find(".");
1404
if (index == string::npos) throw Exception("PhylogeneticsApplicationTools::getSubstitutionModelSet. Bad parameter name: " + pName);
1405
string name = pName.substr(index + 1) + "_" + pName.substr(5, index - 5);
1406
//namespace checking:
1407
vector<unsigned int> models = modelSet->getModelsWithParameter(name);
1408
if (models.size() == 0)
1409
throw Exception("PhylogeneticsApplicationTools::getSubstitutionModelSet. Parameter `" + name + "' is not associated to any model.");
1410
if (model->getNamespace() == modelSet->getModel(models[0])->getNamespace())
1411
modelSet->setParameterToModel(modelSet->getParameterIndex(name), modelSet->getNumberOfModels() - 1);
1413
throw Exception("Assigning a value to a parameter with a distinct namespace is not (yet) allowed. Consider using parameter aliasing instead.");
1417
//Finally check parameter aliasing:
1418
string aliasDesc = ApplicationTools::getStringParameter("nonhomogeneous.alias", params, "", suffix, suffixIsOptional, verbose);
1419
StringTokenizer st(aliasDesc, ",");
1420
while (st.hasMoreToken()) {
1421
string alias = st.nextToken();
1422
string::size_type index = alias.find("->");
1423
if (index == string::npos)
1424
throw Exception("PhylogeneticsApplicationTools::getSubstitutionModelSet. Bad alias syntax, should contain `->' symbol: " + alias);
1425
string p1 = alias.substr(0, index);
1426
string p2 = alias.substr(index + 2);
1427
ApplicationTools::displayResult("Parameter alias found", p1 + "->" + p2);
1428
modelSet->aliasParameters(p1, p2);
1434
/******************************************************************************/
1436
DiscreteDistribution* PhylogeneticsApplicationTools::getRateDistributionDefaultInstance(
1437
const string& distDescription,
1438
map<string, string>& unparsedParameterValues,
1439
bool constDistAllowed,
1440
bool verbose) throw (Exception)
1443
DiscreteDistribution* rDist = 0;
1444
map<string, string> args;
1445
KeyvalTools::parseProcedure(distDescription, distName, args);
1447
if (distName == "Invariant")
1449
// We have to parse the nested distribution first:
1450
string nestedDistDescription = args["dist"];
1451
if (TextTools::isEmpty(nestedDistDescription))
1452
throw Exception("PhylogeneticsApplicationTools::getRateDistributionDefaultInstance. Missing argument 'dist' for distribution 'Invariant'.");
1454
ApplicationTools::displayResult("Invariant Mixed distribution", distName );
1455
map<string, string> unparsedParameterValuesNested;
1456
DiscreteDistribution* nestedDistribution = getRateDistributionDefaultInstance(nestedDistDescription, unparsedParameterValuesNested, constDistAllowed, verbose);
1458
// Now we create the Invariant rate distribution:
1459
rDist = new InvariantMixedDiscreteDistribution(nestedDistribution, 0.1, 0.000001); // , "Invariant.");
1461
// Then we update the parameter set:
1462
for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
1464
unparsedParameterValues["InvariantMixed.dist_" + it->first] = it->second;
1466
if (args.find("p") != args.end())
1467
unparsedParameterValues["InvariantMixed.p"] = args["p"];
1469
else if (distName == "Uniform")
1471
if (!constDistAllowed) throw Exception("You can't use a constant distribution here!");
1472
rDist = new ConstantDistribution(1., true);
1474
else if (distName == "Gamma")
1476
if (args.find("n") == args.end())
1477
throw Exception("Missing argument 'n' (number of classes) in Gamma distribution");
1478
unsigned int nbClasses = TextTools::to<unsigned int>(args["n"]);
1479
rDist = new GammaDiscreteDistribution(nbClasses, 1., 1.); // , "Gamma.");
1480
rDist->aliasParameters("alpha", "beta");
1481
if (args.find("alpha") != args.end())
1482
unparsedParameterValues["Gamma.alpha"] = args["alpha"];
1486
throw Exception("Unknown distribution: " + distName + ".");
1490
ApplicationTools::displayResult("Rate distribution", distName);
1491
ApplicationTools::displayResult("Number of classes", TextTools::toString(rDist->getNumberOfCategories()));
1497
/******************************************************************************/
1499
DiscreteDistribution* PhylogeneticsApplicationTools::getDistributionDefaultInstance(
1500
const std::string& distDescription,
1501
std::map<std::string, std::string>& unparsedParameterValues,
1506
DiscreteDistribution* rDist = 0;
1507
map<string, string> args;
1508
KeyvalTools::parseProcedure(distDescription, distName, args);
1510
if (distName == "InvariantMixed")
1512
// We have to parse the nested distribution first:
1513
string nestedDistDescription = args["dist"];
1514
if (TextTools::isEmpty(nestedDistDescription))
1515
throw Exception("PhylogeneticsApplicationTools::getDistributionDefaultInstance. Missing argument 'dist' for distribution 'Invariant'.");
1517
ApplicationTools::displayResult("Invariant Mixed distribution", distName );
1518
map<string, string> unparsedParameterValuesNested;
1519
DiscreteDistribution* nestedDistribution = getDistributionDefaultInstance(nestedDistDescription,
1520
unparsedParameterValuesNested,
1523
// Now we create the Invariant rate distribution:
1524
rDist = new InvariantMixedDiscreteDistribution(nestedDistribution, 0.1, 0.000001);
1526
// Then we update the parameter set:
1527
for (map<string, string>::iterator it = unparsedParameterValuesNested.begin();
1528
it != unparsedParameterValuesNested.end(); it++)
1530
unparsedParameterValues["InvarianMixed.dist_" + it->first] = it->second;
1532
if (args.find("p") != args.end())
1533
unparsedParameterValues["InvariantMixed.p"] = args["p"];
1535
else if (distName == "Constant")
1537
if (args.find("value") == args.end())
1538
throw Exception("Missing argument 'value' in Constant distribution");
1539
rDist = new ConstantDistribution(TextTools::to<double>(args["value"]));
1540
unparsedParameterValues["Constant.value"] = args["value"];
1542
else if (distName == "Simple")
1544
if (args.find("values") == args.end())
1545
throw Exception("Missing argument 'values' in Simple distribution");
1546
if (args.find("probas") == args.end())
1547
throw Exception("Missing argument 'probas' in Simple distribution");
1548
vector<double> probas, values;
1550
string rf = args["values"];
1551
StringTokenizer strtok(rf.substr(1, rf.length() - 2), ",");
1552
while (strtok.hasMoreToken())
1553
values.push_back(TextTools::toDouble(strtok.nextToken()));
1555
rf = args["probas"];
1556
StringTokenizer strtok2(rf.substr(1, rf.length() - 2), ",");
1557
while (strtok2.hasMoreToken())
1558
probas.push_back(TextTools::toDouble(strtok2.nextToken()));
1560
rDist = new SimpleDiscreteDistribution(values, probas);
1561
vector<string> v = rDist->getParameters().getParameterNames();
1563
for (unsigned int i = 0; i < v.size(); i++)
1565
unparsedParameterValues[v[i]] = TextTools::toString(rDist->getParameterValue(rDist->getParameterNameWithoutNamespace(v[i])));
1568
else if (distName == "Mixture")
1570
if (args.find("probas") == args.end())
1571
throw Exception("Missing argument 'probas' in Mixture distribution");
1572
vector<double> probas;
1573
vector<DiscreteDistribution*> v_pdd;
1574
DiscreteDistribution* pdd;
1575
string rf = args["probas"];
1576
StringTokenizer strtok2(rf.substr(1, rf.length() - 2), ",");
1577
while (strtok2.hasMoreToken())
1578
probas.push_back(TextTools::toDouble(strtok2.nextToken()));
1580
vector<string> v_nestedDistrDescr;
1582
unsigned int nbd = 0;
1583
while (args.find("distribution" + TextTools::toString(++nbd)) != args.end())
1584
v_nestedDistrDescr.push_back(args["distribution" + TextTools::toString(nbd)]);
1586
if (v_nestedDistrDescr.size() != probas.size())
1587
throw Exception("Number of distributions do not fit the number of probabilities");
1588
map<string, string> unparsedParameterValuesNested;
1590
for (unsigned i = 0; i < v_nestedDistrDescr.size(); i++)
1592
unparsedParameterValuesNested.clear();
1593
pdd = getDistributionDefaultInstance(v_nestedDistrDescr[i], unparsedParameterValuesNested, false);
1595
for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
1597
unparsedParameterValues[distName + "." + TextTools::toString(i + 1) + "_" + it->first] = it->second;
1599
v_pdd.push_back(pdd);
1601
rDist = new MixtureOfDiscreteDistributions(v_pdd, probas);
1605
if (args.find("n") == args.end())
1606
throw Exception("Missing argument 'n' (number of classes) in " + distName
1608
unsigned int nbClasses = TextTools::to<unsigned int>(args["n"]);
1610
if (distName == "Gamma")
1612
rDist = new GammaDiscreteDistribution(nbClasses, 1, 1);
1613
if (args.find("alpha") != args.end())
1614
unparsedParameterValues["Gamma.alpha"] = args["alpha"];
1615
if (args.find("beta") != args.end())
1616
unparsedParameterValues["Gamma.beta"] = args["beta"];
1618
else if (distName == "Gaussian")
1620
rDist = new GaussianDiscreteDistribution(nbClasses, 1, 1);
1621
if (args.find("mu") != args.end())
1622
unparsedParameterValues["Gaussian.mu"] = args["mu"];
1623
if (args.find("sigma") != args.end())
1624
unparsedParameterValues["Gaussian.sigma"] = args["sigma"];
1626
else if (distName == "Beta")
1628
rDist = new BetaDiscreteDistribution(nbClasses, 1, 1);
1629
if (args.find("alpha") != args.end())
1630
unparsedParameterValues["Beta.alpha"] = args["alpha"];
1631
if (args.find("beta") != args.end())
1632
unparsedParameterValues["Beta.beta"] = args["beta"];
1634
else if (distName == "Exponential")
1636
rDist = new ExponentialDiscreteDistribution(nbClasses, 1);
1637
if (args.find("lambda") != args.end())
1638
unparsedParameterValues["Exponential.lambda"] = args["lambda"];
1639
if (args.find("median") != args.end())
1640
rDist->setMedian(true);
1642
else if (distName == "TruncExponential")
1644
rDist = new TruncatedExponentialDiscreteDistribution(nbClasses, 1, 0);
1646
if (args.find("median") != args.end())
1647
rDist->setMedian(true);
1648
if (args.find("lambda") != args.end())
1649
unparsedParameterValues["TruncExponential.lambda"] = args["lambda"];
1650
if (args.find("tp") != args.end())
1651
unparsedParameterValues["TruncExponential.tp"] = args["tp"];
1653
else if (distName == "Uniform")
1655
if (args.find("begin") == args.end())
1656
throw Exception("Missing argument 'begin' in Uniform distribution");
1657
if (args.find("end") == args.end())
1658
throw Exception("Missing argument 'end' in Uniform distribution");
1659
rDist = new UniformDiscreteDistribution(nbClasses, TextTools::to<double>(args["begin"]),
1660
TextTools::to<double>(args["end"]));
1664
throw Exception("Unknown distribution: " + distName + ".");
1669
ApplicationTools::displayResult("Distribution", distName);
1670
ApplicationTools::displayResult("Number of classes", TextTools::toString(rDist->getNumberOfCategories()));
1676
/******************************************************************************/
1678
MultipleDiscreteDistribution* PhylogeneticsApplicationTools::getMultipleDistributionDefaultInstance(
1679
const std::string& distDescription,
1680
std::map<std::string, std::string>& unparsedParameterValues,
1684
MultipleDiscreteDistribution* pMDD = 0;
1685
map<string, string> args;
1686
KeyvalTools::parseProcedure(distDescription, distName, args);
1688
if (distName == "Dirichlet")
1690
if (args.find("classes") == args.end())
1691
throw Exception("Missing argument 'classes' (vector of number of classes) in " + distName
1693
if (args.find("alphas") == args.end())
1694
throw Exception("Missing argument 'alphas' (vector of Dirichlet shape parameters) in Dirichlet distribution");
1695
vector<double> alphas;
1696
vector<unsigned int> classes;
1698
string rf = args["alphas"];
1699
StringTokenizer strtok(rf.substr(1, rf.length() - 2), ",");
1700
while (strtok.hasMoreToken())
1701
alphas.push_back(TextTools::toDouble(strtok.nextToken()));
1703
rf = args["classes"];
1704
StringTokenizer strtok2(rf.substr(1, rf.length() - 2), ",");
1705
while (strtok2.hasMoreToken())
1706
classes.push_back(TextTools::toInt(strtok2.nextToken()));
1708
pMDD = new DirichletDiscreteDistribution(classes, alphas);
1709
vector<string> v = pMDD->getParameters().getParameterNames();
1711
for (unsigned int i = 0; i < v.size(); i++)
1713
unparsedParameterValues[v[i]] = TextTools::toString(pMDD->getParameterValue(pMDD->getParameterNameWithoutNamespace(v[i])));
1717
throw Exception("Unknown multiple distribution name: " + distName);
1722
/******************************************************************************/
1724
void PhylogeneticsApplicationTools::setRateDistributionParametersInitialValues(
1725
DiscreteDistribution* rDist,
1726
map<string, string>& unparsedParameterValues,
1727
bool verbose) throw (Exception)
1729
ParameterList pl = rDist->getIndependentParameters();
1730
for (unsigned int i = 0; i < pl.size(); i++)
1732
AutoParameter ap(pl[i]);
1733
ap.setMessageHandler(ApplicationTools::warning);
1734
pl.setParameter(i, ap);
1737
for (unsigned int i = 0; i < pl.size(); i++)
1739
const string pName = pl[i].getName();
1740
double value = ApplicationTools::getDoubleParameter(pName, unparsedParameterValues, pl[i].getValue());
1741
pl[i].setValue(value);
1743
ApplicationTools::displayResult("Parameter found", pName + "=" + TextTools::toString(pl[i].getValue()));
1745
rDist->matchParametersValues(pl);
1748
for (unsigned int c = 0; c < rDist->getNumberOfCategories(); c++)
1750
ApplicationTools::displayResult("- Category " + TextTools::toString(c)
1751
+ " (Pr = " + TextTools::toString(rDist->getProbability(c)) + ") rate", TextTools::toString(rDist->getCategory(c)));
1757
/******************************************************************************/
1759
DiscreteDistribution* PhylogeneticsApplicationTools::getRateDistribution(
1760
map<string, string>& params,
1761
const string& suffix,
1762
bool suffixIsOptional,
1763
bool verbose) throw (Exception)
1765
string distDescription = ApplicationTools::getStringParameter("rate_distribution", params, "Uniform()", suffix, suffixIsOptional);
1766
map<string, string> unparsedParameterValues;
1767
DiscreteDistribution* rDist = getRateDistributionDefaultInstance(distDescription, unparsedParameterValues, verbose);
1768
setRateDistributionParametersInitialValues(rDist, unparsedParameterValues, verbose);
1772
/******************************************************************************/
1774
TreeLikelihood* PhylogeneticsApplicationTools::optimizeParameters(
1776
const ParameterList& parameters,
1777
std::map<std::string, std::string>& params,
1778
const std::string& suffix,
1779
bool suffixIsOptional,
1783
string optimization = ApplicationTools::getStringParameter("optimization", params, "FullD(derivatives=Newton)", suffix, suffixIsOptional, false);
1784
if (optimization == "None") return tl;
1786
map<string, string> optArgs;
1787
KeyvalTools::parseProcedure(optimization, optName, optArgs);
1789
unsigned int optVerbose = ApplicationTools::getParameter<unsigned int>("optimization.verbose", params, 2, suffix, suffixIsOptional);
1791
string mhPath = ApplicationTools::getAFilePath("optimization.message_handler", params, false, false, suffix, suffixIsOptional);
1792
OutputStream* messageHandler =
1793
(mhPath == "none") ? 0 :
1794
(mhPath == "std") ? ApplicationTools::message :
1795
new StlOutputStream(auto_ptr<ostream>(new ofstream(mhPath.c_str(), ios::out)));
1796
if (verbose) ApplicationTools::displayResult("Message handler", mhPath);
1798
string prPath = ApplicationTools::getAFilePath("optimization.profiler", params, false, false, suffix, suffixIsOptional);
1799
OutputStream* profiler =
1800
(prPath == "none") ? 0 :
1801
(prPath == "std") ? ApplicationTools::message :
1802
new StlOutputStream(auto_ptr<ostream>(new ofstream(prPath.c_str(), ios::out)));
1803
if (profiler) profiler->setPrecision(20);
1804
if (verbose) ApplicationTools::displayResult("Profiler", prPath);
1806
bool scaleFirst = ApplicationTools::getBooleanParameter("optimization.scale_first", params, false, suffix, suffixIsOptional, false);
1809
// We scale the tree before optimizing each branch length separately:
1810
if (verbose) ApplicationTools::displayMessage("Scaling the tree before optimizing each branch length separately.");
1811
double tolerance = ApplicationTools::getDoubleParameter("optimization.scale_first.tolerance", params, .0001, suffix, suffixIsOptional, true);
1812
if (verbose) ApplicationTools::displayResult("Scaling tolerance", TextTools::toString(tolerance));
1813
int nbEvalMax = ApplicationTools::getIntParameter("optimization.scale_first.max_number_f_eval", params, 1000000, suffix, suffixIsOptional, true);
1814
if (verbose) ApplicationTools::displayResult("Scaling max # f eval", TextTools::toString(nbEvalMax));
1815
OptimizationTools::optimizeTreeScale(
1822
ApplicationTools::displayResult("New tree likelihood", -tl->getValue());
1826
// Should I ignore some parameters?
1827
ParameterList parametersToEstimate = parameters;
1828
string paramListDesc = ApplicationTools::getStringParameter("optimization.ignore_parameter", params, "", suffix, suffixIsOptional, false);
1829
StringTokenizer st(paramListDesc, ",");
1830
while (st.hasMoreToken())
1834
string param = st.nextToken();
1835
if (param == "BrLen")
1837
vector<string> vs = tl->getBranchLengthsParameters().getParameterNames();
1838
parametersToEstimate.deleteParameters(vs);
1840
ApplicationTools::displayResult("Parameter ignored", string("Branch lengths"));
1842
else if (param == "Ancient")
1844
NonHomogeneousTreeLikelihood* nhtl = dynamic_cast<NonHomogeneousTreeLikelihood*>(tl);
1845
if (!nhtl) ApplicationTools::displayWarning("The 'Ancient' parameters do not exist in homogeneous models, and will be ignored.");
1848
vector<string> vs = nhtl->getRootFrequenciesParameters().getParameterNames();
1849
parametersToEstimate.deleteParameters(vs);
1852
ApplicationTools::displayResult("Parameter ignored", string("Root frequencies"));
1854
else if ((i = param.find("*")) != string::npos)
1856
string pref = param.substr(0, i);
1858
for (unsigned int j = 0; j < parametersToEstimate.size(); j++)
1860
if (parametersToEstimate[j].getName().find(pref) == 0)
1861
vs.push_back(parametersToEstimate[j].getName());
1863
for (vector<string>::iterator it = vs.begin(); it != vs.end(); it++)
1865
parametersToEstimate.deleteParameter(*it);
1867
ApplicationTools::displayResult("Parameter ignored", *it);
1872
parametersToEstimate.deleteParameter(param);
1874
ApplicationTools::displayResult("Parameter ignored", param);
1877
catch (ParameterNotFoundException& pnfe)
1879
ApplicationTools::displayWarning("Parameter '" + pnfe.getParameter() + "' not found, and so can't be ignored!");
1883
unsigned int nbEvalMax = ApplicationTools::getParameter<unsigned int>("optimization.max_number_f_eval", params, 1000000, suffix, suffixIsOptional);
1884
if (verbose) ApplicationTools::displayResult("Max # ML evaluations", TextTools::toString(nbEvalMax));
1886
double tolerance = ApplicationTools::getDoubleParameter("optimization.tolerance", params, .000001, suffix, suffixIsOptional);
1887
if (verbose) ApplicationTools::displayResult("Tolerance", TextTools::toString(tolerance));
1889
//Backing up or restoring?
1890
auto_ptr<BackupListener> backupListener;
1891
string backupFile = ApplicationTools::getAFilePath("optimization.backup.file", params, false, false);
1892
if (backupFile != "none") {
1893
ApplicationTools::displayResult("Parameters will be backup to", backupFile);
1894
backupListener.reset(new BackupListener(backupFile));
1895
if (FileTools::fileExists(backupFile)) {
1896
ApplicationTools::displayMessage("A backup file was found! Try to restore parameters from previous run...");
1897
ifstream bck(backupFile.c_str(), ios::in);
1898
vector<string> lines = FileTools::putStreamIntoVectorOfStrings(bck);
1899
double fval = TextTools::toDouble(lines[0].substr(5));
1900
ParameterList pl = tl->getParameters();
1901
for (size_t l = 1; l < lines.size(); ++l) {
1902
if (!TextTools::isEmpty(lines[l])) {
1903
StringTokenizer stp(lines[l], "=");
1904
if (stp.numberOfRemainingTokens() != 2) {
1905
cerr << "Corrupted backup file!!!" << endl;
1906
cerr << "at line " << l << ": " << lines[l] << endl;
1908
string pname = stp.nextToken();
1909
string pvalue = stp.nextToken();
1910
unsigned int p = pl.whichParameterHasName(pname);
1911
pl.setParameter(p, AutoParameter(pl[p]));
1912
pl[p].setValue(TextTools::toDouble(pvalue));
1916
tl->setParameters(pl);
1917
if (abs(tl->getValue() - fval) > 0.000001)
1918
throw Exception("Incorrect likelihood value after restoring, from backup file. Remove backup file and start from scratch :s");
1919
ApplicationTools::displayResult("Restoring log-likelihood", -fval);
1924
bool optimizeTopo = ApplicationTools::getBooleanParameter("optimization.topology", params, false, suffix, suffixIsOptional, false);
1925
if (verbose) ApplicationTools::displayResult("Optimize topology", optimizeTopo ? "yes" : "no");
1926
string nniMethod = ApplicationTools::getStringParameter("optimization.topology.algorithm_nni.method", params, "phyml", suffix, suffixIsOptional, false);
1928
if (nniMethod == "fast")
1930
nniAlgo = NNITopologySearch::FAST;
1932
else if (nniMethod == "better")
1934
nniAlgo = NNITopologySearch::BETTER;
1936
else if (nniMethod == "phyml")
1938
nniAlgo = NNITopologySearch::PHYML;
1940
else throw Exception("Unknown NNI algorithm: '" + nniMethod + "'.");
1943
string order = ApplicationTools::getStringParameter("derivatives", optArgs, "Newton", "", true, false);
1944
string optMethodDeriv;
1945
if (order == "Gradient")
1947
optMethodDeriv = OptimizationTools::OPTIMIZATION_GRADIENT;
1949
else if (order == "Newton")
1951
optMethodDeriv = OptimizationTools::OPTIMIZATION_NEWTON;
1953
else if (order == "BFGS")
1955
optMethodDeriv = OptimizationTools::OPTIMIZATION_BFGS;
1957
else throw Exception("Unknown derivatives algorithm: '" + order + "'.");
1958
if (verbose) ApplicationTools::displayResult("Optimization method", optName);
1959
if (verbose) ApplicationTools::displayResult("Algorithm used for derivable parameters", order);
1961
//See if we should reparametrize:
1962
bool reparam = ApplicationTools::getBooleanParameter("optimization.reparametrization", params, false);
1963
if (verbose) ApplicationTools::displayResult("Reparametrization", (reparam ? "yes" : "no"));
1966
if ((optName == "D-Brent") || (optName == "D-BFGS"))
1968
// Uses Newton-Brent method or Newton-BFGS method
1969
string optMethodModel;
1970
if (optName == "D-Brent")
1971
optMethodModel = OptimizationTools::OPTIMIZATION_BRENT;
1973
optMethodModel = OptimizationTools::OPTIMIZATION_BFGS;
1975
unsigned int nstep = ApplicationTools::getParameter<unsigned int>("nstep", optArgs, 1, "", true, false);
1979
bool optNumFirst = ApplicationTools::getBooleanParameter("optimization.topology.numfirst", params, true, suffix, suffixIsOptional, false);
1980
unsigned int topoNbStep = ApplicationTools::getParameter<unsigned int>("optimization.topology.nstep", params, 1, "", true, false);
1981
double tolBefore = ApplicationTools::getDoubleParameter("optimization.topology.tolerance.before", params, 100, suffix, suffixIsOptional);
1982
double tolDuring = ApplicationTools::getDoubleParameter("optimization.topology.tolerance.during", params, 100, suffix, suffixIsOptional);
1983
tl = OptimizationTools::optimizeTreeNNI(
1984
dynamic_cast<NNIHomogeneousTreeLikelihood*>(tl), parametersToEstimate,
1985
optNumFirst, tolBefore, tolDuring, nbEvalMax, topoNbStep, messageHandler, profiler,
1986
reparam, optVerbose, optMethodDeriv, nstep, nniAlgo);
1989
if (verbose && nstep > 1) ApplicationTools::displayResult("# of precision steps", TextTools::toString(nstep));
1990
parametersToEstimate.matchParametersValues(tl->getParameters());
1991
n = OptimizationTools::optimizeNumericalParameters(
1992
dynamic_cast<DiscreteRatesAcrossSitesTreeLikelihood*>(tl), parametersToEstimate,
1993
backupListener.get(), nstep, tolerance, nbEvalMax, messageHandler, profiler, reparam, optVerbose, optMethodDeriv, optMethodModel);
1995
else if (optName == "FullD")
1997
// Uses Newton-raphson algorithm with numerical derivatives when required.
2001
bool optNumFirst = ApplicationTools::getBooleanParameter("optimization.topology.numfirst", params, true, suffix, suffixIsOptional, false);
2002
unsigned int topoNbStep = ApplicationTools::getParameter<unsigned int>("optimization.topology.nstep", params, 1, "", true, false);
2003
double tolBefore = ApplicationTools::getDoubleParameter("optimization.topology.tolerance.before", params, 100, suffix, suffixIsOptional);
2004
double tolDuring = ApplicationTools::getDoubleParameter("optimization.topology.tolerance.during", params, 100, suffix, suffixIsOptional);
2005
tl = OptimizationTools::optimizeTreeNNI2(
2006
dynamic_cast<NNIHomogeneousTreeLikelihood*>(tl), parametersToEstimate,
2007
optNumFirst, tolBefore, tolDuring, nbEvalMax, topoNbStep, messageHandler, profiler,
2008
reparam, optVerbose, optMethodDeriv, nniAlgo);
2011
parametersToEstimate.matchParametersValues(tl->getParameters());
2012
n = OptimizationTools::optimizeNumericalParameters2(
2013
dynamic_cast<DiscreteRatesAcrossSitesTreeLikelihood*>(tl), parametersToEstimate,
2014
backupListener.get(), tolerance, nbEvalMax, messageHandler, profiler, reparam, optVerbose, optMethodDeriv);
2016
else throw Exception("Unknown optimization method: " + optName);
2018
string finalMethod = ApplicationTools::getStringParameter("optimization.final", params, "none", suffix, suffixIsOptional, true);
2019
Optimizer* finalOptimizer = 0;
2020
if (finalMethod == "none")
2022
else if (finalMethod == "simplex")
2024
finalOptimizer = new DownhillSimplexMethod(tl);
2026
else if (finalMethod == "powell")
2028
finalOptimizer = new PowellMultiDimensions(tl);
2030
else throw Exception("Unknown final optimization method: " + finalMethod);
2034
parametersToEstimate.matchParametersValues(tl->getParameters());
2035
if (verbose) ApplicationTools::displayResult("Final optimization step", finalMethod);
2036
finalOptimizer->setProfiler(profiler);
2037
finalOptimizer->setMessageHandler(messageHandler);
2038
finalOptimizer->setMaximumNumberOfEvaluations(nbEvalMax);
2039
finalOptimizer->getStopCondition()->setTolerance(tolerance);
2040
finalOptimizer->setVerbose(verbose);
2041
finalOptimizer->setConstraintPolicy(AutoParameter::CONSTRAINTS_AUTO);
2042
finalOptimizer->init(parametersToEstimate);
2043
finalOptimizer->optimize();
2044
n += finalOptimizer->getNumberOfEvaluations();
2045
delete finalOptimizer;
2048
if (verbose) ApplicationTools::displayResult("Performed", TextTools::toString(n) + " function evaluations.");
2049
if (backupFile != "none") {
2050
remove(backupFile.c_str());
2055
/******************************************************************************/
2057
void PhylogeneticsApplicationTools::optimizeParameters(
2058
DiscreteRatesAcrossSitesClockTreeLikelihood* tl,
2059
const ParameterList& parameters,
2060
map<string, string>& params,
2061
const string& suffix,
2062
bool suffixIsOptional,
2066
string optimization = ApplicationTools::getStringParameter("optimization", params, "FullD(derivatives=Newton)", suffix, suffixIsOptional, false);
2067
if (optimization == "None") return;
2069
map<string, string> optArgs;
2070
KeyvalTools::parseProcedure(optimization, optName, optArgs);
2072
unsigned int optVerbose = ApplicationTools::getParameter<unsigned int>("optimization.verbose", params, 2, suffix, suffixIsOptional);
2074
string mhPath = ApplicationTools::getAFilePath("optimization.message_handler", params, false, false, suffix, suffixIsOptional);
2075
OutputStream* messageHandler =
2076
(mhPath == "none") ? 0 :
2077
(mhPath == "std") ? ApplicationTools::message :
2078
new StlOutputStream(auto_ptr<ostream>(new ofstream(mhPath.c_str(), ios::out)));
2079
if (verbose) ApplicationTools::displayResult("Message handler", mhPath);
2081
string prPath = ApplicationTools::getAFilePath("optimization.profiler", params, false, false, suffix, suffixIsOptional);
2082
OutputStream* profiler =
2083
(prPath == "none") ? 0 :
2084
(prPath == "std") ? ApplicationTools::message :
2085
new StlOutputStream(auto_ptr<ostream>(new ofstream(prPath.c_str(), ios::out)));
2086
if (profiler) profiler->setPrecision(20);
2087
if (verbose) ApplicationTools::displayResult("Profiler", prPath);
2089
ParameterList parametersToEstimate = parameters;
2091
// Should I ignore some parameters?
2092
string paramListDesc = ApplicationTools::getStringParameter("optimization.ignore_parameter", params, "", suffix, suffixIsOptional, false);
2093
StringTokenizer st(paramListDesc, ",");
2094
while (st.hasMoreToken())
2098
string param = st.nextToken();
2099
if (param == "BrLen")
2101
vector<string> vs = tl->getBranchLengthsParameters().getParameterNames();
2102
parametersToEstimate.deleteParameters(vs);
2104
ApplicationTools::displayResult("Parameter ignored", string("Branch lengths"));
2106
else if (param == "Ancient")
2108
NonHomogeneousTreeLikelihood* nhtl = dynamic_cast<NonHomogeneousTreeLikelihood*>(tl);
2109
if (!nhtl) ApplicationTools::displayWarning("The 'Ancient' parameters do not exist in homogeneous models, and will be ignored.");
2112
vector<string> vs = nhtl->getRootFrequenciesParameters().getParameterNames();
2113
parametersToEstimate.deleteParameters(vs);
2116
ApplicationTools::displayResult("Parameter ignored", string("Root frequencies"));
2120
parametersToEstimate.deleteParameter(param);
2122
ApplicationTools::displayResult("Parameter ignored", param);
2125
catch (ParameterNotFoundException& pnfe)
2127
ApplicationTools::displayError("Parameter '" + pnfe.getParameter() + "' not found, and so can't be ignored!");
2131
unsigned int nbEvalMax = ApplicationTools::getParameter<unsigned int>("optimization.max_number_f_eval", params, 1000000, suffix, suffixIsOptional);
2132
if (verbose) ApplicationTools::displayResult("Max # ML evaluations", TextTools::toString(nbEvalMax));
2134
double tolerance = ApplicationTools::getDoubleParameter("optimization.tolerance", params, .000001, suffix, suffixIsOptional);
2135
if (verbose) ApplicationTools::displayResult("Tolerance", TextTools::toString(tolerance));
2137
string order = ApplicationTools::getStringParameter("derivatives", optArgs, "Gradient", "", true, false);
2138
string optMethod, derMethod;
2139
if (order == "Gradient")
2141
optMethod = OptimizationTools::OPTIMIZATION_GRADIENT;
2143
else if (order == "Newton")
2145
optMethod = OptimizationTools::OPTIMIZATION_NEWTON;
2147
else throw Exception("Option '" + order + "' is not known for 'optimization.method.derivatives'.");
2148
if (verbose) ApplicationTools::displayResult("Optimization method", optName);
2149
if (verbose) ApplicationTools::displayResult("Algorithm used for derivable parameters", order);
2151
//Backing up or restoring?
2152
auto_ptr<BackupListener> backupListener;
2153
string backupFile = ApplicationTools::getAFilePath("optimization.backup.file", params, false, false);
2154
if (backupFile != "none") {
2155
ApplicationTools::displayResult("Parameters will be backup to", backupFile);
2156
backupListener.reset(new BackupListener(backupFile));
2157
if (FileTools::fileExists(backupFile)) {
2158
ApplicationTools::displayMessage("A backup file was found! Try to restore parameters from previous run...");
2159
ifstream bck(backupFile.c_str(), ios::in);
2160
vector<string> lines = FileTools::putStreamIntoVectorOfStrings(bck);
2161
double fval = TextTools::toDouble(lines[0].substr(5));
2162
ParameterList pl = tl->getParameters();
2163
for (size_t l = 1; l < lines.size(); ++l) {
2164
if (!TextTools::isEmpty(lines[l])) {
2165
StringTokenizer stp(lines[l], "=");
2166
if (stp.numberOfRemainingTokens() != 2) {
2167
cerr << "Corrupted backup file!!!" << endl;
2168
cerr << "at line " << l << ": " << lines[l] << endl;
2170
string pname = stp.nextToken();
2171
string pvalue = stp.nextToken();
2172
unsigned int p = pl.whichParameterHasName(pname);
2173
pl.setParameter(p, AutoParameter(pl[p]));
2174
pl[p].setValue(TextTools::toDouble(pvalue));
2178
tl->setParameters(pl);
2179
if (abs(tl->getValue() - fval) > 0.000001)
2180
throw Exception("Incorrect likelihood value after restoring, from backup file. Remove backup file and start from scratch :s");
2181
ApplicationTools::displayResult("Restoring log-likelihood", -fval);
2186
if (optName == "D-Brent")
2188
// Uses Newton-Brent method:
2189
unsigned int nstep = ApplicationTools::getParameter<unsigned int>("nstep", optArgs, 1, "", true, false);
2190
if (verbose && nstep > 1) ApplicationTools::displayResult("# of precision steps", TextTools::toString(nstep));
2191
n = OptimizationTools::optimizeNumericalParametersWithGlobalClock(
2193
parametersToEstimate,
2194
backupListener.get(),
2203
else if (optName == "FullD")
2205
// Uses Newton-raphson alogrithm with numerical derivatives when required.
2206
n = OptimizationTools::optimizeNumericalParametersWithGlobalClock2(
2208
parametersToEstimate,
2209
backupListener.get(),
2217
else throw Exception("Unknown optimization method: " + optName);
2219
string finalMethod = ApplicationTools::getStringParameter("optimization.final", params, "none", suffix, suffixIsOptional, false);
2220
Optimizer* finalOptimizer = 0;
2221
if (finalMethod == "none")
2223
else if (finalMethod == "simplex")
2225
finalOptimizer = new DownhillSimplexMethod(tl);
2227
else if (finalMethod == "powell")
2229
finalOptimizer = new PowellMultiDimensions(tl);
2231
else throw Exception("Unknown final optimization method: " + finalMethod);
2235
parametersToEstimate.matchParametersValues(tl->getParameters());
2236
ApplicationTools::displayResult("Final optimization step", finalMethod);
2237
finalOptimizer->setProfiler(profiler);
2238
finalOptimizer->setMessageHandler(messageHandler);
2239
finalOptimizer->setMaximumNumberOfEvaluations(nbEvalMax);
2240
finalOptimizer->getStopCondition()->setTolerance(tolerance);
2241
finalOptimizer->setVerbose(verbose);
2242
finalOptimizer->setConstraintPolicy(AutoParameter::CONSTRAINTS_AUTO);
2243
finalOptimizer->init(parametersToEstimate);
2244
finalOptimizer->optimize();
2245
n += finalOptimizer->getNumberOfEvaluations();
2246
delete finalOptimizer;
2250
ApplicationTools::displayResult("Performed", TextTools::toString(n) + " function evaluations.");
2251
if (backupFile != "none") {
2252
remove(backupFile.c_str());
2256
/******************************************************************************/
2258
void PhylogeneticsApplicationTools::checkEstimatedParameters(const ParameterList& pl)
2260
for (unsigned int i = 0; i < pl.size(); ++i)
2262
const Constraint* constraint = pl[i].getConstraint();
2265
double value = pl[i].getValue();
2266
if (!constraint->isCorrect(value - 1e-6) || !constraint->isCorrect(value + 1e-6))
2268
ApplicationTools::displayWarning("This parameter has a value close to the boundary: " + pl[i].getName() + "(" + TextTools::toString(value) + ").");
2274
/******************************************************************************/
2276
void PhylogeneticsApplicationTools::writeTree(
2277
const TreeTemplate<Node>& tree,
2278
map<string, string>& params,
2279
const string& prefix,
2280
const string& suffix,
2281
bool suffixIsOptional,
2283
bool checkOnly) throw (Exception)
2285
string format = ApplicationTools::getStringParameter(prefix + "tree.format", params, "Newick", suffix, suffixIsOptional, false);
2286
string file = ApplicationTools::getAFilePath(prefix + "tree.file", params, true, false, suffix, suffixIsOptional);
2288
if (format == "Newick")
2289
treeWriter = new Newick();
2290
else if (format == "Nexus")
2291
treeWriter = new NexusIOTree();
2292
else if (format == "NHX")
2293
treeWriter = new Nhx(false);
2294
else throw Exception("Unknown format for tree writing: " + format);
2296
treeWriter->write(tree, file, true);
2298
if (verbose) ApplicationTools::displayResult("Wrote tree to file ", file);
2301
/******************************************************************************/
2303
void PhylogeneticsApplicationTools::writeTrees(
2304
const vector<Tree*>& trees,
2305
map<string, string>& params,
2306
const string& prefix,
2307
const string& suffix,
2308
bool suffixIsOptional,
2310
bool checkOnly) throw (Exception)
2312
string format = ApplicationTools::getStringParameter(prefix + "trees.format", params, "Newick", suffix, suffixIsOptional, false);
2313
string file = ApplicationTools::getAFilePath(prefix + "trees.file", params, true, false, suffix, suffixIsOptional);
2314
OMultiTree* treeWriter;
2315
if (format == "Newick")
2316
treeWriter = new Newick();
2317
else if (format == "Nexus")
2318
treeWriter = new NexusIOTree();
2319
else if (format == "NHX")
2320
treeWriter = new Nhx();
2321
else throw Exception("Unknow format for tree writing: " + format);
2323
treeWriter->write(trees, file, true);
2325
if (verbose) ApplicationTools::displayResult("Wrote trees to file ", file);
2328
/******************************************************************************/
2330
void PhylogeneticsApplicationTools::describeParameters_(const ParameterAliasable* parametrizable, OutputStream& out, map<string, string>& globalAliases, const vector<string>& names, bool printLocalAliases)
2332
ParameterList pl = parametrizable->getIndependentParameters().subList(names);
2333
unsigned int p = out.getPrecision();
2334
out.setPrecision(12);
2335
for (unsigned int i = 0; i < pl.size(); i++)
2337
if (i > 0) out << ", ";
2338
string pname = parametrizable->getParameterNameWithoutNamespace(pl[i].getName());
2340
// Check for global aliases:
2341
if (globalAliases.find(pl[i].getName()) == globalAliases.end())
2343
(out << pname << "=").enableScientificNotation(false) << pl[i].getValue();
2346
out << pname << "=" << globalAliases[pl[i].getName()];
2348
// Now check for local aliases:
2349
if (printLocalAliases)
2351
vector<string> aliases = parametrizable->getAlias(pname);
2352
for (unsigned int j = 0; aliases.size(); j++)
2354
out << ", " << aliases[j] << "=" << pname;
2358
out.setPrecision(p);
2361
/******************************************************************************/
2363
void PhylogeneticsApplicationTools::describeSubstitutionModel_(const SubstitutionModel* model, OutputStream& out, map<string, string>& globalAliases)
2365
//Is it a protein user defined model?
2366
const UserProteinSubstitutionModel* userModel = dynamic_cast<const UserProteinSubstitutionModel*>(model);
2370
vector<string> pnames = userModel->getParameters().getParameterNames();
2371
if (pnames.size() > 0)
2373
out << "(file=" << userModel->getPath();
2374
for (unsigned int i = 0; i < pnames.size(); i++)
2376
out << ", " << pnames[i] << "=" << userModel->getParameterValue(pnames[i]);
2383
//Is it a markov-modulated model?
2384
const MarkovModulatedSubstitutionModel* mmModel = dynamic_cast<const MarkovModulatedSubstitutionModel*>(model);
2387
out << mmModel->getName() << "(model=";
2388
const SubstitutionModel* nestedModel = mmModel->getNestedModel();
2389
describeSubstitutionModel_(nestedModel, out, globalAliases);
2391
vector<string> names;
2392
const G2001* gModel = dynamic_cast<const G2001*>(model);
2395
// Also print distribution here:
2397
const DiscreteDistribution* nestedDist = gModel->getRateDistribution();
2398
describeDiscreteDistribution_(nestedDist, out, globalAliases);
2400
names.push_back(gModel->getParameter("nu").getName());
2402
const TS98* tsModel = dynamic_cast<const TS98*>(model);
2405
names.push_back(tsModel->getParameter("s1").getName());
2406
names.push_back(tsModel->getParameter("s2").getName());
2408
describeParameters_(mmModel, out, globalAliases, names);
2413
//Is it a model with gaps?
2414
const RE08* reModel = dynamic_cast<const RE08*>(model);
2417
out << reModel->getName() << "(model=";
2418
const SubstitutionModel* nestedModel = reModel->getNestedModel();
2419
describeSubstitutionModel_(nestedModel, out, globalAliases);
2421
vector<string> names;
2422
names.push_back(reModel->getParameter("lambda").getName());
2423
names.push_back(reModel->getParameter("mu").getName());
2424
describeParameters_(reModel, out, globalAliases, names);
2429
//Is it a codon model?
2430
const YN98* codonModel1 = dynamic_cast<const YN98*>(model);
2433
out << codonModel1->getName() << "(frequencies=" << codonModel1->getFreq().getName() << ",";
2434
describeParameters_(model, out, globalAliases, model->getIndependentParameters().getParameterNames());
2438
const MG94* codonModel2 = dynamic_cast<const MG94*>(model);
2441
out << codonModel2->getName() << "(frequencies=" << codonModel2->getFreq().getName() << ",";
2442
describeParameters_(model, out, globalAliases, model->getIndependentParameters().getParameterNames());
2446
const GY94* codonModel3 = dynamic_cast<const GY94*>(model);
2449
out << codonModel3->getName() << "(frequencies=" << codonModel3->getFreq().getName() << ",";
2450
describeParameters_(model, out, globalAliases, model->getIndependentParameters().getParameterNames());
2455
//General procedure:
2456
out << model->getName() << "(";
2457
describeParameters_(model, out, globalAliases, model->getIndependentParameters().getParameterNames());
2461
/******************************************************************************/
2464
void PhylogeneticsApplicationTools::describeFrequenciesSet_(const FrequenciesSet* pfreqset, OutputStream& out)
2471
out << pfreqset->getName() << "(";
2472
ParameterList pl = pfreqset->getParameters();
2473
unsigned int p = out.getPrecision();
2474
out.setPrecision(12);
2475
for (unsigned int i = 0; i < pl.size(); i++)
2477
if (i > 0) out << ", ";
2478
string pname = pfreqset->getParameterNameWithoutNamespace(pl[i].getName());
2479
(out << pname << "=").enableScientificNotation(false) << pl[i].getValue();
2482
out.setPrecision(p);
2485
/******************************************************************************/
2487
void PhylogeneticsApplicationTools::printParameters(const SubstitutionModel* model, OutputStream& out)
2490
map<string, string> globalAliases;
2491
describeSubstitutionModel_(model, out, globalAliases);
2495
/******************************************************************************/
2497
void PhylogeneticsApplicationTools::printParameters(const SubstitutionModelSet* modelSet, OutputStream& out)
2499
(out << "nonhomogeneous = general").endLine();
2500
(out << "nonhomogeneous.number_of_models = " << modelSet->getNumberOfModels()).endLine();
2502
// Get the parameter links:
2503
map< unsigned int, vector<string> > modelLinks; // for each model index, stores the list of global parameters.
2504
map< string, set<unsigned int> > parameterLinks; // for each parameter name, stores the list of model indices, wich should be sorted.
2505
ParameterList pl = modelSet->getParameters();
2506
ParameterList plroot = modelSet->getRootFrequenciesParameters();
2507
for (unsigned int i = 0; i < pl.size(); i++)
2509
if (!plroot.hasParameter(pl[i].getName()))
2511
string name = pl[i].getName();
2512
vector<unsigned int> models = modelSet->getModelsWithParameter(name);
2513
for (size_t j = 0; j < models.size(); ++j)
2515
modelLinks[models[j]].push_back(name);
2516
parameterLinks[name].insert(models[j]);
2521
// Loop over all models:
2522
for (unsigned int i = 0; i < modelSet->getNumberOfModels(); i++)
2524
const SubstitutionModel* model = modelSet->getModel(i);
2526
// First get the global aliases for this model:
2527
map<string, string> globalAliases;
2528
vector<string> names = modelLinks[i];
2529
for (unsigned int j = 0; j < names.size(); j++)
2531
const string name = names[j];
2532
if (parameterLinks[name].size() > 1)
2534
// there is a global alias here
2535
if (*parameterLinks[name].begin() != i) // Otherwise, this is the 'reference' value
2537
globalAliases[modelSet->getParameterModelName(name)] = "model" + TextTools::toString((*parameterLinks[name].begin()) + 1) + "." + modelSet->getParameterModelName(name);
2543
out.endLine() << "model" << (i + 1) << " = ";
2544
describeSubstitutionModel_(model, out, globalAliases);
2546
vector<int> ids = modelSet->getNodesWithModel(i);
2547
out << "model" << (i + 1) << ".nodes_id = " << ids[0];
2548
for (unsigned int j = 1; j < ids.size(); j++)
2550
out << "," << ids[j];
2555
// Root frequencies:
2557
(out << "# Root frequencies:").endLine();
2558
out << "nonhomogeneous.root_freq = ";
2559
describeFrequenciesSet_(modelSet->getRootFrequenciesSet(), out);
2562
/******************************************************************************/
2564
void PhylogeneticsApplicationTools::describeDiscreteDistribution_(const DiscreteDistribution* rDist, OutputStream& out, map<string, string>& globalAliases)
2566
const InvariantMixedDiscreteDistribution* invar = dynamic_cast<const InvariantMixedDiscreteDistribution*>(rDist);
2567
const DiscreteDistribution* test = rDist;
2570
test = invar->getVariableSubDistribution();
2571
out << "Invariant(dist=";
2572
describeDiscreteDistribution_(test, out, globalAliases);
2574
vector<string> names;
2575
names.push_back(invar->getParameter("p").getName());
2576
describeParameters_(invar, out, globalAliases, names);
2581
test = dynamic_cast<const ConstantDistribution*>(rDist);
2582
if (test) out << "Uniform()";
2585
test = dynamic_cast<const GammaDiscreteDistribution*>(rDist);
2588
out << "Gamma(n=" << rDist->getNumberOfCategories() << ", ";
2589
describeParameters_(rDist, out, globalAliases, rDist->getIndependentParameters().getParameterNames(), false);
2592
else throw Exception("PhylogeneticsApplicationTools::printParameters(DiscreteDistribution). Unsupported distribution.");
2597
void PhylogeneticsApplicationTools::printParameters(const DiscreteDistribution* rDist, OutputStream& out)
2599
out << "rate_distribution = ";
2600
map<string, string> globalAliases;
2601
describeDiscreteDistribution_(rDist, out, globalAliases);
2605
/******************************************************************************/