3
PhyML: a program that computes maximum likelihood phylogenies from
4
DNA or AA homologous sequences.
6
Copyright (C) Stephane Guindon. Oct 2003 onward.
8
All parts of the source except where indicated are distributed under
9
the GNU public licence. See http://www.opensource.org for details.
34
#define For(i,n) for(i=0; i<n; i++)
35
#define Fors(i,n,s) for(i=0; i<n; i+=s)
36
#define PointGamma(prob,alpha,beta) PointChi2(prob,2.0*(alpha))/(2.0*(beta))
37
#define SHFT2(a,b,c) (a)=(b);(b)=(c);
38
#define SHFT3(a,b,c,d) (a)=(b);(b)=(c);(c)=(d);
39
#define MAX(a,b) ((a)>(b)?(a):(b))
40
#define MIN(a,b) ((a)<(b)?(a):(b))
41
#define SIGN(a,b) ((b) > 0.0 ? fabs(a) : -fabs(a))
42
#define SHFT(a,b,c,d) (a)=(b);(b)=(c);(c)=(d);
49
(sizeof (x) == sizeof (long double) ? isnan_ld (x) \
50
: sizeof (x) == sizeof (double) ? isnan_d (x) \
52
static inline int isnan_f (float x) { return x != x; }
53
static inline int isnan_d (double x) { return x != x; }
54
static inline int isnan_ld (long double x) { return x != x; }
59
(sizeof (x) == sizeof (long double) ? isinf_ld (x) \
60
: sizeof (x) == sizeof (double) ? isinf_d (x) \
62
static inline int isinf_f (float x) { return isnan (x - x); }
63
static inline int isinf_d (double x) { return isnan (x - x); }
64
static inline int isinf_ld (long double x) { return isnan (x - x); }
68
#define MCMC_MOVE_RANDWALK_UNIFORM 0
69
#define MCMC_MOVE_LOG_RANDWALK_UNIFORM 1
70
#define MCMC_MOVE_RANDWALK_NORMAL 2
71
#define MCMC_MOVE_LOG_RANDWALK_NORMAL 3
72
#define MCMC_MOVE_SCALE_THORNE 4
73
#define MCMC_MOVE_SCALE_GAMMA 5
75
#define N_MAX_MOVES 50
77
#define N_MAX_NEX_COM 20
78
#define T_MAX_NEX_COM 100
79
#define N_MAX_NEX_PARM 50
80
#define T_MAX_TOKEN 200
82
#define N_MAX_MIXT_CLASSES 1000
92
#define BEST_OF_NNI_AND_SPR 2
94
#define M_1_SQRT_2_PI 0.398942280401432677939946059934
95
#define M_SQRT_32 5.656854249492380195206754896838
96
#define PI 3.14159265358979311600
97
#define SQRT2PI 2.50662827463100024161
98
#define LOG2PI 1.83787706640934533908
99
#define LOG2 0.69314718055994528623
100
#define LOG_SQRT_2_PI 0.918938533204672741780329736406
117
#define NT 0 /*! nucleotides */
118
#define AA 1 /*! amino acids */
119
#define GENERIC 2 /*! custom alphabet */
122
#define ACGT 0 /*! A,G,G,T encoding */
123
#define RY 1 /*! R,Y encoding */
125
#define INTERFACE_DATA_TYPE 0
126
#define INTERFACE_MULTIGENE 1
127
#define INTERFACE_MODEL 2
128
#define INTERFACE_TOPO_SEARCH 3
129
#define INTERFACE_BRANCH_SUPPORT 4
132
#define INFINITY HUGE
135
#define N_MAX_OPTIONS 100
138
#define T_MAX_FILE 500
139
#define T_MAX_LINE 2000000
140
#define T_MAX_NAME 100
142
#define T_MAX_SEQ 2000000
143
#define T_MAX_OPTION 100
144
#define T_MAX_LABEL 10
145
#define T_MAX_STATE 5
146
#define N_MAX_LABEL 10
147
#define BLOCK_LABELS 100
149
#define NODE_DEG_MAX 500
150
#define BRENT_IT_MAX 500
151
#define BRENT_CGOLD 0.3819660
152
#define BRENT_ZEPS 1.e-10
153
#define MNBRAK_GOLD 1.618034
154
#define MNBRAK_GLIMIT 100.0
155
#define MNBRAK_TINY 1.e-20
156
#define ALPHA_MIN 0.04
157
#define ALPHA_MAX 100
158
#define BL_START 1.e-04
159
#define GOLDEN_R 0.61803399
160
#define GOLDEN_C (1.0-GOLDEN_R)
161
#define N_MAX_INSERT 20
162
#define N_MAX_OTU 4000
163
#define UNLIKELY -1.e10
165
#define ROUND_MAX 100
166
#define DIST_MAX 2.00
167
#define AROUND_LK 50.0
168
#define PROP_STEP 1.0
169
#define T_MAX_ALPHABET 22
170
#define MDBL_MIN FLT_MIN
171
#define MDBL_MAX FLT_MAX
172
#define POWELL_ITMAX 200
173
#define LINMIN_TOL 2.0E-04
174
#define SCALE_POW 10 /*! Scaling factor will be 2^SCALE_POW or 2^(-SCALE_POW) [[ WARNING: SCALE_POW < 31 ]]*/
175
#define DEFAULT_SIZE_SPR_LIST 20
178
#define OUTPUT_TREE_FORMAT NEWICK
179
#define MAX_PARS 1000000000
181
#define LIM_SCALE_VAL 1.E-50 /*! Scaling limit (deprecated) */
183
#define MIN_CLOCK_RATE 1.E-10
185
#define MIN_VAR_BL 1.E-8
186
#define MAX_VAR_BL 1.E+3
213
// Amino acid ordering:
214
// Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val
216
#define COMPOUND_COR 0
217
#define COMPOUND_NOCOR 1
218
#define EXPONENTIAL 2
222
#define STRICTCLOCK 6
227
#define MINALRTCHI2SH 3
232
/* /\* Uncomment the lines below to switch to single precision *\/ */
233
/* typedef float phydbl; */
234
/* #define LOG logf */
235
/* #define POW powf */
236
/* #define EXP expf */
237
/* #define FABS fabsf */
238
/* #define SQRT sqrtf */
239
/* #define CEIL ceilf */
240
/* #define FLOOR floorf */
241
/* #define RINT rintf */
242
/* #define ROUND roundf */
243
/* #define TRUNC truncf */
244
/* #define COS cosf */
245
/* #define SIN sinf */
246
/* #define TAN tanf */
247
/* #define SMALL FLT_MIN */
248
/* #define BIG FLT_MAX */
249
/* #define SMALL_PIJ 1.E-10 */
250
/* #define BL_MIN 1.E-5 */
251
/* #define P_LK_LIM_INF 2.168404e-19 /\* 2^-62 *\/ */
252
/* #define P_LK_LIM_SUP 4.611686e+18 /\* 2^62 *\/ */
254
/* Uncomment the line below to switch to double precision */
255
typedef double phydbl;
269
#define SMALL DBL_MIN
271
#define SMALL_PIJ 1.E-20
273
#define LOGSMALL -690.
276
#if !(defined PHYTIME || defined SERGEII)
284
/* #define P_LK_LIM_INF 7.888609052e-31 */
285
/* #define P_LK_LIM_MAX 1.267650600e+30 */
286
/* #define P_LK_LIM_INF 4.909093465e-91 /\* R: format(2^(-300),digits=10) *\/ */
287
/* #define P_LK_LIM_SUP 2.037035976e+90 /\* R: format(2^(+300),digits=10) *\/ */
288
#define P_LK_LIM_INF 3.054936e-151 /* 2^-500 */
289
#define P_LK_LIM_SUP 3.273391e+150 /* 2^500 */
291
#define T_MAX_XML_TAG 64
293
/*!********************************************************/
295
typedef struct __Scalar_Int {
297
struct __Scalar_Int *next;
298
struct __Scalar_Int *prev;
302
/*!********************************************************/
304
typedef struct __Scalar_Dbl {
307
struct __Scalar_Dbl *next;
308
struct __Scalar_Dbl *prev;
311
/*!********************************************************/
313
typedef struct __Vect_Int {
316
struct __Vect_Int *next;
317
struct __Vect_Int *prev;
321
/*!********************************************************/
323
typedef struct __Vect_Dbl {
326
struct __Vect_Dbl *next;
327
struct __Vect_Dbl *prev;
330
/*!********************************************************/
332
typedef struct __String {
335
struct __String *next;
336
struct __String *prev;
339
/*!********************************************************/
341
typedef struct __Node {
342
struct __Node **v; /*! table of pointers to neighbor nodes. Dimension = 3 */
343
struct __Node ***bip_node; /*! three lists of pointer to tip nodes. One list for each direction */
344
struct __Edge **b; /*! table of pointers to neighbor branches */
345
struct __Node *anc; /*! direct ancestor t_node (for rooted tree only) */
346
struct __Node *ext_node;
347
struct __Node *match_node;
348
struct __Align *c_seq; /*! corresponding compressed sequence */
349
struct __Align *c_seq_anc; /*! corresponding compressed ancestral sequence */
352
struct __Node *next; /*! tree->a_nodes[i]->next <=> tree->next->a_nodes[i] */
353
struct __Node *prev; /*! See above */
354
struct __Node *next_mixt; /*! Next mixture tree*/
355
struct __Node *prev_mixt; /*! Parent mixture tree */
357
struct __Calibration **calib;
358
short int *calib_applies_to;
361
int *bip_size; /*! Size of each of the three lists from bip_node */
362
int num; /*! t_node number */
363
int tax; /*! tax = 1 -> external node, else -> internal t_node */
364
int check_branch; /*! check_branch=1 is the corresponding branch is labelled with '*' */
365
char *name; /*! taxon name (if exists) */
366
char *ori_name; /*! taxon name (if exists) */
368
phydbl *score; /*! score used in BioNJ to determine the best pair of nodes to agglomerate */
369
phydbl *l; /*! lengths of the (three or one) branche(s) connected this t_node */
370
phydbl dist_to_root; /*! distance to the root t_node */
378
int *s_ingrp; /*! does the subtree beneath belong to the ingroup */
379
int *s_outgrp; /*! does the subtree beneath belong to the outgroup */
381
int id_rank; /*! order taxa alphabetically and use id_rank to store the ranks */
388
/*!********************************************************/
390
typedef struct __Edge {
392
syntax : (node) [edge]
393
(left_1) . .(right_1)
398
(left_2) . .(right_2)
402
struct __Node *left,*rght; /*! t_node on the left/right side of the t_edge */
403
short int l_r,r_l,l_v1,l_v2,r_v1,r_v2;
404
/*! these are directions (i.e., 0, 1 or 2): */
405
/*! l_r (left to right) -> left[b_fcus->l_r] = right */
406
/*! r_l (right to left) -> right[b_fcus->r_l] = left */
407
/*! l_v1 (left t_node to first t_node != from right) -> left[b_fcus->l_v1] = left_1 */
408
/*! l_v2 (left t_node to secnd t_node != from right) -> left[b_fcus->l_v2] = left_2 */
409
/*! r_v1 (right t_node to first t_node != from left) -> right[b_fcus->r_v1] = right_1 */
410
/*! r_v2 (right t_node to secnd t_node != from left) -> right[b_fcus->r_v2] = right_2 */
415
struct __Edge *next_mixt;
416
struct __Edge *prev_mixt;
418
int num; /*! branch number */
419
scalar_dbl *l; /*! branch length */
420
scalar_dbl *best_l; /*! best branch length found so far */
421
scalar_dbl *l_old; /*! old branch length */
422
scalar_dbl *l_var; /*! variance of edge length */
423
scalar_dbl *l_var_old; /*! variance of edge length (previous value) */
426
int bip_score; /*! score of the bipartition generated by the corresponding edge
427
bip_score = 1 iif the branch is found in both trees to be compared,
428
bip_score = 0 otherwise. */
430
phydbl *p_lk_left,*p_lk_rght; /*! likelihoods of the subtree on the left and
431
right side (for each site and each relative rate category) */
432
short int *p_lk_tip_r, *p_lk_tip_l;
433
short int *div_post_pred_left; /*! posterior prediction of nucleotide/aa diversity (left-hand subtree) */
434
short int *div_post_pred_rght; /*! posterior prediction of nucleotide/aa diversity (rght-hand subtree) */
435
short int does_exist;
443
phydbl *Pij_rr; /*! matrix of change probabilities and its first and secnd derivates */
444
int *pars_l,*pars_r; /*! parsimony of the subtree on the left and right sides (for each site) */
445
unsigned int *ui_l, *ui_r; /*! union - intersection vectors used in Fitch's parsimony algorithm */
446
int *p_pars_l, *p_pars_r; /*! conditional parsimony vectors */
448
int num_st_left; /*! number of the subtree on the left side */
449
int num_st_rght; /*! number of the subtree on the right side */
452
/*! Below are the likelihood scaling factors (used in functions
453
`Get_All_Partial_Lk_Scale' in lk.c */
454
int *sum_scale_left_cat;
455
int *sum_scale_rght_cat;
459
phydbl bootval; /*! bootstrap value (if exists) */
461
short int is_alive; /*! is_alive = 1 if this t_edge is used in a tree */
463
phydbl dist_btw_edges;
464
int topo_dist_btw_edges;
468
phydbl ratio_test; /*! approximate likelihood ratio test */
469
phydbl alrt_statistic; /*! aLRT statistic */
471
char **labels; /*! string of characters that labels the corresponding t_edge */
472
int n_labels; /*! number of labels */
473
int n_jumps; /*! number of jumps of substitution rates */
479
/*!********************************************************/
481
typedef struct __Tree{
483
struct __Node *n_root; /*! root t_node */
484
struct __Edge *e_root; /*! t_edge on which lies the root */
485
struct __Node **a_nodes; /*! array of nodes that defines the tree topology */
486
struct __Edge **a_edges; /*! array of edges */
487
struct __Model *mod; /*! substitution model */
488
struct __Calign *data; /*! sequences */
489
struct __Calign *anc_data; /*! ancestral sequences */
490
struct __Tree *next; /* set to NULL by default. Used for mixture models */
491
struct __Tree *prev; /* set to NULL by default. Used for mixture models */
492
struct __Tree *next_mixt; /* set to NULL by default. Used for mixture models */
493
struct __Tree *prev_mixt; /* set to NULL by default. Used for mixture models */
494
struct __Tree *mixt_tree; /* set to NULL by default. Used for mixture models */
495
struct __Option *io; /*! input/output */
496
struct __Matrix *mat; /*! pairwise distance matrix */
497
struct __Node **curr_path; /*! list of nodes that form a path in the tree */
498
struct __SPR **spr_list;
499
struct __SPR *best_spr;
500
struct __Tdraw *ps_tree; /*! structure for drawing trees in postscript format */
501
struct __T_Rate *rates; /*! structure for handling rates of evolution */
502
struct __Tmcmc *mcmc;
503
struct __Triplet *triplet_struct;
504
struct __Phylogeo *geo;
507
int tree_num; /*! tree number. Used for mixture models */
508
int ps_page_number; /*! when multiple trees are printed, this variable give the current page number */
509
int depth_curr_path; /*! depth of the t_node path defined by curr_path */
510
int has_bip; /*!if has_bip=1, then the structure to compare
511
tree topologies is allocated, has_bip=0 otherwise */
512
int n_otu; /*! number of taxa */
513
int curr_site; /*! current site of the alignment to be processed */
514
int curr_catg; /*! current class of the discrete gamma rate distribution */
515
int n_swap; /*! number of NNIs performed */
516
int n_pattern; /*! number of distinct site patterns */
517
int has_branch_lengths; /*! =1 iff input tree displays branch lengths */
518
int print_boot_val; /*! if print_boot_val=1, the bootstrap values are printed */
519
int print_alrt_val; /*! if print_boot_val=1, the bootstrap values are printed */
520
int both_sides; /*! both_sides=1 -> a pre-order and a post-order tree
521
traversals are required to compute the likelihood
522
of every subtree in the phylogeny*/
523
int num_curr_branch_available; /*!gives the number of the next cell in a_edges that is free to receive a pointer to a branch */
528
int dp; /*! Data partition */
529
int s_mod_num; /*! Substitution model number */
530
int lock_topo; /*! = 1 any subsequent topological modification will be banished */
533
int *mutmap; /*! Mutational map */
536
phydbl best_lnL; /*! highest value of the loglikelihood found so far */
537
int best_pars; /*! highest value of the parsimony found so far */
538
phydbl c_lnL; /*! loglikelihood */
539
phydbl old_lnL; /*! old loglikelihood */
540
phydbl sum_min_sum_scale; /*! common factor of scaling factors */
541
phydbl *c_lnL_sorted; /*! used to compute c_lnL by adding sorted terms to minimize CPU errors */
542
phydbl *cur_site_lk; /*! vector of loglikelihoods at individual sites */
543
phydbl *old_site_lk; /*! vector of likelihoods at individual sites */
544
phydbl **log_site_lk_cat; /*! loglikelihood at individual sites and for each class of rate*/
545
phydbl *site_lk_cat; /*! loglikelihood at a single site and for each class of rate*/
546
phydbl unconstraint_lk; /*! unconstrained (or multinomial) likelihood */
547
phydbl **log_lks_aLRT; /*! used to compute several branch supports */
548
phydbl n_root_pos; /*! position of the root on its t_edge */
549
phydbl size; /*! tree size */
555
int perform_spr_right_away;
560
int bl_from_node_stamps; /*! == 1 -> Branch lengths are determined by t_node times */
561
phydbl sum_y_dist_sq;
563
phydbl tip_order_score;
565
int update_alias_subpatt;
567
phydbl geo_mig_sd; /*! standard deviation of the migration step random variable */
568
phydbl geo_lnL; /*! log likelihood of the phylo-geography model */
572
phydbl *short_l; /*! Vector of short branch length values */
573
int n_short_l; /*! Length of short_l */
576
short int br_len_recorded;
579
short int apply_lk_scaling; /*! Applying scaling of likelihoods. YES/NO */
581
phydbl *K;//a vector of the norm.constants for the node times prior.
585
/*!********************************************************/
587
typedef struct __Super_Tree {
589
struct __List_Tree *treelist; /*! list of trees. One tree for each data set to be processed */
590
struct __Calign *curr_cdata;
591
struct __Option **optionlist; /*! list of pointers to input structures (used in supertrees) */
593
struct __Node ***match_st_node_in_gt;
594
/*! match_st_in_gt_node[subdataset number][supertree t_node number]
595
* gives the t_node in tree estimated from 'subdataset number' that corresponds
596
* to 'supertree t_node number' in the supertree
599
struct __Node *****map_st_node_in_gt;
600
/*! mat_st_gt_node[gt_num][st_node_num][direction] gives the
601
* t_node in gt gt_num that maps t_node st_node_num in st.
604
struct __Edge ***map_st_edge_in_gt;
605
/*! map_st_gt_br[gt_num][st_branch_num] gives the
606
* branch in gt gt_num that maps branch st_branch_num
610
struct __Edge ****map_gt_edge_in_st;
611
/*! mat_gt_st_br[gt_num][gt_branch_num][] is the list of
612
* branches in st that map branch gt_branch_num
616
int **size_map_gt_edge_in_st;
617
/*! size_map_gt_st_br[gt_num][gt_branch_num] gives the
618
* size of the list map_gt_st_br[gt_num][gt_branch_num][]
622
struct __Edge ***match_st_edge_in_gt;
623
/*! match_st_edge_in_gt[gt_num][st_branch_num] gives the
624
* branch in gt gt_num that matches branch st_branch_num
627
struct __Edge ***match_gt_edge_in_st;
628
/*! match_gt_edge_in_st[gt_num][gt_branch_num] gives the
629
* branch in st that matches branch gt_branch_num
632
struct __Node ****closest_match;
633
/*! closest_match[gt_num][st_node_num][dir] gives the
634
* closest t_node in st that matches a t_node in gt gt_num
638
/*! closest_dist[gt_num][st_node_num][dir] gives the
639
* number of edges to traverse to get to node
640
* closest_match[gt_num][st_node_num][dir]
644
/*! number of trees */
647
/*! bl[gt_num][gt_branch_num] gives the length of
648
* branch gt_branch_num
655
/*! bl estimated during NNI (original topo)
660
/*! bl estimated during NNI (topo conf 1)
665
/*! bl estimated during NNI (topo conf 2)
670
/*! partition[gt_num] gives the t_edge partition number
675
struct __Model **s_mod; /*! substitution model */
682
/*!********************************************************/
684
typedef struct __List_Tree { /*! a list of trees */
685
struct __Tree **tree;
686
int list_size; /*! number of trees in the list */
689
/*!********************************************************/
691
typedef struct __Align {
692
char *name; /*! sequence name */
693
int len; /*! sequence length */
694
char *state; /*! sequence itself */
695
int *d_state; /*! sequence itself (digits) */
696
short int *is_ambigu; /*! is_ambigu[site] = 1 if state[site] is an ambiguous character. 0 otherwise */
699
/*!********************************************************/
702
typedef struct __Calign {
703
struct __Align **c_seq; /*! compressed sequences */
704
phydbl *b_frq; /*! observed state frequencies */
705
short int *invar; /*! < 0 -> polymorphism observed */
706
int *wght; /*! # of each site in c_align */
707
short int *ambigu; /*! ambigu[i]=1 is one or more of the sequences at site
708
i display an ambiguous character */
710
int n_otu; /*! number of taxa */
711
int clean_len; /*! uncrunched sequences lenghts without gaps */
712
int crunch_len; /*! crunched sequences lengths */
713
int init_len; /*! length of the uncompressed sequences */
714
int *sitepatt; /*! this array maps the position of the patterns in the
715
compressed alignment to the positions in the uncompressed
717
int format; /*! 0 (default): PHYLIP. 1: NEXUS. */
720
/*!********************************************************/
722
typedef struct __Matrix { /*! mostly used in BIONJ */
723
phydbl **P,**Q,**dist; /*! observed proportions of transition, transverion and distances
724
between pairs of sequences */
726
t_tree *tree; /*! tree... */
727
int *on_off; /*! on_off[i]=1 if column/line i corresponds to a t_node that has not
728
been agglomerated yet */
729
int n_otu; /*! number of taxa */
730
char **name; /*! sequence names */
731
int r; /*! number of nodes that have not been agglomerated yet */
732
struct __Node **tip_node; /*! array of pointer to the leaves of the tree */
733
int curr_int; /*! used in the NJ/BIONJ algorithms */
734
int method; /*! if method=1->NJ method is used, BIONJ otherwise */
737
/*!********************************************************/
739
typedef struct __RateMatrix {
740
int n_diff_rr; /*! number of different relative substitution rates in the custom model */
741
vect_dbl *rr; /*! relative rate parameters of the GTR or custom model (given by rr_val[rr_num[i]]) */
742
vect_dbl *rr_val; /*! relative rate parameters of the GTR or custom model */
744
vect_int *n_rr_per_cat; /*! number of rate parameters in each category */
748
struct __RateMatrix *next;
749
struct __RateMatrix *prev;
752
/*!********************************************************/
754
typedef struct __RAS {
755
/*! Rate across sites */
756
int n_catg; /*! number of categories in the discrete gamma distribution */
757
int invar; /*! =1 iff the substitution model takes into account invariable sites */
758
int gamma_median; /*! 1: use the median of each bin in the discrete gamma distribution. 0: the mean is used */
759
vect_dbl *gamma_r_proba; /*! probabilities of the substitution rates defined by the discrete gamma distribution */
760
vect_dbl *gamma_r_proba_unscaled;
761
vect_dbl *gamma_rr; /*! substitution rates defined by the RAS distribution */
762
vect_dbl *gamma_rr_unscaled; /*! substitution rates defined by the RAS distribution (unscaled) */
763
scalar_dbl *alpha; /*! gamma shapa parameter */
765
scalar_dbl *free_rate_mr; /*! mean relative rate as given by the FreeRate model */
768
int parent_class_number;
769
scalar_dbl *pinvar; /*! proportion of invariable sites */
772
short int init_r_proba;
773
short int normalise_rr;
775
short int *skip_rate_cat;
782
/*!********************************************************/
784
typedef struct __EquFreq {
785
/*! Equilibrium frequencies */
786
vect_dbl *pi; /*! states frequencies */
787
vect_dbl *pi_unscaled; /*! states frequencies (unscaled) */
789
struct __EquFreq *next;
790
struct __EquFreq *prev;
794
/*!********************************************************/
796
typedef struct __Model {
797
struct __Optimiz *s_opt; /*! pointer to parameters to optimize */
798
struct __Eigen *eigen;
801
struct __Model *next;
802
struct __Model *prev;
803
struct __Model *next_mixt;
804
struct __Model *prev_mixt;
805
struct __RateMatrix *r_mat;
806
struct __EquFreq *e_frq;
809
t_string *aa_rate_mat_file;
810
FILE *fp_aa_rate_mat;
812
vect_dbl *user_b_freq; /*! user-defined nucleotide frequencies */
814
t_string *custom_mod_string; /*! string of characters used to define custom models of substitution */
816
int mod_num; /*! model number */
818
int update_eigen; /*! update_eigen=1-> eigen values/vectors need to be updated */
823
int ns; /*! number of states (4 for ADN, 20 for AA) */
825
int bootstrap; /*! Number of bootstrap replicates (0 : no bootstrap analysis is launched) */
826
int use_m4mod; /*! Use a Makrkov modulated Markov model ? */
828
scalar_dbl *kappa; /*! transition/transversion rate */
829
scalar_dbl *lambda; /*! parameter used to define the ts/tv ratios in the F84 and TN93 models */
830
scalar_dbl *br_len_multiplier;
832
vect_dbl *Pij_rr; /*! matrix of change probabilities */
833
scalar_dbl *mr; /*! mean rate = branch length/time interval mr = -sum(i)(vct_pi[i].mat_Q[ii]) */
835
short int log_l; /*! Edge lengths are actually log(Edge lengths) if log_l == YES !*/
836
phydbl l_min; /*! Minimum branch length !*/
837
phydbl l_max; /*! Maximum branch length !*/
839
phydbl l_var_sigma; /*! For any edge b we have b->l_var->v = l_var_sigma * (b->l->v)^2 */
840
phydbl l_var_min; /*! Min of variance of branch lengths (used in conjunction with gamma_mgf_bl == YES) */
841
phydbl l_var_max; /*! Max of variance of branch lengths (used in conjunction with gamma_mgf_bl == YES) */
843
int gamma_mgf_bl; /*! P = \int_0^inf exp(QL) p(L) where L=\int_0^t R(s) ds and p(L) is the gamma density. Set to NO by default !*/
845
int n_mixt_classes; /* Number of classes in the mixture model. */
847
scalar_dbl *r_mat_weight;
848
scalar_dbl *e_frq_weight;
852
/*!********************************************************/
854
typedef struct __Eigen{
856
phydbl *q; /*! matrix which eigen values and vectors are computed */
859
phydbl *e_val; /*! eigen values (vector), real part. */
860
phydbl *e_val_im; /*! eigen values (vector), imaginary part */
861
phydbl *r_e_vect; /*! right eigen vector (matrix), real part */
862
phydbl *r_e_vect_im; /*! right eigen vector (matrix), imaginary part */
863
phydbl *l_e_vect; /*! left eigen vector (matrix), real part */
865
struct __Eigen *prev;
866
struct __Eigen *next;
869
/*!********************************************************/
871
typedef struct __Option { /*! mostly used in 'help.c' */
872
struct __Model *mod; /*! pointer to a substitution model */
873
struct __Tree *tree; /*! pointer to the current tree */
874
struct __Align **data; /*! pointer to the uncompressed sequences */
875
struct __Tree *cstr_tree; /*! pointer to a constraint tree (can be a multifurcating one) */
876
struct __Calign *cdata; /*! pointer to the compressed sequences */
877
struct __Super_Tree *st; /*! pointer to supertree */
878
struct __Tnexcom **nex_com_list;
879
struct __List_Tree *treelist; /*! list of trees. */
880
struct __Option *next;
881
struct __Option *prev;
883
int interleaved; /*! interleaved or sequential sequence file format ? */
884
int in_tree; /*! =1 iff a user input tree is used as input */
886
char *in_align_file; /*! alignment file name */
887
FILE *fp_in_align; /*! pointer to the alignment file */
889
char *in_tree_file; /*! input tree file name */
890
FILE *fp_in_tree; /*! pointer to the input tree file */
892
char *in_constraint_tree_file; /*! input constraint tree file name */
893
FILE *fp_in_constraint_tree; /*! pointer to the input constraint tree file */
895
char *out_tree_file; /*! name of the tree file */
898
char *out_trees_file; /*! name of the tree file */
899
FILE *fp_out_trees; /*! pointer to the tree file containing all the trees estimated using random starting trees */
901
char *out_boot_tree_file; /*! name of the tree file */
902
FILE *fp_out_boot_tree; /*! pointer to the bootstrap tree file */
904
char *out_boot_stats_file; /*! name of the tree file */
905
FILE *fp_out_boot_stats; /*! pointer to the statistics file */
907
char *out_stats_file; /*! name of the statistics file */
910
char *out_trace_file; /*! name of the file in which the likelihood of the model is written */
913
char *out_lk_file; /*! name of the file in which the likelihood of the model is written */
916
char *out_ps_file; /*! name of the file in which tree(s) is(are) written */
920
char *clade_list_file;
922
int datatype; /*! 0->DNA, 1->AA */
923
int print_boot_trees; /*! =1 if the bootstrapped trees are printed in output */
924
int out_stats_file_open_mode; /*! opening file mode for statistics file */
925
int out_tree_file_open_mode; /*! opening file mode for tree file */
926
int n_data_sets; /*! number of data sets to be analysed */
927
int n_trees; /*! number of trees */
928
int init_len; /*! sequence length */
929
int n_otu; /*! number of taxa */
930
int n_data_set_asked; /*! number of bootstrap replicates */
931
char *nt_or_cd; /*! nucleotide or codon data ? (not used) */
932
int multigene; /*! if=1 -> analyse several partitions. */
933
int config_multigene;
934
int n_part; /*! number of data partitions */
936
int ratio_test; /*! from 1 to 4 for specific branch supports, 0 of not */
938
int data_file_format; /*! Data format: Phylip or Nexus */
939
int tree_file_format; /*! Tree format: Phylip or Nexus */
943
int r_seed; /*! random seed */
944
int collapse_boot; /*! 0 -> branch length on bootstrap trees are not collapsed if too small */
945
int random_boot_seq_order; /*! !0 -> sequence order in bootstrapped data set is random */
949
int rm_ambigu; /*! 0 is the default. 1: columns with ambiguous characters are discarded prior further analysis */
953
int quiet; /*! 0 is the default. 1: no interactive question (for batch mode) */
954
int lk_approx; /* EXACT or NORMAL */
959
char **long_tax_names;
960
char **short_tax_names;
970
int do_alias_subpatt;
971
struct __Tmcmc *mcmc;
972
struct __T_Rate *rates;
977
/*!********************************************************/
979
typedef struct __Optimiz { /*! parameters to be optimised (mostly used in 'optimiz.c') */
980
int print; /*! =1 -> verbose mode */
982
int opt_alpha; /*! =1 -> the gamma shape parameter is optimised */
983
int opt_kappa; /*! =1 -> the ts/tv ratio parameter is optimised */
984
int opt_lambda; /*! =1 -> the F84|TN93 model specific parameter is optimised */
985
int opt_pinvar; /*! =1 -> the proportion of invariants is optimised */
986
int opt_state_freq; /*! =1 -> the nucleotide frequencies are optimised */
987
int opt_rr; /*! =1 -> the relative rate parameters of the GTR or the customn model are optimised */
988
int opt_subst_param; /*! if opt_topo=0 and opt_subst_param=1 -> the numerical parameters of the
989
model are optimised. if opt_topo=0 and opt_free_param=0 -> no parameter is
993
int opt_cov_free_rates;
996
int opt_bl; /*! =1 -> the branch lengths are optimised */
997
int opt_topo; /*! =1 -> the tree topology is optimised */
999
phydbl init_lk; /*! initial loglikelihood value */
1000
int n_it_max; /*! maximum bnumber of iteration during an optimisation step */
1001
int last_opt; /*! =1 -> the numerical parameters are optimised further while the
1002
tree topology remains fixed */
1003
int random_input_tree; /*! boolean */
1004
int n_rand_starts; /*! number of random starting points */
1007
int user_state_freq;
1008
int opt_five_branch;
1012
phydbl tree_size_mult; /*! tree size multiplier */
1013
phydbl min_diff_lk_local;
1014
phydbl min_diff_lk_global;
1015
phydbl min_diff_lk_move;
1016
phydbl p_moves_to_examine;
1026
phydbl max_delta_lnL_spr;
1028
int opt_free_mixt_rates;
1029
int constrained_br_len;
1030
int opt_gamma_br_len;
1031
int first_opt_free_mixt_rates;
1039
int opt_rmat_weight;
1040
int opt_efrq_weight;
1042
int skip_tree_traversal;
1043
int serial_free_rates;
1045
int curr_opt_free_rates;
1048
/*!********************************************************/
1050
typedef struct __NNI{
1052
struct __Node *left;
1053
struct __Node *rght;
1063
struct __Node *swap_node_v1;
1064
struct __Node *swap_node_v2;
1065
struct __Node *swap_node_v3;
1066
struct __Node *swap_node_v4;
1068
int best_conf; /*! best topological configuration :
1069
((left_1,left_2),right_1,right_2) or
1070
((left_1,right_2),right_1,left_2) or
1071
((left_1,right_1),right_1,left_2) */
1074
/*!********************************************************/
1076
typedef struct __SPR{
1077
struct __Node *n_link;
1078
struct __Node *n_opp_to_link;
1079
struct __Edge *b_opp_to_link;
1080
struct __Edge *b_target;
1081
struct __Edge *b_init_target;
1082
struct __Node **path;
1083
phydbl init_target_l;
1084
phydbl init_target_v;
1094
struct __SPR *next_mixt;
1095
struct __SPR *prev_mixt;
1099
/*!********************************************************/
1101
typedef struct __Triplet{
1107
phydbl ***p_one_site;
1108
phydbl ***sum_p_one_site;
1112
struct __Eigen *eigen_struct;
1113
struct __Model *mod;
1115
struct __Triplet *next;
1116
struct __Triplet *prev;
1119
/*!********************************************************/
1121
typedef struct __Pnode{
1122
struct __Pnode **next;
1127
/*!********************************************************/
1129
typedef struct __M4 {
1130
int n_h; /*! number of hidden states */
1131
int n_o; /*! number of observable states */
1135
phydbl **o_mats; /*! set of matrices of substitution rates across observable states */
1136
phydbl *multipl; /*! vector of values that multiply each o_mats matrix */
1137
phydbl *o_rr; /*! relative rates (symmetric) of substitution between observable states */
1138
phydbl *h_rr; /*! relative rates (symmetric) of substitution between hidden states */
1139
phydbl *h_mat; /*! matrix that describes the substitutions between hidden states (aka switches) */
1140
phydbl *o_fq; /*! equilibrium frequencies for the observable states */
1141
phydbl *h_fq; /*! equilibrium frequencies for the hidden states */
1142
phydbl *h_fq_unscaled; /*! unscaled equilibrium frequencies for the hidden states */
1143
phydbl *multipl_unscaled; /*! unscaled vector of values that multiply each o_mats matrix */
1145
phydbl delta; /*! switching rate */
1146
phydbl alpha; /*! gamma shape parameter */
1149
/*!********************************************************/
1151
typedef struct __Tdraw {
1152
phydbl *xcoord; /*! t_node coordinates on the x axis */
1153
phydbl *ycoord; /*! t_node coordinates on the y axis */
1154
phydbl *xcoord_s; /*! t_node coordinates on the x axis (scaled) */
1155
phydbl *ycoord_s; /*! t_node coordinates on the y axis (scaled) */
1165
phydbl max_dist_to_root;
1168
/*!********************************************************/
1170
typedef struct __T_Rate {
1171
phydbl lexp; /*! Parameter of the exponential distribution that governs the rate at which substitution between rate classes ocur */
1175
phydbl birth_rate_min;
1176
phydbl birth_rate_max;
1181
phydbl c_lnL_rates; /*! Prob(Br len | time stamps, model of rate evolution) */
1182
phydbl c_lnL_times; /*! Prob(time stamps) */
1183
phydbl c_lnL_jps; /*! Prob(# Jumps | time stamps, rates, model of rate evolution) */
1184
phydbl clock_r; /*! Mean substitution rate, i.e., 'molecular clock' rate */
1190
phydbl true_tree_size;
1193
phydbl nu; /*! Parameter of the Exponential distribution for the corresponding model */
1197
phydbl sum_invalid_areas;
1200
phydbl *nd_r; /*! Current rates at nodes */
1201
phydbl *br_r; /*! Current rates along edges */
1202
phydbl *nd_t; /*! Current t_node times */
1204
phydbl *true_t; /*! true t_node times (including root node) */
1205
phydbl *true_r; /*! true t_edge rates (on rooted tree) */
1208
phydbl *dens; /*! Probability densities of mean substitution rates at the nodes */
1209
phydbl *ml_l; /*! ML t_edge lengths (rooted) */
1210
phydbl *cur_l; /*! Current t_edge lengths (rooted) */
1211
phydbl *u_ml_l; /*! ML t_edge lengths (unrooted) */
1212
phydbl *u_cur_l; /*! Current t_edge lengths (unrooted) */
1215
phydbl *mean_r; /*! average values of br_r taken across the sampled values during the MCMC */
1216
phydbl *mean_t; /*! average values of nd_t taken across the sampled values during the MCMC */
1221
short int *_2n_vect5;
1222
phydbl *_2n2n_vect1;
1223
phydbl *_2n2n_vect2;
1224
phydbl *trip_cond_cov;
1225
phydbl *trip_reg_coeff;
1229
phydbl *t_prior_min;
1230
phydbl *t_prior_max;
1236
phydbl *grad_l; /* gradient */
1238
phydbl *time_slice_lims;
1239
phydbl *survival_rank;
1240
phydbl *survival_dur;
1243
int adjust_rates; /*! if = 1, branch rates are adjusted such that a modification of a given t_node time
1244
does not modify any branch lengths */
1245
int use_rates; /*! if = 0, branch lengths are expressed as differences between t_node times */
1246
int bl_from_rt; /*! if =1, branch lengths are obtained as the product of cur_r and t */
1248
int model; /*! Model number */
1250
int met_within_gibbs;
1258
int *n_time_slice_spans;
1262
short int *t_has_prior;
1263
struct __Node **lca; /*! 2-way table of common ancestral nodes for each pari of nodes */
1265
short int *br_do_updt;
1266
phydbl *cur_gamma_prior_mean;
1267
phydbl *cur_gamma_prior_var;
1269
int model_log_rates;
1271
short int nd_t_recorded;
1272
short int br_r_recorded;
1276
struct __Calibration *calib;
1278
int *curr_nd_for_cal;
1279
phydbl c_lnL_Hastings_ratio;
1281
phydbl *t_prior_min_buff;
1282
phydbl *t_prior_max_buff;
1283
phydbl *times_partial_proba;
1287
/*!********************************************************/
1289
typedef struct __Tmcmc {
1290
struct __Option *io;
1293
phydbl *move_weight;
1308
int num_move_clock_r;
1309
int num_move_tree_height;
1310
int num_move_subtree_height;
1312
int num_move_tree_rates;
1313
int num_move_subtree_rates;
1314
int num_move_updown_nu_cr;
1315
int num_move_updown_t_cr;
1316
int num_move_updown_t_br;
1318
int num_move_cov_rates;
1319
int num_move_cov_switch;
1320
int num_move_birth_rate;
1321
int num_move_jump_calibration;
1322
int num_move_geo_lambda;
1323
int num_move_geo_sigma;
1324
int num_move_geo_tau;
1325
int num_move_geo_updown_tau_lbda;
1326
int num_move_geo_updown_lbda_sigma;
1337
time_t t_last_print;
1343
FILE *out_fp_constree;
1353
int sample_interval;
1354
int chain_len_burnin;
1361
phydbl *old_param_val;
1362
phydbl *new_param_val;
1366
phydbl *sum_curval_nextval;
1372
int is; /* Importance sampling? Yes or NO */
1375
/*!********************************************************/
1377
typedef struct __Tpart {
1378
int *ns; /*! number of states for each partition (e.g., 2, 4, 3) */
1379
int *cum_ns; /*! cumulative number of states (e.g., 0, 2, 6) */
1380
int ns_max; /*! maximum number of states */
1381
int ns_min; /*! minimum number of states */
1382
int n_partitions; /*! number of partitions */
1383
struct __Eigen *eigen;
1386
/*!********************************************************/
1388
typedef struct __Tnexcom {
1393
struct __Tnexparm **parm;
1396
/*!********************************************************/
1398
typedef struct __Tnexparm {
1403
int (*fp)(char *, struct __Tnexparm *, struct __Option *);
1404
struct __Tnexcom *com;
1407
/*!********************************************************/
1409
typedef struct __ParamInt {
1413
/*!********************************************************/
1415
typedef struct __ParamDbl {
1419
/*!********************************************************/
1421
typedef struct __XML_node {
1423
struct __XML_attr *attr; // Pointer to the first element of a list of attributes
1424
int n_attr; // Number of attributes
1425
struct __XML_node *next; // Next sibling
1426
struct __XML_node *prev; // Previous sibling
1427
struct __XML_node *parent; // Parent of this node
1428
struct __XML_node *child; // Child of this node
1432
struct __Generic_Data_Structure *ds; // Pointer to a data strucuture. Can be a scalar, a vector, anything.
1435
/*!********************************************************/
1437
typedef struct __Generic_Data_Structure {
1439
struct __Generic_Data_Structure *next;
1443
/*!********************************************************/
1445
typedef struct __XML_attr {
1448
struct __XML_attr *next; // Next attribute
1449
struct __XML_attr *prev; // Previous attribute
1452
/*!********************************************************/
1454
typedef struct __Calibration {
1455
phydbl *proba; // Probability of this calibration (set by the user and fixed throughout)
1456
phydbl lower; // lower bound
1457
phydbl upper; // upper bound
1460
struct __Node **all_applies_to;
1461
int n_all_applies_to;
1462
struct __Calibration *next;
1463
struct __Calibration *prev;
1466
/*!********************************************************/
1468
typedef struct __Phylogeo{
1469
phydbl *cov; // Covariance of migrations (n_dim x n_dim)
1470
phydbl *r_mat; // R matrix. Gives the rates of migrations between locations. See article.
1471
phydbl *f_mat; // F matrix. See article.
1472
int *occup; // Vector giving the number of lineages that occupy each location
1473
phydbl *ldscape; // Coordinates of the locations
1474
int *loc; // Location for each lineage
1475
int *loc_beneath; // Gives the location occupied beneath each node in the tree
1476
int ldscape_sz; // Landscape size: number of locations
1477
int n_dim; // Dimension of the data (e.g., longitude + lattitude -> n_dim = 2)
1480
phydbl sigma; // Dispersal parameter
1483
phydbl sigma_thresh; // beyond sigma_thresh, there is no dispersal bias.
1485
phydbl lbda; // Competition parameter
1491
struct __Node **sorted_nd; // Table of nodes sorted wrt their heights.
1493
phydbl tau; // overall migration rate parameter
1499
/*!********************************************************/
1500
/*!********************************************************/
1501
/*!********************************************************/
1502
/*!********************************************************/
1503
/*!********************************************************/
1505
void Unroot_Tree(char **subtrees);
1506
void Init_Tree(t_tree *tree,int n_otu);
1507
void Init_Edge_Light(t_edge *b,int num);
1508
void Init_Node_Light(t_node *n,int num);
1509
void Make_Edge_Dirs(t_edge *b,t_node *a,t_node *d,t_tree *tree);
1510
void Init_NNI(nni *a_nni);
1511
void Init_Nexus_Format(nexcom **com);
1512
void Restrict_To_Coding_Position(align **data,option *io);
1513
void Uppercase(char *ch);
1514
void Lowercase(char *ch);
1515
calign *Compact_Data(align **data,option *io);
1516
calign *Compact_Cdata(calign *data,option *io);
1517
void Traverse_Prefix_Tree(int site,int seqnum,int *patt_num,int *n_patt,align **data,option *io,pnode *n);
1518
pnode *Create_Pnode(int size);
1519
void Get_Base_Freqs(calign *data);
1520
void Get_AA_Freqs(calign *data);
1521
void Swap_Nodes_On_Edges(t_edge *e1,t_edge *e2,int swap,t_tree *tree);
1522
void Connect_Edges_To_Nodes_Recur(t_node *a,t_node *d,t_tree *tree);
1523
void Connect_One_Edge_To_Two_Nodes(t_node *a,t_node *d,t_edge *b,t_tree *tree);
1524
void Update_Dirs(t_tree *tree);
1525
void Exit(char *message);
1526
void *mCalloc(int nb,size_t size);
1527
void *mRealloc(void *p,int nb,size_t size);
1528
int Sort_Phydbl_Decrease(const void *a,const void *b);
1529
void Qksort_Int(int *A,int *B,int ilo,int ihi);
1530
void Qksort(phydbl *A,phydbl *B,int ilo,int ihi);
1531
void Qksort_Matrix(phydbl **A,int col,int ilo,int ihi);
1532
void Order_Tree_Seq(t_tree *tree,align **data);
1533
char *Add_Taxa_To_Constraint_Tree(FILE *fp,calign *cdata);
1534
void Check_Constraint_Tree_Taxa_Names(t_tree *tree,calign *cdata);
1535
void Order_Tree_CSeq(t_tree *tree,calign *cdata);
1536
void Init_Mat(matrix *mat,calign *data);
1537
void Copy_Tax_Names_To_Tip_Labels(t_tree *tree,calign *data);
1538
void Share_Lk_Struct(t_tree *t_full,t_tree *t_empt);
1539
void Share_Spr_Struct(t_tree *t_full,t_tree *t_empt);
1540
void Share_Pars_Struct(t_tree *t_full,t_tree *t_empt);
1541
int Sort_Edges_NNI_Score(t_tree *tree,t_edge **sorted_edges,int n_elem);
1542
int Sort_Edges_Depth(t_tree *tree,t_edge **sorted_edges,int n_elem);
1543
void NNI(t_tree *tree,t_edge *b_fcus,int do_swap);
1544
void NNI_Pars(t_tree *tree,t_edge *b_fcus,int do_swap);
1545
void Swap(t_node *a,t_node *b,t_node *c,t_node *d,t_tree *tree);
1546
void Update_All_Partial_Lk(t_edge *b_fcus,t_tree *tree);
1547
void Update_SubTree_Partial_Lk(t_edge *b_fcus,t_node *a,t_node *d,t_tree *tree);
1548
void Copy_Seq_Names_To_Tip_Labels(t_tree *tree,calign *data);
1549
calign *Copy_Cseq(calign *ori,option *io);
1550
int Filexists(char *filename);
1551
matrix *K80_dist(calign *data,phydbl g_shape);
1552
matrix *JC69_Dist(calign *data,t_mod *mod);
1553
matrix *Hamming_Dist(calign *data,t_mod *mod);
1554
int Is_Invar(int patt_num,int stepsize,int datatype,calign *data);
1555
int Is_Ambigu(char *state,int datatype,int stepsize);
1556
void Check_Ambiguities(calign *data,int datatype,int stepsize);
1557
int Get_State_From_Ui(int ui,int datatype);
1558
int Assign_State(char *c,int datatype,int stepsize);
1559
char Reciproc_Assign_State(int i_state,int datatype);
1560
int Assign_State_With_Ambiguity(char *c,int datatype,int stepsize);
1561
void Clean_Tree_Connections(t_tree *tree);
1562
void Bootstrap(t_tree *tree);
1563
void Br_Len_Involving_Invar(t_tree *tree);
1564
void Br_Len_Not_Involving_Invar(t_tree *tree);
1565
void Getstring_Stdin(char *s);
1566
phydbl Num_Derivatives_One_Param(phydbl (*func)(t_tree *tree), t_tree *tree,
1567
phydbl f0, phydbl *param, int which, int n_param, phydbl stepsize, int logt,
1568
phydbl *err, int precise, int is_positive);
1569
phydbl Num_Derivatives_One_Param_Nonaligned(phydbl (*func)(t_tree *tree), t_tree *tree,
1570
phydbl f0, phydbl **param, int which, int n_param, phydbl stepsize, int logt,
1571
phydbl *err, int precise, int is_positive);
1572
int Num_Derivative_Several_Param(t_tree *tree,phydbl *param,int n_param,phydbl stepsize,int logt,phydbl(*func)(t_tree *tree),phydbl *derivatives, int is_positive);
1573
int Num_Derivative_Several_Param_Nonaligned(t_tree *tree, phydbl **param, int n_param, phydbl stepsize, int logt,
1574
phydbl (*func)(t_tree *tree), phydbl *derivatives, int is_positive);
1575
int Compare_Two_States(char *state1,char *state2,int state_size);
1576
void Copy_One_State(char *from,char *to,int state_size);
1577
void Copy_Dist(phydbl **cpy,phydbl **orig,int n);
1578
t_mod *Copy_Model(t_mod *ori);
1579
void Record_Model(t_mod *ori,t_mod *cpy);
1580
void Set_Defaults_Input(option *io);
1581
void Set_Defaults_Model(t_mod *mod);
1582
void Set_Defaults_Optimiz(t_opt *s_opt);
1583
void Test_Node_Table_Consistency(t_tree *tree);
1584
void Get_Bip(t_node *a,t_node *d,t_tree *tree);
1585
void Alloc_Bip(t_tree *tree);
1586
int Sort_Phydbl_Increase(const void *a,const void *b);
1587
int Sort_String(const void *a,const void *b);
1588
int Compare_Bip(t_tree *tree1,t_tree *tree2,int on_existing_edges_only);
1589
void Match_Tip_Numbers(t_tree *tree1,t_tree *tree2);
1590
void Test_Multiple_Data_Set_Format(option *io);
1591
int Are_Compatible(char *statea,char *stateb,int stepsize,int datatype);
1592
void Hide_Ambiguities(calign *data);
1593
void Copy_Tree(t_tree *ori,t_tree *cpy);
1594
void Prune_Subtree(t_node *a,t_node *d,t_edge **target,t_edge **residual,t_tree *tree);
1595
void Graft_Subtree(t_edge *target,t_node *link,t_edge *residual,t_tree *tree);
1596
void Reassign_Node_Nums(t_node *a,t_node *d,int *curr_ext_node,int *curr_int_node,t_tree *tree);
1597
void Reassign_Edge_Nums(t_node *a,t_node *d,int *curr_br,t_tree *tree);
1598
void Find_Mutual_Direction(t_node *n1,t_node *n2,short int *dir_n1_to_n2,short int *dir_n2_to_n1);
1599
void Update_Dir_To_Tips(t_node *a,t_node *d,t_tree *tree);
1600
void Fill_Dir_Table(t_tree *tree);
1601
int Get_Subtree_Size(t_node *a,t_node *d);
1602
void Fast_Br_Len(t_edge *b,t_tree *tree,int approx);
1603
void Init_Eigen_Struct(eigen *this);
1604
phydbl Triple_Dist(t_node *a,t_tree *tree,int approx);
1605
void Make_Symmetric(phydbl **F,int size);
1606
void Round_Down_Freq_Patt(phydbl **F,t_tree *tree);
1607
phydbl Get_Sum_Of_Cells(phydbl *F,t_tree *tree);
1608
void Divide_Cells(phydbl **F,phydbl div,t_tree *tree);
1609
void Divide_Mat_By_Vect(phydbl **F,phydbl *vect,int size);
1610
void Multiply_Mat_By_Vect(phydbl **F,phydbl *vect,int size);
1611
void Found_In_Subtree(t_node *a,t_node *d,t_node *target,int *match,t_tree *tree);
1612
void Get_List_Of_Target_Edges(t_node *a,t_node *d,t_edge **list,int *list_size,t_tree *tree);
1613
void Fix_All(t_tree *tree);
1614
void Record_Br_Len(t_tree *tree);
1615
void Restore_Br_Len(t_tree *tree);
1616
void Get_Dist_Btw_Edges(t_node *a,t_node *d,t_tree *tree);
1617
void Detect_Polytomies(t_edge *b,phydbl l_thresh,t_tree *tree);
1618
void Get_List_Of_Nodes_In_Polytomy(t_node *a,t_node *d,t_node ***list,int *size_list);
1619
void Check_Path(t_node *a,t_node *d,t_node *target,t_tree *tree);
1620
void Connect_Two_Nodes(t_node *a,t_node *d);
1621
void Get_List_Of_Adjacent_Targets(t_node *a,t_node *d,t_node ***node_list,t_edge ***edge_list,int *list_size);
1622
void Sort_List_Of_Adjacent_Targets(t_edge ***list,int list_size);
1623
t_node *Common_Nodes_Btw_Two_Edges(t_edge *a,t_edge *b);
1624
int KH_Test(phydbl *site_lk_M1,phydbl *site_lk_M2,t_tree *tree);
1625
void Random_Tree(t_tree *tree);
1626
void Reorganize_Edges_Given_Lk_Struct(t_tree *tree);
1627
void Random_NNI(int n_moves,t_tree *tree);
1628
void Fill_Missing_Dist(matrix *mat);
1629
void Fill_Missing_Dist_XY(int x,int y,matrix *mat);
1630
phydbl Least_Square_Missing_Dist_XY(int x,int y,phydbl dxy,matrix *mat);
1631
void Check_Memory_Amount(t_tree *tree);
1632
int Get_State_From_P_Lk(phydbl *p_lk,int pos,t_tree *tree);
1633
int Get_State_From_P_Pars(short int *p_pars,int pos,t_tree *tree);
1634
void Check_Dirs(t_tree *tree);
1635
void Warn_And_Exit(char *s);
1636
void Randomize_Sequence_Order(calign *cdata);
1637
void Update_Root_Pos(t_tree *tree);
1638
void Add_Root(t_edge *target,t_tree *tree);
1639
void Update_Ancestors(t_node *a,t_node *d,t_tree *tree);
1640
#if (defined PHYTIME || defined SERGEII)
1641
t_tree *Generate_Random_Tree_From_Scratch(int n_otu,int rooted);
1643
void Random_Lineage_Rates(t_node *a,t_node *d,t_edge *b,phydbl stick_prob,phydbl *rates,int curr_rate,int n_rates,t_tree *tree);
1644
t_edge *Find_Edge_With_Label(char *label,t_tree *tree);
1645
void Evolve(calign *data,t_mod *mod,t_tree *tree);
1646
int Pick_State(int n,phydbl *prob);
1647
void Evolve_Recur(t_node *a,t_node *d,t_edge *b,int a_state,int r_class,int site_num,calign *gen_data,t_mod *mod,t_tree *tree);
1648
void Site_Diversity(t_tree *tree);
1649
void Site_Diversity_Post(t_node *a,t_node *d,t_edge *b,t_tree *tree);
1650
void Site_Diversity_Pre(t_node *a,t_node *d,t_edge *b,t_tree *tree);
1651
void Subtree_Union(t_node *n,t_edge *b_fcus,t_tree *tree);
1652
void Binary_Decomposition(int value,int *bit_vect,int size);
1653
void Print_Diversity_Header(FILE *fp,t_tree *tree);
1654
phydbl Univariate_Kernel_Density_Estimate(phydbl where,phydbl *x,int sample_size);
1655
phydbl Multivariate_Kernel_Density_Estimate(phydbl *where,phydbl **x,int sample_size,int vect_size);
1656
phydbl Var(phydbl *x,int n);
1657
phydbl Mean(phydbl *x,int n);
1658
void Best_Of_NNI_And_SPR(t_tree *tree);
1659
int Polint(phydbl *xa,phydbl *ya,int n,phydbl x,phydbl *y,phydbl *dy);
1660
void JF(t_tree *tree);
1661
t_tree *Dist_And_BioNJ(calign *cdata,t_mod *mod,option *io);
1662
void Add_BioNJ_Branch_Lengths(t_tree *tree,calign *cdata,t_mod *mod);
1663
char *Bootstrap_From_String(char *s_tree,calign *cdata,t_mod *mod,option *io);
1664
char *aLRT_From_String(char *s_tree,calign *cdata,t_mod *mod,option *io);
1665
void Prepare_Tree_For_Lk(t_tree *tree);
1666
void Find_Common_Tips(t_tree *tree1,t_tree *tree2);
1667
phydbl Get_Tree_Size(t_tree *tree);
1668
void Dist_To_Root_Pre(t_node *a,t_node *d,t_edge *b,t_tree *tree);
1669
void Dist_To_Root(t_node *n_root,t_tree *tree);
1670
char *Basename(char *path);
1671
t_node *Find_Lca_Pair_Of_Nodes(t_node *n1,t_node *n2,t_tree *tree);
1672
t_node *Find_Lca_Clade(t_node **node_list,int node_list_size,t_tree *tree);
1673
int Get_List_Of_Ancestors(t_node *ref_node,t_node **list,int *size,t_tree *tree);
1674
int Edge_Num_To_Node_Num(int edge_num,t_tree *tree);
1675
void Time_To_Branch(t_tree *tree);
1676
void Time_To_Branch_Pre(t_node *a,t_node *d,t_tree *tree);
1677
void Branch_Lengths_To_Rate_Lengths(t_tree *tree);
1678
void Branch_Lengths_To_Rate_Lengths_Pre(t_node *a,t_node *d,t_tree *tree);
1679
int Find_Clade(char **tax_name_list,int list_size,t_tree *tree);
1680
void Find_Clade_Pre(t_node *a,t_node *d,int *tax_num_list,int list_size,int *num,t_tree *tree);
1681
t_edge *Find_Root_Edge(FILE *fp_input_tree,t_tree *tree);
1682
void Copy_Tree_Topology_With_Labels(t_tree *ori,t_tree *cpy);
1683
void Set_Model_Name(t_mod *mod);
1684
void Adjust_Min_Diff_Lk(t_tree *tree);
1685
void Translate_Tax_Names(char **tax_names,t_tree *tree);
1686
void Skip_Comment(FILE *fp);
1687
void Get_Best_Root_Position(t_tree *tree);
1688
void Get_Best_Root_Position_Post(t_node *a,t_node *d,int *has_outgrp,t_tree *tree);
1689
void Get_Best_Root_Position_Pre(t_node *a,t_node *d,t_tree *tree);
1690
void Get_OutIn_Scores(t_node *a,t_node *d);
1691
int Check_Sequence_Name(char *s);
1692
int Scale_Subtree_Height(t_node *a,phydbl K,phydbl floor,int *n_nodes,t_tree *tree);
1693
void Scale_Node_Heights_Post(t_node *a,t_node *d,phydbl K,phydbl floor,int *n_nodes,t_tree *tree);
1694
int Scale_Subtree_Rates(t_node *a,phydbl mult,int *n_nodes,t_tree *tree);
1695
void Check_Br_Len_Bounds(t_tree *tree);
1696
int Scale_Subtree_Rates_Post(t_node *a,t_node *d,phydbl mult,int *n_nodes,t_tree *tree);
1697
void Get_Node_Ranks(t_tree *tree);
1698
void Get_Node_Ranks_Pre(t_node *a,t_node *d,t_tree *tree);
1699
void Log_Br_Len(t_tree *tree);
1700
phydbl Diff_Lk_Norm_At_Given_Edge(t_edge *b,t_tree *tree);
1701
void Adjust_Variances(t_tree *tree);
1702
phydbl Effective_Sample_Size(phydbl first_val,phydbl last_val,phydbl sum,phydbl sumsq,phydbl sumcurnext,int n);
1703
void Rescale_Free_Rate_Tree(t_tree *tree);
1704
phydbl Rescale_Br_Len_Multiplier_Tree(t_tree *tree);
1705
phydbl Unscale_Br_Len_Multiplier_Tree(t_tree *tree);
1706
phydbl Reflect(phydbl x,phydbl l,phydbl u);
1707
int Are_Equal(phydbl a,phydbl b,phydbl eps);
1708
int Check_Topo_Constraints(t_tree *big_tree,t_tree *small_tree);
1709
void Prune_Tree(t_tree *big_tree,t_tree *small_tree);
1710
void Match_Nodes_In_Small_Tree(t_tree *small_tree,t_tree *big_tree);
1711
void Find_Surviving_Edges_In_Small_Tree(t_tree *small_tree,t_tree *big_tree);
1712
void Find_Surviving_Edges_In_Small_Tree_Post(t_node *a,t_node *d,t_tree *small_tree,t_tree *big_tree);
1713
void Set_Taxa_Id_Ranking(t_tree *tree);
1714
void Get_Edge_Binary_Coding_Number(t_tree *tree);
1715
void Make_Ancestral_Seq(t_tree *tree);
1716
void Make_MutMap(t_tree *tree);
1717
int Get_Mutmap_Val(int edge,int site,int mut,t_tree *tree);
1718
void Get_Mutmap_Coord(int idx,int *edge,int *site,int *mut,t_tree *tree);
1719
void Copy_Edge_Lengths(t_tree *to,t_tree *from);
1720
void Init_Scalar_Dbl(scalar_dbl *p);
1721
void Init_Scalar_Int(scalar_int *p);
1722
void Init_Vect_Dbl(int len,vect_dbl *p);
1723
void Init_Vect_Int(int len,vect_int *p);
1724
char *To_Lower_String(char *in);
1725
phydbl String_To_Dbl(char *string);
1726
char *To_Upper_String(char *in);
1727
void Connect_CSeqs_To_Nodes(calign *cdata, t_tree *tree);
1728
void Switch_Eigen(int state, t_mod *mod);
1729
void Joint_Proba_States_Left_Right(phydbl *Pij, phydbl *p_lk_left, phydbl *p_lk_rght,
1730
vect_dbl *pi, int scale_left, int scale_rght,
1731
phydbl *F, int n, int site, t_tree *tree);
1732
void Set_Both_Sides(int yesno, t_tree *tree);
1733
void Set_D_States(calign *data, int datatype, int stepsize);
1734
void Branch_To_Time(t_tree *tree);
1735
void Branch_To_Time_Pre(t_node *a, t_node *d, t_tree *tree);
1736
void Path_Length(t_node *dep, t_node *arr, phydbl *len, t_tree *tree);
1737
phydbl *Dist_Btw_Tips(t_tree *tree);
1738
void Random_SPRs_On_Rooted_Tree(t_tree *tree);
1739
void Set_P_Lk_One_Side(phydbl **Pij, phydbl **p_lk, int **sum_scale, t_node *d, t_edge *b, t_tree *tree);
1740
void Set_All_P_Lk(t_node **n_v1, t_node **n_v2,
1741
phydbl **p_lk , int **sum_scale , int **p_lk_loc,
1742
phydbl **Pij1, phydbl **p_lk1, int **sum_scale1,
1743
phydbl **Pij2, phydbl **p_lk2, int **sum_scale2,
1744
t_node *d, t_edge *b, t_tree *tree);
1746
void Optimum_Root_Position_IL_Model(t_tree *tree);
1747
void Set_Br_Len_Var(t_tree *tree);
1748
void Check_Br_Lens(t_tree *tree);
1754
#include "optimiz.h"
1770
#include "mpi_boot.h"
1782
#ifdef _NOT_NEEDED_A_PRIORI