1
/* -*- mode: c; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
3
/* This a mix of tree functions and data-structures from
4
* Bob Edgar's Muscle (version 3.7) ported to pure C, topped up with
5
* some of our own stuff.
7
* Used files: phy.cpp, tree.h, phytofile.cpp and phyfromclust.cpp
9
* Muscle's code is public domain and so is this code here.
11
* From http://www.drive5.com/muscle/license.htm:
13
* MUSCLE is public domain software
15
* The MUSCLE software, including object and source code and
16
* documentation, is hereby donated to the public domain.
18
* Disclaimer of warranty
19
* THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND,
20
* EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION IMPLIED
21
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27
* RCS $Id: muscle_tree.c 230 2011-04-09 15:37:50Z andreas $
39
#include "muscle_tree.h"
42
static const double VERY_NEGATIVE_DOUBLE = -9e29;
43
/*const double dInsane = VERY_NEGATIVE_DOUBLE;*/
44
static const double dInsane = -9e29;
45
static const unsigned uInsane = 8888888;
51
/* Returned from Tree::GetToken: */
59
/* Following are never returned from Tree::GetToken: */
60
NTT_SingleQuotedString,
61
NTT_DoubleQuotedString,
67
InitCache(uint uCacheCount, tree_t *tree);
69
TreeZero(tree_t *tree);
71
GetNeighborCount(unsigned uNodeIndex, tree_t *tree);
73
IsEdge(unsigned uNodeIndex1, unsigned uNodeIndex2, tree_t *tree);
75
HasEdgeLength(uint uNodeIndex1, uint uNodeIndex2, tree_t *tree);
77
TreeToFileNodeRooted(tree_t *tree, uint m_uRootNodeIndex, FILE *fp);
79
ValidateNode(uint uNodeIndex, tree_t *tree);
81
AssertAreNeighbors(unsigned uNodeIndex1, unsigned uNodeIndex2, tree_t *tree);
83
ExpandCache(tree_t *tree);
85
TreeCreateRooted(tree_t *tree);
87
GetGroupFromFile(FILE *fp, uint uNodeIndex, double *ptrdEdgeLength, tree_t *tree);
88
static NEWICK_TOKEN_TYPE
89
GetToken(FILE *fp, char szToken[], uint uBytes);
90
/* stuff from textfile.cpp */
92
FileSkipWhite(FILE *fp);
94
FileSkipWhiteX(FILE *fp);
96
SetLeafName(uint uNodeIndex, const char *ptrName, tree_t *tree);
98
AppendBranch(tree_t *tree, uint uExistingLeafIndex);
100
SetEdgeLength(uint uNodeIndex1, uint uNodeIndex2,
101
double dLength, tree_t *tree);
103
UnrootFromFile(tree_t *tree);
105
GetNeighbor(uint uNodeIndex, uint uNeighborSubscript, tree_t *tree);
107
InitNode(tree_t *prTree, uint uNodeIndex);
111
* @param[in] uNodeIndex
115
* @return id of left node
116
* @note called GetRight in Muscle3.7
119
GetLeft(uint uNodeIndex, tree_t *tree)
121
assert(NULL != tree);
122
assert(tree->m_bRooted && uNodeIndex < tree->m_uNodeCount);
123
return tree->m_uNeighbor2[uNodeIndex];
125
/*** end: GetLeft ***/
130
* @param[in] uNodeIndex
134
* @return id of right node
135
* @note called GetRight in Muscle3.7
138
GetRight(uint uNodeIndex, tree_t *tree)
140
assert(NULL != tree);
141
assert(tree->m_bRooted && uNodeIndex < tree->m_uNodeCount);
142
return tree->m_uNeighbor3[uNodeIndex];
144
/*** end: GetRight ***/
149
* @param[in] uNodeIndex
153
* @return leaf id of current node
156
GetLeafId(uint uNodeIndex, tree_t *tree)
158
assert(NULL != tree);
159
assert(uNodeIndex < tree->m_uNodeCount);
160
assert(IsLeaf(uNodeIndex, tree));
162
return tree->m_Ids[uNodeIndex];
164
/*** end: GetLeafId ***/
169
* @note originally called GetLeafName
173
GetLeafName(unsigned uNodeIndex, tree_t *tree)
175
assert(NULL != tree);
176
assert(uNodeIndex < tree->m_uNodeCount);
177
assert(IsLeaf(uNodeIndex, tree));
178
return tree->m_ptrName[uNodeIndex];
180
/*** end: GetLeafName ***/
185
* @brief returns first leaf node for a depth-first traversal of tree
190
* @return node index of first leaf node for depth-first traversal
192
* @note called FirstDepthFirstNode in Muscle3.7
196
FirstDepthFirstNode(tree_t *tree)
200
assert(NULL != tree);
201
assert(IsRooted(tree));
203
/* Descend via left branches until we hit a leaf */
204
uNodeIndex = tree->m_uRootNodeIndex;
205
while (!IsLeaf(uNodeIndex, tree))
206
uNodeIndex = GetLeft(uNodeIndex, tree);
209
/*** end: FirstDepthFirstNode ***/
213
* @brief returns next leaf node index for depth-first traversal of
218
* @param[in] uNodeIndex
221
* @return node index of next leaf node during depth-first traversal
223
* @note called NextDepthFirstNode in Muscle3.7
226
NextDepthFirstNode(uint uNodeIndex, tree_t *tree)
230
assert(NULL != tree);
231
assert(IsRooted(tree));
232
assert(uNodeIndex < tree->m_uNodeCount);
234
if (IsRoot(uNodeIndex, tree))
236
/* Node uNodeIndex is root, end of traversal */
237
return NULL_NEIGHBOR;
240
uParent = GetParent(uNodeIndex, tree);
241
if (GetRight(uParent, tree) == uNodeIndex) {
242
/* Is right branch, return parent=uParent */
246
uNodeIndex = GetRight(uParent, tree);
247
/* Descend left from right sibling uNodeIndex */
248
while (!IsLeaf(uNodeIndex, tree)) {
249
uNodeIndex = GetLeft(uNodeIndex, tree);
252
/* bottom out at leaf uNodeIndex */
255
/*** end: NextDepthFirstNode ***/
260
* @brief check if tree is a rooted tree
263
* @return TRUE if given tree is rooted, FALSE otherwise
267
IsRooted(tree_t *tree)
269
assert(NULL != tree);
270
return tree->m_bRooted;
272
/*** end: IsRooted ***/
279
FreeMuscleTree(tree_t *tree)
286
/* FIXME use GetLeafNodeIndex? or
287
for (unsigned uNodeIndex = 0; uNodeIndex < m_uNodeCount; ++uNodeIndex)
289
if (tree.IsLeaf(uNodeIndex))
291
const char *ptrName =
292
tree.GetLeafName(uNodeIndex);
294
/* IsLeaf needs m_uNodeCount and all m_uNeighbor's
297
for (i=0; i<tree->m_uNodeCount; i++) {
298
/* IsLeaf needs neighbour count, so free those guys later */
299
if (IsLeaf(i, tree)) {
300
CKFREE(tree->m_ptrName[i]);
303
CKFREE(tree->m_ptrName);
305
CKFREE(tree->m_uNeighbor1);
306
CKFREE(tree->m_uNeighbor2);
307
CKFREE(tree->m_uNeighbor3);
311
CKFREE(tree->m_dEdgeLength1);
312
CKFREE(tree->m_dEdgeLength2);
313
CKFREE(tree->m_dEdgeLength3);
315
CKFREE(tree->m_dHeight);
316
CKFREE(tree->m_bHasHeight);
318
CKFREE(tree->m_bHasEdgeLength1);
319
CKFREE(tree->m_bHasEdgeLength2);
320
CKFREE(tree->m_bHasEdgeLength3);
326
/*** end: FreeMuscleTree ***/
331
* @brief create a muscle tree
333
* @note Original comment in Muscle code: "Create rooted tree from a
334
* vector description. Node indexes are 0..N-1 for leaves, N..2N-2 for
335
* internal nodes. Vector subscripts are i-N and have values for
336
* internal nodes only, but those values are node indexes 0..2N-2. So
337
* e.g. if N=6 and Left[2]=1, this means that the third internal node
338
* (node index 8) has the second leaf (node index 1) as its left
339
* child. uRoot gives the vector subscript of the root, so add N to
340
* get the node index."
344
* @param[in] uLeafCount
345
* number of leaf nodes
347
* Internal node index of root node
349
* Node index of left sibling of an internal node.
350
* Index range: 0--uLeafCount-1
352
* Node index of right sibling of an internal node.
353
* Index range: 0--uLeafCount-1
354
* @param[in] LeftLength
355
* Branch length of left branch of an internal node.
356
* Index range: 0--uLeafCount-1
357
* @param[in] RightLength
358
* Branch length of right branch of an internal node.
359
* Index range: 0--uLeafCount-1
361
* Leaf id. Index range: 0--uLeafCount
362
* @param[in] LeafNames
363
* Leaf label. Index range: 0--uLeafCount
367
MuscleTreeCreate(tree_t *tree,
368
uint uLeafCount, uint uRoot,
369
const uint *Left, const uint *Right,
370
const float *LeftLength, const float* RightLength,
371
const uint *LeafIds, char **LeafNames)
376
tree->m_uNodeCount = 2*uLeafCount - 1;
377
InitCache(tree->m_uNodeCount, tree);
379
for (uNodeIndex = 0; uNodeIndex < uLeafCount; ++uNodeIndex) {
380
tree->m_Ids[uNodeIndex] = LeafIds[uNodeIndex];
381
tree->m_ptrName[uNodeIndex] = CkStrdup(LeafNames[uNodeIndex]);
384
for (uNodeIndex = uLeafCount; uNodeIndex < tree->m_uNodeCount; ++uNodeIndex) {
385
uint v = uNodeIndex - uLeafCount;
386
uint uLeft = Left[v];
387
uint uRight = Right[v];
388
float fLeft = LeftLength[v];
389
float fRight = RightLength[v];
391
tree->m_uNeighbor2[uNodeIndex] = uLeft;
392
tree->m_uNeighbor3[uNodeIndex] = uRight;
394
tree->m_bHasEdgeLength2[uNodeIndex] = TRUE;
395
tree->m_bHasEdgeLength3[uNodeIndex] = TRUE;
397
tree->m_dEdgeLength2[uNodeIndex] = fLeft;
398
tree->m_dEdgeLength3[uNodeIndex] = fRight;
400
tree->m_uNeighbor1[uLeft] = uNodeIndex;
401
tree->m_uNeighbor1[uRight] = uNodeIndex;
403
tree->m_dEdgeLength1[uLeft] = fLeft;
404
tree->m_dEdgeLength1[uRight] = fRight;
406
tree->m_bHasEdgeLength1[uLeft] = TRUE;
407
tree->m_bHasEdgeLength1[uRight] = TRUE;
410
tree->m_bRooted = TRUE;
411
tree->m_uRootNodeIndex = uRoot + uLeafCount;
416
/*** end: MuscleTreeCreate ***/
424
* filepointer to write to2
426
* @brief write a muscle tree to a file in newick format (distances
432
MuscleTreeToFile(FILE *fp, tree_t *tree)
434
assert(NULL != tree);
435
if (IsRooted(tree)) {
436
TreeToFileNodeRooted(tree, tree->m_uRootNodeIndex, fp);
440
Log(&rLog, LOG_FATAL, "FIXME: output of unrooted muscle trees not implemented");
443
/*** end: MuscleTreeToFile ***/
448
* @brief check if given node is a leaf node
450
* @param[in] uNodeIndex
455
* @return TRUE if given node is a leaf, FALSE otherwise
457
* @note called IsLeaf in Muscle3.7. See tree.h in original code
460
IsLeaf(uint uNodeIndex, tree_t *tree)
462
assert(NULL != tree);
463
assert(uNodeIndex < tree->m_uNodeCount);
464
if (1 == tree->m_uNodeCount)
466
return 1 == GetNeighborCount(uNodeIndex, tree);
468
/*** end: IsLeaf ***/
475
IsRoot(uint uNodeIndex, tree_t *tree)
477
assert(NULL != tree);
478
return IsRooted(tree) && tree->m_uRootNodeIndex == uNodeIndex;
480
/*** end: IsRoot ***/
487
GetNeighborCount(uint uNodeIndex, tree_t *tree)
490
assert(uNodeIndex < tree->m_uNodeCount);
491
assert(NULL != tree);
492
assert(NULL != tree->m_uNeighbor1);
493
assert(NULL != tree->m_uNeighbor2);
494
assert(NULL != tree->m_uNeighbor3);
495
n1 = tree->m_uNeighbor1[uNodeIndex];
496
n2 = tree->m_uNeighbor2[uNodeIndex];
497
n3 = tree->m_uNeighbor3[uNodeIndex];
498
return (NULL_NEIGHBOR != n1) + (NULL_NEIGHBOR != n2) + (NULL_NEIGHBOR != n3);
500
/*** end: GetNeighborCount ***/
506
GetParent(unsigned uNodeIndex, tree_t *tree)
508
assert(NULL != tree);
509
assert(tree->m_bRooted && uNodeIndex < tree->m_uNodeCount);
510
return tree->m_uNeighbor1[uNodeIndex];
512
/*** end: GetParent ***/
519
IsEdge(unsigned uNodeIndex1, unsigned uNodeIndex2, tree_t *tree)
521
assert(uNodeIndex1 < tree->m_uNodeCount && uNodeIndex2 < tree->m_uNodeCount);
522
assert(NULL != tree);
524
return tree->m_uNeighbor1[uNodeIndex1] == uNodeIndex2 ||
525
tree->m_uNeighbor2[uNodeIndex1] == uNodeIndex2 ||
526
tree->m_uNeighbor3[uNodeIndex1] == uNodeIndex2;
528
/*** end: IsEdge ***/
535
HasEdgeLength(uint uNodeIndex1, uint uNodeIndex2, tree_t *tree)
537
assert(NULL != tree);
538
assert(uNodeIndex1 < tree->m_uNodeCount);
539
assert(uNodeIndex2 < tree->m_uNodeCount);
540
assert(IsEdge(uNodeIndex1, uNodeIndex2, tree));
542
if (tree->m_uNeighbor1[uNodeIndex1] == uNodeIndex2)
543
return tree->m_bHasEdgeLength1[uNodeIndex1];
544
else if (tree->m_uNeighbor2[uNodeIndex1] == uNodeIndex2)
545
return tree->m_bHasEdgeLength2[uNodeIndex1];
546
assert(tree->m_uNeighbor3[uNodeIndex1] == uNodeIndex2);
547
return tree->m_bHasEdgeLength3[uNodeIndex1];
555
GetEdgeLength(uint uNodeIndex1, uint uNodeIndex2, tree_t *tree)
557
assert(NULL != tree);
558
assert(uNodeIndex1 < tree->m_uNodeCount && uNodeIndex2 < tree->m_uNodeCount);
559
if (!HasEdgeLength(uNodeIndex1, uNodeIndex2, tree))
561
Log(&rLog, LOG_FATAL, "Missing edge length in tree %u-%u", uNodeIndex1, uNodeIndex2);
564
if (tree->m_uNeighbor1[uNodeIndex1] == uNodeIndex2)
565
return tree->m_dEdgeLength1[uNodeIndex1];
566
else if (tree->m_uNeighbor2[uNodeIndex1] == uNodeIndex2)
567
return tree->m_dEdgeLength2[uNodeIndex1];
568
assert(tree->m_uNeighbor3[uNodeIndex1] == uNodeIndex2);
569
return tree->m_dEdgeLength3[uNodeIndex1];
571
/*** end: GetEdgeLength ***/
578
InitNode(tree_t *prTree, uint uNodeIndex)
580
prTree->m_uNeighbor1[uNodeIndex] = NULL_NEIGHBOR;
581
prTree->m_uNeighbor2[uNodeIndex] = NULL_NEIGHBOR;
582
prTree->m_uNeighbor3[uNodeIndex] = NULL_NEIGHBOR;
583
prTree->m_bHasEdgeLength1[uNodeIndex] = FALSE;
584
prTree->m_bHasEdgeLength2[uNodeIndex] = FALSE;
585
prTree->m_bHasEdgeLength3[uNodeIndex] = FALSE;
587
prTree->m_bHasHeight[uNodeIndex] = FALSE;
588
prTree->m_dHeight[uNodeIndex] = dInsane;
590
prTree->m_dEdgeLength1[uNodeIndex] = dInsane;
591
prTree->m_dEdgeLength2[uNodeIndex] = dInsane;
592
prTree->m_dEdgeLength3[uNodeIndex] = dInsane;
594
prTree->m_ptrName[uNodeIndex] = NULL;
595
prTree->m_Ids[uNodeIndex] = uInsane;
597
/*** end: InitNode ***/
604
InitCache(uint uCacheCount, tree_t *tree)
608
tree->m_uCacheCount = uCacheCount;
610
tree->m_uNeighbor1 = (uint *) CKMALLOC(sizeof(uint) * tree->m_uCacheCount);
611
tree->m_uNeighbor2 = (uint *) CKMALLOC(sizeof(uint) * tree->m_uCacheCount);
612
tree->m_uNeighbor3 = (uint *) CKMALLOC(sizeof(uint) * tree->m_uCacheCount);
614
tree->m_Ids = (uint *) CKMALLOC(sizeof(uint) * tree->m_uCacheCount);
616
tree->m_dEdgeLength1 = (double *) CKMALLOC(sizeof(double) * tree->m_uCacheCount);
617
tree->m_dEdgeLength2 = (double *) CKMALLOC(sizeof(double) * tree->m_uCacheCount);
618
tree->m_dEdgeLength3 = (double *) CKMALLOC(sizeof(double) * tree->m_uCacheCount);
620
tree->m_dHeight = (double *) CKMALLOC(sizeof(double) * tree->m_uCacheCount);
621
tree->m_bHasHeight = (bool *) CKMALLOC(sizeof(bool) * tree->m_uCacheCount);
623
tree->m_bHasEdgeLength1 = (bool *) CKMALLOC(sizeof(bool) * tree->m_uCacheCount);
624
tree->m_bHasEdgeLength2 = (bool *) CKMALLOC(sizeof(bool) * tree->m_uCacheCount);
625
tree->m_bHasEdgeLength3 = (bool *) CKMALLOC(sizeof(bool) * tree->m_uCacheCount);
627
tree->m_ptrName = (char **) CKMALLOC(sizeof(char *) * tree->m_uCacheCount);
629
for (uNodeIndex = 0; uNodeIndex < tree->m_uNodeCount; ++uNodeIndex) {
630
InitNode(tree, uNodeIndex);
633
/*** end: InitCache ***/
640
* @note Replacing Tree::Clear but no freeing of memory! Just setting
641
* everything to 0/NULL
644
TreeZero(tree_t *tree)
646
assert(NULL != tree);
647
tree->m_uNodeCount = 0;
648
tree->m_uCacheCount = 0;
650
tree->m_uNeighbor1 = NULL;
651
tree->m_uNeighbor2 = NULL;
652
tree->m_uNeighbor3 = NULL;
654
tree->m_dEdgeLength1 = NULL;
655
tree->m_dEdgeLength2 = NULL;
656
tree->m_dEdgeLength3 = NULL;
659
tree->m_dHeight = NULL;
660
tree->m_bHasHeight = NULL;
662
tree->m_bHasEdgeLength1 = NULL;
663
tree->m_bHasEdgeLength2 = NULL;
664
tree->m_bHasEdgeLength3 = NULL;
666
tree->m_ptrName = NULL;
669
tree->m_bRooted = FALSE;
670
tree->m_uRootNodeIndex = 0;
680
TreeToFileNodeRooted(tree_t *tree, uint uNodeIndex, FILE *fp)
684
assert(IsRooted(tree));
685
assert(NULL != tree);
686
bGroup = !IsLeaf(uNodeIndex, tree) || IsRoot(uNodeIndex, tree);
692
if (IsLeaf(uNodeIndex, tree)) {
693
/* File.PutString(GetName(uNodeIndex)); */
694
fprintf(fp, "%s", tree->m_ptrName[uNodeIndex]);
696
TreeToFileNodeRooted(tree, GetLeft(uNodeIndex, tree), fp);
698
TreeToFileNodeRooted(tree, GetRight(uNodeIndex, tree), fp);
704
if (!IsRoot(uNodeIndex, tree)) {
705
uint uParent = GetParent(uNodeIndex, tree);
706
if (HasEdgeLength(uNodeIndex, uParent, tree))
707
fprintf(fp, ":%g", GetEdgeLength(uNodeIndex, uParent, tree));
711
/*** end: TreeToFileNodeRooted ***/
719
TreeValidate(tree_t *tree)
722
assert(NULL != tree);
723
for (uNodeIndex = 0; uNodeIndex < tree->m_uNodeCount; ++uNodeIndex) {
724
ValidateNode(uNodeIndex, tree);
726
/* FIXME: maybe set negative length to zero? What impact would
729
/*** end TreeValidate ***/
738
ValidateNode(uint uNodeIndex, tree_t *tree)
742
assert(NULL != tree);
744
if (uNodeIndex >= tree->m_uNodeCount)
745
Log(&rLog, LOG_FATAL, "ValidateNode(%u), %u nodes", uNodeIndex, tree->m_uNodeCount);
747
uNeighborCount = GetNeighborCount(uNodeIndex, tree);
750
if (2 == uNeighborCount) {
751
if (!tree->m_bRooted) {
752
Log(&rLog, LOG_FATAL, "Tree::ValidateNode: Node %u has two neighbors, tree is not rooted",
755
if (uNodeIndex != tree->m_uRootNodeIndex) {
756
Log(&rLog, LOG_FATAL, "Tree::ValidateNode: Node %u has two neighbors, but not root node=%u",
757
uNodeIndex, tree->m_uRootNodeIndex);
761
n1 = tree->m_uNeighbor1[uNodeIndex];
762
n2 = tree->m_uNeighbor2[uNodeIndex];
763
n3 = tree->m_uNeighbor3[uNodeIndex];
765
if (NULL_NEIGHBOR == n2 && NULL_NEIGHBOR != n3) {
766
Log(&rLog, LOG_FATAL, "Tree::ValidateNode, n2=null, n3!=null", uNodeIndex);
768
if (NULL_NEIGHBOR == n3 && NULL_NEIGHBOR != n2) {
769
Log(&rLog, LOG_FATAL, "Tree::ValidateNode, n3=null, n2!=null", uNodeIndex);
772
if (n1 != NULL_NEIGHBOR)
773
AssertAreNeighbors(uNodeIndex, n1, tree);
774
if (n2 != NULL_NEIGHBOR)
775
AssertAreNeighbors(uNodeIndex, n2, tree);
776
if (n3 != NULL_NEIGHBOR)
777
AssertAreNeighbors(uNodeIndex, n3, tree);
781
if (n1 != NULL_NEIGHBOR && (n1 == n2 || n1 == n3)) {
782
Log(&rLog, LOG_FATAL, "Tree::ValidateNode, duplicate neighbors in node %u", uNodeIndex);
784
if (n2 != NULL_NEIGHBOR && (n2 == n1 || n2 == n3)) {
785
Log(&rLog, LOG_FATAL, "Tree::ValidateNode, duplicate neighbors in node %u", uNodeIndex);
787
if (n3 != NULL_NEIGHBOR && (n3 == n1 || n3 == n2)) {
788
Log(&rLog, LOG_FATAL, "Tree::ValidateNode, duplicate neighbors in node %u", uNodeIndex);
792
if (IsRooted(tree)) {
793
if (NULL_NEIGHBOR == GetParent(uNodeIndex, tree)) {
794
if (uNodeIndex != tree->m_uRootNodeIndex) {
795
Log(&rLog, LOG_FATAL, "Tree::ValiateNode(%u), no parent", uNodeIndex);
797
} else if (GetLeft(GetParent(uNodeIndex, tree), tree) != uNodeIndex &&
798
GetRight(GetParent(uNodeIndex, tree), tree) != uNodeIndex) {
799
Log(&rLog, LOG_FATAL, "Tree::ValidateNode(%u), parent / child mismatch", uNodeIndex);
803
/*** end: ValidateNode ***/
811
AssertAreNeighbors(unsigned uNodeIndex1, unsigned uNodeIndex2, tree_t *tree)
814
assert(NULL != tree);
816
if (uNodeIndex1 >= tree->m_uNodeCount || uNodeIndex2 >= tree->m_uNodeCount)
817
Log(&rLog, LOG_FATAL, "AssertAreNeighbors(%u,%u), are %u nodes",
818
uNodeIndex1, uNodeIndex2, tree->m_uNodeCount);
820
if (tree->m_uNeighbor1[uNodeIndex1] != uNodeIndex2 &&
821
tree->m_uNeighbor2[uNodeIndex1] != uNodeIndex2 &&
822
tree->m_uNeighbor3[uNodeIndex1] != uNodeIndex2) {
823
Log(&rLog, LOG_FATAL, "AssertAreNeighbors(%u,%u) failed", uNodeIndex1, uNodeIndex2);
827
if (tree->m_uNeighbor1[uNodeIndex2] != uNodeIndex1 &&
828
tree->m_uNeighbor2[uNodeIndex2] != uNodeIndex1 &&
829
tree->m_uNeighbor3[uNodeIndex2] != uNodeIndex1) {
830
Log(&rLog, LOG_FATAL, "AssertAreNeighbors(%u,%u) failed", uNodeIndex1, uNodeIndex2);
834
Has12 = HasEdgeLength(uNodeIndex1, uNodeIndex2, tree);
835
Has21 = HasEdgeLength(uNodeIndex2, uNodeIndex1, tree);
836
if (Has12 != Has21) {
837
HasEdgeLength(uNodeIndex1, uNodeIndex2, tree);
838
HasEdgeLength(uNodeIndex2, uNodeIndex1, tree);
839
Log(&rLog, LOG_ERROR, "HasEdgeLength(%u, %u)=%c HasEdgeLength(%u, %u)=%c\n",
846
Log(&rLog, LOG_FATAL, "Tree::AssertAreNeighbors, HasEdgeLength not symmetric");
851
double d12 = GetEdgeLength(uNodeIndex1, uNodeIndex2, tree);
852
double d21 = GetEdgeLength(uNodeIndex2, uNodeIndex1, tree);
854
Log(&rLog, LOG_FATAL, "Tree::AssertAreNeighbors, Edge length disagrees %u-%u=%.3g, %u-%u=%.3g",
855
uNodeIndex1, uNodeIndex2, d12,
856
uNodeIndex2, uNodeIndex1, d21);
860
/*** end: AssertAreNeighbors ***/
866
* @note see phyfromfile.cpp in Muscle3.7. Tree has to be a pointer to
867
* an already allocated tree structure.
869
* return non-Zero on failure
871
* leafids will not be set here (FIXME:CHECK if true)
874
MuscleTreeFromFile(tree_t *tree, char *ftree)
879
NEWICK_TOKEN_TYPE NTT;
886
if (NULL == (fp=fopen(ftree, "r"))) {
887
Log(&rLog, LOG_ERROR, "Couldn't open tree-file '%s' for reading. Skipping", ftree);
892
* If we discover that it is unrooted, will convert on the fly.
894
TreeCreateRooted(tree);
896
bEdgeLength = GetGroupFromFile(fp, 0, &dEdgeLength, tree);
899
/* Next token should be either ';' for rooted tree or ',' for
902
NTT = GetToken(fp, szToken, sizeof(szToken));
904
/* If rooted, all done. */
905
if (NTT_Semicolon == NTT) {
907
Log(&rLog, LOG_WARN, " *** Warning *** edge length on root group in Newick file %s\n", ftree);
913
if (NTT_Comma != NTT)
914
Log(&rLog, LOG_FATAL, "Tree::FromFile, expected ';' or ',', got '%s'", szToken);
916
uThirdNode = UnrootFromFile(tree);
917
bEdgeLength = GetGroupFromFile(fp, uThirdNode, &dEdgeLength, tree);
919
SetEdgeLength(0, uThirdNode, dEdgeLength, tree);
925
/*** end MuscleTreeFromFile ***/
933
ExpandCache(tree_t *tree)
935
const uint uNodeCount = 100;
937
uint *uNewNeighbor1, *uNewNeighbor2, *uNewNeighbor3;
939
double *dNewEdgeLength1, *dNewEdgeLength2, *dNewEdgeLength3;
944
bool *bNewHasEdgeLength1, *bNewHasEdgeLength2, *bNewHasEdgeLength3;
947
assert(NULL != tree);
948
uNewCacheCount = tree->m_uCacheCount + uNodeCount;
949
uNewNeighbor1 = (uint *) CKMALLOC(
950
uNewCacheCount * sizeof(uint));
951
uNewNeighbor2 = (uint *) CKMALLOC(
952
uNewCacheCount * sizeof(uint));
953
uNewNeighbor3 = (uint *) CKMALLOC(
954
uNewCacheCount * sizeof(uint));
956
uNewIds = (uint *) CKCALLOC(
957
uNewCacheCount, sizeof(uint));
959
dNewEdgeLength1 = (double *) CKMALLOC(
960
uNewCacheCount * sizeof(double));
961
dNewEdgeLength2 = (double *) CKMALLOC(
962
uNewCacheCount * sizeof(double));
963
dNewEdgeLength3 = (double *) CKMALLOC(
964
uNewCacheCount * sizeof(double));
966
dNewHeight = (double *) CKMALLOC(
967
uNewCacheCount * sizeof(double));
968
bNewHasHeight = (bool *) CKMALLOC(
969
uNewCacheCount * sizeof(bool));
972
bNewHasEdgeLength1 = (bool *) CKMALLOC(
973
uNewCacheCount * sizeof(bool));
974
bNewHasEdgeLength2 = (bool *) CKMALLOC(
975
uNewCacheCount * sizeof(bool));
976
bNewHasEdgeLength3 = (bool *) CKMALLOC(
977
uNewCacheCount * sizeof(bool));
978
ptrNewName = (char **) CKCALLOC(uNewCacheCount, sizeof(char*));
980
if (tree->m_uCacheCount > 0) {
981
uint uUnsignedBytes, uEdgeBytes;
982
uint uBoolBytes, uNameBytes;
984
uUnsignedBytes = tree->m_uCacheCount*sizeof(uint);
986
memcpy(uNewNeighbor1, tree->m_uNeighbor1, uUnsignedBytes);
987
memcpy(uNewNeighbor2, tree->m_uNeighbor2, uUnsignedBytes);
988
memcpy(uNewNeighbor3, tree->m_uNeighbor3, uUnsignedBytes);
990
memcpy(uNewIds, tree->m_Ids, uUnsignedBytes);
992
uEdgeBytes = tree->m_uCacheCount*sizeof(double);
993
memcpy(dNewEdgeLength1, tree->m_dEdgeLength1, uEdgeBytes);
994
memcpy(dNewEdgeLength2, tree->m_dEdgeLength2, uEdgeBytes);
995
memcpy(dNewEdgeLength3, tree->m_dEdgeLength3, uEdgeBytes);
997
memcpy(dNewHeight, tree->m_dHeight, uEdgeBytes);
1000
uBoolBytes = tree->m_uCacheCount*sizeof(bool);
1001
memcpy(bNewHasEdgeLength1, tree->m_bHasEdgeLength1, uBoolBytes);
1002
memcpy(bNewHasEdgeLength2, tree->m_bHasEdgeLength2, uBoolBytes);
1003
memcpy(bNewHasEdgeLength3, tree->m_bHasEdgeLength3, uBoolBytes);
1005
memcpy(bNewHasHeight, tree->m_bHasHeight, uBoolBytes);
1007
uNameBytes = tree->m_uCacheCount*sizeof(char *);
1008
memcpy(ptrNewName, tree->m_ptrName, uNameBytes);
1010
/* similiar to FreeMuscleTree
1013
/* IsLeaf needs m_uNodeCount and all m_uNeighbor's
1017
for (i=0; i<tree->m_uNodeCount; i++) {
1018
if (IsLeaf(i, tree)) {
1020
if (NULL==tree->m_ptrName[i]) {
1021
Log(&rLog, LOG_WARN, "FIXME tree->m_ptrName[%d] is already NULL", i);
1024
CKFREE(tree->m_ptrName[i]);
1028
CKFREE(tree->m_ptrName);
1030
CKFREE(tree->m_uNeighbor1);
1031
CKFREE(tree->m_uNeighbor2);
1032
CKFREE(tree->m_uNeighbor3);
1034
CKFREE(tree->m_Ids);
1036
CKFREE(tree->m_dEdgeLength1);
1037
CKFREE(tree->m_dEdgeLength2);
1038
CKFREE(tree->m_dEdgeLength3);
1040
CKFREE(tree->m_bHasEdgeLength1);
1041
CKFREE(tree->m_bHasEdgeLength2);
1042
CKFREE(tree->m_bHasEdgeLength3);
1044
CKFREE(tree->m_bHasHeight);
1045
CKFREE(tree->m_dHeight);
1049
tree->m_uCacheCount = uNewCacheCount;
1050
tree->m_uNeighbor1 = uNewNeighbor1;
1051
tree->m_uNeighbor2 = uNewNeighbor2;
1052
tree->m_uNeighbor3 = uNewNeighbor3;
1053
tree->m_Ids = uNewIds;
1054
tree->m_dEdgeLength1 = dNewEdgeLength1;
1055
tree->m_dEdgeLength2 = dNewEdgeLength2;
1056
tree->m_dEdgeLength3 = dNewEdgeLength3;
1059
tree->m_dHeight = dNewHeight;
1060
tree->m_bHasHeight = bNewHasHeight;
1062
tree->m_bHasEdgeLength1 = bNewHasEdgeLength1;
1063
tree->m_bHasEdgeLength2 = bNewHasEdgeLength2;
1064
tree->m_bHasEdgeLength3 = bNewHasEdgeLength3;
1066
tree->m_ptrName = ptrNewName;
1069
/*** end: ExpandCache ***/
1075
* Tree must be pointer to an already allocated tree structure
1079
TreeCreateRooted(tree_t *tree)
1083
tree->m_uNodeCount = 1;
1085
tree->m_uNeighbor1[0] = NULL_NEIGHBOR;
1086
tree->m_uNeighbor2[0] = NULL_NEIGHBOR;
1087
tree->m_uNeighbor3[0] = NULL_NEIGHBOR;
1089
tree->m_bHasEdgeLength1[0] = FALSE;
1090
tree->m_bHasEdgeLength2[0] = FALSE;
1091
tree->m_bHasEdgeLength3[0] = FALSE;
1093
tree->m_bHasHeight[0] = FALSE;
1096
tree->m_uRootNodeIndex = 0;
1097
tree->m_bRooted = TRUE;
1103
/*** end: TreeCreateRooted ***/
1110
UnrootFromFile(tree_t *tree)
1114
Log("Before unroot:\n");
1118
if (!tree->m_bRooted)
1119
Log(&rLog, LOG_FATAL, "Tree::Unroot, not rooted");
1121
/* Convention: root node is always node zero */
1122
assert(IsRoot(0, tree));
1123
assert(NULL_NEIGHBOR == tree->m_uNeighbor1[0]);
1125
uThirdNode = tree->m_uNodeCount++;
1127
tree->m_uNeighbor1[0] = uThirdNode;
1128
tree->m_uNeighbor1[uThirdNode] = 0;
1130
tree->m_uNeighbor2[uThirdNode] = NULL_NEIGHBOR;
1131
tree->m_uNeighbor3[uThirdNode] = NULL_NEIGHBOR;
1133
tree->m_dEdgeLength1[0] = 0;
1134
tree->m_dEdgeLength1[uThirdNode] = 0;
1135
tree->m_bHasEdgeLength1[uThirdNode] = TRUE;
1137
tree->m_bRooted = FALSE;
1140
Log("After unroot:\n");
1146
/*** end: UnrootFromFile ***/
1155
GetGroupFromFile(FILE *fp, uint uNodeIndex, double *ptrdEdgeLength, tree_t *tree)
1158
NEWICK_TOKEN_TYPE NTT = GetToken(fp, szToken, sizeof(szToken));
1163
/* Group is either leaf name or (left, right). */
1164
if (NTT_String == NTT) {
1165
SetLeafName(uNodeIndex, szToken, tree);
1167
Log(&rLog, LOG_FORCED_DEBUG, "Group is leaf '%s'", szToken);
1169
} else if (NTT_Lparen == NTT) {
1170
const unsigned uLeft = AppendBranch(tree, uNodeIndex);
1171
const unsigned uRight = uLeft + 1;
1173
bool bLeftLength = GetGroupFromFile(fp, uLeft, &dEdgeLength, tree);
1175
/* Left sub-group...
1178
Log(&rLog, LOG_FORCED_DEBUG, "%s", "Got '(', group is compound, expect left sub-group");
1181
Log(&rLog, LOG_FORCED_DEBUG, "Edge length for left sub-group: %.3g", dEdgeLength);
1183
Log(&rLog, LOG_FORCED_DEBUG, "%s", "No edge length for left sub-group");
1187
SetEdgeLength(uNodeIndex, uLeft, dEdgeLength, tree);
1189
/* ... then comma ...
1192
Log(&rLog, LOG_FORCED_DEBUG, "%s", "Expect comma");
1194
NTT = GetToken(fp, szToken, sizeof(szToken));
1195
if (NTT_Comma != NTT)
1196
Log(&rLog, LOG_FATAL, "Tree::GetGroupFromFile, expected ',', got '%s'", szToken);
1198
/* ...then right sub-group...
1201
Log(&rLog, LOG_FORCED_DEBUG, "%s", "Expect right sub-group");
1203
bRightLength = GetGroupFromFile(fp, uRight, &dEdgeLength, tree);
1205
SetEdgeLength(uNodeIndex, uRight, dEdgeLength, tree);
1209
Log(&rLog, LOG_FORCED_DEBUG, "Edge length for right sub-group: %.3g", dEdgeLength);
1211
Log(&rLog, LOG_FORCED_DEBUG, "%s", "No edge length for right sub-group");
1214
/* ... then closing parenthesis.
1217
Log(&rLog, LOG_FORCED_DEBUG, "%s", "Expect closing parenthesis (or comma if > 2-ary)");
1219
NTT = GetToken(fp, szToken, sizeof(szToken));
1220
if (NTT_Rparen == NTT)
1222
else if (NTT_Comma == NTT) {
1223
if (ungetc(',', fp)==EOF)
1224
Log(&rLog, LOG_FATAL, "%s" "ungetc failed");
1227
Log(&rLog, LOG_FATAL, "Tree::GetGroupFromFile, expected ')' or ',', got '%s'", szToken);
1229
Log(&rLog, LOG_FATAL, "Tree::GetGroupFromFile, expected '(' or leaf name, got '%s'",
1233
/* Group may optionally be followed by edge length.
1235
bEof = FileSkipWhiteX(fp);
1238
if ((c = fgetc(fp))==EOF) /* GetCharX */
1239
Log(&rLog, LOG_FATAL, "%s", "fgetc reached end of file");
1241
Log(&rLog, LOG_FORCED_DEBUG, "Character following group, could be colon, is '%c'", c);
1244
NTT = GetToken(fp, szToken, sizeof(szToken));
1245
if (NTT_String != NTT)
1246
Log(&rLog, LOG_FATAL, "Tree::GetGroupFromFile, expected edge length, got '%s'", szToken);
1247
*ptrdEdgeLength = atof(szToken);
1250
if (ungetc(c, fp)==EOF)
1251
Log(&rLog, LOG_FATAL, "%s" "ungetc failed");
1255
/*** end: GetGroupFromFile ***/
1264
FileSkipWhite(FILE *fp)
1266
bool bEof = FileSkipWhiteX(fp);
1268
Log(&rLog, LOG_FATAL, "%s", "End-of-file skipping white space");
1270
/*** end: FileSkipWhite ***/
1279
FileSkipWhiteX(FILE *fp)
1286
if ((c = fgetc(fp))==EOF) {
1295
if (ungetc(c, fp)==EOF)
1296
Log(&rLog, LOG_FATAL, "%s" "ungetc failed");
1302
/*** end: FileSkipWhiteX ***/
1311
GetToken(FILE *fp, char szToken[], uint uBytes)
1314
unsigned uBytesCopied = 0;
1315
NEWICK_TOKEN_TYPE TT;
1317
/* Skip leading white space */
1320
if ((c = fgetc(fp))==EOF) /* GetCharX */
1321
Log(&rLog, LOG_FATAL, "%s", "fgetc reached end of file");
1323
/* In case a single-character token */
1339
return NTT_Semicolon;
1345
TT = NTT_SingleQuotedString;
1346
if ((c = fgetc(fp))==EOF) /* GetCharX */
1347
Log(&rLog, LOG_FATAL, "%s", "fgetc reached end of file");
1351
TT = NTT_DoubleQuotedString;
1352
if ((c = fgetc(fp))==EOF) /* GetCharX */
1353
Log(&rLog, LOG_FATAL, "%s", "fgetc reached end of file");
1368
if (TT != NTT_Comment) {
1369
if (uBytesCopied < uBytes - 2) {
1370
szToken[uBytesCopied++] = c;
1371
szToken[uBytesCopied] = 0;
1373
Log(&rLog, LOG_FATAL, "Tree::GetToken: input buffer too small, token so far='%s'", szToken);
1376
c = fgetc(fp); /* GetChar */
1377
bEof = (c==EOF ? TRUE : FALSE);
1384
if (0 != strchr("():;,", c)) {
1385
if (ungetc(c, fp)==EOF)
1386
Log(&rLog, LOG_FATAL, "%s" "ungetc failed");
1393
case NTT_SingleQuotedString:
1398
case NTT_DoubleQuotedString:
1405
return GetToken(fp, szToken, uBytes);
1409
Log(&rLog, LOG_FATAL, "Tree::GetToken, invalid TT=%u", TT);
1413
/*** end: GetToken ***/
1421
SetLeafName(unsigned uNodeIndex, const char *ptrName, tree_t *tree)
1423
assert(uNodeIndex < tree->m_uNodeCount);
1424
assert(IsLeaf(uNodeIndex, tree));
1425
free(tree->m_ptrName[uNodeIndex]);
1426
/*LOG_DEBUG("Setting tree->m_ptrName[uNodeIndex=%d] to %s", uNodeIndex, ptrName);*/
1427
tree->m_ptrName[uNodeIndex] = CkStrdup(ptrName);
1429
/*** end: SetLeafName ***/
1436
* Append a new branch. This adds two new nodes and joins them to an
1437
* existing leaf node. Return value is k, new nodes have indexes k and
1442
AppendBranch(tree_t *tree, uint uExistingLeafIndex)
1448
if (0 == tree->m_uNodeCount) {
1449
Log(&rLog, LOG_FATAL, "%s(): %s", __FUNCTION__, "tree has not been created");
1451
assert(NULL_NEIGHBOR == tree->m_uNeighbor2[uExistingLeafIndex]);
1452
assert(NULL_NEIGHBOR == tree->m_uNeighbor3[uExistingLeafIndex]);
1453
assert(uExistingLeafIndex < tree->m_uNodeCount);
1455
if (!IsLeaf(uExistingLeafIndex, tree)) {
1456
Log(&rLog, LOG_FATAL, "AppendBranch(%u): not leaf", uExistingLeafIndex);
1460
if (tree->m_uNodeCount >= tree->m_uCacheCount - 2) {
1463
uNewLeaf1 = tree->m_uNodeCount;
1464
uNewLeaf2 = tree->m_uNodeCount + 1;
1466
tree->m_uNodeCount += 2;
1468
tree->m_uNeighbor2[uExistingLeafIndex] = uNewLeaf1;
1469
tree->m_uNeighbor3[uExistingLeafIndex] = uNewLeaf2;
1471
tree->m_uNeighbor1[uNewLeaf1] = uExistingLeafIndex;
1472
tree->m_uNeighbor1[uNewLeaf2] = uExistingLeafIndex;
1474
tree->m_uNeighbor2[uNewLeaf1] = NULL_NEIGHBOR;
1475
tree->m_uNeighbor2[uNewLeaf2] = NULL_NEIGHBOR;
1477
tree->m_uNeighbor3[uNewLeaf1] = NULL_NEIGHBOR;
1478
tree->m_uNeighbor3[uNewLeaf2] = NULL_NEIGHBOR;
1480
tree->m_dEdgeLength2[uExistingLeafIndex] = 0;
1481
tree->m_dEdgeLength3[uExistingLeafIndex] = 0;
1483
tree->m_dEdgeLength1[uNewLeaf1] = 0;
1484
tree->m_dEdgeLength2[uNewLeaf1] = 0;
1485
tree->m_dEdgeLength3[uNewLeaf1] = 0;
1487
tree->m_dEdgeLength1[uNewLeaf2] = 0;
1488
tree->m_dEdgeLength2[uNewLeaf2] = 0;
1489
tree->m_dEdgeLength3[uNewLeaf2] = 0;
1491
tree->m_bHasEdgeLength1[uNewLeaf1] = FALSE;
1492
tree->m_bHasEdgeLength2[uNewLeaf1] = FALSE;
1493
tree->m_bHasEdgeLength3[uNewLeaf1] = FALSE;
1495
tree->m_bHasEdgeLength1[uNewLeaf2] = FALSE;
1496
tree->m_bHasEdgeLength2[uNewLeaf2] = FALSE;
1497
tree->m_bHasEdgeLength3[uNewLeaf2] = FALSE;
1500
tree->m_bHasHeight[uNewLeaf1] = FALSE;
1501
tree->m_bHasHeight[uNewLeaf2] = FALSE;
1503
tree->m_Ids[uNewLeaf1] = uInsane;
1504
tree->m_Ids[uNewLeaf2] = uInsane;
1508
/*** end: AppendBranch ***/
1516
SetEdgeLength(uint uNodeIndex1, uint uNodeIndex2,
1517
double dLength, tree_t *tree)
1519
assert(uNodeIndex1 < tree->m_uNodeCount && uNodeIndex2 < tree->m_uNodeCount);
1520
assert(IsEdge(uNodeIndex1, uNodeIndex2, tree));
1522
if (tree->m_uNeighbor1[uNodeIndex1] == uNodeIndex2) {
1523
tree->m_dEdgeLength1[uNodeIndex1] = dLength;
1524
tree->m_bHasEdgeLength1[uNodeIndex1] = TRUE;
1525
} else if (tree->m_uNeighbor2[uNodeIndex1] == uNodeIndex2) {
1526
tree->m_dEdgeLength2[uNodeIndex1] = dLength;
1527
tree->m_bHasEdgeLength2[uNodeIndex1] = TRUE;
1530
assert(tree->m_uNeighbor3[uNodeIndex1] == uNodeIndex2);
1531
tree->m_dEdgeLength3[uNodeIndex1] = dLength;
1532
tree->m_bHasEdgeLength3[uNodeIndex1] = TRUE;
1535
if (tree->m_uNeighbor1[uNodeIndex2] == uNodeIndex1) {
1536
tree->m_dEdgeLength1[uNodeIndex2] = dLength;
1537
tree->m_bHasEdgeLength1[uNodeIndex2] = TRUE;
1538
} else if (tree->m_uNeighbor2[uNodeIndex2] == uNodeIndex1) {
1539
tree->m_dEdgeLength2[uNodeIndex2] = dLength;
1540
tree->m_bHasEdgeLength2[uNodeIndex2] = TRUE;
1542
assert(tree->m_uNeighbor3[uNodeIndex2] == uNodeIndex1);
1543
tree->m_dEdgeLength3[uNodeIndex2] = dLength;
1544
tree->m_bHasEdgeLength3[uNodeIndex2] = TRUE;
1547
/*** end: SetEdgeLength ***/
1559
LogTree(tree_t *tree, FILE *fp)
1565
fprintf(fp, "This is a tree with %u nodes, which is ", tree->m_uNodeCount);
1567
if (IsRooted(tree)) {
1568
fprintf(fp, "rooted:\n");
1569
fprintf(fp, "Index Parnt LengthP Left LengthL Right LengthR Id Name\n");
1570
fprintf(fp, "----- ----- ------- ---- ------- ----- ------- ----- ----\n");
1573
fprintf(fp, "unrooted;\n");
1574
fprintf(fp, "Index Nbr_1 Length1 Nbr_2 Length2 Nbr_3 Length3 Id Name\n");
1575
fprintf(fp, "----- ----- ------- ----- ------- ----- ------- ----- ----\n");
1578
for (uNodeIndex = 0; uNodeIndex < tree->m_uNodeCount; ++uNodeIndex) {
1579
fprintf(fp, "%5u ", uNodeIndex);
1580
n1 = tree->m_uNeighbor1[uNodeIndex];
1581
n2 = tree->m_uNeighbor2[uNodeIndex];
1582
n3 = tree->m_uNeighbor3[uNodeIndex];
1584
if (NULL_NEIGHBOR != n1) {
1585
fprintf(fp, "%5u ", n1);
1586
if (tree->m_bHasEdgeLength1[uNodeIndex])
1587
fprintf(fp, "%7.3g ", tree->m_dEdgeLength1[uNodeIndex]);
1594
if (NULL_NEIGHBOR != n2) {
1595
fprintf(fp, "%5u ", n2);
1596
if (tree->m_bHasEdgeLength2[uNodeIndex])
1597
fprintf(fp, "%7.3g ", tree->m_dEdgeLength2[uNodeIndex]);
1604
if (NULL_NEIGHBOR != n3) {
1605
fprintf(fp, "%5u ", n3);
1606
if (tree->m_bHasEdgeLength3[uNodeIndex])
1607
fprintf(fp, "%7.3g ", tree->m_dEdgeLength3[uNodeIndex]);
1614
if (tree->m_Ids != 0 && IsLeaf(uNodeIndex, tree)) {
1615
unsigned uId = tree->m_Ids[uNodeIndex];
1619
fprintf(fp, "%5u", uId);
1624
if (tree->m_bRooted && uNodeIndex == tree->m_uRootNodeIndex)
1625
fprintf(fp, " [ROOT] ");
1626
ptrName = tree->m_ptrName[uNodeIndex];
1628
fprintf(fp, " %s", ptrName);
1633
/*** end: LogTree ***/
1639
* replaces m_uLeafCount
1642
GetLeafCount(tree_t *tree)
1646
return (tree->m_uNodeCount+1)/2;
1648
/*** GetLeafCount ***/
1656
GetNodeCount(tree_t *tree)
1660
return 2*(GetLeafCount(tree)) - 1;
1662
/*** end: GetNodeCount ***/
1669
GetNeighbor(uint uNodeIndex, uint uNeighborSubscript, tree_t *prTree)
1671
assert(uNodeIndex < prTree->m_uNodeCount);
1672
switch (uNeighborSubscript)
1675
return prTree->m_uNeighbor1[uNodeIndex];
1677
return prTree->m_uNeighbor2[uNodeIndex];
1679
return prTree->m_uNeighbor3[uNodeIndex];
1681
Log(&rLog, LOG_FATAL, "Internal error in %s: sub=%u", __FUNCTION__, uNeighborSubscript);
1682
return NULL_NEIGHBOR;
1684
/*** end: GetNeighbor ***/
1694
SetLeafId(tree_t *tree, uint uNodeIndex, uint uId)
1696
assert(uNodeIndex < tree->m_uNodeCount);
1697
assert(IsLeaf(uNodeIndex, tree));
1698
tree->m_Ids[uNodeIndex] = uId;
1700
/*** end: SetLeafId ***/
1707
GetRootNodeIndex(tree_t *tree)
1710
return tree->m_uRootNodeIndex;
1712
/*** end: GetRootNodeIndex ***/
1717
* @note avoid usage if you want to iterate over all indices, because
1722
LeafIndexToNodeIndex(uint uLeafIndex, tree_t *prTree) {
1723
uint uLeafCount = 0;
1724
unsigned uNodeCount = GetNodeCount(prTree);
1727
for (uNodeIndex = 0; uNodeIndex < uNodeCount; uNodeIndex++) {
1728
if (IsLeaf(uNodeIndex, prTree)) {
1729
if (uLeafCount == uLeafIndex) {
1736
Log(&rLog, LOG_FATAL, "Internal error: node index out of range");
1739
/*** end: LeafIndexToNodeIndex ***/
1745
* @brief Append a (source) tree to a (dest) tree to a given node
1746
* which will be replaced. All other nodes in that tree will stay the
1749
* @param[out] prDstTree
1750
* The tree to append to
1751
* @param[in] uDstTreeReplaceNodeIndex
1752
* Dest tree node to which source tree will be appended
1753
* @param[in] prSrcTree
1754
* The tree to append
1756
* @note No nodes inside prDstTree will change except
1757
* uDstTreeReplaceNodeIndex
1760
* @warning: Function won't check or touch the m_Ids/leaf-indices!
1761
* That means if you want to join two trees with leaf indices 1-10 and
1762
* 1-10 your m_Ids/leaf-indices won't be unique anymore and the
1763
* association between your sequences and the tree are broken. Make
1764
* sure m_Ids are unique before calling me.
1766
* The easiest would have been to do this by recursively calling
1767
* AppendBranch() (after adding uSrcTreeNodeIndex as extra argument to
1768
* this function). But recursion is evil. Yet another version would be
1769
* to setup all the data and call MuscleTreeCreate() to create a third
1770
* tree, which seems complicated and wasteful. The approach taken here
1771
* is the following: increase dest tree memory, copy over each src
1772
* tree node data and update the indices and counters. This is tricky
1773
* and has a lot of potential for bugs if tree interface is changed
1774
* (and even if it isn't).
1778
AppendTree(tree_t *prDstTree, uint uDstTreeReplaceNodeIndex, tree_t *prSrcTree)
1780
uint uSrcTreeNodeIndex;
1781
uint uOrgDstTreeNodeCount;
1783
assert(NULL!=prDstTree);
1784
assert(NULL!=prSrcTree);
1785
assert(uDstTreeReplaceNodeIndex < prDstTree->m_uNodeCount);
1786
assert(IsLeaf(uDstTreeReplaceNodeIndex, prDstTree));
1787
assert(IsRooted(prDstTree) && IsRooted(prSrcTree));
1790
uOrgDstTreeNodeCount = prDstTree->m_uNodeCount;
1793
/* increase dest tree memory
1795
while (prDstTree->m_uCacheCount
1797
(GetNodeCount(prDstTree) + GetNodeCount(prSrcTree))) {
1798
ExpandCache(prDstTree);
1802
/* copy all src tree nodes
1805
for (uSrcTreeNodeIndex=0;
1806
uSrcTreeNodeIndex<GetNodeCount(prSrcTree); uSrcTreeNodeIndex++) {
1807
uint uNewDstNodeIndex = prDstTree->m_uNodeCount;
1809
/* distinguish 4 cases for src nodes to copy:
1811
* 1. src node is the only node, i.e. root & leaf
1813
* 2. src node is root: set only left & right, but not parent
1814
* and just replace the given dest index
1816
* 3. src node is leaf: set only parent
1818
* 4. src node is internal node: update all three neighbours
1820
* FIXME: this is messy. Is there a cleaner way to do this by
1821
* merging all cases?
1824
if (IsRoot(uSrcTreeNodeIndex, prSrcTree) && IsLeaf(uSrcTreeNodeIndex, prSrcTree)) {
1825
/* special case: if this is the only member in
1826
* tree, i.e. it's root and leaf. Copy leaf name and leaf
1827
* id. No neighbours to update
1830
/* free dst node name if set */
1831
if (NULL != prDstTree->m_ptrName[uDstTreeReplaceNodeIndex]) {
1832
CKFREE(prDstTree->m_ptrName[uDstTreeReplaceNodeIndex]);
1835
prDstTree->m_ptrName[uDstTreeReplaceNodeIndex] =
1836
CkStrdup(GetLeafName(uSrcTreeNodeIndex, prSrcTree));
1838
prDstTree->m_Ids[uDstTreeReplaceNodeIndex] =
1839
prSrcTree->m_Ids[uSrcTreeNodeIndex];
1841
/* no updated of uNodeCount, because we used the replace node */
1844
Log(&rLog, LOG_FORCED_DEBUG, "Updated dst rpl node %d with the only src leaf node: parent=%d (%f)",
1845
uDstTreeReplaceNodeIndex,
1846
prDstTree->m_uNeighbor1[uDstTreeReplaceNodeIndex], prDstTree->m_dEdgeLength1[uDstTreeReplaceNodeIndex]);
1849
} else if (IsRoot(uSrcTreeNodeIndex, prSrcTree)) {
1850
/* src node is root: replace uDstTreeReplaceNodeIndex
1851
* (not uNewDstNodeIndex) with src root, i.e. the
1852
* uDstTreeReplaceNodeIndex becomes an internal node now.
1854
* We only have two neighbours 2 & 3 (no parent). Keep old
1855
* parent info (neighbor 1).
1858
/* free dst node name if set */
1859
if (NULL != prDstTree->m_ptrName[uDstTreeReplaceNodeIndex]) {
1860
CKFREE(prDstTree->m_ptrName[uDstTreeReplaceNodeIndex]);
1863
prDstTree->m_uNeighbor2[uDstTreeReplaceNodeIndex] =
1864
prSrcTree->m_uNeighbor2[uSrcTreeNodeIndex] + uOrgDstTreeNodeCount;
1865
prDstTree->m_uNeighbor3[uDstTreeReplaceNodeIndex] =
1866
prSrcTree->m_uNeighbor3[uSrcTreeNodeIndex] + uOrgDstTreeNodeCount;
1868
prDstTree->m_bHasEdgeLength2[uDstTreeReplaceNodeIndex] =
1869
prSrcTree->m_bHasEdgeLength2[uSrcTreeNodeIndex];
1870
prDstTree->m_bHasEdgeLength3[uDstTreeReplaceNodeIndex] =
1871
prSrcTree->m_bHasEdgeLength3[uSrcTreeNodeIndex];
1873
prDstTree->m_dEdgeLength2[uDstTreeReplaceNodeIndex] =
1874
prSrcTree->m_dEdgeLength2[uSrcTreeNodeIndex];
1875
prDstTree->m_dEdgeLength3[uDstTreeReplaceNodeIndex] =
1876
prSrcTree->m_dEdgeLength3[uSrcTreeNodeIndex];
1878
/* make Id invalid */
1879
prDstTree->m_Ids[uDstTreeReplaceNodeIndex] = uInsane;
1881
/* no updated of uNodeCount, because we used the replace node */
1884
Log(&rLog, LOG_FORCED_DEBUG, "Updated dst rpl node %d with the src root node: (untouched) parent=%d (%f) left=%d (%f) right=%d (%f)",
1885
uDstTreeReplaceNodeIndex,
1886
prDstTree->m_uNeighbor1[uDstTreeReplaceNodeIndex], prDstTree->m_dEdgeLength1[uDstTreeReplaceNodeIndex],
1887
prDstTree->m_uNeighbor2[uDstTreeReplaceNodeIndex], prDstTree->m_dEdgeLength2[uDstTreeReplaceNodeIndex],
1888
prDstTree->m_uNeighbor3[uDstTreeReplaceNodeIndex], prDstTree->m_dEdgeLength3[uDstTreeReplaceNodeIndex]);
1891
} else if (IsLeaf(uSrcTreeNodeIndex, prSrcTree)) {
1892
/* src node is a leaf, which means we only have one
1893
* neighbour, and that is its parent, i.e. n1
1897
/* initialise/zero new node to default values
1899
InitNode(prDstTree, uNewDstNodeIndex);
1902
/* update m_ptrName/leaf name
1904
prDstTree->m_ptrName[uNewDstNodeIndex] =
1905
CkStrdup(GetLeafName(uSrcTreeNodeIndex, prSrcTree));
1907
/* update parent node (beware of special case: parent was
1909
if (IsRoot(prSrcTree->m_uNeighbor1[uSrcTreeNodeIndex], prSrcTree)) {
1910
prDstTree->m_uNeighbor1[uNewDstNodeIndex] =
1911
uDstTreeReplaceNodeIndex;
1913
prDstTree->m_uNeighbor1[uNewDstNodeIndex] =
1914
prSrcTree->m_uNeighbor1[uSrcTreeNodeIndex] + uOrgDstTreeNodeCount;
1917
/* update edge length info to parent
1919
prDstTree->m_bHasEdgeLength1[uNewDstNodeIndex] =
1920
prSrcTree->m_bHasEdgeLength1[uSrcTreeNodeIndex];
1921
prDstTree->m_dEdgeLength1[uNewDstNodeIndex] =
1922
prSrcTree->m_dEdgeLength1[uSrcTreeNodeIndex];
1924
/* update sequence/object id
1926
prDstTree->m_Ids[uNewDstNodeIndex] =
1927
prSrcTree->m_Ids[uSrcTreeNodeIndex];
1929
/* we used a new node so increase their count */
1930
prDstTree->m_uNodeCount += 1;
1933
Log(&rLog, LOG_FORCED_DEBUG, "Updated dst node %d with a src leaf node: parent=%d (%f)",
1935
prDstTree->m_uNeighbor1[uNewDstNodeIndex], prDstTree->m_dEdgeLength1[uNewDstNodeIndex]);
1939
/* src node is not root neither leaf, means we have an
1940
* internal node. Update all neighbour info
1944
/* initialise/zero node values to default values
1946
InitNode(prDstTree, uNewDstNodeIndex);
1950
/* parent: special case if parent was src tree root */
1951
if (IsRoot(prSrcTree->m_uNeighbor1[uSrcTreeNodeIndex], prSrcTree)) {
1952
prDstTree->m_uNeighbor1[uNewDstNodeIndex] =
1953
uDstTreeReplaceNodeIndex;
1955
prDstTree->m_uNeighbor1[uNewDstNodeIndex] =
1956
prSrcTree->m_uNeighbor1[uSrcTreeNodeIndex] + uOrgDstTreeNodeCount;
1959
prDstTree->m_uNeighbor2[uNewDstNodeIndex] =
1960
prSrcTree->m_uNeighbor2[uSrcTreeNodeIndex] + uOrgDstTreeNodeCount;
1962
prDstTree->m_uNeighbor3[uNewDstNodeIndex] =
1963
prSrcTree->m_uNeighbor3[uSrcTreeNodeIndex] + uOrgDstTreeNodeCount;
1965
/* update edge length info
1968
prDstTree->m_bHasEdgeLength1[uNewDstNodeIndex] =
1969
prSrcTree->m_bHasEdgeLength1[uSrcTreeNodeIndex];
1970
prDstTree->m_dEdgeLength1[uNewDstNodeIndex] =
1971
prSrcTree->m_dEdgeLength1[uSrcTreeNodeIndex];
1973
prDstTree->m_bHasEdgeLength2[uNewDstNodeIndex] =
1974
prSrcTree->m_bHasEdgeLength2[uSrcTreeNodeIndex];
1975
prDstTree->m_dEdgeLength2[uNewDstNodeIndex] =
1976
prSrcTree->m_dEdgeLength2[uSrcTreeNodeIndex];
1978
prDstTree->m_bHasEdgeLength3[uNewDstNodeIndex] =
1979
prSrcTree->m_bHasEdgeLength3[uSrcTreeNodeIndex];
1980
prDstTree->m_dEdgeLength3[uNewDstNodeIndex] =
1981
prSrcTree->m_dEdgeLength3[uSrcTreeNodeIndex];
1983
/* we used a new node so increase their count */
1984
prDstTree->m_uNodeCount += 1;
1987
Log(&rLog, LOG_FORCED_DEBUG, "Updated dst node %d with an internal src node: parent=%d (%f) left=%d (%f) right=%d (%f)",
1989
prDstTree->m_uNeighbor1[uNewDstNodeIndex], prDstTree->m_dEdgeLength1[uNewDstNodeIndex],
1990
prDstTree->m_uNeighbor2[uNewDstNodeIndex], prDstTree->m_dEdgeLength2[uNewDstNodeIndex],
1991
prDstTree->m_uNeighbor3[uNewDstNodeIndex], prDstTree->m_dEdgeLength3[uNewDstNodeIndex]);
1996
/* end for each src tree node */
2000
* m_uRootNodeIndex stays the same.
2002
* No need to touch m_uCacheCount.
2006
Log(&rLog, LOG_FATAL, "Internal error: Height usage not implemented in %s", __FUNCTION__);
2011
TreeValidate(prDstTree);
2016
/*** end: AppendTree() ***/