~ubuntu-branches/ubuntu/quantal/clustalo/quantal

« back to all changes in this revision

Viewing changes to src/clustal/muscle_tree.c

  • Committer: Package Import Robot
  • Author(s): Olivier Sallou
  • Date: 2011-12-14 11:21:56 UTC
  • Revision ID: package-import@ubuntu.com-20111214112156-y3chwm0t4yn2nevm
Tags: upstream-1.0.3
ImportĀ upstreamĀ versionĀ 1.0.3

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/* -*- mode: c; tab-width: 4; c-basic-offset: 4;  indent-tabs-mode: nil -*- */
 
2
 
 
3
/* This a mix of tree functions and data-structures from
 
4
 * Bob Edgar's Muscle (version 3.7) ported to pure C, topped up with
 
5
 * some of our own stuff.
 
6
 *
 
7
 * Used files: phy.cpp, tree.h, phytofile.cpp and phyfromclust.cpp
 
8
 *
 
9
 * Muscle's code is public domain and so is this code here.
 
10
 
 
11
 * From http://www.drive5.com/muscle/license.htm:
 
12
 * """
 
13
 * MUSCLE is public domain software
 
14
 *
 
15
 * The MUSCLE software, including object and source code and
 
16
 * documentation, is hereby donated to the public domain.
 
17
 *
 
18
 * Disclaimer of warranty
 
19
 * THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND,
 
20
 * EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION IMPLIED
 
21
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 
22
 * """
 
23
 *
 
24
 */
 
25
 
 
26
/*
 
27
 *  RCS $Id: muscle_tree.c 230 2011-04-09 15:37:50Z andreas $
 
28
 */
 
29
 
 
30
#include <stdlib.h>
 
31
#include <stdio.h>
 
32
#include <string.h>
 
33
#include <limits.h>
 
34
#include <assert.h>
 
35
#include <ctype.h>
 
36
 
 
37
#include "util.h"
 
38
#include "log.h"
 
39
#include "muscle_tree.h"
 
40
 
 
41
 
 
42
static const double VERY_NEGATIVE_DOUBLE = -9e29;
 
43
/*const double dInsane = VERY_NEGATIVE_DOUBLE;*/
 
44
static const double dInsane = -9e29;
 
45
static const unsigned uInsane = 8888888;
 
46
 
 
47
typedef enum 
 
48
{
 
49
    NTT_Unknown,
 
50
 
 
51
    /* Returned from Tree::GetToken: */
 
52
    NTT_Lparen,
 
53
    NTT_Rparen,
 
54
    NTT_Colon,
 
55
    NTT_Comma,
 
56
    NTT_Semicolon,
 
57
    NTT_String,
 
58
    
 
59
    /* Following are never returned from Tree::GetToken: */
 
60
    NTT_SingleQuotedString,
 
61
    NTT_DoubleQuotedString,
 
62
    NTT_Comment
 
63
} NEWICK_TOKEN_TYPE;
 
64
 
 
65
 
 
66
static void
 
67
InitCache(uint uCacheCount, tree_t *tree);
 
68
static void
 
69
TreeZero(tree_t *tree);
 
70
static uint
 
71
GetNeighborCount(unsigned uNodeIndex, tree_t *tree);
 
72
static bool
 
73
IsEdge(unsigned uNodeIndex1, unsigned uNodeIndex2, tree_t *tree);
 
74
static bool
 
75
HasEdgeLength(uint uNodeIndex1, uint uNodeIndex2, tree_t *tree);
 
76
static void
 
77
TreeToFileNodeRooted(tree_t *tree, uint m_uRootNodeIndex, FILE *fp);
 
78
static void
 
79
ValidateNode(uint uNodeIndex, tree_t *tree);
 
80
static void
 
81
AssertAreNeighbors(unsigned uNodeIndex1, unsigned uNodeIndex2, tree_t *tree);
 
82
static void
 
83
ExpandCache(tree_t *tree);
 
84
static void
 
85
TreeCreateRooted(tree_t *tree);
 
86
static bool
 
87
GetGroupFromFile(FILE *fp, uint uNodeIndex, double *ptrdEdgeLength, tree_t *tree);
 
88
static NEWICK_TOKEN_TYPE
 
89
GetToken(FILE *fp, char szToken[], uint uBytes);
 
90
/* stuff from textfile.cpp */
 
91
static void
 
92
FileSkipWhite(FILE *fp);
 
93
static bool
 
94
FileSkipWhiteX(FILE *fp);
 
95
static void
 
96
SetLeafName(uint uNodeIndex, const char *ptrName, tree_t *tree);
 
97
uint
 
98
AppendBranch(tree_t *tree, uint uExistingLeafIndex);
 
99
static void
 
100
SetEdgeLength(uint uNodeIndex1, uint uNodeIndex2,
 
101
              double dLength, tree_t *tree);
 
102
static uint
 
103
UnrootFromFile(tree_t *tree);
 
104
uint
 
105
GetNeighbor(uint uNodeIndex, uint uNeighborSubscript, tree_t *tree);
 
106
static void
 
107
InitNode(tree_t *prTree, uint uNodeIndex);
 
108
 
 
109
 
 
110
/**
 
111
 * @param[in] uNodeIndex
 
112
 * node to examine
 
113
 * @param[in] tree
 
114
 * tree to examine
 
115
 * @return id of left node
 
116
 * @note called GetRight in Muscle3.7
 
117
 */
 
118
uint
 
119
GetLeft(uint uNodeIndex, tree_t *tree) 
 
120
{
 
121
    assert(NULL != tree);
 
122
    assert(tree->m_bRooted && uNodeIndex < tree->m_uNodeCount);
 
123
    return tree->m_uNeighbor2[uNodeIndex];
 
124
}
 
125
/***   end: GetLeft   ***/
 
126
 
 
127
 
 
128
 
 
129
/**
 
130
 * @param[in] uNodeIndex
 
131
 * node to examine
 
132
 * @param[in] tree
 
133
 * tree to examine
 
134
 * @return id of right node
 
135
 * @note called GetRight in Muscle3.7
 
136
 */
 
137
uint
 
138
GetRight(uint uNodeIndex, tree_t *tree)
 
139
{
 
140
    assert(NULL != tree);
 
141
    assert(tree->m_bRooted && uNodeIndex < tree->m_uNodeCount);
 
142
    return tree->m_uNeighbor3[uNodeIndex];
 
143
}
 
144
/***   end: GetRight   ***/
 
145
 
 
146
 
 
147
 
 
148
/**
 
149
 * @param[in] uNodeIndex
 
150
 * node to examine
 
151
 * @param[in] tree
 
152
 * tree to examine
 
153
 * @return leaf id of current node
 
154
 */
 
155
uint
 
156
GetLeafId(uint uNodeIndex, tree_t *tree)
 
157
{
 
158
    assert(NULL != tree);
 
159
    assert(uNodeIndex < tree->m_uNodeCount);
 
160
    assert(IsLeaf(uNodeIndex, tree));
 
161
    
 
162
    return tree->m_Ids[uNodeIndex];
 
163
}
 
164
/***   end: GetLeafId   ***/
 
165
 
 
166
 
 
167
 
 
168
/**
 
169
 * @note originally called GetLeafName
 
170
 *
 
171
 */
 
172
char *
 
173
GetLeafName(unsigned uNodeIndex, tree_t *tree)
 
174
{
 
175
    assert(NULL != tree);
 
176
    assert(uNodeIndex < tree->m_uNodeCount);
 
177
    assert(IsLeaf(uNodeIndex, tree));
 
178
    return tree->m_ptrName[uNodeIndex];
 
179
}
 
180
/***   end: GetLeafName   ***/
 
181
 
 
182
 
 
183
 
 
184
/**
 
185
 * @brief returns first leaf node for a depth-first traversal of tree
 
186
 *
 
187
 * @param[in] tree
 
188
 * tree to traverse
 
189
 *
 
190
 * @return node index of first leaf node for depth-first traversal
 
191
 *
 
192
 * @note called FirstDepthFirstNode in Muscle3.7
 
193
 *
 
194
 */
 
195
uint
 
196
FirstDepthFirstNode(tree_t *tree)
 
197
{
 
198
    uint uNodeIndex;
 
199
        
 
200
    assert(NULL != tree);
 
201
    assert(IsRooted(tree));    
 
202
    
 
203
    /* Descend via left branches until we hit a leaf */
 
204
    uNodeIndex =  tree->m_uRootNodeIndex;
 
205
    while (!IsLeaf(uNodeIndex, tree))
 
206
        uNodeIndex = GetLeft(uNodeIndex, tree);
 
207
    return uNodeIndex;
 
208
}
 
209
/***   end: FirstDepthFirstNode   ***/
 
210
 
 
211
 
 
212
/**
 
213
 * @brief returns next leaf node index for depth-first traversal of
 
214
 * tree
 
215
 *
 
216
 * @param[in] tree
 
217
 * tree to traverse
 
218
 * @param[in] uNodeIndex
 
219
 * Current node index
 
220
 *
 
221
 * @return node index of next leaf node during depth-first traversal
 
222
 *
 
223
 * @note called NextDepthFirstNode in Muscle3.7
 
224
 */
 
225
uint
 
226
NextDepthFirstNode(uint uNodeIndex, tree_t *tree)
 
227
{
 
228
    uint uParent;
 
229
    
 
230
    assert(NULL != tree);
 
231
    assert(IsRooted(tree));
 
232
    assert(uNodeIndex < tree->m_uNodeCount);
 
233
    
 
234
    if (IsRoot(uNodeIndex, tree))
 
235
    {
 
236
        /* Node uNodeIndex is root, end of traversal */
 
237
        return NULL_NEIGHBOR;
 
238
    }
 
239
    
 
240
    uParent = GetParent(uNodeIndex, tree);
 
241
    if (GetRight(uParent, tree) == uNodeIndex) {
 
242
        /* Is right branch, return parent=uParent */
 
243
        return uParent;
 
244
    }
 
245
    
 
246
    uNodeIndex = GetRight(uParent, tree);
 
247
    /* Descend left from right sibling uNodeIndex */
 
248
    while (!IsLeaf(uNodeIndex, tree)) {
 
249
        uNodeIndex = GetLeft(uNodeIndex, tree);
 
250
    }
 
251
    
 
252
    /*  bottom out at leaf uNodeIndex */
 
253
    return uNodeIndex;
 
254
}
 
255
/***   end: NextDepthFirstNode   ***/
 
256
 
 
257
 
 
258
 
 
259
/**
 
260
 * @brief check if tree is a rooted tree
 
261
 * @param[in] tree
 
262
 * tree to check
 
263
 * @return TRUE if given tree is rooted, FALSE otherwise
 
264
 *
 
265
 */
 
266
bool
 
267
IsRooted(tree_t *tree) 
 
268
{
 
269
    assert(NULL != tree);
 
270
    return tree->m_bRooted;
 
271
}
 
272
/***   end: IsRooted   ***/
 
273
 
 
274
 
 
275
/**
 
276
 *
 
277
 */
 
278
void
 
279
FreeMuscleTree(tree_t *tree)
 
280
{
 
281
    uint i;
 
282
 
 
283
    assert(tree!=NULL);
 
284
    
 
285
 
 
286
    /* FIXME use GetLeafNodeIndex? or
 
287
       for (unsigned uNodeIndex = 0; uNodeIndex < m_uNodeCount; ++uNodeIndex)
 
288
       {
 
289
       if (tree.IsLeaf(uNodeIndex))
 
290
       {
 
291
       const char *ptrName =
 
292
       tree.GetLeafName(uNodeIndex);
 
293
    */
 
294
    /* IsLeaf needs m_uNodeCount and all m_uNeighbor's
 
295
     * so free first
 
296
     */
 
297
    for (i=0; i<tree->m_uNodeCount; i++) {
 
298
        /* IsLeaf needs neighbour count, so free those guys later */
 
299
        if (IsLeaf(i, tree)) {
 
300
            CKFREE(tree->m_ptrName[i]);
 
301
        }
 
302
    }
 
303
    CKFREE(tree->m_ptrName);
 
304
 
 
305
    CKFREE(tree->m_uNeighbor1);
 
306
    CKFREE(tree->m_uNeighbor2);
 
307
    CKFREE(tree->m_uNeighbor3);
 
308
    
 
309
    CKFREE(tree->m_Ids);
 
310
    
 
311
    CKFREE(tree->m_dEdgeLength1);
 
312
    CKFREE(tree->m_dEdgeLength2);
 
313
    CKFREE(tree->m_dEdgeLength3);
 
314
#if USE_HEIGHT
 
315
    CKFREE(tree->m_dHeight);
 
316
    CKFREE(tree->m_bHasHeight);
 
317
#endif 
 
318
    CKFREE(tree->m_bHasEdgeLength1);
 
319
    CKFREE(tree->m_bHasEdgeLength2);
 
320
    CKFREE(tree->m_bHasEdgeLength3);
 
321
    
 
322
    TreeZero(tree);
 
323
 
 
324
    free(tree);
 
325
}
 
326
/***   end: FreeMuscleTree   ***/
 
327
 
 
328
 
 
329
 
 
330
/**
 
331
 * @brief create a muscle tree
 
332
 *
 
333
 * @note Original comment in Muscle code: "Create rooted tree from a
 
334
 * vector description. Node indexes are 0..N-1 for leaves, N..2N-2 for
 
335
 * internal nodes. Vector subscripts are i-N and have values for
 
336
 * internal nodes only, but those values are node indexes 0..2N-2. So
 
337
 * e.g. if N=6 and Left[2]=1, this means that the third internal node
 
338
 * (node index 8) has the second leaf (node index 1) as its left
 
339
 * child. uRoot gives the vector subscript of the root, so add N to
 
340
 * get the node index."
 
341
 *
 
342
 * @param[out] tree
 
343
 * newly created tree
 
344
 * @param[in] uLeafCount
 
345
 * number of leaf nodes
 
346
 * @param[in] uRoot
 
347
 * Internal node index of root node
 
348
 * @param[in] Left
 
349
 * Node index of left sibling of an internal node.
 
350
 * Index range: 0--uLeafCount-1
 
351
 * @param[in] Right
 
352
 * Node index of right sibling of an internal node.
 
353
 * Index range: 0--uLeafCount-1
 
354
 * @param[in] LeftLength
 
355
 * Branch length of left branch of an internal node.
 
356
 * Index range: 0--uLeafCount-1
 
357
 * @param[in] RightLength
 
358
 * Branch length of right branch of an internal node.
 
359
 * Index range: 0--uLeafCount-1
 
360
 * @param[in] LeafIds
 
361
 * Leaf id. Index range: 0--uLeafCount
 
362
 * @param[in] LeafNames
 
363
 * Leaf label. Index range: 0--uLeafCount
 
364
 *
 
365
 */
 
366
void
 
367
MuscleTreeCreate(tree_t *tree,
 
368
                  uint uLeafCount, uint uRoot,
 
369
                  const uint *Left, const uint *Right,
 
370
                  const float *LeftLength, const float* RightLength,
 
371
                  const uint *LeafIds, char **LeafNames)
 
372
{
 
373
    uint uNodeIndex;
 
374
 
 
375
        TreeZero(tree);
 
376
    tree->m_uNodeCount = 2*uLeafCount - 1;
 
377
        InitCache(tree->m_uNodeCount, tree);
 
378
    
 
379
        for (uNodeIndex = 0; uNodeIndex < uLeafCount; ++uNodeIndex) {
 
380
                tree->m_Ids[uNodeIndex] = LeafIds[uNodeIndex];
 
381
                tree->m_ptrName[uNodeIndex] = CkStrdup(LeafNames[uNodeIndex]);
 
382
    }
 
383
 
 
384
        for (uNodeIndex = uLeafCount; uNodeIndex < tree->m_uNodeCount; ++uNodeIndex) {
 
385
                uint v = uNodeIndex - uLeafCount;
 
386
                uint uLeft = Left[v];
 
387
                uint uRight = Right[v];
 
388
                float fLeft = LeftLength[v];
 
389
                float fRight = RightLength[v];
 
390
        
 
391
                tree->m_uNeighbor2[uNodeIndex] = uLeft;
 
392
                tree->m_uNeighbor3[uNodeIndex] = uRight;
 
393
        
 
394
                tree->m_bHasEdgeLength2[uNodeIndex] = TRUE;
 
395
                tree->m_bHasEdgeLength3[uNodeIndex] = TRUE;
 
396
 
 
397
                tree->m_dEdgeLength2[uNodeIndex] = fLeft;
 
398
                tree->m_dEdgeLength3[uNodeIndex] = fRight;
 
399
 
 
400
                tree->m_uNeighbor1[uLeft] = uNodeIndex;
 
401
                tree->m_uNeighbor1[uRight] = uNodeIndex;
 
402
 
 
403
                tree->m_dEdgeLength1[uLeft] = fLeft;
 
404
                tree->m_dEdgeLength1[uRight] = fRight;
 
405
 
 
406
                tree->m_bHasEdgeLength1[uLeft] = TRUE;
 
407
                tree->m_bHasEdgeLength1[uRight] = TRUE;
 
408
    }
 
409
 
 
410
        tree->m_bRooted = TRUE;
 
411
        tree->m_uRootNodeIndex = uRoot + uLeafCount;
 
412
#ifndef NDEBUG
 
413
        TreeValidate(tree);
 
414
#endif
 
415
}
 
416
/***   end: MuscleTreeCreate   ***/
 
417
 
 
418
 
 
419
 
 
420
/**
 
421
 * @param[in] tree
 
422
 * tree to write
 
423
 * @param[out] fp
 
424
 * filepointer to write to2
 
425
 * 
 
426
 * @brief write a muscle tree to a file in newick format (distances
 
427
 * and all names)
 
428
 *
 
429
 * 
 
430
 */
 
431
void
 
432
MuscleTreeToFile(FILE *fp, tree_t *tree)
 
433
{
 
434
    assert(NULL != tree);
 
435
    if (IsRooted(tree)) {
 
436
        TreeToFileNodeRooted(tree, tree->m_uRootNodeIndex, fp);
 
437
        fprintf(fp, ";\n");
 
438
        return;
 
439
    } else {
 
440
        Log(&rLog, LOG_FATAL, "FIXME: output of unrooted muscle trees not implemented");
 
441
    }
 
442
}
 
443
/***   end: MuscleTreeToFile   ***/
 
444
 
 
445
 
 
446
 
 
447
/**
 
448
 * @brief check if given node is a leaf node
 
449
 *
 
450
 * @param[in] uNodeIndex
 
451
 * the node index
 
452
 * @param tree
 
453
 * the tree
 
454
 *
 
455
 * @return TRUE if given node is a leaf, FALSE otherwise
 
456
 *
 
457
 * @note called IsLeaf in Muscle3.7. See tree.h in original code
 
458
 */
 
459
bool
 
460
IsLeaf(uint uNodeIndex, tree_t *tree)
 
461
{
 
462
    assert(NULL != tree);
 
463
    assert(uNodeIndex < tree->m_uNodeCount);
 
464
    if (1 == tree->m_uNodeCount)
 
465
        return TRUE;
 
466
    return 1 == GetNeighborCount(uNodeIndex, tree);
 
467
}
 
468
/***   end: IsLeaf   ***/
 
469
 
 
470
 
 
471
 
 
472
/**
 
473
 */
 
474
bool
 
475
IsRoot(uint uNodeIndex, tree_t *tree)
 
476
{
 
477
    assert(NULL != tree);
 
478
    return IsRooted(tree) && tree->m_uRootNodeIndex == uNodeIndex;
 
479
}
 
480
/***   end: IsRoot   ***/
 
481
 
 
482
 
 
483
 
 
484
/**
 
485
 */
 
486
uint
 
487
GetNeighborCount(uint uNodeIndex, tree_t *tree)
 
488
{
 
489
    uint n1, n2, n3;
 
490
    assert(uNodeIndex < tree->m_uNodeCount);
 
491
    assert(NULL != tree);
 
492
    assert(NULL != tree->m_uNeighbor1);
 
493
    assert(NULL != tree->m_uNeighbor2);
 
494
    assert(NULL != tree->m_uNeighbor3);
 
495
    n1 = tree->m_uNeighbor1[uNodeIndex];
 
496
    n2 = tree->m_uNeighbor2[uNodeIndex];
 
497
    n3 = tree->m_uNeighbor3[uNodeIndex];
 
498
    return (NULL_NEIGHBOR != n1) + (NULL_NEIGHBOR != n2) + (NULL_NEIGHBOR != n3);
 
499
}
 
500
/***   end: GetNeighborCount   ***/
 
501
 
 
502
 
 
503
/**
 
504
 */
 
505
uint
 
506
GetParent(unsigned uNodeIndex, tree_t *tree)
 
507
{
 
508
    assert(NULL != tree);
 
509
    assert(tree->m_bRooted && uNodeIndex < tree->m_uNodeCount);
 
510
    return tree->m_uNeighbor1[uNodeIndex];
 
511
}
 
512
/***   end: GetParent   ***/
 
513
 
 
514
 
 
515
 
 
516
/**
 
517
 */
 
518
bool
 
519
IsEdge(unsigned uNodeIndex1, unsigned uNodeIndex2, tree_t *tree)
 
520
{
 
521
    assert(uNodeIndex1 < tree->m_uNodeCount && uNodeIndex2 < tree->m_uNodeCount);
 
522
    assert(NULL != tree);
 
523
    
 
524
    return tree->m_uNeighbor1[uNodeIndex1] == uNodeIndex2 ||
 
525
        tree->m_uNeighbor2[uNodeIndex1] == uNodeIndex2 ||
 
526
        tree->m_uNeighbor3[uNodeIndex1] == uNodeIndex2;
 
527
}
 
528
/***   end: IsEdge   ***/
 
529
 
 
530
 
 
531
 
 
532
/**
 
533
 */
 
534
bool
 
535
HasEdgeLength(uint uNodeIndex1, uint uNodeIndex2, tree_t *tree)
 
536
{
 
537
    assert(NULL != tree);
 
538
    assert(uNodeIndex1 < tree->m_uNodeCount);
 
539
    assert(uNodeIndex2 < tree->m_uNodeCount);
 
540
    assert(IsEdge(uNodeIndex1, uNodeIndex2, tree));
 
541
    
 
542
    if (tree->m_uNeighbor1[uNodeIndex1] == uNodeIndex2)
 
543
        return tree->m_bHasEdgeLength1[uNodeIndex1];
 
544
    else if (tree->m_uNeighbor2[uNodeIndex1] == uNodeIndex2)
 
545
        return tree->m_bHasEdgeLength2[uNodeIndex1];
 
546
    assert(tree->m_uNeighbor3[uNodeIndex1] == uNodeIndex2);
 
547
    return tree->m_bHasEdgeLength3[uNodeIndex1];
 
548
}
 
549
/***   end:   ***/
 
550
 
 
551
 
 
552
/**
 
553
 */
 
554
double
 
555
GetEdgeLength(uint uNodeIndex1, uint uNodeIndex2, tree_t *tree)
 
556
{
 
557
    assert(NULL != tree);
 
558
    assert(uNodeIndex1 < tree->m_uNodeCount && uNodeIndex2 < tree->m_uNodeCount);
 
559
    if (!HasEdgeLength(uNodeIndex1, uNodeIndex2, tree))
 
560
    {
 
561
        Log(&rLog, LOG_FATAL, "Missing edge length in tree %u-%u", uNodeIndex1, uNodeIndex2);
 
562
    }
 
563
    
 
564
    if (tree->m_uNeighbor1[uNodeIndex1] == uNodeIndex2)
 
565
        return tree->m_dEdgeLength1[uNodeIndex1];
 
566
    else if (tree->m_uNeighbor2[uNodeIndex1] == uNodeIndex2)
 
567
        return tree->m_dEdgeLength2[uNodeIndex1];
 
568
    assert(tree->m_uNeighbor3[uNodeIndex1] == uNodeIndex2);
 
569
    return tree->m_dEdgeLength3[uNodeIndex1];
 
570
}
 
571
/***   end: GetEdgeLength   ***/
 
572
 
 
573
 
 
574
/**
 
575
 *
 
576
 */
 
577
void
 
578
InitNode(tree_t *prTree, uint uNodeIndex)
 
579
{
 
580
    prTree->m_uNeighbor1[uNodeIndex] = NULL_NEIGHBOR;
 
581
    prTree->m_uNeighbor2[uNodeIndex] = NULL_NEIGHBOR;
 
582
    prTree->m_uNeighbor3[uNodeIndex] = NULL_NEIGHBOR;
 
583
    prTree->m_bHasEdgeLength1[uNodeIndex] = FALSE;
 
584
    prTree->m_bHasEdgeLength2[uNodeIndex] = FALSE;
 
585
    prTree->m_bHasEdgeLength3[uNodeIndex] = FALSE;
 
586
#if USE_HEIGHT
 
587
    prTree->m_bHasHeight[uNodeIndex] = FALSE;
 
588
    prTree->m_dHeight[uNodeIndex] = dInsane;
 
589
#endif
 
590
    prTree->m_dEdgeLength1[uNodeIndex] = dInsane;
 
591
    prTree->m_dEdgeLength2[uNodeIndex] = dInsane;
 
592
    prTree->m_dEdgeLength3[uNodeIndex] = dInsane;
 
593
    
 
594
    prTree->m_ptrName[uNodeIndex] = NULL;
 
595
    prTree->m_Ids[uNodeIndex] = uInsane;
 
596
}
 
597
/***   end: InitNode   ***/
 
598
 
 
599
 
 
600
/**
 
601
 *
 
602
 */
 
603
void
 
604
InitCache(uint uCacheCount, tree_t *tree)
 
605
{
 
606
    uint uNodeIndex;
 
607
    
 
608
    tree->m_uCacheCount = uCacheCount;
 
609
    
 
610
    tree->m_uNeighbor1 = (uint *) CKMALLOC(sizeof(uint) * tree->m_uCacheCount);
 
611
    tree->m_uNeighbor2 = (uint *) CKMALLOC(sizeof(uint) * tree->m_uCacheCount);
 
612
    tree->m_uNeighbor3 = (uint *) CKMALLOC(sizeof(uint) * tree->m_uCacheCount);
 
613
    
 
614
    tree->m_Ids = (uint *) CKMALLOC(sizeof(uint) * tree->m_uCacheCount);
 
615
    
 
616
    tree->m_dEdgeLength1 = (double *) CKMALLOC(sizeof(double) * tree->m_uCacheCount);
 
617
    tree->m_dEdgeLength2 = (double *) CKMALLOC(sizeof(double) * tree->m_uCacheCount);
 
618
    tree->m_dEdgeLength3 = (double *) CKMALLOC(sizeof(double) * tree->m_uCacheCount);
 
619
#if USE_HEIGHT
 
620
    tree->m_dHeight = (double *) CKMALLOC(sizeof(double) * tree->m_uCacheCount);
 
621
    tree->m_bHasHeight = (bool *) CKMALLOC(sizeof(bool) * tree->m_uCacheCount);
 
622
#endif    
 
623
    tree->m_bHasEdgeLength1 = (bool *) CKMALLOC(sizeof(bool) * tree->m_uCacheCount);
 
624
    tree->m_bHasEdgeLength2 = (bool *) CKMALLOC(sizeof(bool) * tree->m_uCacheCount);
 
625
    tree->m_bHasEdgeLength3 = (bool *) CKMALLOC(sizeof(bool) * tree->m_uCacheCount);
 
626
 
 
627
    tree->m_ptrName = (char **) CKMALLOC(sizeof(char *) * tree->m_uCacheCount);
 
628
    
 
629
    for (uNodeIndex = 0; uNodeIndex < tree->m_uNodeCount; ++uNodeIndex) {
 
630
        InitNode(tree, uNodeIndex);
 
631
    }
 
632
}
 
633
/***   end: InitCache   ***/
 
634
 
 
635
 
 
636
 
 
637
 
 
638
/**
 
639
 *
 
640
 * @note Replacing Tree::Clear but no freeing of memory! Just setting
 
641
 * everything to 0/NULL
 
642
 */
 
643
void
 
644
TreeZero(tree_t *tree)
 
645
{
 
646
    assert(NULL != tree);
 
647
    tree->m_uNodeCount = 0;
 
648
    tree->m_uCacheCount = 0;
 
649
 
 
650
    tree->m_uNeighbor1 = NULL;
 
651
    tree->m_uNeighbor2 = NULL;
 
652
    tree->m_uNeighbor3 = NULL;
 
653
    
 
654
    tree->m_dEdgeLength1 = NULL;
 
655
    tree->m_dEdgeLength2 = NULL;
 
656
    tree->m_dEdgeLength3 = NULL;
 
657
    
 
658
#if USE_HEIGHT
 
659
    tree->m_dHeight = NULL;
 
660
    tree->m_bHasHeight = NULL;
 
661
#endif
 
662
    tree->m_bHasEdgeLength1 = NULL;
 
663
    tree->m_bHasEdgeLength2 = NULL;
 
664
    tree->m_bHasEdgeLength3 = NULL;
 
665
 
 
666
    tree->m_ptrName = NULL;
 
667
    tree->m_Ids = NULL;
 
668
    
 
669
    tree->m_bRooted = FALSE;
 
670
    tree->m_uRootNodeIndex = 0;
 
671
}
 
672
/* end: TreeZero */
 
673
 
 
674
 
 
675
 
 
676
/**
 
677
 *
 
678
 */
 
679
void
 
680
TreeToFileNodeRooted(tree_t *tree, uint uNodeIndex, FILE *fp)
 
681
{
 
682
    bool bGroup;
 
683
    
 
684
    assert(IsRooted(tree));
 
685
    assert(NULL != tree);
 
686
    bGroup = !IsLeaf(uNodeIndex, tree) || IsRoot(uNodeIndex, tree);
 
687
    
 
688
    if (bGroup) 
 
689
        fprintf(fp, "(\n");
 
690
    
 
691
 
 
692
    if (IsLeaf(uNodeIndex, tree)) {
 
693
        /* File.PutString(GetName(uNodeIndex)); */
 
694
        fprintf(fp, "%s", tree->m_ptrName[uNodeIndex]);
 
695
    } else {
 
696
        TreeToFileNodeRooted(tree, GetLeft(uNodeIndex, tree), fp);
 
697
        fprintf(fp, ",\n");
 
698
        TreeToFileNodeRooted(tree, GetRight(uNodeIndex, tree), fp);
 
699
    }
 
700
 
 
701
    if (bGroup)
 
702
        fprintf(fp, ")");
 
703
 
 
704
    if (!IsRoot(uNodeIndex, tree)) {
 
705
        uint uParent = GetParent(uNodeIndex, tree);
 
706
        if (HasEdgeLength(uNodeIndex, uParent, tree))
 
707
            fprintf(fp, ":%g", GetEdgeLength(uNodeIndex, uParent, tree));
 
708
    }
 
709
    fprintf(fp, "\n");
 
710
}
 
711
/***   end: TreeToFileNodeRooted   ***/
 
712
 
 
713
 
 
714
/**
 
715
 *
 
716
 *
 
717
 */
 
718
void
 
719
TreeValidate(tree_t *tree)
 
720
{
 
721
    uint uNodeIndex;
 
722
    assert(NULL != tree);
 
723
    for (uNodeIndex = 0; uNodeIndex < tree->m_uNodeCount; ++uNodeIndex) {
 
724
        ValidateNode(uNodeIndex, tree);
 
725
    }
 
726
    /* FIXME: maybe set negative length to zero? What impact would
 
727
     * that have? */
 
728
}
 
729
/***   end TreeValidate   ***/
 
730
 
 
731
 
 
732
 
 
733
/**
 
734
 *
 
735
 *
 
736
 */
 
737
void
 
738
ValidateNode(uint uNodeIndex, tree_t *tree)
 
739
{
 
740
    uint uNeighborCount;
 
741
    uint n1, n2, n3;
 
742
    assert(NULL != tree);
 
743
    
 
744
    if (uNodeIndex >= tree->m_uNodeCount)
 
745
        Log(&rLog, LOG_FATAL, "ValidateNode(%u), %u nodes", uNodeIndex, tree->m_uNodeCount);
 
746
 
 
747
    uNeighborCount = GetNeighborCount(uNodeIndex, tree);
 
748
    
 
749
 
 
750
    if (2 == uNeighborCount) {
 
751
        if (!tree->m_bRooted) {
 
752
            Log(&rLog, LOG_FATAL, "Tree::ValidateNode: Node %u has two neighbors, tree is not rooted",
 
753
                  uNodeIndex);
 
754
        }
 
755
        if (uNodeIndex != tree->m_uRootNodeIndex) {
 
756
            Log(&rLog, LOG_FATAL, "Tree::ValidateNode: Node %u has two neighbors, but not root node=%u",
 
757
                  uNodeIndex, tree->m_uRootNodeIndex);
 
758
        }
 
759
    }
 
760
 
 
761
    n1 = tree->m_uNeighbor1[uNodeIndex];
 
762
    n2 = tree->m_uNeighbor2[uNodeIndex];
 
763
    n3 = tree->m_uNeighbor3[uNodeIndex];
 
764
    
 
765
    if (NULL_NEIGHBOR == n2 && NULL_NEIGHBOR != n3) {
 
766
        Log(&rLog, LOG_FATAL, "Tree::ValidateNode, n2=null, n3!=null", uNodeIndex);
 
767
    }
 
768
    if (NULL_NEIGHBOR == n3 && NULL_NEIGHBOR != n2) {
 
769
        Log(&rLog, LOG_FATAL, "Tree::ValidateNode, n3=null, n2!=null", uNodeIndex);
 
770
    }
 
771
    
 
772
    if (n1 != NULL_NEIGHBOR)
 
773
        AssertAreNeighbors(uNodeIndex, n1, tree);
 
774
    if (n2 != NULL_NEIGHBOR)
 
775
        AssertAreNeighbors(uNodeIndex, n2, tree);
 
776
    if (n3 != NULL_NEIGHBOR)
 
777
        AssertAreNeighbors(uNodeIndex, n3, tree);
 
778
 
 
779
 
 
780
    
 
781
    if (n1 != NULL_NEIGHBOR && (n1 == n2 || n1 == n3)) {
 
782
        Log(&rLog, LOG_FATAL, "Tree::ValidateNode, duplicate neighbors in node %u", uNodeIndex);
 
783
    }
 
784
    if (n2 != NULL_NEIGHBOR && (n2 == n1 || n2 == n3)) {
 
785
        Log(&rLog, LOG_FATAL, "Tree::ValidateNode, duplicate neighbors in node %u", uNodeIndex);
 
786
    }
 
787
    if (n3 != NULL_NEIGHBOR && (n3 == n1 || n3 == n2)) {
 
788
        Log(&rLog, LOG_FATAL, "Tree::ValidateNode, duplicate neighbors in node %u", uNodeIndex);
 
789
    }
 
790
 
 
791
 
 
792
    if (IsRooted(tree)) {
 
793
        if (NULL_NEIGHBOR == GetParent(uNodeIndex, tree)) {
 
794
            if (uNodeIndex != tree->m_uRootNodeIndex) {
 
795
                Log(&rLog, LOG_FATAL, "Tree::ValiateNode(%u), no parent", uNodeIndex);
 
796
            }
 
797
        } else if (GetLeft(GetParent(uNodeIndex, tree), tree) != uNodeIndex &&
 
798
                   GetRight(GetParent(uNodeIndex, tree), tree) != uNodeIndex) {
 
799
            Log(&rLog, LOG_FATAL, "Tree::ValidateNode(%u), parent / child mismatch", uNodeIndex);
 
800
        }
 
801
    }
 
802
}
 
803
/***   end: ValidateNode   ***/
 
804
 
 
805
 
 
806
/**
 
807
 *
 
808
 *
 
809
 */
 
810
void
 
811
AssertAreNeighbors(unsigned uNodeIndex1, unsigned uNodeIndex2, tree_t *tree)
 
812
{
 
813
    bool Has12, Has21;
 
814
    assert(NULL != tree);
 
815
 
 
816
    if (uNodeIndex1 >= tree->m_uNodeCount || uNodeIndex2 >= tree->m_uNodeCount)
 
817
        Log(&rLog, LOG_FATAL, "AssertAreNeighbors(%u,%u), are %u nodes",
 
818
              uNodeIndex1, uNodeIndex2, tree->m_uNodeCount);
 
819
 
 
820
    if (tree->m_uNeighbor1[uNodeIndex1] != uNodeIndex2 &&
 
821
        tree->m_uNeighbor2[uNodeIndex1] != uNodeIndex2 &&
 
822
        tree->m_uNeighbor3[uNodeIndex1] != uNodeIndex2) {
 
823
        Log(&rLog, LOG_FATAL, "AssertAreNeighbors(%u,%u) failed", uNodeIndex1, uNodeIndex2);
 
824
    }
 
825
 
 
826
    
 
827
    if (tree->m_uNeighbor1[uNodeIndex2] != uNodeIndex1 &&
 
828
        tree->m_uNeighbor2[uNodeIndex2] != uNodeIndex1 &&
 
829
        tree->m_uNeighbor3[uNodeIndex2] != uNodeIndex1) {
 
830
        Log(&rLog, LOG_FATAL, "AssertAreNeighbors(%u,%u) failed", uNodeIndex1, uNodeIndex2);
 
831
    }
 
832
 
 
833
 
 
834
    Has12 = HasEdgeLength(uNodeIndex1, uNodeIndex2, tree);
 
835
    Has21 = HasEdgeLength(uNodeIndex2, uNodeIndex1, tree);
 
836
    if (Has12 != Has21) {
 
837
        HasEdgeLength(uNodeIndex1, uNodeIndex2, tree);
 
838
        HasEdgeLength(uNodeIndex2, uNodeIndex1, tree);
 
839
        Log(&rLog, LOG_ERROR, "HasEdgeLength(%u, %u)=%c HasEdgeLength(%u, %u)=%c\n",
 
840
              uNodeIndex1,
 
841
              uNodeIndex2,
 
842
              Has12 ? 'T' : 'F',
 
843
              uNodeIndex2,
 
844
              uNodeIndex1,
 
845
              Has21 ? 'T' : 'F');
 
846
        Log(&rLog, LOG_FATAL, "Tree::AssertAreNeighbors, HasEdgeLength not symmetric");
 
847
    }
 
848
 
 
849
    
 
850
    if (Has12) {
 
851
        double d12 = GetEdgeLength(uNodeIndex1, uNodeIndex2, tree);
 
852
        double d21 = GetEdgeLength(uNodeIndex2, uNodeIndex1, tree);
 
853
        if (d12 != d21) {
 
854
            Log(&rLog, LOG_FATAL, "Tree::AssertAreNeighbors, Edge length disagrees %u-%u=%.3g, %u-%u=%.3g",
 
855
                  uNodeIndex1, uNodeIndex2, d12,
 
856
                  uNodeIndex2, uNodeIndex1, d21);
 
857
        }
 
858
    }
 
859
}
 
860
/***   end: AssertAreNeighbors   ***/
 
861
 
 
862
 
 
863
 
 
864
/**
 
865
 *
 
866
 * @note see phyfromfile.cpp in Muscle3.7. Tree has to be a pointer to
 
867
 * an already allocated tree structure.
 
868
 *
 
869
 * return non-Zero on failure
 
870
 *
 
871
 * leafids will not be set here (FIXME:CHECK if true)
 
872
 */
 
873
int
 
874
MuscleTreeFromFile(tree_t *tree, char *ftree)
 
875
{
 
876
    double dEdgeLength;
 
877
    bool bEdgeLength;
 
878
    char szToken[16];
 
879
    NEWICK_TOKEN_TYPE NTT;
 
880
    unsigned uThirdNode;
 
881
    FILE *fp = NULL;
 
882
 
 
883
    assert(tree!=NULL);
 
884
    assert(ftree!=NULL);
 
885
 
 
886
    if (NULL == (fp=fopen(ftree, "r"))) {
 
887
        Log(&rLog, LOG_ERROR, "Couldn't open tree-file '%s' for reading. Skipping", ftree);
 
888
        return -1;
 
889
    }
 
890
    
 
891
    /* Assume rooted.
 
892
     * If we discover that it is unrooted, will convert on the fly.
 
893
     */
 
894
    TreeCreateRooted(tree);
 
895
 
 
896
    bEdgeLength = GetGroupFromFile(fp, 0, &dEdgeLength, tree);
 
897
 
 
898
 
 
899
    /* Next token should be either ';' for rooted tree or ',' for
 
900
     * unrooted.
 
901
     */
 
902
    NTT = GetToken(fp, szToken, sizeof(szToken));
 
903
 
 
904
    /* If rooted, all done. */
 
905
    if (NTT_Semicolon == NTT) {
 
906
        if (bEdgeLength)
 
907
            Log(&rLog, LOG_WARN, " *** Warning *** edge length on root group in Newick file %s\n", ftree);
 
908
        TreeValidate(tree);
 
909
        fclose(fp);
 
910
        return 0;
 
911
    }
 
912
 
 
913
    if (NTT_Comma != NTT)
 
914
        Log(&rLog, LOG_FATAL, "Tree::FromFile, expected ';' or ',', got '%s'", szToken);
 
915
 
 
916
    uThirdNode = UnrootFromFile(tree);
 
917
    bEdgeLength = GetGroupFromFile(fp, uThirdNode, &dEdgeLength, tree);
 
918
    if (bEdgeLength)
 
919
        SetEdgeLength(0, uThirdNode, dEdgeLength, tree);
 
920
    TreeValidate(tree);
 
921
 
 
922
    fclose(fp);
 
923
    return 0;
 
924
}
 
925
/***   end MuscleTreeFromFile   ***/
 
926
 
 
927
 
 
928
 
 
929
/**
 
930
 *
 
931
 */
 
932
void
 
933
ExpandCache(tree_t *tree)
 
934
{
 
935
    const uint uNodeCount = 100;
 
936
    uint uNewCacheCount;
 
937
    uint *uNewNeighbor1, *uNewNeighbor2, *uNewNeighbor3;
 
938
    uint *uNewIds;
 
939
    double *dNewEdgeLength1, *dNewEdgeLength2, *dNewEdgeLength3;
 
940
#if USE_HEIGHT
 
941
    double *dNewHeight;
 
942
    bool *bNewHasHeight;
 
943
#endif
 
944
    bool *bNewHasEdgeLength1, *bNewHasEdgeLength2, *bNewHasEdgeLength3;
 
945
    char **ptrNewName;
 
946
 
 
947
    assert(NULL != tree);
 
948
    uNewCacheCount = tree->m_uCacheCount + uNodeCount;
 
949
    uNewNeighbor1 = (uint *) CKMALLOC(
 
950
        uNewCacheCount * sizeof(uint));
 
951
    uNewNeighbor2 = (uint *) CKMALLOC(
 
952
        uNewCacheCount * sizeof(uint));
 
953
    uNewNeighbor3 = (uint *) CKMALLOC(
 
954
        uNewCacheCount * sizeof(uint));
 
955
 
 
956
    uNewIds = (uint *) CKCALLOC(
 
957
        uNewCacheCount, sizeof(uint));
 
958
    
 
959
    dNewEdgeLength1 = (double *) CKMALLOC(
 
960
        uNewCacheCount * sizeof(double));
 
961
    dNewEdgeLength2 =  (double *) CKMALLOC(
 
962
        uNewCacheCount * sizeof(double));
 
963
    dNewEdgeLength3 = (double *) CKMALLOC(
 
964
        uNewCacheCount * sizeof(double));
 
965
#if USE_HEIGHT
 
966
    dNewHeight = (double *) CKMALLOC(
 
967
        uNewCacheCount * sizeof(double));
 
968
    bNewHasHeight = (bool *) CKMALLOC(
 
969
        uNewCacheCount * sizeof(bool));
 
970
#endif
 
971
     
 
972
    bNewHasEdgeLength1 = (bool *) CKMALLOC(
 
973
        uNewCacheCount * sizeof(bool));
 
974
    bNewHasEdgeLength2 = (bool *) CKMALLOC(
 
975
        uNewCacheCount * sizeof(bool));
 
976
    bNewHasEdgeLength3 = (bool *) CKMALLOC(
 
977
        uNewCacheCount * sizeof(bool));
 
978
    ptrNewName = (char **) CKCALLOC(uNewCacheCount, sizeof(char*));
 
979
 
 
980
    if (tree->m_uCacheCount > 0) {
 
981
        uint uUnsignedBytes, uEdgeBytes;
 
982
        uint uBoolBytes, uNameBytes;
 
983
 
 
984
        uUnsignedBytes = tree->m_uCacheCount*sizeof(uint);
 
985
        
 
986
        memcpy(uNewNeighbor1, tree->m_uNeighbor1, uUnsignedBytes);
 
987
        memcpy(uNewNeighbor2, tree->m_uNeighbor2, uUnsignedBytes);
 
988
        memcpy(uNewNeighbor3, tree->m_uNeighbor3, uUnsignedBytes);
 
989
 
 
990
        memcpy(uNewIds, tree->m_Ids, uUnsignedBytes);
 
991
 
 
992
        uEdgeBytes = tree->m_uCacheCount*sizeof(double);
 
993
        memcpy(dNewEdgeLength1, tree->m_dEdgeLength1, uEdgeBytes);
 
994
        memcpy(dNewEdgeLength2, tree->m_dEdgeLength2, uEdgeBytes);
 
995
        memcpy(dNewEdgeLength3, tree->m_dEdgeLength3, uEdgeBytes);
 
996
#if USE_HEIGHT
 
997
        memcpy(dNewHeight, tree->m_dHeight, uEdgeBytes);
 
998
#endif        
 
999
          
 
1000
        uBoolBytes = tree->m_uCacheCount*sizeof(bool);
 
1001
        memcpy(bNewHasEdgeLength1, tree->m_bHasEdgeLength1, uBoolBytes);
 
1002
        memcpy(bNewHasEdgeLength2, tree->m_bHasEdgeLength2, uBoolBytes);
 
1003
        memcpy(bNewHasEdgeLength3, tree->m_bHasEdgeLength3, uBoolBytes);
 
1004
#if USE_HEIGHT
 
1005
        memcpy(bNewHasHeight, tree->m_bHasHeight, uBoolBytes);
 
1006
#endif
 
1007
        uNameBytes = tree->m_uCacheCount*sizeof(char *);
 
1008
        memcpy(ptrNewName, tree->m_ptrName, uNameBytes);
 
1009
 
 
1010
        /* similiar to FreeMuscleTree
 
1011
         */
 
1012
        
 
1013
        /* IsLeaf needs m_uNodeCount and all m_uNeighbor's
 
1014
         * so free first
 
1015
         */
 
1016
#if 0
 
1017
        for (i=0; i<tree->m_uNodeCount; i++) {
 
1018
            if (IsLeaf(i, tree)) {
 
1019
#ifndef NDEBUG
 
1020
                if (NULL==tree->m_ptrName[i]) {
 
1021
                    Log(&rLog, LOG_WARN, "FIXME tree->m_ptrName[%d] is already NULL", i);
 
1022
                }
 
1023
#endif
 
1024
                CKFREE(tree->m_ptrName[i]);
 
1025
            }
 
1026
        }
 
1027
#endif
 
1028
        CKFREE(tree->m_ptrName);
 
1029
 
 
1030
        CKFREE(tree->m_uNeighbor1);
 
1031
        CKFREE(tree->m_uNeighbor2);
 
1032
        CKFREE(tree->m_uNeighbor3);
 
1033
 
 
1034
        CKFREE(tree->m_Ids);
 
1035
 
 
1036
        CKFREE(tree->m_dEdgeLength1);
 
1037
        CKFREE(tree->m_dEdgeLength2);
 
1038
        CKFREE(tree->m_dEdgeLength3);
 
1039
 
 
1040
        CKFREE(tree->m_bHasEdgeLength1);
 
1041
        CKFREE(tree->m_bHasEdgeLength2);
 
1042
        CKFREE(tree->m_bHasEdgeLength3);
 
1043
#if USE_HEIGHT
 
1044
        CKFREE(tree->m_bHasHeight);
 
1045
        CKFREE(tree->m_dHeight);
 
1046
#endif
 
1047
    }
 
1048
    
 
1049
    tree->m_uCacheCount = uNewCacheCount;
 
1050
    tree->m_uNeighbor1 = uNewNeighbor1;
 
1051
    tree->m_uNeighbor2 = uNewNeighbor2;
 
1052
    tree->m_uNeighbor3 = uNewNeighbor3;
 
1053
    tree->m_Ids = uNewIds;
 
1054
    tree->m_dEdgeLength1 = dNewEdgeLength1;
 
1055
    tree->m_dEdgeLength2 = dNewEdgeLength2;
 
1056
    tree->m_dEdgeLength3 = dNewEdgeLength3;
 
1057
        
 
1058
#ifdef USE_HEIGHT
 
1059
    tree->m_dHeight = dNewHeight;
 
1060
    tree->m_bHasHeight = bNewHasHeight;
 
1061
#endif
 
1062
    tree->m_bHasEdgeLength1 = bNewHasEdgeLength1;
 
1063
    tree->m_bHasEdgeLength2 = bNewHasEdgeLength2;
 
1064
    tree->m_bHasEdgeLength3 = bNewHasEdgeLength3;
 
1065
        
 
1066
    tree->m_ptrName = ptrNewName;
 
1067
 
 
1068
}
 
1069
/***   end: ExpandCache   ***/
 
1070
 
 
1071
 
 
1072
 
 
1073
/**
 
1074
 *
 
1075
 * Tree must be pointer to an already allocated tree structure
 
1076
 *
 
1077
 */
 
1078
void
 
1079
TreeCreateRooted(tree_t *tree)
 
1080
{
 
1081
    TreeZero(tree);
 
1082
    ExpandCache(tree);
 
1083
    tree->m_uNodeCount = 1;
 
1084
 
 
1085
    tree->m_uNeighbor1[0] = NULL_NEIGHBOR;
 
1086
    tree->m_uNeighbor2[0] = NULL_NEIGHBOR;
 
1087
    tree->m_uNeighbor3[0] = NULL_NEIGHBOR;
 
1088
    
 
1089
    tree->m_bHasEdgeLength1[0] = FALSE;
 
1090
    tree->m_bHasEdgeLength2[0] = FALSE;
 
1091
    tree->m_bHasEdgeLength3[0] = FALSE;
 
1092
#if USE_HEIGHT
 
1093
    tree->m_bHasHeight[0] = FALSE;
 
1094
#endif
 
1095
    
 
1096
    tree->m_uRootNodeIndex = 0;
 
1097
    tree->m_bRooted = TRUE;
 
1098
    
 
1099
#ifndef NDEBUG
 
1100
    TreeValidate(tree);
 
1101
#endif
 
1102
}
 
1103
/***   end: TreeCreateRooted   ***/
 
1104
 
 
1105
 
 
1106
/**
 
1107
 *
 
1108
 */
 
1109
uint
 
1110
UnrootFromFile(tree_t *tree)
 
1111
{
 
1112
    uint uThirdNode;
 
1113
#if TRACE
 
1114
    Log("Before unroot:\n");
 
1115
    LogMe();
 
1116
#endif
 
1117
    
 
1118
    if (!tree->m_bRooted)
 
1119
        Log(&rLog, LOG_FATAL, "Tree::Unroot, not rooted");
 
1120
    
 
1121
    /* Convention: root node is always node zero */
 
1122
    assert(IsRoot(0, tree));
 
1123
    assert(NULL_NEIGHBOR == tree->m_uNeighbor1[0]);
 
1124
 
 
1125
    uThirdNode = tree->m_uNodeCount++;
 
1126
    
 
1127
    tree->m_uNeighbor1[0] = uThirdNode;
 
1128
    tree->m_uNeighbor1[uThirdNode] = 0;
 
1129
    
 
1130
    tree->m_uNeighbor2[uThirdNode] = NULL_NEIGHBOR;
 
1131
    tree->m_uNeighbor3[uThirdNode] = NULL_NEIGHBOR;
 
1132
    
 
1133
    tree->m_dEdgeLength1[0] = 0;
 
1134
    tree->m_dEdgeLength1[uThirdNode] = 0;
 
1135
    tree->m_bHasEdgeLength1[uThirdNode] = TRUE;
 
1136
    
 
1137
    tree->m_bRooted = FALSE;
 
1138
    
 
1139
#if TRACE
 
1140
    Log("After unroot:\n");
 
1141
    LogMe();
 
1142
#endif
 
1143
 
 
1144
    return uThirdNode;
 
1145
}
 
1146
/***   end: UnrootFromFile   ***/
 
1147
 
 
1148
 
 
1149
 
 
1150
/**
 
1151
 *
 
1152
 *
 
1153
 */
 
1154
bool
 
1155
GetGroupFromFile(FILE *fp, uint uNodeIndex, double *ptrdEdgeLength, tree_t *tree)
 
1156
{
 
1157
    char szToken[1024];
 
1158
    NEWICK_TOKEN_TYPE NTT = GetToken(fp, szToken, sizeof(szToken));
 
1159
    bool bRightLength;
 
1160
    bool bEof;
 
1161
    char c;
 
1162
 
 
1163
    /* Group is either leaf name or (left, right). */
 
1164
    if (NTT_String == NTT) {
 
1165
        SetLeafName(uNodeIndex, szToken, tree);
 
1166
#if TRACE
 
1167
        Log(&rLog, LOG_FORCED_DEBUG, "Group is leaf '%s'", szToken);
 
1168
#endif
 
1169
    } else if (NTT_Lparen == NTT) {
 
1170
        const unsigned uLeft = AppendBranch(tree, uNodeIndex);
 
1171
        const unsigned uRight = uLeft + 1;
 
1172
        double dEdgeLength;
 
1173
        bool bLeftLength = GetGroupFromFile(fp, uLeft, &dEdgeLength, tree);
 
1174
        
 
1175
        /* Left sub-group...
 
1176
         */
 
1177
#if TRACE
 
1178
        Log(&rLog, LOG_FORCED_DEBUG, "%s", "Got '(', group is compound, expect left sub-group");
 
1179
 
 
1180
        if (bLeftLength) {
 
1181
            Log(&rLog, LOG_FORCED_DEBUG, "Edge length for left sub-group: %.3g", dEdgeLength);
 
1182
        } else {
 
1183
            Log(&rLog, LOG_FORCED_DEBUG, "%s", "No edge length for left sub-group");
 
1184
        }
 
1185
#endif
 
1186
        if (bLeftLength)
 
1187
            SetEdgeLength(uNodeIndex, uLeft, dEdgeLength, tree);
 
1188
 
 
1189
        /* ... then comma ...
 
1190
         */
 
1191
#if TRACE
 
1192
        Log(&rLog, LOG_FORCED_DEBUG, "%s", "Expect comma");
 
1193
#endif
 
1194
        NTT = GetToken(fp, szToken, sizeof(szToken));
 
1195
        if (NTT_Comma != NTT)
 
1196
            Log(&rLog, LOG_FATAL, "Tree::GetGroupFromFile, expected ',', got '%s'", szToken);
 
1197
        
 
1198
        /* ...then right sub-group...
 
1199
         */
 
1200
#if TRACE
 
1201
        Log(&rLog, LOG_FORCED_DEBUG, "%s", "Expect right sub-group");
 
1202
#endif
 
1203
        bRightLength = GetGroupFromFile(fp, uRight, &dEdgeLength, tree);
 
1204
        if (bRightLength)
 
1205
            SetEdgeLength(uNodeIndex, uRight, dEdgeLength, tree);
 
1206
        
 
1207
#if TRACE
 
1208
        if (bRightLength)
 
1209
            Log(&rLog, LOG_FORCED_DEBUG, "Edge length for right sub-group: %.3g", dEdgeLength);
 
1210
        else
 
1211
            Log(&rLog, LOG_FORCED_DEBUG, "%s", "No edge length for right sub-group");
 
1212
#endif
 
1213
 
 
1214
        /* ... then closing parenthesis.
 
1215
         */
 
1216
#if TRACE
 
1217
        Log(&rLog, LOG_FORCED_DEBUG, "%s", "Expect closing parenthesis (or comma if > 2-ary)");
 
1218
#endif
 
1219
        NTT = GetToken(fp, szToken, sizeof(szToken));
 
1220
        if (NTT_Rparen == NTT)
 
1221
            ;
 
1222
        else if (NTT_Comma == NTT) {
 
1223
            if (ungetc(',', fp)==EOF)
 
1224
                Log(&rLog, LOG_FATAL, "%s" "ungetc failed");
 
1225
            return FALSE;
 
1226
        } else
 
1227
            Log(&rLog, LOG_FATAL, "Tree::GetGroupFromFile, expected ')' or ',', got '%s'", szToken);
 
1228
    } else {
 
1229
        Log(&rLog, LOG_FATAL, "Tree::GetGroupFromFile, expected '(' or leaf name, got '%s'",
 
1230
              szToken);
 
1231
    }
 
1232
    
 
1233
    /* Group may optionally be followed by edge length.
 
1234
     */
 
1235
    bEof = FileSkipWhiteX(fp);
 
1236
    if (bEof)
 
1237
        return FALSE;
 
1238
    if ((c = fgetc(fp))==EOF) /* GetCharX */
 
1239
        Log(&rLog, LOG_FATAL, "%s", "fgetc reached end of file");
 
1240
#if TRACE
 
1241
    Log(&rLog, LOG_FORCED_DEBUG, "Character following group, could be colon, is '%c'", c);
 
1242
#endif
 
1243
    if (':' == c) {
 
1244
        NTT = GetToken(fp, szToken, sizeof(szToken));
 
1245
        if (NTT_String != NTT)
 
1246
            Log(&rLog, LOG_FATAL, "Tree::GetGroupFromFile, expected edge length, got '%s'", szToken);
 
1247
        *ptrdEdgeLength = atof(szToken);
 
1248
        return TRUE;
 
1249
    }
 
1250
    if (ungetc(c, fp)==EOF)
 
1251
        Log(&rLog, LOG_FATAL, "%s" "ungetc failed");
 
1252
    
 
1253
    return FALSE;
 
1254
}
 
1255
/***   end: GetGroupFromFile   ***/
 
1256
 
 
1257
 
 
1258
 
 
1259
 
 
1260
/**
 
1261
 *
 
1262
 */
 
1263
void
 
1264
FileSkipWhite(FILE *fp)
 
1265
{
 
1266
    bool bEof = FileSkipWhiteX(fp);
 
1267
    if (bEof)
 
1268
        Log(&rLog, LOG_FATAL, "%s", "End-of-file skipping white space");
 
1269
}
 
1270
/***   end: FileSkipWhite   ***/
 
1271
 
 
1272
 
 
1273
 
 
1274
 
 
1275
/**
 
1276
 *
 
1277
 */
 
1278
bool
 
1279
FileSkipWhiteX(FILE *fp)
 
1280
{
 
1281
    for (;;) {
 
1282
        int c;
 
1283
        bool bEof;
 
1284
        
 
1285
        /* GetChar */
 
1286
        if ((c = fgetc(fp))==EOF) {
 
1287
            bEof = TRUE;
 
1288
        } else {
 
1289
            bEof = FALSE;
 
1290
        }
 
1291
 
 
1292
        if (bEof)
 
1293
            return TRUE;
 
1294
        if (!isspace(c)) {
 
1295
            if (ungetc(c, fp)==EOF)
 
1296
                Log(&rLog, LOG_FATAL, "%s" "ungetc failed");
 
1297
            break;
 
1298
        }
 
1299
    }
 
1300
    return FALSE;
 
1301
}
 
1302
/***   end: FileSkipWhiteX   ***/
 
1303
 
 
1304
 
 
1305
 
 
1306
 
 
1307
/**
 
1308
 *
 
1309
 */
 
1310
NEWICK_TOKEN_TYPE
 
1311
GetToken(FILE *fp, char szToken[], uint uBytes)
 
1312
{
 
1313
    char c;
 
1314
    unsigned uBytesCopied = 0;
 
1315
    NEWICK_TOKEN_TYPE TT;
 
1316
 
 
1317
    /* Skip leading white space */
 
1318
    FileSkipWhite(fp);
 
1319
 
 
1320
    if ((c = fgetc(fp))==EOF) /* GetCharX */
 
1321
        Log(&rLog, LOG_FATAL, "%s", "fgetc reached end of file");
 
1322
    
 
1323
    /* In case a single-character token */
 
1324
    szToken[0] = c;
 
1325
    szToken[1] = 0;
 
1326
 
 
1327
    switch (c) {
 
1328
 
 
1329
    case '(':
 
1330
        return NTT_Lparen;
 
1331
        
 
1332
    case ')':
 
1333
        return NTT_Rparen;
 
1334
        
 
1335
    case ':':
 
1336
        return NTT_Colon;
 
1337
        
 
1338
    case ';':
 
1339
        return NTT_Semicolon;
 
1340
        
 
1341
    case ',':
 
1342
        return NTT_Comma;
 
1343
        
 
1344
    case '\'':
 
1345
        TT = NTT_SingleQuotedString;
 
1346
        if ((c = fgetc(fp))==EOF) /* GetCharX */
 
1347
            Log(&rLog, LOG_FATAL, "%s", "fgetc reached end of file");
 
1348
        break;
 
1349
        
 
1350
    case '"':
 
1351
        TT = NTT_DoubleQuotedString;
 
1352
        if ((c = fgetc(fp))==EOF) /* GetCharX */
 
1353
            Log(&rLog, LOG_FATAL, "%s", "fgetc reached end of file");
 
1354
        break;
 
1355
        
 
1356
    case '[':
 
1357
        TT = NTT_Comment;
 
1358
        break;
 
1359
        
 
1360
    default:
 
1361
        TT = NTT_String;
 
1362
        break;
 
1363
    }
 
1364
    
 
1365
    for (;;)
 
1366
    {
 
1367
        bool bEof;
 
1368
        if (TT != NTT_Comment) {
 
1369
            if (uBytesCopied < uBytes - 2)  {
 
1370
                szToken[uBytesCopied++] = c;
 
1371
                szToken[uBytesCopied] = 0;
 
1372
            } else {
 
1373
                Log(&rLog, LOG_FATAL, "Tree::GetToken: input buffer too small, token so far='%s'", szToken);
 
1374
            }
 
1375
        }
 
1376
        c = fgetc(fp); /* GetChar */
 
1377
        bEof = (c==EOF ? TRUE : FALSE);
 
1378
        if (bEof)
 
1379
            return TT;
 
1380
        
 
1381
        switch (TT) {
 
1382
 
 
1383
        case NTT_String:
 
1384
            if (0 != strchr("():;,", c)) {
 
1385
                if (ungetc(c, fp)==EOF)
 
1386
                    Log(&rLog, LOG_FATAL, "%s" "ungetc failed");
 
1387
                return NTT_String;
 
1388
            }
 
1389
            if (isspace(c))
 
1390
                return NTT_String;
 
1391
            break;
 
1392
            
 
1393
        case NTT_SingleQuotedString:
 
1394
            if ('\'' == c)
 
1395
                return NTT_String;
 
1396
            break;
 
1397
            
 
1398
        case NTT_DoubleQuotedString:
 
1399
            if ('"' == c)
 
1400
                return NTT_String;
 
1401
            break;
 
1402
            
 
1403
        case NTT_Comment:
 
1404
            if (']' == c)
 
1405
                return GetToken(fp, szToken, uBytes);
 
1406
            break;
 
1407
            
 
1408
        default:
 
1409
            Log(&rLog, LOG_FATAL, "Tree::GetToken, invalid TT=%u", TT);
 
1410
        }
 
1411
    }
 
1412
}
 
1413
/***   end: GetToken   ***/
 
1414
 
 
1415
 
 
1416
 
 
1417
/***   SetLeafName
 
1418
 *
 
1419
 */
 
1420
void
 
1421
SetLeafName(unsigned uNodeIndex, const char *ptrName, tree_t *tree)
 
1422
{
 
1423
    assert(uNodeIndex < tree->m_uNodeCount);
 
1424
    assert(IsLeaf(uNodeIndex, tree));
 
1425
    free(tree->m_ptrName[uNodeIndex]);
 
1426
    /*LOG_DEBUG("Setting tree->m_ptrName[uNodeIndex=%d] to %s", uNodeIndex, ptrName);*/
 
1427
    tree->m_ptrName[uNodeIndex] = CkStrdup(ptrName);
 
1428
}
 
1429
/***   end: SetLeafName   ***/
 
1430
 
 
1431
 
 
1432
 
 
1433
 
 
1434
/**
 
1435
 *
 
1436
 * Append a new branch. This adds two new nodes and joins them to an
 
1437
 * existing leaf node. Return value is k, new nodes have indexes k and
 
1438
 * k+1 respectively.
 
1439
 *
 
1440
 */
 
1441
uint
 
1442
AppendBranch(tree_t *tree, uint uExistingLeafIndex)
 
1443
{
 
1444
    uint uNewLeaf1;
 
1445
    uint uNewLeaf2;
 
1446
    
 
1447
    assert(tree!=NULL);
 
1448
    if (0 == tree->m_uNodeCount) {
 
1449
        Log(&rLog, LOG_FATAL, "%s(): %s", __FUNCTION__, "tree has not been created");
 
1450
    }
 
1451
    assert(NULL_NEIGHBOR == tree->m_uNeighbor2[uExistingLeafIndex]);
 
1452
    assert(NULL_NEIGHBOR == tree->m_uNeighbor3[uExistingLeafIndex]);
 
1453
    assert(uExistingLeafIndex < tree->m_uNodeCount);
 
1454
#ifndef NDEBUG
 
1455
    if (!IsLeaf(uExistingLeafIndex, tree)) {
 
1456
        Log(&rLog, LOG_FATAL, "AppendBranch(%u): not leaf", uExistingLeafIndex);
 
1457
    }
 
1458
#endif
 
1459
 
 
1460
    if (tree->m_uNodeCount >= tree->m_uCacheCount - 2) {
 
1461
        ExpandCache(tree);
 
1462
    }
 
1463
    uNewLeaf1 = tree->m_uNodeCount;
 
1464
    uNewLeaf2 = tree->m_uNodeCount + 1;
 
1465
 
 
1466
    tree->m_uNodeCount += 2;
 
1467
    
 
1468
    tree->m_uNeighbor2[uExistingLeafIndex] = uNewLeaf1;
 
1469
    tree->m_uNeighbor3[uExistingLeafIndex] = uNewLeaf2;
 
1470
    
 
1471
    tree->m_uNeighbor1[uNewLeaf1] = uExistingLeafIndex;
 
1472
    tree->m_uNeighbor1[uNewLeaf2] = uExistingLeafIndex;
 
1473
    
 
1474
    tree->m_uNeighbor2[uNewLeaf1] = NULL_NEIGHBOR;
 
1475
    tree->m_uNeighbor2[uNewLeaf2] = NULL_NEIGHBOR;
 
1476
    
 
1477
    tree->m_uNeighbor3[uNewLeaf1] = NULL_NEIGHBOR;
 
1478
    tree->m_uNeighbor3[uNewLeaf2] = NULL_NEIGHBOR;
 
1479
    
 
1480
    tree->m_dEdgeLength2[uExistingLeafIndex] = 0;
 
1481
    tree->m_dEdgeLength3[uExistingLeafIndex] = 0;
 
1482
    
 
1483
    tree->m_dEdgeLength1[uNewLeaf1] = 0;
 
1484
    tree->m_dEdgeLength2[uNewLeaf1] = 0;
 
1485
    tree->m_dEdgeLength3[uNewLeaf1] = 0;
 
1486
    
 
1487
    tree->m_dEdgeLength1[uNewLeaf2] = 0;
 
1488
    tree->m_dEdgeLength2[uNewLeaf2] = 0;
 
1489
    tree->m_dEdgeLength3[uNewLeaf2] = 0;
 
1490
    
 
1491
    tree->m_bHasEdgeLength1[uNewLeaf1] = FALSE;
 
1492
    tree->m_bHasEdgeLength2[uNewLeaf1] = FALSE;
 
1493
    tree->m_bHasEdgeLength3[uNewLeaf1] = FALSE;
 
1494
    
 
1495
    tree->m_bHasEdgeLength1[uNewLeaf2] = FALSE;
 
1496
    tree->m_bHasEdgeLength2[uNewLeaf2] = FALSE;
 
1497
    tree->m_bHasEdgeLength3[uNewLeaf2] = FALSE;
 
1498
    
 
1499
#if USE_HEIGHT
 
1500
    tree->m_bHasHeight[uNewLeaf1] = FALSE;
 
1501
    tree->m_bHasHeight[uNewLeaf2] = FALSE;
 
1502
#endif 
 
1503
    tree->m_Ids[uNewLeaf1] = uInsane;
 
1504
    tree->m_Ids[uNewLeaf2] = uInsane;
 
1505
    
 
1506
    return uNewLeaf1;
 
1507
}
 
1508
/***   end: AppendBranch   ***/
 
1509
 
 
1510
 
 
1511
/**
 
1512
 *
 
1513
 *
 
1514
 */
 
1515
void
 
1516
SetEdgeLength(uint uNodeIndex1, uint uNodeIndex2,
 
1517
              double dLength, tree_t *tree)
 
1518
{
 
1519
    assert(uNodeIndex1 < tree->m_uNodeCount && uNodeIndex2 < tree->m_uNodeCount);
 
1520
    assert(IsEdge(uNodeIndex1, uNodeIndex2, tree));
 
1521
    
 
1522
    if (tree->m_uNeighbor1[uNodeIndex1] == uNodeIndex2) {
 
1523
        tree->m_dEdgeLength1[uNodeIndex1] = dLength;
 
1524
        tree->m_bHasEdgeLength1[uNodeIndex1] = TRUE;
 
1525
    } else if (tree->m_uNeighbor2[uNodeIndex1] == uNodeIndex2) {
 
1526
        tree->m_dEdgeLength2[uNodeIndex1] = dLength;
 
1527
        tree->m_bHasEdgeLength2[uNodeIndex1] = TRUE;
 
1528
        
 
1529
    } else {
 
1530
        assert(tree->m_uNeighbor3[uNodeIndex1] == uNodeIndex2);
 
1531
        tree->m_dEdgeLength3[uNodeIndex1] = dLength;
 
1532
        tree->m_bHasEdgeLength3[uNodeIndex1] = TRUE;
 
1533
    }
 
1534
    
 
1535
    if (tree->m_uNeighbor1[uNodeIndex2] == uNodeIndex1) {
 
1536
        tree->m_dEdgeLength1[uNodeIndex2] = dLength;
 
1537
        tree->m_bHasEdgeLength1[uNodeIndex2] = TRUE;
 
1538
    } else if (tree->m_uNeighbor2[uNodeIndex2] == uNodeIndex1) {
 
1539
        tree->m_dEdgeLength2[uNodeIndex2] = dLength;
 
1540
        tree->m_bHasEdgeLength2[uNodeIndex2] = TRUE;
 
1541
    } else {
 
1542
        assert(tree->m_uNeighbor3[uNodeIndex2] == uNodeIndex1);
 
1543
        tree->m_dEdgeLength3[uNodeIndex2] = dLength;
 
1544
        tree->m_bHasEdgeLength3[uNodeIndex2] = TRUE;
 
1545
    }
 
1546
}
 
1547
/***   end: SetEdgeLength   ***/
 
1548
 
 
1549
 
 
1550
 
 
1551
/**
 
1552
 *
 
1553
 * Debug output
 
1554
 *
 
1555
 * LogMe in phy.cpp
 
1556
 *
 
1557
 */
 
1558
void
 
1559
LogTree(tree_t *tree, FILE *fp)
 
1560
{
 
1561
    uint uNodeIndex;
 
1562
    uint n1, n2, n3;
 
1563
    char *ptrName;
 
1564
    
 
1565
    fprintf(fp, "This is a tree with %u nodes, which is ", tree->m_uNodeCount);
 
1566
    
 
1567
    if (IsRooted(tree)) {
 
1568
        fprintf(fp, "rooted:\n");
 
1569
        fprintf(fp, "Index  Parnt  LengthP  Left   LengthL  Right  LengthR     Id  Name\n");
 
1570
        fprintf(fp, "-----  -----  -------  ----   -------  -----  -------  -----  ----\n");
 
1571
 
 
1572
    } else {
 
1573
        fprintf(fp, "unrooted;\n");
 
1574
        fprintf(fp, "Index  Nbr_1  Length1  Nbr_2  Length2  Nbr_3  Length3     Id  Name\n");
 
1575
        fprintf(fp, "-----  -----  -------  -----  -------  -----  -------  -----  ----\n");
 
1576
    }
 
1577
    
 
1578
    for (uNodeIndex = 0; uNodeIndex < tree->m_uNodeCount; ++uNodeIndex) {
 
1579
        fprintf(fp, "%5u  ", uNodeIndex);
 
1580
        n1 = tree->m_uNeighbor1[uNodeIndex];
 
1581
        n2 = tree->m_uNeighbor2[uNodeIndex];
 
1582
        n3 = tree->m_uNeighbor3[uNodeIndex];
 
1583
        
 
1584
        if (NULL_NEIGHBOR != n1) {
 
1585
            fprintf(fp, "%5u  ", n1);
 
1586
            if (tree->m_bHasEdgeLength1[uNodeIndex])
 
1587
                fprintf(fp, "%7.3g  ", tree->m_dEdgeLength1[uNodeIndex]);
 
1588
            else
 
1589
                fprintf(fp, "      *  ");
 
1590
        } else {
 
1591
            fprintf(fp, "                ");
 
1592
        }
 
1593
        
 
1594
        if (NULL_NEIGHBOR != n2) {
 
1595
            fprintf(fp, "%5u  ", n2);
 
1596
            if (tree->m_bHasEdgeLength2[uNodeIndex])
 
1597
                fprintf(fp, "%7.3g  ", tree->m_dEdgeLength2[uNodeIndex]);
 
1598
            else
 
1599
                fprintf(fp, "      *  ");
 
1600
        } else {
 
1601
            fprintf(fp, "                ");
 
1602
        }
 
1603
        
 
1604
        if (NULL_NEIGHBOR != n3) {
 
1605
            fprintf(fp, "%5u  ", n3);
 
1606
            if (tree->m_bHasEdgeLength3[uNodeIndex])
 
1607
                fprintf(fp, "%7.3g  ", tree->m_dEdgeLength3[uNodeIndex]);
 
1608
            else
 
1609
                fprintf(fp, "      *  ");
 
1610
        } else {
 
1611
            fprintf(fp, "                ");
 
1612
        }
 
1613
        
 
1614
        if (tree->m_Ids != 0 && IsLeaf(uNodeIndex, tree)) {
 
1615
            unsigned uId = tree->m_Ids[uNodeIndex];
 
1616
            if (uId == uInsane)
 
1617
                fprintf(fp, "    *");
 
1618
            else
 
1619
                fprintf(fp, "%5u", uId);
 
1620
        } else {
 
1621
            fprintf(fp, "     ");
 
1622
        }
 
1623
        
 
1624
        if (tree->m_bRooted && uNodeIndex == tree->m_uRootNodeIndex)
 
1625
            fprintf(fp, "  [ROOT] ");
 
1626
        ptrName = tree->m_ptrName[uNodeIndex];
 
1627
        if (ptrName != 0)
 
1628
            fprintf(fp, "  %s", ptrName);
 
1629
        
 
1630
        fprintf(fp, "\n");
 
1631
    }     
 
1632
}
 
1633
/***   end: LogTree   ***/
 
1634
 
 
1635
 
 
1636
 
 
1637
/**
 
1638
 *
 
1639
 * replaces m_uLeafCount
 
1640
 */
 
1641
uint
 
1642
GetLeafCount(tree_t *tree)
 
1643
{
 
1644
    assert(tree!=NULL);
 
1645
    
 
1646
    return (tree->m_uNodeCount+1)/2;
 
1647
}
 
1648
/***   GetLeafCount   ***/
 
1649
 
 
1650
 
 
1651
 
 
1652
/**
 
1653
 *
 
1654
 */
 
1655
uint
 
1656
GetNodeCount(tree_t *tree)
 
1657
{
 
1658
    assert(tree!=NULL);
 
1659
    
 
1660
    return 2*(GetLeafCount(tree)) - 1;
 
1661
}
 
1662
/***   end: GetNodeCount   ***/
 
1663
 
 
1664
 
 
1665
/**
 
1666
 *
 
1667
 */
 
1668
uint
 
1669
GetNeighbor(uint uNodeIndex, uint uNeighborSubscript, tree_t *prTree)
 
1670
{
 
1671
    assert(uNodeIndex < prTree->m_uNodeCount);
 
1672
    switch (uNeighborSubscript)
 
1673
    {
 
1674
    case 0:
 
1675
        return prTree->m_uNeighbor1[uNodeIndex];
 
1676
    case 1:
 
1677
        return prTree->m_uNeighbor2[uNodeIndex];
 
1678
    case 2:
 
1679
        return prTree->m_uNeighbor3[uNodeIndex];
 
1680
    }
 
1681
    Log(&rLog, LOG_FATAL, "Internal error in %s: sub=%u", __FUNCTION__, uNeighborSubscript);
 
1682
    return NULL_NEIGHBOR;
 
1683
}
 
1684
/***   end: GetNeighbor   ***/
 
1685
 
 
1686
 
 
1687
 
 
1688
 
 
1689
 
 
1690
/**
 
1691
 *
 
1692
 */
 
1693
void
 
1694
SetLeafId(tree_t *tree, uint uNodeIndex, uint uId)
 
1695
{
 
1696
    assert(uNodeIndex < tree->m_uNodeCount);
 
1697
    assert(IsLeaf(uNodeIndex, tree));
 
1698
    tree->m_Ids[uNodeIndex] = uId;
 
1699
}
 
1700
/***   end: SetLeafId    ***/
 
1701
 
 
1702
 
 
1703
/**
 
1704
 *
 
1705
 */
 
1706
uint
 
1707
GetRootNodeIndex(tree_t *tree)
 
1708
{
 
1709
    assert(NULL!=tree);
 
1710
    return tree->m_uRootNodeIndex;
 
1711
}
 
1712
/***   end: GetRootNodeIndex   ***/
 
1713
 
 
1714
 
 
1715
 
 
1716
/**
 
1717
 * @note avoid usage if you want to iterate over all indices, because
 
1718
 * this will be slow
 
1719
 *
 
1720
 */
 
1721
uint
 
1722
LeafIndexToNodeIndex(uint uLeafIndex, tree_t *prTree) {
 
1723
    uint uLeafCount = 0;
 
1724
    unsigned uNodeCount = GetNodeCount(prTree);
 
1725
    uint uNodeIndex;
 
1726
    
 
1727
    for (uNodeIndex = 0; uNodeIndex < uNodeCount; uNodeIndex++) {
 
1728
        if (IsLeaf(uNodeIndex, prTree)) {
 
1729
            if (uLeafCount == uLeafIndex) {
 
1730
                return uNodeIndex;
 
1731
            } else {
 
1732
                uLeafCount++;
 
1733
            }
 
1734
        }
 
1735
    }
 
1736
    Log(&rLog, LOG_FATAL, "Internal error: node index out of range");
 
1737
    return 0;
 
1738
}
 
1739
/***   end: LeafIndexToNodeIndex   ***/
 
1740
 
 
1741
 
 
1742
 
 
1743
 
 
1744
/**
 
1745
 * @brief Append a (source) tree to a (dest) tree to a given node
 
1746
 * which will be replaced. All other nodes in that tree will stay the
 
1747
 * same.
 
1748
 *
 
1749
 * @param[out] prDstTree
 
1750
 * The tree to append to
 
1751
 * @param[in] uDstTreeReplaceNodeIndex
 
1752
 * Dest tree node to which source tree will be appended
 
1753
 * @param[in] prSrcTree
 
1754
 * The tree to append
 
1755
 * 
 
1756
 * @note No nodes inside prDstTree will change except
 
1757
 * uDstTreeReplaceNodeIndex
 
1758
 *
 
1759
 *
 
1760
 * @warning: Function won't check or touch the m_Ids/leaf-indices!
 
1761
 * That means if you want to join two trees with leaf indices 1-10 and
 
1762
 * 1-10 your m_Ids/leaf-indices won't be unique anymore and the
 
1763
 * association between your sequences and the tree are broken. Make
 
1764
 * sure m_Ids are unique before calling me.
 
1765
 *
 
1766
 * The easiest would have been to do this by recursively calling
 
1767
 * AppendBranch() (after adding uSrcTreeNodeIndex as extra argument to
 
1768
 * this function). But recursion is evil. Yet another version would be
 
1769
 * to setup all the data and call MuscleTreeCreate() to create a third
 
1770
 * tree, which seems complicated and wasteful. The approach taken here
 
1771
 * is the following: increase dest tree memory, copy over each src
 
1772
 * tree node data and update the indices and counters. This is tricky
 
1773
 * and has a lot of potential for bugs if tree interface is changed
 
1774
 * (and even if it isn't).
 
1775
 *
 
1776
 */
 
1777
void
 
1778
AppendTree(tree_t *prDstTree, uint uDstTreeReplaceNodeIndex, tree_t *prSrcTree)
 
1779
{
 
1780
    uint uSrcTreeNodeIndex;
 
1781
    uint uOrgDstTreeNodeCount;
 
1782
 
 
1783
    assert(NULL!=prDstTree);
 
1784
    assert(NULL!=prSrcTree);
 
1785
    assert(uDstTreeReplaceNodeIndex < prDstTree->m_uNodeCount);
 
1786
    assert(IsLeaf(uDstTreeReplaceNodeIndex, prDstTree));
 
1787
    assert(IsRooted(prDstTree) && IsRooted(prSrcTree));
 
1788
 
 
1789
    
 
1790
    uOrgDstTreeNodeCount = prDstTree->m_uNodeCount;
 
1791
 
 
1792
    
 
1793
    /* increase dest tree memory
 
1794
     */
 
1795
    while (prDstTree->m_uCacheCount
 
1796
           <
 
1797
           (GetNodeCount(prDstTree) + GetNodeCount(prSrcTree))) {
 
1798
        ExpandCache(prDstTree);
 
1799
    }
 
1800
 
 
1801
 
 
1802
    /* copy all src tree nodes
 
1803
     *
 
1804
     */
 
1805
    for (uSrcTreeNodeIndex=0;
 
1806
         uSrcTreeNodeIndex<GetNodeCount(prSrcTree); uSrcTreeNodeIndex++) {
 
1807
        uint uNewDstNodeIndex = prDstTree->m_uNodeCount;
 
1808
        
 
1809
        /* distinguish 4 cases for src nodes to copy:
 
1810
         *
 
1811
         * 1. src node is the only node, i.e. root & leaf
 
1812
         *
 
1813
         * 2. src node is root: set only left & right, but not parent
 
1814
         * and just replace the given dest index
 
1815
         *
 
1816
         * 3. src node is leaf: set only parent
 
1817
         *
 
1818
         * 4. src node is internal node: update all three neighbours
 
1819
         *
 
1820
         * FIXME: this is messy. Is there a cleaner way to do this by
 
1821
         * merging all cases?
 
1822
         *
 
1823
         */
 
1824
        if (IsRoot(uSrcTreeNodeIndex, prSrcTree) && IsLeaf(uSrcTreeNodeIndex, prSrcTree)) {
 
1825
            /* special case: if this is the only member in
 
1826
             * tree, i.e. it's root and leaf. Copy leaf name and leaf
 
1827
             * id. No neighbours to update
 
1828
             */
 
1829
 
 
1830
            /* free dst node name if set */
 
1831
            if (NULL != prDstTree->m_ptrName[uDstTreeReplaceNodeIndex]) {
 
1832
                CKFREE(prDstTree->m_ptrName[uDstTreeReplaceNodeIndex]);
 
1833
            }
 
1834
 
 
1835
            prDstTree->m_ptrName[uDstTreeReplaceNodeIndex] =
 
1836
                CkStrdup(GetLeafName(uSrcTreeNodeIndex, prSrcTree));
 
1837
 
 
1838
            prDstTree->m_Ids[uDstTreeReplaceNodeIndex] =
 
1839
                prSrcTree->m_Ids[uSrcTreeNodeIndex];
 
1840
 
 
1841
            /* no updated of uNodeCount, because we used the replace node */
 
1842
 
 
1843
#if TRACE
 
1844
            Log(&rLog, LOG_FORCED_DEBUG, "Updated dst rpl node %d with the only src leaf node: parent=%d (%f)",
 
1845
                      uDstTreeReplaceNodeIndex,
 
1846
                      prDstTree->m_uNeighbor1[uDstTreeReplaceNodeIndex], prDstTree->m_dEdgeLength1[uDstTreeReplaceNodeIndex]);
 
1847
#endif
 
1848
            
 
1849
        } else if (IsRoot(uSrcTreeNodeIndex, prSrcTree)) {
 
1850
            /* src node is root: replace uDstTreeReplaceNodeIndex
 
1851
             * (not uNewDstNodeIndex) with src root, i.e. the
 
1852
             * uDstTreeReplaceNodeIndex becomes an internal node now.
 
1853
             *
 
1854
             * We only have two neighbours 2 & 3 (no parent). Keep old
 
1855
             * parent info (neighbor 1).
 
1856
             */
 
1857
 
 
1858
            /* free dst node name if set */
 
1859
            if (NULL != prDstTree->m_ptrName[uDstTreeReplaceNodeIndex]) {
 
1860
                CKFREE(prDstTree->m_ptrName[uDstTreeReplaceNodeIndex]);
 
1861
            }
 
1862
            
 
1863
            prDstTree->m_uNeighbor2[uDstTreeReplaceNodeIndex] =
 
1864
                prSrcTree->m_uNeighbor2[uSrcTreeNodeIndex] + uOrgDstTreeNodeCount;
 
1865
            prDstTree->m_uNeighbor3[uDstTreeReplaceNodeIndex] =
 
1866
                prSrcTree->m_uNeighbor3[uSrcTreeNodeIndex] + uOrgDstTreeNodeCount;          
 
1867
            
 
1868
            prDstTree->m_bHasEdgeLength2[uDstTreeReplaceNodeIndex] =
 
1869
                prSrcTree->m_bHasEdgeLength2[uSrcTreeNodeIndex];
 
1870
            prDstTree->m_bHasEdgeLength3[uDstTreeReplaceNodeIndex] =
 
1871
                prSrcTree->m_bHasEdgeLength3[uSrcTreeNodeIndex];            
 
1872
 
 
1873
            prDstTree->m_dEdgeLength2[uDstTreeReplaceNodeIndex] =
 
1874
                prSrcTree->m_dEdgeLength2[uSrcTreeNodeIndex];
 
1875
            prDstTree->m_dEdgeLength3[uDstTreeReplaceNodeIndex] =
 
1876
                prSrcTree->m_dEdgeLength3[uSrcTreeNodeIndex];
 
1877
 
 
1878
            /* make Id invalid */
 
1879
            prDstTree->m_Ids[uDstTreeReplaceNodeIndex] = uInsane;
 
1880
 
 
1881
            /* no updated of uNodeCount, because we used the replace node */
 
1882
 
 
1883
#if TRACE
 
1884
            Log(&rLog, LOG_FORCED_DEBUG, "Updated dst rpl node %d with the src root node: (untouched) parent=%d (%f) left=%d (%f) right=%d (%f)",
 
1885
                      uDstTreeReplaceNodeIndex,
 
1886
                      prDstTree->m_uNeighbor1[uDstTreeReplaceNodeIndex], prDstTree->m_dEdgeLength1[uDstTreeReplaceNodeIndex],
 
1887
                      prDstTree->m_uNeighbor2[uDstTreeReplaceNodeIndex], prDstTree->m_dEdgeLength2[uDstTreeReplaceNodeIndex],
 
1888
                      prDstTree->m_uNeighbor3[uDstTreeReplaceNodeIndex], prDstTree->m_dEdgeLength3[uDstTreeReplaceNodeIndex]);
 
1889
#endif
 
1890
            
 
1891
        } else if (IsLeaf(uSrcTreeNodeIndex, prSrcTree)) {
 
1892
            /* src node is a leaf, which means we only have one
 
1893
             * neighbour, and that is its parent, i.e. n1
 
1894
             *
 
1895
             */
 
1896
 
 
1897
            /* initialise/zero new node to default values
 
1898
             */
 
1899
            InitNode(prDstTree, uNewDstNodeIndex);
 
1900
 
 
1901
        
 
1902
            /*  update m_ptrName/leaf name
 
1903
             */
 
1904
            prDstTree->m_ptrName[uNewDstNodeIndex] =
 
1905
                CkStrdup(GetLeafName(uSrcTreeNodeIndex, prSrcTree));
 
1906
 
 
1907
            /* update parent node (beware of special case: parent was
 
1908
               src tree root */
 
1909
            if (IsRoot(prSrcTree->m_uNeighbor1[uSrcTreeNodeIndex], prSrcTree)) {
 
1910
                    prDstTree->m_uNeighbor1[uNewDstNodeIndex] =
 
1911
                        uDstTreeReplaceNodeIndex;
 
1912
            } else {
 
1913
                prDstTree->m_uNeighbor1[uNewDstNodeIndex] =
 
1914
                    prSrcTree->m_uNeighbor1[uSrcTreeNodeIndex] + uOrgDstTreeNodeCount;
 
1915
            }
 
1916
 
 
1917
            /* update edge length info to parent
 
1918
             */
 
1919
            prDstTree->m_bHasEdgeLength1[uNewDstNodeIndex] =
 
1920
                prSrcTree->m_bHasEdgeLength1[uSrcTreeNodeIndex];
 
1921
            prDstTree->m_dEdgeLength1[uNewDstNodeIndex] =
 
1922
                prSrcTree->m_dEdgeLength1[uSrcTreeNodeIndex];
 
1923
 
 
1924
            /* update sequence/object id
 
1925
             */
 
1926
            prDstTree->m_Ids[uNewDstNodeIndex] =
 
1927
                prSrcTree->m_Ids[uSrcTreeNodeIndex];            
 
1928
 
 
1929
            /* we used a new node so increase their count */
 
1930
            prDstTree->m_uNodeCount += 1;
 
1931
            
 
1932
#if TRACE
 
1933
            Log(&rLog, LOG_FORCED_DEBUG, "Updated dst node %d with a src leaf node: parent=%d (%f)",
 
1934
                      uNewDstNodeIndex,
 
1935
                      prDstTree->m_uNeighbor1[uNewDstNodeIndex], prDstTree->m_dEdgeLength1[uNewDstNodeIndex]);
 
1936
#endif
 
1937
            
 
1938
        } else  {
 
1939
            /* src node is not root neither leaf, means we have an
 
1940
             * internal node. Update all neighbour info
 
1941
             * 
 
1942
             */
 
1943
 
 
1944
            /* initialise/zero node values to default values
 
1945
             */
 
1946
            InitNode(prDstTree, uNewDstNodeIndex);
 
1947
          
 
1948
            /* update neigbours
 
1949
             */
 
1950
            /* parent: special case if parent was src tree root */
 
1951
            if (IsRoot(prSrcTree->m_uNeighbor1[uSrcTreeNodeIndex], prSrcTree)) {
 
1952
                    prDstTree->m_uNeighbor1[uNewDstNodeIndex] =
 
1953
                        uDstTreeReplaceNodeIndex;
 
1954
            } else {
 
1955
                prDstTree->m_uNeighbor1[uNewDstNodeIndex] =
 
1956
                    prSrcTree->m_uNeighbor1[uSrcTreeNodeIndex] + uOrgDstTreeNodeCount;
 
1957
            }
 
1958
            /* left */
 
1959
            prDstTree->m_uNeighbor2[uNewDstNodeIndex] =
 
1960
                prSrcTree->m_uNeighbor2[uSrcTreeNodeIndex] + uOrgDstTreeNodeCount;
 
1961
            /* right */
 
1962
            prDstTree->m_uNeighbor3[uNewDstNodeIndex] =
 
1963
                prSrcTree->m_uNeighbor3[uSrcTreeNodeIndex] + uOrgDstTreeNodeCount;
 
1964
 
 
1965
            /* update edge length info
 
1966
             */
 
1967
            /* parent */
 
1968
            prDstTree->m_bHasEdgeLength1[uNewDstNodeIndex] =
 
1969
                prSrcTree->m_bHasEdgeLength1[uSrcTreeNodeIndex];
 
1970
            prDstTree->m_dEdgeLength1[uNewDstNodeIndex] =
 
1971
                prSrcTree->m_dEdgeLength1[uSrcTreeNodeIndex];
 
1972
            /* left */
 
1973
            prDstTree->m_bHasEdgeLength2[uNewDstNodeIndex] =
 
1974
                prSrcTree->m_bHasEdgeLength2[uSrcTreeNodeIndex];
 
1975
            prDstTree->m_dEdgeLength2[uNewDstNodeIndex] =
 
1976
                prSrcTree->m_dEdgeLength2[uSrcTreeNodeIndex];
 
1977
            /* right */
 
1978
            prDstTree->m_bHasEdgeLength3[uNewDstNodeIndex] =
 
1979
                prSrcTree->m_bHasEdgeLength3[uSrcTreeNodeIndex];
 
1980
            prDstTree->m_dEdgeLength3[uNewDstNodeIndex] =
 
1981
                prSrcTree->m_dEdgeLength3[uSrcTreeNodeIndex];
 
1982
 
 
1983
            /* we used a new node so increase their count */
 
1984
            prDstTree->m_uNodeCount += 1;
 
1985
            
 
1986
#if TRACE
 
1987
            Log(&rLog, LOG_FORCED_DEBUG, "Updated dst node %d with an internal src node: parent=%d (%f) left=%d (%f) right=%d (%f)",
 
1988
                      uNewDstNodeIndex,
 
1989
                      prDstTree->m_uNeighbor1[uNewDstNodeIndex], prDstTree->m_dEdgeLength1[uNewDstNodeIndex],
 
1990
                      prDstTree->m_uNeighbor2[uNewDstNodeIndex], prDstTree->m_dEdgeLength2[uNewDstNodeIndex],
 
1991
                      prDstTree->m_uNeighbor3[uNewDstNodeIndex], prDstTree->m_dEdgeLength3[uNewDstNodeIndex]);
 
1992
#endif
 
1993
        }
 
1994
 
 
1995
    }
 
1996
    /* end for each src tree node */
 
1997
 
 
1998
    
 
1999
    /*
 
2000
     * m_uRootNodeIndex stays the same.
 
2001
     *
 
2002
     * No need to touch m_uCacheCount.
 
2003
     *
 
2004
     */    
 
2005
#if USE_HEIGHT
 
2006
    Log(&rLog, LOG_FATAL, "Internal error: Height usage not implemented in %s", __FUNCTION__);
 
2007
#endif 
 
2008
 
 
2009
    
 
2010
#ifndef NDEBUG
 
2011
    TreeValidate(prDstTree);
 
2012
#endif
 
2013
        
 
2014
    return;
 
2015
}
 
2016
/***   end: AppendTree()   ***/
 
2017