~ubuntu-branches/debian/jessie/arb/jessie

« back to all changes in this revision

Viewing changes to ARBDB/adtree.cxx

  • Committer: Package Import Robot
  • Author(s): Elmar Pruesse, Andreas Tille, Elmar Pruesse
  • Date: 2014-09-02 15:15:06 UTC
  • mfrom: (1.1.6)
  • Revision ID: package-import@ubuntu.com-20140902151506-jihq58b3iz342wif
Tags: 6.0.2-1
[ Andreas Tille ]
* New upstream version
  Closes: #741890
* debian/upstream -> debian/upstream/metadata
* debian/control:
   - Build-Depends: added libglib2.0-dev
   - Depends: added mafft, mrbayes
* debian/rules
   - Add explicite --remove-section=.comment option to manual strip call
* cme fix dpkg-control
* arb-common.dirs: Do not create unneeded lintian dir
* Add turkish debconf translation (thanks for the patch to Mert Dirik
  <mertdirik@gmail.com>)
  Closes: #757497

[ Elmar Pruesse ]
* patches removed:
   - 10_config.makefiles.patch,
     80_no_GL.patch
       removed in favor of creating file from config.makefile.template via 
       sed in debian/control
   - 20_Makefile_main.patch
       merged upstream
   - 21_Makefiles.patch
       no longer needed
   - 30_tmpfile_CVE-2008-5378.patch: 
       merged upstream
   - 50_fix_gcc-4.8.patch:
       merged upstream
   - 40_add_libGLU.patch:
       libGLU not needed for arb_ntree)
   - 60_use_debian_packaged_raxml.patch:
       merged upstream
   - 70_hardening.patch
       merged upstream
   - 72_add_math_lib_to_linker.patch
       does not appear to be needed
* patches added:
   - 10_upstream_r12793__show_db_load_progress:
       backported patch showing progress while ARB is loading a database
       (needed as indicator/splash screen while ARB is launching)
   - 20_upstream_r12794__socket_permissions:
       backported security fix
   - 30_upstream_r12814__desktop_keywords:
       backported add keywords to desktop (fixes lintian warning)
   - 40_upstream_r12815__lintian_spelling:
       backported fix for lintian reported spelling errors
   - 50_private_nameservers
       change configuration to put nameservers into users home dirs
       (avoids need for shared writeable directory)
   - 60_use_debian_phyml
       use phyml from debian package for both interfaces in ARB
* debian/rules:
   - create config.makefile from override_dh_configure target
   - use "make tarfile" in override_dh_install
   - remove extra cleaning not needed for ARB 6
   - use "dh_install --list-missing" to avoid missing files
   - added override_dh_fixperms target
* debian/control:
   - added libarb-dev package
   - Depends: added phyml, xdg-utils
   - Suggests: removed phyml
   - fix lintian duplicate-short-description (new descriptions)
* debian/*.install:
   - "unrolled" confusing globbing to select files
   - pick files from debian/tmp
   - moved all config files to /etc/arb
* debian/arb-common.templates: updated
* scripts:
   - removed arb-add-pt-server
   - launch-wrapper: 
     - only add demo.arb to newly created $ARBUSERDATA
     - pass commandline arguments through bin/arb wrapper
   - preinst: removing old PT server index files on upgrade from 5.5*
   - postinst: set setgid on shared PT dir
* rewrote arb.1 manfile
* added file icon for ARB databases
* using upstream arb_tcp.dat

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
// =============================================================== //
 
2
//                                                                 //
 
3
//   File      : adtree.cxx                                        //
 
4
//   Purpose   : tree functions                                    //
 
5
//                                                                 //
 
6
//   Institute of Microbiology (Technical University Munich)       //
 
7
//   http://www.arb-home.de/                                       //
 
8
//                                                                 //
 
9
// =============================================================== //
 
10
 
 
11
#include <arbdbt.h>
 
12
#include <arb_progress.h>
 
13
#include "gb_local.h"
 
14
#include <arb_strarray.h>
 
15
#include <set>
 
16
#include <limits.h>
 
17
#include <arb_global_defs.h>
 
18
#include <arb_strbuf.h>
 
19
#include <arb_diff.h>
 
20
#include <arb_defs.h>
 
21
 
 
22
#define GBT_PUT_DATA 1
 
23
#define GBT_GET_SIZE 0
 
24
 
 
25
GBDATA *GBT_get_tree_data(GBDATA *gb_main) {
 
26
    return GBT_find_or_create(gb_main, "tree_data", 7);
 
27
}
 
28
 
 
29
GBT_TREE::bs100_mode GBT_TREE::toggle_bootstrap100(bs100_mode mode) {
 
30
    if (!is_leaf) {
 
31
        if (!is_root_node()) {
 
32
            double bootstrap;
 
33
            switch (parse_bootstrap(bootstrap)) {
 
34
                case REMARK_NONE:
 
35
                case REMARK_OTHER:
 
36
                    switch (mode) {
 
37
                        case BS_UNDECIDED: mode = BS_INSERT;
 
38
                        case BS_INSERT: set_bootstrap(100);
 
39
                        case BS_REMOVE: break;
 
40
                    }
 
41
                    break;
 
42
                case REMARK_BOOTSTRAP:
 
43
                    if (bootstrap >= 99.5) {
 
44
                        switch (mode) {
 
45
                            case BS_UNDECIDED: mode = BS_REMOVE;
 
46
                            case BS_REMOVE: remove_remark();
 
47
                            case BS_INSERT: break;
 
48
                        }
 
49
                    }
 
50
                    break;
 
51
            }
 
52
        }
 
53
 
 
54
        mode = get_leftson()->toggle_bootstrap100(mode);
 
55
        mode = get_rightson()->toggle_bootstrap100(mode);
 
56
    }
 
57
    return mode;
 
58
}
 
59
void GBT_TREE::remove_bootstrap() {
 
60
    freenull(remark_branch);
 
61
    if (!is_leaf) {
 
62
        get_leftson()->remove_bootstrap();
 
63
        get_rightson()->remove_bootstrap();
 
64
    }
 
65
}
 
66
void GBT_TREE::reset_branchlengths() {
 
67
    if (!is_leaf) {
 
68
        leftlen = rightlen = DEFAULT_BRANCH_LENGTH;
 
69
 
 
70
        get_leftson()->reset_branchlengths();
 
71
        get_rightson()->reset_branchlengths();
 
72
    }
 
73
}
 
74
 
 
75
void GBT_TREE::scale_branchlengths(double factor) {
 
76
    if (!is_leaf) {
 
77
        leftlen  *= factor;
 
78
        rightlen *= factor;
 
79
 
 
80
        get_leftson()->scale_branchlengths(factor);
 
81
        get_rightson()->scale_branchlengths(factor);
 
82
    }
 
83
}
 
84
 
 
85
GBT_LEN GBT_TREE::sum_child_lengths() const {
 
86
    if (is_leaf) return 0.0;
 
87
    return
 
88
        leftlen +
 
89
        rightlen +
 
90
        get_leftson()->sum_child_lengths() +
 
91
        get_rightson()->sum_child_lengths();
 
92
}
 
93
 
 
94
void GBT_TREE::bootstrap2branchlen() {
 
95
    //! copy bootstraps to branchlengths
 
96
    if (is_leaf) {
 
97
        set_branchlength_unrooted(DEFAULT_BRANCH_LENGTH);
 
98
    }
 
99
    else {
 
100
        if (father) {
 
101
            double         bootstrap;
 
102
            GBT_RemarkType rtype = parse_bootstrap(bootstrap);
 
103
 
 
104
            if (rtype == REMARK_BOOTSTRAP) {
 
105
                double len = bootstrap/100.0;
 
106
                set_branchlength_unrooted(len);
 
107
            }
 
108
            else {
 
109
                set_branchlength_unrooted(1.0); // no bootstrap means "100%"
 
110
            }
 
111
        }
 
112
        get_leftson()->bootstrap2branchlen();
 
113
        get_rightson()->bootstrap2branchlen();
 
114
    }
 
115
}
 
116
 
 
117
void GBT_TREE::branchlen2bootstrap() {
 
118
    //! copy branchlengths to bootstraps
 
119
    remove_remark();
 
120
    if (!is_leaf) {
 
121
        if (!is_root_node()) {
 
122
            set_bootstrap(get_branchlength_unrooted()*100.0);
 
123
        }
 
124
        get_leftson()->branchlen2bootstrap();
 
125
        get_rightson()->branchlen2bootstrap();
 
126
    }
 
127
}
 
128
 
 
129
GBT_TREE *GBT_TREE::fixDeletedSon() {
 
130
    // fix node after one son has been deleted
 
131
    GBT_TREE *result = NULL;
 
132
 
 
133
    if (leftson) {
 
134
        gb_assert(!rightson);
 
135
        result  = leftson;
 
136
        leftson = NULL;
 
137
    }
 
138
    else {
 
139
        gb_assert(!leftson);
 
140
        gb_assert(rightson);
 
141
 
 
142
        result   = rightson;
 
143
        rightson = NULL;
 
144
    }
 
145
 
 
146
    // now 'result' contains the lasting tree
 
147
    result->father = father;
 
148
 
 
149
    if (remark_branch && !result->remark_branch) { // rescue remarks if possible
 
150
        result->remark_branch    = remark_branch;
 
151
        remark_branch = NULL;
 
152
    }
 
153
    if (gb_node && !result->gb_node) { // rescue group if possible
 
154
        result->gb_node = gb_node;
 
155
        gb_node         = NULL;
 
156
    }
 
157
 
 
158
    is_leaf = true; // don't try recursive delete
 
159
    delete this;
 
160
 
 
161
    return result;
 
162
}
 
163
 
 
164
const GBT_TREE *GBT_TREE::ancestor_common_with(const GBT_TREE *other) const {
 
165
    if (this == other) return this;
 
166
    if (is_anchestor_of(other)) return this;
 
167
    if (other->is_anchestor_of(this)) return other;
 
168
    return get_father()->ancestor_common_with(other->get_father());
 
169
}
 
170
 
 
171
// ----------------------
 
172
//      remove leafs
 
173
 
 
174
GBT_TREE *GBT_remove_leafs(GBT_TREE *tree, GBT_TreeRemoveType mode, const GB_HASH *species_hash, int *removed, int *groups_removed) { // @@@ add tests for GBT_remove_leafs()
 
175
    /*! Remove leafs from given 'tree'.
 
176
     * @param tree tree from which species will be removed
 
177
     * @param mode defines what to remove
 
178
     * @param species_hash hash translation from leaf-name to species-dbnode (not needed if tree is linked; see GBT_link_tree)
 
179
     * @param removed will be incremented for each removed leaf (if !NULL)
 
180
     * @param groups_removed will be incremented for each removed group (if !NULL)
 
181
     * @return new root node
 
182
     *
 
183
     * if 'species_hash' is not provided and tree is not linked,
 
184
     * the function will silently act strange:
 
185
     * - GBT_REMOVE_MARKED and GBT_REMOVE_UNMARKED will remove any leaf
 
186
     * - GBT_REMOVE_ZOMBIES and GBT_KEEP_MARKED will remove all leafs
 
187
     */
 
188
 
 
189
    if (tree->is_leaf) {
 
190
        if (tree->name) {
 
191
            bool    deleteSelf = false;
 
192
            GBDATA *gb_node;
 
193
 
 
194
            if (species_hash) {
 
195
                gb_node = (GBDATA*)GBS_read_hash(species_hash, tree->name);
 
196
                gb_assert(tree->gb_node == 0); // don't call linked tree with 'species_hash'!
 
197
            }
 
198
            else gb_node = tree->gb_node;
 
199
 
 
200
            if (gb_node) {
 
201
                if (mode & (GBT_REMOVE_MARKED|GBT_REMOVE_UNMARKED)) {
 
202
                    long flag = GB_read_flag(gb_node);
 
203
                    deleteSelf = (flag && (mode&GBT_REMOVE_MARKED)) || (!flag && (mode&GBT_REMOVE_UNMARKED));
 
204
                }
 
205
            }
 
206
            else { // zombie
 
207
                if (mode & GBT_REMOVE_ZOMBIES) deleteSelf = true;
 
208
            }
 
209
 
 
210
            if (deleteSelf) {
 
211
                delete tree;
 
212
                tree = NULL;
 
213
                if (removed) (*removed)++;
 
214
            }
 
215
        }
 
216
    }
 
217
    else {
 
218
        tree->leftson  = GBT_remove_leafs(tree->leftson, mode, species_hash, removed, groups_removed);
 
219
        tree->rightson = GBT_remove_leafs(tree->rightson, mode, species_hash, removed, groups_removed);
 
220
 
 
221
        if (tree->leftson) {
 
222
            if (!tree->rightson) { // right son deleted
 
223
                tree = tree->fixDeletedSon();
 
224
            }
 
225
            // otherwise no son deleted
 
226
        }
 
227
        else if (tree->rightson) { // left son deleted
 
228
            tree = tree->fixDeletedSon();
 
229
        }
 
230
        else {                  // everything deleted -> delete self
 
231
            if (tree->name && groups_removed) (*groups_removed)++;
 
232
            tree->is_leaf = true;
 
233
            delete tree;
 
234
            tree = NULL;
 
235
        }
 
236
    }
 
237
 
 
238
    return tree;
 
239
}
 
240
 
 
241
// ---------------------
 
242
//      trees order
 
243
 
 
244
inline int get_tree_idx(GBDATA *gb_tree) {
 
245
    GBDATA *gb_order = GB_entry(gb_tree, "order");
 
246
    int     idx      = 0;
 
247
    if (gb_order) {
 
248
        idx = GB_read_int(gb_order);
 
249
        gb_assert(idx>0); // invalid index
 
250
    }
 
251
    return idx;
 
252
}
 
253
 
 
254
inline int get_max_tree_idx(GBDATA *gb_treedata) {
 
255
    int max_idx = 0;
 
256
    for (GBDATA *gb_tree = GB_child(gb_treedata); gb_tree; gb_tree = GB_nextChild(gb_tree)) {
 
257
        int idx = get_tree_idx(gb_tree);
 
258
        if (idx>max_idx) max_idx = idx;
 
259
    }
 
260
    return max_idx;
 
261
}
 
262
 
 
263
inline GBDATA *get_tree_with_idx(GBDATA *gb_treedata, int at_idx) {
 
264
    GBDATA *gb_found = NULL;
 
265
    for (GBDATA *gb_tree = GB_child(gb_treedata); gb_tree && !gb_found; gb_tree = GB_nextChild(gb_tree)) {
 
266
        int idx = get_tree_idx(gb_tree);
 
267
        if (idx == at_idx) {
 
268
            gb_found = gb_tree;
 
269
        }
 
270
    }
 
271
    return gb_found;
 
272
}
 
273
 
 
274
inline GBDATA *get_tree_infrontof_idx(GBDATA *gb_treedata, int infrontof_idx) {
 
275
    GBDATA *gb_infrontof = NULL;
 
276
    if (infrontof_idx) {
 
277
        int best_idx = 0;
 
278
        for (GBDATA *gb_tree = GB_child(gb_treedata); gb_tree; gb_tree = GB_nextChild(gb_tree)) {
 
279
            int idx = get_tree_idx(gb_tree);
 
280
            gb_assert(idx);
 
281
            if (idx>best_idx && idx<infrontof_idx) {
 
282
                best_idx     = idx;
 
283
                gb_infrontof = gb_tree;
 
284
            }
 
285
        }
 
286
    }
 
287
    return gb_infrontof;
 
288
}
 
289
 
 
290
inline GBDATA *get_tree_behind_idx(GBDATA *gb_treedata, int behind_idx) {
 
291
    GBDATA *gb_behind = NULL;
 
292
    if (behind_idx) {
 
293
        int best_idx = INT_MAX;
 
294
        for (GBDATA *gb_tree = GB_child(gb_treedata); gb_tree; gb_tree = GB_nextChild(gb_tree)) {
 
295
            int idx = get_tree_idx(gb_tree);
 
296
            gb_assert(idx);
 
297
            if (idx>behind_idx && idx<best_idx) {
 
298
                best_idx  = idx;
 
299
                gb_behind = gb_tree;
 
300
            }
 
301
        }
 
302
    }
 
303
    return gb_behind;
 
304
}
 
305
 
 
306
inline GB_ERROR set_tree_idx(GBDATA *gb_tree, int idx) {
 
307
    GB_ERROR  error    = NULL;
 
308
    GBDATA   *gb_order = GB_entry(gb_tree, "order");
 
309
    if (!gb_order) {
 
310
        gb_order = GB_create(gb_tree, "order", GB_INT);
 
311
        if (!gb_order) error = GB_await_error();
 
312
    }
 
313
    if (!error) error = GB_write_int(gb_order, idx);
 
314
    return error;
 
315
}
 
316
 
 
317
static GB_ERROR reserve_tree_idx(GBDATA *gb_treedata, int idx) {
 
318
    GB_ERROR  error   = NULL;
 
319
    GBDATA   *gb_tree = get_tree_with_idx(gb_treedata, idx);
 
320
    if (gb_tree) {
 
321
        error = reserve_tree_idx(gb_treedata, idx+1);
 
322
        if (!error) error = set_tree_idx(gb_tree, idx+1);
 
323
    }
 
324
    return error;
 
325
}
 
326
 
 
327
static void ensure_trees_have_order(GBDATA *gb_treedata) {
 
328
    GBDATA *gb_main = GB_get_father(gb_treedata);
 
329
 
 
330
    gb_assert(GB_get_root(gb_main)       == gb_main);
 
331
    gb_assert(GBT_get_tree_data(gb_main) == gb_treedata);
 
332
 
 
333
    GB_ERROR  error              = NULL;
 
334
    GBDATA   *gb_tree_order_flag = GB_search(gb_main, "/tmp/trees_have_order", GB_INT);
 
335
 
 
336
    if (!gb_tree_order_flag) error = GB_await_error();
 
337
    else {
 
338
        if (GB_read_int(gb_tree_order_flag) == 0) { // not checked yet
 
339
            int max_idx = get_max_tree_idx(gb_treedata);
 
340
            for (GBDATA *gb_tree = GB_child(gb_treedata); gb_tree && !error; gb_tree = GB_nextChild(gb_tree)) {
 
341
                if (!get_tree_idx(gb_tree)) {
 
342
                    error = set_tree_idx(gb_tree, ++max_idx);
 
343
                }
 
344
            }
 
345
            if (!error) error = GB_write_int(gb_tree_order_flag, 1);
 
346
        }
 
347
    }
 
348
    if (error) GBK_terminatef("failed to order trees (Reason: %s)", error);
 
349
}
 
350
 
 
351
static void tree_set_default_order(GBDATA *gb_tree) {
 
352
    // if 'gb_tree' has no order yet, move it to the bottom (as done previously)
 
353
    if (!get_tree_idx(gb_tree)) {
 
354
        set_tree_idx(gb_tree, get_max_tree_idx(GB_get_father(gb_tree))+1);
 
355
    }
 
356
}
 
357
 
 
358
// -----------------------------
 
359
//      tree write functions
 
360
 
 
361
GB_ERROR GBT_write_group_name(GBDATA *gb_group_name, const char *new_group_name) {
 
362
    GB_ERROR error = 0;
 
363
    size_t   len   = strlen(new_group_name);
 
364
 
 
365
    if (len >= GB_GROUP_NAME_MAX) {
 
366
        error = GBS_global_string("Group name '%s' too long (max %i characters)", new_group_name, GB_GROUP_NAME_MAX);
 
367
    }
 
368
    else {
 
369
        error = GB_write_string(gb_group_name, new_group_name);
 
370
    }
 
371
    return error;
 
372
}
 
373
 
 
374
static GB_ERROR gbt_write_tree_nodes(GBDATA *gb_tree, GBT_TREE *node, long *startid) {
 
375
    // increments '*startid' for each inner node (not for leafs)
 
376
 
 
377
    GB_ERROR error = NULL;
 
378
 
 
379
    if (!node->is_leaf) {
 
380
        bool node_is_used = false;
 
381
 
 
382
        if (node->name && node->name[0]) {
 
383
            if (!node->gb_node) {
 
384
                node->gb_node = GB_create_container(gb_tree, "node");
 
385
                if (!node->gb_node) error = GB_await_error();
 
386
            }
 
387
            if (!error) {
 
388
                GBDATA *gb_name     = GB_search(node->gb_node, "group_name", GB_STRING);
 
389
                if (!gb_name) error = GB_await_error();
 
390
                else    error       = GBT_write_group_name(gb_name, node->name);
 
391
 
 
392
                node_is_used = true; // wrote groupname -> node is used
 
393
            }
 
394
        }
 
395
 
 
396
        if (node->gb_node && !error) {
 
397
            if (!node_is_used) {
 
398
                GBDATA *gb_nonid = GB_child(node->gb_node);
 
399
                while (gb_nonid && strcmp("id", GB_read_key_pntr(gb_nonid)) == 0) {
 
400
                    gb_nonid = GB_nextChild(gb_nonid);
 
401
                }
 
402
                if (gb_nonid) node_is_used = true; // found child that is not "id" -> node is used
 
403
            }
 
404
 
 
405
            if (node_is_used) { // set id for used nodes
 
406
                error = GBT_write_int(node->gb_node, "id", *startid);
 
407
                if (!error) GB_clear_user_flag(node->gb_node, GB_USERFLAG_GHOSTNODE); // mark node as "used"
 
408
            }
 
409
            else {          // delete unused nodes
 
410
                error = GB_delete(node->gb_node);
 
411
                if (!error) node->gb_node = 0;
 
412
            }
 
413
        }
 
414
 
 
415
        (*startid)++;
 
416
        if (!error) error = gbt_write_tree_nodes(gb_tree, node->leftson, startid);
 
417
        if (!error) error = gbt_write_tree_nodes(gb_tree, node->rightson, startid);
 
418
    }
 
419
    return error;
 
420
}
 
421
 
 
422
static char *gbt_write_tree_rek_new(const GBT_TREE *node, char *dest, long mode) {
 
423
    {
 
424
        const char *c1 = node->get_remark();
 
425
        if (c1) {
 
426
            if (mode == GBT_PUT_DATA) {
 
427
                int c;
 
428
                *(dest++) = 'R';
 
429
                while ((c = *(c1++))) {
 
430
                    if (c == 1) continue;
 
431
                    *(dest++) = c;
 
432
                }
 
433
                *(dest++) = 1;
 
434
            }
 
435
            else {
 
436
                dest += strlen(c1) + 2;
 
437
            }
 
438
        }
 
439
    }
 
440
    if (node->is_leaf) {
 
441
        if (mode == GBT_PUT_DATA) {
 
442
            *(dest++) = 'L';
 
443
            if (node->name) strcpy(dest, node->name);
 
444
 
 
445
            char *c1;
 
446
            while ((c1 = (char *)strchr(dest, 1))) {
 
447
                *c1 = 2;
 
448
            }
 
449
            dest      += strlen(dest);
 
450
            *(dest++)  = 1;
 
451
            
 
452
            return dest;
 
453
        }
 
454
        else {
 
455
            if (node->name) return dest+1+strlen(node->name)+1; // N name term
 
456
            return dest+1+1;
 
457
        }
 
458
    }
 
459
    else {
 
460
        char buffer[40];
 
461
        sprintf(buffer, "%g,%g;", node->leftlen, node->rightlen);
 
462
        if (mode == GBT_PUT_DATA) {
 
463
            *(dest++) = 'N';
 
464
            strcpy(dest, buffer);
 
465
            dest += strlen(buffer);
 
466
        }
 
467
        else {
 
468
            dest += strlen(buffer)+1;
 
469
        }
 
470
        dest = gbt_write_tree_rek_new(node->leftson,  dest, mode);
 
471
        dest = gbt_write_tree_rek_new(node->rightson, dest, mode);
 
472
        return dest;
 
473
    }
 
474
}
 
475
 
 
476
static GB_ERROR gbt_write_tree(GBDATA *gb_main, GBDATA *gb_tree, const char *tree_name, GBT_TREE *tree) {
 
477
    /*! writes a tree to the database.
 
478
     *
 
479
     * If tree is loaded by function GBT_read_tree(..) then 'tree_name' should be NULL
 
480
     * else 'gb_tree' should be set to NULL
 
481
     *
 
482
     * To copy a tree call GB_copy(dest,source);
 
483
     * or set recursively all tree->gb_node variables to zero (that unlinks the tree),
 
484
     */
 
485
 
 
486
    GB_ERROR error = 0;
 
487
 
 
488
    if (tree) {
 
489
        if (tree_name) {
 
490
            if (gb_tree) error = GBS_global_string("can't change name of existing tree (to '%s')", tree_name);
 
491
            else {
 
492
                error = GBT_check_tree_name(tree_name);
 
493
                if (!error) {
 
494
                    GBDATA *gb_tree_data = GBT_get_tree_data(gb_main);
 
495
                    gb_tree              = GB_search(gb_tree_data, tree_name, GB_CREATE_CONTAINER);
 
496
 
 
497
                    if (!gb_tree) error = GB_await_error();
 
498
                }
 
499
            }
 
500
        }
 
501
        else {
 
502
            if (!gb_tree) error = "No tree name given";
 
503
        }
 
504
 
 
505
        gb_assert(gb_tree || error);
 
506
 
 
507
        if (!error) {
 
508
            // mark all old style tree data for deletion
 
509
            GBDATA *gb_node;
 
510
            for (gb_node = GB_entry(gb_tree, "node"); gb_node; gb_node = GB_nextEntry(gb_node)) {
 
511
                GB_set_user_flag(gb_node, GB_USERFLAG_GHOSTNODE); // mark as "possibly unused"
 
512
            }
 
513
 
 
514
            // build tree-string and save to DB
 
515
            {
 
516
                char *t_size = gbt_write_tree_rek_new(tree, 0, GBT_GET_SIZE); // calc size of tree-string
 
517
                char *ctree  = (char *)GB_calloc(sizeof(char), (size_t)(t_size+1)); // allocate buffer for tree-string
 
518
 
 
519
                t_size = gbt_write_tree_rek_new(tree, ctree, GBT_PUT_DATA); // write into buffer
 
520
                *(t_size) = 0;
 
521
 
 
522
                bool was_allowed = GB_allow_compression(gb_main, false);
 
523
                error            = GBT_write_string(gb_tree, "tree", ctree);
 
524
                GB_allow_compression(gb_main, was_allowed);
 
525
                free(ctree);
 
526
            }
 
527
        }
 
528
 
 
529
        if (!error) {
 
530
            // save nodes to DB
 
531
            long size         = 0;
 
532
            error             = gbt_write_tree_nodes(gb_tree, tree, &size); // reports number of nodes in 'size'
 
533
            if (!error) error = GBT_write_int(gb_tree, "nnodes", size);
 
534
 
 
535
            if (!error) {
 
536
                GBDATA *gb_node;
 
537
                GBDATA *gb_node_next;
 
538
 
 
539
                for (gb_node = GB_entry(gb_tree, "node"); // delete all ghost nodes
 
540
                     gb_node && !error;
 
541
                     gb_node = gb_node_next)
 
542
                {
 
543
                    GBDATA *gbd = GB_entry(gb_node, "id");
 
544
                    gb_node_next = GB_nextEntry(gb_node);
 
545
                    if (!gbd || GB_user_flag(gb_node, GB_USERFLAG_GHOSTNODE)) error = GB_delete(gb_node);
 
546
                }
 
547
            }
 
548
        }
 
549
 
 
550
        if (!error) tree_set_default_order(gb_tree);
 
551
    }
 
552
 
 
553
    return error;
 
554
}
 
555
 
 
556
GB_ERROR GBT_write_tree(GBDATA *gb_main, const char *tree_name, GBT_TREE *tree) {
 
557
    return gbt_write_tree(gb_main, NULL, tree_name, tree);
 
558
}
 
559
GB_ERROR GBT_overwrite_tree(GBDATA *gb_tree, GBT_TREE *tree) {
 
560
    return gbt_write_tree(GB_get_root(gb_tree), gb_tree, NULL, tree);
 
561
}
 
562
 
 
563
static GB_ERROR write_tree_remark(GBDATA *gb_tree, const char *remark) {
 
564
    return GBT_write_string(gb_tree, "remark", remark);
 
565
}
 
566
GB_ERROR GBT_write_tree_remark(GBDATA *gb_main, const char *tree_name, const char *remark) {
 
567
    return write_tree_remark(GBT_find_tree(gb_main, tree_name), remark);
 
568
}
 
569
 
 
570
GB_ERROR GBT_log_to_tree_remark(GBDATA *gb_tree, const char *log_entry) {
 
571
    GB_ERROR    error      = NULL;
 
572
    const char *old_remark = GBT_read_char_pntr(gb_tree, "remark");
 
573
    if (!old_remark && GB_have_error()) {
 
574
        error = GB_await_error();
 
575
    }
 
576
    else {
 
577
        char *new_remark = GBS_log_dated_action_to(old_remark, log_entry);
 
578
        error            = write_tree_remark(gb_tree, new_remark);
 
579
        free(new_remark);
 
580
    }
 
581
    return error;
 
582
}
 
583
GB_ERROR GBT_log_to_tree_remark(GBDATA *gb_main, const char *tree_name, const char *log_entry) {
 
584
    return GBT_log_to_tree_remark(GBT_find_tree(gb_main, tree_name), log_entry);
 
585
}
 
586
 
 
587
GB_ERROR GBT_write_tree_with_remark(GBDATA *gb_main, const char *tree_name, GBT_TREE *tree, const char *remark) {
 
588
    GB_ERROR error              = GBT_write_tree(gb_main, tree_name, tree);
 
589
    if (!error && remark) error = GBT_write_tree_remark(gb_main, tree_name, remark);
 
590
    return error;
 
591
}
 
592
 
 
593
// ----------------------------
 
594
//      tree read functions
 
595
 
 
596
static GBT_TREE *gbt_read_tree_rek(char **data, long *startid, GBDATA **gb_tree_nodes, const TreeNodeFactory& nodeFactory, int size_of_tree, GB_ERROR& error) {
 
597
    GBT_TREE *node = NULL;
 
598
    if (!error) {
 
599
        node = nodeFactory.makeNode();
 
600
 
 
601
        char  c = *((*data)++);
 
602
        char *p1;
 
603
 
 
604
        if (c=='R') {
 
605
            p1 = strchr(*data, 1);
 
606
            *(p1++) = 0;
 
607
            node->set_remark(*data);
 
608
            c = *(p1++);
 
609
            *data = p1;
 
610
        }
 
611
 
 
612
 
 
613
        if (c=='N') {
 
614
            p1 = (char *)strchr(*data, ',');
 
615
            *(p1++) = 0;
 
616
            node->leftlen = GB_atof(*data);
 
617
            *data = p1;
 
618
            p1 = (char *)strchr(*data, ';');
 
619
            *(p1++) = 0;
 
620
            node->rightlen = GB_atof(*data);
 
621
            *data = p1;
 
622
            if ((*startid < size_of_tree) && (node->gb_node = gb_tree_nodes[*startid])) {
 
623
                GBDATA *gb_group_name = GB_entry(node->gb_node, "group_name");
 
624
                if (gb_group_name) {
 
625
                    node->name = GB_read_string(gb_group_name);
 
626
                }
 
627
            }
 
628
            (*startid)++;
 
629
            node->leftson = gbt_read_tree_rek(data, startid, gb_tree_nodes, nodeFactory, size_of_tree, error);
 
630
            if (!node->leftson) freenull(node);
 
631
            else {
 
632
                node->rightson = gbt_read_tree_rek(data, startid, gb_tree_nodes, nodeFactory, size_of_tree, error);
 
633
                if (!node->rightson) {
 
634
                    freenull(node->leftson);
 
635
                    freenull(node);
 
636
                }
 
637
                else {
 
638
                    node->leftson->father  = node;
 
639
                    node->rightson->father = node;
 
640
                }
 
641
            }
 
642
        }
 
643
        else if (c=='L') {
 
644
            node->is_leaf = true;
 
645
            p1            = (char *)strchr(*data, 1);
 
646
 
 
647
            gb_assert(p1);
 
648
            gb_assert(p1[0] == 1);
 
649
 
 
650
            *p1        = 0;
 
651
            node->name = strdup(*data);
 
652
            *data      = p1+1;
 
653
        }
 
654
        else {
 
655
            if (!c) {
 
656
                error = "Unexpected end of tree definition.";
 
657
            }
 
658
            else {
 
659
                error = GBS_global_string("Can't interpret tree definition (expected 'N' or 'L' - not '%c')", c);
 
660
            }
 
661
            freenull(node);
 
662
        }
 
663
    }
 
664
    gb_assert(contradicted(node, error));
 
665
    return node;
 
666
}
 
667
 
 
668
 
 
669
static GBT_TREE *read_tree_and_size_internal(GBDATA *gb_tree, GBDATA *gb_ctree, const TreeNodeFactory& nodeFactory, int node_count, GB_ERROR& error) {
 
670
    GBDATA   **gb_tree_nodes;
 
671
    GBT_TREE  *node = 0;
 
672
 
 
673
    gb_tree_nodes = (GBDATA **)GB_calloc(sizeof(GBDATA *), (size_t)node_count);
 
674
    if (gb_tree) {
 
675
        GBDATA *gb_node;
 
676
 
 
677
        for (gb_node = GB_entry(gb_tree, "node"); gb_node && !error; gb_node = GB_nextEntry(gb_node)) {
 
678
            long    i;
 
679
            GBDATA *gbd = GB_entry(gb_node, "id");
 
680
            if (!gbd) continue;
 
681
 
 
682
            i = GB_read_int(gbd);
 
683
            if (i<0 || i >= node_count) {
 
684
                error = "An inner node of the tree is corrupt";
 
685
            }
 
686
            else {
 
687
                gb_tree_nodes[i] = gb_node;
 
688
            }
 
689
        }
 
690
    }
 
691
    if (!error) {
 
692
        char *cptr[1];
 
693
        long  startid[1];
 
694
        char *fbuf;
 
695
 
 
696
        startid[0] = 0;
 
697
        fbuf       = cptr[0] = GB_read_string(gb_ctree);
 
698
        node       = gbt_read_tree_rek(cptr, startid, gb_tree_nodes, nodeFactory, node_count, error);
 
699
        free (fbuf);
 
700
    }
 
701
 
 
702
    free(gb_tree_nodes);
 
703
 
 
704
    gb_assert(contradicted(node, error));
 
705
    return node;
 
706
}
 
707
 
 
708
GBT_TREE *GBT_read_tree_and_size(GBDATA *gb_main, const char *tree_name, const TreeNodeFactory& nodeFactory, int *tree_size) {
 
709
    /*! Loads a tree from DB into any user defined structure.
 
710
     *
 
711
     * @param gb_main DB root node
 
712
     * @param tree_name is the name of the tree in the db
 
713
     * @param nodeFactory makes the tree-node instances
 
714
     * @param tree_size if != NULL -> gets set to "size of tree" (aka number of leafs minus 1)
 
715
     *
 
716
     * @return
 
717
     * - NULL if any error occurs (which is exported then)
 
718
     * - root of loaded tree (dynamic type depends on 'nodeFactory')
 
719
     */
 
720
 
 
721
    GB_ERROR error = 0;
 
722
 
 
723
    if (!tree_name) {
 
724
        error = "no treename given";
 
725
    }
 
726
    else {
 
727
        error = GBT_check_tree_name(tree_name);
 
728
        if (!error) {
 
729
            GBDATA *gb_tree = GBT_find_tree(gb_main, tree_name);
 
730
 
 
731
            if (!gb_tree) {
 
732
                error = "tree not found";
 
733
            }
 
734
            else {
 
735
                GBDATA *gb_nnodes = GB_entry(gb_tree, "nnodes");
 
736
                if (!gb_nnodes) {
 
737
                    error = "tree is empty";
 
738
                }
 
739
                else {
 
740
                    long size = GB_read_int(gb_nnodes);
 
741
                    if (!size) {
 
742
                        error = "has no nodes";
 
743
                    }
 
744
                    else {
 
745
                        GBDATA *gb_ctree = GB_search(gb_tree, "tree", GB_FIND);
 
746
                        if (!gb_ctree) {
 
747
                            error = "old unsupported tree format";
 
748
                        }
 
749
                        else { // "new" style tree
 
750
                            GBT_TREE *tree = read_tree_and_size_internal(gb_tree, gb_ctree, nodeFactory, size, error);
 
751
                            if (!error) {
 
752
                                gb_assert(tree);
 
753
                                if (tree_size) *tree_size = size; // return size of tree (=leafs-1)
 
754
                                tree->announce_tree_constructed();
 
755
                                return tree;
 
756
                            }
 
757
 
 
758
                            gb_assert(!tree);
 
759
                        }
 
760
                    }
 
761
                }
 
762
            }
 
763
        }
 
764
    }
 
765
 
 
766
    gb_assert(error);
 
767
    GB_export_errorf("Failed to read tree '%s' (Reason: %s)", tree_name, error);
 
768
    return NULL;
 
769
}
 
770
 
 
771
GBT_TREE *GBT_read_tree(GBDATA *gb_main, const char *tree_name, const TreeNodeFactory& nodeFactory) {
 
772
    //! @see GBT_read_tree_and_size()
 
773
    return GBT_read_tree_and_size(gb_main, tree_name, nodeFactory, 0);
 
774
}
 
775
 
 
776
size_t GBT_count_leafs(const GBT_TREE *tree) {
 
777
    if (tree->is_leaf) {
 
778
        return 1;
 
779
    }
 
780
    return GBT_count_leafs(tree->leftson) + GBT_count_leafs(tree->rightson);
 
781
}
 
782
 
 
783
static GB_ERROR gbt_invalid_because(const GBT_TREE *tree, const char *reason) {
 
784
    return GBS_global_string("((GBT_TREE*)0x%p) %s", tree, reason);
 
785
}
 
786
 
 
787
inline bool has_son(const GBT_TREE *father, const GBT_TREE *son) {
 
788
    return !father->is_leaf && (father->leftson == son || father->rightson == son);
 
789
}
 
790
 
 
791
static GB_ERROR gbt_is_invalid(bool is_root, const GBT_TREE *tree) {
 
792
    if (tree->father) {
 
793
        if (!has_son(tree->father, tree)) return gbt_invalid_because(tree, "is not son of its father");
 
794
    }
 
795
    else {
 
796
        if (!is_root) return gbt_invalid_because(tree, "has no father (but isn't root)");
 
797
    }
 
798
 
 
799
    GB_ERROR error = NULL;
 
800
    if (tree->is_leaf) {
 
801
        if (tree->leftson) return gbt_invalid_because(tree, "is leaf, but has leftson");
 
802
        if (tree->rightson) return gbt_invalid_because(tree, "is leaf, but has rightson");
 
803
    }
 
804
    else {
 
805
        if (!tree->leftson) return gbt_invalid_because(tree, "is inner node, but has no leftson");
 
806
        if (!tree->rightson) return gbt_invalid_because(tree, "is inner node, but has no rightson");
 
807
 
 
808
        error             = gbt_is_invalid(false, tree->leftson);
 
809
        if (!error) error = gbt_is_invalid(false, tree->rightson);
 
810
    }
 
811
    return error;
 
812
}
 
813
 
 
814
GB_ERROR GBT_is_invalid(const GBT_TREE *tree) {
 
815
    if (tree->father) return gbt_invalid_because(tree, "is expected to be the root-node, but has father");
 
816
    if (tree->is_leaf) return gbt_invalid_because(tree, "is expected to be the root-node, but is a leaf (tree too small)");
 
817
    return gbt_is_invalid(true, tree);
 
818
}
 
819
 
 
820
// -------------------------------------------
 
821
//      link the tree tips to the database
 
822
 
 
823
struct link_tree_data {
 
824
    GB_HASH      *species_hash;
 
825
    GB_HASH      *seen_species;                     // used to count duplicates
 
826
    arb_progress *progress;
 
827
    int           zombies;                          // counts zombies
 
828
    int           duplicates;                       // counts duplicates
 
829
};
 
830
 
 
831
static GB_ERROR gbt_link_tree_to_hash_rek(GBT_TREE *tree, link_tree_data *ltd) {
 
832
    GB_ERROR error = 0;
 
833
    if (tree->is_leaf) {
 
834
        tree->gb_node = 0;
 
835
        if (tree->name) {
 
836
            GBDATA *gbd = (GBDATA*)GBS_read_hash(ltd->species_hash, tree->name);
 
837
            if (gbd) tree->gb_node = gbd;
 
838
            else ltd->zombies++;
 
839
 
 
840
            if (ltd->seen_species) {
 
841
                if (GBS_read_hash(ltd->seen_species, tree->name)) ltd->duplicates++;
 
842
                else GBS_write_hash(ltd->seen_species, tree->name, 1);
 
843
            }
 
844
        }
 
845
 
 
846
        if (ltd->progress) ++(*ltd->progress);
 
847
    }
 
848
    else {
 
849
        error = gbt_link_tree_to_hash_rek(tree->leftson, ltd);
 
850
        if (!error) error = gbt_link_tree_to_hash_rek(tree->rightson, ltd);
 
851
    }
 
852
    return error;
 
853
}
 
854
 
 
855
static GB_ERROR GBT_link_tree_using_species_hash(GBT_TREE *tree, bool show_status, GB_HASH *species_hash, int *zombies, int *duplicates) {
 
856
    GB_ERROR       error;
 
857
    link_tree_data ltd;
 
858
    long           leafs = 0;
 
859
 
 
860
    if (duplicates || show_status) {
 
861
        leafs = GBT_count_leafs(tree);
 
862
    }
 
863
 
 
864
    ltd.species_hash = species_hash;
 
865
    ltd.seen_species = leafs ? GBS_create_hash(leafs, GB_IGNORE_CASE) : 0;
 
866
    ltd.zombies      = 0;
 
867
    ltd.duplicates   = 0;
 
868
 
 
869
    if (show_status) {
 
870
        ltd.progress = new arb_progress("Relinking tree to database", leafs);
 
871
    }
 
872
    else {
 
873
        ltd.progress = NULL;
 
874
    }
 
875
 
 
876
    error = gbt_link_tree_to_hash_rek(tree, &ltd);
 
877
    if (ltd.seen_species) GBS_free_hash(ltd.seen_species);
 
878
 
 
879
    if (zombies) *zombies       = ltd.zombies;
 
880
    if (duplicates) *duplicates = ltd.duplicates;
 
881
 
 
882
    delete ltd.progress;
 
883
 
 
884
    return error;
 
885
}
 
886
 
 
887
GB_ERROR GBT_link_tree(GBT_TREE *tree, GBDATA *gb_main, bool show_status, int *zombies, int *duplicates) {
 
888
    /*! Link a given tree to the database. That means that for all tips the member
 
889
     * 'gb_node' is set to the database container holding the species data.
 
890
     *
 
891
     * @param tree which will be linked to DB
 
892
     * @param gb_main DB root node
 
893
     * @param show_status show a progress indicator?
 
894
     * @param zombies if != NULL -> set to number of zombies (aka non-existing species) in tree
 
895
     * @param duplicates if != NULL -> set to number of duplicated species in tree
 
896
     *
 
897
     * @return error on failure
 
898
     *
 
899
     * @see GBT_unlink_tree()
 
900
     */
 
901
 
 
902
    GB_HASH  *species_hash = GBT_create_species_hash(gb_main);
 
903
    GB_ERROR  error        = GBT_link_tree_using_species_hash(tree, show_status, species_hash, zombies, duplicates);
 
904
 
 
905
    GBS_free_hash(species_hash);
 
906
 
 
907
    return error;
 
908
}
 
909
 
 
910
void GBT_unlink_tree(GBT_TREE *tree) {
 
911
    /*! Unlink tree from the database.
 
912
     * @see GBT_link_tree()
 
913
     */
 
914
    tree->gb_node = 0;
 
915
    if (!tree->is_leaf) {
 
916
        GBT_unlink_tree(tree->leftson);
 
917
        GBT_unlink_tree(tree->rightson);
 
918
    }
 
919
}
 
920
 
 
921
// ----------------------
 
922
//      search trees
 
923
 
 
924
GBDATA *GBT_find_tree(GBDATA *gb_main, const char *tree_name) {
 
925
    /*! @return
 
926
     * - DB tree container associated with tree_name
 
927
     * - NULL if no such tree exists
 
928
     */
 
929
    return GB_entry(GBT_get_tree_data(gb_main), tree_name);
 
930
}
 
931
 
 
932
inline bool is_tree(GBDATA *gb_tree) {
 
933
    if (!gb_tree) return false;
 
934
    GBDATA *gb_tree_data = GB_get_father(gb_tree);
 
935
    return gb_tree_data && GB_has_key(gb_tree_data, "tree_data");
 
936
}
 
937
 
 
938
inline GBDATA *get_first_tree(GBDATA *gb_main) {
 
939
    return GB_child(GBT_get_tree_data(gb_main));
 
940
}
 
941
 
 
942
inline GBDATA *get_next_tree(GBDATA *gb_tree) {
 
943
    if (!gb_tree) return NULL;
 
944
    gb_assert(is_tree(gb_tree));
 
945
    return GB_nextChild(gb_tree);
 
946
}
 
947
 
 
948
GBDATA *GBT_find_largest_tree(GBDATA *gb_main) {
 
949
    long    maxnodes   = 0;
 
950
    GBDATA *gb_largest = NULL;
 
951
 
 
952
    for (GBDATA *gb_tree = get_first_tree(gb_main); gb_tree; gb_tree = get_next_tree(gb_tree)) {
 
953
        long *nnodes = GBT_read_int(gb_tree, "nnodes");
 
954
        if (nnodes && *nnodes>maxnodes) {
 
955
            gb_largest = gb_tree;
 
956
            maxnodes   = *nnodes;
 
957
        }
 
958
    }
 
959
    return gb_largest;
 
960
}
 
961
 
 
962
GBDATA *GBT_tree_infrontof(GBDATA *gb_tree) {
 
963
    GBDATA *gb_treedata = GB_get_father(gb_tree);
 
964
    ensure_trees_have_order(gb_treedata);
 
965
    return get_tree_infrontof_idx(gb_treedata, get_tree_idx(gb_tree));
 
966
}
 
967
GBDATA *GBT_tree_behind(GBDATA *gb_tree) {
 
968
    GBDATA *gb_treedata = GB_get_father(gb_tree);
 
969
    ensure_trees_have_order(gb_treedata);
 
970
    return get_tree_behind_idx(gb_treedata, get_tree_idx(gb_tree));
 
971
}
 
972
 
 
973
GBDATA *GBT_find_top_tree(GBDATA *gb_main) {
 
974
    GBDATA *gb_treedata = GBT_get_tree_data(gb_main);
 
975
    ensure_trees_have_order(gb_treedata);
 
976
 
 
977
    GBDATA *gb_top = get_tree_with_idx(gb_treedata, 1);
 
978
    if (!gb_top) gb_top = get_tree_behind_idx(gb_treedata, 1);
 
979
    return gb_top;
 
980
}
 
981
GBDATA *GBT_find_bottom_tree(GBDATA *gb_main) {
 
982
    GBDATA *gb_treedata = GBT_get_tree_data(gb_main);
 
983
    ensure_trees_have_order(gb_treedata);
 
984
    return get_tree_infrontof_idx(gb_treedata, INT_MAX);
 
985
}
 
986
 
 
987
const char *GBT_existing_tree(GBDATA *gb_main, const char *tree_name) {
 
988
    // search for a specify existing tree (and fallback to any existing)
 
989
    GBDATA *gb_tree       = GBT_find_tree(gb_main, tree_name);
 
990
    if (!gb_tree) gb_tree = get_first_tree(gb_main);
 
991
    return GBT_get_tree_name(gb_tree);
 
992
}
 
993
 
 
994
GBDATA *GBT_find_next_tree(GBDATA *gb_tree) {
 
995
    GBDATA *gb_other = NULL;
 
996
    if (gb_tree) {
 
997
        gb_other = GBT_tree_behind(gb_tree);
 
998
        if (!gb_other) {
 
999
            gb_other = GBT_find_top_tree(GB_get_root(gb_tree));
 
1000
            if (gb_other == gb_tree) gb_other = NULL;
 
1001
        }
 
1002
    }
 
1003
    gb_assert(gb_other != gb_tree);
 
1004
    return gb_other;
 
1005
}
 
1006
 
 
1007
// --------------------
 
1008
//      tree names
 
1009
 
 
1010
const char *GBT_get_tree_name(GBDATA *gb_tree) {
 
1011
    if (!gb_tree) return NULL;
 
1012
    gb_assert(is_tree(gb_tree));
 
1013
    return GB_read_key_pntr(gb_tree);
 
1014
}
 
1015
 
 
1016
GB_ERROR GBT_check_tree_name(const char *tree_name) {
 
1017
    GB_ERROR error = GB_check_key(tree_name);
 
1018
    if (!error) {
 
1019
        if (strncmp(tree_name, "tree_", 5) != 0) {
 
1020
            error = "has to start with 'tree_'";
 
1021
        }
 
1022
    }
 
1023
    if (error) {
 
1024
        error = GBS_global_string("not a valid treename '%s' (Reason: %s)", tree_name, error);
 
1025
    }
 
1026
    return error;
 
1027
}
 
1028
 
 
1029
const char *GBT_name_of_largest_tree(GBDATA *gb_main) {
 
1030
    return GBT_get_tree_name(GBT_find_largest_tree(gb_main));
 
1031
}
 
1032
 
 
1033
const char *GBT_name_of_bottom_tree(GBDATA *gb_main) {
 
1034
    return GBT_get_tree_name(GBT_find_bottom_tree(gb_main));
 
1035
}
 
1036
 
 
1037
// -------------------
 
1038
//      tree info
 
1039
 
 
1040
const char *GBT_tree_info_string(GBDATA *gb_main, const char *tree_name, int maxTreeNameLen) {
 
1041
    // maxTreeNameLen shall be the max len of the longest tree name (or -1 -> do not format)
 
1042
 
 
1043
    const char *result  = 0;
 
1044
    GBDATA     *gb_tree = GBT_find_tree(gb_main, tree_name);
 
1045
 
 
1046
    if (!gb_tree) {
 
1047
        GB_export_errorf("tree '%s' not found", tree_name);
 
1048
    }
 
1049
    else {
 
1050
        GBDATA *gb_nnodes = GB_entry(gb_tree, "nnodes");
 
1051
        if (!gb_nnodes) {
 
1052
            GB_export_errorf("nnodes not found in tree '%s'", tree_name);
 
1053
        }
 
1054
        else {
 
1055
            const char *sizeInfo = GBS_global_string("(%li:%i)", GB_read_int(gb_nnodes)+1, GB_read_security_write(gb_tree));
 
1056
            GBDATA     *gb_rem   = GB_entry(gb_tree, "remark");
 
1057
            int         len;
 
1058
 
 
1059
            if (maxTreeNameLen == -1) {
 
1060
                result = GBS_global_string("%s %11s", tree_name, sizeInfo);
 
1061
                len    = strlen(tree_name);
 
1062
            }
 
1063
            else {
 
1064
                result = GBS_global_string("%-*s %11s", maxTreeNameLen, tree_name, sizeInfo);
 
1065
                len    = maxTreeNameLen;
 
1066
            }
 
1067
            if (gb_rem) {
 
1068
                const char *remark    = GB_read_char_pntr(gb_rem);
 
1069
                const int   remarkLen = 800;
 
1070
                char       *res2      = GB_give_other_buffer(remark, len+1+11+2+remarkLen+1);
 
1071
 
 
1072
                strcpy(res2, result);
 
1073
                strcat(res2, "  ");
 
1074
                strncat(res2, remark, remarkLen);
 
1075
 
 
1076
                result = res2;
 
1077
            }
 
1078
        }
 
1079
    }
 
1080
    return result;
 
1081
}
 
1082
 
 
1083
long GBT_size_of_tree(GBDATA *gb_main, const char *tree_name) {
 
1084
    // return the number of inner nodes in binary tree (or -1 if unknown)
 
1085
    // Note:
 
1086
    //        leafs                        = size + 1
 
1087
    //        inner nodes in unrooted tree = size - 1
 
1088
 
 
1089
    long    nnodes  = -1;
 
1090
    GBDATA *gb_tree = GBT_find_tree(gb_main, tree_name);
 
1091
    if (gb_tree) {
 
1092
        GBDATA *gb_nnodes = GB_entry(gb_tree, "nnodes");
 
1093
        if (gb_nnodes) {
 
1094
            nnodes = GB_read_int(gb_nnodes);
 
1095
        }
 
1096
    }
 
1097
    return nnodes;
 
1098
}
 
1099
 
 
1100
 
 
1101
struct indexed_name {
 
1102
    int         idx;
 
1103
    const char *name;
 
1104
 
 
1105
    bool operator<(const indexed_name& other) const { return idx < other.idx; }
 
1106
};
 
1107
 
 
1108
void GBT_get_tree_names(ConstStrArray& names, GBDATA *gb_main, bool sorted) {
 
1109
    // stores tree names in 'names'
 
1110
 
 
1111
    GBDATA *gb_treedata = GBT_get_tree_data(gb_main);
 
1112
    ensure_trees_have_order(gb_treedata);
 
1113
 
 
1114
    long tree_count = GB_number_of_subentries(gb_treedata);
 
1115
 
 
1116
    names.reserve(tree_count);
 
1117
    typedef std::set<indexed_name> ordered_trees;
 
1118
    ordered_trees trees;
 
1119
 
 
1120
    {
 
1121
        int t     = 0;
 
1122
        int count = 0;
 
1123
        for (GBDATA *gb_tree = GB_child(gb_treedata); gb_tree; gb_tree = GB_nextChild(gb_tree), ++t) {
 
1124
            indexed_name iname;
 
1125
            iname.name = GB_read_key_pntr(gb_tree);
 
1126
            iname.idx  = sorted ? get_tree_idx(gb_tree) : ++count;
 
1127
 
 
1128
            trees.insert(iname);
 
1129
        }
 
1130
    }
 
1131
 
 
1132
    if (tree_count != (long)trees.size()) { // there are duplicated "order" entries
 
1133
        gb_assert(sorted); // should not happen in unsorted mode
 
1134
 
 
1135
        typedef std::set<int> ints;
 
1136
 
 
1137
        ints    used_indices;
 
1138
        GBDATA *gb_first_tree = GB_child(gb_treedata);
 
1139
        GBDATA *gb_tree       = gb_first_tree;
 
1140
 
 
1141
        while (gb_tree) {
 
1142
            int idx = get_tree_idx(gb_tree);
 
1143
            if (used_indices.find(idx) != used_indices.end()) { // duplicate order
 
1144
                GB_ERROR error    = reserve_tree_idx(gb_treedata, idx+1);
 
1145
                if (!error) error = set_tree_idx(gb_tree, idx+1);
 
1146
                if (error) GBK_terminatef("failed to fix tree-order (Reason: %s)", error);
 
1147
 
 
1148
                // now restart
 
1149
                used_indices.clear();
 
1150
                gb_tree = gb_first_tree;
 
1151
            }
 
1152
            else {
 
1153
                used_indices.insert(idx);
 
1154
                gb_tree = GB_nextChild(gb_tree);
 
1155
            }
 
1156
        }
 
1157
        GBT_get_tree_names(names, gb_main, sorted);
 
1158
        return;
 
1159
    }
 
1160
 
 
1161
    for (ordered_trees::const_iterator t = trees.begin(); t != trees.end(); ++t) {
 
1162
        names.put(t->name);
 
1163
    }
 
1164
}
 
1165
 
 
1166
NOT4PERL GB_ERROR GBT_move_tree(GBDATA *gb_moved_tree, GBT_ORDER_MODE mode, GBDATA *gb_target_tree) {
 
1167
    // moves 'gb_moved_tree' next to 'gb_target_tree' (only changes tree-order) 
 
1168
    gb_assert(gb_moved_tree && gb_target_tree);
 
1169
 
 
1170
    GBDATA *gb_treedata = GB_get_father(gb_moved_tree);
 
1171
    ensure_trees_have_order(gb_treedata);
 
1172
 
 
1173
    int target_idx = get_tree_idx(gb_target_tree);
 
1174
    gb_assert(target_idx);
 
1175
 
 
1176
    if (mode == GBT_BEHIND) target_idx++;
 
1177
 
 
1178
    GB_ERROR error    = reserve_tree_idx(gb_treedata, target_idx);
 
1179
    if (!error) error = set_tree_idx(gb_moved_tree, target_idx);
 
1180
 
 
1181
    return error;
 
1182
}
 
1183
 
 
1184
static GBDATA *get_source_and_check_target_tree(GBDATA *gb_main, const char *source_tree, const char *dest_tree, GB_ERROR& error) {
 
1185
    GBDATA *gb_source_tree = NULL;
 
1186
 
 
1187
    error             = GBT_check_tree_name(source_tree);
 
1188
    if (!error) error = GBT_check_tree_name(dest_tree);
 
1189
 
 
1190
    if (error && strcmp(source_tree, NO_TREE_SELECTED) == 0) {
 
1191
        error = "No tree selected";
 
1192
    }
 
1193
 
 
1194
    if (!error && strcmp(source_tree, dest_tree) == 0) error = "source- and dest-tree are the same";
 
1195
 
 
1196
    if (!error) {
 
1197
        gb_source_tree = GBT_find_tree(gb_main, source_tree);
 
1198
        if (!gb_source_tree) error = GBS_global_string("tree '%s' not found", source_tree);
 
1199
        else {
 
1200
            GBDATA *gb_dest_tree = GBT_find_tree(gb_main, dest_tree);
 
1201
            if (gb_dest_tree) {
 
1202
                error = GBS_global_string("tree '%s' already exists", dest_tree);
 
1203
                gb_source_tree = NULL;
 
1204
            }
 
1205
        }
 
1206
    }
 
1207
 
 
1208
    gb_assert(contradicted(error, gb_source_tree));
 
1209
    return gb_source_tree;
 
1210
}
 
1211
 
 
1212
static GBDATA *copy_tree_container(GBDATA *gb_source_tree, const char *newName, GB_ERROR& error) {
 
1213
    GBDATA *gb_treedata  = GB_get_father(gb_source_tree);
 
1214
    GBDATA *gb_dest_tree = GB_create_container(gb_treedata, newName);
 
1215
 
 
1216
    if (!gb_dest_tree) error = GB_await_error();
 
1217
    else error               = GB_copy(gb_dest_tree, gb_source_tree);
 
1218
 
 
1219
    gb_assert(contradicted(error, gb_dest_tree));
 
1220
    return gb_dest_tree;
 
1221
}
 
1222
 
 
1223
GB_ERROR GBT_copy_tree(GBDATA *gb_main, const char *source_name, const char *dest_name) {
 
1224
    GB_ERROR  error;
 
1225
    GBDATA   *gb_source_tree = get_source_and_check_target_tree(gb_main, source_name, dest_name, error);
 
1226
 
 
1227
    if (gb_source_tree) {
 
1228
        GBDATA *gb_dest_tree = copy_tree_container(gb_source_tree, dest_name, error);
 
1229
        if (gb_dest_tree) {
 
1230
            int source_idx = get_tree_idx(gb_source_tree);
 
1231
            int dest_idx   = source_idx+1;
 
1232
 
 
1233
            error             = reserve_tree_idx(GB_get_father(gb_dest_tree), dest_idx);
 
1234
            if (!error) error = set_tree_idx(gb_dest_tree, dest_idx);
 
1235
        }
 
1236
    }
 
1237
 
 
1238
    return error;
 
1239
}
 
1240
 
 
1241
GB_ERROR GBT_rename_tree(GBDATA *gb_main, const char *source_name, const char *dest_name) {
 
1242
    GB_ERROR  error;
 
1243
    GBDATA   *gb_source_tree = get_source_and_check_target_tree(gb_main, source_name, dest_name, error);
 
1244
 
 
1245
    if (gb_source_tree) {
 
1246
        GBDATA *gb_dest_tree = copy_tree_container(gb_source_tree, dest_name, error);
 
1247
        if (gb_dest_tree) error = GB_delete(gb_source_tree);
 
1248
    }
 
1249
 
 
1250
    return error;
 
1251
}
 
1252
 
 
1253
static GB_CSTR *fill_species_name_array(GB_CSTR *current, const GBT_TREE *tree) {
 
1254
    if (tree->is_leaf) {
 
1255
        current[0] = tree->name;
 
1256
        return current+1;
 
1257
    }
 
1258
    current = fill_species_name_array(current, tree->leftson);
 
1259
    current = fill_species_name_array(current, tree->rightson);
 
1260
    return current;
 
1261
}
 
1262
 
 
1263
GB_CSTR *GBT_get_names_of_species_in_tree(const GBT_TREE *tree, size_t *count) {
 
1264
    /* creates an array of all species names in a tree,
 
1265
     * The names are not allocated (so they may change as side effect of renaming species) */
 
1266
 
 
1267
    size_t   size   = GBT_count_leafs(tree);
 
1268
    GB_CSTR *result = (GB_CSTR *)GB_calloc(sizeof(char *), size + 1);
 
1269
    
 
1270
    IF_ASSERTION_USED(GB_CSTR *check =) fill_species_name_array(result, tree);
 
1271
    gb_assert(check - size == result);
 
1272
 
 
1273
    if (count) *count = size;
 
1274
 
 
1275
    return result;
 
1276
}
 
1277
 
 
1278
static void tree2newick(const GBT_TREE *tree, GBS_strstruct& out, NewickFormat format) {
 
1279
    gb_assert(tree);
 
1280
    if (tree->is_leaf) {
 
1281
        out.cat(tree->name);
 
1282
    }
 
1283
    else {
 
1284
        out.put('(');
 
1285
        tree2newick(tree->leftson, out, format);
 
1286
        out.put(',');
 
1287
        tree2newick(tree->rightson, out, format);
 
1288
        out.put(')');
 
1289
 
 
1290
        if (format & (nGROUP|nREMARK)) {
 
1291
            const char *remark = format&nREMARK ? tree->get_remark() : NULL;
 
1292
            const char *group  = format&nGROUP ? tree->name : NULL;
 
1293
 
 
1294
            if (remark || group) {
 
1295
                out.put('\'');
 
1296
                if (remark) {
 
1297
                    out.cat(remark);
 
1298
                    if (group) out.put(':');
 
1299
                }
 
1300
                if (group) out.cat(group);
 
1301
                out.put('\'');
 
1302
            }
 
1303
        }
 
1304
    }
 
1305
 
 
1306
    if (format&nLENGTH && !tree->is_root_node()) {
 
1307
        out.put(':');
 
1308
        out.nprintf(10, "%5.3f", tree->get_branchlength());
 
1309
    }
 
1310
}
 
1311
 
 
1312
char *GBT_tree_2_newick(const GBT_TREE *tree, NewickFormat format) {
 
1313
    GBS_strstruct out(1000);
 
1314
    if (tree) tree2newick(tree, out, format);
 
1315
    out.put(';');
 
1316
    return out.release();
 
1317
}
 
1318
 
 
1319
 
 
1320
// --------------------------------------------------------------------------------
 
1321
 
 
1322
#ifdef UNIT_TESTS
 
1323
#include <test_unit.h>
 
1324
 
 
1325
static const char *getTreeOrder(GBDATA *gb_main) {
 
1326
    ConstStrArray names;
 
1327
    GBT_get_tree_names(names, gb_main, true);
 
1328
 
 
1329
    char *joined         = GBT_join_names(names, '|');
 
1330
    char *size_and_names = GBS_global_string_copy("%zu:%s", names.size(), joined);
 
1331
    free(joined);
 
1332
 
 
1333
    RETURN_LOCAL_ALLOC(size_and_names);
 
1334
}
 
1335
 
 
1336
void TEST_tree_names() {
 
1337
    TEST_EXPECT_ERROR_CONTAINS(GBT_check_tree_name(""),               "not a valid treename");
 
1338
    TEST_EXPECT_ERROR_CONTAINS(GBT_check_tree_name("not_a_treename"), "not a valid treename");
 
1339
    TEST_EXPECT_ERROR_CONTAINS(GBT_check_tree_name("tree_bad.dot"),   "not a valid treename");
 
1340
 
 
1341
    TEST_EXPECT_NO_ERROR(GBT_check_tree_name("tree_")); // ugly but ok
 
1342
    TEST_EXPECT_NO_ERROR(GBT_check_tree_name("tree_ok"));
 
1343
}
 
1344
 
 
1345
void TEST_tree_contraints() {
 
1346
    // test minima
 
1347
    const int MIN_LEAFS = 2;
 
1348
 
 
1349
    TEST_EXPECT_EQUAL(leafs_2_nodes(MIN_LEAFS, ROOTED), 3);
 
1350
    TEST_EXPECT_EQUAL(leafs_2_nodes(MIN_LEAFS, UNROOTED), 2);
 
1351
    TEST_EXPECT_EQUAL(leafs_2_edges(MIN_LEAFS, ROOTED), 2);
 
1352
    TEST_EXPECT_EQUAL(leafs_2_edges(MIN_LEAFS, UNROOTED), 1);
 
1353
 
 
1354
    TEST_EXPECT_EQUAL(MIN_LEAFS, nodes_2_leafs(3, ROOTED));   // test minimum (3 nodes rooted)
 
1355
    TEST_EXPECT_EQUAL(MIN_LEAFS, nodes_2_leafs(2, UNROOTED)); // test minimum (2 nodes unrooted)
 
1356
 
 
1357
    TEST_EXPECT_EQUAL(MIN_LEAFS, edges_2_leafs(2, ROOTED));   // test minimum (2 edges rooted)
 
1358
    TEST_EXPECT_EQUAL(MIN_LEAFS, edges_2_leafs(1, UNROOTED)); // test minimum (1 edge unrooted)
 
1359
 
 
1360
    // test inverse functions:
 
1361
    for (int i = 3; i<=7; ++i) {
 
1362
        // test "leaf->XXX" and back conversions (any number of leafs is possible)
 
1363
        TEST_EXPECT_EQUAL(i, nodes_2_leafs(leafs_2_nodes(i, ROOTED), ROOTED));
 
1364
        TEST_EXPECT_EQUAL(i, nodes_2_leafs(leafs_2_nodes(i, UNROOTED), UNROOTED));
 
1365
 
 
1366
        TEST_EXPECT_EQUAL(i, edges_2_leafs(leafs_2_edges(i, ROOTED), ROOTED));
 
1367
        TEST_EXPECT_EQUAL(i, edges_2_leafs(leafs_2_edges(i, UNROOTED), UNROOTED));
 
1368
 
 
1369
        bool odd = i%2;
 
1370
        if (odd) {
 
1371
            TEST_EXPECT_EQUAL(i, leafs_2_nodes(nodes_2_leafs(i, ROOTED), ROOTED)); // rooted trees only contain odd numbers of nodes
 
1372
            TEST_EXPECT_EQUAL(i, leafs_2_edges(edges_2_leafs(i, UNROOTED), UNROOTED)); // unrooted trees only contain odd numbers of edges
 
1373
        }
 
1374
        else { // even
 
1375
            TEST_EXPECT_EQUAL(i, leafs_2_nodes(nodes_2_leafs(i, UNROOTED), UNROOTED)); // unrooted trees only contain even numbers of nodes
 
1376
            TEST_EXPECT_EQUAL(i, leafs_2_edges(edges_2_leafs(i, ROOTED), ROOTED)); // rooted trees only contain even numbers of edges
 
1377
        }
 
1378
 
 
1379
        // test adding a leaf adds two nodes:
 
1380
        int added = i+1;
 
1381
        TEST_EXPECT_EQUAL(leafs_2_nodes(added, ROOTED)-leafs_2_nodes(i, ROOTED), 2);
 
1382
        TEST_EXPECT_EQUAL(leafs_2_nodes(added, UNROOTED)-leafs_2_nodes(i, UNROOTED), 2);
 
1383
    }
 
1384
 
 
1385
 
 
1386
}
 
1387
 
 
1388
void TEST_copy_rename_delete_tree_order() {
 
1389
    GB_shell  shell;
 
1390
    GBDATA   *gb_main = GB_open("TEST_trees.arb", "r");
 
1391
 
 
1392
    {
 
1393
        GB_transaction ta(gb_main);
 
1394
 
 
1395
        {
 
1396
            TEST_EXPECT_NULL(GBT_get_tree_name(NULL));
 
1397
            
 
1398
            TEST_EXPECT_EQUAL(GBT_name_of_largest_tree(gb_main), "tree_removal");
 
1399
 
 
1400
            TEST_EXPECT_EQUAL(GBT_get_tree_name(GBT_find_top_tree(gb_main)), "tree_test");
 
1401
            TEST_EXPECT_EQUAL(GBT_name_of_bottom_tree(gb_main), "tree_removal");
 
1402
 
 
1403
            long inner_nodes = GBT_size_of_tree(gb_main, "tree_nj_bs");
 
1404
            TEST_EXPECT_EQUAL(inner_nodes, 5);
 
1405
            TEST_EXPECT_EQUAL(GBT_tree_info_string(gb_main, "tree_nj_bs", -1), "tree_nj_bs       (6:0)  PRG=dnadist CORR=none FILTER=none PKG=ARB");
 
1406
            TEST_EXPECT_EQUAL(GBT_tree_info_string(gb_main, "tree_nj_bs", 20), "tree_nj_bs                 (6:0)  PRG=dnadist CORR=none FILTER=none PKG=ARB");
 
1407
 
 
1408
            {
 
1409
                GBT_TREE *tree = GBT_read_tree(gb_main, "tree_nj_bs", GBT_TREE_NodeFactory());
 
1410
 
 
1411
                TEST_REJECT_NULL(tree);
 
1412
 
 
1413
                size_t leaf_count = GBT_count_leafs(tree);
 
1414
 
 
1415
                size_t   species_count;
 
1416
                GB_CSTR *species = GBT_get_names_of_species_in_tree(tree, &species_count);
 
1417
 
 
1418
                StrArray species2;
 
1419
                for (int i = 0; species[i]; ++i) species2.put(strdup(species[i]));
 
1420
 
 
1421
                TEST_EXPECT_EQUAL(species_count, leaf_count);
 
1422
                TEST_EXPECT_EQUAL(long(species_count), inner_nodes+1);
 
1423
 
 
1424
                {
 
1425
                    char *joined = GBT_join_names(species2, '*');
 
1426
                    TEST_EXPECT_EQUAL(joined, "CloButyr*CloButy2*CorGluta*CorAquat*CurCitre*CytAquat");
 
1427
                    free(joined);
 
1428
                }
 
1429
 
 
1430
                free(species);
 
1431
 
 
1432
                TEST_EXPECT_NEWICK(nSIMPLE, tree, "(CloButyr,(CloButy2,((CorGluta,(CorAquat,CurCitre)),CytAquat)));");
 
1433
                TEST_EXPECT_NEWICK(nSIMPLE, NULL, ";");
 
1434
 
 
1435
                delete tree;
 
1436
            }
 
1437
 
 
1438
            TEST_EXPECT_EQUAL(GBT_existing_tree(gb_main, "tree_nj_bs"), "tree_nj_bs");
 
1439
            TEST_EXPECT_EQUAL(GBT_existing_tree(gb_main, "tree_nosuch"), "tree_test");
 
1440
        }
 
1441
 
 
1442
        // changing tree order
 
1443
        {
 
1444
            TEST_EXPECT_EQUAL(getTreeOrder(gb_main), "5:tree_test|tree_tree2|tree_nj|tree_nj_bs|tree_removal");
 
1445
 
 
1446
            GBDATA *gb_test    = GBT_find_tree(gb_main, "tree_test");
 
1447
            GBDATA *gb_tree2   = GBT_find_tree(gb_main, "tree_tree2");
 
1448
            GBDATA *gb_nj      = GBT_find_tree(gb_main, "tree_nj");
 
1449
            GBDATA *gb_nj_bs   = GBT_find_tree(gb_main, "tree_nj_bs");
 
1450
            GBDATA *gb_removal = GBT_find_tree(gb_main, "tree_removal");
 
1451
 
 
1452
            TEST_EXPECT_NO_ERROR(GBT_move_tree(gb_test, GBT_BEHIND, gb_removal)); // move to bottom
 
1453
            TEST_EXPECT_EQUAL(getTreeOrder(gb_main), "5:tree_tree2|tree_nj|tree_nj_bs|tree_removal|tree_test");
 
1454
 
 
1455
            TEST_EXPECT_EQUAL(GBT_tree_behind(gb_tree2), gb_nj);
 
1456
            TEST_EXPECT_EQUAL(GBT_tree_behind(gb_nj), gb_nj_bs);
 
1457
            TEST_EXPECT_EQUAL(GBT_tree_behind(gb_nj_bs), gb_removal);
 
1458
            TEST_EXPECT_EQUAL(GBT_tree_behind(gb_removal), gb_test);
 
1459
            TEST_EXPECT_NULL(GBT_tree_behind(gb_test));
 
1460
 
 
1461
            TEST_EXPECT_NULL(GBT_tree_infrontof(gb_tree2));
 
1462
            TEST_EXPECT_EQUAL(GBT_tree_infrontof(gb_nj), gb_tree2);
 
1463
            TEST_EXPECT_EQUAL(GBT_tree_infrontof(gb_nj_bs), gb_nj);
 
1464
            TEST_EXPECT_EQUAL(GBT_tree_infrontof(gb_removal), gb_nj_bs);
 
1465
            TEST_EXPECT_EQUAL(GBT_tree_infrontof(gb_test), gb_removal);
 
1466
 
 
1467
            TEST_EXPECT_NO_ERROR(GBT_move_tree(gb_test, GBT_INFRONTOF, gb_tree2)); // move back to top
 
1468
            TEST_EXPECT_EQUAL(getTreeOrder(gb_main), "5:tree_test|tree_tree2|tree_nj|tree_nj_bs|tree_removal");
 
1469
 
 
1470
            TEST_EXPECT_NO_ERROR(GBT_move_tree(gb_test, GBT_BEHIND, gb_tree2)); // move from top
 
1471
            TEST_EXPECT_EQUAL(getTreeOrder(gb_main), "5:tree_tree2|tree_test|tree_nj|tree_nj_bs|tree_removal");
 
1472
 
 
1473
            TEST_EXPECT_NO_ERROR(GBT_move_tree(gb_removal, GBT_INFRONTOF, gb_nj)); // move from end
 
1474
            TEST_EXPECT_EQUAL(getTreeOrder(gb_main), "5:tree_tree2|tree_test|tree_removal|tree_nj|tree_nj_bs");
 
1475
 
 
1476
            TEST_EXPECT_NO_ERROR(GBT_move_tree(gb_nj_bs, GBT_INFRONTOF, gb_nj_bs)); // noop
 
1477
            TEST_EXPECT_EQUAL(getTreeOrder(gb_main), "5:tree_tree2|tree_test|tree_removal|tree_nj|tree_nj_bs");
 
1478
 
 
1479
            TEST_EXPECT_EQUAL(GBT_get_tree_name(GBT_find_top_tree(gb_main)), "tree_tree2");
 
1480
 
 
1481
            TEST_EXPECT_EQUAL(GBT_get_tree_name(GBT_find_next_tree(gb_removal)), "tree_nj");
 
1482
            TEST_EXPECT_EQUAL(GBT_get_tree_name(GBT_find_next_tree(gb_nj_bs)), "tree_tree2"); // last -> first
 
1483
        }
 
1484
 
 
1485
        // check tree order is maintained by copy, rename and delete
 
1486
 
 
1487
        {
 
1488
            // copy
 
1489
            TEST_EXPECT_ERROR_CONTAINS(GBT_copy_tree(gb_main, "tree_nosuch", "tree_whatever"), "tree 'tree_nosuch' not found");
 
1490
            TEST_EXPECT_ERROR_CONTAINS(GBT_copy_tree(gb_main, "tree_test",   "tree_test"),     "source- and dest-tree are the same");
 
1491
            TEST_EXPECT_ERROR_CONTAINS(GBT_copy_tree(gb_main, "tree_tree2",  "tree_test"),     "tree 'tree_test' already exists");
 
1492
 
 
1493
            TEST_EXPECT_NO_ERROR(GBT_copy_tree(gb_main, "tree_test", "tree_test_copy"));
 
1494
            TEST_REJECT_NULL(GBT_find_tree(gb_main, "tree_test_copy"));
 
1495
            TEST_EXPECT_EQUAL(getTreeOrder(gb_main), "6:tree_tree2|tree_test|tree_test_copy|tree_removal|tree_nj|tree_nj_bs");
 
1496
 
 
1497
            // rename
 
1498
            TEST_EXPECT_NO_ERROR(GBT_rename_tree(gb_main, "tree_nj", "tree_renamed_nj"));
 
1499
            TEST_REJECT_NULL(GBT_find_tree(gb_main, "tree_renamed_nj"));
 
1500
            TEST_EXPECT_EQUAL(getTreeOrder(gb_main), "6:tree_tree2|tree_test|tree_test_copy|tree_removal|tree_renamed_nj|tree_nj_bs");
 
1501
 
 
1502
            TEST_EXPECT_NO_ERROR(GBT_rename_tree(gb_main, "tree_tree2", "tree_renamed_tree2"));
 
1503
            TEST_EXPECT_EQUAL(getTreeOrder(gb_main), "6:tree_renamed_tree2|tree_test|tree_test_copy|tree_removal|tree_renamed_nj|tree_nj_bs");
 
1504
 
 
1505
            TEST_EXPECT_NO_ERROR(GBT_rename_tree(gb_main, "tree_test_copy", "tree_renamed_test_copy"));
 
1506
            TEST_EXPECT_EQUAL(getTreeOrder(gb_main), "6:tree_renamed_tree2|tree_test|tree_renamed_test_copy|tree_removal|tree_renamed_nj|tree_nj_bs");
 
1507
 
 
1508
            // delete
 
1509
 
 
1510
            GBDATA *gb_nj_bs             = GBT_find_tree(gb_main, "tree_nj_bs");
 
1511
            GBDATA *gb_renamed_nj        = GBT_find_tree(gb_main, "tree_renamed_nj");
 
1512
            GBDATA *gb_renamed_test_copy = GBT_find_tree(gb_main, "tree_renamed_test_copy");
 
1513
            GBDATA *gb_renamed_tree2     = GBT_find_tree(gb_main, "tree_renamed_tree2");
 
1514
            GBDATA *gb_test              = GBT_find_tree(gb_main, "tree_test");
 
1515
            GBDATA *gb_removal           = GBT_find_tree(gb_main, "tree_removal");
 
1516
 
 
1517
            TEST_EXPECT_NO_ERROR(GB_delete(gb_renamed_tree2));
 
1518
            TEST_EXPECT_NO_ERROR(GB_delete(gb_renamed_test_copy));
 
1519
            TEST_EXPECT_NO_ERROR(GB_delete(gb_renamed_nj));
 
1520
            TEST_EXPECT_NO_ERROR(GB_delete(gb_removal));
 
1521
 
 
1522
            // .. two trees left
 
1523
 
 
1524
            TEST_EXPECT_EQUAL(getTreeOrder(gb_main), "2:tree_test|tree_nj_bs");
 
1525
 
 
1526
            TEST_EXPECT_EQUAL(GBT_find_largest_tree(gb_main), gb_test);
 
1527
            TEST_EXPECT_EQUAL(GBT_find_top_tree(gb_main), gb_test);
 
1528
            TEST_EXPECT_EQUAL(GBT_find_bottom_tree(gb_main), gb_nj_bs);
 
1529
            
 
1530
            TEST_EXPECT_EQUAL(GBT_find_next_tree(gb_test), gb_nj_bs);
 
1531
            TEST_EXPECT_EQUAL(GBT_find_next_tree(gb_test), gb_nj_bs);
 
1532
            TEST_EXPECT_EQUAL(GBT_find_next_tree(gb_nj_bs), gb_test);
 
1533
 
 
1534
            TEST_EXPECT_NULL (GBT_tree_infrontof(gb_test));
 
1535
            TEST_EXPECT_EQUAL(GBT_tree_behind   (gb_test), gb_nj_bs);
 
1536
            
 
1537
            TEST_EXPECT_EQUAL(GBT_tree_infrontof(gb_nj_bs), gb_test);
 
1538
            TEST_EXPECT_NULL (GBT_tree_behind   (gb_nj_bs));
 
1539
 
 
1540
            TEST_EXPECT_NO_ERROR(GBT_move_tree(gb_test, GBT_BEHIND, gb_nj_bs)); // move to bottom
 
1541
            TEST_EXPECT_EQUAL(getTreeOrder(gb_main), "2:tree_nj_bs|tree_test");
 
1542
            TEST_EXPECT_NO_ERROR(GBT_move_tree(gb_test, GBT_INFRONTOF, gb_nj_bs)); // move to top
 
1543
            TEST_EXPECT_EQUAL(getTreeOrder(gb_main), "2:tree_test|tree_nj_bs");
 
1544
            
 
1545
            TEST_EXPECT_NO_ERROR(GB_delete(gb_nj_bs));
 
1546
 
 
1547
            // .. one tree left
 
1548
 
 
1549
            TEST_EXPECT_EQUAL(getTreeOrder(gb_main), "1:tree_test");
 
1550
 
 
1551
            TEST_EXPECT_EQUAL(GBT_find_largest_tree(gb_main), gb_test);
 
1552
            TEST_EXPECT_EQUAL(GBT_find_top_tree(gb_main), gb_test);
 
1553
            TEST_EXPECT_EQUAL(GBT_find_bottom_tree(gb_main), gb_test);
 
1554
            
 
1555
            TEST_EXPECT_NULL(GBT_find_next_tree(gb_test)); // no other tree left
 
1556
            TEST_EXPECT_NULL(GBT_tree_behind(gb_test));
 
1557
            TEST_EXPECT_NULL(GBT_tree_infrontof(gb_test));
 
1558
 
 
1559
            TEST_EXPECT_NO_ERROR(GB_delete(gb_test));
 
1560
 
 
1561
            // .. no tree left
 
1562
            
 
1563
            TEST_EXPECT_EQUAL(getTreeOrder(gb_main), "0:");
 
1564
 
 
1565
            TEST_EXPECT_NULL(GBT_find_tree(gb_main, "tree_test"));
 
1566
            TEST_EXPECT_NULL(GBT_existing_tree(gb_main, "tree_whatever"));
 
1567
            TEST_EXPECT_NULL(GBT_find_largest_tree(gb_main));
 
1568
        }
 
1569
    }
 
1570
 
 
1571
    GB_close(gb_main);
 
1572
}
 
1573
 
 
1574
void TEST_tree_remove_leafs() {
 
1575
    GB_shell  shell;
 
1576
    GBDATA   *gb_main = GB_open("TEST_trees.arb", "r");
 
1577
 
 
1578
    {
 
1579
        GBT_TreeRemoveType tested_modes[] = {
 
1580
            GBT_REMOVE_MARKED,
 
1581
            GBT_REMOVE_UNMARKED,
 
1582
            GBT_REMOVE_ZOMBIES,
 
1583
            GBT_KEEP_MARKED,
 
1584
        };
 
1585
 
 
1586
        const char *org_topo          = "((CloInnoc:0.371,(CloTyrob:0.009,(CloTyro2:0.017,(CloTyro3:1.046,CloTyro4:0.061):0.026):0.017):0.274):0.029,(CloBifer:0.388,((CloCarni:0.120,CurCitre:0.058):1.000,((CloPaste:0.179,(Zombie1:0.120,(CloButy2:0.009,CloButyr:0.000):0.564):0.010):0.131,(CytAquat:0.711,(CelBiazo:0.059,(CorGluta:0.522,(CorAquat:0.084,Zombie2:0.058):0.103):0.054):0.207):0.162):0.124):0.124):0.029);";
 
1587
        const char *rem_marked_topo   = "((CloInnoc:0.371,(CloTyrob:0.009,(CloTyro2:0.017,(CloTyro3:1.046,CloTyro4:0.061):0.026):0.017):0.274):0.029,(CloBifer:0.388,(CloCarni:1.000,((CloPaste:0.179,Zombie1:0.010):0.131,(CelBiazo:0.059,Zombie2:0.054):0.162):0.124):0.124):0.029);";
 
1588
        const char *rem_unmarked_topo = "(CurCitre:1.000,((Zombie1:0.120,(CloButy2:0.009,CloButyr:0.000):0.564):0.131,(CytAquat:0.711,(CorGluta:0.522,(CorAquat:0.084,Zombie2:0.058):0.103):0.207):0.162):0.124);";
 
1589
        const char *rem_zombies_topo  = "((CloInnoc:0.371,(CloTyrob:0.009,(CloTyro2:0.017,(CloTyro3:1.046,CloTyro4:0.061):0.026):0.017):0.274):0.029,(CloBifer:0.388,((CloCarni:0.120,CurCitre:0.058):1.000,((CloPaste:0.179,(CloButy2:0.009,CloButyr:0.000):0.010):0.131,(CytAquat:0.711,(CelBiazo:0.059,(CorGluta:0.522,CorAquat:0.103):0.054):0.207):0.162):0.124):0.124):0.029);";
 
1590
        const char *kept_marked_topo  = "(CurCitre:1.000,((CloButy2:0.009,CloButyr:0.000):0.131,(CytAquat:0.711,(CorGluta:0.522,CorAquat:0.103):0.207):0.162):0.124);";
 
1591
 
 
1592
        const char *kept_zombies_topo        = "(Zombie1:0.131,Zombie2:0.162);";
 
1593
        const char *kept_zombies_broken_topo = "Zombie2;";
 
1594
 
 
1595
        const char *empty_topo = ";";
 
1596
 
 
1597
        GB_transaction ta(gb_main);
 
1598
        for (unsigned mode = 0; mode<ARRAY_ELEMS(tested_modes); ++mode) {
 
1599
            GBT_TreeRemoveType what = tested_modes[mode];
 
1600
 
 
1601
            for (int linked = 0; linked<=1; ++linked) {
 
1602
                TEST_ANNOTATE(GBS_global_string("mode=%u linked=%i", mode, linked));
 
1603
 
 
1604
                GBT_TREE *tree = GBT_read_tree(gb_main, "tree_removal", GBT_TREE_NodeFactory());
 
1605
                bool      once = mode == 0 && linked == 0;
 
1606
 
 
1607
                if (linked) {
 
1608
                    int zombies    = 0;
 
1609
                    int duplicates = 0;
 
1610
 
 
1611
                    TEST_EXPECT_NO_ERROR(GBT_link_tree(tree, gb_main, false, &zombies, &duplicates));
 
1612
 
 
1613
                    TEST_EXPECT_EQUAL(zombies, 2);
 
1614
                    TEST_EXPECT_EQUAL(duplicates, 0);
 
1615
                }
 
1616
 
 
1617
                if (once) TEST_EXPECT_NEWICK(nLENGTH, tree, org_topo);
 
1618
 
 
1619
                int removedCount       = 0;
 
1620
                int groupsRemovedCount = 0;
 
1621
 
 
1622
                tree = GBT_remove_leafs(tree, what, NULL, &removedCount, &groupsRemovedCount);
 
1623
 
 
1624
                if (linked) {
 
1625
                    GBT_TreeRemoveType what_next = what;
 
1626
 
 
1627
                    switch (what) {
 
1628
                        case GBT_REMOVE_MARKED:
 
1629
                            TEST_EXPECT_EQUAL(removedCount, 6);
 
1630
                            TEST_EXPECT_EQUAL(groupsRemovedCount, 0);
 
1631
                            TEST_EXPECT_NEWICK(nLENGTH, tree, rem_marked_topo);
 
1632
                            what_next = GBT_REMOVE_UNMARKED;
 
1633
                            break;
 
1634
                        case GBT_REMOVE_UNMARKED:
 
1635
                            TEST_EXPECT_EQUAL(removedCount, 9);
 
1636
                            TEST_EXPECT_EQUAL(groupsRemovedCount, 1);
 
1637
                            TEST_EXPECT_NEWICK(nLENGTH, tree, rem_unmarked_topo);
 
1638
                            what_next = GBT_REMOVE_MARKED;
 
1639
                            break;
 
1640
                        case GBT_REMOVE_ZOMBIES:
 
1641
                            TEST_EXPECT_EQUAL(removedCount, 2);
 
1642
                            TEST_EXPECT_EQUAL(groupsRemovedCount, 0);
 
1643
                            TEST_EXPECT_NEWICK(nLENGTH, tree, rem_zombies_topo);
 
1644
                            break;
 
1645
                        case GBT_KEEP_MARKED:
 
1646
                            TEST_EXPECT_EQUAL(removedCount, 11);
 
1647
                            TEST_EXPECT_EQUAL(groupsRemovedCount, 1);
 
1648
                            TEST_EXPECT_NEWICK(nLENGTH, tree, kept_marked_topo);
 
1649
                            what_next = GBT_REMOVE_MARKED;
 
1650
                            break;
 
1651
                    }
 
1652
 
 
1653
                    if (what_next != what) {
 
1654
                        gb_assert(tree);
 
1655
                        tree = GBT_remove_leafs(tree, what_next, NULL, &removedCount, &groupsRemovedCount);
 
1656
 
 
1657
                        switch (what) {
 
1658
                            case GBT_REMOVE_MARKED: // + GBT_REMOVE_UNMARKED
 
1659
                                TEST_EXPECT_EQUAL(removedCount, 16);
 
1660
                                TEST_EXPECT_EQUAL(groupsRemovedCount, 1);
 
1661
                                TEST_EXPECT_NEWICK__BROKEN(nLENGTH, tree, kept_zombies_topo);
 
1662
                                TEST_EXPECT_NEWICK(nLENGTH, tree, kept_zombies_broken_topo); // @@@ invalid topology (single leaf)
 
1663
                                break;
 
1664
                            case GBT_REMOVE_UNMARKED: // + GBT_REMOVE_MARKED
 
1665
                                TEST_EXPECT_EQUAL(removedCount, 15);
 
1666
                                TEST_EXPECT_EQUAL(groupsRemovedCount, 1);
 
1667
                                TEST_EXPECT_NEWICK(nLENGTH, tree, kept_zombies_topo);
 
1668
                                break;
 
1669
                            case GBT_KEEP_MARKED: // + GBT_REMOVE_MARKED
 
1670
                                TEST_EXPECT_EQUAL(removedCount, 17);
 
1671
                                TEST_EXPECT_EQUAL__BROKEN(groupsRemovedCount, 2, 1); // @@@ expect that all groups have been removed! 
 
1672
                                TEST_EXPECT_EQUAL(groupsRemovedCount, 1);
 
1673
                                TEST_EXPECT_NEWICK(nLENGTH, tree, empty_topo);
 
1674
                                break;
 
1675
                            default:
 
1676
                                TEST_REJECT(true);
 
1677
                                break;
 
1678
                        }
 
1679
                    }
 
1680
                }
 
1681
                else {
 
1682
                    switch (what) {
 
1683
                        case GBT_REMOVE_MARKED:
 
1684
                        case GBT_REMOVE_UNMARKED:
 
1685
                            TEST_EXPECT_EQUAL(removedCount, 0);
 
1686
                            TEST_EXPECT_EQUAL(groupsRemovedCount, 0);
 
1687
                            TEST_EXPECT_NEWICK(nLENGTH, tree, org_topo);
 
1688
                            break;
 
1689
                        case GBT_REMOVE_ZOMBIES:
 
1690
                        case GBT_KEEP_MARKED:
 
1691
                            TEST_EXPECT_EQUAL(removedCount, 17);
 
1692
                            TEST_EXPECT_EQUAL(groupsRemovedCount, 2);
 
1693
                            TEST_EXPECT_NEWICK(nLENGTH, tree, empty_topo);
 
1694
                            break;
 
1695
                    }
 
1696
                }
 
1697
 
 
1698
                delete tree;
 
1699
            }
 
1700
        }
 
1701
    }
 
1702
 
 
1703
    GB_close(gb_main);
 
1704
}
 
1705
 
 
1706
 
 
1707
#endif // UNIT_TESTS