1
// =============================================================== //
3
// File : adtree.cxx //
4
// Purpose : tree functions //
6
// Institute of Microbiology (Technical University Munich) //
7
// http://www.arb-home.de/ //
9
// =============================================================== //
12
#include <arb_progress.h>
14
#include <arb_strarray.h>
17
#include <arb_global_defs.h>
18
#include <arb_strbuf.h>
22
#define GBT_PUT_DATA 1
23
#define GBT_GET_SIZE 0
25
GBDATA *GBT_get_tree_data(GBDATA *gb_main) {
26
return GBT_find_or_create(gb_main, "tree_data", 7);
29
GBT_TREE::bs100_mode GBT_TREE::toggle_bootstrap100(bs100_mode mode) {
31
if (!is_root_node()) {
33
switch (parse_bootstrap(bootstrap)) {
37
case BS_UNDECIDED: mode = BS_INSERT;
38
case BS_INSERT: set_bootstrap(100);
39
case BS_REMOVE: break;
42
case REMARK_BOOTSTRAP:
43
if (bootstrap >= 99.5) {
45
case BS_UNDECIDED: mode = BS_REMOVE;
46
case BS_REMOVE: remove_remark();
47
case BS_INSERT: break;
54
mode = get_leftson()->toggle_bootstrap100(mode);
55
mode = get_rightson()->toggle_bootstrap100(mode);
59
void GBT_TREE::remove_bootstrap() {
60
freenull(remark_branch);
62
get_leftson()->remove_bootstrap();
63
get_rightson()->remove_bootstrap();
66
void GBT_TREE::reset_branchlengths() {
68
leftlen = rightlen = DEFAULT_BRANCH_LENGTH;
70
get_leftson()->reset_branchlengths();
71
get_rightson()->reset_branchlengths();
75
void GBT_TREE::scale_branchlengths(double factor) {
80
get_leftson()->scale_branchlengths(factor);
81
get_rightson()->scale_branchlengths(factor);
85
GBT_LEN GBT_TREE::sum_child_lengths() const {
86
if (is_leaf) return 0.0;
90
get_leftson()->sum_child_lengths() +
91
get_rightson()->sum_child_lengths();
94
void GBT_TREE::bootstrap2branchlen() {
95
//! copy bootstraps to branchlengths
97
set_branchlength_unrooted(DEFAULT_BRANCH_LENGTH);
102
GBT_RemarkType rtype = parse_bootstrap(bootstrap);
104
if (rtype == REMARK_BOOTSTRAP) {
105
double len = bootstrap/100.0;
106
set_branchlength_unrooted(len);
109
set_branchlength_unrooted(1.0); // no bootstrap means "100%"
112
get_leftson()->bootstrap2branchlen();
113
get_rightson()->bootstrap2branchlen();
117
void GBT_TREE::branchlen2bootstrap() {
118
//! copy branchlengths to bootstraps
121
if (!is_root_node()) {
122
set_bootstrap(get_branchlength_unrooted()*100.0);
124
get_leftson()->branchlen2bootstrap();
125
get_rightson()->branchlen2bootstrap();
129
GBT_TREE *GBT_TREE::fixDeletedSon() {
130
// fix node after one son has been deleted
131
GBT_TREE *result = NULL;
134
gb_assert(!rightson);
146
// now 'result' contains the lasting tree
147
result->father = father;
149
if (remark_branch && !result->remark_branch) { // rescue remarks if possible
150
result->remark_branch = remark_branch;
151
remark_branch = NULL;
153
if (gb_node && !result->gb_node) { // rescue group if possible
154
result->gb_node = gb_node;
158
is_leaf = true; // don't try recursive delete
164
const GBT_TREE *GBT_TREE::ancestor_common_with(const GBT_TREE *other) const {
165
if (this == other) return this;
166
if (is_anchestor_of(other)) return this;
167
if (other->is_anchestor_of(this)) return other;
168
return get_father()->ancestor_common_with(other->get_father());
171
// ----------------------
174
GBT_TREE *GBT_remove_leafs(GBT_TREE *tree, GBT_TreeRemoveType mode, const GB_HASH *species_hash, int *removed, int *groups_removed) { // @@@ add tests for GBT_remove_leafs()
175
/*! Remove leafs from given 'tree'.
176
* @param tree tree from which species will be removed
177
* @param mode defines what to remove
178
* @param species_hash hash translation from leaf-name to species-dbnode (not needed if tree is linked; see GBT_link_tree)
179
* @param removed will be incremented for each removed leaf (if !NULL)
180
* @param groups_removed will be incremented for each removed group (if !NULL)
181
* @return new root node
183
* if 'species_hash' is not provided and tree is not linked,
184
* the function will silently act strange:
185
* - GBT_REMOVE_MARKED and GBT_REMOVE_UNMARKED will remove any leaf
186
* - GBT_REMOVE_ZOMBIES and GBT_KEEP_MARKED will remove all leafs
191
bool deleteSelf = false;
195
gb_node = (GBDATA*)GBS_read_hash(species_hash, tree->name);
196
gb_assert(tree->gb_node == 0); // don't call linked tree with 'species_hash'!
198
else gb_node = tree->gb_node;
201
if (mode & (GBT_REMOVE_MARKED|GBT_REMOVE_UNMARKED)) {
202
long flag = GB_read_flag(gb_node);
203
deleteSelf = (flag && (mode&GBT_REMOVE_MARKED)) || (!flag && (mode&GBT_REMOVE_UNMARKED));
207
if (mode & GBT_REMOVE_ZOMBIES) deleteSelf = true;
213
if (removed) (*removed)++;
218
tree->leftson = GBT_remove_leafs(tree->leftson, mode, species_hash, removed, groups_removed);
219
tree->rightson = GBT_remove_leafs(tree->rightson, mode, species_hash, removed, groups_removed);
222
if (!tree->rightson) { // right son deleted
223
tree = tree->fixDeletedSon();
225
// otherwise no son deleted
227
else if (tree->rightson) { // left son deleted
228
tree = tree->fixDeletedSon();
230
else { // everything deleted -> delete self
231
if (tree->name && groups_removed) (*groups_removed)++;
232
tree->is_leaf = true;
241
// ---------------------
244
inline int get_tree_idx(GBDATA *gb_tree) {
245
GBDATA *gb_order = GB_entry(gb_tree, "order");
248
idx = GB_read_int(gb_order);
249
gb_assert(idx>0); // invalid index
254
inline int get_max_tree_idx(GBDATA *gb_treedata) {
256
for (GBDATA *gb_tree = GB_child(gb_treedata); gb_tree; gb_tree = GB_nextChild(gb_tree)) {
257
int idx = get_tree_idx(gb_tree);
258
if (idx>max_idx) max_idx = idx;
263
inline GBDATA *get_tree_with_idx(GBDATA *gb_treedata, int at_idx) {
264
GBDATA *gb_found = NULL;
265
for (GBDATA *gb_tree = GB_child(gb_treedata); gb_tree && !gb_found; gb_tree = GB_nextChild(gb_tree)) {
266
int idx = get_tree_idx(gb_tree);
274
inline GBDATA *get_tree_infrontof_idx(GBDATA *gb_treedata, int infrontof_idx) {
275
GBDATA *gb_infrontof = NULL;
278
for (GBDATA *gb_tree = GB_child(gb_treedata); gb_tree; gb_tree = GB_nextChild(gb_tree)) {
279
int idx = get_tree_idx(gb_tree);
281
if (idx>best_idx && idx<infrontof_idx) {
283
gb_infrontof = gb_tree;
290
inline GBDATA *get_tree_behind_idx(GBDATA *gb_treedata, int behind_idx) {
291
GBDATA *gb_behind = NULL;
293
int best_idx = INT_MAX;
294
for (GBDATA *gb_tree = GB_child(gb_treedata); gb_tree; gb_tree = GB_nextChild(gb_tree)) {
295
int idx = get_tree_idx(gb_tree);
297
if (idx>behind_idx && idx<best_idx) {
306
inline GB_ERROR set_tree_idx(GBDATA *gb_tree, int idx) {
307
GB_ERROR error = NULL;
308
GBDATA *gb_order = GB_entry(gb_tree, "order");
310
gb_order = GB_create(gb_tree, "order", GB_INT);
311
if (!gb_order) error = GB_await_error();
313
if (!error) error = GB_write_int(gb_order, idx);
317
static GB_ERROR reserve_tree_idx(GBDATA *gb_treedata, int idx) {
318
GB_ERROR error = NULL;
319
GBDATA *gb_tree = get_tree_with_idx(gb_treedata, idx);
321
error = reserve_tree_idx(gb_treedata, idx+1);
322
if (!error) error = set_tree_idx(gb_tree, idx+1);
327
static void ensure_trees_have_order(GBDATA *gb_treedata) {
328
GBDATA *gb_main = GB_get_father(gb_treedata);
330
gb_assert(GB_get_root(gb_main) == gb_main);
331
gb_assert(GBT_get_tree_data(gb_main) == gb_treedata);
333
GB_ERROR error = NULL;
334
GBDATA *gb_tree_order_flag = GB_search(gb_main, "/tmp/trees_have_order", GB_INT);
336
if (!gb_tree_order_flag) error = GB_await_error();
338
if (GB_read_int(gb_tree_order_flag) == 0) { // not checked yet
339
int max_idx = get_max_tree_idx(gb_treedata);
340
for (GBDATA *gb_tree = GB_child(gb_treedata); gb_tree && !error; gb_tree = GB_nextChild(gb_tree)) {
341
if (!get_tree_idx(gb_tree)) {
342
error = set_tree_idx(gb_tree, ++max_idx);
345
if (!error) error = GB_write_int(gb_tree_order_flag, 1);
348
if (error) GBK_terminatef("failed to order trees (Reason: %s)", error);
351
static void tree_set_default_order(GBDATA *gb_tree) {
352
// if 'gb_tree' has no order yet, move it to the bottom (as done previously)
353
if (!get_tree_idx(gb_tree)) {
354
set_tree_idx(gb_tree, get_max_tree_idx(GB_get_father(gb_tree))+1);
358
// -----------------------------
359
// tree write functions
361
GB_ERROR GBT_write_group_name(GBDATA *gb_group_name, const char *new_group_name) {
363
size_t len = strlen(new_group_name);
365
if (len >= GB_GROUP_NAME_MAX) {
366
error = GBS_global_string("Group name '%s' too long (max %i characters)", new_group_name, GB_GROUP_NAME_MAX);
369
error = GB_write_string(gb_group_name, new_group_name);
374
static GB_ERROR gbt_write_tree_nodes(GBDATA *gb_tree, GBT_TREE *node, long *startid) {
375
// increments '*startid' for each inner node (not for leafs)
377
GB_ERROR error = NULL;
379
if (!node->is_leaf) {
380
bool node_is_used = false;
382
if (node->name && node->name[0]) {
383
if (!node->gb_node) {
384
node->gb_node = GB_create_container(gb_tree, "node");
385
if (!node->gb_node) error = GB_await_error();
388
GBDATA *gb_name = GB_search(node->gb_node, "group_name", GB_STRING);
389
if (!gb_name) error = GB_await_error();
390
else error = GBT_write_group_name(gb_name, node->name);
392
node_is_used = true; // wrote groupname -> node is used
396
if (node->gb_node && !error) {
398
GBDATA *gb_nonid = GB_child(node->gb_node);
399
while (gb_nonid && strcmp("id", GB_read_key_pntr(gb_nonid)) == 0) {
400
gb_nonid = GB_nextChild(gb_nonid);
402
if (gb_nonid) node_is_used = true; // found child that is not "id" -> node is used
405
if (node_is_used) { // set id for used nodes
406
error = GBT_write_int(node->gb_node, "id", *startid);
407
if (!error) GB_clear_user_flag(node->gb_node, GB_USERFLAG_GHOSTNODE); // mark node as "used"
409
else { // delete unused nodes
410
error = GB_delete(node->gb_node);
411
if (!error) node->gb_node = 0;
416
if (!error) error = gbt_write_tree_nodes(gb_tree, node->leftson, startid);
417
if (!error) error = gbt_write_tree_nodes(gb_tree, node->rightson, startid);
422
static char *gbt_write_tree_rek_new(const GBT_TREE *node, char *dest, long mode) {
424
const char *c1 = node->get_remark();
426
if (mode == GBT_PUT_DATA) {
429
while ((c = *(c1++))) {
430
if (c == 1) continue;
436
dest += strlen(c1) + 2;
441
if (mode == GBT_PUT_DATA) {
443
if (node->name) strcpy(dest, node->name);
446
while ((c1 = (char *)strchr(dest, 1))) {
449
dest += strlen(dest);
455
if (node->name) return dest+1+strlen(node->name)+1; // N name term
461
sprintf(buffer, "%g,%g;", node->leftlen, node->rightlen);
462
if (mode == GBT_PUT_DATA) {
464
strcpy(dest, buffer);
465
dest += strlen(buffer);
468
dest += strlen(buffer)+1;
470
dest = gbt_write_tree_rek_new(node->leftson, dest, mode);
471
dest = gbt_write_tree_rek_new(node->rightson, dest, mode);
476
static GB_ERROR gbt_write_tree(GBDATA *gb_main, GBDATA *gb_tree, const char *tree_name, GBT_TREE *tree) {
477
/*! writes a tree to the database.
479
* If tree is loaded by function GBT_read_tree(..) then 'tree_name' should be NULL
480
* else 'gb_tree' should be set to NULL
482
* To copy a tree call GB_copy(dest,source);
483
* or set recursively all tree->gb_node variables to zero (that unlinks the tree),
490
if (gb_tree) error = GBS_global_string("can't change name of existing tree (to '%s')", tree_name);
492
error = GBT_check_tree_name(tree_name);
494
GBDATA *gb_tree_data = GBT_get_tree_data(gb_main);
495
gb_tree = GB_search(gb_tree_data, tree_name, GB_CREATE_CONTAINER);
497
if (!gb_tree) error = GB_await_error();
502
if (!gb_tree) error = "No tree name given";
505
gb_assert(gb_tree || error);
508
// mark all old style tree data for deletion
510
for (gb_node = GB_entry(gb_tree, "node"); gb_node; gb_node = GB_nextEntry(gb_node)) {
511
GB_set_user_flag(gb_node, GB_USERFLAG_GHOSTNODE); // mark as "possibly unused"
514
// build tree-string and save to DB
516
char *t_size = gbt_write_tree_rek_new(tree, 0, GBT_GET_SIZE); // calc size of tree-string
517
char *ctree = (char *)GB_calloc(sizeof(char), (size_t)(t_size+1)); // allocate buffer for tree-string
519
t_size = gbt_write_tree_rek_new(tree, ctree, GBT_PUT_DATA); // write into buffer
522
bool was_allowed = GB_allow_compression(gb_main, false);
523
error = GBT_write_string(gb_tree, "tree", ctree);
524
GB_allow_compression(gb_main, was_allowed);
532
error = gbt_write_tree_nodes(gb_tree, tree, &size); // reports number of nodes in 'size'
533
if (!error) error = GBT_write_int(gb_tree, "nnodes", size);
537
GBDATA *gb_node_next;
539
for (gb_node = GB_entry(gb_tree, "node"); // delete all ghost nodes
541
gb_node = gb_node_next)
543
GBDATA *gbd = GB_entry(gb_node, "id");
544
gb_node_next = GB_nextEntry(gb_node);
545
if (!gbd || GB_user_flag(gb_node, GB_USERFLAG_GHOSTNODE)) error = GB_delete(gb_node);
550
if (!error) tree_set_default_order(gb_tree);
556
GB_ERROR GBT_write_tree(GBDATA *gb_main, const char *tree_name, GBT_TREE *tree) {
557
return gbt_write_tree(gb_main, NULL, tree_name, tree);
559
GB_ERROR GBT_overwrite_tree(GBDATA *gb_tree, GBT_TREE *tree) {
560
return gbt_write_tree(GB_get_root(gb_tree), gb_tree, NULL, tree);
563
static GB_ERROR write_tree_remark(GBDATA *gb_tree, const char *remark) {
564
return GBT_write_string(gb_tree, "remark", remark);
566
GB_ERROR GBT_write_tree_remark(GBDATA *gb_main, const char *tree_name, const char *remark) {
567
return write_tree_remark(GBT_find_tree(gb_main, tree_name), remark);
570
GB_ERROR GBT_log_to_tree_remark(GBDATA *gb_tree, const char *log_entry) {
571
GB_ERROR error = NULL;
572
const char *old_remark = GBT_read_char_pntr(gb_tree, "remark");
573
if (!old_remark && GB_have_error()) {
574
error = GB_await_error();
577
char *new_remark = GBS_log_dated_action_to(old_remark, log_entry);
578
error = write_tree_remark(gb_tree, new_remark);
583
GB_ERROR GBT_log_to_tree_remark(GBDATA *gb_main, const char *tree_name, const char *log_entry) {
584
return GBT_log_to_tree_remark(GBT_find_tree(gb_main, tree_name), log_entry);
587
GB_ERROR GBT_write_tree_with_remark(GBDATA *gb_main, const char *tree_name, GBT_TREE *tree, const char *remark) {
588
GB_ERROR error = GBT_write_tree(gb_main, tree_name, tree);
589
if (!error && remark) error = GBT_write_tree_remark(gb_main, tree_name, remark);
593
// ----------------------------
594
// tree read functions
596
static GBT_TREE *gbt_read_tree_rek(char **data, long *startid, GBDATA **gb_tree_nodes, const TreeNodeFactory& nodeFactory, int size_of_tree, GB_ERROR& error) {
597
GBT_TREE *node = NULL;
599
node = nodeFactory.makeNode();
601
char c = *((*data)++);
605
p1 = strchr(*data, 1);
607
node->set_remark(*data);
614
p1 = (char *)strchr(*data, ',');
616
node->leftlen = GB_atof(*data);
618
p1 = (char *)strchr(*data, ';');
620
node->rightlen = GB_atof(*data);
622
if ((*startid < size_of_tree) && (node->gb_node = gb_tree_nodes[*startid])) {
623
GBDATA *gb_group_name = GB_entry(node->gb_node, "group_name");
625
node->name = GB_read_string(gb_group_name);
629
node->leftson = gbt_read_tree_rek(data, startid, gb_tree_nodes, nodeFactory, size_of_tree, error);
630
if (!node->leftson) freenull(node);
632
node->rightson = gbt_read_tree_rek(data, startid, gb_tree_nodes, nodeFactory, size_of_tree, error);
633
if (!node->rightson) {
634
freenull(node->leftson);
638
node->leftson->father = node;
639
node->rightson->father = node;
644
node->is_leaf = true;
645
p1 = (char *)strchr(*data, 1);
648
gb_assert(p1[0] == 1);
651
node->name = strdup(*data);
656
error = "Unexpected end of tree definition.";
659
error = GBS_global_string("Can't interpret tree definition (expected 'N' or 'L' - not '%c')", c);
664
gb_assert(contradicted(node, error));
669
static GBT_TREE *read_tree_and_size_internal(GBDATA *gb_tree, GBDATA *gb_ctree, const TreeNodeFactory& nodeFactory, int node_count, GB_ERROR& error) {
670
GBDATA **gb_tree_nodes;
673
gb_tree_nodes = (GBDATA **)GB_calloc(sizeof(GBDATA *), (size_t)node_count);
677
for (gb_node = GB_entry(gb_tree, "node"); gb_node && !error; gb_node = GB_nextEntry(gb_node)) {
679
GBDATA *gbd = GB_entry(gb_node, "id");
682
i = GB_read_int(gbd);
683
if (i<0 || i >= node_count) {
684
error = "An inner node of the tree is corrupt";
687
gb_tree_nodes[i] = gb_node;
697
fbuf = cptr[0] = GB_read_string(gb_ctree);
698
node = gbt_read_tree_rek(cptr, startid, gb_tree_nodes, nodeFactory, node_count, error);
704
gb_assert(contradicted(node, error));
708
GBT_TREE *GBT_read_tree_and_size(GBDATA *gb_main, const char *tree_name, const TreeNodeFactory& nodeFactory, int *tree_size) {
709
/*! Loads a tree from DB into any user defined structure.
711
* @param gb_main DB root node
712
* @param tree_name is the name of the tree in the db
713
* @param nodeFactory makes the tree-node instances
714
* @param tree_size if != NULL -> gets set to "size of tree" (aka number of leafs minus 1)
717
* - NULL if any error occurs (which is exported then)
718
* - root of loaded tree (dynamic type depends on 'nodeFactory')
724
error = "no treename given";
727
error = GBT_check_tree_name(tree_name);
729
GBDATA *gb_tree = GBT_find_tree(gb_main, tree_name);
732
error = "tree not found";
735
GBDATA *gb_nnodes = GB_entry(gb_tree, "nnodes");
737
error = "tree is empty";
740
long size = GB_read_int(gb_nnodes);
742
error = "has no nodes";
745
GBDATA *gb_ctree = GB_search(gb_tree, "tree", GB_FIND);
747
error = "old unsupported tree format";
749
else { // "new" style tree
750
GBT_TREE *tree = read_tree_and_size_internal(gb_tree, gb_ctree, nodeFactory, size, error);
753
if (tree_size) *tree_size = size; // return size of tree (=leafs-1)
754
tree->announce_tree_constructed();
767
GB_export_errorf("Failed to read tree '%s' (Reason: %s)", tree_name, error);
771
GBT_TREE *GBT_read_tree(GBDATA *gb_main, const char *tree_name, const TreeNodeFactory& nodeFactory) {
772
//! @see GBT_read_tree_and_size()
773
return GBT_read_tree_and_size(gb_main, tree_name, nodeFactory, 0);
776
size_t GBT_count_leafs(const GBT_TREE *tree) {
780
return GBT_count_leafs(tree->leftson) + GBT_count_leafs(tree->rightson);
783
static GB_ERROR gbt_invalid_because(const GBT_TREE *tree, const char *reason) {
784
return GBS_global_string("((GBT_TREE*)0x%p) %s", tree, reason);
787
inline bool has_son(const GBT_TREE *father, const GBT_TREE *son) {
788
return !father->is_leaf && (father->leftson == son || father->rightson == son);
791
static GB_ERROR gbt_is_invalid(bool is_root, const GBT_TREE *tree) {
793
if (!has_son(tree->father, tree)) return gbt_invalid_because(tree, "is not son of its father");
796
if (!is_root) return gbt_invalid_because(tree, "has no father (but isn't root)");
799
GB_ERROR error = NULL;
801
if (tree->leftson) return gbt_invalid_because(tree, "is leaf, but has leftson");
802
if (tree->rightson) return gbt_invalid_because(tree, "is leaf, but has rightson");
805
if (!tree->leftson) return gbt_invalid_because(tree, "is inner node, but has no leftson");
806
if (!tree->rightson) return gbt_invalid_because(tree, "is inner node, but has no rightson");
808
error = gbt_is_invalid(false, tree->leftson);
809
if (!error) error = gbt_is_invalid(false, tree->rightson);
814
GB_ERROR GBT_is_invalid(const GBT_TREE *tree) {
815
if (tree->father) return gbt_invalid_because(tree, "is expected to be the root-node, but has father");
816
if (tree->is_leaf) return gbt_invalid_because(tree, "is expected to be the root-node, but is a leaf (tree too small)");
817
return gbt_is_invalid(true, tree);
820
// -------------------------------------------
821
// link the tree tips to the database
823
struct link_tree_data {
824
GB_HASH *species_hash;
825
GB_HASH *seen_species; // used to count duplicates
826
arb_progress *progress;
827
int zombies; // counts zombies
828
int duplicates; // counts duplicates
831
static GB_ERROR gbt_link_tree_to_hash_rek(GBT_TREE *tree, link_tree_data *ltd) {
836
GBDATA *gbd = (GBDATA*)GBS_read_hash(ltd->species_hash, tree->name);
837
if (gbd) tree->gb_node = gbd;
840
if (ltd->seen_species) {
841
if (GBS_read_hash(ltd->seen_species, tree->name)) ltd->duplicates++;
842
else GBS_write_hash(ltd->seen_species, tree->name, 1);
846
if (ltd->progress) ++(*ltd->progress);
849
error = gbt_link_tree_to_hash_rek(tree->leftson, ltd);
850
if (!error) error = gbt_link_tree_to_hash_rek(tree->rightson, ltd);
855
static GB_ERROR GBT_link_tree_using_species_hash(GBT_TREE *tree, bool show_status, GB_HASH *species_hash, int *zombies, int *duplicates) {
860
if (duplicates || show_status) {
861
leafs = GBT_count_leafs(tree);
864
ltd.species_hash = species_hash;
865
ltd.seen_species = leafs ? GBS_create_hash(leafs, GB_IGNORE_CASE) : 0;
870
ltd.progress = new arb_progress("Relinking tree to database", leafs);
876
error = gbt_link_tree_to_hash_rek(tree, <d);
877
if (ltd.seen_species) GBS_free_hash(ltd.seen_species);
879
if (zombies) *zombies = ltd.zombies;
880
if (duplicates) *duplicates = ltd.duplicates;
887
GB_ERROR GBT_link_tree(GBT_TREE *tree, GBDATA *gb_main, bool show_status, int *zombies, int *duplicates) {
888
/*! Link a given tree to the database. That means that for all tips the member
889
* 'gb_node' is set to the database container holding the species data.
891
* @param tree which will be linked to DB
892
* @param gb_main DB root node
893
* @param show_status show a progress indicator?
894
* @param zombies if != NULL -> set to number of zombies (aka non-existing species) in tree
895
* @param duplicates if != NULL -> set to number of duplicated species in tree
897
* @return error on failure
899
* @see GBT_unlink_tree()
902
GB_HASH *species_hash = GBT_create_species_hash(gb_main);
903
GB_ERROR error = GBT_link_tree_using_species_hash(tree, show_status, species_hash, zombies, duplicates);
905
GBS_free_hash(species_hash);
910
void GBT_unlink_tree(GBT_TREE *tree) {
911
/*! Unlink tree from the database.
912
* @see GBT_link_tree()
915
if (!tree->is_leaf) {
916
GBT_unlink_tree(tree->leftson);
917
GBT_unlink_tree(tree->rightson);
921
// ----------------------
924
GBDATA *GBT_find_tree(GBDATA *gb_main, const char *tree_name) {
926
* - DB tree container associated with tree_name
927
* - NULL if no such tree exists
929
return GB_entry(GBT_get_tree_data(gb_main), tree_name);
932
inline bool is_tree(GBDATA *gb_tree) {
933
if (!gb_tree) return false;
934
GBDATA *gb_tree_data = GB_get_father(gb_tree);
935
return gb_tree_data && GB_has_key(gb_tree_data, "tree_data");
938
inline GBDATA *get_first_tree(GBDATA *gb_main) {
939
return GB_child(GBT_get_tree_data(gb_main));
942
inline GBDATA *get_next_tree(GBDATA *gb_tree) {
943
if (!gb_tree) return NULL;
944
gb_assert(is_tree(gb_tree));
945
return GB_nextChild(gb_tree);
948
GBDATA *GBT_find_largest_tree(GBDATA *gb_main) {
950
GBDATA *gb_largest = NULL;
952
for (GBDATA *gb_tree = get_first_tree(gb_main); gb_tree; gb_tree = get_next_tree(gb_tree)) {
953
long *nnodes = GBT_read_int(gb_tree, "nnodes");
954
if (nnodes && *nnodes>maxnodes) {
955
gb_largest = gb_tree;
962
GBDATA *GBT_tree_infrontof(GBDATA *gb_tree) {
963
GBDATA *gb_treedata = GB_get_father(gb_tree);
964
ensure_trees_have_order(gb_treedata);
965
return get_tree_infrontof_idx(gb_treedata, get_tree_idx(gb_tree));
967
GBDATA *GBT_tree_behind(GBDATA *gb_tree) {
968
GBDATA *gb_treedata = GB_get_father(gb_tree);
969
ensure_trees_have_order(gb_treedata);
970
return get_tree_behind_idx(gb_treedata, get_tree_idx(gb_tree));
973
GBDATA *GBT_find_top_tree(GBDATA *gb_main) {
974
GBDATA *gb_treedata = GBT_get_tree_data(gb_main);
975
ensure_trees_have_order(gb_treedata);
977
GBDATA *gb_top = get_tree_with_idx(gb_treedata, 1);
978
if (!gb_top) gb_top = get_tree_behind_idx(gb_treedata, 1);
981
GBDATA *GBT_find_bottom_tree(GBDATA *gb_main) {
982
GBDATA *gb_treedata = GBT_get_tree_data(gb_main);
983
ensure_trees_have_order(gb_treedata);
984
return get_tree_infrontof_idx(gb_treedata, INT_MAX);
987
const char *GBT_existing_tree(GBDATA *gb_main, const char *tree_name) {
988
// search for a specify existing tree (and fallback to any existing)
989
GBDATA *gb_tree = GBT_find_tree(gb_main, tree_name);
990
if (!gb_tree) gb_tree = get_first_tree(gb_main);
991
return GBT_get_tree_name(gb_tree);
994
GBDATA *GBT_find_next_tree(GBDATA *gb_tree) {
995
GBDATA *gb_other = NULL;
997
gb_other = GBT_tree_behind(gb_tree);
999
gb_other = GBT_find_top_tree(GB_get_root(gb_tree));
1000
if (gb_other == gb_tree) gb_other = NULL;
1003
gb_assert(gb_other != gb_tree);
1007
// --------------------
1010
const char *GBT_get_tree_name(GBDATA *gb_tree) {
1011
if (!gb_tree) return NULL;
1012
gb_assert(is_tree(gb_tree));
1013
return GB_read_key_pntr(gb_tree);
1016
GB_ERROR GBT_check_tree_name(const char *tree_name) {
1017
GB_ERROR error = GB_check_key(tree_name);
1019
if (strncmp(tree_name, "tree_", 5) != 0) {
1020
error = "has to start with 'tree_'";
1024
error = GBS_global_string("not a valid treename '%s' (Reason: %s)", tree_name, error);
1029
const char *GBT_name_of_largest_tree(GBDATA *gb_main) {
1030
return GBT_get_tree_name(GBT_find_largest_tree(gb_main));
1033
const char *GBT_name_of_bottom_tree(GBDATA *gb_main) {
1034
return GBT_get_tree_name(GBT_find_bottom_tree(gb_main));
1037
// -------------------
1040
const char *GBT_tree_info_string(GBDATA *gb_main, const char *tree_name, int maxTreeNameLen) {
1041
// maxTreeNameLen shall be the max len of the longest tree name (or -1 -> do not format)
1043
const char *result = 0;
1044
GBDATA *gb_tree = GBT_find_tree(gb_main, tree_name);
1047
GB_export_errorf("tree '%s' not found", tree_name);
1050
GBDATA *gb_nnodes = GB_entry(gb_tree, "nnodes");
1052
GB_export_errorf("nnodes not found in tree '%s'", tree_name);
1055
const char *sizeInfo = GBS_global_string("(%li:%i)", GB_read_int(gb_nnodes)+1, GB_read_security_write(gb_tree));
1056
GBDATA *gb_rem = GB_entry(gb_tree, "remark");
1059
if (maxTreeNameLen == -1) {
1060
result = GBS_global_string("%s %11s", tree_name, sizeInfo);
1061
len = strlen(tree_name);
1064
result = GBS_global_string("%-*s %11s", maxTreeNameLen, tree_name, sizeInfo);
1065
len = maxTreeNameLen;
1068
const char *remark = GB_read_char_pntr(gb_rem);
1069
const int remarkLen = 800;
1070
char *res2 = GB_give_other_buffer(remark, len+1+11+2+remarkLen+1);
1072
strcpy(res2, result);
1074
strncat(res2, remark, remarkLen);
1083
long GBT_size_of_tree(GBDATA *gb_main, const char *tree_name) {
1084
// return the number of inner nodes in binary tree (or -1 if unknown)
1087
// inner nodes in unrooted tree = size - 1
1090
GBDATA *gb_tree = GBT_find_tree(gb_main, tree_name);
1092
GBDATA *gb_nnodes = GB_entry(gb_tree, "nnodes");
1094
nnodes = GB_read_int(gb_nnodes);
1101
struct indexed_name {
1105
bool operator<(const indexed_name& other) const { return idx < other.idx; }
1108
void GBT_get_tree_names(ConstStrArray& names, GBDATA *gb_main, bool sorted) {
1109
// stores tree names in 'names'
1111
GBDATA *gb_treedata = GBT_get_tree_data(gb_main);
1112
ensure_trees_have_order(gb_treedata);
1114
long tree_count = GB_number_of_subentries(gb_treedata);
1116
names.reserve(tree_count);
1117
typedef std::set<indexed_name> ordered_trees;
1118
ordered_trees trees;
1123
for (GBDATA *gb_tree = GB_child(gb_treedata); gb_tree; gb_tree = GB_nextChild(gb_tree), ++t) {
1125
iname.name = GB_read_key_pntr(gb_tree);
1126
iname.idx = sorted ? get_tree_idx(gb_tree) : ++count;
1128
trees.insert(iname);
1132
if (tree_count != (long)trees.size()) { // there are duplicated "order" entries
1133
gb_assert(sorted); // should not happen in unsorted mode
1135
typedef std::set<int> ints;
1138
GBDATA *gb_first_tree = GB_child(gb_treedata);
1139
GBDATA *gb_tree = gb_first_tree;
1142
int idx = get_tree_idx(gb_tree);
1143
if (used_indices.find(idx) != used_indices.end()) { // duplicate order
1144
GB_ERROR error = reserve_tree_idx(gb_treedata, idx+1);
1145
if (!error) error = set_tree_idx(gb_tree, idx+1);
1146
if (error) GBK_terminatef("failed to fix tree-order (Reason: %s)", error);
1149
used_indices.clear();
1150
gb_tree = gb_first_tree;
1153
used_indices.insert(idx);
1154
gb_tree = GB_nextChild(gb_tree);
1157
GBT_get_tree_names(names, gb_main, sorted);
1161
for (ordered_trees::const_iterator t = trees.begin(); t != trees.end(); ++t) {
1166
NOT4PERL GB_ERROR GBT_move_tree(GBDATA *gb_moved_tree, GBT_ORDER_MODE mode, GBDATA *gb_target_tree) {
1167
// moves 'gb_moved_tree' next to 'gb_target_tree' (only changes tree-order)
1168
gb_assert(gb_moved_tree && gb_target_tree);
1170
GBDATA *gb_treedata = GB_get_father(gb_moved_tree);
1171
ensure_trees_have_order(gb_treedata);
1173
int target_idx = get_tree_idx(gb_target_tree);
1174
gb_assert(target_idx);
1176
if (mode == GBT_BEHIND) target_idx++;
1178
GB_ERROR error = reserve_tree_idx(gb_treedata, target_idx);
1179
if (!error) error = set_tree_idx(gb_moved_tree, target_idx);
1184
static GBDATA *get_source_and_check_target_tree(GBDATA *gb_main, const char *source_tree, const char *dest_tree, GB_ERROR& error) {
1185
GBDATA *gb_source_tree = NULL;
1187
error = GBT_check_tree_name(source_tree);
1188
if (!error) error = GBT_check_tree_name(dest_tree);
1190
if (error && strcmp(source_tree, NO_TREE_SELECTED) == 0) {
1191
error = "No tree selected";
1194
if (!error && strcmp(source_tree, dest_tree) == 0) error = "source- and dest-tree are the same";
1197
gb_source_tree = GBT_find_tree(gb_main, source_tree);
1198
if (!gb_source_tree) error = GBS_global_string("tree '%s' not found", source_tree);
1200
GBDATA *gb_dest_tree = GBT_find_tree(gb_main, dest_tree);
1202
error = GBS_global_string("tree '%s' already exists", dest_tree);
1203
gb_source_tree = NULL;
1208
gb_assert(contradicted(error, gb_source_tree));
1209
return gb_source_tree;
1212
static GBDATA *copy_tree_container(GBDATA *gb_source_tree, const char *newName, GB_ERROR& error) {
1213
GBDATA *gb_treedata = GB_get_father(gb_source_tree);
1214
GBDATA *gb_dest_tree = GB_create_container(gb_treedata, newName);
1216
if (!gb_dest_tree) error = GB_await_error();
1217
else error = GB_copy(gb_dest_tree, gb_source_tree);
1219
gb_assert(contradicted(error, gb_dest_tree));
1220
return gb_dest_tree;
1223
GB_ERROR GBT_copy_tree(GBDATA *gb_main, const char *source_name, const char *dest_name) {
1225
GBDATA *gb_source_tree = get_source_and_check_target_tree(gb_main, source_name, dest_name, error);
1227
if (gb_source_tree) {
1228
GBDATA *gb_dest_tree = copy_tree_container(gb_source_tree, dest_name, error);
1230
int source_idx = get_tree_idx(gb_source_tree);
1231
int dest_idx = source_idx+1;
1233
error = reserve_tree_idx(GB_get_father(gb_dest_tree), dest_idx);
1234
if (!error) error = set_tree_idx(gb_dest_tree, dest_idx);
1241
GB_ERROR GBT_rename_tree(GBDATA *gb_main, const char *source_name, const char *dest_name) {
1243
GBDATA *gb_source_tree = get_source_and_check_target_tree(gb_main, source_name, dest_name, error);
1245
if (gb_source_tree) {
1246
GBDATA *gb_dest_tree = copy_tree_container(gb_source_tree, dest_name, error);
1247
if (gb_dest_tree) error = GB_delete(gb_source_tree);
1253
static GB_CSTR *fill_species_name_array(GB_CSTR *current, const GBT_TREE *tree) {
1254
if (tree->is_leaf) {
1255
current[0] = tree->name;
1258
current = fill_species_name_array(current, tree->leftson);
1259
current = fill_species_name_array(current, tree->rightson);
1263
GB_CSTR *GBT_get_names_of_species_in_tree(const GBT_TREE *tree, size_t *count) {
1264
/* creates an array of all species names in a tree,
1265
* The names are not allocated (so they may change as side effect of renaming species) */
1267
size_t size = GBT_count_leafs(tree);
1268
GB_CSTR *result = (GB_CSTR *)GB_calloc(sizeof(char *), size + 1);
1270
IF_ASSERTION_USED(GB_CSTR *check =) fill_species_name_array(result, tree);
1271
gb_assert(check - size == result);
1273
if (count) *count = size;
1278
static void tree2newick(const GBT_TREE *tree, GBS_strstruct& out, NewickFormat format) {
1280
if (tree->is_leaf) {
1281
out.cat(tree->name);
1285
tree2newick(tree->leftson, out, format);
1287
tree2newick(tree->rightson, out, format);
1290
if (format & (nGROUP|nREMARK)) {
1291
const char *remark = format&nREMARK ? tree->get_remark() : NULL;
1292
const char *group = format&nGROUP ? tree->name : NULL;
1294
if (remark || group) {
1298
if (group) out.put(':');
1300
if (group) out.cat(group);
1306
if (format&nLENGTH && !tree->is_root_node()) {
1308
out.nprintf(10, "%5.3f", tree->get_branchlength());
1312
char *GBT_tree_2_newick(const GBT_TREE *tree, NewickFormat format) {
1313
GBS_strstruct out(1000);
1314
if (tree) tree2newick(tree, out, format);
1316
return out.release();
1320
// --------------------------------------------------------------------------------
1323
#include <test_unit.h>
1325
static const char *getTreeOrder(GBDATA *gb_main) {
1326
ConstStrArray names;
1327
GBT_get_tree_names(names, gb_main, true);
1329
char *joined = GBT_join_names(names, '|');
1330
char *size_and_names = GBS_global_string_copy("%zu:%s", names.size(), joined);
1333
RETURN_LOCAL_ALLOC(size_and_names);
1336
void TEST_tree_names() {
1337
TEST_EXPECT_ERROR_CONTAINS(GBT_check_tree_name(""), "not a valid treename");
1338
TEST_EXPECT_ERROR_CONTAINS(GBT_check_tree_name("not_a_treename"), "not a valid treename");
1339
TEST_EXPECT_ERROR_CONTAINS(GBT_check_tree_name("tree_bad.dot"), "not a valid treename");
1341
TEST_EXPECT_NO_ERROR(GBT_check_tree_name("tree_")); // ugly but ok
1342
TEST_EXPECT_NO_ERROR(GBT_check_tree_name("tree_ok"));
1345
void TEST_tree_contraints() {
1347
const int MIN_LEAFS = 2;
1349
TEST_EXPECT_EQUAL(leafs_2_nodes(MIN_LEAFS, ROOTED), 3);
1350
TEST_EXPECT_EQUAL(leafs_2_nodes(MIN_LEAFS, UNROOTED), 2);
1351
TEST_EXPECT_EQUAL(leafs_2_edges(MIN_LEAFS, ROOTED), 2);
1352
TEST_EXPECT_EQUAL(leafs_2_edges(MIN_LEAFS, UNROOTED), 1);
1354
TEST_EXPECT_EQUAL(MIN_LEAFS, nodes_2_leafs(3, ROOTED)); // test minimum (3 nodes rooted)
1355
TEST_EXPECT_EQUAL(MIN_LEAFS, nodes_2_leafs(2, UNROOTED)); // test minimum (2 nodes unrooted)
1357
TEST_EXPECT_EQUAL(MIN_LEAFS, edges_2_leafs(2, ROOTED)); // test minimum (2 edges rooted)
1358
TEST_EXPECT_EQUAL(MIN_LEAFS, edges_2_leafs(1, UNROOTED)); // test minimum (1 edge unrooted)
1360
// test inverse functions:
1361
for (int i = 3; i<=7; ++i) {
1362
// test "leaf->XXX" and back conversions (any number of leafs is possible)
1363
TEST_EXPECT_EQUAL(i, nodes_2_leafs(leafs_2_nodes(i, ROOTED), ROOTED));
1364
TEST_EXPECT_EQUAL(i, nodes_2_leafs(leafs_2_nodes(i, UNROOTED), UNROOTED));
1366
TEST_EXPECT_EQUAL(i, edges_2_leafs(leafs_2_edges(i, ROOTED), ROOTED));
1367
TEST_EXPECT_EQUAL(i, edges_2_leafs(leafs_2_edges(i, UNROOTED), UNROOTED));
1371
TEST_EXPECT_EQUAL(i, leafs_2_nodes(nodes_2_leafs(i, ROOTED), ROOTED)); // rooted trees only contain odd numbers of nodes
1372
TEST_EXPECT_EQUAL(i, leafs_2_edges(edges_2_leafs(i, UNROOTED), UNROOTED)); // unrooted trees only contain odd numbers of edges
1375
TEST_EXPECT_EQUAL(i, leafs_2_nodes(nodes_2_leafs(i, UNROOTED), UNROOTED)); // unrooted trees only contain even numbers of nodes
1376
TEST_EXPECT_EQUAL(i, leafs_2_edges(edges_2_leafs(i, ROOTED), ROOTED)); // rooted trees only contain even numbers of edges
1379
// test adding a leaf adds two nodes:
1381
TEST_EXPECT_EQUAL(leafs_2_nodes(added, ROOTED)-leafs_2_nodes(i, ROOTED), 2);
1382
TEST_EXPECT_EQUAL(leafs_2_nodes(added, UNROOTED)-leafs_2_nodes(i, UNROOTED), 2);
1388
void TEST_copy_rename_delete_tree_order() {
1390
GBDATA *gb_main = GB_open("TEST_trees.arb", "r");
1393
GB_transaction ta(gb_main);
1396
TEST_EXPECT_NULL(GBT_get_tree_name(NULL));
1398
TEST_EXPECT_EQUAL(GBT_name_of_largest_tree(gb_main), "tree_removal");
1400
TEST_EXPECT_EQUAL(GBT_get_tree_name(GBT_find_top_tree(gb_main)), "tree_test");
1401
TEST_EXPECT_EQUAL(GBT_name_of_bottom_tree(gb_main), "tree_removal");
1403
long inner_nodes = GBT_size_of_tree(gb_main, "tree_nj_bs");
1404
TEST_EXPECT_EQUAL(inner_nodes, 5);
1405
TEST_EXPECT_EQUAL(GBT_tree_info_string(gb_main, "tree_nj_bs", -1), "tree_nj_bs (6:0) PRG=dnadist CORR=none FILTER=none PKG=ARB");
1406
TEST_EXPECT_EQUAL(GBT_tree_info_string(gb_main, "tree_nj_bs", 20), "tree_nj_bs (6:0) PRG=dnadist CORR=none FILTER=none PKG=ARB");
1409
GBT_TREE *tree = GBT_read_tree(gb_main, "tree_nj_bs", GBT_TREE_NodeFactory());
1411
TEST_REJECT_NULL(tree);
1413
size_t leaf_count = GBT_count_leafs(tree);
1415
size_t species_count;
1416
GB_CSTR *species = GBT_get_names_of_species_in_tree(tree, &species_count);
1419
for (int i = 0; species[i]; ++i) species2.put(strdup(species[i]));
1421
TEST_EXPECT_EQUAL(species_count, leaf_count);
1422
TEST_EXPECT_EQUAL(long(species_count), inner_nodes+1);
1425
char *joined = GBT_join_names(species2, '*');
1426
TEST_EXPECT_EQUAL(joined, "CloButyr*CloButy2*CorGluta*CorAquat*CurCitre*CytAquat");
1432
TEST_EXPECT_NEWICK(nSIMPLE, tree, "(CloButyr,(CloButy2,((CorGluta,(CorAquat,CurCitre)),CytAquat)));");
1433
TEST_EXPECT_NEWICK(nSIMPLE, NULL, ";");
1438
TEST_EXPECT_EQUAL(GBT_existing_tree(gb_main, "tree_nj_bs"), "tree_nj_bs");
1439
TEST_EXPECT_EQUAL(GBT_existing_tree(gb_main, "tree_nosuch"), "tree_test");
1442
// changing tree order
1444
TEST_EXPECT_EQUAL(getTreeOrder(gb_main), "5:tree_test|tree_tree2|tree_nj|tree_nj_bs|tree_removal");
1446
GBDATA *gb_test = GBT_find_tree(gb_main, "tree_test");
1447
GBDATA *gb_tree2 = GBT_find_tree(gb_main, "tree_tree2");
1448
GBDATA *gb_nj = GBT_find_tree(gb_main, "tree_nj");
1449
GBDATA *gb_nj_bs = GBT_find_tree(gb_main, "tree_nj_bs");
1450
GBDATA *gb_removal = GBT_find_tree(gb_main, "tree_removal");
1452
TEST_EXPECT_NO_ERROR(GBT_move_tree(gb_test, GBT_BEHIND, gb_removal)); // move to bottom
1453
TEST_EXPECT_EQUAL(getTreeOrder(gb_main), "5:tree_tree2|tree_nj|tree_nj_bs|tree_removal|tree_test");
1455
TEST_EXPECT_EQUAL(GBT_tree_behind(gb_tree2), gb_nj);
1456
TEST_EXPECT_EQUAL(GBT_tree_behind(gb_nj), gb_nj_bs);
1457
TEST_EXPECT_EQUAL(GBT_tree_behind(gb_nj_bs), gb_removal);
1458
TEST_EXPECT_EQUAL(GBT_tree_behind(gb_removal), gb_test);
1459
TEST_EXPECT_NULL(GBT_tree_behind(gb_test));
1461
TEST_EXPECT_NULL(GBT_tree_infrontof(gb_tree2));
1462
TEST_EXPECT_EQUAL(GBT_tree_infrontof(gb_nj), gb_tree2);
1463
TEST_EXPECT_EQUAL(GBT_tree_infrontof(gb_nj_bs), gb_nj);
1464
TEST_EXPECT_EQUAL(GBT_tree_infrontof(gb_removal), gb_nj_bs);
1465
TEST_EXPECT_EQUAL(GBT_tree_infrontof(gb_test), gb_removal);
1467
TEST_EXPECT_NO_ERROR(GBT_move_tree(gb_test, GBT_INFRONTOF, gb_tree2)); // move back to top
1468
TEST_EXPECT_EQUAL(getTreeOrder(gb_main), "5:tree_test|tree_tree2|tree_nj|tree_nj_bs|tree_removal");
1470
TEST_EXPECT_NO_ERROR(GBT_move_tree(gb_test, GBT_BEHIND, gb_tree2)); // move from top
1471
TEST_EXPECT_EQUAL(getTreeOrder(gb_main), "5:tree_tree2|tree_test|tree_nj|tree_nj_bs|tree_removal");
1473
TEST_EXPECT_NO_ERROR(GBT_move_tree(gb_removal, GBT_INFRONTOF, gb_nj)); // move from end
1474
TEST_EXPECT_EQUAL(getTreeOrder(gb_main), "5:tree_tree2|tree_test|tree_removal|tree_nj|tree_nj_bs");
1476
TEST_EXPECT_NO_ERROR(GBT_move_tree(gb_nj_bs, GBT_INFRONTOF, gb_nj_bs)); // noop
1477
TEST_EXPECT_EQUAL(getTreeOrder(gb_main), "5:tree_tree2|tree_test|tree_removal|tree_nj|tree_nj_bs");
1479
TEST_EXPECT_EQUAL(GBT_get_tree_name(GBT_find_top_tree(gb_main)), "tree_tree2");
1481
TEST_EXPECT_EQUAL(GBT_get_tree_name(GBT_find_next_tree(gb_removal)), "tree_nj");
1482
TEST_EXPECT_EQUAL(GBT_get_tree_name(GBT_find_next_tree(gb_nj_bs)), "tree_tree2"); // last -> first
1485
// check tree order is maintained by copy, rename and delete
1489
TEST_EXPECT_ERROR_CONTAINS(GBT_copy_tree(gb_main, "tree_nosuch", "tree_whatever"), "tree 'tree_nosuch' not found");
1490
TEST_EXPECT_ERROR_CONTAINS(GBT_copy_tree(gb_main, "tree_test", "tree_test"), "source- and dest-tree are the same");
1491
TEST_EXPECT_ERROR_CONTAINS(GBT_copy_tree(gb_main, "tree_tree2", "tree_test"), "tree 'tree_test' already exists");
1493
TEST_EXPECT_NO_ERROR(GBT_copy_tree(gb_main, "tree_test", "tree_test_copy"));
1494
TEST_REJECT_NULL(GBT_find_tree(gb_main, "tree_test_copy"));
1495
TEST_EXPECT_EQUAL(getTreeOrder(gb_main), "6:tree_tree2|tree_test|tree_test_copy|tree_removal|tree_nj|tree_nj_bs");
1498
TEST_EXPECT_NO_ERROR(GBT_rename_tree(gb_main, "tree_nj", "tree_renamed_nj"));
1499
TEST_REJECT_NULL(GBT_find_tree(gb_main, "tree_renamed_nj"));
1500
TEST_EXPECT_EQUAL(getTreeOrder(gb_main), "6:tree_tree2|tree_test|tree_test_copy|tree_removal|tree_renamed_nj|tree_nj_bs");
1502
TEST_EXPECT_NO_ERROR(GBT_rename_tree(gb_main, "tree_tree2", "tree_renamed_tree2"));
1503
TEST_EXPECT_EQUAL(getTreeOrder(gb_main), "6:tree_renamed_tree2|tree_test|tree_test_copy|tree_removal|tree_renamed_nj|tree_nj_bs");
1505
TEST_EXPECT_NO_ERROR(GBT_rename_tree(gb_main, "tree_test_copy", "tree_renamed_test_copy"));
1506
TEST_EXPECT_EQUAL(getTreeOrder(gb_main), "6:tree_renamed_tree2|tree_test|tree_renamed_test_copy|tree_removal|tree_renamed_nj|tree_nj_bs");
1510
GBDATA *gb_nj_bs = GBT_find_tree(gb_main, "tree_nj_bs");
1511
GBDATA *gb_renamed_nj = GBT_find_tree(gb_main, "tree_renamed_nj");
1512
GBDATA *gb_renamed_test_copy = GBT_find_tree(gb_main, "tree_renamed_test_copy");
1513
GBDATA *gb_renamed_tree2 = GBT_find_tree(gb_main, "tree_renamed_tree2");
1514
GBDATA *gb_test = GBT_find_tree(gb_main, "tree_test");
1515
GBDATA *gb_removal = GBT_find_tree(gb_main, "tree_removal");
1517
TEST_EXPECT_NO_ERROR(GB_delete(gb_renamed_tree2));
1518
TEST_EXPECT_NO_ERROR(GB_delete(gb_renamed_test_copy));
1519
TEST_EXPECT_NO_ERROR(GB_delete(gb_renamed_nj));
1520
TEST_EXPECT_NO_ERROR(GB_delete(gb_removal));
1522
// .. two trees left
1524
TEST_EXPECT_EQUAL(getTreeOrder(gb_main), "2:tree_test|tree_nj_bs");
1526
TEST_EXPECT_EQUAL(GBT_find_largest_tree(gb_main), gb_test);
1527
TEST_EXPECT_EQUAL(GBT_find_top_tree(gb_main), gb_test);
1528
TEST_EXPECT_EQUAL(GBT_find_bottom_tree(gb_main), gb_nj_bs);
1530
TEST_EXPECT_EQUAL(GBT_find_next_tree(gb_test), gb_nj_bs);
1531
TEST_EXPECT_EQUAL(GBT_find_next_tree(gb_test), gb_nj_bs);
1532
TEST_EXPECT_EQUAL(GBT_find_next_tree(gb_nj_bs), gb_test);
1534
TEST_EXPECT_NULL (GBT_tree_infrontof(gb_test));
1535
TEST_EXPECT_EQUAL(GBT_tree_behind (gb_test), gb_nj_bs);
1537
TEST_EXPECT_EQUAL(GBT_tree_infrontof(gb_nj_bs), gb_test);
1538
TEST_EXPECT_NULL (GBT_tree_behind (gb_nj_bs));
1540
TEST_EXPECT_NO_ERROR(GBT_move_tree(gb_test, GBT_BEHIND, gb_nj_bs)); // move to bottom
1541
TEST_EXPECT_EQUAL(getTreeOrder(gb_main), "2:tree_nj_bs|tree_test");
1542
TEST_EXPECT_NO_ERROR(GBT_move_tree(gb_test, GBT_INFRONTOF, gb_nj_bs)); // move to top
1543
TEST_EXPECT_EQUAL(getTreeOrder(gb_main), "2:tree_test|tree_nj_bs");
1545
TEST_EXPECT_NO_ERROR(GB_delete(gb_nj_bs));
1549
TEST_EXPECT_EQUAL(getTreeOrder(gb_main), "1:tree_test");
1551
TEST_EXPECT_EQUAL(GBT_find_largest_tree(gb_main), gb_test);
1552
TEST_EXPECT_EQUAL(GBT_find_top_tree(gb_main), gb_test);
1553
TEST_EXPECT_EQUAL(GBT_find_bottom_tree(gb_main), gb_test);
1555
TEST_EXPECT_NULL(GBT_find_next_tree(gb_test)); // no other tree left
1556
TEST_EXPECT_NULL(GBT_tree_behind(gb_test));
1557
TEST_EXPECT_NULL(GBT_tree_infrontof(gb_test));
1559
TEST_EXPECT_NO_ERROR(GB_delete(gb_test));
1563
TEST_EXPECT_EQUAL(getTreeOrder(gb_main), "0:");
1565
TEST_EXPECT_NULL(GBT_find_tree(gb_main, "tree_test"));
1566
TEST_EXPECT_NULL(GBT_existing_tree(gb_main, "tree_whatever"));
1567
TEST_EXPECT_NULL(GBT_find_largest_tree(gb_main));
1574
void TEST_tree_remove_leafs() {
1576
GBDATA *gb_main = GB_open("TEST_trees.arb", "r");
1579
GBT_TreeRemoveType tested_modes[] = {
1581
GBT_REMOVE_UNMARKED,
1586
const char *org_topo = "((CloInnoc:0.371,(CloTyrob:0.009,(CloTyro2:0.017,(CloTyro3:1.046,CloTyro4:0.061):0.026):0.017):0.274):0.029,(CloBifer:0.388,((CloCarni:0.120,CurCitre:0.058):1.000,((CloPaste:0.179,(Zombie1:0.120,(CloButy2:0.009,CloButyr:0.000):0.564):0.010):0.131,(CytAquat:0.711,(CelBiazo:0.059,(CorGluta:0.522,(CorAquat:0.084,Zombie2:0.058):0.103):0.054):0.207):0.162):0.124):0.124):0.029);";
1587
const char *rem_marked_topo = "((CloInnoc:0.371,(CloTyrob:0.009,(CloTyro2:0.017,(CloTyro3:1.046,CloTyro4:0.061):0.026):0.017):0.274):0.029,(CloBifer:0.388,(CloCarni:1.000,((CloPaste:0.179,Zombie1:0.010):0.131,(CelBiazo:0.059,Zombie2:0.054):0.162):0.124):0.124):0.029);";
1588
const char *rem_unmarked_topo = "(CurCitre:1.000,((Zombie1:0.120,(CloButy2:0.009,CloButyr:0.000):0.564):0.131,(CytAquat:0.711,(CorGluta:0.522,(CorAquat:0.084,Zombie2:0.058):0.103):0.207):0.162):0.124);";
1589
const char *rem_zombies_topo = "((CloInnoc:0.371,(CloTyrob:0.009,(CloTyro2:0.017,(CloTyro3:1.046,CloTyro4:0.061):0.026):0.017):0.274):0.029,(CloBifer:0.388,((CloCarni:0.120,CurCitre:0.058):1.000,((CloPaste:0.179,(CloButy2:0.009,CloButyr:0.000):0.010):0.131,(CytAquat:0.711,(CelBiazo:0.059,(CorGluta:0.522,CorAquat:0.103):0.054):0.207):0.162):0.124):0.124):0.029);";
1590
const char *kept_marked_topo = "(CurCitre:1.000,((CloButy2:0.009,CloButyr:0.000):0.131,(CytAquat:0.711,(CorGluta:0.522,CorAquat:0.103):0.207):0.162):0.124);";
1592
const char *kept_zombies_topo = "(Zombie1:0.131,Zombie2:0.162);";
1593
const char *kept_zombies_broken_topo = "Zombie2;";
1595
const char *empty_topo = ";";
1597
GB_transaction ta(gb_main);
1598
for (unsigned mode = 0; mode<ARRAY_ELEMS(tested_modes); ++mode) {
1599
GBT_TreeRemoveType what = tested_modes[mode];
1601
for (int linked = 0; linked<=1; ++linked) {
1602
TEST_ANNOTATE(GBS_global_string("mode=%u linked=%i", mode, linked));
1604
GBT_TREE *tree = GBT_read_tree(gb_main, "tree_removal", GBT_TREE_NodeFactory());
1605
bool once = mode == 0 && linked == 0;
1611
TEST_EXPECT_NO_ERROR(GBT_link_tree(tree, gb_main, false, &zombies, &duplicates));
1613
TEST_EXPECT_EQUAL(zombies, 2);
1614
TEST_EXPECT_EQUAL(duplicates, 0);
1617
if (once) TEST_EXPECT_NEWICK(nLENGTH, tree, org_topo);
1619
int removedCount = 0;
1620
int groupsRemovedCount = 0;
1622
tree = GBT_remove_leafs(tree, what, NULL, &removedCount, &groupsRemovedCount);
1625
GBT_TreeRemoveType what_next = what;
1628
case GBT_REMOVE_MARKED:
1629
TEST_EXPECT_EQUAL(removedCount, 6);
1630
TEST_EXPECT_EQUAL(groupsRemovedCount, 0);
1631
TEST_EXPECT_NEWICK(nLENGTH, tree, rem_marked_topo);
1632
what_next = GBT_REMOVE_UNMARKED;
1634
case GBT_REMOVE_UNMARKED:
1635
TEST_EXPECT_EQUAL(removedCount, 9);
1636
TEST_EXPECT_EQUAL(groupsRemovedCount, 1);
1637
TEST_EXPECT_NEWICK(nLENGTH, tree, rem_unmarked_topo);
1638
what_next = GBT_REMOVE_MARKED;
1640
case GBT_REMOVE_ZOMBIES:
1641
TEST_EXPECT_EQUAL(removedCount, 2);
1642
TEST_EXPECT_EQUAL(groupsRemovedCount, 0);
1643
TEST_EXPECT_NEWICK(nLENGTH, tree, rem_zombies_topo);
1645
case GBT_KEEP_MARKED:
1646
TEST_EXPECT_EQUAL(removedCount, 11);
1647
TEST_EXPECT_EQUAL(groupsRemovedCount, 1);
1648
TEST_EXPECT_NEWICK(nLENGTH, tree, kept_marked_topo);
1649
what_next = GBT_REMOVE_MARKED;
1653
if (what_next != what) {
1655
tree = GBT_remove_leafs(tree, what_next, NULL, &removedCount, &groupsRemovedCount);
1658
case GBT_REMOVE_MARKED: // + GBT_REMOVE_UNMARKED
1659
TEST_EXPECT_EQUAL(removedCount, 16);
1660
TEST_EXPECT_EQUAL(groupsRemovedCount, 1);
1661
TEST_EXPECT_NEWICK__BROKEN(nLENGTH, tree, kept_zombies_topo);
1662
TEST_EXPECT_NEWICK(nLENGTH, tree, kept_zombies_broken_topo); // @@@ invalid topology (single leaf)
1664
case GBT_REMOVE_UNMARKED: // + GBT_REMOVE_MARKED
1665
TEST_EXPECT_EQUAL(removedCount, 15);
1666
TEST_EXPECT_EQUAL(groupsRemovedCount, 1);
1667
TEST_EXPECT_NEWICK(nLENGTH, tree, kept_zombies_topo);
1669
case GBT_KEEP_MARKED: // + GBT_REMOVE_MARKED
1670
TEST_EXPECT_EQUAL(removedCount, 17);
1671
TEST_EXPECT_EQUAL__BROKEN(groupsRemovedCount, 2, 1); // @@@ expect that all groups have been removed!
1672
TEST_EXPECT_EQUAL(groupsRemovedCount, 1);
1673
TEST_EXPECT_NEWICK(nLENGTH, tree, empty_topo);
1683
case GBT_REMOVE_MARKED:
1684
case GBT_REMOVE_UNMARKED:
1685
TEST_EXPECT_EQUAL(removedCount, 0);
1686
TEST_EXPECT_EQUAL(groupsRemovedCount, 0);
1687
TEST_EXPECT_NEWICK(nLENGTH, tree, org_topo);
1689
case GBT_REMOVE_ZOMBIES:
1690
case GBT_KEEP_MARKED:
1691
TEST_EXPECT_EQUAL(removedCount, 17);
1692
TEST_EXPECT_EQUAL(groupsRemovedCount, 2);
1693
TEST_EXPECT_NEWICK(nLENGTH, tree, empty_topo);
1707
#endif // UNIT_TESTS