1
// =============================================================== //
3
// File : NT_tree_cmp.cxx //
6
// Institute of Microbiology (Technical University Munich) //
7
// http://www.arb-home.de/ //
9
// =============================================================== //
11
#include "NT_species_set.h"
14
#include <arb_progress.h>
20
AWT_species_set_root::AWT_species_set_root(GBDATA *gb_maini, long nspeciesi, arb_progress *progress_) {
21
memset((char *)this, 0, sizeof(*this));
25
sets = (AWT_species_set **)GB_calloc(sizeof(AWT_species_set *), (size_t)leafs_2_nodes(nspecies, ROOTED));
27
for (int i=0; i<256; i++) {
30
while (j) { // count bits
36
species_hash = GBS_create_hash(nspecies, GB_IGNORE_CASE);
40
AWT_species_set_root::~AWT_species_set_root() {
41
for (int i=0; i<nsets; i++) delete sets[i];
43
GBS_free_hash(species_hash);
46
void AWT_species_set_root::add(const char *species_name) {
47
if (GBS_read_hash(species_hash, species_name)) {
48
aw_message(GBS_global_string("Warning: Species '%s' is found more than once in tree", species_name));
51
GBS_write_hash(species_hash, species_name, species_counter++);
55
void AWT_species_set_root::add(AWT_species_set *set) {
56
nt_assert(nsets<nspecies*2);
60
AWT_species_set *AWT_species_set_root::search_best_match(const AWT_species_set *set, long& best_cost) {
61
// returns best match for 'set'
62
// sets 'best_cost' to minimum mismatches
64
AWT_species_set *result = 0;
67
unsigned char *sbs = set->bitstring;
68
for (long i=nsets-1; i>=0; i--) {
70
unsigned char *rsb = sets[i]->bitstring;
72
for (long j=bitstring_bytes()-1; j>=0; j--) {
73
sum += diff_bits[ sbs[j] ^ rsb[j] ];
75
if (sum > nspecies/2) sum = nspecies - sum; // take always the minimum
76
if (sum < best_cost) {
84
int AWT_species_set_root::search_and_remember_best_match_and_log_errors(const AWT_species_set *set, FILE *log_file) {
85
// set's best_cost & best_node (of best match for 'set' found in other tree)
86
// returns the number of mismatches (plus a small penalty for missing species)
88
// When moving node info, 'set' represents a subtree of the source tree.
89
// When comparing topologies, 'set' represents a subtree of the destination tree.
92
AWT_species_set *bs = search_best_match(set, net_cost);
94
double best_cost = net_cost + set->unfound_species_count * 0.0001;
95
if (best_cost < bs->best_cost) {
96
bs->best_cost = best_cost;
97
bs->best_node = set->node;
101
fprintf(log_file, "Group '%s' not placed optimal (%li errors)\n",
109
void AWT_species_set::init(AP_tree *nodei, const AWT_species_set_root *ssr) {
110
memset((char *)this, 0, sizeof(*this));
112
bitstring = (unsigned char *)GB_calloc(sizeof(char), ssr->bitstring_longs()*sizeof(long));
114
best_cost = 0x7fffffff;
117
AWT_species_set::AWT_species_set(AP_tree *nodei, const AWT_species_set_root *ssr, const char *species_name) {
120
long species_index = GBS_read_hash(ssr->species_hash, species_name);
122
bitstring[species_index/8] |= 1 << (species_index % 8);
125
unfound_species_count = 1;
129
AWT_species_set::AWT_species_set(AP_tree *nodei, const AWT_species_set_root *ssr, const AWT_species_set *l, const AWT_species_set *r) {
132
const long *lbits = (const long *)l->bitstring;
133
const long *rbits = (const long *)r->bitstring;
134
long *dest = (long *)bitstring;
136
for (long j = ssr->bitstring_longs()-1; j>=0; j--) {
137
dest[j] = lbits[j] | rbits[j];
139
unfound_species_count = l->unfound_species_count + r->unfound_species_count;
142
AWT_species_set::~AWT_species_set() {
146
AWT_species_set *AWT_species_set_root::move_tree_2_ssr(AP_tree *node) {
148
// Warning: confusing resource handling:
149
// - leafs are returned "NOT owned by anybody"
150
// - inner nodes are added to and owned by this->sets
153
this->add(node->name);
154
ss = new AWT_species_set(node, this, node->name);
155
nt_assert(ss->is_leaf_set());
158
AWT_species_set *ls = move_tree_2_ssr(node->get_leftson());
159
AWT_species_set *rs = move_tree_2_ssr(node->get_rightson());
161
ss = new AWT_species_set(node, this, ls, rs);
163
nt_assert(!ss->is_leaf_set());
165
if (ls->is_leaf_set()) delete ls;
166
if (rs->is_leaf_set()) delete rs;
171
AWT_species_set *AWT_species_set_root::find_best_matches_info(AP_tree *node, FILE *log, bool compare_node_info) {
172
/* Go through all node of the source tree and search for the best
173
* matching node in dest_tree (meaning searching ssr->sets)
174
* If a match is found, set ssr->sets to this match.
177
AWT_species_set *ss = NULL;
179
ss = new AWT_species_set(node, this, node->name);
182
AWT_species_set *ls = find_best_matches_info(node->get_leftson(), log, compare_node_info);
183
AWT_species_set *rs = ls ? find_best_matches_info(node->get_rightson(), log, compare_node_info) : NULL;
186
ss = new AWT_species_set(node, this, ls, rs);
187
if (compare_node_info) {
188
int mismatches = search_and_remember_best_match_and_log_errors(ss, log);
189
// the #-sign is important (otherwise TREE_write_Newick will not work correctly; interference with bootstrap values!)
190
char *new_remark = mismatches ? GBS_global_string_copy("# %i", mismatches) : NULL;
191
node->use_as_remark(new_remark);
195
search_and_remember_best_match_and_log_errors(ss, log);
204
if (progress->aborted()) {
212
GB_ERROR AWT_species_set_root::copy_node_information(FILE *log, bool delete_old_nodes, bool nodes_with_marked_only) {
213
GB_ERROR error = NULL;
215
if (log) fputs("\nDetailed group changes:\n\n", log);
217
for (long j=this->nsets-1; j>=0 && !error; j--) {
218
AWT_species_set *cset = this->sets[j];
220
char *old_group_name = NULL;
221
bool insert_new_node = cset->best_node && cset->best_node->name;
223
if (nodes_with_marked_only && insert_new_node) {
224
if (!cset->node->contains_marked_species()) insert_new_node = false;
227
if (cset->node->gb_node && (delete_old_nodes || insert_new_node)) { // There is already a node, delete old
228
if (cset->node->name == 0) {
229
GBDATA *gb_name = GB_entry(cset->node->gb_node, "group_name");
231
cset->node->name = GB_read_string(gb_name);
234
cset->node->name = strdup("<gb_node w/o name>");
238
old_group_name = strdup(cset->node->name); // store old_group_name to rename new inserted node
240
error = GB_delete(cset->node->gb_node);
242
cset->node->gb_node = 0;
243
freenull(cset->node->name);
248
if (insert_new_node) {
249
cset->node->gb_node = GB_create_container(cset->node->get_tree_root()->get_gb_tree(), "node");
250
error = GB_copy(cset->node->gb_node, cset->best_node->gb_node);
252
GBDATA *gb_name = GB_entry(cset->node->gb_node, "group_name");
255
char *best_group_name = GB_read_string(gb_name);
256
if (old_group_name) {
257
if (!delete_old_nodes) {
258
if (strcmp(old_group_name, best_group_name) != 0) { // old and new name differ
259
char *new_group_name = GBS_global_string_copy("%s [was: %s]", best_group_name, old_group_name);
260
GB_write_string(gb_name, new_group_name);
261
if (log) fprintf(log, "Destination group '%s' overwritten by '%s'\n", old_group_name, new_group_name);
262
free(new_group_name);
265
if (log) fprintf(log, "Group '%s' remains unchanged\n", old_group_name);
270
if (strcmp(old_group_name, best_group_name) == 0) {
271
fprintf(log, "Group '%s' remains unchanged\n", old_group_name);
274
fprintf(log, "Destination group '%s' overwritten by '%s'\n", old_group_name, best_group_name);
280
if (log) fprintf(log, "Group '%s' inserted\n", best_group_name);
282
free(best_group_name);
287
if (old_group_name && log) fprintf(log, "Destination group '%s' removed\n", old_group_name);
290
free(old_group_name);
295
void AWT_species_set_root::finish(GB_ERROR& error) {
296
if (!error) error = progress->error_if_aborted();
300
GB_ERROR AWT_move_info(GBDATA *gb_main, const char *tree_source, const char *tree_dest, const char *log_file, TreeInfoMode mode, bool nodes_with_marked_only) {
304
nt_assert(contradicted(mode == TREE_INFO_COMPARE, log_file));
306
if (mode == TREE_INFO_COMPARE) {
307
// info is written into 'tree_source'
308
// (but we want to modify destination tree - like 'mode node info' does)
309
std::swap(tree_source, tree_dest);
313
nt_assert(mode == TREE_INFO_COPY || mode == TREE_INFO_ADD);
314
log = fopen(log_file, "w");
317
"LOGFILE: %s node info\n"
319
" Source tree '%s'\n"
320
"Destination tree '%s'\n"
322
mode == TREE_INFO_COPY ? "Copying" : "Adding",
323
tree_source, tree_dest);
326
GB_begin_transaction(gb_main);
328
AP_tree_root rsource(new AliView(gb_main), new AP_TreeNodeFactory, NULL, false);
329
AP_tree_root rdest (new AliView(gb_main), new AP_TreeNodeFactory, NULL, false);
330
AP_tree_root& rsave = (mode == TREE_INFO_COMPARE) ? rsource : rdest;
331
arb_progress progress("Comparing Topologies");
333
error = rsource.loadFromDB(tree_source);
334
if (!error) error = rsource.linkToDB(0, 0);
336
error = rdest.loadFromDB(tree_dest);
337
if (!error) error = rdest.linkToDB(0, 0);
339
AP_tree *source = rsource.get_root_node();
340
AP_tree *dest = rdest.get_root_node();
342
long nspecies = dest->count_leafs();
343
long source_leafs = source->count_leafs();
344
long source_nodes = leafs_2_nodes(source_leafs, ROOTED);
346
arb_progress compare_progress(source_nodes);
347
compare_progress.subtitle("Comparing both trees");
349
AWT_species_set_root *ssr = new AWT_species_set_root(gb_main, nspecies, &compare_progress);
351
ssr->move_tree_2_ssr(dest);
353
if (source_leafs < 3) error = GB_export_error("Destination tree has less than 3 species");
355
AWT_species_set *root_setl = ssr->find_best_matches_info(source->get_leftson(), log, mode == TREE_INFO_COMPARE);
356
AWT_species_set *root_setr = root_setl ? ssr->find_best_matches_info(source->get_rightson(), log, mode == TREE_INFO_COMPARE) : NULL;
359
if (mode != TREE_INFO_COMPARE) {
360
compare_progress.subtitle("Copying node information");
361
ssr->copy_node_information(log, mode == TREE_INFO_COPY, nodes_with_marked_only);
365
AWT_species_set *new_root_setl = ssr->search_best_match(root_setl, dummy);
366
AWT_species_set *new_root_setr = ssr->search_best_match(root_setr, dummy);
367
AP_tree *new_rootl = new_root_setl->node;
368
AP_tree *new_rootr = new_root_setr->node;
370
new_rootl->set_root(); // set root correctly
371
new_rootr->set_root(); // set root correctly
373
compare_progress.subtitle("Saving trees");
375
AP_tree *saved_root = (mode == TREE_INFO_COMPARE) ? source : new_rootr->get_root_node();
376
error = GBT_overwrite_tree(rsave.get_gb_tree(), saved_root);
380
if (mode == TREE_INFO_COMPARE) {
381
entry = GBS_global_string_copy("Compared topology with %s", tree_dest);
384
const char *copiedOrAdded = mode == TREE_INFO_COPY ? "Copied" : "Added";
386
entry = GBS_global_string_copy("%s node info %sfrom %s",
388
nodes_with_marked_only ? "of marked " : "",
391
GBT_log_to_tree_remark(rsave.get_gb_tree(), entry);
406
if (error) fprintf(log, "\nError: %s\n", error); // write error to log as well
410
return GB_end_transaction(gb_main, error);