1
/* This module is desined to organize the data structure partitions
1
// ============================================================= //
3
// File : CT_part.cxx //
6
// Institute of Microbiology (Technical University Munich) //
7
// http://www.arb-home.de/ //
9
// ============================================================= //
11
/* This module is designed to organize the data structure partitions
2
12
partitions represent the edges of a tree */
3
/* the partitions are implemented as an array of longs */
4
/* Each leaf in a GBT-Tree is represented as one Bit in the Partition */
13
// the partitions are implemented as an array of longs
14
// Each leaf in a GBT-Tree is represented as one Bit in the Partition
16
#include "CT_part.hxx"
10
17
#include <arbdbt.h>
13
#include "CT_part.hxx"
15
PELEM cutmask; /* this mask is nessary to cut the not
16
needed bits from the last long */
17
int longs = 0, /* number of longs per part */
18
plen = 0; /* number of bits per part */
21
/** Function to initialize the variables above
22
@PARAMETER len number of bits the part should content
23
result: calculate cutmask, longs, plen */
24
void part_init(int len)
18
#include <arb_strarray.h>
20
#define BITS_PER_PELEM (sizeof(PELEM)*8)
22
#if defined(DUMP_PART_INIT) || defined(UNIT_TESTS)
23
static const char *readable_cutmask(PELEM mask) {
24
static char readable[BITS_PER_PELEM+1];
25
memset(readable, '0', BITS_PER_PELEM);
26
readable[BITS_PER_PELEM] = 0;
28
for (int b = BITS_PER_PELEM-1; b >= 0; --b) {
29
if (mask&1) readable[b] = '1';
36
PartitionSize::PartitionSize(const int len)
38
longs((((len + 7) / 8)+sizeof(PELEM)-1) / sizeof(PELEM)),
28
/* cutmask is nessary to cut unused bits */
29
j = 8 * sizeof(PELEM);
31
if(!j) j += 8 * sizeof(PELEM);
42
/*! Function to initialize the global variables above
43
* @param len number of bits the part should content
45
* result: calculate cutmask, longs, plen
48
int j = len % BITS_PER_PELEM;
49
if (!j) j += BITS_PER_PELEM;
51
for (int i=0; i<j; i++) {
37
longs = (((len +7) / 8)+sizeof(PELEM)-1) / sizeof(PELEM);
42
/** Testfunction to print a part
43
@PARAMETER p pointer to the part */
44
void part_print(PART *p)
49
for(i=0; i<longs; i++) {
51
for(j=0; k<plen && size_t(j)<sizeof(PELEM)*8; j++, k++) {
60
printf(":%.2f,%d%%: ", p->len, p->percent);
64
/** construct new part */
69
p = (PART *) getmem(sizeof(PART));
70
p->p = (PELEM *) getmem(longs * sizeof(PELEM));
77
@PARAMETER p partition */
78
void part_free(PART *p)
85
/** build a partion that totaly constited of 111111...1111 that is needed to
86
build the root of a specific ntree */
92
for(i=0; i<longs; i++) {
95
p->p[longs-1] &= cutmask;
100
/** set the bit of the part p at the position pos
103
void part_setbit(PART *p, int pos)
105
p->p[(pos / sizeof(PELEM) / 8)] |= (1 << (pos % (sizeof(PELEM)*8)));
110
/* test if the part son is possibly a son of the part father, a father defined
111
in this context as a part covers every bit of his son. needed in CT_ntree
114
int son(PART *son, PART *father)
118
for(i=0; i<longs; i++) {
119
if((son->p[i] & father->p[i]) != son->p[i]) return 0;
125
/** test if two parts are brothers, brothers mean that ervery bit in p1 is
126
different from p2 and vice versa. needed in CT_ntree
129
int brothers(PART *p1, PART *p2)
133
for(i=0; i<longs; i++) {
134
if(p1->p[i] & p2->p[i]) return 0;
142
void part_invert(PART *p)
146
for(i=0; i<longs; i++)
148
p->p[longs-1] &= cutmask;
155
void part_or(PART *s, PART *d)
159
for(i=0; i<longs; i++) {
165
/** compare two parts p1 and p2
166
result: 1, if p1 equal p2
168
int part_cmp(PART *p1, PART *p2)
172
for(i=0; i<longs; i++) {
173
if(p1->p[i] != p2->p[i]) return 0;
180
/** calculate a hashkey from p, needed in hash */
181
int part_key(PART *p)
186
for(i=0; i<longs; i++) {
196
/** set the len of this edge (this part) */
197
void part_setlen(PART *p, GBT_LEN len)
203
/** set the percentile appearence of this part in "entrytrees" */
204
void part_setperc(PART *p, int perc)
210
/** add perc on percent from p */
211
void part_addperc(PART *p, int perc)
217
/** copy part s in part d
220
void part_copy(PART *s, PART *d)
224
for(i=0; i<longs; i++)
227
d->percent = s->percent;
231
/** standartize the partitions, two parts are equal if one is just the inverted
232
version of the other so the standart is defined that the version is the
233
representant, whos first bit is equal 1 */
234
void part_standart(PART *p)
238
if(p->p[0] & 1) return;
240
for(i=0; i<longs; i++)
242
p->p[longs-1] &= cutmask;
246
/** calculate the first bit set in p, this is only useful if only one bit is set,
247
this is used toidentify leafs in a ntree
248
attention: p must be != NULL !!! */
249
int calc_index(PART *p)
254
for(i=0; i<longs; i++) {
57
size_t possible = longs*BITS_PER_PELEM;
58
arb_assert((possible-bits)<BITS_PER_PELEM); // longs is too big (wasted space)
60
#if defined(DUMP_PART_INIT)
61
printf("leafs=%i\n", len);
62
printf("cutmask='%s'\n", readable_cutmask(cutmask));
63
printf("longs=%i (can hold %zu bits)\n", longs, possible);
64
printf("bits=%i\n", bits);
69
#if defined(NTREE_DEBUG_FUNCTIONS)
71
static const CharPtrArray *namesPtr = NULL;
73
void PART::start_pretty_printing(const CharPtrArray& names) { namesPtr = &names; }
74
void PART::stop_pretty_printing() { namesPtr = NULL; }
76
void PART::print() const {
77
// ! Testfunction to print a part
79
const int longs = get_longs();
80
const int plen = info->get_bits();
83
const CharPtrArray& names = *namesPtr;
84
for (int part = 0; part<=1; ++part) {
86
for (int i=0; i<longs; i++) {
88
for (int j=0; k<plen && size_t(j)<sizeof(PELEM)*8; j++, k++) {
89
bool bitset = p[i] & el;
91
const char *name = names[k];
93
fputc(name[0], stdout); // first char of name
95
if (!first) fputc(',', stdout);
97
fputs(name, stdout); // full name
104
fputs("---", stdout);
110
for (int i=0; i<longs; i++) {
112
for (int j=0; k<plen && size_t(j)<sizeof(PELEM)*8; j++, k++) {
113
bool bitset = p[i] & el;
114
fputc('0'+bitset, stdout);
120
printf(" len=%.5f prob=%5.1f%% w.len=%.5f leaf=%i dist2center=%i\n",
121
len, weight*100.0, get_len(), is_leaf_edge(), distance_to_tree_center());
125
PART *PartitionSize::create_root() const {
126
/*! build a partition that totally consists of 111111...1111 that is needed to
127
* build the root of a specific ntree
130
PART *p = new PART(this, 1.0);
132
arb_assert(p->is_valid());
136
bool PART::overlaps_with(const PART *other) const {
137
/*! test if two parts overlap (i.e. share common bits)
140
arb_assert(is_valid());
141
arb_assert(other->is_valid());
143
int longs = get_longs();
144
for (int i=0; i<longs; i++) {
145
if (p[i] & other->p[i]) return true;
150
void PART::invert() {
153
// Each edge in a tree connects two subtrees.
154
// These subtrees are represented by inverse partitions
156
arb_assert(is_valid());
158
int longs = get_longs();
159
for (int i=0; i<longs; i++) {
162
p[longs-1] &= get_cutmask();
164
members = get_maxsize()-members;
166
arb_assert(is_valid());
169
void PART::invertInSuperset(const PART *superset) {
170
arb_assert(is_valid());
171
arb_assert(is_subset_of(superset));
173
int longs = get_longs();
174
for (int i=0; i<longs; i++) {
175
p[i] = p[i] ^ superset->p[i];
177
p[longs-1] &= get_cutmask();
179
members = superset->get_members()-members;
181
arb_assert(is_valid());
182
arb_assert(is_subset_of(superset));
186
void PART::add_members_from(const PART *source) {
187
//! destination = source or destination
188
arb_assert(source->is_valid());
189
arb_assert(is_valid());
191
bool distinct = disjunct_from(source);
193
int longs = get_longs();
194
for (int i=0; i<longs; i++) {
195
p[i] |= source->p[i];
199
members += source->members;
202
members = count_members();
205
arb_assert(is_valid());
209
bool PART::equals(const PART *other) const {
210
/*! return true if p1 and p2 are equal
212
arb_assert(is_valid());
213
arb_assert(other->is_valid());
215
int longs = get_longs();
216
for (int i=0; i<longs; i++) {
217
if (p[i] != other->p[i]) return false;
223
unsigned PART::key() const {
224
//! calculate a hashkey from part
225
arb_assert(is_valid());
228
int longs = get_longs();
229
for (int i=0; i<longs; i++) {
236
int PART::count_members() const {
237
//! count the number of leafs in partition
239
int longs = get_longs();
240
for (int i = 0; i<longs; ++i) {
243
if (i == (longs-1)) e = e&get_cutmask();
245
for (size_t b = 0; b<(sizeof(e)*8); ++b) {
253
bool PART::is_standardized() const { // @@@ inline
254
/*! true if PART is in standard representation.
258
// may be any criteria which differs between PART and its inverse
259
// if you change the criteria, generated trees will change
260
// (because branch-insertion-order is affected)
262
return bit_is_set(0);
265
void PART::standardize() {
266
/*! standardize the partition
268
* Generally two PARTs are equivalent, if one is the inverted version of the other.
269
* A standardized PART is equal for equivalent PARTs, i.e. may be used as key (as done in PartRegistry)
271
arb_assert(is_valid());
272
if (!is_standardized()) {
274
arb_assert(is_standardized());
276
arb_assert(is_valid());
279
int PART::index() const {
280
/*! calculate the first bit set in p,
282
* this is only useful if only one bit is set,
283
* this is used to identify leafs in a ntree
285
* ATTENTION: p must be != NULL
287
arb_assert(is_valid());
288
arb_assert(is_leaf_edge());
291
int longs = get_longs();
292
for (int i=0; i<longs; i++) {
256
294
pos = i * sizeof(PELEM) * 8;
258
for(; p_temp; p_temp >>= 1, pos++) {
296
for (; p_temp; p_temp >>= 1, pos++) {
305
int PART::insertionOrder_cmp(const PART *other) const {
306
// defines order in which edges will be inserted into the consensus tree
308
if (this == other) return 0;
310
int cmp = is_leaf_edge() - other->is_leaf_edge();
313
cmp = -double_cmp(weight, other->weight); // insert bigger weight first
315
int centerdist1 = distance_to_tree_center();
316
int centerdist2 = other->distance_to_tree_center();
318
cmp = centerdist1-centerdist2; // insert central edges first
321
cmp = -double_cmp(get_len(), other->get_len()); // NOW REALLY insert bigger len first
322
// (change affected test results: increased in-tree-distance,
323
// but reduced parsimony value of result-trees)
325
cmp = id - other->id; // strict by definition
334
inline int PELEM_cmp(const PELEM& p1, const PELEM& p2) {
335
return p1<p2 ? -1 : (p1>p2 ? 1 : 0);
338
int PART::topological_cmp(const PART *other) const {
339
// define a strict order on topologies defined by edges
341
if (this == other) return 0;
343
arb_assert(is_standardized());
344
arb_assert(other->is_standardized());
346
int cmp = members - other->members;
348
int longs = get_longs();
349
for (int i = 0; !cmp && i<longs; ++i) {
350
cmp = PELEM_cmp(p[i], other->p[i]);
354
arb_assert(contradicted(cmp, equals(other)));
359
// --------------------------------------------------------------------------------
363
#include <test_unit.h>
366
void TEST_PartRegistry() {
368
PartitionSize reg(0);
369
TEST_EXPECT_EQUAL(reg.get_bits(), 0);
370
TEST_EXPECT_EQUAL(reg.get_longs(), 0);
371
// cutmask doesnt matter
375
PartitionSize reg(1);
376
TEST_EXPECT_EQUAL(reg.get_bits(), 1);
377
TEST_EXPECT_EQUAL(reg.get_longs(), 1);
378
TEST_EXPECT_EQUAL(readable_cutmask(reg.get_cutmask()), "00000000000000000000000000000001");
382
PartitionSize reg(31);
383
TEST_EXPECT_EQUAL(reg.get_bits(), 31);
384
TEST_EXPECT_EQUAL(reg.get_longs(), 1);
385
TEST_EXPECT_EQUAL(readable_cutmask(reg.get_cutmask()), "01111111111111111111111111111111");
389
PartitionSize reg(32);
390
TEST_EXPECT_EQUAL(reg.get_bits(), 32);
391
TEST_EXPECT_EQUAL(reg.get_longs(), 1);
392
TEST_EXPECT_EQUAL(readable_cutmask(reg.get_cutmask()), "11111111111111111111111111111111");
396
PartitionSize reg(33);
397
TEST_EXPECT_EQUAL(reg.get_bits(), 33);
398
TEST_EXPECT_EQUAL(reg.get_longs(), 2);
399
TEST_EXPECT_EQUAL(readable_cutmask(reg.get_cutmask()), "00000000000000000000000000000001");
403
PartitionSize reg(95);
404
TEST_EXPECT_EQUAL(reg.get_bits(), 95);
405
TEST_EXPECT_EQUAL(reg.get_longs(), 3);
406
TEST_EXPECT_EQUAL(readable_cutmask(reg.get_cutmask()), "01111111111111111111111111111111");
412
// --------------------------------------------------------------------------------