1
// =============================================================== //
3
// File : di_clustertree.hxx //
4
// Purpose : Tree structure used for cluster detection //
6
// Coded by Ralf Westram (coder@reallysoft.de) in October 2009 //
7
// Institute of Microbiology (Technical University Munich) //
8
// http://www.arb-home.de/ //
10
// =============================================================== //
12
#ifndef DI_CLUSTERTREE_HXX
13
#define DI_CLUSTERTREE_HXX
16
#include <ARB_Tree.hxx>
18
#ifndef AP_SEQUENCE_HXX
19
#include <AP_sequence.hxx>
22
#ifndef _GLIBCXX_CLIMITS
25
#ifndef _GLIBCXX_STRING
33
// Assertion macro of the cluster code: forwards the condition unchanged to arb_assert.
#define cl_assert(cond) arb_assert(cond)
40
// Enumerators describing the cluster state of a subtree.
// All non-zero values are distinct powers of two, so several states can be
// tested at once with a bit mask (e.g. CS_IS_CLUSTER|CS_SUB_CLUSTER).
CS_UNKNOWN = 0, // initial state
41
CS_TOO_SMALL = 1, // cluster is too small
42
CS_MAYBE_CLUSTER = 2, // need to test whether this is a cluster
43
CS_NO_CLUSTER = 4, // not a cluster (only worstKnownDistance is known)
44
CS_IS_CLUSTER = 8, // subtree is cluster (all sequence distances are known)
45
CS_SUB_CLUSTER = 16, // like CS_IS_CLUSTER, but father is cluster as well
48
// Sentinel distance value (-1.0).
// NOTE(review): presumably means "no distance available" - confirm at the use sites.
const float NO_DISTANCE = -1.0;
50
// ------------------------
53
// Tree root used for cluster detection. Extends ARB_seqtree_root with the two
// search parameters: the maximum allowed distance inside a cluster and the
// minimum number of leafs a cluster must have.
class ClusterTreeRoot : public ARB_seqtree_root {
54
AP_FLOAT maxDistance; // max. allowed distance inside cluster
55
unsigned minClusterSize; // min. size of cluster (number of leafs)
58
// Constructor (defined elsewhere).
// NOTE(review): minClusterSize_ arrives as size_t but the member is unsigned -
// confirm that the narrowing is intended.
ClusterTreeRoot(AliView *aliview, AP_sequence *seqTemplate_, AP_FLOAT maxDistance_, size_t minClusterSize_);
59
virtual ~ClusterTreeRoot() OVERRIDE {}
61
// NOTE(review): presumably generates get_root_node() accessors typed as
// ClusterTree on top of ARB_seqtree_root::get_root_node() - the macro is defined elsewhere.
DEFINE_DOWNCAST_ACCESSORS(ClusterTree, get_root_node, ARB_seqtree_root::get_root_node());
63
// Runs the cluster search (defined elsewhere) and reports the outcome as GB_ERROR.
// NOTE(review): presumably NULL means success - confirm.
GB_ERROR find_clusters();
64
// Read-only access to the search parameters.
unsigned get_minClusterSize() const { return minClusterSize; }
65
AP_FLOAT get_maxDistance() const { return maxDistance; }
72
// The two nodes of the pair, stored ordered by pointer value.
ClusterTree *ct1, *ct2; // ct1<ct2!
75
// Stores the smaller pointer in ct1 and the larger one in ct2, so the same two
// nodes always produce the same pair regardless of argument order.
// If c1 == c2, both members hold that same pointer.
TwoLeafs(ClusterTree *c1, ClusterTree *c2)
76
: ct1(c1<c2 ? c1 : c2)
77
, ct2(c1<c2 ? c2 : c1)
80
// Accessors for the smaller (first) and larger (second) pointer.
const ClusterTree *first() const { return ct1; }
81
ClusterTree *first() { return ct1; }
82
const ClusterTree *second() const { return ct2; }
83
ClusterTree *second() { return ct2; }
85
// Lexicographic order on (ct1, ct2): compare ct1 first, use ct2 only on a tie.
bool operator<(const TwoLeafs& other) const {
86
return ct1 == other.ct1 ? ct2<other.ct2 : ct1<other.ct1;
94
// Constructor taking a leaf pair and the value associated with it
// (initializer list / body not visible here).
LeafRelation(const TwoLeafs& pair_, AP_FLOAT value_)
99
// Orders relations by value; equal values are ordered by their leaf pairs.
bool operator<(const LeafRelation& other) const {
100
if (value == other.value) return *pair < *other.pair;
101
return value < other.value;
104
// Accessors: the referenced leaf pair and the stored value.
const TwoLeafs& get_pair() const { return *pair; }
105
AP_FLOAT get_value() const { return value; }
108
// Maps a tree node to a floating point value.
typedef std::map<ClusterTree*, AP_FLOAT> NodeValues;
109
// Maps a pair of leafs to a floating point value; keys are ordered by TwoLeafs::operator<.
typedef std::map<TwoLeafs, AP_FLOAT> LeafRelations;
110
// Read-only iterator over a LeafRelations map.
typedef LeafRelations::const_iterator LeafRelationCIter;
112
// --------------------
117
// When defined, ClusterTree additionally declares the calculatedDistances
// counter and its getter get_calculated_distances() (both are guarded by
// #if defined(TRACE_DIST_CALC)).
#define TRACE_DIST_CALC
120
// Tree node used for cluster detection. Keeps per-subtree counters plus
// pointers to distance tables (branch depths, branch distances, sequence
// distances) that are NULL-checked before use.
class ClusterTree : public ARB_countedTree { // derived from a Noncopyable
123
unsigned leaf_count; // number of leafs in subtree
124
unsigned clus_count; // number of clusters at and in subtree
125
unsigned depth; // depth of node ( 1 == root )
126
AP_FLOAT min_bases; // min. bases used for comparing two members
128
NodeValues *branchDepths; // leaf-depths (distance from this node to each leaf)
129
LeafRelations *branchDists; // distance (branch) between two leafs
130
LeafRelations *sequenceDists; // real distance between sequences of two leafs
132
// NOTE(review): presumably the leaf pair with the largest distance found so far - confirm in the implementation.
TwoLeafs *worstKnownDistance;
134
// Helpers invoked by get_branch_depths() / get_branch_dists() when the
// corresponding table pointer is still NULL (defined elsewhere).
void calc_branch_depths();
135
void calc_branch_dists();
137
#if defined(TRACE_DIST_CALC)
138
unsigned calculatedDistances;
139
#endif // TRACE_DIST_CALC
142
// Constructs a node belonging to the given tree root; the table pointers
// visible here start out as NULL.
explicit ClusterTree(ClusterTreeRoot *tree_root_)
143
: ARB_countedTree(tree_root_)
151
, sequenceDists(NULL)
152
, worstKnownDistance(NULL)
155
// Releases the heap objects referenced by this node.
~ClusterTree() OVERRIDE {
156
delete worstKnownDistance;
157
delete sequenceDists;
161
// NOTE(review): presumably generates typed tree/root accessors for this node type - the macro is defined elsewhere.
DEFINE_TREE_ACCESSORS(ClusterTreeRoot, ClusterTree);
163
// Plain getters for the per-subtree counters.
unsigned get_cluster_count() const { return clus_count; }
164
unsigned get_leaf_count() const OVERRIDE { return leaf_count; }
165
unsigned get_depth() const { return depth; }
167
#if defined(TRACE_DIST_CALC)
168
unsigned get_calculated_distances() const { return calculatedDistances; }
169
#endif // TRACE_DIST_CALC
171
// True if the state has the CS_IS_CLUSTER or the CS_SUB_CLUSTER bit set.
bool knows_seqDists() const { return state & (CS_IS_CLUSTER|CS_SUB_CLUSTER); }
173
// Number of unordered leaf pairs in the subtree: n*(n-1)/2 in unsigned arithmetic.
unsigned possible_relations() const { return (leaf_count*(leaf_count-1)) / 2; }
174
// All possible relations if sequence distances are known for this node, otherwise 0.
unsigned known_seqDists() const { return knows_seqDists() ? possible_relations() : 0; }
176
ClusterTree *get_cluster(size_t num); // this allows sequential access to clusters
177
ClusterState get_state() const { return state; }
179
void init_tree() OVERRIDE;
180
void detect_clusters(arb_progress& progress);
182
// Computes the branch depths on first use (only when branchDepths is still NULL).
const NodeValues *get_branch_depths() {
183
if (!branchDepths) calc_branch_depths();
187
// Computes the branch distances on first use (only when branchDists is still NULL).
const LeafRelations *get_branch_dists() {
188
if (!branchDists) calc_branch_dists();
192
// Returns the stored sequence-distance table pointer as is (no calculation is triggered).
const LeafRelations *get_sequence_dists() const { return sequenceDists; }
194
// Sequence distance for a leaf pair (defined elsewhere).
// NOTE(review): non-const, so it presumably calculates and stores missing distances - confirm.
AP_FLOAT get_seqDist(const TwoLeafs& pair);
195
// Const lookup returning a pointer to a distance.
// NOTE(review): presumably NULL if the pair's distance is not known - confirm.
const AP_FLOAT *has_seqDist(const TwoLeafs& pair) const;
196
const ClusterTree *commonFatherWith(const ClusterTree *other) const;
198
AP_FLOAT get_min_bases() const { return min_bases; }
200
void oblivion(bool forgetDistances); // forget unneeded data
203
// Node factory producing ClusterTree nodes.
struct ClusterTreeNodeFactory : public RootedTreeNodeFactory {
204
// Allocates a new ClusterTree with 'new'; the given root is downcast to
// ClusterTreeRoot* before it is handed to the node constructor.
virtual RootedTree *makeNode(TreeRoot *root) const {
205
return new ClusterTree(DOWNCAST(ClusterTreeRoot*, root));
209
// Tree predicate that accepts every tree.
struct UseAnyTree : public ARB_tree_predicate {
210
// Ignores its argument and always returns true.
bool selects(const ARB_seqtree&) const OVERRIDE { return true; }
214
#error di_clustertree.hxx included twice
215
#endif // DI_CLUSTERTREE_HXX