55
62
using namespace std;
59
// HMMState implementation
65
HMMState::HMMState(const String& name, bool hidden)
70
cerr << "State: " << name << ", hidden=" << hidden << endl;
74
HMMState::HMMState(const HMMState& state)
75
: hidden_(state.hidden_),
79
/* pre_states_(state.pre_states_),
80
succ_states_(state.succ_states_)
89
void HMMState::setName(const String& name)
94
const String& HMMState::getName() const
99
HMMState& HMMState::operator = (const HMMState& state)
101
hidden_ = state.hidden_;
104
succ_states_.clear();
105
//pre_states_ = state.pre_states_;
106
//succ_states_ = state.succ_states_;
110
void HMMState::addSuccessorState(HMMState* state)
113
cerr << "'" << state->getName() << "'" << endl;
115
succ_states_.insert(state);
118
void HMMState::deleteSuccessorState(HMMState* state)
121
cerr << "'" << state->getName() << "'" << endl;
123
succ_states_.erase(state);
126
void HMMState::addPredecessorState(HMMState* state)
129
cerr << "'" << state->getName() << "'" << endl;
131
pre_states_.insert(state);
134
void HMMState::deletePredecessorState(HMMState* state)
137
cerr << "'" << state->getName() << "'" << endl;
139
pre_states_.erase(state);
142
const set<HMMState*>& HMMState::getPredecessorStates() const
147
const set<HMMState*>& HMMState::getSuccessorStates() const
152
bool HMMState::isHidden() const
157
void HMMState::setHidden(bool hidden)
162
// The hidden markov model
163
HiddenMarkovModel::HiddenMarkovModel()
164
: pseudo_counts_(0.0)
168
HiddenMarkovModel::HiddenMarkovModel(const HiddenMarkovModel& hmm)
173
HiddenMarkovModel::~HiddenMarkovModel()
178
HiddenMarkovModel& HiddenMarkovModel::operator = (const HiddenMarkovModel& hmm)
188
HMMState* HiddenMarkovModel::getState(const String& name)
190
if (name_to_state_.find(name) == name_to_state_.end())
192
throw Exception::ElementNotFound(__FILE__, __LINE__, __PRETTY_FUNCTION__, name);
194
return name_to_state_[name];
197
const HMMState* HiddenMarkovModel::getState(const String& name) const
199
if (name_to_state_.find(name) == name_to_state_.end())
201
throw Exception::ElementNotFound(__FILE__, __LINE__, __PRETTY_FUNCTION__, name);
203
return name_to_state_.find(name)->second;
206
void HiddenMarkovModel::writeGraphMLFile(const String& filename)
208
set<HMMState*> states = states_;
209
Map<HMMState*, vector<HMMState*> > transitions;
211
ofstream out(filename.c_str());
66
// HMMState implementation
67
HMMState::HMMState() :
72
HMMState::HMMState(const String & name, bool hidden) :
77
cerr << "State: " << name << ", hidden=" << hidden << endl;
81
HMMState::HMMState(const HMMState & state) :
82
hidden_(state.hidden_),
87
/* pre_states_(state.pre_states_),
88
succ_states_(state.succ_states_)
97
void HMMState::setName(const String & name)
102
const String & HMMState::getName() const
107
HMMState & HMMState::operator=(const HMMState & state)
109
hidden_ = state.hidden_;
112
succ_states_.clear();
113
//pre_states_ = state.pre_states_;
114
//succ_states_ = state.succ_states_;
118
void HMMState::addSuccessorState(HMMState * state)
121
cerr << "'" << state->getName() << "'" << endl;
123
succ_states_.insert(state);
126
void HMMState::deleteSuccessorState(HMMState * state)
129
cerr << "'" << state->getName() << "'" << endl;
131
succ_states_.erase(state);
134
void HMMState::addPredecessorState(HMMState * state)
137
cerr << "'" << state->getName() << "'" << endl;
139
pre_states_.insert(state);
142
void HMMState::deletePredecessorState(HMMState * state)
145
cerr << "'" << state->getName() << "'" << endl;
147
pre_states_.erase(state);
150
const set<HMMState *> & HMMState::getPredecessorStates() const
155
const set<HMMState *> & HMMState::getSuccessorStates() const
160
bool HMMState::isHidden() const
165
void HMMState::setHidden(bool hidden)
170
// The hidden markov model
171
HiddenMarkovModel::HiddenMarkovModel() :
176
HiddenMarkovModel::HiddenMarkovModel(const HiddenMarkovModel & hmm)
181
HiddenMarkovModel::~HiddenMarkovModel()
186
HiddenMarkovModel & HiddenMarkovModel::operator=(const HiddenMarkovModel & hmm)
196
HMMState * HiddenMarkovModel::getState(const String & name)
198
if (name_to_state_.find(name) == name_to_state_.end())
200
throw Exception::ElementNotFound(__FILE__, __LINE__, __PRETTY_FUNCTION__, name);
202
return name_to_state_[name];
205
const HMMState * HiddenMarkovModel::getState(const String & name) const
207
if (name_to_state_.find(name) == name_to_state_.end())
209
throw Exception::ElementNotFound(__FILE__, __LINE__, __PRETTY_FUNCTION__, name);
211
return name_to_state_.find(name)->second;
214
void HiddenMarkovModel::writeGraphMLFile(const String & filename)
216
set<HMMState *> states = states_;
217
Map<HMMState *, vector<HMMState *> > transitions;
219
ofstream out(filename.c_str());
212
220
out << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" << endl;
214
out << "<graphml xmlns=\"http://graphml.graphdrawing.org/xmlns/graphml\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" " <<
215
"xsi:schemaLocation=\"http://graphml.graphdrawing.org/xmlns/graphml http://www.yworks.com/xml/schema/graphml/1.0/ygraphml.xsd\" " <<
216
"xmlns:y=\"http://www.yworks.com/xml/graphml\">" << endl;
222
out << "<graphml xmlns=\"http://graphml.graphdrawing.org/xmlns/graphml\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" " <<
223
"xsi:schemaLocation=\"http://graphml.graphdrawing.org/xmlns/graphml http://www.yworks.com/xml/schema/graphml/1.0/ygraphml.xsd\" " <<
224
"xmlns:y=\"http://www.yworks.com/xml/graphml\">" << endl;
218
out << "<key id=\"d0\" for=\"node\" yfiles.type=\"nodegraphics\"/>" << endl;
219
out << "<key id=\"d1\" for=\"edge\" yfiles.type=\"edgegraphics\"/>" << endl;
226
out << "<key id=\"d0\" for=\"node\" yfiles.type=\"nodegraphics\"/>" << endl;
227
out << "<key id=\"d1\" for=\"edge\" yfiles.type=\"edgegraphics\"/>" << endl;
220
228
out << " <graph id=\"G\" edgedefault=\"directed\">" << endl;
221
for (set<HMMState*>::const_iterator it = states.begin(); it != states.end(); ++it)
223
out << " <node id=\"" << (*it)->getName() << "\">" << endl;
224
out << " <data key=\"d0\">" << endl;
225
out << " <y:ShapeNode>" << endl;
226
out << " <y:NodeLabel>" << (*it)->getName() << "</y:NodeLabel>" << endl;
227
out << " </y:ShapeNode>" << endl;
228
out << " </data>" << endl;
229
out << " </node>" << endl;
231
set<HMMState*> succ = (*it)->getSuccessorStates();
232
for (set<HMMState*>::const_iterator sit = succ.begin(); sit != succ.end(); ++sit)
234
transitions[*it].push_back(*sit);
240
for (Map<HMMState*, vector<HMMState*> >::const_iterator it = transitions.begin(); it != transitions.end(); ++it)
242
for (vector<HMMState*>::const_iterator it1 = it->second.begin(); it1 != it->second.end(); ++it1)
244
out << " <edge source=\"" << it->first->getName() << "\" target=\"" << (*it1)->getName() << "\" directed=\"true\">" << endl;
245
out << " <data key=\"d1\">" << endl;
246
out << " <y:PolyLineEdge>" << endl;
247
out << " <y:EdgeLabel>" << getTransitionProbability_(it->first, *it1) << "</y:EdgeLabel>" << endl;
248
out << " </y:PolyLineEdge>" << endl;
249
out << " </data>" << endl;
250
out << " </edge>" << endl;
254
out << " </graph>" << endl;
255
out << "</graphml>" << endl;
259
void HiddenMarkovModel::write(ostream& out) const
262
for (set<HMMState*>::const_iterator it = states_.begin(); it != states_.end(); ++it)
264
out << "State " << (*it)->getName();
266
if (!(*it)->isHidden())
274
for (Map<HMMState*, Map<HMMState*, DoubleReal> >::const_iterator it1 = trans_.begin(); it1 != trans_.end(); ++it1)
276
for (Map<HMMState*, DoubleReal>::const_iterator it2 = it1->second.begin(); it2 != it1->second.end(); ++it2)
278
out << "Transition " << it1->first->getName() << " " << it2->first->getName() << " " << it2->second << endl;
284
for (Map<String, Map<String, pair<String, String> > >::const_iterator it = synonym_trans_names_.begin(); it != synonym_trans_names_.end(); ++it)
286
for (Map<String, pair<String, String> >::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
288
out << "Synonym " << it->first << " " << it2->first << " " << it2->second.first << " " << it2->second.second << endl;
292
for (Map<HMMState*, Map<HMMState*, std::pair<HMMState*, HMMState*> > >::const_iterator it1 = synonym_trans_.begin(); it1 != synonym_trans_.end(); ++it1)
294
for (Map<HMMState*, std::pair<HMMState*, HMMState*> >::const_iterator it2 = it1->second.begin(); it2 != it1->second.end(); ++it2)
296
//cerr << "Synonym " << it1->first->getName() << " " << it2->first->getName() << " " << it2->second.first->getName() << " " << it2->second.second->getName() << endl;
229
for (set<HMMState *>::const_iterator it = states.begin(); it != states.end(); ++it)
231
out << " <node id=\"" << (*it)->getName() << "\">" << endl;
232
out << " <data key=\"d0\">" << endl;
233
out << " <y:ShapeNode>" << endl;
234
out << " <y:NodeLabel>" << (*it)->getName() << "</y:NodeLabel>" << endl;
235
out << " </y:ShapeNode>" << endl;
236
out << " </data>" << endl;
237
out << " </node>" << endl;
239
set<HMMState *> succ = (*it)->getSuccessorStates();
240
for (set<HMMState *>::const_iterator sit = succ.begin(); sit != succ.end(); ++sit)
242
transitions[*it].push_back(*sit);
248
for (Map<HMMState *, vector<HMMState *> >::const_iterator it = transitions.begin(); it != transitions.end(); ++it)
250
for (vector<HMMState *>::const_iterator it1 = it->second.begin(); it1 != it->second.end(); ++it1)
252
out << " <edge source=\"" << it->first->getName() << "\" target=\"" << (*it1)->getName() << "\" directed=\"true\">" << endl;
253
out << " <data key=\"d1\">" << endl;
254
out << " <y:PolyLineEdge>" << endl;
255
out << " <y:EdgeLabel>" << getTransitionProbability_(it->first, *it1) << "</y:EdgeLabel>" << endl;
256
out << " </y:PolyLineEdge>" << endl;
257
out << " </data>" << endl;
258
out << " </edge>" << endl;
262
out << " </graph>" << endl;
263
out << "</graphml>" << endl;
267
void HiddenMarkovModel::write(ostream & out) const
270
for (set<HMMState *>::const_iterator it = states_.begin(); it != states_.end(); ++it)
272
out << "State " << (*it)->getName();
274
if (!(*it)->isHidden())
282
for (Map<HMMState *, Map<HMMState *, DoubleReal> >::const_iterator it1 = trans_.begin(); it1 != trans_.end(); ++it1)
284
for (Map<HMMState *, DoubleReal>::const_iterator it2 = it1->second.begin(); it2 != it1->second.end(); ++it2)
286
out << "Transition " << it1->first->getName() << " " << it2->first->getName() << " " << it2->second << endl;
292
for (Map<String, Map<String, pair<String, String> > >::const_iterator it = synonym_trans_names_.begin(); it != synonym_trans_names_.end(); ++it)
294
for (Map<String, pair<String, String> >::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
296
out << "Synonym " << it->first << " " << it2->first << " " << it2->second.first << " " << it2->second.second << endl;
300
for (Map<HMMState *, Map<HMMState *, std::pair<HMMState *, HMMState *> > >::const_iterator it1 = synonym_trans_.begin(); it1 != synonym_trans_.end(); ++it1)
302
for (Map<HMMState *, std::pair<HMMState *, HMMState *> >::const_iterator it2 = it1->second.begin(); it2 != it1->second.end(); ++it2)
304
//cerr << "Synonym " << it1->first->getName() << " " << it2->first->getName() << " " << it2->second.first->getName() << " " << it2->second.second->getName() << endl;
297
305
out << "Synonym " << it1->first->getName() << " " << it2->first->getName() << " " << it2->second.first->getName() << " " << it2->second.second->getName() << endl;
304
void HiddenMarkovModel::clear()
306
for (set<HMMState*>::const_iterator it = states_.begin(); it != states_.end(); ++it)
312
void HiddenMarkovModel::clear()
314
for (set<HMMState *>::const_iterator it = states_.begin(); it != states_.end(); ++it)
311
319
count_trans_.clear();
312
train_count_trans_all_.clear();
320
train_count_trans_all_.clear();
313
321
forward_.clear();
314
322
backward_.clear();
315
323
name_to_state_.clear();
390
398
make_pair(name_to_state_[it2->second.first], name_to_state_[it2->second.second]);
396
void HiddenMarkovModel::setTransitionProbability_(HMMState * s1, HMMState * s2, DoubleReal trans_prob)
398
trans_[s1][s2] = trans_prob;
399
s1->addSuccessorState(s2);
400
s2->addPredecessorState(s1);
401
enabled_trans_[s1].insert(s2);
402
training_steps_count_[s1][s2] = 0;
405
void HiddenMarkovModel::setTransitionProbability(const String& s1, const String& s2, DoubleReal trans_prob)
404
void HiddenMarkovModel::setTransitionProbability_(HMMState * s1, HMMState * s2, DoubleReal trans_prob)
406
trans_[s1][s2] = trans_prob;
407
s1->addSuccessorState(s2);
408
s2->addPredecessorState(s1);
409
enabled_trans_[s1].insert(s2);
410
training_steps_count_[s1][s2] = 0;
413
void HiddenMarkovModel::setTransitionProbability(const String & s1, const String & s2, DoubleReal trans_prob)
407
415
OPENMS_PRECONDITION(
408
name_to_state_.find(s1) != name_to_state_.end() && name_to_state_.find(s2) != name_to_state_.end(),
409
String("HiddenMarkovModel::setTransitionProbability(" + String(s1) + ", " + String(s2) + ", " + String(trans_prob) + "), no suchstate!").c_str());
416
name_to_state_.find(s1) != name_to_state_.end() && name_to_state_.find(s2) != name_to_state_.end(),
417
String("HiddenMarkovModel::setTransitionProbability(" + String(s1) + ", " + String(s2) + ", " + String(trans_prob) + "), no suchstate!").c_str());
410
418
#ifdef SIMPLE_DEBUG2
411
419
cerr << "setTransitionProbability: '" << s1 << "' -> '" << s2 << "'" << " " << trans_prob << endl;
414
trans_[name_to_state_[s1]][name_to_state_[s2]] = trans_prob;
415
name_to_state_[s1]->addSuccessorState(name_to_state_[s2]);
416
name_to_state_[s2]->addPredecessorState(name_to_state_[s1]);
417
enabled_trans_[name_to_state_[s1]].insert(name_to_state_[s2]);
418
training_steps_count_[name_to_state_[s1]][name_to_state_[s2]] = 0;
421
DoubleReal HiddenMarkovModel::getTransitionProbability(const String& s1, const String& s2) const
423
if (name_to_state_.find(s1) == name_to_state_.end())
425
throw Exception::ElementNotFound(__FILE__, __LINE__, __PRETTY_FUNCTION__, s1);
427
HMMState* state1 = name_to_state_[s1];
428
if (name_to_state_.find(s2) == name_to_state_.end())
430
throw Exception::ElementNotFound(__FILE__, __LINE__, __PRETTY_FUNCTION__, s2);
432
HMMState* state2 = name_to_state_[s2];
433
return getTransitionProbability_(state1, state2);
436
DoubleReal HiddenMarkovModel::getTransitionProbability_(HMMState* s1, HMMState* s2) const
438
HMMState* state1 = s1;
439
HMMState* state2 = s2;
442
cerr << "getTransitionProbability(" << s1->getName() << ", " << s2->getName() << ")" << endl;
445
// check if transition is a synonym transition
446
if (synonym_trans_.find(state1) != synonym_trans_.end() &&
447
synonym_trans_.find(state1)->second.find(state2) != synonym_trans_.find(state1)->second.end())
449
HMMState* tmp = synonym_trans_.find(state1)->second.find(state2)->second.first;
450
state2 = synonym_trans_.find(state1)->second.find(state2)->second.second; //name_to_state_[p.second];
455
cerr << "getTransitionProbability: " << state1->getName() << " " << state2->getName() << endl;
458
// get the transition prob
459
if (trans_.find(state1) != trans_.end() && trans_.find(state1)->second.find(state2) != trans_.find(state1)->second.end())
462
// TODO !!!! ???? path length correction
463
if (state2->getName().hasSubstring("next"))
465
return sqrt(trans_[state1][state2]);
469
return trans_.find(state1)->second.find(state2)->second;
480
void HiddenMarkovModel::train()
482
#ifdef HIDDEN_MARKOV_MODEL_DEBUG
483
cerr << "HiddenMarkovModel::train()" << endl;
485
trained_trans_.clear();
487
#ifdef HIDDEN_MARKOV_MODEL_DEBUG
488
cerr << "calc forward part" << endl;
491
calculateForwardPart_();
493
#ifdef HIDDEN_MARKOV_MODEL_DEBUG
494
cerr << "calc backward part" << endl;
497
calculateBackwardPart_();
500
#ifdef HIDDEN_MARKOV_MODEL_DEBUG
501
cerr << "calc px" << endl;
504
// calc p_x from forward part
507
for (Map<HMMState*, DoubleReal>::const_iterator it1 = train_emission_prob_.begin(); it1 != train_emission_prob_.end(); ++it1)
509
for (set<HMMState*>::const_iterator it2 = it1->first->getPredecessorStates().begin(); it2 != it1->first->getPredecessorStates().end(); ++it2)
511
px += getForwardVariable_(*it2);
515
DoubleReal num_px(0);
521
#ifdef HIDDEN_MARKOV_MODEL_DEBUG
522
cerr << "px=" << num_px << endl;
523
cerr << "add contributions to count_trans" << endl;
526
// add contributions to count_trans_
527
for (set<pair<HMMState*, HMMState*> >::const_iterator it = trained_trans_.begin(); it != trained_trans_.end(); ++it)
530
tmp = num_px * getForwardVariable_(it->first) * getBackwardVariable_(it->second) * getTransitionProbability_(it->first, it->second);
531
tmp += pseudo_counts_;
532
HMMState* s1 = it->first;
533
HMMState* s2 = it->second;
535
if (synonym_trans_.find(s1) != synonym_trans_.end() && synonym_trans_[s1].find(s2) != synonym_trans_[s1].end())
537
HMMState* tmp = synonym_trans_[s1][s2].first;
538
s2 = synonym_trans_[s1][s2].second;
542
train_count_trans_all_[s1][s2].push_back(tmp);
543
if (count_trans_.find(s1) != count_trans_.end() && count_trans_[s1].find(s2) != count_trans_[s1].end())
545
count_trans_[s1][s2] += tmp;
549
count_trans_[s1][s2] = tmp;
551
training_steps_count_[s1][s2]++;
555
void HiddenMarkovModel::calculateForwardPart_()
559
for (Map<HMMState*, DoubleReal>::iterator it = init_prob_.begin(); it != init_prob_.end(); ++it)
561
//cerr << it->first << " " << it->second << endl;
562
//cerr << it->first->getName() << endl;
563
forward_[it->first] = it->second;
566
for (Map<HMMState*, DoubleReal>::iterator it = init_prob_.begin(); it != init_prob_.end(); ++it)
568
succ.insert(it->first->getSuccessorStates().begin(), it->first->getSuccessorStates().end());
570
while ( !succ.empty() )
572
set<HMMState*> succ_new;
573
for (set<HMMState*>::const_iterator it = succ.begin(); it != succ.end(); ++it)
575
set<HMMState*> pre = (*it)->getPredecessorStates();
577
for (set<HMMState*>::const_iterator it2 = pre.begin(); it2 != pre.end(); ++it2)
579
#ifdef HIDDEN_MARKOV_MODEL_DEBUG
580
cerr << "\tadding from " << (*it2)->getName() << " " << getForwardVariable_(*it2) << " * " << getTransitionProbability(*it2, *it) << endl;
582
sum += getForwardVariable_(*it2) * getTransitionProbability_(*it2, *it);
583
trained_trans_.insert(make_pair(*it2, *it));
586
succ_new.insert((*it)->getSuccessorStates().begin(), (*it)->getSuccessorStates().end());
587
#ifdef HIDDEN_MARKOV_MODEL_DEBUG
588
cerr << "f: " << (*it)->getName() << "\t" << sum << endl;
596
void HiddenMarkovModel::calculateBackwardPart_()
600
for (Map<HMMState*, DoubleReal>::iterator it = train_emission_prob_.begin(); it != train_emission_prob_.end(); ++it)
602
backward_[it->first] = it->second;
605
for (Map<HMMState*, DoubleReal>::iterator it = train_emission_prob_.begin(); it != train_emission_prob_.end(); ++it)
607
#ifdef HIDDEN_MARKOV_MODEL_DEBUG
608
cerr << "b:" << it->first << " " << it->first->getName() << " " << it->second << endl;
610
pre.insert(it->first->getPredecessorStates().begin(), it->first->getPredecessorStates().end());
612
while ( !pre.empty())
614
set <HMMState*> pre_new;
615
for (set<HMMState*>::const_iterator it = pre.begin(); it != pre.end(); ++it)
617
set<HMMState*> succ = (*it)->getSuccessorStates();
619
for (set<HMMState*>::const_iterator it2 = succ.begin(); it2 != succ.end(); ++it2)
621
#ifdef HIDDEN_MARKOV_MODEL_DEBUG
622
cerr << "\tadding from " << (*it2)->getName() << " " << getBackwardVariable_(*it2) << " * " << getTransitionProbability(*it, *it2) << endl;
624
sum += getBackwardVariable_(*it2) * getTransitionProbability_(*it, *it2);
625
trained_trans_.insert(make_pair(*it, *it2));
627
backward_[*it] = sum;
628
pre_new.insert((*it)->getPredecessorStates().begin(), (*it)->getPredecessorStates().end());
629
#ifdef HIDDEN_MARKOV_MODEL_DEBUG
630
cerr << "b: " << (*it)->getName() << "\t" << sum << endl;
638
DoubleReal HiddenMarkovModel::getForwardVariable_(HMMState* state)
640
return forward_.find(state) != forward_.end() ? forward_[state] : 0;
643
DoubleReal HiddenMarkovModel::getBackwardVariable_(HMMState* state)
645
return backward_.find(state) != backward_.end() ? backward_[state] : 0;
648
void HiddenMarkovModel::evaluate()
650
for (Map<HMMState*, Map<HMMState*, DoubleReal> >::const_iterator it1 = count_trans_.begin(); it1 != count_trans_.end(); ++it1)
652
#ifdef EVALUATE_DEBUG
653
cerr << it1->first->getName() << endl;
656
for (Map<HMMState*, DoubleReal>::const_iterator it2 = it1->second.begin(); it2 != it1->second.end(); ++it2)
658
if (count_trans_.find(it1->first) != count_trans_.end() &&
659
count_trans_[it1->first].find(it2->first) != count_trans_[it1->first].end())
661
sum += count_trans_[it1->first][it2->first];
662
#ifdef EVALUATE_DEBUG
663
cerr << it1->first->getName() << " " << it2->first->getName() << " ";
665
//<< count_trans_[it1->first][it2->first] << endl;
666
for (vector<DoubleReal>::const_iterator it = train_count_trans_all_[it1->first][it2->first].begin(); it != train_count_trans_all_[it1->first][it2->first].end(); ++it)
670
vector<DoubleReal> data = train_count_trans_all_[it1->first][it2->first];
671
std::sort(data.begin(),data.end());
672
DoubleReal mean = gsl_stats_mean(&data.front(),1,data.size());
673
DoubleReal variance = gsl_stats_variance_m(&data.front(),1,data.size(),mean);
674
cerr << "mean=" << mean << ", variance=" << variance << endl;
681
for (Map<HMMState*, DoubleReal>::const_iterator it2 = it1->second.begin(); it2 != it1->second.end(); ++it2)
683
if (count_trans_.find(it1->first) != count_trans_.end() &&
684
count_trans_[it1->first].find(it2->first) != count_trans_[it1->first].end())
686
trans_[it1->first][it2->first] = count_trans_[it1->first][it2->first] / sum;
693
void HiddenMarkovModel::setInitialTransitionProbability(const String& state, DoubleReal prob)
695
OPENMS_PRECONDITION(name_to_state_.find(state) != name_to_state_.end(), String("HiddenMarkovModel::setInitialTransitionProbability(" + state + ", " + String(prob) + "), no suchstate!").c_str());
696
//cerr << state << " " << prob << endl;
697
init_prob_[name_to_state_[state]] = prob;
700
void HiddenMarkovModel::clearInitialTransitionProbabilities()
705
void HiddenMarkovModel::setTrainingEmissionProbability(const String& state, DoubleReal prob)
708
cerr << "setTrainingEmissionProbability(" << state << "(" << name_to_state_[state] << "), " << prob << ")" << endl;
711
OPENMS_PRECONDITION(name_to_state_.find(state) != name_to_state_.end(), String("HiddenMarkovModel::setTrainingEmissionProbability(" + state + ", " + String(prob) + "), no such state!").c_str());
712
train_emission_prob_[name_to_state_[state]] = prob;
715
void HiddenMarkovModel::clearTrainingEmissionProbabilities()
717
train_emission_prob_.clear();
720
void HiddenMarkovModel::enableTransition(const String& s1, const String& s2)
723
cerr << "enableTransition: '" << s1 << "' -> '" << s2 << "'" << endl;
725
enableTransition_(name_to_state_[s1], name_to_state_[s2]);
728
void HiddenMarkovModel::enableTransition_(HMMState* s1, HMMState* s2)
730
s1->addSuccessorState(s2);
731
s2->addPredecessorState(s1);
732
enabled_trans_[s1].insert(s2);
735
void HiddenMarkovModel::disableTransition(const String& s1, const String& s2)
737
disableTransition_(name_to_state_[s1], name_to_state_[s2]);
740
void HiddenMarkovModel::disableTransition_(HMMState* s1, HMMState* s2)
742
s1->deleteSuccessorState(s2);
743
s2->deletePredecessorState(s1);
744
enabled_trans_[s1].erase(s2);
747
void HiddenMarkovModel::disableTransitions()
749
for (Map<HMMState*, set<HMMState*> >::const_iterator it = enabled_trans_.begin(); it != enabled_trans_.end(); ++it)
751
for (set<HMMState*>::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
753
it->first->deleteSuccessorState(*it2);
754
(*it2)->deletePredecessorState(it->first);
757
enabled_trans_.clear();
760
void HiddenMarkovModel::calculateEmissionProbabilities(Map<HMMState*, DoubleReal>& emission_probs)
762
Map<HMMState*, DoubleReal> states = init_prob_;
764
while ( !states.empty() )
766
Map<HMMState*, DoubleReal> tmp = states;
767
for (Map<HMMState*, DoubleReal>::const_iterator it = tmp.begin(); it != tmp.end(); ++it)
769
for (set<HMMState*>::const_iterator it2 = it->first->getSuccessorStates().begin(); it2 != it->first->getSuccessorStates().end(); ++it2)
771
//cerr << "->" << (*it2)->getName() << "=(from " << it->first->getName() << "): "; // << states[it->first] << " * " << getTransitionProbability_(it->first, *it2) << " ";
772
if (states.find(*it2) != states.end())
774
//cerr << " += " << states[it->first] * getTransitionProbability_(it->first, *it2);
775
states[*it2] += states[it->first] * getTransitionProbability_(it->first, *it2);
779
//cerr << " = " << states[it->first] * getTransitionProbability_(it->first, *it2);
780
states[*it2] = states[it->first] * getTransitionProbability_(it->first, *it2);
782
if (!(*it2)->isHidden())
784
if (emission_probs.find(*it2) != emission_probs.end())
786
//cerr << " emission: += " << states[it->first] * getTransitionProbability_(it->first, *it2);
787
emission_probs[*it2] += states[it->first] * getTransitionProbability_(it->first, *it2);
791
//cerr << " emission: += " << states[it->first] * getTransitionProbability_(it->first, *it2);
792
emission_probs[*it2] = states[it->first] * getTransitionProbability_(it->first, *it2);
797
states.erase(it->first);
804
void HiddenMarkovModel::dump()
806
cerr << "dump of transitions: " << endl;
807
for (Map<HMMState*, Map<HMMState*, DoubleReal> >::const_iterator it = trans_.begin(); it != trans_.end(); ++it)
809
for (Map<HMMState*, DoubleReal>::const_iterator it1 = it->second.begin(); it1 != it->second.end(); ++it1)
811
cout << it->first->getName() << " -> " << it1->first->getName() << " " << it1->second << " " << training_steps_count_[it->first][it1->first] << ": ";
812
vector<DoubleReal> all_trans = train_count_trans_all_[it->first][it1->first];
814
if ( !all_trans.empty() )
816
DoubleReal sum = accumulate(all_trans.begin(), all_trans.end(), 0.0);
817
DoubleReal avg(sum/DoubleReal(all_trans.size()));
819
for (Size i = 0; i != all_trans.size(); ++i)
821
cout << all_trans[i] << " ";
822
rsd += abs(all_trans[i] - avg);
824
cout << "rsd=" << rsd / DoubleReal(all_trans.size()) / avg;
825
cout << ", avg=" << avg;
832
cerr << "dump completed" << endl;
835
void HiddenMarkovModel::forwardDump()
838
for (Map<HMMState*, DoubleReal>::iterator it = init_prob_.begin(); it != init_prob_.end(); ++it)
840
succ.insert(it->first->getSuccessorStates().begin(), it->first->getSuccessorStates().end());
842
while ( !succ.empty() )
844
set<HMMState*> succ_new;
845
for (set<HMMState*>::const_iterator it = succ.begin(); it != succ.end(); ++it)
847
cerr << (*it)->getName() << endl;
848
succ_new.insert((*it)->getSuccessorStates().begin(), (*it)->getSuccessorStates().end());
855
void HiddenMarkovModel::estimateUntrainedTransitions()
857
#ifdef HIDDEN_MARKOV_MODEL_DEBUG
858
cerr << "Transition training stats" << endl;
859
for (Map<HMMState*, Map<HMMState*, Size> >::const_iterator it = training_steps_count_.begin(); it != training_steps_count_.end(); ++it)
861
for (Map<HMMState*, Size>::const_iterator it1 = it->second.begin(); it1 != it->second.end(); ++it1)
863
cerr << it->first->getName() << " " << it1->first->getName() << " " << it1->second << endl;
867
cerr << "Estimation" << endl;
871
set<const Residue*> residues(ResidueDB::getInstance()->getResidues("Natural20"));
422
trans_[name_to_state_[s1]][name_to_state_[s2]] = trans_prob;
423
name_to_state_[s1]->addSuccessorState(name_to_state_[s2]);
424
name_to_state_[s2]->addPredecessorState(name_to_state_[s1]);
425
enabled_trans_[name_to_state_[s1]].insert(name_to_state_[s2]);
426
training_steps_count_[name_to_state_[s1]][name_to_state_[s2]] = 0;
429
DoubleReal HiddenMarkovModel::getTransitionProbability(const String & s1, const String & s2) const
431
if (name_to_state_.find(s1) == name_to_state_.end())
433
throw Exception::ElementNotFound(__FILE__, __LINE__, __PRETTY_FUNCTION__, s1);
435
HMMState * state1 = name_to_state_[s1];
436
if (name_to_state_.find(s2) == name_to_state_.end())
438
throw Exception::ElementNotFound(__FILE__, __LINE__, __PRETTY_FUNCTION__, s2);
440
HMMState * state2 = name_to_state_[s2];
441
return getTransitionProbability_(state1, state2);
444
DoubleReal HiddenMarkovModel::getTransitionProbability_(HMMState * s1, HMMState * s2) const
446
HMMState * state1 = s1;
447
HMMState * state2 = s2;
450
cerr << "getTransitionProbability(" << s1->getName() << ", " << s2->getName() << ")" << endl;
453
// check if transition is a synonym transition
454
if (synonym_trans_.find(state1) != synonym_trans_.end() &&
455
synonym_trans_.find(state1)->second.find(state2) != synonym_trans_.find(state1)->second.end())
457
HMMState * tmp = synonym_trans_.find(state1)->second.find(state2)->second.first;
458
state2 = synonym_trans_.find(state1)->second.find(state2)->second.second; //name_to_state_[p.second];
463
cerr << "getTransitionProbability: " << state1->getName() << " " << state2->getName() << endl;
466
// get the transition prob
467
if (trans_.find(state1) != trans_.end() && trans_.find(state1)->second.find(state2) != trans_.find(state1)->second.end())
470
// TODO !!!! ???? path length correction
471
if (state2->getName().hasSubstring("next"))
473
return sqrt(trans_[state1][state2]);
477
return trans_.find(state1)->second.find(state2)->second;
488
void HiddenMarkovModel::train()
490
#ifdef HIDDEN_MARKOV_MODEL_DEBUG
491
cerr << "HiddenMarkovModel::train()" << endl;
493
trained_trans_.clear();
495
#ifdef HIDDEN_MARKOV_MODEL_DEBUG
496
cerr << "calc forward part" << endl;
499
calculateForwardPart_();
501
#ifdef HIDDEN_MARKOV_MODEL_DEBUG
502
cerr << "calc backward part" << endl;
505
calculateBackwardPart_();
508
#ifdef HIDDEN_MARKOV_MODEL_DEBUG
509
cerr << "calc px" << endl;
512
// calc p_x from forward part
515
for (Map<HMMState *, DoubleReal>::const_iterator it1 = train_emission_prob_.begin(); it1 != train_emission_prob_.end(); ++it1)
517
for (set<HMMState *>::const_iterator it2 = it1->first->getPredecessorStates().begin(); it2 != it1->first->getPredecessorStates().end(); ++it2)
519
px += getForwardVariable_(*it2);
523
DoubleReal num_px(0);
529
#ifdef HIDDEN_MARKOV_MODEL_DEBUG
530
cerr << "px=" << num_px << endl;
531
cerr << "add contributions to count_trans" << endl;
534
// add contributions to count_trans_
535
for (set<pair<HMMState *, HMMState *> >::const_iterator it = trained_trans_.begin(); it != trained_trans_.end(); ++it)
538
tmp = num_px * getForwardVariable_(it->first) * getBackwardVariable_(it->second) * getTransitionProbability_(it->first, it->second);
539
tmp += pseudo_counts_;
540
HMMState * s1 = it->first;
541
HMMState * s2 = it->second;
543
if (synonym_trans_.find(s1) != synonym_trans_.end() && synonym_trans_[s1].find(s2) != synonym_trans_[s1].end())
545
HMMState * tmp = synonym_trans_[s1][s2].first;
546
s2 = synonym_trans_[s1][s2].second;
550
train_count_trans_all_[s1][s2].push_back(tmp);
551
if (count_trans_.find(s1) != count_trans_.end() && count_trans_[s1].find(s2) != count_trans_[s1].end())
553
count_trans_[s1][s2] += tmp;
557
count_trans_[s1][s2] = tmp;
559
training_steps_count_[s1][s2]++;
563
void HiddenMarkovModel::calculateForwardPart_()
566
set<HMMState *> succ;
567
for (Map<HMMState *, DoubleReal>::iterator it = init_prob_.begin(); it != init_prob_.end(); ++it)
569
//cerr << it->first << " " << it->second << endl;
570
//cerr << it->first->getName() << endl;
571
forward_[it->first] = it->second;
574
for (Map<HMMState *, DoubleReal>::iterator it = init_prob_.begin(); it != init_prob_.end(); ++it)
576
succ.insert(it->first->getSuccessorStates().begin(), it->first->getSuccessorStates().end());
578
while (!succ.empty())
580
set<HMMState *> succ_new;
581
for (set<HMMState *>::const_iterator it = succ.begin(); it != succ.end(); ++it)
583
set<HMMState *> pre = (*it)->getPredecessorStates();
585
for (set<HMMState *>::const_iterator it2 = pre.begin(); it2 != pre.end(); ++it2)
587
#ifdef HIDDEN_MARKOV_MODEL_DEBUG
588
cerr << "\tadding from " << (*it2)->getName() << " " << getForwardVariable_(*it2) << " * " << getTransitionProbability(*it2, *it) << endl;
590
sum += getForwardVariable_(*it2) * getTransitionProbability_(*it2, *it);
591
trained_trans_.insert(make_pair(*it2, *it));
594
succ_new.insert((*it)->getSuccessorStates().begin(), (*it)->getSuccessorStates().end());
595
#ifdef HIDDEN_MARKOV_MODEL_DEBUG
596
cerr << "f: " << (*it)->getName() << "\t" << sum << endl;
604
void HiddenMarkovModel::calculateBackwardPart_()
608
for (Map<HMMState *, DoubleReal>::iterator it = train_emission_prob_.begin(); it != train_emission_prob_.end(); ++it)
610
backward_[it->first] = it->second;
613
for (Map<HMMState *, DoubleReal>::iterator it = train_emission_prob_.begin(); it != train_emission_prob_.end(); ++it)
615
#ifdef HIDDEN_MARKOV_MODEL_DEBUG
616
cerr << "b:" << it->first << " " << it->first->getName() << " " << it->second << endl;
618
pre.insert(it->first->getPredecessorStates().begin(), it->first->getPredecessorStates().end());
622
set<HMMState *> pre_new;
623
for (set<HMMState *>::const_iterator it = pre.begin(); it != pre.end(); ++it)
625
set<HMMState *> succ = (*it)->getSuccessorStates();
627
for (set<HMMState *>::const_iterator it2 = succ.begin(); it2 != succ.end(); ++it2)
629
#ifdef HIDDEN_MARKOV_MODEL_DEBUG
630
cerr << "\tadding from " << (*it2)->getName() << " " << getBackwardVariable_(*it2) << " * " << getTransitionProbability(*it, *it2) << endl;
632
sum += getBackwardVariable_(*it2) * getTransitionProbability_(*it, *it2);
633
trained_trans_.insert(make_pair(*it, *it2));
635
backward_[*it] = sum;
636
pre_new.insert((*it)->getPredecessorStates().begin(), (*it)->getPredecessorStates().end());
637
#ifdef HIDDEN_MARKOV_MODEL_DEBUG
638
cerr << "b: " << (*it)->getName() << "\t" << sum << endl;
646
DoubleReal HiddenMarkovModel::getForwardVariable_(HMMState * state)
648
return forward_.find(state) != forward_.end() ? forward_[state] : 0;
651
DoubleReal HiddenMarkovModel::getBackwardVariable_(HMMState * state)
653
return backward_.find(state) != backward_.end() ? backward_[state] : 0;
656
void HiddenMarkovModel::evaluate()
658
for (Map<HMMState *, Map<HMMState *, DoubleReal> >::const_iterator it1 = count_trans_.begin(); it1 != count_trans_.end(); ++it1)
660
#ifdef EVALUATE_DEBUG
661
cerr << it1->first->getName() << endl;
664
for (Map<HMMState *, DoubleReal>::const_iterator it2 = it1->second.begin(); it2 != it1->second.end(); ++it2)
666
if (count_trans_.find(it1->first) != count_trans_.end() &&
667
count_trans_[it1->first].find(it2->first) != count_trans_[it1->first].end())
669
sum += count_trans_[it1->first][it2->first];
670
#ifdef EVALUATE_DEBUG
671
cerr << it1->first->getName() << " " << it2->first->getName() << " ";
673
//<< count_trans_[it1->first][it2->first] << endl;
674
for (vector<DoubleReal>::const_iterator it = train_count_trans_all_[it1->first][it2->first].begin(); it != train_count_trans_all_[it1->first][it2->first].end(); ++it)
678
vector<DoubleReal> data = train_count_trans_all_[it1->first][it2->first];
679
std::sort(data.begin(), data.end());
680
DoubleReal mean = gsl_stats_mean(&data.front(), 1, data.size());
681
DoubleReal variance = gsl_stats_variance_m(&data.front(), 1, data.size(), mean);
682
cerr << "mean=" << mean << ", variance=" << variance << endl;
689
for (Map<HMMState *, DoubleReal>::const_iterator it2 = it1->second.begin(); it2 != it1->second.end(); ++it2)
691
if (count_trans_.find(it1->first) != count_trans_.end() &&
692
count_trans_[it1->first].find(it2->first) != count_trans_[it1->first].end())
694
trans_[it1->first][it2->first] = count_trans_[it1->first][it2->first] / sum;
701
void HiddenMarkovModel::setInitialTransitionProbability(const String & state, DoubleReal prob)
703
OPENMS_PRECONDITION(name_to_state_.find(state) != name_to_state_.end(), String("HiddenMarkovModel::setInitialTransitionProbability(" + state + ", " + String(prob) + "), no suchstate!").c_str());
704
//cerr << state << " " << prob << endl;
705
init_prob_[name_to_state_[state]] = prob;
708
void HiddenMarkovModel::clearInitialTransitionProbabilities()
713
void HiddenMarkovModel::setTrainingEmissionProbability(const String & state, DoubleReal prob)
716
cerr << "setTrainingEmissionProbability(" << state << "(" << name_to_state_[state] << "), " << prob << ")" << endl;
719
OPENMS_PRECONDITION(name_to_state_.find(state) != name_to_state_.end(), String("HiddenMarkovModel::setTrainingEmissionProbability(" + state + ", " + String(prob) + "), no such state!").c_str());
720
train_emission_prob_[name_to_state_[state]] = prob;
723
void HiddenMarkovModel::clearTrainingEmissionProbabilities()
725
train_emission_prob_.clear();
728
void HiddenMarkovModel::enableTransition(const String & s1, const String & s2)
731
cerr << "enableTransition: '" << s1 << "' -> '" << s2 << "'" << endl;
733
enableTransition_(name_to_state_[s1], name_to_state_[s2]);
736
void HiddenMarkovModel::enableTransition_(HMMState * s1, HMMState * s2)
738
s1->addSuccessorState(s2);
739
s2->addPredecessorState(s1);
740
enabled_trans_[s1].insert(s2);
743
void HiddenMarkovModel::disableTransition(const String & s1, const String & s2)
745
disableTransition_(name_to_state_[s1], name_to_state_[s2]);
748
void HiddenMarkovModel::disableTransition_(HMMState * s1, HMMState * s2)
750
s1->deleteSuccessorState(s2);
751
s2->deletePredecessorState(s1);
752
enabled_trans_[s1].erase(s2);
755
void HiddenMarkovModel::disableTransitions()
757
for (Map<HMMState *, set<HMMState *> >::const_iterator it = enabled_trans_.begin(); it != enabled_trans_.end(); ++it)
759
for (set<HMMState *>::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
761
it->first->deleteSuccessorState(*it2);
762
(*it2)->deletePredecessorState(it->first);
765
enabled_trans_.clear();
768
void HiddenMarkovModel::calculateEmissionProbabilities(Map<HMMState *, DoubleReal> & emission_probs)
770
Map<HMMState *, DoubleReal> states = init_prob_;
772
while (!states.empty())
774
Map<HMMState *, DoubleReal> tmp = states;
775
for (Map<HMMState *, DoubleReal>::const_iterator it = tmp.begin(); it != tmp.end(); ++it)
777
for (set<HMMState *>::const_iterator it2 = it->first->getSuccessorStates().begin(); it2 != it->first->getSuccessorStates().end(); ++it2)
779
//cerr << "->" << (*it2)->getName() << "=(from " << it->first->getName() << "): "; // << states[it->first] << " * " << getTransitionProbability_(it->first, *it2) << " ";
780
if (states.find(*it2) != states.end())
782
//cerr << " += " << states[it->first] * getTransitionProbability_(it->first, *it2);
783
states[*it2] += states[it->first] * getTransitionProbability_(it->first, *it2);
787
//cerr << " = " << states[it->first] * getTransitionProbability_(it->first, *it2);
788
states[*it2] = states[it->first] * getTransitionProbability_(it->first, *it2);
790
if (!(*it2)->isHidden())
792
if (emission_probs.find(*it2) != emission_probs.end())
794
//cerr << " emission: += " << states[it->first] * getTransitionProbability_(it->first, *it2);
795
emission_probs[*it2] += states[it->first] * getTransitionProbability_(it->first, *it2);
799
//cerr << " emission: += " << states[it->first] * getTransitionProbability_(it->first, *it2);
800
emission_probs[*it2] = states[it->first] * getTransitionProbability_(it->first, *it2);
805
states.erase(it->first);
812
void HiddenMarkovModel::dump()
814
cerr << "dump of transitions: " << endl;
815
for (Map<HMMState *, Map<HMMState *, DoubleReal> >::const_iterator it = trans_.begin(); it != trans_.end(); ++it)
817
for (Map<HMMState *, DoubleReal>::const_iterator it1 = it->second.begin(); it1 != it->second.end(); ++it1)
819
cout << it->first->getName() << " -> " << it1->first->getName() << " " << it1->second << " " << training_steps_count_[it->first][it1->first] << ": ";
820
vector<DoubleReal> all_trans = train_count_trans_all_[it->first][it1->first];
822
if (!all_trans.empty())
824
DoubleReal sum = accumulate(all_trans.begin(), all_trans.end(), 0.0);
825
DoubleReal avg(sum / DoubleReal(all_trans.size()));
827
for (Size i = 0; i != all_trans.size(); ++i)
829
cout << all_trans[i] << " ";
830
rsd += abs(all_trans[i] - avg);
832
cout << "rsd=" << rsd / DoubleReal(all_trans.size()) / avg;
833
cout << ", avg=" << avg;
840
cerr << "dump completed" << endl;
843
void HiddenMarkovModel::forwardDump()
845
set<HMMState *> succ;
846
for (Map<HMMState *, DoubleReal>::iterator it = init_prob_.begin(); it != init_prob_.end(); ++it)
848
succ.insert(it->first->getSuccessorStates().begin(), it->first->getSuccessorStates().end());
850
while (!succ.empty())
852
set<HMMState *> succ_new;
853
for (set<HMMState *>::const_iterator it = succ.begin(); it != succ.end(); ++it)
855
cerr << (*it)->getName() << endl;
856
succ_new.insert((*it)->getSuccessorStates().begin(), (*it)->getSuccessorStates().end());
863
void HiddenMarkovModel::estimateUntrainedTransitions()
865
#ifdef HIDDEN_MARKOV_MODEL_DEBUG
866
cerr << "Transition training stats" << endl;
867
for (Map<HMMState *, Map<HMMState *, Size> >::const_iterator it = training_steps_count_.begin(); it != training_steps_count_.end(); ++it)
869
for (Map<HMMState *, Size>::const_iterator it1 = it->second.begin(); it1 != it->second.end(); ++it1)
871
cerr << it->first->getName() << " " << it1->first->getName() << " " << it1->second << endl;
875
cerr << "Estimation" << endl;
879
set<const Residue *> residues(ResidueDB::getInstance()->getResidues("Natural20"));
872
880
for (StringList::const_iterator it = var_modifications_.begin(); it != var_modifications_.end(); ++it)
874
882
residues.insert(ResidueDB::getInstance()->getModifiedResidue(*it));
877
// pathways axyz and bxyz and the first two explicitely modeled ones
879
HMMState* end_state = name_to_state_["end"];
880
StringList pathways = StringList::create("axyz,axyz1,axyz1,bxyz,bxyz1,bxyz2");
881
for (StringList::const_iterator pathway_it = pathways.begin(); pathway_it != pathways.end(); ++pathway_it)
883
String pathway = *pathway_it;
884
for (set<const Residue*>::const_iterator it = residues.begin(); it != residues.end(); ++it)
886
s2 = name_to_state_[pathway];
887
for (set<const Residue*>::const_iterator jt = residues.begin(); jt != residues.end(); ++jt)
889
AASequence first_aa, second_aa;
892
String aa1(first_aa.toString()), aa2(second_aa.toString());
893
#ifdef HIDDEN_MARKOV_MODEL_DEBUG
894
cerr << "Estimating: " << aa1 << " -> " << aa2 << " (" << training_steps_count_[name_to_state_[aa1 + aa2 + "_" + pathway]][s2] << ") :: " ;
897
if (training_steps_count_[name_to_state_[aa1 + aa2 + "_" + pathway]][s2] == 0)
901
// "rows" of the amino acid matrix
902
for (set<const Residue*>::const_iterator kt = residues.begin(); kt != residues.end(); ++kt)
906
String aa3(third_aa.toString());
907
if (training_steps_count_[name_to_state_[aa1 + aa3 + "_" + pathway]][s2] != 0)
909
sum += trans_[name_to_state_[aa1 + aa3 + "_" + pathway]][s2];
913
// "columns" of the amino acid matrix
914
for (set<const Residue*>::const_iterator kt = residues.begin(); kt != residues.end(); ++kt)
918
String aa3(third_aa.toString());
920
if (training_steps_count_[name_to_state_[aa3 + aa2 + "_" + pathway]][s2] != 0)
922
sum += trans_[name_to_state_[aa3 + aa2 + "_" + pathway]][s2];
927
#ifdef HIDDEN_MARKOV_MODEL_DEBUG
928
cerr << trans_[name_to_state_[aa1 + aa2 + "_" + pathway]][s2] << " to ";
932
#ifdef HIDDEN_MARKOV_MODEL_DEBUG
933
cerr << "setting transitions of " << aa1 << aa2 << "_" << pathway << " -> " << pathway << " to " << sum/DoubleReal(count) << endl;
935
trans_[name_to_state_[aa1 + aa2 + "_" + pathway]][s2] = sum/DoubleReal(count);
936
trans_[name_to_state_[aa1 + aa2 + "_" + pathway]][end_state] = 1 - sum/DoubleReal(count);
938
#ifdef HIDDEN_MARKOV_MODEL_DEBUG
939
cerr << sum/DoubleReal(count) << endl;
943
cerr << "not needed" << endl;
951
String sc_residues("HKRDE");
952
for (String::ConstIterator it = sc_residues.begin(); it != sc_residues.end(); ++it)
956
for (set<const Residue*>::const_iterator jt = residues.begin(); jt != residues.end(); ++jt)
885
// pathways axyz and bxyz and the first two explicitly modeled ones
887
HMMState * end_state = name_to_state_["end"];
888
StringList pathways = StringList::create("axyz,axyz1,axyz1,bxyz,bxyz1,bxyz2");
889
for (StringList::const_iterator pathway_it = pathways.begin(); pathway_it != pathways.end(); ++pathway_it)
891
String pathway = *pathway_it;
892
for (set<const Residue *>::const_iterator it = residues.begin(); it != residues.end(); ++it)
894
s2 = name_to_state_[pathway];
895
for (set<const Residue *>::const_iterator jt = residues.begin(); jt != residues.end(); ++jt)
897
AASequence first_aa, second_aa;
900
String aa1(first_aa.toString()), aa2(second_aa.toString());
901
#ifdef HIDDEN_MARKOV_MODEL_DEBUG
902
cerr << "Estimating: " << aa1 << " -> " << aa2 << " (" << training_steps_count_[name_to_state_[aa1 + aa2 + "_" + pathway]][s2] << ") :: ";
905
if (training_steps_count_[name_to_state_[aa1 + aa2 + "_" + pathway]][s2] == 0)
909
// "rows" of the amino acid matrix
910
for (set<const Residue *>::const_iterator kt = residues.begin(); kt != residues.end(); ++kt)
914
String aa3(third_aa.toString());
915
if (training_steps_count_[name_to_state_[aa1 + aa3 + "_" + pathway]][s2] != 0)
917
sum += trans_[name_to_state_[aa1 + aa3 + "_" + pathway]][s2];
921
// "columns" of the amino acid matrix
922
for (set<const Residue *>::const_iterator kt = residues.begin(); kt != residues.end(); ++kt)
926
String aa3(third_aa.toString());
928
if (training_steps_count_[name_to_state_[aa3 + aa2 + "_" + pathway]][s2] != 0)
930
sum += trans_[name_to_state_[aa3 + aa2 + "_" + pathway]][s2];
935
#ifdef HIDDEN_MARKOV_MODEL_DEBUG
936
cerr << trans_[name_to_state_[aa1 + aa2 + "_" + pathway]][s2] << " to ";
940
#ifdef HIDDEN_MARKOV_MODEL_DEBUG
941
cerr << "setting transitions of " << aa1 << aa2 << "_" << pathway << " -> " << pathway << " to " << sum / DoubleReal(count) << endl;
943
trans_[name_to_state_[aa1 + aa2 + "_" + pathway]][s2] = sum / DoubleReal(count);
944
trans_[name_to_state_[aa1 + aa2 + "_" + pathway]][end_state] = 1 - sum / DoubleReal(count);
946
#ifdef HIDDEN_MARKOV_MODEL_DEBUG
947
cerr << sum / DoubleReal(count) << endl;
951
cerr << "not needed" << endl;
959
String sc_residues("HKRDE");
960
for (String::ConstIterator it = sc_residues.begin(); it != sc_residues.end(); ++it)
964
for (set<const Residue *>::const_iterator jt = residues.begin(); jt != residues.end(); ++jt)
958
966
AASequence second_aa;
960
String aa2(second_aa.toString());
962
s2 = name_to_state_[sc_res];
963
if (training_steps_count_[name_to_state_[aa2 + "_" + sc_res]][s2] == 0)
967
for (set<const Residue*>::const_iterator kt = residues.begin(); kt != residues.end(); ++kt)
971
String aa3(third_aa.toString());
973
HMMState* s1 = name_to_state_[aa3 + "_" + sc_res];
974
if (training_steps_count_[s1][s2] != 0)
976
sum += trans_[s1][s2];
983
trans_[name_to_state_[aa2 + "_" + sc_res]][s2] = sum/DoubleReal(count);
984
trans_[name_to_state_[aa2 + "_" + sc_res]][end_state] = 1 - sum/DoubleReal(count);
990
StringList bk_pathways = StringList::create("bk-1,bk-2");
992
for (StringList::const_iterator pathway_it = bk_pathways.begin(); pathway_it != bk_pathways.end(); ++pathway_it)
994
String pathway = *pathway_it;
995
s2 = name_to_state_[pathway];
996
for (set<const Residue*>::const_iterator it = residues.begin(); it != residues.end(); ++it)
1000
String aa1(first_aa.toString());
1001
if (training_steps_count_[name_to_state_[aa1 + "_" + pathway]][s2] == 0)
1005
for (set<const Residue*>::const_iterator jt = residues.begin(); jt != residues.end(); ++jt)
1007
AASequence second_aa;
1009
String aa2(second_aa.toString());
1010
HMMState* s1 = name_to_state_[aa2 + "_" + pathway];
1011
if (training_steps_count_[s1][s2] != 0)
1013
sum += trans_[s1][s2];
1016
//cerr << "Estimating transition of '" << aa1 << pathway << "' -> '" << pathway << "' to " << sum/(DoubleReal)count << endl;
1019
trans_[name_to_state_[aa1 + "_" + pathway]][s2] = sum/(DoubleReal)count;
1020
trans_[name_to_state_[aa1 + "_" + pathway]][end_state] = 1 - sum/(DoubleReal)count;
1029
void HiddenMarkovModel::setPseudoCounts(DoubleReal pseudo_counts)
1031
pseudo_counts_ = pseudo_counts;
1035
DoubleReal HiddenMarkovModel::getPseudoCounts() const
1037
return pseudo_counts_;
1040
void HiddenMarkovModel::setVariableModifications(const StringList& modifications)
1042
var_modifications_ = modifications;
1045
void HiddenMarkovModel::copy_(const HiddenMarkovModel& source)
1047
Map<HMMState*, HMMState*> old_to_new;
1048
for (set<HMMState*>::const_iterator it = source.states_.begin(); it != source.states_.end(); ++it)
1050
HMMState* s = new HMMState(**it);
968
String aa2(second_aa.toString());
970
s2 = name_to_state_[sc_res];
971
if (training_steps_count_[name_to_state_[aa2 + "_" + sc_res]][s2] == 0)
975
for (set<const Residue *>::const_iterator kt = residues.begin(); kt != residues.end(); ++kt)
979
String aa3(third_aa.toString());
981
HMMState * s1 = name_to_state_[aa3 + "_" + sc_res];
982
if (training_steps_count_[s1][s2] != 0)
984
sum += trans_[s1][s2];
991
trans_[name_to_state_[aa2 + "_" + sc_res]][s2] = sum / DoubleReal(count);
992
trans_[name_to_state_[aa2 + "_" + sc_res]][end_state] = 1 - sum / DoubleReal(count);
998
StringList bk_pathways = StringList::create("bk-1,bk-2");
1000
for (StringList::const_iterator pathway_it = bk_pathways.begin(); pathway_it != bk_pathways.end(); ++pathway_it)
1002
String pathway = *pathway_it;
1003
s2 = name_to_state_[pathway];
1004
for (set<const Residue *>::const_iterator it = residues.begin(); it != residues.end(); ++it)
1006
AASequence first_aa;
1008
String aa1(first_aa.toString());
1009
if (training_steps_count_[name_to_state_[aa1 + "_" + pathway]][s2] == 0)
1013
for (set<const Residue *>::const_iterator jt = residues.begin(); jt != residues.end(); ++jt)
1015
AASequence second_aa;
1017
String aa2(second_aa.toString());
1018
HMMState * s1 = name_to_state_[aa2 + "_" + pathway];
1019
if (training_steps_count_[s1][s2] != 0)
1021
sum += trans_[s1][s2];
1024
//cerr << "Estimating transition of '" << aa1 << pathway << "' -> '" << pathway << "' to " << sum/(DoubleReal)count << endl;
1027
trans_[name_to_state_[aa1 + "_" + pathway]][s2] = sum / (DoubleReal)count;
1028
trans_[name_to_state_[aa1 + "_" + pathway]][end_state] = 1 - sum / (DoubleReal)count;
1037
void HiddenMarkovModel::setPseudoCounts(DoubleReal pseudo_counts)
1039
pseudo_counts_ = pseudo_counts;
1043
DoubleReal HiddenMarkovModel::getPseudoCounts() const
1045
return pseudo_counts_;
1048
void HiddenMarkovModel::setVariableModifications(const StringList & modifications)
1050
var_modifications_ = modifications;
1053
void HiddenMarkovModel::copy_(const HiddenMarkovModel & source)
1055
Map<HMMState *, HMMState *> old_to_new;
1056
for (set<HMMState *>::const_iterator it = source.states_.begin(); it != source.states_.end(); ++it)
1058
HMMState * s = new HMMState(**it);
1051
1059
states_.insert(s);
1052
1060
name_to_state_[s->getName()] = s;
1053
1061
old_to_new[*it] = s;
1057
for (Map<HMMState*, Map<HMMState*, DoubleReal> >::const_iterator it1 = source.trans_.begin(); it1 != source.trans_.end(); ++it1)
1059
for (Map<HMMState*, DoubleReal>::const_iterator it2 = it1->second.begin(); it2 != it1->second.end(); ++it2)
1061
trans_[old_to_new[it1->first]][old_to_new[it2->first]] = it2->second;
1066
for (Map<HMMState*, Map<HMMState*, DoubleReal> >::const_iterator it1 = source.count_trans_.begin(); it1 != source.count_trans_.end(); ++it1)
1068
for (Map<HMMState*, DoubleReal>::const_iterator it2 = it1->second.begin(); it2 != it1->second.end(); ++it2)
1065
for (Map<HMMState *, Map<HMMState *, DoubleReal> >::const_iterator it1 = source.trans_.begin(); it1 != source.trans_.end(); ++it1)
1067
for (Map<HMMState *, DoubleReal>::const_iterator it2 = it1->second.begin(); it2 != it1->second.end(); ++it2)
1069
trans_[old_to_new[it1->first]][old_to_new[it2->first]] = it2->second;
1074
for (Map<HMMState *, Map<HMMState *, DoubleReal> >::const_iterator it1 = source.count_trans_.begin(); it1 != source.count_trans_.end(); ++it1)
1076
for (Map<HMMState *, DoubleReal>::const_iterator it2 = it1->second.begin(); it2 != it1->second.end(); ++it2)
1070
1078
count_trans_[old_to_new[it1->first]][old_to_new[it2->first]] = it2->second;
1074
for (Map<HMMState*, Map<HMMState*, std::vector<DoubleReal> > >::const_iterator it1 = source.train_count_trans_all_.begin(); it1 != source.train_count_trans_all_.end(); ++it1)
1076
for (Map<HMMState*, vector<DoubleReal> >::const_iterator it2 = it1->second.begin(); it2 != it1->second.end(); ++it2)
1082
for (Map<HMMState *, Map<HMMState *, std::vector<DoubleReal> > >::const_iterator it1 = source.train_count_trans_all_.begin(); it1 != source.train_count_trans_all_.end(); ++it1)
1084
for (Map<HMMState *, vector<DoubleReal> >::const_iterator it2 = it1->second.begin(); it2 != it1->second.end(); ++it2)
1078
1086
train_count_trans_all_[old_to_new[it1->first]][old_to_new[it2->first]] = it2->second;
1084
for (Map<HMMState*, Map<HMMState*, Size> >::const_iterator it1 = source.training_steps_count_.begin(); it1 != source.training_steps_count_.end(); ++it1)
1092
for (Map<HMMState *, Map<HMMState *, Size> >::const_iterator it1 = source.training_steps_count_.begin(); it1 != source.training_steps_count_.end(); ++it1)
1086
for (Map<HMMState*, Size>::const_iterator it2 = it1->second.begin(); it2 != it1->second.end(); ++it2)
1094
for (Map<HMMState *, Size>::const_iterator it2 = it1->second.begin(); it2 != it1->second.end(); ++it2)
1088
1096
training_steps_count_[old_to_new[it1->first]][old_to_new[it2->first]] = it2->second;
1092
// forward and backward are just temporary objects
1094
for (Map<HMMState*, DoubleReal>::const_iterator it = source.train_emission_prob_.begin(); it != source.train_emission_prob_.end(); ++it)
1096
train_emission_prob_[old_to_new[it->first]] = it->second;
1099
for (Map<HMMState*, DoubleReal>::const_iterator it = source.init_prob_.begin(); it != source.init_prob_.end(); ++it)
1101
init_prob_[old_to_new[it->first]] = it->second;
1104
for (std::set<std::pair<HMMState*, HMMState*> >::const_iterator it = source.trained_trans_.begin(); it != source.trained_trans_.end(); ++it)
1106
trained_trans_.insert(make_pair(old_to_new[it->first], old_to_new[it->second]));
1109
synonym_trans_names_ = source.synonym_trans_names_;
1110
pseudo_counts_ = source.pseudo_counts_;
1111
var_modifications_ = source.var_modifications_;
1100
// forward and backward are just temporary objects
1102
for (Map<HMMState *, DoubleReal>::const_iterator it = source.train_emission_prob_.begin(); it != source.train_emission_prob_.end(); ++it)
1104
train_emission_prob_[old_to_new[it->first]] = it->second;
1107
for (Map<HMMState *, DoubleReal>::const_iterator it = source.init_prob_.begin(); it != source.init_prob_.end(); ++it)
1109
init_prob_[old_to_new[it->first]] = it->second;
1112
for (std::set<std::pair<HMMState *, HMMState *> >::const_iterator it = source.trained_trans_.begin(); it != source.trained_trans_.end(); ++it)
1114
trained_trans_.insert(make_pair(old_to_new[it->first], old_to_new[it->second]));
1117
synonym_trans_names_ = source.synonym_trans_names_;
1118
pseudo_counts_ = source.pseudo_counts_;
1119
var_modifications_ = source.var_modifications_;
1114
1122
for (Map<String, Map<String, std::pair<String, String> > >::const_iterator it = synonym_trans_names_.begin(); it != synonym_trans_names_.end(); ++it)