2
Copyright © 2013, marmuta <marmvta@gmail.com>
4
This file is part of Onboard.
6
This program is free software: you can redistribute it and/or modify
7
it under the terms of the GNU General Public License as published by
8
the Free Software Foundation, either version 3 of the License, or
9
(at your option) any later version.
11
This program is distributed in the hope that it will be useful,
12
but WITHOUT ANY WARRANTY; without even the implied warranty of
13
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
GNU General Public License for more details.
16
You should have received a copy of the GNU General Public License
17
along with this program. If not, see <http://www.gnu.org/licenses/>.
21
#include "lm_unigram.h"
26
//------------------------------------------------------------------------
28
//------------------------------------------------------------------------
30
// Calculate a vector of probabilities for the ngrams formed
31
// by history + word[i], for all i.
32
// Input: constant history and a vector of candidate words
33
// Output: vector of probabilities, one value per candidate word
34
void UnigramModel::get_probs(const std::vector<WordId>& history,
35
const std::vector<WordId>& words,
36
std::vector<double>& probabilities)
38
std::vector<double>& vp = probabilities;
39
int size = words.size(); // number of candidate words
40
int num_word_types = get_num_word_types();
41
int cs = accumulate(m_counts.begin(), m_counts.end(), 0); // total number of occurencess
45
for(int i=0; i<size; i++)
47
WordId wid = words[i];
48
CountType count = m_counts.at(wid);
49
vp[i] = count / (double) cs;
54
fill(vp.begin(), vp.end(), 1.0/num_word_types); // uniform distribution