1
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
2
<html xmlns="http://www.w3.org/1999/xhtml">
4
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
5
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
6
<title>tesseract: /usr/local/google/home/jbreiden/tesseract-ocr-read-only/cube/char_bigrams.cpp Source File</title>
8
<link href="tabs.css" rel="stylesheet" type="text/css"/>
9
<link href="doxygen.css" rel="stylesheet" type="text/css" />
10
<link href="navtree.css" rel="stylesheet" type="text/css"/>
11
<script type="text/javascript" src="jquery.js"></script>
12
<script type="text/javascript" src="resize.js"></script>
13
<script type="text/javascript" src="navtree.js"></script>
14
<script type="text/javascript">
15
$(document).ready(initResizable);
17
<link href="search/search.css" rel="stylesheet" type="text/css"/>
18
<script type="text/javascript" src="search/search.js"></script>
19
<script type="text/javascript">
20
$(document).ready(function() { searchBox.OnSelectItem(0); });
25
<div id="top"><!-- do not remove this div! -->
29
<table cellspacing="0" cellpadding="0">
31
<tr style="height: 56px;">
34
<td style="padding-left: 0.5em;">
35
<div id="projectname">tesseract
36
 <span id="projectnumber">3.03</span>
48
<!-- Generated by Doxygen 1.7.6.1 -->
49
<script type="text/javascript">
50
var searchBox = new SearchBox("searchBox", "search",false,'Search');
52
<div id="navrow1" class="tabs">
54
<li><a href="index.html"><span>Main Page</span></a></li>
55
<li><a href="pages.html"><span>Related Pages</span></a></li>
56
<li><a href="modules.html"><span>Modules</span></a></li>
57
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
58
<li><a href="annotated.html"><span>Classes</span></a></li>
59
<li class="current"><a href="files.html"><span>Files</span></a></li>
61
<div id="MSearchBox" class="MSearchBoxInactive">
63
<img id="MSearchSelect" src="search/mag_sel.png"
64
onmouseover="return searchBox.OnSearchSelectShow()"
65
onmouseout="return searchBox.OnSearchSelectHide()"
67
<input type="text" id="MSearchField" value="Search" accesskey="S"
68
onfocus="searchBox.OnSearchFieldFocus(true)"
69
onblur="searchBox.OnSearchFieldFocus(false)"
70
onkeyup="searchBox.OnSearchFieldChange(event)"/>
71
</span><span class="right">
72
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
78
<div id="navrow2" class="tabs2">
80
<li><a href="files.html"><span>File List</span></a></li>
81
<li><a href="globals.html"><span>File Members</span></a></li>
85
<div id="side-nav" class="ui-resizable side-nav-resizable">
87
<div id="nav-tree-contents">
90
<div id="splitbar" style="-moz-user-select:none;"
91
class="ui-resizable-handle">
94
<script type="text/javascript">
95
initNavTree('a00962.html','');
97
<div id="doc-content">
99
<div class="headertitle">
100
<div class="title">/usr/local/google/home/jbreiden/tesseract-ocr-read-only/cube/char_bigrams.cpp</div> </div>
102
<div class="contents">
103
<a href="a00962.html">Go to the documentation of this file.</a><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 <span class="comment">/**********************************************************************</span>
104
<a name="l00002"></a>00002 <span class="comment"> * File: char_bigrams.cpp</span>
105
<a name="l00003"></a>00003 <span class="comment"> * Description: Implementation of a Character Bigrams Class</span>
106
<a name="l00004"></a>00004 <span class="comment"> * Author: Ahmad Abdulkader</span>
107
<a name="l00005"></a>00005 <span class="comment"> * Created: 2007</span>
108
<a name="l00006"></a>00006 <span class="comment"> *</span>
109
<a name="l00007"></a>00007 <span class="comment"> * (C) Copyright 2008, Google Inc.</span>
110
<a name="l00008"></a>00008 <span class="comment"> ** Licensed under the Apache License, Version 2.0 (the "License");</span>
111
<a name="l00009"></a>00009 <span class="comment"> ** you may not use this file except in compliance with the License.</span>
112
<a name="l00010"></a>00010 <span class="comment"> ** You may obtain a copy of the License at</span>
113
<a name="l00011"></a>00011 <span class="comment"> ** http://www.apache.org/licenses/LICENSE-2.0</span>
114
<a name="l00012"></a>00012 <span class="comment"> ** Unless required by applicable law or agreed to in writing, software</span>
115
<a name="l00013"></a>00013 <span class="comment"> ** distributed under the License is distributed on an "AS IS" BASIS,</span>
116
<a name="l00014"></a>00014 <span class="comment"> ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
117
<a name="l00015"></a>00015 <span class="comment"> ** See the License for the specific language governing permissions and</span>
118
<a name="l00016"></a>00016 <span class="comment"> ** limitations under the License.</span>
119
<a name="l00017"></a>00017 <span class="comment"> *</span>
120
<a name="l00018"></a>00018 <span class="comment"> **********************************************************************/</span>
121
<a name="l00019"></a>00019
122
<a name="l00020"></a>00020 <span class="preprocessor">#include &lt;algorithm&gt;</span>
123
<a name="l00021"></a>00021 <span class="preprocessor">#include &lt;math.h&gt;</span>
124
<a name="l00022"></a>00022 <span class="preprocessor">#include &lt;string&gt;</span>
125
<a name="l00023"></a>00023 <span class="preprocessor">#include &lt;vector&gt;</span>
126
<a name="l00024"></a>00024
127
<a name="l00025"></a>00025 <span class="preprocessor">#include "<a class="code" href="a00963.html">char_bigrams.h</a>"</span>
128
<a name="l00026"></a>00026 <span class="preprocessor">#include "<a class="code" href="a00991.html">cube_utils.h</a>"</span>
129
<a name="l00027"></a>00027 <span class="preprocessor">#include "<a class="code" href="a00839.html">ndminx.h</a>"</span>
130
<a name="l00028"></a>00028 <span class="preprocessor">#include "<a class="code" href="a00979.html">cube_const.h</a>"</span>
131
<a name="l00029"></a>00029
132
<a name="l00030"></a>00030 <span class="keyword">namespace </span><a class="code" href="a01266.html#afed58feacb84df2de88bdd613cfdba6d">tesseract</a> {
133
<a name="l00031"></a>00031
134
<a name="l00032"></a><a class="code" href="a00302.html#a17e9f5a3b5a5dc9704b89c698e5db481">00032</a> <a class="code" href="a00302.html#a17e9f5a3b5a5dc9704b89c698e5db481">CharBigrams::CharBigrams</a>() {
135
<a name="l00033"></a>00033 memset(&bigram_table_, 0, <span class="keyword">sizeof</span>(bigram_table_));
136
<a name="l00034"></a>00034 }
137
<a name="l00035"></a>00035
138
<a name="l00036"></a><a class="code" href="a00302.html#ae622fa52434296b70785149199b7007f">00036</a> <a class="code" href="a00302.html#ae622fa52434296b70785149199b7007f">CharBigrams::~CharBigrams</a>() {
139
<a name="l00037"></a>00037 <span class="keywordflow">if</span> (bigram_table_.<a class="code" href="a00303.html#aae8c431003254c08e6ec02d1309e2d65">char_bigram</a> != NULL) {
140
<a name="l00038"></a>00038 <span class="keywordflow">for</span> (<span class="keywordtype">int</span> ch1 = 0; ch1 <= bigram_table_.<a class="code" href="a00303.html#adcb5fea862baff3582bcf5223218026d">max_char</a>; ch1++) {
141
<a name="l00039"></a>00039 <a class="code" href="a00301.html">CharBigram</a> *char_bigram = bigram_table_.<a class="code" href="a00303.html#aae8c431003254c08e6ec02d1309e2d65">char_bigram</a> + ch1;
142
<a name="l00040"></a>00040
143
<a name="l00041"></a>00041 <span class="keywordflow">if</span> (char_bigram-><a class="code" href="a00301.html#acf18b9398a62f9f184fdd18b8d91bdd8">bigram</a> != NULL) {
144
<a name="l00042"></a>00042 <span class="keyword">delete</span> []char_bigram-><a class="code" href="a00301.html#acf18b9398a62f9f184fdd18b8d91bdd8">bigram</a>;
145
<a name="l00043"></a>00043 }
146
<a name="l00044"></a>00044 }
147
<a name="l00045"></a>00045 <span class="keyword">delete</span> []bigram_table_.<a class="code" href="a00303.html#aae8c431003254c08e6ec02d1309e2d65">char_bigram</a>;
148
<a name="l00046"></a>00046 }
149
<a name="l00047"></a>00047 }
150
<a name="l00048"></a>00048
151
<a name="l00049"></a><a class="code" href="a00302.html#a736a26176210da09fd5c57498de1cf77">00049</a> <a class="code" href="a00302.html">CharBigrams</a> *<a class="code" href="a00302.html#a736a26176210da09fd5c57498de1cf77">CharBigrams::Create</a>(<span class="keyword">const</span> <span class="keywordtype">string</span> &data_file_path,
152
<a name="l00050"></a>00050 <span class="keyword">const</span> <span class="keywordtype">string</span> &<a class="code" href="a01266.html#a4d02e13fee24fdebbbe98ccdcb9c9279">lang</a>) {
153
<a name="l00051"></a>00051 <span class="keywordtype">string</span> file_name;
154
<a name="l00052"></a>00052 <span class="keywordtype">string</span> str;
155
<a name="l00053"></a>00053
156
<a name="l00054"></a>00054 file_name = data_file_path + <a class="code" href="a01266.html#a4d02e13fee24fdebbbe98ccdcb9c9279">lang</a>;
157
<a name="l00055"></a>00055 file_name += <span class="stringliteral">".cube.bigrams"</span>;
158
<a name="l00056"></a>00056
159
<a name="l00057"></a>00057 <span class="comment">// load the string into memory</span>
160
<a name="l00058"></a>00058 <span class="keywordflow">if</span> (!<a class="code" href="a00343.html#ac5c5bf284cd96f78f62f19938bec750a">CubeUtils::ReadFileToString</a>(file_name, &str)) {
161
<a name="l00059"></a>00059 <span class="keywordflow">return</span> NULL;
162
<a name="l00060"></a>00060 }
163
<a name="l00061"></a>00061
164
<a name="l00062"></a>00062 <span class="comment">// construct a new object</span>
165
<a name="l00063"></a>00063 <a class="code" href="a00302.html">CharBigrams</a> *char_bigrams_obj = <span class="keyword">new</span> <a class="code" href="a00302.html#a17e9f5a3b5a5dc9704b89c698e5db481">CharBigrams</a>();
166
<a name="l00064"></a>00064 <span class="keywordflow">if</span> (char_bigrams_obj == NULL) {
167
<a name="l00065"></a>00065 fprintf(stderr, <span class="stringliteral">"Cube ERROR (CharBigrams::Create): could not create "</span>
168
<a name="l00066"></a>00066 <span class="stringliteral">"character bigrams object.\n"</span>);
169
<a name="l00067"></a>00067 <span class="keywordflow">return</span> NULL;
170
<a name="l00068"></a>00068 }
171
<a name="l00069"></a>00069 <a class="code" href="a00303.html">CharBigramTable</a> *table = &char_bigrams_obj->bigram_table_;
172
<a name="l00070"></a>00070
173
<a name="l00071"></a>00071 table-><a class="code" href="a00303.html#ae5fc0fd5c907a7d871c08bffa7a08716">total_cnt</a> = 0;
174
<a name="l00072"></a>00072 table-><a class="code" href="a00303.html#adcb5fea862baff3582bcf5223218026d">max_char</a> = -1;
175
<a name="l00073"></a>00073 table-><a class="code" href="a00303.html#aae8c431003254c08e6ec02d1309e2d65">char_bigram</a> = NULL;
176
<a name="l00074"></a>00074
177
<a name="l00075"></a>00075 <span class="comment">// split into lines</span>
178
<a name="l00076"></a>00076 vector&lt;string&gt; str_vec;
179
<a name="l00077"></a>00077 <a class="code" href="a00343.html#af7dea4521db1e7099c93606d0f5bf4a4">CubeUtils::SplitStringUsing</a>(str, <span class="stringliteral">"\r\n"</span>, &str_vec);
180
<a name="l00078"></a>00078
181
<a name="l00079"></a>00079 <span class="keywordflow">for</span> (<span class="keywordtype">int</span> big = 0; big < str_vec.size(); big++) {
182
<a name="l00080"></a>00080 <a class="code" href="a01265.html#aea2c6172b0ca77907e29cd018595b425">char_32</a> ch1;
183
<a name="l00081"></a>00081 <a class="code" href="a01265.html#aea2c6172b0ca77907e29cd018595b425">char_32</a> ch2;
184
<a name="l00082"></a>00082 <span class="keywordtype">int</span> cnt;
185
<a name="l00083"></a>00083 <span class="keywordflow">if</span> (sscanf(str_vec[big].c_str(), <span class="stringliteral">"%d %x %x"</span>, &cnt, &ch1, &ch2) != 3) {
186
<a name="l00084"></a>00084 fprintf(stderr, <span class="stringliteral">"Cube ERROR (CharBigrams::Create): invalid format "</span>
187
<a name="l00085"></a>00085 <span class="stringliteral">"reading line: %s\n"</span>, str_vec[big].c_str());
188
<a name="l00086"></a>00086 <span class="keywordflow">return</span> NULL;
189
<a name="l00087"></a>00087 }
190
<a name="l00088"></a>00088
191
<a name="l00089"></a>00089 <span class="comment">// expand the bigram table</span>
192
<a name="l00090"></a>00090 <span class="keywordflow">if</span> (ch1 > table-><a class="code" href="a00303.html#adcb5fea862baff3582bcf5223218026d">max_char</a>) {
193
<a name="l00091"></a>00091 <a class="code" href="a00301.html">CharBigram</a> *char_bigram = <span class="keyword">new</span> <a class="code" href="a00301.html">CharBigram</a>[ch1 + 1];
194
<a name="l00092"></a>00092 <span class="keywordflow">if</span> (char_bigram == NULL) {
195
<a name="l00093"></a>00093 fprintf(stderr, <span class="stringliteral">"Cube ERROR (CharBigrams::Create): error allocating "</span>
196
<a name="l00094"></a>00094 <span class="stringliteral">"additional memory for character bigram table.\n"</span>);
197
<a name="l00095"></a>00095 <span class="keywordflow">return</span> NULL;
198
<a name="l00096"></a>00096 }
199
<a name="l00097"></a>00097
200
<a name="l00098"></a>00098 <span class="keywordflow">if</span> (table-><a class="code" href="a00303.html#aae8c431003254c08e6ec02d1309e2d65">char_bigram</a> != NULL && table-><a class="code" href="a00303.html#adcb5fea862baff3582bcf5223218026d">max_char</a> >= 0) {
201
<a name="l00099"></a>00099 memcpy(char_bigram, table-><a class="code" href="a00303.html#aae8c431003254c08e6ec02d1309e2d65">char_bigram</a>,
202
<a name="l00100"></a>00100 (table-><a class="code" href="a00303.html#adcb5fea862baff3582bcf5223218026d">max_char</a> + 1) * <span class="keyword">sizeof</span>(*char_bigram));
203
<a name="l00101"></a>00101
204
<a name="l00102"></a>00102 <span class="keyword">delete</span> []table-><a class="code" href="a00303.html#aae8c431003254c08e6ec02d1309e2d65">char_bigram</a>;
205
<a name="l00103"></a>00103 }
206
<a name="l00104"></a>00104 table-><a class="code" href="a00303.html#aae8c431003254c08e6ec02d1309e2d65">char_bigram</a> = char_bigram;
207
<a name="l00105"></a>00105
208
<a name="l00106"></a>00106 <span class="comment">// init</span>
209
<a name="l00107"></a>00107 <span class="keywordflow">for</span> (<span class="keywordtype">int</span> new_big = table-><a class="code" href="a00303.html#adcb5fea862baff3582bcf5223218026d">max_char</a> + 1; new_big <= ch1; new_big++) {
210
<a name="l00108"></a>00108 table-><a class="code" href="a00303.html#aae8c431003254c08e6ec02d1309e2d65">char_bigram</a>[new_big].<a class="code" href="a00301.html#a5fcb5e2bd655dcf9185f8059af6ae392">total_cnt</a> = 0;
211
<a name="l00109"></a>00109 table-><a class="code" href="a00303.html#aae8c431003254c08e6ec02d1309e2d65">char_bigram</a>[new_big].<a class="code" href="a00301.html#a2e2bd3dc796213605662064c8c2bebc9">max_char</a> = -1;
212
<a name="l00110"></a>00110 table-><a class="code" href="a00303.html#aae8c431003254c08e6ec02d1309e2d65">char_bigram</a>[new_big].<a class="code" href="a00301.html#acf18b9398a62f9f184fdd18b8d91bdd8">bigram</a> = NULL;
213
<a name="l00111"></a>00111 }
214
<a name="l00112"></a>00112 table-><a class="code" href="a00303.html#adcb5fea862baff3582bcf5223218026d">max_char</a> = ch1;
215
<a name="l00113"></a>00113 }
216
<a name="l00114"></a>00114
217
<a name="l00115"></a>00115 <span class="keywordflow">if</span> (ch2 > table-><a class="code" href="a00303.html#aae8c431003254c08e6ec02d1309e2d65">char_bigram</a>[ch1].<a class="code" href="a00301.html#a2e2bd3dc796213605662064c8c2bebc9">max_char</a>) {
218
<a name="l00116"></a>00116 <a class="code" href="a00271.html">Bigram</a> *bigram = <span class="keyword">new</span> <a class="code" href="a00271.html">Bigram</a>[ch2 + 1];
219
<a name="l00117"></a>00117 <span class="keywordflow">if</span> (bigram == NULL) {
220
<a name="l00118"></a>00118 fprintf(stderr, <span class="stringliteral">"Cube ERROR (CharBigrams::Create): error allocating "</span>
221
<a name="l00119"></a>00119 <span class="stringliteral">"memory for bigram.\n"</span>);
222
<a name="l00120"></a>00120 <span class="keywordflow">return</span> NULL;
223
<a name="l00121"></a>00121 }
224
<a name="l00122"></a>00122
225
<a name="l00123"></a>00123 <span class="keywordflow">if</span> (table-><a class="code" href="a00303.html#aae8c431003254c08e6ec02d1309e2d65">char_bigram</a>[ch1].<a class="code" href="a00301.html#acf18b9398a62f9f184fdd18b8d91bdd8">bigram</a> != NULL &&
226
<a name="l00124"></a>00124 table-><a class="code" href="a00303.html#aae8c431003254c08e6ec02d1309e2d65">char_bigram</a>[ch1].<a class="code" href="a00301.html#a2e2bd3dc796213605662064c8c2bebc9">max_char</a> >= 0) {
227
<a name="l00125"></a>00125 memcpy(bigram, table-><a class="code" href="a00303.html#aae8c431003254c08e6ec02d1309e2d65">char_bigram</a>[ch1].<a class="code" href="a00301.html#acf18b9398a62f9f184fdd18b8d91bdd8">bigram</a>,
228
<a name="l00126"></a>00126 (table-><a class="code" href="a00303.html#aae8c431003254c08e6ec02d1309e2d65">char_bigram</a>[ch1].<a class="code" href="a00301.html#a2e2bd3dc796213605662064c8c2bebc9">max_char</a> + 1) * <span class="keyword">sizeof</span>(*bigram));
229
<a name="l00127"></a>00127 <span class="keyword">delete</span> []table-><a class="code" href="a00303.html#aae8c431003254c08e6ec02d1309e2d65">char_bigram</a>[ch1].<a class="code" href="a00301.html#acf18b9398a62f9f184fdd18b8d91bdd8">bigram</a>;
230
<a name="l00128"></a>00128 }
231
<a name="l00129"></a>00129 table-><a class="code" href="a00303.html#aae8c431003254c08e6ec02d1309e2d65">char_bigram</a>[ch1].<a class="code" href="a00301.html#acf18b9398a62f9f184fdd18b8d91bdd8">bigram</a> = bigram;
232
<a name="l00130"></a>00130
233
<a name="l00131"></a>00131 <span class="comment">// init</span>
234
<a name="l00132"></a>00132 <span class="keywordflow">for</span> (<span class="keywordtype">int</span> new_big = table-><a class="code" href="a00303.html#aae8c431003254c08e6ec02d1309e2d65">char_bigram</a>[ch1].<a class="code" href="a00301.html#a2e2bd3dc796213605662064c8c2bebc9">max_char</a> + 1;
235
<a name="l00133"></a>00133 new_big <= ch2; new_big++) {
236
<a name="l00134"></a>00134 table-><a class="code" href="a00303.html#aae8c431003254c08e6ec02d1309e2d65">char_bigram</a>[ch1].<a class="code" href="a00301.html#acf18b9398a62f9f184fdd18b8d91bdd8">bigram</a>[new_big].<a class="code" href="a00271.html#ab3ca0606152af62f49b3d2f69c0effcc">cnt</a> = 0;
237
<a name="l00135"></a>00135 }
238
<a name="l00136"></a>00136 table-><a class="code" href="a00303.html#aae8c431003254c08e6ec02d1309e2d65">char_bigram</a>[ch1].<a class="code" href="a00301.html#a2e2bd3dc796213605662064c8c2bebc9">max_char</a> = ch2;
239
<a name="l00137"></a>00137 }
240
<a name="l00138"></a>00138
241
<a name="l00139"></a>00139 table-><a class="code" href="a00303.html#aae8c431003254c08e6ec02d1309e2d65">char_bigram</a>[ch1].<a class="code" href="a00301.html#acf18b9398a62f9f184fdd18b8d91bdd8">bigram</a>[ch2].<a class="code" href="a00271.html#ab3ca0606152af62f49b3d2f69c0effcc">cnt</a> = cnt;
242
<a name="l00140"></a>00140 table-><a class="code" href="a00303.html#aae8c431003254c08e6ec02d1309e2d65">char_bigram</a>[ch1].<a class="code" href="a00301.html#a5fcb5e2bd655dcf9185f8059af6ae392">total_cnt</a> += cnt;
243
<a name="l00141"></a>00141 table-><a class="code" href="a00303.html#ae5fc0fd5c907a7d871c08bffa7a08716">total_cnt</a> += cnt;
244
<a name="l00142"></a>00142 }
245
<a name="l00143"></a>00143
246
<a name="l00144"></a>00144 <span class="comment">// compute costs (-log probs)</span>
247
<a name="l00145"></a>00145 table-><a class="code" href="a00303.html#ac0c678c3fe7bd24f65b706e3fab1957b">worst_cost</a> = <span class="keyword">static_cast&lt;</span><span class="keywordtype">int</span><span class="keyword">&gt;</span>(
248
<a name="l00146"></a>00146 -<a class="code" href="a00979.html#ab1e863295c568a5acf35176a474a87a9">PROB2COST_SCALE</a> * log(0.5 / table-><a class="code" href="a00303.html#ae5fc0fd5c907a7d871c08bffa7a08716">total_cnt</a>));
249
<a name="l00147"></a>00147 <span class="keywordflow">for</span> (<a class="code" href="a01265.html#aea2c6172b0ca77907e29cd018595b425">char_32</a> ch1 = 0; ch1 <= table-><a class="code" href="a00303.html#adcb5fea862baff3582bcf5223218026d">max_char</a>; ch1++) {
250
<a name="l00148"></a>00148 <span class="keywordflow">for</span> (<a class="code" href="a01265.html#aea2c6172b0ca77907e29cd018595b425">char_32</a> ch2 = 0; ch2 <= table-><a class="code" href="a00303.html#aae8c431003254c08e6ec02d1309e2d65">char_bigram</a>[ch1].<a class="code" href="a00301.html#a2e2bd3dc796213605662064c8c2bebc9">max_char</a>; ch2++) {
251
<a name="l00149"></a>00149 <span class="keywordtype">int</span> cnt = table-><a class="code" href="a00303.html#aae8c431003254c08e6ec02d1309e2d65">char_bigram</a>[ch1].<a class="code" href="a00301.html#acf18b9398a62f9f184fdd18b8d91bdd8">bigram</a>[ch2].<a class="code" href="a00271.html#ab3ca0606152af62f49b3d2f69c0effcc">cnt</a>;
252
<a name="l00150"></a>00150 table-><a class="code" href="a00303.html#aae8c431003254c08e6ec02d1309e2d65">char_bigram</a>[ch1].<a class="code" href="a00301.html#acf18b9398a62f9f184fdd18b8d91bdd8">bigram</a>[ch2].<a class="code" href="a00271.html#ac82fdf747c4a7a4c5626964c635eb251">cost</a> =
253
<a name="l00151"></a>00151 <span class="keyword">static_cast&lt;</span><span class="keywordtype">int</span><span class="keyword">&gt;</span>(-<a class="code" href="a00979.html#ab1e863295c568a5acf35176a474a87a9">PROB2COST_SCALE</a> *
254
<a name="l00152"></a>00152 log(MAX(0.5, static_cast&lt;double&gt;(cnt)) /
255
<a name="l00153"></a>00153 table-><a class="code" href="a00303.html#ae5fc0fd5c907a7d871c08bffa7a08716">total_cnt</a>));
256
<a name="l00154"></a>00154 }
257
<a name="l00155"></a>00155 }
258
<a name="l00156"></a>00156 <span class="keywordflow">return</span> char_bigrams_obj;
259
<a name="l00157"></a>00157 }
260
<a name="l00158"></a>00158
261
<a name="l00159"></a><a class="code" href="a00302.html#acca74d8457ec604bc39f0f0b3a77443b">00159</a> <span class="keywordtype">int</span> <a class="code" href="a00302.html#acca74d8457ec604bc39f0f0b3a77443b">CharBigrams::PairCost</a>(<a class="code" href="a01265.html#aea2c6172b0ca77907e29cd018595b425">char_32</a> ch1, <a class="code" href="a01265.html#aea2c6172b0ca77907e29cd018595b425">char_32</a> ch2)<span class="keyword"> const </span>{
262
<a name="l00160"></a>00160 <span class="keywordflow">if</span> (ch1 > bigram_table_.<a class="code" href="a00303.html#adcb5fea862baff3582bcf5223218026d">max_char</a>) {
263
<a name="l00161"></a>00161 <span class="keywordflow">return</span> bigram_table_.<a class="code" href="a00303.html#ac0c678c3fe7bd24f65b706e3fab1957b">worst_cost</a>;
264
<a name="l00162"></a>00162 }
265
<a name="l00163"></a>00163 <span class="keywordflow">if</span> (ch2 > bigram_table_.<a class="code" href="a00303.html#aae8c431003254c08e6ec02d1309e2d65">char_bigram</a>[ch1].<a class="code" href="a00301.html#a2e2bd3dc796213605662064c8c2bebc9">max_char</a>) {
266
<a name="l00164"></a>00164 <span class="keywordflow">return</span> bigram_table_.<a class="code" href="a00303.html#ac0c678c3fe7bd24f65b706e3fab1957b">worst_cost</a>;
267
<a name="l00165"></a>00165 }
268
<a name="l00166"></a>00166 <span class="keywordflow">return</span> bigram_table_.<a class="code" href="a00303.html#aae8c431003254c08e6ec02d1309e2d65">char_bigram</a>[ch1].<a class="code" href="a00301.html#acf18b9398a62f9f184fdd18b8d91bdd8">bigram</a>[ch2].<a class="code" href="a00271.html#ac82fdf747c4a7a4c5626964c635eb251">cost</a>;
269
<a name="l00167"></a>00167 }
270
<a name="l00168"></a>00168
271
<a name="l00169"></a><a class="code" href="a00302.html#a2a19105bd0f09a1c124a051c6514569e">00169</a> <span class="keywordtype">int</span> <a class="code" href="a00302.html#a2a19105bd0f09a1c124a051c6514569e">CharBigrams::Cost</a>(<span class="keyword">const</span> <a class="code" href="a01265.html#aea2c6172b0ca77907e29cd018595b425">char_32</a> *char_32_ptr, <a class="code" href="a00309.html">CharSet</a> *char_set)<span class="keyword"> const </span>{
272
<a name="l00170"></a>00170 <span class="keywordflow">if</span> (!char_32_ptr || char_32_ptr[0] == 0) {
273
<a name="l00171"></a>00171 <span class="keywordflow">return</span> bigram_table_.<a class="code" href="a00303.html#ac0c678c3fe7bd24f65b706e3fab1957b">worst_cost</a>;
274
<a name="l00172"></a>00172 }
275
<a name="l00173"></a>00173 <span class="keywordtype">int</span> cost = <a class="code" href="a00302.html#aad8d15924e4b9793972b1c7488cc8466">MeanCostWithSpaces</a>(char_32_ptr);
276
<a name="l00174"></a>00174 <span class="keywordflow">if</span> (<a class="code" href="a00343.html#a88fe596e3dcadab7909c0bff64f61f59">CubeUtils::StrLen</a>(char_32_ptr) >= kMinLengthCaseInvariant &&
277
<a name="l00175"></a>00175 <a class="code" href="a00343.html#a0c67516e85144e0d736f30c21208aeda">CubeUtils::IsCaseInvariant</a>(char_32_ptr, char_set)) {
278
<a name="l00176"></a>00176 <a class="code" href="a01265.html#aea2c6172b0ca77907e29cd018595b425">char_32</a> *lower_32 = <a class="code" href="a00343.html#ac051dbde8b019f824b1bd8ae8d69d10e">CubeUtils::ToLower</a>(char_32_ptr, char_set);
279
<a name="l00177"></a>00177 <span class="keywordflow">if</span> (lower_32 && lower_32[0] != 0) {
280
<a name="l00178"></a>00178 <span class="keywordtype">int</span> cost_lower = <a class="code" href="a00302.html#aad8d15924e4b9793972b1c7488cc8466">MeanCostWithSpaces</a>(lower_32);
281
<a name="l00179"></a>00179 cost = MIN(cost, cost_lower);
282
<a name="l00180"></a>00180 <span class="keyword">delete</span> [] lower_32;
283
<a name="l00181"></a>00181 }
284
<a name="l00182"></a>00182 <a class="code" href="a01265.html#aea2c6172b0ca77907e29cd018595b425">char_32</a> *upper_32 = <a class="code" href="a00343.html#ac5f9453d9b30ec4343940ad428a4638d">CubeUtils::ToUpper</a>(char_32_ptr, char_set);
285
<a name="l00183"></a>00183 <span class="keywordflow">if</span> (upper_32 && upper_32[0] != 0) {
286
<a name="l00184"></a>00184 <span class="keywordtype">int</span> cost_upper = <a class="code" href="a00302.html#aad8d15924e4b9793972b1c7488cc8466">MeanCostWithSpaces</a>(upper_32);
287
<a name="l00185"></a>00185 cost = MIN(cost, cost_upper);
288
<a name="l00186"></a>00186 <span class="keyword">delete</span> [] upper_32;
289
<a name="l00187"></a>00187 }
290
<a name="l00188"></a>00188 }
291
<a name="l00189"></a>00189 <span class="keywordflow">return</span> cost;
292
<a name="l00190"></a>00190 }
293
<a name="l00191"></a>00191
294
<a name="l00192"></a><a class="code" href="a00302.html#aad8d15924e4b9793972b1c7488cc8466">00192</a> <span class="keywordtype">int</span> <a class="code" href="a00302.html#aad8d15924e4b9793972b1c7488cc8466">CharBigrams::MeanCostWithSpaces</a>(<span class="keyword">const</span> <a class="code" href="a01265.html#aea2c6172b0ca77907e29cd018595b425">char_32</a> *char_32_ptr)<span class="keyword"> const </span>{
295
<a name="l00193"></a>00193 <span class="keywordflow">if</span> (!char_32_ptr)
296
<a name="l00194"></a>00194 <span class="keywordflow">return</span> bigram_table_.<a class="code" href="a00303.html#ac0c678c3fe7bd24f65b706e3fab1957b">worst_cost</a>;
297
<a name="l00195"></a>00195 <span class="keywordtype">int</span> len = <a class="code" href="a00343.html#a88fe596e3dcadab7909c0bff64f61f59">CubeUtils::StrLen</a>(char_32_ptr);
298
<a name="l00196"></a>00196 <span class="keywordtype">int</span> cost = 0;
299
<a name="l00197"></a>00197 <span class="keywordtype">int</span> c = 0;
300
<a name="l00198"></a>00198 cost = <a class="code" href="a00302.html#acca74d8457ec604bc39f0f0b3a77443b">PairCost</a>(<span class="charliteral">' '</span>, char_32_ptr[0]);
301
<a name="l00199"></a>00199 <span class="keywordflow">for</span> (c = 1; c < len; c++) {
302
<a name="l00200"></a>00200 cost += <a class="code" href="a00302.html#acca74d8457ec604bc39f0f0b3a77443b">PairCost</a>(char_32_ptr[c - 1], char_32_ptr[c]);
303
<a name="l00201"></a>00201 }
304
<a name="l00202"></a>00202 cost += <a class="code" href="a00302.html#acca74d8457ec604bc39f0f0b3a77443b">PairCost</a>(char_32_ptr[len - 1], <span class="charliteral">' '</span>);
305
<a name="l00203"></a>00203 <span class="keywordflow">return</span> <span class="keyword">static_cast&lt;</span><span class="keywordtype">int</span><span class="keyword">&gt;</span>(cost / <span class="keyword">static_cast&lt;</span><span class="keywordtype">double</span><span class="keyword">&gt;</span>(len + 1));
306
<a name="l00204"></a>00204 }
307
<a name="l00205"></a>00205 } <span class="comment">// namespace tesseract</span>
308
</pre></div></div><!-- contents -->
310
<!-- window showing the filter options -->
311
<div id="MSearchSelectWindow"
312
onmouseover="return searchBox.OnSearchSelectShow()"
313
onmouseout="return searchBox.OnSearchSelectHide()"
314
onkeydown="return searchBox.OnSearchSelectKey(event)">
315
<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark"> </span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark"> </span>Classes</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark"> </span>Namespaces</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark"> </span>Files</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark"> </span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(5)"><span class="SelectionMark"> </span>Variables</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(6)"><span class="SelectionMark"> </span>Typedefs</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(7)"><span class="SelectionMark"> </span>Enumerations</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(8)"><span class="SelectionMark"> </span>Enumerator</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(9)"><span class="SelectionMark"> </span>Friends</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(10)"><span class="SelectionMark"> </span>Defines</a></div>
317
<!-- iframe showing the search results (closed by default) -->
318
<div id="MSearchResultsWindow">
319
<iframe src="javascript:void(0)" frameborder="0"
320
name="MSearchResults" id="MSearchResults">
324
<div id="nav-path" class="navpath">
326
<li class="navelem"><a class="el" href="a00962.html">char_bigrams.cpp</a> </li>
328
<li class="footer">Generated on Mon Feb 3 2014 10:59:09 for tesseract by
329
<a href="http://www.doxygen.org/index.html">
330
<img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.7.6.1 </li>