1
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
2
<html xmlns="http://www.w3.org/1999/xhtml">
4
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
5
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
6
<title>tesseract: /usr/local/google/home/jbreiden/tesseract-ocr-read-only/wordrec/chopper.cpp Source File</title>
8
<link href="tabs.css" rel="stylesheet" type="text/css"/>
9
<link href="doxygen.css" rel="stylesheet" type="text/css" />
10
<link href="navtree.css" rel="stylesheet" type="text/css"/>
11
<script type="text/javascript" src="jquery.js"></script>
12
<script type="text/javascript" src="resize.js"></script>
13
<script type="text/javascript" src="navtree.js"></script>
14
<script type="text/javascript">
15
$(document).ready(initResizable);
</script>
17
<link href="search/search.css" rel="stylesheet" type="text/css"/>
18
<script type="text/javascript" src="search/search.js"></script>
19
<script type="text/javascript">
20
$(document).ready(function() { searchBox.OnSelectItem(0); });
</script>
25
<div id="top"><!-- do not remove this div! -->
29
<table cellspacing="0" cellpadding="0">
31
<tr style="height: 56px;">
34
<td style="padding-left: 0.5em;">
35
<div id="projectname">tesseract
36
 <span id="projectnumber">3.03</span>
48
<!-- Generated by Doxygen 1.7.6.1 -->
49
<script type="text/javascript">
50
var searchBox = new SearchBox("searchBox", "search",false,'Search');
</script>
52
<div id="navrow1" class="tabs">
54
<li><a href="index.html"><span>Main Page</span></a></li>
55
<li><a href="pages.html"><span>Related Pages</span></a></li>
56
<li><a href="modules.html"><span>Modules</span></a></li>
57
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
58
<li><a href="annotated.html"><span>Classes</span></a></li>
59
<li class="current"><a href="files.html"><span>Files</span></a></li>
61
<div id="MSearchBox" class="MSearchBoxInactive">
63
<img id="MSearchSelect" src="search/mag_sel.png"
64
onmouseover="return searchBox.OnSearchSelectShow()"
65
onmouseout="return searchBox.OnSearchSelectHide()"
alt=""/>
67
<input type="text" id="MSearchField" value="Search" accesskey="S"
68
onfocus="searchBox.OnSearchFieldFocus(true)"
69
onblur="searchBox.OnSearchFieldFocus(false)"
70
onkeyup="searchBox.OnSearchFieldChange(event)"/>
71
</span><span class="right">
72
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
78
<div id="navrow2" class="tabs2">
80
<li><a href="files.html"><span>File List</span></a></li>
81
<li><a href="globals.html"><span>File Members</span></a></li>
85
<div id="side-nav" class="ui-resizable side-nav-resizable">
87
<div id="nav-tree-contents">
90
<div id="splitbar" style="-moz-user-select:none;"
91
class="ui-resizable-handle">
94
<script type="text/javascript">
95
initNavTree('a01223.html','');
</script>
97
<div id="doc-content">
99
<div class="headertitle">
100
<div class="title">/usr/local/google/home/jbreiden/tesseract-ocr-read-only/wordrec/chopper.cpp</div> </div>
102
<div class="contents">
103
<a href="a01223.html">Go to the documentation of this file.</a><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 <span class="comment">/* -*-C-*-</span>
104
<a name="l00002"></a>00002 <span class="comment"> ********************************************************************************</span>
105
<a name="l00003"></a>00003 <span class="comment"> *</span>
106
<a name="l00004"></a>00004 <span class="comment"> * File: chopper.c (Formerly chopper.c)</span>
107
<a name="l00005"></a>00005 <span class="comment"> * Description:</span>
108
<a name="l00006"></a>00006 <span class="comment"> * Author: Mark Seaman, OCR Technology</span>
109
<a name="l00007"></a>00007 <span class="comment"> * Created: Fri Oct 16 14:37:00 1987</span>
110
<a name="l00008"></a>00008 <span class="comment"> * Modified: Tue Jul 30 16:18:52 1991 (Mark Seaman) marks@hpgrlt</span>
111
<a name="l00009"></a>00009 <span class="comment"> * Language: C</span>
112
<a name="l00010"></a>00010 <span class="comment"> * Package: N/A</span>
113
<a name="l00011"></a>00011 <span class="comment"> * Status: Reusable Software Component</span>
114
<a name="l00012"></a>00012 <span class="comment"> *</span>
115
<a name="l00013"></a>00013 <span class="comment"> * (c) Copyright 1987, Hewlett-Packard Company.</span>
116
<a name="l00014"></a>00014 <span class="comment"> ** Licensed under the Apache License, Version 2.0 (the "License");</span>
117
<a name="l00015"></a>00015 <span class="comment"> ** you may not use this file except in compliance with the License.</span>
118
<a name="l00016"></a>00016 <span class="comment"> ** You may obtain a copy of the License at</span>
119
<a name="l00017"></a>00017 <span class="comment"> ** http://www.apache.org/licenses/LICENSE-2.0</span>
120
<a name="l00018"></a>00018 <span class="comment"> ** Unless required by applicable law or agreed to in writing, software</span>
121
<a name="l00019"></a>00019 <span class="comment"> ** distributed under the License is distributed on an "AS IS" BASIS,</span>
122
<a name="l00020"></a>00020 <span class="comment"> ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
123
<a name="l00021"></a>00021 <span class="comment"> ** See the License for the specific language governing permissions and</span>
124
<a name="l00022"></a>00022 <span class="comment"> ** limitations under the License.</span>
125
<a name="l00023"></a>00023 <span class="comment"> *</span>
126
<a name="l00024"></a>00024 <span class="comment"> **************************************************************************/</span>
127
<a name="l00025"></a>00025
128
<a name="l00026"></a>00026 <span class="comment">/*----------------------------------------------------------------------</span>
129
<a name="l00027"></a>00027 <span class="comment"> I n c l u d e s</span>
130
<a name="l00028"></a>00028 <span class="comment">----------------------------------------------------------------------*/</span>
131
<a name="l00029"></a>00029
132
<a name="l00030"></a>00030 <span class="preprocessor">#include &lt;math.h&gt;</span>
133
<a name="l00031"></a>00031
134
<a name="l00032"></a>00032 <span class="preprocessor">#include "<a class="code" href="a01224.html">chopper.h</a>"</span>
135
<a name="l00033"></a>00033
136
<a name="l00034"></a>00034 <span class="preprocessor">#include "assert.h"</span>
137
<a name="l00035"></a>00035 <span class="preprocessor">#include "<a class="code" href="a01220.html">associate.h</a>"</span>
138
<a name="l00036"></a>00036 <span class="preprocessor">#include "<a class="code" href="a00730.html">blobs.h</a>"</span>
139
<a name="l00037"></a>00037 <span class="preprocessor">#include "<a class="code" href="a01025.html">callcpp.h</a>"</span>
140
<a name="l00038"></a>00038 <span class="preprocessor">#include "<a class="code" href="a01026.html">const.h</a>"</span>
141
<a name="l00039"></a>00039 <span class="preprocessor">#include "<a class="code" href="a01228.html">findseam.h</a>"</span>
142
<a name="l00040"></a>00040 <span class="preprocessor">#include "<a class="code" href="a01038.html">freelist.h</a>"</span>
143
<a name="l00041"></a>00041 <span class="preprocessor">#include "<a class="code" href="a01039.html">globals.h</a>"</span>
144
<a name="l00042"></a>00042 <span class="preprocessor">#include "<a class="code" href="a01240.html">makechop.h</a>"</span>
145
<a name="l00043"></a>00043 <span class="preprocessor">#include "<a class="code" href="a01252.html">render.h</a>"</span>
146
<a name="l00044"></a>00044 <span class="preprocessor">#include "<a class="code" href="a00769.html">pageres.h</a>"</span>
147
<a name="l00045"></a>00045 <span class="preprocessor">#include "<a class="code" href="a00794.html">seam.h</a>"</span>
148
<a name="l00046"></a>00046 <span class="preprocessor">#include "<a class="code" href="a01057.html">stopper.h</a>"</span>
149
<a name="l00047"></a>00047 <span class="preprocessor">#include "<a class="code" href="a01045.html">structures.h</a>"</span>
150
<a name="l00048"></a>00048 <span class="preprocessor">#include "<a class="code" href="a00866.html">unicharset.h</a>"</span>
151
<a name="l00049"></a>00049 <span class="preprocessor">#include "<a class="code" href="a01257.html">wordrec.h</a>"</span>
152
<a name="l00050"></a>00050
153
<a name="l00051"></a>00051 <span class="comment">// Include automatically generated configuration file if running autoconf.</span>
154
<a name="l00052"></a>00052 <span class="preprocessor">#ifdef HAVE_CONFIG_H</span>
155
<a name="l00053"></a>00053 <span class="preprocessor"></span><span class="preprocessor">#include "<a class="code" href="a00950.html">config_auto.h</a>"</span>
156
<a name="l00054"></a>00054 <span class="preprocessor">#endif</span>
157
<a name="l00055"></a>00055 <span class="preprocessor"></span>
158
<a name="l00056"></a>00056 <span class="comment">// Even though the limit on the number of chunks may now be removed, keep</span>
159
<a name="l00057"></a>00057 <span class="comment">// the same limit for repeatable behavior, and it may be a speed advantage.</span>
160
<a name="l00058"></a>00058 <span class="keyword">static</span> <span class="keyword">const</span> <span class="keywordtype">int</span> kMaxNumChunks = 64;
161
<a name="l00059"></a>00059
162
<a name="l00060"></a>00060 <span class="comment">/*----------------------------------------------------------------------</span>
163
<a name="l00061"></a>00061 <span class="comment"> F u n c t i o n s</span>
164
<a name="l00062"></a>00062 <span class="comment">----------------------------------------------------------------------*/</span>
165
<a name="l00068"></a><a class="code" href="a01224.html#a0a3b9f61e193c679c40344dad4267e3d">00068</a> <span class="keywordtype">void</span> <a class="code" href="a01223.html#a0a3b9f61e193c679c40344dad4267e3d">preserve_outline</a>(<a class="code" href="a00362.html">EDGEPT</a> *start) {
166
<a name="l00069"></a>00069 <a class="code" href="a00362.html">EDGEPT</a> *srcpt;
167
<a name="l00070"></a>00070
168
<a name="l00071"></a>00071 <span class="keywordflow">if</span> (start == NULL)
169
<a name="l00072"></a>00072 <span class="keywordflow">return</span>;
170
<a name="l00073"></a>00073 srcpt = start;
171
<a name="l00074"></a>00074 <span class="keywordflow">do</span> {
172
<a name="l00075"></a>00075 srcpt-><a class="code" href="a00362.html#a7f92d543e033519df214c028e8737991">flags</a>[1] = 1;
173
<a name="l00076"></a>00076 srcpt = srcpt-><a class="code" href="a00362.html#a196f7786194752f89e364a586d24884d">next</a>;
174
<a name="l00077"></a>00077 }
175
<a name="l00078"></a>00078 <span class="keywordflow">while</span> (srcpt != start);
176
<a name="l00079"></a>00079 srcpt-><a class="code" href="a00362.html#a7f92d543e033519df214c028e8737991">flags</a>[1] = 2;
177
<a name="l00080"></a>00080 }
178
<a name="l00081"></a>00081
179
<a name="l00082"></a>00082
180
<a name="l00083"></a>00083 <span class="comment">/**************************************************************************/</span>
181
<a name="l00084"></a><a class="code" href="a01224.html#af2a51913c75408354ffd6e5344c30389">00084</a> <span class="keywordtype">void</span> <a class="code" href="a01223.html#af2a51913c75408354ffd6e5344c30389">preserve_outline_tree</a>(<a class="code" href="a00613.html">TESSLINE</a> *srcline) {
182
<a name="l00085"></a>00085 <a class="code" href="a00613.html">TESSLINE</a> *outline;
183
<a name="l00086"></a>00086
184
<a name="l00087"></a>00087 <span class="keywordflow">for</span> (outline = srcline; outline != NULL; outline = outline-><a class="code" href="a00613.html#a0fb224d43ea53a791f0957fa92793bef">next</a>) {
185
<a name="l00088"></a>00088 <a class="code" href="a01223.html#a0a3b9f61e193c679c40344dad4267e3d">preserve_outline</a> (outline-><a class="code" href="a00613.html#a0ab1e26b1dae548f0bd3c096c6183b18">loop</a>);
186
<a name="l00089"></a>00089 }
187
<a name="l00090"></a>00090 }
188
<a name="l00091"></a>00091
189
<a name="l00092"></a>00092
190
<a name="l00098"></a><a class="code" href="a01224.html#aca09efda43c5297643be6ba8706ceb83">00098</a> <a class="code" href="a00362.html">EDGEPT</a> *<a class="code" href="a01223.html#aca09efda43c5297643be6ba8706ceb83">restore_outline</a>(<a class="code" href="a00362.html">EDGEPT</a> *start) {
191
<a name="l00099"></a>00099 <a class="code" href="a00362.html">EDGEPT</a> *srcpt;
192
<a name="l00100"></a>00100 <a class="code" href="a00362.html">EDGEPT</a> *real_start;
193
<a name="l00101"></a>00101
194
<a name="l00102"></a>00102 <span class="keywordflow">if</span> (start == NULL)
195
<a name="l00103"></a>00103 <span class="keywordflow">return</span> NULL;
196
<a name="l00104"></a>00104 srcpt = start;
197
<a name="l00105"></a>00105 <span class="keywordflow">do</span> {
198
<a name="l00106"></a>00106 <span class="keywordflow">if</span> (srcpt-><a class="code" href="a00362.html#a7f92d543e033519df214c028e8737991">flags</a>[1] == 2)
199
<a name="l00107"></a>00107 <span class="keywordflow">break</span>;
200
<a name="l00108"></a>00108 srcpt = srcpt-><a class="code" href="a00362.html#a196f7786194752f89e364a586d24884d">next</a>;
201
<a name="l00109"></a>00109 }
202
<a name="l00110"></a>00110 <span class="keywordflow">while</span> (srcpt != start);
203
<a name="l00111"></a>00111 real_start = srcpt;
204
<a name="l00112"></a>00112 <span class="keywordflow">do</span> {
205
<a name="l00113"></a>00113 srcpt = srcpt-><a class="code" href="a00362.html#a196f7786194752f89e364a586d24884d">next</a>;
206
<a name="l00114"></a>00114 <span class="keywordflow">if</span> (srcpt-><a class="code" href="a00362.html#a8326da8daf597bcdc5a05064a37cbbb1">prev</a>-><a class="code" href="a00362.html#a7f92d543e033519df214c028e8737991">flags</a>[1] == 0) {
207
<a name="l00115"></a>00115 <a class="code" href="a00795.html#ada6ba0f2954768ab4b7d9d2cf165dee0">remove_edgept</a>(srcpt-><a class="code" href="a00362.html#a8326da8daf597bcdc5a05064a37cbbb1">prev</a>);
208
<a name="l00116"></a>00116 }
209
<a name="l00117"></a>00117 }
210
<a name="l00118"></a>00118 <span class="keywordflow">while</span> (srcpt != real_start);
211
<a name="l00119"></a>00119 <span class="keywordflow">return</span> real_start;
212
<a name="l00120"></a>00120 }
213
<a name="l00121"></a>00121
214
<a name="l00122"></a>00122
215
<a name="l00123"></a>00123 <span class="comment">/******************************************************************************/</span>
216
<a name="l00124"></a><a class="code" href="a01224.html#a426bc9c96107b36d173f86d8a74f9e8c">00124</a> <span class="keywordtype">void</span> <a class="code" href="a01223.html#a426bc9c96107b36d173f86d8a74f9e8c">restore_outline_tree</a>(<a class="code" href="a00613.html">TESSLINE</a> *srcline) {
217
<a name="l00125"></a>00125 <a class="code" href="a00613.html">TESSLINE</a> *outline;
218
<a name="l00126"></a>00126
219
<a name="l00127"></a>00127 <span class="keywordflow">for</span> (outline = srcline; outline != NULL; outline = outline-><a class="code" href="a00613.html#a0fb224d43ea53a791f0957fa92793bef">next</a>) {
220
<a name="l00128"></a>00128 outline-><a class="code" href="a00613.html#a0ab1e26b1dae548f0bd3c096c6183b18">loop</a> = <a class="code" href="a01223.html#aca09efda43c5297643be6ba8706ceb83">restore_outline</a> (outline-><a class="code" href="a00613.html#a0ab1e26b1dae548f0bd3c096c6183b18">loop</a>);
221
<a name="l00129"></a>00129 outline-><a class="code" href="a00613.html#a6eddfefc04de79fe19712d90925de2fb">start</a> = outline-><a class="code" href="a00613.html#a0ab1e26b1dae548f0bd3c096c6183b18">loop</a>-><a class="code" href="a00362.html#adee462d390e637a8393ba373c8d2a1bb">pos</a>;
222
<a name="l00130"></a>00130 }
223
<a name="l00131"></a>00131 }
224
<a name="l00132"></a>00132
225
<a name="l00133"></a>00133 <span class="comment">// Helper runs all the checks on a seam to make sure it is valid.</span>
226
<a name="l00134"></a>00134 <span class="comment">// Returns the seam if OK, otherwise deletes the seam and returns NULL.</span>
227
<a name="l00135"></a>00135 <span class="keyword">static</span> <a class="code" href="a00533.html">SEAM</a>* CheckSeam(<span class="keywordtype">int</span> debug_level, <a class="code" href="a00831.html#aba1f582fd0168f3ff9225d8c90fa9eb8">inT32</a> blob_number, <a class="code" href="a00635.html">TWERD</a>* word,
228
<a name="l00136"></a>00136 <a class="code" href="a00591.html">TBLOB</a>* blob, <a class="code" href="a00591.html">TBLOB</a>* other_blob,
229
<a name="l00137"></a>00137 <span class="keyword">const</span> <a class="code" href="a00403.html">GenericVector&lt;SEAM*&gt;</a>&amp; seams, <a class="code" href="a00533.html">SEAM</a>* seam) {
230
<a name="l00138"></a>00138 <span class="keywordflow">if</span> (seam == NULL ||
231
<a name="l00139"></a>00139 blob-><a class="code" href="a00591.html#a3c9282cad4102157d214bcc6311cda07">outlines</a> == NULL ||
232
<a name="l00140"></a>00140 other_blob-><a class="code" href="a00591.html#a3c9282cad4102157d214bcc6311cda07">outlines</a> == NULL ||
233
<a name="l00141"></a>00141 <a class="code" href="a01223.html#a8e8e9f6cb587b621054e6ff626a46a23">total_containment</a>(blob, other_blob) ||
234
<a name="l00142"></a>00142 <a class="code" href="a01223.html#adde6889c67dfb8661a9911b56bc05c96">check_blob</a>(other_blob) ||
235
<a name="l00143"></a>00143 !(<a class="code" href="a01223.html#a5b0b5e54a6dabe1f874383318ad2b172">check_seam_order</a>(blob, seam) &amp;&amp;
236
<a name="l00144"></a>00144 <a class="code" href="a01223.html#a5b0b5e54a6dabe1f874383318ad2b172">check_seam_order</a>(other_blob, seam)) ||
237
<a name="l00145"></a>00145 <a class="code" href="a01223.html#a9a0430f1c62268bf762a58a97c56acd7">any_shared_split_points</a>(seams, seam) ||
238
<a name="l00146"></a>00146 !<a class="code" href="a00793.html#a3c74fe95b1a95e4d792d3a4bbd958e55">test_insert_seam</a>(seams, word, blob_number)) {
239
<a name="l00147"></a>00147 word-><a class="code" href="a00635.html#a52fba22521039a79d8c1c8182f602dcc">blobs</a>.<a class="code" href="a00403.html#a3fd37a240a42f1c3052e8d28614d3702">remove</a>(blob_number + 1);
240
<a name="l00148"></a>00148 <span class="keywordflow">if</span> (seam) {
241
<a name="l00149"></a>00149 <a class="code" href="a01239.html#aa6a78540c9d1b2be89f28016c2aa408f">undo_seam</a>(blob, other_blob, seam);
242
<a name="l00150"></a>00150 <span class="keyword">delete</span> seam;
243
<a name="l00151"></a>00151 seam = NULL;
244
<a name="l00152"></a>00152 <span class="preprocessor">#ifndef GRAPHICS_DISABLED</span>
245
<a name="l00153"></a>00153 <span class="preprocessor"></span> <span class="keywordflow">if</span> (debug_level) {
246
<a name="l00154"></a>00154 <span class="keywordflow">if</span> (debug_level >2)
247
<a name="l00155"></a>00155 <a class="code" href="a01251.html#a2b56bf58c648ddd5efa02b1be0438659">display_blob</a>(blob, <a class="code" href="a01025.html#a17bc059e437838f094a5a25c2d5ab88fad3163c1fcda01965b692ec2c3122b743">Red</a>);
248
<a name="l00156"></a>00156 <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a>(<span class="stringliteral">"\n** seam being removed ** \n"</span>);
249
<a name="l00157"></a>00157 }
250
<a name="l00158"></a>00158 <span class="preprocessor">#endif</span>
251
<a name="l00159"></a>00159 <span class="preprocessor"></span> } <span class="keywordflow">else</span> {
252
<a name="l00160"></a>00160 <span class="keyword">delete</span> other_blob;
253
<a name="l00161"></a>00161 }
254
<a name="l00162"></a>00162 <span class="keywordflow">return</span> NULL;
255
<a name="l00163"></a>00163 }
256
<a name="l00164"></a>00164 <span class="keywordflow">return</span> seam;
257
<a name="l00165"></a>00165 }
258
<a name="l00166"></a>00166
259
<a name="l00167"></a>00167
260
<a name="l00174"></a>00174 <span class="keyword">namespace </span><a class="code" href="a01266.html#afed58feacb84df2de88bdd613cfdba6d">tesseract</a> {
261
<a name="l00175"></a><a class="code" href="a00655.html#a4c68a9c2e011ad6784eb26fefa13c00c">00175</a> <a class="code" href="a00533.html">SEAM</a> *<a class="code" href="a00655.html#a4c68a9c2e011ad6784eb26fefa13c00c">Wordrec::attempt_blob_chop</a>(<a class="code" href="a00635.html">TWERD</a> *word, <a class="code" href="a00591.html">TBLOB</a> *blob, <a class="code" href="a00831.html#aba1f582fd0168f3ff9225d8c90fa9eb8">inT32</a> blob_number,
262
<a name="l00176"></a>00176 <span class="keywordtype">bool</span> italic_blob,
263
<a name="l00177"></a>00177 <span class="keyword">const</span> <a class="code" href="a00403.html">GenericVector&lt;SEAM*&gt;</a>&amp; seams) {
264
<a name="l00178"></a>00178 <span class="keywordflow">if</span> (<a class="code" href="a00655.html#a34abe5c1b41feff68500d1b83a1f891d">repair_unchopped_blobs</a>)
265
<a name="l00179"></a>00179 <a class="code" href="a01223.html#af2a51913c75408354ffd6e5344c30389">preserve_outline_tree</a> (blob-><a class="code" href="a00591.html#a3c9282cad4102157d214bcc6311cda07">outlines</a>);
266
<a name="l00180"></a>00180 <a class="code" href="a00591.html">TBLOB</a> *other_blob = <a class="code" href="a00591.html#a9c8ff7e2dd5c43906b4075db07a3e465">TBLOB::ShallowCopy</a>(*blob); <span class="comment">/* Make new blob */</span>
267
<a name="l00181"></a>00181 <span class="comment">// Insert it into the word.</span>
268
<a name="l00182"></a>00182 word-><a class="code" href="a00635.html#a52fba22521039a79d8c1c8182f602dcc">blobs</a>.<a class="code" href="a00403.html#a57ca5259541548a97bcfd4d0925a27ff">insert</a>(other_blob, blob_number + 1);
269
<a name="l00183"></a>00183
270
<a name="l00184"></a>00184 <a class="code" href="a00533.html">SEAM</a> *seam = NULL;
271
<a name="l00185"></a>00185 <span class="keywordflow">if</span> (<a class="code" href="a00314.html#a22717c4d299d3a52683ad90114b20200">prioritize_division</a>) {
272
<a name="l00186"></a>00186 <a class="code" href="a00629.html">TPOINT</a> location;
273
<a name="l00187"></a>00187 <span class="keywordflow">if</span> (<a class="code" href="a00729.html#aa87284f5696fb5dd745dafa85539d760">divisible_blob</a>(blob, italic_blob, &amp;location)) {
274
<a name="l00188"></a>00188 seam = <span class="keyword">new</span> <a class="code" href="a00533.html">SEAM</a>(0.0f, location, NULL, NULL, NULL);
275
<a name="l00189"></a>00189 }
276
<a name="l00190"></a>00190 }
277
<a name="l00191"></a>00191 <span class="keywordflow">if</span> (seam == NULL)
278
<a name="l00192"></a>00192 seam = <a class="code" href="a00655.html#adeb84d8b94a6c87354fc452461de3b39">pick_good_seam</a>(blob);
279
<a name="l00193"></a>00193 <span class="keywordflow">if</span> (<a class="code" href="a00655.html#a3f1c04cfbc1650eae6fc981984717248">chop_debug</a>) {
280
<a name="l00194"></a>00194 <span class="keywordflow">if</span> (seam != NULL)
281
<a name="l00195"></a>00195 <a class="code" href="a00793.html#a2168b2b29d8981405d068256c2b61e3b">print_seam</a>(<span class="stringliteral">"Good seam picked="</span>, seam);
282
<a name="l00196"></a>00196 <span class="keywordflow">else</span>
283
<a name="l00197"></a>00197 <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a>(<span class="stringliteral">"\n** no seam picked *** \n"</span>);
284
<a name="l00198"></a>00198 }
285
<a name="l00199"></a>00199 <span class="keywordflow">if</span> (seam) {
286
<a name="l00200"></a>00200 <a class="code" href="a01239.html#a0ead1c1a4df9b2c0861e9da84f4c3079">apply_seam</a>(blob, other_blob, italic_blob, seam);
287
<a name="l00201"></a>00201 }
288
<a name="l00202"></a>00202
289
<a name="l00203"></a>00203 seam = CheckSeam(<a class="code" href="a00655.html#a3f1c04cfbc1650eae6fc981984717248">chop_debug</a>, blob_number, word, blob, other_blob,
290
<a name="l00204"></a>00204 seams, seam);
291
<a name="l00205"></a>00205 <span class="keywordflow">if</span> (seam == NULL) {
292
<a name="l00206"></a>00206 <span class="keywordflow">if</span> (<a class="code" href="a00655.html#a34abe5c1b41feff68500d1b83a1f891d">repair_unchopped_blobs</a>)
293
<a name="l00207"></a>00207 <a class="code" href="a01223.html#a426bc9c96107b36d173f86d8a74f9e8c">restore_outline_tree</a>(blob-><a class="code" href="a00591.html#a3c9282cad4102157d214bcc6311cda07">outlines</a>);
294
<a name="l00208"></a>00208 <span class="keywordflow">if</span> (word-><a class="code" href="a00635.html#a408678da3281eb77874487aceb44b95c">latin_script</a>) {
295
<a name="l00209"></a>00209 <span class="comment">// If the blob can simply be divided into outlines, then do that.</span>
296
<a name="l00210"></a>00210 <a class="code" href="a00629.html">TPOINT</a> location;
297
<a name="l00211"></a>00211 <span class="keywordflow">if</span> (<a class="code" href="a00729.html#aa87284f5696fb5dd745dafa85539d760">divisible_blob</a>(blob, italic_blob, &amp;location)) {
298
<a name="l00212"></a>00212 other_blob = <a class="code" href="a00591.html#a9c8ff7e2dd5c43906b4075db07a3e465">TBLOB::ShallowCopy</a>(*blob); <span class="comment">/* Make new blob */</span>
299
<a name="l00213"></a>00213 word-><a class="code" href="a00635.html#a52fba22521039a79d8c1c8182f602dcc">blobs</a>.<a class="code" href="a00403.html#a57ca5259541548a97bcfd4d0925a27ff">insert</a>(other_blob, blob_number + 1);
300
<a name="l00214"></a>00214 seam = <span class="keyword">new</span> <a class="code" href="a00533.html">SEAM</a>(0.0f, location, NULL, NULL, NULL);
301
<a name="l00215"></a>00215 <a class="code" href="a01239.html#a0ead1c1a4df9b2c0861e9da84f4c3079">apply_seam</a>(blob, other_blob, italic_blob, seam);
302
<a name="l00216"></a>00216 seam = CheckSeam(<a class="code" href="a00655.html#a3f1c04cfbc1650eae6fc981984717248">chop_debug</a>, blob_number, word, blob, other_blob,
303
<a name="l00217"></a>00217 seams, seam);
304
<a name="l00218"></a>00218 }
305
<a name="l00219"></a>00219 }
306
<a name="l00220"></a>00220 }
307
<a name="l00221"></a>00221 <span class="keywordflow">return</span> seam;
308
<a name="l00222"></a>00222 }
309
<a name="l00223"></a>00223
310
<a name="l00224"></a>00224
311
<a name="l00225"></a><a class="code" href="a00655.html#ae3589614b94c958d314d2fc45d8aad03">00225</a> <a class="code" href="a00533.html">SEAM</a> *<a class="code" href="a00655.html#ae3589614b94c958d314d2fc45d8aad03">Wordrec::chop_numbered_blob</a>(<a class="code" href="a00635.html">TWERD</a> *word, <a class="code" href="a00831.html#aba1f582fd0168f3ff9225d8c90fa9eb8">inT32</a> blob_number,
312
<a name="l00226"></a>00226 <span class="keywordtype">bool</span> italic_blob,
313
<a name="l00227"></a>00227 <span class="keyword">const</span> <a class="code" href="a00403.html">GenericVector&lt;SEAM*&gt;</a>&amp; seams) {
314
<a name="l00228"></a>00228 <span class="keywordflow">return</span> <a class="code" href="a00655.html#a4c68a9c2e011ad6784eb26fefa13c00c">attempt_blob_chop</a>(word, word-><a class="code" href="a00635.html#a52fba22521039a79d8c1c8182f602dcc">blobs</a>[blob_number], blob_number,
315
<a name="l00229"></a>00229 italic_blob, seams);
316
<a name="l00230"></a>00230 }
317
<a name="l00231"></a>00231
318
<a name="l00232"></a>00232
319
<a name="l00233"></a><a class="code" href="a00655.html#a2e4743aa750e03095d1b74d3567e8656">00233</a> <a class="code" href="a00533.html">SEAM</a> *<a class="code" href="a00655.html#a2e4743aa750e03095d1b74d3567e8656">Wordrec::chop_overlapping_blob</a>(<span class="keyword">const</span> <a class="code" href="a00403.html">GenericVector&lt;TBOX&gt;</a>&amp; boxes,
320
<a name="l00234"></a>00234 <span class="keywordtype">bool</span> italic_blob, <a class="code" href="a00650.html">WERD_RES</a> *word_res,
321
<a name="l00235"></a>00235 <span class="keywordtype">int</span> *blob_number) {
322
<a name="l00236"></a>00236 <a class="code" href="a00635.html">TWERD</a> *word = word_res-><a class="code" href="a00650.html#ad1d449f93bb0150bc7ef0d9b59ed27ba">chopped_word</a>;
323
<a name="l00237"></a>00237 <span class="keywordflow">for</span> (*blob_number = 0; *blob_number &lt; word-><a class="code" href="a00635.html#adfdef9868e61650e076775011382ec70">NumBlobs</a>(); ++*blob_number) {
324
<a name="l00238"></a>00238 <a class="code" href="a00591.html">TBLOB</a> *blob = word-><a class="code" href="a00635.html#a52fba22521039a79d8c1c8182f602dcc">blobs</a>[*blob_number];
325
<a name="l00239"></a>00239 <a class="code" href="a00629.html">TPOINT</a> topleft, botright;
326
<a name="l00240"></a>00240 topleft.<a class="code" href="a00629.html#aa9c98655c3a2a0437821e5a3d3418b0f">x</a> = blob-><a class="code" href="a00591.html#a8f1f8f110170fe12cf8147d504cd0ea2">bounding_box</a>().<a class="code" href="a00592.html#a724fabf566586b663577dfa944ffbc61">left</a>();
327
<a name="l00241"></a>00241 topleft.<a class="code" href="a00629.html#a6d62f015b74612e43975e439a096039c">y</a> = blob-><a class="code" href="a00591.html#a8f1f8f110170fe12cf8147d504cd0ea2">bounding_box</a>().<a class="code" href="a00592.html#adf92e9fdac1bdf11c10d1c4d1178791a">top</a>();
328
<a name="l00242"></a>00242 botright.<a class="code" href="a00629.html#aa9c98655c3a2a0437821e5a3d3418b0f">x</a> = blob-><a class="code" href="a00591.html#a8f1f8f110170fe12cf8147d504cd0ea2">bounding_box</a>().<a class="code" href="a00592.html#a8703081c1a1c26db3a4dddaca1028e34">right</a>();
329
<a name="l00243"></a>00243 botright.<a class="code" href="a00629.html#a6d62f015b74612e43975e439a096039c">y</a> = blob-><a class="code" href="a00591.html#a8f1f8f110170fe12cf8147d504cd0ea2">bounding_box</a>().<a class="code" href="a00592.html#a4451d237f1cd18c4982d63fe36a11fc3">bottom</a>();
330
<a name="l00244"></a>00244
331
<a name="l00245"></a>00245 <a class="code" href="a00629.html">TPOINT</a> original_topleft, original_botright;
332
<a name="l00246"></a>00246 word_res-><a class="code" href="a00650.html#a202d9d5d2a462d84012011c689e16605">denorm</a>.<a class="code" href="a00352.html#afb0790f980cc477b448397e6a9ecd9f8">DenormTransform</a>(NULL, topleft, &amp;original_topleft);
333
<a name="l00247"></a>00247 word_res-><a class="code" href="a00650.html#a202d9d5d2a462d84012011c689e16605">denorm</a>.<a class="code" href="a00352.html#afb0790f980cc477b448397e6a9ecd9f8">DenormTransform</a>(NULL, botright, &amp;original_botright);
334
<a name="l00248"></a>00248
335
<a name="l00249"></a>00249 <a class="code" href="a00592.html">TBOX</a> original_box = <a class="code" href="a00592.html">TBOX</a>(original_topleft.<a class="code" href="a00629.html#aa9c98655c3a2a0437821e5a3d3418b0f">x</a>, original_botright.<a class="code" href="a00629.html#a6d62f015b74612e43975e439a096039c">y</a>,
336
<a name="l00250"></a>00250 original_botright.<a class="code" href="a00629.html#aa9c98655c3a2a0437821e5a3d3418b0f">x</a>, original_topleft.<a class="code" href="a00629.html#a6d62f015b74612e43975e439a096039c">y</a>);
337
<a name="l00251"></a>00251
338
<a name="l00252"></a>00252 <span class="keywordtype">bool</span> almost_equal_box = <span class="keyword">false</span>;
339
<a name="l00253"></a>00253 <span class="keywordtype">int</span> num_overlap = 0;
340
<a name="l00254"></a>00254 <span class="keywordflow">for</span> (<span class="keywordtype">int</span> i = 0; i &lt; boxes.<a class="code" href="a00403.html#a111b51dd0bf1324cfb69ef70703d8e70">size</a>(); i++) {
341
<a name="l00255"></a>00255 <span class="keywordflow">if</span> (original_box.<a class="code" href="a00592.html#a9e8d88530048135be86afb906ff9dd74">overlap_fraction</a>(boxes[i]) > 0.125)
342
<a name="l00256"></a>00256 num_overlap++;
343
<a name="l00257"></a>00257 <span class="keywordflow">if</span> (original_box.<a class="code" href="a00592.html#a0f7a028a3ad0355f38e17db45c737384">almost_equal</a>(boxes[i], 3))
344
<a name="l00258"></a>00258 almost_equal_box = <span class="keyword">true</span>;
345
<a name="l00259"></a>00259 }
346
<a name="l00260"></a>00260
347
<a name="l00261"></a>00261 <a class="code" href="a00629.html">TPOINT</a> location;
348
<a name="l00262"></a>00262 <span class="keywordflow">if</span> (<a class="code" href="a00729.html#aa87284f5696fb5dd745dafa85539d760">divisible_blob</a>(blob, italic_blob, &amp;location) ||
349
<a name="l00263"></a>00263 (!almost_equal_box &amp;&amp; num_overlap > 1)) {
350
<a name="l00264"></a>00264 <a class="code" href="a00533.html">SEAM</a> *seam = <a class="code" href="a00655.html#a4c68a9c2e011ad6784eb26fefa13c00c">attempt_blob_chop</a>(word, blob, *blob_number,
351
<a name="l00265"></a>00265 italic_blob, word_res-><a class="code" href="a00650.html#a3d0de828a382a88581dafb5a304141f1">seam_array</a>);
352
<a name="l00266"></a>00266 <span class="keywordflow">if</span> (seam != NULL)
353
<a name="l00267"></a>00267 <span class="keywordflow">return</span> seam;
354
<a name="l00268"></a>00268 }
355
<a name="l00269"></a>00269 }
356
<a name="l00270"></a>00270
357
<a name="l00271"></a>00271 *blob_number = -1;
358
<a name="l00272"></a>00272 <span class="keywordflow">return</span> NULL;
359
<a name="l00273"></a>00273 }
360
<a name="l00274"></a>00274
361
<a name="l00275"></a>00275 } <span class="comment">// namespace tesseract</span>
362
<a name="l00276"></a>00276
363
<a name="l00277"></a>00277
364
<a name="l00283"></a><a class="code" href="a01224.html#a9a0430f1c62268bf762a58a97c56acd7">00283</a> <span class="keywordtype">int</span> <a class="code" href="a01223.html#a9a0430f1c62268bf762a58a97c56acd7">any_shared_split_points</a>(<span class="keyword">const</span> <a class="code" href="a00403.html">GenericVector&lt;SEAM*&gt;</a>&amp; seams, <a class="code" href="a00533.html">SEAM</a> *seam) {
365
<a name="l00284"></a>00284 <span class="keywordtype">int</span> length;
366
<a name="l00285"></a>00285 <span class="keywordtype">int</span> index;
367
<a name="l00286"></a>00286
368
<a name="l00287"></a>00287 length = seams.<a class="code" href="a00403.html#a111b51dd0bf1324cfb69ef70703d8e70">size</a>();
369
<a name="l00288"></a>00288 <span class="keywordflow">for</span> (index = 0; index &lt; length; index++)
370
<a name="l00289"></a>00289 <span class="keywordflow">if</span> (<a class="code" href="a00793.html#a763f0ef90abf994a2af640e50c85dc65">shared_split_points</a>(seams[index], seam))
371
<a name="l00290"></a>00290 <span class="keywordflow">return</span> TRUE;
372
<a name="l00291"></a>00291 <span class="keywordflow">return</span> FALSE;
373
<a name="l00292"></a>00292 }
374
<a name="l00293"></a>00293
375
<a name="l00294"></a>00294
376
<a name="l00300"></a><a class="code" href="a01224.html#adde6889c67dfb8661a9911b56bc05c96">00300</a> <span class="keywordtype">int</span> <a class="code" href="a01223.html#adde6889c67dfb8661a9911b56bc05c96">check_blob</a>(<a class="code" href="a00591.html">TBLOB</a> *blob) {
377
<a name="l00301"></a>00301 <a class="code" href="a00613.html">TESSLINE</a> *outline;
378
<a name="l00302"></a>00302 <a class="code" href="a00362.html">EDGEPT</a> *edgept;
379
<a name="l00303"></a>00303
380
<a name="l00304"></a>00304 <span class="keywordflow">for</span> (outline = blob-><a class="code" href="a00591.html#a3c9282cad4102157d214bcc6311cda07">outlines</a>; outline != NULL; outline = outline-><a class="code" href="a00613.html#a0fb224d43ea53a791f0957fa92793bef">next</a>) {
381
<a name="l00305"></a>00305 edgept = outline-><a class="code" href="a00613.html#a0ab1e26b1dae548f0bd3c096c6183b18">loop</a>;
382
<a name="l00306"></a>00306 <span class="keywordflow">do</span> {
383
<a name="l00307"></a>00307 <span class="keywordflow">if</span> (edgept == NULL)
384
<a name="l00308"></a>00308 <span class="keywordflow">break</span>;
385
<a name="l00309"></a>00309 edgept = edgept-><a class="code" href="a00362.html#a196f7786194752f89e364a586d24884d">next</a>;
386
<a name="l00310"></a>00310 }
387
<a name="l00311"></a>00311 <span class="keywordflow">while</span> (edgept != outline-><a class="code" href="a00613.html#a0ab1e26b1dae548f0bd3c096c6183b18">loop</a>);
388
<a name="l00312"></a>00312 <span class="keywordflow">if</span> (edgept == NULL)
389
<a name="l00313"></a>00313 <span class="keywordflow">return</span> 1;
390
<a name="l00314"></a>00314 }
391
<a name="l00315"></a>00315 <span class="keywordflow">return</span> 0;
392
<a name="l00316"></a>00316 }
393
<a name="l00317"></a>00317
394
<a name="l00318"></a>00318
395
<a name="l00319"></a>00319 <span class="keyword">namespace </span><a class="code" href="a01266.html#afed58feacb84df2de88bdd613cfdba6d">tesseract</a> {
396
<a name="l00332"></a><a class="code" href="a00655.html#a5e68fccf487b2a822300011a2e1d5752">00332</a> <a class="code" href="a00533.html">SEAM</a>* <a class="code" href="a00655.html#a5e68fccf487b2a822300011a2e1d5752">Wordrec::improve_one_blob</a>(<span class="keyword">const</span> <a class="code" href="a00403.html">GenericVector&lt;BLOB_CHOICE*&gt;</a>&amp; blob_choices,
397
<a name="l00333"></a>00333 <a class="code" href="a00403.html">DANGERR</a> *fixpt,
398
<a name="l00334"></a>00334 <span class="keywordtype">bool</span> split_next_to_fragment,
399
<a name="l00335"></a>00335 <span class="keywordtype">bool</span> italic_blob,
400
<a name="l00336"></a>00336 <a class="code" href="a00650.html">WERD_RES</a>* word,
401
<a name="l00337"></a>00337 <span class="keywordtype">int</span>* blob_number) {
402
<a name="l00338"></a>00338 <span class="keywordtype">float</span> rating_ceiling = <a class="code" href="a00831.html#a9adcee815220ec9db98062213806ed04">MAX_FLOAT32</a>;
403
<a name="l00339"></a>00339 <a class="code" href="a00533.html">SEAM</a> *seam = NULL;
404
<a name="l00340"></a>00340 <span class="keywordflow">do</span> {
405
<a name="l00341"></a>00341 *blob_number = <a class="code" href="a00655.html#aa92122a173b68d7261a4bec13bbb02f9">select_blob_to_split_from_fixpt</a>(fixpt);
406
<a name="l00342"></a>00342 <span class="keywordflow">if</span> (<a class="code" href="a00655.html#a3f1c04cfbc1650eae6fc981984717248">chop_debug</a>) <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a>(<span class="stringliteral">"blob_number from fixpt = %d\n"</span>, *blob_number);
407
<a name="l00343"></a>00343 <span class="keywordtype">bool</span> split_point_from_dict = (*blob_number != -1);
408
<a name="l00344"></a>00344 <span class="keywordflow">if</span> (split_point_from_dict) {
409
<a name="l00345"></a>00345 fixpt-><a class="code" href="a00403.html#a9cdbff49b186574b83e43afba606fdd9">clear</a>();
410
<a name="l00346"></a>00346 } <span class="keywordflow">else</span> {
411
<a name="l00347"></a>00347 *blob_number = <a class="code" href="a00655.html#a1036ef6be417b423b0ec5dc03e812db8">select_blob_to_split</a>(blob_choices, rating_ceiling,
412
<a name="l00348"></a>00348 split_next_to_fragment);
413
<a name="l00349"></a>00349 }
414
<a name="l00350"></a>00350 <span class="keywordflow">if</span> (<a class="code" href="a00655.html#a3f1c04cfbc1650eae6fc981984717248">chop_debug</a>) <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a>(<span class="stringliteral">"blob_number = %d\n"</span>, *blob_number);
415
<a name="l00351"></a>00351 <span class="keywordflow">if</span> (*blob_number == -1)
416
<a name="l00352"></a>00352 <span class="keywordflow">return</span> NULL;
417
<a name="l00353"></a>00353
418
<a name="l00354"></a>00354 <span class="comment">// TODO(rays) it may eventually help to allow italic_blob to be true,</span>
419
<a name="l00355"></a>00355 seam = <a class="code" href="a00655.html#ae3589614b94c958d314d2fc45d8aad03">chop_numbered_blob</a>(word-><a class="code" href="a00650.html#ad1d449f93bb0150bc7ef0d9b59ed27ba">chopped_word</a>, *blob_number, italic_blob,
420
<a name="l00356"></a>00356 word-><a class="code" href="a00650.html#a3d0de828a382a88581dafb5a304141f1">seam_array</a>);
421
<a name="l00357"></a>00357 <span class="keywordflow">if</span> (seam != NULL)
422
<a name="l00358"></a>00358 <span class="keywordflow">return</span> seam; <span class="comment">// Success!</span>
423
<a name="l00359"></a>00359 <span class="keywordflow">if</span> (blob_choices[*blob_number] == NULL)
424
<a name="l00360"></a>00360 <span class="keywordflow">return</span> NULL;
425
<a name="l00361"></a>00361 <span class="keywordflow">if</span> (!split_point_from_dict) {
426
<a name="l00362"></a>00362 <span class="comment">// We chopped the worst rated blob, try something else next time.</span>
427
<a name="l00363"></a>00363 rating_ceiling = blob_choices[*blob_number]->rating();
428
<a name="l00364"></a>00364 }
429
<a name="l00365"></a>00365 } <span class="keywordflow">while</span> (<span class="keyword">true</span>);
430
<a name="l00366"></a>00366 <span class="keywordflow">return</span> seam;
431
<a name="l00367"></a>00367 }
432
<a name="l00368"></a>00368
433
<a name="l00376"></a><a class="code" href="a00655.html#a0feea5c3a09770ac6a08d1ae23e3e931">00376</a> <a class="code" href="a00533.html">SEAM</a>* <a class="code" href="a00655.html#a0feea5c3a09770ac6a08d1ae23e3e931">Wordrec::chop_one_blob</a>(<span class="keyword">const</span> <a class="code" href="a00403.html">GenericVector&lt;TBOX&gt;</a>&amp; boxes,
434
<a name="l00377"></a>00377 <span class="keyword">const</span> <a class="code" href="a00403.html">GenericVector&lt;BLOB_CHOICE*&gt;</a>&amp; blob_choices,
435
<a name="l00378"></a>00378 <a class="code" href="a00650.html">WERD_RES</a>* word_res,
436
<a name="l00379"></a>00379 <span class="keywordtype">int</span>* blob_number) {
437
<a name="l00380"></a>00380 <span class="keywordflow">if</span> (<a class="code" href="a00314.html#a22717c4d299d3a52683ad90114b20200">prioritize_division</a>) {
438
<a name="l00381"></a>00381 <span class="keywordflow">return</span> <a class="code" href="a00655.html#a2e4743aa750e03095d1b74d3567e8656">chop_overlapping_blob</a>(boxes, <span class="keyword">true</span>, word_res, blob_number);
439
<a name="l00382"></a>00382 } <span class="keywordflow">else</span> {
440
<a name="l00383"></a>00383 <span class="keywordflow">return</span> <a class="code" href="a00655.html#a5e68fccf487b2a822300011a2e1d5752">improve_one_blob</a>(blob_choices, NULL, <span class="keyword">false</span>, <span class="keyword">true</span>, word_res,
441
<a name="l00384"></a>00384 blob_number);
442
<a name="l00385"></a>00385 }
443
<a name="l00386"></a>00386 }
444
<a name="l00387"></a>00387 } <span class="comment">// namespace tesseract</span>
445
<a name="l00388"></a>00388
446
<a name="l00397"></a><a class="code" href="a01224.html#a5b0b5e54a6dabe1f874383318ad2b172">00397</a> <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> <a class="code" href="a01223.html#a5b0b5e54a6dabe1f874383318ad2b172">check_seam_order</a>(<a class="code" href="a00591.html">TBLOB</a> *blob, <a class="code" href="a00533.html">SEAM</a> *seam) {
447
<a name="l00398"></a>00398 <a class="code" href="a00613.html">TESSLINE</a> *outline;
448
<a name="l00399"></a>00399 <a class="code" href="a00831.html#a2ba4d271e85baf0d333318985cb3bced">inT8</a> found_em[3];
449
<a name="l00400"></a>00400
450
<a name="l00401"></a>00401 <span class="keywordflow">if</span> (seam-><a class="code" href="a00533.html#a83e8fa8204e6bfc47739b6717c7734f0">split1</a> == NULL || blob == NULL)
451
<a name="l00402"></a>00402 <span class="keywordflow">return</span> (TRUE);
452
<a name="l00403"></a>00403
453
<a name="l00404"></a>00404 found_em[0] = found_em[1] = found_em[2] = FALSE;
454
<a name="l00405"></a>00405
455
<a name="l00406"></a>00406 <span class="keywordflow">for</span> (outline = blob-><a class="code" href="a00591.html#a3c9282cad4102157d214bcc6311cda07">outlines</a>; outline; outline = outline-><a class="code" href="a00613.html#a0fb224d43ea53a791f0957fa92793bef">next</a>) {
456
<a name="l00407"></a>00407 <span class="keywordflow">if</span> (!found_em[0] &&
457
<a name="l00408"></a>00408 ((seam-><a class="code" href="a00533.html#a83e8fa8204e6bfc47739b6717c7734f0">split1</a> == NULL) ||
458
<a name="l00409"></a>00409 <a class="code" href="a01240.html#a215ea46db3a9b6343a2186a48aaa3487">is_split_outline</a> (outline, seam-><a class="code" href="a00533.html#a83e8fa8204e6bfc47739b6717c7734f0">split1</a>))) {
459
<a name="l00410"></a>00410 found_em[0] = TRUE;
460
<a name="l00411"></a>00411 }
461
<a name="l00412"></a>00412 <span class="keywordflow">if</span> (!found_em[1] &&
462
<a name="l00413"></a>00413 ((seam-><a class="code" href="a00533.html#a5d6d0e267b811e07fa1bb33aa4d362f2">split2</a> == NULL) ||
463
<a name="l00414"></a>00414 <a class="code" href="a01240.html#a215ea46db3a9b6343a2186a48aaa3487">is_split_outline</a> (outline, seam-><a class="code" href="a00533.html#a5d6d0e267b811e07fa1bb33aa4d362f2">split2</a>))) {
464
<a name="l00415"></a>00415 found_em[1] = TRUE;
465
<a name="l00416"></a>00416 }
466
<a name="l00417"></a>00417 <span class="keywordflow">if</span> (!found_em[2] &&
467
<a name="l00418"></a>00418 ((seam-><a class="code" href="a00533.html#a136389fc1622fbc9fc85fd45a264db06">split3</a> == NULL) ||
468
<a name="l00419"></a>00419 <a class="code" href="a01240.html#a215ea46db3a9b6343a2186a48aaa3487">is_split_outline</a> (outline, seam-><a class="code" href="a00533.html#a136389fc1622fbc9fc85fd45a264db06">split3</a>))) {
469
<a name="l00420"></a>00420 found_em[2] = TRUE;
470
<a name="l00421"></a>00421 }
471
<a name="l00422"></a>00422 }
472
<a name="l00423"></a>00423
473
<a name="l00424"></a>00424 <span class="keywordflow">if</span> (!found_em[0] || !found_em[1] || !found_em[2])
474
<a name="l00425"></a>00425 <span class="keywordflow">return</span> (FALSE);
475
<a name="l00426"></a>00426 <span class="keywordflow">else</span>
476
<a name="l00427"></a>00427 <span class="keywordflow">return</span> (TRUE);
477
<a name="l00428"></a>00428 }
478
<a name="l00429"></a>00429
479
<a name="l00430"></a>00430 <span class="keyword">namespace </span><a class="code" href="a01266.html#afed58feacb84df2de88bdd613cfdba6d">tesseract</a> {
480
<a name="l00431"></a>00431
481
<a name="l00440"></a><a class="code" href="a00655.html#a49b374bbdf849c459e752af77e5acdf0">00440</a> <span class="keywordtype">void</span> <a class="code" href="a00655.html#a49b374bbdf849c459e752af77e5acdf0">Wordrec::chop_word_main</a>(<a class="code" href="a00650.html">WERD_RES</a> *word) {
482
<a name="l00441"></a>00441 <span class="keywordtype">int</span> num_blobs = word-><a class="code" href="a00650.html#ad1d449f93bb0150bc7ef0d9b59ed27ba">chopped_word</a>-><a class="code" href="a00635.html#adfdef9868e61650e076775011382ec70">NumBlobs</a>();
483
<a name="l00442"></a>00442 <span class="keywordflow">if</span> (word-><a class="code" href="a00650.html#af7454f140688a32b2bebb60ab36e91ee">ratings</a> == NULL) {
484
<a name="l00443"></a>00443 word-><a class="code" href="a00650.html#af7454f140688a32b2bebb60ab36e91ee">ratings</a> = <span class="keyword">new</span> <a class="code" href="a00461.html">MATRIX</a>(num_blobs, <a class="code" href="a00655.html#a7ddc3eb04dc27973de25ebf79b365d1e">wordrec_max_join_chunks</a>);
485
<a name="l00444"></a>00444 }
486
<a name="l00445"></a>00445 <span class="keywordflow">if</span> (word-><a class="code" href="a00650.html#af7454f140688a32b2bebb60ab36e91ee">ratings</a>-><a class="code" href="a00401.html#ac2361d09d80314d64330578d8e162566">get</a>(0, 0) == NULL) {
487
<a name="l00446"></a>00446 <span class="comment">// Run initial classification.</span>
488
<a name="l00447"></a>00447 <span class="keywordflow">for</span> (<span class="keywordtype">int</span> b = 0; b &lt; num_blobs; ++b) {
489
<a name="l00448"></a>00448 BLOB_CHOICE_LIST* choices = <a class="code" href="a00655.html#a481ae36065253f8ffd5b9668c6d65007">classify_piece</a>(word-><a class="code" href="a00650.html#a3d0de828a382a88581dafb5a304141f1">seam_array</a>, b, b,
490
<a name="l00449"></a>00449 <span class="stringliteral">"Initial:"</span>, word-><a class="code" href="a00650.html#ad1d449f93bb0150bc7ef0d9b59ed27ba">chopped_word</a>,
491
<a name="l00450"></a>00450 word-><a class="code" href="a00650.html#a267e5d5e9fe67117e47223c5d336b8ba">blamer_bundle</a>);
492
<a name="l00451"></a>00451 word-><a class="code" href="a00650.html#af7454f140688a32b2bebb60ab36e91ee">ratings</a>-><a class="code" href="a00401.html#a610f90d717d1a774cefa8736714f1cc2">put</a>(b, b, choices);
493
<a name="l00452"></a>00452 }
494
<a name="l00453"></a>00453 } <span class="keywordflow">else</span> {
495
<a name="l00454"></a>00454 <span class="comment">// Blobs have been pre-classified. Set matrix cell for all blob choices</span>
496
<a name="l00455"></a>00455 <span class="keywordflow">for</span> (<span class="keywordtype">int</span> col = 0; col &lt; word-><a class="code" href="a00650.html#af7454f140688a32b2bebb60ab36e91ee">ratings</a>-><a class="code" href="a00264.html#a82cbdd8a6eb23048b3d1cb656d34a364">dimension</a>(); ++col) {
497
<a name="l00456"></a>00456 <span class="keywordflow">for</span> (<span class="keywordtype">int</span> row = col; row &lt; word-><a class="code" href="a00650.html#af7454f140688a32b2bebb60ab36e91ee">ratings</a>-><a class="code" href="a00264.html#a82cbdd8a6eb23048b3d1cb656d34a364">dimension</a>() &amp;&amp;
498
<a name="l00457"></a>00457 row &lt; col + word-><a class="code" href="a00650.html#af7454f140688a32b2bebb60ab36e91ee">ratings</a>-><a class="code" href="a00264.html#a4de2a0b0b259849a9d4c0427f759299c">bandwidth</a>(); ++row) {
499
<a name="l00458"></a>00458 BLOB_CHOICE_LIST* choices = word-><a class="code" href="a00650.html#af7454f140688a32b2bebb60ab36e91ee">ratings</a>-><a class="code" href="a00401.html#ac2361d09d80314d64330578d8e162566">get</a>(col, row);
500
<a name="l00459"></a>00459 <span class="keywordflow">if</span> (choices != NULL) {
501
<a name="l00460"></a>00460 BLOB_CHOICE_IT bc_it(choices);
502
<a name="l00461"></a>00461 <span class="keywordflow">for</span> (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) {
503
<a name="l00462"></a>00462 bc_it.data()->set_matrix_cell(col, row);
504
<a name="l00463"></a>00463 }
505
<a name="l00464"></a>00464 }
506
<a name="l00465"></a>00465 }
507
<a name="l00466"></a>00466 }
508
<a name="l00467"></a>00467 }
509
<a name="l00468"></a>00468
510
<a name="l00469"></a>00469 <span class="comment">// Run Segmentation Search.</span>
511
<a name="l00470"></a>00470 <a class="code" href="a00270.html">BestChoiceBundle</a> best_choice_bundle(word-><a class="code" href="a00650.html#af7454f140688a32b2bebb60ab36e91ee">ratings</a>-><a class="code" href="a00264.html#a82cbdd8a6eb23048b3d1cb656d34a364">dimension</a>());
512
<a name="l00471"></a>00471 <a class="code" href="a00655.html#ae035df67f0ec9b7a8a2d83cd582d4fba">SegSearch</a>(word, &best_choice_bundle, word-><a class="code" href="a00650.html#a267e5d5e9fe67117e47223c5d336b8ba">blamer_bundle</a>);
513
<a name="l00472"></a>00472
514
<a name="l00473"></a>00473 <span class="keywordflow">if</span> (word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a> == NULL) {
515
<a name="l00474"></a>00474 <span class="comment">// SegSearch found no valid paths, so just use the leading diagonal.</span>
516
<a name="l00475"></a>00475 word-><a class="code" href="a00650.html#a1365a59f4be4013d3c23f01dc64c4c65">FakeWordFromRatings</a>();
517
<a name="l00476"></a>00476 }
518
<a name="l00477"></a>00477 word-><a class="code" href="a00650.html#ae57ef3e90db1cb0bcdf96f4b32dee362">RebuildBestState</a>();
519
<a name="l00478"></a>00478 <span class="comment">// If we finished without a hyphen at the end of the word, let the next word</span>
520
<a name="l00479"></a>00479 <span class="comment">// be found in the dictionary.</span>
521
<a name="l00480"></a>00480 <span class="keywordflow">if</span> (word-><a class="code" href="a00650.html#afc15fa7c1b5ad55d6772dea0d6865df1">word</a>-><a class="code" href="a00648.html#a81edde8597a3d9fd8a664d703d332c41">flag</a>(<a class="code" href="a00804.html#ad6968adbf8f2cc44adf333ec96efb0bea62efb985a62d85e014ee5ab039dd50ce">W_EOL</a>) &&
522
<a name="l00481"></a>00481 !<a class="code" href="a00314.html#a83e4207353a1bb07adfaa6a82a74c9ce">getDict</a>().<a class="code" href="a00354.html#aad63b54f05d3faaee2b8caec2a5145d7" title="Check whether the word has a hyphen at the end.">has_hyphen_end</a>(*word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>)) {
523
<a name="l00482"></a>00482 <a class="code" href="a00314.html#a83e4207353a1bb07adfaa6a82a74c9ce">getDict</a>().<a class="code" href="a00354.html#a6b7508e4459f295073316fff69a05ee9">reset_hyphen_vars</a>(<span class="keyword">true</span>);
524
<a name="l00483"></a>00483 }
525
<a name="l00484"></a>00484
526
<a name="l00485"></a>00485 <span class="keywordflow">if</span> (word-><a class="code" href="a00650.html#a267e5d5e9fe67117e47223c5d336b8ba">blamer_bundle</a> != NULL && this->fill_lattice_ != NULL) {
527
<a name="l00486"></a>00486 <a class="code" href="a00655.html#a242e3ff39be00aff1f1241a657d86543">CallFillLattice</a>(*word-><a class="code" href="a00650.html#af7454f140688a32b2bebb60ab36e91ee">ratings</a>, word-><a class="code" href="a00650.html#a23445f9e8dce6db8902504c0fe1d2e37">best_choices</a>,
528
<a name="l00487"></a>00487 *word-><a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>, word-><a class="code" href="a00650.html#a267e5d5e9fe67117e47223c5d336b8ba">blamer_bundle</a>);
529
<a name="l00488"></a>00488 }
530
<a name="l00489"></a>00489 <span class="keywordflow">if</span> (<a class="code" href="a00655.html#a15e6789d11062602e044c699bbd9b3aa">wordrec_debug_level</a> > 0) {
531
<a name="l00490"></a>00490 <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a>(<span class="stringliteral">"Final Ratings Matrix:\n"</span>);
532
<a name="l00491"></a>00491 word-><a class="code" href="a00650.html#af7454f140688a32b2bebb60ab36e91ee">ratings</a>-><a class="code" href="a00461.html#ae0f2489acb8b15666cb306f82ad2c6de">print</a>(<a class="code" href="a00314.html#a83e4207353a1bb07adfaa6a82a74c9ce">getDict</a>().getUnicharset());
533
<a name="l00492"></a>00492 }
534
<a name="l00493"></a>00493 word-><a class="code" href="a00650.html#a80aaa9d4d4a1b903aa53884e7bcf181e">FilterWordChoices</a>(<a class="code" href="a00314.html#a83e4207353a1bb07adfaa6a82a74c9ce">getDict</a>().stopper_debug_level);
535
<a name="l00494"></a>00494 }
536
<a name="l00495"></a>00495
537
<a name="l00503"></a><a class="code" href="a00655.html#a2d330fdd7f16efb28a436bb9da2b0eb2">00503</a> <span class="keywordtype">void</span> <a class="code" href="a00655.html#a2d330fdd7f16efb28a436bb9da2b0eb2">Wordrec::improve_by_chopping</a>(<span class="keywordtype">float</span> rating_cert_scale,
538
<a name="l00504"></a>00504 <a class="code" href="a00650.html">WERD_RES</a>* word,
539
<a name="l00505"></a>00505 <a class="code" href="a00270.html">BestChoiceBundle</a>* best_choice_bundle,
540
<a name="l00506"></a>00506 <a class="code" href="a00274.html">BlamerBundle</a>* blamer_bundle,
541
<a name="l00507"></a>00507 <a class="code" href="a00456.html">LMPainPoints</a>* pain_points,
542
<a name="l00508"></a>00508 <a class="code" href="a00403.html">GenericVector&lt;SegSearchPending&gt;</a>* pending) {
543
<a name="l00509"></a>00509 <span class="keywordtype">int</span> blob_number;
544
<a name="l00510"></a>00510 <span class="keywordflow">do</span> { <span class="comment">// improvement loop.</span>
545
<a name="l00511"></a>00511 <span class="comment">// Make a simple vector of BLOB_CHOICEs to make it easy to pick which</span>
546
<a name="l00512"></a>00512 <span class="comment">// one to chop.</span>
547
<a name="l00513"></a>00513 <a class="code" href="a00403.html">GenericVector&lt;BLOB_CHOICE*&gt;</a> blob_choices;
548
<a name="l00514"></a>00514 <span class="keywordtype">int</span> num_blobs = word-><a class="code" href="a00650.html#af7454f140688a32b2bebb60ab36e91ee">ratings</a>-><a class="code" href="a00264.html#a82cbdd8a6eb23048b3d1cb656d34a364">dimension</a>();
549
<a name="l00515"></a>00515 <span class="keywordflow">for</span> (<span class="keywordtype">int</span> i = 0; i &lt; num_blobs; ++i) {
550
<a name="l00516"></a>00516 BLOB_CHOICE_LIST* choices = word-><a class="code" href="a00650.html#af7454f140688a32b2bebb60ab36e91ee">ratings</a>-><a class="code" href="a00401.html#ac2361d09d80314d64330578d8e162566">get</a>(i, i);
551
<a name="l00517"></a>00517 <span class="keywordflow">if</span> (choices == NULL || choices->empty()) {
552
<a name="l00518"></a>00518 blob_choices.<a class="code" href="a00403.html#a0dc89fe2a365b04a61017f9d78c1a303">push_back</a>(NULL);
553
<a name="l00519"></a>00519 } <span class="keywordflow">else</span> {
554
<a name="l00520"></a>00520 BLOB_CHOICE_IT bc_it(choices);
555
<a name="l00521"></a>00521 blob_choices.<a class="code" href="a00403.html#a0dc89fe2a365b04a61017f9d78c1a303">push_back</a>(bc_it.data());
556
<a name="l00522"></a>00522 }
557
<a name="l00523"></a>00523 }
558
<a name="l00524"></a>00524 <a class="code" href="a00533.html">SEAM</a>* seam = <a class="code" href="a00655.html#a5e68fccf487b2a822300011a2e1d5752">improve_one_blob</a>(blob_choices, &best_choice_bundle-><a class="code" href="a00270.html#a2a650c297a6ace5f9f506b972d9a4fb1">fixpt</a>,
559
<a name="l00525"></a>00525 <span class="keyword">false</span>, <span class="keyword">false</span>, word, &blob_number);
560
<a name="l00526"></a>00526 <span class="keywordflow">if</span> (seam == NULL) <span class="keywordflow">break</span>;
561
<a name="l00527"></a>00527 <span class="comment">// A chop has been made. We have to correct all the data structures to</span>
562
<a name="l00528"></a>00528 <span class="comment">// take into account the extra bottom-level blob.</span>
563
<a name="l00529"></a>00529 <span class="comment">// Put the seam into the seam_array and correct everything else on the</span>
564
<a name="l00530"></a>00530 <span class="comment">// word: ratings matrix (including matrix location in the BLOB_CHOICES),</span>
565
<a name="l00531"></a>00531 <span class="comment">// states in WERD_CHOICEs, and blob widths.</span>
566
<a name="l00532"></a>00532 word-><a class="code" href="a00650.html#ae322fbf32d8be7d9a22bd860ae08b38f">InsertSeam</a>(blob_number, seam);
567
<a name="l00533"></a>00533 <span class="comment">// Insert a new entry in the beam array.</span>
568
<a name="l00534"></a>00534 best_choice_bundle-><a class="code" href="a00270.html#ae1260cdb6c1f0766239bf05fd5fac577">beam</a>.<a class="code" href="a00403.html#a57ca5259541548a97bcfd4d0925a27ff">insert</a>(<span class="keyword">new</span> <a class="code" href="a00449.html">LanguageModelState</a>, blob_number);
569
<a name="l00535"></a>00535 <span class="comment">// Fixpts are outdated, but will get recalculated.</span>
570
<a name="l00536"></a>00536 best_choice_bundle-><a class="code" href="a00270.html#a2a650c297a6ace5f9f506b972d9a4fb1">fixpt</a>.<a class="code" href="a00403.html#a9cdbff49b186574b83e43afba606fdd9">clear</a>();
571
<a name="l00537"></a>00537 <span class="comment">// Remap existing pain points.</span>
572
<a name="l00538"></a>00538 pain_points-><a class="code" href="a00456.html#a4abf43f1384872660cf5a93453da4da0">RemapForSplit</a>(blob_number);
573
<a name="l00539"></a>00539 <span class="comment">// Insert a new pending at the chop point.</span>
574
<a name="l00540"></a>00540 pending-><a class="code" href="a00403.html#a57ca5259541548a97bcfd4d0925a27ff">insert</a>(<a class="code" href="a00538.html">SegSearchPending</a>(), blob_number);
575
<a name="l00541"></a>00541
576
<a name="l00542"></a>00542 <span class="comment">// Classify the two newly created blobs using ProcessSegSearchPainPoint,</span>
577
<a name="l00543"></a>00543 <span class="comment">// as that updates the pending correctly and adds new pain points.</span>
578
<a name="l00544"></a>00544 <a class="code" href="a00463.html">MATRIX_COORD</a> pain_point(blob_number, blob_number);
579
<a name="l00545"></a>00545 <a class="code" href="a00655.html#ae44c5dc9bd69b43bf7f1a740eb900724">ProcessSegSearchPainPoint</a>(0.0f, pain_point, <span class="stringliteral">"Chop1"</span>, pending, word,
580
<a name="l00546"></a>00546 pain_points, blamer_bundle);
581
<a name="l00547"></a>00547 pain_point.<a class="code" href="a00463.html#a22a122ba151628fafb64b2957c4690fd">col</a> = blob_number + 1;
582
<a name="l00548"></a>00548 pain_point.<a class="code" href="a00463.html#abde52ca4976ce8acc4e70215df074ff5">row</a> = blob_number + 1;
583
<a name="l00549"></a>00549 <a class="code" href="a00655.html#ae44c5dc9bd69b43bf7f1a740eb900724">ProcessSegSearchPainPoint</a>(0.0f, pain_point, <span class="stringliteral">"Chop2"</span>, pending, word,
584
<a name="l00550"></a>00550 pain_points, blamer_bundle);
585
<a name="l00551"></a>00551 <span class="keywordflow">if</span> (<a class="code" href="a00655.html#a3d9fc3b086e0f174a2cd602ea8f421c6">language_model_</a>-><a class="code" href="a00446.html#a3f2f47a18751c6eae3e6d766b7ec0252">language_model_ngram_on</a>) {
586
<a name="l00552"></a>00552 <span class="comment">// N-gram evaluation depends on the number of blobs in a chunk, so we</span>
587
<a name="l00553"></a>00553 <span class="comment">// have to re-evaluate everything in the word.</span>
588
<a name="l00554"></a>00554 <a class="code" href="a00655.html#a062218e28c252cea48ea375b7ab1d4a5">ResetNGramSearch</a>(word, best_choice_bundle, pending);
589
<a name="l00555"></a>00555 blob_number = 0;
590
<a name="l00556"></a>00556 }
591
<a name="l00557"></a>00557 <span class="comment">// Run language model incrementally. (Except with the n-gram model on.)</span>
592
<a name="l00558"></a>00558 <a class="code" href="a00655.html#a1241f0c7f9bf06ea05ec02c2b6e1a9c7">UpdateSegSearchNodes</a>(rating_cert_scale, blob_number, pending,
593
<a name="l00559"></a>00559 word, pain_points, best_choice_bundle, blamer_bundle);
594
<a name="l00560"></a>00560 } <span class="keywordflow">while</span> (!<a class="code" href="a00655.html#a3d9fc3b086e0f174a2cd602ea8f421c6">language_model_</a>-><a class="code" href="a00446.html#a3a6f034012a45448a018bd4042138f74">AcceptableChoiceFound</a>() &&
595
<a name="l00561"></a>00561 word-><a class="code" href="a00650.html#af7454f140688a32b2bebb60ab36e91ee">ratings</a>-><a class="code" href="a00264.html#a82cbdd8a6eb23048b3d1cb656d34a364">dimension</a>() &lt; kMaxNumChunks);
596
<a name="l00562"></a>00562
597
<a name="l00563"></a>00563 <span class="comment">// If after running only the chopper best_choice is incorrect and no blame</span>
598
<a name="l00564"></a>00564 <span class="comment">// has been yet set, blame the classifier if best_choice is classifier's</span>
599
<a name="l00565"></a>00565 <span class="comment">// top choice and is a dictionary word (i.e. language model could not have</span>
600
<a name="l00566"></a>00566 <span class="comment">// helped). Otherwise blame the tradeoff between the classifier and</span>
601
<a name="l00567"></a>00567 <span class="comment">// the old language model (permuters).</span>
602
<a name="l00568"></a>00568 <span class="keywordflow">if</span> (word-><a class="code" href="a00650.html#a267e5d5e9fe67117e47223c5d336b8ba">blamer_bundle</a> != NULL &&
603
<a name="l00569"></a>00569 word-><a class="code" href="a00650.html#a267e5d5e9fe67117e47223c5d336b8ba">blamer_bundle</a>-><a class="code" href="a00274.html#af1132d873bea38b5fc5309c55fad2438">incorrect_result_reason</a>() == <a class="code" href="a00725.html#a8faf52e106c04bfdc9898ffa90b821e1a4f5ff200b61e6226eb6d74aec188e769">IRR_CORRECT</a> &&
604
<a name="l00570"></a>00570 !word-><a class="code" href="a00650.html#a267e5d5e9fe67117e47223c5d336b8ba">blamer_bundle</a>-><a class="code" href="a00274.html#a142bd71b742b90682f1ac351db4fe4e1">ChoiceIsCorrect</a>(word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>)) {
605
<a name="l00571"></a>00571 <span class="keywordtype">bool</span> valid_permuter = word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a> != NULL &&
606
<a name="l00572"></a>00572 <a class="code" href="a00354.html#af873650961191c2a844eed47c0829421" title="Check all the DAWGs to see if this word is in any of them.">Dict::valid_word_permuter</a>(word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a161ac137df2d063a6421728e6aa08fcb">permuter</a>(), <span class="keyword">false</span>);
607
<a name="l00573"></a>00573 word-><a class="code" href="a00650.html#a267e5d5e9fe67117e47223c5d336b8ba">blamer_bundle</a>-><a class="code" href="a00274.html#ad9e9fee97358dbed386d0becabd6c549">BlameClassifierOrLangModel</a>(word,
608
<a name="l00574"></a>00574 <a class="code" href="a00314.html#a83e4207353a1bb07adfaa6a82a74c9ce">getDict</a>().getUnicharset(),
609
<a name="l00575"></a>00575 valid_permuter,
610
<a name="l00576"></a>00576 <a class="code" href="a00655.html#a76e95cb7cbfe636ab00ec46150b5524d">wordrec_debug_blamer</a>);
611
<a name="l00577"></a>00577 }
612
<a name="l00578"></a>00578 }
613
<a name="l00579"></a>00579
614
<a name="l00580"></a>00580
615
<a name="l00581"></a>00581 <span class="comment">/**********************************************************************</span>
616
<a name="l00582"></a>00582 <span class="comment"> * select_blob_to_split</span>
617
<a name="l00583"></a>00583 <span class="comment"> *</span>
618
<a name="l00584"></a>00584 <span class="comment"> * These are the results of the last classification. Find a likely</span>
619
<a name="l00585"></a>00585 <span class="comment"> * place to apply splits. If none, return -1.</span>
620
<a name="l00586"></a>00586 <span class="comment"> **********************************************************************/</span>
621
<a name="l00587"></a><a class="code" href="a00655.html#a1036ef6be417b423b0ec5dc03e812db8">00587</a> <span class="keywordtype">int</span> <a class="code" href="a00655.html#a1036ef6be417b423b0ec5dc03e812db8">Wordrec::select_blob_to_split</a>(
622
<a name="l00588"></a>00588 <span class="keyword">const</span> <a class="code" href="a00403.html">GenericVector&lt;BLOB_CHOICE*&gt;</a>&amp; blob_choices,
623
<a name="l00589"></a>00589 <span class="keywordtype">float</span> rating_ceiling, <span class="keywordtype">bool</span> split_next_to_fragment) {
624
<a name="l00590"></a>00590 <a class="code" href="a00276.html">BLOB_CHOICE</a> *blob_choice;
625
<a name="l00591"></a>00591 <span class="keywordtype">int</span> x;
626
<a name="l00592"></a>00592 <span class="keywordtype">float</span> worst = -<a class="code" href="a00831.html#a9adcee815220ec9db98062213806ed04">MAX_FLOAT32</a>;
627
<a name="l00593"></a>00593 <span class="keywordtype">int</span> worst_index = -1;
628
<a name="l00594"></a>00594 <span class="keywordtype">float</span> worst_near_fragment = -<a class="code" href="a00831.html#a9adcee815220ec9db98062213806ed04">MAX_FLOAT32</a>;
629
<a name="l00595"></a>00595 <span class="keywordtype">int</span> worst_index_near_fragment = -1;
630
<a name="l00596"></a>00596 <span class="keyword">const</span> <a class="code" href="a00298.html">CHAR_FRAGMENT</a> **fragments = NULL;
631
<a name="l00597"></a>00597
632
<a name="l00598"></a>00598 <span class="keywordflow">if</span> (<a class="code" href="a00655.html#a3f1c04cfbc1650eae6fc981984717248">chop_debug</a>) {
633
<a name="l00599"></a>00599 <span class="keywordflow">if</span> (rating_ceiling &lt; <a class="code" href="a00831.html#a9adcee815220ec9db98062213806ed04">MAX_FLOAT32</a>)
634
<a name="l00600"></a>00600 <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a>(<span class="stringliteral">"rating_ceiling = %8.4f\n"</span>, rating_ceiling);
635
<a name="l00601"></a>00601 <span class="keywordflow">else</span>
636
<a name="l00602"></a>00602 <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a>(<span class="stringliteral">"rating_ceiling = No Limit\n"</span>);
637
<a name="l00603"></a>00603 }
638
<a name="l00604"></a>00604
639
<a name="l00605"></a>00605 <span class="keywordflow">if</span> (split_next_to_fragment &amp;&amp; blob_choices.<a class="code" href="a00403.html#a111b51dd0bf1324cfb69ef70703d8e70">size</a>() > 0) {
640
<a name="l00606"></a>00606 fragments = <span class="keyword">new</span> <span class="keyword">const</span> <a class="code" href="a00298.html">CHAR_FRAGMENT</a> *[blob_choices.<a class="code" href="a00403.html#a690cd403343d371ed7763fdaf3ed71d2">length</a>()];
641
<a name="l00607"></a>00607 <span class="keywordflow">if</span> (blob_choices[0] != NULL) {
642
<a name="l00608"></a>00608 fragments[0] = <a class="code" href="a00314.html#a83e4207353a1bb07adfaa6a82a74c9ce">getDict</a>().<a class="code" href="a00354.html#a635ce00ba9aa24247563d26ae26da631">getUnicharset</a>().<a class="code" href="a00642.html#af08440927e3b87f99bd3a3fbd8e126dc">get_fragment</a>(
643
<a name="l00609"></a>00609 blob_choices[0]->unichar_id());
644
<a name="l00610"></a>00610 } <span class="keywordflow">else</span> {
645
<a name="l00611"></a>00611 fragments[0] = NULL;
646
<a name="l00612"></a>00612 }
647
<a name="l00613"></a>00613 }
648
<a name="l00614"></a>00614
649
<a name="l00615"></a>00615 <span class="keywordflow">for</span> (x = 0; x &lt; blob_choices.<a class="code" href="a00403.html#a111b51dd0bf1324cfb69ef70703d8e70">size</a>(); ++x) {
650
<a name="l00616"></a>00616 <span class="keywordflow">if</span> (blob_choices[x] == NULL) {
651
<a name="l00617"></a>00617 <span class="keywordflow">if</span> (fragments != NULL) {
652
<a name="l00618"></a>00618 <span class="keyword">delete</span>[] fragments;
653
<a name="l00619"></a>00619 }
654
<a name="l00620"></a>00620 <span class="keywordflow">return</span> x;
655
<a name="l00621"></a>00621 } <span class="keywordflow">else</span> {
656
<a name="l00622"></a>00622 blob_choice = blob_choices[x];
657
<a name="l00623"></a>00623 <span class="comment">// Populate fragments for the following position.</span>
658
<a name="l00624"></a>00624 <span class="keywordflow">if</span> (split_next_to_fragment &amp;&amp; x+1 &lt; blob_choices.<a class="code" href="a00403.html#a111b51dd0bf1324cfb69ef70703d8e70">size</a>()) {
659
<a name="l00625"></a>00625 <span class="keywordflow">if</span> (blob_choices[x + 1] != NULL) {
660
<a name="l00626"></a>00626 fragments[x + 1] = <a class="code" href="a00314.html#a83e4207353a1bb07adfaa6a82a74c9ce">getDict</a>().<a class="code" href="a00354.html#a635ce00ba9aa24247563d26ae26da631">getUnicharset</a>().<a class="code" href="a00642.html#af08440927e3b87f99bd3a3fbd8e126dc">get_fragment</a>(
661
<a name="l00627"></a>00627 blob_choices[x + 1]->unichar_id());
662
<a name="l00628"></a>00628 } <span class="keywordflow">else</span> {
663
<a name="l00629"></a>00629 fragments[x + 1] = NULL;
664
<a name="l00630"></a>00630 }
665
<a name="l00631"></a>00631 }
666
<a name="l00632"></a>00632 <span class="keywordflow">if</span> (blob_choice-><a class="code" href="a00276.html#a7fbd2d505bf5b55beee424096fb1e9bc">rating</a>() &lt; rating_ceiling &amp;&amp;
667
<a name="l00633"></a>00633 blob_choice-><a class="code" href="a00276.html#af53d45b718c562ea7209b583c77ac14e">certainty</a>() &lt; <a class="code" href="a00655.html#a57ae3e0529a624fd3677f96d5b43fcc8">tessedit_certainty_threshold</a>) {
668
<a name="l00634"></a>00634 <span class="comment">// Update worst and worst_index.</span>
669
<a name="l00635"></a>00635 <span class="keywordflow">if</span> (blob_choice-><a class="code" href="a00276.html#a7fbd2d505bf5b55beee424096fb1e9bc">rating</a>() > worst) {
670
<a name="l00636"></a>00636 worst_index = x;
671
<a name="l00637"></a>00637 worst = blob_choice-><a class="code" href="a00276.html#a7fbd2d505bf5b55beee424096fb1e9bc">rating</a>();
672
<a name="l00638"></a>00638 }
673
<a name="l00639"></a>00639 <span class="keywordflow">if</span> (split_next_to_fragment) {
674
<a name="l00640"></a>00640 <span class="comment">// Update worst_near_fragment and worst_index_near_fragment.</span>
675
<a name="l00641"></a>00641 <span class="keywordtype">bool</span> expand_following_fragment =
676
<a name="l00642"></a>00642 (x + 1 &lt; blob_choices.<a class="code" href="a00403.html#a111b51dd0bf1324cfb69ef70703d8e70">size</a>() &amp;&amp;
677
<a name="l00643"></a>00643 fragments[x+1] != NULL &amp;&amp; !fragments[x+1]-><a class="code" href="a00298.html#a42707cfcbf1e9a38ec0333d3202f8d80">is_beginning</a>());
678
<a name="l00644"></a>00644 <span class="keywordtype">bool</span> expand_preceding_fragment =
679
<a name="l00645"></a>00645 (x > 0 &amp;&amp; fragments[x-1] != NULL &amp;&amp; !fragments[x-1]-><a class="code" href="a00298.html#af222b3b2c71de6199f75e3a7a64aa660">is_ending</a>());
680
<a name="l00646"></a>00646 <span class="keywordflow">if</span> ((expand_following_fragment || expand_preceding_fragment) &amp;&amp;
681
<a name="l00647"></a>00647 blob_choice-><a class="code" href="a00276.html#a7fbd2d505bf5b55beee424096fb1e9bc">rating</a>() > worst_near_fragment) {
682
<a name="l00648"></a>00648 worst_index_near_fragment = x;
683
<a name="l00649"></a>00649 worst_near_fragment = blob_choice-><a class="code" href="a00276.html#a7fbd2d505bf5b55beee424096fb1e9bc">rating</a>();
684
<a name="l00650"></a>00650 <span class="keywordflow">if</span> (<a class="code" href="a00655.html#a3f1c04cfbc1650eae6fc981984717248">chop_debug</a>) {
685
<a name="l00651"></a>00651 <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a>(<span class="stringliteral">"worst_index_near_fragment=%d"</span>
686
<a name="l00652"></a>00652 <span class="stringliteral">" expand_following_fragment=%d"</span>
687
<a name="l00653"></a>00653 <span class="stringliteral">" expand_preceding_fragment=%d\n"</span>,
688
<a name="l00654"></a>00654 worst_index_near_fragment,
689
<a name="l00655"></a>00655 expand_following_fragment,
690
<a name="l00656"></a>00656 expand_preceding_fragment);
691
<a name="l00657"></a>00657 }
692
<a name="l00658"></a>00658 }
693
<a name="l00659"></a>00659 }
694
<a name="l00660"></a>00660 }
695
<a name="l00661"></a>00661 }
696
<a name="l00662"></a>00662 }
697
<a name="l00663"></a>00663 <span class="keywordflow">if</span> (fragments != NULL) {
698
<a name="l00664"></a>00664 <span class="keyword">delete</span>[] fragments;
699
<a name="l00665"></a>00665 }
700
<a name="l00666"></a>00666 <span class="comment">// TODO(daria): maybe a threshold of badness for</span>
701
<a name="l00667"></a>00667 <span class="comment">// worst_near_fragment would be useful.</span>
702
<a name="l00668"></a>00668 <span class="keywordflow">return</span> worst_index_near_fragment != -1 ?
703
<a name="l00669"></a>00669 worst_index_near_fragment : worst_index;
704
<a name="l00670"></a>00670 }
705
<a name="l00671"></a>00671
706
<a name="l00672"></a>00672 <span class="comment">/**********************************************************************</span>
707
<a name="l00673"></a>00673 <span class="comment"> * select_blob_to_split_from_fixpt</span>
708
<a name="l00674"></a>00674 <span class="comment"> *</span>
709
<a name="l00675"></a>00675 <span class="comment"> * Given the fix point from a dictionary search, if there is a single</span>
710
<a name="l00676"></a>00676 <span class="comment"> * dangerous blob that maps to multiple characters, return that blob</span>
711
<a name="l00677"></a>00677 <span class="comment"> * index as a place we need to split. If none, return -1.</span>
712
<a name="l00678"></a>00678 <span class="comment"> **********************************************************************/</span>
713
<a name="l00679"></a><a class="code" href="a00655.html#aa92122a173b68d7261a4bec13bbb02f9">00679</a> <span class="keywordtype">int</span> <a class="code" href="a00655.html#aa92122a173b68d7261a4bec13bbb02f9">Wordrec::select_blob_to_split_from_fixpt</a>(<a class="code" href="a00403.html">DANGERR</a> *fixpt) {
714
<a name="l00680"></a>00680 <span class="keywordflow">if</span> (!fixpt)
715
<a name="l00681"></a>00681 <span class="keywordflow">return</span> -1;
716
<a name="l00682"></a>00682 <span class="keywordflow">for</span> (<span class="keywordtype">int</span> i = 0; i &lt; fixpt-><a class="code" href="a00403.html#a111b51dd0bf1324cfb69ef70703d8e70">size</a>(); i++) {
717
<a name="l00683"></a>00683 <span class="keywordflow">if</span> ((*fixpt)[i].begin + 1 == (*fixpt)[i].end &amp;&amp;
718
<a name="l00684"></a>00684 (*fixpt)[i].dangerous &amp;&amp;
719
<a name="l00685"></a>00685 (*fixpt)[i].correct_is_ngram) {
720
<a name="l00686"></a>00686 <span class="keywordflow">return</span> (*fixpt)[i].begin;
721
<a name="l00687"></a>00687 }
722
<a name="l00688"></a>00688 }
723
<a name="l00689"></a>00689 <span class="keywordflow">return</span> -1;
724
<a name="l00690"></a>00690 }
725
<a name="l00691"></a>00691
726
<a name="l00692"></a>00692
727
<a name="l00693"></a>00693 } <span class="comment">// namespace tesseract</span>
728
<a name="l00694"></a>00694
729
<a name="l00695"></a>00695
730
<a name="l00696"></a>00696 <span class="comment">/**********************************************************************</span>
731
<a name="l00697"></a>00697 <span class="comment"> * total_containment</span>
732
<a name="l00698"></a>00698 <span class="comment"> *</span>
733
<a name="l00699"></a>00699 <span class="comment"> * Check to see if one of these outlines is totally contained within</span>
734
<a name="l00700"></a>00700 <span class="comment"> * the bounding box of the other.</span>
735
<a name="l00701"></a>00701 <span class="comment"> **********************************************************************/</span>
736
<a name="l00702"></a><a class="code" href="a01224.html#a8e8e9f6cb587b621054e6ff626a46a23">00702</a> <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> <a class="code" href="a01223.html#a8e8e9f6cb587b621054e6ff626a46a23">total_containment</a>(<a class="code" href="a00591.html">TBLOB</a> *blob1, <a class="code" href="a00591.html">TBLOB</a> *blob2) {
737
<a name="l00703"></a>00703 <a class="code" href="a00592.html">TBOX</a> box1 = blob1-><a class="code" href="a00591.html#a8f1f8f110170fe12cf8147d504cd0ea2">bounding_box</a>();
738
<a name="l00704"></a>00704 <a class="code" href="a00592.html">TBOX</a> box2 = blob2-><a class="code" href="a00591.html#a8f1f8f110170fe12cf8147d504cd0ea2">bounding_box</a>();
739
<a name="l00705"></a>00705 <span class="keywordflow">return</span> box1.<a class="code" href="a00592.html#a5d4e2c5f91b791e94d4c94e513180632">contains</a>(box2) || box2.<a class="code" href="a00592.html#a5d4e2c5f91b791e94d4c94e513180632">contains</a>(box1);
740
<a name="l00706"></a>00706 }
741
</pre></div></div><!-- contents -->
743
<!-- window showing the filter options -->
744
<div id="MSearchSelectWindow"
745
onmouseover="return searchBox.OnSearchSelectShow()"
746
onmouseout="return searchBox.OnSearchSelectHide()"
747
onkeydown="return searchBox.OnSearchSelectKey(event)">
748
<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark"> </span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark"> </span>Classes</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark"> </span>Namespaces</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark"> </span>Files</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark"> </span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(5)"><span class="SelectionMark"> </span>Variables</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(6)"><span class="SelectionMark"> </span>Typedefs</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(7)"><span class="SelectionMark"> </span>Enumerations</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(8)"><span class="SelectionMark"> </span>Enumerator</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(9)"><span class="SelectionMark"> </span>Friends</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(10)"><span class="SelectionMark"> </span>Defines</a></div>
750
<!-- iframe showing the search results (closed by default) -->
751
<div id="MSearchResultsWindow">
752
<iframe src="javascript:void(0)" frameborder="0"
753
name="MSearchResults" id="MSearchResults">
757
<div id="nav-path" class="navpath">
759
<li class="navelem"><a class="el" href="a01223.html">chopper.cpp</a> </li>
761
<li class="footer">Generated on Mon Feb 3 2014 10:59:10 for tesseract by
762
<a href="http://www.doxygen.org/index.html">
763
<img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.7.6.1 </li>