1
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
2
<html xmlns="http://www.w3.org/1999/xhtml">
4
<!-- Declares the document encoding. The Content-Type pragma must use the text/html media type; text/xhtml is not a registered type. -->
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8"/>
5
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
6
<title>tesseract: /usr/local/google/home/jbreiden/tesseract-ocr-read-only/ccmain/docqual.cpp Source File</title>
8
<link href="tabs.css" rel="stylesheet" type="text/css"/>
9
<link href="doxygen.css" rel="stylesheet" type="text/css" />
10
<link href="navtree.css" rel="stylesheet" type="text/css"/>
11
<script type="text/javascript" src="jquery.js"></script>
12
<script type="text/javascript" src="resize.js"></script>
13
<script type="text/javascript" src="navtree.js"></script>
14
<script type="text/javascript">
15
$(document).ready(initResizable);
17
<link href="search/search.css" rel="stylesheet" type="text/css"/>
18
<script type="text/javascript" src="search/search.js"></script>
19
<script type="text/javascript">
20
$(document).ready(function() { searchBox.OnSelectItem(0); });
25
<div id="top"><!-- do not remove this div! -->
29
<table cellspacing="0" cellpadding="0">
31
<tr style="height: 56px;">
34
<td style="padding-left: 0.5em;">
35
<div id="projectname">tesseract
36
 <span id="projectnumber">3.03</span>
48
<!-- Generated by Doxygen 1.7.6.1 -->
49
<script type="text/javascript">
50
var searchBox = new SearchBox("searchBox", "search",false,'Search');
52
<div id="navrow1" class="tabs">
54
<li><a href="index.html"><span>Main Page</span></a></li>
55
<li><a href="pages.html"><span>Related Pages</span></a></li>
56
<li><a href="modules.html"><span>Modules</span></a></li>
57
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
58
<li><a href="annotated.html"><span>Classes</span></a></li>
59
<li class="current"><a href="files.html"><span>Files</span></a></li>
61
<div id="MSearchBox" class="MSearchBoxInactive">
63
<img id="MSearchSelect" src="search/mag_sel.png"
64
onmouseover="return searchBox.OnSearchSelectShow()"
65
onmouseout="return searchBox.OnSearchSelectHide()"
67
<input type="text" id="MSearchField" value="Search" accesskey="S"
68
onfocus="searchBox.OnSearchFieldFocus(true)"
69
onblur="searchBox.OnSearchFieldFocus(false)"
70
onkeyup="searchBox.OnSearchFieldChange(event)"/>
71
</span><span class="right">
72
<!-- Closes the search results window. The image is the link's only content, so its alt text supplies the link's accessible name. -->
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt="Close search results"/></a>
78
<div id="navrow2" class="tabs2">
80
<li><a href="files.html"><span>File List</span></a></li>
81
<li><a href="globals.html"><span>File Members</span></a></li>
85
<div id="side-nav" class="ui-resizable side-nav-resizable">
87
<div id="nav-tree-contents">
90
<div id="splitbar" style="-moz-user-select:none;"
91
class="ui-resizable-handle">
94
<script type="text/javascript">
95
initNavTree('a00677.html','');
97
<div id="doc-content">
99
<div class="headertitle">
100
<div class="title">/usr/local/google/home/jbreiden/tesseract-ocr-read-only/ccmain/docqual.cpp</div> </div>
102
<div class="contents">
103
<a href="a00677.html">Go to the documentation of this file.</a><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 <span class="comment">/******************************************************************</span>
104
<a name="l00002"></a>00002 <span class="comment"> * File: docqual.cpp (Formerly docqual.c)</span>
105
<a name="l00003"></a>00003 <span class="comment"> * Description: Document Quality Metrics</span>
106
<a name="l00004"></a>00004 <span class="comment"> * Author: Phil Cheatle</span>
107
<a name="l00005"></a>00005 <span class="comment"> * Created: Mon May 9 11:27:28 BST 1994</span>
108
<a name="l00006"></a>00006 <span class="comment"> *</span>
109
<a name="l00007"></a>00007 <span class="comment"> * (C) Copyright 1994, Hewlett-Packard Ltd.</span>
110
<a name="l00008"></a>00008 <span class="comment"> ** Licensed under the Apache License, Version 2.0 (the "License");</span>
111
<a name="l00009"></a>00009 <span class="comment"> ** you may not use this file except in compliance with the License.</span>
112
<a name="l00010"></a>00010 <span class="comment"> ** You may obtain a copy of the License at</span>
113
<a name="l00011"></a>00011 <span class="comment"> ** http://www.apache.org/licenses/LICENSE-2.0</span>
114
<a name="l00012"></a>00012 <span class="comment"> ** Unless required by applicable law or agreed to in writing, software</span>
115
<a name="l00013"></a>00013 <span class="comment"> ** distributed under the License is distributed on an "AS IS" BASIS,</span>
116
<a name="l00014"></a>00014 <span class="comment"> ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
117
<a name="l00015"></a>00015 <span class="comment"> ** See the License for the specific language governing permissions and</span>
118
<a name="l00016"></a>00016 <span class="comment"> ** limitations under the License.</span>
119
<a name="l00017"></a>00017 <span class="comment"> *</span>
120
<a name="l00018"></a>00018 <span class="comment"> **********************************************************************/</span>
121
<a name="l00019"></a>00019
122
<a name="l00020"></a>00020 <span class="preprocessor">#ifdef _MSC_VER</span>
123
<a name="l00021"></a>00021 <span class="preprocessor"></span><span class="preprocessor">#pragma warning(disable:4244) // Conversion warnings</span>
124
<a name="l00022"></a>00022 <span class="preprocessor"></span><span class="preprocessor">#endif</span>
125
<a name="l00023"></a>00023 <span class="preprocessor"></span>
126
<a name="l00024"></a>00024 <span class="preprocessor">#include &lt;ctype.h&gt;</span>
127
<a name="l00025"></a>00025 <span class="preprocessor">#include "<a class="code" href="a00678.html">docqual.h</a>"</span>
128
<a name="l00026"></a>00026 <span class="preprocessor">#include "<a class="code" href="a00705.html">reject.h</a>"</span>
129
<a name="l00027"></a>00027 <span class="preprocessor">#include "<a class="code" href="a00856.html">tesscallback.h</a>"</span>
130
<a name="l00028"></a>00028 <span class="preprocessor">#include "<a class="code" href="a00718.html">tessvars.h</a>"</span>
131
<a name="l00029"></a>00029 <span class="preprocessor">#include "<a class="code" href="a00849.html">secname.h</a>"</span>
132
<a name="l00030"></a>00030 <span class="preprocessor">#include "<a class="code" href="a01039.html">globals.h</a>"</span>
133
<a name="l00031"></a>00031 <span class="preprocessor">#include "<a class="code" href="a00716.html">tesseractclass.h</a>"</span>
134
<a name="l00032"></a>00032
135
<a name="l00033"></a>00033 <span class="keyword">namespace </span><a class="code" href="a01266.html#afed58feacb84df2de88bdd613cfdba6d">tesseract</a>{
136
<a name="l00034"></a>00034
137
<a name="l00035"></a>00035 <span class="comment">// A little class to provide the callbacks as we have no pre-bound args.</span>
138
<a name="l00036"></a><a class="code" href="a00356.html">00036</a> <span class="keyword">struct </span><a class="code" href="a00356.html">DocQualCallbacks</a> {
139
<a name="l00037"></a><a class="code" href="a00356.html#afcbe06fefeeaee8eb7f00b8e90a2d769">00037</a> <span class="keyword">explicit</span> <a class="code" href="a00356.html#afcbe06fefeeaee8eb7f00b8e90a2d769">DocQualCallbacks</a>(<a class="code" href="a00650.html">WERD_RES</a>* word0)
140
<a name="l00038"></a>00038 : <a class="code" href="a00356.html#a06e04c42b1ff2e5ffa24923e9f34e84e">word</a>(word0), <a class="code" href="a00356.html#a0bc1e44f1672adeace51f83874c0d0e2">match_count</a>(0), <a class="code" href="a00356.html#af534b546b519077d8dba7b375e6f8f59">accepted_match_count</a>(0) {}
141
<a name="l00039"></a>00039
142
<a name="l00040"></a><a class="code" href="a00356.html#a5f143209a4192b1872e2e2430d78e29a">00040</a> <span class="keywordtype">void</span> <a class="code" href="a00356.html#a5f143209a4192b1872e2e2430d78e29a">CountMatchingBlobs</a>(<span class="keywordtype">int</span> index) {
143
<a name="l00041"></a>00041 ++<a class="code" href="a00356.html#a0bc1e44f1672adeace51f83874c0d0e2">match_count</a>;
144
<a name="l00042"></a>00042 }
145
<a name="l00043"></a>00043
146
<a name="l00044"></a><a class="code" href="a00356.html#a7fb05da0218e4e94705cab1d751c7762">00044</a> <span class="keywordtype">void</span> <a class="code" href="a00356.html#a7fb05da0218e4e94705cab1d751c7762">CountAcceptedBlobs</a>(<span class="keywordtype">int</span> index) {
147
<a name="l00045"></a>00045 <span class="keywordflow">if</span> (<a class="code" href="a00356.html#a06e04c42b1ff2e5ffa24923e9f34e84e">word</a>-><a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>[index].accepted())
148
<a name="l00046"></a>00046 ++<a class="code" href="a00356.html#af534b546b519077d8dba7b375e6f8f59">accepted_match_count</a>;
149
<a name="l00047"></a>00047 ++<a class="code" href="a00356.html#a0bc1e44f1672adeace51f83874c0d0e2">match_count</a>;
150
<a name="l00048"></a>00048 }
151
<a name="l00049"></a>00049
152
<a name="l00050"></a><a class="code" href="a00356.html#a4b494c625c5aa534c6fa265fb7fe828f">00050</a> <span class="keywordtype">void</span> <a class="code" href="a00356.html#a4b494c625c5aa534c6fa265fb7fe828f">AcceptIfGoodQuality</a>(<span class="keywordtype">int</span> index) {
153
<a name="l00051"></a>00051 <span class="keywordflow">if</span> (<a class="code" href="a00356.html#a06e04c42b1ff2e5ffa24923e9f34e84e">word</a>-><a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>[index].accept_if_good_quality())
154
<a name="l00052"></a>00052 <a class="code" href="a00356.html#a06e04c42b1ff2e5ffa24923e9f34e84e">word</a>-><a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>[index].setrej_quality_accept();
155
<a name="l00053"></a>00053 }
156
<a name="l00054"></a>00054
157
<a name="l00055"></a><a class="code" href="a00356.html#a06e04c42b1ff2e5ffa24923e9f34e84e">00055</a> <a class="code" href="a00650.html">WERD_RES</a>* <a class="code" href="a00356.html#a06e04c42b1ff2e5ffa24923e9f34e84e">word</a>;
158
<a name="l00056"></a><a class="code" href="a00356.html#a0bc1e44f1672adeace51f83874c0d0e2">00056</a> <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> <a class="code" href="a00356.html#a0bc1e44f1672adeace51f83874c0d0e2">match_count</a>;
159
<a name="l00057"></a><a class="code" href="a00356.html#af534b546b519077d8dba7b375e6f8f59">00057</a> <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> <a class="code" href="a00356.html#af534b546b519077d8dba7b375e6f8f59">accepted_match_count</a>;
160
<a name="l00058"></a>00058 };
161
<a name="l00059"></a>00059
162
<a name="l00060"></a>00060 <span class="comment">/*************************************************************************</span>
163
<a name="l00061"></a>00061 <span class="comment"> * word_blob_quality()</span>
164
<a name="l00062"></a>00062 <span class="comment"> * How many blobs in the box_word are identical to those of the inword?</span>
165
<a name="l00063"></a>00063 <span class="comment"> * ASSUME blobs in both initial word and box_word are in ascending order of</span>
166
<a name="l00064"></a>00064 <span class="comment"> * left hand blob edge.</span>
167
<a name="l00065"></a>00065 <span class="comment"> *************************************************************************/</span>
168
<a name="l00066"></a><a class="code" href="a00607.html#a70d6e0fcde6ff6f76443e8a206f08783">00066</a> <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> <a class="code" href="a00607.html#a70d6e0fcde6ff6f76443e8a206f08783">Tesseract::word_blob_quality</a>(<a class="code" href="a00650.html">WERD_RES</a> *word, <a class="code" href="a00521.html">ROW</a> *row) {
169
<a name="l00067"></a>00067 <span class="keywordflow">if</span> (word-><a class="code" href="a00650.html#af171e37da16ed628bf4516e9f934dc08">bln_boxes</a> == NULL ||
170
<a name="l00068"></a>00068 word-><a class="code" href="a00650.html#a211ab5c5a13123ee7bdc7459d3363f9d">rebuild_word</a> == NULL || word-><a class="code" href="a00650.html#a211ab5c5a13123ee7bdc7459d3363f9d">rebuild_word</a>-><a class="code" href="a00635.html#a52fba22521039a79d8c1c8182f602dcc">blobs</a>.<a class="code" href="a00403.html#afb0d8c52a603b6aa965d63bbf06a27dd">empty</a>())
171
<a name="l00069"></a>00069 <span class="keywordflow">return</span> 0;
172
<a name="l00070"></a>00070
173
<a name="l00071"></a>00071 <a class="code" href="a00356.html">DocQualCallbacks</a> cb(word);
174
<a name="l00072"></a>00072 word-><a class="code" href="a00650.html#af171e37da16ed628bf4516e9f934dc08">bln_boxes</a>-><a class="code" href="a00287.html#aabd2c8e3b6f8dda66725754fa034cc8e">ProcessMatchedBlobs</a>(
175
<a name="l00073"></a>00073 *word-><a class="code" href="a00650.html#a211ab5c5a13123ee7bdc7459d3363f9d">rebuild_word</a>,
176
<a name="l00074"></a>00074 <a class="code" href="a00856.html#ad000c6729bae0f97075ca10a0b5bf7d6">NewPermanentTessCallback</a>(&cb, &<a class="code" href="a00356.html#a5f143209a4192b1872e2e2430d78e29a">DocQualCallbacks::CountMatchingBlobs</a>));
177
<a name="l00075"></a>00075 <span class="keywordflow">return</span> cb.<a class="code" href="a00356.html#a0bc1e44f1672adeace51f83874c0d0e2">match_count</a>;
178
<a name="l00076"></a>00076 }
179
<a name="l00077"></a>00077
180
<a name="l00078"></a><a class="code" href="a00607.html#ace8e2b02f0270dbfe4b7dc05638bfc0d">00078</a> <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> <a class="code" href="a00607.html#ace8e2b02f0270dbfe4b7dc05638bfc0d">Tesseract::word_outline_errs</a>(<a class="code" href="a00650.html">WERD_RES</a> *word) {
181
<a name="l00079"></a>00079 <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> i = 0;
182
<a name="l00080"></a>00080 <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> err_count = 0;
183
<a name="l00081"></a>00081
184
<a name="l00082"></a>00082 <span class="keywordflow">if</span> (word-><a class="code" href="a00650.html#a211ab5c5a13123ee7bdc7459d3363f9d">rebuild_word</a> != NULL) {
185
<a name="l00083"></a>00083 <span class="keywordflow">for</span> (<span class="keywordtype">int</span> b = 0; b &lt; word-&gt;<a class="code" href="a00650.html#a211ab5c5a13123ee7bdc7459d3363f9d">rebuild_word</a>-&gt;<a class="code" href="a00635.html#adfdef9868e61650e076775011382ec70">NumBlobs</a>(); ++b) {
186
<a name="l00084"></a>00084 <a class="code" href="a00591.html">TBLOB</a>* blob = word-><a class="code" href="a00650.html#a211ab5c5a13123ee7bdc7459d3363f9d">rebuild_word</a>-><a class="code" href="a00635.html#a52fba22521039a79d8c1c8182f602dcc">blobs</a>[b];
187
<a name="l00085"></a>00085 err_count += <a class="code" href="a00607.html#a27dc89a077e5074d22c3c0f9cccb5047">count_outline_errs</a>(word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>()[i],
188
<a name="l00086"></a>00086 blob-><a class="code" href="a00591.html#a7cc7be71e17444d728e9c68204bca007">NumOutlines</a>());
189
<a name="l00087"></a>00087 i++;
190
<a name="l00088"></a>00088 }
191
<a name="l00089"></a>00089 }
192
<a name="l00090"></a>00090 <span class="keywordflow">return</span> err_count;
193
<a name="l00091"></a>00091 }
194
<a name="l00092"></a>00092
195
<a name="l00093"></a>00093 <span class="comment">/*************************************************************************</span>
196
<a name="l00094"></a>00094 <span class="comment"> * word_char_quality()</span>
197
<a name="l00095"></a>00095 <span class="comment"> * Combination of blob quality and outline quality - how many good chars are</span>
198
<a name="l00096"></a>00096 <span class="comment"> * there? - I.e chars which pass the blob AND outline tests.</span>
199
<a name="l00097"></a>00097 <span class="comment"> *************************************************************************/</span>
200
<a name="l00098"></a><a class="code" href="a00607.html#a348314a50f786908e27bbaa3733d14d4">00098</a> <span class="keywordtype">void</span> <a class="code" href="a00607.html#a348314a50f786908e27bbaa3733d14d4">Tesseract::word_char_quality</a>(<a class="code" href="a00650.html">WERD_RES</a> *word,
201
<a name="l00099"></a>00099 <a class="code" href="a00521.html">ROW</a> *row,
202
<a name="l00100"></a>00100 <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> *match_count,
203
<a name="l00101"></a>00101 <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> *accepted_match_count) {
204
<a name="l00102"></a>00102 <span class="keywordflow">if</span> (word-><a class="code" href="a00650.html#af171e37da16ed628bf4516e9f934dc08">bln_boxes</a> == NULL ||
205
<a name="l00103"></a>00103 word-><a class="code" href="a00650.html#a211ab5c5a13123ee7bdc7459d3363f9d">rebuild_word</a> == NULL || word-><a class="code" href="a00650.html#a211ab5c5a13123ee7bdc7459d3363f9d">rebuild_word</a>-><a class="code" href="a00635.html#a52fba22521039a79d8c1c8182f602dcc">blobs</a>.<a class="code" href="a00403.html#afb0d8c52a603b6aa965d63bbf06a27dd">empty</a>())
206
<a name="l00104"></a>00104 <span class="keywordflow">return</span>;
207
<a name="l00105"></a>00105
208
<a name="l00106"></a>00106 <a class="code" href="a00356.html">DocQualCallbacks</a> cb(word);
209
<a name="l00107"></a>00107 word-><a class="code" href="a00650.html#af171e37da16ed628bf4516e9f934dc08">bln_boxes</a>-><a class="code" href="a00287.html#aabd2c8e3b6f8dda66725754fa034cc8e">ProcessMatchedBlobs</a>(
210
<a name="l00108"></a>00108 *word-><a class="code" href="a00650.html#a211ab5c5a13123ee7bdc7459d3363f9d">rebuild_word</a>,
211
<a name="l00109"></a>00109 <a class="code" href="a00856.html#ad000c6729bae0f97075ca10a0b5bf7d6">NewPermanentTessCallback</a>(&cb, &<a class="code" href="a00356.html#a7fb05da0218e4e94705cab1d751c7762">DocQualCallbacks::CountAcceptedBlobs</a>));
212
<a name="l00110"></a>00110 *match_count = cb.<a class="code" href="a00356.html#a0bc1e44f1672adeace51f83874c0d0e2">match_count</a>;
213
<a name="l00111"></a>00111 *accepted_match_count = cb.<a class="code" href="a00356.html#af534b546b519077d8dba7b375e6f8f59">accepted_match_count</a>;
214
<a name="l00112"></a>00112 }
215
<a name="l00113"></a>00113
216
<a name="l00114"></a>00114 <span class="comment">/*************************************************************************</span>
217
<a name="l00115"></a>00115 <span class="comment"> * unrej_good_chs()</span>
218
<a name="l00116"></a>00116 <span class="comment"> * Unreject POTENTIAL rejects if the blob passes the blob and outline checks</span>
219
<a name="l00117"></a>00117 <span class="comment"> *************************************************************************/</span>
220
<a name="l00118"></a><a class="code" href="a00607.html#ab156ed65c76c10dadacfc1121624ebc2">00118</a> <span class="keywordtype">void</span> <a class="code" href="a00607.html#ab156ed65c76c10dadacfc1121624ebc2">Tesseract::unrej_good_chs</a>(<a class="code" href="a00650.html">WERD_RES</a> *word, <a class="code" href="a00521.html">ROW</a> *row) {
221
<a name="l00119"></a>00119 <span class="keywordflow">if</span> (word-><a class="code" href="a00650.html#af171e37da16ed628bf4516e9f934dc08">bln_boxes</a> == NULL ||
222
<a name="l00120"></a>00120 word-><a class="code" href="a00650.html#a211ab5c5a13123ee7bdc7459d3363f9d">rebuild_word</a> == NULL || word-><a class="code" href="a00650.html#a211ab5c5a13123ee7bdc7459d3363f9d">rebuild_word</a>-><a class="code" href="a00635.html#a52fba22521039a79d8c1c8182f602dcc">blobs</a>.<a class="code" href="a00403.html#afb0d8c52a603b6aa965d63bbf06a27dd">empty</a>())
223
<a name="l00121"></a>00121 <span class="keywordflow">return</span>;
224
<a name="l00122"></a>00122
225
<a name="l00123"></a>00123 <a class="code" href="a00356.html">DocQualCallbacks</a> cb(word);
226
<a name="l00124"></a>00124 word-><a class="code" href="a00650.html#af171e37da16ed628bf4516e9f934dc08">bln_boxes</a>-><a class="code" href="a00287.html#aabd2c8e3b6f8dda66725754fa034cc8e">ProcessMatchedBlobs</a>(
227
<a name="l00125"></a>00125 *word-><a class="code" href="a00650.html#a211ab5c5a13123ee7bdc7459d3363f9d">rebuild_word</a>,
228
<a name="l00126"></a>00126 <a class="code" href="a00856.html#ad000c6729bae0f97075ca10a0b5bf7d6">NewPermanentTessCallback</a>(&cb, &<a class="code" href="a00356.html#a4b494c625c5aa534c6fa265fb7fe828f">DocQualCallbacks::AcceptIfGoodQuality</a>));
229
<a name="l00127"></a>00127 }
230
<a name="l00128"></a>00128
231
<a name="l00129"></a><a class="code" href="a00607.html#a27dc89a077e5074d22c3c0f9cccb5047">00129</a> <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> <a class="code" href="a00607.html#a27dc89a077e5074d22c3c0f9cccb5047">Tesseract::count_outline_errs</a>(<span class="keywordtype">char</span> c, <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> outline_count) {
232
<a name="l00130"></a>00130 <span class="keywordtype">int</span> expected_outline_count;
233
<a name="l00131"></a>00131
234
<a name="l00132"></a>00132 <span class="keywordflow">if</span> (<a class="code" href="a00557.html">STRING</a> (<a class="code" href="a00607.html#a6c17b9a72b83394d218773f9a297b77a">outlines_odd</a>).contains (c))
235
<a name="l00133"></a>00133 <span class="keywordflow">return</span> 0; <span class="comment">//Dont use this char</span>
236
<a name="l00134"></a>00134 <span class="keywordflow">else</span> <span class="keywordflow">if</span> (<a class="code" href="a00557.html">STRING</a> (<a class="code" href="a00607.html#acb4b01dca8f419468e5ccf97c451e7cf">outlines_2</a>).contains (c))
237
<a name="l00135"></a>00135 expected_outline_count = 2;
238
<a name="l00136"></a>00136 <span class="keywordflow">else</span>
239
<a name="l00137"></a>00137 expected_outline_count = 1;
240
<a name="l00138"></a>00138 <span class="keywordflow">return</span> abs (outline_count - expected_outline_count);
241
<a name="l00139"></a>00139 }
242
<a name="l00140"></a>00140
243
<a name="l00141"></a><a class="code" href="a00607.html#a04a98a9da10e23072f55bc39ec88a12c">00141</a> <span class="keywordtype">void</span> <a class="code" href="a00607.html#a04a98a9da10e23072f55bc39ec88a12c">Tesseract::quality_based_rejection</a>(<a class="code" href="a00482.html">PAGE_RES_IT</a> &page_res_it,
244
<a name="l00142"></a>00142 <a class="code" href="a00831.html#a7712a7e28433d0ade59219a129549b6f">BOOL8</a> good_quality_doc) {
245
<a name="l00143"></a>00143 <span class="keywordflow">if</span> ((<a class="code" href="a00607.html#ab72bc025f4efb212ec99ee21091d7f10">tessedit_good_quality_unrej</a> && good_quality_doc))
246
<a name="l00144"></a>00144 <a class="code" href="a00607.html#af247aff07d2cc55480e36f278c5a76a8">unrej_good_quality_words</a>(page_res_it);
247
<a name="l00145"></a>00145 <a class="code" href="a00607.html#a72b538b58cc5af58b35a390c50086ab5">doc_and_block_rejection</a>(page_res_it, good_quality_doc);
248
<a name="l00146"></a>00146 <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a8a848e8a83e73f182d0ef7324afc741f">unlv_tilde_crunching</a>) {
249
<a name="l00147"></a>00147 <a class="code" href="a00607.html#a2c77d5d94321d7fc717b5376bcb02b79">tilde_crunch</a>(page_res_it);
250
<a name="l00148"></a>00148 <a class="code" href="a00607.html#a06a8ab9064d4c6da290c82e7adf53a8c">tilde_delete</a>(page_res_it);
251
<a name="l00149"></a>00149 }
252
<a name="l00150"></a>00150 }
253
<a name="l00151"></a>00151
254
<a name="l00152"></a>00152
255
<a name="l00153"></a>00153 <span class="comment">/*************************************************************************</span>
256
<a name="l00154"></a>00154 <span class="comment"> * unrej_good_quality_words()</span>
257
<a name="l00155"></a>00155 <span class="comment"> * Accept potential rejects in words which pass the following checks:</span>
258
<a name="l00156"></a>00156 <span class="comment"> * - Contains a potential reject</span>
259
<a name="l00157"></a>00157 <span class="comment"> * - Word looks like a sensible alpha word.</span>
260
<a name="l00158"></a>00158 <span class="comment"> * - Word segmentation is the same as the original image</span>
261
<a name="l00159"></a>00159 <span class="comment"> * - All characters have the expected number of outlines</span>
262
<a name="l00160"></a>00160 <span class="comment"> * NOTE - the rejection counts are recalculated after unrejection</span>
263
<a name="l00161"></a>00161 <span class="comment"> * - CANT do it in a single pass without a bit of fiddling</span>
264
<a name="l00162"></a>00162 <span class="comment"> * - keep it simple but inefficient</span>
265
<a name="l00163"></a>00163 <span class="comment"> *************************************************************************/</span>
266
<a name="l00164"></a><a class="code" href="a00607.html#af247aff07d2cc55480e36f278c5a76a8">00164</a> <span class="keywordtype">void</span> <a class="code" href="a00607.html#af247aff07d2cc55480e36f278c5a76a8">Tesseract::unrej_good_quality_words</a>( <span class="comment">//unreject potential</span>
267
<a name="l00165"></a>00165 <a class="code" href="a00482.html">PAGE_RES_IT</a> &page_res_it) {
268
<a name="l00166"></a>00166 <a class="code" href="a00650.html">WERD_RES</a> *word;
269
<a name="l00167"></a>00167 <a class="code" href="a00522.html">ROW_RES</a> *current_row;
270
<a name="l00168"></a>00168 <a class="code" href="a00283.html">BLOCK_RES</a> *current_block;
271
<a name="l00169"></a>00169 <span class="keywordtype">int</span> i;
272
<a name="l00170"></a>00170
273
<a name="l00171"></a>00171 page_res_it.<a class="code" href="a00482.html#a15febd45d0904bb3c5f7d506368177f0">restart_page</a> ();
274
<a name="l00172"></a>00172 <span class="keywordflow">while</span> (page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a> () != NULL) {
275
<a name="l00173"></a>00173 <a class="code" href="a00607.html#a0751722a5b26971d680f2d23c6e65b95">check_debug_pt</a> (page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a> (), 100);
276
<a name="l00174"></a>00174 <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a40dee9aa1298b5edcd042b9ec45aeb59">bland_unrej</a>) {
277
<a name="l00175"></a>00175 word = page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a> ();
278
<a name="l00176"></a>00176 <span class="keywordflow">for</span> (i = 0; i &lt; word-&gt;<a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#ab3760d96d63b5ca6c4bfeb32ffc8183c">length</a> (); i++) {
279
<a name="l00177"></a>00177 <span class="keywordflow">if</span> (word-><a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>[i].accept_if_good_quality ())
280
<a name="l00178"></a>00178 word-><a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>[i].setrej_quality_accept ();
281
<a name="l00179"></a>00179 }
282
<a name="l00180"></a>00180 page_res_it.<a class="code" href="a00482.html#ab29005f235cc738eebafb7d9ed6fe70b">forward</a> ();
283
<a name="l00181"></a>00181 }
284
<a name="l00182"></a>00182 <span class="keywordflow">else</span> <span class="keywordflow">if</span> ((page_res_it.<a class="code" href="a00482.html#a312f58081e9410962396160eaf5f90e4">row</a> ()-><a class="code" href="a00522.html#aad86ecebb53eb63c4038c5a95d932eea">char_count</a> > 0) &&
285
<a name="l00183"></a>00183 ((page_res_it.<a class="code" href="a00482.html#a312f58081e9410962396160eaf5f90e4">row</a> ()-><a class="code" href="a00522.html#afe7c692eeef6f934ef1c3bd208dd115c">rej_count</a> /
286
<a name="l00184"></a>00184 (float) page_res_it.<a class="code" href="a00482.html#a312f58081e9410962396160eaf5f90e4">row</a> ()-><a class="code" href="a00522.html#aad86ecebb53eb63c4038c5a95d932eea">char_count</a>) <=
287
<a name="l00185"></a>00185 <a class="code" href="a00607.html#a4a2be7907ac8c08a04d704b316b2ee1c">quality_rowrej_pc</a>)) {
288
<a name="l00186"></a>00186 word = page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a> ();
289
<a name="l00187"></a>00187 <span class="keywordflow">if</span> (word-><a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#aabdc95245f033b2e393eccf79d4641e5">quality_recoverable_rejects</a>() &&
290
<a name="l00188"></a>00188 (<a class="code" href="a00607.html#aec01ad16a705e92500c6dc8f209ab9f8">tessedit_unrej_any_wd</a> ||
291
<a name="l00189"></a>00189 <a class="code" href="a00607.html#a51f2ad8b2d849d90bbcfbfca3067c0f1">acceptable_word_string</a>(*word-><a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>,
292
<a name="l00190"></a>00190 word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>(),
293
<a name="l00191"></a>00191 word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a93ab94b6fa6d5c0469fb52d66c62968c">unichar_lengths</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>())
294
<a name="l00192"></a>00192 != <a class="code" href="a00671.html#af9ff5a7d7a1b9eeea7ce4106bffa7eeeaab0741a5a5ff9757ca343af255353ba0" title="Unacceptable word.">AC_UNACCEPTABLE</a>)) {
295
<a name="l00193"></a>00193 <a class="code" href="a00607.html#ab156ed65c76c10dadacfc1121624ebc2">unrej_good_chs</a>(word, page_res_it.<a class="code" href="a00482.html#a312f58081e9410962396160eaf5f90e4">row</a> ()-><a class="code" href="a00522.html#ad3a856f3e9217c47b8f2f54cd3908721">row</a>);
296
<a name="l00194"></a>00194 }
297
<a name="l00195"></a>00195 page_res_it.<a class="code" href="a00482.html#ab29005f235cc738eebafb7d9ed6fe70b">forward</a> ();
298
<a name="l00196"></a>00196 }
299
<a name="l00197"></a>00197 <span class="keywordflow">else</span> {
300
<a name="l00198"></a>00198 <span class="comment">/* Skip to end of dodgy row */</span>
301
<a name="l00199"></a>00199 current_row = page_res_it.<a class="code" href="a00482.html#a312f58081e9410962396160eaf5f90e4">row</a> ();
302
<a name="l00200"></a>00200 <span class="keywordflow">while</span> ((page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a> () != NULL) &&
303
<a name="l00201"></a>00201 (page_res_it.<a class="code" href="a00482.html#a312f58081e9410962396160eaf5f90e4">row</a> () == current_row))
304
<a name="l00202"></a>00202 page_res_it.<a class="code" href="a00482.html#ab29005f235cc738eebafb7d9ed6fe70b">forward</a> ();
305
<a name="l00203"></a>00203 }
306
<a name="l00204"></a>00204 <a class="code" href="a00607.html#a0751722a5b26971d680f2d23c6e65b95">check_debug_pt</a> (page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a> (), 110);
307
<a name="l00205"></a>00205 }
308
<a name="l00206"></a>00206 page_res_it.<a class="code" href="a00482.html#a15febd45d0904bb3c5f7d506368177f0">restart_page</a> ();
309
<a name="l00207"></a>00207 page_res_it.<a class="code" href="a00482.html#aab221a373111c4be685444b5633e22a5">page_res</a>-><a class="code" href="a00481.html#a489c2f236ddd8271ce74950abb2d0881">char_count</a> = 0;
310
<a name="l00208"></a>00208 page_res_it.<a class="code" href="a00482.html#aab221a373111c4be685444b5633e22a5">page_res</a>-><a class="code" href="a00481.html#a173220902fa987fe73708fae5f2c9560">rej_count</a> = 0;
311
<a name="l00209"></a>00209 current_block = NULL;
312
<a name="l00210"></a>00210 current_row = NULL;
313
<a name="l00211"></a>00211 <span class="keywordflow">while</span> (page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a> () != NULL) {
314
<a name="l00212"></a>00212 <span class="keywordflow">if</span> (current_block != page_res_it.<a class="code" href="a00482.html#a8a349c5163802e1f127d5c88db044059">block</a> ()) {
315
<a name="l00213"></a>00213 current_block = page_res_it.<a class="code" href="a00482.html#a8a349c5163802e1f127d5c88db044059">block</a> ();
316
<a name="l00214"></a>00214 current_block-><a class="code" href="a00283.html#a9b394a8aa95834346c8bc02f0493933b">char_count</a> = 0;
317
<a name="l00215"></a>00215 current_block-><a class="code" href="a00283.html#a508dffc62c4a514859372e0a9a6530ca">rej_count</a> = 0;
318
<a name="l00216"></a>00216 }
319
<a name="l00217"></a>00217 <span class="keywordflow">if</span> (current_row != page_res_it.<a class="code" href="a00482.html#a312f58081e9410962396160eaf5f90e4">row</a> ()) {
320
<a name="l00218"></a>00218 current_row = page_res_it.<a class="code" href="a00482.html#a312f58081e9410962396160eaf5f90e4">row</a> ();
321
<a name="l00219"></a>00219 current_row-><a class="code" href="a00522.html#aad86ecebb53eb63c4038c5a95d932eea">char_count</a> = 0;
322
<a name="l00220"></a>00220 current_row-><a class="code" href="a00522.html#afe7c692eeef6f934ef1c3bd208dd115c">rej_count</a> = 0;
323
<a name="l00221"></a>00221 current_row-><a class="code" href="a00522.html#aee04e075058db382613b9fd2d6302d1a">whole_word_rej_count</a> = 0;
324
<a name="l00222"></a>00222 }
325
<a name="l00223"></a>00223 page_res_it.<a class="code" href="a00482.html#a1d1af03a63da4b7f551770e07ca86414">rej_stat_word</a> ();
326
<a name="l00224"></a>00224 page_res_it.<a class="code" href="a00482.html#ab29005f235cc738eebafb7d9ed6fe70b">forward</a> ();
327
<a name="l00225"></a>00225 }
328
<a name="l00226"></a>00226 }
329
<a name="l00227"></a>00227
330
<a name="l00228"></a>00228
331
<a name="l00229"></a>00229 <span class="comment">/*************************************************************************</span>
332
<a name="l00230"></a>00230 <span class="comment"> * doc_and_block_rejection()</span>
333
<a name="l00231"></a>00231 <span class="comment"> *</span>
334
<a name="l00232"></a>00232 <span class="comment"> * If the page has too many rejects - reject all of it.</span>
335
<a name="l00233"></a>00233 <span class="comment"> * If any block has too many rejects - reject all words in the block</span>
336
<a name="l00234"></a>00234 <span class="comment"> *************************************************************************/</span>
337
<a name="l00235"></a>00235
338
<a name="l00236"></a><a class="code" href="a00607.html#a72b538b58cc5af58b35a390c50086ab5">00236</a> <span class="keywordtype">void</span> <a class="code" href="a00607.html#a72b538b58cc5af58b35a390c50086ab5">Tesseract::doc_and_block_rejection</a>( <span class="comment">//reject big chunks</span>
339
<a name="l00237"></a>00237 <a class="code" href="a00482.html">PAGE_RES_IT</a> &amp;page_res_it,
340
<a name="l00238"></a>00238 <a class="code" href="a00831.html#a7712a7e28433d0ade59219a129549b6f">BOOL8</a> good_quality_doc) {
341
<a name="l00239"></a>00239 <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> block_no = 0;
342
<a name="l00240"></a>00240 <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> row_no = 0;
343
<a name="l00241"></a>00241 <a class="code" href="a00283.html">BLOCK_RES</a> *current_block;
344
<a name="l00242"></a>00242 <a class="code" href="a00522.html">ROW_RES</a> *current_row;
345
<a name="l00243"></a>00243
346
<a name="l00244"></a>00244 <a class="code" href="a00831.html#a7712a7e28433d0ade59219a129549b6f">BOOL8</a> rej_word;
347
<a name="l00245"></a>00245 <a class="code" href="a00831.html#a7712a7e28433d0ade59219a129549b6f">BOOL8</a> prev_word_rejected;
348
<a name="l00246"></a>00246 <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> char_quality = 0;
349
<a name="l00247"></a>00247 <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> accepted_char_quality;
350
<a name="l00248"></a>00248
351
<a name="l00249"></a>00249 <span class="keywordflow">if</span> (page_res_it.<a class="code" href="a00482.html#aab221a373111c4be685444b5633e22a5">page_res</a>-><a class="code" href="a00481.html#a173220902fa987fe73708fae5f2c9560">rej_count</a> * 100.0 /
352
<a name="l00250"></a>00250 page_res_it.<a class="code" href="a00482.html#aab221a373111c4be685444b5633e22a5">page_res</a>-><a class="code" href="a00481.html#a489c2f236ddd8271ce74950abb2d0881">char_count</a> > <a class="code" href="a00607.html#a0c689f16fb64963ca30f7b0fddbea414">tessedit_reject_doc_percent</a>) {
353
<a name="l00251"></a>00251 <a class="code" href="a00677.html#a1b9934928e2b953ee5e91abb58cf043f">reject_whole_page</a>(page_res_it);
354
<a name="l00252"></a>00252 <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a9771a56b87e31325c043fa195d6885ec">tessedit_debug_doc_rejection</a>) {
355
<a name="l00253"></a>00253 <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a>(<span class="stringliteral">"REJECT ALL #chars: %d #Rejects: %d; \n"</span>,
356
<a name="l00254"></a>00254 page_res_it.<a class="code" href="a00482.html#aab221a373111c4be685444b5633e22a5">page_res</a>-><a class="code" href="a00481.html#a489c2f236ddd8271ce74950abb2d0881">char_count</a>,
357
<a name="l00255"></a>00255 page_res_it.<a class="code" href="a00482.html#aab221a373111c4be685444b5633e22a5">page_res</a>-><a class="code" href="a00481.html#a173220902fa987fe73708fae5f2c9560">rej_count</a>);
358
<a name="l00256"></a>00256 }
359
<a name="l00257"></a>00257 } <span class="keywordflow">else</span> {
360
<a name="l00258"></a>00258 <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a9771a56b87e31325c043fa195d6885ec">tessedit_debug_doc_rejection</a>) {
361
<a name="l00259"></a>00259 <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a>(<span class="stringliteral">"NO PAGE REJECTION #chars: %d # Rejects: %d; \n"</span>,
362
<a name="l00260"></a>00260 page_res_it.<a class="code" href="a00482.html#aab221a373111c4be685444b5633e22a5">page_res</a>-><a class="code" href="a00481.html#a489c2f236ddd8271ce74950abb2d0881">char_count</a>,
363
<a name="l00261"></a>00261 page_res_it.<a class="code" href="a00482.html#aab221a373111c4be685444b5633e22a5">page_res</a>-><a class="code" href="a00481.html#a173220902fa987fe73708fae5f2c9560">rej_count</a>);
364
<a name="l00262"></a>00262 }
365
<a name="l00263"></a>00263
366
<a name="l00264"></a>00264 <span class="comment">/* Walk blocks testing for block rejection */</span>
367
<a name="l00265"></a>00265
368
<a name="l00266"></a>00266 page_res_it.<a class="code" href="a00482.html#a15febd45d0904bb3c5f7d506368177f0">restart_page</a>();
369
<a name="l00267"></a>00267 <a class="code" href="a00650.html">WERD_RES</a>* word;
370
<a name="l00268"></a>00268 <span class="keywordflow">while</span> ((word = page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a>()) != NULL) {
371
<a name="l00269"></a>00269 current_block = page_res_it.<a class="code" href="a00482.html#a8a349c5163802e1f127d5c88db044059">block</a>();
372
<a name="l00270"></a>00270 block_no = current_block-><a class="code" href="a00283.html#ae7c240d4878247ebdfe78433446751bf">block</a>-><a class="code" href="a00500.html#acc3cf97b51fa93a659312c966df4a756">index</a>();
373
<a name="l00271"></a>00271 <span class="keywordflow">if</span> (current_block-><a class="code" href="a00283.html#a9b394a8aa95834346c8bc02f0493933b">char_count</a> > 0 &amp;&amp;
374
<a name="l00272"></a>00272 (current_block-><a class="code" href="a00283.html#a508dffc62c4a514859372e0a9a6530ca">rej_count</a> * 100.0 / current_block-><a class="code" href="a00283.html#a9b394a8aa95834346c8bc02f0493933b">char_count</a>) >
375
<a name="l00273"></a>00273 <a class="code" href="a00607.html#ae1f650e4873b5842f2ae57a3055868c9">tessedit_reject_block_percent</a>) {
376
<a name="l00274"></a>00274 <span class="keywordflow">if</span> (<a class="code" href="a00607.html#afe2ceb83896ec93f0add94703dee79ce">tessedit_debug_block_rejection</a>) {
377
<a name="l00275"></a>00275 <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a>(<span class="stringliteral">"REJECTING BLOCK %d #chars: %d; #Rejects: %d\n"</span>,
378
<a name="l00276"></a>00276 block_no, current_block-><a class="code" href="a00283.html#a9b394a8aa95834346c8bc02f0493933b">char_count</a>,
379
<a name="l00277"></a>00277 current_block-><a class="code" href="a00283.html#a508dffc62c4a514859372e0a9a6530ca">rej_count</a>);
380
<a name="l00278"></a>00278 }
381
<a name="l00279"></a>00279 prev_word_rejected = FALSE;
382
<a name="l00280"></a>00280 <span class="keywordflow">while</span> ((word = page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a>()) != NULL &amp;&amp;
383
<a name="l00281"></a>00281 (page_res_it.<a class="code" href="a00482.html#a8a349c5163802e1f127d5c88db044059">block</a>() == current_block)) {
384
<a name="l00282"></a>00282 <span class="keywordflow">if</span> (<a class="code" href="a00607.html#acf0428aa8b6b2390e5ba2c5f185b7f49">tessedit_preserve_blk_rej_perfect_wds</a>) {
385
<a name="l00283"></a>00283 rej_word = word-><a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#ac8e3f1806f06dfb616d76f68f1b96bc6">reject_count</a>() > 0 ||
386
<a name="l00284"></a>00284 word-><a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#ab3760d96d63b5ca6c4bfeb32ffc8183c">length</a> () &lt; <a class="code" href="a00607.html#a98362ec97923314723dbeb56fd2528fd">tessedit_preserve_min_wd_len</a>;
387
<a name="l00285"></a>00285 <span class="keywordflow">if</span> (rej_word &amp;&amp; <a class="code" href="a00607.html#a00cc24fc46a2c217ff7e614f4ac0db39">tessedit_dont_blkrej_good_wds</a> &amp;&amp;
388
<a name="l00286"></a>00286 word-><a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#ab3760d96d63b5ca6c4bfeb32ffc8183c">length</a>() >= <a class="code" href="a00607.html#a98362ec97923314723dbeb56fd2528fd">tessedit_preserve_min_wd_len</a> &amp;&amp;
389
<a name="l00287"></a>00287 <a class="code" href="a00607.html#a51f2ad8b2d849d90bbcfbfca3067c0f1">acceptable_word_string</a>(
390
<a name="l00288"></a>00288 *word-><a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>,
391
<a name="l00289"></a>00289 word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>(),
392
<a name="l00290"></a>00290 word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a93ab94b6fa6d5c0469fb52d66c62968c">unichar_lengths</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>()) !=
393
<a name="l00291"></a>00291 <a class="code" href="a00671.html#af9ff5a7d7a1b9eeea7ce4106bffa7eeeaab0741a5a5ff9757ca343af255353ba0" title="Unacceptable word.">AC_UNACCEPTABLE</a>) {
394
<a name="l00292"></a>00292 <a class="code" href="a00607.html#a348314a50f786908e27bbaa3733d14d4">word_char_quality</a>(word, page_res_it.<a class="code" href="a00482.html#a312f58081e9410962396160eaf5f90e4">row</a>()-><a class="code" href="a00522.html#ad3a856f3e9217c47b8f2f54cd3908721">row</a>,
395
<a name="l00293"></a>00293 &amp;char_quality,
396
<a name="l00294"></a>00294 &amp;accepted_char_quality);
397
<a name="l00295"></a>00295 rej_word = char_quality != word-><a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#ab3760d96d63b5ca6c4bfeb32ffc8183c">length</a>();
398
<a name="l00296"></a>00296 }
399
<a name="l00297"></a>00297 } <span class="keywordflow">else</span> {
400
<a name="l00298"></a>00298 rej_word = TRUE;
401
<a name="l00299"></a>00299 }
402
<a name="l00300"></a>00300 <span class="keywordflow">if</span> (rej_word) {
403
<a name="l00301"></a>00301 <span class="comment">/*</span>
404
<a name="l00302"></a>00302 <span class="comment"> Reject spacing if both current and prev words are rejected.</span>
405
<a name="l00303"></a>00303 <span class="comment"> NOTE - this is NOT restricted to FUZZY spaces. - When tried this</span>
406
<a name="l00304"></a>00304 <span class="comment"> generated more space errors.</span>
407
<a name="l00305"></a>00305 <span class="comment"> */</span>
408
<a name="l00306"></a>00306 <span class="keywordflow">if</span> (<a class="code" href="a00607.html#aad38132f39f060b3bf998f316abb4d76">tessedit_use_reject_spaces</a> &amp;&amp;
409
<a name="l00307"></a>00307 prev_word_rejected &amp;&amp;
410
<a name="l00308"></a>00308 page_res_it.<a class="code" href="a00482.html#a67bcb98908ab79b0a00e1d0823208a23">prev_row</a>() == page_res_it.<a class="code" href="a00482.html#a312f58081e9410962396160eaf5f90e4">row</a>() &amp;&amp;
411
<a name="l00309"></a>00309 word-><a class="code" href="a00650.html#afc15fa7c1b5ad55d6772dea0d6865df1">word</a>-><a class="code" href="a00648.html#ab2979974cf23f5c66f4e77cc51f2e189">space</a>() == 1)
412
<a name="l00310"></a>00310 word-><a class="code" href="a00650.html#a6164dbe5d7d34658c64682be24ea257a">reject_spaces</a> = TRUE;
413
<a name="l00311"></a>00311 word-><a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#aa86752e4e9277f2a616df08459ea08e3">rej_word_block_rej</a>();
414
<a name="l00312"></a>00312 }
415
<a name="l00313"></a>00313 prev_word_rejected = rej_word;
416
<a name="l00314"></a>00314 page_res_it.<a class="code" href="a00482.html#ab29005f235cc738eebafb7d9ed6fe70b">forward</a>();
417
<a name="l00315"></a>00315 }
418
<a name="l00316"></a>00316 } <span class="keywordflow">else</span> {
419
<a name="l00317"></a>00317 <span class="keywordflow">if</span> (<a class="code" href="a00607.html#afe2ceb83896ec93f0add94703dee79ce">tessedit_debug_block_rejection</a>) {
420
<a name="l00318"></a>00318 <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a>(<span class="stringliteral">"NOT REJECTING BLOCK %d #chars: %d # Rejects: %d; \n"</span>,
421
<a name="l00319"></a>00319 block_no, page_res_it.<a class="code" href="a00482.html#a8a349c5163802e1f127d5c88db044059">block</a>()-><a class="code" href="a00283.html#a9b394a8aa95834346c8bc02f0493933b">char_count</a>,
422
<a name="l00320"></a>00320 page_res_it.<a class="code" href="a00482.html#a8a349c5163802e1f127d5c88db044059">block</a>()-><a class="code" href="a00283.html#a508dffc62c4a514859372e0a9a6530ca">rej_count</a>);
423
<a name="l00321"></a>00321 }
424
<a name="l00322"></a>00322
425
<a name="l00323"></a>00323 <span class="comment">/* Walk rows in block testing for row rejection */</span>
426
<a name="l00324"></a>00324 row_no = 0;
427
<a name="l00325"></a>00325 <span class="keywordflow">while</span> ((word = page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a>()) != NULL &amp;&amp;
428
<a name="l00326"></a>00326 page_res_it.<a class="code" href="a00482.html#a8a349c5163802e1f127d5c88db044059">block</a>() == current_block) {
429
<a name="l00327"></a>00327 current_row = page_res_it.<a class="code" href="a00482.html#a312f58081e9410962396160eaf5f90e4">row</a>();
430
<a name="l00328"></a>00328 row_no++;
431
<a name="l00329"></a>00329 <span class="comment">/* Reject whole row if:</span>
432
<a name="l00330"></a>00330 <span class="comment"> fraction of chars on row which are rejected exceed a limit AND</span>
433
<a name="l00331"></a>00331 <span class="comment"> fraction rejects which occur in WHOLE WERD rejects is LESS THAN a</span>
434
<a name="l00332"></a>00332 <span class="comment"> limit</span>
435
<a name="l00333"></a>00333 <span class="comment"> */</span>
436
<a name="l00334"></a>00334 <span class="keywordflow">if</span> (current_row-><a class="code" href="a00522.html#aad86ecebb53eb63c4038c5a95d932eea">char_count</a> > 0 &amp;&amp;
437
<a name="l00335"></a>00335 (current_row-><a class="code" href="a00522.html#afe7c692eeef6f934ef1c3bd208dd115c">rej_count</a> * 100.0 / current_row-><a class="code" href="a00522.html#aad86ecebb53eb63c4038c5a95d932eea">char_count</a>) >
438
<a name="l00336"></a>00336 <a class="code" href="a00607.html#a73cfd99b1d3cd76679efa1a19a4e1d1a">tessedit_reject_row_percent</a> &amp;&amp;
439
<a name="l00337"></a>00337 (current_row-><a class="code" href="a00522.html#aee04e075058db382613b9fd2d6302d1a">whole_word_rej_count</a> * 100.0 /
440
<a name="l00338"></a>00338 current_row-><a class="code" href="a00522.html#afe7c692eeef6f934ef1c3bd208dd115c">rej_count</a>) &lt;
441
<a name="l00339"></a>00339 <a class="code" href="a00607.html#a7124d6d656fc163de05f88b6e31e9681">tessedit_whole_wd_rej_row_percent</a>) {
442
<a name="l00340"></a>00340 <span class="keywordflow">if</span> (<a class="code" href="a00607.html#afe2ceb83896ec93f0add94703dee79ce">tessedit_debug_block_rejection</a>) {
443
<a name="l00341"></a>00341 <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a>(<span class="stringliteral">"REJECTING ROW %d #chars: %d; #Rejects: %d\n"</span>,
444
<a name="l00342"></a>00342 row_no, current_row-><a class="code" href="a00522.html#aad86ecebb53eb63c4038c5a95d932eea">char_count</a>,
445
<a name="l00343"></a>00343 current_row-><a class="code" href="a00522.html#afe7c692eeef6f934ef1c3bd208dd115c">rej_count</a>);
446
<a name="l00344"></a>00344 }
447
<a name="l00345"></a>00345 prev_word_rejected = FALSE;
448
<a name="l00346"></a>00346 <span class="keywordflow">while</span> ((word = page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a>()) != NULL &amp;&amp;
449
<a name="l00347"></a>00347 page_res_it.<a class="code" href="a00482.html#a312f58081e9410962396160eaf5f90e4">row</a> () == current_row) {
450
<a name="l00348"></a>00348 <span class="comment">/* Preserve words on good docs unless they are mostly rejected*/</span>
451
<a name="l00349"></a>00349 <span class="keywordflow">if</span> (!<a class="code" href="a00607.html#ade40da20d4ed20ac262fb25b50757623">tessedit_row_rej_good_docs</a> &amp;&amp; good_quality_doc) {
452
<a name="l00350"></a>00350 rej_word = word-><a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#ac8e3f1806f06dfb616d76f68f1b96bc6">reject_count</a>() /
453
<a name="l00351"></a>00351 <span class="keyword">static_cast&lt;</span><span class="keywordtype">float</span><span class="keyword">&gt;</span>(word-><a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#ab3760d96d63b5ca6c4bfeb32ffc8183c">length</a>()) >
454
<a name="l00352"></a>00352 <a class="code" href="a00607.html#afe905eb6b5f607815ccff87755af5d30">tessedit_good_doc_still_rowrej_wd</a>;
455
<a name="l00353"></a>00353 } <span class="keywordflow">else</span> <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a1ac173c27cc204ef8efa50dc7476296a">tessedit_preserve_row_rej_perfect_wds</a>) {
456
<a name="l00354"></a>00354 <span class="comment">/* Preserve perfect words anyway */</span>
457
<a name="l00355"></a>00355 rej_word = word-><a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#ac8e3f1806f06dfb616d76f68f1b96bc6">reject_count</a>() > 0 ||
458
<a name="l00356"></a>00356 word-><a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#ab3760d96d63b5ca6c4bfeb32ffc8183c">length</a> () &lt; <a class="code" href="a00607.html#a98362ec97923314723dbeb56fd2528fd">tessedit_preserve_min_wd_len</a>;
459
<a name="l00357"></a>00357 <span class="keywordflow">if</span> (rej_word &amp;&amp; <a class="code" href="a00607.html#a1ede84f4a4f39a9b6f5376ea36907b98">tessedit_dont_rowrej_good_wds</a> &amp;&amp;
460
<a name="l00358"></a>00358 word-><a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#ab3760d96d63b5ca6c4bfeb32ffc8183c">length</a>() >= <a class="code" href="a00607.html#a98362ec97923314723dbeb56fd2528fd">tessedit_preserve_min_wd_len</a> &amp;&amp;
461
<a name="l00359"></a>00359 <a class="code" href="a00607.html#a51f2ad8b2d849d90bbcfbfca3067c0f1">acceptable_word_string</a>(*word-><a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>,
462
<a name="l00360"></a>00360 word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>(),
463
<a name="l00361"></a>00361 word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a93ab94b6fa6d5c0469fb52d66c62968c">unichar_lengths</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>()) !=
464
<a name="l00362"></a>00362 <a class="code" href="a00671.html#af9ff5a7d7a1b9eeea7ce4106bffa7eeeaab0741a5a5ff9757ca343af255353ba0" title="Unacceptable word.">AC_UNACCEPTABLE</a>) {
465
<a name="l00363"></a>00363 <a class="code" href="a00607.html#a348314a50f786908e27bbaa3733d14d4">word_char_quality</a>(word, page_res_it.<a class="code" href="a00482.html#a312f58081e9410962396160eaf5f90e4">row</a>()-><a class="code" href="a00522.html#ad3a856f3e9217c47b8f2f54cd3908721">row</a>,
466
<a name="l00364"></a>00364 &amp;char_quality,
467
<a name="l00365"></a>00365 &amp;accepted_char_quality);
468
<a name="l00366"></a>00366 rej_word = char_quality != word-><a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#ab3760d96d63b5ca6c4bfeb32ffc8183c">length</a>();
469
<a name="l00367"></a>00367 }
470
<a name="l00368"></a>00368 } <span class="keywordflow">else</span> {
471
<a name="l00369"></a>00369 rej_word = TRUE;
472
<a name="l00370"></a>00370 }
473
<a name="l00371"></a>00371 <span class="keywordflow">if</span> (rej_word) {
474
<a name="l00372"></a>00372 <span class="comment">/*</span>
475
<a name="l00373"></a>00373 <span class="comment"> Reject spacing if both current and prev words are rejected.</span>
476
<a name="l00374"></a>00374 <span class="comment"> NOTE - this is NOT restricted to FUZZY spaces. - When tried</span>
477
<a name="l00375"></a>00375 <span class="comment"> this generated more space errors.</span>
478
<a name="l00376"></a>00376 <span class="comment"> */</span>
479
<a name="l00377"></a>00377 <span class="keywordflow">if</span> (<a class="code" href="a00607.html#aad38132f39f060b3bf998f316abb4d76">tessedit_use_reject_spaces</a> &amp;&amp;
480
<a name="l00378"></a>00378 prev_word_rejected &amp;&amp;
481
<a name="l00379"></a>00379 page_res_it.<a class="code" href="a00482.html#a67bcb98908ab79b0a00e1d0823208a23">prev_row</a>() == page_res_it.<a class="code" href="a00482.html#a312f58081e9410962396160eaf5f90e4">row</a>() &amp;&amp;
482
<a name="l00380"></a>00380 word-><a class="code" href="a00650.html#afc15fa7c1b5ad55d6772dea0d6865df1">word</a>-><a class="code" href="a00648.html#ab2979974cf23f5c66f4e77cc51f2e189">space</a> () == 1)
483
<a name="l00381"></a>00381 word-><a class="code" href="a00650.html#a6164dbe5d7d34658c64682be24ea257a">reject_spaces</a> = TRUE;
484
<a name="l00382"></a>00382 word-><a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#a03037d4a12a4ea33a377cf21c9400139">rej_word_row_rej</a>();
485
<a name="l00383"></a>00383 }
486
<a name="l00384"></a>00384 prev_word_rejected = rej_word;
487
<a name="l00385"></a>00385 page_res_it.<a class="code" href="a00482.html#ab29005f235cc738eebafb7d9ed6fe70b">forward</a>();
488
<a name="l00386"></a>00386 }
489
<a name="l00387"></a>00387 } <span class="keywordflow">else</span> {
490
<a name="l00388"></a>00388 <span class="keywordflow">if</span> (<a class="code" href="a00607.html#afe2ceb83896ec93f0add94703dee79ce">tessedit_debug_block_rejection</a>) {
491
<a name="l00389"></a>00389 <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a>(<span class="stringliteral">"NOT REJECTING ROW %d #chars: %d # Rejects: %d; \n"</span>,
492
<a name="l00390"></a>00390 row_no, current_row-><a class="code" href="a00522.html#aad86ecebb53eb63c4038c5a95d932eea">char_count</a>, current_row-><a class="code" href="a00522.html#afe7c692eeef6f934ef1c3bd208dd115c">rej_count</a>);
493
<a name="l00391"></a>00391 }
494
<a name="l00392"></a>00392 <span class="keywordflow">while</span> (page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a>() != NULL &amp;&amp;
495
<a name="l00393"></a>00393 page_res_it.<a class="code" href="a00482.html#a312f58081e9410962396160eaf5f90e4">row</a>() == current_row)
496
<a name="l00394"></a>00394 page_res_it.<a class="code" href="a00482.html#ab29005f235cc738eebafb7d9ed6fe70b">forward</a>();
497
<a name="l00395"></a>00395 }
498
<a name="l00396"></a>00396 }
499
<a name="l00397"></a>00397 }
500
<a name="l00398"></a>00398 }
501
<a name="l00399"></a>00399 }
502
<a name="l00400"></a>00400 }
503
<a name="l00401"></a>00401
504
<a name="l00402"></a>00402 } <span class="comment">// namespace tesseract</span>
505
<a name="l00403"></a>00403
506
<a name="l00404"></a>00404
507
<a name="l00405"></a>00405 <span class="comment">/*************************************************************************</span>
508
<a name="l00406"></a>00406 <span class="comment"> * reject_whole_page()</span>
509
<a name="l00407"></a>00407 <span class="comment"> * Dont believe any of it - set the reject map to 00..00 in all words</span>
510
<a name="l00408"></a>00408 <span class="comment"> *</span>
511
<a name="l00409"></a>00409 <span class="comment"> *************************************************************************/</span>
512
<a name="l00410"></a>00410
513
<a name="l00411"></a><a class="code" href="a00678.html#a1b9934928e2b953ee5e91abb58cf043f">00411</a> <span class="keywordtype">void</span> <a class="code" href="a00677.html#a1b9934928e2b953ee5e91abb58cf043f">reject_whole_page</a>(<a class="code" href="a00482.html">PAGE_RES_IT</a> &amp;page_res_it) {
514
<a name="l00412"></a>00412 page_res_it.<a class="code" href="a00482.html#a15febd45d0904bb3c5f7d506368177f0">restart_page</a> ();
515
<a name="l00413"></a>00413 <span class="keywordflow">while</span> (page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a> () != NULL) {
516
<a name="l00414"></a>00414 page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a> ()-><a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#a9fb4a691f24ea06370b55650c197db8b">rej_word_doc_rej</a> ();
517
<a name="l00415"></a>00415 page_res_it.<a class="code" href="a00482.html#ab29005f235cc738eebafb7d9ed6fe70b">forward</a> ();
518
<a name="l00416"></a>00416 }
519
<a name="l00417"></a>00417 <span class="comment">//whole page is rejected</span>
520
<a name="l00418"></a>00418 page_res_it.<a class="code" href="a00482.html#aab221a373111c4be685444b5633e22a5">page_res</a>-><a class="code" href="a00481.html#a65ed9c199c2ef8b3dc464225ad893e7d">rejected</a> = TRUE;
521
<a name="l00419"></a>00419 }
522
<a name="l00420"></a>00420
523
<a name="l00421"></a>00421 <span class="keyword">namespace </span><a class="code" href="a01266.html#afed58feacb84df2de88bdd613cfdba6d">tesseract</a> {
524
<a name="l00422"></a><a class="code" href="a00607.html#a2c77d5d94321d7fc717b5376bcb02b79">00422</a> <span class="keywordtype">void</span> <a class="code" href="a00607.html#a2c77d5d94321d7fc717b5376bcb02b79">Tesseract::tilde_crunch</a>(<a class="code" href="a00482.html">PAGE_RES_IT</a> &amp;page_res_it) {
525
<a name="l00423"></a>00423 <a class="code" href="a00650.html">WERD_RES</a> *word;
526
<a name="l00424"></a>00424 <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42">GARBAGE_LEVEL</a> garbage_level;
527
<a name="l00425"></a>00425 <a class="code" href="a00482.html">PAGE_RES_IT</a> copy_it;
528
<a name="l00426"></a>00426 <a class="code" href="a00831.html#a7712a7e28433d0ade59219a129549b6f">BOOL8</a> prev_potential_marked = FALSE;
529
<a name="l00427"></a>00427 <a class="code" href="a00831.html#a7712a7e28433d0ade59219a129549b6f">BOOL8</a> found_terrible_word = FALSE;
530
<a name="l00428"></a>00428 <a class="code" href="a00831.html#a7712a7e28433d0ade59219a129549b6f">BOOL8</a> ok_dict_word;
531
<a name="l00429"></a>00429
532
<a name="l00430"></a>00430 page_res_it.<a class="code" href="a00482.html#a15febd45d0904bb3c5f7d506368177f0">restart_page</a>();
533
<a name="l00431"></a>00431 <span class="keywordflow">while</span> (page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a>() != NULL) {
534
<a name="l00432"></a>00432 <a class="code" href="a00505.html">POLY_BLOCK</a>* pb = page_res_it.<a class="code" href="a00482.html#a8a349c5163802e1f127d5c88db044059">block</a>()-><a class="code" href="a00283.html#ae7c240d4878247ebdfe78433446751bf">block</a>-><a class="code" href="a00500.html#a6670779c69aca2d574e4a0590d9b3939">poly_block</a>();
535
<a name="l00433"></a>00433 <span class="keywordflow">if</span> (pb != NULL &amp;&amp; !pb-><a class="code" href="a00505.html#abd32dee532afe634cdbacffc0b53e660">IsText</a>()) {
536
<a name="l00434"></a>00434 page_res_it.<a class="code" href="a00482.html#ab29005f235cc738eebafb7d9ed6fe70b">forward</a>();
537
<a name="l00435"></a>00435 <span class="keywordflow">continue</span>;
538
<a name="l00436"></a>00436 }
539
<a name="l00437"></a>00437 word = page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a>();
540
<a name="l00438"></a>00438
541
<a name="l00439"></a>00439 <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a49a94ed873a2ddeea43f4b12a8774222">crunch_early_convert_bad_unlv_chs</a>)
542
<a name="l00440"></a>00440 <a class="code" href="a00607.html#a81b3e2c28a090f5a877dd0f67a4080b6">convert_bad_unlv_chs</a>(word);
543
<a name="l00441"></a>00441
544
<a name="l00442"></a>00442 <span class="keywordflow">if</span> (<a class="code" href="a00607.html#aefd718b3d3abe04fba6ff083019f8374">crunch_early_merge_tess_fails</a>)
545
<a name="l00443"></a>00443 word-><a class="code" href="a00650.html#af092edf58a4a718687aa35c6a458cada">merge_tess_fails</a>();
546
<a name="l00444"></a>00444
547
<a name="l00445"></a>00445 <span class="keywordflow">if</span> (word-><a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#aec0cef1513cd939f0db5f5436f8fdbc6">accept_count</a> () != 0) {
548
<a name="l00446"></a>00446 found_terrible_word = FALSE;
549
<a name="l00447"></a>00447 <span class="comment">//Forget earlier potential crunches</span>
550
<a name="l00448"></a>00448 prev_potential_marked = FALSE;
551
<a name="l00449"></a>00449 }
552
<a name="l00450"></a>00450 <span class="keywordflow">else</span> {
553
<a name="l00451"></a>00451 ok_dict_word = <a class="code" href="a00607.html#a5c4eea7412e972bf2fdd2638315e5457">safe_dict_word</a>(word);
554
<a name="l00452"></a>00452 garbage_level = <a class="code" href="a00607.html#afad44e1105d7f57e1a99d9076c4a3b25">garbage_word</a> (word, ok_dict_word);
555
<a name="l00453"></a>00453
556
<a name="l00454"></a>00454 <span class="keywordflow">if</span> ((garbage_level != <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42ac744add072d392855d098a995acb3751">G_NEVER_CRUNCH</a>) &amp;&amp;
557
<a name="l00455"></a>00455 (<a class="code" href="a00607.html#a12c9ef89f1b328cb272cae894895b2c7">terrible_word_crunch</a> (word, garbage_level))) {
558
<a name="l00456"></a>00456 <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a20a1206ce17b07676f829fdee0034eb5">crunch_debug</a> > 0) {
559
<a name="l00457"></a>00457 <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a> (<span class="stringliteral">"T CRUNCHING: \"%s\"\n"</span>,
560
<a name="l00458"></a>00458 word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>());
561
<a name="l00459"></a>00459 }
562
<a name="l00460"></a>00460 word-><a class="code" href="a00650.html#a82d32a9f59bc1c08da9537303c56615b">unlv_crunch_mode</a> = <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9ad0198ebd5b9e3299dd94f0c5f4d3e003">CR_KEEP_SPACE</a>;
563
<a name="l00461"></a>00461 <span class="keywordflow">if</span> (prev_potential_marked) {
564
<a name="l00462"></a>00462 <span class="keywordflow">while</span> (copy_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a> () != word) {
565
<a name="l00463"></a>00463 <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a20a1206ce17b07676f829fdee0034eb5">crunch_debug</a> > 0) {
566
<a name="l00464"></a>00464 <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a> (<span class="stringliteral">"P1 CRUNCHING: \"%s\"\n"</span>,
567
<a name="l00465"></a>00465 copy_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a>()-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>());
568
<a name="l00466"></a>00466 }
569
<a name="l00467"></a>00467 copy_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a> ()-><a class="code" href="a00650.html#a82d32a9f59bc1c08da9537303c56615b">unlv_crunch_mode</a> = <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9ad0198ebd5b9e3299dd94f0c5f4d3e003">CR_KEEP_SPACE</a>;
570
<a name="l00468"></a>00468 copy_it.<a class="code" href="a00482.html#ab29005f235cc738eebafb7d9ed6fe70b">forward</a> ();
571
<a name="l00469"></a>00469 }
572
<a name="l00470"></a>00470 prev_potential_marked = FALSE;
573
<a name="l00471"></a>00471 }
574
<a name="l00472"></a>00472 found_terrible_word = TRUE;
575
<a name="l00473"></a>00473 }
576
<a name="l00474"></a>00474 <span class="keywordflow">else</span> <span class="keywordflow">if</span> ((garbage_level != <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42ac744add072d392855d098a995acb3751">G_NEVER_CRUNCH</a>) &amp;&amp;
577
<a name="l00475"></a>00475 (<a class="code" href="a00607.html#a042a9bb2b0053f825baf0825addd54fe">potential_word_crunch</a> (word,
578
<a name="l00476"></a>00476 garbage_level, ok_dict_word))) {
579
<a name="l00477"></a>00477 <span class="keywordflow">if</span> (found_terrible_word) {
580
<a name="l00478"></a>00478 <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a20a1206ce17b07676f829fdee0034eb5">crunch_debug</a> > 0) {
581
<a name="l00479"></a>00479 <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a> (<span class="stringliteral">"P2 CRUNCHING: \"%s\"\n"</span>,
582
<a name="l00480"></a>00480 word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>());
583
<a name="l00481"></a>00481 }
584
<a name="l00482"></a>00482 word-><a class="code" href="a00650.html#a82d32a9f59bc1c08da9537303c56615b">unlv_crunch_mode</a> = <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9ad0198ebd5b9e3299dd94f0c5f4d3e003">CR_KEEP_SPACE</a>;
585
<a name="l00483"></a>00483 }
586
<a name="l00484"></a>00484 <span class="keywordflow">else</span> <span class="keywordflow">if</span> (!prev_potential_marked) {
587
<a name="l00485"></a>00485 copy_it = page_res_it;
588
<a name="l00486"></a>00486 prev_potential_marked = TRUE;
589
<a name="l00487"></a>00487 <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a20a1206ce17b07676f829fdee0034eb5">crunch_debug</a> > 1) {
590
<a name="l00488"></a>00488 <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a> (<span class="stringliteral">"P3 CRUNCHING: \"%s\"\n"</span>,
591
<a name="l00489"></a>00489 word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>());
592
<a name="l00490"></a>00490 }
593
<a name="l00491"></a>00491 }
594
<a name="l00492"></a>00492 }
595
<a name="l00493"></a>00493 <span class="keywordflow">else</span> {
596
<a name="l00494"></a>00494 found_terrible_word = FALSE;
597
<a name="l00495"></a>00495 <span class="comment">//Forget earlier potential crunches</span>
598
<a name="l00496"></a>00496 prev_potential_marked = FALSE;
599
<a name="l00497"></a>00497 <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a20a1206ce17b07676f829fdee0034eb5">crunch_debug</a> > 2) {
600
<a name="l00498"></a>00498 <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a> (<span class="stringliteral">"NO CRUNCH: \"%s\"\n"</span>,
601
<a name="l00499"></a>00499 word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>());
602
<a name="l00500"></a>00500 }
603
<a name="l00501"></a>00501 }
604
<a name="l00502"></a>00502 }
605
<a name="l00503"></a>00503 page_res_it.<a class="code" href="a00482.html#ab29005f235cc738eebafb7d9ed6fe70b">forward</a> ();
606
<a name="l00504"></a>00504 }
607
<a name="l00505"></a>00505 }
608
<a name="l00506"></a>00506
609
<a name="l00507"></a>00507
610
<a name="l00508"></a><a class="code" href="a00607.html#a12c9ef89f1b328cb272cae894895b2c7">00508</a> <a class="code" href="a00831.html#a7712a7e28433d0ade59219a129549b6f">BOOL8</a> <a class="code" href="a00607.html#a12c9ef89f1b328cb272cae894895b2c7">Tesseract::terrible_word_crunch</a>(<a class="code" href="a00650.html">WERD_RES</a> *word,
611
<a name="l00509"></a>00509 <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42">GARBAGE_LEVEL</a> garbage_level) {
612
<a name="l00510"></a>00510 <span class="keywordtype">float</span> rating_per_ch;
613
<a name="l00511"></a>00511 <span class="keywordtype">int</span> adjusted_len;
614
<a name="l00512"></a>00512 <span class="keywordtype">int</span> crunch_mode = 0;
615
<a name="l00513"></a>00513
616
<a name="l00514"></a>00514 <span class="keywordflow">if</span> ((word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a168505a533eda60219c93e25adf4cdc5">length</a> () == 0) ||
617
<a name="l00515"></a>00515 (strspn (word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>(), <span class="stringliteral">" "</span>) ==
618
<a name="l00516"></a>00516 word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a168505a533eda60219c93e25adf4cdc5">length</a> ()))
619
<a name="l00517"></a>00517 crunch_mode = 1;
620
<a name="l00518"></a>00518 <span class="keywordflow">else</span> {
621
<a name="l00519"></a>00519 adjusted_len = word-><a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#ab3760d96d63b5ca6c4bfeb32ffc8183c">length</a> ();
622
<a name="l00520"></a>00520 <span class="keywordflow">if</span> (adjusted_len > <a class="code" href="a00607.html#a69c6af52a20db78eb5bfe14a1bc70162">crunch_rating_max</a>)
623
<a name="l00521"></a>00521 adjusted_len = <a class="code" href="a00607.html#a69c6af52a20db78eb5bfe14a1bc70162">crunch_rating_max</a>;
624
<a name="l00522"></a>00522 rating_per_ch = word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a52ea24dcb5ff15ad8aee3fd774622216">rating</a> () / adjusted_len;
625
<a name="l00523"></a>00523
626
<a name="l00524"></a>00524 <span class="keywordflow">if</span> (rating_per_ch > <a class="code" href="a00607.html#a21a76e19dd03c1f03aad1b922fa3b21a">crunch_terrible_rating</a>)
627
<a name="l00525"></a>00525 crunch_mode = 2;
628
<a name="l00526"></a>00526 <span class="keywordflow">else</span> <span class="keywordflow">if</span> (<a class="code" href="a00607.html#ae69c2f057f0474b533183f618e27d447">crunch_terrible_garbage</a> &amp;&amp; (garbage_level == <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42a2400fb7991d9c94896464522e10c49ad">G_TERRIBLE</a>))
629
<a name="l00527"></a>00527 crunch_mode = 3;
630
<a name="l00528"></a>00528 <span class="keywordflow">else</span> <span class="keywordflow">if</span> ((word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a1b25ef2b44d21a7204483a7ca804f293">certainty</a> () &lt; <a class="code" href="a00607.html#aa3b5f0beacd8d0d328e66dfe8b850187">crunch_poor_garbage_cert</a>) &amp;&amp;
631
<a name="l00529"></a>00529 (garbage_level != <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42a46d03f2b7d080cc8319a43ef4bad82b3">G_OK</a>))
632
<a name="l00530"></a>00530 crunch_mode = 4;
633
<a name="l00531"></a>00531 <span class="keywordflow">else</span> <span class="keywordflow">if</span> ((rating_per_ch > <a class="code" href="a00607.html#aec4f861a8e55288700ee622d3e885540">crunch_poor_garbage_rate</a>) &amp;&amp;
634
<a name="l00532"></a>00532 (garbage_level != <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42a46d03f2b7d080cc8319a43ef4bad82b3">G_OK</a>))
635
<a name="l00533"></a>00533 crunch_mode = 5;
636
<a name="l00534"></a>00534 }
637
<a name="l00535"></a>00535 <span class="keywordflow">if</span> (crunch_mode > 0) {
638
<a name="l00536"></a>00536 <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a20a1206ce17b07676f829fdee0034eb5">crunch_debug</a> > 2) {
639
<a name="l00537"></a>00537 <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a> (<span class="stringliteral">"Terrible_word_crunch (%d) on \"%s\"\n"</span>,
640
<a name="l00538"></a>00538 crunch_mode, word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>());
641
<a name="l00539"></a>00539 }
642
<a name="l00540"></a>00540 <span class="keywordflow">return</span> TRUE;
643
<a name="l00541"></a>00541 }
644
<a name="l00542"></a>00542 <span class="keywordflow">else</span>
645
<a name="l00543"></a>00543 <span class="keywordflow">return</span> FALSE;
646
<a name="l00544"></a>00544 }
647
<a name="l00545"></a>00545
648
<a name="l00546"></a><a class="code" href="a00607.html#a042a9bb2b0053f825baf0825addd54fe">00546</a> <a class="code" href="a00831.html#a7712a7e28433d0ade59219a129549b6f">BOOL8</a> <a class="code" href="a00607.html#a042a9bb2b0053f825baf0825addd54fe">Tesseract::potential_word_crunch</a>(<a class="code" href="a00650.html">WERD_RES</a> *word,
649
<a name="l00547"></a>00547 <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42">GARBAGE_LEVEL</a> garbage_level,
650
<a name="l00548"></a>00548 <a class="code" href="a00831.html#a7712a7e28433d0ade59219a129549b6f">BOOL8</a> ok_dict_word) {
651
<a name="l00549"></a>00549 <span class="keywordtype">float</span> rating_per_ch;
652
<a name="l00550"></a>00550 <span class="keywordtype">int</span> adjusted_len;
653
<a name="l00551"></a>00551 <span class="keyword">const</span> <span class="keywordtype">char</span> *str = word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>();
654
<a name="l00552"></a>00552 <span class="keyword">const</span> <span class="keywordtype">char</span> *lengths = word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a93ab94b6fa6d5c0469fb52d66c62968c">unichar_lengths</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>();
655
<a name="l00553"></a>00553 <a class="code" href="a00831.html#a7712a7e28433d0ade59219a129549b6f">BOOL8</a> word_crunchable;
656
<a name="l00554"></a>00554 <span class="keywordtype">int</span> poor_indicator_count = 0;
657
<a name="l00555"></a>00555
658
<a name="l00556"></a>00556 word_crunchable = !<a class="code" href="a00607.html#a3b9dd1d5d1a309b0ce4f1ef34c0a8a2a">crunch_leave_accept_strings</a> ||
659
<a name="l00557"></a>00557 word-><a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#ab3760d96d63b5ca6c4bfeb32ffc8183c">length</a>() &lt; 3 ||
660
<a name="l00558"></a>00558 (<a class="code" href="a00607.html#a51f2ad8b2d849d90bbcfbfca3067c0f1">acceptable_word_string</a>(*word-><a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>,
661
<a name="l00559"></a>00559 str, lengths) == <a class="code" href="a00671.html#af9ff5a7d7a1b9eeea7ce4106bffa7eeeaab0741a5a5ff9757ca343af255353ba0" title="Unacceptable word.">AC_UNACCEPTABLE</a> &amp;&amp;
662
<a name="l00560"></a>00560 !ok_dict_word);
663
<a name="l00561"></a>00561
664
<a name="l00562"></a>00562 adjusted_len = word-><a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#ab3760d96d63b5ca6c4bfeb32ffc8183c">length</a>();
665
<a name="l00563"></a>00563 <span class="keywordflow">if</span> (adjusted_len > 10)
666
<a name="l00564"></a>00564 adjusted_len = 10;
667
<a name="l00565"></a>00565 rating_per_ch = word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a52ea24dcb5ff15ad8aee3fd774622216">rating</a>() / adjusted_len;
668
<a name="l00566"></a>00566
669
<a name="l00567"></a>00567 <span class="keywordflow">if</span> (rating_per_ch > <a class="code" href="a00607.html#ae98cef95f80d5939eaf80b123fd3c402">crunch_pot_poor_rate</a>) {
670
<a name="l00568"></a>00568 <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a20a1206ce17b07676f829fdee0034eb5">crunch_debug</a> > 2) {
671
<a name="l00569"></a>00569 <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a>(<span class="stringliteral">"Potential poor rating on \"%s\"\n"</span>,
672
<a name="l00570"></a>00570 word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>());
673
<a name="l00571"></a>00571 }
674
<a name="l00572"></a>00572 poor_indicator_count++;
675
<a name="l00573"></a>00573 }
676
<a name="l00574"></a>00574
677
<a name="l00575"></a>00575 <span class="keywordflow">if</span> (word_crunchable &amp;&amp;
678
<a name="l00576"></a>00576 word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a1b25ef2b44d21a7204483a7ca804f293">certainty</a>() &lt; <a class="code" href="a00607.html#aee487bb533118fcd5f1994e54ba86fff">crunch_pot_poor_cert</a>) {
679
<a name="l00577"></a>00577 <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a20a1206ce17b07676f829fdee0034eb5">crunch_debug</a> > 2) {
680
<a name="l00578"></a>00578 <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a>(<span class="stringliteral">"Potential poor cert on \"%s\"\n"</span>,
681
<a name="l00579"></a>00579 word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>());
682
<a name="l00580"></a>00580 }
683
<a name="l00581"></a>00581 poor_indicator_count++;
684
<a name="l00582"></a>00582 }
685
<a name="l00583"></a>00583
686
<a name="l00584"></a>00584 <span class="keywordflow">if</span> (garbage_level != <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42a46d03f2b7d080cc8319a43ef4bad82b3">G_OK</a>) {
687
<a name="l00585"></a>00585 <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a20a1206ce17b07676f829fdee0034eb5">crunch_debug</a> > 2) {
688
<a name="l00586"></a>00586 <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a>(<span class="stringliteral">"Potential garbage on \"%s\"\n"</span>,
689
<a name="l00587"></a>00587 word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>());
690
<a name="l00588"></a>00588 }
691
<a name="l00589"></a>00589 poor_indicator_count++;
692
<a name="l00590"></a>00590 }
693
<a name="l00591"></a>00591 <span class="keywordflow">return</span> poor_indicator_count >= <a class="code" href="a00607.html#af5c0420627cd4d8b1273916d8b86c354">crunch_pot_indicators</a>;
694
<a name="l00592"></a>00592 }
695
<a name="l00593"></a>00593
696
<a name="l00594"></a><a class="code" href="a00607.html#a06a8ab9064d4c6da290c82e7adf53a8c">00594</a> <span class="keywordtype">void</span> <a class="code" href="a00607.html#a06a8ab9064d4c6da290c82e7adf53a8c">Tesseract::tilde_delete</a>(<a class="code" href="a00482.html">PAGE_RES_IT</a> &amp;page_res_it) {
697
<a name="l00595"></a>00595 <a class="code" href="a00650.html">WERD_RES</a> *word;
698
<a name="l00596"></a>00596 <a class="code" href="a00482.html">PAGE_RES_IT</a> copy_it;
699
<a name="l00597"></a>00597 <a class="code" href="a00831.html#a7712a7e28433d0ade59219a129549b6f">BOOL8</a> deleting_from_bol = FALSE;
700
<a name="l00598"></a>00598 <a class="code" href="a00831.html#a7712a7e28433d0ade59219a129549b6f">BOOL8</a> marked_delete_point = FALSE;
701
<a name="l00599"></a>00599 <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> debug_delete_mode;
702
<a name="l00600"></a>00600 <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9">CRUNCH_MODE</a> delete_mode;
703
<a name="l00601"></a>00601 <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> x_debug_delete_mode;
704
<a name="l00602"></a>00602 <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9">CRUNCH_MODE</a> x_delete_mode;
705
<a name="l00603"></a>00603
706
<a name="l00604"></a>00604 page_res_it.<a class="code" href="a00482.html#a15febd45d0904bb3c5f7d506368177f0">restart_page</a>();
707
<a name="l00605"></a>00605 <span class="keywordflow">while</span> (page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a>() != NULL) {
708
<a name="l00606"></a>00606 word = page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a>();
709
<a name="l00607"></a>00607
710
<a name="l00608"></a>00608 delete_mode = <a class="code" href="a00607.html#a42d3b5e26902af46296cfdc24bb4b195">word_deletable</a> (word, debug_delete_mode);
711
<a name="l00609"></a>00609 <span class="keywordflow">if</span> (delete_mode != <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9a1eac25752f2c55f1809d11d4ef917ff9">CR_NONE</a>) {
712
<a name="l00610"></a>00610 <span class="keywordflow">if</span> (word-><a class="code" href="a00650.html#afc15fa7c1b5ad55d6772dea0d6865df1">word</a>-><a class="code" href="a00648.html#a81edde8597a3d9fd8a664d703d332c41">flag</a> (<a class="code" href="a00804.html#ad6968adbf8f2cc44adf333ec96efb0beaff21d7c8c8992120fff35942408a00d2">W_BOL</a>) || deleting_from_bol) {
713
<a name="l00611"></a>00611 <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a20a1206ce17b07676f829fdee0034eb5">crunch_debug</a> > 0) {
714
<a name="l00612"></a>00612 <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a> (<span class="stringliteral">"BOL CRUNCH DELETING(%d): \"%s\"\n"</span>,
715
<a name="l00613"></a>00613 debug_delete_mode,
716
<a name="l00614"></a>00614 word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>());
717
<a name="l00615"></a>00615 }
718
<a name="l00616"></a>00616 word-><a class="code" href="a00650.html#a82d32a9f59bc1c08da9537303c56615b">unlv_crunch_mode</a> = delete_mode;
719
<a name="l00617"></a>00617 deleting_from_bol = TRUE;
720
<a name="l00618"></a>00618 } <span class="keywordflow">else</span> <span class="keywordflow">if</span> (word-><a class="code" href="a00650.html#afc15fa7c1b5ad55d6772dea0d6865df1">word</a>-><a class="code" href="a00648.html#a81edde8597a3d9fd8a664d703d332c41">flag</a>(<a class="code" href="a00804.html#ad6968adbf8f2cc44adf333ec96efb0bea62efb985a62d85e014ee5ab039dd50ce">W_EOL</a>)) {
721
<a name="l00619"></a>00619 <span class="keywordflow">if</span> (marked_delete_point) {
722
<a name="l00620"></a>00620 <span class="keywordflow">while</span> (copy_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a>() != word) {
723
<a name="l00621"></a>00621 x_delete_mode = <a class="code" href="a00607.html#a42d3b5e26902af46296cfdc24bb4b195">word_deletable</a> (copy_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a> (),
724
<a name="l00622"></a>00622 x_debug_delete_mode);
725
<a name="l00623"></a>00623 <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a20a1206ce17b07676f829fdee0034eb5">crunch_debug</a> > 0) {
726
<a name="l00624"></a>00624 <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a> (<span class="stringliteral">"EOL CRUNCH DELETING(%d): \"%s\"\n"</span>,
727
<a name="l00625"></a>00625 x_debug_delete_mode,
728
<a name="l00626"></a>00626 copy_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a>()-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>());
729
<a name="l00627"></a>00627 }
730
<a name="l00628"></a>00628 copy_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a> ()-><a class="code" href="a00650.html#a82d32a9f59bc1c08da9537303c56615b">unlv_crunch_mode</a> = x_delete_mode;
731
<a name="l00629"></a>00629 copy_it.<a class="code" href="a00482.html#ab29005f235cc738eebafb7d9ed6fe70b">forward</a> ();
732
<a name="l00630"></a>00630 }
733
<a name="l00631"></a>00631 }
734
<a name="l00632"></a>00632 <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a20a1206ce17b07676f829fdee0034eb5">crunch_debug</a> > 0) {
735
<a name="l00633"></a>00633 <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a> (<span class="stringliteral">"EOL CRUNCH DELETING(%d): \"%s\"\n"</span>,
736
<a name="l00634"></a>00634 debug_delete_mode,
737
<a name="l00635"></a>00635 word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>());
738
<a name="l00636"></a>00636 }
739
<a name="l00637"></a>00637 word-><a class="code" href="a00650.html#a82d32a9f59bc1c08da9537303c56615b">unlv_crunch_mode</a> = delete_mode;
740
<a name="l00638"></a>00638 deleting_from_bol = FALSE;
741
<a name="l00639"></a>00639 marked_delete_point = FALSE;
742
<a name="l00640"></a>00640 }
743
<a name="l00641"></a>00641 <span class="keywordflow">else</span> {
744
<a name="l00642"></a>00642 <span class="keywordflow">if</span> (!marked_delete_point) {
745
<a name="l00643"></a>00643 copy_it = page_res_it;
746
<a name="l00644"></a>00644 marked_delete_point = TRUE;
747
<a name="l00645"></a>00645 }
748
<a name="l00646"></a>00646 }
749
<a name="l00647"></a>00647 }
750
<a name="l00648"></a>00648 <span class="keywordflow">else</span> {
751
<a name="l00649"></a>00649 deleting_from_bol = FALSE;
752
<a name="l00650"></a>00650 <span class="comment">//Forget earlier potential crunches</span>
753
<a name="l00651"></a>00651 marked_delete_point = FALSE;
754
<a name="l00652"></a>00652 }
755
<a name="l00653"></a>00653 <span class="comment">/*</span>
756
<a name="l00654"></a>00654 <span class="comment"> The following step has been left till now as the tess fails are used to</span>
757
<a name="l00655"></a>00655 <span class="comment"> determine if the word is deletable.</span>
758
<a name="l00656"></a>00656 <span class="comment"> */</span>
759
<a name="l00657"></a>00657 <span class="keywordflow">if</span> (!<a class="code" href="a00607.html#aefd718b3d3abe04fba6ff083019f8374">crunch_early_merge_tess_fails</a>)
760
<a name="l00658"></a>00658 word-><a class="code" href="a00650.html#af092edf58a4a718687aa35c6a458cada">merge_tess_fails</a>();
761
<a name="l00659"></a>00659 page_res_it.<a class="code" href="a00482.html#ab29005f235cc738eebafb7d9ed6fe70b">forward</a> ();
762
<a name="l00660"></a>00660 }
763
<a name="l00661"></a>00661 }
764
<a name="l00662"></a>00662
765
<a name="l00663"></a>00663
766
<a name="l00664"></a><a class="code" href="a00607.html#a81b3e2c28a090f5a877dd0f67a4080b6">00664</a> <span class="keywordtype">void</span> <a class="code" href="a00607.html#a81b3e2c28a090f5a877dd0f67a4080b6">Tesseract::convert_bad_unlv_chs</a>(<a class="code" href="a00650.html">WERD_RES</a> *word_res) {
767
<a name="l00665"></a>00665 <span class="keywordtype">int</span> i;
768
<a name="l00666"></a>00666 <a class="code" href="a00862.html#a8578b19fa1ff5ca75080db1a18ecc32e">UNICHAR_ID</a> unichar_dash = word_res-><a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>-><a class="code" href="a00642.html#ae786fa834962ef4d6dbf8300c6c5d456">unichar_to_id</a>(<span class="stringliteral">"-"</span>);
769
<a name="l00667"></a>00667 <a class="code" href="a00862.html#a8578b19fa1ff5ca75080db1a18ecc32e">UNICHAR_ID</a> unichar_space = word_res-><a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>-><a class="code" href="a00642.html#ae786fa834962ef4d6dbf8300c6c5d456">unichar_to_id</a>(<span class="stringliteral">" "</span>);
770
<a name="l00668"></a>00668 <a class="code" href="a00862.html#a8578b19fa1ff5ca75080db1a18ecc32e">UNICHAR_ID</a> unichar_tilde = word_res-><a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>-><a class="code" href="a00642.html#ae786fa834962ef4d6dbf8300c6c5d456">unichar_to_id</a>(<span class="stringliteral">"~"</span>);
771
<a name="l00669"></a>00669 <a class="code" href="a00862.html#a8578b19fa1ff5ca75080db1a18ecc32e">UNICHAR_ID</a> unichar_pow = word_res-><a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>-><a class="code" href="a00642.html#ae786fa834962ef4d6dbf8300c6c5d456">unichar_to_id</a>(<span class="stringliteral">"^"</span>);
772
<a name="l00670"></a>00670 <span class="keywordflow">for</span> (i = 0; i &lt; word_res-><a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#ab3760d96d63b5ca6c4bfeb32ffc8183c">length</a>(); ++i) {
773
<a name="l00671"></a>00671 <span class="keywordflow">if</span> (word_res-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a6b917b2e6157f7956ff53ca230425ed3">unichar_id</a>(i) == unichar_tilde) {
774
<a name="l00672"></a>00672 word_res-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a8238d436806c40b2e01b9659dfab5eb3">set_unichar_id</a>(unichar_dash, i);
775
<a name="l00673"></a>00673 <span class="keywordflow">if</span> (word_res-><a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>[i].accepted ())
776
<a name="l00674"></a>00674 word_res-><a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>[i].setrej_unlv_rej ();
777
<a name="l00675"></a>00675 }
778
<a name="l00676"></a>00676 <span class="keywordflow">if</span> (word_res-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a6b917b2e6157f7956ff53ca230425ed3">unichar_id</a>(i) == unichar_pow) {
779
<a name="l00677"></a>00677 word_res-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a8238d436806c40b2e01b9659dfab5eb3">set_unichar_id</a>(unichar_space, i);
780
<a name="l00678"></a>00678 <span class="keywordflow">if</span> (word_res-><a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>[i].accepted ())
781
<a name="l00679"></a>00679 word_res-><a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>[i].setrej_unlv_rej ();
782
<a name="l00680"></a>00680 }
783
<a name="l00681"></a>00681 }
784
<a name="l00682"></a>00682 }
785
<a name="l00683"></a>00683
786
<a name="l00684"></a><a class="code" href="a00607.html#afad44e1105d7f57e1a99d9076c4a3b25">00684</a> <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42">GARBAGE_LEVEL</a> <a class="code" href="a00607.html#afad44e1105d7f57e1a99d9076c4a3b25">Tesseract::garbage_word</a>(<a class="code" href="a00650.html">WERD_RES</a> *word, <a class="code" href="a00831.html#a7712a7e28433d0ade59219a129549b6f">BOOL8</a> ok_dict_word) {
787
<a name="l00685"></a>00685 <span class="keyword">enum</span> STATES
788
<a name="l00686"></a>00686 {
789
<a name="l00687"></a>00687 JUNK,
790
<a name="l00688"></a>00688 FIRST_UPPER,
791
<a name="l00689"></a>00689 FIRST_LOWER,
792
<a name="l00690"></a>00690 FIRST_NUM,
793
<a name="l00691"></a>00691 SUBSEQUENT_UPPER,
794
<a name="l00692"></a>00692 SUBSEQUENT_LOWER,
795
<a name="l00693"></a>00693 SUBSEQUENT_NUM
796
<a name="l00694"></a>00694 };
797
<a name="l00695"></a>00695 <span class="keyword">const</span> <span class="keywordtype">char</span> *str = word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>();
798
<a name="l00696"></a>00696 <span class="keyword">const</span> <span class="keywordtype">char</span> *lengths = word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a93ab94b6fa6d5c0469fb52d66c62968c">unichar_lengths</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>();
799
<a name="l00697"></a>00697 STATES state = JUNK;
800
<a name="l00698"></a>00698 <span class="keywordtype">int</span> len = 0;
801
<a name="l00699"></a>00699 <span class="keywordtype">int</span> isolated_digits = 0;
802
<a name="l00700"></a>00700 <span class="keywordtype">int</span> isolated_alphas = 0;
803
<a name="l00701"></a>00701 <span class="keywordtype">int</span> bad_char_count = 0;
804
<a name="l00702"></a>00702 <span class="keywordtype">int</span> tess_rejs = 0;
805
<a name="l00703"></a>00703 <span class="keywordtype">int</span> dodgy_chars = 0;
806
<a name="l00704"></a>00704 <span class="keywordtype">int</span> ok_chars;
807
<a name="l00705"></a>00705 <a class="code" href="a00862.html#a8578b19fa1ff5ca75080db1a18ecc32e">UNICHAR_ID</a> last_char = -1;
808
<a name="l00706"></a>00706 <span class="keywordtype">int</span> alpha_repetition_count = 0;
809
<a name="l00707"></a>00707 <span class="keywordtype">int</span> longest_alpha_repetition_count = 0;
810
<a name="l00708"></a>00708 <span class="keywordtype">int</span> longest_lower_run_len = 0;
811
<a name="l00709"></a>00709 <span class="keywordtype">int</span> lower_string_count = 0;
812
<a name="l00710"></a>00710 <span class="keywordtype">int</span> longest_upper_run_len = 0;
813
<a name="l00711"></a>00711 <span class="keywordtype">int</span> upper_string_count = 0;
814
<a name="l00712"></a>00712 <span class="keywordtype">int</span> total_alpha_count = 0;
815
<a name="l00713"></a>00713 <span class="keywordtype">int</span> total_digit_count = 0;
816
<a name="l00714"></a>00714
817
<a name="l00715"></a>00715 <span class="keywordflow">for</span> (; *str != <span class="charliteral">'\0'</span>; str += *(lengths++)) {
818
<a name="l00716"></a>00716 len++;
819
<a name="l00717"></a>00717 <span class="keywordflow">if</span> (word-><a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>-><a class="code" href="a00642.html#a482a94b3e6fcd4e5b9b9363bb9e0ee59">get_isupper</a> (str, *lengths)) {
820
<a name="l00718"></a>00718 total_alpha_count++;
821
<a name="l00719"></a>00719 <span class="keywordflow">switch</span> (state) {
822
<a name="l00720"></a>00720 <span class="keywordflow">case</span> SUBSEQUENT_UPPER:
823
<a name="l00721"></a>00721 <span class="keywordflow">case</span> FIRST_UPPER:
824
<a name="l00722"></a>00722 state = SUBSEQUENT_UPPER;
825
<a name="l00723"></a>00723 upper_string_count++;
826
<a name="l00724"></a>00724 <span class="keywordflow">if</span> (longest_upper_run_len < upper_string_count)
827
<a name="l00725"></a>00725 longest_upper_run_len = upper_string_count;
828
<a name="l00726"></a>00726 <span class="keywordflow">if</span> (last_char == word-><a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>-><a class="code" href="a00642.html#ae786fa834962ef4d6dbf8300c6c5d456">unichar_to_id</a>(str, *lengths)) {
829
<a name="l00727"></a>00727 alpha_repetition_count++;
830
<a name="l00728"></a>00728 <span class="keywordflow">if</span> (longest_alpha_repetition_count < alpha_repetition_count) {
831
<a name="l00729"></a>00729 longest_alpha_repetition_count = alpha_repetition_count;
832
<a name="l00730"></a>00730 }
833
<a name="l00731"></a>00731 }
834
<a name="l00732"></a>00732 <span class="keywordflow">else</span> {
835
<a name="l00733"></a>00733 last_char = word-><a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>-><a class="code" href="a00642.html#ae786fa834962ef4d6dbf8300c6c5d456">unichar_to_id</a>(str, *lengths);
836
<a name="l00734"></a>00734 alpha_repetition_count = 1;
837
<a name="l00735"></a>00735 }
838
<a name="l00736"></a>00736 <span class="keywordflow">break</span>;
839
<a name="l00737"></a>00737 <span class="keywordflow">case</span> FIRST_NUM:
840
<a name="l00738"></a>00738 isolated_digits++;
841
<a name="l00739"></a>00739 <span class="keywordflow">default</span>:
842
<a name="l00740"></a>00740 state = FIRST_UPPER;
843
<a name="l00741"></a>00741 last_char = word-><a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>-><a class="code" href="a00642.html#ae786fa834962ef4d6dbf8300c6c5d456">unichar_to_id</a>(str, *lengths);
844
<a name="l00742"></a>00742 alpha_repetition_count = 1;
845
<a name="l00743"></a>00743 upper_string_count = 1;
846
<a name="l00744"></a>00744 <span class="keywordflow">break</span>;
847
<a name="l00745"></a>00745 }
848
<a name="l00746"></a>00746 }
849
<a name="l00747"></a>00747 <span class="keywordflow">else</span> <span class="keywordflow">if</span> (word-><a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>-><a class="code" href="a00642.html#a074096c823dd5b9bc02c02a59784bf34">get_islower</a> (str, *lengths)) {
850
<a name="l00748"></a>00748 total_alpha_count++;
851
<a name="l00749"></a>00749 <span class="keywordflow">switch</span> (state) {
852
<a name="l00750"></a>00750 <span class="keywordflow">case</span> SUBSEQUENT_LOWER:
853
<a name="l00751"></a>00751 <span class="keywordflow">case</span> FIRST_LOWER:
854
<a name="l00752"></a>00752 state = SUBSEQUENT_LOWER;
855
<a name="l00753"></a>00753 lower_string_count++;
856
<a name="l00754"></a>00754 <span class="keywordflow">if</span> (longest_lower_run_len < lower_string_count)
857
<a name="l00755"></a>00755 longest_lower_run_len = lower_string_count;
858
<a name="l00756"></a>00756 <span class="keywordflow">if</span> (last_char == word-><a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>-><a class="code" href="a00642.html#ae786fa834962ef4d6dbf8300c6c5d456">unichar_to_id</a>(str, *lengths)) {
859
<a name="l00757"></a>00757 alpha_repetition_count++;
860
<a name="l00758"></a>00758 <span class="keywordflow">if</span> (longest_alpha_repetition_count < alpha_repetition_count) {
861
<a name="l00759"></a>00759 longest_alpha_repetition_count = alpha_repetition_count;
862
<a name="l00760"></a>00760 }
863
<a name="l00761"></a>00761 }
864
<a name="l00762"></a>00762 <span class="keywordflow">else</span> {
865
<a name="l00763"></a>00763 last_char = word-><a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>-><a class="code" href="a00642.html#ae786fa834962ef4d6dbf8300c6c5d456">unichar_to_id</a>(str, *lengths);
866
<a name="l00764"></a>00764 alpha_repetition_count = 1;
867
<a name="l00765"></a>00765 }
868
<a name="l00766"></a>00766 <span class="keywordflow">break</span>;
869
<a name="l00767"></a>00767 <span class="keywordflow">case</span> FIRST_NUM:
870
<a name="l00768"></a>00768 isolated_digits++;
871
<a name="l00769"></a>00769 <span class="keywordflow">default</span>:
872
<a name="l00770"></a>00770 state = FIRST_LOWER;
873
<a name="l00771"></a>00771 last_char = word-><a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>-><a class="code" href="a00642.html#ae786fa834962ef4d6dbf8300c6c5d456">unichar_to_id</a>(str, *lengths);
874
<a name="l00772"></a>00772 alpha_repetition_count = 1;
875
<a name="l00773"></a>00773 lower_string_count = 1;
876
<a name="l00774"></a>00774 <span class="keywordflow">break</span>;
877
<a name="l00775"></a>00775 }
878
<a name="l00776"></a>00776 }
879
<a name="l00777"></a>00777 <span class="keywordflow">else</span> <span class="keywordflow">if</span> (word-><a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>-><a class="code" href="a00642.html#a5951dde3ab90c12c309d776570a10725">get_isdigit</a> (str, *lengths)) {
880
<a name="l00778"></a>00778 total_digit_count++;
881
<a name="l00779"></a>00779 <span class="keywordflow">switch</span> (state) {
882
<a name="l00780"></a>00780 <span class="keywordflow">case</span> FIRST_NUM:
883
<a name="l00781"></a>00781 state = SUBSEQUENT_NUM;
884
<a name="l00782"></a>00782 <span class="keywordflow">case</span> SUBSEQUENT_NUM:
885
<a name="l00783"></a>00783 <span class="keywordflow">break</span>;
886
<a name="l00784"></a>00784 <span class="keywordflow">case</span> FIRST_UPPER:
887
<a name="l00785"></a>00785 <span class="keywordflow">case</span> FIRST_LOWER:
888
<a name="l00786"></a>00786 isolated_alphas++;
889
<a name="l00787"></a>00787 <span class="keywordflow">default</span>:
890
<a name="l00788"></a>00788 state = FIRST_NUM;
891
<a name="l00789"></a>00789 <span class="keywordflow">break</span>;
892
<a name="l00790"></a>00790 }
893
<a name="l00791"></a>00791 }
894
<a name="l00792"></a>00792 <span class="keywordflow">else</span> {
895
<a name="l00793"></a>00793 <span class="keywordflow">if</span> (*lengths == 1 && *str == <span class="charliteral">' '</span>)
896
<a name="l00794"></a>00794 tess_rejs++;
897
<a name="l00795"></a>00795 <span class="keywordflow">else</span>
898
<a name="l00796"></a>00796 bad_char_count++;
899
<a name="l00797"></a>00797 <span class="keywordflow">switch</span> (state) {
900
<a name="l00798"></a>00798 <span class="keywordflow">case</span> FIRST_NUM:
901
<a name="l00799"></a>00799 isolated_digits++;
902
<a name="l00800"></a>00800 <span class="keywordflow">break</span>;
903
<a name="l00801"></a>00801 <span class="keywordflow">case</span> FIRST_UPPER:
904
<a name="l00802"></a>00802 <span class="keywordflow">case</span> FIRST_LOWER:
905
<a name="l00803"></a>00803 isolated_alphas++;
906
<a name="l00804"></a>00804 <span class="keywordflow">default</span>:
907
<a name="l00805"></a>00805 <span class="keywordflow">break</span>;
908
<a name="l00806"></a>00806 }
909
<a name="l00807"></a>00807 state = JUNK;
910
<a name="l00808"></a>00808 }
911
<a name="l00809"></a>00809 }
912
<a name="l00810"></a>00810
913
<a name="l00811"></a>00811 <span class="keywordflow">switch</span> (state) {
914
<a name="l00812"></a>00812 <span class="keywordflow">case</span> FIRST_NUM:
915
<a name="l00813"></a>00813 isolated_digits++;
916
<a name="l00814"></a>00814 <span class="keywordflow">break</span>;
917
<a name="l00815"></a>00815 <span class="keywordflow">case</span> FIRST_UPPER:
918
<a name="l00816"></a>00816 <span class="keywordflow">case</span> FIRST_LOWER:
919
<a name="l00817"></a>00817 isolated_alphas++;
920
<a name="l00818"></a>00818 <span class="keywordflow">default</span>:
921
<a name="l00819"></a>00819 <span class="keywordflow">break</span>;
922
<a name="l00820"></a>00820 }
923
<a name="l00821"></a>00821
924
<a name="l00822"></a>00822 <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a260642901c32784e997473bc298346cc">crunch_include_numerals</a>) {
925
<a name="l00823"></a>00823 total_alpha_count += total_digit_count - isolated_digits;
926
<a name="l00824"></a>00824 }
927
<a name="l00825"></a>00825
928
<a name="l00826"></a>00826 <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a267ae2256478e0caa24cc4eda6f78afa">crunch_leave_ok_strings</a> && len >= 4 &&
929
<a name="l00827"></a>00827 2 * (total_alpha_count - isolated_alphas) > len &&
930
<a name="l00828"></a>00828 longest_alpha_repetition_count < <a class="code" href="a00607.html#a2cba6d61cfdced5bdea7461938b45c72">crunch_long_repetitions</a>) {
931
<a name="l00829"></a>00829 <span class="keywordflow">if</span> ((<a class="code" href="a00607.html#a0fc0f88a88d8cf4837c6b85ab9f1fd00">crunch_accept_ok</a> &&
932
<a name="l00830"></a>00830 <a class="code" href="a00607.html#a51f2ad8b2d849d90bbcfbfca3067c0f1">acceptable_word_string</a>(*word-><a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>, str, lengths) !=
933
<a name="l00831"></a>00831 <a class="code" href="a00671.html#af9ff5a7d7a1b9eeea7ce4106bffa7eeeaab0741a5a5ff9757ca343af255353ba0" title="Unacceptable word.">AC_UNACCEPTABLE</a>) ||
934
<a name="l00832"></a>00832 longest_lower_run_len > <a class="code" href="a00607.html#a62ab8881690e98143d62f1ac528041f8">crunch_leave_lc_strings</a> ||
935
<a name="l00833"></a>00833 longest_upper_run_len > <a class="code" href="a00607.html#a557bc470ec0edc0bd11b4fb3432b2266">crunch_leave_uc_strings</a>)
936
<a name="l00834"></a>00834 <span class="keywordflow">return</span> <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42ac744add072d392855d098a995acb3751">G_NEVER_CRUNCH</a>;
937
<a name="l00835"></a>00835 }
938
<a name="l00836"></a>00836 <span class="keywordflow">if</span> (word-><a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#ab3760d96d63b5ca6c4bfeb32ffc8183c">length</a>() > 1 &&
939
<a name="l00837"></a>00837 strpbrk(str, <span class="stringliteral">" "</span>) == NULL &&
940
<a name="l00838"></a>00838 (word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a161ac137df2d063a6421728e6aa08fcb">permuter</a>() == <a class="code" href="a00788.html#a18e2c75cefe9e5b78e8ce41aa5fa25bca616bdbcaed7dd414823b303af5c8b1f1">SYSTEM_DAWG_PERM</a> ||
941
<a name="l00839"></a>00839 word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a161ac137df2d063a6421728e6aa08fcb">permuter</a>() == <a class="code" href="a00788.html#a18e2c75cefe9e5b78e8ce41aa5fa25bca4fd9d3b025ecc5ea8f2248fea29543e4">FREQ_DAWG_PERM</a> ||
942
<a name="l00840"></a>00840 word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a161ac137df2d063a6421728e6aa08fcb">permuter</a>() == <a class="code" href="a00788.html#a18e2c75cefe9e5b78e8ce41aa5fa25bca5e293e54fa687876197992131e779d54">USER_DAWG_PERM</a> ||
943
<a name="l00841"></a>00841 word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a161ac137df2d063a6421728e6aa08fcb">permuter</a>() == <a class="code" href="a00788.html#a18e2c75cefe9e5b78e8ce41aa5fa25bca809574e2067a5304fbd2279d869e9b24">NUMBER_PERM</a> ||
944
<a name="l00842"></a>00842 <a class="code" href="a00607.html#a51f2ad8b2d849d90bbcfbfca3067c0f1">acceptable_word_string</a>(*word-><a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>, str, lengths) !=
945
<a name="l00843"></a>00843 <a class="code" href="a00671.html#af9ff5a7d7a1b9eeea7ce4106bffa7eeeaab0741a5a5ff9757ca343af255353ba0" title="Unacceptable word.">AC_UNACCEPTABLE</a> || ok_dict_word))
946
<a name="l00844"></a>00844 <span class="keywordflow">return</span> <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42a46d03f2b7d080cc8319a43ef4bad82b3">G_OK</a>;
947
<a name="l00845"></a>00845
948
<a name="l00846"></a>00846 ok_chars = len - bad_char_count - isolated_digits -
949
<a name="l00847"></a>00847 isolated_alphas - tess_rejs;
950
<a name="l00848"></a>00848
951
<a name="l00849"></a>00849 <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a20a1206ce17b07676f829fdee0034eb5">crunch_debug</a> > 3) {
952
<a name="l00850"></a>00850 <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a>(<span class="stringliteral">"garbage_word: \"%s\"\n"</span>,
953
<a name="l00851"></a>00851 word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>());
954
<a name="l00852"></a>00852 <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a>(<span class="stringliteral">"LEN: %d bad: %d iso_N: %d iso_A: %d rej: %d\n"</span>,
955
<a name="l00853"></a>00853 len,
956
<a name="l00854"></a>00854 bad_char_count, isolated_digits, isolated_alphas, tess_rejs);
957
<a name="l00855"></a>00855 }
958
<a name="l00856"></a>00856 <span class="keywordflow">if</span> (bad_char_count == 0 &&
959
<a name="l00857"></a>00857 tess_rejs == 0 &&
960
<a name="l00858"></a>00858 (len > isolated_digits + isolated_alphas || len <= 2))
961
<a name="l00859"></a>00859 <span class="keywordflow">return</span> <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42a46d03f2b7d080cc8319a43ef4bad82b3">G_OK</a>;
962
<a name="l00860"></a>00860
963
<a name="l00861"></a>00861 <span class="keywordflow">if</span> (tess_rejs > ok_chars ||
964
<a name="l00862"></a>00862 (tess_rejs > 0 && (bad_char_count + tess_rejs) * 2 > len))
965
<a name="l00863"></a>00863 <span class="keywordflow">return</span> <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42a2400fb7991d9c94896464522e10c49ad">G_TERRIBLE</a>;
966
<a name="l00864"></a>00864
967
<a name="l00865"></a>00865 <span class="keywordflow">if</span> (len > 4) {
968
<a name="l00866"></a>00866 dodgy_chars = 2 * tess_rejs + bad_char_count + isolated_digits +
969
<a name="l00867"></a>00867 isolated_alphas;
970
<a name="l00868"></a>00868 <span class="keywordflow">if</span> (dodgy_chars > 5 || (dodgy_chars / (<span class="keywordtype">float</span>) len) > 0.5)
971
<a name="l00869"></a>00869 <span class="keywordflow">return</span> <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42a17acba0fc01478524c8214f0da82815a">G_DODGY</a>;
972
<a name="l00870"></a>00870 <span class="keywordflow">else</span>
973
<a name="l00871"></a>00871 <span class="keywordflow">return</span> <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42a46d03f2b7d080cc8319a43ef4bad82b3">G_OK</a>;
974
<a name="l00872"></a>00872 } <span class="keywordflow">else</span> {
975
<a name="l00873"></a>00873 dodgy_chars = 2 * tess_rejs + bad_char_count;
976
<a name="l00874"></a>00874 <span class="keywordflow">if</span> ((len == 4 && dodgy_chars > 2) ||
977
<a name="l00875"></a>00875 (len == 3 && dodgy_chars > 2) || dodgy_chars >= len)
978
<a name="l00876"></a>00876 <span class="keywordflow">return</span> <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42a17acba0fc01478524c8214f0da82815a">G_DODGY</a>;
979
<a name="l00877"></a>00877 <span class="keywordflow">else</span>
980
<a name="l00878"></a>00878 <span class="keywordflow">return</span> <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42a46d03f2b7d080cc8319a43ef4bad82b3">G_OK</a>;
981
<a name="l00879"></a>00879 }
982
<a name="l00880"></a>00880 }
983
<a name="l00881"></a>00881
984
<a name="l00882"></a>00882
985
<a name="l00883"></a>00883 <span class="comment">/*************************************************************************</span>
986
<a name="l00884"></a>00884 <span class="comment"> * word_deletable()</span>
987
<a name="l00885"></a>00885 <span class="comment"> * DELETE WERDS AT ENDS OF ROWS IF</span>
988
<a name="l00886"></a>00886 <span class="comment"> * Word is crunched &amp;&amp;</span>
989
<a name="l00887"></a>00887 <span class="comment"> * ( string length = 0 OR</span>
990
<a name="l00888"></a>00888 <span class="comment"> * > 50% of chars are "|" (before merging) OR</span>
991
<a name="l00889"></a>00889 <span class="comment"> * certainty &lt; -10 OR</span>
992
<a name="l00890"></a>00890 <span class="comment"> * rating /char > 60 OR</span>
993
<a name="l00891"></a>00891 <span class="comment"> * TOP of word is more than 0.5 xht BELOW baseline OR</span>
994
<a name="l00892"></a>00892 <span class="comment"> * BOTTOM of word is more than 0.5 xht ABOVE xht OR</span>
995
<a name="l00893"></a>00893 <span class="comment"> * length of word &lt; 3xht OR</span>
996
<a name="l00894"></a>00894 <span class="comment"> * height of word &lt; 0.7 xht OR</span>
997
<a name="l00895"></a>00895 <span class="comment"> * height of word > 3.0 xht OR</span>
998
<a name="l00896"></a>00896 <span class="comment"> * &gt;75% of the outline BBs have longest dimension &lt; 0.5xht</span>
999
<a name="l00897"></a>00897 <span class="comment"> *************************************************************************/</span>
1000
<a name="l00898"></a>00898
1001
<a name="l00899"></a><a class="code" href="a00607.html#a42d3b5e26902af46296cfdc24bb4b195">00899</a> <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9">CRUNCH_MODE</a> <a class="code" href="a00607.html#a42d3b5e26902af46296cfdc24bb4b195">Tesseract::word_deletable</a>(<a class="code" href="a00650.html">WERD_RES</a> *word, <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> &delete_mode) {
1002
<a name="l00900"></a>00900 <span class="keywordtype">int</span> word_len = word-><a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#ab3760d96d63b5ca6c4bfeb32ffc8183c">length</a> ();
1003
<a name="l00901"></a>00901 <span class="keywordtype">float</span> rating_per_ch;
1004
<a name="l00902"></a>00902 <a class="code" href="a00592.html">TBOX</a> box; <span class="comment">//BB of word</span>
1005
<a name="l00903"></a>00903
1006
<a name="l00904"></a>00904 <span class="keywordflow">if</span> (word-><a class="code" href="a00650.html#a82d32a9f59bc1c08da9537303c56615b">unlv_crunch_mode</a> == <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9a1eac25752f2c55f1809d11d4ef917ff9">CR_NONE</a>) {
1007
<a name="l00905"></a>00905 delete_mode = 0;
1008
<a name="l00906"></a>00906 <span class="keywordflow">return</span> <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9a1eac25752f2c55f1809d11d4ef917ff9">CR_NONE</a>;
1009
<a name="l00907"></a>00907 }
1010
<a name="l00908"></a>00908
1011
<a name="l00909"></a>00909 <span class="keywordflow">if</span> (word_len == 0) {
1012
<a name="l00910"></a>00910 delete_mode = 1;
1013
<a name="l00911"></a>00911 <span class="keywordflow">return</span> <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9a1799c83261446306e1d1527f1779d6a4">CR_DELETE</a>;
1014
<a name="l00912"></a>00912 }
1015
<a name="l00913"></a>00913
1016
<a name="l00914"></a>00914 <span class="keywordflow">if</span> (word-><a class="code" href="a00650.html#a211ab5c5a13123ee7bdc7459d3363f9d">rebuild_word</a> != NULL) {
1017
<a name="l00915"></a>00915 <span class="comment">// Cube leaves rebuild_word NULL.</span>
1018
<a name="l00916"></a>00916 box = word-><a class="code" href="a00650.html#a211ab5c5a13123ee7bdc7459d3363f9d">rebuild_word</a>-><a class="code" href="a00635.html#a8278c9db39975f181ad074bf343ea402">bounding_box</a>();
1019
<a name="l00917"></a>00917 <span class="keywordflow">if</span> (box.<a class="code" href="a00592.html#a8379d4bbc72bdbb1f069fc14790e632d">height</a> () < <a class="code" href="a00607.html#a40051cf485581289adcf04cc4733df41">crunch_del_min_ht</a> * <a class="code" href="a00759.html#af974e6755e636c8bc7cbe57e3bcbdd73">kBlnXHeight</a>) {
1020
<a name="l00918"></a>00918 delete_mode = 4;
1021
<a name="l00919"></a>00919 <span class="keywordflow">return</span> <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9a1799c83261446306e1d1527f1779d6a4">CR_DELETE</a>;
1022
<a name="l00920"></a>00920 }
1023
<a name="l00921"></a>00921
1024
<a name="l00922"></a>00922 <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a63d37ef049d0902573211d4301564a1a">noise_outlines</a>(word-><a class="code" href="a00650.html#a211ab5c5a13123ee7bdc7459d3363f9d">rebuild_word</a>)) {
1025
<a name="l00923"></a>00923 delete_mode = 5;
1026
<a name="l00924"></a>00924 <span class="keywordflow">return</span> <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9a1799c83261446306e1d1527f1779d6a4">CR_DELETE</a>;
1027
<a name="l00925"></a>00925 }
1028
<a name="l00926"></a>00926 }
1029
<a name="l00927"></a>00927
1030
<a name="l00928"></a>00928 <span class="keywordflow">if</span> ((<a class="code" href="a00607.html#ae180ffab43d41336d8418e30004d1866">failure_count</a> (word) * 1.5) > word_len) {
1031
<a name="l00929"></a>00929 delete_mode = 2;
1032
<a name="l00930"></a>00930 <span class="keywordflow">return</span> <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9a223aec7f7b2d11193c90d04d8f589387">CR_LOOSE_SPACE</a>;
1033
<a name="l00931"></a>00931 }
1034
<a name="l00932"></a>00932
1035
<a name="l00933"></a>00933 <span class="keywordflow">if</span> (word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a1b25ef2b44d21a7204483a7ca804f293">certainty</a> () < <a class="code" href="a00607.html#a5c24b5ef1b42bebf68d3ab54acc94ccf">crunch_del_cert</a>) {
1036
<a name="l00934"></a>00934 delete_mode = 7;
1037
<a name="l00935"></a>00935 <span class="keywordflow">return</span> <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9a223aec7f7b2d11193c90d04d8f589387">CR_LOOSE_SPACE</a>;
1038
<a name="l00936"></a>00936 }
1039
<a name="l00937"></a>00937
1040
<a name="l00938"></a>00938 rating_per_ch = word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a52ea24dcb5ff15ad8aee3fd774622216">rating</a> () / word_len;
1041
<a name="l00939"></a>00939
1042
<a name="l00940"></a>00940 <span class="keywordflow">if</span> (rating_per_ch > <a class="code" href="a00607.html#a10e3b50d7d9fc570d752d31b05d6971b">crunch_del_rating</a>) {
1043
<a name="l00941"></a>00941 delete_mode = 8;
1044
<a name="l00942"></a>00942 <span class="keywordflow">return</span> <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9a223aec7f7b2d11193c90d04d8f589387">CR_LOOSE_SPACE</a>;
1045
<a name="l00943"></a>00943 }
1046
<a name="l00944"></a>00944
1047
<a name="l00945"></a>00945 <span class="keywordflow">if</span> (box.<a class="code" href="a00592.html#adf92e9fdac1bdf11c10d1c4d1178791a">top</a> () < <a class="code" href="a00759.html#abe10ce41bf7240ae8a053dea471d6ed5">kBlnBaselineOffset</a> - <a class="code" href="a00607.html#a86f631e39200ed6d92a92ccd6a1de2c9">crunch_del_low_word</a> * <a class="code" href="a00759.html#af974e6755e636c8bc7cbe57e3bcbdd73">kBlnXHeight</a>) {
1048
<a name="l00946"></a>00946 delete_mode = 9;
1049
<a name="l00947"></a>00947 <span class="keywordflow">return</span> <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9a223aec7f7b2d11193c90d04d8f589387">CR_LOOSE_SPACE</a>;
1050
<a name="l00948"></a>00948 }
1051
<a name="l00949"></a>00949
1052
<a name="l00950"></a>00950 <span class="keywordflow">if</span> (box.<a class="code" href="a00592.html#a4451d237f1cd18c4982d63fe36a11fc3">bottom</a> () >
1053
<a name="l00951"></a>00951 <a class="code" href="a00759.html#abe10ce41bf7240ae8a053dea471d6ed5">kBlnBaselineOffset</a> + <a class="code" href="a00607.html#ad6665d769dab6f71751fd64af3837674">crunch_del_high_word</a> * <a class="code" href="a00759.html#af974e6755e636c8bc7cbe57e3bcbdd73">kBlnXHeight</a>) {
1054
<a name="l00952"></a>00952 delete_mode = 10;
1055
<a name="l00953"></a>00953 <span class="keywordflow">return</span> <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9a223aec7f7b2d11193c90d04d8f589387">CR_LOOSE_SPACE</a>;
1056
<a name="l00954"></a>00954 }
1057
<a name="l00955"></a>00955
1058
<a name="l00956"></a>00956 <span class="keywordflow">if</span> (box.<a class="code" href="a00592.html#a8379d4bbc72bdbb1f069fc14790e632d">height</a> () > <a class="code" href="a00607.html#ae8bf52f3bdd158ca70e25231e485ce1a">crunch_del_max_ht</a> * <a class="code" href="a00759.html#af974e6755e636c8bc7cbe57e3bcbdd73">kBlnXHeight</a>) {
1059
<a name="l00957"></a>00957 delete_mode = 11;
1060
<a name="l00958"></a>00958 <span class="keywordflow">return</span> <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9a223aec7f7b2d11193c90d04d8f589387">CR_LOOSE_SPACE</a>;
1061
<a name="l00959"></a>00959 }
1062
<a name="l00960"></a>00960
1063
<a name="l00961"></a>00961 <span class="keywordflow">if</span> (box.<a class="code" href="a00592.html#af95494a2ccacc70cc2b83820b2948619">width</a> () < <a class="code" href="a00607.html#a82fcb38c1bdc5bce93ff69c94ef7e2b9">crunch_del_min_width</a> * <a class="code" href="a00759.html#af974e6755e636c8bc7cbe57e3bcbdd73">kBlnXHeight</a>) {
1064
<a name="l00962"></a>00962 delete_mode = 3;
1065
<a name="l00963"></a>00963 <span class="keywordflow">return</span> <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9a223aec7f7b2d11193c90d04d8f589387">CR_LOOSE_SPACE</a>;
1066
<a name="l00964"></a>00964 }
1067
<a name="l00965"></a>00965
1068
<a name="l00966"></a>00966 delete_mode = 0;
1069
<a name="l00967"></a>00967 <span class="keywordflow">return</span> <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9a1eac25752f2c55f1809d11d4ef917ff9">CR_NONE</a>;
1070
<a name="l00968"></a>00968 }
1071
<a name="l00969"></a>00969
1072
<a name="l00970"></a><a class="code" href="a00607.html#ae180ffab43d41336d8418e30004d1866">00970</a> <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> <a class="code" href="a00607.html#ae180ffab43d41336d8418e30004d1866">Tesseract::failure_count</a>(<a class="code" href="a00650.html">WERD_RES</a> *word) {
1073
<a name="l00971"></a>00971 <span class="keyword">const</span> <span class="keywordtype">char</span> *str = word-><a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-><a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>();
1074
<a name="l00972"></a>00972 <span class="keywordtype">int</span> tess_rejs = 0;
1075
<a name="l00973"></a>00973
1076
<a name="l00974"></a>00974 <span class="keywordflow">for</span> (; *str != <span class="charliteral">'\0'</span>; str++) {
1077
<a name="l00975"></a>00975 <span class="keywordflow">if</span> (*str == <span class="charliteral">' '</span>)
1078
<a name="l00976"></a>00976 tess_rejs++;
1079
<a name="l00977"></a>00977 }
1080
<a name="l00978"></a>00978 <span class="keywordflow">return</span> tess_rejs;
1081
<a name="l00979"></a>00979 }
1082
<a name="l00980"></a>00980
1083
<a name="l00981"></a>00981
1084
<a name="l00982"></a><a class="code" href="a00607.html#a63d37ef049d0902573211d4301564a1a">00982</a> <a class="code" href="a00831.html#a7712a7e28433d0ade59219a129549b6f">BOOL8</a> <a class="code" href="a00607.html#a63d37ef049d0902573211d4301564a1a">Tesseract::noise_outlines</a>(<a class="code" href="a00635.html">TWERD</a> *word) {
1085
<a name="l00983"></a>00983 <a class="code" href="a00592.html">TBOX</a> box; <span class="comment">// BB of outline</span>
1086
<a name="l00984"></a>00984 <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> outline_count = 0;
1087
<a name="l00985"></a>00985 <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> small_outline_count = 0;
1088
<a name="l00986"></a>00986 <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> max_dimension;
1089
<a name="l00987"></a>00987 <span class="keywordtype">float</span> small_limit = <a class="code" href="a00759.html#af974e6755e636c8bc7cbe57e3bcbdd73">kBlnXHeight</a> * <a class="code" href="a00607.html#a422f7f28537d7127b0965b6224f2891e">crunch_small_outlines_size</a>;
1090
<a name="l00988"></a>00988
1091
<a name="l00989"></a>00989 <span class="keywordflow">for</span> (<span class="keywordtype">int</span> b = 0; b < word-><a class="code" href="a00635.html#adfdef9868e61650e076775011382ec70">NumBlobs</a>(); ++b) {
1092
<a name="l00990"></a>00990 <a class="code" href="a00591.html">TBLOB</a>* blob = word-><a class="code" href="a00635.html#a52fba22521039a79d8c1c8182f602dcc">blobs</a>[b];
1093
<a name="l00991"></a>00991 <span class="keywordflow">for</span> (<a class="code" href="a00613.html">TESSLINE</a>* ol = blob-><a class="code" href="a00591.html#a3c9282cad4102157d214bcc6311cda07">outlines</a>; ol != NULL; ol = ol-><a class="code" href="a00613.html#a0fb224d43ea53a791f0957fa92793bef">next</a>) {
1094
<a name="l00992"></a>00992 outline_count++;
1095
<a name="l00993"></a>00993 box = ol->bounding_box();
1096
<a name="l00994"></a>00994 <span class="keywordflow">if</span> (box.<a class="code" href="a00592.html#a8379d4bbc72bdbb1f069fc14790e632d">height</a>() > box.<a class="code" href="a00592.html#af95494a2ccacc70cc2b83820b2948619">width</a>())
1097
<a name="l00995"></a>00995 max_dimension = box.<a class="code" href="a00592.html#a8379d4bbc72bdbb1f069fc14790e632d">height</a>();
1098
<a name="l00996"></a>00996 <span class="keywordflow">else</span>
1099
<a name="l00997"></a>00997 max_dimension = box.<a class="code" href="a00592.html#af95494a2ccacc70cc2b83820b2948619">width</a>();
1100
<a name="l00998"></a>00998 <span class="keywordflow">if</span> (max_dimension < small_limit)
1101
<a name="l00999"></a>00999 small_outline_count++;
1102
<a name="l01000"></a>01000 }
1103
<a name="l01001"></a>01001 }
1104
<a name="l01002"></a>01002 <span class="keywordflow">return</span> small_outline_count >= outline_count;
1105
<a name="l01003"></a>01003 }
1106
<a name="l01004"></a>01004
1107
<a name="l01005"></a>01005 } <span class="comment">// namespace tesseract</span>
1108
</pre></div></div><!-- contents -->
1110
<!-- window showing the filter options -->
1111
<div id="MSearchSelectWindow"
1112
onmouseover="return searchBox.OnSearchSelectShow()"
1113
onmouseout="return searchBox.OnSearchSelectHide()"
1114
onkeydown="return searchBox.OnSearchSelectKey(event)">
1115
<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark"> </span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark"> </span>Classes</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark"> </span>Namespaces</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark"> </span>Files</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark"> </span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(5)"><span class="SelectionMark"> </span>Variables</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(6)"><span class="SelectionMark"> </span>Typedefs</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(7)"><span class="SelectionMark"> </span>Enumerations</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(8)"><span class="SelectionMark"> </span>Enumerator</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(9)"><span class="SelectionMark"> </span>Friends</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(10)"><span class="SelectionMark"> </span>Defines</a></div>
1117
<!-- iframe showing the search results (closed by default) -->
1118
<div id="MSearchResultsWindow">
1119
<iframe src="javascript:void(0)" frameborder="0"
1120
name="MSearchResults" id="MSearchResults">
1124
<div id="nav-path" class="navpath">
1126
<li class="navelem"><a class="el" href="a00677.html">docqual.cpp</a> </li>
1128
<li class="footer">Generated on Mon Feb 3 2014 10:59:07 for tesseract by
1129
<a href="http://www.doxygen.org/index.html">
1130
<img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.7.6.1 </li>