~ubuntu-branches/ubuntu/vivid/tesseract/vivid

« back to all changes in this revision

Viewing changes to doc/html/a00677_source.html

  • Committer: Package Import Robot
  • Author(s): Jeff Breidenbach
  • Date: 2014-02-03 11:10:20 UTC
  • mfrom: (1.3.1) (19.1.1 experimental)
  • Revision ID: package-import@ubuntu.com-20140203111020-igquodd7pjlp3uri
Tags: 3.03.01-1
* New upstream release, includes critical fix to PDF rendering
* Complete leptonlib transition (see bug #735509)
* Promote from experimental to unstable

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 
2
<html xmlns="http://www.w3.org/1999/xhtml">
 
3
<head>
 
4
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
 
5
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
 
6
<title>tesseract: /usr/local/google/home/jbreiden/tesseract-ocr-read-only/ccmain/docqual.cpp Source File</title>
 
7
 
 
8
<link href="tabs.css" rel="stylesheet" type="text/css"/>
 
9
<link href="doxygen.css" rel="stylesheet" type="text/css" />
 
10
<link href="navtree.css" rel="stylesheet" type="text/css"/>
 
11
<script type="text/javascript" src="jquery.js"></script>
 
12
<script type="text/javascript" src="resize.js"></script>
 
13
<script type="text/javascript" src="navtree.js"></script>
 
14
<script type="text/javascript">
 
15
  $(document).ready(initResizable);
 
16
</script>
 
17
<link href="search/search.css" rel="stylesheet" type="text/css"/>
 
18
<script type="text/javascript" src="search/search.js"></script>
 
19
<script type="text/javascript">
 
20
  $(document).ready(function() { searchBox.OnSelectItem(0); });
 
21
</script>
 
22
 
 
23
</head>
 
24
<body>
 
25
<div id="top"><!-- do not remove this div! -->
 
26
 
 
27
 
 
28
<div id="titlearea">
 
29
<table cellspacing="0" cellpadding="0">
 
30
 <tbody>
 
31
 <tr style="height: 56px;">
 
32
  
 
33
  
 
34
  <td style="padding-left: 0.5em;">
 
35
   <div id="projectname">tesseract
 
36
   &#160;<span id="projectnumber">3.03</span>
 
37
   </div>
 
38
   
 
39
  </td>
 
40
  
 
41
  
 
42
  
 
43
 </tr>
 
44
 </tbody>
 
45
</table>
 
46
</div>
 
47
 
 
48
<!-- Generated by Doxygen 1.7.6.1 -->
 
49
<script type="text/javascript">
 
50
var searchBox = new SearchBox("searchBox", "search",false,'Search');
 
51
</script>
 
52
  <div id="navrow1" class="tabs">
 
53
    <ul class="tablist">
 
54
      <li><a href="index.html"><span>Main&#160;Page</span></a></li>
 
55
      <li><a href="pages.html"><span>Related&#160;Pages</span></a></li>
 
56
      <li><a href="modules.html"><span>Modules</span></a></li>
 
57
      <li><a href="namespaces.html"><span>Namespaces</span></a></li>
 
58
      <li><a href="annotated.html"><span>Classes</span></a></li>
 
59
      <li class="current"><a href="files.html"><span>Files</span></a></li>
 
60
      <li>
 
61
        <div id="MSearchBox" class="MSearchBoxInactive">
 
62
        <span class="left">
 
63
          <img id="MSearchSelect" src="search/mag_sel.png"
 
64
               onmouseover="return searchBox.OnSearchSelectShow()"
 
65
               onmouseout="return searchBox.OnSearchSelectHide()"
 
66
               alt=""/>
 
67
          <input type="text" id="MSearchField" value="Search" accesskey="S"
 
68
               onfocus="searchBox.OnSearchFieldFocus(true)" 
 
69
               onblur="searchBox.OnSearchFieldFocus(false)" 
 
70
               onkeyup="searchBox.OnSearchFieldChange(event)"/>
 
71
          </span><span class="right">
 
72
            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
 
73
          </span>
 
74
        </div>
 
75
      </li>
 
76
    </ul>
 
77
  </div>
 
78
  <div id="navrow2" class="tabs2">
 
79
    <ul class="tablist">
 
80
      <li><a href="files.html"><span>File&#160;List</span></a></li>
 
81
      <li><a href="globals.html"><span>File&#160;Members</span></a></li>
 
82
    </ul>
 
83
  </div>
 
84
</div>
 
85
<div id="side-nav" class="ui-resizable side-nav-resizable">
 
86
  <div id="nav-tree">
 
87
    <div id="nav-tree-contents">
 
88
    </div>
 
89
  </div>
 
90
  <div id="splitbar" style="-moz-user-select:none;" 
 
91
       class="ui-resizable-handle">
 
92
  </div>
 
93
</div>
 
94
<script type="text/javascript">
 
95
  initNavTree('a00677.html','');
 
96
</script>
 
97
<div id="doc-content">
 
98
<div class="header">
 
99
  <div class="headertitle">
 
100
<div class="title">/usr/local/google/home/jbreiden/tesseract-ocr-read-only/ccmain/docqual.cpp</div>  </div>
 
101
</div><!--header-->
 
102
<div class="contents">
 
103
<a href="a00677.html">Go to the documentation of this file.</a><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 <span class="comment">/******************************************************************</span>
 
104
<a name="l00002"></a>00002 <span class="comment"> * File:        docqual.cpp  (Formerly docqual.c)</span>
 
105
<a name="l00003"></a>00003 <span class="comment"> * Description: Document Quality Metrics</span>
 
106
<a name="l00004"></a>00004 <span class="comment"> * Author:              Phil Cheatle</span>
 
107
<a name="l00005"></a>00005 <span class="comment"> * Created:             Mon May  9 11:27:28 BST 1994</span>
 
108
<a name="l00006"></a>00006 <span class="comment"> *</span>
 
109
<a name="l00007"></a>00007 <span class="comment"> * (C) Copyright 1994, Hewlett-Packard Ltd.</span>
 
110
<a name="l00008"></a>00008 <span class="comment"> ** Licensed under the Apache License, Version 2.0 (the &quot;License&quot;);</span>
 
111
<a name="l00009"></a>00009 <span class="comment"> ** you may not use this file except in compliance with the License.</span>
 
112
<a name="l00010"></a>00010 <span class="comment"> ** You may obtain a copy of the License at</span>
 
113
<a name="l00011"></a>00011 <span class="comment"> ** http://www.apache.org/licenses/LICENSE-2.0</span>
 
114
<a name="l00012"></a>00012 <span class="comment"> ** Unless required by applicable law or agreed to in writing, software</span>
 
115
<a name="l00013"></a>00013 <span class="comment"> ** distributed under the License is distributed on an &quot;AS IS&quot; BASIS,</span>
 
116
<a name="l00014"></a>00014 <span class="comment"> ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
 
117
<a name="l00015"></a>00015 <span class="comment"> ** See the License for the specific language governing permissions and</span>
 
118
<a name="l00016"></a>00016 <span class="comment"> ** limitations under the License.</span>
 
119
<a name="l00017"></a>00017 <span class="comment"> *</span>
 
120
<a name="l00018"></a>00018 <span class="comment"> **********************************************************************/</span>
 
121
<a name="l00019"></a>00019 
 
122
<a name="l00020"></a>00020 <span class="preprocessor">#ifdef _MSC_VER</span>
 
123
<a name="l00021"></a>00021 <span class="preprocessor"></span><span class="preprocessor">#pragma warning(disable:4244)  // Conversion warnings</span>
 
124
<a name="l00022"></a>00022 <span class="preprocessor"></span><span class="preprocessor">#endif</span>
 
125
<a name="l00023"></a>00023 <span class="preprocessor"></span>
 
126
<a name="l00024"></a>00024 <span class="preprocessor">#include          &lt;ctype.h&gt;</span>
 
127
<a name="l00025"></a>00025 <span class="preprocessor">#include          &quot;<a class="code" href="a00678.html">docqual.h</a>&quot;</span>
 
128
<a name="l00026"></a>00026 <span class="preprocessor">#include          &quot;<a class="code" href="a00705.html">reject.h</a>&quot;</span>
 
129
<a name="l00027"></a>00027 <span class="preprocessor">#include          &quot;<a class="code" href="a00856.html">tesscallback.h</a>&quot;</span>
 
130
<a name="l00028"></a>00028 <span class="preprocessor">#include          &quot;<a class="code" href="a00718.html">tessvars.h</a>&quot;</span>
 
131
<a name="l00029"></a>00029 <span class="preprocessor">#include          &quot;<a class="code" href="a00849.html">secname.h</a>&quot;</span>
 
132
<a name="l00030"></a>00030 <span class="preprocessor">#include          &quot;<a class="code" href="a01039.html">globals.h</a>&quot;</span>
 
133
<a name="l00031"></a>00031 <span class="preprocessor">#include          &quot;<a class="code" href="a00716.html">tesseractclass.h</a>&quot;</span>
 
134
<a name="l00032"></a>00032 
 
135
<a name="l00033"></a>00033 <span class="keyword">namespace </span><a class="code" href="a01266.html#afed58feacb84df2de88bdd613cfdba6d">tesseract</a>{
 
136
<a name="l00034"></a>00034 
 
137
<a name="l00035"></a>00035 <span class="comment">// A little class to provide the callbacks as we have no pre-bound args.</span>
 
138
<a name="l00036"></a><a class="code" href="a00356.html">00036</a> <span class="keyword">struct </span><a class="code" href="a00356.html">DocQualCallbacks</a> {
 
139
<a name="l00037"></a><a class="code" href="a00356.html#afcbe06fefeeaee8eb7f00b8e90a2d769">00037</a>   <span class="keyword">explicit</span> <a class="code" href="a00356.html#afcbe06fefeeaee8eb7f00b8e90a2d769">DocQualCallbacks</a>(<a class="code" href="a00650.html">WERD_RES</a>* word0)
 
140
<a name="l00038"></a>00038     : <a class="code" href="a00356.html#a06e04c42b1ff2e5ffa24923e9f34e84e">word</a>(word0), <a class="code" href="a00356.html#a0bc1e44f1672adeace51f83874c0d0e2">match_count</a>(0), <a class="code" href="a00356.html#af534b546b519077d8dba7b375e6f8f59">accepted_match_count</a>(0) {}
 
141
<a name="l00039"></a>00039 
 
142
<a name="l00040"></a><a class="code" href="a00356.html#a5f143209a4192b1872e2e2430d78e29a">00040</a>   <span class="keywordtype">void</span> <a class="code" href="a00356.html#a5f143209a4192b1872e2e2430d78e29a">CountMatchingBlobs</a>(<span class="keywordtype">int</span> index) {
 
143
<a name="l00041"></a>00041     ++<a class="code" href="a00356.html#a0bc1e44f1672adeace51f83874c0d0e2">match_count</a>;
 
144
<a name="l00042"></a>00042   }
 
145
<a name="l00043"></a>00043 
 
146
<a name="l00044"></a><a class="code" href="a00356.html#a7fb05da0218e4e94705cab1d751c7762">00044</a>   <span class="keywordtype">void</span> <a class="code" href="a00356.html#a7fb05da0218e4e94705cab1d751c7762">CountAcceptedBlobs</a>(<span class="keywordtype">int</span> index) {
 
147
<a name="l00045"></a>00045     <span class="keywordflow">if</span> (<a class="code" href="a00356.html#a06e04c42b1ff2e5ffa24923e9f34e84e">word</a>-&gt;<a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>[index].accepted())
 
148
<a name="l00046"></a>00046       ++<a class="code" href="a00356.html#af534b546b519077d8dba7b375e6f8f59">accepted_match_count</a>;
 
149
<a name="l00047"></a>00047     ++<a class="code" href="a00356.html#a0bc1e44f1672adeace51f83874c0d0e2">match_count</a>;
 
150
<a name="l00048"></a>00048   }
 
151
<a name="l00049"></a>00049 
 
152
<a name="l00050"></a><a class="code" href="a00356.html#a4b494c625c5aa534c6fa265fb7fe828f">00050</a>   <span class="keywordtype">void</span> <a class="code" href="a00356.html#a4b494c625c5aa534c6fa265fb7fe828f">AcceptIfGoodQuality</a>(<span class="keywordtype">int</span> index) {
 
153
<a name="l00051"></a>00051     <span class="keywordflow">if</span> (<a class="code" href="a00356.html#a06e04c42b1ff2e5ffa24923e9f34e84e">word</a>-&gt;<a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>[index].accept_if_good_quality())
 
154
<a name="l00052"></a>00052       <a class="code" href="a00356.html#a06e04c42b1ff2e5ffa24923e9f34e84e">word</a>-&gt;<a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>[index].setrej_quality_accept();
 
155
<a name="l00053"></a>00053   }
 
156
<a name="l00054"></a>00054 
 
157
<a name="l00055"></a><a class="code" href="a00356.html#a06e04c42b1ff2e5ffa24923e9f34e84e">00055</a>   <a class="code" href="a00650.html">WERD_RES</a>* <a class="code" href="a00356.html#a06e04c42b1ff2e5ffa24923e9f34e84e">word</a>;
 
158
<a name="l00056"></a><a class="code" href="a00356.html#a0bc1e44f1672adeace51f83874c0d0e2">00056</a>   <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> <a class="code" href="a00356.html#a0bc1e44f1672adeace51f83874c0d0e2">match_count</a>;
 
159
<a name="l00057"></a><a class="code" href="a00356.html#af534b546b519077d8dba7b375e6f8f59">00057</a>   <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> <a class="code" href="a00356.html#af534b546b519077d8dba7b375e6f8f59">accepted_match_count</a>;
 
160
<a name="l00058"></a>00058 };
 
161
<a name="l00059"></a>00059 
 
162
<a name="l00060"></a>00060 <span class="comment">/*************************************************************************</span>
 
163
<a name="l00061"></a>00061 <span class="comment"> * word_blob_quality()</span>
 
164
<a name="l00062"></a>00062 <span class="comment"> * How many blobs in the box_word are identical to those of the inword?</span>
 
165
<a name="l00063"></a>00063 <span class="comment"> * ASSUME blobs in both initial word and box_word are in ascending order of</span>
 
166
<a name="l00064"></a>00064 <span class="comment"> * left hand blob edge.</span>
 
167
<a name="l00065"></a>00065 <span class="comment"> *************************************************************************/</span>
 
168
<a name="l00066"></a><a class="code" href="a00607.html#a70d6e0fcde6ff6f76443e8a206f08783">00066</a> <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> <a class="code" href="a00607.html#a70d6e0fcde6ff6f76443e8a206f08783">Tesseract::word_blob_quality</a>(<a class="code" href="a00650.html">WERD_RES</a> *word, <a class="code" href="a00521.html">ROW</a> *row) {
 
169
<a name="l00067"></a>00067   <span class="keywordflow">if</span> (word-&gt;<a class="code" href="a00650.html#af171e37da16ed628bf4516e9f934dc08">bln_boxes</a> == NULL ||
 
170
<a name="l00068"></a>00068       word-&gt;<a class="code" href="a00650.html#a211ab5c5a13123ee7bdc7459d3363f9d">rebuild_word</a> == NULL || word-&gt;<a class="code" href="a00650.html#a211ab5c5a13123ee7bdc7459d3363f9d">rebuild_word</a>-&gt;<a class="code" href="a00635.html#a52fba22521039a79d8c1c8182f602dcc">blobs</a>.<a class="code" href="a00403.html#afb0d8c52a603b6aa965d63bbf06a27dd">empty</a>())
 
171
<a name="l00069"></a>00069     <span class="keywordflow">return</span> 0;
 
172
<a name="l00070"></a>00070 
 
173
<a name="l00071"></a>00071   <a class="code" href="a00356.html">DocQualCallbacks</a> cb(word);
 
174
<a name="l00072"></a>00072   word-&gt;<a class="code" href="a00650.html#af171e37da16ed628bf4516e9f934dc08">bln_boxes</a>-&gt;<a class="code" href="a00287.html#aabd2c8e3b6f8dda66725754fa034cc8e">ProcessMatchedBlobs</a>(
 
175
<a name="l00073"></a>00073       *word-&gt;<a class="code" href="a00650.html#a211ab5c5a13123ee7bdc7459d3363f9d">rebuild_word</a>,
 
176
<a name="l00074"></a>00074       <a class="code" href="a00856.html#ad000c6729bae0f97075ca10a0b5bf7d6">NewPermanentTessCallback</a>(&amp;cb, &amp;<a class="code" href="a00356.html#a5f143209a4192b1872e2e2430d78e29a">DocQualCallbacks::CountMatchingBlobs</a>));
 
177
<a name="l00075"></a>00075   <span class="keywordflow">return</span> cb.<a class="code" href="a00356.html#a0bc1e44f1672adeace51f83874c0d0e2">match_count</a>;
 
178
<a name="l00076"></a>00076 }
 
179
<a name="l00077"></a>00077 
 
180
<a name="l00078"></a><a class="code" href="a00607.html#ace8e2b02f0270dbfe4b7dc05638bfc0d">00078</a> <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> <a class="code" href="a00607.html#ace8e2b02f0270dbfe4b7dc05638bfc0d">Tesseract::word_outline_errs</a>(<a class="code" href="a00650.html">WERD_RES</a> *word) {
 
181
<a name="l00079"></a>00079   <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> i = 0;
 
182
<a name="l00080"></a>00080   <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> err_count = 0;
 
183
<a name="l00081"></a>00081 
 
184
<a name="l00082"></a>00082   <span class="keywordflow">if</span> (word-&gt;<a class="code" href="a00650.html#a211ab5c5a13123ee7bdc7459d3363f9d">rebuild_word</a> != NULL) {
 
185
<a name="l00083"></a>00083     <span class="keywordflow">for</span> (<span class="keywordtype">int</span> b = 0; b &lt; word-&gt;<a class="code" href="a00650.html#a211ab5c5a13123ee7bdc7459d3363f9d">rebuild_word</a>-&gt;<a class="code" href="a00635.html#adfdef9868e61650e076775011382ec70">NumBlobs</a>(); ++b) {
 
186
<a name="l00084"></a>00084       <a class="code" href="a00591.html">TBLOB</a>* blob = word-&gt;<a class="code" href="a00650.html#a211ab5c5a13123ee7bdc7459d3363f9d">rebuild_word</a>-&gt;<a class="code" href="a00635.html#a52fba22521039a79d8c1c8182f602dcc">blobs</a>[b];
 
187
<a name="l00085"></a>00085       err_count += <a class="code" href="a00607.html#a27dc89a077e5074d22c3c0f9cccb5047">count_outline_errs</a>(word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>()[i],
 
188
<a name="l00086"></a>00086                                       blob-&gt;<a class="code" href="a00591.html#a7cc7be71e17444d728e9c68204bca007">NumOutlines</a>());
 
189
<a name="l00087"></a>00087       i++;
 
190
<a name="l00088"></a>00088     }
 
191
<a name="l00089"></a>00089   }
 
192
<a name="l00090"></a>00090   <span class="keywordflow">return</span> err_count;
 
193
<a name="l00091"></a>00091 }
 
194
<a name="l00092"></a>00092 
 
195
<a name="l00093"></a>00093 <span class="comment">/*************************************************************************</span>
 
196
<a name="l00094"></a>00094 <span class="comment"> * word_char_quality()</span>
 
197
<a name="l00095"></a>00095 <span class="comment"> * Combination of blob quality and outline quality - how many good chars are</span>
 
198
<a name="l00096"></a>00096 <span class="comment"> * there? - I.e chars which pass the blob AND outline tests.</span>
 
199
<a name="l00097"></a>00097 <span class="comment"> *************************************************************************/</span>
 
200
<a name="l00098"></a><a class="code" href="a00607.html#a348314a50f786908e27bbaa3733d14d4">00098</a> <span class="keywordtype">void</span> <a class="code" href="a00607.html#a348314a50f786908e27bbaa3733d14d4">Tesseract::word_char_quality</a>(<a class="code" href="a00650.html">WERD_RES</a> *word,
 
201
<a name="l00099"></a>00099                                   <a class="code" href="a00521.html">ROW</a> *row,
 
202
<a name="l00100"></a>00100                                   <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> *match_count,
 
203
<a name="l00101"></a>00101                                   <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> *accepted_match_count) {
 
204
<a name="l00102"></a>00102   <span class="keywordflow">if</span> (word-&gt;<a class="code" href="a00650.html#af171e37da16ed628bf4516e9f934dc08">bln_boxes</a> == NULL ||
 
205
<a name="l00103"></a>00103       word-&gt;<a class="code" href="a00650.html#a211ab5c5a13123ee7bdc7459d3363f9d">rebuild_word</a> == NULL || word-&gt;<a class="code" href="a00650.html#a211ab5c5a13123ee7bdc7459d3363f9d">rebuild_word</a>-&gt;<a class="code" href="a00635.html#a52fba22521039a79d8c1c8182f602dcc">blobs</a>.<a class="code" href="a00403.html#afb0d8c52a603b6aa965d63bbf06a27dd">empty</a>())
 
206
<a name="l00104"></a>00104     <span class="keywordflow">return</span>;
 
207
<a name="l00105"></a>00105 
 
208
<a name="l00106"></a>00106   <a class="code" href="a00356.html">DocQualCallbacks</a> cb(word);
 
209
<a name="l00107"></a>00107   word-&gt;<a class="code" href="a00650.html#af171e37da16ed628bf4516e9f934dc08">bln_boxes</a>-&gt;<a class="code" href="a00287.html#aabd2c8e3b6f8dda66725754fa034cc8e">ProcessMatchedBlobs</a>(
 
210
<a name="l00108"></a>00108       *word-&gt;<a class="code" href="a00650.html#a211ab5c5a13123ee7bdc7459d3363f9d">rebuild_word</a>,
 
211
<a name="l00109"></a>00109       <a class="code" href="a00856.html#ad000c6729bae0f97075ca10a0b5bf7d6">NewPermanentTessCallback</a>(&amp;cb, &amp;<a class="code" href="a00356.html#a7fb05da0218e4e94705cab1d751c7762">DocQualCallbacks::CountAcceptedBlobs</a>));
 
212
<a name="l00110"></a>00110   *match_count = cb.<a class="code" href="a00356.html#a0bc1e44f1672adeace51f83874c0d0e2">match_count</a>;
 
213
<a name="l00111"></a>00111   *accepted_match_count = cb.<a class="code" href="a00356.html#af534b546b519077d8dba7b375e6f8f59">accepted_match_count</a>;
 
214
<a name="l00112"></a>00112 }
 
215
<a name="l00113"></a>00113 
 
216
<a name="l00114"></a>00114 <span class="comment">/*************************************************************************</span>
 
217
<a name="l00115"></a>00115 <span class="comment"> * unrej_good_chs()</span>
 
218
<a name="l00116"></a>00116 <span class="comment"> * Unreject POTENTIAL rejects if the blob passes the blob and outline checks</span>
 
219
<a name="l00117"></a>00117 <span class="comment"> *************************************************************************/</span>
 
220
<a name="l00118"></a><a class="code" href="a00607.html#ab156ed65c76c10dadacfc1121624ebc2">00118</a> <span class="keywordtype">void</span> <a class="code" href="a00607.html#ab156ed65c76c10dadacfc1121624ebc2">Tesseract::unrej_good_chs</a>(<a class="code" href="a00650.html">WERD_RES</a> *word, <a class="code" href="a00521.html">ROW</a> *row) {
 
221
<a name="l00119"></a>00119   <span class="keywordflow">if</span> (word-&gt;<a class="code" href="a00650.html#af171e37da16ed628bf4516e9f934dc08">bln_boxes</a> == NULL ||
 
222
<a name="l00120"></a>00120       word-&gt;<a class="code" href="a00650.html#a211ab5c5a13123ee7bdc7459d3363f9d">rebuild_word</a> == NULL || word-&gt;<a class="code" href="a00650.html#a211ab5c5a13123ee7bdc7459d3363f9d">rebuild_word</a>-&gt;<a class="code" href="a00635.html#a52fba22521039a79d8c1c8182f602dcc">blobs</a>.<a class="code" href="a00403.html#afb0d8c52a603b6aa965d63bbf06a27dd">empty</a>())
 
223
<a name="l00121"></a>00121     <span class="keywordflow">return</span>;
 
224
<a name="l00122"></a>00122 
 
225
<a name="l00123"></a>00123   <a class="code" href="a00356.html">DocQualCallbacks</a> cb(word);
 
226
<a name="l00124"></a>00124   word-&gt;<a class="code" href="a00650.html#af171e37da16ed628bf4516e9f934dc08">bln_boxes</a>-&gt;<a class="code" href="a00287.html#aabd2c8e3b6f8dda66725754fa034cc8e">ProcessMatchedBlobs</a>(
 
227
<a name="l00125"></a>00125       *word-&gt;<a class="code" href="a00650.html#a211ab5c5a13123ee7bdc7459d3363f9d">rebuild_word</a>,
 
228
<a name="l00126"></a>00126       <a class="code" href="a00856.html#ad000c6729bae0f97075ca10a0b5bf7d6">NewPermanentTessCallback</a>(&amp;cb, &amp;<a class="code" href="a00356.html#a4b494c625c5aa534c6fa265fb7fe828f">DocQualCallbacks::AcceptIfGoodQuality</a>));
 
229
<a name="l00127"></a>00127 }
 
230
<a name="l00128"></a>00128 
 
231
<a name="l00129"></a><a class="code" href="a00607.html#a27dc89a077e5074d22c3c0f9cccb5047">00129</a> <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> <a class="code" href="a00607.html#a27dc89a077e5074d22c3c0f9cccb5047">Tesseract::count_outline_errs</a>(<span class="keywordtype">char</span> c, <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> outline_count) {
 
232
<a name="l00130"></a>00130   <span class="keywordtype">int</span> expected_outline_count;
 
233
<a name="l00131"></a>00131 
 
234
<a name="l00132"></a>00132   <span class="keywordflow">if</span> (<a class="code" href="a00557.html">STRING</a> (<a class="code" href="a00607.html#a6c17b9a72b83394d218773f9a297b77a">outlines_odd</a>).contains (c))
 
235
<a name="l00133"></a>00133     <span class="keywordflow">return</span> 0;                    <span class="comment">//Dont use this char</span>
 
236
<a name="l00134"></a>00134   <span class="keywordflow">else</span> <span class="keywordflow">if</span> (<a class="code" href="a00557.html">STRING</a> (<a class="code" href="a00607.html#acb4b01dca8f419468e5ccf97c451e7cf">outlines_2</a>).contains (c))
 
237
<a name="l00135"></a>00135     expected_outline_count = 2;
 
238
<a name="l00136"></a>00136   <span class="keywordflow">else</span>
 
239
<a name="l00137"></a>00137     expected_outline_count = 1;
 
240
<a name="l00138"></a>00138   <span class="keywordflow">return</span> abs (outline_count - expected_outline_count);
 
241
<a name="l00139"></a>00139 }
 
242
<a name="l00140"></a>00140 
 
243
<a name="l00141"></a><a class="code" href="a00607.html#a04a98a9da10e23072f55bc39ec88a12c">00141</a> <span class="keywordtype">void</span> <a class="code" href="a00607.html#a04a98a9da10e23072f55bc39ec88a12c">Tesseract::quality_based_rejection</a>(<a class="code" href="a00482.html">PAGE_RES_IT</a> &amp;page_res_it,
 
244
<a name="l00142"></a>00142                                         <a class="code" href="a00831.html#a7712a7e28433d0ade59219a129549b6f">BOOL8</a> good_quality_doc) {
 
245
<a name="l00143"></a>00143   <span class="keywordflow">if</span> ((<a class="code" href="a00607.html#ab72bc025f4efb212ec99ee21091d7f10">tessedit_good_quality_unrej</a> &amp;&amp; good_quality_doc))
 
246
<a name="l00144"></a>00144     <a class="code" href="a00607.html#af247aff07d2cc55480e36f278c5a76a8">unrej_good_quality_words</a>(page_res_it);
 
247
<a name="l00145"></a>00145   <a class="code" href="a00607.html#a72b538b58cc5af58b35a390c50086ab5">doc_and_block_rejection</a>(page_res_it, good_quality_doc);
 
248
<a name="l00146"></a>00146   <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a8a848e8a83e73f182d0ef7324afc741f">unlv_tilde_crunching</a>) {
 
249
<a name="l00147"></a>00147     <a class="code" href="a00607.html#a2c77d5d94321d7fc717b5376bcb02b79">tilde_crunch</a>(page_res_it);
 
250
<a name="l00148"></a>00148     <a class="code" href="a00607.html#a06a8ab9064d4c6da290c82e7adf53a8c">tilde_delete</a>(page_res_it);
 
251
<a name="l00149"></a>00149   }
 
252
<a name="l00150"></a>00150 }
 
253
<a name="l00151"></a>00151 
 
254
<a name="l00152"></a>00152 
 
255
<a name="l00153"></a>00153 <span class="comment">/*************************************************************************</span>
 
256
<a name="l00154"></a>00154 <span class="comment"> * unrej_good_quality_words()</span>
 
257
<a name="l00155"></a>00155 <span class="comment"> * Accept potential rejects in words which pass the following checks:</span>
 
258
<a name="l00156"></a>00156 <span class="comment"> *    - Contains a potential reject</span>
 
259
<a name="l00157"></a>00157 <span class="comment"> *    - Word looks like a sensible alpha word.</span>
 
260
<a name="l00158"></a>00158 <span class="comment"> *    - Word segmentation is the same as the original image</span>
 
261
<a name="l00159"></a>00159 <span class="comment"> *              - All characters have the expected number of outlines</span>
 
262
<a name="l00160"></a>00160 <span class="comment"> * NOTE - the rejection counts are recalculated after unrejection</span>
 
263
<a name="l00161"></a>00161 <span class="comment"> *      - CANT do it in a single pass without a bit of fiddling</span>
 
264
<a name="l00162"></a>00162 <span class="comment"> *              - keep it simple but inefficient</span>
 
265
<a name="l00163"></a>00163 <span class="comment"> *************************************************************************/</span>
 
266
<a name="l00164"></a><a class="code" href="a00607.html#af247aff07d2cc55480e36f278c5a76a8">00164</a> <span class="keywordtype">void</span> <a class="code" href="a00607.html#af247aff07d2cc55480e36f278c5a76a8">Tesseract::unrej_good_quality_words</a>(  <span class="comment">//unreject potential</span>
 
267
<a name="l00165"></a>00165                                          <a class="code" href="a00482.html">PAGE_RES_IT</a> &amp;page_res_it) {
 
268
<a name="l00166"></a>00166   <a class="code" href="a00650.html">WERD_RES</a> *word;
 
269
<a name="l00167"></a>00167   <a class="code" href="a00522.html">ROW_RES</a> *current_row;
 
270
<a name="l00168"></a>00168   <a class="code" href="a00283.html">BLOCK_RES</a> *current_block;
 
271
<a name="l00169"></a>00169   <span class="keywordtype">int</span> i;
 
272
<a name="l00170"></a>00170 
 
273
<a name="l00171"></a>00171   page_res_it.<a class="code" href="a00482.html#a15febd45d0904bb3c5f7d506368177f0">restart_page</a> ();
 
274
<a name="l00172"></a>00172   <span class="keywordflow">while</span> (page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a> () != NULL) {
 
275
<a name="l00173"></a>00173     <a class="code" href="a00607.html#a0751722a5b26971d680f2d23c6e65b95">check_debug_pt</a> (page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a> (), 100);
 
276
<a name="l00174"></a>00174     <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a40dee9aa1298b5edcd042b9ec45aeb59">bland_unrej</a>) {
 
277
<a name="l00175"></a>00175       word = page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a> ();
 
278
<a name="l00176"></a>00176       <span class="keywordflow">for</span> (i = 0; i &lt; word-&gt;<a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#ab3760d96d63b5ca6c4bfeb32ffc8183c">length</a> (); i++) {
 
279
<a name="l00177"></a>00177         <span class="keywordflow">if</span> (word-&gt;<a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>[i].accept_if_good_quality ())
 
280
<a name="l00178"></a>00178           word-&gt;<a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>[i].setrej_quality_accept ();
 
281
<a name="l00179"></a>00179       }
 
282
<a name="l00180"></a>00180       page_res_it.<a class="code" href="a00482.html#ab29005f235cc738eebafb7d9ed6fe70b">forward</a> ();
 
283
<a name="l00181"></a>00181     }
 
284
<a name="l00182"></a>00182     <span class="keywordflow">else</span> <span class="keywordflow">if</span> ((page_res_it.<a class="code" href="a00482.html#a312f58081e9410962396160eaf5f90e4">row</a> ()-&gt;<a class="code" href="a00522.html#aad86ecebb53eb63c4038c5a95d932eea">char_count</a> &gt; 0) &amp;&amp;
 
285
<a name="l00183"></a>00183       ((page_res_it.<a class="code" href="a00482.html#a312f58081e9410962396160eaf5f90e4">row</a> ()-&gt;<a class="code" href="a00522.html#afe7c692eeef6f934ef1c3bd208dd115c">rej_count</a> /
 
286
<a name="l00184"></a>00184       (float) page_res_it.<a class="code" href="a00482.html#a312f58081e9410962396160eaf5f90e4">row</a> ()-&gt;<a class="code" href="a00522.html#aad86ecebb53eb63c4038c5a95d932eea">char_count</a>) &lt;=
 
287
<a name="l00185"></a>00185     <a class="code" href="a00607.html#a4a2be7907ac8c08a04d704b316b2ee1c">quality_rowrej_pc</a>)) {
 
288
<a name="l00186"></a>00186       word = page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a> ();
 
289
<a name="l00187"></a>00187       <span class="keywordflow">if</span> (word-&gt;<a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#aabdc95245f033b2e393eccf79d4641e5">quality_recoverable_rejects</a>() &amp;&amp;
 
290
<a name="l00188"></a>00188           (<a class="code" href="a00607.html#aec01ad16a705e92500c6dc8f209ab9f8">tessedit_unrej_any_wd</a> ||
 
291
<a name="l00189"></a>00189            <a class="code" href="a00607.html#a51f2ad8b2d849d90bbcfbfca3067c0f1">acceptable_word_string</a>(*word-&gt;<a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>,
 
292
<a name="l00190"></a>00190                                   word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>(),
 
293
<a name="l00191"></a>00191                                   word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a93ab94b6fa6d5c0469fb52d66c62968c">unichar_lengths</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>())
 
294
<a name="l00192"></a>00192                != <a class="code" href="a00671.html#af9ff5a7d7a1b9eeea7ce4106bffa7eeeaab0741a5a5ff9757ca343af255353ba0" title="Unacceptable word.">AC_UNACCEPTABLE</a>)) {
 
295
<a name="l00193"></a>00193         <a class="code" href="a00607.html#ab156ed65c76c10dadacfc1121624ebc2">unrej_good_chs</a>(word, page_res_it.<a class="code" href="a00482.html#a312f58081e9410962396160eaf5f90e4">row</a> ()-&gt;<a class="code" href="a00522.html#ad3a856f3e9217c47b8f2f54cd3908721">row</a>);
 
296
<a name="l00194"></a>00194       }
 
297
<a name="l00195"></a>00195       page_res_it.<a class="code" href="a00482.html#ab29005f235cc738eebafb7d9ed6fe70b">forward</a> ();
 
298
<a name="l00196"></a>00196     }
 
299
<a name="l00197"></a>00197     <span class="keywordflow">else</span> {
 
300
<a name="l00198"></a>00198       <span class="comment">/* Skip to end of dodgy row */</span>
 
301
<a name="l00199"></a>00199       current_row = page_res_it.<a class="code" href="a00482.html#a312f58081e9410962396160eaf5f90e4">row</a> ();
 
302
<a name="l00200"></a>00200       <span class="keywordflow">while</span> ((page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a> () != NULL) &amp;&amp;
 
303
<a name="l00201"></a>00201         (page_res_it.<a class="code" href="a00482.html#a312f58081e9410962396160eaf5f90e4">row</a> () == current_row))
 
304
<a name="l00202"></a>00202         page_res_it.<a class="code" href="a00482.html#ab29005f235cc738eebafb7d9ed6fe70b">forward</a> ();
 
305
<a name="l00203"></a>00203     }
 
306
<a name="l00204"></a>00204     <a class="code" href="a00607.html#a0751722a5b26971d680f2d23c6e65b95">check_debug_pt</a> (page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a> (), 110);
 
307
<a name="l00205"></a>00205   }
 
308
<a name="l00206"></a>00206   page_res_it.<a class="code" href="a00482.html#a15febd45d0904bb3c5f7d506368177f0">restart_page</a> ();
 
309
<a name="l00207"></a>00207   page_res_it.<a class="code" href="a00482.html#aab221a373111c4be685444b5633e22a5">page_res</a>-&gt;<a class="code" href="a00481.html#a489c2f236ddd8271ce74950abb2d0881">char_count</a> = 0;
 
310
<a name="l00208"></a>00208   page_res_it.<a class="code" href="a00482.html#aab221a373111c4be685444b5633e22a5">page_res</a>-&gt;<a class="code" href="a00481.html#a173220902fa987fe73708fae5f2c9560">rej_count</a> = 0;
 
311
<a name="l00209"></a>00209   current_block = NULL;
 
312
<a name="l00210"></a>00210   current_row = NULL;
 
313
<a name="l00211"></a>00211   <span class="keywordflow">while</span> (page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a> () != NULL) {
 
314
<a name="l00212"></a>00212     <span class="keywordflow">if</span> (current_block != page_res_it.<a class="code" href="a00482.html#a8a349c5163802e1f127d5c88db044059">block</a> ()) {
 
315
<a name="l00213"></a>00213       current_block = page_res_it.<a class="code" href="a00482.html#a8a349c5163802e1f127d5c88db044059">block</a> ();
 
316
<a name="l00214"></a>00214       current_block-&gt;<a class="code" href="a00283.html#a9b394a8aa95834346c8bc02f0493933b">char_count</a> = 0;
 
317
<a name="l00215"></a>00215       current_block-&gt;<a class="code" href="a00283.html#a508dffc62c4a514859372e0a9a6530ca">rej_count</a> = 0;
 
318
<a name="l00216"></a>00216     }
 
319
<a name="l00217"></a>00217     <span class="keywordflow">if</span> (current_row != page_res_it.<a class="code" href="a00482.html#a312f58081e9410962396160eaf5f90e4">row</a> ()) {
 
320
<a name="l00218"></a>00218       current_row = page_res_it.<a class="code" href="a00482.html#a312f58081e9410962396160eaf5f90e4">row</a> ();
 
321
<a name="l00219"></a>00219       current_row-&gt;<a class="code" href="a00522.html#aad86ecebb53eb63c4038c5a95d932eea">char_count</a> = 0;
 
322
<a name="l00220"></a>00220       current_row-&gt;<a class="code" href="a00522.html#afe7c692eeef6f934ef1c3bd208dd115c">rej_count</a> = 0;
 
323
<a name="l00221"></a>00221       current_row-&gt;<a class="code" href="a00522.html#aee04e075058db382613b9fd2d6302d1a">whole_word_rej_count</a> = 0;
 
324
<a name="l00222"></a>00222     }
 
325
<a name="l00223"></a>00223     page_res_it.<a class="code" href="a00482.html#a1d1af03a63da4b7f551770e07ca86414">rej_stat_word</a> ();
 
326
<a name="l00224"></a>00224     page_res_it.<a class="code" href="a00482.html#ab29005f235cc738eebafb7d9ed6fe70b">forward</a> ();
 
327
<a name="l00225"></a>00225   }
 
328
<a name="l00226"></a>00226 }
 
329
<a name="l00227"></a>00227 
 
330
<a name="l00228"></a>00228 
 
331
<a name="l00229"></a>00229 <span class="comment">/*************************************************************************</span>
 
332
<a name="l00230"></a>00230 <span class="comment"> * doc_and_block_rejection()</span>
 
333
<a name="l00231"></a>00231 <span class="comment"> *</span>
 
334
<a name="l00232"></a>00232 <span class="comment"> * If the page has too many rejects - reject all of it.</span>
 
335
<a name="l00233"></a>00233 <span class="comment"> * If any block has too many rejects - reject all words in the block</span>
 
336
<a name="l00234"></a>00234 <span class="comment"> *************************************************************************/</span>
 
337
<a name="l00235"></a>00235 
 
338
<a name="l00236"></a><a class="code" href="a00607.html#a72b538b58cc5af58b35a390c50086ab5">00236</a> <span class="keywordtype">void</span> <a class="code" href="a00607.html#a72b538b58cc5af58b35a390c50086ab5">Tesseract::doc_and_block_rejection</a>(  <span class="comment">//reject big chunks</span>
 
339
<a name="l00237"></a>00237                                         <a class="code" href="a00482.html">PAGE_RES_IT</a> &amp;page_res_it,
 
340
<a name="l00238"></a>00238                                         <a class="code" href="a00831.html#a7712a7e28433d0ade59219a129549b6f">BOOL8</a> good_quality_doc) {
 
341
<a name="l00239"></a>00239   <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> block_no = 0;
 
342
<a name="l00240"></a>00240   <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> row_no = 0;
 
343
<a name="l00241"></a>00241   <a class="code" href="a00283.html">BLOCK_RES</a> *current_block;
 
344
<a name="l00242"></a>00242   <a class="code" href="a00522.html">ROW_RES</a> *current_row;
 
345
<a name="l00243"></a>00243 
 
346
<a name="l00244"></a>00244   <a class="code" href="a00831.html#a7712a7e28433d0ade59219a129549b6f">BOOL8</a> rej_word;
 
347
<a name="l00245"></a>00245   <a class="code" href="a00831.html#a7712a7e28433d0ade59219a129549b6f">BOOL8</a> prev_word_rejected;
 
348
<a name="l00246"></a>00246   <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> char_quality = 0;
 
349
<a name="l00247"></a>00247   <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> accepted_char_quality;
 
350
<a name="l00248"></a>00248 
 
351
<a name="l00249"></a>00249   <span class="keywordflow">if</span> (page_res_it.<a class="code" href="a00482.html#aab221a373111c4be685444b5633e22a5">page_res</a>-&gt;<a class="code" href="a00481.html#a173220902fa987fe73708fae5f2c9560">rej_count</a> * 100.0 /
 
352
<a name="l00250"></a>00250       page_res_it.<a class="code" href="a00482.html#aab221a373111c4be685444b5633e22a5">page_res</a>-&gt;<a class="code" href="a00481.html#a489c2f236ddd8271ce74950abb2d0881">char_count</a> &gt; <a class="code" href="a00607.html#a0c689f16fb64963ca30f7b0fddbea414">tessedit_reject_doc_percent</a>) {
 
353
<a name="l00251"></a>00251     <a class="code" href="a00677.html#a1b9934928e2b953ee5e91abb58cf043f">reject_whole_page</a>(page_res_it);
 
354
<a name="l00252"></a>00252     <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a9771a56b87e31325c043fa195d6885ec">tessedit_debug_doc_rejection</a>) {
 
355
<a name="l00253"></a>00253       <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a>(<span class="stringliteral">&quot;REJECT ALL #chars: %d #Rejects: %d; \n&quot;</span>,
 
356
<a name="l00254"></a>00254               page_res_it.<a class="code" href="a00482.html#aab221a373111c4be685444b5633e22a5">page_res</a>-&gt;<a class="code" href="a00481.html#a489c2f236ddd8271ce74950abb2d0881">char_count</a>,
 
357
<a name="l00255"></a>00255               page_res_it.<a class="code" href="a00482.html#aab221a373111c4be685444b5633e22a5">page_res</a>-&gt;<a class="code" href="a00481.html#a173220902fa987fe73708fae5f2c9560">rej_count</a>);
 
358
<a name="l00256"></a>00256     }
 
359
<a name="l00257"></a>00257   } <span class="keywordflow">else</span> {
 
360
<a name="l00258"></a>00258     <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a9771a56b87e31325c043fa195d6885ec">tessedit_debug_doc_rejection</a>) {
 
361
<a name="l00259"></a>00259       <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a>(<span class="stringliteral">&quot;NO PAGE REJECTION #chars: %d  # Rejects: %d; \n&quot;</span>,
 
362
<a name="l00260"></a>00260               page_res_it.<a class="code" href="a00482.html#aab221a373111c4be685444b5633e22a5">page_res</a>-&gt;<a class="code" href="a00481.html#a489c2f236ddd8271ce74950abb2d0881">char_count</a>,
 
363
<a name="l00261"></a>00261               page_res_it.<a class="code" href="a00482.html#aab221a373111c4be685444b5633e22a5">page_res</a>-&gt;<a class="code" href="a00481.html#a173220902fa987fe73708fae5f2c9560">rej_count</a>);
 
364
<a name="l00262"></a>00262     }
 
365
<a name="l00263"></a>00263 
 
366
<a name="l00264"></a>00264     <span class="comment">/* Walk blocks testing for block rejection */</span>
 
367
<a name="l00265"></a>00265 
 
368
<a name="l00266"></a>00266     page_res_it.<a class="code" href="a00482.html#a15febd45d0904bb3c5f7d506368177f0">restart_page</a>();
 
369
<a name="l00267"></a>00267     <a class="code" href="a00650.html">WERD_RES</a>* word;
 
370
<a name="l00268"></a>00268     <span class="keywordflow">while</span> ((word = page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a>()) != NULL) {
 
371
<a name="l00269"></a>00269       current_block = page_res_it.<a class="code" href="a00482.html#a8a349c5163802e1f127d5c88db044059">block</a>();
 
372
<a name="l00270"></a>00270       block_no = current_block-&gt;<a class="code" href="a00283.html#ae7c240d4878247ebdfe78433446751bf">block</a>-&gt;<a class="code" href="a00500.html#acc3cf97b51fa93a659312c966df4a756">index</a>();
 
373
<a name="l00271"></a>00271       <span class="keywordflow">if</span> (current_block-&gt;<a class="code" href="a00283.html#a9b394a8aa95834346c8bc02f0493933b">char_count</a> &gt; 0 &amp;&amp;
 
374
<a name="l00272"></a>00272           (current_block-&gt;<a class="code" href="a00283.html#a508dffc62c4a514859372e0a9a6530ca">rej_count</a> * 100.0 / current_block-&gt;<a class="code" href="a00283.html#a9b394a8aa95834346c8bc02f0493933b">char_count</a>) &gt;
 
375
<a name="l00273"></a>00273            <a class="code" href="a00607.html#ae1f650e4873b5842f2ae57a3055868c9">tessedit_reject_block_percent</a>) {
 
376
<a name="l00274"></a>00274         <span class="keywordflow">if</span> (<a class="code" href="a00607.html#afe2ceb83896ec93f0add94703dee79ce">tessedit_debug_block_rejection</a>) {
 
377
<a name="l00275"></a>00275           <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a>(<span class="stringliteral">&quot;REJECTING BLOCK %d  #chars: %d;  #Rejects: %d\n&quot;</span>,
 
378
<a name="l00276"></a>00276                   block_no, current_block-&gt;<a class="code" href="a00283.html#a9b394a8aa95834346c8bc02f0493933b">char_count</a>,
 
379
<a name="l00277"></a>00277                   current_block-&gt;<a class="code" href="a00283.html#a508dffc62c4a514859372e0a9a6530ca">rej_count</a>);
 
380
<a name="l00278"></a>00278         }
 
381
<a name="l00279"></a>00279         prev_word_rejected = FALSE;
 
382
<a name="l00280"></a>00280         <span class="keywordflow">while</span> ((word = page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a>()) != NULL &amp;&amp;
 
383
<a name="l00281"></a>00281                (page_res_it.<a class="code" href="a00482.html#a8a349c5163802e1f127d5c88db044059">block</a>() == current_block)) {
 
384
<a name="l00282"></a>00282           <span class="keywordflow">if</span> (<a class="code" href="a00607.html#acf0428aa8b6b2390e5ba2c5f185b7f49">tessedit_preserve_blk_rej_perfect_wds</a>) {
 
385
<a name="l00283"></a>00283             rej_word = word-&gt;<a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#ac8e3f1806f06dfb616d76f68f1b96bc6">reject_count</a>() &gt; 0 ||
 
386
<a name="l00284"></a>00284                 word-&gt;<a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#ab3760d96d63b5ca6c4bfeb32ffc8183c">length</a> () &lt; <a class="code" href="a00607.html#a98362ec97923314723dbeb56fd2528fd">tessedit_preserve_min_wd_len</a>;
 
387
<a name="l00285"></a>00285             <span class="keywordflow">if</span> (rej_word &amp;&amp; <a class="code" href="a00607.html#a00cc24fc46a2c217ff7e614f4ac0db39">tessedit_dont_blkrej_good_wds</a> &amp;&amp;
 
388
<a name="l00286"></a>00286                 word-&gt;<a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#ab3760d96d63b5ca6c4bfeb32ffc8183c">length</a>() &gt;= <a class="code" href="a00607.html#a98362ec97923314723dbeb56fd2528fd">tessedit_preserve_min_wd_len</a> &amp;&amp;
 
389
<a name="l00287"></a>00287                 <a class="code" href="a00607.html#a51f2ad8b2d849d90bbcfbfca3067c0f1">acceptable_word_string</a>(
 
390
<a name="l00288"></a>00288                     *word-&gt;<a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>,
 
391
<a name="l00289"></a>00289                     word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>(),
 
392
<a name="l00290"></a>00290                     word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a93ab94b6fa6d5c0469fb52d66c62968c">unichar_lengths</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>()) !=
 
393
<a name="l00291"></a>00291                 <a class="code" href="a00671.html#af9ff5a7d7a1b9eeea7ce4106bffa7eeeaab0741a5a5ff9757ca343af255353ba0" title="Unacceptable word.">AC_UNACCEPTABLE</a>) {
 
394
<a name="l00292"></a>00292               <a class="code" href="a00607.html#a348314a50f786908e27bbaa3733d14d4">word_char_quality</a>(word, page_res_it.<a class="code" href="a00482.html#a312f58081e9410962396160eaf5f90e4">row</a>()-&gt;<a class="code" href="a00522.html#ad3a856f3e9217c47b8f2f54cd3908721">row</a>,
 
395
<a name="l00293"></a>00293                                 &amp;char_quality,
 
396
<a name="l00294"></a>00294                                 &amp;accepted_char_quality);
 
397
<a name="l00295"></a>00295               rej_word = char_quality !=  word-&gt;<a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#ab3760d96d63b5ca6c4bfeb32ffc8183c">length</a>();
 
398
<a name="l00296"></a>00296             }
 
399
<a name="l00297"></a>00297           } <span class="keywordflow">else</span> {
 
400
<a name="l00298"></a>00298             rej_word = TRUE;
 
401
<a name="l00299"></a>00299           }
 
402
<a name="l00300"></a>00300           <span class="keywordflow">if</span> (rej_word) {
 
403
<a name="l00301"></a>00301             <span class="comment">/*</span>
 
404
<a name="l00302"></a>00302 <span class="comment">              Reject spacing if both current and prev words are rejected.</span>
 
405
<a name="l00303"></a>00303 <span class="comment">              NOTE - this is NOT restricted to FUZZY spaces. - When tried this</span>
 
406
<a name="l00304"></a>00304 <span class="comment">              generated more space errors.</span>
 
407
<a name="l00305"></a>00305 <span class="comment">            */</span>
 
408
<a name="l00306"></a>00306             <span class="keywordflow">if</span> (<a class="code" href="a00607.html#aad38132f39f060b3bf998f316abb4d76">tessedit_use_reject_spaces</a> &amp;&amp;
 
409
<a name="l00307"></a>00307                 prev_word_rejected &amp;&amp;
 
410
<a name="l00308"></a>00308                 page_res_it.<a class="code" href="a00482.html#a67bcb98908ab79b0a00e1d0823208a23">prev_row</a>() == page_res_it.<a class="code" href="a00482.html#a312f58081e9410962396160eaf5f90e4">row</a>() &amp;&amp;
 
411
<a name="l00309"></a>00309                 word-&gt;<a class="code" href="a00650.html#afc15fa7c1b5ad55d6772dea0d6865df1">word</a>-&gt;<a class="code" href="a00648.html#ab2979974cf23f5c66f4e77cc51f2e189">space</a>() == 1)
 
412
<a name="l00310"></a>00310               word-&gt;<a class="code" href="a00650.html#a6164dbe5d7d34658c64682be24ea257a">reject_spaces</a> = TRUE;
 
413
<a name="l00311"></a>00311             word-&gt;<a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#aa86752e4e9277f2a616df08459ea08e3">rej_word_block_rej</a>();
 
414
<a name="l00312"></a>00312           }
 
415
<a name="l00313"></a>00313           prev_word_rejected = rej_word;
 
416
<a name="l00314"></a>00314           page_res_it.<a class="code" href="a00482.html#ab29005f235cc738eebafb7d9ed6fe70b">forward</a>();
 
417
<a name="l00315"></a>00315         }
 
418
<a name="l00316"></a>00316       } <span class="keywordflow">else</span> {
 
419
<a name="l00317"></a>00317         <span class="keywordflow">if</span> (<a class="code" href="a00607.html#afe2ceb83896ec93f0add94703dee79ce">tessedit_debug_block_rejection</a>) {
 
420
<a name="l00318"></a>00318           <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a>(<span class="stringliteral">&quot;NOT REJECTING BLOCK %d #chars: %d  # Rejects: %d; \n&quot;</span>,
 
421
<a name="l00319"></a>00319                   block_no, page_res_it.<a class="code" href="a00482.html#a8a349c5163802e1f127d5c88db044059">block</a>()-&gt;<a class="code" href="a00283.html#a9b394a8aa95834346c8bc02f0493933b">char_count</a>,
 
422
<a name="l00320"></a>00320                   page_res_it.<a class="code" href="a00482.html#a8a349c5163802e1f127d5c88db044059">block</a>()-&gt;<a class="code" href="a00283.html#a508dffc62c4a514859372e0a9a6530ca">rej_count</a>);
 
423
<a name="l00321"></a>00321         }
 
424
<a name="l00322"></a>00322 
 
425
<a name="l00323"></a>00323         <span class="comment">/* Walk rows in block testing for row rejection */</span>
 
426
<a name="l00324"></a>00324         row_no = 0;
 
427
<a name="l00325"></a>00325         <span class="keywordflow">while</span> ((word = page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a>()) != NULL &amp;&amp;
 
428
<a name="l00326"></a>00326                page_res_it.<a class="code" href="a00482.html#a8a349c5163802e1f127d5c88db044059">block</a>() == current_block) {
 
429
<a name="l00327"></a>00327           current_row = page_res_it.<a class="code" href="a00482.html#a312f58081e9410962396160eaf5f90e4">row</a>();
 
430
<a name="l00328"></a>00328           row_no++;
 
431
<a name="l00329"></a>00329           <span class="comment">/* Reject whole row if:</span>
 
432
<a name="l00330"></a>00330 <span class="comment">            fraction of chars on row which are rejected exceed a limit AND</span>
 
433
<a name="l00331"></a>00331 <span class="comment">            fraction rejects which occur in WHOLE WERD rejects is LESS THAN a</span>
 
434
<a name="l00332"></a>00332 <span class="comment">            limit</span>
 
435
<a name="l00333"></a>00333 <span class="comment">          */</span>
 
436
<a name="l00334"></a>00334           <span class="keywordflow">if</span> (current_row-&gt;<a class="code" href="a00522.html#aad86ecebb53eb63c4038c5a95d932eea">char_count</a> &gt; 0 &amp;&amp;
 
437
<a name="l00335"></a>00335               (current_row-&gt;<a class="code" href="a00522.html#afe7c692eeef6f934ef1c3bd208dd115c">rej_count</a> * 100.0 / current_row-&gt;<a class="code" href="a00522.html#aad86ecebb53eb63c4038c5a95d932eea">char_count</a>) &gt;
 
438
<a name="l00336"></a>00336               <a class="code" href="a00607.html#a73cfd99b1d3cd76679efa1a19a4e1d1a">tessedit_reject_row_percent</a> &amp;&amp;
 
439
<a name="l00337"></a>00337               (current_row-&gt;<a class="code" href="a00522.html#aee04e075058db382613b9fd2d6302d1a">whole_word_rej_count</a> * 100.0 /
 
440
<a name="l00338"></a>00338                   current_row-&gt;<a class="code" href="a00522.html#afe7c692eeef6f934ef1c3bd208dd115c">rej_count</a>) &lt;
 
441
<a name="l00339"></a>00339               <a class="code" href="a00607.html#a7124d6d656fc163de05f88b6e31e9681">tessedit_whole_wd_rej_row_percent</a>) {
 
442
<a name="l00340"></a>00340             <span class="keywordflow">if</span> (<a class="code" href="a00607.html#afe2ceb83896ec93f0add94703dee79ce">tessedit_debug_block_rejection</a>) {
 
443
<a name="l00341"></a>00341               <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a>(<span class="stringliteral">&quot;REJECTING ROW %d  #chars: %d;  #Rejects: %d\n&quot;</span>,
 
444
<a name="l00342"></a>00342                       row_no, current_row-&gt;<a class="code" href="a00522.html#aad86ecebb53eb63c4038c5a95d932eea">char_count</a>,
 
445
<a name="l00343"></a>00343                       current_row-&gt;<a class="code" href="a00522.html#afe7c692eeef6f934ef1c3bd208dd115c">rej_count</a>);
 
446
<a name="l00344"></a>00344             }
 
447
<a name="l00345"></a>00345             prev_word_rejected = FALSE;
 
448
<a name="l00346"></a>00346             <span class="keywordflow">while</span> ((word = page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a>()) != NULL &amp;&amp;
 
449
<a name="l00347"></a>00347                    page_res_it.<a class="code" href="a00482.html#a312f58081e9410962396160eaf5f90e4">row</a> () == current_row) {
 
450
<a name="l00348"></a>00348               <span class="comment">/* Preserve words on good docs unless they are mostly rejected*/</span>
 
451
<a name="l00349"></a>00349               <span class="keywordflow">if</span> (!<a class="code" href="a00607.html#ade40da20d4ed20ac262fb25b50757623">tessedit_row_rej_good_docs</a> &amp;&amp; good_quality_doc) {
 
452
<a name="l00350"></a>00350                 rej_word = word-&gt;<a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#ac8e3f1806f06dfb616d76f68f1b96bc6">reject_count</a>() /
 
453
<a name="l00351"></a>00351                     <span class="keyword">static_cast&lt;</span><span class="keywordtype">float</span><span class="keyword">&gt;</span>(word-&gt;<a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#ab3760d96d63b5ca6c4bfeb32ffc8183c">length</a>()) &gt;
 
454
<a name="l00352"></a>00352                     <a class="code" href="a00607.html#afe905eb6b5f607815ccff87755af5d30">tessedit_good_doc_still_rowrej_wd</a>;
 
455
<a name="l00353"></a>00353               } <span class="keywordflow">else</span> <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a1ac173c27cc204ef8efa50dc7476296a">tessedit_preserve_row_rej_perfect_wds</a>) {
 
456
<a name="l00354"></a>00354                 <span class="comment">/* Preserve perfect words anyway */</span>
 
457
<a name="l00355"></a>00355                 rej_word = word-&gt;<a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#ac8e3f1806f06dfb616d76f68f1b96bc6">reject_count</a>() &gt; 0 ||
 
458
<a name="l00356"></a>00356                     word-&gt;<a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#ab3760d96d63b5ca6c4bfeb32ffc8183c">length</a> () &lt; <a class="code" href="a00607.html#a98362ec97923314723dbeb56fd2528fd">tessedit_preserve_min_wd_len</a>;
 
459
<a name="l00357"></a>00357                 <span class="keywordflow">if</span> (rej_word &amp;&amp; <a class="code" href="a00607.html#a1ede84f4a4f39a9b6f5376ea36907b98">tessedit_dont_rowrej_good_wds</a> &amp;&amp;
 
460
<a name="l00358"></a>00358                     word-&gt;<a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#ab3760d96d63b5ca6c4bfeb32ffc8183c">length</a>() &gt;= <a class="code" href="a00607.html#a98362ec97923314723dbeb56fd2528fd">tessedit_preserve_min_wd_len</a> &amp;&amp;
 
461
<a name="l00359"></a>00359                     <a class="code" href="a00607.html#a51f2ad8b2d849d90bbcfbfca3067c0f1">acceptable_word_string</a>(*word-&gt;<a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>,
 
462
<a name="l00360"></a>00360                         word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>(),
 
463
<a name="l00361"></a>00361                         word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a93ab94b6fa6d5c0469fb52d66c62968c">unichar_lengths</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>()) !=
 
464
<a name="l00362"></a>00362                             <a class="code" href="a00671.html#af9ff5a7d7a1b9eeea7ce4106bffa7eeeaab0741a5a5ff9757ca343af255353ba0" title="Unacceptable word.">AC_UNACCEPTABLE</a>) {
 
465
<a name="l00363"></a>00363                   <a class="code" href="a00607.html#a348314a50f786908e27bbaa3733d14d4">word_char_quality</a>(word, page_res_it.<a class="code" href="a00482.html#a312f58081e9410962396160eaf5f90e4">row</a>()-&gt;<a class="code" href="a00522.html#ad3a856f3e9217c47b8f2f54cd3908721">row</a>,
 
466
<a name="l00364"></a>00364                                     &amp;char_quality,
 
467
<a name="l00365"></a>00365                                     &amp;accepted_char_quality);
 
468
<a name="l00366"></a>00366                   rej_word = char_quality != word-&gt;<a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#ab3760d96d63b5ca6c4bfeb32ffc8183c">length</a>();
 
469
<a name="l00367"></a>00367                 }
 
470
<a name="l00368"></a>00368               } <span class="keywordflow">else</span> {
 
471
<a name="l00369"></a>00369                 rej_word = TRUE;
 
472
<a name="l00370"></a>00370               }
 
473
<a name="l00371"></a>00371               <span class="keywordflow">if</span> (rej_word) {
 
474
<a name="l00372"></a>00372                 <span class="comment">/*</span>
 
475
<a name="l00373"></a>00373 <span class="comment">                  Reject spacing if both current and prev words are rejected.</span>
 
476
<a name="l00374"></a>00374 <span class="comment">                  NOTE - this is NOT restricted to FUZZY spaces. - When tried</span>
 
477
<a name="l00375"></a>00375 <span class="comment">                  this generated more space errors.</span>
 
478
<a name="l00376"></a>00376 <span class="comment">                */</span>
 
479
<a name="l00377"></a>00377                 <span class="keywordflow">if</span> (<a class="code" href="a00607.html#aad38132f39f060b3bf998f316abb4d76">tessedit_use_reject_spaces</a> &amp;&amp;
 
480
<a name="l00378"></a>00378                     prev_word_rejected &amp;&amp;
 
481
<a name="l00379"></a>00379                     page_res_it.<a class="code" href="a00482.html#a67bcb98908ab79b0a00e1d0823208a23">prev_row</a>() == page_res_it.<a class="code" href="a00482.html#a312f58081e9410962396160eaf5f90e4">row</a>() &amp;&amp;
 
482
<a name="l00380"></a>00380                     word-&gt;<a class="code" href="a00650.html#afc15fa7c1b5ad55d6772dea0d6865df1">word</a>-&gt;<a class="code" href="a00648.html#ab2979974cf23f5c66f4e77cc51f2e189">space</a> () == 1)
 
483
<a name="l00381"></a>00381                   word-&gt;<a class="code" href="a00650.html#a6164dbe5d7d34658c64682be24ea257a">reject_spaces</a> = TRUE;
 
484
<a name="l00382"></a>00382                 word-&gt;<a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#a03037d4a12a4ea33a377cf21c9400139">rej_word_row_rej</a>();
 
485
<a name="l00383"></a>00383               }
 
486
<a name="l00384"></a>00384               prev_word_rejected = rej_word;
 
487
<a name="l00385"></a>00385               page_res_it.<a class="code" href="a00482.html#ab29005f235cc738eebafb7d9ed6fe70b">forward</a>();
 
488
<a name="l00386"></a>00386             }
 
489
<a name="l00387"></a>00387           } <span class="keywordflow">else</span> {
 
490
<a name="l00388"></a>00388             <span class="keywordflow">if</span> (<a class="code" href="a00607.html#afe2ceb83896ec93f0add94703dee79ce">tessedit_debug_block_rejection</a>) {
 
491
<a name="l00389"></a>00389               <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a>(<span class="stringliteral">&quot;NOT REJECTING ROW %d #chars: %d  # Rejects: %d; \n&quot;</span>,
 
492
<a name="l00390"></a>00390                       row_no, current_row-&gt;<a class="code" href="a00522.html#aad86ecebb53eb63c4038c5a95d932eea">char_count</a>, current_row-&gt;<a class="code" href="a00522.html#afe7c692eeef6f934ef1c3bd208dd115c">rej_count</a>);
 
493
<a name="l00391"></a>00391             }
 
494
<a name="l00392"></a>00392             <span class="keywordflow">while</span> (page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a>() != NULL &amp;&amp;
 
495
<a name="l00393"></a>00393                    page_res_it.<a class="code" href="a00482.html#a312f58081e9410962396160eaf5f90e4">row</a>() == current_row)
 
496
<a name="l00394"></a>00394               page_res_it.<a class="code" href="a00482.html#ab29005f235cc738eebafb7d9ed6fe70b">forward</a>();
 
497
<a name="l00395"></a>00395           }
 
498
<a name="l00396"></a>00396         }
 
499
<a name="l00397"></a>00397       }
 
500
<a name="l00398"></a>00398     }
 
501
<a name="l00399"></a>00399   }
 
502
<a name="l00400"></a>00400 }
 
503
<a name="l00401"></a>00401 
 
504
<a name="l00402"></a>00402 }  <span class="comment">// namespace tesseract</span>
 
505
<a name="l00403"></a>00403 
 
506
<a name="l00404"></a>00404 
 
507
<a name="l00405"></a>00405 <span class="comment">/*************************************************************************</span>
 
508
<a name="l00406"></a>00406 <span class="comment"> * reject_whole_page()</span>
 
509
<a name="l00407"></a>00407 <span class="comment"> * Dont believe any of it - set the reject map to 00..00 in all words</span>
 
510
<a name="l00408"></a>00408 <span class="comment"> *</span>
 
511
<a name="l00409"></a>00409 <span class="comment"> *************************************************************************/</span>
 
512
<a name="l00410"></a>00410 
 
513
<a name="l00411"></a><a class="code" href="a00678.html#a1b9934928e2b953ee5e91abb58cf043f">00411</a> <span class="keywordtype">void</span> <a class="code" href="a00677.html#a1b9934928e2b953ee5e91abb58cf043f">reject_whole_page</a>(<a class="code" href="a00482.html">PAGE_RES_IT</a> &amp;page_res_it) {
 
514
<a name="l00412"></a>00412   page_res_it.<a class="code" href="a00482.html#a15febd45d0904bb3c5f7d506368177f0">restart_page</a> ();
 
515
<a name="l00413"></a>00413   <span class="keywordflow">while</span> (page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a> () != NULL) {
 
516
<a name="l00414"></a>00414     page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a> ()-&gt;<a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#a9fb4a691f24ea06370b55650c197db8b">rej_word_doc_rej</a> ();
 
517
<a name="l00415"></a>00415     page_res_it.<a class="code" href="a00482.html#ab29005f235cc738eebafb7d9ed6fe70b">forward</a> ();
 
518
<a name="l00416"></a>00416   }
 
519
<a name="l00417"></a>00417                                  <span class="comment">//whole page is rejected</span>
 
520
<a name="l00418"></a>00418   page_res_it.<a class="code" href="a00482.html#aab221a373111c4be685444b5633e22a5">page_res</a>-&gt;<a class="code" href="a00481.html#a65ed9c199c2ef8b3dc464225ad893e7d">rejected</a> = TRUE;
 
521
<a name="l00419"></a>00419 }
 
522
<a name="l00420"></a>00420 
 
523
<a name="l00421"></a>00421 <span class="keyword">namespace </span><a class="code" href="a01266.html#afed58feacb84df2de88bdd613cfdba6d">tesseract</a> {
 
524
<a name="l00422"></a><a class="code" href="a00607.html#a2c77d5d94321d7fc717b5376bcb02b79">00422</a> <span class="keywordtype">void</span> <a class="code" href="a00607.html#a2c77d5d94321d7fc717b5376bcb02b79">Tesseract::tilde_crunch</a>(<a class="code" href="a00482.html">PAGE_RES_IT</a> &amp;page_res_it) {
 
525
<a name="l00423"></a>00423   <a class="code" href="a00650.html">WERD_RES</a> *word;
 
526
<a name="l00424"></a>00424   <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42">GARBAGE_LEVEL</a> garbage_level;
 
527
<a name="l00425"></a>00425   <a class="code" href="a00482.html">PAGE_RES_IT</a> copy_it;
 
528
<a name="l00426"></a>00426   <a class="code" href="a00831.html#a7712a7e28433d0ade59219a129549b6f">BOOL8</a> prev_potential_marked = FALSE;
 
529
<a name="l00427"></a>00427   <a class="code" href="a00831.html#a7712a7e28433d0ade59219a129549b6f">BOOL8</a> found_terrible_word = FALSE;
 
530
<a name="l00428"></a>00428   <a class="code" href="a00831.html#a7712a7e28433d0ade59219a129549b6f">BOOL8</a> ok_dict_word;
 
531
<a name="l00429"></a>00429 
 
532
<a name="l00430"></a>00430   page_res_it.<a class="code" href="a00482.html#a15febd45d0904bb3c5f7d506368177f0">restart_page</a>();
 
533
<a name="l00431"></a>00431   <span class="keywordflow">while</span> (page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a>() != NULL) {
 
534
<a name="l00432"></a>00432     <a class="code" href="a00505.html">POLY_BLOCK</a>* pb = page_res_it.<a class="code" href="a00482.html#a8a349c5163802e1f127d5c88db044059">block</a>()-&gt;<a class="code" href="a00283.html#ae7c240d4878247ebdfe78433446751bf">block</a>-&gt;<a class="code" href="a00500.html#a6670779c69aca2d574e4a0590d9b3939">poly_block</a>();
 
535
<a name="l00433"></a>00433     <span class="keywordflow">if</span> (pb != NULL &amp;&amp; !pb-&gt;<a class="code" href="a00505.html#abd32dee532afe634cdbacffc0b53e660">IsText</a>()) {
 
536
<a name="l00434"></a>00434       page_res_it.<a class="code" href="a00482.html#ab29005f235cc738eebafb7d9ed6fe70b">forward</a>();
 
537
<a name="l00435"></a>00435       <span class="keywordflow">continue</span>;
 
538
<a name="l00436"></a>00436     }
 
539
<a name="l00437"></a>00437     word = page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a>();
 
540
<a name="l00438"></a>00438 
 
541
<a name="l00439"></a>00439     <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a49a94ed873a2ddeea43f4b12a8774222">crunch_early_convert_bad_unlv_chs</a>)
 
542
<a name="l00440"></a>00440       <a class="code" href="a00607.html#a81b3e2c28a090f5a877dd0f67a4080b6">convert_bad_unlv_chs</a>(word);
 
543
<a name="l00441"></a>00441 
 
544
<a name="l00442"></a>00442     <span class="keywordflow">if</span> (<a class="code" href="a00607.html#aefd718b3d3abe04fba6ff083019f8374">crunch_early_merge_tess_fails</a>)
 
545
<a name="l00443"></a>00443       word-&gt;<a class="code" href="a00650.html#af092edf58a4a718687aa35c6a458cada">merge_tess_fails</a>();
 
546
<a name="l00444"></a>00444 
 
547
<a name="l00445"></a>00445     <span class="keywordflow">if</span> (word-&gt;<a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#aec0cef1513cd939f0db5f5436f8fdbc6">accept_count</a> () != 0) {
 
548
<a name="l00446"></a>00446       found_terrible_word = FALSE;
 
549
<a name="l00447"></a>00447                                  <span class="comment">//Forget earlier potential crunches</span>
 
550
<a name="l00448"></a>00448       prev_potential_marked = FALSE;
 
551
<a name="l00449"></a>00449     }
 
552
<a name="l00450"></a>00450     <span class="keywordflow">else</span> {
 
553
<a name="l00451"></a>00451       ok_dict_word = <a class="code" href="a00607.html#a5c4eea7412e972bf2fdd2638315e5457">safe_dict_word</a>(word);
 
554
<a name="l00452"></a>00452       garbage_level = <a class="code" href="a00607.html#afad44e1105d7f57e1a99d9076c4a3b25">garbage_word</a> (word, ok_dict_word);
 
555
<a name="l00453"></a>00453 
 
556
<a name="l00454"></a>00454       <span class="keywordflow">if</span> ((garbage_level != <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42ac744add072d392855d098a995acb3751">G_NEVER_CRUNCH</a>) &amp;&amp;
 
557
<a name="l00455"></a>00455       (<a class="code" href="a00607.html#a12c9ef89f1b328cb272cae894895b2c7">terrible_word_crunch</a> (word, garbage_level))) {
 
558
<a name="l00456"></a>00456         <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a20a1206ce17b07676f829fdee0034eb5">crunch_debug</a> &gt; 0) {
 
559
<a name="l00457"></a>00457           <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a> (<span class="stringliteral">&quot;T CRUNCHING: \&quot;%s\&quot;\n&quot;</span>,
 
560
<a name="l00458"></a>00458             word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>());
 
561
<a name="l00459"></a>00459         }
 
562
<a name="l00460"></a>00460         word-&gt;<a class="code" href="a00650.html#a82d32a9f59bc1c08da9537303c56615b">unlv_crunch_mode</a> = <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9ad0198ebd5b9e3299dd94f0c5f4d3e003">CR_KEEP_SPACE</a>;
 
563
<a name="l00461"></a>00461         <span class="keywordflow">if</span> (prev_potential_marked) {
 
564
<a name="l00462"></a>00462           <span class="keywordflow">while</span> (copy_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a> () != word) {
 
565
<a name="l00463"></a>00463             <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a20a1206ce17b07676f829fdee0034eb5">crunch_debug</a> &gt; 0) {
 
566
<a name="l00464"></a>00464               <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a> (<span class="stringliteral">&quot;P1 CRUNCHING: \&quot;%s\&quot;\n&quot;</span>,
 
567
<a name="l00465"></a>00465                 copy_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a>()-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>());
 
568
<a name="l00466"></a>00466             }
 
569
<a name="l00467"></a>00467             copy_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a> ()-&gt;<a class="code" href="a00650.html#a82d32a9f59bc1c08da9537303c56615b">unlv_crunch_mode</a> = <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9ad0198ebd5b9e3299dd94f0c5f4d3e003">CR_KEEP_SPACE</a>;
 
570
<a name="l00468"></a>00468             copy_it.<a class="code" href="a00482.html#ab29005f235cc738eebafb7d9ed6fe70b">forward</a> ();
 
571
<a name="l00469"></a>00469           }
 
572
<a name="l00470"></a>00470           prev_potential_marked = FALSE;
 
573
<a name="l00471"></a>00471         }
 
574
<a name="l00472"></a>00472         found_terrible_word = TRUE;
 
575
<a name="l00473"></a>00473       }
 
576
<a name="l00474"></a>00474       <span class="keywordflow">else</span> <span class="keywordflow">if</span> ((garbage_level != <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42ac744add072d392855d098a995acb3751">G_NEVER_CRUNCH</a>) &amp;&amp;
 
577
<a name="l00475"></a>00475         (<a class="code" href="a00607.html#a042a9bb2b0053f825baf0825addd54fe">potential_word_crunch</a> (word,
 
578
<a name="l00476"></a>00476       garbage_level, ok_dict_word))) {
 
579
<a name="l00477"></a>00477         <span class="keywordflow">if</span> (found_terrible_word) {
 
580
<a name="l00478"></a>00478           <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a20a1206ce17b07676f829fdee0034eb5">crunch_debug</a> &gt; 0) {
 
581
<a name="l00479"></a>00479             <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a> (<span class="stringliteral">&quot;P2 CRUNCHING: \&quot;%s\&quot;\n&quot;</span>,
 
582
<a name="l00480"></a>00480               word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>());
 
583
<a name="l00481"></a>00481           }
 
584
<a name="l00482"></a>00482           word-&gt;<a class="code" href="a00650.html#a82d32a9f59bc1c08da9537303c56615b">unlv_crunch_mode</a> = <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9ad0198ebd5b9e3299dd94f0c5f4d3e003">CR_KEEP_SPACE</a>;
 
585
<a name="l00483"></a>00483         }
 
586
<a name="l00484"></a>00484         <span class="keywordflow">else</span> <span class="keywordflow">if</span> (!prev_potential_marked) {
 
587
<a name="l00485"></a>00485           copy_it = page_res_it;
 
588
<a name="l00486"></a>00486           prev_potential_marked = TRUE;
 
589
<a name="l00487"></a>00487           <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a20a1206ce17b07676f829fdee0034eb5">crunch_debug</a> &gt; 1) {
 
590
<a name="l00488"></a>00488             <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a> (<span class="stringliteral">&quot;P3 CRUNCHING: \&quot;%s\&quot;\n&quot;</span>,
 
591
<a name="l00489"></a>00489               word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>());
 
592
<a name="l00490"></a>00490           }
 
593
<a name="l00491"></a>00491         }
 
594
<a name="l00492"></a>00492       }
 
595
<a name="l00493"></a>00493       <span class="keywordflow">else</span> {
 
596
<a name="l00494"></a>00494         found_terrible_word = FALSE;
 
597
<a name="l00495"></a>00495                                  <span class="comment">//Forget earlier potential crunches</span>
 
598
<a name="l00496"></a>00496         prev_potential_marked = FALSE;
 
599
<a name="l00497"></a>00497         <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a20a1206ce17b07676f829fdee0034eb5">crunch_debug</a> &gt; 2) {
 
600
<a name="l00498"></a>00498           <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a> (<span class="stringliteral">&quot;NO CRUNCH: \&quot;%s\&quot;\n&quot;</span>,
 
601
<a name="l00499"></a>00499             word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>());
 
602
<a name="l00500"></a>00500         }
 
603
<a name="l00501"></a>00501       }
 
604
<a name="l00502"></a>00502     }
 
605
<a name="l00503"></a>00503     page_res_it.<a class="code" href="a00482.html#ab29005f235cc738eebafb7d9ed6fe70b">forward</a> ();
 
606
<a name="l00504"></a>00504   }
 
607
<a name="l00505"></a>00505 }
 
608
<a name="l00506"></a>00506 
 
609
<a name="l00507"></a>00507 
 
610
<a name="l00508"></a><a class="code" href="a00607.html#a12c9ef89f1b328cb272cae894895b2c7">00508</a> <a class="code" href="a00831.html#a7712a7e28433d0ade59219a129549b6f">BOOL8</a> <a class="code" href="a00607.html#a12c9ef89f1b328cb272cae894895b2c7">Tesseract::terrible_word_crunch</a>(<a class="code" href="a00650.html">WERD_RES</a> *word,
 
611
<a name="l00509"></a>00509                                       <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42">GARBAGE_LEVEL</a> garbage_level) {
 
612
<a name="l00510"></a>00510   <span class="keywordtype">float</span> rating_per_ch;
 
613
<a name="l00511"></a>00511   <span class="keywordtype">int</span> adjusted_len;
 
614
<a name="l00512"></a>00512   <span class="keywordtype">int</span> crunch_mode = 0;
 
615
<a name="l00513"></a>00513 
 
616
<a name="l00514"></a>00514   <span class="keywordflow">if</span> ((word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a168505a533eda60219c93e25adf4cdc5">length</a> () == 0) ||
 
617
<a name="l00515"></a>00515     (strspn (word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>(), <span class="stringliteral">&quot; &quot;</span>) ==
 
618
<a name="l00516"></a>00516     word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a168505a533eda60219c93e25adf4cdc5">length</a> ()))
 
619
<a name="l00517"></a>00517     crunch_mode = 1;
 
620
<a name="l00518"></a>00518   <span class="keywordflow">else</span> {
 
621
<a name="l00519"></a>00519     adjusted_len = word-&gt;<a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#ab3760d96d63b5ca6c4bfeb32ffc8183c">length</a> ();
 
622
<a name="l00520"></a>00520     <span class="keywordflow">if</span> (adjusted_len &gt; <a class="code" href="a00607.html#a69c6af52a20db78eb5bfe14a1bc70162">crunch_rating_max</a>)
 
623
<a name="l00521"></a>00521       adjusted_len = <a class="code" href="a00607.html#a69c6af52a20db78eb5bfe14a1bc70162">crunch_rating_max</a>;
 
624
<a name="l00522"></a>00522     rating_per_ch = word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a52ea24dcb5ff15ad8aee3fd774622216">rating</a> () / adjusted_len;
 
625
<a name="l00523"></a>00523 
 
626
<a name="l00524"></a>00524     <span class="keywordflow">if</span> (rating_per_ch &gt; <a class="code" href="a00607.html#a21a76e19dd03c1f03aad1b922fa3b21a">crunch_terrible_rating</a>)
 
627
<a name="l00525"></a>00525       crunch_mode = 2;
 
628
<a name="l00526"></a>00526     <span class="keywordflow">else</span> <span class="keywordflow">if</span> (<a class="code" href="a00607.html#ae69c2f057f0474b533183f618e27d447">crunch_terrible_garbage</a> &amp;&amp; (garbage_level == <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42a2400fb7991d9c94896464522e10c49ad">G_TERRIBLE</a>))
 
629
<a name="l00527"></a>00527       crunch_mode = 3;
 
630
<a name="l00528"></a>00528     <span class="keywordflow">else</span> <span class="keywordflow">if</span> ((word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a1b25ef2b44d21a7204483a7ca804f293">certainty</a> () &lt; <a class="code" href="a00607.html#aa3b5f0beacd8d0d328e66dfe8b850187">crunch_poor_garbage_cert</a>) &amp;&amp;
 
631
<a name="l00529"></a>00529       (garbage_level != <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42a46d03f2b7d080cc8319a43ef4bad82b3">G_OK</a>))
 
632
<a name="l00530"></a>00530       crunch_mode = 4;
 
633
<a name="l00531"></a>00531     <span class="keywordflow">else</span> <span class="keywordflow">if</span> ((rating_per_ch &gt; <a class="code" href="a00607.html#aec4f861a8e55288700ee622d3e885540">crunch_poor_garbage_rate</a>) &amp;&amp;
 
634
<a name="l00532"></a>00532       (garbage_level != <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42a46d03f2b7d080cc8319a43ef4bad82b3">G_OK</a>))
 
635
<a name="l00533"></a>00533       crunch_mode = 5;
 
636
<a name="l00534"></a>00534   }
 
637
<a name="l00535"></a>00535   <span class="keywordflow">if</span> (crunch_mode &gt; 0) {
 
638
<a name="l00536"></a>00536     <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a20a1206ce17b07676f829fdee0034eb5">crunch_debug</a> &gt; 2) {
 
639
<a name="l00537"></a>00537       <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a> (<span class="stringliteral">&quot;Terrible_word_crunch (%d) on \&quot;%s\&quot;\n&quot;</span>,
 
640
<a name="l00538"></a>00538         crunch_mode, word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>());
 
641
<a name="l00539"></a>00539     }
 
642
<a name="l00540"></a>00540     <span class="keywordflow">return</span> TRUE;
 
643
<a name="l00541"></a>00541   }
 
644
<a name="l00542"></a>00542   <span class="keywordflow">else</span>
 
645
<a name="l00543"></a>00543     <span class="keywordflow">return</span> FALSE;
 
646
<a name="l00544"></a>00544 }
 
647
<a name="l00545"></a>00545 
 
648
<a name="l00546"></a><a class="code" href="a00607.html#a042a9bb2b0053f825baf0825addd54fe">00546</a> <a class="code" href="a00831.html#a7712a7e28433d0ade59219a129549b6f">BOOL8</a> <a class="code" href="a00607.html#a042a9bb2b0053f825baf0825addd54fe">Tesseract::potential_word_crunch</a>(<a class="code" href="a00650.html">WERD_RES</a> *word,
 
649
<a name="l00547"></a>00547                                        <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42">GARBAGE_LEVEL</a> garbage_level,
 
650
<a name="l00548"></a>00548                                        <a class="code" href="a00831.html#a7712a7e28433d0ade59219a129549b6f">BOOL8</a> ok_dict_word) {
 
651
<a name="l00549"></a>00549   <span class="keywordtype">float</span> rating_per_ch;
 
652
<a name="l00550"></a>00550   <span class="keywordtype">int</span> adjusted_len;
 
653
<a name="l00551"></a>00551   <span class="keyword">const</span> <span class="keywordtype">char</span> *str = word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>();
 
654
<a name="l00552"></a>00552   <span class="keyword">const</span> <span class="keywordtype">char</span> *lengths = word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a93ab94b6fa6d5c0469fb52d66c62968c">unichar_lengths</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>();
 
655
<a name="l00553"></a>00553   <a class="code" href="a00831.html#a7712a7e28433d0ade59219a129549b6f">BOOL8</a> word_crunchable;
 
656
<a name="l00554"></a>00554   <span class="keywordtype">int</span> poor_indicator_count = 0;
 
657
<a name="l00555"></a>00555 
 
658
<a name="l00556"></a>00556   word_crunchable = !<a class="code" href="a00607.html#a3b9dd1d5d1a309b0ce4f1ef34c0a8a2a">crunch_leave_accept_strings</a> ||
 
659
<a name="l00557"></a>00557                     word-&gt;<a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#ab3760d96d63b5ca6c4bfeb32ffc8183c">length</a>() &lt; 3 ||
 
660
<a name="l00558"></a>00558                     (<a class="code" href="a00607.html#a51f2ad8b2d849d90bbcfbfca3067c0f1">acceptable_word_string</a>(*word-&gt;<a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>,
 
661
<a name="l00559"></a>00559                                             str, lengths) == <a class="code" href="a00671.html#af9ff5a7d7a1b9eeea7ce4106bffa7eeeaab0741a5a5ff9757ca343af255353ba0" title="Unacceptable word.">AC_UNACCEPTABLE</a> &amp;&amp;
 
662
<a name="l00560"></a>00560                      !ok_dict_word);
 
663
<a name="l00561"></a>00561 
 
664
<a name="l00562"></a>00562   adjusted_len = word-&gt;<a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#ab3760d96d63b5ca6c4bfeb32ffc8183c">length</a>();
 
665
<a name="l00563"></a>00563   <span class="keywordflow">if</span> (adjusted_len &gt; 10)
 
666
<a name="l00564"></a>00564     adjusted_len = 10;
 
667
<a name="l00565"></a>00565   rating_per_ch = word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a52ea24dcb5ff15ad8aee3fd774622216">rating</a>() / adjusted_len;
 
668
<a name="l00566"></a>00566 
 
669
<a name="l00567"></a>00567   <span class="keywordflow">if</span> (rating_per_ch &gt; <a class="code" href="a00607.html#ae98cef95f80d5939eaf80b123fd3c402">crunch_pot_poor_rate</a>) {
 
670
<a name="l00568"></a>00568     <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a20a1206ce17b07676f829fdee0034eb5">crunch_debug</a> &gt; 2) {
 
671
<a name="l00569"></a>00569       <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a>(<span class="stringliteral">&quot;Potential poor rating on \&quot;%s\&quot;\n&quot;</span>,
 
672
<a name="l00570"></a>00570               word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>());
 
673
<a name="l00571"></a>00571     }
 
674
<a name="l00572"></a>00572     poor_indicator_count++;
 
675
<a name="l00573"></a>00573   }
 
676
<a name="l00574"></a>00574 
 
677
<a name="l00575"></a>00575   <span class="keywordflow">if</span> (word_crunchable &amp;&amp;
 
678
<a name="l00576"></a>00576       word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a1b25ef2b44d21a7204483a7ca804f293">certainty</a>() &lt; <a class="code" href="a00607.html#aee487bb533118fcd5f1994e54ba86fff">crunch_pot_poor_cert</a>) {
 
679
<a name="l00577"></a>00577     <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a20a1206ce17b07676f829fdee0034eb5">crunch_debug</a> &gt; 2) {
 
680
<a name="l00578"></a>00578       <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a>(<span class="stringliteral">&quot;Potential poor cert on \&quot;%s\&quot;\n&quot;</span>,
 
681
<a name="l00579"></a>00579               word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>());
 
682
<a name="l00580"></a>00580     }
 
683
<a name="l00581"></a>00581     poor_indicator_count++;
 
684
<a name="l00582"></a>00582   }
 
685
<a name="l00583"></a>00583 
 
686
<a name="l00584"></a>00584   <span class="keywordflow">if</span> (garbage_level != <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42a46d03f2b7d080cc8319a43ef4bad82b3">G_OK</a>) {
 
687
<a name="l00585"></a>00585     <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a20a1206ce17b07676f829fdee0034eb5">crunch_debug</a> &gt; 2) {
 
688
<a name="l00586"></a>00586       <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a>(<span class="stringliteral">&quot;Potential garbage on \&quot;%s\&quot;\n&quot;</span>,
 
689
<a name="l00587"></a>00587               word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>());
 
690
<a name="l00588"></a>00588     }
 
691
<a name="l00589"></a>00589     poor_indicator_count++;
 
692
<a name="l00590"></a>00590   }
 
693
<a name="l00591"></a>00591   <span class="keywordflow">return</span> poor_indicator_count &gt;= <a class="code" href="a00607.html#af5c0420627cd4d8b1273916d8b86c354">crunch_pot_indicators</a>;
 
694
<a name="l00592"></a>00592 }
 
695
<a name="l00593"></a>00593 
 
696
<a name="l00594"></a><a class="code" href="a00607.html#a06a8ab9064d4c6da290c82e7adf53a8c">00594</a> <span class="keywordtype">void</span> <a class="code" href="a00607.html#a06a8ab9064d4c6da290c82e7adf53a8c">Tesseract::tilde_delete</a>(<a class="code" href="a00482.html">PAGE_RES_IT</a> &amp;page_res_it) {
 
697
<a name="l00595"></a>00595   <a class="code" href="a00650.html">WERD_RES</a> *word;
 
698
<a name="l00596"></a>00596   <a class="code" href="a00482.html">PAGE_RES_IT</a> copy_it;
 
699
<a name="l00597"></a>00597   <a class="code" href="a00831.html#a7712a7e28433d0ade59219a129549b6f">BOOL8</a> deleting_from_bol = FALSE;
 
700
<a name="l00598"></a>00598   <a class="code" href="a00831.html#a7712a7e28433d0ade59219a129549b6f">BOOL8</a> marked_delete_point = FALSE;
 
701
<a name="l00599"></a>00599   <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> debug_delete_mode;
 
702
<a name="l00600"></a>00600   <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9">CRUNCH_MODE</a> delete_mode;
 
703
<a name="l00601"></a>00601   <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> x_debug_delete_mode;
 
704
<a name="l00602"></a>00602   <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9">CRUNCH_MODE</a> x_delete_mode;
 
705
<a name="l00603"></a>00603 
 
706
<a name="l00604"></a>00604   page_res_it.<a class="code" href="a00482.html#a15febd45d0904bb3c5f7d506368177f0">restart_page</a>();
 
707
<a name="l00605"></a>00605   <span class="keywordflow">while</span> (page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a>() != NULL) {
 
708
<a name="l00606"></a>00606     word = page_res_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a>();
 
709
<a name="l00607"></a>00607 
 
710
<a name="l00608"></a>00608     delete_mode = <a class="code" href="a00607.html#a42d3b5e26902af46296cfdc24bb4b195">word_deletable</a> (word, debug_delete_mode);
 
711
<a name="l00609"></a>00609     <span class="keywordflow">if</span> (delete_mode != <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9a1eac25752f2c55f1809d11d4ef917ff9">CR_NONE</a>) {
 
712
<a name="l00610"></a>00610       <span class="keywordflow">if</span> (word-&gt;<a class="code" href="a00650.html#afc15fa7c1b5ad55d6772dea0d6865df1">word</a>-&gt;<a class="code" href="a00648.html#a81edde8597a3d9fd8a664d703d332c41">flag</a> (<a class="code" href="a00804.html#ad6968adbf8f2cc44adf333ec96efb0beaff21d7c8c8992120fff35942408a00d2">W_BOL</a>) || deleting_from_bol) {
 
713
<a name="l00611"></a>00611         <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a20a1206ce17b07676f829fdee0034eb5">crunch_debug</a> &gt; 0) {
 
714
<a name="l00612"></a>00612           <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a> (<span class="stringliteral">&quot;BOL CRUNCH DELETING(%d): \&quot;%s\&quot;\n&quot;</span>,
 
715
<a name="l00613"></a>00613             debug_delete_mode,
 
716
<a name="l00614"></a>00614             word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>());
 
717
<a name="l00615"></a>00615         }
 
718
<a name="l00616"></a>00616         word-&gt;<a class="code" href="a00650.html#a82d32a9f59bc1c08da9537303c56615b">unlv_crunch_mode</a> = delete_mode;
 
719
<a name="l00617"></a>00617         deleting_from_bol = TRUE;
 
720
<a name="l00618"></a>00618       } <span class="keywordflow">else</span> <span class="keywordflow">if</span> (word-&gt;<a class="code" href="a00650.html#afc15fa7c1b5ad55d6772dea0d6865df1">word</a>-&gt;<a class="code" href="a00648.html#a81edde8597a3d9fd8a664d703d332c41">flag</a>(<a class="code" href="a00804.html#ad6968adbf8f2cc44adf333ec96efb0bea62efb985a62d85e014ee5ab039dd50ce">W_EOL</a>)) {
 
721
<a name="l00619"></a>00619         <span class="keywordflow">if</span> (marked_delete_point) {
 
722
<a name="l00620"></a>00620           <span class="keywordflow">while</span> (copy_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a>() != word) {
 
723
<a name="l00621"></a>00621             x_delete_mode = <a class="code" href="a00607.html#a42d3b5e26902af46296cfdc24bb4b195">word_deletable</a> (copy_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a> (),
 
724
<a name="l00622"></a>00622               x_debug_delete_mode);
 
725
<a name="l00623"></a>00623             <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a20a1206ce17b07676f829fdee0034eb5">crunch_debug</a> &gt; 0) {
 
726
<a name="l00624"></a>00624               <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a> (<span class="stringliteral">&quot;EOL CRUNCH DELETING(%d): \&quot;%s\&quot;\n&quot;</span>,
 
727
<a name="l00625"></a>00625                 x_debug_delete_mode,
 
728
<a name="l00626"></a>00626                 copy_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a>()-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>());
 
729
<a name="l00627"></a>00627             }
 
730
<a name="l00628"></a>00628             copy_it.<a class="code" href="a00482.html#ab84b4850200ba8802791c6197c113fd0">word</a> ()-&gt;<a class="code" href="a00650.html#a82d32a9f59bc1c08da9537303c56615b">unlv_crunch_mode</a> = x_delete_mode;
 
731
<a name="l00629"></a>00629             copy_it.<a class="code" href="a00482.html#ab29005f235cc738eebafb7d9ed6fe70b">forward</a> ();
 
732
<a name="l00630"></a>00630           }
 
733
<a name="l00631"></a>00631         }
 
734
<a name="l00632"></a>00632         <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a20a1206ce17b07676f829fdee0034eb5">crunch_debug</a> &gt; 0) {
 
735
<a name="l00633"></a>00633           <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a> (<span class="stringliteral">&quot;EOL CRUNCH DELETING(%d): \&quot;%s\&quot;\n&quot;</span>,
 
736
<a name="l00634"></a>00634             debug_delete_mode,
 
737
<a name="l00635"></a>00635             word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>());
 
738
<a name="l00636"></a>00636         }
 
739
<a name="l00637"></a>00637         word-&gt;<a class="code" href="a00650.html#a82d32a9f59bc1c08da9537303c56615b">unlv_crunch_mode</a> = delete_mode;
 
740
<a name="l00638"></a>00638         deleting_from_bol = FALSE;
 
741
<a name="l00639"></a>00639         marked_delete_point = FALSE;
 
742
<a name="l00640"></a>00640       }
 
743
<a name="l00641"></a>00641       <span class="keywordflow">else</span> {
 
744
<a name="l00642"></a>00642         <span class="keywordflow">if</span> (!marked_delete_point) {
 
745
<a name="l00643"></a>00643           copy_it = page_res_it;
 
746
<a name="l00644"></a>00644           marked_delete_point = TRUE;
 
747
<a name="l00645"></a>00645         }
 
748
<a name="l00646"></a>00646       }
 
749
<a name="l00647"></a>00647     }
 
750
<a name="l00648"></a>00648     <span class="keywordflow">else</span> {
 
751
<a name="l00649"></a>00649       deleting_from_bol = FALSE;
 
752
<a name="l00650"></a>00650                                  <span class="comment">//Forget earlier potential crunches</span>
 
753
<a name="l00651"></a>00651       marked_delete_point = FALSE;
 
754
<a name="l00652"></a>00652     }
 
755
<a name="l00653"></a>00653     <span class="comment">/*</span>
 
756
<a name="l00654"></a>00654 <span class="comment">      The following step has been left till now as the tess fails are used to</span>
 
757
<a name="l00655"></a>00655 <span class="comment">      determine if the word is deletable.</span>
 
758
<a name="l00656"></a>00656 <span class="comment">    */</span>
 
759
<a name="l00657"></a>00657     <span class="keywordflow">if</span> (!<a class="code" href="a00607.html#aefd718b3d3abe04fba6ff083019f8374">crunch_early_merge_tess_fails</a>)
 
760
<a name="l00658"></a>00658       word-&gt;<a class="code" href="a00650.html#af092edf58a4a718687aa35c6a458cada">merge_tess_fails</a>();
 
761
<a name="l00659"></a>00659     page_res_it.<a class="code" href="a00482.html#ab29005f235cc738eebafb7d9ed6fe70b">forward</a> ();
 
762
<a name="l00660"></a>00660   }
 
763
<a name="l00661"></a>00661 }
 
764
<a name="l00662"></a>00662 
 
765
<a name="l00663"></a>00663 
 
766
<a name="l00664"></a><a class="code" href="a00607.html#a81b3e2c28a090f5a877dd0f67a4080b6">00664</a> <span class="keywordtype">void</span> <a class="code" href="a00607.html#a81b3e2c28a090f5a877dd0f67a4080b6">Tesseract::convert_bad_unlv_chs</a>(<a class="code" href="a00650.html">WERD_RES</a> *word_res) {
 
767
<a name="l00665"></a>00665   <span class="keywordtype">int</span> i;
 
768
<a name="l00666"></a>00666   <a class="code" href="a00862.html#a8578b19fa1ff5ca75080db1a18ecc32e">UNICHAR_ID</a> unichar_dash = word_res-&gt;<a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>-&gt;<a class="code" href="a00642.html#ae786fa834962ef4d6dbf8300c6c5d456">unichar_to_id</a>(<span class="stringliteral">&quot;-&quot;</span>);
 
769
<a name="l00667"></a>00667   <a class="code" href="a00862.html#a8578b19fa1ff5ca75080db1a18ecc32e">UNICHAR_ID</a> unichar_space = word_res-&gt;<a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>-&gt;<a class="code" href="a00642.html#ae786fa834962ef4d6dbf8300c6c5d456">unichar_to_id</a>(<span class="stringliteral">&quot; &quot;</span>);
 
770
<a name="l00668"></a>00668   <a class="code" href="a00862.html#a8578b19fa1ff5ca75080db1a18ecc32e">UNICHAR_ID</a> unichar_tilde = word_res-&gt;<a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>-&gt;<a class="code" href="a00642.html#ae786fa834962ef4d6dbf8300c6c5d456">unichar_to_id</a>(<span class="stringliteral">&quot;~&quot;</span>);
 
771
<a name="l00669"></a>00669   <a class="code" href="a00862.html#a8578b19fa1ff5ca75080db1a18ecc32e">UNICHAR_ID</a> unichar_pow = word_res-&gt;<a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>-&gt;<a class="code" href="a00642.html#ae786fa834962ef4d6dbf8300c6c5d456">unichar_to_id</a>(<span class="stringliteral">&quot;^&quot;</span>);
 
772
<a name="l00670"></a>00670   <span class="keywordflow">for</span> (i = 0; i &lt; word_res-&gt;<a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#ab3760d96d63b5ca6c4bfeb32ffc8183c">length</a>(); ++i) {
 
773
<a name="l00671"></a>00671     <span class="keywordflow">if</span> (word_res-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a6b917b2e6157f7956ff53ca230425ed3">unichar_id</a>(i) == unichar_tilde) {
 
774
<a name="l00672"></a>00672       word_res-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a8238d436806c40b2e01b9659dfab5eb3">set_unichar_id</a>(unichar_dash, i);
 
775
<a name="l00673"></a>00673       <span class="keywordflow">if</span> (word_res-&gt;<a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>[i].accepted ())
 
776
<a name="l00674"></a>00674         word_res-&gt;<a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>[i].setrej_unlv_rej ();
 
777
<a name="l00675"></a>00675     }
 
778
<a name="l00676"></a>00676     <span class="keywordflow">if</span> (word_res-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a6b917b2e6157f7956ff53ca230425ed3">unichar_id</a>(i) == unichar_pow) {
 
779
<a name="l00677"></a>00677       word_res-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a8238d436806c40b2e01b9659dfab5eb3">set_unichar_id</a>(unichar_space, i);
 
780
<a name="l00678"></a>00678       <span class="keywordflow">if</span> (word_res-&gt;<a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>[i].accepted ())
 
781
<a name="l00679"></a>00679         word_res-&gt;<a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>[i].setrej_unlv_rej ();
 
782
<a name="l00680"></a>00680     }
 
783
<a name="l00681"></a>00681   }
 
784
<a name="l00682"></a>00682 }
 
785
<a name="l00683"></a>00683 
 
786
<a name="l00684"></a><a class="code" href="a00607.html#afad44e1105d7f57e1a99d9076c4a3b25">00684</a> <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42">GARBAGE_LEVEL</a> <a class="code" href="a00607.html#afad44e1105d7f57e1a99d9076c4a3b25">Tesseract::garbage_word</a>(<a class="code" href="a00650.html">WERD_RES</a> *word, <a class="code" href="a00831.html#a7712a7e28433d0ade59219a129549b6f">BOOL8</a> ok_dict_word) {
 
787
<a name="l00685"></a>00685   <span class="keyword">enum</span> STATES
 
788
<a name="l00686"></a>00686   {
 
789
<a name="l00687"></a>00687     JUNK,
 
790
<a name="l00688"></a>00688     FIRST_UPPER,
 
791
<a name="l00689"></a>00689     FIRST_LOWER,
 
792
<a name="l00690"></a>00690     FIRST_NUM,
 
793
<a name="l00691"></a>00691     SUBSEQUENT_UPPER,
 
794
<a name="l00692"></a>00692     SUBSEQUENT_LOWER,
 
795
<a name="l00693"></a>00693     SUBSEQUENT_NUM
 
796
<a name="l00694"></a>00694   };
 
797
<a name="l00695"></a>00695   <span class="keyword">const</span> <span class="keywordtype">char</span> *str = word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>();
 
798
<a name="l00696"></a>00696   <span class="keyword">const</span> <span class="keywordtype">char</span> *lengths = word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a93ab94b6fa6d5c0469fb52d66c62968c">unichar_lengths</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>();
 
799
<a name="l00697"></a>00697   STATES state = JUNK;
 
800
<a name="l00698"></a>00698   <span class="keywordtype">int</span> len = 0;
 
801
<a name="l00699"></a>00699   <span class="keywordtype">int</span> isolated_digits = 0;
 
802
<a name="l00700"></a>00700   <span class="keywordtype">int</span> isolated_alphas = 0;
 
803
<a name="l00701"></a>00701   <span class="keywordtype">int</span> bad_char_count = 0;
 
804
<a name="l00702"></a>00702   <span class="keywordtype">int</span> tess_rejs = 0;
 
805
<a name="l00703"></a>00703   <span class="keywordtype">int</span> dodgy_chars = 0;
 
806
<a name="l00704"></a>00704   <span class="keywordtype">int</span> ok_chars;
 
807
<a name="l00705"></a>00705   <a class="code" href="a00862.html#a8578b19fa1ff5ca75080db1a18ecc32e">UNICHAR_ID</a> last_char = -1;
 
808
<a name="l00706"></a>00706   <span class="keywordtype">int</span> alpha_repetition_count = 0;
 
809
<a name="l00707"></a>00707   <span class="keywordtype">int</span> longest_alpha_repetition_count = 0;
 
810
<a name="l00708"></a>00708   <span class="keywordtype">int</span> longest_lower_run_len = 0;
 
811
<a name="l00709"></a>00709   <span class="keywordtype">int</span> lower_string_count = 0;
 
812
<a name="l00710"></a>00710   <span class="keywordtype">int</span> longest_upper_run_len = 0;
 
813
<a name="l00711"></a>00711   <span class="keywordtype">int</span> upper_string_count = 0;
 
814
<a name="l00712"></a>00712   <span class="keywordtype">int</span> total_alpha_count = 0;
 
815
<a name="l00713"></a>00713   <span class="keywordtype">int</span> total_digit_count = 0;
 
816
<a name="l00714"></a>00714 
 
817
<a name="l00715"></a>00715   <span class="keywordflow">for</span> (; *str != <span class="charliteral">&#39;\0&#39;</span>; str += *(lengths++)) {
 
818
<a name="l00716"></a>00716     len++;
 
819
<a name="l00717"></a>00717     <span class="keywordflow">if</span> (word-&gt;<a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>-&gt;<a class="code" href="a00642.html#a482a94b3e6fcd4e5b9b9363bb9e0ee59">get_isupper</a> (str, *lengths)) {
 
820
<a name="l00718"></a>00718       total_alpha_count++;
 
821
<a name="l00719"></a>00719       <span class="keywordflow">switch</span> (state) {
 
822
<a name="l00720"></a>00720         <span class="keywordflow">case</span> SUBSEQUENT_UPPER:
 
823
<a name="l00721"></a>00721         <span class="keywordflow">case</span> FIRST_UPPER:
 
824
<a name="l00722"></a>00722           state = SUBSEQUENT_UPPER;
 
825
<a name="l00723"></a>00723           upper_string_count++;
 
826
<a name="l00724"></a>00724           <span class="keywordflow">if</span> (longest_upper_run_len &lt; upper_string_count)
 
827
<a name="l00725"></a>00725             longest_upper_run_len = upper_string_count;
 
828
<a name="l00726"></a>00726           <span class="keywordflow">if</span> (last_char == word-&gt;<a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>-&gt;<a class="code" href="a00642.html#ae786fa834962ef4d6dbf8300c6c5d456">unichar_to_id</a>(str, *lengths)) {
 
829
<a name="l00727"></a>00727             alpha_repetition_count++;
 
830
<a name="l00728"></a>00728             <span class="keywordflow">if</span> (longest_alpha_repetition_count &lt; alpha_repetition_count) {
 
831
<a name="l00729"></a>00729               longest_alpha_repetition_count = alpha_repetition_count;
 
832
<a name="l00730"></a>00730             }
 
833
<a name="l00731"></a>00731           }
 
834
<a name="l00732"></a>00732           <span class="keywordflow">else</span> {
 
835
<a name="l00733"></a>00733             last_char = word-&gt;<a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>-&gt;<a class="code" href="a00642.html#ae786fa834962ef4d6dbf8300c6c5d456">unichar_to_id</a>(str, *lengths);
 
836
<a name="l00734"></a>00734             alpha_repetition_count = 1;
 
837
<a name="l00735"></a>00735           }
 
838
<a name="l00736"></a>00736           <span class="keywordflow">break</span>;
 
839
<a name="l00737"></a>00737         <span class="keywordflow">case</span> FIRST_NUM:
 
840
<a name="l00738"></a>00738           isolated_digits++;
 
841
<a name="l00739"></a>00739         <span class="keywordflow">default</span>:
 
842
<a name="l00740"></a>00740           state = FIRST_UPPER;
 
843
<a name="l00741"></a>00741           last_char = word-&gt;<a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>-&gt;<a class="code" href="a00642.html#ae786fa834962ef4d6dbf8300c6c5d456">unichar_to_id</a>(str, *lengths);
 
844
<a name="l00742"></a>00742           alpha_repetition_count = 1;
 
845
<a name="l00743"></a>00743           upper_string_count = 1;
 
846
<a name="l00744"></a>00744           <span class="keywordflow">break</span>;
 
847
<a name="l00745"></a>00745       }
 
848
<a name="l00746"></a>00746     }
 
849
<a name="l00747"></a>00747     <span class="keywordflow">else</span> <span class="keywordflow">if</span> (word-&gt;<a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>-&gt;<a class="code" href="a00642.html#a074096c823dd5b9bc02c02a59784bf34">get_islower</a> (str, *lengths)) {
 
850
<a name="l00748"></a>00748       total_alpha_count++;
 
851
<a name="l00749"></a>00749       <span class="keywordflow">switch</span> (state) {
 
852
<a name="l00750"></a>00750         <span class="keywordflow">case</span> SUBSEQUENT_LOWER:
 
853
<a name="l00751"></a>00751         <span class="keywordflow">case</span> FIRST_LOWER:
 
854
<a name="l00752"></a>00752           state = SUBSEQUENT_LOWER;
 
855
<a name="l00753"></a>00753           lower_string_count++;
 
856
<a name="l00754"></a>00754           <span class="keywordflow">if</span> (longest_lower_run_len &lt; lower_string_count)
 
857
<a name="l00755"></a>00755             longest_lower_run_len = lower_string_count;
 
858
<a name="l00756"></a>00756           <span class="keywordflow">if</span> (last_char == word-&gt;<a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>-&gt;<a class="code" href="a00642.html#ae786fa834962ef4d6dbf8300c6c5d456">unichar_to_id</a>(str, *lengths)) {
 
859
<a name="l00757"></a>00757             alpha_repetition_count++;
 
860
<a name="l00758"></a>00758             <span class="keywordflow">if</span> (longest_alpha_repetition_count &lt; alpha_repetition_count) {
 
861
<a name="l00759"></a>00759               longest_alpha_repetition_count = alpha_repetition_count;
 
862
<a name="l00760"></a>00760             }
 
863
<a name="l00761"></a>00761           }
 
864
<a name="l00762"></a>00762           <span class="keywordflow">else</span> {
 
865
<a name="l00763"></a>00763             last_char = word-&gt;<a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>-&gt;<a class="code" href="a00642.html#ae786fa834962ef4d6dbf8300c6c5d456">unichar_to_id</a>(str, *lengths);
 
866
<a name="l00764"></a>00764             alpha_repetition_count = 1;
 
867
<a name="l00765"></a>00765           }
 
868
<a name="l00766"></a>00766           <span class="keywordflow">break</span>;
 
869
<a name="l00767"></a>00767         <span class="keywordflow">case</span> FIRST_NUM:
 
870
<a name="l00768"></a>00768           isolated_digits++;
 
871
<a name="l00769"></a>00769         <span class="keywordflow">default</span>:
 
872
<a name="l00770"></a>00770           state = FIRST_LOWER;
 
873
<a name="l00771"></a>00771           last_char = word-&gt;<a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>-&gt;<a class="code" href="a00642.html#ae786fa834962ef4d6dbf8300c6c5d456">unichar_to_id</a>(str, *lengths);
 
874
<a name="l00772"></a>00772           alpha_repetition_count = 1;
 
875
<a name="l00773"></a>00773           lower_string_count = 1;
 
876
<a name="l00774"></a>00774           <span class="keywordflow">break</span>;
 
877
<a name="l00775"></a>00775       }
 
878
<a name="l00776"></a>00776     }
 
879
<a name="l00777"></a>00777     <span class="keywordflow">else</span> <span class="keywordflow">if</span> (word-&gt;<a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>-&gt;<a class="code" href="a00642.html#a5951dde3ab90c12c309d776570a10725">get_isdigit</a> (str, *lengths)) {
 
880
<a name="l00778"></a>00778       total_digit_count++;
 
881
<a name="l00779"></a>00779       <span class="keywordflow">switch</span> (state) {
 
882
<a name="l00780"></a>00780         <span class="keywordflow">case</span> FIRST_NUM:
 
883
<a name="l00781"></a>00781           state = SUBSEQUENT_NUM;
 
884
<a name="l00782"></a>00782         <span class="keywordflow">case</span> SUBSEQUENT_NUM:
 
885
<a name="l00783"></a>00783           <span class="keywordflow">break</span>;
 
886
<a name="l00784"></a>00784         <span class="keywordflow">case</span> FIRST_UPPER:
 
887
<a name="l00785"></a>00785         <span class="keywordflow">case</span> FIRST_LOWER:
 
888
<a name="l00786"></a>00786           isolated_alphas++;
 
889
<a name="l00787"></a>00787         <span class="keywordflow">default</span>:
 
890
<a name="l00788"></a>00788           state = FIRST_NUM;
 
891
<a name="l00789"></a>00789           <span class="keywordflow">break</span>;
 
892
<a name="l00790"></a>00790       }
 
893
<a name="l00791"></a>00791     }
 
894
<a name="l00792"></a>00792     <span class="keywordflow">else</span> {
 
895
<a name="l00793"></a>00793       <span class="keywordflow">if</span> (*lengths == 1 &amp;&amp; *str == <span class="charliteral">&#39; &#39;</span>)
 
896
<a name="l00794"></a>00794         tess_rejs++;
 
897
<a name="l00795"></a>00795       <span class="keywordflow">else</span>
 
898
<a name="l00796"></a>00796         bad_char_count++;
 
899
<a name="l00797"></a>00797       <span class="keywordflow">switch</span> (state) {
 
900
<a name="l00798"></a>00798         <span class="keywordflow">case</span> FIRST_NUM:
 
901
<a name="l00799"></a>00799           isolated_digits++;
 
902
<a name="l00800"></a>00800           <span class="keywordflow">break</span>;
 
903
<a name="l00801"></a>00801         <span class="keywordflow">case</span> FIRST_UPPER:
 
904
<a name="l00802"></a>00802         <span class="keywordflow">case</span> FIRST_LOWER:
 
905
<a name="l00803"></a>00803           isolated_alphas++;
 
906
<a name="l00804"></a>00804         <span class="keywordflow">default</span>:
 
907
<a name="l00805"></a>00805           <span class="keywordflow">break</span>;
 
908
<a name="l00806"></a>00806       }
 
909
<a name="l00807"></a>00807       state = JUNK;
 
910
<a name="l00808"></a>00808     }
 
911
<a name="l00809"></a>00809   }
 
912
<a name="l00810"></a>00810 
 
913
<a name="l00811"></a>00811   <span class="keywordflow">switch</span> (state) {
 
914
<a name="l00812"></a>00812     <span class="keywordflow">case</span> FIRST_NUM:
 
915
<a name="l00813"></a>00813       isolated_digits++;
 
916
<a name="l00814"></a>00814       <span class="keywordflow">break</span>;
 
917
<a name="l00815"></a>00815     <span class="keywordflow">case</span> FIRST_UPPER:
 
918
<a name="l00816"></a>00816     <span class="keywordflow">case</span> FIRST_LOWER:
 
919
<a name="l00817"></a>00817       isolated_alphas++;
 
920
<a name="l00818"></a>00818     <span class="keywordflow">default</span>:
 
921
<a name="l00819"></a>00819       <span class="keywordflow">break</span>;
 
922
<a name="l00820"></a>00820   }
 
923
<a name="l00821"></a>00821 
 
924
<a name="l00822"></a>00822   <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a260642901c32784e997473bc298346cc">crunch_include_numerals</a>) {
 
925
<a name="l00823"></a>00823     total_alpha_count += total_digit_count - isolated_digits;
 
926
<a name="l00824"></a>00824   }
 
927
<a name="l00825"></a>00825 
 
928
<a name="l00826"></a>00826   <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a267ae2256478e0caa24cc4eda6f78afa">crunch_leave_ok_strings</a> &amp;&amp; len &gt;= 4 &amp;&amp;
 
929
<a name="l00827"></a>00827       2 * (total_alpha_count - isolated_alphas) &gt; len &amp;&amp;
 
930
<a name="l00828"></a>00828       longest_alpha_repetition_count &lt; <a class="code" href="a00607.html#a2cba6d61cfdced5bdea7461938b45c72">crunch_long_repetitions</a>) {
 
931
<a name="l00829"></a>00829     <span class="keywordflow">if</span> ((<a class="code" href="a00607.html#a0fc0f88a88d8cf4837c6b85ab9f1fd00">crunch_accept_ok</a> &amp;&amp;
 
932
<a name="l00830"></a>00830          <a class="code" href="a00607.html#a51f2ad8b2d849d90bbcfbfca3067c0f1">acceptable_word_string</a>(*word-&gt;<a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>, str, lengths) !=
 
933
<a name="l00831"></a>00831              <a class="code" href="a00671.html#af9ff5a7d7a1b9eeea7ce4106bffa7eeeaab0741a5a5ff9757ca343af255353ba0" title="Unacceptable word.">AC_UNACCEPTABLE</a>) ||
 
934
<a name="l00832"></a>00832         longest_lower_run_len &gt; <a class="code" href="a00607.html#a62ab8881690e98143d62f1ac528041f8">crunch_leave_lc_strings</a> ||
 
935
<a name="l00833"></a>00833         longest_upper_run_len &gt; <a class="code" href="a00607.html#a557bc470ec0edc0bd11b4fb3432b2266">crunch_leave_uc_strings</a>)
 
936
<a name="l00834"></a>00834       <span class="keywordflow">return</span> <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42ac744add072d392855d098a995acb3751">G_NEVER_CRUNCH</a>;
 
937
<a name="l00835"></a>00835   }
 
938
<a name="l00836"></a>00836   <span class="keywordflow">if</span> (word-&gt;<a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#ab3760d96d63b5ca6c4bfeb32ffc8183c">length</a>() &gt; 1 &amp;&amp;
 
939
<a name="l00837"></a>00837       strpbrk(str, <span class="stringliteral">&quot; &quot;</span>) == NULL &amp;&amp;
 
940
<a name="l00838"></a>00838       (word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a161ac137df2d063a6421728e6aa08fcb">permuter</a>() == <a class="code" href="a00788.html#a18e2c75cefe9e5b78e8ce41aa5fa25bca616bdbcaed7dd414823b303af5c8b1f1">SYSTEM_DAWG_PERM</a> ||
 
941
<a name="l00839"></a>00839        word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a161ac137df2d063a6421728e6aa08fcb">permuter</a>() == <a class="code" href="a00788.html#a18e2c75cefe9e5b78e8ce41aa5fa25bca4fd9d3b025ecc5ea8f2248fea29543e4">FREQ_DAWG_PERM</a> ||
 
942
<a name="l00840"></a>00840        word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a161ac137df2d063a6421728e6aa08fcb">permuter</a>() == <a class="code" href="a00788.html#a18e2c75cefe9e5b78e8ce41aa5fa25bca5e293e54fa687876197992131e779d54">USER_DAWG_PERM</a> ||
 
943
<a name="l00841"></a>00841        word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a161ac137df2d063a6421728e6aa08fcb">permuter</a>() == <a class="code" href="a00788.html#a18e2c75cefe9e5b78e8ce41aa5fa25bca809574e2067a5304fbd2279d869e9b24">NUMBER_PERM</a> ||
 
944
<a name="l00842"></a>00842        <a class="code" href="a00607.html#a51f2ad8b2d849d90bbcfbfca3067c0f1">acceptable_word_string</a>(*word-&gt;<a class="code" href="a00650.html#a053a2aeeddc26a93a41a04fa08dd688a">uch_set</a>, str, lengths) !=
 
945
<a name="l00843"></a>00843            <a class="code" href="a00671.html#af9ff5a7d7a1b9eeea7ce4106bffa7eeeaab0741a5a5ff9757ca343af255353ba0" title="Unacceptable word.">AC_UNACCEPTABLE</a> || ok_dict_word))
 
946
<a name="l00844"></a>00844     <span class="keywordflow">return</span> <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42a46d03f2b7d080cc8319a43ef4bad82b3">G_OK</a>;
 
947
<a name="l00845"></a>00845 
 
948
<a name="l00846"></a>00846   ok_chars = len - bad_char_count - isolated_digits -
 
949
<a name="l00847"></a>00847     isolated_alphas - tess_rejs;
 
950
<a name="l00848"></a>00848 
 
951
<a name="l00849"></a>00849   <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a20a1206ce17b07676f829fdee0034eb5">crunch_debug</a> &gt; 3) {
 
952
<a name="l00850"></a>00850     <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a>(<span class="stringliteral">&quot;garbage_word: \&quot;%s\&quot;\n&quot;</span>,
 
953
<a name="l00851"></a>00851             word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>());
 
954
<a name="l00852"></a>00852     <a class="code" href="a00860.html#a02aa7cca2c3857c14716e8cca96ace6c">tprintf</a>(<span class="stringliteral">&quot;LEN: %d  bad: %d  iso_N: %d  iso_A: %d  rej: %d\n&quot;</span>,
 
955
<a name="l00853"></a>00853             len,
 
956
<a name="l00854"></a>00854             bad_char_count, isolated_digits, isolated_alphas, tess_rejs);
 
957
<a name="l00855"></a>00855   }
 
958
<a name="l00856"></a>00856   <span class="keywordflow">if</span> (bad_char_count == 0 &amp;&amp;
 
959
<a name="l00857"></a>00857       tess_rejs == 0 &amp;&amp;
 
960
<a name="l00858"></a>00858       (len &gt; isolated_digits + isolated_alphas || len &lt;= 2))
 
961
<a name="l00859"></a>00859     <span class="keywordflow">return</span> <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42a46d03f2b7d080cc8319a43ef4bad82b3">G_OK</a>;
 
962
<a name="l00860"></a>00860 
 
963
<a name="l00861"></a>00861   <span class="keywordflow">if</span> (tess_rejs &gt; ok_chars ||
 
964
<a name="l00862"></a>00862       (tess_rejs &gt; 0 &amp;&amp; (bad_char_count + tess_rejs) * 2 &gt; len))
 
965
<a name="l00863"></a>00863     <span class="keywordflow">return</span> <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42a2400fb7991d9c94896464522e10c49ad">G_TERRIBLE</a>;
 
966
<a name="l00864"></a>00864 
 
967
<a name="l00865"></a>00865   <span class="keywordflow">if</span> (len &gt; 4) {
 
968
<a name="l00866"></a>00866     dodgy_chars = 2 * tess_rejs + bad_char_count + isolated_digits +
 
969
<a name="l00867"></a>00867         isolated_alphas;
 
970
<a name="l00868"></a>00868     <span class="keywordflow">if</span> (dodgy_chars &gt; 5 || (dodgy_chars / (<span class="keywordtype">float</span>) len) &gt; 0.5)
 
971
<a name="l00869"></a>00869       <span class="keywordflow">return</span> <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42a17acba0fc01478524c8214f0da82815a">G_DODGY</a>;
 
972
<a name="l00870"></a>00870     <span class="keywordflow">else</span>
 
973
<a name="l00871"></a>00871       <span class="keywordflow">return</span> <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42a46d03f2b7d080cc8319a43ef4bad82b3">G_OK</a>;
 
974
<a name="l00872"></a>00872   } <span class="keywordflow">else</span> {
 
975
<a name="l00873"></a>00873     dodgy_chars = 2 * tess_rejs + bad_char_count;
 
976
<a name="l00874"></a>00874     <span class="keywordflow">if</span> ((len == 4 &amp;&amp; dodgy_chars &gt; 2) ||
 
977
<a name="l00875"></a>00875         (len == 3 &amp;&amp; dodgy_chars &gt; 2) || dodgy_chars &gt;= len)
 
978
<a name="l00876"></a>00876       <span class="keywordflow">return</span> <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42a17acba0fc01478524c8214f0da82815a">G_DODGY</a>;
 
979
<a name="l00877"></a>00877     <span class="keywordflow">else</span>
 
980
<a name="l00878"></a>00878       <span class="keywordflow">return</span> <a class="code" href="a00678.html#ac4a411895d63e3bb0bea788e92894d42a46d03f2b7d080cc8319a43ef4bad82b3">G_OK</a>;
 
981
<a name="l00879"></a>00879   }
 
982
<a name="l00880"></a>00880 }
 
983
<a name="l00881"></a>00881 
 
984
<a name="l00882"></a>00882 
 
985
<a name="l00883"></a>00883 <span class="comment">/*************************************************************************</span>
 
986
<a name="l00884"></a>00884 <span class="comment"> * word_deletable()</span>
 
987
<a name="l00885"></a>00885 <span class="comment"> *     DELETE WERDS AT ENDS OF ROWS IF</span>
 
988
<a name="l00886"></a>00886 <span class="comment"> *        Word is crunched &amp;&amp;</span>
 
989
<a name="l00887"></a>00887 <span class="comment"> *        ( string length = 0                                          OR</span>
 
990
<a name="l00888"></a>00888 <span class="comment"> *          &gt; 50% of chars are &quot;|&quot; (before merging)                    OR</span>
 
991
<a name="l00889"></a>00889 <span class="comment"> *          certainty &lt; -10                                            OR</span>
 
992
<a name="l00890"></a>00890 <span class="comment"> *          rating /char &gt; 60                                          OR</span>
 
993
<a name="l00891"></a>00891 <span class="comment"> *          TOP of word is more than 0.5 xht BELOW baseline            OR</span>
 
994
<a name="l00892"></a>00892 <span class="comment"> *          BOTTOM of word is more than 0.5 xht ABOVE xht              OR</span>
 
995
<a name="l00893"></a>00893 <span class="comment"> *          length of word &lt; 3xht                                      OR</span>
 
996
<a name="l00894"></a>00894 <span class="comment"> *          height of word &lt; 0.7 xht                                   OR</span>
 
997
<a name="l00895"></a>00895 <span class="comment"> *          height of word &gt; 3.0 xht                                   OR</span>
 
998
<a name="l00896"></a>00896 <span class="comment"> *          &gt;75% of the outline BBs have longest dimension &lt; 0.5xht</span>
 
999
<a name="l00897"></a>00897 <span class="comment"> *************************************************************************/</span>
 
1000
<a name="l00898"></a>00898 
 
1001
<a name="l00899"></a><a class="code" href="a00607.html#a42d3b5e26902af46296cfdc24bb4b195">00899</a> <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9">CRUNCH_MODE</a> <a class="code" href="a00607.html#a42d3b5e26902af46296cfdc24bb4b195">Tesseract::word_deletable</a>(<a class="code" href="a00650.html">WERD_RES</a> *word, <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> &amp;delete_mode) {
 
1002
<a name="l00900"></a>00900   <span class="keywordtype">int</span> word_len = word-&gt;<a class="code" href="a00650.html#ae24eb58be06cf275aee411d947fd31ef">reject_map</a>.<a class="code" href="a00517.html#ab3760d96d63b5ca6c4bfeb32ffc8183c">length</a> ();
 
1003
<a name="l00901"></a>00901   <span class="keywordtype">float</span> rating_per_ch;
 
1004
<a name="l00902"></a>00902   <a class="code" href="a00592.html">TBOX</a> box;                       <span class="comment">//BB of word</span>
 
1005
<a name="l00903"></a>00903 
 
1006
<a name="l00904"></a>00904   <span class="keywordflow">if</span> (word-&gt;<a class="code" href="a00650.html#a82d32a9f59bc1c08da9537303c56615b">unlv_crunch_mode</a> == <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9a1eac25752f2c55f1809d11d4ef917ff9">CR_NONE</a>) {
 
1007
<a name="l00905"></a>00905     delete_mode = 0;
 
1008
<a name="l00906"></a>00906     <span class="keywordflow">return</span> <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9a1eac25752f2c55f1809d11d4ef917ff9">CR_NONE</a>;
 
1009
<a name="l00907"></a>00907   }
 
1010
<a name="l00908"></a>00908 
 
1011
<a name="l00909"></a>00909   <span class="keywordflow">if</span> (word_len == 0) {
 
1012
<a name="l00910"></a>00910     delete_mode = 1;
 
1013
<a name="l00911"></a>00911     <span class="keywordflow">return</span> <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9a1799c83261446306e1d1527f1779d6a4">CR_DELETE</a>;
 
1014
<a name="l00912"></a>00912   }
 
1015
<a name="l00913"></a>00913 
 
1016
<a name="l00914"></a>00914   <span class="keywordflow">if</span> (word-&gt;<a class="code" href="a00650.html#a211ab5c5a13123ee7bdc7459d3363f9d">rebuild_word</a> != NULL) {
 
1017
<a name="l00915"></a>00915     <span class="comment">// Cube leaves rebuild_word NULL.</span>
 
1018
<a name="l00916"></a>00916     box = word-&gt;<a class="code" href="a00650.html#a211ab5c5a13123ee7bdc7459d3363f9d">rebuild_word</a>-&gt;<a class="code" href="a00635.html#a8278c9db39975f181ad074bf343ea402">bounding_box</a>();
 
1019
<a name="l00917"></a>00917     <span class="keywordflow">if</span> (box.<a class="code" href="a00592.html#a8379d4bbc72bdbb1f069fc14790e632d">height</a> () &lt; <a class="code" href="a00607.html#a40051cf485581289adcf04cc4733df41">crunch_del_min_ht</a> * <a class="code" href="a00759.html#af974e6755e636c8bc7cbe57e3bcbdd73">kBlnXHeight</a>) {
 
1020
<a name="l00918"></a>00918       delete_mode = 4;
 
1021
<a name="l00919"></a>00919       <span class="keywordflow">return</span> <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9a1799c83261446306e1d1527f1779d6a4">CR_DELETE</a>;
 
1022
<a name="l00920"></a>00920     }
 
1023
<a name="l00921"></a>00921 
 
1024
<a name="l00922"></a>00922     <span class="keywordflow">if</span> (<a class="code" href="a00607.html#a63d37ef049d0902573211d4301564a1a">noise_outlines</a>(word-&gt;<a class="code" href="a00650.html#a211ab5c5a13123ee7bdc7459d3363f9d">rebuild_word</a>)) {
 
1025
<a name="l00923"></a>00923       delete_mode = 5;
 
1026
<a name="l00924"></a>00924       <span class="keywordflow">return</span> <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9a1799c83261446306e1d1527f1779d6a4">CR_DELETE</a>;
 
1027
<a name="l00925"></a>00925     }
 
1028
<a name="l00926"></a>00926   }
 
1029
<a name="l00927"></a>00927 
 
1030
<a name="l00928"></a>00928   <span class="keywordflow">if</span> ((<a class="code" href="a00607.html#ae180ffab43d41336d8418e30004d1866">failure_count</a> (word) * 1.5) &gt; word_len) {
 
1031
<a name="l00929"></a>00929     delete_mode = 2;
 
1032
<a name="l00930"></a>00930     <span class="keywordflow">return</span> <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9a223aec7f7b2d11193c90d04d8f589387">CR_LOOSE_SPACE</a>;
 
1033
<a name="l00931"></a>00931   }
 
1034
<a name="l00932"></a>00932 
 
1035
<a name="l00933"></a>00933   <span class="keywordflow">if</span> (word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a1b25ef2b44d21a7204483a7ca804f293">certainty</a> () &lt; <a class="code" href="a00607.html#a5c24b5ef1b42bebf68d3ab54acc94ccf">crunch_del_cert</a>) {
 
1036
<a name="l00934"></a>00934     delete_mode = 7;
 
1037
<a name="l00935"></a>00935     <span class="keywordflow">return</span> <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9a223aec7f7b2d11193c90d04d8f589387">CR_LOOSE_SPACE</a>;
 
1038
<a name="l00936"></a>00936   }
 
1039
<a name="l00937"></a>00937 
 
1040
<a name="l00938"></a>00938   rating_per_ch = word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a52ea24dcb5ff15ad8aee3fd774622216">rating</a> () / word_len;
 
1041
<a name="l00939"></a>00939 
 
1042
<a name="l00940"></a>00940   <span class="keywordflow">if</span> (rating_per_ch &gt; <a class="code" href="a00607.html#a10e3b50d7d9fc570d752d31b05d6971b">crunch_del_rating</a>) {
 
1043
<a name="l00941"></a>00941     delete_mode = 8;
 
1044
<a name="l00942"></a>00942     <span class="keywordflow">return</span> <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9a223aec7f7b2d11193c90d04d8f589387">CR_LOOSE_SPACE</a>;
 
1045
<a name="l00943"></a>00943   }
 
1046
<a name="l00944"></a>00944 
 
1047
<a name="l00945"></a>00945   <span class="keywordflow">if</span> (box.<a class="code" href="a00592.html#adf92e9fdac1bdf11c10d1c4d1178791a">top</a> () &lt; <a class="code" href="a00759.html#abe10ce41bf7240ae8a053dea471d6ed5">kBlnBaselineOffset</a> - <a class="code" href="a00607.html#a86f631e39200ed6d92a92ccd6a1de2c9">crunch_del_low_word</a> * <a class="code" href="a00759.html#af974e6755e636c8bc7cbe57e3bcbdd73">kBlnXHeight</a>) {
 
1048
<a name="l00946"></a>00946     delete_mode = 9;
 
1049
<a name="l00947"></a>00947     <span class="keywordflow">return</span> <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9a223aec7f7b2d11193c90d04d8f589387">CR_LOOSE_SPACE</a>;
 
1050
<a name="l00948"></a>00948   }
 
1051
<a name="l00949"></a>00949 
 
1052
<a name="l00950"></a>00950   <span class="keywordflow">if</span> (box.<a class="code" href="a00592.html#a4451d237f1cd18c4982d63fe36a11fc3">bottom</a> () &gt;
 
1053
<a name="l00951"></a>00951   <a class="code" href="a00759.html#abe10ce41bf7240ae8a053dea471d6ed5">kBlnBaselineOffset</a> + <a class="code" href="a00607.html#ad6665d769dab6f71751fd64af3837674">crunch_del_high_word</a> * <a class="code" href="a00759.html#af974e6755e636c8bc7cbe57e3bcbdd73">kBlnXHeight</a>) {
 
1054
<a name="l00952"></a>00952     delete_mode = 10;
 
1055
<a name="l00953"></a>00953     <span class="keywordflow">return</span> <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9a223aec7f7b2d11193c90d04d8f589387">CR_LOOSE_SPACE</a>;
 
1056
<a name="l00954"></a>00954   }
 
1057
<a name="l00955"></a>00955 
 
1058
<a name="l00956"></a>00956   <span class="keywordflow">if</span> (box.<a class="code" href="a00592.html#a8379d4bbc72bdbb1f069fc14790e632d">height</a> () &gt; <a class="code" href="a00607.html#ae8bf52f3bdd158ca70e25231e485ce1a">crunch_del_max_ht</a> * <a class="code" href="a00759.html#af974e6755e636c8bc7cbe57e3bcbdd73">kBlnXHeight</a>) {
 
1059
<a name="l00957"></a>00957     delete_mode = 11;
 
1060
<a name="l00958"></a>00958     <span class="keywordflow">return</span> <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9a223aec7f7b2d11193c90d04d8f589387">CR_LOOSE_SPACE</a>;
 
1061
<a name="l00959"></a>00959   }
 
1062
<a name="l00960"></a>00960 
 
1063
<a name="l00961"></a>00961   <span class="keywordflow">if</span> (box.<a class="code" href="a00592.html#af95494a2ccacc70cc2b83820b2948619">width</a> () &lt; <a class="code" href="a00607.html#a82fcb38c1bdc5bce93ff69c94ef7e2b9">crunch_del_min_width</a> * <a class="code" href="a00759.html#af974e6755e636c8bc7cbe57e3bcbdd73">kBlnXHeight</a>) {
 
1064
<a name="l00962"></a>00962     delete_mode = 3;
 
1065
<a name="l00963"></a>00963     <span class="keywordflow">return</span> <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9a223aec7f7b2d11193c90d04d8f589387">CR_LOOSE_SPACE</a>;
 
1066
<a name="l00964"></a>00964   }
 
1067
<a name="l00965"></a>00965 
 
1068
<a name="l00966"></a>00966   delete_mode = 0;
 
1069
<a name="l00967"></a>00967   <span class="keywordflow">return</span> <a class="code" href="a00769.html#affc9a96d31e23e645355e534f11c40e9a1eac25752f2c55f1809d11d4ef917ff9">CR_NONE</a>;
 
1070
<a name="l00968"></a>00968 }
 
1071
<a name="l00969"></a>00969 
 
1072
<a name="l00970"></a><a class="code" href="a00607.html#ae180ffab43d41336d8418e30004d1866">00970</a> <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> <a class="code" href="a00607.html#ae180ffab43d41336d8418e30004d1866">Tesseract::failure_count</a>(<a class="code" href="a00650.html">WERD_RES</a> *word) {
 
1073
<a name="l00971"></a>00971   <span class="keyword">const</span> <span class="keywordtype">char</span> *str = word-&gt;<a class="code" href="a00650.html#a3d42ac7ec121263e757c9b7160dfd50d">best_choice</a>-&gt;<a class="code" href="a00649.html#a41d740f0accccdfb245d34d554122116">unichar_string</a>().<a class="code" href="a00557.html#a32beee43ecab1bed8bebc3466896ad6e">string</a>();
 
1074
<a name="l00972"></a>00972   <span class="keywordtype">int</span> tess_rejs = 0;
 
1075
<a name="l00973"></a>00973 
 
1076
<a name="l00974"></a>00974   <span class="keywordflow">for</span> (; *str != <span class="charliteral">&#39;\0&#39;</span>; str++) {
 
1077
<a name="l00975"></a>00975     <span class="keywordflow">if</span> (*str == <span class="charliteral">&#39; &#39;</span>)
 
1078
<a name="l00976"></a>00976       tess_rejs++;
 
1079
<a name="l00977"></a>00977   }
 
1080
<a name="l00978"></a>00978   <span class="keywordflow">return</span> tess_rejs;
 
1081
<a name="l00979"></a>00979 }
 
1082
<a name="l00980"></a>00980 
 
1083
<a name="l00981"></a>00981 
 
1084
<a name="l00982"></a><a class="code" href="a00607.html#a63d37ef049d0902573211d4301564a1a">00982</a> <a class="code" href="a00831.html#a7712a7e28433d0ade59219a129549b6f">BOOL8</a> <a class="code" href="a00607.html#a63d37ef049d0902573211d4301564a1a">Tesseract::noise_outlines</a>(<a class="code" href="a00635.html">TWERD</a> *word) {
 
1085
<a name="l00983"></a>00983   <a class="code" href="a00592.html">TBOX</a> box;                       <span class="comment">// BB of outline</span>
 
1086
<a name="l00984"></a>00984   <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> outline_count = 0;
 
1087
<a name="l00985"></a>00985   <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> small_outline_count = 0;
 
1088
<a name="l00986"></a>00986   <a class="code" href="a00831.html#a8d41499d38c24d39b221ab0c158fe5a8">inT16</a> max_dimension;
 
1089
<a name="l00987"></a>00987   <span class="keywordtype">float</span> small_limit = <a class="code" href="a00759.html#af974e6755e636c8bc7cbe57e3bcbdd73">kBlnXHeight</a> * <a class="code" href="a00607.html#a422f7f28537d7127b0965b6224f2891e">crunch_small_outlines_size</a>;
 
1090
<a name="l00988"></a>00988 
 
1091
<a name="l00989"></a>00989   <span class="keywordflow">for</span> (<span class="keywordtype">int</span> b = 0; b &lt; word-&gt;<a class="code" href="a00635.html#adfdef9868e61650e076775011382ec70">NumBlobs</a>(); ++b) {
 
1092
<a name="l00990"></a>00990     <a class="code" href="a00591.html">TBLOB</a>* blob = word-&gt;<a class="code" href="a00635.html#a52fba22521039a79d8c1c8182f602dcc">blobs</a>[b];
 
1093
<a name="l00991"></a>00991     <span class="keywordflow">for</span> (<a class="code" href="a00613.html">TESSLINE</a>* ol = blob-&gt;<a class="code" href="a00591.html#a3c9282cad4102157d214bcc6311cda07">outlines</a>; ol != NULL; ol = ol-&gt;<a class="code" href="a00613.html#a0fb224d43ea53a791f0957fa92793bef">next</a>) {
 
1094
<a name="l00992"></a>00992       outline_count++;
 
1095
<a name="l00993"></a>00993       box = ol-&gt;bounding_box();
 
1096
<a name="l00994"></a>00994       <span class="keywordflow">if</span> (box.<a class="code" href="a00592.html#a8379d4bbc72bdbb1f069fc14790e632d">height</a>() &gt; box.<a class="code" href="a00592.html#af95494a2ccacc70cc2b83820b2948619">width</a>())
 
1097
<a name="l00995"></a>00995         max_dimension = box.<a class="code" href="a00592.html#a8379d4bbc72bdbb1f069fc14790e632d">height</a>();
 
1098
<a name="l00996"></a>00996       <span class="keywordflow">else</span>
 
1099
<a name="l00997"></a>00997         max_dimension = box.<a class="code" href="a00592.html#af95494a2ccacc70cc2b83820b2948619">width</a>();
 
1100
<a name="l00998"></a>00998       <span class="keywordflow">if</span> (max_dimension &lt; small_limit)
 
1101
<a name="l00999"></a>00999         small_outline_count++;
 
1102
<a name="l01000"></a>01000     }
 
1103
<a name="l01001"></a>01001   }
 
1104
<a name="l01002"></a>01002   <span class="keywordflow">return</span> small_outline_count &gt;= outline_count;
 
1105
<a name="l01003"></a>01003 }
 
1106
<a name="l01004"></a>01004 
 
1107
<a name="l01005"></a>01005 }  <span class="comment">// namespace tesseract</span>
 
1108
</pre></div></div><!-- contents -->
 
1109
</div>
 
1110
<!-- window showing the filter options -->
 
1111
<div id="MSearchSelectWindow"
 
1112
     onmouseover="return searchBox.OnSearchSelectShow()"
 
1113
     onmouseout="return searchBox.OnSearchSelectHide()"
 
1114
     onkeydown="return searchBox.OnSearchSelectKey(event)">
 
1115
<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Classes</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Namespaces</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark">&#160;</span>Files</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark">&#160;</span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(5)"><span class="SelectionMark">&#160;</span>Variables</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(6)"><span class="SelectionMark">&#160;</span>Typedefs</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(7)"><span class="SelectionMark">&#160;</span>Enumerations</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(8)"><span class="SelectionMark">&#160;</span>Enumerator</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(9)"><span class="SelectionMark">&#160;</span>Friends</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(10)"><span class="SelectionMark">&#160;</span>Defines</a></div>
 
1116
 
 
1117
<!-- iframe showing the search results (closed by default) -->
 
1118
<div id="MSearchResultsWindow">
 
1119
<iframe src="javascript:void(0)" frameborder="0" 
 
1120
        name="MSearchResults" id="MSearchResults">
 
1121
</iframe>
 
1122
</div>
 
1123
 
 
1124
  <div id="nav-path" class="navpath">
 
1125
    <ul>
 
1126
      <li class="navelem"><a class="el" href="a00677.html">docqual.cpp</a>      </li>
 
1127
 
 
1128
    <li class="footer">Generated on Mon Feb 3 2014 10:59:07 for tesseract by
 
1129
    <a href="http://www.doxygen.org/index.html">
 
1130
    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.7.6.1 </li>
 
1131
   </ul>
 
1132
 </div>
 
1133
 
 
1134
 
 
1135
</body>
 
1136
</html>