60
58
INT_MEMBER(chop_min_outline_points, 6, "Min Number of Points on Outline",
60
INT_MEMBER(chop_seam_pile_size, 150, "Max number of seams in seam_pile",
62
BOOL_MEMBER(chop_new_seam_pile, 1, "Use new seam_pile", params()),
62
63
INT_MEMBER(chop_inside_angle, -50, "Min Inside Angle Bend",
64
65
INT_MEMBER(chop_min_outline_area, 2000, "Min Outline Area",
84
88
BOOL_MEMBER(assume_fixed_pitch_char_segment, FALSE,
85
89
"include fixed-pitch heuristics in char segmentation",
87
BOOL_MEMBER(use_new_state_cost, FALSE,
88
"use new state cost heuristics for segmentation state evaluation",
90
double_MEMBER(heuristic_segcost_rating_base, 1.25,
91
"base factor for adding segmentation cost into word rating."
92
"It's a multiplying factor, the larger the value above 1, "
93
"the bigger the effect of segmentation cost.",
95
double_MEMBER(heuristic_weight_rating, 1.0,
96
"weight associated with char rating in combined cost of state",
98
double_MEMBER(heuristic_weight_width, 1000.0,
99
"weight associated with width evidence in combined cost of"
101
double_MEMBER(heuristic_weight_seamcut, 0.0,
102
"weight associated with seam cut in combined cost of state",
104
double_MEMBER(heuristic_max_char_wh_ratio, 2.0,
105
"max char width-to-height ratio allowed in segmentation",
107
91
INT_MEMBER(wordrec_debug_level, 0,
108
92
"Debug level for wordrec", params()),
93
INT_MEMBER(wordrec_max_join_chunks, 4,
94
"Max number of broken pieces to associate", params()),
95
BOOL_MEMBER(wordrec_skip_no_truth_words, false,
96
"Only run OCR for words that had truth recorded in BlamerBundle",
109
98
BOOL_MEMBER(wordrec_debug_blamer, false,
110
99
"Print blamer debug messages", params()),
111
100
BOOL_MEMBER(wordrec_run_blamer, false,
112
101
"Try to set the blame for errors", params()),
113
BOOL_MEMBER(enable_new_segsearch, true,
114
"Enable new segmentation search path.", params()),
115
102
INT_MEMBER(segsearch_debug_level, 0,
116
103
"SegSearch debug level", params()),
117
104
INT_MEMBER(segsearch_max_pain_points, 2000,
118
105
"Maximum number of pain points stored in the queue",
120
INT_MEMBER(segsearch_max_futile_classifications, 10,
121
"Maximum number of pain point classifications per word that"
107
INT_MEMBER(segsearch_max_futile_classifications, 20,
108
"Maximum number of pain point classifications per chunk that"
122
109
"did not result in finding a better word choice.",
124
111
double_MEMBER(segsearch_max_char_wh_ratio, 2.0,
125
112
"Maximum character width-to-height ratio", params()),
126
double_MEMBER(segsearch_max_fixed_pitch_char_wh_ratio, 2.0,
127
"Maximum character width-to-height ratio for"
128
" fixed-pitch fonts",
130
BOOL_MEMBER(save_alt_choices, false,
113
BOOL_MEMBER(save_alt_choices, true,
131
114
"Save alternative paths found during chopping"
132
115
" and segmentation search",
134
117
prev_word_best_choice_ = NULL;
135
118
language_model_ = new LanguageModel(&get_fontinfo_table(),
137
pass2_seg_states = 0;
141
120
fill_lattice_ = NULL;
145
124
delete language_model_;
148
void Wordrec::CopyCharChoices(const BLOB_CHOICE_LIST_VECTOR &from,
149
BLOB_CHOICE_LIST_VECTOR *to) {
150
to->delete_data_pointers();
152
for (int i = 0; i < from.size(); ++i) {
153
BLOB_CHOICE_LIST *cc_list = new BLOB_CHOICE_LIST();
154
cc_list->deep_copy(from[i], &BLOB_CHOICE::deep_copy);
155
to->push_back(cc_list);
159
bool Wordrec::ChoiceIsCorrect(const UNICHARSET &uni_set,
160
const WERD_CHOICE *choice,
161
const GenericVector<STRING> &truth_text) {
162
if (choice == NULL) return false;
165
for (i = 0; i < truth_text.length(); ++i) truth_str += truth_text[i];
166
STRING normed_choice_str;
167
for (i = 0; i < choice->length(); ++i) {
168
normed_choice_str += uni_set.get_normed_unichar(choice->unichar_id(i));
170
return (truth_str == normed_choice_str);
173
void Wordrec::SaveAltChoices(const LIST &best_choices, WERD_RES *word) {
174
ASSERT_HOST(word->alt_choices.empty());
175
ASSERT_HOST(word->alt_states.empty());
177
iterate_list(list_it, best_choices) {
178
VIABLE_CHOICE choice =
179
reinterpret_cast<VIABLE_CHOICE>(first_node(list_it));
180
CHAR_CHOICE *char_choice = &(choice->Blob[0]);
181
WERD_CHOICE *alt_choice = new WERD_CHOICE(word->uch_set, choice->Length);
182
word->alt_states.push_back(GenericVector<int>(choice->Length));
183
GenericVector<int> &alt_state = word->alt_states.back();
184
for (int i = 0; i < choice->Length; char_choice++, i++) {
185
alt_choice->append_unichar_id_space_allocated(
186
char_choice->Class, 1, 0, 0);
187
alt_state.push_back(char_choice->NumChunks);
189
alt_choice->set_rating(choice->Rating);
190
alt_choice->set_certainty(choice->Certainty);
192
ASSERT_HOST(choice->blob_choices != NULL);
193
alt_choice->set_blob_choices(choice->blob_choices);
194
choice->blob_choices = NULL;
196
word->alt_choices.push_back(alt_choice);
197
if (wordrec_debug_level > 0) {
198
tprintf("SaveAltChoices: %s %g\n",
199
alt_choice->unichar_string().string(), alt_choice->rating());
204
127
} // namespace tesseract