136
132
bool write_results_empty_block;
135
// Struct to hold all the pointers to relevant data for processing a word.
137
// Default constructor: leaves word, row, block and prev_word all NULL.
WordData() : word(NULL), row(NULL), block(NULL), prev_word(NULL) {}
138
// Builds the WordData from page_res_it: word is taken from
// page_res_it.word(), row from page_res_it.row()->row and block from
// page_res_it.block()->block. prev_word starts out NULL.
// Declared explicit so a PAGE_RES_IT is never implicitly converted.
explicit WordData(const PAGE_RES_IT& page_res_it)
139
: word(page_res_it.word()), row(page_res_it.row()->row),
140
block(page_res_it.block()->block), prev_word(NULL) {}
141
// Builds the WordData from explicitly supplied block, row and word pointers.
// Note that the argument order (block, row, word) differs from the order of
// the member initializers (word, row, block). prev_word starts out NULL.
WordData(BLOCK* block_in, ROW* row_in, WERD_RES* word_res)
142
: word(word_res), row(row_in), block(block_in), prev_word(NULL) {}
148
GenericVector<WERD_RES> lang_words;
151
// Pointer-to-member-function type for Tesseract methods that take a
// WordData* and a WERD_RES* and return void. It is the type of the
// `recognizer` argument accepted by RetryWithLanguage and
// classify_word_and_language.
typedef void (Tesseract::*WordRecognizer)(WordData* word_data, WERD_RES* word);
139
153
class Tesseract : public Wordrec {
250
264
bool single_column, bool osd, bool only_osd,
251
265
BLOCK_LIST* blocks, Tesseract* osd_tess, OSResults* osr,
252
266
TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix, Pix** music_mask_pix);
268
// Takes the full list of words by const reference.
// NOTE(review): no description is visible for this declaration. From the
// name it presumably runs a pre-recognition step over all the words in
// parallel (cf. the tessedit_parallelize parameter) -- confirm against the
// definition.
void PrerecAllWordsPar(const GenericVector<WordData>& words);
254
270
//// control.h /////////////////////////////////////////////////////////
255
271
// NOTE(review): undocumented in this header. Presumably checks the word at
// word_box against target_word_box and applies word_config for the given
// pass; the meaning of the bool result is not visible here -- confirm
// against the definition.
bool ProcessTargetWord(const TBOX& word_box, const TBOX& target_word_box,
256
272
const char* word_config, int pass);
273
// Sets up the words ready for whichever engine is to be run.
274
void SetupAllWordsPassN(int pass_n,
275
const TBOX* target_word_box,
276
const char* word_config,
278
GenericVector<WordData>* words);
279
// Sets up the single word ready for whichever engine is to be run.
// pass_n selects the pass being prepared for; word is passed by non-const
// pointer, so the callee is able to modify it.
280
void SetupWordPassN(int pass_n, WordData* word);
281
// Runs word recognition on all the words.
// pass_n selects the pass; words is passed by non-const pointer.
// NOTE(review): the meaning of the bool result and the role of monitor are
// not visible from this declaration -- confirm against the definition.
282
bool RecogAllWordsPassN(int pass_n, ETEXT_DESC* monitor,
283
GenericVector<WordData>* words);
257
284
bool recog_all_words(PAGE_RES* page_res,
258
285
ETEXT_DESC* monitor,
259
286
const TBOX* target_word_box,
265
292
const char* word_config);
266
293
void bigram_correction_pass(PAGE_RES *page_res);
267
294
void blamer_pass(PAGE_RES* page_res);
295
// Sets script positions and detects smallcaps on all output words.
296
void script_pos_pass(PAGE_RES* page_res);
268
297
// Helper to recognize the word using the given (language-specific) tesseract.
269
298
// Returns true if the result was better than previously.
270
bool RetryWithLanguage(WERD_RES *word, BLOCK* block, ROW *row,
271
WordRecognizer recognizer);
299
bool RetryWithLanguage(const WERD_RES& best_word, WordData* word_data,
300
WERD_RES* word, WordRecognizer recognizer);
272
301
void classify_word_and_language(WordRecognizer recognizer,
273
BLOCK* block, ROW *row, WERD_RES *word);
274
void classify_word_pass1(BLOCK* block, ROW *row, WERD_RES *word);
302
WordData* word_data);
303
void classify_word_pass1(WordData* word_data, WERD_RES* word);
275
304
void recog_pseudo_word(PAGE_RES* page_res, // blocks to check
276
305
TBOX &selection_box);
283
312
const char *lengths);
284
313
// NOTE(review): undocumented in this header. Presumably performs the
// matching step for `word` in pass `pass_n`, given the row and block that
// contain it -- confirm against the definition.
void match_word_pass_n(int pass_n, WERD_RES *word, ROW *row, BLOCK* block);
285
void classify_word_pass2(BLOCK* block, ROW *row, WERD_RES *word);
314
void classify_word_pass2(WordData* word_data, WERD_RES* word);
286
315
void ReportXhtFixResult(bool accept_new_word, float new_x_ht,
287
316
WERD_RES* word, WERD_RES* new_word);
288
317
bool RunOldFixXht(WERD_RES *word, BLOCK* block, ROW *row);
936
965
"Only initialize with the config file. Useful if the instance is "
937
966
"not going to be used for OCR but say only for layout analysis.");
938
967
BOOL_VAR_H(textord_equation_detect, false, "Turn on equation detector");
968
INT_VAR_H(tessedit_parallelize, 0, "Run in parallel where possible");
940
970
// The following parameters were deprecated and removed from their original
941
971
// locations. The parameters are temporarily kept here to give Tesseract