~vcs-imports/tesseract-ocr/trunk : revision 691

97

inT16 char_qual;

98

inT16 good_char_qual;

99

100

classify_word_and_language(&Tesseract::classify_word_pass2,

101

block, row, word_res);

100

WordData word_data(block, row, word_res);

101

SetupWordPassN(2, &word_data);

102

classify_word_and_language(&Tesseract::classify_word_pass2, &word_data);

102

103

if (tessedit_debug_quality_metrics) {

103

104

word_char_quality(word_res, row, &char_qual, &good_char_qual);

104

105

tprintf

153

154

return true;

154

155

}

155

156

157

// If tesseract is to be run, sets the words up ready for it.
// Walks every word of page_res, appends a WordData for each selected word to
// *words, then runs SetupWordPassN() on each collected entry.
//   pass_n:          pass being prepared; forwarded to SetupWordPassN().
//   target_word_box: if NULL every word is collected; otherwise a word is
//                    collected only when ProcessTargetWord() returns true for
//                    its bounding box.
//   word_config:     forwarded unchanged to ProcessTargetWord().
//   page_res:        page results that are iterated.
//   words:           output vector that receives the collected words.

158

void Tesseract::SetupAllWordsPassN(int pass_n,

159

const TBOX* target_word_box,

160

const char* word_config,

161

PAGE_RES* page_res,

162

GenericVector<WordData>* words) {

163

// Prepare all the words.

164

PAGE_RES_IT page_res_it(page_res);

165

for (page_res_it.restart_page(); page_res_it.word() != NULL;

166

page_res_it.forward()) {

167

// On pass 1 every word gets SetupFake(), including words that the target
// box test below does not select.
if (pass_n == 1)

168

page_res_it.word()->SetupFake(unicharset);

169

// NOTE(review): the last argument to ProcessTargetWord() is the literal 1
// even when pass_n == 2, whereas the pass 2 loop this replaces passed 2.
// Confirm that this is intended.
if (target_word_box == NULL ||

170

ProcessTargetWord(page_res_it.word()->word->bounding_box(),

171

*target_word_box, word_config, 1)) {

172

words->push_back(WordData(page_res_it));

173

}

174

}

175

// Setup all the words for recognition with polygonal approximation.

176

// This runs only after all push_back calls are finished, and it stores
// the address of the preceding vector element in each entry's prev_word.
for (int w = 0; w < words->size(); ++w) {

177

SetupWordPassN(pass_n, &(*words)[w]);

178

if (w > 0) (*words)[w].prev_word = &(*words)[w - 1];

179

}

180

}

181

182

// Sets up the single word ready for whichever engine is to be run.
//   pass_n: pass being prepared (the code distinguishes 1 and 2).
//   word:   entry whose main WERD_RES (word->word) and per-sub-language
//           WERD_RESs (word->lang_words) are prepared in place.

183

void Tesseract::SetupWordPassN(int pass_n, WordData* word) {

184

// The main word is (re)prepared on pass 1, when it is not yet marked done,
// or when tessedit_training_tess is set.
if (pass_n == 1 || !word->word->done || tessedit_training_tess) {

185

if (pass_n == 2) {

186

// TODO(rays) Should we do this on pass1 too?

187

word->word->caps_height = 0.0;

188

// An x_height of zero is replaced by the row's x_height.
if (word->word->x_height == 0.0f)

189

word->word->x_height = word->row->x_height();

190

}

191

// Cube doesn't get setup for pass2.

192

if (pass_n != 2 || tessedit_ocr_engine_mode != OEM_CUBE_ONLY) {

193

word->word->SetupForRecognition(

194

unicharset, this, BestPix(), tessedit_ocr_engine_mode, NULL,

195

classify_bln_numeric_mode, textord_use_cjk_fp_model,

196

poly_allow_detailed_fx, word->row, word->block);

197

}

198

}

199

if (!sub_langs_.empty()) {

200

// Make lang_words hold exactly one WERD_RES per sub-language.
if (word->lang_words.size() != sub_langs_.size()) {

201

// Setup the words for all the sub-languages now.

202

WERD_RES empty;

203

word->lang_words.init_to_size(sub_langs_.size(), empty);

204

}

205

for (int s = 0; s < sub_langs_.size(); ++s) {

206

Tesseract* lang_t = sub_langs_[s];

207

// Pass 1 always prepares the sub-language word. Later passes prepare it
// only if that language is not cube-only and its word is either not done
// or the language has tessedit_training_tess set.
if (pass_n == 1 || (lang_t->tessedit_ocr_engine_mode != OEM_CUBE_ONLY &&

208

(!word->lang_words[s].done || lang_t->tessedit_training_tess))) {

209

word->lang_words[s].InitForRetryRecognition(*word->word);

210

// The settings come from lang_t, but BestPix() is called on this object.
word->lang_words[s].SetupForRecognition(

211

lang_t->unicharset, lang_t, BestPix(),

212

lang_t->tessedit_ocr_engine_mode, NULL,

213

lang_t->classify_bln_numeric_mode,

214

lang_t->textord_use_cjk_fp_model,

215

lang_t->poly_allow_detailed_fx, word->row, word->block);

216

}

217

}

218

}

219

}

220

221

222

// Runs word recognition on all the words.
//   pass_n:  1 selects classify_word_pass1 as the recognizer; any other value
//            selects classify_word_pass2.
//   monitor: optional (may be NULL); used for progress reporting and for the
//            deadline/cancel checks.
//   words:   the words to recognize, processed in order.
// Returns false if the monitor reported a deadline or cancellation (the
// current and all remaining words then get SetupFake()), true otherwise.

223

bool Tesseract::RecogAllWordsPassN(int pass_n, ETEXT_DESC* monitor,

224

GenericVector<WordData>* words) {

225

// TODO(rays) Before this loop can be parallelized (it would yield a massive

226

// speed-up) all remaining member globals need to be converted to local/heap

227

// (eg set_pass1 and set_pass2) and an intermediate adaption pass needs to be

228

// added. The results will be significantly different with adaption on, and

229

// deterioration will need investigation.

230

for (int w = 0; w < words->size(); ++w) {

231

WordData* word = &(*words)[w];

232

if (monitor != NULL) {

233

monitor->ocr_alive = TRUE;

234

// Progress starts at 30 and spans 50 on pass 1; otherwise it starts at 80
// and spans 10, scaled by the index of the current word.
if (pass_n == 1)

235

monitor->progress = 30 + 50 * w / words->size();

236

else

237

monitor->progress = 80 + 10 * w / words->size();

238

// The cancel callback receives monitor->cancel_this and the total number
// of words.
if (monitor->deadline_exceeded() ||

239

(monitor->cancel != NULL && (*monitor->cancel)(monitor->cancel_this,

240

words->size()))) {

241

// Timeout. Fake out the rest of the words.

242

// The outer loop index is reused, so faking starts at the current word.
for (; w < words->size(); ++w) {

243

(*words)[w].word->SetupFake(unicharset);

244

}

245

return false;

246

}

247

}

248

// Words already flagged tess_failed are not passed to the recognizer.
if (word->word->tess_failed) continue;

249

WordRecognizer recognizer = pass_n == 1 ? &Tesseract::classify_word_pass1

250

: &Tesseract::classify_word_pass2;

251

classify_word_and_language(recognizer, word);

252

// Optional debug output of the recognized word and its best choice.
if (tessedit_dump_choices) {

253

word_dumper(NULL, word->row, word->word);

254

tprintf("Pass%d: %s [%s]\n", pass_n,

255

word->word->best_choice->unichar_string().string(),

256

word->word->best_choice->debug_string().string());

257

}

258

}

259

return true;

260

}

261

156

262

/**

157

263

* recog_all_words()

158

264

*

179

285

const TBOX* target_word_box,

180

286

const char* word_config,

181

287

int dopasses) {

182

PAGE_RES_IT page_res_it;

183

inT32 word_index; // current word

288

PAGE_RES_IT page_res_it(page_res);

184

289

185

290

if (tessedit_minimal_rej_pass1) {

186

291

tessedit_test_adaption.set_value (TRUE);

187

292

tessedit_minimal_rejection.set_value (TRUE);

188

293

}

189

294

190

// Before the main recognition loop below, walk through the whole page and set

191

// up fake words. That way, if we run out of time a user will still get the

192

// expected best_choice and box_words out the end; they'll just be empty.

193

page_res_it.page_res = page_res;

194

for (page_res_it.restart_page(); page_res_it.word() != NULL;

195

page_res_it.forward()) {

196

page_res_it.word()->SetupFake(unicharset);

197

}

198

199

295

if (dopasses==0 || dopasses==1) {

200

page_res_it.page_res=page_res;

201

296

page_res_it.restart_page();

202

203

297

// ****************** Pass 1 *******************

204

298

205

299

// Clear adaptive classifier at the beginning of the page if it is full.

214

308

if (sub_langs_[i]->AdaptiveClassifierIsFull())

215

309

sub_langs_[i]->ResetAdaptiveClassifierInternal();

216

310

}

217

218

stats_.word_count = 0;

219

if (monitor != NULL) {

220

monitor->ocr_alive = TRUE;

221

while (page_res_it.word() != NULL) {

222

stats_.word_count++;

223

page_res_it.forward();

224

}

225

page_res_it.restart_page();

226

} else {

227

stats_.word_count = 1;

311

// Set up all words ready for recognition, so that if parallelism is on

312

// all the input and output classes are ready to run the classifier.

313

GenericVector<WordData> words;

314

SetupAllWordsPassN(1, target_word_box, word_config, page_res, &words);

315

if (tessedit_parallelize) {

316

PrerecAllWordsPar(words);

228

317

}

229

318

230

word_index = 0;

319

stats_.word_count = words.size();

231

320

232

321

stats_.dict_words = 0;

233

322

stats_.doc_blob_quality = 0;

237

326

stats_.doc_good_char_quality = 0;

238

327

239

328

most_recently_used_ = this;

329

// Run pass 1 word recognition.

330

if (!RecogAllWordsPassN(1, monitor, &words)) return false;

331

// Pass 1 post-processing.

240

332

while (page_res_it.word() != NULL) {

241

set_global_loc_code(LOC_PASS1);

242

word_index++;

243

if (monitor != NULL) {

244

monitor->ocr_alive = TRUE;

245

monitor->progress = 30 + 50 * word_index / stats_.word_count;

246

if (monitor->deadline_exceeded() ||

247

(monitor->cancel != NULL && (*monitor->cancel)(monitor->cancel_this,

248

stats_.dict_words)))

249

return false;

250

}

251

if (target_word_box &&

252

!ProcessTargetWord(page_res_it.word()->word->bounding_box(),

253

*target_word_box, word_config, 1)) {

254

page_res_it.forward();

255

continue;

256

}

257

classify_word_and_language(&Tesseract::classify_word_pass1,

258

page_res_it.block()->block,

259

page_res_it.row()->row,

260

page_res_it.word());

261

333

if (page_res_it.word()->word->flag(W_REP_CHAR)) {

262

334

fix_rep_char(&page_res_it);

263

335

page_res_it.forward();

264

336

continue;

265

337

}

266

if (tessedit_dump_choices) {

267

word_dumper(NULL, page_res_it.row()->row, page_res_it.word());

268

tprintf("Pass1: %s [%s]\n",

269

page_res_it.word()->best_choice->unichar_string().string(),

270

page_res_it.word()->best_choice->debug_string().string());

271

}

272

273

// tessedit_test_adaption enables testing of the accuracy of the

274

// input to the adaptive classifier.

275

if (tessedit_test_adaption && !tessedit_minimal_rejection) {

276

if (!word_adaptable (page_res_it.word(),

277

tessedit_test_adaption_mode)) {

278

page_res_it.word()->reject_map.rej_word_tess_failure();

279

// FAKE PERM REJ

280

} else {

281

// Override rejection mechanisms for this word.

282

UNICHAR_ID space = unicharset.unichar_to_id(" ");

283

for (int i = 0; i < page_res_it.word()->best_choice->length(); i++) {

284

if ((page_res_it.word()->best_choice->unichar_id(i) != space) &&

285

page_res_it.word()->reject_map[i].rejected())

286

page_res_it.word()->reject_map[i].setrej_minimal_rej_accept();

287

}

288

}

289

}

290

338

291

339

// Count dict words.

292

340

if (page_res_it.word()->best_choice->permuter() == USER_DAWG_PERM)

307

355

if (dopasses == 1) return true;

308

356

309

357

// ****************** Pass 2 *******************

310

page_res_it.restart_page();

311

word_index = 0;

312

most_recently_used_ = this;

313

while (tessedit_tess_adaption_mode != 0x0 && !tessedit_test_adaption &&

314

page_res_it.word() != NULL) {

315

set_global_loc_code(LOC_PASS2);

316

word_index++;

317

if (monitor != NULL) {

318

monitor->ocr_alive = TRUE;

319

monitor->progress = 80 + 10 * word_index / stats_.word_count;

320

if (monitor->deadline_exceeded() ||

321

(monitor->cancel != NULL && (*monitor->cancel)(monitor->cancel_this,

322

stats_.dict_words)))

323

return false;

324

}

325

326

// changed by jetsoft

327

// specific to its needs to extract one word when need

328

if (target_word_box &&

329

!ProcessTargetWord(page_res_it.word()->word->bounding_box(),

330

*target_word_box, word_config, 2)) {

331

page_res_it.forward();

332

continue;

333

}

334

// end jetsoft

335

336

classify_word_and_language(&Tesseract::classify_word_pass2,

337

page_res_it.block()->block,

338

page_res_it.row()->row,

339

page_res_it.word());

340

if (page_res_it.word()->word->flag(W_REP_CHAR) &&

341

!page_res_it.word()->done) {

342

fix_rep_char(&page_res_it);

343

page_res_it.forward();

344

continue;

345

}

346

if (tessedit_dump_choices) {

347

word_dumper(NULL, page_res_it.row()->row, page_res_it.word());

348

tprintf("Pass2: %s [%s]\n",

349

page_res_it.word()->best_choice->unichar_string().string(),

350

page_res_it.word()->best_choice->debug_string().string());

351

}

352

page_res_it.forward();

358

if (tessedit_tess_adaption_mode != 0x0 && !tessedit_test_adaption) {

359

page_res_it.restart_page();

360

GenericVector<WordData> words;

361

SetupAllWordsPassN(2, target_word_box, word_config, page_res, &words);

362

if (tessedit_parallelize) {

363

PrerecAllWordsPar(words);

364

}

365

most_recently_used_ = this;

366

// Run pass 2 word recognition.

367

if (!RecogAllWordsPassN(2, monitor, &words)) return false;

368

// Pass 2 post-processing.

369

while (page_res_it.word() != NULL) {

370

WERD_RES* word = page_res_it.word();

371

if (word->word->flag(W_REP_CHAR) && !word->done) {

372

fix_rep_char(&page_res_it);

373

page_res_it.forward();

374

continue;

375

}

376

page_res_it.forward();

377

}

353

378

}

354

379

355

380

// The next passes can only be run if tesseract has been used, as cube

384

409

// Check the correctness of the final results.

385

410

blamer_pass(page_res);

386

411

}

412

script_pos_pass(page_res);

387

413

388

414

// Write results pass.

389

415

set_global_loc_code(LOC_WRITE_RESULTS);

672

698

}

673

699

}

674

700

701

// Sets script positions and detects smallcaps on all output words.
// Walks every word of page_res. Words flagged W_REP_CHAR are skipped. For
// every other word the small_caps flag is set when the word's x-height lies
// close to kXHeightCapRatio times the block x-height and the best choice has
// at least one upper-case and no lower-case character; SetScriptPositions()
// is then called on the word.
//   page_res: page results whose words are updated in place.

702

void Tesseract::script_pos_pass(PAGE_RES* page_res) {

703

PAGE_RES_IT page_res_it(page_res);

704

for (page_res_it.restart_page(); page_res_it.word() != NULL;

705

page_res_it.forward()) {

706

WERD_RES* word = page_res_it.word();

707

if (word->word->flag(W_REP_CHAR)) {

708

// No explicit forward() here: the for-loop increment already advances the
// iterator on continue. Advancing twice skipped the word following every
// repeated-character word without examining it.

709

continue;

710

}

711

float x_height = page_res_it.block()->block->x_height();

712

float word_x_height = word->x_height;

713

// If the word's x-height falls outside the range reported by its best
// choice, use the midpoint of that range instead.
if (word_x_height < word->best_choice->min_x_height() ||

714

word_x_height > word->best_choice->max_x_height()) {

715

word_x_height = (word->best_choice->min_x_height() +

716

word->best_choice->max_x_height()) / 2.0f;

717

}

718

// Test for small caps. Word capheight must be close to block xheight,

719

// and word must contain no lower case letters, and at least one upper case.

720

double small_cap_xheight = x_height * kXHeightCapRatio;

721

// Tolerance: half the gap between the block x-height and the small-cap
// x-height.
double small_cap_delta = (x_height - small_cap_xheight) / 2.0;

722

if (word->uch_set->script_has_xheight() &&

723

small_cap_xheight - small_cap_delta <= word_x_height &&

724

word_x_height <= small_cap_xheight + small_cap_delta) {

725

// Scan for upper/lower.

726

int num_upper = 0;

727

int num_lower = 0;

728

for (int i = 0; i < word->best_choice->length(); ++i) {

729

if (word->uch_set->get_isupper(word->best_choice->unichar_id(i)))

730

++num_upper;

731

else if (word->uch_set->get_islower(word->best_choice->unichar_id(i)))

732

++num_lower;

733

}

734

if (num_upper > 0 && num_lower == 0)

735

word->small_caps = true;

736

}

737

word->SetScriptPositions();

738

}

739

}

740

675

741

// Helper returns true if the new_word is better than the word, using a

676

742

// simple test of better certainty AND rating (to reduce false positives

677

743

// from cube) or a dictionary vs non-dictionary word.

701

767

702

768

// Helper to recognize the word using the given (language-specific) tesseract.

703

769

// Returns true if the result was better than previously.

704

bool Tesseract::RetryWithLanguage(WERD_RES *word, BLOCK* block, ROW *row,

770

bool Tesseract::RetryWithLanguage(const WERD_RES& best_word,

771

WordData* word_data, WERD_RES* word,

705

772

WordRecognizer recognizer) {

706

773

if (classify_debug_level || cube_debug_level) {

707

774

tprintf("Retrying word using lang %s, oem %d\n",

708

775

lang.string(), static_cast<int>(tessedit_ocr_engine_mode));

709

776

}

710

// Setup a trial WERD_RES in which to classify.

711

WERD_RES lang_word;

712

lang_word.InitForRetryRecognition(*word);

713

777

// Run the recognizer on the word.

714

778

// Initial version is a bit of a hack based on better certainty and rating

715

779

// (to reduce false positives from cube) or a dictionary vs non-dictionary

716

780

// word.

717

(this->*recognizer)(block, row, &lang_word);

718

bool new_is_better = NewWordBetter(*word, lang_word,

781

(this->*recognizer)(word_data, word);

782

bool new_is_better = NewWordBetter(best_word, *word,

719

783

classify_max_rating_ratio,

720

784

classify_max_certainty_margin);

721

785

if (classify_debug_level || cube_debug_level) {

722

if (lang_word.best_choice == NULL) {

723

tprintf("New result %s better:%s\n",

786

if (word->best_choice == NULL) {

787

tprintf("NULL result %s better!\n",

724

788

new_is_better ? "IS" : "NOT");

725

789

} else {

726

790

tprintf("New result %s better:%s, r=%g, c=%g\n",

727

791

new_is_better ? "IS" : "NOT",

728

lang_word.best_choice->unichar_string().string(),

729

lang_word.best_choice->rating(),

730

lang_word.best_choice->certainty());

792

word->best_choice->unichar_string().string(),

793

word->best_choice->rating(),

794

word->best_choice->certainty());

731

795

}

732

796

}

733

if (new_is_better) {

734

word->ConsumeWordResults(&lang_word);

735

}

736

797

return new_is_better;

737

798

}

738

799

743

804

// If recognition was not successful, tries all available languages until

744

805

// it gets a successful result or runs out of languages. Keeps the best result.

745

806

void Tesseract::classify_word_and_language(WordRecognizer recognizer,

746

BLOCK* block,

747

ROW *row,

748

WERD_RES *word) {

807

WordData* word_data) {

808

// Points to the best result. May be word or in lang_words.

809

WERD_RES* word = word_data->word;

749

810

clock_t start_t = clock();

750

811

if (classify_debug_level || cube_debug_level) {

751

812

tprintf("Processing word with lang %s at:",

755

816

const char* result_type = "Initial";

756

817

bool initially_done = !word->tess_failed && word->done;

757

818

if (initially_done) {

758

// If done on pass1, we reuse the tesseract that did it, and don't try

759

// any more. The only need to call the classifier at all is for the

760

// cube combiner and xheight fixing (which may be bogus on a done word.)

819

// If done on pass1, leave it as-is.

761

820

most_recently_used_ = word->tesseract;

762

821

result_type = "Already done";

822

} else {

823

if (most_recently_used_ != this) {

824

// Point to the word for most_recently_used_.

825

for (int s = 0; s < sub_langs_.size(); ++s) {

826

if (most_recently_used_ == sub_langs_[s]) {

827

word = &word_data->lang_words[s];

828

break;

829

}

830

}

831

}

832

(most_recently_used_->*recognizer)(word_data, word);

833

if (!word->tess_failed && word->tess_accepted)

834

result_type = "Accepted";

763

835

}

764

(most_recently_used_->*recognizer)(block, row, word);

765

if (!word->tess_failed && word->tess_accepted)

766

result_type = "Accepted";

767

836

if (classify_debug_level || cube_debug_level) {

768

837

tprintf("%s result: %s r=%.4g, c=%.4g, accepted=%d, adaptable=%d"

769

838

" xht=[%g,%g]\n",

782

851

if (classify_debug_level) {

783

852

tprintf("Retrying with main-Tesseract, lang: %s\n", lang.string());

784

853

}

785

if (RetryWithLanguage(word, block, row, recognizer)) {

786

most_recently_used_ = this;

787

if (!word->tess_failed && word->tess_accepted)

788

return; // No need to look at the others.

854

if (word_data->word->tesseract == this) {

855

// This is pass1, and we are trying the main language.

856

if (RetryWithLanguage(*word, word_data, word_data->word, recognizer)) {

857

most_recently_used_ = this;

858

word = word_data->word;

859

}

860

} else {

861

// This is pass2, and we are trying the main language again, but it

862

// has no word allocated to it, so we must re-initialize it.

863

WERD_RES main_word(*word_data->word);

864

main_word.InitForRetryRecognition(*word_data->word);

865

main_word.SetupForRecognition(unicharset, this, BestPix(),

866

tessedit_ocr_engine_mode, NULL,

867

classify_bln_numeric_mode,

868

textord_use_cjk_fp_model,

869

poly_allow_detailed_fx,

870

word_data->row, word_data->block);

871

if (RetryWithLanguage(*word, word_data, &main_word, recognizer)) {

872

most_recently_used_ = this;

873

word_data->word->ConsumeWordResults(&main_word);

874

word = word_data->word;

875

}

789

876

}

877

if (!word->tess_failed && word->tess_accepted)

878

return; // No need to look at the others.

790

879

}

791

880

792

881

for (int i = 0; i < sub_langs_.size(); ++i) {

795

884

tprintf("Retrying with sub-Tesseract[%d] lang: %s\n",

796

885

i, sub_langs_[i]->lang.string());

797

886

}

798

if (sub_langs_[i]->RetryWithLanguage(word, block, row, recognizer)) {

887

if (sub_langs_[i]->RetryWithLanguage(*word, word_data,

888

&word_data->lang_words[i],

889

recognizer)) {

799

890

most_recently_used_ = sub_langs_[i];

891

word = &word_data->lang_words[i];

800

892

if (!word->tess_failed && word->tess_accepted)

801

return; // No need to look at the others.

893

break; // No need to look at the others.

802

894

}

803

895

}

804

896

}

805

897

}

898

if (word != word_data->word) {

899

// Move the result for the best language to the main word.

900

word_data->word->ConsumeWordResults(word);

901

}

806

902

clock_t ocr_t = clock();

807

903

if (tessedit_timing_debug) {

808

904

tprintf("%s (ocr took %.2f sec)\n",

817

913

* Baseline normalize the word and pass it to Tess.

818

914

*/

819

915

820

void Tesseract::classify_word_pass1(BLOCK* block, ROW *row, WERD_RES *word) {

916

void Tesseract::classify_word_pass1(WordData* word_data, WERD_RES* word) {

917

ROW* row = word_data->row;

918

BLOCK* block = word_data->block;

919

prev_word_best_choice_ = word_data->prev_word != NULL

920

? word_data->prev_word->word->best_choice : NULL;

821

921

// If we only intend to run cube - run it and return.

822

922

if (tessedit_ocr_engine_mode == OEM_CUBE_ONLY) {

823

923

cube_word_pass1(block, row, word);

880

980

}

881

981

new_x_ht_word.x_height = new_x_ht;

882

982

new_x_ht_word.caps_height = 0.0;

983

new_x_ht_word.SetupForRecognition(

984

unicharset, this, BestPix(), tessedit_ocr_engine_mode, NULL,

985

classify_bln_numeric_mode, textord_use_cjk_fp_model,

986

poly_allow_detailed_fx, row, block);

883

987

match_word_pass_n(2, &new_x_ht_word, row, block);

884

988

if (!new_x_ht_word.tess_failed) {

885

989

int new_misfits = CountMisfitTops(&new_x_ht_word);

916

1020

* Control what to do with the word in pass 2

917

1021

*/

918

1022

919

void Tesseract::classify_word_pass2(BLOCK* block, ROW *row, WERD_RES *word) {

1023

void Tesseract::classify_word_pass2(WordData* word_data, WERD_RES* word) {

920

1024

// Return if we do not want to run Tesseract.

921

1025

if (tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY &&

922

1026

tessedit_ocr_engine_mode != OEM_TESSERACT_CUBE_COMBINED)

923

1027

return;

1028

ROW* row = word_data->row;

1029

BLOCK* block = word_data->block;

1030

prev_word_best_choice_ = word_data->prev_word != NULL

1031

? word_data->prev_word->word->best_choice : NULL;

924

1032

925

1033

set_global_subloc_code(SUBLOC_NORM);

926

1034

check_debug_pt(word, 30);

940

1048

// Use the tops and bottoms since they are available.

941

1049

TrainedXheightFix(word, block, row);

942

1050

}

943

// Test for small caps. Word capheight must be close to block xheight,

944

// and word must contain no lower case letters, and at least one upper case.

945

double small_cap_xheight = block->x_height() * kXHeightCapRatio;

946

double small_cap_delta = (block->x_height() - small_cap_xheight) / 2.0;

947

if (unicharset.script_has_xheight() &&

948

small_cap_xheight - small_cap_delta <= word->x_height &&

949

word->x_height <= small_cap_xheight + small_cap_delta) {

950

// Scan for upper/lower.

951

int num_upper = 0;

952

int num_lower = 0;

953

for (int i = 0; i < word->best_choice->length(); ++i) {

954

if (unicharset.get_isupper(word->best_choice->unichar_id(i)))

955

++num_upper;

956

else if (unicharset.get_islower(word->best_choice->unichar_id(i)))

957

++num_lower;

958

}

959

if (num_upper > 0 && num_lower == 0)

960

word->small_caps = true;

961

}

962

word->SetScriptPositions();

963

1051

964

1052

set_global_subloc_code(SUBLOC_NORM);

965

1053

}

988

1076

989

1077

void Tesseract::match_word_pass_n(int pass_n, WERD_RES *word,

990

1078

ROW *row, BLOCK* block) {

991

if (word->SetupForTessRecognition(unicharset, this, BestPix(),

992

classify_bln_numeric_mode,

993

textord_use_cjk_fp_model,

994

poly_allow_detailed_fx,

995

row, block))

996

tess_segment_pass_n(pass_n, word);

1079

if (word->tess_failed) return;

1080

tess_segment_pass_n(pass_n, word);

997

1081

998

1082

if (!word->tess_failed) {

999

1083

if (!word->word->flag (W_REP_CHAR)) {

1136

1220

WERD_RES* rep_word =

1137

1221

page_res_it->InsertSimpleCloneWord(*word_res, blob_word);

1138

1222

// Setup the single char WERD_RES

1139

if (rep_word->SetupForTessRecognition(*word_res->uch_set, this, BestPix(),

1140

false,

1141

textord_use_cjk_fp_model,

1142

poly_allow_detailed_fx,

1143

page_res_it->row()->row,

1144

page_res_it->block()->block)) {

1223

if (rep_word->SetupForRecognition(*word_res->uch_set, this, BestPix(),

1224

tessedit_ocr_engine_mode, NULL, false,

1225

textord_use_cjk_fp_model,

1226

poly_allow_detailed_fx,

1227

page_res_it->row()->row,

1228

page_res_it->block()->block)) {

1145

1229

rep_word->CloneChoppedToRebuild();

1146

1230

BLOB_CHOICE* blob_choice = new BLOB_CHOICE(*best_choice);

1147

1231

rep_word->FakeClassifyWord(1, &blob_choice);