~vcs-imports/tesseract-ocr/trunk

« back to all changes in this revision

Viewing changes to classify/classify.h

  • Committer: theraysmith at gmail
  • Date: 2013-11-08 20:30:56 UTC
  • Revision ID: svn-v4:d0cd1f9f-072b-0410-8dd7-cf729c803f20:trunk:904
Refactored control functions to enable parallel blob classification

Show diffs side-by-side

added added

removed removed

Lines of Context:
145
145
                        int FontinfoId,
146
146
                        ADAPT_CLASS Class,
147
147
                        ADAPT_TEMPLATES Templates);
148
 
  void AdaptToPunc(TBLOB *Blob,
149
 
                   CLASS_ID ClassId,
150
 
                   int FontinfoId,
151
 
                   FLOAT32 Threshold);
152
 
  void AmbigClassifier(TBLOB *Blob,
153
 
                       INT_TEMPLATES Templates,
154
 
                       ADAPT_CLASS *Classes,
155
 
                       UNICHAR_ID *Ambiguities,
156
 
                       ADAPT_RESULTS *Results);
 
148
  void AmbigClassifier(const GenericVector<INT_FEATURE_STRUCT>& int_features,
 
149
                       const INT_FX_RESULT_STRUCT& fx_info,
 
150
                       const TBLOB *blob,
 
151
                       INT_TEMPLATES templates,
 
152
                       ADAPT_CLASS *classes,
 
153
                       UNICHAR_ID *ambiguities,
 
154
                       ADAPT_RESULTS *results);
157
155
  void MasterMatcher(INT_TEMPLATES templates,
158
156
                     inT16 num_features,
159
157
                     const INT_FEATURE_STRUCT* features,
161
159
                     ADAPT_CLASS* classes,
162
160
                     int debug,
163
161
                     int num_classes,
 
162
                     int matcher_multiplier,
164
163
                     const TBOX& blob_box,
165
164
                     CLASS_PRUNER_RESULTS results,
166
165
                     ADAPT_RESULTS* final_results);
175
174
                                       int bottom, int top,
176
175
                                       float cp_rating,
177
176
                                       int blob_length,
 
177
                                       int matcher_multiplier,
178
178
                                       const uinT8* cn_factors,
179
179
                                       INT_RESULT_STRUCT& int_result,
180
180
                                       ADAPT_RESULTS* final_results);
184
184
  double ComputeCorrectedRating(bool debug, int unichar_id, double cp_rating,
185
185
                                double im_rating, int feature_misses,
186
186
                                int bottom, int top,
187
 
                                int blob_length, const uinT8* cn_factors);
 
187
                                int blob_length, int matcher_multiplier,
 
188
                                const uinT8* cn_factors);
188
189
  void ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box,
189
190
                               ADAPT_RESULTS *Results,
190
191
                               BLOB_CHOICE_LIST *Choices);
246
247
  // Converts a shape_table_ index to a classifier class_id index (not a
247
248
  // unichar-id!). Uses a search, so not fast.
248
249
  int ShapeIDToClassID(int shape_id) const;
249
 
  UNICHAR_ID *BaselineClassifier(TBLOB *Blob,
250
 
                                 ADAPT_TEMPLATES Templates,
251
 
                                 ADAPT_RESULTS *Results);
252
 
  int CharNormClassifier(TBLOB *Blob,
253
 
                         INT_TEMPLATES Templates,
254
 
                         ADAPT_RESULTS *Results);
 
250
  UNICHAR_ID *BaselineClassifier(
 
251
      TBLOB *Blob, const GenericVector<INT_FEATURE_STRUCT>& int_features,
 
252
      const INT_FX_RESULT_STRUCT& fx_info,
 
253
      ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results);
 
254
  int CharNormClassifier(TBLOB *blob,
 
255
                         const TrainingSample& sample,
 
256
                         ADAPT_RESULTS *adapt_results);
255
257
 
256
258
  // As CharNormClassifier, but operates on a TrainingSample and outputs to
257
259
  // a GenericVector of ShapeRating without conversion to classes.
267
269
  void DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class);
268
270
  bool AdaptableWord(WERD_RES* word);
269
271
  void EndAdaptiveClassifier();
270
 
  void PrintAdaptiveStatistics(FILE *File);
271
272
  void SettupPass1();
272
273
  void SettupPass2();
273
274
  void AdaptiveClassifier(TBLOB *Blob,
276
277
  void ClassifyAsNoise(ADAPT_RESULTS *Results);
277
278
  void ResetAdaptiveClassifierInternal();
278
279
 
279
 
  int GetBaselineFeatures(TBLOB *Blob,
280
 
                          INT_TEMPLATES Templates,
281
 
                          INT_FEATURE_ARRAY IntFeatures,
282
 
                          uinT8* CharNormArray,
283
 
                          inT32 *BlobLength);
284
 
  int GetCharNormFeatures(TBLOB *Blob,
285
 
                          INT_TEMPLATES Templates,
286
 
                          INT_FEATURE_ARRAY IntFeatures,
287
 
                          uinT8* PrunerNormArray,
288
 
                          uinT8* CharNormArray,
289
 
                          inT32 *BlobLength);
 
280
  int GetCharNormFeature(const INT_FX_RESULT_STRUCT& fx_info,
 
281
                         INT_TEMPLATES templates,
 
282
                         uinT8* pruner_norm_array,
 
283
                         uinT8* char_norm_array);
290
284
  // Computes the char_norm_array for the unicharset and, if not NULL, the
291
285
  // pruner_array as appropriate according to the existence of the shape_table.
292
286
  // The norm_feature is deleted as it is almost certainly no longer needed.
298
292
  bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config);
299
293
  void UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob);
300
294
 
301
 
  void ResetFeaturesHaveBeenExtracted();
302
295
  bool AdaptiveClassifierIsFull() { return NumAdaptationsFailed > 0; }
303
296
  bool LooksLikeGarbage(TBLOB *blob);
304
297
  void RefreshDebugWindow(ScrollView **win, const char *msg,
468
461
 
469
462
  // Create dummy proto and config masks for use with the built-in templates.
470
463
  BIT_VECTOR AllProtosOn;
471
 
  BIT_VECTOR PrunedProtos;
472
464
  BIT_VECTOR AllConfigsOn;
473
 
  BIT_VECTOR AllProtosOff;
474
465
  BIT_VECTOR AllConfigsOff;
475
466
  BIT_VECTOR TempProtoMask;
476
467
  bool EnableLearning;
504
495
  ShapeTable* shape_table_;
505
496
 
506
497
 private:
507
 
 
508
498
  Dict dict_;
509
499
  // The currently active static classifier.
510
500
  ShapeClassifier* static_classifier_;
511
501
 
512
502
  /* variables used to hold performance statistics */
513
 
  int AdaptiveMatcherCalls;
514
 
  int BaselineClassifierCalls;
515
 
  int CharNormClassifierCalls;
516
 
  int AmbigClassifierCalls;
517
 
  int NumWordsAdaptedTo;
518
 
  int NumCharsAdaptedTo;
519
 
  int NumBaselineClassesTried;
520
 
  int NumCharNormClassesTried;
521
 
  int NumAmbigClassesTried;
522
 
  int NumClassesOutput;
523
503
  int NumAdaptationsFailed;
524
504
 
525
 
  /* variables used to hold onto extracted features.  This is used
526
 
  to map from the old scheme in which baseline features and char norm
527
 
  features are extracted separately, to the new scheme in which they
528
 
  are extracted at the same time. */
529
 
  bool FeaturesHaveBeenExtracted;
530
 
  bool FeaturesOK;
531
 
  INT_FEATURE_ARRAY BaselineFeatures;
532
 
  INT_FEATURE_ARRAY CharNormFeatures;
533
 
  INT_FX_RESULT_STRUCT FXInfo;
534
 
 
535
505
  // Expected number of features in the class pruner, used to penalize
536
506
  // unknowns that have too few features (like a c being classified as e) so
537
507
  // it doesn't recognize everything as '@' or '#'.