~vcs-imports/tesseract-ocr/trunk

Viewing changes to classify/adaptmatch.cpp

  • Committer: theraysmith at gmail
  • Date: 2013-11-08 20:30:56 UTC
  • Revision ID: svn-v4:d0cd1f9f-072b-0410-8dd7-cf729c803f20:trunk:904
Refactored control functions to enable parallel blob classification
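
In outline, the change removes the cached feature-extraction globals
(FeaturesHaveBeenExtracted, BaselineFeatures, CharNormFeatures, FXInfo) and
the global statistics counters (AdaptiveMatcherCalls, NumWordsAdaptedTo,
etc., along with PrintAdaptiveStatistics), so a Classify instance no longer
carries per-blob mutable state between calls. Each control function now
extracts features into locals and hands them down explicitly. A minimal
sketch of the new per-call pattern, assembled from the DoAdaptiveMatch and
GetAmbiguities hunks below:

    // Extraction results are owned by this call, not by the Classify
    // object, so separate blobs can be classified in parallel.
    INT_FX_RESULT_STRUCT fx_info;
    GenericVector<INT_FEATURE_STRUCT> bl_features;
    TrainingSample* sample =
        BlobToTrainingSample(*Blob, classify_nonlinear_norm, &fx_info,
                             &bl_features);
    if (sample == NULL) return;
    CharNormClassifier(Blob, *sample, Results);  // sample now passed in
    delete sample;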

@@ -122,8 +122,6 @@
 #define MarginalMatch(Rating)       \
 ((Rating) > matcher_great_threshold)
 
-#define InitIntFX() (FeaturesHaveBeenExtracted = FALSE)
-
 /*-----------------------------------------------------------------------------
           Private Function Prototypes
 -----------------------------------------------------------------------------*/
@@ -179,8 +177,7 @@
   ADAPT_RESULTS *Results = new ADAPT_RESULTS();
   Results->Initialize();
 
-  if (AdaptedTemplates == NULL)
-    AdaptedTemplates = NewAdaptedTemplates (true);
+  ASSERT_HOST(AdaptedTemplates != NULL);
 
   DoAdaptiveMatch(Blob, Results);
   if (CPResults != NULL)
@@ -207,7 +204,6 @@
     DebugAdaptiveClassifier(Blob, Results);
 #endif
 
-  NumClassesOutput += Choices->length();
   delete Results;
 }                                /* AdaptiveClassifier */
 
@@ -249,7 +245,6 @@
     if (!EnableLearning || word->best_choice == NULL)
       return;  // Can't or won't adapt.
 
-    NumWordsAdaptedTo++;
     if (classify_learning_debug_level >= 1)
       tprintf("\n\nAdapting to word = %s\n",
               word->best_choice->debug_string().string());
@@ -480,15 +475,11 @@
   FreeNormProtos();
   if (AllProtosOn != NULL) {
     FreeBitVector(AllProtosOn);
-    FreeBitVector(PrunedProtos);
     FreeBitVector(AllConfigsOn);
-    FreeBitVector(AllProtosOff);
     FreeBitVector(AllConfigsOff);
     FreeBitVector(TempProtoMask);
     AllProtosOn = NULL;
-    PrunedProtos = NULL;
     AllConfigsOn = NULL;
-    AllProtosOff = NULL;
     AllConfigsOff = NULL;
     TempProtoMask = NULL;
   }
@@ -561,19 +552,15 @@
     static_classifier_ = new TessClassifier(false, this);
   }
 
-  im_.Init(&classify_debug_level, classify_integer_matcher_multiplier);
+  im_.Init(&classify_debug_level);
   InitIntegerFX();
 
   AllProtosOn = NewBitVector(MAX_NUM_PROTOS);
-  PrunedProtos = NewBitVector(MAX_NUM_PROTOS);
   AllConfigsOn = NewBitVector(MAX_NUM_CONFIGS);
-  AllProtosOff = NewBitVector(MAX_NUM_PROTOS);
   AllConfigsOff = NewBitVector(MAX_NUM_CONFIGS);
   TempProtoMask = NewBitVector(MAX_NUM_PROTOS);
   set_all_bits(AllProtosOn, WordsInVectorOfSize(MAX_NUM_PROTOS));
-  set_all_bits(PrunedProtos, WordsInVectorOfSize(MAX_NUM_PROTOS));
   set_all_bits(AllConfigsOn, WordsInVectorOfSize(MAX_NUM_CONFIGS));
-  zero_all_bits(AllProtosOff, WordsInVectorOfSize(MAX_NUM_PROTOS));
   zero_all_bits(AllConfigsOff, WordsInVectorOfSize(MAX_NUM_CONFIGS));
 
   for (int i = 0; i < MAX_NUM_CLASSES; i++) {
@@ -617,53 +604,11 @@
             NumAdaptationsFailed);
   }
   free_adapted_templates(AdaptedTemplates);
-  AdaptedTemplates = NULL;
+  AdaptedTemplates = NewAdaptedTemplates(true);
   NumAdaptationsFailed = 0;
 }
 
 
-/*---------------------------------------------------------------------------*/
-/**
- * Print to File the statistics which have
- * been gathered for the adaptive matcher.
- *
- * @param File open text file to print adaptive statistics to
- *
- * Globals: none
- *
- * @note Exceptions: none
- * @note History: Thu Apr 18 14:37:37 1991, DSJ, Created.
- */
-void Classify::PrintAdaptiveStatistics(FILE *File) {
-  #ifndef SECURE_NAMES
-
-  fprintf (File, "\nADAPTIVE MATCHER STATISTICS:\n");
-  fprintf (File, "\tNum blobs classified = %d\n", AdaptiveMatcherCalls);
-  fprintf (File, "\tNum classes output   = %d (Avg = %4.2f)\n",
-    NumClassesOutput,
-    ((AdaptiveMatcherCalls == 0) ? (0.0) :
-  ((float) NumClassesOutput / AdaptiveMatcherCalls)));
-  fprintf (File, "\t\tBaseline Classifier: %4d calls (%4.2f classes/call)\n",
-    BaselineClassifierCalls,
-    ((BaselineClassifierCalls == 0) ? (0.0) :
-  ((float) NumBaselineClassesTried / BaselineClassifierCalls)));
-  fprintf (File, "\t\tCharNorm Classifier: %4d calls (%4.2f classes/call)\n",
-    CharNormClassifierCalls,
-    ((CharNormClassifierCalls == 0) ? (0.0) :
-  ((float) NumCharNormClassesTried / CharNormClassifierCalls)));
-  fprintf (File, "\t\tAmbig    Classifier: %4d calls (%4.2f classes/call)\n",
-    AmbigClassifierCalls,
-    ((AmbigClassifierCalls == 0) ? (0.0) :
-  ((float) NumAmbigClassesTried / AmbigClassifierCalls)));
-
-  fprintf (File, "\nADAPTIVE LEARNER STATISTICS:\n");
-  fprintf (File, "\tNumber of words adapted to: %d\n", NumWordsAdaptedTo);
-  fprintf (File, "\tNumber of chars adapted to: %d\n", NumCharsAdaptedTo);
-
-  PrintAdaptedTemplates(File, AdaptedTemplates);
-  #endif
-}                                /* PrintAdaptiveStatistics */
-
 
 /*---------------------------------------------------------------------------*/
 /**
@@ -915,8 +860,6 @@
   FEATURE_SET FloatFeatures;
   int NewTempConfigId;
 
-  ResetFeaturesHaveBeenExtracted();
-  NumCharsAdaptedTo++;
   if (!LegalClassId (ClassId))
     return;
 
@@ -932,7 +875,6 @@
     if (NumFeatures <= 0)
       return;
 
-    im_.SetBaseLineMatch();
     // Only match configs with the matching font.
     BIT_VECTOR MatchingFontConfigs = NewBitVector(MAX_NUM_PROTOS);
     for (int cfg = 0; cfg < IClass->NumConfigs; ++cfg) {
@@ -1004,17 +946,16 @@
 
 void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) {
 #ifndef GRAPHICS_DISABLED
-  int bloblength = 0;
-  INT_FEATURE_ARRAY features;
-  uinT8* norm_array = new uinT8[unicharset.size()];
-  int num_features = GetBaselineFeatures(blob, PreTrainedTemplates,
-                                         features,
-                                         norm_array, &bloblength);
-  delete [] norm_array;
+  INT_FX_RESULT_STRUCT fx_info;
+  GenericVector<INT_FEATURE_STRUCT> bl_features;
+  TrainingSample* sample =
+      BlobToTrainingSample(*blob, classify_nonlinear_norm, &fx_info,
+                           &bl_features);
+  if (sample == NULL) return;
+
   INT_RESULT_STRUCT IntResult;
-
   im_.Match(int_class, AllProtosOn, AllConfigsOn,
-            num_features, features,
+            bl_features.size(), &bl_features[0],
             &IntResult, classify_adapt_feature_threshold,
             NO_DEBUG, matcher_debug_separate_windows);
   cprintf ("Best match to temp config %d = %4.1f%%.\n",
@@ -1024,7 +965,7 @@
     ConfigMask = 1 << IntResult.Config;
     ShowMatchDisplay();
     im_.Match(int_class, AllProtosOn, (BIT_VECTOR)&ConfigMask,
-              num_features, features,
+              bl_features.size(), &bl_features[0],
               &IntResult, classify_adapt_feature_threshold,
               6 | 0x19, matcher_debug_separate_windows);
     UpdateMatchDisplay();
@@ -1033,50 +974,6 @@
 }
 
 
-/*---------------------------------------------------------------------------*/
-/**
- * @param Blob blob to add to templates for ClassId
- * @param ClassId class to add blob to
- * @param FontinfoId font information from pre-trained teamples
- * @param Threshold minimum match rating to existing template
- *
- * Globals:
- * - PreTrainedTemplates current set of built-in templates
- *
- * @note Exceptions: none
- * @note History: Thu Mar 14 09:36:03 1991, DSJ, Created.
- */
-void Classify::AdaptToPunc(TBLOB *Blob,
-                           CLASS_ID ClassId,
-                           int FontinfoId,
-                           FLOAT32 Threshold) {
-  ADAPT_RESULTS *Results = new ADAPT_RESULTS();
-  int i;
-
-  Results->Initialize();
-  CharNormClassifier(Blob, PreTrainedTemplates, Results);
-  RemoveBadMatches(Results);
-
-  if (Results->NumMatches != 1) {
-    if (classify_learning_debug_level >= 1) {
-      cprintf ("Rejecting punc = %s (Alternatives = ",
-               unicharset.id_to_unichar(ClassId));
-
-      for (i = 0; i < Results->NumMatches; i++)
-        tprintf("%s", unicharset.id_to_unichar(Results->match[i].unichar_id));
-      tprintf(")\n");
-    }
-  } else {
-    #ifndef SECURE_NAMES
-    if (classify_learning_debug_level >= 1)
-      cprintf ("Adapting to punc = %s, thr= %g\n",
-               unicharset.id_to_unichar(ClassId), Threshold);
-    #endif
-    AdaptToChar(Blob, ClassId, FontinfoId, Threshold);
-  }
-  delete Results;
-}                                /* AdaptToPunc */
-
 
 /*---------------------------------------------------------------------------*/
 /**
@@ -1167,50 +1064,41 @@
  * @note Exceptions: none
  * @note History: Tue Mar 12 19:40:36 1991, DSJ, Created.
  */
-void Classify::AmbigClassifier(TBLOB *Blob,
-                               INT_TEMPLATES Templates,
-                               ADAPT_CLASS *Classes,
-                               UNICHAR_ID *Ambiguities,
-                               ADAPT_RESULTS *Results) {
-  int NumFeatures;
-  INT_FEATURE_ARRAY IntFeatures;
+void Classify::AmbigClassifier(
+    const GenericVector<INT_FEATURE_STRUCT>& int_features,
+    const INT_FX_RESULT_STRUCT& fx_info,
+    const TBLOB *blob,
+    INT_TEMPLATES templates,
+    ADAPT_CLASS *classes,
+    UNICHAR_ID *ambiguities,
+    ADAPT_RESULTS *results) {
+  if (int_features.empty()) return;
   uinT8* CharNormArray = new uinT8[unicharset.size()];
   INT_RESULT_STRUCT IntResult;
-  CLASS_ID ClassId;
-
-  AmbigClassifierCalls++;
-
-  NumFeatures = GetCharNormFeatures(Blob, Templates, IntFeatures,
-                                    NULL, CharNormArray,
-                                    &(Results->BlobLength));
-  if (NumFeatures <= 0) {
-    delete [] CharNormArray;
-    return;
-  }
-
+
+  results->BlobLength = GetCharNormFeature(fx_info, templates, NULL,
+                                           CharNormArray);
   bool debug = matcher_debug_level >= 2 || classify_debug_level > 1;
   if (debug)
     tprintf("AM Matches =  ");
 
-  int top = Blob->bounding_box().top();
-  int bottom = Blob->bounding_box().bottom();
-  while (*Ambiguities >= 0) {
-    ClassId = *Ambiguities;
+  int top = blob->bounding_box().top();
+  int bottom = blob->bounding_box().bottom();
+  while (*ambiguities >= 0) {
+    CLASS_ID class_id = *ambiguities;
 
-    im_.SetCharNormMatch(classify_integer_matcher_multiplier);
-    im_.Match(ClassForClassId(Templates, ClassId),
+    im_.Match(ClassForClassId(templates, class_id),
               AllProtosOn, AllConfigsOn,
-              NumFeatures, IntFeatures,
+              int_features.size(), &int_features[0],
               &IntResult,
               classify_adapt_feature_threshold, NO_DEBUG,
               matcher_debug_separate_windows);
 
-    ExpandShapesAndApplyCorrections(NULL, debug, ClassId, bottom, top, 0,
-                                    Results->BlobLength, CharNormArray,
-                                    IntResult, Results);
-    Ambiguities++;
-
-    NumAmbigClassesTried++;
+    ExpandShapesAndApplyCorrections(NULL, debug, class_id, bottom, top, 0,
+                                    results->BlobLength,
+                                    classify_integer_matcher_multiplier,
+                                    CharNormArray, IntResult, results);
+    ambiguities++;
   }
   delete [] CharNormArray;
 }                                /* AmbigClassifier */
@@ -1225,6 +1113,7 @@
                              ADAPT_CLASS* classes,
                              int debug,
                              int num_classes,
+                             int matcher_multiplier,
                              const TBOX& blob_box,
                              CLASS_PRUNER_RESULTS results,
                              ADAPT_RESULTS* final_results) {
@@ -1246,7 +1135,8 @@
     bool debug = matcher_debug_level >= 2 || classify_debug_level > 1;
     ExpandShapesAndApplyCorrections(classes, debug, class_id, bottom, top,
                                     results[c].Rating,
-                                    final_results->BlobLength, norm_factors,
+                                    final_results->BlobLength,
+                                    matcher_multiplier, norm_factors,
                                     int_result, final_results);
   }
 }
@@ -1258,7 +1148,8 @@
 // The results are added to the final_results output.
 void Classify::ExpandShapesAndApplyCorrections(
     ADAPT_CLASS* classes, bool debug, int class_id, int bottom, int top,
-    float cp_rating, int blob_length, const uinT8* cn_factors,
+    float cp_rating, int blob_length, int matcher_multiplier,
+    const uinT8* cn_factors,
     INT_RESULT_STRUCT& int_result, ADAPT_RESULTS* final_results) {
   // Compute the fontinfo_ids.
   int fontinfo_id = kBlankFontinfoId;
@@ -1292,7 +1183,7 @@
                                                int_result.Rating,
                                                int_result.FeatureMisses,
                                                bottom, top, blob_length,
-                                               cn_factors);
+                                               matcher_multiplier, cn_factors);
         if (c == 0 || rating < min_rating)
           min_rating = rating;
         if (unicharset.get_enabled(unichar_id)) {
@@ -1309,7 +1200,7 @@
                                          int_result.Rating,
                                          int_result.FeatureMisses,
                                          bottom, top, blob_length,
-                                         cn_factors);
+                                         matcher_multiplier, cn_factors);
   if (unicharset.get_enabled(class_id)) {
     AddNewResult(final_results, class_id, -1, rating,
                  classes != NULL, int_result.Config,
@@ -1325,11 +1216,12 @@
                                         double cp_rating, double im_rating,
                                         int feature_misses,
                                         int bottom, int top,
-                                        int blob_length,
+                                        int blob_length, int matcher_multiplier,
                                         const uinT8* cn_factors) {
   // Compute class feature corrections.
   double cn_corrected = im_.ApplyCNCorrection(im_rating, blob_length,
-                                              cn_factors[unichar_id]);
+                                              cn_factors[unichar_id],
+                                              matcher_multiplier);
   double miss_penalty = tessedit_class_miss_scale * feature_misses;
   double vertical_penalty = 0.0;
   // Penalize non-alnums for being vertical misfits.
@@ -1383,39 +1275,30 @@
  * @note Exceptions: none
  * @note History: Tue Mar 12 19:38:03 1991, DSJ, Created.
  */
-UNICHAR_ID *Classify::BaselineClassifier(TBLOB *Blob,
-                                         ADAPT_TEMPLATES Templates,
-                                         ADAPT_RESULTS *Results) {
-  int NumFeatures;
+UNICHAR_ID *Classify::BaselineClassifier(
+    TBLOB *Blob, const GenericVector<INT_FEATURE_STRUCT>& int_features,
+    const INT_FX_RESULT_STRUCT& fx_info,
+    ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results) {
+  if (int_features.empty()) return NULL;
   int NumClasses;
-  INT_FEATURE_ARRAY IntFeatures;
   uinT8* CharNormArray = new uinT8[unicharset.size()];
-  CLASS_ID ClassId;
-
-  BaselineClassifierCalls++;
-
-  NumFeatures = GetBaselineFeatures(Blob, Templates->Templates, IntFeatures,
-                                    CharNormArray, &Results->BlobLength);
-  if (NumFeatures <= 0) {
-    delete [] CharNormArray;
-    return NULL;
-  }
-
-  NumClasses = PruneClasses(Templates->Templates, NumFeatures, IntFeatures,
+  ClearCharNormArray(CharNormArray);
+
+  Results->BlobLength = IntCastRounded(fx_info.Length / kStandardFeatureLength);
+  NumClasses = PruneClasses(Templates->Templates, int_features.size(),
+                            &int_features[0],
                             CharNormArray, BaselineCutoffs, Results->CPResults);
 
-  NumBaselineClassesTried += NumClasses;
-
   if (matcher_debug_level >= 2 || classify_debug_level > 1)
     cprintf ("BL Matches =  ");
 
-  im_.SetBaseLineMatch();
-  MasterMatcher(Templates->Templates, NumFeatures, IntFeatures, CharNormArray,
-                Templates->Class, matcher_debug_flags, NumClasses,
+  MasterMatcher(Templates->Templates, int_features.size(), &int_features[0],
+                CharNormArray,
+                Templates->Class, matcher_debug_flags, NumClasses, 0,
                 Blob->bounding_box(), Results->CPResults, Results);
 
   delete [] CharNormArray;
-  ClassId = Results->best_match.unichar_id;
+  CLASS_ID ClassId = Results->best_match.unichar_id;
   if (ClassId == NO_CLASS)
     return (NULL);
   /* this is a bug - maybe should return "" */
@@ -1445,17 +1328,13 @@
  * @note History: Tue Mar 12 16:02:52 1991, DSJ, Created.
  */
 int Classify::CharNormClassifier(TBLOB *blob,
-                                 INT_TEMPLATES Templates,
+                                 const TrainingSample& sample,
                                  ADAPT_RESULTS *adapt_results) {
-  CharNormClassifierCalls++;
-  TrainingSample* sample = BlobToTrainingSample(*blob, NM_CHAR_ANISOTROPIC,
-                                                classify_nonlinear_norm);
-  if (sample == NULL) return 0;
   // This is the length that is used for scaling ratings vs certainty.
   adapt_results->BlobLength =
-      IntCastRounded(sample->outline_length() / kStandardFeatureLength);
+      IntCastRounded(sample.outline_length() / kStandardFeatureLength);
   GenericVector<UnicharRating> unichar_results;
-  static_classifier_->UnicharClassifySample(*sample, blob->denorm().pix(), 0,
+  static_classifier_->UnicharClassifySample(sample, blob->denorm().pix(), 0,
                                             -1, &unichar_results);
   // Convert results to the format used internally by AdaptiveClassifier.
   for (int r = 0; r < unichar_results.size(); ++r) {
@@ -1468,9 +1347,7 @@
     float rating = 1.0f - unichar_results[r].rating;
     AddNewResult(adapt_results, unichar_id, -1, rating, false, 0, font1, font2);
   }
-  int num_features = sample->num_features();
-  delete sample;
-  return num_features;
+  return sample.num_features();
 }                                /* CharNormClassifier */
 
 // As CharNormClassifier, but operates on a TrainingSample and outputs to
@@ -1518,10 +1395,10 @@
           UnicharRating(class_id, 1.0f - adapt_results->CPResults[i].Rating));
     }
   } else {
-    im_.SetCharNormMatch(classify_integer_matcher_multiplier);
     MasterMatcher(PreTrainedTemplates, num_features, sample.features(),
                   char_norm_array,
                   NULL, matcher_debug_flags, num_classes,
+                  classify_integer_matcher_multiplier,
                   blob_box, adapt_results->CPResults, adapt_results);
     // Convert master matcher results to output format.
     for (int i = 0; i < adapt_results->NumMatches; i++) {
@@ -1711,8 +1588,10 @@
     if (i == 0 || Results->match[i].rating < Results->best_match.rating)
       Results->best_match = Results->match[i];
   }
-  TrainingSample* sample = BlobToTrainingSample(*blob, NM_CHAR_ANISOTROPIC,
-                                                classify_nonlinear_norm);
+  INT_FX_RESULT_STRUCT fx_info;
+  GenericVector<INT_FEATURE_STRUCT> bl_features;
+  TrainingSample* sample =
+      BlobToTrainingSample(*blob, false, &fx_info, &bl_features);
   if (sample == NULL) return;
   static_classifier_->DebugDisplay(*sample, blob->denorm().pix(),
                                    Results->best_match.unichar_id);
@@ -1745,21 +1624,26 @@
 void Classify::DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results) {
   UNICHAR_ID *Ambiguities;
 
-  AdaptiveMatcherCalls++;
-  InitIntFX();
+  INT_FX_RESULT_STRUCT fx_info;
+  GenericVector<INT_FEATURE_STRUCT> bl_features;
+  TrainingSample* sample =
+      BlobToTrainingSample(*Blob, classify_nonlinear_norm, &fx_info,
+                           &bl_features);
+  if (sample == NULL) return;
 
   if (AdaptedTemplates->NumPermClasses < matcher_permanent_classes_min ||
       tess_cn_matching) {
-    CharNormClassifier(Blob, PreTrainedTemplates, Results);
+    CharNormClassifier(Blob, *sample, Results);
   } else {
-    Ambiguities = BaselineClassifier(Blob, AdaptedTemplates, Results);
+    Ambiguities = BaselineClassifier(Blob, bl_features, fx_info,
+                                     AdaptedTemplates, Results);
     if ((Results->NumMatches > 0 &&
          MarginalMatch (Results->best_match.rating) &&
         !tess_bn_matching) ||
        Results->NumMatches == 0) {
-      CharNormClassifier(Blob, PreTrainedTemplates, Results);
+      CharNormClassifier(Blob, *sample, Results);
    } else if (Ambiguities && *Ambiguities >= 0 && !tess_bn_matching) {
-      AmbigClassifier(Blob,
+      AmbigClassifier(bl_features, fx_info, Blob,
                       PreTrainedTemplates,
                       AdaptedTemplates->Class,
                       Ambiguities,
@@ -1773,6 +1657,7 @@
   // just adding a NULL classification.
   if (!Results->HasNonfragment || Results->NumMatches == 0)
     ClassifyAsNoise(Results);
+  delete sample;
 }   /* DoAdaptiveMatch */
 
 /*---------------------------------------------------------------------------*/
@@ -1799,8 +1684,15 @@
   int i;
 
   Results->Initialize();
+  INT_FX_RESULT_STRUCT fx_info;
+  GenericVector<INT_FEATURE_STRUCT> bl_features;
+  TrainingSample* sample =
+      BlobToTrainingSample(*Blob, classify_nonlinear_norm, &fx_info,
+                           &bl_features);
+  if (sample == NULL) return NULL;
 
-  CharNormClassifier(Blob, PreTrainedTemplates, Results);
+  CharNormClassifier(Blob, *sample, Results);
+  delete sample;
   RemoveBadMatches(Results);
   qsort((void *)Results->match, Results->NumMatches,
         sizeof(ScoredClass), CompareByRating);
@@ -1823,58 +1715,6 @@
   return Ambiguities;
 }                              /* GetAmbiguities */
 
-/*---------------------------------------------------------------------------*/
-/**
- * This routine calls the integer (Hardware) feature
- * extractor if it has not been called before for this blob.
- * The results from the feature extractor are placed into
- * globals so that they can be used in other routines without
- * re-extracting the features.
- * It then copies the baseline features into the IntFeatures
- * array provided by the caller.
- *
- * @param Blob blob to extract features from
- * @param Templates used to compute char norm adjustments
- * @param IntFeatures array to fill with integer features
- * @param CharNormArray array to fill with dummy char norm adjustments
- * @param BlobLength length of blob in baseline-normalized units
- *
- * Globals:
- * - FeaturesHaveBeenExtracted TRUE if fx has been done
- * - BaselineFeatures holds extracted baseline feat
- * - CharNormFeatures holds extracted char norm feat
- * - FXInfo holds misc. FX info
- *
- * @return Number of features extracted or 0 if an error occured.
- * @note Exceptions: none
- * @note History: Tue May 28 10:40:52 1991, DSJ, Created.
- */
-int Classify::GetBaselineFeatures(TBLOB *Blob,
-                                  INT_TEMPLATES Templates,
-                                  INT_FEATURE_ARRAY IntFeatures,
-                                  uinT8* CharNormArray,
-                                  inT32 *BlobLength) {
-  if (!FeaturesHaveBeenExtracted) {
-    FeaturesOK = ExtractIntFeat(*Blob, classify_nonlinear_norm,
-                                BaselineFeatures, CharNormFeatures, &FXInfo);
-    FeaturesHaveBeenExtracted = TRUE;
-  }
-
-  *BlobLength = IntCastRounded(FXInfo.Length / kStandardFeatureLength);
-  if (!FeaturesOK) {
-    return 0;
-  }
-
-  memcpy(IntFeatures, BaselineFeatures, FXInfo.NumBL * sizeof(IntFeatures[0]));
-
-  ClearCharNormArray(CharNormArray);
-  return FXInfo.NumBL;
-}                              /* GetBaselineFeatures */
-
-void Classify::ResetFeaturesHaveBeenExtracted() {
-  FeaturesHaveBeenExtracted = FALSE;
-}
-
 // Returns true if the given blob looks too dissimilar to any character
 // present in the classifier templates.
 bool Classify::LooksLikeGarbage(TBLOB *blob) {
@@ -1921,48 +1761,28 @@
  * @param BlobLength length of blob in baseline-normalized units
  *
  * Globals:
- * - FeaturesHaveBeenExtracted TRUE if fx has been done
- * - BaselineFeatures holds extracted baseline feat
- * - CharNormFeatures holds extracted char norm feat
- * - FXInfo holds misc. FX info
  *
  * @return Number of features extracted or 0 if an error occured.
  * @note Exceptions: none
 * @note History: Tue May 28 10:40:52 1991, DSJ, Created.
  */
-int Classify::GetCharNormFeatures(TBLOB *Blob,
-                                  INT_TEMPLATES Templates,
-                                  INT_FEATURE_ARRAY IntFeatures,
-                                  uinT8* PrunerNormArray,
-                                  uinT8* CharNormArray,
-                                  inT32 *BlobLength) {
-  FEATURE NormFeature;
-  FLOAT32 Baseline, Scale;
-
-  if (!FeaturesHaveBeenExtracted) {
-    FeaturesOK = ExtractIntFeat(*Blob, classify_nonlinear_norm,
-                                BaselineFeatures, CharNormFeatures, &FXInfo);
-    FeaturesHaveBeenExtracted = TRUE;
-  }
-
-  *BlobLength = IntCastRounded(FXInfo.Length / kStandardFeatureLength);
-  if (!FeaturesOK) {
-    return 0;
-  }
-
-  memcpy(IntFeatures, CharNormFeatures, FXInfo.NumCN * sizeof(IntFeatures[0]));
-
-  NormFeature = NewFeature(&CharNormDesc);
-  Baseline = kBlnBaselineOffset;
-  Scale = MF_SCALE_FACTOR;
-  NormFeature->Params[CharNormY] = (FXInfo.Ymean - Baseline) * Scale;
-  NormFeature->Params[CharNormLength] =
-    FXInfo.Length * Scale / LENGTH_COMPRESSION;
-  NormFeature->Params[CharNormRx] = FXInfo.Rx * Scale;
-  NormFeature->Params[CharNormRy] = FXInfo.Ry * Scale;
-  ComputeCharNormArrays(NormFeature, Templates, CharNormArray, PrunerNormArray);
-  return FXInfo.NumCN;
-}                              /* GetCharNormFeatures */
+int Classify::GetCharNormFeature(const INT_FX_RESULT_STRUCT& fx_info,
+                                 INT_TEMPLATES templates,
+                                 uinT8* pruner_norm_array,
+                                 uinT8* char_norm_array) {
+  FEATURE norm_feature = NewFeature(&CharNormDesc);
+  float baseline = kBlnBaselineOffset;
+  float scale = MF_SCALE_FACTOR;
+  norm_feature->Params[CharNormY] = (fx_info.Ymean - baseline) * scale;
+  norm_feature->Params[CharNormLength] =
+      fx_info.Length * scale / LENGTH_COMPRESSION;
+  norm_feature->Params[CharNormRx] = fx_info.Rx * scale;
+  norm_feature->Params[CharNormRy] = fx_info.Ry * scale;
+  // Deletes norm_feature.
+  ComputeCharNormArrays(norm_feature, templates, char_norm_array,
+                        pruner_norm_array);
+  return IntCastRounded(fx_info.Length / kStandardFeatureLength);
+}                              /* GetCharNormFeature */
 
 // Computes the char_norm_array for the unicharset and, if not NULL, the
 // pruner_array as appropriate according to the existence of the shape_table.
@@ -2454,7 +2274,6 @@
   }
   INT_RESULT_STRUCT cn_result;
   classify_norm_method.set_value(character);
-  im_.SetCharNormMatch(classify_integer_matcher_multiplier);
   im_.Match(ClassForClassId(PreTrainedTemplates, shape_id),
             AllProtosOn, AllConfigsOn,
             num_features, features, &cn_result,
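
The same pattern is applied to the integer matcher: the stateful
im_.SetBaseLineMatch() and im_.SetCharNormMatch(classify_integer_matcher_multiplier)
calls, which configured the shared IntegerMatcher before each Match(), are
deleted, and the multiplier is instead passed down explicitly (the new
matcher_multiplier parameter of MasterMatcher, ExpandShapesAndApplyCorrections,
and the rating-correction helper, with 0 for the baseline path). A sketch of
the resulting char-norm call, abbreviated from the hunk at old line 1518:

    // The multiplier is a plain argument instead of shared matcher state.
    MasterMatcher(PreTrainedTemplates, num_features, sample.features(),
                  char_norm_array, NULL, matcher_debug_flags, num_classes,
                  classify_integer_matcher_multiplier,  // was SetCharNormMatch()
                  blob_box, adapt_results->CPResults, adapt_results);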