34
// Fixed-size array of MAX_NUM_CHUNKS inT16 weights
// (presumably one weight per chunk - confirm against users of this type).
typedef inT16 BLOB_WEIGHTS[MAX_NUM_CHUNKS];
36
// Each unichar evaluated.
37
struct EVALUATION_RECORD {
45
// Fixed-size array holding MAX_NUM_CHUNKS EVALUATION_RECORD entries.
typedef EVALUATION_RECORD EVALUATION_ARRAY[MAX_NUM_CHUNKS];
47
// Classification info for chunks.
49
// TODO(daria): move to tesseract namespace when obsolete code using
50
// this struct that is not in tesseract namespace is deprecated.
51
struct CHUNKS_RECORD {
54
WERD_RES* word_res; // Borrowed pointer - do not delete!
57
WIDTH_RECORD *chunk_widths;
58
WIDTH_RECORD *char_widths;
62
33
namespace tesseract {
64
35
// Statistics about character widths, gaps and seams.
89
61
// the blob on the right
90
62
bool bad_fixed_pitch_wh_ratio; // true if the blob has a width-to-height
91
63
// ratio > kMaxFixedPitchCharAspectRatio
64
int gap_sum; // sum of gaps within the blob
94
67
// Utility functions for scoring segmentation paths according to their
98
71
// Width-to-height ratio threshold: a blob whose ratio exceeds this value
// is flagged through bad_fixed_pitch_wh_ratio. The value is defined elsewhere.
static const float kMaxFixedPitchCharAspectRatio;
99
72
// NOTE(review): presumably the smallest gap that is treated as a real
// gap between characters - confirm against the definition and its users.
static const float kMinGap;
74
// Returns the outline length of the given blob, which is computed as:
75
// rating_cert_scale * rating / certainty
76
// Since from Wordrec::SegSearch() in segsearch.cpp
77
// rating_cert_scale = -1.0 * getDict().certainty_scale / rating_scale
78
// And from Classify::ConvertMatchesToChoices() in adaptmatch.cpp
79
// Rating = Certainty = next.rating
80
// Rating *= rating_scale * Results->BlobLength
81
// Certainty *= -(getDict().certainty_scale)
82
static inline float ComputeOutlineLength(float rating_cert_scale,
83
const BLOB_CHOICE &b) {
84
return rating_cert_scale * b.rating() / b.certainty();
86
static inline float ComputeRating(float rating_cert_scale,
87
float cert, int width) {
88
return static_cast<float>(width) * cert / rating_cert_scale;
101
91
// Computes character widths, gaps and seams stats given the
102
92
// AssociateStats of the path so far, col, row of the blob that
103
// is being added to the path, and CHUNKS_RECORD containing information
93
// is being added to the path, and WERD_RES containing information
104
94
// about character widths, gaps and seams.
105
95
// Fills associate_cost with the combined shape, gap and seam cost
106
96
// of adding a unichar from (col, row) to the path (note that since
108
98
// pain points, (col, row) entry might not be classified yet; thus
109
99
// information in the (col, row) entry of the ratings matrix is not used).
111
// Note: the function assumes that chunks_record, stats and
101
// Note: the function assumes that word_res, stats and
112
102
// associate_cost pointers are not NULL.
113
103
static void ComputeStats(int col, int row,
114
104
const AssociateStats *parent_stats,
115
105
int parent_path_length,
116
106
bool fixed_pitch,
117
107
float max_char_wh_ratio,
118
const DENORM *denorm,
119
CHUNKS_RECORD *chunks_record,
121
110
AssociateStats *stats);
123
// Returns the width of a chunk which is composed of several blobs
124
// blobs[start_blob..last_blob] inclusively.
125
// Widths/gaps records are in the form:
126
// width_record->num_chars = n
127
// width_record->widths[2*n-1] = w0,g0,w1,g1..g(n-2),w(n-1)
128
static int GetChunksWidth(WIDTH_RECORD *width_record,
129
int start_blob, int last_blob);
131
// Returns the width of a gap between the specified chunk and the next one.
132
static inline int GetChunksGap(WIDTH_RECORD *width_record, int last_chunk) {
133
return (last_chunk >= 0 && last_chunk < width_record->num_chars - 1) ?
134
width_record->widths[last_chunk * 2 + 1] : 0;
137
112
// Returns the width cost for fixed-pitch text.
138
113
static float FixedPitchWidthCost(float norm_width, float right_gap,
139
114
bool end_pos, float max_char_wh_ratio);