36
class ResultIterator : public LTRResultIterator {
37
class TESS_API ResultIterator : public LTRResultIterator {
38
39
static ResultIterator *StartOfParagraph(const LTRResultIterator &resit);
40
// ResultIterator is copy constructible!
41
// The default copy constructor works just fine for us.
42
* ResultIterator is copy constructible!
43
* The default copy constructor works just fine for us.
42
45
// Virtual destructor with an empty body: nothing is released explicitly here.
virtual ~ResultIterator() {}
44
47
// ============= Moving around within the page ============.
45
// Moves the iterator to point to the start of the page to begin an iteration.
49
* Moves the iterator to point to the start of the page to begin
46
52
virtual void Begin();
48
// Moves to the start of the next object at the given level in the
49
// page hierarchy in the appropriate reading order and returns false if
50
// the end of the page was reached.
51
// NOTE that RIL_SYMBOL will skip non-text blocks, but all other
52
// PageIteratorLevel level values will visit each non-text block once.
53
// Think of non text blocks as containing a single para, with a single line,
54
// with a single imaginary word.
55
// Calls to Next with different levels may be freely intermixed.
56
// This function iterates words in right-to-left scripts correctly, if
57
// the appropriate language has been loaded into Tesseract.
55
* Moves to the start of the next object at the given level in the
56
* page hierarchy in the appropriate reading order and returns false if
57
* the end of the page was reached.
58
* NOTE that RIL_SYMBOL will skip non-text blocks, but all other
59
* PageIteratorLevel level values will visit each non-text block once.
60
* Think of non text blocks as containing a single para, with a single line,
61
* with a single imaginary word.
62
* Calls to Next with different levels may be freely intermixed.
63
* This function iterates words in right-to-left scripts correctly, if
64
* the appropriate language has been loaded into Tesseract.
58
66
virtual bool Next(PageIteratorLevel level);
60
// IsAtBeginningOf() returns whether we're at the logical beginning of the
61
// given level (as opposed to ResultIterator's left-to-right top-to-bottom
62
// order). Otherwise, this acts the same as PageIterator::IsAtBeginningOf().
63
// For a full description, see pageiterator.h
69
* IsAtBeginningOf() returns whether we're at the logical beginning of the
70
* given level (as opposed to ResultIterator's left-to-right top-to-bottom
71
* order). Otherwise, this acts the same as PageIterator::IsAtBeginningOf().
72
* For a full description, see pageiterator.h
64
74
virtual bool IsAtBeginningOf(PageIteratorLevel level) const;
66
// Implement PageIterator's IsAtFinalElement correctly in a BiDi context.
67
// For instance, IsAtFinalElement(RIL_PARA, RIL_WORD) returns whether we
68
// point at the last word in a paragraph. See PageIterator for full comment.
77
* Implement PageIterator's IsAtFinalElement correctly in a BiDi context.
78
* For instance, IsAtFinalElement(RIL_PARA, RIL_WORD) returns whether we
79
* point at the last word in a paragraph. See PageIterator for full comment.
69
81
virtual bool IsAtFinalElement(PageIteratorLevel level,
70
82
PageIteratorLevel element) const;
72
84
// ============= Accessing data ==============.
74
// Returns the null terminated UTF-8 encoded text string for the current
75
// object at the given level. Use delete [] to free after use.
87
* Returns the null terminated UTF-8 encoded text string for the current
88
* object at the given level. Use delete [] to free after use.
76
90
virtual char* GetUTF8Text(PageIteratorLevel level) const;
78
// Return whether the current paragraph's dominant reading direction
79
// is left-to-right (as opposed to right-to-left).
93
* Return whether the current paragraph's dominant reading direction
94
* is left-to-right (as opposed to right-to-left).
80
96
bool ParagraphIsLtr() const;
82
98
// ============= Exposed only for testing =============.
84
// Yields the reading order as a sequence of indices and (optional)
85
// meta-marks for a set of words (given left-to-right).
86
// The meta marks are passed as negative values:
87
// kMinorRunStart Start of minor direction text.
88
// kMinorRunEnd End of minor direction text.
89
// kComplexWord The next indexed word contains both left-to-right and
90
// right-to-left characters and was treated as neutral.
92
// For example, suppose we have five words in a text line,
93
// indexed [0,1,2,3,4] from the leftmost side of the text line.
94
// The following are all believable reading_orders:
96
// Left-to-Right (in ltr paragraph):
98
// Left-to-Right (in rtl paragraph):
99
// { kMinorRunStart, 0, 1, 2, 3, 4, kMinorRunEnd }
100
// Right-to-Left (in rtl paragraph):
102
// Left-to-Right except for an RTL phrase in words 2, 3 in an ltr paragraph:
103
// { 0, 1, kMinorRunStart, 3, 2, kMinorRunEnd, 4 }
101
* Yields the reading order as a sequence of indices and (optional)
102
* meta-marks for a set of words (given left-to-right).
103
* The meta marks are passed as negative values:
104
* kMinorRunStart Start of minor direction text.
105
* kMinorRunEnd End of minor direction text.
106
* kComplexWord The next indexed word contains both left-to-right and
107
* right-to-left characters and was treated as neutral.
109
* For example, suppose we have five words in a text line,
110
* indexed [0,1,2,3,4] from the leftmost side of the text line.
111
* The following are all believable reading_orders:
113
* Left-to-Right (in ltr paragraph):
115
* Left-to-Right (in rtl paragraph):
116
* { kMinorRunStart, 0, 1, 2, 3, 4, kMinorRunEnd }
117
* Right-to-Left (in rtl paragraph):
119
* Left-to-Right except for an RTL phrase in words 2, 3 in an ltr paragraph:
120
* { 0, 1, kMinorRunStart, 3, 2, kMinorRunEnd, 4 }
104
122
static void CalculateTextlineOrder(
105
123
bool paragraph_is_ltr,
106
124
const GenericVector<StrongScriptDirection> &word_dirs,
111
129
static const int kComplexWord;
114
// We presume the data associated with the given iterator will outlive us.
115
// NB: This is private because it does something that is non-obvious:
116
// it resets to the beginning of the paragraph instead of staying wherever
117
// resit might have pointed.
118
explicit ResultIterator(const LTRResultIterator &resit);
133
* We presume the data associated with the given iterator will outlive us.
134
* NB: This is private because it does something that is non-obvious:
135
* it resets to the beginning of the paragraph instead of staying wherever
136
* resit might have pointed.
138
TESS_LOCAL explicit ResultIterator(const LTRResultIterator &resit);
121
// Calculates the current paragraph's dominant writing direction.
122
// Typically, members should use current_paragraph_is_ltr_ instead.
142
* Calculates the current paragraph's dominant writing direction.
143
* Typically, members should use current_paragraph_is_ltr_ instead.
123
145
bool CurrentParagraphIsLtr() const;
125
// Returns word indices as measured from resit->RestartRow() = index 0
126
// for the reading order of words within a textline given an iterator
127
// into the middle of the text line.
128
// In addition to non-negative word indices, the following negative values
130
// kMinorRunStart Start of minor direction text.
131
// kMinorRunEnd End of minor direction text.
132
// kComplexWord The previous word contains both left-to-right and
133
// right-to-left characters and was treated as neutral.
148
* Returns word indices as measured from resit->RestartRow() = index 0
149
* for the reading order of words within a textline given an iterator
150
* into the middle of the text line.
151
* In addition to non-negative word indices, the following negative values
153
* kMinorRunStart Start of minor direction text.
154
* kMinorRunEnd End of minor direction text.
155
* kComplexWord The previous word contains both left-to-right and
156
* right-to-left characters and was treated as neutral.
134
158
void CalculateTextlineOrder(bool paragraph_is_ltr,
135
159
const LTRResultIterator &resit,
136
160
GenericVectorEqEq<int> *indices) const;
137
// Same as above, but the caller's ssd gets filled in if ssd != NULL.
161
/** Same as above, but the caller's ssd gets filled in if ssd != NULL. */
138
162
void CalculateTextlineOrder(bool paragraph_is_ltr,
139
163
const LTRResultIterator &resit,
140
164
GenericVector<StrongScriptDirection> *ssd,
141
165
GenericVectorEqEq<int> *indices) const;
143
// What is the index of the current word in a strict left-to-right reading
168
* What is the index of the current word in a strict left-to-right reading
145
171
int LTRWordIndex() const;
147
// Given an iterator pointing at a word, returns the logical reading order
148
// of blob indices for the word.
174
* Given an iterator pointing at a word, returns the logical reading order
175
* of blob indices for the word.
149
177
void CalculateBlobOrder(GenericVector<int> *blob_indices) const;
151
// Precondition: current_paragraph_is_ltr_ is set.
179
/** Precondition: current_paragraph_is_ltr_ is set. */
152
180
void MoveToLogicalStartOfTextline();
154
// Precondition: current_paragraph_is_ltr_ and in_minor_direction_ are set.
183
* Precondition: current_paragraph_is_ltr_ and in_minor_direction_
155
186
void MoveToLogicalStartOfWord();
157
// Are we pointing at the final (reading order) symbol of the word?
188
/** Are we pointing at the final (reading order) symbol of the word? */
158
189
bool IsAtFinalSymbolOfWord() const;
160
// Are we pointing at the first (reading order) symbol of the word?
191
/** Are we pointing at the first (reading order) symbol of the word? */
161
192
bool IsAtFirstSymbolOfWord() const;
163
// Append any extra marks that should be appended to this word when printed.
164
// Mostly, these are Unicode BiDi control characters.
195
* Append any extra marks that should be appended to this word when printed.
196
* Mostly, these are Unicode BiDi control characters.
165
198
void AppendSuffixMarks(STRING *text) const;
167
// Appends the current word in reading order to the given buffer.
200
/** Appends the current word in reading order to the given buffer. */
168
201
void AppendUTF8WordText(STRING *text) const;
170
// Appends the text of the current text line, *assuming this iterator is
171
// positioned at the beginning of the text line*. This function
172
// updates the iterator to point to the first position past the text line.
173
// Each textline is terminated in a single newline character.
174
// If the textline ends a paragraph, it gets a second terminal newline.
204
* Appends the text of the current text line, *assuming this iterator is
205
* positioned at the beginning of the text line*. This function
206
* updates the iterator to point to the first position past the text line.
207
* Each textline is terminated in a single newline character.
208
* If the textline ends a paragraph, it gets a second terminal newline.
175
210
void IterateAndAppendUTF8TextlineText(STRING *text);
177
// Appends the text of the current paragraph in reading order
178
// to the given buffer.
179
// Each textline is terminated in a single newline character, and the
180
// paragraph gets an extra newline at the end.
213
* Appends the text of the current paragraph in reading order
214
* to the given buffer.
215
* Each textline is terminated in a single newline character, and the
216
* paragraph gets an extra newline at the end.
181
218
void AppendUTF8ParagraphText(STRING *text) const;
183
// Returns whether the bidi_debug flag is set to at least min_level.
220
/** Returns whether the bidi_debug flag is set to at least min_level. */
184
221
bool BidiDebug(int min_level) const;
186
223
bool current_paragraph_is_ltr_;
188
// Is the currently pointed-at character at the beginning of
189
// a minor-direction run?
226
* Is the currently pointed-at character at the beginning of
227
* a minor-direction run?
190
229
bool at_beginning_of_minor_run_;
192
// Is the currently pointed-at character in a minor-direction sequence?
231
/** Is the currently pointed-at character in a minor-direction sequence? */
193
232
bool in_minor_direction_;