77
78
boxaDestroy(&rotated);
81
// Size, in chars, of the stack buffer used to format one line of box-file
// output with snprintf.
const int kMaxLineLength = 1024;
82
// Helper appends a tab box to the string to indicate a newline. We can't use
83
// an actual newline as the file format is line-based text.
84
static void AppendTabBox(const Box* box, int height, int page, string* output) {
85
char buffer[kMaxLineLength];
86
int nbytes = snprintf(buffer, kMaxLineLength, "\t %d %d %d %d %d\n",
87
box->x + box->w, height - box->y - box->h,
88
box->x + box->w + 10, height - box->y, page);
89
output->append(buffer, nbytes);
81
93
// Formats the entries of |boxes| as text lines and writes the accumulated
// string to |filename| via File::WriteStringToFileOrDie.
//
// For an entry with a non-null box_, a line "<ch_> %d %d %d %d %d" is emitted;
// the visible coordinate arguments are x, height - y - h, x + w and height - y.
// An entry with a null box_ that follows a non-null one is handled as a space
// (per the comments below): the next non-null entry is located and, when it is
// on the same page as the previous box, either a tab box or a space entry is
// emitted. In both branches a tab box is appended through AppendTabBox when
// the right edge (x + w) of the later box lies left of the earlier box's x.
//
// NOTE(review): this span looks like two revisions of the function interleaved
// with bare line numbers (e.g. the coordinate arguments appear once via
// boxes[i]->box_ and once via box), and the declarations of `output` and `j`,
// the second format string and the closing braces are not visible here.
// Confirm against the real file before relying on these comments.
void BoxChar::WriteTesseractBoxFile(const string& filename, int height,
82
94
const vector<BoxChar*>& boxes) {
84
// NOTE(review): function-local constant with the same name and value as the
// file-level kMaxLineLength seen earlier; possibly a leftover of the older
// revision -- confirm.
const int kMaxLineLength = 1024;
85
96
// Scratch buffer reused for every formatted line.
char buffer[kMaxLineLength];
86
97
for (int i = 0; i < boxes.size(); ++i) {
87
if (boxes[i]->box_ != NULL) {
98
const Box* box = boxes[i]->box_;
100
// Line-break test: previous entry has a box, is on the same page, and the
// current box's right edge is left of the previous box's x.
if (i > 0 && boxes[i - 1]->box_ != NULL &&
101
boxes[i - 1]->page_ == boxes[i]->page_ &&
102
box->x + box->w < boxes[i - 1]->box_->x) {
103
// We are on a newline. Output a tab character to indicate the newline.
104
AppendTabBox(boxes[i - 1]->box_, height, boxes[i]->page_, &output);
88
106
int nbytes = snprintf(buffer, kMaxLineLength,
89
107
"%s %d %d %d %d %d\n",
90
108
boxes[i]->ch_.c_str(),
92
height - boxes[i]->box_->y - boxes[i]->box_->h,
93
boxes[i]->box_->x + boxes[i]->box_->w,
94
height - boxes[i]->box_->y,
109
box->x, height - box->y - box->h,
110
box->x + box->w, height - box->y,
96
112
output.append(buffer, nbytes);
113
} else if (i > 0 && boxes[i - 1]->box_ != NULL) {
115
// Find the next non-null box, as there may be multiple spaces.
116
while (j < boxes.size() && boxes[j]->box_ == NULL) ++j;
117
// Only act when a following box exists and shares the previous box's page.
if (j < boxes.size() && boxes[i - 1]->page_ == boxes[j]->page_) {
118
const Box* prev = boxes[i - 1]->box_;
119
const Box* next = boxes[j]->box_;
120
if (next->x + next->w < prev->x) {
121
// We are on a newline. Output a tab character to indicate it.
122
AppendTabBox(prev, height, boxes[j]->page_, &output);
124
// Space between words.
125
// The visible arguments place the space entry between prev and next, using
// MAX/MIN over the two boxes for its vertical limits; the page written is
// that of the previous box.
int nbytes = snprintf(buffer, kMaxLineLength,
128
height - MAX(prev->y + prev->h,
130
next->x, height - MIN(prev->y, next->y),
131
boxes[i - 1]->page_);
132
output.append(buffer, nbytes);
99
137
// Persist everything accumulated in `output`; judging by its name the helper
// aborts on failure -- confirm against its definition.
File::WriteStringToFileOrDie(output, filename);