211
struct WordWithCharacters
213
WordWithCharacters(TinyTextEntity *w, const TextList &c)
214
: word(w), characters(c)
218
inline QString text() const
223
inline const NormalizedRect &area() const
228
TinyTextEntity *word;
231
typedef QList<WordWithCharacters> WordsWithCharacters;
217
234
* We will divide the whole page in some regions depending on the horizontal and
218
235
* vertical spacing among different regions. Each region will have an area and an
219
* associated TextList in sorted order.
236
* associated WordsWithCharacters in sorted order.
229
RegionText(const TextList &list,const QRect &area)
230
: m_region_text(list) ,m_area(area)
246
RegionText(const WordsWithCharacters &wordsWithCharacters, const QRect &area)
247
: m_region_wordWithCharacters(wordsWithCharacters), m_area(area)
234
251
inline QString string() const
237
foreach(TinyTextEntity *te, m_region_text)
254
foreach(const WordWithCharacters &word, m_region_wordWithCharacters)
242
inline TextList text() const
259
inline WordsWithCharacters text() const
244
return m_region_text;
261
return m_region_wordWithCharacters;
247
264
inline QRect area() const
995
static bool compareTinyTextEntityX(TinyTextEntity* first, TinyTextEntity* second)
1012
static bool compareTinyTextEntityX(const WordWithCharacters &first, const WordWithCharacters &second)
997
QRect firstArea = first->area.roundedGeometry(1000,1000);
998
QRect secondArea = second->area.roundedGeometry(1000,1000);
1014
QRect firstArea = first.area().roundedGeometry(1000,1000);
1015
QRect secondArea = second.area().roundedGeometry(1000,1000);
1000
1017
return firstArea.left() < secondArea.left();
1003
static bool compareTinyTextEntityY(TinyTextEntity* first, TinyTextEntity* second)
1020
static bool compareTinyTextEntityY(const WordWithCharacters &first, const WordWithCharacters &second)
1005
QRect firstArea = first->area.roundedGeometry(1000,1000);
1006
QRect secondArea = second->area.roundedGeometry(1000,1000);
1022
const QRect firstArea = first.area().roundedGeometry(1000,1000);
1023
const QRect secondArea = second.area().roundedGeometry(1000,1000);
1008
1025
return firstArea.top() < secondArea.top();
1012
* Copies a TextList to m_words with the same pointer
1029
* Sets a new word list, deleting the contents of the old one
1014
1031
void TextPagePrivate::setWordList(const TextList &list)
1117
* We will read the TinyTextEntity from m_words and try to create
1118
* We will read the TinyTextEntity from characters and try to create words from there.
1119
* Note: characters might already be words for some generators, but we will keep
1120
* the nomenclature characters for the generator produced data. The resulting
1121
* WordsWithCharacters memory has to be managed by the caller, both the
1122
* WordWithCharacters::word and WordWithCharacters::characters contents
1120
QHash<QRect, RegionText> TextPagePrivate::makeWordFromCharacters()
1124
static WordsWithCharacters makeWordFromCharacters(const TextList &characters, int pageWidth, int pageHeight)
1123
* At first we will copy m_words to tmpList. Then, we will traverse the
1124
* tmpList and try to create words from the TinyTextEntities in tmpList.
1127
* We will traverse characters and try to create words from the TinyTextEntities in it.
1125
1128
* We will search TinyTextEntity blocks and merge them until we get a
1126
1129
* space between two consecutive TinyTextEntities. When we get a space
1127
1130
* we can take it as the end of a word. Then we store the word as a TinyTextEntity
1128
1131
* and keep it in newList.
1130
* We also keep a mapping between every element in newList and word. We create a
1131
* RegionText named regionWord and create a hash key from the TinyTextEntity
1132
* rectangle area of the element in newList. So, we can get the TinyTextEntities from
1133
* which every element(word) of newList is generated. It will be necessary later
1134
* when we will divide the word into characters.
1133
* We create a RegionText named regionWord that contains the word and the characters associated with it and
1134
* a rectangle area of the element in newList.
1136
* Finally we copy the newList to m_words.
1139
QHash<QRect, RegionText> word_chars_map;
1140
const TextList tmpList = m_words;
1143
TextList::ConstIterator it = tmpList.begin(), itEnd = tmpList.end(), tmpIt;
1137
WordsWithCharacters wordsWithCharacters;
1139
TextList::ConstIterator it = characters.begin(), itEnd = characters.end(), tmpIt;
1144
1140
int newLeft,newRight,newTop,newBottom;
1145
const int pageWidth = m_page->m_page->width();
1146
const int pageHeight = m_page->m_page->height();
1149
1143
for( ; it != itEnd ; it++)
1151
1145
QString textString = (*it)->text();
1152
1146
QString newString;
1153
1147
QRect lineArea = (*it)->area.roundedGeometry(pageWidth,pageHeight),elementArea;
1148
TextList wordCharacters;
1160
if(textString.length())
1154
if (textString.length())
1162
1156
newString.append(textString);
1164
1158
// when textString is the start of the word
1167
1161
NormalizedRect newRect(lineArea,pageWidth,pageHeight);
1168
word.append(new TinyTextEntity(textString.normalized
1162
wordCharacters.append(new TinyTextEntity(textString.normalized
1169
1163
(QString::NormalizationForm_KC), newRect));
1173
1167
NormalizedRect newRect(elementArea,pageWidth,pageHeight);
1174
word.append(new TinyTextEntity(textString.normalized
1168
wordCharacters.append(new TinyTextEntity(textString.normalized
1175
1169
(QString::NormalizationForm_KC), newRect));
1182
1176
we have to put this line before the if condition of it==itEnd
1183
1177
otherwise the last character can be missed
1185
if(it == itEnd) break;
1179
if (it == itEnd) break;
1186
1180
elementArea = (*it)->area.roundedGeometry(pageWidth,pageHeight);
1187
if(!doesConsumeY(elementArea,lineArea,60))
1181
if (!doesConsumeY(elementArea, lineArea, 60))
1222
1216
// if newString is not empty, save it
1223
if(newString.length())
1217
if (!newString.isEmpty())
1225
const NormalizedRect newRect(lineArea,pageWidth,pageHeight);
1226
newList.append(new TinyTextEntity(newString.normalized
1227
(QString::NormalizationForm_KC), newRect ));
1228
const QRect rect = newRect.geometry(pageWidth,pageHeight);
1229
const RegionText regionWord(word,rect);
1231
// there may be more than one element in the same key
1232
word_chars_map.insertMulti(rect,regionWord);
1219
const NormalizedRect newRect(lineArea, pageWidth, pageHeight);
1220
TinyTextEntity *word = new TinyTextEntity(newString.normalized(QString::NormalizationForm_KC), newRect);
1221
wordsWithCharacters.append(WordWithCharacters(word, wordCharacters));
1237
1226
if(it == itEnd) break;
1240
setWordList(newList);
1242
return word_chars_map;
1229
return wordsWithCharacters;
1246
1233
* Create Lines from the words and sort them
1248
void TextPagePrivate::makeAndSortLines(const TextList &wordsTmp, SortedTextList *lines, LineRect *line_rects)
1235
QList< QPair<WordsWithCharacters, QRect> > makeAndSortLines(const WordsWithCharacters &wordsTmp, int pageWidth, int pageHeight)
1251
1238
* We cannot assume that the generator will give us texts in the right order.
1257
1244
* 2. Create textline where there is y overlap between TinyTextEntity 's
1258
1245
* 3. Within each line sort the TinyTextEntity 's by x0(left)
1248
QList< QPair<WordsWithCharacters, QRect> > lines;
1262
1251
Make a new copy of the TextList in the words, so that the wordsTmp and lines do
1263
1252
not contain same pointers for all the TinyTextEntity.
1266
for(int i = 0 ; i < wordsTmp.length() ; i++)
1268
TinyTextEntity* ent = wordsTmp.at(i);
1269
words.append( new TinyTextEntity( ent->text(),ent->area ) );
1254
QList<WordWithCharacters> words = wordsTmp;
1273
1257
qSort(words.begin(),words.end(),compareTinyTextEntityY);
1276
TextList::Iterator it = words.begin(), itEnd = words.end();
1277
const int pageWidth = m_page->m_page->width();
1278
const int pageHeight = m_page->m_page->height();
1260
QList<WordWithCharacters>::Iterator it = words.begin(), itEnd = words.end();
1280
1262
//for every non-space text (character/word) in the textList
1281
1263
for( ; it != itEnd ; it++)
1283
const QRect elementArea = (*it)->area.roundedGeometry(pageWidth,pageHeight);
1265
const QRect elementArea = (*it).area().roundedGeometry(pageWidth,pageHeight);
1284
1266
bool found = false;
1286
for( int i = 0 ; i < lines->length() ; i++)
1268
for( int i = 0 ; i < lines.length() ; i++)
1288
1270
/* the line area which will be expanded
1289
1271
line_rects is only necessary to preserve the topmin and bottommax of all
1290
1272
the texts in the line, left and right is not necessary at all
1292
QRect &lineArea = (*line_rects)[i];
1274
QRect &lineArea = lines[i].second;
1293
1275
const int text_y1 = elementArea.top() ,
1294
1276
text_y2 = elementArea.top() + elementArea.height() ,
1295
1277
text_x1 = elementArea.left(),
1310
WordsWithCharacters tmp;
1329
1311
tmp.append((*it));
1331
line_rects->append(elementArea);
1312
lines.append(QPair<WordsWithCharacters, QRect>(tmp, elementArea));
1336
for(int i = 0 ; i < lines->length() ; i++)
1317
for(int i = 0 ; i < lines.length() ; i++)
1338
TextList &list = (*lines)[i];
1339
qSort(list.begin(),list.end(),compareTinyTextEntityX);
1319
WordsWithCharacters &list = lines[i].first;
1320
qSort(list.begin(), list.end(), compareTinyTextEntityX);
1344
1327
* Calculate Statistical information from the lines we made previously
1346
void TextPagePrivate::calculateStatisticalInformation(const SortedTextList &lines, const LineRect &line_rects, int *word_spacing, int *line_spacing, int *col_spacing)
1329
static void calculateStatisticalInformation(const QList<WordWithCharacters> &words, int pageWidth, int pageHeight, int *word_spacing, int *line_spacing, int *col_spacing)
1349
1332
* For the region, defined by line_rects and lines
1351
1334
* 2. Make character statistical analysis to differentiate between
1352
1335
* word spacing and column spacing.
1341
const QList< QPair<WordsWithCharacters, QRect> > sortedLines = makeAndSortLines(words, pageWidth, pageHeight);
1358
1346
QMap<int,int> line_space_stat;
1359
for(int i = 0 ; i < line_rects.length(); i++)
1347
for(int i = 0 ; i < sortedLines.length(); i++)
1361
const QRect rectUpper = line_rects.at(i);
1349
const QRect rectUpper = sortedLines.at(i).second;
1363
if(i+1 == line_rects.length()) break;
1364
const QRect rectLower = line_rects.at(i+1);
1351
if(i+1 == sortedLines.length()) break;
1352
const QRect rectLower = sortedLines.at(i+1).second;
1366
1354
int linespace = rectLower.top() - (rectUpper.top() + rectUpper.height());
1367
1355
if(linespace < 0) linespace =-linespace;
1393
1381
QList< QList<QRect> > space_rects;
1394
1382
QList<QRect> max_hor_space_rects;
1396
int pageWidth = m_page->m_page->width(), pageHeight = m_page->m_page->height();
1398
1384
// Space in every line
1399
for(int i = 0 ; i < lines.length() ; i++)
1385
for(int i = 0 ; i < sortedLines.length() ; i++)
1401
TextList list = lines.at(i);
1387
const WordsWithCharacters list = sortedLines.at(i).first;
1402
1388
QList<QRect> line_space_rects;
1403
1389
int maxSpace = 0, minSpace = pageWidth;
1405
1391
// for every TinyTextEntity element in the line
1406
TextList::Iterator it = list.begin(), itEnd = list.end();
1392
WordsWithCharacters::ConstIterator it = list.begin(), itEnd = list.end();
1407
1393
QRect max_area1,max_area2;
1408
1394
QString before_max, after_max;
1410
1396
// for every line
1411
1397
for( ; it != itEnd ; it++ )
1413
const QRect area1 = (*it)->area.roundedGeometry(pageWidth,pageHeight);
1399
const QRect area1 = (*it).area().roundedGeometry(pageWidth,pageHeight);
1414
1400
if( it+1 == itEnd ) break;
1416
const QRect area2 = (*(it+1))->area.roundedGeometry(pageWidth,pageHeight);
1402
const QRect area2 = (*(it+1)).area().roundedGeometry(pageWidth,pageHeight);
1417
1403
int space = area2.left() - area1.right();
1419
1405
if(space > maxSpace)
1505
1491
*col_spacing = col_space_stat.key(*col_spacing);
1507
1493
// if there is just one line in a region, there is no point in dividing it
1508
if(lines.length() == 1)
1494
if(sortedLines.length() == 1)
1509
1495
*word_spacing = *col_spacing;
1513
1499
* Implements the XY Cut algorithm for textpage segmentation
1500
* The resulting RegionTextList will contain RegionText whose WordWithCharacters::word and
1501
* WordWithCharacters::characters are reused from wordsWithCharacters (i.e. neither new nor delete happens in this function)
1515
RegionTextList TextPagePrivate::XYCutForBoundingBoxes(int tcx, int tcy)
1503
static RegionTextList XYCutForBoundingBoxes(const QList<WordWithCharacters> &wordsWithCharacters, const NormalizedRect &boundingBox, int pageWidth, int pageHeight)
1517
const int pageWidth = m_page->m_page->width();
1518
const int pageHeight = m_page->m_page->height();
1519
1505
RegionTextList tree;
1520
QRect contentRect(m_page->m_page->boundingBox().geometry(pageWidth,pageHeight));
1521
const TextList words = duplicateWordList();
1522
const RegionText root(words,contentRect);
1506
QRect contentRect(boundingBox.geometry(pageWidth,pageHeight));
1507
const RegionText root(wordsWithCharacters, contentRect);
1524
1509
// start the tree with the root, it is our only region at the start
1525
1510
tree.push_back(root);
1530
1514
// while traversing the tree has not been ended
1531
1515
while(i < tree.length())
1537
1521
* 1. calculation of projection profiles
1539
1523
// allocate the size of proj profiles and initialize with 0
1540
int size_proj_y = node.area().height() ;
1541
int size_proj_x = node.area().width() ;
1524
int size_proj_y = node.area().height();
1525
int size_proj_x = node.area().width();
1542
1526
//dynamic memory allocation
1543
1527
QVarLengthArray<int> proj_on_xaxis(size_proj_x);
1544
1528
QVarLengthArray<int> proj_on_yaxis(size_proj_y);
1546
for( j = 0 ; j < size_proj_y ; j++ ) proj_on_yaxis[j] = 0;
1547
for( j = 0 ; j < size_proj_x ; j++ ) proj_on_xaxis[j] = 0;
1530
for( int j = 0 ; j < size_proj_y ; ++j ) proj_on_yaxis[j] = 0;
1531
for( int j = 0 ; j < size_proj_x ; ++j ) proj_on_xaxis[j] = 0;
1549
TextList list = node.text();
1533
const QList<WordWithCharacters> list = node.text();
1551
1535
// Calculate tcx and tcy locally for each new region
1554
SortedTextList lines;
1555
LineRect line_rects;
1556
int word_spacing, line_spacing, column_spacing;
1558
makeAndSortLines(list, &lines, &line_rects);
1559
calculateStatisticalInformation(lines, line_rects, &word_spacing, &line_spacing, &column_spacing);
1560
for(int i = 0 ; i < lines.length() ; i++)
1562
qDeleteAll(lines.at(i));
1566
tcx = word_spacing * 2;
1567
tcy = line_spacing * 2;
1536
int word_spacing, line_spacing, column_spacing;
1537
calculateStatisticalInformation(list, pageWidth, pageHeight, &word_spacing, &line_spacing, &column_spacing);
1539
const int tcx = word_spacing * 2;
1540
const int tcy = line_spacing * 2;
1570
1542
int maxX = 0 , maxY = 0;
1574
1546
// for every text in the region
1575
for( j = 0 ; j < list.length() ; j++ )
1547
for(int j = 0 ; j < list.length() ; ++j )
1577
TinyTextEntity *ent = list.at(j);
1578
QRect entRect = ent->area.geometry(pageWidth,pageHeight);
1549
TinyTextEntity *ent = list.at(j).word;
1550
const QRect entRect = ent->area.geometry(pageWidth, pageHeight);
1580
1552
// calculate vertical projection profile proj_on_xaxis1
1581
for(k = entRect.left() ; k <= entRect.left() + entRect.width() ; k++)
1553
for(int k = entRect.left() ; k <= entRect.left() + entRect.width() ; ++k)
1583
1555
if( ( k-regionRect.left() ) < size_proj_x && ( k-regionRect.left() ) >= 0 )
1584
1556
proj_on_xaxis[k - regionRect.left()] += entRect.height();
1587
1559
// calculate horizontal projection profile in the same way
1588
for(k = entRect.top() ; k <= entRect.top() + entRect.height() ; k++)
1560
for(int k = entRect.top() ; k <= entRect.top() + entRect.height() ; ++k)
1590
1562
if( ( k-regionRect.top() ) < size_proj_y && ( k-regionRect.top() ) >= 0 )
1591
1563
proj_on_yaxis[k - regionRect.top()] += entRect.width();
1595
for( j = 0 ; j < size_proj_y ; j++ )
1567
for( int j = 0 ; j < size_proj_y ; ++j )
1597
1569
if (proj_on_yaxis[j] > maxY)
1598
1570
maxY = proj_on_yaxis[j];
1601
1573
avgX = count = 0;
1602
for( j = 0 ; j < size_proj_x ; j++ )
1574
for( int j = 0 ; j < size_proj_x ; ++j )
1604
1576
if(proj_on_xaxis[j] > maxX) maxX = proj_on_xaxis[j];
1605
1577
if(proj_on_xaxis[j])
1743
TextList list1,list2;
1744
TinyTextEntity* ent;
1715
WordsWithCharacters list1,list2;
1747
1717
// horizontal cut, topRect and bottomRect
1750
for( j = 0 ; j < list.length() ; j++ )
1720
for( int j = 0 ; j < list.length() ; ++j )
1753
entRect = ent->area.geometry(pageWidth,pageHeight);
1722
const WordWithCharacters word = list.at(j);
1723
const QRect wordRect = word.area().geometry(pageWidth,pageHeight);
1755
if(topRect.intersects(entRect))
1725
if(topRect.intersects(wordRect))
1761
1731
RegionText node1(list1,topRect);
1768
1738
//vertical cut, leftRect and rightRect
1769
1739
else if(cut_ver)
1771
for( j = 0 ; j < list.length() ; j++ )
1741
for( int j = 0 ; j < list.length() ; ++j )
1774
entRect = ent->area.geometry(pageWidth,pageHeight);
1743
const WordWithCharacters word = list.at(j);
1744
const QRect wordRect = word.area().geometry(pageWidth,pageHeight);
1776
if(leftRect.intersects(entRect))
1778
else list2.append(ent);
1746
if(leftRect.intersects(wordRect))
1781
1752
RegionText node1(list1,leftRect);
1790
for(i = 0 ; i < tree.length() ; i++)
1792
tmp += tree.at(i).text();
1794
// set tmp as new m_words
1801
* Add spaces in between words in a line
1764
* Add spaces in between words in a line. It reuses the pointers passed in tree and might add new ones. You will need to take care of deleting them if needed
1803
void TextPagePrivate::addNecessarySpace(RegionTextList tree)
1766
WordsWithCharacters addNecessarySpace(RegionTextList tree, int pageWidth, int pageHeight)
1806
1769
* 1. Call makeAndSortLines before adding spaces in between words in a line
1807
1770
* 2. Now add spaces between every two words in a line
1808
* 3. Finally, extract all the space separated texts from each region and
1809
* make m_words nice again.
1771
* 3. Finally, extract all the space separated texts from each region and return it
1812
const int pageWidth = m_page->m_page->width();
1813
const int pageHeight = m_page->m_page->height();
1815
1774
// Only change the texts under RegionTexts, not the area
1816
1775
for(int j = 0 ; j < tree.length() ; j++)
1818
1777
RegionText &tmpRegion = tree[j];
1819
SortedTextList lines;
1820
LineRect line_rects;
1823
makeAndSortLines(tmpRegion.text(), &lines, &line_rects);
1780
QList< QPair<WordsWithCharacters, QRect> > sortedLines = makeAndSortLines(tmpRegion.text(), pageWidth, pageHeight);
1826
for(int i = 0 ; i < lines.length() ; i++)
1783
for(int i = 0 ; i < sortedLines.length() ; i++)
1828
TextList &list = lines[i];
1785
WordsWithCharacters &list = sortedLines[i].first;
1829
1786
for(int k = 0 ; k < list.length() ; k++ )
1831
const QRect area1 = list.at(k)->area.roundedGeometry(pageWidth,pageHeight);
1788
const QRect area1 = list.at(k).area().roundedGeometry(pageWidth,pageHeight);
1832
1789
if( k+1 >= list.length() ) break;
1834
const QRect area2 = list.at(k+1)->area.roundedGeometry(pageWidth,pageHeight);
1791
const QRect area2 = list.at(k+1).area().roundedGeometry(pageWidth,pageHeight);
1835
1792
const int space = area2.left() - area1.right();
1845
1802
const QString spaceStr(" ");
1846
1803
const QRect rect(QPoint(left,top),QPoint(right,bottom));
1847
1804
const NormalizedRect entRect(rect,pageWidth,pageHeight);
1848
TinyTextEntity *ent = new TinyTextEntity(spaceStr,entRect);
1805
TinyTextEntity *ent1 = new TinyTextEntity(spaceStr, entRect);
1806
TinyTextEntity *ent2 = new TinyTextEntity(spaceStr, entRect);
1807
WordWithCharacters word(ent1, QList<TinyTextEntity*>() << ent2);
1850
list.insert(k+1,ent);
1809
list.insert(k+1, word);
1852
1811
// Skip the space
1859
for(int i = 0 ; i < lines.length() ; i++)
1817
WordsWithCharacters tmpList;
1818
for(int i = 0 ; i < sortedLines.length() ; i++)
1861
tmpList += lines.at(i);
1820
tmpList += sortedLines.at(i).first;
1863
1822
tmpRegion.setText(tmpList);
1826
WordsWithCharacters tmp;
1868
1827
for(int i = 0 ; i < tree.length() ; i++)
1870
1829
tmp += tree.at(i).text();
1876
* Break Words into Characters, takes Entities from m_words and for each of
1877
* them insert the character entities in tmp. Finally, copies tmp back to m_words
1879
void TextPagePrivate::breakWordIntoCharacters(const QHash<QRect, RegionText> &word_chars_map)
1881
const QString spaceStr(" ");
1883
const int pageWidth = m_page->m_page->width();
1884
const int pageHeight = m_page->m_page->height();
1886
for(int i = 0 ; i < m_words.length() ; i++)
1888
TinyTextEntity *ent = m_words.at(i);
1889
const QRect rect = ent->area.geometry(pageWidth,pageHeight);
1891
// the spaces contains only one character, so we can skip them
1892
if(ent->text() == spaceStr)
1893
tmp.append( new TinyTextEntity(ent->text(),ent->area) );
1896
RegionText word_text;
1898
QHash<QRect, RegionText>::const_iterator it = word_chars_map.find(rect);
1899
while( it != word_chars_map.end() && it.key() == rect )
1901
word_text = it.value();
1903
if (ent->text() == word_text.string())
1908
tmp.append(word_text.text());
1916
1835
* Correct the textOrder, all layout recognition works here
1918
1837
void TextPagePrivate::correctTextOrder()
1839
const int pageWidth = m_page->m_page->width();
1840
const int pageHeight = m_page->m_page->height();
1842
TextList characters = m_words;
1921
1845
* Remove spaces from the text
1847
removeSpace(&characters);
1926
1850
* Construct words from characters
1928
const QHash<QRect, RegionText> word_chars_map = makeWordFromCharacters();
1930
SortedTextList lines;
1931
LineRect line_rects;
1933
* Create arbitrary lines from words and sort them according to X and Y position
1935
makeAndSortLines(m_words, &lines, &line_rects);
1938
* Calculate statistical information which will be needed later for algorithm implementation
1940
int word_spacing, line_spacing, col_spacing;
1941
calculateStatisticalInformation(lines, line_rects, &word_spacing, &line_spacing, &col_spacing);
1942
for(int i = 0 ; i < lines.length() ; i++)
1944
qDeleteAll(lines.at(i));
1852
const QList<WordWithCharacters> wordsWithCharacters = makeWordFromCharacters(characters, pageWidth, pageHeight);
1949
1855
* Make a XY Cut tree for segmentation of the texts
1951
const RegionTextList tree = XYCutForBoundingBoxes(word_spacing * 2, line_spacing * 2);
1857
const RegionTextList tree = XYCutForBoundingBoxes(wordsWithCharacters, m_page->m_page->boundingBox(), pageWidth, pageHeight);
1954
1860
* Add spaces to the word
1956
addNecessarySpace(tree);
1862
const WordsWithCharacters listWithWordsAndSpaces = addNecessarySpace(tree, pageWidth, pageHeight);
1959
1865
* Break the words into characters
1961
breakWordIntoCharacters(word_chars_map);
1867
TextList listOfCharacters;
1868
foreach(const WordWithCharacters &word, listWithWordsAndSpaces)
1871
listOfCharacters.append(word.characters);
1873
setWordList(listOfCharacters);
1964
1876
TextEntity::List TextPage::words(const RegularAreaRect *area, TextAreaInclusionBehaviour b) const