1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
1 |
//========================================================================
|
2 |
//
|
|
3 |
// ABWOutputDev.cc
|
|
4 |
//
|
|
1.1.18
by Sebastien Bacher
Import upstream version 0.10.3 |
5 |
// Copyright 2006-2007 Jauco Noordzij <jauco@jauco.nl>
|
6 |
// Copyright 2007 Dominic Lachowicz <cinamod@hotmail.com>
|
|
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
7 |
//
|
8 |
// Based somewhat on HtmlOutputDev.cc
|
|
9 |
//
|
|
10 |
//========================================================================
|
|
11 |
||
12 |
#ifdef __GNUC__
|
|
13 |
#pragma implementation
|
|
14 |
#endif
|
|
15 |
||
16 |
#include "config.h" |
|
17 |
#include <stdio.h> |
|
18 |
#include <stdlib.h> |
|
19 |
#include <stdarg.h> |
|
20 |
#include <stddef.h> |
|
21 |
#include <ctype.h> |
|
22 |
#include <math.h> |
|
23 |
#include "goo/GooString.h" |
|
24 |
#include "goo/GooList.h" |
|
25 |
#include "UnicodeMap.h" |
|
26 |
#include "goo/gmem.h" |
|
27 |
#include "Error.h" |
|
28 |
#include "GfxState.h" |
|
29 |
#include "GlobalParams.h" |
|
30 |
#include "ABWOutputDev.h" |
|
31 |
#include "PDFDoc.h" |
|
32 |
||
33 |
#include <libxml/parser.h> |
|
34 |
#include <libxml/tree.h> |
|
35 |
#include <libxml/xpath.h> |
|
36 |
#include <libxml/xpathInternals.h> |
|
37 |
||
38 |
||
39 |
// Horizontal gap between the end of the previous string and the start of
// the next one, as a fraction of the transformed font size, above which
// beginString() starts a new word.
#define minWordBreakSpace 0.1

// Horizontal gap, as a fraction of the transformed font size, above which
// beginString() starts a whole new text block instead of a new word.
#define maxWordSpacing 1.5

// beginString() divides the transformed font size by this value to get the
// largest vertical offset two strings may have while still being treated
// as part of the same line.
#define maxLineSpacingDelta 1.5

// NOTE(review): not referenced in the part of the file visible here;
// presumably limits for the vertical/horizontal cuts -- confirm before use.
#define C_maxVCutValue 4
#define C_maxHCutValue 5
|
|
52 |
//------------------------------------------------------------------------
|
|
53 |
// ABWOutputDev
|
|
54 |
//------------------------------------------------------------------------
|
|
55 |
||
56 |
// Sets up the output skeleton inside the caller-supplied libxml2 document:
// an <abiword> root element with a <styles> and a <content> child.
ABWOutputDev::ABWOutputDev(xmlDocPtr ext_doc)
{
  // Nothing is open yet: no PDF document, page, block or word.
  pdfdoc = NULL;
  N_word = NULL;
  N_Block = NULL;
  N_text = NULL;
  N_style = NULL;
  N_page = NULL;

  // Build <abiword><styles/><content/></abiword> in the given document.
  doc = ext_doc;
  N_root = xmlNewNode(NULL, BAD_CAST "abiword");
  xmlDocSetRootElement(doc, N_root);
  N_styleset = xmlNewChild(N_root, NULL, BAD_CAST "styles", NULL);
  N_content = xmlNewChild(N_root, NULL, BAD_CAST "content", NULL);

  // Encoding used by drawChar() to turn Unicode into output bytes.
  uMap = globalParams->getTextEncoding();

  // Style numbering starts at 1.
  Style = 1;
  maxStyle = 1;
}
|
|
68 |
||
69 |
// The destructor only calls xmlCleanupParser(); the document handed to the
// constructor is not freed here.
// NOTE(review): libxml2 recommends calling xmlCleanupParser() only once the
// whole process is done with the library -- confirm no other user is live.
ABWOutputDev::~ABWOutputDev()
{
  xmlCleanupParser();
}
|
|
72 |
||
73 |
// Begins a new page. The <page> node created here is a detached
// placeholder parent for the text read from the page; endPage() attaches
// the finished page to N_content.
void ABWOutputDev::startPage(int pageNum, GfxState *state)
{
  G_pageNum = pageNum;
  N_page = xmlNewNode(NULL, BAD_CAST "page");
}
|
|
80 |
||
81 |
/*Callback to denote that poppler reached the end of a page
|
|
82 |
here I insert most of the interesting processing stuff*/
|
|
83 |
void ABWOutputDev::endPage() { |
|
84 |
//make sure all words are closed
|
|
85 |
endTextBlock(); |
|
86 |
cleanUpNode(N_page, true); |
|
87 |
//xmlAddChild(N_content, N_page);
|
|
88 |
//xmlSaveFormatFileEnc("pre-cut.xml", doc, "UTF-8", 1);
|
|
89 |
//xmlUnlinkNode(N_page);
|
|
90 |
//call the top down cutting mechanism
|
|
91 |
recursiveXYC(N_page); |
|
92 |
//by stopping to worry about creating empty nodes I made the code quite a
|
|
93 |
//bit more robust. This function makes sure we have a nice'n'clean tree
|
|
94 |
cleanUpNode(N_page, true); |
|
95 |
//xmlAddChild(N_content, N_page);
|
|
96 |
//xmlSaveFormatFileEnc("raw.xml", doc, "UTF-8", 1);
|
|
97 |
//xmlUnlinkNode(N_page);
|
|
98 |
||
99 |
//Interpret the XY tree and infer text blocks and columns
|
|
100 |
interpretXYTree(); |
|
101 |
cleanUpNode(N_page, true); |
|
102 |
//xmlAddChild(N_content, N_page);
|
|
103 |
//xmlSaveFormatFileEnc("interpreted.xml", doc, "UTF-8", 1);
|
|
104 |
//xmlUnlinkNode(N_page);
|
|
105 |
||
106 |
//I have blocks and columns, this function will turn that into paragraphs and
|
|
107 |
//columns
|
|
108 |
generateParagraphs(); |
|
109 |
cleanUpNode(N_page, true); |
|
110 |
xmlAddChild(N_content, N_page); |
|
111 |
N_page = NULL; |
|
112 |
}
|
|
113 |
||
114 |
/* Recursive XY cut. Asks getBiggestSeperator() for the widest vertical and
   horizontal whitespace among the children of nodeset, splits the children
   in two with splitNodes() along the chosen one, and then recurses into
   both halves. Recursion ends when neither direction has a separator. */
void ABWOutputDev::recursiveXYC(xmlNodePtr nodeset)
{
  float vStart, vEnd, hStart, hEnd;

  const float vertGap = getBiggestSeperator(nodeset, VERTICAL, &vStart, &vEnd);
  const float horiGap = getBiggestSeperator(nodeset, HORIZONTAL, &hStart, &hEnd);
  const bool noVertical = (vertGap == -1);
  const bool noHorizontal = (horiGap == -1);

  //FIXME: add assertions that both gaps are >= -1
  if (noVertical && noHorizontal) {
    printf("No seperators\n");
    return;
  }

  bool cutVertically;
  if (noVertical) {
    cutVertically = false;
  } else if (noHorizontal) {
    cutVertically = true;
  } else {
    // Both cuts are possible. Readers prefer vertical cuts over horizontal
    // ones, hence the bias; the 1.7 factor is empirical.
    cutVertically = (vertGap >= (horiGap/1.7));
  }

  if (cutVertically) {
    splitNodes(vStart, VERTICAL, nodeset, vertGap);
  } else {
    splitNodes(hStart, HORIZONTAL, nodeset, horiGap);
  }

  // splitNodes() left exactly two children behind: the two halves.
  recursiveXYC(nodeset->children);
  recursiveXYC(nodeset->children->next);
}
|
152 |
||
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
153 |
void ABWOutputDev::splitNodes(float splitValue, unsigned int direction, xmlNodePtr N_parent, double seperator){ |
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
154 |
//This function takes a nodeset and splits it based on a cut value. It returns
|
155 |
//the nodePtr with two childnodes, the both chunks.
|
|
156 |
xmlNodePtr N_move, N_cur, N_newH, N_newL; |
|
157 |
char * propName; |
|
158 |
const char *nodeName; |
|
159 |
char buf[20]; |
|
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
160 |
if (direction == HORIZONTAL) { |
161 |
propName = "Y1"; |
|
162 |
nodeName = "horizontal"; |
|
163 |
}
|
|
164 |
else { |
|
165 |
propName = "X1"; |
|
166 |
nodeName = "vertical"; |
|
167 |
}
|
|
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
168 |
N_newH = xmlNewNode(NULL, BAD_CAST nodeName); |
169 |
N_newL = xmlNewNode(NULL, BAD_CAST nodeName); |
|
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
170 |
sprintf(buf, "%f", seperator); |
171 |
xmlNewProp(N_newH, BAD_CAST "diff", BAD_CAST buf); |
|
172 |
sprintf(buf, "%f", seperator); |
|
173 |
xmlNewProp(N_newL, BAD_CAST "diff", BAD_CAST buf); |
|
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
174 |
N_cur = N_parent->children; |
175 |
while (N_cur){ |
|
176 |
N_move = N_cur->next; |
|
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
177 |
xmlUnlinkNode(N_cur); |
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
178 |
if (xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST propName)) > splitValue){ |
179 |
xmlAddChild(N_newH, N_cur); |
|
180 |
}
|
|
181 |
else { |
|
182 |
xmlAddChild(N_newL, N_cur); |
|
183 |
}
|
|
184 |
N_cur = N_move; |
|
185 |
}
|
|
186 |
xmlAddChild(N_parent, N_newL); |
|
187 |
xmlAddChild(N_parent, N_newH); |
|
188 |
}
|
|
189 |
||
190 |
/* Finds the largest strip of whitespace between the children of N_set.
 *
 * N_set      node whose children are inspected
 * direction  VERTICAL uses the children's "X1"/"X2" attributes, anything
 *            else uses "Y1"/"Y2"
 * C1, C2     receive the start and end coordinate of the chosen gap; both
 *            are set to 0 when no gap is chosen and are left untouched
 *            when N_set has no children
 *
 * Returns the size of the chosen gap, or -1 when there are no children or
 * no gap was accepted.
 */
float ABWOutputDev::getBiggestSeperator(xmlNodePtr N_set, unsigned int direction, float * C1, float * C2)
{
  int i;
  int min;
  int nodeCount = xmlLsCountNode(N_set);
  float store;
  float gap, endV;
  float * stt;
  float * end;
  const char *sttName;
  const char *endName;
  xmlChar *prop;

  if (nodeCount == 0){
    //Add assertion that this shouldn't happen
    fprintf(stderr,"No child nodes");
    return -1;
  }

  if (direction == VERTICAL) {
    sttName = "X1";
    endName = "X2";
  }
  else {
    sttName = "Y1";
    endName = "Y2";
  }

  //store all coordinates in two arrays (one for start, one for end)
  stt = new float[nodeCount];
  end = new float[nodeCount];
  i = 0;
  for (xmlNodePtr N_cur = N_set->children; N_cur != NULL && i < nodeCount; N_cur = N_cur->next){
    // xmlGetProp() returns a copy that the caller has to release.
    prop = xmlGetProp(N_cur, BAD_CAST sttName);
    stt[i] = xmlXPathCastStringToNumber(prop);
    if (prop)
      xmlFree(prop);
    prop = xmlGetProp(N_cur, BAD_CAST endName);
    end[i] = xmlXPathCastStringToNumber(prop);
    if (prop)
      xmlFree(prop);
    i++;
  }

  //Selection sort on the start coordinate, keeping each end with its start.
  //The candidate has to be compared with the smallest value found so far
  //(stt[min]); comparing with stt[i] does not find the minimum and leaves
  //the arrays unsorted.
  for (i = 0; i < nodeCount - 1; i++){
    min = i;
    for (int j = i + 1; j < nodeCount; j++)
      if (stt[j] < stt[min])
        min = j;
    store = stt[i];
    stt[i] = stt[min];
    stt[min] = store;
    store = end[i];
    end[i] = end[min];
    end[min] = store;
  }

  //find the largest gap; endV is the furthest end coordinate seen so far
  gap = -1;
  endV = end[0];
  *C1 = 0;
  *C2 = 0;
  for (int inspect = 1; inspect < nodeCount; inspect++){
    //no (sufficiently larger) gap
    if (((stt[inspect] - endV) - gap) < 0.5){ //FIXME:This is copied almost directly from the previous function, needs checking out
      //partial overlap instead of complete one
      if (end[inspect] > endV)
        endV = end[inspect];
    }
    //gap
    else{
      //gap is larger than any previous gap
      if (gap < (stt[inspect] - endV)){
        gap = stt[inspect] - endV;
        *C1 = endV;
        *C2 = stt[inspect];
      }
      endV = end[inspect];
    }
  }

  delete[] stt;
  delete[] end;
  return gap;
}
|
261 |
||
262 |
void ABWOutputDev::updateFont(GfxState *state) { |
|
263 |
char buf[160]; |
|
264 |
xmlNodePtr N_cur; |
|
265 |
GfxFont *font; |
|
266 |
bool found = false; |
|
267 |
bool isBold, isItalic, S_isBold, S_isItalic; |
|
268 |
isBold = isItalic = S_isBold = S_isItalic = false; |
|
269 |
font = state->getFont(); |
|
270 |
GooString *ftName; |
|
271 |
char *fnEnd, *fnName; |
|
1.1.12
by Loic Minier
Import upstream version 0.8.2 |
272 |
int fnStart, ftSize; |
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
273 |
//the first time this function is called there is no funt.
|
274 |
//Fixme: find out if that isn'y a bug
|
|
275 |
if (font){ |
|
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
276 |
isBold = (font->isBold() || font->getWeight() >6 || (strstr(font->getOrigName()->getCString(), "Bold")-font->getOrigName()->getCString() == (font->getOrigName()->getLength()-4))); |
277 |
isItalic = (font->isItalic() || (strstr(font->getOrigName()->getCString(), "Italic")-font->getOrigName()->getCString() == (font->getOrigName()->getLength()-6))); |
|
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
278 |
ftSize = int(state->getTransformedFontSize())-1; |
279 |
ftName = new GooString(font->getOrigName()); |
|
280 |
fnStart = strcspn(ftName->getCString(), "+"); |
|
281 |
if (fnStart < ftName->getLength()) |
|
282 |
ftName->del(0,fnStart+1); |
|
283 |
fnEnd = strrchr(ftName->getCString(), 44); |
|
284 |
if (fnEnd == 0) |
|
285 |
fnEnd = strrchr(ftName->getCString(), 45); |
|
286 |
if (fnEnd != 0) |
|
287 |
ftName->del(fnEnd-ftName->getCString(),ftName->getLength()-1); |
|
288 |
||
289 |
/* fnName = ftName;
|
|
290 |
if (isBold or isItalic){
|
|
291 |
fnStart = strcspn(fnName, "+");
|
|
292 |
if (fnStart == font->getOrigName()->getLength())
|
|
293 |
fnStart = 0;
|
|
294 |
else fnStart++;
|
|
295 |
||
296 |
fnEnd = strstr(fnName, ",");
|
|
297 |
if (fnEnd == 0)
|
|
298 |
fnEnd = strstr(fnName, "-");
|
|
299 |
if (fnEnd != 0)
|
|
300 |
fnName[fnEnd-fnName] = 0;
|
|
301 |
// char fntName[fnLength];
|
|
302 |
// strncpy (fntName,fnName+fnStart+1,fnLength);
|
|
303 |
fnName+=fnStart;
|
|
304 |
// fnName = fntName;
|
|
305 |
}
|
|
306 |
else {*/
|
|
307 |
fnName = ftName->getCString(); |
|
308 |
// }
|
|
309 |
for (N_cur = N_styleset->children; N_cur; N_cur = N_cur ->next){ |
|
310 |
if ( |
|
311 |
isBold == (xmlStrcasecmp(xmlGetProp(N_cur,BAD_CAST "bold"),BAD_CAST "bold;") == 0) |
|
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
312 |
&&
|
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
313 |
isItalic == (xmlStrcasecmp(xmlGetProp(N_cur,BAD_CAST "italic"),BAD_CAST "italic") == 0) |
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
314 |
&&
|
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
315 |
xmlStrcasecmp(xmlGetProp(N_cur,BAD_CAST "font"),BAD_CAST fnName) == 0 |
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
316 |
&&
|
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
317 |
xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "size")) == ftSize |
318 |
) { |
|
319 |
found = true; |
|
320 |
Style = int(xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "id"))); |
|
321 |
}
|
|
322 |
}
|
|
323 |
if (!found){ |
|
324 |
N_cur = xmlNewChild(N_styleset, NULL, BAD_CAST "s", NULL); |
|
325 |
xmlSetProp(N_cur, BAD_CAST "type", BAD_CAST "P"); |
|
326 |
sprintf(buf, "%d", maxStyle++); |
|
327 |
xmlSetProp(N_cur, BAD_CAST "name", BAD_CAST buf); |
|
328 |
xmlSetProp(N_cur, BAD_CAST "id", BAD_CAST buf); |
|
329 |
Style = maxStyle; |
|
330 |
sprintf(buf, "%d", ftSize); xmlSetProp(N_cur, BAD_CAST "size", BAD_CAST buf); |
|
331 |
isBold ? xmlSetProp(N_cur, BAD_CAST "bold", BAD_CAST "bold;") : xmlSetProp(N_cur, BAD_CAST "bold", BAD_CAST "normal;"); |
|
332 |
isItalic ? xmlSetProp(N_cur, BAD_CAST "italic", BAD_CAST "italic"): xmlSetProp(N_cur, BAD_CAST "italic", BAD_CAST "normal"); |
|
333 |
xmlSetProp(N_cur, BAD_CAST "font", BAD_CAST fnName); |
|
334 |
}
|
|
335 |
}
|
|
336 |
}
|
|
337 |
||
338 |
/* Called for every character drawn. A single space (uLen == 1 and
   code == 0x20) closes the current word and opens a new one after the
   space; any other character advances the end position of the current
   word by (dx, dy) and appends its text to N_word.

   u/uLen hold the Unicode value(s) of the character. A ligature maps to
   more than one value, so all of them are converted with uMap and
   appended. */
void ABWOutputDev::drawChar(GfxState *state, double x, double y,
                            double dx, double dy,
                            double originX, double originY,
                            CharCode code, int nBytes, Unicode *u, int uLen)
{
  //I wouldn't know what size this should safely be. I guess 64 bytes should be
  //enough for any unicode character
  char buf[64];
  int charLen;

  if (uLen == 1 && code == 0x20) {
    //If we break a text sequence on space, then the X1 should be increased
    //but the Y1 and Y2 should remain the same.
    beginWord(state,X2+dx,Y2);
    return;
  }

  X2 += dx;
  Y2 += dy;

  // Nothing to append without Unicode data or an output encoding.
  if (!u || !uMap)
    return;

  for (int i = 0; i < uLen; i++) {
    charLen = uMap->mapUnicode(u[i],buf,sizeof(buf));
    if (charLen > 0)
      xmlNodeAddContentLen(N_word, BAD_CAST buf, charLen);
  }
}
|
|
368 |
||
369 |
/* Called at the start of every string. Compares the device position of
   the string with the end of the current word (X2, Y2) and decides whether
   the string continues the word, starts a new word or starts a new text
   block. */
void ABWOutputDev::beginString(GfxState *state, GooString *s)
{
  double devX, devY;
  state->transform(state->getCurX(), state->getCurY(), &devX, &devY);

  if (!N_word) {
    //No word is open: this is the first one. Reset the bookkeeping and
    //open a text block (which opens the word as well).
    X2 = devX;
    Y2 = devY;
    horDist = 0;
    verDist = 0;
    height = 0;
    beginTextBlock(state,devX,devY);
    return;
  }

  verDist = devY-Y2;
  horDist = devX-X2;

  //FIXME: this test seems awkward to me.
  const bool lineJump =
    (fabs(verDist) > (state->getTransformedFontSize()/maxLineSpacingDelta));

  if (lineJump || horDist > (state->getTransformedFontSize()*maxWordSpacing)) {
    // Too far away from the previous word: new block.
    beginTextBlock(state,devX,devY);
  }
  else if (horDist > (state->getTransformedFontSize()*minWordBreakSpace)) {
    // Same line, but far enough apart to be a separate word.
    beginWord(state,devX,devY);
  }
}
|
|
397 |
||
398 |
// Intentionally empty: there is nothing to do at the end of a string.
void ABWOutputDev::endString(GfxState *state)
{
}
|
|
401 |
||
402 |
/* Closes the current word (if any) and opens a new <word> under N_Block
   whose baseline starts at (x, y). The new node gets its "X1", "Y1" and
   "style" attributes here; endWord() adds the remaining ones. */
void ABWOutputDev::beginWord(GfxState *state, double x, double y){
  // Coordinates come from the PDF, so keep the formatting bounded.
  char buf[64];
  GfxFont *font;

  endWord();
  X1 = x;
  Y2 = y;

  horDist = X1-X2;
  verDist = Y1-Y2;

  X2 = X1;
  // The word box reaches from the baseline up by the font's ascent; with
  // no font set the height is taken as 0.
  font = state->getFont();
  height = font ? font->getAscent() * state->getTransformedFontSize() : 0;
  Y1 = Y2-height;

  N_word = xmlNewChild(N_Block, NULL, BAD_CAST "word", NULL);
  snprintf(buf, sizeof(buf), "%f", X1);
  xmlNewProp(N_word, BAD_CAST "X1", BAD_CAST buf);
  snprintf(buf, sizeof(buf), "%f", Y1);
  xmlNewProp(N_word, BAD_CAST "Y1", BAD_CAST buf);
  snprintf(buf, sizeof(buf), "%d", Style);
  xmlNewProp(N_word, BAD_CAST "style", BAD_CAST buf);
}
|
|
421 |
||
422 |
void ABWOutputDev::endWord(){ |
|
423 |
char buf[20]; |
|
424 |
if (N_word) { |
|
425 |
sprintf(buf, "%f", X2); xmlNewProp(N_word, BAD_CAST "X2", BAD_CAST buf); |
|
426 |
sprintf(buf, "%f", Y2); xmlNewProp(N_word, BAD_CAST "Y2", BAD_CAST buf); |
|
427 |
sprintf(buf, "%f", X2-X1); xmlNewProp(N_word, BAD_CAST "width", BAD_CAST buf); |
|
428 |
sprintf(buf, "%f", Y2-Y1); xmlNewProp(N_word, BAD_CAST "height", BAD_CAST buf); |
|
429 |
N_word = NULL; |
|
430 |
}
|
|
431 |
}
|
|
432 |
||
433 |
// Closes the current text block, opens a new <Textblock> under the page
// and starts its first word at (x, y).
void ABWOutputDev::beginTextBlock(GfxState *state, double x, double y)
{
  endTextBlock();

  N_Block = xmlNewChild(N_page, NULL, BAD_CAST "Textblock", NULL);

  beginWord(state,x,y);
}
|
|
438 |
||
439 |
void ABWOutputDev::endTextBlock(){ |
|
440 |
if (N_Block) { |
|
441 |
endWord(); |
|
442 |
N_Block = NULL; |
|
443 |
}
|
|
444 |
}
|
|
445 |
/*
|
|
446 |
This will be a function to retrieve coherent text blocks from the chunk tree.*/
|
|
447 |
void ABWOutputDev::interpretXYTree(){ |
|
448 |
xmlNodePtr N_oldPage; |
|
449 |
N_oldPage = N_page; |
|
450 |
N_page = xmlNewNode(NULL, BAD_CAST "page"); |
|
451 |
N_column = N_page; |
|
452 |
//xmlAddChild(N_content, N_page);
|
|
453 |
N_Block = xmlNewChild(N_column, NULL, BAD_CAST "chunk", NULL); |
|
454 |
ATP_recursive(N_oldPage); |
|
455 |
}
|
|
456 |
||
457 |
/* Walks the XY-cut tree below N_parent and moves its Textblock nodes into
   the output structure built from N_colset, N_column and N_Block.

   The number of children of N_parent selects the treatment:

   one child
     The child is moved into a new <line> of the current block.

   two children, the first one a "vertical"
     The two children are two columns. Unless N_parent is a "vertical"
     itself, a new <colset> is opened in the current column; if it is, the
     current column is unlinked and the existing colset is reused (three
     columns show up as a vertical nested inside a vertical). Each child is
     processed inside a <column> of its own holding a fresh <chunk>.
     Afterwards the saved column, and where applicable the saved colset,
     become current again.

   two children, otherwise
     A first child that is a Textblock is moved into a new <line> of the
     current block; any other first child is processed recursively between
     two freshly opened <chunk>s. A second child that is a Textblock joins
     the first one's line (or gets a new line when the first was not a
     Textblock); any other second child is processed recursively.

   any other number of children
     All Textblock children are moved into one new <line> of the current
     block; other children are left where they are. */
void ABWOutputDev::ATP_recursive(xmlNodePtr N_parent){
  xmlNodePtr child = N_parent->children;
  if (child == NULL)
    return;

  xmlNodePtr sibling = child->next;
  xmlNodePtr line = NULL;
  const int childCount = xmlLsCountNode(N_parent);

  // --- exactly one child ------------------------------------------------
  if (childCount == 1) {
    line = xmlNewChild(N_Block, NULL, BAD_CAST "line", NULL);
    xmlUnlinkNode(child);
    xmlAddChild(line, child);
    return;
  }

  // --- more than two children -------------------------------------------
  if (childCount != 2) {
    line = xmlNewChild(N_Block, NULL, BAD_CAST "line", NULL);
    for (; child != NULL; child = sibling) {
      // Fetch the successor first: moving the node changes its siblings.
      sibling = child->next;
      if (xmlStrcasecmp(child->name,BAD_CAST "Textblock") == 0){
        xmlUnlinkNode(child);
        xmlAddChild(line, child);
      }
    }
    return;
  }

  // --- two children: columns --------------------------------------------
  if (xmlStrcasecmp(child->name,BAD_CAST "vertical") == 0){
    const bool parentIsVertical =
      (xmlStrcasecmp(N_parent->name,BAD_CAST "vertical") == 0);
    //store the column for the moment
    xmlNodePtr savedColumn = N_column;
    xmlNodePtr savedColset = NULL;

    if (parentIsVertical) {
      //the colset exists already; the column that was just added for this
      //node is replaced by the ones created below
      xmlUnlinkNode(N_column);
    }
    else {
      savedColset = N_colset;
      N_colset = xmlNewChild(N_column, NULL, BAD_CAST "colset", NULL);
    }

    //first column
    N_column = xmlNewChild(N_colset, NULL, BAD_CAST "column", NULL);
    N_Block = xmlNewChild(N_column, NULL, BAD_CAST "chunk", NULL);
    ATP_recursive(child);

    //second column
    N_column = xmlNewChild(N_colset, NULL, BAD_CAST "column", NULL);
    N_Block = xmlNewChild(N_column, NULL, BAD_CAST "chunk", NULL);
    ATP_recursive(sibling);

    //end the columns by continuing in the master column
    N_column = savedColumn;
    if (!parentIsVertical){
      if (savedColset != NULL)
        N_colset = savedColset;
      else
        fprintf(stderr,"N_templColset should not! be empty (line 823)");//FIXME: add assert
    }
    return;
  }

  // --- two children: horizontals and/or text blocks -----------------------
  const bool firstIsText =
    (xmlStrcasecmp(child->name,BAD_CAST "Textblock") == 0);

  if (firstIsText) {
    line = xmlNewChild(N_Block, NULL, BAD_CAST "line", NULL);
    xmlUnlinkNode(child);
    xmlAddChild(line, child);
  }
  else {
    N_Block = xmlNewChild(N_column, NULL, BAD_CAST "chunk", NULL);
    ATP_recursive(child);
    N_Block = xmlNewChild(N_column, NULL, BAD_CAST "chunk", NULL);
  }

  if (xmlStrcasecmp(sibling->name,BAD_CAST "Textblock") != 0) {
    ATP_recursive(sibling);
    return;
  }

  //FIXME: this is not neat. We should ignore the cut ignoring when there are only two elements above
  //line aggregation doesn't work anyway atm.
  if (!firstIsText)
    line = xmlNewChild(N_Block, NULL, BAD_CAST "line", NULL);
  xmlUnlinkNode(sibling);
  xmlAddChild(line, sibling);
}
|
|
612 |
||
613 |
/*The cleanup function. It started out as a simple function to remove empty nodes
|
|
614 |
so that I could call xmladdnewchildnode as often as I liked so that I wouldn't get seg-faults
|
|
615 |
It is now a bit more advanced, makes sure the tree is as it's supposed to be and adds information too*/
|
|
616 |
void ABWOutputDev::cleanUpNode(xmlNodePtr N_parent, bool aggregateInfo){ |
|
617 |
double tX1=-1, tX2=-1, tY1=-1, tY2=-1; |
|
618 |
xmlNodePtr N_cur, N_next; |
|
619 |
N_cur = N_parent->children; |
|
620 |
char buf[20]; |
|
621 |
int prevStyle = -1; |
|
622 |
xmlChar *val; |
|
623 |
int styleLength = xmlLsCountNode(N_styleset)+1; |
|
624 |
float stylePos; |
|
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
625 |
int *styles = new int[styleLength]; |
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
626 |
for (int i=1; i< styleLength; i++) { styles[i] = 0;} |
627 |
/*
|
|
628 |
ignore two horizontal nodes with textBlocks right underneath them. They
|
|
629 |
signal the end of a chunk, and the horizontal seperation needs to be
|
|
630 |
preserved, because it means they are different lines. The second horizontal
|
|
631 |
therefore needs to be kept.
|
|
632 |
*/
|
|
633 |
if ((xmlLsCountNode(N_parent) == 2) |
|
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
634 |
&&
|
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
635 |
xmlStrcasecmp(N_parent->name,BAD_CAST "horizontal") == 0 |
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
636 |
&&
|
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
637 |
N_cur
|
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
638 |
&&
|
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
639 |
N_cur->next |
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
640 |
&&
|
641 |
xmlStrcasecmp(N_cur->name,BAD_CAST "horizontal") == 0 && xmlStrcasecmp(N_cur->next->name,BAD_CAST "horizontal") == 0 |
|
642 |
&&
|
|
643 |
xmlLsCountNode(N_cur) == 1 && xmlLsCountNode(N_cur->next) == 1 |
|
644 |
&&
|
|
645 |
xmlStrcasecmp(N_cur->children->name,BAD_CAST "Textblock") == 0 && xmlStrcasecmp(N_cur->next->children->name,BAD_CAST "Textblock") == 0 |
|
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
646 |
) { |
647 |
xmlAddPrevSibling(N_cur->next,N_cur->children); |
|
648 |
xmlUnlinkNode(N_cur); |
|
649 |
}
|
|
650 |
/*
|
|
651 |
This removes columns if one of the parts is actually a single letter.
|
|
652 |
I found out I liked the columns better, so I have the code commented out.
|
|
653 |
*/
|
|
654 |
/* else if ((xmlLsCountNode(N_parent) == 2)
|
|
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
655 |
&&
|
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
656 |
N_cur
|
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
657 |
&&
|
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
658 |
N_cur->next
|
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
659 |
&&
|
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
660 |
xmlStrcasecmp(N_cur->name,BAD_CAST "vertical") == 0
|
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
661 |
&&
|
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
662 |
xmlStrcasecmp(N_cur->next->name,BAD_CAST "vertical") == 0
|
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
663 |
&&
|
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
664 |
(N_cur->children)
|
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
665 |
&&
|
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
666 |
(N_cur->children->children)
|
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
667 |
&&
|
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
668 |
(N_cur->children->children->children)
|
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
669 |
&&
|
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
670 |
xmlStrlen(N_cur->children->children->children->content) == 1) {
|
671 |
N_next = N_cur->next;
|
|
672 |
xmlAddChild(N_parent, N_next->children);
|
|
673 |
xmlAddPrevSibling(N_next->children->children, N_cur->children);
|
|
674 |
xmlUnlinkNode(N_cur);
|
|
675 |
xmlUnlinkNode(N_next);
|
|
676 |
} */else { |
|
677 |
while (N_cur){ |
|
678 |
N_next = N_cur->next; |
|
679 |
cleanUpNode(N_cur, aggregateInfo); |
|
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
680 |
if (xmlLsCountNode(N_cur) == 0 && (xmlStrcasecmp(N_cur->name,BAD_CAST "cbr") != 0) && (xmlStrcasecmp(N_cur->name,BAD_CAST "s") != 0)) |
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
681 |
xmlUnlinkNode(N_cur); |
682 |
//If the node is still around
|
|
683 |
N_cur = N_next; |
|
684 |
}
|
|
685 |
}
|
|
686 |
//If a countainer element has only one child, it can be removed except for vertical
|
|
687 |
//cuts with only one textElement;
|
|
688 |
//the main reason for this code is to remove the crumbs after cleaning up in the loop above
|
|
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
689 |
if ((xmlLsCountNode(N_parent) == 1) && ((xmlStrcasecmp(N_parent->name,BAD_CAST "horizontal") == 0) || ((xmlStrcasecmp(N_parent->name,BAD_CAST "vertical") == 0) && (xmlStrcasecmp(N_parent->children->name,BAD_CAST "Textblock") != 0)))){ |
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
690 |
N_cur = N_parent->children; |
691 |
xmlAddPrevSibling(N_parent,N_cur); |
|
692 |
xmlUnlinkNode(N_parent); |
|
693 |
}
|
|
694 |
//We cannot remove the page element so if it has only one childnode, we remove that childnode instead
|
|
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
695 |
if ((xmlStrcasecmp(N_parent->name,BAD_CAST "page") == 0) && (xmlLsCountNode(N_parent) == 1)) { |
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
696 |
N_cur = N_parent->children->children; |
697 |
while (N_cur){ |
|
698 |
N_next = N_cur->next; |
|
699 |
xmlUnlinkNode(N_cur); |
|
700 |
xmlAddChild(N_parent, N_cur); |
|
701 |
N_cur = N_next; |
|
702 |
}
|
|
703 |
xmlUnlinkNode(N_parent->children); |
|
704 |
}
|
|
705 |
//Ok, so by this time the N_parent and his children are guaranteed to be clean
|
|
706 |
//this for loop gets information from the 'word' elements and propagates it up
|
|
707 |
//the tree.
|
|
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
708 |
if (aggregateInfo && xmlStrcasecmp(N_parent->name,BAD_CAST "word") != 0) { |
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
709 |
for (N_cur = N_parent->children; N_cur; N_cur = N_cur->next){ |
710 |
val = xmlGetProp(N_cur,BAD_CAST "style"); |
|
711 |
stylePos = xmlXPathCastStringToNumber(val); |
|
712 |
//fprintf(stderr,"1: %f, %d\n",stylePos,int(stylePos));
|
|
713 |
styles[int(stylePos)]=styles[int(stylePos)]+1; |
|
714 |
//fprintf(stderr,"2: styles[%d] = %d\n",int(stylePos),styles[int(stylePos)]);
|
|
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
715 |
(xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "X1")) < tX1 || tX1 == -1)? tX1 = xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "X1")) : tX1 = tX1; |
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
716 |
(xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "X2")) > tX2) ? tX2 = xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "X2")) : tX2 = tX2; |
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
717 |
(xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "Y1")) < tY1 || tY1 == -1)? tY1 = xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "Y1")) : tY1 = tY1; |
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
718 |
(xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "Y2")) > tY2) ? tY2 = xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "Y2")) : tY2 = tY2; |
719 |
}
|
|
720 |
sprintf(buf, "%f", tX1); xmlSetProp(N_parent, BAD_CAST "X1", BAD_CAST buf); |
|
721 |
sprintf(buf, "%f", tX2); xmlSetProp(N_parent, BAD_CAST "X2", BAD_CAST buf); |
|
722 |
sprintf(buf, "%f", tY1); xmlSetProp(N_parent, BAD_CAST "Y1", BAD_CAST buf); |
|
723 |
sprintf(buf, "%f", tY2); xmlSetProp(N_parent, BAD_CAST "Y2", BAD_CAST buf); |
|
724 |
sprintf(buf, "%f", tX2-tX1); xmlSetProp(N_parent, BAD_CAST "width", BAD_CAST buf); |
|
725 |
sprintf(buf, "%f", tY2-tY1); xmlSetProp(N_parent, BAD_CAST "height", BAD_CAST buf); |
|
726 |
prevStyle = 0; |
|
727 |
styles[0] = -1; |
|
728 |
for (int i=1; i< styleLength; i++) { if (styles[i] > styles[prevStyle]) prevStyle = i; } |
|
729 |
//fprintf(stderr,"%d\n", prevStyle);
|
|
730 |
if (prevStyle > 0){ |
|
731 |
sprintf(buf, "%d", prevStyle); xmlSetProp(N_parent, BAD_CAST "style", BAD_CAST buf); |
|
732 |
}
|
|
733 |
}
|
|
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
734 |
if (N_parent->children && xmlStrcasecmp(N_parent->children->name,BAD_CAST "line") == 0 && xmlGetProp(N_parent->children,BAD_CAST "alignment") != NULL) |
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
735 |
xmlSetProp(N_parent, BAD_CAST "alignment", xmlGetProp(N_parent->children,BAD_CAST "alignment")); |
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
736 |
|
1.1.18
by Sebastien Bacher
Import upstream version 0.10.3 |
737 |
delete[] styles; |
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
738 |
}
|
739 |
||
740 |
void ABWOutputDev::generateParagraphs() { |
|
741 |
xmlNodePtr N_cur, N_parent, N_p, N_line, N_next; |
|
742 |
int lvl; |
|
743 |
//basically I first detect the text-alignment within blocks.
|
|
744 |
//ASSUMPTION: my block seperation thing is good enough so I don't need to
|
|
745 |
//worry about two alignments in one paragraph
|
|
746 |
||
747 |
X1 = 0; |
|
748 |
X2 = pdfdoc->getPageCropWidth(G_pageNum); |
|
749 |
Y1 = 0; |
|
750 |
Y2 = pdfdoc->getPageCropHeight(G_pageNum); |
|
751 |
addAlignment(N_page); |
|
752 |
||
753 |
//then it's a switch per alignement
|
|
754 |
N_cur = N_page->children; |
|
755 |
N_parent = N_page; |
|
756 |
lvl = 1; |
|
757 |
while (N_cur) { |
|
758 |
if (xmlStrcasecmp(N_cur->name,BAD_CAST "chunk") == 0){ |
|
759 |
N_p = xmlNewNode(NULL, BAD_CAST "chunk"); |
|
760 |
xmlAddPrevSibling(N_cur,N_p); |
|
761 |
//N_p = xmlNewChild(N_parent, NULL, BAD_CAST "chunk", NULL);
|
|
762 |
//A new paragraph is created when:
|
|
763 |
switch (int(xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "alignment")))){ |
|
764 |
//left
|
|
765 |
case 1: //the distance between the texblock X2 and the last word X2 is more than |
|
766 |
//the following first word width.
|
|
767 |
N_line = N_cur->children; |
|
768 |
while (N_line){ |
|
769 |
N_next = N_line->next; |
|
770 |
xmlUnlinkNode(N_line); |
|
771 |
xmlAddChild(N_p,N_line); |
|
772 |
xmlSetProp(N_line, BAD_CAST "alignment", BAD_CAST "1"); |
|
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
773 |
if (N_next && xmlStrcasecmp(N_next->name,BAD_CAST "line") == 0){ |
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
774 |
if (xmlXPathCastStringToNumber(xmlGetProp(N_next->children->children,BAD_CAST "width")) < (xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "width")) - xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "width")))){ |
775 |
N_p = xmlNewNode(NULL, BAD_CAST "chunk"); |
|
776 |
xmlAddPrevSibling(N_cur,N_p); |
|
777 |
}
|
|
778 |
}
|
|
779 |
N_line = N_next; |
|
780 |
}
|
|
781 |
break; |
|
782 |
//right
|
|
783 |
case 2: //the same but now with X1 and first word and following last word |
|
784 |
N_line = N_cur->children; |
|
785 |
while (N_line){ |
|
786 |
N_next = N_line->next; |
|
787 |
xmlUnlinkNode(N_line); |
|
788 |
xmlAddChild(N_p,N_line); |
|
789 |
xmlSetProp(N_line, BAD_CAST "alignment", BAD_CAST "2"); |
|
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
790 |
if (N_next && xmlStrcasecmp(N_next->name,BAD_CAST "line") == 0){ |
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
791 |
//fprintf(stderr,"width_next=%f, X2_bl=%f, X2_w=%f\n",xmlXPathCastStringToNumber(xmlGetProp(N_next->children->children,BAD_CAST "width")),xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "width")),xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "width")));
|
792 |
if (xmlXPathCastStringToNumber(xmlGetProp(N_next->children->children,BAD_CAST "width")) < (xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "width")) - xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "width")))){ |
|
793 |
N_p = xmlNewNode(NULL, BAD_CAST "chunk"); |
|
794 |
xmlAddPrevSibling(N_cur,N_p); |
|
795 |
}
|
|
796 |
}
|
|
797 |
N_line = N_next; |
|
798 |
}
|
|
799 |
break; |
|
800 |
//centered
|
|
801 |
case 3: //the combined left and right space is more than the following first word |
|
802 |
N_line = N_cur->children; |
|
803 |
while (N_line){ |
|
804 |
N_next = N_line->next; |
|
805 |
xmlUnlinkNode(N_line); |
|
806 |
xmlAddChild(N_p,N_line); |
|
807 |
xmlSetProp(N_line, BAD_CAST "alignment", BAD_CAST "3"); |
|
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
808 |
if (N_next && xmlStrcasecmp(N_next->name,BAD_CAST "line") == 0){ |
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
809 |
//fprintf(stderr,"width_next=%f, X2_bl=%f, X2_w=%f\n",xmlXPathCastStringToNumber(xmlGetProp(N_next->children->children,BAD_CAST "width")),xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "width")),xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "width")));
|
810 |
if (xmlXPathCastStringToNumber(xmlGetProp(N_next->children->children,BAD_CAST "width")) < (xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "width")) - xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "width")))){ |
|
811 |
N_p = xmlNewNode(NULL, BAD_CAST "chunk"); |
|
812 |
xmlAddPrevSibling(N_cur,N_p); |
|
813 |
}
|
|
814 |
}
|
|
815 |
N_line = N_next; |
|
816 |
}
|
|
817 |
break; |
|
818 |
//justified
|
|
819 |
case 4: |
|
820 |
//we break on all alignment=1 lines. A line with alignment=1 that is the first of a block will
|
|
821 |
//also initiate a paragraph break before.
|
|
822 |
N_line = N_cur->children; |
|
823 |
if (xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "alignment")) == 1){ |
|
824 |
N_p = xmlNewNode(NULL, BAD_CAST "chunk"); |
|
825 |
xmlAddPrevSibling(N_cur,N_p); |
|
826 |
}
|
|
827 |
while (N_line){ |
|
828 |
N_next = N_line->next; |
|
829 |
xmlUnlinkNode(N_line); |
|
830 |
xmlAddChild(N_p,N_line); |
|
831 |
if (xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "alignment")) == 1){ |
|
832 |
N_p = xmlNewNode(NULL, BAD_CAST "chunk"); |
|
833 |
xmlAddPrevSibling(N_cur,N_p); |
|
834 |
}
|
|
835 |
xmlSetProp(N_line, BAD_CAST "alignment", BAD_CAST "4"); |
|
836 |
N_line = N_next; |
|
837 |
}
|
|
838 |
break; |
|
839 |
}
|
|
840 |
}
|
|
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
841 |
else if (xmlStrcasecmp(N_cur->name,BAD_CAST "colset") == 0 || xmlStrcasecmp(N_cur->name,BAD_CAST "column") == 0){ |
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
842 |
N_parent = N_cur; |
843 |
N_cur = N_cur->children; |
|
844 |
lvl++; |
|
845 |
N_p = xmlNewNode(NULL, BAD_CAST "chunk"); |
|
846 |
xmlAddPrevSibling(N_cur,N_p); |
|
847 |
continue; |
|
848 |
}
|
|
849 |
if (N_cur->next) |
|
850 |
N_cur = N_cur->next; |
|
851 |
else while (lvl > 0){ |
|
852 |
N_cur = N_parent; |
|
853 |
N_parent = N_cur->parent; |
|
854 |
lvl--; |
|
855 |
if (N_cur->next){ |
|
856 |
N_cur = N_cur->next; |
|
857 |
break; |
|
858 |
}
|
|
859 |
}
|
|
860 |
if (lvl==0) |
|
861 |
N_cur = NULL; |
|
862 |
}
|
|
863 |
}
|
|
864 |
||
865 |
//function that adds an 'alignment=' property to the <chunk>s
|
|
866 |
void ABWOutputDev::addAlignment(xmlNodePtr N_parent) { |
|
867 |
xmlNodePtr N_chunk, N_line; |
|
868 |
double tX1, tX2; |
|
869 |
bool leftMatch, rightMatch, centerMatch; |
|
870 |
int leftCnt = 0, rightCnt = 0, cntrCnt = 0, justCnt = 0; |
|
871 |
//fprintf(stderr,"Entering addAlignment\n");
|
|
872 |
for (N_chunk = N_parent->children; N_chunk; N_chunk = N_chunk->next) { |
|
873 |
if (xmlStrcasecmp(N_chunk->name,BAD_CAST "chunk") == 0){ |
|
874 |
X1 = xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X1")); |
|
875 |
X2 = xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X2")); |
|
876 |
//fprintf(stderr,"Found chunk\n");
|
|
877 |
//if the chunk contains only one line, we don't need to loop through it.
|
|
878 |
if (xmlLsCountNode(N_chunk) == 1){ |
|
879 |
//fprintf(stderr,"Processing line\n");
|
|
880 |
//fprintf(stderr,"X1=%f, X2=%f, cX1=%f, cX2=%f\n",X1,X2,xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X1")), xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X2")));
|
|
881 |
//fprintf(stderr,"%f\n",(xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X1")) - X1)-(X2 - xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X2"))));
|
|
882 |
//fprintf(stderr,"cX1-X1=%f, X2-cX2=%f\n",(xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X1")) - X1),(X2 - xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X2"))));
|
|
883 |
// a one line chunk, is either centered or left or right-aligned.
|
|
884 |
if ((xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X1"))-X1)-(X2-xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X2"))) > 1) { |
|
885 |
xmlNewProp(N_chunk, BAD_CAST "alignment", BAD_CAST "2"); |
|
886 |
xmlNewProp(N_chunk->children, BAD_CAST "alignment", BAD_CAST "2"); |
|
887 |
//fprintf(stderr,"alignment = right\n");
|
|
888 |
}
|
|
889 |
else { |
|
890 |
if ((xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X1"))-X1)-(X2 - xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X2")))< -1) { |
|
891 |
xmlNewProp(N_chunk, BAD_CAST "alignment", BAD_CAST "1"); |
|
892 |
xmlNewProp(N_chunk->children, BAD_CAST "alignment", BAD_CAST "1"); |
|
893 |
//fprintf(stderr,"alignment = left\n");
|
|
894 |
}
|
|
895 |
else { |
|
896 |
xmlNewProp(N_chunk, BAD_CAST "alignment", BAD_CAST "3"); |
|
897 |
xmlNewProp(N_chunk->children, BAD_CAST "alignment", BAD_CAST "3"); |
|
898 |
//fprintf(stderr,"alignment = center\n");
|
|
899 |
}
|
|
900 |
}
|
|
901 |
}
|
|
902 |
else { |
|
903 |
leftCnt = 0; |
|
904 |
rightCnt = 0; |
|
905 |
cntrCnt = 0; |
|
906 |
justCnt = 0; |
|
907 |
for (N_line = N_chunk->children; N_line; N_line = N_line->next) { |
|
908 |
//fprintf(stderr,"Processing line\n");
|
|
909 |
/*
|
|
910 |
|X1 - cX1| == 1
|
|
911 |
|X2 - cX2| == 1
|
|
912 |
|(cX1-X1)-(X2-cX2)| == 1
|
|
913 |
ok, each line can be just as wide as the current set,
|
|
914 |
it can be smaller and moved to the right
|
|
915 |
it can be smaller and moved to the left.
|
|
916 |
it can
|
|
917 |
*/
|
|
918 |
//fprintf(stderr,"X1=%f, X2=%f, cX1=%f, cX2=%f\n",X1,X2,xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "X1")), xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "X2")));
|
|
919 |
//fprintf(stderr,"cX1-X1=%f, X2-cX2=%f\n",(xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "X1")) - X1),(X2 - xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "X2"))));
|
|
920 |
leftMatch = fabs(xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "X1"))-X1) < 2; |
|
921 |
rightMatch = fabs(X2-xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "X2"))) < 2; |
|
922 |
centerMatch = fabs((xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "X1"))-X1)-(X2-xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "X2")))) < 2; |
|
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
923 |
if (leftMatch && rightMatch) { |
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
924 |
xmlNewProp(N_line, BAD_CAST "alignment", BAD_CAST "4"); |
925 |
justCnt++; |
|
926 |
}
|
|
927 |
else if (centerMatch) { |
|
928 |
xmlNewProp(N_line, BAD_CAST "alignment", BAD_CAST "3"); |
|
929 |
cntrCnt++; |
|
930 |
}
|
|
931 |
else if (rightMatch) { |
|
932 |
xmlNewProp(N_line, BAD_CAST "alignment", BAD_CAST "2"); |
|
933 |
rightCnt++; |
|
934 |
}
|
|
935 |
else { |
|
936 |
xmlNewProp(N_line, BAD_CAST "alignment", BAD_CAST "1"); |
|
937 |
leftCnt++; |
|
938 |
}
|
|
939 |
}
|
|
940 |
//there is almost always one justified line in a centered text
|
|
941 |
//and most justified blocks have at least one left aligned line
|
|
942 |
//fprintf(stderr,"1:%d ,2:%d ,3:%d ,4:%d\n",leftCnt,justCnt,cntrCnt,rightCnt);
|
|
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
943 |
if ((leftCnt-1 >= justCnt) && (leftCnt >= rightCnt) && (leftCnt >= cntrCnt)) |
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
944 |
xmlNewProp(N_chunk, BAD_CAST "alignment", BAD_CAST "1"); |
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
945 |
else if ((justCnt >= leftCnt-1) && (justCnt >= rightCnt) && (justCnt >= cntrCnt)) |
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
946 |
xmlNewProp(N_chunk, BAD_CAST "alignment", BAD_CAST "4"); |
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
947 |
else if ((cntrCnt >= justCnt-1) && (cntrCnt >= rightCnt) && (cntrCnt >= leftCnt)) |
1.1.7
by Sebastien Bacher
Import upstream version 0.5.9 |
948 |
xmlNewProp(N_chunk, BAD_CAST "alignment", BAD_CAST "3"); |
949 |
else
|
|
950 |
xmlNewProp(N_chunk, BAD_CAST "alignment", BAD_CAST "2"); |
|
951 |
}
|
|
952 |
}
|
|
953 |
else { |
|
954 |
if (xmlStrcasecmp(N_chunk->name,BAD_CAST "colset") == 0){ |
|
955 |
//fprintf(stderr,"Found a colset\n");
|
|
956 |
addAlignment(N_chunk); |
|
957 |
}
|
|
958 |
else { |
|
959 |
if (xmlStrcasecmp(N_chunk->name,BAD_CAST "column") == 0){ |
|
960 |
//fprintf(stderr,"Found a column\n");
|
|
961 |
tX1 = X1; |
|
962 |
tX2 = X2; |
|
963 |
X1 = xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X1")); |
|
964 |
X2 = xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X2")); |
|
965 |
addAlignment(N_chunk); |
|
966 |
X1 = tX1; |
|
967 |
X2 = tX2; |
|
968 |
}
|
|
969 |
else { //fprintf(stderr,"Found something else\n"); |
|
970 |
}
|
|
971 |
}
|
|
972 |
}
|
|
973 |
}
|
|
974 |
//parse all blocks, and all lines within all blocks
|
|
975 |
//do a set of checks and tick a flag if the check fails
|
|
976 |
//check for line X1 is textBlock X1
|
|
977 |
//check for line X2 is textblock X2
|
|
978 |
//check if line is centered in textBock (LX1 != TX1 && LX2 != TX2 && LX1-TX1 == TX2=LX2)
|
|
979 |
//if the LX1 != TX1 then how much is the difference?
|
|
980 |
//a line isn't left aligned if all lines have a different X1 <= not so strong assumption.
|
|
981 |
||
982 |
//justified if both are straight except for a couple of (same factor sized) indents at the left
|
|
983 |
//else centered if above calculation is correct
|
|
984 |
//else left aligned if left side is more straight than right (more lines in the same X1 or common factor
|
|
985 |
//else right
|
|
986 |
}
|
|
987 |
||
988 |
// Stores the document pointer in the `pdfdoc` member. Only the pointer is
// stored; nothing in this function copies the document or takes ownership.
void ABWOutputDev::setPDFDoc(PDFDoc *priv_pdfdoc)
{
  this->pdfdoc = priv_pdfdoc;
}
|
|
991 |
||
992 |
void ABWOutputDev::createABW() { |
|
993 |
//*************************************************************
|
|
994 |
//change styles to abiword format
|
|
995 |
xmlNodePtr N_cur, N_next; |
|
996 |
xmlAttrPtr N_prop; |
|
997 |
char buf[500]; |
|
998 |
for (N_cur = N_styleset->children; N_cur; N_cur = N_cur->next){ |
|
999 |
sprintf(buf,"margin-top:0pt; color:000000; margin-left:0pt; text-position:normal; widows:2; text-indent:0in; font-variant:normal; margin-right:0pt; lang:nl-NL; line-height:1.0; font-size:%dpt; text-decoration:none; margin-bottom:0pt; bgcolor:transparent; text-align:left; font-stretch:normal;",int(xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "size")))); |
|
1000 |
strncat(buf,"font-family:",12); |
|
1001 |
strncat(buf,(char *)xmlGetProp(N_cur,BAD_CAST "font"),strlen((char *)xmlGetProp(N_cur,BAD_CAST "font"))); |
|
1002 |
strncat(buf,";",1); |
|
1003 |
strncat(buf,"font-weight:",12); |
|
1004 |
strncat(buf,(char *)xmlGetProp(N_cur,BAD_CAST "bold"),strlen((char *)xmlGetProp(N_cur,BAD_CAST "bold"))); |
|
1005 |
strncat(buf,"font-style:",12); |
|
1006 |
strncat(buf,(char *)xmlGetProp(N_cur,BAD_CAST "italic"),strlen((char *)xmlGetProp(N_cur,BAD_CAST "italic"))); |
|
1007 |
xmlSetProp(N_cur, BAD_CAST "props", BAD_CAST buf); |
|
1008 |
N_prop = xmlHasProp(N_cur, BAD_CAST "id"); |
|
1009 |
if (N_prop != NULL) xmlRemoveProp(N_prop); |
|
1010 |
N_prop = xmlHasProp(N_cur, BAD_CAST "size"); |
|
1011 |
if (N_prop != NULL) xmlRemoveProp(N_prop); |
|
1012 |
N_prop = xmlHasProp(N_cur, BAD_CAST "bold"); |
|
1013 |
if (N_prop != NULL) xmlRemoveProp(N_prop); |
|
1014 |
N_prop = xmlHasProp(N_cur, BAD_CAST "italic"); |
|
1015 |
if (N_prop != NULL) xmlRemoveProp(N_prop); |
|
1016 |
N_prop = xmlHasProp(N_cur, BAD_CAST "font"); |
|
1017 |
if (N_prop != NULL) xmlRemoveProp(N_prop); |
|
1018 |
}
|
|
1019 |
//*************************************************************
|
|
1020 |
//Change the rest of the document
|
|
1021 |
//each child of N_content is a page
|
|
1022 |
N_cur = N_content->children; |
|
1023 |
while (N_cur){ |
|
1024 |
//we creat a section node and attach it to the root, it will com after all
|
|
1025 |
//the page nodes. Then we transform the page, and finally remove it
|
|
1026 |
N_next = N_cur->next; |
|
1027 |
//fprintf(stderr,"***Transforming page\n");
|
|
1028 |
N_Block = xmlNewChild(N_root, NULL, BAD_CAST "section", NULL); |
|
1029 |
transformPage(N_cur); |
|
1030 |
xmlUnlinkNode(N_cur); |
|
1031 |
//fprintf(stderr,"***Finished transforming page\n");
|
|
1032 |
N_cur = N_next; |
|
1033 |
}
|
|
1034 |
cleanUpNode(N_root, false); |
|
1035 |
}
|
|
1036 |
||
1037 |
// Translates one node of the intermediate page tree into AbiWord nodes,
// recursing into its children. What happens depends on the element name:
//   page   - every child is transformed.
//   chunk  - a <p> is appended to the current section (N_Block) and becomes
//            N_text; the chunk's positive "style" and its "alignment" are
//            carried over; the first child of every word (chunk -> line ->
//            text -> word) is moved into the <p>, each followed by a space.
//   column - every child is transformed, then a <cbr> is appended to N_text.
//   colset - a new <section props="columns:N"> (N = number of children) is
//            opened under N_root, the children are transformed into it, and a
//            fresh <section> is opened afterwards for what follows.
// Any other element is ignored.
void ABWOutputDev::transformPage(xmlNodePtr N_parent){
  xmlNodePtr N_cur;
  if (xmlStrcasecmp(N_parent->name, BAD_CAST "page") == 0) {
    for (N_cur = N_parent->children; N_cur; N_cur = N_cur->next) {
      transformPage(N_cur);
    }
  }
  else if (xmlStrcasecmp(N_parent->name, BAD_CAST "chunk") == 0) {
    // I start a <p> on each chunk and add all word containment
    N_text = xmlNewChild(N_Block, NULL, BAD_CAST "p", NULL);

    // xmlGetProp() returns a copy owned by the caller; xmlNewProp() stores
    // its own copy of the value, so the fetched string is freed right after.
    xmlChar *style = xmlGetProp(N_parent, BAD_CAST "style");
    if (int(xmlXPathCastStringToNumber(style)) > 0) {
      xmlNewProp(N_text, BAD_CAST "style", style);
    }
    if (style != NULL) xmlFree(style);

    xmlChar *alignment = xmlGetProp(N_parent, BAD_CAST "alignment");
    const char *textAlign = NULL;
    switch (int(xmlXPathCastStringToNumber(alignment))) {
    case 1: textAlign = "text-align:left";    break;
    case 2: textAlign = "text-align:right";   break;
    case 3: textAlign = "text-align:center";  break;
    case 4: textAlign = "text-align:justify"; break;
    }
    if (alignment != NULL) xmlFree(alignment);
    if (textAlign != NULL) {
      xmlNewProp(N_text, BAD_CAST "props", BAD_CAST textAlign);
    }

    for (xmlNodePtr N_curLine = N_parent->children; N_curLine; N_curLine = N_curLine->next) {
      for (xmlNodePtr N_curText = N_curLine->children; N_curText; N_curText = N_curText->next) {
        for (xmlNodePtr N_curWord = N_curText->children; N_curWord; N_curWord = N_curWord->next) {
          // Move the word's first child into the paragraph and separate it
          // from the next word with a single space.
          xmlNodePtr text = N_curWord->children;
          xmlUnlinkNode(text);
          xmlAddChild(N_text, text);
          xmlNodePtr space = xmlNewText(BAD_CAST " ");
          xmlAddChild(N_text, space);
        }
      }
    }
  }
  else if (xmlStrcasecmp(N_parent->name, BAD_CAST "column") == 0) {
    for (N_cur = N_parent->children; N_cur; N_cur = N_cur->next) {
      transformPage(N_cur);
    }
    // Column break goes into whichever <p> is current after the recursion.
    xmlNewChild(N_text, NULL, BAD_CAST "cbr", NULL);
  }
  else if (xmlStrcasecmp(N_parent->name, BAD_CAST "colset") == 0) {
    // create new section columns: count childNodes of N_parent,
    // recurse through chunks and create textNodes
    char buf[60];
    N_Block = xmlNewChild(N_root, NULL, BAD_CAST "section", NULL);
    snprintf(buf, sizeof(buf), "columns:%d", xmlLsCountNode(N_parent));
    xmlNewProp(N_Block, BAD_CAST "props", BAD_CAST buf);
    for (N_cur = N_parent->children; N_cur; N_cur = N_cur->next) {
      transformPage(N_cur);
    }
    N_Block = xmlNewChild(N_root, NULL, BAD_CAST "section", NULL);
  }
}
|
|
1.1.8
by Martin Pitt
Import upstream version 0.5.91 |
1100 |
|
1101 |
//Count nodes, copied from debugxml.c from libxml
|
|
1102 |
// libxml copyright file below
|
|
1103 |
/*
|
|
1104 |
Except where otherwise noted in the source code (e.g. the files hash.c,
|
|
1105 |
list.c and the trio files, which are covered by a similar licence but
|
|
1106 |
with different Copyright notices) all the files are:
|
|
1107 |
||
1108 |
Copyright (C) 1998-2003 Daniel Veillard. All Rights Reserved.
|
|
1109 |
||
1110 |
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
1111 |
of this software and associated documentation files (the "Software"), to deal
|
|
1112 |
in the Software without restriction, including without limitation the rights
|
|
1113 |
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
1114 |
copies of the Software, and to permit persons to whom the Software is fur-
|
|
1115 |
nished to do so, subject to the following conditions:
|
|
1116 |
||
1117 |
The above copyright notice and this permission notice shall be included in
|
|
1118 |
all copies or substantial portions of the Software.
|
|
1119 |
||
1120 |
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
1121 |
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FIT-
|
|
1122 |
NESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
1123 |
DANIEL VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
|
1124 |
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CON-
|
|
1125 |
NECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
1126 |
||
1127 |
Except as contained in this notice, the name of Daniel Veillard shall not
|
|
1128 |
be used in advertising or otherwise to promote the sale, use or other deal-
|
|
1129 |
ings in this Software without prior written authorization from him.
|
|
1130 |
*/
|
|
1131 |
// Returns a size measure for `node`, depending on its type:
//   - element, document or attribute node: the number of direct children;
//   - text, CDATA, processing-instruction or comment node: the length of its
//     content (0 when there is no content);
//   - the declaration/entity/DTD/XInclude kinds listed below: always 1;
//   - NULL, or any type not listed: 0.
int ABWOutputDev::xmlLsCountNode(xmlNodePtr node) {
  if (node == NULL)
    return 0;

  xmlNodePtr child = NULL;  // head of the sibling list to walk, if any
  int count = 0;

  switch (node->type) {
    case XML_ELEMENT_NODE:
      child = node->children;
      break;
    case XML_DOCUMENT_NODE:
    case XML_HTML_DOCUMENT_NODE:
#ifdef LIBXML_DOCB_ENABLED
    case XML_DOCB_DOCUMENT_NODE:
#endif
      child = ((xmlDocPtr) node)->children;
      break;
    case XML_ATTRIBUTE_NODE:
      child = ((xmlAttrPtr) node)->children;
      break;
    case XML_TEXT_NODE:
    case XML_CDATA_SECTION_NODE:
    case XML_PI_NODE:
    case XML_COMMENT_NODE:
      if (node->content != NULL)
        count = xmlStrlen(node->content);
      break;
    case XML_ENTITY_REF_NODE:
    case XML_DOCUMENT_TYPE_NODE:
    case XML_ENTITY_NODE:
    case XML_DOCUMENT_FRAG_NODE:
    case XML_NOTATION_NODE:
    case XML_DTD_NODE:
    case XML_ELEMENT_DECL:
    case XML_ATTRIBUTE_DECL:
    case XML_ENTITY_DECL:
    case XML_NAMESPACE_DECL:
    case XML_XINCLUDE_START:
    case XML_XINCLUDE_END:
      count = 1;
      break;
  }

  // Add one per sibling in the selected child list.
  while (child != NULL) {
    ++count;
    child = child->next;
  }
  return count;
}
|