1
type separator = Space | Comma
3
let separator_to_string = function
11
| AStr of aname * string
12
| AStrL of separator * aname * string list
13
type attribs = attrib list
15
let attrib_to_string encode = function
16
| AInt (name, i) -> name ^ "=\"" ^ string_of_int i ^ "\""
17
| AStr (name, s) -> name ^ "=\"" ^ encode s ^ "\""
18
| AStrL (sep, name, slist) ->
19
name ^ "=\"" ^ encode (String.concat (separator_to_string sep) slist) ^ "\""
25
(* I add, for the syntax xml extension: *)
26
| Whitespace of string
27
| Element of ename * attrib list * elt list
28
| BlockElement of ename * attrib list * elt list
29
| SemiBlockElement of ename * attrib list * elt list
30
(* Element is a Node that is not a BlockElement nor a SemiBlockElement *)
31
(* Pretty-printing of Element/ BlockElement/ SemiBlockElement is faster *)
35
| Leaf of ename * attrib list
36
| Node of ename * attrib list * elt list
45
type tag = [ `Br | `Span | `Bdo | `Map | `Object | `Img | `Tt | `I | `B | `Big
46
| `Small | `Em | `Strong | `Dfn | `Code | `Q | `Samp | `Kbd | `Var
47
| `Cite | `Abbr | `Acronym | `Sub | `Sup | `Input | `Select
48
| `Textarea | `Label | `Button | `Ins | `Del | `Script | `Noscript
49
| `A | `H1 | `H2 | `H3 | `H4 | `H5 | `H6 | `Ul | `Ol | `Dl | `Pre
50
| `Hr | `Blockquote | `Address | `P | `Div | `Fieldset | `Table
51
| `Form | `Html | `Head | `Body | `Title | `Base | `Style | `Meta
52
| `Link | `Li | `Dt | `Dd | `Param | `Area | `Optgroup | `Option
53
| `Legend | `Caption | `Thead | `Tfoot | `Tbody | `Colgroup
54
| `Col | `Tr | `Th | `Td]
57
let blocktags = [ "fieldset"; "form"; "address"; "body"; "head"; "blockquote"; "div"; "html"; "h1"; "h2"; "h3"; "h4"; "h5"; "h6"; "p"; "dd"; "dl"; "li"; "ol"; "ul"; "colgroup"; "table"; "tbody"; "tfoot"; "thead"; "td"; "th"; "tr" ]
59
let semiblocktags = [ "pre"; "style"; "title" ]
61
type xhtml = [ `Html ]
62
type xhform = [ `Form ]
65
type xhheadlink = [ `Link ]
66
type xhscript = [ `Script ]
67
type xhinput = [ `Input ]
68
type xhtextarea = [ `Textarea ]
70
type pcdata = [ `PCDATA ]
74
type xhhtmlcont = [ `Body | `Head ]
76
type xhbodycont = [ `Address | `Blockquote | `Del | `Div | `Dl | `Fieldset
77
| `Form | `H1 | `H2 | `H3 | `H4 | `H5 | `H6 | `Hr | `Ins | `Noscript | `Ol
78
| `P | `Pre | `Script | `Table | `Ul ]
81
[ `A | `Abbr | `Acronym | `Address | `B | `Bdo | `Big | `Blockquote | `Br | `Button | `Cite | `Code | `Del | `Dfn | `Div | `Dl | `Em | `Fieldset | `Form | `H1 | `H2 | `H3 | `H4 | `H5 | `H6 | `Hr | `I | `Img | `Input | `Ins | `Kbd | `Label | `Map | `Noscript | `Object | `Ol | `P | `PCDATA | `Pre | `Q | `Samp | `Script | `Select | `Small | `Span | `Strong | `Sub | `Sup | `Table | `Textarea | `Tt | `Ul | `Var ]
84
[ `A | `Abbr | `Acronym | `Address | `B | `Bdo | `Big | `Blockquote | `Br | `Button | `Cite | `Code | `Del | `Dfn | `Div | `Dl | `Em | `Fieldset | `Form | `H1 | `H2 | `H3 | `H4 | `H5 | `H6 | `Hr | `I | `Img | `Input | `Ins | `Kbd | `Label | `Map | `Noscript | `Object | `Ol | `P | `Param | `PCDATA | `Pre | `Q | `Samp | `Script | `Select | `Small | `Span | `Strong | `Sub | `Sup | `Table | `Textarea | `Tt | `Ul | `Var ]
87
[ `A | `Abbr | `Acronym | `Address | `B | `Bdo | `Big | `Blockquote | `Br | `Button | `Cite | `Code | `Del | `Dfn | `Div | `Dl | `Em | `Fieldset | `Form | `H1 | `H2 | `H3 | `H4 | `H5 | `H6 | `Hr | `I | `Img | `Input | `Ins | `Kbd | `Label | `Legend | `Map | `Noscript | `Object | `Ol | `P | `PCDATA | `Pre | `Q | `Samp | `Script | `Select | `Small | `Span | `Strong | `Sub | `Sup | `Table | `Textarea | `Tt | `Ul | `Var ]
90
[ `Abbr | `Acronym | `Address | `B | `Bdo | `Big | `Blockquote | `Br | `Cite | `Code | `Del | `Dfn | `Div | `Dl | `Em | `H1 | `H2 | `H3 | `H4 | `H5 | `H6 | `Hr | `I | `Img | `Ins | `Kbd | `Map | `Noscript | `Object | `Ol | `P | `PCDATA | `Pre | `Q | `Samp | `Script | `Small | `Span | `Strong | `Sub | `Sup | `Table | `Tt | `Ul | `Var ]
93
[ `Base | `Link | `Object | `Script | `Style | `Title ]
96
[ `Address | `Blockquote | `Del | `Div | `Dl | `Fieldset | `H1 | `H2 | `H3 | `H4 | `H5 | `H6 | `Hr | `Ins | `Noscript | `Ol | `P | `Pre | `Script | `Table | `Ul ]
98
type xhblockquotecont =
99
[ `Address | `Blockquote | `Del | `Div | `Dl | `Fieldset | `Form | `H1 | `H2 | `H3 | `H4 | `H5 | `H6 | `Hr | `Ins | `Noscript | `Ol | `P | `Pre | `Script | `Table | `Ul ]
102
[ `Address | `Area | `Blockquote | `Del | `Div | `Dl | `Fieldset | `Form | `H1 | `H2 | `H3 | `H4 | `H5 | `H6 | `Hr | `Ins | `Noscript | `Ol | `P | `Pre | `Script | `Table | `Ul ]
105
[ `A | `Abbr | `Acronym | `B | `Bdo | `Big | `Br | `Button | `Cite | `Code | `Del | `Dfn | `Em | `I | `Img | `Input | `Ins | `Kbd | `Label | `Map | `Object | `PCDATA | `Q | `Samp | `Script | `Select | `Small | `Span | `Strong | `Sub | `Sup | `Textarea | `Tt | `Var ]
108
[ `A | `Abbr | `Acronym | `B | `Bdo | `Big | `Br | `Button | `Cite | `Code | `Del | `Dfn | `Em | `I | `Img | `Input | `Ins | `Kbd | `Map | `Object | `PCDATA | `Q | `Samp | `Script | `Select | `Small | `Span | `Strong | `Sub | `Sup | `Textarea | `Tt | `Var ]
111
[ `Abbr | `Acronym | `B | `Bdo | `Big | `Br | `Button | `Cite | `Code | `Del | `Dfn | `Em | `I | `Img | `Input | `Ins | `Kbd | `Label | `Map | `Object | `PCDATA | `Q | `Samp | `Script | `Select | `Small | `Span | `Strong | `Sub | `Sup | `Textarea | `Tt | `Var ]
114
[ `A | `Abbr | `Acronym | `B | `Bdo | `Br | `Cite | `Code | `Dfn | `Em | `I | `Kbd | `Map | `PCDATA | `Q | `Samp | `Script | `Span | `Strong | `Tt | `Var ]
119
type xhoptgroupcont = [ `Option ]
121
type xhcolgroupcont = [ `Col ]
123
type xhulcont = [ `Li ]
125
type xhselectcont = [ `Optgroup | `Option ]
127
type xhtbodycont = [ `Tr ]
130
[ `Caption | `Col | `Colgroup | `Tbody | `Tfoot | `Thead | `Tr ]
132
type xhtrcont = [ `Td | `Th ]
134
type xhabbrcont = xhinlinecont
135
type xhacronymcont = xhinlinecont
136
type xhaddresscont = xhinlinecont
137
type xhbcont = xhinlinecont
138
type xhbdocont = xhinlinecont
139
type xhbigcont = xhinlinecont
140
type xhcaptioncont = xhinlinecont
141
type xhcitecont = xhinlinecont
142
type xhcodecont = xhinlinecont
143
type xhdfncont = xhinlinecont
144
type xhdtcont = xhinlinecont
145
type xhemcont = xhinlinecont
146
type xhh1cont = xhinlinecont
147
type xhh2cont = xhinlinecont
148
type xhh3cont = xhinlinecont
149
type xhh4cont = xhinlinecont
150
type xhh5cont = xhinlinecont
151
type xhh6cont = xhinlinecont
152
type xhicont = xhinlinecont
153
type xhkbdcont = xhinlinecont
154
type xhlegendcont = xhinlinecont
155
type xhpcont = xhinlinecont
156
type xhqcont = xhinlinecont
157
type xhsampcont = xhinlinecont
158
type xhsmallcont = xhinlinecont
159
type xhspancont = xhinlinecont
160
type xhstrongcont = xhinlinecont
161
type xhsubcont = xhinlinecont
162
type xhsupcont = xhinlinecont
163
type xhttcont = xhinlinecont
164
type xhvarcont = xhinlinecont
166
type xhddcont = xhdivcont
167
type xhdelcont = xhdivcont
168
(* type xhdivcont = xhdivcont *)
169
type xhinscont = xhdivcont
170
type xhlicont = xhdivcont
171
type xhthcont = xhdivcont
172
type xhtdcont = xhdivcont
174
(* type xhtbodycont = xhbodycont *)
175
type xhnoscriptcont = xhbodycont
177
type xhareacont = xhnotag
178
type xhbasecont = xhnotag
179
type xhbrcont = xhnotag
180
type xhcolcont = xhnotag
181
type xhhrcont = xhnotag
182
type xhimgcont = xhnotag
183
type xhinputcont = xhnotag
184
type xhmetacont = xhnotag
185
type xhparamcont = xhnotag
189
type xhobjectcont = xhobjectcont
190
type xhfieldsetcont = xhfieldsetcont
191
type xhheadcont = xhheadcont
192
type xhformcont = xhformcont
193
type xhmapcont = xhmapcont
194
type xhlabelcont = xhlabelcont
195
type xhacont = xhacont
196
type xhprecont = xhprecont
197
type xhdlcont = xhdlcont
198
type xhoptgroupcont = xhoptgroupcont
199
type xhcolgroupcont = xhcolgroupcont
200
type xhulcont = xhulcont
201
type xhselectcont = xhselectcont
202
type xhtablecont = xhtablecont
203
type xhtrcont = xhtrcont
204
type xhbuttoncont = xhbuttoncont
205
type xhblockquotecont = xhblockquotecont
208
type xhlinkcont = pcdata
209
type xhoptioncont = pcdata
210
type xhscriptcont = pcdata
211
type xhstylecont = pcdata
212
type xhtextareacont = pcdata
213
type xhtitlecont = pcdata
215
type xholcont = xhulcont
216
type xhtheadcont = xhtbodycont
217
type xhtfootcont = xhtbodycont
224
let xh_string = str_formatter
227
let xh_topxml = "<?xml version=\"1.0\" encoding=\"iso-8859-1\"?>"
228
let xh_topdoctype = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">"
232
let xh_print ?(encode = fun x -> x) arbre =
234
let rec xh_print_attrs attrs = match attrs with
237
pp_print_string xh_string (" "^(attrib_to_string encode attr));
240
and xh_print_text texte i is_first =
241
pp_print_string xh_string texte
243
and xh_print_closedtag tag attrs i is_first = pp_open_tbox xh_string ();
244
if (i > 0) || is_first then
245
pp_force_newline xh_string ();
246
if ((i > 0) || is_first) then
247
pp_print_tbreak xh_string (taille_tab*i) 0;
248
pp_print_string xh_string ("<"^tag);
249
xh_print_attrs attrs;
250
pp_print_string xh_string "/>";
251
pp_close_tbox xh_string ();
253
and xh_print_inlinetag tag attrs taglist i is_first =
254
pp_print_string xh_string ("<"^tag);
255
xh_print_attrs attrs;
256
pp_print_string xh_string ">";
257
xh_print_taglist taglist 0 false false;
258
pp_print_string xh_string ("</"^tag^">")
260
and xh_print_blocktag tag attrs taglist i =
262
then xh_print_closedtag tag attrs i true
264
pp_open_tbox xh_string ();
265
pp_force_newline xh_string ();
267
pp_print_tbreak xh_string (taille_tab*i) 0;
268
pp_print_string xh_string ("<"^tag);
269
xh_print_attrs attrs;
270
pp_print_string xh_string ">";
272
xh_print_taglist_removews taglist (i+1) true;
274
pp_force_newline xh_string ();
276
pp_print_tbreak xh_string (taille_tab*i) 0;
277
pp_print_string xh_string ("</"^tag^">");
278
pp_close_tbox xh_string ()
281
and xh_print_semiblocktag tag attrs taglist i =
282
(* New line before and after but not inside, for ex for <pre> *)
284
then xh_print_closedtag tag attrs i true
286
pp_open_tbox xh_string ();
287
pp_force_newline xh_string ();
289
pp_print_tbreak xh_string (taille_tab*i) 0;
290
pp_print_string xh_string ("<"^tag);
291
xh_print_attrs attrs;
292
pp_print_string xh_string ">";
294
xh_print_taglist taglist 0 false false;
296
pp_print_string xh_string ("</"^tag^">");
297
pp_close_tbox xh_string ()
300
and xh_print_taglist_removews taglist i is_first =
302
(Whitespace s)::l -> xh_print_taglist_removews l i is_first
303
| l -> xh_print_taglist l i is_first true
306
and print_nodes ws1 name xh_attrs xh_taglist ws2 queue i is_first removetailingws =
307
if (List.mem name blocktags)
308
then xh_print_blocktag name xh_attrs xh_taglist i
310
(if (List.mem name semiblocktags)
311
then xh_print_semiblocktag name xh_attrs xh_taglist i
313
xh_print_text (encode ws1) i is_first;
314
xh_print_inlinetag name xh_attrs xh_taglist i is_first;
315
xh_print_text (encode ws2) i is_first;
317
xh_print_taglist queue i false removetailingws;
319
and xh_print_taglist taglist i is_first removetailingws = match taglist with
321
[] -> pp_open_tbox xh_string ();
322
pp_close_tbox xh_string ();
324
| (Comment texte)::queue ->
325
xh_print_text ("<!--"^(encode texte)^"-->") i is_first;
326
xh_print_taglist queue i false removetailingws;
328
| (Entity e)::queue ->
329
xh_print_text ("&"^e^";") i is_first; (* no encoding *)
330
xh_print_taglist queue i false removetailingws;
332
| (PCDATA texte)::queue ->
333
xh_print_text (encode texte) i is_first;
334
xh_print_taglist queue i false removetailingws;
336
| (Whitespace _)::(Element ("hr",xh_attrs,[]))::(Whitespace _)::queue
337
| (Element ("hr",xh_attrs,[]))::(Whitespace _)::queue
338
| (Whitespace _)::(Element ("hr",xh_attrs,[]))::queue
339
| (Element ("hr",xh_attrs,[]))::queue ->
340
xh_print_closedtag "hr" xh_attrs i is_first;
341
xh_print_taglist queue i false removetailingws;
343
| (Element (name, xh_attrs, []))::queue ->
344
xh_print_closedtag name xh_attrs i is_first;
345
xh_print_taglist queue i false removetailingws;
347
(* Balises de presentation, type inline *)
348
| (Element (name, xh_attrs, xh_taglist))::queue ->
349
xh_print_inlinetag name xh_attrs xh_taglist i is_first;
350
xh_print_taglist queue i false removetailingws;
352
(* Balises de type block *)
353
| (Whitespace _)::(BlockElement (name,xh_attrs,xh_taglist))::(Whitespace _)::queue
354
| (BlockElement (name,xh_attrs,xh_taglist))::(Whitespace _)::queue
355
| (Whitespace _)::(BlockElement (name,xh_attrs,xh_taglist))::queue
356
| (BlockElement (name,xh_attrs,xh_taglist))::queue ->
357
xh_print_blocktag name xh_attrs xh_taglist i;
358
xh_print_taglist queue i false removetailingws;
360
(* Balises de type "semi block", for ex <pre> *)
361
| (Whitespace _)::(SemiBlockElement (name,xh_attrs,xh_taglist))::(Whitespace _)::queue
362
| (SemiBlockElement (name,xh_attrs,xh_taglist))::(Whitespace _)::queue
363
| (Whitespace _)::(SemiBlockElement (name,xh_attrs,xh_taglist))::queue
364
| (SemiBlockElement (name,xh_attrs,xh_taglist))::queue ->
365
xh_print_semiblocktag name xh_attrs xh_taglist i;
366
xh_print_taglist queue i false removetailingws;
368
(* Nodes and Leafs *)
369
| (Whitespace ws1)::(Node (name,xh_attrs,xh_taglist))::(Whitespace ws2)::queue ->
370
print_nodes ws1 name xh_attrs xh_taglist ws2 queue i is_first removetailingws
372
| (Node (name,xh_attrs,xh_taglist))::(Whitespace ws2)::queue ->
373
print_nodes "" name xh_attrs xh_taglist ws2 queue i is_first removetailingws
375
| (Whitespace ws1)::(Node (name,xh_attrs,xh_taglist))::queue ->
376
print_nodes ws1 name xh_attrs xh_taglist "" queue i is_first removetailingws
378
| (Node (name,xh_attrs,xh_taglist))::queue ->
379
print_nodes "" name xh_attrs xh_taglist "" queue i is_first removetailingws
381
| (Whitespace ws1)::(Leaf (name,xh_attrs))::(Whitespace ws2)::queue ->
382
print_nodes ws1 name xh_attrs [] ws2 queue i is_first removetailingws
384
| (Leaf (name,xh_attrs))::(Whitespace ws2)::queue ->
385
print_nodes "" name xh_attrs [] ws2 queue i is_first removetailingws
387
| (Whitespace ws1)::(Leaf (name,xh_attrs))::queue ->
388
print_nodes ws1 name xh_attrs [] "" queue i is_first removetailingws
390
| (Leaf (name,xh_attrs))::queue ->
391
print_nodes "" name xh_attrs [] "" queue i is_first removetailingws
394
| (Whitespace(texte))::queue ->
395
xh_print_text (encode texte) i is_first;
396
xh_print_taglist queue i false removetailingws
399
xh_print_taglist queue i false removetailingws
404
pp_open_tbox xh_string ();
405
pp_print_string xh_string xh_topxml;
406
pp_force_newline xh_string ();
407
pp_print_string xh_string xh_topdoctype;
408
pp_force_newline xh_string ();
410
xh_print_taglist [arbre] 0 true false;
412
pp_force_newline xh_string ();
413
pp_close_tbox xh_string ();
415
flush_str_formatter ()