2
Author: Francis Giraldeau <francis.giraldeau@usherbrooke.ca>
4
Reference: http://www.w3.org/TR/2006/REC-xml11-20060816/
11
(************************************************************************
13
*************************************************************************)
15
let dels (s:string) = del s s
18
let sep_spc = del /[ \t\n]+/ " "
19
let sep_osp = del /[ \t\n]*/ ""
20
let sep_eq = del /[ \t\n]*=[ \t\n]*/ "="
22
let nmtoken = /[a-zA-Z:_][a-zA-Z0-9:_\.-]*/
23
let word = /[a-zA-Z][a-zA-Z0-9\._\-]*/
25
(* if we hide the quotes, then we can only accept single or double quotes *)
26
(* otherwise a put ambiguity is raised *)
27
let sto_dquote = dels "\"" . store /[^"]*/ . dels "\""
28
let sto_squote = dels "'" . store /[^']*/ . dels "'"
30
let comment = [ label "#comment" .
32
store /([^-]|-[^-])*/ .
35
let pi_target = nmtoken - /[Xx][Mm][Ll]/
36
let empty = Util.empty
37
let del_end = del />[\n]?/ ">\n"
38
let del_end_simple = dels ">"
40
(* This is siplified version of processing instruction
41
* pi has to not start or end with a white space and the string
42
* must not contain "?>". We restrict too much by not allowing any
45
let pi = /[^ \n\t]|[^ \n\t][^?>]*[^ \n\t]/
47
(************************************************************************
49
*************************************************************************)
52
let decl = [ label "#decl" . sep_spc .
53
store /[^> \t\n\r]|[^> \t\n\r][^>\t\n\r]*[^> \t\n\r]/ ]
55
let decl_def (r:regexp) (b:lens) = [ dels "<" . key r .
56
sep_spc . store word .
57
b . sep_osp . del_end_simple ]
59
let elem_def = decl_def /!ELEMENT/ decl
61
let enum = "(" . osp . nmtoken . ( osp . "|" . osp . nmtoken )* . osp . ")"
63
let att_type = /CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS/ |
66
let id_def = [ sep_spc . key /PUBLIC/ .
67
[ label "#literal" . sep_spc . sto_dquote ]* ] |
68
[ sep_spc . key /SYSTEM/ . sep_spc . sto_dquote ]
70
let notation_def = decl_def /!NOTATION/ id_def
72
let att_def = counter "att_id" .
73
[ sep_spc . seq "att_id" .
74
[ label "#name" . store word . sep_spc ] .
75
[ label "#type" . store att_type . sep_spc ] .
76
([ key /#REQUIRED|#IMPLIED/ ] |
77
[ label "#FIXED" . del /#FIXED[ \n\t]*|/ "" . sto_dquote ]) ]*
79
let att_list_def = decl_def /!ATTLIST/ att_def
81
let entity_def = decl_def /!ENTITY/ ([sep_spc . label "#decl" . sto_dquote ])
83
let decl_def_item = elem_def | entity_def | att_list_def | notation_def
85
let decl_outer = sep_osp . del /\[[ \n\t\r]*/ "[\n" .
86
(decl_def_item . sep_osp )* . dels "]"
88
(* let dtd_def = [ sep_spc . key "SYSTEM" . sep_spc . sto_dquote ] *)
90
let doctype = decl_def /!DOCTYPE/ (decl_outer|id_def)
92
let attributes = [ label "#attribute" .
93
[ sep_spc . key nmtoken . sep_eq . sto_dquote ]+ ]
95
let prolog = [ label "#declaration" .
102
(************************************************************************
104
*************************************************************************)
106
(* we consider entities as simple text *)
107
let text_re = /[^<]+/ - /([^<]*\]\]>[^<]*)/
108
let text = [ label "#text" . store text_re ]
109
let cdata = [ label "#CDATA" . dels "<![CDATA[" .
110
store (char* - (char* . "]]>" . char*)) . dels "]]>" ]
112
let element (body:lens) =
113
let h = attributes? . sep_osp . dels ">" . body* . dels "</" in
114
[ dels "<" . square nmtoken h . sep_osp . del_end ]
116
let empty_element = [ dels "<" . key nmtoken . value "#empty" .
117
attributes? . sep_osp . del /\/>[\n]?/ "/>\n" ]
119
let pi_instruction = [ dels "<?" . label "#pi" .
120
[ label "#target" . store pi_target ] .
121
[ sep_spc . label "#instruction" . store pi ]? .
122
sep_osp . del /\?>/ "?>" ]
124
(* Typecheck is weaker on rec lens, detected by unfolding *)
126
let content1 = element text
127
let rec content2 = element (content1|text|comment)
130
let rec content = element (text|comment|content|empty_element|pi_instruction)
132
(* Constraints are weaker here, but it's better than being too strict *)
133
let doc = (sep_osp . (prolog | comment | doctype | pi_instruction))* .
134
((sep_osp . content) | (sep_osp . empty_element)) .
135
(sep_osp . (comment | pi_instruction ))* . sep_osp
139
let filter = (incl "/etc/xml/*.xml")
140
. (incl "/etc/xml/catalog")
143
let xfm = transform lns filter