1
(* $Id: netstring_pcre.mlp 1588 2011-04-28 13:59:54Z gerd $
2
* ----------------------------------------------------------------------
8
type regexp = Pcre.regexp;;
9
type split_result = Pcre.split_result =
12
| Group of int * string
16
type result = Pcre.substrings;;
19
Pcre.regexp ~flags:[`MULTILINE] s
22
let regexp_case_fold s =
23
Pcre.regexp ~flags:[`MULTILINE; `CASELESS] s
34
let regexp_string_case_fold s =
35
regexp_case_fold (quote s)
38
let string_match pat s pos =
40
let result = Pcre.exec ~rex:pat ~flags:[`ANCHORED] ~pos s in
42
with Not_found -> None
45
let search_forward pat s pos =
46
let result = Pcre.exec ~rex:pat ~pos s in
47
fst (Pcre.get_substring_ofs result 0), result
50
let search_backward pat s pos =
53
(* `ANCHORED: virtually prepends "^" to the regexp *)
54
let result = Pcre.exec ~flags:[`ANCHORED] ~rex:pat ~pos:p s in
55
fst (Pcre.get_substring_ofs result 0), result
58
if p > 0 then search (p-1) else raise Not_found
63
let matched_string result _ =
64
(* Unfortunately, Pcre.get_substring will not raise Not_found if there is
65
* no matched string. Instead, it returns "", but this value cannot be
66
* distinguished from an empty match.
67
* The workaround is to call Pcre.get_substring_ofs first. This function
68
* will raise Not_found if there is not any matched string.
70
* NOTE: Current versions of Pcre do return Not_found!
72
ignore(Pcre.get_substring_ofs result 0);
73
Pcre.get_substring result 0
76
let match_beginning result =
77
fst (Pcre.get_substring_ofs result 0)
80
let match_end result =
81
snd (Pcre.get_substring_ofs result 0)
84
let matched_group result n _ =
85
(* See also the comment for [matched_string] *)
86
if n < 0 || n >= Pcre.num_of_subs result then raise Not_found;
87
ignore(Pcre.get_substring_ofs result n);
88
Pcre.get_substring result n
91
let group_beginning result n =
92
fst (Pcre.get_substring_ofs result n)
95
let group_end result n =
96
snd (Pcre.get_substring_ofs result n)
99
let templ_re = Pcre.regexp "(?:\\\\\\d)|[\\$\\\\]" ;;
100
(* matches a backslash and a digit, or a single dollar or a single
105
(* Convert \n to $n etc. *)
106
(* Unfortunately we cannot just replace \ by $. *)
109
Pcre.Delim "$" :: l' -> "$$" :: tr l'
110
| Pcre.Delim "\\" :: Pcre.Delim "$" :: l' -> "$$" :: tr l'
111
| Pcre.Delim "\\" :: Pcre.Delim s :: l' -> s :: tr l'
112
| Pcre.Delim "\\" :: Pcre.Text s :: l' -> s :: tr l'
113
| [ Pcre.Delim "\\" ] -> failwith "trailing backslash"
114
| Pcre.Delim d :: l' ->
115
assert(d.[0] = '\\');
116
let n = Char.code d.[1] - Char.code '0' in
120
("$" ^ string_of_int n ^ "$!") :: tr l'
121
| Pcre.Text t :: l' -> t :: tr l'
122
| Pcre.Group(_,_) :: _ -> assert false
123
| Pcre.NoGroup :: _ -> assert false
126
let l = Pcre.full_split ~rex:templ_re ~max:(-1) s in
127
String.concat "" (tr l)
131
let global_replace pat templ s =
132
Pcre.replace ~rex:pat ~itempl:(Pcre.subst (tr_templ templ)) s
135
let replace_first pat templ s =
136
Pcre.replace_first ~rex:pat ~itempl:(Pcre.subst (tr_templ templ)) s
139
let global_substitute pat subst s =
140
Pcre.substitute_substrings ~rex:pat ~subst:(fun r -> subst r s) s
143
let string_before s n =
147
let string_after s n =
148
String.sub s n (String.length s - n)
151
let first_chars s len =
155
let last_chars s len =
156
String.sub s (String.length s - len) len
160
* Uncomment for next version of Pcre
161
let substitute_first ?groups ~pat ~subst s =
162
Pcre.substitute_substrings_first ~rex:pat ~subst:(fun r -> subst r s) s
166
let substitute_first pat subst s =
167
(* Do it yourself in the meantime *)
169
let substrs = Pcre.exec ~rex:pat s in (* or Not_found *)
170
let (match_beg,match_end) = Pcre.get_substring_ofs substrs 0 in
171
let replacement_text = subst substrs s in
172
String.concat "" [string_before s match_beg;
174
string_after s match_end]
181
(* Copied from Str for exact compatibility: *)
182
let bounded_split expr text num =
185
let start_substrs = Pcre.exec ~rex:expr ~flags:[`ANCHORED] text in
187
let (_,match_end) = Pcre.get_substring_ofs start_substrs 0 in
192
let rec split start n =
193
if start >= String.length text then [] else
194
if n = 1 then [string_after text start] else
196
let next_substrs = Pcre.exec ~rex:expr ~pos:start text
197
in (* or Not_found *)
198
let (pos,match_end) = Pcre.get_substring_ofs next_substrs 0 in
199
String.sub text start (pos-start) :: split match_end (n-1)
201
[string_after text start] in
205
let split sep s = bounded_split sep s 0
208
(* Copied from Str for exact compatibility: *)
209
let bounded_split_delim expr text num =
210
let rec split start n =
211
if start > String.length text then [] else
212
if n = 1 then [string_after text start] else
214
let next_substrs = Pcre.exec ~rex:expr ~pos:start text
215
in (* or Not_found *)
216
let (pos,match_end) = Pcre.get_substring_ofs next_substrs 0 in
217
String.sub text start (pos-start) :: split match_end (n-1)
219
[string_after text start] in
220
if text = "" then [] else split 0 num
223
let split_delim sep text = bounded_split_delim sep text 0 ;;
225
let full_split sep s =
226
Pcre.full_split ~rex:sep ~max:(-1) s
229
let bounded_full_split sep s max =
230
let max' = if max <= 0 then -1 else max in
231
Pcre.full_split ~rex:sep ~max:max' s
239
| Group of int * string
244
invalid_arg "Netstring_pcre: unavailable"
245
let regexp_case_fold _ =
246
invalid_arg "Netstring_pcre: unavailable"
248
invalid_arg "Netstring_pcre: unavailable"
249
let regexp_string _ =
250
invalid_arg "Netstring_pcre: unavailable"
251
let regexp_string_case_fold _ =
252
invalid_arg "Netstring_pcre: unavailable"
253
let string_match _ _ _ =
254
invalid_arg "Netstring_pcre: unavailable"
255
let search_forward _ _ _ =
256
invalid_arg "Netstring_pcre: unavailable"
257
let search_backward _ _ _ =
258
invalid_arg "Netstring_pcre: unavailable"
259
let matched_string _ _ =
260
invalid_arg "Netstring_pcre: unavailable"
261
let match_beginning _ =
262
invalid_arg "Netstring_pcre: unavailable"
264
invalid_arg "Netstring_pcre: unavailable"
265
let matched_group _ _ _ =
266
invalid_arg "Netstring_pcre: unavailable"
267
let group_beginning _ _ =
268
invalid_arg "Netstring_pcre: unavailable"
270
invalid_arg "Netstring_pcre: unavailable"
271
let global_replace _ _ _ =
272
invalid_arg "Netstring_pcre: unavailable"
273
let replace_first _ _ _ =
274
invalid_arg "Netstring_pcre: unavailable"
275
let global_substitute _ _ _ =
276
invalid_arg "Netstring_pcre: unavailable"
277
let substitute_first _ _ _ =
278
invalid_arg "Netstring_pcre: unavailable"
280
invalid_arg "Netstring_pcre: unavailable"
281
let bounded_split _ _ _ =
282
invalid_arg "Netstring_pcre: unavailable"
283
let split_delim _ _ =
284
invalid_arg "Netstring_pcre: unavailable"
285
let bounded_split_delim _ _ =
286
invalid_arg "Netstring_pcre: unavailable"
288
invalid_arg "Netstring_pcre: unavailable"
289
let bounded_full_split _ _ _ =
290
invalid_arg "Netstring_pcre: unavailable"
291
let string_before _ _ =
292
invalid_arg "Netstring_pcre: unavailable"
293
let string_after _ _ =
294
invalid_arg "Netstring_pcre: unavailable"
295
let first_chars _ _ =
296
invalid_arg "Netstring_pcre: unavailable"
298
invalid_arg "Netstring_pcre: unavailable"