1
(* $Id: netglob.ml 1659 2011-08-23 17:28:03Z gerd $ *)
6
type glob_expr = glob_expr_atom list
12
| `Bracket of (bool * glob_set)
13
| `Brace of glob_expr list
17
and glob_set = < set : (int * int) list >
20
type valid_glob_expr =
22
encoding : Netconversion.encoding;
25
exception Bad_glob_expr of string
27
exception Unsupported_expr of string
29
class type user_info =
31
method path_encoding : Netconversion.encoding option
32
method home_directory : string -> string
35
class type glob_fsys =
37
method path_encoding : Netconversion.encoding option
38
method read_dir : string -> string list
39
method file_is_dir : string -> bool
40
method file_exists : string -> bool
43
type glob_mode = [ `Existing_paths
48
type pattern = [ `String of string | `Expr of valid_glob_expr ]
51
let literal_glob_expr enc s =
52
{ pat = [ `Literal s ];
57
let reparse_bracket_expr enc l =
58
(* In order to support multi-byte encodings, reparse the expression
59
now. For simplifying this, we require that ranges (like c-d) are
60
purely ASCII. So only the chars outside ranges need to be reparsed
62
let rec collect buf toks =
64
| Bracket_char c :: toks' ->
65
Buffer.add_char buf c;
67
| Bracket_range(c1,c2) as tok :: toks' ->
68
let new_toks = reparse buf in
69
new_toks @ [tok] @ collect (Buffer.create 80) toks'
70
| Bracket_code _ :: _ ->
77
let s = Buffer.contents buf in
80
Netconversion.ustring_iter enc (fun i -> codes := i :: !codes) s
81
with _ -> raise Lexing_Error
84
(fun i -> Bracket_code i)
88
collect (Buffer.create 80) l
92
?(encoding = `Enc_iso88591)
94
?(enable_qmark = true)
95
?(enable_brackets = true)
96
?(enable_braces = true)
97
?(enable_tilde = true)
98
?(enable_escape = true)
101
if not (Netconversion.is_ascii_compatible encoding) then
103
"Netglob.parse_glob_expr: the encoding is not ASCII-compatible";
106
{ enable_star = enable_star;
107
enable_qmark = enable_qmark;
108
enable_brackets = enable_brackets;
109
enable_braces = enable_braces;
110
enable_tilde = enable_tilde;
111
enable_escape = enable_escape;
115
let rec collect_until lexbuf =
116
let tok = glob_expr feat lexbuf in
117
if tok = Glob_end then
120
tok :: (collect_until lexbuf)
123
let rec process_brace_list current list =
125
| Brace_literal s :: list' ->
126
let gl = collect_until (Lexing.from_string s) in
127
process_brace_list (current @ gl) list'
128
| Brace_braces l :: list' ->
129
process_brace_list (current @ [Glob_braces l]) list'
130
| Brace_comma :: list' ->
131
let ge = process_glob_list [] current in
132
ge :: process_brace_list [] list'
136
let ge = process_glob_list [] current in
139
and process_glob_list acc list =
141
| Glob_star :: list' ->
144
(* Ignore the second star! *)
145
process_glob_list acc list'
147
process_glob_list (`Star :: acc) list'
149
| Glob_qmark :: list' ->
150
process_glob_list (`Qmark :: acc) list'
151
| Glob_brackets (neg,btoks) :: list' ->
157
| Bracket_range (c1,c2) -> (* c1, c2 are ASCII *)
158
(Char.code c1, Char.code c2)
164
(reparse_bracket_expr encoding btoks) in
165
let set_obj = ( object method set = set end ) in
166
process_glob_list (`Bracket(neg,set_obj) :: acc) list'
167
| Glob_braces btoks :: list' ->
168
let alts = process_brace_list [] btoks in
169
process_glob_list (`Brace alts :: acc) list'
170
| Glob_literal s :: list' ->
173
| `Literal s' :: acc' ->
174
process_glob_list (`Literal(s' ^ s) :: acc') list'
176
process_glob_list (`Literal s :: acc) list'
179
process_glob_list acc list'
180
| Glob_tilde(s,slash) :: list' ->
182
if slash then [ `Literal "/"; `Tilde s ] else [ `Tilde s ] in
183
process_glob_list ( atoms @ acc ) list'
192
collect_until (Lexing.from_string s) in
195
process_glob_list [] glob_list in
202
| Bracket_Unsupported ->
203
raise (Unsupported_expr s)
205
raise (Bad_glob_expr s)
208
let validate_glob_expr enc expr =
210
try Netconversion.verify enc s
212
failwith "Netglob.validate_glob_expr: literal does not conform \
213
to selected pattern encoding" in
215
let rec validate ge =
217
| `Literal s :: ge' ->
219
failwith "Netglob.validate_glob_expr: empty literal";
222
| `Bracket(_,set) :: ge' ->
225
if j < 0 || k < 0 || j > k then
226
failwith "Netglob.validate_glob_expr: bad bracket set";
230
List.iter validate l;
239
if not (Netconversion.is_ascii_compatible enc) then
241
"Netglob.validate_glob_expr: the encoding is not ASCII-compatible";
247
let recover_glob_expr expr =
250
let encoding_of_glob_expr expr =
254
(* A more efficient representation for sets: *)
257
{ ascii : bool array;
258
non_ascii : (int, unit) Hashtbl.t
262
let ascii = Array.create 128 false in
263
let non_ascii = Hashtbl.create 13 in
271
Hashtbl.replace non_ascii p ()
275
{ ascii = ascii; non_ascii = non_ascii }
278
let rec mem_eset code eset =
279
if code >= 0 && code < 128 then
282
Hashtbl.mem eset.non_ascii code
288
if eset.ascii.(k) then incr n
290
!n + Hashtbl.length eset.non_ascii
293
let ascii_ranges eset =
294
let ranges = ref [] in
295
let inrange = ref None in
297
let p = eset.ascii.(k) in
300
if p then inrange := Some k
303
ranges := (q, k-1) :: !ranges;
307
( match !inrange with
309
| Some q -> ranges := (q, 127) :: !ranges
314
let rec exclude_set codes set =
318
let x' = if List.mem x codes then x+1 else x in
319
let y' = if List.mem y codes then y-1 else y in
320
if x = x' && y = y' && x <= y then
321
(x,y) :: exclude_set codes set'
322
else if x' <= y' then
323
exclude_set codes ( (x',y') :: set')
325
exclude_set codes set'
328
let print_set buf encoding neg_char negated set =
329
(* Always produce a portable expression: *)
330
let eset = to_eset set in
332
(* Check for special characters: *)
333
let want_minus = mem_eset (Char.code '-') eset in
334
let want_rbracket = mem_eset (Char.code ']') eset in
335
let want_circum = mem_eset (Char.code '^') eset in
336
let want_exclam = mem_eset (Char.code '!') eset in
337
let size = size_eset eset in
339
(* Check for very special sets: *)
340
if not negated && want_circum && size = 1 then
341
Buffer.add_string buf "^" (* "[^]" would not be portable enough *)
342
else if not negated && want_exclam && size = 1 then
343
Buffer.add_string buf "!" (* "[!]" would not be portable enough *)
344
else if not negated && want_circum && want_exclam && size = 2 then
345
failwith "print_glob_expr"
346
(* There is no portable representation *)
348
(* First create a set expression where the special characters
349
* '-', ']', '^', and '!' do not occur literally.
351
let empty = ref true in
352
let buf' = Buffer.create 200 in
353
let ascii_part = ascii_ranges eset in
355
exclude_set (List.map Char.code ['-'; ']'; '^'; '!']) ascii_part in
356
let ascii_part'_eset = to_eset ascii_part' in
360
Buffer.add_char buf' (Char.chr x0);
363
else if x0 <= x1 then (
364
Buffer.add_char buf' (Char.chr x0);
365
Buffer.add_char buf' '-';
366
Buffer.add_char buf' (Char.chr x1);
371
(* The non-ascii part is easy: *)
375
Netconversion.ustring_of_uarray encoding [| code |] in
376
Buffer.add_string buf' encoded
379
(* Check which of the special characters are already covered
382
let done_minus = mem_eset (Char.code '-') ascii_part'_eset in
383
let done_rbracket = mem_eset (Char.code ']') ascii_part'_eset in
384
let done_circum = mem_eset (Char.code '^') ascii_part'_eset in
385
let done_exclam = mem_eset (Char.code '!') ascii_part'_eset in
386
(* Begin with printing *)
389
(if negated then "[" ^ String.make 1 neg_char else "[");
390
(* ']' must always be the first character of the set: *)
391
if want_rbracket && not done_rbracket then (
392
Buffer.add_string buf "]";
395
Buffer.add_buffer buf buf';
396
(* '-' must be the first or the last character; '^' and '!' must
397
* not be the first character. So we usually print these
398
* characters in the order "^!-". One case is special: We have
399
* not yet printed any character. Then, "-" must be printed
400
* first (if member of the set), or we have one of the very
401
* special cases already tested above.
404
if want_minus && not done_minus then Buffer.add_char buf '-';
405
if want_circum && not done_circum then Buffer.add_char buf '^';
406
if want_exclam && not done_exclam then Buffer.add_char buf '!';
408
if want_circum && not done_circum then Buffer.add_char buf '^';
409
if want_exclam && not done_exclam then Buffer.add_char buf '!';
410
if want_minus && not done_minus then Buffer.add_char buf '-';
412
Buffer.add_char buf ']';
416
let esc_re = Netstring_str.regexp "[][*?{},\\~]";;
419
"\\" ^ Netstring_str.matched_group m 0 s
421
let print_glob_expr ?(escape_in_literals=true) expr =
422
let buf = Buffer.create 200 in
425
| `Literal s :: gl' ->
426
Buffer.add_string buf
427
(if escape_in_literals then
428
Netstring_str.global_substitute esc_re esc_subst s
434
Buffer.add_string buf "*";
437
Buffer.add_string buf "?";
439
| `Bracket (negated,set) :: gl' ->
440
print_set buf expr.encoding '!' negated set#set;
442
| `Brace ge_list :: gl' ->
443
Buffer.add_string buf "{";
444
let first = ref true in
447
if not !first then Buffer.add_string buf ",";
451
Buffer.add_string buf "}";
454
Buffer.add_char buf '~';
455
Buffer.add_string buf s;
464
class local_user_info() =
466
match Sys.os_type with
468
Netconversion.user_encoding()
471
method path_encoding = pe
473
method home_directory name =
474
(* Win32: only the HOME method works *)
477
try Sys.getenv "HOME"
479
let pw = Unix.getpwuid(Unix.getuid()) in
482
(Unix.getpwnam name).Unix.pw_dir
484
| _ -> raise Not_found
488
let local_user_info = new local_user_info
491
let rec product f l1 l2 =
496
List.map (fun x2 -> f x1 x2) l2 @ product f l1' l2
499
let rec expand_braces ge =
503
| `Brace gelist :: ge' ->
505
List.flatten (List.map expand_braces gelist) in
506
let ge_alts' = expand_braces ge' in
507
product ( @ ) gelist' ge_alts'
510
let ge_alts' = expand_braces ge' in
511
List.map (fun ge_alt' -> any :: ge_alt') ge_alts'
514
let rec expand_tildes encoding user_info ge =
518
| `Tilde name :: ge' ->
521
let dir = user_info#home_directory name in
522
if dir="" then raise Not_found; (* empty literals not allowed *)
523
( match user_info#path_encoding with
524
| None -> `Literal dir
526
if ui_enc = encoding then
530
(Netconversion.convert
531
~in_enc:ui_enc ~out_enc:encoding dir)
534
`Literal ("~" ^ name) in
535
atom :: expand_tildes encoding user_info ge'
537
any :: expand_tildes encoding user_info ge'
540
let expand_glob_expr ?(user_info=local_user_info())
541
?(expand_brace=true) ?(expand_tilde=true) expr =
544
expand_tildes expr.encoding user_info expr.pat
552
List.map (fun p -> { expr with pat = p }) pat_l
555
let period = Char.code '.'
556
let slash = Char.code '/'
558
let match_glob_expr ?(protect_period=true) ?(protect_slash=true)
561
let esets = Hashtbl.create 5 in
563
try Hashtbl.find esets set
565
let eset = to_eset set#set in
566
Hashtbl.add esets set eset;
570
Netconversion.uarray_of_ustring
571
( match encoding with
572
| None -> expr.encoding
576
let n = Array.length u in
578
let leading_period p =
580
(p = 0 || (protect_slash && u.(p - 1) = slash)) in
582
let rec match_at c ge =
584
| `Literal lit :: ge' ->
585
let lit_u = Netconversion.uarray_of_ustring expr.encoding lit in
586
let lit_n = Array.length lit_u in
589
for k = 0 to lit_n - 1 do
590
if c+k >= n then raise Not_found;
591
let code = u.(c+k) in
592
if code <> lit_u.(k) then raise Not_found;
596
| Not_found -> false in
597
ok && match_at (c+lit_n) ge'
600
let cont = ref true in
601
let found = ref false in
602
while c + !k <= n && not !found && !cont do
603
found := match_at (c + !k) ge';
606
(not protect_period || not (leading_period (c + !k))) &&
607
(not protect_slash || u.(c + !k) <> slash);
614
(not protect_period || not (leading_period c)) &&
615
(not protect_slash || u.(c) <> slash) in
616
ok && match_at (c+1) ge'
617
| `Bracket(neg,set) :: ge' ->
621
(not protect_slash || code <> slash) &&
622
(not protect_period || not (leading_period c)) && (
623
let eset = get_eset set in
624
let is_mem = mem_eset code eset in
631
failwith "Netglob.match_glob_expr: found `Brace subpattern"
633
failwith "Netglob.match_glob_expr: found `Tilde subpattern"
640
let skip_slashes s k =
641
let l = String.length s in
643
while !j < l && s.[!j] = '/' do incr j done;
646
let rev_skip_slashes s k =
648
while !j >= 0 && s.[!j] = '/' do decr j done;
652
let k = String.index s '/' in
653
let j = skip_slashes s (k+1) in
657
let split_glob_expr expr =
659
let rec split_loop is_first acc ge =
660
(* acc: accumulates the current component *)
664
| (`Literal s as atom) :: ge' ->
667
let (k,j) = search_slash s in (* or Not_found *)
668
let l = String.length s in
669
let s1 = String.sub s 0 k in (* part before '/' *)
670
let s2 = String.sub s j (l - j) in (* part after '/' *)
671
if is_first && k = 0 then (
672
(* Case: rooted expression *)
674
if s2 <> "" then (`Literal s2) :: ge' else ge' in
675
let comps = split_loop false [] ge'' in
676
(* N.B. comps is a list of lists... *)
678
| ( (`Literal s3) :: r ) :: l ->
679
( `Literal("/" ^ s3) :: r) :: l
681
(`Literal "/" :: r) :: l
686
if ge' = [] && s2 = "" then (
687
(* Case: component matches only directory *)
688
[ List.rev (`Literal (s1 ^ "/") :: acc) ]
692
if s1 <> "" then (`Literal s1)::acc else acc in
694
if s2 <> "" then (`Literal s2) :: ge' else ge' in
695
(List.rev acc') :: split_loop false [] ge''
699
split_loop false (atom::acc) ge'
701
| (`Star | `Qmark | `Bracket(_,_) as atom) :: ge' ->
702
split_loop false (atom::acc) ge'
705
failwith "Netglob.split_glob_expr: brace expression found"
708
failwith "Netglob.split_glob_expr: tilde expression found"
712
(fun p -> { expr with pat = p })
713
(split_loop true [] expr.pat)
716
let check_rooted_glob_expr expr =
718
| (`Literal s) :: r ->
720
if s.[0] = '/' then (
721
let j = skip_slashes s 1 in
722
let l = String.length s in
723
let s' = String.sub s j (l - j) in (* part after '/' *)
725
Some { expr with pat = r }
727
Some { expr with pat = `Literal s' :: r }
735
let check_directory_glob_expr expr =
736
match List.rev expr.pat with
737
| (`Literal s) :: r ->
740
let l = String.length s in
741
if s.[l-1] <> '/' then raise Not_found;
742
let k = rev_skip_slashes s (l-1) + 1 in
743
let s' = String.sub s 0 k in (* the part before '/' *)
745
Some { expr with pat = List.rev r }
747
Some { expr with pat = List.rev (`Literal s' :: r) }
754
class of_dual_stream_fs (abs_fs:Netfs.stream_fs) rel_fs =
755
let is_abs name = name <> "" && name.[0] = '/' in
760
(rel_fs, "/" ^ name) in
762
method path_encoding = abs_fs#path_encoding
763
method read_dir name =
764
let (fs,name) = fix name in
765
try fs#readdir [] name with _ -> []
766
method file_is_dir name =
767
let (fs,name) = fix name in
768
try fs#test [] name `D with _ -> false
769
method file_exists name =
770
let (fs,name) = fix name in
771
try fs#test [] name `E with _ -> false
775
class of_stream_fs fs0 =
776
let fs = (fs0 : #Netfs.stream_fs :> Netfs.stream_fs) in
777
of_dual_stream_fs fs fs
779
let of_stream_fs = new of_stream_fs
782
class local_fsys ?encoding () =
783
let abs_fs = Netfs.local_fs ?encoding () in
784
let rel_fs = Netfs.local_fs ?encoding ~root:"." () in
785
of_dual_stream_fs abs_fs rel_fs
787
let local_fsys = new local_fsys
793
let l = String.length d in
794
if l = 0 || d.[l-1] = '/' then
801
?(protect_period=true)
802
?(fsys = local_fsys())
804
?(mode = `Existing_paths)
807
(* File names and paths are encoded as [fsys] demands it.
808
The encoding of the pattern can be different!
811
let rec collect_and_match base_dir generated_prefix components =
812
match components with
814
if generated_prefix <> "" then [ generated_prefix ] else []
815
| comp :: components' ->
818
| Some d -> fn_concat d file
821
let dir_ge = check_directory_glob_expr comp in
827
let check_for_match only_dirs e file =
828
(* file is encoded in fsys#path_encoding. For matching, we
829
need to convert it to the encoding of the pattern.
833
match fsys#path_encoding with
834
| None -> `Enc_iso88591 (* so no conv errors possible *)
836
match_glob_expr ~protect_period ~encoding:pe e file &&
837
(not only_dirs || fsys#file_is_dir (full_path file))
839
| Netconversion.Cannot_represent _ -> false
845
(* s is encoded in expr.encoding. We need it here
850
match fsys#path_encoding with
853
Netconversion.convert
854
~in_enc:expr.encoding ~out_enc:pe s in
857
let path = full_path s' in
858
if fsys # file_exists path then
864
with Netconversion.Cannot_represent _
865
when mode = `Existing_paths -> []
868
let only_dirs = components' <> [] || dir_ge <> None in
869
let file_list = fsys#read_dir (full_path ".") in
870
(*eprintf "Files in %s: %s\n%!" (full_path ".") (String.concat "," file_list);*)
871
List.filter (check_for_match only_dirs comp') file_list
877
fn_concat generated_prefix file
878
^ (if dir_ge <> None then "/" else "") in
881
(Some(full_path file))
890
let collect_and_match_0 components =
891
match components with
892
| comp :: components' ->
893
( match check_rooted_glob_expr comp with
895
collect_and_match base_dir "" components
897
if comp'.pat = [] then
898
(* Special case "/" *)
901
collect_and_match (Some "/") "/" (comp' :: components')
907
let e_list = expand_glob_expr ?user_info expr in
911
let l = collect_and_match_0 (split_glob_expr e') in
912
if mode = `All_words && l = [] && e'.pat <> [] then
921
let glob ?encoding ?base_dir ?protect_period ?fsys ?user_info ?mode pat =
924
glob1 ?base_dir ?protect_period ?fsys ?user_info ?mode e
927
parse_glob_expr ?encoding s in
928
glob1 ?base_dir ?protect_period ?fsys ?user_info ?mode e