1
(* $Id: http_fs.ml 1661 2011-08-28 22:45:55Z gerd $ *)
4
- factor streaming get/put out
9
module StrSet = Set.Make(String)
13
[ Netfs.read_flag | `Header of (string*string)list ]
16
[ Netfs.read_file_flag | `Header of (string*string)list ]
19
[ Netfs.write_flag | `Header of (string*string)list ]
21
type write_file_flag =
22
[ Netfs.write_file_flag | `Header of (string*string)list ]
24
type any_wflag = [ write_flag | write_file_flag ]
26
class type http_stream_fs =
28
method read : read_flag list -> string -> Netchannels.in_obj_channel
29
method read_file : read_file_flag list -> string -> Netfs.local_file
30
method write : write_flag list -> string -> Netchannels.out_obj_channel
31
method write_file : write_file_flag list -> string -> Netfs.local_file -> unit
32
method last_response_header : Nethttp.http_header
33
method pipeline : Http_client.pipeline
34
method translate : string -> string
36
method path_encoding : Netconversion.encoding option
37
method path_exclusions : (int * int) list
38
method nominal_dot_dot : bool
39
method size : Netfs.size_flag list -> string -> int64
40
method test : Netfs.test_flag list -> string -> Netfs.test_type -> bool
41
method test_list : Netfs.test_flag list -> string -> Netfs.test_type list -> bool list
42
method remove : Netfs.remove_flag list -> string -> unit
43
method rename : Netfs.rename_flag list -> string -> string -> unit
44
method symlink : Netfs.symlink_flag list -> string -> string -> unit
45
method readdir : Netfs.readdir_flag list -> string -> string list
46
method readlink : Netfs.readlink_flag list -> string -> string
47
method mkdir : Netfs.mkdir_flag list -> string -> unit
48
method rmdir : Netfs.rmdir_flag list -> string -> unit
49
method copy : Netfs.copy_flag list -> string -> string -> unit
50
method cancel : unit -> unit
53
(* ensure this is a subtype of Netfs.stream_fs *)
54
let _f (x : http_stream_fs) =
55
ignore(x :> Netfs.stream_fs)
58
let http_body ~open_value_rd ~open_value_wr =
60
method open_value_rd = open_value_rd
61
method open_value_wr = open_value_wr
62
method value = failwith "value: not supported here"
63
method store = failwith "store: not supported here"
64
method finalize() = ()
66
method set_value _ = failwith "set_value: not supported here"
70
let buffer_body ?(add_sub_string = Netpagebuffer.add_sub_string)
71
buf eof ondata onempty drop : Netmime.mime_body =
72
(* ondata: is called whenever new data becomes available. The arg is
73
the number of buffered bytes
74
onempty: is called when the input side runs out of data
75
drop: this number of bytes are dropped
77
let drop = ref drop in
81
let (inch : Netchannels.rec_in_channel) =
83
method input s pos len =
84
let buf_len = Netpagebuffer.length buf in
86
if !eof then raise End_of_file else onempty()
88
let buf_len = Netpagebuffer.length buf in
89
let n = min len buf_len in
90
Netpagebuffer.blit_to_string buf 0 s pos n;
91
Netpagebuffer.delete_hd buf n;
97
Netchannels.lift_in ~buffered:false (`Rec inch)
101
let (out : Netchannels.rec_out_channel) =
103
method output s pos len =
104
let d = Int64.to_int (min !drop (Int64.of_int len)) in
105
drop := Int64.sub !drop (Int64.of_int d);
106
let len' = len - d in
108
add_sub_string buf s (pos + d) len';
109
ondata(Netpagebuffer.length buf)
115
ondata(Netpagebuffer.length buf)
118
Netchannels.lift_out ~buffered:false (`Rec out)
122
let discarding_body() : Netmime.mime_body =
126
let (inch : Netchannels.rec_in_channel) =
128
method input s pos len =
134
Netchannels.lift_in ~buffered:false (`Rec inch)
138
let (out : Netchannels.rec_out_channel) =
140
method output s pos len =
143
method close_out() = ()
146
Netchannels.lift_out ~buffered:false (`Rec out)
150
let drop_out_channel ch drop =
151
(* drops the first n bytes written to it, and the remaining bytes are
154
let drop = ref drop in
157
method output s pos len =
158
let d = Int64.to_int (min !drop (Int64.of_int len)) in
159
drop := Int64.sub !drop (Int64.of_int d);
160
let len' = len - d in
161
ch # really_output s (pos+d) len';
163
method flush() = ch # flush()
164
method close_out() = ch # close_out()
167
Netchannels.lift_out ~buffered:false (`Rec out)
170
let rec find_file_members =
172
| Nethtml.Element(e,atts,subl) ->
174
if String.lowercase e = "a" then (
176
let href = List.assoc "href" atts in
177
if href="" || href.[0] = '/' then raise Not_found;
179
let i = String.index href '/' in
180
if i+1=String.length href then
181
[String.sub href 0 (String.length href-1)]
184
with Not_found -> [href]
188
m @ find_file_members_in_list subl
189
| Nethtml.Data _ -> []
190
and find_file_members_in_list l =
192
(List.map find_file_members l)
195
let unique_str_list l =
197
List.fold_left (fun acc s -> StrSet.add s acc) StrSet.empty l in
204
let is_error_response ?(precondfailed = Unix.EPERM) path call =
205
match call#status with
207
| `Successful -> None
208
| `Http_protocol_error e -> Some e
209
| `Redirection | `Client_error | `Server_error ->
210
( match call # response_status with
212
Some(Unix.Unix_error(Unix.ENOENT, "Http_fs", path))
213
| `Forbidden | `Unauthorized ->
214
Some(Unix.Unix_error(Unix.EACCES, "Http_fs", path))
215
| `Precondition_failed ->
216
Some(Unix.Unix_error(precondfailed, "Http_fs", path))
218
Some(Unix.Unix_error(Unix.EPERM, "Http_fs", path))
222
let find_flag f flags =
236
?(config_pipeline = fun p -> ())
240
?(path_encoding = `Enc_utf8)
241
?(enable_read_for_directories=false)
243
(* ?(is_error_response = is_error_response) *)
244
base_url : http_stream_fs =
245
let p = new Http_client.pipeline in
248
let ftp_syn = Hashtbl.find Neturl.common_url_syntax "ftp" in
249
let opts = p # get_options in
252
Http_client.schemes = opts.Http_client.schemes @
253
[ "ftp", ftp_syn, Some 21, Http_client.proxy_only_cb_id ]
255
p # set_options opts'
257
let () = config_pipeline p in
258
let base_url_ends_with_slash =
259
base_url <> "" && base_url.[String.length base_url-1] = '/' in
260
let einval path detail =
261
raise(Unix.Unix_error(Unix.EINVAL, detail, path)) in
264
einval path "Http_fs: path is empty";
265
if path.[0] <> '/' then
266
einval path "Http_fs: path is not absolute";
267
if String.contains path '\000' then
268
einval path "Http_fs: path contains NUL byte";
270
Netconversion.verify path_encoding path
272
| Netconversion.Malformed_code_at _ ->
273
einval path "Http_fs: path is not properly encoded"
275
let npath = Neturl.norm_path(Neturl.split_path path) in
276
let npath_s = Neturl.join_path npath in
278
| "" :: ".." :: _ -> (* CHECK: maybe ENOENT? *)
279
einval path "Http_fs: path starts with /.."
282
if base_url_ends_with_slash then
283
base_url ^ String.sub npath_s 1 (String.length npath_s - 1)
287
let handle_error ?precondfailed path call =
288
match call#status with
289
| `Unserved -> assert false
290
| `Successful -> assert false
292
( match is_error_response ?precondfailed path call with
294
failwith "Http_fs: No response received but \
295
is_error_response does not indicate an error"
301
url <> "" && url.[String.length url - 1] = '/' in
302
let check_dir url path =
303
if is_dir_url url then
304
raise(Unix.Unix_error(Unix.EISDIR, "Http_fs", path)) in
307
with Interrupt -> () in
308
let last_response_header = ref None in
310
let cancel_flag = ref (ref false) in
314
method path_encoding = Some path_encoding
315
method path_exclusions = [0,0; 47,47]
316
method nominal_dot_dot = true
318
method translate = translate
320
method last_response_header =
321
match !last_response_header with
327
method read flags path =
328
let url = translate path in
329
let call = new Http_client.get url in
330
let g = Unixqueue.new_group p#event_system in
331
let req_hdr = call # request_header `Base in
333
try find_flag (function `Skip p -> Some p | _ -> None) flags
334
with Not_found -> 0L in
336
Nethttp.Header.set_range req_hdr (`Bytes[Some skip, None]);
337
call # set_accept_encoding();
339
try find_flag (function `Header h -> Some h | _ -> None) flags
340
with Not_found -> [] in
341
List.iter (fun (n,v) -> req_hdr # update_field n v) header;
342
last_response_header := None;
344
if streaming || List.mem `Streaming flags then (
345
let onempty() = () in
346
let page_size = Netsys_mem.pagesize in
347
let buf = Netpagebuffer.create page_size in
348
let eof = ref false in
349
let running = ref true in (* prevents that Interrupt escapes *)
352
Unixqueue.once p#event_system g 0.0
353
(fun () -> if !running then raise Interrupt) in
354
let cur_ch = ref None in
355
let call_done = ref false in
356
call # set_response_body_storage
358
(* Check whether this is the last body *)
359
last_response_header := Some(call#response_header);
360
match is_error_response path call with
362
if !cur_ch <> None then
363
failwith "Http_fs: unexpected reconnect";
364
let code = call#response_status_code in
366
(* Maybe Range headers are not supported: *)
367
if code = 200 && skip > 0L then skip else 0L in
368
let body = buffer_body buf eof ondata onempty drop in
369
cur_ch := Some (body # open_value_rd());
374
(* We cannot reconnect in streaming mode :-( *)
375
call # set_reconnect_mode Http_client.Request_fails;
376
p # add_with_callback call (fun _ -> call_done := true);
377
(* Wait until data is available, or the whole call is done (in case
385
handle_error path call (* raise exception *)
388
if not enable_read_for_directories then
389
check_dir (call # effective_request_uri) path;
390
let (ch : Netchannels.rec_in_channel) =
392
method input s pos len =
393
while (not !call_done && not !eof &&
394
Netpagebuffer.length buf < 16 * page_size)
398
if !call_done then run(); (* ensure proper shutdown *)
400
c_ch # input s pos len
403
(* check for pending error *)
404
( match is_error_response path call with
405
| None -> raise End_of_file
406
| Some e -> handle_error path call
412
while not !call_done do run() done;
414
(* We ignore any pending error here *)
416
| err -> running := false; raise err
419
Netchannels.lift_in ~buffered:true (`Rec ch)
421
| err -> running := false; raise err
424
let cur_tmp = ref None in
425
call # set_response_body_storage
427
last_response_header := Some(call#response_header);
428
(* Check whether this is the last body *)
429
match is_error_response path call with
431
let (tmp_name, inch, outch) =
432
Netchannels.make_temporary_file
433
?tmp_directory ?tmp_prefix () in
434
cur_tmp := Some (tmp_name, inch);
435
let code = call#response_status_code in
437
(* Maybe Range headers are not supported: *)
438
if code = 200 && skip > 0L then skip else 0L in
440
~open_value_rd:(fun () ->
441
new Netchannels.input_channel inch)
445
(new Netchannels.output_channel outch)
455
handle_error path call (* raise exception *)
456
| Some (tmp_name,c_ch) ->
458
Unix.unlink tmp_name; (* CHECK Win32 *)
459
if not enable_read_for_directories then
460
check_dir (call # effective_request_uri) path;
461
call # response_body # open_value_rd()
464
method read_file flags path =
465
let url = translate path in
466
let call = new Http_client.get url in
467
let req_hdr = call # request_header `Base in
468
call # set_accept_encoding();
470
try find_flag (function `Header h -> Some h | _ -> None) flags
471
with Not_found -> [] in
472
List.iter (fun (n,v) -> req_hdr # update_field n v) header;
473
last_response_header := None;
475
let cur_tmp = ref None in
476
call # set_response_body_storage
478
last_response_header := Some(call#response_header);
479
(* Check whether this is the last body *)
480
match is_error_response path call with
482
let (tmp_name, inch, outch) =
483
Netchannels.make_temporary_file
484
?tmp_directory ?tmp_prefix () in
486
cur_tmp := Some tmp_name;
488
~open_value_rd:(fun () ->
489
new Netchannels.input_channel inch)
490
~open_value_wr: (fun () ->
491
new Netchannels.output_channel outch)
500
handle_error path call (* raise exception *)
504
try Unix.unlink tmp_name with _ -> () in
505
if not enable_read_for_directories then
506
check_dir (call # effective_request_uri) path;
508
method filename = tmp_name
513
method write flags path =
514
self # write_impl (flags :> any_wflag list) path None
516
method write_file flags path local =
517
ignore(self # write_impl (flags :> any_wflag list) path (Some local))
519
method private write_impl flags path local_opt =
520
let this_cancel_flag = !cancel_flag in
521
let url = translate path in
522
let call = new Http_client.put_call in
523
call # set_request_uri url;
524
let g = Unixqueue.new_group p#event_system in
525
let req_hdr = call # request_header `Base in
526
req_hdr # update_field "Content-Type" "application/octet-stream";
527
req_hdr # update_field "Expect" "100-continue";
529
try find_flag (function `Header h -> Some h | _ -> None) flags
530
with Not_found -> [] in
531
List.iter (fun (n,v) -> req_hdr # update_field n v) header;
532
last_response_header := None;
534
let create_flag = List.mem `Create flags in
535
let trunc_flag = List.mem `Truncate flags in
536
let excl_flag = List.mem `Exclusive flags in
538
if not create_flag && not trunc_flag then
539
einval path "Http_fs.write: you need to request either file creation \
542
let precondfailed = ref Unix.EPERM in
544
if create_flag && excl_flag then (
545
req_hdr # update_field "If-None-Match" "*";
546
(* = do PUT only if the file does not exist *)
547
precondfailed := Unix.EEXIST;
549
if create_flag && not excl_flag && not trunc_flag then (
550
req_hdr # update_field "If-None-Match" "*";
551
(* = do PUT only if the file does not exist *)
552
precondfailed := Unix.EPERM;
554
if not create_flag then (
555
req_hdr # update_field "If-Match" "*";
556
(* = do PUT only if the file exists *)
557
precondfailed := Unix.ENOENT;
560
let precondfailed = !precondfailed in
562
last_response_header := None;
563
if (streaming || List.mem `Streaming flags) && local_opt = None then (
564
(* We cannot reconnect in streaming mode :-( *)
565
call # set_reconnect_mode Http_client.Request_fails;
566
(* We have to use chunked transfer encoding: *)
567
req_hdr # update_field "Transfer-Encoding" "chunked";
569
let page_size = Netsys_mem.pagesize in
570
let buf = Netpagebuffer.create page_size in
571
let eof = ref false in
572
let added = ref false in
573
let running = ref true in (* prevents that Interrupt escapes *)
576
if n>=16*page_size || !eof then (
583
Netpagebuffer.add_string buf "0\r\n\r\n"
588
(* check for errors *)
589
last_response_header := Some(call#response_header);
590
match is_error_response ~precondfailed path call with
592
| Some e -> handle_error ~precondfailed path call
596
Unixqueue.once p#event_system g 0.0
597
(fun () -> if !running then raise Interrupt) in
598
let add_sub_string buf s pos len =
599
(* Create a chunk: *)
600
Netpagebuffer.add_string buf (sprintf "%x\r\n" len);
601
Netpagebuffer.add_sub_string buf s pos len;
602
Netpagebuffer.add_string buf "\r\n";
604
let body = buffer_body ~add_sub_string buf eof ondata onempty 0L in
605
call # set_request_body body;
607
body # open_value_wr()
610
let (fname, mk_return, close) =
614
Netchannels.make_temporary_file
615
?tmp_directory ?tmp_prefix () in
618
new Netchannels.output_channel ~onclose outch in
620
(try Unix.unlink n with _ -> ()) in
625
new Netchannels.output_null() in
626
(local#filename, mkr, local#close) in
629
if !this_cancel_flag then
632
let st = Unix.LargeFile.stat fname in
633
req_hdr # update_field
634
"Content-length" (Int64.to_string st.Unix.LargeFile.st_size);
635
call # set_request_body (new Netmime.file_mime_body fname);
640
with e -> close(); raise e
642
last_response_header := Some(call#response_header);
643
match is_error_response ~precondfailed path call with
655
(* This cancellation affects all [write]s that were started until
658
let this_cancel_flag = !cancel_flag in
659
this_cancel_flag := true;
660
(* All new [write]s are not cancelled, of course: *)
661
cancel_flag := (ref false)
664
last_response_header := None;
665
let url = translate path in
666
let call = new Http_client.head url in
669
last_response_header := Some(call#response_header);
670
match is_error_response path call with
673
Int64.of_string(call # response_header # field "Content-length")
676
raise(Unix.Unix_error(Unix.ESPIPE,"Http_fs",path))
678
raise(Http_client.Bad_message
679
("Field Content-length: Parse error"))
682
handle_error path call
684
method test flags path t =
685
List.hd(self # test_list flags path [t])
687
method test_list flags path tl =
688
last_response_header := None;
689
let url = translate path in
690
let call = new Http_client.head url in
693
last_response_header := Some(call#response_header);
694
match is_error_response path call with
697
is_dir_url(call # effective_request_uri) in
701
(call # response_header # field "Content-length") > 0L
718
(* We only raise protocol exceptions *)
719
match call#status with
720
| `Unserved -> assert false
721
| `Http_protocol_error e -> raise e
722
| `Successful -> assert false
723
| `Redirection | `Client_error | `Server_error ->
724
List.map (fun _ -> false) tl
726
method readdir _ path =
728
(* We generally return ENOTDIR - meaning we cannot access this as
731
raise(Unix.Unix_error(Unix.ENOTDIR,"Http_fs.readdir",path)) in
732
last_response_header := None;
734
if path <> "" && path.[String.length path - 1] <> '/' then
738
let url = translate path1 in
739
let call = new Http_client.get url in
742
last_response_header := Some(call#response_header);
743
match is_error_response path call with
745
(* The is_dir_url test only works if the server redirects to a
746
URL ending with a slash. Some servers don't do this.
748
(* if not (is_dir_url(call # effective_request_uri)) then fail(); *)
749
(* Get the MIME type and the charset of the result: *)
750
let (cont_type, charset) =
752
let (cont_type, params) = call#response_header#content_type() in
754
try Mimestring.param_value(List.assoc "charset" params)
755
with Not_found -> "US-ASCII" in
756
(* FIXME: We could also look into the doc *)
759
(* we only support text/html: *)
760
if String.lowercase cont_type <> "text/html" then fail();
761
(* check if we know the encoding: *)
763
try Netconversion.encoding_of_string charset
765
(* convert to UTF-8 *)
766
let text0 = call#response_body#value in
769
Netconversion.convert ~in_enc:enc ~out_enc:`Enc_utf8 text0
773
let html = Nethtml.parse (new Netchannels.input_string text) in
774
let names = find_file_members_in_list html in
775
(* Convert the names to our path encoding. Omit names with
778
let base_syntax = Neturl.ip_url_syntax in
785
Neturl.parse_url ~base_syntax ~accept_8bits:true
786
(Neturl.fixup_url_string name) in
787
let q1 = Neturl.url_path u in
788
(* Some URLs contain "%2f" in the path. We don't like
791
if List.exists (fun s -> String.contains s '/') q1 then
794
if q1 <> [] && q1 <> [""] then
795
let r1 = List.rev q1 in
796
if List.hd r1 = "" then
802
if q2 <> [] && not(List.mem "" q2) then (
803
let qj = Neturl.join_path q2 in
804
Netconversion.verify path_encoding qj;
812
let names2 = "." :: ".." :: names1 in
813
unique_str_list names2
814
| Some(Unix.Unix_error(Unix.ENOENT,_,_)) ->
815
fail() (* prefer ENOTDIR in this case *)
817
handle_error path call
819
method remove flags path =
820
last_response_header := None;
821
if List.mem `Recursive flags then
822
raise(Unix.Unix_error(Unix.EINVAL,
823
"Http_fs.remove: recursion not supported",
825
let url = translate path in
826
let call = new Http_client.get url in
829
last_response_header := Some(call#response_header);
830
match is_error_response path call with
834
handle_error path call
836
method copy _ path1 path2 = assert false (* TODO *)
838
method rename _ path1 path2 =
839
raise(Unix.Unix_error(Unix.ENOSYS, "Http_fs.rename", path1))
841
method readlink _ path =
842
raise(Unix.Unix_error(Unix.ENOSYS, "Http_fs.readlink", path))
844
method symlink _ path1 path2 =
845
raise(Unix.Unix_error(Unix.ENOSYS, "Http_fs.symlink", path1))
847
method mkdir _ path =
848
raise(Unix.Unix_error(Unix.ENOSYS, "Http_fs.mkdir", path))
850
method rmdir _ path =
851
raise(Unix.Unix_error(Unix.ENOSYS, "Http_fs.rmdir", path))
856
let http_fs = new http_fs