1
(* $Id: pxp_lexing.mlp,v 1.4 2002/03/15 16:14:40 gerd Exp $
1
(* $Id: pxp_lexing.mlp 662 2004-05-25 20:57:28Z gerd $
2
2
* ----------------------------------------------------------------------
3
3
* PXP: The polymorphic XML parser for Objective Caml.
4
4
* Copyright by Gerd Stolpmann. See LICENSE for details.
7
(* This is a version of lexing.ml (from stdlib) that can cope with large
11
(* In recent CVS versions of O'Caml the record component lex_buffer_len
12
* has been renamed to lex_buffer_end.
17
9
type lexbuf = Lexing.lexbuf
19
let lex_refill read_fun aux_buffer lexbuf =
21
read_fun aux_buffer (String.length aux_buffer) in
25
else (lexbuf.lex_eof_reached <- true; 0) in
26
(* Is there enough space at the end of the buffer? *)
27
let space = String.length lexbuf.lex_buffer - lexbuf.LEX_BUFFER_LEN in
28
if space < n then begin
30
(* First try to remove the first lex_start_pos bytes of the buffer *)
31
let s = lexbuf.lex_start_pos in
32
let space' = space - s in
33
let efflen = lexbuf.LEX_BUFFER_LEN - s in
34
if space' >= n then begin
35
(* There is enough space at the beginning of the buffer *)
36
String.(*unsafe_*)blit lexbuf.lex_buffer s lexbuf.lex_buffer 0 efflen;
39
(* Allocate a bigger buffer *)
40
let oldlen = String.length lexbuf.lex_buffer in
41
let newlen = max (oldlen * 2) (n + efflen) in
43
if newlen > Sys.max_string_length && n+efflen <= Sys.max_string_length
44
then Sys.max_string_length
46
let newbuf = String.create newlen in
47
String.(*unsafe_*)blit lexbuf.lex_buffer s newbuf 0 efflen;
48
lexbuf.lex_buffer <- newbuf;
50
(* Anyway, the first s bytes have been removed. Update the positions. *)
51
lexbuf.lex_abs_pos <- lexbuf.lex_abs_pos + s;
52
lexbuf.lex_curr_pos <- lexbuf.lex_curr_pos - s;
53
lexbuf.lex_start_pos <- 0;
54
lexbuf.lex_last_pos <- lexbuf.lex_last_pos - s;
55
lexbuf.LEX_BUFFER_LEN <- efflen;
57
(* There is now enough space at the end of the buffer *)
58
String.unsafe_blit aux_buffer 0
59
lexbuf.lex_buffer lexbuf.LEX_BUFFER_LEN
61
lexbuf.LEX_BUFFER_LEN <- lexbuf.LEX_BUFFER_LEN + n
64
{ refill_buff = lex_refill f (String.create 512);
65
lex_buffer = String.create 1024;
72
lex_eof_reached = false }
75
from_function (fun buf n -> input ic buf 0 n)
78
{ refill_buff = (fun lexbuf -> lexbuf.lex_eof_reached <- true);
80
LEX_BUFFER_LEN = String.length s;
86
lex_eof_reached = true }
11
(* We are now again using the functions from Lexing. They are ok since
12
* O'Caml 3.05. In O'Caml 3.04 and earlier versions, the Lexing module
13
* had serious performance problems for very large tokens.
15
* Note that the type of lexbuf has changed between O'Caml 3.06 and 3.07.
16
* (New components lex_mem, lex_start_p, lex_curr_p.)
19
let from_function = Lexing.from_function
21
let from_channel = Lexing.from_channel
23
let from_string = Lexing.from_string
88
25
let lexeme = Lexing.lexeme
89
26
let lexeme_char = Lexing.lexeme_char
90
27
let lexeme_start = Lexing.lexeme_start
91
28
let lexeme_end = Lexing.lexeme_end
30
let lexeme_len lexbuf =
31
lexbuf.lex_curr_pos - lexbuf.lex_start_pos
93
33
let from_string_inplace s =
94
34
(* avoids copying s *)
95
{ refill_buff = (fun lexbuf -> lexbuf.lex_eof_reached <- true);
96
lex_buffer = s; (* instead of s ^ "" *)
97
LEX_BUFFER_LEN = String.length s;
103
lex_eof_reached = true }
35
let lb = from_string "" in
38
lex_buffer_len = String.length s
106
42
let from_another_string_inplace lexbuf s =
107
43
(* uses lexbuf again for another string (avoids memory allocation) *)
108
44
lexbuf.lex_buffer <- s;
109
lexbuf.LEX_BUFFER_LEN <- String.length s;
45
lexbuf.lex_buffer_len <- String.length s;
110
46
lexbuf.lex_abs_pos <- 0;
111
47
lexbuf.lex_start_pos <- 0;
112
48
lexbuf.lex_curr_pos <- 0;
113
49
lexbuf.lex_last_pos <- 0;
114
50
lexbuf.lex_last_action <- 0;
115
lexbuf.lex_eof_reached <- true
51
lexbuf.lex_eof_reached <- true;
59
lexbuf.lex_mem <- [| |];
60
lexbuf.lex_start_p <- zero_pos;
61
lexbuf.lex_curr_p <- zero_pos
119
66
let sub_lexeme lexbuf k l =
120
67
(* = String.sub (Lexing.lexeme lexbuf) k l *)
68
(* In recent versions of O'Caml (3.06+X), there are already definitions
69
* of sub_lexeme. These have the same effect, but don't protect against
121
72
let lexeme_len = lexbuf.lex_curr_pos - lexbuf.lex_start_pos in
122
73
if (k < 0 || k > lexeme_len || l < 0 || k+l > lexeme_len) then
123
74
invalid_arg "sub_lexeme";