57
let found_newline lexbuf diff =
58
let curr_p = lexbuf.lex_curr_p in
32
let found_newline ({ lex_curr_p; _ } as lexbuf) diff =
59
33
lexbuf.lex_curr_p <-
62
pos_lnum = curr_p.pos_lnum + 1;
63
pos_bol = max 1 (curr_p.pos_cnum - diff);
36
pos_lnum = lex_curr_p.pos_lnum + 1;
37
pos_bol = lex_curr_p.pos_cnum - diff;
66
let get_lexeme_len lexbuf = lexbuf.lex_curr_pos - lexbuf.lex_start_pos
40
(* same length computation as in [Lexing.lexeme] *)
41
let lexeme_len { lex_start_pos; lex_curr_pos; _ } = lex_curr_pos - lex_start_pos
43
let main_failure lexbuf msg =
44
let { pos_lnum; pos_bol; pos_cnum; pos_fname = _ } = lexeme_start_p lexbuf in
47
"Sexplib.Lexer.main: %s at line %d char %d"
48
msg pos_lnum (pos_cnum - pos_bol)
53
module Quoted_string_buffer : sig
56
val add_char : t -> char -> unit
57
val add_substring : t -> string -> int -> int -> unit
58
val add_lexeme : t -> lexbuf -> unit
60
val of_buffer : Buffer.t -> t
67
val simple_string : string -> t
69
val quoted_string : Lexing.position -> Quoted_string_buffer.t -> t
70
type s = Quoted_string_buffer.t -> Lexing.lexbuf -> t
71
val comment : string -> main:s -> s
72
val block_comment : Lexing.position -> main:s -> s
76
module Make (X : T) : sig
77
val main : ?buf:Buffer.t -> Lexing.lexbuf -> X.Token.t
78
end = struct (* BEGIN FUNCTOR BODY CONTAINING GENERATED CODE *)
69
let newline = ('\010' | '\013' | "\013\010")
70
let space = [' ' '\009' '\012']
71
let whitespace = [' ' '\010' '\013' '\009' '\012']
72
let backslash_escapes = ['\\' '"' '\'' 'n' 't' 'b' 'r']
83
let lf_cr = ['\010' '\013']
84
let dos_newline = "\013\010"
85
let blank = [' ' '\009' '\012']
86
let unquoted = [^ ';' '(' ')' '"'] # blank # lf_cr
88
let hexdigit = digit | ['a'-'f' 'A'-'F']
91
unquoted # ['#' '|'] | '#' unquoted # ['|'] | '|' unquoted # ['#']
74
93
rule main buf = parse
75
| newline { found_newline lexbuf 1; main buf lexbuf }
76
| space+ | ';' [^ '\n' '\r']* { main buf lexbuf }
94
| lf | dos_newline { found_newline lexbuf 0;
96
| blank+ { main buf lexbuf }
97
| (';' (_ # lf_cr)*) as text { Token.comment text ~main buf lexbuf }
98
| '(' { Token.lparen }
99
| ')' { Token.rparen }
81
scan_string buf lexbuf;
82
let str = Buffer.contents buf in
86
| ([^ ';' '(' ')' '"'] # whitespace)+ as str { STRING str }
102
let pos = Lexing.lexeme_start_p lexbuf in
103
Quoted_string_buffer.add_lexeme buf lexbuf;
104
scan_string buf pos lexbuf;
105
let tok = Token.quoted_string pos buf in
106
Quoted_string_buffer.clear buf;
109
| "#;" { Token.hash_semi }
112
let pos = Lexing.lexeme_start_p lexbuf in
113
Quoted_string_buffer.add_lexeme buf lexbuf;
114
scan_block_comment buf [pos] lexbuf;
115
let tok = Token.block_comment pos ~main buf lexbuf in
116
Quoted_string_buffer.clear buf;
119
| "|#" { main_failure lexbuf "illegal end of comment" }
120
| "#" "#"+ "|" unquoted* (* unquoted_start can match ##, so ##| (which should be
121
refused) would not not be parsed by this case if the regexp
122
on the left was not there *)
123
| "|" "|"+ "#" unquoted*
124
| unquoted_start unquoted* ("#|" | "|#") unquoted*
125
{ main_failure lexbuf "comment tokens in unquoted atom" }
126
| "#" | "|" | unquoted_start unquoted* as str { Token.simple_string str }
89
and scan_string buf = parse
91
| '\\' ['\010' '\013'] [' ' '\009']*
93
let len = get_lexeme_len lexbuf in
94
found_newline lexbuf (len - 2);
95
scan_string buf lexbuf
97
| '\\' "\013\010" [' ' '\009']*
99
let len = get_lexeme_len lexbuf in
100
found_newline lexbuf (len - 3);
101
scan_string buf lexbuf
103
| '\\' (backslash_escapes as c)
105
Buffer.add_char buf (char_for_backslash c);
106
scan_string buf lexbuf
108
| '\\' (['0'-'9'] as c1) (['0'-'9'] as c2) (['0'-'9'] as c3)
129
and scan_string buf start = parse
130
| '"' { Quoted_string_buffer.add_lexeme buf lexbuf; () }
131
| '\\' lf [' ' '\t']*
133
let len = lexeme_len lexbuf - 2 in
134
found_newline lexbuf len;
135
Quoted_string_buffer.add_lexeme buf lexbuf;
136
scan_string buf start lexbuf
138
| '\\' dos_newline [' ' '\t']*
140
let len = lexeme_len lexbuf - 3 in
141
found_newline lexbuf len;
142
Quoted_string_buffer.add_lexeme buf lexbuf;
143
scan_string buf start lexbuf
145
| '\\' (['\\' '\'' '"' 'n' 't' 'b' 'r' ' '] as c)
147
Quoted_string_buffer.add_char buf (char_for_backslash c);
148
Quoted_string_buffer.add_lexeme buf lexbuf;
149
scan_string buf start lexbuf
151
| '\\' (digit as c1) (digit as c2) (digit as c3)
110
153
let v = dec_code c1 c2 c3 in
111
154
if v > 255 then (
112
let pos = lexbuf.lex_curr_p in
155
let { pos_lnum; pos_bol; pos_cnum; pos_fname = _ } = lexeme_end_p lexbuf in
115
158
"Sexplib.Lexer.scan_string: \
116
159
illegal escape at line %d char %d: `\\%c%c%c'"
117
pos.pos_lnum (pos.pos_cnum - pos.pos_bol - 3)
160
pos_lnum (pos_cnum - pos_bol - 3)
120
Buffer.add_char buf (Char.chr v);
121
scan_string buf lexbuf
163
Quoted_string_buffer.add_char buf (Char.chr v);
164
Quoted_string_buffer.add_lexeme buf lexbuf;
165
scan_string buf start lexbuf
123
| '\\' 'x' (['0'-'9' 'a'-'f' 'A'-'F'] as c1) (['0'-'9' 'a'-'f' 'A'-'F'] as c2)
167
| '\\' 'x' (hexdigit as c1) (hexdigit as c2)
125
169
let v = hex_code c1 c2 in
127
let pos = lexbuf.lex_curr_p in
130
"Sexplib.Lexer.scan_string: \
131
illegal escape at line %d char %d: `\\x%c%c'"
132
pos.pos_lnum (pos.pos_cnum - pos.pos_bol - 3)
135
Buffer.add_char buf (Char.chr v);
136
scan_string buf lexbuf
170
Quoted_string_buffer.add_char buf (Char.chr v);
171
Quoted_string_buffer.add_lexeme buf lexbuf;
172
scan_string buf start lexbuf
140
Buffer.add_char buf '\\';
141
Buffer.add_char buf c;
142
scan_string buf lexbuf
144
| ['\010' '\013'] as c
146
found_newline lexbuf 1;
147
Buffer.add_char buf c;
148
scan_string buf lexbuf
152
found_newline lexbuf 2;
153
Buffer.add_string buf double_nl;
154
scan_string buf lexbuf
176
Quoted_string_buffer.add_char buf '\\';
177
Quoted_string_buffer.add_char buf c;
178
Quoted_string_buffer.add_lexeme buf lexbuf;
179
scan_string buf start lexbuf
183
found_newline lexbuf 0;
184
Quoted_string_buffer.add_char buf lf;
185
Quoted_string_buffer.add_lexeme buf lexbuf;
186
scan_string buf start lexbuf
188
| ([^ '\\' '"'] # lf)+
158
190
let ofs = lexbuf.lex_start_pos in
159
191
let len = lexbuf.lex_curr_pos - ofs in
160
Buffer.add_substring buf lexbuf.lex_buffer ofs len;
161
scan_string buf lexbuf
163
| eof { failwith "Sexplib.Lexer.scan_string: unterminated string" }
169
| None -> Buffer.create 64
170
| Some buf -> Buffer.clear buf; buf
192
Quoted_string_buffer.add_substring buf lexbuf.lex_buffer ofs len;
193
Quoted_string_buffer.add_lexeme buf lexbuf;
194
scan_string buf start lexbuf
200
"Sexplib.Lexer.scan_string: unterminated string at line %d char %d"
201
start.pos_lnum (start.pos_cnum - start.pos_bol)
206
and scan_block_comment buf locs = parse
208
{ Quoted_string_buffer.add_lexeme buf lexbuf;
209
found_newline lexbuf 0; scan_block_comment buf locs lexbuf }
210
| (('#'* | '|'*) [^ '"' '#' '|'] # lf)+
211
{ Quoted_string_buffer.add_lexeme buf lexbuf;
212
scan_block_comment buf locs lexbuf }
215
Quoted_string_buffer.add_lexeme buf lexbuf;
216
let cur = lexeme_end_p lexbuf in
217
let start = { cur with pos_cnum = cur.pos_cnum - 1 } in
218
scan_string buf start lexbuf;
219
scan_block_comment buf locs lexbuf
223
Quoted_string_buffer.add_lexeme buf lexbuf;
224
let cur = lexeme_end_p lexbuf in
225
let start = { cur with pos_cnum = cur.pos_cnum - 2 } in
226
scan_block_comment buf (start :: locs) lexbuf
230
Quoted_string_buffer.add_lexeme buf lexbuf;
232
| [_] -> () (* the comment is finished *)
233
| _ :: (_ :: _ as t) -> scan_block_comment buf t lexbuf
234
| [] -> assert false (* impossible *)
240
| { pos_lnum; pos_bol; pos_cnum; pos_fname = _ } :: _ ->
242
sprintf "Sexplib.Lexer.scan_block_comment: \
243
unterminated block comment at line %d char %d"
244
pos_lnum (pos_cnum - pos_bol)
249
{ (* RESUME FUNCTOR BODY CONTAINING GENERATED CODE *)
254
| None -> Quoted_string_buffer.create 64
257
Quoted_string_buffer.of_buffer buf
261
end (* END FUNCTOR BODY CONTAINING GENERATED CODE *)
265
module Quoted_string_buffer = struct
267
let add_lexeme _ _ = ()
270
module Token = struct
273
type s = Quoted_string_buffer.t -> Lexing.lexbuf -> t
277
let hash_semi = HASH_SEMI
278
let simple_string x = STRING x
279
let quoted_string _ buf = STRING (Buffer.contents buf)
280
let block_comment _pos ~main buf lexbuf =
282
let comment _text ~main buf lexbuf =
283
main buf lexbuf (* skip and continue lexing *)
289
module Quoted_string_buffer = struct
294
let create n = {contents = Buffer.create n; lexeme = Buffer.create n}
295
let of_buffer contents = { contents; lexeme = Buffer.create 64 }
296
let add_char t ch = Buffer.add_char t.contents ch
297
let add_substring t str ofs len = Buffer.add_substring t.contents str ofs len
298
let add_lexeme t lexbuf = Buffer.add_string t.lexeme (Lexing.lexeme lexbuf)
299
let clear t = Buffer.clear t.lexeme; Buffer.clear t.contents
301
module Token = struct
302
open Parser_with_layout
304
type s = Quoted_string_buffer.t -> Lexing.lexbuf -> t
308
let hash_semi = HASH_SEMI
309
let simple_string x = STRING (x, None)
310
let quoted_string pos {Quoted_string_buffer.contents; lexeme} =
311
STRING (Buffer.contents contents, Some (pos, Buffer.contents lexeme))
312
let block_comment pos ~main:_ {Quoted_string_buffer.contents = _; lexeme} _lexbuf =
313
COMMENT (Buffer.contents lexeme, Some pos)
314
let comment text ~main:_ _buf _lexbuf =
319
let main = Vanilla.main
320
let main_with_layout = With_layout.main