(*
 * CharParser - Parsing unicode text
 * Copyright (C) 2008 David Teller
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version,
 * with the special exception on linking described in file LICENSE.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *)

(** Parsing unicode text

    This module defines common functions for parsing Unicode
    texts. These functions are meant to be used in conjunction with
    the {!ParserCo} module.

    {b Note} Like {!ParserCo}, this module is still very rough and needs
    testing. *)

(** The position inside one file or one stream. *)
type position = CharParser.position =
    {
      offset : int;  (** Offset on the line (starting at 0) *)
      line : int  (** Line number (starting at 0) *)
    }

val advance : UChar.t -> position -> position
(** Advance by one char.

    [advance c p] returns a new position advanced by one char. If [c] is
    '\r' or '\n', the result is [{offset = 0; line = p.line + 1}].
    Otherwise, the result is [{offset = p.offset + 1; line = p.line}]. *)

val source_of_rope : Rope.t -> (UChar.t, position) Source.t
(** [source_of_rope r] creates a source of Unicode characters, annotated
    with {!position}s, from the Unicode rope [r]. *)

val source_of_enum : UChar.t Enum.t -> (UChar.t, position) Source.t
(** [source_of_enum e] creates a source of Unicode characters, annotated
    with {!position}s, from the enumeration [e] of Unicode characters. *)

val parse : (UChar.t, 'a, position) t -> Rope.t -> ('a, position report) Std.result
(** [parse p r] applies the parser [p] to the Unicode rope [r].

    The outcome is a [Std.result] holding either a value of type ['a]
    or a [position report]. *)

val char : UChar.t -> (UChar.t, UChar.t, position) t
(** [char c] recognizes exactly the char [c]. *)

val none_of : UChar.t list -> (UChar.t, UChar.t, position) t
(** [none_of l] accepts any value not in the list [l].

    As [ParserCo.none_of], just with improved error message. *)

val not_char : UChar.t -> (UChar.t, UChar.t, position) t
(** [not_char c] accepts any value other than the given char [c]. *)

val string : string -> (UChar.t, string, position) t
(** [string s] recognizes exactly the string [s]. *)

val rope : Rope.t -> (UChar.t, Rope.t, position) t
(** [rope r] recognizes exactly the rope [r]. *)

val ustring : UTF8.t -> (UChar.t, UTF8.t, position) t
(** [ustring s] recognizes exactly the UTF-8 string [s]. *)

val case_char : UChar.t -> (UChar.t, UTF8.t, position) t
(** As [char], but case-insensitive. *)
(* NOTE(review): the result type here is [UTF8.t], whereas [char] yields
   [UChar.t] -- confirm against the implementation that this is intended. *)

val case_string : string -> (UChar.t, string, position) t
(** As [string], but case-insensitive. *)

val case_ustring : UTF8.t -> (UChar.t, UTF8.t, position) t
(** As [ustring], but case-insensitive. *)

val case_rope : Rope.t -> (UChar.t, Rope.t, position) t
(** As [rope], but case-insensitive. *)

val newline : (UChar.t, UChar.t, position) t
(** Recognizes a newline. *)

val whitespace : (UChar.t, UChar.t, position) t
(** Recognizes white-space. *)

val uppercase : (UChar.t, UChar.t, position) t
(** Recognizes one upper-case character, including
    accented characters. *)
(* NOTE(review): an earlier wording of this comment said "ASCII"; accented
   characters are not ASCII, so confirm the accepted range against the
   implementation. *)

val lowercase : (UChar.t, UChar.t, position) t
(** Recognizes one lower-case character, including
    accented characters. *)

val letter : (UChar.t, UChar.t, position) t
(** Recognizes one lower- or upper-case character. *)

val digit : (UChar.t, UChar.t, position) t
(** Recognizes one decimal digit. *)

val hex : (UChar.t, UChar.t, position) t
(** Recognizes one hexadecimal digit (case-insensitive). *)