146
172
| Attval_nl_normalized _ -> "Attval_nl_normalized"
147
173
| Unparsed_string _ -> "Unparsed_string"
148
174
| LineEnd _ -> "LineEnd"
175
| LineEnd_att _ -> "LineEnd_att"
177
| LLcurly -> "LLcurly"
179
| RRcurly -> "RRcurly"
182
| ERef_att _ -> "ERef_att"
184
class type lexer_factory =
186
method encoding : Pxp_core_types.rep_encoding
187
method open_source : Pxp_reader.lexer_source -> lexer_obj
188
method open_string : string -> lexer_obj
189
method open_string_inplace : string -> lexer_obj
194
method factory : lexer_factory
195
method encoding : Pxp_core_types.rep_encoding
196
method open_source : Pxp_reader.lexer_source -> unit
197
method open_string : string -> unit
198
method open_string_inplace : string -> unit
200
method scan_document : unit -> (token * lexers)
201
method scan_content : unit -> (token * lexers)
202
method scan_within_tag : unit -> (token * lexers)
203
method scan_document_type : unit -> (token * lexers)
204
method scan_declaration : unit -> (token * lexers)
205
method scan_comment : unit -> lexers -> (token * lexers)
206
method scan_ignored_section : unit -> (token * lexers)
207
method detect_xml_pi : unit -> bool
208
method scan_xml_pi : unit -> prolog_token
209
method scan_pi_string : unit -> string option
210
method scan_dtd_string : unit -> token
211
method scan_content_string : unit -> token
212
method scan_name_string : unit -> token
213
method scan_for_crlf : unit -> token
214
method scan_characters : unit -> unit
215
method scan_character : unit -> unit
216
method scan_tag_eb : unit -> (token * lexers)
217
method scan_tag_eb_att : unit -> bool -> (token * lexers)
219
method lexeme_length : int
220
method lexeme_char : int -> int
221
method lexeme : string
222
method lexeme_strlen : int
223
method sub_lexeme : int -> int -> string
152
{ lex_encoding : Pxp_types.rep_encoding;
153
scan_document : Lexing.lexbuf -> (token * lexers);
154
scan_content : Lexing.lexbuf -> (token * lexers);
155
scan_within_tag : Lexing.lexbuf -> (token * lexers);
156
scan_document_type : Lexing.lexbuf -> (token * lexers);
157
scan_declaration : Lexing.lexbuf -> (token * lexers);
158
scan_content_comment : Lexing.lexbuf -> (token * lexers);
159
scan_decl_comment : Lexing.lexbuf -> (token * lexers);
160
scan_document_comment: Lexing.lexbuf -> (token * lexers);
161
scan_ignored_section : Lexing.lexbuf -> (token * lexers);
162
scan_xml_pi : Lexing.lexbuf -> prolog_token;
163
scan_dtd_string : Lexing.lexbuf -> token;
164
scan_content_string : Lexing.lexbuf -> token;
165
scan_name_string : Lexing.lexbuf -> token;
166
scan_only_xml_decl : Lexing.lexbuf -> token;
167
scan_for_crlf : Lexing.lexbuf -> token;
170
(* ======================================================================
173
* $Log: pxp_lexer_types.ml,v $
174
* Revision 1.6 2002/03/13 22:45:42 gerd
175
* Improved Pxp_lexing.
177
* Revision 1.5 2001/06/28 22:42:07 gerd
178
* Fixed minor problems:
179
* - Comments must be contained in one entity
180
* - Pxp_document.document is now initialized with encoding.
181
* the DTD encoding may be initialized too late.
183
* Revision 1.4 2000/10/01 19:47:53 gerd
184
* New functions: sub_lexeme, fast_lexing_from_string,
185
* reuse_lexing_from_string.
187
* Revision 1.3 2000/09/21 21:28:16 gerd
188
* New token IgnoreLineEnd: simplifies line counting, and
191
* Revision 1.2 2000/08/18 20:14:31 gerd
192
* Comment -> Comment_begin, Comment_material, Comment_end.
194
* Revision 1.1 2000/05/29 23:48:38 gerd
195
* Changed module names:
196
* Markup_aux into Pxp_aux
197
* Markup_codewriter into Pxp_codewriter
198
* Markup_document into Pxp_document
199
* Markup_dtd into Pxp_dtd
200
* Markup_entity into Pxp_entity
201
* Markup_lexer_types into Pxp_lexer_types
202
* Markup_reader into Pxp_reader
203
* Markup_types into Pxp_types
204
* Markup_yacc into Pxp_yacc
205
* See directory "compatibility" for (almost) compatible wrappers emulating
206
* Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
208
* ======================================================================
209
* Old logs from markup_lexer_types.ml:
211
* Revision 1.6 2000/05/29 21:14:57 gerd
212
* Changed the type 'encoding' into a polymorphic variant.
214
* Revision 1.5 2000/05/20 20:31:40 gerd
215
* Big change: Added support for various encodings of the
216
* internal representation.
218
* Revision 1.4 2000/05/14 17:45:36 gerd
221
* Revision 1.3 2000/05/14 17:35:12 gerd
222
* Conditional_begin, _end, and _body have an entity_id.
224
* Revision 1.2 2000/05/08 21:59:06 gerd
225
* New token Bof (beginning of file).
227
* Revision 1.1 2000/05/06 23:21:49 gerd
231
* ======================================================================
233
* DERIVED FROM REVISION 1.4 of markup_lexer_types_shadow.ml
235
* Revision 1.4 2000/04/30 18:19:04 gerd
238
* Revision 1.3 1999/08/31 19:13:31 gerd
239
* Added checks on proper PE nesting. The idea is that tokens such
240
* as Decl_element and Decl_rangle carry an entity ID with them. This ID
241
* is simply an object of type < >, i.e. you can only test on identity.
242
* The lexer always produces tokens with a dummy ID because it does not
243
* know which entity is the current one. The entity layer replaces the dummy
244
* ID with the actual ID. The parser checks that the IDs of pairs such as
245
* Decl_element and Decl_rangle are the same; otherwise a Validation_error
248
* Revision 1.2 1999/08/10 21:35:08 gerd
249
* The XML/encoding declaration at the beginning of entities is
250
* evaluated. In particular, entities now have a method "xml_declaration"
251
* which returns the name/value pairs of such a declaration. The "encoding"
252
* setting is interpreted by the entity itself; "version", and "standalone"
253
* are interpreted by Markup_yacc.parse_document_entity. Other settings
254
* are ignored (this does not conform to the standard; the standard prescribes
255
* that "version" MUST be given in the declaration of the document; "standalone"
256
* and "encoding" CAN be declared; no other settings are allowed).
257
* TODO: The user should be warned if the standard is not exactly
258
* fulfilled. -- The "standalone" property is not checked yet.
260
* Revision 1.1 1999/08/10 00:35:51 gerd
228
{ scan_name_string : Lexing.lexbuf -> token }
229
(* DEPRECATED. Only exists because WDialog needs it. *)