1
%%% The contents of this file are subject to the Erlang Public License,
2
%%% Version 1.0, (the "License"); you may not use this file except in
3
%%% compliance with the License. You may obtain a copy of the License at
4
%%% http://www.erlang.org/license/EPL1_0.txt
6
%%% Software distributed under the License is distributed on an "AS IS"
7
%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
8
%%% the License for the specific language governing rights and limitations
11
%%% The Original Code is xmerl-0.19
13
%%% The Initial Developer of the Original Code is Ericsson Telecom
14
%%% AB. Portions created by Ericsson are Copyright (C), 1998, Ericsson
15
%%% Telecom AB. All Rights Reserved.
17
%%% Contributor(s): ______________________________________.
19
%%%----------------------------------------------------------------------
20
%%% #0. BASIC INFORMATION
21
%%%----------------------------------------------------------------------
22
%%% File: xmerl_eventp.erl
23
%%% Author : Ulf Wiger <ulf.wiger@ericsson.com>
24
%%% Johan Blom <Johan.Blom@mobilearts.se>
25
%%% Description : Simple event-based processor (front-end to xmerl_scan)
29
%%%----------------------------------------------------------------------
30
%% @doc Simple event-based front-ends to xmerl_scan for processing
31
%% of XML documents in streams and for parsing in SAX style.
32
%% Each contain more elaborate settings of xmerl_scan that makes usage of
33
%% the customization functions.
35
-module(xmerl_eventp).
39
-export([stream/2,stream_sax/4, file_sax/4, string_sax/4]).
41
% -export([cont/3, rules_read/3,rules_write/4,fetch/2,close/1]).
43
-include("xmerl.hrl").
44
-include_lib("kernel/include/file.hrl").
46
%% @spec stream(Fname::string(), Options::option_list()) -> xmlElement()
48
%% @doc Parse file containing an XML document as a stream, DOM style.
49
%% Wrapper for a call to the XML parser <code>xmerl_scan</code> with a
50
%% <code>continuation_fun</code> for handling streams of XML data.
51
%% Note that the <code>continuation_fun</code>, <code>acc_fun</code>,
52
%% <code>fetch_fun</code>, <code>rules</code> and <code>close_fun</code>
53
%% options cannot be user defined using this parser.
54
stream(Fname, Options) ->
55
AccF = fun(X, Acc, S) -> acc(X,Acc,S) end,
56
case file:open(Fname, [read, raw, binary]) of
58
B0 = list_to_binary([]),
59
ContS = [{B0, Fname, Fd}],
60
Opts=scanner_options(Options,
61
[{continuation_fun, fun cont/3, ContS},
63
{fetch_fun, fun fetch/2},
64
{rules,fun rules_read/3,fun rules_write/4,""},
65
{close_fun, fun close/1}]),
66
xmerl_scan:string([], Opts);
72
%% @spec stream_sax(Fname,CallBackModule,UserState,Options) -> xmlElement()
74
%% CallBackModule = atom()
75
%% Options = option_list()
77
%% @doc Parse file containing an XML document as a stream, SAX style.
78
%% Wrapper for a call to the XML parser <code>xmerl_scan</code> with a
79
%% <code>continuation_fun</code> for handling streams of XML data.
80
%% Note that the <code>continuation_fun</code>, <code>acc_fun</code>,
81
%% <code>fetch_fun</code>, <code>rules</code>, <code>hook_fun</code>,
82
%% <code>close_fun</code> and <code>user_state</code> options cannot be user
83
%% defined using this parser.
84
stream_sax(Fname, CallBack, UserState,Options) ->
85
US={xmerl:callbacks(CallBack), UserState},
86
AccF = fun(X, Acc, S) -> acc(X,Acc,S) end,
88
fun(ParsedEntity, S) ->
89
{CBs,Arg}=xmerl_scan:user_state(S),
90
% io:format("stream_sax Arg=~p~n",[Arg]),
92
#xmlComment{} -> % Toss away comments...
94
_ -> % Use callback module for the rest
95
% io:format("stream_sax ParsedEntity=~p~n",[ParsedEntity]),
96
case xmerl:export_element(ParsedEntity,CBs,Arg) of
98
throw({error,Reason});
100
% io:format("stream_sax Resp=~p~n",[Resp]),
101
{Resp,xmerl_scan:user_state({CBs,Resp},S)}
105
case file:open(Fname, [read, raw, binary]) of
107
B0 = list_to_binary([]),
108
ContS = [{B0, Fname, Fd}],
109
Opts=scanner_options(Options,
111
{close_fun, fun close/1},
112
{continuation_fun, fun cont/3, ContS},
113
{fetch_fun, fun fetch/2},
115
{rules,fun rules_read/3,fun rules_write/4,""},
117
xmerl_scan:string([], Opts);
123
%% @spec file_sax(Fname::string(), CallBackModule::atom(), UserState,
124
%% Options::option_list()) -> NewUserState
126
%% @doc Parse file containing an XML document, SAX style.
127
%% Wrapper for a call to the XML parser <code>xmerl_scan</code> with a
128
%% <code>hook_fun</code> for using xmerl export functionality directly after
129
%% an entity is parsed.
130
file_sax(Fname,CallBack, UserState, Options) ->
131
US={xmerl:callbacks(CallBack), UserState},
132
AccF=fun(X,Acc,S) -> {[X|Acc], S} end,
134
fun(ParsedEntity, S) ->
135
{CBs,Arg}=xmerl_scan:user_state(S),
137
#xmlComment{} -> % Toss away comments...
139
_ -> % Use callback module for the rest
140
case xmerl:export_element(ParsedEntity,CBs,Arg) of
142
throw({error,Reason});
144
{Resp,xmerl_scan:user_state({CBs,Resp},S)}
149
Opts=scanner_options(Options,[{acc_fun, AccF},
152
xmerl_scan:file(Fname,Opts).
155
%% @spec string_sax(String::list(), CallBackModule::atom(), UserState,
156
%% Options::option_list()) ->
159
%% @doc Parse file containing an XML document, SAX style.
160
%% Wrapper for a call to the XML parser <code>xmerl_scan</code> with a
161
%% <code>hook_fun</code> for using xmerl export functionality directly after
162
%% an entity is parsed.
163
string_sax(String,CallBack, UserState, Options) ->
164
US={xmerl:callbacks(CallBack), UserState},
165
AccF=fun(X,Acc,S) -> {[X|Acc], S} end,
167
fun(ParsedEntity, S) ->
168
{CBs,Arg}=xmerl_scan:user_state(S),
170
#xmlComment{} -> % Toss away comments...
172
_ -> % Use callback module for the rest
173
case xmerl:export_element(ParsedEntity,CBs,Arg) of
175
throw({error,Reason});
177
{Resp,xmerl_scan:user_state({CBs,Resp},S)}
182
Opts=scanner_options(Options,[{acc_fun, AccF},
185
xmerl_scan:string(String,Opts).
189
%%% ----------------------------------------------------------------------------
190
%%% Streaming support functions
192
%%% Continuation callback function for xmerl_scan
193
cont(F, Exception, S) ->
194
case xmerl_scan:cont_state(S) of
197
[{Sofar, Fname, Fd}|T] ->
198
cont2(F, Exception, Sofar, Fd, Fname, T, S)
202
cont2(F, Exception, Sofar, Fd, Fname, T, S) ->
203
case catch read_chunk(Fd, Fname, Sofar) of
205
find_good_split(list_to_binary([Sofar,Bin]),
206
F,Exception,Fd,Fname,T,S);
209
NewS = xmerl_scan:cont_state([{Fname, eof}|T], S),
210
F(binary_to_list(Sofar), NewS);
215
read_chunk(Fd, _Fname, _Sofar) ->
218
-ifndef(no_bitsyntax).
220
find_good_split(Bin, F, Exception, Fd, Fname, T, S) ->
221
find_good_split(size(Bin)-1, Bin, F, Exception, Fd, Fname, T, S).
223
find_good_split(0, B, F, Exception, Fd, Fname, T, S) ->
224
cont2(F, Exception, B, Fd, Fname, T, S);
225
find_good_split(Size, B, F, Exception, Fd, Fname, T, S) ->
227
<<_Bytes:Size/binary, H/integer, Tail/binary>> when ?whitespace(H) ->
228
{SubB,_} = split_binary(B, Size+1),
229
NewS = xmerl_scan:cont_state([{Tail, Fname, Fd}|T], S),
230
F(binary_to_list(SubB), NewS);
232
find_good_split(Size-1, B, F, Exception, Fd, Fname, T, S)
237
find_good_split(Bin, F, Exception, Fd, Fname, T, S) ->
238
find_good_split(size(Bin), Bin, F, Exception, Fd, Fname, T, S).
240
find_good_split(0, B, F, Exception, Fd, Fname, T, S) ->
241
cont2(F, Exception, B, Fd, Fname, T, S);
242
find_good_split(Size, B, F, Exception, Fd, Fname, T, S) ->
243
case binary_to_list(B, Size, Size) of
244
[H] when ?whitespace(H) ->
245
{SubB,Tail} = split_binary(B, Size),
246
NewS = xmerl_scan:cont_state([{Tail, Fname, Fd}|T], S),
247
F(binary_to_list(SubB), NewS);
249
find_good_split(Size-1, B, F, Exception, Fd, Fname, T, S)
256
%%% Accumulator callback function for xmerl_scan
257
acc(X = #xmlText{value = Text}, Acc, S) ->
258
case detect_nul_text(Text) of
260
{[X#xmlText{value = lists:flatten(Text)}|Acc], S};
267
%%% don't acc xmlText when text contains only " " , "\n" and "\t".
268
detect_nul_text([H|T]) when H==10; H==32; H==9->
270
detect_nul_text([]) ->
277
%%% Fetch callback function for xmerl_scan
278
fetch({system, URI}, S) ->
280
fetch({public, _PublicID, URI}, S) ->
284
%% assume URI is a filename
285
Split = filename:split(URI),
286
Filename = lists:last(Split),
290
%% absolute path name
294
filename:join(S#xmerl_scanner.xmlbase, Name);
296
filename:join(S#xmerl_scanner.xmlbase, URI)
298
File = path_locate(S#xmerl_scanner.fetch_path, Filename, Fullname),
299
?dbg("fetch(~p) -> {file, ~p}.~n", [URI, File]),
300
case file:open(File, [read, raw, binary]) of
302
ContS=xmerl_scan:cont_state(S),
303
NewS=xmerl_scan:cont_state([{list_to_binary([]),File,Fd}|ContS],S),
304
{ok, {string, []}, NewS};
306
?dbg("ERROR fetch(~p) -> ~p~n", [URI, _Error]),
310
path_locate([Dir|Dirs], FN, FullName) ->
311
F = filename:join(Dir, FN),
312
case file:read_file_info(F) of
313
{ok, #file_info{type = regular}} ->
316
path_locate(Dirs, FN, FullName)
318
path_locate([], _FN, FullName) ->
321
%%% Close callback function for xmerl_scan
323
ContS = xmerl_scan:cont_state(S),
326
xmerl_scan:cont_state(T, S);
327
[{_Sofar, _Fname, Fd}|T] ->
329
xmerl_scan:cont_state(T, S)
333
%%% Rules callback functions for xmerl_scan
334
rules_write(Context, Name, Value, #xmerl_scanner{rules = undefined}=S) ->
335
Tab = ets:new(rules, [set, public]),
336
rules_write(Context, Name, Value, S#xmerl_scanner{rules = Tab});
337
rules_write(Context, Name, Value, #xmerl_scanner{rules = T} = S) ->
338
ets:insert(T, {{Context, Name}, Value}),
341
rules_read(_Context, _Name, #xmerl_scanner{rules = undefined}) ->
343
rules_read(Context, Name, #xmerl_scanner{rules = T}) ->
344
case ets:lookup(T, {Context, Name}) of
353
%%% ----------------------------------------------------------------------------
354
%%% Generic helper functions
356
scanner_options([H|T], Opts) ->
357
case catch keyreplace(H, 1, Opts) of
359
scanner_options(T, [H|Opts]);
361
scanner_options(T, NewOpts)
363
scanner_options([], Opts) ->
366
keyreplace(X, Pos, [H|T]) when element(Pos, X) == element(Pos, H) ->
368
keyreplace(X, Pos, [H|T]) ->
369
[H|keyreplace(X, Pos, T)];
370
keyreplace(_, _Pos, []) ->