1
%% Copyright (c) 2008 Robert Virding. All rights reserved.
3
%% Redistribution and use in source and binary forms, with or without
4
%% modification, are permitted provided that the following conditions
7
%% 1. Redistributions of source code must retain the above copyright
8
%% notice, this list of conditions and the following disclaimer.
9
%% 2. Redistributions in binary form must reproduce the above copyright
10
%% notice, this list of conditions and the following disclaimer in the
11
%% documentation and/or other materials provided with the distribution.
13
%% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
14
%% "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
15
%% LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
16
%% FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
17
%% COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
18
%% INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19
%% BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
20
%% LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21
%% CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22
%% LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
23
%% ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24
%% POSSIBILITY OF SUCH DAMAGE.
26
%%% A Lexical Analyser Generator for Erlang.
28
%%% Most of the algorithms used here are taken pretty much as
29
%%% described in the "Dragon Book" by Aho, Sethi and Ullman. Some
30
%%% completing details were taken from "Compiler Design in C" by
35
-export([compile/3,file/1,file/2,format_error/1]).
37
-import(lists, [member/2,reverse/1,sort/1,keysearch/3,keysort/2,
38
map/2,foldl/3,foreach/2,flatmap/2,
39
delete/2,keydelete/3]).
40
-import(string, [substr/2,substr/3,span/2,tokens/2]).
41
-import(ordsets, [is_element/2,add_element/2,union/2]).
42
-import(orddict, [store/3]).
44
-include("erl_compile.hrl").
46
-define(LEEXINC, "leexinc.hrl"). % Include file
48
-define(DEFS_HEAD, "Definitions.").
49
-define(RULE_HEAD, "Rules.").
50
-define(CODE_HEAD, "Erlang code.").
52
-record(leex, {xfile=[], % Xrl file
54
ifile=[], % Include file
55
gfile=[], % Graph file
62
-record(nfa_state, {no,edges=[],accept=noaccept}).
63
-record(dfa_state, {no,nfa=[],trans=[],accept=noaccept}).
66
%%% Exported functions
69
%%% Interface to erl_compile.
71
compile(Input0, Output0,
72
#options{warning = WarnLevel, verbose=Verbose, includes=Includes}) ->
73
Input = assure_extension(shorten_filename(Input0), ".xrl"),
74
Output = assure_extension(shorten_filename(Output0), ".erl"),
75
Includefile = lists:sublist(Includes, 1),
76
Opts = [{scannerfile,Output},{includefile,Includefile},{verbose,Verbose},
77
{report_errors,true},{report_warnings,WarnLevel > 0}],
78
case file(Input, Opts) of
85
%% file(File) -> ok | error.
86
%% file(File, Options) -> ok | error.
88
file(File) -> file(File, []).
91
case is_filename(File) of
92
no -> erlang:error(badarg, [File,Opts0]);
95
Opts = case options(Opts0) of
97
erlang:error(badarg, [File,Opts0]);
102
St1 = filenames(File, Opts, St0), % Get all the filenames
104
{ok,REAs,Actions,Code,St2} = parse_file(St1),
105
{DFA,DF} = make_dfa(REAs, St2),
106
St3 = out_file(St2, DFA, DF, Actions, Code),
107
case lists:member(dfa_graph, St3#leex.opts) of
108
true -> out_dfa_graph(St3, DFA, DF);
116
format_error({file_error, Reason}) ->
117
io_lib:fwrite("~s",[file:format_error(Reason)]);
118
format_error(missing_defs) -> "missing Definitions";
119
format_error(missing_rules) -> "missing Rules";
120
format_error(missing_code) -> "missing Erlang code";
121
format_error(empty_rules) -> "no rules";
122
format_error(bad_rule) -> "bad rule";
123
format_error({regexp,E})-> ["bad regexp `",regexp:format_error(E),"'"];
124
format_error({after_regexp,S}) ->
125
["bad code after regexp ",io_lib:write_string(S)];
126
format_error(ignored_characters) ->
127
"ignored characters";
128
format_error(not_yet_implemented) ->
129
"anchoring a regular expression with ^ and $ is not yet implemented".
135
assure_extension(File, Ext) ->
136
lists:concat([strip_extension(File, Ext), Ext]).
138
%% Assumes File is a filename.
139
strip_extension(File, Ext) ->
140
case filename:extension(File) of
141
Ext -> filename:rootname(File);
145
options(Options0) when is_list(Options0) ->
147
Options = flatmap(fun(return) -> short_option(return, true);
148
(report) -> short_option(report, true);
149
({return,T}) -> short_option(return, T);
150
({report,T}) -> short_option(report, T);
153
options(Options, [scannerfile,includefile,report_errors,
154
report_warnings,return_errors,return_warnings,
155
verbose,dfa_graph], [])
156
catch error: _ -> badarg
161
short_option(return, T) ->
162
[{return_errors,T}, {return_warnings,T}];
163
short_option(report, T) ->
164
[{report_errors,T}, {report_warnings,T}].
166
options(Options0, [Key|Keys], L) when is_list(Options0) ->
167
Options = case member(Key, Options0) of
169
[atom_option(Key)|delete(Key, Options0)];
173
V = case keysearch(Key, 1, Options) of
174
{value, {Key, Filename0}} when Key =:= includefile;
175
Key =:= scannerfile ->
176
case is_filename(Filename0) of
180
{ok,[{Key,Filename}]}
182
{value,{Key,Bool}} when Bool; not Bool ->
187
{ok,[{Key,default_option(Key)}]}
193
NewOptions = keydelete(Key, 1, Options),
194
options(NewOptions, Keys, KeyValueL ++ L)
196
options([], [], L) ->
197
foldl(fun({_,false}, A) -> A;
198
({Tag,true}, A) -> [Tag|A];
201
options(_Options, _, _L) ->
204
default_option(dfa_graph) -> false;
205
default_option(includefile) -> [];
206
default_option(report_errors) -> true;
207
default_option(report_warnings) -> true;
208
default_option(return_errors) -> false;
209
default_option(return_warnings) -> false;
210
default_option(scannerfile) -> [];
211
default_option(verbose) -> false.
213
atom_option(dfa_graph) -> {dfa_graph,true};
214
atom_option(report_errors) -> {report_errors,true};
215
atom_option(report_warnings) -> {report_warnings,true};
216
atom_option(return_errors) -> {return_errors,true};
217
atom_option(return_warnings) -> {return_warnings,true};
218
atom_option(verbose) -> {verbose,true};
219
atom_option(Key) -> Key.
222
try filename:flatten(T) of
227
shorten_filename(Name0) ->
228
{ok,Cwd} = file:get_cwd(),
229
case lists:prefix(Cwd, Name0) of
232
case lists:nthtail(length(Cwd), Name0) of
241
Es = pack_errors(St#leex.errors),
242
Ws = pack_warnings(St#leex.warnings),
245
case member(return_warnings, St#leex.opts) of
246
true -> {ok, St#leex.efile, Ws};
247
false -> {ok, St#leex.efile}
250
case member(return_errors, St#leex.opts) of
251
true -> {error, Es, Ws};
256
pack_errors([{File,_} | _] = Es) ->
257
[{File, flatmap(fun({_,E}) -> [E] end, sort(Es))}];
261
pack_warnings([{File,_} | _] = Ws) ->
262
[{File, flatmap(fun({_,W}) -> [W] end, sort(Ws))}];
268
foreach(fun({File,{none,Mod,E}}) ->
269
io:fwrite("~s: ~s\n",
270
[File,Mod:format_error(E)]);
271
({File,{Line,Mod,E}}) ->
272
io:fwrite("~s:~w: ~s\n",
273
[File,Line,Mod:format_error(E)])
274
end, sort(St#leex.errors))
275
end, report_errors, St#leex.opts).
277
report_warnings(St) ->
279
foreach(fun({File,{none,Mod,W}}) ->
280
io:fwrite("~s: Warning: ~s\n",
281
[File,Mod:format_error(W)]);
282
({File,{Line,Mod,W}}) ->
283
io:fwrite("~s:~w: Warning: ~s\n",
284
[File,Line,Mod:format_error(W)])
285
end, sort(St#leex.warnings))
286
end, report_warnings, St#leex.opts).
289
add_error(St#leex.xfile, E, St).
291
add_error(File, Error, St) ->
292
throw(St#leex{errors = [{File,Error}|St#leex.errors]}).
294
add_warning(Line, W, St) ->
295
St#leex{warnings = [{St#leex.xfile,{Line,leex,W}}|St#leex.warnings]}.
297
%% filenames(File, Options, State) -> State.
298
%% The default output dir is the current directory unless an
299
%% explicit one has been given in the options.
301
filenames(File, Opts, St0) ->
302
Dir = filename:dirname(File),
303
Base = filename:basename(File, ".xrl"),
304
Xfile = filename:join(Dir, Base ++ ".xrl"),
305
Efile = Base ++ ".erl",
306
Gfile = Base ++ ".dot",
307
Module = list_to_atom(Base),
308
St1 = St0#leex{xfile=Xfile,
311
{value,{includefile,Ifile0}} = keysearch(includefile, 1, Opts),
312
Ifile = inc_file_name(Ifile0),
313
%% Test for explicit scanner file.
314
{value,{scannerfile,Ofile}} = keysearch(scannerfile, 1, Opts),
317
St1#leex{efile=filename:join(Dir, Efile),
319
gfile=filename:join(Dir, Gfile)};
321
D = filename:dirname(Ofile),
322
St1#leex{efile=Ofile,
324
gfile=filename:join(D, Gfile)}
327
when_opt(Do, Opt, Opts) ->
328
case member(Opt, Opts) of
333
verbose_print(St, Format, Args) ->
334
when_opt(fun () -> io:fwrite(Format, Args) end, verbose, St#leex.opts).
336
%% parse_file(State) -> {ok,[REA],[Action],Code,NewState} | throw(NewState)
338
%% REA = {RegExp,ActionNo};
339
%% Action = {ActionNo,ActionString};
340
%% Code = {StartLine, StartPos, NumOfLines}. Where the Erlang code is.
342
%% Read and parse the file Xfile.
343
%% After each section of the file has been parsed we directly call the
344
%% next section. This is done when we detect a line we don't recognise
345
%% in the current section. The file format is very simple and Erlang
346
%% token based, we allow empty lines and Erlang style comments.
349
case file:open(St0#leex.xfile, [read]) of
352
verbose_print(St0, "Parsing file ~s, ", [St0#leex.xfile]),
353
{ok,REAs,Actions,Code,St1} = parse_head(St0, Xfile),
354
verbose_print(St1, "contained ~w rules.~n", [length(REAs)]),
355
{ok,REAs,Actions,Code,St1}
356
after file:close(Xfile)
359
add_error({none,leex,{file_error,Error}}, St0)
362
%% parse_head(State, File)
363
%% Parse the head of the file. Skip all comments and blank lines.
365
parse_head(St, Ifile) -> parse_defs(St, Ifile, nextline(Ifile, 0)).
367
%% parse_defs(State, File, Line)
368
%% Parse the macro definition section of a file. This must exist.
370
parse_defs(St, Ifile, {ok,?DEFS_HEAD ++ Rest,L}) ->
371
St1 = warn_ignored_chars(St, L, Rest),
372
parse_defs(St1, Ifile, nextline(Ifile, L), []);
373
parse_defs(St, _, {ok,_,L}) ->
374
add_error({L,leex,missing_defs}, St);
375
parse_defs(St, _, {eof,L}) ->
376
add_error({L,leex,missing_defs}, St).
378
parse_defs(St, Ifile, {ok,Chars,L}, Ms) ->
379
case tokens(Chars, " \t\n") of % Also strips \n from eol!
381
parse_defs(St, Ifile, nextline(Ifile, L), [{Name,Def}|Ms]);
383
parse_rules(St, Ifile, {ok,Chars,L}, Ms)
385
parse_defs(St, Ifile, Line, Ms) ->
386
parse_rules(St, Ifile, Line, Ms).
388
%% parse_rules(State, File, Line, Macros)
389
%% Parse the RE rules section of the file. This must exist.
391
parse_rules(St, Ifile, {ok,?RULE_HEAD ++ Rest,L}, Ms) ->
392
St1 = warn_ignored_chars(St, L, Rest),
393
parse_rules(St1, Ifile, nextline(Ifile, L), Ms, [], [], 0);
394
parse_rules(St, _, {ok,_,L}, _) ->
395
add_error({L,leex,missing_rules}, St);
396
parse_rules(St, _, {eof,L}, _) ->
397
add_error({L,leex,missing_rules}, St).
399
%% parse_rules(State, File, Result, Macros, RegExpActions, Actions, Acount) ->
400
%% {ok,RegExpActions,Actions,Code,NewState} | throw(NewState)
402
parse_rules(St, Ifile, NextLine, Ms, REAs, As, N) ->
404
{ok,?CODE_HEAD ++ _Rest,_} ->
405
parse_rules_end(St, Ifile, NextLine, REAs, As);
407
%%io:fwrite("~w: ~p~n", [L0,Chars]),
408
case collect_rule(Ifile, Chars, L0) of
410
{ok,REA,A,St1} = parse_rule(St, Re, L0, Atoks, Ms, N),
411
parse_rules(St1, Ifile, nextline(Ifile, L1), Ms,
412
[REA|REAs], [A|As], N+1);
413
{error,E} -> add_error(E, St)
416
parse_rules_end(St, Ifile, NextLine, REAs, As)
419
parse_rules_end(St, _, {ok,_,L}, [], []) ->
420
add_error({L,leex,empty_rules}, St);
421
parse_rules_end(St, _, {eof,L}, [], []) ->
422
add_error({L,leex,empty_rules}, St);
423
parse_rules_end(St, Ifile, NextLine, REAs, As) ->
424
%% Must be *VERY* careful to put rules in correct order!
425
parse_code(St, Ifile, NextLine, reverse(REAs), reverse(As)).
427
%% collect_rule(File, Line, Lineno) ->
428
%% {ok,RegExp,ActionTokens,NewLineno} | {error,E}.
429
%% Collect a complete rule by reading lines until the the regexp and
430
%% action has been read. Keep track of line number.
432
collect_rule(Ifile, Chars, L0) ->
433
{match,St,Len} = regexp:first_match(Chars, "[^ \t\r\n]+"),
434
%%io:fwrite("RE = ~p~n", [substr(Chars, St, Len)]),
435
case collect_action(Ifile, substr(Chars, St+Len), L0, []) of
436
{ok,[{':',_}|Toks],L1} -> {ok,substr(Chars, St, Len),Toks,L1};
437
{ok,_,_} -> {error,{L0,leex,bad_rule}};
438
{eof,L1} -> {error,{L1,leex,bad_rule}};
439
{error,E,_} -> {error,E}
442
collect_action(Ifile, Chars, L0, Cont0) ->
443
case erl_scan:tokens(Cont0, Chars, L0) of
444
{done,{ok,Toks,_},_} -> {ok,Toks,L0};
445
{done,{eof,_},_} -> {eof,L0};
446
{done,{error,E,_},_} -> {error,E,L0};
448
collect_action(Ifile, io:get_line(Ifile, leex), L0+1, Cont1)
451
%% parse_rule(State, RegExpString, RegExpLine, ActionTokens, Macros, Counter)
452
%% Parse one regexp after performing macro substition.
454
parse_rule(St, S, Line, [{dot,_}], Ms, N) ->
455
case parse_rule_regexp(S, Ms) of
457
ok = anchors_not_yet_implemented(St, R, Line),
458
{ok,{R,N},{N,empty_action},St};
460
add_error({Line,leex,{regexp,E}}, St)
462
parse_rule(St, S, Line, Atoks, Ms, N) ->
463
case parse_rule_regexp(S, Ms) of
465
ok = anchors_not_yet_implemented(St, R, Line),
466
case erl_parse:parse_exprs(Atoks) of
468
%% Check for token variables.
469
TokenChars = var_used('TokenChars', Atoks),
470
TokenLen = var_used('TokenLen', Atoks),
471
TokenLine = var_used('TokenLine', Atoks),
472
{ok,{R,N},{N,Atoks,TokenChars,TokenLen,TokenLine},St};
474
add_error({Line,leex,{after_regexp,S}}, St)
477
add_error({Line,leex,{regexp,E}}, St)
480
anchors_not_yet_implemented(St, R, L) ->
481
case catch build_nfa(R, 1, 0) of
483
add_error({L,leex,not_yet_implemented}, St);
488
var_used(Name, Toks) ->
489
case keysearch(Name, 3, Toks) of
490
{value,{var,_,Name}} -> true;
494
%% parse_rule_regexp(RegExpString, Macros) -> {ok,RegExp} | {error,Error}.
495
%% Substitute in macros and parse RegExpString. Cannot use regexp:gsub
496
%% here as it uses info in replace string (&).
498
parse_rule_regexp(RE0, [{M,Exp}|Ms]) ->
499
case regexp:matches(RE0, "{" ++ M ++ "}") of
501
RE1 = sub_repl(Mats, Exp, RE0, 1),
502
parse_rule_regexp(RE1, Ms);
504
parse_rule_regexp(RE0, Ms)
506
parse_rule_regexp(RE, []) ->
507
%%io:fwrite("RE = ~p~n", [RE]),
510
sub_repl([{St,L}|Ss], Rep, S, Pos) ->
511
Rs = sub_repl(Ss, Rep, S, St+L),
512
substr(S, Pos, St-Pos) ++ Rep ++ Rs;
513
sub_repl([], _Rep, S, Pos) -> substr(S, Pos).
515
%% parse_code(State, File, Line, REAs, Actions) ->
516
%% {ok,RegExpActions,Actions,CodeLine,NewState}.
517
%% Finds the line and the position where the code section of the file
518
%% begins. This must exist.
520
parse_code(St, Ifile, {ok,?CODE_HEAD ++ Rest,CodeL}, REAs, As) ->
521
St1 = warn_ignored_chars(St, CodeL, Rest),
522
{ok, CodePos} = file:position(Ifile, cur),
523
%% Just count the lines; copy the code from file to file later.
524
NCodeLines = count_lines(Ifile, 0),
525
{ok,REAs,As,{CodeL,CodePos,NCodeLines},St1};
526
parse_code(St, _, {ok,_,L}, _, _) ->
527
add_error({L,leex,missing_code}, St);
528
parse_code(St, _, {eof,L}, _, _) ->
529
add_error({L,leex,missing_code}, St).
531
count_lines(File, N) ->
532
case io:get_line(File, leex) of
534
_Line -> count_lines(File, N+1)
537
%% nextline(InputFile, PrevLineNo) -> {ok,Chars,LineNo} | {eof,LineNo}.
538
%% Get the next line skipping comment lines and blank lines.
540
nextline(Ifile, L) ->
541
case io:get_line(Ifile, leex) of
544
case substr(Chars, span(Chars, " \t\n")+1) of
545
[$%|_Rest] -> nextline(Ifile, L+1);
546
[] -> nextline(Ifile, L+1);
547
_Other -> {ok,Chars,L+1}
551
warn_ignored_chars(St, Line, S) ->
554
_ -> add_warning(Line, ignored_characters, St)
558
[C || C <- S, C > $\s, C < $\200 orelse C > $\240].
560
%% We use standard methods, Thompson's construction and subset
561
%% construction, to create first an NFA and then a DFA from the
562
%% regexps. A non-standard feature is that we work with sets of
563
%% character ranges (crs) instead sets of characters. This is most
564
%% noticeable when constructing DFAs. The major benefit is that we can
565
%% handle characters from any set, not just limited ASCII or 8859,
566
%% even 16/32 bit unicode.
568
%% The whole range of characters is 0-maxchar, where maxchar is a BIG
569
%% number. We don't make any assumptions about the size of maxchar, it
570
%% is just bigger than any character.
572
%% Using character ranges makes describing many regexps very simple,
573
%% for example the regexp "." just becomes the range
574
%% [{0-9},{11-maxchar}].
576
%% make_nfa(RegExpActions) -> {ok,{NFA,StartState}} | {error,E}.
577
%% Build a complete nfa from a list of {RegExp,Action}. The NFA field
578
%% accept has values {yes,Action}|no. The NFA is a list of states.
580
make_dfa(REAs, St) ->
581
{NFA,NF} = build_combined_nfa(REAs),
582
verbose_print(St, "NFA contains ~w states, ", [tuple_size(NFA)]),
583
{DFA0,DF0} = build_dfa(NFA, NF),
584
verbose_print(St, "DFA contains ~w states, ", [length(DFA0)]),
585
{DFA,DF} = minimise_dfa(DFA0, DF0),
586
verbose_print(St, "minimised to ~w states.~n", [length(DFA)]),
587
%%io:fwrite("~p\n", [{NF,NFA}]),
588
%%io:fwrite("~p\n", [{DF0,DFA0}]),
589
%%io:fwrite("~p\n", [{DF,DFA}]),
592
%% build_combined_nfa(RegExpActionList) -> {NFA,FirstState}.
593
%% Build the combined NFA using Thompson's construction straight out
594
%% of the book. Build the separate NFAs in the same order as the
595
%% rules so that the accepting have ascending states have ascending
596
%% state numbers. Start numbering the states from 1 as we put the
597
%% states in a tuple with the state number as the index.
599
%% The edges from a state are a list of {CharRange,State} | {epsilon,State}.
601
build_combined_nfa(REAs) ->
602
{NFA0,Firsts,Free} = build_nfa_list(REAs, [], [], 1),
603
F = #nfa_state{no=Free,edges=epsilon_trans(Firsts)},
604
{list_to_tuple(keysort(#nfa_state.no, [F|NFA0])),Free}.
606
build_nfa_list([{RE,Action}|REAs], NFA0, Firsts, Free0) ->
607
{NFA1,Free1,First} = build_nfa(RE, Free0, Action),
608
build_nfa_list(REAs, NFA1 ++ NFA0, [First|Firsts], Free1);
609
build_nfa_list([], NFA, Firsts, Free) ->
610
{NFA,reverse(Firsts),Free}.
612
epsilon_trans(Firsts) -> [ {epsilon,F} || F <- Firsts ].
614
%% build_nfa(RegExp, NextState, Action) -> {NFA,NextState,FirstState}.
615
%% When building the NFA states for a ??? we don't build the end
616
%% state, just allocate a State for it and return this state
617
%% number. This allows us to avoid building unnecessary states for
618
%% concatenation which would then have to be removed by overwriting
619
%% an existing state.
621
build_nfa(RE, N0, Action) ->
622
{NFA,N1,E} = build_nfa(RE, N0+1, N0, []),
623
{[#nfa_state{no=E,accept={accept,Action}}|NFA],N1,N0}.
625
%% build_nfa(RegExp, NextState, FirstState, NFA) -> {NFA,NextState,EndState}.
626
%% The NFA is a list of nfa_state is no predefined order. The state
627
%% number of the returned EndState is already allocated!
629
build_nfa({'or',RE1,RE2}, N0, F, NFA0) ->
630
{NFA1,N1,E1} = build_nfa(RE1, N0+1, N0, NFA0),
631
{NFA2,N2,E2} = build_nfa(RE2, N1+1, N1, NFA1),
633
{[#nfa_state{no=F,edges=[{epsilon,N0},{epsilon,N1}]},
634
#nfa_state{no=E1,edges=[{epsilon,E}]},
635
#nfa_state{no=E2,edges=[{epsilon,E}]}|NFA2],
637
build_nfa({concat,RE1, RE2}, N0, F, NFA0) ->
638
{NFA1,N1,E1} = build_nfa(RE1, N0, F, NFA0),
639
{NFA2,N2,E2} = build_nfa(RE2, N1, E1, NFA1),
641
build_nfa({kclosure,RE}, N0, F, NFA0) ->
642
{NFA1,N1,E1} = build_nfa(RE, N0+1, N0, NFA0),
644
{[#nfa_state{no=F,edges=[{epsilon,N0},{epsilon,E}]},
645
#nfa_state{no=E1,edges=[{epsilon,N0},{epsilon,E}]}|NFA1],
647
build_nfa({pclosure,RE}, N0, F, NFA0) ->
648
{NFA1,N1,E1} = build_nfa(RE, N0+1, N0, NFA0),
650
{[#nfa_state{no=F,edges=[{epsilon,N0}]},
651
#nfa_state{no=E1,edges=[{epsilon,N0},{epsilon,E}]}|NFA1],
653
build_nfa({optional,RE}, N0, F, NFA0) ->
654
{NFA1,N1,E1} = build_nfa(RE, N0+1, N0, NFA0),
656
{[#nfa_state{no=F,edges=[{epsilon,N0},{epsilon,E}]},
657
#nfa_state{no=E1,edges=[{epsilon,E}]}|NFA1],
659
build_nfa({char_class,Cc}, N, F, NFA) ->
660
{[#nfa_state{no=F,edges=[{char_class(Cc),N}]}|NFA],N+1,N};
661
build_nfa({comp_class,Cc}, N, F, NFA) ->
662
{[#nfa_state{no=F,edges=[{comp_class(Cc),N}]}|NFA],N+1,N};
663
build_nfa(C, N, F, NFA) when is_integer(C) ->
664
{[#nfa_state{no=F,edges=[{[{C,C}],N}]}|NFA],N+1,N}.
667
Crs = lists:foldl(fun ({C1,C2}, Set) -> add_element({C1,C2}, Set);
668
(C, Set) -> add_element({C,C}, Set)
669
end, ordsets:new(), Cc),
670
pack_crs(ordsets:to_list(Crs)).
672
pack_crs([{C1,C2}=Cr,{C3,C4}|Crs]) when C1 =< C3, C2 >= C4 ->
676
pack_crs([{C1,C2},{C3,C4}|Crs]) when C2 >= C3, C2 < C4 ->
679
pack_crs([{C1,C4}|Crs]);
680
pack_crs([{C1,C2},{C3,C4}|Crs]) when C2 + 1 == C3 ->
683
pack_crs([{C1,C4}|Crs]);
684
pack_crs([Cr|Crs]) -> [Cr|pack_crs(Crs)];
688
Crs = char_class(Cc),
689
%%io:fwrite("comp: ~p\n", [Crs]),
692
comp_crs([{C1,C2}|Crs], Last) ->
693
[{Last,C1-1}|comp_crs(Crs, C2+1)];
694
comp_crs([], Last) -> [{Last,maxchar}].
696
%% build_dfa(NFA, NfaFirstState) -> {DFA,DfaFirstState}.
697
%% Build a DFA from an NFA using "subset construction". The major
698
%% difference from the book is that we keep the marked and unmarked
699
%% DFA states in seperate lists. New DFA states are added to the
700
%% unmarked list and states are marked by moving them to the marked
701
%% list. We assume that the NFA accepting state numbers are in
702
%% ascending order for the rules and use ordsets to keep this order.
704
build_dfa(NFA, Nf) ->
705
D = #dfa_state{no=0,nfa=eclosure([Nf], NFA)},
706
{build_dfa([D], 1, [], NFA),0}.
708
%% build_dfa([UnMarked], NextState, [Marked], NFA) -> DFA.
709
%% Traverse the unmarked states. Temporarily add the current unmarked
710
%% state to the marked list before calculating translation, this is
711
%% to avoid adding too many duplicate states. Add it properly to the
712
%% marked list afterwards with correct translations.
714
build_dfa([U|Us0], N0, Ms, NFA) ->
715
{Ts,Us1,N1} = build_dfa(U#dfa_state.nfa, Us0, N0, [], [U|Ms], NFA),
716
M = U#dfa_state{trans=Ts,accept=accept(U#dfa_state.nfa, NFA)},
717
build_dfa(Us1, N1, [M|Ms], NFA);
718
build_dfa([], _, Ms, _) -> Ms.
720
%% build_dfa([NfaState], [Unmarked], NextState, [Transition], [Marked], NFA) ->
721
%% {Transitions,UnmarkedStates,NextState}.
722
%% Foreach NFA state set calculate the legal translations. N.B. must
723
%% search *BOTH* the unmarked and marked lists to check if DFA state
724
%% already exists. As the range of characters is potentially VERY
725
%% large we cannot explicitly test all characters. Instead we first
726
%% calculate the set of all disjoint character ranges which are
727
%% possible candidates to the set of NFA states. The transitions are
728
%% an orddict so we get the transition lists in ascending order.
730
build_dfa(Set, Us, N, Ts, Ms, NFA) ->
731
%% List of all transition sets.
732
Crs0 = [Cr || S <- Set,
733
{Crs,_St} <- (element(S, NFA))#nfa_state.edges,
734
Crs /= epsilon, % Not an epsilon transition
736
Crs1 = lists:usort(Crs0), % Must remove duplicates!
737
%% Build list of disjoint test ranges.
738
Test = disjoint_crs(Crs1),
739
%%io:fwrite("bd: ~p\n ~p\n ~p\n ~p\n", [Set,Crs0,Crs1,Test]),
740
build_dfa(Test, Set, Us, N, Ts, Ms, NFA).
742
%% disjoint_crs([CharRange]) -> [CharRange].
743
%% Take a sorted list of char ranges and make a sorted list of
744
%% disjoint char ranges. No new char range extends past an existing
747
disjoint_crs([{_C1,C2}=Cr1,{C3,_C4}=Cr2|Crs]) when C2 < C3 ->
750
[Cr1|disjoint_crs([Cr2|Crs])];
751
disjoint_crs([{C1,C2},{C3,C4}|Crs]) when C1 == C3 ->
754
[{C1,C2}|disjoint_crs(add_element({C2+1,C4}, Crs))];
755
disjoint_crs([{C1,C2},{C3,C4}|Crs]) when C1 < C3, C2 >= C3, C2 < C4 ->
758
[{C1,C3-1}|disjoint_crs(union([{C3,C2},{C2+1,C4}], Crs))];
759
disjoint_crs([{C1,C2},{C3,C4}|Crs]) when C1 < C3, C2 == C4 ->
762
[{C1,C3-1}|disjoint_crs(add_element({C3,C4}, Crs))];
763
disjoint_crs([{C1,C2},{C3,C4}|Crs]) when C1 < C3, C2 > C4 ->
766
[{C1,C3-1}|disjoint_crs(union([{C3,C4},{C4+1,C2}], Crs))];
767
disjoint_crs([Cr|Crs]) -> [Cr|disjoint_crs(Crs)];
768
disjoint_crs([]) -> [].
770
build_dfa([Cr|Crs], Set, Us, N, Ts, Ms, NFA) ->
771
case eclosure(move(Set, Cr, NFA), NFA) of
773
case dfa_state_exist(S, Us, Ms) of
775
build_dfa(Crs, Set, Us, N, store(Cr, T, Ts), Ms, NFA);
777
U = #dfa_state{no=N,nfa=S},
778
build_dfa(Crs, Set, [U|Us], N+1, store(Cr, N, Ts), Ms, NFA)
781
build_dfa(Crs, Set, Us, N, Ts, Ms, NFA)
783
build_dfa([], _, Us, N, Ts, _, _) ->
786
%% dfa_state_exist(Set, Unmarked, Marked) -> {yes,State} | no.
788
dfa_state_exist(S, Us, Ms) ->
789
case keysearch(S, #dfa_state.nfa, Us) of
790
{value,#dfa_state{no=T}} -> {yes,T};
792
case keysearch(S, #dfa_state.nfa, Ms) of
793
{value,#dfa_state{no=T}} -> {yes,T};
798
%% eclosure([State], NFA) -> [State].
799
%% move([State], Char, NFA) -> [State].
800
%% These are straight out of the book. As eclosure uses ordsets then
801
%% the generated state sets are in ascending order.
803
eclosure(Sts, NFA) -> eclosure(Sts, NFA, []).
805
eclosure([St|Sts], NFA, Ec) ->
806
#nfa_state{edges=Es} = element(St, NFA),
807
eclosure([ N || {epsilon,N} <- Es,
808
not is_element(N, Ec) ] ++ Sts,
809
NFA, add_element(St, Ec));
810
eclosure([], _, Ec) -> Ec.
812
move(Sts, Cr, NFA) ->
813
%% io:fwrite("move1: ~p\n", [{Sts,Cr}]),
815
{Crs,St} <- (element(N, NFA))#nfa_state.edges,
816
Crs /= epsilon, % Not an epsilon transition
819
in_crs({C1,C2}, [{C3,C4}|_Crs]) when C1 >= C3, C2 =< C4 -> true;
820
in_crs(Cr, [Cr|_Crs]) -> true; % Catch bos and eos.
821
in_crs(Cr, [_|Crs]) -> in_crs(Cr, Crs);
822
in_crs(_Cr, []) -> false.
824
%% accept([State], NFA) -> {accept,A} | noaccept.
825
%% Scan down the state list until we find an accepting state.
827
accept([St|Sts], NFA) ->
828
case element(St, NFA) of
829
#nfa_state{accept={accept,A}} -> {accept,A};
830
#nfa_state{accept=noaccept} -> accept(Sts, NFA)
832
accept([], _) -> noaccept.
834
%% minimise_dfa(DFA, DfaFirst) -> {DFA,DfaFirst}.
835
%% Minimise the DFA by removing equivalent states. We consider a
836
%% state if both the transitions and the their accept state is the
837
%% same. First repeatedly run throught the DFA state list removing
838
%% equivalent states and updating remaining transitions with
839
%% remaining equivalent state numbers. When no more reductions are
840
%% possible then pack the remaining state numbers to get consecutive
843
minimise_dfa(DFA0, Df0) ->
844
case min_dfa(DFA0) of
845
{DFA1,[]} -> % No reduction!
846
{DFA2,Rs} = pack_dfa(DFA1),
847
{min_update(DFA2, Rs),min_use(Df0, Rs)};
849
minimise_dfa(min_update(DFA1, Rs), min_use(Df0, Rs))
852
min_dfa(DFA) -> min_dfa(DFA, [], []).
854
min_dfa([D|DFA0], Rs0, MDFA) ->
855
{DFA1,Rs1} = min_delete(DFA0, D#dfa_state.trans, D#dfa_state.accept,
856
D#dfa_state.no, Rs0, []),
857
min_dfa(DFA1, Rs1, [D|MDFA]);
858
min_dfa([], Rs, MDFA) -> {MDFA,Rs}.
860
%% min_delete(States, Trans, Action, NewN, Rs, MiniDFA) -> {MiniDFA,Rs}.
861
%% Delete all states with same transactions and action. Return
862
%% rewrites and minimised DFA with no duplicate states.
864
min_delete([#dfa_state{no=N,trans=T,accept=A}|DFA], T, A, NewN, Rs, MDFA) ->
865
min_delete(DFA, T, A, NewN, [{N,NewN}|Rs], MDFA);
866
min_delete([D|DFA], T, A, NewN, Rs, MDFA) ->
867
min_delete(DFA, T, A, NewN, Rs, [D|MDFA]);
868
min_delete([], _, _, _, Rs, MDFA) -> {MDFA,Rs}.
870
min_update(DFA, Rs) ->
871
[ D#dfa_state{trans=min_update_trans(D#dfa_state.trans, Rs)} || D <- DFA ].
873
min_update_trans(Tr, Rs) ->
874
[ {C,min_use(S, Rs)} || {C,S} <- Tr ].
876
min_use(Old, [{Old,New}|_]) -> New;
877
min_use(Old, [_|Reds]) -> min_use(Old, Reds);
878
min_use(Old, []) -> Old.
880
pack_dfa(DFA) -> pack_dfa(DFA, 0, [], []).
882
pack_dfa([D|DFA], NewN, Rs, PDFA) ->
883
pack_dfa(DFA, NewN+1,
884
[{D#dfa_state.no,NewN}|Rs], [D#dfa_state{no=NewN}|PDFA]);
885
pack_dfa([], _, Rs, PDFA) -> {PDFA,Rs}.
887
%% The main output is the yystate function which is built from the
888
%% DFA. It has the spec:
890
%% yystate() -> InitialState.
891
%% yystate(State, InChars, Line, CurrTokLen, AcceptAction, AcceptLen) ->
892
%% {Action, AcceptLength, RestChars, Line} | Accepting end state
893
%% {Action, AcceptLength, RestChars, Line, State} | Accepting state
894
%% {reject, AcceptLength, CurrTokLen, RestChars, Line, State} |
895
%% {Action, AcceptLength, CurrTokLen, RestChars, Line, State}.
897
%% The return CurrTokLen is always the current number of characters
898
%% scanned in the current token. The returns have the follwoing
900
%% {Action, AcceptLength, RestChars, Line} -
901
%% The scanner has reached an accepting end-state, for example after
902
%% a regexp "abc". Action is the action number and AcceptLength is
903
%% the length of the matching token.
905
%% {Action, AcceptLength, RestChars, Line, State} -
906
%% The scanner has reached an accepting transition state, for example
907
%% after c in regexp "abc(xyz)?", continuation depends on
908
%% RestChars. If RestChars == [] (no more current characters) then we
909
%% need to get more characters to see if it is an end-state,
910
%% otherwise (eof or chars) then we have not found continuing
911
%% characters and it is an end state.
913
%% {reject, AcceptLength, CurrTokLen, RestChars, Line, State} -
914
%% {Action, AcceptLength, CurrTokLen, RestChars, Line, State} -
915
%% The scanner has reached a non-accepting transistion state. If
916
%% RestChars == [] we need to get more characters to continue.
917
%% Otherwise if 'reject' then no accepting state has been reached it
918
%% is an error. If we have an Action and AcceptLength then these are
919
%% the last accept state, use them and continue from there.
921
%% out_file(LeexState, DFA, DfaStart, [Action], Code) -> ok | error.
922
%% Generate an output .erl file from the include file, the DFA and
923
%% the code for the actions.
925
out_file(St0, DFA, DF, Actions, Code) ->
926
verbose_print(St0, "Writing file ~s, ", [St0#leex.efile]),
927
case open_inc_file(St0) of
930
case file:open(St0#leex.efile, [write]) of
933
output_file_directive(Ofile, St0#leex.ifile, 0),
934
out_file(Ifile, Ofile, St0, DFA, DF, Actions,
936
verbose_print(St0, "ok~n", []),
938
after file:close(Ofile)
941
verbose_print(St0, "not ok~n", []),
942
add_error({none,leex,{file_error,Error}}, St0)
944
after file:close(Ifile)
946
{{error,Error},Ifile} ->
947
add_error(Ifile, {none,leex,{file_error,Error}}, St0)
950
open_inc_file(State) ->
951
Ifile = State#leex.ifile,
952
case file:open(Ifile, [read]) of
954
Error -> {Error,Ifile}
958
Incdir = filename:join(code:lib_dir(parsetools), "include"),
959
filename:join(Incdir, ?LEEXINC);
960
inc_file_name(Filename) ->
963
%% out_file(IncFile, OutFile, State, DFA, DfaStart, Actions, Code, Line) -> ok
964
%% Copy the include file line by line substituting special lines with
965
%% generated code. We cheat by only looking at the first 5
968
out_file(Ifile, Ofile, St, DFA, DF, Actions, Code, L) ->
969
case io:get_line(Ifile, leex) of
970
eof -> output_file_directive(Ofile, St#leex.ifile, L);
972
case substr(Line, 1, 5) of
973
"##mod" -> out_module(Ofile, St);
974
"##cod" -> out_erlang_code(Ofile, St, Code, L);
975
"##dfa" -> out_dfa(Ofile, St, DFA, Code, DF, L);
976
"##act" -> out_actions(Ofile, St#leex.xfile, Actions);
977
_ -> io:put_chars(Ofile, Line)
979
out_file(Ifile, Ofile, St, DFA, DF, Actions, Code, L+1)
982
out_module(File, St) ->
983
io:fwrite(File, "-module(~w).\n", [St#leex.module]).
985
out_erlang_code(File, St, Code, L) ->
986
{CodeL,CodePos,_NCodeLines} = Code,
987
output_file_directive(File, St#leex.xfile, CodeL),
988
{ok,Xfile} = file:open(St#leex.xfile, [read]),
990
{ok,_} = file:position(Xfile, CodePos),
991
{ok,_} = file:copy(Xfile, File)
996
output_file_directive(File, St#leex.ifile, L).
998
out_dfa(File, St, DFA, Code, DF, L) ->
999
{_CodeL,_CodePos,NCodeLines} = Code,
1000
%% Three file attributes before this one...
1001
output_file_directive(File, St#leex.efile, L+(NCodeLines-1)+3),
1002
io:fwrite(File, "yystate() -> ~w.~n~n", [DF]),
1003
foreach(fun (S) -> out_trans(File, S) end, DFA),
1004
io:fwrite(File, "yystate(S, Ics, Line, Tlen, Action, Alen) ->~n", []),
1005
io:fwrite(File, " {Action,Alen,Tlen,Ics,Line,S}.~n", []).
1007
out_trans(File, #dfa_state{no=N,trans=[],accept={accept,A}}) ->
1008
%% Accepting end state, guaranteed done.
1009
io:fwrite(File, "yystate(~w, Ics, Line, Tlen, _, _) ->~n", [N]),
1010
io:fwrite(File, " {~w,Tlen,Ics,Line};~n", [A]);
1011
out_trans(File, #dfa_state{no=N,trans=Tr,accept={accept,A}}) ->
1012
%% Accepting state, but there maybe more.
1013
foreach(fun (T) -> out_accept_tran(File, N, A, T) end, pack_trans(Tr)),
1014
io:fwrite(File, "yystate(~w, Ics, Line, Tlen, _, _) ->~n", [N]),
1015
io:fwrite(File, " {~w,Tlen,Ics,Line,~w};~n", [A,N]);
1016
out_trans(File, #dfa_state{no=N,trans=Tr,accept=noaccept}) ->
1017
%% Non-accepting transition state.
1018
foreach(fun (T) -> out_noaccept_tran(File, N, T) end, pack_trans(Tr)),
1019
io:fwrite(File, "yystate(~w, Ics, Line, Tlen, Action, Alen) ->~n", [N]),
1020
io:fwrite(File, " {Action,Alen,Tlen,Ics,Line,~w};~n", [N]).
1022
out_accept_tran(File, N, A, {{Cf,maxchar},S}) ->
1023
out_accept_head_max(File, N, Cf),
1024
out_accept_body(File, S, "Line", A);
1025
out_accept_tran(File, N, A, {{Cf,Cl},S}) ->
1026
out_accept_head_range(File, N, Cf, Cl),
1027
out_accept_body(File, S, "Line", A);
1028
out_accept_tran(File, N, A, {$\n,S}) ->
1029
out_accept_head_1(File, N, $\n),
1030
out_accept_body(File, S, "Line+1", A);
1031
out_accept_tran(File, N, A, {C,S}) ->
1032
out_accept_head_1(File, N, C),
1033
out_accept_body(File, S, "Line", A).
1035
out_accept_head_1(File, State, Char) ->
1036
out_head_1(File, State, Char, "_", "_").
1038
out_accept_head_max(File, State, Min) ->
1039
out_head_max(File, State, Min, "_", "_").
1041
out_accept_head_range(File, State, Min, Max) ->
1042
out_head_range(File, State, Min, Max, "_", "_").
1044
out_accept_body(File, Next, Line, Action) ->
1045
out_body(File, Next, Line, io_lib:write(Action), "Tlen").
1047
out_noaccept_tran(File, N, {{Cf,maxchar},S}) ->
1048
out_noaccept_head_max(File, N, Cf),
1049
out_noaccept_body(File, S, "Line");
1050
out_noaccept_tran(File, N, {{Cf,Cl},S}) ->
1051
out_noaccept_head_range(File, N, Cf, Cl),
1052
out_noaccept_body(File, S, "Line");
1053
out_noaccept_tran(File, N, {$\n,S}) ->
1054
out_noaccept_head_1(File, N, $\n),
1055
out_noaccept_body(File, S, "Line+1");
1056
out_noaccept_tran(File, N, {C,S}) ->
1057
out_noaccept_head_1(File, N, C),
1058
out_noaccept_body(File, S, "Line").
1060
out_noaccept_head_1(File, State, Char) ->
1061
out_head_1(File, State, Char, "Action", "Alen").
1063
out_noaccept_head_max(File, State, Min) ->
1064
out_head_max(File, State, Min, "Action", "Alen").
1066
out_noaccept_head_range(File, State, Min, Max) ->
1067
out_head_range(File, State, Min, Max, "Action", "Alen").
1069
out_noaccept_body(File, Next, Line) ->
1070
out_body(File, Next, Line, "Action", "Alen").
1072
out_head_1(File, State, Char, Action, Alen) ->
1073
io:fwrite(File, "yystate(~w, [~w|Ics], Line, Tlen, ~s, ~s) ->\n",
1074
[State,Char,Action,Alen]).
1076
out_head_max(File, State, Min, Action, Alen) ->
1077
io:fwrite(File, "yystate(~w, [C|Ics], Line, Tlen, ~s, ~s) when C >= ~w ->\n",
1078
[State,Action,Alen,Min]).
1080
out_head_range(File, State, Min, Max, Action, Alen) ->
1081
io:fwrite(File, "yystate(~w, [C|Ics], Line, Tlen, ~s, ~s) when C >= ~w, C =< ~w ->\n",
1082
[State,Action,Alen,Min,Max]).
1084
out_body(File, Next, Line, Action, Alen) ->
1085
io:fwrite(File, " yystate(~w, Ics, ~s, Tlen+1, ~s, ~s);\n",
1086
[Next,Line,Action,Alen]).
1088
%% pack_trans([{Crange,State}]) -> [{Crange,State}] when
1089
%% Crange = {Char,Char} | Char.
1090
%% Pack the translation table into something more suitable for
1091
%% generating code. We KNOW how the pattern matching compiler works
1092
%% so solitary characters are stored before ranges. We do this by
1093
%% prepending singletons to the front of the packed transitions and
1094
%% appending ranges to the back. This preserves the smallest to
1095
%% largest order of ranges. Newline characters, $\n, are always
1096
%% extracted and handled as singeltons.
1098
pack_trans(Trs) -> pack_trans(Trs, []).
1100
%% pack_trans(Trs) ->
1101
%% Trs1 = pack_trans(Trs, []),
1102
%% io:fwrite("tr:~p\n=> ~p\n", [Trs,Trs1]),
1105
pack_trans([{{C,C},S}|Trs], Pt) -> % Singletons to the head
1106
pack_trans(Trs, [{C,S}|Pt]);
1107
%% Special detection and handling of $\n.
1108
pack_trans([{{Cf,$\n},S}|Trs], Pt) ->
1109
pack_trans([{{Cf,$\n-1},S}|Trs], [{$\n,S}|Pt]);
1110
pack_trans([{{$\n,Cl},S}|Trs], Pt) ->
1111
pack_trans([{{$\n+1,Cl},S}|Trs], [{$\n,S}|Pt]);
1112
pack_trans([{{Cf,Cl},S}|Trs], Pt) when Cf < $\n, Cl > $\n ->
1113
pack_trans([{{Cf,$\n-1},S},{{$\n+1,Cl},S}|Trs], [{$\n,S}|Pt]);
1114
%% Small ranges become singletons.
1115
pack_trans([{{Cf,Cl},S}|Trs], Pt) when Cl == Cf + 1 ->
1116
pack_trans(Trs, [{Cf,S},{Cl,S}|Pt]);
1117
pack_trans([Tr|Trs], Pt) -> % The default uninteresting case
1118
pack_trans(Trs, Pt ++ [Tr]);
1119
pack_trans([], Pt) -> Pt.
1121
%% out_actions(File, XrlFile, ActionList) -> ok.
1122
%% Write out the action table.
1124
out_actions(File, XrlFile, As) ->
1125
As1 = prep_out_actions(As),
1126
foreach(fun (A) -> out_action(File, A) end, As1),
1127
io:fwrite(File, "yyaction(_, _, _, _) -> error.~n", []),
1128
foreach(fun (A) -> out_action_code(File, XrlFile, A) end, As1).
1130
prep_out_actions(As) ->
1131
map(fun ({A,empty_action}) ->
1133
({A,Code,TokenChars,TokenLen,TokenLine}) ->
1134
Vs = [{"TokenChars",TokenChars},
1135
{"TokenLen",TokenLen},
1136
{"TokenLine",TokenLine},
1137
{"YYtcs",TokenChars},
1138
{"TokenLen",TokenLen or TokenChars}],
1139
Vars = [if F -> S; true -> "_" end || {S,F} <- Vs],
1140
Name = list_to_atom(lists:concat([yy_,A,'_'])),
1141
[Chars,Len,Line,_,_] = Vars,
1142
Args = [V || V <- [Chars,Len,Line], V =/= "_"],
1143
ArgsChars = string:join(Args, ", "),
1144
{A,Code,Vars,Name,Args,ArgsChars}
1147
out_action(File, {A,empty_action}) ->
1148
io:fwrite(File, "yyaction(~w, _, _, _) -> skip_token;~n", [A]);
1149
out_action(File, {A,_Code,Vars,Name,_Args,ArgsChars}) ->
1150
[_,_,Line,Tcs,Len] = Vars,
1151
io:fwrite(File, "yyaction(~w, ~s, ~s, ~s) ->~n", [A,Len,Tcs,Line]),
1154
io:fwrite(File, " TokenChars = yypre(YYtcs, TokenLen),~n", []);
1157
io:fwrite(File, " ~s(~s);~n", [Name, ArgsChars]).
1159
out_action_code(_File, _XrlFile, {_A,empty_action}) ->
1161
out_action_code(File, XrlFile, {_A,Code,_Vars,Name,Args,ArgsChars}) ->
1162
%% Should set the file to the .erl file, but instead assumes that
1163
%% ?LEEXINC is syntactically correct.
1164
io:fwrite(File, "\n-compile({inline,~w/~w}).\n", [Name, length(Args)]),
1165
{line, L} = erl_scan:token_info(hd(Code), line),
1166
output_file_directive(File, XrlFile, L-2),
1167
io:fwrite(File, "~s(~s) ->~n", [Name, ArgsChars]),
1168
io:fwrite(File, " ~s.\n", [pp_tokens(Code, L)]).
1170
%% Keeps the line breaks of the original code.
1171
pp_tokens(Tokens, Line0) ->
1172
lists:concat(pp_tokens1(Tokens, Line0, [])).
1174
pp_tokens1([], _Line0, _T0) ->
1176
pp_tokens1([T | Ts], Line0, T0) ->
1177
{line, Line} = erl_scan:token_info(T, line),
1178
[pp_sep(Line, Line0, T0), pp_symbol(T) | pp_tokens1(Ts, Line, T)].
1180
pp_symbol({var,_,Var}) -> Var;
1181
pp_symbol({_,_,Symbol}) -> io_lib:fwrite("~p", [Symbol]);
1182
pp_symbol({dot, _}) -> "";
1183
pp_symbol({Symbol, _}) -> Symbol.
1185
pp_sep(Line, Line0, T0) when Line > Line0 ->
1186
["\n " | pp_sep(Line - 1, Line0, T0)];
1187
pp_sep(_Line, _Line0, {'.',_}) ->
1189
pp_sep(_Line, _Line0, _T0) ->
1192
%% out_dfa_graph(LeexState, DFA, DfaStart) -> ok | error.
1193
%% Writes the DFA to a .dot file in DOT-format which can be viewed
1196
out_dfa_graph(St, DFA, DF) ->
1197
verbose_print(St, "Writing DFA to file ~s, ", [St#leex.gfile]),
1198
case file:open(St#leex.gfile, [write]) of
1201
io:fwrite(Gfile, "digraph DFA {~n", []),
1202
out_dfa_states(Gfile, DFA, DF),
1203
out_dfa_edges(Gfile, DFA),
1204
io:fwrite(Gfile, "}~n", []),
1205
verbose_print(St, "ok~n", []),
1207
after file:close(Gfile)
1210
verbose_print(St, "not ok~n", []),
1211
add_error({none,leex,{file_error,Error}}, St)
1214
out_dfa_states(File, DFA, DF) ->
1215
foreach(fun (S) -> out_dfa_state(File, DF, S) end, DFA),
1216
io:fwrite(File, "~n", []).
1218
out_dfa_state(File, DF, #dfa_state{no=DF, accept={accept,_}}) ->
1219
io:fwrite(File, " ~b [shape=doublecircle color=green];~n", [DF]);
1220
out_dfa_state(File, DF, #dfa_state{no=DF, accept=noaccept}) ->
1221
io:fwrite(File, " ~b [shape=circle color=green];~n", [DF]);
1222
out_dfa_state(File, _, #dfa_state{no=S, accept={accept,_}}) ->
1223
io:fwrite(File, " ~b [shape=doublecircle];~n", [S]);
1224
out_dfa_state(File, _, #dfa_state{no=S, accept=noaccept}) ->
1225
io:fwrite(File, " ~b [shape=circle];~n", [S]).
1227
out_dfa_edges(File, DFA) ->
1228
foreach(fun (#dfa_state{no=S,trans=Trans}) ->
1229
Pt = pack_trans(Trans),
1230
Tdict = foldl(fun ({Cr,T}, D) ->
1231
orddict:append(T, Cr, D)
1232
end, orddict:new(), Pt),
1234
Crs = orddict:fetch(T, Tdict),
1235
Edgelab = dfa_edgelabel(Crs),
1236
io:fwrite(File, " ~b -> ~b [label=\"~s\"];~n",
1238
end, sort(orddict:fetch_keys(Tdict)))
1241
dfa_edgelabel([C]) when is_integer(C) -> quote(C);
1242
dfa_edgelabel(Cranges) ->
1243
"[" ++ map(fun ({A,B}) -> [quote(A), "-", quote(B)];
1245
end, Cranges) ++ "]".
1247
output_file_directive(File, Filename, Line) ->
1248
io:fwrite(File, <<"-file(~s, ~w).\n">>,
1249
[format_filename(Filename), Line]).
1251
format_filename(Filename) ->
1252
io_lib:write_string(filename:flatten(Filename)).
1260
quote($\s) -> "\\\\s";
1261
quote($\") -> "\\\"";
1262
quote($\b) -> "\\\\b";
1263
quote($\f) -> "\\\\f";
1264
quote($\n) -> "\\\\n";
1265
quote($\r) -> "\\\\r";
1266
quote($\t) -> "\\\\t";
1267
quote($\e) -> "\\\\e";
1268
quote($\v) -> "\\\\v";
1269
quote($\d) -> "\\\\d";
1270
quote($\\) -> "\\\\";
1271
quote(C) when 32 =< C, C =< 126 -> [C];
1272
quote(C) when 0 =< C, C =< 255 ->
1273
<<T2:2,T1:3,T0:3>> = <<C>>,
1274
["\\\\", $0+T2, $0+T1, $0+T0];