1
%% ``The contents of this file are subject to the Erlang Public License,
2
%% Version 1.1, (the "License"); you may not use this file except in
3
%% compliance with the License. You should have received a copy of the
4
%% Erlang Public License along with this software. If not, it can be
5
%% retrieved via the world wide web at http://www.erlang.org/.
7
%% Software distributed under the License is distributed on an "AS IS"
8
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
9
%% the License for the specific language governing rights and limitations
12
%% The Initial Developer of the Original Code is Ericsson Utvecklings AB.
13
%% Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings
14
%% AB. All Rights Reserved.''
16
%% $Id: asn1ct_tok.erl,v 1.1 2008/12/17 09:53:30 mikpe Exp $
20
%% Tokenize ASN.1 code (input to parser generated with yecc)
22
-export([get_name/2,tokenise/2, file/1]).
26
case file:open(File, [read]) of
28
{error,{File,file:format_error(Reason)}};
36
process(Stream,Lno,R) ->
37
process(io:get_line(Stream, ''), Stream,Lno+1,R).
39
process(eof, Stream,Lno,R) ->
41
lists:flatten(lists:reverse([{'$end',Lno}|R]));
44
process(L, Stream,Lno,R) when list(L) ->
45
%%io:format('read:~s',[L]),
46
case catch tokenise(L,Lno) of
48
io:format("Tokeniser error on line: ~w ~w~n",[Lno,Reason]),
51
%%io:format('toks:~w~n',[T]),
52
process(Stream,Lno,[T|R])
56
tokenise([H|T],Lno) when $a =< H , H =< $z ->
57
{X, T1} = get_name(T, [H]),
58
[{identifier,Lno, list_to_atom(X)}|tokenise(T1,Lno)];
60
tokenise([$&,H|T],Lno) when $A =< H , H =< $Z ->
61
{Y, T1} = get_name(T, [H]),
63
[{typefieldreference, Lno, X} | tokenise(T1, Lno)];
65
tokenise([$&,H|T],Lno) when $a =< H , H =< $z ->
66
{Y, T1} = get_name(T, [H]),
68
[{valuefieldreference, Lno, X} | tokenise(T1, Lno)];
70
tokenise([H|T],Lno) when $A =< H , H =< $Z ->
71
{Y, T1} = get_name(T, [H]),
73
case reserved_word(X) of
75
[{X,Lno}|tokenise(T1,Lno)];
77
[{typereference,Lno,X}|tokenise(T1,Lno)];
79
[{restrictedcharacterstringtype,Lno,X}|tokenise(T1,Lno)]
82
tokenise([$-,H|T],Lno) when $0 =< H , H =< $9 ->
83
{X, T1} = get_number(T, [H]),
84
[{number,Lno,-1 * list_to_integer(X)}|tokenise(T1,Lno)];
86
tokenise([H|T],Lno) when $0 =< H , H =< $9 ->
87
{X, T1} = get_number(T, [H]),
88
[{number,Lno,list_to_integer(X)}|tokenise(T1,Lno)];
90
tokenise([$-,$-|T],Lno) ->
91
tokenise(skip_comment(T),Lno);
92
tokenise([$:,$:,$=|T],Lno) ->
93
[{'::=',Lno}|tokenise(T,Lno)];
95
tokenise([$'|T],Lno) ->
96
case catch collect_quoted(T,Lno,[]) of
98
throw({'ERR','bad_quote'});
100
[Thing|tokenise(T1,Lno)]
103
tokenise([$"|T],Lno) ->
104
collect_string(T,Lno);
106
tokenise([${|T],Lno) ->
107
[{'{',Lno}|tokenise(T,Lno)];
109
tokenise([$}|T],Lno) ->
110
[{'}',Lno}|tokenise(T,Lno)];
112
tokenise([$]|T],Lno) ->
113
[{']',Lno}|tokenise(T,Lno)];
115
tokenise([$[|T],Lno) ->
116
[{'[',Lno}|tokenise(T,Lno)];
118
tokenise([$,|T],Lno) ->
119
[{',',Lno}|tokenise(T,Lno)];
121
tokenise([$(|T],Lno) ->
122
[{'(',Lno}|tokenise(T,Lno)];
123
tokenise([$)|T],Lno) ->
124
[{')',Lno}|tokenise(T,Lno)];
126
tokenise([$.,$.,$.|T],Lno) ->
127
[{'...',Lno}|tokenise(T,Lno)];
129
tokenise([$.,$.|T],Lno) ->
130
[{'..',Lno}|tokenise(T,Lno)];
132
tokenise([$.|T],Lno) ->
133
[{'.',Lno}|tokenise(T,Lno)];
134
tokenise([$^|T],Lno) ->
135
[{'^',Lno}|tokenise(T,Lno)];
136
tokenise([$!|T],Lno) ->
137
[{'!',Lno}|tokenise(T,Lno)];
138
tokenise([$||T],Lno) ->
139
[{'|',Lno}|tokenise(T,Lno)];
142
tokenise([H|T],Lno) ->
143
case white_space(H) of
147
[{list_to_atom([H]),Lno}|tokenise(T,Lno)]
153
collect_string(L,Lno) ->
154
collect_string(L,Lno,[]).
156
collect_string([],_,_) ->
157
throw({'ERR','bad_quote found eof'});
159
collect_string([H|T],Lno,Str) ->
162
[{cstring,1,lists:reverse(Str)}|tokenise(T,Lno)];
164
collect_string(T,Lno,[Ch|Str])
169
% <name> is letters digits hyphens
170
% hypen is not the last character. Hypen hyphen is NOT allowed
172
% <identifier> ::= <lowercase> <name>
174
get_name([$-,Char|T], L) ->
175
case isalnum(Char) of
177
get_name(T,[Char,$-|L]);
179
{lists:reverse(L),[$-,Char|T]}
181
get_name([$-|T], L) ->
182
{lists:reverse(L),[$-|T]};
183
get_name([Char|T], L) ->
184
case isalnum(Char) of
186
get_name(T,[Char|L]);
188
{lists:reverse(L),[Char|T]}
191
{lists:reverse(L), []}.
194
isalnum(H) when $A =< H , H =< $Z ->
196
isalnum(H) when $a =< H , H =< $z ->
198
isalnum(H) when $0 =< H , H =< $9 ->
203
isdigit(H) when $0 =< H , H =< $9 ->
208
white_space(9) -> true;
209
white_space(10) -> true;
210
white_space(13) -> true;
211
white_space(32) -> true;
212
white_space(_) -> false.
215
get_number([H|T], L) ->
218
get_number(T, [H|L]);
220
{lists:reverse(L), [H|T]}
223
{lists:reverse(L), []}.
227
skip_comment([$-,$-|T]) ->
229
skip_comment([_|T]) ->
232
collect_quoted([$',$B|T],Lno, L) ->
235
{{bstring,Lno, lists:reverse(L)}, T};
237
throw({'ERR',{invalid_binary_number, lists:reverse(L)}})
239
collect_quoted([$',$H|T],Lno, L) ->
242
{{hstring,Lno, lists:reverse(L)}, T};
244
throw({'ERR',{invalid_binary_number, lists:reverse(L)}})
246
collect_quoted([H|T], Lno, L) ->
247
collect_quoted(T, Lno,[H|L]);
248
collect_quoted([], _, _) -> % This should be allowed FIX later
249
throw({'ERR',{eol_in_token}}).
260
check_hex([H|T]) when $0 =< H , H =< $9 ->
262
check_hex([H|T]) when $A =< H , H =< $F ->
270
%% reserved_word(A) -> true|false|rstrtype
272
%% returns true if A is a reserved ASN.1 word
273
%% returns false if A is not a reserved word
274
%% returns rstrtype if A is a reserved word in the group
275
%% RestrictedCharacterStringType
276
reserved_word('ABSENT') -> true;
277
%reserved_word('ABSTRACT-SYNTAX') -> true; % impl as predef item
278
reserved_word('ALL') -> true;
279
reserved_word('ANY') -> true;
280
reserved_word('APPLICATION') -> true;
281
reserved_word('AUTOMATIC') -> true;
282
reserved_word('BEGIN') -> true;
283
reserved_word('BIT') -> true;
284
reserved_word('BMPString') -> rstrtype;
285
reserved_word('BOOLEAN') -> true;
286
reserved_word('BY') -> true;
287
reserved_word('CHARACTER') -> true;
288
reserved_word('CHOICE') -> true;
289
reserved_word('CLASS') -> true;
290
reserved_word('COMPONENT') -> true;
291
reserved_word('COMPONENTS') -> true;
292
reserved_word('CONSTRAINED') -> true;
293
reserved_word('DEFAULT') -> true;
294
reserved_word('DEFINED') -> true;
295
reserved_word('DEFINITIONS') -> true;
296
reserved_word('EMBEDDED') -> true;
297
reserved_word('END') -> true;
298
reserved_word('ENUMERATED') -> true;
299
reserved_word('EXCEPT') -> true;
300
reserved_word('EXPLICIT') -> true;
301
reserved_word('EXPORTS') -> true;
302
reserved_word('EXTERNAL') -> true;
303
reserved_word('FALSE') -> true;
304
reserved_word('FROM') -> true;
305
reserved_word('GeneralizedTime') -> true;
306
reserved_word('GeneralString') -> rstrtype;
307
reserved_word('GraphicString') -> rstrtype;
308
reserved_word('IA5String') -> rstrtype;
309
% reserved_word('TYPE-IDENTIFIER') -> true; % impl as predef item
310
reserved_word('IDENTIFIER') -> true;
311
reserved_word('IMPLICIT') -> true;
312
reserved_word('IMPORTS') -> true;
313
reserved_word('INCLUDES') -> true;
314
reserved_word('INSTANCE') -> true;
315
reserved_word('INTEGER') -> true;
316
reserved_word('INTERSECTION') -> true;
317
reserved_word('ISO646String') -> rstrtype;
318
reserved_word('MAX') -> true;
319
reserved_word('MIN') -> true;
320
reserved_word('MINUS-INFINITY') -> true;
321
reserved_word('NULL') -> true;
322
reserved_word('NumericString') -> rstrtype;
323
reserved_word('OBJECT') -> true;
324
reserved_word('ObjectDescriptor') -> true;
325
reserved_word('OCTET') -> true;
326
reserved_word('OF') -> true;
327
reserved_word('OPTIONAL') -> true;
328
reserved_word('PDV') -> true;
329
reserved_word('PLUS-INFINITY') -> true;
330
reserved_word('PRESENT') -> true;
331
reserved_word('PrintableString') -> rstrtype;
332
reserved_word('PRIVATE') -> true;
333
reserved_word('REAL') -> true;
334
reserved_word('SEQUENCE') -> true;
335
reserved_word('SET') -> true;
336
reserved_word('SIZE') -> true;
337
reserved_word('STRING') -> true;
338
reserved_word('SYNTAX') -> true;
339
reserved_word('T61String') -> rstrtype;
340
reserved_word('TAGS') -> true;
341
reserved_word('TeletexString') -> rstrtype;
342
reserved_word('TRUE') -> true;
343
reserved_word('UNION') -> true;
344
reserved_word('UNIQUE') -> true;
345
reserved_word('UNIVERSAL') -> true;
346
reserved_word('UniversalString') -> rstrtype;
347
reserved_word('UTCTime') -> true;
348
reserved_word('VideotexString') -> rstrtype;
349
reserved_word('VisibleString') -> rstrtype;
350
reserved_word('WITH') -> true;
351
reserved_word(_) -> false.