1
-------------------------------------------------------------------------------
3
-- This file is part of AdaBrowse.
5
-- <STRONG>Copyright (c) 2002 by Thomas Wolf.</STRONG>
7
-- AdaBrowse is free software; you can redistribute it and/or modify it
8
-- under the terms of the GNU General Public License as published by the
9
-- Free Software Foundation; either version 2, or (at your option) any
10
-- later version. AdaBrowse is distributed in the hope that it will be
11
-- useful, but <EM>without any warranty</EM>; without even the implied
12
-- warranty of <EM>merchantability or fitness for a particular purpose.</EM>
13
-- See the GNU General Public License for more details. You should have
14
-- received a copy of the GNU General Public License with this distribution,
15
-- see file "<A HREF="GPL.txt">GPL.txt</A>". If not, write to the Free
16
-- Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
21
-- Author:</STRONG><DD>
23
-- <ADDRESS><A HREF="mailto:twolf@acm.org">twolf@acm.org</A></ADDRESS></DL>
26
-- Purpose:</STRONG><DD>
27
-- Simplified Ada 95 parser. Parses the source until it finds the
28
-- name of the library unit declaration. Note: this parser (and its
29
-- scanner!) doesn't need to be hyper-fast, it'll only be used for
30
-- krunched file names, and then parse the file only up to the
36
-- 26-MAR-2002 TW Initial version.
37
-- 21-JUN-2002 TW Uses Util.Text now instead of Ada.Strings.Unbounded.
39
-------------------------------------------------------------------------------
43
with Ada.Strings.Maps;
46
with Util.Files.Text_IO;
48
with Util.Text.Internal;
50
pragma Elaborate_All (Util.Files.Text_IO);
51
pragma Elaborate_All (Util.Text);
53
package body AD.Parse is
55
package UT renames Util.Text;
57
----------------------------------------------------------------------------
58
-- Scanning routines. This is a very simple, line-based scanner. Not
59
-- particularly efficient, but does the job nicely.
65
Left_Paren_Token, Right_Paren_Token, Semicolon_Token, Period_Token,
66
With_Token, Use_Token, Pragma_Token, Type_Token, Package_Token,
67
Procedure_Token, Function_Token, Is_Token, New_Token, Return_Token,
68
Private_Token, Generic_Token, Name_Token, String_Token);
70
procedure Init (File_Name : in String);
74
function Current_Token return Token;
76
function Image return UT.Unbounded_String;
80
Scan_Error : exception;
84
pragma Inline (Current_Token);
88
package body Scanner is
92
F : Ada.Text_IO.File_Type;
94
function Ada_Skip_String
100
return Skip_String (S, Delim, Delim);
104
new Util.Files.Text_IO.Next_Line
105
(Line_Continuation => "",
106
Comment_Start => "--",
107
Delimiters => Ada.Strings.Maps.To_Set ('"'),
108
Strings => Ada_Skip_String);
109
-- Note: we only need to handle the double quote as a string delimiter,
110
-- for "--" can only occur in strings, but never in character literals.
111
-- Hence it isn't necessary to handle the single quote at all here.
113
Curr_Line : UT.Unbounded_String;
114
Curr : UT.String_Access;
117
Curr_Token : Token := Other_Token;
118
Token_Image : UT.Unbounded_String;
119
Token_Ptr : UT.String_Access;
120
-- Set for 'Name_Token' and 'String_Token'; in the latter case, it
121
-- also contains the delimiting double quotes.
126
UT.Set (Curr_Line, Get_Line (F));
127
Curr := UT.Internal.Get_Ptr (Curr_Line);
129
if Curr_Idx > Curr'Last then
138
case Token_Ptr (Token_Ptr'First) is
140
if To_Lower (Token_Ptr.all) = "function" then
141
return Function_Token;
144
if To_Lower (Token_Ptr.all) = "generic" then
145
return Generic_Token;
148
if To_Lower (Token_Ptr.all) = "is" then
152
if To_Lower (Token_Ptr.all) = "new" then
157
S : constant String := To_Lower (Token_Ptr.all);
159
if S = "package" then
160
return Package_Token;
161
elsif S = "pragma" then
163
elsif S = "private" then
164
return Private_Token;
165
elsif S = "procedure" then
166
return Procedure_Token;
170
if To_Lower (Token_Ptr.all) = "return" then
174
if To_Lower (Token_Ptr.all) = "type" then
178
if To_Lower (Token_Ptr.all) = "use" then
182
if To_Lower (Token_Ptr.all) = "with" then
191
Numeral : constant Ada.Strings.Maps.Character_Set :=
192
Ada.Strings.Maps.To_Set ("0123456789_");
194
Based_Numeral : constant Ada.Strings.Maps.Character_Set :=
195
Ada.Strings.Maps.To_Set ("0123456789_ABCDEFabcdef");
200
if Curr_Idx > Curr'Last then Load_Line; end if;
202
Ch : Character := Curr (Curr_Idx);
204
while Is_Blank (Ch) loop
205
Curr_Idx := Curr_Idx + 1;
206
if Curr_Idx > Curr'Last then
207
Load_Line; Curr_Idx := 1;
209
Ch := Curr (Curr_Idx);
213
Curr_Token := Left_Paren_Token;
216
Curr_Token := Right_Paren_Token;
219
Curr_Token := Semicolon_Token;
222
Curr_Token := Period_Token;
224
when 'A' .. 'Z' | 'a' .. 'z' =>
225
-- Parse a name: any sequence of characters, digits, and
228
Stop_Idx : constant Natural :=
229
Identifier (Curr (Curr_Idx .. Curr'Last));
231
UT.Set (Token_Image, Curr (Curr_Idx .. Stop_Idx));
232
Token_Ptr := UT.Internal.Get_Ptr (Token_Image);
233
Curr_Idx := Stop_Idx;
235
Curr_Token := Find_Token;
238
if Curr_Idx + 2 <= Curr'Last and then
239
Curr (Curr_Idx + 2) = '''
241
Curr_Idx := Curr_Idx + 2;
243
Curr_Token := Other_Token;
248
Stop_Idx : constant Natural :=
249
Ada_Skip_String (Curr (Curr_Idx .. Curr'Last), '"');
254
UT.Set (Token_Image, Curr (Curr_Idx .. Stop_Idx));
255
Token_Ptr := UT.Internal.Get_Ptr (Token_Image);
256
Curr_Idx := Stop_Idx;
257
Curr_Token := String_Token;
261
-- Skip a number. Note: use a simplified syntax!
263
Stop_Idx : Natural := Curr_Idx;
265
while Stop_Idx <= Curr'Last and then
266
Is_In (Numeral, Curr (Stop_Idx))
268
Stop_Idx := Stop_Idx + 1;
270
if Stop_Idx <= Curr'Last then
271
if Curr (Stop_Idx) = '#' then
272
Stop_Idx := Stop_Idx + 1;
273
-- Actually, there must be at least one digit, and
274
-- at most one period.
275
while Stop_Idx <= Curr'Last and then
276
Is_In (Based_Numeral, Curr (Stop_Idx))
278
Stop_Idx := Stop_Idx + 1;
280
if Stop_Idx <= Curr'Last and then
281
Curr (Stop_Idx) = '#'
283
Stop_Idx := Stop_Idx + 1;
287
elsif Curr (Stop_Idx) = '.' then
288
Stop_Idx := Stop_Idx + 1;
289
-- Actually, there must be at least one digit.
290
while Stop_Idx <= Curr'Last and then
291
Is_In (Numeral, Curr (Stop_Idx))
293
Stop_Idx := Stop_Idx + 1;
295
end if; -- Fraction or Based
297
if Stop_Idx <= Curr'Last and then
298
Curr (Stop_Idx) = 'E'
300
Stop_Idx := Stop_Idx + 1;
301
if Stop_Idx > Curr'Last then raise Scan_Error; end if;
302
case Curr (Stop_Idx) is
306
Stop_Idx := Stop_Idx + 1;
307
if Stop_Idx > Curr'Last then
313
-- Actually, there must be at least one digit now.
314
while Stop_Idx <= Curr'Last and then
315
Is_In (Numeral, Curr (Stop_Idx))
317
Stop_Idx := Stop_Idx + 1;
320
Curr_Idx := Stop_Idx - 1;
322
Curr_Token := Other_Token;
325
Curr_Token := Other_Token;
328
Curr_Idx := Curr_Idx + 1;
332
function Current_Token
340
return UT.Unbounded_String
343
if Curr_Token = Name_Token or else
344
Curr_Token = String_Token
348
return UT.Null_Unbounded_String;
353
(File_Name : in String)
356
Ada.Text_IO.Open (F, Ada.Text_IO.In_File, File_Name);
364
if Ada.Text_IO.Is_Open (F) then
365
Ada.Text_IO.Close (F);
371
----------------------------------------------------------------------------
372
-- Parsing routines. This is a very simple recursive descent parser, yet
373
-- it recognizes syntactically correct Ada 95 library unit headers up
374
-- to the library unit name. It doesn't do any error recovery, and it
375
-- skips source chunks that are not interesting. The sole purpose of this
376
-- is to get the name of the library unit, not any syntax or semantics
381
function Library_Unit
384
Parse_Error : exception;
388
package body Parser is
392
procedure Skip_Parentheses
394
Level : Natural := 0;
397
case Current_Token is
398
when Left_Paren_Token =>
401
when Right_Paren_Token =>
411
end Skip_Parentheses;
413
procedure Skip_To_Semicolon
416
while Current_Token /= Semicolon_Token loop
419
end Skip_To_Semicolon;
421
procedure Skip_To_Semicolon_Nested
424
while Current_Token /= Semicolon_Token loop
425
if Current_Token = Left_Paren_Token then
431
end Skip_To_Semicolon_Nested;
433
procedure Context_Clauses
437
case Current_Token is
438
when With_Token | Use_Token =>
442
Skip_To_Semicolon_Nested;
448
-- Skip the semicolon.
453
procedure Generic_Formals
457
case Current_Token is
459
-- Just to be on the safe side: allow pragmas in the generic
461
Skip_To_Semicolon_Nested;
467
-- Generic formal type.
469
if Current_Token /= Name_Token then
473
if Current_Token = Left_Paren_Token then
477
if Current_Token /= Is_Token then
483
-- Generic formal subprogram or formal package.
485
case Current_Token is
486
when Package_Token =>
488
if Current_Token /= Name_Token then
492
if Current_Token /= Is_Token then
496
if Current_Token /= New_Token then
500
if Current_Token /= Name_Token then
504
-- It may be an expanded name (Package.Name).
505
while Current_Token = Period_Token loop
507
if Current_Token /= Name_Token then
512
if Current_Token = Left_Paren_Token then
513
-- Generic actual part.
518
when Procedure_Token | Function_Token =>
520
Initial : constant Token := Current_Token;
523
if Current_Token /= Name_Token and then
524
(Initial /= Function_Token or else
525
Current_Token /= String_Token)
530
if Current_Token = Left_Paren_Token then
531
-- Parameter specifications.
534
if Initial = Function_Token then
536
if Current_Token /= Return_Token then
540
if Current_Token /= Name_Token then
554
-- Generic formal object. Skip to first semicolon not within
556
Skip_To_Semicolon_Nested;
558
when Package_Token | Procedure_Token | Function_Token =>
565
if Current_Token /= Semicolon_Token then
568
-- Skip the semicolon.
573
function Library_Unit
578
if Current_Token = Private_Token then Advance; end if;
579
if Current_Token = Generic_Token then
583
case Current_Token is
584
when Package_Token | Procedure_Token | Function_Token =>
586
Initial : constant Token := Current_Token;
587
Unit_Name : UT.Unbounded_String;
589
-- Next one must be the unit name.
591
if Current_Token = Name_Token or else
592
(Initial = Function_Token and then
593
Current_Token = String_Token)
597
Last_Token : Token := Current_Token;
600
while Current_Token = Period_Token loop
602
if Last_Token /= Name_Token then
605
if Current_Token = Name_Token or else
606
(Initial = Function_Token and then
607
Current_Token = String_Token)
609
UT.Append (Unit_Name, '.');
610
UT.Append (Unit_Name, Image);
611
Last_Token := Current_Token;
621
return UT.To_String (Unit_Name);
633
----------------------------------------------------------------------------
634
-- Exported routines.
636
function Get_Unit_Name
637
(File_Name : in String)
641
Scanner.Init (File_Name);
643
Unit_Name : constant String := Parser.Library_Unit;