1
-------------------------------------------------------------------------------
3
-- <STRONG>Copyright © 2001, 2002 by Thomas Wolf.</STRONG>
5
-- This piece of software is free software; you can redistribute it and/or
6
-- modify it under the terms of the GNU General Public License as published
7
-- by the Free Software Foundation; either version 2, or (at your option)
8
-- any later version. This software is distributed in the hope that it will
9
-- be useful, but <EM>without any warranty</EM>; without even the implied
10
-- warranty of <EM>merchantability or fitness for a particular purpose.</EM>
11
-- See the GNU General Public License for more details. You should have
12
-- received a copy of the GNU General Public License with this distribution,
13
-- see file "<A HREF="GPL.txt">GPL.txt</A>". If not, write to the Free
14
-- Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
18
-- As a special exception from the GPL, if other files instantiate generics
19
-- from this unit, or you link this unit with other files to produce an
20
-- executable, this unit does not by itself cause the resulting executable
21
-- to be covered by the GPL. This exception does not however invalidate any
22
-- other reasons why the executable file might be covered by the GPL.
26
-- Thomas Wolf (TW) <E_MAIL>
30
-- Various string utilities not provided in the standard library. Some
31
-- of these also are repeated here, so that one can get all one needs
32
-- with a single "@with@".
40
-- 01-MAR-2002 TW Initial version.
41
-- 14-MAR-2002 TW Added 'Count'.
42
-- 18-MAR-2002 TW Added 'Letters'.
43
-- 02-MAY-2002 TW Added 'Identifier'.
44
-- 02-AUG-2002 TW Added 'Replace'; corrected bug in 'To_Mixed'.
45
-- 06-AUG-2002 TW Replaced the body of 'Replace' with a non-recursive
46
-- algorithm. The recursive one got slow on large strings
47
-- with many replacements.
48
-- 07-AUG-2002 TW Added 'First_Index', 'Last_Index', and 'Count' with
50
-- 12-OCT-2002 TW Added 'Next_Blank'.
51
-- 18-JUN-2003 TW Added 'Equal'.
52
-- 07-JUL-2003 TW Added 'Cardinality'.
54
-------------------------------------------------------------------------------
56
pragma License (Modified_GPL);
60
package body Util.Strings is
62
----------------------------------------------------------------------------
64
package ASF renames Ada.Strings.Fixed;
66
----------------------------------------------------------------------------
68
function To_Mixed (S : in String) return String
70
Result : String (S'Range);
71
Prev : Character := '_';
73
for I in Result'Range loop
74
if Prev = '_' or else Prev = '.' or else
75
Ada.Strings.Maps.Is_In (Prev, Blanks)
77
Result (I) := To_Upper (S (I));
79
Result (I) := To_Lower (S (I));
86
----------------------------------------------------------------------------
89
(Set : in Ada.Strings.Maps.Character_Set)
94
for Ch in Character loop
95
if Ada.Strings.Maps.Is_In (Ch, Set) then N := N + 1; end if;
100
-- Returns the number of characters in @Set@.
103
(Left, Right : in String)
107
return To_Lower (Left) = To_Lower (Right);
116
for I in Src'Range loop
117
if Src (I) = Ch then return I; end if;
128
for I in reverse Src'Range loop
129
if Src (I) = Ch then return I; end if;
139
Length : constant Natural := Pattern'Length;
141
if Length = 0 then raise Ada.Strings.Pattern_Error; end if;
142
for I in Source'First .. Source'Last - Length + 1 loop
143
if Source (I .. I + Length - 1) = Pattern then
155
Length : constant Natural := Pattern'Length;
157
if Length = 0 then raise Ada.Strings.Pattern_Error; end if;
158
for I in reverse Source'First .. Source'Last - Length + 1 loop
159
if Source (I .. I + Length - 1) = Pattern then
169
Dir : in Ada.Strings.Direction := Forward)
172
use type Ada.Strings.Direction;
174
if Dir = Forward then
175
return First_Index (Src, Ch);
177
return Last_Index (Src, Ch);
184
Dir : in Ada.Strings.Direction := Forward)
187
use type Ada.Strings.Direction;
189
if Dir = Forward then
190
return First_Index (Source, Pattern);
192
return Last_Index (Source, Pattern);
196
----------------------------------------------------------------------------
205
for I in Src'Range loop
206
if Src (I) = Ch then N := N + 1; end if;
216
Length : constant Natural := Pattern'Length;
218
if Length = 0 then raise Ada.Strings.Pattern_Error; end if;
219
if Length > Source'Length then return 0; end if;
221
Stop : constant Natural := Source'Last - Length + 1;
222
I : Natural := Source'First;
226
if Source (I .. I + Length - 1) = Pattern then
227
N := N + 1; I := I + Length;
236
----------------------------------------------------------------------------
243
return Ada.Strings.Maps.Is_In (Ch, Blanks);
246
----------------------------------------------------------------------------
249
(Set : in Ada.Strings.Maps.Character_Set;
254
return Ada.Strings.Maps.Is_In (Ch, Set);
257
----------------------------------------------------------------------------
261
Side : in Ada.Strings.Trim_End := Both)
267
return ASF.Trim (S, Blanks, Blanks);
269
return ASF.Trim (S, Blanks, Null_Set);
271
return ASF.Trim (S, Null_Set, Blanks);
275
----------------------------------------------------------------------------
279
From, To : out Natural;
280
Delim : in Character := '"';
281
Escape : in Character := No_Escape)
284
From := S'First; To := 0;
285
while From <= S'Last and then S (From) /= Delim loop
288
if From = S'Last then
290
elsif From > S'Last then
293
To := Skip_String (S (From .. S'Last), Delim, Escape);
298
Delim : in Character := '"';
299
Escape : in Character := No_Escape)
302
I, From, To : Natural;
306
Get_String (S (I .. S'Last), From, To, Delim, Escape);
307
exit when From = 0 or else To = 0;
310
return From /= 0 and then To = 0;
315
Delim : in Character := '"';
316
Escape : in Character := No_Escape)
322
if S'Last < S'First then return 0; end if;
324
if Escape = No_Escape then
325
To := First_Index (S (From .. S'Last), Delim);
328
if Escape /= Delim then
330
Escaped : Boolean := False;
332
-- Be careful: a double occurrence of the escape is a literal
334
while To <= S'Last loop
335
if S (To) = Escape then
336
Escaped := not Escaped;
337
elsif S (To) = Delim and then not Escaped then
346
-- Delimiters are escaped by doubling!
347
while To < S'Last loop
348
if S (To) = Delim then
349
if S (To + 1) /= Delim then
357
if To <= S'Last and then S (To) = Delim then return To; end if;
366
Delim : in Character;
367
Escape : in Character)
371
if Escape = No_Escape then
375
Result : String (1 .. S'Length * 2);
378
if Escape = Delim then
379
for I in S'Range loop
380
if S (I) = Delim then
381
Result (J) := Escape; J := J + 1;
383
Result (J) := S (I); J := J + 1;
386
for I in S'Range loop
387
if S (I) = Delim or else S (I) = Escape then
388
Result (J) := Escape; J := J + 1;
390
Result (J) := S (I); J := J + 1;
393
return Result (1 .. J - 1);
400
Delim : in Character;
401
Escape : in Character)
404
Result : String (1 .. S'Length);
405
I : Natural := S'First;
406
J : Natural := Result'First;
408
if Escape = Delim then
409
while I <= S'Last loop
410
if S (I) = Delim and then
411
I < S'Last and then S (I + 1) = Delim
416
J := J + 1; I := I + 1;
419
while I <= S'Last loop
420
if S (I) = Escape and then
422
(S (I + 1) = Escape or else S (I + 1) = Delim)
427
J := J + 1; I := I + 1;
430
return Result (Result'First .. J - 1);
435
Quotes : in Ada.Strings.Maps.Character_Set;
436
Escape : in Character := No_Escape)
442
I := ASF.Index (S, Quotes);
443
if I = 0 then return S; end if; -- No string found
444
if Escape = No_Escape then
449
J := Skip_String (S (I .. S'Last), S (I), Ch);
451
-- Unterminated string!
454
return S (S'First .. I) &
455
Unquote (S (I + 1 .. J - 1), S (I), Ch) & S (I) &
456
Unquote_All (S (J + 1 .. S'Last), Quotes, Escape);
459
----------------------------------------------------------------------------
468
Source (Source'First ..
469
Natural'Min (Source'Last,
470
Source'First + Prefix'Length - 1)) =
480
if Suffix'Length > Source'Length then return False; end if;
482
Source (Source'Last - Suffix'Length + 1 .. Source'Last) = Suffix;
485
----------------------------------------------------------------------------
487
ID_Chars : constant Ada.Strings.Maps.Character_Set :=
488
Ada.Strings.Maps."or" (Letters, Ada.Strings.Maps.To_Set ("0123456789_"));
495
if S'Last < S'First or else not Is_In (Letters, S (S'First)) then
499
I : Natural := S'First + 1;
501
while I <= S'Last and then Is_In (ID_Chars, S (I)) loop
508
----------------------------------------------------------------------------
510
function Next_Non_Blank
515
for I in S'Range loop
516
if not Is_Blank (S (I)) then return I; end if;
521
----------------------------------------------------------------------------
528
for I in S'Range loop
529
if Is_Blank (S (I)) then return I; end if;
534
----------------------------------------------------------------------------
542
-- Speed optimized for both small and large strings, with few and many
544
N : constant Natural := Count (Source, What);
546
if N = 0 then return Source; end if;
548
By_Length : constant Natural := By'Length;
549
What_Length : constant Natural := What'Length;
550
Result : String (1 ..
551
Source'Length - N * (What_Length - By_Length));
552
-- Now if only the compiler was smart enough to allocate 'Result'
553
-- directly on the secondary stack instead of using 'alloca'...
554
-- Note that 'Result' has exactly the length it needs!
559
while I <= Source'Last loop
560
K := First_Index (Source (I .. Source'Last), What);
562
Result (J .. Result'Last) := Source (I .. Source'Last);
563
-- J := Result'Last + 1;
564
I := Source'Last + 1;
566
Result (J .. J + (K - I) - 1) := Source (I .. K - 1);
568
Result (J .. J + By_Length - 1) := By;
570
I := K + What_Length;
577
----------------------------------------------------------------------------
578
-- Wildcard string matching
580
package ASM renames Ada.Strings.Maps;
582
procedure Raise_Unterminated_Set
583
(Pattern : in String)
586
Ada.Exceptions.Raise_Exception
587
(Illegal_Pattern'Identity,
588
"Unterminated character set: '" & Pattern & "'");
589
end Raise_Unterminated_Set;
591
procedure Raise_Backslash_At_End
594
Ada.Exceptions.Raise_Exception
595
(Illegal_Pattern'Identity,
596
"'\' at end of pattern (must be followed by a character)");
597
end Raise_Backslash_At_End;
600
function Wildcard_Match
601
(Pattern : in String;
607
(Pattern : in String;
608
Set : out ASM.Character_Set;
610
Inverted : out Boolean)
612
use type ASM.Character_Set;
618
if Pattern'Last <= Pattern'First + 1 then
619
-- Even if it is "[]", it is unterminated!
620
Raise_Unterminated_Set (Pattern);
622
Start := Pattern'First + 1;
624
Set_Inverter /= No_Set_Inverter and then
625
Pattern (Start) = Set_Inverter;
626
if Inverted then Start := Start + 1; end if;
627
-- Go looking for the end of the set.
628
if Start <= Pattern'Last and then Pattern (Start) = ']' then
629
-- This ']' is to be part of the set!
634
Stop := First_Index (Pattern (Stop .. Pattern'Last), ']');
635
if Stop <= Start then
636
Raise_Unterminated_Set (Pattern);
638
-- The set is defined by 'Pattern (Start .. Stop-1)'
639
while Start < Stop loop
640
Lower := Pattern (Start);
642
if Pattern (Start) = '-' and then (Start + 1 < Stop) then
645
if Pattern (Start) < Lower then
646
Ada.Exceptions.Raise_Exception
647
(Illegal_Pattern'Identity,
648
"Upper bound of range in character set is smaller " &
649
"than lower bound: '" &
650
Lower & '-' & Pattern (Start) & "'");
654
ASM.To_Set (ASM.Character_Range'(Lower, Pattern (Start)));
658
Set or ASM.To_Set (ASM.Character_Range'(Lower, Lower));
663
Match_Impossible : exception;
665
function Internal_Match
666
(Pattern : in String;
670
Pattern_I : Natural := Pattern'First;
671
Text_I : Natural := Text'First;
674
while Text_I <= Text'Last loop
675
if (Pattern_I > Pattern'Last) then
676
-- The text is not yet exhausted, hence it can't match!
679
Switch := Pattern (Pattern_I);
680
if Has_Escape and then Switch = '\' then
681
-- Literal match with next pattern character
682
if Pattern_I = Pattern'Last then
683
Raise_Backslash_At_End;
685
Pattern_I := Pattern_I + 1;
686
if Pattern (Pattern_I) /= Text (Text_I) then
689
Pattern_I := Pattern_I + 1;
690
Text_I := Text_I + 1;
691
elsif Has_Char_Set and then Switch = '[' then
694
Set : ASM.Character_Set;
697
Parse_Set (Pattern (Pattern_I .. Pattern'Last),
698
Set, Pattern_I, Inverted);
699
if Inverted = ASM.Is_In (Text (Text_I), Set) then
703
Pattern_I := Pattern_I + 1;
704
Text_I := Text_I + 1;
705
elsif Switch = Any_One then
707
-- Null matches are OK!
711
-- First try the null match, i.e. advance the
712
-- pattern, but not the text.
715
(Pattern (Pattern_I + 1 .. Pattern'Last),
716
Text (Text_I .. Text'Last));
717
if Result then return Result; end if;
718
-- No match: try the any-one match: advance both.
721
-- Match any character.
722
Pattern_I := Pattern_I + 1;
723
Text_I := Text_I + 1;
724
elsif Switch = Zero_Or_More then
725
-- First, collate sequences of '*'s (and maybe '?'s).
726
while Pattern_I < Pattern'Last and then
727
(Pattern (Pattern_I + 1) = Zero_Or_More or
728
(Zero_Or_One and then
729
Pattern (Pattern_I + 1) = Any_One))
731
Pattern_I := Pattern_I + 1;
733
-- '*' at the end of the pattern matches anything:
734
if Pattern_I >= Pattern'Last then return True; end if;
735
-- Find next possible match, if any.
738
P_I : constant Natural := Pattern_I + 1;
740
if Pattern (P_I) = Any_One then
741
-- Can match any one character: don't skip.
743
elsif Has_Char_Set and then Pattern (P_I) = '[' then
744
-- Skip to the next character matching the set
746
Set : ASM.Character_Set;
750
Parse_Set (Pattern (P_I .. Pattern'Last),
751
Set, Dummy, Inverted);
753
Next := ASF.Index (Text (Text_I .. Text'Last),
754
Set, Ada.Strings.Outside);
756
Next := ASF.Index (Text (Text_I .. Text'Last),
757
Set, Ada.Strings.Inside);
761
-- Skip ahead to the next matching character
763
Ch : Character := Pattern (P_I);
765
if Has_Escape and then Ch = '\' then
766
if P_I = Pattern'Last then
767
Raise_Backslash_At_End;
769
Ch := Pattern (P_I + 1);
772
First_Index (Text (Text_I .. Text'Last), Ch);
775
if Next = 0 then raise Match_Impossible; end if;
776
-- No match was possible, so abort the whole thing.
777
-- (This can be done safely because any other match
778
-- for any previous '*' could only require an even
779
-- later match of the character following the current
780
-- '*'-sequence - but we know that this is impossible.
781
-- Therefore, any other matching for previous '*'s is
782
-- bound to fail and hence we may give up. Without
783
-- this, the algorithm would have quadratic behaviour
784
-- in some failure cases, e.g. the "-adobe" negative
785
-- case in the test program.)
790
Internal_Match (Pattern (P_I .. Pattern'Last),
791
Text (Next .. Text'Last));
792
-- This recursion is limited by the number of '*'
793
-- sequences in the pattern.
794
if Result then return Result; end if;
796
-- This star couldn't match this minimal text
797
-- sequence; try to extend it. Note: 'Pattern_I' has
798
-- not been incremented here.
802
-- Literal match between the pattern and the text
803
if Pattern (Pattern_I) /= Text (Text_I) then
806
Pattern_I := Pattern_I + 1;
807
Text_I := Text_I + 1;
810
-- Skip remaining '*'s (and maybe '?'s) at the end of the pattern.
811
while Pattern_I <= Pattern'Last and then
812
(Pattern (Pattern_I) = Zero_Or_More or
813
(Zero_Or_One and then Pattern (Pattern_I) = Any_One))
815
Pattern_I := Pattern_I + 1;
817
return Pattern_I > Pattern'Last;
821
return Internal_Match (Pattern, Text);
823
when Match_Impossible =>
827
function Instance is new Wildcard_Match;
830
(Pattern : in String;