2
This file is part of the Free Component Library
5
Copyright (c) 2006 by Sergei Gorelkin, sergei_gorelkin@mail.ru
7
See the file COPYING.FPC, included in this distribution,
8
for details about the copyright.
10
This program is distributed in the hope that it will be useful,
11
but WITHOUT ANY WARRANTY; without even the implied warranty of
12
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14
**********************************************************************}
17
{$ifdef fpc}{$mode objfpc}{$endif}
19
{$ifopt Q+}{$define overflow_check}{$endif}
28
TXMLUtilString = AnsiString;
31
PXMLUtilString = ^TXMLUtilString;
33
function IsXmlName(const Value: TXMLUtilString; Xml11: Boolean = False): Boolean; overload;
34
function IsXmlName(Value: PXMLUtilChar; Len: Integer; Xml11: Boolean = False): Boolean; overload;
35
function IsXmlNames(const Value: TXMLUtilString; Xml11: Boolean = False): Boolean;
36
function IsXmlNmToken(const Value: TXMLUtilString; Xml11: Boolean = False): Boolean;
37
function IsXmlNmTokens(const Value: TXMLUtilString; Xml11: Boolean = False): Boolean;
38
function IsValidXmlEncoding(const Value: TXMLUtilString): Boolean;
39
function Xml11NamePages: PByteArray;
40
procedure NormalizeSpaces(var Value: TXMLUtilString);
41
function IsXmlWhiteSpace(c: PXMLUtilChar): Boolean;
42
function Hash(InitValue: LongWord; Key: PXMLUtilChar; KeyLen: Integer): LongWord;
43
{ beware, works in ASCII range only }
44
function WStrLIComp(S1, S2: PXMLUtilChar; Len: Integer): Integer;
46
{ a simple hash table with TXMLUtilString keys }
54
PPHashItem = ^PHashItem;
55
PHashItem = ^THashItem;
62
THashItemArray = array[0..MaxInt div sizeof(Pointer)-1] of PHashItem;
63
PHashItemArray = ^THashItemArray;
65
THashForEach = function(Entry: PHashItem; arg: Pointer): Boolean;
67
THashTable = class(TObject)
70
FBucketCount: LongWord;
71
FBucket: PHashItemArray;
72
FOwnsObjects: Boolean;
73
function Lookup(Key: PXMLUtilChar; KeyLength: Integer; out Found: Boolean; CanCreate: Boolean): PHashItem;
74
procedure Resize(NewCapacity: LongWord);
76
constructor Create(InitSize: Integer; OwnObjects: Boolean);
77
destructor Destroy; override;
79
function Find(Key: PXMLUtilChar; KeyLen: Integer): PHashItem;
80
function FindOrAdd(Key: PXMLUtilChar; KeyLen: Integer; out Found: Boolean): PHashItem; overload;
81
function FindOrAdd(Key: PXMLUtilChar; KeyLen: Integer): PHashItem; overload;
82
function Get(Key: PXMLUtilChar; KeyLen: Integer): TObject;
83
function Remove(Entry: PHashItem): Boolean;
84
function RemoveData(aData: TObject): Boolean;
85
procedure ForEach(proc: THashForEach; arg: Pointer);
86
property Count: LongWord read FCount;
89
{ another hash, for detecting duplicate namespaced attributes without memory allocations }
91
TExpHashEntry = record
94
uriPtr: PXMLUtilString;
98
TExpHashEntryArray = array[0..MaxInt div sizeof(TExpHashEntry)-1] of TExpHashEntry;
99
PExpHashEntryArray = ^TExpHashEntryArray;
101
TDblHashArray = class(TObject)
105
FData: PExpHashEntryArray;
107
procedure Init(NumSlots: Integer);
108
function Locate(uri: PXMLUtilString; localName: PXMLUtilChar; localLength: Integer): Boolean;
109
destructor Destroy; override;
116
prevPrefixBinding: TObject;
122
aaPrefix, // only override the prefix
123
aaBoth // override prefix and emit namespace definition
126
TNSSupport = class(TObject)
129
FPrefixSeqNo: Integer;
130
FFreeBindings: TBinding;
132
FBindingStack: array of TBinding;
133
FPrefixes: THashTable;
134
FDefaultPrefix: THashItem;
137
destructor Destroy; override;
138
procedure DefineBinding(const Prefix, nsURI: TXMLUtilString; out Binding: TBinding);
139
function CheckAttribute(const Prefix, nsURI: TXMLUtilString;
140
out Binding: TBinding): TAttributeAction;
141
function IsPrefixBound(P: PXMLUtilChar; Len: Integer; out Prefix: PHashItem): Boolean;
142
function GetPrefix(P: PXMLUtilChar; Len: Integer): PHashItem;
143
function BindPrefix(const nsURI: TXMLUtilString; aPrefix: PHashItem): TBinding;
144
function DefaultNSBinding: TBinding;
145
procedure StartElement;
146
procedure EndElement;
154
Xml11Pg: PByteArray = nil;
156
function Xml11NamePages: PByteArray;
161
if Xml11Pg = nil then
165
p^[I] := ord(Byte(I) in Xml11HighPages);
175
Move(p^, p^[256], 256);
184
function IsXml11Char(Value: PXMLUtilChar; var Index: Integer): Boolean; overload;
186
if (Value[Index] >= #$D800) and (Value[Index] <= #$DB7F) then
189
Result := (Value[Index] >= #$DC00) and (Value[Index] <= #$DFFF);
195
function IsXml11Char(const Value: TXMLUtilString; var Index: Integer): Boolean; overload;
197
if (Value[Index] >= #$D800) and (Value[Index] <= #$DB7F) then
200
Result := (Value[Index] >= #$DC00) and (Value[Index] <= #$DFFF);
206
function IsXmlName(const Value: TXMLUtilString; Xml11: Boolean): Boolean;
208
Result := IsXmlName(PXMLUtilChar(Value), Length(Value), Xml11);
211
function IsXmlName(Value: PXMLUtilChar; Len: Integer; Xml11: Boolean = False): Boolean;
218
Pages := Xml11NamePages
223
if (Len = 0) or not ((Byte(Value[I]) in NamingBitmap[Pages^[hi(Word(Value[I]))]]) or
225
(Xml11 and IsXml11Char(Value, I))) then
230
if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
232
(Xml11 and IsXml11Char(Value, I))) then
239
function IsXmlNames(const Value: TXMLUtilString; Xml11: Boolean): Boolean;
246
Pages := Xml11NamePages
254
while I <= Length(Value) do
256
if not ((Byte(Value[I]) in NamingBitmap[Pages^[Offset+hi(Word(Value[I]))]]) or
258
(Xml11 and IsXml11Char(Value, I))) then
260
if (I = Length(Value)) or (Value[I] <> #32) then
272
function IsXmlNmToken(const Value: TXMLUtilString; Xml11: Boolean): Boolean;
278
Pages := Xml11NamePages
285
while I <= Length(Value) do
287
if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
289
(Xml11 and IsXml11Char(Value, I))) then
296
function IsXmlNmTokens(const Value: TXMLUtilString; Xml11: Boolean): Boolean;
302
Pages := Xml11NamePages
309
while I <= Length(Value) do
311
if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
313
(Xml11 and IsXml11Char(Value, I))) then
315
if (I = Length(Value)) or (Value[I] <> #32) then
323
function IsValidXmlEncoding(const Value: TXMLUtilString): Boolean;
328
if (Value = '') or (Value[1] > #255) or not (char(ord(Value[1])) in ['A'..'Z', 'a'..'z']) then
330
for I := 2 to Length(Value) do
331
if (Value[I] > #255) or not (char(ord(Value[I])) in ['A'..'Z', 'a'..'z', '0'..'9', '.', '_', '-']) then
336
procedure NormalizeSpaces(var Value: TXMLUtilString);
341
// speed: trim only whed needed
342
if (I > 0) and ((Value[1] = #32) or (Value[I] = #32)) then
343
Value := Trim(Value);
345
while I < Length(Value) do
347
if Value[I] = #32 then
350
while (J <= Length(Value)) and (Value[J] = #32) do Inc(J);
351
if J-I > 1 then Delete(Value, I+1, J-I-1);
357
function IsXmlWhiteSpace(c: PXMLUtilChar): Boolean;
359
Result := c^ in [#32,#9,#10,#13];
362
function WStrLIComp(S1, S2: PXMLUtilChar; Len: Integer): Integer;
372
c1 := ord(S1[counter]);
373
c2 := ord(S2[counter]);
374
if (c1 = 0) or (c2 = 0) then break;
377
if c1 in [97..122] then
379
if c2 in [97..122] then
385
until counter >= Len;
389
function Hash(InitValue: LongWord; Key: PXMLUtilChar; KeyLen: Integer): LongWord;
394
{$ifdef overflow_check}{$q-}{$endif}
395
Result := Result * $F4243 xor ord(Key^);
396
{$ifdef overflow_check}{$q+}{$endif}
402
function KeyCompare(const Key1: TXMLUtilString; Key2: Pointer; Key2Len: Integer): Boolean;
404
{$IF defined(FPC) and (SizeOf(TXMLUtilChar)=2)}
405
Result := (Length(Key1)=Key2Len) and (CompareWord(Pointer(Key1)^, Key2^, Key2Len) = 0);
407
Result := (Length(Key1)=Key2Len) and CompareMem(Pointer(Key1), Key2, Key2Len*SizeOf(TXMLUtilChar));
413
constructor THashTable.Create(InitSize: Integer; OwnObjects: Boolean);
418
FOwnsObjects := OwnObjects;
420
while I < InitSize do I := I shl 1;
422
FBucket := AllocMem(I * sizeof(PHashItem));
425
destructor THashTable.Destroy;
432
procedure THashTable.Clear;
435
item, next: PHashItem;
437
for I := 0 to FBucketCount-1 do
440
while Assigned(item) do
452
function THashTable.Find(Key: PXMLUtilChar; KeyLen: Integer): PHashItem;
456
Result := Lookup(Key, KeyLen, Dummy, False);
459
function THashTable.FindOrAdd(Key: PXMLUtilChar; KeyLen: Integer;
460
out Found: Boolean): PHashItem;
462
Result := Lookup(Key, KeyLen, Found, True);
465
function THashTable.FindOrAdd(Key: PXMLUtilChar; KeyLen: Integer): PHashItem;
469
Result := Lookup(Key, KeyLen, Dummy, True);
472
function THashTable.Get(Key: PXMLUtilChar; KeyLen: Integer): TObject;
477
e := Lookup(Key, KeyLen, Dummy, False);
484
function THashTable.Lookup(Key: PXMLUtilChar; KeyLength: Integer;
485
out Found: Boolean; CanCreate: Boolean): PHashItem;
490
h := Hash(0, Key, KeyLength);
491
Entry := @FBucket^[h mod FBucketCount];
492
while Assigned(Entry^) and not ((Entry^^.HashValue = h) and KeyCompare(Entry^^.Key, Key, KeyLength) ) do
493
Entry := @Entry^^.Next;
494
Found := Assigned(Entry^);
495
if Found or (not CanCreate) then
500
if FCount > ((FBucketCount*7) div 8) then
502
Resize(FBucketCount * 2);
503
Result := Lookup(Key, KeyLength, Found, CanCreate);
508
// SetString for TXMLUtilStrings trims on zero chars [fixed, #14740]
509
SetLength(Result^.Key, KeyLength);
510
Move(Key^, Pointer(Result^.Key)^, KeyLength*sizeof(TXMLUtilChar));
511
Result^.HashValue := h;
519
procedure THashTable.Resize(NewCapacity: LongWord);
526
p := AllocMem(NewCapacity * sizeof(PHashItem));
527
for i := 0 to FBucketCount-1 do
532
chain := @p^[e^.HashValue mod NewCapacity];
539
FBucketCount := NewCapacity;
544
function THashTable.Remove(Entry: PHashItem): Boolean;
548
chain := @FBucket^[Entry^.HashValue mod FBucketCount];
549
while Assigned(chain^) do
551
if chain^ = Entry then
553
chain^ := Entry^.Next;
561
chain := @chain^^.Next;
566
// this does not free the aData object
567
function THashTable.RemoveData(aData: TObject): Boolean;
573
for i := 0 to FBucketCount-1 do
575
chain := @FBucket^[i];
576
while Assigned(chain^) do
578
if chain^^.Data = aData then
587
chain := @chain^^.Next;
593
procedure THashTable.ForEach(proc: THashForEach; arg: Pointer);
598
for i := 0 to FBucketCount-1 do
603
if not proc(e, arg) then
612
destructor TDblHashArray.Destroy;
618
procedure TDblHashArray.Init(NumSlots: Integer);
622
if ((NumSlots * 2) shr FSizeLog) <> 0 then // need at least twice more entries, and no less than 8
625
while (NumSlots shr FSizeLog) <> 0 do
627
ReallocMem(FData, (1 shl FSizeLog) * sizeof(TExpHashEntry));
630
if FRevision = 0 then
632
FRevision := $FFFFFFFF;
633
for i := (1 shl FSizeLog)-1 downto 0 do
634
FData^[i].rev := FRevision;
639
function TDblHashArray.Locate(uri: PXMLUtilString; localName: PXMLUtilChar; localLength: Integer): Boolean;
646
HashValue := Hash(0, PXMLUtilChar(uri^), Length(uri^));
647
HashValue := Hash(HashValue, localName, localLength);
649
mask := (1 shl FSizeLog) - 1;
650
step := (HashValue and (not mask)) shr (FSizeLog-1) and (mask shr 2) or 1;
651
idx := HashValue and mask;
653
while FData^[idx].rev = FRevision do
655
if (HashValue = FData^[idx].hash) and (FData^[idx].uriPtr^ = uri^) and
656
(FData^[idx].lnameLen = localLength) and
657
CompareMem(FData^[idx].lname, localName, localLength * sizeof(TXMLUtilChar)) then
660
Inc(idx, (1 shl FSizeLog) - step)
670
lnameLen := localLength;
677
constructor TNSSupport.Create;
682
FPrefixes := THashTable.Create(16, False);
683
FBindings := TFPList.Create;
684
SetLength(FBindingStack, 16);
686
{ provide implicit binding for the 'xml' prefix }
687
// TODO: move stduri_xml, etc. to this unit, so they are reused.
688
DefineBinding('xml', 'http://www.w3.org/XML/1998/namespace', b);
691
destructor TNSSupport.Destroy;
695
for I := FBindings.Count-1 downto 0 do
696
TObject(FBindings.List^[I]).Free;
702
function TNSSupport.BindPrefix(const nsURI: TXMLUtilString; aPrefix: PHashItem): TBinding;
704
{ try to reuse an existing binding }
705
result := FFreeBindings;
706
if Assigned(result) then
707
FFreeBindings := result.Next
708
else { no free bindings, create a new one }
710
result := TBinding.Create;
711
FBindings.Add(result);
714
{ link it into chain of bindings at the current element level }
715
result.Next := FBindingStack[FNesting];
716
FBindingStack[FNesting] := result;
720
result.Prefix := aPrefix;
721
result.PrevPrefixBinding := aPrefix^.Data;
722
aPrefix^.Data := result;
725
function TNSSupport.DefaultNSBinding: TBinding;
727
result := TBinding(FDefaultPrefix.Data);
730
procedure TNSSupport.DefineBinding(const Prefix, nsURI: TXMLUtilString;
731
out Binding: TBinding);
735
Pfx := @FDefaultPrefix;
736
if (nsURI <> '') and (Prefix <> '') then
737
Pfx := FPrefixes.FindOrAdd(PXMLUtilChar(Prefix), Length(Prefix));
738
if (Pfx^.Data = nil) or (TBinding(Pfx^.Data).uri <> nsURI) then
739
Binding := BindPrefix(nsURI, Pfx)
744
function TNSSupport.CheckAttribute(const Prefix, nsURI: TXMLUtilString;
745
out Binding: TBinding): TAttributeAction;
750
buf: array[0..31] of TXMLUtilChar;
755
Result := aaUnchanged;
757
Pfx := FPrefixes.FindOrAdd(PXMLUtilChar(Prefix), Length(Prefix))
758
else if nsURI = '' then
760
{ if the prefix is already bound to correct URI, we're done }
761
if Assigned(Pfx) and Assigned(Pfx^.Data) and (TBinding(Pfx^.Data).uri = nsURI) then
764
{ see if there's another prefix bound to the target URI }
765
// TODO: should use something faster than linear search
766
for i := FNesting downto 0 do
768
b := FBindingStack[i];
771
if (b.uri = nsURI) and (b.Prefix <> @FDefaultPrefix) then
773
Binding := b; // found one -> override the attribute's prefix
780
{ no prefix, or bound (to wrong URI) -> use generated prefix instead }
781
if (Pfx = nil) or Assigned(Pfx^.Data) then
784
i := FPrefixSeqNo; // This is just 'NS'+IntToStr(FPrefixSeqNo);
785
p := @Buf[high(Buf)]; // done without using strings
788
p^ := TXMLUtilChar(i mod 10+ord('0'));
794
Pfx := FPrefixes.FindOrAdd(p, @Buf[high(Buf)]-p+1);
795
until Pfx^.Data = nil;
796
Binding := BindPrefix(nsURI, Pfx);
800
function TNSSupport.IsPrefixBound(P: PXMLUtilChar; Len: Integer; out
801
Prefix: PHashItem): Boolean;
803
Prefix := FPrefixes.FindOrAdd(P, Len);
804
Result := Assigned(Prefix^.Data) and (TBinding(Prefix^.Data).uri <> '');
807
function TNSSupport.GetPrefix(P: PXMLUtilChar; Len: Integer): PHashItem;
809
if Assigned(P) and (Len > 0) then
810
Result := FPrefixes.FindOrAdd(P, Len)
812
Result := @FDefaultPrefix;
815
procedure TNSSupport.StartElement;
818
if FNesting >= Length(FBindingStack) then
819
SetLength(FBindingStack, FNesting * 2);
822
procedure TNSSupport.EndElement;
826
temp := FBindingStack[FNesting];
827
while Assigned(temp) do
831
b.next := FFreeBindings;
833
b.Prefix^.Data := b.prevPrefixBinding;
835
FBindingStack[FNesting] := nil;
844
if Assigned(Xml11Pg) then