2
*****************************************************************************
4
* This file is part of the LazUtils package *
6
* See the file COPYING.modifiedLGPL.txt, included in this distribution, *
7
* for details about the copyright. *
9
* This program is distributed in the hope that it will be useful, *
10
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
11
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
13
*****************************************************************************
19
//{$define PASWSTRING_VERBOSE}
20
//{.$define PASWSTRING_SUPPORT_NONUTF8_ANSISTRING} disabled by default because
21
// non utf-8 ansistring is rare in UNIXes and lconvencoding makes the executable big
27
{$ifdef PASWSTRING_SUPPORT_NONUTF8_ANSISTRING}, lconvencoding{$endif}
31
procedure SetPasWidestringManager;
37
procedure fpc_rangeerror; [external name 'FPC_RANGEERROR'];
39
// len comes in widechars, not bytes
40
procedure Wide2AnsiMove(source:pwidechar;var dest:ansistring;len:SizeInt);
44
{$ifdef PASWSTRING_VERBOSE}WriteLn('Wide2AnsiMove START');{$endif}
45
// Copy the originating string taking into account the specified length
46
SetLength(widestr, len);
47
System.Move(source^, widestr[1], len * SizeOf(WideChar));
49
// Now convert it, using UTF-16 -> UTF-8
50
dest := UTF16ToUTF8(widestr);
51
{$ifdef PASWSTRING_SUPPORT_NONUTF8_ANSISTRING}
52
// And correct to the real Ansi encoding
53
dest := ConvertEncoding(dest, EncodingUTF8, GetDefaultTextEncoding());
57
procedure Ansi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
61
{$ifdef PASWSTRING_VERBOSE}WriteLn('Ansi2WideMove START');{$endif}
62
// Copy the originating string taking into account the specified length
63
SetLength(ansistr, len);
64
System.Move(source^, ansistr[1], len);
66
{$ifdef PASWSTRING_SUPPORT_NONUTF8_ANSISTRING}
68
ansistr := ConvertEncoding(ansistr, GetDefaultTextEncoding(), EncodingUTF8);
70
// Now convert it, using UTF-8 -> UTF-16
71
dest := UTF8ToUTF16(ansistr);
74
function LowerWideString(const s : WideString) : WideString;
78
{$ifdef PASWSTRING_VERBOSE}WriteLn('LowerWideString START');{$endif}
79
str := UTF16ToUTF8(s);
80
str := UTF8LowerCase(str);
81
Result := UTF8ToUTF16(str);
84
function UpperWideString(const s : WideString) : WideString;
88
{$ifdef PASWSTRING_VERBOSE}WriteLn('UpperWideString START');{$endif}
89
str := UTF16ToUTF8(s);
90
str := UTF8UpperCase(str);
91
Result := UTF8ToUTF16(str);
94
procedure EnsureAnsiLen(var S: AnsiString; const len: SizeInt); inline;
96
{$ifdef PASWSTRING_VERBOSE}WriteLn('EnsureAnsiLen START');{$endif}
97
if (len>length(s)) then
98
if (length(s) < 10*256) then
99
setlength(s,length(s)+10)
101
setlength(s,length(s)+length(s) shr 8);
105
procedure ConcatCharToAnsiStr(const c: char; var S: AnsiString; var index: SizeInt);
107
{$ifdef PASWSTRING_VERBOSE}WriteLn('ConcatCharToAnsiStr START');{$endif}
108
EnsureAnsiLen(s,index);
109
pchar(@s[index])^:=c;
113
function LowerAnsiString(const s : AnsiString) : AnsiString;
117
{$ifdef PASWSTRING_VERBOSE}WriteLn('LowerAnsiString START');{$endif}
119
Str := UTF8LowerCase(Str);
120
Result := UTF8ToSys(Str);
123
function UpperAnsiString(const s : AnsiString) : AnsiString;
127
{$ifdef PASWSTRING_VERBOSE}WriteLn('UpperAnsiString START');{$endif}
129
Str := UTF8UpperCase(Str);
130
Result := UTF8ToSys(Str);
133
// Just do a simple byte comparison
134
// A more complex analysis would require normalization
135
function WideCompareStr(const s1, s2 : WideString) : PtrInt;
137
count, count1, count2: integer;
139
{$ifdef PASWSTRING_VERBOSE}WriteLn('WideCompareStr START');{$endif}
141
Count1 := Length(S1);
142
Count2 := Length(S2);
143
if Count1>Count2 then
147
result := SysUtils.CompareMemRange(Pointer(S1),Pointer(S2), Count*2);
149
result:=Count1-Count2;
152
function WideCompareText(const s1, s2 : WideString): PtrInt;
156
{$ifdef PASWSTRING_VERBOSE}WriteLn('WideCompareText START');{$endif}
157
a:=LowerWidestring(s1);
158
b:=LowerWidestring(s2);
159
result := WideCompareStr(a,b);
162
function CharLengthPChar(const Str: PChar): PtrInt;
164
{$ifdef PASWSTRING_VERBOSE}WriteLn('CharLengthPChar START');{$endif}
165
Result := UTF8CharacterLength(Str);
168
function AnsiCompareStr(const s1, s2: ansistring): PtrInt;
170
{$ifdef PASWSTRING_VERBOSE}WriteLn('AnsiCompareStr START');{$endif}
171
Result := SysUtils.CompareStr(s1, s2);
174
// Similar to AnsiCompareStr, but with PChar
175
function StrCompAnsi(s1,s2 : PChar): PtrInt;
177
ansi1, ansi2: ansistring;
179
{$ifdef PASWSTRING_VERBOSE}WriteLn('StrCompAnsi START');{$endif}
182
Result := SysUtils.CompareStr(ansi1, ansi2);
186
function AnsiCompareText(const S1, S2: ansistring): PtrInt;
188
str1, str2: utf8string;
190
{$ifdef PASWSTRING_VERBOSE}WriteLn('AnsiCompareText START');{$endif}
191
str1 := SysToUTF8(S1);
192
str2 := SysToUTF8(S2);
193
Result := UTF8CompareText(str1, str2);
197
function AnsiStrIComp(S1, S2: PChar): PtrInt;
199
{$ifdef PASWSTRING_VERBOSE}WriteLn('AnsiStrIComp START');{$endif}
200
Result := AnsiCompareText(StrPas(s1),StrPas(s2));
204
function AnsiStrLComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
208
{$ifdef PASWSTRING_VERBOSE}WriteLn('AnsiStrLComp START');{$endif}
212
if (s1[maxlen]<>#0) then
220
if (s2[maxlen]<>#0) then
228
result:=StrCompAnsi(a,b);
236
function AnsiStrLIComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
240
{$ifdef PASWSTRING_VERBOSE}WriteLn('AnsiStrLIComp START');{$endif}
244
move(s1^,a[1],maxlen);
246
move(s2^,b[1],maxlen);
247
result:=AnsiCompareText(a,b);
251
procedure ansi2pchar(const s: ansistring; const orgp: pchar; out p: pchar);
255
{$ifdef PASWSTRING_VERBOSE}WriteLn('ansi2pchar START');{$endif}
257
if newlen>strlen(orgp) then
261
move(s[1],p[0],newlen);
266
function AnsiStrLower(Str: PChar): PChar;
270
{$ifdef PASWSTRING_VERBOSE}WriteLn('AnsiStrLower START');{$endif}
271
temp:=loweransistring(str);
272
ansi2pchar(temp,str,result);
276
function AnsiStrUpper(Str: PChar): PChar;
280
{$ifdef PASWSTRING_VERBOSE}WriteLn('AnsiStrUpper START');{$endif}
281
temp:=upperansistring(str);
282
ansi2pchar(temp,str,result);
286
procedure InitThread;
291
procedure FiniThread;
297
procedure Unicode2AnsiMove(source:pwidechar;var dest:ansistring;len:SizeInt);
299
widestr: unicodestring;
301
{$ifdef PASWSTRING_VERBOSE}WriteLn('Unicode2AnsiMove START');{$endif}
302
// Copy the originating string taking into account the specified length
303
SetLength(widestr, len);
304
System.Move(source^, widestr[1], len*2);
306
// Now convert it, using UTF-16 -> UTF-8
307
dest := UTF16ToUTF8(widestr);
308
{$ifdef PASWSTRING_SUPPORT_NONUTF8_ANSISTRING}
309
// And correct to the real Ansi encoding
310
dest := ConvertEncoding(dest, EncodingUTF8, GetDefaultTextEncoding());
314
procedure Ansi2UnicodeMove(source:pchar;var dest:UnicodeString;len:SizeInt);
318
{$ifdef PASWSTRING_VERBOSE}WriteLn('Ansi2UnicodeMove START');{$endif}
319
// Copy the originating string taking into account the specified length
320
SetLength(ansistr, len);
321
System.Move(source^, ansistr[1], len);
323
{$ifdef PASWSTRING_SUPPORT_NONUTF8_ANSISTRING}
325
ansistr := ConvertEncoding(ansistr, GetDefaultTextEncoding(), EncodingUTF8);
327
// Now convert it, using UTF-8 -> UTF-16
328
dest := UTF8ToUTF16(ansistr);
331
function UpperUnicodeString(const s : UnicodeString) : UnicodeString;
335
{$ifdef PASWSTRING_VERBOSE}WriteLn('UpperUnicodeString START');{$endif}
336
str := UTF16ToUTF8(s);
337
str := UTF8UpperCase(str);
338
Result := UTF8ToUTF16(str);
341
function LowerUnicodeString(const s : UnicodeString) : UnicodeString;
345
{$ifdef PASWSTRING_VERBOSE}WriteLn('LowerUnicodeString START');{$endif}
346
str := UTF16ToUTF8(s);
347
str := UTF8LowerCase(str);
348
Result := UTF8ToUTF16(str);
351
// Just do a simple byte comparison
352
// A more complex analysis would require normalization
353
function PasUnicodeCompareStr(const s1, s2 : unicodestring) : PtrInt;
355
count, count1, count2: integer;
357
{$ifdef PASWSTRING_VERBOSE}WriteLn('PasUnicodeCompareStr START');{$endif}
359
Count1 := Length(S1);
360
Count2 := Length(S2);
361
if Count1>Count2 then
365
result := SysUtils.CompareMemRange(Pointer(S1),Pointer(S2), Count*2);
367
result:=Count1-Count2;
370
function PasUnicodeCompareText(const s1, s2 : unicodestring): PtrInt;
374
{$ifdef PASWSTRING_VERBOSE}WriteLn('PasUnicodeCompareText START');{$endif}
375
a:=LowerWidestring(s1);
376
b:=LowerWidestring(s2);
377
result := WideCompareStr(a,b);
380
Procedure SetPasWideStringManager;
382
PasWideStringManager : TUnicodeStringManager;
384
PasWideStringManager:=widestringmanager;
385
PasWideStringManager.Wide2AnsiMoveProc:=@Wide2AnsiMove;
386
PasWideStringManager.Ansi2WideMoveProc:=@Ansi2WideMove;
388
// UpperUTF8 : procedure(p:PUTF8String);
389
PasWideStringManager.UpperWideStringProc:=@UpperWideString;
390
// UpperUCS4 : procedure(p:PUCS4Char);
391
// LowerUTF8 : procedure(p:PUTF8String);
392
PasWideStringManager.LowerWideStringProc:=@LowerWideString;
393
// LowerUCS4 : procedure(p:PUCS4Char);
396
CompUTF8 : function(p1,p2:PUTF8String) : shortint;
397
CompUCS2 : function(p1,p2:PUCS2Char) : shortint;
398
CompUCS4 : function(p1,p2:PUC42Char) : shortint;
400
PasWideStringManager.CompareWideStringProc:=@WideCompareStr;
401
PasWideStringManager.CompareTextWideStringProc:=@WideCompareText;
403
{ return value: number of code points in the string. Whenever an invalid
404
code point is encountered, all characters part of this invalid code point
405
are considered to form one "character" and the next character is
406
considered to be the start of a new (possibly also invalid) code point }
407
PasWideStringManager.CharLengthPCharProc:=@CharLengthPChar;
410
PasWideStringManager.UpperAnsiStringProc:=@UpperAnsiString;
411
PasWideStringManager.LowerAnsiStringProc:=@LowerAnsiString;
412
PasWideStringManager.CompareStrAnsiStringProc:=@AnsiCompareStr;
413
PasWideStringManager.CompareTextAnsiStringProc:=@AnsiCompareText;
414
PasWideStringManager.StrCompAnsiStringProc:=@StrCompAnsi;
415
PasWideStringManager.StrICompAnsiStringProc:=@AnsiStrIComp;
416
PasWideStringManager.StrLCompAnsiStringProc:=@AnsiStrLComp;
417
PasWideStringManager.StrLICompAnsiStringProc:=@AnsiStrLIComp;
418
PasWideStringManager.StrLowerAnsiStringProc:=@AnsiStrLower;
419
PasWideStringManager.StrUpperAnsiStringProc:=@AnsiStrUpper;
420
PasWideStringManager.ThreadInitProc:=@InitThread;
421
PasWideStringManager.ThreadFiniProc:=@FiniThread;
424
PasWideStringManager.Unicode2AnsiMoveProc:=@Unicode2AnsiMove;
425
PasWideStringManager.Ansi2UnicodeMoveProc:=@Ansi2UnicodeMove;
426
PasWideStringManager.UpperUnicodeStringProc:=@UpperUnicodeString;
427
PasWideStringManager.LowerUnicodeStringProc:=@LowerUnicodeString;
428
PasWideStringManager.CompareUnicodeStringProc:=@PasUnicodeCompareStr;
429
PasWideStringManager.CompareTextUnicodeStringProc:=@PasUnicodeCompareText;
431
SetUnicodeStringManager(PasWideStringManager);
436
SetPasWideStringManager;