1
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/* ***** BEGIN LICENSE BLOCK *****
3
* Version: NPL 1.1/GPL 2.0/LGPL 2.1
5
* The contents of this file are subject to the Netscape Public License
6
* Version 1.1 (the "License"); you may not use this file except in
7
* compliance with the License. You may obtain a copy of the License at
8
* http://www.mozilla.org/NPL/
10
* Software distributed under the License is distributed on an "AS IS" basis,
11
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12
* for the specific language governing rights and limitations under the
15
* The Original Code is Mozilla Communicator client code.
17
* The Initial Developer of the Original Code is
18
* Netscape Communications Corporation.
19
* Portions created by the Initial Developer are Copyright (C) 1998
20
* the Initial Developer. All Rights Reserved.
24
* Alternatively, the contents of this file may be used under the terms of
25
* either the GNU General Public License Version 2 or later (the "GPL"), or
26
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27
* in which case the provisions of the GPL or the LGPL are applicable instead
28
* of those above. If you wish to allow use of your version of this file only
29
* under the terms of either the GPL or the LGPL, and not to allow others to
30
* use your version of this file under the terms of the NPL, indicate your
31
* decision by deleting the provisions above and replace them with the notice
32
* and other provisions required by the GPL or the LGPL. If you do not delete
33
* the provisions above, a recipient may use your version of this file under
34
* the terms of any one of the NPL, the GPL or the LGPL.
36
* ***** END LICENSE BLOCK ***** */
39
#include "nsTextTransformer.h"
40
#include "nsIContent.h"
42
#include "nsITextContent.h"
43
#include "nsStyleConsts.h"
44
#include "nsILineBreaker.h"
45
#include "nsIWordBreaker.h"
46
#include "nsIServiceManager.h"
47
#include "nsUnicharUtilCIID.h"
48
#include "nsUnicharUtils.h"
49
#include "nsICaseConversion.h"
51
#include "nsIPrefBranchInternal.h"
52
#include "nsIPrefBranch.h"
53
#include "nsIPrefService.h"
55
#include "nsLayoutAtoms.h"
59
nsTextTransformer::WordSelectListener *nsTextTransformer::sWordSelectListener = nsnull;
60
PRBool nsTextTransformer::sWordSelectStopAtPunctuation = PR_FALSE;
61
static const char kWordSelectPref[] = "layout.word_select.stop_at_punctuation";
63
NS_IMPL_ISUPPORTS1(nsTextTransformer::WordSelectListener, nsIObserver)
66
nsTextTransformer::WordSelectListener::Observe(nsISupports *aSubject,
68
const PRUnichar *aData)
70
NS_ASSERTION(!nsCRT::strcmp(aTopic, NS_PREFBRANCH_PREFCHANGE_TOPIC_ID),
72
nsCOMPtr<nsIPrefBranch> prefBranch = do_QueryInterface(aSubject);
73
return prefBranch->GetBoolPref(kWordSelectPref, &sWordSelectStopAtPunctuation);
76
nsAutoTextBuffer::nsAutoTextBuffer()
77
: mBuffer(mAutoBuffer),
78
mBufferLen(NS_TEXT_TRANSFORMER_AUTO_WORD_BUF_SIZE)
82
nsAutoTextBuffer::~nsAutoTextBuffer()
84
if (mBuffer && (mBuffer != mAutoBuffer)) {
90
nsAutoTextBuffer::GrowBy(PRInt32 aAtLeast, PRBool aCopyToHead)
// Enlarges the buffer so that it can hold at least aAtLeast more
// characters than its current capacity (mBufferLen), then delegates the
// actual reallocation to GrowTo(). aCopyToHead is forwarded unchanged.
92
// Default policy: double the current capacity.
PRInt32 newSize = mBufferLen * 2;
93
// Doubling is not enough for the request; size to the request instead.
if (newSize < mBufferLen + aAtLeast) {
94
// The extra 100 characters are slack beyond what was asked for.
newSize = mBufferLen + aAtLeast + 100;
96
return GrowTo(newSize, aCopyToHead);
100
nsAutoTextBuffer::GrowTo(PRInt32 aNewSize, PRBool aCopyToHead)
// Reallocates the buffer to aNewSize characters when that is larger than
// the current capacity; a request that is not larger skips the
// reallocation. The old contents (mBufferLen characters) are copied into
// the new storage, either at its start (aCopyToHead true) or starting at
// index mBufferLen (aCopyToHead false).
102
if (aNewSize > mBufferLen) {
103
PRUnichar* newBuffer = new PRUnichar[aNewSize];
105
return NS_ERROR_OUT_OF_MEMORY;
107
// NOTE(review): with aCopyToHead false the copy occupies indices
// [mBufferLen, 2 * mBufferLen). That is the tail of the new buffer only
// when aNewSize == 2 * mBufferLen, and it runs past the end of newBuffer
// when aNewSize < 2 * mBufferLen. An offset of (aNewSize - mBufferLen)
// would place the old data at the tail for every size -- confirm against
// the callers that pass aCopyToHead == PR_FALSE.
memcpy(&newBuffer[aCopyToHead ? 0 : mBufferLen],
108
mBuffer, sizeof(PRUnichar) * mBufferLen);
109
// mAutoBuffer is the buffer the constructor starts with; it is treated
// differently from a previously grown buffer here.
if (mBuffer != mAutoBuffer) {
113
mBufferLen = aNewSize;
118
//----------------------------------------------------------------------
120
static NS_DEFINE_CID(kUnicharUtilCID, NS_UNICHARUTIL_CID);
122
static nsICaseConversion* gCaseConv = nsnull;
125
nsTextTransformer::Initialize()
127
// read in our global word selection prefs
128
if ( !sWordSelectListener ) {
129
nsCOMPtr<nsIPrefBranchInternal> prefBranch =
130
do_GetService( NS_PREFSERVICE_CONTRACTID );
132
prefBranch->GetBoolPref(kWordSelectPref, &sWordSelectStopAtPunctuation);
133
sWordSelectListener = new WordSelectListener();
134
if (sWordSelectListener) {
135
NS_ADDREF(sWordSelectListener);
136
prefBranch->AddObserver(kWordSelectPref, sWordSelectListener, PR_FALSE);
143
static nsresult EnsureCaseConv()
145
nsresult res = NS_OK;
147
res = nsServiceManager::GetService(kUnicharUtilCID, NS_GET_IID(nsICaseConversion),
148
(nsISupports**)&gCaseConv);
149
NS_ASSERTION( NS_SUCCEEDED(res), "cannot get UnicharUtil");
150
NS_ASSERTION( gCaseConv != NULL, "cannot get UnicharUtil");
156
nsTextTransformer::Shutdown()
158
NS_IF_RELEASE(sWordSelectListener);
160
nsServiceManager::ReleaseService(kUnicharUtilCID, gCaseConv);
165
// For now, we have only a couple of characters to strip out. If we get
166
// any more, change this to use a bitset to lookup into.
167
// CH_SHY - soft hyphen (discretionary hyphen)
169
// added BIDI formatting codes
170
#define IS_DISCARDED(_ch) \
171
(((_ch) == CH_SHY) || ((_ch) == '\r') || IS_BIDI_CONTROL(_ch))
173
#define IS_DISCARDED(_ch) \
174
(((_ch) == CH_SHY) || ((_ch) == '\r'))
178
#define MAX_UNIBYTE 127
180
MOZ_DECL_CTOR_COUNTER(nsTextTransformer)
182
nsTextTransformer::nsTextTransformer(nsILineBreaker* aLineBreaker,
183
nsIWordBreaker* aWordBreaker,
184
nsIPresContext* aPresContext)
188
mLineBreaker(aLineBreaker),
189
mWordBreaker(aWordBreaker),
191
mTextTransform(NS_STYLE_TEXT_TRANSFORM_NONE),
194
MOZ_COUNT_CTOR(nsTextTransformer);
197
GetLanguageSpecificTransformType(&mLanguageSpecificTransformType);
200
mPresContext = aPresContext;
202
if (aLineBreaker == nsnull && aWordBreaker == nsnull )
203
NS_ASSERTION(0, "invalid creation of nsTextTransformer");
206
static PRBool firstTime = PR_TRUE;
208
firstTime = PR_FALSE;
209
SelfTest(aLineBreaker, aWordBreaker, aPresContext);
214
nsTextTransformer::~nsTextTransformer()
216
MOZ_COUNT_DTOR(nsTextTransformer);
220
nsTextTransformer::Init(nsIFrame* aFrame,
221
nsIContent* aContent,
222
PRInt32 aStartingOffset,
223
PRBool aForceArabicShaping,
224
PRBool aLeaveAsAscii)
227
* If the document has Bidi content, check whether we need to do
230
* Does the frame contain Arabic characters
231
* (mCharType == eCharType_RightToLeftArabic)?
232
* Are we rendering character by character (aForceArabicShaping ==
233
* PR_TRUE)? If so, we always do our own Arabic shaping, even if
234
* the platform has native shaping support. Otherwise, we only do
235
* shaping if the platform has no shaping support.
237
* We do numeric shaping in all Bidi documents.
241
mPresContext->GetBidiEnabled(&bidiEnabled);
243
aFrame->GetBidiProperty(mPresContext, nsLayoutAtoms::charType,
244
(void**)&mCharType, sizeof(mCharType));
245
if (mCharType == eCharType_RightToLeftArabic) {
246
if (aForceArabicShaping) {
247
SetNeedsArabicShaping(PR_TRUE);
250
if (!mPresContext->IsBidiSystem()) {
251
SetNeedsArabicShaping(PR_TRUE);
255
SetNeedsNumericShaping(PR_TRUE);
258
// Get the content's text content
260
nsCOMPtr<nsITextContent> tc = do_QueryInterface(aContent, &rv);
264
// Sanitize aStartingOffset
265
if (aStartingOffset < 0) {
266
NS_WARNING("bad starting offset");
269
else if (aStartingOffset > mFrag->GetLength()) {
270
NS_WARNING("bad starting offset");
271
aStartingOffset = mFrag->GetLength();
273
mOffset = aStartingOffset;
275
// Get the frame's text style information
276
const nsStyleText* styleText = aFrame->GetStyleText();
277
if (NS_STYLE_WHITESPACE_PRE == styleText->mWhiteSpace) {
278
mMode = ePreformatted;
280
else if (NS_STYLE_WHITESPACE_MOZ_PRE_WRAP == styleText->mWhiteSpace) {
283
mTextTransform = styleText->mTextTransform;
285
if (aLeaveAsAscii) { // See if the text fragment is 1-byte text
286
SetLeaveAsAscii(PR_TRUE);
287
// XXX Currently we only leave it as ascii for normal text and not for preformatted
288
// or preformatted wrapped text or language specific transforms
289
if (mFrag->Is2b() || (eNormal != mMode) ||
290
(mLanguageSpecificTransformType !=
291
eLanguageSpecificTransformType_None))
292
// We don't step down from Unicode to ascii
293
SetLeaveAsAscii(PR_FALSE);
296
SetLeaveAsAscii(PR_FALSE);
301
//----------------------------------------------------------------------
303
// wordlen==1, contentlen=newOffset-currentOffset, isWhitespace=t
305
nsTextTransformer::ScanNormalWhiteSpace_F()
307
const nsTextFragment* frag = mFrag;
308
PRInt32 fragLen = frag->GetLength();
309
PRInt32 offset = mOffset;
311
for (; offset < fragLen; offset++) {
312
PRUnichar ch = frag->CharAt(offset);
313
if (!XP_IS_SPACE(ch)) {
314
// If character is not discardable then stop looping, otherwise
315
// let the discarded character collapse with the other spaces.
316
if (!IS_DISCARDED(ch)) {
322
// Make sure we have enough room in the transform buffer
323
if (mBufferPos >= mTransformBuf.mBufferLen) {
324
mTransformBuf.GrowBy(128);
327
if (TransformedTextIsAscii()) {
328
unsigned char* bp = (unsigned char*)mTransformBuf.mBuffer;
329
bp[mBufferPos++] = ' ';
331
mTransformBuf.mBuffer[mBufferPos++] = PRUnichar(' ');
337
nsTextTransformer::ConvertTransformedTextToUnicode()
// Widens the first mBufferPos characters of the transform buffer in place,
// from one byte per character to one PRUnichar per character. Both
// pointers below address the same storage (mTransformBuf.mBuffer), viewed
// once as bytes and once as PRUnichars.
339
// Go backwards over the characters and convert them. Working from the
// last character to the first means each wide write lands at or beyond the
// byte being read, so no narrow character is overwritten before it has
// been converted.
340
PRInt32 lastChar = mBufferPos - 1;
341
// cp1: last narrow (byte-sized) character currently stored.
unsigned char* cp1 = (unsigned char*)mTransformBuf.mBuffer + lastChar;
342
// cp2: slot where that character belongs once widened.
PRUnichar* cp2 = mTransformBuf.mBuffer + lastChar;
344
// The buffer capacity is counted in PRUnichars, so it must already be
// able to hold mBufferPos wide characters.
NS_ASSERTION(mTransformBuf.mBufferLen >= mBufferPos,
345
"transform buffer is too small");
346
for (PRInt32 count = mBufferPos; count > 0; count--) {
347
*cp2-- = PRUnichar(*cp1--);
351
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
353
nsTextTransformer::ScanNormalAsciiText_F(PRInt32* aWordLen,
354
PRBool* aWasTransformed)
356
const nsTextFragment* frag = mFrag;
357
PRInt32 fragLen = frag->GetLength();
358
PRInt32 offset = mOffset;
359
PRInt32 prevBufferPos = mBufferPos;
360
const unsigned char* cp = (const unsigned char*)frag->Get1b() + offset;
365
bp2 = mTransformBuf.GetBuffer();
366
if (TransformedTextIsAscii()) {
372
for (; offset < fragLen; offset++) {
373
unsigned char ch = *cp++;
374
if (XP_IS_SPACE(ch)) {
379
*aWasTransformed = PR_TRUE;
381
else if (IS_DISCARDED(ch)) {
382
// Strip discarded characters from the transformed output
385
if (ch > MAX_UNIBYTE) {
386
// The text has a multibyte character so we can no longer leave the
387
// text as ascii text
388
SetHasMultibyte(PR_TRUE);
390
if (TransformedTextIsAscii()) {
391
SetTransformedTextIsAscii(PR_FALSE);
392
*aWasTransformed = PR_TRUE;
394
// Transform any existing ascii text to Unicode
395
if (mBufferPos > 0) {
396
ConvertTransformedTextToUnicode();
397
bp2 = mTransformBuf.GetBuffer() + mBufferPos;
401
if (mBufferPos >= mTransformBuf.mBufferLen) {
402
nsresult rv = mTransformBuf.GrowBy(128);
404
// If we run out of space then just truncate the text
407
bp2 = mTransformBuf.GetBuffer();
408
if (TransformedTextIsAscii()) {
414
if (TransformedTextIsAscii()) {
417
*bp2++ = PRUnichar(ch);
422
*aWordLen = mBufferPos - prevBufferPos;
427
nsTextTransformer::ScanNormalAsciiText_F_ForWordBreak(PRInt32* aWordLen,
428
PRBool* aWasTransformed,
429
PRBool aIsKeyboardSelect)
431
const nsTextFragment* frag = mFrag;
432
PRInt32 fragLen = frag->GetLength();
433
PRInt32 offset = mOffset;
434
PRInt32 prevBufferPos = mBufferPos;
435
PRBool breakAfterThis = PR_FALSE;
436
const unsigned char* cp = (const unsigned char*)frag->Get1b() + offset;
441
bp2 = mTransformBuf.GetBuffer();
442
if (TransformedTextIsAscii()) {
447
PRBool readingAlphaNumeric = PR_TRUE; //only used in sWordSelectStopAtPunctuation
449
// We must know if we are starting in alpha numerics.
450
// Treat high bit chars as alphanumeric, otherwise we get stuck on accented letters
451
// We can't trust isalnum() results for high bit chars.
452
// Therefore we don't stop at non-ascii (high bit) punctuation,
453
// which is just fine. The punctuation we care about is low bit.
454
if (sWordSelectStopAtPunctuation && offset < fragLen)
455
readingAlphaNumeric = isalnum((unsigned char)*cp) || !IS_ASCII_CHAR(*cp);
457
for (; offset < fragLen && !breakAfterThis; offset++) {
458
unsigned char ch = *cp++;
461
*aWasTransformed = PR_TRUE;
462
if (offset == mOffset)
463
breakAfterThis = PR_TRUE;
467
else if (XP_IS_SPACE(ch)) {
470
else if (sWordSelectStopAtPunctuation &&
471
readingAlphaNumeric && !isalnum(ch) && IS_ASCII_CHAR(ch)) {
472
if (!aIsKeyboardSelect)
474
// For keyboard move-by-word, need to pass by at least
475
// one alphanumeric char before stopping at punct
476
readingAlphaNumeric = PR_FALSE;
478
else if (sWordSelectStopAtPunctuation &&
479
!readingAlphaNumeric && (isalnum(ch) || !IS_ASCII_CHAR(ch))) {
480
// On some platforms, punctuation breaks for word selection
483
else if (IS_DISCARDED(ch)) {
484
// Strip discarded characters from the transformed output
487
if (ch > MAX_UNIBYTE) {
488
// The text has a multibyte character so we can no longer leave the
489
// text as ascii text
490
SetHasMultibyte(PR_TRUE);
492
if (TransformedTextIsAscii()) {
493
SetTransformedTextIsAscii(PR_FALSE);
494
*aWasTransformed = PR_TRUE;
496
// Transform any existing ascii text to Unicode
497
if (mBufferPos > 0) {
498
ConvertTransformedTextToUnicode();
499
bp2 = mTransformBuf.GetBuffer() + mBufferPos;
503
if (mBufferPos >= mTransformBuf.mBufferLen) {
504
nsresult rv = mTransformBuf.GrowBy(128);
506
// If we run out of space then just truncate the text
509
bp2 = mTransformBuf.GetBuffer();
510
if (TransformedTextIsAscii()) {
516
if (TransformedTextIsAscii()) {
519
*bp2++ = PRUnichar(ch);
524
*aWordLen = mBufferPos - prevBufferPos;
529
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
531
nsTextTransformer::ScanNormalUnicodeText_F(PRBool aForLineBreak,
533
PRBool* aWasTransformed)
535
const nsTextFragment* frag = mFrag;
536
const PRUnichar* cp0 = frag->Get2b();
537
PRInt32 fragLen = frag->GetLength();
539
if (*aWordLen > 0 && *aWordLen < fragLen) {
543
PRInt32 offset = mOffset;
545
PRUnichar firstChar = frag->CharAt(offset++);
548
// Need to strip BIDI controls even when those are 'firstChars'.
549
// This doesn't seem to produce bug 14280 (or similar bugs).
550
while (offset < fragLen && IS_BIDI_CONTROL(firstChar) ) {
551
firstChar = frag->CharAt(offset++);
555
if (firstChar > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);
557
// Only evaluate complex breaking logic if there are more characters
558
// beyond the first to look at.
559
PRInt32 numChars = 1;
560
if (offset < fragLen) {
561
const PRUnichar* cp = cp0 + offset;
562
PRBool breakBetween = PR_FALSE;
564
mLineBreaker->BreakInBetween(&firstChar, 1, cp, (fragLen-offset), &breakBetween);
567
mWordBreaker->BreakInBetween(&firstChar, 1, cp, (fragLen-offset), &breakBetween);
570
// don't transform the first character until after BreakInBetween is called
571
// Kipp originally did this at the top of the function, which was too early.
573
if (CH_NBSP == firstChar) {
575
*aWasTransformed = PR_TRUE;
577
nsresult rv = mTransformBuf.GrowTo(mBufferPos + 1);
583
mTransformBuf.mBuffer[mBufferPos++] = firstChar;
586
// Find next position
590
mLineBreaker->Next(cp0, fragLen, offset, &next, &tryNextFrag);
593
mWordBreaker->NextWord(cp0, fragLen, offset, &next, &tryNextFrag);
595
numChars = (PRInt32) (next - (PRUint32) offset) + 1;
597
// Since we know the number of characters we're adding grow the buffer
598
// now before we start copying
599
nsresult rv = mTransformBuf.GrowTo(mBufferPos + numChars);
601
numChars = mTransformBuf.GetBufferLength() - mBufferPos;
604
offset += numChars - 1;
606
// 1. convert nbsp into space
607
// 2. check for discarded characters
608
// 3. check mHasMultibyte flag
610
PRUnichar* bp = &mTransformBuf.mBuffer[mBufferPos];
611
const PRUnichar* end = cp + numChars - 1;
613
PRUnichar ch = *cp++;
617
else if (IS_DISCARDED(ch) || (ch == 0x0a) || (ch == 0x0d)) {
618
// Strip discarded characters from the transformed output
622
if (ch > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);
629
{ // transform the first character
630
// we do this here, rather than at the top of the function (like Kipp originally had it)
631
// because if we must call BreakInBetween, then we must do so before the transformation
632
// this is the case where BreakInBetween does not need to be called at all.
634
if (CH_NBSP == firstChar) {
636
*aWasTransformed = PR_TRUE;
638
nsresult rv = mTransformBuf.GrowTo(mBufferPos + 1);
643
mTransformBuf.mBuffer[mBufferPos++] = firstChar;
646
*aWordLen = numChars;
650
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=t
652
nsTextTransformer::ScanPreWrapWhiteSpace_F(PRInt32* aWordLen)
654
const nsTextFragment* frag = mFrag;
655
PRInt32 fragLen = frag->GetLength();
656
PRInt32 offset = mOffset;
657
PRUnichar* bp = mTransformBuf.GetBuffer() + mBufferPos;
658
PRUnichar* endbp = mTransformBuf.GetBufferEnd();
659
PRInt32 prevBufferPos = mBufferPos;
661
for (; offset < fragLen; offset++) {
662
// This function is used for both Unicode and ascii strings so don't
663
// make any assumptions about what kind of data it is
664
PRUnichar ch = frag->CharAt(offset);
665
if (!XP_IS_SPACE(ch) || (ch == '\t') || (ch == '\n')) {
666
if (IS_DISCARDED(ch)) {
667
// Keep looping if this is a discarded character
673
PRInt32 oldLength = bp - mTransformBuf.GetBuffer();
674
nsresult rv = mTransformBuf.GrowBy(1000);
676
// If we run out of space (unlikely) then just chop the input
679
bp = mTransformBuf.GetBuffer() + oldLength;
680
endbp = mTransformBuf.GetBufferEnd();
686
*aWordLen = mBufferPos - prevBufferPos;
690
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
692
nsTextTransformer::ScanPreData_F(PRInt32* aWordLen,
693
PRBool* aWasTransformed)
695
const nsTextFragment* frag = mFrag;
696
PRInt32 fragLen = frag->GetLength();
697
PRInt32 offset = mOffset;
698
PRUnichar* bp = mTransformBuf.GetBuffer() + mBufferPos;
699
PRUnichar* endbp = mTransformBuf.GetBufferEnd();
700
PRInt32 prevBufferPos = mBufferPos;
702
for (; offset < fragLen; offset++) {
703
// This function is used for both Unicode and ascii strings so don't
704
// make any assumptions about what kind of data it is
705
PRUnichar ch = frag->CharAt(offset);
706
if ((ch == '\t') || (ch == '\n')) {
711
*aWasTransformed = PR_TRUE;
713
else if (IS_DISCARDED(ch)) {
716
if (ch > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);
718
PRInt32 oldLength = bp - mTransformBuf.GetBuffer();
719
nsresult rv = mTransformBuf.GrowBy(1000);
721
// If we run out of space (unlikely) then just chop the input
724
bp = mTransformBuf.GetBuffer() + oldLength;
725
endbp = mTransformBuf.GetBufferEnd();
731
*aWordLen = mBufferPos - prevBufferPos;
735
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
737
nsTextTransformer::ScanPreAsciiData_F(PRInt32* aWordLen,
738
PRBool* aWasTransformed)
740
const nsTextFragment* frag = mFrag;
741
PRUnichar* bp = mTransformBuf.GetBuffer() + mBufferPos;
742
PRUnichar* endbp = mTransformBuf.GetBufferEnd();
743
const unsigned char* cp = (const unsigned char*) frag->Get1b();
744
const unsigned char* end = cp + frag->GetLength();
745
PRInt32 prevBufferPos = mBufferPos;
749
PRUnichar ch = (PRUnichar) *cp++;
750
if ((ch == '\t') || (ch == '\n')) {
756
*aWasTransformed = PR_TRUE;
758
else if (IS_DISCARDED(ch)) {
761
if (ch > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);
763
PRInt32 oldLength = bp - mTransformBuf.GetBuffer();
764
nsresult rv = mTransformBuf.GrowBy(1000);
766
// If we run out of space (unlikely) then just chop the input
769
bp = mTransformBuf.GetBuffer() + oldLength;
770
endbp = mTransformBuf.GetBufferEnd();
776
*aWordLen = mBufferPos - prevBufferPos;
777
return cp - ((const unsigned char*)frag->Get1b());
780
//----------------------------------------
783
AsciiToLowerCase(unsigned char* aText, PRInt32 aWordLen)
785
while (aWordLen-- > 0) {
786
*aText = tolower(*aText);
792
AsciiToUpperCase(unsigned char* aText, PRInt32 aWordLen)
794
while (aWordLen-- > 0) {
795
*aText = toupper(*aText);
800
#define kSzlig 0x00DF
801
static PRInt32 CountGermanSzlig(const PRUnichar* aText, PRInt32 len)
804
for(i=0,cnt=0; i<len; i++, aText++)
811
static void ReplaceGermanSzligToSS(PRUnichar* aText, PRInt32 len, PRInt32 szCnt)
813
PRUnichar *src, *dest;
814
src = aText + len - 1;
816
while( (src!=dest) && (src >= aText) )
820
*dest-- = PRUnichar('S');
821
*dest-- = PRUnichar('S');
830
nsTextTransformer::LanguageSpecificTransform(PRUnichar* aText, PRInt32 aLen,
831
PRBool* aWasTransformed)
833
if (mLanguageSpecificTransformType ==
834
eLanguageSpecificTransformType_Japanese) {
835
for (PRInt32 i = 0; i < aLen; i++) {
836
if (aText[i] == 0x5C) { // BACKSLASH
837
aText[i] = 0xA5; // YEN SIGN
838
SetHasMultibyte(PR_TRUE);
839
*aWasTransformed = PR_TRUE;
843
* We considered doing this, but since some systems may not have fonts
844
* with this OVERLINE glyph, we decided not to do this.
846
else if (aText[i] == 0x7E) { // TILDE
847
aText[i] = 0x203E; // OVERLINE
848
SetHasMultibyte(PR_TRUE);
849
*aWasTransformed = PR_TRUE;
854
/* we once did a transformation for Korean, but later decided to remove it */
855
/* see bug 88050 for more information */
859
nsTextTransformer::GetNextWord(PRBool aInWord,
860
PRInt32* aWordLenResult,
861
PRInt32* aContentLenResult,
862
PRBool* aIsWhiteSpaceResult,
863
PRBool* aWasTransformed,
864
PRBool aResetTransformBuf,
865
PRBool aForLineBreak,
866
PRBool aIsKeyboardSelect)
868
const nsTextFragment* frag = mFrag;
869
PRInt32 fragLen = frag->GetLength();
871
if (*aWordLenResult > 0 && *aWordLenResult < fragLen) {
872
fragLen = *aWordLenResult;
875
PRInt32 offset = mOffset;
877
PRBool isWhitespace = PR_FALSE;
878
PRUnichar* result = nsnull;
879
PRBool prevBufferPos;
880
PRBool skippedWhitespace = PR_FALSE;
882
// Initialize OUT parameter
883
*aWasTransformed = PR_FALSE;
885
// See if we should reset the current buffer position back to the
886
// beginning of the buffer
887
if (aResetTransformBuf) {
889
SetTransformedTextIsAscii(LeaveAsAscii());
891
prevBufferPos = mBufferPos;
893
// Fix word breaking problem w/ PREFORMAT and PREWRAP
894
// for word breaking, we should really go to the normal code
895
if((! aForLineBreak) && (eNormal != mMode))
898
while (offset < fragLen) {
899
PRUnichar firstChar = frag->CharAt(offset);
901
// Eat up any discarded characters before dispatching
902
if (IS_DISCARDED(firstChar)) {
910
if (XP_IS_SPACE(firstChar)) {
911
offset = ScanNormalWhiteSpace_F();
913
// if this is just a '\n', and characters before and after it are CJK chars,
914
// we will skip this one.
915
if (firstChar == '\n' &&
916
offset - mOffset == 1 &&
920
PRUnichar lastChar = frag->CharAt(mOffset - 1);
921
PRUnichar nextChar = frag->CharAt(offset);
922
if (IS_CJ_CHAR(lastChar) && IS_CJ_CHAR(nextChar)) {
923
skippedWhitespace = PR_TRUE;
928
if (firstChar != ' ') {
929
*aWasTransformed = PR_TRUE;
932
isWhitespace = PR_TRUE;
934
else if (CH_NBSP == firstChar && !aForLineBreak) {
936
isWhitespace = PR_TRUE;
937
*aWasTransformed = PR_TRUE;
939
// Make sure we have enough room in the transform buffer
940
if (mBufferPos >= mTransformBuf.mBufferLen) {
941
mTransformBuf.GrowBy(128);
945
if (TransformedTextIsAscii()) {
946
((unsigned char*)mTransformBuf.mBuffer)[mBufferPos++] = ' ';
948
mTransformBuf.mBuffer[mBufferPos++] = PRUnichar(' ');
951
else if (frag->Is2b()) {
953
wordLen = *aWordLenResult;
955
offset = ScanNormalUnicodeText_F(aForLineBreak, &wordLen, aWasTransformed);
959
offset = ScanNormalAsciiText_F_ForWordBreak(&wordLen,
963
offset = ScanNormalAsciiText_F(&wordLen, aWasTransformed);
968
if (('\n' == firstChar) || ('\t' == firstChar)) {
969
mTransformBuf.mBuffer[mBufferPos++] = firstChar;
972
isWhitespace = PR_TRUE;
974
else if (frag->Is2b()) {
975
offset = ScanPreData_F(&wordLen, aWasTransformed);
978
offset = ScanPreAsciiData_F(&wordLen, aWasTransformed);
983
if (XP_IS_SPACE(firstChar)) {
984
if (('\n' == firstChar) || ('\t' == firstChar)) {
985
mTransformBuf.mBuffer[mBufferPos++] = firstChar;
990
offset = ScanPreWrapWhiteSpace_F(&wordLen);
992
isWhitespace = PR_TRUE;
994
else if (frag->Is2b()) {
996
wordLen = *aWordLenResult;
998
offset = ScanNormalUnicodeText_F(aForLineBreak, &wordLen, aWasTransformed);
1002
offset = ScanNormalAsciiText_F_ForWordBreak(&wordLen, aWasTransformed,
1005
offset = ScanNormalAsciiText_F(&wordLen, aWasTransformed);
1010
if (TransformedTextIsAscii()) {
1011
unsigned char* wordPtr = (unsigned char*)mTransformBuf.mBuffer + prevBufferPos;
1013
if (!isWhitespace) {
1014
switch (mTextTransform) {
1015
case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
1016
*wordPtr = toupper(*wordPtr);
1018
case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
1019
AsciiToLowerCase(wordPtr, wordLen);
1021
case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
1022
AsciiToUpperCase(wordPtr, wordLen);
1025
NS_ASSERTION(mLanguageSpecificTransformType ==
1026
eLanguageSpecificTransformType_None,
1027
"should not be ASCII for language specific transforms");
1029
result = (PRUnichar*)wordPtr;
1032
result = &mTransformBuf.mBuffer[prevBufferPos];
1034
if (!isWhitespace) {
1035
switch (mTextTransform) {
1036
case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
1037
if(NS_SUCCEEDED(EnsureCaseConv()))
1038
gCaseConv->ToTitle(result, result, wordLen, !aInWord);
1039
// if the first character is szlig
1040
if(kSzlig == *result)
1042
if ((prevBufferPos + wordLen + 1) >= mTransformBuf.mBufferLen) {
1043
mTransformBuf.GrowBy(128);
1044
result = &mTransformBuf.mBuffer[prevBufferPos];
1046
PRUnichar* src = result + wordLen;
1052
result[0] = PRUnichar('S');
1053
result[1] = PRUnichar('S');
1057
case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
1058
if(NS_SUCCEEDED(EnsureCaseConv()))
1059
gCaseConv->ToLower(result, result, wordLen);
1061
case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
1063
if(NS_SUCCEEDED(EnsureCaseConv()))
1064
gCaseConv->ToUpper(result, result, wordLen);
1066
// first we search for German Szlig
1067
PRInt32 szligCnt = CountGermanSzlig(result, wordLen);
1069
// Make sure we have enough room in the transform buffer
1070
if ((prevBufferPos + wordLen + szligCnt) >= mTransformBuf.mBufferLen)
1072
mTransformBuf.GrowBy(128);
1073
result = &mTransformBuf.mBuffer[prevBufferPos];
1075
ReplaceGermanSzligToSS(result, wordLen, szligCnt);
1076
wordLen += szligCnt;
1081
if (mLanguageSpecificTransformType !=
1082
eLanguageSpecificTransformType_None) {
1083
LanguageSpecificTransform(result, wordLen, aWasTransformed);
1085
if (NeedsArabicShaping()) {
1086
DoArabicShaping(result, wordLen, aWasTransformed);
1088
if (NeedsNumericShaping()) {
1089
DoNumericShaping(result, wordLen, aWasTransformed);
1097
*aIsWhiteSpaceResult = isWhitespace;
1098
*aWordLenResult = wordLen;
1099
*aContentLenResult = offset - mOffset;
1101
// we need to adjust the length if a '\n' has been skipped between CJK chars
1102
*aContentLenResult += (skippedWhitespace ? 1 : 0);
1104
// If the word length doesn't match the content length then we transformed
1106
if ((mTextTransform != NS_STYLE_TEXT_TRANSFORM_NONE) ||
1107
(*aWordLenResult != *aContentLenResult)) {
1108
*aWasTransformed = PR_TRUE;
1109
mBufferPos = prevBufferPos + *aWordLenResult;
1114
NS_ASSERTION(mBufferPos == prevBufferPos + *aWordLenResult, "internal error");
1118
//----------------------------------------------------------------------
1120
// wordlen==1, contentlen=newOffset-currentOffset, isWhitespace=t
1122
nsTextTransformer::ScanNormalWhiteSpace_B()
1124
const nsTextFragment* frag = mFrag;
1125
PRInt32 offset = mOffset;
1127
while (--offset >= 0) {
1128
PRUnichar ch = frag->CharAt(offset);
1129
if (!XP_IS_SPACE(ch)) {
1130
// If character is not discardable then stop looping, otherwise
1131
// let the discarded character collapse with the other spaces.
1132
if (!IS_DISCARDED(ch)) {
1138
mTransformBuf.mBuffer[mTransformBuf.mBufferLen - 1] = ' ';
1142
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
1144
nsTextTransformer::ScanNormalAsciiText_B(PRInt32* aWordLen, PRBool aIsKeyboardSelect)
1146
const nsTextFragment* frag = mFrag;
1147
PRInt32 offset = mOffset;
1148
PRUnichar* bp = mTransformBuf.GetBufferEnd();
1149
PRUnichar* startbp = mTransformBuf.GetBuffer();
1151
PRUnichar ch = frag->CharAt(offset - 1);
1152
// Treat high bit chars as alphanumeric, otherwise we get stuck on accented letters
1153
// We can't trust isalnum() results for high bit chars.
1154
// Therefore we don't stop at non-ascii (high bit) punctuation,
1155
// which is just fine. The punctuation we care about is low bit.
1156
PRBool readingAlphaNumeric = isalnum(ch) || !IS_ASCII_CHAR(ch);
1158
while (--offset >= 0) {
1159
PRUnichar ch = frag->CharAt(offset);
1160
if (CH_NBSP == ch) {
1163
if (XP_IS_SPACE(ch)) {
1166
else if (IS_DISCARDED(ch)) {
1169
else if (sWordSelectStopAtPunctuation && readingAlphaNumeric &&
1170
!isalnum(ch) && IS_ASCII_CHAR(ch)) {
1171
// Break on ascii punctuation
1174
else if (sWordSelectStopAtPunctuation && !readingAlphaNumeric &&
1175
(isalnum(ch) || !IS_ASCII_CHAR(ch))) {
1176
if (!aIsKeyboardSelect)
1178
readingAlphaNumeric = PR_TRUE;
1181
if (ch > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);
1182
if (bp == startbp) {
1183
PRInt32 oldLength = mTransformBuf.mBufferLen;
1184
nsresult rv = mTransformBuf.GrowBy(1000);
1185
if (NS_FAILED(rv)) {
1186
// If we run out of space (unlikely) then just chop the input
1189
bp = mTransformBuf.GetBufferEnd() - oldLength;
1190
startbp = mTransformBuf.GetBuffer();
1195
*aWordLen = mTransformBuf.GetBufferEnd() - bp;
1199
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
1201
nsTextTransformer::ScanNormalUnicodeText_B(PRBool aForLineBreak,
1204
const nsTextFragment* frag = mFrag;
1205
const PRUnichar* cp0 = frag->Get2b();
1206
PRInt32 offset = mOffset - 1;
1208
PRUnichar firstChar = frag->CharAt(offset);
1211
PRInt32 limit = (*aWordLen > 0) ? *aWordLen : 0;
1213
while (offset > limit && IS_BIDI_CONTROL(firstChar) ) {
1214
firstChar = frag->CharAt(--offset);
1218
mTransformBuf.mBuffer[mTransformBuf.mBufferLen - 1] = firstChar;
1219
if (firstChar > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);
1221
PRInt32 numChars = 1;
1224
if (offset > limit) {
1228
const PRUnichar* cp = cp0 + offset;
1229
PRBool breakBetween = PR_FALSE;
1230
if (aForLineBreak) {
1231
mLineBreaker->BreakInBetween(cp0, offset + 1,
1232
mTransformBuf.GetBufferEnd()-1, 1,
1236
mWordBreaker->BreakInBetween(cp0, offset + 1,
1237
mTransformBuf.GetBufferEnd()-1, 1,
1241
if (!breakBetween) {
1242
// Find next position
1245
if (aForLineBreak) {
1246
mLineBreaker->Prev(cp0, offset, offset, &prev, &tryPrevFrag);
1249
mWordBreaker->PrevWord(cp0, offset, offset, &prev, &tryPrevFrag);
1251
numChars = (PRInt32) ((PRUint32) offset - prev) + 1;
1253
// Grow buffer before copying
1254
nsresult rv = mTransformBuf.GrowTo(numChars);
1255
if (NS_FAILED(rv)) {
1256
numChars = mTransformBuf.GetBufferLength();
1259
// 1. convert nbsp into space
1260
// 2. check mHasMultibyte flag
1262
PRUnichar* bp = mTransformBuf.GetBufferEnd() - 1;
1263
const PRUnichar* end = cp - numChars + 1;
1265
PRUnichar ch = *--cp;
1266
if (CH_NBSP == ch) {
1269
else if (IS_DISCARDED(ch)) {
1272
if (ch > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);
1276
// Recompute offset and numChars in case we stripped something
1277
offset = offset - numChars;
1278
numChars = mTransformBuf.GetBufferEnd() - bp;
1284
*aWordLen = numChars;
1288
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=t
//
// Backward scanner used for whitespace runs in pre-wrap mode (GetPrevWord
// calls it and stores the returned value as its new offset). Starting one
// position before mOffset it walks the fragment toward index 0, examining
// one character per iteration. On exit *aWordLen is the number of
// PRUnichars between bp and the end of the transform buffer.
1290
nsTextTransformer::ScanPreWrapWhiteSpace_B(PRInt32* aWordLen)
1292
const nsTextFragment* frag = mFrag;
1293
PRInt32 offset = mOffset;
1294
// bp starts at the END of the transform buffer and startbp marks its
// beginning; presumably output is stored from the back toward the front
// (the store itself is not visible here -- TODO confirm).
PRUnichar* bp = mTransformBuf.GetBufferEnd();
1295
PRUnichar* startbp = mTransformBuf.GetBuffer();
1297
while (--offset >= 0) {
1298
PRUnichar ch = frag->CharAt(offset);
1299
// A character that is not a space, or that is a tab or a newline, does not
// belong to the run being collected by this scanner.
if (!XP_IS_SPACE(ch) || (ch == '\t') || (ch == '\n')) {
1300
// Keep looping if this is a discarded character
1301
if (IS_DISCARDED(ch)) {
1306
// bp has reached the front of the buffer: enlarge it by 1000 and re-derive
// both pointers from the (possibly relocated) buffer. bp is placed
// oldLength units before the new buffer end.
if (bp == startbp) {
1307
PRInt32 oldLength = mTransformBuf.mBufferLen;
1308
nsresult rv = mTransformBuf.GrowBy(1000);
1309
if (NS_FAILED(rv)) {
1310
// If we run out of space (unlikely) then just chop the input
1313
bp = mTransformBuf.GetBufferEnd() - oldLength;
1314
startbp = mTransformBuf.GetBuffer();
1319
// Length of the collected run = distance from bp to the buffer end.
*aWordLen = mTransformBuf.GetBufferEnd() - bp;
1323
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
//
// Backward scanner used for non-whitespace data in preformatted mode
// (GetPrevWord calls it and stores the returned value as its new offset).
// Starting one position before mOffset it walks the fragment toward index 0.
// Each character is tested, in order, for tab/newline, for CH_NBSP and for
// IS_DISCARDED; any character above MAX_UNIBYTE sets the multibyte flag.
// On exit *aWordLen is the number of PRUnichars between bp and the end of
// the transform buffer.
// NOTE(review): the bodies of the three character tests are not visible
// here; what each branch does should be confirmed against the full file.
1325
nsTextTransformer::ScanPreData_B(PRInt32* aWordLen)
1327
const nsTextFragment* frag = mFrag;
1328
PRInt32 offset = mOffset;
1329
// bp starts at the END of the transform buffer, startbp at its beginning.
PRUnichar* bp = mTransformBuf.GetBufferEnd();
1330
PRUnichar* startbp = mTransformBuf.GetBuffer();
1332
while (--offset >= 0) {
1333
PRUnichar ch = frag->CharAt(offset);
1334
if ((ch == '\t') || (ch == '\n')) {
1337
if (CH_NBSP == ch) {
1340
else if (IS_DISCARDED(ch)) {
1343
if (ch > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);
1344
// Out of room at the front of the buffer: grow by 1000 and re-derive both
// pointers, keeping bp oldLength units before the new buffer end.
if (bp == startbp) {
1345
PRInt32 oldLength = mTransformBuf.mBufferLen;
1346
nsresult rv = mTransformBuf.GrowBy(1000);
1347
if (NS_FAILED(rv)) {
1348
// If we run out of space (unlikely) then just chop the input
1352
bp = mTransformBuf.GetBufferEnd() - oldLength;
1353
startbp = mTransformBuf.GetBuffer();
1358
// Length of the collected data = distance from bp to the buffer end.
*aWordLen = mTransformBuf.GetBufferEnd() - bp;
1362
//----------------------------------------
1365
// Scans backwards from mOffset in mFrag for the previous word or whitespace
// run, dispatching on the first character examined to one of the *_B
// backward scanners.
//
//   aInWord              - its negation is passed to ToTitle() for the
//                          capitalize transform.
//   aWordLenResult       - in/out: the incoming value is read to compute
//                          `limit` and to seed wordLen on the 2-byte path;
//                          on exit it receives the scanned length.
//   aContentLenResult    - out: mOffset minus the new offset.
//   aIsWhiteSpaceResult  - out: whether the scanned run was whitespace.
//   aForLineBreak        - forwarded to ScanNormalUnicodeText_B; also gates
//                          the CH_NBSP-as-whitespace branch and the mode
//                          check below.
//   aIsKeyboardSelect    - forwarded to ScanNormalAsciiText_B.
//
// NOTE(review): the return statement is not visible here; SelfTest treats the
// return value as a pointer to the scanned text, which matches `result`.
nsTextTransformer::GetPrevWord(PRBool aInWord,
1366
PRInt32* aWordLenResult,
1367
PRInt32* aContentLenResult,
1368
PRBool* aIsWhiteSpaceResult,
1369
PRBool aForLineBreak,
1370
PRBool aIsKeyboardSelect)
1372
const nsTextFragment* frag = mFrag;
1373
PRInt32 offset = mOffset;
1374
PRInt32 wordLen = 0;
1375
PRBool isWhitespace = PR_FALSE;
1376
PRUnichar* result = nsnull;
1378
// Fix word breaking problem w/ PREFORMAT and PREWRAP
1379
// for word breaking, we should really go to the normal code
1380
if((! aForLineBreak) && (eNormal != mMode))
1384
// A positive incoming *aWordLenResult becomes the lowest offset the scan may
// reach; otherwise the scan may run down to offset 0.
// NOTE(review): two loop headers follow (one bounded by limit, one by 0);
// presumably a preprocessor conditional that is not visible selects between
// them -- confirm.
PRInt32 limit = (*aWordLenResult > 0) ? *aWordLenResult : 0;
1385
while (--offset >= limit) {
1387
while (--offset >= 0) {
1389
PRUnichar firstChar = frag->CharAt(offset);
1391
// Eat up any discarded characters before dispatching
1392
if (IS_DISCARDED(firstChar)) {
1399
if (XP_IS_SPACE(firstChar)) {
1400
offset = ScanNormalWhiteSpace_B();
1402
isWhitespace = PR_TRUE;
1404
// When not scanning for line breaks a non-breaking space is reported as
// whitespace, and the last slot of the transform buffer is set to ' '.
else if (CH_NBSP == firstChar && !aForLineBreak) {
1406
isWhitespace = PR_TRUE;
1407
mTransformBuf.mBuffer[mTransformBuf.mBufferLen - 1] = ' ';
1409
} else if (frag->Is2b()) {
1411
// The Unicode scanner receives the caller-supplied length through wordLen.
wordLen = *aWordLenResult;
1413
offset = ScanNormalUnicodeText_B(aForLineBreak, &wordLen);
1416
offset = ScanNormalAsciiText_B(&wordLen, aIsKeyboardSelect);
1421
// A newline or tab is stored by itself in the last slot of the transform
// buffer.
if (('\n' == firstChar) || ('\t' == firstChar)) {
1422
mTransformBuf.mBuffer[mTransformBuf.mBufferLen-1] = firstChar;
1423
offset--; // make sure we overshoot
1425
isWhitespace = PR_TRUE;
1428
offset = ScanPreData_B(&wordLen);
1433
if (XP_IS_SPACE(firstChar)) {
1434
if (('\n' == firstChar) || ('\t' == firstChar)) {
1435
mTransformBuf.mBuffer[mTransformBuf.mBufferLen-1] = firstChar;
1436
offset--; // make sure we overshoot
1440
offset = ScanPreWrapWhiteSpace_B(&wordLen);
1442
isWhitespace = PR_TRUE;
1444
else if (frag->Is2b()) {
1446
wordLen = *aWordLenResult;
1448
offset = ScanNormalUnicodeText_B(aForLineBreak, &wordLen);
1451
offset = ScanNormalAsciiText_B(&wordLen, aIsKeyboardSelect);
1456
// Backwards scanning routines *always* overshoot by one for the
1457
// returned offset value.
1458
offset = offset + 1;
1460
// The scanned text occupies the last wordLen slots of the transform buffer.
result = mTransformBuf.GetBufferEnd() - wordLen;
1462
// Case conversion is applied in place and only to non-whitespace; each
// conversion runs only if EnsureCaseConv() succeeds.
if (!isWhitespace) {
1463
switch (mTextTransform) {
1464
case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
1465
if(NS_SUCCEEDED(EnsureCaseConv()))
1466
gCaseConv->ToTitle(result, result, wordLen, !aInWord);
1468
case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
1469
if(NS_SUCCEEDED(EnsureCaseConv()))
1470
gCaseConv->ToLower(result, result, wordLen);
1472
case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
1473
if(NS_SUCCEEDED(EnsureCaseConv()))
1474
gCaseConv->ToUpper(result, result, wordLen);
1481
// Publish the results; content length is how far the offset moved back.
*aWordLenResult = wordLen;
1482
*aContentLenResult = mOffset - offset;
1483
*aIsWhiteSpaceResult = isWhitespace;
1490
// Runs ArabicShaping() over aText into a scratch string, then hands the
// shaped scratch buffer to StripZeroWidthJoinControls() together with aText.
//
//   aText           - text to shape; also passed on to
//                     StripZeroWidthJoinControls after shaping.
//   aTextLength     - in/out: input length; replaced by the length reported
//                     by ArabicShaping (and passed by reference to the strip
//                     step, which may reduce it further).
//   aWasTransformed - out: set to PR_TRUE once shaping has run.
nsTextTransformer::DoArabicShaping(PRUnichar* aText,
1491
PRInt32& aTextLength,
1492
PRBool* aWasTransformed)
1494
// Non-positive lengths are special-cased (the action taken is not visible
// here -- presumably an early return; confirm).
if (aTextLength <= 0)
1498
PRBool isVisual = mPresContext->IsVisualMode();
1501
// Size the scratch string to the input length and shape into it.
buf.SetLength(aTextLength);
1502
PRUnichar* buffer = buf.BeginWriting();
1504
// Both trailing flags are the negation of visual mode.
// NOTE(review): newLen's declaration is not visible; the pointer cast
// suggests it is not declared as PRUint32 -- confirm the types agree in size.
ArabicShaping(aText, buf.Length(), buffer, (PRUint32 *)&newLen, !isVisual, !isVisual);
1506
aTextLength = newLen;
1507
*aWasTransformed = PR_TRUE;
1509
StripZeroWidthJoinControls(buffer, aText, aTextLength, aWasTransformed);
1513
// Calls HandleNumbers() on aText with a numeral mode chosen from the bidi
// options of the pres context and, for some options, from mCharType.
//
//   aText           - text passed to HandleNumbers.
//   aTextLength     - length passed to HandleNumbers.
//   aWasTransformed - not touched by any statement visible in this function.
nsTextTransformer::DoNumericShaping(PRUnichar* aText,
1514
PRInt32& aTextLength,
1515
PRBool* aWasTransformed)
1517
// Non-positive lengths are special-cased (action not visible here --
// presumably an early return; confirm).
if (aTextLength <= 0)
1520
PRUint32 bidiOptions;
1521
mPresContext->GetBidi(&bidiOptions);
1523
switch (GET_BIDI_OPTION_NUMERAL(bidiOptions)) {
1525
// HINDI and ARABIC options: use that numeral mode directly.
case IBMBIDI_NUMERAL_HINDI:
1526
HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_HINDI);
1529
case IBMBIDI_NUMERAL_ARABIC:
1530
HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_ARABIC);
1533
// REGULAR option: the mode follows the character type of the run --
// European numbers use the ARABIC mode, Arabic numbers use the HINDI mode.
case IBMBIDI_NUMERAL_REGULAR:
1535
switch (mCharType) {
1537
case eCharType_EuropeanNumber:
1538
HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_ARABIC);
1541
case eCharType_ArabicNumber:
1542
HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_HINDI);
1550
// HINDICONTEXT option: HINDI mode when either (a) the text direction option
// is RTL and the first character satisfies IS_ARABIC_DIGIT, or (b) the run's
// character type is ArabicNumber; otherwise ARABIC mode for European numbers.
case IBMBIDI_NUMERAL_HINDICONTEXT:
1551
if (((GET_BIDI_OPTION_DIRECTION(bidiOptions)==IBMBIDI_TEXTDIRECTION_RTL) &&
1552
(IS_ARABIC_DIGIT (aText[0]))) ||
1553
(eCharType_ArabicNumber == mCharType))
1554
HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_HINDI);
1555
else if (eCharType_EuropeanNumber == mCharType)
1556
HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_ARABIC);
1559
// NOMINAL option: no HandleNumbers call is visible for this case.
case IBMBIDI_NUMERAL_NOMINAL:
1566
// Walks aTextLength characters and skips every CH_ZWNJ / CH_ZWJ it meets,
// counting them in `stripped`. On exit aTextLength has been reduced by the
// number of stripped characters, and *aWasTransformed is set to PR_TRUE when
// at least one join control was seen.
// NOTE(review): DoArabicShaping passes a second buffer argument between
// aSource and aTextLength; that parameter and the copy statement are not
// visible here -- confirm against the full file.
nsTextTransformer::StripZeroWidthJoinControls(PRUnichar* aSource,
1568
PRInt32& aTextLength,
1569
PRBool* aWasTransformed)
1571
PRUnichar *src, *dest;
1572
PRInt32 stripped = 0;
1577
for (PRInt32 i = 0; i < aTextLength; ++i) {
1578
// NOTE(review): this inner loop's condition dereferences src without testing
// i against aTextLength; confirm that a run of join controls at the very end
// of the text cannot make it read past the buffer.
while (*src == CH_ZWNJ || *src == CH_ZWJ) {
1581
*aWasTransformed = PR_TRUE;
1585
// Report the shortened length to the caller.
aTextLength -= stripped;
1588
//----------------------------------------------------------------------
1589
// Self test logic for this class. This will (hopefully) make sure
1590
// that the forward and backward word iterator methods continue to
1591
// function as people change things...
1594
// Expected results for one whitespace mode of one test. SelfTest reads a
// `length` and a `data` member from it (the member declarations are not
// visible here).
struct SelfTestSection {
1601
// One self-test case: the input text plus one expected-result section per
// whitespace mode.
struct SelfTestData {
1602
const PRUnichar* text;
1603
SelfTestSection modes[NUM_MODES];
1606
// White-space style value handed to Init2 for each mode index; the order
// here (normal, pre, pre-wrap) is the order of SelfTestData::modes.
static PRUint8 preModeValue[NUM_MODES] = {
1607
NS_STYLE_WHITESPACE_NORMAL,
1608
NS_STYLE_WHITESPACE_PRE,
1609
NS_STYLE_WHITESPACE_MOZ_PRE_WRAP
1612
// Test 1: plain ASCII, "once upon\ta short time".
// The *Results arrays list the expected word length of each successive
// chunk: normal and pre-wrap split on every space/tab (4,1,4,1,1,1,5,1,4);
// pre splits only around the tab (9,1,12).
static PRUnichar test1text[] = {
1613
'o', 'n', 'c', 'e', ' ', 'u', 'p', 'o', 'n', '\t',
1614
'a', ' ', 's', 'h', 'o', 'r', 't', ' ', 't', 'i', 'm', 'e', 0
1616
static int test1Results[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4 };
1617
static int test1PreResults[] = { 9, 1, 12 };
1618
static int test1PreWrapResults[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4 };
1620
// Test 2: same shape as test 1 but with code points in the 0xE3..0xFB range
// replacing some letters and one trailing space, hence the extra final 1
// (13 instead of 12 in pre mode).
static PRUnichar test2text[] = {
1621
0xF6, 'n', 'c', 'e', ' ', 0xFB, 'p', 'o', 'n', '\t',
1622
0xE3, ' ', 's', 'h', 0xF3, 'r', 't', ' ', 't', 0xEE, 'm', 'e', ' ', 0
1624
static int test2Results[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4, 1 };
1625
static int test2PreResults[] = { 9, 1, 13 };
1626
static int test2PreWrapResults[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4, 1 };
1628
// Test 3: starts with a code point above 0xFF (0x0152), followed by
// "nce xy\tzy ".
static PRUnichar test3text[] = {
1629
0x0152, 'n', 'c', 'e', ' ', 'x', 'y', '\t', 'z', 'y', ' ', 0
1631
static int test3Results[] = { 4, 1, 2, 1, 2, 1, };
1632
static int test3PreResults[] = { 7, 1, 3, };
1633
static int test3PreWrapResults[] = { 4, 1, 2, 1, 2, 1, };
1635
// Test 4: test 1 with CH_SHY inserted inside "once" and between two spaces.
// The expected lengths do not count the CH_SHY characters: "on<SHY>ce" is 4,
// and the 12 characters before the tab give 10 in pre mode. The
// space/<SHY>/space run is expected as 1 in normal mode and 2 in pre-wrap.
static PRUnichar test4text[] = {
1636
'o', 'n', CH_SHY, 'c', 'e', ' ', CH_SHY, ' ', 'u', 'p', 'o', 'n', '\t',
1637
'a', ' ', 's', 'h', 'o', 'r', 't', ' ', 't', 'i', 'm', 'e', 0
1639
static int test4Results[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4 };
1640
static int test4PreResults[] = { 10, 1, 12 };
1641
static int test4PreWrapResults[] = { 4, 2, 4, 1, 1, 1, 5, 1, 4 };
1643
// Test 5: every mode expects a single 0. SelfTest checks data[0] != 0 before
// reporting a result-count mismatch, so a leading 0 exempts the test from
// that check. (The text contents are not visible here.)
static PRUnichar test5text[] = {
1646
static int test5Results[] = { 0 };
1647
static int test5PreResults[] = { 0 };
1648
static int test5PreWrapResults[] = { 0 };
1651
// Test 6: code points in the 0x3000..0x97f3 range mixed with ASCII
// "HEY! HEY!\tHEY!". In pre mode the expected split is 20 characters before
// the tab, the tab itself, and 13 after it.
static PRUnichar test6text[] = {
1652
0x30d5, 0x30b8, 0x30c6, 0x30ec, 0x30d3, 0x306e, 0x97f3, 0x697d,
1653
0x756a, 0x7d44, 0x300c, 'H', 'E', 'Y', '!', ' ', 'H', 'E', 'Y', '!',
1654
'\t', 'H', 'E', 'Y', '!', 0x300d, 0x306e, 0x30db, 0x30fc, 0x30e0,
1655
0x30da, 0x30fc, 0x30b8, 0x3002, 0
1657
static int test6Results[] = { 1, 1, 1, 1, 1,
1661
static int test6PreResults[] = { 20, 1, 13 };
1662
static int test6PreWrapResults[] = { 1, 1, 1, 1, 1,
1668
// Table of self-test cases walked by SelfTest. For each case the three
// sections give { number of expected results, expected-results array } for
// the normal, pre and pre-wrap modes, in that order; the element count is
// derived with sizeof so it tracks the array definition.
static SelfTestData tests[] = {
1670
{ { sizeof(test1Results)/sizeof(int), test1Results, },
1671
{ sizeof(test1PreResults)/sizeof(int), test1PreResults, },
1672
{ sizeof(test1PreWrapResults)/sizeof(int), test1PreWrapResults, } }
1675
{ { sizeof(test2Results)/sizeof(int), test2Results, },
1676
{ sizeof(test2PreResults)/sizeof(int), test2PreResults, },
1677
{ sizeof(test2PreWrapResults)/sizeof(int), test2PreWrapResults, } }
1680
{ { sizeof(test3Results)/sizeof(int), test3Results, },
1681
{ sizeof(test3PreResults)/sizeof(int), test3PreResults, },
1682
{ sizeof(test3PreWrapResults)/sizeof(int), test3PreWrapResults, } }
1685
{ { sizeof(test4Results)/sizeof(int), test4Results, },
1686
{ sizeof(test4PreResults)/sizeof(int), test4PreResults, },
1687
{ sizeof(test4PreWrapResults)/sizeof(int), test4PreWrapResults, } }
1690
{ { sizeof(test5Results)/sizeof(int), test5Results, },
1691
{ sizeof(test5PreResults)/sizeof(int), test5PreResults, },
1692
{ sizeof(test5PreWrapResults)/sizeof(int), test5PreWrapResults, } }
1696
{ { sizeof(test6Results)/sizeof(int), test6Results, },
1697
{ sizeof(test6PreResults)/sizeof(int), test6PreResults, },
1698
{ sizeof(test6PreWrapResults)/sizeof(int), test6PreWrapResults, } }
1703
// Number of entries in tests[].
#define NUM_TESTS (sizeof(tests) / sizeof(tests[0]))
1706
// Runs every entry of tests[] through a transformer built from the supplied
// breakers and pres context. For each whitespace mode it iterates forwards
// with GetNextWord and then backwards with GetPrevWord, comparing each
// returned word length with the expected-results array for that mode, and
// finally prints a "failed" line to stderr or a "succeeded" line to stdout.
// The environment variable GECKO_TEXT_TRANSFORMER_NOISY_SELF_TEST is
// consulted for the gNoisy flag.
nsTextTransformer::SelfTest(nsILineBreaker* aLineBreaker,
1707
nsIWordBreaker* aWordBreaker,
1708
nsIPresContext* aPresContext)
1710
PRBool gNoisy = PR_FALSE;
1711
if (PR_GetEnv("GECKO_TEXT_TRANSFORMER_NOISY_SELF_TEST")) {
1715
PRBool error = PR_FALSE;
1716
PRInt32 testNum = 0;
1717
SelfTestData* st = tests;
1718
SelfTestData* last = st + NUM_TESTS;
1719
for (; st < last; st++) {
1721
PRInt32 wordLen, contentLen;
1722
PRBool ws, transformed;
1724
// isAsciiTest only selects the "ascii"/"unicode" label in the printed
// output; it starts true and is cleared while scanning the text (the
// condition that clears it is not visible here).
PRBool isAsciiTest = PR_TRUE;
1725
const PRUnichar* cp = st->text;
1728
isAsciiTest = PR_FALSE;
1734
nsTextFragment frag(st->text);
1735
nsTextTransformer tx(aLineBreaker, aWordBreaker, aPresContext);
1737
for (PRInt32 preMode = 0; preMode < NUM_MODES; preMode++) {
1740
nsAutoString uc2(st->text);
1741
printf("%s forwards test: '", isAsciiTest ? "ascii" : "unicode");
1742
fputs(NS_ConvertUCS2toUTF8(uc2).get(), stdout);
1745
// Forward pass: start at offset 0 in the whitespace mode under test.
tx.Init2(&frag, 0, preModeValue[preMode], NS_STYLE_TEXT_TRANSFORM_NONE);
1747
int* expectedResults = st->modes[preMode].data;
1748
int resultsLen = st->modes[preMode].length;
1753
while ((bp = tx.GetNextWord(PR_FALSE, &wordLen, &contentLen, &ws, &transformed))) {
1755
nsAutoString tmp(bp, wordLen);
1757
fputs(NS_ConvertUCS2toUTF8(tmp).get(), stdout);
1758
printf("': ws=%s wordLen=%d (%d) contentLen=%d (offset=%d)\n",
1760
wordLen, *expectedResults, contentLen, tx.mOffset);
1762
if (*expectedResults != wordLen) {
1771
// After the forward pass the cursor should sit one past the last expected
// result; tests whose first expected value is 0 are exempt from this check.
if (expectedResults != st->modes[preMode].data + resultsLen) {
1772
if (st->modes[preMode].data[0] != 0) {
1777
// Do backwards test
1779
nsAutoString uc2(st->text);
1780
printf("%s backwards test: '", isAsciiTest ? "ascii" : "unicode");
1781
fputs(NS_ConvertUCS2toUTF8(uc2).get(), stdout);
1784
// Backward pass: start at the end of the fragment.
// NOTE(review): this always initialises with NS_STYLE_WHITESPACE_NORMAL,
// while the expected results below are still taken from modes[preMode];
// confirm whether preModeValue[preMode] was intended.
tx.Init2(&frag, frag.GetLength(), NS_STYLE_WHITESPACE_NORMAL,
1785
NS_STYLE_TEXT_TRANSFORM_NONE);
1786
// Expected results are consumed from the end of the array this time.
expectedResults = st->modes[preMode].data + resultsLen;
1790
while ((bp = tx.GetPrevWord(PR_FALSE, &wordLen, &contentLen, &ws))) {
1793
nsAutoString tmp(bp, wordLen);
1795
fputs(NS_ConvertUCS2toUTF8(tmp).get(), stdout);
1796
printf("': ws=%s wordLen=%d contentLen=%d (offset=%d)\n",
1798
wordLen, contentLen, tx.mOffset);
1800
if (*expectedResults != wordLen) {
1808
// After the backward pass the cursor should be back at the first expected
// result; again tests whose first expected value is 0 are exempt.
if (expectedResults != st->modes[preMode].data) {
1809
if (st->modes[preMode].data[0] != 0) {
1815
fprintf(stderr, "nsTextTransformer: self test %d failed\n", testNum);
1818
fprintf(stdout, "nsTextTransformer: self test %d succeeded\n", testNum);
1830
nsTextTransformer::Init2(const nsTextFragment* aFrag,
1831
PRInt32 aStartingOffset,
1832
PRUint8 aWhiteSpace,
1833
PRUint8 aTextTransform)
1837
// Sanitize aStartingOffset
1838
if (aStartingOffset < 0) {
1839
NS_WARNING("bad starting offset");
1840
aStartingOffset = 0;
1842
else if (aStartingOffset > mFrag->GetLength()) {
1843
NS_WARNING("bad starting offset");
1844
aStartingOffset = mFrag->GetLength();
1846
mOffset = aStartingOffset;
1848
// Get the frames text style information
1849
if (NS_STYLE_WHITESPACE_PRE == aWhiteSpace) {
1850
mMode = ePreformatted;
1852
else if (NS_STYLE_WHITESPACE_MOZ_PRE_WRAP == aWhiteSpace) {
1855
mTextTransform = aTextTransform;