2
**********************************************************************
3
* Copyright (C) 2001, International Business Machines
4
* Corporation and others. All Rights Reserved.
5
**********************************************************************
6
* Date Name Description
7
* 05/24/01 aliu Creation.
8
**********************************************************************
11
#include "unicode/uchar.h"
13
#include "unicode/uniset.h"
16
#include "unicode/ustring.h"
23
* ID for this transliterator.
25
// Identifier string for this transliterator; the locale constructor passes
// it to the Transliterator base-class constructor.
const char TitlecaseTransliterator::_ID[] = "Any-Title";
28
* Mutex for statics IN THIS FILE
30
// File-local mutex handle, initially 0; cleanup() destroys it with
// umtx_destroy(). NOTE(review): no lock/unlock call is visible in this
// excerpt -- presumably it guards the creation of SKIP and CASED; confirm.
static UMTX MUTEX = 0;
33
* The set of characters we skip. These are neither cased nor
34
* non-cased, to us; we copy them verbatim.
36
// NULL until handleTransliterate() allocates it from the pattern
// [\u00AD \u2019 \' [:Mn:] [:Me:] [:Cf:] [:Lm:] [:Sk:]]; deleted and reset
// to NULL by cleanup().
static UnicodeSet* SKIP = NULL;
39
* The set of characters that cause the next non-SKIP character to be lowercased.
42
// NULL until handleTransliterate() allocates it from the pattern
// [[:Lu:] [:Ll:] [:Lt:]]; deleted and reset to NULL by cleanup().
// handleTransliterate() sets its mode flag to !CASED->contains(ch) after
// each character it examines.
static UnicodeSet* CASED = NULL;
44
// Constructs a titlecase transliterator for the locale `theLoc`.
// The base class is initialized with _ID and 0. NOTE(review): the rest of
// the member-initializer list and the braces are not visible in this excerpt.
TitlecaseTransliterator::TitlecaseTransliterator(const Locale& theLoc) :
45
Transliterator(_ID, 0),
48
// Scratch array of u_getMaxCaseExpansion() UChars; handleTransliterate()
// passes it to u_internalToTitle / u_internalToLower as the output buffer.
buffer = new UChar[u_getMaxCaseExpansion()];
49
// Need to look back 2 characters in the case of "can't"
50
setMaximumContextLength(2);
56
// Destructor. The body is not visible in this excerpt.
// NOTE(review): both constructors allocate `buffer` with new[]; confirm it
// is released here with delete[].
TitlecaseTransliterator::~TitlecaseTransliterator() {
63
// Copy constructor. Allocates a separate scratch buffer for this object and
// copies the contents of o.buffer into it, so the two objects do not share
// the array. NOTE(review): the member-initializer list is not visible in
// this excerpt.
TitlecaseTransliterator::TitlecaseTransliterator(const TitlecaseTransliterator& o) :
67
buffer = new UChar[u_getMaxCaseExpansion()];
68
uprv_arrayCopy(o.buffer, 0, this->buffer, 0, u_getMaxCaseExpansion());
72
* Assignment operator.
74
// Copies the state of `o` into this object: base-class state first, then
// the contents of the scratch buffer.
// NOTE(review): the return statement and any further member copies are not
// visible in this excerpt.
TitlecaseTransliterator& TitlecaseTransliterator::operator=(
75
const TitlecaseTransliterator& o) {
76
Transliterator::operator=(o);
78
// The existing buffer is reused (no reallocation); only its contents are
// overwritten. NOTE(review): on self-assignment source and destination are
// the same array -- confirm uprv_arrayCopy tolerates that.
uprv_arrayCopy(o.buffer, 0, this->buffer, 0, u_getMaxCaseExpansion());
85
// Returns a newly allocated copy of this object, made with the copy
// constructor. NOTE(review): the caller presumably takes ownership of the
// returned pointer -- confirm against the Transliterator contract.
Transliterator* TitlecaseTransliterator::clone(void) const {
86
return new TitlecaseTransliterator(*this);
90
* Implements {@link Transliterator#handleTransliterate}.
92
// Converts the characters of `text` between offsets.start and offsets.limit
// in place, using u_internalToTitle or u_internalToLower on every character
// that is not in SKIP.
//
//   text          - the Replaceable that is read and modified.
//   offsets       - supplies contextStart/start/limit/contextLimit; limit and
//                   contextLimit are adjusted when a replacement changes
//                   length, and start is set to limit at the end.
//   isIncremental - not referenced in the lines visible here.
//
// NOTE(review): this excerpt is missing lines (declarations of start, c,
// iter, cp, oldLen and newLen, the toTitle/toLower selection, loop
// increments, closing braces); the comments below describe only what is
// visible.
void TitlecaseTransliterator::handleTransliterate(
93
Replaceable& text, UTransPosition& offsets,
94
UBool isIncremental) const {
98
UErrorCode ec = U_ZERO_ERROR;
99
// Build the two file-static sets and register the library cleanup hook.
// NOTE(review): no guard is visible around these allocations; if they ran on
// every call the previous sets would leak -- confirm they sit behind a NULL
// check (presumably under MUTEX). `ec` is not inspected in the visible lines.
SKIP = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\u00AD \\u2019 \\' [:Mn:] [:Me:] [:Cf:] [:Lm:] [:Sk:]]"), ec);
100
CASED = new UnicodeSet(UNICODE_STRING_SIMPLE("[[:Lu:] [:Ll:] [:Lt:]]"), ec);
101
ucln_i18n_registerCleanup();
105
// Our mode; we are either converting letter toTitle or toLower.
107
UBool doTitle = TRUE;
109
// Determine if there is a preceding context of CASED SKIP*,
110
// in which case we want to start in toLower mode.  If the
111
// prior context is anything else (including empty) then start
// in toTitle mode (doTitle stays TRUE).
115
// Scan backwards from just before offsets.start down to offsets.contextStart.
for (start = offsets.start - 1; start >= offsets.contextStart; start -= UTF_CHAR_LENGTH(c)) {
116
c = text.char32At(start);
117
if (SKIP->contains(c)) {
120
doTitle = !CASED->contains(c);
124
// Convert things after a CASED character toLower; things
125
// after a non-CASED, non-SKIP character toTitle.  SKIP
126
// characters are copied directly and do not change the mode.
127
int32_t textPos = offsets.start;
128
// Nothing to convert when the range is empty.
if (textPos >= offsets.limit) return;
130
// Snapshot of the whole context range; the loop below reads code points
// from this copy while the replacements are written to `text`.
UnicodeString original;
131
text.extractBetween(offsets.contextStart, offsets.contextLimit, original);
134
// Iterator over `text`, bounded by the context range, handed to the
// case-conversion functions.
uiter_setReplaceable(&iter, &text);
135
iter.start = offsets.contextStart;
136
iter.limit = offsets.contextLimit;
138
// Walk through original string
139
// If there is a case change, modify corresponding position in replaceable
141
// i and limit index into `original`, i.e. they are relative to contextStart.
int32_t i = textPos - offsets.contextStart;
142
int32_t limit = offsets.limit - offsets.contextStart;
147
for (; i < limit; ) {
148
UTF_GET_CHAR(original.getBuffer(), 0, i, original.length(), cp);
149
oldLen = UTF_CHAR_LENGTH(cp);
151
iter.index = i; // Point _past_ current char
152
if (!SKIP->contains(cp)) {
154
// NOTE(review): the condition choosing between the next two calls is not
// visible here; presumably it is doTitle -- confirm.
newLen = u_internalToTitle(cp, &iter, buffer, u_getMaxCaseExpansion(), loc.getName());
156
newLen = u_internalToLower(cp, &iter, buffer, u_getMaxCaseExpansion(), loc.getName());
158
// The next non-SKIP character is converted toLower after a CASED one.
doTitle = !CASED->contains(cp);
160
// Write the converted text over the original character.
UnicodeString temp(buffer, newLen);
161
text.handleReplaceBetween(textPos, textPos + oldLen, temp);
162
if (newLen != oldLen) {
164
// Keep the caller's limits in step with the change in text length.
offsets.limit += newLen - oldLen;
165
offsets.contextLimit += newLen - oldLen;
172
// Report the whole range as processed.
offsets.start = offsets.limit;
176
* Static memory cleanup function.
178
// Releases the file-static state: deletes the SKIP and CASED sets, resets
// both pointers to NULL, and destroys MUTEX.
// NOTE(review): presumably invoked through the hook registered by
// ucln_i18n_registerCleanup() in handleTransliterate() -- confirm.
void TitlecaseTransliterator::cleanup() {
180
delete SKIP; SKIP = NULL;
181
delete CASED; CASED = NULL;
182
umtx_destroy(&MUTEX);