~xnox/sword/soname

« back to all changes in this revision

Viewing changes to bindings/objc/dependencies/icu/include/unicode/translit.h

Committer: mdbergmann
Date: 2010-07-25 14:34:36 UTC
Revision ID: svn-v4:bcd7d363-81e1-0310-97ec-a550e20fc99c:trunk:2529

minor changes to icu includes.

files added:
bindings/objc/dependencies/icu/include/unicode

bindings/objc/dependencies/icu/include/unicode/brkiter.h

bindings/objc/dependencies/icu/include/unicode/calendar.h

bindings/objc/dependencies/icu/include/unicode/caniter.h

bindings/objc/dependencies/icu/include/unicode/chariter.h

bindings/objc/dependencies/icu/include/unicode/choicfmt.h

bindings/objc/dependencies/icu/include/unicode/coleitr.h

bindings/objc/dependencies/icu/include/unicode/coll.h

bindings/objc/dependencies/icu/include/unicode/curramt.h

bindings/objc/dependencies/icu/include/unicode/currunit.h

bindings/objc/dependencies/icu/include/unicode/datefmt.h

bindings/objc/dependencies/icu/include/unicode/dbbi.h

bindings/objc/dependencies/icu/include/unicode/dcfmtsym.h

bindings/objc/dependencies/icu/include/unicode/decimfmt.h

bindings/objc/dependencies/icu/include/unicode/docmain.h

bindings/objc/dependencies/icu/include/unicode/dtfmtsym.h

bindings/objc/dependencies/icu/include/unicode/fieldpos.h

bindings/objc/dependencies/icu/include/unicode/fmtable.h

bindings/objc/dependencies/icu/include/unicode/format.h

bindings/objc/dependencies/icu/include/unicode/gregocal.h

bindings/objc/dependencies/icu/include/unicode/locid.h

bindings/objc/dependencies/icu/include/unicode/measfmt.h

bindings/objc/dependencies/icu/include/unicode/measunit.h

bindings/objc/dependencies/icu/include/unicode/measure.h

bindings/objc/dependencies/icu/include/unicode/msgfmt.h

bindings/objc/dependencies/icu/include/unicode/normlzr.h

bindings/objc/dependencies/icu/include/unicode/numfmt.h

bindings/objc/dependencies/icu/include/unicode/parseerr.h

bindings/objc/dependencies/icu/include/unicode/parsepos.h

bindings/objc/dependencies/icu/include/unicode/platform.h

bindings/objc/dependencies/icu/include/unicode/ppalmos.h

bindings/objc/dependencies/icu/include/unicode/putil.h

bindings/objc/dependencies/icu/include/unicode/pwin32.h

bindings/objc/dependencies/icu/include/unicode/rbbi.h

bindings/objc/dependencies/icu/include/unicode/rbnf.h

bindings/objc/dependencies/icu/include/unicode/regex.h

bindings/objc/dependencies/icu/include/unicode/rep.h

bindings/objc/dependencies/icu/include/unicode/resbund.h

bindings/objc/dependencies/icu/include/unicode/schriter.h

bindings/objc/dependencies/icu/include/unicode/search.h

bindings/objc/dependencies/icu/include/unicode/simpletz.h

bindings/objc/dependencies/icu/include/unicode/smpdtfmt.h

bindings/objc/dependencies/icu/include/unicode/sortkey.h

bindings/objc/dependencies/icu/include/unicode/strenum.h

bindings/objc/dependencies/icu/include/unicode/stsearch.h

bindings/objc/dependencies/icu/include/unicode/symtable.h

bindings/objc/dependencies/icu/include/unicode/tblcoll.h

bindings/objc/dependencies/icu/include/unicode/timezone.h

bindings/objc/dependencies/icu/include/unicode/translit.h

bindings/objc/dependencies/icu/include/unicode/ubidi.h

bindings/objc/dependencies/icu/include/unicode/ubrk.h

bindings/objc/dependencies/icu/include/unicode/ucal.h

bindings/objc/dependencies/icu/include/unicode/ucasemap.h

bindings/objc/dependencies/icu/include/unicode/ucat.h

bindings/objc/dependencies/icu/include/unicode/uchar.h

bindings/objc/dependencies/icu/include/unicode/uchriter.h

bindings/objc/dependencies/icu/include/unicode/uclean.h

bindings/objc/dependencies/icu/include/unicode/ucnv.h

bindings/objc/dependencies/icu/include/unicode/ucnv_cb.h

bindings/objc/dependencies/icu/include/unicode/ucnv_err.h

bindings/objc/dependencies/icu/include/unicode/ucol.h

bindings/objc/dependencies/icu/include/unicode/ucoleitr.h

bindings/objc/dependencies/icu/include/unicode/uconfig.h

bindings/objc/dependencies/icu/include/unicode/ucsdet.h

bindings/objc/dependencies/icu/include/unicode/ucurr.h

bindings/objc/dependencies/icu/include/unicode/udat.h

bindings/objc/dependencies/icu/include/unicode/udata.h

bindings/objc/dependencies/icu/include/unicode/udeprctd.h

bindings/objc/dependencies/icu/include/unicode/udraft.h

bindings/objc/dependencies/icu/include/unicode/uenum.h

bindings/objc/dependencies/icu/include/unicode/uidna.h

bindings/objc/dependencies/icu/include/unicode/uintrnal.h

bindings/objc/dependencies/icu/include/unicode/uiter.h

bindings/objc/dependencies/icu/include/unicode/uloc.h

bindings/objc/dependencies/icu/include/unicode/ulocdata.h

bindings/objc/dependencies/icu/include/unicode/umachine.h

bindings/objc/dependencies/icu/include/unicode/umisc.h

bindings/objc/dependencies/icu/include/unicode/umsg.h

bindings/objc/dependencies/icu/include/unicode/unifilt.h

bindings/objc/dependencies/icu/include/unicode/unifunct.h

bindings/objc/dependencies/icu/include/unicode/unimatch.h

bindings/objc/dependencies/icu/include/unicode/unirepl.h

bindings/objc/dependencies/icu/include/unicode/uniset.h

bindings/objc/dependencies/icu/include/unicode/unistr.h

bindings/objc/dependencies/icu/include/unicode/unorm.h

bindings/objc/dependencies/icu/include/unicode/unum.h

bindings/objc/dependencies/icu/include/unicode/uobject.h

bindings/objc/dependencies/icu/include/unicode/uobslete.h

bindings/objc/dependencies/icu/include/unicode/urbtok.h

bindings/objc/dependencies/icu/include/unicode/uregex.h

bindings/objc/dependencies/icu/include/unicode/urename.h

bindings/objc/dependencies/icu/include/unicode/urep.h

bindings/objc/dependencies/icu/include/unicode/ures.h

bindings/objc/dependencies/icu/include/unicode/uscript.h

bindings/objc/dependencies/icu/include/unicode/usearch.h

bindings/objc/dependencies/icu/include/unicode/uset.h

bindings/objc/dependencies/icu/include/unicode/usetiter.h

bindings/objc/dependencies/icu/include/unicode/ushape.h

bindings/objc/dependencies/icu/include/unicode/usprep.h

bindings/objc/dependencies/icu/include/unicode/ustdio.h

bindings/objc/dependencies/icu/include/unicode/ustream.h

bindings/objc/dependencies/icu/include/unicode/ustring.h

bindings/objc/dependencies/icu/include/unicode/usystem.h

bindings/objc/dependencies/icu/include/unicode/utext.h

bindings/objc/dependencies/icu/include/unicode/utf.h

bindings/objc/dependencies/icu/include/unicode/utf16.h

bindings/objc/dependencies/icu/include/unicode/utf32.h

bindings/objc/dependencies/icu/include/unicode/utf8.h

bindings/objc/dependencies/icu/include/unicode/utf_old.h

bindings/objc/dependencies/icu/include/unicode/utmscale.h

bindings/objc/dependencies/icu/include/unicode/utrace.h

bindings/objc/dependencies/icu/include/unicode/utrans.h

bindings/objc/dependencies/icu/include/unicode/utypes.h

bindings/objc/dependencies/icu/include/unicode/uversion.h

files removed:
bindings/objc/dependencies/icu/include/layout

bindings/objc/dependencies/icu/include/layout/LEFontInstance.h

bindings/objc/dependencies/icu/include/layout/LEGlyphFilter.h

bindings/objc/dependencies/icu/include/layout/LEGlyphStorage.h

bindings/objc/dependencies/icu/include/layout/LEInsertionList.h

bindings/objc/dependencies/icu/include/layout/LELanguages.h

bindings/objc/dependencies/icu/include/layout/LEScripts.h

bindings/objc/dependencies/icu/include/layout/LESwaps.h

bindings/objc/dependencies/icu/include/layout/LETypes.h

bindings/objc/dependencies/icu/include/layout/LayoutEngine.h

bindings/objc/dependencies/icu/include/layout/ParagraphLayout.h

bindings/objc/dependencies/icu/include/layout/RunArrays.h

files modified:
bindings/objc/ObjCSword.xcodeproj/project.pbxproj

Show diffs side-by-side

added added

removed removed

bindings/objc/dependencies/icu/include/unicode/translit.h

**********************************************************************

* Date Name Description

* 11/17/99 aliu Creation.

**********************************************************************

#ifndef TRANSLIT_H

#define TRANSLIT_H

#include "unicode/utypes.h"

/**

* \file

* \brief C++ API: Transforms text from one format to another.

#if !UCONFIG_NO_TRANSLITERATION

#include "unicode/uobject.h"

#include "unicode/unistr.h"

#include "unicode/parseerr.h"

#include "unicode/utrans.h" // UTransPosition, UTransDirection

#include "unicode/strenum.h"

U_NAMESPACE_BEGIN

class UnicodeFilter;

class UnicodeSet;

class CompoundTransliterator;

class TransliteratorParser;

class NormalizationTransliterator;

class TransliteratorIDParser;

/**

* <code>Transliterator</code> is an abstract class that

* transliterates text from one format to another. The most common

* kind of transliterator is a script, or alphabet, transliterator.

* For example, a Russian to Latin transliterator changes Russian text

* written in Cyrillic characters to phonetically equivalent Latin

* characters. It does not translate Russian to English!

* Transliteration, unlike translation, operates on characters, without

* reference to the meanings of words and sentences.

* Although script conversion is its most common use, a

* transliterator can actually perform a more general class of tasks.

* In fact, <code>Transliterator</code> defines a very general API

* which specifies only that a segment of the input text is replaced

* by new text. The particulars of this conversion are determined

* entirely by subclasses of <code>Transliterator</code>.

* Transliterators are stateless

* <code>Transliterator</code> objects are stateless; they

* retain no information between calls to

* <code>transliterate()</code>. (However, this does not

* mean that threads may share transliterators without synchronizing

* them. Transliterators are not immutable, so they must be

* synchronized when shared between threads.) This might seem to

* limit the complexity of the transliteration operation. In

* practice, subclasses perform complex transliterations by delaying

* the replacement of text until it is known that no other

* replacements are possible. In other words, although the

* <code>Transliterator</code> objects are stateless, the source text

* itself embodies all the needed information, and delayed operation

* allows arbitrary complexity.

* Batch transliteration

* The simplest way to perform transliteration is all at once, on a

* string of existing text. This is referred to as batch

* transliteration. For example, given a string <code>input</code>

* and a transliterator <code>t</code>, the call

* \htmlonly<blockquote>\endhtmlonly<code>String result = t.transliterate(input);

* </code>\htmlonly</blockquote>\endhtmlonly

* will transliterate it and return the result. Other methods allow

* the client to specify a substring to be transliterated and to use

* {@link Replaceable } objects instead of strings, in order to

* preserve out-of-band information (such as text styles).

* Keyboard transliteration

* Somewhat more involved is keyboard, or incremental

* transliteration. This is the transliteration of text that is

* arriving from some source (typically the user's keyboard) one

* character at a time, or in some other piecemeal fashion.

* In keyboard transliteration, a <code>Replaceable</code> buffer

* stores the text. As text is inserted, as much as possible is

* transliterated on the fly. This means a GUI that displays the

* contents of the buffer may show text being modified as each new

* character arrives.

* Consider the simple <code>RuleBasedTransliterator</code>:

100

101

* \htmlonly<blockquote>\endhtmlonly<code>

102

* th>{theta}

103

* t>{tau}

104

* </code>\htmlonly</blockquote>\endhtmlonly

105

106

* When the user types 't', nothing will happen, since the

107

* transliterator is waiting to see if the next character is 'h'. To

108

* remedy this, we introduce the notion of a cursor, marked by a '|'

109

* in the output string:

110

111

* \htmlonly<blockquote>\endhtmlonly<code>

112

* t>|{tau}

113

* {tau}h>{theta}

114

* </code>\htmlonly</blockquote>\endhtmlonly

115

116

* Now when the user types 't', tau appears, and if the next character

117

* is 'h', the tau changes to a theta. This is accomplished by

118

* maintaining a cursor position (independent of the insertion point,

119

* and invisible in the GUI) across calls to

120

* <code>transliterate()</code>. Typically, the cursor will

121

* be coincident with the insertion point, but in a case like the one

122

* above, it will precede the insertion point.

123

124

* Keyboard transliteration methods maintain a set of three indices

125

* that are updated with each call to

126

* <code>transliterate()</code>, including the cursor, start,

127

* and limit. Since these indices are changed by the method, they are

128

* passed in an <code>int[]</code> array. The <code>START</code> index

129

* marks the beginning of the substring that the transliterator will

130

* look at. It is advanced as text becomes committed (but it is not

131

* the committed index; that's the <code>CURSOR</code>). The

132

* <code>CURSOR</code> index, described above, marks the point at

133

* which the transliterator last stopped, either because it reached

134

* the end, or because it required more characters to disambiguate

135

* between possible inputs. The <code>CURSOR</code> can also be

136

* explicitly set by rules in a <code>RuleBasedTransliterator</code>.

137

* Any characters before the <code>CURSOR</code> index are frozen;

138

* future keyboard transliteration calls within this input sequence

139

* will not change them. New text is inserted at the

140

* <code>LIMIT</code> index, which marks the end of the substring that

141

* the transliterator looks at.

142

143

* Because keyboard transliteration assumes that more characters

144

* are to arrive, it is conservative in its operation. It only

145

* transliterates when it can do so unambiguously. Otherwise it waits

146

* for more characters to arrive. When the client code knows that no

147

* more characters are forthcoming, perhaps because the user has

148

* performed some input termination operation, then it should call

149

* <code>finishTransliteration()</code> to complete any

150

* pending transliterations.

151

152

* Inverses

153

154

* Pairs of transliterators may be inverses of one another. For

155

* example, if transliterator A transliterates characters by

156

* incrementing their Unicode value (so "abc" -> "def"), and

157

* transliterator B decrements character values, then A

158

* is an inverse of B and vice versa. If we compose A

159

* with B in a compound transliterator, the result is the

160

* identity transliterator, that is, a transliterator that does not

161

* change its input text.

162

163

* The <code>Transliterator</code> method <code>getInverse()</code>

164

* returns a transliterator's inverse, if one exists, or

165

* <code>null</code> otherwise. However, the result of

166

* <code>getInverse()</code> usually will not be a true

167

* mathematical inverse. This is because true inverse transliterators

168

* are difficult to formulate. For example, consider two

169

* transliterators: AB, which transliterates the character 'A'

170

* to 'B', and BA, which transliterates 'B' to 'A'. It might

171

* seem that these are exact inverses, since

172

173

* \htmlonly<blockquote>\endhtmlonly"A" x AB -> "B"

174

* "B" x BA -> "A"\htmlonly</blockquote>\endhtmlonly

175

176

* where 'x' represents transliteration. However,

177

178

* \htmlonly<blockquote>\endhtmlonly"ABCD" x AB -> "BBCD"

179

* "BBCD" x BA -> "AACD"\htmlonly</blockquote>\endhtmlonly

180

181

* so AB composed with BA is not the

182

* identity. Nonetheless, BA may be usefully considered to be

183

* AB's inverse, and it is on this basis that

184

* AB<code>.getInverse()</code> could legitimately return

185

* BA.

186

187

* IDs and display names

188

189

* A transliterator is designated by a short identifier string or

190

* ID. IDs follow the format source-destination,

191

* where source describes the entity being replaced, and

192

* destination describes the entity replacing

193

* source. The entities may be the names of scripts,

194

* particular sequences of characters, or whatever else it is that the

195

* transliterator converts to or from. For example, a transliterator

196

* from Russian to Latin might be named "Russian-Latin". A

197

* transliterator from keyboard escape sequences to Latin-1 characters

198

* might be named "KeyboardEscape-Latin1". By convention, system

199

* entity names are in English, with the initial letters of words

200

* capitalized; user entity names may follow any format so long as

201

* they do not contain dashes.

202

203

* In addition to programmatic IDs, transliterator objects have

204

* display names for presentation in user interfaces, returned by

205

* {@link #getDisplayName }.

206

207

* Factory methods and registration

208

209

* In general, client code should use the factory method

210

* {@link #createInstance } to obtain an instance of a

211

* transliterator given its ID. Valid IDs may be enumerated using

212

* <code>getAvailableIDs()</code>. Since transliterators are mutable,

213

* multiple calls to {@link #createInstance } with the same ID will

214

* return distinct objects.

215

216

* In addition to the system transliterators registered at startup,

217

* user transliterators may be registered by calling

218

* <code>registerInstance()</code> at run time. A registered instance

219

* acts as a template; future calls to {@link #createInstance } with the ID

220

* of the registered object return clones of that object. Thus any

221

* object passed to <tt>registerInstance()</tt> must implement

222

* <tt>clone()</tt> properly. To register a transliterator subclass

223

* without instantiating it (until it is needed), users may call

224

* {@link #registerFactory }. In this case, the objects are

225

* instantiated by invoking the zero-argument public constructor of

226

* the class.

227

228

* Subclassing

229

230

* Subclasses must implement the abstract method

231

* <code>handleTransliterate()</code>. Subclasses should override

232

* the <code>transliterate()</code> method taking a

233

* <code>Replaceable</code> and the <code>transliterate()</code>

234

* method taking a <code>String</code> and <code>StringBuffer</code>

235

* if the performance of these methods can be improved over the

236

* performance obtained by the default implementations in this class.

237

238

* @author Alan Liu

239

* @stable ICU 2.0

240

241

class U_I18N_API Transliterator : public UObject {

242

243

private:

244

245

/**

246

* Programmatic name, e.g., "Latin-Arabic".

247

248

UnicodeString ID;

249

250

/**

251

* This transliterator's filter. Any character for which

252

* <tt>filter.contains()</tt> returns <tt>false</tt> will not be

253

* altered by this transliterator. If <tt>filter</tt> is

254

* <tt>null</tt> then no filtering is applied.

255

256

UnicodeFilter* filter;

257

258

int32_t maximumContextLength;

259

260

public:

261

262

/**

263

* A context integer or pointer for a factory function, passed by

264

* value.

265

* @stable ICU 2.4

266

267

union Token {

268

/**

269

* This token, interpreted as a 32-bit integer.

270

* @stable ICU 2.4

271

272

int32_t integer;

273

/**

274

* This token, interpreted as a native pointer.

275

* @stable ICU 2.4

276

277

void* pointer;

278

};

279

280

/**

281

* Return a token containing an integer.

282

* @return a token containing an integer.

283

* @internal

284

285

inline static Token integerToken(int32_t);

286

287

/**

288

* Return a token containing a pointer.

289

* @return a token containing a pointer.

290

* @internal

291

292

inline static Token pointerToken(void*);

293

294

/**

295

* A function that creates and returns a Transliterator. When

296

* invoked, it will be passed the ID string that is being

297

* instantiated, together with the context pointer that was passed

298

* in when the factory function was first registered. Many

299

* factory functions will ignore both parameters, however,

300

* functions that are registered to more than one ID may use the

301

* ID or the context parameter to parameterize the transliterator

302

* they create.

303

* @param ID the string identifier for this transliterator

304

* @param context a context pointer that will be stored and

305

* later passed to the factory function when an ID matching

306

* the registration ID is being instantiated with this factory.

307

* @stable ICU 2.4

308

309

typedef Transliterator* (U_EXPORT2 *Factory)(const UnicodeString& ID, Token context);

310

311

protected:

312

313

/**

314

* Default constructor.

315

* @param ID the string identifier for this transliterator

316

* @param adoptedFilter the filter. Any character for which

317

* <tt>filter.contains()</tt> returns <tt>false</tt> will not be

318

* altered by this transliterator. If <tt>filter</tt> is

319

* <tt>null</tt> then no filtering is applied.

320

* @stable ICU 2.4

321

322

Transliterator(const UnicodeString& ID, UnicodeFilter* adoptedFilter);

323

324

/**

325

* Copy constructor.

326

* @stable ICU 2.4

327

328

Transliterator(const Transliterator&);

329

330

/**

331

* Assignment operator.

332

* @stable ICU 2.4

333

334

Transliterator& operator=(const Transliterator&);

335

336

/**

337

* Create a transliterator from a basic ID. This is an ID

338

* containing only the forward direction source, target, and

339

* variant.

340

* @param id a basic ID of the form S-T or S-T/V.

341

* @param canon canonical ID to assign to the object, or

342

* NULL to leave the ID unchanged

343

* @return a newly created Transliterator or null if the ID is

344

* invalid.

345

* @stable ICU 2.4

346

347

static Transliterator* createBasicInstance(const UnicodeString& id,

348

const UnicodeString* canon);

349

350

friend class TransliteratorParser; // for parseID()

351

friend class TransliteratorIDParser; // for createBasicInstance()

352

friend class TransliteratorAlias; // for setID()

353

354

public:

355

356

/**

357

* Destructor.

358

* @stable ICU 2.0

359

360

virtual ~Transliterator();

361

362

/**

363

* Implements Cloneable.

364

* All subclasses are encouraged to implement this method if it is

365

* possible and reasonable to do so. Subclasses that are to be

366

* registered with the system using <tt>registerInstance()</tt>

367

* are required to implement this method. If a subclass does not

368

* implement clone() properly and is registered with the system

369

* using registerInstance(), then the default clone() implementation

370

* will return null, and calls to createInstance() will fail.

371

372

* @return a copy of the object.

373

* @see #registerInstance

374

* @stable ICU 2.0

375

376

virtual Transliterator* clone() const;

377

378

/**

379

* Transliterates a segment of a string, with optional filtering.

380

381

* @param text the string to be transliterated

382

* @param start the beginning index, inclusive; <code>0 <= start

383

* <= limit</code>.

384

* @param limit the ending index, exclusive; <code>start <= limit

385

* <= text.length()</code>.

386

* @return The new limit index. The text previously occupying <code>[start,

387

* limit)</code> has been transliterated, possibly to a string of a different

388

* length, at <code>[start, </code>new-limit<code>)</code>, where

389

* new-limit is the return value. If the input offsets are out of bounds,

390

* the returned value is -1 and the input string remains unchanged.

391

* @stable ICU 2.0

392

393

virtual int32_t transliterate(Replaceable& text,

394

int32_t start, int32_t limit) const;

395

396

/**

397

* Transliterates an entire string in place. Convenience method.

398

* @param text the string to be transliterated

399

* @stable ICU 2.0

400

401

virtual void transliterate(Replaceable& text) const;

402

403

/**

404

* Transliterates the portion of the text buffer that can be

405

* transliterated unambiguously after new text has been inserted,

406

* typically as a result of a keyboard event. The new text in

407

* <code>insertion</code> will be inserted into <code>text</code>

408

* at <code>index.limit</code>, advancing

409

* <code>index.limit</code> by <code>insertion.length()</code>.

410

* Then the transliterator will try to transliterate characters of

411

* <code>text</code> between <code>index.cursor</code> and

412

* <code>index.limit</code>. Characters before

413

* <code>index.cursor</code> will not be changed.

414

415

* Upon return, values in <code>index</code> will be updated.

416

* <code>index.start</code> will be advanced to the first

417

* character that future calls to this method will read.

418

* <code>index.cursor</code> and <code>index.limit</code> will

419

* be adjusted to delimit the range of text that future calls to

420

* this method may change.

421

422

* Typical usage of this method begins with an initial call

423

* with <code>index.start</code> and <code>index.limit</code>

424

* set to indicate the portion of <code>text</code> to be

425

* transliterated, and <code>index.cursor == index.start</code>.

426

* Thereafter, <code>index</code> can be used without

427

* modification in future calls, provided that all changes to

428

* <code>text</code> are made via this method.

429

430

* This method assumes that future calls may be made that will

431

* insert new text into the buffer. As a result, it only performs

432

* unambiguous transliterations. After the last call to this

433

* method, there may be untransliterated text that is waiting for

434

* more input to resolve an ambiguity. In order to perform these

435

* pending transliterations, clients should call {@link

436

* #finishTransliteration } after the last call to this

437

* method has been made.

438

439

* @param text the buffer holding transliterated and untransliterated text

440

* @param index an array of three integers.

441

442

* <ul><li><code>index.start</code>: the beginning index,

443

* inclusive; <code>0 <= index.start <= index.limit</code>.

444

445

* <li><code>index.limit</code>: the ending index, exclusive;

446

* <code>index.start <= index.limit <= text.length()</code>.

447

* <code>insertion</code> is inserted at

448

* <code>index.limit</code>.

449

450

* <li><code>index.cursor</code>: the next character to be

451

* considered for transliteration; <code>index.start <=

452

* index.cursor <= index.limit</code>. Characters before

453

* <code>index.cursor</code> will not be changed by future calls

454

* to this method.</ul>

455

456

* @param insertion text to be inserted and possibly

457

* transliterated into the translation buffer at

458

* <code>index.limit</code>. If <code>null</code> then no text

459

* is inserted.

460

* @param status Output param to be filled in with a success or an error.

461

* @see #handleTransliterate

462

* @exception IllegalArgumentException if <code>index</code>

463

* is invalid

464

* @see UTransPosition

465

* @stable ICU 2.0

466

467

virtual void transliterate(Replaceable& text, UTransPosition& index,

468

const UnicodeString& insertion,

469

UErrorCode& status) const;

470

471

/**

472

* Transliterates the portion of the text buffer that can be

473

* transliterated unambiguously after a new character has been

474

* inserted, typically as a result of a keyboard event. This is a

475

* convenience method; see {@link

476

* #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const}

477

* for details.

478

* @param text the buffer holding transliterated and

479

* untransliterated text

480

* @param index an array of three integers. See {@link

481

* #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const }.

482

* @param insertion text to be inserted and possibly

483

* transliterated into the translation buffer at

484

* <code>index.limit</code>.

485

* @param status Output param to be filled in with a success or an error.

486

* @see #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const

487

* @stable ICU 2.0

488

489

virtual void transliterate(Replaceable& text, UTransPosition& index,

490

UChar32 insertion,

491

UErrorCode& status) const;

492

493

/**

494

* Transliterates the portion of the text buffer that can be

495

* transliterated unambiguously. This is a convenience method; see

496

* {@link

497

* #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const }

498

* for details.

499

* @param text the buffer holding transliterated and

500

* untransliterated text

501

* @param index an array of three integers. See {@link

502

* #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const }.

503

* @param status Output param to be filled in with a success or an error.

504

* @see #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const

505

* @stable ICU 2.0

506

507

virtual void transliterate(Replaceable& text, UTransPosition& index,

508

UErrorCode& status) const;

509

510

/**

511

* Finishes any pending transliterations that were waiting for

512

* more characters. Clients should call this method as the last

513

* call after a sequence of one or more calls to

514

* <code>transliterate()</code>.

515

* @param text the buffer holding transliterated and

516

* untransliterated text.

517

* @param index the array of indices previously passed to {@link

518

* #transliterate }

519

* @stable ICU 2.0

520

521

virtual void finishTransliteration(Replaceable& text,

522

UTransPosition& index) const;

523

524

private:

525

526

/**

527

* This internal method does incremental transliteration. If the

528

* 'insertion' is non-null then we append it to 'text' before

529

* proceeding. This method calls through to the pure virtual

530

* framework method handleTransliterate() to do the actual

531

* work.

532

* @param text the buffer holding transliterated and

533

* untransliterated text

534

* @param index an array of three integers. See {@link

535

* #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const }.

536

* @param insertion text to be inserted and possibly

537

* transliterated into the translation buffer at

538

* <code>index.limit</code>.

539

* @param status Output param to be filled in with a success or an error.

540

541

void _transliterate(Replaceable& text,

542

UTransPosition& index,

543

const UnicodeString* insertion,

544

UErrorCode &status) const;

545

546

protected:

547

548

/**

549

* Abstract method that concrete subclasses define to implement

550

* their transliteration algorithm. This method handles both

551

* incremental and non-incremental transliteration. Let

552

* <code>originalStart</code> refer to the value of

553

* <code>pos.start</code> upon entry.

554

555

* <ul>

556

* <li>If <code>incremental</code> is false, then this method

557

* should transliterate all characters between

558

* <code>pos.start</code> and <code>pos.limit</code>. Upon return

559

* <code>pos.start</code> must == <code> pos.limit</code>.</li>

560

561

* <li>If <code>incremental</code> is true, then this method

562

* should transliterate all characters between

563

* <code>pos.start</code> and <code>pos.limit</code> that can be

564

* unambiguously transliterated, regardless of future insertions

565

* of text at <code>pos.limit</code>. Upon return,

566

* <code>pos.start</code> should be in the range

567

* [<code>originalStart</code>, <code>pos.limit</code>).

568

* <code>pos.start</code> should be positioned such that

569

* characters [<code>originalStart</code>, <code>

570

* pos.start</code>) will not be changed in the future by this

571

* transliterator and characters [<code>pos.start</code>,

572

* <code>pos.limit</code>) are unchanged.</li>

573

* </ul>

574

575

* Implementations of this method should also obey the

576

* following invariants:

577

578

* <ul>

579

* <li> <code>pos.limit</code> and <code>pos.contextLimit</code>

580

* should be updated to reflect changes in length of the text

581

* between <code>pos.start</code> and <code>pos.limit</code>. The

582

* difference <code> pos.contextLimit - pos.limit</code> should

583

* not change.</li>

584

585

* <li><code>pos.contextStart</code> should not change.</li>

586

587

* <li>Upon return, neither <code>pos.start</code> nor

588

* <code>pos.limit</code> should be less than

589

* <code>originalStart</code>.</li>

590

591

* <li>Text before <code>originalStart</code> and text after

592

* <code>pos.limit</code> should not change.</li>

593

594

* <li>Text before <code>pos.contextStart</code> and text after

595

* <code> pos.contextLimit</code> should be ignored.</li>

596

* </ul>

597

598

* Subclasses may safely assume that all characters in

599

* [<code>pos.start</code>, <code>pos.limit</code>) are filtered.

600

* In other words, the filter has already been applied by the time

601

* this method is called. See

602

* <code>filteredTransliterate()</code>.

603

604

* This method is not for public consumption. Calling

605

* this method directly will transliterate

606

* [<code>pos.start</code>, <code>pos.limit</code>) without

607

* applying the filter. End user code should call <code>

608

* transliterate()</code> instead of this method. Subclass code

609

* and wrapping transliterators should call

610

* <code>filteredTransliterate()</code> instead of this method.

611

612

* @param text the buffer holding transliterated and

613

* untransliterated text

614

615

* @param pos the indices indicating the start, limit, context

616

* start, and context limit of the text.

617

618

* @param incremental if true, assume more text may be inserted at

619

* <code>pos.limit</code> and act accordingly. Otherwise,

620

* transliterate all text between <code>pos.start</code> and

621

* <code>pos.limit</code> and move <code>pos.start</code> up to

622

* <code>pos.limit</code>.

623

624

* @see #transliterate

625

* @stable ICU 2.4

626

627

virtual void handleTransliterate(Replaceable& text,

628

UTransPosition& pos,

629

UBool incremental) const = 0;

630

631

public:

632

/**

633

* Transliterate a substring of text, as specified by index, taking filters

634

* into account. This method is for subclasses that need to delegate to

635

* another transliterator, such as CompoundTransliterator.

636

* @param text the text to be transliterated

637

* @param index the position indices

638

* @param incremental if TRUE, then assume more characters may be inserted

639

* at index.limit, and postpone processing to accommodate future incoming

640

* characters

641

* @stable ICU 2.4

642

643

virtual void filteredTransliterate(Replaceable& text,

644

UTransPosition& index,

645

UBool incremental) const;

646

647

private:

648

649

/**

650

* Top-level transliteration method, handling filtering, incremental and

651

* non-incremental transliteration, and rollback. All transliteration

652

* public API methods eventually call this method with a rollback argument

653

* of TRUE. Other entities may call this method but rollback should be

654

* FALSE.

655

656

* If this transliterator has a filter, break up the input text into runs

657

* of unfiltered characters. Pass each run to

658

* <subclass>.handleTransliterate().

659

660

* In incremental mode, if rollback is TRUE, perform a special

661

* incremental procedure in which several passes are made over the input

662

* text, adding one character at a time, and committing successful

663

* transliterations as they occur. Unsuccessful transliterations are rolled

664

* back and retried with additional characters to give correct results.

665

666

* @param text the text to be transliterated

667

* @param index the position indices

668

* @param incremental if TRUE, then assume more characters may be inserted

669

* at index.limit, and postpone processing to accommodate future incoming

670

* characters

671

* @param rollback if TRUE and if incremental is TRUE, then perform special

672

* incremental processing, as described above, and undo partial

673

* transliterations where necessary. If incremental is FALSE then this

674

* parameter is ignored.

675

676

virtual void filteredTransliterate(Replaceable& text,

677

UTransPosition& index,

678

UBool incremental,

679

UBool rollback) const;

680

681

public:

682

683

/**

684

* Returns the length of the longest context required by this transliterator.

685

* This is preceding context. The default implementation supplied

686

* by <code>Transliterator</code> returns zero; subclasses

687

* that use preceding context should override this method to return the

688

* correct value. For example, if a transliterator translates "ddd" (where

689

* d is any digit) to "555" when preceded by "(ddd)", then the preceding

690

* context length is 5, the length of "(ddd)".

691

692

* @return The maximum number of preceding context characters this

693

* transliterator needs to examine

694

* @stable ICU 2.0

695

696

int32_t getMaximumContextLength(void) const;

697

698

protected:

699

700

/**

701

* Method for subclasses to use to set the maximum context length.

702

* @param maxContextLength the new value to be set.

703

* @see #getMaximumContextLength

704

* @stable ICU 2.4

705

706

void setMaximumContextLength(int32_t maxContextLength);

707

708

public:

709

710

/**

711

* Returns a programmatic identifier for this transliterator.

712

* If this identifier is passed to <code>createInstance()</code>, it

713

* will return this object, if it has been registered.

714

* @return a programmatic identifier for this transliterator.

715

* @see #registerInstance

716

* @see #registerFactory

717

* @see #getAvailableIDs

718

* @stable ICU 2.0

719

720

virtual const UnicodeString& getID(void) const;

721

722

/**

723

* Returns a name for this transliterator that is appropriate for

724

* display to the user in the default locale. See {@link

725

* #getDisplayName } for details.

726

* @param ID the string identifier for this transliterator

727

* @param result Output param to receive the display name

728

* @return A reference to 'result'.

729

* @stable ICU 2.0

730

731

static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID,

732

UnicodeString& result);

733

734

/**

735

* Returns a name for this transliterator that is appropriate for

736

* display to the user in the given locale. This name is taken

737

* from the locale resource data in the standard manner of the

738

* <code>java.text</code> package.

739

740

* If no localized names exist in the system resource bundles,

741

* a name is synthesized using a localized

742

* <code>MessageFormat</code> pattern from the resource data. The

743

* arguments to this pattern are an integer followed by one or two

744

* strings. The integer is the number of strings, either 1 or 2.

745

* The strings are formed by splitting the ID for this

746

* transliterator at the first '-'. If there is no '-', then the

747

* entire ID forms the only string.

748

* @param ID the string identifier for this transliterator

749

* @param inLocale the Locale in which the display name should be

750

* localized.

751

* @param result Output param to receive the display name

752

* @return A reference to 'result'.

753

* @stable ICU 2.0

754

755

static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID,

756

const Locale& inLocale,

757

UnicodeString& result);

758

759

/**

760

* Returns the filter used by this transliterator, or <tt>NULL</tt>

761

* if this transliterator uses no filter.

762

* @return the filter used by this transliterator, or <tt>NULL</tt>

763

* if this transliterator uses no filter.

764

* @stable ICU 2.0

765

766

const UnicodeFilter* getFilter(void) const;

767

768

/**

769

* Returns the filter used by this transliterator, or <tt>NULL</tt> if this

770

* transliterator uses no filter. The caller must eventually delete the

771

* result. After this call, this transliterator's filter is set to

772

* <tt>NULL</tt>.

773

* @return the filter used by this transliterator, or <tt>NULL</tt> if this

774

* transliterator uses no filter.

775

* @stable ICU 2.4

776

777

UnicodeFilter* orphanFilter(void);

778

779

/**

780

* Changes the filter used by this transliterator. If the filter

781

* is set to <tt>null</tt> then no filtering will occur.

782

783

* Callers must take care if a transliterator is in use by

784

* multiple threads. The filter should not be changed by one

785

* thread while another thread may be transliterating.

786

* @param adoptedFilter the new filter to be adopted.

787

* @stable ICU 2.0

788

789

void adoptFilter(UnicodeFilter* adoptedFilter);

790

791

/**

792

* Returns this transliterator's inverse. See the class

793

* documentation for details. This implementation simply inverts

794

* the two entities in the ID and attempts to retrieve the

795

* resulting transliterator. That is, if <code>getID()</code>

796

* returns "A-B", then this method will return the result of

797

* <code>createInstance("B-A")</code>, or <code>null</code> if that

798

* call fails.

799

800

* Subclasses with knowledge of their inverse may wish to

801

* override this method.

802

803

* @param status Output param to be filled in with a success or an error.

804

* @return a transliterator that is an inverse, not necessarily

805

* exact, of this transliterator, or <code>null</code> if no such

806

* transliterator is registered.

807

* @see #registerInstance

808

* @stable ICU 2.0

809

810

Transliterator* createInverse(UErrorCode& status) const;

811

812

/**

813

* Returns a <code>Transliterator</code> object given its ID.

814

* The ID must be either a system transliterator ID or an ID registered

815

* using <code>registerInstance()</code>.

816

817

* @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code>

818

* @param dir either FORWARD or REVERSE.

819

* @param parseError Struct to receive information on position

820

* of error if an error is encountered

821

* @param status Output param to be filled in with a success or an error.

822

* @return A <code>Transliterator</code> object with the given ID

823

* @see #registerInstance

824

* @see #getAvailableIDs

825

* @see #getID

826

* @stable ICU 2.0

827

828

static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID,

829

UTransDirection dir,

830

UParseError& parseError,

831

UErrorCode& status);

832

833

/**

834

* Returns a <code>Transliterator</code> object given its ID.

835

* The ID must be either a system transliterator ID or an ID registered

836

* using <code>registerInstance()</code>.

837

* @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code>

838

* @param dir either FORWARD or REVERSE.

839

* @param status Output param to be filled in with a success or an error.

840

* @return A <code>Transliterator</code> object with the given ID

841

* @stable ICU 2.0

842

843

static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID,

844

UTransDirection dir,

845

UErrorCode& status);

846

847

/**

848

* Returns a <code>Transliterator</code> object constructed from

849

* the given rule string. This will be a RuleBasedTransliterator,

850

* if the rule string contains only rules, or a

851

* CompoundTransliterator, if it contains ID blocks, or a

852

* NullTransliterator, if it contains ID blocks which parse as

853

* empty for the given direction.

854

* @param ID the id for the transliterator.

855

* @param rules rules, separated by ';'

856

* @param dir either FORWARD or REVERSE.

857

* @param parseError Struct to receive information on position

858

* of error if an error is encountered

859

* @param status Output param set to success/failure code.

860

* @stable ICU 2.0

861

862

static Transliterator* U_EXPORT2 createFromRules(const UnicodeString& ID,

863

const UnicodeString& rules,

864

UTransDirection dir,

865

UParseError& parseError,

866

UErrorCode& status);

867

868

/**

869

* Create a rule string that can be passed to createFromRules()

870

* to recreate this transliterator.

871

* @param result the string to receive the rules. Previous

872

* contents will be deleted.

873

* @param escapeUnprintable if TRUE then convert unprintable

874

* characters to their hex escape representations, \\uxxxx or

875

* \\Uxxxxxxxx. Unprintable characters are those other than

876

* U+000A, U+0020..U+007E.

877

* @stable ICU 2.0

878

879

virtual UnicodeString& toRules(UnicodeString& result,

880

UBool escapeUnprintable) const;

881

882

/**

883

* Return the number of elements that make up this transliterator.

884

* For example, if the transliterator "NFD;Jamo-Latin;Latin-Greek"

885

* were created, the return value of this method would be 3.

886

887

* If this transliterator is not composed of other

888

* transliterators, then this method returns 1.

889

* @return the number of transliterators that compose this

890

* transliterator, or 1 if this transliterator is not composed of

891

* multiple transliterators

892

* @stable ICU 3.0

893

894

int32_t countElements() const;

895

896

/**

897

* Return an element that makes up this transliterator. For

898

* example, if the transliterator "NFD;Jamo-Latin;Latin-Greek"

899

* were created, the return value of this method would be one

900

* of the three transliterator objects that make up that

901

* transliterator: [NFD, Jamo-Latin, Latin-Greek].

902

903

* If this transliterator is not composed of other

904

* transliterators, then this method will return a reference to

905

* this transliterator when given the index 0.

906

* @param index a value from 0..countElements()-1 indicating the

907

* transliterator to return

908

* @param ec input-output error code

909

* @return one of the transliterators that makes up this

910

* transliterator, if this transliterator is made up of multiple

911

* transliterators, otherwise a reference to this object if given

912

* an index of 0

913

* @stable ICU 3.0

914

915

const Transliterator& getElement(int32_t index, UErrorCode& ec) const;

916

917

/**

918

* Returns the set of all characters that may be modified in the

919

* input text by this Transliterator. This incorporates this

920

* object's current filter; if the filter is changed, the return

921

* value of this function will change. The default implementation

922

* returns an empty set. Some subclasses may override {@link

923

* #handleGetSourceSet } to return a more precise result. The

924

* return result is approximate in any case and is intended for

925

* use by tests, tools, or utilities.

926

* @param result receives result set; previous contents lost

927

* @return a reference to result

928

* @see #getTargetSet

929

* @see #handleGetSourceSet

930

* @stable ICU 2.4

931

932

UnicodeSet& getSourceSet(UnicodeSet& result) const;

933

934

/**

935

* Framework method that returns the set of all characters that

936

* may be modified in the input text by this Transliterator,

937

* ignoring the effect of this object's filter. The base class

938

* implementation returns the empty set. Subclasses that wish to

939

* implement this should override this method.

940

* @return the set of characters that this transliterator may

941

* modify. The set may be modified, so subclasses should return a

942

* newly-created object.

943

* @param result receives result set; previous contents lost

944

* @see #getSourceSet

945

* @see #getTargetSet

946

* @stable ICU 2.4

947

948

virtual void handleGetSourceSet(UnicodeSet& result) const;

949

950

/**

951

* Returns the set of all characters that may be generated as

952

* replacement text by this transliterator. The default

953

* implementation returns the empty set. Some subclasses may

954

* override this method to return a more precise result. The

955

* return result is approximate in any case and is intended for

956

* use by tests, tools, or utilities requiring such

957

* meta-information.

958

* @param result receives result set; previous contents lost

959

* @return a reference to result

960

* @see #getTargetSet

961

* @stable ICU 2.4

962

963

virtual UnicodeSet& getTargetSet(UnicodeSet& result) const;

964

965

public:

966

967

/**

968

* Registers a factory function that creates transliterators of

969

* a given ID.

970

* @param id the ID being registered

971

* @param factory a function pointer that will be copied and

972

* called later when the given ID is passed to createInstance()

973

* @param context a context pointer that will be stored and

974

* later passed to the factory function when an ID matching

975

* the registration ID is being instantiated with this factory.

976

* @stable ICU 2.0

977

978

static void U_EXPORT2 registerFactory(const UnicodeString& id,

979

Factory factory,

980

Token context);

981

982

/**

983

* Registers an instance <tt>obj</tt> of a subclass of

984

* <code>Transliterator</code> with the system. When

985

* <tt>createInstance()</tt> is called with an ID string that is

986

* equal to <tt>obj->getID()</tt>, then <tt>obj->clone()</tt> is

987

* returned.

988

989

* After this call the Transliterator class owns the adoptedObj

990

* and will delete it.

991

992

* @param adoptedObj an instance of subclass of

993

* <code>Transliterator</code> that defines <tt>clone()</tt>

994

* @see #createInstance

995

* @see #registerFactory

996

* @see #unregister

997

* @stable ICU 2.0

998

999

static void U_EXPORT2 registerInstance(Transliterator* adoptedObj);

1000

1001

/**

1002

* Registers an ID string as an alias of another ID string.

1003

* That is, after calling this function, <tt>createInstance(aliasID)</tt>

1004

* will return the same thing as <tt>createInstance(realID)</tt>.

1005

* This is generally used to create shorter, more mnemonic aliases

1006

* for long compound IDs.

1007

1008

* @param aliasID The new ID being registered.

1009

* @param realID The ID that the new ID is to be an alias for.

1010

* This can be a compound ID and can include filters and should

1011

* refer to transliterators that have already been registered with

1012

* the framework, although this isn't checked.

1013

* @draft ICU 3.6

1014

1015

static void U_EXPORT2 registerAlias(const UnicodeString& aliasID,

1016

const UnicodeString& realID);

1017

1018

protected:

1019

1020

/**

1021

* @internal

1022

* @param id the ID being registered

1023

* @param factory a function pointer that will be copied and

1024

* called later when the given ID is passed to createInstance()

1025

* @param context a context pointer that will be stored and

1026

* later passed to the factory function when an ID matching

1027

* the registration ID is being instantiated with this factory.

1028

1029

static void _registerFactory(const UnicodeString& id,

1030

Factory factory,

1031

Token context);

1032

1033

/**

1034

* @internal

1035

1036

static void _registerInstance(Transliterator* adoptedObj);

1037

1038

/**

1039

* @internal

1040

1041

static void _registerAlias(const UnicodeString& aliasID, const UnicodeString& realID);

1042

1043

/**

1044

* Register two targets as being inverses of one another. For

1045

* example, calling registerSpecialInverse("NFC", "NFD", true) causes

1046

* Transliterator to form the following inverse relationships:

1047

1048

* <pre>NFC => NFD

1049

* Any-NFC => Any-NFD

1050

* NFD => NFC

1051

* Any-NFD => Any-NFC</pre>

1052

1053

* (Without the special inverse registration, the inverse of NFC

1054

* would be NFC-Any.) Note that NFD is shorthand for Any-NFD, but

1055

* that the presence or absence of "Any-" is preserved.

1056

1057

* The relationship is symmetrical; registering (a, b) is

1058

* equivalent to registering (b, a).

1059

1060

* The relevant IDs must still be registered separately as

1061

* factories or classes.

1062

1063

* Only the targets are specified. Special inverses always

1064

* have the form Any-Target1 <=> Any-Target2. The target should

1065

* have canonical casing (the casing desired to be produced when

1066

* an inverse is formed) and should contain no whitespace or other

1067

* extraneous characters.

1068

1069

* @param target the target against which to register the inverse

1070

* @param inverseTarget the inverse of target, that is

1071

* Any-target.getInverse() => Any-inverseTarget

1072

* @param bidirectional if true, register the reverse relation

1073

* as well, that is, Any-inverseTarget.getInverse() => Any-target

1074

* @internal

1075

1076

static void _registerSpecialInverse(const UnicodeString& target,

1077

const UnicodeString& inverseTarget,

1078

UBool bidirectional);

1079

1080

public:

1081

1082

/**

1083

* Unregisters a transliterator or class. This may be either

1084

* a system transliterator or a user transliterator or class.

1085

* Any attempt to construct an unregistered transliterator based

1086

* on its ID will fail.

1087

1088

* @param ID the ID of the transliterator or class

1089

* @return the <code>Object</code> that was registered with

1090

* <code>ID</code>, or <code>null</code> if none was

1091

* @see #registerInstance

1092

* @see #registerFactory

1093

* @stable ICU 2.0

1094

1095

static void U_EXPORT2 unregister(const UnicodeString& ID);

1096

1097

public:

1098

1099

/**

1100

* Return a StringEnumeration over the IDs available at the time of the

1101

* call, including user-registered IDs.

1102

* @param ec input-output error code

1103

* @return a newly-created StringEnumeration over the transliterators

1104

* available at the time of the call. The caller should delete this object

1105

* when done using it.

1106

* @stable ICU 3.0

1107

1108

static StringEnumeration* U_EXPORT2 getAvailableIDs(UErrorCode& ec);

1109

1110

/**

1111

* Return the number of registered source specifiers.

1112

* @return the number of registered source specifiers.

1113

* @stable ICU 2.0

1114

1115

static int32_t U_EXPORT2 countAvailableSources(void);

1116

1117

/**

1118

* Return a registered source specifier.

1119

* @param index which specifier to return, from 0 to n-1, where

1120

* n = countAvailableSources()

1121

* @param result fill-in parameter to receive the source specifier.

1122

* If index is out of range, result will be empty.

1123

* @return reference to result

1124

* @stable ICU 2.0

1125

1126

static UnicodeString& U_EXPORT2 getAvailableSource(int32_t index,

1127

UnicodeString& result);

1128

1129

/**

1130

* Return the number of registered target specifiers for a given

1131

* source specifier.

1132

* @param source the given source specifier.

1133

* @return the number of registered target specifiers for a given

1134

* source specifier.

1135

* @stable ICU 2.0

1136

1137

static int32_t U_EXPORT2 countAvailableTargets(const UnicodeString& source);

1138

1139

/**

1140

* Return a registered target specifier for a given source.

1141

* @param index which specifier to return, from 0 to n-1, where

1142

* n = countAvailableTargets(source)

1143

* @param source the source specifier

1144

* @param result fill-in parameter to receive the target specifier.

1145

* If source is invalid or if index is out of range, result will

1146

* be empty.

1147

* @return reference to result

1148

* @stable ICU 2.0

1149

1150

static UnicodeString& U_EXPORT2 getAvailableTarget(int32_t index,

1151

const UnicodeString& source,

1152

UnicodeString& result);

1153

1154

/**

1155

* Return the number of registered variant specifiers for a given

1156

* source-target pair.

1157

* @param source the source specifiers.

1158

* @param target the target specifiers.

1159

* @stable ICU 2.0

1160

1161

static int32_t U_EXPORT2 countAvailableVariants(const UnicodeString& source,

1162

const UnicodeString& target);

1163

1164

/**

1165

* Return a registered variant specifier for a given source-target

1166

* pair.

1167

* @param index which specifier to return, from 0 to n-1, where

1168

* n = countAvailableVariants(source, target)

1169

* @param source the source specifier

1170

* @param target the target specifier

1171

* @param result fill-in parameter to receive the variant

1172

* specifier. If source is invalid or if target is invalid or if

1173

* index is out of range, result will be empty.

1174

* @return reference to result

1175

* @stable ICU 2.0

1176

1177

static UnicodeString& U_EXPORT2 getAvailableVariant(int32_t index,

1178

const UnicodeString& source,

1179

const UnicodeString& target,

1180

UnicodeString& result);

1181

1182

protected:

1183

1184

/**

1185

* Non-mutexed internal method

1186

* @internal

1187

1188

static int32_t _countAvailableSources(void);

1189

1190

/**

1191

* Non-mutexed internal method

1192

* @internal

1193

1194

static UnicodeString& _getAvailableSource(int32_t index,

1195

UnicodeString& result);

1196

1197

/**

1198

* Non-mutexed internal method

1199

* @internal

1200

1201

static int32_t _countAvailableTargets(const UnicodeString& source);

1202

1203

/**

1204

* Non-mutexed internal method

1205

* @internal

1206

1207

static UnicodeString& _getAvailableTarget(int32_t index,

1208

const UnicodeString& source,

1209

UnicodeString& result);

1210

1211

/**

1212

* Non-mutexed internal method

1213

* @internal

1214

1215

static int32_t _countAvailableVariants(const UnicodeString& source,

1216

const UnicodeString& target);

1217

1218

/**

1219

* Non-mutexed internal method

1220

* @internal

1221

1222

static UnicodeString& _getAvailableVariant(int32_t index,

1223

const UnicodeString& source,

1224

const UnicodeString& target,

1225

UnicodeString& result);

1226

1227

protected:

1228

1229

/**

1230

* Set the ID of this transliterator. Subclasses shouldn't do

1231

* this, unless the underlying script behavior has changed.

1232

* @param id the new id to be set.

1233

* @stable ICU 2.4

1234

1235

void setID(const UnicodeString& id);

1236

1237

public:

1238

1239

/**

1240

* Return the class ID for this class. This is useful only for

1241

* comparing to a return value from getDynamicClassID().

1242

* Note that Transliterator is an abstract base class, and therefore

1243

* no fully constructed object will have a dynamic

1244

* UClassID that equals the UClassID returned from

1245

* Transliterator::getStaticClassID().

1246

* @return The class ID for class Transliterator.

1247

* @stable ICU 2.0

1248

1249

static UClassID U_EXPORT2 getStaticClassID(void);

1250

1251

/**

1252

* Returns a unique class ID polymorphically. This method

1253

* is to implement a simple version of RTTI, since not all C++

1254

* compilers support genuine RTTI. Polymorphic operator==() and

1255

* clone() methods call this method.

1256

1257

* Concrete subclasses of Transliterator must use the

1258

* UOBJECT_DEFINE_RTTI_IMPLEMENTATION macro from

1259

* uobject.h to provide the RTTI functions.

1260

1261

* @return The class ID for this object. All objects of a given

1262

* class have the same class ID. Objects of other classes have

1263

* different class IDs.

1264

* @stable ICU 2.0

1265

1266

virtual UClassID getDynamicClassID(void) const = 0;

1267

1268

private:

1269

static UBool initializeRegistry(void);

1270

1271

public:

1272

/**

1273

* Return the number of IDs currently registered with the system.

1274

* To retrieve the actual IDs, call getAvailableID(i) with

1275

* i from 0 to countAvailableIDs() - 1.

1276

* @return the number of IDs currently registered with the system.

1277

* @obsolete ICU 3.4 use getAvailableIDs() instead

1278

1279

static int32_t U_EXPORT2 countAvailableIDs(void);

1280

1281

/**

1282

* Return the index-th available ID. index must be between 0

1283

* and countAvailableIDs() - 1, inclusive. If index is out of

1284

* range, the result of getAvailableID(0) is returned.

1285

* @param index the given ID index.

1286

* @return the index-th available ID. index must be between 0

1287

* and countAvailableIDs() - 1, inclusive. If index is out of

1288

* range, the result of getAvailableID(0) is returned.

1289

* @obsolete ICU 3.4 use getAvailableIDs() instead; this function

1290

* is not thread safe, since it returns a reference to storage that

1291

* may become invalid if another thread calls unregister

1292

1293

static const UnicodeString& U_EXPORT2 getAvailableID(int32_t index);

1294

};

1295

1296

inline int32_t Transliterator::getMaximumContextLength(void) const {

1297

return maximumContextLength;

1298

}

1299

1300

inline void Transliterator::setID(const UnicodeString& id) {

1301

ID = id;

1302

// NUL-terminate the ID string, which is a non-aliased copy.

1303

ID.append((UChar)0);

1304

ID.truncate(ID.length()-1);

1305

}

1306

1307

/**
 * Builds a Token that carries an integer.
 * @param i the integer value to store.
 * @return a Token whose <code>integer</code> member is set to <code>i</code>.
 */
inline Transliterator::Token Transliterator::integerToken(int32_t i) {
    Token result;
    result.integer = i;
    return result;
}

1312

1313

/**
 * Builds a Token that carries a pointer.
 * @param p the pointer value to store.
 * @return a Token whose <code>pointer</code> member is set to <code>p</code>.
 */
inline Transliterator::Token Transliterator::pointerToken(void* p) {
    Token result;
    result.pointer = p;
    return result;
}

1318

1319

U_NAMESPACE_END

1320

1321

#endif /* #if !UCONFIG_NO_TRANSLITERATION */

1322

1323

#endif

Older »