39
39
package java.awt.font;
41
41
import java.io.Serializable;
42
import java.lang.Character.UnicodeBlock;
45
* This class handles numeric shaping. A shaper can either be contextual
46
* or not. A non-contextual shaper will always translate ASCII digits
47
* in its input into the target Unicode range. A contextual shaper will
48
* change the target Unicode range depending on the characters it has
49
* previously processed.
44
51
* @author Michael Koch
55
* @specnote This class does not handle LIMBU or OSMANYA.
56
* @specnote The JDK does not seem to properly handle ranges without a
57
* digit zero, such as TAMIL. This implementation does.
47
59
public final class NumericShaper implements Serializable
49
61
private static final long serialVersionUID = -8022764705923730308L;
63
/** Convenience constant representing all the valid Unicode ranges. */
51
64
public static final int ALL_RANGES = 524287;
67
* Constant representing the Unicode ARABIC range. Shaping done
68
* using this range will translate to the Arabic decimal characters.
69
* Use EASTERN_ARABIC if you want to shape to the Eastern Arabic
70
* (also known as the Extended Arabic) decimal characters.
52
72
public static final int ARABIC = 2;
74
/** Constant representing the Unicode BENGALI range. */
53
75
public static final int BENGALI = 16;
77
/** Constant representing the Unicode DEVANAGARI range. */
54
78
public static final int DEVANAGARI = 8;
81
* Constant representing the Unicode Extended Arabic range.
82
* In Unicode there are two different sets of Arabic digits;
83
* this selects the extended or eastern set.
55
85
public static final int EASTERN_ARABIC = 4;
88
* Constant representing the Unicode ETHIOPIC range. Note that
89
* there is no digit zero in this range; an ASCII digit zero
90
* is left unchanged when shaping to this range.
56
92
public static final int ETHIOPIC = 65536;
95
* Constant representing the Unicode EUROPEAN range. For
96
* contextual shaping purposes, characters in the various
97
* extended Latin character blocks are recognized as EUROPEAN.
57
99
public static final int EUROPEAN = 1;
101
/** Constant representing the Unicode GUJARATI range. */
58
102
public static final int GUJARATI = 64;
104
/** Constant representing the Unicode GURMUKHI range. */
59
105
public static final int GURMUKHI = 32;
107
/** Constant representing the Unicode KANNADA range. */
60
108
public static final int KANNADA = 1024;
110
/** Constant representing the Unicode KHMER range. */
61
111
public static final int KHMER = 131072;
113
/** Constant representing the Unicode LAO range. */
62
114
public static final int LAO = 8192;
116
/** Constant representing the Unicode MALAYALAM range. */
63
117
public static final int MALAYALAM = 2048;
119
/** Constant representing the Unicode MONGOLIAN range. */
64
120
public static final int MONGOLIAN = 262144;
122
/** Constant representing the Unicode MYANMAR range. */
65
123
public static final int MYANMAR = 32768;
125
/** Constant representing the Unicode ORIYA range. */
66
126
public static final int ORIYA = 128;
129
* Constant representing the Unicode TAMIL range. Note that
130
* there is no digit zero in this range; an ASCII digit zero
131
* is left unchanged when shaping to this range.
67
133
public static final int TAMIL = 256;
135
/** Constant representing the Unicode TELUGU range. */
68
136
public static final int TELUGU = 512;
138
/** Constant representing the Unicode THAI range. */
69
139
public static final int THAI = 4096;
141
/** Constant representing the Unicode TIBETAN range. */
70
142
public static final int TIBETAN = 16384;
75
private NumericShaper (int ranges, int context)
78
this.context = context;
145
* This table holds the zero digits for each language. This is hard-coded
146
* because the values will not change and the table layout is tied to the
147
* other constants in this class in any case. In the two places where a
148
* language does not have a zero digit, the character immediately preceding
149
* the one digit is used instead. These languages are special-cased in
152
private static final char[] zeroDigits =
156
'\u06f0', // EASTERN_ARABIC
157
'\u0966', // DEVANAGARI
159
'\u0a66', // GURMUKHI
160
'\u0ae6', // GUJARATI
162
'\u0be6', // TAMIL - special case as there is no digit zero
165
'\u0d66', // MALAYALAM
170
'\u1368', // ETHIOPIC - special case as there is no digit zero
172
'\u1810' // MONGOLIAN
176
* The default initial context for this shaper, specified as
177
* an integer from 0 to 18.
182
* The target ranges handled by this shaper. If the shaper
183
* is not contextual, the high bit of this field will be set.
184
* @specnote This was discovered by reading the serialization spec
189
* Create a new numeric shaper. The key given is a constant from
190
* this class; the constructor turns it into its internal form.
191
* @param key the key to use, as one of the manifest constants
192
* @param mask a mask of languages to shape for
194
private NumericShaper (int key, int mask)
196
// This internal form is a bit goofy, but it is specified by
197
// the serialization spec.
198
this.key = Integer.numberOfTrailingZeros(key);
203
* Return an integer representing all the languages for which this
204
* shaper will shape. The result is taken by "or"ing together
205
* the constants representing the various languages.
207
public int getRanges ()
209
return mask & ALL_RANGES;
213
* Return true if this shaper is contextual, false if it is not.
215
public boolean isContextual ()
221
* Shape the text in the given array. The starting context will
222
* be the context passed to the shaper at creation time.
223
* @param text the text to shape
224
* @param start the index of the first character of the text to shape
225
* @param count the number of characters to shape in the text
227
public void shape (char[] text, int start, int count)
229
shape (text, start, count, 1 << key);
233
* Given a Unicode block object, return the corresponding language constant.
234
* If the block is not recognized, returns zero. Note that as there
235
* is no separate ARABIC block in Character, this case must
236
* be specially handled by the caller; EASTERN_ARABIC is preferred when
237
* both are specified.
238
* @param b the Unicode block to classify
239
* @return the language constant, or zero if not recognized
241
private int classify(UnicodeBlock b)
245
// ARABIC is handled by the caller; from testing we know
246
// that EASTERN_ARABIC takes precedence.
247
if (b == UnicodeBlock.ARABIC)
248
return EASTERN_ARABIC;
249
if (b == UnicodeBlock.BENGALI)
251
if (b == UnicodeBlock.DEVANAGARI)
253
if (b == UnicodeBlock.ETHIOPIC)
255
if (b == UnicodeBlock.BASIC_LATIN
256
|| b == UnicodeBlock.LATIN_1_SUPPLEMENT
257
|| b == UnicodeBlock.LATIN_EXTENDED_A
258
|| b == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL
259
|| b == UnicodeBlock.LATIN_EXTENDED_B)
261
if (b == UnicodeBlock.GUJARATI)
263
if (b == UnicodeBlock.GURMUKHI)
265
if (b == UnicodeBlock.KANNADA)
267
if (b == UnicodeBlock.KHMER)
269
if (b == UnicodeBlock.LAO)
271
if (b == UnicodeBlock.MALAYALAM)
273
if (b == UnicodeBlock.MONGOLIAN)
275
if (b == UnicodeBlock.MYANMAR)
277
if (b == UnicodeBlock.ORIYA)
279
if (b == UnicodeBlock.TAMIL)
281
if (b == UnicodeBlock.TELUGU)
283
if (b == UnicodeBlock.THAI)
285
if (b == UnicodeBlock.TIBETAN)
291
* Shape the given text, using the indicated initial context.
292
* If this shaper is not a contextual shaper, then the given context
294
* @param text the text to shape
295
* @param start the index of the first character of the text to shape
296
* @param count the number of characters to shape in the text
297
* @param context the initial context
298
* @throws IllegalArgumentException if the initial context is invalid
300
public void shape (char[] text, int start, int count, int context)
305
if (Integer.bitCount(context) != 1 || (context & ~ALL_RANGES) != 0)
306
throw new IllegalArgumentException("invalid context argument");
307
// If the indicated context is not one we are handling, reset it.
308
if ((context & mask) == 0)
311
currentContext = Integer.numberOfTrailingZeros(context);
314
currentContext = key;
316
for (int i = 0; i < count; ++i)
318
char c = text[start + i];
319
if (c >= '0' && c <= '9')
321
if (currentContext >= 0)
323
// Shape into the current context.
325
&& ((1 << currentContext) == TAMIL
326
|| (1 << currentContext) == ETHIOPIC))
328
// No digit 0 in this context; do nothing.
332
= (char) (zeroDigits[currentContext] + c - '0');
335
else if (isContextual())
337
// if c is in a group, set currentContext; else reset it.
338
int group = classify(UnicodeBlock.of(c));
339
// Specially handle ARABIC.
340
if (group == EASTERN_ARABIC && (mask & EASTERN_ARABIC) == 0
341
&& (mask & ARABIC) != 0)
343
if ((mask & group) != 0)
345
// The character was classified as being in a group
346
// we recognize, and it was selected by the shaper.
347
// So, change the context.
348
currentContext = Integer.numberOfTrailingZeros(group);
81
354
public boolean equals (Object obj)
83
356
if (! (obj instanceof NumericShaper))
86
358
NumericShaper tmp = (NumericShaper) obj;
88
return (ranges == tmp.ranges
89
&& context == tmp.context);
359
return key == tmp.key && mask == tmp.mask;
362
public int hashCode ()
367
public String toString ()
369
// For debugging only.
370
return "key=" + key + "; mask=" + mask;
374
* Return a non-contextual shaper which can shape to a single range.
375
* All ASCII digits in the input text are translated to this language.
376
* @param singleRange the target language
377
* @return a non-contextual shaper for this language
378
* @throws IllegalArgumentException if the argument does not name a
379
* single language, as specified by the constants declared in this class
381
public static NumericShaper getShaper (int singleRange)
383
if (Integer.bitCount(singleRange) != 1)
384
throw new IllegalArgumentException("more than one bit set in argument");
385
if ((singleRange & ~ALL_RANGES) != 0)
386
throw new IllegalArgumentException("argument out of range");
387
return new NumericShaper(singleRange, Integer.MIN_VALUE | singleRange);
391
* Return a contextual shaper which can shape to any of the indicated
392
* languages. The default initial context for this shaper is EUROPEAN.
393
* @param ranges the ranges to shape to
394
* @return a contextual shaper which will target any of these ranges
395
* @throws IllegalArgumentException if the argument specifies an
92
398
public static NumericShaper getContextualShaper (int ranges)
94
throw new Error ("not implemented");
400
if ((ranges & ~ALL_RANGES) != 0)
401
throw new IllegalArgumentException("argument out of range");
402
return new NumericShaper(EUROPEAN, ranges);
406
* Return a contextual shaper which can shape to any of the indicated
407
* languages. The default initial context for this shaper is given as
409
* @param ranges the ranges to shape to
410
* @param defaultContext the default initial context
411
* @return a contextual shaper which will target any of these ranges
412
* @throws IllegalArgumentException if the ranges argument specifies an
413
* unrecognized range, or if the defaultContext argument does not specify
414
* a single valid range
97
416
public static NumericShaper getContextualShaper (int ranges,
98
417
int defaultContext)
100
throw new Error ("not implemented");
103
public int getRanges ()
108
public static NumericShaper getShaper (int singleRange)
110
throw new Error ("not implemented");
113
public int hashCode ()
115
throw new Error ("not implemented");
118
public boolean isContextual ()
120
throw new Error ("not implemented");
123
public void shape (char[] text, int start, int count)
125
shape (text, start, count, context);
128
public void shape (char[] text, int start, int count, int context)
130
throw new Error ("not implemented");
133
public String toString ()
135
throw new Error ("not implemented");
419
if (Integer.bitCount(defaultContext) != 1)
420
throw new IllegalArgumentException("more than one bit set in context");
421
if ((ranges & ~ALL_RANGES) != 0 || (defaultContext & ~ALL_RANGES) != 0)
422
throw new IllegalArgumentException("argument out of range");
423
return new NumericShaper(defaultContext, ranges);