/*
 * Copyright (C) 2015 The Qt Company Ltd
 *
 * This is part of HarfBuzz, an OpenType Layout engine library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 */

#include "harfbuzz-shaper.h"
#include "harfbuzz-shaper-private.h"
#include "harfbuzz-external.h"

#include <assert.h>
#include <stdlib.h>
#include <string.h>

/* Version number passed to HB_Library_Resolve when looking up libthai symbols. */
#define LIBTHAI_MAJOR   0

/*
 * Local copies of the libthai cell structure and entry-point signatures.
 * If libthai changes, please update these definitions too.
 */
struct thcell_t {
    unsigned char base;      /**< base character */
    unsigned char hilo;      /**< upper/lower vowel/diacritic */
    unsigned char top;       /**< top-level mark */
};

typedef int (*th_brk_def) (const unsigned char*, int*, size_t);
typedef int (*th_render_cell_tis_def) (struct thcell_t cell, unsigned char res[], size_t res_sz, int is_decomp_am);
typedef int (*th_render_cell_win_def) (struct thcell_t cell, unsigned char res[], size_t res_sz, int is_decomp_am);
typedef int (*th_render_cell_mac_def) (struct thcell_t cell, unsigned char res[], size_t res_sz, int is_decomp_am);
typedef size_t (*th_next_cell_def) (const unsigned char *, size_t, struct thcell_t *, int);

/* libthai related function handles; all stay 0 until init_libthai() resolves them */
static th_brk_def th_brk = 0;
static th_next_cell_def th_next_cell = 0;
static th_render_cell_tis_def th_render_cell_tis = 0;
static th_render_cell_win_def th_render_cell_win = 0;
static th_render_cell_mac_def th_render_cell_mac = 0;

55
static int init_libthai() {
56
static HB_Bool initialized = false;
57
if (!initialized && (!th_brk || !th_next_cell || !th_render_cell_tis || !th_render_cell_win || !th_render_cell_mac)) {
58
th_brk = (th_brk_def) HB_Library_Resolve("thai", (int)LIBTHAI_MAJOR, "th_brk");
59
th_next_cell = (th_next_cell_def)HB_Library_Resolve("thai", LIBTHAI_MAJOR, "th_next_cell");
60
th_render_cell_tis = (th_render_cell_tis_def) HB_Library_Resolve("thai", (int)LIBTHAI_MAJOR, "th_render_cell_tis");
61
th_render_cell_win = (th_render_cell_win_def) HB_Library_Resolve("thai", (int)LIBTHAI_MAJOR, "th_render_cell_win");
62
th_render_cell_mac = (th_render_cell_mac_def) HB_Library_Resolve("thai", (int)LIBTHAI_MAJOR, "th_render_cell_mac");
65
if (th_brk && th_next_cell && th_render_cell_tis && th_render_cell_win && th_render_cell_mac)
71
static void to_tis620(const HB_UChar16 *string, hb_uint32 len, char *cstr)
74
unsigned char *result = (unsigned char *)cstr;
76
for (i = 0; i < len; ++i) {
77
if (string[i] <= 0xa0)
78
result[i] = (unsigned char)string[i];
79
else if (string[i] >= 0xe01 && string[i] <= 0xe5b)
80
result[i] = (unsigned char)(string[i] - 0xe00 + 0xa0);
82
result[i] = (unsigned char)~0; // Same encoding as libthai uses for invalid chars
/*
 * ---------------------------------------------------------------------------
 * Thai Shaper / Attributes
 * ---------------------------------------------------------------------------
 */

95
* USe basic_features prepare for future adding.
98
static const HB_OpenTypeFeature thai_features[] = {
99
{ HB_MAKE_TAG('c', 'c', 'm', 'p'), CcmpProperty },
100
{ HB_MAKE_TAG('l', 'i', 'g', 'a'), CcmpProperty },
101
{ HB_MAKE_TAG('c', 'l', 'i', 'g'), CcmpProperty },
/*
 * TIS-to-Unicode glyph maps for characters 0x80-0xff.
 * Each table is indexed with (c & 0x7f); a 0 entry means "no glyph".
 * tis620_0 is the table used for the TIS font type.
 */
static const int tis620_0[128] = {
    /**/ 0,      0,      0,      0,      0,      0,      0,      0,
    /**/ 0,      0,      0,      0,      0,      0,      0,      0,
    /**/ 0,      0,      0,      0,      0,      0,      0,      0,
    /**/ 0,      0,      0,      0,      0,      0,      0,      0,
    0x0020, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
    0x0e38, 0x0e39, 0x0e3a,      0,      0,      0,      0, 0x0e3f,
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
    0x0e58, 0x0e59, 0x0e5a, 0x0e5b,      0,      0,      0,      0
};

/*
 * Glyph map for bytes 0x80-0xff used for the WIN font type
 * (see thai_get_glyph_index). Indexed with (c & 0x7f); 0 means "no glyph".
 */
static const int tis620_1[128] = {
    0xf89e,      0,      0, 0xf88c, 0xf88f, 0xf892, 0xf895, 0xf898,
    0xf88b, 0xf88e, 0xf891, 0xf894, 0xf897,      0,      0, 0xf899,
    0xf89a,      0, 0xf884, 0xf889, 0xf885, 0xf886, 0xf887, 0xf888,
    0xf88a, 0xf88d, 0xf890, 0xf893, 0xf896,      0,      0,      0,
    /**/ 0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
    0x0e38, 0x0e39, 0x0e3a,      0,      0,      0,      0, 0x0e3f,
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d,      0, 0x0e4f,
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
    0x0e58, 0x0e59,      0,      0, 0xf89b, 0xf89c, 0xf89d,      0
};

/*
 * Glyph map for bytes 0x80-0xff used for the MAC font type
 * (see thai_get_glyph_index). Indexed with (c & 0x7f); 0 means "no glyph".
 */
static const int tis620_2[128] = {
    0xf700, 0xf701, 0xf702, 0xf703, 0xf704, 0x2026, 0xf705, 0xf706,
    0xf707, 0xf708, 0xf709, 0xf70a, 0xf70b, 0xf70c, 0xf70d, 0xf70e,
    0xf70f, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
    0xf710, 0xf711, 0xf712, 0xf713, 0xf714, 0xf715, 0xf716, 0xf717,
    0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
    0x0e38, 0x0e39, 0x0e3a,      0,      0,      0,      0, 0x0e3f,
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
    0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0xf718, 0xf719, 0xf71a,      0
};

170
static int thai_get_glyph_index (ThaiFontType font_type, unsigned char c)
173
case TIS: return (c & 0x80) ? tis620_0[c & 0x7f] : c;
174
case WIN: return (c & 0x80) ? tis620_1[c & 0x7f] : c;
175
case MAC: return (c & 0x80) ? tis620_2[c & 0x7f] : c;
180
static int thai_contain_glyphs (HB_ShaperItem *shaper_item, const int glyph_map[128])
184
for (c = 0; c < 0x80; c++) {
185
if ( glyph_map[c] ) {
186
if ( !shaper_item->font->klass->canRender (shaper_item->font, (const HB_UChar16 *) &glyph_map[c], 1) )
193
static ThaiFontType getThaiFontType(HB_ShaperItem *shaper_item)
195
if ( thai_contain_glyphs (shaper_item, tis620_2) )
197
else if ( thai_contain_glyphs (shaper_item, tis620_1) )
204
* convert to the correct display level of THAI vowels and marks.
206
static HB_Bool HB_ThaiConvertStringToGlyphIndices (HB_ShaperItem *item)
210
const HB_UChar16 *string = item->string + item->item.pos;
211
const hb_uint32 len = item->item.length;
212
unsigned short *logClusters = item->log_clusters;
213
hb_uint32 i = 0, slen = 0;
216
return HB_BasicShape (item);
219
cstr = (char *)malloc(len*sizeof(char) + 1);
222
return HB_BasicShape (item);
224
to_tis620(string, len, cstr);
227
static ThaiFontType font_type;
228
static HB_Font itemFont;
229
if (itemFont != item->font) {
230
font_type = getThaiFontType (item);
231
itemFont = item->font;
234
/* allocate temporary glyphs buffers */
235
HB_STACKARRAY (HB_UChar16, glyphString, (item->item.length * 2));
237
while (i < item->item.length) {
238
struct thcell_t tis_cell;
239
unsigned char rglyphs[4];
242
HB_Bool haveSaraAm = false;
244
cell_length = (int)(th_next_cell ((const unsigned char *)cstr + i, len - i, &tis_cell, true)); /* !item->fixedPitch); */
245
haveSaraAm = (cstr[i + cell_length - 1] == (char)0xd3);
247
/* set shaper item's log_clusters */
248
logClusters[i] = slen;
249
for (int j = 1; j < cell_length; j++) {
250
logClusters[i + j] = logClusters[i];
253
/* Find Logical Glyphs by font type */
255
case TIS: lgn = th_render_cell_tis (tis_cell, rglyphs, sizeof(rglyphs) / sizeof(rglyphs[0]), true); break;
256
case WIN: lgn = th_render_cell_mac (tis_cell, rglyphs, sizeof(rglyphs) / sizeof(rglyphs[0]), true); break;
257
case MAC: lgn = th_render_cell_win (tis_cell, rglyphs, sizeof(rglyphs) / sizeof(rglyphs[0]), true); break;
260
/* Add glyphs to glyphs string and setting some attributes */
261
for (int lgi = 0; lgi < lgn; lgi++) {
262
if ( rglyphs[lgi] == 0xdd/*TH_BLANK_BASE_GLYPH*/ ) {
263
glyphString[slen++] = C_DOTTED_CIRCLE;
264
} else if ((unsigned char)cstr[i] == (unsigned char)~0) {
265
// The only glyphs that should be passed to this function that cannot be mapped to
266
// tis620 are the ones of type Inherited class. Pass these glyphs untouched.
267
glyphString[slen++] = string[i];
268
if (string[i] == 0x200D || string[i] == 0x200C) {
269
// Check that we do not run out of bounds when setting item->attributes. If we do
270
// run out of bounds then this function will return false, the necessary amount of
271
// memory is reallocated, and this function will then be called again.
272
if (slen <= item->num_glyphs)
273
item->attributes[slen-1].dontPrint = true; // Hide ZWJ and ZWNJ characters
276
glyphString[slen++] = (HB_UChar16) thai_get_glyph_index (font_type, rglyphs[lgi]);
280
/* Special case to handle U+0E33 (SARA AM, ำ): SARA AM is normally written at the end of a
281
* word with a base character and an optional top character before it. For example, U+0E0B
282
* (base), U+0E49 (top), U+0E33 (SARA AM). The sequence should be converted to 4 glyphs:
283
* base, hilo (the little circle in the top left part of SARA AM, NIKHAHIT), top, then the
284
* right part of SARA AM (SARA AA).
286
* The painting process finds out the starting glyph and ending glyph of a character
287
* sequence by checking the logClusters array. In this case, logClusters array should
288
* ideally be [ 0, 1, 3 ] so that glyphsStart = 0 and glyphsEnd = 3 (slen - 1) to paint out
289
* all the glyphs generated.
291
* A special case in this special case is when we have no base character. When an isolated
292
* SARA AM is processed (cell_length = 1), libthai will produce 3 glyphs: dotted circle
293
* (indicates that the base is empty), NIKHAHIT then SARA AA. If logClusters[0] = 1, it will
294
* paint from the second glyph in the glyphs array. So in this case logClusters[0] should
295
* point to the first glyph it produces, aka. the dotted circle. */
297
logClusters[i + cell_length - 1] = cell_length == 1 ? slen - 3 : slen - 1;
298
if (tis_cell.top != 0) {
299
if (cell_length > 1) {
300
/* set the logClusters[top character] to slen - 2 as it points to the second to
301
* lastglyph (slen - 2) */
302
logClusters[i + cell_length - 2] = slen - 2;
305
/* check for overflow */
306
if (logClusters[i + cell_length - 1] > slen)
307
logClusters[i + cell_length - 1] = 0;
312
glyphString[slen] = (HB_UChar16) '\0';
314
/* for check, should reallocate space or not */
315
HB_Bool spaceOK = (item->num_glyphs >= slen);
317
/* Convert to Glyph indices */
318
HB_Bool haveGlyphs = item->font->klass->convertStringToGlyphIndices (
321
item->glyphs, &item->num_glyphs,
324
HB_FREE_STACKARRAY (glyphString);
329
return (haveGlyphs && spaceOK);
333
* set the glyph attributes heuristically.
335
static void HB_ThaiHeuristicSetGlyphAttributes (HB_ShaperItem *item)
337
/* Set Glyph Attributes */
338
hb_uint32 iCluster = 0;
339
hb_uint32 length = item->item.length;
340
while (iCluster < length) {
341
int cluster_start = item->log_clusters[iCluster];
343
while (iCluster < length && item->log_clusters[iCluster] == cluster_start) {
346
int cluster_end = (iCluster < length) ? item->log_clusters[iCluster] : item->num_glyphs;
347
item->attributes[cluster_start].clusterStart = true;
348
for (int i = cluster_start + 1; i < cluster_end; i++) {
349
item->attributes[i].clusterStart = false;
357
HB_Bool HB_ThaiShape (HB_ShaperItem *shaper_item)
359
if ( !HB_ThaiConvertStringToGlyphIndices (shaper_item) )
362
HB_ThaiHeuristicSetGlyphAttributes (shaper_item);
365
const int availableGlyphs = shaper_item->num_glyphs;
366
if ( HB_SelectScript (shaper_item, thai_features) ) {
367
HB_OpenTypeShape (shaper_item, /*properties*/0);
368
return HB_OpenTypePosition (shaper_item, availableGlyphs, /*doLogClusters*/true);
372
HB_HeuristicPosition (shaper_item);
377
* Thai Attributes: computes Word Break, Word Boundary and Char stop for THAI.
379
static void HB_ThaiAssignAttributes(const HB_UChar16 *string, hb_uint32 len, HB_CharAttributes *attributes)
383
int *break_positions = 0;
386
hb_uint32 numbreaks, i, j, cell_length;
387
struct thcell_t tis_cell;
393
cstr = (char *)malloc(len*sizeof(char) + 1);
395
to_tis620(string, len, cstr);
397
for (i = 0; i < len; ++i) {
398
attributes[i].wordBreak = FALSE;
399
attributes[i].wordStart = FALSE;
400
attributes[i].wordEnd = FALSE;
401
attributes[i].lineBreak = FALSE;
405
break_positions = (int*) malloc (sizeof(int) * len);
406
memset (break_positions, 0, sizeof(int) * len);
410
break_positions = brp;
414
if (break_positions) {
415
attributes[0].wordBreak = TRUE;
416
attributes[0].wordStart = TRUE;
417
attributes[0].wordEnd = FALSE;
418
numbreaks = th_brk((const unsigned char *)cstr, break_positions, brp_size);
419
for (i = 0; i < numbreaks; ++i) {
420
attributes[break_positions[i]].wordBreak = TRUE;
421
attributes[break_positions[i]].wordStart = TRUE;
422
attributes[break_positions[i]].wordEnd = TRUE;
423
attributes[break_positions[i]].lineBreak = TRUE;
426
attributes[break_positions[numbreaks - 1]].wordStart = FALSE;
428
if (break_positions != brp)
429
free(break_positions);
432
/* manage grapheme boundaries */
435
cell_length = (hb_uint32)(th_next_cell((const unsigned char *)cstr + i, len - i, &tis_cell, true));
437
attributes[i].graphemeBoundary = true;
438
for (j = 1; j < cell_length; j++)
439
attributes[i + j].graphemeBoundary = false;
441
/* Set graphemeBoundary for SARA AM */
442
if (cstr[i + cell_length - 1] == (char)0xd3)
443
attributes[i + cell_length - 1].graphemeBoundary = true;
452
void HB_ThaiAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes)
454
assert(script == HB_Script_Thai);
455
const HB_UChar16 *uc = text + from;
458
HB_ThaiAssignAttributes(uc, len, attributes);