4
* Copyright (c) 2001 by Sun Microsystems, Inc.
5
* Author: Chookij Vanatham <Chookij.Vanatham@Eng.Sun.COM>
7
* Hebrew points positioning improvements 2001
8
* Author: Dov Grobgeld <dov@imagic.weizmann.ac.il>
10
* This library is free software; you can redistribute it and/or
11
* modify it under the terms of the GNU Library General Public
12
* License as published by the Free Software Foundation; either
13
* version 2 of the License, or (at your option) any later version.
15
* This library is distributed in the hope that it will be useful,
16
* but WITHOUT ANY WARRANTY; without even the implied warranty of
17
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18
* Library General Public License for more details.
20
* You should have received a copy of the GNU Library General Public
21
* License along with this library; if not, write to the
22
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23
* Boston, MA 02111-1307, USA.
25
* Note March 9, 2003: I fixed a crash with regards to precomposed
26
* characters, by wrapping all of them to be considered as ALEF as
27
* far as consideration about composability is concerned. The rendering
28
* with regards to precomposed characters AND nikud comes out really
29
* bad though, and should be fixed, once I have more time.
34
#include "pango-engine.h"
35
#include "hebrew-shaper.h"
37
/* Wrap all characters above 0xF00 to ALEF. */
38
/* Non-zero when wc lies strictly between U+0590 and U+0600; both bounds
 * are excluded.  Note that wc is evaluated twice. */
#define ishebrew(wc) (0x590 < (wc) && (wc) < 0x600)
39
/* Convert a code point into the 0x10-based index used to look up the
 * 128-entry character tables: U+0590 becomes 0x10, U+05FF becomes 0x7F.
 * The result is an unsigned int; no range check is performed here. */
#define ucs2iso8859_8(wc) ((unsigned int)(wc) - 0x0590u + 0x10u)
40
/* Inverse of ucs2iso8859_8: turn a 0x10-based table index back into its
 * code point (index 0x10 maps to U+0590). */
#define iso8859_8_2uni(c) ((gunichar)(c) + 0x0590 - 0x10)
42
/* Upper bound on the number of code points gathered into one cluster; the
 * collection loop in hebrew_shaper_get_next_cluster stops at this count. */
#define MAX_CLUSTER_CHRS 256
44
/* Define Hebrew character classes */
48
#define _DA (1<<2) /* only for dagesh... */
51
/* Readable alias for the "spacing characters" class (SP).
 * NOTE(review): _SP itself is not defined in the visible part of the file. */
#define SpacingLetter _SP
52
/* Readable alias for the "non spacing" class (NS): accents and points.
 * NOTE(review): _NS itself is not defined in the visible part of the file. */
#define NonSpacingPunc _NS
54
/* Define Hebrew character types */
60
/* Unicode definitions needed in logics below... */
61
/* Hebrew letters, in code point order. */
#define UNI_ALEF 0x05D0
#define UNI_BET 0x05D1
#define UNI_GIMMEL 0x05D2
#define UNI_DALED 0x05D3
#define UNI_VAV 0x05D5
#define UNI_YOD 0x05D9
#define UNI_FINAL_KAF 0x05DA
#define UNI_KAF 0x05DB
#define UNI_LAMED 0x05DC
#define UNI_FINAL_PE 0x05E3
/* PE is tested by the kerning code (MAPIQ in PE or FINAL PE), so it must
 * be defined alongside FINAL PE. */
#define UNI_PE 0x05E4
#define UNI_QOF 0x05E7
#define UNI_RESH 0x05E8
#define UNI_SHIN 0x05E9
#define UNI_TAV 0x05EA

/* Hebrew points and dots, in code point order.  The kerning code relies
 * on range tests such as UNI_SHEVA..UNI_QAMATS and
 * UNI_HATAF_SEGOL..UNI_HATAF_QAMATZ, so these values must stay ordered. */
#define UNI_SHEVA 0x05B0
#define UNI_HATAF_SEGOL 0x05B1
#define UNI_HATAF_QAMATZ 0x05B3
#define UNI_TSERE 0x05B5
#define UNI_QAMATS 0x05B8
#define UNI_HOLAM 0x05B9
#define UNI_QUBUTS 0x05BB
#define UNI_MAPIQ 0x05BC
#define UNI_SHIN_DOT 0x05C1
#define UNI_SIN_DOT 0x05C2
88
/*======================================================================
89
// In the tables below all Hebrew characters are categorized to
90
// one of the following four classes:
92
// non used entries Not defined (ND)
93
// accents, points Non spacing (NS)
94
// punctuation and characters Spacing characters (SP)
95
// dagesh "Dagesh" (DA)
96
//----------------------------------------------------------------------*/
97
static const gint char_class_table[128] = {
98
/* 0, 1, 2, 3, 4, 5, 6, 7 */
100
/*00*/ _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
101
_ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
103
/*10*/ _ND, _NS, _NS, _NS, _NS, _NS, _NS, _NS,
104
_NS, _NS, _NS, _NS, _NS, _NS, _NS, _NS,
105
/*20*/ _NS, _NS, _ND, _NS, _NS, _NS, _NS, _NS,
106
_NS, _NS, _NS, _NS, _NS, _NS, _NS, _NS,
107
/*30*/ _NS, _NS, _NS, _NS, _NS, _NS, _NS, _NS,
108
_NS, _NS, _ND, _NS, _DA, _NS, _SP, _NS,
109
/*40*/ _SP, _NS, _NS, _SP, _NS, _ND, _ND, _ND,
110
_ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
111
/*50*/ _SP, _SP, _SP, _SP, _SP, _SP, _SP, _SP,
112
_SP, _SP, _SP, _SP, _SP, _SP, _SP, _SP,
113
/*60*/ _SP, _SP, _SP, _SP, _SP, _SP, _SP, _SP,
114
_SP, _SP, _SP, _ND, _ND, _ND, _ND, _ND,
115
/*70*/ _SP, _SP, _SP, _SP, _SP, _ND, _ND, _ND,
116
_ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
119
static const gint char_type_table[128] = {
120
/* 0, 1, 2, 3, 4, 5, 6, 7 */
122
/*00*/ __ND, __ND, __ND, __ND, __ND, __ND, __ND, __ND,
123
__ND, __ND, __ND, __ND, __ND, __ND, __ND, __ND,
125
/*10*/ __ND, __NS, __NS, __NS, __NS, __NS, __NS, __NS,
126
__NS, __NS, __NS, __NS, __NS, __NS, __NS, __NS,
127
/*20*/ __NS, __NS, __ND, __NS, __NS, __NS, __NS, __NS,
128
__NS, __NS, __NS, __NS, __NS, __NS, __NS, __NS,
129
/*30*/ __NS, __NS, __NS, __NS, __NS, __NS, __NS, __NS,
130
__NS, __NS, __ND, __NS, __DA, __NS, __SP, __NS,
131
/*40*/ __SP, __NS, __NS, __SP, __NS, __ND, __ND, __ND,
132
__ND, __ND, __ND, __ND, __ND, __ND, __ND, __ND,
133
/*50*/ __SP, __SP, __SP, __SP, __SP, __SP, __SP, __SP,
134
__SP, __SP, __SP, __SP, __SP, __SP, __SP, __SP,
135
/*60*/ __SP, __SP, __SP, __SP, __SP, __SP, __SP, __SP,
136
__SP, __SP, __SP, __ND, __ND, __ND, __ND, __ND,
137
/*70*/ __SP, __SP, __SP, __SP, __SP, __ND, __ND, __ND,
138
__ND, __ND, __ND, __ND, __ND, __ND, __ND, __ND,
141
/*======================================================================
142
// The following table answers the question whether two characters
143
// are composable or not. The decision is made by looking at the
144
// char_type_table values for the first character in a cluster
145
// vs a following character. The only three combinations that
146
// are composable in Hebrew according to the table are:
148
// 1. a spacing character followed by non-spacing character
149
// 2. a spacing character followed by a dagesh.
150
// 3. a dagesh followed by a non-spacing character.
152
// Note that a spacing character may be followed by several non-spacing
153
// accents, as the decision is always made on the base character of
155
//----------------------------------------------------------------------*/
156
static const gboolean compose_table[4][4] = {
157
/* Cn */ /* 0, 1, 2, 3, */
158
/* Cn-1 00 */ { FALSE, FALSE, FALSE, FALSE },
159
/* 10 */ { FALSE, FALSE, TRUE, TRUE },
160
/* 20 */ { FALSE, FALSE, FALSE, FALSE },
161
/* 30 */ { FALSE, FALSE, TRUE, FALSE },
164
/* Treat all characters above 0xF000 as characters */
165
/* Non-zero when wc falls in the half-open range [U+0590, U+0600).
 * Note that wc is evaluated twice. */
#define is_hebrew(wc) (0x590 <= (wc) && (wc) < 0x600)
166
/* Mask the class bits stored for wc in char_class_table; the result is
 * non-zero when wc carries any of the bits in mask.  wc is not
 * range-checked: the caller must ensure its table index is below 128. */
#define is_char_class(wc, mask) ((mask) & char_class_table[ucs2iso8859_8 (wc)])
167
/* Type code stored for wc in char_type_table (no range check on wc). */
#define hebrew_char_type(wc) (char_type_table[ucs2iso8859_8 (wc)])

/* Look up compose_table to decide whether nxt_wc may be attached to a
 * cluster whose first character is cur_wc.  Row = type of cur_wc,
 * column = type of nxt_wc.  Both arguments must map to table indices
 * below 128; nothing is checked here. */
#define is_composible(cur_wc, nxt_wc) \
  (compose_table[hebrew_char_type (cur_wc)][hebrew_char_type (nxt_wc)])
170
G_CONST_RETURN char *
171
hebrew_shaper_get_next_cluster(const char *text,
181
while (p < text + length && n_chars < MAX_CLUSTER_CHRS)
183
gunichar current = g_utf8_get_char (p);
185
if (!ishebrew (current) ||
186
(n_chars == 0 && is_char_class(current, ~(NoDefine|SpacingLetter))))
188
/* Not a legal Hebrew cluster */
192
cluster[n_chars++] = current;
193
p = g_utf8_next_char (p);
197
else if (n_chars == 0 ||
198
is_composible (cluster[0], current))
200
cluster[n_chars++] = current;
201
p = g_utf8_next_char (p);
212
hebrew_shaper_get_cluster_kerning(gunichar *cluster,
214
PangoRectangle ink_rect[],
216
/* input and output */
222
int base_ink_x_offset, base_ink_y_offset, base_ink_width, base_ink_height;
223
gunichar base_char = cluster[0];
228
if (cluster_length == 1)
230
/* Make lone 'vav dot' have zero width */
231
if (base_char == UNI_SHIN_DOT
232
|| base_char == UNI_SIN_DOT
233
|| base_char == UNI_HOLAM
235
x_offset[0] = -ink_rect[0].x - ink_rect[0].width;
242
base_ink_x_offset = ink_rect[0].x;
243
base_ink_y_offset = ink_rect[0].y;
244
base_ink_width = ink_rect[0].width;
245
base_ink_height = ink_rect[0].height;
248
for (i=1; i<cluster_length; i++)
254
/* Check if it is a point */
255
if (gl < 0x5B0 || gl >= 0x05D0)
258
/* Center dot of VAV */
259
if (gl == UNI_MAPIQ && base_char == UNI_VAV)
261
x_offset[i] = base_ink_x_offset - ink_rect[i].x;
263
/* If VAV is a vertical bar without a roof, then we
264
need to make room for the dot by increasing the
265
cluster width. But how can I check if that is the
268
/* This is wild, but it does the job of differentiating
269
between two M$ fonts... Base the decision on the
270
aspect ratio of the vav...
272
if (base_ink_height > base_ink_width * 3.5)
278
/* Shift all characters to make place for the mapiq */
280
x_offset[j] += ink_rect[i].width*(1+space-kern);
282
width[cluster_length-1] += ink_rect[i].width*(1+space-kern);
283
x_offset[i] -= ink_rect[i].width*(kern);
288
else if (gl == UNI_SHIN_DOT && base_char == UNI_SHIN)
290
x_offset[i] = base_ink_x_offset + base_ink_width
291
- ink_rect[i].x - ink_rect[i].width;
295
else if (gl == UNI_SIN_DOT && base_char == UNI_SHIN)
297
x_offset[i] = base_ink_x_offset - ink_rect[i].x;
300
/* VOWEL DOT above to any other character than
301
SHIN or VAV should stick out a bit to the left. */
302
else if ((gl == UNI_SIN_DOT || gl == UNI_HOLAM)
303
&& base_char != UNI_SHIN && base_char != UNI_VAV)
305
x_offset[i] = base_ink_x_offset -ink_rect[i].x - ink_rect[i].width * 3/ 2;
308
/* VOWELS under resh or vav are right aligned, if they are
309
narrower than the characters. Otherwise they are centered.
311
else if ((base_char == UNI_VAV
312
|| base_char == UNI_RESH
313
|| base_char == UNI_YOD
314
|| base_char == UNI_DALED
316
&& ((gl >= UNI_SHEVA && gl <= UNI_QAMATS) ||
318
&& ink_rect[i].width < base_ink_width
321
x_offset[i] = base_ink_x_offset + base_ink_width
322
- ink_rect[i].x - ink_rect[i].width;
325
/* VOWELS under FINAL KAF are offset centered and offset in
327
else if ((base_char == UNI_FINAL_KAF
329
&& ((gl >= UNI_SHEVA && gl <= UNI_QAMATS) ||
332
/* x is placed at 1/3 of the base width to take the stem into account */
333
x_offset[i] = base_ink_x_offset - ink_rect[i].x
334
+ base_ink_width * 1/3 - ink_rect[i].width/2;
337
y_offset[i] = base_ink_y_offset - ink_rect[i].y
338
+ base_ink_height * 1/2 - ink_rect[i].height/2;
342
/* MAPIQ in PE or FINAL PE */
343
else if (gl == UNI_MAPIQ
344
&& (base_char == UNI_PE || base_char == UNI_FINAL_PE))
346
x_offset[i]= base_ink_x_offset - ink_rect[i].x
347
+ base_ink_width * 2/3 - ink_rect[i].width/2;
349
/* Another option is to offset the MAPIQ in y...
350
glyphs->glyphs[cluster_start_idx+i].geometry.y_offset
351
-= base_ink_height/5; */
354
/* MAPIQ in SHIN should be moved a bit to the right */
355
else if (gl == UNI_MAPIQ
356
&& base_char == UNI_SHIN)
358
x_offset[i]= base_ink_x_offset - ink_rect[i].x
359
+ base_ink_width * 3/5 - ink_rect[i].width/2;
362
/* MAPIQ in YUD is right aligned */
363
else if (gl == UNI_MAPIQ
364
&& base_char == UNI_YOD)
366
x_offset[i]= base_ink_x_offset - ink_rect[i].x;
368
/* Lower left in y */
369
y_offset[i] = base_ink_y_offset - ink_rect[i].y
370
+ base_ink_height - ink_rect[i].height*1.75;
372
if (base_ink_height > base_ink_width * 2)
378
/* Shift all cluster characters to make space for mapiq */
380
x_offset[j] += ink_rect[i].width*(1+space-kern);
382
width[cluster_length-1] += ink_rect[i].width*(1+space-kern);
387
/* VOWEL DOT next to any other character */
388
else if ((gl == UNI_SIN_DOT || gl == UNI_HOLAM)
389
&& (base_char != UNI_VAV))
391
x_offset[i] = base_ink_x_offset -ink_rect[i].x;
394
/* Move nikud of taf a bit ... */
395
else if (base_char == UNI_TAV && gl == UNI_MAPIQ)
397
x_offset[i] = base_ink_x_offset - ink_rect[i].x
398
+ base_ink_width * 5/8 - ink_rect[i].width/2;
401
/* Move center dot of characters with a right stem and no
403
else if (gl == UNI_MAPIQ &&
404
(base_char == UNI_BET
405
|| base_char == UNI_DALED
406
|| base_char == UNI_KAF
407
|| base_char == UNI_GIMMEL
410
x_offset[i] = base_ink_x_offset - ink_rect[i].x
411
+ base_ink_width * 3/8 - ink_rect[i].width/2;
414
/* Right align wide nikud under QOF */
415
else if (base_char == UNI_QOF &&
416
( (gl >= UNI_HATAF_SEGOL
417
&& gl <= UNI_HATAF_QAMATZ)
420
|| (gl == UNI_QUBUTS)))
422
x_offset[i] = base_ink_x_offset + base_ink_width
423
- ink_rect[i].x - ink_rect[i].width;
426
/* Center by default */
429
x_offset[i] = base_ink_x_offset - ink_rect[i].x
430
+ base_ink_width/2 - ink_rect[i].width/2;
437
hebrew_shaper_swap_range (PangoGlyphString *glyphs,
443
for (i = start, j = end - 1; i < j; i++, j--)
445
PangoGlyphInfo glyph_info;
448
glyph_info = glyphs->glyphs[i];
449
glyphs->glyphs[i] = glyphs->glyphs[j];
450
glyphs->glyphs[j] = glyph_info;
452
log_cluster = glyphs->log_clusters[i];
453
glyphs->log_clusters[i] = glyphs->log_clusters[j];
454
glyphs->log_clusters[j] = log_cluster;
459
hebrew_shaper_bidi_reorder(PangoGlyphString *glyphs)
463
/* Swap all glyphs */
464
hebrew_shaper_swap_range (glyphs, 0, glyphs->num_glyphs);
466
/* Now reorder glyphs within each cluster back to LTR */
467
for (start = 0; start < glyphs->num_glyphs;)
470
while (end < glyphs->num_glyphs &&
471
glyphs->log_clusters[end] == glyphs->log_clusters[start])
474
hebrew_shaper_swap_range (glyphs, start, end);