~ubuntu-branches/ubuntu/trusty/harfbuzz/trusty

« back to all changes in this revision

Viewing changes to src/hb-old/harfbuzz-khmer.c

Committer: Package Import Robot
Author(s): أحمد المحمودي (Ahmed El-Mahmoudy)
Date: 2012-12-07 10:31:26 UTC
Revision ID: package-import@ubuntu.com-20121207103126-uara6bzqpvrr85wl

Tags: upstream-0.9.9

Import upstream version 0.9.9

files added:

AUTHORS

COPYING

ChangeLog

INSTALL

Makefile.am

Makefile.in

NEWS

README

THANKS

TODO

aclocal.m4

autogen.sh

compile

config.guess

config.h.in

config.sub

configure

configure.ac

depcomp

harfbuzz.doap

harfbuzz.pc.in

install-sh

ltmain.sh

m4/ax_pthread.m4

m4/libtool.m4

m4/ltoptions.m4

m4/ltsugar.m4

m4/ltversion.m4

m4/lt~obsolete.m4

m4/pkg.m4

missing

src/Makefile.am

src/Makefile.in

src/check-c-linkage-decls.sh

src/check-exported-symbols.sh

src/check-header-guards.sh

src/check-includes.sh

src/check-internal-symbols.sh

src/check-libstdc++.sh

src/check-static-inits.sh

src/gen-arabic-table.py

src/gen-indic-table.py

src/hb-atomic-private.hh

src/hb-blob.cc

src/hb-blob.h

src/hb-buffer-private.hh

src/hb-buffer.cc

src/hb-buffer.h

src/hb-cache-private.hh

src/hb-common.cc

src/hb-common.h

src/hb-coretext.cc

src/hb-coretext.h

src/hb-fallback-shape.cc

src/hb-font-private.hh

src/hb-font.cc

src/hb-font.h

src/hb-ft.cc

src/hb-ft.h

src/hb-glib.cc

src/hb-glib.h

src/hb-gobject-enums.cc.tmpl

src/hb-gobject-structs.cc

src/hb-gobject.h

src/hb-graphite2.cc

src/hb-graphite2.h

src/hb-icu-le

src/hb-icu-le.cc

src/hb-icu-le/COPYING

src/hb-icu-le/FontTableCache.cpp

src/hb-icu-le/FontTableCache.h

src/hb-icu-le/Makefile.am

src/hb-icu-le/Makefile.in

src/hb-icu-le/PortableFontInstance.cpp

src/hb-icu-le/PortableFontInstance.h

src/hb-icu-le/README

src/hb-icu-le/cmaps.cpp

src/hb-icu-le/cmaps.h

src/hb-icu-le/letest.h

src/hb-icu-le/license.html

src/hb-icu-le/sfnt.h

src/hb-icu.cc

src/hb-icu.h

src/hb-mutex-private.hh

src/hb-object-private.hh

src/hb-old

src/hb-old.cc

src/hb-old/COPYING

src/hb-old/Makefile.am

src/hb-old/Makefile.in

src/hb-old/README

src/hb-old/harfbuzz-arabic.c

src/hb-old/harfbuzz-buffer-private.h

src/hb-old/harfbuzz-buffer.c

src/hb-old/harfbuzz-buffer.h

src/hb-old/harfbuzz-external.h

src/hb-old/harfbuzz-gdef-private.h

src/hb-old/harfbuzz-gdef.c

src/hb-old/harfbuzz-gdef.h

src/hb-old/harfbuzz-global.h

src/hb-old/harfbuzz-gpos-private.h

src/hb-old/harfbuzz-gpos.c

src/hb-old/harfbuzz-gpos.h

src/hb-old/harfbuzz-greek.c

src/hb-old/harfbuzz-gsub-private.h

src/hb-old/harfbuzz-gsub.c

src/hb-old/harfbuzz-gsub.h

src/hb-old/harfbuzz-hangul.c

src/hb-old/harfbuzz-hebrew.c

src/hb-old/harfbuzz-impl.c

src/hb-old/harfbuzz-impl.h

src/hb-old/harfbuzz-indic.cpp

src/hb-old/harfbuzz-khmer.c

src/hb-old/harfbuzz-myanmar.c

src/hb-old/harfbuzz-open-private.h

src/hb-old/harfbuzz-open.c

src/hb-old/harfbuzz-open.h

src/hb-old/harfbuzz-shaper-private.h

src/hb-old/harfbuzz-shaper.cpp

src/hb-old/harfbuzz-shaper.h

src/hb-old/harfbuzz-stream-private.h

src/hb-old/harfbuzz-stream.c

src/hb-old/harfbuzz-stream.h

src/hb-old/harfbuzz-tibetan.c

src/hb-old/harfbuzz.h

src/hb-open-file-private.hh

src/hb-open-type-private.hh

src/hb-ot-head-table.hh

src/hb-ot-hhea-table.hh

src/hb-ot-hmtx-table.hh

src/hb-ot-layout-common-private.hh

src/hb-ot-layout-gdef-table.hh

src/hb-ot-layout-gpos-table.hh

src/hb-ot-layout-gsub-table.hh

src/hb-ot-layout-gsubgpos-private.hh

src/hb-ot-layout-private.hh

src/hb-ot-layout.cc

src/hb-ot-layout.h

src/hb-ot-map-private.hh

src/hb-ot-map.cc

src/hb-ot-maxp-table.hh

src/hb-ot-name-table.hh

src/hb-ot-shape-complex-arabic-fallback.hh

src/hb-ot-shape-complex-arabic-table.hh

src/hb-ot-shape-complex-arabic.cc

src/hb-ot-shape-complex-default.cc

src/hb-ot-shape-complex-indic-machine.hh

src/hb-ot-shape-complex-indic-machine.rl

src/hb-ot-shape-complex-indic-private.hh

src/hb-ot-shape-complex-indic-table.hh

src/hb-ot-shape-complex-indic.cc

src/hb-ot-shape-complex-private.hh

src/hb-ot-shape-complex-thai.cc

src/hb-ot-shape-fallback-private.hh

src/hb-ot-shape-fallback.cc

src/hb-ot-shape-normalize-private.hh

src/hb-ot-shape-normalize.cc

src/hb-ot-shape-private.hh

src/hb-ot-shape.cc

src/hb-ot-tag.cc

src/hb-ot-tag.h

src/hb-ot.h

src/hb-private.hh

src/hb-set-private.hh

src/hb-set.cc

src/hb-set.h

src/hb-shape-plan-private.hh

src/hb-shape-plan.cc

src/hb-shape-plan.h

src/hb-shape.cc

src/hb-shape.h

src/hb-shaper-impl-private.hh

src/hb-shaper-list.hh

src/hb-shaper-private.hh

src/hb-shaper.cc

src/hb-tt-font.cc

src/hb-ucdn

src/hb-ucdn.cc

src/hb-ucdn/COPYING

src/hb-ucdn/Makefile.am

src/hb-ucdn/Makefile.in

src/hb-ucdn/README

src/hb-ucdn/ucdn.c

src/hb-ucdn/ucdn.h

src/hb-ucdn/unicodedata_db.h

src/hb-unicode-private.hh

src/hb-unicode.cc

src/hb-unicode.h

src/hb-uniscribe.cc

src/hb-uniscribe.h

src/hb-utf-private.hh

src/hb-version.h

src/hb-version.h.in

src/hb-warning.cc

src/hb.h

src/main.cc

src/test-size-params.cc

src/test-would-substitute.cc

src/test.cc

test

test/Makefile.am

test/Makefile.in

test/api

test/api/Makefile.am

test/api/Makefile.in

test/api/hb-test.h

test/api/test-blob.c

test/api/test-buffer.c

test/api/test-c.c

test/api/test-common.c

test/api/test-cplusplus.cc

test/api/test-font.c

test/api/test-object.c

test/api/test-ot-tag.c

test/api/test-shape.c

test/api/test-unicode.c

test/api/test-version.c

test/shaping

test/shaping/Makefile.am

test/shaping/Makefile.in

test/shaping/hb-diff

test/shaping/hb-diff-colorize

test/shaping/hb-diff-filter-failures

test/shaping/hb-diff-ngrams

test/shaping/hb-diff-stat

test/shaping/hb-manifest-read

test/shaping/hb-manifest-update

test/shaping/hb-unicode-decode

test/shaping/hb-unicode-encode

test/shaping/hb-unicode-prettyname

test/shaping/hb_test_tools.py

util

util/Makefile.am

util/Makefile.in

util/ansi-print.cc

util/ansi-print.hh

util/hb-ot-shape-closure.cc

util/hb-shape.cc

util/hb-view.cc

util/helper-cairo-ansi.cc

util/helper-cairo-ansi.hh

util/helper-cairo.cc

util/helper-cairo.hh

util/main-font-text.hh

util/options.cc

util/options.hh

util/shape-consumer.hh

util/view-cairo.cc

util/view-cairo.hh

Show diffs side-by-side

added added

removed removed

src/hb-old/harfbuzz-khmer.c

* This is part of HarfBuzz, an OpenType Layout engine library.

* Permission is hereby granted, without written agreement and without

* license or royalty fees, to use, copy, modify, and distribute this

* software and its documentation for any purpose, provided that the

* above copyright notice and the following two paragraphs appear in

* all copies of this software.

* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR

* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES

* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN

* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH

* DAMAGE.

* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,

* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND

* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS

* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO

* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.

#include "harfbuzz-shaper.h"

#include "harfbuzz-shaper-private.h"

#include <assert.h>

#include <stdio.h>

// Vocabulary

// Base -> A consonant or an independent vowel in its full (not subscript) form. It is the

// center of the syllable, it can be surrounded by coeng (subscript) consonants, vowels,

// split vowels, signs... but there is only one base in a syllable, it has to be coded as

// the first character of the syllable.

// split vowel --> vowel that has two parts placed separately (e.g. Before and after the consonant).

// Khmer language has five of them. Khmer split vowels either have one part before the

// base and one after the base or they have a part before the base and a part above the base.

// The first part of all Khmer split vowels is the same character, identical to

// the glyph of Khmer dependent vowel SRA EI

// coeng --> modifier used in Khmer to construct coeng (subscript) consonants

// Unlike Indian languages, the coeng modifies the consonant that follows it,

// not the one preceding it. Each consonant has two forms, the base form and the subscript form

// the base form is the normal one (using the consonants code-point), the subscript form is

// displayed when the combination coeng + consonant is encountered.

// Consonant of type 1 -> A consonant whose subscript form only occupies space under a base consonant

// Consonant of type 2 -> Its subscript form occupies space under and before the base (only one, RO)

// Consonant of Type 3 -> Its subscript form occupies space under and after the base (KHO, CHHO, THHO, BA, YO, SA)

// Consonant shifter -> Khmer has two series of consonants. The same dependent vowel has different sounds

// if it is attached to a consonant of the first series or a consonant of the second series.

// Most consonants have an equivalent in the other series, but some of them exist only in

// one series (for example SA). If we want to use the consonant SA with a vowel sound that

// corresponds to a vowel accompanying a consonant

// of the other series, then we need to use a consonant shifter: TRIISAP or MUSIKATOAN

// (U+17CA and U+17C9 respectively). TRIISAP changes a first series consonant to second series sound and

// MUSIKATOAN a second series consonant to have a first series vowel sound.

// Consonant shifters are both normally superscript marks, but, when they are followed by a

// superscript, they change shape and take the form of subscript dependent vowel SRA U.

// If they are in the same syllable as a coeng consonant, Unicode 3.0 says that they

// should be typed before the coeng. Unicode 4.0 breaks the standard and says that it should

// be placed after the coeng consonant.

// Dependent vowel -> In khmer dependent vowels can be placed above, below, before or after the base

// Each vowel has its own position. Only one vowel per syllable is allowed.

// Signs -> Khmer has above signs and post signs. Only one above sign and/or one post sign are

// Allowed in a syllable.

// order is important here! This order must be the same that is found in each horizontal

// line in the statetable for Khmer (see khmerStateTable) .

/*
 * Character class indices. Each value doubles as the column index used to
 * look up khmerStateTable, so the enumerators must stay in this order.
 */
enum KhmerCharClassValues {
    CC_RESERVED,            /*  0 */
    CC_CONSONANT,           /*  1: consonant of type 1 or independent vowel */
    CC_CONSONANT2,          /*  2: consonant of type 2 */
    CC_CONSONANT3,          /*  3: consonant of type 3 */
    CC_ZERO_WIDTH_NJ_MARK,  /*  4: zero width non joiner character (0x200C) */
    CC_CONSONANT_SHIFTER,   /*  5 */
    CC_ROBAT,               /*  6: Khmer special diacritic accent - treated differently in state table */
    CC_COENG,               /*  7: subscript consonant combining character */
    CC_DEPENDENT_VOWEL,     /*  8 */
    CC_SIGN_ABOVE,          /*  9 */
    CC_SIGN_AFTER,          /* 10 */
    CC_ZERO_WIDTH_J_MARK,   /* 11: zero width joiner character */
    CC_COUNT                /* 12: the number of character classes */
};

/*
 * Flag bits that are ORed onto a character class value. The class value
 * itself lives in the bits selected by CF_CLASS_MASK; the position of a
 * mark relative to the base lives in the bits selected by CF_POS_MASK.
 */
enum KhmerCharClassFlags {
    CF_CLASS_MASK    = 0x0000FFFF,

    CF_CONSONANT     = 0x01000000,  /* flag to speed up comparing */
    CF_SPLIT_VOWEL   = 0x02000000,  /* flag for a split vowel -> the first part is added in front of the syllable */
    CF_DOTTED_CIRCLE = 0x04000000,  /* add a dotted circle if a character with this flag is the first in a syllable */
    CF_COENG         = 0x08000000,  /* flag to speed up comparing */
    CF_SHIFTER       = 0x10000000,  /* flag to speed up comparing */
    CF_ABOVE_VOWEL   = 0x20000000,  /* flag to speed up comparing */

    /* position flags */
    CF_POS_BEFORE    = 0x00080000,
    CF_POS_BELOW     = 0x00040000,
    CF_POS_ABOVE     = 0x00020000,
    CF_POS_AFTER     = 0x00010000,
    CF_POS_MASK      = 0x000f0000
};

/* Characters that get referred to by name */
enum KhmerChar {
    C_SIGN_ZWNJ     = 0x200C,
    C_SIGN_ZWJ      = 0x200D,
    C_RO            = 0x179A,
    C_VOWEL_AA      = 0x17B6,
    C_SIGN_NIKAHIT  = 0x17C6,
    C_VOWEL_E       = 0x17C1,
    C_COENG         = 0x17D2
};

118

119

120

121

// simple classes, they are used in the statetable (in this file) to control the length of a syllable

122

// they are also used to know where a character should be placed (location in reference to the base character)

123

// and also to know if a character, when independently displayed, should be displayed with a dotted-circle to

124

// indicate error in syllable construction

125

126

enum {

127

_xx = CC_RESERVED,

128

_sa = CC_SIGN_ABOVE | CF_DOTTED_CIRCLE | CF_POS_ABOVE,

129

_sp = CC_SIGN_AFTER | CF_DOTTED_CIRCLE| CF_POS_AFTER,

130

_c1 = CC_CONSONANT | CF_CONSONANT,

131

_c2 = CC_CONSONANT2 | CF_CONSONANT,

132

_c3 = CC_CONSONANT3 | CF_CONSONANT,

133

_rb = CC_ROBAT | CF_POS_ABOVE | CF_DOTTED_CIRCLE,

134

_cs = CC_CONSONANT_SHIFTER | CF_DOTTED_CIRCLE | CF_SHIFTER,

135

_dl = CC_DEPENDENT_VOWEL | CF_POS_BEFORE | CF_DOTTED_CIRCLE,

136

_db = CC_DEPENDENT_VOWEL | CF_POS_BELOW | CF_DOTTED_CIRCLE,

137

_da = CC_DEPENDENT_VOWEL | CF_POS_ABOVE | CF_DOTTED_CIRCLE | CF_ABOVE_VOWEL,

138

_dr = CC_DEPENDENT_VOWEL | CF_POS_AFTER | CF_DOTTED_CIRCLE,

139

_co = CC_COENG | CF_COENG | CF_DOTTED_CIRCLE,

140

141

/* split vowel */

142

_va = _da | CF_SPLIT_VOWEL,

143

_vr = _dr | CF_SPLIT_VOWEL

144

};

// Character class: a character class value
// ORed with character class flags.
typedef unsigned long KhmerCharClass;

// Character class tables
// _xx character does not combine into syllable, such as numbers, punctuation marks, non-Khmer signs...
// _sa Sign placed above the base
// _sp Sign placed after the base
// _c1 Consonant of type 1 or independent vowel (independent vowels behave as type 1 consonants)
// _c2 Consonant of type 2 (only RO)
// _c3 Consonant of type 3
// _rb Khmer sign robat u17CC. combining mark for subscript consonants
// _cs Consonant-shifter
// _dl Dependent vowel placed before the base (left of the base)
// _db Dependent vowel placed below the base
// _da Dependent vowel placed above the base
// _dr Dependent vowel placed behind the base (right of the base)
// _co Khmer combining mark COENG u17D2, combines with the consonant or independent vowel following
//     it to create a subscript consonant or independent vowel
// _va Khmer split vowel in which the first part is before the base and the second one above the base
// _vr Khmer split vowel in which the first part is before the base and the second one behind (right of) the base


static const KhmerCharClass khmerCharClasses[] = {

174

_c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, /* 1780 - 178F */

175

_c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c2, _c1, _c1, _c1, _c3, _c3, /* 1790 - 179F */

176

_c1, _c3, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, /* 17A0 - 17AF */

177

_c1, _c1, _c1, _c1, _dr, _dr, _dr, _da, _da, _da, _da, _db, _db, _db, _va, _vr, /* 17B0 - 17BF */

178

_vr, _dl, _dl, _dl, _vr, _vr, _sa, _sp, _sp, _cs, _cs, _sa, _rb, _sa, _sa, _sa, /* 17C0 - 17CF */

179

_sa, _sa, _co, _sa, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _sa, _xx, _xx /* 17D0 - 17DF */

180

};

/* this enum must reflect the range of khmerCharClasses */
enum KhmerCharClassesRange {
    KhmerFirstChar = 0x1780,
    KhmerLastChar  = 0x17df
};

187

188

189

// Below we define how a character in the input string is either in the khmerCharClasses table

190

// (in which case we get its type back), a ZWJ or ZWNJ (two characters that may appear

191

// within the syllable, but are not in the table) we also get their type back, or an unknown object

192

// in which case we get _xx (CC_RESERVED) back

193

194

static KhmerCharClass getKhmerCharClass(HB_UChar16 uc)

195

{

196

if (uc == C_SIGN_ZWJ) {

197

return CC_ZERO_WIDTH_J_MARK;

198

}

199

200

if (uc == C_SIGN_ZWNJ) {

201

return CC_ZERO_WIDTH_NJ_MARK;

202

}

203

204

if (uc < KhmerFirstChar || uc > KhmerLastChar) {

205

return CC_RESERVED;

206

}

207

208

return khmerCharClasses[uc - KhmerFirstChar];

209

}

// The stateTable is used to calculate the end (the length) of a well
// formed Khmer Syllable.

// Each horizontal line is ordered exactly the same way as the values in
// enum KhmerCharClassValues. This coincidence of values allows the follow up of the table.

// Each line corresponds to a state, which does not necessarily need to be a type
// of component... for example, state 2 is a base, which is always a first character
// in the syllable, but the state could be produced by a consonant of any type when
// it is the first character that is analysed (in ground state).

// Differentiating 3 types of consonants is necessary in order to
// forbid the use of certain combinations, such as having a second
// coeng after a coeng RO.
// The inexistent possibility of having a type 3 after another type 3 is permitted,
// eliminating it would very much complicate the table, and it does not create typing
// problems, as the case above.

// The table is quite complex, in order to limit the number of coeng consonants
// to 2 (by means of the table).

// There is a peculiarity, as far as Unicode is concerned:
// - The consonant-shifter is considered in two possible different
// locations, the one considered in Unicode 3.0 and the one considered in
// Unicode 4.0. (there is a backwards compatibility problem in this standard).


// xx independent character, such as a number, punctuation sign or non-khmer char

// c1 Khmer consonant of type 1 or an independent vowel
// that is, a letter in which the subscript form is only under the
// base, not taking any space to the right or to the left

// c2 Khmer consonant of type 2, the coeng form takes space under
// and to the left of the base (only RO is of this type)

// c3 Khmer consonant of type 3. Its subscript form takes space under
// and to the right of the base.

// cs Khmer consonant shifter

// rb Khmer robat

// co coeng character (u17D2)

// dv dependent vowel (including split vowels, they are treated in the same way).
// even if dv is not defined above, the component that is really tested for is
// CC_DEPENDENT_VOWEL, which is common to all dependent vowels

// zwj Zero Width joiner

// zwnj Zero width non joiner

// sa above sign

// sp post sign

// there are lines with equal content but for an easier understanding
// (and maybe change in the future) we did not join them


/*
 * Syllable state machine: khmerStateTable[state][class] is the next state,
 * or -1 when the character cannot continue the current syllable.
 * Columns follow the order of enum KhmerCharClassValues.
 */
static const signed char khmerStateTable[][CC_COUNT] =
{
    /* xx  c1  c2  c3 zwnj cs  rb  co  dv  sa  sp zwj */
    {  1,  2,  2,  2,  1,  1,  1,  6,  1,  1,  1,  2}, /*  0 - ground state */
    { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, /*  1 - exit state (or sign to the right of the syllable) */
    { -1, -1, -1, -1,  3,  4,  5,  6, 16, 17,  1, -1}, /*  2 - Base consonant */
    { -1, -1, -1, -1, -1,  4, -1, -1, 16, -1, -1, -1}, /*  3 - First ZWNJ before a register shifter. It can only be followed by a shifter or a vowel */
    { -1, -1, -1, -1, 15, -1, -1,  6, 16, 17,  1, 14}, /*  4 - First register shifter */
    { -1, -1, -1, -1, -1, -1, -1, -1, 20, -1,  1, -1}, /*  5 - Robat */
    { -1,  7,  8,  9, -1, -1, -1, -1, -1, -1, -1, -1}, /*  6 - First Coeng */
    { -1, -1, -1, -1, 12, 13, -1, 10, 16, 17,  1, 14}, /*  7 - First consonant of type 1 after coeng */
    { -1, -1, -1, -1, 12, 13, -1, -1, 16, 17,  1, 14}, /*  8 - First consonant of type 2 after coeng */
    { -1, -1, -1, -1, 12, 13, -1, 10, 16, 17,  1, 14}, /*  9 - First consonant of type 3 after coeng */
    { -1, 11, 11, 11, -1, -1, -1, -1, -1, -1, -1, -1}, /* 10 - Second Coeng (no register shifter before) */
    { -1, -1, -1, -1, 15, -1, -1, -1, 16, 17,  1, 14}, /* 11 - Second coeng consonant (or ind. vowel) no register shifter before */
    { -1, -1, -1, -1, -1, 13, -1, -1, 16, -1, -1, -1}, /* 12 - Second ZWNJ before a register shifter */
    { -1, -1, -1, -1, 15, -1, -1, -1, 16, 17,  1, 14}, /* 13 - Second register shifter */
    { -1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, /* 14 - ZWJ before vowel */
    { -1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, /* 15 - ZWNJ before vowel */
    { -1, -1, -1, -1, -1, -1, -1, -1, -1, 17,  1, 18}, /* 16 - dependent vowel */
    { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, 18}, /* 17 - sign above */
    { -1, -1, -1, -1, -1, -1, -1, 19, -1, -1, -1, -1}, /* 18 - ZWJ after vowel */
    { -1,  1, -1,  1, -1, -1, -1, -1, -1, -1, -1, -1}, /* 19 - Third coeng */
    { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, -1}, /* 20 - dependent vowel after a Robat */
};

/*
 * Debug tracing. Uncomment the define below to route KHDEBUG to qDebug.
 * NOTE(review): qDebug is not declared in this file - confirm it is
 * available before enabling KHMER_DEBUG.
 *
 * When disabled, KHDEBUG(...) expands to a statement whose printf call is
 * never reached, so its arguments are not evaluated. The "if (1) {} else"
 * form is used so that a following "else" in the caller's code cannot
 * attach to the macro's own "if".
 */
/* #define KHMER_DEBUG */
#ifdef KHMER_DEBUG
#define KHDEBUG qDebug
#else
#define KHDEBUG if (1) {} else printf
#endif

306

307

308

// Given an input string of characters and a location in which to start looking

309

// calculate, using the state table, which one is the last character of the syllable

310

// that starts in the starting position.

311

312

static int khmer_nextSyllableBoundary(const HB_UChar16 *s, int start, int end, HB_Bool *invalid)

313

{

314

const HB_UChar16 *uc = s + start;

315

int state = 0;

316

int pos = start;

317

*invalid = FALSE;

318

319

while (pos < end) {

320

KhmerCharClass charClass = getKhmerCharClass(*uc);

321

if (pos == start) {

322

*invalid = (charClass > 0) && ! (charClass & CF_CONSONANT);

323

}

324

state = khmerStateTable[state][charClass & CF_CLASS_MASK];

325

326

KHDEBUG("state[%d]=%d class=%8lx (uc=%4x)", pos - start, state,

327

charClass, *uc );

328

329

if (state < 0) {

330

break;

331

}

332

++uc;

333

++pos;

334

}

335

return pos;

336

}

337

338

#ifndef NO_OPENTYPE

339

static const HB_OpenTypeFeature khmer_features[] = {

340

{ HB_MAKE_TAG( 'p', 'r', 'e', 'f' ), PreFormProperty },

341

{ HB_MAKE_TAG( 'b', 'l', 'w', 'f' ), BelowFormProperty },

342

{ HB_MAKE_TAG( 'a', 'b', 'v', 'f' ), AboveFormProperty },

343

{ HB_MAKE_TAG( 'p', 's', 't', 'f' ), PostFormProperty },

344

{ HB_MAKE_TAG( 'p', 'r', 'e', 's' ), PreSubstProperty },

345

{ HB_MAKE_TAG( 'b', 'l', 'w', 's' ), BelowSubstProperty },

346

{ HB_MAKE_TAG( 'a', 'b', 'v', 's' ), AboveSubstProperty },

347

{ HB_MAKE_TAG( 'p', 's', 't', 's' ), PostSubstProperty },

348

{ HB_MAKE_TAG( 'c', 'l', 'i', 'g' ), CligProperty },

349

{ 0, 0 }

350

};

351

#endif

352

353

354

static HB_Bool khmer_shape_syllable(HB_Bool openType, HB_ShaperItem *item)

355

{

356

/* KHDEBUG("syllable from %d len %d, str='%s'", item->from, item->length,

357

item->string->mid(item->from, item->length).toUtf8().data()); */

358

359

int len = 0;

360

int syllableEnd = item->item.pos + item->item.length;

361

unsigned short reordered[16];

362

unsigned char properties[16];

363

enum {

364

AboveForm = 0x01,

365

PreForm = 0x02,

366

PostForm = 0x04,

367

BelowForm = 0x08

368

};

369

#ifndef NO_OPENTYPE

370

const int availableGlyphs = item->num_glyphs;

371

#endif

372

int coengRo;

373

int i;

374

375

/* according to the specs this is the max length one can get

376

### the real value should be smaller */

377

assert(item->item.length < 13);

378

379

memset(properties, 0, 16*sizeof(unsigned char));

380

381

#ifdef KHMER_DEBUG

382

qDebug("original:");

383

for (int i = from; i < syllableEnd; i++) {

384

qDebug(" %d: %4x", i, string[i]);

385

}

386

#endif

387

388

389

// write a pre vowel or the pre part of a split vowel first

390

// and look out for coeng + ro. RO is the only vowel of type 2, and

391

// therefore the only one that requires saving space before the base.

392

393

coengRo = -1; /* There is no Coeng Ro, if found this value will change */

394

for (i = item->item.pos; i < syllableEnd; i += 1) {

395

KhmerCharClass charClass = getKhmerCharClass(item->string[i]);

396

397

/* if a split vowel, write the pre part. In Khmer the pre part

398

is the same for all split vowels, same glyph as pre vowel C_VOWEL_E */

399

if (charClass & CF_SPLIT_VOWEL) {

400

reordered[len] = C_VOWEL_E;

401

properties[len] = PreForm;

402

++len;

403

break; /* there can be only one vowel */

404

}

405

/* if a vowel with pos before write it out */

406

if (charClass & CF_POS_BEFORE) {

407

reordered[len] = item->string[i];

408

properties[len] = PreForm;

409

++len;

410

break; /* there can be only one vowel */

411

}

412

/* look for coeng + ro and remember position

413

works because coeng + ro is always in front of a vowel (if there is a vowel)

414

and because CC_CONSONANT2 is enough to identify it, as it is the only consonant

415

with this flag */

416

if ( (charClass & CF_COENG) && (i + 1 < syllableEnd) &&

417

( (getKhmerCharClass(item->string[i+1]) & CF_CLASS_MASK) == CC_CONSONANT2) ) {

418

coengRo = i;

419

}

420

}

421

422

/* write coeng + ro if found */

423

if (coengRo > -1) {

424

reordered[len] = C_COENG;

425

properties[len] = PreForm;

426

++len;

427

reordered[len] = C_RO;

428

properties[len] = PreForm;

429

++len;

430

}

431

432

433

shall we add a dotted circle?

434

If in the position in which the base should be (first char in the string) there is

435

a character that has the Dotted circle flag (a character that cannot be a base)

436

then write a dotted circle */

437

if (getKhmerCharClass(item->string[item->item.pos]) & CF_DOTTED_CIRCLE) {

438

reordered[len] = C_DOTTED_CIRCLE;

439

++len;

440

}

441

442

/* copy what is left to the output, skipping before vowels and

443

coeng Ro if they are present */

444

for (i = item->item.pos; i < syllableEnd; i += 1) {

445

HB_UChar16 uc = item->string[i];

446

KhmerCharClass charClass = getKhmerCharClass(uc);

447

448

/* skip a before vowel, it was already processed */

449

if (charClass & CF_POS_BEFORE) {

450

continue;

451

}

452

453

/* skip coeng + ro, it was already processed */

454

if (i == coengRo) {

455

i += 1;

456

continue;

457

}

458

459

switch (charClass & CF_POS_MASK)

460

{

461

case CF_POS_ABOVE :

462

reordered[len] = uc;

463

properties[len] = AboveForm;

464

++len;

465

break;

466

467

case CF_POS_AFTER :

468

reordered[len] = uc;

469

properties[len] = PostForm;

470

++len;

471

break;

472

473

case CF_POS_BELOW :

474

reordered[len] = uc;

475

properties[len] = BelowForm;

476

++len;

477

break;

478

479

default:

480

/* assign the correct flags to a coeng consonant

481

Consonants of type 3 are taged as Post forms and those type 1 as below forms */

482

if ( (charClass & CF_COENG) && i + 1 < syllableEnd ) {

483

unsigned char property = (getKhmerCharClass(item->string[i+1]) & CF_CLASS_MASK) == CC_CONSONANT3 ?

484

PostForm : BelowForm;

485

reordered[len] = uc;

486

properties[len] = property;

487

++len;

488

i += 1;

489

reordered[len] = item->string[i];

490

properties[len] = property;

491

++len;

492

break;

493

}

494

495

/* if a shifter is followed by an above vowel change the shifter to below form,

496

an above vowel can have two possible positions i + 1 or i + 3

497

(position i+1 corresponds to unicode 3, position i+3 to Unicode 4)

498

and there is an extra rule for C_VOWEL_AA + C_SIGN_NIKAHIT also for two

499

different positions, right after the shifter or after a vowel (Unicode 4) */

500

if ( (charClass & CF_SHIFTER) && (i + 1 < syllableEnd) ) {

501

if (getKhmerCharClass(item->string[i+1]) & CF_ABOVE_VOWEL ) {

502

reordered[len] = uc;

503

properties[len] = BelowForm;

504

++len;

505

break;

506

}

507

if (i + 2 < syllableEnd &&

508

(item->string[i+1] == C_VOWEL_AA) &&

509

(item->string[i+2] == C_SIGN_NIKAHIT) )

510

{

511

reordered[len] = uc;

512

properties[len] = BelowForm;

513

++len;

514

break;

515

}

516

if (i + 3 < syllableEnd && (getKhmerCharClass(item->string[i+3]) & CF_ABOVE_VOWEL) ) {

517

reordered[len] = uc;

518

properties[len] = BelowForm;

519

++len;

520

break;

521

}

522

if (i + 4 < syllableEnd &&

523

(item->string[i+3] == C_VOWEL_AA) &&

524

(item->string[i+4] == C_SIGN_NIKAHIT) )

525

{

526

reordered[len] = uc;

527

properties[len] = BelowForm;

528

++len;

529

break;

530

}

531

}

532

533

/* default - any other characters */

534

reordered[len] = uc;

535

++len;

536

break;

537

} /* switch */

538

} /* for */

539

540

if (!item->font->klass->convertStringToGlyphIndices(item->font,

541

reordered, len,

542

item->glyphs, &item->num_glyphs,

543

item->item.bidiLevel % 2))

544

return FALSE;

545

546

547

KHDEBUG("after shaping: len=%d", len);

548

for (i = 0; i < len; i++) {

549

item->attributes[i].mark = FALSE;

550

item->attributes[i].clusterStart = FALSE;

551

item->attributes[i].justification = 0;

552

item->attributes[i].zeroWidth = FALSE;

553

KHDEBUG(" %d: %4x property=%x", i, reordered[i], properties[i]);

554

}

555

556

/* now we have the syllable in the right order, and can start running it through open type. */

557

558

#ifndef NO_OPENTYPE

559

if (openType) {

560

hb_uint32 where[16];

561

for (i = 0; i < len; ++i) {

562

where[i] = ~(PreSubstProperty

563

| BelowSubstProperty

564

| AboveSubstProperty

565

| PostSubstProperty

566

| CligProperty

567

| PositioningProperties);

568

if (properties[i] == PreForm)

569

where[i] &= ~PreFormProperty;

570

else if (properties[i] == BelowForm)

571

where[i] &= ~BelowFormProperty;

572

else if (properties[i] == AboveForm)

573

where[i] &= ~AboveFormProperty;

574

else if (properties[i] == PostForm)

575

where[i] &= ~PostFormProperty;

576

}

577

578

HB_OpenTypeShape(item, where);

579

if (!HB_OpenTypePosition(item, availableGlyphs, /*doLogClusters*/FALSE))

580

return FALSE;

581

} else

582

#endif

583

{

584

KHDEBUG("Not using openType");

585

HB_HeuristicPosition(item);

586

}

587

588

item->attributes[0].clusterStart = TRUE;

589

return TRUE;

590

}

591

592

HB_Bool HB_KhmerShape(HB_ShaperItem *item)

593

{

594

HB_Bool openType = FALSE;

595

unsigned short *logClusters = item->log_clusters;

596

int i;

597

598

HB_ShaperItem syllable = *item;

599

int first_glyph = 0;

600

601

int sstart = item->item.pos;

602

int end = sstart + item->item.length;

603

604

assert(item->item.script == HB_Script_Khmer);

605

606

#ifndef NO_OPENTYPE

607

openType = HB_SelectScript(item, khmer_features);

608

#endif

609

610

KHDEBUG("khmer_shape: from %d length %d", item->item.pos, item->item.length);

611

while (sstart < end) {

612

HB_Bool invalid;

613

int send = khmer_nextSyllableBoundary(item->string, sstart, end, &invalid);

614

KHDEBUG("syllable from %d, length %d, invalid=%s", sstart, send-sstart,

615

invalid ? "TRUE" : "FALSE");

616

syllable.item.pos = sstart;

617

syllable.item.length = send-sstart;

618

syllable.glyphs = item->glyphs + first_glyph;

619

syllable.attributes = item->attributes + first_glyph;

620

syllable.offsets = item->offsets + first_glyph;

621

syllable.advances = item->advances + first_glyph;

622

syllable.num_glyphs = item->num_glyphs - first_glyph;

623

if (!khmer_shape_syllable(openType, &syllable)) {

624

KHDEBUG("syllable shaping failed, syllable requests %d glyphs", syllable.num_glyphs);

625

item->num_glyphs += syllable.num_glyphs;

626

return FALSE;

627

}

628

/* fix logcluster array */

629

KHDEBUG("syllable:");

630

for (i = first_glyph; i < first_glyph + (int)syllable.num_glyphs; ++i)

631

KHDEBUG(" %d -> glyph %x", i, item->glyphs[i]);

632

KHDEBUG(" logclusters:");

633

for (i = sstart; i < send; ++i) {

634

KHDEBUG(" %d -> glyph %d", i, first_glyph);

635

logClusters[i-item->item.pos] = first_glyph;

636

}

637

sstart = send;

638

first_glyph += syllable.num_glyphs;

639

}

640

item->num_glyphs = first_glyph;

641

return TRUE;

642

}

Older »