~dannf/ubuntu/saucy/screen/lp1213278-from-debian

« back to all changes in this revision

Viewing changes to encoding.c

Committer: Bazaar Package Importer
Author(s): Nathaniel McCallum
Date: 2004-09-03 15:15:33 UTC
Revision ID: james.westby@ubuntu.com-20040903151533-px02yqlrchs4fv2t

Tags: upstream-4.0.2

Import upstream version 4.0.2

files added:

COPYING

ChangeLog

INSTALL

Makefile

Makefile.in

NEWS

NEWS.3.5

NEWS.3.6

NEWS.3.7

NEWS.3.9

README

TODO

acls.c

acls.h

ansi.c

ansi.h

attacher.c

braille.c

braille.h

braille_tsi.c

comm.c

comm.h.dist

comm.sh

config.h.in

configure

configure.in

display.c

display.h

doc/FAQ

doc/Makefile.in

doc/README.DOTSCREEN

doc/fdpat.ps

doc/install.sh

doc/make.help

doc/screen.1

doc/screen.info

doc/screen.info-1

doc/screen.info-2

doc/screen.info-3

doc/screen.info-4

doc/screen.info-5

doc/screen.texinfo

doc/window_to_display.ps

encoding.c

etc/ccdefs

etc/completer.zsh

etc/countmail

etc/etcscreenrc

etc/gr-braille.tbl

etc/gs-braille.tbl

etc/mkinstalldirs

etc/newsyntax

etc/newsyntax38

etc/screenrc

etc/toolcheck

etc/us-braille.tbl

extern.h

fileio.c

help.c

image.h

input.c

install.sh

kmapdef.c.dist

layer.c

layer.h

loadav.c

logfile.c

logfile.h

mark.c

mark.h

misc.c

nethack.c

os.h

osdef.h.in

osdef.sh

patchlevel.h

process.c

pty.c

putenv.c

resize.c

sched.c

sched.h

screen.c

screen.h

search.c

socket.c

teln.c

term.c

term.h.dist

term.sh

termcap.c

terminfo

terminfo/8bits

terminfo/README

terminfo/checktc.c

terminfo/screencap

terminfo/screeninfo.src

terminfo/test.txt

terminfo/tetris.c

tty.c.dist

tty.sh

utf8encodings

utf8encodings/01

utf8encodings/02

utf8encodings/03

utf8encodings/04

utf8encodings/18

utf8encodings/19

utf8encodings/a1

utf8encodings/bf

utf8encodings/c2

utf8encodings/c3

utf8encodings/c4

utf8encodings/c6

utf8encodings/c7

utf8encodings/c8

utf8encodings/cc

utf8encodings/cd

utf8encodings/d6

utmp.c

window.c

window.h

Show diffs side-by-side

added added

removed removed

encoding.c

* Juergen Weigert (jnweiger@immd4.informatik.uni-erlangen.de)

* Michael Schroeder (mlschroe@immd4.informatik.uni-erlangen.de)

* This program is free software; you can redistribute it and/or modify

* it under the terms of the GNU General Public License as published by

* the Free Software Foundation; either version 2, or (at your option)

* any later version.

* This program is distributed in the hope that it will be useful,

* but WITHOUT ANY WARRANTY; without even the implied warranty of

* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

* GNU General Public License for more details.

* You should have received a copy of the GNU General Public License

* along with this program (see the file COPYING); if not, write to the

* Free Software Foundation, Inc.,

* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA

****************************************************************

#include <sys/types.h>

#include "config.h"

#include "screen.h"

#include "extern.h"

#ifdef ENCODINGS

extern unsigned char *null;

extern struct display *display, *displays;

extern struct layer *flayer;

extern char *screenencodings;

/*
 * Forward declarations of the file-local helpers.  The recoding helpers
 * only exist in UTF8 builds; the double-width variants additionally
 * need DW_CHARS.
 */
static int  encmatch __P((char *, char *));
# ifdef UTF8
static int  recode_char __P((int, int, int));
static int  recode_char_to_encoding __P((int, int));
static void comb_tofront __P((int, int));
#  ifdef DW_CHARS
static int  recode_char_dw __P((int, int *, int, int));
static int  recode_char_dw_to_encoding __P((int, int *, int));
#  endif
# endif /* UTF8 -- must end here: the encoding table below is used by all builds */

/*
 * Static description of one supported character encoding (one row of
 * the encodings[] table).
 */
struct encoding {
  char *name;		/* name FindEncoding() compares against with encmatch() */
  char *charsets;	/* if non-null, handed to SetCharsets() by ResetEncoding() */
  int deffont;		/* default font; substituted for font 0 when recoding */
  int usegr;		/* non-zero: ResetEncoding() sets w_gr = 2 and w_FontE = charsets[1] */
  int noc1;		/* non-zero: ResetEncoding() clears w_c1 */
  char *fontlist;	/* if non-null, fonts tried in order by recode_char_to_encoding() */
};

/* big5 font: ^X */
/* KOI8-R font: 96 ! */
/* CP1251 font: 96 ? */

/*
 * Table of known encodings, indexed by encoding number.
 * Columns: name, charsets, deffont, usegr, noc1, fontlist.
 * NOTE(review): the code indexes this table with the encoding constants
 * (UTF8, SJIS, EUC_JP, ...) that are defined outside this file, so the
 * row order presumably has to match them -- confirm before reordering.
 */
struct encoding encodings[] = {
  { "C", 0, 0, 0, 0, 0 },
  { "eucJP", "B\002I\00401", 0, 1, 0, "\002\004I" },
  { "SJIS", "BIBB01", 0, 1, 1, "\002I" },
  { "eucKR", "B\003BB01", 0, 1, 0, "\003" },
  { "eucCN", "B\001BB01", 0, 1, 0, "\001" },
  { "Big5", "B\030BB01", 0, 1, 0, "\030" },
  { "KOI8-R", 0, 0x80|'!', 0, 1, 0 },
  { "CP1251", 0, 0x80|'?', 0, 1, 0 },
  { "UTF-8", 0, -1, 0, 0, 0 },
  { "ISO8859-2", 0, 0x80|'B', 0, 0, 0 },
  { "ISO8859-3", 0, 0x80|'C', 0, 0, 0 },
  { "ISO8859-4", 0, 0x80|'D', 0, 0, 0 },
  { "ISO8859-5", 0, 0x80|'L', 0, 0, 0 },
  { "ISO8859-6", 0, 0x80|'G', 0, 0, 0 },
  { "ISO8859-7", 0, 0x80|'F', 0, 0, 0 },
  { "ISO8859-8", 0, 0x80|'H', 0, 0, 0 },
  { "ISO8859-9", 0, 0x80|'M', 0, 0, 0 },
  { "ISO8859-10", 0, 0x80|'V', 0, 0, 0 },
  { "ISO8859-15", 0, 0x80|'b', 0, 0, 0 },
  { "jis", 0, 0, 0, 0, "\002\004I" },
  { "GBK", "B\031BB01", 0x80|'b', 1, 1, "\031" }
};

#ifdef UTF8

/*
 * Built-in font translation tables.
 *
 * The array is a sequence of sections.  Each section starts with a
 * header pair { font, 0 }, is followed by { font code, unicode } pairs
 * and ends with { 0, 0 }.  One more { 0, 0 } ends the whole list.
 * A pair whose first element has bit 15 set closes a range that starts
 * at the preceding pair (see recode_char()).
 */
static unsigned short builtin_tabs[][2] = {
  { 0x30, 0 },			/* 0: special graphics (line drawing) */
  { 0x005f, 0x25AE }, { 0x0060, 0x25C6 }, { 0x0061, 0x2592 }, { 0x0062, 0x2409 },
  { 0x0063, 0x240C }, { 0x0064, 0x240D }, { 0x0065, 0x240A }, { 0x0066, 0x00B0 },
  { 0x0067, 0x00B1 }, { 0x0068, 0x2424 }, { 0x0069, 0x240B }, { 0x006a, 0x2518 },
  { 0x006b, 0x2510 }, { 0x006c, 0x250C }, { 0x006d, 0x2514 }, { 0x006e, 0x253C },
  { 0x006f, 0x23BA }, { 0x0070, 0x23BB }, { 0x0071, 0x2500 }, { 0x0072, 0x23BC },
  { 0x0073, 0x23BD }, { 0x0074, 0x251C }, { 0x0075, 0x2524 }, { 0x0076, 0x2534 },
  { 0x0077, 0x252C }, { 0x0078, 0x2502 }, { 0x0079, 0x2264 }, { 0x007a, 0x2265 },
  { 0x007b, 0x03C0 }, { 0x007c, 0x2260 }, { 0x007d, 0x00A3 }, { 0x007e, 0x00B7 },
  { 0, 0},

  { 0x34, 0 },			/* 4: Dutch */
  { 0x0023, 0x00a3 }, { 0x0040, 0x00be }, { 0x005b, 0x00ff }, { 0x005c, 0x00bd },
  { 0x005d, 0x007c }, { 0x007b, 0x00a8 }, { 0x007c, 0x0066 }, { 0x007d, 0x00bc },
  { 0x007e, 0x00b4 },
  { 0, 0},

  { 0x35, 0 },			/* 5: Finnish */
  { 0x005b, 0x00c4 }, { 0x005c, 0x00d6 }, { 0x005d, 0x00c5 }, { 0x005e, 0x00dc },
  { 0x0060, 0x00e9 }, { 0x007b, 0x00e4 }, { 0x007c, 0x00f6 }, { 0x007d, 0x00e5 },
  { 0x007e, 0x00fc },
  { 0, 0},

  { 0x36, 0 },			/* 6: Norwegian/Danish */
  { 0x0040, 0x00c4 }, { 0x005b, 0x00c6 }, { 0x005c, 0x00d8 }, { 0x005d, 0x00c5 },
  { 0x005e, 0x00dc }, { 0x0060, 0x00e4 }, { 0x007b, 0x00e6 }, { 0x007c, 0x00f8 },
  { 0x007d, 0x00e5 }, { 0x007e, 0x00fc },
  { 0, 0},

  { 0x37, 0 },			/* 7: Swedish */
  { 0x0040, 0x00c9 }, { 0x005b, 0x00c4 }, { 0x005c, 0x00d6 }, { 0x005d, 0x00c5 },
  { 0x005e, 0x00dc }, { 0x0060, 0x00e9 }, { 0x007b, 0x00e4 }, { 0x007c, 0x00f6 },
  { 0x007d, 0x00e5 }, { 0x007e, 0x00fc },
  { 0, 0},

  { 0x3d, 0},			/* =: Swiss */
  { 0x0023, 0x00f9 }, { 0x0040, 0x00e0 }, { 0x005b, 0x00e9 }, { 0x005c, 0x00e7 },
  { 0x005d, 0x00ea }, { 0x005e, 0x00ee }, { 0x005f, 0x00e8 }, { 0x0060, 0x00f4 },
  { 0x007b, 0x00e4 }, { 0x007c, 0x00f6 }, { 0x007d, 0x00fc }, { 0x007e, 0x00fb },
  { 0, 0},

  { 0x41, 0},			/* A: UK */
  { 0x0023, 0x00a3 },
  { 0, 0},

  { 0x4b, 0},			/* K: German */
  { 0x0040, 0x00a7 }, { 0x005b, 0x00c4 }, { 0x005c, 0x00d6 }, { 0x005d, 0x00dc },
  { 0x007b, 0x00e4 }, { 0x007c, 0x00f6 }, { 0x007d, 0x00fc }, { 0x007e, 0x00df },
  { 0, 0},

  { 0x51, 0},			/* Q: French Canadian */
  { 0x0040, 0x00e0 }, { 0x005b, 0x00e2 }, { 0x005c, 0x00e7 }, { 0x005d, 0x00ea },
  { 0x005e, 0x00ee }, { 0x0060, 0x00f4 }, { 0x007b, 0x00e9 }, { 0x007c, 0x00f9 },
  { 0x007d, 0x00e8 }, { 0x007e, 0x00fb },
  { 0, 0},

  { 0x52, 0},			/* R: French */
  { 0x0023, 0x00a3 }, { 0x0040, 0x00e0 }, { 0x005b, 0x00b0 }, { 0x005c, 0x00e7 },
  { 0x005d, 0x00a7 }, { 0x007b, 0x00e9 }, { 0x007c, 0x00f9 }, { 0x007d, 0x00e8 },
  { 0x007e, 0x00a8 },
  { 0, 0},

  { 0x59, 0},			/* Y: Italian */
  { 0x0023, 0x00a3 }, { 0x0040, 0x00a7 }, { 0x005b, 0x00b0 }, { 0x005c, 0x00e7 },
  { 0x005d, 0x00e9 }, { 0x0060, 0x00f9 }, { 0x007b, 0x00e0 }, { 0x007c, 0x00f2 },
  { 0x007d, 0x00e8 }, { 0x007e, 0x00ec },
  { 0, 0},

  { 0x5a, 0},			/* Z: Spanish */
  { 0x0023, 0x00a3 }, { 0x0040, 0x00a7 }, { 0x005b, 0x00a1 }, { 0x005c, 0x00d1 },
  { 0x005d, 0x00bf }, { 0x007b, 0x00b0 }, { 0x007c, 0x00f1 }, { 0x007d, 0x00e7 },
  { 0, 0},

  { 0xe2, 0},			/* 96-b: ISO-8859-15 */
  { 0x00a4, 0x20ac }, { 0x00a6, 0x0160 }, { 0x00a8, 0x0161 }, { 0x00b4, 0x017D },
  { 0x00b8, 0x017E }, { 0x00bc, 0x0152 }, { 0x00bd, 0x0153 }, { 0x00be, 0x0178 },
  { 0, 0},

  { 0x4a, 0},			/* J: JIS 0201 Roman */
  { 0x005c, 0x00a5 }, { 0x007e, 0x203e },
  { 0, 0},

  { 0x49, 0},			/* I: halfwidth katakana */
  { 0x0021, 0xff61 }, { 0x005f|0x8000, 0xff9f },	/* range 0x21..0x5f */
  { 0, 0},

  { 0, 0}
};

276

277

/*
 * One translation table slot; recodetabs[] holds one slot per font.
 */
struct recodetab
{
  unsigned short (*tab)[2];	/* { font code, unicode } pairs ending in a zero entry, or 0 */
  int flags;			/* RECODETAB_* bits; 0 means "nothing tried yet" */
};

/* NOTE(review): ALLOCED and TRIED are not referenced in the visible part
 * of this file; presumably set by LoadFontTranslation() -- confirm. */
#define RECODETAB_ALLOCED 1
#define RECODETAB_BUILTIN 2	/* tab points into builtin_tabs (see InitBuiltinTabs()) */
#define RECODETAB_TRIED   4

static struct recodetab recodetabs[256];

288

289

void

290

InitBuiltinTabs()

291

{

292

unsigned short (*p)[2];

293

for (p = builtin_tabs; (*p)[0]; p++)

294

{

295

recodetabs[(*p)[0]].flags = RECODETAB_BUILTIN;

296

recodetabs[(*p)[0]].tab = p + 1;

297

p++;

298

while((*p)[0])

299

p++;

300

}

301

}

302

303

static int

304

recode_char(c, to_utf, font)

305

int c, to_utf, font;

306

{

307

int f;

308

unsigned short (*p)[2];

309

310

if (to_utf)

311

{

312

if (c < 256)

313

return c;

314

f = (c >> 8) & 0xff;

315

c &= 0xff;

316

/* map aliases to keep the table small */

317

switch (f)

318

{

319

case 'C':

320

f ^= ('C' ^ '5');

321

break;

322

case 'E':

323

f ^= ('E' ^ '6');

324

break;

325

case 'H':

326

f ^= ('H' ^ '7');

327

break;

328

default:

329

break;

330

}

331

p = recodetabs[f].tab;

332

if (p == 0 && recodetabs[f].flags == 0)

333

{

334

LoadFontTranslation(f, 0);

335

p = recodetabs[f].tab;

336

}

337

if (p)

338

for (; (*p)[0]; p++)

339

{

340

if ((p[0][0] & 0x8000) && (c <= (p[0][0] & 0x7fff)) && c >= p[-1][0])

341

return c - p[-1][0] + p[-1][1];

342

if ((*p)[0] == c)

343

return (*p)[1];

344

}

345

return c & 0xff; /* map to latin1 */

346

}

347

if (font == -1)

348

{

349

if (c < 256)

350

return c; /* latin1 */

351

for (font = 32; font < 128; font++)

352

{

353

p = recodetabs[font].tab;

354

if (p)

355

for (; (*p)[1]; p++)

356

{

357

if ((p[0][0] & 0x8000) && c <= p[0][1] && c >= p[-1][1])

358

return (c - p[-1][1] + p[-1][0]) | (font << 8);

359

if ((*p)[1] == c)

360

return (*p)[0] | (font << 8);

361

}

362

}

363

return '?';

364

}

365

if (c < 128 && (font & 128) != 0)

366

return c;

367

if (font >= 32)

368

{

369

p = recodetabs[font].tab;

370

if (p == 0 && recodetabs[font].flags == 0)

371

{

372

LoadFontTranslation(font, 0);

373

p = recodetabs[font].tab;

374

}

375

if (p)

376

for (; (*p)[1]; p++)

377

{

378

if ((p[0][0] & 0x8000) && c <= p[0][1] && c >= p[-1][1])

379

return (c - p[-1][1] + p[-1][0]) | (font & 128 ? 0 : font << 8);

380

if ((*p)[1] == c)

381

return (*p)[0] | (font & 128 ? 0 : font << 8);

382

}

383

}

384

return -1;

385

}

386

387

388

#ifdef DW_CHARS

389

static int

390

recode_char_dw(c, c2p, to_utf, font)

391

int c, *c2p, to_utf, font;

392

{

393

int f;

394

unsigned short (*p)[2];

395

396

if (to_utf)

397

{

398

f = (c >> 8) & 0xff;

399

c = (c & 255) << 8 | (*c2p & 255);

400

*c2p = 0xffff;

401

p = recodetabs[f].tab;

402

if (p == 0 && recodetabs[f].flags == 0)

403

{

404

LoadFontTranslation(f, 0);

405

p = recodetabs[f].tab;

406

}

407

if (p)

408

for (; (*p)[0]; p++)

409

if ((*p)[0] == c)

410

{

411

#ifdef DW_CHARS

412

if (!utf8_isdouble((*p)[1]))

413

*c2p = ' ';

414

#endif

415

return (*p)[1];

416

}

417

return UCS_REPL_DW;

418

}

419

if (font == -1)

420

{

421

for (font = 0; font < 030; font++)

422

{

423

p = recodetabs[font].tab;

424

if (p)

425

for (; (*p)[1]; p++)

426

if ((*p)[1] == c)

427

{

428

*c2p = ((*p)[0] & 255) | font << 8 | 0x8000;

429

return ((*p)[0] >> 8) | font << 8;

430

}

431

}

432

*c2p = '?';

433

return '?';

434

}

435

if (font < 32)

436

{

437

p = recodetabs[font].tab;

438

if (p == 0 && recodetabs[font].flags == 0)

439

{

440

LoadFontTranslation(font, 0);

441

p = recodetabs[font].tab;

442

}

443

if (p)

444

for (; (*p)[1]; p++)

445

if ((*p)[1] == c)

446

{

447

*c2p = ((*p)[0] & 255) | font << 8 | 0x8000;

448

return ((*p)[0] >> 8) | font << 8;

449

}

450

}

451

return -1;

452

}

453

#endif

454

455

static int

456

recode_char_to_encoding(c, encoding)

457

int c, encoding;

458

{

459

char *fp;

460

int x;

461

462

if (encoding == UTF8)

463

return recode_char(c, 1, -1);

464

if ((fp = encodings[encoding].fontlist) != 0)

465

while(*fp)

466

if ((x = recode_char(c, 0, (unsigned char)*fp++)) != -1)

467

return x;

468

if (encodings[encoding].deffont)

469

if ((x = recode_char(c, 0, encodings[encoding].deffont)) != -1)

470

return x;

471

return recode_char(c, 0, -1);

472

}

473

474

#ifdef DW_CHARS

475

static int

476

recode_char_dw_to_encoding(c, c2p, encoding)

477

int c, *c2p, encoding;

478

{

479

char *fp;

480

int x;

481

482

if (encoding == UTF8)

483

return recode_char_dw(c, c2p, 1, -1);

484

if ((fp = encodings[encoding].fontlist) != 0)

485

while(*fp)

486

if ((x = recode_char_dw(c, c2p, 0, (unsigned char)*fp++)) != -1)

487

return x;

488

if (encodings[encoding].deffont)

489

if ((x = recode_char_dw(c, c2p, 0, encodings[encoding].deffont)) != -1)

490

return x;

491

return recode_char_dw(c, c2p, 0, -1);

492

}

493

#endif

494

495

496

struct mchar *

497

recode_mchar(mc, from, to)

498

struct mchar *mc;

499

int from, to;

500

{

501

static struct mchar rmc;

502

int c;

503

504

debug3("recode_mchar %02x from %d to %d\n", mc->image, from, to);

505

if (from == to || (from != UTF8 && to != UTF8))

506

return mc;

507

rmc = *mc;

508

if (rmc.font == 0 && from != UTF8)

509

rmc.font = encodings[from].deffont;

510

if (rmc.font == 0) /* latin1 is the same in unicode */

511

return mc;

512

c = rmc.image | (rmc.font << 8);

513

#ifdef DW_CHARS

514

if (rmc.mbcs)

515

{

516

int c2 = rmc.mbcs;

517

c = recode_char_dw_to_encoding(c, &c2, to);

518

rmc.mbcs = c2;

519

}

520

else

521

#endif

522

c = recode_char_to_encoding(c, to);

523

rmc.image = c & 255;

524

rmc.font = c >> 8 & 255;

525

return &rmc;

526

}

527

528

struct mline *

529

recode_mline(ml, w, from, to)

530

struct mline *ml;

531

int w;

532

int from, to;

533

{

534

static int maxlen;

535

static int last;

536

static struct mline rml[2], *rl;

537

int i, c;

538

539

if (from == to || (from != UTF8 && to != UTF8) || w == 0)

540

return ml;

541

if (ml->font == null && encodings[from].deffont == 0)

542

return ml;

543

if (w > maxlen)

544

{

545

for (i = 0; i < 2; i++)

546

{

547

if (rml[i].image == 0)

548

rml[i].image = malloc(w);

549

else

550

rml[i].image = realloc(rml[i].image, w);

551

if (rml[i].font == 0)

552

rml[i].font = malloc(w);

553

else

554

rml[i].font = realloc(rml[i].font, w);

555

if (rml[i].image == 0 || rml[i].font == 0)

556

{

557

maxlen = 0;

558

return ml; /* sorry */

559

}

560

}

561

maxlen = w;

562

}

563

564

debug("recode_mline: from\n");

565

for (i = 0; i < w; i++)

566

debug1("%c", "0123456789abcdef"[(ml->image[i] >> 4) & 15]);

567

debug("\n");

568

for (i = 0; i < w; i++)

569

debug1("%c", "0123456789abcdef"[(ml->image[i] ) & 15]);

570

debug("\n");

571

for (i = 0; i < w; i++)

572

debug1("%c", "0123456789abcdef"[(ml->font[i] >> 4) & 15]);

573

debug("\n");

574

for (i = 0; i < w; i++)

575

debug1("%c", "0123456789abcdef"[(ml->font[i] ) & 15]);

576

debug("\n");

577

578

rl = rml + last;

579

rl->attr = ml->attr;

580

#ifdef COLOR

581

rl->color = ml->color;

582

# ifdef COLORS256

583

rl->colorx = ml->colorx;

584

# endif

585

#endif

586

for (i = 0; i < w; i++)

587

{

588

c = ml->image[i] | (ml->font[i] << 8);

589

if (from != UTF8 && c < 256)

590

c |= encodings[from].deffont << 8;

591

#ifdef DW_CHARS

592

if ((from != UTF8 && (c & 0x1f00) != 0 && (c & 0xe000) == 0) || (from == UTF8 && utf8_isdouble(c)))

593

{

594

if (i + 1 == w)

595

c = '?';

596

else

597

{

598

int c2;

599

i++;

600

c2 = ml->image[i] | (ml->font[i] << 8);

601

c = recode_char_dw_to_encoding(c, &c2, to);

602

rl->font[i - 1] = c >> 8 & 255;

603

rl->image[i - 1] = c & 255;

604

c = c2;

605

}

606

}

607

else

608

#endif

609

c = recode_char_to_encoding(c, to);

610

rl->image[i] = c & 255;

611

rl->font[i] = c >> 8 & 255;

612

}

613

last ^= 1;

614

debug("recode_mline: to\n");

615

for (i = 0; i < w; i++)

616

debug1("%c", "0123456789abcdef"[(rl->image[i] >> 4) & 15]);

617

debug("\n");

618

for (i = 0; i < w; i++)

619

debug1("%c", "0123456789abcdef"[(rl->image[i] ) & 15]);

620

debug("\n");

621

for (i = 0; i < w; i++)

622

debug1("%c", "0123456789abcdef"[(rl->font[i] >> 4) & 15]);

623

debug("\n");

624

for (i = 0; i < w; i++)

625

debug1("%c", "0123456789abcdef"[(rl->font[i] ) & 15]);

626

debug("\n");

627

return rl;

628

}

629

630

/*
 * One entry of the table of combined characters.  Entry number i is
 * addressed by the character code 0xd800 + i.
 */
struct combchar {
  unsigned short c1;	/* base character (may itself be a 0xd800 based code) */
  unsigned short c2;	/* character combined with c1 */
  unsigned short next;	/* table index of the next entry in the ring */
  unsigned short prev;	/* table index of the previous entry in the ring */
};

/* the table itself; stays 0 until utf8_handle_comb() allocates it */
struct combchar **combchars;

637

638

void

639

AddUtf8(c)

640

int c;

641

{

642

ASSERT(D_encoding == UTF8);

643

if (c >= 0xd800 && c < 0xe000 && combchars && combchars[c - 0xd800])

644

{

645

AddUtf8(combchars[c - 0xd800]->c1);

646

c = combchars[c - 0xd800]->c2;

647

}

648

if (c >= 0x800)

649

{

650

AddChar((c & 0xf000) >> 12 | 0xe0);

651

c = (c & 0x0fff) | 0x1000;

652

}

653

if (c >= 0x80)

654

{

655

AddChar((c & 0x1fc0) >> 6 ^ 0xc0);

656

c = (c & 0x3f) | 0x80;

657

}

658

AddChar(c);

659

}

660

661

int

662

ToUtf8_comb(p, c)

663

char *p;

664

int c;

665

{

666

int l;

667

668

if (c >= 0xd800 && c < 0xe000 && combchars && combchars[c - 0xd800])

669

{

670

l = ToUtf8_comb(p, combchars[c - 0xd800]->c1);

671

return l + ToUtf8(p ? p + l : 0, combchars[c - 0xd800]->c2);

672

}

673

return ToUtf8(p, c);

674

}

675

676

/*
 * Encode c as UTF-8 (at most three bytes).  The bytes are stored at p
 * unless p is 0; no terminating NUL is written.
 * Returns the length of the encoding in bytes.
 */
int
ToUtf8(p, c)
char *p;
int c;
{
  unsigned char seq[3];
  int len = 0;
  int i;

  if (c >= 0x800)
    {
      /* lead byte of a three byte sequence */
      seq[len++] = (c & 0xf000) >> 12 | 0xe0;
      /* bit 12 turns the 0xc0 prefix below into the 0x80 of a
       * continuation byte */
      c = (c & 0x0fff) | 0x1000;
    }
  if (c >= 0x80)
    {
      seq[len++] = (c & 0x1fc0) >> 6 ^ 0xc0;
      c = (c & 0x3f) | 0x80;
    }
  seq[len++] = c;

  if (p)
    for (i = 0; i < len; i++)
      p[i] = seq[i];
  return len;
}

700

701

702

* returns:

703

* -1: need more bytes, sequence not finished

704

* -2: corrupt sequence found, redo last char

705

* >= 0: decoded character

706

707

int

708

FromUtf8(c, utf8charp)

709

int c, *utf8charp;

710

{

711

int utf8char = *utf8charp;

712

if (utf8char)

713

{

714

if ((c & 0xc0) != 0x80)

715

{

716

*utf8charp = 0;

717

return -2; /* corrupt sequence! */

718

}

719

else

720

c = (c & 0x3f) | (utf8char << 6);

721

if (!(utf8char & 0x40000000))

722

{

723

/* check for overlong sequences */

724

if ((c & 0x820823e0) == 0x80000000)

725

c = 0xfdffffff;

726

else if ((c & 0x020821f0) == 0x02000000)

727

c = 0xfff7ffff;

728

else if ((c & 0x000820f8) == 0x00080000)

729

c = 0xffffd000;

730

else if ((c & 0x0000207c) == 0x00002000)

731

c = 0xffffff70;

732

}

733

}

734

else

735

{

736

/* new sequence */

737

if (c >= 0xfe)

738

c = UCS_REPL;

739

else if (c >= 0xfc)

740

c = (c & 0x01) | 0xbffffffc; /* 5 bytes to follow */

741

else if (c >= 0xf8)

742

c = (c & 0x03) | 0xbfffff00; /* 4 */

743

else if (c >= 0xf0)

744

c = (c & 0x07) | 0xbfffc000; /* 3 */

745

else if (c >= 0xe0)

746

c = (c & 0x0f) | 0xbff00000; /* 2 */

747

else if (c >= 0xc2)

748

c = (c & 0x1f) | 0xfc000000; /* 1 */

749

else if (c >= 0xc0)

750

c = 0xfdffffff; /* overlong */

751

else if (c >= 0x80)

752

c = UCS_REPL;

753

}

754

*utf8charp = utf8char = (c & 0x80000000) ? c : 0;

755

if (utf8char)

756

return -1;

757

if (c & 0xffff0000)

758

c = UCS_REPL; /* sorry, only know 16bit Unicode */

759

if (c >= 0xd800 && (c <= 0xdfff || c == 0xfffe || c == 0xffff))

760

c = UCS_REPL; /* illegal code */

761

return c;

762

}

763

764

765

void

766

WinSwitchEncoding(p, encoding)

767

struct win *p;

768

int encoding;

769

{

770

int i, j, c;

771

struct mline *ml;

772

struct display *d;

773

struct canvas *cv;

774

struct layer *oldflayer;

775

776

if ((p->w_encoding == UTF8) == (encoding == UTF8))

777

{

778

p->w_encoding = encoding;

779

return;

780

}

781

oldflayer = flayer;

782

for (d = displays; d; d = d->d_next)

783

for (cv = d->d_cvlist; cv; cv = cv->c_next)

784

if (p == Layer2Window(cv->c_layer))

785

{

786

flayer = cv->c_layer;

787

while(flayer->l_next)

788

{

789

if (oldflayer == flayer)

790

oldflayer = flayer->l_next;

791

ExitOverlayPage();

792

}

793

}

794

flayer = oldflayer;

795

for (j = 0; j < p->w_height + p->w_histheight; j++)

796

{

797

#ifdef COPY_PASTE

798

ml = j < p->w_height ? &p->w_mlines[j] : &p->w_hlines[j - p->w_height];

799

#else

800

ml = &p->w_mlines[j];

801

#endif

802

if (ml->font == null && encodings[p->w_encoding].deffont == 0)

803

continue;

804

for (i = 0; i < p->w_width; i++)

805

{

806

c = ml->image[i] | (ml->font[i] << 8);

807

if (p->w_encoding != UTF8 && c < 256)

808

c |= encodings[p->w_encoding].deffont << 8;

809

if (c < 256)

810

continue;

811

if (ml->font == null)

812

{

813

if ((ml->font = (unsigned char *)malloc(p->w_width + 1)) == 0)

814

{

815

ml->font = null;

816

break;

817

}

818

bzero(ml->font, p->w_width + 1);

819

}

820

#ifdef DW_CHARS

821

if ((p->w_encoding != UTF8 && (c & 0x1f00) != 0 && (c & 0xe000) == 0) || (p->w_encoding == UTF8 && utf8_isdouble(c)))

822

{

823

if (i + 1 == p->w_width)

824

c = '?';

825

else

826

{

827

int c2;

828

i++;

829

c2 = ml->image[i] | (ml->font[i] << 8);

830

c = recode_char_dw_to_encoding(c, &c2, encoding);

831

ml->font[i - 1] = c >> 8 & 255;

832

ml->image[i - 1] = c & 255;

833

c = c2;

834

}

835

}

836

else

837

#endif

838

c = recode_char_to_encoding(c, encoding);

839

ml->image[i] = c & 255;

840

ml->font[i] = c >> 8 & 255;

841

}

842

}

843

p->w_encoding = encoding;

844

return;

845

}

846

847

#ifdef DW_CHARS

848

/*
 * Returns 1 if the Unicode character c occupies two cells, else 0.
 */
int
utf8_isdouble(c)
int c;
{
  if (c < 0x1100)
    return 0;
  if (c <= 0x115f)			/* Hangul Jamo init. consonants */
    return 1;
  if (c >= 0x2e80 && c <= 0xa4cf)	/* CJK ... Yi */
    {
      /* 0x300a, 0x300b, 0x301a, 0x301b and 0x303f are single width */
      if ((c & ~0x0011) == 0x300a || c == 0x303f)
	return 0;
      return 1;
    }
  if (c >= 0xac00 && c <= 0xd7a3)	/* Hangul Syllables */
    return 1;
  if (c >= 0xdf00 && c <= 0xdfff)	/* dw combining sequence */
    return 1;
  if (c >= 0xf900 && c <= 0xfaff)	/* CJK Compatibility Ideographs */
    return 1;
  if (c >= 0xfe30 && c <= 0xfe6f)	/* CJK Compatibility Forms */
    return 1;
  if (c >= 0xff00 && c <= 0xff5f)	/* Fullwidth Forms */
    return 1;
  if (c >= 0xffe0 && c <= 0xffe6)
    return 1;
  if (c >= 0x20000 && c <= 0x2ffff)
    return 1;
  return 0;
}

865

#endif

866

867

/*
 * Returns 1 if the Unicode character c is a combining character,
 * i.e. lies in one of the ranges of the table below, else 0.
 */
int
utf8_iscomb(c)
int c;
{
  /* taken from Markus Kuhn's wcwidth; sorted, so it can be bisected */
  static struct {
    unsigned short first;
    unsigned short last;
  } combining[] = {
    { 0x0300, 0x034F }, { 0x0360, 0x036F }, { 0x0483, 0x0486 },
    { 0x0488, 0x0489 }, { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 },
    { 0x05BB, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
    { 0x05C4, 0x05C4 }, { 0x064B, 0x0655 }, { 0x0670, 0x0670 },
    { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
    { 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A },
    { 0x07A6, 0x07B0 }, { 0x0901, 0x0902 }, { 0x093C, 0x093C },
    { 0x0941, 0x0948 }, { 0x094D, 0x094D }, { 0x0951, 0x0954 },
    { 0x0962, 0x0963 }, { 0x0981, 0x0981 }, { 0x09BC, 0x09BC },
    { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 },
    { 0x0A02, 0x0A02 }, { 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 },
    { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 },
    { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 },
    { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD }, { 0x0B01, 0x0B01 },
    { 0x0B3C, 0x0B3C }, { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 },
    { 0x0B4D, 0x0B4D }, { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 },
    { 0x0BC0, 0x0BC0 }, { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 },
    { 0x0C46, 0x0C48 }, { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 },
    { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD },
    { 0x0D41, 0x0D43 }, { 0x0D4D, 0x0D4D }, { 0x0DCA, 0x0DCA },
    { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 },
    { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 },
    { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD },
    { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 },
    { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 },
    { 0x0F86, 0x0F87 }, { 0x0F90, 0x0F97 }, { 0x0F99, 0x0FBC },
    { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 }, { 0x1032, 0x1032 },
    { 0x1036, 0x1037 }, { 0x1039, 0x1039 }, { 0x1058, 0x1059 },
    { 0x1160, 0x11FF }, { 0x1712, 0x1714 }, { 0x1732, 0x1734 },
    { 0x1752, 0x1753 }, { 0x1772, 0x1773 }, { 0x17B7, 0x17BD },
    { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x180B, 0x180E },
    { 0x18A9, 0x18A9 }, { 0x200B, 0x200F }, { 0x202A, 0x202E },
    { 0x2060, 0x2063 }, { 0x206A, 0x206F }, { 0x20D0, 0x20EA },
    { 0x302A, 0x302F }, { 0x3099, 0x309A }, { 0xFB1E, 0xFB1E },
    { 0xFE00, 0xFE0F }, { 0xFE20, 0xFE23 }, { 0xFEFF, 0xFEFF },
    { 0xFFF9, 0xFFFB }
  };
  int lo = 0;
  int hi = sizeof(combining)/sizeof(*combining) - 1;
  int probe;

  /* quick reject for everything outside the table's overall span */
  if (c < 0x0300 || c > 0xfffb)
    return 0;
  while (lo <= hi)
    {
      probe = (lo + hi) / 2;
      if (c < combining[probe].first)
	hi = probe - 1;
      else if (c > combining[probe].last)
	lo = probe + 1;
      else
	return 1;
    }
  return 0;
}

929

930

static void

931

comb_tofront(root, i)

932

int root, i;

933

{

934

for (;;)

935

{

936

debug1("bring to front: %x\n", i);

937

combchars[combchars[i]->prev]->next = combchars[i]->next;

938

combchars[combchars[i]->next]->prev = combchars[i]->prev;

939

combchars[i]->next = combchars[root]->next;

940

combchars[i]->prev = root;

941

combchars[combchars[root]->next]->prev = i;

942

combchars[root]->next = i;

943

i = combchars[i]->c1;

944

if (i < 0xd800 || i >= 0xe000)

945

return;

946

i -= 0xd800;

947

}

948

}

949

950

void

951

utf8_handle_comb(c, mc)

952

int c;

953

struct mchar *mc;

954

{

955

int root, i, c1;

956

int isdouble;

957

958

c1 = mc->image | (mc->font << 8);

959

isdouble = c1 >= 0x1100 && utf8_isdouble(c1);

960

if (!combchars)

961

{

962

combchars = (struct combchar **)malloc(sizeof(struct combchar *) * 0x802);

963

if (!combchars)

964

return;

965

bzero((char *)combchars, sizeof(struct combchar *) * 0x802);

966

combchars[0x800] = (struct combchar *)malloc(sizeof(struct combchar));

967

combchars[0x801] = (struct combchar *)malloc(sizeof(struct combchar));

968

if (!combchars[0x800] || !combchars[0x801])

969

{

970

if (combchars[0x800])

971

free(combchars[0x800]);

972

if (combchars[0x801])

973

free(combchars[0x801]);

974

free(combchars);

975

return;

976

}

977

combchars[0x800]->c1 = 0x000;

978

combchars[0x800]->c2 = 0x700;

979

combchars[0x800]->next = 0x800;

980

combchars[0x800]->prev = 0x800;

981

combchars[0x801]->c1 = 0x700;

982

combchars[0x801]->c2 = 0x800;

983

combchars[0x801]->next = 0x801;

984

combchars[0x801]->prev = 0x801;

985

}

986

root = isdouble ? 0x801 : 0x800;

987

for (i = combchars[root]->c1; i < combchars[root]->c2; i++)

988

{

989

if (!combchars[i])

990

break;

991

if (combchars[i]->c1 == c1 && combchars[i]->c2 == c)

992

break;

993

}

994

if (i == combchars[root]->c2)

995

{

996

/* full, recycle old entry */

997

if (c1 >= 0xd800 && c1 < 0xe000)

998

comb_tofront(root, c1);

999

i = combchars[root]->prev;

1000

/* FIXME: delete old char from all buffers */

1001

}

1002

else if (!combchars[i])

1003

{

1004

combchars[i] = (struct combchar *)malloc(sizeof(struct combchar));

1005

if (!combchars[i])

1006

return;

1007

combchars[i]->prev = i;

1008

combchars[i]->next = i;

1009

}

1010

combchars[i]->c1 = c1;

1011

combchars[i]->c2 = c;

1012

mc->image = i & 0xff;

1013

mc->font = (i >> 8) + 0xd8;

1014

debug3("combinig char %x %x -> %x\n", c1, c, i + 0xd800);

1015

comb_tofront(root, i);

1016

}

1017

1018

#else /* !UTF8 */

1019

1020

void

1021

WinSwitchEncoding(p, encoding)

1022

struct win *p;

1023

int encoding;

1024

{

1025

p->w_encoding = encoding;

1026

return;

1027

}

1028

1029

#endif /* UTF8 */

1030

1031

/*
 * Loose comparison of two encoding names: case is ignored and so is
 * every character that is not a letter or a digit.
 *
 * Returns 1 if all significant characters of s1 match the leading
 * significant characters of s2 (so s1 may be a prefix of s2), else 0.
 */
static int
encmatch(s1, s2)
char *s1;
char *s2;
{
  int c1, c2;

  do
    {
      c1 = (unsigned char)*s1;
      if (c1 >= 'A' && c1 <= 'Z')
	c1 += 'a' - 'A';
      if (!(c1 >= 'a' && c1 <= 'z') && !(c1 >= '0' && c1 <= '9'))
	{
	  /* insignificant character; a NUL ends the loop below */
	  s1++;
	  continue;
	}
      c2 = (unsigned char)*s2;
      /* s2 is used up but s1 is not: no match.  Without this check
       * the skip below would walk s2 past its terminating NUL. */
      if (c2 == 0)
	return 0;
      if (c2 >= 'A' && c2 <= 'Z')
	c2 += 'a' - 'A';
      if (!(c2 >= 'a' && c2 <= 'z') && !(c2 >= '0' && c2 <= '9'))
	{
	  s2++;
	  continue;
	}
      if (c1 != c2)
	return 0;
      s1++;
      s2++;
    }
  while (c1);
  return 1;
}

1063

1064

int

1065

FindEncoding(name)

1066

char *name;

1067

{

1068

int encoding;

1069

1070

debug1("FindEncoding %s\n", name);

1071

if (name == 0 || *name == 0)

1072

return 0;

1073

if (encmatch(name, "euc"))

1074

name = "eucJP";

1075

if (encmatch(name, "off") || encmatch(name, "iso8859-1"))

1076

return 0;

1077

#ifndef UTF8

1078

if (encmatch(name, "UTF-8"))

1079

return -1;

1080

#endif

1081

for (encoding = 0; encoding < (int)(sizeof(encodings)/sizeof(*encodings)); encoding++)

1082

if (encmatch(name, encodings[encoding].name))

1083

{

1084

#ifdef UTF8

1085

LoadFontTranslationsForEncoding(encoding);

1086

#endif

1087

return encoding;

1088

}

1089

return -1;

1090

}

1091

1092

char *

1093

EncodingName(encoding)

1094

int encoding;

1095

{

1096

if (encoding >= (int)(sizeof(encodings)/sizeof(*encodings)))

1097

return 0;

1098

return encodings[encoding].name;

1099

}

1100

1101

int

1102

EncodingDefFont(encoding)

1103

int encoding;

1104

{

1105

return encodings[encoding].deffont;

1106

}

1107

1108

void

1109

ResetEncoding(p)

1110

struct win *p;

1111

{

1112

char *c;

1113

int encoding = p->w_encoding;

1114

1115

c = encodings[encoding].charsets;

1116

if (c)

1117

SetCharsets(p, c);

1118

#ifdef UTF8

1119

LoadFontTranslationsForEncoding(encoding);

1120

#endif

1121

if (encodings[encoding].usegr)

1122

{

1123

p->w_gr = 2;

1124

p->w_FontE = encodings[encoding].charsets[1];

1125

}

1126

else

1127

p->w_FontE = 0;

1128

if (encodings[encoding].noc1)

1129

p->w_c1 = 0;

1130

}

1131

1132

int

1133

DecodeChar(c, encoding, statep)

1134

int c;

1135

int encoding;

1136

int *statep;

1137

{

1138

int t;

1139

1140

debug2("Decoding char %02x for encoding %d\n", c, encoding);

1141

#ifdef UTF8

1142

if (encoding == UTF8)

1143

return FromUtf8(c, statep);

1144

#endif

1145

if (encoding == SJIS)

1146

{

1147

if (!*statep)

1148

{

1149

if ((0x81 <= c && c <= 0x9f) || (0xe0 <= c && c <= 0xef))

1150

{

1151

*statep = c;

1152

return -1;

1153

}

1154

return c | (KANA << 16);

1155

}

1156

t = c;

1157

c = *statep;

1158

*statep = 0;

1159

if (0x40 <= t && t <= 0xfc && t != 0x7f)

1160

{

1161

if (c <= 0x9f) c = (c - 0x81) * 2 + 0x21;

1162

else c = (c - 0xc1) * 2 + 0x21;

1163

if (t <= 0x7e) t -= 0x1f;

1164

else if (t <= 0x9e) t -= 0x20;

1165

else t -= 0x7e, c++;

1166

return (c << 8) | t | (KANJI << 16);

1167

}

1168

return t;

1169

}

1170

if (encoding == EUC_JP || encoding == EUC_KR || encoding == EUC_CN)

1171

{

1172

if (!*statep)

1173

{

1174

if (c & 0x80)

1175

{

1176

*statep = c;

1177

return -1;

1178

}

1179

return c;

1180

}

1181

t = c;

1182

c = *statep;

1183

*statep = 0;

1184

if (encoding == EUC_JP)

1185

{

1186

if (c == 0x8e)

1187

return t | (KANA << 16);

1188

if (c == 0x8f)

1189

{

1190

*statep = t | (KANJI0212 << 8);

1191

return -1;

1192

}

1193

}

1194

c &= 0xff7f;

1195

t &= 0x7f;

1196

c = c << 8 | t;

1197

if (encoding == EUC_KR)

1198

return c | (3 << 16);

1199

if (encoding == EUC_CN)

1200

return c | (1 << 16);

1201

if (c & (KANJI0212 << 16))

1202

return c;

1203

else

1204

return c | (KANJI << 16);

1205

}

1206

if (encoding == BIG5 || encoding == GBK)

1207

{

1208

if (!*statep)

1209

{

1210

if (c & 0x80)

1211

{

1212

if (encoding == GBK && c == 0x80)

1213

return 0xa4 | (('b'|0x80) << 16);

1214

*statep = c;

1215

return -1;

1216

}

1217

return c;

1218

}

1219

t = c;

1220

c = *statep;

1221

*statep = 0;

1222

c &= 0x7f;

1223

return c << 8 | t | (encoding == BIG5 ? 030 << 16 : 031 << 16);

1224

}

1225

return c | (encodings[encoding].deffont << 16);

1226

}

1227

1228

int

1229

EncodeChar(bp, c, encoding, fontp)

1230

char *bp;

1231

int c;

1232

int encoding;

1233

int *fontp;

1234

{

1235

int t, f, l;

1236

1237

debug2("Encoding char %02x for encoding %d\n", c, encoding);

1238

if (c == -1 && fontp)

1239

{

1240

if (*fontp == 0)

1241

return 0;

1242

if (bp)

1243

{

1244

*bp++ = 033;

1245

*bp++ = '(';

1246

*bp++ = 'B';

1247

}

1248

return 3;

1249

}

1250

f = c >> 16;

1251

1252

#ifdef UTF8

1253

if (encoding == UTF8)

1254

{

1255

if (f)

1256

{

1257

# ifdef DW_CHARS

1258

if (is_dw_font(f))

1259

{

1260

int c2 = c & 0xff;

1261

c = (c >> 8 & 0xff) | (f << 8);

1262

c = recode_char_dw_to_encoding(c, &c2, encoding);

1263

}

1264

else

1265

# endif

1266

{

1267

c = (c & 0xff) | (f << 8);

1268

c = recode_char_to_encoding(c, encoding);

1269

}

1270

}

1271

return ToUtf8(bp, c);

1272

}

1273

if ((c & 0xff00) && f == 0) /* is_utf8? */

1274

{

1275

# ifdef DW_CHARS

1276

if (utf8_isdouble(c))

1277

{

1278

int c2 = 0xffff;

1279

c = recode_char_dw_to_encoding(c, &c2, encoding);

1280

c = (c << 8) | (c2 & 0xff);

1281

}

1282

else

1283

# endif

1284

{

1285

c = recode_char_to_encoding(c, encoding);

1286

c = ((c & 0xff00) << 8) | (c & 0xff);

1287

}

1288

debug1("Encode: char mapped from utf8 to %x\n", c);

1289

f = c >> 16;

1290

}

1291

#endif

1292

if (f & 0x80) /* map special 96-fonts to latin1 */

1293

f = 0;

1294

1295

if (encoding == SJIS)

1296

{

1297

if (f == KANA)

1298

c = (c & 0xff) | 0x80;

1299

else if (f == KANJI)

1300

{

1301

if (!bp)

1302

return 2;

1303

t = c & 0xff;

1304

c = (c >> 8) & 0xff;

1305

t += (c & 1) ? ((t <= 0x5f) ? 0x1f : 0x20) : 0x7e;

1306

c = (c - 0x21) / 2 + ((c < 0x5f) ? 0x81 : 0xc1);

1307

*bp++ = c;

1308

*bp++ = t;

1309

return 2;

1310

}

1311

}

1312

if (encoding == EUC)

1313

{

1314

if (f == KANA)

1315

{

1316

if (bp)

1317

{

1318

*bp++ = 0x8e;

1319

*bp++ = c;

1320

}

1321

return 2;

1322

}

1323

if (f == KANJI)

1324

{

1325

if (bp)

1326

{

1327

*bp++ = (c >> 8) | 0x80;

1328

*bp++ = c | 0x80;

1329

}

1330

return 2;

1331

}

1332

if (f == KANJI0212)

1333

{

1334

if (bp)

1335

{

1336

*bp++ = 0x8f;

1337

*bp++ = c >> 8;

1338

*bp++ = c;

1339

}

1340

return 3;

1341

}

1342

}

1343

if ((encoding == EUC_KR && f == 3) || (encoding == EUC_CN && f == 1))

1344

{

1345

if (bp)

1346

{

1347

*bp++ = (c >> 8) | 0x80;

1348

*bp++ = c | 0x80;

1349

}

1350

return 2;

1351

}

1352

if ((encoding == BIG5 && f == 030) || (encoding == GBK && f == 031))

1353

{

1354

if (bp)

1355

{

1356

*bp++ = (c >> 8) | 0x80;

1357

*bp++ = c;

1358

}

1359

return 2;

1360

}

1361

if (encoding == GBK && f == 0 && c == 0xa4)

1362

c = 0x80;

1363

1364

l = 0;

1365

if (fontp && f != *fontp)

1366

{

1367

*fontp = f;

1368

if (f && f < ' ')

1369

{

1370

if (bp)

1371

{

1372

*bp++ = 033;

1373

*bp++ = '$';

1374

if (f > 2)

1375

*bp++ = '(';

1376

*bp++ = '@' + f;

1377

}

1378

l += f > 2 ? 4 : 3;

1379

}

1380

else if (f < 128)

1381

{

1382

if (f == 0)

1383

f = 'B';

1384

if (bp)

1385

{

1386

*bp++ = 033;

1387

*bp++ = '(';

1388

*bp++ = f;

1389

}

1390

l += 3;

1391

}

1392

}

1393

if (c & 0xff00)

1394

{

1395

if (bp)

1396

*bp++ = c >> 8;

1397

l++;

1398

}

1399

if (bp)

1400

*bp++ = c;

1401

return l + 1;

1402

}

1403

1404

int

1405

CanEncodeFont(encoding, f)

1406

int encoding, f;

1407

{

1408

switch(encoding)

1409

{

1410

#ifdef UTF8

1411

case UTF8:

1412

return 1;

1413

#endif

1414

case SJIS:

1415

return f == KANJI || f == KANA;

1416

case EUC:

1417

return f == KANJI || f == KANA || f == KANJI0212;

1418

case EUC_KR:

1419

return f == 3;

1420

case EUC_CN:

1421

return f == 1;

1422

case BIG5:

1423

return f == 030;

1424

case GBK:

1425

return f == 031;

1426

default:

1427

break;

1428

}

1429

return 0;

1430

}

1431

1432

#ifdef DW_CHARS

1433

int

1434

PrepareEncodedChar(c)

1435

int c;

1436

{

1437

int encoding;

1438

int t = 0;

1439

int f;

1440

1441

encoding = D_encoding;

1442

f = D_rend.font;

1443

t = D_mbcs;

1444

if (encoding == SJIS)

1445

{

1446

if (f == KANA)

1447

return c | 0x80;

1448

else if (f == KANJI)

1449

{

1450

t += (c & 1) ? ((t <= 0x5f) ? 0x1f : 0x20) : 0x7e;

1451

c = (c - 0x21) / 2 + ((c < 0x5f) ? 0x81 : 0xc1);

1452

D_mbcs = t;

1453

}

1454

return c;

1455

}

1456

if (encoding == EUC)

1457

{

1458

if (f == KANA)

1459

{

1460

AddChar(0x8e);

1461

return c | 0x80;

1462

}

1463

if (f == KANJI)

1464

{

1465

D_mbcs = t | 0x80;

1466

return c | 0x80;

1467

}

1468

if (f == KANJI0212)

1469

{

1470

AddChar(0x8f);

1471

D_mbcs = t | 0x80;

1472

return c | 0x80;

1473

}

1474

}

1475

if ((encoding == EUC_KR && f == 3) || (encoding == EUC_CN && f == 1))

1476

{

1477

D_mbcs = t | 0x80;

1478

return c | 0x80;

1479

}

1480

if ((encoding == BIG5 && f == 030) || (encoding == GBK && f == 031))

1481

return c | 0x80;

1482

return c;

1483

}

1484

#endif

1485

1486

/*
 * Convert the flen bytes in fbuf from encoding fenc to encoding tenc.
 *
 * Each byte is run through DecodeChar(); every completed character is
 * passed to EncodeChar(), and a final EncodeChar() call with -1 closes
 * the output.  If tbuf is non-null the result is stored there.  The
 * return value is the number of output bytes, so calling with tbuf == 0
 * yields the required size.
 */
int
RecodeBuf(fbuf, flen, fenc, tenc, tbuf)
unsigned char *fbuf;
int flen;
int fenc, tenc;
unsigned char *tbuf;
{
  int in, out, ch;
  int state = 0, curfont = 0;

  out = 0;
  in = 0;
  while (in < flen)
    {
      ch = DecodeChar(fbuf[in], fenc, &state);
      if (ch == -2)
        in--;           /* same input byte is fed to the decoder again */
      if (ch >= 0)
        out += EncodeChar(tbuf ? (char *)tbuf + out : 0, ch, tenc, &curfont);
      in++;
    }
  out += EncodeChar(tbuf ? (char *)tbuf + out : 0, -1, tenc, &curfont);
  return out;
}

1509

1510

#ifdef UTF8

1511

int

1512

ContainsSpecialDeffont(ml, xs, xe, encoding)

1513

struct mline *ml;

1514

int xs, xe;

1515

int encoding;

1516

{

1517

unsigned char *f, *i;

1518

int c, x, dx;

1519

1520

if (encoding == UTF8 || encodings[encoding].deffont == 0)

1521

return 0;

1522

i = ml->image + xs;

1523

f = ml->font + xs;

1524

dx = xe - xs + 1;

1525

while (dx-- > 0)

1526

{

1527

if (*f++)

1528

continue;

1529

c = *i++;

1530

x = recode_char_to_encoding(c | (encodings[encoding].deffont << 8), UTF8);

1531

if (c != x)

1532

{

1533

debug2("ContainsSpecialDeffont: yes %02x != %02x\n", c, x);

1534

return 1;

1535

}

1536

}

1537

debug("ContainsSpecialDeffont: no\n");

1538

return 0;

1539

}

1540

1541

1542

int

1543

LoadFontTranslation(font, file)

1544

int font;

1545

char *file;

1546

{

1547

char buf[1024], *myfile;

1548

FILE *f;

1549

int i;

1550

int fo;

1551

int x, u, c, ok;

1552

unsigned short (*p)[2], (*tab)[2];

1553

1554

myfile = file;

1555

if (myfile == 0)

1556

{

1557

if (font == 0 || screenencodings == 0)

1558

return -1;

1559

if (strlen(screenencodings) > sizeof(buf) - 10)

1560

return -1;

1561

sprintf(buf, "%s/%02x", screenencodings, font & 0xff);

1562

myfile = buf;

1563

}

1564

debug1("LoadFontTranslation: trying %s\n", myfile);

1565

if ((f = secfopen(myfile, "r")) == 0)

1566

return -1;

1567

i = ok = 0;

1568

for (;;)

1569

{

1570

for(; i < 12; i++)

1571

if (getc(f) != "ScreenI2UTF8"[i])

1572

break;

1573

if (getc(f) != 0) /* format */

1574

break;

1575

fo = getc(f); /* id */

1576

if (fo == EOF)

1577

break;

1578

if (font != -1 && font != fo)

1579

break;

1580

i = getc(f);

1581

x = getc(f);

1582

if (x == EOF)

1583

break;

1584

i = i << 8 | x;

1585

getc(f);

1586

while ((x = getc(f)) && x != EOF)

1587

getc(f); /* skip font name (padded to 2 bytes) */

1588

if ((p = malloc(sizeof(*p) * (i + 1))) == 0)

1589

break;

1590

tab = p;

1591

while(i > 0)

1592

{

1593

x = getc(f);

1594

x = x << 8 | getc(f);

1595

u = getc(f);

1596

c = getc(f);

1597

u = u << 8 | c;

1598

if (c == EOF)

1599

break;

1600

(*p)[0] = x;

1601

(*p)[1] = u;

1602

p++;

1603

i--;

1604

}

1605

(*p)[0] = 0;

1606

(*p)[1] = 0;

1607

if (i || (tab[0][0] & 0x8000))

1608

{

1609

free(tab);

1610

break;

1611

}

1612

if (recodetabs[fo].tab && (recodetabs[fo].flags & RECODETAB_ALLOCED) != 0)

1613

free(recodetabs[fo].tab);

1614

recodetabs[fo].tab = tab;

1615

recodetabs[fo].flags = RECODETAB_ALLOCED;

1616

debug1("Successful load of recodetab %02x\n", fo);

1617

c = getc(f);

1618

if (c == EOF)

1619

{

1620

ok = 1;

1621

break;

1622

}

1623

if (c != 'S')

1624

break;

1625

i = 1;

1626

}

1627

fclose(f);

1628

if (font != -1 && file == 0 && recodetabs[font].flags == 0)

1629

recodetabs[font].flags = RECODETAB_TRIED;

1630

return ok ? 0 : -1;

1631

}

1632

1633

void

1634

LoadFontTranslationsForEncoding(encoding)

1635

int encoding;

1636

{

1637

char *c;

1638

int f;

1639

1640

debug1("LoadFontTranslationsForEncoding: encoding %d\n", encoding);

1641

if ((c = encodings[encoding].fontlist) != 0)

1642

while ((f = (unsigned char)*c++) != 0)

1643

if (recodetabs[f].flags == 0)

1644

LoadFontTranslation(f, 0);

1645

f = encodings[encoding].deffont;

1646

if (f > 0 && recodetabs[f].flags == 0)

1647

LoadFontTranslation(f, 0);

1648

}

1649

1650

#endif /* UTF8 */

1651

1652

#else /* !ENCODINGS */

1653

1654

/* Simple version of EncodeChar to encode font changes for
 * copy/paste mode
 */

1656

1657

/*
 * Encode character 'c' (font number in bits 16 and up, character code
 * below), emitting a font selection escape sequence when the font
 * differs from *fontp.  The 'encoding' argument is not used.
 *
 * If bp is non-null the bytes are stored there; the number of bytes is
 * returned in either case.  c == -1 stands for "back to font 0" and
 * produces only the escape sequence, if one is needed.
 */
int
EncodeChar(bp, c, encoding, fontp)
char *bp;
int c;
int encoding;
int *fontp;
{
  int font, len = 0;

  if (c == -1)
    font = 0;
  else
    font = c >> 16;

  if (fontp != 0 && *fontp != font)
    {
      *fontp = font;
      if (font != 0 && font < ' ')
        {
          /* ESC $ F for fonts 1 and 2, ESC $ ( F above that */
          len = (font > 2) ? 4 : 3;
          if (bp)
            {
              *bp++ = 033;
              *bp++ = '$';
              if (font > 2)
                *bp++ = '(';
              *bp++ = '@' + font;
            }
        }
      else if (font < 128)
        {
          if (font == 0)
            font = 'B';
          len = 3;
          if (bp)
            {
              *bp++ = 033;
              *bp++ = '(';
              *bp++ = font;
            }
        }
    }
  if (c == -1)
    return len;
  if (c & 0xff00)
    {
      len++;
      if (bp)
        *bp++ = c >> 8;
    }
  if (bp)
    *bp++ = c;
  return len + 1;
}

1707

1708

#endif /* ENCODINGS */

Older »