~ubuntu-branches/ubuntu/edgy/lynx/edgy

« back to all changes in this revision

Viewing changes to WWW/Library/Implementation/SGML.c

Committer: Bazaar Package Importer
Author(s): Martin Pitt
Date: 2004-09-16 12:14:10 UTC
Revision ID: james.westby@ubuntu.com-20040916121410-cz1gu92c4nqfeyrg

Tags: upstream-2.8.5

Import upstream version 2.8.5

files added:

ABOUT-NLS

CHANGES

COPYHEADER

COPYING

INSTALLATION

LYHelp.hin

LYMessages_en.h

PROBLEMS

README

VMSPrint.com

WWW/FreeofCharge.html

WWW/Library

WWW/Library/Implementation

WWW/Library/Implementation/HTAABrow.c

WWW/Library/Implementation/HTAABrow.h

WWW/Library/Implementation/HTAAProt.c

WWW/Library/Implementation/HTAAProt.h

WWW/Library/Implementation/HTAAUtil.c

WWW/Library/Implementation/HTAAUtil.h

WWW/Library/Implementation/HTAccess.c

WWW/Library/Implementation/HTAccess.h

WWW/Library/Implementation/HTAnchor.c

WWW/Library/Implementation/HTAnchor.h

WWW/Library/Implementation/HTAssoc.c

WWW/Library/Implementation/HTAssoc.h

WWW/Library/Implementation/HTAtom.c

WWW/Library/Implementation/HTAtom.h

WWW/Library/Implementation/HTBTree.c

WWW/Library/Implementation/HTBTree.h

WWW/Library/Implementation/HTCJK.h

WWW/Library/Implementation/HTChunk.c

WWW/Library/Implementation/HTChunk.h

WWW/Library/Implementation/HTDOS.c

WWW/Library/Implementation/HTDOS.h

WWW/Library/Implementation/HTFTP.c

WWW/Library/Implementation/HTFTP.h

WWW/Library/Implementation/HTFWriter.c

WWW/Library/Implementation/HTFWriter.h

WWW/Library/Implementation/HTFile.c

WWW/Library/Implementation/HTFile.h

WWW/Library/Implementation/HTFinger.c

WWW/Library/Implementation/HTFinger.h

WWW/Library/Implementation/HTFormat.c

WWW/Library/Implementation/HTFormat.h

WWW/Library/Implementation/HTGopher.c

WWW/Library/Implementation/HTGopher.h

WWW/Library/Implementation/HTGroup.c

WWW/Library/Implementation/HTGroup.h

WWW/Library/Implementation/HTInit.h

WWW/Library/Implementation/HTLex.c

WWW/Library/Implementation/HTLex.h

WWW/Library/Implementation/HTList.c

WWW/Library/Implementation/HTList.h

WWW/Library/Implementation/HTMIME.c

WWW/Library/Implementation/HTMIME.h

WWW/Library/Implementation/HTMLDTD.c

WWW/Library/Implementation/HTMLDTD.h

WWW/Library/Implementation/HTMLGen.c

WWW/Library/Implementation/HTMLGen.h

WWW/Library/Implementation/HTNews.c

WWW/Library/Implementation/HTNews.h

WWW/Library/Implementation/HTParse.c

WWW/Library/Implementation/HTParse.h

WWW/Library/Implementation/HTPlain.c

WWW/Library/Implementation/HTPlain.h

WWW/Library/Implementation/HTRules.c

WWW/Library/Implementation/HTRules.h

WWW/Library/Implementation/HTStream.h

WWW/Library/Implementation/HTString.c

WWW/Library/Implementation/HTString.h

WWW/Library/Implementation/HTStyle.c

WWW/Library/Implementation/HTStyle.h

WWW/Library/Implementation/HTTCP.c

WWW/Library/Implementation/HTTCP.h

WWW/Library/Implementation/HTTP.c

WWW/Library/Implementation/HTTP.h

WWW/Library/Implementation/HTTelnet.c

WWW/Library/Implementation/HTTelnet.h

WWW/Library/Implementation/HTUU.c

WWW/Library/Implementation/HTUU.h

WWW/Library/Implementation/HTUtils.h

WWW/Library/Implementation/HTVMSUtils.c

WWW/Library/Implementation/HTVMSUtils.h

WWW/Library/Implementation/HTVMS_WaisProt.c

WWW/Library/Implementation/HTVMS_WaisProt.h

WWW/Library/Implementation/HTVMS_WaisUI.c

WWW/Library/Implementation/HTVMS_WaisUI.h

WWW/Library/Implementation/HTWAIS.c

WWW/Library/Implementation/HTWAIS.h

WWW/Library/Implementation/HTWSRC.c

WWW/Library/Implementation/HTWSRC.h

WWW/Library/Implementation/HText.h

WWW/Library/Implementation/HTioctl.h

WWW/Library/Implementation/LYLeaks.h

WWW/Library/Implementation/LYexit.h

WWW/Library/Implementation/SGML.c

WWW/Library/Implementation/SGML.h

WWW/Library/Implementation/UCAux.h

WWW/Library/Implementation/UCDefs.h

WWW/Library/Implementation/UCMap.h

WWW/Library/Implementation/Version.make

WWW/Library/Implementation/makefile.in

WWW/Library/Implementation/www_tcp.h

WWW/Library/Implementation/www_wait.h

WWW/Library/djgpp

WWW/Library/djgpp/CommonMakefile

WWW/Library/djgpp/makefile

WWW/Library/djgpp/makefile.sla

WWW/Library/vms

WWW/Library/vms/COPYING.LIB

WWW/Library/vms/descrip.mms

WWW/Library/vms/libmake.com

aclocal.m4

build-slang.com

build.com

cfg_defs.sh

cfg_edit.sh

config.guess

config.hin

config.sub

configure

configure.in

descrip.mms

docs

docs/CHANGES2.3

docs/CHANGES2.4

docs/CHANGES2.5

docs/CHANGES2.6

docs/CHANGES2.7

docs/CHANGES2.8

docs/CMU.announce

docs/CRAWL.announce

docs/FM.announce

docs/IBMPC-charsets.announce

docs/OS-390.announce

docs/README.TRST

docs/README.chartrans

docs/README.defines

docs/README.jp

docs/README.rootcerts

docs/README.ssl

docs/README.sslcerts

docs/SOCKETSHR.announce

docs/TCPWARE.announce

docs/VMSWAIS.announce

docs/djgpp.key

docs/pdcurses.key

docs/slang.key

docs/win-386.announce

fixed512.com

fixtext.sh

install.sh

lib/dirent.c

lib/dirent.h

lynx.cfg

lynx.hlp

lynx.man

lynx.rsp

lynx_help

lynx_help/Lynx_users_guide.html

lynx_help/about_lynx.html

lynx_help/help_files.txt

lynx_help/keystrokes

lynx_help/keystrokes/alt_edit_help.html

lynx_help/keystrokes/bashlike_edit_help.html

lynx_help/keystrokes/bookmark_help.html

lynx_help/keystrokes/cookie_help.html

lynx_help/keystrokes/dired_help.html

lynx_help/keystrokes/edit_help.html

lynx_help/keystrokes/environments.html

lynx_help/keystrokes/follow_help.html

lynx_help/keystrokes/gopher_types_help.html

lynx_help/keystrokes/history_help.html

lynx_help/keystrokes/keystroke_help.html

lynx_help/keystrokes/movement_help.html

lynx_help/keystrokes/option_help.html

lynx_help/keystrokes/other_help.html

lynx_help/keystrokes/print_help.html

lynx_help/keystrokes/scrolling_help.html

lynx_help/keystrokes/test_display.html

lynx_help/keystrokes/visited_help.html

lynx_help/keystrokes/xterm_help.html

lynx_help/lynx-dev.html

lynx_help/lynx_help_main.html

lynx_help/lynx_url_support.html

make-msc.bat

makefile.bcb

makefile.in

makefile.msc

makelynx.bat

makew32.bat

mkdirs.sh

po/POTFILES.in

po/ca.po

po/cs.po

po/da.po

po/de.po

po/et.po

po/fr.po

po/hu.po

po/it.po

po/ja.po

po/lynx.pot

po/makefile.inn

po/nl.po

po/pt_BR.po

po/readme

po/ru.po

po/sl.po

po/sv.po

po/tr.po

po/uk.po

po/zh_CN.po

po/zh_TW.po

samples

samples/blue-background.lss

samples/bright-blue.lss

samples/cernrules.txt

samples/installdirs.html

samples/jumpsUnix.html

samples/jumpsVMS.html

samples/keepviewer

samples/lynx-keymaps

samples/lynx.com

samples/lynx.lss

samples/lynxdump

samples/mailcap

samples/mailto-form.pl

samples/mild-colors.lss

samples/mime.types

scripts

scripts/cfg2html.pl

scripts/install-cfg.sh

scripts/man2hlp.sh

src/AttrList.h

src/DefaultStyle.c

src/GridText.c

src/GridText.h

src/HTAlert.c

src/HTAlert.h

src/HTFWriter.c

src/HTFont.h

src/HTForms.h

src/HTInit.c

src/HTML.c

src/HTML.h

src/HTNestedList.h

src/HTSaveToFile.h

src/LYBookmark.c

src/LYBookmark.h

src/LYCgi.c

src/LYCgi.h

src/LYCharSets.c

src/LYCharSets.h

src/LYCharUtils.c

src/LYCharUtils.h

src/LYCharVals.h

src/LYClean.c

src/LYClean.h

src/LYCookie.c

src/LYCookie.h

src/LYCurses.c

src/LYCurses.h

src/LYDownload.c

src/LYDownload.h

src/LYEdit.c

src/LYEdit.h

src/LYEditmap.c

src/LYExtern.c

src/LYExtern.h

src/LYForms.c

src/LYGCurses.h

src/LYGetFile.c

src/LYGetFile.h

src/LYGlobalDefs.h

src/LYHash.c

src/LYHash.h

src/LYHistory.c

src/LYHistory.h

src/LYJump.c

src/LYJump.h

src/LYJustify.h

src/LYKeymap.c

src/LYKeymap.h

src/LYLeaks.c

src/LYList.c

src/LYList.h

src/LYLocal.c

src/LYLocal.h

src/LYMail.c

src/LYMail.h

src/LYMain.c

src/LYMainLoop.c

src/LYMainLoop.h

src/LYMap.c

src/LYMap.h

src/LYNews.c

src/LYNews.h

src/LYOptions.c

src/LYOptions.h

src/LYPrettySrc.c

src/LYPrettySrc.h

src/LYPrint.c

src/LYPrint.h

src/LYReadCFG.c

src/LYReadCFG.h

src/LYSearch.c

src/LYSearch.h

src/LYShowInfo.c

src/LYShowInfo.h

src/LYSignal.h

src/LYStrings.c

src/LYStrings.h

src/LYStructs.h

src/LYStyle.c

src/LYStyle.h

src/LYTraversal.c

src/LYTraversal.h

src/LYUpload.c

src/LYUpload.h

src/LYUtils.c

src/LYUtils.h

src/LYVMSdef.h

src/LYexit.c

src/LYrcFile.c

src/LYrcFile.h

src/TRSTable.c

src/TRSTable.h

src/UCAuto.c

src/UCAuto.h

src/UCAux.c

src/UCdomap.c

src/UCdomap.h

src/Xsystem.c

src/chrtrans

src/chrtrans/README.format

src/chrtrans/README.tables

src/chrtrans/UCkd.h

src/chrtrans/build-chrtrans.com

src/chrtrans/build-header.com

src/chrtrans/caselower.h

src/chrtrans/cp1250_uni.tbl

src/chrtrans/cp1251_uni.tbl

src/chrtrans/cp1252_uni.tbl

src/chrtrans/cp1253_uni.tbl

src/chrtrans/cp1255_uni.tbl

src/chrtrans/cp1256_uni.tbl

src/chrtrans/cp1257_uni.tbl

src/chrtrans/cp437_uni.tbl

src/chrtrans/cp737_uni.tbl

src/chrtrans/cp775_uni.tbl

src/chrtrans/cp850_uni.tbl

src/chrtrans/cp852_uni.tbl

src/chrtrans/cp862_uni.tbl

src/chrtrans/cp864_uni.tbl

src/chrtrans/cp866_uni.tbl

src/chrtrans/cp866u_uni.tbl

src/chrtrans/cp869_uni.tbl

src/chrtrans/def7_uni.tbl

src/chrtrans/dmcs_uni.tbl

src/chrtrans/entities.h

src/chrtrans/hp_uni.tbl

src/chrtrans/iso01_uni.tbl

src/chrtrans/iso02_uni.tbl

src/chrtrans/iso03_uni.tbl

src/chrtrans/iso04_uni.tbl

src/chrtrans/iso05_uni.tbl

src/chrtrans/iso06_uni.tbl

src/chrtrans/iso07_uni.tbl

src/chrtrans/iso08_uni.tbl

src/chrtrans/iso09_uni.tbl

src/chrtrans/iso10_uni.tbl

src/chrtrans/iso15_uni.tbl

src/chrtrans/jcuken_kb.h

src/chrtrans/koi8r_uni.tbl

src/chrtrans/koi8u_uni.tbl

src/chrtrans/mac_uni.tbl

src/chrtrans/make-msc.bat

src/chrtrans/makefile.bcb

src/chrtrans/makefile.dos

src/chrtrans/makefile.in

src/chrtrans/makefile.msc

src/chrtrans/makeuctb.c

src/chrtrans/makew32.bat

src/chrtrans/mnem2_suni.tbl

src/chrtrans/mnem_suni.tbl

src/chrtrans/next_uni.tbl

src/chrtrans/pt154_uni.tbl

src/chrtrans/rfc_suni.tbl

src/chrtrans/rot13_kb.h

src/chrtrans/utf8_uni.tbl

src/chrtrans/viscii_uni.tbl

src/chrtrans/yawerty_kb.h

src/cmu_tcp.opt

src/decc.opt

src/descrip.mms

src/gnuc.opt

src/makefile.dos

src/makefile.dsl

src/makefile.in

src/makefile.wsl

src/mktime.c

src/multinet.opt

src/socketshr_tcp.opt

src/strstr.c

src/structdump.h

src/tcpwareolb.opt

src/tcpwareshr.opt

src/ucxolb.opt

src/ucxshr.opt

src/vaxc.opt

src/win_tcp.opt

test

test/ALT88592.html

test/ISO_LATIN1_test.html

test/README.txt

test/TestComment.html

test/c1.html

test/iso8859-1.html

test/iso88592.html

test/koi8-r.html

test/quickbrown.html

test/raw8bit.html

test/sgml.html

test/spaces.html

test/tabtest.html

test/unicode.html

test/utf-8-demo.html

userdefs.h

Show diffs side-by-side

added added

removed removed

WWW/Library/Implementation/SGML.c

/* General SGML Parser code SGML.c

** ========================

** This module implements an HTStream object. To parse an

** SGML file, create this object which is a parser. The object

** is (currently) created by being passed a DTD structure,

** and a target HTStructured object at which to throw the parsed stuff.

** 6 Feb 93 Binary searches used. Interface modified.
*/

#include <HTUtils.h>

/* Remove the following to disable the experimental HTML DTD parsing.

Currently only used in this source file. - kw */

#ifndef NO_EXTENDED_HTMLDTD

#define EXTENDED_HTMLDTD

#endif

#include <SGML.h>

#include <HTMLDTD.h>

#include <HTCJK.h>

#include <UCMap.h>

#include <UCDefs.h>

#include <UCAux.h>

#include <HTChunk.h>

#include <LYCharSets.h>

#include <LYCharVals.h> /* S/390 -- gil -- 0635 */

#include <LYGlobalDefs.h>

#include <LYStrings.h>

#include <LYLeaks.h>

#ifdef USE_COLOR_STYLE

# include <LYStyle.h>

#endif

#ifdef USE_PRETTYSRC

# include <LYPrettySrc.h>

#endif

#define INVALID (-1)

#ifdef USE_PRETTYSRC

char* entity_string; /* this is used for printing entity name.

Unconditionally added since redundant assigments don't hurt much*/

/*
** No-op character sink: both arguments are ignored and nothing is
** written.  handle_entity() passes it to UCPutUtf8_charstring() in
** pretty-source view in place of the target's put_character action.
*/
PRIVATE void fake_put_character ARGS2(

void*, p GCC_UNUSED,

char, c GCC_UNUSED)

{

}

#define START TRUE

#define STOP FALSE

#define PUTS_TR(x) psrc_convert_string = TRUE; PUTS(x)

#endif

/* my_casecomp() - optimized by the first character, NOT_ASCII ok */

/*
** Case-insensitive comparison of strings a and b.  When the first
** characters match (ignoring case) the result is AS_casecomp(a,b);
** otherwise it is the difference of the upper-cased first characters
** after TOASCII().  Both arguments are evaluated more than once, so
** they must be free of side effects.
*/
#define my_casecomp(a,b) ((TOUPPER(*(a)) == TOUPPER(*(b))) ? \
			AS_casecomp(a,b) : \
			(TOASCII(TOUPPER(*(a))) - TOASCII(TOUPPER(*(b)))))

#if ANSI_PREPRO

/* will use partially inlined version */

#define orig_HTChunkPutUtf8Char HTChunkPutUtf8Char

#undef HTChunkPutUtf8Char

/* ...used for comments and attributes value like href... */

/*
** Partially inlined replacement for HTChunkPutUtf8Char(): a value whose
** TOASCII() form is below 128 is stored directly while the chunk still
** has room (size < allocated); every other case is forwarded through
** orig_HTChunkPutUtf8Char.  Expands to a braced block, and evaluates
** ch and x more than once.
*/
#define HTChunkPutUtf8Char(ch,x) \
    { \
    if ((TOASCII(x) < 128) && ((ch)->size < (ch)->allocated)) \
	(ch)->data[(ch)->size++] = (char)(x); \
    else \
	orig_HTChunkPutUtf8Char(ch,x); \
    }

#if 0

#define orig_HTChunkPutc HTChunkPutc

#undef HTChunkPutc

#define HTChunkPutc(ch,x) \

{ \

if (ch->size < ch->allocated) \

ch->data[ch->size++] = x; \

else \

orig_HTChunkPutc(ch,x); \

}

#undef HTChunkTerminate

#define HTChunkTerminate(ch) \

HTChunkPutc(ch, (char)0)

#endif /* */

#endif /* ANSI_PREPRO */

100

101

/*
** Output helpers.  All three expect a variable named `context`
** (an HTStream *) to be in scope at the point of use.
*/

/* Send a string to the target through its put_string action. */
#define PUTS(str) ((*context->actions->put_string)(context->target, str))

/* Send one character to the target through its put_character action. */
#define PUTC(ch) ((*context->actions->put_character)(context->target, ch))

/*
** Hand `code` to UCPutUtf8_charstring(), with the target's put_character
** action as the output function; yields that function's return value.
*/
#define PUTUTF8(code) (UCPutUtf8_charstring((HTStream *)context->target, \
		      (putc_func_t*)(context->actions->put_character), code))

105

106

#define OPT 1

107

108

109

/*the following macros are used for pretty source view. */

110

#define IS_C(attr) (attr.type == HTMLA_CLASS)

111

112

PUBLIC HTCJKlang HTCJK = NOCJK; /* CJK enum value. */

113

PUBLIC BOOL HTPassEightBitRaw = FALSE; /* Pass 161-172,174-255 raw. */

114

PUBLIC BOOL HTPassEightBitNum = FALSE; /* Pass ^ numeric entities raw. */

115

PUBLIC BOOL HTPassHighCtrlRaw = FALSE; /* Pass 127-160,173, raw. */

116

PUBLIC BOOL HTPassHighCtrlNum = FALSE; /* Pass - raw. */

117

118

/* The State (context) of the parser

119

120

** This is passed with each call to make the parser reentrant
*/

121

122

123

124

#define MAX_ATTRIBUTES 36 /* Max number of attributes per element */

125

126

127

/* Element Stack

128

** -------------

129

** This allows us to return down the stack reselecting styles.

130

** As we return, attribute values will be garbage in general.
*/

131

132

typedef struct _HTElement HTElement;

133

struct _HTElement {

134

HTElement * next; /* Previously nested element or 0 */

135

HTTag* tag; /* The tag at this level */

136

};

137

138

/*
** States of the SGML parser.  S_text is explicitly 0; the remaining
** enumerators follow in the order listed, so their numeric values are
** S_attr == 1 through S_value == 42.  state_name() maps each one to
** its spelling for trace output.
*/
typedef enum {
    S_text = 0,
    S_attr,
    S_attr_gap,
    S_comment,
    S_cro,
    S_doctype,
    S_dollar,
    S_dollar_dq,
    S_dollar_paren,
    S_dollar_paren_dq,
    S_dollar_paren_sq,
    S_dollar_sq,
    S_dquoted,
    S_end,
    S_entity,
    S_equals,
    S_ero,
    S_esc,
    S_esc_dq,
    S_esc_sq,
    S_exclamation,
    S_in_kanji,
    S_incro,
    S_junk_pi,
    S_junk_tag,
    S_litteral,
    S_marked,
    S_nonascii_text,
    S_nonascii_text_dq,
    S_nonascii_text_sq,
    S_paren,
    S_paren_dq,
    S_paren_sq,
    S_pcdata,
    S_script,
    S_sgmlatt,
    S_sgmlele,
    S_sgmlent,
    S_squoted,
    S_tag,
    S_tag_gap,
    S_tagname_slash,
    S_value
} sgml_state;

183

184

/* Internal Context Data Structure

185

** -------------------------------
*/

186

187

struct _HTStream {

188

189

CONST HTStreamClass * isa; /* inherited from HTStream */

190

191

CONST SGML_dtd *dtd;

192

CONST HTStructuredClass *actions; /* target class */

193

HTStructured *target; /* target object */

194

195

HTTag *current_tag;

196

HTTag *slashedtag;

197

CONST HTTag *unknown_tag;

198

BOOL inSELECT;

199

BOOL no_lynx_specialcodes;

200

int current_attribute_number;

201

HTChunk *string;

202

int leading_spaces;

203

int trailing_spaces;

204

HTElement *element_stack;

205

sgml_state state;

206

unsigned char kanji_buf;

207

#ifdef CALLERDATA

208

void * callerData;

209

#endif /* CALLERDATA */

210

BOOL present[MAX_ATTRIBUTES]; /* Flags: attribute is present? */

211

char * value[MAX_ATTRIBUTES]; /* NULL, or strings alloc'd with StrAllocCopy_extra() */

212

213

BOOL lead_exclamation;

214

BOOL first_dash;

215

BOOL end_comment;

216

BOOL doctype_bracket;

217

BOOL first_bracket;

218

BOOL second_bracket;

219

BOOL isHex;

220

221

HTParentAnchor * node_anchor;

222

LYUCcharset * inUCI; /* pointer to anchor UCInfo */

223

int inUCLYhndl; /* charset we are fed */

224

LYUCcharset * outUCI; /* anchor UCInfo for target */

225

int outUCLYhndl; /* charset for target */

226

char utf_count;

227

UCode_t utf_char;

228

char utf_buf[8];

229

char * utf_buf_p;

230

UCTransParams T;

231

int current_tag_charset; /* charset to pass attributes */

232

233

char * recover;

234

int recover_index;

235

char * include;

236

char * active_include;

237

int include_index;

238

char * url;

239

char * csi;

240

int csi_index;

241

#ifdef USE_PRETTYSRC

242

BOOL cur_attr_is_href;

243

BOOL cur_attr_is_name;

244

BOOL seen_nonwhite_in_junk_tag;

245

#endif

246

};

247

248

#ifndef NO_LYNX_TRACE

249

/*
** Return the spelling of parser state n (e.g. "S_text") for trace
** output.  A value that matches no enumerator yields "?".  The result
** points at a string literal and must not be modified or freed.
*/
PRIVATE char *state_name ARGS1(sgml_state, n)
{
    char *result = "?";

    switch (n) {
    case S_attr:		result = "S_attr";		break;
    case S_attr_gap:		result = "S_attr_gap";		break;
    case S_comment:		result = "S_comment";		break;
    case S_cro:			result = "S_cro";		break;
    case S_doctype:		result = "S_doctype";		break;
    case S_dollar:		result = "S_dollar";		break;
    case S_dollar_dq:		result = "S_dollar_dq";		break;
    case S_dollar_paren:	result = "S_dollar_paren";	break;
    case S_dollar_paren_dq:	result = "S_dollar_paren_dq";	break;
    case S_dollar_paren_sq:	result = "S_dollar_paren_sq";	break;
    case S_dollar_sq:		result = "S_dollar_sq";		break;
    case S_dquoted:		result = "S_dquoted";		break;
    case S_end:			result = "S_end";		break;
    case S_entity:		result = "S_entity";		break;
    case S_equals:		result = "S_equals";		break;
    case S_ero:			result = "S_ero";		break;
    case S_esc:			result = "S_esc";		break;
    case S_esc_dq:		result = "S_esc_dq";		break;
    case S_esc_sq:		result = "S_esc_sq";		break;
    case S_exclamation:		result = "S_exclamation";	break;
    case S_in_kanji:		result = "S_in_kanji";		break;
    case S_incro:		result = "S_incro";		break;
    case S_junk_pi:		result = "S_junk_pi";		break;
    case S_junk_tag:		result = "S_junk_tag";		break;
    case S_litteral:		result = "S_litteral";		break;
    case S_marked:		result = "S_marked";		break;
    case S_nonascii_text:	result = "S_nonascii_text";	break;
    case S_nonascii_text_dq:	result = "S_nonascii_text_dq";	break;
    case S_nonascii_text_sq:	result = "S_nonascii_text_sq";	break;
    case S_paren:		result = "S_paren";		break;
    case S_paren_dq:		result = "S_paren_dq";		break;
    case S_paren_sq:		result = "S_paren_sq";		break;
    case S_pcdata:		result = "S_pcdata";		break;
    case S_script:		result = "S_script";		break;
    case S_sgmlatt:		result = "S_sgmlatt";		break;
    case S_sgmlele:		result = "S_sgmlele";		break;
    case S_sgmlent:		result = "S_sgmlent";		break;
    case S_squoted:		result = "S_squoted";		break;
    case S_tag:			result = "S_tag";		break;
    case S_tag_gap:		result = "S_tag_gap";		break;
    case S_tagname_slash:	result = "S_tagname_slash";	break;
    case S_text:		result = "S_text";		break;
    case S_value:		result = "S_value";		break;
    }
    return result;
}

299

#endif

300

301

/* storage for Element Stack */

302

#define DEPTH 10

303

static HTElement pool[DEPTH];

304

static int depth = 0;

305

306

/*
** Hand out storage for one element-stack entry.
**
** Increments `depth`.  While depth is within DEPTH the entry is the
** slot pool[depth-1]; beyond that it is obtained with malloc(), whose
** result is returned unchecked -- so the caller may receive NULL and
** has to test for it.  The storage is not initialized here.
*/
PRIVATE HTElement* pool_alloc NOARGS
{
    depth++;
    if (depth > DEPTH)
	return (HTElement*) malloc(sizeof(HTElement));
    return (pool + depth - 1);
}

313

314

/*
** Give back an entry obtained from pool_alloc().
**
** The entry is released with FREE() only while `depth` exceeds DEPTH,
** i.e. when it is taken to be a malloc'd one; entries from the static
** pool need no release.  `depth` is then decremented.  This relies on
** entries being returned in the reverse order of allocation.
*/
PRIVATE void pool_free ARGS1(HTElement*, e)
{
    if (depth > DEPTH)
	FREE(e);
    depth--;
    return;
}

321

322

#ifdef USE_PRETTYSRC

323

324

/*
** Emit the markup configured for one lexeme in pretty-source view.
**
**	context	parser context whose target receives the elements
**	lexeme	index into lexeme_start[] / lexeme_end[]
**	start	TRUE selects the lexeme_start list, FALSE the lexeme_end list
**
** Walks the selected HT_tagspec list; each entry is sent to the target
** as a start_element or an end_element call, depending on the entry's
** own `start` flag.
*/
PRIVATE void HTMLSRC_apply_markup ARGS3(
	HTStream *,	context,
	HTlexeme,	lexeme,
	BOOL,		start)
{
    HT_tagspec* ts = *( ( start ? lexeme_start : lexeme_end ) + lexeme);

    while (ts) {
#ifdef USE_COLOR_STYLE
	if (ts->start) {
	    /* Force the style and class recorded in the tagspec. */
	    current_tag_style = ts->style;
	    force_current_tag_style = TRUE;
	    forced_classname = ts->class_name;
	    force_classname = TRUE;
	}
#endif
	CTRACE((tfp,ts->start ? "SRCSTART %d\n" : "SRCSTOP %d\n",(int)lexeme));
	if (ts->start)
	    (*context->actions->start_element)(
		context->target,
		ts->element,
		ts->present,
		(CONST char **)ts->value,
		context->current_tag_charset,
		(char **)&context->include);
	else
	    (*context->actions->end_element)(
		context->target,
		ts->element,
		(char **)&context->include);
	ts = ts->next;
    }
}

357

358

#if ANSI_PREPRO

359

# define PSRCSTART(x) HTMLSRC_apply_markup(context,HTL_##x,START)

360

# define PSRCSTOP(x) HTMLSRC_apply_markup(context,HTL_##x,STOP)

361

#else

362

# define PSRCSTART(x) HTMLSRC_apply_markup(context,HTL_/**/x,START)

363

# define PSRCSTOP(x) HTMLSRC_apply_markup(context,HTL_/**/x,STOP)

364

#endif

365

366

#define attr_is_href context->cur_attr_is_href

367

#define attr_is_name context->cur_attr_is_name

368

#endif

369

370

/*
** Establish the output charset for the parser and derive the in->out
** translation parameters from it.
**
**	context	parser context; outUCI, outUCLYhndl, T and
**		current_tag_charset are written
**	anchor	anchor whose per-stage charset info is consulted/updated
**	chndl	charset handle for the target, or < 0 if none was set
**
** With chndl < 0 a handle is sought in turn from the STRUCTURED stage,
** the HTEXT stage and finally current_char_set; it is then recorded on
** both stages with UCT_SETBY_DEFAULT and the context's output fields
** are re-read from the anchor.
*/
PRIVATE void set_chartrans_handling ARGS3(
	HTStream *,		context,
	HTParentAnchor *,	anchor,
	int,			chndl)
{
    if (chndl < 0) {
	/*
	**  Nothing was set for the parser in earlier stages,
	**  so the HTML parser's UCLYhndl should still be its
	**  default. - FM
	*/
	chndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_STRUCTURED);
	if (chndl < 0)
	    /*
	    **	That wasn't set either, so seek the HText default. - FM
	    */
	    chndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_HTEXT);
	if (chndl < 0)
	    /*
	    **	That wasn't set either, so assume the current display
	    **	character set. - FM
	    */
	    chndl = current_char_set;
	/*
	**  Try to set the HText and HTML stages' chartrans info
	**  with the default lock level (will not be changed if
	**  it was set previously with a higher lock level). - FM
	*/
	HTAnchor_setUCInfoStage(anchor, chndl,
				UCT_STAGE_HTEXT,
				UCT_SETBY_DEFAULT);
	HTAnchor_setUCInfoStage(anchor, chndl,
				UCT_STAGE_STRUCTURED,
				UCT_SETBY_DEFAULT);
	/*
	**  Get the chartrans info for output to the HTML parser. - FM
	**
	**  NOTE(review): the handle is read from context->node_anchor
	**  while the info is read from `anchor`; presumably the caller
	**  always passes the same anchor -- confirm.
	*/
	context->outUCI = HTAnchor_getUCInfoStage(anchor,
						  UCT_STAGE_STRUCTURED);
	context->outUCLYhndl = HTAnchor_getUCLYhndl(context->node_anchor,
						    UCT_STAGE_STRUCTURED);
    }
    /*
    **	Set the in->out transformation parameters. - FM
    */
    UCSetTransParams(&context->T,
		     context->inUCLYhndl, context->inUCI,
		     context->outUCLYhndl, context->outUCI);
    /*
    **	This is intended for passing the SGML parser's input
    **	charset as an argument in each call to the HTML
    **	parser's start tag function, but it would be better
    **	to call a Lynx_HTML_parser function to set an element
    **	in its HTStructured object, itself, if this were
    **	needed. - FM
    */
    if (HTCJK != NOCJK) {
	context->current_tag_charset = -1;
    } else if (context->T.transp) {
	context->current_tag_charset = context->inUCLYhndl;
    } else if (context->T.decode_utf8) {
	context->current_tag_charset = context->inUCLYhndl;
    } else if (context->T.do_8bitraw ||
	       context->T.use_raw_char_in) {
	context->current_tag_charset = context->inUCLYhndl;
    } else if (context->T.output_utf8 ||
	       context->T.trans_from_uni) {
	context->current_tag_charset = UCGetLYhndl_byMIME("utf-8");
    } else {
	context->current_tag_charset = LATIN1;
    }
}

442

443

/*
** Re-read the PARSER-stage charset from the context's anchor and, if it
** differs from the one in use, switch the context over to it.
**
** Nothing happens when the new handle is negative, equals
** context->inUCLYhndl, or has no UCInfo attached.  Otherwise the
** context's in/out charset fields are refreshed from the anchor and
** set_chartrans_handling() recomputes the translation parameters.
*/
PRIVATE void change_chartrans_handling ARGS1(
	HTStream *,		context)
{
    int new_LYhndl = HTAnchor_getUCLYhndl(context->node_anchor,
					  UCT_STAGE_PARSER);
    if (new_LYhndl != context->inUCLYhndl &&
	new_LYhndl >= 0) {
	/*
	 *  Something changed. but ignore if a META wants an unknown charset.
	 */
	LYUCcharset * new_UCI = HTAnchor_getUCInfoStage(context->node_anchor,
							UCT_STAGE_PARSER);
	if (new_UCI) {
	    LYUCcharset * next_UCI = HTAnchor_getUCInfoStage(
				    context->node_anchor, UCT_STAGE_STRUCTURED
							    );
	    int next_LYhndl = HTAnchor_getUCLYhndl(
				    context->node_anchor, UCT_STAGE_STRUCTURED
						  );
	    context->inUCI = new_UCI;
	    context->inUCLYhndl = new_LYhndl;
	    context->outUCI = next_UCI;
	    context->outUCLYhndl = next_LYhndl;
	    set_chartrans_handling(context,
				   context->node_anchor, next_LYhndl);
	}
    }
}

471

472

#ifdef USE_COLOR_STYLE

473

#include <AttrList.h>

474

static int current_is_class = 0;

475

#endif

476

477

/* Handle Attribute

478

** ----------------
*/

479

480

/* PUBLIC CONST char * SGML_default = ""; ?? */

481

482

/*
** Look up attribute name s for the tag currently being parsed.
**
**	context	parser context; current_tag selects the attribute table
**	s	attribute name, compared case-insensitively
**
** On a match context->current_attribute_number becomes the attribute's
** index.  Outside pretty-source view the attribute is also flagged in
** present[] and its old value cleared; in pretty-source view only the
** cur_attr_is_name / cur_attr_is_href flags are set.  An unknown name
** sets current_attribute_number to INVALID.  For the unknown tag the
** function returns early (in pretty-source view after storing 1, i.e.
** "anything but INVALID").
*/
PRIVATE void handle_attribute_name ARGS2(
	HTStream *,	context,
	CONST char *,	s)
{
    HTTag * tag = context->current_tag;
    attr * attributes = tag->attributes;
    int high, low, i, diff;

#ifdef USE_PRETTYSRC
    if (psrc_view) {
	attr_is_href = FALSE;
	attr_is_name = FALSE;
    }
#endif
    /*
    **	Ignore unknown tag. - KW
    */
    if (tag == context->unknown_tag) {
#ifdef USE_PRETTYSRC
	if (psrc_view)
	    context->current_attribute_number = 1; /* anything !=INVALID */
#endif
	return;
    }

    /*
    **	Binary search for attribute name.  The table is searched over
    **	the half-open range [low, high).
    */
    low = 0;
    high = tag->number_of_attributes;
    while (high > low) {
	i = (low + (high-low)/2);
	diff = my_casecomp(attributes[i].name, s);
	if (diff == 0) {			/* success: found it */
	    context->current_attribute_number = i;
#ifdef USE_PRETTYSRC
	    if (psrc_view) {
		attr_is_name = (BOOL) (attributes[i].type == HTMLA_ANAME);
		attr_is_href = (BOOL) (attributes[i].type == HTMLA_HREF);
	    } else
#endif
	    {
		context->present[i] = YES;
		Clear_extra(context->value[i]);
#ifdef USE_COLOR_STYLE
#   ifdef USE_PRETTYSRC
		current_is_class = IS_C(attributes[i]);
#   else
		current_is_class = (!strcasecomp("class", s));
#   endif
		CTRACE((tfp, "SGML: found attribute %s, %d\n", s, current_is_class));
#endif
	    }
	    return;
	} /* if */

	if (diff < 0)
	    low = i+1;
	else
	    high = i;
    } /* while */

    CTRACE((tfp, "SGML: Unknown attribute %s for tag %s\n",
	   s, context->current_tag->name));
    context->current_attribute_number = INVALID;	/* Invalid */
}

544

545

546

/* Handle attribute value

547

** ----------------------
*/

548

549

/*
** Store value s for the attribute selected by the preceding
** handle_attribute_name() call.
**
**	context	parser context
**	s	attribute value, zero terminated
**
** If current_attribute_number is INVALID the value is dropped (and
** traced).  Otherwise a copy is stored in context->value[]; with
** USE_COLOR_STYLE a CLASS value is additionally copied to class_string.
** In every case current_attribute_number is reset to INVALID afterwards.
*/
PRIVATE void handle_attribute_value ARGS2(
	HTStream *,	context,
	CONST char *,	s)
{
    if (context->current_attribute_number != INVALID) {
	StrAllocCopy_extra(context->value[context->current_attribute_number], s);
#ifdef USE_COLOR_STYLE
	if (current_is_class)
	{
	    /*
	    **	strncpy() does not terminate the destination when s has
	    **	TEMPSTRINGSIZE or more characters, so copy one byte less
	    **	and terminate explicitly.  Assumes class_string holds at
	    **	least TEMPSTRINGSIZE bytes (its declaration is not in
	    **	this file).
	    */
	    strncpy (class_string, s, TEMPSTRINGSIZE - 1);
	    class_string[TEMPSTRINGSIZE - 1] = '\0';
	    CTRACE((tfp, "SGML: class is '%s'\n", s));
	}
	else
	{
	    CTRACE((tfp, "SGML: attribute value is '%s'\n", s));
	}
#endif
    } else {
	CTRACE((tfp, "SGML: Attribute value %s ***ignored\n", s));
    }
    context->current_attribute_number = INVALID; /* can't have two assignments! */
}

571

572

573

574

/*
** Translate some Unicodes to Lynx special codes and output them.
** Special codes are the ones whose output depends on parsing.
**
** Additional handling, such as bidirectional text if necessary,
** may be called from here: zwnj (8204), zwj (8205), lrm (8206), rlm (8207)
** - currently they are ignored in SGML.c and LYCharUtils.c
** but also in UCdomap.c because they are non-printable...
**
*/

581

582

583

/*
** Output the Lynx special code that stands for Unicode value `code`,
** if there is one.
**
**	context	parser context supplying the output target
**	code	Unicode value to examine
**
** Returns YES when code is nbsp, shy, ensp (8194), thinsp (8201) or
** emsp (8195); NO for any other value, and always NO when
** context->no_lynx_specialcodes is set.  In pretty-source view nothing
** is written, but the return value is the same.
*/
PRIVATE BOOL put_special_unicodes ARGS2(
	HTStream *,	context,
	UCode_t,	code)
{
    /* (Tgf_nolyspcl) */
    if (context->no_lynx_specialcodes) {
	/*
	**  We were asked by a "DTD" flag to not generate lynx specials. - kw
	*/
	return NO;
    }

    if (code == CH_NBSP) {  /* S/390 -- gil -- 0657 */
	/*
	**  Use Lynx special character for nbsp.
	*/
#ifdef USE_PRETTYSRC
	if (!psrc_view)
#endif
	PUTC(HT_NON_BREAK_SPACE);
    } else if (code == CH_SHY) {
	/*
	**  Use Lynx special character for shy.
	*/
#ifdef USE_PRETTYSRC
	if (!psrc_view)
#endif
	PUTC(LY_SOFT_HYPHEN);
    } else if (code == 8194 || code == 8201) {
	/*
	**  Use Lynx special character for ensp or thinsp.
	**
	**  Originally, Lynx use space '32' as word delimiter and omits this
	**  space at end of line if word is wrapped to the next line.  There
	**  are several other spaces in the Unicode repertoire and we should
	**  teach Lynx to understand them, not only as regular characters but
	**  in the context of line wrapping.  Unfortunately, if we use
	**  HT_EN_SPACE we override the chartrans tables for those spaces
	**  with a single '32' for all (but do line wrapping more fancy).
	**
	**  We may treat emsp as one or two ensp (below).
	*/
#ifdef USE_PRETTYSRC
	if (!psrc_view)
#endif
	PUTC(HT_EN_SPACE);
    } else if (code == 8195) {
	/*
	**  Use Lynx special character for emsp.
	*/
#ifdef USE_PRETTYSRC
	if (!psrc_view) {
#endif
	/* PUTC(HT_EN_SPACE);  let's stay with a single space :) */
	PUTC(HT_EN_SPACE);
#ifdef USE_PRETTYSRC
	}
#endif
    } else {
	/*
	**  Return NO if nothing done.
	*/
	return NO;
    }
    /*
    **	We have handled it.
    */
    return YES;
}

652

653

#ifdef USE_PRETTYSRC

654

/*
** Pretty-source view: write a named entity reference back out as
** source text, wrapped in the markup configured for the `entity`
** lexeme.
**
**	context	parser context supplying the output target
**	term	character that terminated the entity; written after the
**		name unless it is 0
**
** The name itself is taken from the global entity_string.
*/
PRIVATE void put_pretty_entity ARGS2(HTStream *, context, int, term)
{
    PSRCSTART(entity);
    PUTC('&');
    PUTS(entity_string);
    if (term)
	PUTC((char)term);
    PSRCSTOP(entity);
}

663

664

/*
** Pretty-source view: write a numeric character reference back out as
** source text, wrapped in the markup configured for the `entity`
** lexeme.  The prefix is "&#x" when context->isHex is set and "&#"
** otherwise; the digits come from the global entity_string and a ';'
** is always appended.
*/
PRIVATE void put_pretty_number ARGS1(HTStream *, context)
{
    PSRCSTART(entity);
    PUTS( (context->isHex ? "&#x" : "&#") );
    PUTS(entity_string);
    PUTC(';');
    PSRCSTOP(entity);
}

672

#endif /* USE_PRETTYSRC */

673

674

/* Handle entity

675

** -------------

676

677

** On entry,

678

** s contains the entity name zero terminated

679

** Bugs:

680

** If the entity name is unknown, the terminator is treated as

681

** a printable non-special character in all cases, even if it is '<'

682

** Bug-fix:

683

** Modified SGML_character() so we only come here with terminator

684

** as '\0' and check a FoundEntity flag. -- Foteos Macrides

685

686

** Modified more (for use with Lynx character translation code):
*/

687

688

PRIVATE char replace_buf [64]; /* buffer for replacement strings */

689

PRIVATE BOOL FoundEntity = FALSE;

690

691

PRIVATE void handle_entity ARGS2(

692

HTStream *, context,

693

char, term)

694

{

695

UCode_t code;

696

long uck = -1;

697

CONST char *s = context->string->data;

698

699

700

** Handle all entities normally. - FM

701

702

FoundEntity = FALSE;

703

if ((code = HTMLGetEntityUCValue(s)) != 0) {

704

705

** We got a Unicode value for the entity name.

706

** Check for special Unicodes. - FM

707

708

if (put_special_unicodes(context, code)) {

709

#ifdef USE_PRETTYSRC

710

if (psrc_view) {

711

put_pretty_entity(context, term);

712

}

713

#endif

714

FoundEntity = TRUE;

715

return;

716

}

717

718

** Seek a translation from the chartrans tables.

719

720

if ((uck = UCTransUniChar(code, context->outUCLYhndl)) >= 32 &&

721

/* =============== work in ASCII below here =============== S/390 -- gil -- 0672 */

722

uck < 256 &&

723

(uck < 127 ||

724

uck >= LYlowest_eightbit[context->outUCLYhndl])) {

725

#ifdef USE_PRETTYSRC

726

if (psrc_view) {

727

put_pretty_entity(context, term);

728

} else

729

#endif

730

PUTC(FROMASCII((char)uck));

731

FoundEntity = TRUE;

732

return;

733

} else if ((uck == -4 ||

734

(context->T.repl_translated_C0 &&

735

uck > 0 && uck < 32)) &&

736

737

** Not found; look for replacement string.

738

739

(uck = UCTransUniCharStr(replace_buf, 60, code,

740

context->outUCLYhndl, 0) >= 0)) {

741

#ifdef USE_PRETTYSRC

742

if (psrc_view) {

743

put_pretty_entity(context, term);

744

} else

745

#endif

746

PUTS(replace_buf);

747

FoundEntity = TRUE;

748

return;

749

}

750

751

** If we're displaying UTF-8, try that now. - FM

752

753

#ifndef USE_PRETTYSRC

754

if (context->T.output_utf8 && PUTUTF8(code)) {

755

FoundEntity = TRUE;

756

return;

757

}

758

#else

759

if (context->T.output_utf8 && (psrc_view ?

760

(UCPutUtf8_charstring((HTStream *)context->target,

761

(putc_func_t*)(fake_put_character), code)): PUTUTF8(code) ) ) {

762

763

if (psrc_view) {

764

put_pretty_entity(context, term);

765

}

766

767

FoundEntity = TRUE;

768

return;

769

}

770

#endif

771

772

** If it's safe ASCII, use it. - FM

773

774

if (code >= 32 && code < 127) {

775

#ifdef USE_PRETTYSRC

776

if (psrc_view) {

777

put_pretty_entity(context, term);

778

} else

779

#endif

780

781

PUTC(FROMASCII((char)code));

782

FoundEntity = TRUE;

783

return;

784

}

785

/* =============== work in ASCII above here =============== S/390 -- gil -- 0682 */

786

787

** Ignore zwnj (8204) and zwj (8205), if we get to here.

788

** Note that zwnj may have been handled as

789

** by the calling function. - FM

790

791

if (!strcmp(s, "zwnj") ||

792

!strcmp(s, "zwj")) {

793

CTRACE((tfp, "handle_entity: Ignoring '%s'.\n", s));

794

#ifdef USE_PRETTYSRC

795

if (psrc_view) {

796

put_pretty_entity(context, term);

797

}

798

#endif

799

FoundEntity = TRUE;

800

return;

801

}

802

803

** Ignore lrm (8206), and rln (8207), if we get to here. - FM

804

805

if (!strcmp(s, "lrm") ||

806

!strcmp(s, "rlm")) {

807

CTRACE((tfp, "handle_entity: Ignoring '%s'.\n", s));

808

#ifdef USE_PRETTYSRC

809

if (psrc_view) {

810

put_pretty_entity(context, term);

811

}

812

#endif

813

FoundEntity = TRUE;

814

return;

815

}

816

}

817

818

819

** If entity string not found, display as text.

820

821

#ifdef USE_PRETTYSRC

822

if (psrc_view)

823

PSRCSTART(badseq);

824

#endif

825

CTRACE((tfp, "SGML: Unknown entity '%s' %ld %ld\n", s, (long)code, uck)); /* S/390 -- gil -- 0695 */

826

PUTC('&');

827

PUTS(s);

828

if (term != '\0')

829

PUTC(term);

830

#ifdef USE_PRETTYSRC

831

if (psrc_view)

832

PSRCSTOP(badseq);

833

#endif

834

}

835

836

837

/* Handle comment

838

** --------------

839

840

PRIVATE void handle_comment ARGS1(

841

HTStream *, context)

842

{

843

CONST char *s = context->string->data;

844

845

CTRACE((tfp, "SGML Comment:\n<%s>\n", s));

846

847

if (context->csi == NULL &&

848

strncmp(s, "!--#", 4) == 0 &&

849

LYCheckForCSI(context->node_anchor, (char **)&context->url) == TRUE) {

850

LYDoCSI(context->url, s, (char **)&context->csi);

851

} else {

852

LYCommentHacks(context->node_anchor, context->string->data);

853

}

854

855

return;

856

}

857

858

859

/* Handle identifier

860

** -----------------

861

862

PRIVATE void handle_identifier ARGS1(

863

HTStream *, context)

864

{

865

CONST char *s = context->string->data;

866

867

CTRACE((tfp, "SGML Identifier:\n<%s>\n", s));

868

869

return;

870

}

871

872

873

/* Handle doctype

874

** --------------

875

876

PRIVATE void handle_doctype ARGS1(

877

HTStream *, context)

878

{

879

CONST char *s = context->string->data;

880

881

CTRACE((tfp, "SGML Doctype:\n<%s>\n", s));

882

883

return;

884

}

885

886

PRIVATE void SGML_write PARAMS((

887

HTStream * me,

888

CONST char * s,

889

int l));

890

891

/* Handle marked

892

** -------------

893

894

PRIVATE void handle_marked ARGS1(

895

HTStream *, context)

896

{

897

CONST char *s = context->string->data;

898

899

CTRACE((tfp, "SGML Marked Section:\n<%s>\n", s));

900

901

if (!strncmp(context->string->data, "![INCLUDE[", 10)) {

902

context->string->data[context->string->size - 3] = '\0';

903

StrAllocCat(context->include, context->string->data + 10);

904

/* @@@ This needs to take charset into account! @@@

905

the wrong assumptions will be made about the data's

906

charset once it is in include - kw */

907

908

} else if (!strncmp(context->string->data, "![CDATA[", 8)) {

909

(*context->actions->_write)(context->target,

910

context->string->data + 8,

911

context->string->size - 11);

912

913

}

914

return;

915

}

916

917

918

/* Handle sgmlent

919

** --------------

920

921

PRIVATE void handle_sgmlent ARGS1(

922

HTStream *, context)

923

{

924

CONST char *s = context->string->data;

925

926

CTRACE((tfp, "SGML Entity Declaration:\n<%s>\n", s));

927

928

return;

929

}

930

931

932

/* Handle sgmlent

933

** --------------

934

935

PRIVATE void handle_sgmlele ARGS1(

936

HTStream *, context)

937

{

938

CONST char *s = context->string->data;

939

940

CTRACE((tfp, "SGML Element Declaration:\n<%s>\n", s));

941

942

return;

943

}

944

945

946

/* Handle sgmlatt

947

** --------------

948

949

PRIVATE void handle_sgmlatt ARGS1(

950

HTStream *, context)

951

{

952

CONST char *s = context->string->data;

953

954

CTRACE((tfp, "SGML Attribute Declaration:\n<%s>\n", s));

955

956

return;

957

}

958

959

960

/*
 *  Convenience macros - tags (elements) are identified sometimes
 *  by an int or enum value ('TAGNUM'), sometimes
 *  by a pointer to HTTag ('TAGP'). - kw
 *
 *  TAGNUM_OF_TAGP and TAGP_OF_TAGNUM (and everything built on them)
 *  expand to an expression using a variable named 'context' that must
 *  be in scope at the point of use.  Macro parameters are parenthesized
 *  so that compound argument expressions expand correctly; note that
 *  ALT_TAGNUM and NORMAL_TAGNUM evaluate their argument twice.
 */
#define TAGNUM_OF_TAGP(t) ((t) - context->dtd->tags)
#define TAGP_OF_TAGNUM(e) (context->dtd->tags + (e))

/*
 *  The following implement special knowledge about OBJECT.
 *  As long as HTML_OBJECT is the only tag for which an alternative
 *  variant exist, they can be simple macros. - kw
 */
/* does 'TAGNUM' e have an alternative (variant) parsing mode? */
#define HAS_ALT_TAGNUM(e) ((e) == HTML_OBJECT)

/* return 'TAGNUM' of the alternative mode for 'TAGNUM' e, if any. */
#define ALT_TAGNUM(e) (((e) == HTML_OBJECT) ? HTML_ALT_OBJECT : (e))

/* return 'TAGNUM' of the normal mode for 'TAGNUM' e which may be alt. */
#define NORMAL_TAGNUM(e) (((e) >= HTML_ELEMENTS) ? HTML_OBJECT : (e))

/* More convenience stuff. - kw */
#define ALT_TAGP_OF_TAGNUM(e) TAGP_OF_TAGNUM(ALT_TAGNUM(e))
#define NORMAL_TAGP_OF_TAGNUM(e) TAGP_OF_TAGNUM(NORMAL_TAGNUM(e))

#define ALT_TAGP(t) ALT_TAGP_OF_TAGNUM(TAGNUM_OF_TAGP(t))
#define NORMAL_TAGP(t) NORMAL_TAGP_OF_TAGNUM(TAGNUM_OF_TAGP(t))

987

988

989

#ifdef EXTENDED_HTMLDTD

990

991

/*
**  Decide whether new_tag may appear inside stacked_tag.
**
**  When 'direct' is true the contains/contained class masks are used,
**  otherwise the icontains/icontained masks.  If both pointers name
**  the same tag, the Tgc_same bit is tested in both masks; otherwise
**  each tag's tagclass is tested against the other's mask.
**  Returns YES if either tag pointer is NULL.
*/
PRIVATE BOOL element_valid_within ARGS3(
    HTTag *,    new_tag,
    HTTag *,    stacked_tag,
    BOOL,       direct)
{
    TagClass usecontains, usecontained;
    if (!stacked_tag || !new_tag)
        return YES;
    usecontains = (direct ? stacked_tag->contains : stacked_tag->icontains);
    usecontained = (direct ? new_tag->contained : new_tag->icontained);
    if (new_tag == stacked_tag)
        return (BOOL) ((Tgc_same & usecontains) &&
                       (Tgc_same & usecontained));
    else
        return (BOOL) ((new_tag->tagclass & usecontains) &&
                       (stacked_tag->tagclass & usecontained));
}

1008

1009

typedef enum {

1010

close_NO = 0,

1011

close_error = 1,

1012

close_valid = 2

1013

} canclose_t;

1014

1015

PRIVATE canclose_t can_close ARGS2(

1016

HTTag *, new_tag,

1017

HTTag *, stacked_tag)

1018

{

1019

if (!stacked_tag)

1020

return close_NO;

1021

if (stacked_tag->flags & Tgf_endO)

1022

return close_valid;

1023

else if (new_tag == stacked_tag)

1024

return ((Tgc_same & new_tag->canclose) ? close_error : close_NO);

1025

else

1026

return ((stacked_tag->tagclass & new_tag->canclose) ?

1027

close_error : close_NO);

1028

}

1029

1030

/*
**  Close the element on top of the stack.
**
**  Clears inSELECT if the closed element is SELECT, reports the end of
**  the element to the target (skipped when viewing pretty source),
**  pops and frees the stack entry, and recomputes no_lynx_specialcodes
**  from the new top of stack.  Does nothing on an empty stack.
*/
PRIVATE void do_close_stacked ARGS1(
    HTStream *,     context)
{
    HTElement * stacked = context->element_stack;
    HTMLElement e;
    if (!stacked)
        return;                 /* stack was empty */
    if (context->inSELECT && !strcasecomp(stacked->tag->name, "SELECT")) {
        context->inSELECT = FALSE;
    }
    e = NORMAL_TAGNUM(TAGNUM_OF_TAGP(stacked->tag));
#ifdef USE_PRETTYSRC
    if (!psrc_view) /* Don't actually pass call on if viewing psrc - kw */
#endif
        (*context->actions->end_element)(
            context->target,
            e,
            (char **)&context->include);
    context->element_stack = stacked->next;
    pool_free(stacked);
    context->no_lynx_specialcodes = context->element_stack ?
        (context->element_stack->tag->flags & Tgf_nolyspcl) : NO;
}

1053

1054

/*
**  Find old_tag (or its alternative variant, see ALT_TAGP) on the
**  element stack.
**
**  Returns the 1-based depth of the first match counted from the top
**  of the stack, or 0 if the tag is not on the stack.
*/
PRIVATE int is_on_stack ARGS2(
    HTStream *,     context,
    HTTag *,        old_tag)
{
    HTElement * stacked = context->element_stack;
    int i = 1;
    for (; stacked; stacked = stacked->next, i++) {
        if (stacked->tag == old_tag ||
            stacked->tag == ALT_TAGP(old_tag))
            return i;
    }
    return 0;
}

1067

#endif /* EXTENDED_HTMLDTD */

1068

1069

/* End element

1070

** -----------

1071

1072

PRIVATE void end_element ARGS2(

1073

HTStream *, context,

1074

HTTag *, old_tag)

1075

{

1076

#ifdef EXTENDED_HTMLDTD

1077

1078

BOOL extra_action_taken = NO;

1079

canclose_t canclose_check = close_valid;

1080

int stackpos = is_on_stack(context, old_tag);

1081

1082

if (!Old_DTD) {

1083

while (canclose_check != close_NO &&

1084

context->element_stack &&

1085

(stackpos > 1 || (!extra_action_taken && stackpos == 0))) {

1086

if (stackpos == 0 && (old_tag->flags & Tgf_startO) &&

1087

element_valid_within(old_tag, context->element_stack->tag, YES)) {

1088

CTRACE((tfp, "SGML: </%s> ignored\n", old_tag->name));

1089

return;

1090

}

1091

canclose_check = can_close(old_tag, context->element_stack->tag);

1092

if (canclose_check != close_NO) {

1093

CTRACE((tfp, "SGML: End </%s> \t<- %s end </%s>\n",

1094

context->element_stack->tag->name,

1095

canclose_check == close_valid ? "supplied," : "***forced by",

1096

old_tag->name));

1097

do_close_stacked(context);

1098

extra_action_taken = YES;

1099

stackpos = is_on_stack(context, old_tag);

1100

}

1101

}

1102

1103

if (stackpos == 0 && old_tag->contents != SGML_EMPTY) {

1104

CTRACE((tfp, "SGML: Still open %s, ***no open %s for </%s>\n",

1105

context->element_stack ?

1106

context->element_stack->tag->name : "none",

1107

old_tag->name,

1108

old_tag->name));

1109

return;

1110

}

1111

if (stackpos > 1) {

1112

CTRACE((tfp, "SGML: Nesting <%s>...<%s> \t<- ***invalid end </%s>\n",

1113

old_tag->name,

1114

context->element_stack->tag->name,

1115

old_tag->name));

1116

return;

1117

}

1118

}

1119

/* Now let the non-extended code deal with the rest. - kw */

1120

1121

#endif /* EXTENDED_HTMLDTD */

1122

1123

1124

** If we are in a SELECT block, ignore anything

1125

** but a SELECT end tag. - FM

1126

1127

if (context->inSELECT) {

1128

if (!strcasecomp(old_tag->name, "SELECT")) {

1129

1130

** Turn off the inSELECT flag and fall through. - FM

1131

1132

context->inSELECT = FALSE;

1133

} else {

1134

1135

** Ignore the end tag. - FM

1136

1137

CTRACE((tfp, "SGML: ***Ignoring end tag </%s> in SELECT block.\n",

1138

old_tag->name));

1139

return;

1140

}

1141

}

1142

1143

** Handle the end tag. - FM

1144

1145

CTRACE((tfp, "SGML: End </%s>\n", old_tag->name));

1146

if (old_tag->contents == SGML_EMPTY) {

1147

CTRACE((tfp, "SGML: ***Illegal end tag </%s> found.\n",

1148

old_tag->name));

1149

return;

1150

}

1151

#ifdef WIND_DOWN_STACK

1152

while (context->element_stack) /* Loop is error path only */

1153

#else

1154

if (context->element_stack) /* Substitute and remove one stack element */

1155

#endif /* WIND_DOWN_STACK */

1156

{

1157

int status = HT_OK;

1158

HTMLElement e;

1159

HTElement * N = context->element_stack;

1160

HTTag * t = (N->tag != old_tag) ? NORMAL_TAGP(N->tag) : N->tag;

1161

1162

if (old_tag != t) { /* Mismatch: syntax error */

1163

if (context->element_stack->next) { /* This is not the last level */

1164

CTRACE((tfp, "SGML: Found </%s> when expecting </%s>. </%s> ***assumed.\n",

1165

old_tag->name, t->name, t->name));

1166

} else { /* last level */

1167

CTRACE((tfp, "SGML: Found </%s> when expecting </%s>. </%s> ***Ignored.\n",

1168

old_tag->name, t->name, old_tag->name));

1169

return; /* Ignore */

1170

}

1171

}

1172

1173

e = NORMAL_TAGNUM(TAGNUM_OF_TAGP(t));

1174

CTRACE2(TRACE_SGML, (tfp, "tagnum(%p) = %d\n", t, e));

1175

#ifdef USE_PRETTYSRC

1176

if (!psrc_view) /* Don't actually pass call on if viewing psrc - kw */

1177

#endif

1178

status = (*context->actions->end_element)(context->target,

1179

e, (char **)&context->include);

1180

if (status == HT_PARSER_REOPEN_ELT) {

1181

CTRACE((tfp, "SGML: Restart <%s>\n", t->name));

1182

(*context->actions->start_element)(

1183

context->target,

1184

1185

NULL,

1186

NULL,

1187

context->current_tag_charset,

1188

(char **)&context->include);

1189

} else if (status == HT_PARSER_OTHER_CONTENT) {

1190

CTRACE((tfp, "SGML: Continue with other content model for <%s>\n", t->name));

1191

context->element_stack->tag = ALT_TAGP_OF_TAGNUM(e);

1192

} else {

1193

context->element_stack = N->next; /* Remove from stack */

1194

pool_free(N);

1195

}

1196

context->no_lynx_specialcodes = context->element_stack ?

1197

(context->element_stack->tag->flags & Tgf_nolyspcl) : NO;

1198

#ifdef WIND_DOWN_STACK

1199

if (old_tag == t)

1200

return; /* Correct sequence */

1201

#else

1202

return;

1203

#endif /* WIND_DOWN_STACK */

1204

1205

/* Syntax error path only */

1206

1207

}

1208

CTRACE((tfp, "SGML: Extra end tag </%s> found and ignored.\n",

1209

old_tag->name));

1210

}

1211

1212

1213

/* Start a element

1214

1215

PRIVATE void start_element ARGS1(

1216

HTStream *, context)

1217

{

1218

int status;

1219

HTTag * new_tag = context->current_tag;

1220

HTMLElement e = TAGNUM_OF_TAGP(new_tag);

1221

BOOL ok = FALSE;

1222

1223

#ifdef EXTENDED_HTMLDTD

1224

1225

BOOL valid = YES;

1226

BOOL direct_container = YES;

1227

BOOL extra_action_taken = NO;

1228

canclose_t canclose_check = close_valid;

1229

1230

if (!Old_DTD) {

1231

while (context->element_stack &&

1232

(canclose_check == close_valid ||

1233

(canclose_check == close_error &&

1234

new_tag == context->element_stack->tag)) &&

1235

!(valid = element_valid_within(new_tag, context->element_stack->tag,

1236

direct_container))) {

1237

canclose_check = can_close(new_tag, context->element_stack->tag);

1238

if (canclose_check != close_NO) {

1239

CTRACE((tfp, "SGML: End </%s> \t<- %s start <%s>\n",

1240

context->element_stack->tag->name,

1241

canclose_check == close_valid ? "supplied," : "***forced by",

1242

new_tag->name));

1243

do_close_stacked(context);

1244

extra_action_taken = YES;

1245

if (canclose_check == close_error)

1246

direct_container = NO;

1247

} else {

1248

CTRACE((tfp, "SGML: Still open %s \t<- ***invalid start <%s>\n",

1249

context->element_stack->tag->name,

1250

new_tag->name));

1251

}

1252

}

1253

if (context->element_stack && !valid &&

1254

(context->element_stack->tag->flags & Tgf_strict) &&

1255

!(valid = element_valid_within(new_tag, context->element_stack->tag,

1256

direct_container))) {

1257

CTRACE((tfp, "SGML: Still open %s \t<- ***ignoring start <%s>\n",

1258

context->element_stack->tag->name,

1259

new_tag->name));

1260

return;

1261

}

1262

1263

if (context->element_stack && !extra_action_taken &&

1264

canclose_check == close_NO && !valid && (new_tag->flags & Tgf_mafse)) {

1265

BOOL has_attributes = NO;

1266

int i = 0;

1267

for (; i< new_tag->number_of_attributes && !has_attributes; i++)

1268

has_attributes = context->present[i];

1269

if (!has_attributes) {

1270

CTRACE((tfp, "SGML: Still open %s, ***converting invalid <%s> to </%s>\n",

1271

context->element_stack->tag->name,

1272

new_tag->name,

1273

new_tag->name));

1274

end_element(context, new_tag);

1275

return;

1276

}

1277

}

1278

1279

if (context->element_stack &&

1280

canclose_check == close_error && !(valid =

1281

element_valid_within(

1282

new_tag,

1283

context->element_stack->tag,

1284

direct_container))) {

1285

CTRACE((tfp, "SGML: Still open %s \t<- ***invalid start <%s>\n",

1286

context->element_stack->tag->name,

1287

new_tag->name));

1288

}

1289

}

1290

/* Fall through to the non-extended code - kw */

1291

1292

#endif /* EXTENDED_HTMLDTD */

1293

1294

1295

** If we are not in a SELECT block, check if this is

1296

** a SELECT start tag. Otherwise (i.e., we are in a

1297

** SELECT block) accept only OPTION as valid, terminate

1298

** the SELECT block if it is any other form-related

1299

** element, and otherwise ignore it. - FM

1300

1301

if (!context->inSELECT) {

1302

1303

** We are not in a SELECT block, so check if this starts one. - FM

1304

** (frequent case!)

1305

1306

/* my_casecomp() - optimized by the first character */

1307

if (!my_casecomp(new_tag->name, "SELECT")) {

1308

1309

** Set the inSELECT flag and fall through. - FM

1310

1311

context->inSELECT = TRUE;

1312

}

1313

} else {

1314

1315

** We are in a SELECT block. - FM

1316

1317

if (strcasecomp(new_tag->name, "OPTION")) {

1318

1319

** Ugh, it is not an OPTION. - FM

1320

1321

switch (e) {

1322

case HTML_INPUT: case HTML_TEXTAREA: case HTML_SELECT:

1323

case HTML_BUTTON: case HTML_FIELDSET: case HTML_LABEL:

1324

case HTML_LEGEND: case HTML_FORM:

1325

ok = TRUE;

1326

break;

1327

default:

1328

break;

1329

}

1330

if (ok)

1331

{

1332

1333

** It is another form-related start tag, so terminate

1334

** the current SELECT block and fall through. - FM

1335

1336

CTRACE((tfp, "SGML: ***Faking SELECT end tag before <%s> start tag.\n",

1337

new_tag->name));

1338

end_element(context, SGMLFindTag(context->dtd, "SELECT"));

1339

} else {

1340

1341

** Ignore the start tag. - FM

1342

1343

CTRACE((tfp, "SGML: ***Ignoring start tag <%s> in SELECT block.\n",

1344

new_tag->name));

1345

return;

1346

}

1347

}

1348

}

1349

1350

** Handle the start tag. - FM

1351

1352

CTRACE((tfp, "SGML: Start <%s>\n", new_tag->name));

1353

status = (*context->actions->start_element)(

1354

context->target,

1355

TAGNUM_OF_TAGP(new_tag),

1356

context->present,

1357

(CONST char**) context->value, /* coerce type for think c */

1358

context->current_tag_charset,

1359

(char **)&context->include);

1360

if (status == HT_PARSER_OTHER_CONTENT)

1361

new_tag = ALT_TAGP(new_tag); /* this is only returned for OBJECT */

1362

if (new_tag->contents != SGML_EMPTY) { /* i.e., tag not empty */

1363

HTElement * N = pool_alloc();

1364

if (N == NULL)

1365

outofmem(__FILE__, "start_element");

1366

N->next = context->element_stack;

1367

N->tag = new_tag;

1368

context->element_stack = N;

1369

context->no_lynx_specialcodes = (new_tag->flags & Tgf_nolyspcl);

1370

1371

} else if (e == HTML_META ) {

1372

1373

** Check for result of META tag. - KW & FM

1374

1375

change_chartrans_handling(context);

1376

}

1377

}

1378

1379

1380

/* Find Tag in DTD tag list

1381

** ------------------------

1382

1383

** On entry,

1384

** dtd points to dtd structure including valid tag list

1385

** string points to name of tag in question

1386

1387

** On exit,

1388

** returns:

1389

** NULL tag not found

1390

** else address of tag structure in dtd

1391

1392

PUBLIC HTTag * SGMLFindTag ARGS2(

1393

CONST SGML_dtd*, dtd,

1394

CONST char *, s)

1395

{

1396

int high, low, i, diff;

1397

static HTTag* last[64] = {NULL}; /*optimize using the previous results*/

1398

HTTag** res = last + (UCH(*s) % 64); /*pointer arithmetic*/

1399

1400

if (*res && !strcasecomp((*res)->name, s))

1401

return *res;

1402

1403

for (low = 0, high=dtd->number_of_tags;

1404

high > low;

1405

diff < 0 ? (low = i+1) : (high = i)) { /* Binary search */

1406

i = (low + (high-low)/2);

1407

/* my_casecomp() - optimized by the first character, NOT_ASCII ok */

1408

diff = my_casecomp(dtd->tags[i].name, s); /* Case insensitive */

1409

if (diff == 0) { /* success: found it */

1410

*res = &dtd->tags[i];

1411

return *res;

1412

}

1413

}

1414

if (IsNmStart(*s)) {

1415

1416

** Unrecognized, but may be valid. - KW

1417

1418

return &HTTag_unrecognized;

1419

}

1420

return NULL;

1421

}

1422

1423

/*________________________________________________________________________

1424

** Public Methods

1425

1426

1427

1428

/* Could check that we are back to bottom of stack! @@ */

1429

/* Do check! - FM */

1430

/* */

1431

PRIVATE void SGML_free ARGS1(

1432

HTStream *, context)

1433

{

1434

int i;

1435

HTElement * cur;

1436

HTTag * t;

1437

1438

1439

** Free the buffers. - FM

1440

1441

FREE(context->recover);

1442

FREE(context->url);

1443

FREE(context->csi);

1444

FREE(context->include);

1445

FREE(context->active_include);

1446

1447

1448

** Wind down stack if any elements are open. - FM

1449

1450

while (context->element_stack) {

1451

cur = context->element_stack;

1452

t = cur->tag;

1453

context->element_stack = cur->next; /* Remove from stack */

1454

pool_free(cur);

1455

#ifdef USE_PRETTYSRC

1456

if (!psrc_view) /* Don't actually call on target if viewing psrc - kw */

1457

#endif

1458

(*context->actions->end_element)(context->target,

1459

NORMAL_TAGNUM(TAGNUM_OF_TAGP(t)),

1460

(char **)&context->include);

1461

FREE(context->include);

1462

}

1463

1464

1465

** Finish off the target. - FM

1466

1467

(*context->actions->_free)(context->target);

1468

1469

1470

** Free the strings and context structure. - FM

1471

1472

HTChunkFree(context->string);

1473

for (i = 0; i < MAX_ATTRIBUTES; i++)

1474

FREE_extra(context->value[i]);

1475

FREE(context);

1476

1477

#ifdef USE_PRETTYSRC

1478

sgml_in_psrc_was_initialized = FALSE;

1479

#endif

1480

}

1481

1482

/*
**  Abnormal end of stream: abort the target with error 'e', then
**  release the parser context.  Unlike SGML_free(), elements left open
**  are only freed; no end_element calls are made on the target.
*/
PRIVATE void SGML_abort ARGS2(
    HTStream *,     context,
    HTError,        e)
{
    int i;
    HTElement * cur;

    /*
    **  Abort the target. - FM
    */
    (*context->actions->_abort)(context->target, e);

    /*
    **  Free the buffers. - FM
    */
    FREE(context->recover);
    FREE(context->include);
    FREE(context->active_include);
    FREE(context->url);
    FREE(context->csi);

    /*
    **  Free stack memory if any elements were left open. - KW
    */
    while (context->element_stack) {
        cur = context->element_stack;
        context->element_stack = cur->next;     /* Remove from stack */
        pool_free(cur);
    }

    /*
    **  Free the strings and context structure. - FM
    */
    HTChunkFree(context->string);
    for (i = 0; i < MAX_ATTRIBUTES; i++)
        FREE_extra(context->value[i]);
    FREE(context);

#ifdef USE_PRETTYSRC
    sgml_in_psrc_was_initialized = FALSE;
#endif
}

1524

1525

1526

/*      Read and write user callback handle
**      -----------------------------------
**
**   The callbacks from the SGML parser have an SGML context parameter.
**   These calls allow the caller to associate his own context with a
**   particular SGML context.
*/

#ifdef CALLERDATA
/* Return the caller-supplied pointer stored in this SGML context. */
PUBLIC void* SGML_callerData ARGS1(
    HTStream *,     context)
{
    return context->callerData;
}

/* Store a caller-supplied pointer in this SGML context. */
PUBLIC void SGML_setCallerData ARGS2(
    HTStream *,     context,
    void*,          data)
{
    context->callerData = data;
}
#endif /* CALLERDATA */

1548

1549

PRIVATE void SGML_character ARGS2(

1550

HTStream *, context,

1551

char, c_in)

1552

{

1553

CONST SGML_dtd *dtd = context->dtd;

1554

HTChunk *string = context->string;

1555

CONST char * EntityName;

1556

HTTag * testtag = NULL;

1557

BOOLEAN chk; /* Helps (?) walk through all the else ifs... */

1558

UCode_t clong, uck = 0; /* Enough bits for UCS4 ... */

1559

int testlast;

1560

#ifdef CJK_EX

1561

unsigned char c;

1562

#else

1563

char c;

1564

#endif

1565

char saved_char_in = '\0';

1566

1567

1568

** Now some fun with the preprocessor.

1569

** Use copies for c and unsign_c == clong, so that

1570

** we can revert back to the unchanged c_in. - KW

1571

1572

#define unsign_c clong

1573

1574

c = c_in;

1575

clong = UCH(c); /* a.k.a. unsign_c */

1576

1577

if (context->T.decode_utf8) {

1578

1579

** Combine UTF-8 into Unicode.

1580

** Incomplete characters silently ignored.

1581

** From Linux kernel's console.c. - KW

1582

1583

if (TOASCII(UCH(c)) > 127) { /* S/390 -- gil -- 0710 */

1584

1585

** We have an octet from a multibyte character. - FM

1586

1587

if (context->utf_count > 0 && (TOASCII(c) & 0xc0) == 0x80) {

1588

context->utf_char = (context->utf_char << 6) | (TOASCII(c) & 0x3f);

1589

context->utf_count--;

1590

*(context->utf_buf_p) = c;

1591

(context->utf_buf_p)++;

1592

if (context->utf_count == 0) {

1593

1594

** We have all of the bytes, so terminate

1595

** the buffer and set 'clong' to the UCode_t

1596

** value. - FM

1597

1598

*(context->utf_buf_p) = '\0';

1599

clong = context->utf_char;

1600

if (clong < 256) {

1601

c = ((char)(clong & 0xff));

1602

}

1603

goto top1;

1604

} else {

1605

1606

** Wait for more. - KW

1607

1608

return;

1609

}

1610

} else {

1611

1612

** Start handling a new multibyte character. - FM

1613

1614

context->utf_buf_p = context->utf_buf;

1615

*(context->utf_buf_p) = c;

1616

(context->utf_buf_p)++;

1617

if ((c & 0xe0) == 0xc0) {

1618

context->utf_count = 1;

1619

context->utf_char = (c & 0x1f);

1620

} else if ((c & 0xf0) == 0xe0) {

1621

context->utf_count = 2;

1622

context->utf_char = (c & 0x0f);

1623

} else if ((c & 0xf8) == 0xf0) {

1624

context->utf_count = 3;

1625

context->utf_char = (c & 0x07);

1626

} else if ((c & 0xfc) == 0xf8) {

1627

context->utf_count = 4;

1628

context->utf_char = (c & 0x03);

1629

} else if ((c & 0xfe) == 0xfc) {

1630

context->utf_count = 5;

1631

context->utf_char = (c & 0x01);

1632

} else {

1633

1634

** Garbage. - KW

1635

1636

context->utf_count = 0;

1637

context->utf_buf_p = context->utf_buf;

1638

*(context->utf_buf_p) = '\0';

1639

}

1640

1641

** Wait for more. - KW

1642

1643

return;

1644

}

1645

} else {

1646

1647

** Got an ASCII char. - KW

1648

1649

context->utf_count = 0;

1650

context->utf_buf_p = context->utf_buf;

1651

*(context->utf_buf_p) = '\0';

1652

/* goto top; */

1653

}

1654

} /* end of context->T.decode_utf8 S/390 -- gil -- 0726 */

1655

1656

#ifdef NOTDEFINED

1657

1658

** If we have a koi8-r input and do not have

1659

** koi8-r as the output, save the raw input

1660

** in saved_char_in before we potentially

1661

** convert it to Unicode. - FM

1662

1663

if (context->T.strip_raw_char_in)

1664

saved_char_in = c;

1665

#endif /* NOTDEFINED */

1666

1667

1668

** If we want the raw input converted

1669

** to Unicode, try that now. - FM

1670

1671

if (context->T.trans_to_uni &&

1672

((TOASCII(unsign_c) >= LYlowest_eightbit[context->inUCLYhndl]) || /* S/390 -- gil -- 0744 */

1673

(unsign_c < ' ' && unsign_c != 0 &&

1674

context->T.trans_C0_to_uni))) {

1675

1676

** Convert the octet to Unicode. - FM

1677

1678

clong = UCTransToUni(c, context->inUCLYhndl);

1679

if (clong > 0) {

1680

saved_char_in = c;

1681

if (clong < 256) {

1682

c = FROMASCII((char)clong);

1683

}

1684

}

1685

goto top1;

1686

} else if (unsign_c < ' ' && unsign_c != 0 && /* S/390 -- gil -- 0768 */

1687

context->T.trans_C0_to_uni) {

1688

1689

** This else if may be too ugly to keep. - KW

1690

1691

if (context->T.trans_from_uni &&

1692

(((clong = UCTransToUni(c, context->inUCLYhndl)) >= ' ') ||

1693

(context->T.transp &&

1694

(clong = UCTransToUni(c, context->inUCLYhndl)) > 0))) {

1695

saved_char_in = c;

1696

if (clong < 256) {

1697

c = FROMASCII((char)clong);

1698

}

1699

goto top1;

1700

} else {

1701

uck = -1;

1702

if (context->T.transp) {

1703

uck = UCTransCharStr(replace_buf, 60, c,

1704

context->inUCLYhndl,

1705

context->inUCLYhndl, NO);

1706

}

1707

if (!context->T.transp || uck < 0) {

1708

uck = UCTransCharStr(replace_buf, 60, c,

1709

context->inUCLYhndl,

1710

context->outUCLYhndl, YES);

1711

}

1712

if (uck == 0) {

1713

return;

1714

} else if (uck < 0) {

1715

goto top0a;

1716

}

1717

c = replace_buf[0];

1718

if (c && replace_buf[1]) {

1719

if (context->state == S_text) {

1720

PUTS(replace_buf);

1721

return;

1722

}

1723

StrAllocCat(context->recover, replace_buf + 1);

1724

}

1725

goto top0a;

1726

} /* Next line end of ugly stuff for C0. - KW */

1727

} else { /* end of context->T.trans_to_uni S/390 -- gil -- 0791 */

1728

goto top0a;

1729

}

1730

1731

1732

** At this point we have either unsign_c a.k.a. clong in

1733

** Unicode (and c in latin1 if clong is in the latin1 range),

1734

** or unsign_c and c will have to be passed raw. - KW

1735

1736

1737

** We jump up to here from below if we have

1738

** stuff in the recover, insert, or csi buffers

1739

** to process. We zero saved_char_in, in effect

1740

** as a flag that the octet in not that of the

1741

** actual call to this function. This may be OK

1742

** for now, for the stuff this function adds to

1743

** its recover buffer, but it might not be for

1744

** stuff other functions added to the insert or

1745

** csi buffer, so bear that in mind. - FM

1746

** Stuff from the recover buffer is now handled

1747

** as UTF-8 if we can expect that's what it is,

1748

** and in that case we don't come back up here. - kw

1749

1750

top:

1751

saved_char_in = '\0';

1752

1753

** We jump to here from above when we don't have

1754

** UTF-8 input, haven't converted to Unicode, and

1755

** want clong set to the input octet (unsigned)

1756

** without zeroing its saved_char_in copy (which

1757

** is signed). - FM

1758

1759

top0a:

1760

*(context->utf_buf) = '\0';

1761

clong = UCH(c);

1762

1763

** We jump to here from above if we have converted

1764

** the input, or a multibyte sequence across calls,

1765

** to a Unicode value and loaded it into clong (to

1766

** which unsign_c has been defined), and from below

1767

** when we are recycling a character (e.g., because

1768

** it terminated an entity but is not the standard

1769

** semi-colon). The character will already have

1770

** been put through the Unicode conversions. - FM

1771

1772

top1:

1773

1774

** Ignore low ISO 646 7-bit control characters

1775

** if HTCJK is not set. - FM

1776

1777

1778

** Works for both ASCII and EBCDIC. -- gil

1779

*/ /* S/390 -- gil -- 0811 */

1780

if (TOASCII(unsign_c) < 32 &&

1781

c != '\t' && c != '\n' && c != '\r' &&

1782

HTCJK == NOCJK)

1783

goto after_switch;

1784

1785

1786

** Ignore 127 if we don't have HTPassHighCtrlRaw

1787

** or HTCJK set. - FM

1788

1789

#define PASSHICTRL (context->T.transp || \

1790

unsign_c >= LYlowest_eightbit[context->inUCLYhndl])

1791

if (TOASCII(c) == 127 && /* S/390 -- gil -- 0830 */

1792

!(PASSHICTRL || HTCJK != NOCJK))

1793

goto after_switch;

1794

1795

1796

** Ignore 8-bit control characters 128 - 159 if

1797

** neither HTPassHighCtrlRaw nor HTCJK is set. - FM

1798

1799

if (TOASCII(unsign_c) > 127 && TOASCII(unsign_c) < 160 && /* S/390 -- gil -- 0847 */

1800

!(PASSHICTRL || HTCJK != NOCJK))

1801

goto after_switch;

1802

1803

/* Almost all CJK characters are double byte but only Japanese

1804

* JIS X0201 Kana is single byte. To prevent to fail SGML parsing

1805

* we have to care them here. -- TH

1806

1807

if ((HTCJK==JAPANESE) && (context->state==S_in_kanji) &&

1808

!IS_JAPANESE_2BYTE(context->kanji_buf, UCH(c))) {

1809

#ifdef CONV_JISX0201KANA_JISX0208KANA

1810

if (IS_SJIS_X0201KANA(context->kanji_buf)) {

1811

unsigned char sjis_hi, sjis_lo;

1812

JISx0201TO0208_SJIS(context->kanji_buf, &sjis_hi, &sjis_lo);

1813

PUTC(sjis_hi);

1814

PUTC(sjis_lo);

1815

}

1816

else

1817

#endif

1818

PUTC(context->kanji_buf);

1819

context->state = S_text;

1820

}

1821

1822

1823

** Handle character based on context->state.

1824

1825

CTRACE2(TRACE_SGML, (tfp, "SGML before %s|%.*s|%c|\n",

1826

state_name(context->state),

1827

string->size,

1828

NonNull(string->data),

1829

UCH(c)));

1830

switch(context->state) {

1831

1832

case S_in_kanji:

1833

1834

** Note that if we don't have a CJK input, then this

1835

** is not the second byte of a CJK di-byte, and we're

1836

** trashing the input. That's why 8-bit characters

1837

** followed by, for example, '<' can cause the tag to

1838

** be treated as text, not markup. We could try to deal

1839

** with it by holding each first byte and then checking

1840

** byte pairs, but that doesn't seem worth the overhead

1841

** (see below). - FM

1842

1843

context->state = S_text;

1844

PUTC(context->kanji_buf);

1845

PUTC(c);

1846

break;

1847

1848

case S_tagname_slash:

1849

1850

* We had something like "<name/" so far, set state to S_text

1851

* but keep context->slashedtag as a flag; except if we get

1852

* '>' directly after the "<name/", and really have a tag for

1853

* that name in context->slashedtag, in which case keep state as

1854

* is and let code below deal with it. - kw

1855

1856

if (!(c == '>' && context->slashedtag && TOASCII(unsign_c) < 127)) {

1857

context->state = S_text;

1858

} /* fall through in any case! */

1859

1860

case S_text:

1861

if (HTCJK != NOCJK && (TOASCII(c) & 0200) != 0) { /* S/390 -- gil -- 0864 */

1862

1863

** Setting up for Kanji multibyte handling (based on

1864

** Takuya ASADA's (asada@three-a.co.jp) CJK Lynx).

1865

** Note that if the input is not in fact CJK, the

1866

** next byte also will be mishandled, as explained

1867

** above. Toggle raw mode off in such cases, or

1868

** select the "7 bit approximations" display

1869

** character set, which is largely equivalent

1870

** to having raw mode off with CJK. - FM

1871

1872

context->state = S_in_kanji;

1873

context->kanji_buf = c;

1874

break;

1875

} else if (HTCJK != NOCJK && TOASCII(c) == '\033') { /* S/390 -- gil -- 0881 */

1876

1877

** Setting up for CJK escape sequence handling (based on

1878

** Takuya ASADA's (asada@three-a.co.jp) CJK Lynx). - FM

1879

1880

context->state = S_esc;

1881

PUTC(c);

1882

break;

1883

}

1884

1885

if (c == '&' || c == '<') {

1886

#ifdef USE_PRETTYSRC

1887

if (psrc_view) { /*there is nothing useful in the element_stack*/

1888

testtag = context->current_tag;

1889

} else

1890

#endif

1891

{

1892

testtag = context->element_stack ?

1893

context->element_stack->tag : NULL;

1894

}

1895

}

1896

1897

if (c == '&' && TOASCII(unsign_c) < 127 && /* S/390 -- gil -- 0898 */

1898

(!testtag ||

1899

(testtag->contents == SGML_MIXED ||

1900

testtag->contents == SGML_ELEMENT ||

1901

testtag->contents == SGML_PCDATA ||

1902

#ifdef USE_PRETTYSRC

1903

testtag->contents == SGML_EMPTY ||

1904

#endif

1905

testtag->contents == SGML_RCDATA))) {

1906

1907

** Setting up for possible entity, without the leading '&'. - FM

1908

1909

string->size = 0;

1910

context->state = S_ero;

1911

} else if (c == '<' && TOASCII(unsign_c) < 127) { /* S/390 -- gil -- 0915 */

1912

1913

** Setting up for possible tag. - FM

1914

1915

string->size = 0;

1916

if (testtag && testtag->contents == SGML_PCDATA) {

1917

context->state = S_pcdata;

1918

} else if (testtag && (testtag->contents == SGML_LITTERAL

1919

|| testtag->contents == SGML_CDATA)) {

1920

context->state = S_litteral;

1921

} else if (testtag && (testtag->contents == SGML_SCRIPT)) {

1922

context->state = S_script;

1923

} else {

1924

context->state = S_tag;

1925

}

1926

context->slashedtag = NULL;

1927

} else if (context->slashedtag &&

1928

(c == '/' ||

1929

(c == '>' && context->state == S_tagname_slash)) &&

1930

TOASCII(unsign_c) < 127) {

1931

1932

** We got either the second slash of a pending "<NAME/blah blah/"

1933

** shortref construct, or the '>' of a mere "<NAME/>". In both

1934

** cases generate a "</NAME>" end tag in the recover buffer for

1935

** reparsing unless NAME is really an empty element. - kw

1936

1937

#ifdef USE_PRETTYSRC

1938

if (psrc_view) {

1939

PSRCSTART(abracket);

1940

PUTC(c);

1941

PSRCSTOP(abracket);

1942

} else

1943

#endif

1944

if (context->slashedtag != context->unknown_tag &&

1945

!ReallyEmptyTag(context->slashedtag)) {

1946

if (context->recover == NULL) {

1947

StrAllocCopy(context->recover, "</");

1948

context->recover_index = 0;

1949

} else {

1950

StrAllocCat(context->recover, "</");

1951

}

1952

StrAllocCat(context->recover, context->slashedtag->name);

1953

StrAllocCat(context->recover, ">");

1954

}

1955

context->slashedtag = NULL;

1956

1957

} else if (context->element_stack &&

1958

(context->element_stack->tag->flags & Tgf_frecyc)) {

1959

1960

* The element stack says we are within the contents of an

1961

* element that the next stage (HTML.c) may want to feed

1962

* us back again (via the *include string). So try to output

1963

* text in UTF-8 if possible, using the same logic as for

1964

* attribute values (which should be in line with what

1965

* context->current_tag_charset indicates). - kw

1966

1967

if (context->T.decode_utf8 &&

1968

*context->utf_buf) {

1969

PUTS(context->utf_buf);

1970

context->utf_buf_p = context->utf_buf;

1971

*(context->utf_buf_p) = '\0';

1972

} else if (HTCJK == NOCJK &&

1973

(context->T.output_utf8 ||

1974

context->T.trans_from_uni)) {

1975

if (LYIsASCII(clong)) {

1976

PUTC(c);

1977

} else if (clong == 0xfffd && saved_char_in &&

1978

HTPassEightBitRaw &&

1979

UCH(saved_char_in) >=

1980

LYlowest_eightbit[context->outUCLYhndl]) {

1981

PUTUTF8((0xf000 | UCH(saved_char_in)));

1982

} else {

1983

PUTUTF8(clong);

1984

}

1985

} else if (saved_char_in && context->T.use_raw_char_in) {

1986

PUTC(saved_char_in);

1987

} else {

1988

PUTC(c);

1989

}

1990

1991

#define PASS8859SPECL context->T.pass_160_173_raw

1992

1993

** Convert 160 (nbsp) to Lynx special character if

1994

** neither HTPassHighCtrlRaw nor HTCJK is set. - FM

1995

1996

} else if (unsign_c == CH_NBSP && /* S/390 -- gil -- 0932 */

1997

!context->no_lynx_specialcodes &&

1998

!(PASS8859SPECL || HTCJK != NOCJK)) {

1999

PUTC(HT_NON_BREAK_SPACE);

2000

2001

** Convert 173 (shy) to Lynx special character if

2002

** neither HTPassHighCtrlRaw nor HTCJK is set. - FM

2003

2004

} else if (unsign_c == CH_SHY && /* S/390 -- gil -- 0949 */

2005

!context->no_lynx_specialcodes &&

2006

!(PASS8859SPECL || HTCJK != NOCJK)) {

2007

PUTC(LY_SOFT_HYPHEN);

2008

2009

** Handle the case in which we think we have a character

2010

** which doesn't need further processing (e.g., a koi8-r

2011

** input for a koi8-r output). - FM

2012

2013

} else if (context->T.use_raw_char_in && saved_char_in) {

2014

2015

** Only if the original character is still in saved_char_in,

2016

** otherwise we may be iterating from a goto top. - KW

2017

2018

PUTC(saved_char_in);

2019

saved_char_in = '\0';

2020

/******************************************************************

2021

* I. LATIN-1 OR UCS2 TO DISPLAY CHARSET

2022

******************************************************************/

2023

} else if ((chk = (BOOL) (context->T.trans_from_uni && TOASCII(unsign_c) >= 160)) && /* S/390 -- gil -- 0968 */

2024

(uck = UCTransUniChar(unsign_c,

2025

context->outUCLYhndl)) >= ' ' &&

2026

uck < 256) {

2027

CTRACE((tfp, "UCTransUniChar returned 0x%.2lX:'%c'.\n",

2028

uck, FROMASCII((char)uck)));

2029

2030

** We got one octet from the conversions, so use it. - FM

2031

2032

PUTC(FROMASCII((char)uck));

2033

} else if ((chk &&

2034

(uck == -4 ||

2035

(context->T.repl_translated_C0 &&

2036

uck > 0 && uck < 32))) &&

2037

2038

** Not found; look for replacement string. - KW

2039

2040

(uck = UCTransUniCharStr(replace_buf, 60, clong,

2041

context->outUCLYhndl,

2042

0) >= 0)) {

2043

2044

** Got a replacement string.

2045

** No further tests for validity - assume that whoever

2046

** defined replacement strings knew what she was doing. - KW

2047

2048

PUTS(replace_buf);

2049

2050

** If we're displaying UTF-8, try that now. - FM

2051

2052

} else if (context->T.output_utf8 && PUTUTF8(clong)) {

2053

; /* do nothing more */

2054

2055

** If it's any other (> 160) 8-bit character, and

2056

** we have not set HTPassEightBitRaw nor HTCJK, nor

2057

** have the "ISO Latin 1" character set selected,

2058

** back translate for our character set. - FM

2059

2060

#define IncludesLatin1Enc \

2061

(context->outUCLYhndl == LATIN1 || \

2062

(context->outUCI && \

2063

(context->outUCI->enc & (UCT_CP_SUPERSETOF_LAT1))))

2064

2065

#define PASSHI8BIT (HTPassEightBitRaw || \

2066

(context->T.do_8bitraw && !context->T.trans_from_uni))

2067

2068

} else if (unsign_c > 160 && unsign_c < 256 &&

2069

!(PASSHI8BIT || HTCJK != NOCJK) &&

2070

!IncludesLatin1Enc) {

2071

#ifdef USE_PRETTYSRC

2072

int psrc_view_backup = 0;

2073

#endif

2074

2075

string->size = 0;

2076

EntityName = HTMLGetEntityName((int)(unsign_c - 160));

2077

HTChunkPuts(string, EntityName);

2078

HTChunkTerminate(string);

2079

#ifdef USE_PRETTYSRC

2080

/* we need to disable it temporarily */

2081

if (psrc_view) {

2082

psrc_view_backup =1; psrc_view =0;

2083

}

2084

#endif

2085

handle_entity(context, '\0');

2086

#ifdef USE_PRETTYSRC

2087

/* restore it if we disabled it temporarily */

2088

if (psrc_view_backup)

2089

psrc_view = TRUE;

2090

#endif

2091

2092

string->size = 0;

2093

if (!FoundEntity)

2094

PUTC(';');

2095

2096

** If we get to here and have an ASCII char,

2097

** pass the character. - KW

2098

2099

} else if (TOASCII(unsign_c) < 127 && unsign_c > 0) { /* S/390 -- gil -- 0987 */

2100

PUTC(c);

2101

2102

** If we get to here, and should have translated,

2103

** translation has failed so far. - KW

2104

2105

** We should have sent UTF-8 output to the parser

2106

** already, but what the heck, try again. - FM

2107

2108

} else if (context->T.output_utf8 && *context->utf_buf) {

2109

PUTS(context->utf_buf);

2110

context->utf_buf_p = context->utf_buf;

2111

*(context->utf_buf_p) = '\0';

2112

#ifdef NOTDEFINED

2113

2114

** Check for a strippable koi8-r 8-bit character. - FM

2115

2116

} else if (context->T.strip_raw_char_in && saved_char_in &&

2117

(UCH(saved_char_in) >= 0xc0) &&

2118

(UCH(saved_char_in) < 255)) {

2119

2120

** KOI8 special: strip high bit, gives (somewhat) readable

2121

** ASCII or KOI7 - it was constructed that way! - KW

2122

2123

PUTC(((char)(saved_char_in & 0x7f)));

2124

saved_char_in = '\0';

2125

#endif /* NOTDEFINED */

2126

2127

** If we don't actually want the character,

2128

** make it safe and output that now. - FM

2129

2130

} else if (TOASCII(UCH(c)) < /* S/390 -- gil -- 0997 */

2131

LYlowest_eightbit[context->outUCLYhndl] ||

2132

(context->T.trans_from_uni && !HTPassEightBitRaw)) {

2133

2134

** If we get to here, pass the character. - FM

2135

2136

} else {

2137

PUTC(c);

2138

}

2139

break;

2140

2141

2142

** Found '<' in SGML_PCDATA content; treat this mode nearly like

2143

** S_litteral, but recognize '<!' and '<?' to filter out comments

2144

** and processing instructions. - kw

2145

2146

case S_pcdata:

2147

if (!string->size && TOASCII(unsign_c) < 127) { /* first after '<' */

2148

if (c == '!') { /* <! */

2149

2150

** Terminate and set up for possible comment,

2151

** identifier, declaration, or marked section

2152

** as under S_tag. - kw

2153

2154

context->state = S_exclamation;

2155

context->lead_exclamation = TRUE;

2156

context->doctype_bracket = FALSE;

2157

context->first_bracket = FALSE;

2158

HTChunkPutc(string, c);

2159

break;

2160

} else if (c == '?') { /* <? - ignore as a PI until '>' - kw */

2161

CTRACE((tfp,

2162

"SGML: Found PI in PCDATA, junking it until '>'\n"));

2163

#ifdef USE_PRETTYSRC

2164

if (psrc_view) {

2165

PSRCSTART(abracket);

2166

PUTS("<?");

2167

PSRCSTOP(abracket);

2168

context->seen_nonwhite_in_junk_tag = TRUE; /* show all */

2169

}

2170

#endif

2171

context->state = S_junk_pi;

2172

break;

2173

}

2174

}

2175

goto case_S_litteral;

2176

2177

2178

** Found '<' in SGML_SCRIPT content; treat this mode nearly like

2179

** S_litteral, but recognize '<!' to allow the content to be treated

2180

** as a comment by lynx.

2181

2182

case S_script:

2183

if (!string->size && TOASCII(unsign_c) < 127) { /* first after '<' */

2184

if (c == '!') { /* <! */

2185

2186

** Terminate and set up for possible comment,

2187

** identifier, declaration, or marked section

2188

** as under S_tag. - kw

2189

2190

context->state = S_exclamation;

2191

context->lead_exclamation = TRUE;

2192

context->doctype_bracket = FALSE;

2193

context->first_bracket = FALSE;

2194

HTChunkPutc(string, c);

2195

break;

2196

}

2197

}

2198

goto case_S_litteral;

2199

2200

2201

** In litteral mode, waits only for specific end tag (for

2202

** compatibility with old servers, and for Lynx). - FM

2203

2204

case_S_litteral:

2205

case S_litteral: /*PSRC:this case not understood completely by HV, not done*/

2206

HTChunkPutc(string, c);

2207

#ifdef USE_PRETTYSRC

2208

if (psrc_view) { /*there is nothing useful in the element_stack*/

2209

testtag = context->current_tag;

2210

} else

2211

#endif

2212

testtag = context->element_stack ?

2213

context->element_stack->tag : NULL;

2214

2215

if (testtag == NULL) {

2216

string->size--;

2217

context->state = S_text;

2218

goto top1;

2219

}

2220

2221

2222

* Normally when we get the closing ">",

2223

* testtag contains something like "TITLE"

2224

* string contains something like "/title>"

2225

* so we decrement by 2 to compare the final character of each.

2226

2227

testlast = string->size - 2 - context->trailing_spaces - context->leading_spaces;

2228

2229

if (TOUPPER(c) != ((testlast < 0)

2230

? '/'

2231

: testtag->name[testlast])) {

2232

int i;

2233

2234

2235

** If complete match, end litteral.

2236

2237

if ((c == '>') &&

2238

testlast >= 0 && !testtag->name[testlast]) {

2239

#ifdef USE_PRETTYSRC

2240

if (psrc_view) {

2241

PSRCSTART(abracket);

2242

PUTS("</");

2243

PSRCSTOP(abracket);

2244

PSRCSTART(tag);

2245

strcpy(string->data,context->current_tag->name);

2246

if (tagname_transform != 1) {

2247

if (tagname_transform == 0)

2248

LYLowerCase(string->data);

2249

else

2250

LYUpperCase(string->data);

2251

}

2252

PUTS(string->data);

2253

PSRCSTOP(tag);

2254

PSRCSTART(abracket);

2255

PUTC('>');

2256

PSRCSTOP(abracket);

2257

2258

context->current_tag = NULL;

2259

} else

2260

#endif

2261

end_element(context, context->element_stack->tag);

2262

2263

string->size = 0;

2264

context->current_attribute_number = INVALID;

2265

context->state = S_text;

2266

context->leading_spaces = 0;

2267

context->trailing_spaces = 0;

2268

break;

2269

}

2270

2271

2272

* Allow whitespace between the "<" or ">" and the keyword, for

2273

* error-recovery.

2274

2275

if (isspace(UCH(c))) {

2276

if (testlast == -1) {

2277

context->leading_spaces += 1;

2278

CTRACE2(TRACE_SGML, (tfp, "leading spaces: %d\n", context->leading_spaces));

2279

break;

2280

} else if (testlast > 0) {

2281

context->trailing_spaces += 1;

2282

CTRACE2(TRACE_SGML, (tfp, "trailing spaces: %d\n", context->trailing_spaces));

2283

break;

2284

}

2285

}

2286

2287

2288

* Mismatch - recover.

2289

2290

context->leading_spaces = 0;

2291

context->trailing_spaces = 0;

2292

if (((testtag->contents != SGML_LITTERAL &&

2293

(testtag->flags & Tgf_strict)) ||

2294

(context->state == S_pcdata &&

2295

(testtag->flags & (Tgf_strict|Tgf_endO)))) &&

2296

(testlast > -1 &&

2297

(c == '>' || testlast > 0 || IsNmStart(c)))) {

2298

context->state = S_end;

2299

string->size--;

2300

for (i = 0; i < string->size; i++) /* remove '/' */

2301

string->data[i] = string->data[i+1];

2302

if ((string->size == 1) ? IsNmStart(c) : IsNmChar(c))

2303

break;

2304

string->size--;

2305

goto top1;

2306

}

2307

if (context->state == S_pcdata &&

2308

(testtag->flags & (Tgf_strict|Tgf_endO)) &&

2309

(testlast < 0 && IsNmStart(c))) {

2310

context->state = S_tag;

2311

break;

2312

}

2313

2314

** If Mismatch: recover string literally.

2315

2316

PUTC('<');

2317

for (i = 0; i < string->size-1; i++) /* recover, except last c */

2318

PUTC(string->data[i]);

2319

string->size = 0;

2320

context->state = S_text;

2321

goto top1; /* to recover last c */

2322

}

2323

break;

2324

2325

2326

** Character reference (numeric entity) or named entity.

2327

2328

case S_ero:

2329

if (c == '#') {

2330

2331

** Setting up for possible numeric entity.

2332

2333

context->state = S_cro; /* &# is Char Ref Open */

2334

break;

2335

}

2336

context->state = S_entity; /* Fall through! */

2337

2338

2339

** Handle possible named entity.

2340

2341

case S_entity:

2342

if (TOASCII(unsign_c) < 127 && (string->size ? /* S/390 -- gil -- 1029 */

2343

isalnum(UCH(c)) : isalpha(UCH(c)))) {

2344

/* Should probably use IsNmStart/IsNmChar above (is that right?),

2345

but the world is not ready for that - there's &nbsp: (note

2346

colon!) and stuff around. */

2347

2348

** Accept valid ASCII character. - FM

2349

2350

HTChunkPutc(string, c);

2351

} else if (string->size == 0) {

2352

2353

** It was an ampersand that's just text, so output

2354

** the ampersand and recycle this character. - FM

2355

2356

#ifdef USE_PRETTYSRC

2357

if (psrc_view)

2358

PSRCSTART(badseq);

2359

#endif

2360

PUTC('&');

2361

#ifdef USE_PRETTYSRC

2362

if (psrc_view)

2363

PSRCSTOP(badseq);

2364

#endif

2365

context->state = S_text;

2366

goto top1;

2367

} else {

2368

2369

** Terminate entity name and try to handle it. - FM

2370

2371

HTChunkTerminate(string);

2372

#ifdef USE_PRETTYSRC

2373

entity_string = string->data;

2374

#endif

2375

/* S/390 -- gil -- 1039 */

2376

/* CTRACE((tfp, "%s: %d: %s\n", __FILE__, __LINE__, string->data)); */

2377

if (!strcmp(string->data, "zwnj") &&

2378

(!context->element_stack ||

2379

(context->element_stack->tag &&

2380

context->element_stack->tag->contents == SGML_MIXED))) {

2381

2382

** Handle zwnj (8204) as <WBR>. - FM

2383

2384

char temp[8];

2385

2386

CTRACE((tfp, "SGML_character: Handling 'zwnj' entity as 'WBR' element.\n"));

2387

2388

if (c != ';') {

2389

sprintf(temp, "%c", c);

2390

} else {

2391

sprintf(temp, "");

2392

}

2393

if (context->recover == NULL) {

2394

StrAllocCopy(context->recover, temp);

2395

context->recover_index = 0;

2396

} else {

2397

StrAllocCat(context->recover, temp);

2398

}

2399

string->size = 0;

2400

context->state = S_text;

2401

break;

2402

} else {

2403

handle_entity(context, '\0');

2404

}

2405

string->size = 0;

2406

context->state = S_text;

2407

2408

** Don't eat the terminator if we didn't find the

2409

** entity name and therefore sent the raw string

2410

** via handle_entity(), or if the terminator is

2411

** not the "standard" semi-colon for HTML. - FM

2412

2413

#ifdef USE_PRETTYSRC

2414

if (psrc_view && FoundEntity && c == ';') {

2415

PSRCSTART(entity);

2416

PUTC(c);

2417

PSRCSTOP(entity);

2418

}

2419

#endif

2420

if (!FoundEntity || c != ';')

2421

goto top1;

2422

}

2423

break;

2424

2425

2426

** Check for a numeric entity.

2427

2428

case S_cro:

2429

if (TOASCII(unsign_c) < 127 && TOLOWER(UCH(c)) == 'x') { /* S/390 -- gil -- 1060 */

2430

context->isHex = TRUE;

2431

context->state = S_incro;

2432

} else if (TOASCII(unsign_c) < 127 && isdigit(UCH(c))) {

2433

2434

** Accept only valid ASCII digits. - FM

2435

2436

HTChunkPutc(string, c); /* accumulate a character NUMBER */

2437

context->isHex = FALSE;

2438

context->state = S_incro;

2439

} else if (string->size == 0) {

2440

2441

** No 'x' or digit following the "&#" so recover

2442

** them and recycle the character. - FM

2443

2444

#ifdef USE_PRETTYSRC

2445

if (psrc_view)

2446

PSRCSTART(badseq);

2447

#endif

2448

PUTC('&');

2449

PUTC('#');

2450

#ifdef USE_PRETTYSRC

2451

if (psrc_view)

2452

PSRCSTOP(badseq);

2453

#endif

2454

context->state = S_text;

2455

goto top1;

2456

}

2457

break;

2458

2459

2460

** Handle a numeric entity.

2461

2462

case S_incro:

2463

/* S/390 -- gil -- 1075 */ /* CTRACE((tfp, "%s: %d: numeric %d %d\n",

2464

__FILE__, __LINE__, unsign_c, c)); */

2465

if ((TOASCII(unsign_c) < 127) &&

2466

(context->isHex ? isxdigit(UCH(c)) :

2467

isdigit(UCH(c)))) {

2468

2469

** Accept only valid hex or ASCII digits. - FM

2470

2471

HTChunkPutc(string, c); /* accumulate a character NUMBER */

2472

} else if (string->size == 0) {

2473

2474

** No hex digit following the "&#x" so recover

2475

** them and recycle the character. - FM

2476

2477

#ifdef USE_PRETTYSRC

2478

if (psrc_view)

2479

PSRCSTART(badseq);

2480

#endif

2481

PUTS("&#x");

2482

#ifdef USE_PRETTYSRC

2483

if (psrc_view)

2484

PSRCSTOP(badseq);

2485

#endif

2486

context->isHex = FALSE;

2487

context->state = S_text;

2488

goto top1;

2489

} else {

2490

2491

** Terminate the numeric entity and try to handle it. - FM

2492

2493

UCode_t code;

2494

int i;

2495

HTChunkTerminate(string);

2496

#ifdef USE_PRETTYSRC

2497

entity_string = string->data;

2498

#endif

2499

if ((context->isHex ? sscanf(string->data, "%lx", &code) :

2500

sscanf(string->data, "%ld", &code)) == 1) {

2501

/* =============== work in ASCII below here =============== S/390 -- gil -- 1092 */

2502

if ((code == 1) ||

2503

(code > 127 && code < 156)) {

2504

2505

** Assume these are Microsoft code points,

2506

** inflicted on us by FrontPage. - FM

2507

2508

** MS FrontPage uses syntax like &#153; in the 128-159 range

2509

** and doesn't follow Unicode standards for this area.

2510

** Windows-1252 codepoints are assumed here.

2511

2512

switch (code) {

2513

case 1:

2514

2515

** WHITE SMILING FACE

2516

2517

code = 0x263a;

2518

break;

2519

case 128:

2520

2521

** EURO currency sign

2522

2523

code = 0x20ac;

2524

break;

2525

case 130:

2526

2527

** SINGLE LOW-9 QUOTATION MARK (sbquo)

2528

2529

code = 0x201a;

2530

break;

2531

case 132:

2532

2533

** DOUBLE LOW-9 QUOTATION MARK (bdquo)

2534

2535

code = 0x201e;

2536

break;

2537

case 133:

2538

2539

** HORIZONTAL ELLIPSIS (hellip)

2540

2541

code = 0x2026;

2542

break;

2543

case 134:

2544

2545

** DAGGER (dagger)

2546

2547

code = 0x2020;

2548

break;

2549

case 135:

2550

2551

** DOUBLE DAGGER (Dagger)

2552

2553

code = 0x2021;

2554

break;

2555

case 137:

2556

2557

** PER MILLE SIGN (permil)

2558

2559

code = 0x2030;

2560

break;

2561

case 139:

2562

2563

** SINGLE LEFT-POINTING ANGLE QUOTATION MARK

2564

** (lsaquo)

2565

2566

code = 0x2039;

2567

break;

2568

case 145:

2569

2570

** LEFT SINGLE QUOTATION MARK (lsquo)

2571

2572

code = 0x2018;

2573

break;

2574

case 146:

2575

2576

** RIGHT SINGLE QUOTATION MARK (rsquo)

2577

2578

code = 0x2019;

2579

break;

2580

case 147:

2581

2582

** LEFT DOUBLE QUOTATION MARK (ldquo)

2583

2584

code = 0x201c;

2585

break;

2586

case 148:

2587

2588

** RIGHT DOUBLE QUOTATION MARK (rdquo)

2589

2590

code = 0x201d;

2591

break;

2592

case 149:

2593

2594

** BULLET (bull)

2595

2596

code = 0x2022;

2597

break;

2598

case 150:

2599

2600

** EN DASH (ndash)

2601

2602

code = 0x2013;

2603

break;

2604

case 151:

2605

2606

** EM DASH (mdash)

2607

2608

code = 0x2014;

2609

break;

2610

case 152:

2611

2612

** SMALL TILDE (tilde)

2613

2614

code = 0x02dc;

2615

break;

2616

case 153:

2617

2618

** TRADE MARK SIGN (trade)

2619

2620

code = 0x2122;

2621

break;

2622

case 155:

2623

2624

** SINGLE RIGHT-POINTING ANGLE QUOTATION MARK

2625

** (rsaquo)

2626

2627

code = 0x203a;

2628

break;

2629

default:

2630

2631

** Do not attempt a conversion

2632

** to valid Unicode values.

2633

2634

break;

2635

}

2636

}

2637

2638

** Check for special values. - FM

2639

2640

if ((code == 8204) &&

2641

(!context->element_stack ||

2642

(context->element_stack->tag &&

2643

context->element_stack->tag->contents == SGML_MIXED))) {

2644

2645

** Handle zwnj (8204) as <WBR>. - FM

2646

2647

char temp[8];

2648

2649

CTRACE((tfp, "SGML_character: Handling '8204' (zwnj) reference as 'WBR' element.\n"));

2650

2651

2652

** Include the terminator if it is not

2653

** the standard semi-colon. - FM

2654

2655

if (c != ';') {

2656

sprintf(temp, "%c", c);

2657

} else {

2658

sprintf(temp, "");

2659

}

2660

2661

** Add the replacement string to the

2662

** recover buffer for processing. - FM

2663

2664

if (context->recover == NULL) {

2665

StrAllocCopy(context->recover, temp);

2666

context->recover_index = 0;

2667

} else {

2668

StrAllocCat(context->recover, temp);

2669

}

2670

string->size = 0;

2671

context->isHex = FALSE;

2672

context->state = S_text;

2673

break;

2674

} else if (put_special_unicodes(context, code)) {

2675

2676

** We handled the value as a special character,

2677

** so recycle the terminator or break. - FM

2678

2679

#ifdef USE_PRETTYSRC

2680

if (psrc_view) {

2681

PSRCSTART(entity);

2682

PUTS( (context->isHex ? "&#x" : "&#") );

2683

PUTS(entity_string);

2684

if (c == ';')

2685

PUTC(';');

2686

PSRCSTOP(entity);

2687

}

2688

#endif

2689

string->size = 0;

2690

context->isHex = FALSE;

2691

context->state = S_text;

2692

if (c != ';')

2693

goto top1;

2694

break;

2695

}

2696

2697

** Seek a translation from the chartrans tables.

2698

2699

if ((uck = UCTransUniChar(code,

2700

context->outUCLYhndl)) >= 32 &&

2701

uck < 256 &&

2702

(uck < 127 ||

2703

uck >= LYlowest_eightbit[context->outUCLYhndl])) {

2704

#ifdef USE_PRETTYSRC

2705

if (!psrc_view) {

2706

#endif

2707

PUTC(FROMASCII((char)uck));

2708

#ifdef USE_PRETTYSRC

2709

} else {

2710

put_pretty_number(context);

2711

}

2712

#endif

2713

} else if ((uck == -4 ||

2714

(context->T.repl_translated_C0 &&

2715

uck > 0 && uck < 32)) &&

2716

2717

** Not found; look for replacement string.

2718

2719

(uck = UCTransUniCharStr(replace_buf, 60, code,

2720

context->outUCLYhndl,

2721

0) >= 0)) {

2722

#ifdef USE_PRETTYSRC

2723

if (psrc_view) {

2724

put_pretty_number(context);

2725

} else

2726

#endif

2727

PUTS(replace_buf);

2728

2729

** If we're displaying UTF-8, try that now. - FM

2730

2731

} else if (context->T.output_utf8 && PUTUTF8(code)) {

2732

; /* do nothing more */

2733

2734

** Ignore 8205 (zwj),

2735

** 8206 (lrm), and 8207 (rlm), if we get to here. - FM

2736

2737

} else if (code == 8205 ||

2738

code == 8206 ||

2739

code == 8207) {

2740

if (TRACE) {

2741

string->size--;

2742

LYstrncpy(replace_buf,

2743

string->data,

2744

(string->size < 64 ? string->size : 63));

2745

fprintf(tfp,

2746

"SGML_character: Ignoring '%s%s'.\n",

2747

(context->isHex ? "&#x" : "&#"),

2748

replace_buf);

2749

}

2750

#ifdef USE_PRETTYSRC

2751

if (psrc_view) {

2752

PSRCSTART(badseq);

2753

PUTS( (context->isHex ? "&#x" : "&#") );

2754

PUTS(entity_string);

2755

if (c == ';')

2756

PUTC(';');

2757

PSRCSTOP(badseq);

2758

}

2759

#endif

2760

string->size = 0;

2761

context->isHex = FALSE;

2762

context->state = S_text;

2763

if (c != ';')

2764

goto top1;

2765

break;

2766

2767

** Show the numeric entity if we get to here

2768

** and the value:

2769

** (1) Is greater than 255 (but use ASCII characters

2770

** for spaces or dashes).

2771

** (2) Is less than 32, and not valid or we don't

2772

** have HTCJK set.

2773

** (3) Is 127 and we don't have HTPassHighCtrlRaw or

2774

** HTCJK set.

2775

** (4) Is 128 - 159 and we don't have HTPassHighCtrlNum

2776

** set.

2777

** - FM

2778

2779

} else if ((code > 255) ||

2780

(code < ' ' && /* S/390 -- gil -- 1140 */

2781

code != '\t' && code != '\n' && code != '\r' &&

2782

HTCJK == NOCJK) ||

2783

(TOASCII(code) == 127 &&

2784

!(HTPassHighCtrlRaw || HTCJK != NOCJK)) ||

2785

(TOASCII(code) > 127 && code < 160 &&

2786

!HTPassHighCtrlNum)) {

2787

2788

** Unhandled or illegal value. Recover the

2789

** "&#" or "&#x" and digit(s), and recycle

2790

** the terminator. - FM

2791

2792

#ifdef USE_PRETTYSRC

2793

if (psrc_view) {

2794

PSRCSTART(badseq);

2795

}

2796

#endif

2797

if (context->isHex) {

2798

PUTS("&#x");

2799

context->isHex = FALSE;

2800

} else {

2801

PUTS("&#");

2802

}

2803

string->size--;

2804

for (i = 0; i < string->size; i++) /* recover */

2805

PUTC(string->data[i]);

2806

#ifdef USE_PRETTYSRC

2807

if (psrc_view) {

2808

PSRCSTOP(badseq);

2809

}

2810

#endif

2811

string->size = 0;

2812

context->isHex = FALSE;

2813

context->state = S_text;

2814

goto top1;

2815

} else if (TOASCII(code) < 161 || /* S/390 -- gil -- 1162 */

2816

HTPassEightBitNum ||

2817

IncludesLatin1Enc) {

2818

2819

** No conversion needed. - FM

2820

2821

#ifdef USE_PRETTYSRC

2822

if (psrc_view) {

2823

put_pretty_number(context);

2824

} else

2825

#endif

2826

PUTC(FROMASCII((char)code));

2827

} else {

2828

2829

** Handle as named entity. - FM

2830

2831

code -= 160;

2832

EntityName = HTMLGetEntityName(code);

2833

if (EntityName && EntityName[0] != '\0') {

2834

string->size = 0;

2835

HTChunkPuts(string, EntityName);

2836

HTChunkTerminate(string);

2837

handle_entity(context, '\0');

2838

2839

** Add a semi-colon if something went wrong

2840

** and handle_entity() sent the string. - FM

2841

2842

if (!FoundEntity) {

2843

PUTC(';');

2844

}

2845

} else {

2846

2847

** Our conversion failed, so recover the "&#"

2848

** and digit(s), and recycle the terminator. - FM

2849

2850

#ifdef USE_PRETTYSRC

2851

if (psrc_view)

2852

PSRCSTART(badseq);

2853

#endif

2854

if (context->isHex) {

2855

PUTS("&#x");

2856

context->isHex = FALSE;

2857

} else {

2858

PUTS("&#");

2859

}

2860

string->size--;

2861

for (i = 0; i < string->size; i++) /* recover */

2862

PUTC(string->data[i]);

2863

#ifdef USE_PRETTYSRC

2864

if (psrc_view)

2865

PSRCSTOP(badseq);

2866

#endif

2867

string->size = 0;

2868

context->isHex = FALSE;

2869

context->state = S_text;

2870

goto top1;

2871

}

2872

}

2873

2874

** If we get to here, we succeeded. Hoorah!!! - FM

2875

2876

string->size = 0;

2877

context->isHex = FALSE;

2878

context->state = S_text;

2879

2880

** Don't eat the terminator if it's not

2881

** the "standard" semi-colon for HTML. - FM

2882

2883

if (c != ';') {

2884

goto top1;

2885

}

2886

} else {

2887

2888

** Not an entity, and don't know why not, so add

2889

** the terminator to the string, output the "&#"

2890

** or "&#x", and process the string via the recover

2891

** element. - FM

2892

2893

string->size--;

2894

HTChunkPutc(string, c);

2895

HTChunkTerminate(string);

2896

#ifdef USE_PRETTYSRC

2897

if (psrc_view)

2898

PSRCSTART(badseq);

2899

#endif

2900

if (context->isHex) {

2901

PUTS("&#x");

2902

context->isHex = FALSE;

2903

} else {

2904

PUTS("&#");

2905

}

2906

#ifdef USE_PRETTYSRC

2907

if (psrc_view)

2908

PSRCSTOP(badseq);

2909

#endif

2910

if (context->recover == NULL) {

2911

StrAllocCopy(context->recover, string->data);

2912

context->recover_index = 0;

2913

} else {

2914

StrAllocCat(context->recover, string->data);

2915

}

2916

string->size = 0;

2917

context->isHex = FALSE;

2918

context->state = S_text;

2919

break;

2920

}

2921

}

2922

break;

2923

2924

2925

** Tag

2926

2927

case S_tag: /* new tag */

2928

if (TOASCII(unsign_c) < 127 && (string->size ? /* S/390 -- gil -- 1179 */

2929

IsNmChar(c) : IsNmStart(c))) {

2930

2931

** Add valid ASCII character. - FM

2932

2933

HTChunkPutc(string, c);

2934

} else if (c == '!' && !string->size) { /* <! */

2935

2936

** Terminate and set up for possible comment,

2937

** identifier, declaration, or marked section. - FM

2938

2939

context->state = S_exclamation;

2940

context->lead_exclamation = TRUE;

2941

context->doctype_bracket = FALSE;

2942

context->first_bracket = FALSE;

2943

HTChunkPutc(string, c);

2944

break;

2945

} else if (!string->size &&

2946

(TOASCII(unsign_c) <= 160 && /* S/390 -- gil -- 1196 */

2947

(c != '/' && c != '?' && c != '_' && c != ':'))) {

2948

2949

** '<' must be followed by an ASCII letter to be a valid

2950

** start tag. Here it isn't, nor do we have a '/' for an

2951

** end tag, nor one of some other characters with a

2952

** special meaning for SGML or which are likely to be legal

2953

** Name Start characters in XML or some other extension.

2954

** So recover the '<' and following character as data. - FM & KW

2955

2956

context->state = S_text;

2957

#ifdef USE_PRETTYSRC

2958

if (psrc_view)

2959

PSRCSTART(badseq);

2960

#endif

2961

PUTC('<');

2962

#ifdef USE_PRETTYSRC

2963

if (psrc_view)

2964

PSRCSTOP(badseq);

2965

#endif

2966

goto top1;

2967

} else { /* End of tag name */

2968

2969

** Try to handle tag. - FM

2970

2971

HTTag * t;

2972

if (c == '/') {

2973

if (string->size == 0) {

2974

context->state = S_end;

2975

break;

2976

}

2977

CTRACE((tfp,"SGML: `<%.*s/' found!\n", string->size, string->data));

2978

}

2979

HTChunkTerminate(string) ;

2980

2981

t = SGMLFindTag(dtd, string->data);

2982

if (t == context->unknown_tag &&

2983

((c == ':' &&

2984

string->size == 4 && 0 == strcasecomp(string->data, "URL")) ||

2985

(string->size > 4 && 0 == strncasecomp(string->data, "URL:", 4)))) {

2986

2987

** Treat <URL: as text rather than a junk tag,

2988

** so we display it and the URL (Lynxism 8-). - FM

2989

2990

#ifdef USE_PRETTYSRC

2991

if (psrc_view)

2992

PSRCSTART(badseq);

2993

#endif

2994

PUTC('<');

2995

PUTS(string->data); /* recover */

2996

PUTC(c);

2997

#ifdef USE_PRETTYSRC

2998

if (psrc_view)

2999

PSRCSTOP(badseq);

3000

#endif

3001

CTRACE((tfp, "SGML: Treating <%s%c as text\n",

3002

string->data, c));

3003

string->size = 0;

3004

context->state = S_text;

3005

break;

3006

}

3007

if (c == '/' && t) {

3008

3009

* Element name was ended by '/'. Remember the tag that

3010

* ended thusly, we'll interpret this as either an indication

3011

* of an empty element (if '>' follows directly) or do

3012

* some SGMLshortref-ish treatment. - kw

3013

3014

context->slashedtag = t;

3015

}

3016

if (!t) {

3017

if (c == '?' && string->size <= 1) {

3018

CTRACE((tfp, "SGML: Found PI, junking it until '>'\n"));

3019

#ifdef USE_PRETTYSRC

3020

if (psrc_view) {

3021

PSRCSTART(abracket);

3022

PUTS("<?");

3023

PSRCSTOP(abracket);

3024

context->seen_nonwhite_in_junk_tag = TRUE; /*show all*/

3025

}

3026

#endif

3027

context->state = S_junk_pi;

3028

break;

3029

}

3030

CTRACE((tfp, "SGML: *** Invalid element %s\n",

3031

string->data));

3032

3033

#ifdef USE_PRETTYSRC

3034

if (psrc_view) {

3035

PSRCSTART(abracket);

3036

PUTC('<');

3037

PSRCSTOP(abracket);

3038

PSRCSTART(badtag);

3039

if (tagname_transform != 1) {

3040

if (tagname_transform == 0)

3041

LYLowerCase(string->data);

3042

else

3043

LYUpperCase(string->data);

3044

}

3045

PUTS(string->data);

3046

if (c == '>' ) {

3047

PSRCSTOP(badtag);

3048

PSRCSTART(abracket);

3049

PUTC('>');

3050

PSRCSTOP(abracket);

3051

} else {

3052

PUTC(c);

3053

}

3054

}

3055

#endif

3056

context->state = (c == '>') ? S_text : S_junk_tag;

3057

break;

3058

} else if (t == context->unknown_tag) {

3059

CTRACE((tfp, "SGML: *** Unknown element %s\n",

3060

string->data));

3061

3062

** Fall through and treat like valid

3063

** tag for attribute parsing. - KW

3064

3065

3066

}

3067

context->current_tag = t;

3068

3069

#ifdef USE_PRETTYSRC

3070

if (psrc_view) {

3071

PSRCSTART(abracket);

3072

PUTC('<');

3073

PSRCSTOP(abracket);

3074

if (t != context->unknown_tag)

3075

PSRCSTART(tag);

3076

else

3077

PSRCSTART(badtag);

3078

if (tagname_transform != 1) {

3079

if (tagname_transform == 0)

3080

LYLowerCase(string->data);

3081

else

3082

LYUpperCase(string->data);

3083

}

3084

PUTS(string->data);

3085

if (t != context->unknown_tag)

3086

PSRCSTOP(tag);

3087

else

3088

PSRCSTOP(badtag);

3089

}

3090

if (!psrc_view) /*don't waste time */

3091

#endif

3092

{

3093

3094

** Clear out attributes.

3095

3096

memset( (void*)context->present, 0 , sizeof(BOOL)*

3097

context->current_tag->number_of_attributes);

3098

}

3099

3100

string->size = 0;

3101

context->current_attribute_number = INVALID;

3102

#ifdef USE_PRETTYSRC

3103

if (psrc_view) {

3104

if (c == '>' || c == '<' || (c == '/' && context->slashedtag)) {

3105

if (c != '<') {

3106

PSRCSTART(abracket);

3107

PUTC(c);

3108

PSRCSTOP(abracket);

3109

context->state = (c == '>') ? S_text : S_tagname_slash;

3110

} else {

3111

context->state = S_tag;

3112

}

3113

} else {

3114

if (!WHITE(c))

3115

PUTC(c);

3116

context->state = S_tag_gap;

3117

}

3118

} else

3119

#endif

3120

if (c == '>' || c == '<' || (c == '/' && context->slashedtag)) {

3121

if (context->current_tag->name)

3122

start_element(context);

3123

context->state = (c == '>') ? S_text :

3124

(c == '<') ? S_tag : S_tagname_slash;

3125

} else {

3126

context->state = S_tag_gap;

3127

}

3128

}

3129

break;

3130

3131

case S_exclamation:

3132

if (context->lead_exclamation && c == '-') {

3133

3134

** Set up for possible comment. - FM

3135

3136

context->lead_exclamation = FALSE;

3137

context->first_dash = TRUE;

3138

HTChunkPutc(string, c);

3139

break;

3140

}

3141

if (context->lead_exclamation && c == '[') {

3142

3143

** Set up for possible marked section. - FM

3144

3145

context->lead_exclamation = FALSE;

3146

context->first_bracket = TRUE;

3147

context->second_bracket = FALSE;

3148

HTChunkPutc(string, c);

3149

context->state = S_marked;

3150

break;

3151

}

3152

if (context->first_dash && c == '-') {

3153

3154

** Set up to handle comment. - FM

3155

3156

context->lead_exclamation = FALSE;

3157

context->first_dash = FALSE;

3158

context->end_comment = FALSE;

3159

HTChunkPutc(string, c);

3160

context->state = S_comment;

3161

break;

3162

}

3163

context->lead_exclamation = FALSE;

3164

context->first_dash = FALSE;

3165

if (c == '>') {

3166

3167

** Try to handle identifier. - FM

3168

3169

HTChunkTerminate(string);

3170

#ifdef USE_PRETTYSRC

3171

if (psrc_view) {

3172

PSRCSTART(sgmlspecial);

3173

PUTC('<');

3174

PUTS(string->data);

3175

PUTC('>');

3176

PSRCSTOP(sgmlspecial);

3177

} else

3178

#endif

3179

handle_identifier(context);

3180

string->size = 0;

3181

context->state = S_text;

3182

break;

3183

}

3184

if (WHITE(c)) {

3185

if (string->size == 8 &&

3186

!strncasecomp(string->data, "!DOCTYPE", 8)) {

3187

3188

** Set up for DOCTYPE declaration. - FM

3189

3190

HTChunkPutc(string, c);

3191

context->doctype_bracket = FALSE;

3192

context->state = S_doctype;

3193

break;

3194

}

3195

if (string->size == 7 &&

3196

!strncasecomp(string->data, "!ENTITY", 7)) {

3197

3198

** Set up for ENTITY declaration. - FM

3199

3200

HTChunkPutc(string, c);

3201

context->first_dash = FALSE;

3202

context->end_comment = TRUE;

3203

context->state = S_sgmlent;

3204

break;

3205

}

3206

if (string->size == 8 &&

3207

!strncasecomp(string->data, "!ELEMENT", 8)) {

3208

3209

** Set up for ELEMENT declaration. - FM

3210

3211

HTChunkPutc(string, c);

3212

context->first_dash = FALSE;

3213

context->end_comment = TRUE;

3214

context->state = S_sgmlele;

3215

break;

3216

}

3217

if (string->size == 8 &&

3218

!strncasecomp(string->data, "!ATTLIST", 8)) {

3219

3220

** Set up for ATTLIST declaration. - FM

3221

3222

HTChunkPutc(string, c);

3223

context->first_dash = FALSE;

3224

context->end_comment = TRUE;

3225

context->state = S_sgmlatt;

3226

break;

3227

}

3228

}

3229

HTChunkPutc(string, c);

3230

break;

3231

3232

case S_comment: /* Expecting comment. - FM */

3233

if (historical_comments) {

3234

3235

** Any '>' terminates. - FM

3236

3237

if (c == '>') {

3238

HTChunkTerminate(string);

3239

#ifdef USE_PRETTYSRC

3240

if (psrc_view) {

3241

PSRCSTART(comm);

3242

PUTC('<');

3243

PUTS_TR(string->data);

3244

PUTC('>');

3245

PSRCSTOP(comm);

3246

} else

3247

#endif

3248

handle_comment(context);

3249

string->size = 0;

3250

context->end_comment = FALSE;

3251

context->first_dash = FALSE;

3252

context->state = S_text;

3253

break;

3254

}

3255

goto S_comment_put_c;

3256

}

3257

if (!context->first_dash && c == '-') {

3258

HTChunkPutc(string, c);

3259

context->first_dash = TRUE;

3260

break;

3261

}

3262

if (context->first_dash && c == '-') {

3263

HTChunkPutc(string, c);

3264

context->first_dash = FALSE;

3265

if (!context->end_comment)

3266

context->end_comment = TRUE;

3267

else if (!minimal_comments)

3268

3269

** Validly treat '--' pairs as successive comments

3270

** (for minimal, any "--WHITE>" terminates). - FM

3271

3272

context->end_comment = FALSE;

3273

break;

3274

}

3275

if (context->end_comment && c == '>') {

3276

3277

** Terminate and handle the comment. - FM

3278

3279

HTChunkTerminate(string);

3280

#ifdef USE_PRETTYSRC

3281

if (psrc_view) {

3282

PSRCSTART(comm);

3283

PUTC('<');

3284

PUTS_TR(string->data);

3285

PUTC('>');

3286

PSRCSTOP(comm);

3287

} else

3288

#endif

3289

handle_comment(context);

3290

string->size = 0;

3291

context->end_comment = FALSE;

3292

context->first_dash = FALSE;

3293

context->state = S_text;

3294

break;

3295

}

3296

context->first_dash = FALSE;

3297

if (context->end_comment && !isspace(UCH(c)))

3298

context->end_comment = FALSE;

3299

3300

S_comment_put_c:

3301

if (context->T.decode_utf8 &&

3302

*context->utf_buf) {

3303

HTChunkPuts(string, context->utf_buf);

3304

context->utf_buf_p = context->utf_buf;

3305

*(context->utf_buf_p) = '\0';

3306

} else if (HTCJK == NOCJK &&

3307

(context->T.output_utf8 ||

3308

context->T.trans_from_uni)) {

3309

if (clong == 0xfffd && saved_char_in &&

3310

HTPassEightBitRaw &&

3311

UCH(saved_char_in) >=

3312

LYlowest_eightbit[context->outUCLYhndl]) {

3313

HTChunkPutUtf8Char(string,

3314

(0xf000 | UCH(saved_char_in)));

3315

} else {

3316

HTChunkPutUtf8Char(string, clong);

3317

}

3318

} else if (saved_char_in && context->T.use_raw_char_in) {

3319

HTChunkPutc(string, saved_char_in);

3320

} else {

3321

HTChunkPutc(string, c);

3322

}

3323

break;

3324

3325

case S_doctype: /* Expecting DOCTYPE. - FM */

3326

if (context->doctype_bracket) {

3327

HTChunkPutc(string, c);

3328

if (c == ']')

3329

context->doctype_bracket = FALSE;

3330

break;

3331

}

3332

if (c == '[' && WHITE(string->data[string->size - 1])) {

3333

HTChunkPutc(string, c);

3334

context->doctype_bracket = TRUE;

3335

break;

3336

}

3337

if (c == '>') {

3338

HTChunkTerminate(string);

3339

#ifdef USE_PRETTYSRC

3340

if (psrc_view) {

3341

PSRCSTART(sgmlspecial);

3342

PUTC('<');

3343

PUTS(string->data);

3344

PUTC('>');

3345

PSRCSTOP(sgmlspecial);

3346

} else

3347

#endif

3348

handle_doctype(context);

3349

string->size = 0;

3350

context->state = S_text;

3351

break;

3352

}

3353

HTChunkPutc(string, c);

3354

break;

3355

3356

case S_marked: /* Expecting marked section. - FM */

3357

if (context->first_bracket && c == '[') {

3358

HTChunkPutc(string, c);

3359

context->first_bracket = FALSE;

3360

context->second_bracket = TRUE;

3361

break;

3362

}

3363

if (context->second_bracket && c == ']' &&

3364

string->data[string->size - 1] == ']') {

3365

HTChunkPutc(string, c);

3366

context->second_bracket = FALSE;

3367

break;

3368

}

3369

if (!context->second_bracket && c == '>') {

3370

HTChunkTerminate(string);

3371

#ifdef USE_PRETTYSRC

3372

if (psrc_view) {

3373

PSRCSTART(sgmlspecial);

3374

PUTC('<');

3375

PUTS(string->data);

3376

PUTC('>');

3377

PSRCSTOP(sgmlspecial);

3378

} else

3379

#endif

3380

handle_marked(context);

3381

string->size = 0;

3382

context->state = S_text;

3383

break;

3384

}

3385

HTChunkPutc(string, c);

3386

break;

3387

3388

case S_sgmlent: /* Expecting ENTITY. - FM */

3389

if (!context->first_dash && c == '-') {

3390

HTChunkPutc(string, c);

3391

context->first_dash = TRUE;

3392

break;

3393

}

3394

if (context->first_dash && c == '-') {

3395

HTChunkPutc(string, c);

3396

context->first_dash = FALSE;

3397

if (!context->end_comment)

3398

context->end_comment = TRUE;

3399

else

3400

context->end_comment = FALSE;

3401

break;

3402

}

3403

if (context->end_comment && c == '>') {

3404

HTChunkTerminate(string);

3405

#ifdef USE_PRETTYSRC

3406

if (psrc_view) {

3407

PSRCSTART(sgmlspecial);

3408

PUTC('<');

3409

PUTS(string->data);

3410

PUTC('>');

3411

PSRCSTOP(sgmlspecial);

3412

} else

3413

#endif

3414

handle_sgmlent(context);

3415

string->size = 0;

3416

context->end_comment = FALSE;

3417

context->first_dash = FALSE;

3418

context->state = S_text;

3419

break;

3420

}

3421

context->first_dash = FALSE;

3422

HTChunkPutc(string, c);

3423

break;

3424

3425

case S_sgmlele: /* Expecting ELEMENT. - FM */

3426

if (!context->first_dash && c == '-') {

3427

HTChunkPutc(string, c);

3428

context->first_dash = TRUE;

3429

break;

3430

}

3431

if (context->first_dash && c == '-') {

3432

HTChunkPutc(string, c);

3433

context->first_dash = FALSE;

3434

if (!context->end_comment)

3435

context->end_comment = TRUE;

3436

else

3437

context->end_comment = FALSE;

3438

break;

3439

}

3440

if (context->end_comment && c == '>') {

3441

HTChunkTerminate(string);

3442

#ifdef USE_PRETTYSRC

3443

if (psrc_view) {

3444

PSRCSTART(sgmlspecial);

3445

PUTC('<');

3446

PUTS(string->data);

3447

PUTC('>');

3448

PSRCSTOP(sgmlspecial);

3449

} else

3450

#endif

3451

handle_sgmlele(context);

3452

string->size = 0;

3453

context->end_comment = FALSE;

3454

context->first_dash = FALSE;

3455

context->state = S_text;

3456

break;

3457

}

3458

context->first_dash = FALSE;

3459

HTChunkPutc(string, c);

3460

break;

3461

3462

case S_sgmlatt: /* Expecting ATTLIST. - FM */

3463

if (!context->first_dash && c == '-') {

3464

HTChunkPutc(string, c);

3465

context->first_dash = TRUE;

3466

break;

3467

}

3468

if (context->first_dash && c == '-') {

3469

HTChunkPutc(string, c);

3470

context->first_dash = FALSE;

3471

if (!context->end_comment)

3472

context->end_comment = TRUE;

3473

else

3474

context->end_comment = FALSE;

3475

break;

3476

}

3477

if (context->end_comment && c == '>') {

3478

HTChunkTerminate(string);

3479

#ifdef USE_PRETTYSRC

3480

if (psrc_view) {

3481

PSRCSTART(sgmlspecial);

3482

PUTC('<');

3483

PUTS(string->data);

3484

PUTC('>');

3485

PSRCSTOP(sgmlspecial);

3486

} else

3487

#endif

3488

handle_sgmlatt(context);

3489

string->size = 0;

3490

context->end_comment = FALSE;

3491

context->first_dash = FALSE;

3492

context->state = S_text;

3493

break;

3494

}

3495

context->first_dash = FALSE;

3496

HTChunkPutc(string, c);

3497

break;

3498

3499

case S_tag_gap: /* Expecting attribute or '>' */

3500

if (WHITE(c))

3501

break; /* Gap between attributes */

3502

if (c == '>') { /* End of tag */

3503

#ifdef USE_PRETTYSRC

3504

if (!psrc_view)

3505

#endif

3506

if (context->current_tag->name)

3507

start_element(context);

3508

#ifdef USE_PRETTYSRC

3509

if (psrc_view) {

3510

PSRCSTART(abracket);

3511

PUTC('>');

3512

PSRCSTOP(abracket);

3513

}

3514

#endif

3515

context->state = S_text;

3516

break;

3517

}

3518

HTChunkPutc(string, c);

3519

context->state = S_attr; /* Get attribute */

3520

break;

3521

3522

/* accumulating attribute name */

3523

case S_attr:

3524

if (WHITE(c) || (c == '>') || (c == '=')) { /* End of word */

3525

HTChunkTerminate(string);

3526

handle_attribute_name(context, string->data);

3527

#ifdef USE_PRETTYSRC

3528

if (!psrc_view) {

3529

#endif

3530

string->size = 0;

3531

if (c == '>') { /* End of tag */

3532

if (context->current_tag->name)

3533

start_element(context);

3534

context->state = S_text;

3535

break;

3536

}

3537

#ifdef USE_PRETTYSRC

3538

} else {

3539

PUTC(' ');

3540

if (context->current_attribute_number == INVALID)

3541

PSRCSTART(badattr);

3542

else

3543

PSRCSTART(attrib);

3544

if (attrname_transform != 1) {

3545

if (attrname_transform == 0)

3546

LYLowerCase(string->data);

3547

else

3548

LYUpperCase(string->data);

3549

}

3550

PUTS(string->data);

3551

if (c == '=' ) PUTC('=');

3552

if (c == '=' || c == '>') {

3553

if (context->current_attribute_number == INVALID)

3554

PSRCSTOP(badattr);

3555

else

3556

PSRCSTOP(attrib);

3557

}

3558

if (c == '>') {

3559

PSRCSTART(abracket);

3560

PUTC('>');

3561

PSRCSTOP(abracket);

3562

context->state = S_text;

3563

break;

3564

}

3565

string->size = 0;

3566

}

3567

#endif

3568

context->state = (c == '=' ? S_equals: S_attr_gap);

3569

} else {

3570

HTChunkPutc(string, c);

3571

}

3572

break;

3573

3574

case S_attr_gap: /* Expecting attribute or '=' or '>' */

3575

if (WHITE(c))

3576

break; /* Gap after attribute */

3577

if (c == '>') { /* End of tag */

3578

#ifdef USE_PRETTYSRC

3579

if (psrc_view) {

3580

if (context->current_attribute_number == INVALID) {

3581

PSRCSTOP(badattr);

3582

} else {

3583

PSRCSTOP(attrib);

3584

}

3585

PSRCSTART(abracket);

3586

PUTC('>');

3587

PSRCSTOP(abracket);

3588

} else

3589

#endif

3590

if (context->current_tag->name)

3591

start_element(context);

3592

context->state = S_text;

3593

break;

3594

} else if (c == '=') {

3595

#ifdef USE_PRETTYSRC

3596

if (psrc_view) {

3597

PUTC('=');

3598

if (context->current_attribute_number == INVALID) {

3599

PSRCSTOP(badattr);

3600

} else {

3601

PSRCSTOP(attrib);

3602

}

3603

}

3604

#endif

3605

context->state = S_equals;

3606

break;

3607

}

3608

HTChunkPutc(string, c);

3609

context->state = S_attr; /* Get next attribute */

3610

break;

3611

3612

case S_equals: /* After attr = */

3613

if (WHITE(c))

3614

break; /* Before attribute value */

3615

if (c == '>') { /* End of tag */

3616

CTRACE((tfp, "SGML: found = but no value\n"));

3617

#ifdef USE_PRETTYSRC

3618

if (psrc_view) {

3619

PSRCSTART(abracket);

3620

PUTC('>');

3621

PSRCSTOP(abracket);

3622

} else

3623

#endif

3624

if (context->current_tag->name)

3625

start_element(context);

3626

context->state = S_text;

3627

break;

3628

3629

} else if (c == '\'') {

3630

#ifdef USE_PRETTYSRC

3631

if (psrc_view) {

3632

PSRCSTART(attrval);

3633

PUTC(c);

3634

}

3635

#endif

3636

context->state = S_squoted;

3637

break;

3638

3639

} else if (c == '"') {

3640

#ifdef USE_PRETTYSRC

3641

if (psrc_view) {

3642

PSRCSTART(attrval);

3643

PUTC(c);

3644

}

3645

#endif

3646

context->state = S_dquoted;

3647

break;

3648

}

3649

#ifdef USE_PRETTYSRC

3650

if (psrc_view)

3651

PSRCSTART(attrval);

3652

#endif

3653

context->state = S_value;

3654

/* no break! fall through to S_value and process current `c` */

3655

3656

case S_value:

3657

if (WHITE(c) || (c == '>')) { /* End of word */

3658

HTChunkTerminate(string) ;

3659

#ifdef USE_PRETTYSRC

3660

if (psrc_view) {

3661

/*PSRCSTART(attrval);*/

3662

if (attr_is_name) {

3663

HTStartAnchor(context->target, string->data, NULL);

3664

(*context->actions->end_element)(

3665

context->target,

3666

HTML_A,

3667

(char **)&context->include);

3668

} else if (attr_is_href) {

3669

PSRCSTART(href);

3670

HTStartAnchor(context->target,NULL,string->data);

3671

}

3672

PUTS_TR(string->data);

3673

if (attr_is_href) {

3674

(*context->actions->end_element)(

3675

context->target,

3676

HTML_A,

3677

(char **)&context->include);

3678

PSRCSTOP(href);

3679

}

3680

PSRCSTOP(attrval);

3681

} else

3682

#endif

3683

#ifdef CJK_EX /* Quick hack. - JH7AYN */

3684

{ char jis_buf[512];

3685

if (string->data[0] == '$') {

3686

if (string->data[1] == 'B' || string->data[1] == '@') {

3687

jis_buf[0] = '\033';

3688

strcpy(jis_buf + 1, string->data);

3689

TO_EUC((CONST unsigned char *)jis_buf, (unsigned char *)string->data);

3690

}

3691

}

3692

}

3693

#endif

3694

handle_attribute_value(context, string->data);

3695

string->size = 0;

3696

if (c == '>') { /* End of tag */

3697

#ifdef USE_PRETTYSRC

3698

if (psrc_view) {

3699

PSRCSTART(abracket);

3700

PUTC('>');

3701

PSRCSTOP(abracket);

3702

} else

3703

#endif

3704

if (context->current_tag->name)

3705

start_element(context);

3706

context->state = S_text;

3707

break;

3708

}

3709

else context->state = S_tag_gap;

3710

} else if (context->T.decode_utf8 &&

3711

*context->utf_buf) {

3712

HTChunkPuts(string, context->utf_buf);

3713

context->utf_buf_p = context->utf_buf;

3714

*(context->utf_buf_p) = '\0';

3715

} else if (HTCJK == NOCJK &&

3716

(context->T.output_utf8 ||

3717

context->T.trans_from_uni)) {

3718

if (clong == 0xfffd && saved_char_in &&

3719

HTPassEightBitRaw &&

3720

UCH(saved_char_in) >=

3721

LYlowest_eightbit[context->outUCLYhndl]) {

3722

HTChunkPutUtf8Char(string,

3723

(0xf000 | UCH(saved_char_in)));

3724

} else {

3725

HTChunkPutUtf8Char(string, clong);

3726

}

3727

} else if (saved_char_in && context->T.use_raw_char_in) {

3728

HTChunkPutc(string, saved_char_in);

3729

} else {

3730

HTChunkPutc(string, c);

3731

}

3732

break;

3733

3734

case S_squoted: /* Quoted attribute value */

3735

if (c == '\'') { /* End of attribute value */

3736

HTChunkTerminate(string) ;

3737

#ifdef USE_PRETTYSRC

3738

if (psrc_view) {

3739

/*PSRCSTART(attrval);*/

3740

if (attr_is_name) {

3741

HTStartAnchor(context->target,string->data, NULL);

3742

(*context->actions->end_element)(

3743

context->target,

3744

HTML_A,

3745

(char **)&context->include);

3746

} else if (attr_is_href) {

3747

PSRCSTART(href);

3748

HTStartAnchor(context->target,NULL,string->data);

3749

}

3750

PUTS_TR(string->data);

3751

if (attr_is_href) {

3752

(*context->actions->end_element)(

3753

context->target,

3754

HTML_A,

3755

(char **)&context->include);

3756

PSRCSTOP(href);

3757

}

3758

PUTC('\'');

3759

PSRCSTOP(attrval);

3760

} else

3761

#endif

3762

handle_attribute_value(context, string->data);

3763

string->size = 0;

3764

context->state = S_tag_gap;

3765

} else if (TOASCII(c) == '\033') { /* S/390 -- gil -- 1213 */

3766

3767

** Setting up for possible single quotes in CJK escape

3768

** sequences. - Takuya ASADA (asada@three-a.co.jp)

3769

3770

context->state = S_esc_sq;

3771

HTChunkPutc(string, c);

3772

} else if (context->T.decode_utf8 &&

3773

*context->utf_buf) {

3774

HTChunkPuts(string, context->utf_buf);

3775

context->utf_buf_p = context->utf_buf;

3776

*(context->utf_buf_p) = '\0';

3777

} else if (HTCJK == NOCJK &&

3778

(context->T.output_utf8 ||

3779

context->T.trans_from_uni)) {

3780

if (clong == 0xfffd && saved_char_in &&

3781

HTPassEightBitRaw &&

3782

UCH(saved_char_in) >=

3783

LYlowest_eightbit[context->outUCLYhndl]) {

3784

HTChunkPutUtf8Char(string,

3785

(0xf000 | UCH(saved_char_in)));

3786

} else {

3787

HTChunkPutUtf8Char(string, clong);

3788

}

3789

} else if (saved_char_in && context->T.use_raw_char_in) {

3790

HTChunkPutc(string, saved_char_in);

3791

} else {

3792

HTChunkPutc(string, c);

3793

}

3794

break;

3795

3796

case S_dquoted: /* Quoted attribute value */

3797

if (c == '"' || /* Valid end of attribute value */

3798

(soft_dquotes && /* If emulating old Netscape bug, treat '>' */

3799

c == '>')) { /* as a co-terminator of dquoted and tag */

3800

HTChunkTerminate(string) ;

3801

#ifdef USE_PRETTYSRC

3802

if (psrc_view) {

3803

/*PSRCSTART(attrval);*/

3804

if (attr_is_name) {

3805

HTStartAnchor(context->target,string->data, NULL);

3806

(*context->actions->end_element)(

3807

context->target,

3808

HTML_A,

3809

(char **)&context->include);

3810

} else if (attr_is_href) {

3811

PSRCSTART(href);

3812

HTStartAnchor(context->target,NULL,string->data);

3813

}

3814

PUTS_TR(string->data);

3815

if (attr_is_href) {

3816

(*context->actions->end_element)(

3817

context->target,

3818

HTML_A,

3819

(char **)&context->include);

3820

PSRCSTOP(href);

3821

}

3822

PUTC(c);

3823

PSRCSTOP(attrval);

3824

} else

3825

#endif

3826

3827

handle_attribute_value(context, string->data);

3828

string->size = 0;

3829

context->state = S_tag_gap;

3830

if (c == '>') /* We emulated the Netscape bug, so we go */

3831

goto top1; /* back and treat it as the tag terminator */

3832

} else if (TOASCII(c) == '\033') { /* S/390 -- gil -- 1230 */

3833

3834

** Setting up for possible double quotes in CJK escape

3835

** sequences. - Takuya ASADA (asada@three-a.co.jp)

3836

3837

context->state = S_esc_dq;

3838

HTChunkPutc(string, c);

3839

} else if (context->T.decode_utf8 &&

3840

*context->utf_buf) {

3841

HTChunkPuts(string, context->utf_buf);

3842

context->utf_buf_p = context->utf_buf;

3843

*(context->utf_buf_p) = '\0';

3844

} else if (HTCJK == NOCJK &&

3845

(context->T.output_utf8 ||

3846

context->T.trans_from_uni)) {

3847

if (clong == 0xfffd && saved_char_in &&

3848

HTPassEightBitRaw &&

3849

UCH(saved_char_in) >=

3850

LYlowest_eightbit[context->outUCLYhndl]) {

3851

HTChunkPutUtf8Char(string,

3852

(0xf000 | UCH(saved_char_in)));

3853

} else {

3854

HTChunkPutUtf8Char(string, clong);

3855

}

3856

} else if (saved_char_in && context->T.use_raw_char_in) {

3857

HTChunkPutc(string, saved_char_in);

3858

} else {

3859

HTChunkPutc(string, c);

3860

}

3861

break;

3862

3863

case S_end: /* </ */

3864

if (TOASCII(unsign_c) < 127 && (string->size ? /* S/390 -- gil -- 1247 */

3865

IsNmChar(c) : IsNmStart(c))) {

3866

HTChunkPutc(string, c);

3867

} else { /* End of end tag name */

3868

HTTag * t = 0;

3869

#ifdef USE_PRETTYSRC

3870

BOOL psrc_tagname_processed = FALSE;

3871

#endif

3872

3873

HTChunkTerminate(string);

3874

if (!*string->data) { /* Empty end tag */

3875

if (context->element_stack)

3876

t = context->element_stack->tag;

3877

} else {

3878

t = SGMLFindTag(dtd, string->data);

3879

}

3880

if (!t || t == context->unknown_tag) {

3881

CTRACE((tfp, "Unknown end tag </%s>\n", string->data));

3882

#ifdef USE_PRETTYSRC

3883

if (psrc_view) {

3884

PSRCSTART(abracket);

3885

PUTS("</");

3886

PSRCSTOP(abracket);

3887

PSRCSTART(badtag);

3888

if (tagname_transform != 1) {

3889

if (tagname_transform == 0)

3890

LYLowerCase(string->data);

3891

else

3892

LYUpperCase(string->data);

3893

}

3894

PUTS(string->data);

3895

if (c != '>') {

3896

PUTC(c);

3897

} else {

3898

PSRCSTOP(badtag);

3899

PSRCSTART(abracket);

3900

PUTC('>');

3901

PSRCSTOP(abracket);

3902

}

3903

psrc_tagname_processed=TRUE;

3904

}

3905

} else if (psrc_view) {

3906

#endif

3907

} else {

3908

BOOL tag_OK = (BOOL) (c == '>' || WHITE(c));

3909

#if OPT

3910

HTMLElement e = TAGNUM_OF_TAGP(t);

3911

int branch = 2; /* it can be 0,1,2*/

3912

#endif

3913

context->current_tag = t;

3914

if (HAS_ALT_TAGNUM(TAGNUM_OF_TAGP(t)) &&

3915

context->element_stack &&

3916

ALT_TAGP(t) == context->element_stack->tag)

3917

context->element_stack->tag = NORMAL_TAGP(context->element_stack->tag);

3918

#if OPT

3919

if (tag_OK

3920

#ifdef EXTENDED_HTMLDTD

3921

&& Old_DTD

3922

#endif

3923

) {

3924

switch (e) {

3925

case HTML_DD: case HTML_DT: case HTML_LI: case HTML_LH :

3926

case HTML_TD: case HTML_TH: case HTML_TR: case HTML_THEAD:

3927

case HTML_TFOOT : case HTML_TBODY : case HTML_COLGROUP:

3928

branch = 0;

3929

break;

3930

3931

case HTML_A: case HTML_B: case HTML_BLINK: case HTML_CITE:

3932

case HTML_EM: case HTML_FONT: case HTML_FORM: case HTML_I:

3933

case HTML_P: case HTML_STRONG: case HTML_TT: case HTML_U:

3934

branch = 1;

3935

break;

3936

default:

3937

break;

3938

}

3939

}

3940

#endif

3941

3942

#ifdef EXTENDED_HTMLDTD

3943

3944

** Just handle ALL end tags normally :-) - kw

3945

3946

if (!Old_DTD) {

3947

end_element( context, context->current_tag);

3948

} else

3949

#endif /* EXTENDED_HTMLDTD */

3950

3951

if (tag_OK &&

3952

#if OPT

3953

(branch == 0)

3954

#else

3955

(!strcasecomp(string->data, "DD") ||

3956

!strcasecomp(string->data, "DT") ||

3957

!strcasecomp(string->data, "LI") ||

3958

!strcasecomp(string->data, "LH") ||

3959

!strcasecomp(string->data, "TD") ||

3960

!strcasecomp(string->data, "TH") ||

3961

!strcasecomp(string->data, "TR") ||

3962

!strcasecomp(string->data, "THEAD") ||

3963

!strcasecomp(string->data, "TFOOT") ||

3964

!strcasecomp(string->data, "TBODY") ||

3965

!strcasecomp(string->data, "COLGROUP"))

3966

#endif

3967

) {

3968

3969

** Don't treat these end tags as invalid,

3970

** nor act on them. - FM

3971

3972

CTRACE((tfp, "SGML: `</%s%c' found! Ignoring it.\n",

3973

string->data, c));

3974

string->size = 0;

3975

context->current_attribute_number = INVALID;

3976

if (c != '>') {

3977

context->state = S_junk_tag;

3978

} else {

3979

context->current_tag = NULL;

3980

context->state = S_text;

3981

}

3982

break;

3983

} else if (tag_OK &&

3984

#if OPT

3985

(branch == 1)

3986

#else

3987

(!strcasecomp(string->data, "A") ||

3988

!strcasecomp(string->data, "B") ||

3989

!strcasecomp(string->data, "BLINK") ||

3990

!strcasecomp(string->data, "CITE") ||

3991

!strcasecomp(string->data, "EM") ||

3992

!strcasecomp(string->data, "FONT") ||

3993

!strcasecomp(string->data, "FORM") ||

3994

!strcasecomp(string->data, "I") ||

3995

!strcasecomp(string->data, "P") ||

3996

!strcasecomp(string->data, "STRONG") ||

3997

!strcasecomp(string->data, "TT") ||

3998

!strcasecomp(string->data, "U"))

3999

#endif

4000

) {

4001

4002

** Handle end tags for container elements declared

4003

** as SGML_EMPTY to prevent "expected tag substitution"

4004

** but still processed via HTML_end_element() in HTML.c

4005

** with checks there to avoid throwing the HTML.c stack

4006

** out of whack (Ugh, what a hack! 8-). - FM

4007

4008

if (context->inSELECT) {

4009

4010

** We are in a SELECT block. - FM

4011

4012

if (strcasecomp(string->data, "FORM")) {

4013

4014

** It is not a FORM end tag, so ignore it. - FM

4015

4016

CTRACE((tfp, "SGML: ***Ignoring end tag </%s> in SELECT block.\n",

4017

string->data));

4018

} else {

4019

4020

** End the SELECT block and then

4021

** handle the FORM end tag. - FM

4022

4023

CTRACE((tfp, "SGML: ***Faking SELECT end tag before </%s> end tag.\n",

4024

string->data));

4025

end_element(context,

4026

SGMLFindTag(context->dtd, "SELECT"));

4027

CTRACE((tfp, "SGML: End </%s>\n", string->data));

4028

4029

#ifdef USE_PRETTYSRC

4030

if (!psrc_view) /* Don't actually call if viewing psrc - kw */

4031

#endif

4032

(*context->actions->end_element)

4033

(context->target,

4034

TAGNUM_OF_TAGP(context->current_tag),

4035

(char **)&context->include);

4036

}

4037

} else if (!strcasecomp(string->data, "P")) {

4038

4039

** Treat a P end tag like a P start tag (Ugh,

4040

** what a hack! 8-). - FM

4041

4042

CTRACE((tfp, "SGML: `</%s%c' found! Treating as '<%s%c'.\n",

4043

string->data, c, string->data, c));

4044

{

4045

int i;

4046

for (i = 0;

4047

i < context->current_tag->number_of_attributes;

4048

i++) {

4049

context->present[i] = NO;

4050

}

4051

}

4052

if (context->current_tag->name)

4053

start_element(context);

4054

} else {

4055

CTRACE((tfp, "SGML: End </%s>\n", string->data));

4056

4057

#ifdef USE_PRETTYSRC

4058

if (!psrc_view) /* Don't actually call if viewing psrc - kw */

4059

#endif

4060

(*context->actions->end_element)

4061

(context->target,

4062

TAGNUM_OF_TAGP(context->current_tag),

4063

(char **)&context->include);

4064

}

4065

string->size = 0;

4066

context->current_attribute_number = INVALID;

4067

if (c != '>') {

4068

context->state = S_junk_tag;

4069

} else {

4070

context->current_tag = NULL;

4071

context->state = S_text;

4072

}

4073

break;

4074

} else {

4075

4076

** Handle all other end tags normally. - FM

4077

4078

end_element( context, context->current_tag);

4079

}

4080

}

4081

4082

#ifdef USE_PRETTYSRC

4083

if (psrc_view && !psrc_tagname_processed) {

4084

PSRCSTART(abracket);

4085

PUTS("</");

4086

PSRCSTOP(abracket);

4087

PSRCSTART(tag);

4088

if (tagname_transform != 1) {

4089

if (tagname_transform == 0)

4090

LYLowerCase(string->data);

4091

else

4092

LYUpperCase(string->data);

4093

}

4094

PUTS(string->data);

4095

PSRCSTOP(tag);

4096

if ( c != '>' ) {

4097

PSRCSTART(badtag);

4098

PUTC(c);

4099

} else {

4100

PSRCSTART(abracket);

4101

PUTC('>');

4102

PSRCSTOP(abracket);

4103

}

4104

}

4105

#endif

4106

4107

string->size = 0;

4108

context->current_attribute_number = INVALID;

4109

if (c != '>') {

4110

if (!WHITE(c))

4111

CTRACE((tfp,"SGML: `</%s%c' found!\n", string->data, c));

4112

context->state = S_junk_tag;

4113

} else {

4114

context->current_tag = NULL;

4115

context->state = S_text;

4116

}

4117

}

4118

break;

4119

4120

4121

case S_esc: /* Expecting '$'or '(' following CJK ESC. */

4122

if (c == '$') {

4123

context->state = S_dollar;

4124

} else if (c == '(') {

4125

context->state = S_paren;

4126

} else {

4127

context->state = S_text;

4128

}

4129

PUTC(c);

4130

break;

4131

4132

case S_dollar: /* Expecting '@', 'B', 'A' or '(' after CJK "ESC$". */

4133

if (c == '@' || c == 'B' || c == 'A') {

4134

context->state = S_nonascii_text;

4135

} else if (c == '(') {

4136

context->state = S_dollar_paren;

4137

}

4138

PUTC(c);

4139

break;

4140

4141

case S_dollar_paren: /* Expecting 'C' after CJK "ESC$(". */

4142

if (c == 'C') {

4143

context->state = S_nonascii_text;

4144

} else {

4145

context->state = S_text;

4146

}

4147

PUTC(c);

4148

break;

4149

4150

case S_paren: /* Expecting 'B', 'J', 'T' or 'I' after CJK "ESC(". */

4151

if (c == 'B' || c == 'J' || c == 'T') {

4152

context->state = S_text;

4153

} else if (c == 'I') {

4154

context->state = S_nonascii_text;

4155

} else {

4156

context->state = S_text;

4157

}

4158

PUTC(c);

4159

break;

4160

4161

case S_nonascii_text: /* Expecting CJK ESC after non-ASCII text. */

4162

if (TOASCII(c) == '\033') { /* S/390 -- gil -- 1264 */

4163

context->state = S_esc;

4164

}

4165

PUTC(c);

4166

if (c < 32)

4167

context->state = S_text;

4168

break;

4169

4170

case S_esc_sq: /* Expecting '$'or '(' following CJK ESC. */

4171

if (c == '$') {

4172

context->state = S_dollar_sq;

4173

} else if (c == '(') {

4174

context->state = S_paren_sq;

4175

} else {

4176

context->state = S_squoted;

4177

}

4178

HTChunkPutc(string, c);

4179

break;

4180

4181

case S_dollar_sq: /* Expecting '@', 'B', 'A' or '(' after CJK "ESC$". */

4182

if (c == '@' || c == 'B' || c == 'A') {

4183

context->state = S_nonascii_text_sq;

4184

} else if (c == '(') {

4185

context->state = S_dollar_paren_sq;

4186

}

4187

HTChunkPutc(string, c);

4188

break;

4189

4190

case S_dollar_paren_sq: /* Expecting 'C' after CJK "ESC$(". */

4191

if (c == 'C') {

4192

context->state = S_nonascii_text_sq;

4193

} else {

4194

context->state = S_squoted;

4195

}

4196

HTChunkPutc(string, c);

4197

break;

4198

4199

case S_paren_sq: /* Expecting 'B', 'J', 'T' or 'I' after CJK "ESC(". */

4200

if (c == 'B' || c == 'J' || c == 'T') {

4201

context->state = S_squoted;

4202

} else if (c == 'I') {

4203

context->state = S_nonascii_text_sq;

4204

} else {

4205

context->state = S_squoted;

4206

}

4207

HTChunkPutc(string, c);

4208

break;

4209

4210

case S_nonascii_text_sq: /* Expecting CJK ESC after non-ASCII text. */

4211

if (TOASCII(c) == '\033') { /* S/390 -- gil -- 1281 */

4212

context->state = S_esc_sq;

4213

}

4214

HTChunkPutc(string, c);

4215

break;

4216

4217

case S_esc_dq: /* Expecting '$'or '(' following CJK ESC. */

4218

if (c == '$') {

4219

context->state = S_dollar_dq;

4220

} else if (c == '(') {

4221

context->state = S_paren_dq;

4222

} else {

4223

context->state = S_dquoted;

4224

}

4225

HTChunkPutc(string, c);

4226

break;

4227

4228

case S_dollar_dq: /* Expecting '@', 'B', 'A' or '(' after CJK "ESC$". */

4229

if (c == '@' || c == 'B' || c == 'A') {

4230

context->state = S_nonascii_text_dq;

4231

} else if (c == '(') {

4232

context->state = S_dollar_paren_dq;

4233

}

4234

HTChunkPutc(string, c);

4235

break;

4236

4237

case S_dollar_paren_dq: /* Expecting 'C' after CJK "ESC$(". */

4238

if (c == 'C') {

4239

context->state = S_nonascii_text_dq;

4240

} else {

4241

context->state = S_dquoted;

4242

}

4243

HTChunkPutc(string, c);

4244

break;

4245

4246

case S_paren_dq: /* Expecting 'B', 'J', 'T' or 'I' after CJK "ESC(". */

4247

if (c == 'B' || c == 'J' || c == 'T') {

4248

context->state = S_dquoted;

4249

} else if (c == 'I') {

4250

context->state = S_nonascii_text_dq;

4251

} else {

4252

context->state = S_dquoted;

4253

}

4254

HTChunkPutc(string, c);

4255

break;

4256

4257

case S_nonascii_text_dq: /* Expecting CJK ESC after non-ASCII text. */

4258

if (TOASCII(c) == '\033') { /* S/390 -- gil -- 1298 */

4259

context->state = S_esc_dq;

4260

}

4261

HTChunkPutc(string, c);

4262

break;

4263

4264

case S_junk_tag:

4265

case S_junk_pi:

4266

if (c == '>') {

4267

#ifdef USE_PRETTYSRC

4268

if (psrc_view) {

4269

if (context->state == S_junk_tag) {

4270

PSRCSTOP(badtag);

4271

}

4272

PSRCSTART(abracket);

4273

PUTC('>');

4274

PSRCSTOP(abracket);

4275

context->seen_nonwhite_in_junk_tag = FALSE;

4276

}

4277

#endif

4278

context->current_tag = NULL;

4279

context->state = S_text;

4280

}

4281

#ifdef USE_PRETTYSRC

4282

else if (psrc_view) {

4283

/*pack spaces until first non-space is seen*/

4284

if (!context->seen_nonwhite_in_junk_tag) {

4285

if (!WHITE(c)) {

4286

context->seen_nonwhite_in_junk_tag = TRUE;

4287

PUTC(c);

4288

}

4289

} else

4290

PUTC(c);

4291

}

4292

#endif

4293

4294

} /* switch on context->state */

4295

CTRACE2(TRACE_SGML, (tfp, "SGML after %s|%.*s|%c|\n",

4296

state_name(context->state),

4297

string->size,

4298

NonNull(string->data),

4299

UCH(c)));

4300

4301

after_switch:

4302

4303

** Check whether an external function has added

4304

** anything to the include buffer. If so, move the

4305

** new stuff to the beginning of active_include. - kw

4306

4307

if (context->include != NULL) {

4308

if (context->include[0] == '\0') {

4309

FREE(context->include);

4310

} else {

4311

if (context->active_include &&

4312

context->active_include[context->include_index] != '\0')

4313

StrAllocCat(context->include,

4314

context->active_include + context->include_index);

4315

FREE(context->active_include);

4316

context->active_include = context->include;

4317

context->include_index = 0;

4318

context->include = NULL;

4319

}

4320

}

4321

4322

4323

** Check whether we've added anything to the recover buffer. - FM

4324

4325

if (context->recover != NULL) {

4326

if (context->recover[context->recover_index] == '\0') {

4327

FREE(context->recover);

4328

context->recover_index = 0;

4329

} else {

4330

c = context->recover[context->recover_index];

4331

context->recover_index++;

4332

goto top;

4333

}

4334

}

4335

4336

4337

** Check whether an external function had added

4338

** anything to the include buffer; it should now be

4339

** in active_include. - FM / kw

4340

4341

if (context->active_include != NULL) {

4342

if (context->active_include[context->include_index] == '\0') {

4343

FREE(context->active_include);

4344

context->include_index = 0;

4345

} else {

4346

if (context->current_tag_charset == UTF8_handle ||

4347

context->T.trans_from_uni) {

4348

4349

* If it looks like we would have fed UTF-8 to the

4350

* next processing stage, assume that whatever we were

4351

* fed back is in UTF-8 form, too. This won't be always

4352

* true for all uses of the include buffer, but it's a

4353

* start. - kw

4354

4355

char *puni = context->active_include + context->include_index;

4356

c = *puni;

4357

clong = UCGetUniFromUtf8String(&puni);

4358

if (clong < 256 && clong >= 0) {

4359

c = ((char)(clong & 0xff));

4360

}

4361

saved_char_in = '\0';

4362

context->include_index = puni - context->active_include + 1;

4363

goto top1;

4364

} else {

4365

4366

* Otherwise assume no UTF-8 - do charset-naive processing

4367

* and hope for the best. - kw

4368

4369

c = context->active_include[context->include_index];

4370

context->include_index++;

4371

goto top;

4372

}

4373

}

4374

}

4375

4376

4377

** Check whether an external function has added

4378

** anything to the csi buffer. - FM

4379

4380

if (context->csi != NULL) {

4381

if (context->csi[context->csi_index] == '\0') {

4382

FREE(context->csi);

4383

context->csi_index = 0;

4384

} else {

4385

c = context->csi[context->csi_index];

4386

context->csi_index++;

4387

goto top;

4388

}

4389

}

4390

} /* SGML_character */

4391

4392

4393

/*
**  Feed a NUL-terminated string to the parser, one character at a time,
**  through SGML_character().  The terminating NUL itself is not passed on.
*/
PRIVATE void SGML_string ARGS2(
	HTStream *,	context,
	CONST char*,	str)
{
    CONST char *cursor = str;

    while (*cursor != '\0') {
	SGML_character(context, *cursor);
	cursor++;
    }
}

4401

4402

4403

/*
**  Feed the first l bytes of str to the parser through SGML_character().
**  Unlike SGML_string() the data need not be NUL-terminated, and embedded
**  NUL bytes are passed on like any other byte.
*/
PRIVATE void SGML_write ARGS3(
	HTStream *,	context,
	CONST char*,	str,
	int,		l)
{
    int offset;

    for (offset = 0; offset < l; offset++) {
	SGML_character(context, str[offset]);
    }
}

4413

4414

/*_______________________________________________________________________

4415

4416

4417

/* Structured Object Class

4418

** -----------------------
*/

4419

4420

/*
**  Stream class descriptor for the SGML parser.  SGML_new() stores the
**  address of this object in context->isa.  The initializer is positional,
**  so the entries must stay in the member order of HTStreamClass
**  (declared elsewhere -- confirm against its definition before reordering).
*/
PUBLIC CONST HTStreamClass SGMLParser =

4421

{

4422

"SGMLParser",

4423

SGML_free,

4424

SGML_abort,

4425

SGML_character,

4426

SGML_string,

4427

SGML_write,

4428

};

4429

4430

/* Create SGML Engine

4431

** ------------------

4432

4433

** On entry,

4434

** dtd represents the DTD, along with

4435

** actions is the sink for the data as a set of routines.
*/

4436

4437

4438

4439

/*
**  Allocate and initialise a new SGML parser stream.
**
**  On entry,
**	dtd	is stored in the context as the DTD to parse against.
**	anchor	is stored as node_anchor and is the object queried for the
**		UCT_STAGE_PARSER charset information.  (The original code
**		carries the note "Could be NULL?"; that is not verified here.)
**	target	is the structured sink; its isa pointer supplies the
**		action routines.
**
**  On exit,
**	returns the new context, in state S_text with empty recover,
**	include and csi buffers.  Allocation failure is reported through
**	outofmem().
*/
PUBLIC HTStream* SGML_new ARGS3(
	CONST SGML_dtd *,	dtd,
	HTParentAnchor *,	anchor,
	HTStructured *,		target)
{
    int i;
    HTStream* context = (HTStream *) malloc(sizeof(*context));
    if (!context)
	outofmem(__FILE__, "SGML_new");	/* tag now names this function */

    context->isa = &SGMLParser;
    context->string = HTChunkCreate(128);	/* Grow by this much */
    context->leading_spaces = 0;
    context->trailing_spaces = 0;
    context->dtd = dtd;
    context->target = target;
    context->actions = (CONST HTStructuredClass*)(((HTStream*)target)->isa);
					/* Ugh: no OO */
    context->unknown_tag = &HTTag_unrecognized;
    context->current_tag = context->slashedtag = NULL;
    context->state = S_text;
    context->kanji_buf = '\0';
    context->element_stack = 0;			/* empty */
    context->inSELECT = FALSE;
    context->no_lynx_specialcodes = NO;	/* special codes normally generated */
#ifdef CALLERDATA
    context->callerData = (void*) callerData;
#endif /* CALLERDATA */
    for (i = 0; i < MAX_ATTRIBUTES; i++)
	context->value[i] = 0;

    context->lead_exclamation = FALSE;
    context->first_dash = FALSE;
    context->end_comment = FALSE;
    context->doctype_bracket = FALSE;
    context->first_bracket = FALSE;
    context->second_bracket = FALSE;
    context->isHex = FALSE;
#ifdef USE_PRETTYSRC
    /*
    **  malloc() leaves this flag indeterminate.  Give every context a
    **  defined value, not only those created while psrc_view is set,
    **  because the junk-tag handling reads it whenever psrc_view is on.
    */
    context->seen_nonwhite_in_junk_tag = FALSE;
#endif

    context->node_anchor = anchor;		/* Could be NULL? */
    context->utf_count = 0;
    context->utf_char = 0;
    context->utf_buf[0] = context->utf_buf[6] = '\0';
    context->utf_buf_p = context->utf_buf;
    UCTransParams_clear(&context->T);
    context->inUCLYhndl = HTAnchor_getUCLYhndl(anchor,
					       UCT_STAGE_PARSER);
    if (context->inUCLYhndl < 0) {
	/* No parser-stage charset yet: copy it from the MIME stage and retry. */
	HTAnchor_copyUCInfoStage(anchor,
				 UCT_STAGE_PARSER,
				 UCT_STAGE_MIME,
				 -1);
	context->inUCLYhndl = HTAnchor_getUCLYhndl(anchor,
						   UCT_STAGE_PARSER);
    }
#ifdef CAN_SWITCH_DISPLAY_CHARSET /* Allow a switch to a more suitable display charset */
    else if (anchor->UCStages
	     && anchor->UCStages->s[UCT_STAGE_PARSER].LYhndl >= 0
	     && anchor->UCStages->s[UCT_STAGE_PARSER].LYhndl != current_char_set ) {
	int o = anchor->UCStages->s[UCT_STAGE_PARSER].LYhndl;

	anchor->UCStages->s[UCT_STAGE_PARSER].LYhndl = -1; /* Force reset */
	HTAnchor_resetUCInfoStage(anchor, o, UCT_STAGE_PARSER,
				  /* Preserve change this: */
				  anchor->UCStages->s[UCT_STAGE_PARSER].lock);
    }
#endif

    context->inUCI = HTAnchor_getUCInfoStage(anchor,
					     UCT_STAGE_PARSER);
    set_chartrans_handling(context, anchor, -1);

    context->recover = NULL;
    context->recover_index = 0;
    context->include = NULL;
    context->active_include = NULL;
    context->include_index = 0;
    context->url = NULL;
    context->csi = NULL;
    context->csi_index = 0;

#ifdef USE_PRETTYSRC
    if (psrc_view) {
	/*
	**  Push the document prologue through the parser with psrc_view
	**  switched off, then switch it back on.
	*/
	psrc_view = FALSE;
	mark_htext_as_source = TRUE;
	SGML_string(context,
		    "<HTML><HEAD><TITLE>source</TITLE></HEAD><BODY><PRE>");
	psrc_view = TRUE;
	psrc_convert_string = FALSE;
	sgml_in_psrc_was_initialized = TRUE;
	context->seen_nonwhite_in_junk_tag = FALSE;
    }
#endif

    return context;
}

4535

4536

/* Asian character conversion functions

4537

** ====================================

4538

4539

** Added 24-Mar-96 by FM, based on:

4540

4541

////////////////////////////////////////////////////////////////////////

4542

4543

4544

Permission to use, copy, modify, and distribute this material

4545

for any purpose and without fee is hereby granted, provided

4546

that the above copyright notice and this permission notice

4547

appear in all copies, and that the name of ETL not be

4548

used in advertising or publicity pertaining to this

4549

material without the specific, prior written permission

4550

of an authorized representative of ETL.

4551

ETL MAKES NO REPRESENTATIONS ABOUT THE ACCURACY OR SUITABILITY

4552

OF THIS MATERIAL FOR ANY PURPOSE. IT IS PROVIDED "AS IS",

4553

WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES.

4554

/////////////////////////////////////////////////////////////////////////

4555

Content-Type: program/C; charset=US-ASCII

4556

Program: SJIS.c

4557

Author: Yutaka Sato <ysato@etl.go.jp>

4558

Description:

4559

History:

4560

930923 extracted from codeconv.c of cosmos

4561

///////////////////////////////////////////////////////////////////////
*/

4562

4563

4564

/*
**  When nonzero, SJIS_TO_EUC() converts the byte pairs that IS_SJIS()
**  recognises; when zero it copies every byte through unchanged.
*/
PUBLIC int TREAT_SJIS = 1;

4565

4566

/*
**  Map one EUC byte pair to its two-byte replacement from the table below.
**
**  On entry,
**	IHI, ILO	are the input byte pair.
**	OHI, OLO	point to where the output pair is stored.
**
**  If IHI is 0x8E and ILO lies in 0xA1..0xDF, the pair is replaced by
**  table[ILO - 0xA1]; any other input is copied through unchanged.
**
**  The parameter list is reconstructed from the body and from the two
**  calls in this file (EUC_TO_SJIS1 and JISx0201TO0208_SJIS).
*/
PUBLIC void JISx0201TO0208_EUC ARGS4(
	unsigned char,		IHI,
	unsigned char,		ILO,
	unsigned char *,	OHI,
	unsigned char *,	OLO)
{
    /* One entry per ILO value 0xA1..0xDF (63 entries), indexed by ILO - 0xA1. */
    static char *table[] = {
	"\241\243",	/* A1,A3 */
	"\241\326",	/* A1,D6 */
	"\241\327",	/* A1,D7 */
	"\241\242",	/* A1,A2 */
	"\241\246",	/* A1,A6 */
	"\245\362",	/* A5,F2 */
	"\245\241",	/* A5,A1 */
	"\245\243",	/* A5,A3 */
	"\245\245",	/* A5,A5 */
	"\245\247",	/* A5,A7 */
	"\245\251",	/* A5,A9 */
	"\245\343",	/* A5,E3 */
	"\245\345",	/* A5,E5 */
	"\245\347",	/* A5,E7 */
	"\245\303",	/* A5,C3 */
	"\241\274",	/* A1,BC */
	"\245\242",	/* A5,A2 */
	"\245\244",	/* A5,A4 */
	"\245\246",	/* A5,A6 */
	"\245\250",	/* A5,A8 */
	"\245\252",	/* A5,AA */
	"\245\253",	/* A5,AB */
	"\245\255",	/* A5,AD */
	"\245\257",	/* A5,AF */
	"\245\261",	/* A5,B1 */
	"\245\263",	/* A5,B3 */
	"\245\265",	/* A5,B5 */
	"\245\267",	/* A5,B7 */
	"\245\271",	/* A5,B9 */
	"\245\273",	/* A5,BB */
	"\245\275",	/* A5,BD */
	"\245\277",	/* A5,BF */
	"\245\301",	/* A5,C1 */
	"\245\304",	/* A5,C4 */
	"\245\306",	/* A5,C6 */
	"\245\310",	/* A5,C8 */
	"\245\312",	/* A5,CA */
	"\245\313",	/* A5,CB */
	"\245\314",	/* A5,CC */
	"\245\315",	/* A5,CD */
	"\245\316",	/* A5,CE */
	"\245\317",	/* A5,CF */
	"\245\322",	/* A5,D2 */
	"\245\325",	/* A5,D5 */
	"\245\330",	/* A5,D8 */
	"\245\333",	/* A5,DB */
	"\245\336",	/* A5,DE */
	"\245\337",	/* A5,DF */
	"\245\340",	/* A5,E0 */
	"\245\341",	/* A5,E1 */
	"\245\342",	/* A5,E2 */
	"\245\344",	/* A5,E4 */
	"\245\346",	/* A5,E6 */
	"\245\350",	/* A5,E8 */
	"\245\351",	/* A5,E9 */
	"\245\352",	/* A5,EA */
	"\245\353",	/* A5,EB */
	"\245\354",	/* A5,EC */
	"\245\355",	/* A5,ED */
	"\245\357",	/* A5,EF */
	"\245\363",	/* A5,F3 */
	"\241\253",	/* A1,AB */
	"\241\254"	/* A1,AC */
    };

    if ((IHI == 0x8E) && (ILO >= 0xA1) && (ILO <= 0xDF)) {
	*OHI = table[ILO - 0xA1][0];
	*OLO = table[ILO - 0xA1][1];
    } else {
	*OHI = IHI;
	*OLO = ILO;
    }
}

4646

4647

/*
**  Scan a NUL-terminated string and return 1 as soon as a byte with the
**  high bit set, together with the byte after it, satisfies IS_SJIS();
**  return 0 if the end of the string is reached first.
*/
PRIVATE int IS_SJIS_STR ARGS1(CONST unsigned char *, str)
{
    CONST unsigned char *scan;
    unsigned char byte;
    int seen_sjis = 0;	/* state argument handed to IS_SJIS() */

    /* scan already points at the following byte when IS_SJIS() is evaluated */
    for (scan = str; (byte = *scan++) != '\0'; ) {
	if ((byte & 0x80) && IS_SJIS(byte, *scan, seen_sjis)) {
	    return 1;
	}
    }
    return 0;
}

4661

4662

/*
**  Convert one Shift-JIS byte pair (HI, LO) to a two-byte JIS code.
**
**  On entry,
**	HI, LO	are the input byte pair.
**	JCODE	points to at least two writable bytes.
**
**  On exit,
**	JCODE[0] and JCODE[1] hold the converted pair; JCODE is returned.
**	No terminator is written.
**
**  The parameter list is reconstructed from the body and from the callers
**  in this file, which pass two bytes and an output buffer.
*/
PUBLIC unsigned char * SJIS_TO_JIS1 ARGS3(
	unsigned char,		HI,
	unsigned char,		LO,
	unsigned char *,	JCODE)
{
    HI -= UCH((HI <= 0x9F) ? 0x71 : 0xB1);
    HI = UCH((HI << 1) + 1);
    if (0x7F < LO)
	LO--;
    if (0x9E <= LO) {
	/* upper part of the LO range: use the next HI value */
	LO -= UCH(0x7D);
	HI++;
    } else {
	LO -= UCH(0x1F);
    }
    JCODE[0] = HI;
    JCODE[1] = LO;
    return JCODE;
}

4681

4682

/*
**  Convert one two-byte JIS code (HI, LO) to a Shift-JIS byte pair.
**
**  On entry,
**	HI, LO	are the input byte pair (callers in this file pass them
**		masked with 0x7F).
**	SJCODE	points to at least two writable bytes.
**
**  On exit,
**	SJCODE[0] and SJCODE[1] hold the converted pair; SJCODE is
**	returned.  No terminator is written.
**
**  The parameter list is reconstructed from the body and from the callers
**  in this file.
*/
PUBLIC unsigned char * JIS_TO_SJIS1 ARGS3(
	unsigned char,		HI,
	unsigned char,		LO,
	unsigned char *,	SJCODE)
{
    /* The parity of HI selects which offset is applied to LO. */
    if (HI & 1)
	LO += UCH(0x1F);
    else
	LO += UCH(0x7D);
    if (0x7F <= LO)
	LO++;

    HI = UCH(((HI - 0x21) >> 1) + 0x81);
    if (0x9F < HI)
	HI += UCH(0x40);
    SJCODE[0] = HI;
    SJCODE[1] = LO;
    return SJCODE;
}

4701

4702

/*
**  Convert one EUC byte pair (HI, LO) to a Shift-JIS byte pair.
**
**  A pair whose HI is 0x8E is first remapped by JISx0201TO0208_EUC().
**  The high bits are then stripped and the result is converted with
**  JIS_TO_SJIS1().
**
**  On exit,
**	SJCODE[0] and SJCODE[1] hold the result; SJCODE is returned.
**
**  The third parameter is reconstructed from the body.
*/
PUBLIC unsigned char * EUC_TO_SJIS1 ARGS3(
	unsigned char,		HI,
	unsigned char,		LO,
	unsigned char *,	SJCODE)
{
    if (HI == 0x8E)
	JISx0201TO0208_EUC(HI, LO, &HI, &LO);
    JIS_TO_SJIS1(UCH(HI & 0x7F), UCH(LO & 0x7F), SJCODE);
    return SJCODE;
}

4712

4713

/*
**  Map a single byte I to a Shift-JIS byte pair.
**
**  I is passed to JISx0201TO0208_EUC() as the low byte of a pair whose
**  high byte is 0x8E.  The resulting pair, with high bits stripped, is
**  converted with JIS_TO_SJIS1() and stored through OHI and OLO.
**
**  The parameter list is reconstructed from the body.
*/
PUBLIC void JISx0201TO0208_SJIS ARGS3(
	unsigned char,		I,
	unsigned char *,	OHI,
	unsigned char *,	OLO)
{
    unsigned char SJCODE[2];

    JISx0201TO0208_EUC(0x8E, I, OHI, OLO);
    JIS_TO_SJIS1(UCH(*OHI & 0x7F), UCH(*OLO & 0x7F), SJCODE);
    *OHI = SJCODE[0];
    *OLO = SJCODE[1];
}

4725

4726

/*
**  Convert one Shift-JIS byte pair (HI, LO) to an EUC byte pair.
**
**  The pair is converted with SJIS_TO_JIS1() into data[0..1], and the
**  high bit of both bytes is then set.  Returns data.
*/
PUBLIC unsigned char * SJIS_TO_EUC1 ARGS3(
	unsigned char,		HI,
	unsigned char,		LO,
	unsigned char *,	data)
{
    int k;

    SJIS_TO_JIS1(HI, LO, data);
    for (k = 0; k < 2; k++) {
	data[k] |= 0x80;
    }
    return data;
}

4736

4737

/*
**  Convert a NUL-terminated string from Shift-JIS to EUC.
**
**  On entry,
**	src	is the input string.
**	dst	is the output buffer, supplied by the caller.  The output
**		is never longer than the input, so strlen(src) + 1 bytes
**		are enough.
**
**  Byte pairs accepted by IS_SJIS() are converted, provided TREAT_SJIS
**  is nonzero; every other byte is copied unchanged.  Returns dst.
**
**  The local declarations are reconstructed from how the body uses them.
*/
PUBLIC unsigned char * SJIS_TO_EUC ARGS2(
	unsigned char *,	src,
	unsigned char *,	dst)
{
    unsigned char hi, lo, *sp, *dp;
    int in_sjis;

    in_sjis = IS_SJIS_STR(src);
    for (sp = src, dp = dst; (hi = sp[0]) != '\0';) {
	lo = sp[1];
	if (TREAT_SJIS && IS_SJIS(hi, lo, in_sjis)) {
	    SJIS_TO_JIS1(hi, lo, dp);
	    dp[0] |= 0x80;
	    dp[1] |= 0x80;
	    dp += 2;
	    sp += 2;
	} else
	    *dp++ = *sp++;
    }
    *dp = 0;
    return dst;
}

4759

4760

/*
**  Convert a NUL-terminated string from EUC to Shift-JIS.
**
**  On entry,
**	src	is the input string.
**	dst	is the output buffer, supplied by the caller.  The output
**		is never longer than the input.
**
**  Two consecutive bytes that both have the high bit set are converted
**  as a pair with JIS_TO_SJIS1().  A high-bit byte that is not followed
**  by another high-bit byte is dropped.  All other bytes are copied
**  unchanged.  Returns dst.
**
**  The local declarations are reconstructed from how the body uses them.
*/
PUBLIC unsigned char * EUC_TO_SJIS ARGS2(
	unsigned char *,	src,
	unsigned char *,	dst)
{
    unsigned char *sp, *dp;

    for (sp = src, dp = dst; *sp;) {
	if (*sp & 0x80) {
	    if (sp[1] && (sp[1] & 0x80)) {
		JIS_TO_SJIS1(UCH(sp[0] & 0x7F), UCH(sp[1] & 0x7F), dp);
		dp += 2;
		sp += 2;
	    } else {
		sp++;	/* unpaired high-bit byte: skip it */
	    }
	} else {
	    *dp++ = *sp++;
	}
    }
    *dp = 0;
    return dst;
}

4782

4783

/*
**  Copy string b to a, and yield a pointer to the terminating NUL now in a
**  so that the caller can keep appending.  Both arguments are evaluated
**  more than once; pass only side-effect-free expressions.
*/
#define Strcpy(a,b) (strcpy((char*)(a),(CONST char*)(b)),&(a)[strlen((CONST char*)(a))])

4784

4785

/*
**  Convert a NUL-terminated EUC string to 7-bit JIS with mode-switching
**  sequences.
**
**  On entry,
**	src	is the input string.
**	dst	is the output buffer, supplied by the caller.  It must
**		have room for the converted text plus every inserted
**		copy of toK and toA.
**	toK	is the string emitted when switching into two-byte mode.
**	toA	is the string emitted when switching back to one-byte mode.
**
**  On exit,
**	dst holds the converted, NUL-terminated string and is returned.
**	If the input ends in two-byte mode, a final toA is appended.
**
**  The declarations of kana_mode, cch, sp and dp are reconstructed from
**  how the body uses them.
*/
PUBLIC unsigned char *EUC_TO_JIS ARGS4(
	unsigned char *,	src,
	unsigned char *,	dst,
	CONST char *,		toK,
	CONST char *,		toA)
{
    unsigned char kana_mode = 0;	/* nonzero while in two-byte mode */
    unsigned char cch;
    unsigned char *sp = src;
    unsigned char *dp = dst;
    int is_JIS = 0;

    while ((cch = *sp++) != '\0') {
	if (cch & 0x80) {
	    if (!IS_EUC(cch, *sp)) {
		/* High-bit byte that does not start an EUC pair. */
		if (cch == 0xA0 && is_JIS)	/* ignore NBSP */
		    continue;
		is_JIS++;
		*dp++ = cch;
		continue;
	    }
	    if (!kana_mode) {
		kana_mode = UCH(~kana_mode);
		dp = Strcpy(dp, toK);
	    }
	    if (*sp & 0x80) {
		/* Emit the pair with the high bits cleared. */
		*dp++ = UCH(cch & ~0x80);
		*dp++ = UCH(*sp++ & ~0x80);
	    }
	} else {
	    if (kana_mode) {
		kana_mode = UCH(~kana_mode);
		dp = Strcpy(dp, toA);
	    }
	    *dp++ = cch;
	}
    }
    if (kana_mode)
	dp = Strcpy(dp, toA);

    if (dp)
	*dp = 0;
    return dst;
}

4829

4830

/* True when both c1 and c2 lie strictly between 0x20 and 0x7F. */
#define IS_JIS7(c1,c2) (0x20<(c1)&&(c1)<0x7F && 0x20<(c2)&&(c2)<0x7F)

4831

/* Control-N and Control-O: 0x0E and 0x0F where 'N' and 'O' have their ASCII values. */
#define SO ('N'-0x40)

4832

#define SI ('O'-0x40)

4833

4834

/*
**  When nonzero, TO_EUC() tries repairJIStoEUC() on text that begins with
**  the TO_2BCODE character but has no ESC before it.
*/
PUBLIC int repair_JIS = 0;

4835

4836

/*
**  Convert a run of 7-bit byte pairs to EUC, stopping at a "(B" or "(J"
**  pair.
**
**  On entry,
**	src	points to the first byte pair to convert.
**	dstp	points to the current output pointer.
**
**  On exit,
**	On success (a "(B" or "(J" pair was found) *dstp is advanced past
**	the bytes written and the position just after that pair is
**	returned.  On failure (end of input, or a pair that fails
**	IS_JIS7) 0 is returned and *dstp is left unchanged.  Bytes may
**	already have been written beyond the old *dstp in that case.
*/
PRIVATE CONST unsigned char *repairJIStoEUC ARGS2(
	CONST unsigned char *,	src,
	unsigned char **,	dstp)
{
    CONST unsigned char *in = src;
    unsigned char *out = *dstp;
    unsigned char first, second;

    for (;;) {
	first = in[0];
	if (first == '\0')
	    break;
	second = in[1];
	if (second == '\0')
	    break;
	in += 2;

	if (first == '(' && (second == 'B' || second == 'J')) {
	    *dstp = out;
	    return in;
	}
	if (!IS_JIS7(first, second)) {
	    return 0;
	}
	*out++ = UCH(0x80 | first);
	*out++ = UCH(0x80 | second);
    }
    return 0;
}

4860

4861

/*
**  Convert a NUL-terminated string that may contain escape-delimited JIS
**  or Shift-JIS text to EUC.
**
**  On entry,
**	jis	is the input string.
**	euc	is the output buffer, supplied by the caller.  TO_SJIS()
**		and TO_JIS() size it as strlen(jis) + 1.
**
**  Handling, in the order tested for each input byte c:
**	- 0x80 is dropped.  0xA0 is dropped once JIS or Shift-JIS text
**	  has been seen.
**	- If repair_JIS is set and c is TO_2BCODE followed by 'B' or '@'
**	  with no ESC before it, repairJIStoEUC() is tried.
**	- ESC TO_2BCODE ('B' | '@') switches two-byte mode on.
**	  ESC TO_1BCODE ('B' | 'J' | 'H') switches it off.  Both sequences
**	  are consumed.  Any other ESC TO_2BCODE, ESC TO_1BCODE or ESC ','
**	  switches the mode off and the bytes are processed normally.
**	- Pairs accepted by IS_SJIS() are converted with SJIS_TO_EUC1().
**	- In two-byte mode, pairs that satisfy IS_JIS7 are emitted with
**	  the high bit set.  A newline ends two-byte mode.
**	- Otherwise the byte is copied, except that SI and SO are dropped
**	  while no high-bit byte has been seen.
**
**  Returns euc.
**
**  The declarations of c, jis_stat, s, to1B, to2B and in_sjis are
**  reconstructed from how the body uses them.
*/
PUBLIC unsigned char *TO_EUC ARGS2(
	CONST unsigned char *,	jis,
	unsigned char *,	euc)
{
    unsigned char c, jis_stat;
    CONST unsigned char *s;
    unsigned char *d;
    int to1B, to2B;
    int in_sjis;
    static int nje;	/* call counter; only ever incremented here */
    int n8bits;
    int is_JIS;

    nje++;
    n8bits = 0;
    s = jis;
    d = euc;
    jis_stat = 0;
    to2B = TO_2BCODE;
    to1B = TO_1BCODE;
    in_sjis = IS_SJIS_STR(jis);
    is_JIS = 0;

    while ((c = *s++) != '\0') {
	if (c == 0x80)
	    continue;	/* ignore it */
	if (c == 0xA0 && is_JIS)
	    continue;	/* ignore Non-breaking space */

	if (c == to2B && jis_stat == 0 && repair_JIS) {
	    if (*s == 'B' || *s == '@') {
		CONST unsigned char *ts;
		if ((ts = repairJIStoEUC(s + 1, &d)) != NULL) {
		    s = ts;
		    continue;
		}
	    }
	}
	if (c == ESC) {
	    if (*s == to2B) {
		if ((s[1] == 'B') || (s[1] == '@')) {
		    jis_stat = 0x80;
		    s += 2;
		    is_JIS++;
		    continue;
		}
		jis_stat = 0;
	    } else if (*s == to1B) {
		jis_stat = 0;
		if ((s[1] == 'B') || (s[1] == 'J') || (s[1] == 'H')) {
		    s += 2;
		    continue;
		}
	    } else if (*s == ',') {	/* MULE */
		jis_stat = 0;
	    }
	}
	if (c & 0x80)
	    n8bits++;

	if (IS_SJIS(c, *s, in_sjis)) {
	    SJIS_TO_EUC1(c, *s, d);
	    d += 2;
	    s++;
	    is_JIS++;
	} else if (jis_stat) {
	    if (c <= 0x20 || 0x7F <= c) {
		*d++ = c;
		if (c == '\n')
		    jis_stat = 0;
	    } else {
		if (IS_JIS7(c, *s)) {
		    *d++ = jis_stat | c;
		    *d++ = jis_stat | *s++;
		} else
		    *d++ = c;
	    }
	} else {
	    if (n8bits == 0 && (c == SI || c == SO)) {
		/* drop SI/SO while no high-bit byte has been seen */
	    } else {
		*d++ = c;
	    }
	}
    }
    *d = 0;
    return euc;
}

4948

4949

/* True when ch is at most 0x20 or equal to 0x7F; is_EUC_JP() applies it to bytes masked with 0x7F. */
#define non94(ch) ((ch) <= 0x20 || (ch) == 0x7F)

4950

4951

/*
**  Check whether a NUL-terminated string is plausible EUC.
**
**  Returns 0 if a high-bit byte is followed by a byte without the high
**  bit (including the terminating NUL), or if either byte of such a pair
**  satisfies non94() once masked with 0x7F.  Returns 1 otherwise.
**  Bytes without the high bit are always accepted.
*/
PRIVATE int is_EUC_JP ARGS1(unsigned char *, euc)
{
    unsigned char *cp = euc;
    int lead, trail;

    while ((lead = *cp) != '\0') {
	if (lead & 0x80) {
	    trail = cp[1] & 0xFF;
	    if (!(trail & 0x80)) {
		return 0;
	    }
	    if (non94(lead & 0x7F) || non94(trail & 0x7F)) {
		return 0;
	    }
	    cp++;	/* step over the second byte of the pair */
	}
	cp++;
    }
    return 1;
}

4972

4973

/*
**  Convert a NUL-terminated string to Shift-JIS.
**
**  On entry,
**	any	is the input string.
**	sjis	is the output buffer, supplied by the caller.  It needs
**		at least strlen(any) + 1 bytes.
**
**  The input is first converted to EUC in a temporary buffer.  If the
**  result passes is_EUC_JP() it is converted to Shift-JIS; otherwise the
**  input is copied to sjis unchanged.
**
**  The allocation is now checked in every build, not only when CJK_EX is
**  defined.  Failure is reported through outofmem(), as SGML_new() does.
*/
PUBLIC void TO_SJIS ARGS2(
	CONST unsigned char *,	any,
	unsigned char *,	sjis)
{
    unsigned char *euc;

    euc = malloc(strlen((CONST char *) any) + 1);
    if (!euc)
	outofmem(__FILE__, "TO_SJIS");
    TO_EUC(any, euc);
    if (is_EUC_JP(euc))
	EUC_TO_SJIS(euc, sjis);
    else
	strcpy((char *) sjis, (CONST char *) any);
    free(euc);
}

4991

4992

/*
**  Convert a NUL-terminated string to JIS.
**
**  On entry,
**	any	is the input string.
**	jis	is the output buffer, supplied by the caller.  It must
**		have room for the converted text plus the TO_KANJI and
**		TO_ASCII sequences that EUC_TO_JIS() inserts.
**
**  An empty input gives an empty output.  Otherwise the input is
**  converted to EUC in a temporary buffer and from there to JIS.
**
**  The allocation is now checked in every build, not only when CJK_EX is
**  defined.  Failure is reported through outofmem(), as SGML_new() does.
**  The former is_EUC_JP() call is removed: its result was discarded and
**  the function only reads its argument.
*/
PUBLIC void TO_JIS ARGS2(
	CONST unsigned char *,	any,
	unsigned char *,	jis)
{
    unsigned char *euc;

    if (any[0] == 0) {
	jis[0] = 0;
	return;
    }
    euc = malloc(strlen((CONST char *) any) + 1);
    if (!euc)
	outofmem(__FILE__, "TO_JIS");
    TO_EUC(any, euc);
    EUC_TO_JIS(euc, jis, TO_KANJI, TO_ASCII);

    free(euc);
}

Older »