~ubuntu-branches/ubuntu/warty/lynx/warty-security

« back to all changes in this revision

Viewing changes to src/LYCharUtils.c

Committer: Bazaar Package Importer
Author(s): Martin Pitt
Date: 2004-09-16 12:14:10 UTC
Revision ID: james.westby@ubuntu.com-20040916121410-cz1gu92c4nqfeyrg

Tags: upstream-2.8.5

Import upstream version 2.8.5

files added:

ABOUT-NLS

CHANGES

COPYHEADER

COPYING

INSTALLATION

LYHelp.hin

LYMessages_en.h

PROBLEMS

README

VMSPrint.com

WWW/FreeofCharge.html

WWW/Library

WWW/Library/Implementation

WWW/Library/Implementation/HTAABrow.c

WWW/Library/Implementation/HTAABrow.h

WWW/Library/Implementation/HTAAProt.c

WWW/Library/Implementation/HTAAProt.h

WWW/Library/Implementation/HTAAUtil.c

WWW/Library/Implementation/HTAAUtil.h

WWW/Library/Implementation/HTAccess.c

WWW/Library/Implementation/HTAccess.h

WWW/Library/Implementation/HTAnchor.c

WWW/Library/Implementation/HTAnchor.h

WWW/Library/Implementation/HTAssoc.c

WWW/Library/Implementation/HTAssoc.h

WWW/Library/Implementation/HTAtom.c

WWW/Library/Implementation/HTAtom.h

WWW/Library/Implementation/HTBTree.c

WWW/Library/Implementation/HTBTree.h

WWW/Library/Implementation/HTCJK.h

WWW/Library/Implementation/HTChunk.c

WWW/Library/Implementation/HTChunk.h

WWW/Library/Implementation/HTDOS.c

WWW/Library/Implementation/HTDOS.h

WWW/Library/Implementation/HTFTP.c

WWW/Library/Implementation/HTFTP.h

WWW/Library/Implementation/HTFWriter.c

WWW/Library/Implementation/HTFWriter.h

WWW/Library/Implementation/HTFile.c

WWW/Library/Implementation/HTFile.h

WWW/Library/Implementation/HTFinger.c

WWW/Library/Implementation/HTFinger.h

WWW/Library/Implementation/HTFormat.c

WWW/Library/Implementation/HTFormat.h

WWW/Library/Implementation/HTGopher.c

WWW/Library/Implementation/HTGopher.h

WWW/Library/Implementation/HTGroup.c

WWW/Library/Implementation/HTGroup.h

WWW/Library/Implementation/HTInit.h

WWW/Library/Implementation/HTLex.c

WWW/Library/Implementation/HTLex.h

WWW/Library/Implementation/HTList.c

WWW/Library/Implementation/HTList.h

WWW/Library/Implementation/HTMIME.c

WWW/Library/Implementation/HTMIME.h

WWW/Library/Implementation/HTMLDTD.c

WWW/Library/Implementation/HTMLDTD.h

WWW/Library/Implementation/HTMLGen.c

WWW/Library/Implementation/HTMLGen.h

WWW/Library/Implementation/HTNews.c

WWW/Library/Implementation/HTNews.h

WWW/Library/Implementation/HTParse.c

WWW/Library/Implementation/HTParse.h

WWW/Library/Implementation/HTPlain.c

WWW/Library/Implementation/HTPlain.h

WWW/Library/Implementation/HTRules.c

WWW/Library/Implementation/HTRules.h

WWW/Library/Implementation/HTStream.h

WWW/Library/Implementation/HTString.c

WWW/Library/Implementation/HTString.h

WWW/Library/Implementation/HTStyle.c

WWW/Library/Implementation/HTStyle.h

WWW/Library/Implementation/HTTCP.c

WWW/Library/Implementation/HTTCP.h

WWW/Library/Implementation/HTTP.c

WWW/Library/Implementation/HTTP.h

WWW/Library/Implementation/HTTelnet.c

WWW/Library/Implementation/HTTelnet.h

WWW/Library/Implementation/HTUU.c

WWW/Library/Implementation/HTUU.h

WWW/Library/Implementation/HTUtils.h

WWW/Library/Implementation/HTVMSUtils.c

WWW/Library/Implementation/HTVMSUtils.h

WWW/Library/Implementation/HTVMS_WaisProt.c

WWW/Library/Implementation/HTVMS_WaisProt.h

WWW/Library/Implementation/HTVMS_WaisUI.c

WWW/Library/Implementation/HTVMS_WaisUI.h

WWW/Library/Implementation/HTWAIS.c

WWW/Library/Implementation/HTWAIS.h

WWW/Library/Implementation/HTWSRC.c

WWW/Library/Implementation/HTWSRC.h

WWW/Library/Implementation/HText.h

WWW/Library/Implementation/HTioctl.h

WWW/Library/Implementation/LYLeaks.h

WWW/Library/Implementation/LYexit.h

WWW/Library/Implementation/SGML.c

WWW/Library/Implementation/SGML.h

WWW/Library/Implementation/UCAux.h

WWW/Library/Implementation/UCDefs.h

WWW/Library/Implementation/UCMap.h

WWW/Library/Implementation/Version.make

WWW/Library/Implementation/makefile.in

WWW/Library/Implementation/www_tcp.h

WWW/Library/Implementation/www_wait.h

WWW/Library/djgpp

WWW/Library/djgpp/CommonMakefile

WWW/Library/djgpp/makefile

WWW/Library/djgpp/makefile.sla

WWW/Library/vms

WWW/Library/vms/COPYING.LIB

WWW/Library/vms/descrip.mms

WWW/Library/vms/libmake.com

aclocal.m4

build-slang.com

build.com

cfg_defs.sh

cfg_edit.sh

config.guess

config.hin

config.sub

configure

configure.in

descrip.mms

docs

docs/CHANGES2.3

docs/CHANGES2.4

docs/CHANGES2.5

docs/CHANGES2.6

docs/CHANGES2.7

docs/CHANGES2.8

docs/CMU.announce

docs/CRAWL.announce

docs/FM.announce

docs/IBMPC-charsets.announce

docs/OS-390.announce

docs/README.TRST

docs/README.chartrans

docs/README.defines

docs/README.jp

docs/README.rootcerts

docs/README.ssl

docs/README.sslcerts

docs/SOCKETSHR.announce

docs/TCPWARE.announce

docs/VMSWAIS.announce

docs/djgpp.key

docs/pdcurses.key

docs/slang.key

docs/win-386.announce

fixed512.com

fixtext.sh

install.sh

lib/dirent.c

lib/dirent.h

lynx.cfg

lynx.hlp

lynx.man

lynx.rsp

lynx_help

lynx_help/Lynx_users_guide.html

lynx_help/about_lynx.html

lynx_help/help_files.txt

lynx_help/keystrokes

lynx_help/keystrokes/alt_edit_help.html

lynx_help/keystrokes/bashlike_edit_help.html

lynx_help/keystrokes/bookmark_help.html

lynx_help/keystrokes/cookie_help.html

lynx_help/keystrokes/dired_help.html

lynx_help/keystrokes/edit_help.html

lynx_help/keystrokes/environments.html

lynx_help/keystrokes/follow_help.html

lynx_help/keystrokes/gopher_types_help.html

lynx_help/keystrokes/history_help.html

lynx_help/keystrokes/keystroke_help.html

lynx_help/keystrokes/movement_help.html

lynx_help/keystrokes/option_help.html

lynx_help/keystrokes/other_help.html

lynx_help/keystrokes/print_help.html

lynx_help/keystrokes/scrolling_help.html

lynx_help/keystrokes/test_display.html

lynx_help/keystrokes/visited_help.html

lynx_help/keystrokes/xterm_help.html

lynx_help/lynx-dev.html

lynx_help/lynx_help_main.html

lynx_help/lynx_url_support.html

make-msc.bat

makefile.bcb

makefile.in

makefile.msc

makelynx.bat

makew32.bat

mkdirs.sh

po/POTFILES.in

po/ca.po

po/cs.po

po/da.po

po/de.po

po/et.po

po/fr.po

po/hu.po

po/it.po

po/ja.po

po/lynx.pot

po/makefile.inn

po/nl.po

po/pt_BR.po

po/readme

po/ru.po

po/sl.po

po/sv.po

po/tr.po

po/uk.po

po/zh_CN.po

po/zh_TW.po

samples

samples/blue-background.lss

samples/bright-blue.lss

samples/cernrules.txt

samples/installdirs.html

samples/jumpsUnix.html

samples/jumpsVMS.html

samples/keepviewer

samples/lynx-keymaps

samples/lynx.com

samples/lynx.lss

samples/lynxdump

samples/mailcap

samples/mailto-form.pl

samples/mild-colors.lss

samples/mime.types

scripts

scripts/cfg2html.pl

scripts/install-cfg.sh

scripts/man2hlp.sh

src/AttrList.h

src/DefaultStyle.c

src/GridText.c

src/GridText.h

src/HTAlert.c

src/HTAlert.h

src/HTFWriter.c

src/HTFont.h

src/HTForms.h

src/HTInit.c

src/HTML.c

src/HTML.h

src/HTNestedList.h

src/HTSaveToFile.h

src/LYBookmark.c

src/LYBookmark.h

src/LYCgi.c

src/LYCgi.h

src/LYCharSets.c

src/LYCharSets.h

src/LYCharUtils.c

src/LYCharUtils.h

src/LYCharVals.h

src/LYClean.c

src/LYClean.h

src/LYCookie.c

src/LYCookie.h

src/LYCurses.c

src/LYCurses.h

src/LYDownload.c

src/LYDownload.h

src/LYEdit.c

src/LYEdit.h

src/LYEditmap.c

src/LYExtern.c

src/LYExtern.h

src/LYForms.c

src/LYGCurses.h

src/LYGetFile.c

src/LYGetFile.h

src/LYGlobalDefs.h

src/LYHash.c

src/LYHash.h

src/LYHistory.c

src/LYHistory.h

src/LYJump.c

src/LYJump.h

src/LYJustify.h

src/LYKeymap.c

src/LYKeymap.h

src/LYLeaks.c

src/LYList.c

src/LYList.h

src/LYLocal.c

src/LYLocal.h

src/LYMail.c

src/LYMail.h

src/LYMain.c

src/LYMainLoop.c

src/LYMainLoop.h

src/LYMap.c

src/LYMap.h

src/LYNews.c

src/LYNews.h

src/LYOptions.c

src/LYOptions.h

src/LYPrettySrc.c

src/LYPrettySrc.h

src/LYPrint.c

src/LYPrint.h

src/LYReadCFG.c

src/LYReadCFG.h

src/LYSearch.c

src/LYSearch.h

src/LYShowInfo.c

src/LYShowInfo.h

src/LYSignal.h

src/LYStrings.c

src/LYStrings.h

src/LYStructs.h

src/LYStyle.c

src/LYStyle.h

src/LYTraversal.c

src/LYTraversal.h

src/LYUpload.c

src/LYUpload.h

src/LYUtils.c

src/LYUtils.h

src/LYVMSdef.h

src/LYexit.c

src/LYrcFile.c

src/LYrcFile.h

src/TRSTable.c

src/TRSTable.h

src/UCAuto.c

src/UCAuto.h

src/UCAux.c

src/UCdomap.c

src/UCdomap.h

src/Xsystem.c

src/chrtrans

src/chrtrans/README.format

src/chrtrans/README.tables

src/chrtrans/UCkd.h

src/chrtrans/build-chrtrans.com

src/chrtrans/build-header.com

src/chrtrans/caselower.h

src/chrtrans/cp1250_uni.tbl

src/chrtrans/cp1251_uni.tbl

src/chrtrans/cp1252_uni.tbl

src/chrtrans/cp1253_uni.tbl

src/chrtrans/cp1255_uni.tbl

src/chrtrans/cp1256_uni.tbl

src/chrtrans/cp1257_uni.tbl

src/chrtrans/cp437_uni.tbl

src/chrtrans/cp737_uni.tbl

src/chrtrans/cp775_uni.tbl

src/chrtrans/cp850_uni.tbl

src/chrtrans/cp852_uni.tbl

src/chrtrans/cp862_uni.tbl

src/chrtrans/cp864_uni.tbl

src/chrtrans/cp866_uni.tbl

src/chrtrans/cp866u_uni.tbl

src/chrtrans/cp869_uni.tbl

src/chrtrans/def7_uni.tbl

src/chrtrans/dmcs_uni.tbl

src/chrtrans/entities.h

src/chrtrans/hp_uni.tbl

src/chrtrans/iso01_uni.tbl

src/chrtrans/iso02_uni.tbl

src/chrtrans/iso03_uni.tbl

src/chrtrans/iso04_uni.tbl

src/chrtrans/iso05_uni.tbl

src/chrtrans/iso06_uni.tbl

src/chrtrans/iso07_uni.tbl

src/chrtrans/iso08_uni.tbl

src/chrtrans/iso09_uni.tbl

src/chrtrans/iso10_uni.tbl

src/chrtrans/iso15_uni.tbl

src/chrtrans/jcuken_kb.h

src/chrtrans/koi8r_uni.tbl

src/chrtrans/koi8u_uni.tbl

src/chrtrans/mac_uni.tbl

src/chrtrans/make-msc.bat

src/chrtrans/makefile.bcb

src/chrtrans/makefile.dos

src/chrtrans/makefile.in

src/chrtrans/makefile.msc

src/chrtrans/makeuctb.c

src/chrtrans/makew32.bat

src/chrtrans/mnem2_suni.tbl

src/chrtrans/mnem_suni.tbl

src/chrtrans/next_uni.tbl

src/chrtrans/pt154_uni.tbl

src/chrtrans/rfc_suni.tbl

src/chrtrans/rot13_kb.h

src/chrtrans/utf8_uni.tbl

src/chrtrans/viscii_uni.tbl

src/chrtrans/yawerty_kb.h

src/cmu_tcp.opt

src/decc.opt

src/descrip.mms

src/gnuc.opt

src/makefile.dos

src/makefile.dsl

src/makefile.in

src/makefile.wsl

src/mktime.c

src/multinet.opt

src/socketshr_tcp.opt

src/strstr.c

src/structdump.h

src/tcpwareolb.opt

src/tcpwareshr.opt

src/ucxolb.opt

src/ucxshr.opt

src/vaxc.opt

src/win_tcp.opt

test

test/ALT88592.html

test/ISO_LATIN1_test.html

test/README.txt

test/TestComment.html

test/c1.html

test/iso8859-1.html

test/iso88592.html

test/koi8-r.html

test/quickbrown.html

test/raw8bit.html

test/sgml.html

test/spaces.html

test/tabtest.html

test/unicode.html

test/utf-8-demo.html

userdefs.h

Show diffs side-by-side

added added

removed removed

src/LYCharUtils.c

/*
**  Functions associated with LYCharSets.c and the Lynx version of HTML.c - FM
**  ==========================================================================
*/

#include <HTUtils.h>

#include <SGML.h>

#define Lynx_HTML_Handler

#include <HTChunk.h>

#include <HText.h>

#include <HTStyle.h>

#include <HTMIME.h>

#include <HTML.h>

#include <HTCJK.h>

#include <HTAtom.h>

#include <HTMLGen.h>

#include <HTParse.h>

#include <UCMap.h>

#include <UCDefs.h>

#include <UCAux.h>

#include <LYGlobalDefs.h>

#include <LYCharUtils.h>

#include <LYCharSets.h>

#include <HTAlert.h>

#include <HTForms.h>

#include <HTNestedList.h>

#include <GridText.h>

#include <LYStrings.h>

#include <LYUtils.h>

#include <LYMap.h>

#include <LYBookmark.h>

#include <LYCurses.h>

#include <LYCookie.h>

#include <LYexit.h>

#include <LYLeaks.h>

/*
 *  Used for nested lists. - FM
 *
 *  Both are sentinel values far outside any realistic list count.
 */
PUBLIC int OL_CONTINUE = -29999; /* flag for whether CONTINUE is set */

PUBLIC int OL_VOID = -29998; /* flag for whether a count is set */

** This function converts any ampersands in allocated

** strings to "&". If isTITLE is TRUE, it also

** converts any angle-brackets to "<" or ">". - FM

PUBLIC void LYEntify ARGS2(

char **, str,

BOOLEAN, isTITLE)

{

char *p = *str;

char *q = NULL, *cp = NULL;

int amps = 0, lts = 0, gts = 0;

#ifdef CJK_EX

enum _state

{ S_text, S_esc, S_dollar, S_paren,

S_nonascii_text, S_dollar_paren } state = S_text;

int in_sjis = 0;

#endif

if (isEmpty(p))

return;

* Count the ampersands. - FM

while ((*p != '\0') && (q = strchr(p, '&')) != NULL) {

amps++;

p = (q + 1);

}

* Count the left-angle-brackets, if needed. - FM

if (isTITLE == TRUE) {

p = *str;

while ((*p != '\0') && (q = strchr(p, '<')) != NULL) {

lts++;

p = (q + 1);

}

* Count the right-angle-brackets, if needed. - FM

if (isTITLE == TRUE) {

p = *str;

while ((*p != '\0') && (q = strchr(p, '>')) != NULL) {

gts++;

p = (q + 1);

}

100

101

* Check whether we need to convert anything. - FM

102

103

if (amps == 0 && lts == 0 && gts == 0)

104

return;

105

106

107

* Allocate space and convert. - FM

108

109

q = typecallocn(char,

110

(strlen(*str) + (4 * amps) + (3 * lts) + (3 * gts) + 1));

111

if ((cp = q) == NULL)

112

outofmem(__FILE__, "LYEntify");

113

for (p = *str; *p; p++) {

114

#ifdef CJK_EX

115

if (HTCJK != NOCJK) {

116

switch(state) {

117

case S_text:

118

if (*p == '\033') {

119

state = S_esc;

120

*q++ = *p;

121

continue;

122

}

123

break;

124

125

case S_esc:

126

if (*p == '$') {

127

state = S_dollar;

128

*q++ = *p;

129

continue;

130

} else if (*p == '(') {

131

state = S_paren;

132

*q++ = *p;

133

continue;

134

} else {

135

state = S_text;

136

*q++ = *p;

137

continue;

138

}

139

140

case S_dollar:

141

if (*p == '@' || *p == 'B' || *p == 'A') {

142

state = S_nonascii_text;

143

*q++ = *p;

144

continue;

145

} else if (*p == '(') {

146

state = S_dollar_paren;

147

*q++ = *p;

148

continue;

149

} else {

150

state = S_text;

151

*q++ = *p;

152

continue;

153

}

154

155

case S_dollar_paren:

156

if (*p == 'C') {

157

state = S_nonascii_text;

158

*q++ = *p;

159

continue;

160

} else {

161

state = S_text;

162

*q++ = *p;

163

continue;

164

}

165

166

case S_paren:

167

if (*p == 'B' || *p == 'J' || *p =='T') {

168

state = S_text;

169

*q++ = *p;

170

continue;

171

} else if (*p == 'I') {

172

state = S_nonascii_text;

173

*q++ = *p;

174

continue;

175

}

176

/* FALLTHRU */

177

178

case S_nonascii_text:

179

if (*p == '\033')

180

state = S_esc;

181

*q++ = *p;

182

continue;

183

184

default:

185

break;

186

}

187

if (*(p+1) != '\0' &&

188

(IS_EUC(UCH(*p), UCH(*(p+1))) ||

189

IS_SJIS(UCH(*p), UCH(*(p+1)), in_sjis) ||

190

IS_BIG5(UCH(*p), UCH(*(p+1))))) {

191

*q++ = *p++;

192

*q++ = *p;

193

continue;

194

}

195

}

196

#endif

197

if (*p == '&') {

198

*q++ = '&';

199

*q++ = 'a';

200

*q++ = 'm';

201

*q++ = 'p';

202

*q++ = ';';

203

} else if (isTITLE && *p == '<') {

204

*q++ = '&';

205

*q++ = 'l';

206

*q++ = 't';

207

*q++ = ';';

208

} else if (isTITLE && *p == '>') {

209

*q++ = '&';

210

*q++ = 'g';

211

*q++ = 't';

212

*q++ = ';';

213

} else {

214

*q++ = *p;

215

}

216

}

217

*q = '\0';

218

FREE(*str);

219

*str = cp;

220

}

221

222

223

** This function trims characters <= that of a space (32),

224

** including HT_NON_BREAK_SPACE (1) and HT_EN_SPACE (2),

225

** but not ESC, from the heads of strings. - FM

226

227

PUBLIC void LYTrimHead ARGS1(

228

char *, str)

229

{

230

CONST char *s = str;

231

232

if (isEmpty(s))

233

return;

234

235

while (*s && WHITE(*s) && UCH(*s) != UCH(CH_ESC)) /* S/390 -- gil -- 1669 */

236

s++;

237

if (s > str) {

238

char *ns = str;

239

while (*s) {

240

*ns++ = *s++;

241

}

242

*ns = '\0';

243

}

244

}

245

246

247

** This function trims characters <= that of a space (32),

248

** including HT_NON_BREAK_SPACE (1), HT_EN_SPACE (2), and

249

** ESC from the tails of strings. - FM

250

251

PUBLIC void LYTrimTail ARGS1(

252

char *, str)

253

{

254

int i;

255

256

if (isEmpty(str))

257

return;

258

259

i = strlen(str) - 1;

260

while (i >= 0) {

261

if (WHITE(str[i]))

262

str[i] = '\0';

263

else

264

break;

265

i--;

266

}

267

}

268

269

270

** This function should receive a pointer to the start

271

** of a comment. It returns a pointer to the end ('>')

272

** character of comment, or it's best guess if the comment

273

** is invalid. - FM

274

275

PUBLIC char *LYFindEndOfComment ARGS1(

276

char *, str)

277

{

278

char *cp, *cp1;

279

enum comment_state { start1, start2, end1, end2 } state;

280

281

if (str == NULL)

282

283

* We got NULL, so return NULL. - FM

284

285

return NULL;

286

287

if (strncmp(str, "<!--", 4))

288

289

* We don't have the start of a comment, so

290

* return the beginning of the string. - FM

291

292

return str;

293

294

cp = (str + 4);

295

if (*cp =='>')

296

297

* It's an invalid comment, so

298

* return this end character. - FM

299

300

return cp;

301

302

if ((cp1 = strchr(cp, '>')) == NULL)

303

304

* We don't have an end character, so

305

* return the beginning of the string. - FM

306

307

return str;

308

309

if (*cp == '-')

310

311

* Ugh, it's a "decorative" series of dashes,

312

* so return the next end character. - FM

313

314

return cp1;

315

316

317

* OK, we're ready to start parsing. - FM

318

319

state = start2;

320

while (*cp != '\0') {

321

switch (state) {

322

case start1:

323

if (*cp == '-')

324

state = start2;

325

else

326

327

* Invalid comment, so return the first

328

* '>' from the start of the string. - FM

329

330

return cp1;

331

break;

332

333

case start2:

334

if (*cp == '-')

335

state = end1;

336

break;

337

338

case end1:

339

if (*cp == '-')

340

state = end2;

341

else

342

343

* Invalid comment, so return the first

344

* '>' from the start of the string. - FM

345

346

return cp1;

347

break;

348

349

case end2:

350

if (*cp == '>')

351

352

* Valid comment, so return the end character. - FM

353

354

return cp;

355

if (*cp == '-') {

356

state = start1;

357

} else if (!(WHITE(*cp) && UCH(*cp) != UCH(CH_ESC))) { /* S/390 -- gil -- 1686 */

358

359

* Invalid comment, so return the first

360

* '>' from the start of the string. - FM

361

362

return cp1;

363

}

364

break;

365

366

default:

367

break;

368

}

369

cp++;

370

}

371

372

373

* Invalid comment, so return the first

374

* '>' from the start of the string. - FM

375

376

return cp1;

377

}

378

379

380

** If an HREF, itself or if resolved against a base,

381

** represents a file URL, and the host is defaulted,

382

** force in "//localhost". We need this until

383

** all the other Lynx code which performs security

384

** checks based on the "localhost" string is changed

385

** to assume "//localhost" when a host field is not

386

** present in file URLs - FM

387

388

PUBLIC void LYFillLocalFileURL ARGS2(

389

char **, href,

390

CONST char *, base)

391

{

392

char * temp = NULL;

393

394

if (isEmpty(*href))

395

return;

396

397

if (!strcmp(*href, "//") || !strncmp(*href, "///", 3)) {

398

if (base != NULL && isFILE_URL(base)) {

399

StrAllocCopy(temp, STR_FILE_URL);

400

StrAllocCat(temp, *href);

401

StrAllocCopy(*href, temp);

402

}

403

}

404

if (isFILE_URL(*href)) {

405

if (*(*href+5) == '\0') {

406

StrAllocCat(*href, "//localhost");

407

} else if (!strcmp(*href, "file://")) {

408

StrAllocCat(*href, "localhost");

409

} else if (!strncmp(*href, "file:///", 8)) {

410

StrAllocCopy(temp, (*href+7));

411

LYLocalFileToURL (href, temp);

412

} else if (!strncmp(*href, "file:/", 6) && !LYIsHtmlSep(*(*href+6))) {

413

StrAllocCopy(temp, (*href+5));

414

LYLocalFileToURL (href, temp);

415

}

416

}

417

418

#if defined(USE_DOS_DRIVES)

419

if (LYIsDosDrive(*href)) {

420

421

* If it's a local DOS path beginning with drive letter,

422

* add file://localhost/ prefix and go ahead.

423

424

StrAllocCopy(temp, *href);

425

LYLocalFileToURL (href, temp);

426

}

427

428

/* use below: strlen("file://localhost/") = 17 */

429

if (!strncmp(*href, "file://localhost/", 17)

430

&& (strlen(*href) == 19)

431

&& LYIsDosDrive(*href+17)) {

432

433

* Terminate DOS drive letter with a slash to surf root successfully.

434

* Here seems a proper place to do so.

435

436

LYAddPathSep(href);

437

}

438

#endif /* USE_DOS_DRIVES */

439

440

441

* No path in a file://localhost URL means a

442

* directory listing for the current default. - FM

443

444

if (!strcmp(*href, "file://localhost")) {

445

CONST char *temp2;

446

#ifdef VMS

447

temp2 = HTVMS_wwwName(LYGetEnv("PATH"));

448

#else

449

char curdir[LY_MAXPATH];

450

temp2 = wwwName(Current_Dir(curdir));

451

#endif /* VMS */

452

if (!LYIsHtmlSep(*temp2))

453

LYAddHtmlSep(href);

454

455

* Check for pathological cases - current dir has chars which

456

* MUST BE URL-escaped - kw

457

458

if (strchr(temp2, '%') != NULL || strchr(temp2, '#') != NULL) {

459

FREE(temp);

460

temp = HTEscape(temp2, URL_PATH);

461

StrAllocCat(*href, temp);

462

} else {

463

StrAllocCat(*href, temp2);

464

}

465

}

466

467

#ifdef VMS

468

469

* On VMS, a file://localhost/ URL means

470

* a listing for the login directory. - FM

471

472

if (!strcmp(*href, "file://localhost/"))

473

StrAllocCat(*href, (HTVMS_wwwName(Home_Dir())+1));

474

#endif /* VMS */

475

476

FREE(temp);

477

return;

478

}

479

480

481

** This function writes a line with a META tag to an open file,

482

** which will specify a charset parameter to use when the file is

483

** read back in. It is meant for temporary HTML files used by the

484

** various special pages which may show titles of documents. When those

485

** files are created, the title strings normally have been translated and

486

** expanded to the display character set, so we have to make sure they

487

** don't get translated again.

488

** If the user has changed the display character set during the lifetime

489

** of the Lynx session (or, more exactly, during the time the title

490

** strings to be written were generated), they may now have different

491

** character encodings and there is currently no way to get it all right.

492

** To change this, we would have to add a variable for each string which

493

** keeps track of its character encoding.

494

** But at least we can try to ensure that reading the file after future

495

** display character set changes will give reasonable output.

496

497

** The META tag is not written if the display character set (passed as

498

** disp_chndl) already corresponds to the charset assumption that

499

** would be made when the file is read. - KW

500

501

** Currently this function is used for temporary files like "Lynx Info Page"

502

** and for one permanent - bookmarks (so it may be a problem if you change

503

** the display charset later: new bookmark entries may be mistranslated).

504

** - LP

505

506

PUBLIC void LYAddMETAcharsetToFD ARGS2(

507

FILE *, fd,

508

int, disp_chndl)

509

{

510

if (disp_chndl == -1)

511

512

* -1 means use current_char_set.

513

514

disp_chndl = current_char_set;

515

516

if (fd == NULL || disp_chndl < 0)

517

518

* Should not happen.

519

520

return;

521

522

if (UCLYhndl_HTFile_for_unspec == disp_chndl)

523

524

* Not need to do, so we don't.

525

526

return;

527

528

if (LYCharSet_UC[disp_chndl].enc == UCT_ENC_7BIT)

529

530

* There shouldn't be any 8-bit characters in this case.

531

532

return;

533

534

535

* In other cases we don't know because UCLYhndl_for_unspec may

536

* change during the lifetime of the file (by toggling raw mode

537

* or changing the display character set), so proceed.

538

539

fprintf(fd, "<META %s content=\"text/html;charset=%s\">\n",

540

"http-equiv=\"content-type\"",

541

LYCharSet_UC[disp_chndl].MIMEname);

542

}

543

544

545

** This function returns OL TYPE="A" strings in

546

** the range of " A." (1) to "ZZZ." (18278). - FM

547

548

PUBLIC char *LYUppercaseA_OL_String ARGS1(

549

int, seqnum)

550

{

551

static char OLstring[8];

552

553

if (seqnum <= 1 ) {

554

strcpy(OLstring, " A.");

555

return OLstring;

556

}

557

if (seqnum < 27) {

558

sprintf(OLstring, " %c.", (seqnum + 64));

559

return OLstring;

560

}

561

if (seqnum < 703) {

562

sprintf(OLstring, "%c%c.", ((seqnum-1)/26 + 64),

563

(seqnum - ((seqnum-1)/26)*26 + 64));

564

return OLstring;

565

}

566

if (seqnum < 18279) {

567

sprintf(OLstring, "%c%c%c.", ((seqnum-27)/676 + 64),

568

(((seqnum - ((seqnum-27)/676)*676)-1)/26 + 64),

569

(seqnum - ((seqnum-1)/26)*26 + 64));

570

return OLstring;

571

}

572

strcpy(OLstring, "ZZZ.");

573

return OLstring;

574

}

575

576

577

** This function returns OL TYPE="a" strings in

578

** the range of " a." (1) to "zzz." (18278). - FM

579

580

PUBLIC char *LYLowercaseA_OL_String ARGS1(

581

int, seqnum)

582

{

583

static char OLstring[8];

584

585

if (seqnum <= 1 ) {

586

strcpy(OLstring, " a.");

587

return OLstring;

588

}

589

if (seqnum < 27) {

590

sprintf(OLstring, " %c.", (seqnum + 96));

591

return OLstring;

592

}

593

if (seqnum < 703) {

594

sprintf(OLstring, "%c%c.", ((seqnum-1)/26 + 96),

595

(seqnum - ((seqnum-1)/26)*26 + 96));

596

return OLstring;

597

}

598

if (seqnum < 18279) {

599

sprintf(OLstring, "%c%c%c.", ((seqnum-27)/676 + 96),

600

(((seqnum - ((seqnum-27)/676)*676)-1)/26 + 96),

601

(seqnum - ((seqnum-1)/26)*26 + 96));

602

return OLstring;

603

}

604

strcpy(OLstring, "zzz.");

605

return OLstring;

606

}

607

608

609

** This function returns OL TYPE="I" strings in the

610

** range of " I." (1) to "MMM." (3000).- FM

611

** Maximum length: 16 -TD

612

613

PUBLIC char *LYUppercaseI_OL_String ARGS1(

614

int, seqnum)

615

{

616

static char OLstring[20];

617

int Arabic = seqnum;

618

619

if (Arabic >= 3000) {

620

strcpy(OLstring, "MMM.");

621

return OLstring;

622

}

623

624

switch(Arabic) {

625

case 1:

626

strcpy(OLstring, " I.");

627

return OLstring;

628

case 5:

629

strcpy(OLstring, " V.");

630

return OLstring;

631

case 10:

632

strcpy(OLstring, " X.");

633

return OLstring;

634

case 50:

635

strcpy(OLstring, " L.");

636

return OLstring;

637

case 100:

638

strcpy(OLstring, " C.");

639

return OLstring;

640

case 500:

641

strcpy(OLstring, " D.");

642

return OLstring;

643

case 1000:

644

strcpy(OLstring, " M.");

645

return OLstring;

646

default:

647

OLstring[0] = '\0';

648

break;

649

}

650

651

while (Arabic >= 1000) {

652

strcat(OLstring, "M");

653

Arabic -= 1000;

654

}

655

656

if (Arabic >= 900) {

657

strcat(OLstring, "CM");

658

Arabic -= 900;

659

}

660

661

if (Arabic >= 500) {

662

strcat(OLstring, "D");

663

Arabic -= 500;

664

while (Arabic >= 500) {

665

strcat(OLstring, "C");

666

Arabic -= 10;

667

}

668

}

669

670

if (Arabic >= 400) {

671

strcat(OLstring, "CD");

672

Arabic -= 400;

673

}

674

675

while (Arabic >= 100) {

676

strcat(OLstring, "C");

677

Arabic -= 100;

678

}

679

680

if (Arabic >= 90) {

681

strcat(OLstring, "XC");

682

Arabic -= 90;

683

}

684

685

if (Arabic >= 50) {

686

strcat(OLstring, "L");

687

Arabic -= 50;

688

while (Arabic >= 50) {

689

strcat(OLstring, "X");

690

Arabic -= 10;

691

}

692

}

693

694

if (Arabic >= 40) {

695

strcat(OLstring, "XL");

696

Arabic -= 40;

697

}

698

699

while (Arabic > 10) {

700

strcat(OLstring, "X");

701

Arabic -= 10;

702

}

703

704

switch (Arabic) {

705

case 1:

706

strcat(OLstring, "I.");

707

break;

708

case 2:

709

strcat(OLstring, "II.");

710

break;

711

case 3:

712

strcat(OLstring, "III.");

713

break;

714

case 4:

715

strcat(OLstring, "IV.");

716

break;

717

case 5:

718

strcat(OLstring, "V.");

719

break;

720

case 6:

721

strcat(OLstring, "VI.");

722

break;

723

case 7:

724

strcat(OLstring, "VII.");

725

break;

726

case 8:

727

strcat(OLstring, "VIII.");

728

break;

729

case 9:

730

strcat(OLstring, "IX.");

731

break;

732

case 10:

733

strcat(OLstring, "X.");

734

break;

735

default:

736

strcat(OLstring, ".");

737

break;

738

}

739

740

return OLstring;

741

}

742

743

744

** This function returns OL TYPE="i" strings in

745

** range of " i." (1) to "mmm." (3000).- FM

746

** Maximum length: 16 -TD

747

748

PUBLIC char *LYLowercaseI_OL_String ARGS1(

749

int, seqnum)

750

{

751

static char OLstring[20];

752

int Arabic = seqnum;

753

754

if (Arabic >= 3000) {

755

strcpy(OLstring, "mmm.");

756

return OLstring;

757

}

758

759

switch(Arabic) {

760

case 1:

761

strcpy(OLstring, " i.");

762

return OLstring;

763

case 5:

764

strcpy(OLstring, " v.");

765

return OLstring;

766

case 10:

767

strcpy(OLstring, " x.");

768

return OLstring;

769

case 50:

770

strcpy(OLstring, " l.");

771

return OLstring;

772

case 100:

773

strcpy(OLstring, " c.");

774

return OLstring;

775

case 500:

776

strcpy(OLstring, " d.");

777

return OLstring;

778

case 1000:

779

strcpy(OLstring, " m.");

780

return OLstring;

781

default:

782

OLstring[0] = '\0';

783

break;

784

}

785

786

while (Arabic >= 1000) {

787

strcat(OLstring, "m");

788

Arabic -= 1000;

789

}

790

791

if (Arabic >= 900) {

792

strcat(OLstring, "cm");

793

Arabic -= 900;

794

}

795

796

if (Arabic >= 500) {

797

strcat(OLstring, "d");

798

Arabic -= 500;

799

while (Arabic >= 500) {

800

strcat(OLstring, "c");

801

Arabic -= 10;

802

}

803

}

804

805

if (Arabic >= 400) {

806

strcat(OLstring, "cd");

807

Arabic -= 400;

808

}

809

810

while (Arabic >= 100) {

811

strcat(OLstring, "c");

812

Arabic -= 100;

813

}

814

815

if (Arabic >= 90) {

816

strcat(OLstring, "xc");

817

Arabic -= 90;

818

}

819

820

if (Arabic >= 50) {

821

strcat(OLstring, "l");

822

Arabic -= 50;

823

while (Arabic >= 50) {

824

strcat(OLstring, "x");

825

Arabic -= 10;

826

}

827

}

828

829

if (Arabic >= 40) {

830

strcat(OLstring, "xl");

831

Arabic -= 40;

832

}

833

834

while (Arabic > 10) {

835

strcat(OLstring, "x");

836

Arabic -= 10;

837

}

838

839

switch (Arabic) {

840

case 1:

841

strcat(OLstring, "i.");

842

break;

843

case 2:

844

strcat(OLstring, "ii.");

845

break;

846

case 3:

847

strcat(OLstring, "iii.");

848

break;

849

case 4:

850

strcat(OLstring, "iv.");

851

break;

852

case 5:

853

strcat(OLstring, "v.");

854

break;

855

case 6:

856

strcat(OLstring, "vi.");

857

break;

858

case 7:

859

strcat(OLstring, "vii.");

860

break;

861

case 8:

862

strcat(OLstring, "viii.");

863

break;

864

case 9:

865

strcat(OLstring, "ix.");

866

break;

867

case 10:

868

strcat(OLstring, "x.");

869

break;

870

default:

871

strcat(OLstring, ".");

872

break;

873

}

874

875

return OLstring;

876

}

877

878

879

** This function initializes the Ordered List counter. - FM

880

881

PUBLIC void LYZero_OL_Counter ARGS1(

882

HTStructured *, me)

883

{

884

int i;

885

886

if (!me)

887

return;

888

889

for (i = 0; i < 12; i++) {

890

me->OL_Counter[i] = OL_VOID;

891

me->OL_Type[i] = '1';

892

}

893

894

me->Last_OL_Count = 0;

895

me->Last_OL_Type = '1';

896

897

return;

898

}

899

900

901

** This function is used by the HTML Structured object. - KW

902

903

PUBLIC void LYGetChartransInfo ARGS1(

904

HTStructured *, me)

905

{

906

me->UCLYhndl = HTAnchor_getUCLYhndl(me->node_anchor,

907

UCT_STAGE_STRUCTURED);

908

if (me->UCLYhndl < 0) {

909

int chndl = HTAnchor_getUCLYhndl(me->node_anchor, UCT_STAGE_HTEXT);

910

911

if (chndl < 0) {

912

chndl = current_char_set;

913

HTAnchor_setUCInfoStage(me->node_anchor, chndl,

914

UCT_STAGE_HTEXT,

915

UCT_SETBY_STRUCTURED);

916

}

917

HTAnchor_setUCInfoStage(me->node_anchor, chndl,

918

UCT_STAGE_STRUCTURED,

919

UCT_SETBY_STRUCTURED);

920

me->UCLYhndl = HTAnchor_getUCLYhndl(me->node_anchor,

921

UCT_STAGE_STRUCTURED);

922

}

923

me->UCI = HTAnchor_getUCInfoStage(me->node_anchor,

924

UCT_STAGE_STRUCTURED);

925

}

926

927

928

* Given an UCS character code, will fill buffer passed in as q with

929

* the code's UTF-8 encoding.

930

* If terminate = YES, terminates string on success and returns pointer

931

* to beginning.

932

* If terminate = NO, does not terminate string, and returns pointer

933

* next char after the UTF-8 put into buffer.

934

* On failure, including invalid code or 7-bit code, returns NULL.

935

936

PRIVATE char * UCPutUtf8ToBuffer ARGS3(char *, q, UCode_t, code, BOOL, terminate)

937

{

938

char *q_in = q;

939

if (!q)

940

return NULL;

941

if (code > 127 && code < 0x7fffffffL) {

942

if (code < 0x800L) {

943

*q++ = (char)(0xc0 | (code>>6));

944

*q++ = (char)(0x80 | (0x3f & (code)));

945

} else if (code < 0x10000L) {

946

*q++ = (char)(0xe0 | (code>>12));

947

*q++ = (char)(0x80 | (0x3f & (code>>6)));

948

*q++ = (char)(0x80 | (0x3f & (code)));

949

} else if (code < 0x200000L) {

950

*q++ = (char)(0xf0 | (code>>18));

951

*q++ = (char)(0x80 | (0x3f & (code>>12)));

952

*q++ = (char)(0x80 | (0x3f & (code>>6)));

953

*q++ = (char)(0x80 | (0x3f & (code)));

954

} else if (code < 0x4000000L) {

955

*q++ = (char)(0xf8 | (code>>24));

956

*q++ = (char)(0x80 | (0x3f & (code>>18)));

957

*q++ = (char)(0x80 | (0x3f & (code>>12)));

958

*q++ = (char)(0x80 | (0x3f & (code>>6)));

959

*q++ = (char)(0x80 | (0x3f & (code)));

960

} else {

961

*q++ = (char)(0xfc | (code>>30));

962

*q++ = (char)(0x80 | (0x3f & (code>>24)));

963

*q++ = (char)(0x80 | (0x3f & (code>>18)));

964

*q++ = (char)(0x80 | (0x3f & (code>>12)));

965

*q++ = (char)(0x80 | (0x3f & (code>>6)));

966

*q++ = (char)(0x80 | (0x3f & (code)));

967

}

968

} else {

969

return NULL;

970

}

971

if (terminate) {

972

*q = '\0';

973

return q_in;

974

} else {

975

return q;

976

}

977

}

978

979

/* as in HTParse.c, saves some calls - kw */

980

/* Uppercase hexadecimal digits; indexed with a 4-bit value when a byte is written out as %XX. */
PRIVATE CONST char *hex = "0123456789ABCDEF";

981

982

983

* Any raw 8-bit or multibyte characters already have been

984

* handled in relation to the display character set

985

* in SGML_character(), including named and numeric entities.

986

987

** This function used for translations HTML special fields inside tags

988

** (ALT=, VALUE=, etc.) from charset `cs_from' to charset `cs_to'.

989

** It also unescapes non-ASCII characters from URL (#fragments !)

990

** if st_URL is active.

991

992

** If `do_ent' is YES, it converts named entities

993

** and numeric character references (NCRs) to their `cs_to' replacements.

994

995

** Named entities converted to unicodes. NCRs (unicodes) converted

996

** by UCdomap.c chartrans functions.

997

** ???NCRs with values in the ISO-8859-1 range 160-255 may be converted

998

** to their HTML entity names (via old-style entities) and then translated

999

** according to the LYCharSets.c array for `cs_out'???.

1000

1001

** Some characters (see descriptions in `put_special_unicodes' from SGML.c)

1002

** translated in relation with the state of boolean variables

1003

** `use_lynx_specials', `plain_space' and `hidden'. It is not clear yet:

1004

1005

** If plain_space is TRUE, nbsp (160) will be treated as an ASCII

1006

** space (32). If hidden is TRUE, entities will be translated

1007

** (if `do_ent' is YES) but escape sequences will be passed unaltered.

1008

** If `hidden' is FALSE, some characters are converted to Lynx special

1009

** codes (see `put_special_unicodes') or ASCII space if `plain_space'

1010

** applies). @@ is `use_lynx_specials' needed, does it have any effect? @@

1011

** If `use_lynx_specials' is YES, translate byte values 160 and 173

1012

** meaning U+00A0 and U+00AD given as or converted from raw char input

1013

** are converted to HT_NON_BREAK_SPACE and LY_SOFT_HYPHEN, respectively

1014

** (unless input and output charset are both iso-8859-1, for compatibility

1015

** with previous usage in HTML.c) even if `hidden' or `plain_space' is set.

1016

1017

** If `Back' is YES, the reverse is done instead i.e., Lynx special codes

1018

** in the input are translated back to character values.

1019

1020

** If `Back' is YES, an attempt is made to use UCReverseTransChar() for

1021

** back translation which may be more efficient. (?)

1022

1023

** If `stype' is st_URL, non-ASCII characters are URL-encoded instead.

1024

** The sequence of bytes being URL-encoded is the raw input character if

1025

** we couldn't translate it from `cs_in' (CJK etc.); otherwise it is the

1026

** UTF-8 representation if either `cs_to' requires this or if the

1027

** character's Unicode value is > 255, otherwise it should be the iso-8859-1

1028

** representation.

1029

** No general URL-encoding occurs for displayable ASCII characters and

1030

** spaces and some C0 controls valid in HTML (LF, TAB), it is expected

1031

** that other functions will take care of that as appropriate.

1032

1033

** Escape characters (0x1B, '\033') are

1034

** - URL-encoded if `stype' is st_URL, otherwise

1035

** - dropped if `stype' is st_other, otherwise (i.e., st_HTML)

1036

** - passed if `hidden' is TRUE or HTCJK is set, otherwise

1037

** - dropped.

1038

1039

** (If `stype' is st_URL or st_other most of the parameters really predefined:

1040

** cs_from=cs_to, use_lynx_specials=plain_space=NO, and hidden=YES)

1041

1042

1043

** Returns pointer to the char** passed in

1044

** if string translated or translation unnecessary,

1045

** NULL otherwise

1046

** (in which case something probably went wrong.)

1047

1048

1049

** In general, this somehow ugly function (KW)

1050

** cover three functions from v.2.7.2 (FM):

1051

** extern void LYExpandString PARAMS((

1052

** HTStructured * me,

1053

** char ** str));

1054

** extern void LYUnEscapeEntities PARAMS((

1055

** HTStructured * me,

1056

** char ** str));

1057

** extern void LYUnEscapeToLatinOne PARAMS((

1058

** HTStructured * me,

1059

** char ** str,

1060

** BOOLEAN isURL));

1061

1062

1063

PUBLIC char ** LYUCFullyTranslateString ARGS9(

1064

char **, str,

1065

int, cs_from,

1066

int, cs_to,

1067

BOOLEAN, do_ent,

1068

BOOL, use_lynx_specials,

1069

BOOLEAN, plain_space,

1070

BOOLEAN, hidden,

1071

BOOL, Back,

1072

CharUtil_st, stype)

1073

{

1074

char * p;

1075

char *q, *qs;

1076

HTChunk *chunk = NULL;

1077

char * cp = 0;

1078

char cpe = 0;

1079

char *esc = NULL;

1080

char replace_buf [64];

1081

int uck;

1082

int lowest_8;

1083

UCode_t code = 0;

1084

long int lcode;

1085

BOOL output_utf8 = 0, repl_translated_C0 = 0;

1086

size_t len;

1087

CONST char * name = NULL;

1088

BOOLEAN no_bytetrans;

1089

UCTransParams T;

1090

BOOL from_is_utf8 = FALSE;

1091

char * puni;

1092

enum _state

1093

{ S_text, S_esc, S_dollar, S_paren, S_nonascii_text, S_dollar_paren,

1094

S_trans_byte, S_check_ent, S_ncr, S_check_uni, S_named, S_check_name,

1095

S_recover,

1096

S_got_oututf8, S_got_outstring, S_put_urlstring,

1097

S_got_outchar, S_put_urlchar, S_next_char, S_done} state = S_text;

1098

enum _parsing_what

1099

{ P_text, P_utf8, P_hex, P_decimal, P_named

1100

} what = P_text;

1101

#ifdef KANJI_CODE_OVERRIDE

1102

static unsigned char sjis_1st = '\0';

1103

#ifdef CONV_JISX0201KANA_JISX0208KANA

1104

unsigned char sjis_str[3];

1105

#endif

1106

#endif

1107

1108

1109

** Make sure we have a non-empty string. - FM

1110

1111

if (!str || isEmpty(*str))

1112

return str;

1113

1114

1115

* FIXME: something's wrong with the limit checks here (clearing the

1116

* buffer helps).

1117

1118

memset(replace_buf, 0, sizeof(replace_buf));

1119

1120

1121

** Don't do byte translation

1122

** if original AND target character sets

1123

** are both iso-8859-1 (and we are not called to back-translate),

1124

** or if we are in CJK mode.

1125

1126

if (HTCJK != NOCJK) {

1127

no_bytetrans = TRUE;

1128

} else if (cs_to <= 0 && cs_from == cs_to && (!Back || cs_to < 0)) {

1129

no_bytetrans = TRUE;

1130

} else {

1131

/* No need to translate or examine the string any further */

1132

no_bytetrans = (BOOL) (!use_lynx_specials && !Back &&

1133

UCNeedNotTranslate(cs_from, cs_to));

1134

}

1135

1136

** Save malloc/calloc overhead in simple case - kw

1137

1138

if (do_ent && hidden && (stype != st_URL) && (strchr(*str, '&') == NULL))

1139

do_ent = FALSE;

1140

1141

/* Can't do, caller should figure out what to do... */

1142

if (!UCCanTranslateFromTo(cs_from, cs_to)) {

1143

if (cs_to < 0)

1144

return NULL;

1145

if (!do_ent && no_bytetrans)

1146

return NULL;

1147

no_bytetrans = TRUE;

1148

} else if (cs_to < 0) {

1149

do_ent = FALSE;

1150

}

1151

1152

if (!do_ent && no_bytetrans)

1153

return str;

1154

p = *str;

1155

1156

if (!no_bytetrans) {

1157

UCTransParams_clear(&T);

1158

UCSetTransParams(&T, cs_from, &LYCharSet_UC[cs_from],

1159

cs_to, &LYCharSet_UC[cs_to]);

1160

from_is_utf8 = (BOOL) (LYCharSet_UC[cs_from].enc == UCT_ENC_UTF8);

1161

output_utf8 = T.output_utf8;

1162

repl_translated_C0 = T.repl_translated_C0;

1163

puni = p;

1164

} else if (do_ent) {

1165

output_utf8 = (BOOL) (LYCharSet_UC[cs_to].enc == UCT_ENC_UTF8 ||

1166

HText_hasUTF8OutputSet(HTMainText));

1167

repl_translated_C0 = (BOOL) (LYCharSet_UC[cs_to].enc == UCT_ENC_8BIT_C0);

1168

}

1169

1170

lowest_8 = LYlowest_eightbit[cs_to];

1171

1172

1173

** Create a buffer string seven times the length of the original,

1174

** so we have plenty of room for expansions. - FM

1175

1176

len = strlen(p) + 16;

1177

q = p;

1178

1179

qs = q;

1180

1181

/* Create the HTChunk only if we need it */

1182

#define CHUNK (chunk ? chunk : (chunk = HTChunkCreate2(128, len+1)))

1183

1184

#define REPLACE_STRING(s) \

1185

if (q != qs) HTChunkPutb(CHUNK, qs, q-qs); \

1186

HTChunkPuts(CHUNK, s); \

1187

qs = q = *str

1188

1189

#define REPLACE_CHAR(c) if (q > p) { \

1190

HTChunkPutb(CHUNK, qs, q-qs); \

1191

qs = q = *str; \

1192

*q++ = c; \

1193

} else \

1194

*q++ = c

1195

1196

1197

* Loop through string, making conversions as needed.

1198

1199

* The while() checks for a non-'\0' char only for the normal

1200

* text states since other states may temporarily modify p or *p

1201

* (which should be restored before S_done!) - kw

1202

1203

1204

while (*p || (state != S_text && state != S_nonascii_text)) {

1205

switch(state) {

1206

case S_text:

1207

code = UCH(*p);

1208

#ifdef KANJI_CODE_OVERRIDE

1209

if (HTCJK == JAPANESE && last_kcode == SJIS) {

1210

if (sjis_1st == '\0' && (IS_SJIS_HI1(code)||IS_SJIS_HI2(code))){

1211

sjis_1st = UCH(code);

1212

} else if (sjis_1st && IS_SJIS_LO(code)) {

1213

sjis_1st = '\0';

1214

} else {

1215

#ifdef CONV_JISX0201KANA_JISX0208KANA

1216

if (0xA1 <= code && code <= 0xDF) {

1217

sjis_str[2] = '\0';

1218

JISx0201TO0208_SJIS(UCH(code),

1219

sjis_str, sjis_str + 1);

1220

REPLACE_STRING(sjis_str);

1221

p++;

1222

continue;

1223

}

1224

#endif

1225

}

1226

}

1227

#endif

1228

if (*p == '\033') {

1229

if ((HTCJK != NOCJK && !hidden) || stype != st_HTML) {

1230

state = S_esc;

1231

if (stype == st_URL) {

1232

REPLACE_STRING("%1B");

1233

p++;

1234

continue;

1235

} else if (stype != st_HTML) {

1236

p++;

1237

continue;

1238

} else {

1239

*q++ = *p++;

1240

continue;

1241

}

1242

} else if (!hidden) {

1243

1244

** CJK handling not on, and not a hidden INPUT,

1245

** so block escape. - FM

1246

1247

state = S_next_char;

1248

} else {

1249

state = S_trans_byte;

1250

}

1251

} else {

1252

state = (do_ent ? S_check_ent : S_trans_byte);

1253

}

1254

break;

1255

1256

case S_esc:

1257

if (*p == '$') {

1258

state = S_dollar;

1259

*q++ = *p++;

1260

continue;

1261

} else if (*p == '(') {

1262

state = S_paren;

1263

*q++ = *p++;

1264

continue;

1265

} else {

1266

state = S_text;

1267

}

1268

break;

1269

1270

case S_dollar:

1271

if (*p == '@' || *p == 'B' || *p == 'A') {

1272

state = S_nonascii_text;

1273

*q++ = *p++;

1274

continue;

1275

} else if (*p == '(') {

1276

state = S_dollar_paren;

1277

*q++ = *p++;

1278

continue;

1279

} else {

1280

state = S_text;

1281

}

1282

break;

1283

1284

case S_dollar_paren:

1285

if (*p == 'C') {

1286

state = S_nonascii_text;

1287

*q++ = *p++;

1288

continue;

1289

} else {

1290

state = S_text;

1291

}

1292

break;

1293

1294

case S_paren:

1295

if (*p == 'B' || *p == 'J' || *p == 'T') {

1296

state = S_text;

1297

*q++ = *p++;

1298

continue;

1299

} else if (*p == 'I') {

1300

state = S_nonascii_text;

1301

*q++ = *p++;

1302

continue;

1303

} else {

1304

state = S_text;

1305

}

1306

break;

1307

1308

case S_nonascii_text:

1309

if (*p == '\033') {

1310

if ((HTCJK != NOCJK && !hidden) || stype != st_HTML) {

1311

state = S_esc;

1312

if (stype == st_URL) {

1313

REPLACE_STRING("%1B");

1314

p++;

1315

continue;

1316

} else if (stype != st_HTML) {

1317

p++;

1318

continue;

1319

}

1320

}

1321

}

1322

*q++ = *p++;

1323

continue;

1324

1325

case S_trans_byte:

1326

/* character translation goes here */

1327

1328

** Don't do anything if we have no string,

1329

** or if original AND target character sets

1330

** are both iso-8859-1,

1331

** or if we are in CJK mode.

1332

1333

if (*p == '\0' || no_bytetrans) {

1334

state = S_got_outchar;

1335

break;

1336

}

1337

1338

if (Back) {

1339

int rev_c;

1340

if ((*p) == HT_NON_BREAK_SPACE ||

1341

(*p) == HT_EN_SPACE) {

1342

if (plain_space) {

1343

code = *p = ' ';

1344

state = S_got_outchar;

1345

break;

1346

} else {

1347

code = 160;

1348

if (LYCharSet_UC[cs_to].enc == UCT_ENC_8859 ||

1349

(LYCharSet_UC[cs_to].like8859 & UCT_R_8859SPECL)) {

1350

state = S_got_outchar;

1351

break;

1352

} else if (!(LYCharSet_UC[cs_from].enc == UCT_ENC_8859

1353

||(LYCharSet_UC[cs_from].like8859 & UCT_R_8859SPECL))) {

1354

state = S_check_uni;

1355

break;

1356

} else {

1357

*(unsigned char *)p = UCH(160);

1358

}

1359

}

1360

} else if ((*p) == LY_SOFT_HYPHEN) {

1361

code = 173;

1362

if (LYCharSet_UC[cs_to].enc == UCT_ENC_8859 ||

1363

(LYCharSet_UC[cs_to].like8859 & UCT_R_8859SPECL)) {

1364

state = S_got_outchar;

1365

break;

1366

} else if (!(LYCharSet_UC[cs_from].enc == UCT_ENC_8859

1367

||(LYCharSet_UC[cs_from].like8859 & UCT_R_8859SPECL))) {

1368

state = S_check_uni;

1369

break;

1370

} else {

1371

*(unsigned char *)p = UCH(173);

1372

}

1373

} else if (code < 127 || T.transp) {

1374

state = S_got_outchar;

1375

break;

1376

}

1377

rev_c = UCReverseTransChar(*p, cs_to, cs_from);

1378

if (rev_c > 127) {

1379

*p = (char) rev_c;

1380

code = rev_c;

1381

state = S_got_outchar;

1382

break;

1383

}

1384

} else if (code < 127) {

1385

state = S_got_outchar;

1386

break;

1387

}

1388

1389

if (from_is_utf8) {

1390

if (((*p)&0xc0)==0xc0) {

1391

puni = p;

1392

code = UCGetUniFromUtf8String(&puni);

1393

if (code <= 0) {

1394

code = UCH(*p);

1395

} else {

1396

what = P_utf8;

1397

}

1398

}

1399

} else if (use_lynx_specials && !Back &&

1400

(code == 160 || code == 173) &&

1401

(LYCharSet_UC[cs_from].enc == UCT_ENC_8859 ||

1402

(LYCharSet_UC[cs_from].like8859 & UCT_R_8859SPECL))) {

1403

if (code == 160)

1404

code = *p = HT_NON_BREAK_SPACE;

1405

else if (code == 173)

1406

code = *p = LY_SOFT_HYPHEN;

1407

state = S_got_outchar;

1408

break;

1409

} else if (T.trans_to_uni) {

1410

code = UCTransToUni(*p, cs_from);

1411

if (code <= 0) {

1412

/* What else can we do? */

1413

code = UCH(*p);

1414

}

1415

} else if (!T.trans_from_uni) {

1416

state = S_got_outchar;

1417

break;

1418

}

1419

1420

** Substitute Lynx special character for

1421

** 160 (nbsp) if use_lynx_specials is set.

1422

1423

if (use_lynx_specials && !Back &&

1424

(code == 160 || code == 173)) {

1425

code = ((code==160 ? HT_NON_BREAK_SPACE : LY_SOFT_HYPHEN));

1426

state = S_got_outchar;

1427

break;

1428

}

1429

1430

state = S_check_uni;

1431

break;

1432

1433

case S_check_ent:

1434

if (*p == '&') {

1435

char * pp = p + 1;

1436

len = strlen(pp);

1437

1438

** Check for a numeric entity. - FM

1439

1440

if (*pp == '#' && len > 2 &&

1441

(*(pp+1) == 'x' || *(pp+1) == 'X') &&

1442

UCH(*(pp+2)) < 127 &&

1443

isxdigit(UCH(*(pp+2)))) {

1444

what = P_hex;

1445

state = S_ncr;

1446

} else if (*pp == '#' && len > 2 &&

1447

UCH(*(pp+1)) < 127 &&

1448

isdigit(UCH(*(pp+1)))) {

1449

what = P_decimal;

1450

state = S_ncr;

1451

} else if (UCH(*pp) < 127 &&

1452

isalpha(UCH(*pp))) {

1453

what = P_named;

1454

state = S_named;

1455

} else {

1456

state = S_trans_byte;

1457

}

1458

} else {

1459

state = S_trans_byte;

1460

}

1461

break;

1462

1463

case S_ncr:

1464

if (what == P_hex) {

1465

p += 3;

1466

} else { /* P_decimal */

1467

p += 2;

1468

}

1469

cp = p;

1470

while (*p && UCH(*p) < 127 &&

1471

(what == P_hex ? isxdigit(UCH(*p)) :

1472

isdigit(UCH(*p)))) {

1473

p++;

1474

}

1475

1476

** Save the terminator and isolate the digit(s). - FM

1477

1478

cpe = *p;

1479

if (*p)

1480

*p++ = '\0';

1481

1482

** Show the numeric entity if the value:

1483

** (1) Is greater than 255 and unhandled Unicode.

1484

** (2) Is less than 32, and not valid and we don't

1485

** have HTCJK set.

1486

** (3) Is 127 and we don't have HTPassHighCtrlRaw

1487

** or HTCJK set.

1488

** (4) Is 128 - 159 and we don't have HTPassHighCtrlNum set.

1489

1490

if ((((what == P_hex) ? sscanf(cp, "%lx", &lcode) :

1491

sscanf(cp, "%ld", &lcode)) != 1) ||

1492

lcode > 0x7fffffffL || lcode < 0) {

1493

state = S_recover;

1494

break;

1495

} else {

1496

code = lcode;

1497

if ((code == 1) ||

1498

(code > 127 && code < 156)) {

1499

1500

** Assume these are Microsoft code points, inflicted on

1501

** us by FrontPage. - FM

1502

1503

** MS FrontPage uses syntax like  in 128-159

1504

** range and doesn't follow Unicode standards for this

1505

** area. Windows-1252 codepoints are assumed here.

1506

1507

switch (code) {

1508

case 1:

1509

1510

** WHITE SMILING FACE

1511

1512

code = 0x263a;

1513

break;

1514

case 128:

1515

1516

** EURO currency sign

1517

1518

code = 0x20ac;

1519

break;

1520

case 130:

1521

1522

** SINGLE LOW-9 QUOTATION MARK (sbquo)

1523

1524

code = 0x201a;

1525

break;

1526

case 132:

1527

1528

** DOUBLE LOW-9 QUOTATION MARK (bdquo)

1529

1530

code = 0x201e;

1531

break;

1532

case 133:

1533

1534

** HORIZONTAL ELLIPSIS (hellip)

1535

1536

code = 0x2026;

1537

break;

1538

case 134:

1539

1540

** DAGGER (dagger)

1541

1542

code = 0x2020;

1543

break;

1544

case 135:

1545

1546

** DOUBLE DAGGER (Dagger)

1547

1548

code = 0x2021;

1549

break;

1550

case 137:

1551

1552

** PER MILLE SIGN (permil)

1553

1554

code = 0x2030;

1555

break;

1556

case 139:

1557

1558

** SINGLE LEFT-POINTING ANGLE QUOTATION MARK

1559

** (lsaquo)

1560

1561

code = 0x2039;

1562

break;

1563

case 145:

1564

1565

** LEFT SINGLE QUOTATION MARK (lsquo)

1566

1567

code = 0x2018;

1568

break;

1569

case 146:

1570

1571

** RIGHT SINGLE QUOTATION MARK (rsquo)

1572

1573

code = 0x2019;

1574

break;

1575

case 147:

1576

1577

** LEFT DOUBLE QUOTATION MARK (ldquo)

1578

1579

code = 0x201c;

1580

break;

1581

case 148:

1582

1583

** RIGHT DOUBLE QUOTATION MARK (rdquo)

1584

1585

code = 0x201d;

1586

break;

1587

case 149:

1588

1589

** BULLET (bull)

1590

1591

code = 0x2022;

1592

break;

1593

case 150:

1594

1595

** EN DASH (ndash)

1596

1597

code = 0x2013;

1598

break;

1599

case 151:

1600

1601

** EM DASH (mdash)

1602

1603

code = 0x2014;

1604

break;

1605

case 152:

1606

1607

** SMALL TILDE (tilde)

1608

1609

code = 0x02dc;

1610

break;

1611

case 153:

1612

1613

** TRADE MARK SIGN (trade)

1614

1615

code = 0x2122;

1616

break;

1617

case 155:

1618

1619

** SINGLE RIGHT-POINTING ANGLE QUOTATION MARK

1620

** (rsaquo)

1621

1622

code = 0x203a;

1623

break;

1624

default:

1625

1626

** Do not attempt a conversion

1627

** to valid Unicode values.

1628

1629

break;

1630

}

1631

}

1632

state = S_check_uni;

1633

}

1634

break;

1635

1636

case S_check_uni:

1637

1638

** Show the numeric entity if the value:

1639

** (2) Is less than 32, and not valid and we don't

1640

** have HTCJK set.

1641

** (3) Is 127 and we don't have HTPassHighCtrlRaw

1642

** or HTCJK set.

1643

** (4) Is 128 - 159 and we don't have HTPassHighCtrlNum set.

1644

1645

if ((code < 32 &&

1646

code != 9 && code != 10 && code != 13 &&

1647

HTCJK == NOCJK) ||

1648

(code == 127 &&

1649

!(HTPassHighCtrlRaw || HTCJK != NOCJK)) ||

1650

(code > 127 && code < 160 &&

1651

!HTPassHighCtrlNum)) {

1652

state = S_recover;

1653

break;

1654

}

1655

1656

** Convert the value as an unsigned char,

1657

** hex escaped if isURL is set and it's

1658

** 8-bit, and then recycle the terminator

1659

** if it is not a semicolon. - FM

1660

1661

if (code > 159 && stype == st_URL) {

1662

state = S_got_oututf8;

1663

break;

1664

}

1665

1666

** For 160 (nbsp), use that value if it's

1667

** a hidden INPUT, otherwise use an ASCII

1668

** space (32) if plain_space is TRUE,

1669

** otherwise use the Lynx special character. - FM

1670

1671

if (code == 160) {

1672

if (plain_space) {

1673

code = ' ';

1674

state = S_got_outchar;

1675

break;

1676

} else if (use_lynx_specials) {

1677

code = HT_NON_BREAK_SPACE;

1678

state = S_got_outchar;

1679

break;

1680

} else if ((hidden && !Back) ||

1681

(LYCharSet_UC[cs_to].codepoints & UCT_CP_SUPERSETOF_LAT1) ||

1682

LYCharSet_UC[cs_to].enc == UCT_ENC_8859 ||

1683

(LYCharSet_UC[cs_to].like8859 &

1684

UCT_R_8859SPECL)) {

1685

state = S_got_outchar;

1686

break;

1687

} else if (

1688

(LYCharSet_UC[cs_to].repertoire & UCT_REP_SUPERSETOF_LAT1)) {

1689

; /* nothing, may be translated later */

1690

} else {

1691

code = ' ';

1692

state = S_got_outchar;

1693

break;

1694

}

1695

}

1696

1697

** For 173 (shy), use that value if it's

1698

** a hidden INPUT, otherwise ignore it

1699

** if plain_space is TRUE, otherwise use

1700

** the Lynx special character. - FM

1701

1702

if (code == 173) {

1703

if (plain_space) {

1704

replace_buf[0] = '\0';

1705

state = S_got_outstring;

1706

break;

1707

} else if (Back &&

1708

!(LYCharSet_UC[cs_to].enc == UCT_ENC_8859 ||

1709

(LYCharSet_UC[cs_to].like8859 &

1710

UCT_R_8859SPECL))) {

1711

; /* nothing, may be translated later */

1712

} else if (hidden || Back) {

1713

state = S_got_outchar;

1714

break;

1715

} else if (use_lynx_specials) {

1716

code = LY_SOFT_HYPHEN;

1717

state = S_got_outchar;

1718

break;

1719

}

1720

}

1721

1722

** Seek a translation from the chartrans tables.

1723

1724

if ((uck = UCTransUniChar(code,

1725

cs_to)) >= 32 &&

1726

uck < 256 &&

1727

(uck < 127 || uck >= lowest_8)) {

1728

code = uck;

1729

state = S_got_outchar;

1730

break;

1731

} else if ((uck == -4 ||

1732

(repl_translated_C0 &&

1733

uck > 0 && uck < 32)) &&

1734

1735

** Not found; look for replacement string.

1736

1737

(uck = UCTransUniCharStr(replace_buf,

1738

60, code,

1739

cs_to,

1740

0) >= 0)) {

1741

state = S_got_outstring;

1742

break;

1743

}

1744

if (output_utf8 &&

1745

code > 127 && code < 0x7fffffffL) {

1746

state = S_got_oututf8;

1747

break;

1748

}

1749

1750

** For 8194 (ensp), 8195 (emsp), or 8201 (thinsp),

1751

** use the character reference if it's a hidden INPUT,

1752

** otherwise use an ASCII space (32) if plain_space is

1753

** TRUE, otherwise use the Lynx special character. - FM

1754

1755

if (code == 8194 || code == 8195 || code == 8201) {

1756

if (hidden) {

1757

state = S_recover;

1758

} else if (plain_space) {

1759

code = ' ';

1760

state = S_got_outchar;

1761

} else {

1762

code = HT_EN_SPACE;

1763

state = S_got_outchar;

1764

}

1765

break;

1766

1767

** Ignore 8204 (zwnj), 8205 (zwj)

1768

** 8206 (lrm), and 8207 (rlm),

1769

** for now, if we got this far without

1770

** finding a representation for them.

1771

1772

} else if (code == 8204 || code == 8205 ||

1773

code == 8206 || code == 8207) {

1774

CTRACE((tfp, "LYUCFullyTranslateString: Ignoring '%ld'.\n", code));

1775

replace_buf[0] = '\0';

1776

state = S_got_outstring;

1777

break;

1778

1779

** Show the numeric entity if the value:

1780

** (1) Is greater than 255 and unhandled Unicode.

1781

1782

} else if (code > 255) {

1783

1784

** Illegal or not yet handled value.

1785

** Return "&#" verbatim and continue

1786

** from there. - FM

1787

1788

state = S_recover;

1789

break;

1790

1791

** If it's ASCII, or is 8-bit but HTPassEightBitNum

1792

** is set or the character set is "ISO Latin 1",

1793

** use it's value. - FM

1794

1795

} else if (code < 161 ||

1796

(code < 256 &&

1797

(HTPassEightBitNum || cs_to == LATIN1))) {

1798

1799

** No conversion needed.

1800

1801

state = S_got_outchar;

1802

break;

1803

1804

/* The following disabled section doesn't make sense

1805

** any more. It used to make sense in the past, when

1806

** S_check_named would look in "old style" tables

1807

** in addition to what it does now.

1808

** Disabling of going to S_check_name here prevents

1809

** endless looping between S_check_uni and S_check_names

1810

** states, which could occur here for Latin 1 codes

1811

** for some cs_to if they had no translation in that

1812

** cs_to. Normally all cs_to *should* now have valid

1813

** translations via UCTransUniChar or UCTransUniCharStr

1814

** for all Latin 1 codes, so that we would not get here

1815

** anyway, and no loop could occur. Still, if we *do*

1816

** get here, FALL THROUGH to case S_recover now. - kw

1817

1818

#if 0

1819

1820

** If we get to here, convert and handle

1821

** the character as a named entity. - FM

1822

1823

} else {

1824

name = HTMLGetEntityName(code - 160);

1825

state = S_check_name;

1826

break;

1827

#endif

1828

}

1829

1830

case S_recover:

1831

if (what == P_decimal || what == P_hex) {

1832

1833

** Illegal or not yet handled value.

1834

** Return "&#" verbatim and continue

1835

** from there. - FM

1836

1837

*q++ = '&';

1838

*q++ = '#';

1839

if (what == P_hex)

1840

*q++ = 'x';

1841

if (cpe != '\0')

1842

*(p-1) = cpe;

1843

p = cp;

1844

state = S_done;

1845

} else if (what == P_named) {

1846

*cp = cpe;

1847

*q++ = '&';

1848

state = S_done;

1849

} else if (!T.output_utf8 && stype == st_HTML && !hidden &&

1850

!(HTPassEightBitRaw &&

1851

UCH(*p) >= lowest_8)) {

1852

sprintf(replace_buf, "U%.2lX", code);

1853

state = S_got_outstring;

1854

} else {

1855

puni = p;

1856

code = UCH(*p);

1857

state = S_got_outchar;

1858

}

1859

break;

1860

1861

case S_named:

1862

cp = ++p;

1863

while (*cp && UCH(*cp) < 127 &&

1864

isalnum(UCH(*cp)))

1865

cp++;

1866

cpe = *cp;

1867

*cp = '\0';

1868

name = p;

1869

state = S_check_name;

1870

break;

1871

1872

case S_check_name:

1873

1874

** Seek the Unicode value for the named entity.

1875

1876

** !!!! We manually recover the case of '=' terminator which

1877

** is commonly found on query to CGI-scripts

1878

** enclosed as href= URLs like "somepath/?x=1&yz=2"

1879

** Without this dirty fix, submission of such URLs was broken

1880

** if &yz string happened to be a recognized entity name. - LP

1881

1882

if ( ((code = HTMLGetEntityUCValue(name)) > 0) &&

1883

!((cpe == '=') && (stype == st_URL)) ) {

1884

state = S_check_uni;

1885

break;

1886

}

1887

1888

** Didn't find the entity.

1889

** Return verbatim.

1890

1891

state = S_recover;

1892

break;

1893

1894

/* * * O U T P U T S T A T E S * * */

1895

1896

case S_got_oututf8:

1897

if (code > 255 ||

1898

(code >= 128 && LYCharSet_UC[cs_to].enc == UCT_ENC_UTF8)) {

1899

UCPutUtf8ToBuffer(replace_buf, code, YES);

1900

state = S_got_outstring;

1901

} else {

1902

state = S_got_outchar;

1903

}

1904

break;

1905

case S_got_outstring:

1906

if (what == P_decimal || what == P_hex) {

1907

if (cpe != ';' && cpe != '\0')

1908

*(--p) = cpe;

1909

p--;

1910

} else if (what == P_named) {

1911

*cp = cpe;

1912

p = (*cp != ';') ? (cp - 1) : cp;

1913

} else if (what == P_utf8) {

1914

p = puni;

1915

}

1916

if (replace_buf[0] == '\0') {

1917

state = S_next_char;

1918

break;

1919

}

1920

if (stype == st_URL) {

1921

code = replace_buf[0]; /* assume string OK if first char is */

1922

if (code >= 127 ||

1923

(code < 32 && (code != 9 && code != 10 && code != 0))) {

1924

state = S_put_urlstring;

1925

break;

1926

}

1927

}

1928

REPLACE_STRING(replace_buf);

1929

state = S_next_char;

1930

break;

1931

case S_put_urlstring:

1932

esc = HTEscape(replace_buf, URL_XALPHAS);

1933

REPLACE_STRING(esc);

1934

FREE(esc);

1935

state = S_next_char;

1936

break;

1937

case S_got_outchar:

1938

if (what == P_decimal || what == P_hex) {

1939

if (cpe != ';' && cpe != '\0')

1940

*(--p) = cpe;

1941

p--;

1942

} else if (what == P_named) {

1943

*cp = cpe;

1944

p = (*cp != ';') ? (cp - 1) : cp;

1945

} else if (what == P_utf8) {

1946

p = puni;

1947

}

1948

if (stype == st_URL &&

1949

/* Not a full HTEscape, only for 8bit and ctrl chars */

1950

(TOASCII(code) >= 127 || /* S/390 -- gil -- 1925 */

1951

(code < ' ' && (code != '\t' && code != '\n')))) {

1952

state = S_put_urlchar;

1953

break;

1954

} else if (!hidden && code == 10 && *p == 10

1955

&& q != qs && *(q-1) == 13) {

1956

1957

** If this is not a hidden string, and the current char is

1958

** the LF ('\n') of a CRLF pair, drop the CR ('\r'). - KW

1959

1960

*(q-1) = *p++;

1961

state = S_done;

1962

break;

1963

}

1964

*q++ = (char)code;

1965

state = S_next_char;

1966

break;

1967

case S_put_urlchar:

1968

*q++ = '%';

1969

REPLACE_CHAR(hex[(TOASCII(code) >> 4) & 15]); /* S/390 -- gil -- 1944 */

1970

REPLACE_CHAR(hex[(TOASCII(code) & 15)]);

1971

/* fall through */

1972

case S_next_char:

1973

p++; /* fall through */

1974

case S_done:

1975

state = S_text;

1976

what = P_text;

1977

/* for next round */

1978

}

1979

}

1980

1981

*q = '\0';

1982

if (chunk) {

1983

HTChunkPutb(CHUNK, qs, q-qs + 1); /* also terminates */

1984

if (stype == st_URL || stype == st_other) {

1985

LYTrimHead(chunk->data);

1986

LYTrimTail(chunk->data);

1987

}

1988

StrAllocCopy(*str, chunk->data);

1989

HTChunkFree(chunk);

1990

} else {

1991

if (stype == st_URL || stype == st_other) {

1992

LYTrimHead(qs);

1993

LYTrimTail(qs);

1994

}

1995

}

1996

return str;

1997

}

1998

1999

#undef REPLACE_CHAR

2000

#undef REPLACE_STRING

2001

2002

/*
**  Translate an HTML attribute/field string from `cs_from' to `cs_to',
**  with entity conversion enabled (do_ent = TRUE) and no back
**  translation (Back = NO), by delegating to
**  LYUCFullyTranslateString().
**
**  Returns YES unless LYUCFullyTranslateString() returned NULL.
**  May reallocate *str even if cs_to == 0.
*/
PUBLIC BOOL LYUCTranslateHTMLString ARGS7(
	char **,	str,
	int,		cs_from,
	int,		cs_to,
	BOOL,		use_lynx_specials,
	BOOLEAN,	plain_space,
	BOOLEAN,	hidden,
	CharUtil_st,	stype)
{
    if (LYUCFullyTranslateString(str, cs_from, cs_to, TRUE,
				 use_lynx_specials, plain_space, hidden,
				 NO, stype))
	return YES;

    return NO;
}

2020

2021

/*
**  Back-translate form data: calls LYUCFullyTranslateString() with
**  do_ent = FALSE, use_lynx_specials = NO, hidden = YES, Back = YES
**  and stype = st_HTML.
**
**  Returns TRUE when that call returned non-NULL.
**  May reallocate *str.
*/
PUBLIC BOOL LYUCTranslateBackFormData ARGS4(
	char **,	str,
	int,		cs_from,
	int,		cs_to,
	BOOLEAN,	plain_space)
{
    char **translated = LYUCFullyTranslateString(str, cs_from, cs_to, FALSE,
						  NO, plain_space, YES,
						  YES, st_HTML);

    return (BOOL) (translated != NULL);
}

2034

2035

2036

* Parse a parameter from an HTML META tag, i.e., the CONTENT.

2037

2038

PUBLIC char *LYParseTagParam ARGS2(

2039

char *, from,

2040

char *, name)

2041

{

2042

size_t len = strlen(name);

2043

char *result = NULL;

2044

char *string = from;

2045

2046

do {

2047

if ((string = strchr(string, ';')) == NULL)

2048

return NULL;

2049

while (*string != '\0' && (*string == ';' || isspace(UCH(*string)))) {

2050

string++;

2051

}

2052

if (strlen(string) < len) return NULL;

2053

} while (strncasecomp(string, name, len) != 0);

2054

string += len;

2055

while (*string != '\0' && (UCH(isspace(*string)) || *string == '=')) {

2056

string++;

2057

}

2058

2059

StrAllocCopy(result, string);

2060

len = 0;

2061

while (isprint(UCH(string[len])) && !isspace(UCH(string[len]))) {

2062

len++;

2063

}

2064

result[len] = '\0';

2065

2066

2067

* Strip single quotes, just in case.

2068

2069

if (len > 2 && result[0] == '\'' && result[len-1] == result[0]) {

2070

result[len-1] = '\0';

2071

for (string = result; (string[0] = string[1]) != '\0'; ++string)

2072

;

2073

}

2074

return result;

2075

}

2076

2077

2078

* Given a refresh-URL content string, parses the delay time and the URL

2079

* string. Ignore the remainder of the content.

2080

2081

PUBLIC void LYParseRefreshURL ARGS3(

2082

char *, content,

2083

char **, p_seconds,

2084

char **, p_address)

2085

{

2086

char *cp;

2087

char *cp1 = NULL;

2088

char *Seconds = NULL;

2089

2090

2091

* Look for the Seconds field. - FM

2092

2093

cp = LYSkipBlanks(content);

2094

if (*cp && isdigit(UCH(*cp))) {

2095

cp1 = cp;

2096

while (*cp1 && isdigit(UCH(*cp1)))

2097

cp1++;

2098

StrnAllocCopy(Seconds, cp, cp1 - cp);

2099

}

2100

*p_seconds = Seconds;

2101

*p_address = LYParseTagParam(content, "URL");

2102

2103

CTRACE((tfp, "LYParseRefreshURL\n\tcontent: %s\n\tseconds: %s\n\taddress: %s\n",

2104

content, NonNull(*p_seconds), NonNull(*p_address)));

2105

}

2106

2107

2108

** This function processes META tags in HTML streams. - FM

2109

2110

PUBLIC void LYHandleMETA ARGS4(

2111

HTStructured *, me,

2112

CONST BOOL*, present,

2113

CONST char **, value,

2114

char **, include GCC_UNUSED)

2115

{

2116

char *http_equiv = NULL, *name = NULL, *content = NULL;

2117

char *href = NULL, *id_string = NULL, *temp = NULL;

2118

char *cp, *cp0, *cp1 = NULL;

2119

int url_type = 0;

2120

2121

if (!me || !present)

2122

return;

2123

2124

2125

* Load the attributes for possible use by Lynx. - FM

2126

2127

if (present[HTML_META_HTTP_EQUIV] &&

2128

value[HTML_META_HTTP_EQUIV] && *value[HTML_META_HTTP_EQUIV]) {

2129

StrAllocCopy(http_equiv, value[HTML_META_HTTP_EQUIV]);

2130

convert_to_spaces(http_equiv, TRUE);

2131

LYUCTranslateHTMLString(&http_equiv, me->tag_charset, me->tag_charset,

2132

NO, NO, YES, st_other);

2133

if (*http_equiv == '\0') {

2134

FREE(http_equiv);

2135

}

2136

}

2137

if (present[HTML_META_NAME] &&

2138

value[HTML_META_NAME] && *value[HTML_META_NAME]) {

2139

StrAllocCopy(name, value[HTML_META_NAME]);

2140

convert_to_spaces(name, TRUE);

2141

LYUCTranslateHTMLString(&name, me->tag_charset, me->tag_charset,

2142

NO, NO, YES, st_other);

2143

if (*name == '\0') {

2144

FREE(name);

2145

}

2146

}

2147

if (present[HTML_META_CONTENT] &&

2148

value[HTML_META_CONTENT] && *value[HTML_META_CONTENT]) {

2149

2150

* Technically, we should be creating a comma-separated

2151

* list, but META tags come one at a time, and we'll

2152

* handle (or ignore) them as each is received. Also,

2153

* at this point, we only trim leading and trailing

2154

* blanks from the CONTENT value, without translating

2155

* any named entities or numeric character references,

2156

* because how we should do that depends on what type

2157

* of information it contains, and whether or not any

2158

* of it might be sent to the screen. - FM

2159

2160

StrAllocCopy(content, value[HTML_META_CONTENT]);

2161

convert_to_spaces(content, FALSE);

2162

LYTrimHead(content);

2163

LYTrimTail(content);

2164

if (*content == '\0') {

2165

FREE(content);

2166

}

2167

}

2168

CTRACE((tfp, "LYHandleMETA: HTTP-EQUIV=\"%s\" NAME=\"%s\" CONTENT=\"%s\"\n",

2169

(http_equiv ? http_equiv : "NULL"),

2170

(name ? name : "NULL"),

2171

(content ? content : "NULL")));

2172

2173

2174

* Make sure we have META name/value pairs to handle. - FM

2175

2176

if (!(http_equiv || name) || !content)

2177

goto free_META_copies;

2178

2179

2180

* Check for a no-cache Pragma

2181

* or Cache-Control directive. - FM

2182

2183

if (!strcasecomp(NonNull(http_equiv), "Pragma") ||

2184

!strcasecomp(NonNull(http_equiv), "Cache-Control")) {

2185

LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset,

2186

NO, NO, YES, st_other);

2187

if (!strcasecomp(content, "no-cache")) {

2188

me->node_anchor->no_cache = TRUE;

2189

HText_setNoCache(me->text);

2190

}

2191

2192

2193

* If we didn't get a Cache-Control MIME header,

2194

* and the META has one, convert to lowercase,

2195

* store it in the anchor element, and if we

2196

* haven't yet set no_cache, check whether we

2197

* should. - FM

2198

2199

if ((!me->node_anchor->cache_control) &&

2200

!strcasecomp(NonNull(http_equiv), "Cache-Control")) {

2201

LYLowerCase(content);

2202

StrAllocCopy(me->node_anchor->cache_control, content);

2203

if (me->node_anchor->no_cache == FALSE) {

2204

cp0 = content;

2205

while ((cp = strstr(cp0, "no-cache")) != NULL) {

2206

cp += 8;

2207

while (*cp != '\0' && WHITE(*cp))

2208

cp++;

2209

if (*cp == '\0' || *cp == ';') {

2210

me->node_anchor->no_cache = TRUE;

2211

HText_setNoCache(me->text);

2212

break;

2213

}

2214

cp0 = cp;

2215

}

2216

if (me->node_anchor->no_cache == TRUE)

2217

goto free_META_copies;

2218

cp0 = content;

2219

while ((cp = strstr(cp0, "max-age")) != NULL) {

2220

cp += 7;

2221

while (*cp != '\0' && WHITE(*cp))

2222

cp++;

2223

if (*cp == '=') {

2224

cp++;

2225

while (*cp != '\0' && WHITE(*cp))

2226

cp++;

2227

if (isdigit(UCH(*cp))) {

2228

cp0 = cp;

2229

while (isdigit(UCH(*cp)))

2230

cp++;

2231

if (*cp0 == '0' && cp == (cp0 + 1)) {

2232

me->node_anchor->no_cache = TRUE;

2233

HText_setNoCache(me->text);

2234

break;

2235

}

2236

}

2237

}

2238

cp0 = cp;

2239

}

2240

}

2241

}

2242

2243

2244

* Check for an Expires directive. - FM

2245

2246

} else if (!strcasecomp(NonNull(http_equiv), "Expires")) {

2247

2248

* If we didn't get an Expires MIME header,

2249

* store it in the anchor element, and if we

2250

* haven't yet set no_cache, check whether we

2251

* should. Note that we don't accept a Date

2252

* header via META tags, because it's likely

2253

* to be untrustworthy, but do check for a

2254

* Date header from a server when making the

2255

* comparison. - FM

2256

2257

LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset,

2258

NO, NO, YES, st_other);

2259

StrAllocCopy(me->node_anchor->expires, content);

2260

if (me->node_anchor->no_cache == FALSE) {

2261

if (!strcmp(content, "0")) {

2262

2263

* The value is zero, which we treat as

2264

* an absolute no-cache directive. - FM

2265

2266

me->node_anchor->no_cache = TRUE;

2267

HText_setNoCache(me->text);

2268

} else if (me->node_anchor->date != NULL) {

2269

2270

* We have a Date header, so check if

2271

* the value is less than or equal to

2272

* that. - FM

2273

2274

if (LYmktime(content, TRUE) <=

2275

LYmktime(me->node_anchor->date, TRUE)) {

2276

me->node_anchor->no_cache = TRUE;

2277

HText_setNoCache(me->text);

2278

}

2279

} else if (LYmktime(content, FALSE) == 0) {

2280

2281

* We don't have a Date header, and

2282

* the value is in past for us. - FM

2283

2284

me->node_anchor->no_cache = TRUE;

2285

HText_setNoCache(me->text);

2286

}

2287

}

2288

2289

2290

* Check for a text/html Content-Type with a

2291

* charset directive, if we didn't already set

2292

* the charset via a server's header. - AAC & FM

2293

2294

} else if (!(me->node_anchor->charset && *me->node_anchor->charset) &&

2295

!strcasecomp(NonNull(http_equiv), "Content-Type")) {

2296

LYUCcharset * p_in = NULL;

2297

LYUCcharset * p_out = NULL;

2298

LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset,

2299

NO, NO, YES, st_other);

2300

LYLowerCase(content);

2301

2302

if ((cp1 = strstr(content, "charset")) != NULL) {

2303

BOOL chartrans_ok = NO;

2304

char *cp3 = NULL, *cp4;

2305

int chndl;

2306

2307

cp1 += 7;

2308

while (*cp1 == ' ' || *cp1 == '=' || *cp1 == '"')

2309

cp1++;

2310

2311

StrAllocCopy(cp3, cp1); /* copy to mutilate more */

2312

for (cp4 = cp3; (*cp4 != '\0' && *cp4 != '"' &&

2313

*cp4 != ';' && *cp4 != ':' &&

2314

!WHITE(*cp4)); cp4++) {

2315

; /* do nothing */

2316

}

2317

*cp4 = '\0';

2318

cp4 = cp3;

2319

chndl = UCGetLYhndl_byMIME(cp3);

2320

2321

#ifdef CAN_SWITCH_DISPLAY_CHARSET

2322

/* Allow a switch to a more suitable display charset */

2323

if (Switch_Display_Charset (chndl, SWITCH_DISPLAY_CHARSET_MAYBE)) {

2324

/* UCT_STAGE_STRUCTURED and UCT_STAGE_HTEXT

2325

should have the same setting for UCInfoStage. */

2326

int structured = HTAnchor_getUCInfoStage(me->node_anchor,

2327

UCT_STAGE_STRUCTURED);

2328

me->outUCLYhndl = current_char_set;

2329

HTAnchor_setUCInfoStage(me->node_anchor,

2330

current_char_set,

2331

UCT_STAGE_HTEXT,

2332

UCT_SETBY_MIME); /* highest priorty! */

2333

HTAnchor_setUCInfoStage(me->node_anchor,

2334

current_char_set,

2335

UCT_STAGE_STRUCTURED,

2336

UCT_SETBY_MIME); /* highest priorty! */

2337

me->outUCI = HTAnchor_getUCInfoStage(me->node_anchor,

2338

UCT_STAGE_HTEXT);

2339

/* The SGML stage will be reset in change_chartrans_handling */

2340

}

2341

#endif

2342

2343

if (UCCanTranslateFromTo(chndl, current_char_set)) {

2344

chartrans_ok = YES;

2345

StrAllocCopy(me->node_anchor->charset, cp4);

2346

HTAnchor_setUCInfoStage(me->node_anchor, chndl,

2347

UCT_STAGE_PARSER,

2348

UCT_SETBY_STRUCTURED);

2349

} else if (chndl < 0) {

2350

2351

* Got something but we don't recognize it.

2352

2353

chndl = UCLYhndl_for_unrec;

2354

if (chndl < 0) /* UCLYhndl_for_unrec not defined :-( */

2355

chndl = UCLYhndl_for_unspec; /* always >= 0 */

2356

if (UCCanTranslateFromTo(chndl, current_char_set)) {

2357

chartrans_ok = YES;

2358

HTAnchor_setUCInfoStage(me->node_anchor, chndl,

2359

UCT_STAGE_PARSER,

2360

UCT_SETBY_STRUCTURED);

2361

}

2362

}

2363

if (chartrans_ok) {

2364

p_in = HTAnchor_getUCInfoStage(me->node_anchor,

2365

UCT_STAGE_PARSER);

2366

p_out = HTAnchor_setUCInfoStage(me->node_anchor,

2367

current_char_set,

2368

UCT_STAGE_HTEXT,

2369

UCT_SETBY_DEFAULT);

2370

if (!p_out) {

2371

2372

* Try again.

2373

2374

p_out = HTAnchor_getUCInfoStage(me->node_anchor,

2375

UCT_STAGE_HTEXT);

2376

}

2377

if (!strcmp(p_in->MIMEname, "x-transparent")) {

2378

HTPassEightBitRaw = TRUE;

2379

HTAnchor_setUCInfoStage(me->node_anchor,

2380

HTAnchor_getUCLYhndl(me->node_anchor,

2381

UCT_STAGE_HTEXT),

2382

UCT_STAGE_PARSER,

2383

UCT_SETBY_DEFAULT);

2384

}

2385

if (!strcmp(p_out->MIMEname, "x-transparent")) {

2386

HTPassEightBitRaw = TRUE;

2387

HTAnchor_setUCInfoStage(me->node_anchor,

2388

HTAnchor_getUCLYhndl(me->node_anchor,

2389

UCT_STAGE_PARSER),

2390

UCT_STAGE_HTEXT,

2391

UCT_SETBY_DEFAULT);

2392

}

2393

if (p_in->enc != UCT_ENC_CJK) {

2394

HTCJK = NOCJK;

2395

if (!(p_in->codepoints &

2396

UCT_CP_SUBSETOF_LAT1) &&

2397

chndl == current_char_set) {

2398

HTPassEightBitRaw = TRUE;

2399

}

2400

} else if (p_out->enc == UCT_ENC_CJK) {

2401

Set_HTCJK(p_in->MIMEname, p_out->MIMEname);

2402

}

2403

LYGetChartransInfo(me);

2404

2405

** Update the chartrans info homologously to

2406

** a Content-Type MIME header with a charset

2407

** parameter. - FM

2408

2409

if (me->UCLYhndl != chndl) {

2410

HTAnchor_setUCInfoStage(me->node_anchor, chndl,

2411

UCT_STAGE_MIME,

2412

UCT_SETBY_STRUCTURED);

2413

HTAnchor_setUCInfoStage(me->node_anchor, chndl,

2414

UCT_STAGE_PARSER,

2415

UCT_SETBY_STRUCTURED);

2416

me->inUCLYhndl = HTAnchor_getUCLYhndl(me->node_anchor,

2417

UCT_STAGE_PARSER);

2418

me->inUCI = HTAnchor_getUCInfoStage(me->node_anchor,

2419

UCT_STAGE_PARSER);

2420

}

2421

UCSetTransParams(&me->T,

2422

me->inUCLYhndl, me->inUCI,

2423

me->outUCLYhndl, me->outUCI);

2424

} else {

2425

2426

* Cannot translate.

2427

* If according to some heuristic the given

2428

* charset and the current display character

2429

* both are likely to be like ISO-8859 in

2430

* structure, pretend we have some kind

2431

* of match.

2432

2433

BOOL given_is_8859

2434

= (BOOL) (!strncmp(cp4, "iso-8859-", 9) &&

2435

isdigit(UCH(cp4[9])));

2436

BOOL given_is_8859like

2437

= (BOOL) (given_is_8859 || !strncmp(cp4, "windows-", 8) ||

2438

!strncmp(cp4, "cp12", 4) ||

2439

!strncmp(cp4, "cp-12", 5));

2440

BOOL given_and_display_8859like

2441

= (BOOL) (given_is_8859like &&

2442

(strstr(LYchar_set_names[current_char_set],

2443

"ISO-8859") ||

2444

strstr(LYchar_set_names[current_char_set],

2445

"windows-")));

2446

2447

if (given_is_8859) {

2448

cp1 = &cp4[10];

2449

while (*cp1 &&

2450

isdigit(UCH((*cp1))))

2451

cp1++;

2452

*cp1 = '\0';

2453

}

2454

if (given_and_display_8859like) {

2455

StrAllocCopy(me->node_anchor->charset, cp4);

2456

HTPassEightBitRaw = TRUE;

2457

}

2458

HTAlert(*cp4 ? cp4 : me->node_anchor->charset);

2459

2460

}

2461

FREE(cp3);

2462

2463

if (me->node_anchor->charset) {

2464

CTRACE((tfp,

2465

"LYHandleMETA: New charset: %s\n",

2466

me->node_anchor->charset));

2467

}

2468

}

2469

2470

* Set the kcode element based on the charset. - FM

2471

2472

HText_setKcode(me->text, me->node_anchor->charset, p_in);

2473

2474

2475

* Check for a Refresh directive. - FM

2476

2477

} else if (!strcasecomp(NonNull(http_equiv), "Refresh")) {

2478

char *Seconds = NULL;

2479

2480

LYParseRefreshURL(content, &Seconds, &href);

2481

2482

if (Seconds) {

2483

if (href) {

2484

2485

* We found a URL field, so check it out. - FM

2486

2487

if (!(url_type = LYLegitimizeHREF(me, &href, TRUE, FALSE))) {

2488

2489

* The specs require a complete URL,

2490

* but this is a Netscapism, so don't

2491

* expect the author to know that. - FM

2492

2493

HTUserMsg(REFRESH_URL_NOT_ABSOLUTE);

2494

2495

* Use the document's address

2496

* as the base. - FM

2497

2498

if (*href != '\0') {

2499

temp = HTParse(href,

2500

me->node_anchor->address, PARSE_ALL);

2501

StrAllocCopy(href, temp);

2502

FREE(temp);

2503

} else {

2504

StrAllocCopy(href, me->node_anchor->address);

2505

HText_setNoCache(me->text);

2506

}

2507

2508

} else {

2509

2510

* Check whether to fill in localhost. - FM

2511

2512

LYFillLocalFileURL(&href,

2513

(me->inBASE ?

2514

me->base_href : me->node_anchor->address));

2515

}

2516

2517

2518

* Set the no_cache flag if the Refresh URL

2519

* is the same as the document's address. - FM

2520

2521

if (!strcmp(href, me->node_anchor->address)) {

2522

HText_setNoCache(me->text);

2523

}

2524

} else {

2525

2526

* We didn't find a URL field, so use

2527

* the document's own address and set

2528

* the no_cache flag. - FM

2529

2530

StrAllocCopy(href, me->node_anchor->address);

2531

HText_setNoCache(me->text);

2532

}

2533

2534

* Check for an anchor in http or https URLs. - FM

2535

2536

cp = NULL;

2537

#ifndef DONT_TRACK_INTERNAL_LINKS

2538

/* id_string seems to be used wrong below if given.

2539

not that it matters much. avoid setting it here. - kw */

2540

if ((strncmp(href, "http", 4) == 0) &&

2541

(cp = strchr(href, '#')) != NULL) {

2542

StrAllocCopy(id_string, cp);

2543

*cp = '\0';

2544

}

2545

#endif

2546

if (me->inA) {

2547

2548

* Ugh! The META tag, which is a HEAD element,

2549

* is in an Anchor, which is BODY element. All

2550

* we can do is close the Anchor and cross our

2551

* fingers. - FM

2552

2553

if (me->inBoldA == TRUE && me->inBoldH == FALSE)

2554

HText_appendCharacter(me->text, LY_BOLD_END_CHAR);

2555

me->inBoldA = FALSE;

2556

HText_endAnchor(me->text, me->CurrentANum);

2557

me->inA = FALSE;

2558

me->CurrentANum = 0;

2559

}

2560

me->CurrentA = HTAnchor_findChildAndLink(

2561

me->node_anchor, /* Parent */

2562

id_string, /* Tag */

2563

href, /* Addresss */

2564

(void *)0); /* Type */

2565

if (id_string)

2566

*cp = '#';

2567

FREE(id_string);

2568

LYEnsureSingleSpace(me);

2569

if (me->inUnderline == FALSE)

2570

HText_appendCharacter(me->text, LY_UNDERLINE_START_CHAR);

2571

HTML_put_string(me, "REFRESH(");

2572

HTML_put_string(me, Seconds);

2573

HTML_put_string(me, " sec):");

2574

FREE(Seconds);

2575

if (me->inUnderline == FALSE)

2576

HText_appendCharacter(me->text, LY_UNDERLINE_END_CHAR);

2577

HTML_put_character(me, ' ');

2578

me->in_word = NO;

2579

HText_beginAnchor(me->text, me->inUnderline, me->CurrentA);

2580

if (me->inBoldH == FALSE)

2581

HText_appendCharacter(me->text, LY_BOLD_START_CHAR);

2582

HTML_put_string(me, href);

2583

FREE(href);

2584

if (me->inBoldH == FALSE)

2585

HText_appendCharacter(me->text, LY_BOLD_END_CHAR);

2586

HText_endAnchor(me->text, 0);

2587

LYEnsureSingleSpace(me);

2588

}

2589

2590

2591

* Check for a suggested filename via a Content-Disposition with

2592

* a filename=name.suffix in it, if we don't already have it

2593

* via a server header. - FM

2594

2595

} else if (!(me->node_anchor->SugFname && *me->node_anchor->SugFname) &&

2596

!strcasecomp((http_equiv ?

2597

http_equiv : ""), "Content-Disposition")) {

2598

cp = content;

2599

while (*cp != '\0' && strncasecomp(cp, "filename", 8))

2600

cp++;

2601

if (*cp != '\0') {

2602

cp += 8;

2603

while ((*cp != '\0') && (WHITE(*cp) || *cp == '='))

2604

cp++;

2605

while (*cp != '\0' && WHITE(*cp))

2606

cp++;

2607

if (*cp != '\0') {

2608

StrAllocCopy(me->node_anchor->SugFname, cp);

2609

if (*me->node_anchor->SugFname == '\"') {

2610

if ((cp = strchr((me->node_anchor->SugFname + 1),

2611

'\"')) != NULL) {

2612

*(cp + 1) = '\0';

2613

HTMIME_TrimDoubleQuotes(me->node_anchor->SugFname);

2614

} else {

2615

FREE(me->node_anchor->SugFname);

2616

}

2617

if (me->node_anchor->SugFname != NULL &&

2618

*me->node_anchor->SugFname == '\0') {

2619

FREE(me->node_anchor->SugFname);

2620

}

2621

}

2622

if ((cp = me->node_anchor->SugFname) != NULL) {

2623

while (*cp != '\0' && !WHITE(*cp))

2624

cp++;

2625

*cp = '\0';

2626

if (*me->node_anchor->SugFname == '\0')

2627

FREE(me->node_anchor->SugFname);

2628

}

2629

}

2630

}

2631

2632

* Check for a Set-Cookie directive. - AK

2633

2634

} else if (!strcasecomp(NonNull(http_equiv), "Set-Cookie")) {

2635

2636

* This will need to be updated when Set-Cookie/Set-Cookie2

2637

* handling is finalized. For now, we'll still assume

2638

* "historical" cookies in META directives. - FM

2639

2640

url_type = is_url(me->inBASE ?

2641

me->base_href : me->node_anchor->address);

2642

if (url_type == HTTP_URL_TYPE || url_type == HTTPS_URL_TYPE) {

2643

LYSetCookie(content,

2644

NULL,

2645

(me->inBASE ?

2646

me->base_href : me->node_anchor->address));

2647

}

2648

}

2649

2650

2651

* Free the copies. - FM

2652

2653

free_META_copies:

2654

FREE(http_equiv);

2655

FREE(name);

2656

FREE(content);

2657

}

2658

2659

2660

** This function handles P elements in HTML streams.

2661

** If start is TRUE it handles a start tag, and if

2662

** FALSE, an end tag. We presently handle start

2663

** and end tags identically, but this can lead to

2664

** a different number of blank lines between the

2665

** current paragraph and subsequent text when a P

2666

** end tag is present or not in the markup. - FM

2667

2668

PUBLIC void LYHandlePlike ARGS6(

2669

HTStructured *, me,

2670

CONST BOOL*, present,

2671

CONST char **, value,

2672

char **, include GCC_UNUSED,

2673

int, align_idx,

2674

BOOL, start)

2675

{

2676

if (TRUE) {

2677

2678

* FIG content should be a true block, which like P inherits

2679

* the current style. APPLET is like character elements or

2680

* an ALT attribute, unless it content contains a block element.

2681

* If we encounter a P in either's content, we set flags to treat

2682

* the content as a block. - FM

2683

2684

if (start) {

2685

if (me->inFIG)

2686

me->inFIGwithP = TRUE;

2687

2688

if (me->inAPPLET)

2689

me->inAPPLETwithP = TRUE;

2690

}

2691

2692

UPDATE_STYLE;

2693

if (me->List_Nesting_Level >= 0) {

2694

2695

* We're in a list. Treat P as an instruction to

2696

* create one blank line, if not already present,

2697

* then fall through to handle attributes, with

2698

* the "second line" margins. - FM

2699

2700

if (me->inP) {

2701

if (me->inFIG || me->inAPPLET ||

2702

me->inCAPTION || me->inCREDIT ||

2703

me->sp->style->spaceAfter > 0 ||

2704

(start && me->sp->style->spaceBefore > 0)) {

2705

LYEnsureDoubleSpace(me);

2706

} else {

2707

LYEnsureSingleSpace(me);

2708

}

2709

}

2710

} else if (me->sp[0].tag_number == HTML_ADDRESS) {

2711

2712

* We're in an ADDRESS. Treat P as an instruction

2713

* to start a newline, if needed, then fall through

2714

* to handle attributes. - FM

2715

2716

if (!HText_LastLineEmpty(me->text, FALSE)) {

2717

HText_setLastChar(me->text, ' '); /* absorb white space */

2718

HText_appendCharacter(me->text, '\r');

2719

}

2720

} else {

2721

if (start) {

2722

if (!(me->inLABEL && !me->inP)) {

2723

HText_appendParagraph(me->text);

2724

}

2725

} else if (me->sp->style->spaceAfter > 0) {

2726

LYEnsureDoubleSpace(me);

2727

} else {

2728

LYEnsureSingleSpace(me);

2729

}

2730

me->inLABEL = FALSE;

2731

}

2732

me->in_word = NO;

2733

2734

if (LYoverride_default_alignment(me)) {

2735

me->sp->style->alignment = LYstyles(me->sp[0].tag_number)->alignment;

2736

} else if ((me->List_Nesting_Level >= 0 &&

2737

(me->sp->style->id == ST_DivCenter ||

2738

me->sp->style->id == ST_DivLeft ||

2739

me->sp->style->id == ST_DivRight)) ||

2740

((me->Division_Level < 0) &&

2741

(me->sp->style->id == ST_Normal ||

2742

me->sp->style->id == ST_Preformatted))) {

2743

me->sp->style->alignment = HT_LEFT;

2744

} else {

2745

me->sp->style->alignment = (short) me->current_default_alignment;

2746

}

2747

2748

if (start) {

2749

if (present && present[align_idx] && value[align_idx]) {

2750

if (!strcasecomp(value[align_idx], "center") &&

2751

!(me->List_Nesting_Level >= 0 && !me->inP))

2752

me->sp->style->alignment = HT_CENTER;

2753

else if (!strcasecomp(value[align_idx], "right") &&

2754

!(me->List_Nesting_Level >= 0 && !me->inP))

2755

me->sp->style->alignment = HT_RIGHT;

2756

else if (!strcasecomp(value[align_idx], "left") ||

2757

!strcasecomp(value[align_idx], "justify"))

2758

me->sp->style->alignment = HT_LEFT;

2759

}

2760

2761

}

2762

2763

2764

* Mark that we are starting a new paragraph

2765

* and don't have any of it's text yet. - FM

2766

2767

2768

me->inP = FALSE;

2769

}

2770

2771

return;

2772

}

2773

2774

2775

** This function handles SELECT elements in HTML streams.

2776

** If start is TRUE it handles a start tag, and if FALSE,

2777

** an end tag. - FM

2778

2779

PUBLIC void LYHandleSELECT ARGS5(

2780

HTStructured *, me,

2781

CONST BOOL*, present,

2782

CONST char **, value,

2783

char **, include GCC_UNUSED,

2784

BOOL, start)

2785

{

2786

int i;

2787

2788

if (start == TRUE) {

2789

char *name = NULL;

2790

BOOLEAN multiple = NO;

2791

char *size = NULL;

2792

2793

2794

* Initialize the disable attribute.

2795

2796

me->select_disabled = FALSE;

2797

2798

2799

* Make sure we're in a form.

2800

2801

if (!me->inFORM) {

2802

if (LYBadHTML(me))

2803

CTRACE((tfp,

2804

"Bad HTML: SELECT start tag not within FORM tag\n"));

2805

2806

2807

* We should have covered all crash possibilities with the

2808

* current TagSoup parser, so we'll allow it because some

2809

* people with other browsers use SELECT for "information"

2810

* popups, outside of FORM blocks, though no Lynx user

2811

* would do anything that awful, right? - FM

2812

*//***

2813

return;

2814

***/

2815

}

2816

2817

2818

* Check for unclosed TEXTAREA.

2819

2820

if (me->inTEXTAREA) {

2821

if (LYBadHTML(me))

2822

CTRACE((tfp, "Bad HTML: Missing TEXTAREA end tag\n"));

2823

}

2824

2825

2826

* Set to know we are in a select tag.

2827

2828

me->inSELECT = TRUE;

2829

2830

if (!(present && present[HTML_SELECT_NAME] &&

2831

value[HTML_SELECT_NAME] && *value[HTML_SELECT_NAME])) {

2832

StrAllocCopy(name, "");

2833

} else if (strchr(value[HTML_SELECT_NAME], '&') == NULL) {

2834

StrAllocCopy(name, value[HTML_SELECT_NAME]);

2835

} else {

2836

StrAllocCopy(name, value[HTML_SELECT_NAME]);

2837

UNESCAPE_FIELDNAME_TO_STD(&name);

2838

}

2839

if (present && present[HTML_SELECT_MULTIPLE])

2840

multiple=YES;

2841

if (present && present[HTML_SELECT_DISABLED])

2842

me->select_disabled = TRUE;

2843

if (present && present[HTML_SELECT_SIZE] &&

2844

value[HTML_SELECT_SIZE] && *value[HTML_SELECT_SIZE]) {

2845

2846

* Let the size be determined by the number of OPTIONs. - FM

2847

2848

CTRACE((tfp, "LYHandleSELECT: Ignoring SIZE=\"%s\" for SELECT.\n",

2849

value[HTML_SELECT_SIZE]));

2850

}

2851

2852

if (me->inBoldH == TRUE &&

2853

(multiple == NO || LYSelectPopups == FALSE)) {

2854

HText_appendCharacter(me->text, LY_BOLD_END_CHAR);

2855

me->inBoldH = FALSE;

2856

me->needBoldH = TRUE;

2857

}

2858

if (me->inUnderline == TRUE &&

2859

(multiple == NO || LYSelectPopups == FALSE)) {

2860

HText_appendCharacter(me->text, LY_UNDERLINE_END_CHAR);

2861

me->inUnderline = FALSE;

2862

}

2863

2864

if ((multiple == NO && LYSelectPopups == TRUE) &&

2865

(me->sp[0].tag_number == HTML_PRE || me->inPRE == TRUE ||

2866

!me->sp->style->freeFormat) &&

2867

HText_LastLineSize(me->text, FALSE) > (LYcols - 8)) {

2868

2869

* Force a newline when we're using a popup in

2870

* a PRE block and are within 7 columns from the

2871

* right margin. This will allow for the '['

2872

* popup designator and help avoid a wrap in the

2873

* underscore placeholder for the retracted popup

2874

* entry in the HText structure. - FM

2875

2876

HTML_put_character(me, '\n');

2877

me->in_word = NO;

2878

}

2879

2880

LYCheckForID(me, present, value, (int)HTML_SELECT_ID);

2881

2882

HText_beginSelect(name, ATTR_CS_IN, multiple, size);

2883

FREE(name);

2884

FREE(size);

2885

2886

me->first_option = TRUE;

2887

} else {

2888

2889

* Handle end tag.

2890

2891

char *ptr;

2892

2893

2894

* Make sure we had a select start tag.

2895

2896

if (!me->inSELECT) {

2897

if (LYBadHTML(me))

2898

CTRACE((tfp, "Bad HTML: Unmatched SELECT end tag\n"));

2899

return;

2900

}

2901

2902

2903

* Set to know that we are no longer in a select tag.

2904

2905

me->inSELECT = FALSE;

2906

2907

2908

* Clear the disable attribute.

2909

2910

me->select_disabled = FALSE;

2911

2912

2913

* Finish the data off.

2914

2915

HTChunkTerminate(&me->option);

2916

2917

* Finish the previous option.

2918

2919

ptr = HText_setLastOptionValue(me->text,

2920

me->option.data,

2921

me->LastOptionValue,

2922

LAST_ORDER,

2923

me->LastOptionChecked,

2924

me->UCLYhndl,

2925

ATTR_CS_IN);

2926

FREE(me->LastOptionValue);

2927

2928

me->LastOptionChecked = FALSE;

2929

2930

if (HTCurSelectGroupType == F_CHECKBOX_TYPE ||

2931

LYSelectPopups == FALSE) {

2932

2933

* Start a newline after the last checkbox/button option.

2934

2935

LYEnsureSingleSpace(me);

2936

} else {

2937

2938

* Output popup box with the default option to screen,

2939

* but use non-breaking spaces for output.

2940

2941

if (ptr &&

2942

me->sp[0].tag_number == HTML_PRE && strlen(ptr) > 6) {

2943

2944

* The code inadequately handles OPTION fields in PRE tags.

2945

* We'll put up a minimum of 6 characters, and if any

2946

* more would exceed the wrap column, we'll ignore them.

2947

2948

for (i = 0; i < 6; i++) {

2949

if (*ptr == ' ')

2950

HText_appendCharacter(me->text, HT_NON_BREAK_SPACE);

2951

else

2952

HText_appendCharacter(me->text, *ptr);

2953

ptr++;

2954

}

2955

HText_setIgnoreExcess(me->text, TRUE);

2956

}

2957

for (; ptr && *ptr != '\0'; ptr++) {

2958

if (*ptr == ' ')

2959

HText_appendCharacter(me->text, HT_NON_BREAK_SPACE);

2960

else

2961

HText_appendCharacter(me->text, *ptr);

2962

}

2963

2964

* Add end option character.

2965

2966

if (!me->first_option) {

2967

HText_appendCharacter(me->text, ']');

2968

HText_setLastChar(me->text, ']');

2969

me->in_word = YES;

2970

}

2971

HText_setIgnoreExcess(me->text, FALSE);

2972

}

2973

HTChunkClear(&me->option);

2974

2975

if (me->Underline_Level > 0 && me->inUnderline == FALSE) {

2976

HText_appendCharacter(me->text, LY_UNDERLINE_START_CHAR);

2977

me->inUnderline = TRUE;

2978

}

2979

if (me->needBoldH == TRUE && me->inBoldH == FALSE) {

2980

HText_appendCharacter(me->text, LY_BOLD_START_CHAR);

2981

me->inBoldH = TRUE;

2982

me->needBoldH = FALSE;

2983

}

2984

}

2985

}

2986

2987

2988

** This function strips white characters and

2989

** generally fixes up attribute values that

2990

** were received from the SGML parser and

2991

** are to be treated as partial or absolute

2992

** URLs. - FM

2993

2994

PUBLIC int LYLegitimizeHREF ARGS4(

2995

HTStructured *, me,

2996

char **, href,

2997

BOOL, force_slash,

2998

BOOL, strip_dots)

2999

{

3000

int url_type = 0;

3001

char *p = NULL;

3002

char *pound = NULL;

3003

CONST char *Base = NULL;

3004

3005

if (!me || !href || isEmpty(*href))

3006

return(url_type);

3007

3008

if (!LYTrimStartfile(*href)) {

3009

3010

* Collapse spaces in the actual URL, but just

3011

* protect against tabs or newlines in the

3012

* fragment, if present. This seeks to cope

3013

* with atrocities inflicted on the Web by

3014

* authoring tools such as Frontpage. - FM

3015

3016

3017

/* Before working on spaces check if we have any, usually none. */

3018

for (p = *href; (*p && !isspace(*p)); p++)

3019

;

3020

3021

if (*p) { /* p == first space character */

3022

/* no reallocs below, all converted in place */

3023

3024

pound = findPoundSelector(*href);

3025

3026

if (pound != NULL && pound < p) {

3027

convert_to_spaces(p, FALSE); /* done */

3028

3029

} else {

3030

if (pound != NULL)

3031

*pound = '\0'; /* mark */

3032

3033

3034

* No blanks really belong in the HREF,

3035

* but if it refers to an actual file,

3036

* it may actually have blanks in the name.

3037

* Try to accommodate. See also HTParse().

3038

3039

if (LYRemoveNewlines(p) || strchr(p, '\t') != 0) {

3040

LYRemoveBlanks(p); /* a compromise... */

3041

}

3042

3043

if (pound != NULL) {

3044

p = strchr(p, '\0');

3045

*pound = '#'; /* restore */

3046

convert_to_spaces(pound, FALSE);

3047

if (p < pound)

3048

strcpy(p, pound);

3049

}

3050

}

3051

}

3052

}

3053

if (**href == '\0')

3054

return(url_type);

3055

3056

TRANSLATE_AND_UNESCAPE_TO_STD(href);

3057

3058

Base = me->inBASE ?

3059

me->base_href : me->node_anchor->address;

3060

3061

url_type = is_url(*href);

3062

if (!url_type && force_slash && **href == '.' &&

3063

(!strcmp(*href, ".") || !strcmp(*href, "..")) &&

3064

!isFILE_URL(Base)) {

3065

3066

* The Fielding RFC/ID for resolving partial HREFs says

3067

* that a slash should be on the end of the preceding

3068

* symbolic element for "." and "..", but all tested

3069

* browsers only do that for an explicit "./" or "../",

3070

* so we'll respect the RFC/ID only if force_slash was

3071

* TRUE and it's not a file URL. - FM

3072

3073

StrAllocCat(*href, "/");

3074

}

3075

if ((!url_type && LYStripDotDotURLs && strip_dots && **href == '.') &&

3076

!strncasecomp(Base, "http", 4)) {

3077

3078

* We will be resolving a partial reference versus an http

3079

* or https URL, and it has lead dots, which may be retained

3080

* when resolving via HTParse(), but the request would fail

3081

* if the first element of the resultant path is two dots,

3082

* because no http or https server accepts such paths, and

3083

* the current URL draft, likely to become an RFC, says that

3084

* it's optional for the UA to strip them as a form of error

3085

* recovery. So we will, recursively, for http/https URLs,

3086

* like the "major market browsers" which made this problem

3087

* so common on the Web, but we'll also issue a message about

3088

* it, such that the bad partial reference might get corrected

3089

* by the document provider. - FM

3090

3091

char *temp = NULL, *path = NULL, *cp;

3092

CONST char *str = "";

3093

3094

temp = HTParse(*href, Base, PARSE_ALL);

3095

path = HTParse(temp, "", PARSE_PATH+PARSE_PUNCTUATION);

3096

if (!strncmp(path, "/..", 3)) {

3097

cp = (path + 3);

3098

if (LYIsHtmlSep(*cp) || *cp == '\0') {

3099

if (Base[4] == 's') {

3100

str = "s";

3101

}

3102

CTRACE((tfp, "LYLegitimizeHREF: Bad value '%s' for http%s URL.\n",

3103

*href, str));

3104

CTRACE((tfp, " Stripping lead dots.\n"));

3105

if (!me->inBadHREF) {

3106

HTUserMsg(BAD_PARTIAL_REFERENCE);

3107

me->inBadHREF = TRUE;

3108

}

3109

}

3110

if (*cp == '\0') {

3111

StrAllocCopy(*href, "/");

3112

} else if (LYIsHtmlSep(*cp)) {

3113

while (!strncmp(cp, "/..", 3)) {

3114

if (*(cp + 3) == '/') {

3115

cp += 3;

3116

continue;

3117

} else if (*(cp + 3) == '\0') {

3118

*(cp + 1) = '\0';

3119

*(cp + 2) = '\0';

3120

}

3121

break;

3122

}

3123

StrAllocCopy(*href, cp);

3124

}

3125

}

3126

FREE(temp);

3127

FREE(path);

3128

}

3129

return(url_type);

3130

}

3131

3132

3133

** This function checks for a Content-Base header,

3134

** and if not present, a Content-Location header

3135

** which is an absolute URL, and sets the BASE

3136

** accordingly. If set, it will be replaced by

3137

** any BASE tag in the HTML stream, itself. - FM

3138

3139

PUBLIC void LYCheckForContentBase ARGS1(

3140

HTStructured *, me)

3141

{

3142

char *cp = NULL;

3143

BOOL present[HTML_BASE_ATTRIBUTES];

3144

CONST char *value[HTML_BASE_ATTRIBUTES];

3145

int i;

3146

3147

if (!(me && me->node_anchor))

3148

return;

3149

3150

if (me->node_anchor->content_base != NULL) {

3151

3152

* We have a Content-Base value. Use it

3153

* if it's non-zero length. - FM

3154

3155

if (*me->node_anchor->content_base == '\0')

3156

return;

3157

StrAllocCopy(cp, me->node_anchor->content_base);

3158

LYRemoveBlanks(cp);

3159

} else if (me->node_anchor->content_location != NULL) {

3160

3161

* We didn't have a Content-Base value, but do

3162

* have a Content-Location value. Use it if

3163

* it's an absolute URL. - FM

3164

3165

if (*me->node_anchor->content_location == '\0')

3166

return;

3167

StrAllocCopy(cp, me->node_anchor->content_location);

3168

LYRemoveBlanks(cp);

3169

if (!is_url(cp)) {

3170

FREE(cp);

3171

return;

3172

}

3173

} else {

3174

3175

* We had neither a Content-Base nor

3176

* Content-Location value. - FM

3177

3178

return;

3179

}

3180

3181

3182

* If we collapsed to a zero-length value,

3183

* ignore it. - FM

3184

3185

if (*cp == '\0') {

3186

FREE(cp);

3187

return;

3188

}

3189

3190

3191

* Pass the value to HTML_start_element as

3192

* the HREF of a BASE tag. - FM

3193

3194

for (i = 0; i < HTML_BASE_ATTRIBUTES; i++)

3195

present[i] = NO;

3196

present[HTML_BASE_HREF] = YES;

3197

value[HTML_BASE_HREF] = (CONST char *)cp;

3198

(*me->isa->start_element)(me, HTML_BASE, present, value,

3199

0, 0);

3200

FREE(cp);

3201

}

3202

3203

3204

** This function creates NAMEd Anchors if a non-zero-length NAME

3205

** or ID attribute was present in the tag. - FM

3206

3207

PUBLIC void LYCheckForID ARGS4(

3208

HTStructured *, me,

3209

CONST BOOL *, present,

3210

CONST char **, value,

3211

int, attribute)

3212

{

3213

HTChildAnchor *ID_A = NULL;

3214

char *temp = NULL;

3215

3216

if (!(me && me->text))

3217

return;

3218

3219

if (present && present[attribute]

3220

&& value[attribute] && *value[attribute]) {

3221

3222

* Translate any named or numeric character references. - FM

3223

3224

StrAllocCopy(temp, value[attribute]);

3225

LYUCTranslateHTMLString(&temp, me->tag_charset, me->tag_charset,

3226

NO, NO, YES, st_URL);

3227

3228

3229

* Create the link if we still have a non-zero-length string. - FM

3230

3231

if ((temp[0] != '\0') &&

3232

(ID_A = HTAnchor_findChildAndLink(

3233

me->node_anchor, /* Parent */

3234

temp, /* Tag */

3235

NULL, /* Addresss */

3236

(void *)0))) { /* Type */

3237

HText_beginAnchor(me->text, me->inUnderline, ID_A);

3238

HText_endAnchor(me->text, 0);

3239

}

3240

FREE(temp);

3241

}

3242

}

3243

3244

3245

** This function creates a NAMEd Anchor for the ID string

3246

** passed to it directly as an argument. It assumes the

3247

** does not need checking for character references. - FM

3248

3249

PUBLIC void LYHandleID ARGS2(

3250

HTStructured *, me,

3251

CONST char *, id)

3252

{

3253

HTChildAnchor *ID_A = NULL;

3254

3255

if (!(me && me->text) ||

3256

!(id && *id))

3257

return;

3258

3259

3260

* Create the link if we still have a non-zero-length string. - FM

3261

3262

if ((ID_A = HTAnchor_findChildAndLink(

3263

me->node_anchor, /* Parent */

3264

id, /* Tag */

3265

NULL, /* Addresss */

3266

(void *)0)) != NULL) { /* Type */

3267

HText_beginAnchor(me->text, me->inUnderline, ID_A);

3268

HText_endAnchor(me->text, 0);

3269

}

3270

}

3271

3272

3273

** This function checks whether we want to override

3274

** the current default alignment for paragraphs and

3275

** instead use that specified in the element's style

3276

** sheet. - FM

3277

3278

PUBLIC BOOLEAN LYoverride_default_alignment ARGS1(

3279

HTStructured *, me)

3280

{

3281

if (!me)

3282

return NO;

3283

3284

switch(me->sp[0].tag_number) {

3285

case HTML_BLOCKQUOTE:

3286

case HTML_BQ:

3287

case HTML_NOTE:

3288

case HTML_FN:

3289

case HTML_ADDRESS:

3290

me->sp->style->alignment = HT_LEFT;

3291

return YES;

3292

3293

default:

3294

break;

3295

}

3296

return NO;

3297

}

3298

3299

3300

** This function inserts newlines if needed to create double spacing,

3301

** and sets the left margin for subsequent text to the second line

3302

** indentation of the current style. - FM

3303

3304

PUBLIC void LYEnsureDoubleSpace ARGS1(

3305

HTStructured *, me)

3306

{

3307

if (!me || !me->text)

3308

return;

3309

3310

if (!HText_LastLineEmpty(me->text, FALSE)) {

3311

HText_setLastChar(me->text, ' '); /* absorb white space */

3312

HText_appendCharacter(me->text, '\r');

3313

HText_appendCharacter(me->text, '\r');

3314

} else if (!HText_PreviousLineEmpty(me->text, FALSE)) {

3315

HText_setLastChar(me->text, ' '); /* absorb white space */

3316

HText_appendCharacter(me->text, '\r');

3317

} else if (me->List_Nesting_Level >= 0) {

3318

HText_NegateLineOne(me->text);

3319

}

3320

me->in_word = NO;

3321

return;

3322

}

3323

3324

3325

** This function inserts a newline if needed to create single spacing,

3326

** and sets the left margin for subsequent text to the second line

3327

** indentation of the current style. - FM

3328

3329

PUBLIC void LYEnsureSingleSpace ARGS1(

3330

HTStructured *, me)

3331

{

3332

if (!me || !me->text)

3333

return;

3334

3335

if (!HText_LastLineEmpty(me->text, FALSE)) {

3336

HText_setLastChar(me->text, ' '); /* absorb white space */

3337

HText_appendCharacter(me->text, '\r');

3338

} else if (me->List_Nesting_Level >= 0) {

3339

HText_NegateLineOne(me->text);

3340

}

3341

me->in_word = NO;

3342

return;

3343

}

3344

3345

3346

** This function resets paragraph alignments for block

3347

** elements which do not have a defined style sheet. - FM

3348

3349

PUBLIC void LYResetParagraphAlignment ARGS1(

3350

HTStructured *, me)

3351

{

3352

if (!me)

3353

return;

3354

3355

if (me->List_Nesting_Level >= 0 ||

3356

((me->Division_Level < 0) &&

3357

(me->sp->style->id == ST_Normal ||

3358

me->sp->style->id == ST_Preformatted))) {

3359

me->sp->style->alignment = HT_LEFT;

3360

} else {

3361

me->sp->style->alignment = (short) me->current_default_alignment;

3362

}

3363

return;

3364

}

3365

3366

3367

** This example function checks whether the given anchor has

3368

** an address with a file scheme, and if so, loads it into the

3369

** the SGML parser's context->url element, which was passed as

3370

** the second argument. The handle_comment() calling function in

3371

** SGML.c then calls LYDoCSI() in LYUtils.c to insert HTML markup

3372

** into the corresponding stream, homologously to an SSI by an

3373

** HTTP server. - FM

3374

3375

** For functions similar to this but which depend on details of

3376

** the HTML handler's internal data, the calling interface should

3377

** be changed, and functions in SGML.c would have to make sure not

3378

** to call such functions inappropriately (e.g., calling a function

3379

** specific to the Lynx_HTML_Handler when SGML.c output goes to

3380

** some other HTStructured object like in HTMLGen.c), or the new

3381

** functions could be added to the SGML.h interface.

3382

3383

PUBLIC BOOLEAN LYCheckForCSI ARGS2(

3384

HTParentAnchor *, anchor,

3385

char **, url)

3386

{

3387

if (!(anchor && anchor->address))

3388

return FALSE;

3389

3390

if (!isFILE_URL(anchor->address))

3391

return FALSE;

3392

3393

if (!LYisLocalHost(anchor->address))

3394

return FALSE;

3395

3396

StrAllocCopy(*url, anchor->address);

3397

return TRUE;

3398

}

3399

3400

3401

** This function is called from the SGML parser to look at comments

3402

** and see whether we should collect some info from them. Currently

3403

** it only looks for comments with Message-Id and Subject info, in the

3404

** exact form generated by MHonArc for archived mailing list. If found,

3405

** the info is stored in the document's HTParentAnchor. It can later be

3406

** used for generating a mail response.

3407

3408

** We are extra picky here because there isn't any official definition

3409

** for these kinds of comments - we might (and still can) misinterpret

3410

** arbitrary comments as something they aren't.

3411

3412

** If something doesn't look right, for example invalid characters, the

3413

** strings are not stored. Mail responses will use something else as

3414

** the subject, probably the document URL, and will not have an

3415

** In-Reply-To header.

3416

3417

** All this is a hack - to do this the right way, mailing list archivers

3418

** would have to agree on some better mechanism to make this kind of info

3419

** from original mail headers available, for example using LINK. - kw

3420

3421

PUBLIC BOOLEAN LYCommentHacks ARGS2(

3422

HTParentAnchor *, anchor,

3423

CONST char *, comment)

3424

{

3425

CONST char *cp = comment;

3426

size_t len;

3427

3428

if (comment == NULL)

3429

return FALSE;

3430

3431

if (!(anchor && anchor->address))

3432

return FALSE;

3433

3434

if (strncmp(comment, "!--X-Message-Id: ", 17) == 0) {

3435

char *messageid = NULL;

3436

char *p;

3437

for (cp = comment+17; *cp; cp++) {

3438

if (UCH(*cp) >= 127 || !isgraph(UCH(*cp))) {

3439

break;

3440

}

3441

}

3442

if (strcmp(cp, " --")) {

3443

return FALSE;

3444

}

3445

cp = comment + 17;

3446

StrAllocCopy(messageid, cp);

3447

/* This should be ok - message-id should only contain 7-bit ASCII */

3448

if (!LYUCTranslateHTMLString(&messageid, 0, 0, NO, NO, YES, st_URL))

3449

return FALSE;

3450

for (p = messageid; *p; p++) {

3451

if (UCH(*p) >= 127 || !isgraph(UCH(*p))) {

3452

break;

3453

}

3454

}

3455

if (strcmp(p, " --")) {

3456

FREE(messageid);

3457

return FALSE;

3458

}

3459

if ((p = strchr(messageid, '@')) == NULL || p[1] == '\0') {

3460

FREE(messageid);

3461

return FALSE;

3462

}

3463

p = messageid;

3464

if ((len = strlen(p)) >= 8 && !strcmp(&p[len-3], " --")) {

3465

p[len-3] = '\0';

3466

} else {

3467

FREE(messageid);

3468

return FALSE;

3469

}

3470

if (HTAnchor_setMessageID(anchor, messageid)) {

3471

FREE(messageid);

3472

return TRUE;

3473

} else {

3474

FREE(messageid);

3475

return FALSE;

3476

}

3477

}

3478

if (strncmp(comment, "!--X-Subject: ", 14) == 0) {

3479

char *subject = NULL;

3480

char *p;

3481

for (cp = comment+14; *cp; cp++) {

3482

if (UCH(*cp) >= 127 || !isprint(UCH(*cp))) {

3483

return FALSE;

3484

}

3485

}

3486

cp = comment + 14;

3487

StrAllocCopy(subject, cp);

3488

/* @@@

3489

* This may not be the right thing for the subject - but mail

3490

* subjects shouldn't contain 8-bit characters in raw form anyway.

3491

* We have to unescape character entities, since that's what MHonArc

3492

* seems to generate. But if after that there are 8-bit characters

3493

* the string is rejected. We would probably not know correctly

3494

* what charset to assume anyway - the mail sender's can differ from

3495

* the archive's. And the code for sending mail cannot deal well

3496

* with 8-bit characters - we should not put them in the Subject

3497

* header in raw form, but don't have MIME encoding implemented.

3498

* Someone may want to do more about this... - kw

3499

3500

if (!LYUCTranslateHTMLString(&subject, 0, 0, NO, YES, NO, st_HTML))

3501

return FALSE;

3502

for (p = subject; *p; p++) {

3503

if (UCH(*p) >= 127 || !isprint(UCH(*p))) {

3504

FREE(subject);

3505

return FALSE;

3506

}

3507

}

3508

p = subject;

3509

if ((len = strlen(p)) >= 4 && !strcmp(&p[len-3], " --")) {

3510

p[len-3] = '\0';

3511

} else {

3512

FREE(subject);

3513

return FALSE;

3514

}

3515

if (HTAnchor_setSubject(anchor, subject)) {

3516

FREE(subject);

3517

return TRUE;

3518

} else {

3519

FREE(subject);

3520

return FALSE;

3521

}

3522

}

3523

3524

return FALSE;

3525

}

3526

3527

3528

* Create the Title with any left-angle-brackets

3529

* converted to < entities and any ampersands

3530

* converted to & entities. - FM

3531

3532

* Convert 8-bit letters to &#xUUUU to avoid dependencies

3533

* from display character set which may need changing.

3534

* Do NOT convert any 8-bit chars if we have CJK display. - LP

3535

3536

void LYformTitle ARGS2(

3537

char **, dst,

3538

CONST char *, src)

3539

{

3540

if (HTCJK == JAPANESE) {

3541

char *tmp_buffer = NULL;

3542

if ((tmp_buffer = (char *) malloc (strlen(src)+1)) == 0)

3543

outofmem(__FILE__, "LYformTitle");

3544

switch(kanji_code) { /* 1997/11/22 (Sat) 09:28:00 */

3545

case EUC:

3546

TO_EUC((CONST unsigned char *) src, (unsigned char *) tmp_buffer);

3547

break;

3548

case SJIS:

3549

TO_SJIS((CONST unsigned char *) src, (unsigned char *) tmp_buffer);

3550

break;

3551

default:

3552

CTRACE((tfp, "\nLYformTitle: kanji_code is an unexpected value."));

3553

strcpy(tmp_buffer, src);

3554

break;

3555

}

3556

StrAllocCopy(*dst, tmp_buffer);

3557

FREE(tmp_buffer);

3558

} else {

3559

StrAllocCopy(*dst, src);

3560

}

3561

}

Older »