~ubuntu-branches/ubuntu/karmic/postgresql-8.4/karmic-security

« back to all changes in this revision

Viewing changes to src/pl/plpgsql/src/scan.l

Committer: Bazaar Package Importer
Author(s): Martin Pitt
Date: 2009-05-05 00:58:06 UTC
mfrom: (1.1.2 upstream)
Revision ID: james.westby@ubuntu.com-20090505005806-c19tt7oyqb7kuw49

Tags: 8.4~beta1+cvs20090503-1

New upstream snapshot.

files added:
doc/src/sgml/generate_history.pl

doc/src/sgml/release-7.4.sgml

doc/src/sgml/release-8.0.sgml

doc/src/sgml/release-8.1.sgml

doc/src/sgml/release-8.2.sgml

doc/src/sgml/release-8.3.sgml

doc/src/sgml/release-8.4.sgml

doc/src/sgml/release-old.sgml

src/test/regress/expected/prepared_xacts_1.out

src/tools/check_keywords.pl

files removed:
src/backend/port/dynloader/README.dlfcn.aix

src/tools/major_release_split

files modified:
contrib/citext/expected/citext.out

contrib/citext/expected/citext_1.out

contrib/citext/sql/citext.sql

contrib/pg_trgm/Makefile

contrib/pgcrypto/crypt-md5.c

contrib/pgrowlocks/Makefile

contrib/pgstattuple/Makefile

debian/changelog

doc/src/sgml/Makefile

doc/src/sgml/advanced.sgml

doc/src/sgml/arch-dev.sgml

doc/src/sgml/array.sgml

doc/src/sgml/backup.sgml

doc/src/sgml/client-auth.sgml

doc/src/sgml/config.sgml

doc/src/sgml/contrib.sgml

doc/src/sgml/datatype.sgml

doc/src/sgml/ddl.sgml

doc/src/sgml/dml.sgml

doc/src/sgml/docguide.sgml

doc/src/sgml/ecpg.sgml

doc/src/sgml/filelist.sgml

doc/src/sgml/func.sgml

doc/src/sgml/high-availability.sgml

doc/src/sgml/history.sgml

doc/src/sgml/indices.sgml

doc/src/sgml/info.sgml

doc/src/sgml/install-win32.sgml

doc/src/sgml/installation.sgml

doc/src/sgml/intro.sgml

doc/src/sgml/libpq.sgml

doc/src/sgml/maintenance.sgml

doc/src/sgml/monitoring.sgml

doc/src/sgml/mvcc.sgml

doc/src/sgml/perform.sgml

doc/src/sgml/pgbuffercache.sgml

doc/src/sgml/plpgsql.sgml

doc/src/sgml/postgres.sgml

doc/src/sgml/problems.sgml

doc/src/sgml/queries.sgml

doc/src/sgml/query.sgml

doc/src/sgml/ref/alter_table.sgml

doc/src/sgml/ref/declare.sgml

doc/src/sgml/ref/fetch.sgml

doc/src/sgml/ref/prepare_transaction.sgml

doc/src/sgml/ref/psql-ref.sgml

doc/src/sgml/ref/select.sgml

doc/src/sgml/ref/truncate.sgml

doc/src/sgml/regress.sgml

doc/src/sgml/release.sgml

doc/src/sgml/rowtypes.sgml

doc/src/sgml/runtime.sgml

doc/src/sgml/sources.sgml

doc/src/sgml/sql.sgml

doc/src/sgml/start.sgml

doc/src/sgml/storage.sgml

doc/src/sgml/syntax.sgml

doc/src/sgml/textsearch.sgml

doc/src/sgml/typeconv.sgml

doc/src/sgml/xfunc.sgml

doc/src/sgml/xml2.sgml

src/backend/access/transam/twophase.c

src/backend/access/transam/varsup.c

src/backend/access/transam/xlog.c

src/backend/catalog/heap.c

src/backend/commands/copy.c

src/backend/commands/dbcommands.c

src/backend/libpq/hba.c

src/backend/libpq/ip.c

src/backend/nodes/copyfuncs.c

src/backend/nodes/equalfuncs.c

src/backend/nodes/outfuncs.c

src/backend/optimizer/path/allpaths.c

src/backend/optimizer/path/costsize.c

src/backend/optimizer/path/equivclass.c

src/backend/optimizer/path/indxpath.c

src/backend/optimizer/path/orindxpath.c

src/backend/optimizer/plan/createplan.c

src/backend/optimizer/plan/initsplan.c

src/backend/optimizer/plan/planner.c

src/backend/optimizer/plan/subselect.c

src/backend/optimizer/prep/prepjointree.c

src/backend/optimizer/prep/preptlist.c

src/backend/optimizer/util/placeholder.c

src/backend/optimizer/util/restrictinfo.c

src/backend/optimizer/util/tlist.c

src/backend/optimizer/util/var.c

src/backend/parser/gram.y

src/backend/parser/parse_func.c

src/backend/parser/parser.c

src/backend/parser/scan.l

src/backend/port/dynloader/aix.c

src/backend/port/dynloader/aix.h

src/backend/port/dynloader/osf.c

src/backend/postmaster/postmaster.c

src/backend/utils/adt/arrayfuncs.c

src/backend/utils/adt/datetime.c

src/backend/utils/adt/selfuncs.c

src/backend/utils/adt/varlena.c

src/backend/utils/mb/Unicode/euc_kr_to_utf8.map

src/backend/utils/mb/Unicode/johab_to_utf8.map

src/backend/utils/mb/Unicode/uhc_to_utf8.map

src/backend/utils/mb/Unicode/utf8_to_euc_kr.map

src/backend/utils/mb/Unicode/utf8_to_johab.map

src/backend/utils/mb/Unicode/utf8_to_uhc.map

src/backend/utils/mb/encnames.c

src/backend/utils/mb/mbutils.c

src/backend/utils/misc/guc.c

src/backend/utils/misc/postgresql.conf.sample

src/backend/utils/misc/tzparser.c

src/bin/pg_dump/pg_backup_archiver.c

src/bin/pg_dump/pg_dumpall.c

src/bin/pg_dump/pg_restore.c

src/bin/psql/command.c

src/bin/psql/common.c

src/bin/psql/copy.c

src/bin/psql/describe.c

src/bin/psql/describe.h

src/bin/psql/help.c

src/bin/psql/print.c

src/bin/scripts/vacuumdb.c

src/include/libpq/libpq-be.h

src/include/mb/pg_wchar.h

src/include/nodes/relation.h

src/include/optimizer/prep.h

src/include/optimizer/restrictinfo.h

src/include/optimizer/var.h

src/include/parser/parser.h

src/include/port/win32.h

src/interfaces/libpq/bcc32.mak

src/interfaces/libpq/fe-connect.c

src/interfaces/libpq/fe-secure.c

src/interfaces/libpq/libpq-int.h

src/pl/plpgsql/src/gram.y

src/pl/plpgsql/src/pl_comp.c

src/pl/plpgsql/src/pl_exec.c

src/pl/plpgsql/src/plpgsql.h

src/pl/plpgsql/src/scan.l

src/test/regress/expected/aggregates.out

src/test/regress/expected/arrays.out

src/test/regress/expected/plpgsql.out

src/test/regress/expected/polymorphism.out

src/test/regress/expected/prepared_xacts.out

src/test/regress/pg_regress.c

src/test/regress/sql/aggregates.sql

src/test/regress/sql/plpgsql.sql

src/test/regress/sql/prepared_xacts.sql

src/tools/RELEASE_CHANGES

src/tools/msvc/Install.pm

Show diffs side-by-side

added added

removed removed

src/pl/plpgsql/src/scan.l

#include "mb/pg_wchar.h"

/* No reason to constrain amount of data slurped */

#define YY_READ_BUF_SIZE 16777216

/* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */

#undef fprintf

#define fprintf(file, fmt, msg) ereport(ERROR, (errmsg_internal("%s", msg)))

/*

* When we parse a token that requires multiple lexer rules to process,

* remember the token's starting position this way.

*/

#define SAVE_TOKEN_START() \

( start_lineno = plpgsql_scanner_lineno(), start_charpos = yytext )

/* Handles to the buffer that the lexer uses internally */

static YY_BUFFER_STATE scanbufhandle;

static char *scanbuf;

static const char *scanstr; /* original input string */

static int scanner_functype;

static bool scanner_typereported;

static int pushback_token;

static bool have_pushback_token;

static const char *cur_line_start;

static int cur_line_num;

static int xcdepth = 0; /* depth of nesting in slash-star comments */

static char *dolqstart; /* current $foo$ quote start string */

static int dolqlen; /* signal to plpgsql_get_string_value */

extern PGDLLIMPORT bool standard_conforming_strings;

bool plpgsql_SpaceScanned = false;

%option case-insensitive

%x IN_STRING

%x IN_COMMENT

%x IN_DOLLARQUOTE

/*

* Exclusive states are a subset of the core lexer's:

* <xc> extended C-style comments

* <xq> standard quoted strings

* <xe> extended quoted strings (support backslash escape sequences)

* <xdolq> $foo$ quoted strings

*/

%x xc

%x xe

%x xq

%x xdolq

/*

* Definitions --- these generally must match the core lexer, but in some

* cases we can simplify, since we only care about identifying the token

* boundaries and not about deriving the represented value. Also, we

* aren't trying to lex multicharacter operators so their interactions

* with comments go away.

*/

space [ \t\n\r\f]

horiz_space [ \t\f]

newline [\n\r]

non_newline [^\n\r]

comment ("--"{non_newline}*)

whitespace ({space}+|{comment})

special_whitespace ({space}+|{comment}{newline})

horiz_whitespace ({horiz_space}|{comment})

whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*)

quote '

quotestop {quote}{whitespace}*

quotecontinue {quote}{whitespace_with_newline}{quote}

quotefail {quote}{whitespace}*"-"

xestart [eE]{quote}

100

xeinside [^\\']+

101

xeescape [\\].

102

103

xqstart {quote}

104

xqdouble {quote}{quote}

105

xqinside [^']+

106

107

dolq_start [A-Za-z\200-\377_]

108

dolq_cont [A-Za-z\200-\377_0-9]

109

dolqdelim \$({dolq_start}{dolq_cont}*)?\$

110

dolqfailed \${dolq_start}{dolq_cont}*

111

dolqinside [^$]+

112

113

xcstart \/\*

114

xcstop \*+\/

115

xcinside [^*/]+

116

117

digit [0-9]

118

ident_start [A-Za-z\200-\377_]

119

ident_cont [A-Za-z\200-\377_0-9\$]

120

121

/* This is a simpler treatment of quoted identifiers than the core uses */

122

quoted_ident (\"[^\"]*\")+

123

124

identifier ({ident_start}{ident_cont}*|{quoted_ident})

125

126

param \${digit}+

127

space [ \t\n\r\f]

/* $foo$ style quotes ("dollar quoting")

* copied straight from the backend SQL parser

*/

dolq_start [A-Za-z\200-\377_]

dolq_cont [A-Za-z\200-\377_0-9]

dolqdelim \$({dolq_start}{dolq_cont}*)?\$

dolqinside [^$]+

128

129

/* ----------

130

* Local variables in scanner to remember where

142

plpgsql_SpaceScanned = false;

143

144

/* ----------

* On the first call to a new source report the

100

* function's type (T_FUNCTION or T_TRIGGER)

101

* ----------

102

103

if (!scanner_typereported)

104

{

105

scanner_typereported = true;

106

return scanner_functype;

107

}

108

109

/* ----------

110

145

* The keyword rules

111

146

* ----------

112

147

225

260

226

261

{digit}+ { return T_NUMBER; }

227

262

228

\". {

229

plpgsql_error_lineno = plpgsql_scanner_lineno();

230

ereport(ERROR,

231

(errcode(ERRCODE_DATATYPE_MISMATCH),

232

errmsg("unterminated quoted identifier")));

233

}

234

235

/* ----------

236

* Ignore whitespaces but remember this happened

237

* ----------

238

239

{space}+ { plpgsql_SpaceScanned = true; }

240

241

/* ----------

242

* Eat up comments

243

* ----------

244

245

--[^\r\n]* ;

246

247

\/\* { start_lineno = plpgsql_scanner_lineno();

248

BEGIN(IN_COMMENT);

249

}

250

<IN_COMMENT>\*\/ { BEGIN(INITIAL); plpgsql_SpaceScanned = true; }

251

<IN_COMMENT>\n ;

252

<IN_COMMENT>. ;

253

<IN_COMMENT><<EOF>> {

254

plpgsql_error_lineno = start_lineno;

255

ereport(ERROR,

256

(errcode(ERRCODE_DATATYPE_MISMATCH),

257

errmsg("unterminated /* comment")));

258

}

259

260

/* ----------

261

* Collect anything inside of ''s and return one STRING token

262

263

* Hacking yytext/yyleng here lets us avoid using yymore(), which is

264

* a win for performance. It's safe because we know the underlying

265

* input buffer is not changing.

266

* ----------

267

268

' {

269

start_lineno = plpgsql_scanner_lineno();

270

start_charpos = yytext;

271

BEGIN(IN_STRING);

272

}

273

[eE]' {

274

/* for now, treat the same as a regular literal */

275

start_lineno = plpgsql_scanner_lineno();

276

start_charpos = yytext;

277

BEGIN(IN_STRING);

278

}

279

<IN_STRING>\\. { }

280

<IN_STRING>\\ { /* can only happen with \ at EOF */ }

281

<IN_STRING>'' { }

282

<IN_STRING>' {

283

/* tell plpgsql_get_string_value it's not a dollar quote */

284

dolqlen = 0;

285

/* adjust yytext/yyleng to describe whole string token */

286

yyleng += (yytext - start_charpos);

287

yytext = start_charpos;

288

BEGIN(INITIAL);

289

return T_STRING;

290

}

291

<IN_STRING>[^'\\]+ { }

292

<IN_STRING><<EOF>> {

293

plpgsql_error_lineno = start_lineno;

294

ereport(ERROR,

295

(errcode(ERRCODE_DATATYPE_MISMATCH),

296

errmsg("unterminated quoted string")));

297

}

298

299

{dolqdelim} {

300

start_lineno = plpgsql_scanner_lineno();

301

start_charpos = yytext;

302

dolqstart = pstrdup(yytext);

303

BEGIN(IN_DOLLARQUOTE);

304

}

305

<IN_DOLLARQUOTE>{dolqdelim} {

306

if (strcmp(yytext, dolqstart) == 0)

307

{

308

pfree(dolqstart);

309

/* tell plpgsql_get_string_value it is a dollar quote */

310

dolqlen = yyleng;

263

\". { yyerror("unterminated quoted identifier"); }

264

265

/* ----------

266

* Ignore whitespace (including comments) but remember this happened

267

* ----------

268

269

{whitespace} { plpgsql_SpaceScanned = true; }

270

271

/* ----------

272

* Comment and literal handling is mostly copied from the core lexer

273

* ----------

274

275

{xcstart} {

276

/* Set location in case of syntax error in comment */

277

SAVE_TOKEN_START();

278

xcdepth = 0;

279

BEGIN(xc);

280

plpgsql_SpaceScanned = true;

281

}

282

283

<xc>{xcstart} {

284

xcdepth++;

285

}

286

287

<xc>{xcstop} {

288

if (xcdepth <= 0)

289

BEGIN(INITIAL);

290

else

291

xcdepth--;

292

}

293

294

<xc>{xcinside} {

295

/* ignore */

296

}

297

298

<xc>\/+ {

299

/* ignore */

300

}

301

302

<xc>\*+ {

303

/* ignore */

304

}

305

306

<xc><<EOF>> { yyerror("unterminated /* comment"); }

307

308

{xqstart} {

309

SAVE_TOKEN_START();

310

if (standard_conforming_strings)

311

BEGIN(xq);

312

else

313

BEGIN(xe);

314

}

315

{xestart} {

316

SAVE_TOKEN_START();

317

BEGIN(xe);

318

}

319

<xq,xe>{quotestop} |

320

<xq,xe>{quotefail} {

321

yyless(1);

322

BEGIN(INITIAL);

311

323

/* adjust yytext/yyleng to describe whole string token */

312

324

yyleng += (yytext - start_charpos);

313

325

yytext = start_charpos;

314

BEGIN(INITIAL);

315

326

return T_STRING;

316

}

317

else

318

{

319

320

* When we fail to match $...$ to dolqstart, transfer

321

* the $... part to the output, but put back the final

322

* $ for rescanning. Consider $delim$...$junk$delim$

323

324

yyless(yyleng-1);

325

}

326

}

327

<IN_DOLLARQUOTE>{dolqinside} { }

328

<IN_DOLLARQUOTE>. { /* needed for $ inside the quoted text */ }

329

<IN_DOLLARQUOTE><<EOF>> {

330

plpgsql_error_lineno = start_lineno;

331

ereport(ERROR,

332

(errcode(ERRCODE_DATATYPE_MISMATCH),

333

errmsg("unterminated dollar-quoted string")));

334

}

327

}

328

<xq,xe>{xqdouble} {

329

}

330

<xq>{xqinside} {

331

}

332

<xe>{xeinside} {

333

}

334

<xe>{xeescape} {

335

}

336

<xq,xe>{quotecontinue} {

337

/* ignore */

338

}

339

<xe>. {

340

/* This is only needed for \ just before EOF */

341

}

342

<xq,xe><<EOF>> { yyerror("unterminated quoted string"); }

343

344

{dolqdelim} {

345

SAVE_TOKEN_START();

346

dolqstart = pstrdup(yytext);

347

BEGIN(xdolq);

348

}

349

{dolqfailed} {

350

/* throw back all but the initial "$" */

351

yyless(1);

352

/* and treat it as {other} */

353

return yytext[0];

354

}

355

<xdolq>{dolqdelim} {

356

if (strcmp(yytext, dolqstart) == 0)

357

{

358

pfree(dolqstart);

359

BEGIN(INITIAL);

360

/* adjust yytext/yyleng to describe whole string */

361

yyleng += (yytext - start_charpos);

362

yytext = start_charpos;

363

return T_STRING;

364

}

365

else

366

{

367

368

* When we fail to match $...$ to dolqstart, transfer

369

* the $... part to the output, but put back the final

370

* $ for rescanning. Consider $delim$...$junk$delim$

371

372

yyless(yyleng-1);

373

}

374

}

375

<xdolq>{dolqinside} {

376

}

377

<xdolq>{dolqfailed} {

378

}

379

<xdolq>. {

380

/* This is only needed for $ inside the quoted text */

381

}

382

<xdolq><<EOF>> { yyerror("unterminated dollar-quoted string"); }

335

383

336

384

/* ----------

337

385

* Any unmatched character is returned as is

338

386

* ----------

339

387

340

. { return yytext[0]; }

388

. {

389

return yytext[0];

390

}

341

391

342

392

343

393

437

487

* to cite in error messages.

438

488

439

489

void

440

plpgsql_scanner_init(const char *str, int functype)

490

plpgsql_scanner_init(const char *str)

441

491

{

442

492

Size slen;

443

493

460

510

/* Other setup */

461

511

scanstr = str;

462

512

463

scanner_functype = functype;

464

scanner_typereported = false;

465

466

513

have_pushback_token = false;

467

514

468

515

cur_line_start = scanbuf;

493

540

yy_delete_buffer(scanbufhandle);

494

541

pfree(scanbuf);

495

542

}

496

497

498

* Called after a T_STRING token is read to get the string literal's value

499

* as a palloc'd string. (We make this a separate call because in many

500

* scenarios there's no need to get the decoded value.)

501

502

* Note: we expect the literal to be the most recently lexed token. This

503

* would not work well if we supported multiple-token pushback or if

504

* plpgsql_yylex() wanted to read ahead beyond a T_STRING token.

505

506

char *

507

plpgsql_get_string_value(void)

508

{

509

char *result;

510

const char *cp;

511

int len;

512

513

if (dolqlen > 0)

514

{

515

/* Token is a $foo$...$foo$ string */

516

len = yyleng - 2 * dolqlen;

517

Assert(len >= 0);

518

result = (char *) palloc(len + 1);

519

memcpy(result, yytext + dolqlen, len);

520

result[len] = '\0';

521

}

522

else if (*yytext == 'E' || *yytext == 'e')

523

{

524

/* Token is an E'...' string */

525

result = (char *) palloc(yyleng + 1); /* more than enough room */

526

len = 0;

527

for (cp = yytext + 2; *cp; cp++)

528

{

529

if (*cp == '\'')

530

{

531

if (cp[1] == '\'')

532

result[len++] = *cp++;

533

/* else it must be string end quote */

534

}

535

else if (*cp == '\\')

536

{

537

if (cp[1] != '\0') /* just a paranoid check */

538

result[len++] = *(++cp);

539

}

540

else

541

result[len++] = *cp;

542

}

543

result[len] = '\0';

544

}

545

else

546

{

547

/* Token is a '...' string */

548

result = (char *) palloc(yyleng + 1); /* more than enough room */

549

len = 0;

550

for (cp = yytext + 1; *cp; cp++)

551

{

552

if (*cp == '\'')

553

{

554

if (cp[1] == '\'')

555

result[len++] = *cp++;

556

/* else it must be string end quote */

557

}

558

else if (*cp == '\\')

559

{

560

if (cp[1] != '\0') /* just a paranoid check */

561

result[len++] = *(++cp);

562

}

563

else

564

result[len++] = *cp;

565

}

566

result[len] = '\0';

567

}

568

return result;

569

}

Older »