~ubuntu-branches/ubuntu/oneiric/pcre3/oneiric

« back to all changes in this revision

Viewing changes to pcre_internal.h

Committer: Bazaar Package Importer
Author(s): Sebastien Bacher
Date: 2011-01-25 12:25:05 UTC
mfrom: (1.1.10 upstream)
Revision ID: james.westby@ubuntu.com-20110125122505-poii7htffw5ctwdy

Tags: 8.12-0ubuntu1

New upstream version

files added:
testdata/grepinput3

files modified:
ChangeLog

HACKING

Makefile.am

Makefile.in

NEWS

NON-UNIX-USE

PrepareRelease

RunGrepTest

RunTest

aclocal.m4

config.guess

config.h.generic

config.sub

configure

configure.ac

debian/changelog

debian/control

depcomp

doc/html/pcre.html

doc/html/pcre_compile.html

doc/html/pcre_compile2.html

doc/html/pcre_exec.html

doc/html/pcreapi.html

doc/html/pcrecallout.html

doc/html/pcrecompat.html

doc/html/pcrecpp.html

doc/html/pcredemo.html

doc/html/pcregrep.html

doc/html/pcrematching.html

doc/html/pcrepartial.html

doc/html/pcrepattern.html

doc/html/pcreperform.html

doc/html/pcreposix.html

doc/html/pcreprecompile.html

doc/html/pcresample.html

doc/html/pcresyntax.html

doc/html/pcretest.html

doc/pcre.3

doc/pcre.txt

doc/pcre_compile.3

doc/pcre_compile2.3

doc/pcre_config.3

doc/pcre_exec.3

doc/pcreapi.3

doc/pcrecallout.3

doc/pcrecompat.3

doc/pcrecpp.3

doc/pcregrep.1

doc/pcregrep.txt

doc/pcrematching.3

doc/pcrepartial.3

doc/pcrepattern.3

doc/pcreperform.3

doc/pcreposix.3

doc/pcreprecompile.3

doc/pcresample.3

doc/pcresyntax.3

doc/pcretest.1

doc/pcretest.txt

ltmain.sh

m4/libtool.m4

missing

pcre.h.generic

pcre.h.in

pcre_chartables.c.dist

pcre_compile.c

pcre_dfa_exec.c

pcre_exec.c

pcre_internal.h

pcre_printint.src

pcre_scanner.cc

pcre_scanner_unittest.cc

pcre_stringpiece.h.in

pcre_stringpiece_unittest.cc

pcre_study.c

pcre_tables.c

pcre_valid_utf8.c

pcre_xclass.c

pcrecpp.cc

pcredemo.c

pcregrep.c

pcreposix.c

pcreposix.h

pcretest.c

perltest.pl

testdata/grepoutput

testdata/grepoutput8

testdata/testinput1

testdata/testinput10

testdata/testinput11

testdata/testinput12

testdata/testinput2

testdata/testinput4

testdata/testinput5

testdata/testinput6

testdata/testinput7

testdata/testinput8

testdata/testinput9

testdata/testoutput1

testdata/testoutput10

testdata/testoutput11

testdata/testoutput12

testdata/testoutput2

testdata/testoutput4

testdata/testoutput5

testdata/testoutput6

testdata/testoutput7

testdata/testoutput8

testdata/testoutput9

Show diffs side-by-side

added added

removed removed

pcre_internal.h

408

409

/* When UTF-8 encoding is being used, a character is no longer just a single

410

byte. The macros for character handling generate simple sequences when used in

411

byte-mode, and more complicated ones for UTF-8 characters. BACKCHAR should

412

never be called in byte mode. To make sure it can never even appear when UTF-8

413

support is omitted, we don't even define it. */

411

byte-mode, and more complicated ones for UTF-8 characters. GETCHARLENTEST is

412

not used when UTF-8 is not supported, so it is not defined, and BACKCHAR should

413

never be called in byte mode. To make sure they can never even appear when

414

UTF-8 support is omitted, we don't even define them. */

414

415

416

#ifndef SUPPORT_UTF8

416

417

#define GETCHAR(c, eptr) c = *eptr;

418

419

#define GETCHARINC(c, eptr) c = *eptr++;

419

420

#define GETCHARINCTEST(c, eptr) c = *eptr++;

420

421

#define GETCHARLEN(c, eptr, len) c = *eptr;

422

/* #define GETCHARLENTEST(c, eptr, len) */

421

423

/* #define BACKCHAR(eptr) */

422

424

423

425

#else /* SUPPORT_UTF8 */

424

426

427

/* These macros were originally written in the form of loops that used data

428

from the tables whose names start with _pcre_utf8_table. They were rewritten by

429

a user so as not to use loops, because in some environments this gives a

430

significant performance advantage, and it seems never to do any harm. */

431

432

/* Base macro to pick up the remaining bytes of a UTF-8 character, not

433

advancing the pointer. */

434

435

#define GETUTF8(c, eptr) \

436

{ \

437

if ((c & 0x20) == 0) \

438

c = ((c & 0x1f) << 6) | (eptr[1] & 0x3f); \

439

else if ((c & 0x10) == 0) \

440

c = ((c & 0x0f) << 12) | ((eptr[1] & 0x3f) << 6) | (eptr[2] & 0x3f); \

441

else if ((c & 0x08) == 0) \

442

c = ((c & 0x07) << 18) | ((eptr[1] & 0x3f) << 12) | \

443

((eptr[2] & 0x3f) << 6) | (eptr[3] & 0x3f); \

444

else if ((c & 0x04) == 0) \

445

c = ((c & 0x03) << 24) | ((eptr[1] & 0x3f) << 18) | \

446

((eptr[2] & 0x3f) << 12) | ((eptr[3] & 0x3f) << 6) | \

447

(eptr[4] & 0x3f); \

448

else \

449

c = ((c & 0x01) << 30) | ((eptr[1] & 0x3f) << 24) | \

450

((eptr[2] & 0x3f) << 18) | ((eptr[3] & 0x3f) << 12) | \

451

((eptr[4] & 0x3f) << 6) | (eptr[5] & 0x3f); \

452

}

453

425

454

/* Get the next UTF-8 character, not advancing the pointer. This is called when

426

455

we know we are in UTF-8 mode. */

427

456

428

457

#define GETCHAR(c, eptr) \

429

458

c = *eptr; \

430

if (c >= 0xc0) \

431

{ \

432

int gcii; \

433

int gcaa = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */ \

434

int gcss = 6*gcaa; \

435

c = (c & _pcre_utf8_table3[gcaa]) << gcss; \

436

for (gcii = 1; gcii <= gcaa; gcii++) \

437

{ \

438

gcss -= 6; \

439

c |= (eptr[gcii] & 0x3f) << gcss; \

440

} \

441

}

459

if (c >= 0xc0) GETUTF8(c, eptr);

442

460

443

461

/* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the

444

462

pointer. */

445

463

446

464

#define GETCHARTEST(c, eptr) \

447

465

c = *eptr; \

448

if (utf8 && c >= 0xc0) \

466

if (utf8 && c >= 0xc0) GETUTF8(c, eptr);

467

468

/* Base macro to pick up the remaining bytes of a UTF-8 character, advancing

469

the pointer. */

470

471

#define GETUTF8INC(c, eptr) \

449

472

{ \

450

int gcii; \

451

int gcaa = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */ \

452

int gcss = 6*gcaa; \

453

c = (c & _pcre_utf8_table3[gcaa]) << gcss; \

454

for (gcii = 1; gcii <= gcaa; gcii++) \

455

{ \

456

gcss -= 6; \

457

c |= (eptr[gcii] & 0x3f) << gcss; \

473

if ((c & 0x20) == 0) \

474

c = ((c & 0x1f) << 6) | (*eptr++ & 0x3f); \

475

else if ((c & 0x10) == 0) \

476

{ \

477

c = ((c & 0x0f) << 12) | ((*eptr & 0x3f) << 6) | (eptr[1] & 0x3f); \

478

eptr += 2; \

479

} \

480

else if ((c & 0x08) == 0) \

481

{ \

482

c = ((c & 0x07) << 18) | ((*eptr & 0x3f) << 12) | \

483

((eptr[1] & 0x3f) << 6) | (eptr[2] & 0x3f); \

484

eptr += 3; \

485

} \

486

else if ((c & 0x04) == 0) \

487

{ \

488

c = ((c & 0x03) << 24) | ((*eptr & 0x3f) << 18) | \

489

((eptr[1] & 0x3f) << 12) | ((eptr[2] & 0x3f) << 6) | \

490

(eptr[3] & 0x3f); \

491

eptr += 4; \

492

} \

493

else \

494

{ \

495

c = ((c & 0x01) << 30) | ((*eptr & 0x3f) << 24) | \

496

((eptr[1] & 0x3f) << 18) | ((eptr[2] & 0x3f) << 12) | \

497

((eptr[3] & 0x3f) << 6) | (eptr[4] & 0x3f); \

498

eptr += 5; \

458

499

} \

459

500

}

460

501

463

504

464

505

#define GETCHARINC(c, eptr) \

465

506

c = *eptr++; \

466

if (c >= 0xc0) \

467

{ \

468

int gcaa = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */ \

469

int gcss = 6*gcaa; \

470

c = (c & _pcre_utf8_table3[gcaa]) << gcss; \

471

while (gcaa-- > 0) \

472

{ \

473

gcss -= 6; \

474

c |= (*eptr++ & 0x3f) << gcss; \

475

} \

476

}

507

if (c >= 0xc0) GETUTF8INC(c, eptr);

477

508

478

/* Get the next character, testing for UTF-8 mode, and advancing the pointer */

509

/* Get the next character, testing for UTF-8 mode, and advancing the pointer.

510

This is called when we don't know if we are in UTF-8 mode. */

479

511

480

512

#define GETCHARINCTEST(c, eptr) \

481

513

c = *eptr++; \

482

if (utf8 && c >= 0xc0) \

514

if (utf8 && c >= 0xc0) GETUTF8INC(c, eptr);

515

516

/* Base macro to pick up the remaining bytes of a UTF-8 character, not

517

advancing the pointer, incrementing the length. */

518

519

#define GETUTF8LEN(c, eptr, len) \

483

520

{ \

484

int gcaa = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */ \

485

int gcss = 6*gcaa; \

486

c = (c & _pcre_utf8_table3[gcaa]) << gcss; \

487

while (gcaa-- > 0) \

488

{ \

489

gcss -= 6; \

490

c |= (*eptr++ & 0x3f) << gcss; \

521

if ((c & 0x20) == 0) \

522

{ \

523

c = ((c & 0x1f) << 6) | (eptr[1] & 0x3f); \

524

len++; \

525

} \

526

else if ((c & 0x10) == 0) \

527

{ \

528

c = ((c & 0x0f) << 12) | ((eptr[1] & 0x3f) << 6) | (eptr[2] & 0x3f); \

529

len += 2; \

530

} \

531

else if ((c & 0x08) == 0) \

532

{ \

533

c = ((c & 0x07) << 18) | ((eptr[1] & 0x3f) << 12) | \

534

((eptr[2] & 0x3f) << 6) | (eptr[3] & 0x3f); \

535

len += 3; \

536

} \

537

else if ((c & 0x04) == 0) \

538

{ \

539

c = ((c & 0x03) << 24) | ((eptr[1] & 0x3f) << 18) | \

540

((eptr[2] & 0x3f) << 12) | ((eptr[3] & 0x3f) << 6) | \

541

(eptr[4] & 0x3f); \

542

len += 4; \

543

} \

544

else \

545

{ \

546

c = ((c & 0x01) << 30) | ((eptr[1] & 0x3f) << 24) | \

547

((eptr[2] & 0x3f) << 18) | ((eptr[3] & 0x3f) << 12) | \

548

((eptr[4] & 0x3f) << 6) | (eptr[5] & 0x3f); \

549

len += 5; \

491

550

} \

492

551

}

493

552

496

555

497

556

#define GETCHARLEN(c, eptr, len) \

498

557

c = *eptr; \

499

if (c >= 0xc0) \

500

{ \

501

int gcii; \

502

int gcaa = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */ \

503

int gcss = 6*gcaa; \

504

c = (c & _pcre_utf8_table3[gcaa]) << gcss; \

505

for (gcii = 1; gcii <= gcaa; gcii++) \

506

{ \

507

gcss -= 6; \

508

c |= (eptr[gcii] & 0x3f) << gcss; \

509

} \

510

len += gcaa; \

511

}

558

if (c >= 0xc0) GETUTF8LEN(c, eptr, len);

512

559

513

560

/* Get the next UTF-8 character, testing for UTF-8 mode, not advancing the

514

561

pointer, incrementing length if there are extra bytes. This is called when we

515

know we are in UTF-8 mode. */

562

do not know if we are in UTF-8 mode. */

516

563

517

564

#define GETCHARLENTEST(c, eptr, len) \

518

565

c = *eptr; \

519

if (utf8 && c >= 0xc0) \

520

{ \

521

int gcii; \

522

int gcaa = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */ \

523

int gcss = 6*gcaa; \

524

c = (c & _pcre_utf8_table3[gcaa]) << gcss; \

525

for (gcii = 1; gcii <= gcaa; gcii++) \

526

{ \

527

gcss -= 6; \

528

c |= (eptr[gcii] & 0x3f) << gcss; \

529

} \

530

len += gcaa; \

531

}

566

if (utf8 && c >= 0xc0) GETUTF8LEN(c, eptr, len);

532

567

533

568

/* If the pointer is not at the start of a character, move it back until

534

569

it is. This is called only in UTF-8 mode - we don't put a test within the macro

536

571

537

572

#define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr--

538

573

539

#endif

574

#endif /* SUPPORT_UTF8 */

540

575

541

576

542

577

/* In case there is no definition of offsetof() provided - though any proper

580

615

581

616

PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \

582

617

PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \

583

PCRE_JAVASCRIPT_COMPAT)

618

PCRE_JAVASCRIPT_COMPAT|PCRE_UCP|PCRE_NO_START_OPTIMIZE)

584

619

585

620

#define PUBLIC_EXEC_OPTIONS \

586

621

875

910

#define STRING_COMMIT0 "COMMIT\0"

876

911

#define STRING_F0 "F\0"

877

912

#define STRING_FAIL0 "FAIL\0"

913

#define STRING_MARK0 "MARK\0"

878

914

#define STRING_PRUNE0 "PRUNE\0"

879

915

#define STRING_SKIP0 "SKIP\0"

880

916

#define STRING_THEN "THEN"

896

932

897

933

#define STRING_DEFINE "DEFINE"

898

934

899

#define STRING_CR_RIGHTPAR "CR)"

900

#define STRING_LF_RIGHTPAR "LF)"

901

#define STRING_CRLF_RIGHTPAR "CRLF)"

902

#define STRING_ANY_RIGHTPAR "ANY)"

903

#define STRING_ANYCRLF_RIGHTPAR "ANYCRLF)"

904

#define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)"

905

#define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)"

906

#define STRING_UTF8_RIGHTPAR "UTF8)"

935

#define STRING_CR_RIGHTPAR "CR)"

936

#define STRING_LF_RIGHTPAR "LF)"

937

#define STRING_CRLF_RIGHTPAR "CRLF)"

938

#define STRING_ANY_RIGHTPAR "ANY)"

939

#define STRING_ANYCRLF_RIGHTPAR "ANYCRLF)"

940

#define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)"

941

#define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)"

942

#define STRING_UTF8_RIGHTPAR "UTF8)"

943

#define STRING_UCP_RIGHTPAR "UCP)"

944

#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)"

907

945

908

946

#else /* SUPPORT_UTF8 */

909

947

1127

1165

#define STRING_COMMIT0 STR_C STR_O STR_M STR_M STR_I STR_T "\0"

1128

1166

#define STRING_F0 STR_F "\0"

1129

1167

#define STRING_FAIL0 STR_F STR_A STR_I STR_L "\0"

1168

#define STRING_MARK0 STR_M STR_A STR_R STR_K "\0"

1130

1169

#define STRING_PRUNE0 STR_P STR_R STR_U STR_N STR_E "\0"

1131

1170

#define STRING_SKIP0 STR_S STR_K STR_I STR_P "\0"

1132

1171

#define STRING_THEN STR_T STR_H STR_E STR_N

1148

1187

1149

1188

#define STRING_DEFINE STR_D STR_E STR_F STR_I STR_N STR_E

1150

1189

1151

#define STRING_CR_RIGHTPAR STR_C STR_R STR_RIGHT_PARENTHESIS

1152

#define STRING_LF_RIGHTPAR STR_L STR_F STR_RIGHT_PARENTHESIS

1153

#define STRING_CRLF_RIGHTPAR STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS

1154

#define STRING_ANY_RIGHTPAR STR_A STR_N STR_Y STR_RIGHT_PARENTHESIS

1155

#define STRING_ANYCRLF_RIGHTPAR STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS

1156

#define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS

1157

#define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS

1158

#define STRING_UTF8_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS

1190

#define STRING_CR_RIGHTPAR STR_C STR_R STR_RIGHT_PARENTHESIS

1191

#define STRING_LF_RIGHTPAR STR_L STR_F STR_RIGHT_PARENTHESIS

1192

#define STRING_CRLF_RIGHTPAR STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS

1193

#define STRING_ANY_RIGHTPAR STR_A STR_N STR_Y STR_RIGHT_PARENTHESIS

1194

#define STRING_ANYCRLF_RIGHTPAR STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS

1195

#define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS

1196

#define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS

1197

#define STRING_UTF8_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS

1198

#define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS

1199

#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS

1159

1200

1160

1201

#endif /* SUPPORT_UTF8 */

1161

1202

1188

1229

1189

1230

#define PT_ANY 0 /* Any property - matches all chars */

1190

1231

#define PT_LAMP 1 /* L& - the union of Lu, Ll, Lt */

1191

#define PT_GC 2 /* General characteristic (e.g. L) */

1192

#define PT_PC 3 /* Particular characteristic (e.g. Lu) */

1232

#define PT_GC 2 /* Specified general characteristic (e.g. L) */

1233

#define PT_PC 3 /* Specified particular characteristic (e.g. Lu) */

1193

1234

#define PT_SC 4 /* Script (e.g. Han) */

1235

#define PT_ALNUM 5 /* Alphanumeric - the union of L and N */

1236

#define PT_SPACE 6 /* Perl space - Z plus 9,10,12,13 */

1237

#define PT_PXSPACE 7 /* POSIX space - Z plus 9,10,11,12,13 */

1238

#define PT_WORD 8 /* Word - L plus N plus underscore */

1194

1239

1195

1240

/* Flag bits and data types for the extended class (OP_XCLASS) for classes that

1196

1241

contain UTF-8 characters with values greater than 255. */

1207

1252

/* These are escaped items that aren't just an encoding of a particular data

1208

1253

value such as \n. They must have non-zero values, as check_escape() returns

1209

1254

their negation. Also, they must appear in the same order as in the opcode

1210

definitions below, up to ESC_z. There's a dummy for OP_ANY because it

1211

corresponds to "." rather than an escape sequence, and another for OP_ALLANY

1212

(which is used for [^] in JavaScript compatibility mode).

1255

definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it

1256

corresponds to "." in DOTALL mode rather than an escape sequence. It is also

1257

used for [^] in JavaScript compatibility mode. In non-DOTALL mode, "." behaves

1258

like \N.

1259

1260

The special values ESC_DU, ESC_du, etc. are used instead of ESC_D, ESC_d, etc.

1261

when PCRE_UCP is set, when replacement of \d etc by \p sequences is required.

1262

They must be contiguous, and remain in order so that the replacements can be

1263

looked up from a table.

1213

1264

1214

1265

The final escape must be ESC_REF as subsequent values are used for

1215

1266

backreferences (\1, \2, \3, etc). There are two tests in the code for an escape

1219

1270

1220

1271

1221

1272

enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,

1222

ESC_W, ESC_w, ESC_dum1, ESC_dum2, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H,

1223

ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z, ESC_E, ESC_Q, ESC_g, ESC_k,

1273

ESC_W, ESC_w, ESC_N, ESC_dum, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H,

1274

ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z,

1275

ESC_E, ESC_Q, ESC_g, ESC_k,

1276

ESC_DU, ESC_du, ESC_SU, ESC_su, ESC_WU, ESC_wu,

1224

1277

ESC_REF };

1225

1278

1226

1227

1279

/* Opcode table: Starting from 1 (i.e. after OP_END), the values up to

1228

1280

OP_EOD must correspond in order to the list of escapes immediately above.

1229

1281

1247

1299

OP_WHITESPACE, /* 9 \s */

1248

1300

OP_NOT_WORDCHAR, /* 10 \W */

1249

1301

OP_WORDCHAR, /* 11 \w */

1250

OP_ANY, /* 12 Match any character (subject to DOTALL) */

1251

OP_ALLANY, /* 13 Match any character (not subject to DOTALL) */

1302

OP_ANY, /* 12 Match any character except newline */

1303

OP_ALLANY, /* 13 Match any character */

1252

1304

OP_ANYBYTE, /* 14 Match any byte (\C); different to OP_ANY for UTF-8 */

1253

1305

OP_NOTPROP, /* 15 \P (not Unicode property) */

1254

1306

OP_PROP, /* 16 \p (Unicode property) */

1378

1430

1379

1431

/* These are backtracking control verbs */

1380

1432

1381

OP_PRUNE, /* 107 */

1382

OP_SKIP, /* 108 */

1383

OP_THEN, /* 109 */

1384

OP_COMMIT, /* 110 */

1433

OP_MARK, /* 107 always has an argument */

1434

OP_PRUNE, /* 108 */

1435

OP_PRUNE_ARG, /* 109 same, but with argument */

1436

OP_SKIP, /* 110 */

1437

OP_SKIP_ARG, /* 111 same, but with argument */

1438

OP_THEN, /* 112 */

1439

OP_THEN_ARG, /* 113 same, but with argument */

1440

OP_COMMIT, /* 114 */

1385

1441

1386

1442

/* These are forced failure and success verbs */

1387

1443

1388

OP_FAIL, /* 111 */

1389

OP_ACCEPT, /* 112 */

1390

OP_CLOSE, /* 113 Used before OP_ACCEPT to close open captures */

1444

OP_FAIL, /* 115 */

1445

OP_ACCEPT, /* 116 */

1446

OP_CLOSE, /* 117 Used before OP_ACCEPT to close open captures */

1391

1447

1392

1448

/* This is used to skip a subpattern with a {0} quantifier */

1393

1449

1394

OP_SKIPZERO, /* 114 */

1450

OP_SKIPZERO, /* 118 */

1395

1451

1396

1452

/* This is not an opcode, but is used to check that tables indexed by opcode

1397

1453

are the correct length, in order to catch updating errors - there have been

1402

1458

1403

1459

/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro

1404

1460

definitions that follow must also be updated to match. There are also tables

1405

called "coptable" cna "poptable" in pcre_dfa_exec.c that must be updated. */

1461

called "coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */

1406

1462

1407

1463

1408

1464

/* This macro defines textual names for all the opcodes. These are used only

1427

1483

"Once", "Bra", "CBra", "Cond", "SBra", "SCBra", "SCond", \

1428

1484

"Cond ref", "Cond nref", "Cond rec", "Cond nrec", "Cond def", \

1429

1485

"Brazero", "Braminzero", \

1430

"*PRUNE", "*SKIP", "*THEN", "*COMMIT", "*FAIL", "*ACCEPT", \

1486

"*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP", \

1487

"*THEN", "*THEN", "*COMMIT", "*FAIL", "*ACCEPT", \

1431

1488

"Close", "Skip zero"

1432

1489

1433

1490

1493

1550

3, 3, /* RREF, NRREF */ \

1494

1551

1, /* DEF */ \

1495

1552

1, 1, /* BRAZERO, BRAMINZERO */ \

1496

1, 1, 1, 1, /* PRUNE, SKIP, THEN, COMMIT, */ \

1497

1, 1, 3, 1 /* FAIL, ACCEPT, CLOSE, SKIPZERO */

1553

3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \

1554

1, 3, /* SKIP, SKIP_ARG */ \

1555

1+LINK_SIZE, 3+LINK_SIZE, /* THEN, THEN_ARG */ \

1556

1, 1, 1, 3, 1 /* COMMIT, FAIL, ACCEPT, CLOSE, SKIPZERO */

1498

1557

1499

1558

1500

1559

/* A magic value for OP_RREF and OP_NRREF to indicate the "any recursion"

1512

1571

ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,

1513

1572

ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,

1514

1573

ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,

1515

ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERRCOUNT };

1574

ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68,

1575

ERRCOUNT };

1516

1576

1517

1577

/* The real format of the start of the pcre block; the index of names and the

1518

1578

code vector run on as long as necessary after the end. We store an explicit

1655

1715

BOOL noteol; /* NOTEOL flag */

1656

1716

BOOL utf8; /* UTF8 flag */

1657

1717

BOOL jscript_compat; /* JAVASCRIPT_COMPAT flag */

1718

BOOL use_ucp; /* PCRE_UCP flag */

1658

1719

BOOL endonly; /* Dollar not before final \n */

1659

1720

BOOL notempty; /* Empty string match not wanted */

1660

1721

BOOL notempty_atstart; /* Empty string match at start not wanted */

1674

1735

int eptrn; /* Next free eptrblock */

1675

1736

recursion_info *recursive; /* Linked list of recursion data */

1676

1737

void *callout_data; /* To pass back to callouts */

1738

const uschar *mark; /* Mark pointer to pass back */

1677

1739

} match_data;

1678

1740

1679

1741

/* A similar structure is used for the same purpose by the DFA matching

Older »