~ubuntu-branches/ubuntu/trusty/pcre3/trusty

Committer: Package Import Robot
Author(s): Mark Baker
Date: 2012-09-13 19:58:45 UTC
mfrom: (23.1.11 sid)
Revision ID: package-import@ubuntu.com-20120913195845-wd12z63sm0b07n59

Tags: 1:8.31-1

http://bugs.debian.org/686495

* New upstream release
* Applied patch from upstream bugzilla #1287 to fix a bug where the wrong
value is set in re_nsub in some cases (Closes: #686495)

files added:
.pc/bug1287

.pc/bug1287/pcreposix.c

NON-AUTOTOOLS-BUILD

cmake/FindEditline.cmake

debian/patches/bug1287

testdata/grepbinary

testdata/grepfilelist

files removed:
.pc/autoconfupdate.patch

.pc/autoconfupdate.patch/config.guess

.pc/autoconfupdate.patch/config.sub

debian/patches/autoconfupdate.patch

files modified:
.pc/PCRE6_compatible_API.patch/pcretest.c

.pc/applied-patches

.pc/pcre_info.patch/Makefile.am

.pc/pcre_info.patch/Makefile.in

.pc/pcregrep.1-patch/doc/pcregrep.1

.pc/soname.patch/configure

CMakeLists.txt

ChangeLog

HACKING

Makefile.am

Makefile.in

NEWS

NON-UNIX-USE

PrepareRelease

README

RunGrepTest

RunTest

config-cmake.h.in

config.guess

config.h.generic

config.h.in

config.sub

configure

configure.ac

debian/changelog

debian/control

debian/patches/series

debian/patches/soname.patch

doc/html/index.html

doc/html/pcre16.html

doc/html/pcre_assign_jit_stack.html

doc/html/pcre_compile.html

doc/html/pcre_compile2.html

doc/html/pcre_jit_stack_alloc.html

doc/html/pcreapi.html

doc/html/pcrebuild.html

doc/html/pcrecompat.html

doc/html/pcrecpp.html

doc/html/pcredemo.html

doc/html/pcregrep.html

doc/html/pcrejit.html

doc/html/pcrelimits.html

doc/html/pcrepartial.html

doc/html/pcrepattern.html

doc/html/pcresyntax.html

doc/html/pcretest.html

doc/html/pcreunicode.html

doc/index.html.src

doc/pcre-config.1

doc/pcre.3

doc/pcre.txt

doc/pcre16.3

doc/pcre_assign_jit_stack.3

doc/pcre_compile.3

doc/pcre_compile2.3

doc/pcre_config.3

doc/pcre_copy_named_substring.3

doc/pcre_copy_substring.3

doc/pcre_dfa_exec.3

doc/pcre_exec.3

doc/pcre_free_study.3

doc/pcre_free_substring.3

doc/pcre_free_substring_list.3

doc/pcre_fullinfo.3

doc/pcre_get_named_substring.3

doc/pcre_get_stringnumber.3

doc/pcre_get_stringtable_entries.3

doc/pcre_get_substring.3

doc/pcre_get_substring_list.3

doc/pcre_jit_stack_alloc.3

doc/pcre_jit_stack_free.3

doc/pcre_maketables.3

doc/pcre_pattern_to_host_byte_order.3

doc/pcre_refcount.3

doc/pcre_study.3

doc/pcre_utf16_to_host_byte_order.3

doc/pcre_version.3

doc/pcreapi.3

doc/pcrebuild.3

doc/pcrecallout.3

doc/pcrecompat.3

doc/pcrecpp.3

doc/pcregrep.1

doc/pcregrep.txt

doc/pcrejit.3

doc/pcrelimits.3

doc/pcrematching.3

doc/pcrepartial.3

doc/pcrepattern.3

doc/pcreperform.3

doc/pcreposix.3

doc/pcreprecompile.3

doc/pcresample.3

doc/pcrestack.3

doc/pcresyntax.3

doc/pcretest.1

doc/pcretest.txt

doc/pcreunicode.3

pcre.h.generic

pcre.h.in

pcre_compile.c

pcre_dfa_exec.c

pcre_exec.c

pcre_fullinfo.c

pcre_internal.h

pcre_jit_compile.c

pcre_jit_test.c

pcre_printint.c

pcre_study.c

pcre_tables.c

pcre_ucd.c

pcredemo.c

pcregrep.c

pcreposix.c

pcretest.c

sljit/sljitConfig.h

sljit/sljitConfigInternal.h

sljit/sljitLir.c

sljit/sljitLir.h

sljit/sljitNativeARM_Thumb2.c

sljit/sljitNativeARM_v5.c

sljit/sljitNativeMIPS_common.c

sljit/sljitNativePPC_common.c

sljit/sljitNativeX86_32.c

sljit/sljitNativeX86_64.c

sljit/sljitNativeX86_common.c

sljit/sljitUtils.c

testdata/grepoutput

testdata/testinput1

testdata/testinput10

testdata/testinput12

testdata/testinput14

testdata/testinput15

testdata/testinput17

testdata/testinput18

testdata/testinput2

testdata/testinput5

testdata/testinput6

testdata/testinput7

testdata/testinput8

testdata/testinput9

testdata/testoutput1

testdata/testoutput10

testdata/testoutput11-16

testdata/testoutput11-8

testdata/testoutput12

testdata/testoutput14

testdata/testoutput15

testdata/testoutput16

testdata/testoutput17

testdata/testoutput18

testdata/testoutput2

testdata/testoutput5

testdata/testoutput6

testdata/testoutput7

testdata/testoutput8

testdata/testoutput9

ucp.h

Show diffs side-by-side

added added

removed removed

pcre_compile.c

489

"too many forward references\0"

490

"disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0"

491

"invalid UTF-16 string\0"

492

/* 75 */

493

"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"

494

"character value in \\u.... sequence is too large\0"

492

495

;

493

496

494

497

/* Table to identify digits and hex digits. This is used when compiling

829

832

c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));

830

833

#endif

831

834

}

835

836

#ifdef COMPILE_PCRE8

837

if (c > (utf ? 0x10ffff : 0xff))

838

#else

839

#ifdef COMPILE_PCRE16

840

if (c > (utf ? 0x10ffff : 0xffff))

841

#endif

842

#endif

843

{

844

*errorcodeptr = ERR76;

845

}

846

else if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;

832

847

}

833

848

}

834

849

else

2225

2240

{

2226

2241

case OP_CHAR:

2227

2242

case OP_CHARI:

2243

case OP_NOT:

2244

case OP_NOTI:

2228

2245

case OP_EXACT:

2229

2246

case OP_EXACTI:

2247

case OP_NOTEXACT:

2248

case OP_NOTEXACTI:

2230

2249

case OP_UPTO:

2231

2250

case OP_UPTOI:

2251

case OP_NOTUPTO:

2252

case OP_NOTUPTOI:

2232

2253

case OP_MINUPTO:

2233

2254

case OP_MINUPTOI:

2255

case OP_NOTMINUPTO:

2256

case OP_NOTMINUPTOI:

2234

2257

case OP_POSUPTO:

2235

2258

case OP_POSUPTOI:

2259

case OP_NOTPOSUPTO:

2260

case OP_NOTPOSUPTOI:

2236

2261

case OP_STAR:

2237

2262

case OP_STARI:

2263

case OP_NOTSTAR:

2264

case OP_NOTSTARI:

2238

2265

case OP_MINSTAR:

2239

2266

case OP_MINSTARI:

2267

case OP_NOTMINSTAR:

2268

case OP_NOTMINSTARI:

2240

2269

case OP_POSSTAR:

2241

2270

case OP_POSSTARI:

2271

case OP_NOTPOSSTAR:

2272

case OP_NOTPOSSTARI:

2242

2273

case OP_PLUS:

2243

2274

case OP_PLUSI:

2275

case OP_NOTPLUS:

2276

case OP_NOTPLUSI:

2244

2277

case OP_MINPLUS:

2245

2278

case OP_MINPLUSI:

2279

case OP_NOTMINPLUS:

2280

case OP_NOTMINPLUSI:

2246

2281

case OP_POSPLUS:

2247

2282

case OP_POSPLUSI:

2283

case OP_NOTPOSPLUS:

2284

case OP_NOTPOSPLUSI:

2248

2285

case OP_QUERY:

2249

2286

case OP_QUERYI:

2287

case OP_NOTQUERY:

2288

case OP_NOTQUERYI:

2250

2289

case OP_MINQUERY:

2251

2290

case OP_MINQUERYI:

2291

case OP_NOTMINQUERY:

2292

case OP_NOTMINQUERYI:

2252

2293

case OP_POSQUERY:

2253

2294

case OP_POSQUERYI:

2295

case OP_NOTPOSQUERY:

2296

case OP_NOTPOSQUERYI:

2254

2297

if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);

2255

2298

break;

2256

2299

}

3069

3112

#endif /* SUPPORT_UTF */

3070

3113

return (c != TABLE_GET((unsigned int)next, cd->fcc, next)); /* Non-UTF-8 mode */

3071

3114

3072

/* For OP_NOT and OP_NOTI, the data is always a single-byte character. These

3073

opcodes are not used for multi-byte characters, because they are coded using

3074

an XCLASS instead. */

3075

3076

3115

case OP_NOT:

3077

return (c = *previous) == next;

3116

#ifdef SUPPORT_UTF

3117

GETCHARTEST(c, previous);

3118

#else

3119

c = *previous;

3120

#endif

3121

return c == next;

3078

3122

3079

3123

case OP_NOTI:

3080

if ((c = *previous) == next) return TRUE;

3124

#ifdef SUPPORT_UTF

3125

GETCHARTEST(c, previous);

3126

#else

3127

c = *previous;

3128

#endif

3129

if (c == next) return TRUE;

3081

3130

#ifdef SUPPORT_UTF

3082

3131

if (utf)

3083

3132

{

3084

3133

unsigned int othercase;

3085

3134

if (next < 128) othercase = cd->fcc[next]; else

3086

3135

#ifdef SUPPORT_UCP

3087

othercase = UCD_OTHERCASE(next);

3136

othercase = UCD_OTHERCASE((unsigned int)next);

3088

3137

#else

3089

3138

othercase = NOTACHAR;

3090

3139

#endif

3092

3141

}

3093

3142

else

3094

3143

#endif /* SUPPORT_UTF */

3095

return (c == (int)(TABLE_GET((unsigned int)next, cd->fcc, next))); /* Non-UTF-8 mode */

3144

return (c == TABLE_GET((unsigned int)next, cd->fcc, next)); /* Non-UTF-8 mode */

3096

3145

3097

3146

/* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.

3098

3147

When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */

3099

3148

3100

3149

case OP_DIGIT:

3101

return next > 127 || (cd->ctypes[next] & ctype_digit) == 0;

3150

return next > 255 || (cd->ctypes[next] & ctype_digit) == 0;

3102

3151

3103

3152

case OP_NOT_DIGIT:

3104

return next <= 127 && (cd->ctypes[next] & ctype_digit) != 0;

3153

return next <= 255 && (cd->ctypes[next] & ctype_digit) != 0;

3105

3154

3106

3155

case OP_WHITESPACE:

3107

return next > 127 || (cd->ctypes[next] & ctype_space) == 0;

3156

return next > 255 || (cd->ctypes[next] & ctype_space) == 0;

3108

3157

3109

3158

case OP_NOT_WHITESPACE:

3110

return next <= 127 && (cd->ctypes[next] & ctype_space) != 0;

3159

return next <= 255 && (cd->ctypes[next] & ctype_space) != 0;

3111

3160

3112

3161

case OP_WORDCHAR:

3113

return next > 127 || (cd->ctypes[next] & ctype_word) == 0;

3162

return next > 255 || (cd->ctypes[next] & ctype_word) == 0;

3114

3163

3115

3164

case OP_NOT_WORDCHAR:

3116

return next <= 127 && (cd->ctypes[next] & ctype_word) != 0;

3165

return next <= 255 && (cd->ctypes[next] & ctype_word) != 0;

3117

3166

3118

3167

case OP_HSPACE:

3119

3168

case OP_NOT_HSPACE:

3191

3240

switch(-next)

3192

3241

{

3193

3242

case ESC_d:

3194

return c > 127 || (cd->ctypes[c] & ctype_digit) == 0;

3243

return c > 255 || (cd->ctypes[c] & ctype_digit) == 0;

3195

3244

3196

3245

case ESC_D:

3197

return c <= 127 && (cd->ctypes[c] & ctype_digit) != 0;

3246

return c <= 255 && (cd->ctypes[c] & ctype_digit) != 0;

3198

3247

3199

3248

case ESC_s:

3200

return c > 127 || (cd->ctypes[c] & ctype_space) == 0;

3249

return c > 255 || (cd->ctypes[c] & ctype_space) == 0;

3201

3250

3202

3251

case ESC_S:

3203

return c <= 127 && (cd->ctypes[c] & ctype_space) != 0;

3252

return c <= 255 && (cd->ctypes[c] & ctype_space) != 0;

3204

3253

3205

3254

case ESC_w:

3206

return c > 127 || (cd->ctypes[c] & ctype_word) == 0;

3255

return c > 255 || (cd->ctypes[c] & ctype_word) == 0;

3207

3256

3208

3257

case ESC_W:

3209

return c <= 127 && (cd->ctypes[c] & ctype_word) != 0;

3258

return c <= 255 && (cd->ctypes[c] & ctype_word) != 0;

3210

3259

3211

3260

case ESC_h:

3212

3261

case ESC_H:

3315

3364

return next == -ESC_d;

3316

3365

3317

3366

case OP_WHITESPACE:

3318

return next == -ESC_S || next == -ESC_d || next == -ESC_w || next == -ESC_R;

3367

return next == -ESC_S || next == -ESC_d || next == -ESC_w;

3319

3368

3320

3369

case OP_NOT_WHITESPACE:

3321

return next == -ESC_s || next == -ESC_h || next == -ESC_v;

3370

return next == -ESC_s || next == -ESC_h || next == -ESC_v || next == -ESC_R;

3322

3371

3323

3372

case OP_HSPACE:

3324

3373

return next == -ESC_S || next == -ESC_H || next == -ESC_d ||

4482

4531

LONE_SINGLE_CHARACTER:

4483

4532

4484

4533

/* Only the value of 1 matters for class_single_char. */

4534

4485

4535

if (class_single_char < 2) class_single_char++;

4486

4536

4487

4537

/* If class_charcount is 1, we saw precisely one character. As long as

4488

there were no negated characters >= 128 and there was no use of \p or \P,

4489

in other words, no use of any XCLASS features, we can optimize.

4490

4491

In UTF-8 mode, we can optimize the negative case only if there were no

4492

characters >= 128 because OP_NOT and the related opcodes like OP_NOTSTAR

4493

operate on single-bytes characters only. This is an historical hangover.

4494

Maybe one day we can tidy these opcodes to handle multi-byte characters.

4538

there was no use of \p or \P, in other words, no use of any XCLASS

4539

features, we can optimize.

4495

4540

4496

4541

The optimization throws away the bit map. We turn the item into a

4497

4542

1-character OP_CHAR[I] if it's positive, or OP_NOT[I] if it's negative.

4498

Note that OP_NOT[I] does not support multibyte characters. In the positive

4499

case, it can cause firstchar to be set. Otherwise, there can be no first

4500

char if this item is first, whatever repeat count may follow. In the case

4501

of reqchar, save the previous value for reinstating. */

4543

In the positive case, it can cause firstchar to be set. Otherwise, there

4544

can be no first char if this item is first, whatever repeat count may

4545

follow. In the case of reqchar, save the previous value for reinstating. */

4502

4546

4503

#ifdef SUPPORT_UTF

4504

if (class_single_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET

4505

&& (!utf || !negate_class || c < (MAX_VALUE_FOR_SINGLE_CHAR + 1)))

4506

#else

4507

4547

if (class_single_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)

4508

#endif

4509

4548

{

4510

4549

ptr++;

4511

4550

zeroreqchar = reqchar;

4512

4551

4513

/* The OP_NOT[I] opcodes work on single characters only. */

4514

4515

4552

if (negate_class)

4516

4553

{

4517

4554

if (firstchar == REQ_UNSET) firstchar = REQ_NONE;

4518

4555

zerofirstchar = firstchar;

4519

4556

*code++ = ((options & PCRE_CASELESS) != 0)? OP_NOTI: OP_NOT;

4520

*code++ = c;

4557

#ifdef SUPPORT_UTF

4558

if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)

4559

code += PRIV(ord2utf)(c, code);

4560

else

4561

#endif

4562

*code++ = c;

4521

4563

goto NOT_CHAR;

4522

4564

}

4523

4565

4775

4817

4776

4818

/* Now handle repetition for the different types of item. */

4777

4819

4778

/* If previous was a character match, abolish the item and generate a

4779

repeat item instead. If a char item has a minumum of more than one, ensure

4780

that it is set in reqchar - it might not be if a sequence such as x{3} is

4781

the first thing in a branch because the x will have gone into firstchar

4782

instead. */

4820

/* If previous was a character or negated character match, abolish the item

4821

and generate a repeat item instead. If a char item has a minimum of more

4822

than one, ensure that it is set in reqchar - it might not be if a sequence

4823

such as x{3} is the first thing in a branch because the x will have gone

4824

into firstchar instead. */

4783

4825

4784

if (*previous == OP_CHAR || *previous == OP_CHARI)

4826

if (*previous == OP_CHAR || *previous == OP_CHARI

4827

|| *previous == OP_NOT || *previous == OP_NOTI)

4785

4828

{

4786

op_type = (*previous == OP_CHAR)? 0 : OP_STARI - OP_STAR;

4829

switch (*previous)

4830

{

4831

default: /* Make compiler happy. */

4832

case OP_CHAR: op_type = OP_STAR - OP_STAR; break;

4833

case OP_CHARI: op_type = OP_STARI - OP_STAR; break;

4834

case OP_NOT: op_type = OP_NOTSTAR - OP_STAR; break;

4835

case OP_NOTI: op_type = OP_NOTSTARI - OP_STAR; break;

4836

}

4787

4837

4788

4838

/* Deal with UTF characters that take up more than one character. It's

4789

4839

easier to write this out separately than try to macrify it. Use c to

4806

4856

with UTF disabled, or for a single character UTF character. */

4807

4857

{

4808

4858

c = code[-1];

4809

if (repeat_min > 1) reqchar = c | req_caseopt | cd->req_varyopt;

4859

if (*previous <= OP_CHARI && repeat_min > 1)

4860

reqchar = c | req_caseopt | cd->req_varyopt;

4810

4861

}

4811

4862

4812

4863

/* If the repetition is unlimited, it pays to see if the next thing on

4825

4876

goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */

4826

4877

}

4827

4878

4828

/* If previous was a single negated character ([^a] or similar), we use

4829

one of the special opcodes, replacing it. The code is shared with single-

4830

character repeats by setting opt_type to add a suitable offset into

4831

repeat_type. We can also test for auto-possessification. OP_NOT and OP_NOTI

4832

are currently used only for single-byte chars. */

4833

4834

else if (*previous == OP_NOT || *previous == OP_NOTI)

4835

{

4836

op_type = ((*previous == OP_NOT)? OP_NOTSTAR : OP_NOTSTARI) - OP_STAR;

4837

c = previous[1];

4838

if (!possessive_quantifier &&

4839

repeat_max < 0 &&

4840

check_auto_possessive(previous, utf, ptr + 1, options, cd))

4841

{

4842

repeat_type = 0; /* Force greedy */

4843

possessive_quantifier = TRUE;

4844

}

4845

goto OUTPUT_SINGLE_REPEAT;

4846

}

4847

4848

4879

/* If previous was a character type match (\d or similar), abolish it and

4849

4880

create a suitable repeat item. The code is shared with single-character

4850

4881

repeats by setting op_type to add a suitable offset into repeat_type. Note

5585

5616

arg = ++ptr;

5586

5617

while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;

5587

5618

arglen = (int)(ptr - arg);

5619

if (arglen > (int)MAX_MARK)

5620

{

5621

*errorcodeptr = ERR75;

5622

goto FAILED;

5623

}

5588

5624

}

5589

5625

5590

5626

if (*ptr != CHAR_RIGHT_PARENTHESIS)

6836

6872

/* For the rest (including \X when Unicode properties are supported), we

6837

6873

can obtain the OP value by negating the escape value in the default

6838

6874

situation when PCRE_UCP is not set. When it *is* set, we substitute

6839

Unicode property tests. */

6875

Unicode property tests. Note that \b and \B do a one-character

6876

lookbehind. */

6840

6877

6841

6878

else

6842

6879

{

6880

if ((-c == ESC_b || -c == ESC_B) && cd->max_lookbehind == 0)

6881

cd->max_lookbehind = 1;

6843

6882

#ifdef SUPPORT_UCP

6844

6883

if (-c >= ESC_DU && -c <= ESC_wu)

6845

6884

{

7147

7186

*ptrptr = ptr;

7148

7187

return FALSE;

7149

7188

}

7150

else { PUT(reverse_count, 0, fixed_length); }

7189

else

7190

{

7191

if (fixed_length > cd->max_lookbehind)

7192

cd->max_lookbehind = fixed_length;

7193

PUT(reverse_count, 0, fixed_length);

7194

}

7151

7195

}

7152

7196

}

7153

7197

7817

7861

cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern));

7818

7862

cd->req_varyopt = 0;

7819

7863

cd->assert_depth = 0;

7864

cd->max_lookbehind = 0;

7820

7865

cd->external_options = options;

7821

7866

cd->external_flags = 0;

7822

7867

cd->open_caps = NULL;

7867

7912

re->size = (int)size;

7868

7913

re->options = cd->external_options;

7869

7914

re->flags = cd->external_flags;

7870

re->dummy1 = 0;

7871

7915

re->first_char = 0;

7872

7916

re->req_char = 0;

7873

7917

re->name_table_offset = sizeof(REAL_PCRE) / sizeof(pcre_uchar);

7887

7931

cd->final_bracount = cd->bracount; /* Save for checking forward references */

7888

7932

cd->assert_depth = 0;

7889

7933

cd->bracount = 0;

7934

cd->max_lookbehind = 0;

7890

7935

cd->names_found = 0;

7891

7936

cd->name_table = (pcre_uchar *)re + re->name_table_offset;

7892

7937

codestart = cd->name_table + re->name_entry_size * re->name_count;

7908

7953

&firstchar, &reqchar, NULL, cd, NULL);

7909

7954

re->top_bracket = cd->bracount;

7910

7955

re->top_backref = cd->top_backref;

7956

re->max_lookbehind = cd->max_lookbehind;

7911

7957

re->flags = cd->external_flags | PCRE_MODE;

7912

7958

7913

7959

if (cd->had_accept) reqchar = REQ_NONE; /* Must disable after (*ACCEPT) */

7995

8041

(fixed_length == -4)? ERR70 : ERR25;

7996

8042

break;

7997

8043

}

8044

if (fixed_length > cd->max_lookbehind) cd->max_lookbehind = fixed_length;

7998

8045

PUT(cc, 1, fixed_length);

7999

8046

}

8000

8047

cc += 1 + LINK_SIZE;

Older »