~pythonregexp2.7/python/issue2636 : revision 39036

1

/* String object implementation */

2

3

#define PY_SSIZE_T_CLEAN

4

5

#include "Python.h"

6

#include <ctype.h>

7

8

#ifdef COUNT_ALLOCS

9

/* COUNT_ALLOCS statistics: incremented each time the cached empty string
   (null_strings) or a cached one-character string (one_strings) is handed
   out instead of allocating a new object. */
int null_strings, one_strings;

10

#endif

11

12

/* Cache of one-character string objects, indexed by the character's value
   masked with UCHAR_MAX.  Slots start out NULL and are filled in by the
   string constructors the first time each character is requested. */
static PyBytesObject *characters[UCHAR_MAX + 1];

13

/* Cached empty string object; NULL until the first zero-length string is
   created by one of the string constructors. */
static PyBytesObject *nullstring;

14

15

/* This dictionary holds all interned strings. Note that references to

16

strings in this dictionary are *not* counted in the string's ob_refcnt.

17

When the interned string reaches a refcnt of 0 the string deallocation

18

function will delete the reference from this dictionary.

19

20

Another way to look at this is to say that the actual reference

21

count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)

22

*/

23

static PyObject *interned;

24

25

/*

26

For both PyBytes_FromString() and PyBytes_FromStringAndSize(), the

27

parameter `size' denotes number of characters to allocate, not counting any

28

null terminating character.

29

30

For PyBytes_FromString(), the parameter `str' points to a null-terminated

31

string containing exactly `size' bytes.

32

33

For PyBytes_FromStringAndSize(), the parameter `str' is

34

either NULL or else points to a string containing at least `size' bytes.

35

For PyBytes_FromStringAndSize(), the string in the `str' parameter does

36

not have to be null-terminated. (Therefore it is safe to construct a

37

substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)

38

If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'

39

bytes (setting the last byte to the null terminating character) and you can

40

fill in the data yourself. If `str' is non-NULL then the resulting

41

PyString object must be treated as immutable and you must not fill in nor

42

alter the data yourself, since the strings may be shared.

43

44

The PyObject member `op->ob_size', which denotes the number of "extra

45

items" in a variable-size object, will contain the number of bytes

46

allocated for string data, not counting the null terminating character. It

47

is therefore equal to the `size' parameter (for

48

PyBytes_FromStringAndSize()) or the length of the string in the `str'

49

parameter (for PyBytes_FromString()).

50

*/

51

PyObject *

52

PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)

53

{

54

register PyBytesObject *op;

55

if (size < 0) {

56

PyErr_SetString(PyExc_SystemError,

57

"Negative size passed to PyBytes_FromStringAndSize");

58

return NULL;

59

}

60

if (size == 0 && (op = nullstring) != NULL) {

61

#ifdef COUNT_ALLOCS

62

null_strings++;

63

#endif

64

Py_INCREF(op);

65

return (PyObject *)op;

66

}

67

if (size == 1 && str != NULL &&

68

(op = characters[*str & UCHAR_MAX]) != NULL)

69

{

70

#ifdef COUNT_ALLOCS

71

one_strings++;

72

#endif

73

Py_INCREF(op);

74

return (PyObject *)op;

75

}

76

77

/* Inline PyObject_NewVar */

78

op = (PyBytesObject *)PyObject_MALLOC(sizeof(PyBytesObject) + size);

79

if (op == NULL)

80

return PyErr_NoMemory();

81

PyObject_INIT_VAR(op, &PyBytes_Type, size);

82

op->ob_shash = -1;

83

op->ob_sstate = SSTATE_NOT_INTERNED;

84

if (str != NULL)

85

Py_MEMCPY(op->ob_sval, str, size);

86

op->ob_sval[size] = '\0';

87

/* share short strings */

88

if (size == 0) {

89

PyObject *t = (PyObject *)op;

90

PyString_InternInPlace(&t);

91

op = (PyBytesObject *)t;

92

nullstring = op;

93

Py_INCREF(op);

94

} else if (size == 1 && str != NULL) {

95

PyObject *t = (PyObject *)op;

96

PyString_InternInPlace(&t);

97

op = (PyBytesObject *)t;

98

characters[*str & UCHAR_MAX] = op;

99

Py_INCREF(op);

100

}

101

return (PyObject *) op;

102

}

103

104

PyObject *

105

PyBytes_FromString(const char *str)

106

{

107

register size_t size;

108

register PyBytesObject *op;

109

110

assert(str != NULL);

111

size = strlen(str);

112

if (size > PY_SSIZE_T_MAX) {

113

PyErr_SetString(PyExc_OverflowError,

114

"string is too long for a Python string");

115

return NULL;

116

}

117

if (size == 0 && (op = nullstring) != NULL) {

118

#ifdef COUNT_ALLOCS

119

null_strings++;

120

#endif

121

Py_INCREF(op);

122

return (PyObject *)op;

123

}

124

if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {

125

#ifdef COUNT_ALLOCS

126

one_strings++;

127

#endif

128

Py_INCREF(op);

129

return (PyObject *)op;

130

}

131

132

/* Inline PyObject_NewVar */

133

op = (PyBytesObject *)PyObject_MALLOC(sizeof(PyBytesObject) + size);

134

if (op == NULL)

135

return PyErr_NoMemory();

136

PyObject_INIT_VAR(op, &PyBytes_Type, size);

137

op->ob_shash = -1;

138

op->ob_sstate = SSTATE_NOT_INTERNED;

139

Py_MEMCPY(op->ob_sval, str, size+1);

140

/* share short strings */

141

if (size == 0) {

142

PyObject *t = (PyObject *)op;

143

PyString_InternInPlace(&t);

144

op = (PyBytesObject *)t;

145

nullstring = op;

146

Py_INCREF(op);

147

} else if (size == 1) {

148

PyObject *t = (PyObject *)op;

149

PyString_InternInPlace(&t);

150

op = (PyBytesObject *)t;

151

characters[*str & UCHAR_MAX] = op;

152

Py_INCREF(op);

153

}

154

return (PyObject *) op;

155

}

156

157

PyObject *

158

PyBytes_FromFormatV(const char *format, va_list vargs)

159

{

160

va_list count;

161

Py_ssize_t n = 0;

162

const char* f;

163

char *s;

164

PyObject* string;

165

166

#ifdef VA_LIST_IS_ARRAY

167

Py_MEMCPY(count, vargs, sizeof(va_list));

168

#else

169

#ifdef __va_copy

170

__va_copy(count, vargs);

171

#else

172

count = vargs;

173

#endif

174

#endif

175

/* step 1: figure out how large a buffer we need */

176

for (f = format; *f; f++) {

177

if (*f == '%') {

178

const char* p = f;

179

while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))

180

;

181

182

/* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since

183

* they don't affect the amount of space we reserve.

184

*/

185

if ((*f == 'l' || *f == 'z') &&

186

(f[1] == 'd' || f[1] == 'u'))

187

++f;

188

189

switch (*f) {

190

case 'c':

191

(void)va_arg(count, int);

192

/* fall through... */

193

case '%':

194

n++;

195

break;

196

case 'd': case 'u': case 'i': case 'x':

197

(void) va_arg(count, int);

198

/* 20 bytes is enough to hold a 64-bit

199

integer. Decimal takes the most space.

200

This isn't enough for octal. */

201

n += 20;

202

break;

203

case 's':

204

s = va_arg(count, char*);

205

n += strlen(s);

206

break;

207

case 'p':

208

(void) va_arg(count, int);

209

/* maximum 64-bit pointer representation:

210

* 0xffffffffffffffff

211

* so 19 characters is enough.

212

* XXX I count 18 -- what's the extra for?

213

*/

214

n += 19;

215

break;

216

default:

217

/* if we stumble upon an unknown

218

formatting code, copy the rest of

219

the format string to the output

220

string. (we cannot just skip the

221

code, since there's no way to know

222

what's in the argument list) */

223

n += strlen(p);

224

goto expand;

225

}

226

} else

227

n++;

228

}

229

expand:

230

/* step 2: fill the buffer */

231

/* Since we've analyzed how much space we need for the worst case,

232

use sprintf directly instead of the slower PyOS_snprintf. */

233

string = PyBytes_FromStringAndSize(NULL, n);

234

if (!string)

235

return NULL;

236

237

s = PyBytes_AsString(string);

238

239

for (f = format; *f; f++) {

240

if (*f == '%') {

241

const char* p = f++;

242

Py_ssize_t i;

243

int longflag = 0;

244

int size_tflag = 0;

245

/* parse the width.precision part (we're only

246

interested in the precision value, if any) */

247

n = 0;

248

while (isdigit(Py_CHARMASK(*f)))

249

n = (n*10) + *f++ - '0';

250

if (*f == '.') {

251

f++;

252

n = 0;

253

while (isdigit(Py_CHARMASK(*f)))

254

n = (n*10) + *f++ - '0';

255

}

256

while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))

257

f++;

258

/* handle the long flag, but only for %ld and %lu.

259

others can be added when necessary. */

260

if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {

261

longflag = 1;

262

++f;

263

}

264

/* handle the size_t flag. */

265

if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {

266

size_tflag = 1;

267

++f;

268

}

269

270

switch (*f) {

271

case 'c':

272

*s++ = va_arg(vargs, int);

273

break;

274

case 'd':

275

if (longflag)

276

sprintf(s, "%ld", va_arg(vargs, long));

277

else if (size_tflag)

278

sprintf(s, "%" PY_FORMAT_SIZE_T "d",

279

va_arg(vargs, Py_ssize_t));

280

else

281

sprintf(s, "%d", va_arg(vargs, int));

282

s += strlen(s);

283

break;

284

case 'u':

285

if (longflag)

286

sprintf(s, "%lu",

287

va_arg(vargs, unsigned long));

288

else if (size_tflag)

289

sprintf(s, "%" PY_FORMAT_SIZE_T "u",

290

va_arg(vargs, size_t));

291

else

292

sprintf(s, "%u",

293

va_arg(vargs, unsigned int));

294

s += strlen(s);

295

break;

296

case 'i':

297

sprintf(s, "%i", va_arg(vargs, int));

298

s += strlen(s);

299

break;

300

case 'x':

301

sprintf(s, "%x", va_arg(vargs, int));

302

s += strlen(s);

303

break;

304

case 's':

305

p = va_arg(vargs, char*);

306

i = strlen(p);

307

if (n > 0 && i > n)

308

i = n;

309

Py_MEMCPY(s, p, i);

310

s += i;

311

break;

312

case 'p':

313

sprintf(s, "%p", va_arg(vargs, void*));

314

/* %p is ill-defined: ensure leading 0x. */

315

if (s[1] == 'X')

316

s[1] = 'x';

317

else if (s[1] != 'x') {

318

memmove(s+2, s, strlen(s)+1);

319

s[0] = '0';

320

s[1] = 'x';

321

}

322

s += strlen(s);

323

break;

324

case '%':

325

*s++ = '%';

326

break;

327

default:

328

strcpy(s, p);

329

s += strlen(s);

330

goto end;

331

}

332

} else

333

*s++ = *f;

334

}

335

336

end:

337

_PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));

338

return string;

339

}

340

341

PyObject *

342

PyBytes_FromFormat(const char *format, ...)

343

{

344

PyObject* ret;

345

va_list vargs;

346

347

#ifdef HAVE_STDARG_PROTOTYPES

348

va_start(vargs, format);

349

#else

350

va_start(vargs);

351

#endif

352

ret = PyBytes_FromFormatV(format, vargs);

353

va_end(vargs);

354

return ret;

355

}

356

357

358

PyObject *PyBytes_Decode(const char *s,

359

Py_ssize_t size,

360

const char *encoding,

361

const char *errors)

362

{

363

PyObject *v, *str;

364

365

str = PyBytes_FromStringAndSize(s, size);

366

if (str == NULL)

367

return NULL;

368

v = PyBytes_AsDecodedString(str, encoding, errors);

369

Py_DECREF(str);

370

return v;

371

}

372

373

/* Decode the string object `str' through PyCodec_Decode() and return its
   result unchanged.  A NULL `encoding' selects the default encoding when
   Unicode support is compiled in, and is a ValueError otherwise.
   Returns NULL with an exception set on failure. */
PyObject *PyBytes_AsDecodedObject(PyObject *str,
                                  const char *encoding,
                                  const char *errors)
{
    if (!PyBytes_Check(str)) {
        PyErr_BadArgument();
        return NULL;
    }

    if (encoding == NULL) {
#ifdef Py_USING_UNICODE
        encoding = PyUnicode_GetDefaultEncoding();
#else
        PyErr_SetString(PyExc_ValueError, "no encoding specified");
        return NULL;
#endif
    }

    /* Decode via the codec registry; a NULL result is passed through. */
    return PyCodec_Decode(str, encoding, errors);
}

403

404

/* Like PyBytes_AsDecodedObject(), but insists on a string result: a
   Unicode result is first encoded with the default encoding, and any
   other non-string result raises TypeError.  Returns NULL on failure. */
PyObject *PyBytes_AsDecodedString(PyObject *str,
                                  const char *encoding,
                                  const char *errors)
{
    PyObject *decoded = PyBytes_AsDecodedObject(str, encoding, errors);

    if (decoded == NULL)
        return NULL;

#ifdef Py_USING_UNICODE
    /* Convert Unicode to a string using the default encoding */
    if (PyUnicode_Check(decoded)) {
        PyObject *as_unicode = decoded;
        decoded = PyUnicode_AsEncodedString(as_unicode, NULL, NULL);
        Py_DECREF(as_unicode);
        if (decoded == NULL)
            return NULL;
    }
#endif

    if (PyBytes_Check(decoded))
        return decoded;

    PyErr_Format(PyExc_TypeError,
                 "decoder did not return a string object (type=%.400s)",
                 Py_TYPE(decoded)->tp_name);
    Py_DECREF(decoded);
    return NULL;
}

437

438

PyObject *PyBytes_Encode(const char *s,

439

Py_ssize_t size,

440

const char *encoding,

441

const char *errors)

442

{

443

PyObject *v, *str;

444

445

str = PyBytes_FromStringAndSize(s, size);

446

if (str == NULL)

447

return NULL;

448

v = PyBytes_AsEncodedString(str, encoding, errors);

449

Py_DECREF(str);

450

return v;

451

}

452

453

/* Encode the string object `str' through PyCodec_Encode() and return its
   result unchanged.  A NULL `encoding' selects the default encoding when
   Unicode support is compiled in, and is a ValueError otherwise.
   Returns NULL with an exception set on failure. */
PyObject *PyBytes_AsEncodedObject(PyObject *str,
                                  const char *encoding,
                                  const char *errors)
{
    if (!PyBytes_Check(str)) {
        PyErr_BadArgument();
        return NULL;
    }

    if (encoding == NULL) {
#ifdef Py_USING_UNICODE
        encoding = PyUnicode_GetDefaultEncoding();
#else
        PyErr_SetString(PyExc_ValueError, "no encoding specified");
        return NULL;
#endif
    }

    /* Encode via the codec registry; a NULL result is passed through. */
    return PyCodec_Encode(str, encoding, errors);
}

483

484

/* Like PyBytes_AsEncodedObject(), but insists on a string result: a
   Unicode result is first encoded with the default encoding, and any
   other non-string result raises TypeError.  Returns NULL on failure. */
PyObject *PyBytes_AsEncodedString(PyObject *str,
                                  const char *encoding,
                                  const char *errors)
{
    PyObject *encoded = PyBytes_AsEncodedObject(str, encoding, errors);

    if (encoded == NULL)
        return NULL;

#ifdef Py_USING_UNICODE
    /* Convert Unicode to a string using the default encoding */
    if (PyUnicode_Check(encoded)) {
        PyObject *as_unicode = encoded;
        encoded = PyUnicode_AsEncodedString(as_unicode, NULL, NULL);
        Py_DECREF(as_unicode);
        if (encoded == NULL)
            return NULL;
    }
#endif

    if (PyBytes_Check(encoded))
        return encoded;

    PyErr_Format(PyExc_TypeError,
                 "encoder did not return a string object (type=%.400s)",
                 Py_TYPE(encoded)->tp_name);
    Py_DECREF(encoded);
    return NULL;
}

517

518

static void

519

string_dealloc(PyObject *op)

520

{

521

switch (PyBytes_CHECK_INTERNED(op)) {

522

case SSTATE_NOT_INTERNED:

523

break;

524

525

case SSTATE_INTERNED_MORTAL:

526

/* revive dead object temporarily for DelItem */

527

Py_REFCNT(op) = 3;

528

if (PyDict_DelItem(interned, op) != 0)

529

Py_FatalError(

530

"deletion of interned string failed");

531

break;

532

533

case SSTATE_INTERNED_IMMORTAL:

534

Py_FatalError("Immortal interned string died.");

535

536

default:

537

Py_FatalError("Inconsistent interned string state.");

538

}

539

Py_TYPE(op)->tp_free(op);

540

}

541

542

/* Unescape a backslash-escaped string. If unicode is non-zero,

543

the string is a u-literal. If recode_encoding is non-zero,

544

the string is UTF-8 encoded and should be re-encoded in the

545

specified encoding. */

546

547

PyObject *PyBytes_DecodeEscape(const char *s,

548

Py_ssize_t len,

549

const char *errors,

550

Py_ssize_t unicode,

551

const char *recode_encoding)

552

{

553

int c;

554

char *p, *buf;

555

const char *end;

556

PyObject *v;

557

Py_ssize_t newlen = recode_encoding ? 4*len:len;

558

v = PyBytes_FromStringAndSize((char *)NULL, newlen);

559

if (v == NULL)

560

return NULL;

561

p = buf = PyBytes_AsString(v);

562

end = s + len;

563

while (s < end) {

564

if (*s != '\\') {

565

non_esc:

566

#ifdef Py_USING_UNICODE

567

if (recode_encoding && (*s & 0x80)) {

568

PyObject *u, *w;

569

char *r;

570

const char* t;

571

Py_ssize_t rn;

572

t = s;

573

/* Decode non-ASCII bytes as UTF-8. */

574

while (t < end && (*t & 0x80)) t++;

575

u = PyUnicode_DecodeUTF8(s, t - s, errors);

576

if(!u) goto failed;

577

578

/* Recode them in target encoding. */

579

w = PyUnicode_AsEncodedString(

580

u, recode_encoding, errors);

581

Py_DECREF(u);

582

if (!w) goto failed;

583

584

/* Append bytes to output buffer. */

585

assert(PyBytes_Check(w));

586

r = PyBytes_AS_STRING(w);

587

rn = PyBytes_GET_SIZE(w);

588

Py_MEMCPY(p, r, rn);

589

p += rn;

590

Py_DECREF(w);

591

s = t;

592

} else {

593

*p++ = *s++;

594

}

595

#else

596

*p++ = *s++;

597

#endif

598

continue;

599

}

600

s++;

601

if (s==end) {

602

PyErr_SetString(PyExc_ValueError,

603

"Trailing \\ in string");

604

goto failed;

605

}

606

switch (*s++) {

607

/* XXX This assumes ASCII! */

608

case '\n': break;

609

case '\\': *p++ = '\\'; break;

610

case '\'': *p++ = '\''; break;

611

case '\"': *p++ = '\"'; break;

612

case 'b': *p++ = '\b'; break;

613

case 'f': *p++ = '\014'; break; /* FF */

614

case 't': *p++ = '\t'; break;

615

case 'n': *p++ = '\n'; break;

616

case 'r': *p++ = '\r'; break;

617

case 'v': *p++ = '\013'; break; /* VT */

618

case 'a': *p++ = '\007'; break; /* BEL, not classic C */

619

case '0': case '1': case '2': case '3':

620

case '4': case '5': case '6': case '7':

621

c = s[-1] - '0';

622

if (s < end && '0' <= *s && *s <= '7') {

623

c = (c<<3) + *s++ - '0';

624

if (s < end && '0' <= *s && *s <= '7')

625

c = (c<<3) + *s++ - '0';

626

}

627

*p++ = c;

628

break;

629

case 'x':

630

if (s+1 < end &&

631

isxdigit(Py_CHARMASK(s[0])) &&

632

isxdigit(Py_CHARMASK(s[1])))

633

{

634

unsigned int x = 0;

635

c = Py_CHARMASK(*s);

636

s++;

637

if (isdigit(c))

638

x = c - '0';

639

else if (islower(c))

640

x = 10 + c - 'a';

641

else

642

x = 10 + c - 'A';

643

x = x << 4;

644

c = Py_CHARMASK(*s);

645

s++;

646

if (isdigit(c))

647

x += c - '0';

648

else if (islower(c))

649

x += 10 + c - 'a';

650

else

651

x += 10 + c - 'A';

652

*p++ = x;

653

break;

654

}

655

if (!errors || strcmp(errors, "strict") == 0) {

656

PyErr_SetString(PyExc_ValueError,

657

"invalid \\x escape");

658

goto failed;

659

}

660

if (strcmp(errors, "replace") == 0) {

661

*p++ = '?';

662

} else if (strcmp(errors, "ignore") == 0)

663

/* do nothing */;

664

else {

665

PyErr_Format(PyExc_ValueError,

666

"decoding error; "

667

"unknown error handling code: %.400s",

668

errors);

669

goto failed;

670

}

671

#ifndef Py_USING_UNICODE

672

case 'u':

673

case 'U':

674

case 'N':

675

if (unicode) {

676

PyErr_SetString(PyExc_ValueError,

677

"Unicode escapes not legal "

678

"when Unicode disabled");

679

goto failed;

680

}

681

#endif

682

default:

683

*p++ = '\\';

684

s--;

685

goto non_esc; /* an arbitry number of unescaped

686

UTF-8 bytes may follow. */

687

}

688

}

689

if (p-buf < newlen)

690

_PyBytes_Resize(&v, p - buf);

691

return v;

692

failed:

693

Py_DECREF(v);

694

return NULL;

695

}

696

697

/* -------------------------------------------------------------------- */

698

/* object api */

699

700

static Py_ssize_t

701

string_getsize(register PyObject *op)

702

{

703

char *s;

704

Py_ssize_t len;

705

if (PyBytes_AsStringAndSize(op, &s, &len))

706

return -1;

707

return len;

708

}

709

710

static /*const*/ char *

711

string_getbuffer(register PyObject *op)

712

{

713

char *s;

714

Py_ssize_t len;

715

if (PyBytes_AsStringAndSize(op, &s, &len))

716

return NULL;

717

return s;

718

}

719

720

Py_ssize_t

721

PyBytes_Size(register PyObject *op)

722

{

723

if (!PyBytes_Check(op))

724

return string_getsize(op);

725

return Py_SIZE(op);

726

}

727

728

/*const*/ char *

729

PyBytes_AsString(register PyObject *op)

730

{

731

if (!PyBytes_Check(op))

732

return string_getbuffer(op);

733

return ((PyBytesObject *)op) -> ob_sval;

734

}

735

736

int

737

PyBytes_AsStringAndSize(register PyObject *obj,

738

register char **s,

739

register Py_ssize_t *len)

740

{

741

if (s == NULL) {

742

PyErr_BadInternalCall();

743

return -1;

744

}

745

746

if (!PyBytes_Check(obj)) {

747

#ifdef Py_USING_UNICODE

748

if (PyUnicode_Check(obj)) {

749

obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);

750

if (obj == NULL)

751

return -1;

752

}

753

else

754

#endif

755

{

756

PyErr_Format(PyExc_TypeError,

757

"expected string or Unicode object, "

758

"%.200s found", Py_TYPE(obj)->tp_name);

759

return -1;

760

}

761

}

762

763

*s = PyBytes_AS_STRING(obj);

764

if (len != NULL)

765

*len = PyBytes_GET_SIZE(obj);

766

else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {

767

PyErr_SetString(PyExc_TypeError,

768

"expected string without null bytes");

769

return -1;

770

}

771

return 0;

772

}

773

774

/* -------------------------------------------------------------------- */

775

/* Methods */

776

777

#include "stringlib/stringdefs.h"

778

#include "stringlib/fastsearch.h"

779

780

#include "stringlib/count.h"

781

#include "stringlib/find.h"

782

#include "stringlib/partition.h"

783

784

#define _Py_InsertThousandsGrouping _PyBytes_InsertThousandsGrouping

785

#include "stringlib/localeutil.h"

786

787

788

789

static int

790

string_print(PyBytesObject *op, FILE *fp, int flags)

791

{

792

Py_ssize_t i, str_len;

793

char c;

794

int quote;

795

796

/* XXX Ought to check for interrupts when writing long strings */

797

if (! PyBytes_CheckExact(op)) {

798

int ret;

799

/* A str subclass may have its own __str__ method. */

800

op = (PyBytesObject *) PyObject_Str((PyObject *)op);

801

if (op == NULL)

802

return -1;

803

ret = string_print(op, fp, flags);

804

Py_DECREF(op);

805

return ret;

806

}

807

if (flags & Py_PRINT_RAW) {

808

char *data = op->ob_sval;

809

Py_ssize_t size = Py_SIZE(op);

810

Py_BEGIN_ALLOW_THREADS

811

while (size > INT_MAX) {

812

/* Very long strings cannot be written atomically.

813

* But don't write exactly INT_MAX bytes at a time

814

* to avoid memory aligment issues.

815

*/

816

const int chunk_size = INT_MAX & ~0x3FFF;

817

fwrite(data, 1, chunk_size, fp);

818

data += chunk_size;

819

size -= chunk_size;

820

}

821

#ifdef __VMS

822

if (size) fwrite(data, (int)size, 1, fp);

823

#else

824

fwrite(data, 1, (int)size, fp);

825

#endif

826

Py_END_ALLOW_THREADS

827

return 0;

828

}

829

830

/* figure out which quote to use; single is preferred */

831

quote = '\'';

832

if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&

833

!memchr(op->ob_sval, '"', Py_SIZE(op)))

834

quote = '"';

835

836

str_len = Py_SIZE(op);

837

Py_BEGIN_ALLOW_THREADS

838

fputc(quote, fp);

839

for (i = 0; i < str_len; i++) {

840

/* Since strings are immutable and the caller should have a

841

reference, accessing the interal buffer should not be an issue

842

with the GIL released. */

843

c = op->ob_sval[i];

844

if (c == quote || c == '\\')

845

fprintf(fp, "\\%c", c);

846

else if (c == '\t')

847

fprintf(fp, "\\t");

848

else if (c == '\n')

849

fprintf(fp, "\\n");

850

else if (c == '\r')

851

fprintf(fp, "\\r");

852

else if (c < ' ' || c >= 0x7f)

853

fprintf(fp, "\\x%02x", c & 0xff);

854

else

855

fputc(c, fp);

856

}

857

fputc(quote, fp);

858

Py_END_ALLOW_THREADS

859

return 0;

860

}

861

862

/* Return the quoted, escaped representation of the string object `obj'.
 *
 * smartquotes - when non-zero, double quotes are used if the string
 *               contains a single quote and no double quote; otherwise
 *               single quotes are always used.
 *
 * Returns a new reference, or NULL with an exception set (OverflowError
 * if the worst-case result size 2 + 4*len does not fit in Py_ssize_t).
 */
PyObject *
PyBytes_Repr(PyObject *obj, int smartquotes)
{
    register PyBytesObject* op = (PyBytesObject*) obj;
    register Py_ssize_t i;
    register char c;
    register char *p;
    size_t newsize;
    int quote;
    PyObject *v;

    /* Validate before multiplying: checking the product afterwards can
       miss a wrapped result. */
    if (Py_SIZE(op) > (PY_SSIZE_T_MAX - 2) / 4) {
        PyErr_SetString(PyExc_OverflowError,
            "string is too large to make repr");
        return NULL;
    }
    /* worst case: every byte becomes \xNN, plus the two quotes */
    newsize = 2 + 4 * Py_SIZE(op);
    v = PyBytes_FromStringAndSize((char *)NULL, newsize);
    if (v == NULL)
        return NULL;

    /* figure out which quote to use; single is preferred */
    quote = '\'';
    if (smartquotes &&
        memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
        !memchr(op->ob_sval, '"', Py_SIZE(op)))
        quote = '"';

    p = PyBytes_AS_STRING(v);
    *p++ = quote;
    for (i = 0; i < Py_SIZE(op); i++) {
        /* There's at least enough room for a hex escape
           and a closing quote. */
        assert(newsize - (p - PyBytes_AS_STRING(v)) >= 5);
        c = op->ob_sval[i];
        if (c == quote || c == '\\')
            *p++ = '\\', *p++ = c;
        else if (c == '\t')
            *p++ = '\\', *p++ = 't';
        else if (c == '\n')
            *p++ = '\\', *p++ = 'n';
        else if (c == '\r')
            *p++ = '\\', *p++ = 'r';
        else if (c < ' ' || c >= 0x7f) {
            /* For performance, we don't want to call
               PyOS_snprintf here (extra layers of
               function call). */
            sprintf(p, "\\x%02x", c & 0xff);
            p += 4;
        }
        else
            *p++ = c;
    }
    assert(newsize - (p - PyBytes_AS_STRING(v)) >= 1);
    *p++ = quote;
    *p = '\0';
    /* Shrink to the number of bytes actually produced. */
    if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v))))
        return NULL;
    return v;
}

923

924

/* repr() implementation: PyBytes_Repr() with smart quote selection on. */
static PyObject *
string_repr(PyObject *op)
{
    const int smartquotes = 1;

    return PyBytes_Repr(op, smartquotes);
}

929

930

/* str() implementation: an exact string is returned as-is (with a new
   reference); an instance of a subtype yields a fresh genuine string
   holding the same bytes. */
static PyObject *
string_str(PyObject *s)
{
    PyBytesObject *src;

    assert(PyBytes_Check(s));
    if (!PyBytes_CheckExact(s)) {
        /* Subtype -- return genuine string with the same value. */
        src = (PyBytesObject *) s;
        return PyBytes_FromStringAndSize(src->ob_sval, Py_SIZE(src));
    }
    Py_INCREF(s);
    return s;
}

944

945

static Py_ssize_t

946

string_length(PyBytesObject *a)

947

{

948

return Py_SIZE(a);

949

}

950

951

static PyObject *

952

string_concat(register PyBytesObject *a, register PyObject *bb)

953

{

954

register Py_ssize_t size;

955

register PyBytesObject *op;

956

if (!PyBytes_Check(bb)) {

957

#ifdef Py_USING_UNICODE

958

if (PyUnicode_Check(bb))

959

return PyUnicode_Concat((PyObject *)a, bb);

960

#endif

961

if (PyByteArray_Check(bb))

962

return PyByteArray_Concat((PyObject *)a, bb);

963

PyErr_Format(PyExc_TypeError,

964

"cannot concatenate 'str' and '%.200s' objects",

965

Py_TYPE(bb)->tp_name);

966

return NULL;

967

}

968

#define b ((PyBytesObject *)bb)

969

/* Optimize cases with empty left or right operand */

970

if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&

971

PyBytes_CheckExact(a) && PyBytes_CheckExact(b)) {

972

if (Py_SIZE(a) == 0) {

973

Py_INCREF(bb);

974

return bb;

975

}

976

Py_INCREF(a);

977

return (PyObject *)a;

978

}

979

size = Py_SIZE(a) + Py_SIZE(b);

980

if (size < 0) {

981

PyErr_SetString(PyExc_OverflowError,

982

"strings are too large to concat");

983

return NULL;

984

}

985

986

/* Inline PyObject_NewVar */

987

op = (PyBytesObject *)PyObject_MALLOC(sizeof(PyBytesObject) + size);

988

if (op == NULL)

989

return PyErr_NoMemory();

990

PyObject_INIT_VAR(op, &PyBytes_Type, size);

991

op->ob_shash = -1;

992

op->ob_sstate = SSTATE_NOT_INTERNED;

993

Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));

994

Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));

995

op->ob_sval[size] = '\0';

996

return (PyObject *) op;

997

#undef b

998

}

999

1000

static PyObject *

1001

string_repeat(register PyBytesObject *a, register Py_ssize_t n)

1002

{

1003

register Py_ssize_t i;

1004

register Py_ssize_t j;

1005

register Py_ssize_t size;

1006

register PyBytesObject *op;

1007

size_t nbytes;

1008

if (n < 0)

1009

n = 0;

1010

/* watch out for overflows: the size can overflow int,

1011

* and the # of bytes needed can overflow size_t

1012

*/

1013

size = Py_SIZE(a) * n;

1014

if (n && size / n != Py_SIZE(a)) {

1015

PyErr_SetString(PyExc_OverflowError,

1016

"repeated string is too long");

1017

return NULL;

1018

}

1019

if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {

1020

Py_INCREF(a);

1021

return (PyObject *)a;

1022

}

1023

nbytes = (size_t)size;

1024

if (nbytes + sizeof(PyBytesObject) <= nbytes) {

1025

PyErr_SetString(PyExc_OverflowError,

1026

"repeated string is too long");

1027

return NULL;

1028

}

1029

op = (PyBytesObject *)

1030

PyObject_MALLOC(sizeof(PyBytesObject) + nbytes);

1031

if (op == NULL)

1032

return PyErr_NoMemory();

1033

PyObject_INIT_VAR(op, &PyBytes_Type, size);

1034

op->ob_shash = -1;

1035

op->ob_sstate = SSTATE_NOT_INTERNED;

1036

op->ob_sval[size] = '\0';

1037

if (Py_SIZE(a) == 1 && n > 0) {

1038

memset(op->ob_sval, a->ob_sval[0] , n);

1039

return (PyObject *) op;

1040

}

1041

i = 0;

1042

if (i < size) {

1043

Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));

1044

i = Py_SIZE(a);

1045

}

1046

while (i < size) {

1047

j = (i <= size-i) ? i : size-i;

1048

Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);

1049

i += j;

1050

}

1051

return (PyObject *) op;

1052

}

1053

1054

/* String slice a[i:j] consists of characters a[i] ... a[j-1] */

1055

1056

static PyObject *

1057

string_slice(register PyBytesObject *a, register Py_ssize_t i,

1058

register Py_ssize_t j)

1059

/* j -- may be negative! */

1060

{

1061

if (i < 0)

1062

i = 0;

1063

if (j < 0)

1064

j = 0; /* Avoid signed/unsigned bug in next line */

1065

if (j > Py_SIZE(a))

1066

j = Py_SIZE(a);

1067

if (i == 0 && j == Py_SIZE(a) && PyBytes_CheckExact(a)) {

1068

/* It's the same as a */

1069

Py_INCREF(a);

1070

return (PyObject *)a;

1071

}

1072

if (j < i)

1073

j = i;

1074

return PyBytes_FromStringAndSize(a->ob_sval + i, j-i);

1075

}

1076

1077

static int

1078

string_contains(PyObject *str_obj, PyObject *sub_obj)

1079

{

1080

if (!PyBytes_CheckExact(sub_obj)) {

1081

#ifdef Py_USING_UNICODE

1082

if (PyUnicode_Check(sub_obj))

1083

return PyUnicode_Contains(str_obj, sub_obj);

1084

#endif

1085

if (!PyBytes_Check(sub_obj)) {

1086

PyErr_Format(PyExc_TypeError,

1087

"'in <string>' requires string as left operand, "

1088

"not %.200s", Py_TYPE(sub_obj)->tp_name);

1089

return -1;

1090

}

1091

}

1092

1093

return stringlib_contains_obj(str_obj, sub_obj);

1094

}

1095

1096

static PyObject *

1097

string_item(PyBytesObject *a, register Py_ssize_t i)

1098

{

1099

char pchar;

1100

PyObject *v;

1101

if (i < 0 || i >= Py_SIZE(a)) {

1102

PyErr_SetString(PyExc_IndexError, "string index out of range");

1103

return NULL;

1104

}

1105

pchar = a->ob_sval[i];

1106

v = (PyObject *)characters[pchar & UCHAR_MAX];

1107

if (v == NULL)

1108

v = PyBytes_FromStringAndSize(&pchar, 1);

1109

else {

1110

#ifdef COUNT_ALLOCS

1111

one_strings++;

1112

#endif

1113

Py_INCREF(v);

1114

}

1115

return v;

1116

}

1117

1118

static PyObject*

1119

string_richcompare(PyBytesObject *a, PyBytesObject *b, int op)

1120

{

1121

int c;

1122

Py_ssize_t len_a, len_b;

1123

Py_ssize_t min_len;

1124

PyObject *result;

1125

1126

/* Make sure both arguments are strings. */

1127

if (!(PyBytes_Check(a) && PyBytes_Check(b))) {

1128

result = Py_NotImplemented;

1129

goto out;

1130

}

1131

if (a == b) {

1132

switch (op) {

1133

case Py_EQ:case Py_LE:case Py_GE:

1134

result = Py_True;

1135

goto out;

1136

case Py_NE:case Py_LT:case Py_GT:

1137

result = Py_False;

1138

goto out;

1139

}

1140

}

1141

if (op == Py_EQ) {

1142

/* Supporting Py_NE here as well does not save

1143

much time, since Py_NE is rarely used. */

1144

if (Py_SIZE(a) == Py_SIZE(b)

1145

&& (a->ob_sval[0] == b->ob_sval[0]

1146

&& memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {

1147

result = Py_True;

1148

} else {

1149

result = Py_False;

1150

}

1151

goto out;

1152

}

1153

len_a = Py_SIZE(a); len_b = Py_SIZE(b);

1154

min_len = (len_a < len_b) ? len_a : len_b;

1155

if (min_len > 0) {

1156

c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);

1157

if (c==0)

1158

c = memcmp(a->ob_sval, b->ob_sval, min_len);

1159

} else

1160

c = 0;

1161

if (c == 0)

1162

c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;

1163

switch (op) {

1164

case Py_LT: c = c < 0; break;

1165

case Py_LE: c = c <= 0; break;

1166

case Py_EQ: assert(0); break; /* unreachable */

1167

case Py_NE: c = c != 0; break;

1168

case Py_GT: c = c > 0; break;

1169

case Py_GE: c = c >= 0; break;

1170

default:

1171

result = Py_NotImplemented;

1172

goto out;

1173

}

1174

result = c ? Py_True : Py_False;

1175

out:

1176

Py_INCREF(result);

1177

return result;

1178

}

1179

1180

int

1181

_PyBytes_Eq(PyObject *o1, PyObject *o2)

1182

{

1183

PyBytesObject *a = (PyBytesObject*) o1;

1184

PyBytesObject *b = (PyBytesObject*) o2;

1185

return Py_SIZE(a) == Py_SIZE(b)

1186

&& *a->ob_sval == *b->ob_sval

1187

&& memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;

1188

}

1189

1190

static long

1191

string_hash(PyBytesObject *a)

1192

{

1193

register Py_ssize_t len;

1194

register unsigned char *p;

1195

register long x;

1196

1197

if (a->ob_shash != -1)

1198

return a->ob_shash;

1199

len = Py_SIZE(a);

1200

p = (unsigned char *) a->ob_sval;

1201

x = *p << 7;

1202

while (--len >= 0)

1203

x = (1000003*x) ^ *p++;

1204

x ^= Py_SIZE(a);

1205

if (x == -1)

1206

x = -2;

1207

a->ob_shash = x;

1208

return x;

1209

}

1210

1211

static PyObject*

1212

string_subscript(PyBytesObject* self, PyObject* item)

1213

{

1214

if (PyIndex_Check(item)) {

1215

Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);

1216

if (i == -1 && PyErr_Occurred())

1217

return NULL;

1218

if (i < 0)

1219

i += PyBytes_GET_SIZE(self);

1220

return string_item(self, i);

1221

}

1222

else if (PySlice_Check(item)) {

1223

Py_ssize_t start, stop, step, slicelength, cur, i;

1224

char* source_buf;

1225

char* result_buf;

1226

PyObject* result;

1227

1228

if (PySlice_GetIndicesEx((PySliceObject*)item,

1229

PyBytes_GET_SIZE(self),

1230

&start, &stop, &step, &slicelength) < 0) {

1231

return NULL;

1232

}

1233

1234

if (slicelength <= 0) {

1235

return PyBytes_FromStringAndSize("", 0);

1236

}

1237

else if (start == 0 && step == 1 &&

1238

slicelength == PyBytes_GET_SIZE(self) &&

1239

PyBytes_CheckExact(self)) {

1240

Py_INCREF(self);

1241

return (PyObject *)self;

1242

}

1243

else if (step == 1) {

1244

return PyBytes_FromStringAndSize(

1245

PyBytes_AS_STRING(self) + start,

1246

slicelength);

1247

}

1248

else {

1249

source_buf = PyBytes_AsString((PyObject*)self);

1250

result_buf = (char *)PyMem_Malloc(slicelength);

1251

if (result_buf == NULL)

1252

return PyErr_NoMemory();

1253

1254

for (cur = start, i = 0; i < slicelength;

1255

cur += step, i++) {

1256

result_buf[i] = source_buf[cur];

1257

}

1258

1259

result = PyBytes_FromStringAndSize(result_buf,

1260

slicelength);

1261

PyMem_Free(result_buf);

1262

return result;

1263

}

1264

}

1265

else {

1266

PyErr_Format(PyExc_TypeError,

1267

"string indices must be integers, not %.200s",

1268

Py_TYPE(item)->tp_name);

1269

return NULL;

1270

}

1271

}

1272

1273

static Py_ssize_t

1274

string_buffer_getreadbuf(PyBytesObject *self, Py_ssize_t index, const void **ptr)

1275

{

1276

if ( index != 0 ) {

1277

PyErr_SetString(PyExc_SystemError,

1278

"accessing non-existent string segment");

1279

return -1;

1280

}

1281

*ptr = (void *)self->ob_sval;

1282

return Py_SIZE(self);

1283

}

1284

1285

static Py_ssize_t

1286

string_buffer_getwritebuf(PyBytesObject *self, Py_ssize_t index, const void **ptr)

1287

{

1288

PyErr_SetString(PyExc_TypeError,

1289

"Cannot use string as modifiable buffer");

1290

return -1;

1291

}

1292

1293

static Py_ssize_t

1294

string_buffer_getsegcount(PyBytesObject *self, Py_ssize_t *lenp)

1295

{

1296

if ( lenp )

1297

*lenp = Py_SIZE(self);

1298

return 1;

1299

}

1300

1301

static Py_ssize_t

1302

string_buffer_getcharbuf(PyBytesObject *self, Py_ssize_t index, const char **ptr)

1303

{

1304

if ( index != 0 ) {

1305

PyErr_SetString(PyExc_SystemError,

1306

"accessing non-existent string segment");

1307

return -1;

1308

}

1309

*ptr = self->ob_sval;

1310

return Py_SIZE(self);

1311

}

1312

1313

static int

1314

string_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)

1315

{

1316

return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_SIZE(self),

1317

0, flags);

1318

}

1319

1320

/* Sequence protocol slots for the string type.  Item and slice assignment
   are left empty (0). */
static PySequenceMethods string_as_sequence = {
    (lenfunc)string_length,             /* sq_length */
    (binaryfunc)string_concat,          /* sq_concat */
    (ssizeargfunc)string_repeat,        /* sq_repeat */
    (ssizeargfunc)string_item,          /* sq_item */
    (ssizessizeargfunc)string_slice,    /* sq_slice */
    0,                                  /* sq_ass_item */
    0,                                  /* sq_ass_slice */
    (objobjproc)string_contains         /* sq_contains */
};

1330

1331

/* Mapping protocol slots for the string type: length and subscript only;
   the third slot (subscript assignment) is left empty. */
static PyMappingMethods string_as_mapping = {
    (lenfunc)string_length,             /* mp_length */
    (binaryfunc)string_subscript,       /* mp_subscript */
    0,                                  /* mp_ass_subscript */
};

1336

1337

/* Buffer protocol slots for the string type: the four old-style accessors
   followed by the new-style getbuffer hook.  The last slot is left empty. */
static PyBufferProcs string_as_buffer = {
    (readbufferproc)string_buffer_getreadbuf,
    (writebufferproc)string_buffer_getwritebuf,
    (segcountproc)string_buffer_getsegcount,
    (charbufferproc)string_buffer_getcharbuf,
    (getbufferproc)string_buffer_getbuffer,
    0, /* XXX */
};

1345

1346

1347

1348

/* Selectors for which end(s) of the string a strip operation trims. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for strip selector i: the matching format string with its
   three-character "|O:" prefix skipped. */
#define STRIPNAME(i) (stripformat[(i)]+3)

1356

1357

1358

/* Test whether `pattern` (`length` bytes long) occurs in `target` at
   `offset`.  The first and last bytes are compared directly as a cheap
   pre-filter; only the bytes in between go through memcmp().

   Don't call if length < 2: the memcmp() count is length-2 and would go
   negative.

   Every parameter is parenthesized so that expression arguments expand
   with the intended precedence.  Each parameter is evaluated more than
   once, so arguments must be free of side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)                    \
    ((target)[(offset)] == (pattern)[0] &&                                  \
     (target)[(offset) + (length) - 1] == (pattern)[(length) - 1] &&        \
     !memcmp((target) + (offset) + 1, (pattern) + 1, (length) - 2))

1363

1364

1365

/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16. Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields). For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements.
   Yields maxsplit+1, capped at MAX_PREALLOC.  The parameter is
   parenthesized so that expression arguments expand with the intended
   precedence; it is evaluated twice, so it must be free of side effects. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit) + 1)

1377

1378

/* The SPLIT_* macros below expand in the caller's scope and rely on it
   providing `str` (scratch PyObject *), `list` (the result list) and an
   `onError` label; SPLIT_ADD and FIX_PREALLOC_SIZE additionally use
   `count`. */

/* Build the substring data[left:right] and append it to `list`.  The
   macro drops its own reference to the new string on both paths. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                    (right) - (left));          \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Build the substring data[left:right] and store it as element `count` of
   `list`: directly into the preallocated slots while count < MAX_PREALLOC,
   by appending afterwards.  Increments `count`. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                    (right) - (left));          \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count

/* Cursor helpers: advance i forwards (bounded by len) or backwards
   (bounded by 0) over whitespace / non-whitespace bytes of s.  `i` must be
   a modifiable lvalue. */
#define SKIP_SPACE(s, i, len)    { while (i<len && isspace(Py_CHARMASK(s[i]))) i++; }
#define SKIP_NONSPACE(s, i, len) { while (i<len && !isspace(Py_CHARMASK(s[i]))) i++; }
#define RSKIP_SPACE(s, i)        { while (i>=0 && isspace(Py_CHARMASK(s[i]))) i--; }
#define RSKIP_NONSPACE(s, i)     { while (i>=0 && !isspace(Py_CHARMASK(s[i]))) i--; }

1414

1415

/* Split self on runs of whitespace, performing at most maxsplit splits.
   len is the length of self's data.  Empty fields are never produced.
   Returns a new list, or NULL on error. */
Py_LOCAL_INLINE(PyObject *)
split_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
{
    const char *data = PyBytes_AS_STRING(self);
    Py_ssize_t pos = 0, word_start = 0, count = 0;
    PyObject *str;
    PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));

    if (list == NULL)
        return NULL;

    for (; maxsplit > 0; maxsplit--) {
        SKIP_SPACE(data, pos, len);
        if (pos == len)
            break;
        word_start = pos;
        pos++;
        SKIP_NONSPACE(data, pos, len);
        if (word_start == 0 && pos == len && PyBytes_CheckExact(self)) {
            /* No whitespace in self, so just use it as list[0] */
            Py_INCREF(self);
            PyList_SET_ITEM(list, 0, (PyObject *)self);
            count++;
            break;
        }
        SPLIT_ADD(data, word_start, pos);
    }

    if (pos < len) {
        /* Only occurs when maxsplit was reached */
        /* Skip any remaining whitespace and copy to end of string */
        SKIP_SPACE(data, pos, len);
        if (pos != len)
            SPLIT_ADD(data, pos, len);
    }
    FIX_PREALLOC_SIZE(list);
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}

1456

1457

/* Split self on the single byte ch, performing at most maxcount splits.
   len is the length of self's data.  Returns a new list, or NULL on
   error. */
Py_LOCAL_INLINE(PyObject *)
split_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
{
    const char *data = PyBytes_AS_STRING(self);
    register Py_ssize_t seg_start, scan, count=0;
    PyObject *str;
    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));

    if (list == NULL)
        return NULL;

    seg_start = scan = 0;
    while ((scan < len) && (maxcount-- > 0)) {
        /* I found that using memchr makes no difference */
        while (scan < len && data[scan] != ch)
            scan++;
        if (scan < len) {
            SPLIT_ADD(data, seg_start, scan);
            seg_start = scan = scan + 1;
        }
    }
    if (seg_start == 0 && count == 0 && PyBytes_CheckExact(self)) {
        /* ch not in self, so just use self as list[0] */
        Py_INCREF(self);
        PyList_SET_ITEM(list, 0, (PyObject *)self);
        count++;
    }
    else if (seg_start <= len) {
        /* Whatever follows the last separator is the final field. */
        SPLIT_ADD(data, seg_start, len);
    }
    FIX_PREALLOC_SIZE(list);
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}

1495

1496

PyDoc_STRVAR(split__doc__,

1497

"S.split([sep [,maxsplit]]) -> list of strings\n\

1498

\n\

1499

Return a list of the words in the string S, using sep as the\n\

1500

delimiter string. If maxsplit is given, at most maxsplit\n\

1501

splits are done. If sep is not specified or is None, any\n\

1502

whitespace string is a separator and empty strings are removed\n\

1503

from the result.");

1504

1505

static PyObject *

1506

string_split(PyBytesObject *self, PyObject *args)

1507

{

1508

Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;

1509

Py_ssize_t maxsplit = -1, count=0;

1510

const char *s = PyBytes_AS_STRING(self), *sub;

1511

PyObject *list, *str, *subobj = Py_None;

1512

#ifdef USE_FAST

1513

Py_ssize_t pos;

1514

#endif

1515

1516

if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))

1517

return NULL;

1518

if (maxsplit < 0)

1519

maxsplit = PY_SSIZE_T_MAX;

1520

if (subobj == Py_None)

1521

return split_whitespace(self, len, maxsplit);

1522

if (PyBytes_Check(subobj)) {

1523

sub = PyBytes_AS_STRING(subobj);

1524

n = PyBytes_GET_SIZE(subobj);

1525

}

1526

#ifdef Py_USING_UNICODE

1527

else if (PyUnicode_Check(subobj))

1528

return PyUnicode_Split((PyObject *)self, subobj, maxsplit);

1529

#endif

1530

else if (PyObject_AsCharBuffer(subobj, &sub, &n))

1531

return NULL;

1532

1533

if (n == 0) {

1534

PyErr_SetString(PyExc_ValueError, "empty separator");

1535

return NULL;

1536

}

1537

else if (n == 1)

1538

return split_char(self, len, sub[0], maxsplit);

1539

1540

list = PyList_New(PREALLOC_SIZE(maxsplit));

1541

if (list == NULL)

1542

return NULL;

1543

1544

#ifdef USE_FAST

1545

i = j = 0;

1546

while (maxsplit-- > 0) {

1547

pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);

1548

if (pos < 0)

1549

break;

1550

j = i+pos;

1551

SPLIT_ADD(s, i, j);

1552

i = j + n;

1553

}

1554

#else

1555

i = j = 0;

1556

while ((j+n <= len) && (maxsplit-- > 0)) {

1557

for (; j+n <= len; j++) {

1558

if (Py_STRING_MATCH(s, j, sub, n)) {

1559

SPLIT_ADD(s, i, j);

1560

i = j = j + n;

1561

break;

1562

}

1563

}

1564

}

1565

#endif

1566

SPLIT_ADD(s, i, len);

1567

FIX_PREALLOC_SIZE(list);

1568

return list;

1569

1570

onError:

1571

Py_DECREF(list);

1572

return NULL;

1573

}

1574

1575

PyDoc_STRVAR(partition__doc__,

1576

"S.partition(sep) -> (head, sep, tail)\n\

1577

\n\

1578

Searches for the separator sep in S, and returns the part before it,\n\

1579

the separator itself, and the part after it. If the separator is not\n\

1580

found, returns S and two empty strings.");

1581

1582

static PyObject *

1583

string_partition(PyBytesObject *self, PyObject *sep_obj)

1584

{

1585

const char *sep;

1586

Py_ssize_t sep_len;

1587

1588

if (PyBytes_Check(sep_obj)) {

1589

sep = PyBytes_AS_STRING(sep_obj);

1590

sep_len = PyBytes_GET_SIZE(sep_obj);

1591

}

1592

#ifdef Py_USING_UNICODE

1593

else if (PyUnicode_Check(sep_obj))

1594

return PyUnicode_Partition((PyObject *) self, sep_obj);

1595

#endif

1596

else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))

1597

return NULL;

1598

1599

return stringlib_partition(

1600

(PyObject*) self,

1601

PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),

1602

sep_obj, sep, sep_len

1603

);

1604

}

1605

1606

PyDoc_STRVAR(rpartition__doc__,

1607

"S.rpartition(sep) -> (tail, sep, head)\n\

1608

\n\

1609

Searches for the separator sep in S, starting at the end of S, and returns\n\

1610

the part before it, the separator itself, and the part after it. If the\n\

1611

separator is not found, returns two empty strings and S.");

1612

1613

static PyObject *

1614

string_rpartition(PyBytesObject *self, PyObject *sep_obj)

1615

{

1616

const char *sep;

1617

Py_ssize_t sep_len;

1618

1619

if (PyBytes_Check(sep_obj)) {

1620

sep = PyBytes_AS_STRING(sep_obj);

1621

sep_len = PyBytes_GET_SIZE(sep_obj);

1622

}

1623

#ifdef Py_USING_UNICODE

1624

else if (PyUnicode_Check(sep_obj))

1625

return PyUnicode_Partition((PyObject *) self, sep_obj);

1626

#endif

1627

else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))

1628

return NULL;

1629

1630

return stringlib_rpartition(

1631

(PyObject*) self,

1632

PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),

1633

sep_obj, sep, sep_len

1634

);

1635

}

1636

1637

/* Split self on runs of whitespace, scanning from the right and performing
   at most maxsplit splits.  Fields are collected right-to-left and the list
   is reversed before returning.  Returns a new list, or NULL on error. */
Py_LOCAL_INLINE(PyObject *)
rsplit_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
{
    const char *data = PyBytes_AS_STRING(self);
    Py_ssize_t pos, word_end, count=0;
    PyObject *str;
    PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));

    if (list == NULL)
        return NULL;

    pos = word_end = len-1;

    for (; maxsplit > 0; maxsplit--) {
        RSKIP_SPACE(data, pos);
        if (pos < 0)
            break;
        word_end = pos;
        pos--;
        RSKIP_NONSPACE(data, pos);
        if (word_end == len-1 && pos < 0 && PyBytes_CheckExact(self)) {
            /* No whitespace in self, so just use it as list[0] */
            Py_INCREF(self);
            PyList_SET_ITEM(list, 0, (PyObject *)self);
            count++;
            break;
        }
        SPLIT_ADD(data, pos + 1, word_end + 1);
    }
    if (pos >= 0) {
        /* Only occurs when maxsplit was reached */
        /* Skip any remaining whitespace and copy to beginning of string */
        RSKIP_SPACE(data, pos);
        if (pos >= 0)
            SPLIT_ADD(data, 0, pos + 1);
    }
    FIX_PREALLOC_SIZE(list);
    if (PyList_Reverse(list) < 0)
        goto onError;
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}

1680

1681

/* Split self on the single byte ch, scanning from the right and performing
   at most maxcount splits.  Fields are collected right-to-left and the list
   is reversed before returning.  Returns a new list, or NULL on error. */
Py_LOCAL_INLINE(PyObject *)
rsplit_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
{
    const char *data = PyBytes_AS_STRING(self);
    register Py_ssize_t scan, seg_end, count=0;
    PyObject *str;
    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));

    if (list == NULL)
        return NULL;

    scan = seg_end = len - 1;
    while ((scan >= 0) && (maxcount-- > 0)) {
        /* Walk left to the next separator byte, if any. */
        while (scan >= 0 && data[scan] != ch)
            scan--;
        if (scan < 0)
            break;
        SPLIT_ADD(data, scan + 1, seg_end + 1);
        seg_end = scan = scan - 1;
    }
    if (scan < 0 && count == 0 && PyBytes_CheckExact(self)) {
        /* ch not in self, so just use self as list[0] */
        Py_INCREF(self);
        PyList_SET_ITEM(list, 0, (PyObject *)self);
        count++;
    }
    else if (seg_end >= -1) {
        /* Whatever precedes the leftmost separator is the last field. */
        SPLIT_ADD(data, 0, seg_end + 1);
    }
    FIX_PREALLOC_SIZE(list);
    if (PyList_Reverse(list) < 0)
        goto onError;
    return list;

 onError:
    Py_DECREF(list);
    return NULL;
}

1720

1721

PyDoc_STRVAR(rsplit__doc__,

1722

"S.rsplit([sep [,maxsplit]]) -> list of strings\n\

1723

\n\

1724

Return a list of the words in the string S, using sep as the\n\

1725

delimiter string, starting at the end of the string and working\n\

1726

to the front. If maxsplit is given, at most maxsplit splits are\n\

1727

done. If sep is not specified or is None, any whitespace string\n\

1728

is a separator.");

1729

1730

static PyObject *

1731

string_rsplit(PyBytesObject *self, PyObject *args)

1732

{

1733

Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;

1734

Py_ssize_t maxsplit = -1, count=0;

1735

const char *s, *sub;

1736

PyObject *list, *str, *subobj = Py_None;

1737

1738

if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))

1739

return NULL;

1740

if (maxsplit < 0)

1741

maxsplit = PY_SSIZE_T_MAX;

1742

if (subobj == Py_None)

1743

return rsplit_whitespace(self, len, maxsplit);

1744

if (PyBytes_Check(subobj)) {

1745

sub = PyBytes_AS_STRING(subobj);

1746

n = PyBytes_GET_SIZE(subobj);

1747

}

1748

#ifdef Py_USING_UNICODE

1749

else if (PyUnicode_Check(subobj))

1750

return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);

1751

#endif

1752

else if (PyObject_AsCharBuffer(subobj, &sub, &n))

1753

return NULL;

1754

1755

if (n == 0) {

1756

PyErr_SetString(PyExc_ValueError, "empty separator");

1757

return NULL;

1758

}

1759

else if (n == 1)

1760

return rsplit_char(self, len, sub[0], maxsplit);

1761

1762

list = PyList_New(PREALLOC_SIZE(maxsplit));

1763

if (list == NULL)

1764

return NULL;

1765

1766

j = len;

1767

i = j - n;

1768

1769

s = PyBytes_AS_STRING(self);

1770

while ( (i >= 0) && (maxsplit-- > 0) ) {

1771

for (; i>=0; i--) {

1772

if (Py_STRING_MATCH(s, i, sub, n)) {

1773

SPLIT_ADD(s, i + n, j);

1774

j = i;

1775

i -= n;

1776

break;

1777

}

1778

}

1779

}

1780

SPLIT_ADD(s, 0, j);

1781

FIX_PREALLOC_SIZE(list);

1782

if (PyList_Reverse(list) < 0)

1783

goto onError;

1784

return list;

1785

1786

onError:

1787

Py_DECREF(list);

1788

return NULL;

1789

}

1790

1791

1792

PyDoc_STRVAR(join__doc__,

1793

"S.join(sequence) -> string\n\

1794

\n\

1795

Return a string which is the concatenation of the strings in the\n\

1796

sequence. The separator between elements is S.");

1797

1798

static PyObject *

1799

string_join(PyBytesObject *self, PyObject *orig)

1800

{

1801

char *sep = PyBytes_AS_STRING(self);

1802

const Py_ssize_t seplen = PyBytes_GET_SIZE(self);

1803

PyObject *res = NULL;

1804

char *p;

1805

Py_ssize_t seqlen = 0;

1806

size_t sz = 0;

1807

Py_ssize_t i;

1808

PyObject *seq, *item;

1809

1810

seq = PySequence_Fast(orig, "");

1811

if (seq == NULL) {

1812

return NULL;

1813

}

1814

1815

seqlen = PySequence_Size(seq);

1816

if (seqlen == 0) {

1817

Py_DECREF(seq);

1818

return PyBytes_FromString("");

1819

}

1820

if (seqlen == 1) {

1821

item = PySequence_Fast_GET_ITEM(seq, 0);

1822

if (PyBytes_CheckExact(item) || PyUnicode_CheckExact(item)) {

1823

Py_INCREF(item);

1824

Py_DECREF(seq);

1825

return item;

1826

}

1827

}

1828

1829

/* There are at least two things to join, or else we have a subclass

1830

* of the builtin types in the sequence.

1831

* Do a pre-pass to figure out the total amount of space we'll

1832

* need (sz), see whether any argument is absurd, and defer to

1833

* the Unicode join if appropriate.

1834

*/

1835

for (i = 0; i < seqlen; i++) {

1836

const size_t old_sz = sz;

1837

item = PySequence_Fast_GET_ITEM(seq, i);

1838

if (!PyBytes_Check(item)){

1839

#ifdef Py_USING_UNICODE

1840

if (PyUnicode_Check(item)) {

1841

/* Defer to Unicode join.

1842

* CAUTION: There's no gurantee that the

1843

* original sequence can be iterated over

1844

* again, so we must pass seq here.

1845

*/

1846

PyObject *result;

1847

result = PyUnicode_Join((PyObject *)self, seq);

1848

Py_DECREF(seq);

1849

return result;

1850

}

1851

#endif

1852

PyErr_Format(PyExc_TypeError,

1853

"sequence item %zd: expected string,"

1854

" %.80s found",

1855

i, Py_TYPE(item)->tp_name);

1856

Py_DECREF(seq);

1857

return NULL;

1858

}

1859

sz += PyBytes_GET_SIZE(item);

1860

if (i != 0)

1861

sz += seplen;

1862

if (sz < old_sz || sz > PY_SSIZE_T_MAX) {

1863

PyErr_SetString(PyExc_OverflowError,

1864

"join() result is too long for a Python string");

1865

Py_DECREF(seq);

1866

return NULL;

1867

}

1868

}

1869

1870

/* Allocate result space. */

1871

res = PyBytes_FromStringAndSize((char*)NULL, sz);

1872

if (res == NULL) {

1873

Py_DECREF(seq);

1874

return NULL;

1875

}

1876

1877

/* Catenate everything. */

1878

p = PyBytes_AS_STRING(res);

1879

for (i = 0; i < seqlen; ++i) {

1880

size_t n;

1881

item = PySequence_Fast_GET_ITEM(seq, i);

1882

n = PyBytes_GET_SIZE(item);

1883

Py_MEMCPY(p, PyBytes_AS_STRING(item), n);

1884

p += n;

1885

if (i < seqlen - 1) {

1886

Py_MEMCPY(p, sep, seplen);

1887

p += seplen;

1888

}

1889

}

1890

1891

Py_DECREF(seq);

1892

return res;

1893

}

1894

1895

/* Exported wrapper around string_join(): join the items of x using the
   string object sep as separator.  The argument requirements are checked
   with assert() only. */
PyObject *
_PyBytes_Join(PyObject *sep, PyObject *x)
{
    PyBytesObject *separator;

    assert(sep != NULL && PyBytes_Check(sep));
    assert(x != NULL);
    separator = (PyBytesObject *)sep;
    return string_join(separator, x);
}

1902

1903

Py_LOCAL_INLINE(void)

1904

string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)

1905

{

1906

if (*end > len)

1907

*end = len;

1908

else if (*end < 0)

1909

*end += len;

1910

if (*end < 0)

1911

*end = 0;

1912

if (*start < 0)

1913

*start += len;

1914

if (*start < 0)

1915

*start = 0;

1916

}

1917

1918

Py_LOCAL_INLINE(Py_ssize_t)

1919

string_find_internal(PyBytesObject *self, PyObject *args, int dir)

1920

{

1921

PyObject *subobj;

1922

const char *sub;

1923

Py_ssize_t sub_len;

1924

Py_ssize_t start=0, end=PY_SSIZE_T_MAX;

1925

PyObject *obj_start=Py_None, *obj_end=Py_None;

1926

1927

if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,

1928

&obj_start, &obj_end))

1929

return -2;

1930

/* To support None in "start" and "end" arguments, meaning

1931

the same as if they were not passed.

1932

*/

1933

if (obj_start != Py_None)

1934

if (!_PyEval_SliceIndex(obj_start, &start))

1935

return -2;

1936

if (obj_end != Py_None)

1937

if (!_PyEval_SliceIndex(obj_end, &end))

1938

return -2;

1939

1940

if (PyBytes_Check(subobj)) {

1941

sub = PyBytes_AS_STRING(subobj);

1942

sub_len = PyBytes_GET_SIZE(subobj);

1943

}

1944

#ifdef Py_USING_UNICODE

1945

else if (PyUnicode_Check(subobj))

1946

return PyUnicode_Find(

1947

(PyObject *)self, subobj, start, end, dir);

1948

#endif

1949

else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))

1950

/* XXX - the "expected a character buffer object" is pretty

1951

confusing for a non-expert. remap to something else ? */

1952

return -2;

1953

1954

if (dir > 0)

1955

return stringlib_find_slice(

1956

PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),

1957

sub, sub_len, start, end);

1958

else

1959

return stringlib_rfind_slice(

1960

PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),

1961

sub, sub_len, start, end);

1962

}

1963

1964

1965

PyDoc_STRVAR(find__doc__,

1966

"S.find(sub [,start [,end]]) -> int\n\

1967

\n\

1968

Return the lowest index in S where substring sub is found,\n\

1969

such that sub is contained within s[start:end]. Optional\n\

1970

arguments start and end are interpreted as in slice notation.\n\

1971

\n\

1972

Return -1 on failure.");

1973

1974

static PyObject *

1975

string_find(PyBytesObject *self, PyObject *args)

1976

{

1977

Py_ssize_t result = string_find_internal(self, args, +1);

1978

if (result == -2)

1979

return NULL;

1980

return PyInt_FromSsize_t(result);

1981

}

1982

1983

1984

PyDoc_STRVAR(index__doc__,

1985

"S.index(sub [,start [,end]]) -> int\n\

1986

\n\

1987

Like S.find() but raise ValueError when the substring is not found.");

1988

1989

static PyObject *

1990

string_index(PyBytesObject *self, PyObject *args)

1991

{

1992

Py_ssize_t result = string_find_internal(self, args, +1);

1993

if (result == -2)

1994

return NULL;

1995

if (result == -1) {

1996

PyErr_SetString(PyExc_ValueError,

1997

"substring not found");

1998

return NULL;

1999

}

2000

return PyInt_FromSsize_t(result);

2001

}

2002

2003

2004

PyDoc_STRVAR(rfind__doc__,

2005

"S.rfind(sub [,start [,end]]) -> int\n\

2006

\n\

2007

Return the highest index in S where substring sub is found,\n\

2008

such that sub is contained within s[start:end]. Optional\n\

2009

arguments start and end are interpreted as in slice notation.\n\

2010

\n\

2011

Return -1 on failure.");

2012

2013

static PyObject *

2014

string_rfind(PyBytesObject *self, PyObject *args)

2015

{

2016

Py_ssize_t result = string_find_internal(self, args, -1);

2017

if (result == -2)

2018

return NULL;

2019

return PyInt_FromSsize_t(result);

2020

}

2021

2022

2023

PyDoc_STRVAR(rindex__doc__,

2024

"S.rindex(sub [,start [,end]]) -> int\n\

2025

\n\

2026

Like S.rfind() but raise ValueError when the substring is not found.");

2027

2028

static PyObject *

2029

string_rindex(PyBytesObject *self, PyObject *args)

2030

{

2031

Py_ssize_t result = string_find_internal(self, args, -1);

2032

if (result == -2)

2033

return NULL;

2034

if (result == -1) {

2035

PyErr_SetString(PyExc_ValueError,

2036

"substring not found");

2037

return NULL;

2038

}

2039

return PyInt_FromSsize_t(result);

2040

}

2041

2042

2043

/* Strip from self every leading and/or trailing byte that occurs in the
   string object sepobj; striptype (LEFTSTRIP, RIGHTSTRIP or BOTHSTRIP)
   selects the end(s).  sepobj is accessed with the unchecked string
   macros, so it must be a string object.
   Returns self itself (new reference) when nothing was removed and self is
   an exact string; otherwise a new string. */
Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
{
    char *data = PyBytes_AS_STRING(self);
    Py_ssize_t len = PyBytes_GET_SIZE(self);
    char *sep = PyBytes_AS_STRING(sepobj);
    Py_ssize_t seplen = PyBytes_GET_SIZE(sepobj);
    Py_ssize_t left = 0;
    Py_ssize_t right = len;     /* one past the last byte kept */

    if (striptype != RIGHTSTRIP) {
        while (left < len && memchr(sep, Py_CHARMASK(data[left]), seplen))
            left++;
    }

    if (striptype != LEFTSTRIP) {
        while (right > left &&
               memchr(sep, Py_CHARMASK(data[right-1]), seplen))
            right--;
    }

    if (left == 0 && right == len && PyBytes_CheckExact(self)) {
        Py_INCREF(self);
        return (PyObject*)self;
    }
    return PyBytes_FromStringAndSize(data+left, right-left);
}

2074

2075

2076

/* Strip leading and/or trailing whitespace (as classified by isspace())
   from self; striptype (LEFTSTRIP, RIGHTSTRIP or BOTHSTRIP) selects the
   end(s).
   Returns self itself (new reference) when nothing was removed and self is
   an exact string; otherwise a new string. */
Py_LOCAL_INLINE(PyObject *)
do_strip(PyBytesObject *self, int striptype)
{
    char *data = PyBytes_AS_STRING(self);
    Py_ssize_t len = PyBytes_GET_SIZE(self);
    Py_ssize_t left = 0;
    Py_ssize_t right = len;     /* one past the last byte kept */

    if (striptype != RIGHTSTRIP) {
        while (left < len && isspace(Py_CHARMASK(data[left])))
            left++;
    }

    if (striptype != LEFTSTRIP) {
        while (right > left && isspace(Py_CHARMASK(data[right-1])))
            right--;
    }

    if (left == 0 && right == len && PyBytes_CheckExact(self)) {
        Py_INCREF(self);
        return (PyObject*)self;
    }
    return PyBytes_FromStringAndSize(data+left, right-left);
}

2104

2105

2106

/* Strip variant that takes the optional `chars` argument from args.
   - chars missing or None: whitespace strip via do_strip()
   - chars is a string:     do_xstrip()
   - chars is unicode:      self is converted to unicode and stripped with
                            _PyUnicode_XStrip()
   - anything else:         TypeError
   Returns a new reference, or NULL with an exception set. */
Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
{
    PyObject *chars = NULL;

    if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &chars))
        return NULL;

    if (chars == NULL || chars == Py_None)
        return do_strip(self, striptype);

    if (PyBytes_Check(chars))
        return do_xstrip(self, striptype, chars);
#ifdef Py_USING_UNICODE
    if (PyUnicode_Check(chars)) {
        PyObject *stripped;
        PyObject *uniself = PyUnicode_FromObject((PyObject *)self);

        if (uniself==NULL)
            return NULL;
        stripped = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
                                     striptype, chars);
        Py_DECREF(uniself);
        return stripped;
    }
#endif
    PyErr_Format(PyExc_TypeError,
#ifdef Py_USING_UNICODE
                 "%s arg must be None, str or unicode",
#else
                 "%s arg must be None or str",
#endif
                 STRIPNAME(striptype));
    return NULL;
}

2141

2142

2143

PyDoc_STRVAR(strip__doc__,

2144

"S.strip([chars]) -> string or unicode\n\

2145

\n\

2146

Return a copy of the string S with leading and trailing\n\

2147

whitespace removed.\n\

2148

If chars is given and not None, remove characters in chars instead.\n\

2149

If chars is unicode, S will be converted to unicode before stripping");

2150

2151

static PyObject *

2152

string_strip(PyBytesObject *self, PyObject *args)

2153

{

2154

if (PyTuple_GET_SIZE(args) == 0)

2155

return do_strip(self, BOTHSTRIP); /* Common case */

2156

else

2157

return do_argstrip(self, BOTHSTRIP, args);

2158

}

2159

2160

2161

PyDoc_STRVAR(lstrip__doc__,

2162

"S.lstrip([chars]) -> string or unicode\n\

2163

\n\

2164

Return a copy of the string S with leading whitespace removed.\n\

2165

If chars is given and not None, remove characters in chars instead.\n\

2166

If chars is unicode, S will be converted to unicode before stripping");

2167

2168

static PyObject *

2169

string_lstrip(PyBytesObject *self, PyObject *args)

2170

{

2171

if (PyTuple_GET_SIZE(args) == 0)

2172

return do_strip(self, LEFTSTRIP); /* Common case */

2173

else

2174

return do_argstrip(self, LEFTSTRIP, args);

2175

}

2176

2177

2178

PyDoc_STRVAR(rstrip__doc__,

2179

"S.rstrip([chars]) -> string or unicode\n\

2180

\n\

2181

Return a copy of the string S with trailing whitespace removed.\n\

2182

If chars is given and not None, remove characters in chars instead.\n\

2183

If chars is unicode, S will be converted to unicode before stripping");

2184

2185

static PyObject *

2186

string_rstrip(PyBytesObject *self, PyObject *args)

2187

{

2188

if (PyTuple_GET_SIZE(args) == 0)

2189

return do_strip(self, RIGHTSTRIP); /* Common case */

2190

else

2191

return do_argstrip(self, RIGHTSTRIP, args);

2192

}

2193

2194

2195

PyDoc_STRVAR(lower__doc__,

2196

"S.lower() -> string\n\

2197

\n\

2198

Return a copy of the string S converted to lowercase.");

2199

2200

/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */

2201

#ifndef _tolower

2202

#define _tolower tolower

2203

#endif

2204

2205

static PyObject *

2206

string_lower(PyBytesObject *self)

2207

{

2208

char *s;

2209

Py_ssize_t i, n = PyBytes_GET_SIZE(self);

2210

PyObject *newobj;

2211

2212

newobj = PyBytes_FromStringAndSize(NULL, n);

2213

if (!newobj)

2214

return NULL;

2215

2216

s = PyBytes_AS_STRING(newobj);

2217

2218

Py_MEMCPY(s, PyBytes_AS_STRING(self), n);

2219

2220

for (i = 0; i < n; i++) {

2221

int c = Py_CHARMASK(s[i]);

2222

if (isupper(c))

2223

s[i] = _tolower(c);

2224

}

2225

2226

return newobj;

2227

}

2228

2229

PyDoc_STRVAR(upper__doc__,

2230

"S.upper() -> string\n\

2231

\n\

2232

Return a copy of the string S converted to uppercase.");

2233

2234

#ifndef _toupper

2235

#define _toupper toupper

2236

#endif

2237

2238

static PyObject *

2239

string_upper(PyBytesObject *self)

2240

{

2241

char *s;

2242

Py_ssize_t i, n = PyBytes_GET_SIZE(self);

2243

PyObject *newobj;

2244

2245

newobj = PyBytes_FromStringAndSize(NULL, n);

2246

if (!newobj)

2247

return NULL;

2248

2249

s = PyBytes_AS_STRING(newobj);

2250

2251

Py_MEMCPY(s, PyBytes_AS_STRING(self), n);

2252

2253

for (i = 0; i < n; i++) {

2254

int c = Py_CHARMASK(s[i]);

2255

if (islower(c))

2256

s[i] = _toupper(c);

2257

}

2258

2259

return newobj;

2260

}

2261

2262

PyDoc_STRVAR(title__doc__,

2263

"S.title() -> string\n\

2264

\n\

2265

Return a titlecased version of S, i.e. words start with uppercase\n\

2266

characters, all remaining cased characters have lowercase.");

2267

2268

static PyObject*

2269

string_title(PyBytesObject *self)

2270

{

2271

char *s = PyBytes_AS_STRING(self), *s_new;

2272

Py_ssize_t i, n = PyBytes_GET_SIZE(self);

2273

int previous_is_cased = 0;

2274

PyObject *newobj;

2275

2276

newobj = PyBytes_FromStringAndSize(NULL, n);

2277

if (newobj == NULL)

2278

return NULL;

2279

s_new = PyBytes_AsString(newobj);

2280

for (i = 0; i < n; i++) {

2281

int c = Py_CHARMASK(*s++);

2282

if (islower(c)) {

2283

if (!previous_is_cased)

2284

c = toupper(c);

2285

previous_is_cased = 1;

2286

} else if (isupper(c)) {

2287

if (previous_is_cased)

2288

c = tolower(c);

2289

previous_is_cased = 1;

2290

} else

2291

previous_is_cased = 0;

2292

*s_new++ = c;

2293

}

2294

return newobj;

2295

}

2296

2297

PyDoc_STRVAR(capitalize__doc__,

2298

"S.capitalize() -> string\n\

2299

\n\

2300

Return a copy of the string S with only its first character\n\

2301

capitalized.");

2302

2303

static PyObject *

2304

string_capitalize(PyBytesObject *self)

2305

{

2306

char *s = PyBytes_AS_STRING(self), *s_new;

2307

Py_ssize_t i, n = PyBytes_GET_SIZE(self);

2308

PyObject *newobj;

2309

2310

newobj = PyBytes_FromStringAndSize(NULL, n);

2311

if (newobj == NULL)

2312

return NULL;

2313

s_new = PyBytes_AsString(newobj);

2314

if (0 < n) {

2315

int c = Py_CHARMASK(*s++);

2316

if (islower(c))

2317

*s_new = toupper(c);

2318

else

2319

*s_new = c;

2320

s_new++;

2321

}

2322

for (i = 1; i < n; i++) {

2323

int c = Py_CHARMASK(*s++);

2324

if (isupper(c))

2325

*s_new = tolower(c);

2326

else

2327

*s_new = c;

2328

s_new++;

2329

}

2330

return newobj;

2331

}

2332

2333

2334

PyDoc_STRVAR(count__doc__,

2335

"S.count(sub[, start[, end]]) -> int\n\

2336

\n\

2337

Return the number of non-overlapping occurrences of substring sub in\n\

2338

string S[start:end]. Optional arguments start and end are interpreted\n\

2339

as in slice notation.");

2340

2341

static PyObject *

2342

string_count(PyBytesObject *self, PyObject *args)

2343

{

2344

PyObject *sub_obj;

2345

const char *str = PyBytes_AS_STRING(self), *sub;

2346

Py_ssize_t sub_len;

2347

Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;

2348

2349

if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,

2350

_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))

2351

return NULL;

2352

2353

if (PyBytes_Check(sub_obj)) {

2354

sub = PyBytes_AS_STRING(sub_obj);

2355

sub_len = PyBytes_GET_SIZE(sub_obj);

2356

}

2357

#ifdef Py_USING_UNICODE

2358

else if (PyUnicode_Check(sub_obj)) {

2359

Py_ssize_t count;

2360

count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);

2361

if (count == -1)

2362

return NULL;

2363

else

2364

return PyInt_FromSsize_t(count);

2365

}

2366

#endif

2367

else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))

2368

return NULL;

2369

2370

string_adjust_indices(&start, &end, PyBytes_GET_SIZE(self));

2371

2372

return PyInt_FromSsize_t(

2373

stringlib_count(str + start, end - start, sub, sub_len)

2374

);

2375

}

2376

2377

PyDoc_STRVAR(swapcase__doc__,

2378

"S.swapcase() -> string\n\

2379

\n\

2380

Return a copy of the string S with uppercase characters\n\

2381

converted to lowercase and vice versa.");

2382

2383

static PyObject *

2384

string_swapcase(PyBytesObject *self)

2385

{

2386

char *s = PyBytes_AS_STRING(self), *s_new;

2387

Py_ssize_t i, n = PyBytes_GET_SIZE(self);

2388

PyObject *newobj;

2389

2390

newobj = PyBytes_FromStringAndSize(NULL, n);

2391

if (newobj == NULL)

2392

return NULL;

2393

s_new = PyBytes_AsString(newobj);

2394

for (i = 0; i < n; i++) {

2395

int c = Py_CHARMASK(*s++);

2396

if (islower(c)) {

2397

*s_new = toupper(c);

2398

}

2399

else if (isupper(c)) {

2400

*s_new = tolower(c);

2401

}

2402

else

2403

*s_new = c;

2404

s_new++;

2405

}

2406

return newobj;

2407

}

2408

2409

2410

PyDoc_STRVAR(translate__doc__,

2411

"S.translate(table [,deletechars]) -> string\n\

2412

\n\

2413

Return a copy of the string S, where all characters occurring\n\

2414

in the optional argument deletechars are removed, and the\n\

2415

remaining characters have been mapped through the given\n\

2416

translation table, which must be a string of length 256.");

2417

2418

static PyObject *

2419

string_translate(PyBytesObject *self, PyObject *args)

2420

{

2421

register char *input, *output;

2422

const char *table;

2423

register Py_ssize_t i, c, changed = 0;

2424

PyObject *input_obj = (PyObject*)self;

2425

const char *output_start, *del_table=NULL;

2426

Py_ssize_t inlen, tablen, dellen = 0;

2427

PyObject *result;

2428

int trans_table[256];

2429

PyObject *tableobj, *delobj = NULL;

2430

2431

if (!PyArg_UnpackTuple(args, "translate", 1, 2,

2432

&tableobj, &delobj))

2433

return NULL;

2434

2435

if (PyBytes_Check(tableobj)) {

2436

table = PyBytes_AS_STRING(tableobj);

2437

tablen = PyBytes_GET_SIZE(tableobj);

2438

}

2439

else if (tableobj == Py_None) {

2440

table = NULL;

2441

tablen = 256;

2442

}

2443

#ifdef Py_USING_UNICODE

2444

else if (PyUnicode_Check(tableobj)) {

2445

/* Unicode .translate() does not support the deletechars

2446

parameter; instead a mapping to None will cause characters

2447

to be deleted. */

2448

if (delobj != NULL) {

2449

PyErr_SetString(PyExc_TypeError,

2450

"deletions are implemented differently for unicode");

2451

return NULL;

2452

}

2453

return PyUnicode_Translate((PyObject *)self, tableobj, NULL);

2454

}

2455

#endif

2456

else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))

2457

return NULL;

2458

2459

if (tablen != 256) {

2460

PyErr_SetString(PyExc_ValueError,

2461

"translation table must be 256 characters long");

2462

return NULL;

2463

}

2464

2465

if (delobj != NULL) {

2466

if (PyBytes_Check(delobj)) {

2467

del_table = PyBytes_AS_STRING(delobj);

2468

dellen = PyBytes_GET_SIZE(delobj);

2469

}

2470

#ifdef Py_USING_UNICODE

2471

else if (PyUnicode_Check(delobj)) {

2472

PyErr_SetString(PyExc_TypeError,

2473

"deletions are implemented differently for unicode");

2474

return NULL;

2475

}

2476

#endif

2477

else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))

2478

return NULL;

2479

}

2480

else {

2481

del_table = NULL;

2482

dellen = 0;

2483

}

2484

2485

inlen = PyBytes_GET_SIZE(input_obj);

2486

result = PyBytes_FromStringAndSize((char *)NULL, inlen);

2487

if (result == NULL)

2488

return NULL;

2489

output_start = output = PyBytes_AsString(result);

2490

input = PyBytes_AS_STRING(input_obj);

2491

2492

if (dellen == 0 && table != NULL) {

2493

/* If no deletions are required, use faster code */

2494

for (i = inlen; --i >= 0; ) {

2495

c = Py_CHARMASK(*input++);

2496

if (Py_CHARMASK((*output++ = table[c])) != c)

2497

changed = 1;

2498

}

2499

if (changed || !PyBytes_CheckExact(input_obj))

2500

return result;

2501

Py_DECREF(result);

2502

Py_INCREF(input_obj);

2503

return input_obj;

2504

}

2505

2506

if (table == NULL) {

2507

for (i = 0; i < 256; i++)

2508

trans_table[i] = Py_CHARMASK(i);

2509

} else {

2510

for (i = 0; i < 256; i++)

2511

trans_table[i] = Py_CHARMASK(table[i]);

2512

}

2513

2514

for (i = 0; i < dellen; i++)

2515

trans_table[(int) Py_CHARMASK(del_table[i])] = -1;

2516

2517

for (i = inlen; --i >= 0; ) {

2518

c = Py_CHARMASK(*input++);

2519

if (trans_table[c] != -1)

2520

if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)

2521

continue;

2522

changed = 1;

2523

}

2524

if (!changed && PyBytes_CheckExact(input_obj)) {

2525

Py_DECREF(result);

2526

Py_INCREF(input_obj);

2527

return input_obj;

2528

}

2529

/* Fix the size of the resulting string */

2530

if (inlen > 0)

2531

_PyBytes_Resize(&result, output - output_start);

2532

return result;

2533

}

2534

2535

2536

/* Search directions accepted by findstring() and countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Return a pointer to the first byte equal to `c` within the first
   `target_len` bytes of `target`, or NULL when there is none.  Every
   argument is parenthesized so that arbitrary expressions can be passed
   safely. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))

2543

2544

/* String ops must return a string. */

2545

/* If the object is subclass of string, create a copy */

2546

Py_LOCAL(PyBytesObject *)

2547

return_self(PyBytesObject *self)

2548

{

2549

if (PyBytes_CheckExact(self)) {

2550

Py_INCREF(self);

2551

return self;

2552

}

2553

return (PyBytesObject *)PyBytes_FromStringAndSize(

2554

PyBytes_AS_STRING(self),

2555

PyBytes_GET_SIZE(self));

2556

}

2557

2558

Py_LOCAL_INLINE(Py_ssize_t)

2559

countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)

2560

{

2561

Py_ssize_t count=0;

2562

const char *start=target;

2563

const char *end=target+target_len;

2564

2565

while ( (start=findchar(start, end-start, c)) != NULL ) {

2566

count++;

2567

if (count >= maxcount)

2568

break;

2569

start += 1;

2570

}

2571

return count;

2572

}

2573

2574

Py_LOCAL(Py_ssize_t)

2575

findstring(const char *target, Py_ssize_t target_len,

2576

const char *pattern, Py_ssize_t pattern_len,

2577

Py_ssize_t start,

2578

Py_ssize_t end,

2579

int direction)

2580

{

2581

if (start < 0) {

2582

start += target_len;

2583

if (start < 0)

2584

start = 0;

2585

}

2586

if (end > target_len) {

2587

end = target_len;

2588

} else if (end < 0) {

2589

end += target_len;

2590

if (end < 0)

2591

end = 0;

2592

}

2593

2594

/* zero-length substrings always match at the first attempt */

2595

if (pattern_len == 0)

2596

return (direction > 0) ? start : end;

2597

2598

end -= pattern_len;

2599

2600

if (direction < 0) {

2601

for (; end >= start; end--)

2602

if (Py_STRING_MATCH(target, end, pattern, pattern_len))

2603

return end;

2604

} else {

2605

for (; start <= end; start++)

2606

if (Py_STRING_MATCH(target, start, pattern, pattern_len))

2607

return start;

2608

}

2609

return -1;

2610

}

2611

2612

Py_LOCAL_INLINE(Py_ssize_t)

2613

countstring(const char *target, Py_ssize_t target_len,

2614

const char *pattern, Py_ssize_t pattern_len,

2615

Py_ssize_t start,

2616

Py_ssize_t end,

2617

int direction, Py_ssize_t maxcount)

2618

{

2619

Py_ssize_t count=0;

2620

2621

if (start < 0) {

2622

start += target_len;

2623

if (start < 0)

2624

start = 0;

2625

}

2626

if (end > target_len) {

2627

end = target_len;

2628

} else if (end < 0) {

2629

end += target_len;

2630

if (end < 0)

2631

end = 0;

2632

}

2633

2634

/* zero-length substrings match everywhere */

2635

if (pattern_len == 0 || maxcount == 0) {

2636

if (target_len+1 < maxcount)

2637

return target_len+1;

2638

return maxcount;

2639

}

2640

2641

end -= pattern_len;

2642

if (direction < 0) {

2643

for (; (end >= start); end--)

2644

if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {

2645

count++;

2646

if (--maxcount <= 0) break;

2647

end -= pattern_len-1;

2648

}

2649

} else {

2650

for (; (start <= end); start++)

2651

if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {

2652

count++;

2653

if (--maxcount <= 0)

2654

break;

2655

start += pattern_len-1;

2656

}

2657

}

2658

return count;

2659

}

2660

2661

2662

/* Algorithms for different cases of string replacement */

2663

2664

/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */

2665

Py_LOCAL(PyBytesObject *)

2666

replace_interleave(PyBytesObject *self,

2667

const char *to_s, Py_ssize_t to_len,

2668

Py_ssize_t maxcount)

2669

{

2670

char *self_s, *result_s;

2671

Py_ssize_t self_len, result_len;

2672

Py_ssize_t count, i, product;

2673

PyBytesObject *result;

2674

2675

self_len = PyBytes_GET_SIZE(self);

2676

2677

/* 1 at the end plus 1 after every character */

2678

count = self_len+1;

2679

if (maxcount < count)

2680

count = maxcount;

2681

2682

/* Check for overflow */

2683

/* result_len = count * to_len + self_len; */

2684

product = count * to_len;

2685

if (product / to_len != count) {

2686

PyErr_SetString(PyExc_OverflowError,

2687

"replace string is too long");

2688

return NULL;

2689

}

2690

result_len = product + self_len;

2691

if (result_len < 0) {

2692

PyErr_SetString(PyExc_OverflowError,

2693

"replace string is too long");

2694

return NULL;

2695

}

2696

2697

if (! (result = (PyBytesObject *)

2698

PyBytes_FromStringAndSize(NULL, result_len)) )

2699

return NULL;

2700

2701

self_s = PyBytes_AS_STRING(self);

2702

result_s = PyBytes_AS_STRING(result);

2703

2704

/* TODO: special case single character, which doesn't need memcpy */

2705

2706

/* Lay the first one down (guaranteed this will occur) */

2707

Py_MEMCPY(result_s, to_s, to_len);

2708

result_s += to_len;

2709

count -= 1;

2710

2711

for (i=0; i<count; i++) {

2712

*result_s++ = *self_s++;

2713

Py_MEMCPY(result_s, to_s, to_len);

2714

result_s += to_len;

2715

}

2716

2717

/* Copy the rest of the original string */

2718

Py_MEMCPY(result_s, self_s, self_len-i);

2719

2720

return result;

2721

}

2722

2723

/* Special case for deleting a single character */

2724

/* len(self)>=1, len(from)==1, to="", maxcount>=1 */

2725

Py_LOCAL(PyBytesObject *)

2726

replace_delete_single_character(PyBytesObject *self,

2727

char from_c, Py_ssize_t maxcount)

2728

{

2729

char *self_s, *result_s;

2730

char *start, *next, *end;

2731

Py_ssize_t self_len, result_len;

2732

Py_ssize_t count;

2733

PyBytesObject *result;

2734

2735

self_len = PyBytes_GET_SIZE(self);

2736

self_s = PyBytes_AS_STRING(self);

2737

2738

count = countchar(self_s, self_len, from_c, maxcount);

2739

if (count == 0) {

2740

return return_self(self);

2741

}

2742

2743

result_len = self_len - count; /* from_len == 1 */

2744

assert(result_len>=0);

2745

2746

if ( (result = (PyBytesObject *)

2747

PyBytes_FromStringAndSize(NULL, result_len)) == NULL)

2748

return NULL;

2749

result_s = PyBytes_AS_STRING(result);

2750

2751

start = self_s;

2752

end = self_s + self_len;

2753

while (count-- > 0) {

2754

next = findchar(start, end-start, from_c);

2755

if (next == NULL)

2756

break;

2757

Py_MEMCPY(result_s, start, next-start);

2758

result_s += (next-start);

2759

start = next+1;

2760

}

2761

Py_MEMCPY(result_s, start, end-start);

2762

2763

return result;

2764

}

2765

2766

/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */

2767

2768

Py_LOCAL(PyBytesObject *)

2769

replace_delete_substring(PyBytesObject *self,

2770

const char *from_s, Py_ssize_t from_len,

2771

Py_ssize_t maxcount) {

2772

char *self_s, *result_s;

2773

char *start, *next, *end;

2774

Py_ssize_t self_len, result_len;

2775

Py_ssize_t count, offset;

2776

PyBytesObject *result;

2777

2778

self_len = PyBytes_GET_SIZE(self);

2779

self_s = PyBytes_AS_STRING(self);

2780

2781

count = countstring(self_s, self_len,

2782

from_s, from_len,

2783

0, self_len, 1,

2784

maxcount);

2785

2786

if (count == 0) {

2787

/* no matches */

2788

return return_self(self);

2789

}

2790

2791

result_len = self_len - (count * from_len);

2792

assert (result_len>=0);

2793

2794

if ( (result = (PyBytesObject *)

2795

PyBytes_FromStringAndSize(NULL, result_len)) == NULL )

2796

return NULL;

2797

2798

result_s = PyBytes_AS_STRING(result);

2799

2800

start = self_s;

2801

end = self_s + self_len;

2802

while (count-- > 0) {

2803

offset = findstring(start, end-start,

2804

from_s, from_len,

2805

0, end-start, FORWARD);

2806

if (offset == -1)

2807

break;

2808

next = start + offset;

2809

2810

Py_MEMCPY(result_s, start, next-start);

2811

2812

result_s += (next-start);

2813

start = next+from_len;

2814

}

2815

Py_MEMCPY(result_s, start, end-start);

2816

return result;

2817

}

2818

2819

/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */

2820

Py_LOCAL(PyBytesObject *)

2821

replace_single_character_in_place(PyBytesObject *self,

2822

char from_c, char to_c,

2823

Py_ssize_t maxcount)

2824

{

2825

char *self_s, *result_s, *start, *end, *next;

2826

Py_ssize_t self_len;

2827

PyBytesObject *result;

2828

2829

/* The result string will be the same size */

2830

self_s = PyBytes_AS_STRING(self);

2831

self_len = PyBytes_GET_SIZE(self);

2832

2833

next = findchar(self_s, self_len, from_c);

2834

2835

if (next == NULL) {

2836

/* No matches; return the original string */

2837

return return_self(self);

2838

}

2839

2840

/* Need to make a new string */

2841

result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);

2842

if (result == NULL)

2843

return NULL;

2844

result_s = PyBytes_AS_STRING(result);

2845

Py_MEMCPY(result_s, self_s, self_len);

2846

2847

/* change everything in-place, starting with this one */

2848

start = result_s + (next-self_s);

2849

*start = to_c;

2850

start++;

2851

end = result_s + self_len;

2852

2853

while (--maxcount > 0) {

2854

next = findchar(start, end-start, from_c);

2855

if (next == NULL)

2856

break;

2857

*next = to_c;

2858

start = next+1;

2859

}

2860

2861

return result;

2862

}

2863

2864

/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */

2865

Py_LOCAL(PyBytesObject *)

2866

replace_substring_in_place(PyBytesObject *self,

2867

const char *from_s, Py_ssize_t from_len,

2868

const char *to_s, Py_ssize_t to_len,

2869

Py_ssize_t maxcount)

2870

{

2871

char *result_s, *start, *end;

2872

char *self_s;

2873

Py_ssize_t self_len, offset;

2874

PyBytesObject *result;

2875

2876

/* The result string will be the same size */

2877

2878

self_s = PyBytes_AS_STRING(self);

2879

self_len = PyBytes_GET_SIZE(self);

2880

2881

offset = findstring(self_s, self_len,

2882

from_s, from_len,

2883

0, self_len, FORWARD);

2884

if (offset == -1) {

2885

/* No matches; return the original string */

2886

return return_self(self);

2887

}

2888

2889

/* Need to make a new string */

2890

result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);

2891

if (result == NULL)

2892

return NULL;

2893

result_s = PyBytes_AS_STRING(result);

2894

Py_MEMCPY(result_s, self_s, self_len);

2895

2896

/* change everything in-place, starting with this one */

2897

start = result_s + offset;

2898

Py_MEMCPY(start, to_s, from_len);

2899

start += from_len;

2900

end = result_s + self_len;

2901

2902

while ( --maxcount > 0) {

2903

offset = findstring(start, end-start,

2904

from_s, from_len,

2905

0, end-start, FORWARD);

2906

if (offset==-1)

2907

break;

2908

Py_MEMCPY(start+offset, to_s, from_len);

2909

start += offset+from_len;

2910

}

2911

2912

return result;

2913

}

2914

2915

/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */

2916

Py_LOCAL(PyBytesObject *)

2917

replace_single_character(PyBytesObject *self,

2918

char from_c,

2919

const char *to_s, Py_ssize_t to_len,

2920

Py_ssize_t maxcount)

2921

{

2922

char *self_s, *result_s;

2923

char *start, *next, *end;

2924

Py_ssize_t self_len, result_len;

2925

Py_ssize_t count, product;

2926

PyBytesObject *result;

2927

2928

self_s = PyBytes_AS_STRING(self);

2929

self_len = PyBytes_GET_SIZE(self);

2930

2931

count = countchar(self_s, self_len, from_c, maxcount);

2932

if (count == 0) {

2933

/* no matches, return unchanged */

2934

return return_self(self);

2935

}

2936

2937

/* use the difference between current and new, hence the "-1" */

2938

/* result_len = self_len + count * (to_len-1) */

2939

product = count * (to_len-1);

2940

if (product / (to_len-1) != count) {

2941

PyErr_SetString(PyExc_OverflowError, "replace string is too long");

2942

return NULL;

2943

}

2944

result_len = self_len + product;

2945

if (result_len < 0) {

2946

PyErr_SetString(PyExc_OverflowError, "replace string is too long");

2947

return NULL;

2948

}

2949

2950

if ( (result = (PyBytesObject *)

2951

PyBytes_FromStringAndSize(NULL, result_len)) == NULL)

2952

return NULL;

2953

result_s = PyBytes_AS_STRING(result);

2954

2955

start = self_s;

2956

end = self_s + self_len;

2957

while (count-- > 0) {

2958

next = findchar(start, end-start, from_c);

2959

if (next == NULL)

2960

break;

2961

2962

if (next == start) {

2963

/* replace with the 'to' */

2964

Py_MEMCPY(result_s, to_s, to_len);

2965

result_s += to_len;

2966

start += 1;

2967

} else {

2968

/* copy the unchanged old then the 'to' */

2969

Py_MEMCPY(result_s, start, next-start);

2970

result_s += (next-start);

2971

Py_MEMCPY(result_s, to_s, to_len);

2972

result_s += to_len;

2973

start = next+1;

2974

}

2975

}

2976

/* Copy the remainder of the remaining string */

2977

Py_MEMCPY(result_s, start, end-start);

2978

2979

return result;

2980

}

2981

2982

/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */

2983

Py_LOCAL(PyBytesObject *)

2984

replace_substring(PyBytesObject *self,

2985

const char *from_s, Py_ssize_t from_len,

2986

const char *to_s, Py_ssize_t to_len,

2987

Py_ssize_t maxcount) {

2988

char *self_s, *result_s;

2989

char *start, *next, *end;

2990

Py_ssize_t self_len, result_len;

2991

Py_ssize_t count, offset, product;

2992

PyBytesObject *result;

2993

2994

self_s = PyBytes_AS_STRING(self);

2995

self_len = PyBytes_GET_SIZE(self);

2996

2997

count = countstring(self_s, self_len,

2998

from_s, from_len,

2999

0, self_len, FORWARD, maxcount);

3000

if (count == 0) {

3001

/* no matches, return unchanged */

3002

return return_self(self);

3003

}

3004

3005

/* Check for overflow */

3006

/* result_len = self_len + count * (to_len-from_len) */

3007

product = count * (to_len-from_len);

3008

if (product / (to_len-from_len) != count) {

3009

PyErr_SetString(PyExc_OverflowError, "replace string is too long");

3010

return NULL;

3011

}

3012

result_len = self_len + product;

3013

if (result_len < 0) {

3014

PyErr_SetString(PyExc_OverflowError, "replace string is too long");

3015

return NULL;

3016

}

3017

3018

if ( (result = (PyBytesObject *)

3019

PyBytes_FromStringAndSize(NULL, result_len)) == NULL)

3020

return NULL;

3021

result_s = PyBytes_AS_STRING(result);

3022

3023

start = self_s;

3024

end = self_s + self_len;

3025

while (count-- > 0) {

3026

offset = findstring(start, end-start,

3027

from_s, from_len,

3028

0, end-start, FORWARD);

3029

if (offset == -1)

3030

break;

3031

next = start+offset;

3032

if (next == start) {

3033

/* replace with the 'to' */

3034

Py_MEMCPY(result_s, to_s, to_len);

3035

result_s += to_len;

3036

start += from_len;

3037

} else {

3038

/* copy the unchanged old then the 'to' */

3039

Py_MEMCPY(result_s, start, next-start);

3040

result_s += (next-start);

3041

Py_MEMCPY(result_s, to_s, to_len);

3042

result_s += to_len;

3043

start = next+from_len;

3044

}

3045

}

3046

/* Copy the remainder of the remaining string */

3047

Py_MEMCPY(result_s, start, end-start);

3048

3049

return result;

3050

}

3051

3052

3053

/* Dispatcher for string replacement: picks the specialised routine that
   matches the lengths of `from` and `to`.  A negative `maxcount` means
   "no limit".  Returns a new reference (possibly to self, through
   return_self()) or NULL with an exception set. */
Py_LOCAL(PyBytesObject *)
replace(PyBytesObject *self,
        const char *from_s, Py_ssize_t from_len,
        const char *to_s, Py_ssize_t to_len,
        Py_ssize_t maxcount)
{
    if (maxcount < 0)
        maxcount = PY_SSIZE_T_MAX;
    else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
        /* nothing to do; return the original string */
        return return_self(self);
    }
    /* maxcount is non-zero from here on. */

    /* Handle zero-length special cases */
    if (from_len == 0) {
        if (to_len == 0) {
            /* nothing to do; return the original string */
            return return_self(self);
        }
        /* insert the 'to' string everywhere.   */
        /*    >>> "Python".replace("", ".")     */
        /*    '.P.y.t.h.o.n.'                   */
        return replace_interleave(self, to_s, to_len, maxcount);
    }

    /* Except for "".replace("", "A") == "A" there is no way beyond this */
    /* point for an empty self string to generate a non-empty string */
    /* Special case so the remaining code always gets a non-empty string */
    if (PyBytes_GET_SIZE(self) == 0)
        return return_self(self);

    if (to_len == 0) {
        /* delete all occurrences of 'from' string */
        if (from_len == 1)
            return replace_delete_single_character(
                    self, from_s[0], maxcount);
        return replace_delete_substring(self, from_s, from_len, maxcount);
    }

    /* Handle special case where both strings have the same length */
    if (from_len == to_len) {
        if (from_len == 1)
            return replace_single_character_in_place(
                    self, from_s[0], to_s[0], maxcount);
        return replace_substring_in_place(
                self, from_s, from_len, to_s, to_len, maxcount);
    }

    /* Otherwise use the more generic algorithms */
    if (from_len == 1)
        return replace_single_character(self, from_s[0],
                                        to_s, to_len, maxcount);

    /* len('from')>=2, len('to')>=1 */
    return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
}

3122

3123

PyDoc_STRVAR(replace__doc__,

3124

"S.replace (old, new[, count]) -> string\n\

3125

\n\

3126

Return a copy of string S with all occurrences of substring\n\

3127

old replaced by new. If the optional argument count is\n\

3128

given, only the first count occurrences are replaced.");

3129

3130

static PyObject *

3131

string_replace(PyBytesObject *self, PyObject *args)

3132

{

3133

Py_ssize_t count = -1;

3134

PyObject *from, *to;

3135

const char *from_s, *to_s;

3136

Py_ssize_t from_len, to_len;

3137

3138

if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))

3139

return NULL;

3140

3141

if (PyBytes_Check(from)) {

3142

from_s = PyBytes_AS_STRING(from);

3143

from_len = PyBytes_GET_SIZE(from);

3144

}

3145

#ifdef Py_USING_UNICODE

3146

if (PyUnicode_Check(from))

3147

return PyUnicode_Replace((PyObject *)self,

3148

from, to, count);

3149

#endif

3150

else if (PyObject_AsCharBuffer(from, &from_s, &from_len))

3151

return NULL;

3152

3153

if (PyBytes_Check(to)) {

3154

to_s = PyBytes_AS_STRING(to);

3155

to_len = PyBytes_GET_SIZE(to);

3156

}

3157

#ifdef Py_USING_UNICODE

3158

else if (PyUnicode_Check(to))

3159

return PyUnicode_Replace((PyObject *)self,

3160

from, to, count);

3161

#endif

3162

else if (PyObject_AsCharBuffer(to, &to_s, &to_len))

3163

return NULL;

3164

3165

return (PyObject *)replace((PyBytesObject *) self,

3166

from_s, from_len,

3167

to_s, to_len, count);

3168

}

3169

3170

/** End DALKE **/

3171

3172

/* Matches the end (direction >= 0) or start (direction < 0) of self

3173

* against substr, using the start and end arguments. Returns

3174

* -1 on error, 0 if not found and 1 if found.

3175

*/

3176

Py_LOCAL(int)

3177

_string_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,

3178

Py_ssize_t end, int direction)

3179

{

3180

Py_ssize_t len = PyBytes_GET_SIZE(self);

3181

Py_ssize_t slen;

3182

const char* sub;

3183

const char* str;

3184

3185

if (PyBytes_Check(substr)) {

3186

sub = PyBytes_AS_STRING(substr);

3187

slen = PyBytes_GET_SIZE(substr);

3188

}

3189

#ifdef Py_USING_UNICODE

3190

else if (PyUnicode_Check(substr))

3191

return PyUnicode_Tailmatch((PyObject *)self,

3192

substr, start, end, direction);

3193

#endif

3194

else if (PyObject_AsCharBuffer(substr, &sub, &slen))

3195

return -1;

3196

str = PyBytes_AS_STRING(self);

3197

3198

string_adjust_indices(&start, &end, len);

3199

3200

if (direction < 0) {

3201

/* startswith */

3202

if (start+slen > len)

3203

return 0;

3204

} else {

3205

/* endswith */

3206

if (end-start < slen || start > len)

3207

return 0;

3208

3209

if (end-slen > start)

3210

start = end - slen;

3211

}

3212

if (end-start >= slen)

3213

return ! memcmp(str+start, sub, slen);

3214

return 0;

3215

}

3216

3217

3218

PyDoc_STRVAR(startswith__doc__,

3219

"S.startswith(prefix[, start[, end]]) -> bool\n\

3220

\n\

3221

Return True if S starts with the specified prefix, False otherwise.\n\

3222

With optional start, test S beginning at that position.\n\

3223

With optional end, stop comparing S at that position.\n\

3224

prefix can also be a tuple of strings to try.");

3225

3226

static PyObject *

3227

string_startswith(PyBytesObject *self, PyObject *args)

3228

{

3229

Py_ssize_t start = 0;

3230

Py_ssize_t end = PY_SSIZE_T_MAX;

3231

PyObject *subobj;

3232

int result;

3233

3234

if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,

3235

_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))

3236

return NULL;

3237

if (PyTuple_Check(subobj)) {

3238

Py_ssize_t i;

3239

for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {

3240

result = _string_tailmatch(self,

3241

PyTuple_GET_ITEM(subobj, i),

3242

start, end, -1);

3243

if (result == -1)

3244

return NULL;

3245

else if (result) {

3246

Py_RETURN_TRUE;

3247

}

3248

}

3249

Py_RETURN_FALSE;

3250

}

3251

result = _string_tailmatch(self, subobj, start, end, -1);

3252

if (result == -1)

3253

return NULL;

3254

else

3255

return PyBool_FromLong(result);

3256

}

3257

3258

3259

PyDoc_STRVAR(endswith__doc__,

3260

"S.endswith(suffix[, start[, end]]) -> bool\n\

3261

\n\

3262

Return True if S ends with the specified suffix, False otherwise.\n\

3263

With optional start, test S beginning at that position.\n\

3264

With optional end, stop comparing S at that position.\n\

3265

suffix can also be a tuple of strings to try.");

3266

3267

static PyObject *

3268

string_endswith(PyBytesObject *self, PyObject *args)

3269

{

3270

Py_ssize_t start = 0;

3271

Py_ssize_t end = PY_SSIZE_T_MAX;

3272

PyObject *subobj;

3273

int result;

3274

3275

if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,

3276

_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))

3277

return NULL;

3278

if (PyTuple_Check(subobj)) {

3279

Py_ssize_t i;

3280

for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {

3281

result = _string_tailmatch(self,

3282

PyTuple_GET_ITEM(subobj, i),

3283

start, end, +1);

3284

if (result == -1)

3285

return NULL;

3286

else if (result) {

3287

Py_RETURN_TRUE;

3288

}

3289

}

3290

Py_RETURN_FALSE;

3291

}

3292

result = _string_tailmatch(self, subobj, start, end, +1);

3293

if (result == -1)

3294

return NULL;

3295

else

3296

return PyBool_FromLong(result);

3297

}

3298

3299

3300

PyDoc_STRVAR(encode__doc__,

3301

"S.encode([encoding[,errors]]) -> object\n\

3302

\n\

3303

Encodes S using the codec registered for encoding. encoding defaults\n\

3304

to the default encoding. errors may be given to set a different error\n\

3305

handling scheme. Default is 'strict' meaning that encoding errors raise\n\

3306

a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\

3307

'xmlcharrefreplace' as well as any other name registered with\n\

3308

codecs.register_error that is able to handle UnicodeEncodeErrors.");

3309

3310

static PyObject *

3311

string_encode(PyBytesObject *self, PyObject *args)

3312

{

3313

char *encoding = NULL;

3314

char *errors = NULL;

3315

PyObject *v;

3316

3317

if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))

3318

return NULL;

3319

v = PyBytes_AsEncodedObject((PyObject *)self, encoding, errors);

3320

if (v == NULL)

3321

goto onError;

3322

if (!PyBytes_Check(v) && !PyUnicode_Check(v)) {

3323

PyErr_Format(PyExc_TypeError,

3324

"encoder did not return a string/unicode object "

3325

"(type=%.400s)",

3326

Py_TYPE(v)->tp_name);

3327

Py_DECREF(v);

3328

return NULL;

3329

}

3330

return v;

3331

3332

onError:

3333

return NULL;

3334

}

3335

3336

3337

PyDoc_STRVAR(decode__doc__,

3338

"S.decode([encoding[,errors]]) -> object\n\

3339

\n\

3340

Decodes S using the codec registered for encoding. encoding defaults\n\

3341

to the default encoding. errors may be given to set a different error\n\

3342

handling scheme. Default is 'strict' meaning that encoding errors raise\n\

3343

a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\

3344

as well as any other name registerd with codecs.register_error that is\n\

3345

able to handle UnicodeDecodeErrors.");

3346

3347

static PyObject *

3348

string_decode(PyBytesObject *self, PyObject *args)

3349

{

3350

char *encoding = NULL;

3351

char *errors = NULL;

3352

PyObject *v;

3353

3354

if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))

3355

return NULL;

3356

v = PyBytes_AsDecodedObject((PyObject *)self, encoding, errors);

3357

if (v == NULL)

3358

goto onError;

3359

if (!PyBytes_Check(v) && !PyUnicode_Check(v)) {

3360

PyErr_Format(PyExc_TypeError,

3361

"decoder did not return a string/unicode object "

3362

"(type=%.400s)",

3363

Py_TYPE(v)->tp_name);

3364

Py_DECREF(v);

3365

return NULL;

3366

}

3367

return v;

3368

3369

onError:

3370

return NULL;

3371

}

3372

3373

3374

PyDoc_STRVAR(expandtabs__doc__,

3375

"S.expandtabs([tabsize]) -> string\n\

3376

\n\

3377

Return a copy of S where all tab characters are expanded using spaces.\n\

3378

If tabsize is not given, a tab size of 8 characters is assumed.");

3379

3380

static PyObject*

3381

string_expandtabs(PyBytesObject *self, PyObject *args)

3382

{

3383

const char *e, *p, *qe;

3384

char *q;

3385

Py_ssize_t i, j, incr;

3386

PyObject *u;

3387

int tabsize = 8;

3388

3389

if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))

3390

return NULL;

3391

3392

/* First pass: determine size of output string */

3393

i = 0; /* chars up to and including most recent \n or \r */

3394

j = 0; /* chars since most recent \n or \r (use in tab calculations) */

3395

e = PyBytes_AS_STRING(self) + PyBytes_GET_SIZE(self); /* end of input */

3396

for (p = PyBytes_AS_STRING(self); p < e; p++)

3397

if (*p == '\t') {

3398

if (tabsize > 0) {

3399

incr = tabsize - (j % tabsize);

3400

if (j > PY_SSIZE_T_MAX - incr)

3401

goto overflow1;

3402

j += incr;

3403

}

3404

}

3405

else {

3406

if (j > PY_SSIZE_T_MAX - 1)

3407

goto overflow1;

3408

j++;

3409

if (*p == '\n' || *p == '\r') {

3410

if (i > PY_SSIZE_T_MAX - j)

3411

goto overflow1;

3412

i += j;

3413

j = 0;

3414

}

3415

}

3416

3417

if (i > PY_SSIZE_T_MAX - j)

3418

goto overflow1;

3419

3420

/* Second pass: create output string and fill it */

3421

u = PyBytes_FromStringAndSize(NULL, i + j);

3422

if (!u)

3423

return NULL;

3424

3425

j = 0; /* same as in first pass */

3426

q = PyBytes_AS_STRING(u); /* next output char */

3427

qe = PyBytes_AS_STRING(u) + PyBytes_GET_SIZE(u); /* end of output */

3428

3429

for (p = PyBytes_AS_STRING(self); p < e; p++)

3430

if (*p == '\t') {

3431

if (tabsize > 0) {

3432

i = tabsize - (j % tabsize);

3433

j += i;

3434

while (i--) {

3435

if (q >= qe)

3436

goto overflow2;

3437

*q++ = ' ';

3438

}

3439

}

3440

}

3441

else {

3442

if (q >= qe)

3443

goto overflow2;

3444

*q++ = *p;

3445

j++;

3446

if (*p == '\n' || *p == '\r')

3447

j = 0;

3448

}

3449

3450

return u;

3451

3452

overflow2:

3453

Py_DECREF(u);

3454

overflow1:

3455

PyErr_SetString(PyExc_OverflowError, "new string is too long");

3456

return NULL;

3457

}

3458

3459

/* Build a new string consisting of `left` copies of `fill`, the contents
 * of `self`, and `right` copies of `fill`.  Negative pad counts are
 * treated as zero.  When no padding is needed and `self` is an exact
 * string object, `self` itself is returned with a new reference.
 * Returns NULL if the allocation fails.
 */
Py_LOCAL_INLINE(PyObject *)
pad(PyBytesObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
{
    PyObject *padded;
    char *dst;
    Py_ssize_t len = PyBytes_GET_SIZE(self);

    if (left < 0)
        left = 0;
    if (right < 0)
        right = 0;

    if (left == 0 && right == 0 && PyBytes_CheckExact(self)) {
        Py_INCREF(self);
        return (PyObject *)self;
    }

    padded = PyBytes_FromStringAndSize(NULL, left + len + right);
    if (padded == NULL)
        return NULL;

    dst = PyBytes_AS_STRING(padded);
    if (left)
        memset(dst, fill, left);
    Py_MEMCPY(dst + left, PyBytes_AS_STRING(self), len);
    if (right)
        memset(dst + left + len, fill, right);

    return padded;
}

3489

3490

PyDoc_STRVAR(ljust__doc__,

3491

"S.ljust(width[, fillchar]) -> string\n"

3492

"\n"

3493

"Return S left justified in a string of length width. Padding is\n"

3494

"done using the specified fill character (default is a space).");

3495

3496

static PyObject *

3497

string_ljust(PyBytesObject *self, PyObject *args)

3498

{

3499

Py_ssize_t width;

3500

char fillchar = ' ';

3501

3502

if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))

3503

return NULL;

3504

3505

if (PyBytes_GET_SIZE(self) >= width && PyBytes_CheckExact(self)) {

3506

Py_INCREF(self);

3507

return (PyObject*) self;

3508

}

3509

3510

return pad(self, 0, width - PyBytes_GET_SIZE(self), fillchar);

3511

}

3512

3513

3514

PyDoc_STRVAR(rjust__doc__,

3515

"S.rjust(width[, fillchar]) -> string\n"

3516

"\n"

3517

"Return S right justified in a string of length width. Padding is\n"

3518

"done using the specified fill character (default is a space)");

3519

3520

static PyObject *

3521

string_rjust(PyBytesObject *self, PyObject *args)

3522

{

3523

Py_ssize_t width;

3524

char fillchar = ' ';

3525

3526

if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))

3527

return NULL;

3528

3529

if (PyBytes_GET_SIZE(self) >= width && PyBytes_CheckExact(self)) {

3530

Py_INCREF(self);

3531

return (PyObject*) self;

3532

}

3533

3534

return pad(self, width - PyBytes_GET_SIZE(self), 0, fillchar);

3535

}

3536

3537

3538

PyDoc_STRVAR(center__doc__,

3539

"S.center(width[, fillchar]) -> string\n"

3540

"\n"

3541

"Return S centered in a string of length width. Padding is\n"

3542

"done using the specified fill character (default is a space)");

3543

3544

static PyObject *

3545

string_center(PyBytesObject *self, PyObject *args)

3546

{

3547

Py_ssize_t marg, left;

3548

Py_ssize_t width;

3549

char fillchar = ' ';

3550

3551

if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))

3552

return NULL;

3553

3554

if (PyBytes_GET_SIZE(self) >= width && PyBytes_CheckExact(self)) {

3555

Py_INCREF(self);

3556

return (PyObject*) self;

3557

}

3558

3559

marg = width - PyBytes_GET_SIZE(self);

3560

left = marg / 2 + (marg & width & 1);

3561

3562

return pad(self, left, marg - left, fillchar);

3563

}

3564

3565

PyDoc_STRVAR(zfill__doc__,

3566

"S.zfill(width) -> string\n"

3567

"\n"

3568

"Pad a numeric string S with zeros on the left, to fill a field\n"

3569

"of the specified width. The string S is never truncated.");

3570

3571

static PyObject *

3572

string_zfill(PyBytesObject *self, PyObject *args)

3573

{

3574

Py_ssize_t fill;

3575

PyObject *s;

3576

char *p;

3577

Py_ssize_t width;

3578

3579

if (!PyArg_ParseTuple(args, "n:zfill", &width))

3580

return NULL;

3581

3582

if (PyBytes_GET_SIZE(self) >= width) {

3583

if (PyBytes_CheckExact(self)) {

3584

Py_INCREF(self);

3585

return (PyObject*) self;

3586

}

3587

else

3588

return PyBytes_FromStringAndSize(

3589

PyBytes_AS_STRING(self),

3590

PyBytes_GET_SIZE(self)

3591

);

3592

}

3593

3594

fill = width - PyBytes_GET_SIZE(self);

3595

3596

s = pad(self, fill, 0, '0');

3597

3598

if (s == NULL)

3599

return NULL;

3600

3601

p = PyBytes_AS_STRING(s);

3602

if (p[fill] == '+' || p[fill] == '-') {

3603

/* move sign to beginning of string */

3604

p[0] = p[fill];

3605

p[fill] = '0';

3606

}

3607

3608

return (PyObject*) s;

3609

}

3610

3611

PyDoc_STRVAR(isspace__doc__,

3612

"S.isspace() -> bool\n\

3613

\n\

3614

Return True if all characters in S are whitespace\n\

3615

and there is at least one character in S, False otherwise.");

3616

3617

static PyObject*

3618

string_isspace(PyBytesObject *self)

3619

{

3620

register const unsigned char *p

3621

= (unsigned char *) PyBytes_AS_STRING(self);

3622

register const unsigned char *e;

3623

3624

/* Shortcut for single character strings */

3625

if (PyBytes_GET_SIZE(self) == 1 &&

3626

isspace(*p))

3627

return PyBool_FromLong(1);

3628

3629

/* Special case for empty strings */

3630

if (PyBytes_GET_SIZE(self) == 0)

3631

return PyBool_FromLong(0);

3632

3633

e = p + PyBytes_GET_SIZE(self);

3634

for (; p < e; p++) {

3635

if (!isspace(*p))

3636

return PyBool_FromLong(0);

3637

}

3638

return PyBool_FromLong(1);

3639

}

3640

3641

3642

PyDoc_STRVAR(isalpha__doc__,

3643

"S.isalpha() -> bool\n\

3644

\n\

3645

Return True if all characters in S are alphabetic\n\

3646

and there is at least one character in S, False otherwise.");

3647

3648

static PyObject*

3649

string_isalpha(PyBytesObject *self)

3650

{

3651

register const unsigned char *p

3652

= (unsigned char *) PyBytes_AS_STRING(self);

3653

register const unsigned char *e;

3654

3655

/* Shortcut for single character strings */

3656

if (PyBytes_GET_SIZE(self) == 1 &&

3657

isalpha(*p))

3658

return PyBool_FromLong(1);

3659

3660

/* Special case for empty strings */

3661

if (PyBytes_GET_SIZE(self) == 0)

3662

return PyBool_FromLong(0);

3663

3664

e = p + PyBytes_GET_SIZE(self);

3665

for (; p < e; p++) {

3666

if (!isalpha(*p))

3667

return PyBool_FromLong(0);

3668

}

3669

return PyBool_FromLong(1);

3670

}

3671

3672

3673

PyDoc_STRVAR(isalnum__doc__,

3674

"S.isalnum() -> bool\n\

3675

\n\

3676

Return True if all characters in S are alphanumeric\n\

3677

and there is at least one character in S, False otherwise.");

3678

3679

static PyObject*

3680

string_isalnum(PyBytesObject *self)

3681

{

3682

register const unsigned char *p

3683

= (unsigned char *) PyBytes_AS_STRING(self);

3684

register const unsigned char *e;

3685

3686

/* Shortcut for single character strings */

3687

if (PyBytes_GET_SIZE(self) == 1 &&

3688

isalnum(*p))

3689

return PyBool_FromLong(1);

3690

3691

/* Special case for empty strings */

3692

if (PyBytes_GET_SIZE(self) == 0)

3693

return PyBool_FromLong(0);

3694

3695

e = p + PyBytes_GET_SIZE(self);

3696

for (; p < e; p++) {

3697

if (!isalnum(*p))

3698

return PyBool_FromLong(0);

3699

}

3700

return PyBool_FromLong(1);

3701

}

3702

3703

3704

PyDoc_STRVAR(isdigit__doc__,

3705

"S.isdigit() -> bool\n\

3706

\n\

3707

Return True if all characters in S are digits\n\

3708

and there is at least one character in S, False otherwise.");

3709

3710

static PyObject*

3711

string_isdigit(PyBytesObject *self)

3712

{

3713

register const unsigned char *p

3714

= (unsigned char *) PyBytes_AS_STRING(self);

3715

register const unsigned char *e;

3716

3717

/* Shortcut for single character strings */

3718

if (PyBytes_GET_SIZE(self) == 1 &&

3719

isdigit(*p))

3720

return PyBool_FromLong(1);

3721

3722

/* Special case for empty strings */

3723

if (PyBytes_GET_SIZE(self) == 0)

3724

return PyBool_FromLong(0);

3725

3726

e = p + PyBytes_GET_SIZE(self);

3727

for (; p < e; p++) {

3728

if (!isdigit(*p))

3729

return PyBool_FromLong(0);

3730

}

3731

return PyBool_FromLong(1);

3732

}

3733

3734

3735

PyDoc_STRVAR(islower__doc__,

3736

"S.islower() -> bool\n\

3737

\n\

3738

Return True if all cased characters in S are lowercase and there is\n\

3739

at least one cased character in S, False otherwise.");

3740

3741

static PyObject*

3742

string_islower(PyBytesObject *self)

3743

{

3744

register const unsigned char *p

3745

= (unsigned char *) PyBytes_AS_STRING(self);

3746

register const unsigned char *e;

3747

int cased;

3748

3749

/* Shortcut for single character strings */

3750

if (PyBytes_GET_SIZE(self) == 1)

3751

return PyBool_FromLong(islower(*p) != 0);

3752

3753

/* Special case for empty strings */

3754

if (PyBytes_GET_SIZE(self) == 0)

3755

return PyBool_FromLong(0);

3756

3757

e = p + PyBytes_GET_SIZE(self);

3758

cased = 0;

3759

for (; p < e; p++) {

3760

if (isupper(*p))

3761

return PyBool_FromLong(0);

3762

else if (!cased && islower(*p))

3763

cased = 1;

3764

}

3765

return PyBool_FromLong(cased);

3766

}

3767

3768

3769

PyDoc_STRVAR(isupper__doc__,

3770

"S.isupper() -> bool\n\

3771

\n\

3772

Return True if all cased characters in S are uppercase and there is\n\

3773

at least one cased character in S, False otherwise.");

3774

3775

static PyObject*

3776

string_isupper(PyBytesObject *self)

3777

{

3778

register const unsigned char *p

3779

= (unsigned char *) PyBytes_AS_STRING(self);

3780

register const unsigned char *e;

3781

int cased;

3782

3783

/* Shortcut for single character strings */

3784

if (PyBytes_GET_SIZE(self) == 1)

3785

return PyBool_FromLong(isupper(*p) != 0);

3786

3787

/* Special case for empty strings */

3788

if (PyBytes_GET_SIZE(self) == 0)

3789

return PyBool_FromLong(0);

3790

3791

e = p + PyBytes_GET_SIZE(self);

3792

cased = 0;

3793

for (; p < e; p++) {

3794

if (islower(*p))

3795

return PyBool_FromLong(0);

3796

else if (!cased && isupper(*p))

3797

cased = 1;

3798

}

3799

return PyBool_FromLong(cased);

3800

}

3801

3802

3803

PyDoc_STRVAR(istitle__doc__,

3804

"S.istitle() -> bool\n\

3805

\n\

3806

Return True if S is a titlecased string and there is at least one\n\

3807

character in S, i.e. uppercase characters may only follow uncased\n\

3808

characters and lowercase characters only cased ones. Return False\n\

3809

otherwise.");

3810

3811

static PyObject*

3812

string_istitle(PyBytesObject *self, PyObject *uncased)

3813

{

3814

register const unsigned char *p

3815

= (unsigned char *) PyBytes_AS_STRING(self);

3816

register const unsigned char *e;

3817

int cased, previous_is_cased;

3818

3819

/* Shortcut for single character strings */

3820

if (PyBytes_GET_SIZE(self) == 1)

3821

return PyBool_FromLong(isupper(*p) != 0);

3822

3823

/* Special case for empty strings */

3824

if (PyBytes_GET_SIZE(self) == 0)

3825

return PyBool_FromLong(0);

3826

3827

e = p + PyBytes_GET_SIZE(self);

3828

cased = 0;

3829

previous_is_cased = 0;

3830

for (; p < e; p++) {

3831

register const unsigned char ch = *p;

3832

3833

if (isupper(ch)) {

3834

if (previous_is_cased)

3835

return PyBool_FromLong(0);

3836

previous_is_cased = 1;

3837

cased = 1;

3838

}

3839

else if (islower(ch)) {

3840

if (!previous_is_cased)

3841

return PyBool_FromLong(0);

3842

previous_is_cased = 1;

3843

cased = 1;

3844

}

3845

else

3846

previous_is_cased = 0;

3847

}

3848

return PyBool_FromLong(cased);

3849

}

3850

3851

3852

PyDoc_STRVAR(splitlines__doc__,

3853

"S.splitlines([keepends]) -> list of strings\n\

3854

\n\

3855

Return a list of the lines in S, breaking at line boundaries.\n\

3856

Line breaks are not included in the resulting list unless keepends\n\

3857

is given and true.");

3858

3859

static PyObject*

3860

string_splitlines(PyBytesObject *self, PyObject *args)

3861

{

3862

register Py_ssize_t i;

3863

register Py_ssize_t j;

3864

Py_ssize_t len;

3865

int keepends = 0;

3866

PyObject *list;

3867

PyObject *str;

3868

char *data;

3869

3870

if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))

3871

return NULL;

3872

3873

data = PyBytes_AS_STRING(self);

3874

len = PyBytes_GET_SIZE(self);

3875

3876

/* This does not use the preallocated list because splitlines is

3877

usually run with hundreds of newlines. The overhead of

3878

switching between PyList_SET_ITEM and append causes about a

3879

2-3% slowdown for that common case. A smarter implementation

3880

could move the if check out, so the SET_ITEMs are done first

3881

and the appends only done when the prealloc buffer is full.

3882

That's too much work for little gain.*/

3883

3884

list = PyList_New(0);

3885

if (!list)

3886

goto onError;

3887

3888

for (i = j = 0; i < len; ) {

3889

Py_ssize_t eol;

3890

3891

/* Find a line and append it */

3892

while (i < len && data[i] != '\n' && data[i] != '\r')

3893

i++;

3894

3895

/* Skip the line break reading CRLF as one line break */

3896

eol = i;

3897

if (i < len) {

3898

if (data[i] == '\r' && i + 1 < len &&

3899

data[i+1] == '\n')

3900

i += 2;

3901

else

3902

i++;

3903

if (keepends)

3904

eol = i;

3905

}

3906

SPLIT_APPEND(data, j, eol);

3907

j = i;

3908

}

3909

if (j < len) {

3910

SPLIT_APPEND(data, j, len);

3911

}

3912

3913

return list;

3914

3915

onError:

3916

Py_XDECREF(list);

3917

return NULL;

3918

}

3919

3920

PyDoc_STRVAR(sizeof__doc__,

3921

"S.__sizeof__() -> size of S in memory, in bytes");

3922

3923

static PyObject *

3924

string_sizeof(PyBytesObject *v)

3925

{

3926

Py_ssize_t res;

3927

res = sizeof(PyBytesObject) + v->ob_size * v->ob_type->tp_itemsize;

3928

return PyInt_FromSsize_t(res);

3929

}

3930

3931

/* The split helper macros are not used past this point. */
#undef SPLIT_APPEND
#undef SPLIT_ADD
#undef MAX_PREALLOC
#undef PREALLOC_SIZE

3935

3936

static PyObject *

3937

string_getnewargs(PyBytesObject *v)

3938

{

3939

return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));

3940

}

3941

3942

3943

#include "stringlib/string_format.h"

3944

3945

PyDoc_STRVAR(format__doc__,

3946

"S.format(*args, **kwargs) -> unicode\n\

3947

\n\

3948

");

3949

3950

/* Implementation of S.__format__(format_spec).
 *
 * Accepts a str or unicode format_spec (anything else raises
 * TypeError), converts it with PyObject_Str() and passes its bytes to
 * _PyBytes_FormatAdvanced().  Returns that function's result, or NULL
 * with an exception set.
 */
static PyObject *
string__format__(PyObject* self, PyObject* args)
{
    PyObject *format_spec;
    PyObject *spec_str;
    PyObject *result;

    /* If 2.x, convert format_spec to the same type as value */
    /* This is to allow things like u''.format('') */
    if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
        return NULL;

    if (!PyBytes_Check(format_spec) && !PyUnicode_Check(format_spec)) {
        PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
                     "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
        return NULL;
    }

    spec_str = PyObject_Str(format_spec);
    if (spec_str == NULL)
        return NULL;

    result = _PyBytes_FormatAdvanced(self,
                                     PyBytes_AS_STRING(spec_str),
                                     PyBytes_GET_SIZE(spec_str));
    Py_DECREF(spec_str);
    return result;
}

3978

3979

PyDoc_STRVAR(p_format__doc__,

3980

"S.__format__(format_spec) -> unicode\n\

3981

\n\

3982

");

3983

3984

3985

static PyMethodDef

3986

string_methods[] = {

3987

/* Counterparts of the obsolete stropmodule functions; except

3988

string.maketrans(). */

3989

{"join", (PyCFunction)string_join, METH_O, join__doc__},

3990

{"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},

3991

{"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},

3992

{"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},

3993

{"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},

3994

{"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},

3995

{"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},

3996

{"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},

3997

{"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},

3998

{"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},

3999

{"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},

4000

{"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},

4001

{"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,

4002

capitalize__doc__},

4003

{"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},

4004

{"endswith", (PyCFunction)string_endswith, METH_VARARGS,

4005

endswith__doc__},

4006

{"partition", (PyCFunction)string_partition, METH_O, partition__doc__},

4007

{"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},

4008

{"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},

4009

{"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},

4010

{"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},

4011

{"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},

4012

{"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},

4013

{"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},

4014

{"rpartition", (PyCFunction)string_rpartition, METH_O,

4015

rpartition__doc__},

4016

{"startswith", (PyCFunction)string_startswith, METH_VARARGS,

4017

startswith__doc__},

4018

{"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},

4019

{"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,

4020

swapcase__doc__},

4021

{"translate", (PyCFunction)string_translate, METH_VARARGS,

4022

translate__doc__},

4023

{"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},

4024

{"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},

4025

{"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},

4026

{"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},

4027

{"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},

4028

{"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},

4029

{"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},

4030

{"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},

4031

{"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},

4032

{"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},

4033

{"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},

4034

{"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,

4035

expandtabs__doc__},

4036

{"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,

4037

splitlines__doc__},

4038

{"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,

4039

sizeof__doc__},

4040

{"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},

4041

{NULL, NULL} /* sentinel */

4042

};

4043

4044

static PyObject *

4045

str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);

4046

4047

static PyObject *

4048

string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)

4049

{

4050

PyObject *x = NULL;

4051

static char *kwlist[] = {"object", 0};

4052

4053

if (type != &PyBytes_Type)

4054

return str_subtype_new(type, args, kwds);

4055

if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))

4056

return NULL;

4057

if (x == NULL)

4058

return PyBytes_FromString("");

4059

return PyObject_Str(x);

4060

}

4061

4062

static PyObject *

4063

str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)

4064

{

4065

PyObject *tmp, *pnew;

4066

Py_ssize_t n;

4067

4068

assert(PyType_IsSubtype(type, &PyBytes_Type));

4069

tmp = string_new(&PyBytes_Type, args, kwds);

4070

if (tmp == NULL)

4071

return NULL;

4072

assert(PyBytes_CheckExact(tmp));

4073

n = PyBytes_GET_SIZE(tmp);

4074

pnew = type->tp_alloc(type, n);

4075

if (pnew != NULL) {

4076

Py_MEMCPY(PyBytes_AS_STRING(pnew), PyBytes_AS_STRING(tmp), n+1);

4077

((PyBytesObject *)pnew)->ob_shash =

4078

((PyBytesObject *)tmp)->ob_shash;

4079

((PyBytesObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;

4080

}

4081

Py_DECREF(tmp);

4082

return pnew;

4083

}

4084

4085

static PyObject *

4086

basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)

4087

{

4088

PyErr_SetString(PyExc_TypeError,

4089

"The basestring type cannot be instantiated");

4090

return NULL;

4091

}

4092

4093

/* nb_remainder slot for str: implements `v % w`.
 * Returns NotImplemented (new reference) when the left operand is not
 * a string; otherwise the result of PyBytes_Format(v, w).
 */
static PyObject *
string_mod(PyObject *v, PyObject *w)
{
    if (PyBytes_Check(v))
        return PyBytes_Format(v, w);

    Py_INCREF(Py_NotImplemented);
    return Py_NotImplemented;
}

4102

4103

PyDoc_STRVAR(basestring_doc,

4104

"Type basestring cannot be instantiated; it is the base for str and unicode.");

4105

4106

static PyNumberMethods string_as_number = {

4107

0, /*nb_add*/

4108

0, /*nb_subtract*/

4109

0, /*nb_multiply*/

4110

0, /*nb_divide*/

4111

string_mod, /*nb_remainder*/

4112

};

4113

4114

4115

PyTypeObject PyBaseString_Type = {

4116

PyVarObject_HEAD_INIT(&PyType_Type, 0)

4117

"basestring",

4118

0,

4119

0,

4120

0, /* tp_dealloc */

4121

0, /* tp_print */

4122

0, /* tp_getattr */

4123

0, /* tp_setattr */

4124

0, /* tp_compare */

4125

0, /* tp_repr */

4126

0, /* tp_as_number */

4127

0, /* tp_as_sequence */

4128

0, /* tp_as_mapping */

4129

0, /* tp_hash */

4130

0, /* tp_call */

4131

0, /* tp_str */

4132

0, /* tp_getattro */

4133

0, /* tp_setattro */

4134

0, /* tp_as_buffer */

4135

Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */

4136

basestring_doc, /* tp_doc */

4137

0, /* tp_traverse */

4138

0, /* tp_clear */

4139

0, /* tp_richcompare */

4140

0, /* tp_weaklistoffset */

4141

0, /* tp_iter */

4142

0, /* tp_iternext */

4143

0, /* tp_methods */

4144

0, /* tp_members */

4145

0, /* tp_getset */

4146

&PyBaseObject_Type, /* tp_base */

4147

0, /* tp_dict */

4148

0, /* tp_descr_get */

4149

0, /* tp_descr_set */

4150

0, /* tp_dictoffset */

4151

0, /* tp_init */

4152

0, /* tp_alloc */

4153

basestring_new, /* tp_new */

4154

0, /* tp_free */

4155

};

4156

4157

PyDoc_STRVAR(string_doc,

4158

"str(object) -> string\n\

4159

\n\

4160

Return a nice string representation of the object.\n\

4161

If the argument is a string, the return value is the same object.");

4162

4163

PyTypeObject PyBytes_Type = {

4164

PyVarObject_HEAD_INIT(&PyType_Type, 0)

4165

"str",

4166

sizeof(PyBytesObject),

4167

sizeof(char),

4168

string_dealloc, /* tp_dealloc */

4169

(printfunc)string_print, /* tp_print */

4170

0, /* tp_getattr */

4171

0, /* tp_setattr */

4172

0, /* tp_compare */

4173

string_repr, /* tp_repr */

4174

&string_as_number, /* tp_as_number */

4175

&string_as_sequence, /* tp_as_sequence */

4176

&string_as_mapping, /* tp_as_mapping */

4177

(hashfunc)string_hash, /* tp_hash */

4178

0, /* tp_call */

4179

string_str, /* tp_str */

4180

PyObject_GenericGetAttr, /* tp_getattro */

4181

0, /* tp_setattro */

4182

&string_as_buffer, /* tp_as_buffer */

4183

Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |

4184

Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |

4185

Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */

4186

string_doc, /* tp_doc */

4187

0, /* tp_traverse */

4188

0, /* tp_clear */

4189

(richcmpfunc)string_richcompare, /* tp_richcompare */

4190

0, /* tp_weaklistoffset */

4191

0, /* tp_iter */

4192

0, /* tp_iternext */

4193

string_methods, /* tp_methods */

4194

0, /* tp_members */

4195

0, /* tp_getset */

4196

&PyBaseString_Type, /* tp_base */

4197

0, /* tp_dict */

4198

0, /* tp_descr_get */

4199

0, /* tp_descr_set */

4200

0, /* tp_dictoffset */

4201

0, /* tp_init */

4202

0, /* tp_alloc */

4203

string_new, /* tp_new */

4204

PyObject_Del, /* tp_free */

4205

};

4206

4207

void

4208

PyBytes_Concat(register PyObject **pv, register PyObject *w)

4209

{

4210

register PyObject *v;

4211

if (*pv == NULL)

4212

return;

4213

if (w == NULL || !PyBytes_Check(*pv)) {

4214

Py_DECREF(*pv);

4215

*pv = NULL;

4216

return;

4217

}

4218

v = string_concat((PyBytesObject *) *pv, w);

4219

Py_DECREF(*pv);

4220

*pv = v;

4221

}

4222

4223

void

4224

PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)

4225

{

4226

PyBytes_Concat(pv, w);

4227

Py_XDECREF(w);

4228

}

4229

4230

4231

/* The following function breaks the notion that strings are immutable:

4232

it changes the size of a string. We get away with this only if there

4233

is only one module referencing the object. You can also think of it

4234

as creating a new string object and destroying the old one, only

4235

more efficiently. In any case, don't use this if the string may

4236

already be known to some other part of the code...

4237

Note that if there's not enough memory to resize the string, the original

4238

string object at *pv is deallocated, *pv is set to NULL, an "out of

4239

memory" exception is set, and -1 is returned. Else (on success) 0 is

4240

returned, and the value in *pv may or may not be the same as on input.

4241

As always, an extra byte is allocated for a trailing \0 byte (newsize

4242

does *not* include that), and a trailing \0 byte is stored.

4243

*/

4244

4245

int

4246

_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)

4247

{

4248

register PyObject *v;

4249

register PyBytesObject *sv;

4250

v = *pv;

4251

if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||

4252

PyBytes_CHECK_INTERNED(v)) {

4253

*pv = 0;

4254

Py_DECREF(v);

4255

PyErr_BadInternalCall();

4256

return -1;

4257

}

4258

/* XXX UNREF/NEWREF interface should be more symmetrical */

4259

_Py_DEC_REFTOTAL;

4260

_Py_ForgetReference(v);

4261

*pv = (PyObject *)

4262

PyObject_REALLOC((char *)v, sizeof(PyBytesObject) + newsize);

4263

if (*pv == NULL) {

4264

PyObject_Del(v);

4265

PyErr_NoMemory();

4266

return -1;

4267

}

4268

_Py_NewReference(*pv);

4269

sv = (PyBytesObject *) *pv;

4270

Py_SIZE(sv) = newsize;

4271

sv->ob_sval[newsize] = '\0';

4272

sv->ob_shash = -1; /* invalidate cached hash value */

4273

return 0;

4274

}

4275

4276

/* Helpers for formatstring */

4277

4278

/* Fetch the next argument for a format operation and advance
 * *p_argidx.  A negative arglen means `args` itself is the argument;
 * otherwise `args` is indexed as a tuple.  When *p_argidx is not below
 * arglen, TypeError is raised and NULL is returned.
 */
Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
{
    Py_ssize_t argidx = *p_argidx;

    if (argidx >= arglen) {
        PyErr_SetString(PyExc_TypeError,
                        "not enough arguments for format string");
        return NULL;
    }

    (*p_argidx)++;
    return (arglen < 0) ? args : PyTuple_GetItem(args, argidx);
}

4293

4294

/* Format flag bits, one per flag character in a format spec:
 *      F_LJUST   '-'
 *      F_SIGN    '+'
 *      F_BLANK   ' '
 *      F_ALT     '#'
 *      F_ZERO    '0'
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)

4306

4307

Py_LOCAL_INLINE(int)

4308

formatfloat(char *buf, size_t buflen, int flags,

4309

int prec, int type, PyObject *v)

4310

{

4311

/* fmt = '%#.' + `prec` + `type`

4312

worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/

4313

char fmt[20];

4314

double x;

4315

x = PyFloat_AsDouble(v);

4316

if (x == -1.0 && PyErr_Occurred()) {

4317

PyErr_Format(PyExc_TypeError, "float argument required, "

4318

"not %.200s", Py_TYPE(v)->tp_name);

4319

return -1;

4320

}

4321

if (prec < 0)

4322

prec = 6;

4323

if (type == 'f' && fabs(x)/1e25 >= 1e25)

4324

type = 'g';

4325

/* Worst case length calc to ensure no buffer overrun:

4326

4327

'g' formats:

4328

fmt = %#.<prec>g

4329

buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp

4330

for any double rep.)

4331

len = 1 + prec + 1 + 2 + 5 = 9 + prec

4332

4333

'f' formats:

4334

buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)

4335

len = 1 + 50 + 1 + prec = 52 + prec

4336

4337

If prec=0 the effective precision is 1 (the leading digit is

4338

always given), therefore increase the length by one.

4339

4340

*/

4341

if (((type == 'g' || type == 'G') &&

4342

buflen <= (size_t)10 + (size_t)prec) ||

4343

(type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {

4344

PyErr_SetString(PyExc_OverflowError,

4345

"formatted float is too long (precision too large?)");

4346

return -1;

4347

}

4348

PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",

4349

(flags&F_ALT) ? "#" : "",

4350

prec, type);

4351

PyOS_ascii_formatd(buf, buflen, fmt, x);

4352

return (int)strlen(buf);

4353

}

4354

4355

/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and

4356

* the F_ALT flag, for Python's long (unbounded) ints. It's not used for

4357

* Python's regular ints.

4358

* Return value: a new PyString*, or NULL if error.

4359

* . *pbuf is set to point into it,

4360

* *plen set to the # of chars following that.

4361

* Caller must decref it when done using pbuf.

4362

* The string starting at *pbuf is of the form

4363

* "-"? ("0x" | "0X")? digit+

4364

* "0x"/"0X" are present only for x and X conversions, with F_ALT

4365

* set in flags. The case of hex digits will be correct.

4366

* There will be at least prec digits, zero-filled on the left if

4367

* necessary to get that many.

4368

* val object to be converted

4369

* flags bitmask of format flags; only F_ALT is looked at

4370

* prec minimum number of digits; 0-fill on left if needed

4371

* type a character in [duoxX]; u acts the same as d

4372

*

4373

* CAUTION: o, x and X conversions on regular ints can never

4374

* produce a '-' sign, but can for Python's unbounded ints.

4375

*/

4376

PyObject*

4377

_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,

4378

char **pbuf, int *plen)

4379

{

4380

PyObject *result = NULL;

4381

char *buf;

4382

Py_ssize_t i;

4383

int sign; /* 1 if '-', else 0 */

4384

int len; /* number of characters */

4385

Py_ssize_t llen;

4386

int numdigits; /* len == numnondigits + numdigits */

4387

int numnondigits = 0;

4388

4389

switch (type) {

4390

case 'd':

4391

case 'u':

4392

result = Py_TYPE(val)->tp_str(val);

4393

break;

4394

case 'o':

4395

result = Py_TYPE(val)->tp_as_number->nb_oct(val);

4396

break;

4397

case 'x':

4398

case 'X':

4399

numnondigits = 2;

4400

result = Py_TYPE(val)->tp_as_number->nb_hex(val);

4401

break;

4402

default:

4403

assert(!"'type' not in [duoxX]");

4404

}

4405

if (!result)

4406

return NULL;

4407

4408

buf = PyBytes_AsString(result);

4409

if (!buf) {

4410

Py_DECREF(result);

4411

return NULL;

4412

}

4413

4414

/* To modify the string in-place, there can only be one reference. */

4415

if (Py_REFCNT(result) != 1) {

4416

PyErr_BadInternalCall();

4417

return NULL;

4418

}

4419

llen = PyBytes_Size(result);

4420

if (llen > INT_MAX) {

4421

PyErr_SetString(PyExc_ValueError, "string too large in _PyBytes_FormatLong");

4422

return NULL;

4423

}

4424

len = (int)llen;

4425

if (buf[len-1] == 'L') {

4426

--len;

4427

buf[len] = '\0';

4428

}

4429

sign = buf[0] == '-';

4430

numnondigits += sign;

4431

numdigits = len - numnondigits;

4432

assert(numdigits > 0);

4433

4434

/* Get rid of base marker unless F_ALT */

4435

if ((flags & F_ALT) == 0) {

4436

/* Need to skip 0x, 0X or 0. */

4437

int skipped = 0;

4438

switch (type) {

4439

case 'o':

4440

assert(buf[sign] == '0');

4441

/* If 0 is only digit, leave it alone. */

4442

if (numdigits > 1) {

4443

skipped = 1;

4444

--numdigits;

4445

}

4446

break;

4447

case 'x':

4448

case 'X':

4449

assert(buf[sign] == '0');

4450

assert(buf[sign + 1] == 'x');

4451

skipped = 2;

4452

numnondigits -= 2;

4453

break;

4454

}

4455

if (skipped) {

4456

buf += skipped;

4457

len -= skipped;

4458

if (sign)

4459

buf[0] = '-';

4460

}

4461

assert(len == numnondigits + numdigits);

4462

assert(numdigits > 0);

4463

}

4464

4465

/* Fill with leading zeroes to meet minimum width. */

4466

if (prec > numdigits) {

4467

PyObject *r1 = PyBytes_FromStringAndSize(NULL,

4468

numnondigits + prec);

4469

char *b1;

4470

if (!r1) {

4471

Py_DECREF(result);

4472

return NULL;

4473

}

4474

b1 = PyBytes_AS_STRING(r1);

4475

for (i = 0; i < numnondigits; ++i)

4476

*b1++ = *buf++;

4477

for (i = 0; i < prec - numdigits; i++)

4478

*b1++ = '0';

4479

for (i = 0; i < numdigits; i++)

4480

*b1++ = *buf++;

4481

*b1 = '\0';

4482

Py_DECREF(result);

4483

result = r1;

4484

buf = PyBytes_AS_STRING(result);

4485

len = numnondigits + prec;

4486

}

4487

4488

/* Fix up case for hex conversions. */

4489

if (type == 'X') {

4490

/* Need to convert all lower case letters to upper case.

4491

and need to convert 0x to 0X (and -0x to -0X). */

4492

for (i = 0; i < len; i++)

4493

if (buf[i] >= 'a' && buf[i] <= 'x')

4494

buf[i] -= 'a'-'A';

4495

}

4496

*pbuf = buf;

4497

*plen = len;

4498

return result;

4499

}

4500

4501

Py_LOCAL_INLINE(int)

4502

formatint(char *buf, size_t buflen, int flags,

4503

int prec, int type, PyObject *v)

4504

{

4505

/* fmt = '%#.' + `prec` + 'l' + `type`

4506

worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)

4507

+ 1 + 1 = 24 */

4508

char fmt[64]; /* plenty big enough! */

4509

char *sign;

4510

long x;

4511

4512

x = PyInt_AsLong(v);

4513

if (x == -1 && PyErr_Occurred()) {

4514

PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",

4515

Py_TYPE(v)->tp_name);

4516

return -1;

4517

}

4518

if (x < 0 && type == 'u') {

4519

type = 'd';

4520

}

4521

if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))

4522

sign = "-";

4523

else

4524

sign = "";

4525

if (prec < 0)

4526

prec = 1;

4527

4528

if ((flags & F_ALT) &&

4529

(type == 'x' || type == 'X')) {

4530

/* When converting under %#x or %#X, there are a number

4531

* of issues that cause pain:

4532

* - when 0 is being converted, the C standard leaves off

4533

* the '0x' or '0X', which is inconsistent with other

4534

* %#x/%#X conversions and inconsistent with Python's

4535

* hex() function

4536

* - there are platforms that violate the standard and

4537

* convert 0 with the '0x' or '0X'

4538

* (Metrowerks, Compaq Tru64)

4539

* - there are platforms that give '0x' when converting

4540

* under %#X, but convert 0 in accordance with the

4541

* standard (OS/2 EMX)

4542

*

4543

* We can achieve the desired consistency by inserting our

4544

* own '0x' or '0X' prefix, and substituting %x/%X in place

4545

* of %#x/%#X.

4546

*

4547

* Note that this is the same approach as used in

4548

* formatint() in unicodeobject.c

4549

*/

4550

PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",

4551

sign, type, prec, type);

4552

}

4553

else {

4554

PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",

4555

sign, (flags&F_ALT) ? "#" : "",

4556

prec, type);

4557

}

4558

4559

/* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))

4560

* worst case buf = '-0x' + [0-9]*prec, where prec >= 11

4561

*/

4562

if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {

4563

PyErr_SetString(PyExc_OverflowError,

4564

"formatted integer is too long (precision too large?)");

4565

return -1;

4566

}

4567

if (sign[0])

4568

PyOS_snprintf(buf, buflen, fmt, -x);

4569

else

4570

PyOS_snprintf(buf, buflen, fmt, x);

4571

return (int)strlen(buf);

4572

}

4573

4574

Py_LOCAL_INLINE(int)

4575

formatchar(char *buf, size_t buflen, PyObject *v)

4576

{

4577

/* presume that the buffer is at least 2 characters long */

4578

if (PyBytes_Check(v)) {

4579

if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))

4580

return -1;

4581

}

4582

else {

4583

if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))

4584

return -1;

4585

}

4586

buf[1] = '\0';

4587

return 1;

4588

}

4589

4590

/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

4591

4592

FORMATBUFLEN is the length of the buffer in which the floats, ints, &

4593

chars are formatted. XXX This is a magic number. Each formatting

4594

routine does bounds checking to ensure no overflow, but a better

4595

solution may be to malloc a buffer of appropriate size for each

4596

format. For now, the current solution is sufficient.

4597

*/

4598

#define FORMATBUFLEN (size_t)120

4599

4600

PyObject *

4601

PyBytes_Format(PyObject *format, PyObject *args)

4602

{

4603

char *fmt, *res;

4604

Py_ssize_t arglen, argidx;

4605

Py_ssize_t reslen, rescnt, fmtcnt;

4606

int args_owned = 0;

4607

PyObject *result, *orig_args;

4608

#ifdef Py_USING_UNICODE

4609

PyObject *v, *w;

4610

#endif

4611

PyObject *dict = NULL;

4612

if (format == NULL || !PyBytes_Check(format) || args == NULL) {

4613

PyErr_BadInternalCall();

4614

return NULL;

4615

}

4616

orig_args = args;

4617

fmt = PyBytes_AS_STRING(format);

4618

fmtcnt = PyBytes_GET_SIZE(format);

4619

reslen = rescnt = fmtcnt + 100;

4620

result = PyBytes_FromStringAndSize((char *)NULL, reslen);

4621

if (result == NULL)

4622

return NULL;

4623

res = PyBytes_AsString(result);

4624

if (PyTuple_Check(args)) {

4625

arglen = PyTuple_GET_SIZE(args);

4626

argidx = 0;

4627

}

4628

else {

4629

arglen = -1;

4630

argidx = -2;

4631

}

4632

if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&

4633

!PyObject_TypeCheck(args, &PyBaseString_Type))

4634

dict = args;

4635

while (--fmtcnt >= 0) {

4636

if (*fmt != '%') {

4637

if (--rescnt < 0) {

4638

rescnt = fmtcnt + 100;

4639

reslen += rescnt;

4640

if (_PyBytes_Resize(&result, reslen) < 0)

4641

return NULL;

4642

res = PyBytes_AS_STRING(result)

4643

+ reslen - rescnt;

4644

--rescnt;

4645

}

4646

*res++ = *fmt++;

4647

}

4648

else {

4649

/* Got a format specifier */

4650

int flags = 0;

4651

Py_ssize_t width = -1;

4652

int prec = -1;

4653

int c = '\0';

4654

int fill;

4655

int isnumok;

4656

PyObject *v = NULL;

4657

PyObject *temp = NULL;

4658

char *pbuf;

4659

int sign;

4660

Py_ssize_t len;

4661

char formatbuf[FORMATBUFLEN];

4662

/* For format{float,int,char}() */

4663

#ifdef Py_USING_UNICODE

4664

char *fmt_start = fmt;

4665

Py_ssize_t argidx_start = argidx;

4666

#endif

4667

4668

fmt++;

4669

if (*fmt == '(') {

4670

char *keystart;

4671

Py_ssize_t keylen;

4672

PyObject *key;

4673

int pcount = 1;

4674

4675

if (dict == NULL) {

4676

PyErr_SetString(PyExc_TypeError,

4677

"format requires a mapping");

4678

goto error;

4679

}

4680

++fmt;

4681

--fmtcnt;

4682

keystart = fmt;

4683

/* Skip over balanced parentheses */

4684

while (pcount > 0 && --fmtcnt >= 0) {

4685

if (*fmt == ')')

4686

--pcount;

4687

else if (*fmt == '(')

4688

++pcount;

4689

fmt++;

4690

}

4691

keylen = fmt - keystart - 1;

4692

if (fmtcnt < 0 || pcount > 0) {

4693

PyErr_SetString(PyExc_ValueError,

4694

"incomplete format key");

4695

goto error;

4696

}

4697

key = PyBytes_FromStringAndSize(keystart,

4698

keylen);

4699

if (key == NULL)

4700

goto error;

4701

if (args_owned) {

4702

Py_DECREF(args);

4703

args_owned = 0;

4704

}

4705

args = PyObject_GetItem(dict, key);

4706

Py_DECREF(key);

4707

if (args == NULL) {

4708

goto error;

4709

}

4710

args_owned = 1;

4711

arglen = -1;

4712

argidx = -2;

4713

}

4714

while (--fmtcnt >= 0) {

4715

switch (c = *fmt++) {

4716

case '-': flags |= F_LJUST; continue;

4717

case '+': flags |= F_SIGN; continue;

4718

case ' ': flags |= F_BLANK; continue;

4719

case '#': flags |= F_ALT; continue;

4720

case '0': flags |= F_ZERO; continue;

4721

}

4722

break;

4723

}

4724

if (c == '*') {

4725

v = getnextarg(args, arglen, &argidx);

4726

if (v == NULL)

4727

goto error;

4728

if (!PyInt_Check(v)) {

4729

PyErr_SetString(PyExc_TypeError,

4730

"* wants int");

4731

goto error;

4732

}

4733

width = PyInt_AsLong(v);

4734

if (width < 0) {

4735

flags |= F_LJUST;

4736

width = -width;

4737

}

4738

if (--fmtcnt >= 0)

4739

c = *fmt++;

4740

}

4741

else if (c >= 0 && isdigit(c)) {

4742

width = c - '0';

4743

while (--fmtcnt >= 0) {

4744

c = Py_CHARMASK(*fmt++);

4745

if (!isdigit(c))

4746

break;

4747

if ((width*10) / 10 != width) {

4748

PyErr_SetString(

4749

PyExc_ValueError,

4750

"width too big");

4751

goto error;

4752

}

4753

width = width*10 + (c - '0');

4754

}

4755

}

4756

if (c == '.') {

4757

prec = 0;

4758

if (--fmtcnt >= 0)

4759

c = *fmt++;

4760

if (c == '*') {

4761

v = getnextarg(args, arglen, &argidx);

4762

if (v == NULL)

4763

goto error;

4764

if (!PyInt_Check(v)) {

4765

PyErr_SetString(

4766

PyExc_TypeError,

4767

"* wants int");

4768

goto error;

4769

}

4770

prec = PyInt_AsLong(v);

4771

if (prec < 0)

4772

prec = 0;

4773

if (--fmtcnt >= 0)

4774

c = *fmt++;

4775

}

4776

else if (c >= 0 && isdigit(c)) {

4777

prec = c - '0';

4778

while (--fmtcnt >= 0) {

4779

c = Py_CHARMASK(*fmt++);

4780

if (!isdigit(c))

4781

break;

4782

if ((prec*10) / 10 != prec) {

4783

PyErr_SetString(

4784

PyExc_ValueError,

4785

"prec too big");

4786

goto error;

4787

}

4788

prec = prec*10 + (c - '0');

4789

}

4790

}

4791

} /* prec */

4792

if (fmtcnt >= 0) {

4793

if (c == 'h' || c == 'l' || c == 'L') {

4794

if (--fmtcnt >= 0)

4795

c = *fmt++;

4796

}

4797

}

4798

if (fmtcnt < 0) {

4799

PyErr_SetString(PyExc_ValueError,

4800

"incomplete format");

4801

goto error;

4802

}

4803

if (c != '%') {

4804

v = getnextarg(args, arglen, &argidx);

4805

if (v == NULL)

4806

goto error;

4807

}

4808

sign = 0;

4809

fill = ' ';

4810

switch (c) {

4811

case '%':

4812

pbuf = "%";

4813

len = 1;

4814

break;

4815

case 's':

4816

#ifdef Py_USING_UNICODE

4817

if (PyUnicode_Check(v)) {

4818

fmt = fmt_start;

4819

argidx = argidx_start;

4820

goto unicode;

4821

}

4822

#endif

4823

temp = _PyObject_Str(v);

4824

#ifdef Py_USING_UNICODE

4825

if (temp != NULL && PyUnicode_Check(temp)) {

4826

Py_DECREF(temp);

4827

fmt = fmt_start;

4828

argidx = argidx_start;

4829

goto unicode;

4830

}

4831

#endif

4832

/* Fall through */

4833

case 'r':

4834

if (c == 'r')

4835

temp = PyObject_Repr(v);

4836

if (temp == NULL)

4837

goto error;

4838

if (!PyBytes_Check(temp)) {

4839

PyErr_SetString(PyExc_TypeError,

4840

"%s argument has non-string str()");

4841

Py_DECREF(temp);

4842

goto error;

4843

}

4844

pbuf = PyBytes_AS_STRING(temp);

4845

len = PyBytes_GET_SIZE(temp);

4846

if (prec >= 0 && len > prec)

4847

len = prec;

4848

break;

4849

case 'i':

4850

case 'd':

4851

case 'u':

4852

case 'o':

4853

case 'x':

4854

case 'X':

4855

if (c == 'i')

4856

c = 'd';

4857

isnumok = 0;

4858

if (PyNumber_Check(v)) {

4859

PyObject *iobj=NULL;

4860

4861

if (PyInt_Check(v) || (PyLong_Check(v))) {

4862

iobj = v;

4863

Py_INCREF(iobj);

4864

}

4865

else {

4866

iobj = PyNumber_Int(v);

4867

if (iobj==NULL) iobj = PyNumber_Long(v);

4868

}

4869

if (iobj!=NULL) {

4870

if (PyInt_Check(iobj)) {

4871

isnumok = 1;

4872

pbuf = formatbuf;

4873

len = formatint(pbuf,

4874

sizeof(formatbuf),

4875

flags, prec, c, iobj);

4876

Py_DECREF(iobj);

4877

if (len < 0)

4878

goto error;

4879

sign = 1;

4880

}

4881

else if (PyLong_Check(iobj)) {

4882

int ilen;

4883

4884

isnumok = 1;

4885

temp = _PyBytes_FormatLong(iobj, flags,

4886

prec, c, &pbuf, &ilen);

4887

Py_DECREF(iobj);

4888

len = ilen;

4889

if (!temp)

4890

goto error;

4891

sign = 1;

4892

}

4893

else {

4894

Py_DECREF(iobj);

4895

}

4896

}

4897

}

4898

if (!isnumok) {

4899

PyErr_Format(PyExc_TypeError,

4900

"%%%c format: a number is required, "

4901

"not %.200s", c, Py_TYPE(v)->tp_name);

4902

goto error;

4903

}

4904

if (flags & F_ZERO)

4905

fill = '0';

4906

break;

4907

case 'e':

4908

case 'E':

4909

case 'f':

4910

case 'F':

4911

case 'g':

4912

case 'G':

4913

if (c == 'F')

4914

c = 'f';

4915

pbuf = formatbuf;

4916

len = formatfloat(pbuf, sizeof(formatbuf),

4917

flags, prec, c, v);

4918

if (len < 0)

4919

goto error;

4920

sign = 1;

4921

if (flags & F_ZERO)

4922

fill = '0';

4923

break;

4924

case 'c':

4925

#ifdef Py_USING_UNICODE

4926

if (PyUnicode_Check(v)) {

4927

fmt = fmt_start;

4928

argidx = argidx_start;

4929

goto unicode;

4930

}

4931

#endif

4932

pbuf = formatbuf;

4933

len = formatchar(pbuf, sizeof(formatbuf), v);

4934

if (len < 0)

4935

goto error;

4936

break;

4937

default:

4938

PyErr_Format(PyExc_ValueError,

4939

"unsupported format character '%c' (0x%x) "

4940

"at index %zd",

4941

c, c,

4942

(Py_ssize_t)(fmt - 1 -

4943

PyBytes_AsString(format)));

4944

goto error;

4945

}

4946

if (sign) {

4947

if (*pbuf == '-' || *pbuf == '+') {

4948

sign = *pbuf++;

4949

len--;

4950

}

4951

else if (flags & F_SIGN)

4952

sign = '+';

4953

else if (flags & F_BLANK)

4954

sign = ' ';

4955

else

4956

sign = 0;

4957

}

4958

if (width < len)

4959

width = len;

4960

if (rescnt - (sign != 0) < width) {

4961

reslen -= rescnt;

4962

rescnt = width + fmtcnt + 100;

4963

reslen += rescnt;

4964

if (reslen < 0) {

4965

Py_DECREF(result);

4966

Py_XDECREF(temp);

4967

return PyErr_NoMemory();

4968

}

4969

if (_PyBytes_Resize(&result, reslen) < 0) {

4970

Py_XDECREF(temp);

4971

return NULL;

4972

}

4973

res = PyBytes_AS_STRING(result)

4974

+ reslen - rescnt;

4975

}

4976

if (sign) {

4977

if (fill != ' ')

4978

*res++ = sign;

4979

rescnt--;

4980

if (width > len)

4981

width--;

4982

}

4983

if ((flags & F_ALT) && (c == 'x' || c == 'X')) {

4984

assert(pbuf[0] == '0');

4985

assert(pbuf[1] == c);

4986

if (fill != ' ') {

4987

*res++ = *pbuf++;

4988

*res++ = *pbuf++;

4989

}

4990

rescnt -= 2;

4991

width -= 2;

4992

if (width < 0)

4993

width = 0;

4994

len -= 2;

4995

}

4996

if (width > len && !(flags & F_LJUST)) {

4997

do {

4998

--rescnt;

4999

*res++ = fill;

5000

} while (--width > len);

5001

}

5002

if (fill == ' ') {

5003

if (sign)

5004

*res++ = sign;

5005

if ((flags & F_ALT) &&

5006

(c == 'x' || c == 'X')) {

5007

assert(pbuf[0] == '0');

5008

assert(pbuf[1] == c);

5009

*res++ = *pbuf++;

5010

*res++ = *pbuf++;

5011

}

5012

}

5013

Py_MEMCPY(res, pbuf, len);

5014

res += len;

5015

rescnt -= len;

5016

while (--width >= len) {

5017

--rescnt;

5018

*res++ = ' ';

5019

}

5020

if (dict && (argidx < arglen) && c != '%') {

5021

PyErr_SetString(PyExc_TypeError,

5022

"not all arguments converted during string formatting");

5023

Py_XDECREF(temp);

5024

goto error;

5025

}

5026

Py_XDECREF(temp);

5027

} /* '%' */

5028

} /* until end */

5029

if (argidx < arglen && !dict) {

5030

PyErr_SetString(PyExc_TypeError,

5031

"not all arguments converted during string formatting");

5032

goto error;

5033

}

5034

if (args_owned) {

5035

Py_DECREF(args);

5036

}

5037

_PyBytes_Resize(&result, reslen - rescnt);

5038

return result;

5039

5040

#ifdef Py_USING_UNICODE

5041

unicode:

5042

if (args_owned) {

5043

Py_DECREF(args);

5044

args_owned = 0;

5045

}

5046

/* Fiddle args right (remove the first argidx arguments) */

5047

if (PyTuple_Check(orig_args) && argidx > 0) {

5048

PyObject *v;

5049

Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;

5050

v = PyTuple_New(n);

5051

if (v == NULL)

5052

goto error;

5053

while (--n >= 0) {

5054

PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);

5055

Py_INCREF(w);

5056

PyTuple_SET_ITEM(v, n, w);

5057

}

5058

args = v;

5059

} else {

5060

Py_INCREF(orig_args);

5061

args = orig_args;

5062

}

5063

args_owned = 1;

5064

/* Take what we have of the result and let the Unicode formatting

5065

function format the rest of the input. */

5066

rescnt = res - PyBytes_AS_STRING(result);

5067

if (_PyBytes_Resize(&result, rescnt))

5068

goto error;

5069

fmtcnt = PyBytes_GET_SIZE(format) - \

5070

(fmt - PyBytes_AS_STRING(format));

5071

format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);

5072

if (format == NULL)

5073

goto error;

5074

v = PyUnicode_Format(format, args);

5075

Py_DECREF(format);

5076

if (v == NULL)

5077

goto error;

5078

/* Paste what we have (result) to what the Unicode formatting

5079

function returned (v) and return the result (or error) */

5080

w = PyUnicode_Concat(result, v);

5081

Py_DECREF(result);

5082

Py_DECREF(v);

5083

Py_DECREF(args);

5084

return w;

5085

#endif /* Py_USING_UNICODE */

5086

5087

error:

5088

Py_DECREF(result);

5089

if (args_owned) {

5090

Py_DECREF(args);

5091

}

5092

return NULL;

5093

}

5094

5095

void

5096

PyString_InternInPlace(PyObject **p)

5097

{

5098

register PyBytesObject *s = (PyBytesObject *)(*p);

5099

PyObject *t;

5100

if (s == NULL || !PyBytes_Check(s))

5101

Py_FatalError("PyString_InternInPlace: strings only please!");

5102

/* If it's a string subclass, we don't really know what putting

5103

it in the interned dict might do. */

5104

if (!PyBytes_CheckExact(s))

5105

return;

5106

if (PyBytes_CHECK_INTERNED(s))

5107

return;

5108

if (interned == NULL) {

5109

interned = PyDict_New();

5110

if (interned == NULL) {

5111

PyErr_Clear(); /* Don't leave an exception */

5112

return;

5113

}

5114

}

5115

t = PyDict_GetItem(interned, (PyObject *)s);

5116

if (t) {

5117

Py_INCREF(t);

5118

Py_DECREF(*p);

5119

*p = t;

5120

return;

5121

}

5122

5123

if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {

5124

PyErr_Clear();

5125

return;

5126

}

5127

/* The two references in interned are not counted by refcnt.

5128

The string deallocator will take care of this */

5129

Py_REFCNT(s) -= 2;

5130

PyBytes_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;

5131

}

5132

5133

void

5134

PyString_InternImmortal(PyObject **p)

5135

{

5136

PyString_InternInPlace(p);

5137

if (PyBytes_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {

5138

PyBytes_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;

5139

Py_INCREF(*p);

5140

}

5141

}

5142

5143

5144

/* Build a string object from the C string `cp` and pass it through
 * PyString_InternInPlace().  Returns the resulting object, or NULL if
 * the string object could not be created.
 */
PyObject *
PyString_InternFromString(const char *cp)
{
    PyObject *str = PyBytes_FromString(cp);

    if (str != NULL)
        PyString_InternInPlace(&str);
    return str;
}

5153

5154

void

5155

PyString_Fini(void)

5156

{

5157

int i;

5158

for (i = 0; i < UCHAR_MAX + 1; i++) {

5159

Py_XDECREF(characters[i]);

5160

characters[i] = NULL;

5161

}

5162

Py_XDECREF(nullstring);

5163

nullstring = NULL;

5164

}

5165

5166

void _Py_ReleaseInternedStrings(void)

5167

{

5168

PyObject *keys;

5169

PyBytesObject *s;

5170

Py_ssize_t i, n;

5171

Py_ssize_t immortal_size = 0, mortal_size = 0;

5172

5173

if (interned == NULL || !PyDict_Check(interned))

5174

return;

5175

keys = PyDict_Keys(interned);

5176

if (keys == NULL || !PyList_Check(keys)) {

5177

PyErr_Clear();

5178

return;

5179

}

5180

5181

/* Since _Py_ReleaseInternedStrings() is intended to help a leak

5182

detector, interned strings are not forcibly deallocated; rather, we

5183

give them their stolen references back, and then clear and DECREF

5184

the interned dict. */

5185

5186

n = PyList_GET_SIZE(keys);

5187

fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",

5188

n);

5189

for (i = 0; i < n; i++) {

5190

s = (PyBytesObject *) PyList_GET_ITEM(keys, i);

5191

switch (s->ob_sstate) {

5192

case SSTATE_NOT_INTERNED:

5193

/* XXX Shouldn't happen */

5194

break;

5195

case SSTATE_INTERNED_IMMORTAL:

5196

Py_REFCNT(s) += 1;

5197

immortal_size += Py_SIZE(s);

5198

break;

5199

case SSTATE_INTERNED_MORTAL:

5200

Py_REFCNT(s) += 2;

5201

mortal_size += Py_SIZE(s);

5202

break;

5203

default:

5204

Py_FatalError("Inconsistent interned string state.");

5205

}

5206

s->ob_sstate = SSTATE_NOT_INTERNED;

5207

}

5208

fprintf(stderr, "total size of all interned strings: "

5209

"%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "

5210

"mortal/immortal\n", mortal_size, immortal_size);

5211

Py_DECREF(keys);

5212

PyDict_Clear(interned);

5213

Py_DECREF(interned);

5214

interned = NULL;

5215

}