~pythonregexp2.7/python/issue2636-22 : revision 39036

1

/* String object implementation */

2

3

#define PY_SSIZE_T_CLEAN

4

5

#include "Python.h"

6

7

#include "formatter_string.h"

8

9

#include <ctype.h>

10

11

#ifdef COUNT_ALLOCS

12

int null_strings, one_strings;

13

#endif

14

15

static PyStringObject *characters[UCHAR_MAX + 1];

16

static PyStringObject *nullstring;

17

18

/* This dictionary holds all interned strings. Note that references to

19

strings in this dictionary are *not* counted in the string's ob_refcnt.

20

When the interned string reaches a refcnt of 0 the string deallocation

21

function will delete the reference from this dictionary.

22

23

Another way to look at this is to say that the actual reference

24

count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)

25

*/

26

static PyObject *interned;

27

28

/*

29

For both PyString_FromString() and PyString_FromStringAndSize(), the

30

parameter `size' denotes number of characters to allocate, not counting any

31

null terminating character.

32

33

For PyString_FromString(), the parameter `str' points to a null-terminated

34

string containing exactly `size' bytes.

35

36

For PyString_FromStringAndSize(), the parameter `str' is

37

either NULL or else points to a string containing at least `size' bytes.

38

For PyString_FromStringAndSize(), the string in the `str' parameter does

39

not have to be null-terminated. (Therefore it is safe to construct a

40

substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)

41

If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'

42

bytes (setting the last byte to the null terminating character) and you can

43

fill in the data yourself. If `str' is non-NULL then the resulting

44

PyString object must be treated as immutable and you must not fill in nor

45

alter the data yourself, since the strings may be shared.

46

47

The PyObject member `op->ob_size', which denotes the number of "extra

48

items" in a variable-size object, will contain the number of bytes

49

allocated for string data, not counting the null terminating character. It

50

is therefore equal to the `size' parameter (for

51

PyString_FromStringAndSize()) or the length of the string in the `str'

52

parameter (for PyString_FromString()).

53

*/

54

PyObject *

55

PyString_FromStringAndSize(const char *str, Py_ssize_t size)

56

{

57

register PyStringObject *op;

58

if (size < 0) {

59

PyErr_SetString(PyExc_SystemError,

60

"Negative size passed to PyString_FromStringAndSize");

61

return NULL;

62

}

63

if (size == 0 && (op = nullstring) != NULL) {

64

#ifdef COUNT_ALLOCS

65

null_strings++;

66

#endif

67

Py_INCREF(op);

68

return (PyObject *)op;

69

}

70

if (size == 1 && str != NULL &&

71

(op = characters[*str & UCHAR_MAX]) != NULL)

72

{

73

#ifdef COUNT_ALLOCS

74

one_strings++;

75

#endif

76

Py_INCREF(op);

77

return (PyObject *)op;

78

}

79

80

/* Inline PyObject_NewVar */

81

op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);

82

if (op == NULL)

83

return PyErr_NoMemory();

84

PyObject_INIT_VAR(op, &PyString_Type, size);

85

op->ob_shash = -1;

86

op->ob_sstate = SSTATE_NOT_INTERNED;

87

if (str != NULL)

88

Py_MEMCPY(op->ob_sval, str, size);

89

op->ob_sval[size] = '\0';

90

/* share short strings */

91

if (size == 0) {

92

PyObject *t = (PyObject *)op;

93

PyString_InternInPlace(&t);

94

op = (PyStringObject *)t;

95

nullstring = op;

96

Py_INCREF(op);

97

} else if (size == 1 && str != NULL) {

98

PyObject *t = (PyObject *)op;

99

PyString_InternInPlace(&t);

100

op = (PyStringObject *)t;

101

characters[*str & UCHAR_MAX] = op;

102

Py_INCREF(op);

103

}

104

return (PyObject *) op;

105

}

106

107

PyObject *

108

PyString_FromString(const char *str)

109

{

110

register size_t size;

111

register PyStringObject *op;

112

113

assert(str != NULL);

114

size = strlen(str);

115

if (size > PY_SSIZE_T_MAX) {

116

PyErr_SetString(PyExc_OverflowError,

117

"string is too long for a Python string");

118

return NULL;

119

}

120

if (size == 0 && (op = nullstring) != NULL) {

121

#ifdef COUNT_ALLOCS

122

null_strings++;

123

#endif

124

Py_INCREF(op);

125

return (PyObject *)op;

126

}

127

if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {

128

#ifdef COUNT_ALLOCS

129

one_strings++;

130

#endif

131

Py_INCREF(op);

132

return (PyObject *)op;

133

}

134

135

/* Inline PyObject_NewVar */

136

op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);

137

if (op == NULL)

138

return PyErr_NoMemory();

139

PyObject_INIT_VAR(op, &PyString_Type, size);

140

op->ob_shash = -1;

141

op->ob_sstate = SSTATE_NOT_INTERNED;

142

Py_MEMCPY(op->ob_sval, str, size+1);

143

/* share short strings */

144

if (size == 0) {

145

PyObject *t = (PyObject *)op;

146

PyString_InternInPlace(&t);

147

op = (PyStringObject *)t;

148

nullstring = op;

149

Py_INCREF(op);

150

} else if (size == 1) {

151

PyObject *t = (PyObject *)op;

152

PyString_InternInPlace(&t);

153

op = (PyStringObject *)t;

154

characters[*str & UCHAR_MAX] = op;

155

Py_INCREF(op);

156

}

157

return (PyObject *) op;

158

}

159

160

PyObject *

161

PyString_FromFormatV(const char *format, va_list vargs)

162

{

163

va_list count;

164

Py_ssize_t n = 0;

165

const char* f;

166

char *s;

167

PyObject* string;

168

169

#ifdef VA_LIST_IS_ARRAY

170

Py_MEMCPY(count, vargs, sizeof(va_list));

171

#else

172

#ifdef __va_copy

173

__va_copy(count, vargs);

174

#else

175

count = vargs;

176

#endif

177

#endif

178

/* step 1: figure out how large a buffer we need */

179

for (f = format; *f; f++) {

180

if (*f == '%') {

181

const char* p = f;

182

while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))

183

;

184

185

/* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since

186

* they don't affect the amount of space we reserve.

187

*/

188

if ((*f == 'l' || *f == 'z') &&

189

(f[1] == 'd' || f[1] == 'u'))

190

++f;

191

192

switch (*f) {

193

case 'c':

194

(void)va_arg(count, int);

195

/* fall through... */

196

case '%':

197

n++;

198

break;

199

case 'd': case 'u': case 'i': case 'x':

200

(void) va_arg(count, int);

201

/* 20 bytes is enough to hold a 64-bit

202

integer. Decimal takes the most space.

203

This isn't enough for octal. */

204

n += 20;

205

break;

206

case 's':

207

s = va_arg(count, char*);

208

n += strlen(s);

209

break;

210

case 'p':

211

(void) va_arg(count, int);

212

/* maximum 64-bit pointer representation:

213

* 0xffffffffffffffff

214

* so 19 characters is enough.

215

* XXX I count 18 -- what's the extra for?

216

*/

217

n += 19;

218

break;

219

default:

220

/* if we stumble upon an unknown

221

formatting code, copy the rest of

222

the format string to the output

223

string. (we cannot just skip the

224

code, since there's no way to know

225

what's in the argument list) */

226

n += strlen(p);

227

goto expand;

228

}

229

} else

230

n++;

231

}

232

expand:

233

/* step 2: fill the buffer */

234

/* Since we've analyzed how much space we need for the worst case,

235

use sprintf directly instead of the slower PyOS_snprintf. */

236

string = PyString_FromStringAndSize(NULL, n);

237

if (!string)

238

return NULL;

239

240

s = PyString_AsString(string);

241

242

for (f = format; *f; f++) {

243

if (*f == '%') {

244

const char* p = f++;

245

Py_ssize_t i;

246

int longflag = 0;

247

int size_tflag = 0;

248

/* parse the width.precision part (we're only

249

interested in the precision value, if any) */

250

n = 0;

251

while (isdigit(Py_CHARMASK(*f)))

252

n = (n*10) + *f++ - '0';

253

if (*f == '.') {

254

f++;

255

n = 0;

256

while (isdigit(Py_CHARMASK(*f)))

257

n = (n*10) + *f++ - '0';

258

}

259

while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))

260

f++;

261

/* handle the long flag, but only for %ld and %lu.

262

others can be added when necessary. */

263

if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {

264

longflag = 1;

265

++f;

266

}

267

/* handle the size_t flag. */

268

if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {

269

size_tflag = 1;

270

++f;

271

}

272

273

switch (*f) {

274

case 'c':

275

*s++ = va_arg(vargs, int);

276

break;

277

case 'd':

278

if (longflag)

279

sprintf(s, "%ld", va_arg(vargs, long));

280

else if (size_tflag)

281

sprintf(s, "%" PY_FORMAT_SIZE_T "d",

282

va_arg(vargs, Py_ssize_t));

283

else

284

sprintf(s, "%d", va_arg(vargs, int));

285

s += strlen(s);

286

break;

287

case 'u':

288

if (longflag)

289

sprintf(s, "%lu",

290

va_arg(vargs, unsigned long));

291

else if (size_tflag)

292

sprintf(s, "%" PY_FORMAT_SIZE_T "u",

293

va_arg(vargs, size_t));

294

else

295

sprintf(s, "%u",

296

va_arg(vargs, unsigned int));

297

s += strlen(s);

298

break;

299

case 'i':

300

sprintf(s, "%i", va_arg(vargs, int));

301

s += strlen(s);

302

break;

303

case 'x':

304

sprintf(s, "%x", va_arg(vargs, int));

305

s += strlen(s);

306

break;

307

case 's':

308

p = va_arg(vargs, char*);

309

i = strlen(p);

310

if (n > 0 && i > n)

311

i = n;

312

Py_MEMCPY(s, p, i);

313

s += i;

314

break;

315

case 'p':

316

sprintf(s, "%p", va_arg(vargs, void*));

317

/* %p is ill-defined: ensure leading 0x. */

318

if (s[1] == 'X')

319

s[1] = 'x';

320

else if (s[1] != 'x') {

321

memmove(s+2, s, strlen(s)+1);

322

s[0] = '0';

323

s[1] = 'x';

324

}

325

s += strlen(s);

326

break;

327

case '%':

328

*s++ = '%';

329

break;

330

default:

331

strcpy(s, p);

332

s += strlen(s);

333

goto end;

334

}

335

} else

336

*s++ = *f;

337

}

338

339

end:

340

_PyString_Resize(&string, s - PyString_AS_STRING(string));

341

return string;

342

}

343

344

PyObject *

345

PyString_FromFormat(const char *format, ...)

346

{

347

PyObject* ret;

348

va_list vargs;

349

350

#ifdef HAVE_STDARG_PROTOTYPES

351

va_start(vargs, format);

352

#else

353

va_start(vargs);

354

#endif

355

ret = PyString_FromFormatV(format, vargs);

356

va_end(vargs);

357

return ret;

358

}

359

360

361

PyObject *PyString_Decode(const char *s,

362

Py_ssize_t size,

363

const char *encoding,

364

const char *errors)

365

{

366

PyObject *v, *str;

367

368

str = PyString_FromStringAndSize(s, size);

369

if (str == NULL)

370

return NULL;

371

v = PyString_AsDecodedString(str, encoding, errors);

372

Py_DECREF(str);

373

return v;

374

}

375

376

/* Decode the string object `str' through the codec registry and return
   whatever object the codec produces.  A NULL `encoding' selects the default
   encoding when Unicode support is compiled in and is an error (ValueError)
   otherwise.  Returns NULL with an exception set on failure. */
PyObject *PyString_AsDecodedObject(PyObject *str,
                                   const char *encoding,
                                   const char *errors)
{
    if (!PyString_Check(str)) {
        PyErr_BadArgument();
        return NULL;
    }

    if (encoding == NULL) {
#ifdef Py_USING_UNICODE
        encoding = PyUnicode_GetDefaultEncoding();
#else
        PyErr_SetString(PyExc_ValueError, "no encoding specified");
        return NULL;
#endif
    }

    /* The codec registry does the actual work; a NULL result is passed
       straight through to the caller. */
    return PyCodec_Decode(str, encoding, errors);
}

406

407

/* Like PyString_AsDecodedObject(), but the result must be a string object.
   A Unicode result is converted with PyUnicode_AsEncodedString(v, NULL,
   NULL); any other non-string result raises TypeError.  Returns a new
   reference or NULL with an exception set. */
PyObject *PyString_AsDecodedString(PyObject *str,
                                   const char *encoding,
                                   const char *errors)
{
    PyObject *decoded = PyString_AsDecodedObject(str, encoding, errors);

    if (decoded == NULL)
        return NULL;

#ifdef Py_USING_UNICODE
    /* Turn a Unicode result back into a byte string. */
    if (PyUnicode_Check(decoded)) {
        PyObject *as_unicode = decoded;
        decoded = PyUnicode_AsEncodedString(as_unicode, NULL, NULL);
        Py_DECREF(as_unicode);
        if (decoded == NULL)
            return NULL;
    }
#endif

    if (PyString_Check(decoded))
        return decoded;

    /* Format the message before dropping the reference: it reads the
       result's type name. */
    PyErr_Format(PyExc_TypeError,
                 "decoder did not return a string object (type=%.400s)",
                 Py_TYPE(decoded)->tp_name);
    Py_DECREF(decoded);
    return NULL;
}

440

441

PyObject *PyString_Encode(const char *s,

442

Py_ssize_t size,

443

const char *encoding,

444

const char *errors)

445

{

446

PyObject *v, *str;

447

448

str = PyString_FromStringAndSize(s, size);

449

if (str == NULL)

450

return NULL;

451

v = PyString_AsEncodedString(str, encoding, errors);

452

Py_DECREF(str);

453

return v;

454

}

455

456

/* Encode the string object `str' through the codec registry and return
   whatever object the codec produces.  A NULL `encoding' selects the default
   encoding when Unicode support is compiled in and is an error (ValueError)
   otherwise.  Returns NULL with an exception set on failure. */
PyObject *PyString_AsEncodedObject(PyObject *str,
                                   const char *encoding,
                                   const char *errors)
{
    if (!PyString_Check(str)) {
        PyErr_BadArgument();
        return NULL;
    }

    if (encoding == NULL) {
#ifdef Py_USING_UNICODE
        encoding = PyUnicode_GetDefaultEncoding();
#else
        PyErr_SetString(PyExc_ValueError, "no encoding specified");
        return NULL;
#endif
    }

    /* The codec registry does the actual work; a NULL result is passed
       straight through to the caller. */
    return PyCodec_Encode(str, encoding, errors);
}

486

487

/* Like PyString_AsEncodedObject(), but the result must be a string object.
   A Unicode result is converted with PyUnicode_AsEncodedString(v, NULL,
   NULL); any other non-string result raises TypeError.  Returns a new
   reference or NULL with an exception set. */
PyObject *PyString_AsEncodedString(PyObject *str,
                                   const char *encoding,
                                   const char *errors)
{
    PyObject *encoded = PyString_AsEncodedObject(str, encoding, errors);

    if (encoded == NULL)
        return NULL;

#ifdef Py_USING_UNICODE
    /* Turn a Unicode result back into a byte string. */
    if (PyUnicode_Check(encoded)) {
        PyObject *as_unicode = encoded;
        encoded = PyUnicode_AsEncodedString(as_unicode, NULL, NULL);
        Py_DECREF(as_unicode);
        if (encoded == NULL)
            return NULL;
    }
#endif

    if (PyString_Check(encoded))
        return encoded;

    /* Format the message before dropping the reference: it reads the
       result's type name. */
    PyErr_Format(PyExc_TypeError,
                 "encoder did not return a string object (type=%.400s)",
                 Py_TYPE(encoded)->tp_name);
    Py_DECREF(encoded);
    return NULL;
}

520

521

static void

522

string_dealloc(PyObject *op)

523

{

524

switch (PyString_CHECK_INTERNED(op)) {

525

case SSTATE_NOT_INTERNED:

526

break;

527

528

case SSTATE_INTERNED_MORTAL:

529

/* revive dead object temporarily for DelItem */

530

Py_REFCNT(op) = 3;

531

if (PyDict_DelItem(interned, op) != 0)

532

Py_FatalError(

533

"deletion of interned string failed");

534

break;

535

536

case SSTATE_INTERNED_IMMORTAL:

537

Py_FatalError("Immortal interned string died.");

538

539

default:

540

Py_FatalError("Inconsistent interned string state.");

541

}

542

Py_TYPE(op)->tp_free(op);

543

}

544

545

/* Unescape a backslash-escaped string. If unicode is non-zero,

546

the string is a u-literal. If recode_encoding is non-zero,

547

the string is UTF-8 encoded and should be re-encoded in the

548

specified encoding. */

549

550

PyObject *PyString_DecodeEscape(const char *s,

551

Py_ssize_t len,

552

const char *errors,

553

Py_ssize_t unicode,

554

const char *recode_encoding)

555

{

556

int c;

557

char *p, *buf;

558

const char *end;

559

PyObject *v;

560

Py_ssize_t newlen = recode_encoding ? 4*len:len;

561

v = PyString_FromStringAndSize((char *)NULL, newlen);

562

if (v == NULL)

563

return NULL;

564

p = buf = PyString_AsString(v);

565

end = s + len;

566

while (s < end) {

567

if (*s != '\\') {

568

non_esc:

569

#ifdef Py_USING_UNICODE

570

if (recode_encoding && (*s & 0x80)) {

571

PyObject *u, *w;

572

char *r;

573

const char* t;

574

Py_ssize_t rn;

575

t = s;

576

/* Decode non-ASCII bytes as UTF-8. */

577

while (t < end && (*t & 0x80)) t++;

578

u = PyUnicode_DecodeUTF8(s, t - s, errors);

579

if(!u) goto failed;

580

581

/* Recode them in target encoding. */

582

w = PyUnicode_AsEncodedString(

583

u, recode_encoding, errors);

584

Py_DECREF(u);

585

if (!w) goto failed;

586

587

/* Append bytes to output buffer. */

588

assert(PyString_Check(w));

589

r = PyString_AS_STRING(w);

590

rn = PyString_GET_SIZE(w);

591

Py_MEMCPY(p, r, rn);

592

p += rn;

593

Py_DECREF(w);

594

s = t;

595

} else {

596

*p++ = *s++;

597

}

598

#else

599

*p++ = *s++;

600

#endif

601

continue;

602

}

603

s++;

604

if (s==end) {

605

PyErr_SetString(PyExc_ValueError,

606

"Trailing \\ in string");

607

goto failed;

608

}

609

switch (*s++) {

610

/* XXX This assumes ASCII! */

611

case '\n': break;

612

case '\\': *p++ = '\\'; break;

613

case '\'': *p++ = '\''; break;

614

case '\"': *p++ = '\"'; break;

615

case 'b': *p++ = '\b'; break;

616

case 'f': *p++ = '\014'; break; /* FF */

617

case 't': *p++ = '\t'; break;

618

case 'n': *p++ = '\n'; break;

619

case 'r': *p++ = '\r'; break;

620

case 'v': *p++ = '\013'; break; /* VT */

621

case 'a': *p++ = '\007'; break; /* BEL, not classic C */

622

case '0': case '1': case '2': case '3':

623

case '4': case '5': case '6': case '7':

624

c = s[-1] - '0';

625

if (s < end && '0' <= *s && *s <= '7') {

626

c = (c<<3) + *s++ - '0';

627

if (s < end && '0' <= *s && *s <= '7')

628

c = (c<<3) + *s++ - '0';

629

}

630

*p++ = c;

631

break;

632

case 'x':

633

if (s+1 < end &&

634

isxdigit(Py_CHARMASK(s[0])) &&

635

isxdigit(Py_CHARMASK(s[1])))

636

{

637

unsigned int x = 0;

638

c = Py_CHARMASK(*s);

639

s++;

640

if (isdigit(c))

641

x = c - '0';

642

else if (islower(c))

643

x = 10 + c - 'a';

644

else

645

x = 10 + c - 'A';

646

x = x << 4;

647

c = Py_CHARMASK(*s);

648

s++;

649

if (isdigit(c))

650

x += c - '0';

651

else if (islower(c))

652

x += 10 + c - 'a';

653

else

654

x += 10 + c - 'A';

655

*p++ = x;

656

break;

657

}

658

if (!errors || strcmp(errors, "strict") == 0) {

659

PyErr_SetString(PyExc_ValueError,

660

"invalid \\x escape");

661

goto failed;

662

}

663

if (strcmp(errors, "replace") == 0) {

664

*p++ = '?';

665

} else if (strcmp(errors, "ignore") == 0)

666

/* do nothing */;

667

else {

668

PyErr_Format(PyExc_ValueError,

669

"decoding error; "

670

"unknown error handling code: %.400s",

671

errors);

672

goto failed;

673

}

674

#ifndef Py_USING_UNICODE

675

case 'u':

676

case 'U':

677

case 'N':

678

if (unicode) {

679

PyErr_SetString(PyExc_ValueError,

680

"Unicode escapes not legal "

681

"when Unicode disabled");

682

goto failed;

683

}

684

#endif

685

default:

686

*p++ = '\\';

687

s--;

688

goto non_esc; /* an arbitry number of unescaped

689

UTF-8 bytes may follow. */

690

}

691

}

692

if (p-buf < newlen)

693

_PyString_Resize(&v, p - buf);

694

return v;

695

failed:

696

Py_DECREF(v);

697

return NULL;

698

}

699

700

/* -------------------------------------------------------------------- */

701

/* object api */

702

703

static Py_ssize_t

704

string_getsize(register PyObject *op)

705

{

706

char *s;

707

Py_ssize_t len;

708

if (PyString_AsStringAndSize(op, &s, &len))

709

return -1;

710

return len;

711

}

712

713

static /*const*/ char *

714

string_getbuffer(register PyObject *op)

715

{

716

char *s;

717

Py_ssize_t len;

718

if (PyString_AsStringAndSize(op, &s, &len))

719

return NULL;

720

return s;

721

}

722

723

Py_ssize_t

724

PyString_Size(register PyObject *op)

725

{

726

if (!PyString_Check(op))

727

return string_getsize(op);

728

return Py_SIZE(op);

729

}

730

731

/*const*/ char *

732

PyString_AsString(register PyObject *op)

733

{

734

if (!PyString_Check(op))

735

return string_getbuffer(op);

736

return ((PyStringObject *)op) -> ob_sval;

737

}

738

739

int

740

PyString_AsStringAndSize(register PyObject *obj,

741

register char **s,

742

register Py_ssize_t *len)

743

{

744

if (s == NULL) {

745

PyErr_BadInternalCall();

746

return -1;

747

}

748

749

if (!PyString_Check(obj)) {

750

#ifdef Py_USING_UNICODE

751

if (PyUnicode_Check(obj)) {

752

obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);

753

if (obj == NULL)

754

return -1;

755

}

756

else

757

#endif

758

{

759

PyErr_Format(PyExc_TypeError,

760

"expected string or Unicode object, "

761

"%.200s found", Py_TYPE(obj)->tp_name);

762

return -1;

763

}

764

}

765

766

*s = PyString_AS_STRING(obj);

767

if (len != NULL)

768

*len = PyString_GET_SIZE(obj);

769

else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {

770

PyErr_SetString(PyExc_TypeError,

771

"expected string without null bytes");

772

return -1;

773

}

774

return 0;

775

}

776

777

/* -------------------------------------------------------------------- */

778

/* Methods */

779

780

#include "stringlib/stringdefs.h"

781

#include "stringlib/fastsearch.h"

782

783

#include "stringlib/count.h"

784

#include "stringlib/find.h"

785

#include "stringlib/partition.h"

786

787

#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping

788

#include "stringlib/localeutil.h"

789

790

791

792

static int

793

string_print(PyStringObject *op, FILE *fp, int flags)

794

{

795

Py_ssize_t i, str_len;

796

char c;

797

int quote;

798

799

/* XXX Ought to check for interrupts when writing long strings */

800

if (! PyString_CheckExact(op)) {

801

int ret;

802

/* A str subclass may have its own __str__ method. */

803

op = (PyStringObject *) PyObject_Str((PyObject *)op);

804

if (op == NULL)

805

return -1;

806

ret = string_print(op, fp, flags);

807

Py_DECREF(op);

808

return ret;

809

}

810

if (flags & Py_PRINT_RAW) {

811

char *data = op->ob_sval;

812

Py_ssize_t size = Py_SIZE(op);

813

Py_BEGIN_ALLOW_THREADS

814

while (size > INT_MAX) {

815

/* Very long strings cannot be written atomically.

816

* But don't write exactly INT_MAX bytes at a time

817

* to avoid memory aligment issues.

818

*/

819

const int chunk_size = INT_MAX & ~0x3FFF;

820

fwrite(data, 1, chunk_size, fp);

821

data += chunk_size;

822

size -= chunk_size;

823

}

824

#ifdef __VMS

825

if (size) fwrite(data, (int)size, 1, fp);

826

#else

827

fwrite(data, 1, (int)size, fp);

828

#endif

829

Py_END_ALLOW_THREADS

830

return 0;

831

}

832

833

/* figure out which quote to use; single is preferred */

834

quote = '\'';

835

if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&

836

!memchr(op->ob_sval, '"', Py_SIZE(op)))

837

quote = '"';

838

839

str_len = Py_SIZE(op);

840

Py_BEGIN_ALLOW_THREADS

841

fputc(quote, fp);

842

for (i = 0; i < str_len; i++) {

843

/* Since strings are immutable and the caller should have a

844

reference, accessing the interal buffer should not be an issue

845

with the GIL released. */

846

c = op->ob_sval[i];

847

if (c == quote || c == '\\')

848

fprintf(fp, "\\%c", c);

849

else if (c == '\t')

850

fprintf(fp, "\\t");

851

else if (c == '\n')

852

fprintf(fp, "\\n");

853

else if (c == '\r')

854

fprintf(fp, "\\r");

855

else if (c < ' ' || c >= 0x7f)

856

fprintf(fp, "\\x%02x", c & 0xff);

857

else

858

fputc(c, fp);

859

}

860

fputc(quote, fp);

861

Py_END_ALLOW_THREADS

862

return 0;

863

}

864

865

/* Return the repr() of the string object `obj': its contents inside quotes,
   with the quote character, backslash, tab, newline and carriage return
   backslash-escaped and all other bytes outside the printable ASCII range
   written as \xNN.

   smartquotes  when non-zero, double quotes are used if the string contains
                a single quote and no double quote; otherwise single quotes
                are always used.

   Returns a new reference, or NULL with OverflowError / MemoryError set. */
PyObject *
PyString_Repr(PyObject *obj, int smartquotes)
{
    register PyStringObject* op = (PyStringObject*) obj;
    register Py_ssize_t i;
    register char c;
    register char *p;
    int quote;
    size_t newsize;
    PyObject *v;

    /* Worst case is 4 output bytes per input byte plus the two quotes.
       Check before multiplying: a test on the product comes too late once
       it has already overflowed. */
    if (Py_SIZE(op) > (PY_SSIZE_T_MAX - 2) / 4) {
        PyErr_SetString(PyExc_OverflowError,
            "string is too large to make repr");
        return NULL;
    }
    newsize = 2 + 4 * (size_t)Py_SIZE(op);
    v = PyString_FromStringAndSize((char *)NULL, newsize);
    if (v == NULL)
        return NULL;

    /* figure out which quote to use; single is preferred */
    quote = '\'';
    if (smartquotes &&
        memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
        !memchr(op->ob_sval, '"', Py_SIZE(op)))
        quote = '"';

    p = PyString_AS_STRING(v);
    *p++ = quote;
    for (i = 0; i < Py_SIZE(op); i++) {
        /* There's at least enough room for a hex escape
           and a closing quote. */
        assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
        c = op->ob_sval[i];
        if (c == quote || c == '\\')
            *p++ = '\\', *p++ = c;
        else if (c == '\t')
            *p++ = '\\', *p++ = 't';
        else if (c == '\n')
            *p++ = '\\', *p++ = 'n';
        else if (c == '\r')
            *p++ = '\\', *p++ = 'r';
        else if (c < ' ' || c >= 0x7f) {
            /* For performance, we don't want to call
               PyOS_snprintf here (extra layers of
               function call). */
            sprintf(p, "\\x%02x", c & 0xff);
            p += 4;
        }
        else
            *p++ = c;
    }
    assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
    *p++ = quote;
    *p = '\0';
    /* Shrink to what was written; on failure _PyString_Resize leaves `v'
       NULL, which is then returned. */
    _PyString_Resize(
        &v, (p - PyString_AS_STRING(v)));
    return v;
}

926

927

/* repr() implementation: PyString_Repr() with smart quote selection
   turned on. */
static PyObject *
string_repr(PyObject *op)
{
    const int smartquotes = 1;

    return PyString_Repr(op, smartquotes);
}

932

933

/* str() implementation: an exact string is returned as-is (with a new
   reference); an instance of a subtype is copied into a genuine string with
   the same value. */
static PyObject *
string_str(PyObject *s)
{
    assert(PyString_Check(s));
    if (!PyString_CheckExact(s)) {
        /* Subtype -- return genuine string with the same value. */
        PyStringObject *sub = (PyStringObject *) s;
        return PyString_FromStringAndSize(sub->ob_sval, Py_SIZE(sub));
    }
    Py_INCREF(s);
    return s;
}

947

948

static Py_ssize_t

949

string_length(PyStringObject *a)

950

{

951

return Py_SIZE(a);

952

}

953

954

static PyObject *

955

string_concat(register PyStringObject *a, register PyObject *bb)

956

{

957

register Py_ssize_t size;

958

register PyStringObject *op;

959

if (!PyString_Check(bb)) {

960

#ifdef Py_USING_UNICODE

961

if (PyUnicode_Check(bb))

962

return PyUnicode_Concat((PyObject *)a, bb);

963

#endif

964

if (PyBytes_Check(bb))

965

return PyBytes_Concat((PyObject *)a, bb);

966

PyErr_Format(PyExc_TypeError,

967

"cannot concatenate 'str' and '%.200s' objects",

968

Py_TYPE(bb)->tp_name);

969

return NULL;

970

}

971

#define b ((PyStringObject *)bb)

972

/* Optimize cases with empty left or right operand */

973

if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&

974

PyString_CheckExact(a) && PyString_CheckExact(b)) {

975

if (Py_SIZE(a) == 0) {

976

Py_INCREF(bb);

977

return bb;

978

}

979

Py_INCREF(a);

980

return (PyObject *)a;

981

}

982

size = Py_SIZE(a) + Py_SIZE(b);

983

if (size < 0) {

984

PyErr_SetString(PyExc_OverflowError,

985

"strings are too large to concat");

986

return NULL;

987

}

988

989

/* Inline PyObject_NewVar */

990

op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);

991

if (op == NULL)

992

return PyErr_NoMemory();

993

PyObject_INIT_VAR(op, &PyString_Type, size);

994

op->ob_shash = -1;

995

op->ob_sstate = SSTATE_NOT_INTERNED;

996

Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));

997

Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));

998

op->ob_sval[size] = '\0';

999

return (PyObject *) op;

1000

#undef b

1001

}

1002

1003

static PyObject *

1004

string_repeat(register PyStringObject *a, register Py_ssize_t n)

1005

{

1006

register Py_ssize_t i;

1007

register Py_ssize_t j;

1008

register Py_ssize_t size;

1009

register PyStringObject *op;

1010

size_t nbytes;

1011

if (n < 0)

1012

n = 0;

1013

/* watch out for overflows: the size can overflow int,

1014

* and the # of bytes needed can overflow size_t

1015

*/

1016

size = Py_SIZE(a) * n;

1017

if (n && size / n != Py_SIZE(a)) {

1018

PyErr_SetString(PyExc_OverflowError,

1019

"repeated string is too long");

1020

return NULL;

1021

}

1022

if (size == Py_SIZE(a) && PyString_CheckExact(a)) {

1023

Py_INCREF(a);

1024

return (PyObject *)a;

1025

}

1026

nbytes = (size_t)size;

1027

if (nbytes + sizeof(PyStringObject) <= nbytes) {

1028

PyErr_SetString(PyExc_OverflowError,

1029

"repeated string is too long");

1030

return NULL;

1031

}

1032

op = (PyStringObject *)

1033

PyObject_MALLOC(sizeof(PyStringObject) + nbytes);

1034

if (op == NULL)

1035

return PyErr_NoMemory();

1036

PyObject_INIT_VAR(op, &PyString_Type, size);

1037

op->ob_shash = -1;

1038

op->ob_sstate = SSTATE_NOT_INTERNED;

1039

op->ob_sval[size] = '\0';

1040

if (Py_SIZE(a) == 1 && n > 0) {

1041

memset(op->ob_sval, a->ob_sval[0] , n);

1042

return (PyObject *) op;

1043

}

1044

i = 0;

1045

if (i < size) {

1046

Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));

1047

i = Py_SIZE(a);

1048

}

1049

while (i < size) {

1050

j = (i <= size-i) ? i : size-i;

1051

Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);

1052

i += j;

1053

}

1054

return (PyObject *) op;

1055

}

1056

1057

/* String slice a[i:j] consists of characters a[i] ... a[j-1] */

1058

1059

static PyObject *

1060

string_slice(register PyStringObject *a, register Py_ssize_t i,

1061

register Py_ssize_t j)

1062

/* j -- may be negative! */

1063

{

1064

if (i < 0)

1065

i = 0;

1066

if (j < 0)

1067

j = 0; /* Avoid signed/unsigned bug in next line */

1068

if (j > Py_SIZE(a))

1069

j = Py_SIZE(a);

1070

if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {

1071

/* It's the same as a */

1072

Py_INCREF(a);

1073

return (PyObject *)a;

1074

}

1075

if (j < i)

1076

j = i;

1077

return PyString_FromStringAndSize(a->ob_sval + i, j-i);

1078

}

1079

1080

static int

1081

string_contains(PyObject *str_obj, PyObject *sub_obj)

1082

{

1083

if (!PyString_CheckExact(sub_obj)) {

1084

#ifdef Py_USING_UNICODE

1085

if (PyUnicode_Check(sub_obj))

1086

return PyUnicode_Contains(str_obj, sub_obj);

1087

#endif

1088

if (!PyString_Check(sub_obj)) {

1089

PyErr_Format(PyExc_TypeError,

1090

"'in <string>' requires string as left operand, "

1091

"not %.200s", Py_TYPE(sub_obj)->tp_name);

1092

return -1;

1093

}

1094

}

1095

1096

return stringlib_contains_obj(str_obj, sub_obj);

1097

}

1098

1099

static PyObject *

1100

string_item(PyStringObject *a, register Py_ssize_t i)

1101

{

1102

char pchar;

1103

PyObject *v;

1104

if (i < 0 || i >= Py_SIZE(a)) {

1105

PyErr_SetString(PyExc_IndexError, "string index out of range");

1106

return NULL;

1107

}

1108

pchar = a->ob_sval[i];

1109

v = (PyObject *)characters[pchar & UCHAR_MAX];

1110

if (v == NULL)

1111

v = PyString_FromStringAndSize(&pchar, 1);

1112

else {

1113

#ifdef COUNT_ALLOCS

1114

one_strings++;

1115

#endif

1116

Py_INCREF(v);

1117

}

1118

return v;

1119

}

1120

1121

static PyObject*

1122

string_richcompare(PyStringObject *a, PyStringObject *b, int op)

1123

{

1124

int c;

1125

Py_ssize_t len_a, len_b;

1126

Py_ssize_t min_len;

1127

PyObject *result;

1128

1129

/* Make sure both arguments are strings. */

1130

if (!(PyString_Check(a) && PyString_Check(b))) {

1131

result = Py_NotImplemented;

1132

goto out;

1133

}

1134

if (a == b) {

1135

switch (op) {

1136

case Py_EQ:case Py_LE:case Py_GE:

1137

result = Py_True;

1138

goto out;

1139

case Py_NE:case Py_LT:case Py_GT:

1140

result = Py_False;

1141

goto out;

1142

}

1143

}

1144

if (op == Py_EQ) {

1145

/* Supporting Py_NE here as well does not save

1146

much time, since Py_NE is rarely used. */

1147

if (Py_SIZE(a) == Py_SIZE(b)

1148

&& (a->ob_sval[0] == b->ob_sval[0]

1149

&& memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {

1150

result = Py_True;

1151

} else {

1152

result = Py_False;

1153

}

1154

goto out;

1155

}

1156

len_a = Py_SIZE(a); len_b = Py_SIZE(b);

1157

min_len = (len_a < len_b) ? len_a : len_b;

1158

if (min_len > 0) {

1159

c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);

1160

if (c==0)

1161

c = memcmp(a->ob_sval, b->ob_sval, min_len);

1162

} else

1163

c = 0;

1164

if (c == 0)

1165

c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;

1166

switch (op) {

1167

case Py_LT: c = c < 0; break;

1168

case Py_LE: c = c <= 0; break;

1169

case Py_EQ: assert(0); break; /* unreachable */

1170

case Py_NE: c = c != 0; break;

1171

case Py_GT: c = c > 0; break;

1172

case Py_GE: c = c >= 0; break;

1173

default:

1174

result = Py_NotImplemented;

1175

goto out;

1176

}

1177

result = c ? Py_True : Py_False;

1178

out:

1179

Py_INCREF(result);

1180

return result;

1181

}

1182

1183

int

1184

_PyString_Eq(PyObject *o1, PyObject *o2)

1185

{

1186

PyStringObject *a = (PyStringObject*) o1;

1187

PyStringObject *b = (PyStringObject*) o2;

1188

return Py_SIZE(a) == Py_SIZE(b)

1189

&& *a->ob_sval == *b->ob_sval

1190

&& memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;

1191

}

1192

1193

static long

1194

string_hash(PyStringObject *a)

1195

{

1196

register Py_ssize_t len;

1197

register unsigned char *p;

1198

register long x;

1199

1200

if (a->ob_shash != -1)

1201

return a->ob_shash;

1202

len = Py_SIZE(a);

1203

p = (unsigned char *) a->ob_sval;

1204

x = *p << 7;

1205

while (--len >= 0)

1206

x = (1000003*x) ^ *p++;

1207

x ^= Py_SIZE(a);

1208

if (x == -1)

1209

x = -2;

1210

a->ob_shash = x;

1211

return x;

1212

}

1213

1214

static PyObject*

1215

string_subscript(PyStringObject* self, PyObject* item)

1216

{

1217

if (PyIndex_Check(item)) {

1218

Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);

1219

if (i == -1 && PyErr_Occurred())

1220

return NULL;

1221

if (i < 0)

1222

i += PyString_GET_SIZE(self);

1223

return string_item(self, i);

1224

}

1225

else if (PySlice_Check(item)) {

1226

Py_ssize_t start, stop, step, slicelength, cur, i;

1227

char* source_buf;

1228

char* result_buf;

1229

PyObject* result;

1230

1231

if (PySlice_GetIndicesEx((PySliceObject*)item,

1232

PyString_GET_SIZE(self),

1233

&start, &stop, &step, &slicelength) < 0) {

1234

return NULL;

1235

}

1236

1237

if (slicelength <= 0) {

1238

return PyString_FromStringAndSize("", 0);

1239

}

1240

else if (start == 0 && step == 1 &&

1241

slicelength == PyString_GET_SIZE(self) &&

1242

PyString_CheckExact(self)) {

1243

Py_INCREF(self);

1244

return (PyObject *)self;

1245

}

1246

else if (step == 1) {

1247

return PyString_FromStringAndSize(

1248

PyString_AS_STRING(self) + start,

1249

slicelength);

1250

}

1251

else {

1252

source_buf = PyString_AsString((PyObject*)self);

1253

result_buf = (char *)PyMem_Malloc(slicelength);

1254

if (result_buf == NULL)

1255

return PyErr_NoMemory();

1256

1257

for (cur = start, i = 0; i < slicelength;

1258

cur += step, i++) {

1259

result_buf[i] = source_buf[cur];

1260

}

1261

1262

result = PyString_FromStringAndSize(result_buf,

1263

slicelength);

1264

PyMem_Free(result_buf);

1265

return result;

1266

}

1267

}

1268

else {

1269

PyErr_Format(PyExc_TypeError,

1270

"string indices must be integers, not %.200s",

1271

Py_TYPE(item)->tp_name);

1272

return NULL;

1273

}

1274

}

1275

1276

static Py_ssize_t

1277

string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)

1278

{

1279

if ( index != 0 ) {

1280

PyErr_SetString(PyExc_SystemError,

1281

"accessing non-existent string segment");

1282

return -1;

1283

}

1284

*ptr = (void *)self->ob_sval;

1285

return Py_SIZE(self);

1286

}

1287

1288

static Py_ssize_t

1289

string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)

1290

{

1291

PyErr_SetString(PyExc_TypeError,

1292

"Cannot use string as modifiable buffer");

1293

return -1;

1294

}

1295

1296

static Py_ssize_t

1297

string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)

1298

{

1299

if ( lenp )

1300

*lenp = Py_SIZE(self);

1301

return 1;

1302

}

1303

1304

static Py_ssize_t

1305

string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)

1306

{

1307

if ( index != 0 ) {

1308

PyErr_SetString(PyExc_SystemError,

1309

"accessing non-existent string segment");

1310

return -1;

1311

}

1312

*ptr = self->ob_sval;

1313

return Py_SIZE(self);

1314

}

1315

1316

static int

1317

string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)

1318

{

1319

return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_SIZE(self),

1320

0, flags);

1321

}

1322

1323

/* Sequence-protocol slot table for strings.  The two assignment slots
   are left empty. */
static PySequenceMethods string_as_sequence = {
    (lenfunc)string_length,             /* sq_length */
    (binaryfunc)string_concat,          /* sq_concat */
    (ssizeargfunc)string_repeat,        /* sq_repeat */
    (ssizeargfunc)string_item,          /* sq_item */
    (ssizessizeargfunc)string_slice,    /* sq_slice */
    0,                                  /* sq_ass_item */
    0,                                  /* sq_ass_slice */
    (objobjproc)string_contains         /* sq_contains */
};

1333

1334

/* Mapping-protocol slot table for strings: length and subscript only;
   the assignment slot is left empty. */
static PyMappingMethods string_as_mapping = {
    (lenfunc)string_length,         /* mp_length */
    (binaryfunc)string_subscript,   /* mp_subscript */
    0,                              /* mp_ass_subscript */
};

1339

1340

/* Buffer-protocol slot table for strings. */
static PyBufferProcs string_as_buffer = {
    (readbufferproc)string_buffer_getreadbuf,   /* bf_getreadbuffer */
    (writebufferproc)string_buffer_getwritebuf, /* bf_getwritebuffer */
    (segcountproc)string_buffer_getsegcount,    /* bf_getsegcount */
    (charbufferproc)string_buffer_getcharbuf,   /* bf_getcharbuffer */
    (getbufferproc)string_buffer_getbuffer,     /* bf_getbuffer */
    0,                                          /* bf_releasebuffer: XXX */
};

1348

1349

1350

1351

/* Selectors for which end(s) of a string the strip helpers work on. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for a strip selector: the format string with its
   3-character "|O:" prefix skipped. */
#define STRIPNAME(i) (stripformat[(i)]+3)


/* True when `pattern` (of `length` bytes) occurs in `target` at `offset`.
   Compares the first and last bytes, then memcmp()s the bytes in between.
   Don't call if length < 2 (the memcmp length is length-2). */
#define Py_STRING_MATCH(target, offset, pattern, length) \
    ((target)[(offset)] == (pattern)[0] && \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] && \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )


/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16. Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields). For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Build the substring data[left:right] and append it to `list`.
   Relies on `str`, `list` and an `onError` label in the enclosing
   function.  Expands to several statements, not a single one. */
#define SPLIT_APPEND(data, left, right) \
    str = PyString_FromStringAndSize((data) + (left), \
                                     (right) - (left)); \
    if (str == NULL) \
        goto onError; \
    if (PyList_Append(list, str)) { \
        Py_DECREF(str); \
        goto onError; \
    } \
    else \
        Py_DECREF(str);

/* Build the substring data[left:right] and add it to `list` as element
   number `count`: stored directly into the list while
   count < MAX_PREALLOC, appended afterwards.  Increments `count`.
   Relies on `str`, `list`, `count` and an `onError` label in the
   enclosing function. */
#define SPLIT_ADD(data, left, right) { \
    str = PyString_FromStringAndSize((data) + (left), \
                                     (right) - (left)); \
    if (str == NULL) \
        goto onError; \
    if (count < MAX_PREALLOC) { \
        PyList_SET_ITEM(list, count, str); \
    } else { \
        if (PyList_Append(list, str)) { \
            Py_DECREF(str); \
            goto onError; \
        } \
        else \
            Py_DECREF(str); \
    } \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count

/* Scanning helpers: move index `i` forward (SKIP_*) or backward (RSKIP_*)
   over whitespace / non-whitespace bytes of `s`, stopping at the bounds. */
#define SKIP_SPACE(s, i, len) { while ((i)<(len) && isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define RSKIP_SPACE(s, i) { while ((i)>=0 && isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
#define RSKIP_NONSPACE(s, i) { while ((i)>=0 && !isspace(Py_CHARMASK((s)[(i)]))) (i)--; }

1417

1418

/* Split `self` (of `len` bytes) on runs of whitespace, performing at most
   `maxsplit` splits.  Returns a new list, or NULL on error. */
Py_LOCAL_INLINE(PyObject *)
split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
{
    const char *s = PyString_AS_STRING(self);
    Py_ssize_t pos = 0;           /* current scan position */
    Py_ssize_t word_start = 0;    /* start of the word being collected */
    Py_ssize_t count = 0;         /* used by SPLIT_ADD / FIX_PREALLOC_SIZE */
    PyObject *str;                /* used by SPLIT_ADD */
    PyObject *list;

    list = PyList_New(PREALLOC_SIZE(maxsplit));
    if (list == NULL)
        return NULL;

    for (; maxsplit > 0; maxsplit--) {
        SKIP_SPACE(s, pos, len);
        if (pos == len)
            break;
        word_start = pos;
        pos++;
        SKIP_NONSPACE(s, pos, len);
        if (word_start == 0 && pos == len && PyString_CheckExact(self)) {
            /* No whitespace in self, so just use it as list[0] */
            Py_INCREF(self);
            PyList_SET_ITEM(list, 0, (PyObject *)self);
            count++;
            break;
        }
        SPLIT_ADD(s, word_start, pos);
    }

    if (pos < len) {
        /* Only occurs when maxsplit was reached */
        /* Skip any remaining whitespace and copy to end of string */
        SKIP_SPACE(s, pos, len);
        if (pos != len) {
            SPLIT_ADD(s, pos, len);
        }
    }
    FIX_PREALLOC_SIZE(list);
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}

1459

1460

/* Split `self` (of `len` bytes) on the single byte `ch`, performing at
   most `maxcount` splits.  Returns a new list, or NULL on error. */
Py_LOCAL_INLINE(PyObject *)
split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
{
    const char *s = PyString_AS_STRING(self);
    Py_ssize_t i = 0;       /* start of the piece being collected */
    Py_ssize_t j;           /* scan position */
    Py_ssize_t count = 0;   /* used by SPLIT_ADD / FIX_PREALLOC_SIZE */
    PyObject *str;          /* used by SPLIT_ADD */
    PyObject *list;

    list = PyList_New(PREALLOC_SIZE(maxcount));
    if (list == NULL)
        return NULL;

    /* One pass over the string; each separator found uses up one split. */
    for (j = 0; j < len && maxcount > 0; j++) {
        /* I found that using memchr makes no difference */
        if (s[j] == ch) {
            SPLIT_ADD(s, i, j);
            i = j + 1;
            maxcount--;
        }
    }

    if (i == 0 && count == 0 && PyString_CheckExact(self)) {
        /* ch not in self, so just use self as list[0] */
        Py_INCREF(self);
        PyList_SET_ITEM(list, 0, (PyObject *)self);
        count++;
    }
    else if (i <= len) {
        /* Remainder after the last separator (possibly empty). */
        SPLIT_ADD(s, i, len);
    }
    FIX_PREALLOC_SIZE(list);
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}

1498

1499

PyDoc_STRVAR(split__doc__,

1500

"S.split([sep [,maxsplit]]) -> list of strings\n\

1501

\n\

1502

Return a list of the words in the string S, using sep as the\n\

1503

delimiter string. If maxsplit is given, at most maxsplit\n\

1504

splits are done. If sep is not specified or is None, any\n\

1505

whitespace string is a separator and empty strings are removed\n\

1506

from the result.");

1507

1508

static PyObject *

1509

string_split(PyStringObject *self, PyObject *args)

1510

{

1511

Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;

1512

Py_ssize_t maxsplit = -1, count=0;

1513

const char *s = PyString_AS_STRING(self), *sub;

1514

PyObject *list, *str, *subobj = Py_None;

1515

#ifdef USE_FAST

1516

Py_ssize_t pos;

1517

#endif

1518

1519

if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))

1520

return NULL;

1521

if (maxsplit < 0)

1522

maxsplit = PY_SSIZE_T_MAX;

1523

if (subobj == Py_None)

1524

return split_whitespace(self, len, maxsplit);

1525

if (PyString_Check(subobj)) {

1526

sub = PyString_AS_STRING(subobj);

1527

n = PyString_GET_SIZE(subobj);

1528

}

1529

#ifdef Py_USING_UNICODE

1530

else if (PyUnicode_Check(subobj))

1531

return PyUnicode_Split((PyObject *)self, subobj, maxsplit);

1532

#endif

1533

else if (PyObject_AsCharBuffer(subobj, &sub, &n))

1534

return NULL;

1535

1536

if (n == 0) {

1537

PyErr_SetString(PyExc_ValueError, "empty separator");

1538

return NULL;

1539

}

1540

else if (n == 1)

1541

return split_char(self, len, sub[0], maxsplit);

1542

1543

list = PyList_New(PREALLOC_SIZE(maxsplit));

1544

if (list == NULL)

1545

return NULL;

1546

1547

#ifdef USE_FAST

1548

i = j = 0;

1549

while (maxsplit-- > 0) {

1550

pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);

1551

if (pos < 0)

1552

break;

1553

j = i+pos;

1554

SPLIT_ADD(s, i, j);

1555

i = j + n;

1556

}

1557

#else

1558

i = j = 0;

1559

while ((j+n <= len) && (maxsplit-- > 0)) {

1560

for (; j+n <= len; j++) {

1561

if (Py_STRING_MATCH(s, j, sub, n)) {

1562

SPLIT_ADD(s, i, j);

1563

i = j = j + n;

1564

break;

1565

}

1566

}

1567

}

1568

#endif

1569

SPLIT_ADD(s, i, len);

1570

FIX_PREALLOC_SIZE(list);

1571

return list;

1572

1573

onError:

1574

Py_DECREF(list);

1575

return NULL;

1576

}

1577

1578

PyDoc_STRVAR(partition__doc__,

1579

"S.partition(sep) -> (head, sep, tail)\n\

1580

\n\

1581

Searches for the separator sep in S, and returns the part before it,\n\

1582

the separator itself, and the part after it. If the separator is not\n\

1583

found, returns S and two empty strings.");

1584

1585

static PyObject *

1586

string_partition(PyStringObject *self, PyObject *sep_obj)

1587

{

1588

const char *sep;

1589

Py_ssize_t sep_len;

1590

1591

if (PyString_Check(sep_obj)) {

1592

sep = PyString_AS_STRING(sep_obj);

1593

sep_len = PyString_GET_SIZE(sep_obj);

1594

}

1595

#ifdef Py_USING_UNICODE

1596

else if (PyUnicode_Check(sep_obj))

1597

return PyUnicode_Partition((PyObject *) self, sep_obj);

1598

#endif

1599

else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))

1600

return NULL;

1601

1602

return stringlib_partition(

1603

(PyObject*) self,

1604

PyString_AS_STRING(self), PyString_GET_SIZE(self),

1605

sep_obj, sep, sep_len

1606

);

1607

}

1608

1609

PyDoc_STRVAR(rpartition__doc__,

1610

"S.rpartition(sep) -> (tail, sep, head)\n\

1611

\n\

1612

Searches for the separator sep in S, starting at the end of S, and returns\n\

1613

the part before it, the separator itself, and the part after it. If the\n\

1614

separator is not found, returns two empty strings and S.");

1615

1616

static PyObject *

1617

string_rpartition(PyStringObject *self, PyObject *sep_obj)

1618

{

1619

const char *sep;

1620

Py_ssize_t sep_len;

1621

1622

if (PyString_Check(sep_obj)) {

1623

sep = PyString_AS_STRING(sep_obj);

1624

sep_len = PyString_GET_SIZE(sep_obj);

1625

}

1626

#ifdef Py_USING_UNICODE

1627

else if (PyUnicode_Check(sep_obj))

1628

return PyUnicode_Partition((PyObject *) self, sep_obj);

1629

#endif

1630

else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))

1631

return NULL;

1632

1633

return stringlib_rpartition(

1634

(PyObject*) self,

1635

PyString_AS_STRING(self), PyString_GET_SIZE(self),

1636

sep_obj, sep, sep_len

1637

);

1638

}

1639

1640

/* Split `self` (of `len` bytes) on runs of whitespace, scanning from the
   end and performing at most `maxsplit` splits.  Pieces are collected
   right-to-left and the list is reversed before returning.  Returns a new
   list, or NULL on error. */
Py_LOCAL_INLINE(PyObject *)
rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
{
    const char *s = PyString_AS_STRING(self);
    Py_ssize_t pos = len - 1;        /* current scan position */
    Py_ssize_t word_end = len - 1;   /* last byte of the word being collected */
    Py_ssize_t count = 0;            /* used by SPLIT_ADD / FIX_PREALLOC_SIZE */
    PyObject *str;                   /* used by SPLIT_ADD */
    PyObject *list;

    list = PyList_New(PREALLOC_SIZE(maxsplit));
    if (list == NULL)
        return NULL;

    for (; maxsplit > 0; maxsplit--) {
        RSKIP_SPACE(s, pos);
        if (pos < 0)
            break;
        word_end = pos;
        pos--;
        RSKIP_NONSPACE(s, pos);
        if (word_end == len-1 && pos < 0 && PyString_CheckExact(self)) {
            /* No whitespace in self, so just use it as list[0] */
            Py_INCREF(self);
            PyList_SET_ITEM(list, 0, (PyObject *)self);
            count++;
            break;
        }
        SPLIT_ADD(s, pos + 1, word_end + 1);
    }

    if (pos >= 0) {
        /* Only occurs when maxsplit was reached */
        /* Skip any remaining whitespace and copy to beginning of string */
        RSKIP_SPACE(s, pos);
        if (pos >= 0) {
            SPLIT_ADD(s, 0, pos + 1);
        }
    }
    FIX_PREALLOC_SIZE(list);
    if (PyList_Reverse(list) < 0)
        goto onError;
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}

1683

1684

/* Split `self` (of `len` bytes) on the single byte `ch`, scanning from
   the end and performing at most `maxcount` splits.  Pieces are collected
   right-to-left and the list is reversed before returning.  Returns a new
   list, or NULL on error. */
Py_LOCAL_INLINE(PyObject *)
rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
{
    const char *s = PyString_AS_STRING(self);
    Py_ssize_t i;               /* scan position */
    Py_ssize_t j = len - 1;     /* last byte of the piece being collected */
    Py_ssize_t count = 0;       /* used by SPLIT_ADD / FIX_PREALLOC_SIZE */
    PyObject *str;              /* used by SPLIT_ADD */
    PyObject *list;

    list = PyList_New(PREALLOC_SIZE(maxcount));
    if (list == NULL)
        return NULL;

    /* One backward pass; each separator found uses up one split. */
    for (i = len - 1; i >= 0 && maxcount > 0; i--) {
        if (s[i] == ch) {
            SPLIT_ADD(s, i + 1, j + 1);
            j = i - 1;
            maxcount--;
        }
    }

    if (i < 0 && count == 0 && PyString_CheckExact(self)) {
        /* ch not in self, so just use self as list[0] */
        Py_INCREF(self);
        PyList_SET_ITEM(list, 0, (PyObject *)self);
        count++;
    }
    else if (j >= -1) {
        /* Remainder before the first separator seen (possibly empty). */
        SPLIT_ADD(s, 0, j + 1);
    }
    FIX_PREALLOC_SIZE(list);
    if (PyList_Reverse(list) < 0)
        goto onError;
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}

1723

1724

PyDoc_STRVAR(rsplit__doc__,

1725

"S.rsplit([sep [,maxsplit]]) -> list of strings\n\

1726

\n\

1727

Return a list of the words in the string S, using sep as the\n\

1728

delimiter string, starting at the end of the string and working\n\

1729

to the front. If maxsplit is given, at most maxsplit splits are\n\

1730

done. If sep is not specified or is None, any whitespace string\n\

1731

is a separator.");

1732

1733

static PyObject *

1734

string_rsplit(PyStringObject *self, PyObject *args)

1735

{

1736

Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;

1737

Py_ssize_t maxsplit = -1, count=0;

1738

const char *s, *sub;

1739

PyObject *list, *str, *subobj = Py_None;

1740

1741

if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))

1742

return NULL;

1743

if (maxsplit < 0)

1744

maxsplit = PY_SSIZE_T_MAX;

1745

if (subobj == Py_None)

1746

return rsplit_whitespace(self, len, maxsplit);

1747

if (PyString_Check(subobj)) {

1748

sub = PyString_AS_STRING(subobj);

1749

n = PyString_GET_SIZE(subobj);

1750

}

1751

#ifdef Py_USING_UNICODE

1752

else if (PyUnicode_Check(subobj))

1753

return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);

1754

#endif

1755

else if (PyObject_AsCharBuffer(subobj, &sub, &n))

1756

return NULL;

1757

1758

if (n == 0) {

1759

PyErr_SetString(PyExc_ValueError, "empty separator");

1760

return NULL;

1761

}

1762

else if (n == 1)

1763

return rsplit_char(self, len, sub[0], maxsplit);

1764

1765

list = PyList_New(PREALLOC_SIZE(maxsplit));

1766

if (list == NULL)

1767

return NULL;

1768

1769

j = len;

1770

i = j - n;

1771

1772

s = PyString_AS_STRING(self);

1773

while ( (i >= 0) && (maxsplit-- > 0) ) {

1774

for (; i>=0; i--) {

1775

if (Py_STRING_MATCH(s, i, sub, n)) {

1776

SPLIT_ADD(s, i + n, j);

1777

j = i;

1778

i -= n;

1779

break;

1780

}

1781

}

1782

}

1783

SPLIT_ADD(s, 0, j);

1784

FIX_PREALLOC_SIZE(list);

1785

if (PyList_Reverse(list) < 0)

1786

goto onError;

1787

return list;

1788

1789

onError:

1790

Py_DECREF(list);

1791

return NULL;

1792

}

1793

1794

1795

PyDoc_STRVAR(join__doc__,

1796

"S.join(sequence) -> string\n\

1797

\n\

1798

Return a string which is the concatenation of the strings in the\n\

1799

sequence. The separator between elements is S.");

1800

1801

static PyObject *

1802

string_join(PyStringObject *self, PyObject *orig)

1803

{

1804

char *sep = PyString_AS_STRING(self);

1805

const Py_ssize_t seplen = PyString_GET_SIZE(self);

1806

PyObject *res = NULL;

1807

char *p;

1808

Py_ssize_t seqlen = 0;

1809

size_t sz = 0;

1810

Py_ssize_t i;

1811

PyObject *seq, *item;

1812

1813

seq = PySequence_Fast(orig, "");

1814

if (seq == NULL) {

1815

return NULL;

1816

}

1817

1818

seqlen = PySequence_Size(seq);

1819

if (seqlen == 0) {

1820

Py_DECREF(seq);

1821

return PyString_FromString("");

1822

}

1823

if (seqlen == 1) {

1824

item = PySequence_Fast_GET_ITEM(seq, 0);

1825

if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {

1826

Py_INCREF(item);

1827

Py_DECREF(seq);

1828

return item;

1829

}

1830

}

1831

1832

/* There are at least two things to join, or else we have a subclass

1833

* of the builtin types in the sequence.

1834

* Do a pre-pass to figure out the total amount of space we'll

1835

* need (sz), see whether any argument is absurd, and defer to

1836

* the Unicode join if appropriate.

1837

*/

1838

for (i = 0; i < seqlen; i++) {

1839

const size_t old_sz = sz;

1840

item = PySequence_Fast_GET_ITEM(seq, i);

1841

if (!PyString_Check(item)){

1842

#ifdef Py_USING_UNICODE

1843

if (PyUnicode_Check(item)) {

1844

/* Defer to Unicode join.

1845

* CAUTION: There's no gurantee that the

1846

* original sequence can be iterated over

1847

* again, so we must pass seq here.

1848

*/

1849

PyObject *result;

1850

result = PyUnicode_Join((PyObject *)self, seq);

1851

Py_DECREF(seq);

1852

return result;

1853

}

1854

#endif

1855

PyErr_Format(PyExc_TypeError,

1856

"sequence item %zd: expected string,"

1857

" %.80s found",

1858

i, Py_TYPE(item)->tp_name);

1859

Py_DECREF(seq);

1860

return NULL;

1861

}

1862

sz += PyString_GET_SIZE(item);

1863

if (i != 0)

1864

sz += seplen;

1865

if (sz < old_sz || sz > PY_SSIZE_T_MAX) {

1866

PyErr_SetString(PyExc_OverflowError,

1867

"join() result is too long for a Python string");

1868

Py_DECREF(seq);

1869

return NULL;

1870

}

1871

}

1872

1873

/* Allocate result space. */

1874

res = PyString_FromStringAndSize((char*)NULL, sz);

1875

if (res == NULL) {

1876

Py_DECREF(seq);

1877

return NULL;

1878

}

1879

1880

/* Catenate everything. */

1881

p = PyString_AS_STRING(res);

1882

for (i = 0; i < seqlen; ++i) {

1883

size_t n;

1884

item = PySequence_Fast_GET_ITEM(seq, i);

1885

n = PyString_GET_SIZE(item);

1886

Py_MEMCPY(p, PyString_AS_STRING(item), n);

1887

p += n;

1888

if (i < seqlen - 1) {

1889

Py_MEMCPY(p, sep, seplen);

1890

p += seplen;

1891

}

1892

}

1893

1894

Py_DECREF(seq);

1895

return res;

1896

}

1897

1898

/* Exported wrapper around string_join(): join the items of `x` using the
   string object `sep` as separator.  Both arguments must be non-NULL and
   `sep` must be a string (checked by assertions only). */
PyObject *
_PyString_Join(PyObject *sep, PyObject *x)
{
    PyStringObject *separator;

    assert(sep != NULL && PyString_Check(sep));
    assert(x != NULL);
    separator = (PyStringObject *)sep;
    return string_join(separator, x);
}

1905

1906

Py_LOCAL_INLINE(void)

1907

string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)

1908

{

1909

if (*end > len)

1910

*end = len;

1911

else if (*end < 0)

1912

*end += len;

1913

if (*end < 0)

1914

*end = 0;

1915

if (*start < 0)

1916

*start += len;

1917

if (*start < 0)

1918

*start = 0;

1919

}

1920

1921

Py_LOCAL_INLINE(Py_ssize_t)

1922

string_find_internal(PyStringObject *self, PyObject *args, int dir)

1923

{

1924

PyObject *subobj;

1925

const char *sub;

1926

Py_ssize_t sub_len;

1927

Py_ssize_t start=0, end=PY_SSIZE_T_MAX;

1928

PyObject *obj_start=Py_None, *obj_end=Py_None;

1929

1930

if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,

1931

&obj_start, &obj_end))

1932

return -2;

1933

/* To support None in "start" and "end" arguments, meaning

1934

the same as if they were not passed.

1935

*/

1936

if (obj_start != Py_None)

1937

if (!_PyEval_SliceIndex(obj_start, &start))

1938

return -2;

1939

if (obj_end != Py_None)

1940

if (!_PyEval_SliceIndex(obj_end, &end))

1941

return -2;

1942

1943

if (PyString_Check(subobj)) {

1944

sub = PyString_AS_STRING(subobj);

1945

sub_len = PyString_GET_SIZE(subobj);

1946

}

1947

#ifdef Py_USING_UNICODE

1948

else if (PyUnicode_Check(subobj))

1949

return PyUnicode_Find(

1950

(PyObject *)self, subobj, start, end, dir);

1951

#endif

1952

else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))

1953

/* XXX - the "expected a character buffer object" is pretty

1954

confusing for a non-expert. remap to something else ? */

1955

return -2;

1956

1957

if (dir > 0)

1958

return stringlib_find_slice(

1959

PyString_AS_STRING(self), PyString_GET_SIZE(self),

1960

sub, sub_len, start, end);

1961

else

1962

return stringlib_rfind_slice(

1963

PyString_AS_STRING(self), PyString_GET_SIZE(self),

1964

sub, sub_len, start, end);

1965

}

1966

1967

1968

PyDoc_STRVAR(find__doc__,

1969

"S.find(sub [,start [,end]]) -> int\n\

1970

\n\

1971

Return the lowest index in S where substring sub is found,\n\

1972

such that sub is contained within s[start:end]. Optional\n\

1973

arguments start and end are interpreted as in slice notation.\n\

1974

\n\

1975

Return -1 on failure.");

1976

1977

static PyObject *

1978

string_find(PyStringObject *self, PyObject *args)

1979

{

1980

Py_ssize_t result = string_find_internal(self, args, +1);

1981

if (result == -2)

1982

return NULL;

1983

return PyInt_FromSsize_t(result);

1984

}

1985

1986

1987

PyDoc_STRVAR(index__doc__,

1988

"S.index(sub [,start [,end]]) -> int\n\

1989

\n\

1990

Like S.find() but raise ValueError when the substring is not found.");

1991

1992

static PyObject *

1993

string_index(PyStringObject *self, PyObject *args)

1994

{

1995

Py_ssize_t result = string_find_internal(self, args, +1);

1996

if (result == -2)

1997

return NULL;

1998

if (result == -1) {

1999

PyErr_SetString(PyExc_ValueError,

2000

"substring not found");

2001

return NULL;

2002

}

2003

return PyInt_FromSsize_t(result);

2004

}

2005

2006

2007

PyDoc_STRVAR(rfind__doc__,

2008

"S.rfind(sub [,start [,end]]) -> int\n\

2009

\n\

2010

Return the highest index in S where substring sub is found,\n\

2011

such that sub is contained within s[start:end]. Optional\n\

2012

arguments start and end are interpreted as in slice notation.\n\

2013

\n\

2014

Return -1 on failure.");

2015

2016

static PyObject *

2017

string_rfind(PyStringObject *self, PyObject *args)

2018

{

2019

Py_ssize_t result = string_find_internal(self, args, -1);

2020

if (result == -2)

2021

return NULL;

2022

return PyInt_FromSsize_t(result);

2023

}

2024

2025

2026

PyDoc_STRVAR(rindex__doc__,

2027

"S.rindex(sub [,start [,end]]) -> int\n\

2028

\n\

2029

Like S.rfind() but raise ValueError when the substring is not found.");

2030

2031

static PyObject *

2032

string_rindex(PyStringObject *self, PyObject *args)

2033

{

2034

Py_ssize_t result = string_find_internal(self, args, -1);

2035

if (result == -2)

2036

return NULL;

2037

if (result == -1) {

2038

PyErr_SetString(PyExc_ValueError,

2039

"substring not found");

2040

return NULL;

2041

}

2042

return PyInt_FromSsize_t(result);

2043

}

2044

2045

2046

/* Strip from `self` the bytes that occur in the string object `sepobj`.
   `striptype` selects the end(s): LEFTSTRIP, RIGHTSTRIP or BOTHSTRIP.
   Returns `self` itself (new reference) when nothing was removed and it
   is an exact string, otherwise a new string. */
Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
{
    char *s = PyString_AS_STRING(self);
    Py_ssize_t len = PyString_GET_SIZE(self);
    char *sep = PyString_AS_STRING(sepobj);
    Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
    Py_ssize_t left = 0;        /* first byte kept */
    Py_ssize_t right = len;     /* one past the last byte kept */

    if (striptype != RIGHTSTRIP) {
        while (left < len && memchr(sep, Py_CHARMASK(s[left]), seplen))
            left++;
    }

    if (striptype != LEFTSTRIP) {
        /* Never move past `left`, so an all-stripped string yields "". */
        while (right > left &&
               memchr(sep, Py_CHARMASK(s[right - 1]), seplen))
            right--;
    }

    if (left == 0 && right == len && PyString_CheckExact(self)) {
        Py_INCREF(self);
        return (PyObject*)self;
    }
    return PyString_FromStringAndSize(s + left, right - left);
}

2077

2078

2079

/* Strip whitespace (as classified by isspace()) from `self`.
   `striptype` selects the end(s): LEFTSTRIP, RIGHTSTRIP or BOTHSTRIP.
   Returns `self` itself (new reference) when nothing was removed and it
   is an exact string, otherwise a new string. */
Py_LOCAL_INLINE(PyObject *)
do_strip(PyStringObject *self, int striptype)
{
    char *s = PyString_AS_STRING(self);
    Py_ssize_t len = PyString_GET_SIZE(self);
    Py_ssize_t left = 0;        /* first byte kept */
    Py_ssize_t right = len;     /* one past the last byte kept */

    if (striptype != RIGHTSTRIP) {
        while (left < len && isspace(Py_CHARMASK(s[left])))
            left++;
    }

    if (striptype != LEFTSTRIP) {
        /* Never move past `left`, so an all-blank string yields "". */
        while (right > left && isspace(Py_CHARMASK(s[right - 1])))
            right--;
    }

    if (left == 0 && right == len && PyString_CheckExact(self)) {
        Py_INCREF(self);
        return (PyObject*)self;
    }
    return PyString_FromStringAndSize(s + left, right - left);
}

2107

2108

2109

/* Strip entry point used when arguments were passed: parse the optional
   `chars` argument and dispatch on its type.  A missing or None argument
   strips whitespace; a str strips those bytes; a unicode argument
   converts `self` to unicode first; anything else raises TypeError. */
Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyStringObject *self, int striptype, PyObject *args)
{
    PyObject *sep = NULL;

    if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
        return NULL;

    if (sep == NULL || sep == Py_None)
        return do_strip(self, striptype);

    if (PyString_Check(sep))
        return do_xstrip(self, striptype, sep);

#ifdef Py_USING_UNICODE
    if (PyUnicode_Check(sep)) {
        PyObject *res;
        PyObject *uniself = PyUnicode_FromObject((PyObject *)self);

        if (uniself == NULL)
            return NULL;
        res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
                                striptype, sep);
        Py_DECREF(uniself);
        return res;
    }
#endif

    PyErr_Format(PyExc_TypeError,
#ifdef Py_USING_UNICODE
                 "%s arg must be None, str or unicode",
#else
                 "%s arg must be None or str",
#endif
                 STRIPNAME(striptype));
    return NULL;
}

2144

2145

2146

PyDoc_STRVAR(strip__doc__,

2147

"S.strip([chars]) -> string or unicode\n\

2148

\n\

2149

Return a copy of the string S with leading and trailing\n\

2150

whitespace removed.\n\

2151

If chars is given and not None, remove characters in chars instead.\n\

2152

If chars is unicode, S will be converted to unicode before stripping");

2153

2154

static PyObject *

2155

string_strip(PyStringObject *self, PyObject *args)

2156

{

2157

if (PyTuple_GET_SIZE(args) == 0)

2158

return do_strip(self, BOTHSTRIP); /* Common case */

2159

else

2160

return do_argstrip(self, BOTHSTRIP, args);

2161

}

2162

2163

2164

PyDoc_STRVAR(lstrip__doc__,

2165

"S.lstrip([chars]) -> string or unicode\n\

2166

\n\

2167

Return a copy of the string S with leading whitespace removed.\n\

2168

If chars is given and not None, remove characters in chars instead.\n\

2169

If chars is unicode, S will be converted to unicode before stripping");

2170

2171

static PyObject *

2172

string_lstrip(PyStringObject *self, PyObject *args)

2173

{

2174

if (PyTuple_GET_SIZE(args) == 0)

2175

return do_strip(self, LEFTSTRIP); /* Common case */

2176

else

2177

return do_argstrip(self, LEFTSTRIP, args);

2178

}

2179

2180

2181

PyDoc_STRVAR(rstrip__doc__,

2182

"S.rstrip([chars]) -> string or unicode\n\

2183

\n\

2184

Return a copy of the string S with trailing whitespace removed.\n\

2185

If chars is given and not None, remove characters in chars instead.\n\

2186

If chars is unicode, S will be converted to unicode before stripping");

2187

2188

static PyObject *

2189

string_rstrip(PyStringObject *self, PyObject *args)

2190

{

2191

if (PyTuple_GET_SIZE(args) == 0)

2192

return do_strip(self, RIGHTSTRIP); /* Common case */

2193

else

2194

return do_argstrip(self, RIGHTSTRIP, args);

2195

}

2196

2197

2198

PyDoc_STRVAR(lower__doc__,

2199

"S.lower() -> string\n\

2200

\n\

2201

Return a copy of the string S converted to lowercase.");

2202

2203

/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */

2204

#ifndef _tolower

2205

#define _tolower tolower

2206

#endif

2207

2208

static PyObject *

2209

string_lower(PyStringObject *self)

2210

{

2211

char *s;

2212

Py_ssize_t i, n = PyString_GET_SIZE(self);

2213

PyObject *newobj;

2214

2215

newobj = PyString_FromStringAndSize(NULL, n);

2216

if (!newobj)

2217

return NULL;

2218

2219

s = PyString_AS_STRING(newobj);

2220

2221

Py_MEMCPY(s, PyString_AS_STRING(self), n);

2222

2223

for (i = 0; i < n; i++) {

2224

int c = Py_CHARMASK(s[i]);

2225

if (isupper(c))

2226

s[i] = _tolower(c);

2227

}

2228

2229

return newobj;

2230

}

2231

2232

PyDoc_STRVAR(upper__doc__,

2233

"S.upper() -> string\n\

2234

\n\

2235

Return a copy of the string S converted to uppercase.");

2236

2237

#ifndef _toupper

2238

#define _toupper toupper

2239

#endif

2240

2241

static PyObject *

2242

string_upper(PyStringObject *self)

2243

{

2244

char *s;

2245

Py_ssize_t i, n = PyString_GET_SIZE(self);

2246

PyObject *newobj;

2247

2248

newobj = PyString_FromStringAndSize(NULL, n);

2249

if (!newobj)

2250

return NULL;

2251

2252

s = PyString_AS_STRING(newobj);

2253

2254

Py_MEMCPY(s, PyString_AS_STRING(self), n);

2255

2256

for (i = 0; i < n; i++) {

2257

int c = Py_CHARMASK(s[i]);

2258

if (islower(c))

2259

s[i] = _toupper(c);

2260

}

2261

2262

return newobj;

2263

}

2264

2265

PyDoc_STRVAR(title__doc__,

2266

"S.title() -> string\n\

2267

\n\

2268

Return a titlecased version of S, i.e. words start with uppercase\n\

2269

characters, all remaining cased characters have lowercase.");

2270

2271

static PyObject*

2272

string_title(PyStringObject *self)

2273

{

2274

char *s = PyString_AS_STRING(self), *s_new;

2275

Py_ssize_t i, n = PyString_GET_SIZE(self);

2276

int previous_is_cased = 0;

2277

PyObject *newobj;

2278

2279

newobj = PyString_FromStringAndSize(NULL, n);

2280

if (newobj == NULL)

2281

return NULL;

2282

s_new = PyString_AsString(newobj);

2283

for (i = 0; i < n; i++) {

2284

int c = Py_CHARMASK(*s++);

2285

if (islower(c)) {

2286

if (!previous_is_cased)

2287

c = toupper(c);

2288

previous_is_cased = 1;

2289

} else if (isupper(c)) {

2290

if (previous_is_cased)

2291

c = tolower(c);

2292

previous_is_cased = 1;

2293

} else

2294

previous_is_cased = 0;

2295

*s_new++ = c;

2296

}

2297

return newobj;

2298

}

2299

2300

PyDoc_STRVAR(capitalize__doc__,

2301

"S.capitalize() -> string\n\

2302

\n\

2303

Return a copy of the string S with only its first character\n\

2304

capitalized.");

2305

2306

static PyObject *

2307

string_capitalize(PyStringObject *self)

2308

{

2309

char *s = PyString_AS_STRING(self), *s_new;

2310

Py_ssize_t i, n = PyString_GET_SIZE(self);

2311

PyObject *newobj;

2312

2313

newobj = PyString_FromStringAndSize(NULL, n);

2314

if (newobj == NULL)

2315

return NULL;

2316

s_new = PyString_AsString(newobj);

2317

if (0 < n) {

2318

int c = Py_CHARMASK(*s++);

2319

if (islower(c))

2320

*s_new = toupper(c);

2321

else

2322

*s_new = c;

2323

s_new++;

2324

}

2325

for (i = 1; i < n; i++) {

2326

int c = Py_CHARMASK(*s++);

2327

if (isupper(c))

2328

*s_new = tolower(c);

2329

else

2330

*s_new = c;

2331

s_new++;

2332

}

2333

return newobj;

2334

}

2335

2336

2337

PyDoc_STRVAR(count__doc__,

2338

"S.count(sub[, start[, end]]) -> int\n\

2339

\n\

2340

Return the number of non-overlapping occurrences of substring sub in\n\

2341

string S[start:end]. Optional arguments start and end are interpreted\n\

2342

as in slice notation.");

2343

2344

static PyObject *

2345

string_count(PyStringObject *self, PyObject *args)

2346

{

2347

PyObject *sub_obj;

2348

const char *str = PyString_AS_STRING(self), *sub;

2349

Py_ssize_t sub_len;

2350

Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;

2351

2352

if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,

2353

_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))

2354

return NULL;

2355

2356

if (PyString_Check(sub_obj)) {

2357

sub = PyString_AS_STRING(sub_obj);

2358

sub_len = PyString_GET_SIZE(sub_obj);

2359

}

2360

#ifdef Py_USING_UNICODE

2361

else if (PyUnicode_Check(sub_obj)) {

2362

Py_ssize_t count;

2363

count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);

2364

if (count == -1)

2365

return NULL;

2366

else

2367

return PyInt_FromSsize_t(count);

2368

}

2369

#endif

2370

else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))

2371

return NULL;

2372

2373

string_adjust_indices(&start, &end, PyString_GET_SIZE(self));

2374

2375

return PyInt_FromSsize_t(

2376

stringlib_count(str + start, end - start, sub, sub_len)

2377

);

2378

}

2379

2380

PyDoc_STRVAR(swapcase__doc__,

2381

"S.swapcase() -> string\n\

2382

\n\

2383

Return a copy of the string S with uppercase characters\n\

2384

converted to lowercase and vice versa.");

2385

2386

static PyObject *

2387

string_swapcase(PyStringObject *self)

2388

{

2389

char *s = PyString_AS_STRING(self), *s_new;

2390

Py_ssize_t i, n = PyString_GET_SIZE(self);

2391

PyObject *newobj;

2392

2393

newobj = PyString_FromStringAndSize(NULL, n);

2394

if (newobj == NULL)

2395

return NULL;

2396

s_new = PyString_AsString(newobj);

2397

for (i = 0; i < n; i++) {

2398

int c = Py_CHARMASK(*s++);

2399

if (islower(c)) {

2400

*s_new = toupper(c);

2401

}

2402

else if (isupper(c)) {

2403

*s_new = tolower(c);

2404

}

2405

else

2406

*s_new = c;

2407

s_new++;

2408

}

2409

return newobj;

2410

}

2411

2412

2413

PyDoc_STRVAR(translate__doc__,

2414

"S.translate(table [,deletechars]) -> string\n\

2415

\n\

2416

Return a copy of the string S, where all characters occurring\n\

2417

in the optional argument deletechars are removed, and the\n\

2418

remaining characters have been mapped through the given\n\

2419

translation table, which must be a string of length 256.");

2420

2421

static PyObject *

2422

string_translate(PyStringObject *self, PyObject *args)

2423

{

2424

register char *input, *output;

2425

const char *table;

2426

register Py_ssize_t i, c, changed = 0;

2427

PyObject *input_obj = (PyObject*)self;

2428

const char *output_start, *del_table=NULL;

2429

Py_ssize_t inlen, tablen, dellen = 0;

2430

PyObject *result;

2431

int trans_table[256];

2432

PyObject *tableobj, *delobj = NULL;

2433

2434

if (!PyArg_UnpackTuple(args, "translate", 1, 2,

2435

&tableobj, &delobj))

2436

return NULL;

2437

2438

if (PyString_Check(tableobj)) {

2439

table = PyString_AS_STRING(tableobj);

2440

tablen = PyString_GET_SIZE(tableobj);

2441

}

2442

else if (tableobj == Py_None) {

2443

table = NULL;

2444

tablen = 256;

2445

}

2446

#ifdef Py_USING_UNICODE

2447

else if (PyUnicode_Check(tableobj)) {

2448

/* Unicode .translate() does not support the deletechars

2449

parameter; instead a mapping to None will cause characters

2450

to be deleted. */

2451

if (delobj != NULL) {

2452

PyErr_SetString(PyExc_TypeError,

2453

"deletions are implemented differently for unicode");

2454

return NULL;

2455

}

2456

return PyUnicode_Translate((PyObject *)self, tableobj, NULL);

2457

}

2458

#endif

2459

else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))

2460

return NULL;

2461

2462

if (tablen != 256) {

2463

PyErr_SetString(PyExc_ValueError,

2464

"translation table must be 256 characters long");

2465

return NULL;

2466

}

2467

2468

if (delobj != NULL) {

2469

if (PyString_Check(delobj)) {

2470

del_table = PyString_AS_STRING(delobj);

2471

dellen = PyString_GET_SIZE(delobj);

2472

}

2473

#ifdef Py_USING_UNICODE

2474

else if (PyUnicode_Check(delobj)) {

2475

PyErr_SetString(PyExc_TypeError,

2476

"deletions are implemented differently for unicode");

2477

return NULL;

2478

}

2479

#endif

2480

else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))

2481

return NULL;

2482

}

2483

else {

2484

del_table = NULL;

2485

dellen = 0;

2486

}

2487

2488

inlen = PyString_GET_SIZE(input_obj);

2489

result = PyString_FromStringAndSize((char *)NULL, inlen);

2490

if (result == NULL)

2491

return NULL;

2492

output_start = output = PyString_AsString(result);

2493

input = PyString_AS_STRING(input_obj);

2494

2495

if (dellen == 0 && table != NULL) {

2496

/* If no deletions are required, use faster code */

2497

for (i = inlen; --i >= 0; ) {

2498

c = Py_CHARMASK(*input++);

2499

if (Py_CHARMASK((*output++ = table[c])) != c)

2500

changed = 1;

2501

}

2502

if (changed || !PyString_CheckExact(input_obj))

2503

return result;

2504

Py_DECREF(result);

2505

Py_INCREF(input_obj);

2506

return input_obj;

2507

}

2508

2509

if (table == NULL) {

2510

for (i = 0; i < 256; i++)

2511

trans_table[i] = Py_CHARMASK(i);

2512

} else {

2513

for (i = 0; i < 256; i++)

2514

trans_table[i] = Py_CHARMASK(table[i]);

2515

}

2516

2517

for (i = 0; i < dellen; i++)

2518

trans_table[(int) Py_CHARMASK(del_table[i])] = -1;

2519

2520

for (i = inlen; --i >= 0; ) {

2521

c = Py_CHARMASK(*input++);

2522

if (trans_table[c] != -1)

2523

if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)

2524

continue;

2525

changed = 1;

2526

}

2527

if (!changed && PyString_CheckExact(input_obj)) {

2528

Py_DECREF(result);

2529

Py_INCREF(input_obj);

2530

return input_obj;

2531

}

2532

/* Fix the size of the resulting string */

2533

if (inlen > 0)

2534

_PyString_Resize(&result, output - output_start);

2535

return result;

2536

}

2537

2538

2539

/* Values for the `direction' argument of findstring() and countstring():
   a positive value scans from `start' upwards, a negative value scans
   from `end' downwards. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* findchar(target, target_len, c): address of the first byte equal to c
   within the first target_len bytes of target, or NULL when there is
   none.  Thin wrapper around memchr(); the arguments are parenthesized
   so that arbitrary expressions can be passed safely. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))

2546

2547

/* String ops must return a string. */

2548

/* If the object is subclass of string, create a copy */

2549

Py_LOCAL(PyStringObject *)

2550

return_self(PyStringObject *self)

2551

{

2552

if (PyString_CheckExact(self)) {

2553

Py_INCREF(self);

2554

return self;

2555

}

2556

return (PyStringObject *)PyString_FromStringAndSize(

2557

PyString_AS_STRING(self),

2558

PyString_GET_SIZE(self));

2559

}

2560

2561

Py_LOCAL_INLINE(Py_ssize_t)

2562

countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)

2563

{

2564

Py_ssize_t count=0;

2565

const char *start=target;

2566

const char *end=target+target_len;

2567

2568

while ( (start=findchar(start, end-start, c)) != NULL ) {

2569

count++;

2570

if (count >= maxcount)

2571

break;

2572

start += 1;

2573

}

2574

return count;

2575

}

2576

2577

Py_LOCAL(Py_ssize_t)

2578

findstring(const char *target, Py_ssize_t target_len,

2579

const char *pattern, Py_ssize_t pattern_len,

2580

Py_ssize_t start,

2581

Py_ssize_t end,

2582

int direction)

2583

{

2584

if (start < 0) {

2585

start += target_len;

2586

if (start < 0)

2587

start = 0;

2588

}

2589

if (end > target_len) {

2590

end = target_len;

2591

} else if (end < 0) {

2592

end += target_len;

2593

if (end < 0)

2594

end = 0;

2595

}

2596

2597

/* zero-length substrings always match at the first attempt */

2598

if (pattern_len == 0)

2599

return (direction > 0) ? start : end;

2600

2601

end -= pattern_len;

2602

2603

if (direction < 0) {

2604

for (; end >= start; end--)

2605

if (Py_STRING_MATCH(target, end, pattern, pattern_len))

2606

return end;

2607

} else {

2608

for (; start <= end; start++)

2609

if (Py_STRING_MATCH(target, start, pattern, pattern_len))

2610

return start;

2611

}

2612

return -1;

2613

}

2614

2615

Py_LOCAL_INLINE(Py_ssize_t)

2616

countstring(const char *target, Py_ssize_t target_len,

2617

const char *pattern, Py_ssize_t pattern_len,

2618

Py_ssize_t start,

2619

Py_ssize_t end,

2620

int direction, Py_ssize_t maxcount)

2621

{

2622

Py_ssize_t count=0;

2623

2624

if (start < 0) {

2625

start += target_len;

2626

if (start < 0)

2627

start = 0;

2628

}

2629

if (end > target_len) {

2630

end = target_len;

2631

} else if (end < 0) {

2632

end += target_len;

2633

if (end < 0)

2634

end = 0;

2635

}

2636

2637

/* zero-length substrings match everywhere */

2638

if (pattern_len == 0 || maxcount == 0) {

2639

if (target_len+1 < maxcount)

2640

return target_len+1;

2641

return maxcount;

2642

}

2643

2644

end -= pattern_len;

2645

if (direction < 0) {

2646

for (; (end >= start); end--)

2647

if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {

2648

count++;

2649

if (--maxcount <= 0) break;

2650

end -= pattern_len-1;

2651

}

2652

} else {

2653

for (; (start <= end); start++)

2654

if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {

2655

count++;

2656

if (--maxcount <= 0)

2657

break;

2658

start += pattern_len-1;

2659

}

2660

}

2661

return count;

2662

}

2663

2664

2665

/* Algorithms for different cases of string replacement */

2666

2667

/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */

2668

Py_LOCAL(PyStringObject *)

2669

replace_interleave(PyStringObject *self,

2670

const char *to_s, Py_ssize_t to_len,

2671

Py_ssize_t maxcount)

2672

{

2673

char *self_s, *result_s;

2674

Py_ssize_t self_len, result_len;

2675

Py_ssize_t count, i, product;

2676

PyStringObject *result;

2677

2678

self_len = PyString_GET_SIZE(self);

2679

2680

/* 1 at the end plus 1 after every character */

2681

count = self_len+1;

2682

if (maxcount < count)

2683

count = maxcount;

2684

2685

/* Check for overflow */

2686

/* result_len = count * to_len + self_len; */

2687

product = count * to_len;

2688

if (product / to_len != count) {

2689

PyErr_SetString(PyExc_OverflowError,

2690

"replace string is too long");

2691

return NULL;

2692

}

2693

result_len = product + self_len;

2694

if (result_len < 0) {

2695

PyErr_SetString(PyExc_OverflowError,

2696

"replace string is too long");

2697

return NULL;

2698

}

2699

2700

if (! (result = (PyStringObject *)

2701

PyString_FromStringAndSize(NULL, result_len)) )

2702

return NULL;

2703

2704

self_s = PyString_AS_STRING(self);

2705

result_s = PyString_AS_STRING(result);

2706

2707

/* TODO: special case single character, which doesn't need memcpy */

2708

2709

/* Lay the first one down (guaranteed this will occur) */

2710

Py_MEMCPY(result_s, to_s, to_len);

2711

result_s += to_len;

2712

count -= 1;

2713

2714

for (i=0; i<count; i++) {

2715

*result_s++ = *self_s++;

2716

Py_MEMCPY(result_s, to_s, to_len);

2717

result_s += to_len;

2718

}

2719

2720

/* Copy the rest of the original string */

2721

Py_MEMCPY(result_s, self_s, self_len-i);

2722

2723

return result;

2724

}

2725

2726

/* Special case for deleting a single character */

2727

/* len(self)>=1, len(from)==1, to="", maxcount>=1 */

2728

Py_LOCAL(PyStringObject *)

2729

replace_delete_single_character(PyStringObject *self,

2730

char from_c, Py_ssize_t maxcount)

2731

{

2732

char *self_s, *result_s;

2733

char *start, *next, *end;

2734

Py_ssize_t self_len, result_len;

2735

Py_ssize_t count;

2736

PyStringObject *result;

2737

2738

self_len = PyString_GET_SIZE(self);

2739

self_s = PyString_AS_STRING(self);

2740

2741

count = countchar(self_s, self_len, from_c, maxcount);

2742

if (count == 0) {

2743

return return_self(self);

2744

}

2745

2746

result_len = self_len - count; /* from_len == 1 */

2747

assert(result_len>=0);

2748

2749

if ( (result = (PyStringObject *)

2750

PyString_FromStringAndSize(NULL, result_len)) == NULL)

2751

return NULL;

2752

result_s = PyString_AS_STRING(result);

2753

2754

start = self_s;

2755

end = self_s + self_len;

2756

while (count-- > 0) {

2757

next = findchar(start, end-start, from_c);

2758

if (next == NULL)

2759

break;

2760

Py_MEMCPY(result_s, start, next-start);

2761

result_s += (next-start);

2762

start = next+1;

2763

}

2764

Py_MEMCPY(result_s, start, end-start);

2765

2766

return result;

2767

}

2768

2769

/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */

2770

2771

Py_LOCAL(PyStringObject *)

2772

replace_delete_substring(PyStringObject *self,

2773

const char *from_s, Py_ssize_t from_len,

2774

Py_ssize_t maxcount) {

2775

char *self_s, *result_s;

2776

char *start, *next, *end;

2777

Py_ssize_t self_len, result_len;

2778

Py_ssize_t count, offset;

2779

PyStringObject *result;

2780

2781

self_len = PyString_GET_SIZE(self);

2782

self_s = PyString_AS_STRING(self);

2783

2784

count = countstring(self_s, self_len,

2785

from_s, from_len,

2786

0, self_len, 1,

2787

maxcount);

2788

2789

if (count == 0) {

2790

/* no matches */

2791

return return_self(self);

2792

}

2793

2794

result_len = self_len - (count * from_len);

2795

assert (result_len>=0);

2796

2797

if ( (result = (PyStringObject *)

2798

PyString_FromStringAndSize(NULL, result_len)) == NULL )

2799

return NULL;

2800

2801

result_s = PyString_AS_STRING(result);

2802

2803

start = self_s;

2804

end = self_s + self_len;

2805

while (count-- > 0) {

2806

offset = findstring(start, end-start,

2807

from_s, from_len,

2808

0, end-start, FORWARD);

2809

if (offset == -1)

2810

break;

2811

next = start + offset;

2812

2813

Py_MEMCPY(result_s, start, next-start);

2814

2815

result_s += (next-start);

2816

start = next+from_len;

2817

}

2818

Py_MEMCPY(result_s, start, end-start);

2819

return result;

2820

}

2821

2822

/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */

2823

Py_LOCAL(PyStringObject *)

2824

replace_single_character_in_place(PyStringObject *self,

2825

char from_c, char to_c,

2826

Py_ssize_t maxcount)

2827

{

2828

char *self_s, *result_s, *start, *end, *next;

2829

Py_ssize_t self_len;

2830

PyStringObject *result;

2831

2832

/* The result string will be the same size */

2833

self_s = PyString_AS_STRING(self);

2834

self_len = PyString_GET_SIZE(self);

2835

2836

next = findchar(self_s, self_len, from_c);

2837

2838

if (next == NULL) {

2839

/* No matches; return the original string */

2840

return return_self(self);

2841

}

2842

2843

/* Need to make a new string */

2844

result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);

2845

if (result == NULL)

2846

return NULL;

2847

result_s = PyString_AS_STRING(result);

2848

Py_MEMCPY(result_s, self_s, self_len);

2849

2850

/* change everything in-place, starting with this one */

2851

start = result_s + (next-self_s);

2852

*start = to_c;

2853

start++;

2854

end = result_s + self_len;

2855

2856

while (--maxcount > 0) {

2857

next = findchar(start, end-start, from_c);

2858

if (next == NULL)

2859

break;

2860

*next = to_c;

2861

start = next+1;

2862

}

2863

2864

return result;

2865

}

2866

2867

/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */

2868

Py_LOCAL(PyStringObject *)

2869

replace_substring_in_place(PyStringObject *self,

2870

const char *from_s, Py_ssize_t from_len,

2871

const char *to_s, Py_ssize_t to_len,

2872

Py_ssize_t maxcount)

2873

{

2874

char *result_s, *start, *end;

2875

char *self_s;

2876

Py_ssize_t self_len, offset;

2877

PyStringObject *result;

2878

2879

/* The result string will be the same size */

2880

2881

self_s = PyString_AS_STRING(self);

2882

self_len = PyString_GET_SIZE(self);

2883

2884

offset = findstring(self_s, self_len,

2885

from_s, from_len,

2886

0, self_len, FORWARD);

2887

if (offset == -1) {

2888

/* No matches; return the original string */

2889

return return_self(self);

2890

}

2891

2892

/* Need to make a new string */

2893

result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);

2894

if (result == NULL)

2895

return NULL;

2896

result_s = PyString_AS_STRING(result);

2897

Py_MEMCPY(result_s, self_s, self_len);

2898

2899

/* change everything in-place, starting with this one */

2900

start = result_s + offset;

2901

Py_MEMCPY(start, to_s, from_len);

2902

start += from_len;

2903

end = result_s + self_len;

2904

2905

while ( --maxcount > 0) {

2906

offset = findstring(start, end-start,

2907

from_s, from_len,

2908

0, end-start, FORWARD);

2909

if (offset==-1)

2910

break;

2911

Py_MEMCPY(start+offset, to_s, from_len);

2912

start += offset+from_len;

2913

}

2914

2915

return result;

2916

}

2917

2918

/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */

2919

Py_LOCAL(PyStringObject *)

2920

replace_single_character(PyStringObject *self,

2921

char from_c,

2922

const char *to_s, Py_ssize_t to_len,

2923

Py_ssize_t maxcount)

2924

{

2925

char *self_s, *result_s;

2926

char *start, *next, *end;

2927

Py_ssize_t self_len, result_len;

2928

Py_ssize_t count, product;

2929

PyStringObject *result;

2930

2931

self_s = PyString_AS_STRING(self);

2932

self_len = PyString_GET_SIZE(self);

2933

2934

count = countchar(self_s, self_len, from_c, maxcount);

2935

if (count == 0) {

2936

/* no matches, return unchanged */

2937

return return_self(self);

2938

}

2939

2940

/* use the difference between current and new, hence the "-1" */

2941

/* result_len = self_len + count * (to_len-1) */

2942

product = count * (to_len-1);

2943

if (product / (to_len-1) != count) {

2944

PyErr_SetString(PyExc_OverflowError, "replace string is too long");

2945

return NULL;

2946

}

2947

result_len = self_len + product;

2948

if (result_len < 0) {

2949

PyErr_SetString(PyExc_OverflowError, "replace string is too long");

2950

return NULL;

2951

}

2952

2953

if ( (result = (PyStringObject *)

2954

PyString_FromStringAndSize(NULL, result_len)) == NULL)

2955

return NULL;

2956

result_s = PyString_AS_STRING(result);

2957

2958

start = self_s;

2959

end = self_s + self_len;

2960

while (count-- > 0) {

2961

next = findchar(start, end-start, from_c);

2962

if (next == NULL)

2963

break;

2964

2965

if (next == start) {

2966

/* replace with the 'to' */

2967

Py_MEMCPY(result_s, to_s, to_len);

2968

result_s += to_len;

2969

start += 1;

2970

} else {

2971

/* copy the unchanged old then the 'to' */

2972

Py_MEMCPY(result_s, start, next-start);

2973

result_s += (next-start);

2974

Py_MEMCPY(result_s, to_s, to_len);

2975

result_s += to_len;

2976

start = next+1;

2977

}

2978

}

2979

/* Copy the remainder of the remaining string */

2980

Py_MEMCPY(result_s, start, end-start);

2981

2982

return result;

2983

}

2984

2985

/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */

2986

Py_LOCAL(PyStringObject *)

2987

replace_substring(PyStringObject *self,

2988

const char *from_s, Py_ssize_t from_len,

2989

const char *to_s, Py_ssize_t to_len,

2990

Py_ssize_t maxcount) {

2991

char *self_s, *result_s;

2992

char *start, *next, *end;

2993

Py_ssize_t self_len, result_len;

2994

Py_ssize_t count, offset, product;

2995

PyStringObject *result;

2996

2997

self_s = PyString_AS_STRING(self);

2998

self_len = PyString_GET_SIZE(self);

2999

3000

count = countstring(self_s, self_len,

3001

from_s, from_len,

3002

0, self_len, FORWARD, maxcount);

3003

if (count == 0) {

3004

/* no matches, return unchanged */

3005

return return_self(self);

3006

}

3007

3008

/* Check for overflow */

3009

/* result_len = self_len + count * (to_len-from_len) */

3010

product = count * (to_len-from_len);

3011

if (product / (to_len-from_len) != count) {

3012

PyErr_SetString(PyExc_OverflowError, "replace string is too long");

3013

return NULL;

3014

}

3015

result_len = self_len + product;

3016

if (result_len < 0) {

3017

PyErr_SetString(PyExc_OverflowError, "replace string is too long");

3018

return NULL;

3019

}

3020

3021

if ( (result = (PyStringObject *)

3022

PyString_FromStringAndSize(NULL, result_len)) == NULL)

3023

return NULL;

3024

result_s = PyString_AS_STRING(result);

3025

3026

start = self_s;

3027

end = self_s + self_len;

3028

while (count-- > 0) {

3029

offset = findstring(start, end-start,

3030

from_s, from_len,

3031

0, end-start, FORWARD);

3032

if (offset == -1)

3033

break;

3034

next = start+offset;

3035

if (next == start) {

3036

/* replace with the 'to' */

3037

Py_MEMCPY(result_s, to_s, to_len);

3038

result_s += to_len;

3039

start += from_len;

3040

} else {

3041

/* copy the unchanged old then the 'to' */

3042

Py_MEMCPY(result_s, start, next-start);

3043

result_s += (next-start);

3044

Py_MEMCPY(result_s, to_s, to_len);

3045

result_s += to_len;

3046

start = next+from_len;

3047

}

3048

}

3049

/* Copy the remainder of the remaining string */

3050

Py_MEMCPY(result_s, start, end-start);

3051

3052

return result;

3053

}

3054

3055

3056

/* Dispatcher for str.replace(): picks the specialised algorithm that
   matches the lengths of `from' and `to'.

   maxcount < 0 means "no limit" and is turned into PY_SSIZE_T_MAX.
   Returns a new reference (possibly self, via return_self()), or NULL
   with an exception set by the selected algorithm.

   NOTE(review): with an explicit non-negative maxcount an empty self is
   returned unchanged by the first test below, so "".replace("", "A", 1)
   yields "" while "".replace("", "A") yields "A".  This is the existing
   behaviour and is deliberately preserved here - confirm whether it is
   intended before changing it. */
Py_LOCAL(PyStringObject *)
replace(PyStringObject *self,
        const char *from_s, Py_ssize_t from_len,
        const char *to_s, Py_ssize_t to_len,
        Py_ssize_t maxcount)
{
    if (maxcount < 0) {
        maxcount = PY_SSIZE_T_MAX;
    } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
        /* nothing to do; return the original string */
        return return_self(self);
    }

    /* maxcount >= 1 from here on */
    if (from_len == 0 && to_len == 0) {
        /* nothing to do; return the original string */
        return return_self(self);
    }

    /* Handle zero-length special cases */

    if (from_len == 0) {
        /* insert the 'to' string everywhere.   */
        /*    >>> "Python".replace("", ".")     */
        /*    '.P.y.t.h.o.n.'                   */
        return replace_interleave(self, to_s, to_len, maxcount);
    }

    /* Except for "".replace("", "A") == "A" there is no way beyond this */
    /* point for an empty self string to generate a non-empty string */
    /* Special case so the remaining code always gets a non-empty string */
    if (PyString_GET_SIZE(self) == 0) {
        return return_self(self);
    }

    if (to_len == 0) {
        /* delete all occurrences of 'from' string */
        if (from_len == 1) {
            return replace_delete_single_character(
                self, from_s[0], maxcount);
        } else {
            return replace_delete_substring(self, from_s, from_len, maxcount);
        }
    }

    /* Handle special case where both strings have the same length */

    if (from_len == to_len) {
        if (from_len == 1) {
            return replace_single_character_in_place(
                self,
                from_s[0],
                to_s[0],
                maxcount);
        } else {
            return replace_substring_in_place(
                self, from_s, from_len, to_s, to_len, maxcount);
        }
    }

    /* Otherwise use the more generic algorithms */
    if (from_len == 1) {
        return replace_single_character(self, from_s[0],
                                        to_s, to_len, maxcount);
    } else {
        /* len('from')>=2, len('to')>=1 */
        return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
    }
}

3125

3126

PyDoc_STRVAR(replace__doc__,

3127

"S.replace (old, new[, count]) -> string\n\

3128

\n\

3129

Return a copy of string S with all occurrences of substring\n\

3130

old replaced by new. If the optional argument count is\n\

3131

given, only the first count occurrences are replaced.");

3132

3133

static PyObject *

3134

string_replace(PyStringObject *self, PyObject *args)

3135

{

3136

Py_ssize_t count = -1;

3137

PyObject *from, *to;

3138

const char *from_s, *to_s;

3139

Py_ssize_t from_len, to_len;

3140

3141

if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))

3142

return NULL;

3143

3144

if (PyString_Check(from)) {

3145

from_s = PyString_AS_STRING(from);

3146

from_len = PyString_GET_SIZE(from);

3147

}

3148

#ifdef Py_USING_UNICODE

3149

if (PyUnicode_Check(from))

3150

return PyUnicode_Replace((PyObject *)self,

3151

from, to, count);

3152

#endif

3153

else if (PyObject_AsCharBuffer(from, &from_s, &from_len))

3154

return NULL;

3155

3156

if (PyString_Check(to)) {

3157

to_s = PyString_AS_STRING(to);

3158

to_len = PyString_GET_SIZE(to);

3159

}

3160

#ifdef Py_USING_UNICODE

3161

else if (PyUnicode_Check(to))

3162

return PyUnicode_Replace((PyObject *)self,

3163

from, to, count);

3164

#endif

3165

else if (PyObject_AsCharBuffer(to, &to_s, &to_len))

3166

return NULL;

3167

3168

return (PyObject *)replace((PyStringObject *) self,

3169

from_s, from_len,

3170

to_s, to_len, count);

3171

}

3172

3173

/** End DALKE **/

3174

3175

/* Matches the end (direction >= 0) or start (direction < 0) of self

3176

* against substr, using the start and end arguments. Returns

3177

* -1 on error, 0 if not found and 1 if found.

3178

*/

3179

Py_LOCAL(int)

3180

_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,

3181

Py_ssize_t end, int direction)

3182

{

3183

Py_ssize_t len = PyString_GET_SIZE(self);

3184

Py_ssize_t slen;

3185

const char* sub;

3186

const char* str;

3187

3188

if (PyString_Check(substr)) {

3189

sub = PyString_AS_STRING(substr);

3190

slen = PyString_GET_SIZE(substr);

3191

}

3192

#ifdef Py_USING_UNICODE

3193

else if (PyUnicode_Check(substr))

3194

return PyUnicode_Tailmatch((PyObject *)self,

3195

substr, start, end, direction);

3196

#endif

3197

else if (PyObject_AsCharBuffer(substr, &sub, &slen))

3198

return -1;

3199

str = PyString_AS_STRING(self);

3200

3201

string_adjust_indices(&start, &end, len);

3202

3203

if (direction < 0) {

3204

/* startswith */

3205

if (start+slen > len)

3206

return 0;

3207

} else {

3208

/* endswith */

3209

if (end-start < slen || start > len)

3210

return 0;

3211

3212

if (end-slen > start)

3213

start = end - slen;

3214

}

3215

if (end-start >= slen)

3216

return ! memcmp(str+start, sub, slen);

3217

return 0;

3218

}

3219

3220

3221

PyDoc_STRVAR(startswith__doc__,

3222

"S.startswith(prefix[, start[, end]]) -> bool\n\

3223

\n\

3224

Return True if S starts with the specified prefix, False otherwise.\n\

3225

With optional start, test S beginning at that position.\n\

3226

With optional end, stop comparing S at that position.\n\

3227

prefix can also be a tuple of strings to try.");

3228

3229

static PyObject *

3230

string_startswith(PyStringObject *self, PyObject *args)

3231

{

3232

Py_ssize_t start = 0;

3233

Py_ssize_t end = PY_SSIZE_T_MAX;

3234

PyObject *subobj;

3235

int result;

3236

3237

if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,

3238

_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))

3239

return NULL;

3240

if (PyTuple_Check(subobj)) {

3241

Py_ssize_t i;

3242

for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {

3243

result = _string_tailmatch(self,

3244

PyTuple_GET_ITEM(subobj, i),

3245

start, end, -1);

3246

if (result == -1)

3247

return NULL;

3248

else if (result) {

3249

Py_RETURN_TRUE;

3250

}

3251

}

3252

Py_RETURN_FALSE;

3253

}

3254

result = _string_tailmatch(self, subobj, start, end, -1);

3255

if (result == -1)

3256

return NULL;

3257

else

3258

return PyBool_FromLong(result);

3259

}

3260

3261

3262

PyDoc_STRVAR(endswith__doc__,

3263

"S.endswith(suffix[, start[, end]]) -> bool\n\

3264

\n\

3265

Return True if S ends with the specified suffix, False otherwise.\n\

3266

With optional start, test S beginning at that position.\n\

3267

With optional end, stop comparing S at that position.\n\

3268

suffix can also be a tuple of strings to try.");

3269

3270

static PyObject *

3271

string_endswith(PyStringObject *self, PyObject *args)

3272

{

3273

Py_ssize_t start = 0;

3274

Py_ssize_t end = PY_SSIZE_T_MAX;

3275

PyObject *subobj;

3276

int result;

3277

3278

if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,

3279

_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))

3280

return NULL;

3281

if (PyTuple_Check(subobj)) {

3282

Py_ssize_t i;

3283

for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {

3284

result = _string_tailmatch(self,

3285

PyTuple_GET_ITEM(subobj, i),

3286

start, end, +1);

3287

if (result == -1)

3288

return NULL;

3289

else if (result) {

3290

Py_RETURN_TRUE;

3291

}

3292

}

3293

Py_RETURN_FALSE;

3294

}

3295

result = _string_tailmatch(self, subobj, start, end, +1);

3296

if (result == -1)

3297

return NULL;

3298

else

3299

return PyBool_FromLong(result);

3300

}

3301

3302

3303

PyDoc_STRVAR(encode__doc__,

3304

"S.encode([encoding[,errors]]) -> object\n\

3305

\n\

3306

Encodes S using the codec registered for encoding. encoding defaults\n\

3307

to the default encoding. errors may be given to set a different error\n\

3308

handling scheme. Default is 'strict' meaning that encoding errors raise\n\

3309

a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\

3310

'xmlcharrefreplace' as well as any other name registered with\n\

3311

codecs.register_error that is able to handle UnicodeEncodeErrors.");

3312

3313

static PyObject *

3314

string_encode(PyStringObject *self, PyObject *args)

3315

{

3316

char *encoding = NULL;

3317

char *errors = NULL;

3318

PyObject *v;

3319

3320

if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))

3321

return NULL;

3322

v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);

3323

if (v == NULL)

3324

goto onError;

3325

if (!PyString_Check(v) && !PyUnicode_Check(v)) {

3326

PyErr_Format(PyExc_TypeError,

3327

"encoder did not return a string/unicode object "

3328

"(type=%.400s)",

3329

Py_TYPE(v)->tp_name);

3330

Py_DECREF(v);

3331

return NULL;

3332

}

3333

return v;

3334

3335

onError:

3336

return NULL;

3337

}

3338

3339

3340

PyDoc_STRVAR(decode__doc__,

3341

"S.decode([encoding[,errors]]) -> object\n\

3342

\n\

3343

Decodes S using the codec registered for encoding. encoding defaults\n\

3344

to the default encoding. errors may be given to set a different error\n\

3345

handling scheme. Default is 'strict' meaning that encoding errors raise\n\

3346

a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\

3347

as well as any other name registerd with codecs.register_error that is\n\

3348

able to handle UnicodeDecodeErrors.");

3349

3350

static PyObject *

3351

string_decode(PyStringObject *self, PyObject *args)

3352

{

3353

char *encoding = NULL;

3354

char *errors = NULL;

3355

PyObject *v;

3356

3357

if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))

3358

return NULL;

3359

v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);

3360

if (v == NULL)

3361

goto onError;

3362

if (!PyString_Check(v) && !PyUnicode_Check(v)) {

3363

PyErr_Format(PyExc_TypeError,

3364

"decoder did not return a string/unicode object "

3365

"(type=%.400s)",

3366

Py_TYPE(v)->tp_name);

3367

Py_DECREF(v);

3368

return NULL;

3369

}

3370

return v;

3371

3372

onError:

3373

return NULL;

3374

}

3375

3376

3377

PyDoc_STRVAR(expandtabs__doc__,

3378

"S.expandtabs([tabsize]) -> string\n\

3379

\n\

3380

Return a copy of S where all tab characters are expanded using spaces.\n\

3381

If tabsize is not given, a tab size of 8 characters is assumed.");

3382

3383

static PyObject*

3384

string_expandtabs(PyStringObject *self, PyObject *args)

3385

{

3386

const char *e, *p, *qe;

3387

char *q;

3388

Py_ssize_t i, j, incr;

3389

PyObject *u;

3390

int tabsize = 8;

3391

3392

if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))

3393

return NULL;

3394

3395

/* First pass: determine size of output string */

3396

i = 0; /* chars up to and including most recent \n or \r */

3397

j = 0; /* chars since most recent \n or \r (use in tab calculations) */

3398

e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */

3399

for (p = PyString_AS_STRING(self); p < e; p++)

3400

if (*p == '\t') {

3401

if (tabsize > 0) {

3402

incr = tabsize - (j % tabsize);

3403

if (j > PY_SSIZE_T_MAX - incr)

3404

goto overflow1;

3405

j += incr;

3406

}

3407

}

3408

else {

3409

if (j > PY_SSIZE_T_MAX - 1)

3410

goto overflow1;

3411

j++;

3412

if (*p == '\n' || *p == '\r') {

3413

if (i > PY_SSIZE_T_MAX - j)

3414

goto overflow1;

3415

i += j;

3416

j = 0;

3417

}

3418

}

3419

3420

if (i > PY_SSIZE_T_MAX - j)

3421

goto overflow1;

3422

3423

/* Second pass: create output string and fill it */

3424

u = PyString_FromStringAndSize(NULL, i + j);

3425

if (!u)

3426

return NULL;

3427

3428

j = 0; /* same as in first pass */

3429

q = PyString_AS_STRING(u); /* next output char */

3430

qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */

3431

3432

for (p = PyString_AS_STRING(self); p < e; p++)

3433

if (*p == '\t') {

3434

if (tabsize > 0) {

3435

i = tabsize - (j % tabsize);

3436

j += i;

3437

while (i--) {

3438

if (q >= qe)

3439

goto overflow2;

3440

*q++ = ' ';

3441

}

3442

}

3443

}

3444

else {

3445

if (q >= qe)

3446

goto overflow2;

3447

*q++ = *p;

3448

j++;

3449

if (*p == '\n' || *p == '\r')

3450

j = 0;

3451

}

3452

3453

return u;

3454

3455

overflow2:

3456

Py_DECREF(u);

3457

overflow1:

3458

PyErr_SetString(PyExc_OverflowError, "new string is too long");

3459

return NULL;

3460

}

3461

3462

/* Build a new string consisting of `left` copies of `fill`, the bytes of
 * self, then `right` copies of `fill`.  Negative counts are treated as 0.
 * When nothing has to be added and self is an exact str, self is returned
 * with a new reference instead of a copy.  Returns NULL if the allocation
 * fails.
 */
Py_LOCAL_INLINE(PyObject *)
pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
{
    PyObject *padded;
    char *out;

    left = (left < 0) ? 0 : left;
    right = (right < 0) ? 0 : right;

    if (!left && !right && PyString_CheckExact(self)) {
        Py_INCREF(self);
        return (PyObject *)self;
    }

    padded = PyString_FromStringAndSize(NULL,
                                        left + PyString_GET_SIZE(self) + right);
    if (padded == NULL)
        return padded;

    out = PyString_AS_STRING(padded);
    if (left)
        memset(out, fill, left);
    Py_MEMCPY(out + left,
              PyString_AS_STRING(self),
              PyString_GET_SIZE(self));
    if (right)
        memset(out + left + PyString_GET_SIZE(self), fill, right);

    return padded;
}

3492

3493

PyDoc_STRVAR(ljust__doc__,

3494

"S.ljust(width[, fillchar]) -> string\n"

3495

"\n"

3496

"Return S left justified in a string of length width. Padding is\n"

3497

"done using the specified fill character (default is a space).");

3498

3499

static PyObject *

3500

string_ljust(PyStringObject *self, PyObject *args)

3501

{

3502

Py_ssize_t width;

3503

char fillchar = ' ';

3504

3505

if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))

3506

return NULL;

3507

3508

if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {

3509

Py_INCREF(self);

3510

return (PyObject*) self;

3511

}

3512

3513

return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);

3514

}

3515

3516

3517

PyDoc_STRVAR(rjust__doc__,

3518

"S.rjust(width[, fillchar]) -> string\n"

3519

"\n"

3520

"Return S right justified in a string of length width. Padding is\n"

3521

"done using the specified fill character (default is a space)");

3522

3523

static PyObject *

3524

string_rjust(PyStringObject *self, PyObject *args)

3525

{

3526

Py_ssize_t width;

3527

char fillchar = ' ';

3528

3529

if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))

3530

return NULL;

3531

3532

if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {

3533

Py_INCREF(self);

3534

return (PyObject*) self;

3535

}

3536

3537

return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);

3538

}

3539

3540

3541

PyDoc_STRVAR(center__doc__,

3542

"S.center(width[, fillchar]) -> string\n"

3543

"\n"

3544

"Return S centered in a string of length width. Padding is\n"

3545

"done using the specified fill character (default is a space)");

3546

3547

static PyObject *

3548

string_center(PyStringObject *self, PyObject *args)

3549

{

3550

Py_ssize_t marg, left;

3551

Py_ssize_t width;

3552

char fillchar = ' ';

3553

3554

if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))

3555

return NULL;

3556

3557

if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {

3558

Py_INCREF(self);

3559

return (PyObject*) self;

3560

}

3561

3562

marg = width - PyString_GET_SIZE(self);

3563

left = marg / 2 + (marg & width & 1);

3564

3565

return pad(self, left, marg - left, fillchar);

3566

}

3567

3568

PyDoc_STRVAR(zfill__doc__,

3569

"S.zfill(width) -> string\n"

3570

"\n"

3571

"Pad a numeric string S with zeros on the left, to fill a field\n"

3572

"of the specified width. The string S is never truncated.");

3573

3574

static PyObject *

3575

string_zfill(PyStringObject *self, PyObject *args)

3576

{

3577

Py_ssize_t fill;

3578

PyObject *s;

3579

char *p;

3580

Py_ssize_t width;

3581

3582

if (!PyArg_ParseTuple(args, "n:zfill", &width))

3583

return NULL;

3584

3585

if (PyString_GET_SIZE(self) >= width) {

3586

if (PyString_CheckExact(self)) {

3587

Py_INCREF(self);

3588

return (PyObject*) self;

3589

}

3590

else

3591

return PyString_FromStringAndSize(

3592

PyString_AS_STRING(self),

3593

PyString_GET_SIZE(self)

3594

);

3595

}

3596

3597

fill = width - PyString_GET_SIZE(self);

3598

3599

s = pad(self, fill, 0, '0');

3600

3601

if (s == NULL)

3602

return NULL;

3603

3604

p = PyString_AS_STRING(s);

3605

if (p[fill] == '+' || p[fill] == '-') {

3606

/* move sign to beginning of string */

3607

p[0] = p[fill];

3608

p[fill] = '0';

3609

}

3610

3611

return (PyObject*) s;

3612

}

3613

3614

PyDoc_STRVAR(isspace__doc__,

3615

"S.isspace() -> bool\n\

3616

\n\

3617

Return True if all characters in S are whitespace\n\

3618

and there is at least one character in S, False otherwise.");

3619

3620

static PyObject*

3621

string_isspace(PyStringObject *self)

3622

{

3623

register const unsigned char *p

3624

= (unsigned char *) PyString_AS_STRING(self);

3625

register const unsigned char *e;

3626

3627

/* Shortcut for single character strings */

3628

if (PyString_GET_SIZE(self) == 1 &&

3629

isspace(*p))

3630

return PyBool_FromLong(1);

3631

3632

/* Special case for empty strings */

3633

if (PyString_GET_SIZE(self) == 0)

3634

return PyBool_FromLong(0);

3635

3636

e = p + PyString_GET_SIZE(self);

3637

for (; p < e; p++) {

3638

if (!isspace(*p))

3639

return PyBool_FromLong(0);

3640

}

3641

return PyBool_FromLong(1);

3642

}

3643

3644

3645

PyDoc_STRVAR(isalpha__doc__,

3646

"S.isalpha() -> bool\n\

3647

\n\

3648

Return True if all characters in S are alphabetic\n\

3649

and there is at least one character in S, False otherwise.");

3650

3651

static PyObject*

3652

string_isalpha(PyStringObject *self)

3653

{

3654

register const unsigned char *p

3655

= (unsigned char *) PyString_AS_STRING(self);

3656

register const unsigned char *e;

3657

3658

/* Shortcut for single character strings */

3659

if (PyString_GET_SIZE(self) == 1 &&

3660

isalpha(*p))

3661

return PyBool_FromLong(1);

3662

3663

/* Special case for empty strings */

3664

if (PyString_GET_SIZE(self) == 0)

3665

return PyBool_FromLong(0);

3666

3667

e = p + PyString_GET_SIZE(self);

3668

for (; p < e; p++) {

3669

if (!isalpha(*p))

3670

return PyBool_FromLong(0);

3671

}

3672

return PyBool_FromLong(1);

3673

}

3674

3675

3676

PyDoc_STRVAR(isalnum__doc__,

3677

"S.isalnum() -> bool\n\

3678

\n\

3679

Return True if all characters in S are alphanumeric\n\

3680

and there is at least one character in S, False otherwise.");

3681

3682

static PyObject*

3683

string_isalnum(PyStringObject *self)

3684

{

3685

register const unsigned char *p

3686

= (unsigned char *) PyString_AS_STRING(self);

3687

register const unsigned char *e;

3688

3689

/* Shortcut for single character strings */

3690

if (PyString_GET_SIZE(self) == 1 &&

3691

isalnum(*p))

3692

return PyBool_FromLong(1);

3693

3694

/* Special case for empty strings */

3695

if (PyString_GET_SIZE(self) == 0)

3696

return PyBool_FromLong(0);

3697

3698

e = p + PyString_GET_SIZE(self);

3699

for (; p < e; p++) {

3700

if (!isalnum(*p))

3701

return PyBool_FromLong(0);

3702

}

3703

return PyBool_FromLong(1);

3704

}

3705

3706

3707

PyDoc_STRVAR(isdigit__doc__,

3708

"S.isdigit() -> bool\n\

3709

\n\

3710

Return True if all characters in S are digits\n\

3711

and there is at least one character in S, False otherwise.");

3712

3713

static PyObject*

3714

string_isdigit(PyStringObject *self)

3715

{

3716

register const unsigned char *p

3717

= (unsigned char *) PyString_AS_STRING(self);

3718

register const unsigned char *e;

3719

3720

/* Shortcut for single character strings */

3721

if (PyString_GET_SIZE(self) == 1 &&

3722

isdigit(*p))

3723

return PyBool_FromLong(1);

3724

3725

/* Special case for empty strings */

3726

if (PyString_GET_SIZE(self) == 0)

3727

return PyBool_FromLong(0);

3728

3729

e = p + PyString_GET_SIZE(self);

3730

for (; p < e; p++) {

3731

if (!isdigit(*p))

3732

return PyBool_FromLong(0);

3733

}

3734

return PyBool_FromLong(1);

3735

}

3736

3737

3738

PyDoc_STRVAR(islower__doc__,

3739

"S.islower() -> bool\n\

3740

\n\

3741

Return True if all cased characters in S are lowercase and there is\n\

3742

at least one cased character in S, False otherwise.");

3743

3744

static PyObject*

3745

string_islower(PyStringObject *self)

3746

{

3747

register const unsigned char *p

3748

= (unsigned char *) PyString_AS_STRING(self);

3749

register const unsigned char *e;

3750

int cased;

3751

3752

/* Shortcut for single character strings */

3753

if (PyString_GET_SIZE(self) == 1)

3754

return PyBool_FromLong(islower(*p) != 0);

3755

3756

/* Special case for empty strings */

3757

if (PyString_GET_SIZE(self) == 0)

3758

return PyBool_FromLong(0);

3759

3760

e = p + PyString_GET_SIZE(self);

3761

cased = 0;

3762

for (; p < e; p++) {

3763

if (isupper(*p))

3764

return PyBool_FromLong(0);

3765

else if (!cased && islower(*p))

3766

cased = 1;

3767

}

3768

return PyBool_FromLong(cased);

3769

}

3770

3771

3772

PyDoc_STRVAR(isupper__doc__,

3773

"S.isupper() -> bool\n\

3774

\n\

3775

Return True if all cased characters in S are uppercase and there is\n\

3776

at least one cased character in S, False otherwise.");

3777

3778

static PyObject*

3779

string_isupper(PyStringObject *self)

3780

{

3781

register const unsigned char *p

3782

= (unsigned char *) PyString_AS_STRING(self);

3783

register const unsigned char *e;

3784

int cased;

3785

3786

/* Shortcut for single character strings */

3787

if (PyString_GET_SIZE(self) == 1)

3788

return PyBool_FromLong(isupper(*p) != 0);

3789

3790

/* Special case for empty strings */

3791

if (PyString_GET_SIZE(self) == 0)

3792

return PyBool_FromLong(0);

3793

3794

e = p + PyString_GET_SIZE(self);

3795

cased = 0;

3796

for (; p < e; p++) {

3797

if (islower(*p))

3798

return PyBool_FromLong(0);

3799

else if (!cased && isupper(*p))

3800

cased = 1;

3801

}

3802

return PyBool_FromLong(cased);

3803

}

3804

3805

3806

PyDoc_STRVAR(istitle__doc__,

3807

"S.istitle() -> bool\n\

3808

\n\

3809

Return True if S is a titlecased string and there is at least one\n\

3810

character in S, i.e. uppercase characters may only follow uncased\n\

3811

characters and lowercase characters only cased ones. Return False\n\

3812

otherwise.");

3813

3814

static PyObject*

3815

string_istitle(PyStringObject *self, PyObject *uncased)

3816

{

3817

register const unsigned char *p

3818

= (unsigned char *) PyString_AS_STRING(self);

3819

register const unsigned char *e;

3820

int cased, previous_is_cased;

3821

3822

/* Shortcut for single character strings */

3823

if (PyString_GET_SIZE(self) == 1)

3824

return PyBool_FromLong(isupper(*p) != 0);

3825

3826

/* Special case for empty strings */

3827

if (PyString_GET_SIZE(self) == 0)

3828

return PyBool_FromLong(0);

3829

3830

e = p + PyString_GET_SIZE(self);

3831

cased = 0;

3832

previous_is_cased = 0;

3833

for (; p < e; p++) {

3834

register const unsigned char ch = *p;

3835

3836

if (isupper(ch)) {

3837

if (previous_is_cased)

3838

return PyBool_FromLong(0);

3839

previous_is_cased = 1;

3840

cased = 1;

3841

}

3842

else if (islower(ch)) {

3843

if (!previous_is_cased)

3844

return PyBool_FromLong(0);

3845

previous_is_cased = 1;

3846

cased = 1;

3847

}

3848

else

3849

previous_is_cased = 0;

3850

}

3851

return PyBool_FromLong(cased);

3852

}

3853

3854

3855

PyDoc_STRVAR(splitlines__doc__,

3856

"S.splitlines([keepends]) -> list of strings\n\

3857

\n\

3858

Return a list of the lines in S, breaking at line boundaries.\n\

3859

Line breaks are not included in the resulting list unless keepends\n\

3860

is given and true.");

3861

3862

static PyObject*

3863

string_splitlines(PyStringObject *self, PyObject *args)

3864

{

3865

register Py_ssize_t i;

3866

register Py_ssize_t j;

3867

Py_ssize_t len;

3868

int keepends = 0;

3869

PyObject *list;

3870

PyObject *str;

3871

char *data;

3872

3873

if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))

3874

return NULL;

3875

3876

data = PyString_AS_STRING(self);

3877

len = PyString_GET_SIZE(self);

3878

3879

/* This does not use the preallocated list because splitlines is

3880

usually run with hundreds of newlines. The overhead of

3881

switching between PyList_SET_ITEM and append causes about a

3882

2-3% slowdown for that common case. A smarter implementation

3883

could move the if check out, so the SET_ITEMs are done first

3884

and the appends only done when the prealloc buffer is full.

3885

That's too much work for little gain.*/

3886

3887

list = PyList_New(0);

3888

if (!list)

3889

goto onError;

3890

3891

for (i = j = 0; i < len; ) {

3892

Py_ssize_t eol;

3893

3894

/* Find a line and append it */

3895

while (i < len && data[i] != '\n' && data[i] != '\r')

3896

i++;

3897

3898

/* Skip the line break reading CRLF as one line break */

3899

eol = i;

3900

if (i < len) {

3901

if (data[i] == '\r' && i + 1 < len &&

3902

data[i+1] == '\n')

3903

i += 2;

3904

else

3905

i++;

3906

if (keepends)

3907

eol = i;

3908

}

3909

SPLIT_APPEND(data, j, eol);

3910

j = i;

3911

}

3912

if (j < len) {

3913

SPLIT_APPEND(data, j, len);

3914

}

3915

3916

return list;

3917

3918

onError:

3919

Py_XDECREF(list);

3920

return NULL;

3921

}

3922

3923

#undef SPLIT_APPEND

3924

#undef SPLIT_ADD

3925

#undef MAX_PREALLOC

3926

#undef PREALLOC_SIZE

3927

3928

static PyObject *

3929

string_getnewargs(PyStringObject *v)

3930

{

3931

return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));

3932

}

3933

3934

3935

#include "stringlib/string_format.h"

3936

3937

PyDoc_STRVAR(format__doc__,

3938

"S.format(*args, **kwargs) -> unicode\n\

3939

\n\

3940

");

3941

3942

PyDoc_STRVAR(p_format__doc__,

3943

"S.__format__(format_spec) -> unicode\n\

3944

\n\

3945

");

3946

3947

3948

static PyMethodDef

3949

string_methods[] = {

3950

/* Counterparts of the obsolete stropmodule functions; except

3951

string.maketrans(). */

3952

{"join", (PyCFunction)string_join, METH_O, join__doc__},

3953

{"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},

3954

{"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},

3955

{"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},

3956

{"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},

3957

{"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},

3958

{"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},

3959

{"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},

3960

{"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},

3961

{"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},

3962

{"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},

3963

{"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},

3964

{"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,

3965

capitalize__doc__},

3966

{"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},

3967

{"endswith", (PyCFunction)string_endswith, METH_VARARGS,

3968

endswith__doc__},

3969

{"partition", (PyCFunction)string_partition, METH_O, partition__doc__},

3970

{"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},

3971

{"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},

3972

{"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},

3973

{"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},

3974

{"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},

3975

{"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},

3976

{"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},

3977

{"rpartition", (PyCFunction)string_rpartition, METH_O,

3978

rpartition__doc__},

3979

{"startswith", (PyCFunction)string_startswith, METH_VARARGS,

3980

startswith__doc__},

3981

{"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},

3982

{"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,

3983

swapcase__doc__},

3984

{"translate", (PyCFunction)string_translate, METH_VARARGS,

3985

translate__doc__},

3986

{"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},

3987

{"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},

3988

{"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},

3989

{"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},

3990

{"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},

3991

{"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},

3992

{"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},

3993

{"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},

3994

{"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},

3995

{"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},

3996

{"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},

3997

{"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,

3998

expandtabs__doc__},

3999

{"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,

4000

splitlines__doc__},

4001

{"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},

4002

{NULL, NULL} /* sentinel */

4003

};

4004

4005

static PyObject *

4006

str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);

4007

4008

static PyObject *

4009

string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)

4010

{

4011

PyObject *x = NULL;

4012

static char *kwlist[] = {"object", 0};

4013

4014

if (type != &PyString_Type)

4015

return str_subtype_new(type, args, kwds);

4016

if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))

4017

return NULL;

4018

if (x == NULL)

4019

return PyString_FromString("");

4020

return PyObject_Str(x);

4021

}

4022

4023

static PyObject *

4024

str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)

4025

{

4026

PyObject *tmp, *pnew;

4027

Py_ssize_t n;

4028

4029

assert(PyType_IsSubtype(type, &PyString_Type));

4030

tmp = string_new(&PyString_Type, args, kwds);

4031

if (tmp == NULL)

4032

return NULL;

4033

assert(PyString_CheckExact(tmp));

4034

n = PyString_GET_SIZE(tmp);

4035

pnew = type->tp_alloc(type, n);

4036

if (pnew != NULL) {

4037

Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);

4038

((PyStringObject *)pnew)->ob_shash =

4039

((PyStringObject *)tmp)->ob_shash;

4040

((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;

4041

}

4042

Py_DECREF(tmp);

4043

return pnew;

4044

}

4045

4046

static PyObject *

4047

basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)

4048

{

4049

PyErr_SetString(PyExc_TypeError,

4050

"The basestring type cannot be instantiated");

4051

return NULL;

4052

}

4053

4054

/* nb_remainder slot: `v % w`.  Formats with PyString_Format() when the
 * left operand is a string, and returns NotImplemented otherwise. */
static PyObject *
string_mod(PyObject *v, PyObject *w)
{
    if (PyString_Check(v))
        return PyString_Format(v, w);

    Py_INCREF(Py_NotImplemented);
    return Py_NotImplemented;
}

4063

4064

PyDoc_STRVAR(basestring_doc,

4065

"Type basestring cannot be instantiated; it is the base for str and unicode.");

4066

4067

static PyNumberMethods string_as_number = {

4068

0, /*nb_add*/

4069

0, /*nb_subtract*/

4070

0, /*nb_multiply*/

4071

0, /*nb_divide*/

4072

string_mod, /*nb_remainder*/

4073

};

4074

4075

4076

PyTypeObject PyBaseString_Type = {

4077

PyVarObject_HEAD_INIT(&PyType_Type, 0)

4078

"basestring",

4079

0,

4080

0,

4081

0, /* tp_dealloc */

4082

0, /* tp_print */

4083

0, /* tp_getattr */

4084

0, /* tp_setattr */

4085

0, /* tp_compare */

4086

0, /* tp_repr */

4087

0, /* tp_as_number */

4088

0, /* tp_as_sequence */

4089

0, /* tp_as_mapping */

4090

0, /* tp_hash */

4091

0, /* tp_call */

4092

0, /* tp_str */

4093

0, /* tp_getattro */

4094

0, /* tp_setattro */

4095

0, /* tp_as_buffer */

4096

Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */

4097

basestring_doc, /* tp_doc */

4098

0, /* tp_traverse */

4099

0, /* tp_clear */

4100

0, /* tp_richcompare */

4101

0, /* tp_weaklistoffset */

4102

0, /* tp_iter */

4103

0, /* tp_iternext */

4104

0, /* tp_methods */

4105

0, /* tp_members */

4106

0, /* tp_getset */

4107

&PyBaseObject_Type, /* tp_base */

4108

0, /* tp_dict */

4109

0, /* tp_descr_get */

4110

0, /* tp_descr_set */

4111

0, /* tp_dictoffset */

4112

0, /* tp_init */

4113

0, /* tp_alloc */

4114

basestring_new, /* tp_new */

4115

0, /* tp_free */

4116

};

4117

4118

PyDoc_STRVAR(string_doc,

4119

"str(object) -> string\n\

4120

\n\

4121

Return a nice string representation of the object.\n\

4122

If the argument is a string, the return value is the same object.");

4123

4124

PyTypeObject PyString_Type = {

4125

PyVarObject_HEAD_INIT(&PyType_Type, 0)

4126

"str",

4127

sizeof(PyStringObject),

4128

sizeof(char),

4129

string_dealloc, /* tp_dealloc */

4130

(printfunc)string_print, /* tp_print */

4131

0, /* tp_getattr */

4132

0, /* tp_setattr */

4133

0, /* tp_compare */

4134

string_repr, /* tp_repr */

4135

&string_as_number, /* tp_as_number */

4136

&string_as_sequence, /* tp_as_sequence */

4137

&string_as_mapping, /* tp_as_mapping */

4138

(hashfunc)string_hash, /* tp_hash */

4139

0, /* tp_call */

4140

string_str, /* tp_str */

4141

PyObject_GenericGetAttr, /* tp_getattro */

4142

0, /* tp_setattro */

4143

&string_as_buffer, /* tp_as_buffer */

4144

Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |

4145

Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |

4146

Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */

4147

string_doc, /* tp_doc */

4148

0, /* tp_traverse */

4149

0, /* tp_clear */

4150

(richcmpfunc)string_richcompare, /* tp_richcompare */

4151

0, /* tp_weaklistoffset */

4152

0, /* tp_iter */

4153

0, /* tp_iternext */

4154

string_methods, /* tp_methods */

4155

0, /* tp_members */

4156

0, /* tp_getset */

4157

&PyBaseString_Type, /* tp_base */

4158

0, /* tp_dict */

4159

0, /* tp_descr_get */

4160

0, /* tp_descr_set */

4161

0, /* tp_dictoffset */

4162

0, /* tp_init */

4163

0, /* tp_alloc */

4164

string_new, /* tp_new */

4165

PyObject_Del, /* tp_free */

4166

};

4167

4168

void

4169

PyString_Concat(register PyObject **pv, register PyObject *w)

4170

{

4171

register PyObject *v;

4172

if (*pv == NULL)

4173

return;

4174

if (w == NULL || !PyString_Check(*pv)) {

4175

Py_DECREF(*pv);

4176

*pv = NULL;

4177

return;

4178

}

4179

v = string_concat((PyStringObject *) *pv, w);

4180

Py_DECREF(*pv);

4181

*pv = v;

4182

}

4183

4184

void

4185

PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)

4186

{

4187

PyString_Concat(pv, w);

4188

Py_XDECREF(w);

4189

}

4190

4191

4192

/* The following function breaks the notion that strings are immutable:

4193

it changes the size of a string. We get away with this only if there

4194

is only one module referencing the object. You can also think of it

4195

as creating a new string object and destroying the old one, only

4196

more efficiently. In any case, don't use this if the string may

4197

already be known to some other part of the code...

4198

Note that if there's not enough memory to resize the string, the original

4199

string object at *pv is deallocated, *pv is set to NULL, an "out of

4200

memory" exception is set, and -1 is returned. Else (on success) 0 is

4201

returned, and the value in *pv may or may not be the same as on input.

4202

As always, an extra byte is allocated for a trailing \0 byte (newsize

4203

does *not* include that), and a trailing \0 byte is stored.

4204

*/

4205

4206

int

4207

_PyString_Resize(PyObject **pv, Py_ssize_t newsize)

4208

{

4209

register PyObject *v;

4210

register PyStringObject *sv;

4211

v = *pv;

4212

if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||

4213

PyString_CHECK_INTERNED(v)) {

4214

*pv = 0;

4215

Py_DECREF(v);

4216

PyErr_BadInternalCall();

4217

return -1;

4218

}

4219

/* XXX UNREF/NEWREF interface should be more symmetrical */

4220

_Py_DEC_REFTOTAL;

4221

_Py_ForgetReference(v);

4222

*pv = (PyObject *)

4223

PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);

4224

if (*pv == NULL) {

4225

PyObject_Del(v);

4226

PyErr_NoMemory();

4227

return -1;

4228

}

4229

_Py_NewReference(*pv);

4230

sv = (PyStringObject *) *pv;

4231

Py_SIZE(sv) = newsize;

4232

sv->ob_sval[newsize] = '\0';

4233

sv->ob_shash = -1; /* invalidate cached hash value */

4234

return 0;

4235

}

4236

4237

/* Helpers for formatstring */

4238

4239

/* Fetch the next argument for a format operation and advance *p_argidx.
 *
 * When arglen is negative, `args` itself is returned; otherwise the item
 * at the current index is taken from the tuple `args`.  If the index is
 * not below arglen, TypeError is set and NULL is returned.
 */
Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
{
    Py_ssize_t current = *p_argidx;

    if (current >= arglen) {
        PyErr_SetString(PyExc_TypeError,
                        "not enough arguments for format string");
        return NULL;
    }

    *p_argidx = current + 1;
    return (arglen < 0) ? args : PyTuple_GetItem(args, current);
}

4254

4255

/* Format codes: one bit per flag; the comment beside each flag gives the
 * format character it stands for. */
#define F_LJUST (1<<0)  /* '-' */
#define F_SIGN  (1<<1)  /* '+' */
#define F_BLANK (1<<2)  /* ' ' */
#define F_ALT   (1<<3)  /* '#' */
#define F_ZERO  (1<<4)  /* '0' */

4267

4268

Py_LOCAL_INLINE(int)

4269

formatfloat(char *buf, size_t buflen, int flags,

4270

int prec, int type, PyObject *v)

4271

{

4272

/* fmt = '%#.' + `prec` + `type`

4273

worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/

4274

char fmt[20];

4275

double x;

4276

x = PyFloat_AsDouble(v);

4277

if (x == -1.0 && PyErr_Occurred()) {

4278

PyErr_Format(PyExc_TypeError, "float argument required, "

4279

"not %.200s", Py_TYPE(v)->tp_name);

4280

return -1;

4281

}

4282

if (prec < 0)

4283

prec = 6;

4284

if (type == 'f' && fabs(x)/1e25 >= 1e25)

4285

type = 'g';

4286

/* Worst case length calc to ensure no buffer overrun:

4287

4288

'g' formats:

4289

fmt = %#.<prec>g

4290

buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp

4291

for any double rep.)

4292

len = 1 + prec + 1 + 2 + 5 = 9 + prec

4293

4294

'f' formats:

4295

buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)

4296

len = 1 + 50 + 1 + prec = 52 + prec

4297

4298

If prec=0 the effective precision is 1 (the leading digit is

4299

always given), therefore increase the length by one.

4300

4301

*/

4302

if (((type == 'g' || type == 'G') &&

4303

buflen <= (size_t)10 + (size_t)prec) ||

4304

(type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {

4305

PyErr_SetString(PyExc_OverflowError,

4306

"formatted float is too long (precision too large?)");

4307

return -1;

4308

}

4309

PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",

4310

(flags&F_ALT) ? "#" : "",

4311

prec, type);

4312

PyOS_ascii_formatd(buf, buflen, fmt, x);

4313

return (int)strlen(buf);

4314

}

4315

4316

/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and

4317

* the F_ALT flag, for Python's long (unbounded) ints. It's not used for

4318

* Python's regular ints.

4319

* Return value: a new PyString*, or NULL if error.

4320

* . *pbuf is set to point into it,

4321

* *plen set to the # of chars following that.

4322

* Caller must decref it when done using pbuf.

4323

* The string starting at *pbuf is of the form

4324

* "-"? ("0x" | "0X")? digit+

4325

* "0x"/"0X" are present only for x and X conversions, with F_ALT

4326

* set in flags. The case of hex digits will be correct,

4327

* There will be at least prec digits, zero-filled on the left if

4328

* necessary to get that many.

4329

* val object to be converted

4330

* flags bitmask of format flags; only F_ALT is looked at

4331

* prec minimum number of digits; 0-fill on left if needed

4332

* type a character in [duoxX]; u acts the same as d

4333

*

4334

* CAUTION: o, x and X conversions on regular ints can never

4335

* produce a '-' sign, but can for Python's unbounded ints.

4336

*/

4337

PyObject*

4338

_PyString_FormatLong(PyObject *val, int flags, int prec, int type,

4339

char **pbuf, int *plen)

4340

{

4341

PyObject *result = NULL;

4342

char *buf;

4343

Py_ssize_t i;

4344

int sign; /* 1 if '-', else 0 */

4345

int len; /* number of characters */

4346

Py_ssize_t llen;

4347

int numdigits; /* len == numnondigits + numdigits */

4348

int numnondigits = 0;

4349

4350

switch (type) {

4351

case 'd':

4352

case 'u':

4353

result = Py_TYPE(val)->tp_str(val);

4354

break;

4355

case 'o':

4356

result = Py_TYPE(val)->tp_as_number->nb_oct(val);

4357

break;

4358

case 'x':

4359

case 'X':

4360

numnondigits = 2;

4361

result = Py_TYPE(val)->tp_as_number->nb_hex(val);

4362

break;

4363

default:

4364

assert(!"'type' not in [duoxX]");

4365

}

4366

if (!result)

4367

return NULL;

4368

4369

buf = PyString_AsString(result);

4370

if (!buf) {

4371

Py_DECREF(result);

4372

return NULL;

4373

}

4374

4375

/* To modify the string in-place, there can only be one reference. */

4376

if (Py_REFCNT(result) != 1) {

4377

PyErr_BadInternalCall();

4378

return NULL;

4379

}

4380

llen = PyString_Size(result);

4381

if (llen > INT_MAX) {

4382

PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");

4383

return NULL;

4384

}

4385

len = (int)llen;

4386

if (buf[len-1] == 'L') {

4387

--len;

4388

buf[len] = '\0';

4389

}

4390

sign = buf[0] == '-';

4391

numnondigits += sign;

4392

numdigits = len - numnondigits;

4393

assert(numdigits > 0);

4394

4395

/* Get rid of base marker unless F_ALT */

4396

if ((flags & F_ALT) == 0) {

4397

/* Need to skip 0x, 0X or 0. */

4398

int skipped = 0;

4399

switch (type) {

4400

case 'o':

4401

assert(buf[sign] == '0');

4402

/* If 0 is only digit, leave it alone. */

4403

if (numdigits > 1) {

4404

skipped = 1;

4405

--numdigits;

4406

}

4407

break;

4408

case 'x':

4409

case 'X':

4410

assert(buf[sign] == '0');

4411

assert(buf[sign + 1] == 'x');

4412

skipped = 2;

4413

numnondigits -= 2;

4414

break;

4415

}

4416

if (skipped) {

4417

buf += skipped;

4418

len -= skipped;

4419

if (sign)

4420

buf[0] = '-';

4421

}

4422

assert(len == numnondigits + numdigits);

4423

assert(numdigits > 0);

4424

}

4425

4426

/* Fill with leading zeroes to meet minimum width. */

4427

if (prec > numdigits) {

4428

PyObject *r1 = PyString_FromStringAndSize(NULL,

4429

numnondigits + prec);

4430

char *b1;

4431

if (!r1) {

4432

Py_DECREF(result);

4433

return NULL;

4434

}

4435

b1 = PyString_AS_STRING(r1);

4436

for (i = 0; i < numnondigits; ++i)

4437

*b1++ = *buf++;

4438

for (i = 0; i < prec - numdigits; i++)

4439

*b1++ = '0';

4440

for (i = 0; i < numdigits; i++)

4441

*b1++ = *buf++;

4442

*b1 = '\0';

4443

Py_DECREF(result);

4444

result = r1;

4445

buf = PyString_AS_STRING(result);

4446

len = numnondigits + prec;

4447

}

4448

4449

/* Fix up case for hex conversions. */

4450

if (type == 'X') {

4451

/* Need to convert all lower case letters to upper case.

4452

and need to convert 0x to 0X (and -0x to -0X). */

4453

for (i = 0; i < len; i++)

4454

if (buf[i] >= 'a' && buf[i] <= 'x')

4455

buf[i] -= 'a'-'A';

4456

}

4457

*pbuf = buf;

4458

*plen = len;

4459

return result;

4460

}

4461

4462

Py_LOCAL_INLINE(int)

4463

formatint(char *buf, size_t buflen, int flags,

4464

int prec, int type, PyObject *v)

4465

{

4466

/* fmt = '%#.' + `prec` + 'l' + `type`

4467

worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)

4468

+ 1 + 1 = 24 */

4469

char fmt[64]; /* plenty big enough! */

4470

char *sign;

4471

long x;

4472

4473

x = PyInt_AsLong(v);

4474

if (x == -1 && PyErr_Occurred()) {

4475

PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",

4476

Py_TYPE(v)->tp_name);

4477

return -1;

4478

}

4479

if (x < 0 && type == 'u') {

4480

type = 'd';

4481

}

4482

if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))

4483

sign = "-";

4484

else

4485

sign = "";

4486

if (prec < 0)

4487

prec = 1;

4488

4489

if ((flags & F_ALT) &&

4490

(type == 'x' || type == 'X')) {

4491

/* When converting under %#x or %#X, there are a number

4492

* of issues that cause pain:

4493

* - when 0 is being converted, the C standard leaves off

4494

* the '0x' or '0X', which is inconsistent with other

4495

* %#x/%#X conversions and inconsistent with Python's

4496

* hex() function

4497

* - there are platforms that violate the standard and

4498

* convert 0 with the '0x' or '0X'

4499

* (Metrowerks, Compaq Tru64)

4500

* - there are platforms that give '0x' when converting

4501

* under %#X, but convert 0 in accordance with the

4502

* standard (OS/2 EMX)

4503

*

4504

* We can achieve the desired consistency by inserting our

4505

* own '0x' or '0X' prefix, and substituting %x/%X in place

4506

* of %#x/%#X.

4507

*

4508

* Note that this is the same approach as used in

4509

* formatint() in unicodeobject.c

4510

*/

4511

PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",

4512

sign, type, prec, type);

4513

}

4514

else {

4515

PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",

4516

sign, (flags&F_ALT) ? "#" : "",

4517

prec, type);

4518

}

4519

4520

/* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))

4521

* worst case buf = '-0x' + [0-9]*prec, where prec >= 11

4522

*/

4523

if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {

4524

PyErr_SetString(PyExc_OverflowError,

4525

"formatted integer is too long (precision too large?)");

4526

return -1;

4527

}

4528

if (sign[0])

4529

PyOS_snprintf(buf, buflen, fmt, -x);

4530

else

4531

PyOS_snprintf(buf, buflen, fmt, x);

4532

return (int)strlen(buf);

4533

}

4534

4535

Py_LOCAL_INLINE(int)

4536

formatchar(char *buf, size_t buflen, PyObject *v)

4537

{

4538

/* presume that the buffer is at least 2 characters long */

4539

if (PyString_Check(v)) {

4540

if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))

4541

return -1;

4542

}

4543

else {

4544

if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))

4545

return -1;

4546

}

4547

buf[1] = '\0';

4548

return 1;

4549

}

4550

4551

/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

4552

4553

FORMATBUFLEN is the length of the buffer in which the floats, ints, &

4554

chars are formatted. XXX This is a magic number. Each formatting

4555

routine does bounds checking to ensure no overflow, but a better

4556

solution may be to malloc a buffer of appropriate size for each

4557

format. For now, the current solution is sufficient.

4558

*/

4559

#define FORMATBUFLEN (size_t)120

4560

4561

PyObject *

4562

PyString_Format(PyObject *format, PyObject *args)

4563

{

4564

char *fmt, *res;

4565

Py_ssize_t arglen, argidx;

4566

Py_ssize_t reslen, rescnt, fmtcnt;

4567

int args_owned = 0;

4568

PyObject *result, *orig_args;

4569

#ifdef Py_USING_UNICODE

4570

PyObject *v, *w;

4571

#endif

4572

PyObject *dict = NULL;

4573

if (format == NULL || !PyString_Check(format) || args == NULL) {

4574

PyErr_BadInternalCall();

4575

return NULL;

4576

}

4577

orig_args = args;

4578

fmt = PyString_AS_STRING(format);

4579

fmtcnt = PyString_GET_SIZE(format);

4580

reslen = rescnt = fmtcnt + 100;

4581

result = PyString_FromStringAndSize((char *)NULL, reslen);

4582

if (result == NULL)

4583

return NULL;

4584

res = PyString_AsString(result);

4585

if (PyTuple_Check(args)) {

4586

arglen = PyTuple_GET_SIZE(args);

4587

argidx = 0;

4588

}

4589

else {

4590

arglen = -1;

4591

argidx = -2;

4592

}

4593

if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&

4594

!PyObject_TypeCheck(args, &PyBaseString_Type))

4595

dict = args;

4596

while (--fmtcnt >= 0) {

4597

if (*fmt != '%') {

4598

if (--rescnt < 0) {

4599

rescnt = fmtcnt + 100;

4600

reslen += rescnt;

4601

if (_PyString_Resize(&result, reslen) < 0)

4602

return NULL;

4603

res = PyString_AS_STRING(result)

4604

+ reslen - rescnt;

4605

--rescnt;

4606

}

4607

*res++ = *fmt++;

4608

}

4609

else {

4610

/* Got a format specifier */

4611

int flags = 0;

4612

Py_ssize_t width = -1;

4613

int prec = -1;

4614

int c = '\0';

4615

int fill;

4616

int isnumok;

4617

PyObject *v = NULL;

4618

PyObject *temp = NULL;

4619

char *pbuf;

4620

int sign;

4621

Py_ssize_t len;

4622

char formatbuf[FORMATBUFLEN];

4623

/* For format{float,int,char}() */

4624

#ifdef Py_USING_UNICODE

4625

char *fmt_start = fmt;

4626

Py_ssize_t argidx_start = argidx;

4627

#endif

4628

4629

fmt++;

4630

if (*fmt == '(') {

4631

char *keystart;

4632

Py_ssize_t keylen;

4633

PyObject *key;

4634

int pcount = 1;

4635

4636

if (dict == NULL) {

4637

PyErr_SetString(PyExc_TypeError,

4638

"format requires a mapping");

4639

goto error;

4640

}

4641

++fmt;

4642

--fmtcnt;

4643

keystart = fmt;

4644

/* Skip over balanced parentheses */

4645

while (pcount > 0 && --fmtcnt >= 0) {

4646

if (*fmt == ')')

4647

--pcount;

4648

else if (*fmt == '(')

4649

++pcount;

4650

fmt++;

4651

}

4652

keylen = fmt - keystart - 1;

4653

if (fmtcnt < 0 || pcount > 0) {

4654

PyErr_SetString(PyExc_ValueError,

4655

"incomplete format key");

4656

goto error;

4657

}

4658

key = PyString_FromStringAndSize(keystart,

4659

keylen);

4660

if (key == NULL)

4661

goto error;

4662

if (args_owned) {

4663

Py_DECREF(args);

4664

args_owned = 0;

4665

}

4666

args = PyObject_GetItem(dict, key);

4667

Py_DECREF(key);

4668

if (args == NULL) {

4669

goto error;

4670

}

4671

args_owned = 1;

4672

arglen = -1;

4673

argidx = -2;

4674

}

4675

while (--fmtcnt >= 0) {

4676

switch (c = *fmt++) {

4677

case '-': flags |= F_LJUST; continue;

4678

case '+': flags |= F_SIGN; continue;

4679

case ' ': flags |= F_BLANK; continue;

4680

case '#': flags |= F_ALT; continue;

4681

case '0': flags |= F_ZERO; continue;

4682

}

4683

break;

4684

}

4685

if (c == '*') {

4686

v = getnextarg(args, arglen, &argidx);

4687

if (v == NULL)

4688

goto error;

4689

if (!PyInt_Check(v)) {

4690

PyErr_SetString(PyExc_TypeError,

4691

"* wants int");

4692

goto error;

4693

}

4694

width = PyInt_AsLong(v);

4695

if (width < 0) {

4696

flags |= F_LJUST;

4697

width = -width;

4698

}

4699

if (--fmtcnt >= 0)

4700

c = *fmt++;

4701

}

4702

else if (c >= 0 && isdigit(c)) {

4703

width = c - '0';

4704

while (--fmtcnt >= 0) {

4705

c = Py_CHARMASK(*fmt++);

4706

if (!isdigit(c))

4707

break;

4708

if ((width*10) / 10 != width) {

4709

PyErr_SetString(

4710

PyExc_ValueError,

4711

"width too big");

4712

goto error;

4713

}

4714

width = width*10 + (c - '0');

4715

}

4716

}

4717

if (c == '.') {

4718

prec = 0;

4719

if (--fmtcnt >= 0)

4720

c = *fmt++;

4721

if (c == '*') {

4722

v = getnextarg(args, arglen, &argidx);

4723

if (v == NULL)

4724

goto error;

4725

if (!PyInt_Check(v)) {

4726

PyErr_SetString(

4727

PyExc_TypeError,

4728

"* wants int");

4729

goto error;

4730

}

4731

prec = PyInt_AsLong(v);

4732

if (prec < 0)

4733

prec = 0;

4734

if (--fmtcnt >= 0)

4735

c = *fmt++;

4736

}

4737

else if (c >= 0 && isdigit(c)) {

4738

prec = c - '0';

4739

while (--fmtcnt >= 0) {

4740

c = Py_CHARMASK(*fmt++);

4741

if (!isdigit(c))

4742

break;

4743

if ((prec*10) / 10 != prec) {

4744

PyErr_SetString(

4745

PyExc_ValueError,

4746

"prec too big");

4747

goto error;

4748

}

4749

prec = prec*10 + (c - '0');

4750

}

4751

}

4752

} /* prec */

4753

if (fmtcnt >= 0) {

4754

if (c == 'h' || c == 'l' || c == 'L') {

4755

if (--fmtcnt >= 0)

4756

c = *fmt++;

4757

}

4758

}

4759

if (fmtcnt < 0) {

4760

PyErr_SetString(PyExc_ValueError,

4761

"incomplete format");

4762

goto error;

4763

}

4764

if (c != '%') {

4765

v = getnextarg(args, arglen, &argidx);

4766

if (v == NULL)

4767

goto error;

4768

}

4769

sign = 0;

4770

fill = ' ';

4771

switch (c) {

4772

case '%':

4773

pbuf = "%";

4774

len = 1;

4775

break;

4776

case 's':

4777

#ifdef Py_USING_UNICODE

4778

if (PyUnicode_Check(v)) {

4779

fmt = fmt_start;

4780

argidx = argidx_start;

4781

goto unicode;

4782

}

4783

#endif

4784

temp = _PyObject_Str(v);

4785

#ifdef Py_USING_UNICODE

4786

if (temp != NULL && PyUnicode_Check(temp)) {

4787

Py_DECREF(temp);

4788

fmt = fmt_start;

4789

argidx = argidx_start;

4790

goto unicode;

4791

}

4792

#endif

4793

/* Fall through */

4794

case 'r':

4795

if (c == 'r')

4796

temp = PyObject_Repr(v);

4797

if (temp == NULL)

4798

goto error;

4799

if (!PyString_Check(temp)) {

4800

PyErr_SetString(PyExc_TypeError,

4801

"%s argument has non-string str()");

4802

Py_DECREF(temp);

4803

goto error;

4804

}

4805

pbuf = PyString_AS_STRING(temp);

4806

len = PyString_GET_SIZE(temp);

4807

if (prec >= 0 && len > prec)

4808

len = prec;

4809

break;

4810

case 'i':

4811

case 'd':

4812

case 'u':

4813

case 'o':

4814

case 'x':

4815

case 'X':

4816

if (c == 'i')

4817

c = 'd';

4818

isnumok = 0;

4819

if (PyNumber_Check(v)) {

4820

PyObject *iobj=NULL;

4821

4822

if (PyInt_Check(v) || (PyLong_Check(v))) {

4823

iobj = v;

4824

Py_INCREF(iobj);

4825

}

4826

else {

4827

iobj = PyNumber_Int(v);

4828

if (iobj==NULL) iobj = PyNumber_Long(v);

4829

}

4830

if (iobj!=NULL) {

4831

if (PyInt_Check(iobj)) {

4832

isnumok = 1;

4833

pbuf = formatbuf;

4834

len = formatint(pbuf,

4835

sizeof(formatbuf),

4836

flags, prec, c, iobj);

4837

Py_DECREF(iobj);

4838

if (len < 0)

4839

goto error;

4840

sign = 1;

4841

}

4842

else if (PyLong_Check(iobj)) {

4843

int ilen;

4844

4845

isnumok = 1;

4846

temp = _PyString_FormatLong(iobj, flags,

4847

prec, c, &pbuf, &ilen);

4848

Py_DECREF(iobj);

4849

len = ilen;

4850

if (!temp)

4851

goto error;

4852

sign = 1;

4853

}

4854

else {

4855

Py_DECREF(iobj);

4856

}

4857

}

4858

}

4859

if (!isnumok) {

4860

PyErr_Format(PyExc_TypeError,

4861

"%%%c format: a number is required, "

4862

"not %.200s", c, Py_TYPE(v)->tp_name);

4863

goto error;

4864

}

4865

if (flags & F_ZERO)

4866

fill = '0';

4867

break;

4868

case 'e':

4869

case 'E':

4870

case 'f':

4871

case 'F':

4872

case 'g':

4873

case 'G':

4874

if (c == 'F')

4875

c = 'f';

4876

pbuf = formatbuf;

4877

len = formatfloat(pbuf, sizeof(formatbuf),

4878

flags, prec, c, v);

4879

if (len < 0)

4880

goto error;

4881

sign = 1;

4882

if (flags & F_ZERO)

4883

fill = '0';

4884

break;

4885

case 'c':

4886

#ifdef Py_USING_UNICODE

4887

if (PyUnicode_Check(v)) {

4888

fmt = fmt_start;

4889

argidx = argidx_start;

4890

goto unicode;

4891

}

4892

#endif

4893

pbuf = formatbuf;

4894

len = formatchar(pbuf, sizeof(formatbuf), v);

4895

if (len < 0)

4896

goto error;

4897

break;

4898

default:

4899

PyErr_Format(PyExc_ValueError,

4900

"unsupported format character '%c' (0x%x) "

4901

"at index %zd",

4902

c, c,

4903

(Py_ssize_t)(fmt - 1 -

4904

PyString_AsString(format)));

4905

goto error;

4906

}

4907

if (sign) {

4908

if (*pbuf == '-' || *pbuf == '+') {

4909

sign = *pbuf++;

4910

len--;

4911

}

4912

else if (flags & F_SIGN)

4913

sign = '+';

4914

else if (flags & F_BLANK)

4915

sign = ' ';

4916

else

4917

sign = 0;

4918

}

4919

if (width < len)

4920

width = len;

4921

if (rescnt - (sign != 0) < width) {

4922

reslen -= rescnt;

4923

rescnt = width + fmtcnt + 100;

4924

reslen += rescnt;

4925

if (reslen < 0) {

4926

Py_DECREF(result);

4927

Py_XDECREF(temp);

4928

return PyErr_NoMemory();

4929

}

4930

if (_PyString_Resize(&result, reslen) < 0) {

4931

Py_XDECREF(temp);

4932

return NULL;

4933

}

4934

res = PyString_AS_STRING(result)

4935

+ reslen - rescnt;

4936

}

4937

if (sign) {

4938

if (fill != ' ')

4939

*res++ = sign;

4940

rescnt--;

4941

if (width > len)

4942

width--;

4943

}

4944

if ((flags & F_ALT) && (c == 'x' || c == 'X')) {

4945

assert(pbuf[0] == '0');

4946

assert(pbuf[1] == c);

4947

if (fill != ' ') {

4948

*res++ = *pbuf++;

4949

*res++ = *pbuf++;

4950

}

4951

rescnt -= 2;

4952

width -= 2;

4953

if (width < 0)

4954

width = 0;

4955

len -= 2;

4956

}

4957

if (width > len && !(flags & F_LJUST)) {

4958

do {

4959

--rescnt;

4960

*res++ = fill;

4961

} while (--width > len);

4962

}

4963

if (fill == ' ') {

4964

if (sign)

4965

*res++ = sign;

4966

if ((flags & F_ALT) &&

4967

(c == 'x' || c == 'X')) {

4968

assert(pbuf[0] == '0');

4969

assert(pbuf[1] == c);

4970

*res++ = *pbuf++;

4971

*res++ = *pbuf++;

4972

}

4973

}

4974

Py_MEMCPY(res, pbuf, len);

4975

res += len;

4976

rescnt -= len;

4977

while (--width >= len) {

4978

--rescnt;

4979

*res++ = ' ';

4980

}

4981

if (dict && (argidx < arglen) && c != '%') {

4982

PyErr_SetString(PyExc_TypeError,

4983

"not all arguments converted during string formatting");

4984

Py_XDECREF(temp);

4985

goto error;

4986

}

4987

Py_XDECREF(temp);

4988

} /* '%' */

4989

} /* until end */

4990

if (argidx < arglen && !dict) {

4991

PyErr_SetString(PyExc_TypeError,

4992

"not all arguments converted during string formatting");

4993

goto error;

4994

}

4995

if (args_owned) {

4996

Py_DECREF(args);

4997

}

4998

_PyString_Resize(&result, reslen - rescnt);

4999

return result;

5000

5001

#ifdef Py_USING_UNICODE

5002

unicode:

5003

if (args_owned) {

5004

Py_DECREF(args);

5005

args_owned = 0;

5006

}

5007

/* Fiddle args right (remove the first argidx arguments) */

5008

if (PyTuple_Check(orig_args) && argidx > 0) {

5009

PyObject *v;

5010

Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;

5011

v = PyTuple_New(n);

5012

if (v == NULL)

5013

goto error;

5014

while (--n >= 0) {

5015

PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);

5016

Py_INCREF(w);

5017

PyTuple_SET_ITEM(v, n, w);

5018

}

5019

args = v;

5020

} else {

5021

Py_INCREF(orig_args);

5022

args = orig_args;

5023

}

5024

args_owned = 1;

5025

/* Take what we have of the result and let the Unicode formatting

5026

function format the rest of the input. */

5027

rescnt = res - PyString_AS_STRING(result);

5028

if (_PyString_Resize(&result, rescnt))

5029

goto error;

5030

fmtcnt = PyString_GET_SIZE(format) - \

5031

(fmt - PyString_AS_STRING(format));

5032

format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);

5033

if (format == NULL)

5034

goto error;

5035

v = PyUnicode_Format(format, args);

5036

Py_DECREF(format);

5037

if (v == NULL)

5038

goto error;

5039

/* Paste what we have (result) to what the Unicode formatting

5040

function returned (v) and return the result (or error) */

5041

w = PyUnicode_Concat(result, v);

5042

Py_DECREF(result);

5043

Py_DECREF(v);

5044

Py_DECREF(args);

5045

return w;

5046

#endif /* Py_USING_UNICODE */

5047

5048

error:

5049

Py_DECREF(result);

5050

if (args_owned) {

5051

Py_DECREF(args);

5052

}

5053

return NULL;

5054

}

5055

5056

void

5057

PyString_InternInPlace(PyObject **p)

5058

{

5059

register PyStringObject *s = (PyStringObject *)(*p);

5060

PyObject *t;

5061

if (s == NULL || !PyString_Check(s))

5062

Py_FatalError("PyString_InternInPlace: strings only please!");

5063

/* If it's a string subclass, we don't really know what putting

5064

it in the interned dict might do. */

5065

if (!PyString_CheckExact(s))

5066

return;

5067

if (PyString_CHECK_INTERNED(s))

5068

return;

5069

if (interned == NULL) {

5070

interned = PyDict_New();

5071

if (interned == NULL) {

5072

PyErr_Clear(); /* Don't leave an exception */

5073

return;

5074

}

5075

}

5076

t = PyDict_GetItem(interned, (PyObject *)s);

5077

if (t) {

5078

Py_INCREF(t);

5079

Py_DECREF(*p);

5080

*p = t;

5081

return;

5082

}

5083

5084

if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {

5085

PyErr_Clear();

5086

return;

5087

}

5088

/* The two references in interned are not counted by refcnt.

5089

The string deallocator will take care of this */

5090

Py_REFCNT(s) -= 2;

5091

PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;

5092

}

5093

5094

void

5095

PyString_InternImmortal(PyObject **p)

5096

{

5097

PyString_InternInPlace(p);

5098

if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {

5099

PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;

5100

Py_INCREF(*p);

5101

}

5102

}

5103

5104

5105

/* Build a string object from the C string `cp` and pass it through
 * PyString_InternInPlace().  Returns the resulting reference, or NULL if
 * PyString_FromString() fails.
 */
PyObject *
PyString_InternFromString(const char *cp)
{
    PyObject *str = PyString_FromString(cp);

    if (str != NULL)
        PyString_InternInPlace(&str);
    return str;
}

5114

5115

void

5116

PyString_Fini(void)

5117

{

5118

int i;

5119

for (i = 0; i < UCHAR_MAX + 1; i++) {

5120

Py_XDECREF(characters[i]);

5121

characters[i] = NULL;

5122

}

5123

Py_XDECREF(nullstring);

5124

nullstring = NULL;

5125

}

5126

5127

void _Py_ReleaseInternedStrings(void)

5128

{

5129

PyObject *keys;

5130

PyStringObject *s;

5131

Py_ssize_t i, n;

5132

Py_ssize_t immortal_size = 0, mortal_size = 0;

5133

5134

if (interned == NULL || !PyDict_Check(interned))

5135

return;

5136

keys = PyDict_Keys(interned);

5137

if (keys == NULL || !PyList_Check(keys)) {

5138

PyErr_Clear();

5139

return;

5140

}

5141

5142

/* Since _Py_ReleaseInternedStrings() is intended to help a leak

5143

detector, interned strings are not forcibly deallocated; rather, we

5144

give them their stolen references back, and then clear and DECREF

5145

the interned dict. */

5146

5147

n = PyList_GET_SIZE(keys);

5148

fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",

5149

n);

5150

for (i = 0; i < n; i++) {

5151

s = (PyStringObject *) PyList_GET_ITEM(keys, i);

5152

switch (s->ob_sstate) {

5153

case SSTATE_NOT_INTERNED:

5154

/* XXX Shouldn't happen */

5155

break;

5156

case SSTATE_INTERNED_IMMORTAL:

5157

Py_REFCNT(s) += 1;

5158

immortal_size += Py_SIZE(s);

5159

break;

5160

case SSTATE_INTERNED_MORTAL:

5161

Py_REFCNT(s) += 2;

5162

mortal_size += Py_SIZE(s);

5163

break;

5164

default:

5165

Py_FatalError("Inconsistent interned string state.");

5166

}

5167

s->ob_sstate = SSTATE_NOT_INTERNED;

5168

}

5169

fprintf(stderr, "total size of all interned strings: "

5170

"%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "

5171

"mortal/immortal\n", mortal_size, immortal_size);

5172

Py_DECREF(keys);

5173

PyDict_Clear(interned);

5174

Py_DECREF(interned);

5175

interned = NULL;

5176

}