1
/* String object implementation */
3
#define PY_SSIZE_T_CLEAN
9
int null_strings, one_strings;
12
static PyBytesObject *characters[UCHAR_MAX + 1];
13
static PyBytesObject *nullstring;
15
/* This dictionary holds all interned strings. Note that references to
16
strings in this dictionary are *not* counted in the string's ob_refcnt.
17
When the interned string reaches a refcnt of 0 the string deallocation
18
function will delete the reference from this dictionary.
20
Another way to look at this is to say that the actual reference
21
count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23
static PyObject *interned;
26
For both PyBytes_FromString() and PyBytes_FromStringAndSize(), the
27
parameter `size' denotes number of characters to allocate, not counting any
28
null terminating character.
30
For PyBytes_FromString(), the parameter `str' points to a null-terminated
31
string containing exactly `size' bytes.
33
For PyBytes_FromStringAndSize(), the parameter `str' is
34
either NULL or else points to a string containing at least `size' bytes.
35
For PyBytes_FromStringAndSize(), the string in the `str' parameter does
36
not have to be null-terminated. (Therefore it is safe to construct a
37
substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
38
If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
39
bytes (setting the last byte to the null terminating character) and you can
40
fill in the data yourself. If `str' is non-NULL then the resulting
41
PyString object must be treated as immutable and you must not fill in nor
42
alter the data yourself, since the strings may be shared.
44
The PyObject member `op->ob_size', which denotes the number of "extra
45
items" in a variable-size object, will contain the number of bytes
46
allocated for string data, not counting the null terminating character. It
47
is therefore equal to the `size' parameter (for
48
PyBytes_FromStringAndSize()) or the length of the string in the `str'
49
parameter (for PyBytes_FromString()).
52
PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
54
register PyBytesObject *op;
56
PyErr_SetString(PyExc_SystemError,
57
"Negative size passed to PyBytes_FromStringAndSize");
60
if (size == 0 && (op = nullstring) != NULL) {
65
return (PyObject *)op;
67
if (size == 1 && str != NULL &&
68
(op = characters[*str & UCHAR_MAX]) != NULL)
74
return (PyObject *)op;
77
/* Inline PyObject_NewVar */
78
op = (PyBytesObject *)PyObject_MALLOC(sizeof(PyBytesObject) + size);
80
return PyErr_NoMemory();
81
PyObject_INIT_VAR(op, &PyBytes_Type, size);
83
op->ob_sstate = SSTATE_NOT_INTERNED;
85
Py_MEMCPY(op->ob_sval, str, size);
86
op->ob_sval[size] = '\0';
87
/* share short strings */
89
PyObject *t = (PyObject *)op;
90
PyString_InternInPlace(&t);
91
op = (PyBytesObject *)t;
94
} else if (size == 1 && str != NULL) {
95
PyObject *t = (PyObject *)op;
96
PyString_InternInPlace(&t);
97
op = (PyBytesObject *)t;
98
characters[*str & UCHAR_MAX] = op;
101
return (PyObject *) op;
105
PyBytes_FromString(const char *str)
107
register size_t size;
108
register PyBytesObject *op;
112
if (size > PY_SSIZE_T_MAX) {
113
PyErr_SetString(PyExc_OverflowError,
114
"string is too long for a Python string");
117
if (size == 0 && (op = nullstring) != NULL) {
122
return (PyObject *)op;
124
if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
129
return (PyObject *)op;
132
/* Inline PyObject_NewVar */
133
op = (PyBytesObject *)PyObject_MALLOC(sizeof(PyBytesObject) + size);
135
return PyErr_NoMemory();
136
PyObject_INIT_VAR(op, &PyBytes_Type, size);
138
op->ob_sstate = SSTATE_NOT_INTERNED;
139
Py_MEMCPY(op->ob_sval, str, size+1);
140
/* share short strings */
142
PyObject *t = (PyObject *)op;
143
PyString_InternInPlace(&t);
144
op = (PyBytesObject *)t;
147
} else if (size == 1) {
148
PyObject *t = (PyObject *)op;
149
PyString_InternInPlace(&t);
150
op = (PyBytesObject *)t;
151
characters[*str & UCHAR_MAX] = op;
154
return (PyObject *) op;
158
PyBytes_FromFormatV(const char *format, va_list vargs)
166
#ifdef VA_LIST_IS_ARRAY
167
Py_MEMCPY(count, vargs, sizeof(va_list));
170
__va_copy(count, vargs);
175
/* step 1: figure out how large a buffer we need */
176
for (f = format; *f; f++) {
179
while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
182
/* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
183
* they don't affect the amount of space we reserve.
185
if ((*f == 'l' || *f == 'z') &&
186
(f[1] == 'd' || f[1] == 'u'))
191
(void)va_arg(count, int);
192
/* fall through... */
196
case 'd': case 'u': case 'i': case 'x':
197
(void) va_arg(count, int);
198
/* 20 bytes is enough to hold a 64-bit
199
integer. Decimal takes the most space.
200
This isn't enough for octal. */
204
s = va_arg(count, char*);
208
(void) va_arg(count, int);
209
/* maximum 64-bit pointer representation:
211
* so 19 characters is enough.
212
* XXX I count 18 -- what's the extra for?
217
/* if we stumble upon an unknown
218
formatting code, copy the rest of
219
the format string to the output
220
string. (we cannot just skip the
221
code, since there's no way to know
222
what's in the argument list) */
230
/* step 2: fill the buffer */
231
/* Since we've analyzed how much space we need for the worst case,
232
use sprintf directly instead of the slower PyOS_snprintf. */
233
string = PyBytes_FromStringAndSize(NULL, n);
237
s = PyBytes_AsString(string);
239
for (f = format; *f; f++) {
245
/* parse the width.precision part (we're only
246
interested in the precision value, if any) */
248
while (isdigit(Py_CHARMASK(*f)))
249
n = (n*10) + *f++ - '0';
253
while (isdigit(Py_CHARMASK(*f)))
254
n = (n*10) + *f++ - '0';
256
while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
258
/* handle the long flag, but only for %ld and %lu.
259
others can be added when necessary. */
260
if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
264
/* handle the size_t flag. */
265
if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
272
*s++ = va_arg(vargs, int);
276
sprintf(s, "%ld", va_arg(vargs, long));
278
sprintf(s, "%" PY_FORMAT_SIZE_T "d",
279
va_arg(vargs, Py_ssize_t));
281
sprintf(s, "%d", va_arg(vargs, int));
287
va_arg(vargs, unsigned long));
289
sprintf(s, "%" PY_FORMAT_SIZE_T "u",
290
va_arg(vargs, size_t));
293
va_arg(vargs, unsigned int));
297
sprintf(s, "%i", va_arg(vargs, int));
301
sprintf(s, "%x", va_arg(vargs, int));
305
p = va_arg(vargs, char*);
313
sprintf(s, "%p", va_arg(vargs, void*));
314
/* %p is ill-defined: ensure leading 0x. */
317
else if (s[1] != 'x') {
318
memmove(s+2, s, strlen(s)+1);
337
_PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
342
PyBytes_FromFormat(const char *format, ...)
347
#ifdef HAVE_STDARG_PROTOTYPES
348
va_start(vargs, format);
352
ret = PyBytes_FromFormatV(format, vargs);
358
PyObject *PyBytes_Decode(const char *s,
360
const char *encoding,
365
str = PyBytes_FromStringAndSize(s, size);
368
v = PyBytes_AsDecodedString(str, encoding, errors);
373
PyObject *PyBytes_AsDecodedObject(PyObject *str,
374
const char *encoding,
379
if (!PyBytes_Check(str)) {
384
if (encoding == NULL) {
385
#ifdef Py_USING_UNICODE
386
encoding = PyUnicode_GetDefaultEncoding();
388
PyErr_SetString(PyExc_ValueError, "no encoding specified");
393
/* Decode via the codec registry */
394
v = PyCodec_Decode(str, encoding, errors);
404
PyObject *PyBytes_AsDecodedString(PyObject *str,
405
const char *encoding,
410
v = PyBytes_AsDecodedObject(str, encoding, errors);
414
#ifdef Py_USING_UNICODE
415
/* Convert Unicode to a string using the default encoding */
416
if (PyUnicode_Check(v)) {
418
v = PyUnicode_AsEncodedString(v, NULL, NULL);
424
if (!PyBytes_Check(v)) {
425
PyErr_Format(PyExc_TypeError,
426
"decoder did not return a string object (type=%.400s)",
427
Py_TYPE(v)->tp_name);
438
PyObject *PyBytes_Encode(const char *s,
440
const char *encoding,
445
str = PyBytes_FromStringAndSize(s, size);
448
v = PyBytes_AsEncodedString(str, encoding, errors);
453
PyObject *PyBytes_AsEncodedObject(PyObject *str,
454
const char *encoding,
459
if (!PyBytes_Check(str)) {
464
if (encoding == NULL) {
465
#ifdef Py_USING_UNICODE
466
encoding = PyUnicode_GetDefaultEncoding();
468
PyErr_SetString(PyExc_ValueError, "no encoding specified");
473
/* Encode via the codec registry */
474
v = PyCodec_Encode(str, encoding, errors);
484
PyObject *PyBytes_AsEncodedString(PyObject *str,
485
const char *encoding,
490
v = PyBytes_AsEncodedObject(str, encoding, errors);
494
#ifdef Py_USING_UNICODE
495
/* Convert Unicode to a string using the default encoding */
496
if (PyUnicode_Check(v)) {
498
v = PyUnicode_AsEncodedString(v, NULL, NULL);
504
if (!PyBytes_Check(v)) {
505
PyErr_Format(PyExc_TypeError,
506
"encoder did not return a string object (type=%.400s)",
507
Py_TYPE(v)->tp_name);
519
string_dealloc(PyObject *op)
521
switch (PyBytes_CHECK_INTERNED(op)) {
522
case SSTATE_NOT_INTERNED:
525
case SSTATE_INTERNED_MORTAL:
526
/* revive dead object temporarily for DelItem */
528
if (PyDict_DelItem(interned, op) != 0)
530
"deletion of interned string failed");
533
case SSTATE_INTERNED_IMMORTAL:
534
Py_FatalError("Immortal interned string died.");
537
Py_FatalError("Inconsistent interned string state.");
539
Py_TYPE(op)->tp_free(op);
542
/* Unescape a backslash-escaped string. If unicode is non-zero,
543
the string is a u-literal. If recode_encoding is non-zero,
544
the string is UTF-8 encoded and should be re-encoded in the
545
specified encoding. */
547
PyObject *PyBytes_DecodeEscape(const char *s,
551
const char *recode_encoding)
557
Py_ssize_t newlen = recode_encoding ? 4*len:len;
558
v = PyBytes_FromStringAndSize((char *)NULL, newlen);
561
p = buf = PyBytes_AsString(v);
566
#ifdef Py_USING_UNICODE
567
if (recode_encoding && (*s & 0x80)) {
573
/* Decode non-ASCII bytes as UTF-8. */
574
while (t < end && (*t & 0x80)) t++;
575
u = PyUnicode_DecodeUTF8(s, t - s, errors);
578
/* Recode them in target encoding. */
579
w = PyUnicode_AsEncodedString(
580
u, recode_encoding, errors);
584
/* Append bytes to output buffer. */
585
assert(PyBytes_Check(w));
586
r = PyBytes_AS_STRING(w);
587
rn = PyBytes_GET_SIZE(w);
602
PyErr_SetString(PyExc_ValueError,
603
"Trailing \\ in string");
607
/* XXX This assumes ASCII! */
609
case '\\': *p++ = '\\'; break;
610
case '\'': *p++ = '\''; break;
611
case '\"': *p++ = '\"'; break;
612
case 'b': *p++ = '\b'; break;
613
case 'f': *p++ = '\014'; break; /* FF */
614
case 't': *p++ = '\t'; break;
615
case 'n': *p++ = '\n'; break;
616
case 'r': *p++ = '\r'; break;
617
case 'v': *p++ = '\013'; break; /* VT */
618
case 'a': *p++ = '\007'; break; /* BEL, not classic C */
619
case '0': case '1': case '2': case '3':
620
case '4': case '5': case '6': case '7':
622
if (s < end && '0' <= *s && *s <= '7') {
623
c = (c<<3) + *s++ - '0';
624
if (s < end && '0' <= *s && *s <= '7')
625
c = (c<<3) + *s++ - '0';
631
isxdigit(Py_CHARMASK(s[0])) &&
632
isxdigit(Py_CHARMASK(s[1])))
655
if (!errors || strcmp(errors, "strict") == 0) {
656
PyErr_SetString(PyExc_ValueError,
657
"invalid \\x escape");
660
if (strcmp(errors, "replace") == 0) {
662
} else if (strcmp(errors, "ignore") == 0)
665
PyErr_Format(PyExc_ValueError,
667
"unknown error handling code: %.400s",
671
#ifndef Py_USING_UNICODE
676
PyErr_SetString(PyExc_ValueError,
677
"Unicode escapes not legal "
678
"when Unicode disabled");
685
goto non_esc; /* an arbitry number of unescaped
686
UTF-8 bytes may follow. */
690
_PyBytes_Resize(&v, p - buf);
697
/* -------------------------------------------------------------------- */
701
string_getsize(register PyObject *op)
705
if (PyBytes_AsStringAndSize(op, &s, &len))
710
static /*const*/ char *
711
string_getbuffer(register PyObject *op)
715
if (PyBytes_AsStringAndSize(op, &s, &len))
721
PyBytes_Size(register PyObject *op)
723
if (!PyBytes_Check(op))
724
return string_getsize(op);
729
PyBytes_AsString(register PyObject *op)
731
if (!PyBytes_Check(op))
732
return string_getbuffer(op);
733
return ((PyBytesObject *)op) -> ob_sval;
737
PyBytes_AsStringAndSize(register PyObject *obj,
739
register Py_ssize_t *len)
742
PyErr_BadInternalCall();
746
if (!PyBytes_Check(obj)) {
747
#ifdef Py_USING_UNICODE
748
if (PyUnicode_Check(obj)) {
749
obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
756
PyErr_Format(PyExc_TypeError,
757
"expected string or Unicode object, "
758
"%.200s found", Py_TYPE(obj)->tp_name);
763
*s = PyBytes_AS_STRING(obj);
765
*len = PyBytes_GET_SIZE(obj);
766
else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
767
PyErr_SetString(PyExc_TypeError,
768
"expected string without null bytes");
774
/* -------------------------------------------------------------------- */
777
#include "stringlib/stringdefs.h"
778
#include "stringlib/fastsearch.h"
780
#include "stringlib/count.h"
781
#include "stringlib/find.h"
782
#include "stringlib/partition.h"
784
#define _Py_InsertThousandsGrouping _PyBytes_InsertThousandsGrouping
785
#include "stringlib/localeutil.h"
790
string_print(PyBytesObject *op, FILE *fp, int flags)
792
Py_ssize_t i, str_len;
796
/* XXX Ought to check for interrupts when writing long strings */
797
if (! PyBytes_CheckExact(op)) {
799
/* A str subclass may have its own __str__ method. */
800
op = (PyBytesObject *) PyObject_Str((PyObject *)op);
803
ret = string_print(op, fp, flags);
807
if (flags & Py_PRINT_RAW) {
808
char *data = op->ob_sval;
809
Py_ssize_t size = Py_SIZE(op);
810
Py_BEGIN_ALLOW_THREADS
811
while (size > INT_MAX) {
812
/* Very long strings cannot be written atomically.
813
* But don't write exactly INT_MAX bytes at a time
814
* to avoid memory alignment issues.
816
const int chunk_size = INT_MAX & ~0x3FFF;
817
fwrite(data, 1, chunk_size, fp);
822
if (size) fwrite(data, (int)size, 1, fp);
824
fwrite(data, 1, (int)size, fp);
830
/* figure out which quote to use; single is preferred */
832
if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
833
!memchr(op->ob_sval, '"', Py_SIZE(op)))
836
str_len = Py_SIZE(op);
837
Py_BEGIN_ALLOW_THREADS
839
for (i = 0; i < str_len; i++) {
840
/* Since strings are immutable and the caller should have a
841
reference, accessing the internal buffer should not be an issue
842
with the GIL released. */
844
if (c == quote || c == '\\')
845
fprintf(fp, "\\%c", c);
852
else if (c < ' ' || c >= 0x7f)
853
fprintf(fp, "\\x%02x", c & 0xff);
863
PyBytes_Repr(PyObject *obj, int smartquotes)
865
register PyBytesObject* op = (PyBytesObject*) obj;
866
size_t newsize = 2 + 4 * Py_SIZE(op);
868
if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
869
PyErr_SetString(PyExc_OverflowError,
870
"string is too large to make repr");
873
v = PyBytes_FromStringAndSize((char *)NULL, newsize);
878
register Py_ssize_t i;
883
/* figure out which quote to use; single is preferred */
886
memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
887
!memchr(op->ob_sval, '"', Py_SIZE(op)))
890
p = PyBytes_AS_STRING(v);
892
for (i = 0; i < Py_SIZE(op); i++) {
893
/* There's at least enough room for a hex escape
894
and a closing quote. */
895
assert(newsize - (p - PyBytes_AS_STRING(v)) >= 5);
897
if (c == quote || c == '\\')
898
*p++ = '\\', *p++ = c;
900
*p++ = '\\', *p++ = 't';
902
*p++ = '\\', *p++ = 'n';
904
*p++ = '\\', *p++ = 'r';
905
else if (c < ' ' || c >= 0x7f) {
906
/* For performance, we don't want to call
907
PyOS_snprintf here (extra layers of
909
sprintf(p, "\\x%02x", c & 0xff);
915
assert(newsize - (p - PyBytes_AS_STRING(v)) >= 1);
919
&v, (p - PyBytes_AS_STRING(v)));
925
string_repr(PyObject *op)
927
return PyBytes_Repr(op, 1);
931
string_str(PyObject *s)
933
assert(PyBytes_Check(s));
934
if (PyBytes_CheckExact(s)) {
939
/* Subtype -- return genuine string with the same value. */
940
PyBytesObject *t = (PyBytesObject *) s;
941
return PyBytes_FromStringAndSize(t->ob_sval, Py_SIZE(t));
946
string_length(PyBytesObject *a)
952
string_concat(register PyBytesObject *a, register PyObject *bb)
954
register Py_ssize_t size;
955
register PyBytesObject *op;
956
if (!PyBytes_Check(bb)) {
957
#ifdef Py_USING_UNICODE
958
if (PyUnicode_Check(bb))
959
return PyUnicode_Concat((PyObject *)a, bb);
961
if (PyByteArray_Check(bb))
962
return PyByteArray_Concat((PyObject *)a, bb);
963
PyErr_Format(PyExc_TypeError,
964
"cannot concatenate 'str' and '%.200s' objects",
965
Py_TYPE(bb)->tp_name);
968
#define b ((PyBytesObject *)bb)
969
/* Optimize cases with empty left or right operand */
970
if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
971
PyBytes_CheckExact(a) && PyBytes_CheckExact(b)) {
972
if (Py_SIZE(a) == 0) {
977
return (PyObject *)a;
979
size = Py_SIZE(a) + Py_SIZE(b);
981
PyErr_SetString(PyExc_OverflowError,
982
"strings are too large to concat");
986
/* Inline PyObject_NewVar */
987
op = (PyBytesObject *)PyObject_MALLOC(sizeof(PyBytesObject) + size);
989
return PyErr_NoMemory();
990
PyObject_INIT_VAR(op, &PyBytes_Type, size);
992
op->ob_sstate = SSTATE_NOT_INTERNED;
993
Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
994
Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
995
op->ob_sval[size] = '\0';
996
return (PyObject *) op;
1001
string_repeat(register PyBytesObject *a, register Py_ssize_t n)
1003
register Py_ssize_t i;
1004
register Py_ssize_t j;
1005
register Py_ssize_t size;
1006
register PyBytesObject *op;
1010
/* watch out for overflows: the size can overflow int,
1011
* and the # of bytes needed can overflow size_t
1013
size = Py_SIZE(a) * n;
1014
if (n && size / n != Py_SIZE(a)) {
1015
PyErr_SetString(PyExc_OverflowError,
1016
"repeated string is too long");
1019
if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1021
return (PyObject *)a;
1023
nbytes = (size_t)size;
1024
if (nbytes + sizeof(PyBytesObject) <= nbytes) {
1025
PyErr_SetString(PyExc_OverflowError,
1026
"repeated string is too long");
1029
op = (PyBytesObject *)
1030
PyObject_MALLOC(sizeof(PyBytesObject) + nbytes);
1032
return PyErr_NoMemory();
1033
PyObject_INIT_VAR(op, &PyBytes_Type, size);
1035
op->ob_sstate = SSTATE_NOT_INTERNED;
1036
op->ob_sval[size] = '\0';
1037
if (Py_SIZE(a) == 1 && n > 0) {
1038
memset(op->ob_sval, a->ob_sval[0] , n);
1039
return (PyObject *) op;
1043
Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1047
j = (i <= size-i) ? i : size-i;
1048
Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1051
return (PyObject *) op;
1054
/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1057
string_slice(register PyBytesObject *a, register Py_ssize_t i,
1058
register Py_ssize_t j)
1059
/* j -- may be negative! */
1064
j = 0; /* Avoid signed/unsigned bug in next line */
1067
if (i == 0 && j == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1068
/* It's the same as a */
1070
return (PyObject *)a;
1074
return PyBytes_FromStringAndSize(a->ob_sval + i, j-i);
1078
string_contains(PyObject *str_obj, PyObject *sub_obj)
1080
if (!PyBytes_CheckExact(sub_obj)) {
1081
#ifdef Py_USING_UNICODE
1082
if (PyUnicode_Check(sub_obj))
1083
return PyUnicode_Contains(str_obj, sub_obj);
1085
if (!PyBytes_Check(sub_obj)) {
1086
PyErr_Format(PyExc_TypeError,
1087
"'in <string>' requires string as left operand, "
1088
"not %.200s", Py_TYPE(sub_obj)->tp_name);
1093
return stringlib_contains_obj(str_obj, sub_obj);
1097
string_item(PyBytesObject *a, register Py_ssize_t i)
1101
if (i < 0 || i >= Py_SIZE(a)) {
1102
PyErr_SetString(PyExc_IndexError, "string index out of range");
1105
pchar = a->ob_sval[i];
1106
v = (PyObject *)characters[pchar & UCHAR_MAX];
1108
v = PyBytes_FromStringAndSize(&pchar, 1);
1119
string_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
1122
Py_ssize_t len_a, len_b;
1126
/* Make sure both arguments are strings. */
1127
if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
1128
result = Py_NotImplemented;
1133
case Py_EQ:case Py_LE:case Py_GE:
1136
case Py_NE:case Py_LT:case Py_GT:
1142
/* Supporting Py_NE here as well does not save
1143
much time, since Py_NE is rarely used. */
1144
if (Py_SIZE(a) == Py_SIZE(b)
1145
&& (a->ob_sval[0] == b->ob_sval[0]
1146
&& memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1153
len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1154
min_len = (len_a < len_b) ? len_a : len_b;
1156
c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1158
c = memcmp(a->ob_sval, b->ob_sval, min_len);
1162
c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1164
case Py_LT: c = c < 0; break;
1165
case Py_LE: c = c <= 0; break;
1166
case Py_EQ: assert(0); break; /* unreachable */
1167
case Py_NE: c = c != 0; break;
1168
case Py_GT: c = c > 0; break;
1169
case Py_GE: c = c >= 0; break;
1171
result = Py_NotImplemented;
1174
result = c ? Py_True : Py_False;
1181
_PyBytes_Eq(PyObject *o1, PyObject *o2)
1183
PyBytesObject *a = (PyBytesObject*) o1;
1184
PyBytesObject *b = (PyBytesObject*) o2;
1185
return Py_SIZE(a) == Py_SIZE(b)
1186
&& *a->ob_sval == *b->ob_sval
1187
&& memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1191
string_hash(PyBytesObject *a)
1193
register Py_ssize_t len;
1194
register unsigned char *p;
1197
if (a->ob_shash != -1)
1200
p = (unsigned char *) a->ob_sval;
1203
x = (1000003*x) ^ *p++;
1212
string_subscript(PyBytesObject* self, PyObject* item)
1214
if (PyIndex_Check(item)) {
1215
Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1216
if (i == -1 && PyErr_Occurred())
1219
i += PyBytes_GET_SIZE(self);
1220
return string_item(self, i);
1222
else if (PySlice_Check(item)) {
1223
Py_ssize_t start, stop, step, slicelength, cur, i;
1228
if (PySlice_GetIndicesEx((PySliceObject*)item,
1229
PyBytes_GET_SIZE(self),
1230
&start, &stop, &step, &slicelength) < 0) {
1234
if (slicelength <= 0) {
1235
return PyBytes_FromStringAndSize("", 0);
1237
else if (start == 0 && step == 1 &&
1238
slicelength == PyBytes_GET_SIZE(self) &&
1239
PyBytes_CheckExact(self)) {
1241
return (PyObject *)self;
1243
else if (step == 1) {
1244
return PyBytes_FromStringAndSize(
1245
PyBytes_AS_STRING(self) + start,
1249
source_buf = PyBytes_AsString((PyObject*)self);
1250
result_buf = (char *)PyMem_Malloc(slicelength);
1251
if (result_buf == NULL)
1252
return PyErr_NoMemory();
1254
for (cur = start, i = 0; i < slicelength;
1256
result_buf[i] = source_buf[cur];
1259
result = PyBytes_FromStringAndSize(result_buf,
1261
PyMem_Free(result_buf);
1266
PyErr_Format(PyExc_TypeError,
1267
"string indices must be integers, not %.200s",
1268
Py_TYPE(item)->tp_name);
1274
string_buffer_getreadbuf(PyBytesObject *self, Py_ssize_t index, const void **ptr)
1277
PyErr_SetString(PyExc_SystemError,
1278
"accessing non-existent string segment");
1281
*ptr = (void *)self->ob_sval;
1282
return Py_SIZE(self);
1286
string_buffer_getwritebuf(PyBytesObject *self, Py_ssize_t index, const void **ptr)
1288
PyErr_SetString(PyExc_TypeError,
1289
"Cannot use string as modifiable buffer");
1294
string_buffer_getsegcount(PyBytesObject *self, Py_ssize_t *lenp)
1297
*lenp = Py_SIZE(self);
1302
string_buffer_getcharbuf(PyBytesObject *self, Py_ssize_t index, const char **ptr)
1305
PyErr_SetString(PyExc_SystemError,
1306
"accessing non-existent string segment");
1309
*ptr = self->ob_sval;
1310
return Py_SIZE(self);
1314
string_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
1316
return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_SIZE(self),
1320
static PySequenceMethods string_as_sequence = {
1321
(lenfunc)string_length, /*sq_length*/
1322
(binaryfunc)string_concat, /*sq_concat*/
1323
(ssizeargfunc)string_repeat, /*sq_repeat*/
1324
(ssizeargfunc)string_item, /*sq_item*/
1325
(ssizessizeargfunc)string_slice, /*sq_slice*/
1328
(objobjproc)string_contains /*sq_contains*/
1331
static PyMappingMethods string_as_mapping = {
1332
(lenfunc)string_length,
1333
(binaryfunc)string_subscript,
1337
static PyBufferProcs string_as_buffer = {
1338
(readbufferproc)string_buffer_getreadbuf,
1339
(writebufferproc)string_buffer_getwritebuf,
1340
(segcountproc)string_buffer_getsegcount,
1341
(charbufferproc)string_buffer_getcharbuf,
1342
(getbufferproc)string_buffer_getbuffer,
1349
#define RIGHTSTRIP 1
1352
/* Arrays indexed by above */
1353
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
1355
#define STRIPNAME(i) (stripformat[i]+3)
1358
/* Don't call if length < 2 */
1359
#define Py_STRING_MATCH(target, offset, pattern, length) \
1360
(target[offset] == pattern[0] && \
1361
target[offset+length-1] == pattern[length-1] && \
1362
!memcmp(target+offset+1, pattern+1, length-2) )
1365
/* Overallocate the initial list to reduce the number of reallocs for small
1366
split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
1367
resizes, to sizes 4, 8, then 16. Most observed string splits are for human
1368
text (roughly 11 words per line) and field delimited data (usually 1-10
1369
fields). For large strings the split algorithms are bandwidth limited
1370
so increasing the preallocation likely will not improve things.*/
1372
#define MAX_PREALLOC 12
1374
/* 5 splits gives 6 elements */
1375
#define PREALLOC_SIZE(maxsplit) \
1376
(maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1)
1378
#define SPLIT_APPEND(data, left, right) \
1379
str = PyBytes_FromStringAndSize((data) + (left), \
1380
(right) - (left)); \
1383
if (PyList_Append(list, str)) { \
1390
#define SPLIT_ADD(data, left, right) { \
1391
str = PyBytes_FromStringAndSize((data) + (left), \
1392
(right) - (left)); \
1395
if (count < MAX_PREALLOC) { \
1396
PyList_SET_ITEM(list, count, str); \
1398
if (PyList_Append(list, str)) { \
1407
/* Always force the list to the expected size. */
1408
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
1410
#define SKIP_SPACE(s, i, len) { while (i<len && isspace(Py_CHARMASK(s[i]))) i++; }
1411
#define SKIP_NONSPACE(s, i, len) { while (i<len && !isspace(Py_CHARMASK(s[i]))) i++; }
1412
#define RSKIP_SPACE(s, i) { while (i>=0 && isspace(Py_CHARMASK(s[i]))) i--; }
1413
#define RSKIP_NONSPACE(s, i) { while (i>=0 && !isspace(Py_CHARMASK(s[i]))) i--; }
1415
Py_LOCAL_INLINE(PyObject *)
1416
split_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
1418
const char *s = PyBytes_AS_STRING(self);
1419
Py_ssize_t i, j, count=0;
1421
PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
1428
while (maxsplit-- > 0) {
1429
SKIP_SPACE(s, i, len);
1432
SKIP_NONSPACE(s, i, len);
1433
if (j == 0 && i == len && PyBytes_CheckExact(self)) {
1434
/* No whitespace in self, so just use it as list[0] */
1436
PyList_SET_ITEM(list, 0, (PyObject *)self);
1444
/* Only occurs when maxsplit was reached */
1445
/* Skip any remaining whitespace and copy to end of string */
1446
SKIP_SPACE(s, i, len);
1448
SPLIT_ADD(s, i, len);
1450
FIX_PREALLOC_SIZE(list);
1457
Py_LOCAL_INLINE(PyObject *)
1458
split_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
1460
const char *s = PyBytes_AS_STRING(self);
1461
register Py_ssize_t i, j, count=0;
1463
PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
1469
while ((j < len) && (maxcount-- > 0)) {
1471
/* I found that using memchr makes no difference */
1479
if (i == 0 && count == 0 && PyBytes_CheckExact(self)) {
1480
/* ch not in self, so just use self as list[0] */
1482
PyList_SET_ITEM(list, 0, (PyObject *)self);
1485
else if (i <= len) {
1486
SPLIT_ADD(s, i, len);
1488
FIX_PREALLOC_SIZE(list);
1496
PyDoc_STRVAR(split__doc__,
1497
"S.split([sep [,maxsplit]]) -> list of strings\n\
1499
Return a list of the words in the string S, using sep as the\n\
1500
delimiter string. If maxsplit is given, at most maxsplit\n\
1501
splits are done. If sep is not specified or is None, any\n\
1502
whitespace string is a separator and empty strings are removed\n\
1506
string_split(PyBytesObject *self, PyObject *args)
1508
Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
1509
Py_ssize_t maxsplit = -1, count=0;
1510
const char *s = PyBytes_AS_STRING(self), *sub;
1511
PyObject *list, *str, *subobj = Py_None;
1516
if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1519
maxsplit = PY_SSIZE_T_MAX;
1520
if (subobj == Py_None)
1521
return split_whitespace(self, len, maxsplit);
1522
if (PyBytes_Check(subobj)) {
1523
sub = PyBytes_AS_STRING(subobj);
1524
n = PyBytes_GET_SIZE(subobj);
1526
#ifdef Py_USING_UNICODE
1527
else if (PyUnicode_Check(subobj))
1528
return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1530
else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1534
PyErr_SetString(PyExc_ValueError, "empty separator");
1538
return split_char(self, len, sub[0], maxsplit);
1540
list = PyList_New(PREALLOC_SIZE(maxsplit));
1546
while (maxsplit-- > 0) {
1547
pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1556
while ((j+n <= len) && (maxsplit-- > 0)) {
1557
for (; j+n <= len; j++) {
1558
if (Py_STRING_MATCH(s, j, sub, n)) {
1566
SPLIT_ADD(s, i, len);
1567
FIX_PREALLOC_SIZE(list);
1575
PyDoc_STRVAR(partition__doc__,
1576
"S.partition(sep) -> (head, sep, tail)\n\
1578
Searches for the separator sep in S, and returns the part before it,\n\
1579
the separator itself, and the part after it. If the separator is not\n\
1580
found, returns S and two empty strings.");
1583
string_partition(PyBytesObject *self, PyObject *sep_obj)
1588
if (PyBytes_Check(sep_obj)) {
1589
sep = PyBytes_AS_STRING(sep_obj);
1590
sep_len = PyBytes_GET_SIZE(sep_obj);
1592
#ifdef Py_USING_UNICODE
1593
else if (PyUnicode_Check(sep_obj))
1594
return PyUnicode_Partition((PyObject *) self, sep_obj);
1596
else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1599
return stringlib_partition(
1601
PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1602
sep_obj, sep, sep_len
1606
/* Docstring for S.rpartition(). The result tuple is described in
   left-to-right order (part before sep, sep, part after sep), even though
   the search for sep starts at the end of S. */
PyDoc_STRVAR(rpartition__doc__,
1607
"S.rpartition(sep) -> (head, sep, tail)\n\
1609
Searches for the separator sep in S, starting at the end of S, and returns\n\
1610
the part before it, the separator itself, and the part after it. If the\n\
1611
separator is not found, returns two empty strings and S.");
1614
/* S.rpartition(sep): split S around sep, searching from the end of S.
 *
 * sep_obj may be a byte string (its buffer and length are used directly),
 * a unicode object (when Py_USING_UNICODE is defined, the call is handed
 * to the unicode implementation), or any other object from which
 * PyObject_AsCharBuffer() can obtain a character buffer.
 * The byte-string search itself is delegated to stringlib_rpartition().
 */
string_rpartition(PyBytesObject *self, PyObject *sep_obj)
1619
if (PyBytes_Check(sep_obj)) {
1620
sep = PyBytes_AS_STRING(sep_obj);
1621
sep_len = PyBytes_GET_SIZE(sep_obj);
1623
#ifdef Py_USING_UNICODE
1624
/* Must be the reverse variant: PyUnicode_Partition() would search from
   the start of S and give partition() results for unicode separators. */
else if (PyUnicode_Check(sep_obj))
1625
return PyUnicode_RPartition((PyObject *) self, sep_obj);
1627
else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1630
return stringlib_rpartition(
1632
PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1633
sep_obj, sep, sep_len
1637
Py_LOCAL_INLINE(PyObject *)
1638
rsplit_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
1640
const char *s = PyBytes_AS_STRING(self);
1641
Py_ssize_t i, j, count=0;
1643
PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
1650
while (maxsplit-- > 0) {
1654
RSKIP_NONSPACE(s, i);
1655
if (j == len-1 && i < 0 && PyBytes_CheckExact(self)) {
1656
/* No whitespace in self, so just use it as list[0] */
1658
PyList_SET_ITEM(list, 0, (PyObject *)self);
1662
SPLIT_ADD(s, i + 1, j + 1);
1665
/* Only occurs when maxsplit was reached */
1666
/* Skip any remaining whitespace and copy to beginning of string */
1669
SPLIT_ADD(s, 0, i + 1);
1672
FIX_PREALLOC_SIZE(list);
1673
if (PyList_Reverse(list) < 0)
1681
Py_LOCAL_INLINE(PyObject *)
1682
rsplit_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
1684
const char *s = PyBytes_AS_STRING(self);
1685
register Py_ssize_t i, j, count=0;
1687
PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
1693
while ((i >= 0) && (maxcount-- > 0)) {
1694
for (; i >= 0; i--) {
1696
SPLIT_ADD(s, i + 1, j + 1);
1702
if (i < 0 && count == 0 && PyBytes_CheckExact(self)) {
1703
/* ch not in self, so just use self as list[0] */
1705
PyList_SET_ITEM(list, 0, (PyObject *)self);
1709
SPLIT_ADD(s, 0, j + 1);
1711
FIX_PREALLOC_SIZE(list);
1712
if (PyList_Reverse(list) < 0)
1721
/* S.rsplit([sep[, maxsplit]]): docstring and method implementation.
 *
 * args is parsed with "|On:rsplit" into an optional separator object
 * (default None) and an optional maxsplit (default -1; PY_SSIZE_T_MAX is
 * substituted on a branch whose condition is not shown here -- presumably
 * when maxsplit is negative).
 *
 * Dispatch on the separator:
 *   - None                -> rsplit_whitespace()
 *   - bytes               -> its buffer and size are used directly
 *   - unicode (only when Py_USING_UNICODE) -> PyUnicode_RSplit()
 *   - anything else       -> PyObject_AsCharBuffer()
 * An empty separator is reported as ValueError("empty separator"), and
 * rsplit_char() handles the sub[0] case (presumably one-byte separators).
 * The general case scans backward with Py_STRING_MATCH, adds pieces with
 * SPLIT_ADD, and reverses the list at the end.
 */
PyDoc_STRVAR(rsplit__doc__,
1722
"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1724
Return a list of the words in the string S, using sep as the\n\
1725
delimiter string, starting at the end of the string and working\n\
1726
to the front. If maxsplit is given, at most maxsplit splits are\n\
1727
done. If sep is not specified or is None, any whitespace string\n\
1731
string_rsplit(PyBytesObject *self, PyObject *args)
1733
Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
1734
Py_ssize_t maxsplit = -1, count=0;
1735
const char *s, *sub;
1736
PyObject *list, *str, *subobj = Py_None;
1738
if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1741
maxsplit = PY_SSIZE_T_MAX;
1742
if (subobj == Py_None)
1743
return rsplit_whitespace(self, len, maxsplit);
1744
if (PyBytes_Check(subobj)) {
1745
sub = PyBytes_AS_STRING(subobj);
1746
n = PyBytes_GET_SIZE(subobj);
1748
#ifdef Py_USING_UNICODE
1749
else if (PyUnicode_Check(subobj))
1750
return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1752
else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1756
PyErr_SetString(PyExc_ValueError, "empty separator");
1760
return rsplit_char(self, len, sub[0], maxsplit);
1762
list = PyList_New(PREALLOC_SIZE(maxsplit));
1769
s = PyBytes_AS_STRING(self);
1770
while ( (i >= 0) && (maxsplit-- > 0) ) {
1772
if (Py_STRING_MATCH(s, i, sub, n)) {
1773
SPLIT_ADD(s, i + n, j);
1781
FIX_PREALLOC_SIZE(list);
1782
if (PyList_Reverse(list) < 0)
1792
/* S.join(sequence): docstring and method implementation.
 *
 * self: the separator; its buffer (sep) and size (seplen) are read once.
 * orig: any iterable; it is materialized with PySequence_Fast so that it can
 *       be indexed more than once.
 *
 * Outline of the visible steps:
 *   1. An empty bytes object is returned on one early path (presumably for
 *      an empty sequence), and a single exact bytes/unicode item is
 *      special-cased.
 *   2. A pre-pass accumulates the total size in sz.  A non-string item
 *      raises TypeError naming its index and type, unless it is unicode, in
 *      which case the whole operation is handed to PyUnicode_Join with the
 *      materialized seq.  OverflowError is raised if sz wraps around or
 *      exceeds PY_SSIZE_T_MAX.
 *   3. The result is allocated once with PyBytes_FromStringAndSize(NULL, sz)
 *      and filled with Py_MEMCPY, writing the separator after every item
 *      except the last.
 */
PyDoc_STRVAR(join__doc__,
1793
"S.join(sequence) -> string\n\
1795
Return a string which is the concatenation of the strings in the\n\
1796
sequence. The separator between elements is S.");
1799
string_join(PyBytesObject *self, PyObject *orig)
1801
char *sep = PyBytes_AS_STRING(self);
1802
const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1803
PyObject *res = NULL;
1805
Py_ssize_t seqlen = 0;
1808
PyObject *seq, *item;
1810
seq = PySequence_Fast(orig, "");
1815
seqlen = PySequence_Size(seq);
1818
return PyBytes_FromString("");
1821
item = PySequence_Fast_GET_ITEM(seq, 0);
1822
if (PyBytes_CheckExact(item) || PyUnicode_CheckExact(item)) {
1829
/* There are at least two things to join, or else we have a subclass
1830
* of the builtin types in the sequence.
1831
* Do a pre-pass to figure out the total amount of space we'll
1832
* need (sz), see whether any argument is absurd, and defer to
1833
* the Unicode join if appropriate.
1835
for (i = 0; i < seqlen; i++) {
1836
const size_t old_sz = sz;
1837
item = PySequence_Fast_GET_ITEM(seq, i);
1838
if (!PyBytes_Check(item)){
1839
#ifdef Py_USING_UNICODE
1840
if (PyUnicode_Check(item)) {
1841
/* Defer to Unicode join.
1842
* CAUTION: There's no guarantee that the
1843
* original sequence can be iterated over
1844
* again, so we must pass seq here.
1847
result = PyUnicode_Join((PyObject *)self, seq);
1852
PyErr_Format(PyExc_TypeError,
1853
"sequence item %zd: expected string,"
1855
i, Py_TYPE(item)->tp_name);
1859
sz += PyBytes_GET_SIZE(item);
1862
if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1863
PyErr_SetString(PyExc_OverflowError,
1864
"join() result is too long for a Python string");
1870
/* Allocate result space. */
1871
res = PyBytes_FromStringAndSize((char*)NULL, sz);
1877
/* Catenate everything. */
1878
p = PyBytes_AS_STRING(res);
1879
for (i = 0; i < seqlen; ++i) {
1881
item = PySequence_Fast_GET_ITEM(seq, i);
1882
n = PyBytes_GET_SIZE(item);
1883
Py_MEMCPY(p, PyBytes_AS_STRING(item), n);
1885
if (i < seqlen - 1) {
1886
Py_MEMCPY(p, sep, seplen);
1896
/* C-level entry point for join: asserts that `sep` is a non-NULL bytes
 * object, then forwards to string_join() with `x` as the sequence argument.
 */
_PyBytes_Join(PyObject *sep, PyObject *x)
1898
assert(sep != NULL && PyBytes_Check(sep));
1900
return string_join((PyBytesObject *)sep, x);
1903
/* Helper operating in place on a start/end index pair for a buffer of
 * length `len`.  Callers in this file (string_count, _string_tailmatch)
 * invoke it immediately before using start/end as offsets.
 * NOTE(review): presumably it resolves negative indices and clamps both to
 * [0, len] in the manner of slice notation -- confirm against the body.
 */
Py_LOCAL_INLINE(void)
1904
string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1918
/* Shared implementation behind find/rfind/index/rindex.
 *
 * self: bytes object to search.
 * args: argument tuple parsed as "O|OO": the substring object plus optional
 *       start and end objects.  A start/end of None is treated as "not
 *       passed" (defaults 0 and PY_SSIZE_T_MAX); anything else is converted
 *       with _PyEval_SliceIndex.
 * dir:  search direction; it is forwarded to PyUnicode_Find for unicode
 *       substrings, and selects between stringlib_find_slice and
 *       stringlib_rfind_slice (presumably dir > 0 means forward).
 *
 * The substring may be bytes, unicode (only when Py_USING_UNICODE), or any
 * object accepted by PyObject_AsCharBuffer.  The result is the Py_ssize_t
 * produced by the chosen search routine.
 */
Py_LOCAL_INLINE(Py_ssize_t)
1919
string_find_internal(PyBytesObject *self, PyObject *args, int dir)
1924
Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1925
PyObject *obj_start=Py_None, *obj_end=Py_None;
1927
if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1928
&obj_start, &obj_end))
1930
/* To support None in "start" and "end" arguments, meaning
1931
the same as if they were not passed.
1933
if (obj_start != Py_None)
1934
if (!_PyEval_SliceIndex(obj_start, &start))
1936
if (obj_end != Py_None)
1937
if (!_PyEval_SliceIndex(obj_end, &end))
1940
if (PyBytes_Check(subobj)) {
1941
sub = PyBytes_AS_STRING(subobj);
1942
sub_len = PyBytes_GET_SIZE(subobj);
1944
#ifdef Py_USING_UNICODE
1945
else if (PyUnicode_Check(subobj))
1946
return PyUnicode_Find(
1947
(PyObject *)self, subobj, start, end, dir);
1949
else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1950
/* XXX - the "expected a character buffer object" is pretty
1951
confusing for a non-expert. remap to something else ? */
1955
return stringlib_find_slice(
1956
PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1957
sub, sub_len, start, end);
1959
return stringlib_rfind_slice(
1960
PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1961
sub, sub_len, start, end);
1965
/* S.find(sub[, start[, end]]): docstring and method.  Runs
 * string_find_internal() in the forward direction (+1) and converts the
 * resulting index to a Python int with PyInt_FromSsize_t.
 */
PyDoc_STRVAR(find__doc__,
1966
"S.find(sub [,start [,end]]) -> int\n\
1968
Return the lowest index in S where substring sub is found,\n\
1969
such that sub is contained within s[start:end]. Optional\n\
1970
arguments start and end are interpreted as in slice notation.\n\
1972
Return -1 on failure.");
1975
string_find(PyBytesObject *self, PyObject *args)
1977
Py_ssize_t result = string_find_internal(self, args, +1);
1980
return PyInt_FromSsize_t(result);
1984
/* S.index(sub[, start[, end]]): docstring and method.  Same forward search
 * as find (string_find_internal with +1), but the not-found path sets
 * ValueError("substring not found") instead of returning an index.
 */
PyDoc_STRVAR(index__doc__,
1985
"S.index(sub [,start [,end]]) -> int\n\
1987
Like S.find() but raise ValueError when the substring is not found.");
1990
string_index(PyBytesObject *self, PyObject *args)
1992
Py_ssize_t result = string_find_internal(self, args, +1);
1996
PyErr_SetString(PyExc_ValueError,
1997
"substring not found");
2000
return PyInt_FromSsize_t(result);
2004
/* S.rfind(sub[, start[, end]]): docstring and method.  Runs
 * string_find_internal() in the backward direction (-1) and converts the
 * resulting index to a Python int with PyInt_FromSsize_t.
 */
PyDoc_STRVAR(rfind__doc__,
2005
"S.rfind(sub [,start [,end]]) -> int\n\
2007
Return the highest index in S where substring sub is found,\n\
2008
such that sub is contained within s[start:end]. Optional\n\
2009
arguments start and end are interpreted as in slice notation.\n\
2011
Return -1 on failure.");
2014
string_rfind(PyBytesObject *self, PyObject *args)
2016
Py_ssize_t result = string_find_internal(self, args, -1);
2019
return PyInt_FromSsize_t(result);
2023
/* S.rindex(sub[, start[, end]]): docstring and method.  Same backward
 * search as rfind (string_find_internal with -1), but the not-found path
 * sets ValueError("substring not found") instead of returning an index.
 */
PyDoc_STRVAR(rindex__doc__,
2024
"S.rindex(sub [,start [,end]]) -> int\n\
2026
Like S.rfind() but raise ValueError when the substring is not found.");
2029
string_rindex(PyBytesObject *self, PyObject *args)
2031
Py_ssize_t result = string_find_internal(self, args, -1);
2035
PyErr_SetString(PyExc_ValueError,
2036
"substring not found");
2039
return PyInt_FromSsize_t(result);
2043
/* Strip from `self` every leading and/or trailing byte that occurs in
 * `sepobj`.
 *
 * self:      bytes object to strip.
 * striptype: which ends to process; the leading scan runs unless it is
 *            RIGHTSTRIP and the trailing scan runs unless it is LEFTSTRIP.
 * sepobj:    read with PyBytes_AS_STRING / PyBytes_GET_SIZE, so the caller
 *            must pass a bytes object; membership is tested with memchr.
 *
 * If nothing was removed (i == 0 and j == len) and `self` is an exact bytes
 * object, `self` is returned; otherwise a new bytes object is built from
 * the j-i bytes starting at s+i.
 */
Py_LOCAL_INLINE(PyObject *)
2044
do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
2046
char *s = PyBytes_AS_STRING(self);
2047
Py_ssize_t len = PyBytes_GET_SIZE(self);
2048
char *sep = PyBytes_AS_STRING(sepobj);
2049
Py_ssize_t seplen = PyBytes_GET_SIZE(sepobj);
2053
if (striptype != RIGHTSTRIP) {
2054
while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2060
if (striptype != LEFTSTRIP) {
2063
} while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2067
if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2069
return (PyObject*)self;
2072
return PyBytes_FromStringAndSize(s+i, j-i);
2076
/* Strip leading and/or trailing whitespace (as classified by isspace) from
 * `self`.
 *
 * striptype: the leading scan runs unless it is RIGHTSTRIP and the trailing
 *            scan runs unless it is LEFTSTRIP.
 *
 * Each byte is passed through Py_CHARMASK before isspace.  If nothing was
 * removed and `self` is an exact bytes object, `self` is returned; otherwise
 * a new bytes object is built from the j-i bytes starting at s+i.
 */
Py_LOCAL_INLINE(PyObject *)
2077
do_strip(PyBytesObject *self, int striptype)
2079
char *s = PyBytes_AS_STRING(self);
2080
Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
2083
if (striptype != RIGHTSTRIP) {
2084
while (i < len && isspace(Py_CHARMASK(s[i]))) {
2090
if (striptype != LEFTSTRIP) {
2093
} while (j >= i && isspace(Py_CHARMASK(s[j])));
2097
if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2099
return (PyObject*)self;
2102
return PyBytes_FromStringAndSize(s+i, j-i);
2106
/* Argument-handling front end shared by strip/lstrip/rstrip.
 *
 * args is parsed with the format string stripformat[striptype] into an
 * optional `sep` object.  When sep is present and not None:
 *   - bytes   -> do_xstrip(self, striptype, sep)
 *   - unicode (only when Py_USING_UNICODE) -> self is converted with
 *               PyUnicode_FromObject and stripped by _PyUnicode_XStrip
 *   - other   -> TypeError naming the method via STRIPNAME(striptype)
 * Otherwise the whitespace variant do_strip() is used.
 */
Py_LOCAL_INLINE(PyObject *)
2107
do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
2109
PyObject *sep = NULL;
2111
if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
2114
if (sep != NULL && sep != Py_None) {
2115
if (PyBytes_Check(sep))
2116
return do_xstrip(self, striptype, sep);
2117
#ifdef Py_USING_UNICODE
2118
else if (PyUnicode_Check(sep)) {
2119
PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2123
res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2129
PyErr_Format(PyExc_TypeError,
2130
#ifdef Py_USING_UNICODE
2131
"%s arg must be None, str or unicode",
2133
"%s arg must be None or str",
2135
STRIPNAME(striptype));
2139
return do_strip(self, striptype);
2143
/* S.strip([chars]): docstring and method.  With an empty argument tuple the
 * whitespace fast path do_strip(self, BOTHSTRIP) is taken; otherwise the
 * arguments are handled by do_argstrip(self, BOTHSTRIP, args).
 */
PyDoc_STRVAR(strip__doc__,
2144
"S.strip([chars]) -> string or unicode\n\
2146
Return a copy of the string S with leading and trailing\n\
2147
whitespace removed.\n\
2148
If chars is given and not None, remove characters in chars instead.\n\
2149
If chars is unicode, S will be converted to unicode before stripping");
2152
string_strip(PyBytesObject *self, PyObject *args)
2154
if (PyTuple_GET_SIZE(args) == 0)
2155
return do_strip(self, BOTHSTRIP); /* Common case */
2157
return do_argstrip(self, BOTHSTRIP, args);
2161
/* S.lstrip([chars]): docstring and method.  With an empty argument tuple the
 * whitespace fast path do_strip(self, LEFTSTRIP) is taken; otherwise the
 * arguments are handled by do_argstrip(self, LEFTSTRIP, args).
 */
PyDoc_STRVAR(lstrip__doc__,
2162
"S.lstrip([chars]) -> string or unicode\n\
2164
Return a copy of the string S with leading whitespace removed.\n\
2165
If chars is given and not None, remove characters in chars instead.\n\
2166
If chars is unicode, S will be converted to unicode before stripping");
2169
string_lstrip(PyBytesObject *self, PyObject *args)
2171
if (PyTuple_GET_SIZE(args) == 0)
2172
return do_strip(self, LEFTSTRIP); /* Common case */
2174
return do_argstrip(self, LEFTSTRIP, args);
2178
/* S.rstrip([chars]): docstring and method.  With an empty argument tuple the
 * whitespace fast path do_strip(self, RIGHTSTRIP) is taken; otherwise the
 * arguments are handled by do_argstrip(self, RIGHTSTRIP, args).
 */
PyDoc_STRVAR(rstrip__doc__,
2179
"S.rstrip([chars]) -> string or unicode\n\
2181
Return a copy of the string S with trailing whitespace removed.\n\
2182
If chars is given and not None, remove characters in chars instead.\n\
2183
If chars is unicode, S will be converted to unicode before stripping");
2186
string_rstrip(PyBytesObject *self, PyObject *args)
2188
if (PyTuple_GET_SIZE(args) == 0)
2189
return do_strip(self, RIGHTSTRIP); /* Common case */
2191
return do_argstrip(self, RIGHTSTRIP, args);
2195
/* S.lower(): docstring, the _tolower fallback definition, and the method.
 *
 * A new bytes object of the same length n is allocated, self's data is
 * copied into it with Py_MEMCPY, and the copy is then walked byte by byte
 * (each byte read through Py_CHARMASK) to be converted in place.
 * NOTE(review): the per-byte conversion statement is not shown here;
 * presumably uppercase bytes are mapped with _tolower -- confirm.
 */
PyDoc_STRVAR(lower__doc__,
2196
"S.lower() -> string\n\
2198
Return a copy of the string S converted to lowercase.");
2200
/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2202
#define _tolower tolower
2206
string_lower(PyBytesObject *self)
2209
Py_ssize_t i, n = PyBytes_GET_SIZE(self);
2212
newobj = PyBytes_FromStringAndSize(NULL, n);
2216
s = PyBytes_AS_STRING(newobj);
2218
Py_MEMCPY(s, PyBytes_AS_STRING(self), n);
2220
for (i = 0; i < n; i++) {
2221
int c = Py_CHARMASK(s[i]);
2229
/* S.upper(): docstring, the _toupper fallback definition, and the method.
 *
 * A new bytes object of the same length n is allocated, self's data is
 * copied into it with Py_MEMCPY, and the copy is then walked byte by byte
 * (each byte read through Py_CHARMASK) to be converted in place.
 * NOTE(review): the per-byte conversion statement is not shown here;
 * presumably lowercase bytes are mapped with _toupper -- confirm.
 */
PyDoc_STRVAR(upper__doc__,
2230
"S.upper() -> string\n\
2232
Return a copy of the string S converted to uppercase.");
2235
#define _toupper toupper
2239
string_upper(PyBytesObject *self)
2242
Py_ssize_t i, n = PyBytes_GET_SIZE(self);
2245
newobj = PyBytes_FromStringAndSize(NULL, n);
2249
s = PyBytes_AS_STRING(newobj);
2251
Py_MEMCPY(s, PyBytes_AS_STRING(self), n);
2253
for (i = 0; i < n; i++) {
2254
int c = Py_CHARMASK(s[i]);
2262
/* S.title(): docstring and method.
 *
 * A new bytes object of the same length n is allocated and filled while
 * reading self one byte at a time (through Py_CHARMASK).  The flag
 * previous_is_cased records whether the preceding byte was a cased letter:
 * it is set to 1 on the two letter branches and reset to 0 otherwise.  The
 * flag decides whether a letter begins a word (previous_is_cased == 0) or
 * continues one.
 * NOTE(review): the first branch's test is not shown; presumably it is the
 * islower() counterpart of the visible isupper() branch -- confirm.
 */
PyDoc_STRVAR(title__doc__,
2263
"S.title() -> string\n\
2265
Return a titlecased version of S, i.e. words start with uppercase\n\
2266
characters, all remaining cased characters have lowercase.");
2269
string_title(PyBytesObject *self)
2271
char *s = PyBytes_AS_STRING(self), *s_new;
2272
Py_ssize_t i, n = PyBytes_GET_SIZE(self);
2273
int previous_is_cased = 0;
2276
newobj = PyBytes_FromStringAndSize(NULL, n);
2279
s_new = PyBytes_AsString(newobj);
2280
for (i = 0; i < n; i++) {
2281
int c = Py_CHARMASK(*s++);
2283
if (!previous_is_cased)
2285
previous_is_cased = 1;
2286
} else if (isupper(c)) {
2287
if (previous_is_cased)
2289
previous_is_cased = 1;
2291
previous_is_cased = 0;
2297
/* S.capitalize(): docstring and method.
 *
 * A new bytes object of the same length n is allocated.  The first byte is
 * handled separately and may be written as toupper(c); the remaining bytes
 * (i = 1 .. n-1) are handled in a loop and may be written as tolower(c).
 * Every byte is read through Py_CHARMASK before being classified.
 * NOTE(review): the tests guarding the toupper/tolower stores are not shown
 * here -- presumably islower()/isupper() checks; confirm.
 */
PyDoc_STRVAR(capitalize__doc__,
2298
"S.capitalize() -> string\n\
2300
Return a copy of the string S with only its first character\n\
2304
string_capitalize(PyBytesObject *self)
2306
char *s = PyBytes_AS_STRING(self), *s_new;
2307
Py_ssize_t i, n = PyBytes_GET_SIZE(self);
2310
newobj = PyBytes_FromStringAndSize(NULL, n);
2313
s_new = PyBytes_AsString(newobj);
2315
int c = Py_CHARMASK(*s++);
2317
*s_new = toupper(c);
2322
for (i = 1; i < n; i++) {
2323
int c = Py_CHARMASK(*s++);
2325
*s_new = tolower(c);
2334
/* S.count(sub[, start[, end]]): docstring and method.
 *
 * args is parsed with "O|O&O&:count"; start and end are converted by
 * _PyEval_SliceIndex and default to 0 and PY_SSIZE_T_MAX.  The substring
 * may be bytes, unicode (only when Py_USING_UNICODE, in which case
 * PyUnicode_Count does the work), or any object accepted by
 * PyObject_AsCharBuffer.
 *
 * For the byte paths, start/end are first normalized against len(self) by
 * string_adjust_indices, then stringlib_count is run over the end-start
 * bytes beginning at str+start and its result is returned as a Python int.
 */
PyDoc_STRVAR(count__doc__,
2335
"S.count(sub[, start[, end]]) -> int\n\
2337
Return the number of non-overlapping occurrences of substring sub in\n\
2338
string S[start:end]. Optional arguments start and end are interpreted\n\
2339
as in slice notation.");
2342
string_count(PyBytesObject *self, PyObject *args)
2345
const char *str = PyBytes_AS_STRING(self), *sub;
2347
Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2349
if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2350
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2353
if (PyBytes_Check(sub_obj)) {
2354
sub = PyBytes_AS_STRING(sub_obj);
2355
sub_len = PyBytes_GET_SIZE(sub_obj);
2357
#ifdef Py_USING_UNICODE
2358
else if (PyUnicode_Check(sub_obj)) {
2360
count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2364
return PyInt_FromSsize_t(count);
2367
else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2370
string_adjust_indices(&start, &end, PyBytes_GET_SIZE(self));
2372
return PyInt_FromSsize_t(
2373
stringlib_count(str + start, end - start, sub, sub_len)
2377
/* S.swapcase(): docstring and method.
 *
 * A new bytes object of the same length n is allocated and filled while
 * reading self one byte at a time (through Py_CHARMASK).  One branch stores
 * toupper(c); the isupper(c) branch stores tolower(c).
 * NOTE(review): the test guarding the toupper store is not shown here --
 * presumably islower(c); confirm.
 */
PyDoc_STRVAR(swapcase__doc__,
2378
"S.swapcase() -> string\n\
2380
Return a copy of the string S with uppercase characters\n\
2381
converted to lowercase and vice versa.");
2384
string_swapcase(PyBytesObject *self)
2386
char *s = PyBytes_AS_STRING(self), *s_new;
2387
Py_ssize_t i, n = PyBytes_GET_SIZE(self);
2390
newobj = PyBytes_FromStringAndSize(NULL, n);
2393
s_new = PyBytes_AsString(newobj);
2394
for (i = 0; i < n; i++) {
2395
int c = Py_CHARMASK(*s++);
2397
*s_new = toupper(c);
2399
else if (isupper(c)) {
2400
*s_new = tolower(c);
2410
/* S.translate(table[, deletechars]): docstring and method.
 *
 * Arguments (PyArg_UnpackTuple, 1 to 2 items):
 *   table       bytes, None, unicode, or any char-buffer object.  A unicode
 *               table hands the call to PyUnicode_Translate, and is rejected
 *               with TypeError if deletechars was also given.  A byte table
 *               must be exactly 256 bytes long, else ValueError.
 *   deletechars optional bytes or char-buffer object; unicode is rejected
 *               with TypeError.
 *
 * A result buffer the size of the input is allocated up front.  Two paths:
 *   - No deletions and a table present: each input byte is mapped through
 *     table[] directly, and any mapping that differs from its input is
 *     detected with the `!= c` comparison.
 *   - Otherwise: a 256-entry int table is built (identity when table is
 *     NULL, else a copy of table), entries for bytes in deletechars are set
 *     to -1, and input bytes whose entry is -1 are skipped.  The result is
 *     finally shrunk to the number of bytes written with _PyBytes_Resize.
 * On both paths, when nothing changed and the input is an exact bytes
 * object, a new reference to the input is taken (Py_INCREF) -- presumably
 * so the original object is returned instead of the copy.
 */
PyDoc_STRVAR(translate__doc__,
2411
"S.translate(table [,deletechars]) -> string\n\
2413
Return a copy of the string S, where all characters occurring\n\
2414
in the optional argument deletechars are removed, and the\n\
2415
remaining characters have been mapped through the given\n\
2416
translation table, which must be a string of length 256.");
2419
string_translate(PyBytesObject *self, PyObject *args)
2421
register char *input, *output;
2423
register Py_ssize_t i, c, changed = 0;
2424
PyObject *input_obj = (PyObject*)self;
2425
const char *output_start, *del_table=NULL;
2426
Py_ssize_t inlen, tablen, dellen = 0;
2428
int trans_table[256];
2429
PyObject *tableobj, *delobj = NULL;
2431
if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2432
&tableobj, &delobj))
2435
if (PyBytes_Check(tableobj)) {
2436
table = PyBytes_AS_STRING(tableobj);
2437
tablen = PyBytes_GET_SIZE(tableobj);
2439
else if (tableobj == Py_None) {
2443
#ifdef Py_USING_UNICODE
2444
else if (PyUnicode_Check(tableobj)) {
2445
/* Unicode .translate() does not support the deletechars
2446
parameter; instead a mapping to None will cause characters
2448
if (delobj != NULL) {
2449
PyErr_SetString(PyExc_TypeError,
2450
"deletions are implemented differently for unicode");
2453
return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2456
else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2459
if (tablen != 256) {
2460
PyErr_SetString(PyExc_ValueError,
2461
"translation table must be 256 characters long");
2465
if (delobj != NULL) {
2466
if (PyBytes_Check(delobj)) {
2467
del_table = PyBytes_AS_STRING(delobj);
2468
dellen = PyBytes_GET_SIZE(delobj);
2470
#ifdef Py_USING_UNICODE
2471
else if (PyUnicode_Check(delobj)) {
2472
PyErr_SetString(PyExc_TypeError,
2473
"deletions are implemented differently for unicode");
2477
else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2485
inlen = PyBytes_GET_SIZE(input_obj);
2486
result = PyBytes_FromStringAndSize((char *)NULL, inlen);
2489
output_start = output = PyBytes_AsString(result);
2490
input = PyBytes_AS_STRING(input_obj);
2492
if (dellen == 0 && table != NULL) {
2493
/* If no deletions are required, use faster code */
2494
for (i = inlen; --i >= 0; ) {
2495
c = Py_CHARMASK(*input++);
2496
if (Py_CHARMASK((*output++ = table[c])) != c)
2499
if (changed || !PyBytes_CheckExact(input_obj))
2502
Py_INCREF(input_obj);
2506
if (table == NULL) {
2507
for (i = 0; i < 256; i++)
2508
trans_table[i] = Py_CHARMASK(i);
2510
for (i = 0; i < 256; i++)
2511
trans_table[i] = Py_CHARMASK(table[i]);
2514
for (i = 0; i < dellen; i++)
2515
trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2517
for (i = inlen; --i >= 0; ) {
2518
c = Py_CHARMASK(*input++);
2519
if (trans_table[c] != -1)
2520
if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2524
if (!changed && PyBytes_CheckExact(input_obj)) {
2526
Py_INCREF(input_obj);
2529
/* Fix the size of the resulting string */
2531
_PyBytes_Resize(&result, output - output_start);
2539
/* find and count characters and substrings */
/* findchar(target, target_len, c): thin memchr wrapper.  Evaluates to a
 * char * pointing at the first byte equal to c within the first target_len
 * bytes of target, or NULL when there is none.
 */
2541
#define findchar(target, target_len, c) \
2542
((char *)memchr((const void *)(target), c, target_len))
2544
/* String ops must return a string. */
2545
/* If the object is a subclass of string, create a copy */
/* return_self(self): used by the replace helpers for the "nothing changed"
 * case.  An exact bytes object takes the PyBytes_CheckExact branch
 * (presumably returning self itself with a new reference); anything else is
 * copied into a fresh exact bytes object with PyBytes_FromStringAndSize.
 */
2546
Py_LOCAL(PyBytesObject *)
2547
return_self(PyBytesObject *self)
2549
if (PyBytes_CheckExact(self)) {
2553
return (PyBytesObject *)PyBytes_FromStringAndSize(
2554
PyBytes_AS_STRING(self),
2555
PyBytes_GET_SIZE(self));
2558
/* Count occurrences of byte `c` in target[0:target_len], giving up once
 * `maxcount` has been reached.
 *
 * The scan repeatedly calls findchar() on the remaining range [start, end)
 * until it returns NULL or the count reaches maxcount.
 *
 * NOTE(review): target_len is declared `int`, but both callers in this file
 * (replace_delete_single_character, replace_single_character) pass a
 * Py_ssize_t length.  On platforms where Py_ssize_t is wider than int this
 * narrows the length for very large strings; the parameter should probably
 * be Py_ssize_t, matching findstring()/countstring().
 */
Py_LOCAL_INLINE(Py_ssize_t)
2559
countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2562
const char *start=target;
2563
const char *end=target+target_len;
2565
while ( (start=findchar(start, end-start, c)) != NULL ) {
2567
if (count >= maxcount)
2574
/* Locate `pattern` inside `target` between offsets start and end.
 *
 * target, target_len:   buffer to search and its length.
 * pattern, pattern_len: byte sequence to look for.
 * start, end:           search window; a negative start has target_len added
 *                       to it, and end is handled separately when it exceeds
 *                       target_len or is negative.
 * direction:            < 0 scans from end downward, otherwise from start
 *                       upward.
 *
 * A zero-length pattern matches immediately: the result is start for a
 * forward search and end for a backward one.  Otherwise candidate offsets
 * are tested with Py_STRING_MATCH.
 * NOTE(review): the return statements of the scan loops and the not-found
 * value are not shown here; callers treat the result as an offset into
 * target.
 */
Py_LOCAL(Py_ssize_t)
2575
findstring(const char *target, Py_ssize_t target_len,
2576
const char *pattern, Py_ssize_t pattern_len,
2582
start += target_len;
2586
if (end > target_len) {
2588
} else if (end < 0) {
2594
/* zero-length substrings always match at the first attempt */
2595
if (pattern_len == 0)
2596
return (direction > 0) ? start : end;
2600
if (direction < 0) {
2601
for (; end >= start; end--)
2602
if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2605
for (; start <= end; start++)
2606
if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2612
/* Count non-overlapping occurrences of `pattern` in `target` between
 * offsets start and end, stopping once `maxcount` matches have been seen.
 *
 * The window is normalized the same way as in findstring() (negative start
 * has target_len added; end is adjusted when out of range).  When
 * pattern_len or maxcount is zero, target_len+1 is returned if that is
 * below maxcount (the alternative return is not shown here).
 *
 * direction < 0 scans from end downward, otherwise from start upward.
 * After each match, maxcount is decremented and the scan stops when it
 * reaches zero; otherwise the scan position skips pattern_len-1 extra
 * bytes so that matches cannot overlap.
 */
Py_LOCAL_INLINE(Py_ssize_t)
2613
countstring(const char *target, Py_ssize_t target_len,
2614
const char *pattern, Py_ssize_t pattern_len,
2617
int direction, Py_ssize_t maxcount)
2622
start += target_len;
2626
if (end > target_len) {
2628
} else if (end < 0) {
2634
/* zero-length substrings match everywhere */
2635
if (pattern_len == 0 || maxcount == 0) {
2636
if (target_len+1 < maxcount)
2637
return target_len+1;
2642
if (direction < 0) {
2643
for (; (end >= start); end--)
2644
if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2646
if (--maxcount <= 0) break;
2647
end -= pattern_len-1;
2650
for (; (start <= end); start++)
2651
if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2653
if (--maxcount <= 0)
2655
start += pattern_len-1;
2662
/* Algorithms for different cases of string replacement */
2664
/* replace_interleave: implements replacing the empty string, i.e. inserting
 * `to` before the first byte and after each of the first count bytes of
 * self, where count is bounded by maxcount.
 *
 * The result length is count * to_len + self_len; OverflowError("replace
 * string is too long") is raised if that computation overflows.  The result
 * is allocated once and filled by laying down `to`, then alternating one
 * byte of self with one copy of `to`, and finally copying the self_len-i
 * bytes of self that remain.
 *
 * NOTE(review): the overflow test divides after the multiplication has
 * already been performed; signed overflow is undefined behavior in C, so a
 * bound check made before multiplying would be more robust.
 */
/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2665
Py_LOCAL(PyBytesObject *)
2666
replace_interleave(PyBytesObject *self,
2667
const char *to_s, Py_ssize_t to_len,
2668
Py_ssize_t maxcount)
2670
char *self_s, *result_s;
2671
Py_ssize_t self_len, result_len;
2672
Py_ssize_t count, i, product;
2673
PyBytesObject *result;
2675
self_len = PyBytes_GET_SIZE(self);
2677
/* 1 at the end plus 1 after every character */
2679
if (maxcount < count)
2682
/* Check for overflow */
2683
/* result_len = count * to_len + self_len; */
2684
product = count * to_len;
2685
if (product / to_len != count) {
2686
PyErr_SetString(PyExc_OverflowError,
2687
"replace string is too long");
2690
result_len = product + self_len;
2691
if (result_len < 0) {
2692
PyErr_SetString(PyExc_OverflowError,
2693
"replace string is too long");
2697
if (! (result = (PyBytesObject *)
2698
PyBytes_FromStringAndSize(NULL, result_len)) )
2701
self_s = PyBytes_AS_STRING(self);
2702
result_s = PyBytes_AS_STRING(result);
2704
/* TODO: special case single character, which doesn't need memcpy */
2706
/* Lay the first one down (guaranteed this will occur) */
2707
Py_MEMCPY(result_s, to_s, to_len);
2711
for (i=0; i<count; i++) {
2712
*result_s++ = *self_s++;
2713
Py_MEMCPY(result_s, to_s, to_len);
2717
/* Copy the rest of the original string */
2718
Py_MEMCPY(result_s, self_s, self_len-i);
2723
/* replace_delete_single_character: remove up to maxcount occurrences of the
 * byte from_c from self.
 *
 * The occurrences are first counted with countchar(); when there is nothing
 * to delete the input is handed to return_self().  Otherwise a result of
 * self_len - count bytes is allocated and filled by copying, for each
 * occurrence found with findchar(), the bytes between the previous
 * occurrence and this one, followed by the tail after the last occurrence.
 */
/* Special case for deleting a single character */
2724
/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2725
Py_LOCAL(PyBytesObject *)
2726
replace_delete_single_character(PyBytesObject *self,
2727
char from_c, Py_ssize_t maxcount)
2729
char *self_s, *result_s;
2730
char *start, *next, *end;
2731
Py_ssize_t self_len, result_len;
2733
PyBytesObject *result;
2735
self_len = PyBytes_GET_SIZE(self);
2736
self_s = PyBytes_AS_STRING(self);
2738
count = countchar(self_s, self_len, from_c, maxcount);
2740
return return_self(self);
2743
result_len = self_len - count; /* from_len == 1 */
2744
assert(result_len>=0);
2746
if ( (result = (PyBytesObject *)
2747
PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2749
result_s = PyBytes_AS_STRING(result);
2752
end = self_s + self_len;
2753
while (count-- > 0) {
2754
next = findchar(start, end-start, from_c);
2757
Py_MEMCPY(result_s, start, next-start);
2758
result_s += (next-start);
2761
Py_MEMCPY(result_s, start, end-start);
2766
/* replace_delete_substring: remove up to maxcount occurrences of the
 * multi-byte sequence from_s from self.
 *
 * The occurrences are first counted with countstring(); when there is
 * nothing to delete the input is handed to return_self().  Otherwise a
 * result of self_len - count * from_len bytes is allocated.  Each occurrence
 * is located with findstring() relative to `start`; the bytes before it are
 * copied, `start` jumps past the occurrence (next + from_len), and after the
 * loop the remaining tail is copied.
 */
/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2768
Py_LOCAL(PyBytesObject *)
2769
replace_delete_substring(PyBytesObject *self,
2770
const char *from_s, Py_ssize_t from_len,
2771
Py_ssize_t maxcount) {
2772
char *self_s, *result_s;
2773
char *start, *next, *end;
2774
Py_ssize_t self_len, result_len;
2775
Py_ssize_t count, offset;
2776
PyBytesObject *result;
2778
self_len = PyBytes_GET_SIZE(self);
2779
self_s = PyBytes_AS_STRING(self);
2781
count = countstring(self_s, self_len,
2788
return return_self(self);
2791
result_len = self_len - (count * from_len);
2792
assert (result_len>=0);
2794
if ( (result = (PyBytesObject *)
2795
PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2798
result_s = PyBytes_AS_STRING(result);
2801
end = self_s + self_len;
2802
while (count-- > 0) {
2803
offset = findstring(start, end-start,
2805
0, end-start, FORWARD);
2808
next = start + offset;
2810
Py_MEMCPY(result_s, start, next-start);
2812
result_s += (next-start);
2813
start = next+from_len;
2815
Py_MEMCPY(result_s, start, end-start);
2819
/* replace_single_character_in_place: replace up to maxcount occurrences of
 * byte from_c with byte to_c.  Because both are one byte the result has the
 * same length as self.
 *
 * self is searched once with findchar(); with no match the input is handed
 * to return_self().  Otherwise self is copied into a new bytes object and
 * the replacements are made inside that copy ("in place" refers to the
 * copy, not to self), starting at the offset of the first match and
 * continuing with findchar() while --maxcount stays positive.
 */
/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2820
Py_LOCAL(PyBytesObject *)
2821
replace_single_character_in_place(PyBytesObject *self,
2822
char from_c, char to_c,
2823
Py_ssize_t maxcount)
2825
char *self_s, *result_s, *start, *end, *next;
2826
Py_ssize_t self_len;
2827
PyBytesObject *result;
2829
/* The result string will be the same size */
2830
self_s = PyBytes_AS_STRING(self);
2831
self_len = PyBytes_GET_SIZE(self);
2833
next = findchar(self_s, self_len, from_c);
2836
/* No matches; return the original string */
2837
return return_self(self);
2840
/* Need to make a new string */
2841
result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2844
result_s = PyBytes_AS_STRING(result);
2845
Py_MEMCPY(result_s, self_s, self_len);
2847
/* change everything in-place, starting with this one */
2848
start = result_s + (next-self_s);
2851
end = result_s + self_len;
2853
while (--maxcount > 0) {
2854
next = findchar(start, end-start, from_c);
2864
/* replace_substring_in_place: replace up to maxcount occurrences of from_s
 * with to_s when both have the same length, so the result has the same
 * length as self.
 *
 * self is searched once with findstring(); with no match the input is
 * handed to return_self().  Otherwise self is copied into a new bytes
 * object and each occurrence inside the copy is overwritten with from_len
 * bytes of to_s (from_len == to_len here).  After each overwrite `start`
 * advances past the replaced bytes, so later searches never re-examine
 * them.
 */
/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2865
Py_LOCAL(PyBytesObject *)
2866
replace_substring_in_place(PyBytesObject *self,
2867
const char *from_s, Py_ssize_t from_len,
2868
const char *to_s, Py_ssize_t to_len,
2869
Py_ssize_t maxcount)
2871
char *result_s, *start, *end;
2873
Py_ssize_t self_len, offset;
2874
PyBytesObject *result;
2876
/* The result string will be the same size */
2878
self_s = PyBytes_AS_STRING(self);
2879
self_len = PyBytes_GET_SIZE(self);
2881
offset = findstring(self_s, self_len,
2883
0, self_len, FORWARD);
2885
/* No matches; return the original string */
2886
return return_self(self);
2889
/* Need to make a new string */
2890
result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2893
result_s = PyBytes_AS_STRING(result);
2894
Py_MEMCPY(result_s, self_s, self_len);
2896
/* change everything in-place, starting with this one */
2897
start = result_s + offset;
2898
Py_MEMCPY(start, to_s, from_len);
2900
end = result_s + self_len;
2902
while ( --maxcount > 0) {
2903
offset = findstring(start, end-start,
2905
0, end-start, FORWARD);
2908
Py_MEMCPY(start+offset, to_s, from_len);
2909
start += offset+from_len;
2915
/* replace_single_character: replace up to maxcount occurrences of the byte
 * from_c with the longer sequence to_s.
 *
 * The occurrences are counted with countchar(); with none, the input is
 * handed to return_self().  The result length is
 * self_len + count * (to_len - 1), and OverflowError("replace string is too
 * long") is raised if that overflows.  The result is then built by copying
 * the bytes preceding each occurrence followed by to_s, and finally the
 * tail after the last occurrence.
 *
 * NOTE(review): the overflow test divides after the multiplication has
 * already been performed; signed overflow is undefined behavior in C, so a
 * bound check made before multiplying would be more robust.
 */
/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2916
Py_LOCAL(PyBytesObject *)
2917
replace_single_character(PyBytesObject *self,
2919
const char *to_s, Py_ssize_t to_len,
2920
Py_ssize_t maxcount)
2922
char *self_s, *result_s;
2923
char *start, *next, *end;
2924
Py_ssize_t self_len, result_len;
2925
Py_ssize_t count, product;
2926
PyBytesObject *result;
2928
self_s = PyBytes_AS_STRING(self);
2929
self_len = PyBytes_GET_SIZE(self);
2931
count = countchar(self_s, self_len, from_c, maxcount);
2933
/* no matches, return unchanged */
2934
return return_self(self);
2937
/* use the difference between current and new, hence the "-1" */
2938
/* result_len = self_len + count * (to_len-1) */
2939
product = count * (to_len-1);
2940
if (product / (to_len-1) != count) {
2941
PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2944
result_len = self_len + product;
2945
if (result_len < 0) {
2946
PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2950
if ( (result = (PyBytesObject *)
2951
PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2953
result_s = PyBytes_AS_STRING(result);
2956
end = self_s + self_len;
2957
while (count-- > 0) {
2958
next = findchar(start, end-start, from_c);
2962
if (next == start) {
2963
/* replace with the 'to' */
2964
Py_MEMCPY(result_s, to_s, to_len);
2968
/* copy the unchanged old then the 'to' */
2969
Py_MEMCPY(result_s, start, next-start);
2970
result_s += (next-start);
2971
Py_MEMCPY(result_s, to_s, to_len);
2976
/* Copy the remainder of the remaining string */
2977
Py_MEMCPY(result_s, start, end-start);
2982
/* replace_substring: general case -- replace up to maxcount occurrences of
 * the multi-byte sequence from_s with to_s, where the two lengths differ.
 *
 * The occurrences are counted with countstring(); with none, the input is
 * handed to return_self().  The result length is
 * self_len + count * (to_len - from_len), and OverflowError("replace string
 * is too long") is raised if that overflows.  The result is built by
 * locating each occurrence with findstring(), copying the bytes before it
 * followed by to_s, advancing `start` past the occurrence, and finally
 * copying the tail.
 *
 * NOTE(review): the overflow test divides after the multiplication has
 * already been performed; signed overflow is undefined behavior in C, so a
 * bound check made before multiplying would be more robust.
 */
/* len(self)>=1, len(from)>=2, len(to)>=1, maxcount>=1 */
2983
Py_LOCAL(PyBytesObject *)
2984
replace_substring(PyBytesObject *self,
2985
const char *from_s, Py_ssize_t from_len,
2986
const char *to_s, Py_ssize_t to_len,
2987
Py_ssize_t maxcount) {
2988
char *self_s, *result_s;
2989
char *start, *next, *end;
2990
Py_ssize_t self_len, result_len;
2991
Py_ssize_t count, offset, product;
2992
PyBytesObject *result;
2994
self_s = PyBytes_AS_STRING(self);
2995
self_len = PyBytes_GET_SIZE(self);
2997
count = countstring(self_s, self_len,
2999
0, self_len, FORWARD, maxcount);
3001
/* no matches, return unchanged */
3002
return return_self(self);
3005
/* Check for overflow */
3006
/* result_len = self_len + count * (to_len-from_len) */
3007
product = count * (to_len-from_len);
3008
if (product / (to_len-from_len) != count) {
3009
PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3012
result_len = self_len + product;
3013
if (result_len < 0) {
3014
PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3018
if ( (result = (PyBytesObject *)
3019
PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
3021
result_s = PyBytes_AS_STRING(result);
3024
end = self_s + self_len;
3025
while (count-- > 0) {
3026
offset = findstring(start, end-start,
3028
0, end-start, FORWARD);
3031
next = start+offset;
3032
if (next == start) {
3033
/* replace with the 'to' */
3034
Py_MEMCPY(result_s, to_s, to_len);
3038
/* copy the unchanged old then the 'to' */
3039
Py_MEMCPY(result_s, start, next-start);
3040
result_s += (next-start);
3041
Py_MEMCPY(result_s, to_s, to_len);
3043
start = next+from_len;
3046
/* Copy the remainder of the remaining string */
3047
Py_MEMCPY(result_s, start, end-start);
3053
/* replace: dispatcher that picks the specialized replacement routine from
 * the lengths of `from` and `to`.
 *
 * maxcount is set to PY_SSIZE_T_MAX on the first branch (presumably when
 * the caller passed a negative count, meaning "no limit").  The cases, in
 * the order tested:
 *   - maxcount == 0 or empty self, or both strings empty -> return_self()
 *   - from is empty          -> replace_interleave()
 *   - self is empty          -> return_self()
 *   - deletion paths         -> replace_delete_single_character() when
 *                               from_len == 1, else
 *                               replace_delete_substring()
 *   - from_len == to_len     -> replace_single_character_in_place() or
 *                               replace_substring_in_place()
 *   - from_len == 1          -> replace_single_character()
 *   - otherwise              -> replace_substring()
 */
Py_LOCAL(PyBytesObject *)
3054
replace(PyBytesObject *self,
3055
const char *from_s, Py_ssize_t from_len,
3056
const char *to_s, Py_ssize_t to_len,
3057
Py_ssize_t maxcount)
3060
maxcount = PY_SSIZE_T_MAX;
3061
} else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
3062
/* nothing to do; return the original string */
3063
return return_self(self);
3066
if (maxcount == 0 ||
3067
(from_len == 0 && to_len == 0)) {
3068
/* nothing to do; return the original string */
3069
return return_self(self);
3072
/* Handle zero-length special cases */
3074
if (from_len == 0) {
3075
/* insert the 'to' string everywhere. */
3076
/* >>> "Python".replace("", ".") */
3077
/* '.P.y.t.h.o.n.' */
3078
return replace_interleave(self, to_s, to_len, maxcount);
3081
/* Except for "".replace("", "A") == "A" there is no way beyond this */
3082
/* point for an empty self string to generate a non-empty string */
3083
/* Special case so the remaining code always gets a non-empty string */
3084
if (PyBytes_GET_SIZE(self) == 0) {
3085
return return_self(self);
3089
/* delete all occurrences of 'from' string */
3090
if (from_len == 1) {
3091
return replace_delete_single_character(
3092
self, from_s[0], maxcount);
3094
return replace_delete_substring(self, from_s, from_len, maxcount);
3098
/* Handle special case where both strings have the same length */
3100
if (from_len == to_len) {
3101
if (from_len == 1) {
3102
return replace_single_character_in_place(
3108
return replace_substring_in_place(
3109
self, from_s, from_len, to_s, to_len, maxcount);
3113
/* Otherwise use the more generic algorithms */
3114
if (from_len == 1) {
3115
return replace_single_character(self, from_s[0],
3116
to_s, to_len, maxcount);
3118
/* len('from')>=2, len('to')>=1 */
3119
return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
3123
/* S.replace(old, new[, count]): docstring and method.
 *
 * args is parsed with "OO|n:replace"; count defaults to -1.  Each of
 * old/new may be bytes (buffer and size used directly), unicode (only when
 * Py_USING_UNICODE; the call is then handed to PyUnicode_Replace), or any
 * object accepted by PyObject_AsCharBuffer.  With both arguments resolved
 * to byte buffers, the work is done by replace().
 */
PyDoc_STRVAR(replace__doc__,
3124
"S.replace (old, new[, count]) -> string\n\
3126
Return a copy of string S with all occurrences of substring\n\
3127
old replaced by new. If the optional argument count is\n\
3128
given, only the first count occurrences are replaced.");
3131
string_replace(PyBytesObject *self, PyObject *args)
3133
Py_ssize_t count = -1;
3134
PyObject *from, *to;
3135
const char *from_s, *to_s;
3136
Py_ssize_t from_len, to_len;
3138
if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
3141
if (PyBytes_Check(from)) {
3142
from_s = PyBytes_AS_STRING(from);
3143
from_len = PyBytes_GET_SIZE(from);
3145
#ifdef Py_USING_UNICODE
3146
if (PyUnicode_Check(from))
3147
return PyUnicode_Replace((PyObject *)self,
3150
else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
3153
if (PyBytes_Check(to)) {
3154
to_s = PyBytes_AS_STRING(to);
3155
to_len = PyBytes_GET_SIZE(to);
3157
#ifdef Py_USING_UNICODE
3158
else if (PyUnicode_Check(to))
3159
return PyUnicode_Replace((PyObject *)self,
3162
else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
3165
return (PyObject *)replace((PyBytesObject *) self,
3167
to_s, to_len, count);
3172
/* _string_tailmatch: shared prefix/suffix test behind startswith and
 * endswith.
 *
 * substr may be bytes, unicode (only when Py_USING_UNICODE; handed to
 * PyUnicode_Tailmatch), or any object accepted by PyObject_AsCharBuffer.
 * start/end are normalized against len(self) by string_adjust_indices.
 * For direction < 0 the comparison is made at `start`; otherwise `start` is
 * moved so that the comparison covers the last slen bytes before `end`.
 * The comparison itself is a memcmp over slen bytes.
 */
/* Matches the end (direction >= 0) or start (direction < 0) of self
3173
* against substr, using the start and end arguments. Returns
3174
* -1 on error, 0 if not found and 1 if found.
3177
_string_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
3178
Py_ssize_t end, int direction)
3180
Py_ssize_t len = PyBytes_GET_SIZE(self);
3185
if (PyBytes_Check(substr)) {
3186
sub = PyBytes_AS_STRING(substr);
3187
slen = PyBytes_GET_SIZE(substr);
3189
#ifdef Py_USING_UNICODE
3190
else if (PyUnicode_Check(substr))
3191
return PyUnicode_Tailmatch((PyObject *)self,
3192
substr, start, end, direction);
3194
else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3196
str = PyBytes_AS_STRING(self);
3198
string_adjust_indices(&start, &end, len);
3200
if (direction < 0) {
3202
if (start+slen > len)
3206
if (end-start < slen || start > len)
3209
if (end-slen > start)
3212
if (end-start >= slen)
3213
return ! memcmp(str+start, sub, slen);
3218
PyDoc_STRVAR(startswith__doc__,
3219
"S.startswith(prefix[, start[, end]]) -> bool\n\
3221
Return True if S starts with the specified prefix, False otherwise.\n\
3222
With optional start, test S beginning at that position.\n\
3223
With optional end, stop comparing S at that position.\n\
3224
prefix can also be a tuple of strings to try.");
3227
string_startswith(PyBytesObject *self, PyObject *args)
3229
Py_ssize_t start = 0;
3230
Py_ssize_t end = PY_SSIZE_T_MAX;
3234
if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3235
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3237
if (PyTuple_Check(subobj)) {
3239
for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3240
result = _string_tailmatch(self,
3241
PyTuple_GET_ITEM(subobj, i),
3251
result = _string_tailmatch(self, subobj, start, end, -1);
3255
return PyBool_FromLong(result);
3259
PyDoc_STRVAR(endswith__doc__,
3260
"S.endswith(suffix[, start[, end]]) -> bool\n\
3262
Return True if S ends with the specified suffix, False otherwise.\n\
3263
With optional start, test S beginning at that position.\n\
3264
With optional end, stop comparing S at that position.\n\
3265
suffix can also be a tuple of strings to try.");
3268
string_endswith(PyBytesObject *self, PyObject *args)
3270
Py_ssize_t start = 0;
3271
Py_ssize_t end = PY_SSIZE_T_MAX;
3275
if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3276
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3278
if (PyTuple_Check(subobj)) {
3280
for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3281
result = _string_tailmatch(self,
3282
PyTuple_GET_ITEM(subobj, i),
3292
result = _string_tailmatch(self, subobj, start, end, +1);
3296
return PyBool_FromLong(result);
3300
PyDoc_STRVAR(encode__doc__,
3301
"S.encode([encoding[,errors]]) -> object\n\
3303
Encodes S using the codec registered for encoding. encoding defaults\n\
3304
to the default encoding. errors may be given to set a different error\n\
3305
handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3306
a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3307
'xmlcharrefreplace' as well as any other name registered with\n\
3308
codecs.register_error that is able to handle UnicodeEncodeErrors.");
3311
string_encode(PyBytesObject *self, PyObject *args)
3313
char *encoding = NULL;
3314
char *errors = NULL;
3317
if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3319
v = PyBytes_AsEncodedObject((PyObject *)self, encoding, errors);
3322
if (!PyBytes_Check(v) && !PyUnicode_Check(v)) {
3323
PyErr_Format(PyExc_TypeError,
3324
"encoder did not return a string/unicode object "
3326
Py_TYPE(v)->tp_name);
3337
PyDoc_STRVAR(decode__doc__,
3338
"S.decode([encoding[,errors]]) -> object\n\
3340
Decodes S using the codec registered for encoding. encoding defaults\n\
3341
to the default encoding. errors may be given to set a different error\n\
3342
handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3343
a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3344
as well as any other name registerd with codecs.register_error that is\n\
3345
able to handle UnicodeDecodeErrors.");
3348
string_decode(PyBytesObject *self, PyObject *args)
3350
char *encoding = NULL;
3351
char *errors = NULL;
3354
if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3356
v = PyBytes_AsDecodedObject((PyObject *)self, encoding, errors);
3359
if (!PyBytes_Check(v) && !PyUnicode_Check(v)) {
3360
PyErr_Format(PyExc_TypeError,
3361
"decoder did not return a string/unicode object "
3363
Py_TYPE(v)->tp_name);
3374
PyDoc_STRVAR(expandtabs__doc__,
3375
"S.expandtabs([tabsize]) -> string\n\
3377
Return a copy of S where all tab characters are expanded using spaces.\n\
3378
If tabsize is not given, a tab size of 8 characters is assumed.");
3381
string_expandtabs(PyBytesObject *self, PyObject *args)
3383
const char *e, *p, *qe;
3385
Py_ssize_t i, j, incr;
3389
if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3392
/* First pass: determine size of output string */
3393
i = 0; /* chars up to and including most recent \n or \r */
3394
j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3395
e = PyBytes_AS_STRING(self) + PyBytes_GET_SIZE(self); /* end of input */
3396
for (p = PyBytes_AS_STRING(self); p < e; p++)
3399
incr = tabsize - (j % tabsize);
3400
if (j > PY_SSIZE_T_MAX - incr)
3406
if (j > PY_SSIZE_T_MAX - 1)
3409
if (*p == '\n' || *p == '\r') {
3410
if (i > PY_SSIZE_T_MAX - j)
3417
if (i > PY_SSIZE_T_MAX - j)
3420
/* Second pass: create output string and fill it */
3421
u = PyBytes_FromStringAndSize(NULL, i + j);
3425
j = 0; /* same as in first pass */
3426
q = PyBytes_AS_STRING(u); /* next output char */
3427
qe = PyBytes_AS_STRING(u) + PyBytes_GET_SIZE(u); /* end of output */
3429
for (p = PyBytes_AS_STRING(self); p < e; p++)
3432
i = tabsize - (j % tabsize);
3446
if (*p == '\n' || *p == '\r')
3455
PyErr_SetString(PyExc_OverflowError, "new string is too long");
3459
/* Shared helper for ljust/rjust/center/zfill: build a string consisting of
   `left` copies of `fill`, then the bytes of self, then padding on the
   right.  The result buffer is sized left + len(self) + right.
   NOTE(review): several lines of this function (local declarations, any
   clamping of negative pads, the NULL check on u, and the tail of the last
   memset) are not visible in this excerpt. */
Py_LOCAL_INLINE(PyObject *)
3460
pad(PyBytesObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
3469
/* Nothing to add and self is an exact str: return self itself. */
if (left == 0 && right == 0 && PyBytes_CheckExact(self)) {
3471
return (PyObject *)self;
3474
/* Passing NULL asks for an uninitialised buffer that we fill in below. */
u = PyBytes_FromStringAndSize(NULL,
3475
left + PyBytes_GET_SIZE(self) + right);
3478
/* Left padding occupies the first `left` bytes. */
memset(PyBytes_AS_STRING(u), fill, left);
3479
/* The original bytes follow immediately after the left padding. */
Py_MEMCPY(PyBytes_AS_STRING(u) + left,
3480
PyBytes_AS_STRING(self),
3481
PyBytes_GET_SIZE(self));
3483
/* Right padding starts just past the copied bytes. */
memset(PyBytes_AS_STRING(u) + left + PyBytes_GET_SIZE(self),
3490
PyDoc_STRVAR(ljust__doc__,
3491
"S.ljust(width[, fillchar]) -> string\n"
3493
"Return S left justified in a string of length width. Padding is\n"
3494
"done using the specified fill character (default is a space).");
3497
/* S.ljust(width[, fillchar]): left-justify self in a field of `width`
   bytes.  Parses a width ("n" format) and an optional single-character
   fill ("c" format, default ' ') and delegates the padding to pad(). */
string_ljust(PyBytesObject *self, PyObject *args)
3500
char fillchar = ' ';
3502
if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3505
/* Already at least `width` long and an exact str: hand back self itself
   instead of building a copy. */
if (PyBytes_GET_SIZE(self) >= width && PyBytes_CheckExact(self)) {
3507
return (PyObject*) self;
3510
/* All of the padding goes on the right; the left pad is 0. */
return pad(self, 0, width - PyBytes_GET_SIZE(self), fillchar);
3514
PyDoc_STRVAR(rjust__doc__,
3515
"S.rjust(width[, fillchar]) -> string\n"
3517
"Return S right justified in a string of length width. Padding is\n"
3518
"done using the specified fill character (default is a space)");
3521
/* S.rjust(width[, fillchar]): right-justify self in a field of `width`
   bytes.  Parses a width ("n" format) and an optional single-character
   fill ("c" format, default ' ') and delegates the padding to pad(). */
string_rjust(PyBytesObject *self, PyObject *args)
3524
char fillchar = ' ';
3526
if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3529
/* Already at least `width` long and an exact str: hand back self itself
   instead of building a copy. */
if (PyBytes_GET_SIZE(self) >= width && PyBytes_CheckExact(self)) {
3531
return (PyObject*) self;
3534
/* All of the padding goes on the left; the right pad is 0. */
return pad(self, width - PyBytes_GET_SIZE(self), 0, fillchar);
3538
PyDoc_STRVAR(center__doc__,
3539
"S.center(width[, fillchar]) -> string\n"
3541
"Return S centered in a string of length width. Padding is\n"
3542
"done using the specified fill character (default is a space)");
3545
/* S.center(width[, fillchar]): centre self in a field of `width` bytes.
   Parses a width ("n" format) and an optional single-character fill
   ("c" format, default ' '), splits the total margin between the two
   sides and delegates the padding to pad(). */
string_center(PyBytesObject *self, PyObject *args)
3547
Py_ssize_t marg, left;
3549
char fillchar = ' ';
3551
if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3554
/* Already at least `width` long and an exact str: hand back self itself
   instead of building a copy. */
if (PyBytes_GET_SIZE(self) >= width && PyBytes_CheckExact(self)) {
3556
return (PyObject*) self;
3559
/* marg is the total number of fill bytes to distribute. */
marg = width - PyBytes_GET_SIZE(self);
3560
/* Half the margin goes on the left; the extra (marg & width & 1) term adds
   one more byte on the left only when both marg and width are odd. */
left = marg / 2 + (marg & width & 1);
3562
/* Whatever is not placed on the left goes on the right. */
return pad(self, left, marg - left, fillchar);
3565
PyDoc_STRVAR(zfill__doc__,
3566
"S.zfill(width) -> string\n"
3568
"Pad a numeric string S with zeros on the left, to fill a field\n"
3569
"of the specified width. The string S is never truncated.");
3572
/* S.zfill(width): pad self on the left with '0' bytes up to `width`,
   keeping a leading '+' or '-' in front of the zeros.  Self is never
   truncated.
   NOTE(review): the local declarations, the error returns and the body of
   the sign-moving branch are not visible in this excerpt. */
string_zfill(PyBytesObject *self, PyObject *args)
3579
if (!PyArg_ParseTuple(args, "n:zfill", &width))
3582
/* Nothing to pad when self is already at least `width` long. */
if (PyBytes_GET_SIZE(self) >= width) {
3583
/* An exact str can be returned as-is ... */
if (PyBytes_CheckExact(self)) {
3585
return (PyObject*) self;
3588
/* ... otherwise (apparently the non-exact case) a new string holding a
   copy of the same bytes is built. */
return PyBytes_FromStringAndSize(
3589
PyBytes_AS_STRING(self),
3590
PyBytes_GET_SIZE(self)
3594
/* Number of zero bytes that must be inserted in front. */
fill = width - PyBytes_GET_SIZE(self);
3596
s = pad(self, fill, 0, '0');
3601
p = PyBytes_AS_STRING(s);
3602
/* pad() placed `fill` zeros first, so p[fill] is the first byte of the
   original text; check whether that byte is a sign. */
if (p[fill] == '+' || p[fill] == '-') {
3603
/* move sign to beginning of string */
3608
return (PyObject*) s;
3611
PyDoc_STRVAR(isspace__doc__,
3612
"S.isspace() -> bool\n\
3614
Return True if all characters in S are whitespace\n\
3615
and there is at least one character in S, False otherwise.");
3618
string_isspace(PyBytesObject *self)
3620
register const unsigned char *p
3621
= (unsigned char *) PyBytes_AS_STRING(self);
3622
register const unsigned char *e;
3624
/* Shortcut for single character strings */
3625
if (PyBytes_GET_SIZE(self) == 1 &&
3627
return PyBool_FromLong(1);
3629
/* Special case for empty strings */
3630
if (PyBytes_GET_SIZE(self) == 0)
3631
return PyBool_FromLong(0);
3633
e = p + PyBytes_GET_SIZE(self);
3634
for (; p < e; p++) {
3636
return PyBool_FromLong(0);
3638
return PyBool_FromLong(1);
3642
PyDoc_STRVAR(isalpha__doc__,
3643
"S.isalpha() -> bool\n\
3645
Return True if all characters in S are alphabetic\n\
3646
and there is at least one character in S, False otherwise.");
3649
string_isalpha(PyBytesObject *self)
3651
register const unsigned char *p
3652
= (unsigned char *) PyBytes_AS_STRING(self);
3653
register const unsigned char *e;
3655
/* Shortcut for single character strings */
3656
if (PyBytes_GET_SIZE(self) == 1 &&
3658
return PyBool_FromLong(1);
3660
/* Special case for empty strings */
3661
if (PyBytes_GET_SIZE(self) == 0)
3662
return PyBool_FromLong(0);
3664
e = p + PyBytes_GET_SIZE(self);
3665
for (; p < e; p++) {
3667
return PyBool_FromLong(0);
3669
return PyBool_FromLong(1);
3673
PyDoc_STRVAR(isalnum__doc__,
3674
"S.isalnum() -> bool\n\
3676
Return True if all characters in S are alphanumeric\n\
3677
and there is at least one character in S, False otherwise.");
3680
string_isalnum(PyBytesObject *self)
3682
register const unsigned char *p
3683
= (unsigned char *) PyBytes_AS_STRING(self);
3684
register const unsigned char *e;
3686
/* Shortcut for single character strings */
3687
if (PyBytes_GET_SIZE(self) == 1 &&
3689
return PyBool_FromLong(1);
3691
/* Special case for empty strings */
3692
if (PyBytes_GET_SIZE(self) == 0)
3693
return PyBool_FromLong(0);
3695
e = p + PyBytes_GET_SIZE(self);
3696
for (; p < e; p++) {
3698
return PyBool_FromLong(0);
3700
return PyBool_FromLong(1);
3704
PyDoc_STRVAR(isdigit__doc__,
3705
"S.isdigit() -> bool\n\
3707
Return True if all characters in S are digits\n\
3708
and there is at least one character in S, False otherwise.");
3711
string_isdigit(PyBytesObject *self)
3713
register const unsigned char *p
3714
= (unsigned char *) PyBytes_AS_STRING(self);
3715
register const unsigned char *e;
3717
/* Shortcut for single character strings */
3718
if (PyBytes_GET_SIZE(self) == 1 &&
3720
return PyBool_FromLong(1);
3722
/* Special case for empty strings */
3723
if (PyBytes_GET_SIZE(self) == 0)
3724
return PyBool_FromLong(0);
3726
e = p + PyBytes_GET_SIZE(self);
3727
for (; p < e; p++) {
3729
return PyBool_FromLong(0);
3731
return PyBool_FromLong(1);
3735
PyDoc_STRVAR(islower__doc__,
3736
"S.islower() -> bool\n\
3738
Return True if all cased characters in S are lowercase and there is\n\
3739
at least one cased character in S, False otherwise.");
3742
string_islower(PyBytesObject *self)
3744
register const unsigned char *p
3745
= (unsigned char *) PyBytes_AS_STRING(self);
3746
register const unsigned char *e;
3749
/* Shortcut for single character strings */
3750
if (PyBytes_GET_SIZE(self) == 1)
3751
return PyBool_FromLong(islower(*p) != 0);
3753
/* Special case for empty strings */
3754
if (PyBytes_GET_SIZE(self) == 0)
3755
return PyBool_FromLong(0);
3757
e = p + PyBytes_GET_SIZE(self);
3759
for (; p < e; p++) {
3761
return PyBool_FromLong(0);
3762
else if (!cased && islower(*p))
3765
return PyBool_FromLong(cased);
3769
PyDoc_STRVAR(isupper__doc__,
3770
"S.isupper() -> bool\n\
3772
Return True if all cased characters in S are uppercase and there is\n\
3773
at least one cased character in S, False otherwise.");
3776
string_isupper(PyBytesObject *self)
3778
register const unsigned char *p
3779
= (unsigned char *) PyBytes_AS_STRING(self);
3780
register const unsigned char *e;
3783
/* Shortcut for single character strings */
3784
if (PyBytes_GET_SIZE(self) == 1)
3785
return PyBool_FromLong(isupper(*p) != 0);
3787
/* Special case for empty strings */
3788
if (PyBytes_GET_SIZE(self) == 0)
3789
return PyBool_FromLong(0);
3791
e = p + PyBytes_GET_SIZE(self);
3793
for (; p < e; p++) {
3795
return PyBool_FromLong(0);
3796
else if (!cased && isupper(*p))
3799
return PyBool_FromLong(cased);
3803
PyDoc_STRVAR(istitle__doc__,
3804
"S.istitle() -> bool\n\
3806
Return True if S is a titlecased string and there is at least one\n\
3807
character in S, i.e. uppercase characters may only follow uncased\n\
3808
characters and lowercase characters only cased ones. Return False\n\
3812
string_istitle(PyBytesObject *self, PyObject *uncased)
3814
register const unsigned char *p
3815
= (unsigned char *) PyBytes_AS_STRING(self);
3816
register const unsigned char *e;
3817
int cased, previous_is_cased;
3819
/* Shortcut for single character strings */
3820
if (PyBytes_GET_SIZE(self) == 1)
3821
return PyBool_FromLong(isupper(*p) != 0);
3823
/* Special case for empty strings */
3824
if (PyBytes_GET_SIZE(self) == 0)
3825
return PyBool_FromLong(0);
3827
e = p + PyBytes_GET_SIZE(self);
3829
previous_is_cased = 0;
3830
for (; p < e; p++) {
3831
register const unsigned char ch = *p;
3834
if (previous_is_cased)
3835
return PyBool_FromLong(0);
3836
previous_is_cased = 1;
3839
else if (islower(ch)) {
3840
if (!previous_is_cased)
3841
return PyBool_FromLong(0);
3842
previous_is_cased = 1;
3846
previous_is_cased = 0;
3848
return PyBool_FromLong(cased);
3852
PyDoc_STRVAR(splitlines__doc__,
3853
"S.splitlines([keepends]) -> list of strings\n\
3855
Return a list of the lines in S, breaking at line boundaries.\n\
3856
Line breaks are not included in the resulting list unless keepends\n\
3857
is given and true.");
3860
string_splitlines(PyBytesObject *self, PyObject *args)
3862
register Py_ssize_t i;
3863
register Py_ssize_t j;
3870
if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3873
data = PyBytes_AS_STRING(self);
3874
len = PyBytes_GET_SIZE(self);
3876
/* This does not use the preallocated list because splitlines is
3877
usually run with hundreds of newlines. The overhead of
3878
switching between PyList_SET_ITEM and append causes about a
3879
2-3% slowdown for that common case. A smarter implementation
3880
could move the if check out, so the SET_ITEMs are done first
3881
and the appends only done when the prealloc buffer is full.
3882
That's too much work for little gain.*/
3884
list = PyList_New(0);
3888
for (i = j = 0; i < len; ) {
3891
/* Find a line and append it */
3892
while (i < len && data[i] != '\n' && data[i] != '\r')
3895
/* Skip the line break reading CRLF as one line break */
3898
if (data[i] == '\r' && i + 1 < len &&
3906
SPLIT_APPEND(data, j, eol);
3910
SPLIT_APPEND(data, j, len);
3920
PyDoc_STRVAR(sizeof__doc__,
3921
"S.__sizeof__() -> size of S in memory, in bytes");
3924
/* S.__sizeof__(): return, as a Python int, the size of v in bytes.
   Registered as "__sizeof__" in string_methods. */
string_sizeof(PyBytesObject *v)
3927
/* Fixed object header plus ob_size items of tp_itemsize bytes each. */
res = sizeof(PyBytesObject) + v->ob_size * v->ob_type->tp_itemsize;
3928
return PyInt_FromSsize_t(res);
3934
#undef PREALLOC_SIZE
3937
/* S.__getnewargs__(): registered as "__getnewargs__" in string_methods.
   Returns a 1-tuple holding a string built from v's buffer. */
string_getnewargs(PyBytesObject *v)
3939
/* "s#" takes a buffer plus an explicit length, so the length comes from
   Py_SIZE(v) rather than from scanning ob_sval for a terminator. */
return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
3943
#include "stringlib/string_format.h"
3945
PyDoc_STRVAR(format__doc__,
3946
"S.format(*args, **kwargs) -> unicode\n\
3951
string__format__(PyObject* self, PyObject* args)
3953
PyObject *format_spec;
3954
PyObject *result = NULL;
3955
PyObject *tmp = NULL;
3957
/* If 2.x, convert format_spec to the same type as value */
3958
/* This is to allow things like u''.format('') */
3959
if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
3961
if (!(PyBytes_Check(format_spec) || PyUnicode_Check(format_spec))) {
3962
PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3963
"or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3966
tmp = PyObject_Str(format_spec);
3971
result = _PyBytes_FormatAdvanced(self,
3972
PyBytes_AS_STRING(format_spec),
3973
PyBytes_GET_SIZE(format_spec));
3979
PyDoc_STRVAR(p_format__doc__,
3980
"S.__format__(format_spec) -> unicode\n\
3986
string_methods[] = {
3987
/* Counterparts of the obsolete stropmodule functions; except
3988
string.maketrans(). */
3989
{"join", (PyCFunction)string_join, METH_O, join__doc__},
3990
{"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3991
{"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3992
{"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3993
{"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3994
{"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3995
{"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3996
{"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3997
{"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3998
{"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3999
{"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
4000
{"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
4001
{"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
4003
{"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
4004
{"endswith", (PyCFunction)string_endswith, METH_VARARGS,
4006
{"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
4007
{"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
4008
{"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
4009
{"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
4010
{"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
4011
{"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
4012
{"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
4013
{"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
4014
{"rpartition", (PyCFunction)string_rpartition, METH_O,
4016
{"startswith", (PyCFunction)string_startswith, METH_VARARGS,
4018
{"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
4019
{"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
4021
{"translate", (PyCFunction)string_translate, METH_VARARGS,
4023
{"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
4024
{"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
4025
{"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
4026
{"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
4027
{"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
4028
{"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
4029
{"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
4030
{"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
4031
{"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
4032
{"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
4033
{"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
4034
{"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
4036
{"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
4038
{"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
4040
{"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
4041
{NULL, NULL} /* sentinel */
4045
str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
4048
/* tp_new implementation for PyBytes_Type (str).  Accepts one optional
   argument, usable as the keyword "object".
   NOTE(review): the declaration of x, the error return after argument
   parsing and the test guarding the empty-string return are not visible
   in this excerpt. */
string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4051
static char *kwlist[] = {"object", 0};
4053
/* Any type other than the exact str type is handled by the subtype path. */
if (type != &PyBytes_Type)
4054
return str_subtype_new(type, args, kwds);
4055
if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
4058
/* Presumably taken when no argument was supplied -- confirm against the
   full source. */
return PyBytes_FromString("");
4059
/* Otherwise the result is the str() conversion of the argument. */
return PyObject_Str(x);
4063
/* Construct an instance of a str subtype: first build an exact str from
   the arguments, then copy its contents into a freshly allocated object
   of `type`.
   NOTE(review): the declaration of n, the NULL checks and the cleanup /
   return statements are not visible in this excerpt. */
str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4065
PyObject *tmp, *pnew;
4068
assert(PyType_IsSubtype(type, &PyBytes_Type));
4069
/* Let the exact-type constructor do the argument handling/conversion. */
tmp = string_new(&PyBytes_Type, args, kwds);
4072
assert(PyBytes_CheckExact(tmp));
4073
n = PyBytes_GET_SIZE(tmp);
4074
/* Allocate the subtype instance with room for n items. */
pnew = type->tp_alloc(type, n);
4076
/* n+1 bytes so that the terminating null byte is copied as well. */
Py_MEMCPY(PyBytes_AS_STRING(pnew), PyBytes_AS_STRING(tmp), n+1);
4077
/* Carry over the hash field from the temporary string. */
((PyBytesObject *)pnew)->ob_shash =
4078
((PyBytesObject *)tmp)->ob_shash;
4079
/* The new object starts out not interned. */
((PyBytesObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
4086
/* tp_new for PyBaseString_Type: basestring cannot be instantiated, so
   this sets a TypeError saying so.  None of the arguments are used in the
   visible lines.
   NOTE(review): the return statement is not visible in this excerpt. */
basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4088
PyErr_SetString(PyExc_TypeError,
4089
"The basestring type cannot be instantiated");
4094
/* nb_remainder slot of string_as_number: implements `v % w` string
   formatting by forwarding to PyBytes_Format. */
string_mod(PyObject *v, PyObject *w)
4096
/* If the left operand is not a string, decline with NotImplemented
   rather than raising an error here. */
if (!PyBytes_Check(v)) {
4097
Py_INCREF(Py_NotImplemented);
4098
return Py_NotImplemented;
4100
return PyBytes_Format(v, w);
4103
PyDoc_STRVAR(basestring_doc,
4104
"Type basestring cannot be instantiated; it is the base for str and unicode.");
4106
static PyNumberMethods string_as_number = {
4111
string_mod, /*nb_remainder*/
4115
PyTypeObject PyBaseString_Type = {
4116
PyVarObject_HEAD_INIT(&PyType_Type, 0)
4126
0, /* tp_as_number */
4127
0, /* tp_as_sequence */
4128
0, /* tp_as_mapping */
4132
0, /* tp_getattro */
4133
0, /* tp_setattro */
4134
0, /* tp_as_buffer */
4135
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
4136
basestring_doc, /* tp_doc */
4137
0, /* tp_traverse */
4139
0, /* tp_richcompare */
4140
0, /* tp_weaklistoffset */
4142
0, /* tp_iternext */
4146
&PyBaseObject_Type, /* tp_base */
4148
0, /* tp_descr_get */
4149
0, /* tp_descr_set */
4150
0, /* tp_dictoffset */
4153
basestring_new, /* tp_new */
4157
PyDoc_STRVAR(string_doc,
4158
"str(object) -> string\n\
4160
Return a nice string representation of the object.\n\
4161
If the argument is a string, the return value is the same object.");
4163
PyTypeObject PyBytes_Type = {
4164
PyVarObject_HEAD_INIT(&PyType_Type, 0)
4166
sizeof(PyBytesObject),
4168
string_dealloc, /* tp_dealloc */
4169
(printfunc)string_print, /* tp_print */
4173
string_repr, /* tp_repr */
4174
&string_as_number, /* tp_as_number */
4175
&string_as_sequence, /* tp_as_sequence */
4176
&string_as_mapping, /* tp_as_mapping */
4177
(hashfunc)string_hash, /* tp_hash */
4179
string_str, /* tp_str */
4180
PyObject_GenericGetAttr, /* tp_getattro */
4181
0, /* tp_setattro */
4182
&string_as_buffer, /* tp_as_buffer */
4183
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
4184
Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
4185
Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
4186
string_doc, /* tp_doc */
4187
0, /* tp_traverse */
4189
(richcmpfunc)string_richcompare, /* tp_richcompare */
4190
0, /* tp_weaklistoffset */
4192
0, /* tp_iternext */
4193
string_methods, /* tp_methods */
4196
&PyBaseString_Type, /* tp_base */
4198
0, /* tp_descr_get */
4199
0, /* tp_descr_set */
4200
0, /* tp_dictoffset */
4203
string_new, /* tp_new */
4204
PyObject_Del, /* tp_free */
4208
PyBytes_Concat(register PyObject **pv, register PyObject *w)
4210
register PyObject *v;
4213
if (w == NULL || !PyBytes_Check(*pv)) {
4218
v = string_concat((PyBytesObject *) *pv, w);
4224
PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
4226
PyBytes_Concat(pv, w);
4231
/* The following function breaks the notion that strings are immutable:
4232
it changes the size of a string. We get away with this only if there
4233
is only one module referencing the object. You can also think of it
4234
as creating a new string object and destroying the old one, only
4235
more efficiently. In any case, don't use this if the string may
4236
already be known to some other part of the code...
4237
Note that if there's not enough memory to resize the string, the original
4238
string object at *pv is deallocated, *pv is set to NULL, an "out of
4239
memory" exception is set, and -1 is returned. Else (on success) 0 is
4240
returned, and the value in *pv may or may not be the same as on input.
4241
As always, an extra byte is allocated for a trailing \0 byte (newsize
4242
does *not* include that), and a trailing \0 byte is stored.
4246
_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
4248
register PyObject *v;
4249
register PyBytesObject *sv;
4251
if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
4252
PyBytes_CHECK_INTERNED(v)) {
4255
PyErr_BadInternalCall();
4258
/* XXX UNREF/NEWREF interface should be more symmetrical */
4260
_Py_ForgetReference(v);
4262
PyObject_REALLOC((char *)v, sizeof(PyBytesObject) + newsize);
4268
_Py_NewReference(*pv);
4269
sv = (PyBytesObject *) *pv;
4270
Py_SIZE(sv) = newsize;
4271
sv->ob_sval[newsize] = '\0';
4272
sv->ob_shash = -1; /* invalidate cached hash value */
4276
/* Helpers for formatstring */
4278
/* Formatting helper: fetch the argument at index *p_argidx from `args`.
   When the index is within arglen the item is obtained with
   PyTuple_GetItem; the function also contains a TypeError
   ("not enough arguments for format string").
   NOTE(review): the lines that advance *p_argidx and the return after the
   error are not visible in this excerpt. */
Py_LOCAL_INLINE(PyObject *)
4279
getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
4281
Py_ssize_t argidx = *p_argidx;
4282
/* Still inside the argument sequence. */
if (argidx < arglen) {
4287
return PyTuple_GetItem(args, argidx);
4289
/* Presumably reached when the format string wants more values than were
   supplied -- confirm against the full source. */
PyErr_SetString(PyExc_TypeError,
4290
"not enough arguments for format string");
4301
#define F_LJUST (1<<0)
4302
#define F_SIGN (1<<1)
4303
#define F_BLANK (1<<2)
4304
#define F_ALT (1<<3)
4305
#define F_ZERO (1<<4)
4307
Py_LOCAL_INLINE(int)
4308
formatfloat(char *buf, size_t buflen, int flags,
4309
int prec, int type, PyObject *v)
4311
/* fmt = '%#.' + `prec` + `type`
4312
worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
4315
x = PyFloat_AsDouble(v);
4316
if (x == -1.0 && PyErr_Occurred()) {
4317
PyErr_Format(PyExc_TypeError, "float argument required, "
4318
"not %.200s", Py_TYPE(v)->tp_name);
4323
if (type == 'f' && fabs(x)/1e25 >= 1e25)
4325
/* Worst case length calc to ensure no buffer overrun:
4329
buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
4330
for any double rep.)
4331
len = 1 + prec + 1 + 2 + 5 = 9 + prec
4334
buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4335
len = 1 + 50 + 1 + prec = 52 + prec
4337
If prec=0 the effective precision is 1 (the leading digit is
4338
always given), therefore increase the length by one.
4341
if (((type == 'g' || type == 'G') &&
4342
buflen <= (size_t)10 + (size_t)prec) ||
4343
(type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
4344
PyErr_SetString(PyExc_OverflowError,
4345
"formatted float is too long (precision too large?)");
4348
PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4349
(flags&F_ALT) ? "#" : "",
4351
PyOS_ascii_formatd(buf, buflen, fmt, x);
4352
return (int)strlen(buf);
4355
/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
4356
* the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4357
* Python's regular ints.
4358
* Return value: a new PyString*, or NULL if error.
4359
* . *pbuf is set to point into it,
4360
* *plen set to the # of chars following that.
4361
* Caller must decref it when done using pbuf.
4362
* The string starting at *pbuf is of the form
4363
* "-"? ("0x" | "0X")? digit+
4364
* "0x"/"0X" are present only for x and X conversions, with F_ALT
4365
* set in flags. The case of hex digits will be correct.
4366
* There will be at least prec digits, zero-filled on the left if
4367
* necessary to get that many.
4368
* val object to be converted
4369
* flags bitmask of format flags; only F_ALT is looked at
4370
* prec minimum number of digits; 0-fill on left if needed
4371
* type a character in [duoxX]; u acts the same as d
4373
* CAUTION: o, x and X conversions on regular ints can never
4374
* produce a '-' sign, but can for Python's unbounded ints.
4377
_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
4378
char **pbuf, int *plen)
4380
PyObject *result = NULL;
4383
int sign; /* 1 if '-', else 0 */
4384
int len; /* number of characters */
4386
int numdigits; /* len == numnondigits + numdigits */
4387
int numnondigits = 0;
4392
result = Py_TYPE(val)->tp_str(val);
4395
result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4400
result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4403
assert(!"'type' not in [duoxX]");
4408
buf = PyBytes_AsString(result);
4414
/* To modify the string in-place, there can only be one reference. */
4415
if (Py_REFCNT(result) != 1) {
4416
PyErr_BadInternalCall();
4419
llen = PyBytes_Size(result);
4420
if (llen > INT_MAX) {
4421
PyErr_SetString(PyExc_ValueError, "string too large in _PyBytes_FormatLong");
4425
if (buf[len-1] == 'L') {
4429
sign = buf[0] == '-';
4430
numnondigits += sign;
4431
numdigits = len - numnondigits;
4432
assert(numdigits > 0);
4434
/* Get rid of base marker unless F_ALT */
4435
if ((flags & F_ALT) == 0) {
4436
/* Need to skip 0x, 0X or 0. */
4440
assert(buf[sign] == '0');
4441
/* If 0 is only digit, leave it alone. */
4442
if (numdigits > 1) {
4449
assert(buf[sign] == '0');
4450
assert(buf[sign + 1] == 'x');
4461
assert(len == numnondigits + numdigits);
4462
assert(numdigits > 0);
4465
/* Fill with leading zeroes to meet minimum width. */
4466
if (prec > numdigits) {
4467
PyObject *r1 = PyBytes_FromStringAndSize(NULL,
4468
numnondigits + prec);
4474
b1 = PyBytes_AS_STRING(r1);
4475
for (i = 0; i < numnondigits; ++i)
4477
for (i = 0; i < prec - numdigits; i++)
4479
for (i = 0; i < numdigits; i++)
4484
buf = PyBytes_AS_STRING(result);
4485
len = numnondigits + prec;
4488
/* Fix up case for hex conversions. */
4490
/* Need to convert all lower case letters to upper case,
4491
and need to convert 0x to 0X (and -0x to -0X). */
4492
for (i = 0; i < len; i++)
4493
if (buf[i] >= 'a' && buf[i] <= 'x')
4501
Py_LOCAL_INLINE(int)
4502
formatint(char *buf, size_t buflen, int flags,
4503
int prec, int type, PyObject *v)
4505
/* fmt = '%#.' + `prec` + 'l' + `type`
4506
worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4508
char fmt[64]; /* plenty big enough! */
4512
x = PyInt_AsLong(v);
4513
if (x == -1 && PyErr_Occurred()) {
4514
PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4515
Py_TYPE(v)->tp_name);
4518
if (x < 0 && type == 'u') {
4521
if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4528
if ((flags & F_ALT) &&
4529
(type == 'x' || type == 'X')) {
4530
/* When converting under %#x or %#X, there are a number
4531
* of issues that cause pain:
4532
* - when 0 is being converted, the C standard leaves off
4533
* the '0x' or '0X', which is inconsistent with other
4534
* %#x/%#X conversions and inconsistent with Python's
4536
* - there are platforms that violate the standard and
4537
* convert 0 with the '0x' or '0X'
4538
* (Metrowerks, Compaq Tru64)
4539
* - there are platforms that give '0x' when converting
4540
* under %#X, but convert 0 in accordance with the
4541
* standard (OS/2 EMX)
4543
* We can achieve the desired consistency by inserting our
4544
* own '0x' or '0X' prefix, and substituting %x/%X in place
4547
* Note that this is the same approach as used in
4548
* formatint() in unicodeobject.c
4550
PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4551
sign, type, prec, type);
4554
PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4555
sign, (flags&F_ALT) ? "#" : "",
4559
/* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4560
* worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4562
if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4563
PyErr_SetString(PyExc_OverflowError,
4564
"formatted integer is too long (precision too large?)");
4568
PyOS_snprintf(buf, buflen, fmt, -x);
4570
PyOS_snprintf(buf, buflen, fmt, x);
4571
return (int)strlen(buf);
4574
Py_LOCAL_INLINE(int)
4575
formatchar(char *buf, size_t buflen, PyObject *v)
4577
/* presume that the buffer is at least 2 characters long */
4578
if (PyBytes_Check(v)) {
4579
if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4583
if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4590
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The replacement list is fully parenthesised so that the macro behaves
   as a single primary expression wherever it is expanded (for example
   after a bare `sizeof`).
*/
#define FORMATBUFLEN ((size_t)120)
4601
/* Produce the result of the %-style formatting operation `format % args`.
 *
 * format - must be non-NULL and pass PyBytes_Check().
 * args   - must be non-NULL; a tuple supplies positional arguments, and an
 *          object accepted by the mapping test below is used for %(key)
 *          lookups.
 *
 * A NULL/invalid format or NULL args raises PyErr_BadInternalCall().
 * Conversion problems are reported as TypeError / ValueError (messages
 * below), and an oversized field width leads to PyErr_NoMemory().
 * When Py_USING_UNICODE is defined and a unicode value is encountered, the
 * output built so far is kept and the remainder of the format is handed to
 * PyUnicode_Format(); the two pieces are joined with PyUnicode_Concat().
 *
 * NOTE(review): many short lines (braces, returns, gotos, else keywords)
 * are not visible in this excerpt; the comments below describe only what
 * the visible statements establish.
 */
PyBytes_Format(PyObject *format, PyObject *args)
4604
Py_ssize_t arglen, argidx;
4605
Py_ssize_t reslen, rescnt, fmtcnt;
4607
PyObject *result, *orig_args;
4608
#ifdef Py_USING_UNICODE
4611
PyObject *dict = NULL;
4612
if (format == NULL || !PyBytes_Check(format) || args == NULL) {
4613
PyErr_BadInternalCall();
4617
/* fmt walks the raw format bytes; fmtcnt is the number still unread. */
fmt = PyBytes_AS_STRING(format);
4618
fmtcnt = PyBytes_GET_SIZE(format);
4619
/* Initial output buffer: the format length plus 100 bytes of headroom.
   rescnt starts equal to reslen and is later subtracted from reslen to
   trim the result, i.e. it tracks the space still unused. */
reslen = rescnt = fmtcnt + 100;
4620
result = PyBytes_FromStringAndSize((char *)NULL, reslen);
4623
res = PyBytes_AsString(result);
4624
/* For a tuple, arglen is the number of positional arguments available. */
if (PyTuple_Check(args)) {
4625
arglen = PyTuple_GET_SIZE(args);
4632
/* Mapping test: the type provides tp_as_mapping and the object is
   neither a tuple nor a basestring instance.
   NOTE(review): presumably this is where dict is set -- the assignment
   is not visible here. */
if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4633
!PyObject_TypeCheck(args, &PyBaseString_Type))
4635
/* Main scan over the format string. */
while (--fmtcnt >= 0) {
4638
/* Regrow the output buffer, leaving fmtcnt + 100 bytes of headroom;
   res is fetched again from result after the resize. */
rescnt = fmtcnt + 100;
4640
if (_PyBytes_Resize(&result, reslen) < 0)
4642
res = PyBytes_AS_STRING(result)
4649
/* Got a format specifier */
4651
Py_ssize_t width = -1;
4657
PyObject *temp = NULL;
4661
char formatbuf[FORMATBUFLEN];
4662
/* For format{float,int,char}() */
4663
#ifdef Py_USING_UNICODE
4664
/* Position of this conversion, remembered for the unicode fallback:
   argidx is reset to argidx_start below before that path is taken
   (fmt_start presumably serves the same purpose -- confirm). */
char *fmt_start = fmt;
4665
Py_ssize_t argidx_start = argidx;
4676
PyErr_SetString(PyExc_TypeError,
4677
"format requires a mapping");
4683
/* Skip over balanced parentheses */
4684
while (pcount > 0 && --fmtcnt >= 0) {
4687
else if (*fmt == '(')
4691
keylen = fmt - keystart - 1;
4692
/* Running out of format text, or leaving a parenthesis open, is an
   error. */
if (fmtcnt < 0 || pcount > 0) {
4693
PyErr_SetString(PyExc_ValueError,
4694
"incomplete format key");
4697
/* The parenthesised text becomes the key looked up in the mapping;
   the looked-up value replaces args for this conversion. */
key = PyBytes_FromStringAndSize(keystart,
4705
args = PyObject_GetItem(dict, key);
4714
/* Flag characters: each recognised flag sets its bit and the loop
   continues with the next byte. */
while (--fmtcnt >= 0) {
4715
switch (c = *fmt++) {
4716
case '-': flags |= F_LJUST; continue;
4717
case '+': flags |= F_SIGN; continue;
4718
case ' ': flags |= F_BLANK; continue;
4719
case '#': flags |= F_ALT; continue;
4720
case '0': flags |= F_ZERO; continue;
4725
/* Field width taken from the argument list; it must be an int. */
v = getnextarg(args, arglen, &argidx);
4728
if (!PyInt_Check(v)) {
4729
PyErr_SetString(PyExc_TypeError,
4733
width = PyInt_AsLong(v);
4741
/* Field width written as decimal digits in the format itself. */
else if (c >= 0 && isdigit(c)) {
4743
while (--fmtcnt >= 0) {
4744
c = Py_CHARMASK(*fmt++);
4747
/* Detect overflow before accumulating another digit. */
if ((width*10) / 10 != width) {
4753
width = width*10 + (c - '0');
4761
/* Precision taken from the argument list; it must be an int. */
v = getnextarg(args, arglen, &argidx);
4764
if (!PyInt_Check(v)) {
4770
prec = PyInt_AsLong(v);
4776
/* Precision written as decimal digits in the format itself. */
else if (c >= 0 && isdigit(c)) {
4778
while (--fmtcnt >= 0) {
4779
c = Py_CHARMASK(*fmt++);
4782
/* Same overflow guard as for the width. */
if ((prec*10) / 10 != prec) {
4788
prec = prec*10 + (c - '0');
4793
/* The C length modifiers h, l and L are recognised here.
   NOTE(review): their handling is on lines not visible in this
   excerpt. */
if (c == 'h' || c == 'l' || c == 'L') {
4799
PyErr_SetString(PyExc_ValueError,
4800
"incomplete format");
4804
/* Fetch the value to be converted. */
v = getnextarg(args, arglen, &argidx);
4816
#ifdef Py_USING_UNICODE
/* A unicode value rewinds argidx to the start of this conversion. */
if (PyUnicode_Check(v)) {
4819
argidx = argidx_start;
4823
/* Text conversions: the value's str() (or, further down, its repr())
   supplies the bytes to emit. */
temp = _PyObject_Str(v);
4824
#ifdef Py_USING_UNICODE
/* str() itself may have produced unicode; rewind in that case too. */
if (temp != NULL && PyUnicode_Check(temp)) {
4828
argidx = argidx_start;
4835
temp = PyObject_Repr(v);
4838
if (!PyBytes_Check(temp)) {
4839
PyErr_SetString(PyExc_TypeError,
4840
"%s argument has non-string str()");
4844
pbuf = PyBytes_AS_STRING(temp);
4845
len = PyBytes_GET_SIZE(temp);
4846
/* A non-negative precision smaller than the text length is detected
   here (the truncation statement itself is not visible). */
if (prec >= 0 && len > prec)
4858
/* Integer conversions: ints and longs are tested for directly; the
   PyNumber_Int() call, with PyNumber_Long() as a fallback when it
   returns NULL, obtains an integer object otherwise. */
if (PyNumber_Check(v)) {
4859
PyObject *iobj=NULL;
4861
if (PyInt_Check(v) || (PyLong_Check(v))) {
4866
iobj = PyNumber_Int(v);
4867
if (iobj==NULL) iobj = PyNumber_Long(v);
4870
/* Plain ints are rendered by formatint(). */
if (PyInt_Check(iobj)) {
4873
len = formatint(pbuf,
4875
flags, prec, c, iobj);
4881
/* Longs are rendered by _PyBytes_FormatLong(), which hands back the
   text through pbuf and ilen. */
else if (PyLong_Check(iobj)) {
4885
temp = _PyBytes_FormatLong(iobj, flags,
4886
prec, c, &pbuf, &ilen);
4899
PyErr_Format(PyExc_TypeError,
4900
"%%%c format: a number is required, "
4901
"not %.200s", c, Py_TYPE(v)->tp_name);
4916
/* Float conversions are rendered into formatbuf by formatfloat(). */
len = formatfloat(pbuf, sizeof(formatbuf),
4925
#ifdef Py_USING_UNICODE
/* Unicode value for a character conversion: rewind as above. */
if (PyUnicode_Check(v)) {
4928
argidx = argidx_start;
4933
len = formatchar(pbuf, sizeof(formatbuf), v);
4938
/* Any other conversion character is rejected, reporting the
   character and its offset within the format string. */
PyErr_Format(PyExc_ValueError,
4939
"unsupported format character '%c' (0x%x) "
4942
(Py_ssize_t)(fmt - 1 -
4943
PyBytes_AsString(format)));
4947
/* Sign handling: a leading '-' or '+' in the converted text is
   checked first; otherwise the F_SIGN and F_BLANK flags are
   consulted. */
if (*pbuf == '-' || *pbuf == '+') {
4951
else if (flags & F_SIGN)
4953
else if (flags & F_BLANK)
4960
/* Make sure the output buffer has room for the padded field,
   allowing one extra byte when a sign is present. */
if (rescnt - (sign != 0) < width) {
4962
rescnt = width + fmtcnt + 100;
4967
return PyErr_NoMemory();
4969
if (_PyBytes_Resize(&result, reslen) < 0) {
4973
res = PyBytes_AS_STRING(result)
4983
/* With the '#' flag, hex output is expected to begin with '0'
   followed by the conversion character (asserted). */
if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4984
assert(pbuf[0] == '0');
4985
assert(pbuf[1] == c);
4996
/* Padding on the left applies when the field is wider than the text
   and left-justification was not requested. */
if (width > len && !(flags & F_LJUST)) {
5000
} while (--width > len);
5005
if ((flags & F_ALT) &&
5006
(c == 'x' || c == 'X')) {
5007
assert(pbuf[0] == '0');
5008
assert(pbuf[1] == c);
5013
/* Copy the converted text into the output. */
Py_MEMCPY(res, pbuf, len);
5016
/* Remaining width is consumed after the text (trailing padding). */
while (--width >= len) {
5020
/* Leftover arguments are an error, both in mapping mode (checked
   per conversion) and in positional mode (checked at the end). */
if (dict && (argidx < arglen) && c != '%') {
5021
PyErr_SetString(PyExc_TypeError,
5022
"not all arguments converted during string formatting");
5029
if (argidx < arglen && !dict) {
5030
PyErr_SetString(PyExc_TypeError,
5031
"not all arguments converted during string formatting");
5037
/* Trim the result down to the bytes actually used. */
_PyBytes_Resize(&result, reslen - rescnt);
5040
#ifdef Py_USING_UNICODE
5046
/* Fiddle args right (remove the first argidx arguments) */
5047
if (PyTuple_Check(orig_args) && argidx > 0) {
5049
Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
5054
PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
5056
PyTuple_SET_ITEM(v, n, w);
5060
Py_INCREF(orig_args);
5064
/* Take what we have of the result and let the Unicode formatting
5065
function format the rest of the input. */
5066
rescnt = res - PyBytes_AS_STRING(result);
5067
if (_PyBytes_Resize(&result, rescnt))
5069
fmtcnt = PyBytes_GET_SIZE(format) - \
5070
(fmt - PyBytes_AS_STRING(format));
5071
format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
5074
v = PyUnicode_Format(format, args);
5078
/* Paste what we have (result) to what the Unicode formatting
5079
function returned (v) and return the result (or error) */
5080
w = PyUnicode_Concat(result, v);
5085
#endif /* Py_USING_UNICODE */
5096
/* Intern the string object referenced by *p.
 *
 * p - address of the object pointer to intern.
 *
 * A NULL *p, or one that fails PyBytes_Check(), aborts the process through
 * Py_FatalError().  The file-level `interned` dict is created on first use;
 * a string that is added to it is stored as both key and value and is
 * flagged SSTATE_INTERNED_MORTAL.
 * NOTE(review): the early returns (for subclasses and for already-interned
 * strings), the handling of an existing entry `t`, and the reference-count
 * adjustments are on lines not visible in this excerpt.
 */
PyString_InternInPlace(PyObject **p)
5098
register PyBytesObject *s = (PyBytesObject *)(*p);
5100
/* Anything other than a string object is a fatal programming error. */
if (s == NULL || !PyBytes_Check(s))
5101
Py_FatalError("PyString_InternInPlace: strings only please!");
5102
/* If it's a string subclass, we don't really know what putting
5103
it in the interned dict might do. */
5104
if (!PyBytes_CheckExact(s))
5106
/* Test whether s already carries an interned state. */
if (PyBytes_CHECK_INTERNED(s))
5108
/* The interned dict is allocated lazily. */
if (interned == NULL) {
5109
interned = PyDict_New();
5110
if (interned == NULL) {
5111
PyErr_Clear(); /* Don't leave an exception */
5115
/* Look for an entry already stored under s. */
t = PyDict_GetItem(interned, (PyObject *)s);
5123
/* Register s in the dict as both key and value. */
if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
5127
/* The two references in interned are not counted by refcnt.
5128
The string deallocator will take care of this */
5130
PyBytes_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
5134
/* Intern *p via PyString_InternInPlace() and then mark the resulting
 * string as SSTATE_INTERNED_IMMORTAL unless it already has that state.
 *
 * p - address of the object pointer to intern.
 *
 * NOTE(review): the remainder of the if-body is not visible in this
 * excerpt.
 */
PyString_InternImmortal(PyObject **p)
5136
PyString_InternInPlace(p);
5137
/* Only change the state when the string is not immortal yet. */
if (PyBytes_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
5138
PyBytes_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
5145
/* Build a string object from the C string cp with PyBytes_FromString()
 * and intern it with PyString_InternInPlace().
 *
 * cp - source C string handed to PyBytes_FromString().
 *
 * NOTE(review): the failure check on s and the return statement are not
 * visible in this excerpt; presumably s is what gets returned.
 */
PyString_InternFromString(const char *cp)
5147
PyObject *s = PyBytes_FromString(cp);
5150
/* s is passed by address, so interning may replace the pointer. */
PyString_InternInPlace(&s);
5158
/* Release every entry of the file-level characters[] table (one slot per
   possible unsigned char value), clearing each slot afterwards, and then
   release nullstring.  Py_XDECREF() is used, so empty (NULL) slots are
   tolerated.
   NOTE(review): the header of the enclosing function is not visible in
   this excerpt. */
for (i = 0; i < UCHAR_MAX + 1; i++) {
5159
Py_XDECREF(characters[i]);
5160
characters[i] = NULL;
5162
Py_XDECREF(nullstring);
5166
void _Py_ReleaseInternedStrings(void)
5171
Py_ssize_t immortal_size = 0, mortal_size = 0;
5173
if (interned == NULL || !PyDict_Check(interned))
5175
keys = PyDict_Keys(interned);
5176
if (keys == NULL || !PyList_Check(keys)) {
5181
/* Since _Py_ReleaseInternedStrings() is intended to help a leak
5182
detector, interned strings are not forcibly deallocated; rather, we
5183
give them their stolen references back, and then clear and DECREF
5184
the interned dict. */
5186
n = PyList_GET_SIZE(keys);
5187
fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5189
for (i = 0; i < n; i++) {
5190
s = (PyBytesObject *) PyList_GET_ITEM(keys, i);
5191
switch (s->ob_sstate) {
5192
case SSTATE_NOT_INTERNED:
5193
/* XXX Shouldn't happen */
5195
case SSTATE_INTERNED_IMMORTAL:
5197
immortal_size += Py_SIZE(s);
5199
case SSTATE_INTERNED_MORTAL:
5201
mortal_size += Py_SIZE(s);
5204
Py_FatalError("Inconsistent interned string state.");
5206
s->ob_sstate = SSTATE_NOT_INTERNED;
5208
fprintf(stderr, "total size of all interned strings: "
5209
"%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5210
"mortal/immortal\n", mortal_size, immortal_size);
5212
PyDict_Clear(interned);
5213
Py_DECREF(interned);