1
/* String (str/bytes) object implementation */
3
#define PY_SSIZE_T_CLEAN
9
int null_strings, one_strings;
12
static PyStringObject *characters[UCHAR_MAX + 1];
13
static PyStringObject *nullstring;
15
/* This dictionary holds all interned strings. Note that references to
16
strings in this dictionary are *not* counted in the string's ob_refcnt.
17
When the interned string reaches a refcnt of 0 the string deallocation
18
function will delete the reference from this dictionary.
20
Another way to look at this is to say that the actual reference
21
count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23
static PyObject *interned;
26
For both PyString_FromString() and PyString_FromStringAndSize(), the
27
parameter `size' denotes number of characters to allocate, not counting any
28
null terminating character.
30
For PyString_FromString(), the parameter `str' points to a null-terminated
31
string containing exactly `size' bytes.
33
For PyString_FromStringAndSize(), the parameter `str' is
34
either NULL or else points to a string containing at least `size' bytes.
35
For PyString_FromStringAndSize(), the string in the `str' parameter does
36
not have to be null-terminated. (Therefore it is safe to construct a
37
substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
38
If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
39
bytes (setting the last byte to the null terminating character) and you can
40
fill in the data yourself. If `str' is non-NULL then the resulting
41
PyString object must be treated as immutable and you must not fill in nor
42
alter the data yourself, since the strings may be shared.
44
The PyObject member `op->ob_size', which denotes the number of "extra
45
items" in a variable-size object, will contain the number of bytes
46
allocated for string data, not counting the null terminating character. It
47
is therefore equal to the `size' parameter (for
48
PyString_FromStringAndSize()) or the length of the string in the `str'
49
parameter (for PyString_FromString()).
52
PyString_FromStringAndSize(const char *str, Py_ssize_t size)
54
register PyStringObject *op;
56
PyErr_SetString(PyExc_SystemError,
57
"Negative size passed to PyString_FromStringAndSize");
60
if (size == 0 && (op = nullstring) != NULL) {
65
return (PyObject *)op;
67
if (size == 1 && str != NULL &&
68
(op = characters[*str & UCHAR_MAX]) != NULL)
74
return (PyObject *)op;
77
if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {
78
PyErr_SetString(PyExc_OverflowError, "string is too large");
82
/* Inline PyObject_NewVar */
83
op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
85
return PyErr_NoMemory();
86
PyObject_INIT_VAR(op, &PyString_Type, size);
88
op->ob_sstate = SSTATE_NOT_INTERNED;
90
Py_MEMCPY(op->ob_sval, str, size);
91
op->ob_sval[size] = '\0';
92
/* share short strings */
94
PyObject *t = (PyObject *)op;
95
PyString_InternInPlace(&t);
96
op = (PyStringObject *)t;
99
} else if (size == 1 && str != NULL) {
100
PyObject *t = (PyObject *)op;
101
PyString_InternInPlace(&t);
102
op = (PyStringObject *)t;
103
characters[*str & UCHAR_MAX] = op;
106
return (PyObject *) op;
110
PyString_FromString(const char *str)
112
register size_t size;
113
register PyStringObject *op;
117
if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {
118
PyErr_SetString(PyExc_OverflowError,
119
"string is too long for a Python string");
122
if (size == 0 && (op = nullstring) != NULL) {
127
return (PyObject *)op;
129
if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
134
return (PyObject *)op;
137
/* Inline PyObject_NewVar */
138
op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
140
return PyErr_NoMemory();
141
PyObject_INIT_VAR(op, &PyString_Type, size);
143
op->ob_sstate = SSTATE_NOT_INTERNED;
144
Py_MEMCPY(op->ob_sval, str, size+1);
145
/* share short strings */
147
PyObject *t = (PyObject *)op;
148
PyString_InternInPlace(&t);
149
op = (PyStringObject *)t;
152
} else if (size == 1) {
153
PyObject *t = (PyObject *)op;
154
PyString_InternInPlace(&t);
155
op = (PyStringObject *)t;
156
characters[*str & UCHAR_MAX] = op;
159
return (PyObject *) op;
163
PyString_FromFormatV(const char *format, va_list vargs)
171
#ifdef VA_LIST_IS_ARRAY
172
Py_MEMCPY(count, vargs, sizeof(va_list));
175
__va_copy(count, vargs);
180
/* step 1: figure out how large a buffer we need */
181
for (f = format; *f; f++) {
184
while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
187
/* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
188
* they don't affect the amount of space we reserve.
190
if ((*f == 'l' || *f == 'z') &&
191
(f[1] == 'd' || f[1] == 'u'))
196
(void)va_arg(count, int);
197
/* fall through... */
201
case 'd': case 'u': case 'i': case 'x':
202
(void) va_arg(count, int);
203
/* 20 bytes is enough to hold a 64-bit
204
integer. Decimal takes the most space.
205
This isn't enough for octal. */
209
s = va_arg(count, char*);
213
(void) va_arg(count, int);
214
/* maximum 64-bit pointer representation:
216
* so 19 characters is enough.
217
* XXX I count 18 -- what's the extra for?
222
/* if we stumble upon an unknown
223
formatting code, copy the rest of
224
the format string to the output
225
string. (we cannot just skip the
226
code, since there's no way to know
227
what's in the argument list) */
235
/* step 2: fill the buffer */
236
/* Since we've analyzed how much space we need for the worst case,
237
use sprintf directly instead of the slower PyOS_snprintf. */
238
string = PyString_FromStringAndSize(NULL, n);
242
s = PyString_AsString(string);
244
for (f = format; *f; f++) {
250
/* parse the width.precision part (we're only
251
interested in the precision value, if any) */
253
while (isdigit(Py_CHARMASK(*f)))
254
n = (n*10) + *f++ - '0';
258
while (isdigit(Py_CHARMASK(*f)))
259
n = (n*10) + *f++ - '0';
261
while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
263
/* handle the long flag, but only for %ld and %lu.
264
others can be added when necessary. */
265
if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
269
/* handle the size_t flag. */
270
if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
277
*s++ = va_arg(vargs, int);
281
sprintf(s, "%ld", va_arg(vargs, long));
283
sprintf(s, "%" PY_FORMAT_SIZE_T "d",
284
va_arg(vargs, Py_ssize_t));
286
sprintf(s, "%d", va_arg(vargs, int));
292
va_arg(vargs, unsigned long));
294
sprintf(s, "%" PY_FORMAT_SIZE_T "u",
295
va_arg(vargs, size_t));
298
va_arg(vargs, unsigned int));
302
sprintf(s, "%i", va_arg(vargs, int));
306
sprintf(s, "%x", va_arg(vargs, int));
310
p = va_arg(vargs, char*);
318
sprintf(s, "%p", va_arg(vargs, void*));
319
/* %p is ill-defined: ensure leading 0x. */
322
else if (s[1] != 'x') {
323
memmove(s+2, s, strlen(s)+1);
342
_PyString_Resize(&string, s - PyString_AS_STRING(string));
347
PyString_FromFormat(const char *format, ...)
352
#ifdef HAVE_STDARG_PROTOTYPES
353
va_start(vargs, format);
357
ret = PyString_FromFormatV(format, vargs);
363
PyObject *PyString_Decode(const char *s,
365
const char *encoding,
370
str = PyString_FromStringAndSize(s, size);
373
v = PyString_AsDecodedString(str, encoding, errors);
378
PyObject *PyString_AsDecodedObject(PyObject *str,
379
const char *encoding,
384
if (!PyString_Check(str)) {
389
if (encoding == NULL) {
390
#ifdef Py_USING_UNICODE
391
encoding = PyUnicode_GetDefaultEncoding();
393
PyErr_SetString(PyExc_ValueError, "no encoding specified");
398
/* Decode via the codec registry */
399
v = PyCodec_Decode(str, encoding, errors);
409
PyObject *PyString_AsDecodedString(PyObject *str,
410
const char *encoding,
415
v = PyString_AsDecodedObject(str, encoding, errors);
419
#ifdef Py_USING_UNICODE
420
/* Convert Unicode to a string using the default encoding */
421
if (PyUnicode_Check(v)) {
423
v = PyUnicode_AsEncodedString(v, NULL, NULL);
429
if (!PyString_Check(v)) {
430
PyErr_Format(PyExc_TypeError,
431
"decoder did not return a string object (type=%.400s)",
432
Py_TYPE(v)->tp_name);
443
PyObject *PyString_Encode(const char *s,
445
const char *encoding,
450
str = PyString_FromStringAndSize(s, size);
453
v = PyString_AsEncodedString(str, encoding, errors);
458
PyObject *PyString_AsEncodedObject(PyObject *str,
459
const char *encoding,
464
if (!PyString_Check(str)) {
469
if (encoding == NULL) {
470
#ifdef Py_USING_UNICODE
471
encoding = PyUnicode_GetDefaultEncoding();
473
PyErr_SetString(PyExc_ValueError, "no encoding specified");
478
/* Encode via the codec registry */
479
v = PyCodec_Encode(str, encoding, errors);
489
PyObject *PyString_AsEncodedString(PyObject *str,
490
const char *encoding,
495
v = PyString_AsEncodedObject(str, encoding, errors);
499
#ifdef Py_USING_UNICODE
500
/* Convert Unicode to a string using the default encoding */
501
if (PyUnicode_Check(v)) {
503
v = PyUnicode_AsEncodedString(v, NULL, NULL);
509
if (!PyString_Check(v)) {
510
PyErr_Format(PyExc_TypeError,
511
"encoder did not return a string object (type=%.400s)",
512
Py_TYPE(v)->tp_name);
524
string_dealloc(PyObject *op)
526
switch (PyString_CHECK_INTERNED(op)) {
527
case SSTATE_NOT_INTERNED:
530
case SSTATE_INTERNED_MORTAL:
531
/* revive dead object temporarily for DelItem */
533
if (PyDict_DelItem(interned, op) != 0)
535
"deletion of interned string failed");
538
case SSTATE_INTERNED_IMMORTAL:
539
Py_FatalError("Immortal interned string died.");
542
Py_FatalError("Inconsistent interned string state.");
544
Py_TYPE(op)->tp_free(op);
547
/* Unescape a backslash-escaped string. If unicode is non-zero,
548
the string is a u-literal. If recode_encoding is non-zero,
549
the string is UTF-8 encoded and should be re-encoded in the
550
specified encoding. */
552
PyObject *PyString_DecodeEscape(const char *s,
556
const char *recode_encoding)
562
Py_ssize_t newlen = recode_encoding ? 4*len:len;
563
v = PyString_FromStringAndSize((char *)NULL, newlen);
566
p = buf = PyString_AsString(v);
571
#ifdef Py_USING_UNICODE
572
if (recode_encoding && (*s & 0x80)) {
578
/* Decode non-ASCII bytes as UTF-8. */
579
while (t < end && (*t & 0x80)) t++;
580
u = PyUnicode_DecodeUTF8(s, t - s, errors);
583
/* Recode them in target encoding. */
584
w = PyUnicode_AsEncodedString(
585
u, recode_encoding, errors);
589
/* Append bytes to output buffer. */
590
assert(PyString_Check(w));
591
r = PyString_AS_STRING(w);
592
rn = PyString_GET_SIZE(w);
607
PyErr_SetString(PyExc_ValueError,
608
"Trailing \\ in string");
612
/* XXX This assumes ASCII! */
614
case '\\': *p++ = '\\'; break;
615
case '\'': *p++ = '\''; break;
616
case '\"': *p++ = '\"'; break;
617
case 'b': *p++ = '\b'; break;
618
case 'f': *p++ = '\014'; break; /* FF */
619
case 't': *p++ = '\t'; break;
620
case 'n': *p++ = '\n'; break;
621
case 'r': *p++ = '\r'; break;
622
case 'v': *p++ = '\013'; break; /* VT */
623
case 'a': *p++ = '\007'; break; /* BEL, not classic C */
624
case '0': case '1': case '2': case '3':
625
case '4': case '5': case '6': case '7':
627
if (s < end && '0' <= *s && *s <= '7') {
628
c = (c<<3) + *s++ - '0';
629
if (s < end && '0' <= *s && *s <= '7')
630
c = (c<<3) + *s++ - '0';
636
isxdigit(Py_CHARMASK(s[0])) &&
637
isxdigit(Py_CHARMASK(s[1])))
660
if (!errors || strcmp(errors, "strict") == 0) {
661
PyErr_SetString(PyExc_ValueError,
662
"invalid \\x escape");
665
if (strcmp(errors, "replace") == 0) {
667
} else if (strcmp(errors, "ignore") == 0)
670
PyErr_Format(PyExc_ValueError,
672
"unknown error handling code: %.400s",
676
#ifndef Py_USING_UNICODE
681
PyErr_SetString(PyExc_ValueError,
682
"Unicode escapes not legal "
683
"when Unicode disabled");
690
goto non_esc; /* an arbitrary number of unescaped
691
UTF-8 bytes may follow. */
695
_PyString_Resize(&v, p - buf);
702
/* -------------------------------------------------------------------- */
706
string_getsize(register PyObject *op)
710
if (PyString_AsStringAndSize(op, &s, &len))
715
static /*const*/ char *
716
string_getbuffer(register PyObject *op)
720
if (PyString_AsStringAndSize(op, &s, &len))
726
PyString_Size(register PyObject *op)
728
if (!PyString_Check(op))
729
return string_getsize(op);
734
PyString_AsString(register PyObject *op)
736
if (!PyString_Check(op))
737
return string_getbuffer(op);
738
return ((PyStringObject *)op) -> ob_sval;
742
PyString_AsStringAndSize(register PyObject *obj,
744
register Py_ssize_t *len)
747
PyErr_BadInternalCall();
751
if (!PyString_Check(obj)) {
752
#ifdef Py_USING_UNICODE
753
if (PyUnicode_Check(obj)) {
754
obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
761
PyErr_Format(PyExc_TypeError,
762
"expected string or Unicode object, "
763
"%.200s found", Py_TYPE(obj)->tp_name);
768
*s = PyString_AS_STRING(obj);
770
*len = PyString_GET_SIZE(obj);
771
else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
772
PyErr_SetString(PyExc_TypeError,
773
"expected string without null bytes");
779
/* -------------------------------------------------------------------- */
782
#include "stringlib/stringdefs.h"
783
#include "stringlib/fastsearch.h"
785
#include "stringlib/count.h"
786
#include "stringlib/find.h"
787
#include "stringlib/partition.h"
789
#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
790
#include "stringlib/localeutil.h"
795
string_print(PyStringObject *op, FILE *fp, int flags)
797
Py_ssize_t i, str_len;
801
/* XXX Ought to check for interrupts when writing long strings */
802
if (! PyString_CheckExact(op)) {
804
/* A str subclass may have its own __str__ method. */
805
op = (PyStringObject *) PyObject_Str((PyObject *)op);
808
ret = string_print(op, fp, flags);
812
if (flags & Py_PRINT_RAW) {
813
char *data = op->ob_sval;
814
Py_ssize_t size = Py_SIZE(op);
815
Py_BEGIN_ALLOW_THREADS
816
while (size > INT_MAX) {
817
/* Very long strings cannot be written atomically.
818
* But don't write exactly INT_MAX bytes at a time
819
* to avoid memory alignment issues.
821
const int chunk_size = INT_MAX & ~0x3FFF;
822
fwrite(data, 1, chunk_size, fp);
827
if (size) fwrite(data, (int)size, 1, fp);
829
fwrite(data, 1, (int)size, fp);
835
/* figure out which quote to use; single is preferred */
837
if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
838
!memchr(op->ob_sval, '"', Py_SIZE(op)))
841
str_len = Py_SIZE(op);
842
Py_BEGIN_ALLOW_THREADS
844
for (i = 0; i < str_len; i++) {
845
/* Since strings are immutable and the caller should have a
846
reference, accessing the internal buffer should not be an issue
847
with the GIL released. */
849
if (c == quote || c == '\\')
850
fprintf(fp, "\\%c", c);
857
else if (c < ' ' || c >= 0x7f)
858
fprintf(fp, "\\x%02x", c & 0xff);
868
PyString_Repr(PyObject *obj, int smartquotes)
870
register PyStringObject* op = (PyStringObject*) obj;
871
size_t newsize = 2 + 4 * Py_SIZE(op);
873
if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
874
PyErr_SetString(PyExc_OverflowError,
875
"string is too large to make repr");
878
v = PyString_FromStringAndSize((char *)NULL, newsize);
883
register Py_ssize_t i;
888
/* figure out which quote to use; single is preferred */
891
memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
892
!memchr(op->ob_sval, '"', Py_SIZE(op)))
895
p = PyString_AS_STRING(v);
897
for (i = 0; i < Py_SIZE(op); i++) {
898
/* There's at least enough room for a hex escape
899
and a closing quote. */
900
assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
902
if (c == quote || c == '\\')
903
*p++ = '\\', *p++ = c;
905
*p++ = '\\', *p++ = 't';
907
*p++ = '\\', *p++ = 'n';
909
*p++ = '\\', *p++ = 'r';
910
else if (c < ' ' || c >= 0x7f) {
911
/* For performance, we don't want to call
912
PyOS_snprintf here (extra layers of
914
sprintf(p, "\\x%02x", c & 0xff);
920
assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
924
&v, (p - PyString_AS_STRING(v)));
930
string_repr(PyObject *op)
932
return PyString_Repr(op, 1);
936
string_str(PyObject *s)
938
assert(PyString_Check(s));
939
if (PyString_CheckExact(s)) {
944
/* Subtype -- return genuine string with the same value. */
945
PyStringObject *t = (PyStringObject *) s;
946
return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
951
string_length(PyStringObject *a)
957
string_concat(register PyStringObject *a, register PyObject *bb)
959
register Py_ssize_t size;
960
register PyStringObject *op;
961
if (!PyString_Check(bb)) {
962
#ifdef Py_USING_UNICODE
963
if (PyUnicode_Check(bb))
964
return PyUnicode_Concat((PyObject *)a, bb);
966
if (PyByteArray_Check(bb))
967
return PyByteArray_Concat((PyObject *)a, bb);
968
PyErr_Format(PyExc_TypeError,
969
"cannot concatenate 'str' and '%.200s' objects",
970
Py_TYPE(bb)->tp_name);
973
#define b ((PyStringObject *)bb)
974
/* Optimize cases with empty left or right operand */
975
if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
976
PyString_CheckExact(a) && PyString_CheckExact(b)) {
977
if (Py_SIZE(a) == 0) {
982
return (PyObject *)a;
984
size = Py_SIZE(a) + Py_SIZE(b);
985
/* Check that string sizes are not negative, to prevent an
986
overflow in cases where we are passed incorrectly-created
987
strings with negative lengths (due to a bug in other code).
989
if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
990
Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
991
PyErr_SetString(PyExc_OverflowError,
992
"strings are too large to concat");
996
/* Inline PyObject_NewVar */
997
if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {
998
PyErr_SetString(PyExc_OverflowError,
999
"strings are too large to concat");
1002
op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
1004
return PyErr_NoMemory();
1005
PyObject_INIT_VAR(op, &PyString_Type, size);
1007
op->ob_sstate = SSTATE_NOT_INTERNED;
1008
Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1009
Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1010
op->ob_sval[size] = '\0';
1011
return (PyObject *) op;
1016
string_repeat(register PyStringObject *a, register Py_ssize_t n)
1018
register Py_ssize_t i;
1019
register Py_ssize_t j;
1020
register Py_ssize_t size;
1021
register PyStringObject *op;
1025
/* watch out for overflows: the size can overflow int,
1026
* and the # of bytes needed can overflow size_t
1028
size = Py_SIZE(a) * n;
1029
if (n && size / n != Py_SIZE(a)) {
1030
PyErr_SetString(PyExc_OverflowError,
1031
"repeated string is too long");
1034
if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1036
return (PyObject *)a;
1038
nbytes = (size_t)size;
1039
if (nbytes + sizeof(PyStringObject) <= nbytes) {
1040
PyErr_SetString(PyExc_OverflowError,
1041
"repeated string is too long");
1044
op = (PyStringObject *)
1045
PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
1047
return PyErr_NoMemory();
1048
PyObject_INIT_VAR(op, &PyString_Type, size);
1050
op->ob_sstate = SSTATE_NOT_INTERNED;
1051
op->ob_sval[size] = '\0';
1052
if (Py_SIZE(a) == 1 && n > 0) {
1053
memset(op->ob_sval, a->ob_sval[0] , n);
1054
return (PyObject *) op;
1058
Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1062
j = (i <= size-i) ? i : size-i;
1063
Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1066
return (PyObject *) op;
1069
/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1072
string_slice(register PyStringObject *a, register Py_ssize_t i,
1073
register Py_ssize_t j)
1074
/* j -- may be negative! */
1079
j = 0; /* Avoid signed/unsigned bug in next line */
1082
if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1083
/* It's the same as a */
1085
return (PyObject *)a;
1089
return PyString_FromStringAndSize(a->ob_sval + i, j-i);
1093
string_contains(PyObject *str_obj, PyObject *sub_obj)
1095
if (!PyString_CheckExact(sub_obj)) {
1096
#ifdef Py_USING_UNICODE
1097
if (PyUnicode_Check(sub_obj))
1098
return PyUnicode_Contains(str_obj, sub_obj);
1100
if (!PyString_Check(sub_obj)) {
1101
PyErr_Format(PyExc_TypeError,
1102
"'in <string>' requires string as left operand, "
1103
"not %.200s", Py_TYPE(sub_obj)->tp_name);
1108
return stringlib_contains_obj(str_obj, sub_obj);
1112
string_item(PyStringObject *a, register Py_ssize_t i)
1116
if (i < 0 || i >= Py_SIZE(a)) {
1117
PyErr_SetString(PyExc_IndexError, "string index out of range");
1120
pchar = a->ob_sval[i];
1121
v = (PyObject *)characters[pchar & UCHAR_MAX];
1123
v = PyString_FromStringAndSize(&pchar, 1);
1134
string_richcompare(PyStringObject *a, PyStringObject *b, int op)
1137
Py_ssize_t len_a, len_b;
1141
/* Make sure both arguments are strings. */
1142
if (!(PyString_Check(a) && PyString_Check(b))) {
1143
result = Py_NotImplemented;
1148
case Py_EQ:case Py_LE:case Py_GE:
1151
case Py_NE:case Py_LT:case Py_GT:
1157
/* Supporting Py_NE here as well does not save
1158
much time, since Py_NE is rarely used. */
1159
if (Py_SIZE(a) == Py_SIZE(b)
1160
&& (a->ob_sval[0] == b->ob_sval[0]
1161
&& memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1168
len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1169
min_len = (len_a < len_b) ? len_a : len_b;
1171
c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1173
c = memcmp(a->ob_sval, b->ob_sval, min_len);
1177
c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1179
case Py_LT: c = c < 0; break;
1180
case Py_LE: c = c <= 0; break;
1181
case Py_EQ: assert(0); break; /* unreachable */
1182
case Py_NE: c = c != 0; break;
1183
case Py_GT: c = c > 0; break;
1184
case Py_GE: c = c >= 0; break;
1186
result = Py_NotImplemented;
1189
result = c ? Py_True : Py_False;
1196
_PyString_Eq(PyObject *o1, PyObject *o2)
1198
PyStringObject *a = (PyStringObject*) o1;
1199
PyStringObject *b = (PyStringObject*) o2;
1200
return Py_SIZE(a) == Py_SIZE(b)
1201
&& *a->ob_sval == *b->ob_sval
1202
&& memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1206
string_hash(PyStringObject *a)
1208
register Py_ssize_t len;
1209
register unsigned char *p;
1212
if (a->ob_shash != -1)
1215
p = (unsigned char *) a->ob_sval;
1218
x = (1000003*x) ^ *p++;
1227
string_subscript(PyStringObject* self, PyObject* item)
1229
if (PyIndex_Check(item)) {
1230
Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1231
if (i == -1 && PyErr_Occurred())
1234
i += PyString_GET_SIZE(self);
1235
return string_item(self, i);
1237
else if (PySlice_Check(item)) {
1238
Py_ssize_t start, stop, step, slicelength, cur, i;
1243
if (PySlice_GetIndicesEx((PySliceObject*)item,
1244
PyString_GET_SIZE(self),
1245
&start, &stop, &step, &slicelength) < 0) {
1249
if (slicelength <= 0) {
1250
return PyString_FromStringAndSize("", 0);
1252
else if (start == 0 && step == 1 &&
1253
slicelength == PyString_GET_SIZE(self) &&
1254
PyString_CheckExact(self)) {
1256
return (PyObject *)self;
1258
else if (step == 1) {
1259
return PyString_FromStringAndSize(
1260
PyString_AS_STRING(self) + start,
1264
source_buf = PyString_AsString((PyObject*)self);
1265
result_buf = (char *)PyMem_Malloc(slicelength);
1266
if (result_buf == NULL)
1267
return PyErr_NoMemory();
1269
for (cur = start, i = 0; i < slicelength;
1271
result_buf[i] = source_buf[cur];
1274
result = PyString_FromStringAndSize(result_buf,
1276
PyMem_Free(result_buf);
1281
PyErr_Format(PyExc_TypeError,
1282
"string indices must be integers, not %.200s",
1283
Py_TYPE(item)->tp_name);
1289
string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1292
PyErr_SetString(PyExc_SystemError,
1293
"accessing non-existent string segment");
1296
*ptr = (void *)self->ob_sval;
1297
return Py_SIZE(self);
1301
string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1303
PyErr_SetString(PyExc_TypeError,
1304
"Cannot use string as modifiable buffer");
1309
string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
1312
*lenp = Py_SIZE(self);
1317
string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
1320
PyErr_SetString(PyExc_SystemError,
1321
"accessing non-existent string segment");
1324
*ptr = self->ob_sval;
1325
return Py_SIZE(self);
1329
string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
1331
return PyBuffer_FillInfo(view, (PyObject*)self,
1332
(void *)self->ob_sval, Py_SIZE(self),
1336
static PySequenceMethods string_as_sequence = {
1337
(lenfunc)string_length, /*sq_length*/
1338
(binaryfunc)string_concat, /*sq_concat*/
1339
(ssizeargfunc)string_repeat, /*sq_repeat*/
1340
(ssizeargfunc)string_item, /*sq_item*/
1341
(ssizessizeargfunc)string_slice, /*sq_slice*/
1344
(objobjproc)string_contains /*sq_contains*/
1347
static PyMappingMethods string_as_mapping = {
1348
(lenfunc)string_length,
1349
(binaryfunc)string_subscript,
1353
static PyBufferProcs string_as_buffer = {
1354
(readbufferproc)string_buffer_getreadbuf,
1355
(writebufferproc)string_buffer_getwritebuf,
1356
(segcountproc)string_buffer_getsegcount,
1357
(charbufferproc)string_buffer_getcharbuf,
1358
(getbufferproc)string_buffer_getbuffer,
1365
#define RIGHTSTRIP 1
1368
/* Arrays indexed by above */
1369
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
1371
#define STRIPNAME(i) (stripformat[i]+3)
1374
/* Don't call if length < 2 */
1375
#define Py_STRING_MATCH(target, offset, pattern, length) \
1376
(target[offset] == pattern[0] && \
1377
target[offset+length-1] == pattern[length-1] && \
1378
!memcmp(target+offset+1, pattern+1, length-2) )
1381
/* Overallocate the initial list to reduce the number of reallocs for small
1382
split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
1383
resizes, to sizes 4, 8, then 16. Most observed string splits are for human
1384
text (roughly 11 words per line) and field delimited data (usually 1-10
1385
fields). For large strings the split algorithms are bandwidth limited
1386
so increasing the preallocation likely will not improve things.*/
1388
#define MAX_PREALLOC 12
1390
/* 5 splits gives 6 elements */
1391
#define PREALLOC_SIZE(maxsplit) \
1392
(maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1)
1394
#define SPLIT_APPEND(data, left, right) \
1395
str = PyString_FromStringAndSize((data) + (left), \
1396
(right) - (left)); \
1399
if (PyList_Append(list, str)) { \
1406
#define SPLIT_ADD(data, left, right) { \
1407
str = PyString_FromStringAndSize((data) + (left), \
1408
(right) - (left)); \
1411
if (count < MAX_PREALLOC) { \
1412
PyList_SET_ITEM(list, count, str); \
1414
if (PyList_Append(list, str)) { \
1423
/* Always force the list to the expected size. */
1424
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
1426
#define SKIP_SPACE(s, i, len) { while (i<len && isspace(Py_CHARMASK(s[i]))) i++; }
1427
#define SKIP_NONSPACE(s, i, len) { while (i<len && !isspace(Py_CHARMASK(s[i]))) i++; }
1428
#define RSKIP_SPACE(s, i) { while (i>=0 && isspace(Py_CHARMASK(s[i]))) i--; }
1429
#define RSKIP_NONSPACE(s, i) { while (i>=0 && !isspace(Py_CHARMASK(s[i]))) i--; }
1431
Py_LOCAL_INLINE(PyObject *)
1432
split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
1434
const char *s = PyString_AS_STRING(self);
1435
Py_ssize_t i, j, count=0;
1437
PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
1444
while (maxsplit-- > 0) {
1445
SKIP_SPACE(s, i, len);
1448
SKIP_NONSPACE(s, i, len);
1449
if (j == 0 && i == len && PyString_CheckExact(self)) {
1450
/* No whitespace in self, so just use it as list[0] */
1452
PyList_SET_ITEM(list, 0, (PyObject *)self);
1460
/* Only occurs when maxsplit was reached */
1461
/* Skip any remaining whitespace and copy to end of string */
1462
SKIP_SPACE(s, i, len);
1464
SPLIT_ADD(s, i, len);
1466
FIX_PREALLOC_SIZE(list);
1473
Py_LOCAL_INLINE(PyObject *)
1474
split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
1476
const char *s = PyString_AS_STRING(self);
1477
register Py_ssize_t i, j, count=0;
1479
PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
1485
while ((j < len) && (maxcount-- > 0)) {
1487
/* I found that using memchr makes no difference */
1495
if (i == 0 && count == 0 && PyString_CheckExact(self)) {
1496
/* ch not in self, so just use self as list[0] */
1498
PyList_SET_ITEM(list, 0, (PyObject *)self);
1501
else if (i <= len) {
1502
SPLIT_ADD(s, i, len);
1504
FIX_PREALLOC_SIZE(list);
1512
PyDoc_STRVAR(split__doc__,
1513
"S.split([sep [,maxsplit]]) -> list of strings\n\
1515
Return a list of the words in the string S, using sep as the\n\
1516
delimiter string. If maxsplit is given, at most maxsplit\n\
1517
splits are done. If sep is not specified or is None, any\n\
1518
whitespace string is a separator and empty strings are removed\n\
1522
string_split(PyStringObject *self, PyObject *args)
1524
Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1525
Py_ssize_t maxsplit = -1, count=0;
1526
const char *s = PyString_AS_STRING(self), *sub;
1527
PyObject *list, *str, *subobj = Py_None;
1532
if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1535
maxsplit = PY_SSIZE_T_MAX;
1536
if (subobj == Py_None)
1537
return split_whitespace(self, len, maxsplit);
1538
if (PyString_Check(subobj)) {
1539
sub = PyString_AS_STRING(subobj);
1540
n = PyString_GET_SIZE(subobj);
1542
#ifdef Py_USING_UNICODE
1543
else if (PyUnicode_Check(subobj))
1544
return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1546
else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1550
PyErr_SetString(PyExc_ValueError, "empty separator");
1554
return split_char(self, len, sub[0], maxsplit);
1556
list = PyList_New(PREALLOC_SIZE(maxsplit));
1562
while (maxsplit-- > 0) {
1563
pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1572
while ((j+n <= len) && (maxsplit-- > 0)) {
1573
for (; j+n <= len; j++) {
1574
if (Py_STRING_MATCH(s, j, sub, n)) {
1582
SPLIT_ADD(s, i, len);
1583
FIX_PREALLOC_SIZE(list);
1591
PyDoc_STRVAR(partition__doc__,
1592
"S.partition(sep) -> (head, sep, tail)\n\
1594
Searches for the separator sep in S, and returns the part before it,\n\
1595
the separator itself, and the part after it. If the separator is not\n\
1596
found, returns S and two empty strings.");
1599
string_partition(PyStringObject *self, PyObject *sep_obj)
1604
if (PyString_Check(sep_obj)) {
1605
sep = PyString_AS_STRING(sep_obj);
1606
sep_len = PyString_GET_SIZE(sep_obj);
1608
#ifdef Py_USING_UNICODE
1609
else if (PyUnicode_Check(sep_obj))
1610
return PyUnicode_Partition((PyObject *) self, sep_obj);
1612
else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1615
return stringlib_partition(
1617
PyString_AS_STRING(self), PyString_GET_SIZE(self),
1618
sep_obj, sep, sep_len
1622
PyDoc_STRVAR(rpartition__doc__,
1623
"S.rpartition(sep) -> (tail, sep, head)\n\
1625
Searches for the separator sep in S, starting at the end of S, and returns\n\
1626
the part before it, the separator itself, and the part after it. If the\n\
1627
separator is not found, returns two empty strings and S.");
1630
string_rpartition(PyStringObject *self, PyObject *sep_obj)
1635
if (PyString_Check(sep_obj)) {
1636
sep = PyString_AS_STRING(sep_obj);
1637
sep_len = PyString_GET_SIZE(sep_obj);
1639
#ifdef Py_USING_UNICODE
1640
else if (PyUnicode_Check(sep_obj))
1641
return PyUnicode_RPartition((PyObject *) self, sep_obj);
1643
else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1646
return stringlib_rpartition(
1648
PyString_AS_STRING(self), PyString_GET_SIZE(self),
1649
sep_obj, sep, sep_len
1653
Py_LOCAL_INLINE(PyObject *)
1654
rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
1656
const char *s = PyString_AS_STRING(self);
1657
Py_ssize_t i, j, count=0;
1659
PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
1666
while (maxsplit-- > 0) {
1670
RSKIP_NONSPACE(s, i);
1671
if (j == len-1 && i < 0 && PyString_CheckExact(self)) {
1672
/* No whitespace in self, so just use it as list[0] */
1674
PyList_SET_ITEM(list, 0, (PyObject *)self);
1678
SPLIT_ADD(s, i + 1, j + 1);
1681
/* Only occurs when maxsplit was reached */
1682
/* Skip any remaining whitespace and copy to beginning of string */
1685
SPLIT_ADD(s, 0, i + 1);
1688
FIX_PREALLOC_SIZE(list);
1689
if (PyList_Reverse(list) < 0)
1697
Py_LOCAL_INLINE(PyObject *)
1698
rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
1700
const char *s = PyString_AS_STRING(self);
1701
register Py_ssize_t i, j, count=0;
1703
PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
1709
while ((i >= 0) && (maxcount-- > 0)) {
1710
for (; i >= 0; i--) {
1712
SPLIT_ADD(s, i + 1, j + 1);
1718
if (i < 0 && count == 0 && PyString_CheckExact(self)) {
1719
/* ch not in self, so just use self as list[0] */
1721
PyList_SET_ITEM(list, 0, (PyObject *)self);
1725
SPLIT_ADD(s, 0, j + 1);
1727
FIX_PREALLOC_SIZE(list);
1728
if (PyList_Reverse(list) < 0)
1737
PyDoc_STRVAR(rsplit__doc__,
1738
"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1740
Return a list of the words in the string S, using sep as the\n\
1741
delimiter string, starting at the end of the string and working\n\
1742
to the front. If maxsplit is given, at most maxsplit splits are\n\
1743
done. If sep is not specified or is None, any whitespace string\n\
1747
string_rsplit(PyStringObject *self, PyObject *args)
1749
Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1750
Py_ssize_t maxsplit = -1, count=0;
1751
const char *s, *sub;
1752
PyObject *list, *str, *subobj = Py_None;
1754
if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1757
maxsplit = PY_SSIZE_T_MAX;
1758
if (subobj == Py_None)
1759
return rsplit_whitespace(self, len, maxsplit);
1760
if (PyString_Check(subobj)) {
1761
sub = PyString_AS_STRING(subobj);
1762
n = PyString_GET_SIZE(subobj);
1764
#ifdef Py_USING_UNICODE
1765
else if (PyUnicode_Check(subobj))
1766
return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1768
else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1772
PyErr_SetString(PyExc_ValueError, "empty separator");
1776
return rsplit_char(self, len, sub[0], maxsplit);
1778
list = PyList_New(PREALLOC_SIZE(maxsplit));
1785
s = PyString_AS_STRING(self);
1786
while ( (i >= 0) && (maxsplit-- > 0) ) {
1788
if (Py_STRING_MATCH(s, i, sub, n)) {
1789
SPLIT_ADD(s, i + n, j);
1797
FIX_PREALLOC_SIZE(list);
1798
if (PyList_Reverse(list) < 0)
1808
PyDoc_STRVAR(join__doc__,
1809
"S.join(sequence) -> string\n\
1811
Return a string which is the concatenation of the strings in the\n\
1812
sequence. The separator between elements is S.");
1815
string_join(PyStringObject *self, PyObject *orig)
1817
char *sep = PyString_AS_STRING(self);
1818
const Py_ssize_t seplen = PyString_GET_SIZE(self);
1819
PyObject *res = NULL;
1821
Py_ssize_t seqlen = 0;
1824
PyObject *seq, *item;
1826
seq = PySequence_Fast(orig, "");
1831
seqlen = PySequence_Size(seq);
1834
return PyString_FromString("");
1837
item = PySequence_Fast_GET_ITEM(seq, 0);
1838
if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1845
/* There are at least two things to join, or else we have a subclass
1846
* of the builtin types in the sequence.
1847
* Do a pre-pass to figure out the total amount of space we'll
1848
* need (sz), see whether any argument is absurd, and defer to
1849
* the Unicode join if appropriate.
1851
for (i = 0; i < seqlen; i++) {
1852
const size_t old_sz = sz;
1853
item = PySequence_Fast_GET_ITEM(seq, i);
1854
if (!PyString_Check(item)){
1855
#ifdef Py_USING_UNICODE
1856
if (PyUnicode_Check(item)) {
1857
/* Defer to Unicode join.
1858
* CAUTION: There's no guarantee that the
1859
* original sequence can be iterated over
1860
* again, so we must pass seq here.
1863
result = PyUnicode_Join((PyObject *)self, seq);
1868
PyErr_Format(PyExc_TypeError,
1869
"sequence item %zd: expected string,"
1871
i, Py_TYPE(item)->tp_name);
1875
sz += PyString_GET_SIZE(item);
1878
if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1879
PyErr_SetString(PyExc_OverflowError,
1880
"join() result is too long for a Python string");
1886
/* Allocate result space. */
1887
res = PyString_FromStringAndSize((char*)NULL, sz);
1893
/* Catenate everything. */
1894
p = PyString_AS_STRING(res);
1895
for (i = 0; i < seqlen; ++i) {
1897
item = PySequence_Fast_GET_ITEM(seq, i);
1898
n = PyString_GET_SIZE(item);
1899
Py_MEMCPY(p, PyString_AS_STRING(item), n);
1901
if (i < seqlen - 1) {
1902
Py_MEMCPY(p, sep, seplen);
1912
_PyString_Join(PyObject *sep, PyObject *x)
1914
assert(sep != NULL && PyString_Check(sep));
1916
return string_join((PyStringObject *)sep, x);
1919
Py_LOCAL_INLINE(void)
1920
string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1934
Py_LOCAL_INLINE(Py_ssize_t)
1935
string_find_internal(PyStringObject *self, PyObject *args, int dir)
1940
Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1941
PyObject *obj_start=Py_None, *obj_end=Py_None;
1943
if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1944
&obj_start, &obj_end))
1946
/* To support None in "start" and "end" arguments, meaning
1947
the same as if they were not passed.
1949
if (obj_start != Py_None)
1950
if (!_PyEval_SliceIndex(obj_start, &start))
1952
if (obj_end != Py_None)
1953
if (!_PyEval_SliceIndex(obj_end, &end))
1956
if (PyString_Check(subobj)) {
1957
sub = PyString_AS_STRING(subobj);
1958
sub_len = PyString_GET_SIZE(subobj);
1960
#ifdef Py_USING_UNICODE
1961
else if (PyUnicode_Check(subobj))
1962
return PyUnicode_Find(
1963
(PyObject *)self, subobj, start, end, dir);
1965
else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1966
/* XXX - the "expected a character buffer object" is pretty
1967
confusing for a non-expert. remap to something else ? */
1971
return stringlib_find_slice(
1972
PyString_AS_STRING(self), PyString_GET_SIZE(self),
1973
sub, sub_len, start, end);
1975
return stringlib_rfind_slice(
1976
PyString_AS_STRING(self), PyString_GET_SIZE(self),
1977
sub, sub_len, start, end);
1981
PyDoc_STRVAR(find__doc__,
1982
"S.find(sub [,start [,end]]) -> int\n\
1984
Return the lowest index in S where substring sub is found,\n\
1985
such that sub is contained within s[start:end]. Optional\n\
1986
arguments start and end are interpreted as in slice notation.\n\
1988
Return -1 on failure.");
1991
string_find(PyStringObject *self, PyObject *args)
1993
Py_ssize_t result = string_find_internal(self, args, +1);
1996
return PyInt_FromSsize_t(result);
2000
PyDoc_STRVAR(index__doc__,
2001
"S.index(sub [,start [,end]]) -> int\n\
2003
Like S.find() but raise ValueError when the substring is not found.");
2006
string_index(PyStringObject *self, PyObject *args)
2008
Py_ssize_t result = string_find_internal(self, args, +1);
2012
PyErr_SetString(PyExc_ValueError,
2013
"substring not found");
2016
return PyInt_FromSsize_t(result);
2020
PyDoc_STRVAR(rfind__doc__,
2021
"S.rfind(sub [,start [,end]]) -> int\n\
2023
Return the highest index in S where substring sub is found,\n\
2024
such that sub is contained within s[start:end]. Optional\n\
2025
arguments start and end are interpreted as in slice notation.\n\
2027
Return -1 on failure.");
2030
string_rfind(PyStringObject *self, PyObject *args)
2032
Py_ssize_t result = string_find_internal(self, args, -1);
2035
return PyInt_FromSsize_t(result);
2039
PyDoc_STRVAR(rindex__doc__,
2040
"S.rindex(sub [,start [,end]]) -> int\n\
2042
Like S.rfind() but raise ValueError when the substring is not found.");
2045
string_rindex(PyStringObject *self, PyObject *args)
2047
Py_ssize_t result = string_find_internal(self, args, -1);
2051
PyErr_SetString(PyExc_ValueError,
2052
"substring not found");
2055
return PyInt_FromSsize_t(result);
2059
Py_LOCAL_INLINE(PyObject *)
2060
do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
2062
char *s = PyString_AS_STRING(self);
2063
Py_ssize_t len = PyString_GET_SIZE(self);
2064
char *sep = PyString_AS_STRING(sepobj);
2065
Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
2069
if (striptype != RIGHTSTRIP) {
2070
while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2076
if (striptype != LEFTSTRIP) {
2079
} while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2083
if (i == 0 && j == len && PyString_CheckExact(self)) {
2085
return (PyObject*)self;
2088
return PyString_FromStringAndSize(s+i, j-i);
2092
Py_LOCAL_INLINE(PyObject *)
2093
do_strip(PyStringObject *self, int striptype)
2095
char *s = PyString_AS_STRING(self);
2096
Py_ssize_t len = PyString_GET_SIZE(self), i, j;
2099
if (striptype != RIGHTSTRIP) {
2100
while (i < len && isspace(Py_CHARMASK(s[i]))) {
2106
if (striptype != LEFTSTRIP) {
2109
} while (j >= i && isspace(Py_CHARMASK(s[j])));
2113
if (i == 0 && j == len && PyString_CheckExact(self)) {
2115
return (PyObject*)self;
2118
return PyString_FromStringAndSize(s+i, j-i);
2122
Py_LOCAL_INLINE(PyObject *)
2123
do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2125
PyObject *sep = NULL;
2127
if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
2130
if (sep != NULL && sep != Py_None) {
2131
if (PyString_Check(sep))
2132
return do_xstrip(self, striptype, sep);
2133
#ifdef Py_USING_UNICODE
2134
else if (PyUnicode_Check(sep)) {
2135
PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2139
res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2145
PyErr_Format(PyExc_TypeError,
2146
#ifdef Py_USING_UNICODE
2147
"%s arg must be None, str or unicode",
2149
"%s arg must be None or str",
2151
STRIPNAME(striptype));
2155
return do_strip(self, striptype);
2159
PyDoc_STRVAR(strip__doc__,
2160
"S.strip([chars]) -> string or unicode\n\
2162
Return a copy of the string S with leading and trailing\n\
2163
whitespace removed.\n\
2164
If chars is given and not None, remove characters in chars instead.\n\
2165
If chars is unicode, S will be converted to unicode before stripping");
2168
string_strip(PyStringObject *self, PyObject *args)
2170
if (PyTuple_GET_SIZE(args) == 0)
2171
return do_strip(self, BOTHSTRIP); /* Common case */
2173
return do_argstrip(self, BOTHSTRIP, args);
2177
PyDoc_STRVAR(lstrip__doc__,
2178
"S.lstrip([chars]) -> string or unicode\n\
2180
Return a copy of the string S with leading whitespace removed.\n\
2181
If chars is given and not None, remove characters in chars instead.\n\
2182
If chars is unicode, S will be converted to unicode before stripping");
2185
string_lstrip(PyStringObject *self, PyObject *args)
2187
if (PyTuple_GET_SIZE(args) == 0)
2188
return do_strip(self, LEFTSTRIP); /* Common case */
2190
return do_argstrip(self, LEFTSTRIP, args);
2194
PyDoc_STRVAR(rstrip__doc__,
2195
"S.rstrip([chars]) -> string or unicode\n\
2197
Return a copy of the string S with trailing whitespace removed.\n\
2198
If chars is given and not None, remove characters in chars instead.\n\
2199
If chars is unicode, S will be converted to unicode before stripping");
2202
string_rstrip(PyStringObject *self, PyObject *args)
2204
if (PyTuple_GET_SIZE(args) == 0)
2205
return do_strip(self, RIGHTSTRIP); /* Common case */
2207
return do_argstrip(self, RIGHTSTRIP, args);
2211
PyDoc_STRVAR(lower__doc__,
2212
"S.lower() -> string\n\
2214
Return a copy of the string S converted to lowercase.");
2216
/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2218
#define _tolower tolower
2222
string_lower(PyStringObject *self)
2225
Py_ssize_t i, n = PyString_GET_SIZE(self);
2228
newobj = PyString_FromStringAndSize(NULL, n);
2232
s = PyString_AS_STRING(newobj);
2234
Py_MEMCPY(s, PyString_AS_STRING(self), n);
2236
for (i = 0; i < n; i++) {
2237
int c = Py_CHARMASK(s[i]);
2245
PyDoc_STRVAR(upper__doc__,
2246
"S.upper() -> string\n\
2248
Return a copy of the string S converted to uppercase.");
2251
#define _toupper toupper
2255
string_upper(PyStringObject *self)
2258
Py_ssize_t i, n = PyString_GET_SIZE(self);
2261
newobj = PyString_FromStringAndSize(NULL, n);
2265
s = PyString_AS_STRING(newobj);
2267
Py_MEMCPY(s, PyString_AS_STRING(self), n);
2269
for (i = 0; i < n; i++) {
2270
int c = Py_CHARMASK(s[i]);
2278
PyDoc_STRVAR(title__doc__,
2279
"S.title() -> string\n\
2281
Return a titlecased version of S, i.e. words start with uppercase\n\
2282
characters, all remaining cased characters have lowercase.");
2285
string_title(PyStringObject *self)
2287
char *s = PyString_AS_STRING(self), *s_new;
2288
Py_ssize_t i, n = PyString_GET_SIZE(self);
2289
int previous_is_cased = 0;
2292
newobj = PyString_FromStringAndSize(NULL, n);
2295
s_new = PyString_AsString(newobj);
2296
for (i = 0; i < n; i++) {
2297
int c = Py_CHARMASK(*s++);
2299
if (!previous_is_cased)
2301
previous_is_cased = 1;
2302
} else if (isupper(c)) {
2303
if (previous_is_cased)
2305
previous_is_cased = 1;
2307
previous_is_cased = 0;
2313
PyDoc_STRVAR(capitalize__doc__,
2314
"S.capitalize() -> string\n\
2316
Return a copy of the string S with only its first character\n\
2320
string_capitalize(PyStringObject *self)
2322
char *s = PyString_AS_STRING(self), *s_new;
2323
Py_ssize_t i, n = PyString_GET_SIZE(self);
2326
newobj = PyString_FromStringAndSize(NULL, n);
2329
s_new = PyString_AsString(newobj);
2331
int c = Py_CHARMASK(*s++);
2333
*s_new = toupper(c);
2338
for (i = 1; i < n; i++) {
2339
int c = Py_CHARMASK(*s++);
2341
*s_new = tolower(c);
2350
PyDoc_STRVAR(count__doc__,
2351
"S.count(sub[, start[, end]]) -> int\n\
2353
Return the number of non-overlapping occurrences of substring sub in\n\
2354
string S[start:end]. Optional arguments start and end are interpreted\n\
2355
as in slice notation.");
2358
string_count(PyStringObject *self, PyObject *args)
2361
const char *str = PyString_AS_STRING(self), *sub;
2363
Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2365
if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2366
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2369
if (PyString_Check(sub_obj)) {
2370
sub = PyString_AS_STRING(sub_obj);
2371
sub_len = PyString_GET_SIZE(sub_obj);
2373
#ifdef Py_USING_UNICODE
2374
else if (PyUnicode_Check(sub_obj)) {
2376
count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2380
return PyInt_FromSsize_t(count);
2383
else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2386
string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
2388
return PyInt_FromSsize_t(
2389
stringlib_count(str + start, end - start, sub, sub_len)
2393
PyDoc_STRVAR(swapcase__doc__,
2394
"S.swapcase() -> string\n\
2396
Return a copy of the string S with uppercase characters\n\
2397
converted to lowercase and vice versa.");
2400
string_swapcase(PyStringObject *self)
2402
char *s = PyString_AS_STRING(self), *s_new;
2403
Py_ssize_t i, n = PyString_GET_SIZE(self);
2406
newobj = PyString_FromStringAndSize(NULL, n);
2409
s_new = PyString_AsString(newobj);
2410
for (i = 0; i < n; i++) {
2411
int c = Py_CHARMASK(*s++);
2413
*s_new = toupper(c);
2415
else if (isupper(c)) {
2416
*s_new = tolower(c);
2426
PyDoc_STRVAR(translate__doc__,
2427
"S.translate(table [,deletechars]) -> string\n\
2429
Return a copy of the string S, where all characters occurring\n\
2430
in the optional argument deletechars are removed, and the\n\
2431
remaining characters have been mapped through the given\n\
2432
translation table, which must be a string of length 256.");
2435
string_translate(PyStringObject *self, PyObject *args)
2437
register char *input, *output;
2439
register Py_ssize_t i, c, changed = 0;
2440
PyObject *input_obj = (PyObject*)self;
2441
const char *output_start, *del_table=NULL;
2442
Py_ssize_t inlen, tablen, dellen = 0;
2444
int trans_table[256];
2445
PyObject *tableobj, *delobj = NULL;
2447
if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2448
&tableobj, &delobj))
2451
if (PyString_Check(tableobj)) {
2452
table = PyString_AS_STRING(tableobj);
2453
tablen = PyString_GET_SIZE(tableobj);
2455
else if (tableobj == Py_None) {
2459
#ifdef Py_USING_UNICODE
2460
else if (PyUnicode_Check(tableobj)) {
2461
/* Unicode .translate() does not support the deletechars
2462
parameter; instead a mapping to None will cause characters
2464
if (delobj != NULL) {
2465
PyErr_SetString(PyExc_TypeError,
2466
"deletions are implemented differently for unicode");
2469
return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2472
else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2475
if (tablen != 256) {
2476
PyErr_SetString(PyExc_ValueError,
2477
"translation table must be 256 characters long");
2481
if (delobj != NULL) {
2482
if (PyString_Check(delobj)) {
2483
del_table = PyString_AS_STRING(delobj);
2484
dellen = PyString_GET_SIZE(delobj);
2486
#ifdef Py_USING_UNICODE
2487
else if (PyUnicode_Check(delobj)) {
2488
PyErr_SetString(PyExc_TypeError,
2489
"deletions are implemented differently for unicode");
2493
else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2501
inlen = PyString_GET_SIZE(input_obj);
2502
result = PyString_FromStringAndSize((char *)NULL, inlen);
2505
output_start = output = PyString_AsString(result);
2506
input = PyString_AS_STRING(input_obj);
2508
if (dellen == 0 && table != NULL) {
2509
/* If no deletions are required, use faster code */
2510
for (i = inlen; --i >= 0; ) {
2511
c = Py_CHARMASK(*input++);
2512
if (Py_CHARMASK((*output++ = table[c])) != c)
2515
if (changed || !PyString_CheckExact(input_obj))
2518
Py_INCREF(input_obj);
2522
if (table == NULL) {
2523
for (i = 0; i < 256; i++)
2524
trans_table[i] = Py_CHARMASK(i);
2526
for (i = 0; i < 256; i++)
2527
trans_table[i] = Py_CHARMASK(table[i]);
2530
for (i = 0; i < dellen; i++)
2531
trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2533
for (i = inlen; --i >= 0; ) {
2534
c = Py_CHARMASK(*input++);
2535
if (trans_table[c] != -1)
2536
if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2540
if (!changed && PyString_CheckExact(input_obj)) {
2542
Py_INCREF(input_obj);
2545
/* Fix the size of the resulting string */
2547
_PyString_Resize(&result, output - output_start);
2555
/* find and count characters and substrings */
2557
#define findchar(target, target_len, c) \
2558
((char *)memchr((const void *)(target), c, target_len))
2560
/* String ops must return a string. */
2561
/* If the object is subclass of string, create a copy */
2562
Py_LOCAL(PyStringObject *)
2563
return_self(PyStringObject *self)
2565
if (PyString_CheckExact(self)) {
2569
return (PyStringObject *)PyString_FromStringAndSize(
2570
PyString_AS_STRING(self),
2571
PyString_GET_SIZE(self));
2574
Py_LOCAL_INLINE(Py_ssize_t)
2575
countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2578
const char *start=target;
2579
const char *end=target+target_len;
2581
while ( (start=findchar(start, end-start, c)) != NULL ) {
2583
if (count >= maxcount)
2590
Py_LOCAL(Py_ssize_t)
2591
findstring(const char *target, Py_ssize_t target_len,
2592
const char *pattern, Py_ssize_t pattern_len,
2598
start += target_len;
2602
if (end > target_len) {
2604
} else if (end < 0) {
2610
/* zero-length substrings always match at the first attempt */
2611
if (pattern_len == 0)
2612
return (direction > 0) ? start : end;
2616
if (direction < 0) {
2617
for (; end >= start; end--)
2618
if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2621
for (; start <= end; start++)
2622
if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2628
Py_LOCAL_INLINE(Py_ssize_t)
2629
countstring(const char *target, Py_ssize_t target_len,
2630
const char *pattern, Py_ssize_t pattern_len,
2633
int direction, Py_ssize_t maxcount)
2638
start += target_len;
2642
if (end > target_len) {
2644
} else if (end < 0) {
2650
/* zero-length substrings match everywhere */
2651
if (pattern_len == 0 || maxcount == 0) {
2652
if (target_len+1 < maxcount)
2653
return target_len+1;
2658
if (direction < 0) {
2659
for (; (end >= start); end--)
2660
if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2662
if (--maxcount <= 0) break;
2663
end -= pattern_len-1;
2666
for (; (start <= end); start++)
2667
if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2669
if (--maxcount <= 0)
2671
start += pattern_len-1;
2678
/* Algorithms for different cases of string replacement */
2680
/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2681
/* Insert the `to` string before the first character of self, between
 * characters, and (if maxcount allows) after the last character:
 * at most min(len(self)+1, maxcount) copies in total.
 *
 * Returns a new string object, or NULL with OverflowError set when the
 * result length would exceed PY_SSIZE_T_MAX (or NULL when the
 * allocation fails).
 */
Py_LOCAL(PyStringObject *)
replace_interleave(PyStringObject *self,
                   const char *to_s, Py_ssize_t to_len,
                   Py_ssize_t maxcount)
{
    char *self_s, *result_s;
    Py_ssize_t self_len, result_len;
    Py_ssize_t count, i;
    PyStringObject *result;

    self_len = PyString_GET_SIZE(self);

    /* 1 at the end plus 1 after every character */
    count = self_len+1;
    if (maxcount < count)
        count = maxcount;

    /* Check for overflow */
    /*   result_len = count * to_len + self_len; */
    /* Test against the limit *before* multiplying: signed overflow is
       undefined behaviour, so a multiply-then-divide check is not
       reliable. */
    assert(count > 0);
    if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
        PyErr_SetString(PyExc_OverflowError,
                        "replace string is too long");
        return NULL;
    }
    result_len = count * to_len + self_len;

    if (! (result = (PyStringObject *)
                     PyString_FromStringAndSize(NULL, result_len)) )
        return NULL;

    self_s = PyString_AS_STRING(self);
    result_s = PyString_AS_STRING(result);

    /* TODO: special case single character, which doesn't need memcpy */

    /* Lay the first one down (guaranteed this will occur) */
    Py_MEMCPY(result_s, to_s, to_len);
    result_s += to_len;
    count -= 1;

    for (i=0; i<count; i++) {
        *result_s++ = *self_s++;
        Py_MEMCPY(result_s, to_s, to_len);
        result_s += to_len;
    }

    /* Copy the rest of the original string */
    Py_MEMCPY(result_s, self_s, self_len-i);

    return result;
}
2739
/* Special case for deleting a single character */
2740
/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2741
Py_LOCAL(PyStringObject *)
2742
replace_delete_single_character(PyStringObject *self,
2743
char from_c, Py_ssize_t maxcount)
2745
char *self_s, *result_s;
2746
char *start, *next, *end;
2747
Py_ssize_t self_len, result_len;
2749
PyStringObject *result;
2751
self_len = PyString_GET_SIZE(self);
2752
self_s = PyString_AS_STRING(self);
2754
count = countchar(self_s, self_len, from_c, maxcount);
2756
return return_self(self);
2759
result_len = self_len - count; /* from_len == 1 */
2760
assert(result_len>=0);
2762
if ( (result = (PyStringObject *)
2763
PyString_FromStringAndSize(NULL, result_len)) == NULL)
2765
result_s = PyString_AS_STRING(result);
2768
end = self_s + self_len;
2769
while (count-- > 0) {
2770
next = findchar(start, end-start, from_c);
2773
Py_MEMCPY(result_s, start, next-start);
2774
result_s += (next-start);
2777
Py_MEMCPY(result_s, start, end-start);
2782
/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2784
Py_LOCAL(PyStringObject *)
2785
replace_delete_substring(PyStringObject *self,
2786
const char *from_s, Py_ssize_t from_len,
2787
Py_ssize_t maxcount) {
2788
char *self_s, *result_s;
2789
char *start, *next, *end;
2790
Py_ssize_t self_len, result_len;
2791
Py_ssize_t count, offset;
2792
PyStringObject *result;
2794
self_len = PyString_GET_SIZE(self);
2795
self_s = PyString_AS_STRING(self);
2797
count = countstring(self_s, self_len,
2804
return return_self(self);
2807
result_len = self_len - (count * from_len);
2808
assert (result_len>=0);
2810
if ( (result = (PyStringObject *)
2811
PyString_FromStringAndSize(NULL, result_len)) == NULL )
2814
result_s = PyString_AS_STRING(result);
2817
end = self_s + self_len;
2818
while (count-- > 0) {
2819
offset = findstring(start, end-start,
2821
0, end-start, FORWARD);
2824
next = start + offset;
2826
Py_MEMCPY(result_s, start, next-start);
2828
result_s += (next-start);
2829
start = next+from_len;
2831
Py_MEMCPY(result_s, start, end-start);
2835
/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2836
Py_LOCAL(PyStringObject *)
2837
replace_single_character_in_place(PyStringObject *self,
2838
char from_c, char to_c,
2839
Py_ssize_t maxcount)
2841
char *self_s, *result_s, *start, *end, *next;
2842
Py_ssize_t self_len;
2843
PyStringObject *result;
2845
/* The result string will be the same size */
2846
self_s = PyString_AS_STRING(self);
2847
self_len = PyString_GET_SIZE(self);
2849
next = findchar(self_s, self_len, from_c);
2852
/* No matches; return the original string */
2853
return return_self(self);
2856
/* Need to make a new string */
2857
result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2860
result_s = PyString_AS_STRING(result);
2861
Py_MEMCPY(result_s, self_s, self_len);
2863
/* change everything in-place, starting with this one */
2864
start = result_s + (next-self_s);
2867
end = result_s + self_len;
2869
while (--maxcount > 0) {
2870
next = findchar(start, end-start, from_c);
2880
/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2881
Py_LOCAL(PyStringObject *)
2882
replace_substring_in_place(PyStringObject *self,
2883
const char *from_s, Py_ssize_t from_len,
2884
const char *to_s, Py_ssize_t to_len,
2885
Py_ssize_t maxcount)
2887
char *result_s, *start, *end;
2889
Py_ssize_t self_len, offset;
2890
PyStringObject *result;
2892
/* The result string will be the same size */
2894
self_s = PyString_AS_STRING(self);
2895
self_len = PyString_GET_SIZE(self);
2897
offset = findstring(self_s, self_len,
2899
0, self_len, FORWARD);
2901
/* No matches; return the original string */
2902
return return_self(self);
2905
/* Need to make a new string */
2906
result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2909
result_s = PyString_AS_STRING(result);
2910
Py_MEMCPY(result_s, self_s, self_len);
2912
/* change everything in-place, starting with this one */
2913
start = result_s + offset;
2914
Py_MEMCPY(start, to_s, from_len);
2916
end = result_s + self_len;
2918
while ( --maxcount > 0) {
2919
offset = findstring(start, end-start,
2921
0, end-start, FORWARD);
2924
Py_MEMCPY(start+offset, to_s, from_len);
2925
start += offset+from_len;
2931
/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2932
/* Replace up to maxcount occurrences of the single byte from_c with the
 * (longer) string to_s[0:to_len].
 *
 * Returns a new string object; returns return_self(self) when from_c
 * does not occur; returns NULL with OverflowError set when the result
 * length would exceed PY_SSIZE_T_MAX (or NULL when allocation fails).
 */
Py_LOCAL(PyStringObject *)
replace_single_character(PyStringObject *self,
                         char from_c,
                         const char *to_s, Py_ssize_t to_len,
                         Py_ssize_t maxcount)
{
    char *self_s, *result_s;
    char *start, *next, *end;
    Py_ssize_t self_len, result_len;
    Py_ssize_t count;
    PyStringObject *result;

    self_s = PyString_AS_STRING(self);
    self_len = PyString_GET_SIZE(self);

    count = countchar(self_s, self_len, from_c, maxcount);
    if (count == 0) {
        /* no matches, return unchanged */
        return return_self(self);
    }

    /* use the difference between current and new, hence the "-1" */
    /*   result_len = self_len + count * (to_len-1)  */
    /* Test against the limit *before* multiplying: signed overflow is
       undefined behaviour, so it cannot be detected after the fact. */
    if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
        PyErr_SetString(PyExc_OverflowError, "replace string is too long");
        return NULL;
    }
    result_len = self_len + count * (to_len - 1);

    if ( (result = (PyStringObject *)
          PyString_FromStringAndSize(NULL, result_len)) == NULL)
        return NULL;
    result_s = PyString_AS_STRING(result);

    start = self_s;
    end = self_s + self_len;
    while (count-- > 0) {
        next = findchar(start, end-start, from_c);
        if (next == NULL)
            break;

        if (next == start) {
            /* replace with the 'to' */
            Py_MEMCPY(result_s, to_s, to_len);
            result_s += to_len;
            start += 1;
        } else {
            /* copy the unchanged old then the 'to' */
            Py_MEMCPY(result_s, start, next-start);
            result_s += (next-start);
            Py_MEMCPY(result_s, to_s, to_len);
            result_s += to_len;
            start = next+1;
        }
    }
    /* Copy the remainder of the remaining string */
    Py_MEMCPY(result_s, start, end-start);

    return result;
}
2998
/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2999
/* Replace up to maxcount occurrences of from_s[0:from_len] with
 * to_s[0:to_len], where the two lengths differ.
 *
 * Returns a new string object; returns return_self(self) when the
 * pattern does not occur; returns NULL with OverflowError set when the
 * result length would exceed PY_SSIZE_T_MAX (or NULL when allocation
 * fails).
 */
Py_LOCAL(PyStringObject *)
replace_substring(PyStringObject *self,
                  const char *from_s, Py_ssize_t from_len,
                  const char *to_s, Py_ssize_t to_len,
                  Py_ssize_t maxcount) {
    char *self_s, *result_s;
    char *start, *next, *end;
    Py_ssize_t self_len, result_len;
    Py_ssize_t count, offset;
    PyStringObject *result;

    self_s = PyString_AS_STRING(self);
    self_len = PyString_GET_SIZE(self);

    count = countstring(self_s, self_len,
                        from_s, from_len,
                        0, self_len, FORWARD, maxcount);
    if (count == 0) {
        /* no matches, return unchanged */
        return return_self(self);
    }

    /* Check for overflow */
    /*    result_len = self_len + count * (to_len-from_len) */
    /* Test against the limit *before* multiplying: signed overflow is
       undefined behaviour.  When the replacement is shorter the
       left-hand side is negative and the test is trivially false. */
    if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
        PyErr_SetString(PyExc_OverflowError, "replace string is too long");
        return NULL;
    }
    result_len = self_len + count * (to_len - from_len);

    if ( (result = (PyStringObject *)
          PyString_FromStringAndSize(NULL, result_len)) == NULL)
        return NULL;
    result_s = PyString_AS_STRING(result);

    start = self_s;
    end = self_s + self_len;
    while (count-- > 0) {
        offset = findstring(start, end-start,
                            from_s, from_len,
                            0, end-start, FORWARD);
        if (offset == -1)
            break;
        next = start+offset;
        if (next == start) {
            /* replace with the 'to' */
            Py_MEMCPY(result_s, to_s, to_len);
            result_s += to_len;
            start += from_len;
        } else {
            /* copy the unchanged old then the 'to' */
            Py_MEMCPY(result_s, start, next-start);
            result_s += (next-start);
            Py_MEMCPY(result_s, to_s, to_len);
            result_s += to_len;
            start = next+from_len;
        }
    }
    /* Copy the remainder of the remaining string */
    Py_MEMCPY(result_s, start, end-start);

    return result;
}
3069
Py_LOCAL(PyStringObject *)
3070
replace(PyStringObject *self,
3071
const char *from_s, Py_ssize_t from_len,
3072
const char *to_s, Py_ssize_t to_len,
3073
Py_ssize_t maxcount)
3076
maxcount = PY_SSIZE_T_MAX;
3077
} else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3078
/* nothing to do; return the original string */
3079
return return_self(self);
3082
if (maxcount == 0 ||
3083
(from_len == 0 && to_len == 0)) {
3084
/* nothing to do; return the original string */
3085
return return_self(self);
3088
/* Handle zero-length special cases */
3090
if (from_len == 0) {
3091
/* insert the 'to' string everywhere. */
3092
/* >>> "Python".replace("", ".") */
3093
/* '.P.y.t.h.o.n.' */
3094
return replace_interleave(self, to_s, to_len, maxcount);
3097
/* Except for "".replace("", "A") == "A" there is no way beyond this */
3098
/* point for an empty self string to generate a non-empty string */
3099
/* Special case so the remaining code always gets a non-empty string */
3100
if (PyString_GET_SIZE(self) == 0) {
3101
return return_self(self);
3105
/* delete all occurrences of 'from' string */
3106
if (from_len == 1) {
3107
return replace_delete_single_character(
3108
self, from_s[0], maxcount);
3110
return replace_delete_substring(self, from_s, from_len, maxcount);
3114
/* Handle special case where both strings have the same length */
3116
if (from_len == to_len) {
3117
if (from_len == 1) {
3118
return replace_single_character_in_place(
3124
return replace_substring_in_place(
3125
self, from_s, from_len, to_s, to_len, maxcount);
3129
/* Otherwise use the more generic algorithms */
3130
if (from_len == 1) {
3131
return replace_single_character(self, from_s[0],
3132
to_s, to_len, maxcount);
3134
/* len('from')>=2, len('to')>=1 */
3135
return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
3139
PyDoc_STRVAR(replace__doc__,
3140
"S.replace (old, new[, count]) -> string\n\
3142
Return a copy of string S with all occurrences of substring\n\
3143
old replaced by new. If the optional argument count is\n\
3144
given, only the first count occurrences are replaced.");
3147
/* Implementation of S.replace(old, new[, count]).
 *
 * args holds (from, to[, count]); count defaults to -1.  Each of
 * `from` and `to` may be a str, a unicode object (in which case the
 * whole operation is handed to PyUnicode_Replace), or any object
 * accepted by PyObject_AsCharBuffer().
 *
 * Returns a new reference, or NULL with an exception set.
 */
static PyObject *
string_replace(PyStringObject *self, PyObject *args)
{
    Py_ssize_t count = -1;
    PyObject *from, *to;
    const char *from_s, *to_s;
    Py_ssize_t from_len, to_len;

    if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
        return NULL;

    if (PyString_Check(from)) {
        from_s = PyString_AS_STRING(from);
        from_len = PyString_GET_SIZE(from);
    }
#ifdef Py_USING_UNICODE
    /* "else if", as in the chain for `to` below: with a bare "if" here
       a plain str fell through to the PyObject_AsCharBuffer() branch
       and was fetched a second time. */
    else if (PyUnicode_Check(from))
        return PyUnicode_Replace((PyObject *)self,
                                 from, to, count);
#endif
    else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
        return NULL;

    if (PyString_Check(to)) {
        to_s = PyString_AS_STRING(to);
        to_len = PyString_GET_SIZE(to);
    }
#ifdef Py_USING_UNICODE
    else if (PyUnicode_Check(to))
        return PyUnicode_Replace((PyObject *)self,
                                 from, to, count);
#endif
    else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
        return NULL;

    return (PyObject *)replace((PyStringObject *) self,
                               from_s, from_len,
                               to_s, to_len, count);
}
3188
/* Matches the end (direction >= 0) or start (direction < 0) of self
3189
* against substr, using the start and end arguments. Returns
3190
* -1 on error, 0 if not found and 1 if found.
3193
/* Shared helper of startswith() and endswith().
 *
 * self       string that is inspected
 * substr     candidate prefix or suffix; a str is read directly, a unicode
 *            object (when Py_USING_UNICODE is defined) delegates the whole
 *            test to PyUnicode_Tailmatch, any other object is read through
 *            PyObject_AsCharBuffer
 * start,end  slice bounds; normalised against the length of self by
 *            string_adjust_indices() before any comparison
 * direction  negative: compare at the start of the slice; otherwise at
 *            its end
 *
 * The comparison itself is a memcmp() over slen bytes and is only made
 * when the slice is at least slen bytes long. */
_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3194
Py_ssize_t end, int direction)
3196
Py_ssize_t len = PyString_GET_SIZE(self);
3201
if (PyString_Check(substr)) {
3202
sub = PyString_AS_STRING(substr);
3203
slen = PyString_GET_SIZE(substr);
3205
#ifdef Py_USING_UNICODE
3206
else if (PyUnicode_Check(substr))
3207
return PyUnicode_Tailmatch((PyObject *)self,
3208
substr, start, end, direction);
3210
else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3212
str = PyString_AS_STRING(self);
3214
string_adjust_indices(&start, &end, len);
3216
if (direction < 0) {
3218
if (start+slen > len)
3222
if (end-start < slen || start > len)
3225
if (end-slen > start)
3228
if (end-start >= slen)
3229
return ! memcmp(str+start, sub, slen);
3234
PyDoc_STRVAR(startswith__doc__,
3235
"S.startswith(prefix[, start[, end]]) -> bool\n\
3237
Return True if S starts with the specified prefix, False otherwise.\n\
3238
With optional start, test S beginning at that position.\n\
3239
With optional end, stop comparing S at that position.\n\
3240
prefix can also be a tuple of strings to try.");
3243
/* Implementation of the startswith() method (see startswith__doc__).
 *
 * Arguments: the prefix object plus optional start and end, both
 * converted by _PyEval_SliceIndex; they default to 0 and PY_SSIZE_T_MAX.
 * When the prefix is a tuple, its items are passed one after another to
 * _string_tailmatch(); otherwise the single object is matched with
 * direction -1 (match at the start).  The outcome is returned through
 * PyBool_FromLong(). */
string_startswith(PyStringObject *self, PyObject *args)
3245
Py_ssize_t start = 0;
3246
Py_ssize_t end = PY_SSIZE_T_MAX;
3250
if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3251
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3253
if (PyTuple_Check(subobj)) {
3255
for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3256
result = _string_tailmatch(self,
3257
PyTuple_GET_ITEM(subobj, i),
3267
result = _string_tailmatch(self, subobj, start, end, -1);
3271
return PyBool_FromLong(result);
3275
PyDoc_STRVAR(endswith__doc__,
3276
"S.endswith(suffix[, start[, end]]) -> bool\n\
3278
Return True if S ends with the specified suffix, False otherwise.\n\
3279
With optional start, test S beginning at that position.\n\
3280
With optional end, stop comparing S at that position.\n\
3281
suffix can also be a tuple of strings to try.");
3284
/* Implementation of the endswith() method (see endswith__doc__).
 *
 * Mirror image of string_startswith(): same argument parsing (suffix
 * object, optional start and end converted by _PyEval_SliceIndex, with
 * defaults 0 and PY_SSIZE_T_MAX) and the same tuple handling, but the
 * single-object case calls _string_tailmatch() with direction +1 so the
 * comparison happens at the end of the slice. */
string_endswith(PyStringObject *self, PyObject *args)
3286
Py_ssize_t start = 0;
3287
Py_ssize_t end = PY_SSIZE_T_MAX;
3291
if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3292
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3294
if (PyTuple_Check(subobj)) {
3296
for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3297
result = _string_tailmatch(self,
3298
PyTuple_GET_ITEM(subobj, i),
3308
result = _string_tailmatch(self, subobj, start, end, +1);
3312
return PyBool_FromLong(result);
3316
PyDoc_STRVAR(encode__doc__,
3317
"S.encode([encoding[,errors]]) -> object\n\
3319
Encodes S using the codec registered for encoding. encoding defaults\n\
3320
to the default encoding. errors may be given to set a different error\n\
3321
handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3322
a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3323
'xmlcharrefreplace' as well as any other name registered with\n\
3324
codecs.register_error that is able to handle UnicodeEncodeErrors.");
3327
/* Implementation of the encode() method (see encode__doc__).
 *
 * Both arguments are optional C strings and stay NULL when omitted.
 * The conversion is performed by PyString_AsEncodedObject(); a result
 * that is neither a str nor a unicode object is rejected with a
 * TypeError whose message includes the type name of the result. */
string_encode(PyStringObject *self, PyObject *args)
3329
char *encoding = NULL;
3330
char *errors = NULL;
3333
if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3335
v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
3338
/* Codecs are free to return anything; only str and unicode are accepted. */
if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3339
PyErr_Format(PyExc_TypeError,
3340
"encoder did not return a string/unicode object "
3342
Py_TYPE(v)->tp_name);
3353
PyDoc_STRVAR(decode__doc__,
3354
"S.decode([encoding[,errors]]) -> object\n\
3356
Decodes S using the codec registered for encoding. encoding defaults\n\
3357
to the default encoding. errors may be given to set a different error\n\
3358
handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3359
a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3360
as well as any other name registered with codecs.register_error that is\n\
3361
able to handle UnicodeDecodeErrors.");
3364
/* Implementation of the decode() method (see decode__doc__).
 *
 * Both arguments are optional C strings and stay NULL when omitted.
 * The conversion is performed by PyString_AsDecodedObject(); a result
 * that is neither a str nor a unicode object is rejected with a
 * TypeError whose message includes the type name of the result. */
string_decode(PyStringObject *self, PyObject *args)
3366
char *encoding = NULL;
3367
char *errors = NULL;
3370
if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3372
v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
3375
/* Codecs are free to return anything; only str and unicode are accepted. */
if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3376
PyErr_Format(PyExc_TypeError,
3377
"decoder did not return a string/unicode object "
3379
Py_TYPE(v)->tp_name);
3390
PyDoc_STRVAR(expandtabs__doc__,
3391
"S.expandtabs([tabsize]) -> string\n\
3393
Return a copy of S where all tab characters are expanded using spaces.\n\
3394
If tabsize is not given, a tab size of 8 characters is assumed.");
3397
/* Implementation of the expandtabs() method (see expandtabs__doc__).
 *
 * The optional tab size is parsed as a C int.  The input is walked twice:
 * the first pass only computes the size of the result and checks every
 * running total against PY_SSIZE_T_MAX before adding to it; the second
 * pass allocates a string of i + j bytes and fills it.  The column
 * counter j restarts after every newline or carriage return, so tab
 * stops are measured per line.  A result that would be too large is
 * reported as OverflowError (new string is too long). */
string_expandtabs(PyStringObject *self, PyObject *args)
3399
const char *e, *p, *qe;
3401
Py_ssize_t i, j, incr;
3405
if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3408
/* First pass: determine size of output string */
3409
i = 0; /* chars up to and including most recent \n or \r */
3410
j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3411
e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3412
for (p = PyString_AS_STRING(self); p < e; p++)
3415
/* distance from column j to the next tab stop */
incr = tabsize - (j % tabsize);
3416
if (j > PY_SSIZE_T_MAX - incr)
3422
if (j > PY_SSIZE_T_MAX - 1)
3425
if (*p == '\n' || *p == '\r') {
3426
if (i > PY_SSIZE_T_MAX - j)
3433
if (i > PY_SSIZE_T_MAX - j)
3436
/* Second pass: create output string and fill it */
3437
u = PyString_FromStringAndSize(NULL, i + j);
3441
j = 0; /* same as in first pass */
3442
q = PyString_AS_STRING(u); /* next output char */
3443
qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
3445
for (p = PyString_AS_STRING(self); p < e; p++)
3448
i = tabsize - (j % tabsize);
3462
if (*p == '\n' || *p == '\r')
3471
PyErr_SetString(PyExc_OverflowError, "new string is too long");
3475
/* Common worker of ljust(), rjust(), center() and zfill().
 *
 * Produces a string made of `left` copies of `fill`, the bytes of self,
 * and a trailing fill area that starts right behind the copied bytes.
 * The new string is allocated with left + len(self) + right bytes.
 * When no padding is requested at all and self is an exact str, self
 * itself is returned instead of a copy. */
Py_LOCAL_INLINE(PyObject *)
3476
pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
3485
if (left == 0 && right == 0 && PyString_CheckExact(self)) {
3487
return (PyObject *)self;
3490
u = PyString_FromStringAndSize(NULL,
3491
left + PyString_GET_SIZE(self) + right);
3494
/* leading fill, then the original bytes, then the trailing fill */
memset(PyString_AS_STRING(u), fill, left);
3495
Py_MEMCPY(PyString_AS_STRING(u) + left,
3496
PyString_AS_STRING(self),
3497
PyString_GET_SIZE(self));
3499
memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3506
PyDoc_STRVAR(ljust__doc__,
3507
"S.ljust(width[, fillchar]) -> string\n"
3509
"Return S left justified in a string of length width. Padding is\n"
3510
"done using the specified fill character (default is a space).");
3513
/* Implementation of the ljust() method (see ljust__doc__).
 *
 * Parses the target width and an optional single fill character, which
 * defaults to a space.  An exact str that is already at least `width`
 * long is returned as is; otherwise pad() is asked for
 * width - len(S) fill characters on the right and none on the left. */
string_ljust(PyStringObject *self, PyObject *args)
3516
char fillchar = ' ';
3518
if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3521
if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3523
return (PyObject*) self;
3526
return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
3530
PyDoc_STRVAR(rjust__doc__,
3531
"S.rjust(width[, fillchar]) -> string\n"
3533
"Return S right justified in a string of length width. Padding is\n"
3534
"done using the specified fill character (default is a space)");
3537
/* Implementation of the rjust() method (see rjust__doc__).
 *
 * Parses the target width and an optional single fill character, which
 * defaults to a space.  An exact str that is already at least `width`
 * long is returned as is; otherwise pad() is asked for
 * width - len(S) fill characters on the left and none on the right. */
string_rjust(PyStringObject *self, PyObject *args)
3540
char fillchar = ' ';
3542
if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3545
if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3547
return (PyObject*) self;
3550
return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
3554
PyDoc_STRVAR(center__doc__,
3555
"S.center(width[, fillchar]) -> string\n"
3557
"Return S centered in a string of length width. Padding is\n"
3558
"done using the specified fill character (default is a space)");
3561
/* Implementation of the center() method (see center__doc__).
 *
 * Parses the target width and an optional single fill character, which
 * defaults to a space.  An exact str that is already at least `width`
 * long is returned as is.  Otherwise the total margin width - len(S) is
 * split in two: the left side receives half of it, plus one extra
 * character when both the margin and the width are odd; pad() gets the
 * remainder as right padding. */
string_center(PyStringObject *self, PyObject *args)
3563
Py_ssize_t marg, left;
3565
char fillchar = ' ';
3567
if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3570
if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3572
return (PyObject*) self;
3575
marg = width - PyString_GET_SIZE(self);
3576
/* (marg & width & 1) is 1 only when marg and width are both odd */
left = marg / 2 + (marg & width & 1);
3578
return pad(self, left, marg - left, fillchar);
3581
PyDoc_STRVAR(zfill__doc__,
3582
"S.zfill(width) -> string\n"
3584
"Pad a numeric string S with zeros on the left, to fill a field\n"
3585
"of the specified width. The string S is never truncated.");
3588
/* Implementation of the zfill() method (see zfill__doc__).
 *
 * If S already has at least `width` characters nothing is padded: an
 * exact str is returned as is, any other instance is copied into a new
 * string built from its bytes.  Otherwise pad() prepends
 * fill = width - len(S) zero characters.  The first original character
 * then sits at index fill; when it is a plus or minus sign it is moved
 * to the beginning of the result. */
string_zfill(PyStringObject *self, PyObject *args)
3595
if (!PyArg_ParseTuple(args, "n:zfill", &width))
3598
if (PyString_GET_SIZE(self) >= width) {
3599
if (PyString_CheckExact(self)) {
3601
return (PyObject*) self;
3604
return PyString_FromStringAndSize(
3605
PyString_AS_STRING(self),
3606
PyString_GET_SIZE(self)
3610
fill = width - PyString_GET_SIZE(self);
3612
s = pad(self, fill, 0, '0');
3617
p = PyString_AS_STRING(s);
3618
if (p[fill] == '+' || p[fill] == '-') {
3619
/* move sign to beginning of string */
3624
return (PyObject*) s;
3627
PyDoc_STRVAR(isspace__doc__,
3628
"S.isspace() -> bool\n\
3630
Return True if all characters in S are whitespace\n\
3631
and there is at least one character in S, False otherwise.");
3634
/* Implementation of the isspace() method (see isspace__doc__).
 *
 * The bytes of self are examined as unsigned char values between p and
 * e.  A one-character string is answered by a shortcut, an empty string
 * is always False, and for longer strings the loop returns False at the
 * first character that fails the whitespace test and True when it runs
 * to the end. */
string_isspace(PyStringObject *self)
3636
register const unsigned char *p
3637
= (unsigned char *) PyString_AS_STRING(self);
3638
register const unsigned char *e;
3640
/* Shortcut for single character strings */
3641
if (PyString_GET_SIZE(self) == 1 &&
3643
return PyBool_FromLong(1);
3645
/* Special case for empty strings */
3646
if (PyString_GET_SIZE(self) == 0)
3647
return PyBool_FromLong(0);
3649
e = p + PyString_GET_SIZE(self);
3650
for (; p < e; p++) {
3652
return PyBool_FromLong(0);
3654
return PyBool_FromLong(1);
3658
PyDoc_STRVAR(isalpha__doc__,
3659
"S.isalpha() -> bool\n\
3661
Return True if all characters in S are alphabetic\n\
3662
and there is at least one character in S, False otherwise.");
3665
/* Implementation of the isalpha() method (see isalpha__doc__).
 *
 * The bytes of self are examined as unsigned char values between p and
 * e.  A one-character string is answered by a shortcut, an empty string
 * is always False, and for longer strings the loop returns False at the
 * first character that fails the alphabetic test and True when it runs
 * to the end. */
string_isalpha(PyStringObject *self)
3667
register const unsigned char *p
3668
= (unsigned char *) PyString_AS_STRING(self);
3669
register const unsigned char *e;
3671
/* Shortcut for single character strings */
3672
if (PyString_GET_SIZE(self) == 1 &&
3674
return PyBool_FromLong(1);
3676
/* Special case for empty strings */
3677
if (PyString_GET_SIZE(self) == 0)
3678
return PyBool_FromLong(0);
3680
e = p + PyString_GET_SIZE(self);
3681
for (; p < e; p++) {
3683
return PyBool_FromLong(0);
3685
return PyBool_FromLong(1);
3689
PyDoc_STRVAR(isalnum__doc__,
3690
"S.isalnum() -> bool\n\
3692
Return True if all characters in S are alphanumeric\n\
3693
and there is at least one character in S, False otherwise.");
3696
/* Implementation of the isalnum() method (see isalnum__doc__).
 *
 * The bytes of self are examined as unsigned char values between p and
 * e.  A one-character string is answered by a shortcut, an empty string
 * is always False, and for longer strings the loop returns False at the
 * first character that fails the alphanumeric test and True when it
 * runs to the end. */
string_isalnum(PyStringObject *self)
3698
register const unsigned char *p
3699
= (unsigned char *) PyString_AS_STRING(self);
3700
register const unsigned char *e;
3702
/* Shortcut for single character strings */
3703
if (PyString_GET_SIZE(self) == 1 &&
3705
return PyBool_FromLong(1);
3707
/* Special case for empty strings */
3708
if (PyString_GET_SIZE(self) == 0)
3709
return PyBool_FromLong(0);
3711
e = p + PyString_GET_SIZE(self);
3712
for (; p < e; p++) {
3714
return PyBool_FromLong(0);
3716
return PyBool_FromLong(1);
3720
PyDoc_STRVAR(isdigit__doc__,
3721
"S.isdigit() -> bool\n\
3723
Return True if all characters in S are digits\n\
3724
and there is at least one character in S, False otherwise.");
3727
/* Implementation of the isdigit() method (see isdigit__doc__).
 *
 * The bytes of self are examined as unsigned char values between p and
 * e.  A one-character string is answered by a shortcut, an empty string
 * is always False, and for longer strings the loop returns False at the
 * first character that fails the digit test and True when it runs to
 * the end. */
string_isdigit(PyStringObject *self)
3729
register const unsigned char *p
3730
= (unsigned char *) PyString_AS_STRING(self);
3731
register const unsigned char *e;
3733
/* Shortcut for single character strings */
3734
if (PyString_GET_SIZE(self) == 1 &&
3736
return PyBool_FromLong(1);
3738
/* Special case for empty strings */
3739
if (PyString_GET_SIZE(self) == 0)
3740
return PyBool_FromLong(0);
3742
e = p + PyString_GET_SIZE(self);
3743
for (; p < e; p++) {
3745
return PyBool_FromLong(0);
3747
return PyBool_FromLong(1);
3751
PyDoc_STRVAR(islower__doc__,
3752
"S.islower() -> bool\n\
3754
Return True if all cased characters in S are lowercase and there is\n\
3755
at least one cased character in S, False otherwise.");
3758
/* Implementation of the islower() method (see islower__doc__).
 *
 * A one-character string is answered directly with islower(); an empty
 * string is False.  For longer strings the loop returns False as soon as
 * a disqualifying character is met and otherwise records in `cased`
 * whether at least one lowercase character was seen; that flag is the
 * final answer, so a string without any cased character yields False. */
string_islower(PyStringObject *self)
3760
register const unsigned char *p
3761
= (unsigned char *) PyString_AS_STRING(self);
3762
register const unsigned char *e;
3765
/* Shortcut for single character strings */
3766
if (PyString_GET_SIZE(self) == 1)
3767
return PyBool_FromLong(islower(*p) != 0);
3769
/* Special case for empty strings */
3770
if (PyString_GET_SIZE(self) == 0)
3771
return PyBool_FromLong(0);
3773
e = p + PyString_GET_SIZE(self);
3775
for (; p < e; p++) {
3777
return PyBool_FromLong(0);
3778
else if (!cased && islower(*p))
3781
return PyBool_FromLong(cased);
3785
PyDoc_STRVAR(isupper__doc__,
3786
"S.isupper() -> bool\n\
3788
Return True if all cased characters in S are uppercase and there is\n\
3789
at least one cased character in S, False otherwise.");
3792
/* Implementation of the isupper() method (see isupper__doc__).
 *
 * A one-character string is answered directly with isupper(); an empty
 * string is False.  For longer strings the loop returns False as soon as
 * a disqualifying character is met and otherwise records in `cased`
 * whether at least one uppercase character was seen; that flag is the
 * final answer, so a string without any cased character yields False. */
string_isupper(PyStringObject *self)
3794
register const unsigned char *p
3795
= (unsigned char *) PyString_AS_STRING(self);
3796
register const unsigned char *e;
3799
/* Shortcut for single character strings */
3800
if (PyString_GET_SIZE(self) == 1)
3801
return PyBool_FromLong(isupper(*p) != 0);
3803
/* Special case for empty strings */
3804
if (PyString_GET_SIZE(self) == 0)
3805
return PyBool_FromLong(0);
3807
e = p + PyString_GET_SIZE(self);
3809
for (; p < e; p++) {
3811
return PyBool_FromLong(0);
3812
else if (!cased && isupper(*p))
3815
return PyBool_FromLong(cased);
3819
PyDoc_STRVAR(istitle__doc__,
3820
"S.istitle() -> bool\n\
3822
Return True if S is a titlecased string and there is at least one\n\
3823
character in S, i.e. uppercase characters may only follow uncased\n\
3824
characters and lowercase characters only cased ones. Return False\n\
3828
/* Implementation of the istitle() method (see istitle__doc__).
 *
 * A one-character string is title-cased exactly when it is uppercase; an
 * empty string is False.  For longer strings the loop tracks in
 * previous_is_cased whether the preceding character was cased:
 *   - the first branch rejects a character that follows a cased one
 *     (per istitle__doc__ this is the uppercase case);
 *   - a lowercase character is rejected when it does NOT follow a cased
 *     one;
 *   - any other character resets previous_is_cased to 0.
 * The result is the `cased` flag, so at least one cased character is
 * required.  The second parameter is not referenced in the lines shown
 * here. */
string_istitle(PyStringObject *self, PyObject *uncased)
3830
register const unsigned char *p
3831
= (unsigned char *) PyString_AS_STRING(self);
3832
register const unsigned char *e;
3833
int cased, previous_is_cased;
3835
/* Shortcut for single character strings */
3836
if (PyString_GET_SIZE(self) == 1)
3837
return PyBool_FromLong(isupper(*p) != 0);
3839
/* Special case for empty strings */
3840
if (PyString_GET_SIZE(self) == 0)
3841
return PyBool_FromLong(0);
3843
e = p + PyString_GET_SIZE(self);
3845
previous_is_cased = 0;
3846
for (; p < e; p++) {
3847
register const unsigned char ch = *p;
3850
if (previous_is_cased)
3851
return PyBool_FromLong(0);
3852
previous_is_cased = 1;
3855
else if (islower(ch)) {
3856
if (!previous_is_cased)
3857
return PyBool_FromLong(0);
3858
previous_is_cased = 1;
3862
previous_is_cased = 0;
3864
return PyBool_FromLong(cased);
3868
PyDoc_STRVAR(splitlines__doc__,
3869
"S.splitlines([keepends]) -> list of strings\n\
3871
Return a list of the lines in S, breaking at line boundaries.\n\
3872
Line breaks are not included in the resulting list unless keepends\n\
3873
is given and true.");
3876
/* Implementation of the splitlines() method (see splitlines__doc__).
 *
 * The optional keepends flag is parsed as a C int.  The result list
 * starts empty and grows by appending.  i scans the data for the next
 * newline or carriage return while j marks the start of the current
 * line; a carriage return immediately followed by another line-break
 * character is consumed as a single break (CRLF).  Every line is added
 * through SPLIT_APPEND, and a second SPLIT_APPEND running up to len
 * covers the text after the last line break. */
string_splitlines(PyStringObject *self, PyObject *args)
3878
register Py_ssize_t i;
3879
register Py_ssize_t j;
3886
if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3889
data = PyString_AS_STRING(self);
3890
len = PyString_GET_SIZE(self);
3892
/* This does not use the preallocated list because splitlines is
3893
usually run with hundreds of newlines. The overhead of
3894
switching between PyList_SET_ITEM and append causes about a
3895
2-3% slowdown for that common case. A smarter implementation
3896
could move the if check out, so the SET_ITEMs are done first
3897
and the appends only done when the prealloc buffer is full.
3898
That's too much work for little gain.*/
3900
list = PyList_New(0);
3904
for (i = j = 0; i < len; ) {
3907
/* Find a line and append it */
3908
while (i < len && data[i] != '\n' && data[i] != '\r')
3911
/* Skip the line break reading CRLF as one line break */
3914
if (data[i] == '\r' && i + 1 < len &&
3922
SPLIT_APPEND(data, j, eol);
3926
SPLIT_APPEND(data, j, len);
3936
PyDoc_STRVAR(sizeof__doc__,
3937
"S.__sizeof__() -> size of S in memory, in bytes");
3940
/* Implementation of __sizeof__() (see sizeof__doc__).
 * The size is the fixed part, sizeof(PyStringObject), plus ob_size items
 * of tp_itemsize bytes each; it is returned as a Python int. */
string_sizeof(PyStringObject *v)
3943
res = sizeof(PyStringObject) + v->ob_size * v->ob_type->tp_itemsize;
3944
return PyInt_FromSsize_t(res);
3950
#undef PREALLOC_SIZE
3953
/* Implementation of __getnewargs__(): returns a one-element tuple that
 * holds a string built from the bytes of v.  The s# format takes the
 * explicit length Py_SIZE(v), so embedded NUL bytes are preserved. */
string_getnewargs(PyStringObject *v)
3955
return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
3959
#include "stringlib/string_format.h"
3961
PyDoc_STRVAR(format__doc__,
3962
"S.format(*args, **kwargs) -> string\n\
3967
/* Implementation of __format__(format_spec).
 *
 * Accepts exactly one argument, which must be a str or a unicode object;
 * any other type raises TypeError naming the offending type.  The spec
 * is converted with PyObject_Str() (kept in tmp) so that it has the same
 * type as the value being formatted, and the formatting proper is done
 * by _PyBytes_FormatAdvanced() on the raw bytes of the spec.
 * NOTE(review): tmp presumably replaces format_spec before the final
 * call and is released afterwards - confirm against the full source. */
string__format__(PyObject* self, PyObject* args)
3969
PyObject *format_spec;
3970
PyObject *result = NULL;
3971
PyObject *tmp = NULL;
3973
/* If 2.x, convert format_spec to the same type as value */
3974
/* This is to allow things like u''.format('') */
3975
if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
3977
if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
3978
PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3979
"or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3982
tmp = PyObject_Str(format_spec);
3987
result = _PyBytes_FormatAdvanced(self,
3988
PyString_AS_STRING(format_spec),
3989
PyString_GET_SIZE(format_spec));
3995
PyDoc_STRVAR(p_format__doc__,
3996
"S.__format__(format_spec) -> string\n\
4002
/* Method table of the str type (installed as tp_methods of
 * PyString_Type).  Each entry gives the Python-visible name, the C
 * implementation cast to PyCFunction, the calling convention
 * (METH_NOARGS, METH_O, METH_VARARGS, optionally with METH_KEYWORDS) and
 * the doc string.  The private _formatter_* helpers and __getnewargs__
 * carry no doc string.  The table ends with a NULL sentinel. */
string_methods[] = {
4003
/* Counterparts of the obsolete stropmodule functions; except
4004
string.maketrans(). */
4005
{"join", (PyCFunction)string_join, METH_O, join__doc__},
4006
{"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
4007
{"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
4008
{"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
4009
{"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
4010
{"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
4011
{"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
4012
{"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
4013
{"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
4014
{"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
4015
{"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
4016
{"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
4017
{"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
4019
{"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
4020
{"endswith", (PyCFunction)string_endswith, METH_VARARGS,
4022
{"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
4023
{"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
4024
{"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
4025
{"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
4026
{"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
4027
{"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
4028
{"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
4029
{"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
4030
{"rpartition", (PyCFunction)string_rpartition, METH_O,
4032
{"startswith", (PyCFunction)string_startswith, METH_VARARGS,
4034
{"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
4035
{"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
4037
{"translate", (PyCFunction)string_translate, METH_VARARGS,
4039
{"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
4040
{"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
4041
{"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
4042
{"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
4043
{"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
4044
{"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
4045
{"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
4046
{"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
4047
{"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
4048
{"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
4049
{"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
4050
{"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
4052
{"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
4054
{"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
4056
{"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
4057
{NULL, NULL} /* sentinel */
4061
str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
4064
/* tp_new of the str type.
 *
 * Requests for a subtype are routed to str_subtype_new().  For str
 * itself one optional argument is accepted, positionally or under the
 * keyword name object.  The value is converted with PyObject_Str();
 * the empty-string return presumably serves the call without an
 * argument - NOTE(review): confirm the guarding condition. */
string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4067
static char *kwlist[] = {"object", 0};
4069
if (type != &PyString_Type)
4070
return str_subtype_new(type, args, kwds);
4071
if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
4074
return PyString_FromString("");
4075
return PyObject_Str(x);
4079
/* Creates an instance of a str subtype.
 *
 * First builds an ordinary exact str (tmp) by calling string_new() with
 * PyString_Type, then allocates an object of the requested type with
 * room for n items through tp_alloc and copies n+1 bytes into it, i.e.
 * the data together with its terminating NUL.  The cached hash is taken
 * over from tmp and the new object is marked as not interned. */
str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4081
PyObject *tmp, *pnew;
4084
assert(PyType_IsSubtype(type, &PyString_Type));
4085
tmp = string_new(&PyString_Type, args, kwds);
4088
assert(PyString_CheckExact(tmp));
4089
n = PyString_GET_SIZE(tmp);
4090
pnew = type->tp_alloc(type, n);
4092
Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
4093
((PyStringObject *)pnew)->ob_shash =
4094
((PyStringObject *)tmp)->ob_shash;
4095
((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
4102
/* tp_new of basestring.  The type is abstract: every attempt to
 * instantiate it sets a TypeError; none of the arguments is looked at
 * in the lines shown here. */
basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4104
PyErr_SetString(PyExc_TypeError,
4105
"The basestring type cannot be instantiated");
4110
/* nb_remainder slot of str, i.e. the % formatting operator.
 * When the left operand is not a string, Py_NotImplemented is returned
 * (with its reference count incremented) so that the other operand gets
 * a chance; otherwise the work is done by PyString_Format(v, w). */
string_mod(PyObject *v, PyObject *w)
4112
if (!PyString_Check(v)) {
4113
Py_INCREF(Py_NotImplemented);
4114
return Py_NotImplemented;
4116
return PyString_Format(v, w);
4119
PyDoc_STRVAR(basestring_doc,
4120
"Type basestring cannot be instantiated; it is the base for str and unicode.");
4122
/* Number protocol table of str (referenced from tp_as_number of
 * PyString_Type).  string_mod is installed as nb_remainder. */
static PyNumberMethods string_as_number = {
4127
string_mod, /*nb_remainder*/
4131
/* Type object of basestring, the common base of str and unicode (see
 * basestring_doc).  It derives from PyBaseObject_Type, may itself be
 * subclassed (Py_TPFLAGS_BASETYPE), provides no protocol tables, and
 * uses basestring_new as tp_new, which refuses instantiation. */
PyTypeObject PyBaseString_Type = {
4132
PyVarObject_HEAD_INIT(&PyType_Type, 0)
4142
0, /* tp_as_number */
4143
0, /* tp_as_sequence */
4144
0, /* tp_as_mapping */
4148
0, /* tp_getattro */
4149
0, /* tp_setattro */
4150
0, /* tp_as_buffer */
4151
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
4152
basestring_doc, /* tp_doc */
4153
0, /* tp_traverse */
4155
0, /* tp_richcompare */
4156
0, /* tp_weaklistoffset */
4158
0, /* tp_iternext */
4162
&PyBaseObject_Type, /* tp_base */
4164
0, /* tp_descr_get */
4165
0, /* tp_descr_set */
4166
0, /* tp_dictoffset */
4169
basestring_new, /* tp_new */
4173
PyDoc_STRVAR(string_doc,
4174
"str(object) -> string\n\
4176
Return a nice string representation of the object.\n\
4177
If the argument is a string, the return value is the same object.");
4179
/* Type object of str.  It derives from PyBaseString_Type and wires up
 * the number, sequence, mapping and buffer protocol tables, hashing,
 * repr/str conversion, rich comparison, the string_methods table and
 * string_new as constructor.  Instances are released with PyObject_Del.
 * The flags allow subclassing and mark the type as a string subclass
 * that supports the new buffer interface. */
PyTypeObject PyString_Type = {
4180
PyVarObject_HEAD_INIT(&PyType_Type, 0)
4182
sizeof(PyStringObject),
4184
string_dealloc, /* tp_dealloc */
4185
(printfunc)string_print, /* tp_print */
4189
string_repr, /* tp_repr */
4190
&string_as_number, /* tp_as_number */
4191
&string_as_sequence, /* tp_as_sequence */
4192
&string_as_mapping, /* tp_as_mapping */
4193
(hashfunc)string_hash, /* tp_hash */
4195
string_str, /* tp_str */
4196
PyObject_GenericGetAttr, /* tp_getattro */
4197
0, /* tp_setattro */
4198
&string_as_buffer, /* tp_as_buffer */
4199
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
4200
Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
4201
Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
4202
string_doc, /* tp_doc */
4203
0, /* tp_traverse */
4205
(richcmpfunc)string_richcompare, /* tp_richcompare */
4206
0, /* tp_weaklistoffset */
4208
0, /* tp_iternext */
4209
string_methods, /* tp_methods */
4212
&PyBaseString_Type, /* tp_base */
4214
0, /* tp_descr_get */
4215
0, /* tp_descr_set */
4216
0, /* tp_dictoffset */
4219
string_new, /* tp_new */
4220
PyObject_Del, /* tp_free */
4224
/* Concatenates w onto the string referenced by *pv.
 * The guard catches a NULL w and a *pv that is not a string; in the
 * normal case the new object is produced by string_concat().
 * NOTE(review): presumably *pv is then replaced by the result and the
 * old object released - confirm against the full source. */
PyString_Concat(register PyObject **pv, register PyObject *w)
4226
register PyObject *v;
4229
if (w == NULL || !PyString_Check(*pv)) {
4234
v = string_concat((PyStringObject *) *pv, w);
4240
/* Convenience wrapper around PyString_Concat(pv, w).
 * NOTE(review): the name suggests that the reference to w is dropped
 * afterwards - confirm against the full source. */
PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
4242
PyString_Concat(pv, w);
4247
/* The following function breaks the notion that strings are immutable:
4248
it changes the size of a string. We get away with this only if there
4249
is only one module referencing the object. You can also think of it
4250
as creating a new string object and destroying the old one, only
4251
more efficiently. In any case, don't use this if the string may
4252
already be known to some other part of the code...
4253
Note that if there's not enough memory to resize the string, the original
4254
string object at *pv is deallocated, *pv is set to NULL, an "out of
4255
memory" exception is set, and -1 is returned. Else (on success) 0 is
4256
returned, and the value in *pv may or may not be the same as on input.
4257
As always, an extra byte is allocated for a trailing \0 byte (newsize
4258
does *not* include that), and a trailing \0 byte is stored.
4262
/* Resizes the string object at *pv in place to newsize bytes.
 *
 * The request is treated as an internal error (PyErr_BadInternalCall)
 * unless the object is a string, has a reference count of exactly 1, is
 * not interned, and newsize is not negative.  The memory block is
 * reallocated to sizeof(PyStringObject) + newsize bytes; afterwards the
 * size field is updated, a terminating NUL is stored at index newsize
 * and the cached hash is invalidated.  The object is unregistered with
 * _Py_ForgetReference() before the reallocation and registered again
 * with _Py_NewReference() afterwards because its address may change. */
_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
4264
register PyObject *v;
4265
register PyStringObject *sv;
4267
if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
4268
PyString_CHECK_INTERNED(v)) {
4271
PyErr_BadInternalCall();
4274
/* XXX UNREF/NEWREF interface should be more symmetrical */
4276
_Py_ForgetReference(v);
4278
PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
4284
_Py_NewReference(*pv);
4285
sv = (PyStringObject *) *pv;
4286
Py_SIZE(sv) = newsize;
4287
sv->ob_sval[newsize] = '\0';
4288
sv->ob_shash = -1; /* invalidate cached hash value */
4292
/* Helpers for formatstring */
4294
/* Fetches the next positional argument for the % formatting code.
 *
 * args      argument tuple
 * arglen    number of usable arguments
 * p_argidx  in/out cursor into args
 *
 * While the cursor is below arglen the item at that index is returned
 * via PyTuple_GetItem(); once the arguments are used up a TypeError
 * (not enough arguments for format string) is set.
 * NOTE(review): the cursor is presumably advanced through p_argidx
 * before returning - confirm against the full source. */
Py_LOCAL_INLINE(PyObject *)
4295
getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
4297
Py_ssize_t argidx = *p_argidx;
4298
if (argidx < arglen) {
4303
return PyTuple_GetItem(args, argidx);
4305
PyErr_SetString(PyExc_TypeError,
4306
"not enough arguments for format string");
4317
/* Bit flags collected while parsing a % conversion specification.
 * PyString_Format() sets them for the flag characters
 * - (F_LJUST), + (F_SIGN), space (F_BLANK), # (F_ALT) and 0 (F_ZERO). */
#define F_LJUST (1<<0)
4318
#define F_SIGN (1<<1)
4319
#define F_BLANK (1<<2)
4320
#define F_ALT (1<<3)
4321
#define F_ZERO (1<<4)
4323
/* Formats the Python object v as a float into buf for the % operator.
 *
 * buf, buflen  output buffer and its size
 * flags        format flags; F_ALT adds # to the generated C format
 * prec         requested precision
 * type         conversion character
 * v            value; converted with PyFloat_AsDouble, a failure is
 *              reported as TypeError (float argument required)
 *
 * The length check rejects requests that could overrun buf with an
 * OverflowError.  The actual conversion builds a C format string in fmt
 * and runs PyOS_ascii_formatd().  Returns the length of the text
 * written to buf.
 * NOTE(review): the test on fabs(x)/1e25 presumably switches very large
 * values away from the f conversion - confirm against the full source. */
Py_LOCAL_INLINE(int)
4324
formatfloat(char *buf, size_t buflen, int flags,
4325
int prec, int type, PyObject *v)
4327
/* fmt = '%#.' + `prec` + `type`
4328
worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
4331
x = PyFloat_AsDouble(v);
4332
if (x == -1.0 && PyErr_Occurred()) {
4333
PyErr_Format(PyExc_TypeError, "float argument required, "
4334
"not %.200s", Py_TYPE(v)->tp_name);
4339
if (type == 'f' && fabs(x)/1e25 >= 1e25)
4341
/* Worst case length calc to ensure no buffer overrun:
4345
buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
4346
for any double rep.)
4347
len = 1 + prec + 1 + 2 + 5 = 9 + prec
4350
buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4351
len = 1 + 50 + 1 + prec = 52 + prec
4353
If prec=0 the effective precision is 1 (the leading digit is
4354
always given), therefore increase the length by one.
4357
if (((type == 'g' || type == 'G') &&
4358
buflen <= (size_t)10 + (size_t)prec) ||
4359
(type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
4360
PyErr_SetString(PyExc_OverflowError,
4361
"formatted float is too long (precision too large?)");
4364
PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4365
(flags&F_ALT) ? "#" : "",
4367
PyOS_ascii_formatd(buf, buflen, fmt, x);
4368
return (int)strlen(buf);
4371
/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4372
* the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4373
* Python's regular ints.
4374
* Return value: a new PyString*, or NULL if error.
4375
* . *pbuf is set to point into it,
4376
* *plen set to the # of chars following that.
4377
* Caller must decref it when done using pbuf.
4378
* The string starting at *pbuf is of the form
4379
* "-"? ("0x" | "0X")? digit+
4380
* "0x"/"0X" are present only for x and X conversions, with F_ALT
4381
* set in flags. The case of hex digits will be correct.
4382
* There will be at least prec digits, zero-filled on the left if
4383
* necessary to get that many.
4384
* val object to be converted
4385
* flags bitmask of format flags; only F_ALT is looked at
4386
* prec minimum number of digits; 0-fill on left if needed
4387
* type a character in [duoxX]; u acts the same as d
4389
* CAUTION: o, x and X conversions on regular ints can never
4390
* produce a '-' sign, but can for Python's unbounded ints.
4393
/* Formats a Python long for the d, u, o, x and X conversions.
 *
 * Processing steps visible below:
 *   1. obtain the textual form through the tp_str, nb_oct or nb_hex slot
 *      of the value, depending on type;
 *   2. insist on being the only owner of that string, because it is
 *      edited in place, and on a length that fits into an int;
 *   3. handle a trailing L suffix, then split the text into non-digit
 *      characters (sign, base marker) and digits;
 *   4. drop the base marker unless F_ALT is set;
 *   5. if prec asks for more digits than present, build a new string
 *      with zeros inserted between the non-digits and the digits;
 *   6. for hex output in upper case, convert the letters a..x.
 * The results are handed back through pbuf and plen. */
_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4394
char **pbuf, int *plen)
4396
PyObject *result = NULL;
4399
int sign; /* 1 if '-', else 0 */
4400
int len; /* number of characters */
4402
int numdigits; /* len == numnondigits + numdigits */
4403
int numnondigits = 0;
4408
result = Py_TYPE(val)->tp_str(val);
4411
result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4416
result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4419
assert(!"'type' not in [duoxX]");
4424
buf = PyString_AsString(result);
4430
/* To modify the string in-place, there can only be one reference. */
4431
if (Py_REFCNT(result) != 1) {
4432
PyErr_BadInternalCall();
4435
llen = PyString_Size(result);
4436
if (llen > INT_MAX) {
4437
PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4441
if (buf[len-1] == 'L') {
4445
sign = buf[0] == '-';
4446
numnondigits += sign;
4447
numdigits = len - numnondigits;
4448
assert(numdigits > 0);
4450
/* Get rid of base marker unless F_ALT */
4451
if ((flags & F_ALT) == 0) {
4452
/* Need to skip 0x, 0X or 0. */
4456
assert(buf[sign] == '0');
4457
/* If 0 is only digit, leave it alone. */
4458
if (numdigits > 1) {
4465
assert(buf[sign] == '0');
4466
assert(buf[sign + 1] == 'x');
4477
assert(len == numnondigits + numdigits);
4478
assert(numdigits > 0);
4481
/* Fill with leading zeroes to meet minimum width. */
4482
if (prec > numdigits) {
4483
PyObject *r1 = PyString_FromStringAndSize(NULL,
4484
numnondigits + prec);
4490
b1 = PyString_AS_STRING(r1);
4491
for (i = 0; i < numnondigits; ++i)
4493
for (i = 0; i < prec - numdigits; i++)
4495
for (i = 0; i < numdigits; i++)
4500
buf = PyString_AS_STRING(result);
4501
len = numnondigits + prec;
4504
/* Fix up case for hex conversions. */
4506
/* Need to convert all lower case letters to upper case,
4507
and need to convert 0x to 0X (and -0x to -0X). */
4508
for (i = 0; i < len; i++)
4509
if (buf[i] >= 'a' && buf[i] <= 'x')
4517
/* Formats the Python object v as a C long into buf for the % operator.
 *
 * buf, buflen  output buffer and its size
 * flags        format flags; F_ALT selects the alternate form
 * prec         requested precision
 * type         conversion character
 * v            value; converted with PyInt_AsLong, a failure is reported
 *              as TypeError (int argument required)
 *
 * A C format string is assembled in fmt.  For the alternate form of the
 * hex conversions the 0x or 0X prefix is written by this function itself
 * and plain %x or %X is used, which sidesteps platform differences
 * described in the comment below.  Requests that could overrun buf
 * raise OverflowError.  Returns the length of the text written to buf.
 * NOTE(review): one branch formats -x; negating the most negative long
 * would overflow - confirm whether that value can reach this call. */
Py_LOCAL_INLINE(int)
4518
formatint(char *buf, size_t buflen, int flags,
4519
int prec, int type, PyObject *v)
4521
/* fmt = '%#.' + `prec` + 'l' + `type`
4522
worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4524
char fmt[64]; /* plenty big enough! */
4528
x = PyInt_AsLong(v);
4529
if (x == -1 && PyErr_Occurred()) {
4530
PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4531
Py_TYPE(v)->tp_name);
4534
if (x < 0 && type == 'u') {
4537
if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4544
if ((flags & F_ALT) &&
4545
(type == 'x' || type == 'X')) {
4546
/* When converting under %#x or %#X, there are a number
4547
* of issues that cause pain:
4548
* - when 0 is being converted, the C standard leaves off
4549
* the '0x' or '0X', which is inconsistent with other
4550
* %#x/%#X conversions and inconsistent with Python's
4552
* - there are platforms that violate the standard and
4553
* convert 0 with the '0x' or '0X'
4554
* (Metrowerks, Compaq Tru64)
4555
* - there are platforms that give '0x' when converting
4556
* under %#X, but convert 0 in accordance with the
4557
* standard (OS/2 EMX)
4559
* We can achieve the desired consistency by inserting our
4560
* own '0x' or '0X' prefix, and substituting %x/%X in place
4563
* Note that this is the same approach as used in
4564
* formatint() in unicodeobject.c
4566
PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4567
sign, type, prec, type);
4570
PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4571
sign, (flags&F_ALT) ? "#" : "",
4575
/* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4576
* worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4578
if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4579
PyErr_SetString(PyExc_OverflowError,
4580
"formatted integer is too long (precision too large?)");
4584
PyOS_snprintf(buf, buflen, fmt, -x);
4586
PyOS_snprintf(buf, buflen, fmt, x);
4587
return (int)strlen(buf);
4590
/* Handles the %c conversion: stores a single character in buf[0].
 * A str argument is parsed with the c format unit, i.e. it has to be a
 * string of length one; any other argument is parsed with the b format
 * unit, i.e. as a small integer.  Both parse calls carry the error text
 * (%c requires int or char) used when the argument does not fit. */
Py_LOCAL_INLINE(int)
4591
formatchar(char *buf, size_t buflen, PyObject *v)
4593
/* presume that the buffer is at least 2 characters long */
4594
if (PyString_Check(v)) {
4595
if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4599
if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4606
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4608
FORMATBUFLEN is the length of the buffer in which the floats, ints, &
4609
chars are formatted. XXX This is a magic number. Each formatting
4610
routine does bounds checking to ensure no overflow, but a better
4611
solution may be to malloc a buffer of appropriate size for each
4612
format. For now, the current solution is sufficient.
4614
#define FORMATBUFLEN (size_t)120
4617
/* Implement `format % args` for str objects: walk the format string and
   expand each format specifier with the next argument, or with a mapping
   lookup for "%(key)" specifiers, building the output in a str object
   that is resized as needed.

   format -- must be a non-NULL str object
   args   -- must be non-NULL; a tuple supplies positional arguments, and
             an object that has tp_as_mapping and is neither a tuple nor
             a basestring is tested for before the main loop (presumably
             to enable "%(key)" lookups through `dict`)

   Errors visible in this listing: PyErr_BadInternalCall() for invalid
   arguments, TypeError or ValueError for malformed specifiers and for
   unused arguments, and PyErr_NoMemory() on one of the growth paths.
   When Py_USING_UNICODE is defined and a unicode argument is met, the
   text produced so far is handed over to PyUnicode_Format() (see the
   tail of the function).

   NOTE(review): this listing carries interleaved line-number residue and
   has dropped lines (return type, braces, several branches, the return
   statements); restore them from the upstream file before building. */
PyString_Format(PyObject *format, PyObject *args)
4620
Py_ssize_t arglen, argidx;
4621
Py_ssize_t reslen, rescnt, fmtcnt;
4623
PyObject *result, *orig_args;
4624
#ifdef Py_USING_UNICODE
4627
PyObject *dict = NULL;
4628
/* Reject a missing or non-str format and missing args up front. */
if (format == NULL || !PyString_Check(format) || args == NULL) {
4629
PyErr_BadInternalCall();
4633
fmt = PyString_AS_STRING(format);
4634
fmtcnt = PyString_GET_SIZE(format);
4635
/* Start with room for the whole format plus 100 bytes of slack. */
reslen = rescnt = fmtcnt + 100;
4636
result = PyString_FromStringAndSize((char *)NULL, reslen);
4639
res = PyString_AsString(result);
4640
/* A tuple provides the positional arguments and their count. */
if (PyTuple_Check(args)) {
4641
arglen = PyTuple_GET_SIZE(args);
4648
/* Mapping-style args: the type has tp_as_mapping but the object is
   neither a tuple nor a basestring. */
if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4649
!PyObject_TypeCheck(args, &PyBaseString_Type))
4651
/* Main loop: one iteration per remaining format character. */
while (--fmtcnt >= 0) {
4654
/* Growth path: rescnt appears to track the free space left in the
   result (the final resize trims to reslen - rescnt); the guarding
   condition is not visible in this listing. */
rescnt = fmtcnt + 100;
4656
if (_PyString_Resize(&result, reslen) < 0)
4658
res = PyString_AS_STRING(result)
4665
/* Got a format specifier */
4667
Py_ssize_t width = -1;
4673
PyObject *temp = NULL;
4677
char formatbuf[FORMATBUFLEN];
4678
/* For format{float,int,char}() */
4679
#ifdef Py_USING_UNICODE
4680
/* Remember where this specifier starts; argidx_start is restored
   below whenever a unicode value forces the unicode hand-over. */
char *fmt_start = fmt;
4681
Py_ssize_t argidx_start = argidx;
4692
PyErr_SetString(PyExc_TypeError,
4693
"format requires a mapping");
4699
/* Skip over balanced parentheses */
4700
while (pcount > 0 && --fmtcnt >= 0) {
4703
else if (*fmt == '(')
4707
keylen = fmt - keystart - 1;
4708
/* Ran out of format text, or parentheses are still open. */
if (fmtcnt < 0 || pcount > 0) {
4709
PyErr_SetString(PyExc_ValueError,
4710
"incomplete format key");
4713
key = PyString_FromStringAndSize(keystart,
4721
/* The value looked up under the key becomes the argument source. */
args = PyObject_GetItem(dict, key);
4730
/* Collect flag characters; each `continue` loops for another flag. */
while (--fmtcnt >= 0) {
4731
switch (c = *fmt++) {
4732
case '-': flags |= F_LJUST; continue;
4733
case '+': flags |= F_SIGN; continue;
4734
case ' ': flags |= F_BLANK; continue;
4735
case '#': flags |= F_ALT; continue;
4736
case '0': flags |= F_ZERO; continue;
4741
/* Width supplied through the argument list: it must be an int. */
v = getnextarg(args, arglen, &argidx);
4744
if (!PyInt_Check(v)) {
4745
PyErr_SetString(PyExc_TypeError,
4749
width = PyInt_AsLong(v);
4757
/* Width written as decimal digits inside the format string. */
else if (c >= 0 && isdigit(c)) {
4759
while (--fmtcnt >= 0) {
4760
c = Py_CHARMASK(*fmt++);
4763
/* Multiply-and-divide-back test for overflow of width.
   NOTE(review): this relies on signed wraparound, which the C
   standard leaves undefined -- confirm it is acceptable here. */
if ((width*10) / 10 != width) {
4769
width = width*10 + (c - '0');
4777
/* Precision supplied through the argument list: it must be an int. */
v = getnextarg(args, arglen, &argidx);
4780
if (!PyInt_Check(v)) {
4786
prec = PyInt_AsLong(v);
4792
/* Precision written as decimal digits, with the same overflow test
   as for width. */
else if (c >= 0 && isdigit(c)) {
4794
while (--fmtcnt >= 0) {
4795
c = Py_CHARMASK(*fmt++);
4798
if ((prec*10) / 10 != prec) {
4804
prec = prec*10 + (c - '0');
4809
/* A C length modifier (h, l or L) is recognised here; what is done
   with it is not visible in this listing. */
if (c == 'h' || c == 'l' || c == 'L') {
4815
PyErr_SetString(PyExc_ValueError,
4816
"incomplete format");
4820
/* Fetch the argument consumed by this conversion. */
v = getnextarg(args, arglen, &argidx);
4832
#ifdef Py_USING_UNICODE
/* A unicode argument: rewind the argument index to the start of this
   specifier (the jump to the unicode tail is not visible here). */
if (PyUnicode_Check(v)) {
4835
argidx = argidx_start;
4839
/* str() conversion (presumably the %s case, going by the error text
   a few lines below). */
temp = _PyObject_Str(v);
4840
#ifdef Py_USING_UNICODE
/* str() produced unicode: rewind as above. */
if (temp != NULL && PyUnicode_Check(temp)) {
4844
argidx = argidx_start;
4851
/* repr() conversion (presumably the %r case). */
temp = PyObject_Repr(v);
4854
if (!PyString_Check(temp)) {
4855
PyErr_SetString(PyExc_TypeError,
4856
"%s argument has non-string str()");
4860
pbuf = PyString_AS_STRING(temp);
4861
len = PyString_GET_SIZE(temp);
4862
/* A non-negative precision shorter than the text is detected here;
   presumably it truncates len (the body is not visible). */
if (prec >= 0 && len > prec)
4874
/* Integer conversions: the argument must be a number.  Values that
   are not already int or long are converted with PyNumber_Int(),
   falling back to PyNumber_Long(). */
if (PyNumber_Check(v)) {
4875
PyObject *iobj=NULL;
4877
if (PyInt_Check(v) || (PyLong_Check(v))) {
4882
iobj = PyNumber_Int(v);
4883
if (iobj==NULL) iobj = PyNumber_Long(v);
4886
/* Plain ints are formatted by formatint(). */
if (PyInt_Check(iobj)) {
4889
len = formatint(pbuf,
4891
flags, prec, c, iobj);
4897
/* Longs are formatted by _PyString_FormatLong(), which returns a new
   object in temp and reports the text through pbuf and ilen. */
else if (PyLong_Check(iobj)) {
4901
temp = _PyString_FormatLong(iobj, flags,
4902
prec, c, &pbuf, &ilen);
4915
PyErr_Format(PyExc_TypeError,
4916
"%%%c format: a number is required, "
4917
"not %.200s", c, Py_TYPE(v)->tp_name);
4932
/* Floating-point conversions go through formatfloat(). */
len = formatfloat(pbuf, sizeof(formatbuf),
4941
#ifdef Py_USING_UNICODE
/* Unicode argument ahead of the formatchar() call: rewind as above. */
if (PyUnicode_Check(v)) {
4944
argidx = argidx_start;
4949
len = formatchar(pbuf, sizeof(formatbuf), v);
4954
/* Any other conversion character is rejected; the message reports
   the character and an index computed from the start of the format. */
PyErr_Format(PyExc_ValueError,
4955
"unsupported format character '%c' (0x%x) "
4958
(Py_ssize_t)(fmt - 1 -
4959
PyString_AsString(format)));
4963
/* Sign handling: an explicit sign in the formatted text, else the
   '+' flag, else the ' ' flag. */
if (*pbuf == '-' || *pbuf == '+') {
4967
else if (flags & F_SIGN)
4969
else if (flags & F_BLANK)
4976
/* Not enough free space for `width` characters (allowing for a
   sign): enlarge the result. */
if (rescnt - (sign != 0) < width) {
4978
rescnt = width + fmtcnt + 100;
4983
return PyErr_NoMemory();
4985
if (_PyString_Resize(&result, reslen) < 0) {
4989
res = PyString_AS_STRING(result)
4999
/* With the '#' flag and an x or X conversion, the formatted text is
   expected to start with '0' followed by the conversion character. */
if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
5000
assert(pbuf[0] == '0');
5001
assert(pbuf[1] == c);
5012
/* Field wider than the text and not left-justified: pad first. */
if (width > len && !(flags & F_LJUST)) {
5016
} while (--width > len);
5021
if ((flags & F_ALT) &&
5022
(c == 'x' || c == 'X')) {
5023
assert(pbuf[0] == '0');
5024
assert(pbuf[1] == c);
5029
/* Copy the formatted text into the result. */
Py_MEMCPY(res, pbuf, len);
5032
/* Remaining width (presumably trailing padding for left-justified
   fields; the loop body is not visible). */
while (--width >= len) {
5036
/* Mapping mode with unconsumed arguments (and not a literal '%'). */
if (dict && (argidx < arglen) && c != '%') {
5037
PyErr_SetString(PyExc_TypeError,
5038
"not all arguments converted during string formatting");
5045
/* Positional mode: every argument must have been consumed. */
if (argidx < arglen && !dict) {
5046
PyErr_SetString(PyExc_TypeError,
5047
"not all arguments converted during string formatting");
5053
/* Trim the result to the part actually written. */
_PyString_Resize(&result, reslen - rescnt);
5056
#ifdef Py_USING_UNICODE
5062
/* Fiddle args right (remove the first argidx arguments) */
5063
if (PyTuple_Check(orig_args) && argidx > 0) {
5065
Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
5070
PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
5072
PyTuple_SET_ITEM(v, n, w);
5076
Py_INCREF(orig_args);
5080
/* Take what we have of the result and let the Unicode formatting
   function format the rest of the input. */
5082
rescnt = res - PyString_AS_STRING(result);
5083
if (_PyString_Resize(&result, rescnt))
5085
fmtcnt = PyString_GET_SIZE(format) - \
5086
(fmt - PyString_AS_STRING(format));
5087
format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
5090
v = PyUnicode_Format(format, args);
5094
/* Paste what we have (result) to what the Unicode formatting
   function returned (v) and return the result (or error) */
5096
w = PyUnicode_Concat(result, v);
5101
#endif /* Py_USING_UNICODE */
5112
/* Intern the string referenced by *p using the file-level `interned`
   dictionary.

   p -- address of a string object pointer; a NULL or non-string *p is a
        fatal error

   Steps visible in this listing: a check for exact str type and a check
   for an already-interned string (their bodies are not visible --
   presumably early exits), lazy creation of `interned` (a failure there
   clears the pending exception), a lookup of the string in the
   dictionary, insertion of the string as both key and value, and finally
   marking it SSTATE_INTERNED_MORTAL.

   NOTE(review): this listing carries interleaved line-number residue and
   has dropped lines (return type, braces, early returns, reference-count
   adjustments); restore them from the upstream file before building. */
PyString_InternInPlace(PyObject **p)
5114
register PyStringObject *s = (PyStringObject *)(*p);
5116
if (s == NULL || !PyString_Check(s))
5117
Py_FatalError("PyString_InternInPlace: strings only please!");
5118
/* If it's a string subclass, we don't really know what putting
   it in the interned dict might do. */
5120
if (!PyString_CheckExact(s))
5122
/* Test whether the string is already marked as interned. */
if (PyString_CHECK_INTERNED(s))
5124
/* Create the interned dictionary on first use. */
if (interned == NULL) {
5125
interned = PyDict_New();
5126
if (interned == NULL) {
5127
PyErr_Clear(); /* Don't leave an exception */
5131
/* Look for an equal string that is already in the dictionary. */
t = PyDict_GetItem(interned, (PyObject *)s);
5139
/* Not found: store the string as both key and value. */
if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
5143
/* The two references in interned are not counted by refcnt.
   The string deallocator will take care of this */
5146
PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
5150
/* Intern the string referenced by *p via PyString_InternInPlace() and
   then mark it SSTATE_INTERNED_IMMORTAL unless it already carries that
   state.

   NOTE(review): this listing carries interleaved line-number residue and
   has dropped lines (return type, braces, and possibly statements inside
   the `if`); restore them from the upstream file before building. */
PyString_InternImmortal(PyObject **p)
5152
PyString_InternInPlace(p);
5153
/* Only touch the state when it is not immortal yet. */
if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
5154
PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
5161
/* Build a string object from the C string `cp` with
   PyString_FromString() and intern it with PyString_InternInPlace().

   NOTE(review): this listing carries interleaved line-number residue and
   has dropped lines (return type, braces, the NULL check on `s` if any,
   the return statement); restore them from the upstream file before
   building. */
PyString_InternFromString(const char *cp)
5163
PyObject *s = PyString_FromString(cp);
5166
/* Passing &s lets the interning routine work on the pointer in place. */
PyString_InternInPlace(&s);
5174
/* NOTE(review): the header of the enclosing function is not visible in
   this listing (lines were dropped); presumably this is the body of the
   string type's finalizer -- confirm against the upstream file.

   Drop the references held in the file-level `characters` table,
   resetting each slot to NULL, and then drop the reference held in
   `nullstring`.  Py_XDECREF is used, so slots that are NULL are
   tolerated. */
for (i = 0; i < UCHAR_MAX + 1; i++) {
5175
Py_XDECREF(characters[i]);
5176
characters[i] = NULL;
5178
Py_XDECREF(nullstring);
5182
void _Py_ReleaseInternedStrings(void)
5187
Py_ssize_t immortal_size = 0, mortal_size = 0;
5189
if (interned == NULL || !PyDict_Check(interned))
5191
keys = PyDict_Keys(interned);
5192
if (keys == NULL || !PyList_Check(keys)) {
5197
/* Since _Py_ReleaseInternedStrings() is intended to help a leak
5198
detector, interned strings are not forcibly deallocated; rather, we
5199
give them their stolen references back, and then clear and DECREF
5200
the interned dict. */
5202
n = PyList_GET_SIZE(keys);
5203
fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5205
for (i = 0; i < n; i++) {
5206
s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5207
switch (s->ob_sstate) {
5208
case SSTATE_NOT_INTERNED:
5209
/* XXX Shouldn't happen */
5211
case SSTATE_INTERNED_IMMORTAL:
5213
immortal_size += Py_SIZE(s);
5215
case SSTATE_INTERNED_MORTAL:
5217
mortal_size += Py_SIZE(s);
5220
Py_FatalError("Inconsistent interned string state.");
5222
s->ob_sstate = SSTATE_NOT_INTERNED;
5224
fprintf(stderr, "total size of all interned strings: "
5225
"%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5226
"mortal/immortal\n", mortal_size, immortal_size);
5228
PyDict_Clear(interned);
5229
Py_DECREF(interned);