1
/* String object implementation */
3
#define PY_SSIZE_T_CLEAN
7
#include "formatter_string.h"
12
int null_strings, one_strings;
15
static PyStringObject *characters[UCHAR_MAX + 1];
16
static PyStringObject *nullstring;
18
/* This dictionary holds all interned strings. Note that references to
19
strings in this dictionary are *not* counted in the string's ob_refcnt.
20
When the interned string reaches a refcnt of 0 the string deallocation
21
function will delete the reference from this dictionary.
23
Another way to look at this is to say that the actual reference
24
count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
26
static PyObject *interned;
29
For both PyString_FromString() and PyString_FromStringAndSize(), the
30
parameter `size' denotes number of characters to allocate, not counting any
31
null terminating character.
33
For PyString_FromString(), the parameter `str' points to a null-terminated
34
string containing exactly `size' bytes.
36
For PyString_FromStringAndSize(), the parameter `str' is
37
either NULL or else points to a string containing at least `size' bytes.
38
For PyString_FromStringAndSize(), the string in the `str' parameter does
39
not have to be null-terminated. (Therefore it is safe to construct a
40
substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
41
If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
42
bytes (setting the last byte to the null terminating character) and you can
43
fill in the data yourself. If `str' is non-NULL then the resulting
44
PyString object must be treated as immutable and you must not fill in nor
45
alter the data yourself, since the strings may be shared.
47
The PyObject member `op->ob_size', which denotes the number of "extra
48
items" in a variable-size object, will contain the number of bytes
49
allocated for string data, not counting the null terminating character. It
50
is therefore equal to the `size' parameter (for
51
PyString_FromStringAndSize()) or the length of the string in the `str'
52
parameter (for PyString_FromString()).
55
PyString_FromStringAndSize(const char *str, Py_ssize_t size)
57
register PyStringObject *op;
59
PyErr_SetString(PyExc_SystemError,
60
"Negative size passed to PyString_FromStringAndSize");
63
if (size == 0 && (op = nullstring) != NULL) {
68
return (PyObject *)op;
70
if (size == 1 && str != NULL &&
71
(op = characters[*str & UCHAR_MAX]) != NULL)
77
return (PyObject *)op;
80
/* Inline PyObject_NewVar */
81
op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
83
return PyErr_NoMemory();
84
PyObject_INIT_VAR(op, &PyString_Type, size);
86
op->ob_sstate = SSTATE_NOT_INTERNED;
88
Py_MEMCPY(op->ob_sval, str, size);
89
op->ob_sval[size] = '\0';
90
/* share short strings */
92
PyObject *t = (PyObject *)op;
93
PyString_InternInPlace(&t);
94
op = (PyStringObject *)t;
97
} else if (size == 1 && str != NULL) {
98
PyObject *t = (PyObject *)op;
99
PyString_InternInPlace(&t);
100
op = (PyStringObject *)t;
101
characters[*str & UCHAR_MAX] = op;
104
return (PyObject *) op;
108
PyString_FromString(const char *str)
110
register size_t size;
111
register PyStringObject *op;
115
if (size > PY_SSIZE_T_MAX) {
116
PyErr_SetString(PyExc_OverflowError,
117
"string is too long for a Python string");
120
if (size == 0 && (op = nullstring) != NULL) {
125
return (PyObject *)op;
127
if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
132
return (PyObject *)op;
135
/* Inline PyObject_NewVar */
136
op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
138
return PyErr_NoMemory();
139
PyObject_INIT_VAR(op, &PyString_Type, size);
141
op->ob_sstate = SSTATE_NOT_INTERNED;
142
Py_MEMCPY(op->ob_sval, str, size+1);
143
/* share short strings */
145
PyObject *t = (PyObject *)op;
146
PyString_InternInPlace(&t);
147
op = (PyStringObject *)t;
150
} else if (size == 1) {
151
PyObject *t = (PyObject *)op;
152
PyString_InternInPlace(&t);
153
op = (PyStringObject *)t;
154
characters[*str & UCHAR_MAX] = op;
157
return (PyObject *) op;
161
PyString_FromFormatV(const char *format, va_list vargs)
169
#ifdef VA_LIST_IS_ARRAY
170
Py_MEMCPY(count, vargs, sizeof(va_list));
173
__va_copy(count, vargs);
178
/* step 1: figure out how large a buffer we need */
179
for (f = format; *f; f++) {
182
while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
185
/* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
186
* they don't affect the amount of space we reserve.
188
if ((*f == 'l' || *f == 'z') &&
189
(f[1] == 'd' || f[1] == 'u'))
194
(void)va_arg(count, int);
195
/* fall through... */
199
case 'd': case 'u': case 'i': case 'x':
200
(void) va_arg(count, int);
201
/* 20 bytes is enough to hold a 64-bit
202
integer. Decimal takes the most space.
203
This isn't enough for octal. */
207
s = va_arg(count, char*);
211
(void) va_arg(count, int);
212
/* maximum 64-bit pointer representation:
214
* so 19 characters is enough.
215
* XXX I count 18 -- what's the extra for?
220
/* if we stumble upon an unknown
221
formatting code, copy the rest of
222
the format string to the output
223
string. (we cannot just skip the
224
code, since there's no way to know
225
what's in the argument list) */
233
/* step 2: fill the buffer */
234
/* Since we've analyzed how much space we need for the worst case,
235
use sprintf directly instead of the slower PyOS_snprintf. */
236
string = PyString_FromStringAndSize(NULL, n);
240
s = PyString_AsString(string);
242
for (f = format; *f; f++) {
248
/* parse the width.precision part (we're only
249
interested in the precision value, if any) */
251
while (isdigit(Py_CHARMASK(*f)))
252
n = (n*10) + *f++ - '0';
256
while (isdigit(Py_CHARMASK(*f)))
257
n = (n*10) + *f++ - '0';
259
while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
261
/* handle the long flag, but only for %ld and %lu.
262
others can be added when necessary. */
263
if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
267
/* handle the size_t flag. */
268
if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
275
*s++ = va_arg(vargs, int);
279
sprintf(s, "%ld", va_arg(vargs, long));
281
sprintf(s, "%" PY_FORMAT_SIZE_T "d",
282
va_arg(vargs, Py_ssize_t));
284
sprintf(s, "%d", va_arg(vargs, int));
290
va_arg(vargs, unsigned long));
292
sprintf(s, "%" PY_FORMAT_SIZE_T "u",
293
va_arg(vargs, size_t));
296
va_arg(vargs, unsigned int));
300
sprintf(s, "%i", va_arg(vargs, int));
304
sprintf(s, "%x", va_arg(vargs, int));
308
p = va_arg(vargs, char*);
316
sprintf(s, "%p", va_arg(vargs, void*));
317
/* %p is ill-defined: ensure leading 0x. */
320
else if (s[1] != 'x') {
321
memmove(s+2, s, strlen(s)+1);
340
_PyString_Resize(&string, s - PyString_AS_STRING(string));
345
PyString_FromFormat(const char *format, ...)
350
#ifdef HAVE_STDARG_PROTOTYPES
351
va_start(vargs, format);
355
ret = PyString_FromFormatV(format, vargs);
361
PyObject *PyString_Decode(const char *s,
363
const char *encoding,
368
str = PyString_FromStringAndSize(s, size);
371
v = PyString_AsDecodedString(str, encoding, errors);
376
PyObject *PyString_AsDecodedObject(PyObject *str,
377
const char *encoding,
382
if (!PyString_Check(str)) {
387
if (encoding == NULL) {
388
#ifdef Py_USING_UNICODE
389
encoding = PyUnicode_GetDefaultEncoding();
391
PyErr_SetString(PyExc_ValueError, "no encoding specified");
396
/* Decode via the codec registry */
397
v = PyCodec_Decode(str, encoding, errors);
407
PyObject *PyString_AsDecodedString(PyObject *str,
408
const char *encoding,
413
v = PyString_AsDecodedObject(str, encoding, errors);
417
#ifdef Py_USING_UNICODE
418
/* Convert Unicode to a string using the default encoding */
419
if (PyUnicode_Check(v)) {
421
v = PyUnicode_AsEncodedString(v, NULL, NULL);
427
if (!PyString_Check(v)) {
428
PyErr_Format(PyExc_TypeError,
429
"decoder did not return a string object (type=%.400s)",
430
Py_TYPE(v)->tp_name);
441
PyObject *PyString_Encode(const char *s,
443
const char *encoding,
448
str = PyString_FromStringAndSize(s, size);
451
v = PyString_AsEncodedString(str, encoding, errors);
456
PyObject *PyString_AsEncodedObject(PyObject *str,
457
const char *encoding,
462
if (!PyString_Check(str)) {
467
if (encoding == NULL) {
468
#ifdef Py_USING_UNICODE
469
encoding = PyUnicode_GetDefaultEncoding();
471
PyErr_SetString(PyExc_ValueError, "no encoding specified");
476
/* Encode via the codec registry */
477
v = PyCodec_Encode(str, encoding, errors);
487
PyObject *PyString_AsEncodedString(PyObject *str,
488
const char *encoding,
493
v = PyString_AsEncodedObject(str, encoding, errors);
497
#ifdef Py_USING_UNICODE
498
/* Convert Unicode to a string using the default encoding */
499
if (PyUnicode_Check(v)) {
501
v = PyUnicode_AsEncodedString(v, NULL, NULL);
507
if (!PyString_Check(v)) {
508
PyErr_Format(PyExc_TypeError,
509
"encoder did not return a string object (type=%.400s)",
510
Py_TYPE(v)->tp_name);
522
string_dealloc(PyObject *op)
524
switch (PyString_CHECK_INTERNED(op)) {
525
case SSTATE_NOT_INTERNED:
528
case SSTATE_INTERNED_MORTAL:
529
/* revive dead object temporarily for DelItem */
531
if (PyDict_DelItem(interned, op) != 0)
533
"deletion of interned string failed");
536
case SSTATE_INTERNED_IMMORTAL:
537
Py_FatalError("Immortal interned string died.");
540
Py_FatalError("Inconsistent interned string state.");
542
Py_TYPE(op)->tp_free(op);
545
/* Unescape a backslash-escaped string. If unicode is non-zero,
546
the string is a u-literal. If recode_encoding is non-zero,
547
the string is UTF-8 encoded and should be re-encoded in the
548
specified encoding. */
550
PyObject *PyString_DecodeEscape(const char *s,
554
const char *recode_encoding)
560
Py_ssize_t newlen = recode_encoding ? 4*len:len;
561
v = PyString_FromStringAndSize((char *)NULL, newlen);
564
p = buf = PyString_AsString(v);
569
#ifdef Py_USING_UNICODE
570
if (recode_encoding && (*s & 0x80)) {
576
/* Decode non-ASCII bytes as UTF-8. */
577
while (t < end && (*t & 0x80)) t++;
578
u = PyUnicode_DecodeUTF8(s, t - s, errors);
581
/* Recode them in target encoding. */
582
w = PyUnicode_AsEncodedString(
583
u, recode_encoding, errors);
587
/* Append bytes to output buffer. */
588
assert(PyString_Check(w));
589
r = PyString_AS_STRING(w);
590
rn = PyString_GET_SIZE(w);
605
PyErr_SetString(PyExc_ValueError,
606
"Trailing \\ in string");
610
/* XXX This assumes ASCII! */
612
case '\\': *p++ = '\\'; break;
613
case '\'': *p++ = '\''; break;
614
case '\"': *p++ = '\"'; break;
615
case 'b': *p++ = '\b'; break;
616
case 'f': *p++ = '\014'; break; /* FF */
617
case 't': *p++ = '\t'; break;
618
case 'n': *p++ = '\n'; break;
619
case 'r': *p++ = '\r'; break;
620
case 'v': *p++ = '\013'; break; /* VT */
621
case 'a': *p++ = '\007'; break; /* BEL, not classic C */
622
case '0': case '1': case '2': case '3':
623
case '4': case '5': case '6': case '7':
625
if (s < end && '0' <= *s && *s <= '7') {
626
c = (c<<3) + *s++ - '0';
627
if (s < end && '0' <= *s && *s <= '7')
628
c = (c<<3) + *s++ - '0';
634
isxdigit(Py_CHARMASK(s[0])) &&
635
isxdigit(Py_CHARMASK(s[1])))
658
if (!errors || strcmp(errors, "strict") == 0) {
659
PyErr_SetString(PyExc_ValueError,
660
"invalid \\x escape");
663
if (strcmp(errors, "replace") == 0) {
665
} else if (strcmp(errors, "ignore") == 0)
668
PyErr_Format(PyExc_ValueError,
670
"unknown error handling code: %.400s",
674
#ifndef Py_USING_UNICODE
679
PyErr_SetString(PyExc_ValueError,
680
"Unicode escapes not legal "
681
"when Unicode disabled");
688
goto non_esc; /* an arbitrary number of unescaped
689
UTF-8 bytes may follow. */
693
_PyString_Resize(&v, p - buf);
700
/* -------------------------------------------------------------------- */
704
string_getsize(register PyObject *op)
708
if (PyString_AsStringAndSize(op, &s, &len))
713
static /*const*/ char *
714
string_getbuffer(register PyObject *op)
718
if (PyString_AsStringAndSize(op, &s, &len))
724
PyString_Size(register PyObject *op)
726
if (!PyString_Check(op))
727
return string_getsize(op);
732
PyString_AsString(register PyObject *op)
734
if (!PyString_Check(op))
735
return string_getbuffer(op);
736
return ((PyStringObject *)op) -> ob_sval;
740
PyString_AsStringAndSize(register PyObject *obj,
742
register Py_ssize_t *len)
745
PyErr_BadInternalCall();
749
if (!PyString_Check(obj)) {
750
#ifdef Py_USING_UNICODE
751
if (PyUnicode_Check(obj)) {
752
obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
759
PyErr_Format(PyExc_TypeError,
760
"expected string or Unicode object, "
761
"%.200s found", Py_TYPE(obj)->tp_name);
766
*s = PyString_AS_STRING(obj);
768
*len = PyString_GET_SIZE(obj);
769
else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
770
PyErr_SetString(PyExc_TypeError,
771
"expected string without null bytes");
777
/* -------------------------------------------------------------------- */
780
#include "stringlib/stringdefs.h"
781
#include "stringlib/fastsearch.h"
783
#include "stringlib/count.h"
784
#include "stringlib/find.h"
785
#include "stringlib/partition.h"
787
#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
788
#include "stringlib/localeutil.h"
793
string_print(PyStringObject *op, FILE *fp, int flags)
795
Py_ssize_t i, str_len;
799
/* XXX Ought to check for interrupts when writing long strings */
800
if (! PyString_CheckExact(op)) {
802
/* A str subclass may have its own __str__ method. */
803
op = (PyStringObject *) PyObject_Str((PyObject *)op);
806
ret = string_print(op, fp, flags);
810
if (flags & Py_PRINT_RAW) {
811
char *data = op->ob_sval;
812
Py_ssize_t size = Py_SIZE(op);
813
Py_BEGIN_ALLOW_THREADS
814
while (size > INT_MAX) {
815
/* Very long strings cannot be written atomically.
816
* But don't write exactly INT_MAX bytes at a time
817
* to avoid memory alignment issues.
819
const int chunk_size = INT_MAX & ~0x3FFF;
820
fwrite(data, 1, chunk_size, fp);
825
if (size) fwrite(data, (int)size, 1, fp);
827
fwrite(data, 1, (int)size, fp);
833
/* figure out which quote to use; single is preferred */
835
if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
836
!memchr(op->ob_sval, '"', Py_SIZE(op)))
839
str_len = Py_SIZE(op);
840
Py_BEGIN_ALLOW_THREADS
842
for (i = 0; i < str_len; i++) {
843
/* Since strings are immutable and the caller should have a
844
reference, accessing the internal buffer should not be an issue
845
with the GIL released. */
847
if (c == quote || c == '\\')
848
fprintf(fp, "\\%c", c);
855
else if (c < ' ' || c >= 0x7f)
856
fprintf(fp, "\\x%02x", c & 0xff);
866
PyString_Repr(PyObject *obj, int smartquotes)
868
register PyStringObject* op = (PyStringObject*) obj;
869
size_t newsize = 2 + 4 * Py_SIZE(op);
871
if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
872
PyErr_SetString(PyExc_OverflowError,
873
"string is too large to make repr");
876
v = PyString_FromStringAndSize((char *)NULL, newsize);
881
register Py_ssize_t i;
886
/* figure out which quote to use; single is preferred */
889
memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
890
!memchr(op->ob_sval, '"', Py_SIZE(op)))
893
p = PyString_AS_STRING(v);
895
for (i = 0; i < Py_SIZE(op); i++) {
896
/* There's at least enough room for a hex escape
897
and a closing quote. */
898
assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
900
if (c == quote || c == '\\')
901
*p++ = '\\', *p++ = c;
903
*p++ = '\\', *p++ = 't';
905
*p++ = '\\', *p++ = 'n';
907
*p++ = '\\', *p++ = 'r';
908
else if (c < ' ' || c >= 0x7f) {
909
/* For performance, we don't want to call
910
PyOS_snprintf here (extra layers of
912
sprintf(p, "\\x%02x", c & 0xff);
918
assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
922
&v, (p - PyString_AS_STRING(v)));
928
string_repr(PyObject *op)
930
return PyString_Repr(op, 1);
934
string_str(PyObject *s)
936
assert(PyString_Check(s));
937
if (PyString_CheckExact(s)) {
942
/* Subtype -- return genuine string with the same value. */
943
PyStringObject *t = (PyStringObject *) s;
944
return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
949
string_length(PyStringObject *a)
955
string_concat(register PyStringObject *a, register PyObject *bb)
957
register Py_ssize_t size;
958
register PyStringObject *op;
959
if (!PyString_Check(bb)) {
960
#ifdef Py_USING_UNICODE
961
if (PyUnicode_Check(bb))
962
return PyUnicode_Concat((PyObject *)a, bb);
964
if (PyBytes_Check(bb))
965
return PyBytes_Concat((PyObject *)a, bb);
966
PyErr_Format(PyExc_TypeError,
967
"cannot concatenate 'str' and '%.200s' objects",
968
Py_TYPE(bb)->tp_name);
971
#define b ((PyStringObject *)bb)
972
/* Optimize cases with empty left or right operand */
973
if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
974
PyString_CheckExact(a) && PyString_CheckExact(b)) {
975
if (Py_SIZE(a) == 0) {
980
return (PyObject *)a;
982
size = Py_SIZE(a) + Py_SIZE(b);
984
PyErr_SetString(PyExc_OverflowError,
985
"strings are too large to concat");
989
/* Inline PyObject_NewVar */
990
op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
992
return PyErr_NoMemory();
993
PyObject_INIT_VAR(op, &PyString_Type, size);
995
op->ob_sstate = SSTATE_NOT_INTERNED;
996
Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
997
Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
998
op->ob_sval[size] = '\0';
999
return (PyObject *) op;
1004
string_repeat(register PyStringObject *a, register Py_ssize_t n)
1006
register Py_ssize_t i;
1007
register Py_ssize_t j;
1008
register Py_ssize_t size;
1009
register PyStringObject *op;
1013
/* watch out for overflows: the size can overflow int,
1014
* and the # of bytes needed can overflow size_t
1016
size = Py_SIZE(a) * n;
1017
if (n && size / n != Py_SIZE(a)) {
1018
PyErr_SetString(PyExc_OverflowError,
1019
"repeated string is too long");
1022
if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1024
return (PyObject *)a;
1026
nbytes = (size_t)size;
1027
if (nbytes + sizeof(PyStringObject) <= nbytes) {
1028
PyErr_SetString(PyExc_OverflowError,
1029
"repeated string is too long");
1032
op = (PyStringObject *)
1033
PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
1035
return PyErr_NoMemory();
1036
PyObject_INIT_VAR(op, &PyString_Type, size);
1038
op->ob_sstate = SSTATE_NOT_INTERNED;
1039
op->ob_sval[size] = '\0';
1040
if (Py_SIZE(a) == 1 && n > 0) {
1041
memset(op->ob_sval, a->ob_sval[0] , n);
1042
return (PyObject *) op;
1046
Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1050
j = (i <= size-i) ? i : size-i;
1051
Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1054
return (PyObject *) op;
1057
/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1060
string_slice(register PyStringObject *a, register Py_ssize_t i,
1061
register Py_ssize_t j)
1062
/* j -- may be negative! */
1067
j = 0; /* Avoid signed/unsigned bug in next line */
1070
if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1071
/* It's the same as a */
1073
return (PyObject *)a;
1077
return PyString_FromStringAndSize(a->ob_sval + i, j-i);
1081
string_contains(PyObject *str_obj, PyObject *sub_obj)
1083
if (!PyString_CheckExact(sub_obj)) {
1084
#ifdef Py_USING_UNICODE
1085
if (PyUnicode_Check(sub_obj))
1086
return PyUnicode_Contains(str_obj, sub_obj);
1088
if (!PyString_Check(sub_obj)) {
1089
PyErr_Format(PyExc_TypeError,
1090
"'in <string>' requires string as left operand, "
1091
"not %.200s", Py_TYPE(sub_obj)->tp_name);
1096
return stringlib_contains_obj(str_obj, sub_obj);
1100
string_item(PyStringObject *a, register Py_ssize_t i)
1104
if (i < 0 || i >= Py_SIZE(a)) {
1105
PyErr_SetString(PyExc_IndexError, "string index out of range");
1108
pchar = a->ob_sval[i];
1109
v = (PyObject *)characters[pchar & UCHAR_MAX];
1111
v = PyString_FromStringAndSize(&pchar, 1);
1122
string_richcompare(PyStringObject *a, PyStringObject *b, int op)
1125
Py_ssize_t len_a, len_b;
1129
/* Make sure both arguments are strings. */
1130
if (!(PyString_Check(a) && PyString_Check(b))) {
1131
result = Py_NotImplemented;
1136
case Py_EQ:case Py_LE:case Py_GE:
1139
case Py_NE:case Py_LT:case Py_GT:
1145
/* Supporting Py_NE here as well does not save
1146
much time, since Py_NE is rarely used. */
1147
if (Py_SIZE(a) == Py_SIZE(b)
1148
&& (a->ob_sval[0] == b->ob_sval[0]
1149
&& memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1156
len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1157
min_len = (len_a < len_b) ? len_a : len_b;
1159
c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1161
c = memcmp(a->ob_sval, b->ob_sval, min_len);
1165
c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1167
case Py_LT: c = c < 0; break;
1168
case Py_LE: c = c <= 0; break;
1169
case Py_EQ: assert(0); break; /* unreachable */
1170
case Py_NE: c = c != 0; break;
1171
case Py_GT: c = c > 0; break;
1172
case Py_GE: c = c >= 0; break;
1174
result = Py_NotImplemented;
1177
result = c ? Py_True : Py_False;
1184
_PyString_Eq(PyObject *o1, PyObject *o2)
1186
PyStringObject *a = (PyStringObject*) o1;
1187
PyStringObject *b = (PyStringObject*) o2;
1188
return Py_SIZE(a) == Py_SIZE(b)
1189
&& *a->ob_sval == *b->ob_sval
1190
&& memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1194
string_hash(PyStringObject *a)
1196
register Py_ssize_t len;
1197
register unsigned char *p;
1200
if (a->ob_shash != -1)
1203
p = (unsigned char *) a->ob_sval;
1206
x = (1000003*x) ^ *p++;
1215
string_subscript(PyStringObject* self, PyObject* item)
1217
if (PyIndex_Check(item)) {
1218
Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1219
if (i == -1 && PyErr_Occurred())
1222
i += PyString_GET_SIZE(self);
1223
return string_item(self, i);
1225
else if (PySlice_Check(item)) {
1226
Py_ssize_t start, stop, step, slicelength, cur, i;
1231
if (PySlice_GetIndicesEx((PySliceObject*)item,
1232
PyString_GET_SIZE(self),
1233
&start, &stop, &step, &slicelength) < 0) {
1237
if (slicelength <= 0) {
1238
return PyString_FromStringAndSize("", 0);
1240
else if (start == 0 && step == 1 &&
1241
slicelength == PyString_GET_SIZE(self) &&
1242
PyString_CheckExact(self)) {
1244
return (PyObject *)self;
1246
else if (step == 1) {
1247
return PyString_FromStringAndSize(
1248
PyString_AS_STRING(self) + start,
1252
source_buf = PyString_AsString((PyObject*)self);
1253
result_buf = (char *)PyMem_Malloc(slicelength);
1254
if (result_buf == NULL)
1255
return PyErr_NoMemory();
1257
for (cur = start, i = 0; i < slicelength;
1259
result_buf[i] = source_buf[cur];
1262
result = PyString_FromStringAndSize(result_buf,
1264
PyMem_Free(result_buf);
1269
PyErr_Format(PyExc_TypeError,
1270
"string indices must be integers, not %.200s",
1271
Py_TYPE(item)->tp_name);
1277
string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1280
PyErr_SetString(PyExc_SystemError,
1281
"accessing non-existent string segment");
1284
*ptr = (void *)self->ob_sval;
1285
return Py_SIZE(self);
1289
string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1291
PyErr_SetString(PyExc_TypeError,
1292
"Cannot use string as modifiable buffer");
1297
string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
1300
*lenp = Py_SIZE(self);
1305
string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
1308
PyErr_SetString(PyExc_SystemError,
1309
"accessing non-existent string segment");
1312
*ptr = self->ob_sval;
1313
return Py_SIZE(self);
1317
string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
1319
return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_SIZE(self),
1323
static PySequenceMethods string_as_sequence = {
1324
(lenfunc)string_length, /*sq_length*/
1325
(binaryfunc)string_concat, /*sq_concat*/
1326
(ssizeargfunc)string_repeat, /*sq_repeat*/
1327
(ssizeargfunc)string_item, /*sq_item*/
1328
(ssizessizeargfunc)string_slice, /*sq_slice*/
1331
(objobjproc)string_contains /*sq_contains*/
1334
static PyMappingMethods string_as_mapping = {
1335
(lenfunc)string_length,
1336
(binaryfunc)string_subscript,
1340
static PyBufferProcs string_as_buffer = {
1341
(readbufferproc)string_buffer_getreadbuf,
1342
(writebufferproc)string_buffer_getwritebuf,
1343
(segcountproc)string_buffer_getsegcount,
1344
(charbufferproc)string_buffer_getcharbuf,
1345
(getbufferproc)string_buffer_getbuffer,
1352
#define RIGHTSTRIP 1
1355
/* Arrays indexed by above */
1356
/* One format string per strip variant, in the order lstrip, rstrip, strip.
   Each entry is the three characters "|O:" followed by the method name. */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};
1358
/* Name part of stripformat[i]: a pointer to the text that follows the
   3-character "|O:" prefix of the format string. */
#define STRIPNAME(i) (&stripformat[i][3])
1361
/* True if the `length` bytes of `target` starting at `offset` equal the
   first `length` bytes of `pattern`.  The first and last bytes are compared
   directly before memcmp() is used on the bytes in between.

   Don't call if length < 2: the memcmp() count is length-2 and would be
   negative.

   Every argument is parenthesized so that expression arguments (for
   example `buf + k` or `n / 2`) expand with the intended precedence.
   Arguments are still evaluated more than once, so they must not have
   side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length) \
    ((target)[offset] == (pattern)[0] && \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] && \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2))
1368
/* Overallocate the initial list to reduce the number of reallocs for small
1369
split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
1370
resizes, to sizes 4, 8, then 16. Most observed string splits are for human
1371
text (roughly 11 words per line) and field delimited data (usually 1-10
1372
fields). For large strings the split algorithms are bandwidth limited
1373
so increasing the preallocation likely will not improve things.*/
1375
#define MAX_PREALLOC 12
1377
/* Initial list size for a split limited to `maxsplit` splits: one slot per
   resulting element (maxsplit + 1, so 5 splits gives 6 elements), capped at
   MAX_PREALLOC.  The argument is parenthesized so that an expression
   argument expands with the intended precedence; it may still be evaluated
   twice, so it must not have side effects. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit) + 1)
1381
#define SPLIT_APPEND(data, left, right) \
1382
str = PyString_FromStringAndSize((data) + (left), \
1383
(right) - (left)); \
1386
if (PyList_Append(list, str)) { \
1393
#define SPLIT_ADD(data, left, right) { \
1394
str = PyString_FromStringAndSize((data) + (left), \
1395
(right) - (left)); \
1398
if (count < MAX_PREALLOC) { \
1399
PyList_SET_ITEM(list, count, str); \
1401
if (PyList_Append(list, str)) { \
1410
/* Always force the list to the expected size. */
1411
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
1413
/* Whitespace-scanning helpers.  Each expands to a braced block that moves
   the index `i` in place, so `i` must be a modifiable lvalue.  Bytes go
   through Py_CHARMASK before being handed to isspace().  Arguments are
   parenthesized so that expression arguments (e.g. `buf + 2`, `*p`) expand
   with the intended precedence. */

/* Advance i while i < len and s[i] is whitespace. */
#define SKIP_SPACE(s, i, len) \
    { while ((i) < (len) && isspace(Py_CHARMASK((s)[i]))) (i)++; }
/* Advance i while i < len and s[i] is not whitespace. */
#define SKIP_NONSPACE(s, i, len) \
    { while ((i) < (len) && !isspace(Py_CHARMASK((s)[i]))) (i)++; }
/* Step i backwards while i >= 0 and s[i] is whitespace. */
#define RSKIP_SPACE(s, i) \
    { while ((i) >= 0 && isspace(Py_CHARMASK((s)[i]))) (i)--; }
/* Step i backwards while i >= 0 and s[i] is not whitespace. */
#define RSKIP_NONSPACE(s, i) \
    { while ((i) >= 0 && !isspace(Py_CHARMASK((s)[i]))) (i)--; }
1418
Py_LOCAL_INLINE(PyObject *)
1419
split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
1421
const char *s = PyString_AS_STRING(self);
1422
Py_ssize_t i, j, count=0;
1424
PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
1431
while (maxsplit-- > 0) {
1432
SKIP_SPACE(s, i, len);
1435
SKIP_NONSPACE(s, i, len);
1436
if (j == 0 && i == len && PyString_CheckExact(self)) {
1437
/* No whitespace in self, so just use it as list[0] */
1439
PyList_SET_ITEM(list, 0, (PyObject *)self);
1447
/* Only occurs when maxsplit was reached */
1448
/* Skip any remaining whitespace and copy to end of string */
1449
SKIP_SPACE(s, i, len);
1451
SPLIT_ADD(s, i, len);
1453
FIX_PREALLOC_SIZE(list);
1460
Py_LOCAL_INLINE(PyObject *)
1461
split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
1463
const char *s = PyString_AS_STRING(self);
1464
register Py_ssize_t i, j, count=0;
1466
PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
1472
while ((j < len) && (maxcount-- > 0)) {
1474
/* I found that using memchr makes no difference */
1482
if (i == 0 && count == 0 && PyString_CheckExact(self)) {
1483
/* ch not in self, so just use self as list[0] */
1485
PyList_SET_ITEM(list, 0, (PyObject *)self);
1488
else if (i <= len) {
1489
SPLIT_ADD(s, i, len);
1491
FIX_PREALLOC_SIZE(list);
1499
PyDoc_STRVAR(split__doc__,
1500
"S.split([sep [,maxsplit]]) -> list of strings\n\
1502
Return a list of the words in the string S, using sep as the\n\
1503
delimiter string. If maxsplit is given, at most maxsplit\n\
1504
splits are done. If sep is not specified or is None, any\n\
1505
whitespace string is a separator and empty strings are removed\n\
1509
string_split(PyStringObject *self, PyObject *args)
1511
Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1512
Py_ssize_t maxsplit = -1, count=0;
1513
const char *s = PyString_AS_STRING(self), *sub;
1514
PyObject *list, *str, *subobj = Py_None;
1519
if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1522
maxsplit = PY_SSIZE_T_MAX;
1523
if (subobj == Py_None)
1524
return split_whitespace(self, len, maxsplit);
1525
if (PyString_Check(subobj)) {
1526
sub = PyString_AS_STRING(subobj);
1527
n = PyString_GET_SIZE(subobj);
1529
#ifdef Py_USING_UNICODE
1530
else if (PyUnicode_Check(subobj))
1531
return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1533
else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1537
PyErr_SetString(PyExc_ValueError, "empty separator");
1541
return split_char(self, len, sub[0], maxsplit);
1543
list = PyList_New(PREALLOC_SIZE(maxsplit));
1549
while (maxsplit-- > 0) {
1550
pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1559
while ((j+n <= len) && (maxsplit-- > 0)) {
1560
for (; j+n <= len; j++) {
1561
if (Py_STRING_MATCH(s, j, sub, n)) {
1569
SPLIT_ADD(s, i, len);
1570
FIX_PREALLOC_SIZE(list);
1578
PyDoc_STRVAR(partition__doc__,
1579
"S.partition(sep) -> (head, sep, tail)\n\
1581
Searches for the separator sep in S, and returns the part before it,\n\
1582
the separator itself, and the part after it. If the separator is not\n\
1583
found, returns S and two empty strings.");
1586
string_partition(PyStringObject *self, PyObject *sep_obj)
1591
if (PyString_Check(sep_obj)) {
1592
sep = PyString_AS_STRING(sep_obj);
1593
sep_len = PyString_GET_SIZE(sep_obj);
1595
#ifdef Py_USING_UNICODE
1596
else if (PyUnicode_Check(sep_obj))
1597
return PyUnicode_Partition((PyObject *) self, sep_obj);
1599
else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1602
return stringlib_partition(
1604
PyString_AS_STRING(self), PyString_GET_SIZE(self),
1605
sep_obj, sep, sep_len
1609
PyDoc_STRVAR(rpartition__doc__,
1610
"S.rpartition(sep) -> (head, sep, tail)\n\
1612
Searches for the separator sep in S, starting at the end of S, and returns\n\
1613
the part before it, the separator itself, and the part after it. If the\n\
1614
separator is not found, returns two empty strings and S.");
1617
string_rpartition(PyStringObject *self, PyObject *sep_obj)
1622
if (PyString_Check(sep_obj)) {
1623
sep = PyString_AS_STRING(sep_obj);
1624
sep_len = PyString_GET_SIZE(sep_obj);
1626
#ifdef Py_USING_UNICODE
1627
else if (PyUnicode_Check(sep_obj))
1628
/* rpartition searches from the end of the string, so a unicode separator
   must be handed to the right-to-left Unicode routine, not to
   PyUnicode_Partition. */
return PyUnicode_RPartition((PyObject *) self, sep_obj);
1630
else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1633
return stringlib_rpartition(
1635
PyString_AS_STRING(self), PyString_GET_SIZE(self),
1636
sep_obj, sep, sep_len
1640
Py_LOCAL_INLINE(PyObject *)
1641
rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
1643
const char *s = PyString_AS_STRING(self);
1644
Py_ssize_t i, j, count=0;
1646
PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
1653
while (maxsplit-- > 0) {
1657
RSKIP_NONSPACE(s, i);
1658
if (j == len-1 && i < 0 && PyString_CheckExact(self)) {
1659
/* No whitespace in self, so just use it as list[0] */
1661
PyList_SET_ITEM(list, 0, (PyObject *)self);
1665
SPLIT_ADD(s, i + 1, j + 1);
1668
/* Only occurs when maxsplit was reached */
1669
/* Skip any remaining whitespace and copy to beginning of string */
1672
SPLIT_ADD(s, 0, i + 1);
1675
FIX_PREALLOC_SIZE(list);
1676
if (PyList_Reverse(list) < 0)
1684
Py_LOCAL_INLINE(PyObject *)
1685
rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
1687
const char *s = PyString_AS_STRING(self);
1688
register Py_ssize_t i, j, count=0;
1690
PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
1696
while ((i >= 0) && (maxcount-- > 0)) {
1697
for (; i >= 0; i--) {
1699
SPLIT_ADD(s, i + 1, j + 1);
1705
if (i < 0 && count == 0 && PyString_CheckExact(self)) {
1706
/* ch not in self, so just use self as list[0] */
1708
PyList_SET_ITEM(list, 0, (PyObject *)self);
1712
SPLIT_ADD(s, 0, j + 1);
1714
FIX_PREALLOC_SIZE(list);
1715
if (PyList_Reverse(list) < 0)
1724
PyDoc_STRVAR(rsplit__doc__,
1725
"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1727
Return a list of the words in the string S, using sep as the\n\
1728
delimiter string, starting at the end of the string and working\n\
1729
to the front. If maxsplit is given, at most maxsplit splits are\n\
1730
done. If sep is not specified or is None, any whitespace string\n\
1734
string_rsplit(PyStringObject *self, PyObject *args)
1736
Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1737
Py_ssize_t maxsplit = -1, count=0;
1738
const char *s, *sub;
1739
PyObject *list, *str, *subobj = Py_None;
1741
if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1744
maxsplit = PY_SSIZE_T_MAX;
1745
if (subobj == Py_None)
1746
return rsplit_whitespace(self, len, maxsplit);
1747
if (PyString_Check(subobj)) {
1748
sub = PyString_AS_STRING(subobj);
1749
n = PyString_GET_SIZE(subobj);
1751
#ifdef Py_USING_UNICODE
1752
else if (PyUnicode_Check(subobj))
1753
return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1755
else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1759
PyErr_SetString(PyExc_ValueError, "empty separator");
1763
return rsplit_char(self, len, sub[0], maxsplit);
1765
list = PyList_New(PREALLOC_SIZE(maxsplit));
1772
s = PyString_AS_STRING(self);
1773
while ( (i >= 0) && (maxsplit-- > 0) ) {
1775
if (Py_STRING_MATCH(s, i, sub, n)) {
1776
SPLIT_ADD(s, i + n, j);
1784
FIX_PREALLOC_SIZE(list);
1785
if (PyList_Reverse(list) < 0)
1795
PyDoc_STRVAR(join__doc__,
1796
"S.join(sequence) -> string\n\
1798
Return a string which is the concatenation of the strings in the\n\
1799
sequence. The separator between elements is S.");
1802
string_join(PyStringObject *self, PyObject *orig)
1804
char *sep = PyString_AS_STRING(self);
1805
const Py_ssize_t seplen = PyString_GET_SIZE(self);
1806
PyObject *res = NULL;
1808
Py_ssize_t seqlen = 0;
1811
PyObject *seq, *item;
1813
seq = PySequence_Fast(orig, "");
1818
seqlen = PySequence_Size(seq);
1821
return PyString_FromString("");
1824
item = PySequence_Fast_GET_ITEM(seq, 0);
1825
if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1832
/* There are at least two things to join, or else we have a subclass
1833
* of the builtin types in the sequence.
1834
* Do a pre-pass to figure out the total amount of space we'll
1835
* need (sz), see whether any argument is absurd, and defer to
1836
* the Unicode join if appropriate.
1838
for (i = 0; i < seqlen; i++) {
1839
const size_t old_sz = sz;
1840
item = PySequence_Fast_GET_ITEM(seq, i);
1841
if (!PyString_Check(item)){
1842
#ifdef Py_USING_UNICODE
1843
if (PyUnicode_Check(item)) {
1844
/* Defer to Unicode join.
1845
* CAUTION: There's no guarantee that the
1846
* original sequence can be iterated over
1847
* again, so we must pass seq here.
1850
result = PyUnicode_Join((PyObject *)self, seq);
1855
PyErr_Format(PyExc_TypeError,
1856
"sequence item %zd: expected string,"
1858
i, Py_TYPE(item)->tp_name);
1862
sz += PyString_GET_SIZE(item);
1865
if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1866
PyErr_SetString(PyExc_OverflowError,
1867
"join() result is too long for a Python string");
1873
/* Allocate result space. */
1874
res = PyString_FromStringAndSize((char*)NULL, sz);
1880
/* Catenate everything. */
1881
p = PyString_AS_STRING(res);
1882
for (i = 0; i < seqlen; ++i) {
1884
item = PySequence_Fast_GET_ITEM(seq, i);
1885
n = PyString_GET_SIZE(item);
1886
Py_MEMCPY(p, PyString_AS_STRING(item), n);
1888
if (i < seqlen - 1) {
1889
Py_MEMCPY(p, sep, seplen);
1899
_PyString_Join(PyObject *sep, PyObject *x)
1901
assert(sep != NULL && PyString_Check(sep));
1903
return string_join((PyStringObject *)sep, x);
1906
Py_LOCAL_INLINE(void)
1907
string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1921
Py_LOCAL_INLINE(Py_ssize_t)
1922
string_find_internal(PyStringObject *self, PyObject *args, int dir)
1927
Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1928
PyObject *obj_start=Py_None, *obj_end=Py_None;
1930
if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1931
&obj_start, &obj_end))
1933
/* To support None in "start" and "end" arguments, meaning
1934
the same as if they were not passed.
1936
if (obj_start != Py_None)
1937
if (!_PyEval_SliceIndex(obj_start, &start))
1939
if (obj_end != Py_None)
1940
if (!_PyEval_SliceIndex(obj_end, &end))
1943
if (PyString_Check(subobj)) {
1944
sub = PyString_AS_STRING(subobj);
1945
sub_len = PyString_GET_SIZE(subobj);
1947
#ifdef Py_USING_UNICODE
1948
else if (PyUnicode_Check(subobj))
1949
return PyUnicode_Find(
1950
(PyObject *)self, subobj, start, end, dir);
1952
else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1953
/* XXX - the "expected a character buffer object" is pretty
1954
confusing for a non-expert. remap to something else ? */
1958
return stringlib_find_slice(
1959
PyString_AS_STRING(self), PyString_GET_SIZE(self),
1960
sub, sub_len, start, end);
1962
return stringlib_rfind_slice(
1963
PyString_AS_STRING(self), PyString_GET_SIZE(self),
1964
sub, sub_len, start, end);
1968
PyDoc_STRVAR(find__doc__,
1969
"S.find(sub [,start [,end]]) -> int\n\
1971
Return the lowest index in S where substring sub is found,\n\
1972
such that sub is contained within s[start:end]. Optional\n\
1973
arguments start and end are interpreted as in slice notation.\n\
1975
Return -1 on failure.");
1978
string_find(PyStringObject *self, PyObject *args)
1980
Py_ssize_t result = string_find_internal(self, args, +1);
1983
return PyInt_FromSsize_t(result);
1987
PyDoc_STRVAR(index__doc__,
1988
"S.index(sub [,start [,end]]) -> int\n\
1990
Like S.find() but raise ValueError when the substring is not found.");
1993
string_index(PyStringObject *self, PyObject *args)
1995
Py_ssize_t result = string_find_internal(self, args, +1);
1999
PyErr_SetString(PyExc_ValueError,
2000
"substring not found");
2003
return PyInt_FromSsize_t(result);
2007
PyDoc_STRVAR(rfind__doc__,
2008
"S.rfind(sub [,start [,end]]) -> int\n\
2010
Return the highest index in S where substring sub is found,\n\
2011
such that sub is contained within s[start:end]. Optional\n\
2012
arguments start and end are interpreted as in slice notation.\n\
2014
Return -1 on failure.");
2017
string_rfind(PyStringObject *self, PyObject *args)
2019
Py_ssize_t result = string_find_internal(self, args, -1);
2022
return PyInt_FromSsize_t(result);
2026
PyDoc_STRVAR(rindex__doc__,
2027
"S.rindex(sub [,start [,end]]) -> int\n\
2029
Like S.rfind() but raise ValueError when the substring is not found.");
2032
string_rindex(PyStringObject *self, PyObject *args)
2034
Py_ssize_t result = string_find_internal(self, args, -1);
2038
PyErr_SetString(PyExc_ValueError,
2039
"substring not found");
2042
return PyInt_FromSsize_t(result);
2046
Py_LOCAL_INLINE(PyObject *)
2047
do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
2049
char *s = PyString_AS_STRING(self);
2050
Py_ssize_t len = PyString_GET_SIZE(self);
2051
char *sep = PyString_AS_STRING(sepobj);
2052
Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
2056
if (striptype != RIGHTSTRIP) {
2057
while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2063
if (striptype != LEFTSTRIP) {
2066
} while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2070
if (i == 0 && j == len && PyString_CheckExact(self)) {
2072
return (PyObject*)self;
2075
return PyString_FromStringAndSize(s+i, j-i);
2079
Py_LOCAL_INLINE(PyObject *)
2080
do_strip(PyStringObject *self, int striptype)
2082
char *s = PyString_AS_STRING(self);
2083
Py_ssize_t len = PyString_GET_SIZE(self), i, j;
2086
if (striptype != RIGHTSTRIP) {
2087
while (i < len && isspace(Py_CHARMASK(s[i]))) {
2093
if (striptype != LEFTSTRIP) {
2096
} while (j >= i && isspace(Py_CHARMASK(s[j])));
2100
if (i == 0 && j == len && PyString_CheckExact(self)) {
2102
return (PyObject*)self;
2105
return PyString_FromStringAndSize(s+i, j-i);
2109
Py_LOCAL_INLINE(PyObject *)
2110
do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2112
PyObject *sep = NULL;
2114
if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
2117
if (sep != NULL && sep != Py_None) {
2118
if (PyString_Check(sep))
2119
return do_xstrip(self, striptype, sep);
2120
#ifdef Py_USING_UNICODE
2121
else if (PyUnicode_Check(sep)) {
2122
PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2126
res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2132
PyErr_Format(PyExc_TypeError,
2133
#ifdef Py_USING_UNICODE
2134
"%s arg must be None, str or unicode",
2136
"%s arg must be None or str",
2138
STRIPNAME(striptype));
2142
return do_strip(self, striptype);
2146
PyDoc_STRVAR(strip__doc__,
2147
"S.strip([chars]) -> string or unicode\n\
2149
Return a copy of the string S with leading and trailing\n\
2150
whitespace removed.\n\
2151
If chars is given and not None, remove characters in chars instead.\n\
2152
If chars is unicode, S will be converted to unicode before stripping");
2155
string_strip(PyStringObject *self, PyObject *args)
2157
if (PyTuple_GET_SIZE(args) == 0)
2158
return do_strip(self, BOTHSTRIP); /* Common case */
2160
return do_argstrip(self, BOTHSTRIP, args);
2164
PyDoc_STRVAR(lstrip__doc__,
2165
"S.lstrip([chars]) -> string or unicode\n\
2167
Return a copy of the string S with leading whitespace removed.\n\
2168
If chars is given and not None, remove characters in chars instead.\n\
2169
If chars is unicode, S will be converted to unicode before stripping");
2172
string_lstrip(PyStringObject *self, PyObject *args)
2174
if (PyTuple_GET_SIZE(args) == 0)
2175
return do_strip(self, LEFTSTRIP); /* Common case */
2177
return do_argstrip(self, LEFTSTRIP, args);
2181
PyDoc_STRVAR(rstrip__doc__,
2182
"S.rstrip([chars]) -> string or unicode\n\
2184
Return a copy of the string S with trailing whitespace removed.\n\
2185
If chars is given and not None, remove characters in chars instead.\n\
2186
If chars is unicode, S will be converted to unicode before stripping");
2189
string_rstrip(PyStringObject *self, PyObject *args)
2191
if (PyTuple_GET_SIZE(args) == 0)
2192
return do_strip(self, RIGHTSTRIP); /* Common case */
2194
return do_argstrip(self, RIGHTSTRIP, args);
2198
PyDoc_STRVAR(lower__doc__,
2199
"S.lower() -> string\n\
2201
Return a copy of the string S converted to lowercase.");
2203
/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2205
#define _tolower tolower
2209
string_lower(PyStringObject *self)
2212
Py_ssize_t i, n = PyString_GET_SIZE(self);
2215
newobj = PyString_FromStringAndSize(NULL, n);
2219
s = PyString_AS_STRING(newobj);
2221
Py_MEMCPY(s, PyString_AS_STRING(self), n);
2223
for (i = 0; i < n; i++) {
2224
int c = Py_CHARMASK(s[i]);
2232
PyDoc_STRVAR(upper__doc__,
2233
"S.upper() -> string\n\
2235
Return a copy of the string S converted to uppercase.");
2238
#define _toupper toupper
2242
string_upper(PyStringObject *self)
2245
Py_ssize_t i, n = PyString_GET_SIZE(self);
2248
newobj = PyString_FromStringAndSize(NULL, n);
2252
s = PyString_AS_STRING(newobj);
2254
Py_MEMCPY(s, PyString_AS_STRING(self), n);
2256
for (i = 0; i < n; i++) {
2257
int c = Py_CHARMASK(s[i]);
2265
PyDoc_STRVAR(title__doc__,
2266
"S.title() -> string\n\
2268
Return a titlecased version of S, i.e. words start with uppercase\n\
2269
characters, all remaining cased characters have lowercase.");
2272
string_title(PyStringObject *self)
2274
char *s = PyString_AS_STRING(self), *s_new;
2275
Py_ssize_t i, n = PyString_GET_SIZE(self);
2276
int previous_is_cased = 0;
2279
newobj = PyString_FromStringAndSize(NULL, n);
2282
s_new = PyString_AsString(newobj);
2283
for (i = 0; i < n; i++) {
2284
int c = Py_CHARMASK(*s++);
2286
if (!previous_is_cased)
2288
previous_is_cased = 1;
2289
} else if (isupper(c)) {
2290
if (previous_is_cased)
2292
previous_is_cased = 1;
2294
previous_is_cased = 0;
2300
PyDoc_STRVAR(capitalize__doc__,
2301
"S.capitalize() -> string\n\
2303
Return a copy of the string S with only its first character\n\
2307
string_capitalize(PyStringObject *self)
2309
char *s = PyString_AS_STRING(self), *s_new;
2310
Py_ssize_t i, n = PyString_GET_SIZE(self);
2313
newobj = PyString_FromStringAndSize(NULL, n);
2316
s_new = PyString_AsString(newobj);
2318
int c = Py_CHARMASK(*s++);
2320
*s_new = toupper(c);
2325
for (i = 1; i < n; i++) {
2326
int c = Py_CHARMASK(*s++);
2328
*s_new = tolower(c);
2337
PyDoc_STRVAR(count__doc__,
2338
"S.count(sub[, start[, end]]) -> int\n\
2340
Return the number of non-overlapping occurrences of substring sub in\n\
2341
string S[start:end]. Optional arguments start and end are interpreted\n\
2342
as in slice notation.");
2345
string_count(PyStringObject *self, PyObject *args)
2348
const char *str = PyString_AS_STRING(self), *sub;
2350
Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2352
if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2353
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2356
if (PyString_Check(sub_obj)) {
2357
sub = PyString_AS_STRING(sub_obj);
2358
sub_len = PyString_GET_SIZE(sub_obj);
2360
#ifdef Py_USING_UNICODE
2361
else if (PyUnicode_Check(sub_obj)) {
2363
count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2367
return PyInt_FromSsize_t(count);
2370
else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2373
string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
2375
return PyInt_FromSsize_t(
2376
stringlib_count(str + start, end - start, sub, sub_len)
2380
PyDoc_STRVAR(swapcase__doc__,
2381
"S.swapcase() -> string\n\
2383
Return a copy of the string S with uppercase characters\n\
2384
converted to lowercase and vice versa.");
2387
string_swapcase(PyStringObject *self)
2389
char *s = PyString_AS_STRING(self), *s_new;
2390
Py_ssize_t i, n = PyString_GET_SIZE(self);
2393
newobj = PyString_FromStringAndSize(NULL, n);
2396
s_new = PyString_AsString(newobj);
2397
for (i = 0; i < n; i++) {
2398
int c = Py_CHARMASK(*s++);
2400
*s_new = toupper(c);
2402
else if (isupper(c)) {
2403
*s_new = tolower(c);
2413
PyDoc_STRVAR(translate__doc__,
2414
"S.translate(table [,deletechars]) -> string\n\
2416
Return a copy of the string S, where all characters occurring\n\
2417
in the optional argument deletechars are removed, and the\n\
2418
remaining characters have been mapped through the given\n\
2419
translation table, which must be a string of length 256.");
2422
string_translate(PyStringObject *self, PyObject *args)
2424
register char *input, *output;
2426
register Py_ssize_t i, c, changed = 0;
2427
PyObject *input_obj = (PyObject*)self;
2428
const char *output_start, *del_table=NULL;
2429
Py_ssize_t inlen, tablen, dellen = 0;
2431
int trans_table[256];
2432
PyObject *tableobj, *delobj = NULL;
2434
if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2435
&tableobj, &delobj))
2438
if (PyString_Check(tableobj)) {
2439
table = PyString_AS_STRING(tableobj);
2440
tablen = PyString_GET_SIZE(tableobj);
2442
else if (tableobj == Py_None) {
2446
#ifdef Py_USING_UNICODE
2447
else if (PyUnicode_Check(tableobj)) {
2448
/* Unicode .translate() does not support the deletechars
2449
parameter; instead a mapping to None will cause characters
2451
if (delobj != NULL) {
2452
PyErr_SetString(PyExc_TypeError,
2453
"deletions are implemented differently for unicode");
2456
return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2459
else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2462
if (tablen != 256) {
2463
PyErr_SetString(PyExc_ValueError,
2464
"translation table must be 256 characters long");
2468
if (delobj != NULL) {
2469
if (PyString_Check(delobj)) {
2470
del_table = PyString_AS_STRING(delobj);
2471
dellen = PyString_GET_SIZE(delobj);
2473
#ifdef Py_USING_UNICODE
2474
else if (PyUnicode_Check(delobj)) {
2475
PyErr_SetString(PyExc_TypeError,
2476
"deletions are implemented differently for unicode");
2480
else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2488
inlen = PyString_GET_SIZE(input_obj);
2489
result = PyString_FromStringAndSize((char *)NULL, inlen);
2492
output_start = output = PyString_AsString(result);
2493
input = PyString_AS_STRING(input_obj);
2495
if (dellen == 0 && table != NULL) {
2496
/* If no deletions are required, use faster code */
2497
for (i = inlen; --i >= 0; ) {
2498
c = Py_CHARMASK(*input++);
2499
if (Py_CHARMASK((*output++ = table[c])) != c)
2502
if (changed || !PyString_CheckExact(input_obj))
2505
Py_INCREF(input_obj);
2509
if (table == NULL) {
2510
for (i = 0; i < 256; i++)
2511
trans_table[i] = Py_CHARMASK(i);
2513
for (i = 0; i < 256; i++)
2514
trans_table[i] = Py_CHARMASK(table[i]);
2517
for (i = 0; i < dellen; i++)
2518
trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2520
for (i = inlen; --i >= 0; ) {
2521
c = Py_CHARMASK(*input++);
2522
if (trans_table[c] != -1)
2523
if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2527
if (!changed && PyString_CheckExact(input_obj)) {
2529
Py_INCREF(input_obj);
2532
/* Fix the size of the resulting string */
2534
_PyString_Resize(&result, output - output_start);
2542
/* find and count characters and substrings */
2544
#define findchar(target, target_len, c) \
2545
((char *)memchr((const void *)(target), c, target_len))
2547
/* String ops must return a string. */
2548
/* If the object is a subclass of string, create a copy */
2549
Py_LOCAL(PyStringObject *)
2550
return_self(PyStringObject *self)
2552
if (PyString_CheckExact(self)) {
2556
return (PyStringObject *)PyString_FromStringAndSize(
2557
PyString_AS_STRING(self),
2558
PyString_GET_SIZE(self));
2561
Py_LOCAL_INLINE(Py_ssize_t)
2562
countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2565
const char *start=target;
2566
const char *end=target+target_len;
2568
while ( (start=findchar(start, end-start, c)) != NULL ) {
2570
if (count >= maxcount)
2577
Py_LOCAL(Py_ssize_t)
2578
findstring(const char *target, Py_ssize_t target_len,
2579
const char *pattern, Py_ssize_t pattern_len,
2585
start += target_len;
2589
if (end > target_len) {
2591
} else if (end < 0) {
2597
/* zero-length substrings always match at the first attempt */
2598
if (pattern_len == 0)
2599
return (direction > 0) ? start : end;
2603
if (direction < 0) {
2604
for (; end >= start; end--)
2605
if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2608
for (; start <= end; start++)
2609
if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2615
Py_LOCAL_INLINE(Py_ssize_t)
2616
countstring(const char *target, Py_ssize_t target_len,
2617
const char *pattern, Py_ssize_t pattern_len,
2620
int direction, Py_ssize_t maxcount)
2625
start += target_len;
2629
if (end > target_len) {
2631
} else if (end < 0) {
2637
/* zero-length substrings match everywhere */
2638
if (pattern_len == 0 || maxcount == 0) {
2639
if (target_len+1 < maxcount)
2640
return target_len+1;
2645
if (direction < 0) {
2646
for (; (end >= start); end--)
2647
if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2649
if (--maxcount <= 0) break;
2650
end -= pattern_len-1;
2653
for (; (start <= end); start++)
2654
if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2656
if (--maxcount <= 0)
2658
start += pattern_len-1;
2665
/* Algorithms for different cases of string replacement */
2667
/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2668
Py_LOCAL(PyStringObject *)
2669
replace_interleave(PyStringObject *self,
2670
const char *to_s, Py_ssize_t to_len,
2671
Py_ssize_t maxcount)
2673
char *self_s, *result_s;
2674
Py_ssize_t self_len, result_len;
2675
Py_ssize_t count, i, product;
2676
PyStringObject *result;
2678
self_len = PyString_GET_SIZE(self);
2680
/* 1 at the end plus 1 after every character */
2682
if (maxcount < count)
2685
/* Check for overflow */
2686
/* result_len = count * to_len + self_len; */
2687
product = count * to_len;
2688
if (product / to_len != count) {
2689
PyErr_SetString(PyExc_OverflowError,
2690
"replace string is too long");
2693
result_len = product + self_len;
2694
if (result_len < 0) {
2695
PyErr_SetString(PyExc_OverflowError,
2696
"replace string is too long");
2700
if (! (result = (PyStringObject *)
2701
PyString_FromStringAndSize(NULL, result_len)) )
2704
self_s = PyString_AS_STRING(self);
2705
result_s = PyString_AS_STRING(result);
2707
/* TODO: special case single character, which doesn't need memcpy */
2709
/* Lay the first one down (guaranteed this will occur) */
2710
Py_MEMCPY(result_s, to_s, to_len);
2714
for (i=0; i<count; i++) {
2715
*result_s++ = *self_s++;
2716
Py_MEMCPY(result_s, to_s, to_len);
2720
/* Copy the rest of the original string */
2721
Py_MEMCPY(result_s, self_s, self_len-i);
2726
/* Special case for deleting a single character */
2727
/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2728
Py_LOCAL(PyStringObject *)
2729
replace_delete_single_character(PyStringObject *self,
2730
char from_c, Py_ssize_t maxcount)
2732
char *self_s, *result_s;
2733
char *start, *next, *end;
2734
Py_ssize_t self_len, result_len;
2736
PyStringObject *result;
2738
self_len = PyString_GET_SIZE(self);
2739
self_s = PyString_AS_STRING(self);
2741
count = countchar(self_s, self_len, from_c, maxcount);
2743
return return_self(self);
2746
result_len = self_len - count; /* from_len == 1 */
2747
assert(result_len>=0);
2749
if ( (result = (PyStringObject *)
2750
PyString_FromStringAndSize(NULL, result_len)) == NULL)
2752
result_s = PyString_AS_STRING(result);
2755
end = self_s + self_len;
2756
while (count-- > 0) {
2757
next = findchar(start, end-start, from_c);
2760
Py_MEMCPY(result_s, start, next-start);
2761
result_s += (next-start);
2764
Py_MEMCPY(result_s, start, end-start);
2769
/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2771
Py_LOCAL(PyStringObject *)
2772
replace_delete_substring(PyStringObject *self,
2773
const char *from_s, Py_ssize_t from_len,
2774
Py_ssize_t maxcount) {
2775
char *self_s, *result_s;
2776
char *start, *next, *end;
2777
Py_ssize_t self_len, result_len;
2778
Py_ssize_t count, offset;
2779
PyStringObject *result;
2781
self_len = PyString_GET_SIZE(self);
2782
self_s = PyString_AS_STRING(self);
2784
count = countstring(self_s, self_len,
2791
return return_self(self);
2794
result_len = self_len - (count * from_len);
2795
assert (result_len>=0);
2797
if ( (result = (PyStringObject *)
2798
PyString_FromStringAndSize(NULL, result_len)) == NULL )
2801
result_s = PyString_AS_STRING(result);
2804
end = self_s + self_len;
2805
while (count-- > 0) {
2806
offset = findstring(start, end-start,
2808
0, end-start, FORWARD);
2811
next = start + offset;
2813
Py_MEMCPY(result_s, start, next-start);
2815
result_s += (next-start);
2816
start = next+from_len;
2818
Py_MEMCPY(result_s, start, end-start);
2822
/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2823
Py_LOCAL(PyStringObject *)
2824
replace_single_character_in_place(PyStringObject *self,
2825
char from_c, char to_c,
2826
Py_ssize_t maxcount)
2828
char *self_s, *result_s, *start, *end, *next;
2829
Py_ssize_t self_len;
2830
PyStringObject *result;
2832
/* The result string will be the same size */
2833
self_s = PyString_AS_STRING(self);
2834
self_len = PyString_GET_SIZE(self);
2836
next = findchar(self_s, self_len, from_c);
2839
/* No matches; return the original string */
2840
return return_self(self);
2843
/* Need to make a new string */
2844
result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2847
result_s = PyString_AS_STRING(result);
2848
Py_MEMCPY(result_s, self_s, self_len);
2850
/* change everything in-place, starting with this one */
2851
start = result_s + (next-self_s);
2854
end = result_s + self_len;
2856
while (--maxcount > 0) {
2857
next = findchar(start, end-start, from_c);
2867
/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2868
Py_LOCAL(PyStringObject *)
2869
replace_substring_in_place(PyStringObject *self,
2870
const char *from_s, Py_ssize_t from_len,
2871
const char *to_s, Py_ssize_t to_len,
2872
Py_ssize_t maxcount)
2874
char *result_s, *start, *end;
2876
Py_ssize_t self_len, offset;
2877
PyStringObject *result;
2879
/* The result string will be the same size */
2881
self_s = PyString_AS_STRING(self);
2882
self_len = PyString_GET_SIZE(self);
2884
offset = findstring(self_s, self_len,
2886
0, self_len, FORWARD);
2888
/* No matches; return the original string */
2889
return return_self(self);
2892
/* Need to make a new string */
2893
result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2896
result_s = PyString_AS_STRING(result);
2897
Py_MEMCPY(result_s, self_s, self_len);
2899
/* change everything in-place, starting with this one */
2900
start = result_s + offset;
2901
Py_MEMCPY(start, to_s, from_len);
2903
end = result_s + self_len;
2905
while ( --maxcount > 0) {
2906
offset = findstring(start, end-start,
2908
0, end-start, FORWARD);
2911
Py_MEMCPY(start+offset, to_s, from_len);
2912
start += offset+from_len;
2918
/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2919
Py_LOCAL(PyStringObject *)
2920
replace_single_character(PyStringObject *self,
2922
const char *to_s, Py_ssize_t to_len,
2923
Py_ssize_t maxcount)
2925
char *self_s, *result_s;
2926
char *start, *next, *end;
2927
Py_ssize_t self_len, result_len;
2928
Py_ssize_t count, product;
2929
PyStringObject *result;
2931
self_s = PyString_AS_STRING(self);
2932
self_len = PyString_GET_SIZE(self);
2934
count = countchar(self_s, self_len, from_c, maxcount);
2936
/* no matches, return unchanged */
2937
return return_self(self);
2940
/* use the difference between current and new, hence the "-1" */
2941
/* result_len = self_len + count * (to_len-1) */
2942
product = count * (to_len-1);
2943
if (product / (to_len-1) != count) {
2944
PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2947
result_len = self_len + product;
2948
if (result_len < 0) {
2949
PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2953
if ( (result = (PyStringObject *)
2954
PyString_FromStringAndSize(NULL, result_len)) == NULL)
2956
result_s = PyString_AS_STRING(result);
2959
end = self_s + self_len;
2960
while (count-- > 0) {
2961
next = findchar(start, end-start, from_c);
2965
if (next == start) {
2966
/* replace with the 'to' */
2967
Py_MEMCPY(result_s, to_s, to_len);
2971
/* copy the unchanged old then the 'to' */
2972
Py_MEMCPY(result_s, start, next-start);
2973
result_s += (next-start);
2974
Py_MEMCPY(result_s, to_s, to_len);
2979
/* Copy the remainder of the remaining string */
2980
Py_MEMCPY(result_s, start, end-start);
2985
/* len(self)>=1, len(from)>=2, len(to)>=1, maxcount>=1 */
2986
Py_LOCAL(PyStringObject *)
2987
replace_substring(PyStringObject *self,
2988
const char *from_s, Py_ssize_t from_len,
2989
const char *to_s, Py_ssize_t to_len,
2990
Py_ssize_t maxcount) {
2991
char *self_s, *result_s;
2992
char *start, *next, *end;
2993
Py_ssize_t self_len, result_len;
2994
Py_ssize_t count, offset, product;
2995
PyStringObject *result;
2997
self_s = PyString_AS_STRING(self);
2998
self_len = PyString_GET_SIZE(self);
3000
count = countstring(self_s, self_len,
3002
0, self_len, FORWARD, maxcount);
3004
/* no matches, return unchanged */
3005
return return_self(self);
3008
/* Check for overflow */
3009
/* result_len = self_len + count * (to_len-from_len) */
3010
product = count * (to_len-from_len);
3011
if (product / (to_len-from_len) != count) {
3012
PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3015
result_len = self_len + product;
3016
if (result_len < 0) {
3017
PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3021
if ( (result = (PyStringObject *)
3022
PyString_FromStringAndSize(NULL, result_len)) == NULL)
3024
result_s = PyString_AS_STRING(result);
3027
end = self_s + self_len;
3028
while (count-- > 0) {
3029
offset = findstring(start, end-start,
3031
0, end-start, FORWARD);
3034
next = start+offset;
3035
if (next == start) {
3036
/* replace with the 'to' */
3037
Py_MEMCPY(result_s, to_s, to_len);
3041
/* copy the unchanged old then the 'to' */
3042
Py_MEMCPY(result_s, start, next-start);
3043
result_s += (next-start);
3044
Py_MEMCPY(result_s, to_s, to_len);
3046
start = next+from_len;
3049
/* Copy the remainder of the remaining string */
3050
Py_MEMCPY(result_s, start, end-start);
3056
Py_LOCAL(PyStringObject *)
3057
replace(PyStringObject *self,
3058
const char *from_s, Py_ssize_t from_len,
3059
const char *to_s, Py_ssize_t to_len,
3060
Py_ssize_t maxcount)
3063
maxcount = PY_SSIZE_T_MAX;
3064
} else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3065
/* nothing to do; return the original string */
3066
return return_self(self);
3069
if (maxcount == 0 ||
3070
(from_len == 0 && to_len == 0)) {
3071
/* nothing to do; return the original string */
3072
return return_self(self);
3075
/* Handle zero-length special cases */
3077
if (from_len == 0) {
3078
/* insert the 'to' string everywhere. */
3079
/* >>> "Python".replace("", ".") */
3080
/* '.P.y.t.h.o.n.' */
3081
return replace_interleave(self, to_s, to_len, maxcount);
3084
/* Except for "".replace("", "A") == "A" there is no way beyond this */
3085
/* point for an empty self string to generate a non-empty string */
3086
/* Special case so the remaining code always gets a non-empty string */
3087
if (PyString_GET_SIZE(self) == 0) {
3088
return return_self(self);
3092
/* delete all occurrences of 'from' string */
3093
if (from_len == 1) {
3094
return replace_delete_single_character(
3095
self, from_s[0], maxcount);
3097
return replace_delete_substring(self, from_s, from_len, maxcount);
3101
/* Handle special case where both strings have the same length */
3103
if (from_len == to_len) {
3104
if (from_len == 1) {
3105
return replace_single_character_in_place(
3111
return replace_substring_in_place(
3112
self, from_s, from_len, to_s, to_len, maxcount);
3116
/* Otherwise use the more generic algorithms */
3117
if (from_len == 1) {
3118
return replace_single_character(self, from_s[0],
3119
to_s, to_len, maxcount);
3121
/* len('from')>=2, len('to')>=1 */
3122
return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
3126
PyDoc_STRVAR(replace__doc__,
3127
"S.replace (old, new[, count]) -> string\n\
3129
Return a copy of string S with all occurrences of substring\n\
3130
old replaced by new. If the optional argument count is\n\
3131
given, only the first count occurrences are replaced.");
3134
string_replace(PyStringObject *self, PyObject *args)
3136
Py_ssize_t count = -1;
3137
PyObject *from, *to;
3138
const char *from_s, *to_s;
3139
Py_ssize_t from_len, to_len;
3141
if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
3144
if (PyString_Check(from)) {
3145
from_s = PyString_AS_STRING(from);
3146
from_len = PyString_GET_SIZE(from);
3148
#ifdef Py_USING_UNICODE
3149
if (PyUnicode_Check(from))
3150
return PyUnicode_Replace((PyObject *)self,
3153
else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
3156
if (PyString_Check(to)) {
3157
to_s = PyString_AS_STRING(to);
3158
to_len = PyString_GET_SIZE(to);
3160
#ifdef Py_USING_UNICODE
3161
else if (PyUnicode_Check(to))
3162
return PyUnicode_Replace((PyObject *)self,
3165
else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
3168
return (PyObject *)replace((PyStringObject *) self,
3170
to_s, to_len, count);
3175
/* Matches the end (direction >= 0) or start (direction < 0) of self
3176
* against substr, using the start and end arguments. Returns
3177
* -1 on error, 0 if not found and 1 if found.
3180
/* Shared prefix/suffix matcher: string_startswith calls it with
 * direction -1 and string_endswith with direction +1.
 *
 * The bytes of substr come straight from a PyString, or from
 * PyObject_AsCharBuffer for other buffer objects; a unicode substr is
 * forwarded to PyUnicode_Tailmatch (Py_USING_UNICODE builds).  start and
 * end are normalised against the length of self by string_adjust_indices
 * before the window is range-checked and compared with memcmp.
 */
_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3181
Py_ssize_t end, int direction)
3183
Py_ssize_t len = PyString_GET_SIZE(self);
3188
if (PyString_Check(substr)) {
3189
sub = PyString_AS_STRING(substr);
3190
slen = PyString_GET_SIZE(substr);
3192
#ifdef Py_USING_UNICODE
3193
else if (PyUnicode_Check(substr))
3194
return PyUnicode_Tailmatch((PyObject *)self,
3195
substr, start, end, direction);
3197
else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3199
str = PyString_AS_STRING(self);
3201
string_adjust_indices(&start, &end, len);
3203
if (direction < 0) {
3205
if (start+slen > len)
3209
if (end-start < slen || start > len)
3212
if (end-slen > start)
3215
if (end-start >= slen)
3216
return ! memcmp(str+start, sub, slen);
3221
/* Docstring and implementation of the startswith method.
 *
 * Arguments: a prefix object plus optional start and end, both converted
 * by _PyEval_SliceIndex and defaulting to 0 and PY_SSIZE_T_MAX.  When the
 * prefix is a tuple every item is tried through _string_tailmatch;
 * otherwise a single _string_tailmatch call with direction -1 decides.
 * The integer result is turned into a bool with PyBool_FromLong.
 */
PyDoc_STRVAR(startswith__doc__,
3222
"S.startswith(prefix[, start[, end]]) -> bool\n\
3224
Return True if S starts with the specified prefix, False otherwise.\n\
3225
With optional start, test S beginning at that position.\n\
3226
With optional end, stop comparing S at that position.\n\
3227
prefix can also be a tuple of strings to try.");
3230
string_startswith(PyStringObject *self, PyObject *args)
3232
Py_ssize_t start = 0;
3233
Py_ssize_t end = PY_SSIZE_T_MAX;
3237
if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3238
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3240
if (PyTuple_Check(subobj)) {
3242
for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3243
result = _string_tailmatch(self,
3244
PyTuple_GET_ITEM(subobj, i),
3254
result = _string_tailmatch(self, subobj, start, end, -1);
3258
return PyBool_FromLong(result);
3262
/* Docstring and implementation of the endswith method.
 *
 * Mirrors string_startswith: a suffix object plus optional start and end
 * (converted by _PyEval_SliceIndex, defaulting to 0 and PY_SSIZE_T_MAX).
 * A tuple suffix is tried item by item through _string_tailmatch; any
 * other object is matched with one _string_tailmatch call using
 * direction +1.  The result is returned through PyBool_FromLong.
 */
PyDoc_STRVAR(endswith__doc__,
3263
"S.endswith(suffix[, start[, end]]) -> bool\n\
3265
Return True if S ends with the specified suffix, False otherwise.\n\
3266
With optional start, test S beginning at that position.\n\
3267
With optional end, stop comparing S at that position.\n\
3268
suffix can also be a tuple of strings to try.");
3271
string_endswith(PyStringObject *self, PyObject *args)
3273
Py_ssize_t start = 0;
3274
Py_ssize_t end = PY_SSIZE_T_MAX;
3278
if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3279
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3281
if (PyTuple_Check(subobj)) {
3283
for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3284
result = _string_tailmatch(self,
3285
PyTuple_GET_ITEM(subobj, i),
3295
result = _string_tailmatch(self, subobj, start, end, +1);
3299
return PyBool_FromLong(result);
3303
/* Docstring and implementation of the encode method.
 *
 * Both arguments are optional C strings (encoding, errors) that stay NULL
 * when omitted.  The work is done by PyString_AsEncodedObject; its result
 * is accepted only if it is a string or unicode object, otherwise a
 * TypeError naming the offending type is raised.
 */
PyDoc_STRVAR(encode__doc__,
3304
"S.encode([encoding[,errors]]) -> object\n\
3306
Encodes S using the codec registered for encoding. encoding defaults\n\
3307
to the default encoding. errors may be given to set a different error\n\
3308
handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3309
a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3310
'xmlcharrefreplace' as well as any other name registered with\n\
3311
codecs.register_error that is able to handle UnicodeEncodeErrors.");
3314
string_encode(PyStringObject *self, PyObject *args)
3316
char *encoding = NULL;
3317
char *errors = NULL;
3320
if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3322
v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
3325
if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3326
PyErr_Format(PyExc_TypeError,
3327
"encoder did not return a string/unicode object "
3329
Py_TYPE(v)->tp_name);
3340
/* Docstring shown for the decode method of str objects; string_methods
 * attaches it to string_decode.  Describes the optional encoding and
 * errors arguments and the error-handler names that may be used.
 */
PyDoc_STRVAR(decode__doc__,
3341
"S.decode([encoding[,errors]]) -> object\n\
3343
Decodes S using the codec registered for encoding. encoding defaults\n\
3344
to the default encoding. errors may be given to set a different error\n\
3345
handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3346
a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3347
as well as any other name registered with codecs.register_error that is\n\
3348
able to handle UnicodeDecodeErrors.");
3351
/* Implementation of the decode method.
 *
 * Both arguments are optional C strings (encoding, errors) that stay NULL
 * when omitted.  Decoding is delegated to PyString_AsDecodedObject; a
 * result that is neither a string nor a unicode object is rejected with a
 * TypeError naming the returned type.
 */
string_decode(PyStringObject *self, PyObject *args)
3353
char *encoding = NULL;
3354
char *errors = NULL;
3357
if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3359
v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
3362
if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3363
PyErr_Format(PyExc_TypeError,
3364
"decoder did not return a string/unicode object "
3366
Py_TYPE(v)->tp_name);
3377
/* Docstring and implementation of the expandtabs method.
 *
 * Works in two passes over the bytes of self.  The first pass only
 * measures: i accumulates the length up to the latest line break, j the
 * column within the current line, and every addition is guarded against
 * exceeding PY_SSIZE_T_MAX.  The second pass allocates an output string of
 * i + j bytes and fills it, restarting the column count at each newline
 * or carriage return.  An overflow is reported as OverflowError.
 */
PyDoc_STRVAR(expandtabs__doc__,
3378
"S.expandtabs([tabsize]) -> string\n\
3380
Return a copy of S where all tab characters are expanded using spaces.\n\
3381
If tabsize is not given, a tab size of 8 characters is assumed.");
3384
string_expandtabs(PyStringObject *self, PyObject *args)
3386
const char *e, *p, *qe;
3388
Py_ssize_t i, j, incr;
3392
if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3395
/* First pass: determine size of output string */
3396
i = 0; /* chars up to and including most recent \n or \r */
3397
j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3398
e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3399
for (p = PyString_AS_STRING(self); p < e; p++)
3402
incr = tabsize - (j % tabsize);
3403
if (j > PY_SSIZE_T_MAX - incr)
3409
if (j > PY_SSIZE_T_MAX - 1)
3412
if (*p == '\n' || *p == '\r') {
3413
if (i > PY_SSIZE_T_MAX - j)
3420
if (i > PY_SSIZE_T_MAX - j)
3423
/* Second pass: create output string and fill it */
3424
u = PyString_FromStringAndSize(NULL, i + j);
3428
j = 0; /* same as in first pass */
3429
q = PyString_AS_STRING(u); /* next output char */
3430
qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
3432
for (p = PyString_AS_STRING(self); p < e; p++)
3435
i = tabsize - (j % tabsize);
3449
if (*p == '\n' || *p == '\r')
3458
PyErr_SetString(PyExc_OverflowError, "new string is too long");
3462
/* Padding helper behind ljust, rjust, center and zfill.
 *
 * Produces a string made of left fill bytes, the bytes of self, then
 * padding on the right.  When both pad widths are zero and self is an
 * exact str, self itself is returned instead of a copy.  Otherwise a new
 * string of left + size(self) + right bytes is allocated and filled with
 * memset and Py_MEMCPY.
 */
Py_LOCAL_INLINE(PyObject *)
3463
pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
3472
if (left == 0 && right == 0 && PyString_CheckExact(self)) {
3474
return (PyObject *)self;
3477
u = PyString_FromStringAndSize(NULL,
3478
left + PyString_GET_SIZE(self) + right);
3481
memset(PyString_AS_STRING(u), fill, left);
3482
Py_MEMCPY(PyString_AS_STRING(u) + left,
3483
PyString_AS_STRING(self),
3484
PyString_GET_SIZE(self));
3486
memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3493
/* Docstring and implementation of the ljust method.
 *
 * Takes a width and an optional fill character (a space by default).  An
 * exact str that is already at least width bytes long is returned as is;
 * otherwise pad() appends width - size(self) fill bytes on the right.
 */
PyDoc_STRVAR(ljust__doc__,
3494
"S.ljust(width[, fillchar]) -> string\n"
3496
"Return S left justified in a string of length width. Padding is\n"
3497
"done using the specified fill character (default is a space).");
3500
string_ljust(PyStringObject *self, PyObject *args)
3503
char fillchar = ' ';
3505
if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3508
if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3510
return (PyObject*) self;
3513
return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
3517
/* Docstring and implementation of the rjust method.
 *
 * Takes a width and an optional fill character (a space by default).  An
 * exact str that is already at least width bytes long is returned as is;
 * otherwise pad() prepends width - size(self) fill bytes on the left.
 */
PyDoc_STRVAR(rjust__doc__,
3518
"S.rjust(width[, fillchar]) -> string\n"
3520
"Return S right justified in a string of length width. Padding is\n"
3521
"done using the specified fill character (default is a space)");
3524
string_rjust(PyStringObject *self, PyObject *args)
3527
char fillchar = ' ';
3529
if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3532
if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3534
return (PyObject*) self;
3537
return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
3541
/* Docstring and implementation of the center method.
 *
 * Takes a width and an optional fill character (a space by default).  An
 * exact str that is already at least width bytes long is returned as is.
 * Otherwise the total margin marg = width - size(self) is split: the left
 * side gets marg / 2, plus one extra byte when both marg and width are
 * odd, and the remainder goes to the right.  pad() builds the result.
 */
PyDoc_STRVAR(center__doc__,
3542
"S.center(width[, fillchar]) -> string\n"
3544
"Return S centered in a string of length width. Padding is\n"
3545
"done using the specified fill character (default is a space)");
3548
string_center(PyStringObject *self, PyObject *args)
3550
Py_ssize_t marg, left;
3552
char fillchar = ' ';
3554
if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3557
if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3559
return (PyObject*) self;
3562
marg = width - PyString_GET_SIZE(self);
3563
left = marg / 2 + (marg & width & 1);
3565
return pad(self, left, marg - left, fillchar);
3568
/* Docstring and implementation of the zfill method.
 *
 * If self is already at least width bytes long it is returned unchanged
 * when it is an exact str, or copied into a new plain string otherwise.
 * Shorter strings are left-padded with the digit zero through pad(); the
 * byte at index fill (the first byte of the original text) is then
 * checked for a plus or minus sign so the sign can be moved to the front.
 */
PyDoc_STRVAR(zfill__doc__,
3569
"S.zfill(width) -> string\n"
3571
"Pad a numeric string S with zeros on the left, to fill a field\n"
3572
"of the specified width. The string S is never truncated.");
3575
string_zfill(PyStringObject *self, PyObject *args)
3582
if (!PyArg_ParseTuple(args, "n:zfill", &width))
3585
if (PyString_GET_SIZE(self) >= width) {
3586
if (PyString_CheckExact(self)) {
3588
return (PyObject*) self;
3591
return PyString_FromStringAndSize(
3592
PyString_AS_STRING(self),
3593
PyString_GET_SIZE(self)
3597
fill = width - PyString_GET_SIZE(self);
3599
s = pad(self, fill, 0, '0');
3604
p = PyString_AS_STRING(s);
3605
if (p[fill] == '+' || p[fill] == '-') {
3606
/* move sign to beginning of string */
3611
return (PyObject*) s;
3614
/* Docstring and implementation of the isspace method.
 *
 * The bytes of self are viewed as unsigned char.  A one-byte string is
 * answered by a shortcut, an empty string yields False, and longer
 * strings are scanned from p up to e: the loop returns False as soon as a
 * byte fails the check, and True is returned when the scan completes.
 */
PyDoc_STRVAR(isspace__doc__,
3615
"S.isspace() -> bool\n\
3617
Return True if all characters in S are whitespace\n\
3618
and there is at least one character in S, False otherwise.");
3621
string_isspace(PyStringObject *self)
3623
register const unsigned char *p
3624
= (unsigned char *) PyString_AS_STRING(self);
3625
register const unsigned char *e;
3627
/* Shortcut for single character strings */
3628
if (PyString_GET_SIZE(self) == 1 &&
3630
return PyBool_FromLong(1);
3632
/* Special case for empty strings */
3633
if (PyString_GET_SIZE(self) == 0)
3634
return PyBool_FromLong(0);
3636
e = p + PyString_GET_SIZE(self);
3637
for (; p < e; p++) {
3639
return PyBool_FromLong(0);
3641
return PyBool_FromLong(1);
3645
/* Docstring and implementation of the isalpha method.
 *
 * The bytes of self are viewed as unsigned char.  A one-byte string is
 * answered by a shortcut, an empty string yields False, and longer
 * strings are scanned from p up to e: the loop returns False as soon as a
 * byte fails the check, and True is returned when the scan completes.
 */
PyDoc_STRVAR(isalpha__doc__,
3646
"S.isalpha() -> bool\n\
3648
Return True if all characters in S are alphabetic\n\
3649
and there is at least one character in S, False otherwise.");
3652
string_isalpha(PyStringObject *self)
3654
register const unsigned char *p
3655
= (unsigned char *) PyString_AS_STRING(self);
3656
register const unsigned char *e;
3658
/* Shortcut for single character strings */
3659
if (PyString_GET_SIZE(self) == 1 &&
3661
return PyBool_FromLong(1);
3663
/* Special case for empty strings */
3664
if (PyString_GET_SIZE(self) == 0)
3665
return PyBool_FromLong(0);
3667
e = p + PyString_GET_SIZE(self);
3668
for (; p < e; p++) {
3670
return PyBool_FromLong(0);
3672
return PyBool_FromLong(1);
3676
/* Docstring and implementation of the isalnum method.
 *
 * The bytes of self are viewed as unsigned char.  A one-byte string is
 * answered by a shortcut, an empty string yields False, and longer
 * strings are scanned from p up to e: the loop returns False as soon as a
 * byte fails the check, and True is returned when the scan completes.
 */
PyDoc_STRVAR(isalnum__doc__,
3677
"S.isalnum() -> bool\n\
3679
Return True if all characters in S are alphanumeric\n\
3680
and there is at least one character in S, False otherwise.");
3683
string_isalnum(PyStringObject *self)
3685
register const unsigned char *p
3686
= (unsigned char *) PyString_AS_STRING(self);
3687
register const unsigned char *e;
3689
/* Shortcut for single character strings */
3690
if (PyString_GET_SIZE(self) == 1 &&
3692
return PyBool_FromLong(1);
3694
/* Special case for empty strings */
3695
if (PyString_GET_SIZE(self) == 0)
3696
return PyBool_FromLong(0);
3698
e = p + PyString_GET_SIZE(self);
3699
for (; p < e; p++) {
3701
return PyBool_FromLong(0);
3703
return PyBool_FromLong(1);
3707
/* Docstring and implementation of the isdigit method.
 *
 * The bytes of self are viewed as unsigned char.  A one-byte string is
 * answered by a shortcut, an empty string yields False, and longer
 * strings are scanned from p up to e: the loop returns False as soon as a
 * byte fails the check, and True is returned when the scan completes.
 */
PyDoc_STRVAR(isdigit__doc__,
3708
"S.isdigit() -> bool\n\
3710
Return True if all characters in S are digits\n\
3711
and there is at least one character in S, False otherwise.");
3714
string_isdigit(PyStringObject *self)
3716
register const unsigned char *p
3717
= (unsigned char *) PyString_AS_STRING(self);
3718
register const unsigned char *e;
3720
/* Shortcut for single character strings */
3721
if (PyString_GET_SIZE(self) == 1 &&
3723
return PyBool_FromLong(1);
3725
/* Special case for empty strings */
3726
if (PyString_GET_SIZE(self) == 0)
3727
return PyBool_FromLong(0);
3729
e = p + PyString_GET_SIZE(self);
3730
for (; p < e; p++) {
3732
return PyBool_FromLong(0);
3734
return PyBool_FromLong(1);
3738
/* Docstring and implementation of the islower method.
 *
 * A one-byte string is answered directly with islower, and an empty
 * string yields False.  Longer strings are scanned byte by byte: the loop
 * can bail out early with False, it consults islower while the cased flag
 * is still clear, and the final answer is the value of that cased flag.
 */
PyDoc_STRVAR(islower__doc__,
3739
"S.islower() -> bool\n\
3741
Return True if all cased characters in S are lowercase and there is\n\
3742
at least one cased character in S, False otherwise.");
3745
string_islower(PyStringObject *self)
3747
register const unsigned char *p
3748
= (unsigned char *) PyString_AS_STRING(self);
3749
register const unsigned char *e;
3752
/* Shortcut for single character strings */
3753
if (PyString_GET_SIZE(self) == 1)
3754
return PyBool_FromLong(islower(*p) != 0);
3756
/* Special case for empty strings */
3757
if (PyString_GET_SIZE(self) == 0)
3758
return PyBool_FromLong(0);
3760
e = p + PyString_GET_SIZE(self);
3762
for (; p < e; p++) {
3764
return PyBool_FromLong(0);
3765
else if (!cased && islower(*p))
3768
return PyBool_FromLong(cased);
3772
/* Docstring and implementation of the isupper method.
 *
 * A one-byte string is answered directly with isupper, and an empty
 * string yields False.  Longer strings are scanned byte by byte: the loop
 * can bail out early with False, it consults isupper while the cased flag
 * is still clear, and the final answer is the value of that cased flag.
 */
PyDoc_STRVAR(isupper__doc__,
3773
"S.isupper() -> bool\n\
3775
Return True if all cased characters in S are uppercase and there is\n\
3776
at least one cased character in S, False otherwise.");
3779
string_isupper(PyStringObject *self)
3781
register const unsigned char *p
3782
= (unsigned char *) PyString_AS_STRING(self);
3783
register const unsigned char *e;
3786
/* Shortcut for single character strings */
3787
if (PyString_GET_SIZE(self) == 1)
3788
return PyBool_FromLong(isupper(*p) != 0);
3790
/* Special case for empty strings */
3791
if (PyString_GET_SIZE(self) == 0)
3792
return PyBool_FromLong(0);
3794
e = p + PyString_GET_SIZE(self);
3796
for (; p < e; p++) {
3798
return PyBool_FromLong(0);
3799
else if (!cased && isupper(*p))
3802
return PyBool_FromLong(cased);
3806
/* Docstring and implementation of the istitle method.
 *
 * A one-byte string is answered directly with isupper, and an empty
 * string yields False.  Longer strings are scanned while tracking
 * previous_is_cased: a lowercase byte is rejected unless the byte before
 * it was cased, and the flag is cleared again after other bytes.  The
 * final answer is the cased flag.  The second parameter, uncased, is not
 * referenced by the visible code.
 */
PyDoc_STRVAR(istitle__doc__,
3807
"S.istitle() -> bool\n\
3809
Return True if S is a titlecased string and there is at least one\n\
3810
character in S, i.e. uppercase characters may only follow uncased\n\
3811
characters and lowercase characters only cased ones. Return False\n\
3815
string_istitle(PyStringObject *self, PyObject *uncased)
3817
register const unsigned char *p
3818
= (unsigned char *) PyString_AS_STRING(self);
3819
register const unsigned char *e;
3820
int cased, previous_is_cased;
3822
/* Shortcut for single character strings */
3823
if (PyString_GET_SIZE(self) == 1)
3824
return PyBool_FromLong(isupper(*p) != 0);
3826
/* Special case for empty strings */
3827
if (PyString_GET_SIZE(self) == 0)
3828
return PyBool_FromLong(0);
3830
e = p + PyString_GET_SIZE(self);
3832
previous_is_cased = 0;
3833
for (; p < e; p++) {
3834
register const unsigned char ch = *p;
3837
if (previous_is_cased)
3838
return PyBool_FromLong(0);
3839
previous_is_cased = 1;
3842
else if (islower(ch)) {
3843
if (!previous_is_cased)
3844
return PyBool_FromLong(0);
3845
previous_is_cased = 1;
3849
previous_is_cased = 0;
3851
return PyBool_FromLong(cased);
3855
/* Docstring and implementation of the splitlines method.
 *
 * Takes an optional integer keepends.  Starting from an empty list, the
 * bytes of self are scanned with two indices: j marks the start of the
 * current line and i advances to the next newline or carriage return.  A
 * carriage return followed by a newline counts as one line break.  Each
 * line is added with SPLIT_APPEND, including a final SPLIT_APPEND that
 * runs up to len.
 */
PyDoc_STRVAR(splitlines__doc__,
3856
"S.splitlines([keepends]) -> list of strings\n\
3858
Return a list of the lines in S, breaking at line boundaries.\n\
3859
Line breaks are not included in the resulting list unless keepends\n\
3860
is given and true.");
3863
string_splitlines(PyStringObject *self, PyObject *args)
3865
register Py_ssize_t i;
3866
register Py_ssize_t j;
3873
if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3876
data = PyString_AS_STRING(self);
3877
len = PyString_GET_SIZE(self);
3879
/* This does not use the preallocated list because splitlines is
3880
usually run with hundreds of newlines. The overhead of
3881
switching between PyList_SET_ITEM and append causes about a
3882
2-3% slowdown for that common case. A smarter implementation
3883
could move the if check out, so the SET_ITEMs are done first
3884
and the appends only done when the prealloc buffer is full.
3885
That's too much work for little gain.*/
3887
list = PyList_New(0);
3891
for (i = j = 0; i < len; ) {
3894
/* Find a line and append it */
3895
while (i < len && data[i] != '\n' && data[i] != '\r')
3898
/* Skip the line break reading CRLF as one line break */
3901
if (data[i] == '\r' && i + 1 < len &&
3909
SPLIT_APPEND(data, j, eol);
3913
SPLIT_APPEND(data, j, len);
3926
#undef PREALLOC_SIZE
3929
/* __getnewargs__ support: returns a one-element tuple holding a string
 * built from the ob_sval buffer of v and its Py_SIZE length.
 */
string_getnewargs(PyStringObject *v)
3931
return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
3935
#include "stringlib/string_format.h"
3937
/* Docstring for the format method (attached to do_string_format in
 * string_methods).
 */
PyDoc_STRVAR(format__doc__,
3938
"S.format(*args, **kwargs) -> unicode\n\
3942
/* Docstring for the __format__ method (attached to string__format__ in
 * string_methods).
 */
PyDoc_STRVAR(p_format__doc__,
3943
"S.__format__(format_spec) -> unicode\n\
3949
/* Method table of the str type; PyString_Type installs it as tp_methods.
 * Each entry gives the Python-level name, the C implementation, the
 * calling convention flags and, for most entries, a docstring.  The list
 * ends with a NULL sentinel entry.
 */
string_methods[] = {
3950
/* Counterparts of the obsolete stropmodule functions; except
3951
string.maketrans(). */
3952
{"join", (PyCFunction)string_join, METH_O, join__doc__},
3953
{"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3954
{"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3955
{"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3956
{"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3957
{"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3958
{"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3959
{"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3960
{"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3961
{"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3962
{"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3963
{"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3964
{"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3966
{"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3967
{"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3969
{"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
3970
{"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3971
{"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3972
{"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3973
{"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3974
{"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3975
{"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3976
{"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3977
{"rpartition", (PyCFunction)string_rpartition, METH_O,
3979
{"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3981
{"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3982
{"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3984
{"translate", (PyCFunction)string_translate, METH_VARARGS,
3986
{"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3987
{"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3988
{"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3989
{"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3990
{"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3991
{"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
3992
{"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
3993
{"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
3994
{"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
3995
{"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3996
{"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3997
{"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3999
{"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
4001
{"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
4002
{NULL, NULL} /* sentinel */
4006
/* Forward declaration of str_subtype_new, followed by string_new, the
 * tp_new slot of PyString_Type.
 *
 * Requests for any type other than PyString_Type itself are passed on to
 * str_subtype_new.  Otherwise a single optional argument named object is
 * parsed; the visible return paths yield either an empty string or the
 * result of PyObject_Str on that argument.
 */
str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
4009
string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4012
static char *kwlist[] = {"object", 0};
4014
if (type != &PyString_Type)
4015
return str_subtype_new(type, args, kwds);
4016
if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
4019
return PyString_FromString("");
4020
return PyObject_Str(x);
4024
/* Creates an instance of a str subtype.
 *
 * First builds an exact str (tmp) by calling string_new with
 * PyString_Type, then allocates an object of the requested type with
 * room for n items through tp_alloc.  n + 1 bytes are copied from tmp,
 * that is the data plus the one byte that follows it, the cached hash is
 * carried over, and the new object is marked SSTATE_NOT_INTERNED.
 */
str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4026
PyObject *tmp, *pnew;
4029
assert(PyType_IsSubtype(type, &PyString_Type));
4030
tmp = string_new(&PyString_Type, args, kwds);
4033
assert(PyString_CheckExact(tmp));
4034
n = PyString_GET_SIZE(tmp);
4035
pnew = type->tp_alloc(type, n);
4037
Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
4038
((PyStringObject *)pnew)->ob_shash =
4039
((PyStringObject *)tmp)->ob_shash;
4040
((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
4047
/* tp_new slot of PyBaseString_Type: sets a TypeError stating that
 * basestring cannot be instantiated.
 */
basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4049
PyErr_SetString(PyExc_TypeError,
4050
"The basestring type cannot be instantiated");
4055
/* nb_remainder slot for str (the percent operator).  Returns a new
 * reference to Py_NotImplemented when the left operand v is not a
 * string; otherwise formats v with w through PyString_Format.
 */
string_mod(PyObject *v, PyObject *w)
4057
if (!PyString_Check(v)) {
4058
Py_INCREF(Py_NotImplemented);
4059
return Py_NotImplemented;
4061
return PyString_Format(v, w);
4064
/* Docstring installed as tp_doc of PyBaseString_Type. */
PyDoc_STRVAR(basestring_doc,
4065
"Type basestring cannot be instantiated; it is the base for str and unicode.");
4067
/* Number protocol table for str, referenced as tp_as_number by
 * PyString_Type.  The visible entry wires nb_remainder to string_mod.
 */
static PyNumberMethods string_as_number = {
4072
string_mod, /*nb_remainder*/
4076
/* Type object for basestring.  It derives from PyBaseObject_Type, allows
 * subclassing (Py_TPFLAGS_BASETYPE), uses basestring_doc as its docstring
 * and basestring_new as tp_new; the other visible slots are left at 0.
 */
PyTypeObject PyBaseString_Type = {
4077
PyVarObject_HEAD_INIT(&PyType_Type, 0)
4087
0, /* tp_as_number */
4088
0, /* tp_as_sequence */
4089
0, /* tp_as_mapping */
4093
0, /* tp_getattro */
4094
0, /* tp_setattro */
4095
0, /* tp_as_buffer */
4096
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
4097
basestring_doc, /* tp_doc */
4098
0, /* tp_traverse */
4100
0, /* tp_richcompare */
4101
0, /* tp_weaklistoffset */
4103
0, /* tp_iternext */
4107
&PyBaseObject_Type, /* tp_base */
4109
0, /* tp_descr_get */
4110
0, /* tp_descr_set */
4111
0, /* tp_dictoffset */
4114
basestring_new, /* tp_new */
4118
/* Docstring and type object for str.
 *
 * PyString_Type derives from PyBaseString_Type and wires up the slots
 * visible below: deallocation, printing, repr and str conversion,
 * hashing, rich comparison, the number, sequence, mapping and buffer
 * protocol tables, the string_methods table, string_new as constructor
 * and PyObject_Del as tp_free.  Attribute lookup uses
 * PyObject_GenericGetAttr.
 */
PyDoc_STRVAR(string_doc,
4119
"str(object) -> string\n\
4121
Return a nice string representation of the object.\n\
4122
If the argument is a string, the return value is the same object.");
4124
PyTypeObject PyString_Type = {
4125
PyVarObject_HEAD_INIT(&PyType_Type, 0)
4127
sizeof(PyStringObject),
4129
string_dealloc, /* tp_dealloc */
4130
(printfunc)string_print, /* tp_print */
4134
string_repr, /* tp_repr */
4135
&string_as_number, /* tp_as_number */
4136
&string_as_sequence, /* tp_as_sequence */
4137
&string_as_mapping, /* tp_as_mapping */
4138
(hashfunc)string_hash, /* tp_hash */
4140
string_str, /* tp_str */
4141
PyObject_GenericGetAttr, /* tp_getattro */
4142
0, /* tp_setattro */
4143
&string_as_buffer, /* tp_as_buffer */
4144
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
4145
Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
4146
Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
4147
string_doc, /* tp_doc */
4148
0, /* tp_traverse */
4150
(richcmpfunc)string_richcompare, /* tp_richcompare */
4151
0, /* tp_weaklistoffset */
4153
0, /* tp_iternext */
4154
string_methods, /* tp_methods */
4157
&PyBaseString_Type, /* tp_base */
4159
0, /* tp_descr_get */
4160
0, /* tp_descr_set */
4161
0, /* tp_dictoffset */
4164
string_new, /* tp_new */
4165
PyObject_Del, /* tp_free */
4169
/* Concatenates w onto the string referenced through pv.  A NULL w, or a
 * *pv that is not a string, is handled as a special case first; the
 * normal path computes the result with string_concat.
 */
PyString_Concat(register PyObject **pv, register PyObject *w)
4171
register PyObject *v;
4174
if (w == NULL || !PyString_Check(*pv)) {
4179
v = string_concat((PyStringObject *) *pv, w);
4185
/* Variant of PyString_Concat: performs the same concatenation of w onto
 * *pv by calling PyString_Concat.
 */
PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
4187
PyString_Concat(pv, w);
4192
/* The following function breaks the notion that strings are immutable:
4193
it changes the size of a string. We get away with this only if there
4194
is only one module referencing the object. You can also think of it
4195
as creating a new string object and destroying the old one, only
4196
more efficiently. In any case, don't use this if the string may
4197
already be known to some other part of the code...
4198
Note that if there's not enough memory to resize the string, the original
4199
string object at *pv is deallocated, *pv is set to NULL, an "out of
4200
memory" exception is set, and -1 is returned. Else (on success) 0 is
4201
returned, and the value in *pv may or may not be the same as on input.
4202
As always, an extra byte is allocated for a trailing \0 byte (newsize
4203
does *not* include that), and a trailing \0 byte is stored.
4207
/* Resizes the string object referenced through pv to newsize bytes.
 *
 * The call is rejected with PyErr_BadInternalCall unless the object is a
 * string with a reference count of exactly 1, newsize is non-negative
 * and the string is not interned.  The object is reallocated with
 * PyObject_REALLOC to sizeof(PyStringObject) + newsize, bracketed by
 * _Py_ForgetReference and _Py_NewReference.  Afterwards the size field is
 * updated, a zero byte is stored at index newsize and the cached hash is
 * reset to -1.
 */
_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
4209
register PyObject *v;
4210
register PyStringObject *sv;
4212
if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
4213
PyString_CHECK_INTERNED(v)) {
4216
PyErr_BadInternalCall();
4219
/* XXX UNREF/NEWREF interface should be more symmetrical */
4221
_Py_ForgetReference(v);
4223
PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
4229
_Py_NewReference(*pv);
4230
sv = (PyStringObject *) *pv;
4231
Py_SIZE(sv) = newsize;
4232
sv->ob_sval[newsize] = '\0';
4233
sv->ob_shash = -1; /* invalidate cached hash value */
4237
/* Helpers for formatstring */
4239
/* Fetches the next positional argument for a format operation.
 *
 * args is the argument object, arglen the number of available arguments
 * and *p_argidx the index of the next one to consume.  While the index
 * is below arglen an item is returned (PyTuple_GetItem on the visible
 * path); once the arguments are exhausted a TypeError is set.
 */
Py_LOCAL_INLINE(PyObject *)
4240
getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
4242
Py_ssize_t argidx = *p_argidx;
4243
if (argidx < arglen) {
4248
return PyTuple_GetItem(args, argidx);
4250
PyErr_SetString(PyExc_TypeError,
4251
"not enough arguments for format string");
4262
/* Bit flags collected by PyString_Format while it parses a conversion
 * specifier.  In order, they are set by the minus, plus, space, hash and
 * zero flag characters.
 */
#define F_LJUST (1<<0)
4263
#define F_SIGN (1<<1)
4264
#define F_BLANK (1<<2)
4265
#define F_ALT (1<<3)
4266
#define F_ZERO (1<<4)
4268
/* Formats the float value of v into buf for a float conversion.
 *
 * v is converted with PyFloat_AsDouble; a failure is reported as a
 * TypeError naming the type of v.  Before anything is written, buflen is
 * checked against the worst-case output length for the given precision
 * and conversion type, and OverflowError is raised if it could be
 * exceeded.  A printf-style format is then assembled with PyOS_snprintf
 * (adding the hash flag when F_ALT is set) and applied with
 * PyOS_ascii_formatd.  Returns the length of the text in buf.
 */
Py_LOCAL_INLINE(int)
4269
formatfloat(char *buf, size_t buflen, int flags,
4270
int prec, int type, PyObject *v)
4272
/* fmt = '%#.' + `prec` + `type`
4273
worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
4276
x = PyFloat_AsDouble(v);
4277
if (x == -1.0 && PyErr_Occurred()) {
4278
PyErr_Format(PyExc_TypeError, "float argument required, "
4279
"not %.200s", Py_TYPE(v)->tp_name);
4284
if (type == 'f' && fabs(x)/1e25 >= 1e25)
4286
/* Worst case length calc to ensure no buffer overrun:
4290
buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
4291
for any double rep.)
4292
len = 1 + prec + 1 + 2 + 5 = 9 + prec
4295
buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4296
len = 1 + 50 + 1 + prec = 52 + prec
4298
If prec=0 the effective precision is 1 (the leading digit is
4299
always given), therefore increase the length by one.
4302
if (((type == 'g' || type == 'G') &&
4303
buflen <= (size_t)10 + (size_t)prec) ||
4304
(type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
4305
PyErr_SetString(PyExc_OverflowError,
4306
"formatted float is too long (precision too large?)");
4309
PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4310
(flags&F_ALT) ? "#" : "",
4312
PyOS_ascii_formatd(buf, buflen, fmt, x);
4313
return (int)strlen(buf);
4316
/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4317
* the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4318
* Python's regular ints.
4319
* Return value: a new PyString*, or NULL if error.
4320
* . *pbuf is set to point into it,
4321
* *plen set to the # of chars following that.
4322
* Caller must decref it when done using pbuf.
4323
* The string starting at *pbuf is of the form
4324
* "-"? ("0x" | "0X")? digit+
4325
* "0x"/"0X" are present only for x and X conversions, with F_ALT
4326
* set in flags. The case of hex digits will be correct.
4327
* There will be at least prec digits, zero-filled on the left if
4328
* necessary to get that many.
4329
* val object to be converted
4330
* flags bitmask of format flags; only F_ALT is looked at
4331
* prec minimum number of digits; 0-fill on left if needed
4332
* type a character in [duoxX]; u acts the same as d
4334
* CAUTION: o, x and X conversions on regular ints can never
4335
* produce a '-' sign, but can for Python's unbounded ints.
4338
/* Formats a Python long for an integer conversion code.
 *
 * The starting text is obtained from the tp_str, nb_oct or nb_hex slot
 * of val, depending on the conversion type.  Because that text is
 * edited in place, the result object must have a reference count of
 * exactly 1, and its length must fit in an int.  The code then accounts
 * for a trailing L and a leading sign, drops the base marker unless
 * F_ALT is set, zero-fills on the left up to prec digits by building a
 * fresh string, and finally fixes the letter case for hex output.  The
 * buffer and its length are reported through pbuf and plen.
 */
_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4339
char **pbuf, int *plen)
4341
PyObject *result = NULL;
4344
int sign; /* 1 if '-', else 0 */
4345
int len; /* number of characters */
4347
int numdigits; /* len == numnondigits + numdigits */
4348
int numnondigits = 0;
4353
result = Py_TYPE(val)->tp_str(val);
4356
result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4361
result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4364
assert(!"'type' not in [duoxX]");
4369
buf = PyString_AsString(result);
4375
/* To modify the string in-place, there can only be one reference. */
4376
if (Py_REFCNT(result) != 1) {
4377
PyErr_BadInternalCall();
4380
llen = PyString_Size(result);
4381
if (llen > INT_MAX) {
4382
PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4386
if (buf[len-1] == 'L') {
4390
sign = buf[0] == '-';
4391
numnondigits += sign;
4392
numdigits = len - numnondigits;
4393
assert(numdigits > 0);
4395
/* Get rid of base marker unless F_ALT */
4396
if ((flags & F_ALT) == 0) {
4397
/* Need to skip 0x, 0X or 0. */
4401
assert(buf[sign] == '0');
4402
/* If 0 is only digit, leave it alone. */
4403
if (numdigits > 1) {
4410
assert(buf[sign] == '0');
4411
assert(buf[sign + 1] == 'x');
4422
assert(len == numnondigits + numdigits);
4423
assert(numdigits > 0);
4426
/* Fill with leading zeroes to meet minimum width. */
4427
if (prec > numdigits) {
4428
PyObject *r1 = PyString_FromStringAndSize(NULL,
4429
numnondigits + prec);
4435
b1 = PyString_AS_STRING(r1);
4436
for (i = 0; i < numnondigits; ++i)
4438
for (i = 0; i < prec - numdigits; i++)
4440
for (i = 0; i < numdigits; i++)
4445
buf = PyString_AS_STRING(result);
4446
len = numnondigits + prec;
4449
/* Fix up case for hex conversions. */
4451
/* Need to convert all lower case letters to upper case,
4452
and need to convert 0x to 0X (and -0x to -0X). */
4453
for (i = 0; i < len; i++)
4454
if (buf[i] >= 'a' && buf[i] <= 'x')
4462
/* Formats the C long value of v into buf for an integer conversion.
 *
 * v is converted with PyInt_AsLong; a failure is reported as a TypeError
 * naming the type of v.  Negative values get special treatment for the
 * u, x, X and o conversions.  With F_ALT and a hex conversion the 0x or
 * 0X prefix is written by this function itself rather than left to the C
 * library, for the portability reasons listed in the comment below.
 * buflen is checked against the worst-case output length before writing,
 * with OverflowError raised if it is too small.  Returns the length of
 * the text in buf.
 */
Py_LOCAL_INLINE(int)
4463
formatint(char *buf, size_t buflen, int flags,
4464
int prec, int type, PyObject *v)
4466
/* fmt = '%#.' + `prec` + 'l' + `type`
4467
worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4469
char fmt[64]; /* plenty big enough! */
4473
x = PyInt_AsLong(v);
4474
if (x == -1 && PyErr_Occurred()) {
4475
PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4476
Py_TYPE(v)->tp_name);
4479
if (x < 0 && type == 'u') {
4482
if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4489
if ((flags & F_ALT) &&
4490
(type == 'x' || type == 'X')) {
4491
/* When converting under %#x or %#X, there are a number
4492
* of issues that cause pain:
4493
* - when 0 is being converted, the C standard leaves off
4494
* the '0x' or '0X', which is inconsistent with other
4495
* %#x/%#X conversions and inconsistent with Python's
4497
* - there are platforms that violate the standard and
4498
* convert 0 with the '0x' or '0X'
4499
* (Metrowerks, Compaq Tru64)
4500
* - there are platforms that give '0x' when converting
4501
* under %#X, but convert 0 in accordance with the
4502
* standard (OS/2 EMX)
4504
* We can achieve the desired consistency by inserting our
4505
* own '0x' or '0X' prefix, and substituting %x/%X in place
4508
* Note that this is the same approach as used in
4509
* formatint() in unicodeobject.c
4511
PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4512
sign, type, prec, type);
4515
PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4516
sign, (flags&F_ALT) ? "#" : "",
4520
/* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4521
* worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4523
if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4524
PyErr_SetString(PyExc_OverflowError,
4525
"formatted integer is too long (precision too large?)");
4529
PyOS_snprintf(buf, buflen, fmt, -x);
4531
PyOS_snprintf(buf, buflen, fmt, x);
4532
return (int)strlen(buf);
4535
/* Stores the single character for a c conversion into buf[0].
 *
 * A string argument is parsed with the c converter of PyArg_Parse, any
 * other object with the b converter; both report failure with the same
 * custom message.  The caller must supply a buffer of at least two
 * bytes, as the comment below states.
 */
Py_LOCAL_INLINE(int)
4536
formatchar(char *buf, size_t buflen, PyObject *v)
4538
/* presume that the buffer is at least 2 characters long */
4539
if (PyString_Check(v)) {
4540
if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4544
if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4551
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4553
FORMATBUFLEN is the length of the buffer in which the floats, ints, &
4554
chars are formatted. XXX This is a magic number. Each formatting
4555
routine does bounds checking to ensure no overflow, but a better
4556
solution may be to malloc a buffer of appropriate size for each
4557
format. For now, the current solution is sufficient.
4559
/* Size in bytes of the formatbuf scratch array that PyString_Format
 * declares for the float, int and char formatting helpers.
 */
#define FORMATBUFLEN (size_t)120
4562
/* PyString_Format: build the result of the string formatting operation
   `format % args`.

   format must be a string object and args must be non-NULL; otherwise
   PyErr_BadInternalCall() is raised.  Output is accumulated in a string
   object that starts at len(format) + 100 bytes and is resized on demand.
   The failures raised here are TypeError, ValueError and MemoryError
   (via PyErr_NoMemory).  When Py_USING_UNICODE is defined and a unicode
   argument is met, the rest of the format is decoded and handed to
   PyUnicode_Format(), and the two pieces are joined with
   PyUnicode_Concat(). */
PyString_Format(PyObject *format, PyObject *args)
4565
Py_ssize_t arglen, argidx;
4566
Py_ssize_t reslen, rescnt, fmtcnt;
4568
PyObject *result, *orig_args;
4569
#ifdef Py_USING_UNICODE
4572
PyObject *dict = NULL;
4573
/* Reject a missing or non-string format, or missing args. */
if (format == NULL || !PyString_Check(format) || args == NULL) {
4574
PyErr_BadInternalCall();
4578
fmt = PyString_AS_STRING(format);
4579
fmtcnt = PyString_GET_SIZE(format);
4580
/* Initial output size: the format length plus 100 bytes of slack.
   rescnt appears to track the space still free in the result (the final
   trim resizes to reslen - rescnt). */
reslen = rescnt = fmtcnt + 100;
4581
result = PyString_FromStringAndSize((char *)NULL, reslen);
4584
res = PyString_AsString(result);
4585
if (PyTuple_Check(args)) {
4586
arglen = PyTuple_GET_SIZE(args);
4593
/* Mapping-style argument test: the type provides tp_as_mapping and the
   object is neither a tuple nor a basestring instance. */
if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4594
!PyObject_TypeCheck(args, &PyBaseString_Type))
4596
/* Main loop: consume the format string one byte at a time. */
while (--fmtcnt >= 0) {
4599
rescnt = fmtcnt + 100;
4601
if (_PyString_Resize(&result, reslen) < 0)
4603
res = PyString_AS_STRING(result)
4610
/* Got a format specifier */
4612
Py_ssize_t width = -1;
4618
PyObject *temp = NULL;
4622
char formatbuf[FORMATBUFLEN];
4623
/* For format{float,int,char}() */
4624
#ifdef Py_USING_UNICODE
4625
/* Position of this specifier and the argument index at its start; the
   unicode paths below restore argidx from argidx_start. */
char *fmt_start = fmt;
4626
Py_ssize_t argidx_start = argidx;
4637
PyErr_SetString(PyExc_TypeError,
4638
"format requires a mapping");
4644
/* Skip over balanced parentheses */
4645
while (pcount > 0 && --fmtcnt >= 0) {
4648
else if (*fmt == '(')
4652
keylen = fmt - keystart - 1;
4653
/* Ran out of format text, or parentheses still open: the key is
   incomplete. */
if (fmtcnt < 0 || pcount > 0) {
4654
PyErr_SetString(PyExc_ValueError,
4655
"incomplete format key");
4658
key = PyString_FromStringAndSize(keystart,
4666
/* The value found under the key becomes the argument source. */
args = PyObject_GetItem(dict, key);
4675
/* Collect conversion flags; each recognized flag character sets its bit
   and continues with the next format byte. */
while (--fmtcnt >= 0) {
4676
switch (c = *fmt++) {
4677
case '-': flags |= F_LJUST; continue;
4678
case '+': flags |= F_SIGN; continue;
4679
case ' ': flags |= F_BLANK; continue;
4680
case '#': flags |= F_ALT; continue;
4681
case '0': flags |= F_ZERO; continue;
4686
/* Width taken from the next argument, which must be an int. */
v = getnextarg(args, arglen, &argidx);
4689
if (!PyInt_Check(v)) {
4690
PyErr_SetString(PyExc_TypeError,
4694
width = PyInt_AsLong(v);
4702
/* Width written as decimal digits in the format itself. */
else if (c >= 0 && isdigit(c)) {
4704
while (--fmtcnt >= 0) {
4705
c = Py_CHARMASK(*fmt++);
4708
/* Overflow guard: width*10 must divide back to width. */
if ((width*10) / 10 != width) {
4714
width = width*10 + (c - '0');
4722
/* Precision taken from the next argument, which must be an int. */
v = getnextarg(args, arglen, &argidx);
4725
if (!PyInt_Check(v)) {
4731
prec = PyInt_AsLong(v);
4737
/* Precision written as decimal digits in the format itself. */
else if (c >= 0 && isdigit(c)) {
4739
while (--fmtcnt >= 0) {
4740
c = Py_CHARMASK(*fmt++);
4743
/* Same overflow guard as for width. */
if ((prec*10) / 10 != prec) {
4749
prec = prec*10 + (c - '0');
4754
/* The length modifiers h, l and L are recognized here. */
if (c == 'h' || c == 'l' || c == 'L') {
4760
PyErr_SetString(PyExc_ValueError,
4761
"incomplete format");
4765
/* Fetch the argument that this conversion consumes. */
v = getnextarg(args, arglen, &argidx);
4777
#ifdef Py_USING_UNICODE
4778
if (PyUnicode_Check(v)) {
4780
argidx = argidx_start;
4784
/* str() conversion of the argument. */
temp = _PyObject_Str(v);
4785
#ifdef Py_USING_UNICODE
4786
/* str() may itself produce unicode; that case also rewinds argidx. */
if (temp != NULL && PyUnicode_Check(temp)) {
4789
argidx = argidx_start;
4796
/* repr() conversion of the argument. */
temp = PyObject_Repr(v);
4799
if (!PyString_Check(temp)) {
4800
PyErr_SetString(PyExc_TypeError,
4801
"%s argument has non-string str()");
4805
pbuf = PyString_AS_STRING(temp);
4806
len = PyString_GET_SIZE(temp);
4807
/* A non-negative precision smaller than the text length is handled
   here (presumably by truncating len to prec). */
if (prec >= 0 && len > prec)
4819
/* Integer conversions require a number. */
if (PyNumber_Check(v)) {
4820
PyObject *iobj=NULL;
4822
if (PyInt_Check(v) || (PyLong_Check(v))) {
4827
/* Other numbers: try int() first and fall back to long(). */
iobj = PyNumber_Int(v);
4828
if (iobj==NULL) iobj = PyNumber_Long(v);
4831
if (PyInt_Check(iobj)) {
4834
len = formatint(pbuf,
4836
flags, prec, c, iobj);
4842
else if (PyLong_Check(iobj)) {
4846
temp = _PyString_FormatLong(iobj, flags,
4847
prec, c, &pbuf, &ilen);
4860
PyErr_Format(PyExc_TypeError,
4861
"%%%c format: a number is required, "
4862
"not %.200s", c, Py_TYPE(v)->tp_name);
4877
len = formatfloat(pbuf, sizeof(formatbuf),
4886
#ifdef Py_USING_UNICODE
4887
if (PyUnicode_Check(v)) {
4889
argidx = argidx_start;
4894
len = formatchar(pbuf, sizeof(formatbuf), v);
4899
/* Unknown conversion character: the error reports the character, its
   code and its offset within the format string. */
PyErr_Format(PyExc_ValueError,
4900
"unsupported format character '%c' (0x%x) "
4903
(Py_ssize_t)(fmt - 1 -
4904
PyString_AsString(format)));
4908
/* Sign handling: an explicit leading sign in the formatted text, else
   the '+' flag, else the ' ' flag. */
if (*pbuf == '-' || *pbuf == '+') {
4912
else if (flags & F_SIGN)
4914
else if (flags & F_BLANK)
4921
/* Not enough free space for this field: grow the result buffer. */
if (rescnt - (sign != 0) < width) {
4923
rescnt = width + fmtcnt + 100;
4928
return PyErr_NoMemory();
4930
if (_PyString_Resize(&result, reslen) < 0) {
4934
res = PyString_AS_STRING(result)
4944
/* With '#' and x/X the formatted number must begin with "0x" or "0X". */
if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4945
assert(pbuf[0] == '0');
4946
assert(pbuf[1] == c);
4957
/* Field wider than the value and not left-justified: loop until width
   has been reduced to len (padding on the left). */
if (width > len && !(flags & F_LJUST)) {
4961
} while (--width > len);
4966
if ((flags & F_ALT) &&
4967
(c == 'x' || c == 'X')) {
4968
assert(pbuf[0] == '0');
4969
assert(pbuf[1] == c);
4974
/* Copy the converted text into the result. */
Py_MEMCPY(res, pbuf, len);
4977
/* Loop over whatever width remains beyond len (padding on the right). */
while (--width >= len) {
4981
/* In mapping mode, a conversion other than %% must not leave
   positional arguments unconsumed. */
if (dict && (argidx < arglen) && c != '%') {
4982
PyErr_SetString(PyExc_TypeError,
4983
"not all arguments converted during string formatting");
4990
/* Leftover positional arguments in non-mapping mode are an error. */
if (argidx < arglen && !dict) {
4991
PyErr_SetString(PyExc_TypeError,
4992
"not all arguments converted during string formatting");
4998
/* Trim the unused tail of the result buffer.
   NOTE(review): the return value of _PyString_Resize() is not checked
   on this line - confirm a failed resize cannot go unnoticed. */
_PyString_Resize(&result, reslen - rescnt);
5001
#ifdef Py_USING_UNICODE
5007
/* Fiddle args right (remove the first argidx arguments) */
5008
if (PyTuple_Check(orig_args) && argidx > 0) {
5010
Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
5015
PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
5017
PyTuple_SET_ITEM(v, n, w);
5021
Py_INCREF(orig_args);
5025
/* Take what we have of the result and let the Unicode formatting
5026
function format the rest of the input. */
5027
rescnt = res - PyString_AS_STRING(result);
5028
if (_PyString_Resize(&result, rescnt))
5030
fmtcnt = PyString_GET_SIZE(format) - \
5031
(fmt - PyString_AS_STRING(format));
5032
format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
5035
v = PyUnicode_Format(format, args);
5039
/* Paste what we have (result) to what the Unicode formatting
5040
function returned (v) and return the result (or error) */
5041
w = PyUnicode_Concat(result, v);
5046
#endif /* Py_USING_UNICODE */
5057
/* PyString_InternInPlace: intern the string object that *p points to, using
   the file-level `interned` dictionary.

   p - address of a string object reference.  A NULL or non-string *p is a
       fatal error.

   The dictionary is created on first use.  The string is stored as both
   key and value, and its state is set to SSTATE_INTERNED_MORTAL. */
PyString_InternInPlace(PyObject **p)
5059
register PyStringObject *s = (PyStringObject *)(*p);
5061
if (s == NULL || !PyString_Check(s))
5062
Py_FatalError("PyString_InternInPlace: strings only please!");
5063
/* If it's a string subclass, we don't really know what putting
5064
it in the interned dict might do. */
5065
if (!PyString_CheckExact(s))
5067
/* Strings that already carry an interned state are tested here. */
if (PyString_CHECK_INTERNED(s))
5069
/* Create the interned dictionary lazily. */
if (interned == NULL) {
5070
interned = PyDict_New();
5071
if (interned == NULL) {
5072
PyErr_Clear(); /* Don't leave an exception */
5076
/* Check whether an equal string is already in the dictionary. */
t = PyDict_GetItem(interned, (PyObject *)s);
5084
/* Not present yet: register s as both key and value. */
if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
5088
/* The two references in interned are not counted by refcnt.
5089
The string deallocator will take care of this */
5091
PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
5095
/* PyString_InternImmortal: intern *p through PyString_InternInPlace(), then
   set its interned state to SSTATE_INTERNED_IMMORTAL unless it already has
   that state.

   p - address of a string object reference, passed on unchanged to
       PyString_InternInPlace(). */
PyString_InternImmortal(PyObject **p)
5097
PyString_InternInPlace(p);
5098
/* Only strings that are not yet immortal are changed. */
if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
5099
PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
5106
/* PyString_InternFromString: create a string object from the C string cp
   with PyString_FromString() and intern it with PyString_InternInPlace().

   cp - source C string, handed to PyString_FromString(). */
PyString_InternFromString(const char *cp)
5108
PyObject *s = PyString_FromString(cp);
5111
/* s is passed by address; the interning call works on the reference in s. */
PyString_InternInPlace(&s);
5119
/* Cleanup of the file-level string tables (the enclosing function header is
   not visible here; presumably the module finalizer - confirm).  Every slot
   of characters[], one per unsigned char value, is released and reset to
   NULL, and then nullstring is released. */
for (i = 0; i < UCHAR_MAX + 1; i++) {
5120
Py_XDECREF(characters[i]);
5121
characters[i] = NULL;
5123
Py_XDECREF(nullstring);
5127
void _Py_ReleaseInternedStrings(void)
5132
Py_ssize_t immortal_size = 0, mortal_size = 0;
5134
if (interned == NULL || !PyDict_Check(interned))
5136
keys = PyDict_Keys(interned);
5137
if (keys == NULL || !PyList_Check(keys)) {
5142
/* Since _Py_ReleaseInternedStrings() is intended to help a leak
5143
detector, interned strings are not forcibly deallocated; rather, we
5144
give them their stolen references back, and then clear and DECREF
5145
the interned dict. */
5147
n = PyList_GET_SIZE(keys);
5148
fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5150
for (i = 0; i < n; i++) {
5151
s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5152
switch (s->ob_sstate) {
5153
case SSTATE_NOT_INTERNED:
5154
/* XXX Shouldn't happen */
5156
case SSTATE_INTERNED_IMMORTAL:
5158
immortal_size += Py_SIZE(s);
5160
case SSTATE_INTERNED_MORTAL:
5162
mortal_size += Py_SIZE(s);
5165
Py_FatalError("Inconsistent interned string state.");
5167
s->ob_sstate = SSTATE_NOT_INTERNED;
5169
fprintf(stderr, "total size of all interned strings: "
5170
"%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5171
"mortal/immortal\n", mortal_size, immortal_size);
5173
PyDict_Clear(interned);
5174
Py_DECREF(interned);