202
202
oldstr = unicode->str;
203
PyMem_RESIZE(unicode->str, Py_UNICODE, length + 1);
203
unicode->str = PyObject_REALLOC(unicode->str,
204
sizeof(Py_UNICODE) * (length + 1));
204
205
if (!unicode->str) {
205
206
unicode->str = (Py_UNICODE *)oldstr;
206
207
PyErr_NoMemory();
239
240
return unicode_empty;
243
/* Ensure we won't overflow the size. */
244
if (length > ((PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) - 1)) {
245
return (PyUnicodeObject *)PyErr_NoMemory();
242
248
/* Unicode freelist & memory allocation */
243
249
if (unicode_freelist) {
244
250
unicode = unicode_freelist;
249
255
never downsize it. */
250
256
if ((unicode->length < length) &&
251
257
unicode_resize(unicode, length) < 0) {
252
PyMem_DEL(unicode->str);
258
PyObject_DEL(unicode->str);
257
unicode->str = PyMem_NEW(Py_UNICODE, length + 1);
263
size_t new_size = sizeof(Py_UNICODE) * ((size_t)length + 1);
264
unicode->str = (Py_UNICODE*) PyObject_MALLOC(new_size);
259
266
PyObject_INIT(unicode, &PyUnicode_Type);
262
270
unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type);
263
271
if (unicode == NULL)
265
unicode->str = PyMem_NEW(Py_UNICODE, length + 1);
273
new_size = sizeof(Py_UNICODE) * ((size_t)length + 1);
274
unicode->str = (Py_UNICODE*) PyObject_MALLOC(new_size);
268
277
if (!unicode->str) {
1713
1726
if (s[i] >= 0x10000)
1716
v = PyString_FromStringAndSize(NULL,
1717
2 * (size + pairs + (byteorder == 0)));
1729
/* 2 * (size + pairs + (byteorder == 0)) */
1730
if (size > PY_SSIZE_T_MAX ||
1731
size > PY_SSIZE_T_MAX - pairs - (byteorder == 0))
1732
return PyErr_NoMemory();
1733
nsize = (size + pairs + (byteorder == 0));
1734
bytesize = nsize * 2;
1735
if (bytesize / 2 != nsize)
1736
return PyErr_NoMemory();
1737
v = PyString_FromStringAndSize(NULL, bytesize);
2270
2294
x += 10 + c - 'A';
2272
#ifndef Py_UNICODE_WIDE
2297
/* UCS-2 character */
2298
*p++ = (Py_UNICODE) x;
2299
else if (x <= 0x10ffff) {
2300
/* UCS-4 character. Either store directly, or as
2302
#ifdef Py_UNICODE_WIDE
2303
*p++ = (Py_UNICODE) x;
2306
*p++ = 0xD800 + (Py_UNICODE) (x >> 10);
2307
*p++ = 0xDC00 + (Py_UNICODE) (x & 0x03FF);
2310
endinpos = s-starts;
2311
outpos = p-PyUnicode_AS_UNICODE(v);
2274
2312
if (unicode_decode_call_errorhandler(
2275
2313
errors, &errorHandler,
2276
2314
"rawunicodeescape", "\\Uxxxxxxxx out of range",
2306
2342
static const char *hexdigit = "0123456789abcdef";
2308
2343
#ifdef Py_UNICODE_WIDE
2309
repr = PyString_FromStringAndSize(NULL, 10 * size);
2344
const Py_ssize_t expandsize = 10;
2311
repr = PyString_FromStringAndSize(NULL, 6 * size);
2346
const Py_ssize_t expandsize = 6;
2349
if (size > PY_SSIZE_T_MAX / expandsize)
2350
return PyErr_NoMemory();
2352
repr = PyString_FromStringAndSize(NULL, expandsize * size);
2313
2353
if (repr == NULL)
2333
2373
*p++ = hexdigit[ch & 15];
2377
/* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
2378
if (ch >= 0xD800 && ch < 0xDC00) {
2384
if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
2385
ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
2388
*p++ = hexdigit[(ucs >> 28) & 0xf];
2389
*p++ = hexdigit[(ucs >> 24) & 0xf];
2390
*p++ = hexdigit[(ucs >> 20) & 0xf];
2391
*p++ = hexdigit[(ucs >> 16) & 0xf];
2392
*p++ = hexdigit[(ucs >> 12) & 0xf];
2393
*p++ = hexdigit[(ucs >> 8) & 0xf];
2394
*p++ = hexdigit[(ucs >> 4) & 0xf];
2395
*p++ = hexdigit[ucs & 0xf];
2398
/* Fall through: isolated surrogates are copied as-is */
2337
2403
/* Map 16-bit characters to '\uxxxx' */
2338
2404
if (ch >= 256) {
4788
if (left > PY_SSIZE_T_MAX - self->length ||
4789
right > PY_SSIZE_T_MAX - (left + self->length)) {
4790
PyErr_SetString(PyExc_OverflowError, "padded string is too long");
4722
4793
u = _PyUnicode_New(left + self->length + right);
5699
5771
/* First pass: determine size of output string */
5701
e = self->str + self->length;
5772
i = 0; /* chars up to and including most recent \n or \r */
5773
j = 0; /* chars since most recent \n or \r (use in tab calculations) */
5774
e = self->str + self->length; /* end of input */
5702
5775
for (p = self->str; p < e; p++)
5703
5776
if (*p == '\t') {
5704
5777
if (tabsize > 0) {
5705
j += tabsize - (j % tabsize);
5707
PyErr_SetString(PyExc_OverflowError,
5708
"new string is too long");
5778
incr = tabsize - (j % tabsize); /* cannot overflow */
5779
if (j > PY_SSIZE_T_MAX - incr)
5785
if (j > PY_SSIZE_T_MAX - 1)
5716
5788
if (*p == '\n' || *p == '\r') {
5789
if (i > PY_SSIZE_T_MAX - j)
5720
PyErr_SetString(PyExc_OverflowError,
5721
"new string is too long");
5728
PyErr_SetString(PyExc_OverflowError, "new string is too long");
5796
if (i > PY_SSIZE_T_MAX - j)
5732
5799
/* Second pass: create output string and fill it */
5733
5800
u = _PyUnicode_New(i + j);
5804
j = 0; /* same as in first pass */
5805
q = u->str; /* next output char */
5806
qe = u->str + u->length; /* end of output */
5740
5808
for (p = self->str; p < e; p++)
5741
5809
if (*p == '\t') {
5742
5810
if (tabsize > 0) {
5743
5811
i = tabsize - (j % tabsize);
5752
5825
if (*p == '\n' || *p == '\r')
5756
5829
return (PyObject*) u;
5834
PyErr_SetString(PyExc_OverflowError, "new string is too long");
5759
5838
PyDoc_STRVAR(find__doc__,
7121
7200
return PyUnicode_FromUnicode(NULL, 0);
7123
7202
source_buf = PyUnicode_AS_UNICODE((PyObject*)self);
7124
result_buf = (Py_UNICODE *)PyMem_MALLOC(slicelength*
7125
sizeof(Py_UNICODE));
7203
result_buf = (Py_UNICODE *)PyObject_MALLOC(slicelength*
7204
sizeof(Py_UNICODE));
7127
7206
if (result_buf == NULL)
7128
7207
return PyErr_NoMemory();