~ubuntu-branches/ubuntu/oneiric/python2.5/oneiric

Viewing changes to Objects/unicodeobject.c

Committer: Bazaar Package Importer
Author(s): Matthias Klose
Date: 2008-12-21 08:57:49 UTC
mfrom: (1.1.9 upstream)
Revision ID: james.westby@ubuntu.com-20081221085749-bijjr25h8na5jdsu

Tags: 2.5.3-0ubuntu1

* New upstream version.
* Regenerate the included documentation.
* Add an option --install-layout=deb, which is ignored for 2.5.

files added:
PC/VC6/_msi.dsp

debian/patches/distutils-install-layout.dpatch

files removed:
Doc/missfont.log

Lib/test/output/test_resource

debian/patches/fix-bashisms.dpatch

files modified:
Doc/Makefile

Doc/commontex/boilerplate.tex

Doc/dist/dist.tex

Doc/inst/inst.tex

Doc/lib/emailmessage.tex

Doc/lib/libitertools.tex

Doc/lib/libuserdict.tex

Doc/lib/tzinfo-examples.py

Include/patchlevel.h

Include/pymem.h

Include/pythread.h

Lib/csv.py

Lib/ctypes/__init__.py

Lib/ctypes/test/test_bitfields.py

Lib/decimal.py

Lib/distutils/command/sdist.py

Lib/distutils/cygwinccompiler.py

Lib/dummy_thread.py

Lib/hashlib.py

Lib/idlelib/NEWS.txt

Lib/idlelib/configHandler.py

Lib/idlelib/idlever.py

Lib/imaplib.py

Lib/lib-tk/ScrolledText.py

Lib/lib-tk/Tkinter.py

Lib/lib-tk/tkColorChooser.py

Lib/lib-tk/turtle.py

Lib/logging/__init__.py

Lib/optparse.py

Lib/os.py

Lib/plat-mac/Carbon/AppleEvents.py

Lib/plat-mac/macerrors.py

Lib/plat-mac/terminalcommand.py

Lib/platform.py

Lib/pydoc.py

Lib/rlcompleter.py

Lib/sched.py

Lib/shutil.py

Lib/smtplib.py

Lib/socket.py

Lib/subprocess.py

Lib/test/decimaltestdata/extra.decTest

Lib/test/output/test_tokenize

Lib/test/pickletester.py

Lib/test/seq_tests.py

Lib/test/string_tests.py

Lib/test/test_bigmem.py

Lib/test/test_bz2.py

Lib/test/test_cProfile.py

Lib/test/test_compile.py

Lib/test/test_cookielib.py

Lib/test/test_csv.py

Lib/test/test_decimal.py

Lib/test/test_descr.py

Lib/test/test_dict.py

Lib/test/test_dummy_thread.py

Lib/test/test_file.py

Lib/test/test_grammar.py

Lib/test/test_hashlib.py

Lib/test/test_imageop.py

Lib/test/test_ioctl.py

Lib/test/test_itertools.py

Lib/test/test_minidom.py

Lib/test/test_mmap.py

Lib/test/test_os.py

Lib/test/test_parser.py

Lib/test/test_posix.py

Lib/test/test_resource.py

Lib/test/test_socket.py

Lib/test/test_strop.py

Lib/test/test_struct.py

Lib/test/test_subprocess.py

Lib/test/test_support.py

Lib/test/test_threading.py

Lib/test/test_threading_local.py

Lib/test/test_unicode.py

Lib/test/test_urllib2.py

Lib/test/test_weakref.py

Lib/test/test_winsound.py

Lib/test/test_with.py

Lib/test/test_zlib.py

Lib/test/tokenize_tests.txt

Lib/threading.py

Lib/tokenize.py

Lib/urllib2.py

Lib/xml/dom/minidom.py

Mac/BuildScript/build-installer.py

Mac/PythonLauncher/doscript.m

Makefile.pre.in

Misc/ACKS

Misc/NEWS

Misc/python.man

Modules/_bsddb.c

Modules/_ctypes/_ctypes.c

Modules/_ctypes/callbacks.c

Modules/_ctypes/cfield.c

Modules/_ctypes/ctypes.h

Modules/_hashopenssl.c

Modules/_lsprof.c

Modules/_sqlite/connection.c

Modules/_sqlite/cursor.c

Modules/_sqlite/module.c

Modules/_struct.c

Modules/almodule.c

Modules/arraymodule.c

Modules/bz2module.c

Modules/cPickle.c

Modules/fcntlmodule.c

Modules/gcmodule.c

Modules/imageop.c

Modules/itertoolsmodule.c

Modules/main.c

Modules/mmapmodule.c

Modules/parsermodule.c

Modules/posixmodule.c

Modules/rgbimgmodule.c

Modules/selectmodule.c

Modules/signalmodule.c

Modules/stropmodule.c

Modules/threadmodule.c

Modules/timing.h

Modules/zlibmodule.c

Objects/bufferobject.c

Objects/dictobject.c

Objects/fileobject.c

Objects/longobject.c

Objects/object.c

Objects/obmalloc.c

Objects/stringlib/count.h

Objects/stringlib/find.h

Objects/stringobject.c

Objects/tupleobject.c

Objects/typeobject.c

Objects/unicodeobject.c

Objects/weakrefobject.c

PC/VC6/_bsddb.dsp

PC/VC6/_sqlite3.dsp

PC/VC6/build_ssl.py

PC/VC6/pcbuild.dsw

PC/VC6/python.dsp

PC/VC6/readme.txt

PC/_subprocess.c

Parser/intrcheck.c

Parser/parsetok.c

Python/ast.c

Python/ceval.c

Python/compile.c

Python/marshal.c

Python/mysnprintf.c

Python/pystate.c

Python/sysmodule.c

Python/thread.c

README

Tools/faqwiz/move-faqwiz.sh

Tools/msi/msi.py

Tools/msi/uuids.py

configure

configure.in

debian/changelog

debian/patches/docs.uue

debian/patches/svn-doc-updates.dpatch

debian/patches/svn-updates.dpatch

debian/rules

setup.py

Show diffs side-by-side

added added

removed removed

Objects/unicodeobject.c

200

it contains). */

201

202

oldstr = unicode->str;

203

PyMem_RESIZE(unicode->str, Py_UNICODE, length + 1);

203

unicode->str = PyObject_REALLOC(unicode->str,

204

sizeof(Py_UNICODE) * (length + 1));

204

205

if (!unicode->str) {

205

206

unicode->str = (Py_UNICODE *)oldstr;

206

207

PyErr_NoMemory();

239

240

return unicode_empty;

240

241

}

241

242

243

/* Ensure we won't overflow the size. */

244

if (length > ((PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) - 1)) {

245

return (PyUnicodeObject *)PyErr_NoMemory();

246

}

247

242

248

/* Unicode freelist & memory allocation */

243

249

if (unicode_freelist) {

244

250

unicode = unicode_freelist;

249

255

never downsize it. */

250

256

if ((unicode->length < length) &&

251

257

unicode_resize(unicode, length) < 0) {

252

PyMem_DEL(unicode->str);

258

PyObject_DEL(unicode->str);

253

259

goto onError;

254

260

}

255

261

}

256

262

else {

257

unicode->str = PyMem_NEW(Py_UNICODE, length + 1);

263

size_t new_size = sizeof(Py_UNICODE) * ((size_t)length + 1);

264

unicode->str = (Py_UNICODE*) PyObject_MALLOC(new_size);

258

265

}

259

266

PyObject_INIT(unicode, &PyUnicode_Type);

260

267

}

261

268

else {

269

size_t new_size;

262

270

unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type);

263

271

if (unicode == NULL)

264

272

return NULL;

265

unicode->str = PyMem_NEW(Py_UNICODE, length + 1);

273

new_size = sizeof(Py_UNICODE) * ((size_t)length + 1);

274

unicode->str = (Py_UNICODE*) PyObject_MALLOC(new_size);

266

275

}

267

276

268

277

if (!unicode->str) {

296

305

unicode_freelist_size < MAX_UNICODE_FREELIST_SIZE) {

297

306

/* Keep-Alive optimization */

298

307

if (unicode->length >= KEEPALIVE_SIZE_LIMIT) {

299

PyMem_DEL(unicode->str);

308

PyObject_DEL(unicode->str);

300

309

unicode->str = NULL;

301

310

unicode->length = 0;

302

311

}

310

319

unicode_freelist_size++;

311

320

}

312

321

else {

313

PyMem_DEL(unicode->str);

322

PyObject_DEL(unicode->str);

314

323

Py_XDECREF(unicode->defenc);

315

324

unicode->ob_type->tp_free((PyObject *)unicode);

316

325

}

970

979

while (s < e) {

971

980

Py_UNICODE ch;

972

981

restart:

973

ch = *s;

982

ch = (unsigned char) *s;

974

983

975

984

if (inShift) {

976

985

if ((ch == '-') || !B64CHAR(ch)) {

1091

1100

char * out;

1092

1101

char * start;

1093

1102

1103

if (cbAllocated / 5 != size)

1104

return PyErr_NoMemory();

1105

1094

1106

if (size == 0)

1095

1107

return PyString_FromStringAndSize(NULL, 0);

1096

1108

1689

1701

{

1690

1702

PyObject *v;

1691

1703

unsigned char *p;

1704

Py_ssize_t nsize, bytesize;

1692

1705

#ifdef Py_UNICODE_WIDE

1693

int i, pairs;

1706

Py_ssize_t i, pairs;

1694

1707

#else

1695

1708

const int pairs = 0;

1696

1709

#endif

1713

1726

if (s[i] >= 0x10000)

1714

1727

pairs++;

1715

1728

#endif

1716

v = PyString_FromStringAndSize(NULL,

1717

2 * (size + pairs + (byteorder == 0)));

1729

/* 2 * (size + pairs + (byteorder == 0)) */

1730

if (size > PY_SSIZE_T_MAX ||

1731

size > PY_SSIZE_T_MAX - pairs - (byteorder == 0))

1732

return PyErr_NoMemory();

1733

nsize = (size + pairs + (byteorder == 0));

1734

bytesize = nsize * 2;

1735

if (bytesize / 2 != nsize)

1736

return PyErr_NoMemory();

1737

v = PyString_FromStringAndSize(NULL, bytesize);

1718

1738

if (v == NULL)

1719

1739

return NULL;

1720

1740

2042

2062

char *p;

2043

2063

2044

2064

static const char *hexdigit = "0123456789abcdef";

2065

#ifdef Py_UNICODE_WIDE

2066

const Py_ssize_t expandsize = 10;

2067

#else

2068

const Py_ssize_t expandsize = 6;

2069

#endif

2045

2070

2046

2071

/* Initial allocation is based on the longest-possible unichr

2047

2072

escape.

2057

2082

escape.

2058

2083

2059

2084

2085

if (size > (PY_SSIZE_T_MAX - 2 - 1) / expandsize)

2086

return PyErr_NoMemory();

2087

2060

2088

repr = PyString_FromStringAndSize(NULL,

2061

2089

2062

#ifdef Py_UNICODE_WIDE

2063

+ 10*size

2064

#else

2065

+ 6*size

2066

#endif

2090

+ expandsize*size

2067

2091

+ 1);

2068

2092

if (repr == NULL)

2069

2093

return NULL;

2269

2293

else

2270

2294

x += 10 + c - 'A';

2271

2295

}

2272

#ifndef Py_UNICODE_WIDE

2273

if (x > 0x10000) {

2296

if (x <= 0xffff)

2297

/* UCS-2 character */

2298

*p++ = (Py_UNICODE) x;

2299

else if (x <= 0x10ffff) {

2300

/* UCS-4 character. Either store directly, or as

2301

surrogate pair. */

2302

#ifdef Py_UNICODE_WIDE

2303

*p++ = (Py_UNICODE) x;

2304

#else

2305

x -= 0x10000L;

2306

*p++ = 0xD800 + (Py_UNICODE) (x >> 10);

2307

*p++ = 0xDC00 + (Py_UNICODE) (x & 0x03FF);

2308

#endif

2309

} else {

2310

endinpos = s-starts;

2311

outpos = p-PyUnicode_AS_UNICODE(v);

2274

2312

if (unicode_decode_call_errorhandler(

2275

2313

errors, &errorHandler,

2276

2314

"rawunicodeescape", "\\Uxxxxxxxx out of range",

2278

2316

(PyObject **)&v, &outpos, &p))

2279

2317

goto onError;

2280

2318

}

2281

#endif

2282

*p++ = x;

2283

2319

nextByte:

2284

2320

;

2285

2321

}

2304

2340

char *q;

2305

2341

2306

2342

static const char *hexdigit = "0123456789abcdef";

2307

2308

2343

#ifdef Py_UNICODE_WIDE

2309

repr = PyString_FromStringAndSize(NULL, 10 * size);

2344

const Py_ssize_t expandsize = 10;

2310

2345

#else

2311

repr = PyString_FromStringAndSize(NULL, 6 * size);

2346

const Py_ssize_t expandsize = 6;

2312

2347

#endif

2348

2349

if (size > PY_SSIZE_T_MAX / expandsize)

2350

return PyErr_NoMemory();

2351

2352

repr = PyString_FromStringAndSize(NULL, expandsize * size);

2313

2353

if (repr == NULL)

2314

2354

return NULL;

2315

2355

if (size == 0)

2333

2373

*p++ = hexdigit[ch & 15];

2334

2374

}

2335

2375

else

2376

#else

2377

/* Map UTF-16 surrogate pairs to '\U00xxxxxx' */

2378

if (ch >= 0xD800 && ch < 0xDC00) {

2379

Py_UNICODE ch2;

2380

Py_UCS4 ucs;

2381

2382

ch2 = *s++;

2383

size--;

2384

if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {

2385

ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;

2386

*p++ = '\\';

2387

*p++ = 'U';

2388

*p++ = hexdigit[(ucs >> 28) & 0xf];

2389

*p++ = hexdigit[(ucs >> 24) & 0xf];

2390

*p++ = hexdigit[(ucs >> 20) & 0xf];

2391

*p++ = hexdigit[(ucs >> 16) & 0xf];

2392

*p++ = hexdigit[(ucs >> 12) & 0xf];

2393

*p++ = hexdigit[(ucs >> 8) & 0xf];

2394

*p++ = hexdigit[(ucs >> 4) & 0xf];

2395

*p++ = hexdigit[ucs & 0xf];

2396

continue;

2397

}

2398

/* Fall through: isolated surrogates are copied as-is */

2399

s--;

2400

size++;

2401

}

2336

2402

#endif

2337

2403

/* Map 16-bit characters to '\uxxxx' */

2338

2404

if (ch >= 256) {

4719

4785

return self;

4720

4786

}

4721

4787

4788

if (left > PY_SSIZE_T_MAX - self->length ||

4789

right > PY_SSIZE_T_MAX - (left + self->length)) {

4790

PyErr_SetString(PyExc_OverflowError, "padded string is too long");

4791

return NULL;

4792

}

4722

4793

u = _PyUnicode_New(left + self->length + right);

4723

4794

if (u) {

4724

4795

if (left)

5689

5760

Py_UNICODE *e;

5690

5761

Py_UNICODE *p;

5691

5762

Py_UNICODE *q;

5692

Py_ssize_t i, j, old_j;

5763

Py_UNICODE *qe;

5764

Py_ssize_t i, j, incr;

5693

5765

PyUnicodeObject *u;

5694

5766

int tabsize = 8;

5695

5767

5697

5769

return NULL;

5698

5770

5699

5771

/* First pass: determine size of output string */

5700

i = j = old_j = 0;

5701

e = self->str + self->length;

5772

i = 0; /* chars up to and including most recent \n or \r */

5773

j = 0; /* chars since most recent \n or \r (use in tab calculations) */

5774

e = self->str + self->length; /* end of input */

5702

5775

for (p = self->str; p < e; p++)

5703

5776

if (*p == '\t') {

5704

5777

if (tabsize > 0) {

5705

j += tabsize - (j % tabsize);

5706

if (old_j > j) {

5707

PyErr_SetString(PyExc_OverflowError,

5708

"new string is too long");

5709

return NULL;

5710

}

5711

old_j = j;

5712

}

5778

incr = tabsize - (j % tabsize); /* cannot overflow */

5779

if (j > PY_SSIZE_T_MAX - incr)

5780

goto overflow1;

5781

j += incr;

5782

}

5713

5783

}

5714

5784

else {

5785

if (j > PY_SSIZE_T_MAX - 1)

5786

goto overflow1;

5715

5787

j++;

5716

5788

if (*p == '\n' || *p == '\r') {

5789

if (i > PY_SSIZE_T_MAX - j)

5790

goto overflow1;

5717

5791

i += j;

5718

old_j = j = 0;

5719

if (i < 0) {

5720

PyErr_SetString(PyExc_OverflowError,

5721

"new string is too long");

5722

return NULL;

5723

}

5792

j = 0;

5724

5793

}

5725

5794

}

5726

5795

5727

if ((i + j) < 0) {

5728

PyErr_SetString(PyExc_OverflowError, "new string is too long");

5729

return NULL;

5730

}

5796

if (i > PY_SSIZE_T_MAX - j)

5797

goto overflow1;

5731

5798

5732

5799

/* Second pass: create output string and fill it */

5733

5800

u = _PyUnicode_New(i + j);

5734

5801

if (!u)

5735

5802

return NULL;

5736

5803

5737

j = 0;

5738

q = u->str;

5804

j = 0; /* same as in first pass */

5805

q = u->str; /* next output char */

5806

qe = u->str + u->length; /* end of output */

5739

5807

5740

5808

for (p = self->str; p < e; p++)

5741

5809

if (*p == '\t') {

5742

5810

if (tabsize > 0) {

5743

5811

i = tabsize - (j % tabsize);

5744

5812

j += i;

5745

while (i--)

5813

while (i--) {

5814

if (q >= qe)

5815

goto overflow2;

5746

5816

*q++ = ' ';

5817

}

5747

5818

}

5748

5819

}

5749

5820

else {

5750

j++;

5821

if (q >= qe)

5822

goto overflow2;

5751

5823

*q++ = *p;

5824

j++;

5752

5825

if (*p == '\n' || *p == '\r')

5753

5826

j = 0;

5754

5827

}

5755

5828

5756

5829

return (PyObject*) u;

5830

5831

overflow2:

5832

Py_DECREF(u);

5833

overflow1:

5834

PyErr_SetString(PyExc_OverflowError, "new string is too long");

5835

return NULL;

5757

5836

}

5758

5837

5759

5838

PyDoc_STRVAR(find__doc__,

7121

7200

return PyUnicode_FromUnicode(NULL, 0);

7122

7201

} else {

7123

7202

source_buf = PyUnicode_AS_UNICODE((PyObject*)self);

7124

result_buf = (Py_UNICODE *)PyMem_MALLOC(slicelength*

7125

sizeof(Py_UNICODE));

7203

result_buf = (Py_UNICODE *)PyObject_MALLOC(slicelength*

7204

sizeof(Py_UNICODE));

7126

7205

7127

7206

if (result_buf == NULL)

7128

7207

return PyErr_NoMemory();

7132

7211

}

7133

7212

7134

7213

result = PyUnicode_FromUnicode(result_buf, slicelength);

7135

PyMem_FREE(result_buf);

7214

PyObject_FREE(result_buf);

7136

7215

return result;

7137

7216

}

7138

7217

} else {

7940

8019

Py_DECREF(tmp);

7941

8020

return NULL;

7942

8021

}

7943

pnew->str = PyMem_NEW(Py_UNICODE, n+1);

8022

pnew->str = (Py_UNICODE*) PyObject_MALLOC(sizeof(Py_UNICODE) * (n+1));

7944

8023

if (pnew->str == NULL) {

7945

8024

_Py_ForgetReference((PyObject *)pnew);

7946

8025

PyObject_Del(pnew);

8067

8146

PyUnicodeObject *v = u;

8068

8147

u = *(PyUnicodeObject **)u;

8069

8148

if (v->str)

8070

PyMem_DEL(v->str);

8149

PyObject_DEL(v->str);

8071

8150

Py_XDECREF(v->defenc);

8072

8151

PyObject_Del(v);

8073

8152

}

Older »