1
#ifndef Py_UNICODEOBJECT_H
2
#define Py_UNICODEOBJECT_H
8
Unicode implementation based on original code by Fredrik Lundh,
9
modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
10
Unicode Integration Proposal. (See
11
http://www.egenix.com/files/python/unicode-proposal.txt).
13
Copyright (c) Corporation for National Research Initiatives.
17
--------------------------------------------------------------------
19
* Yet another Unicode string type for Python. This type supports the
20
* 16-bit Basic Multilingual Plane (BMP) only.
22
* Written by Fredrik Lundh, January 1999.
24
* Copyright (c) 1999 by Secret Labs AB.
25
* Copyright (c) 1999 by Fredrik Lundh.
27
* fredrik@pythonware.com
28
* http://www.pythonware.com
30
* --------------------------------------------------------------------
31
* This Unicode String Type is
33
* Copyright (c) 1999 by Secret Labs AB
34
* Copyright (c) 1999 by Fredrik Lundh
36
* By obtaining, using, and/or copying this software and/or its
37
* associated documentation, you agree that you have read, understood,
38
* and will comply with the following terms and conditions:
40
* Permission to use, copy, modify, and distribute this software and its
41
* associated documentation for any purpose and without fee is hereby
42
* granted, provided that the above copyright notice appears in all
43
* copies, and that both that copyright notice and this permission notice
44
* appear in supporting documentation, and that the name of Secret Labs
45
* AB or the author not be used in advertising or publicity pertaining to
46
* distribution of the software without specific, written prior
49
* SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
50
* THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
51
* FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
52
* ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
53
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
54
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
55
* OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
56
* -------------------------------------------------------------------- */
60
/* === Internal API ======================================================= */
62
/* --- Internal Unicode Format -------------------------------------------- */
64
/* Python 3.x requires unicode */
65
#define Py_USING_UNICODE
67
#ifndef SIZEOF_WCHAR_T
68
#error Must define SIZEOF_WCHAR_T
71
#define Py_UNICODE_SIZE SIZEOF_WCHAR_T
73
/* If wchar_t can be used for UCS-4 storage, set Py_UNICODE_WIDE.
74
Otherwise, Unicode strings are stored as UCS-2 (with limited support
77
#if Py_UNICODE_SIZE >= 4
78
#define Py_UNICODE_WIDE
81
/* Set these flags if the platform has "wchar.h" and the
82
wchar_t type is a 16-bit unsigned type */
83
/* #define HAVE_WCHAR_H */
84
/* #define HAVE_USABLE_WCHAR_T */
86
/* Py_UNICODE was the native Unicode storage format (code unit) used by
87
Python and represents a single Unicode element in the Unicode type.
88
With PEP 393, Py_UNICODE is deprecated and replaced with a
89
typedef to wchar_t. */
91
#ifndef Py_LIMITED_API
92
#define PY_UNICODE_TYPE wchar_t
93
typedef wchar_t Py_UNICODE;
96
/* If the compiler provides a wchar_t type we try to support it
97
through the interface functions PyUnicode_FromWideChar(),
98
PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). */
100
#ifdef HAVE_USABLE_WCHAR_T
101
# ifndef HAVE_WCHAR_H
102
# define HAVE_WCHAR_H
106
#if defined(MS_WINDOWS)
111
/* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
118
/* Py_UCS4 and Py_UCS2 are typedefs for the respective
119
unicode representations. */
121
typedef unsigned int Py_UCS4;
122
#elif SIZEOF_LONG == 4
123
typedef unsigned long Py_UCS4;
125
#error "Could not find a proper typedef for Py_UCS4"
128
#if SIZEOF_SHORT == 2
129
typedef unsigned short Py_UCS2;
131
#error "Could not find a proper typedef for Py_UCS2"
134
typedef unsigned char Py_UCS1;
136
/* --- Internal Unicode Operations ---------------------------------------- */
138
/* Since splitting on whitespace is an important use case, and
139
whitespace in most situations is solely ASCII whitespace, we
140
optimize for the common case by using a quick look-up table
141
_Py_ascii_whitespace (see below) with an inlined check.
144
#ifndef Py_LIMITED_API
145
#define Py_UNICODE_ISSPACE(ch) \
146
((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch))
148
#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
149
#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
150
#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
151
#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
153
#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
154
#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
155
#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
157
#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
158
#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
159
#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
160
#define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch)
162
#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
163
#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
164
#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
166
#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
168
/* True if ch is alphanumeric, i.e. if any one of Py_UNICODE_ISALPHA(),
   Py_UNICODE_ISDECIMAL(), Py_UNICODE_ISDIGIT() or Py_UNICODE_ISNUMERIC()
   accepts it.  The tests short-circuit left to right, so ch may be
   evaluated up to four times: do not pass an expression with side
   effects. */
#define Py_UNICODE_ISALNUM(ch) \
    (Py_UNICODE_ISALPHA(ch) || \
     Py_UNICODE_ISDECIMAL(ch) || \
     Py_UNICODE_ISDIGIT(ch) || \
     Py_UNICODE_ISNUMERIC(ch))
174
#define Py_UNICODE_COPY(target, source, length) \
175
Py_MEMCPY((target), (source), (length)*sizeof(Py_UNICODE))
177
#define Py_UNICODE_FILL(target, value, length) \
178
do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\
179
for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\
182
/* Macros to work with UTF-16 surrogates.

   The three classification macros test the fixed high-order bits of the
   code unit with a mask instead of a two-sided range comparison, so the
   argument is evaluated exactly once:
     - any surrogate:  U+D800..U+DFFF  (2048 values, aligned on 0x800)
     - high surrogate: U+D800..U+DBFF  (1024 values, aligned on 0x400)
     - low surrogate:  U+DC00..U+DFFF  (1024 values, aligned on 0x400)
   The masks are written as ~0x7FF / ~0x3FF so that they widen correctly
   to whatever integer type ch has. */
#define Py_UNICODE_IS_SURROGATE(ch)      (((ch) & ~0x7FF) == 0xD800)
#define Py_UNICODE_IS_HIGH_SURROGATE(ch) (((ch) & ~0x3FF) == 0xD800)
#define Py_UNICODE_IS_LOW_SURROGATE(ch)  (((ch) & ~0x3FF) == 0xDC00)

/* Join two surrogate characters and return a single Py_UCS4 value:
   the low 10 bits of each half are combined and offset by 0x10000. */
#define Py_UNICODE_JOIN_SURROGATES(high, low) \
    (((((Py_UCS4)(high) & 0x03FF) << 10) | \
      ((Py_UCS4)(low) & 0x03FF)) + 0x10000)

/* high surrogate = top 10 bits added to D800
   (ch is a code point >= 0x10000; the 0x10000 offset is removed first) */
#define Py_UNICODE_HIGH_SURROGATE(ch) (0xD800 - (0x10000 >> 10) + ((ch) >> 10))

/* low surrogate = bottom 10 bits added to DC00 */
#define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF))
195
/* Check if substring matches at given offset.  The offset must be
   valid, and the substring must not be empty.

   string and substring are pointers to objects exposing a wstr buffer
   (and, for substring, its wstr_length).  The first and the last code
   unit are compared before falling through to memcmp(), so most
   mismatches are rejected without scanning the whole substring.
   Arguments are evaluated several times. */
#define Py_UNICODE_MATCH(string, offset, substring) \
    ((*((string)->wstr + (offset)) == *((substring)->wstr)) && \
     ((*((string)->wstr + (offset) + (substring)->wstr_length-1) == \
       *((substring)->wstr + (substring)->wstr_length-1))) && \
     !memcmp((string)->wstr + (offset), (substring)->wstr, \
             (substring)->wstr_length*sizeof(Py_UNICODE)))
203
#endif /* Py_LIMITED_API */
209
/* --- Unicode Type ------------------------------------------------------- */
211
#ifndef Py_LIMITED_API
213
/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
214
structure. state.ascii and state.compact are set, and the data
215
immediately follow the structure. utf8_length and wstr_length can be found
216
in the length field; the utf8 pointer is equal to the data pointer. */
218
/* There are 4 forms of Unicode strings:
222
* structure = PyASCIIObject
223
* test: PyUnicode_IS_COMPACT_ASCII(op)
224
* kind = PyUnicode_1BYTE_KIND
228
* (length is the length of the utf8 and wstr strings)
229
* (data starts just after the structure)
230
* (since ASCII is decoded from UTF-8, the utf8 string are the data)
234
* structure = PyCompactUnicodeObject
235
* test: PyUnicode_IS_COMPACT(op) && !PyUnicode_IS_ASCII(op)
236
* kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
241
* utf8 is not shared with data
242
* utf8_length = 0 if utf8 is NULL
243
* wstr is shared with data and wstr_length=length
244
if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
245
or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
246
* wstr_length = 0 if wstr is NULL
247
* (data starts just after the structure)
249
- legacy string, not ready:
251
* structure = PyUnicodeObject
252
* test: kind == PyUnicode_WCHAR_KIND
253
* length = 0 (use wstr_length)
255
* kind = PyUnicode_WCHAR_KIND
259
* interned = SSTATE_NOT_INTERNED
265
- legacy string, ready:
267
* structure = PyUnicodeObject structure
268
* test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND
269
* kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
273
* data.any is not NULL
274
* utf8 is shared and utf8_length = length with data.any if ascii = 1
275
* utf8_length = 0 if utf8 is NULL
276
* wstr is shared with data.any and wstr_length = length
277
if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
278
or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
279
* wstr_length = 0 if wstr is NULL
281
Compact strings use only one memory block (structure + characters),
282
whereas legacy strings use one block for the structure and one block
285
Legacy strings are created by PyUnicode_FromUnicode() and
286
PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
287
when PyUnicode_READY() is called.
289
See also _PyUnicode_CheckConsistency().
292
Py_ssize_t length; /* Number of code points in the string */
293
Py_hash_t hash; /* Hash value; -1 if not set */
296
SSTATE_NOT_INTERNED (0)
297
SSTATE_INTERNED_MORTAL (1)
298
SSTATE_INTERNED_IMMORTAL (2)
300
If interned != SSTATE_NOT_INTERNED, the two references from the
301
dictionary to this object are *not* counted in ob_refcnt.
303
unsigned int interned:2;
306
- PyUnicode_WCHAR_KIND (0):
308
* character type = wchar_t (16 or 32 bits, depending on the
311
- PyUnicode_1BYTE_KIND (1):
313
* character type = Py_UCS1 (8 bits, unsigned)
314
* all characters are in the range U+0000-U+00FF (latin1)
315
* if ascii is set, all characters are in the range U+0000-U+007F
316
(ASCII), otherwise at least one character is in the range
319
- PyUnicode_2BYTE_KIND (2):
321
* character type = Py_UCS2 (16 bits, unsigned)
322
* all characters are in the range U+0000-U+FFFF (BMP)
323
* at least one character is in the range U+0100-U+FFFF
325
- PyUnicode_4BYTE_KIND (4):
327
* character type = Py_UCS4 (32 bits, unsigned)
328
* all characters are in the range U+0000-U+10FFFF
329
* at least one character is in the range U+10000-U+10FFFF
332
/* Compact is with respect to the allocation scheme. Compact unicode
333
objects only require one memory block while non-compact objects use
334
one block for the PyUnicodeObject struct and another for its data
336
unsigned int compact:1;
337
/* The string only contains characters in the range U+0000-U+007F (ASCII)
338
and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
339
set, use the PyASCIIObject structure. */
340
unsigned int ascii:1;
341
/* The ready flag indicates whether the object layout is initialized
342
completely. This means that this is either a compact object, or
343
the data pointer is filled out. The bit is redundant, and helps
344
to minimize the test in PyUnicode_IS_READY(). */
345
unsigned int ready:1;
347
wchar_t *wstr; /* wchar_t representation (null-terminated) */
350
/* Non-ASCII strings allocated through PyUnicode_New use the
351
PyCompactUnicodeObject structure. state.compact is set, and the data
352
immediately follow the structure. */
355
Py_ssize_t utf8_length; /* Number of bytes in utf8, excluding the
357
char *utf8; /* UTF-8 representation (null-terminated) */
358
Py_ssize_t wstr_length; /* Number of code points in wstr, possible
359
* surrogates count as two code points. */
360
} PyCompactUnicodeObject;
362
/* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the
363
PyUnicodeObject structure. The actual string data is initially in the wstr
364
block, and copied into the data block using _PyUnicode_Ready. */
366
PyCompactUnicodeObject _base;
372
} data; /* Canonical, smallest-form Unicode buffer */
376
PyAPI_DATA(PyTypeObject) PyUnicode_Type;
377
PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
379
#define PyUnicode_Check(op) \
380
PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)
381
#define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type)
383
/* Fast access macros */
384
#ifndef Py_LIMITED_API
386
/* Length of the wstr representation.  Compact ASCII objects have no
   separate wstr_length field: the value is read from the length field of
   PyASCIIObject.  Every other object stores it in
   PyCompactUnicodeObject.wstr_length.
   op is parenthesized inside the casts so that pointer expressions such
   as (base + n) are cast as a whole. */
#define PyUnicode_WSTR_LENGTH(op) \
    (PyUnicode_IS_COMPACT_ASCII(op) ? \
     ((PyASCIIObject*)(op))->length : \
     ((PyCompactUnicodeObject*)(op))->wstr_length)
391
/* Returns the deprecated Py_UNICODE representation's size in code units
392
(this includes surrogate pairs as 2 units).
393
If the Py_UNICODE representation is not available, it will be computed
394
on request. Use PyUnicode_GET_LENGTH() for the length in code points. */
396
#define PyUnicode_GET_SIZE(op) \
397
(assert(PyUnicode_Check(op)), \
398
(((PyASCIIObject *)(op))->wstr) ? \
399
PyUnicode_WSTR_LENGTH(op) : \
400
((void)PyUnicode_AsUnicode((PyObject *)(op)), \
401
assert(((PyASCIIObject *)(op))->wstr), \
402
PyUnicode_WSTR_LENGTH(op)))
404
#define PyUnicode_GET_DATA_SIZE(op) \
405
(PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE)
407
/* Alias for PyUnicode_AsUnicode(). This will create a wchar_t/Py_UNICODE
408
representation on demand. Using this macro is very inefficient now,
409
try to port your code to use the new PyUnicode_*BYTE_DATA() macros or
410
use PyUnicode_WRITE() and PyUnicode_READ(). */
412
#define PyUnicode_AS_UNICODE(op) \
413
(assert(PyUnicode_Check(op)), \
414
(((PyASCIIObject *)(op))->wstr) ? (((PyASCIIObject *)(op))->wstr) : \
415
PyUnicode_AsUnicode((PyObject *)(op)))
417
#define PyUnicode_AS_DATA(op) \
418
((const char *)(PyUnicode_AS_UNICODE(op)))
421
/* --- Flexible String Representation Helper Macros (PEP 393) -------------- */
423
/* Values for PyASCIIObject.state: */
425
/* Interning state. */
426
#define SSTATE_NOT_INTERNED 0
427
#define SSTATE_INTERNED_MORTAL 1
428
#define SSTATE_INTERNED_IMMORTAL 2
430
/* Return true if the string contains only ASCII characters, or 0 if not.
   The string may be compact (PyUnicode_IS_COMPACT_ASCII) or not, but it
   must be ready: both conditions are checked with assert() before the
   state.ascii flag is read. */
#define PyUnicode_IS_ASCII(op) \
    (assert(PyUnicode_Check(op)), \
     assert(PyUnicode_IS_READY(op)), \
     ((PyASCIIObject*)(op))->state.ascii)
438
/* Return true if the string is compact or 0 if not.
   No type checks or Ready calls are performed. */
#define PyUnicode_IS_COMPACT(op) \
    (((PyASCIIObject*)(op))->state.compact)

/* Return true if the string is a compact ASCII string (use PyASCIIObject
   structure), or 0 if not.  No type checks or Ready calls are performed.
   op is evaluated twice (once per flag). */
#define PyUnicode_IS_COMPACT_ASCII(op) \
    (((PyASCIIObject*)(op))->state.ascii && PyUnicode_IS_COMPACT(op))
448
enum PyUnicode_Kind {
449
/* String contains only wstr byte characters. This is only possible
450
when the string was created with a legacy API and _PyUnicode_Ready()
451
has not been called yet. */
452
PyUnicode_WCHAR_KIND = 0,
453
/* Return values of the PyUnicode_KIND() macro: */
454
PyUnicode_1BYTE_KIND = 1,
455
PyUnicode_2BYTE_KIND = 2,
456
PyUnicode_4BYTE_KIND = 4
459
/* Return pointers to the canonical representation cast to unsigned char,
460
Py_UCS2, or Py_UCS4 for direct character access.
461
No checks are performed, use PyUnicode_KIND() before to ensure
462
these will work correctly. */
464
#define PyUnicode_1BYTE_DATA(op) ((Py_UCS1*)PyUnicode_DATA(op))
465
#define PyUnicode_2BYTE_DATA(op) ((Py_UCS2*)PyUnicode_DATA(op))
466
#define PyUnicode_4BYTE_DATA(op) ((Py_UCS4*)PyUnicode_DATA(op))
468
/* Return one of the PyUnicode_*_KIND values defined above. */
469
#define PyUnicode_KIND(op) \
470
(assert(PyUnicode_Check(op)), \
471
assert(PyUnicode_IS_READY(op)), \
472
((PyASCIIObject *)(op))->state.kind)
474
/* Return a void pointer to the raw unicode buffer. */
475
#define _PyUnicode_COMPACT_DATA(op) \
476
(PyUnicode_IS_ASCII(op) ? \
477
((void*)((PyASCIIObject*)(op) + 1)) : \
478
((void*)((PyCompactUnicodeObject*)(op) + 1)))
480
#define _PyUnicode_NONCOMPACT_DATA(op) \
481
(assert(((PyUnicodeObject*)(op))->data.any), \
482
((((PyUnicodeObject *)(op))->data.any)))
484
#define PyUnicode_DATA(op) \
485
(assert(PyUnicode_Check(op)), \
486
PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) : \
487
_PyUnicode_NONCOMPACT_DATA(op))
489
/* In the access macros below, "kind" may be evaluated more than once.
490
All other macro parameters are evaluated exactly once, so it is safe
491
to put side effects into them (such as increasing the index). */
493
/* Write into the canonical representation, this macro does not do any sanity
494
checks and is intended for usage in loops. The caller should cache the
495
kind and data pointers obtained from other macro calls.
496
index is the index in the string (starts at 0) and value is the new
497
code point value which should be written to that location. */
498
#define PyUnicode_WRITE(kind, data, index, value) \
501
case PyUnicode_1BYTE_KIND: { \
502
((Py_UCS1 *)(data))[(index)] = (Py_UCS1)(value); \
505
case PyUnicode_2BYTE_KIND: { \
506
((Py_UCS2 *)(data))[(index)] = (Py_UCS2)(value); \
510
assert((kind) == PyUnicode_4BYTE_KIND); \
511
((Py_UCS4 *)(data))[(index)] = (Py_UCS4)(value); \
516
/* Read a code point from the string's canonical representation. No checks
517
or ready calls are performed. */
518
#define PyUnicode_READ(kind, data, index) \
520
((kind) == PyUnicode_1BYTE_KIND ? \
521
((const Py_UCS1 *)(data))[(index)] : \
522
((kind) == PyUnicode_2BYTE_KIND ? \
523
((const Py_UCS2 *)(data))[(index)] : \
524
((const Py_UCS4 *)(data))[(index)] \
528
/* PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it
529
calls PyUnicode_KIND() and might call it twice. For single reads, use
530
PyUnicode_READ_CHAR, for multiple consecutive reads callers should
531
cache kind and use PyUnicode_READ instead. */
532
#define PyUnicode_READ_CHAR(unicode, index) \
533
(assert(PyUnicode_Check(unicode)), \
534
assert(PyUnicode_IS_READY(unicode)), \
536
(PyUnicode_KIND((unicode)) == PyUnicode_1BYTE_KIND ? \
537
((const Py_UCS1 *)(PyUnicode_DATA((unicode))))[(index)] : \
538
(PyUnicode_KIND((unicode)) == PyUnicode_2BYTE_KIND ? \
539
((const Py_UCS2 *)(PyUnicode_DATA((unicode))))[(index)] : \
540
((const Py_UCS4 *)(PyUnicode_DATA((unicode))))[(index)] \
544
/* Returns the length of the unicode string. The caller has to make sure that
545
the string has its canonical representation set before calling
546
this macro. Call PyUnicode_(FAST_)Ready to ensure that. */
547
#define PyUnicode_GET_LENGTH(op) \
548
(assert(PyUnicode_Check(op)), \
549
assert(PyUnicode_IS_READY(op)), \
550
((PyASCIIObject *)(op))->length)
553
/* Fast check to determine whether an object is ready.  Equivalent to
   PyUnicode_IS_COMPACT(op) || ((PyUnicodeObject*)(op))->data.any,
   but reads only the state.ready bit.  No type check is performed. */
#define PyUnicode_IS_READY(op) (((PyASCIIObject*)(op))->state.ready)
558
/* PyUnicode_READY() does less work than _PyUnicode_Ready() in the best
559
case. If the canonical representation is not yet set, it will still call
561
Returns 0 on success and -1 on errors. */
562
#define PyUnicode_READY(op) \
563
(assert(PyUnicode_Check(op)), \
564
(PyUnicode_IS_READY(op) ? \
565
0 : _PyUnicode_Ready((PyObject *)(op))))
567
/* Return a maximum character value which is suitable for creating another
568
string based on op. This is always an approximation but more efficient
569
than iterating over the string. */
570
#define PyUnicode_MAX_CHAR_VALUE(op) \
571
(assert(PyUnicode_IS_READY(op)), \
572
(PyUnicode_IS_ASCII(op) ? \
574
(PyUnicode_KIND(op) == PyUnicode_1BYTE_KIND ? \
576
(PyUnicode_KIND(op) == PyUnicode_2BYTE_KIND ? \
582
/* --- Constants ---------------------------------------------------------- */
584
/* This Unicode character will be used as replacement character during
585
decoding if the errors argument is set to "replace". Note: the
586
Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
589
#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UCS4) 0xFFFD)
591
/* === Public API ========================================================= */
593
/* --- Plain Py_UNICODE --------------------------------------------------- */
595
/* With PEP 393, this is the recommended way to allocate a new unicode object.
596
This function will allocate the object and its buffer in a single memory
597
block. Objects created using this function are not resizable. */
598
#ifndef Py_LIMITED_API
599
PyAPI_FUNC(PyObject*) PyUnicode_New(
600
Py_ssize_t size, /* Number of code points in the new string */
601
Py_UCS4 maxchar /* maximum code point value in the string */
605
/* Initializes the canonical string representation from the deprecated
606
wstr/Py_UNICODE representation. This function is used to convert Unicode
607
objects which were created using the old API to the new flexible format
608
introduced with PEP 393.
610
Don't call this function directly, use the public PyUnicode_READY() macro
612
#ifndef Py_LIMITED_API
613
PyAPI_FUNC(int) _PyUnicode_Ready(
614
PyObject *unicode /* Unicode object */
618
/* Get a copy of a Unicode string. */
619
#ifndef Py_LIMITED_API
620
PyAPI_FUNC(PyObject*) _PyUnicode_Copy(
625
/* Copy character from one unicode object into another, this function performs
626
character conversion when necessary and falls back to memcpy() if possible.
628
Fail if to is too small (smaller than *how_many* or smaller than
629
len(from)-from_start), or if kind(from[from_start:from_start+how_many]) >
630
kind(to), or if *to* has more than 1 reference.
632
Return the number of written characters, or return -1 and raise an exception
637
how_many = min(how_many, len(from) - from_start)
638
to[to_start:to_start+how_many] = from[from_start:from_start+how_many]
641
Note: The function doesn't write a terminating null character.
643
#ifndef Py_LIMITED_API
644
PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
648
Py_ssize_t from_start,
652
/* Unsafe version of PyUnicode_CopyCharacters(): don't check arguments and so
653
may crash if parameters are invalid (e.g. if the output string
655
PyAPI_FUNC(void) _PyUnicode_FastCopyCharacters(
659
Py_ssize_t from_start,
664
#ifndef Py_LIMITED_API
665
/* Fill a string with a character: write fill_char into
666
unicode[start:start+length].
668
Fail if fill_char is bigger than the string maximum character, or if the
669
string has more than 1 reference.
671
Return the number of written characters, or return -1 and raise an exception
673
PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill(
680
/* Unsafe version of PyUnicode_Fill(): don't check arguments and so may crash
681
if parameters are invalid (e.g. if length is longer than the string). */
682
PyAPI_FUNC(void) _PyUnicode_FastFill(
690
/* Create a Unicode Object from the Py_UNICODE buffer u of the given
693
u may be NULL which causes the contents to be undefined. It is the
694
user's responsibility to fill in the needed data afterwards. Note
695
that modifying the Unicode object contents after construction is
696
only allowed if u was set to NULL.
698
The buffer is copied into the new object. */
700
#ifndef Py_LIMITED_API
701
PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
702
const Py_UNICODE *u, /* Unicode buffer */
703
Py_ssize_t size /* size of buffer */
707
/* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded bytes */
708
PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize(
709
const char *u, /* UTF-8 encoded string */
710
Py_ssize_t size /* size of buffer */
713
/* Similar to PyUnicode_FromUnicode(), but u points to null-terminated
714
UTF-8 encoded bytes. The size is determined with strlen(). */
715
PyAPI_FUNC(PyObject*) PyUnicode_FromString(
716
const char *u /* UTF-8 encoded string */
719
#ifndef Py_LIMITED_API
720
/* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters.
721
Scan the string to find the maximum character. */
722
PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
727
/* Create a new string from a buffer of ASCII characters.
728
WARNING: Don't check if the string contains any non-ASCII character. */
729
PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII(
734
PyAPI_FUNC(PyObject*) PyUnicode_Substring(
739
#ifndef Py_LIMITED_API
740
/* Compute the maximum character of the substring unicode[start:end].
741
Return 127 for an empty string. */
742
PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
748
/* Copy the string into a UCS4 buffer including the null character if copy_null
749
is set. Return NULL and raise an exception on error. Raise a ValueError if
750
the buffer is smaller than the string. Return buffer on success.
752
buflen is the length of the buffer in (Py_UCS4) characters. */
753
PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4(
759
/* Copy the string into a UCS4 buffer. A new buffer is allocated using
760
* PyMem_Malloc; if this fails, NULL is returned with a memory error
762
PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode);
764
/* Return a read-only pointer to the Unicode object's internal
766
If the wchar_t/Py_UNICODE representation is not yet available, this
767
function will calculate it. */
769
#ifndef Py_LIMITED_API
770
PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
771
PyObject *unicode /* Unicode object */
775
/* Return a read-only pointer to the Unicode object's internal
776
Py_UNICODE buffer and save the length at size.
777
If the wchar_t/Py_UNICODE representation is not yet available, this
778
function will calculate it. */
780
#ifndef Py_LIMITED_API
781
PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
782
PyObject *unicode, /* Unicode object */
783
Py_ssize_t *size /* location where to save the length */
787
/* Get the length of the Unicode object. */
789
PyAPI_FUNC(Py_ssize_t) PyUnicode_GetLength(
793
/* Get the number of Py_UNICODE units in the
794
string representation. */
796
PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize(
797
PyObject *unicode /* Unicode object */
800
/* Read a character from the string. */
802
PyAPI_FUNC(Py_UCS4) PyUnicode_ReadChar(
807
/* Write a character to the string. The string must have been created through
808
PyUnicode_New, must not be shared, and must not have been hashed yet.
810
Return 0 on success, -1 on error. */
812
PyAPI_FUNC(int) PyUnicode_WriteChar(
818
#ifndef Py_LIMITED_API
819
/* Get the maximum ordinal for a Unicode character. */
820
PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
823
/* Resize a Unicode object. The length is the number of characters, except
824
if the kind of the string is PyUnicode_WCHAR_KIND: in this case, the length
825
is the number of Py_UNICODE characters.
827
*unicode is modified to point to the new (resized) object and 0
830
Try to resize the string in place (which is usually faster than allocating
831
a new string and copy characters), or create a new string.
833
Error handling is implemented as follows: an exception is set, -1
834
is returned and *unicode left untouched.
836
WARNING: The function doesn't check string content, the result may not be a
837
string in canonical representation. */
839
PyAPI_FUNC(int) PyUnicode_Resize(
840
PyObject **unicode, /* Pointer to the Unicode object */
841
Py_ssize_t length /* New length */
844
/* Coerce obj to a Unicode object and return a reference with
845
*incremented* refcount.
847
Coercion is done in the following way:
849
1. bytes, bytearray and other char buffer compatible objects are decoded
850
under the assumptions that they contain data using the UTF-8
851
encoding. Decoding is done in "strict" mode.
853
2. All other objects (including Unicode objects) raise an
856
The API returns NULL in case of an error. The caller is responsible
857
for decref'ing the returned objects.
861
PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject(
862
PyObject *obj, /* Object */
863
const char *encoding, /* encoding */
864
const char *errors /* error handling */
867
/* Coerce obj to a Unicode object and return a reference with
868
*incremented* refcount.
870
Unicode objects are passed back as-is (subclasses are converted to
871
true Unicode objects), all other objects are delegated to
872
PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in
873
using UTF-8 encoding as basis for decoding the object.
875
The API returns NULL in case of an error. The caller is responsible
876
for decref'ing the returned objects.
880
PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
881
PyObject *obj /* Object */
884
PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(
885
const char *format, /* ASCII-encoded string */
888
PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(
889
const char *format, /* ASCII-encoded string */
893
#ifndef Py_LIMITED_API
897
enum PyUnicode_Kind kind;
902
/* minimum number of allocated characters (default: 0) */
903
Py_ssize_t min_length;
905
/* minimum character (default: 127, ASCII) */
908
/* If non-zero, overallocate the buffer by 25% (default: 0). */
909
unsigned char overallocate;
911
/* If readonly is 1, buffer is a shared string (cannot be modified)
912
and size is set to 0. */
913
unsigned char readonly;
916
/* Initialize a Unicode writer.
918
* By default, the minimum buffer size is 0 character and overallocation is
919
* disabled. Set min_length, min_char and overallocate attributes to control
920
* the allocation of the buffer. */
922
_PyUnicodeWriter_Init(_PyUnicodeWriter *writer);
924
/* Prepare the buffer to write 'length' characters
925
with the specified maximum character.
927
Return 0 on success, raise an exception and return -1 on error. */
928
#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \
929
(((MAXCHAR) <= (WRITER)->maxchar \
930
&& (LENGTH) <= (WRITER)->size - (WRITER)->pos) \
934
: _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))
936
/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
939
_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
940
Py_ssize_t length, Py_UCS4 maxchar);
942
/* Append a Unicode character.
943
Return 0 on success, raise an exception and return -1 on error. */
945
_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
949
/* Append a Unicode string.
950
Return 0 on success, raise an exception and return -1 on error. */
952
_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
953
PyObject *str /* Unicode string */
956
/* Append a substring of a Unicode string.
957
Return 0 on success, raise an exception and return -1 on error. */
959
_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
960
PyObject *str, /* Unicode string */
965
/* Append an ASCII-encoded byte string.
966
Return 0 on success, raise an exception and return -1 on error. */
968
_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
969
const char *str, /* ASCII-encoded byte string */
970
Py_ssize_t len /* number of bytes, or -1 if unknown */
973
/* Append a latin1-encoded byte string.
974
Return 0 on success, raise an exception and return -1 on error. */
976
_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
977
const char *str, /* latin1-encoded byte string */
978
Py_ssize_t len /* length in bytes */
981
/* Get the value of the writer as a Unicode string. Clear the
982
buffer of the writer. Raise an exception and return NULL
984
PyAPI_FUNC(PyObject *)
985
_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
987
/* Deallocate memory of a writer (clear its internal buffer). */
989
_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
992
#ifndef Py_LIMITED_API
993
/* Format the object based on the format_spec, as defined in PEP 3101
994
(Advanced String Formatting). */
995
PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
996
_PyUnicodeWriter *writer,
998
PyObject *format_spec,
1003
PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **);
1004
PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **);
1005
PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
1006
const char *u /* UTF-8 encoded string */
1008
#ifndef Py_LIMITED_API
1009
/* Private helper; takes no arguments and returns nothing.
   NOTE(review): judging by the name, this presumably releases the strings
   held by the interning machinery -- confirm when it is safe to call. */
PyAPI_FUNC(void) _Py_ReleaseInternedUnicodeStrings(void);
1012
/* Use only if you know it's a string */
1013
#define PyUnicode_CHECK_INTERNED(op) \
1014
(((PyASCIIObject *)(op))->state.interned)
1016
/* --- wchar_t support for platforms which support it --------------------- */
1020
/* Create a Unicode Object from the wchar_t buffer w of the given
1023
The buffer is copied into the new object. */
1025
PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
1026
const wchar_t *w, /* wchar_t buffer */
1027
Py_ssize_t size /* size of buffer */
1030
/* Copies the Unicode Object contents into the wchar_t buffer w. At
1031
most size wchar_t characters are copied.
1033
Note that the resulting wchar_t string may or may not be
1034
0-terminated. It is the responsibility of the caller to make sure
1035
that the wchar_t string is 0-terminated in case this is required by
1038
Returns the number of wchar_t characters copied (excluding a
1039
possibly trailing 0-termination character) or -1 in case of an
1042
PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar(
1043
PyObject *unicode, /* Unicode object */
1044
wchar_t *w, /* wchar_t buffer */
1045
Py_ssize_t size /* size of buffer */
1048
/* Convert the Unicode object to a wide character string. The output string
1049
always ends with a null character. If size is not NULL, write the number of
1050
wide characters (excluding the null character) into *size.
1052
Returns a buffer allocated by PyMem_Alloc() (use PyMem_Free() to free it)
1053
on success. On error, returns NULL, *size is undefined and raises a
1056
PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString(
1057
PyObject *unicode, /* Unicode object */
1058
Py_ssize_t *size /* number of characters of the result */
1061
#ifndef Py_LIMITED_API
1062
/* Private helper; takes a string object s and a kind value and returns an
   untyped pointer.
   NOTE(review): presumably returns the characters of s converted to the
   requested kind (one of the PyUnicode_*_KIND values) -- confirm whether the
   buffer is newly allocated, who must free it, and what is returned on
   error. */
PyAPI_FUNC(void*) _PyUnicode_AsKind(PyObject *s, unsigned int kind);
1067
/* --- Unicode ordinals --------------------------------------------------- */
1069
/* Create a Unicode Object from the given Unicode code point ordinal.
1071
The ordinal must be in range(0x110000). A ValueError is
1072
raised in case it is not.
1076
PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);
1078
/* --- Free-list management ----------------------------------------------- */
1080
/* Clear the free list used by the Unicode implementation.
1082
This can be used to release memory used for objects on the free
1083
list back to the Python memory allocator.
1087
PyAPI_FUNC(int) PyUnicode_ClearFreeList(void);
1089
/* === Builtin Codecs =====================================================
1091
Many of these APIs take two arguments encoding and errors. These
1092
parameters encoding and errors have the same semantics as the ones
1093
of the builtin str() API.
1095
Setting encoding to NULL causes the default encoding (UTF-8) to be used.
1097
Error handling is set by errors which may also be set to NULL
1098
meaning to use the default handling defined for the codec. Default
1099
error handling for all builtin codecs is "strict" (ValueErrors are
1102
The codecs all use a similar interface. Only deviations from the
1103
generic ones are documented.
1107
/* --- Manage the default encoding ---------------------------------------- */
1109
/* Returns a pointer to the default encoding (UTF-8) of the
1110
Unicode object unicode and the size of the encoded representation
1111
in bytes stored in *size.
1113
In case of an error, *size is not set.
1115
This function caches the UTF-8 encoded string in the unicodeobject
1116
and subsequent calls will return the same string. The memory is released
1117
when the unicodeobject is deallocated.
1119
_PyUnicode_AsStringAndSize is a #define for PyUnicode_AsUTF8AndSize to
1120
support the previous internal function with the same behaviour.
1122
*** This API is for interpreter INTERNAL USE ONLY and will likely
1123
*** be removed or changed in the future.
1125
*** If you need to access the Unicode object as UTF-8 bytes string,
1126
*** please use PyUnicode_AsUTF8String() instead.
1129
#ifndef Py_LIMITED_API
1130
PyAPI_FUNC(char *) PyUnicode_AsUTF8AndSize(
1133
#define _PyUnicode_AsStringAndSize PyUnicode_AsUTF8AndSize
1136
/* Returns a pointer to the default encoding (UTF-8) of the
1137
Unicode object unicode.
1139
Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation
1140
in the unicodeobject.
1142
_PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to
1143
support the previous internal function with the same behaviour.
1145
Use of this API is DEPRECATED since no size information can be
1146
extracted from the returned data.
1148
*** This API is for interpreter INTERNAL USE ONLY and will likely
1149
*** be removed or changed in the future.
1151
*** If you need to access the Unicode object as UTF-8 bytes string,
1152
*** please use PyUnicode_AsUTF8String() instead.
1156
#ifndef Py_LIMITED_API
1157
PyAPI_FUNC(char *) PyUnicode_AsUTF8(PyObject *unicode);
1158
#define _PyUnicode_AsString PyUnicode_AsUTF8
1161
/* Returns "utf-8". */
1163
PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void);
1165
/* --- Generic Codecs ----------------------------------------------------- */
1167
/* Create a Unicode object by decoding the encoded string s of the
1170
PyAPI_FUNC(PyObject*) PyUnicode_Decode(
1171
const char *s, /* encoded string */
1172
Py_ssize_t size, /* size of buffer */
1173
const char *encoding, /* encoding */
1174
const char *errors /* error handling */
1177
/* Decode a Unicode object unicode and return the result as Python
1180
PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject(
1181
PyObject *unicode, /* Unicode object */
1182
const char *encoding, /* encoding */
1183
const char *errors /* error handling */
1186
/* Decode a Unicode object unicode and return the result as Unicode
1189
PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode(
1190
PyObject *unicode, /* Unicode object */
1191
const char *encoding, /* encoding */
1192
const char *errors /* error handling */
1195
/* Encodes a Py_UNICODE buffer of the given size and returns a
1196
Python string object. */
1198
#ifndef Py_LIMITED_API
1199
PyAPI_FUNC(PyObject*) PyUnicode_Encode(
1200
const Py_UNICODE *s, /* Unicode char buffer */
1201
Py_ssize_t size, /* number of Py_UNICODE chars to encode */
1202
const char *encoding, /* encoding */
1203
const char *errors /* error handling */
1207
/* Encodes a Unicode object and returns the result as Python
1210
PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject(
1211
PyObject *unicode, /* Unicode object */
1212
const char *encoding, /* encoding */
1213
const char *errors /* error handling */
1216
/* Encodes a Unicode object and returns the result as Python string
1219
PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
1220
PyObject *unicode, /* Unicode object */
1221
const char *encoding, /* encoding */
1222
const char *errors /* error handling */
1225
/* Encodes a Unicode object and returns the result as Unicode
1228
PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode(
1229
PyObject *unicode, /* Unicode object */
1230
const char *encoding, /* encoding */
1231
const char *errors /* error handling */
1234
/* Build an encoding map. */
1236
PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap(
1237
PyObject* string /* 256 character map */
1240
/* --- UTF-7 Codecs ------------------------------------------------------- */
1242
PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
1243
const char *string, /* UTF-7 encoded string */
1244
Py_ssize_t length, /* size of string */
1245
const char *errors /* error handling */
1248
PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful(
1249
const char *string, /* UTF-7 encoded string */
1250
Py_ssize_t length, /* size of string */
1251
const char *errors, /* error handling */
1252
Py_ssize_t *consumed /* bytes consumed */
1255
#ifndef Py_LIMITED_API
1256
PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
1257
const Py_UNICODE *data, /* Unicode char buffer */
1258
Py_ssize_t length, /* number of Py_UNICODE chars to encode */
1259
int base64SetO, /* Encode RFC2152 Set O characters in base64 */
1260
int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
1261
const char *errors /* error handling */
1263
PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF7(
1264
PyObject *unicode, /* Unicode object */
1265
int base64SetO, /* Encode RFC2152 Set O characters in base64 */
1266
int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
1267
const char *errors /* error handling */
1271
/* --- UTF-8 Codecs ------------------------------------------------------- */
1273
PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
1274
const char *string, /* UTF-8 encoded string */
1275
Py_ssize_t length, /* size of string */
1276
const char *errors /* error handling */
1279
PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful(
1280
const char *string, /* UTF-8 encoded string */
1281
Py_ssize_t length, /* size of string */
1282
const char *errors, /* error handling */
1283
Py_ssize_t *consumed /* bytes consumed */
1286
PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
1287
PyObject *unicode /* Unicode object */
1290
#ifndef Py_LIMITED_API
1291
PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
1293
const char *errors);
1295
PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
1296
const Py_UNICODE *data, /* Unicode char buffer */
1297
Py_ssize_t length, /* number of Py_UNICODE chars to encode */
1298
const char *errors /* error handling */
1302
/* --- UTF-32 Codecs ------------------------------------------------------ */
1304
/* Decodes length bytes from a UTF-32 encoded buffer string and returns
1305
the corresponding Unicode object.
1307
errors (if non-NULL) defines the error handling. It defaults
1310
If byteorder is non-NULL, the decoder starts decoding using the
1313
*byteorder == -1: little endian
1314
*byteorder == 0: native order
1315
*byteorder == 1: big endian
1317
In native mode, the first four bytes of the stream are checked for a
1318
BOM mark. If found, the BOM mark is analysed, the byte order
1319
adjusted and the BOM skipped. In the other modes, no BOM mark
1320
interpretation is done. After completion, *byteorder is set to the
1321
current byte order at the end of input data.
1323
If byteorder is NULL, the codec starts in native order mode.
1327
PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32(
1328
const char *string, /* UTF-32 encoded string */
1329
Py_ssize_t length, /* size of string */
1330
const char *errors, /* error handling */
1331
int *byteorder /* pointer to byteorder to use
1332
0=native;-1=LE,1=BE; updated on
1336
PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful(
1337
const char *string, /* UTF-32 encoded string */
1338
Py_ssize_t length, /* size of string */
1339
const char *errors, /* error handling */
1340
int *byteorder, /* pointer to byteorder to use
1341
0=native;-1=LE,1=BE; updated on
1343
Py_ssize_t *consumed /* bytes consumed */
1346
/* Returns a Python string using the UTF-32 encoding in native byte
1347
order. The string always starts with a BOM mark. */
1349
PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String(
1350
PyObject *unicode /* Unicode object */
1353
/* Returns a Python string object holding the UTF-32 encoded value of
1356
If byteorder is not 0, output is written according to the following
1359
byteorder == -1: little endian
1360
byteorder == 0: native byte order (writes a BOM mark)
1361
byteorder == 1: big endian
1363
If byteorder is 0, the output string will always start with the
1364
Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
1369
#ifndef Py_LIMITED_API
1370
PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(
1371
const Py_UNICODE *data, /* Unicode char buffer */
1372
Py_ssize_t length, /* number of Py_UNICODE chars to encode */
1373
const char *errors, /* error handling */
1374
int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
1376
PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32(
1377
PyObject *object, /* Unicode object */
1378
const char *errors, /* error handling */
1379
int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
1383
/* --- UTF-16 Codecs ------------------------------------------------------ */
1385
/* Decodes length bytes from a UTF-16 encoded buffer string and returns
1386
the corresponding Unicode object.
1388
errors (if non-NULL) defines the error handling. It defaults
1391
If byteorder is non-NULL, the decoder starts decoding using the
1394
*byteorder == -1: little endian
1395
*byteorder == 0: native order
1396
*byteorder == 1: big endian
1398
In native mode, the first two bytes of the stream are checked for a
1399
BOM mark. If found, the BOM mark is analysed, the byte order
1400
adjusted and the BOM skipped. In the other modes, no BOM mark
1401
interpretation is done. After completion, *byteorder is set to the
1402
current byte order at the end of input data.
1404
If byteorder is NULL, the codec starts in native order mode.
1408
PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(
1409
const char *string, /* UTF-16 encoded string */
1410
Py_ssize_t length, /* size of string */
1411
const char *errors, /* error handling */
1412
int *byteorder /* pointer to byteorder to use
1413
0=native;-1=LE,1=BE; updated on
1417
PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful(
1418
const char *string, /* UTF-16 encoded string */
1419
Py_ssize_t length, /* size of string */
1420
const char *errors, /* error handling */
1421
int *byteorder, /* pointer to byteorder to use
1422
0=native;-1=LE,1=BE; updated on
1424
Py_ssize_t *consumed /* bytes consumed */
1427
/* Returns a Python string using the UTF-16 encoding in native byte
1428
order. The string always starts with a BOM mark. */
1430
PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String(
1431
PyObject *unicode /* Unicode object */
1434
/* Returns a Python string object holding the UTF-16 encoded value of
1437
If byteorder is not 0, output is written according to the following
1440
byteorder == -1: little endian
1441
byteorder == 0: native byte order (writes a BOM mark)
1442
byteorder == 1: big endian
1444
If byteorder is 0, the output string will always start with the
1445
Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
1448
Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
1449
UCS-2. This trick makes it possible to add full UTF-16 capabilities
1450
at a later point without compromising the APIs.
1454
#ifndef Py_LIMITED_API
1455
PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
1456
const Py_UNICODE *data, /* Unicode char buffer */
1457
Py_ssize_t length, /* number of Py_UNICODE chars to encode */
1458
const char *errors, /* error handling */
1459
int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
1461
PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16(
1462
PyObject* unicode, /* Unicode object */
1463
const char *errors, /* error handling */
1464
int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
1468
/* --- Unicode-Escape Codecs ---------------------------------------------- */
1470
PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
1471
const char *string, /* Unicode-Escape encoded string */
1472
Py_ssize_t length, /* size of string */
1473
const char *errors /* error handling */
1476
PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
1477
PyObject *unicode /* Unicode object */
1480
#ifndef Py_LIMITED_API
1481
PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
1482
const Py_UNICODE *data, /* Unicode char buffer */
1483
Py_ssize_t length /* Number of Py_UNICODE chars to encode */
1487
/* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
1489
PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
1490
const char *string, /* Raw-Unicode-Escape encoded string */
1491
Py_ssize_t length, /* size of string */
1492
const char *errors /* error handling */
1495
PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
1496
PyObject *unicode /* Unicode object */
1499
#ifndef Py_LIMITED_API
1500
PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
1501
const Py_UNICODE *data, /* Unicode char buffer */
1502
Py_ssize_t length /* Number of Py_UNICODE chars to encode */
1506
/* --- Unicode Internal Codec ---------------------------------------------
1508
Only for internal use in _codecsmodule.c */
1510
#ifndef Py_LIMITED_API
1511
PyObject *_PyUnicode_DecodeUnicodeInternal(
1518
/* --- Latin-1 Codecs -----------------------------------------------------
1520
Note: Latin-1 corresponds to the first 256 Unicode ordinals.
1524
PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1(
1525
const char *string, /* Latin-1 encoded string */
1526
Py_ssize_t length, /* size of string */
1527
const char *errors /* error handling */
1530
PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
1531
PyObject *unicode /* Unicode object */
1534
#ifndef Py_LIMITED_API
1535
PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
1537
const char* errors);
1539
PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
1540
const Py_UNICODE *data, /* Unicode char buffer */
1541
Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
1542
const char *errors /* error handling */
1546
/* --- ASCII Codecs -------------------------------------------------------
1548
Only 7-bit ASCII data is accepted. All other codes generate errors.
1552
PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII(
1553
const char *string, /* ASCII encoded string */
1554
Py_ssize_t length, /* size of string */
1555
const char *errors /* error handling */
1558
PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
1559
PyObject *unicode /* Unicode object */
1562
#ifndef Py_LIMITED_API
1563
PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString(
1565
const char* errors);
1567
PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
1568
const Py_UNICODE *data, /* Unicode char buffer */
1569
Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
1570
const char *errors /* error handling */
1574
/* --- Character Map Codecs -----------------------------------------------
1576
This codec uses mappings to encode and decode characters.
1578
Decoding mappings must map single string characters to single
1579
Unicode characters, integers (which are then interpreted as Unicode
1580
ordinals) or None (meaning "undefined mapping" and causing an
1583
Encoding mappings must map single Unicode characters to single
1584
string characters, integers (which are then interpreted as Latin-1
1585
ordinals) or None (meaning "undefined mapping" and causing an
1588
If a character lookup fails with a LookupError, the character is
1589
copied as-is meaning that its ordinal value will be interpreted as
1590
Unicode or Latin-1 ordinal resp. Because of this, mappings only need
1591
to contain those mappings which map characters to different code
1596
PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap(
1597
const char *string, /* Encoded string */
1598
Py_ssize_t length, /* size of string */
1599
PyObject *mapping, /* character mapping
1600
(char ordinal -> unicode ordinal) */
1601
const char *errors /* error handling */
1604
PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString(
1605
PyObject *unicode, /* Unicode object */
1606
PyObject *mapping /* character mapping
1607
(unicode ordinal -> char ordinal) */
1610
#ifndef Py_LIMITED_API
1611
PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
1612
const Py_UNICODE *data, /* Unicode char buffer */
1613
Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
1614
PyObject *mapping, /* character mapping
1615
(unicode ordinal -> char ordinal) */
1616
const char *errors /* error handling */
1618
PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap(
1619
PyObject *unicode, /* Unicode object */
1620
PyObject *mapping, /* character mapping
1621
(unicode ordinal -> char ordinal) */
1622
const char *errors /* error handling */
1626
/* Translate a Py_UNICODE buffer of the given length by applying a
1627
character mapping table to it and return the resulting Unicode
1630
The mapping table must map Unicode ordinal integers to Unicode
1631
ordinal integers or None (causing deletion of the character).
1633
Mapping tables may be dictionaries or sequences. Unmapped character
1634
ordinals (ones which cause a LookupError) are left untouched and
1639
#ifndef Py_LIMITED_API
1640
PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
1641
const Py_UNICODE *data, /* Unicode char buffer */
1642
Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
1643
PyObject *table, /* Translate table */
1644
const char *errors /* error handling */
1650
/* --- MBCS codecs for Windows -------------------------------------------- */
1652
PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
1653
const char *string, /* MBCS encoded string */
1654
Py_ssize_t length, /* size of string */
1655
const char *errors /* error handling */
1658
PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful(
1659
const char *string, /* MBCS encoded string */
1660
Py_ssize_t length, /* size of string */
1661
const char *errors, /* error handling */
1662
Py_ssize_t *consumed /* bytes consumed */
1665
PyAPI_FUNC(PyObject*) PyUnicode_DecodeCodePageStateful(
1666
int code_page, /* code page number */
1667
const char *string, /* encoded string */
1668
Py_ssize_t length, /* size of string */
1669
const char *errors, /* error handling */
1670
Py_ssize_t *consumed /* bytes consumed */
1673
PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
1674
PyObject *unicode /* Unicode object */
1677
#ifndef Py_LIMITED_API
1678
PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
1679
const Py_UNICODE *data, /* Unicode char buffer */
1680
Py_ssize_t length, /* number of Py_UNICODE chars to encode */
1681
const char *errors /* error handling */
1685
PyAPI_FUNC(PyObject*) PyUnicode_EncodeCodePage(
1686
int code_page, /* code page number */
1687
PyObject *unicode, /* Unicode object */
1688
const char *errors /* error handling */
1691
#endif /* HAVE_MBCS */
1693
/* --- Decimal Encoder ---------------------------------------------------- */
1695
/* Takes a Unicode string holding a decimal value and writes it into
1696
an output buffer using standard ASCII digit codes.
1698
The output buffer has to provide at least length+1 bytes of storage
1699
area. The output string is 0-terminated.
1701
The encoder converts whitespace to ' ', decimal characters to their
1702
corresponding ASCII digit and all other Latin-1 characters except
1703
\0 as-is. Characters outside this range (Unicode ordinals 1-256)
1704
are treated as errors. This includes embedded NULL bytes.
1706
Error handling is defined by the errors argument:
1708
NULL or "strict": raise a ValueError
1709
"ignore": ignore the wrong characters (these are not copied to the
1711
"replace": replaces illegal characters with '?'
1713
Returns 0 on success, -1 on failure.
1717
#ifndef Py_LIMITED_API
1718
PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
1719
Py_UNICODE *s, /* Unicode buffer */
1720
Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
1721
char *output, /* Output buffer; must have size >= length+1 */
1722
const char *errors /* error handling */
1726
/* Transforms code points that have decimal digit property to the
1727
corresponding ASCII digit code points.
1729
Returns a new Unicode string on success, NULL on failure.
1732
#ifndef Py_LIMITED_API
1733
PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII(
1734
Py_UNICODE *s, /* Unicode buffer */
1735
Py_ssize_t length /* Number of Py_UNICODE chars to transform */
1739
/* Similar to PyUnicode_TransformDecimalToASCII(), but takes a PyObject
1740
as argument instead of a raw buffer and length. This function additionally
1741
transforms spaces to ASCII because this is what the callers in longobject,
1742
floatobject, and complexobject did anyway. */
1744
#ifndef Py_LIMITED_API
1745
PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
1746
PyObject *unicode /* Unicode object */
1750
/* --- Locale encoding --------------------------------------------------- */
1752
/* Decode a string from the current locale encoding. The decoder is strict if
1753
*surrogateescape* is equal to zero, otherwise it uses the 'surrogateescape'
1754
error handler (PEP 383) to escape undecodable bytes. If a byte sequence can
1755
be decoded as a surrogate character and *surrogateescape* is not equal to
1756
zero, the byte sequence is escaped using the 'surrogateescape' error handler
1757
instead of being decoded. *str* must end with a null character but cannot
1758
contain embedded null characters. */
1760
PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocaleAndSize(
1763
const char *errors);
1765
/* Similar to PyUnicode_DecodeLocaleAndSize(), but compute the string
1766
length using strlen(). */
1768
PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale(
1770
const char *errors);
1772
/* Encode a Unicode object to the current locale encoding. The encoder is
1773
strict if *surrogateescape* is equal to zero, otherwise the
1774
"surrogateescape" error handler is used. Return a bytes object. The string
1775
cannot contain embedded null characters. */
1777
PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale(
1782
/* --- File system encoding ---------------------------------------------- */
1784
/* ParseTuple converter: encode str objects to bytes using
1785
PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */
1787
PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*);
1789
/* ParseTuple converter: decode bytes objects to unicode using
1790
PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */
1792
PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*);
1794
/* Decode a null-terminated string using Py_FileSystemDefaultEncoding
1795
and the "surrogateescape" error handler.
1797
If Py_FileSystemDefaultEncoding is not set, fall back to the locale
1800
Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known.
1803
PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
1804
const char *s /* encoded string */
1807
/* Decode a string using Py_FileSystemDefaultEncoding
1808
and the "surrogateescape" error handler.
1810
If Py_FileSystemDefaultEncoding is not set, fall back to the locale
1814
PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
1815
const char *s, /* encoded string */
1816
Py_ssize_t size /* size */
1819
/* Encode a Unicode object to Py_FileSystemDefaultEncoding with the
1820
"surrogateescape" error handler, and return bytes.
1822
If Py_FileSystemDefaultEncoding is not set, fall back to the locale
1826
PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault(
1830
/* --- Methods & Slots ----------------------------------------------------
1832
These are capable of handling Unicode objects and strings on input
1833
(we refer to them as strings in the descriptions) and return
1834
Unicode objects or integers as appropriate. */
1836
/* Concat two strings giving a new Unicode string. */
1838
PyAPI_FUNC(PyObject*) PyUnicode_Concat(
1839
PyObject *left, /* Left string */
1840
PyObject *right /* Right string */
1843
/* Concat two strings and put the result in *pleft
1844
(sets *pleft to NULL on error) */
1846
PyAPI_FUNC(void) PyUnicode_Append(
1847
PyObject **pleft, /* Pointer to left string */
1848
PyObject *right /* Right string */
1851
/* Concat two strings, put the result in *pleft and drop the right object
1852
(sets *pleft to NULL on error) */
1854
PyAPI_FUNC(void) PyUnicode_AppendAndDel(
1855
PyObject **pleft, /* Pointer to left string */
1856
PyObject *right /* Right string */
1859
/* Split a string giving a list of Unicode strings.
1861
If sep is NULL, splitting will be done at all whitespace
1862
substrings. Otherwise, splits occur at the given separator.
1864
At most maxsplit splits will be done. If negative, no limit is set.
1866
Separators are not included in the resulting list.
1870
PyAPI_FUNC(PyObject*) PyUnicode_Split(
1871
PyObject *s, /* String to split */
1872
PyObject *sep, /* String separator */
1873
Py_ssize_t maxsplit /* Maxsplit count */
1876
/* Ditto, but split at line breaks.
1878
CRLF is considered to be one line break. Line breaks are not
1879
included in the resulting list. */
1881
PyAPI_FUNC(PyObject*) PyUnicode_Splitlines(
1882
PyObject *s, /* String to split */
1883
int keepends /* If true, line end markers are included */
1886
/* Partition a string using a given separator. */
1888
PyAPI_FUNC(PyObject*) PyUnicode_Partition(
1889
PyObject *s, /* String to partition */
1890
PyObject *sep /* String separator */
1893
/* Partition a string using a given separator, searching from the end of the
1896
PyAPI_FUNC(PyObject*) PyUnicode_RPartition(
1897
PyObject *s, /* String to partition */
1898
PyObject *sep /* String separator */
1901
/* Split a string giving a list of Unicode strings.
1903
If sep is NULL, splitting will be done at all whitespace
1904
substrings. Otherwise, splits occur at the given separator.
1906
At most maxsplit splits will be done. But unlike PyUnicode_Split,
1907
PyUnicode_RSplit splits from the end of the string. If negative,
1910
Separators are not included in the resulting list.
1914
PyAPI_FUNC(PyObject*) PyUnicode_RSplit(
1915
PyObject *s, /* String to split */
1916
PyObject *sep, /* String separator */
1917
Py_ssize_t maxsplit /* Maxsplit count */
1920
/* Translate a string by applying a character mapping table to it and
1921
return the resulting Unicode object.
1923
The mapping table must map Unicode ordinal integers to Unicode
1924
ordinal integers or None (causing deletion of the character).
1926
Mapping tables may be dictionaries or sequences. Unmapped character
1927
ordinals (ones which cause a LookupError) are left untouched and
1932
PyAPI_FUNC(PyObject *) PyUnicode_Translate(
1933
PyObject *str, /* String */
1934
PyObject *table, /* Translate table */
1935
const char *errors /* error handling */
1938
/* Join a sequence of strings using the given separator and return
1939
the resulting Unicode string. */
1941
PyAPI_FUNC(PyObject*) PyUnicode_Join(
1942
PyObject *separator, /* Separator string */
1943
PyObject *seq /* Sequence object */
1946
/* Return 1 if substr matches str[start:end] at the given tail end, 0
1949
PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch(
1950
PyObject *str, /* String */
1951
PyObject *substr, /* Prefix or Suffix string */
1952
Py_ssize_t start, /* Start index */
1953
Py_ssize_t end, /* Stop index */
1954
int direction /* Tail end: -1 prefix, +1 suffix */
1957
/* Return the first position of substr in str[start:end] using the
1958
given search direction or -1 if not found. -2 is returned in case
1959
an error occurred and an exception is set. */
1961
PyAPI_FUNC(Py_ssize_t) PyUnicode_Find(
1962
PyObject *str, /* String */
1963
PyObject *substr, /* Substring to find */
1964
Py_ssize_t start, /* Start index */
1965
Py_ssize_t end, /* Stop index */
1966
int direction /* Find direction: +1 forward, -1 backward */
1969
/* Like PyUnicode_Find, but search for single character only. */
1970
PyAPI_FUNC(Py_ssize_t) PyUnicode_FindChar(
1978
/* Count the number of occurrences of substr in str[start:end]. */
1980
PyAPI_FUNC(Py_ssize_t) PyUnicode_Count(
1981
PyObject *str, /* String */
1982
PyObject *substr, /* Substring to count */
1983
Py_ssize_t start, /* Start index */
1984
Py_ssize_t end /* Stop index */
1987
/* Replace at most maxcount occurrences of substr in str with replstr
1988
and return the resulting Unicode object. */
1990
PyAPI_FUNC(PyObject *) PyUnicode_Replace(
1991
PyObject *str, /* String */
1992
PyObject *substr, /* Substring to find */
1993
PyObject *replstr, /* Substring to replace */
1994
Py_ssize_t maxcount /* Max. number of replacements to apply;
1998
/* Compare two strings and return -1, 0, 1 for less than, equal,
2000
Raise an exception and return -1 on error. */
2002
PyAPI_FUNC(int) PyUnicode_Compare(
2003
PyObject *left, /* Left string */
2004
PyObject *right /* Right string */
2007
PyAPI_FUNC(int) _PyUnicode_CompareWithId(
2008
PyObject *left, /* Left string */
2009
_Py_Identifier *right /* Right identifier */
2012
PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
2014
const char *right /* ASCII-encoded string */
2017
/* Rich compare two strings and return one of the following:
2019
- NULL in case an exception was raised
2020
- Py_True or Py_False for successfully comparisons
2021
- Py_NotImplemented in case the type combination is unknown
2023
Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in
2024
case the conversion of the arguments to Unicode fails with a
2027
Possible values for op:
2029
Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
2033
PyAPI_FUNC(PyObject *) PyUnicode_RichCompare(
2034
PyObject *left, /* Left string */
2035
PyObject *right, /* Right string */
2036
int op /* Operation: Py_EQ, Py_NE, Py_GT, etc. */
2039
/* Apply a argument tuple or dictionary to a format string and return
2040
the resulting Unicode string. */
2042
PyAPI_FUNC(PyObject *) PyUnicode_Format(
2043
PyObject *format, /* Format string */
2044
PyObject *args /* Argument tuple or dictionary */
2047
/* Checks whether element is contained in container and return 1/0
2050
element has to coerce to an one element Unicode string. -1 is
2051
returned in case of an error. */
2053
PyAPI_FUNC(int) PyUnicode_Contains(
2054
PyObject *container, /* Container string */
2055
PyObject *element /* Element string */
2058
/* Checks whether the string contains any NUL characters. */
2060
#ifndef Py_LIMITED_API
2061
PyAPI_FUNC(int) _PyUnicode_HasNULChars(PyObject *);
2064
/* Checks whether argument is a valid identifier. */
2066
PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s);
2068
#ifndef Py_LIMITED_API
2069
/* Externally visible for str.strip(unicode) */
2070
PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
2077
/* Using explicit passed-in values, insert the thousands grouping
2078
into the string pointed to by buffer. For the argument descriptions,
2079
see Objects/stringlib/localeutil.h */
2080
#ifndef Py_LIMITED_API
2081
PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
2084
Py_ssize_t n_buffer,
2086
Py_ssize_t n_digits,
2087
Py_ssize_t min_width,
2088
const char *grouping,
2089
PyObject *thousands_sep,
2092
/* === Characters Type APIs =============================================== */
2094
/* Helper array used by Py_UNICODE_ISSPACE(). */
2096
#ifndef Py_LIMITED_API
2097
PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
2099
/* These should not be used directly. Use the Py_UNICODE_IS* and
2100
Py_UNICODE_TO* macros instead.
2102
These APIs are implemented in Objects/unicodectype.c.
2106
PyAPI_FUNC(int) _PyUnicode_IsLowercase(
2107
Py_UCS4 ch /* Unicode character */
2110
PyAPI_FUNC(int) _PyUnicode_IsUppercase(
2111
Py_UCS4 ch /* Unicode character */
2114
PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
2115
Py_UCS4 ch /* Unicode character */
2118
PyAPI_FUNC(int) _PyUnicode_IsXidStart(
2119
Py_UCS4 ch /* Unicode character */
2122
PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
2123
Py_UCS4 ch /* Unicode character */
2126
PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
2127
const Py_UCS4 ch /* Unicode character */
2130
PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
2131
const Py_UCS4 ch /* Unicode character */
2134
PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
2135
Py_UCS4 ch /* Unicode character */
2138
PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
2139
Py_UCS4 ch /* Unicode character */
2142
PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
2143
Py_UCS4 ch /* Unicode character */
2146
PyAPI_FUNC(int) _PyUnicode_ToLowerFull(
2147
Py_UCS4 ch, /* Unicode character */
2151
PyAPI_FUNC(int) _PyUnicode_ToTitleFull(
2152
Py_UCS4 ch, /* Unicode character */
2156
PyAPI_FUNC(int) _PyUnicode_ToUpperFull(
2157
Py_UCS4 ch, /* Unicode character */
2161
PyAPI_FUNC(int) _PyUnicode_ToFoldedFull(
2162
Py_UCS4 ch, /* Unicode character */
2166
PyAPI_FUNC(int) _PyUnicode_IsCaseIgnorable(
2167
Py_UCS4 ch /* Unicode character */
2170
PyAPI_FUNC(int) _PyUnicode_IsCased(
2171
Py_UCS4 ch /* Unicode character */
2174
PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
2175
Py_UCS4 ch /* Unicode character */
2178
PyAPI_FUNC(int) _PyUnicode_ToDigit(
2179
Py_UCS4 ch /* Unicode character */
2182
PyAPI_FUNC(double) _PyUnicode_ToNumeric(
2183
Py_UCS4 ch /* Unicode character */
2186
PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
2187
Py_UCS4 ch /* Unicode character */
2190
PyAPI_FUNC(int) _PyUnicode_IsDigit(
2191
Py_UCS4 ch /* Unicode character */
2194
PyAPI_FUNC(int) _PyUnicode_IsNumeric(
2195
Py_UCS4 ch /* Unicode character */
2198
PyAPI_FUNC(int) _PyUnicode_IsPrintable(
2199
Py_UCS4 ch /* Unicode character */
2202
PyAPI_FUNC(int) _PyUnicode_IsAlpha(
2203
Py_UCS4 ch /* Unicode character */
2206
PyAPI_FUNC(size_t) Py_UNICODE_strlen(
2210
PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcpy(
2212
const Py_UNICODE *s2);
2214
PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcat(
2215
Py_UNICODE *s1, const Py_UNICODE *s2);
2217
PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strncpy(
2219
const Py_UNICODE *s2,
2222
PyAPI_FUNC(int) Py_UNICODE_strcmp(
2223
const Py_UNICODE *s1,
2224
const Py_UNICODE *s2
2227
PyAPI_FUNC(int) Py_UNICODE_strncmp(
2228
const Py_UNICODE *s1,
2229
const Py_UNICODE *s2,
2233
PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr(
2234
const Py_UNICODE *s,
2238
PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr(
2239
const Py_UNICODE *s,
2243
/* Create a copy of a unicode string ending with a nul character. Return NULL
2244
and raise a MemoryError exception on memory allocation failure, otherwise
2245
return a new allocated buffer (use PyMem_Free() to free the buffer). */
2247
PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy(
2250
#endif /* Py_LIMITED_API */
#if defined(Py_DEBUG) && !defined(Py_LIMITED_API)
/* Debug-build-only consistency check.
   NOTE(review): the parameter list and the closing #endif were missing
   from this copy of the header and have been restored from the upstream
   CPython declaration. Confirm against Objects/unicodeobject.c. */
PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
    PyObject *op,
    int check_content);
#endif
2258
/* Return an interned Unicode object for an Identifier; may fail if there is no memory.*/
2259
PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*);
2260
/* Clear all static strings. */
2261
PyAPI_FUNC(void) _PyUnicode_ClearStaticStrings(void);
2266
#endif /* !Py_UNICODEOBJECT_H */