145
145
/* --- String codecs ------------------------------------------------------ */
146
146
static PyObject *
147
147
escape_decode(PyObject *self,
150
150
const char *errors = NULL;
151
151
const char *data;
154
154
if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
155
&data, &size, &errors))
155
&data, &size, &errors))
157
157
return codec_tuple(PyBytes_DecodeEscape(data, size, errors, 0, NULL),
161
161
static PyObject *
162
162
escape_encode(PyObject *self,
165
static const char *hexdigits = "0123456789abcdef";
169
const char *errors = NULL;
172
if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
173
&PyBytes_Type, &str, &errors))
176
size = PyBytes_GET_SIZE(str);
178
if (newsize > PY_SSIZE_T_MAX || newsize / 4 != size) {
179
PyErr_SetString(PyExc_OverflowError,
180
"string is too large to encode");
183
v = PyBytes_FromStringAndSize(NULL, newsize);
189
register Py_ssize_t i;
191
register char *p = PyBytes_AS_STRING(v);
193
for (i = 0; i < size; i++) {
194
/* There's at least enough room for a hex escape */
195
assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4);
196
c = PyBytes_AS_STRING(str)[i];
197
if (c == '\'' || c == '\\')
198
*p++ = '\\', *p++ = c;
200
*p++ = '\\', *p++ = 't';
202
*p++ = '\\', *p++ = 'n';
204
*p++ = '\\', *p++ = 'r';
205
else if (c < ' ' || c >= 0x7f) {
208
*p++ = hexdigits[(c & 0xf0) >> 4];
209
*p++ = hexdigits[c & 0xf];
215
if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) {
220
return codec_tuple(v, PyBytes_Size(v));
165
static const char *hexdigits = "0123456789abcdef";
169
const char *errors = NULL;
172
if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
173
&PyBytes_Type, &str, &errors))
176
size = PyBytes_GET_SIZE(str);
178
if (newsize > PY_SSIZE_T_MAX || newsize / 4 != size) {
179
PyErr_SetString(PyExc_OverflowError,
180
"string is too large to encode");
183
v = PyBytes_FromStringAndSize(NULL, newsize);
189
register Py_ssize_t i;
191
register char *p = PyBytes_AS_STRING(v);
193
for (i = 0; i < size; i++) {
194
/* There's at least enough room for a hex escape */
195
assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4);
196
c = PyBytes_AS_STRING(str)[i];
197
if (c == '\'' || c == '\\')
198
*p++ = '\\', *p++ = c;
200
*p++ = '\\', *p++ = 't';
202
*p++ = '\\', *p++ = 'n';
204
*p++ = '\\', *p++ = 'r';
205
else if (c < ' ' || c >= 0x7f) {
208
*p++ = hexdigits[(c & 0xf0) >> 4];
209
*p++ = hexdigits[c & 0xf];
215
if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) {
220
return codec_tuple(v, size);
223
223
/* --- Decoder ------------------------------------------------------------ */
225
225
static PyObject *
226
226
unicode_internal_decode(PyObject *self,
230
230
const char *errors = NULL;
503
503
static PyObject *
504
504
unicode_escape_decode(PyObject *self,
508
508
const char *errors = NULL;
511
511
if (!PyArg_ParseTuple(args, "s*|z:unicode_escape_decode",
515
unicode = PyUnicode_DecodeUnicodeEscape(pbuf.buf, pbuf.len, errors);
516
PyBuffer_Release(&pbuf);
517
return codec_tuple(unicode, pbuf.len);
515
unicode = PyUnicode_DecodeUnicodeEscape(pbuf.buf, pbuf.len, errors);
516
PyBuffer_Release(&pbuf);
517
return codec_tuple(unicode, pbuf.len);
520
520
static PyObject *
521
521
raw_unicode_escape_decode(PyObject *self,
525
525
const char *errors = NULL;
528
528
if (!PyArg_ParseTuple(args, "s*|z:raw_unicode_escape_decode",
532
unicode = PyUnicode_DecodeRawUnicodeEscape(pbuf.buf, pbuf.len, errors);
533
PyBuffer_Release(&pbuf);
534
return codec_tuple(unicode, pbuf.len);
532
unicode = PyUnicode_DecodeRawUnicodeEscape(pbuf.buf, pbuf.len, errors);
533
PyBuffer_Release(&pbuf);
534
return codec_tuple(unicode, pbuf.len);
537
537
static PyObject *
538
538
latin_1_decode(PyObject *self,
543
543
const char *errors = NULL;
545
545
if (!PyArg_ParseTuple(args, "y*|z:latin_1_decode",
549
unicode = PyUnicode_DecodeLatin1(pbuf.buf, pbuf.len, errors);
550
PyBuffer_Release(&pbuf);
551
return codec_tuple(unicode, pbuf.len);
549
unicode = PyUnicode_DecodeLatin1(pbuf.buf, pbuf.len, errors);
550
PyBuffer_Release(&pbuf);
551
return codec_tuple(unicode, pbuf.len);
554
554
static PyObject *
555
555
ascii_decode(PyObject *self,
560
560
const char *errors = NULL;
562
562
if (!PyArg_ParseTuple(args, "y*|z:ascii_decode",
566
unicode = PyUnicode_DecodeASCII(pbuf.buf, pbuf.len, errors);
567
PyBuffer_Release(&pbuf);
568
return codec_tuple(unicode, pbuf.len);
566
unicode = PyUnicode_DecodeASCII(pbuf.buf, pbuf.len, errors);
567
PyBuffer_Release(&pbuf);
568
return codec_tuple(unicode, pbuf.len);
571
571
static PyObject *
572
572
charmap_decode(PyObject *self,
577
577
const char *errors = NULL;
578
578
PyObject *mapping = NULL;
580
580
if (!PyArg_ParseTuple(args, "y*|zO:charmap_decode",
581
&pbuf, &errors, &mapping))
581
&pbuf, &errors, &mapping))
583
583
if (mapping == Py_None)
586
unicode = PyUnicode_DecodeCharmap(pbuf.buf, pbuf.len, mapping, errors);
587
PyBuffer_Release(&pbuf);
588
return codec_tuple(unicode, pbuf.len);
586
unicode = PyUnicode_DecodeCharmap(pbuf.buf, pbuf.len, mapping, errors);
587
PyBuffer_Release(&pbuf);
588
return codec_tuple(unicode, pbuf.len);
591
591
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
593
593
static PyObject *
594
594
mbcs_decode(PyObject *self,
598
598
const char *errors = NULL;
600
600
Py_ssize_t consumed;
601
601
PyObject *decoded = NULL;
603
603
if (!PyArg_ParseTuple(args, "y*|zi:mbcs_decode",
604
&pbuf, &errors, &final))
604
&pbuf, &errors, &final))
606
606
consumed = pbuf.len;
608
608
decoded = PyUnicode_DecodeMBCSStateful(pbuf.buf, pbuf.len, errors,
609
final ? NULL : &consumed);
610
PyBuffer_Release(&pbuf);
609
final ? NULL : &consumed);
610
PyBuffer_Release(&pbuf);
611
611
if (decoded == NULL)
613
613
return codec_tuple(decoded, consumed);
812
812
static PyObject *
813
813
utf_32_encode(PyObject *self,
816
816
PyObject *str, *v;
817
817
const char *errors = NULL;
818
818
int byteorder = 0;
820
820
if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode",
821
&str, &errors, &byteorder))
821
&str, &errors, &byteorder))
824
824
str = PyUnicode_FromObject(str);
827
827
v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
828
PyUnicode_GET_SIZE(str),
831
PyUnicode_GET_SIZE(str));
828
PyUnicode_GET_SIZE(str),
831
PyUnicode_GET_SIZE(str));
836
836
static PyObject *
837
837
utf_32_le_encode(PyObject *self,
840
840
PyObject *str, *v;
841
841
const char *errors = NULL;
843
843
if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode",
847
847
str = PyUnicode_FromObject(str);
850
850
v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
851
PyUnicode_GET_SIZE(str),
854
PyUnicode_GET_SIZE(str));
851
PyUnicode_GET_SIZE(str),
854
PyUnicode_GET_SIZE(str));
859
859
static PyObject *
860
860
utf_32_be_encode(PyObject *self,
863
863
PyObject *str, *v;
864
864
const char *errors = NULL;
866
866
if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode",
870
870
str = PyUnicode_FromObject(str);
873
873
v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
874
PyUnicode_GET_SIZE(str),
877
PyUnicode_GET_SIZE(str));
874
PyUnicode_GET_SIZE(str),
877
PyUnicode_GET_SIZE(str));
882
882
static PyObject *
883
883
unicode_escape_encode(PyObject *self,
886
886
PyObject *str, *v;
887
887
const char *errors = NULL;
889
889
if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
893
893
str = PyUnicode_FromObject(str);
896
896
v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
897
PyUnicode_GET_SIZE(str)),
898
PyUnicode_GET_SIZE(str));
897
PyUnicode_GET_SIZE(str)),
898
PyUnicode_GET_SIZE(str));
903
903
static PyObject *
904
904
raw_unicode_escape_encode(PyObject *self,
907
907
PyObject *str, *v;
908
908
const char *errors = NULL;
910
910
if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
914
914
str = PyUnicode_FromObject(str);
917
917
v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
918
PyUnicode_AS_UNICODE(str),
919
PyUnicode_GET_SIZE(str)),
920
PyUnicode_GET_SIZE(str));
918
PyUnicode_AS_UNICODE(str),
919
PyUnicode_GET_SIZE(str)),
920
PyUnicode_GET_SIZE(str));
925
925
static PyObject *
926
926
latin_1_encode(PyObject *self,
929
929
PyObject *str, *v;
930
930
const char *errors = NULL;
932
932
if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
936
936
str = PyUnicode_FromObject(str);
939
939
v = codec_tuple(PyUnicode_EncodeLatin1(
940
PyUnicode_AS_UNICODE(str),
941
PyUnicode_GET_SIZE(str),
943
PyUnicode_GET_SIZE(str));
940
PyUnicode_AS_UNICODE(str),
941
PyUnicode_GET_SIZE(str),
943
PyUnicode_GET_SIZE(str));
948
948
static PyObject *
949
949
ascii_encode(PyObject *self,
952
952
PyObject *str, *v;
953
953
const char *errors = NULL;
955
955
if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
959
959
str = PyUnicode_FromObject(str);
962
962
v = codec_tuple(PyUnicode_EncodeASCII(
963
PyUnicode_AS_UNICODE(str),
964
PyUnicode_GET_SIZE(str),
966
PyUnicode_GET_SIZE(str));
963
PyUnicode_AS_UNICODE(str),
964
PyUnicode_GET_SIZE(str),
966
PyUnicode_GET_SIZE(str));
971
971
static PyObject *
972
972
charmap_encode(PyObject *self,
975
975
PyObject *str, *v;
976
976
const char *errors = NULL;
977
977
PyObject *mapping = NULL;
979
979
if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
980
&str, &errors, &mapping))
980
&str, &errors, &mapping))
982
982
if (mapping == Py_None)
985
985
str = PyUnicode_FromObject(str);
988
988
v = codec_tuple(PyUnicode_EncodeCharmap(
989
PyUnicode_AS_UNICODE(str),
990
PyUnicode_GET_SIZE(str),
993
PyUnicode_GET_SIZE(str));
989
PyUnicode_AS_UNICODE(str),
990
PyUnicode_GET_SIZE(str),
993
PyUnicode_GET_SIZE(str));
1066
1066
const char *name;
1068
1068
if (!PyArg_ParseTuple(args, "s:lookup_error",
1071
1071
return PyCodec_LookupError(name);
1074
1074
/* --- Module API --------------------------------------------------------- */
1076
1076
static PyMethodDef _codecs_functions[] = {
1077
{"register", codec_register, METH_O,
1077
{"register", codec_register, METH_O,
1078
1078
register__doc__},
1079
{"lookup", codec_lookup, METH_VARARGS,
1079
{"lookup", codec_lookup, METH_VARARGS,
1080
1080
lookup__doc__},
1081
{"encode", codec_encode, METH_VARARGS,
1083
{"decode", codec_decode, METH_VARARGS,
1085
{"escape_encode", escape_encode, METH_VARARGS},
1086
{"escape_decode", escape_decode, METH_VARARGS},
1087
{"utf_8_encode", utf_8_encode, METH_VARARGS},
1088
{"utf_8_decode", utf_8_decode, METH_VARARGS},
1089
{"utf_7_encode", utf_7_encode, METH_VARARGS},
1090
{"utf_7_decode", utf_7_decode, METH_VARARGS},
1091
{"utf_16_encode", utf_16_encode, METH_VARARGS},
1092
{"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
1093
{"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
1094
{"utf_16_decode", utf_16_decode, METH_VARARGS},
1095
{"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
1096
{"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
1097
{"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
1098
{"utf_32_encode", utf_32_encode, METH_VARARGS},
1099
{"utf_32_le_encode", utf_32_le_encode, METH_VARARGS},
1100
{"utf_32_be_encode", utf_32_be_encode, METH_VARARGS},
1101
{"utf_32_decode", utf_32_decode, METH_VARARGS},
1102
{"utf_32_le_decode", utf_32_le_decode, METH_VARARGS},
1103
{"utf_32_be_decode", utf_32_be_decode, METH_VARARGS},
1104
{"utf_32_ex_decode", utf_32_ex_decode, METH_VARARGS},
1105
{"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
1106
{"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
1107
{"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
1108
{"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
1109
{"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
1110
{"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
1111
{"latin_1_encode", latin_1_encode, METH_VARARGS},
1112
{"latin_1_decode", latin_1_decode, METH_VARARGS},
1113
{"ascii_encode", ascii_encode, METH_VARARGS},
1114
{"ascii_decode", ascii_decode, METH_VARARGS},
1115
{"charmap_encode", charmap_encode, METH_VARARGS},
1116
{"charmap_decode", charmap_decode, METH_VARARGS},
1117
{"charmap_build", charmap_build, METH_VARARGS},
1118
{"readbuffer_encode", readbuffer_encode, METH_VARARGS},
1119
{"charbuffer_encode", charbuffer_encode, METH_VARARGS},
1081
{"encode", codec_encode, METH_VARARGS,
1083
{"decode", codec_decode, METH_VARARGS,
1085
{"escape_encode", escape_encode, METH_VARARGS},
1086
{"escape_decode", escape_decode, METH_VARARGS},
1087
{"utf_8_encode", utf_8_encode, METH_VARARGS},
1088
{"utf_8_decode", utf_8_decode, METH_VARARGS},
1089
{"utf_7_encode", utf_7_encode, METH_VARARGS},
1090
{"utf_7_decode", utf_7_decode, METH_VARARGS},
1091
{"utf_16_encode", utf_16_encode, METH_VARARGS},
1092
{"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
1093
{"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
1094
{"utf_16_decode", utf_16_decode, METH_VARARGS},
1095
{"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
1096
{"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
1097
{"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
1098
{"utf_32_encode", utf_32_encode, METH_VARARGS},
1099
{"utf_32_le_encode", utf_32_le_encode, METH_VARARGS},
1100
{"utf_32_be_encode", utf_32_be_encode, METH_VARARGS},
1101
{"utf_32_decode", utf_32_decode, METH_VARARGS},
1102
{"utf_32_le_decode", utf_32_le_decode, METH_VARARGS},
1103
{"utf_32_be_decode", utf_32_be_decode, METH_VARARGS},
1104
{"utf_32_ex_decode", utf_32_ex_decode, METH_VARARGS},
1105
{"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
1106
{"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
1107
{"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
1108
{"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
1109
{"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
1110
{"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
1111
{"latin_1_encode", latin_1_encode, METH_VARARGS},
1112
{"latin_1_decode", latin_1_decode, METH_VARARGS},
1113
{"ascii_encode", ascii_encode, METH_VARARGS},
1114
{"ascii_decode", ascii_decode, METH_VARARGS},
1115
{"charmap_encode", charmap_encode, METH_VARARGS},
1116
{"charmap_decode", charmap_decode, METH_VARARGS},
1117
{"charmap_build", charmap_build, METH_VARARGS},
1118
{"readbuffer_encode", readbuffer_encode, METH_VARARGS},
1119
{"charbuffer_encode", charbuffer_encode, METH_VARARGS},
1120
1120
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
1121
{"mbcs_encode", mbcs_encode, METH_VARARGS},
1122
{"mbcs_decode", mbcs_decode, METH_VARARGS},
1121
{"mbcs_encode", mbcs_encode, METH_VARARGS},
1122
{"mbcs_decode", mbcs_decode, METH_VARARGS},
1124
{"register_error", register_error, METH_VARARGS,
1124
{"register_error", register_error, METH_VARARGS,
1125
1125
register_error__doc__},
1126
{"lookup_error", lookup_error, METH_VARARGS,
1126
{"lookup_error", lookup_error, METH_VARARGS,
1127
1127
lookup_error__doc__},
1128
{NULL, NULL} /* sentinel */
1128
{NULL, NULL} /* sentinel */
1131
1131
static struct PyModuleDef codecsmodule = {
1132
PyModuleDef_HEAD_INIT,
1132
PyModuleDef_HEAD_INIT,
1144
1144
PyInit__codecs(void)
1146
return PyModule_Create(&codecsmodule);
1146
return PyModule_Create(&codecsmodule);