1
/* PyByteArray (bytearray) implementation */
3
#define PY_SSIZE_T_CLEAN
5
#include "structmember.h"
6
#include "bytes_methods.h"
8
static PyByteArrayObject *nullbytes = NULL;
11
PyByteArray_Fini(void)
17
PyByteArray_Init(void)
19
nullbytes = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
20
if (nullbytes == NULL)
22
nullbytes->ob_bytes = NULL;
23
Py_SIZE(nullbytes) = nullbytes->ob_alloc = 0;
24
nullbytes->ob_exports = 0;
28
/* end nullbytes support */
33
_getbytevalue(PyObject* arg, int *value)
37
if (PyLong_Check(arg)) {
38
face_value = PyLong_AsLong(arg);
40
PyObject *index = PyNumber_Index(arg);
42
PyErr_Format(PyExc_TypeError, "an integer is required");
45
face_value = PyLong_AsLong(index);
49
if (face_value < 0 || face_value >= 256) {
50
/* this includes the OverflowError in case the long is too large */
51
PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
60
bytes_getbuffer(PyByteArrayObject *obj, Py_buffer *view, int flags)
68
if (obj->ob_bytes == NULL)
72
ret = PyBuffer_FillInfo(view, (PyObject*)obj, ptr, Py_SIZE(obj), 0, flags);
80
bytes_releasebuffer(PyByteArrayObject *obj, Py_buffer *view)
86
_getbuffer(PyObject *obj, Py_buffer *view)
88
PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
90
if (buffer == NULL || buffer->bf_getbuffer == NULL)
92
PyErr_Format(PyExc_TypeError,
93
"Type %.100s doesn't support the buffer API",
94
Py_TYPE(obj)->tp_name);
98
if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
104
_canresize(PyByteArrayObject *self)
106
if (self->ob_exports > 0) {
107
PyErr_SetString(PyExc_BufferError,
108
"Existing exports of data: object cannot be re-sized");
114
/* Direct API functions */
117
PyByteArray_FromObject(PyObject *input)
119
return PyObject_CallFunctionObjArgs((PyObject *)&PyByteArray_Type,
124
PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size)
126
PyByteArrayObject *new;
130
PyErr_SetString(PyExc_SystemError,
131
"Negative size passed to PyByteArray_FromStringAndSize");
135
/* Prevent integer overflow (and the resulting undersized buffer) when
   setting alloc to size+1. */
136
if (size == PY_SSIZE_T_MAX) {
137
return PyErr_NoMemory();
140
new = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
145
new->ob_bytes = NULL;
150
new->ob_bytes = PyMem_Malloc(alloc);
151
if (new->ob_bytes == NULL) {
153
return PyErr_NoMemory();
156
memcpy(new->ob_bytes, bytes, size);
157
new->ob_bytes[size] = '\0'; /* Trailing null byte */
160
new->ob_alloc = alloc;
163
return (PyObject *)new;
167
PyByteArray_Size(PyObject *self)
169
assert(self != NULL);
170
assert(PyByteArray_Check(self));
172
return PyByteArray_GET_SIZE(self);
176
PyByteArray_AsString(PyObject *self)
178
assert(self != NULL);
179
assert(PyByteArray_Check(self));
181
return PyByteArray_AS_STRING(self);
185
PyByteArray_Resize(PyObject *self, Py_ssize_t size)
188
Py_ssize_t alloc = ((PyByteArrayObject *)self)->ob_alloc;
190
assert(self != NULL);
191
assert(PyByteArray_Check(self));
194
if (size == Py_SIZE(self)) {
197
if (!_canresize((PyByteArrayObject *)self)) {
201
if (size < alloc / 2) {
202
/* Major downsize; resize down to exact size */
205
else if (size < alloc) {
206
/* Within allocated size; quick exit */
207
Py_SIZE(self) = size;
208
((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
211
else if (size <= alloc * 1.125) {
212
/* Moderate upsize; overallocate similar to list_resize() */
213
alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
216
/* Major upsize; resize up to exact size */
220
sval = PyMem_Realloc(((PyByteArrayObject *)self)->ob_bytes, alloc);
226
((PyByteArrayObject *)self)->ob_bytes = sval;
227
Py_SIZE(self) = size;
228
((PyByteArrayObject *)self)->ob_alloc = alloc;
229
((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
235
PyByteArray_Concat(PyObject *a, PyObject *b)
239
PyByteArrayObject *result = NULL;
243
if (_getbuffer(a, &va) < 0 ||
244
_getbuffer(b, &vb) < 0) {
245
PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
246
Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
250
size = va.len + vb.len;
256
result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, size);
257
if (result != NULL) {
258
memcpy(result->ob_bytes, va.buf, va.len);
259
memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
264
PyBuffer_Release(&va);
266
PyBuffer_Release(&vb);
267
return (PyObject *)result;
270
/* Functions stuffed into the type object */
273
bytes_length(PyByteArrayObject *self)
275
return Py_SIZE(self);
279
bytes_iconcat(PyByteArrayObject *self, PyObject *other)
285
if (_getbuffer(other, &vo) < 0) {
286
PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
287
Py_TYPE(other)->tp_name, Py_TYPE(self)->tp_name);
291
mysize = Py_SIZE(self);
292
size = mysize + vo.len;
294
PyBuffer_Release(&vo);
295
return PyErr_NoMemory();
297
if (size < self->ob_alloc) {
298
Py_SIZE(self) = size;
299
self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
301
else if (PyByteArray_Resize((PyObject *)self, size) < 0) {
302
PyBuffer_Release(&vo);
305
memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
306
PyBuffer_Release(&vo);
308
return (PyObject *)self;
312
bytes_repeat(PyByteArrayObject *self, Py_ssize_t count)
314
PyByteArrayObject *result;
320
mysize = Py_SIZE(self);
321
size = mysize * count;
322
if (count != 0 && size / count != mysize)
323
return PyErr_NoMemory();
324
result = (PyByteArrayObject *)PyByteArray_FromStringAndSize(NULL, size);
325
if (result != NULL && size != 0) {
327
memset(result->ob_bytes, self->ob_bytes[0], size);
330
for (i = 0; i < count; i++)
331
memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
334
return (PyObject *)result;
338
bytes_irepeat(PyByteArrayObject *self, Py_ssize_t count)
345
mysize = Py_SIZE(self);
346
size = mysize * count;
347
if (count != 0 && size / count != mysize)
348
return PyErr_NoMemory();
349
if (size < self->ob_alloc) {
350
Py_SIZE(self) = size;
351
self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
353
else if (PyByteArray_Resize((PyObject *)self, size) < 0)
357
memset(self->ob_bytes, self->ob_bytes[0], size);
360
for (i = 1; i < count; i++)
361
memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
365
return (PyObject *)self;
369
bytes_getitem(PyByteArrayObject *self, Py_ssize_t i)
373
if (i < 0 || i >= Py_SIZE(self)) {
374
PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
377
return PyLong_FromLong((unsigned char)(self->ob_bytes[i]));
381
bytes_subscript(PyByteArrayObject *self, PyObject *index)
383
if (PyIndex_Check(index)) {
384
Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError);
386
if (i == -1 && PyErr_Occurred())
390
i += PyByteArray_GET_SIZE(self);
392
if (i < 0 || i >= Py_SIZE(self)) {
393
PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
396
return PyLong_FromLong((unsigned char)(self->ob_bytes[i]));
398
else if (PySlice_Check(index)) {
399
Py_ssize_t start, stop, step, slicelength, cur, i;
400
if (PySlice_GetIndicesEx((PySliceObject *)index,
401
PyByteArray_GET_SIZE(self),
402
&start, &stop, &step, &slicelength) < 0) {
406
if (slicelength <= 0)
407
return PyByteArray_FromStringAndSize("", 0);
408
else if (step == 1) {
409
return PyByteArray_FromStringAndSize(self->ob_bytes + start,
413
char *source_buf = PyByteArray_AS_STRING(self);
414
char *result_buf = (char *)PyMem_Malloc(slicelength);
417
if (result_buf == NULL)
418
return PyErr_NoMemory();
420
for (cur = start, i = 0; i < slicelength;
422
result_buf[i] = source_buf[cur];
424
result = PyByteArray_FromStringAndSize(result_buf, slicelength);
425
PyMem_Free(result_buf);
430
PyErr_SetString(PyExc_TypeError, "bytearray indices must be integers");
436
bytes_setslice(PyByteArrayObject *self, Py_ssize_t lo, Py_ssize_t hi,
439
Py_ssize_t avail, needed;
445
if (values == (PyObject *)self) {
446
/* Make a copy and call this function recursively */
448
values = PyByteArray_FromObject(values);
451
err = bytes_setslice(self, lo, hi, values);
455
if (values == NULL) {
461
if (_getbuffer(values, &vbytes) < 0) {
462
PyErr_Format(PyExc_TypeError,
463
"can't set bytearray slice from %.100s",
464
Py_TYPE(values)->tp_name);
475
if (hi > Py_SIZE(self))
482
if (avail != needed) {
483
if (avail > needed) {
484
if (!_canresize(self)) {
490
| |<----avail----->|<-----tomove------>|
491
| |<-needed->|<-----tomove------>|
494
memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
497
/* XXX(nnorwitz): need to verify this can't overflow! */
498
if (PyByteArray_Resize((PyObject *)self,
499
Py_SIZE(self) + needed - avail) < 0) {
503
if (avail < needed) {
506
| |<-avail->|<-----tomove------>|
507
| |<----needed---->|<-----tomove------>|
510
memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
511
Py_SIZE(self) - lo - needed);
516
memcpy(self->ob_bytes + lo, bytes, needed);
520
if (vbytes.len != -1)
521
PyBuffer_Release(&vbytes);
526
bytes_setitem(PyByteArrayObject *self, Py_ssize_t i, PyObject *value)
533
if (i < 0 || i >= Py_SIZE(self)) {
534
PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
539
return bytes_setslice(self, i, i+1, NULL);
541
if (!_getbytevalue(value, &ival))
544
self->ob_bytes[i] = ival;
549
bytes_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *values)
551
Py_ssize_t start, stop, step, slicelen, needed;
554
if (PyIndex_Check(index)) {
555
Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError);
557
if (i == -1 && PyErr_Occurred())
561
i += PyByteArray_GET_SIZE(self);
563
if (i < 0 || i >= Py_SIZE(self)) {
564
PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
568
if (values == NULL) {
569
/* Fall through to slice assignment */
577
if (!_getbytevalue(values, &ival))
579
self->ob_bytes[i] = (char)ival;
583
else if (PySlice_Check(index)) {
584
if (PySlice_GetIndicesEx((PySliceObject *)index,
585
PyByteArray_GET_SIZE(self),
586
&start, &stop, &step, &slicelen) < 0) {
591
PyErr_SetString(PyExc_TypeError, "bytearray indices must be integer");
595
if (values == NULL) {
599
else if (values == (PyObject *)self || !PyByteArray_Check(values)) {
600
/* Make a copy and call this function recursively */
602
values = PyByteArray_FromObject(values);
605
err = bytes_ass_subscript(self, index, values);
610
assert(PyByteArray_Check(values));
611
bytes = ((PyByteArrayObject *)values)->ob_bytes;
612
needed = Py_SIZE(values);
614
/* Make sure b[5:2] = ... inserts before 5, not before 2. */
615
if ((step < 0 && start < stop) ||
616
(step > 0 && start > stop))
619
if (slicelen != needed) {
620
if (!_canresize(self))
622
if (slicelen > needed) {
624
0 start stop old_size
625
| |<---slicelen--->|<-----tomove------>|
626
| |<-needed->|<-----tomove------>|
629
memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
630
Py_SIZE(self) - stop);
632
if (PyByteArray_Resize((PyObject *)self,
633
Py_SIZE(self) + needed - slicelen) < 0)
635
if (slicelen < needed) {
638
| |<-avail->|<-----tomove------>|
639
| |<----needed---->|<-----tomove------>|
642
memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
643
Py_SIZE(self) - start - needed);
648
memcpy(self->ob_bytes + start, bytes, needed);
657
if (!_canresize(self))
661
start = stop + step * (slicelen - 1) - 1;
664
for (cur = start, i = 0;
665
i < slicelen; cur += step, i++) {
666
Py_ssize_t lim = step - 1;
668
if (cur + step >= PyByteArray_GET_SIZE(self))
669
lim = PyByteArray_GET_SIZE(self) - cur - 1;
671
memmove(self->ob_bytes + cur - i,
672
self->ob_bytes + cur + 1, lim);
674
/* Move the tail of the bytes, in one chunk */
675
cur = start + slicelen*step;
676
if (cur < PyByteArray_GET_SIZE(self)) {
677
memmove(self->ob_bytes + cur - slicelen,
678
self->ob_bytes + cur,
679
PyByteArray_GET_SIZE(self) - cur);
681
if (PyByteArray_Resize((PyObject *)self,
682
PyByteArray_GET_SIZE(self) - slicelen) < 0)
691
if (needed != slicelen) {
692
PyErr_Format(PyExc_ValueError,
693
"attempt to assign bytes of size %zd "
694
"to extended slice of size %zd",
698
for (cur = start, i = 0; i < slicelen; cur += step, i++)
699
self->ob_bytes[cur] = bytes[i];
706
bytes_init(PyByteArrayObject *self, PyObject *args, PyObject *kwds)
708
static char *kwlist[] = {"source", "encoding", "errors", 0};
709
PyObject *arg = NULL;
710
const char *encoding = NULL;
711
const char *errors = NULL;
714
PyObject *(*iternext)(PyObject *);
716
if (Py_SIZE(self) != 0) {
717
/* Empty previous contents (yes, do this first of all!) */
718
if (PyByteArray_Resize((PyObject *)self, 0) < 0)
722
/* Parse arguments */
723
if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytearray", kwlist,
724
&arg, &encoding, &errors))
727
/* Make a quick exit if no first argument */
729
if (encoding != NULL || errors != NULL) {
730
PyErr_SetString(PyExc_TypeError,
731
"encoding or errors without sequence argument");
737
if (PyUnicode_Check(arg)) {
738
/* Encode via the codec registry */
739
PyObject *encoded, *new;
740
if (encoding == NULL) {
741
PyErr_SetString(PyExc_TypeError,
742
"string argument without an encoding");
745
encoded = PyUnicode_AsEncodedString(arg, encoding, errors);
748
assert(PyBytes_Check(encoded));
749
new = bytes_iconcat(self, encoded);
757
/* If it's not unicode, there can't be encoding or errors */
758
if (encoding != NULL || errors != NULL) {
759
PyErr_SetString(PyExc_TypeError,
760
"encoding or errors without a string argument");
765
count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
766
if (count == -1 && PyErr_Occurred())
770
PyErr_SetString(PyExc_ValueError, "negative count");
774
if (PyByteArray_Resize((PyObject *)self, count))
776
memset(self->ob_bytes, 0, count);
781
/* Use the buffer API */
782
if (PyObject_CheckBuffer(arg)) {
785
if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
788
if (PyByteArray_Resize((PyObject *)self, size) < 0) goto fail;
789
if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0)
791
PyBuffer_Release(&view);
794
PyBuffer_Release(&view);
798
/* XXX Optimize this if the argument is a list or tuple */
800
/* Get the iterator */
801
it = PyObject_GetIter(arg);
804
iternext = *Py_TYPE(it)->tp_iternext;
806
/* Run the iterator to exhaustion */
811
/* Get the next item */
814
if (PyErr_Occurred()) {
815
if (!PyErr_ExceptionMatches(PyExc_StopIteration))
822
/* Interpret it as an int (__index__) */
823
rc = _getbytevalue(item, &value);
828
/* Append the byte */
829
if (Py_SIZE(self) < self->ob_alloc)
831
else if (PyByteArray_Resize((PyObject *)self, Py_SIZE(self)+1) < 0)
833
self->ob_bytes[Py_SIZE(self)-1] = value;
836
/* Clean up and return success */
841
/* Error handling when it != NULL */
846
/* Mostly copied from string_repr, but without the
847
"smart quote" functionality. */
849
bytes_repr(PyByteArrayObject *self)
851
static const char *hexdigits = "0123456789abcdef";
852
const char *quote_prefix = "bytearray(b";
853
const char *quote_postfix = ")";
854
Py_ssize_t length = Py_SIZE(self);
855
/* 14 == strlen(quote_prefix) + 2 + strlen(quote_postfix) */
856
size_t newsize = 14 + 4 * length;
858
if (newsize > PY_SSIZE_T_MAX || newsize / 4 - 3 != length) {
859
PyErr_SetString(PyExc_OverflowError,
860
"bytearray object is too large to make repr");
863
v = PyUnicode_FromUnicode(NULL, newsize);
868
register Py_ssize_t i;
869
register Py_UNICODE c;
870
register Py_UNICODE *p;
873
/* Figure out which quote to use; single is preferred */
877
start = PyByteArray_AS_STRING(self);
878
for (test = start; test < start+length; ++test) {
880
quote = '\''; /* back to single */
883
else if (*test == '\'')
890
p = PyUnicode_AS_UNICODE(v);
891
while (*quote_prefix)
892
*p++ = *quote_prefix++;
895
for (i = 0; i < length; i++) {
896
/* There's at least enough room for a hex escape
897
and a closing quote. */
898
assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
899
c = self->ob_bytes[i];
900
if (c == '\'' || c == '\\')
901
*p++ = '\\', *p++ = c;
903
*p++ = '\\', *p++ = 't';
905
*p++ = '\\', *p++ = 'n';
907
*p++ = '\\', *p++ = 'r';
909
*p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
910
else if (c < ' ' || c >= 0x7f) {
913
*p++ = hexdigits[(c & 0xf0) >> 4];
914
*p++ = hexdigits[c & 0xf];
919
assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
921
while (*quote_postfix) {
922
*p++ = *quote_postfix++;
925
if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
934
bytes_str(PyObject *op)
936
if (Py_BytesWarningFlag) {
937
if (PyErr_WarnEx(PyExc_BytesWarning,
938
"str() on a bytearray instance", 1))
941
return bytes_repr((PyByteArrayObject*)op);
945
bytes_richcompare(PyObject *self, PyObject *other, int op)
947
Py_ssize_t self_size, other_size;
948
Py_buffer self_bytes, other_bytes;
953
/* Bytes can be compared to anything that supports the (binary)
954
buffer API. Except that a comparison with Unicode is always an
955
error, even if the comparison is for equality. */
956
if (PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type) ||
957
PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type)) {
958
if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
959
if (PyErr_WarnEx(PyExc_BytesWarning,
960
"Comparison between bytearray and string", 1))
964
Py_INCREF(Py_NotImplemented);
965
return Py_NotImplemented;
968
self_size = _getbuffer(self, &self_bytes);
971
Py_INCREF(Py_NotImplemented);
972
return Py_NotImplemented;
975
other_size = _getbuffer(other, &other_bytes);
976
if (other_size < 0) {
978
PyBuffer_Release(&self_bytes);
979
Py_INCREF(Py_NotImplemented);
980
return Py_NotImplemented;
983
if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
984
/* Shortcut: if the lengths differ, the objects differ */
989
if (other_size < minsize)
990
minsize = other_size;
992
cmp = memcmp(self_bytes.buf, other_bytes.buf, minsize);
993
/* In ISO C, memcmp() guarantees to use unsigned bytes! */
996
if (self_size < other_size)
998
else if (self_size > other_size)
1003
case Py_LT: cmp = cmp < 0; break;
1004
case Py_LE: cmp = cmp <= 0; break;
1005
case Py_EQ: cmp = cmp == 0; break;
1006
case Py_NE: cmp = cmp != 0; break;
1007
case Py_GT: cmp = cmp > 0; break;
1008
case Py_GE: cmp = cmp >= 0; break;
1012
res = cmp ? Py_True : Py_False;
1013
PyBuffer_Release(&self_bytes);
1014
PyBuffer_Release(&other_bytes);
1020
bytes_dealloc(PyByteArrayObject *self)
1022
if (self->ob_exports > 0) {
1023
PyErr_SetString(PyExc_SystemError,
1024
"deallocated bytearray object has exported buffers");
1027
if (self->ob_bytes != 0) {
1028
PyMem_Free(self->ob_bytes);
1030
Py_TYPE(self)->tp_free((PyObject *)self);
1034
/* -------------------------------------------------------------------- */
1037
#define STRINGLIB_CHAR char
1038
#define STRINGLIB_CMP memcmp
1039
#define STRINGLIB_LEN PyByteArray_GET_SIZE
1040
#define STRINGLIB_STR PyByteArray_AS_STRING
1041
#define STRINGLIB_NEW PyByteArray_FromStringAndSize
1042
#define STRINGLIB_EMPTY nullbytes
1043
#define STRINGLIB_CHECK_EXACT PyByteArray_CheckExact
1044
#define STRINGLIB_MUTABLE 1
1045
#define FROM_BYTEARRAY 1
1047
#include "stringlib/fastsearch.h"
1048
#include "stringlib/count.h"
1049
#include "stringlib/find.h"
1050
#include "stringlib/partition.h"
1051
#include "stringlib/ctype.h"
1052
#include "stringlib/transmogrify.h"
1055
/* The following Py_LOCAL_INLINE and Py_LOCAL functions
1056
were copied from the old char* style string object. */
1058
Py_LOCAL_INLINE(void)
1059
_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1074
Py_LOCAL_INLINE(Py_ssize_t)
1075
bytes_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
1079
Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1082
if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1083
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1085
if (_getbuffer(subobj, &subbuf) < 0)
1088
res = stringlib_find_slice(
1089
PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1090
subbuf.buf, subbuf.len, start, end);
1092
res = stringlib_rfind_slice(
1093
PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1094
subbuf.buf, subbuf.len, start, end);
1095
PyBuffer_Release(&subbuf);
1099
PyDoc_STRVAR(find__doc__,
1100
"B.find(sub[, start[, end]]) -> int\n\
1102
Return the lowest index in B where subsection sub is found,\n\
1103
such that sub is contained within s[start,end]. Optional\n\
1104
arguments start and end are interpreted as in slice notation.\n\
1106
Return -1 on failure.");
1109
bytes_find(PyByteArrayObject *self, PyObject *args)
1111
Py_ssize_t result = bytes_find_internal(self, args, +1);
1114
return PyLong_FromSsize_t(result);
1117
PyDoc_STRVAR(count__doc__,
1118
"B.count(sub[, start[, end]]) -> int\n\
1120
Return the number of non-overlapping occurrences of subsection sub in\n\
1121
bytes B[start:end]. Optional arguments start and end are interpreted\n\
1122
as in slice notation.");
1125
bytes_count(PyByteArrayObject *self, PyObject *args)
1128
const char *str = PyByteArray_AS_STRING(self);
1129
Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1131
PyObject *count_obj;
1133
if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1134
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1137
if (_getbuffer(sub_obj, &vsub) < 0)
1140
_adjust_indices(&start, &end, PyByteArray_GET_SIZE(self));
1142
count_obj = PyLong_FromSsize_t(
1143
stringlib_count(str + start, end - start, vsub.buf, vsub.len)
1145
PyBuffer_Release(&vsub);
1150
PyDoc_STRVAR(index__doc__,
1151
"B.index(sub[, start[, end]]) -> int\n\
1153
Like B.find() but raise ValueError when the subsection is not found.");
1156
bytes_index(PyByteArrayObject *self, PyObject *args)
1158
Py_ssize_t result = bytes_find_internal(self, args, +1);
1162
PyErr_SetString(PyExc_ValueError,
1163
"subsection not found");
1166
return PyLong_FromSsize_t(result);
1170
PyDoc_STRVAR(rfind__doc__,
1171
"B.rfind(sub[, start[, end]]) -> int\n\
1173
Return the highest index in B where subsection sub is found,\n\
1174
such that sub is contained within s[start,end]. Optional\n\
1175
arguments start and end are interpreted as in slice notation.\n\
1177
Return -1 on failure.");
1180
bytes_rfind(PyByteArrayObject *self, PyObject *args)
1182
Py_ssize_t result = bytes_find_internal(self, args, -1);
1185
return PyLong_FromSsize_t(result);
1189
PyDoc_STRVAR(rindex__doc__,
1190
"B.rindex(sub[, start[, end]]) -> int\n\
1192
Like B.rfind() but raise ValueError when the subsection is not found.");
1195
bytes_rindex(PyByteArrayObject *self, PyObject *args)
1197
Py_ssize_t result = bytes_find_internal(self, args, -1);
1201
PyErr_SetString(PyExc_ValueError,
1202
"subsection not found");
1205
return PyLong_FromSsize_t(result);
1210
bytes_contains(PyObject *self, PyObject *arg)
1212
Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1213
if (ival == -1 && PyErr_Occurred()) {
1217
if (_getbuffer(arg, &varg) < 0)
1219
pos = stringlib_find(PyByteArray_AS_STRING(self), Py_SIZE(self),
1220
varg.buf, varg.len, 0);
1221
PyBuffer_Release(&varg);
1224
if (ival < 0 || ival >= 256) {
1225
PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1229
return memchr(PyByteArray_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
1233
/* Matches the end (direction >= 0) or start (direction < 0) of self
1234
* against substr, using the start and end arguments. Returns
1235
* -1 on error, 0 if not found and 1 if found.
1238
_bytes_tailmatch(PyByteArrayObject *self, PyObject *substr, Py_ssize_t start,
1239
Py_ssize_t end, int direction)
1241
Py_ssize_t len = PyByteArray_GET_SIZE(self);
1246
str = PyByteArray_AS_STRING(self);
1248
if (_getbuffer(substr, &vsubstr) < 0)
1251
_adjust_indices(&start, &end, len);
1253
if (direction < 0) {
1255
if (start+vsubstr.len > len) {
1260
if (end-start < vsubstr.len || start > len) {
1264
if (end-vsubstr.len > start)
1265
start = end - vsubstr.len;
1267
if (end-start >= vsubstr.len)
1268
rv = ! memcmp(str+start, vsubstr.buf, vsubstr.len);
1271
PyBuffer_Release(&vsubstr);
1276
PyDoc_STRVAR(startswith__doc__,
1277
"B.startswith(prefix[, start[, end]]) -> bool\n\
1279
Return True if B starts with the specified prefix, False otherwise.\n\
1280
With optional start, test B beginning at that position.\n\
1281
With optional end, stop comparing B at that position.\n\
1282
prefix can also be a tuple of strings to try.");
1285
bytes_startswith(PyByteArrayObject *self, PyObject *args)
1287
Py_ssize_t start = 0;
1288
Py_ssize_t end = PY_SSIZE_T_MAX;
1292
if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1293
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1295
if (PyTuple_Check(subobj)) {
1297
for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1298
result = _bytes_tailmatch(self,
1299
PyTuple_GET_ITEM(subobj, i),
1309
result = _bytes_tailmatch(self, subobj, start, end, -1);
1313
return PyBool_FromLong(result);
1316
PyDoc_STRVAR(endswith__doc__,
1317
"B.endswith(suffix[, start[, end]]) -> bool\n\
1319
Return True if B ends with the specified suffix, False otherwise.\n\
1320
With optional start, test B beginning at that position.\n\
1321
With optional end, stop comparing B at that position.\n\
1322
suffix can also be a tuple of strings to try.");
1325
bytes_endswith(PyByteArrayObject *self, PyObject *args)
1327
Py_ssize_t start = 0;
1328
Py_ssize_t end = PY_SSIZE_T_MAX;
1332
if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1333
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1335
if (PyTuple_Check(subobj)) {
1337
for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1338
result = _bytes_tailmatch(self,
1339
PyTuple_GET_ITEM(subobj, i),
1349
result = _bytes_tailmatch(self, subobj, start, end, +1);
1353
return PyBool_FromLong(result);
1357
PyDoc_STRVAR(translate__doc__,
1358
"B.translate(table[, deletechars]) -> bytearray\n\
1360
Return a copy of B, where all characters occurring in the\n\
1361
optional argument deletechars are removed, and the remaining\n\
1362
characters have been mapped through the given translation\n\
1363
table, which must be a bytes object of length 256.");
1366
bytes_translate(PyByteArrayObject *self, PyObject *args)
1368
register char *input, *output;
1369
register const char *table;
1370
register Py_ssize_t i, c;
1371
PyObject *input_obj = (PyObject*)self;
1372
const char *output_start;
1374
PyObject *result = NULL;
1375
int trans_table[256];
1376
PyObject *tableobj = NULL, *delobj = NULL;
1377
Py_buffer vtable, vdel;
1379
if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1380
&tableobj, &delobj))
1383
if (tableobj == Py_None) {
1386
} else if (_getbuffer(tableobj, &vtable) < 0) {
1389
if (vtable.len != 256) {
1390
PyErr_SetString(PyExc_ValueError,
1391
"translation table must be 256 characters long");
1394
table = (const char*)vtable.buf;
1397
if (delobj != NULL) {
1398
if (_getbuffer(delobj, &vdel) < 0) {
1399
delobj = NULL; /* don't try to release vdel buffer on exit */
1408
inlen = PyByteArray_GET_SIZE(input_obj);
1409
result = PyByteArray_FromStringAndSize((char *)NULL, inlen);
1412
output_start = output = PyByteArray_AsString(result);
1413
input = PyByteArray_AS_STRING(input_obj);
1415
if (vdel.len == 0 && table != NULL) {
1416
/* If no deletions are required, use faster code */
1417
for (i = inlen; --i >= 0; ) {
1418
c = Py_CHARMASK(*input++);
1419
*output++ = table[c];
1424
if (table == NULL) {
1425
for (i = 0; i < 256; i++)
1426
trans_table[i] = Py_CHARMASK(i);
1428
for (i = 0; i < 256; i++)
1429
trans_table[i] = Py_CHARMASK(table[i]);
1432
for (i = 0; i < vdel.len; i++)
1433
trans_table[(int) Py_CHARMASK( ((unsigned char*)vdel.buf)[i] )] = -1;
1435
for (i = inlen; --i >= 0; ) {
1436
c = Py_CHARMASK(*input++);
1437
if (trans_table[c] != -1)
1438
if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1441
/* Trim the resulting bytearray to the number of bytes actually written */
1443
PyByteArray_Resize(result, output - output_start);
1446
if (tableobj != NULL)
1447
PyBuffer_Release(&vtable);
1449
PyBuffer_Release(&vdel);
1457
/* find and count characters and substrings */
1459
/* findchar(target, target_len, c)
 *
 * Thin wrapper around memchr(): yields a `char *` to the first byte equal
 * to `c` within the first `target_len` bytes of `target`, or NULL when no
 * such byte exists.  The result is cast to a non-const `char *` regardless
 * of the constness of `target`.
 *
 * Every parameter is parenthesized in the expansion so that expression
 * arguments keep their intended grouping.
 */
#define findchar(target, target_len, c)                                 \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1462
/* Py_STRING_MATCH(target, offset, pattern, length)
 *
 * Evaluates to nonzero when the `length` bytes of `target` starting at
 * `offset` are equal to the first `length` bytes of `pattern`.  The first
 * and last bytes are compared directly; memcmp() then checks the
 * `length - 2` bytes in between.
 *
 * Don't call if length < 2: the interior comparison passes `length - 2`
 * to memcmp(), which would be negative.
 *
 * Every parameter is parenthesized in the expansion so that expression
 * arguments (for example `flag ? 3 : 4`) keep their intended grouping.
 * Arguments are still evaluated more than once, so they must not have
 * side effects.
 */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2))
1469
/* Bytearray ops must return a new object, so create a copy */
1470
Py_LOCAL(PyByteArrayObject *)
1471
return_self(PyByteArrayObject *self)
1473
/* always return a new bytearray */
1474
return (PyByteArrayObject *)PyByteArray_FromStringAndSize(
1475
PyByteArray_AS_STRING(self),
1476
PyByteArray_GET_SIZE(self));
1479
Py_LOCAL_INLINE(Py_ssize_t)
1480
countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
1483
const char *start=target;
1484
const char *end=target+target_len;
1486
while ( (start=findchar(start, end-start, c)) != NULL ) {
1488
if (count >= maxcount)
1495
Py_LOCAL(Py_ssize_t)
1496
findstring(const char *target, Py_ssize_t target_len,
1497
const char *pattern, Py_ssize_t pattern_len,
1503
start += target_len;
1507
if (end > target_len) {
1509
} else if (end < 0) {
1515
/* zero-length substrings always match at the first attempt */
1516
if (pattern_len == 0)
1517
return (direction > 0) ? start : end;
1521
if (direction < 0) {
1522
for (; end >= start; end--)
1523
if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1526
for (; start <= end; start++)
1527
if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1533
Py_LOCAL_INLINE(Py_ssize_t)
1534
countstring(const char *target, Py_ssize_t target_len,
1535
const char *pattern, Py_ssize_t pattern_len,
1538
int direction, Py_ssize_t maxcount)
1543
start += target_len;
1547
if (end > target_len) {
1549
} else if (end < 0) {
1555
/* zero-length substrings match everywhere */
1556
if (pattern_len == 0 || maxcount == 0) {
1557
if (target_len+1 < maxcount)
1558
return target_len+1;
1563
if (direction < 0) {
1564
for (; (end >= start); end--)
1565
if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1567
if (--maxcount <= 0) break;
1568
end -= pattern_len-1;
1571
for (; (start <= end); start++)
1572
if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1574
if (--maxcount <= 0)
1576
start += pattern_len-1;
1583
/* Algorithms for different cases of string replacement */
1585
/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1586
Py_LOCAL(PyByteArrayObject *)
1587
replace_interleave(PyByteArrayObject *self,
1588
const char *to_s, Py_ssize_t to_len,
1589
Py_ssize_t maxcount)
1591
char *self_s, *result_s;
1592
Py_ssize_t self_len, result_len;
1593
Py_ssize_t count, i, product;
1594
PyByteArrayObject *result;
1596
self_len = PyByteArray_GET_SIZE(self);
1598
/* 1 at the end plus 1 after every character */
1600
if (maxcount < count)
1603
/* Check for overflow */
1604
/* result_len = count * to_len + self_len; */
1605
product = count * to_len;
1606
if (product / to_len != count) {
1607
PyErr_SetString(PyExc_OverflowError,
1608
"replace string is too long");
1611
result_len = product + self_len;
1612
if (result_len < 0) {
1613
PyErr_SetString(PyExc_OverflowError,
1614
"replace string is too long");
1618
if (! (result = (PyByteArrayObject *)
1619
PyByteArray_FromStringAndSize(NULL, result_len)) )
1622
self_s = PyByteArray_AS_STRING(self);
1623
result_s = PyByteArray_AS_STRING(result);
1625
/* TODO: special case single character, which doesn't need memcpy */
1627
/* Lay the first one down (guaranteed this will occur) */
1628
Py_MEMCPY(result_s, to_s, to_len);
1632
for (i=0; i<count; i++) {
1633
*result_s++ = *self_s++;
1634
Py_MEMCPY(result_s, to_s, to_len);
1638
/* Copy the rest of the original string */
1639
Py_MEMCPY(result_s, self_s, self_len-i);
1644
/* Special case for deleting a single character */
1645
/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1646
/* Return a bytearray equal to self with occurrences of the single byte
   from_c removed.  countchar() is called with maxcount to obtain the number
   of occurrences to delete; the result is allocated with exactly
   self_len - count bytes and filled by copying the stretches between
   matches. */
Py_LOCAL(PyByteArrayObject *)
1647
replace_delete_single_character(PyByteArrayObject *self,
1648
char from_c, Py_ssize_t maxcount)
1650
char *self_s, *result_s;
1651
char *start, *next, *end;
1652
Py_ssize_t self_len, result_len;
1654
PyByteArrayObject *result;
1656
self_len = PyByteArray_GET_SIZE(self);
1657
self_s = PyByteArray_AS_STRING(self);
1659
count = countchar(self_s, self_len, from_c, maxcount);
1661
/* presumably taken only when nothing was counted -- confirm the guard */
return return_self(self);
1664
result_len = self_len - count;  /* from_len == 1 */
1665
assert(result_len>=0);
1667
if ( (result = (PyByteArrayObject *)
1668
PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1670
result_s = PyByteArray_AS_STRING(result);
1673
end = self_s + self_len;
1674
/* One iteration per counted match: locate it, copy what precedes it. */
while (count-- > 0) {
1675
next = findchar(start, end-start, from_c);
1678
Py_MEMCPY(result_s, start, next-start);
1679
result_s += (next-start);
1682
/* Copy whatever follows the last deleted byte. */
Py_MEMCPY(result_s, start, end-start);
1687
/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1689
/* Return a bytearray equal to self with occurrences of the from_s
   substring (from_len bytes) removed.  The occurrences are counted first
   with countstring(); the result holds self_len - count * from_len bytes
   and is filled by copying the stretches between matches. */
Py_LOCAL(PyByteArrayObject *)
1690
replace_delete_substring(PyByteArrayObject *self,
1691
const char *from_s, Py_ssize_t from_len,
1692
Py_ssize_t maxcount)
1694
char *self_s, *result_s;
1695
char *start, *next, *end;
1696
Py_ssize_t self_len, result_len;
1697
Py_ssize_t count, offset;
1698
PyByteArrayObject *result;
1700
self_len = PyByteArray_GET_SIZE(self);
1701
self_s = PyByteArray_AS_STRING(self);
1703
count = countstring(self_s, self_len,
1710
return return_self(self);
1713
result_len = self_len - (count * from_len);
1714
assert (result_len>=0);
1716
if ( (result = (PyByteArrayObject *)
1717
PyByteArray_FromStringAndSize(NULL, result_len)) == NULL )
1720
result_s = PyByteArray_AS_STRING(result);
1723
end = self_s + self_len;
1724
/* One iteration per counted match. */
while (count-- > 0) {
1725
offset = findstring(start, end-start,
1727
0, end-start, FORWARD);
1730
next = start + offset;
1732
/* Copy the bytes in front of the match, then step over the match. */
Py_MEMCPY(result_s, start, next-start);
1734
result_s += (next-start);
1735
start = next+from_len;
1737
/* Copy whatever follows the last deleted occurrence. */
Py_MEMCPY(result_s, start, end-start);
1741
/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1742
/* Replace occurrences of the byte from_c by to_c.  Because both are one
   byte long the result has the same length as self: a copy of self is made
   and the substitutions are then performed inside that copy (self itself
   is not written to by the visible code). */
Py_LOCAL(PyByteArrayObject *)
1743
replace_single_character_in_place(PyByteArrayObject *self,
1744
char from_c, char to_c,
1745
Py_ssize_t maxcount)
1747
char *self_s, *result_s, *start, *end, *next;
1748
Py_ssize_t self_len;
1749
PyByteArrayObject *result;
1751
/* The result string will be the same size */
1752
self_s = PyByteArray_AS_STRING(self);
1753
self_len = PyByteArray_GET_SIZE(self);
1755
/* Look for the first occurrence before allocating anything. */
next = findchar(self_s, self_len, from_c);
1758
/* No matches; return the original bytes */
1759
return return_self(self);
1762
/* Need to make a new bytes */
1763
result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
1766
result_s = PyByteArray_AS_STRING(result);
1767
Py_MEMCPY(result_s, self_s, self_len);
1769
/* change everything in-place, starting with this one */
1770
/* Translate the match position from self's buffer into the copy. */
start =  result_s + (next-self_s);
1773
end = result_s + self_len;
1775
/* Pre-decrement: the first match found above already used one count. */
while (--maxcount > 0) {
1776
next = findchar(start, end-start, from_c);
1786
/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1787
/* Replace occurrences of from_s by to_s when both have the same length.
   The result has the same size as self: a copy of self is made and each
   match inside the copy is overwritten with from_len bytes of to_s. */
Py_LOCAL(PyByteArrayObject *)
1788
replace_substring_in_place(PyByteArrayObject *self,
1789
const char *from_s, Py_ssize_t from_len,
1790
const char *to_s, Py_ssize_t to_len,
1791
Py_ssize_t maxcount)
1793
char *result_s, *start, *end;
1795
Py_ssize_t self_len, offset;
1796
PyByteArrayObject *result;
1798
/* The result bytes will be the same size */
1800
self_s = PyByteArray_AS_STRING(self);
1801
self_len = PyByteArray_GET_SIZE(self);
1803
/* Look for the first occurrence before allocating anything. */
offset = findstring(self_s, self_len,
1805
0, self_len, FORWARD);
1807
/* No matches; return the original bytes */
1808
return return_self(self);
1811
/* Need to make a new bytes */
1812
result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
1815
result_s = PyByteArray_AS_STRING(result);
1816
Py_MEMCPY(result_s, self_s, self_len);
1818
/* change everything in-place, starting with this one */
1819
start =  result_s + offset;
1820
/* from_len is used as the copy length; callers pass to_len == from_len
   (see the dispatch in replace()). */
Py_MEMCPY(start, to_s, from_len);
1822
end = result_s + self_len;
1824
/* Pre-decrement: the first match found above already used one count. */
while ( --maxcount > 0) {
1825
offset = findstring(start, end-start,
1827
0, end-start, FORWARD);
1830
Py_MEMCPY(start+offset, to_s, from_len);
1831
start += offset+from_len;
1837
/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1838
/* Replace occurrences of the single byte from_c by the to_s bytes
   (to_len long).  The matches are counted first so that the result can be
   allocated with its final size, self_len + count * (to_len - 1); both
   overflow checks raise OverflowError. */
Py_LOCAL(PyByteArrayObject *)
1839
replace_single_character(PyByteArrayObject *self,
1841
const char *to_s, Py_ssize_t to_len,
1842
Py_ssize_t maxcount)
1844
char *self_s, *result_s;
1845
char *start, *next, *end;
1846
Py_ssize_t self_len, result_len;
1847
Py_ssize_t count, product;
1848
PyByteArrayObject *result;
1850
self_s = PyByteArray_AS_STRING(self);
1851
self_len = PyByteArray_GET_SIZE(self);
1853
count = countchar(self_s, self_len, from_c, maxcount);
1855
/* no matches, return unchanged */
1856
return return_self(self);
1859
/* use the difference between current and new, hence the "-1" */
1860
/*   result_len = self_len + count * (to_len-1)  */
1861
/* NOTE(review): the product is validated only after it was computed; if
   Py_ssize_t is signed, overflow here is undefined behaviour in C. */
product = count * (to_len-1);
1862
if (product / (to_len-1) != count) {
1863
PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1866
result_len = self_len + product;
1867
if (result_len < 0) {
1868
PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1872
if ( (result = (PyByteArrayObject *)
1873
PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1875
result_s = PyByteArray_AS_STRING(result);
1878
end = self_s + self_len;
1879
/* One iteration per counted match. */
while (count-- > 0) {
1880
next = findchar(start, end-start, from_c);
1884
if (next == start) {
1885
/* replace with the 'to' */
1886
Py_MEMCPY(result_s, to_s, to_len);
1890
/* copy the unchanged old then the 'to' */
1891
Py_MEMCPY(result_s, start, next-start);
1892
result_s += (next-start);
1893
Py_MEMCPY(result_s, to_s, to_len);
1898
/* Copy the remainder of the remaining bytes */
1899
Py_MEMCPY(result_s, start, end-start);
1904
/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1905
/* General case: replace occurrences of from_s (from_len bytes) by to_s
   (to_len bytes).  The matches are counted first so that the result can be
   allocated with its final size,
   self_len + count * (to_len - from_len); both overflow checks raise
   OverflowError. */
Py_LOCAL(PyByteArrayObject *)
1906
replace_substring(PyByteArrayObject *self,
1907
const char *from_s, Py_ssize_t from_len,
1908
const char *to_s, Py_ssize_t to_len,
1909
Py_ssize_t maxcount)
1911
char *self_s, *result_s;
1912
char *start, *next, *end;
1913
Py_ssize_t self_len, result_len;
1914
Py_ssize_t count, offset, product;
1915
PyByteArrayObject *result;
1917
self_s = PyByteArray_AS_STRING(self);
1918
self_len = PyByteArray_GET_SIZE(self);
1920
count = countstring(self_s, self_len,
1922
0, self_len, FORWARD, maxcount);
1924
/* no matches, return unchanged */
1925
return return_self(self);
1928
/* Check for overflow */
1929
/*    result_len = self_len + count * (to_len-from_len) */
1930
/* NOTE(review): the divisor below is to_len - from_len; the equal-length
   case is dispatched elsewhere by replace(), which is what keeps the
   division from dividing by zero -- confirm no other caller exists. */
product = count * (to_len-from_len);
1931
if (product / (to_len-from_len) != count) {
1932
PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1935
result_len = self_len + product;
1936
if (result_len < 0) {
1937
PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1941
if ( (result = (PyByteArrayObject *)
1942
PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1944
result_s = PyByteArray_AS_STRING(result);
1947
end = self_s + self_len;
1948
/* One iteration per counted match. */
while (count-- > 0) {
1949
offset = findstring(start, end-start,
1951
0, end-start, FORWARD);
1954
next = start+offset;
1955
if (next == start) {
1956
/* replace with the 'to' */
1957
Py_MEMCPY(result_s, to_s, to_len);
1961
/* copy the unchanged old then the 'to' */
1962
Py_MEMCPY(result_s, start, next-start);
1963
result_s += (next-start);
1964
Py_MEMCPY(result_s, to_s, to_len);
1966
start = next+from_len;
1969
/* Copy the remainder of the remaining bytes */
1970
Py_MEMCPY(result_s, start, end-start);
1976
/* Dispatcher for bytearray replacement.  Chooses one of the specialised
   replace_* helpers from the lengths of the 'from' and 'to' byte strings:
     from_len == 0                 -> replace_interleave
     deletion, from_len == 1       -> replace_delete_single_character
     deletion, from_len >= 2       -> replace_delete_substring
     from_len == to_len == 1       -> replace_single_character_in_place
     from_len == to_len >= 2       -> replace_substring_in_place
     from_len == 1, otherwise      -> replace_single_character
     everything else               -> replace_substring
   Cases with nothing to do return return_self(self). */
Py_LOCAL(PyByteArrayObject *)
1977
replace(PyByteArrayObject *self,
1978
const char *from_s, Py_ssize_t from_len,
1979
const char *to_s, Py_ssize_t to_len,
1980
Py_ssize_t maxcount)
1983
/* presumably the branch for a negative maxcount, i.e. "no limit" --
   confirm the guarding condition */
maxcount = PY_SSIZE_T_MAX;
1984
} else if (maxcount == 0 || PyByteArray_GET_SIZE(self) == 0) {
1985
/* nothing to do; return the original bytes */
1986
return return_self(self);
1989
/* NOTE(review): maxcount == 0 was already handled by the branch above,
   so that half of this test looks redundant. */
if (maxcount == 0 ||
1990
(from_len == 0 && to_len == 0)) {
1991
/* nothing to do; return the original bytes */
1992
return return_self(self);
1995
/* Handle zero-length special cases */
1997
if (from_len == 0) {
1998
/* insert the 'to' bytes everywhere.   */
1999
/*    >>> "Python".replace("", ".")     */
2000
/*    '.P.y.t.h.o.n.'                   */
2001
return replace_interleave(self, to_s, to_len, maxcount);
2004
/* Except for "".replace("", "A") == "A" there is no way beyond this */
2005
/* point for an empty self bytes to generate a non-empty bytes */
2006
/* Special case so the remaining code always gets a non-empty bytes */
2007
if (PyByteArray_GET_SIZE(self) == 0) {
2008
return return_self(self);
2012
/* delete all occurrences of 'from' bytes */
2013
if (from_len == 1) {
2014
return replace_delete_single_character(
2015
self, from_s[0], maxcount);
2017
return replace_delete_substring(self, from_s, from_len, maxcount);
2021
/* Handle special case where both bytes have the same length */
2023
if (from_len == to_len) {
2024
if (from_len == 1) {
2025
return replace_single_character_in_place(
2031
return replace_substring_in_place(
2032
self, from_s, from_len, to_s, to_len, maxcount);
2036
/* Otherwise use the more generic algorithms */
2037
if (from_len == 1) {
2038
return replace_single_character(self, from_s[0],
2039
to_s, to_len, maxcount);
2041
/* len('from')>=2, len('to')>=1 */
2042
return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2047
PyDoc_STRVAR(replace__doc__,
2048
"B.replace(old, new[, count]) -> bytearray\n\
2050
Return a copy of B with all occurrences of subsection\n\
2051
old replaced by new. If the optional argument count is\n\
2052
given, only the first count occurrences are replaced.");
2055
/* Method wrapper for B.replace(old, new[, count]).
   Parses (old, new, count) with count defaulting to -1, obtains a buffer
   view of each of the two objects through _getbuffer(), calls replace()
   with the raw buffers and releases both views again.  When the second
   _getbuffer() fails the first view is released before bailing out. */
bytes_replace(PyByteArrayObject *self, PyObject *args)
2057
Py_ssize_t count = -1;
2058
PyObject *from, *to, *res;
2059
Py_buffer vfrom, vto;
2061
if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2064
if (_getbuffer(from, &vfrom) < 0)
2066
if (_getbuffer(to, &vto) < 0) {
2067
/* vfrom was acquired successfully above, so it must be released here. */
PyBuffer_Release(&vfrom);
2071
res = (PyObject *)replace((PyByteArrayObject *) self,
2072
vfrom.buf, vfrom.len,
2073
vto.buf, vto.len, count);
2075
PyBuffer_Release(&vfrom);
2076
PyBuffer_Release(&vto);
2081
/* Overallocate the initial list to reduce the number of reallocs for small
2082
split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
2083
resizes, to sizes 4, 8, then 16. Most observed string splits are for human
2084
text (roughly 11 words per line) and field delimited data (usually 1-10
2085
fields). For large strings the split algorithms are bandwidth limited
2086
so increasing the preallocation likely will not improve things.*/
2088
#define MAX_PREALLOC 12
2090
/* 5 splits gives 6 elements */
2091
#define PREALLOC_SIZE(maxsplit) \
2092
(maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1)
2094
#define SPLIT_APPEND(data, left, right) \
2095
str = PyByteArray_FromStringAndSize((data) + (left), \
2096
(right) - (left)); \
2099
if (PyList_Append(list, str)) { \
2106
#define SPLIT_ADD(data, left, right) { \
2107
str = PyByteArray_FromStringAndSize((data) + (left), \
2108
(right) - (left)); \
2111
if (count < MAX_PREALLOC) { \
2112
PyList_SET_ITEM(list, count, str); \
2114
if (PyList_Append(list, str)) { \
2123
/* Always force the list to the expected size. */
2124
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
2127
/* Split the len bytes at s on the single byte ch, performing at most
   maxcount splits, and collect the pieces in a list that is preallocated
   with PREALLOC_SIZE(maxcount) slots.  The remaining tail is added with
   SPLIT_ADD and the list is then trimmed to the number of items actually
   stored (FIX_PREALLOC_SIZE). */
Py_LOCAL_INLINE(PyObject *)
2128
split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2130
register Py_ssize_t i, j, count = 0;
2132
PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2138
while ((j < len) && (maxcount-- > 0)) {
2139
for(; j < len; j++) {
2140
/* I found that using memchr makes no difference */
2149
SPLIT_ADD(s, i, len);
2151
FIX_PREALLOC_SIZE(list);
2160
/* Split the len bytes at s on runs of bytes for which ISSPACE() is true,
   performing at most maxcount splits.  The pieces are collected in a list
   preallocated with PREALLOC_SIZE(maxcount) slots, which is trimmed to the
   real item count at the end (FIX_PREALLOC_SIZE). */
Py_LOCAL_INLINE(PyObject *)
2161
split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2163
register Py_ssize_t i, j, count = 0;
2165
PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2170
for (i = j = 0; i < len; ) {
2172
/* skip the whitespace in front of the next word */
while (i < len && ISSPACE(s[i]))
2175
/* scan to the end of the word */
while (i < len && !ISSPACE(s[i]))
2178
if (maxcount-- <= 0)
2181
while (i < len && ISSPACE(s[i]))
2187
SPLIT_ADD(s, j, len);
2189
FIX_PREALLOC_SIZE(list);
2197
PyDoc_STRVAR(split__doc__,
2198
"B.split([sep[, maxsplit]]) -> list of bytearrays\n\
2200
Return a list of the sections in B, using sep as the delimiter.\n\
2201
If sep is not given, B is split on ASCII whitespace characters\n\
2202
(space, tab, return, newline, formfeed, vertical tab).\n\
2203
If maxsplit is given, at most maxsplit splits are done.");
2206
/* Method wrapper for B.split([sep[, maxsplit]]).
   - sep omitted or None: delegates to split_whitespace().
   - otherwise a buffer view of sep is taken with _getbuffer(); an empty
     separator raises ValueError("empty separator"), a one-byte separator
     is handled by split_char(), and longer separators are searched for in
     the loops below.
   Every exit path after the view was acquired releases it with
   PyBuffer_Release(). */
bytes_split(PyByteArrayObject *self, PyObject *args)
2208
Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j;
2209
Py_ssize_t maxsplit = -1, count = 0;
2210
const char *s = PyByteArray_AS_STRING(self), *sub;
2211
PyObject *list, *str, *subobj = Py_None;
2217
if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
2220
/* presumably applied when maxsplit is negative ("no limit") -- confirm */
maxsplit = PY_SSIZE_T_MAX;
2222
if (subobj == Py_None)
2223
return split_whitespace(s, len, maxsplit);
2225
if (_getbuffer(subobj, &vsub) < 0)
2231
PyErr_SetString(PyExc_ValueError, "empty separator");
2232
PyBuffer_Release(&vsub);
2236
list = split_char(s, len, sub[0], maxsplit);
2237
PyBuffer_Release(&vsub);
2241
list = PyList_New(PREALLOC_SIZE(maxsplit));
2243
PyBuffer_Release(&vsub);
2249
/* NOTE(review): two search loops follow (fastsearch-based and a manual
   Py_STRING_MATCH scan); presumably they are alternative implementations
   selected at compile time -- confirm. */
while (maxsplit-- > 0) {
2250
pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2259
while ((j+n <= len) && (maxsplit-- > 0)) {
2260
for (; j+n <= len; j++) {
2261
if (Py_STRING_MATCH(s, j, sub, n)) {
2269
SPLIT_ADD(s, i, len);
2270
FIX_PREALLOC_SIZE(list);
2271
PyBuffer_Release(&vsub);
2276
PyBuffer_Release(&vsub);
2280
/* stringlib's partition shares nullbytes in some cases.
2281
undo this, we don't want the nullbytes to be shared. */
2283
/* Post-process a 3-tuple (as produced by the partition helpers): every
   slot that holds the shared `nullbytes` object is replaced by a freshly
   created empty bytearray, and the reference to `nullbytes` that the tuple
   held is dropped.  A NULL result is not inspected. */
make_nullbytes_unique(PyObject *result)
2285
if (result != NULL) {
2287
assert(PyTuple_Check(result));
2288
assert(PyTuple_GET_SIZE(result) == 3);
2289
for (i = 0; i < 3; i++) {
2290
if (PyTuple_GET_ITEM(result, i) == (PyObject *)nullbytes) {
2291
PyObject *new = PyByteArray_FromStringAndSize(NULL, 0);
2297
/* Release the tuple's reference to nullbytes before overwriting the
   slot with the new object. */
Py_DECREF(nullbytes);
2298
PyTuple_SET_ITEM(result, i, new);
2305
PyDoc_STRVAR(partition__doc__,
2306
"B.partition(sep) -> (head, sep, tail)\n\
2308
Search for the separator sep in B, and return the part before it,\n\
2309
the separator itself, and the part after it. If the separator is not\n\
2310
found, returns B and two empty bytearray objects.");
2313
/* Method wrapper for B.partition(sep).
   The separator is first converted to a bytearray with
   PyByteArray_FromObject(); stringlib_partition() then does the work on
   the raw buffers, and make_nullbytes_unique() replaces any shared empty
   object in the resulting tuple. */
bytes_partition(PyByteArrayObject *self, PyObject *sep_obj)
2315
PyObject *bytesep, *result;
2317
bytesep = PyByteArray_FromObject(sep_obj);
2321
result = stringlib_partition(
2323
PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
2325
PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
2329
return make_nullbytes_unique(result);
2332
/* Docstring for B.rpartition().  The signature line names the tuple
   elements in the order in which the text below describes them: the part
   before the separator, the separator, and the part after it. */
PyDoc_STRVAR(rpartition__doc__,
2333
"B.rpartition(sep) -> (head, sep, tail)\n\
2335
Search for the separator sep in B, starting at the end of B,\n\
2336
and return the part before it, the separator itself, and the\n\
2337
part after it.  If the separator is not found, returns two empty\n\
2338
bytearray objects and B.");
2341
/* Method wrapper for B.rpartition(sep).
   The separator is first converted to a bytearray with
   PyByteArray_FromObject(); stringlib_rpartition() then does the work on
   the raw buffers, and make_nullbytes_unique() replaces any shared empty
   object in the resulting tuple. */
bytes_rpartition(PyByteArrayObject *self, PyObject *sep_obj)
2343
PyObject *bytesep, *result;
2345
bytesep = PyByteArray_FromObject(sep_obj);
2349
result = stringlib_rpartition(
2351
PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
2353
PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
2357
return make_nullbytes_unique(result);
2360
/* Split the len bytes at s on the single byte ch, scanning from the end
   towards the front and performing at most maxcount splits.  Pieces are
   therefore collected last-to-first; the list is trimmed with
   FIX_PREALLOC_SIZE and then reversed with PyList_Reverse(). */
Py_LOCAL_INLINE(PyObject *)
2361
rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2363
register Py_ssize_t i, j, count=0;
2365
PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2371
while ((i >= 0) && (maxcount-- > 0)) {
2372
for (; i >= 0; i--) {
2374
SPLIT_ADD(s, i + 1, j + 1);
2381
SPLIT_ADD(s, 0, j + 1);
2383
FIX_PREALLOC_SIZE(list);
2384
if (PyList_Reverse(list) < 0)
2394
/* Split the len bytes at s on runs of bytes for which ISSPACE() is true,
   scanning from the end towards the front and performing at most maxcount
   splits.  Pieces are collected last-to-first; the list is trimmed with
   FIX_PREALLOC_SIZE and then reversed with PyList_Reverse(). */
Py_LOCAL_INLINE(PyObject *)
2395
rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2397
register Py_ssize_t i, j, count = 0;
2399
PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2404
for (i = j = len - 1; i >= 0; ) {
2406
/* skip the whitespace behind the next word */
while (i >= 0 && ISSPACE(s[i]))
2409
/* scan back to the start of the word */
while (i >= 0 && !ISSPACE(s[i]))
2412
if (maxcount-- <= 0)
2414
SPLIT_ADD(s, i + 1, j + 1);
2415
while (i >= 0 && ISSPACE(s[i]))
2421
SPLIT_ADD(s, 0, j + 1);
2423
FIX_PREALLOC_SIZE(list);
2424
if (PyList_Reverse(list) < 0)
2434
PyDoc_STRVAR(rsplit__doc__,
2435
"B.rsplit(sep[, maxsplit]) -> list of bytearrays\n\
2437
Return a list of the sections in B, using sep as the delimiter,\n\
2438
starting at the end of B and working to the front.\n\
2439
If sep is not given, B is split on ASCII whitespace characters\n\
2440
(space, tab, return, newline, formfeed, vertical tab).\n\
2441
If maxsplit is given, at most maxsplit splits are done.");
2444
/* Method wrapper for B.rsplit([sep[, maxsplit]]).
   - sep omitted or None: delegates to rsplit_whitespace().
   - otherwise a buffer view of sep is taken with _getbuffer(); an empty
     separator raises ValueError("empty separator"), a one-byte separator
     is handled by rsplit_char(), and longer separators are matched from
     the end with Py_STRING_MATCH, after which the list is reversed.
   Every exit path after the view was acquired releases it with
   PyBuffer_Release(). */
bytes_rsplit(PyByteArrayObject *self, PyObject *args)
2446
Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j;
2447
Py_ssize_t maxsplit = -1, count = 0;
2448
const char *s = PyByteArray_AS_STRING(self), *sub;
2449
PyObject *list, *str, *subobj = Py_None;
2452
if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
2455
/* presumably applied when maxsplit is negative ("no limit") -- confirm */
maxsplit = PY_SSIZE_T_MAX;
2457
if (subobj == Py_None)
2458
return rsplit_whitespace(s, len, maxsplit);
2460
if (_getbuffer(subobj, &vsub) < 0)
2466
PyErr_SetString(PyExc_ValueError, "empty separator");
2467
PyBuffer_Release(&vsub);
2471
list = rsplit_char(s, len, sub[0], maxsplit);
2472
PyBuffer_Release(&vsub);
2476
list = PyList_New(PREALLOC_SIZE(maxsplit));
2478
PyBuffer_Release(&vsub);
2485
while ( (i >= 0) && (maxsplit-- > 0) ) {
2487
if (Py_STRING_MATCH(s, i, sub, n)) {
2488
SPLIT_ADD(s, i + n, j);
2496
FIX_PREALLOC_SIZE(list);
2497
if (PyList_Reverse(list) < 0)
2499
PyBuffer_Release(&vsub);
2504
PyBuffer_Release(&vsub);
2508
PyDoc_STRVAR(reverse__doc__,
2509
"B.reverse() -> None\n\
2511
Reverse the order of the values in B in place.");
2513
/* Method wrapper for B.reverse(): reverses the bytes of self in place.
   `head` starts at the first byte and `tail` at the last one
   (head + n - 1); the loop runs i from 0 up to j.  Presumably j is n / 2
   and each iteration swaps *head and *tail through `swap` -- confirm. */
bytes_reverse(PyByteArrayObject *self, PyObject *unused)
2515
char swap, *head, *tail;
2516
Py_ssize_t i, j, n = Py_SIZE(self);
2519
head = self->ob_bytes;
2520
tail = head + n - 1;
2521
for (i = 0; i < j; i++) {
2530
PyDoc_STRVAR(insert__doc__,
2531
"B.insert(index, int) -> None\n\
2533
Insert a single item into the bytearray before the given index.");
2535
/* Method wrapper for B.insert(index, int).
   Parses (index, value), refuses to grow an object whose size already is
   PY_SSIZE_T_MAX (OverflowError), validates the value with
   _getbytevalue(), grows the array by one byte with PyByteArray_Resize(),
   shifts the tail one position up and stores the new byte. */
bytes_insert(PyByteArrayObject *self, PyObject *args)
2539
Py_ssize_t where, n = Py_SIZE(self);
2541
if (!PyArg_ParseTuple(args, "nO:insert", &where, &value))
2544
if (n == PY_SSIZE_T_MAX) {
2545
PyErr_SetString(PyExc_OverflowError,
2546
"cannot add more objects to bytes");
2549
if (!_getbytevalue(value, &ival))
2551
if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
2561
/* memmove, not memcpy: source and destination overlap.  n is the size
   before the resize, so n - where bytes move up by one. */
memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
2562
self->ob_bytes[where] = ival;
2567
PyDoc_STRVAR(append__doc__,
2568
"B.append(int) -> None\n\
2570
Append a single item to the end of B.");
2572
/* Method wrapper for B.append(int).
   Validates the argument with _getbytevalue(), refuses to grow an object
   whose size already is PY_SSIZE_T_MAX (OverflowError), grows the array by
   one byte with PyByteArray_Resize() and stores the value in the new last
   position (index n, the size before the resize). */
bytes_append(PyByteArrayObject *self, PyObject *arg)
2575
Py_ssize_t n = Py_SIZE(self);
2577
if (! _getbytevalue(arg, &value))
2579
if (n == PY_SSIZE_T_MAX) {
2580
PyErr_SetString(PyExc_OverflowError,
2581
"cannot add more objects to bytes");
2584
if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
2587
self->ob_bytes[n] = value;
2592
PyDoc_STRVAR(extend__doc__,
2593
"B.extend(iterable_of_ints) -> None\n\
2595
Append all the elements from the iterator or sequence to the\n\
2598
/* Method wrapper for B.extend(iterable_of_ints).
   Objects supporting the buffer protocol are appended directly through
   bytes_setslice() on the empty slice at the end of self.  Any other
   argument is iterated: the items are validated with _getbytevalue() and
   gathered in a temporary bytearray, which is grown on demand, trimmed to
   the exact length and finally appended through bytes_setslice(). */
bytes_extend(PyByteArrayObject *self, PyObject *arg)
2600
PyObject *it, *item, *bytes_obj;
2601
Py_ssize_t buf_size = 0, len = 0;
2605
/* bytes_setslice code only accepts something supporting PEP 3118. */
2606
if (PyObject_CheckBuffer(arg)) {
2607
if (bytes_setslice(self, Py_SIZE(self), Py_SIZE(self), arg) == -1)
2613
it = PyObject_GetIter(arg);
2617
/* Try to determine the length of the argument. 32 is arbitrary. */
2618
buf_size = _PyObject_LengthHint(arg, 32);
2619
if (buf_size == -1) {
2624
bytes_obj = PyByteArray_FromStringAndSize(NULL, buf_size);
2625
if (bytes_obj == NULL)
2627
buf = PyByteArray_AS_STRING(bytes_obj);
2629
while ((item = PyIter_Next(it)) != NULL) {
2630
if (! _getbytevalue(item, &value)) {
2633
Py_DECREF(bytes_obj);
2639
/* The temporary buffer is full: grow it by roughly 50% plus one. */
if (len >= buf_size) {
2640
buf_size = len + (len >> 1) + 1;
2641
if (PyByteArray_Resize((PyObject *)bytes_obj, buf_size) < 0) {
2643
Py_DECREF(bytes_obj);
2646
/* Recompute the `buf' pointer, since the resizing operation may
2647
have invalidated it. */
2648
buf = PyByteArray_AS_STRING(bytes_obj);
2653
/* Resize down to exact size. */
2654
if (PyByteArray_Resize((PyObject *)bytes_obj, len) < 0) {
2655
Py_DECREF(bytes_obj);
2659
if (bytes_setslice(self, Py_SIZE(self), Py_SIZE(self), bytes_obj) == -1)
2661
Py_DECREF(bytes_obj);
2666
PyDoc_STRVAR(pop__doc__,
2667
"B.pop([index]) -> int\n\
2669
Remove and return a single item from B. If no index\n\
2670
argument is given, will pop the last value.");
2672
/* Method wrapper for B.pop([index]) -> int.
   Removes one byte from self and returns it as an int in range(0, 256).
   The index defaults to -1 (the last byte); Py_SIZE(self) is added to it
   (presumably only for negative indices -- the guard should be confirmed)
   and an index outside the array raises IndexError.  _canresize() is
   consulted before any byte is moved, the tail is shifted down by one and
   the array is shrunk with PyByteArray_Resize(). */
bytes_pop(PyByteArrayObject *self, PyObject *args)
2675
Py_ssize_t where = -1, n = Py_SIZE(self);
2677
if (!PyArg_ParseTuple(args, "|n:pop", &where))
2681
PyErr_SetString(PyExc_OverflowError,
2682
"cannot pop an empty bytes");
2686
where += Py_SIZE(self);
2687
if (where < 0 || where >= Py_SIZE(self)) {
2688
PyErr_SetString(PyExc_IndexError, "pop index out of range");
2691
if (!_canresize(self))
2694
value = self->ob_bytes[where];
2695
/* n - where bytes are moved, which includes the byte at index n; this
   presumably is the extra trailing byte kept by the allocator -- confirm. */
memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2696
if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
2699
/* ob_bytes is a plain char buffer; where char is signed, a byte >= 0x80
   read from it is negative.  Cast so that the returned int always lies in
   range(0, 256). */
return PyLong_FromLong((unsigned char)value);
2702
PyDoc_STRVAR(remove__doc__,
2703
"B.remove(int) -> None\n\
2705
Remove the first occurrence of a value in B.");
2707
/* Method wrapper for B.remove(int).
   Validates the argument with _getbytevalue() (an int in range(0, 256)),
   searches self for the first byte equal to it, raises
   ValueError("value not found in bytes") when the search fails, consults
   _canresize(), shifts the tail down by one and shrinks the array with
   PyByteArray_Resize(). */
bytes_remove(PyByteArrayObject *self, PyObject *arg)
2710
Py_ssize_t where, n = Py_SIZE(self);
2712
if (! _getbytevalue(arg, &value))
2715
for (where = 0; where < n; where++) {
2716
/* Compare as unsigned char: `value` is an int in 0..255, whereas a plain
   char element >= 0x80 is negative where char is signed and would never
   compare equal. */
if ((unsigned char)self->ob_bytes[where] == value)
2720
PyErr_SetString(PyExc_ValueError, "value not found in bytes");
2723
if (!_canresize(self))
2726
/* n - where bytes are moved, which includes the byte at index n; this
   presumably is the extra trailing byte kept by the allocator -- confirm. */
memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2727
if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
2733
/* XXX These two helpers could be optimized if argsize == 1 */
2736
/* Scan myptr[0..mysize) from the front for as long as the current byte
   occurs in the set argptr[0..argsize) (membership tested with memchr).
   Presumably returns the index of the first byte that is not in the set,
   or mysize when all bytes are -- confirm. */
lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2737
void *argptr, Py_ssize_t argsize)
2740
while (i < mysize && memchr(argptr, myptr[i], argsize))
2746
/* Scan myptr[0..mysize) from the back, starting at index mysize - 1, for
   as long as the current byte occurs in the set argptr[0..argsize)
   (membership tested with memchr).  Presumably returns one past the index
   of the last byte that is not in the set -- confirm. */
rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2747
void *argptr, Py_ssize_t argsize)
2749
Py_ssize_t i = mysize - 1;
2750
while (i >= 0 && memchr(argptr, myptr[i], argsize))
2755
PyDoc_STRVAR(strip__doc__,
2756
"B.strip([bytes]) -> bytearray\n\
2758
Strip leading and trailing bytes contained in the argument\n\
2759
and return the result as a new bytearray.\n\
2760
If the argument is omitted, strip ASCII whitespace.");
2762
/* Method wrapper for B.strip([bytes]).
   With no argument (or None) the strip set is the six ASCII whitespace
   bytes "\t\n\r\f\v "; otherwise it is the content of the argument's
   buffer, obtained with _getbuffer().  lstrip_helper() and rstrip_helper()
   give the bounds of the kept region, which is returned as a new
   bytearray. */
bytes_strip(PyByteArrayObject *self, PyObject *args)
2764
Py_ssize_t left, right, mysize, argsize;
2765
void *myptr, *argptr;
2766
PyObject *arg = Py_None;
2768
if (!PyArg_ParseTuple(args, "|O:strip", &arg))
2770
if (arg == Py_None) {
2771
argptr = "\t\n\r\f\v ";
2775
if (_getbuffer(arg, &varg) < 0)
2780
myptr = self->ob_bytes;
2781
mysize = Py_SIZE(self);
2782
left = lstrip_helper(myptr, mysize, argptr, argsize);
2786
right = rstrip_helper(myptr, mysize, argptr, argsize);
2788
/* NOTE(review): varg is only filled in by _getbuffer() on the non-None
   path; confirm that this release is guarded for the None case. */
PyBuffer_Release(&varg);
2789
return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2792
PyDoc_STRVAR(lstrip__doc__,
2793
"B.lstrip([bytes]) -> bytearray\n\
2795
Strip leading bytes contained in the argument\n\
2796
and return the result as a new bytearray.\n\
2797
If the argument is omitted, strip leading ASCII whitespace.");
2799
/* Method wrapper for B.lstrip([bytes]).
   With no argument (or None) the strip set is the six ASCII whitespace
   bytes "\t\n\r\f\v "; otherwise it is the content of the argument's
   buffer, obtained with _getbuffer().  Only the left bound is computed
   with lstrip_helper(); `right` is presumably set to mysize -- confirm. */
bytes_lstrip(PyByteArrayObject *self, PyObject *args)
2801
Py_ssize_t left, right, mysize, argsize;
2802
void *myptr, *argptr;
2803
PyObject *arg = Py_None;
2805
if (!PyArg_ParseTuple(args, "|O:lstrip", &arg))
2807
if (arg == Py_None) {
2808
argptr = "\t\n\r\f\v ";
2812
if (_getbuffer(arg, &varg) < 0)
2817
myptr = self->ob_bytes;
2818
mysize = Py_SIZE(self);
2819
left = lstrip_helper(myptr, mysize, argptr, argsize);
2822
/* NOTE(review): varg is only filled in by _getbuffer() on the non-None
   path; confirm that this release is guarded for the None case. */
PyBuffer_Release(&varg);
2823
return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2826
PyDoc_STRVAR(rstrip__doc__,
2827
"B.rstrip([bytes]) -> bytearray\n\
2829
Strip trailing bytes contained in the argument\n\
2830
and return the result as a new bytearray.\n\
2831
If the argument is omitted, strip trailing ASCII whitespace.");
2833
/* Method wrapper for B.rstrip([bytes]).
   With no argument (or None) the strip set is the six ASCII whitespace
   bytes "\t\n\r\f\v "; otherwise it is the content of the argument's
   buffer, obtained with _getbuffer().  Only the right bound is computed
   with rstrip_helper(); `left` is presumably set to 0 -- confirm. */
bytes_rstrip(PyByteArrayObject *self, PyObject *args)
2835
Py_ssize_t left, right, mysize, argsize;
2836
void *myptr, *argptr;
2837
PyObject *arg = Py_None;
2839
if (!PyArg_ParseTuple(args, "|O:rstrip", &arg))
2841
if (arg == Py_None) {
2842
argptr = "\t\n\r\f\v ";
2846
if (_getbuffer(arg, &varg) < 0)
2851
myptr = self->ob_bytes;
2852
mysize = Py_SIZE(self);
2854
right = rstrip_helper(myptr, mysize, argptr, argsize);
2856
/* NOTE(review): varg is only filled in by _getbuffer() on the non-None
   path; confirm that this release is guarded for the None case. */
PyBuffer_Release(&varg);
2857
return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2860
PyDoc_STRVAR(decode_doc,
2861
"B.decode([encoding[, errors]]) -> str\n\
2863
Decode B using the codec registered for encoding. encoding defaults\n\
2864
to the default encoding. errors may be given to set a different error\n\
2865
handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2866
a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2867
as well as any other name registered with codecs.register_error that is\n\
2868
able to handle UnicodeDecodeErrors.");
2871
/* Method wrapper for B.decode([encoding[, errors]]).
   Both arguments are optional C strings.  When no encoding is given the
   value of PyUnicode_GetDefaultEncoding() is used; `errors` is passed
   through unchanged (NULL when omitted).  The decoding itself is done by
   PyUnicode_FromEncodedObject(). */
bytes_decode(PyObject *self, PyObject *args)
2873
const char *encoding = NULL;
2874
const char *errors = NULL;
2876
if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2878
if (encoding == NULL)
2879
encoding = PyUnicode_GetDefaultEncoding();
2880
return PyUnicode_FromEncodedObject(self, encoding, errors);
2883
PyDoc_STRVAR(alloc_doc,
2884
"B.__alloc__() -> int\n\
2886
Return the number of bytes actually allocated.");
2889
/* Method wrapper for B.__alloc__(): returns the ob_alloc field of self as
   a Python int. */
bytes_alloc(PyByteArrayObject *self)
2891
return PyLong_FromSsize_t(self->ob_alloc);
2894
PyDoc_STRVAR(join_doc,
2895
"B.join(iterable_of_bytes) -> bytearray\n\
2897
Concatenate any number of bytes/bytearray objects, with B\n\
2898
in between each pair, and return the result as a new bytearray.");
2901
/* Method wrapper for B.join(iterable_of_bytes).
   The argument is materialised with PySequence_Fast().  A first pass
   checks that every item is a bytes or bytearray object (TypeError
   otherwise) and accumulates the total size; a second pass copies the
   separator (the bytes of self) and the items into a freshly allocated
   bytearray. */
bytes_join(PyByteArrayObject *self, PyObject *it)
2904
Py_ssize_t mysize = Py_SIZE(self);
2908
Py_ssize_t totalsize = 0;
2912
seq = PySequence_Fast(it, "can only join an iterable");
2915
n = PySequence_Fast_GET_SIZE(seq);
2916
items = PySequence_Fast_ITEMS(seq);
2918
/* Compute the total size, and check that they are all bytes */
2919
/* XXX Shouldn't we use _getbuffer() on these items instead? */
2920
for (i = 0; i < n; i++) {
2921
PyObject *obj = items[i];
2922
if (!PyByteArray_Check(obj) && !PyBytes_Check(obj)) {
2923
PyErr_Format(PyExc_TypeError,
2924
"can only join an iterable of bytes "
2925
"(item %ld has type '%.100s')",
2926
/* XXX %ld isn't right on Win64 */
2927
(long)i, Py_TYPE(obj)->tp_name);
2931
/* presumably the separator is only counted between items (i > 0) --
   confirm the guard */
totalsize += mysize;
2932
totalsize += Py_SIZE(obj);
2933
/* A negative running total is treated as overflow. */
if (totalsize < 0) {
2939
/* Allocate the result, and copy the bytes */
2940
result = PyByteArray_FromStringAndSize(NULL, totalsize);
2943
dest = PyByteArray_AS_STRING(result);
2944
for (i = 0; i < n; i++) {
2945
PyObject *obj = items[i];
2946
Py_ssize_t size = Py_SIZE(obj);
2948
/* Pick the accessor that matches the item's concrete type. */
if (PyByteArray_Check(obj))
2949
buf = PyByteArray_AS_STRING(obj);
2951
buf = PyBytes_AS_STRING(obj);
2953
memcpy(dest, self->ob_bytes, mysize);
2956
memcpy(dest, buf, size);
2964
/* Error handling */
2970
PyDoc_STRVAR(fromhex_doc,
2971
"bytearray.fromhex(string) -> bytearray (static method)\n\
2973
Create a bytearray object from a string of hexadecimal numbers.\n\
2974
Spaces between two numbers are accepted.\n\
2975
Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef').");
2978
/* Convert one hexadecimal digit character to its numeric value.  The
   visible branch maps 'a'..'f' to 10..15; bytes_fromhex() treats a return
   value of -1 as "not a hexadecimal digit". */
hex_digit_to_int(Py_UNICODE c)
2987
if (c >= 'a' && c <= 'f')
2988
return c - 'a' + 10;
2994
/* Class method bytearray.fromhex(string).
   Takes a str object, allocates a result of hexlen / 2 bytes (an upper
   bound, since spaces are skipped), converts the input two characters at a
   time with hex_digit_to_int() and finally shrinks the result to the
   number of bytes actually written.  A character pair that is not made of
   hexadecimal digits raises ValueError naming the offending position. */
bytes_fromhex(PyObject *cls, PyObject *args)
2996
PyObject *newbytes, *hexobj;
2999
Py_ssize_t hexlen, byteslen, i, j;
3002
if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
3004
assert(PyUnicode_Check(hexobj));
3005
hexlen = PyUnicode_GET_SIZE(hexobj);
3006
hex = PyUnicode_AS_UNICODE(hexobj);
3007
byteslen = hexlen/2; /* This overestimates if there are spaces */
3008
newbytes = PyByteArray_FromStringAndSize(NULL, byteslen);
3011
buf = PyByteArray_AS_STRING(newbytes);
3012
/* i indexes the input characters, j the output bytes. */
for (i = j = 0; i < hexlen; i += 2) {
3013
/* skip over spaces in the input */
3014
while (hex[i] == ' ')
3018
top = hex_digit_to_int(hex[i]);
3019
bot = hex_digit_to_int(hex[i+1]);
3020
if (top == -1 || bot == -1) {
3021
PyErr_Format(PyExc_ValueError,
3022
"non-hexadecimal number found in "
3023
"fromhex() arg at position %zd", i);
3026
/* high nibble first */
buf[j++] = (top << 4) + bot;
3028
/* Trim the over-allocated result to the bytes actually produced. */
if (PyByteArray_Resize(newbytes, j) < 0)
3033
Py_DECREF(newbytes);
3037
PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3040
/* Method wrapper for B.__reduce__() (pickling support).
   The content is converted to a str by decoding it as Latin-1, and the
   instance __dict__ is looked up by attribute name.  The returned tuple is
   (type(self), (latin1_string, "latin-1"), dict); the "N" format codes
   hand the references to latin1 and dict over to the tuple. */
bytes_reduce(PyByteArrayObject *self)
3042
PyObject *latin1, *dict;
3044
latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
3045
Py_SIZE(self), NULL);
3047
/* presumably the alternative used when self has no byte buffer --
   confirm the guarding condition */
latin1 = PyUnicode_FromString("");
3049
dict = PyObject_GetAttrString((PyObject *)self, "__dict__");
3056
return Py_BuildValue("(O(Ns)N)", Py_TYPE(self), latin1, "latin-1", dict);
3059
PyDoc_STRVAR(sizeof_doc,
3060
"B.__sizeof__() -> int\n\
3062
Returns the size of B in memory, in bytes");
3064
/* Method wrapper for B.__sizeof__(): the size of the object structure
   plus the ob_alloc bytes of its buffer, returned as a Python int. */
bytes_sizeof(PyByteArrayObject *self)
3068
res = sizeof(PyByteArrayObject) + self->ob_alloc * sizeof(char);
3069
return PyLong_FromSsize_t(res);
3072
/* Sequence protocol slots of the bytearray type.  sq_ass_slice is left
   empty (0); the trailing comment on every entry names its slot. */
static PySequenceMethods bytes_as_sequence = {
3073
(lenfunc)bytes_length,              /* sq_length */
3074
(binaryfunc)PyByteArray_Concat,    /* sq_concat */
3075
(ssizeargfunc)bytes_repeat,         /* sq_repeat */
3076
(ssizeargfunc)bytes_getitem,        /* sq_item */
3078
(ssizeobjargproc)bytes_setitem,     /* sq_ass_item */
3079
0,                                  /* sq_ass_slice */
3080
(objobjproc)bytes_contains,         /* sq_contains */
3081
(binaryfunc)bytes_iconcat,          /* sq_inplace_concat */
3082
(ssizeargfunc)bytes_irepeat,        /* sq_inplace_repeat */
3085
/* Mapping protocol slots of the bytearray type, in declaration order:
   length, subscript (item/slice read) and subscript assignment. */
static PyMappingMethods bytes_as_mapping = {
3086
(lenfunc)bytes_length,
3087
(binaryfunc)bytes_subscript,
3088
(objobjargproc)bytes_ass_subscript,
3091
/* Buffer protocol slots of the bytearray type: acquiring a view goes
   through bytes_getbuffer(), releasing it through bytes_releasebuffer(). */
static PyBufferProcs bytes_as_buffer = {
3092
(getbufferproc)bytes_getbuffer,
3093
(releasebufferproc)bytes_releasebuffer,
3098
{"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
3099
{"__reduce__", (PyCFunction)bytes_reduce, METH_NOARGS, reduce_doc},
3100
{"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS, sizeof_doc},
3101
{"append", (PyCFunction)bytes_append, METH_O, append__doc__},
3102
{"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
3103
_Py_capitalize__doc__},
3104
{"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
3105
{"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
3106
{"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
3107
{"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__},
3108
{"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
3110
{"extend", (PyCFunction)bytes_extend, METH_O, extend__doc__},
3111
{"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
3112
{"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
3114
{"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
3115
{"insert", (PyCFunction)bytes_insert, METH_VARARGS, insert__doc__},
3116
{"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
3117
_Py_isalnum__doc__},
3118
{"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
3119
_Py_isalpha__doc__},
3120
{"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
3121
_Py_isdigit__doc__},
3122
{"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
3123
_Py_islower__doc__},
3124
{"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
3125
_Py_isspace__doc__},
3126
{"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
3127
_Py_istitle__doc__},
3128
{"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
3129
_Py_isupper__doc__},
3130
{"join", (PyCFunction)bytes_join, METH_O, join_doc},
3131
{"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3132
{"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
3133
{"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
3134
{"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
3135
{"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
3136
{"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
3137
{"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
3138
{"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
3139
{"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
3140
{"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
3141
{"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
3142
{"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__},
3143
{"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
3144
{"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
3145
{"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
3146
{"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
3148
{"startswith", (PyCFunction)bytes_startswith, METH_VARARGS ,
3150
{"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
3151
{"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
3152
_Py_swapcase__doc__},
3153
{"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
3154
{"translate", (PyCFunction)bytes_translate, METH_VARARGS,
3156
{"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
3157
{"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
3161
PyDoc_STRVAR(bytes_doc,
3162
"bytearray(iterable_of_ints) -> bytearray\n\
3163
bytearray(string, encoding[, errors]) -> bytearray\n\
3164
bytearray(bytes_or_bytearray) -> mutable copy of bytes_or_bytearray\n\
3165
bytearray(memory_view) -> bytearray\n\
3167
Construct an mutable bytearray object from:\n\
3168
- an iterable yielding integers in range(256)\n\
3169
- a text string encoded using the specified encoding\n\
3170
- a bytes or a bytearray object\n\
3171
- any object implementing the buffer API.\n\
3173
bytearray(int) -> bytearray\n\
3175
Construct a zero-initialized bytearray of the given length.");
3178
/* Forward declaration: bytes_iter is installed in the tp_iter slot of
   PyByteArray_Type but is defined later, after the iterator type. */
static PyObject *bytes_iter(PyObject *seq);
3180
/* Type object for bytearray.
   The initializer is positional, so each entry's meaning is given by its
   slot comment.  It wires up the deallocator, repr/str, the sequence,
   mapping and buffer protocol tables, rich comparison, the iterator
   factory (bytes_iter), the method table and the init/alloc/new/free
   hooks.  tp_flags includes Py_TPFLAGS_BASETYPE. */
PyTypeObject PyByteArray_Type = {
3181
PyVarObject_HEAD_INIT(&PyType_Type, 0)
3183
sizeof(PyByteArrayObject), /* tp_basicsize */
3185
(destructor)bytes_dealloc, /* tp_dealloc */
3189
0, /* tp_reserved */
3190
(reprfunc)bytes_repr, /* tp_repr */
3191
0, /* tp_as_number */
3192
&bytes_as_sequence, /* tp_as_sequence */
3193
&bytes_as_mapping, /* tp_as_mapping */
3196
bytes_str, /* tp_str */
3197
PyObject_GenericGetAttr, /* tp_getattro */
3198
0, /* tp_setattro */
3199
&bytes_as_buffer, /* tp_as_buffer */
3200
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3201
bytes_doc, /* tp_doc */
3202
0, /* tp_traverse */
3204
(richcmpfunc)bytes_richcompare, /* tp_richcompare */
3205
0, /* tp_weaklistoffset */
3206
bytes_iter, /* tp_iter */
3207
0, /* tp_iternext */
3208
bytes_methods, /* tp_methods */
3213
0, /* tp_descr_get */
3214
0, /* tp_descr_set */
3215
0, /* tp_dictoffset */
3216
(initproc)bytes_init, /* tp_init */
3217
PyType_GenericAlloc, /* tp_alloc */
3218
PyType_GenericNew, /* tp_new */
3219
PyObject_Del, /* tp_free */
3222
/*********************** Bytes Iterator ****************************/
3226
/* State carried by a bytearray iterator (bytesiterobject). */
Py_ssize_t it_index; /* Position in it_seq of the byte bytesiter_next reads */
3227
PyByteArrayObject *it_seq; /* Set to NULL when iterator is exhausted */
3231
/* Destructor for bytearray iterators (tp_dealloc of PyByteArrayIter_Type).
   Removes the iterator from GC tracking first, then drops its reference to
   the underlying bytearray, and finally frees the iterator itself.
   Py_XDECREF is used because it_seq may already be NULL. */
bytesiter_dealloc(bytesiterobject *it)
3233
_PyObject_GC_UNTRACK(it);
3234
Py_XDECREF(it->it_seq);
3235
PyObject_GC_Del(it);
3239
/* GC traversal hook (tp_traverse of PyByteArrayIter_Type): reports it_seq,
   the only object reference visited here, to the visit callback. */
bytesiter_traverse(bytesiterobject *it, visitproc visit, void *arg)
3241
Py_VISIT(it->it_seq);
3246
/* tp_iternext for bytearray iterators.
   While it_index is below the bytearray's current size, the byte at that
   position is converted to a Python int.  The size is read on every call
   rather than cached. */
bytesiter_next(bytesiterobject *it)
3248
PyByteArrayObject *seq;
3255
assert(PyByteArray_Check(seq));
3257
if (it->it_index < PyByteArray_GET_SIZE(seq)) {
3258
item = PyLong_FromLong(
3259
/* cast through unsigned char so the resulting int is never negative */
(unsigned char)seq->ob_bytes[it->it_index]);
3271
/* Implementation of the iterator's __length_hint__ method: computes the
   bytearray's current size minus the iterator's position and returns it
   as a Python int. */
bytesiter_length_hint(bytesiterobject *it)
3275
len = PyByteArray_GET_SIZE(it->it_seq) - it->it_index;
3276
return PyLong_FromSsize_t(len);
3279
/* Docstring text for the length-hint method. */
PyDoc_STRVAR(length_hint_doc,
3280
"Private method returning an estimate of len(list(it)).");
3282
/* Method table for the bytearray iterator type (its tp_methods).
   Exposes __length_hint__ as a no-argument method implemented by
   bytesiter_length_hint. */
static PyMethodDef bytesiter_methods[] = {
3283
{"__length_hint__", (PyCFunction)bytesiter_length_hint, METH_NOARGS,
3285
{NULL, NULL} /* sentinel */
3288
/* Type object for bytearray iterators ("bytearray_iterator").
   The initializer is positional; the slot comments name each entry.
   The type sets Py_TPFLAGS_HAVE_GC and supplies bytesiter_traverse as its
   traversal hook.  tp_iter is PyObject_SelfIter, tp_iternext is
   bytesiter_next, and bytesiter_methods provides its methods. */
PyTypeObject PyByteArrayIter_Type = {
3289
PyVarObject_HEAD_INIT(&PyType_Type, 0)
3290
"bytearray_iterator", /* tp_name */
3291
sizeof(bytesiterobject), /* tp_basicsize */
3292
0, /* tp_itemsize */
3294
(destructor)bytesiter_dealloc, /* tp_dealloc */
3298
0, /* tp_reserved */
3300
0, /* tp_as_number */
3301
0, /* tp_as_sequence */
3302
0, /* tp_as_mapping */
3306
PyObject_GenericGetAttr, /* tp_getattro */
3307
0, /* tp_setattro */
3308
0, /* tp_as_buffer */
3309
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
3311
(traverseproc)bytesiter_traverse, /* tp_traverse */
3313
0, /* tp_richcompare */
3314
0, /* tp_weaklistoffset */
3315
PyObject_SelfIter, /* tp_iter */
3316
(iternextfunc)bytesiter_next, /* tp_iternext */
3317
bytesiter_methods, /* tp_methods */
3322
bytes_iter(PyObject *seq)
3324
bytesiterobject *it;
3326
if (!PyByteArray_Check(seq)) {
3327
PyErr_BadInternalCall();
3330
it = PyObject_GC_New(bytesiterobject, &PyByteArrayIter_Type);
3335
it->it_seq = (PyByteArrayObject *)seq;
3336
_PyObject_GC_TRACK(it);
3337
return (PyObject *)it;