1
from numerictypes import string, unicode_, integer, object_
2
from numeric import ndarray, broadcast, empty
3
from numeric import array as narray
6
__all__ = ['chararray']
11
# special sub-class for character arrays (string and unicode_)
12
# This adds equality testing and methods of str and unicode types
13
# which operate on an element-by-element basis
16
class chararray(ndarray):
17
def __new__(subtype, shape, itemsize=1, unicode=False, buffer=None,
18
offset=0, strides=None, order=None):
28
self = ndarray.__new__(subtype, shape, (dtype, itemsize),
31
self = ndarray.__new__(subtype, shape, (dtype, itemsize),
33
offset=offset, strides=strides,
38
def __array_finalize__(self, obj):
    """Reject finalization of a chararray whose dtype is not string-like.

    The check is skipped while the module-level flag ``_globalvar`` is
    truthy.  Otherwise ``self.dtype.char`` must be one of the codes in
    ``'SUb'``.

    Raises
    ------
    ValueError
        If the dtype character code is not in ``'SUb'``.
    """
    # The b is a special case because it is used for reconstructing.
    if not _globalvar and self.dtype.char not in 'SUb':
        # Call-form raise is equivalent to the old ``raise X, "msg"``
        # statement and also parses on Python 3.
        raise ValueError("Can only create a chararray from string data.")
44
## def _richcmpfunc(self, other, op):
45
## b = broadcast(self, other)
46
## result = empty(b.shape, dtype=bool)
48
## for k, val in enumerate(b):
49
## r1 = val[0].rstrip('\x00')
51
## res[k] = eval("r1 %s r2" % op, {'r1':r1,'r2':r2})
54
# these have been moved to C
55
## def __eq__(self, other):
56
## return self._richcmpfunc(other, '==')
58
## def __ne__(self, other):
59
## return self._richcmpfunc(other, '!=')
61
## def __ge__(self, other):
62
## return self._richcmpfunc(other, '>=')
64
## def __le__(self, other):
65
## return self._richcmpfunc(other, '<=')
67
## def __gt__(self, other):
68
## return self._richcmpfunc(other, '>')
70
## def __lt__(self, other):
71
## return self._richcmpfunc(other, '<')
73
def __add__(self, other):
74
b = broadcast(self, other)
76
outitem = self.itemsize + arr.itemsize
77
result = chararray(b.shape, outitem, self.dtype is unicode_)
79
for k, val in enumerate(b):
80
res[k] = (val[0] + val[1])
83
def __radd__(self, other):
84
b = broadcast(other, self)
85
outitem = b.iters[0].base.itemsize + \
86
b.iters[1].base.itemsize
87
result = chararray(b.shape, outitem, self.dtype is unicode_)
89
for k, val in enumerate(b):
90
res[k] = (val[0] + val[1])
93
def __mul__(self, other):
94
b = broadcast(self, other)
96
if not issubclass(arr.dtype.type, integer):
97
raise ValueError, "Can only multiply by integers"
98
outitem = b.iters[0].base.itemsize * arr.max()
99
result = chararray(b.shape, outitem, self.dtype is unicode_)
101
for k, val in enumerate(b):
102
res[k] = val[0]*val[1]
105
def __rmul__(self, other):
106
b = broadcast(self, other)
107
arr = b.iters[1].base
108
if not issubclass(arr.dtype.type, integer):
109
raise ValueError, "Can only multiply by integers"
110
outitem = b.iters[0].base.itemsize * arr.max()
111
result = chararray(b.shape, outitem, self.dtype is unicode_)
113
for k, val in enumerate(b):
114
res[k] = val[0]*val[1]
117
def __mod__(self, other):
118
b = broadcast(self, other)
121
for k,val in enumerate(b):
122
newval = val[0] % val[1]
123
maxsize = max(len(newval), maxsize)
125
newarr = chararray(b.shape, maxsize, self.dtype is unicode_)
129
def __rmod__(self, other):
130
return NotImplemented
132
def _generalmethod(self, name, myiter):
133
res = [None]*myiter.size
135
for k, val in enumerate(myiter):
138
if chk.dtype is object_ and chk.item() is None:
141
newitem = getattr(val[0],name)(*newval)
142
maxsize = max(len(newitem), maxsize)
144
newarr = chararray(myiter.shape, maxsize, self.dtype is unicode_)
149
def _typedmethod(self, name, myiter, dtype):
150
result = empty(myiter.shape, dtype=dtype)
152
for k, val in enumerate(myiter):
155
if chk.dtype is object_ and chk.item() is None:
158
this_str = val[0].rstrip('\x00')
159
newitem = getattr(this_str,name)(*newval)
163
def _samemethod(self, name):
166
for k, val in enumerate(self.flat):
167
res[k] = getattr(val, name)()
170
def capitalize(self):
    """Dispatch the ``'capitalize'`` string method through ``_samemethod``."""
    method_name = 'capitalize'
    return self._samemethod(method_name)
173
if sys.version[:3] >= '2.4':
174
def center(self, width, fillchar=' '):
    """Dispatch ``'center'`` with a fill character.

    ``self``, ``width`` and ``fillchar`` are broadcast together and the
    resulting iterator is passed to ``_generalmethod``.
    """
    operands = broadcast(self, width, fillchar)
    return self._generalmethod('center', operands)
177
def ljust(self, width, fillchar=' '):
    """Dispatch ``'ljust'`` with a fill character.

    ``self``, ``width`` and ``fillchar`` are broadcast together and the
    resulting iterator is passed to ``_generalmethod``.
    """
    operands = broadcast(self, width, fillchar)
    return self._generalmethod('ljust', operands)
180
def rjust(self, width, fillchar=' '):
    """Dispatch ``'rjust'`` with a fill character.

    ``self``, ``width`` and ``fillchar`` are broadcast together and the
    resulting iterator is passed to ``_generalmethod``.
    """
    operands = broadcast(self, width, fillchar)
    return self._generalmethod('rjust', operands)
183
def rsplit(self, sep=None, maxsplit=None):
184
return self._typedmethod('rsplit', broadcast(self, sep, maxsplit),
187
def ljust(self, width):
    """Dispatch ``'ljust'`` (width-only variant, no fill character).

    ``self`` and ``width`` are broadcast together and the resulting
    iterator is passed to ``_generalmethod``.
    """
    operands = broadcast(self, width)
    return self._generalmethod('ljust', operands)
189
def rjust(self, width):
    """Dispatch ``'rjust'`` (width-only variant, no fill character).

    ``self`` and ``width`` are broadcast together and the resulting
    iterator is passed to ``_generalmethod``.
    """
    operands = broadcast(self, width)
    return self._generalmethod('rjust', operands)
191
def center(self, width):
    """Dispatch ``'center'`` (width-only variant, no fill character).

    ``self`` and ``width`` are broadcast together and the resulting
    iterator is passed to ``_generalmethod``.
    """
    operands = broadcast(self, width)
    return self._generalmethod('center', operands)
194
def count(self, sub, start=None, end=None):
    """Dispatch the ``'count'`` string method with an ``int`` result dtype.

    ``self``, ``sub``, ``start`` and ``end`` are broadcast together and
    the resulting iterator is passed to ``_typedmethod``.
    """
    operands = broadcast(self, sub, start, end)
    return self._typedmethod('count', operands, int)
197
def decode(self, encoding=None, errors=None):
    """Dispatch the ``'decode'`` string method through ``_generalmethod``.

    ``self``, ``encoding`` and ``errors`` are broadcast together first.
    """
    operands = broadcast(self, encoding, errors)
    return self._generalmethod('decode', operands)
200
def encode(self, encoding=None, errors=None):
    """Dispatch the ``'encode'`` string method through ``_generalmethod``.

    ``self``, ``encoding`` and ``errors`` are broadcast together first.
    """
    operands = broadcast(self, encoding, errors)
    return self._generalmethod('encode', operands)
203
def endswith(self, suffix, start=None, end=None):
    """Dispatch the ``'endswith'`` string method with a ``bool`` result dtype.

    ``self``, ``suffix``, ``start`` and ``end`` are broadcast together
    and the resulting iterator is passed to ``_typedmethod``.
    """
    operands = broadcast(self, suffix, start, end)
    return self._typedmethod('endswith', operands, bool)
206
def expandtabs(self, tabsize=None):
    """Dispatch the ``'expandtabs'`` string method through ``_generalmethod``.

    ``self`` and ``tabsize`` are broadcast together and the resulting
    iterator is handed to ``_generalmethod``.
    """
    # The dispatched name must match this wrapper.  It was previously
    # 'endswith', which invoked the wrong string method on every element.
    return self._generalmethod('expandtabs', broadcast(self, tabsize))
209
def find(self, sub, start=None, end=None):
    """Dispatch the ``'find'`` string method with an ``int`` result dtype.

    ``self``, ``sub``, ``start`` and ``end`` are broadcast together and
    the resulting iterator is passed to ``_typedmethod``.
    """
    operands = broadcast(self, sub, start, end)
    return self._typedmethod('find', operands, int)
212
def index(self, sub, start=None, end=None):
    """Dispatch the ``'index'`` string method with an ``int`` result dtype.

    ``self``, ``sub``, ``start`` and ``end`` are broadcast together and
    the resulting iterator is passed to ``_typedmethod``.
    """
    operands = broadcast(self, sub, start, end)
    return self._typedmethod('index', operands, int)
215
def _ismethod(self, name):
216
result = empty(self.shape, dtype=bool)
218
for k, val in enumerate(self.flat):
219
item = val.rstrip('\x00')
220
res[k] = getattr(item, name)()
224
return self._ismethod('isalnum')
227
return self._ismethod('isalpha')
230
return self._ismethod('isdigit')
233
return self._ismethod('islower')
236
return self._ismethod('isspace')
239
return self._ismethod('istitle')
242
return self._ismethod('isupper')
245
return self._generalmethod('join', broadcast(self, seq))
248
return self._samemethod('lower')
250
def lstrip(self, chars=None):
    """Dispatch the ``'lstrip'`` string method through ``_generalmethod``.

    ``self`` and ``chars`` are broadcast together first.  ``chars`` is
    optional and defaults to ``None``, matching the signatures of the
    sibling ``rstrip`` and ``strip`` methods; existing callers that pass
    ``chars`` explicitly are unaffected.
    """
    return self._generalmethod('lstrip', broadcast(self, chars))
253
def replace(self, old, new, count=None):
    """Dispatch the ``'replace'`` string method through ``_generalmethod``.

    ``self``, ``old``, ``new`` and ``count`` are broadcast together first.
    """
    operands = broadcast(self, old, new, count)
    return self._generalmethod('replace', operands)
256
def rfind(self, sub, start=None, end=None):
    """Dispatch the ``'rfind'`` string method with an ``int`` result dtype.

    ``self``, ``sub``, ``start`` and ``end`` are broadcast together and
    the resulting iterator is passed to ``_typedmethod``.
    """
    operands = broadcast(self, sub, start, end)
    return self._typedmethod('rfind', operands, int)
259
def rindex(self, sub, start=None, end=None):
    """Dispatch the ``'rindex'`` string method with an ``int`` result dtype.

    ``self``, ``sub``, ``start`` and ``end`` are broadcast together and
    the resulting iterator is passed to ``_typedmethod``.
    """
    operands = broadcast(self, sub, start, end)
    return self._typedmethod('rindex', operands, int)
262
def rstrip(self, chars=None):
    """Dispatch the ``'rstrip'`` string method through ``_generalmethod``.

    ``self`` and ``chars`` are broadcast together first.
    """
    operands = broadcast(self, chars)
    return self._generalmethod('rstrip', operands)
265
def split(self, sep=None, maxsplit=None):
    """Dispatch the ``'split'`` string method with an ``object`` result dtype.

    ``self``, ``sep`` and ``maxsplit`` are broadcast together and the
    resulting iterator is passed to ``_typedmethod``.
    """
    operands = broadcast(self, sep, maxsplit)
    return self._typedmethod('split', operands, object)
268
def splitlines(self, keepends=None):
    """Dispatch ``'splitlines'`` with an ``object`` result dtype.

    ``self`` and ``keepends`` are broadcast together and the resulting
    iterator is passed to ``_typedmethod``.
    """
    operands = broadcast(self, keepends)
    return self._typedmethod('splitlines', operands, object)
271
def startswith(self, prefix, start=None, end=None):
    """Dispatch ``'startswith'`` with a ``bool`` result dtype.

    ``self``, ``prefix``, ``start`` and ``end`` are broadcast together
    and the resulting iterator is passed to ``_typedmethod``.
    """
    operands = broadcast(self, prefix, start, end)
    return self._typedmethod('startswith', operands, bool)
274
def strip(self, chars=None):
    """Dispatch the ``'strip'`` string method through ``_generalmethod``.

    ``self`` and ``chars`` are broadcast together first.
    """
    operands = broadcast(self, chars)
    return self._generalmethod('strip', operands)
278
return self._samemethod('swapcase')
281
return self._samemethod('title')
283
def translate(self, table, deletechars=None):
284
if self.dtype is unicode_:
285
return self._generalmethod('translate', broadcast(self, table))
287
return self._generalmethod('translate', broadcast(self, table, deletechars))
290
return self._samemethod('upper')
292
def zfill(self, width):
    """Dispatch the ``'zfill'`` string method through ``_generalmethod``.

    ``self`` and ``width`` are broadcast together first.
    """
    operands = broadcast(self, width)
    return self._generalmethod('zfill', operands)
296
def array(obj, itemsize=None, copy=True, unicode=False, order=None):
298
if isinstance(obj, chararray):
300
itemsize = obj.itemsize
301
if copy or (itemsize != obj.itemsize) \
302
or (not unicode and obj.dtype == unicode_) \
303
or (unicode and obj.dtype == string):
304
return obj.astype("%s%d" % (obj.dtype.char, itemsize))
308
if isinstance(obj, ndarray) and (obj.dtype in [unicode_, string]):
309
new = obj.view(chararray)
310
if unicode and obj.dtype == string:
311
return new.astype((unicode_, obj.itemsize))
312
elif obj.dtype == unicode_:
313
return new.astype((string, obj.itemsize))
315
if copy: return new.copy()
318
if unicode: dtype = "U"
321
if itemsize is not None:
322
dtype += str(itemsize)
324
if isinstance(obj, (str, _unicode)):
327
shape = len(obj) / itemsize
328
return chararray(shape, itemsize=itemsize, unicode=unicode,
332
val = narray(obj, dtype=dtype, order=order, subok=1)
334
return val.view(chararray)
336
def asarray(obj, itemsize=None, unicode=False, order=None):
    """Call ``array`` on ``obj`` with ``copy=False``.

    ``itemsize``, ``unicode`` and ``order`` are forwarded unchanged.
    """
    options = dict(copy=False, unicode=unicode, order=order)
    return array(obj, itemsize, **options)