1
""" Utilties for casting numpy values in various ways
3
Most routines work round some numpy oddities in floating point precision and
4
casting. Others work round numpy casting to and from python ints
7
from platform import processor, machine
12
class CastingError(Exception):
16
def float_to_int(arr, int_type, nan2zero=True, infmax=False):
17
""" Convert floating point array `arr` to type `int_type`
19
* Rounds numbers to nearest integer
20
* Clips values to prevent overflows when casting
21
* Converts NaN to 0 (for `nan2zero`==True
23
Casting floats to integers is delicate because the result is undefined
24
and platform specific for float values outside the range of `int_type`.
25
Define ``shared_min`` to be the minimum value that can be exactly
26
represented in both the float type of `arr` and `int_type`. Define
27
`shared_max` to be the equivalent maximum value. To avoid undefined results
28
we threshold `arr` at ``shared_min`` and ``shared_max``.
33
Array of floating point type
36
nan2zero : {True, False, None}
37
Whether to convert NaN value to zero. Default is True. If False, and
38
NaNs are present, raise CastingError. If None, do not check for NaN
39
values and pass through directly to the ``astype`` casting mechanism.
40
In this last case, the resulting value is undefined.
41
infmax : {False, True}
42
If True, set np.inf values in `arr` to be `int_type` integer maximum
43
value, -np.inf as `int_type` integer minimum. If False, set +/- infs to
44
be ``shared_min``, ``shared_max`` as defined above. Therefore False
45
gives faster conversion at the expense of infs that are further from
55
>>> float_to_int([np.nan, np.inf, -np.inf, 1.1, 6.6], np.int16)
56
array([ 0, 32767, -32768, 1, 7], dtype=int16)
60
Numpy relies on the C library to cast from float to int using the standard
61
``astype`` method of the array.
63
Quoting from section F4 of the C99 standard:
65
If the floating value is infinite or NaN or if the integral part of the
66
floating value exceeds the range of the integer type, then the
67
"invalid" floating-point exception is raised and the resulting value
70
Hence we threshold at ``shared_min`` and ``shared_max`` to avoid casting to
71
values that are undefined.
73
See: http://en.wikipedia.org/wiki/C99 . There are links to the C99 standard
77
flt_type = arr.dtype.type
78
int_type = np.dtype(int_type).type
79
# Deal with scalar as input; fancy indexing needs 1D
81
arr = np.atleast_1d(arr)
82
mn, mx = shared_range(flt_type, int_type)
87
seen_nans = np.any(nans)
88
if nan2zero == False and seen_nans:
89
raise CastingError('NaNs in array, nan2zero is False')
90
iarr = np.clip(np.rint(arr), mn, mx).astype(int_type)
94
return iarr.reshape(shape)
95
ii = np.iinfo(int_type)
96
iarr[arr == np.inf] = ii.max
98
iarr[arr == -np.inf] = ii.min
99
return iarr.reshape(shape)
105
def shared_range(flt_type, int_type):
106
""" Min and max in float type that are >=min, <=max in integer type
108
This is not as easy as it sounds, because the float type may not be able to
109
exactly represent the max or min integer values, so we have to find the next
110
exactly representable floating point value to do the thresholding.
114
flt_type : dtype specifier
115
A dtype specifier referring to a numpy floating point type. For
116
example, ``f4``, ``np.dtype('f4')``, ``np.float32`` are equivalent.
117
int_type : dtype specifier
118
A dtype specifier referring to a numpy integer type. For example,
119
``i4``, ``np.dtype('i4')``, ``np.int32`` are equivalent
124
Number of type `flt_type` that is the minumum value in the range of
125
`int_type`, such that ``mn.astype(int_type)`` >= min of `int_type`
127
Number of type `flt_type` that is the maximum value in the range of
128
`int_type`, such that ``mx.astype(int_type)`` <= max of `int_type`
132
>>> shared_range(np.float32, np.int32) == (-2147483648.0, 2147483520.0)
134
>>> shared_range('f4', 'i4') == (-2147483648.0, 2147483520.0)
137
flt_type = np.dtype(flt_type).type
138
int_type = np.dtype(int_type).type
139
key = (flt_type, int_type)
140
# Used cached value if present
142
return _SHARED_RANGES[key]
145
ii = np.iinfo(int_type)
146
fi = np.finfo(flt_type)
147
mn = ceil_exact(ii.min, flt_type)
150
mx = floor_exact(ii.max, flt_type)
153
_SHARED_RANGES[key] = (mn, mx)
156
# ----------------------------------------------------------------------------
157
# Routines to work out the next lowest representable integer in floating point
159
# ----------------------------------------------------------------------------
162
_float16 = np.float16
163
except AttributeError: # float16 not present in np < 1.6
167
class FloatingError(Exception):
171
def type_info(np_type):
172
""" Return dict with min, max, nexp, nmant, width for numpy type `np_type`
174
Type can be integer in which case nexp and nmant are None.
178
np_type : numpy type specifier
179
Any specifier for a numpy dtype
184
with fields ``min`` (minimum value), ``max`` (maximum value), ``nexp``
185
(exponent width), ``nmant`` (significand precision not including
186
implicit first digit), ``minexp`` (minimum exponent), ``maxexp``
187
(maximum exponent), ``width`` (width in bytes). (``nexp``, ``nmant``,
188
``minexp``, ``maxexp``) are None for integer types. Both ``min`` and
189
``max`` are of type `np_type`.
193
FloatingError : for floating point types we don't recognize
197
You might be thinking that ``np.finfo`` does this job, and it does, except
198
for PPC long doubles (http://projects.scipy.org/numpy/ticket/2077) and
199
float96 on Windows compiled with Mingw. This routine protects against such
200
errors in ``np.finfo`` by only accepting values that we know are likely to
203
dt = np.dtype(np_type)
211
return dict(min=np_type(info.min), max=np_type(info.max), minexp=None,
212
maxexp=None, nmant=None, nexp=None, width=width)
214
# Trust the standard IEEE types
215
nmant, nexp = info.nmant, info.nexp
216
ret = dict(min=np_type(info.min),
217
max=np_type(info.max),
223
if np_type in (_float16, np.float32, np.float64,
224
np.complex64, np.complex128):
226
info_64 = np.finfo(np.float64)
228
assert np_type is np.longcomplex
229
vals = (nmant, nexp, width / 2)
231
assert np_type is np.longdouble
232
vals = (nmant, nexp, width)
233
if vals in ((112, 15, 16), # binary128
234
(info_64.nmant, info_64.nexp, 8), # float64
235
(63, 15, 12), (63, 15, 16)): # Intel extended 80
236
return ret # these are OK without modification
237
# The remaining types are longdoubles with bad finfo values. Some we
238
# correct, others we wait to hear of errors.
239
# We start with float64 as basis
240
ret = type_info(np.float64)
241
if vals in ((52, 15, 12), # windows float96
242
(52, 15, 16)): # windows float128?
243
# On windows 32 bit at least, float96 is Intel 80 storage but operating
244
# at float64 precision. The finfo values give nexp == 15 (as for intel
245
# 80) but in calculations nexp in fact appears to be 11 as for float64
246
ret.update(dict(width=width))
247
elif vals == (1, 1, 16) and processor() == 'powerpc': # broken PPC
248
ret.update(dict(nmant=106, width=width))
249
else: # don't recognize the type
250
raise FloatingError('We had not expected type %s' % np_type)
254
def as_int(x, check=True):
255
""" Return python integer representation of number
257
This is useful because the numpy int(val) mechanism is broken for large
258
values in np.longdouble.
260
It is also useful to work around a numpy 1.4.1 bug in conversion of uints to
263
This routine will still raise an OverflowError for values that are outside
264
the range of float64.
269
integer, unsigned integer or floating point value
270
check : {True, False}
271
If True, raise error for values that are not integers
284
>>> as_int(2.1) #doctest: +IGNORE_EXCEPTION_DETAIL
285
Traceback (most recent call last):
287
FloatingError: Not an integer: 2.1
288
>>> as_int(2.1, check=False)
292
if x.dtype.kind in 'iu':
293
# This works around a nasty numpy 1.4.1 bug such that:
294
# >>> int(np.uint32(2**32-1)
301
if check and fx != x:
302
raise FloatingError('Not an integer: %s' % x)
303
if not fx.dtype.type == np.longdouble:
305
# Subtract float64 chunks until we have all of the number. If the int is too
306
# large, it will overflow
315
def int_to_float(val, flt_type):
316
""" Convert integer `val` to floating point type `flt_type`
318
Why is this so complicated?
320
At least in numpy <= 1.6.1, numpy longdoubles do not correctly convert to
321
ints, and ints do not correctly convert to longdoubles. Specifically, in
322
both cases, the values seem to go through float64 conversion on the way, so
323
to convert better, we need to split into float64s and sum up the result.
330
numpy floating point type
337
if not flt_type is np.longdouble:
339
faval = np.longdouble(0)
341
f64 = np.float64(val)
347
def floor_exact(val, flt_type):
348
""" Return nearest exact integer <= `val` in float type `flt_type`
353
We have to pass val as an int rather than the floating point type
354
because large integers cast as floating point may be rounded by the
356
flt_type : numpy type
362
value of same floating point type as `val`, that is the nearest exact
363
integer in this type such that `floor_val` <= `val`. Thus if `val` is
364
exact in `flt_type`, `floor_val` == `val`.
368
Obviously 2 is within the range of representable integers for float32
370
>>> floor_exact(2, np.float32)
373
As is 2**24-1 (the number of significand digits is 23 + 1 implicit)
375
>>> floor_exact(2**24-1, np.float32) == 2**24-1
378
But 2**24+1 gives a number that float32 can't represent exactly
380
>>> floor_exact(2**24+1, np.float32) == 2**24
383
As for the numpy floor function, negatives floor towards -inf
385
>>> floor_exact(-2**24-1, np.float32) == -2**24-2
389
flt_type = np.dtype(flt_type).type
390
sign = 1 if val > 0 else -1
391
try: # int_to_float deals with longdouble safely
392
fval = int_to_float(val, flt_type)
393
except OverflowError:
395
if not np.isfinite(fval):
397
info = type_info(flt_type)
398
diff = val - as_int(fval)
399
if diff >= 0: # floating point value <= val
401
# Float casting made the value go up
402
biggest_gap = 2**(floor_log2(val) - info['nmant'])
403
assert biggest_gap > 1
404
fval -= flt_type(biggest_gap)
408
def ceil_exact(val, flt_type):
409
""" Return nearest exact integer >= `val` in float type `flt_type`
414
We have to pass val as an int rather than the floating point type
415
because large integers cast as floating point may be rounded by the
417
flt_type : numpy type
423
value of same floating point type as `val`, that is the nearest exact
424
integer in this type such that `floor_val` >= `val`. Thus if `val` is
425
exact in `flt_type`, `ceil_val` == `val`.
429
Obviously 2 is within the range of representable integers for float32
431
>>> ceil_exact(2, np.float32)
434
As is 2**24-1 (the number of significand digits is 23 + 1 implicit)
436
>>> ceil_exact(2**24-1, np.float32) == 2**24-1
439
But 2**24+1 gives a number that float32 can't represent exactly
441
>>> ceil_exact(2**24+1, np.float32) == 2**24+2
444
As for the numpy ceil function, negatives ceil towards inf
446
>>> ceil_exact(-2**24-1, np.float32) == -2**24
449
return -floor_exact(-val, flt_type)
453
""" Absolute values of array taking care of max negative int values
462
array the same shape as `arr` in which all negative numbers have been
463
changed to positive numbers with the magnitude.
467
This kind of thing is confusing in base numpy:
469
>>> import numpy as np
470
>>> np.abs(np.int8(-128))
473
``int_abs`` fixes that:
475
>>> int_abs(np.int8(-128))
477
>>> int_abs(np.array([-128, 127], dtype=np.int8))
478
array([128, 127], dtype=uint8)
479
>>> int_abs(np.array([-128, 127], dtype=np.float32))
480
array([ 128., 127.], dtype=float32)
482
arr = np.array(arr, copy=False)
487
return np.absolute(arr)
488
out = arr.astype(np.dtype(dt.str.replace('i', 'u')))
489
return np.choose(arr < 0, (arr, arr * -1), out=out)
493
""" floor of log2 of abs(`x`)
495
Embarrassingly, from http://en.wikipedia.org/wiki/Binary_logarithm
504
floor of base 2 log of `x`. None if `x` == 0.
508
>>> floor_log2(2**9+1)
510
>>> floor_log2(-2**9+1)
514
>>> floor_log2(0) is None
533
""" Floating point type with best precision
535
This is nearly always np.longdouble, except on Windows, where np.longdouble
536
is Intel80 storage, but with float64 precision for calculations. In that
537
case we return float64 on the basis it's the fastest and smallest at the
542
best_type : numpy type
543
floating point type with highest precision
545
if (type_info(np.longdouble)['nmant'] > type_info(np.float64)['nmant'] and
546
machine() != 'sparc64'): # sparc has crazy-slow float128
552
""" Return floating point types sorted by precision
554
Remove longdouble if it has no higher precision than float64
556
floats = sorted(np.sctypes['float'], key=lambda f : type_info(f)['nmant'])
557
if best_float() != np.longdouble and np.longdouble in floats:
558
floats.remove(np.longdouble)
562
OK_FLOATS = ok_floats()
565
def able_int_type(values):
566
""" Find the smallest integer numpy type to contain sequence `values`
568
Prefers uint to int if minimum is >= 0
573
sequence of integer values
577
itype : None or numpy type
578
numpy integer type or None if no integer type holds all `values`
582
>>> able_int_type([0, 1]) == np.uint8
584
>>> able_int_type([-1, 1]) == np.int8
587
if any([v % 1 for v in values]):
592
for ityp in np.sctypes['uint']:
593
if mx <= np.iinfo(ityp).max:
595
for ityp in np.sctypes['int']:
596
info = np.iinfo(ityp)
597
if mn >= info.min and mx <= info.max:
602
def ulp(val=np.float64(1.0)):
603
""" Return gap between `val` and nearest representable number of same type
605
This is the value of a unit in the last place (ULP), and is similar in
606
meaning to the MATLAB eps function.
610
val : scalar, optional
611
scalar value of any numpy type. Default is 1.0 (float64)
616
gap between `val` and nearest representable number of same type
620
The wikipedia article on machine epsilon points out that the term *epsilon*
621
can be used in the sense of a unit in the last place (ULP), or as the
622
maximum relative rounding error. The MATLAB ``eps`` function uses the ULP
623
meaning, but this function is ``ulp`` rather than ``eps`` to avoid confusion
624
between different meanings of *eps*.
627
if not np.isfinite(val):
629
if val.dtype.kind in 'iu':
632
info = type_info(val.dtype)
633
fl2 = floor_log2(aval)
634
if fl2 is None or fl2 < info['minexp']: # subnormal
636
# 'nmant' value does not include implicit first bit
637
return 2**(fl2 - info['nmant'])