707
/********* GENERIC UFUNC USING ITERATOR *********/
1069
* Concatenate the loop and core dimensions of
1070
* PyArrayMultiIterObject's iarg-th argument, to recover a full
1071
* dimension array (used for output arguments).
710
* Parses the positional and keyword arguments for a generic ufunc call.
712
* Note that if an error is returned, the caller must free the
713
* non-zero references in out_op. This
714
* function does not do its own clean-up.
1074
_compute_output_dims(PyUFuncLoopObject *loop, int iarg,
1075
int *out_nd, npy_intp *tmp_dims)
1078
PyUFuncObject *ufunc = loop->ufunc;
1079
if (ufunc->core_enabled == 0) {
1080
/* case of ufunc with trivial core-signature */
1082
return loop->dimensions;
1085
*out_nd = loop->nd + ufunc->core_num_dims[iarg];
1086
if (*out_nd > NPY_MAXARGS) {
1087
PyErr_SetString(PyExc_ValueError,
1088
"dimension of output variable exceeds limit");
1092
/* copy loop dimensions */
1093
memcpy(tmp_dims, loop->dimensions, sizeof(npy_intp) * loop->nd);
1095
/* copy core dimension */
1096
for (i = 0; i < ufunc->core_num_dims[iarg]; i++) {
1097
tmp_dims[loop->nd + i] = loop->core_dim_sizes[1 +
1098
ufunc->core_dim_ixs[ufunc->core_offsets[iarg] + i]];
1103
/* Check and set core_dim_sizes and core_strides for the i-th argument. */
1105
_compute_dimension_size(PyUFuncLoopObject *loop, PyArrayObject **mps, int i)
1107
PyUFuncObject *ufunc = loop->ufunc;
1108
int j = ufunc->core_offsets[i];
1109
int k = PyArray_NDIM(mps[i]) - ufunc->core_num_dims[i];
1111
for (ind = 0; ind < ufunc->core_num_dims[i]; ind++, j++, k++) {
1112
npy_intp dim = k < 0 ? 1 : PyArray_DIM(mps[i], k);
1113
/* First element of core_dim_sizes will be used for looping */
1114
int dim_ix = ufunc->core_dim_ixs[j] + 1;
1115
if (loop->core_dim_sizes[dim_ix] == 1) {
1116
/* broadcast core dimension */
1117
loop->core_dim_sizes[dim_ix] = dim;
1119
else if (dim != 1 && dim != loop->core_dim_sizes[dim_ix]) {
1120
PyErr_SetString(PyExc_ValueError, "core dimensions mismatch");
1123
/* First ufunc->nargs elements will be used for looping */
1124
loop->core_strides[ufunc->nargs + j] =
1125
dim == 1 ? 0 : PyArray_STRIDE(mps[i], k);
1130
/* Return a view of array "ap" with "core_nd" dimensions cut from tail. */
1131
static PyArrayObject *
1132
_trunc_coredim(PyArrayObject *ap, int core_nd)
1135
int nd = ap->nd - core_nd;
1140
/* The following code is basically taken from PyArray_Transpose */
1141
/* NewFromDescr will steal this reference */
1142
Py_INCREF(ap->descr);
1143
ret = (PyArrayObject *)
1144
PyArray_NewFromDescr(Py_TYPE(ap), ap->descr,
1146
ap->strides, ap->data, ap->flags,
1151
/* point at true owner of memory: */
1152
ret->base = (PyObject *)ap;
1154
PyArray_UpdateFlags(ret, CONTIGUOUS | FORTRAN);
1159
construct_arrays(PyUFuncLoopObject *loop, PyObject *args, PyArrayObject **mps,
1164
int arg_types[NPY_MAXARGS];
1165
PyArray_SCALARKIND scalars[NPY_MAXARGS];
1166
PyArray_SCALARKIND maxarrkind, maxsckind, new;
1167
PyUFuncObject *self = loop->ufunc;
1168
Bool allscalars = TRUE;
1169
PyTypeObject *subtype = &PyArray_Type;
1170
PyObject *context = NULL;
1175
npy_intp temp_dims[NPY_MAXDIMS];
1178
PyObject *wraparr[NPY_MAXARGS];
716
static int get_ufunc_arguments(PyUFuncObject *self,
717
PyObject *args, PyObject *kwds,
718
PyArrayObject **out_op,
719
NPY_ORDER *out_order,
720
NPY_CASTING *out_casting,
721
PyObject **out_extobj,
722
PyObject **out_typetup,
726
npy_intp i, nargs, nin = self->nin;
727
PyObject *obj, *context;
728
PyObject *str_key_obj = NULL;
731
int any_flexible = 0, any_object = 0;
733
ufunc_name = self->name ? self->name : "<unnamed ufunc>";
1180
735
/* Check number of arguments */
1181
736
nargs = PyTuple_Size(args);
1182
if ((nargs < self->nin) || (nargs > self->nargs)) {
737
if ((nargs < nin) || (nargs > self->nargs)) {
1183
738
PyErr_SetString(PyExc_ValueError, "invalid number of arguments");
1187
/* Get each input argument */
1188
maxarrkind = PyArray_NOSCALAR;
1189
maxsckind = PyArray_NOSCALAR;
1190
for(i = 0; i < self->nin; i++) {
1191
obj = PyTuple_GET_ITEM(args,i);
742
/* Get input arguments */
743
for(i = 0; i < nin; ++i) {
744
obj = PyTuple_GET_ITEM(args, i);
1192
745
if (!PyArray_Check(obj) && !PyArray_IsScalar(obj, Generic)) {
747
* TODO: There should be a comment here explaining what
1193
750
context = Py_BuildValue("OOi", self, args, i);
751
if (context == NULL) {
1198
mps[i] = (PyArrayObject *)PyArray_FromAny(obj, NULL, 0, 0, 0, context);
758
out_op[i] = (PyArrayObject *)PyArray_FromAny(obj,
759
NULL, 0, 0, 0, context);
1199
760
Py_XDECREF(context);
1200
if (mps[i] == NULL) {
1203
arg_types[i] = PyArray_TYPE(mps[i]);
1204
if (!flexible && PyTypeNum_ISFLEXIBLE(arg_types[i])) {
1207
if (!object && PyTypeNum_ISOBJECT(arg_types[i])) {
1212
* fprintf(stderr, "array %d has reference %d\n", i,
1213
* (mps[i])->ob_refcnt);
1217
* Scalars are 0-dimensional arrays at this point
1221
* We need to keep track of whether or not scalars
1222
* are mixed with arrays of different kinds.
1225
if (mps[i]->nd > 0) {
1226
scalars[i] = PyArray_NOSCALAR;
1228
new = PyArray_ScalarKind(arg_types[i], NULL);
1229
maxarrkind = NPY_MAX(new, maxarrkind);
1232
scalars[i] = PyArray_ScalarKind(arg_types[i], &(mps[i]));
1233
maxsckind = NPY_MAX(scalars[i], maxsckind);
1237
/* We don't do strings */
1238
if (flexible && !object) {
1239
loop->notimplemented = 1;
1244
* If everything is a scalar, or scalars mixed with arrays of
1245
* different kinds of lesser kinds then use normal coercion rules
1247
if (allscalars || (maxsckind > maxarrkind)) {
1248
for (i = 0; i < self->nin; i++) {
1249
scalars[i] = PyArray_NOSCALAR;
1253
/* Select an appropriate function for these argument types. */
1254
if (select_types(loop->ufunc, arg_types, &(loop->function),
1255
&(loop->funcdata), scalars, typetup) == -1) {
1259
* FAIL with NotImplemented if the other object has
1260
* the __r<op>__ method and has __array_priority__ as
1261
* an attribute (signalling it can handle ndarray's)
1262
* and is not already an ndarray or a subtype of the same type.
1264
if ((arg_types[1] == PyArray_OBJECT)
1265
&& (loop->ufunc->nin==2) && (loop->ufunc->nout == 1)) {
1266
PyObject *_obj = PyTuple_GET_ITEM(args, 1);
1267
if (!PyArray_CheckExact(_obj)
1268
/* If both are same subtype of object arrays, then proceed */
1269
&& !(Py_TYPE(_obj) == Py_TYPE(PyTuple_GET_ITEM(args, 0)))
1270
&& PyObject_HasAttrString(_obj, "__array_priority__")
1271
&& _has_reflected_op(_obj, loop->ufunc->name)) {
1272
loop->notimplemented = 1;
1278
* Create copies for some of the arrays if they are small
1279
* enough and not already contiguous
1281
if (_create_copies(loop, arg_types, mps) < 0) {
1286
* Only use loop dimensions when constructing Iterator:
1287
* temporarily replace mps[i] (will be recovered below).
1289
if (self->core_enabled) {
1290
for (i = 0; i < self->nin; i++) {
1293
if (_compute_dimension_size(loop, mps, i) < 0) {
1296
ao = _trunc_coredim(mps[i], self->core_num_dims[i]);
1304
/* Create Iterators for the Inputs */
1305
for (i = 0; i < self->nin; i++) {
1306
loop->iters[i] = (PyArrayIterObject *)
1307
PyArray_IterNew((PyObject *)mps[i]);
1308
if (loop->iters[i] == NULL) {
1313
/* Recover mps[i]. */
1314
if (self->core_enabled) {
1315
for (i = 0; i < self->nin; i++) {
1316
PyArrayObject *ao = mps[i];
1317
mps[i] = (PyArrayObject *)mps[i]->base;
1322
/* Broadcast the result */
1323
loop->numiter = self->nin;
1324
if (PyArray_Broadcast((PyArrayMultiIterObject *)loop) < 0) {
1328
/* Get any return arguments */
1329
for (i = self->nin; i < nargs; i++) {
1330
mps[i] = (PyArrayObject *)PyTuple_GET_ITEM(args, i);
1331
if (((PyObject *)mps[i])==Py_None) {
761
if (out_op[i] == NULL) {
765
PyTypeNum_ISFLEXIBLE(PyArray_DESCR(out_op[i])->type_num)) {
769
PyTypeNum_ISOBJECT(PyArray_DESCR(out_op[i])->type_num)) {
775
* Indicate not implemented if there are flexible objects (structured
776
* type or string) but no object types.
778
* Not sure - adding this increased to 246 errors, 150 failures.
780
if (any_flexible && !any_object) {
785
/* Get positional output arguments */
786
for (i = nin; i < nargs; ++i) {
787
obj = PyTuple_GET_ITEM(args, i);
788
/* Translate None to NULL */
789
if (obj == Py_None) {
1336
if (!PyArray_Check((PyObject *)mps[i])) {
1338
if (PyArrayIter_Check(mps[i])) {
1339
new = PyObject_CallMethod((PyObject *)mps[i],
1342
mps[i] = (PyArrayObject *)new;
1345
PyErr_SetString(PyExc_TypeError,
1346
"return arrays must be "\
1354
if (self->core_enabled) {
1355
if (_compute_dimension_size(loop, mps, i) < 0) {
1359
out_dims = _compute_output_dims(loop, i, &out_nd, temp_dims);
1363
if (mps[i]->nd != out_nd
1364
|| !PyArray_CompareLists(mps[i]->dimensions, out_dims, out_nd)) {
1365
PyErr_SetString(PyExc_ValueError, "invalid return array shape");
1370
if (!PyArray_ISWRITEABLE(mps[i])) {
1371
PyErr_SetString(PyExc_ValueError, "return array is not writeable");
1378
/* construct any missing return arrays and make output iterators */
1379
for(i = self->nin; i < self->nargs; i++) {
1380
PyArray_Descr *ntype;
1382
if (mps[i] == NULL) {
1383
out_dims = _compute_output_dims(loop, i, &out_nd, temp_dims);
1387
mps[i] = (PyArrayObject *)PyArray_New(subtype,
1393
if (mps[i] == NULL) {
1399
* reset types for outputs that are equivalent
1400
* -- no sense casting uselessly
792
/* If it's an array, can use it */
793
if (PyArray_Check(obj)) {
794
if (!PyArray_ISWRITEABLE(obj)) {
795
PyErr_SetString(PyExc_ValueError,
796
"return array is not writeable");
800
out_op[i] = (PyArrayObject *)obj;
1403
if (mps[i]->descr->type_num != arg_types[i]) {
1404
PyArray_Descr *atype;
1405
ntype = mps[i]->descr;
1406
atype = PyArray_DescrFromType(arg_types[i]);
1407
if (PyArray_EquivTypes(atype, ntype)) {
1408
arg_types[i] = ntype->type_num;
1413
/* still not the same -- or will we have to use buffers?*/
1414
if (mps[i]->descr->type_num != arg_types[i]
1415
|| !PyArray_ISBEHAVED_RO(mps[i])) {
1416
if (loop->size < loop->bufsize || self->core_enabled) {
1419
* Copy the array to a temporary copy
1420
* and set the UPDATEIFCOPY flag
1422
ntype = PyArray_DescrFromType(arg_types[i]);
1423
new = PyArray_FromAny((PyObject *)mps[i],
1425
FORCECAST | ALIGNED |
1426
UPDATEIFCOPY, NULL);
1431
mps[i] = (PyArrayObject *)new;
1436
if (self->core_enabled) {
1439
/* computer for all output arguments, and set strides in "loop" */
1440
if (_compute_dimension_size(loop, mps, i) < 0) {
1443
ao = _trunc_coredim(mps[i], self->core_num_dims[i]);
1447
/* Temporarily modify mps[i] for constructing iterator. */
1451
loop->iters[i] = (PyArrayIterObject *)
1452
PyArray_IterNew((PyObject *)mps[i]);
1453
if (loop->iters[i] == NULL) {
1457
/* Recover mps[i]. */
1458
if (self->core_enabled) {
1459
PyArrayObject *ao = mps[i];
1460
mps[i] = (PyArrayObject *)mps[i]->base;
1467
* Use __array_prepare__ on all outputs
1468
* if present on one of the input arguments.
1469
* If present for multiple inputs:
1470
* use __array_prepare__ of input object with largest
1471
* __array_priority__ (default = 0.0)
1473
* Exception: we should not wrap outputs for items already
1474
* passed in as output-arguments. These items should either
1475
* be left unwrapped or wrapped by calling their own __array_prepare__
1478
* For each output argument, wrap will be either
1479
* NULL --- call PyArray_Return() -- default if no output arguments given
1480
* None --- array-object passed in don't call PyArray_Return
1481
* method --- the __array_prepare__ method to call.
1483
_find_array_prepare(args, wraparr, loop->ufunc->nin, loop->ufunc->nout);
1486
for (i = 0; i < loop->ufunc->nout; i++) {
1487
int j = loop->ufunc->nin+i;
1492
if (wrap == Py_None) {
1496
res = PyObject_CallFunction(wrap, "O(OOi)",
1497
mps[j], loop->ufunc, args, i);
1499
if ((res == NULL) || (res == Py_None)) {
1500
if (!PyErr_Occurred()){
1501
PyErr_SetString(PyExc_TypeError,
1502
"__array_prepare__ must return an ndarray or subclass thereof");
1507
mps[j] = (PyArrayObject *)res;
1512
* If any of different type, or misaligned or swapped
1513
* then must use buffers
1517
/* Determine looping method needed */
1518
loop->meth = NO_UFUNCLOOP;
1519
if (loop->size == 0) {
1522
if (self->core_enabled) {
1523
loop->meth = SIGNATURE_NOBUFFER_UFUNCLOOP;
1525
for (i = 0; i < self->nargs; i++) {
1526
loop->needbuffer[i] = 0;
1527
if (arg_types[i] != mps[i]->descr->type_num
1528
|| !PyArray_ISBEHAVED_RO(mps[i])) {
1529
if (self->core_enabled) {
1530
PyErr_SetString(PyExc_RuntimeError,
1531
"never reached; copy should have been made");
1534
loop->meth = BUFFER_UFUNCLOOP;
1535
loop->needbuffer[i] = 1;
1537
if (!(loop->obj & UFUNC_OBJ_ISOBJECT)
1538
&& ((mps[i]->descr->type_num == PyArray_OBJECT)
1539
|| (arg_types[i] == PyArray_OBJECT))) {
1540
loop->obj = UFUNC_OBJ_ISOBJECT|UFUNC_OBJ_NEEDS_API;
1544
if (self->core_enabled && (loop->obj & UFUNC_OBJ_ISOBJECT)) {
1545
PyErr_SetString(PyExc_TypeError,
1546
"Object type not allowed in ufunc with signature");
1549
if (loop->meth == NO_UFUNCLOOP) {
1550
loop->meth = ONE_UFUNCLOOP;
1552
/* All correct type and BEHAVED */
1553
/* Check for non-uniform stridedness */
1554
for (i = 0; i < self->nargs; i++) {
1555
if (!(loop->iters[i]->contiguous)) {
1557
* May still have uniform stride
1558
* if (broadcast result) <= 1-d
1560
if (mps[i]->nd != 0 && \
1561
(loop->iters[i]->nd_m1 > 0)) {
1562
loop->meth = NOBUFFER_UFUNCLOOP;
1567
if (loop->meth == ONE_UFUNCLOOP) {
1568
for (i = 0; i < self->nargs; i++) {
1569
loop->bufptr[i] = mps[i]->data;
1574
loop->numiter = self->nargs;
1577
if (loop->meth == SIGNATURE_NOBUFFER_UFUNCLOOP && loop->nd == 0) {
1578
/* Use default core_strides */
1580
else if (loop->meth != ONE_UFUNCLOOP) {
1584
PyArrayIterObject *it;
1585
intp stride_sum[NPY_MAXDIMS];
1591
* Optimize axis the iteration takes place over
1593
* The first thought was to have the loop go
1594
* over the largest dimension to minimize the number of loops
1596
* However, on processors with slow memory bus and cache,
1597
* the slowest loops occur when the memory access occurs for
1600
* Thus, choose the axis for which strides of the last iterator is
1601
* smallest but non-zero.
1603
for (i = 0; i < loop->nd; i++) {
1605
for (j = 0; j < loop->numiter; j++) {
1606
stride_sum[i] += loop->iters[j]->strides[i];
1610
ldim = loop->nd - 1;
1611
minsum = stride_sum[loop->nd - 1];
1612
for (i = loop->nd - 2; i >= 0; i--) {
1613
if (stride_sum[i] < minsum ) {
1615
minsum = stride_sum[i];
1618
maxdim = loop->dimensions[ldim];
1619
loop->size /= maxdim;
1620
loop->bufcnt = maxdim;
1621
loop->lastdim = ldim;
1624
* Fix the iterators so the inner loop occurs over the
1625
* largest dimensions -- This can be done by
1626
* setting the size to 1 in that dimension
1627
* (just in the iterators)
1629
for (i = 0; i < loop->numiter; i++) {
1630
it = loop->iters[i];
1632
it->size /= (it->dims_m1[ldim] + 1);
1633
it->dims_m1[ldim] = 0;
1634
it->backstrides[ldim] = 0;
1637
* (won't fix factors because we
1638
* don't use PyArray_ITER_GOTO1D
1639
* so don't change them)
1641
* Set the steps to the strides in that dimension
1643
loop->steps[i] = it->strides[ldim];
1647
* Set looping part of core_dim_sizes and core_strides.
1649
if (loop->meth == SIGNATURE_NOBUFFER_UFUNCLOOP) {
1650
loop->core_dim_sizes[0] = maxdim;
1651
for (i = 0; i < self->nargs; i++) {
1652
loop->core_strides[i] = loop->steps[i];
1657
* fix up steps where we will be copying data to
1658
* buffers and calculate the ninnerloops and leftover
1659
* values -- if step size is already zero that is not changed...
1661
if (loop->meth == BUFFER_UFUNCLOOP) {
1662
loop->leftover = maxdim % loop->bufsize;
1663
loop->ninnerloops = (maxdim / loop->bufsize) + 1;
1664
for (i = 0; i < self->nargs; i++) {
1665
if (loop->needbuffer[i] && loop->steps[i]) {
1666
loop->steps[i] = mps[i]->descr->elsize;
1668
/* These are changed later if casting is needed */
1672
else if (loop->meth == ONE_UFUNCLOOP) {
1673
/* uniformly-strided case */
1674
for (i = 0; i < self->nargs; i++) {
1675
if (PyArray_SIZE(mps[i]) == 1) {
1679
loop->steps[i] = mps[i]->strides[mps[i]->nd - 1];
1685
/* Finally, create memory for buffers if we need them */
1688
* Buffers for scalars are specially made small -- scalars are
1689
* not copied multiple times
1691
if (loop->meth == BUFFER_UFUNCLOOP) {
1692
int cnt = 0, cntcast = 0;
1693
int scnt = 0, scntcast = 0;
1696
int last_was_scalar = 0;
1697
int last_cast_was_scalar = 0;
1700
int scbufsize = 4*sizeof(double);
1702
PyArray_Descr *descr;
1704
/* compute the element size */
1705
for (i = 0; i < self->nargs; i++) {
1706
if (!loop->needbuffer[i]) {
1709
if (arg_types[i] != mps[i]->descr->type_num) {
1710
descr = PyArray_DescrFromType(arg_types[i]);
1711
if (loop->steps[i]) {
1712
cntcast += descr->elsize;
1715
scntcast += descr->elsize;
1717
if (i < self->nin) {
1718
loop->cast[i] = PyArray_GetCastFunc(mps[i]->descr,
1722
loop->cast[i] = PyArray_GetCastFunc \
1723
(descr, mps[i]->descr->type_num);
1726
if (!loop->cast[i]) {
1730
loop->swap[i] = !(PyArray_ISNOTSWAPPED(mps[i]));
1731
if (loop->steps[i]) {
1732
cnt += mps[i]->descr->elsize;
1735
scnt += mps[i]->descr->elsize;
1738
memsize = loop->bufsize*(cnt+cntcast) + scbufsize*(scnt+scntcast);
1739
loop->buffer[0] = PyDataMem_NEW(memsize);
1743
* fprintf(stderr, "Allocated buffer at %p of size %d, cnt=%d, cntcast=%d\n",
1744
* loop->buffer[0], loop->bufsize * (cnt + cntcast), cnt, cntcast);
1746
if (loop->buffer[0] == NULL) {
1750
if (loop->obj & UFUNC_OBJ_ISOBJECT) {
1751
memset(loop->buffer[0], 0, memsize);
1753
castptr = loop->buffer[0] + loop->bufsize*cnt + scbufsize*scnt;
1754
bufptr = loop->buffer[0];
1756
for (i = 0; i < self->nargs; i++) {
1757
if (!loop->needbuffer[i]) {
1760
loop->buffer[i] = bufptr + (last_was_scalar ? scbufsize :
1761
loop->bufsize)*oldbufsize;
1762
last_was_scalar = (loop->steps[i] == 0);
1763
bufptr = loop->buffer[i];
1764
oldbufsize = mps[i]->descr->elsize;
1765
/* fprintf(stderr, "buffer[%d] = %p\n", i, loop->buffer[i]); */
1766
if (loop->cast[i]) {
1767
PyArray_Descr *descr;
1768
loop->castbuf[i] = castptr + (last_cast_was_scalar ? scbufsize :
1769
loop->bufsize)*oldsize;
1770
last_cast_was_scalar = last_was_scalar;
1771
/* fprintf(stderr, "castbuf[%d] = %p\n", i, loop->castbuf[i]); */
1772
descr = PyArray_DescrFromType(arg_types[i]);
1773
oldsize = descr->elsize;
1775
loop->bufptr[i] = loop->castbuf[i];
1776
castptr = loop->castbuf[i];
1777
if (loop->steps[i]) {
1778
loop->steps[i] = oldsize;
1782
loop->bufptr[i] = loop->buffer[i];
1784
if (!loop->objfunc && (loop->obj & UFUNC_OBJ_ISOBJECT)) {
1785
if (arg_types[i] == PyArray_OBJECT) {
1792
if (_does_loop_use_arrays(loop->funcdata)) {
1793
loop->funcdata = (void*)mps;
1800
ufuncreduce_dealloc(PyUFuncReduceObject *self)
1803
Py_XDECREF(self->it);
1804
Py_XDECREF(self->rit);
1805
Py_XDECREF(self->ret);
1806
Py_XDECREF(self->errobj);
1807
Py_XDECREF(self->decref);
1809
PyDataMem_FREE(self->buffer);
1811
Py_DECREF(self->ufunc);
1817
ufuncloop_dealloc(PyUFuncLoopObject *self)
1821
if (self->ufunc != NULL) {
1822
if (self->core_dim_sizes) {
1823
_pya_free(self->core_dim_sizes);
1825
if (self->core_strides) {
1826
_pya_free(self->core_strides);
1828
for (i = 0; i < self->ufunc->nargs; i++) {
1829
Py_XDECREF(self->iters[i]);
1831
if (self->buffer[0]) {
1832
PyDataMem_FREE(self->buffer[0]);
1834
Py_XDECREF(self->errobj);
1835
Py_DECREF(self->ufunc);
1840
static PyUFuncLoopObject *
1841
construct_loop(PyUFuncObject *self, PyObject *args, PyObject *kwds, PyArrayObject **mps)
1843
PyUFuncLoopObject *loop;
1845
PyObject *typetup = NULL;
1846
PyObject *extobj = NULL;
1850
PyErr_SetString(PyExc_ValueError, "function not supported");
1853
if ((loop = _pya_malloc(sizeof(PyUFuncLoopObject))) == NULL) {
1861
loop->buffer[0] = NULL;
1862
for (i = 0; i < self->nargs; i++) {
1863
loop->iters[i] = NULL;
1864
loop->cast[i] = NULL;
1866
loop->errobj = NULL;
1867
loop->notimplemented = 0;
1869
loop->core_dim_sizes = NULL;
1870
loop->core_strides = NULL;
1872
if (self->core_enabled) {
1873
int num_dim_ix = 1 + self->core_num_dim_ix;
1874
int nstrides = self->nargs + self->core_offsets[self->nargs - 1]
1875
+ self->core_num_dims[self->nargs - 1];
1876
loop->core_dim_sizes = _pya_malloc(sizeof(npy_intp)*num_dim_ix);
1877
loop->core_strides = _pya_malloc(sizeof(npy_intp)*nstrides);
1878
if (loop->core_dim_sizes == NULL || loop->core_strides == NULL) {
1882
memset(loop->core_strides, 0, sizeof(npy_intp) * nstrides);
1883
for (i = 0; i < num_dim_ix; i++) {
1884
loop->core_dim_sizes[i] = 1;
1887
name = self->name ? self->name : "";
1890
* Extract sig= keyword and extobj= keyword if present.
803
PyErr_SetString(PyExc_TypeError,
804
"return arrays must be "
811
* Get keyword output and other arguments.
1891
812
* Raise an error if anything else is present in the
1892
* keyword dictionary
813
* keyword dictionary.
1894
815
if (kwds != NULL) {
1895
816
PyObject *key, *value;
1896
817
Py_ssize_t pos = 0;
1897
818
while (PyDict_Next(kwds, &pos, &key, &value)) {
1898
char *keystring = PyString_AsString(key);
1900
if (keystring == NULL) {
1902
PyErr_SetString(PyExc_TypeError, "invalid keyword");
1905
if (strncmp(keystring,"extobj",6) == 0) {
1908
else if (strncmp(keystring,"sig",3) == 0) {
1912
char *format = "'%s' is an invalid keyword to %s";
1913
PyErr_Format(PyExc_TypeError,format,keystring, name);
819
Py_ssize_t length = 0;
823
#if defined(NPY_PY3K)
824
Py_XDECREF(str_key_obj);
825
str_key_obj = PyUnicode_AsASCIIString(key);
826
if (str_key_obj != NULL) {
831
if (PyBytes_AsStringAndSize(key, &str, &length) == -1) {
832
PyErr_SetString(PyExc_TypeError, "invalid keyword argument");
838
/* Provides a policy for allowed casting */
839
if (strncmp(str,"casting",7) == 0) {
840
if (!PyArray_CastingConverter(value, out_casting)) {
848
* Overrides the global parameters buffer size,
849
* error mask, and error object
851
if (strncmp(str,"extobj",6) == 0) {
857
/* First output may be specified as a keyword parameter */
858
if (strncmp(str,"out",3) == 0) {
859
if (out_op[nin] != NULL) {
860
PyErr_SetString(PyExc_ValueError,
861
"cannot specify 'out' as both a "
862
"positional and keyword argument");
866
if (PyArray_Check(value)) {
867
if (!PyArray_ISWRITEABLE(value)) {
868
PyErr_SetString(PyExc_ValueError,
869
"return array is not writeable");
873
out_op[nin] = (PyArrayObject *)value;
876
PyErr_SetString(PyExc_TypeError,
877
"return arrays must be "
883
/* Allows the default output layout to be overridden */
884
else if (strncmp(str,"order",5) == 0) {
885
if (!PyArray_OrderConverter(value, out_order)) {
892
/* Allows a specific function inner loop to be selected */
893
if (strncmp(str,"sig",3) == 0) {
894
if (*out_typetup != NULL) {
895
PyErr_SetString(PyExc_RuntimeError,
896
"cannot specify both 'sig' and 'dtype'");
899
*out_typetup = value;
903
else if (strncmp(str,"subok",5) == 0) {
904
if (!PyBool_Check(value)) {
905
PyErr_SetString(PyExc_TypeError,
906
"'subok' must be a boolean");
909
*out_subok = (value == Py_True);
914
/* Another way to specify 'sig' */
915
if (strncmp(str,"dtype",5) == 0) {
916
/* Allow this parameter to be None */
917
PyArray_Descr *dtype;
918
if (!PyArray_DescrConverter2(value, &dtype)) {
922
if (*out_typetup != NULL) {
923
PyErr_SetString(PyExc_RuntimeError,
924
"cannot specify both 'sig' and 'dtype'");
927
*out_typetup = Py_BuildValue("(N)", dtype);
934
char *format = "'%s' is an invalid keyword to ufunc '%s'";
935
PyErr_Format(PyExc_TypeError, format, str, ufunc_name);
941
*out_any_object = any_object;
943
Py_XDECREF(str_key_obj);
947
Py_XDECREF(str_key_obj);
952
_casting_to_string(NPY_CASTING casting)
957
case NPY_EQUIV_CASTING:
959
case NPY_SAFE_CASTING:
961
case NPY_SAME_KIND_CASTING:
963
case NPY_UNSAFE_CASTING:
972
ufunc_loop_matches(PyUFuncObject *self,
974
NPY_CASTING input_casting,
975
NPY_CASTING output_casting,
979
int *out_no_castable_output,
980
char *out_err_src_typecode,
981
char *out_err_dst_typecode)
983
npy_intp i, nin = self->nin, nop = nin + self->nout;
986
* First check if all the inputs can be safely cast
987
* to the types for this function
989
for (i = 0; i < nin; ++i) {
993
* If no inputs are objects and there are more than one
994
* loop, don't allow conversion to object. The rationale
995
* behind this is mostly performance. Except for custom
996
* ufuncs built with just one object-parametered inner loop,
997
* only the types that are supported are implemented. Trying
998
* the object version of logical_or on float arguments doesn't
1001
if (types[i] == NPY_OBJECT && !any_object && self->ntypes > 1) {
1005
tmp = PyArray_DescrFromType(types[i]);
1010
#if NPY_UF_DBG_TRACING
1011
printf("Checking type for op %d, type %d: ", (int)i, (int)types[i]);
1012
PyObject_Print((PyObject *)tmp, stdout, 0);
1013
printf(", operand type: ");
1014
PyObject_Print((PyObject *)PyArray_DESCR(op[i]), stdout, 0);
1018
* If all the inputs are scalars, use the regular
1019
* promotion rules, not the special value-checking ones.
1021
if (!use_min_scalar) {
1022
if (!PyArray_CanCastTypeTo(PyArray_DESCR(op[i]), tmp,
1029
if (!PyArray_CanCastArrayTo(op[i], tmp, input_casting)) {
1036
NPY_UF_DBG_PRINT("The inputs all worked\n");
1039
* If all the inputs were ok, then check casting back to the
1042
for (i = nin; i < nop; ++i) {
1043
if (op[i] != NULL) {
1044
PyArray_Descr *tmp = PyArray_DescrFromType(types[i]);
1048
if (!PyArray_CanCastTypeTo(tmp, PyArray_DESCR(op[i]),
1050
if (!(*out_no_castable_output)) {
1051
*out_no_castable_output = 1;
1052
*out_err_src_typecode = tmp->type;
1053
*out_err_dst_typecode = PyArray_DESCR(op[i])->type;
1061
NPY_UF_DBG_PRINT("The outputs all worked\n");
1067
set_ufunc_loop_data_types(PyUFuncObject *self, PyArrayObject **op,
1068
PyArray_Descr **out_dtype,
1070
npy_intp buffersize, int *out_trivial_loop_ok)
1072
npy_intp i, nin = self->nin, nop = nin + self->nout;
1074
*out_trivial_loop_ok = 1;
1075
/* Fill the dtypes array */
1076
for (i = 0; i < nop; ++i) {
1077
out_dtype[i] = PyArray_DescrFromType(types[i]);
1078
if (out_dtype[i] == NULL) {
1082
* If the dtype doesn't match, or the array isn't aligned,
1083
* indicate that the trivial loop can't be done.
1085
if (*out_trivial_loop_ok && op[i] != NULL &&
1086
(!PyArray_ISALIGNED(op[i]) ||
1087
!PyArray_EquivTypes(out_dtype[i], PyArray_DESCR(op[i]))
1090
* If op[j] is a scalar or small one dimensional
1091
* array input, make a copy to keep the opportunity
1092
* for a trivial loop.
1094
if (i < nin && (PyArray_NDIM(op[i]) == 0 ||
1095
(PyArray_NDIM(op[i]) == 1 &&
1096
PyArray_DIM(op[i],0) <= buffersize))) {
1098
Py_INCREF(out_dtype[i]);
1099
tmp = (PyArrayObject *)
1100
PyArray_CastToType(op[i], out_dtype[i], 0);
1108
*out_trivial_loop_ok = 0;
1117
* Does a search through the arguments and the loops
1120
find_ufunc_matching_userloop(PyUFuncObject *self,
1122
NPY_CASTING input_casting,
1123
NPY_CASTING output_casting,
1124
npy_intp buffersize,
1127
PyArray_Descr **out_dtype,
1128
PyUFuncGenericFunction *out_innerloop,
1129
void **out_innerloopdata,
1130
int *out_trivial_loop_ok,
1131
int *out_no_castable_output,
1132
char *out_err_src_typecode,
1133
char *out_err_dst_typecode)
1135
npy_intp i, nin = self->nin;
1136
PyUFunc_Loop1d *funcdata;
1138
/* Use this to try to avoid repeating the same userdef loop search */
1139
int last_userdef = -1;
1141
for (i = 0; i < nin; ++i) {
1142
int type_num = PyArray_DESCR(op[i])->type_num;
1143
if (type_num != last_userdef && PyTypeNum_ISUSERDEF(type_num)) {
1144
PyObject *key, *obj;
1146
last_userdef = type_num;
1148
key = PyInt_FromLong(type_num);
1152
obj = PyDict_GetItem(self->userloops, key);
1157
funcdata = (PyUFunc_Loop1d *)NpyCapsule_AsVoidPtr(obj);
1158
while (funcdata != NULL) {
1159
int *types = funcdata->arg_types;
1160
switch (ufunc_loop_matches(self, op,
1161
input_casting, output_casting,
1162
any_object, use_min_scalar,
1164
out_no_castable_output, out_err_src_typecode,
1165
out_err_dst_typecode)) {
1171
set_ufunc_loop_data_types(self, op, out_dtype, types,
1172
buffersize, out_trivial_loop_ok);
1174
/* Save the inner loop and its data */
1175
*out_innerloop = funcdata->func;
1176
*out_innerloopdata = funcdata->data;
1178
NPY_UF_DBG_PRINT("Returning userdef inner "
1179
"loop successfully\n");
1184
funcdata = funcdata->next;
1189
/* Didn't find a match */
1194
* Does a search through the arguments and the loops
1197
find_ufunc_specified_userloop(PyUFuncObject *self,
1199
int *specified_types,
1201
NPY_CASTING casting,
1202
npy_intp buffersize,
1205
PyArray_Descr **out_dtype,
1206
PyUFuncGenericFunction *out_innerloop,
1207
void **out_innerloopdata,
1208
int *out_trivial_loop_ok)
1210
npy_intp i, j, nin = self->nin, nop = nin + self->nout;
1211
PyUFunc_Loop1d *funcdata;
1213
/* Use this to try to avoid repeating the same userdef loop search */
1214
int last_userdef = -1;
1216
int no_castable_output = 0;
1217
char err_src_typecode = '-', err_dst_typecode = '-';
1219
for (i = 0; i < nin; ++i) {
1220
int type_num = PyArray_DESCR(op[i])->type_num;
1221
if (type_num != last_userdef && PyTypeNum_ISUSERDEF(type_num)) {
1222
PyObject *key, *obj;
1224
last_userdef = type_num;
1226
key = PyInt_FromLong(type_num);
1230
obj = PyDict_GetItem(self->userloops, key);
1235
funcdata = (PyUFunc_Loop1d *)NpyCapsule_AsVoidPtr(obj);
1236
while (funcdata != NULL) {
1237
int *types = funcdata->arg_types;
1240
if (n_specified == nop) {
1241
for (j = 0; j < nop; ++j) {
1242
if (types[j] != specified_types[j]) {
1248
if (types[nin] != specified_types[0]) {
1256
switch (ufunc_loop_matches(self, op,
1258
any_object, use_min_scalar,
1260
&no_castable_output, &err_src_typecode,
1261
&err_dst_typecode)) {
1264
set_ufunc_loop_data_types(self, op, out_dtype, types,
1265
buffersize, out_trivial_loop_ok);
1267
/* Save the inner loop and its data */
1268
*out_innerloop = funcdata->func;
1269
*out_innerloopdata = funcdata->data;
1271
NPY_UF_DBG_PRINT("Returning userdef inner "
1272
"loop successfully\n");
1277
PyErr_Format(PyExc_TypeError,
1278
"found a user loop for ufunc '%s' "
1279
"matching the type-tuple, "
1280
"but the inputs and/or outputs could not be "
1281
"cast according to the casting rule",
1282
self->name ? self->name : "(unknown)");
1289
funcdata = funcdata->next;
1294
/* Didn't find a match */
1299
* Provides an ordering for the dtype 'kind' character codes, to help
1300
* determine when to use the min_scalar_type function. This groups
1301
* 'kind' into boolean, integer, floating point, and everything else.
1305
dtype_kind_to_simplified_ordering(char kind)
1311
/* Unsigned int kind */
1313
/* Signed int kind */
1328
should_use_min_scalar(PyArrayObject **op, int nop)
1330
int i, use_min_scalar, kind;
1331
int all_scalars = 1, max_scalar_kind = -1, max_array_kind = -1;
1334
* Determine if there are any scalars, and if so, whether
1335
* the maximum "kind" of the scalars surpasses the maximum
1336
* "kind" of the arrays
1340
for(i = 0; i < nop; ++i) {
1341
kind = dtype_kind_to_simplified_ordering(
1342
PyArray_DESCR(op[i])->kind);
1343
if (PyArray_NDIM(op[i]) == 0) {
1344
if (kind > max_scalar_kind) {
1345
max_scalar_kind = kind;
1350
if (kind > max_array_kind) {
1351
max_array_kind = kind;
1357
/* Indicate whether to use the min_scalar_type function */
1358
if (!all_scalars && max_array_kind >= max_scalar_kind) {
1363
return use_min_scalar;
1367
* Does a linear search for the best inner loop of the ufunc.
1368
* When op[i] is a scalar or a one dimensional array smaller than
1369
* the buffersize, and needs a dtype conversion, this function
1370
* may substitute op[i] with a version cast to the correct type. This way,
1371
* the later trivial loop detection has a higher chance of being triggered.
1373
* Note that if an error is returned, the caller must free the non-zero
1374
* references in out_dtype. This function does not do its own clean-up.
1377
find_best_ufunc_inner_loop(PyUFuncObject *self,
1379
NPY_CASTING input_casting,
1380
NPY_CASTING output_casting,
1381
npy_intp buffersize,
1383
PyArray_Descr **out_dtype,
1384
PyUFuncGenericFunction *out_innerloop,
1385
void **out_innerloopdata,
1386
int *out_trivial_loop_ok)
1388
npy_intp i, j, nin = self->nin, nop = nin + self->nout;
1389
int types[NPY_MAXARGS];
1391
int no_castable_output, use_min_scalar;
1393
/* For making a better error message on coercion error */
1394
char err_dst_typecode = '-', err_src_typecode = '-';
1396
ufunc_name = self->name ? self->name : "(unknown)";
1398
use_min_scalar = should_use_min_scalar(op, nin);
1400
/* If the ufunc has userloops, search for them. */
1401
if (self->userloops) {
1402
switch (find_ufunc_matching_userloop(self, op,
1403
input_casting, output_casting,
1404
buffersize, any_object, use_min_scalar,
1405
out_dtype, out_innerloop, out_innerloopdata,
1406
out_trivial_loop_ok,
1407
&no_castable_output, &err_src_typecode,
1408
&err_dst_typecode)) {
1412
/* A loop was found */
1419
* Determine the UFunc loop. This could in general be *much* faster,
1420
* and a better way to implement it might be for the ufunc to
1421
* provide a function which gives back the result type and inner
1424
* A default fast mechanism could be provided for functions which
1425
* follow the most typical pattern, when all functions have signatures
1426
* "xx...x -> x" for some built-in data type x, as follows.
1427
* - Use PyArray_ResultType to get the output type
1428
* - Look up the inner loop in a table based on the output type_num
1430
* The method for finding the loop in the previous code did not
1431
* appear consistent (as noted by some asymmetry in the generated
1432
* coercion tables for np.add).
1434
no_castable_output = 0;
1435
for (i = 0; i < self->ntypes; ++i) {
1436
char *orig_types = self->types + i*self->nargs;
1438
/* Copy the types into an int array for matching */
1439
for (j = 0; j < nop; ++j) {
1440
types[j] = orig_types[j];
1443
NPY_UF_DBG_PRINT1("Trying function loop %d\n", (int)i);
1444
switch (ufunc_loop_matches(self, op,
1445
input_casting, output_casting,
1446
any_object, use_min_scalar,
1448
&no_castable_output, &err_src_typecode,
1449
&err_dst_typecode)) {
1455
set_ufunc_loop_data_types(self, op, out_dtype, types,
1456
buffersize, out_trivial_loop_ok);
1458
/* Save the inner loop and its data */
1459
*out_innerloop = self->functions[i];
1460
*out_innerloopdata = self->data[i];
1462
NPY_UF_DBG_PRINT("Returning inner loop successfully\n");
1469
/* If no function was found, throw an error */
1470
NPY_UF_DBG_PRINT("No loop was found\n");
1471
if (no_castable_output) {
1472
PyErr_Format(PyExc_TypeError,
1473
"ufunc '%s' output (typecode '%c') could not be coerced to "
1474
"provided output parameter (typecode '%c') according "
1475
"to the casting rule '%s'",
1476
ufunc_name, err_src_typecode, err_dst_typecode,
1477
_casting_to_string(output_casting));
1481
* TODO: We should try again if the casting rule is same_kind
1482
* or unsafe, and look for a function more liberally.
1484
PyErr_Format(PyExc_TypeError,
1485
"ufunc '%s' not supported for the input types, and the "
1486
"inputs could not be safely coerced to any supported "
1487
"types according to the casting rule '%s'",
1489
_casting_to_string(input_casting));
1496
* Does a linear search for the inner loop of the ufunc specified by type_tup.
1497
* When op[i] is a scalar or a one dimensional array smaller than
1498
* the buffersize, and needs a dtype conversion, this function
1499
* may substitute op[i] with a version cast to the correct type. This way,
1500
* the later trivial loop detection has a higher chance of being triggered.
1502
* Note that if an error is returned, the caller must free the non-zero
1503
* references in out_dtype. This function does not do its own clean-up.
1506
find_specified_ufunc_inner_loop(PyUFuncObject *self,
1509
NPY_CASTING casting,
1510
npy_intp buffersize,
1512
PyArray_Descr **out_dtype,
1513
PyUFuncGenericFunction *out_innerloop,
1514
void **out_innerloopdata,
1515
int *out_trivial_loop_ok)
1517
npy_intp i, j, n, nin = self->nin, nop = nin + self->nout;
1518
int n_specified = 0;
1519
int specified_types[NPY_MAXARGS], types[NPY_MAXARGS];
1521
int no_castable_output, use_min_scalar;
1523
/* For making a better error message on coercion error */
1524
char err_dst_typecode = '-', err_src_typecode = '-';
1526
ufunc_name = self->name ? self->name : "(unknown)";
1528
use_min_scalar = should_use_min_scalar(op, nin);
1530
/* Fill in specified_types from the tuple or string */
1531
if (PyTuple_Check(type_tup)) {
1532
n = PyTuple_GET_SIZE(type_tup);
1533
if (n != 1 && n != nop) {
1534
PyErr_Format(PyExc_ValueError,
1535
"a type-tuple must be specified " \
1536
"of length 1 or %d for ufunc '%s'", (int)nop,
1537
self->name ? self->name : "(unknown)");
1541
for (i = 0; i < n; ++i) {
1542
PyArray_Descr *dtype = NULL;
1543
if (!PyArray_DescrConverter(PyTuple_GET_ITEM(type_tup, i),
1547
specified_types[i] = dtype->type_num;
1553
else if (PyBytes_Check(type_tup) || PyUnicode_Check(type_tup)) {
1556
PyObject *str_obj = NULL;
1558
if (PyUnicode_Check(type_tup)) {
1559
str_obj = PyUnicode_AsASCIIString(type_tup);
1560
if (str_obj == NULL) {
1566
if (!PyBytes_AsStringAndSize(type_tup, &str, &length) < 0) {
1567
Py_XDECREF(str_obj);
1570
if (length != 1 && (length != nop + 2 ||
1571
str[nin] != '-' || str[nin+1] != '>')) {
1572
PyErr_Format(PyExc_ValueError,
1573
"a type-string for %s, " \
1574
"requires 1 typecode, or "
1575
"%d typecode(s) before " \
1576
"and %d after the -> sign",
1577
self->name ? self->name : "(unknown)",
1578
self->nin, self->nout);
1579
Py_XDECREF(str_obj);
1583
PyArray_Descr *dtype;
1585
dtype = PyArray_DescrFromType(str[0]);
1586
if (dtype == NULL) {
1587
Py_XDECREF(str_obj);
1590
NPY_UF_DBG_PRINT2("signature character '%c', type num %d\n",
1591
str[0], dtype->type_num);
1592
specified_types[0] = dtype->type_num;
1596
PyArray_Descr *dtype;
1597
n_specified = (int)nop;
1599
for (i = 0; i < nop; ++i) {
1600
npy_intp istr = i < nin ? i : i+2;
1602
dtype = PyArray_DescrFromType(str[istr]);
1603
if (dtype == NULL) {
1604
Py_XDECREF(str_obj);
1607
NPY_UF_DBG_PRINT2("signature character '%c', type num %d\n",
1608
str[istr], dtype->type_num);
1609
specified_types[i] = dtype->type_num;
1613
Py_XDECREF(str_obj);
1616
/* If the ufunc has userloops, search for them. */
1617
if (self->userloops) {
1618
NPY_UF_DBG_PRINT("Searching user loops for specified sig\n");
1619
switch (find_ufunc_specified_userloop(self,
1620
n_specified, specified_types,
1622
buffersize, any_object, use_min_scalar,
1623
out_dtype, out_innerloop, out_innerloopdata,
1624
out_trivial_loop_ok)) {
1628
/* Found matching loop */
1634
NPY_UF_DBG_PRINT("Searching loops for specified sig\n");
1635
for (i = 0; i < self->ntypes; ++i) {
1636
char *orig_types = self->types + i*self->nargs;
1639
NPY_UF_DBG_PRINT1("Trying function loop %d\n", (int)i);
1641
/* Copy the types into an int array for matching */
1642
for (j = 0; j < nop; ++j) {
1643
types[j] = orig_types[j];
1646
if (n_specified == nop) {
1647
for (j = 0; j < nop; ++j) {
1648
if (types[j] != specified_types[j]) {
1654
NPY_UF_DBG_PRINT2("Specified type: %d, first output type: %d\n",
1655
specified_types[0], types[nin]);
1656
if (types[nin] != specified_types[0]) {
1664
NPY_UF_DBG_PRINT("It matches, confirming type casting\n");
1665
switch (ufunc_loop_matches(self, op,
1667
any_object, use_min_scalar,
1669
&no_castable_output, &err_src_typecode,
1670
&err_dst_typecode)) {
1676
set_ufunc_loop_data_types(self, op, out_dtype, types,
1677
buffersize, out_trivial_loop_ok);
1679
/* Save the inner loop and its data */
1680
*out_innerloop = self->functions[i];
1681
*out_innerloopdata = self->data[i];
1683
NPY_UF_DBG_PRINT("Returning specified inner loop successfully\n");
1688
PyErr_Format(PyExc_TypeError,
1689
"found a loop for ufunc '%s' "
1690
"matching the type-tuple, "
1691
"but the inputs and/or outputs could not be "
1692
"cast according to the casting rule",
1699
/* If no function was found, throw an error */
1700
NPY_UF_DBG_PRINT("No specified loop was found\n");
1702
PyErr_Format(PyExc_TypeError,
1703
"No loop matching the specified signature was found "
1704
"for ufunc %s", ufunc_name);
1710
trivial_two_operand_loop(PyArrayObject **op,
1711
PyUFuncGenericFunction innerloop,
1712
void *innerloopdata)
1715
npy_intp count[2], stride[2];
1717
NPY_BEGIN_THREADS_DEF;
1719
needs_api = PyDataType_REFCHK(PyArray_DESCR(op[0])) ||
1720
PyDataType_REFCHK(PyArray_DESCR(op[1]));
1722
PyArray_PREPARE_TRIVIAL_PAIR_ITERATION(op[0], op[1],
1725
stride[0], stride[1]);
1726
count[1] = count[0];
1727
NPY_UF_DBG_PRINT1("two operand loop count %d\n", (int)count[0]);
1733
innerloop(data, count, stride, innerloopdata);
1741
trivial_three_operand_loop(PyArrayObject **op,
1742
PyUFuncGenericFunction innerloop,
1743
void *innerloopdata)
1746
npy_intp count[3], stride[3];
1748
NPY_BEGIN_THREADS_DEF;
1750
needs_api = PyDataType_REFCHK(PyArray_DESCR(op[0])) ||
1751
PyDataType_REFCHK(PyArray_DESCR(op[1])) ||
1752
PyDataType_REFCHK(PyArray_DESCR(op[2]));
1754
PyArray_PREPARE_TRIVIAL_TRIPLE_ITERATION(op[0], op[1], op[2],
1756
data[0], data[1], data[2],
1757
stride[0], stride[1], stride[2]);
1758
count[1] = count[0];
1759
count[2] = count[0];
1760
NPY_UF_DBG_PRINT1("three operand loop count %d\n", (int)count[0]);
1766
innerloop(data, count, stride, innerloopdata);
1774
* Calls the given __array_prepare__ function on the operand *op,
1775
* substituting it in place if a new array is returned and matches
1778
* This requires that the dimensions, strides and data type remain
1779
* exactly the same, which may be more strict than before.
1782
prepare_ufunc_output(PyUFuncObject *self,
1785
PyObject *arr_prep_args,
1788
if (arr_prep != NULL && arr_prep != Py_None) {
1791
res = PyObject_CallFunction(arr_prep, "O(OOi)",
1792
*op, self, arr_prep_args, i);
1793
if ((res == NULL) || (res == Py_None) || !PyArray_Check(res)) {
1794
if (!PyErr_Occurred()){
1795
PyErr_SetString(PyExc_TypeError,
1796
"__array_prepare__ must return an "
1797
"ndarray or subclass thereof");
1803
/* If the same object was returned, nothing to do */
1804
if (res == (PyObject *)*op) {
1807
/* If the result doesn't match, throw an error */
1808
else if (PyArray_NDIM(res) != PyArray_NDIM(*op) ||
1809
!PyArray_CompareLists(PyArray_DIMS(res),
1811
PyArray_NDIM(res)) ||
1812
!PyArray_CompareLists(PyArray_STRIDES(res),
1813
PyArray_STRIDES(*op),
1814
PyArray_NDIM(res)) ||
1815
!PyArray_EquivTypes(PyArray_DESCR(res),
1816
PyArray_DESCR(*op))) {
1817
PyErr_SetString(PyExc_TypeError,
1818
"__array_prepare__ must return an "
1819
"ndarray or subclass thereof which is "
1820
"otherwise identical to its input");
1824
/* Replace the op value */
1827
*op = (PyArrayObject *)res;
1835
iterator_loop(PyUFuncObject *self,
1837
PyArray_Descr **dtype,
1839
npy_intp buffersize,
1840
PyObject **arr_prep,
1841
PyObject *arr_prep_args,
1842
PyUFuncGenericFunction innerloop,
1843
void *innerloopdata)
1845
npy_intp i, nin = self->nin, nout = self->nout;
1846
npy_intp nop = nin + nout;
1847
npy_uint32 op_flags[NPY_MAXARGS];
1849
char *baseptrs[NPY_MAXARGS];
1852
NpyIter_IterNextFunc *iternext;
1855
npy_intp *count_ptr;
1857
PyArrayObject **op_it;
1859
NPY_BEGIN_THREADS_DEF;
1861
/* Set up the flags */
1862
for (i = 0; i < nin; ++i) {
1863
op_flags[i] = NPY_ITER_READONLY|
1866
for (i = nin; i < nop; ++i) {
1867
op_flags[i] = NPY_ITER_WRITEONLY|
1870
NPY_ITER_NO_BROADCAST|
1871
NPY_ITER_NO_SUBTYPE;
1875
* Allocate the iterator. Because the types of the inputs
1876
* were already checked, we use the casting rule 'unsafe' which
1877
* is faster to calculate.
1879
iter = NpyIter_AdvancedNew(nop, op,
1880
NPY_ITER_EXTERNAL_LOOP|
1882
NPY_ITER_ZEROSIZE_OK|
1885
NPY_ITER_DELAY_BUFALLOC,
1886
order, NPY_UNSAFE_CASTING,
1888
0, NULL, NULL, buffersize);
1893
needs_api = NpyIter_IterationNeedsAPI(iter);
1895
/* Copy any allocated outputs */
1896
op_it = NpyIter_GetOperandArray(iter);
1897
for (i = nin; i < nop; ++i) {
1898
if (op[i] == NULL) {
1904
/* Call the __array_prepare__ functions where necessary */
1905
for (i = 0; i < nout; ++i) {
1906
if (prepare_ufunc_output(self, &op[nin+i],
1907
arr_prep[i], arr_prep_args, i) < 0) {
1908
NpyIter_Deallocate(iter);
1913
/* Only do the loop if the iteration size is non-zero */
1914
if (NpyIter_GetIterSize(iter) != 0) {
1916
/* Reset the iterator with the base pointers from the wrapped outputs */
1917
for (i = 0; i < nin; ++i) {
1918
baseptrs[i] = PyArray_BYTES(op_it[i]);
1920
for (i = nin; i < nop; ++i) {
1921
baseptrs[i] = PyArray_BYTES(op[i]);
1923
if (NpyIter_ResetBasePointers(iter, baseptrs, NULL) != NPY_SUCCEED) {
1924
NpyIter_Deallocate(iter);
1928
/* Get the variables needed for the loop */
1929
iternext = NpyIter_GetIterNext(iter, NULL);
1930
if (iternext == NULL) {
1931
NpyIter_Deallocate(iter);
1934
dataptr = NpyIter_GetDataPtrArray(iter);
1935
stride = NpyIter_GetInnerStrideArray(iter);
1936
count_ptr = NpyIter_GetInnerLoopSizePtr(iter);
1942
/* Execute the loop */
1944
NPY_UF_DBG_PRINT1("iterator loop count %d\n", (int)*count_ptr);
1945
innerloop(dataptr, count_ptr, stride, innerloopdata);
1946
} while (iternext(iter));
1953
NpyIter_Deallocate(iter);
1958
* trivial_loop_ok - 1 if no alignment, data conversion, etc required
1959
* nin - number of inputs
1960
* nout - number of outputs
1961
* op - the operands (nin + nout of them)
1962
* order - the loop execution order/output memory order
1963
* buffersize - how big of a buffer to use
1964
* arr_prep - the __array_prepare__ functions for the outputs
1965
* innerloop - the inner loop function
1966
* innerloopdata - data to pass to the inner loop
1969
execute_ufunc_loop(PyUFuncObject *self,
1970
int trivial_loop_ok,
1972
PyArray_Descr **dtype,
1974
npy_intp buffersize,
1975
PyObject **arr_prep,
1976
PyObject *arr_prep_args,
1977
PyUFuncGenericFunction innerloop,
1978
void *innerloopdata)
1980
npy_intp nin = self->nin, nout = self->nout;
1982
/* First check for the trivial cases that don't need an iterator */
1983
if (trivial_loop_ok) {
1984
if (nin == 1 && nout == 1) {
1985
if (op[1] == NULL &&
1986
(order == NPY_ANYORDER || order == NPY_KEEPORDER) &&
1987
PyArray_TRIVIALLY_ITERABLE(op[0])) {
1988
Py_INCREF(dtype[1]);
1989
op[1] = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type,
1991
PyArray_NDIM(op[0]),
1992
PyArray_DIMS(op[0]),
1994
PyArray_ISFORTRAN(op[0]) ? NPY_F_CONTIGUOUS : 0,
1997
/* Call the __prepare_array__ if necessary */
1998
if (prepare_ufunc_output(self, &op[1],
1999
arr_prep[0], arr_prep_args, 0) < 0) {
2003
NPY_UF_DBG_PRINT("trivial 1 input with allocated output\n");
2004
trivial_two_operand_loop(op, innerloop, innerloopdata);
2008
else if (op[1] != NULL &&
2009
PyArray_NDIM(op[1]) >= PyArray_NDIM(op[0]) &&
2010
PyArray_TRIVIALLY_ITERABLE_PAIR(op[0], op[1])) {
2012
/* Call the __prepare_array__ if necessary */
2013
if (prepare_ufunc_output(self, &op[1],
2014
arr_prep[0], arr_prep_args, 0) < 0) {
2018
NPY_UF_DBG_PRINT("trivial 1 input\n");
2019
trivial_two_operand_loop(op, innerloop, innerloopdata);
2024
else if (nin == 2 && nout == 1) {
2025
if (op[2] == NULL &&
2026
(order == NPY_ANYORDER || order == NPY_KEEPORDER) &&
2027
PyArray_TRIVIALLY_ITERABLE_PAIR(op[0], op[1])) {
2030
* Have to choose the input with more dimensions to clone, as
2031
* one of them could be a scalar.
2033
if (PyArray_NDIM(op[0]) >= PyArray_NDIM(op[1])) {
2039
Py_INCREF(dtype[2]);
2040
op[2] = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type,
2045
PyArray_ISFORTRAN(tmp) ? NPY_F_CONTIGUOUS : 0,
2048
/* Call the __prepare_array__ if necessary */
2049
if (prepare_ufunc_output(self, &op[2],
2050
arr_prep[0], arr_prep_args, 0) < 0) {
2054
NPY_UF_DBG_PRINT("trivial 2 input with allocated output\n");
2055
trivial_three_operand_loop(op, innerloop, innerloopdata);
2059
else if (op[2] != NULL &&
2060
PyArray_NDIM(op[2]) >= PyArray_NDIM(op[0]) &&
2061
PyArray_NDIM(op[2]) >= PyArray_NDIM(op[1]) &&
2062
PyArray_TRIVIALLY_ITERABLE_TRIPLE(op[0], op[1], op[2])) {
2064
/* Call the __prepare_array__ if necessary */
2065
if (prepare_ufunc_output(self, &op[2],
2066
arr_prep[0], arr_prep_args, 0) < 0) {
2070
NPY_UF_DBG_PRINT("trivial 2 input\n");
2071
trivial_three_operand_loop(op, innerloop, innerloopdata);
2079
* If no trivial loop matched, an iterator is required to
2080
* resolve broadcasting, etc
2083
NPY_UF_DBG_PRINT("iterator loop\n");
2084
if (iterator_loop(self, op, dtype, order,
2085
buffersize, arr_prep, arr_prep_args,
2086
innerloop, innerloopdata) < 0) {
2094
make_arr_prep_args(npy_intp nin, PyObject *args, PyObject *kwds)
2096
PyObject *out = kwds ? PyDict_GetItemString(kwds, "out") : NULL;
2097
PyObject *arr_prep_args;
2104
npy_intp i, nargs = PyTuple_GET_SIZE(args), n;
2109
arr_prep_args = PyTuple_New(n);
2110
if (arr_prep_args == NULL) {
2113
/* Copy the tuple, but set the nin-th item to the keyword arg */
2114
for (i = 0; i < nin; ++i) {
2115
PyObject *item = PyTuple_GET_ITEM(args, i);
2117
PyTuple_SET_ITEM(arr_prep_args, i, item);
2120
PyTuple_SET_ITEM(arr_prep_args, nin, out);
2121
for (i = nin+1; i < n; ++i) {
2122
PyObject *item = PyTuple_GET_ITEM(args, i);
2124
PyTuple_SET_ITEM(arr_prep_args, i, item);
2127
return arr_prep_args;
2132
PyUFunc_GeneralizedFunction(PyUFuncObject *self,
2133
PyObject *args, PyObject *kwds,
2139
int retval = -1, any_object = 0, subok = 1;
2140
NPY_CASTING input_casting;
2142
PyArray_Descr *dtype[NPY_MAXARGS];
2144
/* Use remapped axes for generalized ufunc */
2145
int broadcast_ndim, op_ndim;
2146
int op_axes_arrays[NPY_MAXARGS][NPY_MAXDIMS];
2147
int *op_axes[NPY_MAXARGS];
2149
npy_uint32 op_flags[NPY_MAXARGS];
2151
NpyIter *iter = NULL;
2153
/* These parameters come from extobj= or from a TLS global */
2154
int buffersize = 0, errormask = 0;
2155
PyObject *errobj = NULL;
2156
int first_error = 1;
2158
/* The selected inner loop */
2159
PyUFuncGenericFunction innerloop = NULL;
2160
void *innerloopdata = NULL;
2161
/* The dimensions which get passed to the inner loop */
2162
npy_intp inner_dimensions[NPY_MAXDIMS+1];
2163
/* The strides which get passed to the inner loop */
2164
npy_intp *inner_strides = NULL;
2166
npy_intp *inner_strides_tmp, *ax_strides_tmp[NPY_MAXDIMS];
2167
int core_dim_ixs_size, *core_dim_ixs;
2169
/* The __array_prepare__ function to call for each output */
2170
PyObject *arr_prep[NPY_MAXARGS];
2172
* This is either args, or args with the out= parameter from
2173
* kwds added appropriately.
2175
PyObject *arr_prep_args = NULL;
2177
int trivial_loop_ok = 0;
2179
NPY_ORDER order = NPY_KEEPORDER;
2181
* Many things in NumPy do unsafe casting (doing int += float, etc).
2182
* The strictness should probably become a state parameter, similar
2183
* to the seterr/geterr.
2185
NPY_CASTING casting = NPY_UNSAFE_CASTING;
2186
/* When provided, extobj and typetup contain borrowed references */
2187
PyObject *extobj = NULL, *type_tup = NULL;
2190
PyErr_SetString(PyExc_ValueError, "function not supported");
2198
ufunc_name = self->name ? self->name : "<unnamed ufunc>";
2200
NPY_UF_DBG_PRINT1("\nEvaluating ufunc %s\n", ufunc_name);
2202
/* Initialize all the operands and dtypes to NULL */
2203
for (i = 0; i < nop; ++i) {
2209
NPY_UF_DBG_PRINT("Getting arguments\n");
2211
/* Get all the arguments */
2212
retval = get_ufunc_arguments(self, args, kwds,
2213
op, &order, &casting, &extobj, &type_tup, &subok, &any_object);
2218
/* Figure out the number of dimensions needed by the iterator */
2220
for (i = 0; i < nin; ++i) {
2221
int n = PyArray_NDIM(op[i]) - self->core_num_dims[i];
2222
if (n > broadcast_ndim) {
2226
op_ndim = broadcast_ndim + self->core_num_dim_ix;
2227
if (op_ndim > NPY_MAXDIMS) {
2228
PyErr_Format(PyExc_ValueError,
2229
"too many dimensions for generalized ufunc %s",
2235
/* Fill in op_axes for all the operands */
2236
core_dim_ixs_size = 0;
2237
core_dim_ixs = self->core_dim_ixs;
2238
for (i = 0; i < nop; ++i) {
2242
* Note that n may be negative if broadcasting
2243
* extends into the core dimensions.
2245
n = PyArray_NDIM(op[i]) - self->core_num_dims[i];
2250
/* Broadcast all the unspecified dimensions normally */
2251
for (idim = 0; idim < broadcast_ndim; ++idim) {
2252
if (idim >= broadcast_ndim - n) {
2253
op_axes_arrays[i][idim] = idim - (broadcast_ndim - n);
2256
op_axes_arrays[i][idim] = -1;
2259
/* Use the signature information for the rest */
2260
for (idim = broadcast_ndim; idim < op_ndim; ++idim) {
2261
op_axes_arrays[i][idim] = -1;
2263
for (idim = 0; idim < self->core_num_dims[i]; ++idim) {
2264
if (n + idim >= 0) {
2265
op_axes_arrays[i][broadcast_ndim + core_dim_ixs[idim]] =
2269
op_axes_arrays[i][broadcast_ndim + core_dim_ixs[idim]] = -1;
2272
core_dim_ixs_size += self->core_num_dims[i];
2273
core_dim_ixs += self->core_num_dims[i];
2274
op_axes[i] = op_axes_arrays[i];
2277
/* Get the buffersize, errormask, and error object globals */
1919
2278
if (extobj == NULL) {
1920
if (PyUFunc_GetPyValues(name,
1921
&(loop->bufsize), &(loop->errormask),
1922
&(loop->errobj)) < 0) {
2279
if (PyUFunc_GetPyValues(ufunc_name,
2280
&buffersize, &errormask, &errobj) < 0) {
1927
if (_extract_pyvals(extobj, name,
1928
&(loop->bufsize), &(loop->errormask),
1929
&(loop->errobj)) < 0) {
1934
/* Setup the arrays */
1935
if (construct_arrays(loop, args, mps, typetup) < 0) {
2286
if (_extract_pyvals(extobj, ufunc_name,
2287
&buffersize, &errormask, &errobj) < 0) {
2293
NPY_UF_DBG_PRINT("Finding inner loop\n");
2296
* Decide the casting rules for inputs and outputs. We want
2297
* NPY_SAFE_CASTING or stricter, so that the loop selection code
2298
* doesn't choose an integer loop for float inputs, for example.
2300
input_casting = (casting > NPY_SAFE_CASTING) ? NPY_SAFE_CASTING : casting;
2302
if (type_tup == NULL) {
2303
/* Find the best ufunc inner loop, and fill in the dtypes */
2304
retval = find_best_ufunc_inner_loop(self, op, input_casting, casting,
2305
buffersize, any_object, dtype,
2306
&innerloop, &innerloopdata, &trivial_loop_ok);
2308
/* Find the specified ufunc inner loop, and fill in the dtypes */
2309
retval = find_specified_ufunc_inner_loop(self, type_tup,
2311
buffersize, any_object, dtype,
2312
&innerloop, &innerloopdata, &trivial_loop_ok);
2319
* FAIL with NotImplemented if the other object has
2320
* the __r<op>__ method and has __array_priority__ as
2321
* an attribute (signalling it can handle ndarray's)
2322
* and is not already an ndarray or a subtype of the same type.
2324
if (nin == 2 && nout == 1 && dtype[1]->type_num == NPY_OBJECT) {
2325
PyObject *_obj = PyTuple_GET_ITEM(args, 1);
2326
if (!PyArray_CheckExact(_obj)
2327
/* If both are same subtype of object arrays, then proceed */
2328
&& !(Py_TYPE(_obj) == Py_TYPE(PyTuple_GET_ITEM(args, 0)))
2329
&& PyObject_HasAttrString(_obj, "__array_priority__")
2330
&& _has_reflected_op(_obj, ufunc_name)) {
2336
#if NPY_UF_DBG_TRACING
2337
printf("input types:\n");
2338
for (i = 0; i < nin; ++i) {
2339
PyObject_Print((PyObject *)dtype[i], stdout, 0);
2342
printf("\noutput types:\n");
2343
for (i = nin; i < nop; ++i) {
2344
PyObject_Print((PyObject *)dtype[i], stdout, 0);
2352
* Get the appropriate __array_prepare__ function to call
2355
_find_array_prepare(args, kwds, arr_prep, nin, nout);
2357
/* Set up arr_prep_args if a prep function was needed */
2358
for (i = 0; i < nout; ++i) {
2359
if (arr_prep[i] != NULL && arr_prep[i] != Py_None) {
2360
arr_prep_args = make_arr_prep_args(nin, args, kwds);
2366
/* If the loop wants the arrays, provide them */
2367
if (_does_loop_use_arrays(innerloopdata)) {
2368
innerloopdata = (void*)op;
2372
* Set up the iterator per-op flags. For generalized ufuncs, we
2373
* can't do buffering, so must COPY or UPDATEIFCOPY.
2375
for (i = 0; i < nin; ++i) {
2376
op_flags[i] = NPY_ITER_READONLY|
2380
for (i = nin; i < nop; ++i) {
2381
op_flags[i] = NPY_ITER_READWRITE|
2382
NPY_ITER_UPDATEIFCOPY|
2385
NPY_ITER_NO_BROADCAST;
2388
/* Create the iterator */
2389
iter = NpyIter_AdvancedNew(nop, op, NPY_ITER_MULTI_INDEX|
2392
order, NPY_UNSAFE_CASTING, op_flags,
2393
dtype, op_ndim, op_axes, NULL, 0);
2399
/* Fill in any allocated outputs */
2400
for (i = nin; i < nop; ++i) {
2401
if (op[i] == NULL) {
2402
op[i] = NpyIter_GetOperandArray(iter)[i];
2408
* Set up the inner strides array. Because we're not doing
2409
* buffering, the strides are fixed throughout the looping.
2411
inner_strides = (npy_intp *)_pya_malloc(
2412
NPY_SIZEOF_INTP * (nop+core_dim_ixs_size));
2413
/* The strides after the first nop match core_dim_ixs */
2414
core_dim_ixs = self->core_dim_ixs;
2415
inner_strides_tmp = inner_strides + nop;
2416
for (idim = 0; idim < self->core_num_dim_ix; ++idim) {
2417
ax_strides_tmp[idim] = NpyIter_GetAxisStrideArray(iter,
2418
broadcast_ndim+idim);
2419
if (ax_strides_tmp[idim] == NULL) {
2424
for (i = 0; i < nop; ++i) {
2425
for (idim = 0; idim < self->core_num_dims[i]; ++idim) {
2426
inner_strides_tmp[idim] = ax_strides_tmp[core_dim_ixs[idim]][i];
2429
core_dim_ixs += self->core_num_dims[i];
2430
inner_strides_tmp += self->core_num_dims[i];
2433
/* Set up the inner dimensions array */
2434
if (NpyIter_GetShape(iter, inner_dimensions) != NPY_SUCCEED) {
2438
/* Move the core dimensions to start at the second element */
2439
memmove(&inner_dimensions[1], &inner_dimensions[broadcast_ndim],
2440
NPY_SIZEOF_INTP * self->core_num_dim_ix);
2442
/* Remove all the core dimensions from the iterator */
2443
for (i = 0; i < self->core_num_dim_ix; ++i) {
2444
if (NpyIter_RemoveAxis(iter, broadcast_ndim) != NPY_SUCCEED) {
2449
if (NpyIter_RemoveMultiIndex(iter) != NPY_SUCCEED) {
2453
if (NpyIter_EnableExternalLoop(iter) != NPY_SUCCEED) {
2459
* The first nop strides are for the inner loop (but only can
2460
* copy them after removing the core axes
2462
memcpy(inner_strides, NpyIter_GetInnerStrideArray(iter),
2463
NPY_SIZEOF_INTP * nop);
2466
printf("strides: ");
2467
for (i = 0; i < nop+core_dim_ixs_size; ++i) {
2468
printf("%d ", (int)inner_strides[i]);
2473
/* Start with the floating-point exception flags cleared */
1938
2474
PyUFunc_clearfperr();
2476
NPY_UF_DBG_PRINT("Executing inner loop\n");
2478
/* Do the ufunc loop */
2479
if (NpyIter_GetIterSize(iter) != 0) {
2480
NpyIter_IterNextFunc *iternext;
2482
npy_intp *count_ptr;
2484
/* Get the variables needed for the loop */
2485
iternext = NpyIter_GetIterNext(iter, NULL);
2486
if (iternext == NULL) {
2487
NpyIter_Deallocate(iter);
2491
dataptr = NpyIter_GetDataPtrArray(iter);
2492
count_ptr = NpyIter_GetInnerLoopSizePtr(iter);
2495
inner_dimensions[0] = *count_ptr;
2496
innerloop(dataptr, inner_dimensions, inner_strides, innerloopdata);
2497
} while (iternext(iter));
2500
/* Check whether any errors occurred during the loop */
2501
if (PyErr_Occurred() || (errormask &&
2502
PyUFunc_checkfperr(errormask, errobj, &first_error))) {
2507
_pya_free(inner_strides);
2508
NpyIter_Deallocate(iter);
2509
/* The caller takes ownership of all the references in op */
2510
for (i = 0; i < nop; ++i) {
2511
Py_XDECREF(dtype[i]);
2512
Py_XDECREF(arr_prep[i]);
2515
Py_XDECREF(type_tup);
2516
Py_XDECREF(arr_prep_args);
2518
NPY_UF_DBG_PRINT("Returning Success\n");
1942
ufuncloop_dealloc(loop);
2523
NPY_UF_DBG_PRINT1("Returning failure code %d\n", retval);
2524
if (inner_strides) {
2525
_pya_free(inner_strides);
2528
NpyIter_Deallocate(iter);
2530
for (i = 0; i < nop; ++i) {
2533
Py_XDECREF(dtype[i]);
2534
Py_XDECREF(arr_prep[i]);
2537
Py_XDECREF(type_tup);
2538
Py_XDECREF(arr_prep_args);
1949
_printbytebuf(PyUFuncLoopObject *loop, int bufnum)
1953
fprintf(stderr, "Printing byte buffer %d\n", bufnum);
1954
for(i=0; i<loop->bufcnt; i++) {
1955
fprintf(stderr, " %d\n", *(((byte *)(loop->buffer[bufnum]))+i));
1960
_printlongbuf(PyUFuncLoopObject *loop, int bufnum)
1964
fprintf(stderr, "Printing long buffer %d\n", bufnum);
1965
for(i=0; i<loop->bufcnt; i++) {
1966
fprintf(stderr, " %ld\n", *(((long *)(loop->buffer[bufnum]))+i));
1971
_printlongbufptr(PyUFuncLoopObject *loop, int bufnum)
1975
fprintf(stderr, "Printing long buffer %d\n", bufnum);
1976
for(i=0; i<loop->bufcnt; i++) {
1977
fprintf(stderr, " %ld\n", *(((long *)(loop->bufptr[bufnum]))+i));
1984
_printcastbuf(PyUFuncLoopObject *loop, int bufnum)
1988
fprintf(stderr, "Printing long buffer %d\n", bufnum);
1989
for(i=0; i<loop->bufcnt; i++) {
1990
fprintf(stderr, " %ld\n", *(((long *)(loop->castbuf[bufnum]))+i));
2000
* currently generic ufuncs cannot be built for use on flexible arrays.
2002
* The cast functions in the generic loop would need to be fixed to pass
2003
* in something besides NULL, NULL.
2005
* Also the underlying ufunc loops would not know the element-size unless
2006
* that was passed in as data (which could be arranged).
2012
2545
* This generic function is called with the ufunc object, the arguments to it,
2013
* and an array of (pointers to) PyArrayObjects which are NULL. The
2014
* arguments are parsed and placed in mps in construct_loop (construct_arrays)
2546
* and an array of (pointers to) PyArrayObjects which are NULL.
2016
2548
NPY_NO_EXPORT int
2017
PyUFunc_GenericFunction(PyUFuncObject *self, PyObject *args, PyObject *kwds,
2018
PyArrayObject **mps)
2020
PyUFuncLoopObject *loop;
2549
PyUFunc_GenericFunction(PyUFuncObject *self,
2550
PyObject *args, PyObject *kwds,
2556
int retval = -1, any_object = 0, subok = 1;
2557
NPY_CASTING input_casting;
2559
PyArray_Descr *dtype[NPY_MAXARGS];
2561
/* These parameters come from extobj= or from a TLS global */
2562
int buffersize = 0, errormask = 0;
2563
PyObject *errobj = NULL;
2564
int first_error = 1;
2566
/* The selected inner loop */
2567
PyUFuncGenericFunction innerloop = NULL;
2568
void *innerloopdata = NULL;
2570
/* The __array_prepare__ function to call for each output */
2571
PyObject *arr_prep[NPY_MAXARGS];
2573
* This is either args, or args with the out= parameter from
2574
* kwds added appropriately.
2576
PyObject *arr_prep_args = NULL;
2578
int trivial_loop_ok = 0;
2580
/* TODO: For 1.6, the default should probably be NPY_CORDER */
2581
NPY_ORDER order = NPY_KEEPORDER;
2583
* Many things in NumPy do unsafe casting (doing int += float, etc).
2584
* The strictness should probably become a state parameter, similar
2585
* to the seterr/geterr.
2587
NPY_CASTING casting = NPY_UNSAFE_CASTING;
2588
/* When provided, extobj and typetup contain borrowed references */
2589
PyObject *extobj = NULL, *type_tup = NULL;
2592
PyErr_SetString(PyExc_ValueError, "function not supported");
2596
/* TODO: support generalized ufunc */
2597
if (self->core_enabled) {
2598
return PyUFunc_GeneralizedFunction(self, args, kwds, op);
2605
ufunc_name = self->name ? self->name : "<unnamed ufunc>";
2607
NPY_UF_DBG_PRINT1("\nEvaluating ufunc %s\n", ufunc_name);
2609
/* Initialize all the operands and dtypes to NULL */
2610
for (i = 0; i < nop; ++i) {
2616
NPY_UF_DBG_PRINT("Getting arguments\n");
2618
/* Get all the arguments */
2619
retval = get_ufunc_arguments(self, args, kwds,
2620
op, &order, &casting, &extobj, &type_tup, &subok, &any_object);
2625
/* Get the buffersize, errormask, and error object globals */
2626
if (extobj == NULL) {
2627
if (PyUFunc_GetPyValues(ufunc_name,
2628
&buffersize, &errormask, &errobj) < 0) {
2634
if (_extract_pyvals(extobj, ufunc_name,
2635
&buffersize, &errormask, &errobj) < 0) {
2641
NPY_UF_DBG_PRINT("Finding inner loop\n");
2644
* Decide the casting rules for inputs and outputs. We want
2645
* NPY_SAFE_CASTING or stricter, so that the loop selection code
2646
* doesn't choose an integer loop for float inputs, for example.
2648
input_casting = (casting > NPY_SAFE_CASTING) ? NPY_SAFE_CASTING : casting;
2650
if (type_tup == NULL) {
2651
/* Find the best ufunc inner loop, and fill in the dtypes */
2652
retval = find_best_ufunc_inner_loop(self, op, input_casting, casting,
2653
buffersize, any_object, dtype,
2654
&innerloop, &innerloopdata, &trivial_loop_ok);
2656
/* Find the specified ufunc inner loop, and fill in the dtypes */
2657
retval = find_specified_ufunc_inner_loop(self, type_tup,
2659
buffersize, any_object, dtype,
2660
&innerloop, &innerloopdata, &trivial_loop_ok);
2667
* FAIL with NotImplemented if the other object has
2668
* the __r<op>__ method and has __array_priority__ as
2669
* an attribute (signalling it can handle ndarray's)
2670
* and is not already an ndarray or a subtype of the same type.
2672
if (nin == 2 && nout == 1 && dtype[1]->type_num == NPY_OBJECT) {
2673
PyObject *_obj = PyTuple_GET_ITEM(args, 1);
2674
if (!PyArray_CheckExact(_obj)
2675
/* If both are same subtype of object arrays, then proceed */
2676
&& !(Py_TYPE(_obj) == Py_TYPE(PyTuple_GET_ITEM(args, 0)))
2677
&& PyObject_HasAttrString(_obj, "__array_priority__")
2678
&& _has_reflected_op(_obj, ufunc_name)) {
2684
#if NPY_UF_DBG_TRACING
2685
printf("input types:\n");
2686
for (i = 0; i < nin; ++i) {
2687
PyObject_Print((PyObject *)dtype[i], stdout, 0);
2690
printf("\noutput types:\n");
2691
for (i = nin; i < nop; ++i) {
2692
PyObject_Print((PyObject *)dtype[i], stdout, 0);
2700
* Get the appropriate __array_prepare__ function to call
2703
_find_array_prepare(args, kwds, arr_prep, nin, nout);
2705
/* Set up arr_prep_args if a prep function was needed */
2706
for (i = 0; i < nout; ++i) {
2707
if (arr_prep[i] != NULL && arr_prep[i] != Py_None) {
2708
arr_prep_args = make_arr_prep_args(nin, args, kwds);
2714
/* If the loop wants the arrays, provide them */
2715
if (_does_loop_use_arrays(innerloopdata)) {
2716
innerloopdata = (void*)op;
2719
/* Start with the floating-point exception flags cleared */
2720
PyUFunc_clearfperr();
2722
NPY_UF_DBG_PRINT("Executing inner loop\n");
2724
/* Do the ufunc loop */
2725
retval = execute_ufunc_loop(self, trivial_loop_ok, op, dtype, order,
2726
buffersize, arr_prep, arr_prep_args,
2727
innerloop, innerloopdata);
2732
/* Check whether any errors occurred during the loop */
2733
if (PyErr_Occurred() || (errormask &&
2734
PyUFunc_checkfperr(errormask, errobj, &first_error))) {
2739
/* The caller takes ownership of all the references in op */
2740
for (i = 0; i < nop; ++i) {
2741
Py_XDECREF(dtype[i]);
2742
Py_XDECREF(arr_prep[i]);
2745
Py_XDECREF(type_tup);
2746
Py_XDECREF(arr_prep_args);
2748
NPY_UF_DBG_PRINT("Returning Success\n");
2753
NPY_UF_DBG_PRINT1("Returning failure code %d\n", retval);
2754
for (i = 0; i < nop; ++i) {
2757
Py_XDECREF(dtype[i]);
2758
Py_XDECREF(arr_prep[i]);
2761
Py_XDECREF(type_tup);
2762
Py_XDECREF(arr_prep_args);
2768
* Given the output type, finds the specified binary op. The
2769
* ufunc must have nin==2 and nout==1. The function may modify
2770
* otype if the given type isn't found.
2772
* Returns 0 on success, -1 on failure.
2775
get_binary_op_function(PyUFuncObject *self, int *otype,
2776
PyUFuncGenericFunction *out_innerloop,
2777
void **out_innerloopdata)
2022
NPY_BEGIN_THREADS_DEF;
2024
if (!(loop = construct_loop(self, args, kwds, mps))) {
2027
if (loop->notimplemented) {
2028
ufuncloop_dealloc(loop);
2031
if (self->core_enabled && loop->meth != SIGNATURE_NOBUFFER_UFUNCLOOP) {
2032
PyErr_SetString(PyExc_RuntimeError,
2033
"illegal loop method for ufunc with signature");
2037
NPY_LOOP_BEGIN_THREADS;
2038
switch(loop->meth) {
2041
* Everything is contiguous, notswapped, aligned,
2042
* and of the right type. -- Fastest.
2043
* Or if not contiguous, then a single-stride
2044
* increment moves through the entire array.
2046
/*fprintf(stderr, "ONE...%d\n", loop->size);*/
2047
loop->function((char **)loop->bufptr, &(loop->size),
2048
loop->steps, loop->funcdata);
2049
UFUNC_CHECK_ERROR(loop);
2051
case NOBUFFER_UFUNCLOOP:
2053
* Everything is notswapped, aligned and of the
2054
* right type but not contiguous. -- Almost as fast.
2056
/*fprintf(stderr, "NOBUFFER...%d\n", loop->size);*/
2057
while (loop->index < loop->size) {
2058
for (i = 0; i < self->nargs; i++) {
2059
loop->bufptr[i] = loop->iters[i]->dataptr;
2061
loop->function((char **)loop->bufptr, &(loop->bufcnt),
2062
loop->steps, loop->funcdata);
2063
UFUNC_CHECK_ERROR(loop);
2065
/* Adjust loop pointers */
2066
for (i = 0; i < self->nargs; i++) {
2067
PyArray_ITER_NEXT(loop->iters[i]);
2072
case SIGNATURE_NOBUFFER_UFUNCLOOP:
2073
while (loop->index < loop->size) {
2074
for (i = 0; i < self->nargs; i++) {
2075
loop->bufptr[i] = loop->iters[i]->dataptr;
2077
loop->function((char **)loop->bufptr, loop->core_dim_sizes,
2078
loop->core_strides, loop->funcdata);
2079
UFUNC_CHECK_ERROR(loop);
2081
/* Adjust loop pointers */
2082
for (i = 0; i < self->nargs; i++) {
2083
PyArray_ITER_NEXT(loop->iters[i]);
2088
case BUFFER_UFUNCLOOP: {
2089
/* This should be a function */
2090
PyArray_CopySwapNFunc *copyswapn[NPY_MAXARGS];
2091
PyArrayIterObject **iters=loop->iters;
2092
int *swap=loop->swap;
2093
char **dptr=loop->dptr;
2094
int mpselsize[NPY_MAXARGS];
2095
intp laststrides[NPY_MAXARGS];
2096
int fastmemcpy[NPY_MAXARGS];
2097
int *needbuffer = loop->needbuffer;
2098
intp index=loop->index, size=loop->size;
2101
int copysizes[NPY_MAXARGS];
2102
char **bufptr = loop->bufptr;
2103
char **buffer = loop->buffer;
2104
char **castbuf = loop->castbuf;
2105
intp *steps = loop->steps;
2106
char *tptr[NPY_MAXARGS];
2107
int ninnerloops = loop->ninnerloops;
2108
Bool pyobject[NPY_MAXARGS];
2109
int datasize[NPY_MAXARGS];
2110
int j, k, stopcondition;
2111
char *myptr1, *myptr2;
2113
for (i = 0; i <self->nargs; i++) {
2114
copyswapn[i] = mps[i]->descr->f->copyswapn;
2115
mpselsize[i] = mps[i]->descr->elsize;
2116
pyobject[i] = ((loop->obj & UFUNC_OBJ_ISOBJECT)
2117
&& (mps[i]->descr->type_num == PyArray_OBJECT));
2118
laststrides[i] = iters[i]->strides[loop->lastdim];
2119
if (steps[i] && laststrides[i] != mpselsize[i]) {
2780
PyUFunc_Loop1d *funcdata;
2782
NPY_UF_DBG_PRINT1("Getting binary op function for type number %d\n",
2785
/* If the type is custom and there are userloops, search for it here */
2786
if (self->userloops != NULL && PyTypeNum_ISUSERDEF(*otype)) {
2787
PyObject *key, *obj;
2788
key = PyInt_FromLong(*otype);
2792
obj = PyDict_GetItem(self->userloops, key);
2795
funcdata = (PyUFunc_Loop1d *)NpyCapsule_AsVoidPtr(obj);
2796
while (funcdata != NULL) {
2797
int *types = funcdata->arg_types;
2799
if (types[0] == *otype && types[1] == *otype &&
2800
types[2] == *otype) {
2801
*out_innerloop = funcdata->func;
2802
*out_innerloopdata = funcdata->data;
2806
funcdata = funcdata->next;
2811
/* Search for a function with compatible inputs */
2812
for (i = 0; i < self->ntypes; ++i) {
2813
char *types = self->types + i*self->nargs;
2815
NPY_UF_DBG_PRINT3("Trying loop with signature %d %d -> %d\n",
2816
types[0], types[1], types[2]);
2818
if (PyArray_CanCastSafely(*otype, types[0]) &&
2819
types[0] == types[1] &&
2820
(*otype == NPY_OBJECT || types[0] != NPY_OBJECT)) {
2821
/* If the signature is "xx->x", we found the loop */
2822
if (types[2] == types[0]) {
2823
*out_innerloop = self->functions[i];
2824
*out_innerloopdata = self->data[i];
2829
* Otherwise, we found the natural type of the reduction,
2830
* replace otype and search again
2126
/* Do generic buffered looping here (works for any kind of
2127
* arrays -- some need buffers, some don't.
2130
* New algorithm: N is the largest dimension. B is the buffer-size.
2131
* quotient is loop->ninnerloops-1
2132
* remainder is loop->leftover
2134
* Compute N = quotient * B + remainder.
2135
* quotient = N / B # integer math
2136
* (store quotient + 1) as the number of innerloops
2137
* remainder = N % B # integer remainder
2139
* On the inner-dimension we will have (quotient + 1) loops where
2140
* the size of the inner function is B for all but the last when the niter size is
2143
* So, the code looks very similar to NOBUFFER_LOOP except the inner-most loop is
2146
* for(i=0; i<quotient+1; i++) {
2147
* if (i==quotient+1) make itersize remainder size
2148
* copy only needed items to buffer.
2149
* swap input buffers if needed
2150
* cast input buffers if needed
2151
* call loop_function()
2152
* cast outputs in buffers if needed
2153
* swap outputs in buffers if needed
2154
* copy only needed items back to output arrays.
2155
* update all data-pointers by strides*niter
2161
* fprintf(stderr, "BUFFER...%d,%d,%d\n", loop->size,
2162
* loop->ninnerloops, loop->leftover);
2165
* for(i=0; i<self->nargs; i++) {
2166
* fprintf(stderr, "iters[%d]->dataptr = %p, %p of size %d\n", i,
2167
* iters[i], iters[i]->ao->data, PyArray_NBYTES(iters[i]->ao));
2170
stopcondition = ninnerloops;
2171
if (loop->leftover == 0) {
2174
while (index < size) {
2175
bufsize=loop->bufsize;
2176
for(i = 0; i<self->nargs; i++) {
2177
tptr[i] = loop->iters[i]->dataptr;
2178
if (needbuffer[i]) {
2179
dptr[i] = bufptr[i];
2180
datasize[i] = (steps[i] ? bufsize : 1);
2181
copysizes[i] = datasize[i] * mpselsize[i];
2188
/* This is the inner function over the last dimension */
2189
for (k = 1; k<=stopcondition; k++) {
2190
if (k == ninnerloops) {
2191
bufsize = loop->leftover;
2192
for (i=0; i<self->nargs;i++) {
2193
if (!needbuffer[i]) {
2196
datasize[i] = (steps[i] ? bufsize : 1);
2197
copysizes[i] = datasize[i] * mpselsize[i];
2200
for (i = 0; i < self->nin; i++) {
2201
if (!needbuffer[i]) {
2204
if (fastmemcpy[i]) {
2205
memcpy(buffer[i], tptr[i], copysizes[i]);
2210
for (j = 0; j < bufsize; j++) {
2211
memcpy(myptr1, myptr2, mpselsize[i]);
2212
myptr1 += mpselsize[i];
2213
myptr2 += laststrides[i];
2217
/* swap the buffer if necessary */
2219
/* fprintf(stderr, "swapping...\n");*/
2220
copyswapn[i](buffer[i], mpselsize[i], NULL, -1,
2221
(intp) datasize[i], 1,
2224
/* cast to the other buffer if necessary */
2225
if (loop->cast[i]) {
2226
/* fprintf(stderr, "casting... %d, %p %p\n", i, buffer[i]); */
2227
loop->cast[i](buffer[i], castbuf[i],
2233
bufcnt = (intp) bufsize;
2234
loop->function((char **)dptr, &bufcnt, steps, loop->funcdata);
2235
UFUNC_CHECK_ERROR(loop);
2237
for (i = self->nin; i < self->nargs; i++) {
2238
if (!needbuffer[i]) {
2241
if (loop->cast[i]) {
2242
/* fprintf(stderr, "casting back... %d, %p", i, castbuf[i]); */
2243
loop->cast[i](castbuf[i],
2249
copyswapn[i](buffer[i], mpselsize[i], NULL, -1,
2250
(intp) datasize[i], 1,
2254
* copy back to output arrays
2255
* decref what's already there for object arrays
2259
for (j = 0; j < datasize[i]; j++) {
2260
Py_XDECREF(*((PyObject **)myptr1));
2261
myptr1 += laststrides[i];
2264
if (fastmemcpy[i]) {
2265
memcpy(tptr[i], buffer[i], copysizes[i]);
2270
for (j = 0; j < bufsize; j++) {
2271
memcpy(myptr1, myptr2, mpselsize[i]);
2272
myptr1 += laststrides[i];
2273
myptr2 += mpselsize[i];
2277
if (k == stopcondition) {
2280
for (i = 0; i < self->nargs; i++) {
2281
tptr[i] += bufsize * laststrides[i];
2282
if (!needbuffer[i]) {
2287
/* end inner function over last dimension */
2289
if (loop->objfunc) {
2291
* DECREF castbuf when underlying function used
2292
* object arrays and casting was needed to get
2295
for (i = 0; i < self->nargs; i++) {
2296
if (loop->cast[i]) {
2297
if (steps[i] == 0) {
2298
Py_XDECREF(*((PyObject **)castbuf[i]));
2301
int size = loop->bufsize;
2303
PyObject **objptr = (PyObject **)castbuf[i];
2305
* size is loop->bufsize unless there
2308
if (ninnerloops == 1) {
2309
size = loop->leftover;
2311
for (j = 0; j < size; j++) {
2312
Py_XDECREF(*objptr);
2320
/* fixme -- probably not needed here*/
2321
UFUNC_CHECK_ERROR(loop);
2323
for (i = 0; i < self->nargs; i++) {
2324
PyArray_ITER_NEXT(loop->iters[i]);
2328
} /* end of last case statement */
2331
NPY_LOOP_END_THREADS;
2332
ufuncloop_dealloc(loop);
2336
NPY_LOOP_END_THREADS;
2338
ufuncloop_dealloc(loop);
2839
/* Search for the exact function */
2840
for (i = 0; i < self->ntypes; ++i) {
2841
char *types = self->types + i*self->nargs;
2843
if (PyArray_CanCastSafely(*otype, types[0]) &&
2844
types[0] == types[1] &&
2845
types[1] == types[2] &&
2846
(*otype == NPY_OBJECT || types[0] != NPY_OBJECT)) {
2847
/* Since the signature is "xx->x", we found the loop */
2848
*out_innerloop = self->functions[i];
2849
*out_innerloopdata = self->data[i];
2343
static PyArrayObject *
2344
_getidentity(PyUFuncObject *self, int otype, char *str)
2859
* The implementation of the reduction operators with the new iterator
2860
* turned into a bit of a long function here, but I think the design
2861
* of this part needs to be changed to be more like einsum, so it may
2862
* not be worth refactoring it too much. Consider this timing:
2864
* >>> a = arange(10000)
2867
* 10000 loops, best of 3: 17 us per loop
2869
* >>> timeit einsum("i->",a)
2870
* 100000 loops, best of 3: 13.5 us per loop
2874
PyUFunc_ReductionOp(PyUFuncObject *self, PyArrayObject *arr,
2876
int axis, int otype, int operation, char *opname)
2346
PyObject *obj, *arr;
2347
PyArray_Descr *typecode;
2349
if (self->identity == PyUFunc_None) {
2878
PyArrayObject *op[2];
2879
PyArray_Descr *op_dtypes[2] = {NULL, NULL};
2880
int op_axes_arrays[2][NPY_MAXDIMS];
2881
int *op_axes[2] = {op_axes_arrays[0], op_axes_arrays[1]};
2882
npy_uint32 op_flags[2];
2883
int i, idim, ndim, otype_final;
2884
int needs_api, need_outer_iterator;
2886
NpyIter *iter = NULL, *iter_inner = NULL;
2888
/* The selected inner loop */
2889
PyUFuncGenericFunction innerloop = NULL;
2890
void *innerloopdata = NULL;
2892
char *ufunc_name = self->name ? self->name : "(unknown)";
2894
/* These parameters come from extobj= or from a TLS global */
2895
int buffersize = 0, errormask = 0;
2896
PyObject *errobj = NULL;
2898
NPY_BEGIN_THREADS_DEF;
2900
NPY_UF_DBG_PRINT2("\nEvaluating ufunc %s.%s\n", ufunc_name, opname);
2903
printf("Doing %s.%s on array with dtype : ", ufunc_name, opname);
2904
PyObject_Print((PyObject *)PyArray_DESCR(arr), stdout, 0);
2908
if (PyUFunc_GetPyValues(opname, &buffersize, &errormask, &errobj) < 0) {
2912
/* Take a reference to out for later returning */
2915
otype_final = otype;
2916
if (get_binary_op_function(self, &otype_final,
2917
&innerloop, &innerloopdata) < 0) {
2918
PyArray_Descr *dtype = PyArray_DescrFromType(otype);
2350
2919
PyErr_Format(PyExc_ValueError,
2351
"zero-size array to ufunc.%s " \
2352
"without identity", str);
2355
if (self->identity == PyUFunc_One) {
2356
obj = PyInt_FromLong((long) 1);
2358
obj = PyInt_FromLong((long) 0);
2361
typecode = PyArray_DescrFromType(otype);
2362
arr = PyArray_FromAny(obj, typecode, 0, 0, CARRAY, NULL);
2364
return (PyArrayObject *)arr;
2368
_create_reduce_copy(PyUFuncReduceObject *loop, PyArrayObject **arr, int rtype)
2372
PyArray_Descr *ntype;
2374
maxsize = PyArray_SIZE(*arr);
2376
if (maxsize < loop->bufsize) {
2377
if (!(PyArray_ISBEHAVED_RO(*arr))
2378
|| PyArray_TYPE(*arr) != rtype) {
2379
ntype = PyArray_DescrFromType(rtype);
2380
new = PyArray_FromAny((PyObject *)(*arr),
2382
FORCECAST | ALIGNED, NULL);
2386
*arr = (PyArrayObject *)new;
2392
* Don't decref *arr before re-assigning
2393
* because it was not going to be DECREF'd anyway.
2395
* If a copy is made, then the copy will be removed
2396
* on deallocation of the loop structure by setting
2402
static PyUFuncReduceObject *
2403
construct_reduce(PyUFuncObject *self, PyArrayObject **arr, PyArrayObject *out,
2404
int axis, int otype, int operation, intp ind_size, char *str)
2406
PyUFuncReduceObject *loop;
2407
PyArrayObject *idarr;
2409
intp loop_i[MAX_DIMS], outsize = 0;
2411
PyArray_SCALARKIND scalars[3] = {PyArray_NOSCALAR, PyArray_NOSCALAR,
2416
/* Reduce type is the type requested of the input during reduction */
2417
if (self->core_enabled) {
2418
PyErr_Format(PyExc_RuntimeError,
2419
"construct_reduce not allowed on ufunc with signature");
2423
arg_types[0] = otype;
2424
arg_types[1] = otype;
2425
arg_types[2] = otype;
2426
if ((loop = _pya_malloc(sizeof(PyUFuncReduceObject))) == NULL) {
2437
loop->buffer = NULL;
2441
loop->errobj = NULL;
2443
loop->decref = NULL;
2444
loop->N = (*arr)->dimensions[axis];
2445
loop->instrides = (*arr)->strides[axis];
2446
if (select_types(loop->ufunc, arg_types, &(loop->function),
2447
&(loop->funcdata), scalars, NULL) == -1) {
2451
* output type may change -- if it does
2452
* reduction is forced into that type
2453
* and we need to select the reduction function again
2455
if (otype != arg_types[2]) {
2456
otype = arg_types[2];
2457
arg_types[0] = otype;
2458
arg_types[1] = otype;
2459
if (select_types(loop->ufunc, arg_types, &(loop->function),
2460
&(loop->funcdata), scalars, NULL) == -1) {
2465
/* get looping parameters from Python */
2466
if (PyUFunc_GetPyValues(str, &(loop->bufsize), &(loop->errormask),
2467
&(loop->errobj)) < 0) {
2470
/* Make copy if misbehaved or not otype for small arrays */
2471
if (_create_reduce_copy(loop, arr, otype) < 0) {
2477
loop->meth = ZERO_EL_REDUCELOOP;
2479
else if (PyArray_ISBEHAVED_RO(aar) && (otype == (aar)->descr->type_num)) {
2481
loop->meth = ONE_EL_REDUCELOOP;
2484
loop->meth = NOBUFFER_UFUNCLOOP;
2485
loop->steps[1] = (aar)->strides[axis];
2490
loop->meth = BUFFER_UFUNCLOOP;
2491
loop->swap = !(PyArray_ISNOTSWAPPED(aar));
2494
/* Determine if object arrays are involved */
2495
if (otype == PyArray_OBJECT || aar->descr->type_num == PyArray_OBJECT) {
2496
loop->obj = UFUNC_OBJ_ISOBJECT | UFUNC_OBJ_NEEDS_API;
2501
if ((loop->meth == ZERO_EL_REDUCELOOP)
2502
|| ((operation == UFUNC_REDUCEAT)
2503
&& (loop->meth == BUFFER_UFUNCLOOP))) {
2504
idarr = _getidentity(self, otype, str);
2505
if (idarr == NULL) {
2508
if (idarr->descr->elsize > UFUNC_MAXIDENTITY) {
2509
PyErr_Format(PyExc_RuntimeError,
2510
"UFUNC_MAXIDENTITY (%d) is too small"\
2511
"(needs to be at least %d)",
2512
UFUNC_MAXIDENTITY, idarr->descr->elsize);
2516
memcpy(loop->idptr, idarr->data, idarr->descr->elsize);
2520
/* Construct return array */
2521
flags = NPY_CARRAY | NPY_UPDATEIFCOPY | NPY_FORCECAST;
2524
for (j = 0, i = 0; i < nd; i++) {
2526
loop_i[j++] = (aar)->dimensions[i];
2530
loop->ret = (PyArrayObject *)
2531
PyArray_New(Py_TYPE(aar), aar->nd-1, loop_i,
2532
otype, NULL, NULL, 0, 0,
2536
outsize = PyArray_MultiplyList(loop_i, aar->nd - 1);
2539
case UFUNC_ACCUMULATE:
2541
loop->ret = (PyArrayObject *)
2542
PyArray_New(Py_TYPE(aar), aar->nd, aar->dimensions,
2543
otype, NULL, NULL, 0, 0, (PyObject *)aar);
2546
outsize = PyArray_MultiplyList(aar->dimensions, aar->nd);
2549
case UFUNC_REDUCEAT:
2550
memcpy(loop_i, aar->dimensions, nd*sizeof(intp));
2551
/* Index is 1-d array */
2552
loop_i[axis] = ind_size;
2554
loop->ret = (PyArrayObject *)
2555
PyArray_New(Py_TYPE(aar), aar->nd, loop_i, otype,
2556
NULL, NULL, 0, 0, (PyObject *)aar);
2559
outsize = PyArray_MultiplyList(loop_i, aar->nd);
2561
if (ind_size == 0) {
2562
loop->meth = ZERO_EL_REDUCELOOP;
2565
if (loop->meth == ONE_EL_REDUCELOOP) {
2566
loop->meth = NOBUFFER_REDUCELOOP;
2571
if (PyArray_SIZE(out) != outsize) {
2572
PyErr_SetString(PyExc_ValueError,
2573
"wrong shape for output");
2576
loop->ret = (PyArrayObject *)
2577
PyArray_FromArray(out, PyArray_DescrFromType(otype), flags);
2578
if (loop->ret && loop->ret != out) {
2582
if (loop->ret == NULL) {
2585
loop->insize = aar->descr->elsize;
2586
loop->outsize = loop->ret->descr->elsize;
2587
loop->bufptr[0] = loop->ret->data;
2589
if (loop->meth == ZERO_EL_REDUCELOOP) {
2590
loop->size = PyArray_SIZE(loop->ret);
2594
loop->it = (PyArrayIterObject *)PyArray_IterNew((PyObject *)aar);
2595
if (loop->it == NULL) {
2598
if (loop->meth == ONE_EL_REDUCELOOP) {
2599
loop->size = loop->it->size;
2604
* Fix iterator to loop over correct dimension
2605
* Set size in axis dimension to 1
2607
loop->it->contiguous = 0;
2608
loop->it->size /= (loop->it->dims_m1[axis]+1);
2609
loop->it->dims_m1[axis] = 0;
2610
loop->it->backstrides[axis] = 0;
2611
loop->size = loop->it->size;
2920
"could not find a matching type for %s.%s, "
2921
"requested type has type code '%c'",
2922
ufunc_name, opname, dtype ? dtype->type : '-');
2927
ndim = PyArray_NDIM(arr);
2929
/* Set up the output data type */
2930
op_dtypes[0] = PyArray_DescrFromType(otype_final);
2931
if (op_dtypes[0] == NULL) {
2935
#if NPY_UF_DBG_TRACING
2936
printf("Found %s.%s inner loop with dtype : ", ufunc_name, opname);
2937
PyObject_Print((PyObject *)op_dtypes[0], stdout, 0);
2941
/* Set up the op_axes for the outer loop */
2612
2942
if (operation == UFUNC_REDUCE) {
2943
for (i = 0, idim = 0; idim < ndim; ++idim) {
2945
op_axes_arrays[0][i] = i;
2946
op_axes_arrays[1][i] = idim;
2951
else if (operation == UFUNC_ACCUMULATE) {
2952
for (idim = 0; idim < ndim; ++idim) {
2953
op_axes_arrays[0][idim] = idim;
2954
op_axes_arrays[1][idim] = idim;
2616
loop->rit = (PyArrayIterObject *) \
2617
PyArray_IterNew((PyObject *)(loop->ret));
2618
if (loop->rit == NULL) {
2622
* Fix iterator to loop over correct dimension
2623
* Set size in axis dimension to 1
2625
loop->rit->contiguous = 0;
2626
loop->rit->size /= (loop->rit->dims_m1[axis] + 1);
2627
loop->rit->dims_m1[axis] = 0;
2628
loop->rit->backstrides[axis] = 0;
2958
PyErr_Format(PyExc_RuntimeError,
2959
"invalid reduction operation %s.%s", ufunc_name, opname);
2963
/* The per-operand flags for the outer loop */
2964
op_flags[0] = NPY_ITER_READWRITE|
2965
NPY_ITER_NO_BROADCAST|
2967
NPY_ITER_NO_SUBTYPE;
2968
op_flags[1] = NPY_ITER_READONLY;
2973
need_outer_iterator = (ndim > 1);
2974
if (operation == UFUNC_ACCUMULATE) {
2975
/* This is because we can't buffer, so must do UPDATEIFCOPY */
2976
if (!PyArray_ISALIGNED(arr) || (out && !PyArray_ISALIGNED(out)) ||
2977
!PyArray_EquivTypes(op_dtypes[0], PyArray_DESCR(arr)) ||
2979
!PyArray_EquivTypes(op_dtypes[0], PyArray_DESCR(out)))) {
2980
need_outer_iterator = 1;
2984
if (need_outer_iterator) {
2986
npy_uint32 flags = NPY_ITER_ZEROSIZE_OK|
2988
PyArray_Descr **op_dtypes_param = NULL;
2990
if (operation == UFUNC_REDUCE) {
2991
ndim_iter = ndim - 1;
2993
op_dtypes_param = op_dtypes;
2996
else if (operation == UFUNC_ACCUMULATE) {
2998
* The way accumulate is set up, we can't do buffering,
2999
* so make a copy instead when necessary.
3002
flags |= NPY_ITER_MULTI_INDEX;
3003
/* Add some more flags */
3004
op_flags[0] |= NPY_ITER_UPDATEIFCOPY|NPY_ITER_ALIGNED;
3005
op_flags[1] |= NPY_ITER_COPY|NPY_ITER_ALIGNED;
3006
op_dtypes_param = op_dtypes;
3007
op_dtypes[1] = op_dtypes[0];
3009
NPY_UF_DBG_PRINT("Allocating outer iterator\n");
3010
iter = NpyIter_AdvancedNew(2, op, flags,
3011
NPY_KEEPORDER, NPY_UNSAFE_CASTING,
3014
ndim_iter, op_axes, NULL, 0);
2630
3019
if (operation == UFUNC_ACCUMULATE) {
2631
loop->steps[0] = loop->ret->strides[axis];
2637
loop->steps[2] = loop->steps[0];
2638
loop->bufptr[2] = loop->bufptr[0] + loop->steps[2];
2639
if (loop->meth == BUFFER_UFUNCLOOP) {
2642
loop->steps[1] = loop->outsize;
2643
if (otype != aar->descr->type_num) {
2644
_size=loop->bufsize*(loop->outsize + aar->descr->elsize);
2645
loop->buffer = PyDataMem_NEW(_size);
2646
if (loop->buffer == NULL) {
2649
if (loop->obj & UFUNC_OBJ_ISOBJECT) {
2650
memset(loop->buffer, 0, _size);
2652
loop->castbuf = loop->buffer + loop->bufsize*aar->descr->elsize;
2653
loop->bufptr[1] = loop->castbuf;
2654
loop->cast = PyArray_GetCastFunc(aar->descr, otype);
2655
if (loop->cast == NULL) {
2660
_size = loop->bufsize * loop->outsize;
2661
loop->buffer = PyDataMem_NEW(_size);
2662
if (loop->buffer == NULL) {
2665
if (loop->obj & UFUNC_OBJ_ISOBJECT) {
2666
memset(loop->buffer, 0, _size);
2668
loop->bufptr[1] = loop->buffer;
2671
PyUFunc_clearfperr();
2675
ufuncreduce_dealloc(loop);
3020
/* In case COPY or UPDATEIFCOPY occurred */
3021
op[0] = NpyIter_GetOperandArray(iter)[0];
3022
op[1] = NpyIter_GetOperandArray(iter)[1];
3024
if (PyArray_SIZE(op[0]) == 0) {
3032
if (NpyIter_RemoveAxis(iter, axis) != NPY_SUCCEED) {
3035
if (NpyIter_RemoveMultiIndex(iter) != NPY_SUCCEED) {
3041
/* Get the output */
3044
op[0] = out = NpyIter_GetOperandArray(iter)[0];
3048
PyArray_Descr *dtype = op_dtypes[0];
3050
if (operation == UFUNC_REDUCE) {
3051
op[0] = out = (PyArrayObject *)PyArray_NewFromDescr(
3052
&PyArray_Type, dtype,
3053
0, NULL, NULL, NULL,
3056
else if (operation == UFUNC_ACCUMULATE) {
3057
op[0] = out = (PyArrayObject *)PyArray_NewFromDescr(
3058
&PyArray_Type, dtype,
3059
ndim, PyArray_DIMS(op[1]), NULL, NULL,
3069
* If the reduction unit has size zero, either return the reduction
3070
* unit for UFUNC_REDUCE, or return the zero-sized output array
3071
* for UFUNC_ACCUMULATE.
3073
if (PyArray_DIM(op[1], axis) == 0) {
3074
if (operation == UFUNC_REDUCE) {
3075
if (self->identity == PyUFunc_None) {
3076
PyErr_Format(PyExc_ValueError,
3077
"zero-size array to %s.%s "
3078
"without identity", ufunc_name, opname);
3081
if (self->identity == PyUFunc_One) {
3082
PyObject *obj = PyInt_FromLong((long) 1);
3086
PyArray_FillWithScalar(op[0], obj);
3089
PyObject *obj = PyInt_FromLong((long) 0);
3093
PyArray_FillWithScalar(op[0], obj);
3100
else if (PyArray_SIZE(op[0]) == 0) {
3104
/* Only allocate an inner iterator if it's necessary */
3105
if (!PyArray_ISALIGNED(op[1]) || !PyArray_ISALIGNED(op[0]) ||
3106
!PyArray_EquivTypes(op_dtypes[0], PyArray_DESCR(op[1])) ||
3107
!PyArray_EquivTypes(op_dtypes[0], PyArray_DESCR(op[0]))) {
3108
/* Also set the dtype for buffering arr */
3109
op_dtypes[1] = op_dtypes[0];
3111
NPY_UF_DBG_PRINT("Allocating inner iterator\n");
3112
if (operation == UFUNC_REDUCE) {
3113
/* The per-operand flags for the inner loop */
3114
op_flags[0] = NPY_ITER_READWRITE|
3116
op_flags[1] = NPY_ITER_READONLY|
3120
op_axes[1][0] = axis;
3122
iter_inner = NpyIter_AdvancedNew(2, op, NPY_ITER_EXTERNAL_LOOP|
3124
NPY_ITER_DELAY_BUFALLOC|
3128
NPY_CORDER, NPY_UNSAFE_CASTING,
3129
op_flags, op_dtypes,
3130
1, op_axes, NULL, buffersize);
3132
/* Should never get an inner iterator for ACCUMULATE */
3134
PyErr_SetString(PyExc_RuntimeError,
3135
"internal ufunc reduce error, should not need inner iterator");
3138
if (iter_inner == NULL) {
3143
if (iter && NpyIter_GetIterSize(iter) != 0) {
3144
char *dataptr_copy[3];
3145
npy_intp stride_copy[3];
3147
NpyIter_IterNextFunc *iternext;
3150
int itemsize = op_dtypes[0]->elsize;
3152
/* Get the variables needed for the loop */
3153
iternext = NpyIter_GetIterNext(iter, NULL);
3154
if (iternext == NULL) {
3157
dataptr = NpyIter_GetDataPtrArray(iter);
3160
/* Execute the loop with two nested iterators */
3162
/* Only UFUNC_REDUCE uses iter_inner */
3163
NpyIter_IterNextFunc *iternext_inner;
3164
char **dataptr_inner;
3165
npy_intp *stride_inner;
3166
npy_intp count, *count_ptr_inner;
3168
NPY_UF_DBG_PRINT("UFunc: Reduce loop with two nested iterators\n");
3169
iternext_inner = NpyIter_GetIterNext(iter_inner, NULL);
3170
if (iternext_inner == NULL) {
3173
dataptr_inner = NpyIter_GetDataPtrArray(iter_inner);
3174
stride_inner = NpyIter_GetInnerStrideArray(iter_inner);
3175
count_ptr_inner = NpyIter_GetInnerLoopSizePtr(iter_inner);
3177
needs_api = NpyIter_IterationNeedsAPI(iter) ||
3178
NpyIter_IterationNeedsAPI(iter_inner);
3187
/* Reset the inner iterator to the outer's data */
3188
if (NpyIter_ResetBasePointers(iter_inner, dataptr, NULL)
3193
/* Copy the first element to start the reduction */
3194
if (otype == NPY_OBJECT) {
3195
Py_XDECREF(*(PyObject **)dataptr_inner[0]);
3196
*(PyObject **)dataptr_inner[0] =
3197
*(PyObject **)dataptr_inner[1];
3198
Py_XINCREF(*(PyObject **)dataptr_inner[0]);
3201
memcpy(dataptr_inner[0], dataptr_inner[1], itemsize);
3207
count = *count_ptr_inner;
3208
/* Turn the two items into three for the inner loop */
3209
dataptr_copy[0] = dataptr_inner[0];
3210
dataptr_copy[1] = dataptr_inner[1];
3211
dataptr_copy[2] = dataptr_inner[0];
3214
dataptr_copy[1] += stride_inner[1];
3217
stride_copy[1] = stride_inner[1];
3218
NPY_UF_DBG_PRINT1("iterator loop count %d\n", (int)count);
3219
innerloop(dataptr_copy, &count,
3220
stride_copy, innerloopdata);
3221
} while(iternext_inner(iter_inner));
3222
} while (iternext(iter));
3228
/* Execute the loop with just the outer iterator */
3230
npy_intp count_m1 = PyArray_DIM(op[1], axis)-1;
3231
npy_intp stride0 = 0, stride1 = PyArray_STRIDE(op[1], axis);
3233
NPY_UF_DBG_PRINT("UFunc: Reduce loop with just outer iterator\n");
3235
if (operation == UFUNC_ACCUMULATE) {
3236
stride0 = PyArray_STRIDE(op[0], axis);
3239
stride_copy[0] = stride0;
3240
stride_copy[1] = stride1;
3241
stride_copy[2] = stride0;
3243
needs_api = NpyIter_IterationNeedsAPI(iter);
3251
dataptr_copy[0] = dataptr[0];
3252
dataptr_copy[1] = dataptr[1];
3253
dataptr_copy[2] = dataptr[0];
3255
/* Copy the first element to start the reduction */
3256
if (otype == NPY_OBJECT) {
3257
Py_XDECREF(*(PyObject **)dataptr_copy[0]);
3258
*(PyObject **)dataptr_copy[0] =
3259
*(PyObject **)dataptr_copy[1];
3260
Py_XINCREF(*(PyObject **)dataptr_copy[0]);
3263
memcpy(dataptr_copy[0], dataptr_copy[1], itemsize);
3267
/* Turn the two items into three for the inner loop */
3268
if (operation == UFUNC_REDUCE) {
3269
dataptr_copy[1] += stride1;
3271
else if (operation == UFUNC_ACCUMULATE) {
3272
dataptr_copy[1] += stride1;
3273
dataptr_copy[2] += stride0;
3275
NPY_UF_DBG_PRINT1("iterator loop count %d\n",
3277
innerloop(dataptr_copy, &count_m1,
3278
stride_copy, innerloopdata);
3280
} while (iternext(iter));
3287
else if (iter == NULL) {
3288
char *dataptr_copy[3];
3289
npy_intp stride_copy[3];
3291
int itemsize = op_dtypes[0]->elsize;
3293
/* Execute the loop with just the inner iterator */
3295
/* Only UFUNC_REDUCE uses iter_inner */
3296
NpyIter_IterNextFunc *iternext_inner;
3297
char **dataptr_inner;
3298
npy_intp *stride_inner;
3299
npy_intp count, *count_ptr_inner;
3302
NPY_UF_DBG_PRINT("UFunc: Reduce loop with just inner iterator\n");
3304
iternext_inner = NpyIter_GetIterNext(iter_inner, NULL);
3305
if (iternext_inner == NULL) {
3308
dataptr_inner = NpyIter_GetDataPtrArray(iter_inner);
3309
stride_inner = NpyIter_GetInnerStrideArray(iter_inner);
3310
count_ptr_inner = NpyIter_GetInnerLoopSizePtr(iter_inner);
3312
/* Reset the inner iterator to prepare the buffers */
3313
if (NpyIter_Reset(iter_inner, NULL) != NPY_SUCCEED) {
3317
needs_api = NpyIter_IterationNeedsAPI(iter_inner);
3323
/* Copy the first element to start the reduction */
3324
if (otype == NPY_OBJECT) {
3325
Py_XDECREF(*(PyObject **)dataptr_inner[0]);
3326
*(PyObject **)dataptr_inner[0] =
3327
*(PyObject **)dataptr_inner[1];
3328
Py_XINCREF(*(PyObject **)dataptr_inner[0]);
3331
memcpy(dataptr_inner[0], dataptr_inner[1], itemsize);
3337
count = *count_ptr_inner;
3338
/* Turn the two items into three for the inner loop */
3339
dataptr_copy[0] = dataptr_inner[0];
3340
dataptr_copy[1] = dataptr_inner[1];
3341
dataptr_copy[2] = dataptr_inner[0];
3344
dataptr_copy[1] += stride_inner[1];
3347
stride_copy[1] = stride_inner[1];
3348
NPY_UF_DBG_PRINT1("iterator loop count %d\n", (int)count);
3349
innerloop(dataptr_copy, &count,
3350
stride_copy, innerloopdata);
3351
} while(iternext_inner(iter_inner));
3357
/* Execute the loop with no iterators */
3359
npy_intp count = PyArray_DIM(op[1], axis);
3360
npy_intp stride0 = 0, stride1 = PyArray_STRIDE(op[1], axis);
3362
NPY_UF_DBG_PRINT("UFunc: Reduce loop with no iterators\n");
3364
if (operation == UFUNC_REDUCE) {
3365
if (PyArray_NDIM(op[0]) != 0) {
3366
PyErr_SetString(PyExc_ValueError,
3367
"provided out is the wrong size "
3368
"for the reduction");
3372
else if (operation == UFUNC_ACCUMULATE) {
3373
if (PyArray_NDIM(op[0]) != PyArray_NDIM(op[1]) ||
3374
!PyArray_CompareLists(PyArray_DIMS(op[0]),
3375
PyArray_DIMS(op[1]),
3376
PyArray_NDIM(op[0]))) {
3377
PyErr_SetString(PyExc_ValueError,
3378
"provided out is the wrong size "
3379
"for the reduction");
3382
stride0 = PyArray_STRIDE(op[0], axis);
3385
stride_copy[0] = stride0;
3386
stride_copy[1] = stride1;
3387
stride_copy[2] = stride0;
3389
/* Turn the two items into three for the inner loop */
3390
dataptr_copy[0] = PyArray_BYTES(op[0]);
3391
dataptr_copy[1] = PyArray_BYTES(op[1]);
3392
dataptr_copy[2] = PyArray_BYTES(op[0]);
3394
/* Copy the first element to start the reduction */
3395
if (otype == NPY_OBJECT) {
3396
Py_XDECREF(*(PyObject **)dataptr_copy[0]);
3397
*(PyObject **)dataptr_copy[0] =
3398
*(PyObject **)dataptr_copy[1];
3399
Py_XINCREF(*(PyObject **)dataptr_copy[0]);
3402
memcpy(dataptr_copy[0], dataptr_copy[1], itemsize);
3407
if (operation == UFUNC_REDUCE) {
3408
dataptr_copy[1] += stride1;
3410
else if (operation == UFUNC_ACCUMULATE) {
3411
dataptr_copy[1] += stride1;
3412
dataptr_copy[2] += stride0;
3415
NPY_UF_DBG_PRINT1("iterator loop count %d\n", (int)count);
3417
needs_api = PyDataType_REFCHK(op_dtypes[0]);
3423
innerloop(dataptr_copy, &count,
3424
stride_copy, innerloopdata);
3434
Py_XDECREF(op_dtypes[0]);
3436
NpyIter_Deallocate(iter);
3438
if (iter_inner != NULL) {
3439
NpyIter_Deallocate(iter_inner);
3444
return (PyObject *)out;
3448
Py_XDECREF(op_dtypes[0]);
3451
NpyIter_Deallocate(iter);
3453
if (iter_inner != NULL) {
3454
NpyIter_Deallocate(iter_inner);
2681
3463
* We have two basic kinds of loops. One is used when arr is not-swapped
2682
3464
* and aligned and output type is the same as input type. The other uses