1
// Numexpr - Fast numerical array expression evaluator for NumPy.
4
// Author: See AUTHORS.txt
6
// See LICENSE.txt for details about copyright and rights to use.
8
// module.cpp contains the CPython-specific module exposure.
10
#define DO_NUMPY_IMPORT_ARRAY
13
#include <structmember.h>
16
#include "interpreter.hpp"
17
#include "numexpr_object.hpp"
21
// Global state. The file interpreter.hpp also has some global state
22
// in its 'th_params' variable
26
/* Worker-thread entry point (it is the start routine handed to
   pthread_create() in init_threads()).

   tidptr points to an int holding this thread's id.  The visible body:
     - waits at a start meeting point guarded by gs.count_threads_mutex,
     - copies the job description out of the shared th_params,
     - sets up per-thread memory pointers and temporary space,
     - processes blocks of the range [start, vlen) by calling
       vm_engine_iter_task() until the range is exhausted or gs.giveup is set,
     - waits at a finish meeting point and releases the temporaries.
   Errors are reported to the main thread through th_params.ret_code. */
27
void *th_worker(void *tidptr)
29
int tid = *(int *)tidptr;
30
/* Parameters for threads */
44
npy_intp istart, iend;
46
// For output buffering if needed
47
vector<char> out_buffer;
51
gs.init_sentinels_done = 0; /* sentinels are yet to be initialised */
53
/* Meeting point for all threads (wait for initialization) */
/* NOTE(review): the wait below is guarded by `if`, not by a loop that
   re-checks the predicate; POSIX permits spurious wakeups from
   pthread_cond_wait() -- confirm that this is safe here. */
54
pthread_mutex_lock(&gs.count_threads_mutex);
55
if (gs.count_threads < gs.nthreads) {
57
pthread_cond_wait(&gs.count_threads_cv, &gs.count_threads_mutex);
60
pthread_cond_broadcast(&gs.count_threads_cv);
62
pthread_mutex_unlock(&gs.count_threads_mutex);
64
/* Check if thread has been asked to return */
69
/* Get parameters for this thread before entering the main loop */
/* These are private copies of fields of the shared th_params structure. */
70
start = th_params.start;
71
vlen = th_params.vlen;
72
block_size = th_params.block_size;
73
params = th_params.params;
74
pc_error = th_params.pc_error;
76
// If output buffering is needed, allocate it
// The buffer is sized to memsizes[0] * BLOCK_SIZE1 bytes (presumably
// memsizes[0] is the output item size -- TODO confirm); otherwise
// params.out_buffer is set to NULL.
77
if (th_params.need_output_buffering) {
78
out_buffer.resize(params.memsizes[0] * BLOCK_SIZE1);
79
params.out_buffer = &out_buffer[0];
81
params.out_buffer = NULL;
84
/* Populate private data for each thread */
85
n_inputs = params.n_inputs;
86
n_constants = params.n_constants;
87
n_temps = params.n_temps;
/* One pointer slot per input, constant and temporary, plus one extra
   slot (presumably for the output -- TODO confirm). */
88
memsize = (1+n_inputs+n_constants+n_temps) * sizeof(char *);
89
/* XXX malloc seems thread safe for POSIX, but for Win? */
/* NOTE(review): the malloc() result is passed straight to memcpy();
   no NULL check is visible in between. */
90
mem = (char **)malloc(memsize);
91
memcpy(mem, params.mem, memsize);
93
errmsg = th_params.errmsg;
97
/* Loop over blocks */
/* gs.count_mutex is held while the shared block index (gs.gindex), the
   sentinel flags and th_params.ret_code are read or updated. */
98
pthread_mutex_lock(&gs.count_mutex);
99
if (!gs.init_sentinels_done) {
100
/* Set sentinels and other global variables */
103
iend = istart + block_size;
107
gs.init_sentinels_done = 1; /* sentinels have been initialised */
108
gs.giveup = 0; /* no giveup initially */
110
gs.gindex += block_size;
112
iend = istart + block_size;
117
/* Grab one of the iterators */
/* Iterators and memsteps are indexed by thread id. */
118
iter = th_params.iter[tid];
120
th_params.ret_code = -1;
123
memsteps = th_params.memsteps[tid];
124
/* Get temporary space for each thread */
125
ret = get_temps_space(params, mem, BLOCK_SIZE1);
127
/* Propagate error to main thread */
128
th_params.ret_code = ret;
131
pthread_mutex_unlock(&gs.count_mutex);
/* Process blocks until the range is exhausted or gs.giveup is set. */
133
while (istart < vlen && !gs.giveup) {
134
/* Reset the iterator to the range for this task */
135
ret = NpyIter_ResetToIterIndexRange(iter, istart, iend,
137
/* Execute the task */
139
ret = vm_engine_iter_task(iter, memsteps, params, pc_error, errmsg);
143
pthread_mutex_lock(&gs.count_mutex);
145
/* Propagate error to main thread */
146
th_params.ret_code = ret;
147
pthread_mutex_unlock(&gs.count_mutex);
/* Under the mutex, advance the shared block index and compute the end
   of the next block for this thread. */
151
pthread_mutex_lock(&gs.count_mutex);
152
gs.gindex += block_size;
154
iend = istart + block_size;
158
pthread_mutex_unlock(&gs.count_mutex);
161
/* Meeting point for all threads (wait for finalization) */
/* NOTE(review): as at the start meeting point, the wait is guarded by
   `if` rather than a predicate loop -- confirm. */
162
pthread_mutex_lock(&gs.count_threads_mutex);
163
if (gs.count_threads > 0) {
165
pthread_cond_wait(&gs.count_threads_cv, &gs.count_threads_mutex);
168
pthread_cond_broadcast(&gs.count_threads_cv);
170
pthread_mutex_unlock(&gs.count_threads_mutex);
172
/* Release resources */
173
free_temps_space(params, mem);
176
} /* closes while(1) */
178
/* This should never be reached, but anyway */
182
/* Initialize threads.

   Sets up the synchronisation objects stored in the global state `gs`
   (gs.count_mutex, gs.count_threads_mutex, gs.count_threads_cv), resets
   gs.count_threads to 0 and creates gs.nthreads threads, each running
   th_worker() with a pointer to its gs.tids[] entry as argument.  When
   done it sets gs.init_threads_done and records the current PID in gs.pid. */
183
int init_threads(void)
187
/* Initialize mutex and condition variable objects */
188
pthread_mutex_init(&gs.count_mutex, NULL);
190
/* Barrier initialization */
191
pthread_mutex_init(&gs.count_threads_mutex, NULL);
192
pthread_cond_init(&gs.count_threads_cv, NULL);
193
gs.count_threads = 0; /* Reset threads counter */
195
/* Finally, create the threads */
196
for (tid = 0; tid < gs.nthreads; tid++) {
198
rc = pthread_create(&gs.threads[tid], NULL, th_worker,
199
(void *)&gs.tids[tid]);
/* rc is the pthread_create() return code; it is reported below together
   with its strerror() text. */
202
"ERROR; return code from pthread_create() is %d\n", rc);
203
fprintf(stderr, "\tError detail: %s\n", strerror(rc));
208
gs.init_threads_done = 1; /* Initialization done! */
/* The saved PID is compared against getpid() in numexpr_set_nthreads(). */
209
gs.pid = (int)getpid(); /* save the PID for this process */
214
/* Set the number of threads in numexpr's VM.

   nthreads_new: requested thread count.  Values above MAX_THREADS or
   <= 0 take the error branches below.
   The previous value of gs.nthreads is saved in nthreads_old on entry
   (presumably this is what the function returns -- TODO confirm).

   The visible flow: tear down the existing pool if one is running in this
   process, store the new count in gs.nthreads, then launch a new pool
   when more than one thread is requested. */
215
int numexpr_set_nthreads(int nthreads_new)
217
int nthreads_old = gs.nthreads;
221
if (nthreads_new > MAX_THREADS) {
223
"Error. nthreads cannot be larger than MAX_THREADS (%d)",
227
else if (nthreads_new <= 0) {
/* NOTE(review): this message has no trailing newline. */
228
fprintf(stderr, "Error. nthreads must be a positive integer");
232
/* Only join threads if they are initialized and our PID matches
233
the one saved in gs.pid (a different PID probably means that we are a
234
subprocess, and thus threads are non-existent). */
235
if (gs.nthreads > 1 && gs.init_threads_done && gs.pid == getpid()) {
236
/* Tell all existing threads to finish */
/* Uses the same mutex and condition variable as the meeting points in
   th_worker(). */
238
pthread_mutex_lock(&gs.count_threads_mutex);
239
if (gs.count_threads < gs.nthreads) {
241
pthread_cond_wait(&gs.count_threads_cv, &gs.count_threads_mutex);
244
pthread_cond_broadcast(&gs.count_threads_cv);
246
pthread_mutex_unlock(&gs.count_threads_mutex);
248
/* Join exiting threads */
249
for (t=0; t<gs.nthreads; t++) {
250
rc = pthread_join(gs.threads[t], &status);
253
"ERROR; return code from pthread_join() is %d\n",
255
fprintf(stderr, "\tError detail: %s\n", strerror(rc));
/* Mark the pool as gone so the launch condition below can fire. */
259
gs.init_threads_done = 0;
263
/* Launch a new pool of threads (if necessary) */
264
gs.nthreads = nthreads_new;
/* A new pool is needed when no pool is initialised, or when the saved
   PID differs from the current one. */
265
if (gs.nthreads > 1 && (!gs.init_threads_done || gs.pid != getpid())) {
276
/* Python-callable: fetch the MKL version string into buf (capacity len)
   with MKL_Get_Version_String() and return it as a Python string.
   `args` is not used in the visible code. */
_get_vml_version(PyObject *self, PyObject *args)
280
MKL_Get_Version_String(buf, len);
281
return Py_BuildValue("s", buf);
285
/* Python-callable: set the VML accuracy mode.
   Expects one int argument (the new mode) and returns the previous
   accuracy mode as a Python int. */
_set_vml_accuracy_mode(PyObject *self, PyObject *args)
287
int mode_in, mode_old;
288
if (!PyArg_ParseTuple(args, "i", &mode_in))
/* Remember only the accuracy bits of the current mode. */
290
mode_old = vmlGetMode() & VML_ACCURACY_MASK;
/* Install the requested accuracy bits, always combined with
   VML_ERRMODE_IGNORE. */
291
vmlSetMode((mode_in & VML_ACCURACY_MASK) | VML_ERRMODE_IGNORE );
292
return Py_BuildValue("i", mode_old);
296
/* Python-callable: parse one int argument and pass it to
   mkl_domain_set_num_threads() for the MKL_VML domain. */
_set_vml_num_threads(PyObject *self, PyObject *args)
299
if (!PyArg_ParseTuple(args, "i", &max_num_threads))
301
mkl_domain_set_num_threads(max_num_threads, MKL_VML);
308
/* Python-callable wrapper around numexpr_set_nthreads().
   Expects one int argument (the new thread count) and returns the value
   that numexpr_set_nthreads() returned, as a Python int. */
_set_num_threads(PyObject *self, PyObject *args)
310
int num_threads, nthreads_old;
311
if (!PyArg_ParseTuple(args, "i", &num_threads))
313
nthreads_old = numexpr_set_nthreads(num_threads);
314
return Py_BuildValue("i", nthreads_old);
317
/* Method table for the extension module.  Each entry gives the Python
   name, the C function, the calling convention (METH_VARARGS: positional
   arguments passed as a tuple) and the docstring. */
static PyMethodDef module_methods[] = {
319
/* VML/MKL helpers */
{"_get_vml_version", _get_vml_version, METH_VARARGS,
320
"Get the VML/MKL library version."},
321
{"_set_vml_accuracy_mode", _set_vml_accuracy_mode, METH_VARARGS,
322
"Set accuracy mode for VML functions."},
323
{"_set_vml_num_threads", _set_vml_num_threads, METH_VARARGS,
324
"Suggests a maximum number of threads to be used in VML operations."},
326
/* Thread count for numexpr's own operations */
{"_set_num_threads", _set_num_threads, METH_VARARGS,
327
"Suggests a maximum number of threads to be used in operations."},
332
/* Add one entry to the dictionary d: the key is a bytes object built from
   sname and the value is a Python int built from name.

   routine_name is used as the message of the RuntimeError raised below
   (presumably when creating the key or value fails -- TODO confirm).
   Callers in this file treat a negative return value as failure. */
add_symbol(PyObject *d, const char *sname, int name, const char* routine_name)
341
o = PyLong_FromLong(name);
342
s = PyBytes_FromString(sname);
344
PyErr_SetString(PyExc_RuntimeError, routine_name);
/* r holds the PyDict_SetItem() status. */
347
r = PyDict_SetItem(d, s, o);
356
#if PY_MAJOR_VERSION >= 3
358
/* XXX: handle the "global_state" state via moduledef */
/* Module definition passed to PyModule_Create() in the Python 3 build. */
359
static struct PyModuleDef moduledef = {
360
PyModuleDef_HEAD_INIT,
/* Per-module state size (presumably the m_size slot -- TODO confirm);
   -1 is used here instead of sizeof(struct global_state). */
363
-1, /* sizeof(struct global_state), */
/* No traverse/clear hooks are installed. */
366
NULL, /* module_traverse, */
367
NULL, /* module_clear, */
371
#define INITERROR return NULL
374
PyInit_interpreter(void)
377
#define INITERROR return
385
if (PyType_Ready(&NumExprType) < 0)
388
#if PY_MAJOR_VERSION >= 3
389
m = PyModule_Create(&moduledef);
391
m = Py_InitModule3("interpreter", module_methods, NULL);
397
Py_INCREF(&NumExprType);
398
PyModule_AddObject(m, "NumExpr", (PyObject *)&NumExprType);
405
#define OPCODE(n, name, sname, ...) \
406
if (add_symbol(d, sname, name, "add_op") < 0) { INITERROR; }
407
#include "opcodes.hpp"
410
if (PyModule_AddObject(m, "opcodes", d) < 0) INITERROR;
415
#define add_func(name, sname) \
416
if (add_symbol(d, sname, name, "add_func") < 0) { INITERROR; }
417
#define FUNC_FF(name, sname, ...) add_func(name, sname);
418
#define FUNC_FFF(name, sname, ...) add_func(name, sname);
419
#define FUNC_DD(name, sname, ...) add_func(name, sname);
420
#define FUNC_DDD(name, sname, ...) add_func(name, sname);
421
#define FUNC_CC(name, sname, ...) add_func(name, sname);
422
#define FUNC_CCC(name, sname, ...) add_func(name, sname);
423
#include "functions.hpp"
433
if (PyModule_AddObject(m, "funccodes", d) < 0) INITERROR;
435
if (PyModule_AddObject(m, "allaxes", PyLong_FromLong(255)) < 0) INITERROR;
436
if (PyModule_AddObject(m, "maxdims", PyLong_FromLong(NPY_MAXDIMS)) < 0) INITERROR;
438
#if PY_MAJOR_VERSION >= 3