2
// Copyright 2012 Francisco Jerez
4
// Permission is hereby granted, free of charge, to any person obtaining a
5
// copy of this software and associated documentation files (the "Software"),
6
// to deal in the Software without restriction, including without limitation
7
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
// and/or sell copies of the Software, and to permit persons to whom the
9
// Software is furnished to do so, subject to the following conditions:
11
// The above copyright notice and this permission notice shall be included in
12
// all copies or substantial portions of the Software.
14
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17
// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19
// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
23
#include "api/util.hpp"
24
#include "core/kernel.hpp"
25
#include "core/event.hpp"
27
using namespace clover;
30
// clCreateKernel: create a kernel object for the function 'name' in 'prog'.
// NOTE(review): this extract is damaged -- the bare numeric lines are
// leftover source line numbers, and several code lines (the return-type
// line, the guard conditions before the first two throws, the return-NULL
// statements and the second catch header) are missing from view.
// Comments below describe only what is visible.
clCreateKernel(cl_program prog, const char *name,
31
               cl_int *errcode_ret) try {
33
   // NOTE(review): the guard condition for this throw is missing from this
   // extract -- presumably a null-'prog' check.  Confirm against the full file.
      throw error(CL_INVALID_PROGRAM);
36
   // NOTE(review): guard missing from view -- presumably a null-'name' check.
      throw error(CL_INVALID_VALUE);
38
   // No device binary has been built yet: nothing to look symbols up in.
   if (prog->binaries().empty())
39
      throw error(CL_INVALID_PROGRAM_EXECUTABLE);
41
   // Look the symbol up in the first device binary; sym() apparently throws
   // module::noent_error when 'name' is absent (handled below).
   auto sym = prog->binaries().begin()->second.sym(name);
43
   ret_error(errcode_ret, CL_SUCCESS);
44
   return new kernel(*prog, name, { sym.args.begin(), sym.args.end() });
46
} catch (module::noent_error &e) {
47
   // An unknown symbol name maps to CL_INVALID_KERNEL_NAME.
   ret_error(errcode_ret, CL_INVALID_KERNEL_NAME);
51
   // NOTE(review): the 'catch (error &e)' header this statement belongs to
   // is not visible in this extract.
   ret_error(errcode_ret, e);
56
// clCreateKernelsInProgram: create one kernel object per symbol found in
// the program's (first) device binary, writing them into 'kerns'.
// NOTE(review): damaged extract -- the return-type line, the null-'prog'
// guard, the 'if (kerns)' / 'if (count_ret)' wrappers implied by the
// indentation below, the success return and the closing braces are
// missing from view.
clCreateKernelsInProgram(cl_program prog, cl_uint count,
57
                         cl_kernel *kerns, cl_uint *count_ret) {
59
   // NOTE(review): guard missing from view -- presumably 'if (!prog)'.
      throw error(CL_INVALID_PROGRAM);
61
   if (prog->binaries().empty())
62
      throw error(CL_INVALID_PROGRAM_EXECUTABLE);
64
   auto &syms = prog->binaries().begin()->second.syms;
66
   // The caller-supplied array must have room for every kernel symbol.
   if (kerns && count < syms.size())
67
      throw error(CL_INVALID_VALUE);
70
      std::transform(syms.begin(), syms.end(), kerns,
71
                     [=](const module::symbol &sym) {
72
                        return new kernel(*prog, compat::string(sym.name),
73
                                          { sym.args.begin(), sym.args.end() });
77
      *count_ret = syms.size();
83
// clRetainKernel: increment the kernel object's reference count.
// NOTE(review): damaged extract -- the return-type line, the null-'kern'
// guard condition, the retain call and the CL_SUCCESS return are missing
// from view; only the signature and the error return remain.
clRetainKernel(cl_kernel kern) {
85
      return CL_INVALID_KERNEL;
92
// clReleaseKernel: release the caller's reference to the kernel object.
// NOTE(review): damaged extract -- the return-type line, the null-'kern'
// guard condition, the release call and the CL_SUCCESS return are missing
// from view; only the signature and the error return remain.
clReleaseKernel(cl_kernel kern) {
94
      return CL_INVALID_KERNEL;
103
// clSetKernelArg: bind 'value' (a buffer of 'size' bytes) to argument
// slot 'idx' of the kernel.
// NOTE(review): damaged extract -- the return-type line, the null-'kern'
// guard condition, the CL_SUCCESS return and the catch block are missing
// from view.
clSetKernelArg(cl_kernel kern, cl_uint idx, size_t size,
104
               const void *value) try {
106
      throw error(CL_INVALID_KERNEL);
108
   // Reject out-of-range argument indices up front.
   if (idx >= kern->args.size())
109
      throw error(CL_INVALID_ARG_INDEX);
111
   // Validation of 'size'/'value' is delegated to the argument object.
   kern->args[idx]->set(size, value);
120
// clGetKernelInfo: query scalar/string properties of a kernel object.
// NOTE(review): damaged extract -- the return-type line, the null-guard
// condition, the 'switch (param)' header, the second (value) line of
// several two-line property expressions and the closing braces are
// missing from view.
clGetKernelInfo(cl_kernel kern, cl_kernel_info param,
121
                size_t size, void *buf, size_t *size_ret) {
123
      return CL_INVALID_KERNEL;
126
   case CL_KERNEL_FUNCTION_NAME:
127
      return string_property(buf, size, size_ret, kern->name());
129
   case CL_KERNEL_NUM_ARGS:
130
      // NOTE(review): the value argument of this call is on a line missing
      // from this extract.
      return scalar_property<cl_uint>(buf, size, size_ret,
133
   case CL_KERNEL_REFERENCE_COUNT:
134
      return scalar_property<cl_uint>(buf, size, size_ret,
137
   case CL_KERNEL_CONTEXT:
138
      return scalar_property<cl_context>(buf, size, size_ret,
141
   case CL_KERNEL_PROGRAM:
142
      return scalar_property<cl_program>(buf, size, size_ret,
146
      // Unrecognized 'param' value.
      return CL_INVALID_VALUE;
151
// clGetKernelWorkGroupInfo: query per-device work-group properties of a
// kernel.
// NOTE(review): damaged extract -- the return-type line, the null-guard
// condition, the 'switch (param)' header, some value-argument lines and
// the closing braces are missing from view.
clGetKernelWorkGroupInfo(cl_kernel kern, cl_device_id dev,
152
                         cl_kernel_work_group_info param,
153
                         size_t size, void *buf, size_t *size_ret) {
155
      return CL_INVALID_KERNEL;
157
   // A null 'dev' is acceptable only when the program has exactly one
   // device binary; a non-null 'dev' must have a binary of its own.
   if ((!dev && kern->prog.binaries().size() != 1) ||
158
       (dev && !kern->prog.binaries().count(dev)))
159
      return CL_INVALID_DEVICE;
162
   case CL_KERNEL_WORK_GROUP_SIZE:
163
      return scalar_property<size_t>(buf, size, size_ret,
164
                                     kern->max_block_size());
166
   case CL_KERNEL_COMPILE_WORK_GROUP_SIZE:
167
      // NOTE(review): the value argument of this call is on a line missing
      // from this extract.
      return vector_property<size_t>(buf, size, size_ret,
170
   case CL_KERNEL_LOCAL_MEM_SIZE:
171
      return scalar_property<cl_ulong>(buf, size, size_ret,
174
   case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
175
      // A hard-coded preferred multiple of 1 -- always a valid multiple.
      return scalar_property<size_t>(buf, size, size_ret, 1);
177
   case CL_KERNEL_PRIVATE_MEM_SIZE:
178
      return scalar_property<cl_ulong>(buf, size, size_ret,
179
                                       kern->mem_private());
182
      return CL_INVALID_VALUE;
188
   /// Common argument checking shared by kernel invocation commands.
   ///
   /// NOTE(review): damaged extract -- bare numeric lines are leftover
   /// source line numbers; the return-type line, the trailing
   /// 'cl_event *ev' parameter line, the guard conditions before the
   /// first two throws, two lambda bodies and the closing braces are
   /// missing from view.  Comments describe only what is visible.
191
   kernel_validate(cl_command_queue q, cl_kernel kern,
192
                   cl_uint dims, const size_t *grid_offset,
193
                   const size_t *grid_size, const size_t *block_size,
194
                   cl_uint num_deps, const cl_event *deps,
197
         throw error(CL_INVALID_COMMAND_QUEUE);
200
         throw error(CL_INVALID_KERNEL);
202
      // The kernel's program and every wait-list event must belong to the
      // same context as the command queue.
      if (&kern->prog.ctx != &q->ctx ||
203
          any_of([&](const cl_event ev) {
204
                return &ev->ctx != &q->ctx;
205
             }, deps, deps + num_deps))
206
         throw error(CL_INVALID_CONTEXT);
208
      // Wait list must be absent (count 0, null pointer) or present with a
      // non-zero count and no null event handles.
      if (bool(num_deps) != bool(deps) ||
209
          any_of(is_zero<cl_event>(), deps, deps + num_deps))
210
         throw error(CL_INVALID_EVENT_WAIT_LIST);
212
      // NOTE(review): the lambda body (presumably "argument not set yet")
      // is missing from this extract.
      if (any_of([](std::unique_ptr<kernel::argument> &arg) {
214
            }, kern->args.begin(), kern->args.end()))
215
         throw error(CL_INVALID_KERNEL_ARGS);
217
      // The program must have an executable for the queue's device.
      if (!kern->prog.binaries().count(&q->dev))
218
         throw error(CL_INVALID_PROGRAM_EXECUTABLE);
220
      if (dims < 1 || dims > q->dev.max_block_size().size())
221
         throw error(CL_INVALID_WORK_DIMENSION);
223
      if (!grid_size || any_of(is_zero<size_t>(), grid_size, grid_size + dims))
224
         throw error(CL_INVALID_GLOBAL_WORK_SIZE);
227
      // Each block dimension must be non-zero and within the device's
      // per-dimension limit.  NOTE(review): an enclosing null-'block_size'
      // guard appears to be missing from this extract.
      if (any_of([](size_t b, size_t max) {
228
               return b == 0 || b > max;
229
            }, block_size, block_size + dims,
230
            q->dev.max_block_size().begin()))
231
         throw error(CL_INVALID_WORK_ITEM_SIZE);
233
      // NOTE(review): the lambda body (presumably "grid size not evenly
      // divisible by block size") is missing from this extract.
      if (any_of([](size_t b, size_t g) {
235
         }, block_size, block_size + dims, grid_size))
236
         throw error(CL_INVALID_WORK_GROUP_SIZE);
238
      // Total threads per block (product of all dimensions) must not
      // exceed the device limit.
      if (fold(std::multiplies<size_t>(), 1u,
239
               block_size, block_size + dims) >
240
          q->dev.max_threads_per_block())
241
         throw error(CL_INVALID_WORK_GROUP_SIZE);
246
   /// Common event action shared by kernel invocation commands.
   ///
   /// Returns a closure that launches the kernel on the queue's device.
   /// NOTE(review): damaged extract -- the final iterator argument of
   /// map() (presumably block_size.begin()) and the closing braces are
   /// missing from view.
248
   std::function<void (event &)>
249
   kernel_op(cl_command_queue q, cl_kernel kern,
250
             const std::vector<size_t> &grid_offset,
251
             const std::vector<size_t> &grid_size,
252
             const std::vector<size_t> &block_size) {
253
      // Element-wise division of the grid dimensions -- apparently turning
      // the global work size into a per-dimension block count; the second
      // operand's line is missing from this extract.
      const std::vector<size_t> reduced_grid_size = map(
254
         std::divides<size_t>(), grid_size.begin(), grid_size.end(),
257
      // Capture by value: the event action may run after this call returns.
      return [=](event &) {
258
         kern->launch(*q, grid_offset, reduced_grid_size, block_size);
262
   // opt_vector: build a size-'n' std::vector<T> from the optional array
   // 'p'.  NOTE(review): the return-type line and the entire body are
   // missing from this extract -- judging by the call sites below it
   // presumably copies { p, p + n } when 'p' is non-null and returns a
   // default-filled vector otherwise; confirm against the full file.
   template<typename T, typename S>
264
   opt_vector(const T *p, S n) {
273
// clEnqueueNDRangeKernel: validate the invocation arguments, then queue a
// hard event whose action launches the kernel over the given grid.
// NOTE(review): damaged extract -- the return-type line, the trailing
// 'cl_event *ev) try {' parameter line, the continuation line of the
// kernel_validate() call, the ret_object()/return statements and the
// catch block are missing from view.
clEnqueueNDRangeKernel(cl_command_queue q, cl_kernel kern,
274
                       cl_uint dims, const size_t *pgrid_offset,
275
                       const size_t *pgrid_size, const size_t *pblock_size,
276
                       cl_uint num_deps, const cl_event *deps,
278
   // Normalize the (possibly null) caller arrays into concrete vectors.
   const std::vector<size_t> grid_offset = opt_vector(pgrid_offset, dims);
279
   const std::vector<size_t> grid_size = opt_vector(pgrid_size, dims);
280
   const std::vector<size_t> block_size = opt_vector(pblock_size, dims);
282
   // Note: validation uses the raw pointers, so null-ness is still visible.
   kernel_validate(q, kern, dims, pgrid_offset, pgrid_size, pblock_size,
285
   hard_event *hev = new hard_event(
286
      *q, CL_COMMAND_NDRANGE_KERNEL, { deps, deps + num_deps },
287
      kernel_op(q, kern, grid_offset, grid_size, block_size));
297
// clEnqueueTask: equivalent to a one-dimensional kernel launch with grid
// size and block size both fixed at 1.
// NOTE(review): damaged extract -- the return-type line, the trailing
// 'cl_event *ev) try {' parameter line, the ret_object()/return
// statements and the catch block are missing from view.
clEnqueueTask(cl_command_queue q, cl_kernel kern,
298
              cl_uint num_deps, const cl_event *deps,
300
   // Single work-item launch: one block of one thread at offset zero.
   const std::vector<size_t> grid_offset = { 0 };
301
   const std::vector<size_t> grid_size = { 1 };
302
   const std::vector<size_t> block_size = { 1 };
304
   kernel_validate(q, kern, 1, grid_offset.data(), grid_size.data(),
305
                   block_size.data(), num_deps, deps, ev);
307
   hard_event *hev = new hard_event(
308
      *q, CL_COMMAND_TASK, { deps, deps + num_deps },
309
      kernel_op(q, kern, grid_offset, grid_size, block_size));
319
// clEnqueueNativeKernel: unconditionally rejected -- native (host
// function pointer) kernels are not supported by this implementation.
// NOTE(review): the return-type line and the closing brace are missing
// from this extract.
clEnqueueNativeKernel(cl_command_queue q, void (*func)(void *),
320
                      void *args, size_t args_size,
321
                      cl_uint obj_count, const cl_mem *obj_list,
322
                      const void **obj_args, cl_uint num_deps,
323
                      const cl_event *deps, cl_event *ev) {
324
   return CL_INVALID_OPERATION;