2
// Copyright 2012 Francisco Jerez
4
// Permission is hereby granted, free of charge, to any person obtaining a
5
// copy of this software and associated documentation files (the "Software"),
6
// to deal in the Software without restriction, including without limitation
7
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
// and/or sell copies of the Software, and to permit persons to whom the
9
// Software is furnished to do so, subject to the following conditions:
11
// The above copyright notice and this permission notice shall be included in
12
// all copies or substantial portions of the Software.
14
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20
// OTHER DEALINGS IN THE SOFTWARE.
25
#include "util/bitscan.h"
27
#include "api/dispatch.hpp"
28
#include "api/util.hpp"
29
#include "core/event.hpp"
30
#include "core/memory.hpp"
32
using namespace clover;
35
typedef resource::vector vector_t;
38
// Fragment: builds a vector_t from a raw size_t* triple passed by the CL API.
// NOTE(review): lines are missing from this extract (embedded original
// numbering jumps 38 -> 40 -> 45); the condition guarding this throw
// (presumably a null-pointer check on `p`) and the return statement are not
// visible — confirm against the complete file before editing.
vector(const size_t *p) {
40
throw error(CL_INVALID_VALUE);
45
// Fragment: normalizes a user-supplied pitch vector. Each zero entry in the
// tail of `pitch` is replaced by the "natural" pitch, i.e. the unaligned size
// of the previous dimension (region * pitch), as computed by the zip/map loop.
// NOTE(review): "®ion" is a mojibake of "&region" (HTML entity &reg;
// collapsed); the return statement and closing braces are missing from this
// extract (original numbering jumps 51 -> 58) — restore from the full file.
pitch(const vector_t &reg;ion, vector_t pitch) {
46
for (auto x : zip(tail(pitch),
47
map(multiplies(), region, pitch))) {
48
// The spec defines a value of zero as the natural pitch,
49
// i.e. the unaligned size of the previous dimension.
50
if (std::get<0>(x) == 0)
51
std::get<0>(x) = std::get<1>(x);
58
/// Size of a region in bytes.
// NOTE(review): returns dot(pitch, region - {0,1,1}) for non-degenerate
// regions, i.e. full rows/slices for all but the last element of each
// dimension. The body of the zero-region branch (between original lines 62
// and 65) is missing from this extract — presumably `return 0;`; confirm.
// "®ion" is a mojibake of "&region".
61
size(const vector_t &pitch, const vector_t &reg;ion) {
62
if (any_of(is_zero(), region))
65
return dot(pitch, region - vector_t{ 0, 1, 1 });
69
/// Common argument checking shared by memory transfer commands.
// NOTE(review): throws CL_INVALID_CONTEXT when any dependency event belongs
// to a different context than the queue. The closing of the any_of()
// predicate/call (original line 76) and the function's closing brace are
// missing from this extract.
72
validate_common(command_queue &q,
73
const ref_vector<event> &deps) {
74
if (any_of([&](const event &ev) {
75
return ev.context() != q.context();
77
throw error(CL_INVALID_CONTEXT);
81
/// Common error checking for a buffer object argument.
// Checks, in order: queue/buffer context match (CL_INVALID_CONTEXT); the
// region times pitch fits within the next dimension's pitch; origin+region
// fits inside the buffer; and the region has no zero component (all
// CL_INVALID_VALUE).
// NOTE(review): "®ion" is a mojibake of "&region"; the closing brace is
// missing from this extract.
84
validate_object(command_queue &q, buffer &mem, const vector_t &origin,
85
const vector_t &pitch, const vector_t &reg;ion) {
86
if (mem.context() != q.context())
87
throw error(CL_INVALID_CONTEXT);
89
// The region must fit within the specified pitch,
90
if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))
91
throw error(CL_INVALID_VALUE);
93
// ...and within the specified object.
94
if (dot(pitch, origin) + size(pitch, region) > mem.size())
95
throw error(CL_INVALID_VALUE);
97
if (any_of(is_zero(), region))
98
throw error(CL_INVALID_VALUE);
102
/// Common error checking for an image argument.
// Checks device image support, context match, that orig+region stays inside
// the image extents (1D-array height and 2D-array depth map to array_size()),
// non-zero region, and per-type device maximum dimensions.
// NOTE(review): the `break;` / closing-brace lines of each switch case, the
// `default:` label before the final throw, and the function's closing braces
// are missing from this extract (see gaps in the embedded numbering, e.g.
// 128 -> 131, 154 -> 157). "®ion" is a mojibake of "&region".
105
validate_object(command_queue &q, image &img,
106
const vector_t &orig, const vector_t &reg;ion) {
107
size_t height = img.type() == CL_MEM_OBJECT_IMAGE1D_ARRAY ? img.array_size() : img.height();
108
size_t depth = img.type() == CL_MEM_OBJECT_IMAGE2D_ARRAY ? img.array_size() : img.depth();
109
vector_t size = { img.width(), height, depth };
110
const auto &dev = q.device();
112
if (!dev.image_support())
113
throw error(CL_INVALID_OPERATION);
115
if (img.context() != q.context())
116
throw error(CL_INVALID_CONTEXT);
118
if (any_of(greater(), orig + region, size))
119
throw error(CL_INVALID_VALUE);
121
if (any_of(is_zero(), region))
122
throw error(CL_INVALID_VALUE);
124
switch (img.type()) {
125
case CL_MEM_OBJECT_IMAGE1D: {
126
const size_t max = dev.max_image_size();
127
if (img.width() > max)
128
throw error(CL_INVALID_IMAGE_SIZE);
131
case CL_MEM_OBJECT_IMAGE1D_ARRAY: {
132
const size_t max_size = dev.max_image_size();
133
const size_t max_array = dev.max_image_array_number();
134
if (img.width() > max_size || img.array_size() > max_array)
135
throw error(CL_INVALID_IMAGE_SIZE);
138
case CL_MEM_OBJECT_IMAGE2D: {
139
const size_t max = dev.max_image_size();
140
if (img.width() > max || img.height() > max)
141
throw error(CL_INVALID_IMAGE_SIZE);
144
case CL_MEM_OBJECT_IMAGE2D_ARRAY: {
145
const size_t max_size = dev.max_image_size();
146
const size_t max_array = dev.max_image_array_number();
147
if (img.width() > max_size || img.height() > max_size || img.array_size() > max_array)
148
throw error(CL_INVALID_IMAGE_SIZE);
151
case CL_MEM_OBJECT_IMAGE3D: {
152
const size_t max = dev.max_image_size_3d();
153
if (img.width() > max || img.height() > max || img.depth() > max)
154
throw error(CL_INVALID_IMAGE_SIZE);
157
// XXX: Implement missing checks once Clover supports more image types.
159
throw error(CL_INVALID_IMAGE_SIZE);
164
/// Common error checking for a host pointer argument.
// NOTE(review): the condition guarding the first throw (original line 169,
// presumably a null-pointer check on `ptr`) and the closing brace are missing
// from this extract. The visible check rejects regions that overflow the
// next dimension's pitch. "®ion" is a mojibake of "&region".
167
validate_object(command_queue &q, const void *ptr, const vector_t &orig,
168
const vector_t &pitch, const vector_t &reg;ion) {
170
throw error(CL_INVALID_VALUE);
172
// The region must fit within the specified pitch.
173
if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))
174
throw error(CL_INVALID_VALUE);
178
/// Common argument checking for a copy between two buffer objects.
// When source and destination are the same buffer, computes both linear byte
// ranges and throws CL_MEM_COPY_OVERLAP if they intersect.
// NOTE(review): the `buffer &src_mem` parameter line (original line 183) and
// the closing braces are missing from this extract. "®ion" is a mojibake
// of "&region".
181
validate_copy(command_queue &q, buffer &dst_mem,
182
const vector_t &dst_orig, const vector_t &dst_pitch,
184
const vector_t &src_orig, const vector_t &src_pitch,
185
const vector_t &reg;ion) {
186
if (dst_mem == src_mem) {
187
auto dst_offset = dot(dst_pitch, dst_orig);
188
auto src_offset = dot(src_pitch, src_orig);
190
if (interval_overlaps()(
191
dst_offset, dst_offset + size(dst_pitch, region),
192
src_offset, src_offset + size(src_pitch, region)))
193
throw error(CL_MEM_COPY_OVERLAP);
198
/// Common argument checking for a copy between two image objects.
// Throws CL_IMAGE_FORMAT_MISMATCH on differing formats; for same-image
// copies, throws CL_MEM_COPY_OVERLAP when the [orig, orig+region) intervals
// overlap in every dimension (all__of over per-axis interval_overlaps).
// NOTE(review): closing braces are missing from this extract. "®ion" is a
// mojibake of "&region".
201
validate_copy(command_queue &q,
202
image &dst_img, const vector_t &dst_orig,
203
image &src_img, const vector_t &src_orig,
204
const vector_t &reg;ion) {
205
if (dst_img.format() != src_img.format())
206
throw error(CL_IMAGE_FORMAT_MISMATCH);
208
if (dst_img == src_img) {
209
if (all_of(interval_overlaps(),
210
dst_orig, dst_orig + region,
211
src_orig, src_orig + region))
212
throw error(CL_MEM_COPY_OVERLAP);
217
/// Checks that the host access flags of the memory object are
/// within the allowed set \a flags.
// Throws CL_INVALID_OPERATION when the object carries a host-access
// restriction flag (READ_ONLY / WRITE_ONLY / NO_ACCESS) not present in the
// permitted set.
// NOTE(review): return-type line and closing brace missing from this extract.
221
validate_object_access(const memory_obj &mem, const cl_mem_flags flags) {
222
if (mem.flags() & ~flags &
223
(CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY |
224
CL_MEM_HOST_NO_ACCESS))
225
throw error(CL_INVALID_OPERATION);
229
/// Checks that the mapping flags are correct.
// CL_MAP_WRITE_INVALIDATE_REGION is mutually exclusive with READ/WRITE
// (CL_INVALID_VALUE); reads require host-readable and writes (including
// invalidate) require host-writable objects, delegated to
// validate_object_access.
// NOTE(review): return-type line and closing brace missing from this extract.
232
validate_map_flags(const memory_obj &mem, const cl_map_flags flags) {
233
if ((flags & (CL_MAP_WRITE | CL_MAP_READ)) &&
234
(flags & CL_MAP_WRITE_INVALIDATE_REGION))
235
throw error(CL_INVALID_VALUE);
237
if (flags & CL_MAP_READ)
238
validate_object_access(mem, CL_MEM_HOST_READ_ONLY);
240
if (flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))
241
validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);
245
/// Checks that the memory migration flags are correct.
// NOTE(review): the condition guarding the throw (original line 253,
// presumably `if (flags & ~valid)`) and the closing brace are missing from
// this extract — only the valid-mask definition and the throw are visible.
248
validate_mem_migration_flags(const cl_mem_migration_flags flags) {
249
const cl_mem_migration_flags valid =
250
CL_MIGRATE_MEM_OBJECT_HOST |
251
CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED;
254
throw error(CL_INVALID_VALUE);
258
/// Class that encapsulates the task of mapping an object of type
259
/// \a T. The return value of get() should be implicitly
260
/// convertible to \a void *.
266
struct _map<image*> {
267
_map(command_queue &q, image *img, cl_map_flags flags,
268
vector_t offset, vector_t pitch, vector_t region) :
269
map(q, img->resource_in(q), flags, true, offset, region),
274
operator T *() const {
275
return static_cast<T *>(map);
283
struct _map<buffer*> {
284
_map(command_queue &q, buffer *mem, cl_map_flags flags,
285
vector_t offset, vector_t pitch, vector_t region) :
286
map(q, mem->resource_in(q), flags, true,
287
{{ dot(pitch, offset) }}, {{ size(pitch, region) }}),
292
operator T *() const {
293
return static_cast<T *>(map);
302
_map(command_queue &q, P *ptr, cl_map_flags flags,
303
vector_t offset, vector_t pitch, vector_t region) :
304
ptr((P *)((char *)ptr + dot(pitch, offset))), pitch(pitch)
308
operator T *() const {
309
return static_cast<T *>(ptr);
317
/// Software copy from \a src_obj to \a dst_obj. They can be
/// either pointers or memory objects.
// Returns a deferred event action that maps destination (write) and source
// (read) through _map<> and copies row by row over slices (v[2]) and rows
// (v[1]); each row copy is src.pitch[0] * region[0] bytes.
// NOTE(review): the declaration of the index vector `v` (original lines
// 332-333) and the `memcpy(` call line (336) are missing from this extract,
// as are the closing braces. "®ion" is a mojibake of "&region".
320
template<typename T, typename S>
321
std::function<void (event &)>
322
soft_copy_op(command_queue &q,
323
T dst_obj, const vector_t &dst_orig, const vector_t &dst_pitch,
324
S src_obj, const vector_t &src_orig, const vector_t &src_pitch,
325
const vector_t &reg;ion) {
326
return [=, &q](event &) {
327
_map<T> dst = { q, dst_obj, CL_MAP_WRITE,
328
dst_orig, dst_pitch, region };
329
_map<S> src = { q, src_obj, CL_MAP_READ,
330
src_orig, src_pitch, region };
331
assert(src.pitch[0] == dst.pitch[0]);
334
for (v[2] = 0; v[2] < region[2]; ++v[2]) {
335
for (v[1] = 0; v[1] < region[1]; ++v[1]) {
337
static_cast<char *>(dst) + dot(dst.pitch, v),
338
static_cast<const char *>(src) + dot(src.pitch, v),
339
src.pitch[0] * region[0]);
346
/// Hardware copy from \a src_obj to \a dst_obj.
// Returns a deferred event action that performs the copy on the device via
// resource::copy between the two objects' per-queue resources.
// NOTE(review): the closing of the lambda and function (braces/`};`) is
// missing from this extract. "®ion" is a mojibake of "&region".
348
template<typename T, typename S>
349
std::function<void (event &)>
350
hard_copy_op(command_queue &q, T dst_obj, const vector_t &dst_orig,
351
S src_obj, const vector_t &src_orig, const vector_t &reg;ion) {
352
return [=, &q](event &) {
353
dst_obj->resource_in(q).copy(q, dst_orig, region,
354
src_obj->resource_in(q), src_orig);
360
// CL API entry: enqueue a (possibly blocking) buffer -> host read.
// Builds a 1-D region {size,1,1} at byte offset, validates queue/deps,
// host pointer, buffer bounds and host-read access, then enqueues a
// soft_copy_op-backed hard_event; waits for it when `blocking` is set.
// NOTE(review): this extract is missing the return-type/CLOVER_API line, the
// `auto &q = obj(d_q);` line (original 364), the closing of the
// soft_copy_op(...) argument list (380), the `if (blocking)` guard before
// wait_signalled() (382), and the `return CL_SUCCESS;` / catch tail —
// restore from the complete file before modifying.
clEnqueueReadBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
361
size_t offset, size_t size, void *ptr,
362
cl_uint num_deps, const cl_event *d_deps,
363
cl_event *rd_ev) try {
365
auto &mem = obj<buffer>(d_mem);
366
auto deps = objs<wait_list_tag>(d_deps, num_deps);
367
vector_t region = { size, 1, 1 };
368
vector_t obj_origin = { offset };
369
auto obj_pitch = pitch(region, {{ 1 }});
371
validate_common(q, deps);
372
validate_object(q, ptr, {}, obj_pitch, region);
373
validate_object(q, mem, obj_origin, obj_pitch, region);
374
validate_object_access(mem, CL_MEM_HOST_READ_ONLY);
376
auto hev = create<hard_event>(
377
q, CL_COMMAND_READ_BUFFER, deps,
378
soft_copy_op(q, ptr, {}, obj_pitch,
379
&mem, obj_origin, obj_pitch,
383
hev().wait_signalled();
385
ret_object(rd_ev, hev);
393
clEnqueueWriteBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
394
size_t offset, size_t size, const void *ptr,
395
cl_uint num_deps, const cl_event *d_deps,
396
cl_event *rd_ev) try {
398
auto &mem = obj<buffer>(d_mem);
399
auto deps = objs<wait_list_tag>(d_deps, num_deps);
400
vector_t region = { size, 1, 1 };
401
vector_t obj_origin = { offset };
402
auto obj_pitch = pitch(region, {{ 1 }});
404
validate_common(q, deps);
405
validate_object(q, mem, obj_origin, obj_pitch, region);
406
validate_object(q, ptr, {}, obj_pitch, region);
407
validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);
409
auto hev = create<hard_event>(
410
q, CL_COMMAND_WRITE_BUFFER, deps,
411
soft_copy_op(q, &mem, obj_origin, obj_pitch,
416
hev().wait_signalled();
418
ret_object(rd_ev, hev);
426
clEnqueueReadBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
427
const size_t *p_obj_origin,
428
const size_t *p_host_origin,
429
const size_t *p_region,
430
size_t obj_row_pitch, size_t obj_slice_pitch,
431
size_t host_row_pitch, size_t host_slice_pitch,
433
cl_uint num_deps, const cl_event *d_deps,
434
cl_event *rd_ev) try {
436
auto &mem = obj<buffer>(d_mem);
437
auto deps = objs<wait_list_tag>(d_deps, num_deps);
438
auto region = vector(p_region);
439
auto obj_origin = vector(p_obj_origin);
440
auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});
441
auto host_origin = vector(p_host_origin);
442
auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});
444
validate_common(q, deps);
445
validate_object(q, ptr, host_origin, host_pitch, region);
446
validate_object(q, mem, obj_origin, obj_pitch, region);
447
validate_object_access(mem, CL_MEM_HOST_READ_ONLY);
449
auto hev = create<hard_event>(
450
q, CL_COMMAND_READ_BUFFER_RECT, deps,
451
soft_copy_op(q, ptr, host_origin, host_pitch,
452
&mem, obj_origin, obj_pitch,
456
hev().wait_signalled();
458
ret_object(rd_ev, hev);
466
clEnqueueWriteBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
467
const size_t *p_obj_origin,
468
const size_t *p_host_origin,
469
const size_t *p_region,
470
size_t obj_row_pitch, size_t obj_slice_pitch,
471
size_t host_row_pitch, size_t host_slice_pitch,
473
cl_uint num_deps, const cl_event *d_deps,
474
cl_event *rd_ev) try {
476
auto &mem = obj<buffer>(d_mem);
477
auto deps = objs<wait_list_tag>(d_deps, num_deps);
478
auto region = vector(p_region);
479
auto obj_origin = vector(p_obj_origin);
480
auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});
481
auto host_origin = vector(p_host_origin);
482
auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});
484
validate_common(q, deps);
485
validate_object(q, mem, obj_origin, obj_pitch, region);
486
validate_object(q, ptr, host_origin, host_pitch, region);
487
validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);
489
auto hev = create<hard_event>(
490
q, CL_COMMAND_WRITE_BUFFER_RECT, deps,
491
soft_copy_op(q, &mem, obj_origin, obj_pitch,
492
ptr, host_origin, host_pitch,
496
hev().wait_signalled();
498
ret_object(rd_ev, hev);
506
clEnqueueFillBuffer(cl_command_queue d_queue, cl_mem d_mem,
507
const void *pattern, size_t pattern_size,
508
size_t offset, size_t size,
509
cl_uint num_deps, const cl_event *d_deps,
510
cl_event *rd_ev) try {
511
auto &q = obj(d_queue);
512
auto &mem = obj<buffer>(d_mem);
513
auto deps = objs<wait_list_tag>(d_deps, num_deps);
514
vector_t region = { size, 1, 1 };
515
vector_t origin = { offset };
516
auto dst_pitch = pitch(region, {{ 1 }});
518
validate_common(q, deps);
519
validate_object(q, mem, origin, dst_pitch, region);
522
return CL_INVALID_VALUE;
524
if (!util_is_power_of_two_nonzero(pattern_size) ||
525
pattern_size > 128 || size % pattern_size
526
|| offset % pattern_size) {
527
return CL_INVALID_VALUE;
530
auto sub = dynamic_cast<sub_buffer *>(&mem);
531
if (sub && sub->offset() % q.device().mem_base_addr_align()) {
532
return CL_MISALIGNED_SUB_BUFFER_OFFSET;
535
std::string data = std::string((char *)pattern, pattern_size);
536
auto hev = create<hard_event>(
537
q, CL_COMMAND_FILL_BUFFER, deps,
538
[=, &q, &mem](event &) {
539
mem.resource_in(q).clear(q, origin, region, data);
542
ret_object(rd_ev, hev);
550
clEnqueueCopyBuffer(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
551
size_t src_offset, size_t dst_offset, size_t size,
552
cl_uint num_deps, const cl_event *d_deps,
553
cl_event *rd_ev) try {
555
auto &src_mem = obj<buffer>(d_src_mem);
556
auto &dst_mem = obj<buffer>(d_dst_mem);
557
auto deps = objs<wait_list_tag>(d_deps, num_deps);
558
vector_t region = { size, 1, 1 };
559
vector_t dst_origin = { dst_offset };
560
auto dst_pitch = pitch(region, {{ 1 }});
561
vector_t src_origin = { src_offset };
562
auto src_pitch = pitch(region, {{ 1 }});
564
validate_common(q, deps);
565
validate_object(q, dst_mem, dst_origin, dst_pitch, region);
566
validate_object(q, src_mem, src_origin, src_pitch, region);
567
validate_copy(q, dst_mem, dst_origin, dst_pitch,
568
src_mem, src_origin, src_pitch, region);
570
auto hev = create<hard_event>(
571
q, CL_COMMAND_COPY_BUFFER, deps,
572
hard_copy_op(q, &dst_mem, dst_origin,
573
&src_mem, src_origin, region));
575
ret_object(rd_ev, hev);
583
clEnqueueCopyBufferRect(cl_command_queue d_q, cl_mem d_src_mem,
585
const size_t *p_src_origin, const size_t *p_dst_origin,
586
const size_t *p_region,
587
size_t src_row_pitch, size_t src_slice_pitch,
588
size_t dst_row_pitch, size_t dst_slice_pitch,
589
cl_uint num_deps, const cl_event *d_deps,
590
cl_event *rd_ev) try {
592
auto &src_mem = obj<buffer>(d_src_mem);
593
auto &dst_mem = obj<buffer>(d_dst_mem);
594
auto deps = objs<wait_list_tag>(d_deps, num_deps);
595
auto region = vector(p_region);
596
auto dst_origin = vector(p_dst_origin);
597
auto dst_pitch = pitch(region, {{ 1, dst_row_pitch, dst_slice_pitch }});
598
auto src_origin = vector(p_src_origin);
599
auto src_pitch = pitch(region, {{ 1, src_row_pitch, src_slice_pitch }});
601
validate_common(q, deps);
602
validate_object(q, dst_mem, dst_origin, dst_pitch, region);
603
validate_object(q, src_mem, src_origin, src_pitch, region);
604
validate_copy(q, dst_mem, dst_origin, dst_pitch,
605
src_mem, src_origin, src_pitch, region);
607
auto hev = create<hard_event>(
608
q, CL_COMMAND_COPY_BUFFER_RECT, deps,
609
soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,
610
&src_mem, src_origin, src_pitch,
613
ret_object(rd_ev, hev);
621
clEnqueueReadImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
622
const size_t *p_origin, const size_t *p_region,
623
size_t row_pitch, size_t slice_pitch, void *ptr,
624
cl_uint num_deps, const cl_event *d_deps,
625
cl_event *rd_ev) try {
627
auto &img = obj<image>(d_mem);
628
auto deps = objs<wait_list_tag>(d_deps, num_deps);
629
auto region = vector(p_region);
630
auto dst_pitch = pitch(region, {{ img.pixel_size(),
631
row_pitch, slice_pitch }});
632
auto src_origin = vector(p_origin);
633
auto src_pitch = pitch(region, {{ img.pixel_size(),
634
img.row_pitch(), img.slice_pitch() }});
636
validate_common(q, deps);
637
validate_object(q, ptr, {}, dst_pitch, region);
638
validate_object(q, img, src_origin, region);
639
validate_object_access(img, CL_MEM_HOST_READ_ONLY);
641
auto hev = create<hard_event>(
642
q, CL_COMMAND_READ_IMAGE, deps,
643
soft_copy_op(q, ptr, {}, dst_pitch,
644
&img, src_origin, src_pitch,
648
hev().wait_signalled();
650
ret_object(rd_ev, hev);
658
clEnqueueWriteImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
659
const size_t *p_origin, const size_t *p_region,
660
size_t row_pitch, size_t slice_pitch, const void *ptr,
661
cl_uint num_deps, const cl_event *d_deps,
662
cl_event *rd_ev) try {
664
auto &img = obj<image>(d_mem);
665
auto deps = objs<wait_list_tag>(d_deps, num_deps);
666
auto region = vector(p_region);
667
auto dst_origin = vector(p_origin);
668
auto dst_pitch = pitch(region, {{ img.pixel_size(),
669
img.row_pitch(), img.slice_pitch() }});
670
auto src_pitch = pitch(region, {{ img.pixel_size(),
671
row_pitch, slice_pitch }});
673
validate_common(q, deps);
674
validate_object(q, img, dst_origin, region);
675
validate_object(q, ptr, {}, src_pitch, region);
676
validate_object_access(img, CL_MEM_HOST_WRITE_ONLY);
678
auto hev = create<hard_event>(
679
q, CL_COMMAND_WRITE_IMAGE, deps,
680
soft_copy_op(q, &img, dst_origin, dst_pitch,
685
hev().wait_signalled();
687
ret_object(rd_ev, hev);
695
clEnqueueFillImage(cl_command_queue d_queue, cl_mem d_mem,
696
const void *fill_color,
697
const size_t *p_origin, const size_t *p_region,
698
cl_uint num_deps, const cl_event *d_deps,
699
cl_event *rd_ev) try {
700
auto &q = obj(d_queue);
701
auto &img = obj<image>(d_mem);
702
auto deps = objs<wait_list_tag>(d_deps, num_deps);
703
auto origin = vector(p_origin);
704
auto region = vector(p_region);
706
validate_common(q, deps);
707
validate_object(q, img, origin, region);
710
return CL_INVALID_VALUE;
712
std::string data = std::string((char *)fill_color, sizeof(cl_uint4));
713
auto hev = create<hard_event>(
714
q, CL_COMMAND_FILL_IMAGE, deps,
715
[=, &q, &img](event &) {
716
img.resource_in(q).clear(q, origin, region, data);
719
ret_object(rd_ev, hev);
727
clEnqueueCopyImage(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
728
const size_t *p_src_origin, const size_t *p_dst_origin,
729
const size_t *p_region,
730
cl_uint num_deps, const cl_event *d_deps,
731
cl_event *rd_ev) try {
733
auto &src_img = obj<image>(d_src_mem);
734
auto &dst_img = obj<image>(d_dst_mem);
735
auto deps = objs<wait_list_tag>(d_deps, num_deps);
736
auto region = vector(p_region);
737
auto dst_origin = vector(p_dst_origin);
738
auto src_origin = vector(p_src_origin);
740
validate_common(q, deps);
741
validate_object(q, dst_img, dst_origin, region);
742
validate_object(q, src_img, src_origin, region);
743
validate_copy(q, dst_img, dst_origin, src_img, src_origin, region);
745
auto hev = create<hard_event>(
746
q, CL_COMMAND_COPY_IMAGE, deps,
747
hard_copy_op(q, &dst_img, dst_origin,
748
&src_img, src_origin,
751
ret_object(rd_ev, hev);
759
clEnqueueCopyImageToBuffer(cl_command_queue d_q,
760
cl_mem d_src_mem, cl_mem d_dst_mem,
761
const size_t *p_src_origin, const size_t *p_region,
763
cl_uint num_deps, const cl_event *d_deps,
764
cl_event *rd_ev) try {
766
auto &src_img = obj<image>(d_src_mem);
767
auto &dst_mem = obj<buffer>(d_dst_mem);
768
auto deps = objs<wait_list_tag>(d_deps, num_deps);
769
auto region = vector(p_region);
770
vector_t dst_origin = { dst_offset };
771
auto dst_pitch = pitch(region, {{ src_img.pixel_size() }});
772
auto src_origin = vector(p_src_origin);
773
auto src_pitch = pitch(region, {{ src_img.pixel_size(),
775
src_img.slice_pitch() }});
777
validate_common(q, deps);
778
validate_object(q, dst_mem, dst_origin, dst_pitch, region);
779
validate_object(q, src_img, src_origin, region);
781
auto hev = create<hard_event>(
782
q, CL_COMMAND_COPY_IMAGE_TO_BUFFER, deps,
783
soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,
784
&src_img, src_origin, src_pitch,
787
ret_object(rd_ev, hev);
795
clEnqueueCopyBufferToImage(cl_command_queue d_q,
796
cl_mem d_src_mem, cl_mem d_dst_mem,
798
const size_t *p_dst_origin, const size_t *p_region,
799
cl_uint num_deps, const cl_event *d_deps,
800
cl_event *rd_ev) try {
802
auto &src_mem = obj<buffer>(d_src_mem);
803
auto &dst_img = obj<image>(d_dst_mem);
804
auto deps = objs<wait_list_tag>(d_deps, num_deps);
805
auto region = vector(p_region);
806
auto dst_origin = vector(p_dst_origin);
807
auto dst_pitch = pitch(region, {{ dst_img.pixel_size(),
809
dst_img.slice_pitch() }});
810
vector_t src_origin = { src_offset };
811
auto src_pitch = pitch(region, {{ dst_img.pixel_size() }});
813
validate_common(q, deps);
814
validate_object(q, dst_img, dst_origin, region);
815
validate_object(q, src_mem, src_origin, src_pitch, region);
817
auto hev = create<hard_event>(
818
q, CL_COMMAND_COPY_BUFFER_TO_IMAGE, deps,
819
soft_copy_op(q, &dst_img, dst_origin, dst_pitch,
820
&src_mem, src_origin, src_pitch,
823
ret_object(rd_ev, hev);
831
clEnqueueMapBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
832
cl_map_flags flags, size_t offset, size_t size,
833
cl_uint num_deps, const cl_event *d_deps,
834
cl_event *rd_ev, cl_int *r_errcode) try {
836
auto &mem = obj<buffer>(d_mem);
837
auto deps = objs<wait_list_tag>(d_deps, num_deps);
838
vector_t region = { size, 1, 1 };
839
vector_t obj_origin = { offset };
840
auto obj_pitch = pitch(region, {{ 1 }});
842
validate_common(q, deps);
843
validate_object(q, mem, obj_origin, obj_pitch, region);
844
validate_map_flags(mem, flags);
846
auto *map = mem.resource_in(q).add_map(q, flags, blocking, obj_origin, region);
848
auto hev = create<hard_event>(q, CL_COMMAND_MAP_BUFFER, deps);
850
hev().wait_signalled();
852
ret_object(rd_ev, hev);
853
ret_error(r_errcode, CL_SUCCESS);
857
ret_error(r_errcode, e);
862
clEnqueueMapImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
864
const size_t *p_origin, const size_t *p_region,
865
size_t *row_pitch, size_t *slice_pitch,
866
cl_uint num_deps, const cl_event *d_deps,
867
cl_event *rd_ev, cl_int *r_errcode) try {
869
auto &img = obj<image>(d_mem);
870
auto deps = objs<wait_list_tag>(d_deps, num_deps);
871
auto region = vector(p_region);
872
auto origin = vector(p_origin);
874
validate_common(q, deps);
875
validate_object(q, img, origin, region);
876
validate_map_flags(img, flags);
879
throw error(CL_INVALID_VALUE);
881
if ((img.slice_pitch() || img.array_size()) && !slice_pitch)
882
throw error(CL_INVALID_VALUE);
884
auto *map = img.resource_in(q).add_map(q, flags, blocking, origin, region);
885
*row_pitch = map->pitch()[1];
887
*slice_pitch = map->pitch()[2];
889
auto hev = create<hard_event>(q, CL_COMMAND_MAP_IMAGE, deps);
891
hev().wait_signalled();
893
ret_object(rd_ev, hev);
894
ret_error(r_errcode, CL_SUCCESS);
898
ret_error(r_errcode, e);
903
// CL API entry: enqueue removal of a host mapping previously created by
// clEnqueueMapBuffer/clEnqueueMapImage. Validates queue/deps, then enqueues
// a hard_event whose action deletes the map entry (del_map) for `ptr` on the
// object's per-queue resource.
// NOTE(review): this extract is missing the return-type/CLOVER_API line, the
// `auto &q = obj(d_q);` line (original ~906), the lambda/call closing lines,
// and the `return CL_SUCCESS;` / catch tail — restore from the full file.
clEnqueueUnmapMemObject(cl_command_queue d_q, cl_mem d_mem, void *ptr,
904
cl_uint num_deps, const cl_event *d_deps,
905
cl_event *rd_ev) try {
907
auto &mem = obj(d_mem);
908
auto deps = objs<wait_list_tag>(d_deps, num_deps);
910
validate_common(q, deps);
912
auto hev = create<hard_event>(
913
q, CL_COMMAND_UNMAP_MEM_OBJECT, deps,
914
[=, &q, &mem](event &) {
915
mem.resource_in(q).del_map(ptr);
918
ret_object(rd_ev, hev);
926
clEnqueueMigrateMemObjects(cl_command_queue d_q,
928
const cl_mem *d_mems,
929
cl_mem_migration_flags flags,
931
const cl_event *d_deps,
932
cl_event *rd_ev) try {
934
auto mems = objs<memory_obj>(d_mems, num_mems);
935
auto deps = objs<wait_list_tag>(d_deps, num_deps);
937
validate_common(q, deps);
938
validate_mem_migration_flags(flags);
940
if (any_of([&](const memory_obj &m) {
941
return m.context() != q.context();
943
throw error(CL_INVALID_CONTEXT);
945
auto hev = create<hard_event>(
946
q, CL_COMMAND_MIGRATE_MEM_OBJECTS, deps,
948
for (auto &mem: mems) {
949
if (flags & CL_MIGRATE_MEM_OBJECT_HOST) {
950
if ((flags & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED))
953
// For flags == CL_MIGRATE_MEM_OBJECT_HOST only to be
954
// efficient we would need cl*ReadBuffer* to implement
955
// reading from host memory.
958
if (flags & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED)
959
mem.resource_undef(q);
966
ret_object(rd_ev, hev);
974
clover::EnqueueSVMFree(cl_command_queue d_q,
975
cl_uint num_svm_pointers,
976
void *svm_pointers[],
977
void (CL_CALLBACK *pfn_free_func) (
978
cl_command_queue queue, cl_uint num_svm_pointers,
979
void *svm_pointers[], void *user_data),
981
cl_uint num_events_in_wait_list,
982
const cl_event *event_wait_list,
986
if (bool(num_svm_pointers) != bool(svm_pointers))
987
return CL_INVALID_VALUE;
991
if (!q.device().svm_support())
992
return CL_INVALID_OPERATION;
994
bool can_emulate = q.device().has_system_svm();
995
auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);
997
validate_common(q, deps);
999
std::vector<void *> svm_pointers_cpy(svm_pointers,
1000
svm_pointers + num_svm_pointers);
1001
if (!pfn_free_func) {
1003
CLOVER_NOT_SUPPORTED_UNTIL("2.0");
1004
return CL_INVALID_VALUE;
1006
pfn_free_func = [](cl_command_queue d_q, cl_uint num_svm_pointers,
1007
void *svm_pointers[], void *) {
1008
clover::context &ctx = obj(d_q).context();
1009
for (void *p : range(svm_pointers, num_svm_pointers)) {
1010
ctx.remove_svm_allocation(p);
1016
auto hev = create<hard_event>(q, cmd, deps,
1017
[=](clover::event &) mutable {
1018
pfn_free_func(d_q, num_svm_pointers, svm_pointers_cpy.data(),
1022
ret_object(event, hev);
1025
} catch (error &e) {
1030
clEnqueueSVMFree(cl_command_queue d_q,
1031
cl_uint num_svm_pointers,
1032
void *svm_pointers[],
1033
void (CL_CALLBACK *pfn_free_func) (
1034
cl_command_queue queue, cl_uint num_svm_pointers,
1035
void *svm_pointers[], void *user_data),
1037
cl_uint num_events_in_wait_list,
1038
const cl_event *event_wait_list,
1041
return EnqueueSVMFree(d_q, num_svm_pointers, svm_pointers,
1042
pfn_free_func, user_data, num_events_in_wait_list,
1043
event_wait_list, event, CL_COMMAND_SVM_FREE);
1047
clover::EnqueueSVMMemcpy(cl_command_queue d_q,
1048
cl_bool blocking_copy,
1050
const void *src_ptr,
1052
cl_uint num_events_in_wait_list,
1053
const cl_event *event_wait_list,
1058
if (!q.device().svm_support())
1059
return CL_INVALID_OPERATION;
1061
if (dst_ptr == nullptr || src_ptr == nullptr)
1062
return CL_INVALID_VALUE;
1064
if (static_cast<size_t>(abs(reinterpret_cast<ptrdiff_t>(dst_ptr) -
1065
reinterpret_cast<ptrdiff_t>(src_ptr))) < size)
1066
return CL_MEM_COPY_OVERLAP;
1069
bool can_emulate = q.device().has_system_svm();
1070
auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);
1072
validate_common(q, deps);
1075
auto hev = create<hard_event>(q, cmd, deps,
1076
[=](clover::event &) {
1077
memcpy(dst_ptr, src_ptr, size);
1082
ret_object(event, hev);
1086
CLOVER_NOT_SUPPORTED_UNTIL("2.0");
1087
return CL_INVALID_VALUE;
1089
} catch (error &e) {
1094
clEnqueueSVMMemcpy(cl_command_queue d_q,
1095
cl_bool blocking_copy,
1097
const void *src_ptr,
1099
cl_uint num_events_in_wait_list,
1100
const cl_event *event_wait_list,
1103
return EnqueueSVMMemcpy(d_q, blocking_copy, dst_ptr, src_ptr,
1104
size, num_events_in_wait_list, event_wait_list,
1105
event, CL_COMMAND_SVM_MEMCPY);
1109
clover::EnqueueSVMMemFill(cl_command_queue d_q,
1111
const void *pattern,
1112
size_t pattern_size,
1114
cl_uint num_events_in_wait_list,
1115
const cl_event *event_wait_list,
1120
if (!q.device().svm_support())
1121
return CL_INVALID_OPERATION;
1123
if (svm_ptr == nullptr || pattern == nullptr ||
1124
!util_is_power_of_two_nonzero(pattern_size) ||
1125
pattern_size > 128 ||
1126
!ptr_is_aligned(svm_ptr, pattern_size) ||
1127
size % pattern_size)
1128
return CL_INVALID_VALUE;
1130
bool can_emulate = q.device().has_system_svm();
1131
auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);
1133
validate_common(q, deps);
1136
auto hev = create<hard_event>(q, cmd, deps,
1137
[=](clover::event &) {
1138
void *ptr = svm_ptr;
1139
for (size_t s = size; s; s -= pattern_size) {
1140
memcpy(ptr, pattern, pattern_size);
1141
ptr = static_cast<uint8_t*>(ptr) + pattern_size;
1145
ret_object(event, hev);
1149
CLOVER_NOT_SUPPORTED_UNTIL("2.0");
1150
return CL_INVALID_VALUE;
1152
} catch (error &e) {
1157
clEnqueueSVMMemFill(cl_command_queue d_q,
1159
const void *pattern,
1160
size_t pattern_size,
1162
cl_uint num_events_in_wait_list,
1163
const cl_event *event_wait_list,
1166
return EnqueueSVMMemFill(d_q, svm_ptr, pattern, pattern_size,
1167
size, num_events_in_wait_list, event_wait_list,
1168
event, CL_COMMAND_SVM_MEMFILL);
1172
clover::EnqueueSVMMap(cl_command_queue d_q,
1173
cl_bool blocking_map,
1174
cl_map_flags map_flags,
1177
cl_uint num_events_in_wait_list,
1178
const cl_event *event_wait_list,
1183
if (!q.device().svm_support())
1184
return CL_INVALID_OPERATION;
1186
if (svm_ptr == nullptr || size == 0)
1187
return CL_INVALID_VALUE;
1189
bool can_emulate = q.device().has_system_svm();
1190
auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);
1192
validate_common(q, deps);
1195
auto hev = create<hard_event>(q, cmd, deps,
1196
[](clover::event &) { });
1198
ret_object(event, hev);
1202
CLOVER_NOT_SUPPORTED_UNTIL("2.0");
1203
return CL_INVALID_VALUE;
1205
} catch (error &e) {
1210
clEnqueueSVMMap(cl_command_queue d_q,
1211
cl_bool blocking_map,
1212
cl_map_flags map_flags,
1215
cl_uint num_events_in_wait_list,
1216
const cl_event *event_wait_list,
1219
return EnqueueSVMMap(d_q, blocking_map, map_flags, svm_ptr, size,
1220
num_events_in_wait_list, event_wait_list, event,
1221
CL_COMMAND_SVM_MAP);
1225
clover::EnqueueSVMUnmap(cl_command_queue d_q,
1227
cl_uint num_events_in_wait_list,
1228
const cl_event *event_wait_list,
1233
if (!q.device().svm_support())
1234
return CL_INVALID_OPERATION;
1236
if (svm_ptr == nullptr)
1237
return CL_INVALID_VALUE;
1239
bool can_emulate = q.device().has_system_svm();
1240
auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);
1242
validate_common(q, deps);
1245
auto hev = create<hard_event>(q, cmd, deps,
1246
[](clover::event &) { });
1248
ret_object(event, hev);
1252
CLOVER_NOT_SUPPORTED_UNTIL("2.0");
1253
return CL_INVALID_VALUE;
1255
} catch (error &e) {
1260
clEnqueueSVMUnmap(cl_command_queue d_q,
1262
cl_uint num_events_in_wait_list,
1263
const cl_event *event_wait_list,
1266
return EnqueueSVMUnmap(d_q, svm_ptr, num_events_in_wait_list,
1267
event_wait_list, event, CL_COMMAND_SVM_UNMAP);
1271
clEnqueueSVMMigrateMem(cl_command_queue d_q,
1272
cl_uint num_svm_pointers,
1273
const void **svm_pointers,
1274
const size_t *sizes,
1275
const cl_mem_migration_flags flags,
1277
const cl_event *d_deps,
1278
cl_event *rd_ev) try {
1280
auto deps = objs<wait_list_tag>(d_deps, num_deps);
1282
validate_common(q, deps);
1283
validate_mem_migration_flags(flags);
1285
if (!q.device().svm_support())
1286
return CL_INVALID_OPERATION;
1288
if (!num_svm_pointers || !svm_pointers)
1289
return CL_INVALID_VALUE;
1291
std::vector<size_t> sizes_copy(num_svm_pointers);
1292
std::vector<const void*> ptrs(num_svm_pointers);
1294
for (unsigned i = 0; i < num_svm_pointers; ++i) {
1295
const void *ptr = svm_pointers[i];
1296
size_t size = sizes ? sizes[i] : 0;
1298
return CL_INVALID_VALUE;
1300
auto p = q.context().find_svm_allocation(ptr);
1302
return CL_INVALID_VALUE;
1304
std::ptrdiff_t pdiff = (uint8_t*)ptr - (uint8_t*)p.first;
1305
if (size && size + pdiff > p.second)
1306
return CL_INVALID_VALUE;
1308
sizes_copy[i] = size ? size : p.second;
1309
ptrs[i] = size ? svm_pointers[i] : p.first;
1312
auto hev = create<hard_event>(
1313
q, CL_COMMAND_MIGRATE_MEM_OBJECTS, deps,
1315
q.svm_migrate(ptrs, sizes_copy, flags);
1318
ret_object(rd_ev, hev);
1321
} catch (error &e) {