/*
 * Copyright © 2021 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "radv_acceleration_structure.h"
#include "radv_private.h"

#include "util/format/format_utils.h"
#include "util/half_float.h"

#include "nir_builder.h"

#include "radv_meta.h"
VKAPI_ATTR void VKAPI_CALL
radv_GetAccelerationStructureBuildSizesKHR(
   VkDevice _device, VkAccelerationStructureBuildTypeKHR buildType,
   const VkAccelerationStructureBuildGeometryInfoKHR *pBuildInfo,
   const uint32_t *pMaxPrimitiveCounts, VkAccelerationStructureBuildSizesInfoKHR *pSizeInfo)
{
   uint64_t triangles = 0, boxes = 0, instances = 0;

   STATIC_ASSERT(sizeof(struct radv_bvh_triangle_node) == 64);
   STATIC_ASSERT(sizeof(struct radv_bvh_aabb_node) == 64);
   STATIC_ASSERT(sizeof(struct radv_bvh_instance_node) == 128);
   STATIC_ASSERT(sizeof(struct radv_bvh_box16_node) == 64);
   STATIC_ASSERT(sizeof(struct radv_bvh_box32_node) == 128);

   for (uint32_t i = 0; i < pBuildInfo->geometryCount; ++i) {
      const VkAccelerationStructureGeometryKHR *geometry;
      if (pBuildInfo->pGeometries)
         geometry = &pBuildInfo->pGeometries[i];
      else
         geometry = pBuildInfo->ppGeometries[i];

      switch (geometry->geometryType) {
      case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
         triangles += pMaxPrimitiveCounts[i];
         break;
      case VK_GEOMETRY_TYPE_AABBS_KHR:
         boxes += pMaxPrimitiveCounts[i];
         break;
      case VK_GEOMETRY_TYPE_INSTANCES_KHR:
         instances += pMaxPrimitiveCounts[i];
         break;
      case VK_GEOMETRY_TYPE_MAX_ENUM_KHR:
         unreachable("VK_GEOMETRY_TYPE_MAX_ENUM_KHR unhandled");
      }
   }

   uint64_t children = boxes + instances + triangles;
   uint64_t internal_nodes = 0;
   while (children > 1) {
      children = DIV_ROUND_UP(children, 4);
      internal_nodes += children;
   }

   /* The stray 128 is to ensure we have space for a header
    * which we'd want to use for some metadata (like the
    * total AABB of the BVH) */
   uint64_t size = boxes * 128 + instances * 128 + triangles * 64 + internal_nodes * 128 + 192;
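   /* Rough illustration of the formula above: a BLAS with 1024 triangles and
    * no AABBs/instances has 1024 leaves and 256 + 64 + 16 + 4 + 1 = 341
    * internal box32 nodes, i.e. 1024 * 64 + 341 * 128 + 192 = 109376 bytes. */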

   pSizeInfo->accelerationStructureSize = size;

   /* 2x the max number of nodes in a BVH layer (one uint32_t each) */
   pSizeInfo->updateScratchSize = pSizeInfo->buildScratchSize =
      MAX2(4096, 2 * (boxes + instances + triangles) * sizeof(uint32_t));
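   /* The build uses the scratch buffer as two equally sized id arrays and
    * ping-pongs between them while collapsing one BVH level into the next,
    * hence twice the leaf count (see build_bvh() below). */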
}

VKAPI_ATTR VkResult VKAPI_CALL
radv_CreateAccelerationStructureKHR(VkDevice _device,
                                    const VkAccelerationStructureCreateInfoKHR *pCreateInfo,
                                    const VkAllocationCallbacks *pAllocator,
                                    VkAccelerationStructureKHR *pAccelerationStructure)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);
   struct radv_acceleration_structure *accel;

   accel = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*accel), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (accel == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &accel->base, VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR);

   accel->mem_offset = buffer->offset + pCreateInfo->offset;
   accel->size = pCreateInfo->size;
   accel->bo = buffer->bo;

   *pAccelerationStructure = radv_acceleration_structure_to_handle(accel);
   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
radv_DestroyAccelerationStructureKHR(VkDevice _device,
                                     VkAccelerationStructureKHR accelerationStructure,
                                     const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_acceleration_structure, accel, accelerationStructure);

   if (!accel)
      return;

   vk_object_base_finish(&accel->base);
   vk_free2(&device->vk.alloc, pAllocator, accel);
}

VKAPI_ATTR VkDeviceAddress VKAPI_CALL
radv_GetAccelerationStructureDeviceAddressKHR(
   VkDevice _device, const VkAccelerationStructureDeviceAddressInfoKHR *pInfo)
{
   RADV_FROM_HANDLE(radv_acceleration_structure, accel, pInfo->accelerationStructure);
   return radv_accel_struct_get_va(accel);
}

VKAPI_ATTR VkResult VKAPI_CALL
radv_WriteAccelerationStructuresPropertiesKHR(
   VkDevice _device, uint32_t accelerationStructureCount,
   const VkAccelerationStructureKHR *pAccelerationStructures, VkQueryType queryType,
   size_t dataSize, void *pData, size_t stride)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   char *data_out = (char*)pData;

   for (uint32_t i = 0; i < accelerationStructureCount; ++i) {
      RADV_FROM_HANDLE(radv_acceleration_structure, accel, pAccelerationStructures[i]);
      const char *base_ptr = (const char *)device->ws->buffer_map(accel->bo);
      if (!base_ptr)
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

      const struct radv_accel_struct_header *header = (const void*)(base_ptr + accel->mem_offset);
      if (stride * i + sizeof(VkDeviceSize) <= dataSize) {
         VkDeviceSize value;
         switch (queryType) {
         case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR:
            value = header->compacted_size;
            break;
         case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR:
            value = header->serialization_size;
            break;
         default:
            unreachable("Unhandled acceleration structure query");
         }
         *(VkDeviceSize *)(data_out + stride * i) = value;
      }

      device->ws->buffer_unmap(accel->bo);
   }
   return VK_SUCCESS;
}

struct radv_bvh_build_ctx {
   uint32_t *write_scratch;
   char *base;
   char *curr_ptr;
};

static void
build_triangles(struct radv_bvh_build_ctx *ctx, const VkAccelerationStructureGeometryKHR *geom,
                const VkAccelerationStructureBuildRangeInfoKHR *range, unsigned geometry_id)
{
   const VkAccelerationStructureGeometryTrianglesDataKHR *tri_data = &geom->geometry.triangles;
   VkTransformMatrixKHR matrix;
   const char *index_data = (const char *)tri_data->indexData.hostAddress;
   const char *v_data_base = (const char *)tri_data->vertexData.hostAddress;

   if (tri_data->indexType == VK_INDEX_TYPE_NONE_KHR)
      v_data_base += range->primitiveOffset;
   else
      index_data += range->primitiveOffset;

   if (tri_data->transformData.hostAddress) {
      matrix = *(const VkTransformMatrixKHR *)((const char *)tri_data->transformData.hostAddress +
                                               range->transformOffset);
   } else {
      matrix = (VkTransformMatrixKHR){
         .matrix = {{1.0, 0.0, 0.0, 0.0}, {0.0, 1.0, 0.0, 0.0}, {0.0, 0.0, 1.0, 0.0}}};
   }

   for (uint32_t p = 0; p < range->primitiveCount; ++p, ctx->curr_ptr += 64) {
      struct radv_bvh_triangle_node *node = (void*)ctx->curr_ptr;
      uint32_t node_offset = ctx->curr_ptr - ctx->base;
      uint32_t node_id = node_offset >> 3;
      *ctx->write_scratch++ = node_id;
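      /* Node ids throughout this file are (byte offset within the BVH) / 8
       * with the node type in the low 3 bits: triangle leaves use type 0,
       * box32 internal nodes 5, instance leaves 6 and AABB leaves 7. */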

      for (unsigned v = 0; v < 3; ++v) {
         uint32_t v_index = range->firstVertex;
         switch (tri_data->indexType) {
         case VK_INDEX_TYPE_NONE_KHR:
            v_index += p * 3 + v;
            break;
         case VK_INDEX_TYPE_UINT8_EXT:
            v_index += *(const uint8_t *)index_data;
            index_data += 1;
            break;
         case VK_INDEX_TYPE_UINT16:
            v_index += *(const uint16_t *)index_data;
            index_data += 2;
            break;
         case VK_INDEX_TYPE_UINT32:
            v_index += *(const uint32_t *)index_data;
            index_data += 4;
            break;
         case VK_INDEX_TYPE_MAX_ENUM:
            unreachable("Unhandled VK_INDEX_TYPE_MAX_ENUM");
         }

         const char *v_data = v_data_base + v_index * tri_data->vertexStride;
         float coords[4];
         switch (tri_data->vertexFormat) {
         case VK_FORMAT_R32G32_SFLOAT:
            coords[0] = *(const float *)(v_data + 0);
            coords[1] = *(const float *)(v_data + 4);
            coords[2] = 0.0f;
            coords[3] = 1.0f;
            break;
         case VK_FORMAT_R32G32B32_SFLOAT:
            coords[0] = *(const float *)(v_data + 0);
            coords[1] = *(const float *)(v_data + 4);
            coords[2] = *(const float *)(v_data + 8);
            coords[3] = 1.0f;
            break;
         case VK_FORMAT_R32G32B32A32_SFLOAT:
            coords[0] = *(const float *)(v_data + 0);
            coords[1] = *(const float *)(v_data + 4);
            coords[2] = *(const float *)(v_data + 8);
            coords[3] = *(const float *)(v_data + 12);
            break;
         case VK_FORMAT_R16G16_SFLOAT:
            coords[0] = _mesa_half_to_float(*(const uint16_t *)(v_data + 0));
            coords[1] = _mesa_half_to_float(*(const uint16_t *)(v_data + 2));
            coords[2] = 0.0f;
            coords[3] = 1.0f;
            break;
         case VK_FORMAT_R16G16B16_SFLOAT:
            coords[0] = _mesa_half_to_float(*(const uint16_t *)(v_data + 0));
            coords[1] = _mesa_half_to_float(*(const uint16_t *)(v_data + 2));
            coords[2] = _mesa_half_to_float(*(const uint16_t *)(v_data + 4));
            coords[3] = 1.0f;
            break;
         case VK_FORMAT_R16G16B16A16_SFLOAT:
            coords[0] = _mesa_half_to_float(*(const uint16_t *)(v_data + 0));
            coords[1] = _mesa_half_to_float(*(const uint16_t *)(v_data + 2));
            coords[2] = _mesa_half_to_float(*(const uint16_t *)(v_data + 4));
            coords[3] = _mesa_half_to_float(*(const uint16_t *)(v_data + 6));
            break;
         case VK_FORMAT_R16G16_SNORM:
            coords[0] = _mesa_snorm_to_float(*(const int16_t *)(v_data + 0), 16);
            coords[1] = _mesa_snorm_to_float(*(const int16_t *)(v_data + 2), 16);
            coords[2] = 0.0f;
            coords[3] = 1.0f;
            break;
         case VK_FORMAT_R16G16_UNORM:
            coords[0] = _mesa_unorm_to_float(*(const uint16_t *)(v_data + 0), 16);
            coords[1] = _mesa_unorm_to_float(*(const uint16_t *)(v_data + 2), 16);
            coords[2] = 0.0f;
            coords[3] = 1.0f;
            break;
         case VK_FORMAT_R16G16B16A16_SNORM:
            coords[0] = _mesa_snorm_to_float(*(const int16_t *)(v_data + 0), 16);
            coords[1] = _mesa_snorm_to_float(*(const int16_t *)(v_data + 2), 16);
            coords[2] = _mesa_snorm_to_float(*(const int16_t *)(v_data + 4), 16);
            coords[3] = _mesa_snorm_to_float(*(const int16_t *)(v_data + 6), 16);
            break;
         case VK_FORMAT_R16G16B16A16_UNORM:
            coords[0] = _mesa_unorm_to_float(*(const uint16_t *)(v_data + 0), 16);
            coords[1] = _mesa_unorm_to_float(*(const uint16_t *)(v_data + 2), 16);
            coords[2] = _mesa_unorm_to_float(*(const uint16_t *)(v_data + 4), 16);
            coords[3] = _mesa_unorm_to_float(*(const uint16_t *)(v_data + 6), 16);
            break;
         case VK_FORMAT_R8G8_SNORM:
            coords[0] = _mesa_snorm_to_float(*(const int8_t *)(v_data + 0), 8);
            coords[1] = _mesa_snorm_to_float(*(const int8_t *)(v_data + 1), 8);
            coords[2] = 0.0f;
            coords[3] = 1.0f;
            break;
         case VK_FORMAT_R8G8_UNORM:
            coords[0] = _mesa_unorm_to_float(*(const uint8_t *)(v_data + 0), 8);
            coords[1] = _mesa_unorm_to_float(*(const uint8_t *)(v_data + 1), 8);
            coords[2] = 0.0f;
            coords[3] = 1.0f;
            break;
         case VK_FORMAT_R8G8B8A8_SNORM:
            coords[0] = _mesa_snorm_to_float(*(const int8_t *)(v_data + 0), 8);
            coords[1] = _mesa_snorm_to_float(*(const int8_t *)(v_data + 1), 8);
            coords[2] = _mesa_snorm_to_float(*(const int8_t *)(v_data + 2), 8);
            coords[3] = _mesa_snorm_to_float(*(const int8_t *)(v_data + 3), 8);
            break;
         case VK_FORMAT_R8G8B8A8_UNORM:
            coords[0] = _mesa_unorm_to_float(*(const uint8_t *)(v_data + 0), 8);
            coords[1] = _mesa_unorm_to_float(*(const uint8_t *)(v_data + 1), 8);
            coords[2] = _mesa_unorm_to_float(*(const uint8_t *)(v_data + 2), 8);
            coords[3] = _mesa_unorm_to_float(*(const uint8_t *)(v_data + 3), 8);
            break;
         case VK_FORMAT_A2B10G10R10_UNORM_PACK32: {
            uint32_t val = *(const uint32_t *)v_data;
            coords[0] = _mesa_unorm_to_float((val >> 0) & 0x3FF, 10);
            coords[1] = _mesa_unorm_to_float((val >> 10) & 0x3FF, 10);
            coords[2] = _mesa_unorm_to_float((val >> 20) & 0x3FF, 10);
            coords[3] = _mesa_unorm_to_float((val >> 30) & 0x3, 2);
            break;
         }
         default:
            unreachable("Unhandled vertex format in BVH build");
         }

         for (unsigned j = 0; j < 3; ++j) {
            float r = 0;
            for (unsigned k = 0; k < 4; ++k)
               r += matrix.matrix[j][k] * coords[k];
            node->coords[v][j] = r;
         }
      }

      node->triangle_id = p;
      node->geometry_id_and_flags = geometry_id | (geom->flags << 28);

      /* Seems to be needed for IJ, otherwise I = J = ? */
      node->id = 9;
   }
}

static VkResult
build_instances(struct radv_device *device, struct radv_bvh_build_ctx *ctx,
                const VkAccelerationStructureGeometryKHR *geom,
                const VkAccelerationStructureBuildRangeInfoKHR *range)
{
   const VkAccelerationStructureGeometryInstancesDataKHR *inst_data = &geom->geometry.instances;

   for (uint32_t p = 0; p < range->primitiveCount; ++p, ctx->curr_ptr += 128) {
      const char *instance_data =
         (const char *)inst_data->data.hostAddress + range->primitiveOffset;
      const VkAccelerationStructureInstanceKHR *instance =
         inst_data->arrayOfPointers
            ? (((const VkAccelerationStructureInstanceKHR *const *)instance_data)[p])
            : &((const VkAccelerationStructureInstanceKHR *)instance_data)[p];
      if (!instance->accelerationStructureReference) {
         continue;
      }

      struct radv_bvh_instance_node *node = (void*)ctx->curr_ptr;
      uint32_t node_offset = ctx->curr_ptr - ctx->base;
      uint32_t node_id = (node_offset >> 3) | 6;
      *ctx->write_scratch++ = node_id;

      float transform[16], inv_transform[16];
      memcpy(transform, &instance->transform.matrix, sizeof(instance->transform.matrix));
      transform[12] = transform[13] = transform[14] = 0.0f;
      transform[15] = 1.0f;
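      /* The 3x4 row-major VkTransformMatrixKHR is padded to a full 4x4 with a
       * (0, 0, 0, 1) last row above, so it can be inverted to get the
       * world-to-object matrix stored in the instance node below. */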

      util_invert_mat4x4(inv_transform, transform);
      memcpy(node->wto_matrix, inv_transform, sizeof(node->wto_matrix));
      node->wto_matrix[3] = transform[3];
      node->wto_matrix[7] = transform[7];
      node->wto_matrix[11] = transform[11];
      node->custom_instance_and_mask = instance->instanceCustomIndex | (instance->mask << 24);
      node->sbt_offset_and_flags =
         instance->instanceShaderBindingTableRecordOffset | (instance->flags << 24);
      node->instance_id = p;

      for (unsigned i = 0; i < 3; ++i)
         for (unsigned j = 0; j < 3; ++j)
            node->otw_matrix[i * 3 + j] = instance->transform.matrix[j][i];

      RADV_FROM_HANDLE(radv_acceleration_structure, src_accel_struct,
                       (VkAccelerationStructureKHR)instance->accelerationStructureReference);
      const void *src_base = device->ws->buffer_map(src_accel_struct->bo);
      if (!src_base)
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

      src_base = (const char *)src_base + src_accel_struct->mem_offset;
      const struct radv_accel_struct_header *src_header = src_base;
      node->base_ptr = radv_accel_struct_get_va(src_accel_struct) | src_header->root_node_offset;
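
      /* Compute the world-space AABB of the instance: start each axis at the
       * translation component and accumulate the min/max contribution of the
       * source BLAS bounds through the 3x3 part of the transform. */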
      for (unsigned j = 0; j < 3; ++j) {
         node->aabb[0][j] = instance->transform.matrix[j][3];
         node->aabb[1][j] = instance->transform.matrix[j][3];
         for (unsigned k = 0; k < 3; ++k) {
            node->aabb[0][j] += MIN2(instance->transform.matrix[j][k] * src_header->aabb[0][k],
                                     instance->transform.matrix[j][k] * src_header->aabb[1][k]);
            node->aabb[1][j] += MAX2(instance->transform.matrix[j][k] * src_header->aabb[0][k],
                                     instance->transform.matrix[j][k] * src_header->aabb[1][k]);
         }
      }
      device->ws->buffer_unmap(src_accel_struct->bo);
   }
   return VK_SUCCESS;
}

static void
build_aabbs(struct radv_bvh_build_ctx *ctx, const VkAccelerationStructureGeometryKHR *geom,
            const VkAccelerationStructureBuildRangeInfoKHR *range, unsigned geometry_id)
{
   const VkAccelerationStructureGeometryAabbsDataKHR *aabb_data = &geom->geometry.aabbs;

   for (uint32_t p = 0; p < range->primitiveCount; ++p, ctx->curr_ptr += 64) {
      struct radv_bvh_aabb_node *node = (void*)ctx->curr_ptr;
      uint32_t node_offset = ctx->curr_ptr - ctx->base;
      uint32_t node_id = (node_offset >> 3) | 7;
      *ctx->write_scratch++ = node_id;

      const VkAabbPositionsKHR *aabb =
         (const VkAabbPositionsKHR *)((const char *)aabb_data->data.hostAddress +
                                      range->primitiveOffset + p * aabb_data->stride);

      node->aabb[0][0] = aabb->minX;
      node->aabb[0][1] = aabb->minY;
      node->aabb[0][2] = aabb->minZ;
      node->aabb[1][0] = aabb->maxX;
      node->aabb[1][1] = aabb->maxY;
      node->aabb[1][2] = aabb->maxZ;
      node->primitive_id = p;
      node->geometry_id_and_flags = geometry_id;
   }
}

static uint32_t
leaf_node_count(const VkAccelerationStructureBuildGeometryInfoKHR *info,
                const VkAccelerationStructureBuildRangeInfoKHR *ranges)
{
   uint32_t count = 0;
   for (uint32_t i = 0; i < info->geometryCount; ++i) {
      count += ranges[i].primitiveCount;
   }
   return count;
}

static void
compute_bounds(const char *base_ptr, uint32_t node_id, float *bounds)
{
   for (unsigned i = 0; i < 3; ++i)
      bounds[i] = INFINITY;
   for (unsigned i = 0; i < 3; ++i)
      bounds[3 + i] = -INFINITY;

   switch (node_id & 7) {
   case 0: {
      const struct radv_bvh_triangle_node *node = (const void*)(base_ptr + (node_id / 8 * 64));
      for (unsigned v = 0; v < 3; ++v) {
         for (unsigned j = 0; j < 3; ++j) {
            bounds[j] = MIN2(bounds[j], node->coords[v][j]);
            bounds[3 + j] = MAX2(bounds[3 + j], node->coords[v][j]);
         }
      }
      break;
   }
   case 5: {
      const struct radv_bvh_box32_node *node = (const void*)(base_ptr + (node_id / 8 * 64));
      for (unsigned c2 = 0; c2 < 4; ++c2) {
         if (isnan(node->coords[c2][0][0]))
            continue;
         for (unsigned j = 0; j < 3; ++j) {
            bounds[j] = MIN2(bounds[j], node->coords[c2][0][j]);
            bounds[3 + j] = MAX2(bounds[3 + j], node->coords[c2][1][j]);
         }
      }
      break;
   }
   case 6: {
      const struct radv_bvh_instance_node *node = (const void*)(base_ptr + (node_id / 8 * 64));
      for (unsigned j = 0; j < 3; ++j) {
         bounds[j] = MIN2(bounds[j], node->aabb[0][j]);
         bounds[3 + j] = MAX2(bounds[3 + j], node->aabb[1][j]);
      }
      break;
   }
   case 7: {
      const struct radv_bvh_aabb_node *node = (const void*)(base_ptr + (node_id / 8 * 64));
      for (unsigned j = 0; j < 3; ++j) {
         bounds[j] = MIN2(bounds[j], node->aabb[0][j]);
         bounds[3 + j] = MAX2(bounds[3 + j], node->aabb[1][j]);
      }
      break;
   }
   }
}

struct bvh_opt_entry {
   uint64_t key;
   uint32_t node_id;
};

static int
bvh_opt_compare(const void *_a, const void *_b)
{
   const struct bvh_opt_entry *a = _a;
   const struct bvh_opt_entry *b = _b;

   if (a->key < b->key)
      return -1;
   if (a->key > b->key)
      return 1;
   if (a->node_id < b->node_id)
      return -1;
   if (a->node_id > b->node_id)
      return 1;
   return 0;
}

static void
optimize_bvh(const char *base_ptr, uint32_t *node_ids, uint32_t node_count)
{
   if (node_count == 0)
      return;

   float bounds[6];
   for (unsigned i = 0; i < 3; ++i)
      bounds[i] = INFINITY;
   for (unsigned i = 0; i < 3; ++i)
      bounds[3 + i] = -INFINITY;

   for (uint32_t i = 0; i < node_count; ++i) {
      float node_bounds[6];
      compute_bounds(base_ptr, node_ids[i], node_bounds);
      for (unsigned j = 0; j < 3; ++j)
         bounds[j] = MIN2(bounds[j], node_bounds[j]);
      for (unsigned j = 0; j < 3; ++j)
         bounds[3 + j] = MAX2(bounds[3 + j], node_bounds[3 + j]);
   }

   struct bvh_opt_entry *entries = calloc(node_count, sizeof(struct bvh_opt_entry));
   if (!entries)
      return;

   for (uint32_t i = 0; i < node_count; ++i) {
      float node_bounds[6];
      compute_bounds(base_ptr, node_ids[i], node_bounds);
      float node_coords[3];
      for (unsigned j = 0; j < 3; ++j)
         node_coords[j] = (node_bounds[j] + node_bounds[3 + j]) * 0.5;
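      /* Quantize each leaf centroid to 21 bits per axis and interleave the
       * bits into a 63-bit Morton code; sorting on that key below places
       * spatially nearby leaves next to each other in the array. */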
      int32_t coords[3];
      for (unsigned j = 0; j < 3; ++j)
         coords[j] = MAX2(
            MIN2((int32_t)((node_coords[j] - bounds[j]) / (bounds[3 + j] - bounds[j]) * (1 << 21)),
                 (1 << 21) - 1),
            0);
      uint64_t key = 0;
      for (unsigned j = 0; j < 21; ++j)
         for (unsigned k = 0; k < 3; ++k)
            key |= (uint64_t)((coords[k] >> j) & 1) << (j * 3 + k);
      entries[i].key = key;
      entries[i].node_id = node_ids[i];
   }

   qsort(entries, node_count, sizeof(entries[0]), bvh_opt_compare);
   for (unsigned i = 0; i < node_count; ++i)
      node_ids[i] = entries[i].node_id;

   free(entries);
}

static VkResult
build_bvh(struct radv_device *device, const VkAccelerationStructureBuildGeometryInfoKHR *info,
          const VkAccelerationStructureBuildRangeInfoKHR *ranges)
{
   RADV_FROM_HANDLE(radv_acceleration_structure, accel, info->dstAccelerationStructure);
   VkResult result = VK_SUCCESS;

   uint32_t *scratch[2];
   scratch[0] = info->scratchData.hostAddress;
   scratch[1] = scratch[0] + leaf_node_count(info, ranges);

   char *base_ptr = (char*)device->ws->buffer_map(accel->bo);
   if (!base_ptr)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   base_ptr = base_ptr + accel->mem_offset;
   struct radv_accel_struct_header *header = (void*)base_ptr;
   void *first_node_ptr = (char *)base_ptr + ALIGN(sizeof(*header), 64);

   struct radv_bvh_build_ctx ctx = {.write_scratch = scratch[0],
                                    .base = base_ptr,
                                    .curr_ptr = (char *)first_node_ptr + 128};

   uint64_t instance_offset = (const char *)ctx.curr_ptr - (const char *)base_ptr;
   uint64_t instance_count = 0;

   /* This initializes the leaf nodes of the BVH all at the same level. */
   for (int inst = 1; inst >= 0; --inst) {
      for (uint32_t i = 0; i < info->geometryCount; ++i) {
         const VkAccelerationStructureGeometryKHR *geom =
            info->pGeometries ? &info->pGeometries[i] : info->ppGeometries[i];

         if ((inst && geom->geometryType != VK_GEOMETRY_TYPE_INSTANCES_KHR) ||
             (!inst && geom->geometryType == VK_GEOMETRY_TYPE_INSTANCES_KHR))
            continue;

         switch (geom->geometryType) {
         case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
            build_triangles(&ctx, geom, ranges + i, i);
            break;
         case VK_GEOMETRY_TYPE_AABBS_KHR:
            build_aabbs(&ctx, geom, ranges + i, i);
            break;
         case VK_GEOMETRY_TYPE_INSTANCES_KHR: {
            result = build_instances(device, &ctx, geom, ranges + i);
            if (result != VK_SUCCESS)
               goto fail;

            instance_count += ranges[i].primitiveCount;
            break;
         }
         case VK_GEOMETRY_TYPE_MAX_ENUM_KHR:
            unreachable("VK_GEOMETRY_TYPE_MAX_ENUM_KHR unhandled");
         }
      }
   }

   uint32_t node_counts[2] = {ctx.write_scratch - scratch[0], 0};
   optimize_bvh(base_ptr, scratch[0], node_counts[0]);
   unsigned d;

   /*
    * This is the most naive BVH building algorithm I could think of:
    * just iteratively builds each level from bottom to top with
    * the children of each node being in-order and tightly packed.
    *
    * Is probably terrible for traversal but should be easy to build an
    * equivalent GPU version.
    */
   for (d = 0; node_counts[d & 1] > 1 || d == 0; ++d) {
      uint32_t child_count = node_counts[d & 1];
      const uint32_t *children = scratch[d & 1];
      uint32_t *dst_ids = scratch[(d & 1) ^ 1];
      unsigned dst_count;
      unsigned child_idx = 0;
      for (dst_count = 0; child_idx < MAX2(1, child_count); ++dst_count, child_idx += 4) {
         unsigned local_child_count = MIN2(4, child_count - child_idx);
         uint32_t child_ids[4];
         float bounds[4][6];

         for (unsigned c = 0; c < local_child_count; ++c) {
            uint32_t id = children[child_idx + c];
            child_ids[c] = id;
            compute_bounds(base_ptr, id, bounds[c]);
         }

         struct radv_bvh_box32_node *node;

         /* Put the root node at base_ptr so the id = 0, which allows some
          * traversal optimizations. */
         if (child_idx == 0 && local_child_count == child_count) {
            node = first_node_ptr;
            header->root_node_offset = ((char *)first_node_ptr - (char *)base_ptr) / 64 * 8 + 5;
         } else {
            uint32_t dst_id = (ctx.curr_ptr - base_ptr) / 64;
            dst_ids[dst_count] = dst_id * 8 + 5;

            node = (void*)ctx.curr_ptr;
            ctx.curr_ptr += 128;
         }

         for (unsigned c = 0; c < local_child_count; ++c) {
            node->children[c] = child_ids[c];
            for (unsigned i = 0; i < 2; ++i)
               for (unsigned j = 0; j < 3; ++j)
                  node->coords[c][i][j] = bounds[c][i * 3 + j];
         }
         for (unsigned c = local_child_count; c < 4; ++c) {
            for (unsigned i = 0; i < 2; ++i)
               for (unsigned j = 0; j < 3; ++j)
                  node->coords[c][i][j] = NAN;
         }
      }

      node_counts[(d & 1) ^ 1] = dst_count;
   }

   compute_bounds(base_ptr, header->root_node_offset, &header->aabb[0][0]);

   header->instance_offset = instance_offset;
   header->instance_count = instance_count;
   header->compacted_size = (char *)ctx.curr_ptr - base_ptr;
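
   /* compacted_size is the number of bytes actually written for this BVH;
    * the copy/serialization paths below and radv_CopyAccelerationStructureKHR
    * only ever touch this many bytes rather than the worst-case allocation. */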

   /* 16 bytes per invocation, 64 invocations per workgroup */
   header->copy_dispatch_size[0] = DIV_ROUND_UP(header->compacted_size, 16 * 64);
   header->copy_dispatch_size[1] = 1;
   header->copy_dispatch_size[2] = 1;

   header->serialization_size =
      header->compacted_size + align(sizeof(struct radv_accel_struct_serialization_header) +
                                        sizeof(uint64_t) * header->instance_count,
                                     128);

fail:
   device->ws->buffer_unmap(accel->bo);
   return result;
}

VKAPI_ATTR VkResult VKAPI_CALL
radv_BuildAccelerationStructuresKHR(
   VkDevice _device, VkDeferredOperationKHR deferredOperation, uint32_t infoCount,
   const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
   const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   VkResult result = VK_SUCCESS;

   for (uint32_t i = 0; i < infoCount; ++i) {
      result = build_bvh(device, pInfos + i, ppBuildRangeInfos[i]);
      if (result != VK_SUCCESS)
         break;
   }

   return result;
}

VKAPI_ATTR VkResult VKAPI_CALL
radv_CopyAccelerationStructureKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation,
                                  const VkCopyAccelerationStructureInfoKHR *pInfo)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_acceleration_structure, src_struct, pInfo->src);
   RADV_FROM_HANDLE(radv_acceleration_structure, dst_struct, pInfo->dst);

   char *src_ptr = (char *)device->ws->buffer_map(src_struct->bo);
   if (!src_ptr)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   char *dst_ptr = (char *)device->ws->buffer_map(dst_struct->bo);
   if (!dst_ptr) {
      device->ws->buffer_unmap(src_struct->bo);
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   }

   src_ptr += src_struct->mem_offset;
   dst_ptr += dst_struct->mem_offset;

   const struct radv_accel_struct_header *header = (const void *)src_ptr;
   memcpy(dst_ptr, src_ptr, header->compacted_size);

   device->ws->buffer_unmap(src_struct->bo);
   device->ws->buffer_unmap(dst_struct->bo);
   return VK_SUCCESS;
}

static nir_ssa_def *
get_indices(nir_builder *b, nir_ssa_def *addr, nir_ssa_def *type, nir_ssa_def *id)
{
   const struct glsl_type *uvec3_type = glsl_vector_type(GLSL_TYPE_UINT, 3);
   nir_variable *result =
      nir_variable_create(b->shader, nir_var_shader_temp, uvec3_type, "indices");
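
   /* The index type is only known at run time (it comes in as a push
    * constant), so the loads are selected with nested if/else ladders:
    * 16- and 32-bit indices are fetched from the index buffer, while
    * VK_INDEX_TYPE_NONE_KHR uses 3 * primitive_id + {0,1,2} directly and
    * 8-bit indices fall back to byte loads. */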

   nir_push_if(b, nir_ult(b, type, nir_imm_int(b, 2)));
   nir_push_if(b, nir_ieq(b, type, nir_imm_int(b, VK_INDEX_TYPE_UINT16)));
   {
      nir_ssa_def *index_id = nir_umul24(b, id, nir_imm_int(b, 6));
      nir_ssa_def *indices[3];
      for (unsigned i = 0; i < 3; ++i) {
         indices[i] = nir_build_load_global(
            b, 1, 16,
            nir_iadd(b, addr, nir_u2u64(b, nir_iadd(b, index_id, nir_imm_int(b, 2 * i)))));
      }
      nir_store_var(b, result, nir_u2u32(b, nir_vec(b, indices, 3)), 7);
   }
   nir_push_else(b, NULL);
   {
      nir_ssa_def *index_id = nir_umul24(b, id, nir_imm_int(b, 12));
      nir_ssa_def *indices =
         nir_build_load_global(b, 3, 32, nir_iadd(b, addr, nir_u2u64(b, index_id)));
      nir_store_var(b, result, indices, 7);
   }
   nir_pop_if(b, NULL);
   nir_push_else(b, NULL);
   {
      nir_ssa_def *index_id = nir_umul24(b, id, nir_imm_int(b, 3));
      nir_ssa_def *indices[] = {
         index_id,
         nir_iadd(b, index_id, nir_imm_int(b, 1)),
         nir_iadd(b, index_id, nir_imm_int(b, 2)),
      };

      nir_push_if(b, nir_ieq(b, type, nir_imm_int(b, VK_INDEX_TYPE_NONE_KHR)));
      {
         nir_store_var(b, result, nir_vec(b, indices, 3), 7);
      }
      nir_push_else(b, NULL);
      {
         for (unsigned i = 0; i < 3; ++i) {
            indices[i] =
               nir_build_load_global(b, 1, 8, nir_iadd(b, addr, nir_u2u64(b, indices[i])));
         }
         nir_store_var(b, result, nir_u2u32(b, nir_vec(b, indices, 3)), 7);
      }
      nir_pop_if(b, NULL);
   }
   nir_pop_if(b, NULL);
   return nir_load_var(b, result);
}

static void
get_vertices(nir_builder *b, nir_ssa_def *addresses, nir_ssa_def *format, nir_ssa_def *positions[3])
{
   const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
   nir_variable *results[3] = {
      nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "vertex0"),
      nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "vertex1"),
      nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "vertex2")};
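
   /* The vertex format is also a run-time value, so the supported formats are
    * enumerated below and compared one by one, nesting a push_if/push_else
    * per format; the matching branch loads the three vertices and converts
    * them to 32-bit float. */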

   VkFormat formats[] = {
      VK_FORMAT_R32G32B32_SFLOAT,
      VK_FORMAT_R32G32B32A32_SFLOAT,
      VK_FORMAT_R16G16B16_SFLOAT,
      VK_FORMAT_R16G16B16A16_SFLOAT,
      VK_FORMAT_R16G16_SFLOAT,
      VK_FORMAT_R32G32_SFLOAT,
      VK_FORMAT_R16G16_SNORM,
      VK_FORMAT_R16G16_UNORM,
      VK_FORMAT_R16G16B16A16_SNORM,
      VK_FORMAT_R16G16B16A16_UNORM,
      VK_FORMAT_R8G8_SNORM,
      VK_FORMAT_R8G8_UNORM,
      VK_FORMAT_R8G8B8A8_SNORM,
      VK_FORMAT_R8G8B8A8_UNORM,
      VK_FORMAT_A2B10G10R10_UNORM_PACK32,
   };

   for (unsigned f = 0; f < ARRAY_SIZE(formats); ++f) {
      if (f + 1 < ARRAY_SIZE(formats))
         nir_push_if(b, nir_ieq(b, format, nir_imm_int(b, formats[f])));

      for (unsigned i = 0; i < 3; ++i) {
         switch (formats[f]) {
         case VK_FORMAT_R32G32B32_SFLOAT:
         case VK_FORMAT_R32G32B32A32_SFLOAT:
            nir_store_var(b, results[i],
                          nir_build_load_global(b, 3, 32, nir_channel(b, addresses, i)), 7);
            break;
         case VK_FORMAT_R32G32_SFLOAT:
         case VK_FORMAT_R16G16_SFLOAT:
         case VK_FORMAT_R16G16B16_SFLOAT:
         case VK_FORMAT_R16G16B16A16_SFLOAT:
         case VK_FORMAT_R16G16_SNORM:
         case VK_FORMAT_R16G16_UNORM:
         case VK_FORMAT_R16G16B16A16_SNORM:
         case VK_FORMAT_R16G16B16A16_UNORM:
         case VK_FORMAT_R8G8_SNORM:
         case VK_FORMAT_R8G8_UNORM:
         case VK_FORMAT_R8G8B8A8_SNORM:
         case VK_FORMAT_R8G8B8A8_UNORM:
         case VK_FORMAT_A2B10G10R10_UNORM_PACK32: {
            unsigned components = MIN2(3, vk_format_get_nr_components(formats[f]));
            unsigned comp_bits =
               vk_format_get_blocksizebits(formats[f]) / vk_format_get_nr_components(formats[f]);
            unsigned comp_bytes = comp_bits / 8;
            nir_ssa_def *values[3];
            nir_ssa_def *addr = nir_channel(b, addresses, i);

            if (formats[f] == VK_FORMAT_A2B10G10R10_UNORM_PACK32) {
               comp_bits = 10;
               nir_ssa_def *val = nir_build_load_global(b, 1, 32, addr);
               for (unsigned j = 0; j < 3; ++j)
                  values[j] = nir_ubfe(b, val, nir_imm_int(b, j * 10), nir_imm_int(b, 10));
            } else {
               for (unsigned j = 0; j < components; ++j)
                  values[j] = nir_build_load_global(
                     b, 1, comp_bits, nir_iadd(b, addr, nir_imm_int64(b, j * comp_bytes)));

               for (unsigned j = components; j < 3; ++j)
                  values[j] = nir_imm_intN_t(b, 0, comp_bits);
            }

            nir_ssa_def *vec;
            if (util_format_is_snorm(vk_format_to_pipe_format(formats[f]))) {
               for (unsigned j = 0; j < 3; ++j) {
                  values[j] = nir_fdiv(b, nir_i2f32(b, values[j]),
                                       nir_imm_float(b, (1u << (comp_bits - 1)) - 1));
                  values[j] = nir_fmax(b, values[j], nir_imm_float(b, -1.0));
               }
               vec = nir_vec(b, values, 3);
            } else if (util_format_is_unorm(vk_format_to_pipe_format(formats[f]))) {
               for (unsigned j = 0; j < 3; ++j) {
                  values[j] =
                     nir_fdiv(b, nir_u2f32(b, values[j]), nir_imm_float(b, (1u << comp_bits) - 1));
                  values[j] = nir_fmin(b, values[j], nir_imm_float(b, 1.0));
               }
               vec = nir_vec(b, values, 3);
            } else if (comp_bits == 16)
               vec = nir_f2f32(b, nir_vec(b, values, 3));
            else
               vec = nir_vec(b, values, 3);
            nir_store_var(b, results[i], vec, 7);
            break;
         }
         default:
            unreachable("Unhandled format");
         }
      }
      if (f + 1 < ARRAY_SIZE(formats))
         nir_push_else(b, NULL);
   }
   for (unsigned f = 1; f < ARRAY_SIZE(formats); ++f) {
      nir_pop_if(b, NULL);
   }

   for (unsigned i = 0; i < 3; ++i)
      positions[i] = nir_load_var(b, results[i]);
}

struct build_primitive_constants {
   uint64_t node_dst_addr;
   uint64_t scratch_addr;
   uint32_t dst_offset;
   uint32_t dst_scratch_offset;
   uint32_t geometry_type;
   uint32_t geometry_id;

   union {
      struct {
         uint64_t vertex_addr;
         uint64_t index_addr;
         uint64_t transform_addr;
         uint32_t vertex_stride;
         uint32_t vertex_format;
         uint32_t index_format;
      };
      struct {
         uint64_t instance_data;
         uint32_t array_of_pointers;
      };
      struct {
         uint64_t aabb_addr;
         uint32_t aabb_stride;
      };
   };
};

struct build_internal_constants {
   uint64_t node_dst_addr;
   uint64_t scratch_addr;
   uint32_t dst_offset;
   uint32_t dst_scratch_offset;
   uint32_t src_scratch_offset;
   uint32_t fill_header;
};

/* This inverts a 3x3 matrix using cofactors, as in e.g.
 * https://www.mathsisfun.com/algebra/matrix-inverse-minors-cofactors-adjugate.html */
static void
nir_invert_3x3(nir_builder *b, nir_ssa_def *in[3][3], nir_ssa_def *out[3][3])
{
   nir_ssa_def *cofactors[3][3];
   for (unsigned i = 0; i < 3; ++i) {
      for (unsigned j = 0; j < 3; ++j) {
         cofactors[i][j] =
            nir_fsub(b, nir_fmul(b, in[(i + 1) % 3][(j + 1) % 3], in[(i + 2) % 3][(j + 2) % 3]),
                     nir_fmul(b, in[(i + 1) % 3][(j + 2) % 3], in[(i + 2) % 3][(j + 1) % 3]));
      }
   }

   nir_ssa_def *det = NULL;
   for (unsigned i = 0; i < 3; ++i) {
      nir_ssa_def *det_part = nir_fmul(b, in[0][i], cofactors[0][i]);
      det = det ? nir_fadd(b, det, det_part) : det_part;
   }

   nir_ssa_def *det_inv = nir_frcp(b, det);
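   /* inverse(M) = adjugate(M) / det(M), and the adjugate is the transpose of
    * the cofactor matrix, which is why the loop below indexes cofactors[j][i]. */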
   for (unsigned i = 0; i < 3; ++i) {
      for (unsigned j = 0; j < 3; ++j) {
         out[i][j] = nir_fmul(b, cofactors[j][i], det_inv);
      }
   }
}

static nir_shader *
build_leaf_shader(struct radv_device *dev)
{
   const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
   nir_builder b = radv_meta_init_shader(MESA_SHADER_COMPUTE, "accel_build_leaf_shader");

   b.shader->info.workgroup_size[0] = 64;

   nir_ssa_def *pconst0 =
      nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 0, .range = 16);
   nir_ssa_def *pconst1 =
      nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 16, .range = 16);
   nir_ssa_def *pconst2 =
      nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 32, .range = 16);
   nir_ssa_def *pconst3 =
      nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 48, .range = 16);
   nir_ssa_def *pconst4 =
      nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 64, .range = 4);
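
   /* The push constants mirror struct build_primitive_constants above:
    * pconst0 holds the destination and scratch addresses, pconst1 the
    * destination/scratch offsets plus geometry type and id, and pconst2-4
    * the per-geometry data (vertex/index/transform pointers, instance or
    * AABB data) depending on the geometry type. */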

   nir_ssa_def *geom_type = nir_channel(&b, pconst1, 2);
   nir_ssa_def *node_dst_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst0, 3));
   nir_ssa_def *scratch_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst0, 12));
   nir_ssa_def *node_dst_offset = nir_channel(&b, pconst1, 0);
   nir_ssa_def *scratch_offset = nir_channel(&b, pconst1, 1);
   nir_ssa_def *geometry_id = nir_channel(&b, pconst1, 3);

   nir_ssa_def *global_id =
      nir_iadd(&b,
               nir_umul24(&b, nir_channels(&b, nir_load_workgroup_id(&b, 32), 1),
                          nir_imm_int(&b, b.shader->info.workgroup_size[0])),
               nir_channels(&b, nir_load_local_invocation_id(&b), 1));
   scratch_addr = nir_iadd(
      &b, scratch_addr,
      nir_u2u64(&b, nir_iadd(&b, scratch_offset, nir_umul24(&b, global_id, nir_imm_int(&b, 4)))));

   nir_push_if(&b, nir_ieq(&b, geom_type, nir_imm_int(&b, VK_GEOMETRY_TYPE_TRIANGLES_KHR)));
   {
      nir_ssa_def *vertex_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst2, 3));
      nir_ssa_def *index_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst2, 12));
      nir_ssa_def *transform_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst3, 3));
      nir_ssa_def *vertex_stride = nir_channel(&b, pconst3, 2);
      nir_ssa_def *vertex_format = nir_channel(&b, pconst3, 3);
      nir_ssa_def *index_format = nir_channel(&b, pconst4, 0);
      unsigned repl_swizzle[4] = {0, 0, 0, 0};

      nir_ssa_def *node_offset =
         nir_iadd(&b, node_dst_offset, nir_umul24(&b, global_id, nir_imm_int(&b, 64)));
      nir_ssa_def *triangle_node_dst_addr = nir_iadd(&b, node_dst_addr, nir_u2u64(&b, node_offset));

      nir_ssa_def *indices = get_indices(&b, index_addr, index_format, global_id);
      nir_ssa_def *vertex_addresses = nir_iadd(
         &b, nir_u2u64(&b, nir_imul(&b, indices, nir_swizzle(&b, vertex_stride, repl_swizzle, 3))),
         nir_swizzle(&b, vertex_addr, repl_swizzle, 3));
      nir_ssa_def *positions[3];
      get_vertices(&b, vertex_addresses, vertex_format, positions);

      nir_ssa_def *node_data[16];
      memset(node_data, 0, sizeof(node_data));

      nir_variable *transform[] = {
         nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "transform0"),
         nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "transform1"),
         nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "transform2"),
      };
      nir_store_var(&b, transform[0], nir_imm_vec4(&b, 1.0, 0.0, 0.0, 0.0), 0xf);
      nir_store_var(&b, transform[1], nir_imm_vec4(&b, 0.0, 1.0, 0.0, 0.0), 0xf);
      nir_store_var(&b, transform[2], nir_imm_vec4(&b, 0.0, 0.0, 1.0, 0.0), 0xf);

      nir_push_if(&b, nir_ine(&b, transform_addr, nir_imm_int64(&b, 0)));
      nir_store_var(
         &b, transform[0],
         nir_build_load_global(&b, 4, 32, nir_iadd(&b, transform_addr, nir_imm_int64(&b, 0))), 0xf);
      nir_store_var(
         &b, transform[1],
         nir_build_load_global(&b, 4, 32, nir_iadd(&b, transform_addr, nir_imm_int64(&b, 16))),
         0xf);
      nir_store_var(
         &b, transform[2],
         nir_build_load_global(&b, 4, 32, nir_iadd(&b, transform_addr, nir_imm_int64(&b, 32))),
         0xf);
      nir_pop_if(&b, NULL);

      for (unsigned i = 0; i < 3; ++i)
         for (unsigned j = 0; j < 3; ++j)
            node_data[i * 3 + j] = nir_fdph(&b, positions[i], nir_load_var(&b, transform[j]));

      node_data[12] = global_id;
      node_data[13] = geometry_id;
      node_data[15] = nir_imm_int(&b, 9);
      for (unsigned i = 0; i < ARRAY_SIZE(node_data); ++i)
         if (!node_data[i])
            node_data[i] = nir_imm_int(&b, 0);

      for (unsigned i = 0; i < 4; ++i) {
         nir_build_store_global(&b, nir_vec(&b, node_data + i * 4, 4),
                                nir_iadd(&b, triangle_node_dst_addr, nir_imm_int64(&b, i * 16)),
                                .align_mul = 16);
      }

      nir_ssa_def *node_id = nir_ushr(&b, node_offset, nir_imm_int(&b, 3));
      nir_build_store_global(&b, node_id, scratch_addr);
   }
   nir_push_else(&b, NULL);
   nir_push_if(&b, nir_ieq(&b, geom_type, nir_imm_int(&b, VK_GEOMETRY_TYPE_AABBS_KHR)));
   {
      nir_ssa_def *aabb_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst2, 3));
      nir_ssa_def *aabb_stride = nir_channel(&b, pconst2, 2);

      nir_ssa_def *node_offset =
         nir_iadd(&b, node_dst_offset, nir_umul24(&b, global_id, nir_imm_int(&b, 64)));
      nir_ssa_def *aabb_node_dst_addr = nir_iadd(&b, node_dst_addr, nir_u2u64(&b, node_offset));
      nir_ssa_def *node_id =
         nir_iadd(&b, nir_ushr(&b, node_offset, nir_imm_int(&b, 3)), nir_imm_int(&b, 7));
      nir_build_store_global(&b, node_id, scratch_addr);

      aabb_addr = nir_iadd(&b, aabb_addr, nir_u2u64(&b, nir_imul(&b, aabb_stride, global_id)));

      nir_ssa_def *min_bound =
         nir_build_load_global(&b, 3, 32, nir_iadd(&b, aabb_addr, nir_imm_int64(&b, 0)));
      nir_ssa_def *max_bound =
         nir_build_load_global(&b, 3, 32, nir_iadd(&b, aabb_addr, nir_imm_int64(&b, 12)));

      nir_ssa_def *values[] = {nir_channel(&b, min_bound, 0),
                               nir_channel(&b, min_bound, 1),
                               nir_channel(&b, min_bound, 2),
                               nir_channel(&b, max_bound, 0),
                               nir_channel(&b, max_bound, 1),
                               nir_channel(&b, max_bound, 2),
                               global_id,
                               geometry_id};

      nir_build_store_global(&b, nir_vec(&b, values + 0, 4),
                             nir_iadd(&b, aabb_node_dst_addr, nir_imm_int64(&b, 0)),
                             .align_mul = 16);
      nir_build_store_global(&b, nir_vec(&b, values + 4, 4),
                             nir_iadd(&b, aabb_node_dst_addr, nir_imm_int64(&b, 16)),
                             .align_mul = 16);
   }
   nir_push_else(&b, NULL);
   {
      nir_variable *instance_addr_var =
         nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint64_t_type(), "instance_addr");
      nir_push_if(&b, nir_ine(&b, nir_channel(&b, pconst2, 2), nir_imm_int(&b, 0)));
      {
         nir_ssa_def *ptr = nir_iadd(&b, nir_pack_64_2x32(&b, nir_channels(&b, pconst2, 3)),
                                     nir_u2u64(&b, nir_imul(&b, global_id, nir_imm_int(&b, 8))));
         nir_ssa_def *addr =
            nir_pack_64_2x32(&b, nir_build_load_global(&b, 2, 32, ptr, .align_mul = 8));
         nir_store_var(&b, instance_addr_var, addr, 1);
      }
      nir_push_else(&b, NULL);
      {
         nir_ssa_def *addr = nir_iadd(&b, nir_pack_64_2x32(&b, nir_channels(&b, pconst2, 3)),
                                      nir_u2u64(&b, nir_imul(&b, global_id, nir_imm_int(&b, 64))));
         nir_store_var(&b, instance_addr_var, addr, 1);
      }
      nir_pop_if(&b, NULL);
      nir_ssa_def *instance_addr = nir_load_var(&b, instance_addr_var);

      nir_ssa_def *inst_transform[] = {
         nir_build_load_global(&b, 4, 32, nir_iadd(&b, instance_addr, nir_imm_int64(&b, 0))),
         nir_build_load_global(&b, 4, 32, nir_iadd(&b, instance_addr, nir_imm_int64(&b, 16))),
         nir_build_load_global(&b, 4, 32, nir_iadd(&b, instance_addr, nir_imm_int64(&b, 32)))};
      nir_ssa_def *inst3 =
         nir_build_load_global(&b, 4, 32, nir_iadd(&b, instance_addr, nir_imm_int64(&b, 48)));

      nir_ssa_def *node_offset =
         nir_iadd(&b, node_dst_offset, nir_umul24(&b, global_id, nir_imm_int(&b, 128)));
      node_dst_addr = nir_iadd(&b, node_dst_addr, nir_u2u64(&b, node_offset));
      nir_ssa_def *node_id =
         nir_iadd(&b, nir_ushr(&b, node_offset, nir_imm_int(&b, 3)), nir_imm_int(&b, 6));
      nir_build_store_global(&b, node_id, scratch_addr);

      nir_variable *bounds[2] = {
         nir_variable_create(b.shader, nir_var_shader_temp, vec3_type, "min_bound"),
         nir_variable_create(b.shader, nir_var_shader_temp, vec3_type, "max_bound"),
      };

      nir_store_var(&b, bounds[0], nir_channels(&b, nir_imm_vec4(&b, NAN, NAN, NAN, NAN), 7), 7);
      nir_store_var(&b, bounds[1], nir_channels(&b, nir_imm_vec4(&b, NAN, NAN, NAN, NAN), 7), 7);

      nir_ssa_def *header_addr = nir_pack_64_2x32(&b, nir_channels(&b, inst3, 12));
      nir_push_if(&b, nir_ine(&b, header_addr, nir_imm_int64(&b, 0)));
      nir_ssa_def *header_root_offset =
         nir_build_load_global(&b, 1, 32, nir_iadd(&b, header_addr, nir_imm_int64(&b, 0)));
      nir_ssa_def *header_min =
         nir_build_load_global(&b, 3, 32, nir_iadd(&b, header_addr, nir_imm_int64(&b, 8)));
      nir_ssa_def *header_max =
         nir_build_load_global(&b, 3, 32, nir_iadd(&b, header_addr, nir_imm_int64(&b, 20)));

      nir_ssa_def *bound_defs[2][3];
      for (unsigned i = 0; i < 3; ++i) {
         bound_defs[0][i] = bound_defs[1][i] = nir_channel(&b, inst_transform[i], 3);

         nir_ssa_def *mul_a = nir_fmul(&b, nir_channels(&b, inst_transform[i], 7), header_min);
         nir_ssa_def *mul_b = nir_fmul(&b, nir_channels(&b, inst_transform[i], 7), header_max);
         nir_ssa_def *mi = nir_fmin(&b, mul_a, mul_b);
         nir_ssa_def *ma = nir_fmax(&b, mul_a, mul_b);
         for (unsigned j = 0; j < 3; ++j) {
            bound_defs[0][i] = nir_fadd(&b, bound_defs[0][i], nir_channel(&b, mi, j));
            bound_defs[1][i] = nir_fadd(&b, bound_defs[1][i], nir_channel(&b, ma, j));
         }
      }

      nir_store_var(&b, bounds[0], nir_vec(&b, bound_defs[0], 3), 7);
      nir_store_var(&b, bounds[1], nir_vec(&b, bound_defs[1], 3), 7);

      /* Store object to world matrix */
      for (unsigned i = 0; i < 3; ++i) {
         nir_ssa_def *vals[3];
         for (unsigned j = 0; j < 3; ++j)
            vals[j] = nir_channel(&b, inst_transform[j], i);

         nir_build_store_global(&b, nir_vec(&b, vals, 3),
                                nir_iadd(&b, node_dst_addr, nir_imm_int64(&b, 92 + 12 * i)));
      }

      nir_ssa_def *m_in[3][3], *m_out[3][3], *m_vec[3][4];
      for (unsigned i = 0; i < 3; ++i)
         for (unsigned j = 0; j < 3; ++j)
            m_in[i][j] = nir_channel(&b, inst_transform[i], j);
      nir_invert_3x3(&b, m_in, m_out);
      for (unsigned i = 0; i < 3; ++i) {
         for (unsigned j = 0; j < 3; ++j)
            m_vec[i][j] = m_out[i][j];
         m_vec[i][3] = nir_channel(&b, inst_transform[i], 3);
      }

      for (unsigned i = 0; i < 3; ++i) {
         nir_build_store_global(&b, nir_vec(&b, m_vec[i], 4),
                                nir_iadd(&b, node_dst_addr, nir_imm_int64(&b, 16 + 16 * i)));
      }

      nir_ssa_def *out0[4] = {
         nir_ior(&b, nir_channel(&b, nir_unpack_64_2x32(&b, header_addr), 0), header_root_offset),
         nir_channel(&b, nir_unpack_64_2x32(&b, header_addr), 1), nir_channel(&b, inst3, 0),
         nir_channel(&b, inst3, 1)};
      nir_build_store_global(&b, nir_vec(&b, out0, 4),
                             nir_iadd(&b, node_dst_addr, nir_imm_int64(&b, 0)));
      nir_build_store_global(&b, global_id, nir_iadd(&b, node_dst_addr, nir_imm_int64(&b, 88)));
      nir_pop_if(&b, NULL);
      nir_build_store_global(&b, nir_load_var(&b, bounds[0]),
                             nir_iadd(&b, node_dst_addr, nir_imm_int64(&b, 64)));
      nir_build_store_global(&b, nir_load_var(&b, bounds[1]),
                             nir_iadd(&b, node_dst_addr, nir_imm_int64(&b, 76)));
   }
   nir_pop_if(&b, NULL);
   nir_pop_if(&b, NULL);

   return b.shader;
}

static void
determine_bounds(nir_builder *b, nir_ssa_def *node_addr, nir_ssa_def *node_id,
                 nir_variable *bounds_vars[2])
{
   nir_ssa_def *node_type = nir_iand(b, node_id, nir_imm_int(b, 7));
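   /* Low 3 bits of a node id encode its type, matching the CPU-side builder:
    * 0 = triangle leaf, 5 = box32 internal node, 6 = instance leaf,
    * 7 = AABB leaf; the remaining bits are the offset in 8-byte units. */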
   node_addr = nir_iadd(
      b, node_addr,
      nir_u2u64(b, nir_ishl(b, nir_iand(b, node_id, nir_imm_int(b, ~7u)), nir_imm_int(b, 3))));

   nir_push_if(b, nir_ieq(b, node_type, nir_imm_int(b, 0)));
   {
      nir_ssa_def *positions[3];
      for (unsigned i = 0; i < 3; ++i)
         positions[i] =
            nir_build_load_global(b, 3, 32, nir_iadd(b, node_addr, nir_imm_int64(b, i * 12)));
      nir_ssa_def *bounds[] = {positions[0], positions[0]};
      for (unsigned i = 1; i < 3; ++i) {
         bounds[0] = nir_fmin(b, bounds[0], positions[i]);
         bounds[1] = nir_fmax(b, bounds[1], positions[i]);
      }
      nir_store_var(b, bounds_vars[0], bounds[0], 7);
      nir_store_var(b, bounds_vars[1], bounds[1], 7);
   }
   nir_push_else(b, NULL);
   nir_push_if(b, nir_ieq(b, node_type, nir_imm_int(b, 5)));
   {
      nir_ssa_def *input_bounds[4][2];
      for (unsigned i = 0; i < 4; ++i)
         for (unsigned j = 0; j < 2; ++j)
            input_bounds[i][j] = nir_build_load_global(
               b, 3, 32, nir_iadd(b, node_addr, nir_imm_int64(b, 16 + i * 24 + j * 12)));
      nir_ssa_def *bounds[] = {input_bounds[0][0], input_bounds[0][1]};
      for (unsigned i = 1; i < 4; ++i) {
         bounds[0] = nir_fmin(b, bounds[0], input_bounds[i][0]);
         bounds[1] = nir_fmax(b, bounds[1], input_bounds[i][1]);
      }

      nir_store_var(b, bounds_vars[0], bounds[0], 7);
      nir_store_var(b, bounds_vars[1], bounds[1], 7);
   }
   nir_push_else(b, NULL);
   nir_push_if(b, nir_ieq(b, node_type, nir_imm_int(b, 6)));
   {
      nir_ssa_def *bounds[2];
      for (unsigned i = 0; i < 2; ++i)
         bounds[i] =
            nir_build_load_global(b, 3, 32, nir_iadd(b, node_addr, nir_imm_int64(b, 64 + i * 12)));
      nir_store_var(b, bounds_vars[0], bounds[0], 7);
      nir_store_var(b, bounds_vars[1], bounds[1], 7);
   }
   nir_push_else(b, NULL);
   {
      nir_ssa_def *bounds[2];
      for (unsigned i = 0; i < 2; ++i)
         bounds[i] =
            nir_build_load_global(b, 3, 32, nir_iadd(b, node_addr, nir_imm_int64(b, i * 12)));
      nir_store_var(b, bounds_vars[0], bounds[0], 7);
      nir_store_var(b, bounds_vars[1], bounds[1], 7);
   }
   nir_pop_if(b, NULL);
   nir_pop_if(b, NULL);
   nir_pop_if(b, NULL);
}

static nir_shader *
build_internal_shader(struct radv_device *dev)
{
   const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
   nir_builder b = radv_meta_init_shader(MESA_SHADER_COMPUTE, "accel_build_internal_shader");

   b.shader->info.workgroup_size[0] = 64;

   /* Push constants:
    * i32 x 2: node dst address
    * i32 x 2: scratch address
    * i32: dst offset
    * i32: dst scratch offset
    * i32: src scratch offset
    * i32: src_node_count | (fill_header << 31)
    */
   nir_ssa_def *pconst0 =
      nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 0, .range = 16);
   nir_ssa_def *pconst1 =
      nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 16, .range = 16);

   nir_ssa_def *node_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst0, 3));
   nir_ssa_def *scratch_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst0, 12));
   nir_ssa_def *node_dst_offset = nir_channel(&b, pconst1, 0);
   nir_ssa_def *dst_scratch_offset = nir_channel(&b, pconst1, 1);
   nir_ssa_def *src_scratch_offset = nir_channel(&b, pconst1, 2);
   nir_ssa_def *src_node_count =
      nir_iand(&b, nir_channel(&b, pconst1, 3), nir_imm_int(&b, 0x7FFFFFFFU));
   nir_ssa_def *fill_header =
      nir_ine(&b, nir_iand(&b, nir_channel(&b, pconst1, 3), nir_imm_int(&b, 0x80000000U)),
              nir_imm_int(&b, 0));

   nir_ssa_def *global_id =
      nir_iadd(&b,
               nir_umul24(&b, nir_channels(&b, nir_load_workgroup_id(&b, 32), 1),
                          nir_imm_int(&b, b.shader->info.workgroup_size[0])),
               nir_channels(&b, nir_load_local_invocation_id(&b), 1));
   nir_ssa_def *src_idx = nir_imul(&b, global_id, nir_imm_int(&b, 4));
   nir_ssa_def *src_count = nir_umin(&b, nir_imm_int(&b, 4), nir_isub(&b, src_node_count, src_idx));
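
   /* Each invocation packs up to 4 ids produced by the previous level into
    * one 128-byte box32 node, computes the children's bounds and appends the
    * new node's id to the destination scratch array; the last pass also
    * writes the root id and total AABB into the header. */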

   nir_ssa_def *node_offset =
      nir_iadd(&b, node_dst_offset, nir_ishl(&b, global_id, nir_imm_int(&b, 7)));
   nir_ssa_def *node_dst_addr = nir_iadd(&b, node_addr, nir_u2u64(&b, node_offset));
   nir_ssa_def *src_nodes = nir_build_load_global(
      &b, 4, 32,
      nir_iadd(&b, scratch_addr,
               nir_u2u64(&b, nir_iadd(&b, src_scratch_offset,
                                      nir_ishl(&b, global_id, nir_imm_int(&b, 4))))));

   nir_build_store_global(&b, src_nodes, nir_iadd(&b, node_dst_addr, nir_imm_int64(&b, 0)));

   nir_ssa_def *total_bounds[2] = {
      nir_channels(&b, nir_imm_vec4(&b, NAN, NAN, NAN, NAN), 7),
      nir_channels(&b, nir_imm_vec4(&b, NAN, NAN, NAN, NAN), 7),
   };

   for (unsigned i = 0; i < 4; ++i) {
      nir_variable *bounds[2] = {
         nir_variable_create(b.shader, nir_var_shader_temp, vec3_type, "min_bound"),
         nir_variable_create(b.shader, nir_var_shader_temp, vec3_type, "max_bound"),
      };
      nir_store_var(&b, bounds[0], nir_channels(&b, nir_imm_vec4(&b, NAN, NAN, NAN, NAN), 7), 7);
      nir_store_var(&b, bounds[1], nir_channels(&b, nir_imm_vec4(&b, NAN, NAN, NAN, NAN), 7), 7);

      nir_push_if(&b, nir_ilt(&b, nir_imm_int(&b, i), src_count));
      determine_bounds(&b, node_addr, nir_channel(&b, src_nodes, i), bounds);
      nir_pop_if(&b, NULL);
      nir_build_store_global(&b, nir_load_var(&b, bounds[0]),
                             nir_iadd(&b, node_dst_addr, nir_imm_int64(&b, 16 + 24 * i)));
      nir_build_store_global(&b, nir_load_var(&b, bounds[1]),
                             nir_iadd(&b, node_dst_addr, nir_imm_int64(&b, 28 + 24 * i)));
      total_bounds[0] = nir_fmin(&b, total_bounds[0], nir_load_var(&b, bounds[0]));
      total_bounds[1] = nir_fmax(&b, total_bounds[1], nir_load_var(&b, bounds[1]));
   }

   nir_ssa_def *node_id =
      nir_iadd(&b, nir_ushr(&b, node_offset, nir_imm_int(&b, 3)), nir_imm_int(&b, 5));
   nir_ssa_def *dst_scratch_addr = nir_iadd(
      &b, scratch_addr,
      nir_u2u64(&b, nir_iadd(&b, dst_scratch_offset, nir_ishl(&b, global_id, nir_imm_int(&b, 2)))));
   nir_build_store_global(&b, node_id, dst_scratch_addr);

   nir_push_if(&b, fill_header);
   nir_build_store_global(&b, node_id, node_addr);
   nir_build_store_global(&b, total_bounds[0], nir_iadd(&b, node_addr, nir_imm_int64(&b, 8)));
   nir_build_store_global(&b, total_bounds[1], nir_iadd(&b, node_addr, nir_imm_int64(&b, 20)));
   nir_pop_if(&b, NULL);

   return b.shader;
}

enum copy_mode {
   COPY_MODE_COPY,
   COPY_MODE_SERIALIZE,
   COPY_MODE_DESERIALIZE,
};

struct copy_constants {
   uint64_t src_addr;
   uint64_t dst_addr;
   uint32_t mode;
};

static nir_shader *
build_copy_shader(struct radv_device *dev)
{
   nir_builder b = radv_meta_init_shader(MESA_SHADER_COMPUTE, "accel_copy");
   b.shader->info.workgroup_size[0] = 64;

   nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
   nir_ssa_def *wg_id = nir_load_workgroup_id(&b, 32);
   nir_ssa_def *block_size =
      nir_imm_ivec4(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
                    b.shader->info.workgroup_size[2], 0);

   nir_ssa_def *global_id =
      nir_channel(&b, nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id), 0);

   nir_variable *offset_var =
      nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "offset");
   nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
   nir_store_var(&b, offset_var, offset, 1);

   nir_ssa_def *increment = nir_imul(&b, nir_channel(&b, nir_load_num_workgroups(&b, 32), 0),
                                     nir_imm_int(&b, b.shader->info.workgroup_size[0] * 16));

   nir_ssa_def *pconst0 =
      nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 0, .range = 16);
   nir_ssa_def *pconst1 =
      nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 16, .range = 4);
   nir_ssa_def *src_base_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst0, 3));
   nir_ssa_def *dst_base_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst0, 0xc));
   nir_ssa_def *mode = nir_channel(&b, pconst1, 0);
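
   /* One shader handles all three copy modes; they only differ in where the
    * source/destination byte offsets start and in whether the 64-bit BLAS
    * pointers inside instance nodes get replaced while the data streams
    * through (serialize writes them to a table after the serialization
    * header, deserialize patches them back in). */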

   nir_variable *compacted_size_var =
      nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint64_t_type(), "compacted_size");
   nir_variable *src_offset_var =
      nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "src_offset");
   nir_variable *dst_offset_var =
      nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "dst_offset");
   nir_variable *instance_offset_var =
      nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "instance_offset");
   nir_variable *instance_count_var =
      nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "instance_count");
   nir_variable *value_var =
      nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "value");

   nir_push_if(&b, nir_ieq(&b, mode, nir_imm_int(&b, COPY_MODE_SERIALIZE)));
   {
      nir_ssa_def *instance_count = nir_build_load_global(
         &b, 1, 32,
         nir_iadd(&b, src_base_addr,
                  nir_imm_int64(&b, offsetof(struct radv_accel_struct_header, instance_count))));
      nir_ssa_def *compacted_size = nir_build_load_global(
         &b, 1, 64,
         nir_iadd(&b, src_base_addr,
                  nir_imm_int64(&b, offsetof(struct radv_accel_struct_header, compacted_size))));
      nir_ssa_def *serialization_size = nir_build_load_global(
         &b, 1, 64,
         nir_iadd(&b, src_base_addr,
                  nir_imm_int64(&b, offsetof(struct radv_accel_struct_header, serialization_size))));

      nir_store_var(&b, compacted_size_var, compacted_size, 1);
      nir_store_var(
         &b, instance_offset_var,
         nir_build_load_global(&b, 1, 32,
                               nir_iadd(&b, src_base_addr,
                                        nir_imm_int64(&b, offsetof(struct radv_accel_struct_header,
                                                                   instance_offset)))),
         1);
      nir_store_var(&b, instance_count_var, instance_count, 1);

      nir_ssa_def *dst_offset =
         nir_iadd(&b, nir_imm_int(&b, sizeof(struct radv_accel_struct_serialization_header)),
                  nir_imul(&b, instance_count, nir_imm_int(&b, sizeof(uint64_t))));
      nir_store_var(&b, src_offset_var, nir_imm_int(&b, 0), 1);
      nir_store_var(&b, dst_offset_var, dst_offset, 1);

      nir_push_if(&b, nir_ieq(&b, global_id, nir_imm_int(&b, 0)));
      {
         nir_build_store_global(
            &b, serialization_size,
            nir_iadd(&b, dst_base_addr,
                     nir_imm_int64(&b, offsetof(struct radv_accel_struct_serialization_header,
                                                serialization_size))));
         nir_build_store_global(
            &b, compacted_size,
            nir_iadd(&b, dst_base_addr,
                     nir_imm_int64(&b, offsetof(struct radv_accel_struct_serialization_header,
                                                compacted_size))));
         nir_build_store_global(
            &b, nir_u2u64(&b, instance_count),
            nir_iadd(&b, dst_base_addr,
                     nir_imm_int64(&b, offsetof(struct radv_accel_struct_serialization_header,
                                                instance_count))));
      }
      nir_pop_if(&b, NULL);
   }
   nir_push_else(&b, NULL);
   nir_push_if(&b, nir_ieq(&b, mode, nir_imm_int(&b, COPY_MODE_DESERIALIZE)));
   {
      nir_ssa_def *instance_count = nir_build_load_global(
         &b, 1, 32,
         nir_iadd(&b, src_base_addr,
                  nir_imm_int64(
                     &b, offsetof(struct radv_accel_struct_serialization_header, instance_count))));
      nir_ssa_def *src_offset =
         nir_iadd(&b, nir_imm_int(&b, sizeof(struct radv_accel_struct_serialization_header)),
                  nir_imul(&b, instance_count, nir_imm_int(&b, sizeof(uint64_t))));

      nir_ssa_def *header_addr = nir_iadd(&b, src_base_addr, nir_u2u64(&b, src_offset));
      nir_store_var(
         &b, compacted_size_var,
         nir_build_load_global(
            &b, 1, 64,
            nir_iadd(&b, header_addr,
                     nir_imm_int64(&b, offsetof(struct radv_accel_struct_header, compacted_size)))),
         1);
      nir_store_var(
         &b, instance_offset_var,
         nir_build_load_global(&b, 1, 32,
                               nir_iadd(&b, header_addr,
                                        nir_imm_int64(&b, offsetof(struct radv_accel_struct_header,
                                                                   instance_offset)))),
         1);
      nir_store_var(&b, instance_count_var, instance_count, 1);
      nir_store_var(&b, src_offset_var, src_offset, 1);
      nir_store_var(&b, dst_offset_var, nir_imm_int(&b, 0), 1);
   }
   nir_push_else(&b, NULL); /* COPY_MODE_COPY */
   {
      nir_store_var(
         &b, compacted_size_var,
         nir_build_load_global(
            &b, 1, 64,
            nir_iadd(&b, src_base_addr,
                     nir_imm_int64(&b, offsetof(struct radv_accel_struct_header, compacted_size)))),
         1);

      nir_store_var(&b, src_offset_var, nir_imm_int(&b, 0), 1);
      nir_store_var(&b, dst_offset_var, nir_imm_int(&b, 0), 1);
      nir_store_var(&b, instance_offset_var, nir_imm_int(&b, 0), 1);
      nir_store_var(&b, instance_count_var, nir_imm_int(&b, 0), 1);
   }
   nir_pop_if(&b, NULL);
   nir_pop_if(&b, NULL);

   nir_ssa_def *instance_bound =
      nir_imul(&b, nir_imm_int(&b, sizeof(struct radv_bvh_instance_node)),
               nir_load_var(&b, instance_count_var));
   nir_ssa_def *compacted_size = nir_build_load_global(
      &b, 1, 32,
      nir_iadd(&b, src_base_addr,
               nir_imm_int64(&b, offsetof(struct radv_accel_struct_header, compacted_size))));

   nir_push_loop(&b);
   {
      offset = nir_load_var(&b, offset_var);
      nir_push_if(&b, nir_ilt(&b, offset, compacted_size));
      {
         nir_ssa_def *src_offset = nir_iadd(&b, offset, nir_load_var(&b, src_offset_var));
         nir_ssa_def *dst_offset = nir_iadd(&b, offset, nir_load_var(&b, dst_offset_var));
         nir_ssa_def *src_addr = nir_iadd(&b, src_base_addr, nir_u2u64(&b, src_offset));
         nir_ssa_def *dst_addr = nir_iadd(&b, dst_base_addr, nir_u2u64(&b, dst_offset));

         nir_ssa_def *value = nir_build_load_global(&b, 4, 32, src_addr, .align_mul = 16);
         nir_store_var(&b, value_var, value, 0xf);

         nir_ssa_def *instance_offset = nir_isub(&b, offset, nir_load_var(&b, instance_offset_var));
         nir_ssa_def *in_instance_bound =
            nir_iand(&b, nir_uge(&b, offset, nir_load_var(&b, instance_offset_var)),
                     nir_ult(&b, instance_offset, instance_bound));
         nir_ssa_def *instance_start =
            nir_ieq(&b,
                    nir_iand(&b, instance_offset,
                             nir_imm_int(&b, sizeof(struct radv_bvh_instance_node) - 1)),
                    nir_imm_int(&b, 0));

         nir_push_if(&b, nir_iand(&b, in_instance_bound, instance_start));
         {
            nir_ssa_def *instance_id = nir_ushr(&b, instance_offset, nir_imm_int(&b, 7));

            nir_push_if(&b, nir_ieq(&b, mode, nir_imm_int(&b, COPY_MODE_SERIALIZE)));
            {
               nir_ssa_def *instance_addr =
                  nir_imul(&b, instance_id, nir_imm_int(&b, sizeof(uint64_t)));
               instance_addr =
                  nir_iadd(&b, instance_addr,
                           nir_imm_int(&b, sizeof(struct radv_accel_struct_serialization_header)));
               instance_addr = nir_iadd(&b, dst_base_addr, nir_u2u64(&b, instance_addr));

               nir_build_store_global(&b, nir_channels(&b, value, 3), instance_addr,
                                      .align_mul = 8);
            }
            nir_push_else(&b, NULL);
            {
               nir_ssa_def *instance_addr =
                  nir_imul(&b, instance_id, nir_imm_int(&b, sizeof(uint64_t)));
               instance_addr =
                  nir_iadd(&b, instance_addr,
                           nir_imm_int(&b, sizeof(struct radv_accel_struct_serialization_header)));
               instance_addr = nir_iadd(&b, src_base_addr, nir_u2u64(&b, instance_addr));

               nir_ssa_def *instance_value =
                  nir_build_load_global(&b, 2, 32, instance_addr, .align_mul = 8);

               nir_ssa_def *values[] = {
                  nir_channel(&b, instance_value, 0),
                  nir_channel(&b, instance_value, 1),
                  nir_channel(&b, value, 2),
                  nir_channel(&b, value, 3),
               };

               nir_store_var(&b, value_var, nir_vec(&b, values, 4), 0xf);
            }
            nir_pop_if(&b, NULL);
         }
         nir_pop_if(&b, NULL);

         nir_store_var(&b, offset_var, nir_iadd(&b, offset, increment), 1);

         nir_build_store_global(&b, nir_load_var(&b, value_var), dst_addr, .align_mul = 16);
      }
      nir_push_else(&b, NULL);
      {
         nir_jump(&b, nir_jump_break);
      }
      nir_pop_if(&b, NULL);
   }
   nir_pop_loop(&b, NULL);

   return b.shader;
}

void
radv_device_finish_accel_struct_build_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;
   radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.copy_pipeline,
                        &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.internal_pipeline,
                        &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.leaf_pipeline,
                        &state->alloc);
   radv_DestroyPipelineLayout(radv_device_to_handle(device),
                              state->accel_struct_build.copy_p_layout, &state->alloc);
   radv_DestroyPipelineLayout(radv_device_to_handle(device),
                              state->accel_struct_build.internal_p_layout, &state->alloc);
   radv_DestroyPipelineLayout(radv_device_to_handle(device),
                              state->accel_struct_build.leaf_p_layout, &state->alloc);
}
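
/* Create the three meta compute pipelines used for acceleration structure
 * work: a "leaf" pipeline that emits leaf nodes for the input
 * triangles/AABBs/instances, an "internal" pipeline that collapses up to
 * four child nodes into one box node per pass, and a "copy" pipeline used
 * for copy/serialize/deserialize. The ralloc'd NIR shaders are only needed
 * for pipeline creation, so they are freed at the end of both the success
 * and the failure path.
 */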
VkResult
radv_device_init_accel_struct_build_state(struct radv_device *device)
{
   VkResult result;
   nir_shader *leaf_cs = build_leaf_shader(device);
   nir_shader *internal_cs = build_internal_shader(device);
   nir_shader *copy_cs = build_copy_shader(device);

   const VkPipelineLayoutCreateInfo leaf_pl_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 0,
      .pushConstantRangeCount = 1,
      .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0,
                                                    sizeof(struct build_primitive_constants)},
   };

   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &leaf_pl_create_info,
                                      &device->meta_state.alloc,
                                      &device->meta_state.accel_struct_build.leaf_p_layout);
   if (result != VK_SUCCESS)
      goto fail;

   VkPipelineShaderStageCreateInfo leaf_shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(leaf_cs),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo leaf_pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = leaf_shader_stage,
      .flags = 0,
      .layout = device->meta_state.accel_struct_build.leaf_p_layout,
   };

   result = radv_CreateComputePipelines(
      radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
      &leaf_pipeline_info, NULL, &device->meta_state.accel_struct_build.leaf_pipeline);
   if (result != VK_SUCCESS)
      goto fail;

   const VkPipelineLayoutCreateInfo internal_pl_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 0,
      .pushConstantRangeCount = 1,
      .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0,
                                                    sizeof(struct build_internal_constants)},
   };

   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &internal_pl_create_info,
                                      &device->meta_state.alloc,
                                      &device->meta_state.accel_struct_build.internal_p_layout);
   if (result != VK_SUCCESS)
      goto fail;

   VkPipelineShaderStageCreateInfo internal_shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(internal_cs),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo internal_pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = internal_shader_stage,
      .flags = 0,
      .layout = device->meta_state.accel_struct_build.internal_p_layout,
   };

   result = radv_CreateComputePipelines(
      radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
      &internal_pipeline_info, NULL, &device->meta_state.accel_struct_build.internal_pipeline);
   if (result != VK_SUCCESS)
      goto fail;

   const VkPipelineLayoutCreateInfo copy_pl_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 0,
      .pushConstantRangeCount = 1,
      .pPushConstantRanges =
         &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(struct copy_constants)},
   };

   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &copy_pl_create_info,
                                      &device->meta_state.alloc,
                                      &device->meta_state.accel_struct_build.copy_p_layout);
   if (result != VK_SUCCESS)
      goto fail;

   VkPipelineShaderStageCreateInfo copy_shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(copy_cs),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo copy_pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = copy_shader_stage,
      .flags = 0,
      .layout = device->meta_state.accel_struct_build.copy_p_layout,
   };

   result = radv_CreateComputePipelines(
      radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
      &copy_pipeline_info, NULL, &device->meta_state.accel_struct_build.copy_pipeline);
   if (result != VK_SUCCESS)
      goto fail;

   ralloc_free(copy_cs);
   ralloc_free(internal_cs);
   ralloc_free(leaf_cs);
   return VK_SUCCESS;

fail:
   radv_device_finish_accel_struct_build_state(device);
   ralloc_free(copy_cs);
   ralloc_free(internal_cs);
   ralloc_free(leaf_cs);
   return result;
}
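
/* Per-acceleration-structure CPU-side bookkeeping for an in-flight
 * vkCmdBuildAccelerationStructuresKHR call: where the next nodes go in the
 * result buffer, how many nodes the current BVH level contains, which half
 * of the scratch buffer currently holds that level's node IDs, and where the
 * instance nodes start (needed later for the header and for serialization).
 */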
struct bvh_state {
   uint32_t node_offset;
   uint32_t node_count;
   uint32_t scratch_offset;

   uint32_t instance_offset;
   uint32_t instance_count;
};
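
/* The build runs entirely on the GPU in two phases:
 *
 *  1. The leaf pipeline is dispatched once per geometry and writes one leaf
 *     node per primitive (triangle, AABB or instance) into the destination
 *     buffer, plus one node ID per primitive into the scratch buffer.
 *  2. The internal-node pipeline is dispatched repeatedly; each pass packs up
 *     to four nodes of the previous level into a new box node, ping-ponging
 *     node IDs between the two halves of the scratch buffer, until only the
 *     root remains.
 *
 * Afterwards the tail of the header (instance offset/count, compacted size,
 * serialization size and the indirect dispatch size for copies) is written
 * with CP writes.
 */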
VKAPI_ATTR void VKAPI_CALL
radv_CmdBuildAccelerationStructuresKHR(
   VkCommandBuffer commandBuffer, uint32_t infoCount,
   const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
   const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_meta_saved_state saved_state;

   radv_meta_save(
      &saved_state, cmd_buffer,
      RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
   struct bvh_state *bvh_states = calloc(infoCount, sizeof(struct bvh_state));
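
   /* Phase 1: emit leaf nodes for every geometry of every build info. */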
   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        cmd_buffer->device->meta_state.accel_struct_build.leaf_pipeline);

   for (uint32_t i = 0; i < infoCount; ++i) {
      RADV_FROM_HANDLE(radv_acceleration_structure, accel_struct,
                       pInfos[i].dstAccelerationStructure);

      struct build_primitive_constants prim_consts = {
         .node_dst_addr = radv_accel_struct_get_va(accel_struct),
         .scratch_addr = pInfos[i].scratchData.deviceAddress,
         .dst_offset = ALIGN(sizeof(struct radv_accel_struct_header), 64) + 128,
         .dst_scratch_offset = 0,
      };
      bvh_states[i].node_offset = prim_consts.dst_offset;
      bvh_states[i].instance_offset = prim_consts.dst_offset;
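
      /* Walk the geometries twice, instance geometries first (inst == 1) and
       * everything else second, so that all instance nodes end up in one
       * contiguous range starting at instance_offset. The copy shader relies
       * on that layout when it patches instance pointers during
       * (de)serialization.
       */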
      for (int inst = 1; inst >= 0; --inst) {
         for (unsigned j = 0; j < pInfos[i].geometryCount; ++j) {
            const VkAccelerationStructureGeometryKHR *geom =
               pInfos[i].pGeometries ? &pInfos[i].pGeometries[j] : pInfos[i].ppGeometries[j];

            if ((inst && geom->geometryType != VK_GEOMETRY_TYPE_INSTANCES_KHR) ||
                (!inst && geom->geometryType == VK_GEOMETRY_TYPE_INSTANCES_KHR))
               continue;

            prim_consts.geometry_type = geom->geometryType;
            prim_consts.geometry_id = j | (geom->flags << 28);
            unsigned prim_size;
            switch (geom->geometryType) {
            case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
               prim_consts.vertex_addr =
                  geom->geometry.triangles.vertexData.deviceAddress +
                  ppBuildRangeInfos[i][j].firstVertex * geom->geometry.triangles.vertexStride;
               prim_consts.index_addr = geom->geometry.triangles.indexData.deviceAddress;

               if (geom->geometry.triangles.indexType == VK_INDEX_TYPE_NONE_KHR)
                  prim_consts.vertex_addr += ppBuildRangeInfos[i][j].primitiveOffset;
               else
                  prim_consts.index_addr += ppBuildRangeInfos[i][j].primitiveOffset;

               prim_consts.transform_addr = geom->geometry.triangles.transformData.deviceAddress +
                                            ppBuildRangeInfos[i][j].transformOffset;
               prim_consts.vertex_stride = geom->geometry.triangles.vertexStride;
               prim_consts.vertex_format = geom->geometry.triangles.vertexFormat;
               prim_consts.index_format = geom->geometry.triangles.indexType;
               prim_size = sizeof(struct radv_bvh_triangle_node);
               break;
            case VK_GEOMETRY_TYPE_AABBS_KHR:
               prim_consts.aabb_addr =
                  geom->geometry.aabbs.data.deviceAddress + ppBuildRangeInfos[i][j].primitiveOffset;
               prim_consts.aabb_stride = geom->geometry.aabbs.stride;
               prim_size = sizeof(struct radv_bvh_aabb_node);
               break;
            case VK_GEOMETRY_TYPE_INSTANCES_KHR:
               prim_consts.instance_data = geom->geometry.instances.data.deviceAddress +
                                           ppBuildRangeInfos[i][j].primitiveOffset;
               prim_consts.array_of_pointers = geom->geometry.instances.arrayOfPointers ? 1 : 0;
               prim_size = sizeof(struct radv_bvh_instance_node);

               bvh_states[i].instance_count += ppBuildRangeInfos[i][j].primitiveCount;
               break;
            default:
               unreachable("Unknown geometryType");
            }

            radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
                                  cmd_buffer->device->meta_state.accel_struct_build.leaf_p_layout,
                                  VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(prim_consts),
                                  &prim_consts);
            radv_unaligned_dispatch(cmd_buffer, ppBuildRangeInfos[i][j].primitiveCount, 1, 1);
            prim_consts.dst_offset += prim_size * ppBuildRangeInfos[i][j].primitiveCount;
            prim_consts.dst_scratch_offset += 4 * ppBuildRangeInfos[i][j].primitiveCount;
         }
      }
      bvh_states[i].node_offset = prim_consts.dst_offset;
      bvh_states[i].node_count = prim_consts.dst_scratch_offset / 4;
   }
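
   /* Phase 2: build internal nodes level by level. Every pass merges up to
    * four nodes of the previous level into one 128-byte box node and writes
    * the new node IDs into the other half of the scratch buffer, so the node
    * count shrinks by roughly 4x per pass until a single root is left. All
    * infoCount builds advance together, so one barrier per pass is enough
    * between levels.
    */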
   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        cmd_buffer->device->meta_state.accel_struct_build.internal_pipeline);
   bool progress = true;
   for (unsigned iter = 0; progress; ++iter) {
      progress = false;
      for (uint32_t i = 0; i < infoCount; ++i) {
         RADV_FROM_HANDLE(radv_acceleration_structure, accel_struct,
                          pInfos[i].dstAccelerationStructure);

         if (iter && bvh_states[i].node_count == 1)
            continue;

         if (!progress)
            cmd_buffer->state.flush_bits |=
               RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
               radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, NULL) |
               radv_dst_access_flush(cmd_buffer,
                                     VK_ACCESS_2_SHADER_READ_BIT | VK_ACCESS_2_SHADER_WRITE_BIT, NULL);

         progress = true;

         uint32_t dst_node_count = MAX2(1, DIV_ROUND_UP(bvh_states[i].node_count, 4));
         bool final_iter = dst_node_count == 1;
         uint32_t src_scratch_offset = bvh_states[i].scratch_offset;
         uint32_t dst_scratch_offset = src_scratch_offset ? 0 : bvh_states[i].node_count * 4;
         uint32_t dst_node_offset = bvh_states[i].node_offset;
         if (final_iter)
            dst_node_offset = ALIGN(sizeof(struct radv_accel_struct_header), 64);

         const struct build_internal_constants consts = {
            .node_dst_addr = radv_accel_struct_get_va(accel_struct),
            .scratch_addr = pInfos[i].scratchData.deviceAddress,
            .dst_offset = dst_node_offset,
            .dst_scratch_offset = dst_scratch_offset,
            .src_scratch_offset = src_scratch_offset,
            .fill_header = bvh_states[i].node_count | (final_iter ? 0x80000000U : 0),
         };

         radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
                               cmd_buffer->device->meta_state.accel_struct_build.internal_p_layout,
                               VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
         radv_unaligned_dispatch(cmd_buffer, dst_node_count, 1, 1);

         bvh_states[i].node_offset += dst_node_count * 128;
         bvh_states[i].node_count = dst_node_count;
         bvh_states[i].scratch_offset = dst_scratch_offset;
      }
   }
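
   /* Write the remaining header fields with CP writes; only the tail of the
    * header starting at compacted_size is updated from the CPU here.
    * compacted_size is simply the end of the last node written, and
    * copy_dispatch_size is pre-computed so that GPU-side copies and
    * serializations can be launched with an indirect dispatch: the copy
    * shader moves 16 bytes per invocation with 64 invocations per workgroup,
    * so e.g. a 1 MiB BVH needs DIV_ROUND_UP(1048576, 16 * 64) = 1024
    * workgroups.
    */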
   for (uint32_t i = 0; i < infoCount; ++i) {
      RADV_FROM_HANDLE(radv_acceleration_structure, accel_struct,
                       pInfos[i].dstAccelerationStructure);
      const size_t base = offsetof(struct radv_accel_struct_header, compacted_size);
      struct radv_accel_struct_header header;

      header.instance_offset = bvh_states[i].instance_offset;
      header.instance_count = bvh_states[i].instance_count;
      header.compacted_size = bvh_states[i].node_offset;

      /* 16 bytes per invocation, 64 invocations per workgroup */
      header.copy_dispatch_size[0] = DIV_ROUND_UP(header.compacted_size, 16 * 64);
      header.copy_dispatch_size[1] = 1;
      header.copy_dispatch_size[2] = 1;

      header.serialization_size =
         header.compacted_size + align(sizeof(struct radv_accel_struct_serialization_header) +
                                          sizeof(uint64_t) * header.instance_count,
                                       128);

      radv_update_buffer_cp(cmd_buffer,
                            radv_buffer_get_va(accel_struct->bo) + accel_struct->mem_offset + base,
                            (const char *)&header + base, sizeof(header) - base);
   }

   free(bvh_states);
   radv_meta_restore(&saved_state, cmd_buffer);
}
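
/* The copy, serialize and deserialize commands below all reuse the same copy
 * compute shader; only the push-constant "mode" differs. For copies and
 * serialization the workgroup count comes from the copy_dispatch_size that
 * the build wrote into the source header, via an indirect dispatch, so no CPU
 * readback of the size is needed.
 */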

VKAPI_ATTR void VKAPI_CALL
radv_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer,
                                     const VkCopyAccelerationStructureInfoKHR *pInfo)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   RADV_FROM_HANDLE(radv_acceleration_structure, src, pInfo->src);
   RADV_FROM_HANDLE(radv_acceleration_structure, dst, pInfo->dst);
   struct radv_meta_saved_state saved_state;

   radv_meta_save(
      &saved_state, cmd_buffer,
      RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);

   uint64_t src_addr = radv_accel_struct_get_va(src);
   uint64_t dst_addr = radv_accel_struct_get_va(dst);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        cmd_buffer->device->meta_state.accel_struct_build.copy_pipeline);

   const struct copy_constants consts = {
      .src_addr = src_addr,
      .dst_addr = dst_addr,
      .mode = COPY_MODE_COPY,
   };

   radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
                         cmd_buffer->device->meta_state.accel_struct_build.copy_p_layout,
                         VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);

   cmd_buffer->state.flush_bits |=
      radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT, NULL);

   radv_indirect_dispatch(cmd_buffer, src->bo,
                          src_addr + offsetof(struct radv_accel_struct_header, copy_dispatch_size));
   radv_meta_restore(&saved_state, cmd_buffer);
}

VKAPI_ATTR void VKAPI_CALL
radv_GetDeviceAccelerationStructureCompatibilityKHR(
   VkDevice _device, const VkAccelerationStructureVersionInfoKHR *pVersionInfo,
   VkAccelerationStructureCompatibilityKHR *pCompatibility)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   uint8_t zero[VK_UUID_SIZE] = {
      0,
   };
   bool compat =
      memcmp(pVersionInfo->pVersionData, device->physical_device->driver_uuid, VK_UUID_SIZE) == 0 &&
      memcmp(pVersionInfo->pVersionData + VK_UUID_SIZE, zero, VK_UUID_SIZE) == 0;
   *pCompatibility = compat ? VK_ACCELERATION_STRUCTURE_COMPATIBILITY_COMPATIBLE_KHR
                            : VK_ACCELERATION_STRUCTURE_COMPATIBILITY_INCOMPATIBLE_KHR;
}
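
/* Host-side (de)serialization. The serialized blob starts with a
 * radv_accel_struct_serialization_header (driver UUID, compatibility UUID,
 * sizes and instance count), followed by one uint64_t per instance holding
 * the address of the referenced BLAS, followed by the raw BVH data. On
 * deserialization the stored addresses are spliced back into the first 8
 * bytes of each 128-byte instance node; the low 6 bits, which are not part
 * of the address, are preserved from the destination.
 */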

VKAPI_ATTR VkResult VKAPI_CALL
radv_CopyMemoryToAccelerationStructureKHR(VkDevice _device,
                                          VkDeferredOperationKHR deferredOperation,
                                          const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_acceleration_structure, accel_struct, pInfo->dst);

   char *base = device->ws->buffer_map(accel_struct->bo);
   if (!base)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   base += accel_struct->mem_offset;
   const struct radv_accel_struct_header *header = (const struct radv_accel_struct_header *)base;

   const char *src = pInfo->src.hostAddress;
   struct radv_accel_struct_serialization_header *src_header = (void *)src;
   src += sizeof(*src_header) + sizeof(uint64_t) * src_header->instance_count;

   memcpy(base, src, src_header->compacted_size);

   for (unsigned i = 0; i < src_header->instance_count; ++i) {
      uint64_t *p = (uint64_t *)(base + i * 128 + header->instance_offset);
      *p = (*p & 63) | src_header->instances[i];
   }

   device->ws->buffer_unmap(accel_struct->bo);
   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
radv_CopyAccelerationStructureToMemoryKHR(VkDevice _device,
                                          VkDeferredOperationKHR deferredOperation,
                                          const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_acceleration_structure, accel_struct, pInfo->src);

   const char *base = device->ws->buffer_map(accel_struct->bo);
   if (!base)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   base += accel_struct->mem_offset;
   const struct radv_accel_struct_header *header = (const struct radv_accel_struct_header *)base;

   char *dst = pInfo->dst.hostAddress;
   struct radv_accel_struct_serialization_header *dst_header = (void *)dst;
   dst += sizeof(*dst_header) + sizeof(uint64_t) * header->instance_count;

   memcpy(dst_header->driver_uuid, device->physical_device->driver_uuid, VK_UUID_SIZE);
   memset(dst_header->accel_struct_compat, 0, VK_UUID_SIZE);

   dst_header->serialization_size = header->serialization_size;
   dst_header->compacted_size = header->compacted_size;
   dst_header->instance_count = header->instance_count;

   memcpy(dst, base, header->compacted_size);

   for (unsigned i = 0; i < header->instance_count; ++i) {
      dst_header->instances[i] =
         *(const uint64_t *)(base + i * 128 + header->instance_offset) & ~63ull;
   }

   device->ws->buffer_unmap(accel_struct->bo);
   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
radv_CmdCopyMemoryToAccelerationStructureKHR(
   VkCommandBuffer commandBuffer, const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   RADV_FROM_HANDLE(radv_acceleration_structure, dst, pInfo->dst);
   struct radv_meta_saved_state saved_state;

   radv_meta_save(
      &saved_state, cmd_buffer,
      RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);

   uint64_t dst_addr = radv_accel_struct_get_va(dst);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        cmd_buffer->device->meta_state.accel_struct_build.copy_pipeline);

   const struct copy_constants consts = {
      .src_addr = pInfo->src.deviceAddress,
      .dst_addr = dst_addr,
      .mode = COPY_MODE_DESERIALIZE,
   };

   radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
                         cmd_buffer->device->meta_state.accel_struct_build.copy_p_layout,
                         VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);

   /* The copy shader strides through the whole range per invocation, so a
    * fixed dispatch size is sufficient here; it only affects how much
    * parallelism is available. */
   radv_CmdDispatch(commandBuffer, 512, 1, 1);
   radv_meta_restore(&saved_state, cmd_buffer);
}

VKAPI_ATTR void VKAPI_CALL
radv_CmdCopyAccelerationStructureToMemoryKHR(
   VkCommandBuffer commandBuffer, const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   RADV_FROM_HANDLE(radv_acceleration_structure, src, pInfo->src);
   struct radv_meta_saved_state saved_state;

   radv_meta_save(
      &saved_state, cmd_buffer,
      RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);

   uint64_t src_addr = radv_accel_struct_get_va(src);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        cmd_buffer->device->meta_state.accel_struct_build.copy_pipeline);

   const struct copy_constants consts = {
      .src_addr = src_addr,
      .dst_addr = pInfo->dst.deviceAddress,
      .mode = COPY_MODE_SERIALIZE,
   };

   radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
                         cmd_buffer->device->meta_state.accel_struct_build.copy_p_layout,
                         VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);

   cmd_buffer->state.flush_bits |=
      radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT, NULL);

   radv_indirect_dispatch(cmd_buffer, src->bo,
                          src_addr + offsetof(struct radv_accel_struct_header, copy_dispatch_size));
   radv_meta_restore(&saved_state, cmd_buffer);

   /* Set the header of the serialized data. */
   uint8_t header_data[2 * VK_UUID_SIZE] = {0};
   memcpy(header_data, cmd_buffer->device->physical_device->driver_uuid, VK_UUID_SIZE);

   radv_update_buffer_cp(cmd_buffer, pInfo->dst.deviceAddress, header_data, sizeof(header_data));
}

VKAPI_ATTR void VKAPI_CALL
radv_CmdBuildAccelerationStructuresIndirectKHR(
   VkCommandBuffer commandBuffer, uint32_t infoCount,
   const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
   const VkDeviceAddress *pIndirectDeviceAddresses, const uint32_t *pIndirectStrides,
   const uint32_t *const *ppMaxPrimitiveCounts)
{
   unreachable("Unimplemented");
}