70
static_assert(sizeof(struct rra_file_chunk_description) == 64,
71
"rra_file_chunk_description does not match RRA spec");
70
static_assert(sizeof(struct rra_file_chunk_description) == 64, "rra_file_chunk_description does not match RRA spec");
74
73
node_to_addr(uint64_t node)
94
rra_dump_chunk_description(uint64_t offset, uint64_t header_size, uint64_t data_size,
95
const char *name, enum rra_chunk_type type, FILE *output)
93
rra_dump_chunk_description(uint64_t offset, uint64_t header_size, uint64_t data_size, const char *name,
94
enum rra_chunk_type type, FILE *output)
97
96
struct rra_file_chunk_description chunk = {
162
rra_dump_asic_info(struct radeon_info *rad_info, FILE *output)
161
rra_dump_asic_info(const struct radeon_info *rad_info, FILE *output)
164
163
struct rra_asic_info asic_info = {
165
164
/* All frequencies are in Hz */
178
177
.rev_id = rad_info->pci_rev_id,
181
strncpy(asic_info.device_name,
182
rad_info->marketing_name ? rad_info->marketing_name : rad_info->name,
180
strncpy(asic_info.device_name, rad_info->marketing_name ? rad_info->marketing_name : rad_info->name,
183
181
RRA_FILE_DEVICE_NAME_MAX_SIZE - 1);
185
183
fwrite(&asic_info, sizeof(struct rra_asic_info), 1, output);
247
245
#define RRA_ROOT_NODE_OFFSET align(sizeof(struct rra_accel_struct_header), 64)
249
static_assert(sizeof(struct rra_accel_struct_header) == 120,
250
"rra_accel_struct_header does not match RRA spec");
247
static_assert(sizeof(struct rra_accel_struct_header) == 120, "rra_accel_struct_header does not match RRA spec");
252
249
struct rra_accel_struct_metadata {
253
250
uint64_t virtual_address;
255
252
char unused[116];
258
static_assert(sizeof(struct rra_accel_struct_metadata) == 128,
259
"rra_accel_struct_metadata does not match RRA spec");
255
static_assert(sizeof(struct rra_accel_struct_metadata) == 128, "rra_accel_struct_metadata does not match RRA spec");
261
257
struct rra_geometry_info {
262
258
uint32_t primitive_count : 29;
268
264
static_assert(sizeof(struct rra_geometry_info) == 12, "rra_geometry_info does not match RRA spec");
270
266
static struct rra_accel_struct_header
271
rra_fill_accel_struct_header_common(struct radv_accel_struct_header *header,
272
size_t parent_id_table_size, size_t leaf_node_data_size,
273
size_t internal_node_data_size, uint64_t primitive_count)
267
rra_fill_accel_struct_header_common(struct radv_accel_struct_header *header, size_t parent_id_table_size,
268
size_t leaf_node_data_size, size_t internal_node_data_size,
269
uint64_t primitive_count)
275
271
struct rra_accel_struct_header result = {
276
272
.post_build_info =
290
286
result.metadata_size = sizeof(struct rra_accel_struct_metadata) + parent_id_table_size;
291
result.file_size = result.metadata_size + sizeof(struct rra_accel_struct_header) +
292
internal_node_data_size + leaf_node_data_size;
288
result.metadata_size + sizeof(struct rra_accel_struct_header) + internal_node_data_size + leaf_node_data_size;
294
290
result.internal_nodes_offset = sizeof(struct rra_accel_struct_metadata);
295
291
result.leaf_nodes_offset = result.internal_nodes_offset + internal_node_data_size;
331
327
float otw_matrix[12];
334
static_assert(sizeof(struct rra_instance_node) == 128,
335
"rra_instance_node does not match RRA spec!");
330
static_assert(sizeof(struct rra_instance_node) == 128, "rra_instance_node does not match RRA spec!");
338
333
* Format RRA uses for aabb nodes
361
356
static_assert(sizeof(struct rra_triangle_node) == 64, "rra_triangle_node does not match RRA spec!");
364
rra_dump_tlas_header(struct radv_accel_struct_header *header, size_t parent_id_table_size,
365
size_t leaf_node_data_size, size_t internal_node_data_size,
366
uint64_t primitive_count, FILE *output)
359
rra_dump_tlas_header(struct radv_accel_struct_header *header, size_t parent_id_table_size, size_t leaf_node_data_size,
360
size_t internal_node_data_size, uint64_t primitive_count, FILE *output)
368
362
struct rra_accel_struct_header file_header = rra_fill_accel_struct_header_common(
369
363
header, parent_id_table_size, leaf_node_data_size, internal_node_data_size, primitive_count);
377
371
rra_dump_blas_header(struct radv_accel_struct_header *header, size_t parent_id_table_size,
378
struct radv_accel_struct_geometry_info *geometry_infos,
379
size_t leaf_node_data_size, size_t internal_node_data_size,
380
uint64_t primitive_count, FILE *output)
372
struct radv_accel_struct_geometry_info *geometry_infos, size_t leaf_node_data_size,
373
size_t internal_node_data_size, uint64_t primitive_count, FILE *output)
382
375
struct rra_accel_struct_header file_header = rra_fill_accel_struct_header_common(
383
376
header, parent_id_table_size, leaf_node_data_size, internal_node_data_size, primitive_count);
384
377
file_header.post_build_info.bvh_type = RRA_BVH_TYPE_BLAS;
385
file_header.geometry_type =
386
header->geometry_count ? geometry_infos->type : VK_GEOMETRY_TYPE_TRIANGLES_KHR;
378
file_header.geometry_type = header->geometry_count ? geometry_infos->type : VK_GEOMETRY_TYPE_TRIANGLES_KHR;
388
380
fwrite(&file_header, sizeof(struct rra_accel_struct_header), 1, output);
400
392
char location[31];
403
static void PRINTFLIKE(2, 3)
404
rra_validation_fail(struct rra_validation_context *ctx, const char *message, ...)
395
static void PRINTFLIKE(2, 3) rra_validation_fail(struct rra_validation_context *ctx, const char *message, ...)
406
397
if (!ctx->failed) {
407
398
fprintf(stderr, "radv: rra: Validation failed at %s:\n", ctx->location);
422
rra_validate_header(struct radv_rra_accel_struct_data *accel_struct,
423
const struct radv_accel_struct_header *header)
413
rra_validate_header(struct radv_rra_accel_struct_data *accel_struct, const struct radv_accel_struct_header *header)
425
415
struct rra_validation_context ctx = {
426
416
.location = "header",
429
if (accel_struct->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR &&
430
header->instance_count > 0)
419
if (accel_struct->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR && header->instance_count > 0)
431
420
rra_validation_fail(&ctx, "BLAS contains instances");
433
422
if (header->bvh_offset >= accel_struct->size)
460
rra_validate_node(struct hash_table_u64 *accel_struct_vas, uint8_t *data, void *node,
461
uint32_t geometry_count, uint32_t size, bool is_bottom_level)
449
rra_validate_node(struct hash_table_u64 *accel_struct_vas, uint8_t *data, void *node, uint32_t geometry_count,
450
uint32_t size, bool is_bottom_level)
463
452
struct rra_validation_context ctx = {0};
477
466
if (!is_internal_node(type) && is_bottom_level == (type == radv_bvh_node_instance))
478
467
rra_validation_fail(&ctx,
479
is_bottom_level ? "%s node in BLAS (child index %u)"
480
: "%s node in TLAS (child index %u)",
468
is_bottom_level ? "%s node in BLAS (child index %u)" : "%s node in TLAS (child index %u)",
481
469
node_type_names[type], i);
483
471
if (offset > size) {
488
476
struct rra_validation_context child_ctx = {0};
489
snprintf(child_ctx.location, sizeof(child_ctx.location), "%s node (offset=%u)",
490
node_type_names[type], offset);
477
snprintf(child_ctx.location, sizeof(child_ctx.location), "%s node (offset=%u)", node_type_names[type], offset);
492
479
if (is_internal_node(type)) {
493
ctx.failed |= rra_validate_node(accel_struct_vas, data, data + offset, geometry_count,
494
size, is_bottom_level);
480
ctx.failed |= rra_validate_node(accel_struct_vas, data, data + offset, geometry_count, size, is_bottom_level);
495
481
} else if (type == radv_bvh_node_instance) {
496
482
struct radv_bvh_instance_node *src = (struct radv_bvh_instance_node *)(data + offset);
497
483
uint64_t blas_va = node_to_addr(src->bvh_ptr) - src->bvh_offset;
528
rra_transcode_triangle_node(struct rra_transcoding_context *ctx,
529
const struct radv_bvh_triangle_node *src)
514
rra_transcode_triangle_node(struct rra_transcoding_context *ctx, const struct radv_bvh_triangle_node *src)
531
516
struct rra_triangle_node *dst = (struct rra_triangle_node *)(ctx->dst + ctx->dst_leaf_offset);
532
517
ctx->dst_leaf_offset += sizeof(struct rra_triangle_node);
544
rra_transcode_aabb_node(struct rra_transcoding_context *ctx, const struct radv_bvh_aabb_node *src,
529
rra_transcode_aabb_node(struct rra_transcoding_context *ctx, const struct radv_bvh_aabb_node *src, radv_aabb bounds)
547
531
struct rra_aabb_node *dst = (struct rra_aabb_node *)(ctx->dst + ctx->dst_leaf_offset);
548
532
ctx->dst_leaf_offset += sizeof(struct rra_aabb_node);
563
rra_transcode_instance_node(struct rra_transcoding_context *ctx,
564
const struct radv_bvh_instance_node *src)
547
rra_transcode_instance_node(struct rra_transcoding_context *ctx, const struct radv_bvh_instance_node *src)
566
549
uint64_t blas_va = node_to_addr(src->bvh_ptr) - src->bvh_offset;
580
563
memcpy(dst->otw_matrix, src->otw_matrix.values, sizeof(dst->otw_matrix));
583
static uint32_t rra_transcode_node(struct rra_transcoding_context *ctx, uint32_t parent_id,
584
uint32_t src_id, radv_aabb bounds);
566
static uint32_t rra_transcode_node(struct rra_transcoding_context *ctx, uint32_t parent_id, uint32_t src_id,
587
570
rra_transcode_box16_node(struct rra_transcoding_context *ctx, const struct radv_bvh_box16_node *src)
617
rra_transcode_node(ctx, radv_bvh_node_box16 | (dst_offset >> 3), src->children[i], bounds);
599
dst->children[i] = rra_transcode_node(ctx, radv_bvh_node_box16 | (dst_offset >> 3), src->children[i], bounds);
636
dst->children[i] = rra_transcode_node(ctx, radv_bvh_node_box32 | (dst_offset >> 3),
637
src->children[i], src->coords[i]);
619
rra_transcode_node(ctx, radv_bvh_node_box32 | (dst_offset >> 3), src->children[i], src->coords[i]);
658
rra_transcode_node(struct rra_transcoding_context *ctx, uint32_t parent_id, uint32_t src_id,
640
rra_transcode_node(struct rra_transcoding_context *ctx, uint32_t parent_id, uint32_t src_id, radv_aabb bounds)
661
642
uint32_t node_type = src_id & 7;
662
643
uint32_t src_offset = (src_id & (~7u)) << 3;
681
662
rra_transcode_instance_node(ctx, src_child_node);
684
uint32_t parent_id_index =
685
rra_parent_table_index_from_offset(dst_offset, ctx->parent_id_table_size);
665
uint32_t parent_id_index = rra_parent_table_index_from_offset(dst_offset, ctx->parent_id_table_size);
686
666
ctx->parent_id_table[parent_id_index] = parent_id;
688
668
uint32_t dst_id = node_type | (dst_offset >> 3);
739
719
rra_dump_acceleration_structure(struct radv_rra_accel_struct_data *accel_struct, uint8_t *data,
740
struct hash_table_u64 *accel_struct_vas, bool should_validate,
720
struct hash_table_u64 *accel_struct_vas, bool should_validate, FILE *output)
743
722
struct radv_accel_struct_header *header = (struct radv_accel_struct_header *)data;
753
732
if (rra_validate_header(accel_struct, header)) {
754
733
return VK_ERROR_VALIDATION_FAILED_EXT;
756
if (rra_validate_node(accel_struct_vas, data + header->bvh_offset,
757
data + header->bvh_offset + src_root_offset, header->geometry_count,
758
accel_struct->size, !is_tlas)) {
735
if (rra_validate_node(accel_struct_vas, data + header->bvh_offset, data + header->bvh_offset + src_root_offset,
736
header->geometry_count, accel_struct->size, !is_tlas)) {
759
737
return VK_ERROR_VALIDATION_FAILED_EXT;
811
789
result = VK_ERROR_OUT_OF_HOST_MEMORY;
815
calloc(RRA_ROOT_NODE_OFFSET + bvh_info.internal_nodes_size + bvh_info.leaf_nodes_size, 1);
792
dst_structure_data = calloc(RRA_ROOT_NODE_OFFSET + bvh_info.internal_nodes_size + bvh_info.leaf_nodes_size, 1);
816
793
if (!dst_structure_data) {
817
794
result = VK_ERROR_OUT_OF_HOST_MEMORY;
859
836
struct rra_accel_struct_metadata rra_metadata = {
860
837
.virtual_address = va,
861
.byte_size = bvh_info.leaf_nodes_size + bvh_info.internal_nodes_size +
862
sizeof(struct rra_accel_struct_header),
838
.byte_size = bvh_info.leaf_nodes_size + bvh_info.internal_nodes_size + sizeof(struct rra_accel_struct_header),
865
841
fwrite(&chunk_header, sizeof(struct rra_accel_struct_chunk_header), 1, output);
869
845
fwrite(node_parent_table, 1, node_parent_table_size, output);
872
rra_dump_tlas_header(header, node_parent_table_size, bvh_info.leaf_nodes_size,
873
bvh_info.internal_nodes_size, primitive_count, output);
848
rra_dump_tlas_header(header, node_parent_table_size, bvh_info.leaf_nodes_size, bvh_info.internal_nodes_size,
849
primitive_count, output);
875
851
rra_dump_blas_header(header, node_parent_table_size, geometry_infos, bvh_info.leaf_nodes_size,
876
852
bvh_info.internal_nodes_size, primitive_count, output);
878
854
/* Write acceleration structure data */
879
fwrite(dst_structure_data + RRA_ROOT_NODE_OFFSET, 1,
880
bvh_info.internal_nodes_size + bvh_info.leaf_nodes_size, output);
855
fwrite(dst_structure_data + RRA_ROOT_NODE_OFFSET, 1, bvh_info.internal_nodes_size + bvh_info.leaf_nodes_size,
883
859
fwrite(rra_geometry_infos, sizeof(struct rra_geometry_info), header->geometry_count, output);
900
radv_rra_trace_frame()
902
return radv_get_int_debug_option("RADV_RRA_TRACE", -1);
906
radv_rra_trace_trigger_file()
908
return getenv("RADV_RRA_TRACE_TRIGGER");
912
radv_rra_trace_enabled()
914
return radv_rra_trace_frame() != -1 || radv_rra_trace_trigger_file();
918
876
radv_rra_trace_init(struct radv_device *device)
920
device->rra_trace.trace_frame = radv_rra_trace_frame();
921
device->rra_trace.elapsed_frames = 0;
922
device->rra_trace.trigger_file = radv_rra_trace_trigger_file();
923
device->rra_trace.validate_as = radv_get_int_debug_option("RADV_RRA_TRACE_VALIDATE", 0) != 0;
924
device->rra_trace.copy_after_build =
925
radv_get_int_debug_option("RADV_RRA_TRACE_COPY_AFTER_BUILD", 0) != 0;
878
device->rra_trace.validate_as = debug_get_bool_option("RADV_RRA_TRACE_VALIDATE", false);
879
device->rra_trace.copy_after_build = debug_get_bool_option("RADV_RRA_TRACE_COPY_AFTER_BUILD", false);
926
880
device->rra_trace.accel_structs = _mesa_pointer_hash_table_create(NULL);
927
881
device->rra_trace.accel_struct_vas = _mesa_hash_table_u64_create(NULL);
928
882
simple_mtx_init(&device->rra_trace.data_mtx, mtx_plain);
930
device->rra_trace.copy_memory_index =
931
radv_find_memory_index(device->physical_device, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
932
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
933
VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
884
device->rra_trace.copy_memory_index = radv_find_memory_index(
885
device->physical_device,
886
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
1036
989
if (result != VK_SUCCESS)
1037
990
goto fail_buffer;
1040
vk_common_MapMemory(ctx->device, ctx->memory, 0, VK_WHOLE_SIZE, 0, (void **)&ctx->mapped_data);
992
result = vk_common_MapMemory(ctx->device, ctx->memory, 0, VK_WHOLE_SIZE, 0, (void **)&ctx->mapped_data);
1041
993
if (result != VK_SUCCESS)
1042
994
goto fail_memory;
1186
1138
uint64_t written_accel_struct_count = 0;
1188
1140
struct hash_entry *last_entry = NULL;
1189
for (unsigned i = 0;
1190
(last_entry = _mesa_hash_table_next_entry(device->rra_trace.accel_structs, last_entry));
1141
for (unsigned i = 0; (last_entry = _mesa_hash_table_next_entry(device->rra_trace.accel_structs, last_entry)); ++i)
1192
1142
hash_entries[i] = last_entry;
1194
1144
qsort(hash_entries, struct_count, sizeof(*hash_entries), accel_struct_entry_cmp);
1217
1167
accel_struct_offsets[written_accel_struct_count] = (uint64_t)ftell(file);
1219
rra_dump_acceleration_structure(data, mapped_data, device->rra_trace.accel_struct_vas,
1220
device->rra_trace.validate_as, file);
1168
result = rra_dump_acceleration_structure(data, mapped_data, device->rra_trace.accel_struct_vas,
1169
device->rra_trace.validate_as, file);
1222
1171
rra_unmap_accel_struct_data(©_ctx, i);
1228
1177
rra_copy_context_finish(©_ctx);
1230
1179
uint64_t chunk_info_offset = (uint64_t)ftell(file);
1231
rra_dump_chunk_description(api_info_offset, 0, 8, "ApiInfo", RADV_RRA_CHUNK_ID_ASIC_API_INFO,
1180
rra_dump_chunk_description(api_info_offset, 0, 8, "ApiInfo", RADV_RRA_CHUNK_ID_ASIC_API_INFO, file);
1233
1181
rra_dump_chunk_description(asic_info_offset, 0, sizeof(struct rra_asic_info), "AsicInfo",
1234
1182
RADV_RRA_CHUNK_ID_ASIC_API_INFO, file);
1241
1189
accel_struct_size = (uint64_t)(accel_struct_offsets[i + 1] - accel_struct_offsets[i]);
1243
rra_dump_chunk_description(accel_struct_offsets[i],
1244
sizeof(struct rra_accel_struct_chunk_header), accel_struct_size,
1245
"RawAccelStruc", RADV_RRA_CHUNK_ID_ACCEL_STRUCT, file);
1191
rra_dump_chunk_description(accel_struct_offsets[i], sizeof(struct rra_accel_struct_chunk_header),
1192
accel_struct_size, "RawAccelStruct", RADV_RRA_CHUNK_ID_ACCEL_STRUCT, file);
1248
1195
uint64_t file_end = (uint64_t)ftell(file);