47
48
static VkDevice device_cache[CHIP_LAST] = {VK_NULL_HANDLE};
48
49
static std::mutex create_device_mutex;
50
#define FUNCTION_LIST\
52
ITEM(DestroyInstance)\
53
ITEM(EnumeratePhysicalDevices)\
54
ITEM(GetPhysicalDeviceProperties2)\
57
ITEM(CreateShaderModule)\
58
ITEM(DestroyShaderModule)\
59
ITEM(CreateGraphicsPipelines)\
60
ITEM(CreateComputePipelines)\
61
ITEM(DestroyPipeline)\
62
ITEM(CreateDescriptorSetLayout)\
63
ITEM(DestroyDescriptorSetLayout)\
64
ITEM(CreatePipelineLayout)\
65
ITEM(DestroyPipelineLayout)\
66
ITEM(CreateRenderPass)\
67
ITEM(DestroyRenderPass)\
68
ITEM(GetPipelineExecutablePropertiesKHR)\
51
#define FUNCTION_LIST \
52
ITEM(CreateInstance) \
53
ITEM(DestroyInstance) \
54
ITEM(EnumeratePhysicalDevices) \
55
ITEM(GetPhysicalDeviceProperties2) \
58
ITEM(CreateShaderModule) \
59
ITEM(DestroyShaderModule) \
60
ITEM(CreateGraphicsPipelines) \
61
ITEM(CreateComputePipelines) \
62
ITEM(DestroyPipeline) \
63
ITEM(CreateDescriptorSetLayout) \
64
ITEM(DestroyDescriptorSetLayout) \
65
ITEM(CreatePipelineLayout) \
66
ITEM(DestroyPipelineLayout) \
67
ITEM(CreateRenderPass) \
68
ITEM(DestroyRenderPass) \
69
ITEM(GetPipelineExecutablePropertiesKHR) \
69
70
ITEM(GetPipelineExecutableInternalRepresentationsKHR)
71
72
#define ITEM(n) PFN_vk##n n;
75
void create_program(enum amd_gfx_level gfx_level, Stage stage, unsigned wave_size, enum radeon_family family)
77
create_program(enum amd_gfx_level gfx_level, Stage stage, unsigned wave_size,
78
enum radeon_family family)
77
80
memset(&config, 0, sizeof(config));
78
81
info.wave_size = wave_size;
215
224
aco_print_program(program.get(), output);
218
void finish_optimizer_postRA_test()
228
finish_optimizer_postRA_test()
220
230
finish_program(program.get());
221
231
aco::optimize_postRA(program.get());
222
232
aco_print_program(program.get(), output);
225
void finish_to_hw_instr_test()
236
finish_to_hw_instr_test()
227
238
finish_program(program.get());
228
239
aco::lower_to_hw_instr(program.get());
229
240
aco_print_program(program.get(), output);
232
void finish_waitcnt_test()
244
finish_waitcnt_test()
234
246
finish_program(program.get());
235
247
aco::insert_wait_states(program.get());
236
248
aco_print_program(program.get(), output);
239
void finish_insert_nops_test()
252
finish_insert_nops_test()
241
254
finish_program(program.get());
242
255
aco::insert_NOPs(program.get());
243
256
aco_print_program(program.get(), output);
246
void finish_form_hard_clause_test()
260
finish_form_hard_clause_test()
248
262
finish_program(program.get());
249
263
aco::form_hard_clauses(program.get());
250
264
aco_print_program(program.get(), output);
253
void finish_assembler_test()
268
finish_assembler_test()
255
270
finish_program(program.get());
256
271
std::vector<uint32_t> binary;
257
unsigned exec_size = emit_program(program.get(), binary);
272
unsigned exec_size = emit_program(program.get(), binary, NULL);
259
274
/* we could use CLRX for disassembly but that would require it to be
261
276
if (program->gfx_level >= GFX8) {
262
277
print_asm(program.get(), binary, exec_size / 4u, output);
264
//TODO: maybe we should use CLRX and skip this test if it's not available?
279
// TODO: maybe we should use CLRX and skip this test if it's not available?
265
280
for (uint32_t dword : binary)
266
281
fprintf(output, "%.8x\n", dword);
270
void writeout(unsigned i, Temp tmp)
286
writeout(unsigned i, Temp tmp)
273
289
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), tmp);
275
291
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i));
278
void writeout(unsigned i, aco::Builder::Result res)
295
writeout(unsigned i, aco::Builder::Result res)
280
297
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), res);
283
void writeout(unsigned i, Operand op)
301
writeout(unsigned i, Operand op)
285
303
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), op);
288
void writeout(unsigned i, Operand op0, Operand op1)
307
writeout(unsigned i, Operand op0, Operand op1)
290
309
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), op0, op1);
293
Temp fneg(Temp src, Builder b)
313
fneg(Temp src, Builder b)
295
315
if (src.bytes() == 2)
296
316
return b.vop2(aco_opcode::v_mul_f16, b.def(v2b), Operand::c16(0xbc00u), src);
298
318
return b.vop2(aco_opcode::v_mul_f32, b.def(v1), Operand::c32(0xbf800000u), src);
301
Temp fabs(Temp src, Builder b)
322
fabs(Temp src, Builder b)
303
324
if (src.bytes() == 2) {
304
Builder::Result res = b.vop2_e64(aco_opcode::v_mul_f16, b.def(v2b), Operand::c16(0x3c00), src);
325
Builder::Result res =
326
b.vop2_e64(aco_opcode::v_mul_f16, b.def(v2b), Operand::c16(0x3c00), src);
305
327
res->valu().abs[1] = true;
308
Builder::Result res = b.vop2_e64(aco_opcode::v_mul_f32, b.def(v1), Operand::c32(0x3f800000u), src);
330
Builder::Result res =
331
b.vop2_e64(aco_opcode::v_mul_f32, b.def(v1), Operand::c32(0x3f800000u), src);
309
332
res->valu().abs[1] = true;
314
Temp f2f32(Temp src, Builder b)
338
f2f32(Temp src, Builder b)
316
340
return b.vop1(aco_opcode::v_cvt_f32_f16, b.def(v1), src);
319
Temp f2f16(Temp src, Builder b)
344
f2f16(Temp src, Builder b)
321
346
return b.vop1(aco_opcode::v_cvt_f16_f32, b.def(v2b), src);
324
Temp u2u16(Temp src, Builder b)
350
u2u16(Temp src, Builder b)
326
352
return b.pseudo(aco_opcode::p_extract_vector, b.def(v2b), src, Operand::zero());
329
Temp fadd(Temp src0, Temp src1, Builder b)
356
fadd(Temp src0, Temp src1, Builder b)
331
358
if (src0.bytes() == 2)
332
359
return b.vop2(aco_opcode::v_add_f16, b.def(v2b), src0, src1);
350
379
return b.vop3(aco_opcode::v_fma_f32, b.def(v1), src0, src1, src2);
353
Temp fsat(Temp src, Builder b)
383
fsat(Temp src, Builder b)
355
385
if (src.bytes() == 2)
356
return b.vop3(aco_opcode::v_med3_f16, b.def(v2b), Operand::c16(0u),
357
Operand::c16(0x3c00u), src);
386
return b.vop3(aco_opcode::v_med3_f16, b.def(v2b), Operand::c16(0u), Operand::c16(0x3c00u),
359
return b.vop3(aco_opcode::v_med3_f32, b.def(v1), Operand::zero(),
360
Operand::c32(0x3f800000u), src);
389
return b.vop3(aco_opcode::v_med3_f32, b.def(v1), Operand::zero(), Operand::c32(0x3f800000u),
363
Temp fmin(Temp src0, Temp src1, Builder b)
394
fmin(Temp src0, Temp src1, Builder b)
365
396
return b.vop2(aco_opcode::v_min_f32, b.def(v1), src0, src1);
368
Temp fmax(Temp src0, Temp src1, Builder b)
400
fmax(Temp src0, Temp src1, Builder b)
370
402
return b.vop2(aco_opcode::v_max_f32, b.def(v1), src0, src1);
373
Temp ext_ushort(Temp src, unsigned idx, Builder b)
406
ext_ushort(Temp src, unsigned idx, Builder b)
375
408
return b.pseudo(aco_opcode::p_extract, b.def(src.regClass()), src, Operand::c32(idx),
376
409
Operand::c32(16u), Operand::c32(false));
379
Temp ext_ubyte(Temp src, unsigned idx, Builder b)
413
ext_ubyte(Temp src, unsigned idx, Builder b)
381
415
return b.pseudo(aco_opcode::p_extract, b.def(src.regClass()), src, Operand::c32(idx),
382
416
Operand::c32(8u), Operand::c32(false));
385
void emit_divergent_if_else(Program* prog, aco::Builder& b, Operand cond, std::function<void()> then,
386
std::function<void()> els)
420
emit_divergent_if_else(Program* prog, aco::Builder& b, Operand cond, std::function<void()> then,
421
std::function<void()> els)
388
423
prog->blocks.reserve(prog->blocks.size() + 6);
418
453
PhysReg saved_exec_reg(84);
420
455
b.reset(if_block);
421
Temp saved_exec = b.sop1(Builder::s_and_saveexec, b.def(b.lm, saved_exec_reg), Definition(scc, s1), Definition(exec, b.lm), cond, Operand(exec, b.lm));
422
b.branch(aco_opcode::p_cbranch_nz, Definition(vcc, bld.lm), then_logical->index, then_linear->index);
456
Temp saved_exec = b.sop1(Builder::s_and_saveexec, b.def(b.lm, saved_exec_reg),
457
Definition(scc, s1), Definition(exec, b.lm), cond, Operand(exec, b.lm));
458
b.branch(aco_opcode::p_cbranch_nz, Definition(vcc, bld.lm), then_logical->index,
424
461
b.reset(then_logical);
425
462
b.pseudo(aco_opcode::p_logical_start);
431
468
b.branch(aco_opcode::p_branch, Definition(vcc, bld.lm), invert->index);
434
b.sop2(Builder::s_andn2, Definition(exec, bld.lm), Definition(scc, s1), Operand(saved_exec, saved_exec_reg), Operand(exec, bld.lm));
435
b.branch(aco_opcode::p_cbranch_nz, Definition(vcc, bld.lm), else_logical->index, else_linear->index);
471
b.sop2(Builder::s_andn2, Definition(exec, bld.lm), Definition(scc, s1),
472
Operand(saved_exec, saved_exec_reg), Operand(exec, bld.lm));
473
b.branch(aco_opcode::p_cbranch_nz, Definition(vcc, bld.lm), else_logical->index,
437
476
b.reset(else_logical);
438
477
b.pseudo(aco_opcode::p_logical_start);
444
483
b.branch(aco_opcode::p_branch, Definition(vcc, bld.lm), endif_block->index);
446
485
b.reset(endif_block);
447
b.pseudo(aco_opcode::p_parallelcopy, Definition(exec, bld.lm), Operand(saved_exec, saved_exec_reg));
486
b.pseudo(aco_opcode::p_parallelcopy, Definition(exec, bld.lm),
487
Operand(saved_exec, saved_exec_reg));
450
VkDevice get_vk_device(enum amd_gfx_level gfx_level)
491
get_vk_device(enum amd_gfx_level gfx_level)
452
493
enum radeon_family family;
453
494
switch (gfx_level) {
455
family = CHIP_TAHITI;
458
family = CHIP_BONAIRE;
461
family = CHIP_POLARIS10;
464
family = CHIP_VEGA10;
467
family = CHIP_NAVI10;
470
family = CHIP_NAVI21;
473
family = CHIP_GFX1100;
476
family = CHIP_UNKNOWN;
495
case GFX6: family = CHIP_TAHITI; break;
496
case GFX7: family = CHIP_BONAIRE; break;
497
case GFX8: family = CHIP_POLARIS10; break;
498
case GFX9: family = CHIP_VEGA10; break;
499
case GFX10: family = CHIP_NAVI10; break;
500
case GFX10_3: family = CHIP_NAVI21; break;
501
case GFX11: family = CHIP_GFX1100; break;
502
default: family = CHIP_UNKNOWN; break;
479
504
return get_vk_device(family);
482
VkDevice get_vk_device(enum radeon_family family)
508
get_vk_device(enum radeon_family family)
484
510
assert(family != CHIP_UNKNOWN);
591
VkShaderModule __qoCreateShaderModule(VkDevice dev, const QoShaderModuleCreateInfo *module_info)
622
__qoCreateShaderModule(VkDevice dev, const QoShaderModuleCreateInfo* module_info)
593
VkShaderModuleCreateInfo vk_module_info;
594
vk_module_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
595
vk_module_info.pNext = NULL;
596
vk_module_info.flags = 0;
597
vk_module_info.codeSize = module_info->spirvSize;
598
vk_module_info.pCode = (const uint32_t*)module_info->pSpirv;
600
VkShaderModule module;
601
ASSERTED VkResult result = CreateShaderModule(dev, &vk_module_info, NULL, &module);
602
assert(result == VK_SUCCESS);
624
VkShaderModuleCreateInfo vk_module_info;
625
vk_module_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
626
vk_module_info.pNext = NULL;
627
vk_module_info.flags = 0;
628
vk_module_info.codeSize = module_info->spirvSize;
629
vk_module_info.pCode = (const uint32_t*)module_info->pSpirv;
631
VkShaderModule module;
632
ASSERTED VkResult result = CreateShaderModule(dev, &vk_module_info, NULL, &module);
633
assert(result == VK_SUCCESS);
607
PipelineBuilder::PipelineBuilder(VkDevice dev) {
638
PipelineBuilder::PipelineBuilder(VkDevice dev)
608
640
memset(this, 0, sizeof(*this));
609
641
topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
628
660
DestroyRenderPass(device, render_pass, NULL);
631
void PipelineBuilder::add_desc_binding(VkShaderStageFlags stage_flags, uint32_t layout,
632
uint32_t binding, VkDescriptorType type, uint32_t count)
664
PipelineBuilder::add_desc_binding(VkShaderStageFlags stage_flags, uint32_t layout, uint32_t binding,
665
VkDescriptorType type, uint32_t count)
634
667
desc_layouts_used |= 1ull << layout;
635
668
desc_bindings[layout][num_desc_bindings[layout]++] = {binding, type, count, stage_flags, NULL};
638
void PipelineBuilder::add_vertex_binding(uint32_t binding, uint32_t stride, VkVertexInputRate rate)
672
PipelineBuilder::add_vertex_binding(uint32_t binding, uint32_t stride, VkVertexInputRate rate)
640
674
vs_bindings[vs_input.vertexBindingDescriptionCount++] = {binding, stride, rate};
643
void PipelineBuilder::add_vertex_attribute(uint32_t location, uint32_t binding, VkFormat format, uint32_t offset)
678
PipelineBuilder::add_vertex_attribute(uint32_t location, uint32_t binding, VkFormat format,
645
681
vs_attributes[vs_input.vertexAttributeDescriptionCount++] = {location, binding, format, offset};
648
void PipelineBuilder::add_resource_decls(QoShaderModuleCreateInfo *module)
685
PipelineBuilder::add_resource_decls(QoShaderModuleCreateInfo* module)
650
687
for (unsigned i = 0; i < module->declarationCount; i++) {
651
const QoShaderDecl *decl = &module->pDeclarations[i];
688
const QoShaderDecl* decl = &module->pDeclarations[i];
652
689
switch (decl->decl_type) {
653
690
case QoShaderDeclType_ubo:
654
add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
691
add_desc_binding(module->stage, decl->set, decl->binding,
692
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
656
694
case QoShaderDeclType_ssbo:
657
add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
695
add_desc_binding(module->stage, decl->set, decl->binding,
696
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
659
698
case QoShaderDeclType_img_buf:
660
add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
699
add_desc_binding(module->stage, decl->set, decl->binding,
700
VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
662
702
case QoShaderDeclType_img:
663
add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE);
703
add_desc_binding(module->stage, decl->set, decl->binding,
704
VK_DESCRIPTOR_TYPE_STORAGE_IMAGE);
665
706
case QoShaderDeclType_tex_buf:
666
add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER);
707
add_desc_binding(module->stage, decl->set, decl->binding,
708
VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER);
668
710
case QoShaderDeclType_combined:
669
add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
711
add_desc_binding(module->stage, decl->set, decl->binding,
712
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
671
714
case QoShaderDeclType_tex:
672
add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE);
715
add_desc_binding(module->stage, decl->set, decl->binding,
716
VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE);
674
718
case QoShaderDeclType_samp:
675
719
add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_SAMPLER);
683
void PipelineBuilder::add_io_decls(QoShaderModuleCreateInfo *module)
727
PipelineBuilder::add_io_decls(QoShaderModuleCreateInfo* module)
685
729
unsigned next_vtx_offset = 0;
686
730
for (unsigned i = 0; i < module->declarationCount; i++) {
687
const QoShaderDecl *decl = &module->pDeclarations[i];
731
const QoShaderDecl* decl = &module->pDeclarations[i];
688
732
switch (decl->decl_type) {
689
733
case QoShaderDeclType_in:
690
734
if (module->stage == VK_SHADER_STAGE_VERTEX_BIT) {
691
735
if (!strcmp(decl->type, "float") || decl->type[0] == 'v')
692
add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SFLOAT, next_vtx_offset);
736
add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SFLOAT,
693
738
else if (decl->type[0] == 'u')
694
add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_UINT, next_vtx_offset);
739
add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_UINT,
695
741
else if (decl->type[0] == 'i')
696
add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SINT, next_vtx_offset);
742
add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SINT,
697
744
next_vtx_offset += 16;
732
779
owned_stages |= stage;
735
void PipelineBuilder::add_stage(VkShaderStageFlagBits stage, QoShaderModuleCreateInfo module, const char *name)
783
PipelineBuilder::add_stage(VkShaderStageFlagBits stage, QoShaderModuleCreateInfo module,
737
786
add_stage(stage, __qoCreateShaderModule(device, &module), name);
738
787
add_resource_decls(&module);
739
788
add_io_decls(&module);
742
void PipelineBuilder::add_vsfs(VkShaderModule vs, VkShaderModule fs)
744
add_stage(VK_SHADER_STAGE_VERTEX_BIT, vs);
745
add_stage(VK_SHADER_STAGE_FRAGMENT_BIT, fs);
748
void PipelineBuilder::add_vsfs(QoShaderModuleCreateInfo vs, QoShaderModuleCreateInfo fs)
750
add_stage(VK_SHADER_STAGE_VERTEX_BIT, vs);
751
add_stage(VK_SHADER_STAGE_FRAGMENT_BIT, fs);
754
void PipelineBuilder::add_cs(VkShaderModule cs)
756
add_stage(VK_SHADER_STAGE_COMPUTE_BIT, cs);
759
void PipelineBuilder::add_cs(QoShaderModuleCreateInfo cs)
761
add_stage(VK_SHADER_STAGE_COMPUTE_BIT, cs);
764
bool PipelineBuilder::is_compute() {
792
PipelineBuilder::add_vsfs(VkShaderModule vs, VkShaderModule fs)
794
add_stage(VK_SHADER_STAGE_VERTEX_BIT, vs);
795
add_stage(VK_SHADER_STAGE_FRAGMENT_BIT, fs);
799
PipelineBuilder::add_vsfs(QoShaderModuleCreateInfo vs, QoShaderModuleCreateInfo fs)
801
add_stage(VK_SHADER_STAGE_VERTEX_BIT, vs);
802
add_stage(VK_SHADER_STAGE_FRAGMENT_BIT, fs);
806
PipelineBuilder::add_cs(VkShaderModule cs)
808
add_stage(VK_SHADER_STAGE_COMPUTE_BIT, cs);
812
PipelineBuilder::add_cs(QoShaderModuleCreateInfo cs)
814
add_stage(VK_SHADER_STAGE_COMPUTE_BIT, cs);
818
PipelineBuilder::is_compute()
765
820
return gfx_pipeline_info.stageCount == 0;
768
void PipelineBuilder::create_compute_pipeline() {
824
PipelineBuilder::create_compute_pipeline()
769
826
VkComputePipelineCreateInfo create_info;
770
827
create_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
771
828
create_info.pNext = NULL;
915
972
color_blend_state.attachmentCount = num_color_attachments;
916
973
color_blend_state.pAttachments = blend_attachment_states;
918
VkDynamicState dynamic_states[9] = {
919
VK_DYNAMIC_STATE_VIEWPORT,
920
VK_DYNAMIC_STATE_SCISSOR,
921
VK_DYNAMIC_STATE_LINE_WIDTH,
922
VK_DYNAMIC_STATE_DEPTH_BIAS,
923
VK_DYNAMIC_STATE_BLEND_CONSTANTS,
924
VK_DYNAMIC_STATE_DEPTH_BOUNDS,
925
VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
926
VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
927
VK_DYNAMIC_STATE_STENCIL_REFERENCE
975
VkDynamicState dynamic_states[9] = {VK_DYNAMIC_STATE_VIEWPORT,
976
VK_DYNAMIC_STATE_SCISSOR,
977
VK_DYNAMIC_STATE_LINE_WIDTH,
978
VK_DYNAMIC_STATE_DEPTH_BIAS,
979
VK_DYNAMIC_STATE_BLEND_CONSTANTS,
980
VK_DYNAMIC_STATE_DEPTH_BOUNDS,
981
VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
982
VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
983
VK_DYNAMIC_STATE_STENCIL_REFERENCE};
930
985
VkPipelineDynamicStateCreateInfo dynamic_state;
931
986
dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;