365
369
memset(info, 0, sizeof(*info));
367
entries_buffer = vk_alloc2(&device->vk.alloc,
369
const_entries_size_in_bytes,
371
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
373
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
371
info->entries = vk_alloc2(&device->vk.alloc,
373
const_entries_size_in_bytes,
375
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
376
if (!info->entries) {
377
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
375
info->entries = entries_buffer;
376
381
info->entries_size_in_bytes = const_entries_size_in_bytes;
378
pvr_pds_generate_vertex_primary_program(input,
381
device->features.robustBufferAccess,
382
&device->pdevice->dev_info);
383
pvr_pds_generate_vertex_primary_program(
387
device->vk.enabled_features.robustBufferAccess,
388
&device->pdevice->dev_info);
384
390
code_size_in_dwords = info->code_size_in_dwords;
385
staging_buffer_size = info->code_size_in_dwords * sizeof(*staging_buffer);
391
staging_buffer_size = PVR_DW_TO_BYTES(info->code_size_in_dwords);
387
393
staging_buffer = vk_alloc2(&device->vk.alloc,
391
397
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
392
398
if (!staging_buffer) {
393
vk_free2(&device->vk.alloc, allocator, entries_buffer);
394
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
399
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
400
goto err_free_entries;
397
403
/* This also fills in info->entries. */
398
pvr_pds_generate_vertex_primary_program(input,
401
device->features.robustBufferAccess,
402
&device->pdevice->dev_info);
404
pvr_pds_generate_vertex_primary_program(
408
device->vk.enabled_features.robustBufferAccess,
409
&device->pdevice->dev_info);
404
411
assert(info->code_size_in_dwords <= code_size_in_dwords);
406
413
/* FIXME: Add a vk_realloc2() ? */
407
entries_buffer = vk_realloc((!allocator) ? &device->vk.alloc : allocator,
409
info->entries_written_size_in_bytes,
411
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
412
if (!entries_buffer) {
413
vk_free2(&device->vk.alloc, allocator, staging_buffer);
414
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
414
new_entries = vk_realloc((!allocator) ? &device->vk.alloc : allocator,
416
info->entries_written_size_in_bytes,
418
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
420
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
421
goto err_free_staging_buffer;
417
info->entries = entries_buffer;
424
info->entries = new_entries;
418
425
info->entries_size_in_bytes = info->entries_written_size_in_bytes;
420
427
/* FIXME: Figure out the define for alignment of 16. */
460
486
/* If allocator == NULL, the internal one will be used.
462
488
* programs_out_ptr is a pointer to the array where the outputs will be placed.
464
490
static VkResult pvr_pds_vertex_attrib_programs_create_and_upload(
465
491
struct pvr_device *device,
466
492
const VkAllocationCallbacks *const allocator,
467
493
const VkPipelineVertexInputStateCreateInfo *const vertex_input_state,
468
494
uint32_t usc_temp_count,
469
495
const struct rogue_vs_build_data *vs_data,
497
/* Needed for the new path. */
498
/* TODO: Remove some of the above once the compiler is hooked up. */
499
const struct pvr_pds_vertex_dma
500
dma_descriptions[static const PVR_MAX_VERTEX_ATTRIB_DMAS],
502
const struct pvr_vertex_special_vars *special_vars_layout,
470
504
pvr_pds_attrib_programs_array_ptr programs_out_ptr)
472
struct pvr_pds_vertex_dma dma_descriptions[PVR_MAX_VERTEX_ATTRIB_DMAS];
506
struct pvr_pds_vertex_dma dma_descriptions_old[PVR_MAX_VERTEX_ATTRIB_DMAS];
473
508
struct pvr_pds_attrib_program *const programs_out = *programs_out_ptr;
474
struct pvr_pds_vertex_primary_program_input input = {
475
.dma_list = dma_descriptions,
509
struct pvr_pds_vertex_primary_program_input input = { 0 };
479
pvr_pds_vertex_attrib_init_dma_descriptions(vertex_input_state,
512
const bool old_path = pvr_has_hard_coded_shaders(&device->pdevice->dev_info);
515
pvr_pds_vertex_attrib_init_dma_descriptions(vertex_input_state,
517
&dma_descriptions_old,
520
input.dma_list = dma_descriptions_old;
522
input.dma_list = dma_descriptions;
523
input.dma_count = dma_count;
525
if (special_vars_layout->vertex_id_offset !=
526
PVR_VERTEX_SPECIAL_VAR_UNUSED) {
527
/* Gets filled by the HW and copied into the appropriate reg. */
528
input.flags |= PVR_PDS_VERTEX_FLAGS_VERTEX_ID_REQUIRED;
529
input.vertex_id_register = special_vars_layout->vertex_id_offset;
532
if (special_vars_layout->instance_id_offset !=
533
PVR_VERTEX_SPECIAL_VAR_UNUSED) {
534
/* Gets filled by the HW and copied into the appropriate reg. */
535
input.flags |= PVR_PDS_VERTEX_FLAGS_INSTANCE_ID_REQUIRED;
536
input.instance_id_register = special_vars_layout->instance_id_offset;
484
540
pvr_pds_setup_doutu(&input.usc_task_control,
759
818
program.addr_literal_count = addr_literals;
762
entries_buffer = vk_alloc2(&device->vk.alloc,
764
const_entries_size_in_bytes,
766
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
767
if (!entries_buffer) {
768
pvr_bo_free(device, descriptor_state->static_consts);
770
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
821
pds_info->entries = vk_alloc2(&device->vk.alloc,
823
const_entries_size_in_bytes,
825
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
826
if (!pds_info->entries) {
827
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
828
goto err_free_static_consts;
773
pds_info->entries = entries_buffer;
774
831
pds_info->entries_size_in_bytes = const_entries_size_in_bytes;
776
833
pvr_pds_generate_descriptor_upload_program(&program, NULL, pds_info);
778
835
code_size_in_dwords = pds_info->code_size_in_dwords;
779
staging_buffer_size =
780
pds_info->code_size_in_dwords * sizeof(*staging_buffer);
836
staging_buffer_size = PVR_DW_TO_BYTES(pds_info->code_size_in_dwords);
782
838
if (!staging_buffer_size) {
783
vk_free2(&device->vk.alloc, allocator, entries_buffer);
839
vk_free2(&device->vk.alloc, allocator, pds_info->entries);
785
841
*descriptor_state = (struct pvr_stage_allocation_descriptor_state){ 0 };
1628
1698
#undef PVR_DEV_ADDR_SIZE_IN_SH_REGS
1700
/* Assigns vertex input (vtxin) register offsets to the vertex attributes and
 * fills in the DMA description for each attribute.
 *
 * vs_data: the pipeline's VkPipelineVertexInputStateCreateInfo, i.e. the
 *    binding and attribute descriptions supplied by the application.
 * vertex_input_layout_out: receives the rogue_vertex_inputs built here
 *    (num_input_vars plus the per-attribute base[] and components[]).
 * num_vertex_input_regs_out: receives next_reg_offset, the running total of
 *    the component counts added for the attributes processed.
 * dma_descriptions_out_ptr: pointer to the array whose entries are written,
 *    starting from index 0.
 * dma_count_out: receives dma_count.
 *
 * Bindings are first indexed by binding number and attributes by location;
 * the attribute table is then compacted so that the main loop walks the
 * attributes in increasing location order.
 *
 * NOTE(review): sorted_attributes is sized with PVR_MAX_VERTEX_INPUT_BINDINGS
 * but indexed by attribute location - confirm the attribute location limit
 * does not exceed that size.
 * NOTE(review): the statement advancing dma_count and the body of the
 * unsupported-format branch are not visible in this view - confirm against
 * the full file.
 */
static void pvr_graphics_pipeline_alloc_vertex_inputs(
1701
const VkPipelineVertexInputStateCreateInfo *const vs_data,
1702
rogue_vertex_inputs *const vertex_input_layout_out,
1703
unsigned *num_vertex_input_regs_out,
1704
pvr_pds_attrib_dma_descriptions_array_ptr dma_descriptions_out_ptr,
1705
uint32_t *const dma_count_out)
1707
const VkVertexInputBindingDescription
1708
*sorted_bindings[PVR_MAX_VERTEX_INPUT_BINDINGS] = { 0 };
1709
const VkVertexInputAttributeDescription
1710
*sorted_attributes[PVR_MAX_VERTEX_INPUT_BINDINGS] = { 0 };
1712
rogue_vertex_inputs build_data = {
1713
.num_input_vars = vs_data->vertexAttributeDescriptionCount,
1715
uint32_t next_reg_offset = 0;
1717
struct pvr_pds_vertex_dma *const dma_descriptions =
1718
*dma_descriptions_out_ptr;
1719
uint32_t dma_count = 0;
1721
/* Vertex attributes map to the `layout(location = x)` annotation in the
1722
* shader where `x` is the attribute's location.
1723
* Vertex bindings have NO relation to the shader. They have nothing to do
1724
* with the `layout(set = x, binding = y)` notation. They instead indicate
1725
* where the data for a collection of vertex attributes comes from. The
1726
* application binds a VkBuffer with vkCmdBindVertexBuffers() to a specific
1727
* binding number and based on that we'll know which buffer to DMA the data
1728
* from, to fill in the collection of vertex attributes.
1731
for (uint32_t i = 0; i < vs_data->vertexBindingDescriptionCount; i++) {
1732
const VkVertexInputBindingDescription *binding_desc =
1733
&vs_data->pVertexBindingDescriptions[i];
1735
sorted_bindings[binding_desc->binding] = binding_desc;
1738
for (uint32_t i = 0; i < vs_data->vertexAttributeDescriptionCount; i++) {
1739
const VkVertexInputAttributeDescription *attribute_desc =
1740
&vs_data->pVertexAttributeDescriptions[i];
1742
sorted_attributes[attribute_desc->location] = attribute_desc;
1745
for (uint32_t i = 0, j = 0; i < ARRAY_SIZE(sorted_attributes); i++) {
1746
if (sorted_attributes[i])
1747
sorted_attributes[j++] = sorted_attributes[i];
1750
for (uint32_t i = 0; i < vs_data->vertexAttributeDescriptionCount; i++) {
1751
const VkVertexInputAttributeDescription *attribute = sorted_attributes[i];
1752
const VkVertexInputBindingDescription *binding =
1753
sorted_bindings[attribute->binding];
1754
const struct util_format_description *fmt_description =
1755
vk_format_description(attribute->format);
1756
struct pvr_pds_vertex_dma *dma_desc = &dma_descriptions[dma_count];
1757
unsigned vtxin_reg_offset;
1759
/* Reg allocation. */
1761
vtxin_reg_offset = next_reg_offset;
1762
build_data.base[i] = vtxin_reg_offset;
1764
if (fmt_description->colorspace != UTIL_FORMAT_COLORSPACE_RGB ||
1765
fmt_description->layout != UTIL_FORMAT_LAYOUT_PLAIN ||
1766
fmt_description->block.bits % 32 != 0 || !fmt_description->is_array) {
1767
/* For now we only support formats with 32 bit components since we
1768
* don't need to pack/unpack them.
1770
/* TODO: Support any other format with VERTEX_BUFFER_BIT set that
1771
* doesn't have 32 bit components if we're advertising any.
1776
/* TODO: Check if this is fine with the compiler. Does it want the amount
1777
* of components or does it want a size in dwords to figure out how many
1778
* vtxin regs are covered. For formats with 32 bit components the
1779
* distinction doesn't change anything.
1781
build_data.components[i] =
1782
util_format_get_nr_components(fmt_description->format);
1784
next_reg_offset += build_data.components[i];
1788
/* The PDS program sets up DDMADs to DMA attributes into vtxin regs.
1790
* DDMAD -> Multiply, add, and DOUTD (i.e. DMA from that address).
1791
* DMA source addr = src0 * src1 + src2
1794
* In the PDS program we setup src0 with the binding's stride and src1
1795
* with either the instance id or vertex id (both of which get filled by
1796
* the hardware). We setup src2 later on once we know which VkBuffer to
1797
* DMA the data from so it's saved for later when we patch the data
1801
/* TODO: Right now we're setting up a DMA per attribute. In a case where
1802
* there are multiple attributes packed into a single binding with
1803
* adjacent locations we'd still be DMAing them separately. This is not
1804
* great so the DMA setup should be smarter and could do with some
1808
*dma_desc = (struct pvr_pds_vertex_dma){ 0 };
1810
/* In relation to the Vulkan spec. 22.4. Vertex Input Address Calculation
1811
* this corresponds to `attribDesc.offset`.
1812
* The PDS program doesn't do anything with it but just save it in the
1813
* PDS program entry.
1815
dma_desc->offset = attribute->offset;
1817
/* In relation to the Vulkan spec. 22.4. Vertex Input Address Calculation
1818
* this corresponds to `bindingDesc.stride`.
1819
* The PDS program will calculate the `effectiveVertexOffset` with this
1820
* and add it to the address provided in the patched data segment.
1822
dma_desc->stride = binding->stride;
1824
if (binding->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE)
1825
dma_desc->flags = PVR_PDS_VERTEX_DMA_FLAGS_INSTANCE_RATE;
1827
dma_desc->flags = 0;
1829
/* Size to DMA per vertex attribute. Used to setup src3 in the DDMAD. */
1830
assert(fmt_description->block.bits != 0); /* Likely an unsupported fmt. */
1831
dma_desc->size_in_dwords = fmt_description->block.bits / 32;
1833
/* Vtxin reg offset to start DMAing into. */
1834
dma_desc->destination = vtxin_reg_offset;
1836
/* Will be used by the driver to figure out buffer address to patch in the
1837
* data section. I.e. which binding we should DMA from.
1839
dma_desc->binding_index = attribute->binding;
1841
/* We don't currently support VK_EXT_vertex_attribute_divisor so no
1842
* repeating of instance-rate vertex attributes needed. We should always
1843
* move on to the next vertex attribute.
1845
dma_desc->divisor = 1;
1847
/* Will be used to generate PDS code that takes care of robust buffer
1848
* access, and later on by the driver to write the correct robustness
1849
* buffer address to DMA the fallback values from.
1851
dma_desc->robustness_buffer_offset =
1852
pvr_get_robustness_buffer_format_offset(attribute->format);
1854
/* Used later on by the driver to figure out if the buffer is being
1855
* accessed out of bounds, for robust buffer access.
1857
dma_desc->component_size_in_bytes =
1858
fmt_description->block.bits / fmt_description->nr_channels / 8;
1863
*vertex_input_layout_out = build_data;
1864
*num_vertex_input_regs_out = next_reg_offset;
1865
*dma_count_out = dma_count;
1868
/* Places the vertex id and instance id special variables in vertex input
 * registers, starting at the register count passed in.
 *
 * num_vertex_input_regs: in/out. Read as the first free register; written
 *    back with next_free_reg once the special variables have been placed.
 * special_vars_layout_out: receives the resulting layout, i.e. the
 *    vertex_id_offset and instance_id_offset (each stored as int16_t).
 *
 * NOTE(review): the statements that advance next_free_reg around the two
 * assignments are not visible in this view - confirm against the full file
 * that each special variable gets its own register.
 */
static void pvr_graphics_pipeline_alloc_vertex_special_vars(
1869
unsigned *num_vertex_input_regs,
1870
struct pvr_vertex_special_vars *special_vars_layout_out)
1872
unsigned next_free_reg = *num_vertex_input_regs;
1873
struct pvr_vertex_special_vars layout;
1875
/* We don't support VK_KHR_shader_draw_parameters or Vulkan 1.1 so no
1876
* BaseInstance, BaseVertex, DrawIndex.
1879
/* TODO: The shader might not necessarily be using this so we'd just be
1880
* wasting regs. Get the info from the compiler about whether or not the
1881
* shader uses them and allocate them accordingly. For now we'll set them up
1885
layout.vertex_id_offset = (int16_t)next_free_reg;
1888
layout.instance_id_offset = (int16_t)next_free_reg;
1891
*num_vertex_input_regs = next_free_reg;
1892
*special_vars_layout_out = layout;
1630
1895
/* Compiles and uploads shaders and PDS programs. */
1631
1896
static VkResult
1632
1897
pvr_graphics_pipeline_compile(struct pvr_device *const device,
1787
2076
vertex_state->stage_state.const_shared_reg_count =
1788
2077
sh_count[PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY];
2079
gfx_pipeline->shader_state.vertex.vertex_input_size =
2080
ctx->stage_data.vs.num_vertex_input_regs;
1792
result = pvr_gpu_upload_usc(device,
1793
ctx->binary[MESA_SHADER_VERTEX].data,
1794
ctx->binary[MESA_SHADER_VERTEX].size,
1796
&gfx_pipeline->shader_state.vertex.bo);
2085
pvr_gpu_upload_usc(device,
2086
util_dynarray_begin(&ctx->binary[MESA_SHADER_VERTEX]),
2087
ctx->binary[MESA_SHADER_VERTEX].size,
2089
&gfx_pipeline->shader_state.vertex.bo);
1797
2090
if (result != VK_SUCCESS)
1798
2091
goto err_free_build_context;
1800
if (pvr_has_hard_coded_shaders(&device->pdevice->dev_info) &&
1801
pvr_hard_code_graphics_get_flags(&device->pdevice->dev_info) &
1802
BITFIELD_BIT(MESA_SHADER_FRAGMENT)) {
1803
pvr_hard_code_graphics_fragment_state(
1804
&device->pdevice->dev_info,
1805
hard_code_pipeline_n,
1806
&gfx_pipeline->shader_state.fragment);
1808
pvr_fragment_state_init(gfx_pipeline,
1809
&ctx->common_data[MESA_SHADER_FRAGMENT]);
1812
struct pvr_fragment_shader_state *fragment_state =
1813
&gfx_pipeline->shader_state.fragment;
1815
/* FIXME: For now we just overwrite it but the compiler shouldn't be
1816
* returning the sh count since the driver is in charge of allocating
1819
fragment_state->stage_state.const_shared_reg_count =
1820
sh_count[PVR_STAGE_ALLOCATION_FRAGMENT];
2093
if (ctx->nir[MESA_SHADER_FRAGMENT]) {
2094
struct pvr_fragment_shader_state *fragment_state =
2095
&gfx_pipeline->shader_state.fragment;
2097
if (pvr_has_hard_coded_shaders(&device->pdevice->dev_info) &&
2098
pvr_hard_code_graphics_get_flags(&device->pdevice->dev_info) &
2099
BITFIELD_BIT(MESA_SHADER_FRAGMENT)) {
2100
pvr_hard_code_graphics_fragment_state(
2101
&device->pdevice->dev_info,
2102
hard_code_pipeline_n,
2103
&gfx_pipeline->shader_state.fragment);
2105
pvr_fragment_state_init(gfx_pipeline,
2106
&ctx->common_data[MESA_SHADER_FRAGMENT]);
2109
/* FIXME: For now we just overwrite it but the compiler shouldn't be
2110
* returning the sh count since the driver is in charge of
2113
fragment_state->stage_state.const_shared_reg_count =
2114
sh_count[PVR_STAGE_ALLOCATION_FRAGMENT];
2118
result = pvr_gpu_upload_usc(
2120
util_dynarray_begin(&ctx->binary[MESA_SHADER_FRAGMENT]),
2121
ctx->binary[MESA_SHADER_FRAGMENT].size,
2123
&gfx_pipeline->shader_state.fragment.bo);
2124
if (result != VK_SUCCESS)
2125
goto err_free_vertex_bo;
2127
/* TODO: powervr has an optimization where it attempts to recompile
2128
* shaders. See PipelineCompileNoISPFeedbackFragmentStage. Unimplemented
2129
* since in our case the optimization doesn't happen.
2132
result = pvr_pds_coeff_program_create_and_upload(
2135
ctx->stage_data.fs.iterator_args.fpu_iterators,
2136
ctx->stage_data.fs.iterator_args.num_fpu_iterators,
2137
ctx->stage_data.fs.iterator_args.destination,
2138
&fragment_state->pds_coeff_program,
2139
&fragment_state->stage_state.pds_temps_count);
2140
if (result != VK_SUCCESS)
2141
goto err_free_fragment_bo;
2143
result = pvr_pds_fragment_program_create_and_upload(
2146
gfx_pipeline->shader_state.fragment.bo,
2147
ctx->common_data[MESA_SHADER_FRAGMENT].temps,
2148
ctx->stage_data.fs.msaa_mode,
2149
ctx->stage_data.fs.phas,
2150
&fragment_state->pds_fragment_program);
2151
if (result != VK_SUCCESS)
2152
goto err_free_coeff_program;
2154
/* FIXME: For now we pass in the same explicit_const_usage since it
2155
* contains all invalid entries. Fix this by hooking it up to the
2158
result = pvr_pds_descriptor_program_create_and_upload(
2161
&ctx->common_data[MESA_SHADER_FRAGMENT].compile_time_consts_data,
2162
&ctx->common_data[MESA_SHADER_FRAGMENT].ubo_data,
2163
&frag_explicit_const_usage,
2165
PVR_STAGE_ALLOCATION_FRAGMENT,
2167
&fragment_state->descriptor_state);
2168
if (result != VK_SUCCESS)
2169
goto err_free_frag_program;
2171
/* If not, we need to MAX2() and set
2172
* `fragment_state->stage_state.pds_temps_count` appropriately.
2174
assert(fragment_state->descriptor_state.pds_info.temps_required == 0);
1824
result = pvr_gpu_upload_usc(device,
1825
ctx->binary[MESA_SHADER_FRAGMENT].data,
1826
ctx->binary[MESA_SHADER_FRAGMENT].size,
1828
&gfx_pipeline->shader_state.fragment.bo);
1829
if (result != VK_SUCCESS)
1830
goto err_free_vertex_bo;
1832
/* TODO: powervr has an optimization where it attempts to recompile shaders.
1833
* See PipelineCompileNoISPFeedbackFragmentStage. Unimplemented since in our
1834
* case the optimization doesn't happen.
1837
result = pvr_pds_coeff_program_create_and_upload(
1840
ctx->stage_data.fs.iterator_args.fpu_iterators,
1841
ctx->stage_data.fs.iterator_args.num_fpu_iterators,
1842
ctx->stage_data.fs.iterator_args.destination,
1843
&gfx_pipeline->shader_state.fragment.pds_coeff_program);
1844
if (result != VK_SUCCESS)
1845
goto err_free_fragment_bo;
1847
result = pvr_pds_fragment_program_create_and_upload(
1850
gfx_pipeline->shader_state.fragment.bo,
1851
ctx->common_data[MESA_SHADER_FRAGMENT].temps,
1852
ctx->stage_data.fs.msaa_mode,
1853
ctx->stage_data.fs.phas,
1854
&gfx_pipeline->shader_state.fragment.pds_fragment_program);
1855
if (result != VK_SUCCESS)
1856
goto err_free_coeff_program;
1858
2177
result = pvr_pds_vertex_attrib_programs_create_and_upload(
1861
2180
vertex_input_state,
1862
2181
ctx->common_data[MESA_SHADER_VERTEX].temps,
1863
2182
&ctx->stage_data.vs,
2183
vtx_dma_descriptions,
2185
&special_vars_layout,
1864
2186
&gfx_pipeline->shader_state.vertex.pds_attrib_programs);
1865
2187
if (result != VK_SUCCESS)
1866
goto err_free_frag_program;
2188
goto err_free_frag_descriptor_program;
1868
2190
result = pvr_pds_descriptor_program_create_and_upload(