/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
28
#ifndef RADV_PRIVATE_H
29
#define RADV_PRIVATE_H
42
#define VG(x) ((void)0)
45
#include "c11/threads.h"
50
#include "compiler/shader_enums.h"
51
#include "util/bitscan.h"
52
#include "util/cnd_monotonic.h"
53
#include "util/list.h"
54
#include "util/macros.h"
55
#include "util/rwlock.h"
56
#include "util/xmlconfig.h"
58
#include "vk_command_buffer.h"
59
#include "vk_command_pool.h"
60
#include "vk_debug_report.h"
61
#include "vk_device.h"
62
#include "vk_format.h"
63
#include "vk_instance.h"
65
#include "vk_physical_device.h"
66
#include "vk_shader_module.h"
70
#include "vk_framebuffer.h"
72
#include "ac_binary.h"
73
#include "ac_gpu_info.h"
74
#include "ac_shader_util.h"
77
#include "ac_surface.h"
78
#include "radv_constants.h"
79
#include "radv_descriptor_set.h"
80
#include "radv_radeon_winsys.h"
81
#include "radv_shader.h"
82
#include "radv_shader_args.h"
85
/* Pre-declarations needed for WSI entrypoints */
88
typedef struct xcb_connection_t xcb_connection_t;
89
typedef uint32_t xcb_visualid_t;
90
typedef uint32_t xcb_window_t;
92
#include <vulkan/vk_android_native_buffer.h>
93
#include <vulkan/vk_icd.h>
94
#include <vulkan/vulkan.h>
95
#include <vulkan/vulkan_android.h>
97
#include "radv_entrypoints.h"
99
#include "wsi_common.h"
106
/* Helper to determine if we should compile
 * any of the Android AHB support.
 *
 * To actually enable the ext we also need
 * the necessary kernel support.
 */
#if defined(ANDROID) && ANDROID_API_LEVEL >= 26
#define RADV_SUPPORT_ANDROID_HARDWARE_BUFFER 1
#include <vndk/hardware_buffer.h>
#else
#define RADV_SUPPORT_ANDROID_HARDWARE_BUFFER 0
#endif
120
/* Calibrated timestamps are presumably unsupported on Windows builds —
 * NOTE(review): reconstructed condition, confirm against upstream. */
#ifdef _WIN32
#define RADV_SUPPORT_CALIBRATED_TIMESTAMPS 0
#else
#define RADV_SUPPORT_CALIBRATED_TIMESTAMPS 1
#endif
126
/* printf-style format checking attribute; expands to nothing on compilers
 * without __attribute__ support (MSVC). */
#ifdef _MSC_VER
#define radv_printflike(a, b)
#else
#define radv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
#endif
131
/* Align `v` up to the next multiple of `a`. `a` must be a non-zero power of two. */
static inline uint32_t
align_u32(uint32_t v, uint32_t a)
{
   assert(a != 0 && a == (a & -a));
   return (v + a - 1) & ~(a - 1);
}
138
/* Align `v` up to the next multiple of `a`. Works for any non-zero `a`
 * (non-power-of-two alignments allowed). */
static inline uint32_t
align_u32_npot(uint32_t v, uint32_t a)
{
   return (v + a - 1) / a * a;
}
144
/* 64-bit variant of align_u32. `a` must be a non-zero power of two. */
static inline uint64_t
align_u64(uint64_t v, uint64_t a)
{
   assert(a != 0 && a == (a & -a));
   return (v + a - 1) & ~(a - 1);
}
151
/* Signed variant of align_u32. `a` must be a non-zero power of two. */
static inline int32_t
align_i32(int32_t v, int32_t a)
{
   assert(a != 0 && a == (a & -a));
   return (v + a - 1) & ~(a - 1);
}
158
/** Alignment must be a power of 2. */
static inline bool
radv_is_aligned(uintmax_t n, uintmax_t a)
{
   assert(a == (a & -a));
   return (n & (a - 1)) == 0;
}
166
/* Number of `a`-sized chunks needed to cover `v` (ceiling division). */
static inline uint32_t
round_up_u32(uint32_t v, uint32_t a)
{
   return (v + a - 1) / a;
}
172
/* 64-bit variant of round_up_u32 (ceiling division). */
static inline uint64_t
round_up_u64(uint64_t v, uint64_t a)
{
   return (v + a - 1) / a;
}
178
/* Size of mip level `levels` below a base dimension of `n`, clamped to 1;
 * returns 0 only for a zero base dimension. */
static inline uint32_t
radv_minify(uint32_t n, uint32_t levels)
{
   if (unlikely(n == 0))
      return 0;
   else
      return MAX2(n >> levels, 1);
}
187
radv_clamp_f(float f, float min, float max)
200
/* Clear `clear_mask` bits from `*inout_mask`; returns true iff any of those
 * bits were previously set. */
static inline bool
radv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask)
{
   if (*inout_mask & clear_mask) {
      *inout_mask &= ~clear_mask;
      return true;
   } else
      return false;
}
211
/* Convert a float to signed fixed point with `frac_bits` fractional bits. */
static inline int
radv_float_to_sfixed(float value, unsigned frac_bits)
{
   return value * (1 << frac_bits);
}
216
/* Convert a float to unsigned fixed point with `frac_bits` fractional bits. */
static inline unsigned int
radv_float_to_ufixed(float value, unsigned frac_bits)
{
   return value * (1 << frac_bits);
}
222
/* Whenever we generate an error, pass it through this function. Useful for
223
* debugging, where we can break on it. Only call at error site, not when
224
* propagating errors. Might be useful to plug in a stack trace here.
227
struct radv_image_view;
228
struct radv_instance;
230
/* A non-fatal assert. Useful for debugging. */
#ifdef NDEBUG
#define radv_assert(x)                                                                             \
   do {                                                                                            \
   } while (0)
#else
#define radv_assert(x)                                                                             \
   do {                                                                                            \
      if (unlikely(!(x)))                                                                          \
         fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x);                            \
   } while (0)
#endif
243
int radv_get_instance_entrypoint_index(const char *name);
244
int radv_get_device_entrypoint_index(const char *name);
245
int radv_get_physical_device_entrypoint_index(const char *name);
247
const char *radv_get_instance_entry_name(int index);
248
const char *radv_get_physical_device_entry_name(int index);
249
const char *radv_get_device_entry_name(int index);
252
enum radv_queue_family {
256
RADV_MAX_QUEUE_FAMILIES,
257
RADV_QUEUE_FOREIGN = RADV_MAX_QUEUE_FAMILIES,
261
struct radv_physical_device {
262
struct vk_physical_device vk;
264
/* Link in radv_instance::physical_devices */
265
struct list_head link;
267
struct radv_instance *instance;
269
struct radeon_winsys *ws;
270
struct radeon_info rad_info;
271
char name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE];
272
uint8_t driver_uuid[VK_UUID_SIZE];
273
uint8_t device_uuid[VK_UUID_SIZE];
274
uint8_t cache_uuid[VK_UUID_SIZE];
278
struct wsi_device wsi_device;
280
bool out_of_order_rast_allowed;
282
/* Whether DCC should be enabled for MSAA textures. */
283
bool dcc_msaa_allowed;
285
/* Whether to enable NGG. */
288
/* Whether to enable NGG culling. */
289
bool use_ngg_culling;
291
/* Whether to enable NGG streamout. */
292
bool use_ngg_streamout;
294
/* Number of threads per wave. */
295
uint8_t ps_wave_size;
296
uint8_t cs_wave_size;
297
uint8_t ge_wave_size;
298
uint8_t rt_wave_size;
300
/* Whether to use the LLVM compiler backend */
303
/* Whether to emulate ETC2 image support on HW without support. */
306
/* This is the drivers on-disk cache used as a fallback as opposed to
307
* the pipeline cache defined by apps.
309
struct disk_cache *disk_cache;
311
VkPhysicalDeviceMemoryProperties memory_properties;
312
enum radeon_bo_domain memory_domains[VK_MAX_MEMORY_TYPES];
313
enum radeon_bo_flag memory_flags[VK_MAX_MEMORY_TYPES];
318
drmPciBusInfo bus_info;
324
nir_shader_compiler_options nir_options[MESA_VULKAN_SHADER_STAGES];
326
enum radv_queue_family vk_queue_to_radv[RADV_MAX_QUEUE_FAMILIES];
330
struct radv_instance {
331
struct vk_instance vk;
333
VkAllocationCallbacks alloc;
335
uint64_t debug_flags;
336
uint64_t perftest_flags;
338
bool physical_devices_enumerated;
339
struct list_head physical_devices;
341
struct driOptionCache dri_options;
342
struct driOptionCache available_dri_options;
345
* Workarounds for game bugs.
347
bool enable_mrt_output_nan_fixup;
348
bool disable_tc_compat_htile_in_general;
349
bool disable_shrink_image_store;
350
bool absolute_depth_bias;
351
bool report_apu_as_dgpu;
352
bool disable_htile_layers;
353
bool disable_aniso_single_level;
355
bool disable_sinking_load_input_fs;
358
VkResult radv_init_wsi(struct radv_physical_device *physical_device);
359
void radv_finish_wsi(struct radv_physical_device *physical_device);
363
struct radv_pipeline_cache {
364
struct vk_object_base base;
365
struct radv_device *device;
367
VkPipelineCacheCreateFlags flags;
371
uint32_t kernel_count;
372
struct cache_entry **hash_table;
375
VkAllocationCallbacks alloc;
378
struct radv_shader_binary;
380
struct radv_pipeline_shader_stack_size;
382
void radv_pipeline_cache_init(struct radv_pipeline_cache *cache, struct radv_device *device);
383
void radv_pipeline_cache_finish(struct radv_pipeline_cache *cache);
384
bool radv_pipeline_cache_load(struct radv_pipeline_cache *cache, const void *data, size_t size);
386
bool radv_create_shaders_from_pipeline_cache(
387
struct radv_device *device, struct radv_pipeline_cache *cache, const unsigned char *sha1,
388
struct radv_pipeline *pipeline, struct radv_pipeline_shader_stack_size **stack_sizes,
389
uint32_t *num_stack_sizes, bool *found_in_application_cache);
391
void radv_pipeline_cache_insert_shaders(
392
struct radv_device *device, struct radv_pipeline_cache *cache, const unsigned char *sha1,
393
struct radv_pipeline *pipeline, struct radv_shader_binary *const *binaries,
394
const struct radv_pipeline_shader_stack_size *stack_sizes, uint32_t num_stack_sizes);
396
VkResult radv_upload_shaders(struct radv_device *device, struct radv_pipeline *pipeline,
397
struct radv_shader_binary **binaries,
398
struct radv_shader_binary *gs_copy_binary);
400
enum radv_blit_ds_layout {
401
RADV_BLIT_DS_LAYOUT_TILE_ENABLE,
402
RADV_BLIT_DS_LAYOUT_TILE_DISABLE,
403
RADV_BLIT_DS_LAYOUT_COUNT,
406
static inline enum radv_blit_ds_layout
407
radv_meta_blit_ds_to_type(VkImageLayout layout)
409
return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_BLIT_DS_LAYOUT_TILE_DISABLE
410
: RADV_BLIT_DS_LAYOUT_TILE_ENABLE;
413
static inline VkImageLayout
414
radv_meta_blit_ds_to_layout(enum radv_blit_ds_layout ds_layout)
416
return ds_layout == RADV_BLIT_DS_LAYOUT_TILE_ENABLE ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
417
: VK_IMAGE_LAYOUT_GENERAL;
420
enum radv_meta_dst_layout {
421
RADV_META_DST_LAYOUT_GENERAL,
422
RADV_META_DST_LAYOUT_OPTIMAL,
423
RADV_META_DST_LAYOUT_COUNT,
426
static inline enum radv_meta_dst_layout
427
radv_meta_dst_layout_from_layout(VkImageLayout layout)
429
return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_META_DST_LAYOUT_GENERAL
430
: RADV_META_DST_LAYOUT_OPTIMAL;
433
static inline VkImageLayout
434
radv_meta_dst_layout_to_layout(enum radv_meta_dst_layout layout)
436
return layout == RADV_META_DST_LAYOUT_OPTIMAL ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
437
: VK_IMAGE_LAYOUT_GENERAL;
440
struct radv_meta_state {
441
VkAllocationCallbacks alloc;
443
struct radv_pipeline_cache cache;
446
* For on-demand pipeline creation, makes sure that
447
* only one thread tries to build a pipeline at the same time.
452
* Use array element `i` for images with `2^i` samples.
455
VkPipeline color_pipelines[NUM_META_FS_KEYS];
456
} color_clear[MAX_SAMPLES_LOG2][MAX_RTS];
459
VkPipeline depth_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
460
VkPipeline stencil_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
461
VkPipeline depthstencil_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
463
VkPipeline depth_only_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
464
VkPipeline stencil_only_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
465
VkPipeline depthstencil_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
466
} ds_clear[MAX_SAMPLES_LOG2];
468
VkPipelineLayout clear_color_p_layout;
469
VkPipelineLayout clear_depth_p_layout;
470
VkPipelineLayout clear_depth_unrestricted_p_layout;
472
/* Optimized compute fast HTILE clear for stencil or depth only. */
473
VkPipeline clear_htile_mask_pipeline;
474
VkPipelineLayout clear_htile_mask_p_layout;
475
VkDescriptorSetLayout clear_htile_mask_ds_layout;
477
/* Copy VRS into HTILE. */
478
VkPipeline copy_vrs_htile_pipeline;
479
VkPipelineLayout copy_vrs_htile_p_layout;
480
VkDescriptorSetLayout copy_vrs_htile_ds_layout;
482
/* Clear DCC with comp-to-single. */
483
VkPipeline clear_dcc_comp_to_single_pipeline[2]; /* 0: 1x, 1: 2x/4x/8x */
484
VkPipelineLayout clear_dcc_comp_to_single_p_layout;
485
VkDescriptorSetLayout clear_dcc_comp_to_single_ds_layout;
488
/** Pipeline that blits from a 1D image. */
489
VkPipeline pipeline_1d_src[NUM_META_FS_KEYS];
491
/** Pipeline that blits from a 2D image. */
492
VkPipeline pipeline_2d_src[NUM_META_FS_KEYS];
494
/** Pipeline that blits from a 3D image. */
495
VkPipeline pipeline_3d_src[NUM_META_FS_KEYS];
497
VkPipeline depth_only_1d_pipeline;
498
VkPipeline depth_only_2d_pipeline;
499
VkPipeline depth_only_3d_pipeline;
501
VkPipeline stencil_only_1d_pipeline;
502
VkPipeline stencil_only_2d_pipeline;
503
VkPipeline stencil_only_3d_pipeline;
504
VkPipelineLayout pipeline_layout;
505
VkDescriptorSetLayout ds_layout;
509
VkPipelineLayout p_layouts[5];
510
VkDescriptorSetLayout ds_layouts[5];
511
VkPipeline pipelines[5][NUM_META_FS_KEYS];
513
VkPipeline depth_only_pipeline[5];
515
VkPipeline stencil_only_pipeline[5];
516
} blit2d[MAX_SAMPLES_LOG2];
519
VkPipelineLayout img_p_layout;
520
VkDescriptorSetLayout img_ds_layout;
522
VkPipeline pipeline_3d;
525
VkPipelineLayout img_p_layout;
526
VkDescriptorSetLayout img_ds_layout;
528
VkPipeline pipeline_3d;
531
VkPipelineLayout img_p_layout;
532
VkDescriptorSetLayout img_ds_layout;
536
VkPipelineLayout img_p_layout;
537
VkDescriptorSetLayout img_ds_layout;
538
VkPipeline pipeline[MAX_SAMPLES_LOG2];
539
VkPipeline pipeline_3d;
542
VkPipelineLayout img_p_layout;
543
VkDescriptorSetLayout img_ds_layout;
547
VkPipelineLayout img_p_layout;
548
VkDescriptorSetLayout img_ds_layout;
549
VkPipeline pipeline[MAX_SAMPLES_LOG2];
550
VkPipeline pipeline_3d;
553
VkPipelineLayout img_p_layout;
554
VkDescriptorSetLayout img_ds_layout;
558
VkPipelineLayout p_layout;
559
VkDescriptorSetLayout ds_layout;
560
VkPipeline pipeline[MAX_SAMPLES_LOG2];
564
VkPipelineLayout p_layout;
565
VkPipeline pipeline[NUM_META_FS_KEYS];
569
VkDescriptorSetLayout ds_layout;
570
VkPipelineLayout p_layout;
573
VkPipeline i_pipeline;
574
VkPipeline srgb_pipeline;
575
} rc[MAX_SAMPLES_LOG2];
577
VkPipeline depth_zero_pipeline;
579
VkPipeline average_pipeline;
580
VkPipeline max_pipeline;
581
VkPipeline min_pipeline;
582
} depth[MAX_SAMPLES_LOG2];
584
VkPipeline stencil_zero_pipeline;
586
VkPipeline max_pipeline;
587
VkPipeline min_pipeline;
588
} stencil[MAX_SAMPLES_LOG2];
592
VkDescriptorSetLayout ds_layout;
593
VkPipelineLayout p_layout;
596
VkPipeline pipeline[NUM_META_FS_KEYS];
597
} rc[MAX_SAMPLES_LOG2];
599
VkPipeline depth_zero_pipeline;
601
VkPipeline average_pipeline;
602
VkPipeline max_pipeline;
603
VkPipeline min_pipeline;
604
} depth[MAX_SAMPLES_LOG2];
606
VkPipeline stencil_zero_pipeline;
608
VkPipeline max_pipeline;
609
VkPipeline min_pipeline;
610
} stencil[MAX_SAMPLES_LOG2];
614
VkPipelineLayout p_layout;
615
VkPipeline decompress_pipeline;
616
VkPipeline resummarize_pipeline;
617
} depth_decomp[MAX_SAMPLES_LOG2];
619
VkDescriptorSetLayout expand_depth_stencil_compute_ds_layout;
620
VkPipelineLayout expand_depth_stencil_compute_p_layout;
621
VkPipeline expand_depth_stencil_compute_pipeline;
624
VkPipelineLayout p_layout;
625
VkPipeline cmask_eliminate_pipeline;
626
VkPipeline fmask_decompress_pipeline;
627
VkPipeline dcc_decompress_pipeline;
629
VkDescriptorSetLayout dcc_decompress_compute_ds_layout;
630
VkPipelineLayout dcc_decompress_compute_p_layout;
631
VkPipeline dcc_decompress_compute_pipeline;
635
VkPipelineLayout fill_p_layout;
636
VkPipelineLayout copy_p_layout;
637
VkDescriptorSetLayout fill_ds_layout;
638
VkDescriptorSetLayout copy_ds_layout;
639
VkPipeline fill_pipeline;
640
VkPipeline copy_pipeline;
644
VkDescriptorSetLayout ds_layout;
645
VkPipelineLayout p_layout;
646
VkPipeline occlusion_query_pipeline;
647
VkPipeline pipeline_statistics_query_pipeline;
648
VkPipeline tfb_query_pipeline;
649
VkPipeline timestamp_query_pipeline;
653
VkDescriptorSetLayout ds_layout;
654
VkPipelineLayout p_layout;
655
VkPipeline pipeline[MAX_SAMPLES_LOG2];
659
VkDescriptorSetLayout ds_layout;
660
VkPipelineLayout p_layout;
661
VkPipeline pipeline[32];
665
VkPipelineLayout leaf_p_layout;
666
VkPipeline leaf_pipeline;
667
VkPipelineLayout internal_p_layout;
668
VkPipeline internal_pipeline;
669
VkPipelineLayout copy_p_layout;
670
VkPipeline copy_pipeline;
671
} accel_struct_build;
674
VkDescriptorSetLayout ds_layout;
675
VkPipelineLayout p_layout;
680
#define RADV_NUM_HW_CTX (RADEON_CTX_PRIORITY_REALTIME + 1)
682
struct radv_deferred_queue_submission;
684
static inline enum radv_queue_family
685
vk_queue_to_radv(const struct radv_physical_device *phys_dev, int queue_family_index)
687
if (queue_family_index == VK_QUEUE_FAMILY_EXTERNAL ||
688
queue_family_index == VK_QUEUE_FAMILY_FOREIGN_EXT)
689
return RADV_QUEUE_FOREIGN;
690
if (queue_family_index == VK_QUEUE_FAMILY_IGNORED)
691
return RADV_QUEUE_IGNORED;
693
assert(queue_family_index < RADV_MAX_QUEUE_FAMILIES);
694
return phys_dev->vk_queue_to_radv[queue_family_index];
697
enum ring_type radv_queue_family_to_ring(struct radv_physical_device *physical_device,
698
enum radv_queue_family f);
702
struct radv_device *device;
703
struct radeon_winsys_ctx *hw_ctx;
704
enum radeon_ctx_priority priority;
706
enum radv_queue_family qf;
707
uint32_t scratch_size_per_wave;
708
uint32_t scratch_waves;
709
uint32_t compute_scratch_size_per_wave;
710
uint32_t compute_scratch_waves;
711
uint32_t esgs_ring_size;
712
uint32_t gsvs_ring_size;
716
bool has_sample_positions;
718
struct radeon_winsys_bo *scratch_bo;
719
struct radeon_winsys_bo *descriptor_bo;
720
struct radeon_winsys_bo *compute_scratch_bo;
721
struct radeon_winsys_bo *esgs_ring_bo;
722
struct radeon_winsys_bo *gsvs_ring_bo;
723
struct radeon_winsys_bo *tess_rings_bo;
724
struct radeon_winsys_bo *gds_bo;
725
struct radeon_winsys_bo *gds_oa_bo;
726
struct radeon_cmdbuf *initial_preamble_cs;
727
struct radeon_cmdbuf *initial_full_flush_preamble_cs;
728
struct radeon_cmdbuf *continue_preamble_cs;
731
#define RADV_BORDER_COLOR_COUNT 4096
732
#define RADV_BORDER_COLOR_BUFFER_SIZE (sizeof(VkClearColorValue) * RADV_BORDER_COLOR_COUNT)
734
struct radv_device_border_color_data {
735
bool used[RADV_BORDER_COLOR_COUNT];
737
struct radeon_winsys_bo *bo;
738
VkClearColorValue *colors_gpu_ptr;
740
/* Mutex is required to guarantee vkCreateSampler thread safety
741
* given that we are writing to a buffer and checking color occupation */
745
enum radv_force_vrs {
746
RADV_FORCE_VRS_1x1 = 0,
752
struct radv_notifier {
762
struct radv_instance *instance;
763
struct radeon_winsys *ws;
765
struct radeon_winsys_ctx *hw_ctx[RADV_NUM_HW_CTX];
766
struct radv_meta_state meta_state;
768
struct radv_queue *queues[RADV_MAX_QUEUE_FAMILIES];
769
int queue_count[RADV_MAX_QUEUE_FAMILIES];
772
uint32_t tess_offchip_block_dw_size;
773
uint32_t scratch_waves;
774
uint32_t dispatch_initiator;
776
uint32_t gs_table_depth;
778
/* MSAA sample locations.
779
* The first index is the sample index.
780
* The second index is the coordinate: X, Y. */
781
float sample_locations_1x[1][2];
782
float sample_locations_2x[2][2];
783
float sample_locations_4x[4][2];
784
float sample_locations_8x[8][2];
787
uint32_t gfx_init_size_dw;
788
struct radeon_winsys_bo *gfx_init;
790
struct radeon_winsys_bo *trace_bo;
791
uint32_t *trace_id_ptr;
793
/* Whether to keep shader debug info, for debugging. */
794
bool keep_shader_info;
796
struct radv_physical_device *physical_device;
798
/* Backup in-memory cache to be used if the app doesn't provide one */
799
struct radv_pipeline_cache *mem_cache;
802
* use different counters so MSAA MRTs get consecutive surface indices,
803
* even if MASK is allocated in between.
805
uint32_t image_mrt_offset_counter;
806
uint32_t fmask_mrt_offset_counter;
808
struct list_head shader_arenas;
809
unsigned shader_arena_shift;
810
uint8_t shader_free_list_mask;
811
struct list_head shader_free_lists[RADV_SHADER_ALLOC_NUM_FREE_LISTS];
812
struct list_head shader_block_obj_pool;
813
mtx_t shader_arena_mutex;
815
/* For detecting VM faults reported by dmesg. */
816
uint64_t dmesg_timestamp;
818
/* Whether the app has enabled the robustBufferAccess/robustBufferAccess2 features. */
819
bool robust_buffer_access;
820
bool robust_buffer_access2;
822
/* Whether gl_FragCoord.z should be adjusted for VRS due to a hw bug
823
* on some GFX10.3 chips.
825
bool adjust_frag_coord_z;
827
/* Whether to inline the compute dispatch size in user sgprs. */
828
bool load_grid_size_from_user_sgpr;
830
/* Whether the driver uses a global BO list. */
831
bool use_global_bo_list;
833
/* Whether attachment VRS is enabled. */
834
bool attachment_vrs_enabled;
836
/* Whether shader image 32-bit float atomics are enabled. */
837
bool image_float32_atomics;
839
/* Whether anisotropy is forced with RADV_TEX_ANISO (-1 is disabled). */
842
struct radv_device_border_color_data border_color_data;
845
struct ac_thread_trace_data thread_trace;
848
struct ac_spm_trace_data spm_trace;
850
/* Performance counters. */
851
struct ac_perfcounters perfcounters;
854
struct radv_trap_handler_shader *trap_handler_shader;
855
struct radeon_winsys_bo *tma_bo; /* Trap Memory Address */
858
/* Overallocation. */
859
bool overallocation_disallowed;
860
uint64_t allocated_memory_size[VK_MAX_MEMORY_HEAPS];
861
mtx_t overallocation_mutex;
863
/* RADV_FORCE_VRS. */
864
struct radv_notifier notifier;
865
enum radv_force_vrs force_vrs;
867
/* Depth image for VRS when not bound by the app. */
869
struct radv_image *image;
870
struct radv_buffer *buffer; /* HTILE */
871
struct radv_device_memory *mem;
874
struct u_rwlock vs_prologs_lock;
875
struct hash_table *vs_prologs;
877
/* Prime blit sdma queue */
878
struct radv_queue *private_sdma_queue;
880
struct radv_shader_prolog *simple_vs_prologs[MAX_VERTEX_ATTRIBS];
881
struct radv_shader_prolog *instance_rate_vs_prologs[816];
883
simple_mtx_t trace_mtx;
885
/* Whether per-vertex VRS is forced. */
886
bool force_vrs_enabled;
889
struct radv_device_memory {
890
struct vk_object_base base;
891
struct radeon_winsys_bo *bo;
892
/* for dedicated allocations */
893
struct radv_image *image;
894
struct radv_buffer *buffer;
900
#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
901
struct AHardwareBuffer *android_hardware_buffer;
905
void radv_device_memory_init(struct radv_device_memory *mem, struct radv_device *device,
906
struct radeon_winsys_bo *bo);
907
void radv_device_memory_finish(struct radv_device_memory *mem);
909
struct radv_descriptor_range {
914
struct radv_descriptor_set_header {
915
struct vk_object_base base;
916
struct radv_descriptor_set_layout *layout;
918
uint32_t buffer_count;
920
struct radeon_winsys_bo *bo;
922
uint32_t *mapped_ptr;
923
struct radv_descriptor_range *dynamic_descriptors;
926
struct radv_descriptor_set {
927
struct radv_descriptor_set_header header;
929
struct radeon_winsys_bo *descriptors[];
932
struct radv_push_descriptor_set {
933
struct radv_descriptor_set_header set;
937
struct radv_descriptor_pool_entry {
940
struct radv_descriptor_set *set;
943
struct radv_descriptor_pool {
944
struct vk_object_base base;
945
struct radeon_winsys_bo *bo;
948
uint64_t current_offset;
951
uint8_t *host_memory_base;
952
uint8_t *host_memory_ptr;
953
uint8_t *host_memory_end;
955
uint32_t entry_count;
956
uint32_t max_entry_count;
957
struct radv_descriptor_pool_entry entries[0];
960
struct radv_descriptor_update_template_entry {
961
VkDescriptorType descriptor_type;
963
/* The number of descriptors to update */
964
uint32_t descriptor_count;
966
/* Into mapped_ptr or dynamic_descriptors, in units of the respective array */
969
/* In dwords. Not valid/used for dynamic descriptors */
972
uint32_t buffer_offset;
974
/* Only valid for combined image samplers and samplers */
976
uint8_t sampler_offset;
982
/* For push descriptors */
983
const uint32_t *immutable_samplers;
986
struct radv_descriptor_update_template {
987
struct vk_object_base base;
988
uint32_t entry_count;
989
VkPipelineBindPoint bind_point;
990
struct radv_descriptor_update_template_entry entry[0];
993
void radv_descriptor_set_layout_destroy(struct radv_device *device,
994
struct radv_descriptor_set_layout *set_layout);
997
radv_descriptor_set_layout_ref(struct radv_descriptor_set_layout *set_layout)
999
assert(set_layout && set_layout->ref_cnt >= 1);
1000
p_atomic_inc(&set_layout->ref_cnt);
1004
radv_descriptor_set_layout_unref(struct radv_device *device,
1005
struct radv_descriptor_set_layout *set_layout)
1007
assert(set_layout && set_layout->ref_cnt >= 1);
1008
if (p_atomic_dec_zero(&set_layout->ref_cnt))
1009
radv_descriptor_set_layout_destroy(device, set_layout);
1012
struct radv_buffer {
1013
struct vk_object_base base;
1016
VkBufferUsageFlags usage;
1017
VkBufferCreateFlags flags;
1019
/* Set when bound */
1020
struct radeon_winsys_bo *bo;
1021
VkDeviceSize offset;
1024
void radv_buffer_init(struct radv_buffer *buffer, struct radv_device *device,
1025
struct radeon_winsys_bo *bo, uint64_t size, uint64_t offset);
1026
void radv_buffer_finish(struct radv_buffer *buffer);
1028
enum radv_dynamic_state_bits {
1029
RADV_DYNAMIC_VIEWPORT = 1ull << 0,
1030
RADV_DYNAMIC_SCISSOR = 1ull << 1,
1031
RADV_DYNAMIC_LINE_WIDTH = 1ull << 2,
1032
RADV_DYNAMIC_DEPTH_BIAS = 1ull << 3,
1033
RADV_DYNAMIC_BLEND_CONSTANTS = 1ull << 4,
1034
RADV_DYNAMIC_DEPTH_BOUNDS = 1ull << 5,
1035
RADV_DYNAMIC_STENCIL_COMPARE_MASK = 1ull << 6,
1036
RADV_DYNAMIC_STENCIL_WRITE_MASK = 1ull << 7,
1037
RADV_DYNAMIC_STENCIL_REFERENCE = 1ull << 8,
1038
RADV_DYNAMIC_DISCARD_RECTANGLE = 1ull << 9,
1039
RADV_DYNAMIC_SAMPLE_LOCATIONS = 1ull << 10,
1040
RADV_DYNAMIC_LINE_STIPPLE = 1ull << 11,
1041
RADV_DYNAMIC_CULL_MODE = 1ull << 12,
1042
RADV_DYNAMIC_FRONT_FACE = 1ull << 13,
1043
RADV_DYNAMIC_PRIMITIVE_TOPOLOGY = 1ull << 14,
1044
RADV_DYNAMIC_DEPTH_TEST_ENABLE = 1ull << 15,
1045
RADV_DYNAMIC_DEPTH_WRITE_ENABLE = 1ull << 16,
1046
RADV_DYNAMIC_DEPTH_COMPARE_OP = 1ull << 17,
1047
RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE = 1ull << 18,
1048
RADV_DYNAMIC_STENCIL_TEST_ENABLE = 1ull << 19,
1049
RADV_DYNAMIC_STENCIL_OP = 1ull << 20,
1050
RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1ull << 21,
1051
RADV_DYNAMIC_FRAGMENT_SHADING_RATE = 1ull << 22,
1052
RADV_DYNAMIC_PATCH_CONTROL_POINTS = 1ull << 23,
1053
RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE = 1ull << 24,
1054
RADV_DYNAMIC_DEPTH_BIAS_ENABLE = 1ull << 25,
1055
RADV_DYNAMIC_LOGIC_OP = 1ull << 26,
1056
RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE = 1ull << 27,
1057
RADV_DYNAMIC_COLOR_WRITE_ENABLE = 1ull << 28,
1058
RADV_DYNAMIC_VERTEX_INPUT = 1ull << 29,
1059
RADV_DYNAMIC_ALL = (1ull << 30) - 1,
1062
enum radv_cmd_dirty_bits {
1063
/* Keep the dynamic state dirty bits in sync with
1064
* enum radv_dynamic_state_bits */
1065
RADV_CMD_DIRTY_DYNAMIC_VIEWPORT = 1ull << 0,
1066
RADV_CMD_DIRTY_DYNAMIC_SCISSOR = 1ull << 1,
1067
RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH = 1ull << 2,
1068
RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS = 1ull << 3,
1069
RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS = 1ull << 4,
1070
RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS = 1ull << 5,
1071
RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1ull << 6,
1072
RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1ull << 7,
1073
RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1ull << 8,
1074
RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE = 1ull << 9,
1075
RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS = 1ull << 10,
1076
RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE = 1ull << 11,
1077
RADV_CMD_DIRTY_DYNAMIC_CULL_MODE = 1ull << 12,
1078
RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE = 1ull << 13,
1079
RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY = 1ull << 14,
1080
RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE = 1ull << 15,
1081
RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE = 1ull << 16,
1082
RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP = 1ull << 17,
1083
RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE = 1ull << 18,
1084
RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE = 1ull << 19,
1085
RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP = 1ull << 20,
1086
RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1ull << 21,
1087
RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE = 1ull << 22,
1088
RADV_CMD_DIRTY_DYNAMIC_PATCH_CONTROL_POINTS = 1ull << 23,
1089
RADV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE = 1ull << 24,
1090
RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE = 1ull << 25,
1091
RADV_CMD_DIRTY_DYNAMIC_LOGIC_OP = 1ull << 26,
1092
RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE = 1ull << 27,
1093
RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_ENABLE = 1ull << 28,
1094
RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT = 1ull << 29,
1095
RADV_CMD_DIRTY_DYNAMIC_ALL = (1ull << 30) - 1,
1096
RADV_CMD_DIRTY_PIPELINE = 1ull << 30,
1097
RADV_CMD_DIRTY_INDEX_BUFFER = 1ull << 31,
1098
RADV_CMD_DIRTY_FRAMEBUFFER = 1ull << 32,
1099
RADV_CMD_DIRTY_VERTEX_BUFFER = 1ull << 33,
1100
RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1ull << 34,
1103
enum radv_cmd_flush_bits {
1104
/* Instruction cache. */
1105
RADV_CMD_FLAG_INV_ICACHE = 1 << 0,
1106
/* Scalar L1 cache. */
1107
RADV_CMD_FLAG_INV_SCACHE = 1 << 1,
1108
/* Vector L1 cache. */
1109
RADV_CMD_FLAG_INV_VCACHE = 1 << 2,
1110
/* L2 cache + L2 metadata cache writeback & invalidate.
1111
* GFX6-8: Used by shaders only. GFX9-10: Used by everything. */
1112
RADV_CMD_FLAG_INV_L2 = 1 << 3,
1113
/* L2 writeback (write dirty L2 lines to memory for non-L2 clients).
1114
* Only used for coherency with non-L2 clients like CB, DB, CP on GFX6-8.
1115
* GFX6-7 will do complete invalidation, because the writeback is unsupported. */
1116
RADV_CMD_FLAG_WB_L2 = 1 << 4,
1117
/* Invalidate the metadata cache. To be used when the DCC/HTILE metadata
1118
* changed and we want to read an image from shaders. */
1119
RADV_CMD_FLAG_INV_L2_METADATA = 1 << 5,
1120
/* Framebuffer caches */
1121
RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 6,
1122
RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 7,
1123
RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 8,
1124
RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 9,
1125
/* Engine synchronization. */
1126
RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 10,
1127
RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 11,
1128
RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 12,
1129
RADV_CMD_FLAG_VGT_FLUSH = 1 << 13,
1130
/* Pipeline query controls. */
1131
RADV_CMD_FLAG_START_PIPELINE_STATS = 1 << 14,
1132
RADV_CMD_FLAG_STOP_PIPELINE_STATS = 1 << 15,
1133
RADV_CMD_FLAG_VGT_STREAMOUT_SYNC = 1 << 16,
1135
RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER =
1136
(RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
1137
RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META)
1140
struct radv_vertex_binding {
1141
struct radv_buffer *buffer;
1142
VkDeviceSize offset;
1144
VkDeviceSize stride;
1147
struct radv_streamout_binding {
1148
struct radv_buffer *buffer;
1149
VkDeviceSize offset;
1153
struct radv_streamout_state {
1154
/* Mask of bound streamout buffers. */
1155
uint8_t enabled_mask;
1157
/* State of VGT_STRMOUT_BUFFER_(CONFIG|END) */
1158
uint32_t hw_enabled_mask;
1160
/* State of VGT_STRMOUT_(CONFIG|EN) */
1161
bool streamout_enabled;
1164
struct radv_viewport_state {
1166
VkViewport viewports[MAX_VIEWPORTS];
1170
} xform[MAX_VIEWPORTS];
1173
struct radv_scissor_state {
1175
VkRect2D scissors[MAX_SCISSORS];
1178
struct radv_discard_rectangle_state {
1180
VkRect2D rectangles[MAX_DISCARD_RECTANGLES];
1183
struct radv_sample_locations_state {
1184
VkSampleCountFlagBits per_pixel;
1185
VkExtent2D grid_size;
1187
VkSampleLocationEXT locations[MAX_SAMPLE_LOCATIONS];
1190
struct radv_dynamic_state {
1192
* Bitmask of (1ull << VK_DYNAMIC_STATE_*).
1193
* Defines the set of saved dynamic state.
1197
struct radv_viewport_state viewport;
1199
struct radv_scissor_state scissor;
1209
float blend_constants[4];
1219
} stencil_compare_mask;
1224
} stencil_write_mask;
1228
VkStencilOp fail_op;
1229
VkStencilOp pass_op;
1230
VkStencilOp depth_fail_op;
1231
VkCompareOp compare_op;
1235
VkStencilOp fail_op;
1236
VkStencilOp pass_op;
1237
VkStencilOp depth_fail_op;
1238
VkCompareOp compare_op;
1245
} stencil_reference;
1247
struct radv_discard_rectangle_state discard_rectangle;
1249
struct radv_sample_locations_state sample_location;
1256
VkCullModeFlags cull_mode;
1257
VkFrontFace front_face;
1258
unsigned primitive_topology;
1260
bool depth_test_enable;
1261
bool depth_write_enable;
1262
VkCompareOp depth_compare_op;
1263
bool depth_bounds_test_enable;
1264
bool stencil_test_enable;
1268
VkFragmentShadingRateCombinerOpKHR combiner_ops[2];
1269
} fragment_shading_rate;
1271
bool depth_bias_enable;
1272
bool primitive_restart_enable;
1273
bool rasterizer_discard_enable;
1277
uint32_t color_write_enable;
1280
extern const struct radv_dynamic_state default_dynamic_state;
1282
const char *radv_get_debug_option_name(int id);
1284
const char *radv_get_perftest_option_name(int id);
1286
int radv_get_int_debug_option(const char *name, int default_value);
1288
struct radv_color_buffer_info {
1289
uint64_t cb_color_base;
1290
uint64_t cb_color_cmask;
1291
uint64_t cb_color_fmask;
1292
uint64_t cb_dcc_base;
1293
uint32_t cb_color_slice;
1294
uint32_t cb_color_view;
1295
uint32_t cb_color_info;
1296
uint32_t cb_color_attrib;
1297
uint32_t cb_color_attrib2; /* GFX9 and later */
1298
uint32_t cb_color_attrib3; /* GFX10 and later */
1299
uint32_t cb_dcc_control;
1300
uint32_t cb_color_cmask_slice;
1301
uint32_t cb_color_fmask_slice;
1303
uint32_t cb_color_pitch; // GFX6-GFX8
1304
uint32_t cb_mrt_epitch; // GFX9+
1308
struct radv_ds_buffer_info {
1309
uint64_t db_z_read_base;
1310
uint64_t db_stencil_read_base;
1311
uint64_t db_z_write_base;
1312
uint64_t db_stencil_write_base;
1313
uint64_t db_htile_data_base;
1314
uint32_t db_depth_info;
1316
uint32_t db_stencil_info;
1317
uint32_t db_depth_view;
1318
uint32_t db_depth_size;
1319
uint32_t db_depth_slice;
1320
uint32_t db_htile_surface;
1321
uint32_t pa_su_poly_offset_db_fmt_cntl;
1322
uint32_t db_z_info2; /* GFX9 only */
1323
uint32_t db_stencil_info2; /* GFX9 only */
1326
void radv_initialise_color_surface(struct radv_device *device, struct radv_color_buffer_info *cb,
1327
struct radv_image_view *iview);
1328
void radv_initialise_ds_surface(struct radv_device *device, struct radv_ds_buffer_info *ds,
1329
struct radv_image_view *iview);
1330
void radv_initialise_vrs_surface(struct radv_image *image, struct radv_buffer *htile_buffer,
1331
struct radv_ds_buffer_info *ds);
1334
* Attachment state when recording a renderpass instance.
1336
* The clear value is valid only if there exists a pending clear.
1338
struct radv_attachment_state {
1339
VkImageAspectFlags pending_clear_aspects;
1340
uint32_t cleared_views;
1341
VkClearValue clear_value;
1342
VkImageLayout current_layout;
1343
VkImageLayout current_stencil_layout;
1344
bool current_in_render_loop;
1345
struct radv_sample_locations_state sample_location;
1348
struct radv_color_buffer_info cb;
1349
struct radv_ds_buffer_info ds;
1351
struct radv_image_view *iview;
1354
struct radv_descriptor_state {
1355
struct radv_descriptor_set *sets[MAX_SETS];
1358
struct radv_push_descriptor_set push_set;
1360
uint32_t dynamic_buffers[4 * MAX_DYNAMIC_BUFFERS];
1363
struct radv_subpass_sample_locs_state {
1364
uint32_t subpass_idx;
1365
struct radv_sample_locations_state sample_location;
1368
enum rgp_flush_bits {
1369
RGP_FLUSH_WAIT_ON_EOP_TS = 0x1,
1370
RGP_FLUSH_VS_PARTIAL_FLUSH = 0x2,
1371
RGP_FLUSH_PS_PARTIAL_FLUSH = 0x4,
1372
RGP_FLUSH_CS_PARTIAL_FLUSH = 0x8,
1373
RGP_FLUSH_PFP_SYNC_ME = 0x10,
1374
RGP_FLUSH_SYNC_CP_DMA = 0x20,
1375
RGP_FLUSH_INVAL_VMEM_L0 = 0x40,
1376
RGP_FLUSH_INVAL_ICACHE = 0x80,
1377
RGP_FLUSH_INVAL_SMEM_L0 = 0x100,
1378
RGP_FLUSH_FLUSH_L2 = 0x200,
1379
RGP_FLUSH_INVAL_L2 = 0x400,
1380
RGP_FLUSH_FLUSH_CB = 0x800,
1381
RGP_FLUSH_INVAL_CB = 0x1000,
1382
RGP_FLUSH_FLUSH_DB = 0x2000,
1383
RGP_FLUSH_INVAL_DB = 0x4000,
1384
RGP_FLUSH_INVAL_L1 = 0x8000,
1387
struct radv_cmd_state {
1388
/* Vertex descriptors */
1394
uint32_t prefetch_L2_mask;
1396
struct radv_pipeline *pipeline;
1397
struct radv_pipeline *emitted_pipeline;
1398
struct radv_pipeline *compute_pipeline;
1399
struct radv_pipeline *emitted_compute_pipeline;
1400
struct radv_pipeline *rt_pipeline; /* emitted = emitted_compute_pipeline */
1401
struct vk_framebuffer *framebuffer;
1402
struct radv_render_pass *pass;
1403
const struct radv_subpass *subpass;
1404
struct radv_dynamic_state dynamic;
1405
struct radv_vs_input_state dynamic_vs_input;
1406
struct radv_attachment_state *attachments;
1407
struct radv_streamout_state streamout;
1408
VkRect2D render_area;
1410
uint32_t num_subpass_sample_locs;
1411
struct radv_subpass_sample_locs_state *subpass_sample_locs;
1414
struct radv_buffer *index_buffer;
1415
uint64_t index_offset;
1416
uint32_t index_type;
1417
uint32_t max_index_count;
1419
int32_t last_index_type;
1421
int32_t last_primitive_reset_en;
1422
uint32_t last_primitive_reset_index;
1423
enum radv_cmd_flush_bits flush_bits;
1424
unsigned active_occlusion_queries;
1425
bool perfect_occlusion_queries_enabled;
1426
unsigned active_pipeline_queries;
1427
unsigned active_pipeline_gds_queries;
1429
uint32_t last_ia_multi_vgt_param;
1431
uint32_t last_num_instances;
1432
uint32_t last_first_instance;
1433
uint32_t last_vertex_offset;
1434
uint32_t last_drawid;
1436
uint32_t last_sx_ps_downconvert;
1437
uint32_t last_sx_blend_opt_epsilon;
1438
uint32_t last_sx_blend_opt_control;
1440
/* Whether CP DMA is busy/idle. */
1443
/* Whether any images that are not L2 coherent are dirty from the CB. */
1444
bool rb_noncoherent_dirty;
1446
/* Conditional rendering info. */
1447
uint8_t predication_op; /* 32-bit or 64-bit predicate value */
1448
int predication_type; /* -1: disabled, 0: normal, 1: inverted */
1449
uint64_t predication_va;
1451
/* Inheritance info. */
1452
VkQueryPipelineStatisticFlags inherited_pipeline_statistics;
1454
bool context_roll_without_scissor_emitted;
1456
/* SQTT related state. */
1457
uint32_t current_event_type;
1458
uint32_t num_events;
1459
uint32_t num_layout_transitions;
1460
bool pending_sqtt_barrier_end;
1461
enum rgp_flush_bits sqtt_flush_bits;
1463
/* NGG culling state. */
1464
uint32_t last_nggc_settings;
1465
int8_t last_nggc_settings_sgpr_idx;
1466
bool last_nggc_skip;
1468
/* Mesh shading state. */
1471
uint8_t cb_mip[MAX_RTS];
1473
/* Whether DRAW_{INDEX}_INDIRECT_MULTI is emitted. */
1474
bool uses_draw_indirect_multi;
1476
uint32_t rt_stack_size;
1478
struct radv_shader_prolog *emitted_vs_prolog;
1479
uint32_t *emitted_vs_prolog_key;
1480
uint32_t emitted_vs_prolog_key_hash;
1481
uint32_t vbo_misaligned_mask;
1482
uint32_t vbo_bound_mask;
1484
/* Whether the cmdbuffer owns the current render pass rather than the app. */
1485
bool own_render_pass;
1487
/* Per-vertex VRS state. */
1488
uint32_t last_vrs_rates;
1489
int8_t last_vrs_rates_sgpr_idx;
1492
struct radv_cmd_pool {
1493
struct vk_command_pool vk;
1494
struct list_head cmd_buffers;
1495
struct list_head free_cmd_buffers;
1498
struct radv_cmd_buffer_upload {
1502
struct radeon_winsys_bo *upload_bo;
1503
struct list_head list;
1506
enum radv_cmd_buffer_status {
1507
RADV_CMD_BUFFER_STATUS_INVALID,
1508
RADV_CMD_BUFFER_STATUS_INITIAL,
1509
RADV_CMD_BUFFER_STATUS_RECORDING,
1510
RADV_CMD_BUFFER_STATUS_EXECUTABLE,
1511
RADV_CMD_BUFFER_STATUS_PENDING,
1514
struct radv_cmd_buffer {
1515
struct vk_command_buffer vk;
1517
struct radv_device *device;
1519
struct radv_cmd_pool *pool;
1520
struct list_head pool_link;
1522
VkCommandBufferUsageFlags usage_flags;
1523
enum radv_cmd_buffer_status status;
1524
struct radeon_cmdbuf *cs;
1525
struct radv_cmd_state state;
1526
struct radv_vertex_binding vertex_bindings[MAX_VBS];
1527
struct radv_streamout_binding streamout_bindings[MAX_SO_BUFFERS];
1528
enum radv_queue_family qf;
1530
uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE];
1531
VkShaderStageFlags push_constant_stages;
1532
struct radv_descriptor_set_header meta_push_descriptors;
1534
struct radv_descriptor_state descriptors[MAX_BIND_POINTS];
1536
struct radv_cmd_buffer_upload upload;
1538
uint32_t scratch_size_per_wave_needed;
1539
uint32_t scratch_waves_wanted;
1540
uint32_t compute_scratch_size_per_wave_needed;
1541
uint32_t compute_scratch_waves_wanted;
1542
uint32_t esgs_ring_size_needed;
1543
uint32_t gsvs_ring_size_needed;
1544
bool tess_rings_needed;
1545
bool gds_needed; /* for GFX10 streamout and NGG GS queries */
1546
bool gds_oa_needed; /* for GFX10 streamout */
1547
bool sample_positions_needed;
1549
VkResult record_result;
1551
uint64_t gfx9_fence_va;
1552
uint32_t gfx9_fence_idx;
1553
uint64_t gfx9_eop_bug_va;
1556
* Whether a query pool has been resetted and we have to flush caches.
1558
bool pending_reset_query;
1561
* Bitmask of pending active query flushes.
1563
enum radv_cmd_flush_bits active_query_flush_bits;
1567
struct radv_image_view;
1569
bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer);
1571
void si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs);
1572
void si_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs);
1574
void cik_create_gfx_config(struct radv_device *device);
1576
void si_write_scissors(struct radeon_cmdbuf *cs, int first, int count, const VkRect2D *scissors,
1577
const VkViewport *viewports, bool can_use_guardband);
1578
uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw,
1579
bool indirect_draw, bool count_from_stream_output,
1580
uint32_t draw_vertex_count, unsigned topology,
1581
bool prim_restart_enable);
1582
void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum chip_class chip_class, bool is_mec,
1583
unsigned event, unsigned event_flags, unsigned dst_sel,
1584
unsigned data_sel, uint64_t va, uint32_t new_fence,
1585
uint64_t gfx9_eop_bug_va);
1587
void radv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, uint32_t ref,
1589
void si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum chip_class chip_class,
1590
uint32_t *fence_ptr, uint64_t va, bool is_mec,
1591
enum radv_cmd_flush_bits flush_bits,
1592
enum rgp_flush_bits *sqtt_flush_bits, uint64_t gfx9_eop_bug_va);
1593
void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer);
1594
void si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_visible,
1595
unsigned pred_op, uint64_t va);
1596
void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dest_va,
1598
void si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va, unsigned size);
1599
void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size,
1601
void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer);
1603
void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer, bool enable_occlusion_queries);
1605
unsigned radv_instance_rate_prolog_index(unsigned num_attributes, uint32_t instance_rate_inputs);
1606
uint32_t radv_hash_vs_prolog(const void *key_);
1607
bool radv_cmp_vs_prolog(const void *a_, const void *b_);
1609
bool radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size,
1610
unsigned *out_offset, void **ptr);
1611
void radv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer,
1612
const struct radv_subpass *subpass);
1613
void radv_cmd_buffer_restore_subpass(struct radv_cmd_buffer *cmd_buffer,
1614
const struct radv_subpass *subpass);
1615
bool radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, unsigned size,
1616
const void *data, unsigned *out_offset);
1618
void radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer);
1619
void radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer);
1620
void radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer);
1621
void radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
1622
VkImageAspectFlags aspects,
1623
VkResolveModeFlagBits resolve_mode);
1624
void radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer);
1625
void radv_depth_stencil_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer,
1626
VkImageAspectFlags aspects,
1627
VkResolveModeFlagBits resolve_mode);
1628
void radv_emit_default_sample_locations(struct radeon_cmdbuf *cs, int nr_samples);
1629
unsigned radv_get_default_max_sample_dist(int log_samples);
1630
void radv_device_init_msaa(struct radv_device *device);
1631
VkResult radv_device_init_vrs_state(struct radv_device *device);
1633
void radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
1634
const struct radv_image_view *iview,
1635
VkClearDepthStencilValue ds_clear_value,
1636
VkImageAspectFlags aspects);
1638
void radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
1639
const struct radv_image_view *iview, int cb_idx,
1640
uint32_t color_values[2]);
1642
bool radv_image_use_dcc_image_stores(const struct radv_device *device,
1643
const struct radv_image *image);
1644
bool radv_image_use_dcc_predication(const struct radv_device *device,
1645
const struct radv_image *image);
1647
void radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
1648
const VkImageSubresourceRange *range, bool value);
1650
void radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
1651
const VkImageSubresourceRange *range, bool value);
1652
enum radv_cmd_flush_bits radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer,
1653
VkAccessFlags2 src_flags,
1654
const struct radv_image *image);
1655
enum radv_cmd_flush_bits radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
1656
VkAccessFlags2 dst_flags,
1657
const struct radv_image *image);
1658
uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
1659
struct radeon_winsys_bo *bo, uint64_t offset, uint64_t size,
1661
void radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *src_bo,
1662
struct radeon_winsys_bo *dst_bo, uint64_t src_offset, uint64_t dst_offset,
1665
void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer);
1666
bool radv_get_memory_fd(struct radv_device *device, struct radv_device_memory *memory, int *pFD);
1667
void radv_free_memory(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
1668
struct radv_device_memory *mem);
1671
radv_emit_shader_pointer_head(struct radeon_cmdbuf *cs, unsigned sh_offset, unsigned pointer_count,
1672
bool use_32bit_pointers)
1674
radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * (use_32bit_pointers ? 1 : 2), 0));
1675
radeon_emit(cs, (sh_offset - SI_SH_REG_OFFSET) >> 2);
1679
radv_emit_shader_pointer_body(struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va,
1680
bool use_32bit_pointers)
1682
radeon_emit(cs, va);
1684
if (use_32bit_pointers) {
1685
assert(va == 0 || (va >> 32) == device->physical_device->rad_info.address32_hi);
1687
radeon_emit(cs, va >> 32);
1692
radv_emit_shader_pointer(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t sh_offset,
1693
uint64_t va, bool global)
1695
bool use_32bit_pointers = !global;
1697
radv_emit_shader_pointer_head(cs, sh_offset, 1, use_32bit_pointers);
1698
radv_emit_shader_pointer_body(device, cs, va, use_32bit_pointers);
1701
static inline struct radv_descriptor_state *
1702
radv_get_descriptors_state(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point)
1704
switch (bind_point) {
1705
case VK_PIPELINE_BIND_POINT_GRAPHICS:
1706
case VK_PIPELINE_BIND_POINT_COMPUTE:
1707
return &cmd_buffer->descriptors[bind_point];
1708
case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR:
1709
return &cmd_buffer->descriptors[2];
1711
unreachable("Unhandled bind point");
1716
radv_get_viewport_xform(const VkViewport *viewport, float scale[3], float translate[3]);
1719
* Takes x,y,z as exact numbers of invocations, instead of blocks.
1721
* Limitations: Can't call normal dispatch functions without binding or rebinding
1722
* the compute pipeline.
1724
void radv_unaligned_dispatch(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y,
1727
void radv_indirect_dispatch(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *bo,
1731
struct vk_object_base base;
1732
struct radeon_winsys_bo *bo;
1736
#define RADV_HASH_SHADER_CS_WAVE32 (1 << 1)
1737
#define RADV_HASH_SHADER_PS_WAVE32 (1 << 2)
1738
#define RADV_HASH_SHADER_GE_WAVE32 (1 << 3)
1739
#define RADV_HASH_SHADER_LLVM (1 << 4)
1740
#define RADV_HASH_SHADER_KEEP_STATISTICS (1 << 8)
1741
#define RADV_HASH_SHADER_USE_NGG_CULLING (1 << 13)
1742
#define RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS (1 << 14)
1743
#define RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS2 (1 << 15)
1744
#define RADV_HASH_SHADER_FORCE_EMULATE_RT (1 << 16)
1745
#define RADV_HASH_SHADER_SPLIT_FMA (1 << 17)
1746
#define RADV_HASH_SHADER_RT_WAVE64 (1 << 18)
1748
struct radv_pipeline_key;
1750
void radv_pipeline_stage_init(const VkPipelineShaderStageCreateInfo *sinfo,
1751
struct radv_pipeline_stage *out_stage, gl_shader_stage stage);
1753
void radv_hash_shaders(unsigned char *hash, const struct radv_pipeline_stage *stages,
1754
const struct radv_pipeline_layout *layout,
1755
const struct radv_pipeline_key *key, uint32_t flags);
1757
void radv_hash_rt_shaders(unsigned char *hash, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
1760
uint32_t radv_get_hash_flags(const struct radv_device *device, bool stats);
1762
bool radv_rt_pipeline_has_dynamic_stack_size(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo);
1764
bool radv_enable_rt(const struct radv_physical_device *pdevice);
1766
bool radv_emulate_rt(const struct radv_physical_device *pdevice);
1768
#define RADV_STAGE_MASK ((1 << MESA_VULKAN_SHADER_STAGES) - 1)
1770
#define radv_foreach_stage(stage, stage_bits) \
1771
for (gl_shader_stage stage, __tmp = (gl_shader_stage)((stage_bits)&RADV_STAGE_MASK); \
1772
stage = ffs(__tmp) - 1, __tmp; __tmp &= ~(1 << (stage)))
1774
extern const VkFormat radv_fs_key_format_exemplars[NUM_META_FS_KEYS];
1775
unsigned radv_format_meta_fs_key(struct radv_device *device, VkFormat format);
1777
struct radv_multisample_state {
1779
uint32_t pa_sc_mode_cntl_0;
1780
uint32_t pa_sc_mode_cntl_1;
1781
uint32_t pa_sc_aa_config;
1782
uint32_t pa_sc_aa_mask[2];
1783
unsigned num_samples;
1786
struct radv_vrs_state {
1787
uint32_t pa_cl_vrs_cntl;
1790
struct radv_prim_vertex_count {
1795
struct radv_ia_multi_vgt_param_helpers {
1797
bool partial_es_wave;
1798
uint8_t primgroup_size;
1799
bool ia_switch_on_eoi;
1800
bool partial_vs_wave;
1803
struct radv_binning_state {
1804
uint32_t pa_sc_binner_cntl_0;
1807
#define SI_GS_PER_ES 128
1809
enum radv_pipeline_type {
1810
RADV_PIPELINE_GRAPHICS,
1811
/* Compute pipeline (incl raytracing pipeline) */
1812
RADV_PIPELINE_COMPUTE,
1813
/* Pipeline library. This can't actually run and merely is a partial pipeline. */
1814
RADV_PIPELINE_LIBRARY
1817
struct radv_pipeline_group_handle {
1818
uint32_t handles[2];
1821
struct radv_pipeline_shader_stack_size {
1822
uint32_t recursive_size;
1823
/* anyhit + intersection */
1824
uint32_t non_recursive_size;
1827
struct radv_pipeline_slab {
1830
union radv_shader_arena_block *alloc;
1833
void radv_pipeline_slab_destroy(struct radv_device *device, struct radv_pipeline_slab *slab);
1835
struct radv_pipeline {
1836
struct vk_object_base base;
1837
enum radv_pipeline_type type;
1839
struct radv_device *device;
1840
struct radv_dynamic_state dynamic_state;
1842
struct radv_pipeline_slab *slab;
1844
bool need_indirect_descriptor_sets;
1845
struct radv_shader *shaders[MESA_VULKAN_SHADER_STAGES];
1846
struct radv_shader *gs_copy_shader;
1847
VkShaderStageFlags active_stages;
1849
struct radeon_cmdbuf cs;
1850
uint32_t ctx_cs_hash;
1851
struct radeon_cmdbuf ctx_cs;
1853
uint32_t binding_stride[MAX_VBS];
1855
uint8_t attrib_bindings[MAX_VERTEX_ATTRIBS];
1856
uint32_t attrib_ends[MAX_VERTEX_ATTRIBS];
1857
uint32_t attrib_index_offset[MAX_VERTEX_ATTRIBS];
1859
bool use_per_attribute_vb_descs;
1860
bool can_use_simple_input;
1861
uint8_t last_vertex_attrib_bit;
1862
uint8_t next_vertex_stage : 8;
1863
uint32_t vb_desc_usage_mask;
1864
uint32_t vb_desc_alloc_size;
1866
uint32_t user_data_0[MESA_VULKAN_SHADER_STAGES];
1869
uint64_t dynamic_states;
1870
struct radv_multisample_state ms;
1871
struct radv_binning_state binning;
1872
struct radv_vrs_state vrs;
1873
uint32_t spi_baryc_cntl;
1874
unsigned esgs_ring_size;
1875
unsigned gsvs_ring_size;
1876
uint32_t vtx_base_sgpr;
1877
struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param;
1878
uint8_t vtx_emit_num;
1880
bool uses_baseinstance;
1881
bool can_use_guardband;
1882
uint64_t needed_dynamic_state;
1883
bool disable_out_of_order_rast_for_occlusion;
1884
unsigned tess_patch_control_points;
1885
unsigned pa_su_sc_mode_cntl;
1886
unsigned db_depth_control;
1887
unsigned pa_cl_clip_cntl;
1888
unsigned cb_color_control;
1889
bool uses_dynamic_stride;
1890
bool uses_conservative_overestimate;
1891
bool negative_one_to_one;
1893
/* Used for rbplus */
1894
uint32_t col_format;
1895
uint32_t cb_target_mask;
1897
/* Whether the pipeline uses NGG (GFX10+). */
1899
bool has_ngg_culling;
1901
/* Last pre-PS API stage */
1902
gl_shader_stage last_vgt_api_stage;
1904
/* Whether the pipeline forces per-vertex VRS (GFX10.3+). */
1905
bool force_vrs_per_vertex;
1908
struct radv_pipeline_group_handle *rt_group_handles;
1909
struct radv_pipeline_shader_stack_size *rt_stack_sizes;
1910
bool dynamic_stack_size;
1911
uint32_t group_count;
1912
bool cs_regalloc_hang_bug;
1915
unsigned stage_count;
1916
VkPipelineShaderStageCreateInfo *stages;
1917
unsigned group_count;
1918
VkRayTracingShaderGroupCreateInfoKHR *groups;
1923
unsigned scratch_bytes_per_wave;
1925
/* Not NULL if graphics pipeline uses streamout. */
1926
struct radv_shader *streamout_shader;
1928
/* Unique pipeline hash identifier. */
1929
uint64_t pipeline_hash;
1931
/* Pipeline layout info. */
1932
uint32_t push_constant_size;
1933
uint32_t dynamic_offset_count;
1936
struct radv_pipeline_stage {
1937
gl_shader_stage stage;
1940
const struct vk_object_base *object;
1943
unsigned char sha1[20];
1946
const char *entrypoint;
1947
const VkSpecializationInfo *spec_info;
1949
unsigned char shader_sha1[20];
1952
nir_shader *internal_nir; /* meta shaders */
1954
struct radv_shader_info info;
1955
struct radv_shader_args args;
1957
VkPipelineCreationFeedbackEXT feedback;
1961
radv_pipeline_has_gs(const struct radv_pipeline *pipeline)
1963
return pipeline->shaders[MESA_SHADER_GEOMETRY] ? true : false;
1967
radv_pipeline_has_tess(const struct radv_pipeline *pipeline)
1969
return pipeline->shaders[MESA_SHADER_TESS_CTRL] ? true : false;
1973
radv_pipeline_has_mesh(const struct radv_pipeline *pipeline)
1975
return !!pipeline->shaders[MESA_SHADER_MESH];
1978
bool radv_pipeline_has_ngg_passthrough(const struct radv_pipeline *pipeline);
1980
bool radv_pipeline_has_gs_copy_shader(const struct radv_pipeline *pipeline);
1982
struct radv_userdata_info *radv_lookup_user_sgpr(struct radv_pipeline *pipeline,
1983
gl_shader_stage stage, int idx);
1985
struct radv_shader *radv_get_shader(const struct radv_pipeline *pipeline, gl_shader_stage stage);
1987
struct radv_graphics_pipeline_create_info {
1989
bool db_depth_clear;
1990
bool db_stencil_clear;
1991
bool depth_compress_disable;
1992
bool stencil_compress_disable;
1993
bool resummarize_enable;
1994
uint32_t custom_blend_mode;
1997
void radv_pipeline_init(struct radv_device *device, struct radv_pipeline *pipeline,
1998
enum radv_pipeline_type type);
2000
VkResult radv_graphics_pipeline_create(VkDevice device, VkPipelineCache cache,
2001
const VkGraphicsPipelineCreateInfo *pCreateInfo,
2002
const struct radv_graphics_pipeline_create_info *extra,
2003
const VkAllocationCallbacks *alloc, VkPipeline *pPipeline);
2005
VkResult radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache,
2006
const VkComputePipelineCreateInfo *pCreateInfo,
2007
const VkAllocationCallbacks *pAllocator,
2008
const uint8_t *custom_hash,
2009
struct radv_pipeline_shader_stack_size *rt_stack_sizes,
2010
uint32_t rt_group_count, VkPipeline *pPipeline);
2012
void radv_pipeline_destroy(struct radv_device *device, struct radv_pipeline *pipeline,
2013
const VkAllocationCallbacks *allocator);
2015
struct radv_binning_settings {
2016
unsigned context_states_per_bin; /* allowed range: [1, 6] */
2017
unsigned persistent_states_per_bin; /* allowed range: [1, 32] */
2018
unsigned fpovs_per_batch; /* allowed range: [0, 255], 0 = unlimited */
2021
struct radv_binning_settings radv_get_binning_settings(const struct radv_physical_device *pdev);
2023
struct vk_format_description;
2024
uint32_t radv_translate_buffer_dataformat(const struct util_format_description *desc,
2025
int first_non_void);
2026
uint32_t radv_translate_buffer_numformat(const struct util_format_description *desc,
2027
int first_non_void);
2028
bool radv_is_buffer_format_supported(VkFormat format, bool *scaled);
2029
void radv_translate_vertex_format(const struct radv_physical_device *pdevice, VkFormat format,
2030
const struct util_format_description *desc, unsigned *dfmt,
2031
unsigned *nfmt, bool *post_shuffle,
2032
enum radv_vs_input_alpha_adjust *alpha_adjust);
2033
uint32_t radv_translate_colorformat(VkFormat format);
2034
uint32_t radv_translate_color_numformat(VkFormat format, const struct util_format_description *desc,
2035
int first_non_void);
2036
uint32_t radv_colorformat_endian_swap(uint32_t colorformat);
2037
unsigned radv_translate_colorswap(VkFormat format, bool do_endian_swap);
2038
uint32_t radv_translate_dbformat(VkFormat format);
2039
uint32_t radv_translate_tex_dataformat(VkFormat format, const struct util_format_description *desc,
2040
int first_non_void);
2041
uint32_t radv_translate_tex_numformat(VkFormat format, const struct util_format_description *desc,
2042
int first_non_void);
2043
bool radv_format_pack_clear_color(VkFormat format, uint32_t clear_vals[2],
2044
VkClearColorValue *value);
2045
bool radv_is_storage_image_format_supported(struct radv_physical_device *physical_device,
2047
bool radv_is_colorbuffer_format_supported(const struct radv_physical_device *pdevice,
2048
VkFormat format, bool *blendable);
2049
bool radv_dcc_formats_compatible(VkFormat format1, VkFormat format2, bool *sign_reinterpret);
2050
bool radv_is_atomic_format_supported(VkFormat format);
2051
bool radv_device_supports_etc(struct radv_physical_device *physical_device);
2053
struct radv_image_plane {
2055
struct radeon_surf surface;
2059
struct vk_object_base base;
2061
/* The original VkFormat provided by the client. This may not match any
2062
* of the actual surface formats.
2065
VkImageUsageFlags usage; /**< Superset of VkImageCreateInfo::usage. */
2066
struct ac_surf_info info;
2067
VkImageTiling tiling; /** VkImageCreateInfo::tiling */
2068
VkImageCreateFlags flags; /** VkImageCreateInfo::flags */
2073
unsigned queue_family_mask;
2077
bool dcc_sign_reinterpret;
2078
bool support_comp_to_single;
2080
/* Set when bound */
2081
struct radeon_winsys_bo *bo;
2082
VkDeviceSize offset;
2083
bool tc_compatible_cmask;
2085
uint64_t clear_value_offset;
2086
uint64_t fce_pred_offset;
2087
uint64_t dcc_pred_offset;
2090
* Metadata for the TC-compat zrange workaround. If the 32-bit value
2091
* stored at this offset is UINT_MAX, the driver will emit
2092
* DB_Z_INFO.ZRANGE_PRECISION=0, otherwise it will skip the
2093
* SET_CONTEXT_REG packet.
2095
uint64_t tc_compat_zrange_offset;
2097
/* For VK_ANDROID_native_buffer, the WSI image owns the memory, */
2098
VkDeviceMemory owned_memory;
2100
unsigned plane_count;
2101
struct radv_image_plane planes[0];
2104
/* Whether the image has a htile that is known consistent with the contents of
2105
* the image and is allowed to be in compressed form.
2107
* If this is false reads that don't use the htile should be able to return
2110
bool radv_layout_is_htile_compressed(const struct radv_device *device,
2111
const struct radv_image *image, VkImageLayout layout,
2112
bool in_render_loop, unsigned queue_mask);
2114
bool radv_layout_can_fast_clear(const struct radv_device *device, const struct radv_image *image,
2115
unsigned level, VkImageLayout layout, bool in_render_loop,
2116
unsigned queue_mask);
2118
bool radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image,
2119
unsigned level, VkImageLayout layout, bool in_render_loop,
2120
unsigned queue_mask);
2122
bool radv_layout_fmask_compressed(const struct radv_device *device, const struct radv_image *image,
2123
VkImageLayout layout, unsigned queue_mask);
2126
* Return whether the image has CMASK metadata for color surfaces.
2129
radv_image_has_cmask(const struct radv_image *image)
2131
return image->planes[0].surface.cmask_offset;
2135
* Return whether the image has FMASK metadata for color surfaces.
2138
radv_image_has_fmask(const struct radv_image *image)
2140
return image->planes[0].surface.fmask_offset;
2144
* Return whether the image has DCC metadata for color surfaces.
2147
radv_image_has_dcc(const struct radv_image *image)
2149
return !(image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER) &&
2150
image->planes[0].surface.meta_offset;
2154
* Return whether the image is TC-compatible CMASK.
2157
radv_image_is_tc_compat_cmask(const struct radv_image *image)
2159
return radv_image_has_fmask(image) && image->tc_compatible_cmask;
2163
* Return whether DCC metadata is enabled for a level.
2166
radv_dcc_enabled(const struct radv_image *image, unsigned level)
2168
return radv_image_has_dcc(image) && level < image->planes[0].surface.num_meta_levels;
2172
* Return whether the image has CB metadata.
2175
radv_image_has_CB_metadata(const struct radv_image *image)
2177
return radv_image_has_cmask(image) || radv_image_has_fmask(image) || radv_image_has_dcc(image);
2181
* Return whether the image has HTILE metadata for depth surfaces.
2184
radv_image_has_htile(const struct radv_image *image)
2186
return image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER &&
2187
image->planes[0].surface.meta_size;
2191
* Return whether the image has VRS HTILE metadata for depth surfaces
2194
radv_image_has_vrs_htile(const struct radv_device *device, const struct radv_image *image)
2196
/* Any depth buffer can potentially use VRS. */
2197
return device->attachment_vrs_enabled && radv_image_has_htile(image) &&
2198
(image->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT);
2202
* Return whether HTILE metadata is enabled for a level.
2205
radv_htile_enabled(const struct radv_image *image, unsigned level)
2207
return radv_image_has_htile(image) && level < image->planes[0].surface.num_meta_levels;
2211
* Return whether the image is TC-compatible HTILE.
2214
radv_image_is_tc_compat_htile(const struct radv_image *image)
2216
return radv_image_has_htile(image) &&
2217
(image->planes[0].surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE);
2221
* Return whether the entire HTILE buffer can be used for depth in order to
2222
* improve HiZ Z-Range precision.
2225
radv_image_tile_stencil_disabled(const struct radv_device *device, const struct radv_image *image)
2227
if (device->physical_device->rad_info.chip_class >= GFX9) {
2228
return !vk_format_has_stencil(image->vk_format) && !radv_image_has_vrs_htile(device, image);
2230
/* Due to a hw bug, TILE_STENCIL_DISABLE must be set to 0 for
2231
* the TC-compat ZRANGE issue even if no stencil is used.
2233
return !vk_format_has_stencil(image->vk_format) && !radv_image_is_tc_compat_htile(image);
2238
radv_image_has_clear_value(const struct radv_image *image)
2240
return image->clear_value_offset != 0;
2243
static inline uint64_t
2244
radv_image_get_fast_clear_va(const struct radv_image *image, uint32_t base_level)
2246
assert(radv_image_has_clear_value(image));
2248
uint64_t va = radv_buffer_get_va(image->bo);
2249
va += image->offset + image->clear_value_offset + base_level * 8;
2253
static inline uint64_t
2254
radv_image_get_fce_pred_va(const struct radv_image *image, uint32_t base_level)
2256
assert(image->fce_pred_offset != 0);
2258
uint64_t va = radv_buffer_get_va(image->bo);
2259
va += image->offset + image->fce_pred_offset + base_level * 8;
2263
static inline uint64_t
2264
radv_image_get_dcc_pred_va(const struct radv_image *image, uint32_t base_level)
2266
assert(image->dcc_pred_offset != 0);
2268
uint64_t va = radv_buffer_get_va(image->bo);
2269
va += image->offset + image->dcc_pred_offset + base_level * 8;
2273
static inline uint64_t
2274
radv_get_tc_compat_zrange_va(const struct radv_image *image, uint32_t base_level)
2276
assert(image->tc_compat_zrange_offset != 0);
2278
uint64_t va = radv_buffer_get_va(image->bo);
2279
va += image->offset + image->tc_compat_zrange_offset + base_level * 4;
2283
static inline uint64_t
2284
radv_get_ds_clear_value_va(const struct radv_image *image, uint32_t base_level)
2286
assert(radv_image_has_clear_value(image));
2288
uint64_t va = radv_buffer_get_va(image->bo);
2289
va += image->offset + image->clear_value_offset + base_level * 8;
2293
static inline uint32_t
2294
radv_get_htile_initial_value(const struct radv_device *device, const struct radv_image *image)
2296
uint32_t initial_value;
2298
if (radv_image_tile_stencil_disabled(device, image)) {
2299
/* Z only (no stencil):
2302
* +---------+---------+-------+
2303
* | Max Z | Min Z | ZMask |
2305
initial_value = 0xfffc000f;
2309
* |31 12|11 10|9 8|7 6|5 4|3 0|
2310
* +-----------+-----+------+-----+-----+-------+
2311
* | Z Range | | SMem | SR1 | SR0 | ZMask |
2313
* SR0/SR1 contains the stencil test results. Initializing
2314
* SR0/SR1 to 0x3 means the stencil test result is unknown.
2316
* Z, stencil and 4 bit VRS encoding:
2317
* |31 12|11 10|9 8|7 6|5 4|3 0|
2318
* +-----------+------------+------+------------+-----+-------+
2319
* | Z Range | VRS y-rate | SMem | VRS x-rate | SR0 | ZMask |
2321
if (radv_image_has_vrs_htile(device, image)) {
2322
/* Initialize the VRS x-rate value at 0, so the hw interprets it as 1 sample. */
2323
initial_value = 0xfffff33f;
2325
initial_value = 0xfffff3ff;
2329
return initial_value;
2333
radv_image_get_iterate256(struct radv_device *device, struct radv_image *image)
2335
/* ITERATE_256 is required for depth or stencil MSAA images that are TC-compatible HTILE. */
2336
return device->physical_device->rad_info.chip_class >= GFX10 &&
2337
(image->usage & (VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT |
2338
VK_IMAGE_USAGE_TRANSFER_DST_BIT)) &&
2339
radv_image_is_tc_compat_htile(image) &&
2340
image->info.samples > 1;
2343
unsigned radv_image_queue_family_mask(const struct radv_image *image,
2344
enum radv_queue_family family,
2345
enum radv_queue_family queue_family);
2347
static inline uint32_t
2348
radv_get_layerCount(const struct radv_image *image, const VkImageSubresourceRange *range)
2350
return range->layerCount == VK_REMAINING_ARRAY_LAYERS
2351
? image->info.array_size - range->baseArrayLayer
2352
: range->layerCount;
2355
static inline uint32_t
2356
radv_get_levelCount(const struct radv_image *image, const VkImageSubresourceRange *range)
2358
return range->levelCount == VK_REMAINING_MIP_LEVELS ? image->info.levels - range->baseMipLevel
2359
: range->levelCount;
2362
bool radv_image_is_renderable(struct radv_device *device, struct radv_image *image);
2364
struct radeon_bo_metadata;
2365
void radv_init_metadata(struct radv_device *device, struct radv_image *image,
2366
struct radeon_bo_metadata *metadata);
2368
void radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image,
2369
uint64_t offset, uint32_t stride);
2371
union radv_descriptor {
2373
uint32_t plane0_descriptor[8];
2374
uint32_t fmask_descriptor[8];
2377
uint32_t plane_descriptors[3][8];
2381
struct radv_image_view {
2382
struct vk_object_base base;
2383
struct radv_image *image; /**< VkImageViewCreateInfo::image */
2385
VkImageViewType type;
2386
VkImageAspectFlags aspect_mask;
2389
uint32_t base_layer;
2390
uint32_t layer_count;
2392
uint32_t level_count;
2393
VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */
2395
/* Whether the image iview supports fast clear. */
2396
bool support_fast_clear;
2398
bool disable_dcc_mrt;
2400
union radv_descriptor descriptor;
2402
/* Descriptor for use as a storage image as opposed to a sampled image.
2403
* This has a few differences for cube maps (e.g. type).
2405
union radv_descriptor storage_descriptor;
2408
struct radv_image_create_info {
2409
const VkImageCreateInfo *vk_info;
2411
bool no_metadata_planes;
2412
bool prime_blit_src;
2413
const struct radeon_bo_metadata *bo_metadata;
2417
radv_image_create_layout(struct radv_device *device, struct radv_image_create_info create_info,
2418
const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info,
2419
struct radv_image *image);
2421
VkResult radv_image_create(VkDevice _device, const struct radv_image_create_info *info,
2422
const VkAllocationCallbacks *alloc, VkImage *pImage);
2424
bool radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const void *pNext,
2425
VkFormat format, VkImageCreateFlags flags,
2426
bool *sign_reinterpret);
2428
bool vi_alpha_is_on_msb(struct radv_device *device, VkFormat format);
2430
VkResult radv_image_from_gralloc(VkDevice device_h, const VkImageCreateInfo *base_info,
2431
const VkNativeBufferANDROID *gralloc_info,
2432
const VkAllocationCallbacks *alloc, VkImage *out_image_h);
2433
uint64_t radv_ahb_usage_from_vk_usage(const VkImageCreateFlags vk_create,
2434
const VkImageUsageFlags vk_usage);
2435
VkResult radv_import_ahb_memory(struct radv_device *device, struct radv_device_memory *mem,
2437
const VkImportAndroidHardwareBufferInfoANDROID *info);
2438
VkResult radv_create_ahb_memory(struct radv_device *device, struct radv_device_memory *mem,
2439
unsigned priority, const VkMemoryAllocateInfo *pAllocateInfo);
2441
VkFormat radv_select_android_external_format(const void *next, VkFormat default_format);
2443
bool radv_android_gralloc_supports_format(VkFormat format, VkImageUsageFlagBits usage);
2445
struct radv_image_view_extra_create_info {
2446
bool disable_compression;
2447
bool enable_compression;
2448
bool disable_dcc_mrt;
2451
void radv_image_view_init(struct radv_image_view *view, struct radv_device *device,
2452
const VkImageViewCreateInfo *pCreateInfo,
2453
const struct radv_image_view_extra_create_info *extra_create_info);
2454
void radv_image_view_finish(struct radv_image_view *iview);
2456
VkFormat radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask);
2458
struct radv_sampler_ycbcr_conversion_state {
2460
VkSamplerYcbcrModelConversion ycbcr_model;
2461
VkSamplerYcbcrRange ycbcr_range;
2462
VkComponentMapping components;
2463
VkChromaLocation chroma_offsets[2];
2464
VkFilter chroma_filter;
2467
struct radv_sampler_ycbcr_conversion {
2468
struct vk_object_base base;
2469
/* The state is hashed for the descriptor set layout. */
2470
struct radv_sampler_ycbcr_conversion_state state;
2473
struct radv_buffer_view {
2474
struct vk_object_base base;
2475
struct radeon_winsys_bo *bo;
2477
uint64_t range; /**< VkBufferViewCreateInfo::range */
2480
void radv_buffer_view_init(struct radv_buffer_view *view, struct radv_device *device,
2481
const VkBufferViewCreateInfo *pCreateInfo);
2482
void radv_buffer_view_finish(struct radv_buffer_view *view);
2484
static inline struct VkExtent3D
2485
radv_sanitize_image_extent(const VkImageType imageType, const struct VkExtent3D imageExtent)
2487
switch (imageType) {
2488
case VK_IMAGE_TYPE_1D:
2489
return (VkExtent3D){imageExtent.width, 1, 1};
2490
case VK_IMAGE_TYPE_2D:
2491
return (VkExtent3D){imageExtent.width, imageExtent.height, 1};
2492
case VK_IMAGE_TYPE_3D:
2495
unreachable("invalid image type");
2499
static inline struct VkOffset3D
2500
radv_sanitize_image_offset(const VkImageType imageType, const struct VkOffset3D imageOffset)
2502
switch (imageType) {
2503
case VK_IMAGE_TYPE_1D:
2504
return (VkOffset3D){imageOffset.x, 0, 0};
2505
case VK_IMAGE_TYPE_2D:
2506
return (VkOffset3D){imageOffset.x, imageOffset.y, 0};
2507
case VK_IMAGE_TYPE_3D:
2510
unreachable("invalid image type");
2515
radv_image_extent_compare(const struct radv_image *image, const VkExtent3D *extent)
2517
if (extent->width != image->info.width || extent->height != image->info.height ||
2518
extent->depth != image->info.depth)
2523
struct radv_sampler {
2524
struct vk_object_base base;
2526
struct radv_sampler_ycbcr_conversion *ycbcr_sampler;
2527
uint32_t border_color_slot;
2530
struct radv_subpass_barrier {
2531
VkPipelineStageFlags2 src_stage_mask;
2532
VkAccessFlags2 src_access_mask;
2533
VkAccessFlags2 dst_access_mask;
2536
void radv_emit_subpass_barrier(struct radv_cmd_buffer *cmd_buffer,
2537
const struct radv_subpass_barrier *barrier);
2539
struct radv_subpass_attachment {
2540
uint32_t attachment;
2541
VkImageLayout layout;
2542
VkImageLayout stencil_layout;
2543
bool in_render_loop;
2546
struct radv_subpass {
2547
uint32_t attachment_count;
2548
struct radv_subpass_attachment *attachments;
2550
uint32_t input_count;
2551
uint32_t color_count;
2552
struct radv_subpass_attachment *input_attachments;
2553
struct radv_subpass_attachment *color_attachments;
2554
struct radv_subpass_attachment *resolve_attachments;
2555
struct radv_subpass_attachment *depth_stencil_attachment;
2556
struct radv_subpass_attachment *ds_resolve_attachment;
2557
struct radv_subpass_attachment *vrs_attachment;
2558
VkResolveModeFlagBits depth_resolve_mode;
2559
VkResolveModeFlagBits stencil_resolve_mode;
2561
/** Subpass has at least one color resolve attachment */
2562
bool has_color_resolve;
2564
struct radv_subpass_barrier start_barrier;
2568
VkSampleCountFlagBits color_sample_count;
2569
VkSampleCountFlagBits depth_sample_count;
2570
VkSampleCountFlagBits max_sample_count;
2572
/* Whether the subpass has ingoing/outgoing external dependencies. */
2573
bool has_ingoing_dep;
2574
bool has_outgoing_dep;
2577
uint32_t radv_get_subpass_id(struct radv_cmd_buffer *cmd_buffer);
2579
struct radv_render_pass_attachment {
2582
VkAttachmentLoadOp load_op;
2583
VkAttachmentLoadOp stencil_load_op;
2584
VkImageLayout initial_layout;
2585
VkImageLayout final_layout;
2586
VkImageLayout stencil_initial_layout;
2587
VkImageLayout stencil_final_layout;
2589
/* The subpass id in which the attachment will be used first/last. */
2590
uint32_t first_subpass_idx;
2591
uint32_t last_subpass_idx;
2594
struct radv_render_pass {
2595
struct vk_object_base base;
2596
uint32_t attachment_count;
2597
uint32_t subpass_count;
2598
struct radv_subpass_attachment *subpass_attachments;
2599
struct radv_render_pass_attachment *attachments;
2600
struct radv_subpass_barrier end_barrier;
2601
struct radv_subpass subpasses[0];
2604
VkResult radv_device_init_meta(struct radv_device *device);
2605
void radv_device_finish_meta(struct radv_device *device);
2607
struct radv_query_pool {
2608
struct vk_object_base base;
2609
struct radeon_winsys_bo *bo;
2611
uint32_t availability_offset;
2615
uint32_t pipeline_stats_mask;
2618
bool radv_queue_internal_submit(struct radv_queue *queue, struct radeon_cmdbuf *cs);
2620
int radv_queue_init(struct radv_device *device, struct radv_queue *queue, int idx,
2621
const VkDeviceQueueCreateInfo *create_info,
2622
const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority);
2624
void radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point,
2625
struct radv_descriptor_set *set, unsigned idx);
2627
void radv_cmd_update_descriptor_sets(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer,
2628
VkDescriptorSet overrideSet, uint32_t descriptorWriteCount,
2629
const VkWriteDescriptorSet *pDescriptorWrites,
2630
uint32_t descriptorCopyCount,
2631
const VkCopyDescriptorSet *pDescriptorCopies);
2633
void radv_cmd_update_descriptor_set_with_template(struct radv_device *device,
2634
struct radv_cmd_buffer *cmd_buffer,
2635
struct radv_descriptor_set *set,
2636
VkDescriptorUpdateTemplate descriptorUpdateTemplate,
2639
void radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
2640
VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout _layout,
2641
uint32_t set, uint32_t descriptorWriteCount,
2642
const VkWriteDescriptorSet *pDescriptorWrites);
2644
uint32_t radv_init_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
2645
const VkImageSubresourceRange *range, uint32_t value);
2647
uint32_t radv_init_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
2648
const VkImageSubresourceRange *range);
2650
/* radv_nir_to_llvm.c */
2651
struct radv_shader_args;
2652
struct radv_nir_compiler_options;
2653
struct radv_shader_info;
2655
void llvm_compile_shader(const struct radv_nir_compiler_options *options,
2656
const struct radv_shader_info *info, unsigned shader_count,
2657
struct nir_shader *const *shaders, struct radv_shader_binary **binary,
2658
const struct radv_shader_args *args);
2660
/* radv_shader_info.h */
2661
struct radv_shader_info;
2663
void radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *nir,
2664
const struct radv_pipeline_layout *layout,
2665
const struct radv_pipeline_key *pipeline_key,
2666
struct radv_shader_info *info);
2668
void radv_nir_shader_info_init(struct radv_shader_info *info);
2670
bool radv_thread_trace_init(struct radv_device *device);
2671
void radv_thread_trace_finish(struct radv_device *device);
2672
bool radv_begin_thread_trace(struct radv_queue *queue);
2673
bool radv_end_thread_trace(struct radv_queue *queue);
2674
bool radv_get_thread_trace(struct radv_queue *queue, struct ac_thread_trace *thread_trace);
2675
void radv_emit_thread_trace_userdata(const struct radv_device *device, struct radeon_cmdbuf *cs,
2676
const void *data, uint32_t num_dwords);
2677
bool radv_is_instruction_timing_enabled(void);
2679
bool radv_sdma_copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
2680
struct radv_buffer *buffer, const VkBufferImageCopy2 *region);
2682
/* radv_sqtt_layer_.c */
2683
struct radv_barrier_data {
2686
uint16_t depth_stencil_expand : 1;
2687
uint16_t htile_hiz_range_expand : 1;
2688
uint16_t depth_stencil_resummarize : 1;
2689
uint16_t dcc_decompress : 1;
2690
uint16_t fmask_decompress : 1;
2691
uint16_t fast_clear_eliminate : 1;
2692
uint16_t fmask_color_expand : 1;
2693
uint16_t init_mask_ram : 1;
2694
uint16_t reserved : 8;
2697
} layout_transitions;
2701
* Value for the reason field of an RGP barrier start marker originating from
2702
* the Vulkan client (does not include PAL-defined values). (Table 15)
2704
enum rgp_barrier_reason {
2705
RGP_BARRIER_UNKNOWN_REASON = 0xFFFFFFFF,
2707
/* External app-generated barrier reasons, i.e. API synchronization
2708
* commands Range of valid values: [0x00000001 ... 0x7FFFFFFF].
2710
RGP_BARRIER_EXTERNAL_CMD_PIPELINE_BARRIER = 0x00000001,
2711
RGP_BARRIER_EXTERNAL_RENDER_PASS_SYNC = 0x00000002,
2712
RGP_BARRIER_EXTERNAL_CMD_WAIT_EVENTS = 0x00000003,
2714
/* Internal barrier reasons, i.e. implicit synchronization inserted by
2715
* the Vulkan driver Range of valid values: [0xC0000000 ... 0xFFFFFFFE].
2717
RGP_BARRIER_INTERNAL_BASE = 0xC0000000,
2718
RGP_BARRIER_INTERNAL_PRE_RESET_QUERY_POOL_SYNC = RGP_BARRIER_INTERNAL_BASE + 0,
2719
RGP_BARRIER_INTERNAL_POST_RESET_QUERY_POOL_SYNC = RGP_BARRIER_INTERNAL_BASE + 1,
2720
RGP_BARRIER_INTERNAL_GPU_EVENT_RECYCLE_STALL = RGP_BARRIER_INTERNAL_BASE + 2,
2721
RGP_BARRIER_INTERNAL_PRE_COPY_QUERY_POOL_RESULTS_SYNC = RGP_BARRIER_INTERNAL_BASE + 3
2724
void radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer);
2725
void radv_describe_end_cmd_buffer(struct radv_cmd_buffer *cmd_buffer);
2726
void radv_describe_draw(struct radv_cmd_buffer *cmd_buffer);
2727
void radv_describe_dispatch(struct radv_cmd_buffer *cmd_buffer, int x, int y, int z);
2728
void radv_describe_begin_render_pass_clear(struct radv_cmd_buffer *cmd_buffer,
2729
VkImageAspectFlagBits aspects);
2730
void radv_describe_end_render_pass_clear(struct radv_cmd_buffer *cmd_buffer);
2731
void radv_describe_begin_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer);
2732
void radv_describe_end_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer);
2733
void radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer,
2734
enum rgp_barrier_reason reason);
2735
void radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer);
2736
void radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer);
2737
void radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer,
2738
const struct radv_barrier_data *barrier);
2740
uint64_t radv_get_current_time(void);
2742
static inline uint32_t
2743
si_conv_gl_prim_to_vertices(enum shader_prim gl_prim)
2746
case SHADER_PRIM_POINTS:
2748
case SHADER_PRIM_LINES:
2749
case SHADER_PRIM_LINE_STRIP:
2751
case SHADER_PRIM_TRIANGLES:
2752
case SHADER_PRIM_TRIANGLE_STRIP:
2754
case SHADER_PRIM_LINES_ADJACENCY:
2756
case SHADER_PRIM_TRIANGLES_ADJACENCY:
2758
case SHADER_PRIM_QUADS:
2759
return V_028A6C_TRISTRIP;
2766
static inline uint32_t
2767
si_conv_prim_to_gs_out(enum VkPrimitiveTopology topology)
2770
case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
2771
case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
2772
return V_028A6C_POINTLIST;
2773
case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
2774
case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
2775
case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
2776
case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
2777
return V_028A6C_LINESTRIP;
2778
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
2779
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
2780
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
2781
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
2782
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
2783
return V_028A6C_TRISTRIP;
2790
static inline uint32_t
2791
si_translate_prim(unsigned topology)
2794
case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
2795
return V_008958_DI_PT_POINTLIST;
2796
case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
2797
return V_008958_DI_PT_LINELIST;
2798
case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
2799
return V_008958_DI_PT_LINESTRIP;
2800
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
2801
return V_008958_DI_PT_TRILIST;
2802
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
2803
return V_008958_DI_PT_TRISTRIP;
2804
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
2805
return V_008958_DI_PT_TRIFAN;
2806
case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
2807
return V_008958_DI_PT_LINELIST_ADJ;
2808
case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
2809
return V_008958_DI_PT_LINESTRIP_ADJ;
2810
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
2811
return V_008958_DI_PT_TRILIST_ADJ;
2812
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
2813
return V_008958_DI_PT_TRISTRIP_ADJ;
2814
case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
2815
return V_008958_DI_PT_PATCH;
2822
static inline uint32_t
2823
si_translate_stencil_op(enum VkStencilOp op)
2826
case VK_STENCIL_OP_KEEP:
2827
return V_02842C_STENCIL_KEEP;
2828
case VK_STENCIL_OP_ZERO:
2829
return V_02842C_STENCIL_ZERO;
2830
case VK_STENCIL_OP_REPLACE:
2831
return V_02842C_STENCIL_REPLACE_TEST;
2832
case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
2833
return V_02842C_STENCIL_ADD_CLAMP;
2834
case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
2835
return V_02842C_STENCIL_SUB_CLAMP;
2836
case VK_STENCIL_OP_INVERT:
2837
return V_02842C_STENCIL_INVERT;
2838
case VK_STENCIL_OP_INCREMENT_AND_WRAP:
2839
return V_02842C_STENCIL_ADD_WRAP;
2840
case VK_STENCIL_OP_DECREMENT_AND_WRAP:
2841
return V_02842C_STENCIL_SUB_WRAP;
2847
static inline uint32_t
2848
si_translate_blend_logic_op(VkLogicOp op)
2851
case VK_LOGIC_OP_CLEAR:
2852
return V_028808_ROP3_CLEAR;
2853
case VK_LOGIC_OP_AND:
2854
return V_028808_ROP3_AND;
2855
case VK_LOGIC_OP_AND_REVERSE:
2856
return V_028808_ROP3_AND_REVERSE;
2857
case VK_LOGIC_OP_COPY:
2858
return V_028808_ROP3_COPY;
2859
case VK_LOGIC_OP_AND_INVERTED:
2860
return V_028808_ROP3_AND_INVERTED;
2861
case VK_LOGIC_OP_NO_OP:
2862
return V_028808_ROP3_NO_OP;
2863
case VK_LOGIC_OP_XOR:
2864
return V_028808_ROP3_XOR;
2865
case VK_LOGIC_OP_OR:
2866
return V_028808_ROP3_OR;
2867
case VK_LOGIC_OP_NOR:
2868
return V_028808_ROP3_NOR;
2869
case VK_LOGIC_OP_EQUIVALENT:
2870
return V_028808_ROP3_EQUIVALENT;
2871
case VK_LOGIC_OP_INVERT:
2872
return V_028808_ROP3_INVERT;
2873
case VK_LOGIC_OP_OR_REVERSE:
2874
return V_028808_ROP3_OR_REVERSE;
2875
case VK_LOGIC_OP_COPY_INVERTED:
2876
return V_028808_ROP3_COPY_INVERTED;
2877
case VK_LOGIC_OP_OR_INVERTED:
2878
return V_028808_ROP3_OR_INVERTED;
2879
case VK_LOGIC_OP_NAND:
2880
return V_028808_ROP3_NAND;
2881
case VK_LOGIC_OP_SET:
2882
return V_028808_ROP3_SET;
2884
unreachable("Unhandled logic op");
2889
* Queue helper to get ring.
2890
* placed here as it needs queue + device structs.
2892
static inline enum ring_type
2893
radv_queue_ring(struct radv_queue *queue)
2895
return radv_queue_family_to_ring(queue->device->physical_device, queue->qf);
2899
* Helper used for debugging compiler issues by enabling/disabling LLVM for a
2900
* specific shader stage (developers only).
2903
radv_use_llvm_for_stage(struct radv_device *device, UNUSED gl_shader_stage stage)
2905
return device->physical_device->use_llvm;
2908
struct radv_acceleration_structure {
2909
struct vk_object_base base;
2911
struct radeon_winsys_bo *bo;
2912
uint64_t mem_offset;
2916
static inline uint64_t
2917
radv_accel_struct_get_va(const struct radv_acceleration_structure *accel)
2919
return radv_buffer_get_va(accel->bo) + accel->mem_offset;
2922
/* radv_perfcounter.c */
2923
void radv_perfcounter_emit_shaders(struct radeon_cmdbuf *cs, unsigned shaders);
2924
void radv_perfcounter_emit_reset(struct radeon_cmdbuf *cs);
2925
void radv_perfcounter_emit_start(struct radv_device *device, struct radeon_cmdbuf *cs, int family);
2926
void radv_perfcounter_emit_stop(struct radv_device *device, struct radeon_cmdbuf *cs, int family);
2929
bool radv_spm_init(struct radv_device *device);
2930
void radv_spm_finish(struct radv_device *device);
2931
void radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs);
2933
#define RADV_FROM_HANDLE(__radv_type, __name, __handle) \
2934
VK_FROM_HANDLE(__radv_type, __name, __handle)
2936
VK_DEFINE_HANDLE_CASTS(radv_cmd_buffer, vk.base, VkCommandBuffer,
2937
VK_OBJECT_TYPE_COMMAND_BUFFER)
2938
VK_DEFINE_HANDLE_CASTS(radv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
2939
VK_DEFINE_HANDLE_CASTS(radv_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE)
2940
VK_DEFINE_HANDLE_CASTS(radv_physical_device, vk.base, VkPhysicalDevice,
2941
VK_OBJECT_TYPE_PHYSICAL_DEVICE)
2942
VK_DEFINE_HANDLE_CASTS(radv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
2943
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_acceleration_structure, base,
2944
VkAccelerationStructureKHR,
2945
VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR)
2946
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_cmd_pool, vk.base, VkCommandPool,
2947
VK_OBJECT_TYPE_COMMAND_POOL)
2948
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer, base, VkBuffer, VK_OBJECT_TYPE_BUFFER)
2949
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer_view, base, VkBufferView,
2950
VK_OBJECT_TYPE_BUFFER_VIEW)
2951
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_pool, base, VkDescriptorPool,
2952
VK_OBJECT_TYPE_DESCRIPTOR_POOL)
2953
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set, header.base, VkDescriptorSet,
2954
VK_OBJECT_TYPE_DESCRIPTOR_SET)
2955
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set_layout, base,
2956
VkDescriptorSetLayout,
2957
VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
2958
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_update_template, base,
2959
VkDescriptorUpdateTemplate,
2960
VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE)
2961
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_device_memory, base, VkDeviceMemory,
2962
VK_OBJECT_TYPE_DEVICE_MEMORY)
2963
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
2964
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_image, base, VkImage, VK_OBJECT_TYPE_IMAGE)
2965
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_image_view, base, VkImageView,
2966
VK_OBJECT_TYPE_IMAGE_VIEW);
2967
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline_cache, base, VkPipelineCache,
2968
VK_OBJECT_TYPE_PIPELINE_CACHE)
2969
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline, base, VkPipeline,
2970
VK_OBJECT_TYPE_PIPELINE)
2971
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline_layout, base, VkPipelineLayout,
2972
VK_OBJECT_TYPE_PIPELINE_LAYOUT)
2973
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_query_pool, base, VkQueryPool,
2974
VK_OBJECT_TYPE_QUERY_POOL)
2975
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_render_pass, base, VkRenderPass,
2976
VK_OBJECT_TYPE_RENDER_PASS)
2977
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_sampler, base, VkSampler,
2978
VK_OBJECT_TYPE_SAMPLER)
2979
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_sampler_ycbcr_conversion, base,
2980
VkSamplerYcbcrConversion,
2981
VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION)
2987
#endif /* RADV_PRIVATE_H */