2
* Copyright © 2015 Intel Corporation
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
29
#include <sys/mkdev.h>
31
#ifdef MAJOR_IN_SYSMACROS
32
#include <sys/sysmacros.h>
38
#include "drm-uapi/drm_fourcc.h"
39
#include "drm-uapi/drm.h"
42
#include "anv_private.h"
43
#include "anv_measure.h"
44
#include "util/debug.h"
45
#include "util/build_id.h"
46
#include "util/disk_cache.h"
47
#include "util/mesa-sha1.h"
48
#include "util/os_file.h"
49
#include "util/os_misc.h"
50
#include "util/u_atomic.h"
51
#include "util/u_string.h"
52
#include "util/driconf.h"
55
#include "vk_deferred_operation.h"
56
#include "vk_drm_syncobj.h"
57
#include "common/intel_aux_map.h"
58
#include "common/intel_defines.h"
59
#include "common/intel_uuid.h"
60
#include "perf/intel_perf.h"
62
#include "genxml/gen7_pack.h"
63
#include "genxml/genX_bits.h"
65
static const driOptionDescription anv_dri_options[] = {
66
DRI_CONF_SECTION_PERFORMANCE
67
DRI_CONF_ADAPTIVE_SYNC(true)
68
DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
69
DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
70
DRI_CONF_VK_XWAYLAND_WAIT_READY(true)
71
DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(false)
74
DRI_CONF_SECTION_DEBUG
75
DRI_CONF_ALWAYS_FLUSH_CACHE(false)
76
DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
80
/* This is probably far to big but it reflects the max size used for messages
81
* in OpenGLs KHR_debug.
83
#define MAX_DEBUG_MESSAGE_LENGTH 4096
85
/* Render engine timestamp register */
86
#define TIMESTAMP 0x2358
88
/* The "RAW" clocks on Linux are called "FAST" on FreeBSD */
89
#if !defined(CLOCK_MONOTONIC_RAW) && defined(CLOCK_MONOTONIC_FAST)
90
#define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC_FAST
94
/* Callback handed to the brw compiler for shader debug output: formats the
 * message into a stack buffer.  The actual logging call below is commented
 * out, so the formatted string is currently dropped.
 * NOTE(review): this span is corrupted — the return-type line, braces and
 * the va_list setup (va_start/va_end for `args`) are missing, and the bare
 * number lines ("96", "97", …) are extraction artifacts, not code.
 */
compiler_debug_log(void *data, UNUSED unsigned *id, const char *fmt, ...)
96
/* Scratch buffer; size mirrors KHR_debug's maximum message length. */
char str[MAX_DEBUG_MESSAGE_LENGTH];
97
struct anv_device *device = (struct anv_device *)data;
98
UNUSED struct anv_instance *instance = device->physical->instance;
102
/* `args` is presumably a va_list started from `fmt`; its declaration is
 * among the missing lines — TODO confirm against the original file. */
(void) vsnprintf(str, MAX_DEBUG_MESSAGE_LENGTH, fmt, args);
105
//vk_logd(VK_LOG_NO_OBJS(&instance->vk), "%s", str);
109
/* Compiler performance-log callback: forwards the message to the mesa debug
 * log, but only when INTEL_DEBUG=perf is enabled.
 * NOTE(review): corrupted span — the return-type line, braces and the
 * va_list setup for `args` are missing; bare number lines ("114", "115")
 * are extraction artifacts. */
compiler_perf_log(UNUSED void *data, UNUSED unsigned *id, const char *fmt, ...)
114
if (INTEL_DEBUG(DEBUG_PERF))
115
mesa_logd_v(fmt, args);
120
#if defined(VK_USE_PLATFORM_WAYLAND_KHR) || \
121
defined(VK_USE_PLATFORM_XCB_KHR) || \
122
defined(VK_USE_PLATFORM_XLIB_KHR) || \
123
defined(VK_USE_PLATFORM_DISPLAY_KHR)
124
#define ANV_USE_WSI_PLATFORM
128
#define ANV_API_VERSION VK_MAKE_VERSION(1, 1, VK_HEADER_VERSION)
130
#define ANV_API_VERSION VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION)
133
/* vkEnumerateInstanceVersion entry point: reports the instance-level API
 * version this driver advertises (ANV_API_VERSION, selected by the #define
 * blocks above).
 * NOTE(review): the opening brace, the return statement and the closing
 * brace are among the lines missing from this corrupted span. */
VkResult anv_EnumerateInstanceVersion(
134
uint32_t* pApiVersion)
136
*pApiVersion = ANV_API_VERSION;
140
static const struct vk_instance_extension_table instance_extensions = {
141
.KHR_device_group_creation = true,
142
.KHR_external_fence_capabilities = true,
143
.KHR_external_memory_capabilities = true,
144
.KHR_external_semaphore_capabilities = true,
145
.KHR_get_physical_device_properties2 = true,
146
.EXT_debug_report = true,
147
.EXT_debug_utils = true,
149
#ifdef ANV_USE_WSI_PLATFORM
150
.KHR_get_surface_capabilities2 = true,
152
.KHR_surface_protected_capabilities = true,
154
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
155
.KHR_wayland_surface = true,
157
#ifdef VK_USE_PLATFORM_XCB_KHR
158
.KHR_xcb_surface = true,
160
#ifdef VK_USE_PLATFORM_XLIB_KHR
161
.KHR_xlib_surface = true,
163
#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
164
.EXT_acquire_xlib_display = true,
166
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
168
.KHR_get_display_properties2 = true,
169
.EXT_direct_mode_display = true,
170
.EXT_display_surface_counter = true,
171
.EXT_acquire_drm_display = true,
176
get_device_extensions(const struct anv_physical_device *device,
177
struct vk_device_extension_table *ext)
179
const bool has_syncobj_wait =
180
(device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT) != 0;
182
const bool nv_mesh_shading_enabled =
183
env_var_as_boolean("ANV_EXPERIMENTAL_NV_MESH_SHADER", false);
185
*ext = (struct vk_device_extension_table) {
186
.KHR_8bit_storage = device->info.ver >= 8,
187
.KHR_16bit_storage = device->info.ver >= 8,
188
.KHR_bind_memory2 = true,
189
.KHR_buffer_device_address = device->has_a64_buffer_access,
190
.KHR_copy_commands2 = true,
191
.KHR_create_renderpass2 = true,
192
.KHR_dedicated_allocation = true,
193
.KHR_deferred_host_operations = true,
194
.KHR_depth_stencil_resolve = true,
195
.KHR_descriptor_update_template = true,
196
.KHR_device_group = true,
197
.KHR_draw_indirect_count = true,
198
.KHR_driver_properties = true,
199
.KHR_dynamic_rendering = true,
200
.KHR_external_fence = has_syncobj_wait,
201
.KHR_external_fence_fd = has_syncobj_wait,
202
.KHR_external_memory = true,
203
.KHR_external_memory_fd = true,
204
.KHR_external_semaphore = true,
205
.KHR_external_semaphore_fd = true,
206
.KHR_format_feature_flags2 = true,
207
.KHR_fragment_shading_rate = device->info.ver >= 11,
208
.KHR_get_memory_requirements2 = true,
209
.KHR_image_format_list = true,
210
.KHR_imageless_framebuffer = true,
211
#ifdef ANV_USE_WSI_PLATFORM
212
.KHR_incremental_present = true,
214
.KHR_maintenance1 = true,
215
.KHR_maintenance2 = true,
216
.KHR_maintenance3 = true,
217
.KHR_maintenance4 = true,
218
.KHR_multiview = true,
219
.KHR_performance_query =
220
!anv_use_relocations(device) && device->perf &&
221
(device->perf->i915_perf_version >= 3 ||
222
INTEL_DEBUG(DEBUG_NO_OACONFIG)) &&
223
device->use_call_secondary,
224
.KHR_pipeline_executable_properties = true,
225
.KHR_push_descriptor = true,
226
.KHR_ray_query = device->info.has_ray_tracing,
227
.KHR_relaxed_block_layout = true,
228
.KHR_sampler_mirror_clamp_to_edge = true,
229
.KHR_sampler_ycbcr_conversion = true,
230
.KHR_separate_depth_stencil_layouts = true,
231
.KHR_shader_atomic_int64 = device->info.ver >= 9,
232
.KHR_shader_clock = true,
233
.KHR_shader_draw_parameters = true,
234
.KHR_shader_float16_int8 = device->info.ver >= 8,
235
.KHR_shader_float_controls = device->info.ver >= 8,
236
.KHR_shader_integer_dot_product = true,
237
.KHR_shader_non_semantic_info = true,
238
.KHR_shader_subgroup_extended_types = device->info.ver >= 8,
239
.KHR_shader_subgroup_uniform_control_flow = true,
240
.KHR_shader_terminate_invocation = true,
241
.KHR_spirv_1_4 = true,
242
.KHR_storage_buffer_storage_class = true,
243
#ifdef ANV_USE_WSI_PLATFORM
244
.KHR_swapchain = true,
245
.KHR_swapchain_mutable_format = true,
247
.KHR_synchronization2 = true,
248
.KHR_timeline_semaphore = true,
249
.KHR_uniform_buffer_standard_layout = true,
250
.KHR_variable_pointers = true,
251
.KHR_vulkan_memory_model = true,
252
.KHR_workgroup_memory_explicit_layout = true,
253
.KHR_zero_initialize_workgroup_memory = true,
254
.EXT_4444_formats = true,
255
.EXT_buffer_device_address = device->has_a64_buffer_access,
256
.EXT_calibrated_timestamps = device->has_reg_timestamp,
257
.EXT_color_write_enable = true,
258
.EXT_conditional_rendering = device->info.verx10 >= 75,
259
.EXT_conservative_rasterization = device->info.ver >= 9,
260
.EXT_custom_border_color = device->info.ver >= 8,
261
.EXT_depth_clip_control = true,
262
.EXT_depth_clip_enable = true,
263
.EXT_descriptor_indexing = device->has_a64_buffer_access &&
264
device->has_bindless_images,
265
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
266
.EXT_display_control = true,
268
.EXT_extended_dynamic_state = true,
269
.EXT_extended_dynamic_state2 = true,
270
.EXT_external_memory_dma_buf = true,
271
.EXT_external_memory_host = true,
272
.EXT_fragment_shader_interlock = device->info.ver >= 9,
273
.EXT_global_priority = device->max_context_priority >=
274
INTEL_CONTEXT_MEDIUM_PRIORITY,
275
.EXT_global_priority_query = device->max_context_priority >=
276
INTEL_CONTEXT_MEDIUM_PRIORITY,
277
.EXT_host_query_reset = true,
278
.EXT_image_2d_view_of_3d = true,
279
.EXT_image_robustness = true,
280
.EXT_image_drm_format_modifier = true,
281
.EXT_index_type_uint8 = true,
282
.EXT_inline_uniform_block = true,
283
.EXT_line_rasterization = true,
284
.EXT_memory_budget = device->sys.available,
285
.EXT_pci_bus_info = true,
286
.EXT_physical_device_drm = true,
287
.EXT_pipeline_creation_cache_control = true,
288
.EXT_pipeline_creation_feedback = true,
289
.EXT_post_depth_coverage = device->info.ver >= 9,
290
.EXT_primitive_topology_list_restart = true,
291
.EXT_private_data = true,
292
.EXT_provoking_vertex = true,
293
.EXT_queue_family_foreign = true,
294
.EXT_robustness2 = true,
295
.EXT_sample_locations = true,
296
.EXT_sampler_filter_minmax = device->info.ver >= 9,
297
.EXT_scalar_block_layout = true,
298
.EXT_separate_stencil_usage = true,
299
.EXT_shader_atomic_float = true,
300
.EXT_shader_atomic_float2 = device->info.ver >= 9,
301
.EXT_shader_demote_to_helper_invocation = true,
302
.EXT_shader_stencil_export = device->info.ver >= 9,
303
.EXT_shader_subgroup_ballot = true,
304
.EXT_shader_subgroup_vote = true,
305
.EXT_shader_viewport_index_layer = true,
306
.EXT_subgroup_size_control = true,
307
.EXT_texel_buffer_alignment = true,
308
.EXT_tooling_info = true,
309
.EXT_transform_feedback = true,
310
.EXT_vertex_attribute_divisor = true,
311
.EXT_ycbcr_image_arrays = true,
313
.ANDROID_external_memory_android_hardware_buffer = true,
314
.ANDROID_native_buffer = true,
316
.GOOGLE_decorate_string = true,
317
.GOOGLE_hlsl_functionality1 = true,
318
.GOOGLE_user_type = true,
319
.INTEL_performance_query = device->perf &&
320
device->perf->i915_perf_version >= 3,
321
.INTEL_shader_integer_functions2 = device->info.ver >= 8,
322
.EXT_multi_draw = true,
323
.NV_compute_shader_derivatives = true,
324
.NV_mesh_shader = device->info.has_mesh_shading &&
325
nv_mesh_shading_enabled,
326
.VALVE_mutable_descriptor_type = true,
331
/* Computes the size of the system-memory heap advertised to Vulkan from the
 * total RAM (the `total_ram` parameter line is missing from this span).
 * Policy, per the inline comments below: half of RAM when <= 4 GiB, else
 * 3/4; further clamped to 3/4 of the GTT, and to 2 GiB when 48-bit
 * addressing is unsupported.
 * NOTE(review): corrupted span — the return-type line, braces, the `else`
 * between the two available_ram assignments, and part of the mesa_logw()
 * argument list are missing; bare number lines are extraction artifacts. */
anv_compute_sys_heap_size(struct anv_physical_device *device,
334
/* We don't want to burn too much ram with the GPU. If the user has 4GiB
335
* or less, we use at most half. If they have more than 4GiB, we use 3/4.
337
uint64_t available_ram;
338
if (total_ram <= 4ull * 1024ull * 1024ull * 1024ull)
339
available_ram = total_ram / 2;
341
available_ram = total_ram * 3 / 4;
343
/* We also want to leave some padding for things we allocate in the driver,
344
* so don't go over 3/4 of the GTT either.
346
available_ram = MIN2(available_ram, device->gtt_size * 3 / 4);
348
if (available_ram > (2ull << 30) && !device->supports_48bit_addresses) {
349
/* When running with an overridden PCI ID, we may get a GTT size from
350
* the kernel that is greater than 2 GiB but the execbuf check for 48bit
351
* address support can still fail. Just clamp the address space size to
352
* 2 GiB if we don't have 48-bit support.
354
mesa_logw("%s:%d: The kernel reported a GTT size larger than 2 GiB but "
355
"not support for 48-bit addresses",
357
available_ram = 2ull << 30;
360
return available_ram;
363
static VkResult MUST_CHECK
364
anv_gather_meminfo(struct anv_physical_device *device, int fd, bool update)
366
char sys_mem_regions[sizeof(struct drm_i915_query_memory_regions) +
367
sizeof(struct drm_i915_memory_region_info)];
369
struct drm_i915_query_memory_regions *mem_regions =
370
intel_i915_query_alloc(fd, DRM_I915_QUERY_MEMORY_REGIONS, NULL);
371
if (mem_regions == NULL) {
372
if (device->info.has_local_mem) {
373
return vk_errorf(device, VK_ERROR_INCOMPATIBLE_DRIVER,
374
"failed to memory regions: %m");
378
if (!os_get_total_physical_memory(&total_phys)) {
379
return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
380
"failed to get total physical memory: %m");
384
if (!os_get_available_system_memory(&available))
385
available = 0; /* Silently disable VK_EXT_memory_budget */
387
/* The kernel query failed. Fake it using OS memory queries. This
388
* should be roughly the same for integrated GPUs.
390
mem_regions = (void *)sys_mem_regions;
391
mem_regions->num_regions = 1;
392
mem_regions->regions[0] = (struct drm_i915_memory_region_info) {
393
.region.memory_class = I915_MEMORY_CLASS_SYSTEM,
394
.probed_size = total_phys,
395
.unallocated_size = available,
399
for(int i = 0; i < mem_regions->num_regions; i++) {
400
struct drm_i915_memory_region_info *info = &mem_regions->regions[i];
402
struct anv_memregion *region;
403
switch (info->region.memory_class) {
404
case I915_MEMORY_CLASS_SYSTEM:
405
region = &device->sys;
407
case I915_MEMORY_CLASS_DEVICE:
408
region = &device->vram;
411
/* We don't know what kind of memory this is */
415
uint64_t size = info->probed_size;
416
if (info->region.memory_class == I915_MEMORY_CLASS_SYSTEM)
417
size = anv_compute_sys_heap_size(device, size);
419
uint64_t available = MIN2(size, info->unallocated_size);
422
assert(region->region.memory_class == info->region.memory_class);
423
assert(region->region.memory_instance == info->region.memory_instance);
424
assert(region->size == size);
426
region->region = info->region;
429
region->available = available;
432
if (mem_regions != (void *)sys_mem_regions)
438
/* First-time population of the physical device's memory-region info:
 * delegates to anv_gather_meminfo() with update == false.  MUST_CHECK
 * because the gather can fail (incompatible driver / no meminfo).
 * NOTE(review): the opening and closing braces are missing from this span;
 * bare number lines are extraction artifacts. */
static VkResult MUST_CHECK
439
anv_init_meminfo(struct anv_physical_device *device, int fd)
441
return anv_gather_meminfo(device, fd, false);
445
/* Refreshes the cached memory-region info (anv_gather_meminfo with
 * update == true).  Failure is not expected after a successful init — the
 * result is only asserted, not propagated.
 * NOTE(review): the return-type line and braces are missing from this
 * corrupted span. */
anv_update_meminfo(struct anv_physical_device *device, int fd)
447
ASSERTED VkResult result = anv_gather_meminfo(device, fd, true);
448
assert(result == VK_SUCCESS);
453
anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
455
VkResult result = anv_init_meminfo(device, fd);
456
if (result != VK_SUCCESS)
459
assert(device->sys.size != 0);
461
if (device->vram.size > 0) {
462
/* We can create 2 different heaps when we have local memory support,
463
* first heap with local memory size and second with system memory size.
465
device->memory.heap_count = 2;
466
device->memory.heaps[0] = (struct anv_memory_heap) {
467
.size = device->vram.size,
468
.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
469
.is_local_mem = true,
471
device->memory.heaps[1] = (struct anv_memory_heap) {
472
.size = device->sys.size,
474
.is_local_mem = false,
477
device->memory.type_count = 3;
478
device->memory.types[0] = (struct anv_memory_type) {
479
.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
482
device->memory.types[1] = (struct anv_memory_type) {
483
.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
484
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
485
VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
488
device->memory.types[2] = (struct anv_memory_type) {
489
.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
490
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
491
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
494
} else if (device->info.has_llc) {
495
device->memory.heap_count = 1;
496
device->memory.heaps[0] = (struct anv_memory_heap) {
497
.size = device->sys.size,
498
.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
499
.is_local_mem = false,
502
/* Big core GPUs share LLC with the CPU and thus one memory type can be
503
* both cached and coherent at the same time.
505
device->memory.type_count = 1;
506
device->memory.types[0] = (struct anv_memory_type) {
507
.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
508
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
509
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
510
VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
514
device->memory.heap_count = 1;
515
device->memory.heaps[0] = (struct anv_memory_heap) {
516
.size = device->sys.size,
517
.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
518
.is_local_mem = false,
521
/* The spec requires that we expose a host-visible, coherent memory
522
* type, but Atom GPUs don't share LLC. Thus we offer two memory types
523
* to give the application a choice between cached, but not coherent and
524
* coherent but uncached (WC though).
526
device->memory.type_count = 2;
527
device->memory.types[0] = (struct anv_memory_type) {
528
.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
529
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
530
VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
533
device->memory.types[1] = (struct anv_memory_type) {
534
.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
535
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
536
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
541
device->memory.need_clflush = false;
542
for (unsigned i = 0; i < device->memory.type_count; i++) {
543
VkMemoryPropertyFlags props = device->memory.types[i].propertyFlags;
544
if ((props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
545
!(props & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
546
device->memory.need_clflush = true;
553
anv_physical_device_init_uuids(struct anv_physical_device *device)
555
const struct build_id_note *note =
556
build_id_find_nhdr_for_addr(anv_physical_device_init_uuids);
558
return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
559
"Failed to find build-id");
562
unsigned build_id_len = build_id_length(note);
563
if (build_id_len < 20) {
564
return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
565
"build-id too short. It needs to be a SHA");
568
memcpy(device->driver_build_sha1, build_id_data(note), 20);
570
struct mesa_sha1 sha1_ctx;
572
STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1));
574
/* The pipeline cache UUID is used for determining when a pipeline cache is
575
* invalid. It needs both a driver build and the PCI ID of the device.
577
_mesa_sha1_init(&sha1_ctx);
578
_mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
579
_mesa_sha1_update(&sha1_ctx, &device->info.pci_device_id,
580
sizeof(device->info.pci_device_id));
581
_mesa_sha1_update(&sha1_ctx, &device->always_use_bindless,
582
sizeof(device->always_use_bindless));
583
_mesa_sha1_update(&sha1_ctx, &device->has_a64_buffer_access,
584
sizeof(device->has_a64_buffer_access));
585
_mesa_sha1_update(&sha1_ctx, &device->has_bindless_images,
586
sizeof(device->has_bindless_images));
587
_mesa_sha1_update(&sha1_ctx, &device->has_bindless_samplers,
588
sizeof(device->has_bindless_samplers));
589
_mesa_sha1_final(&sha1_ctx, sha1);
590
memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE);
592
intel_uuid_compute_driver_id(device->driver_uuid, &device->info, VK_UUID_SIZE);
593
intel_uuid_compute_device_id(device->device_uuid, &device->info, VK_UUID_SIZE);
599
anv_physical_device_init_disk_cache(struct anv_physical_device *device)
601
#ifdef ENABLE_SHADER_CACHE
603
ASSERTED int len = snprintf(renderer, sizeof(renderer), "anv_%04x",
604
device->info.pci_device_id);
605
assert(len == sizeof(renderer) - 2);
608
_mesa_sha1_format(timestamp, device->driver_build_sha1);
610
const uint64_t driver_flags =
611
brw_get_compiler_config_value(device->compiler);
612
device->disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
614
device->disk_cache = NULL;
619
/* Tears down the on-disk shader cache created in
 * anv_physical_device_init_disk_cache().  When the shader cache is compiled
 * out, the pointer is expected to already be NULL (the assert branch).
 * NOTE(review): the return-type line, braces, and apparently the matching
 * #else/#endif lines are missing from this corrupted span — TODO confirm
 * against the original file. */
anv_physical_device_free_disk_cache(struct anv_physical_device *device)
621
#ifdef ENABLE_SHADER_CACHE
622
if (device->disk_cache)
623
disk_cache_destroy(device->disk_cache);
625
assert(device->disk_cache == NULL);
629
/* The ANV_QUEUE_OVERRIDE environment variable is a comma separated list of
632
* To override the number queues:
633
* * "gc" is for graphics queues with compute support
634
* * "g" is for graphics queues with no compute support
635
* * "c" is for compute queues with no graphics support
637
* For example, ANV_QUEUE_OVERRIDE=gc=2,c=1 would override the number of
638
* advertised queues to be 2 queues with graphics+compute support, and 1 queue
639
* with compute-only support.
641
* ANV_QUEUE_OVERRIDE=c=1 would override the number of advertised queues to
642
* include 1 queue with compute-only support, but it will not change the
643
* number of graphics+compute queues.
645
* ANV_QUEUE_OVERRIDE=gc=0,c=1 would override the number of advertised queues
646
* to include 1 queue with compute-only support, and it would override the
647
* number of graphics+compute queues to be 0.
650
/* Parses the comma-separated ANV_QUEUE_OVERRIDE environment variable
 * (documented in the comment block above: "gc=N", "g=N", "c=N" tokens) and
 * overrides the advertised queue-family counts through the three out
 * parameters.  An override value of -1 means "leave unchanged"; a spec
 * violation (g > 0 with gc forced to 0) and unknown tokens only warn.
 * NOTE(review): corrupted span — the return-type line, braces, the
 * declarations of g_override, c_override and `save`, the NULL-env early
 * return, and the `>= 0` guards before the g/c assignments are missing.
 * Also note strtok_r mutates the buffer returned by getenv() here — TODO
 * confirm the original copies the string first. */
anv_override_engine_counts(int *gc_count, int *g_count, int *c_count)
652
int gc_override = -1;
655
char *env = getenv("ANV_QUEUE_OVERRIDE");
662
char *next = strtok_r(env, ",", &save);
663
while (next != NULL) {
664
if (strncmp(next, "gc=", 3) == 0) {
665
gc_override = strtol(next + 3, NULL, 0);
666
} else if (strncmp(next, "g=", 2) == 0) {
667
g_override = strtol(next + 2, NULL, 0);
668
} else if (strncmp(next, "c=", 2) == 0) {
669
c_override = strtol(next + 2, NULL, 0);
671
mesa_logw("Ignoring unsupported ANV_QUEUE_OVERRIDE token: %s", next);
673
next = strtok_r(NULL, ",", &save);
676
if (gc_override >= 0)
677
*gc_count = gc_override;
679
*g_count = g_override;
680
if (*g_count > 0 && *gc_count <= 0 && (gc_override >= 0 || g_override >= 0))
681
mesa_logw("ANV_QUEUE_OVERRIDE: gc=0 with g > 0 violates the "
682
"Vulkan specification");
684
*c_count = c_override;
688
anv_physical_device_init_queue_families(struct anv_physical_device *pdevice)
690
uint32_t family_count = 0;
692
if (pdevice->engine_info) {
694
intel_gem_count_engines(pdevice->engine_info,
695
I915_ENGINE_CLASS_RENDER);
699
anv_override_engine_counts(&gc_count, &g_count, &c_count);
702
pdevice->queue.families[family_count++] = (struct anv_queue_family) {
703
.queueFlags = VK_QUEUE_GRAPHICS_BIT |
704
VK_QUEUE_COMPUTE_BIT |
705
VK_QUEUE_TRANSFER_BIT,
706
.queueCount = gc_count,
707
.engine_class = I915_ENGINE_CLASS_RENDER,
711
pdevice->queue.families[family_count++] = (struct anv_queue_family) {
712
.queueFlags = VK_QUEUE_GRAPHICS_BIT |
713
VK_QUEUE_TRANSFER_BIT,
714
.queueCount = g_count,
715
.engine_class = I915_ENGINE_CLASS_RENDER,
719
pdevice->queue.families[family_count++] = (struct anv_queue_family) {
720
.queueFlags = VK_QUEUE_COMPUTE_BIT |
721
VK_QUEUE_TRANSFER_BIT,
722
.queueCount = c_count,
723
.engine_class = I915_ENGINE_CLASS_RENDER,
726
/* Increase count below when other families are added as a reminder to
727
* increase the ANV_MAX_QUEUE_FAMILIES value.
729
STATIC_ASSERT(ANV_MAX_QUEUE_FAMILIES >= 3);
731
/* Default to a single render queue */
732
pdevice->queue.families[family_count++] = (struct anv_queue_family) {
733
.queueFlags = VK_QUEUE_GRAPHICS_BIT |
734
VK_QUEUE_COMPUTE_BIT |
735
VK_QUEUE_TRANSFER_BIT,
737
.engine_class = I915_ENGINE_CLASS_RENDER,
741
assert(family_count <= ANV_MAX_QUEUE_FAMILIES);
742
pdevice->queue.family_count = family_count;
746
anv_physical_device_try_create(struct anv_instance *instance,
747
drmDevicePtr drm_device,
748
struct anv_physical_device **device_out)
750
const char *primary_path = drm_device->nodes[DRM_NODE_PRIMARY];
751
const char *path = drm_device->nodes[DRM_NODE_RENDER];
756
brw_process_intel_debug_variable();
758
fd = open(path, O_RDWR | O_CLOEXEC);
760
if (errno == ENOMEM) {
761
return vk_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
762
"Unable to open device %s: out of memory", path);
764
return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
765
"Unable to open device %s: %m", path);
768
struct intel_device_info devinfo;
769
if (!intel_get_device_info_from_fd(fd, &devinfo)) {
770
result = vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
774
bool is_alpha = true;
775
if (devinfo.platform == INTEL_PLATFORM_HSW) {
776
mesa_logw("Haswell Vulkan support is incomplete");
777
} else if (devinfo.platform == INTEL_PLATFORM_IVB) {
778
mesa_logw("Ivy Bridge Vulkan support is incomplete");
779
} else if (devinfo.platform == INTEL_PLATFORM_BYT) {
780
mesa_logw("Bay Trail Vulkan support is incomplete");
781
} else if (devinfo.ver >= 8 && devinfo.ver <= 12) {
782
/* Gfx8-12 fully supported */
785
result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
786
"Vulkan not yet supported on %s", devinfo.name);
790
struct anv_physical_device *device =
791
vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
792
VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
793
if (device == NULL) {
794
result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
798
struct vk_physical_device_dispatch_table dispatch_table;
799
vk_physical_device_dispatch_table_from_entrypoints(
800
&dispatch_table, &anv_physical_device_entrypoints, true);
801
vk_physical_device_dispatch_table_from_entrypoints(
802
&dispatch_table, &wsi_physical_device_entrypoints, false);
804
result = vk_physical_device_init(&device->vk, &instance->vk,
805
NULL, /* We set up extensions later */
807
if (result != VK_SUCCESS) {
808
vk_error(instance, result);
811
device->instance = instance;
813
assert(strlen(path) < ARRAY_SIZE(device->path));
814
snprintf(device->path, ARRAY_SIZE(device->path), "%s", path);
816
device->info = devinfo;
817
device->is_alpha = is_alpha;
819
device->cmd_parser_version = -1;
820
if (device->info.ver == 7) {
821
device->cmd_parser_version =
822
anv_gem_get_param(fd, I915_PARAM_CMD_PARSER_VERSION);
823
if (device->cmd_parser_version == -1) {
824
result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
825
"failed to get command parser version");
830
if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) {
831
result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
832
"kernel missing gem wait");
836
if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2)) {
837
result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
838
"kernel missing execbuf2");
842
if (!device->info.has_llc &&
843
anv_gem_get_param(fd, I915_PARAM_MMAP_VERSION) < 1) {
844
result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
845
"kernel missing wc mmap");
849
device->use_relocations = device->info.ver < 8 ||
850
device->info.platform == INTEL_PLATFORM_CHV;
852
if (!device->use_relocations &&
853
!anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN)) {
854
result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
855
"kernel missing softpin");
859
if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_FENCE_ARRAY)) {
860
result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
861
"kernel missing syncobj support");
865
device->has_exec_async = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_ASYNC);
866
device->has_exec_capture = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_CAPTURE);
868
/* Start with medium; sorted low to high */
869
const int priorities[] = {
870
INTEL_CONTEXT_MEDIUM_PRIORITY,
871
INTEL_CONTEXT_HIGH_PRIORITY,
872
INTEL_CONTEXT_REALTIME_PRIORITY,
874
device->max_context_priority = INT_MIN;
875
for (unsigned i = 0; i < ARRAY_SIZE(priorities); i++) {
876
if (!anv_gem_has_context_priority(fd, priorities[i]))
878
device->max_context_priority = priorities[i];
881
device->gtt_size = device->info.gtt_size ? device->info.gtt_size :
882
device->info.aperture_bytes;
884
/* We only allow 48-bit addresses with softpin because knowing the actual
885
* address is required for the vertex cache flush workaround.
887
device->supports_48bit_addresses = (device->info.ver >= 8) &&
888
device->gtt_size > (4ULL << 30 /* GiB */);
890
/* Initialize memory regions struct to 0. */
891
memset(&device->vram, 0, sizeof(device->vram));
892
memset(&device->sys, 0, sizeof(device->sys));
894
result = anv_physical_device_init_heaps(device, fd);
895
if (result != VK_SUCCESS)
898
assert(device->supports_48bit_addresses == !device->use_relocations);
899
device->use_softpin = !device->use_relocations;
901
device->has_context_isolation =
902
anv_gem_get_param(fd, I915_PARAM_HAS_CONTEXT_ISOLATION);
904
device->has_exec_timeline =
905
anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_TIMELINE_FENCES);
906
if (env_var_as_boolean("ANV_QUEUE_THREAD_DISABLE", false))
907
device->has_exec_timeline = false;
911
device->sync_syncobj_type = vk_drm_syncobj_get_type(fd);
912
if (!device->has_exec_timeline)
913
device->sync_syncobj_type.features &= ~VK_SYNC_FEATURE_TIMELINE;
914
device->sync_types[st_idx++] = &device->sync_syncobj_type;
916
if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT))
917
device->sync_types[st_idx++] = &anv_bo_sync_type;
919
if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_TIMELINE)) {
920
device->sync_timeline_type = vk_sync_timeline_get_type(&anv_bo_sync_type);
921
device->sync_types[st_idx++] = &device->sync_timeline_type.sync;
924
device->sync_types[st_idx++] = NULL;
925
assert(st_idx <= ARRAY_SIZE(device->sync_types));
926
device->vk.supported_sync_types = device->sync_types;
928
device->always_use_bindless =
929
env_var_as_boolean("ANV_ALWAYS_BINDLESS", false);
931
device->use_call_secondary =
932
device->use_softpin &&
933
!env_var_as_boolean("ANV_DISABLE_SECONDARY_CMD_BUFFER_CALLS", false);
935
/* We first got the A64 messages on broadwell and we can only use them if
936
* we can pass addresses directly into the shader which requires softpin.
938
device->has_a64_buffer_access = device->info.ver >= 8 &&
941
/* We first get bindless image access on Skylake.
943
device->has_bindless_images = device->info.ver >= 9;
945
/* We've had bindless samplers since Ivy Bridge (forever in Vulkan terms)
946
* because it's just a matter of setting the sampler address in the sample
947
* message header. However, we've not bothered to wire it up for vec4 so
948
* we leave it disabled on gfx7.
950
device->has_bindless_samplers = device->info.ver >= 8;
952
device->has_implicit_ccs = device->info.has_aux_map ||
953
device->info.verx10 >= 125;
955
/* Check if we can read the GPU timestamp register from the CPU */
957
device->has_reg_timestamp = anv_gem_reg_read(fd, TIMESTAMP | I915_REG_READ_8B_WA,
960
device->always_flush_cache = INTEL_DEBUG(DEBUG_SYNC) ||
961
driQueryOptionb(&instance->dri_options, "always_flush_cache");
963
device->has_mmap_offset =
964
anv_gem_get_param(fd, I915_PARAM_MMAP_GTT_VERSION) >= 4;
966
device->has_userptr_probe =
967
anv_gem_get_param(fd, I915_PARAM_HAS_USERPTR_PROBE);
969
device->compiler = brw_compiler_create(NULL, &device->info);
970
if (device->compiler == NULL) {
971
result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
974
device->compiler->shader_debug_log = compiler_debug_log;
975
device->compiler->shader_perf_log = compiler_perf_log;
976
device->compiler->constant_buffer_0_is_relative =
977
device->info.ver < 8 || !device->has_context_isolation;
978
device->compiler->supports_shader_constants = true;
979
device->compiler->indirect_ubos_use_sampler = device->info.ver < 12;
981
isl_device_init(&device->isl_dev, &device->info);
983
result = anv_physical_device_init_uuids(device);
984
if (result != VK_SUCCESS)
987
anv_physical_device_init_disk_cache(device);
989
if (instance->vk.enabled_extensions.KHR_display) {
990
master_fd = open(primary_path, O_RDWR | O_CLOEXEC);
991
if (master_fd >= 0) {
992
/* prod the device with a GETPARAM call which will fail if
993
* we don't have permission to even render on this device
995
if (anv_gem_get_param(master_fd, I915_PARAM_CHIPSET_ID) == 0) {
1001
device->master_fd = master_fd;
1003
device->engine_info = anv_gem_get_engine_info(fd);
1004
anv_physical_device_init_queue_families(device);
1006
device->local_fd = fd;
1008
result = anv_init_wsi(device);
1009
if (result != VK_SUCCESS)
1010
goto fail_engine_info;
1012
anv_physical_device_init_perf(device, fd);
1014
anv_measure_device_init(device);
1016
get_device_extensions(device, &device->vk.supported_extensions);
1018
anv_genX(&device->info, init_physical_device_state)(device);
1020
*device_out = device;
1024
if (stat(primary_path, &st) == 0) {
1025
device->has_master = true;
1026
device->master_major = major(st.st_rdev);
1027
device->master_minor = minor(st.st_rdev);
1029
device->has_master = false;
1030
device->master_major = 0;
1031
device->master_minor = 0;
1034
if (stat(path, &st) == 0) {
1035
device->has_local = true;
1036
device->local_major = major(st.st_rdev);
1037
device->local_minor = minor(st.st_rdev);
1039
device->has_local = false;
1040
device->local_major = 0;
1041
device->local_minor = 0;
1047
free(device->engine_info);
1048
anv_physical_device_free_disk_cache(device);
1050
ralloc_free(device->compiler);
1052
vk_physical_device_finish(&device->vk);
1054
vk_free(&instance->vk.alloc, device);
1057
if (master_fd != -1)
1063
/* Destructor for anv_physical_device: releases everything the try_create
 * path acquired — WSI state, measure state, engine info, the disk cache,
 * the compiler and perf ralloc contexts, the render-node fd and (if opened)
 * the master fd — then finishes the vk base object and frees the struct
 * from its owning instance's allocator.
 * NOTE(review): the return-type line and braces are missing from this
 * corrupted span; bare number lines are extraction artifacts. */
anv_physical_device_destroy(struct anv_physical_device *device)
1065
anv_finish_wsi(device);
1066
anv_measure_device_destroy(device);
1067
free(device->engine_info);
1068
anv_physical_device_free_disk_cache(device);
1069
ralloc_free(device->compiler);
1070
ralloc_free(device->perf);
1071
close(device->local_fd);
1072
if (device->master_fd >= 0)
1073
close(device->master_fd);
1074
vk_physical_device_finish(&device->vk);
1075
vk_free(&device->instance->vk.alloc, device);
1078
/* vkEnumerateInstanceExtensionProperties entry point.  This driver exposes
 * no layers, so the first return rejects layer queries; otherwise the shared
 * vk_* helper enumerates the static `instance_extensions` table.
 * NOTE(review): corrupted span — the braces and, presumably, an
 * `if (pLayerName)` guard before the LAYER_NOT_PRESENT return are missing;
 * TODO confirm against the original file. */
VkResult anv_EnumerateInstanceExtensionProperties(
1079
const char* pLayerName,
1080
uint32_t* pPropertyCount,
1081
VkExtensionProperties* pProperties)
1084
return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
1086
return vk_enumerate_instance_extension_properties(
1087
&instance_extensions, pPropertyCount, pProperties);
1091
anv_init_dri_options(struct anv_instance *instance)
1093
driParseOptionInfo(&instance->available_dri_options, anv_dri_options,
1094
ARRAY_SIZE(anv_dri_options));
1095
driParseConfigFiles(&instance->dri_options,
1096
&instance->available_dri_options, 0, "anv", NULL, NULL,
1097
instance->vk.app_info.app_name,
1098
instance->vk.app_info.app_version,
1099
instance->vk.app_info.engine_name,
1100
instance->vk.app_info.engine_version);
1102
instance->assume_full_subgroups =
1103
driQueryOptionb(&instance->dri_options, "anv_assume_full_subgroups");
1106
VkResult anv_CreateInstance(
1107
const VkInstanceCreateInfo* pCreateInfo,
1108
const VkAllocationCallbacks* pAllocator,
1109
VkInstance* pInstance)
1111
struct anv_instance *instance;
1114
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
1116
if (pAllocator == NULL)
1117
pAllocator = vk_default_allocator();
1119
instance = vk_alloc(pAllocator, sizeof(*instance), 8,
1120
VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1122
return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
1124
struct vk_instance_dispatch_table dispatch_table;
1125
vk_instance_dispatch_table_from_entrypoints(
1126
&dispatch_table, &anv_instance_entrypoints, true);
1127
vk_instance_dispatch_table_from_entrypoints(
1128
&dispatch_table, &wsi_instance_entrypoints, false);
1130
result = vk_instance_init(&instance->vk, &instance_extensions,
1131
&dispatch_table, pCreateInfo, pAllocator);
1132
if (result != VK_SUCCESS) {
1133
vk_free(pAllocator, instance);
1134
return vk_error(NULL, result);
1137
instance->physical_devices_enumerated = false;
1138
list_inithead(&instance->physical_devices);
1140
instance->pipeline_cache_enabled =
1141
env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true);
1143
VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
1145
anv_init_dri_options(instance);
1147
intel_driver_ds_init();
1149
*pInstance = anv_instance_to_handle(instance);
1154
void anv_DestroyInstance(
1155
VkInstance _instance,
1156
const VkAllocationCallbacks* pAllocator)
1158
ANV_FROM_HANDLE(anv_instance, instance, _instance);
1163
list_for_each_entry_safe(struct anv_physical_device, pdevice,
1164
&instance->physical_devices, link)
1165
anv_physical_device_destroy(pdevice);
1167
VG(VALGRIND_DESTROY_MEMPOOL(instance));
1169
driDestroyOptionCache(&instance->dri_options);
1170
driDestroyOptionInfo(&instance->available_dri_options);
1172
vk_instance_finish(&instance->vk);
1173
vk_free(&instance->vk.alloc, instance);
1177
anv_enumerate_physical_devices(struct anv_instance *instance)
1179
if (instance->physical_devices_enumerated)
1182
instance->physical_devices_enumerated = true;
1184
/* TODO: Check for more devices ? */
1185
drmDevicePtr devices[8];
1188
max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
1189
if (max_devices < 1)
1192
VkResult result = VK_SUCCESS;
1193
for (unsigned i = 0; i < (unsigned)max_devices; i++) {
1194
if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
1195
devices[i]->bustype == DRM_BUS_PCI &&
1196
devices[i]->deviceinfo.pci->vendor_id == 0x8086) {
1198
struct anv_physical_device *pdevice;
1199
result = anv_physical_device_try_create(instance, devices[i],
1201
/* Incompatible DRM device, skip. */
1202
if (result == VK_ERROR_INCOMPATIBLE_DRIVER) {
1203
result = VK_SUCCESS;
1207
/* Error creating the physical device, report the error. */
1208
if (result != VK_SUCCESS)
1211
list_addtail(&pdevice->link, &instance->physical_devices);
1214
drmFreeDevices(devices, max_devices);
1216
/* If we successfully enumerated any devices, call it success */
1220
VkResult anv_EnumeratePhysicalDevices(
1221
VkInstance _instance,
1222
uint32_t* pPhysicalDeviceCount,
1223
VkPhysicalDevice* pPhysicalDevices)
1225
ANV_FROM_HANDLE(anv_instance, instance, _instance);
1226
VK_OUTARRAY_MAKE_TYPED(VkPhysicalDevice, out,
1227
pPhysicalDevices, pPhysicalDeviceCount);
1229
VkResult result = anv_enumerate_physical_devices(instance);
1230
if (result != VK_SUCCESS)
1233
list_for_each_entry(struct anv_physical_device, pdevice,
1234
&instance->physical_devices, link) {
1235
vk_outarray_append_typed(VkPhysicalDevice, &out, i) {
1236
*i = anv_physical_device_to_handle(pdevice);
1240
return vk_outarray_status(&out);
1243
VkResult anv_EnumeratePhysicalDeviceGroups(
1244
VkInstance _instance,
1245
uint32_t* pPhysicalDeviceGroupCount,
1246
VkPhysicalDeviceGroupProperties* pPhysicalDeviceGroupProperties)
1248
ANV_FROM_HANDLE(anv_instance, instance, _instance);
1249
VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceGroupProperties, out,
1250
pPhysicalDeviceGroupProperties,
1251
pPhysicalDeviceGroupCount);
1253
VkResult result = anv_enumerate_physical_devices(instance);
1254
if (result != VK_SUCCESS)
1257
list_for_each_entry(struct anv_physical_device, pdevice,
1258
&instance->physical_devices, link) {
1259
vk_outarray_append_typed(VkPhysicalDeviceGroupProperties, &out, p) {
1260
p->physicalDeviceCount = 1;
1261
memset(p->physicalDevices, 0, sizeof(p->physicalDevices));
1262
p->physicalDevices[0] = anv_physical_device_to_handle(pdevice);
1263
p->subsetAllocation = false;
1265
vk_foreach_struct(ext, p->pNext)
1266
anv_debug_ignored_stype(ext->sType);
1270
return vk_outarray_status(&out);
1273
void anv_GetPhysicalDeviceFeatures(
1274
VkPhysicalDevice physicalDevice,
1275
VkPhysicalDeviceFeatures* pFeatures)
1277
ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
1279
/* Just pick one; they're all the same */
1280
const bool has_astc_ldr =
1281
isl_format_supports_sampling(&pdevice->info,
1282
ISL_FORMAT_ASTC_LDR_2D_4X4_FLT16);
1284
*pFeatures = (VkPhysicalDeviceFeatures) {
1285
.robustBufferAccess = true,
1286
.fullDrawIndexUint32 = true,
1287
.imageCubeArray = true,
1288
.independentBlend = true,
1289
.geometryShader = true,
1290
.tessellationShader = true,
1291
.sampleRateShading = true,
1292
.dualSrcBlend = true,
1294
.multiDrawIndirect = true,
1295
.drawIndirectFirstInstance = true,
1297
.depthBiasClamp = true,
1298
.fillModeNonSolid = true,
1299
.depthBounds = pdevice->info.ver >= 12,
1301
.largePoints = true,
1303
.multiViewport = true,
1304
.samplerAnisotropy = true,
1305
.textureCompressionETC2 = pdevice->info.ver >= 8 ||
1306
pdevice->info.platform == INTEL_PLATFORM_BYT,
1307
.textureCompressionASTC_LDR = has_astc_ldr,
1308
.textureCompressionBC = true,
1309
.occlusionQueryPrecise = true,
1310
.pipelineStatisticsQuery = true,
1311
.fragmentStoresAndAtomics = true,
1312
.shaderTessellationAndGeometryPointSize = true,
1313
.shaderImageGatherExtended = true,
1314
.shaderStorageImageExtendedFormats = true,
1315
.shaderStorageImageMultisample = false,
1316
.shaderStorageImageReadWithoutFormat = false,
1317
.shaderStorageImageWriteWithoutFormat = true,
1318
.shaderUniformBufferArrayDynamicIndexing = true,
1319
.shaderSampledImageArrayDynamicIndexing = true,
1320
.shaderStorageBufferArrayDynamicIndexing = true,
1321
.shaderStorageImageArrayDynamicIndexing = true,
1322
.shaderClipDistance = true,
1323
.shaderCullDistance = true,
1324
.shaderFloat64 = pdevice->info.ver >= 8 &&
1325
pdevice->info.has_64bit_float,
1326
.shaderInt64 = pdevice->info.ver >= 8,
1327
.shaderInt16 = pdevice->info.ver >= 8,
1328
.shaderResourceMinLod = pdevice->info.ver >= 9,
1329
.variableMultisampleRate = true,
1330
.inheritedQueries = true,
1333
/* We can't do image stores in vec4 shaders */
1334
pFeatures->vertexPipelineStoresAndAtomics =
1335
pdevice->compiler->scalar_stage[MESA_SHADER_VERTEX] &&
1336
pdevice->compiler->scalar_stage[MESA_SHADER_GEOMETRY];
1338
struct vk_app_info *app_info = &pdevice->instance->vk.app_info;
1340
/* The new DOOM and Wolfenstein games require depthBounds without
1341
* checking for it. They seem to run fine without it so just claim it's
1342
* there and accept the consequences.
1344
if (app_info->engine_name && strcmp(app_info->engine_name, "idTech") == 0)
1345
pFeatures->depthBounds = true;
1349
anv_get_physical_device_features_1_1(struct anv_physical_device *pdevice,
1350
VkPhysicalDeviceVulkan11Features *f)
1352
assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES);
1354
f->storageBuffer16BitAccess = pdevice->info.ver >= 8;
1355
f->uniformAndStorageBuffer16BitAccess = pdevice->info.ver >= 8;
1356
f->storagePushConstant16 = pdevice->info.ver >= 8;
1357
f->storageInputOutput16 = false;
1358
f->multiview = true;
1359
f->multiviewGeometryShader = true;
1360
f->multiviewTessellationShader = true;
1361
f->variablePointersStorageBuffer = true;
1362
f->variablePointers = true;
1363
f->protectedMemory = false;
1364
f->samplerYcbcrConversion = true;
1365
f->shaderDrawParameters = true;
1369
anv_get_physical_device_features_1_2(struct anv_physical_device *pdevice,
1370
VkPhysicalDeviceVulkan12Features *f)
1372
assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES);
1374
f->samplerMirrorClampToEdge = true;
1375
f->drawIndirectCount = true;
1376
f->storageBuffer8BitAccess = pdevice->info.ver >= 8;
1377
f->uniformAndStorageBuffer8BitAccess = pdevice->info.ver >= 8;
1378
f->storagePushConstant8 = pdevice->info.ver >= 8;
1379
f->shaderBufferInt64Atomics = pdevice->info.ver >= 9;
1380
f->shaderSharedInt64Atomics = false;
1381
f->shaderFloat16 = pdevice->info.ver >= 8;
1382
f->shaderInt8 = pdevice->info.ver >= 8;
1384
bool descIndexing = pdevice->has_a64_buffer_access &&
1385
pdevice->has_bindless_images;
1386
f->descriptorIndexing = descIndexing;
1387
f->shaderInputAttachmentArrayDynamicIndexing = false;
1388
f->shaderUniformTexelBufferArrayDynamicIndexing = descIndexing;
1389
f->shaderStorageTexelBufferArrayDynamicIndexing = descIndexing;
1390
f->shaderUniformBufferArrayNonUniformIndexing = descIndexing;
1391
f->shaderSampledImageArrayNonUniformIndexing = descIndexing;
1392
f->shaderStorageBufferArrayNonUniformIndexing = descIndexing;
1393
f->shaderStorageImageArrayNonUniformIndexing = descIndexing;
1394
f->shaderInputAttachmentArrayNonUniformIndexing = false;
1395
f->shaderUniformTexelBufferArrayNonUniformIndexing = descIndexing;
1396
f->shaderStorageTexelBufferArrayNonUniformIndexing = descIndexing;
1397
f->descriptorBindingUniformBufferUpdateAfterBind = descIndexing;
1398
f->descriptorBindingSampledImageUpdateAfterBind = descIndexing;
1399
f->descriptorBindingStorageImageUpdateAfterBind = descIndexing;
1400
f->descriptorBindingStorageBufferUpdateAfterBind = descIndexing;
1401
f->descriptorBindingUniformTexelBufferUpdateAfterBind = descIndexing;
1402
f->descriptorBindingStorageTexelBufferUpdateAfterBind = descIndexing;
1403
f->descriptorBindingUpdateUnusedWhilePending = descIndexing;
1404
f->descriptorBindingPartiallyBound = descIndexing;
1405
f->descriptorBindingVariableDescriptorCount = descIndexing;
1406
f->runtimeDescriptorArray = descIndexing;
1408
f->samplerFilterMinmax = pdevice->info.ver >= 9;
1409
f->scalarBlockLayout = true;
1410
f->imagelessFramebuffer = true;
1411
f->uniformBufferStandardLayout = true;
1412
f->shaderSubgroupExtendedTypes = true;
1413
f->separateDepthStencilLayouts = true;
1414
f->hostQueryReset = true;
1415
f->timelineSemaphore = true;
1416
f->bufferDeviceAddress = pdevice->has_a64_buffer_access;
1417
f->bufferDeviceAddressCaptureReplay = pdevice->has_a64_buffer_access;
1418
f->bufferDeviceAddressMultiDevice = false;
1419
f->vulkanMemoryModel = true;
1420
f->vulkanMemoryModelDeviceScope = true;
1421
f->vulkanMemoryModelAvailabilityVisibilityChains = true;
1422
f->shaderOutputViewportIndex = true;
1423
f->shaderOutputLayer = true;
1424
f->subgroupBroadcastDynamicId = true;
1428
anv_get_physical_device_features_1_3(struct anv_physical_device *pdevice,
1429
VkPhysicalDeviceVulkan13Features *f)
1431
assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES);
1433
f->robustImageAccess = true;
1434
f->inlineUniformBlock = true;
1435
f->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
1436
f->pipelineCreationCacheControl = true;
1437
f->privateData = true;
1438
f->shaderDemoteToHelperInvocation = true;
1439
f->shaderTerminateInvocation = true;
1440
f->subgroupSizeControl = true;
1441
f->computeFullSubgroups = true;
1442
f->synchronization2 = true;
1443
f->textureCompressionASTC_HDR = false;
1444
f->shaderZeroInitializeWorkgroupMemory = true;
1445
f->dynamicRendering = true;
1446
f->shaderIntegerDotProduct = true;
1447
f->maintenance4 = true;
1450
void anv_GetPhysicalDeviceFeatures2(
1451
VkPhysicalDevice physicalDevice,
1452
VkPhysicalDeviceFeatures2* pFeatures)
1454
ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
1455
anv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
1457
VkPhysicalDeviceVulkan11Features core_1_1 = {
1458
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
1460
anv_get_physical_device_features_1_1(pdevice, &core_1_1);
1462
VkPhysicalDeviceVulkan12Features core_1_2 = {
1463
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
1465
anv_get_physical_device_features_1_2(pdevice, &core_1_2);
1467
VkPhysicalDeviceVulkan13Features core_1_3 = {
1468
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES,
1470
anv_get_physical_device_features_1_3(pdevice, &core_1_3);
1472
vk_foreach_struct(ext, pFeatures->pNext) {
1473
if (vk_get_physical_device_core_1_1_feature_ext(ext, &core_1_1))
1475
if (vk_get_physical_device_core_1_2_feature_ext(ext, &core_1_2))
1477
if (vk_get_physical_device_core_1_3_feature_ext(ext, &core_1_3))
1480
switch (ext->sType) {
1481
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT: {
1482
VkPhysicalDevice4444FormatsFeaturesEXT *features =
1483
(VkPhysicalDevice4444FormatsFeaturesEXT *)ext;
1484
features->formatA4R4G4B4 = true;
1485
features->formatA4B4G4R4 = false;
1489
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR: {
1490
VkPhysicalDeviceAccelerationStructureFeaturesKHR *features = (void *)ext;
1491
features->accelerationStructure = false;
1492
features->accelerationStructureCaptureReplay = false;
1493
features->accelerationStructureIndirectBuild = false;
1494
features->accelerationStructureHostCommands = false;
1495
features->descriptorBindingAccelerationStructureUpdateAfterBind = true;
1499
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT: {
1500
VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *features = (void *)ext;
1501
features->bufferDeviceAddress = pdevice->has_a64_buffer_access;
1502
features->bufferDeviceAddressCaptureReplay = false;
1503
features->bufferDeviceAddressMultiDevice = false;
1508
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COLOR_WRITE_ENABLE_FEATURES_EXT: {
1509
VkPhysicalDeviceColorWriteEnableFeaturesEXT *features =
1510
(VkPhysicalDeviceColorWriteEnableFeaturesEXT *)ext;
1511
features->colorWriteEnable = true;
1515
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_2D_VIEW_OF_3D_FEATURES_EXT: {
1516
VkPhysicalDeviceImage2DViewOf3DFeaturesEXT *features =
1517
(VkPhysicalDeviceImage2DViewOf3DFeaturesEXT *)ext;
1518
features->image2DViewOf3D = true;
1519
features->sampler2DViewOf3D = pdevice->info.ver >= 9;
1523
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV: {
1524
VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *features =
1525
(VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *)ext;
1526
features->computeDerivativeGroupQuads = true;
1527
features->computeDerivativeGroupLinear = true;
1531
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
1532
VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
1533
(VkPhysicalDeviceConditionalRenderingFeaturesEXT*)ext;
1534
features->conditionalRendering = pdevice->info.verx10 >= 75;
1535
features->inheritedConditionalRendering = pdevice->info.verx10 >= 75;
1539
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
1540
VkPhysicalDeviceCustomBorderColorFeaturesEXT *features =
1541
(VkPhysicalDeviceCustomBorderColorFeaturesEXT *)ext;
1542
features->customBorderColors = pdevice->info.ver >= 8;
1543
features->customBorderColorWithoutFormat = pdevice->info.ver >= 8;
1547
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {
1548
VkPhysicalDeviceDepthClipEnableFeaturesEXT *features =
1549
(VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext;
1550
features->depthClipEnable = true;
1554
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES_KHR: {
1555
VkPhysicalDeviceDynamicRenderingFeaturesKHR *features =
1556
(VkPhysicalDeviceDynamicRenderingFeaturesKHR *)ext;
1557
features->dynamicRendering = true;
1561
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT: {
1562
VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *features =
1563
(VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *)ext;
1564
features->fragmentShaderSampleInterlock = pdevice->info.ver >= 9;
1565
features->fragmentShaderPixelInterlock = pdevice->info.ver >= 9;
1566
features->fragmentShaderShadingRateInterlock = false;
1570
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GLOBAL_PRIORITY_QUERY_FEATURES_EXT: {
1571
VkPhysicalDeviceGlobalPriorityQueryFeaturesEXT *features =
1572
(VkPhysicalDeviceGlobalPriorityQueryFeaturesEXT *)ext;
1573
features->globalPriorityQuery = true;
1577
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR: {
1578
VkPhysicalDeviceFragmentShadingRateFeaturesKHR *features =
1579
(VkPhysicalDeviceFragmentShadingRateFeaturesKHR *)ext;
1580
features->attachmentFragmentShadingRate = false;
1581
features->pipelineFragmentShadingRate = true;
1582
features->primitiveFragmentShadingRate =
1583
pdevice->info.has_coarse_pixel_primitive_and_cb;
1584
features->attachmentFragmentShadingRate =
1585
pdevice->info.has_coarse_pixel_primitive_and_cb;
1589
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: {
1590
VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features =
1591
(VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext;
1592
features->indexTypeUint8 = true;
1596
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT: {
1597
VkPhysicalDeviceLineRasterizationFeaturesEXT *features =
1598
(VkPhysicalDeviceLineRasterizationFeaturesEXT *)ext;
1599
/* Rectangular lines must use the strict algorithm, which is not
1600
* supported for wide lines prior to ICL. See rasterization_mode for
1601
* details and how the HW states are programmed.
1603
features->rectangularLines = pdevice->info.ver >= 10;
1604
features->bresenhamLines = true;
1605
/* Support for Smooth lines with MSAA was removed on gfx11. From the
1606
* BSpec section "Multisample ModesState" table for "AA Line Support
1609
* GFX10:BUG:######## NUM_MULTISAMPLES == 1
1611
* Fortunately, this isn't a case most people care about.
1613
features->smoothLines = pdevice->info.ver < 10;
1614
features->stippledRectangularLines = false;
1615
features->stippledBresenhamLines = true;
1616
features->stippledSmoothLines = false;
1620
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_FEATURES_KHR: {
1621
VkPhysicalDeviceMaintenance4FeaturesKHR *features =
1622
(VkPhysicalDeviceMaintenance4FeaturesKHR *)ext;
1623
features->maintenance4 = true;
1627
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_NV: {
1628
VkPhysicalDeviceMeshShaderFeaturesNV *features =
1629
(VkPhysicalDeviceMeshShaderFeaturesNV *)ext;
1630
features->taskShader = pdevice->vk.supported_extensions.NV_mesh_shader;
1631
features->meshShader = pdevice->vk.supported_extensions.NV_mesh_shader;
1635
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_VALVE: {
1636
VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *features =
1637
(VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *)ext;
1638
features->mutableDescriptorType = true;
1642
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR: {
1643
VkPhysicalDevicePerformanceQueryFeaturesKHR *feature =
1644
(VkPhysicalDevicePerformanceQueryFeaturesKHR *)ext;
1645
feature->performanceCounterQueryPools = true;
1646
/* HW only supports a single configuration at a time. */
1647
feature->performanceCounterMultipleQueryPools = false;
1651
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: {
1652
VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *features =
1653
(VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *)ext;
1654
features->pipelineExecutableInfo = true;
1658
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT: {
1659
VkPhysicalDeviceProvokingVertexFeaturesEXT *features =
1660
(VkPhysicalDeviceProvokingVertexFeaturesEXT *)ext;
1661
features->provokingVertexLast = true;
1662
features->transformFeedbackPreservesProvokingVertex = true;
1666
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR: {
1667
VkPhysicalDeviceRayQueryFeaturesKHR *features = (void *)ext;
1668
features->rayQuery = pdevice->info.has_ray_tracing;
1672
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
1673
VkPhysicalDeviceRobustness2FeaturesEXT *features = (void *)ext;
1674
features->robustBufferAccess2 = true;
1675
features->robustImageAccess2 = true;
1676
features->nullDescriptor = true;
1680
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT: {
1681
VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *features = (void *)ext;
1682
features->shaderBufferFloat32Atomics = true;
1683
features->shaderBufferFloat32AtomicAdd = pdevice->info.has_lsc;
1684
features->shaderBufferFloat64Atomics =
1685
pdevice->info.has_64bit_float && pdevice->info.has_lsc;
1686
features->shaderBufferFloat64AtomicAdd = false;
1687
features->shaderSharedFloat32Atomics = true;
1688
features->shaderSharedFloat32AtomicAdd = false;
1689
features->shaderSharedFloat64Atomics = false;
1690
features->shaderSharedFloat64AtomicAdd = false;
1691
features->shaderImageFloat32Atomics = true;
1692
features->shaderImageFloat32AtomicAdd = false;
1693
features->sparseImageFloat32Atomics = false;
1694
features->sparseImageFloat32AtomicAdd = false;
1698
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_2_FEATURES_EXT: {
1699
VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT *features = (void *)ext;
1700
features->shaderBufferFloat16Atomics = false;
1701
features->shaderBufferFloat16AtomicAdd = false;
1702
features->shaderBufferFloat16AtomicMinMax = false;
1703
features->shaderBufferFloat32AtomicMinMax = pdevice->info.ver >= 9;
1704
features->shaderBufferFloat64AtomicMinMax =
1705
pdevice->info.has_64bit_float && pdevice->info.has_lsc;
1706
features->shaderSharedFloat16Atomics = false;
1707
features->shaderSharedFloat16AtomicAdd = false;
1708
features->shaderSharedFloat16AtomicMinMax = false;
1709
features->shaderSharedFloat32AtomicMinMax = pdevice->info.ver >= 9;
1710
features->shaderSharedFloat64AtomicMinMax = false;
1711
features->shaderImageFloat32AtomicMinMax = false;
1712
features->sparseImageFloat32AtomicMinMax = false;
1716
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CLOCK_FEATURES_KHR: {
1717
VkPhysicalDeviceShaderClockFeaturesKHR *features =
1718
(VkPhysicalDeviceShaderClockFeaturesKHR *)ext;
1719
features->shaderSubgroupClock = true;
1720
features->shaderDeviceClock = false;
1724
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_FUNCTIONS_2_FEATURES_INTEL: {
1725
VkPhysicalDeviceShaderIntegerFunctions2FeaturesINTEL *features =
1726
(VkPhysicalDeviceShaderIntegerFunctions2FeaturesINTEL *)ext;
1727
features->shaderIntegerFunctions2 = true;
1731
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_UNIFORM_CONTROL_FLOW_FEATURES_KHR: {
1732
VkPhysicalDeviceShaderSubgroupUniformControlFlowFeaturesKHR *features =
1733
(VkPhysicalDeviceShaderSubgroupUniformControlFlowFeaturesKHR *)ext;
1734
features->shaderSubgroupUniformControlFlow = true;
1738
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
1739
VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *features =
1740
(VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *)ext;
1741
features->texelBufferAlignment = true;
1745
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
1746
VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =
1747
(VkPhysicalDeviceTransformFeedbackFeaturesEXT *)ext;
1748
features->transformFeedback = true;
1749
features->geometryStreams = true;
1753
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
1754
VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
1755
(VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
1756
features->vertexAttributeInstanceRateDivisor = true;
1757
features->vertexAttributeInstanceRateZeroDivisor = true;
1761
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR: {
1762
VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR *features =
1763
(VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR *)ext;
1764
features->workgroupMemoryExplicitLayout = true;
1765
features->workgroupMemoryExplicitLayoutScalarBlockLayout = true;
1766
features->workgroupMemoryExplicitLayout8BitAccess = true;
1767
features->workgroupMemoryExplicitLayout16BitAccess = true;
1771
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_YCBCR_IMAGE_ARRAYS_FEATURES_EXT: {
1772
VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *features =
1773
(VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *)ext;
1774
features->ycbcrImageArrays = true;
1778
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT: {
1779
VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *features =
1780
(VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *)ext;
1781
features->extendedDynamicState = true;
1785
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_2_FEATURES_EXT: {
1786
VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *features =
1787
(VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *)ext;
1788
features->extendedDynamicState2 = true;
1789
features->extendedDynamicState2LogicOp = true;
1790
features->extendedDynamicState2PatchControlPoints = false;
1794
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_FEATURES_EXT: {
1795
VkPhysicalDeviceMultiDrawFeaturesEXT *features = (VkPhysicalDeviceMultiDrawFeaturesEXT *)ext;
1796
features->multiDraw = true;
1800
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVE_TOPOLOGY_LIST_RESTART_FEATURES_EXT: {
1801
VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT *features =
1802
(VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT *)ext;
1803
features->primitiveTopologyListRestart = true;
1804
features->primitiveTopologyPatchListRestart = true;
1808
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_CONTROL_FEATURES_EXT: {
1809
VkPhysicalDeviceDepthClipControlFeaturesEXT *features =
1810
(VkPhysicalDeviceDepthClipControlFeaturesEXT *)ext;
1811
features->depthClipControl = true;
1816
anv_debug_ignored_stype(ext->sType);
1823
#define MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS 64
1825
#define MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS 64
1826
#define MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS 256
1828
#define MAX_CUSTOM_BORDER_COLORS 4096
1830
/* vkGetPhysicalDeviceProperties: fills *pProperties with the device limits
 * and identification for this Intel GPU.  Limits that depend on bindless
 * support (SSBOs, textures, samplers, images) are computed up front and then
 * plugged into the VkPhysicalDeviceLimits initializer.
 *
 * NOTE(review): this extract is garbled.  The bare numeric lines are stray
 * original-file line numbers, and several original lines were dropped
 * (function/struct braces, the maxComputeWorkGroupSize initializer entries,
 * the .vendorID and .limits fields of the final struct).  Restore those from
 * the upstream file before compiling.
 */
void anv_GetPhysicalDeviceProperties(
1831
VkPhysicalDevice physicalDevice,
1832
VkPhysicalDeviceProperties* pProperties)
1834
ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
1835
const struct intel_device_info *devinfo = &pdevice->info;
1837
/* With A64 buffer access the SSBO count is effectively unbounded; otherwise
 * it is limited by binding-table slots. */
const uint32_t max_ssbos = pdevice->has_a64_buffer_access ? UINT16_MAX : 64;
1838
const uint32_t max_textures =
1839
pdevice->has_bindless_images ? UINT16_MAX : 128;
1840
const uint32_t max_samplers =
1841
pdevice->has_bindless_samplers ? UINT16_MAX :
1842
(devinfo->verx10 >= 75) ? 128 : 16;
1843
const uint32_t max_images =
1844
pdevice->has_bindless_images ? UINT16_MAX : MAX_IMAGES;
1846
/* If we can use bindless for everything, claim a high per-stage limit,
1847
* otherwise use the binding table size, minus the slots reserved for
1848
* render targets and one slot for the descriptor buffer. */
1849
const uint32_t max_per_stage =
1850
pdevice->has_bindless_images && pdevice->has_a64_buffer_access
1851
? UINT32_MAX : MAX_BINDING_TABLE_SIZE - MAX_RTS - 1;
1853
/* 32 invocations per CS thread, capped at the API maximum of 1024. */
const uint32_t max_workgroup_size =
1854
MIN2(1024, 32 * devinfo->max_cs_workgroup_threads);
1856
VkSampleCountFlags sample_counts =
1857
isl_device_get_sample_counts(&pdevice->isl_dev);
1860
VkPhysicalDeviceLimits limits = {
1861
.maxImageDimension1D = (1 << 14),
1862
.maxImageDimension2D = (1 << 14),
1863
.maxImageDimension3D = (1 << 11),
1864
.maxImageDimensionCube = (1 << 14),
1865
.maxImageArrayLayers = (1 << 11),
1866
.maxTexelBufferElements = 128 * 1024 * 1024,
1867
.maxUniformBufferRange = pdevice->compiler->indirect_ubos_use_sampler ? (1u << 27) : (1u << 30),
1868
.maxStorageBufferRange = pdevice->isl_dev.max_buffer_size,
1869
.maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
1870
.maxMemoryAllocationCount = UINT32_MAX,
1871
.maxSamplerAllocationCount = 64 * 1024,
1872
.bufferImageGranularity = 1,
1873
.sparseAddressSpaceSize = 0,
1874
.maxBoundDescriptorSets = MAX_SETS,
1875
.maxPerStageDescriptorSamplers = max_samplers,
1876
.maxPerStageDescriptorUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS,
1877
.maxPerStageDescriptorStorageBuffers = max_ssbos,
1878
.maxPerStageDescriptorSampledImages = max_textures,
1879
.maxPerStageDescriptorStorageImages = max_images,
1880
.maxPerStageDescriptorInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS,
1881
.maxPerStageResources = max_per_stage,
1882
.maxDescriptorSetSamplers = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSamplers */
1883
.maxDescriptorSetUniformBuffers = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS, /* number of stages * maxPerStageDescriptorUniformBuffers */
1884
.maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
1885
.maxDescriptorSetStorageBuffers = 6 * max_ssbos, /* number of stages * maxPerStageDescriptorStorageBuffers */
1886
.maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
1887
.maxDescriptorSetSampledImages = 6 * max_textures, /* number of stages * maxPerStageDescriptorSampledImages */
1888
.maxDescriptorSetStorageImages = 6 * max_images, /* number of stages * maxPerStageDescriptorStorageImages */
1889
.maxDescriptorSetInputAttachments = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS,
1890
.maxVertexInputAttributes = MAX_VES,
1891
.maxVertexInputBindings = MAX_VBS,
1892
/* Broadwell PRMs: Volume 2d: Command Reference: Structures:
1894
* VERTEX_ELEMENT_STATE::Source Element Offset: [0,2047]
1896
.maxVertexInputAttributeOffset = 2047,
1897
/* Broadwell PRMs: Volume 2d: Command Reference: Structures:
1899
* VERTEX_BUFFER_STATE::Buffer Pitch: [0,2048]
1901
* Skylake PRMs: Volume 2d: Command Reference: Structures:
1903
* VERTEX_BUFFER_STATE::Buffer Pitch: [0,4095]
1905
.maxVertexInputBindingStride = devinfo->ver < 9 ? 2048 : 4095,
1906
.maxVertexOutputComponents = 128,
1907
.maxTessellationGenerationLevel = 64,
1908
.maxTessellationPatchSize = 32,
1909
.maxTessellationControlPerVertexInputComponents = 128,
1910
.maxTessellationControlPerVertexOutputComponents = 128,
1911
.maxTessellationControlPerPatchOutputComponents = 128,
1912
.maxTessellationControlTotalOutputComponents = 2048,
1913
.maxTessellationEvaluationInputComponents = 128,
1914
.maxTessellationEvaluationOutputComponents = 128,
1915
.maxGeometryShaderInvocations = 32,
1916
.maxGeometryInputComponents = devinfo->ver >= 8 ? 128 : 64,
1917
.maxGeometryOutputComponents = 128,
1918
.maxGeometryOutputVertices = 256,
1919
.maxGeometryTotalOutputComponents = 1024,
1920
.maxFragmentInputComponents = 116, /* 128 components - (PSIZ, CLIP_DIST0, CLIP_DIST1) */
1921
.maxFragmentOutputAttachments = 8,
1922
.maxFragmentDualSrcAttachments = 1,
1923
.maxFragmentCombinedOutputResources = MAX_RTS + max_ssbos + max_images,
1924
.maxComputeSharedMemorySize = 64 * 1024,
1925
.maxComputeWorkGroupCount = { 65535, 65535, 65535 },
1926
.maxComputeWorkGroupInvocations = max_workgroup_size,
1927
/* NOTE(review): the three array entries and closing "}," of this
 * initializer were dropped by the extraction (original lines 1928-1931). */
.maxComputeWorkGroupSize = {
1932
.subPixelPrecisionBits = 8,
1933
.subTexelPrecisionBits = 8,
1934
.mipmapPrecisionBits = 8,
1935
.maxDrawIndexedIndexValue = UINT32_MAX,
1936
.maxDrawIndirectCount = UINT32_MAX,
1937
.maxSamplerLodBias = 16,
1938
.maxSamplerAnisotropy = 16,
1939
.maxViewports = MAX_VIEWPORTS,
1940
.maxViewportDimensions = { (1 << 14), (1 << 14) },
1941
.viewportBoundsRange = { INT16_MIN, INT16_MAX },
1942
.viewportSubPixelBits = 13, /* We take a float? */
1943
.minMemoryMapAlignment = 4096, /* A page */
1944
/* The dataport requires texel alignment so we need to assume a worst
1945
* case of R32G32B32A32 which is 16 bytes.
1947
.minTexelBufferOffsetAlignment = 16,
1948
.minUniformBufferOffsetAlignment = ANV_UBO_ALIGNMENT,
1949
.minStorageBufferOffsetAlignment = ANV_SSBO_ALIGNMENT,
1950
.minTexelOffset = -8,
1951
.maxTexelOffset = 7,
1952
.minTexelGatherOffset = -32,
1953
.maxTexelGatherOffset = 31,
1954
.minInterpolationOffset = -0.5,
1955
.maxInterpolationOffset = 0.4375,
1956
.subPixelInterpolationOffsetBits = 4,
1957
.maxFramebufferWidth = (1 << 14),
1958
.maxFramebufferHeight = (1 << 14),
1959
.maxFramebufferLayers = (1 << 11),
1960
.framebufferColorSampleCounts = sample_counts,
1961
.framebufferDepthSampleCounts = sample_counts,
1962
.framebufferStencilSampleCounts = sample_counts,
1963
.framebufferNoAttachmentsSampleCounts = sample_counts,
1964
.maxColorAttachments = MAX_RTS,
1965
.sampledImageColorSampleCounts = sample_counts,
1966
.sampledImageIntegerSampleCounts = sample_counts,
1967
.sampledImageDepthSampleCounts = sample_counts,
1968
.sampledImageStencilSampleCounts = sample_counts,
1969
.storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
1970
.maxSampleMaskWords = 1,
1971
.timestampComputeAndGraphics = true,
1972
/* Nanoseconds per timestamp tick. */
.timestampPeriod = 1000000000.0 / devinfo->timestamp_frequency,
1973
.maxClipDistances = 8,
1974
.maxCullDistances = 8,
1975
.maxCombinedClipAndCullDistances = 8,
1976
.discreteQueuePriorities = 2,
1977
.pointSizeRange = { 0.125, 255.875 },
1978
/* While SKL and up support much wider lines than we are setting here,
1979
* in practice we run into conformance issues if we go past this limit.
1980
* Since the Windows driver does the same, it's probably fair to assume
1981
* that no one needs more than this.
1983
.lineWidthRange = { 0.0, devinfo->ver >= 9 ? 8.0 : 7.9921875 },
1984
.pointSizeGranularity = (1.0 / 8.0),
1985
.lineWidthGranularity = (1.0 / 128.0),
1986
.strictLines = false,
1987
.standardSampleLocations = true,
1988
.optimalBufferCopyOffsetAlignment = 128,
1989
.optimalBufferCopyRowPitchAlignment = 128,
1990
.nonCoherentAtomSize = 64,
1993
/* NOTE(review): the closing "};" of the limits initializer and the
 * .vendorID and .limits fields of this struct were dropped by the
 * extraction (original lines 1991-1992, 1996, 2001). */
*pProperties = (VkPhysicalDeviceProperties) {
1994
.apiVersion = ANV_API_VERSION,
1995
.driverVersion = vk_get_driver_version(),
1997
.deviceID = pdevice->info.pci_device_id,
1998
/* Local (device) memory implies a discrete GPU. */
.deviceType = pdevice->info.has_local_mem ?
1999
VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU :
2000
VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
2002
.sparseProperties = {0}, /* Broadwell doesn't do sparse. */
2005
snprintf(pProperties->deviceName, sizeof(pProperties->deviceName),
2006
"%s", pdevice->info.name);
2007
memcpy(pProperties->pipelineCacheUUID,
2008
pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
2012
/* Fills the Vulkan 1.1 core property struct: device/driver UUIDs, subgroup
 * size and supported subgroup operations per stage, multiview limits, and
 * protected-memory/descriptor bounds.
 *
 * NOTE(review): the "static void" return-type line (original line 2011),
 * braces, and a comment continuation near maxPerSetDescriptors were dropped
 * by the extraction; bare numeric lines are stray line numbers.
 */
anv_get_physical_device_properties_1_1(struct anv_physical_device *pdevice,
2013
VkPhysicalDeviceVulkan11Properties *p)
2015
assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES);
2017
memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
2018
memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
2019
/* No LUID support on this platform. */
memset(p->deviceLUID, 0, VK_LUID_SIZE);
2020
p->deviceNodeMask = 0;
2021
p->deviceLUIDValid = false;
2023
p->subgroupSize = BRW_SUBGROUP_SIZE;
2024
/* Collect every stage the compiler runs in scalar (SIMD) mode; subgroup
 * ops are only advertised for those. */
VkShaderStageFlags scalar_stages = 0;
2025
for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) {
2026
if (pdevice->compiler->scalar_stage[stage])
2027
scalar_stages |= mesa_to_vk_shader_stage(stage);
2029
if (pdevice->vk.supported_extensions.KHR_ray_tracing_pipeline) {
2030
scalar_stages |= VK_SHADER_STAGE_RAYGEN_BIT_KHR |
2031
VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
2032
VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
2033
VK_SHADER_STAGE_MISS_BIT_KHR |
2034
VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
2035
VK_SHADER_STAGE_CALLABLE_BIT_KHR;
2037
if (pdevice->vk.supported_extensions.NV_mesh_shader) {
2038
scalar_stages |= VK_SHADER_STAGE_TASK_BIT_NV |
2039
VK_SHADER_STAGE_MESH_BIT_NV;
2041
p->subgroupSupportedStages = scalar_stages;
2042
p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
2043
VK_SUBGROUP_FEATURE_VOTE_BIT |
2044
VK_SUBGROUP_FEATURE_BALLOT_BIT |
2045
VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
2046
VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
2047
VK_SUBGROUP_FEATURE_QUAD_BIT;
2048
if (pdevice->info.ver >= 8) {
2049
/* TODO: There's no technical reason why these can't be made to
2050
* work on gfx7 but they don't at the moment so it's best to leave
2051
* the feature disabled than enabled and broken.
2053
p->subgroupSupportedOperations |= VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
2054
VK_SUBGROUP_FEATURE_CLUSTERED_BIT;
2056
p->subgroupQuadOperationsInAllStages = pdevice->info.ver >= 8;
2058
p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY;
2059
p->maxMultiviewViewCount = 16;
2060
p->maxMultiviewInstanceIndex = UINT32_MAX / 16;
2061
p->protectedNoFault = false;
2062
/* This value doesn't matter for us today as our per-stage descriptors are
2065
p->maxPerSetDescriptors = 1024;
2066
p->maxMemoryAllocationSize = MAX_MEMORY_ALLOCATION_SIZE;
2070
/* Fills the Vulkan 1.2 core property struct: driver identification,
 * conformance version, float-controls behavior, descriptor-indexing
 * (update-after-bind) limits derived from the bindless surface-state table,
 * depth/stencil resolve modes, and timeline-semaphore limits.
 *
 * NOTE(review): the "static void" line, braces, and both
 * VkConformanceVersionKHR initializer bodies (original lines 2088-2094 and
 * 2096-2101) were dropped by the extraction; bare numeric lines are stray
 * line numbers.
 */
anv_get_physical_device_properties_1_2(struct anv_physical_device *pdevice,
2071
VkPhysicalDeviceVulkan12Properties *p)
2073
assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES);
2075
p->driverID = VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA_KHR;
2076
memset(p->driverName, 0, sizeof(p->driverName));
2077
snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE_KHR,
2078
"Intel open-source Mesa driver");
2079
memset(p->driverInfo, 0, sizeof(p->driverInfo));
2080
snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE_KHR,
2081
"Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
2083
/* Don't advertise conformance with a particular version if the hardware's
2084
* support is incomplete/alpha.
2086
if (pdevice->is_alpha) {
2087
p->conformanceVersion = (VkConformanceVersionKHR) {
2095
p->conformanceVersion = (VkConformanceVersionKHR) {
2103
p->denormBehaviorIndependence =
2104
VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR;
2105
p->roundingModeIndependence =
2106
VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE_KHR;
2108
/* Broadwell does not support HF denorms and there are restrictions
2109
* other gens. According to Kabylake's PRM:
2111
* "math - Extended Math Function
2113
* Restriction : Half-float denorms are always retained."
2115
p->shaderDenormFlushToZeroFloat16 = false;
2116
p->shaderDenormPreserveFloat16 = pdevice->info.ver > 8;
2117
p->shaderRoundingModeRTEFloat16 = true;
2118
p->shaderRoundingModeRTZFloat16 = true;
2119
p->shaderSignedZeroInfNanPreserveFloat16 = true;
2121
p->shaderDenormFlushToZeroFloat32 = true;
2122
p->shaderDenormPreserveFloat32 = true;
2123
p->shaderRoundingModeRTEFloat32 = true;
2124
p->shaderRoundingModeRTZFloat32 = true;
2125
p->shaderSignedZeroInfNanPreserveFloat32 = true;
2127
p->shaderDenormFlushToZeroFloat64 = true;
2128
p->shaderDenormPreserveFloat64 = true;
2129
p->shaderRoundingModeRTEFloat64 = true;
2130
p->shaderRoundingModeRTZFloat64 = true;
2131
p->shaderSignedZeroInfNanPreserveFloat64 = true;
2133
/* It's a bit hard to exactly map our implementation to the limits
2134
* described by Vulkan. The bindless surface handle in the extended
2135
* message descriptors is 20 bits and it's an index into the table of
2136
* RENDER_SURFACE_STATE structs that starts at bindless surface base
2137
* address. This means that we can have at must 1M surface states
2138
* allocated at any given time. Since most image views take two
2139
* descriptors, this means we have a limit of about 500K image views.
2141
* However, since we allocate surface states at vkCreateImageView time,
2142
* this means our limit is actually something on the order of 500K image
2143
* views allocated at any time. The actual limit describe by Vulkan, on
2144
* the other hand, is a limit of how many you can have in a descriptor set.
2145
* Assuming anyone using 1M descriptors will be using the same image view
2146
* twice a bunch of times (or a bunch of null descriptors), we can safely
2147
* advertise a larger limit here.
2149
const unsigned max_bindless_views = 1 << 20;
2150
p->maxUpdateAfterBindDescriptorsInAllPools = max_bindless_views;
2151
p->shaderUniformBufferArrayNonUniformIndexingNative = false;
2152
p->shaderSampledImageArrayNonUniformIndexingNative = false;
2153
p->shaderStorageBufferArrayNonUniformIndexingNative = true;
2154
p->shaderStorageImageArrayNonUniformIndexingNative = false;
2155
p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
2156
p->robustBufferAccessUpdateAfterBind = true;
2157
p->quadDivergentImplicitLod = false;
2158
p->maxPerStageDescriptorUpdateAfterBindSamplers = max_bindless_views;
2159
p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS;
2160
p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = UINT32_MAX;
2161
p->maxPerStageDescriptorUpdateAfterBindSampledImages = max_bindless_views;
2162
p->maxPerStageDescriptorUpdateAfterBindStorageImages = max_bindless_views;
2163
p->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS;
2164
p->maxPerStageUpdateAfterBindResources = UINT32_MAX;
2165
p->maxDescriptorSetUpdateAfterBindSamplers = max_bindless_views;
2166
p->maxDescriptorSetUpdateAfterBindUniformBuffers = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS;
2167
p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
2168
p->maxDescriptorSetUpdateAfterBindStorageBuffers = UINT32_MAX;
2169
p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
2170
p->maxDescriptorSetUpdateAfterBindSampledImages = max_bindless_views;
2171
p->maxDescriptorSetUpdateAfterBindStorageImages = max_bindless_views;
2172
p->maxDescriptorSetUpdateAfterBindInputAttachments = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS;
2174
/* We support all of the depth resolve modes */
2175
p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
2176
VK_RESOLVE_MODE_AVERAGE_BIT_KHR |
2177
VK_RESOLVE_MODE_MIN_BIT_KHR |
2178
VK_RESOLVE_MODE_MAX_BIT_KHR;
2179
/* Average doesn't make sense for stencil so we don't support that */
2180
p->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR;
2181
if (pdevice->info.ver >= 8) {
2182
/* The advanced stencil resolve modes currently require stencil
2183
* sampling be supported by the hardware.
2185
p->supportedStencilResolveModes |= VK_RESOLVE_MODE_MIN_BIT_KHR |
2186
VK_RESOLVE_MODE_MAX_BIT_KHR;
2188
p->independentResolveNone = true;
2189
p->independentResolve = true;
2191
/* Single-component min/max filtering needs SKL+ (gen9) hardware. */
p->filterMinmaxSingleComponentFormats = pdevice->info.ver >= 9;
2192
p->filterMinmaxImageComponentMapping = pdevice->info.ver >= 9;
2194
p->maxTimelineSemaphoreValueDifference = UINT64_MAX;
2196
p->framebufferIntegerColorSampleCounts =
2197
isl_device_get_sample_counts(&pdevice->isl_dev);
2201
/* Fills the Vulkan 1.3 core property struct: subgroup size control, inline
 * uniform block limits, integer dot-product acceleration flags (DP4A is
 * gen12+), and texel buffer alignment requirements.
 *
 * NOTE(review): the "static void" line, braces, and a comment line near the
 * RENDER_SURFACE_STATE citation were dropped by the extraction; bare numeric
 * lines are stray line numbers.
 */
anv_get_physical_device_properties_1_3(struct anv_physical_device *pdevice,
2202
VkPhysicalDeviceVulkan13Properties *p)
2204
assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES);
2206
p->minSubgroupSize = 8;
2207
p->maxSubgroupSize = 32;
2208
p->maxComputeWorkgroupSubgroups = pdevice->info.max_cs_workgroup_threads;
2209
p->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT |
2210
VK_SHADER_STAGE_TASK_BIT_NV |
2211
VK_SHADER_STAGE_MESH_BIT_NV;
2213
p->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
2214
p->maxPerStageDescriptorInlineUniformBlocks =
2215
MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
2216
p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
2217
MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
2218
p->maxDescriptorSetInlineUniformBlocks =
2219
MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
2220
p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks =
2221
MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
2222
p->maxInlineUniformTotalSize = UINT16_MAX;
2224
/* Only the packed 4x8 dot products are HW-accelerated, and only on gen12+
 * (DP4A); everything else is lowered by the compiler. */
p->integerDotProduct8BitUnsignedAccelerated = false;
2225
p->integerDotProduct8BitSignedAccelerated = false;
2226
p->integerDotProduct8BitMixedSignednessAccelerated = false;
2227
p->integerDotProduct4x8BitPackedUnsignedAccelerated = pdevice->info.ver >= 12;
2228
p->integerDotProduct4x8BitPackedSignedAccelerated = pdevice->info.ver >= 12;
2229
p->integerDotProduct4x8BitPackedMixedSignednessAccelerated = pdevice->info.ver >= 12;
2230
p->integerDotProduct16BitUnsignedAccelerated = false;
2231
p->integerDotProduct16BitSignedAccelerated = false;
2232
p->integerDotProduct16BitMixedSignednessAccelerated = false;
2233
p->integerDotProduct32BitUnsignedAccelerated = false;
2234
p->integerDotProduct32BitSignedAccelerated = false;
2235
p->integerDotProduct32BitMixedSignednessAccelerated = false;
2236
p->integerDotProduct64BitUnsignedAccelerated = false;
2237
p->integerDotProduct64BitSignedAccelerated = false;
2238
p->integerDotProduct64BitMixedSignednessAccelerated = false;
2239
p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false;
2240
p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false;
2241
p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
2242
p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = pdevice->info.ver >= 12;
2243
p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = pdevice->info.ver >= 12;
2244
p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = pdevice->info.ver >= 12;
2245
p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false;
2246
p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false;
2247
p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
2248
p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
2249
p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
2250
p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
2251
p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
2252
p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
2253
p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;
2255
/* From the SKL PRM Vol. 2d, docs for RENDER_SURFACE_STATE::Surface
2258
* "For SURFTYPE_BUFFER non-rendertarget surfaces, this field
2259
* specifies the base address of the first element of the surface,
2260
* computed in software by adding the surface base address to the
2261
* byte offset of the element in the buffer. The base address must
2262
* be aligned to element size."
2264
* The typed dataport messages require that things be texel aligned.
2265
* Otherwise, we may just load/store the wrong data or, in the worst
2266
* case, there may be hangs.
2268
p->storageTexelBufferOffsetAlignmentBytes = 16;
2269
p->storageTexelBufferOffsetSingleTexelAlignment = true;
2271
/* The sampler, however, is much more forgiving and it can handle
2272
* arbitrary byte alignment for linear and buffer surfaces. It's
2273
* hard to find a good PRM citation for this but years of empirical
2274
* experience demonstrate that this is true.
2276
p->uniformTexelBufferOffsetAlignmentBytes = 1;
2277
p->uniformTexelBufferOffsetSingleTexelAlignment = false;
2279
p->maxBufferSize = pdevice->isl_dev.max_buffer_size;
2282
/* vkGetPhysicalDeviceProperties2: fills the core properties, then walks the
 * pNext chain.  Core 1.1/1.2/1.3 property structs are filled from local
 * copies via the shared vk_get_physical_device_core_*_property_ext helpers;
 * extension-specific structs are handled in the switch below.
 *
 * NOTE(review): this extract is garbled — bare numeric lines are stray
 * original-file line numbers, and structural lines (braces, the "continue;"
 * after each core-property helper, each case's "break;" and closing brace,
 * the final "default:" and closing braces) were dropped by the extraction.
 */
void anv_GetPhysicalDeviceProperties2(
2283
VkPhysicalDevice physicalDevice,
2284
VkPhysicalDeviceProperties2* pProperties)
2286
ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
2288
anv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
2290
VkPhysicalDeviceVulkan11Properties core_1_1 = {
2291
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES,
2293
anv_get_physical_device_properties_1_1(pdevice, &core_1_1);
2295
VkPhysicalDeviceVulkan12Properties core_1_2 = {
2296
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,
2298
anv_get_physical_device_properties_1_2(pdevice, &core_1_2);
2300
VkPhysicalDeviceVulkan13Properties core_1_3 = {
2301
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES,
2303
anv_get_physical_device_properties_1_3(pdevice, &core_1_3);
2305
vk_foreach_struct(ext, pProperties->pNext) {
2306
if (vk_get_physical_device_core_1_1_property_ext(ext, &core_1_1))
2308
if (vk_get_physical_device_core_1_2_property_ext(ext, &core_1_2))
2310
if (vk_get_physical_device_core_1_3_property_ext(ext, &core_1_3))
2313
switch (ext->sType) {
2314
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_PROPERTIES_KHR: {
2315
VkPhysicalDeviceAccelerationStructurePropertiesKHR *props = (void *)ext;
2316
props->maxGeometryCount = (1u << 24) - 1;
2317
props->maxInstanceCount = (1u << 24) - 1;
2318
props->maxPrimitiveCount = (1u << 29) - 1;
2319
props->maxPerStageDescriptorAccelerationStructures = UINT16_MAX;
2320
props->maxPerStageDescriptorUpdateAfterBindAccelerationStructures = UINT16_MAX;
2321
props->maxDescriptorSetAccelerationStructures = UINT16_MAX;
2322
props->maxDescriptorSetUpdateAfterBindAccelerationStructures = UINT16_MAX;
2323
props->minAccelerationStructureScratchOffsetAlignment = 64;
2327
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT: {
2328
/* TODO: Real limits */
2329
VkPhysicalDeviceConservativeRasterizationPropertiesEXT *properties =
2330
(VkPhysicalDeviceConservativeRasterizationPropertiesEXT *)ext;
2331
/* There's nothing in the public docs about this value as far as I
2332
* can tell. However, this is the value the Windows driver reports
2333
* and there's a comment on a rejected HW feature in the internal
2336
* "This is similar to conservative rasterization, except the
2337
* primitive area is not extended by 1/512 and..."
2339
* That's a bit of an obtuse reference but it's the best we've got
2342
properties->primitiveOverestimationSize = 1.0f / 512.0f;
2343
properties->maxExtraPrimitiveOverestimationSize = 0.0f;
2344
properties->extraPrimitiveOverestimationSizeGranularity = 0.0f;
2345
properties->primitiveUnderestimation = false;
2346
properties->conservativePointAndLineRasterization = false;
2347
properties->degenerateTrianglesRasterized = true;
2348
properties->degenerateLinesRasterized = false;
2349
properties->fullyCoveredFragmentShaderInputVariable = false;
2350
properties->conservativeRasterizationPostDepthCoverage = true;
2354
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: {
2355
VkPhysicalDeviceCustomBorderColorPropertiesEXT *properties =
2356
(VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext;
2357
properties->maxCustomBorderColorSamplers = MAX_CUSTOM_BORDER_COLORS;
2361
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR: {
2362
VkPhysicalDeviceFragmentShadingRatePropertiesKHR *props =
2363
(VkPhysicalDeviceFragmentShadingRatePropertiesKHR *)ext;
2364
props->primitiveFragmentShadingRateWithMultipleViewports =
2365
pdevice->info.has_coarse_pixel_primitive_and_cb;
2366
props->layeredShadingRateAttachments = pdevice->info.has_coarse_pixel_primitive_and_cb;
2367
props->fragmentShadingRateNonTrivialCombinerOps =
2368
pdevice->info.has_coarse_pixel_primitive_and_cb;
2369
props->maxFragmentSize = (VkExtent2D) { 4, 4 };
2370
/* NOTE(review): the value line of this ternary (original line 2372) was
 * dropped by the extraction. */
props->maxFragmentSizeAspectRatio =
2371
pdevice->info.has_coarse_pixel_primitive_and_cb ?
2373
props->maxFragmentShadingRateCoverageSamples = 4 * 4 *
2374
(pdevice->info.has_coarse_pixel_primitive_and_cb ? 4 : 16);
2375
props->maxFragmentShadingRateRasterizationSamples =
2376
pdevice->info.has_coarse_pixel_primitive_and_cb ?
2377
VK_SAMPLE_COUNT_4_BIT : VK_SAMPLE_COUNT_16_BIT;
2378
props->fragmentShadingRateWithShaderDepthStencilWrites = false;
2379
props->fragmentShadingRateWithSampleMask = true;
2380
props->fragmentShadingRateWithShaderSampleMask = false;
2381
props->fragmentShadingRateWithConservativeRasterization = true;
2382
props->fragmentShadingRateWithFragmentShaderInterlock = true;
2383
props->fragmentShadingRateWithCustomSampleLocations = true;
2385
/* Fix in DG2_G10_C0 and DG2_G11_B0. Consider any other Sku as having
2388
props->fragmentShadingRateStrictMultiplyCombiner =
2389
pdevice->info.platform == INTEL_PLATFORM_DG2_G10 ?
2390
pdevice->info.revision >= 8 :
2391
pdevice->info.platform == INTEL_PLATFORM_DG2_G11 ?
2392
pdevice->info.revision >= 4 : true;
2394
if (pdevice->info.has_coarse_pixel_primitive_and_cb) {
2395
props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 8, 8 };
2396
props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 8, 8 };
2397
props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1;
2399
/* Those must be 0 if attachmentFragmentShadingRate is not
2402
props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
2403
props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
2404
props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 0;
2409
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT: {
2410
VkPhysicalDeviceDrmPropertiesEXT *props =
2411
(VkPhysicalDeviceDrmPropertiesEXT *)ext;
2413
props->hasPrimary = pdevice->has_master;
2414
props->primaryMajor = pdevice->master_major;
2415
props->primaryMinor = pdevice->master_minor;
2417
props->hasRender = pdevice->has_local;
2418
props->renderMajor = pdevice->local_major;
2419
props->renderMinor = pdevice->local_minor;
2424
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
2425
VkPhysicalDeviceExternalMemoryHostPropertiesEXT *props =
2426
(VkPhysicalDeviceExternalMemoryHostPropertiesEXT *) ext;
2427
/* Userptr needs page aligned memory. */
2428
props->minImportedHostPointerAlignment = 4096;
2432
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
2433
VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
2434
(VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
2435
/* In the Skylake PRM Vol. 7, subsection titled "GIQ (Diamond)
2436
* Sampling Rules - Legacy Mode", it says the following:
2438
* "Note that the device divides a pixel into a 16x16 array of
2439
* subpixels, referenced by their upper left corners."
2441
* This is the only known reference in the PRMs to the subpixel
2442
* precision of line rasterization and a "16x16 array of subpixels"
2443
* implies 4 subpixel precision bits. Empirical testing has shown
2444
* that 4 subpixel precision bits applies to all line rasterization
2447
props->lineSubPixelPrecisionBits = 4;
2451
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_PROPERTIES_KHR: {
2452
VkPhysicalDeviceMaintenance4PropertiesKHR *properties =
2453
(VkPhysicalDeviceMaintenance4PropertiesKHR *)ext;
2454
properties->maxBufferSize = pdevice->isl_dev.max_buffer_size;
2458
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_PROPERTIES_NV: {
2459
VkPhysicalDeviceMeshShaderPropertiesNV *props =
2460
(VkPhysicalDeviceMeshShaderPropertiesNV *)ext;
2462
/* Bounded by the maximum representable size in
2463
* 3DSTATE_MESH_SHADER_BODY::SharedLocalMemorySize. Same for Task.
2465
const uint32_t max_slm_size = 64 * 1024;
2467
/* Bounded by the maximum representable size in
2468
* 3DSTATE_MESH_SHADER_BODY::LocalXMaximum. Same for Task.
2470
const uint32_t max_workgroup_size = 1 << 10;
2472
/* Bounded by the maximum representable count in
2473
* 3DSTATE_MESH_SHADER_BODY::MaximumPrimitiveCount.
2475
const uint32_t max_primitives = 1024;
2477
/* TODO(mesh): Multiview. */
2478
const uint32_t max_view_count = 1;
2480
props->maxDrawMeshTasksCount = UINT32_MAX;
2482
/* TODO(mesh): Implement workgroup Y and Z sizes larger than one by
2483
* mapping them to/from the single value that HW provides us
2484
* (currently used for X).
2487
props->maxTaskWorkGroupInvocations = max_workgroup_size;
2488
props->maxTaskWorkGroupSize[0] = max_workgroup_size;
2489
props->maxTaskWorkGroupSize[1] = 1;
2490
props->maxTaskWorkGroupSize[2] = 1;
2491
props->maxTaskTotalMemorySize = max_slm_size;
2492
props->maxTaskOutputCount = UINT16_MAX;
2494
props->maxMeshWorkGroupInvocations = max_workgroup_size;
2495
props->maxMeshWorkGroupSize[0] = max_workgroup_size;
2496
props->maxMeshWorkGroupSize[1] = 1;
2497
props->maxMeshWorkGroupSize[2] = 1;
2498
props->maxMeshTotalMemorySize = max_slm_size / max_view_count;
2499
props->maxMeshOutputPrimitives = max_primitives / max_view_count;
2500
props->maxMeshMultiviewViewCount = max_view_count;
2502
/* Depends on what indices can be represented with IndexFormat. For
2503
* now we always use U32, so bound to the maximum unique vertices we
2504
* need for the maximum primitives.
2506
* TODO(mesh): Revisit this if we drop "U32" IndexFormat when adding
2507
* support for others.
2509
props->maxMeshOutputVertices = 3 * props->maxMeshOutputPrimitives;
2512
props->meshOutputPerVertexGranularity = 32;
2513
props->meshOutputPerPrimitiveGranularity = 32;
2518
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
2519
VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
2520
(VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
2521
properties->pciDomain = pdevice->info.pci_domain;
2522
properties->pciBus = pdevice->info.pci_bus;
2523
properties->pciDevice = pdevice->info.pci_dev;
2524
properties->pciFunction = pdevice->info.pci_func;
2528
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_PROPERTIES_KHR: {
2529
VkPhysicalDevicePerformanceQueryPropertiesKHR *properties =
2530
(VkPhysicalDevicePerformanceQueryPropertiesKHR *)ext;
2531
/* We could support this by spawning a shader to do the equation
2534
properties->allowCommandBufferQueryCopies = false;
2538
#pragma GCC diagnostic push
2539
/* The Android-only sType is not in the standard switch-coverage set. */
#pragma GCC diagnostic ignored "-Wswitch"
2540
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRESENTATION_PROPERTIES_ANDROID: {
2541
VkPhysicalDevicePresentationPropertiesANDROID *props =
2542
(VkPhysicalDevicePresentationPropertiesANDROID *)ext;
2543
props->sharedImage = VK_FALSE;
2546
#pragma GCC diagnostic pop
2548
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_PROPERTIES_EXT: {
2549
VkPhysicalDeviceProvokingVertexPropertiesEXT *properties =
2550
(VkPhysicalDeviceProvokingVertexPropertiesEXT *)ext;
2551
properties->provokingVertexModePerPipeline = true;
2552
properties->transformFeedbackPreservesTriangleFanProvokingVertex = false;
2556
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
2557
VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
2558
(VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
2559
properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
2563
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: {
2564
VkPhysicalDeviceRobustness2PropertiesEXT *properties = (void *)ext;
2565
properties->robustStorageBufferAccessSizeAlignment =
2566
ANV_SSBO_BOUNDS_CHECK_ALIGNMENT;
2567
/* NOTE(review): the value line for
 * robustUniformBufferAccessSizeAlignment (original line 2568) was dropped
 * by the extraction. */
properties->robustUniformBufferAccessSizeAlignment =
2572
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
2573
VkPhysicalDeviceSampleLocationsPropertiesEXT *props =
2574
(VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
2576
props->sampleLocationSampleCounts =
2577
isl_device_get_sample_counts(&pdevice->isl_dev);
2579
/* See also anv_GetPhysicalDeviceMultisamplePropertiesEXT */
2580
props->maxSampleLocationGridSize.width = 1;
2581
props->maxSampleLocationGridSize.height = 1;
2583
props->sampleLocationCoordinateRange[0] = 0;
2584
props->sampleLocationCoordinateRange[1] = 0.9375;
2585
props->sampleLocationSubPixelBits = 4;
2587
props->variableSampleLocations = true;
2591
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
2592
VkPhysicalDeviceTransformFeedbackPropertiesEXT *props =
2593
(VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;
2595
props->maxTransformFeedbackStreams = MAX_XFB_STREAMS;
2596
props->maxTransformFeedbackBuffers = MAX_XFB_BUFFERS;
2597
props->maxTransformFeedbackBufferSize = (1ull << 32);
2598
props->maxTransformFeedbackStreamDataSize = 128 * 4;
2599
props->maxTransformFeedbackBufferDataSize = 128 * 4;
2600
props->maxTransformFeedbackBufferDataStride = 2048;
2601
props->transformFeedbackQueries = true;
2602
props->transformFeedbackStreamsLinesTriangles = false;
2603
props->transformFeedbackRasterizationStreamSelect = false;
2604
/* This requires MI_MATH */
2605
props->transformFeedbackDraw = pdevice->info.verx10 >= 75;
2609
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
2610
VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *props =
2611
(VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
2612
/* We have to restrict this a bit for multiview */
2613
props->maxVertexAttribDivisor = UINT32_MAX / 16;
2617
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_PROPERTIES_EXT: {
2618
VkPhysicalDeviceMultiDrawPropertiesEXT *props = (VkPhysicalDeviceMultiDrawPropertiesEXT *)ext;
2619
props->maxMultiDrawCount = 2048;
2624
anv_debug_ignored_stype(ext->sType);
2631
/* Maps a VK_EXT_global_priority queue priority to the kernel/intel context
 * priority value used when creating the HW context.
 *
 * NOTE(review): the return-type line, "switch (priority) {", the "default:"
 * label and closing braces were dropped by the extraction; bare numeric
 * lines are stray line numbers.
 */
vk_priority_to_gen(int priority)
2634
case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
2635
return INTEL_CONTEXT_LOW_PRIORITY;
2636
case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
2637
return INTEL_CONTEXT_MEDIUM_PRIORITY;
2638
case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
2639
return INTEL_CONTEXT_HIGH_PRIORITY;
2640
case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
2641
return INTEL_CONTEXT_REALTIME_PRIORITY;
2643
unreachable("Invalid priority");
2647
static const VkQueueFamilyProperties
2648
anv_queue_family_properties_template = {
2649
.timestampValidBits = 36, /* XXX: Real value here */
2650
.minImageTransferGranularity = { 1, 1, 1 },
2653
void anv_GetPhysicalDeviceQueueFamilyProperties2(
2654
VkPhysicalDevice physicalDevice,
2655
uint32_t* pQueueFamilyPropertyCount,
2656
VkQueueFamilyProperties2* pQueueFamilyProperties)
2658
ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
2659
VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
2660
pQueueFamilyProperties, pQueueFamilyPropertyCount);
2662
for (uint32_t i = 0; i < pdevice->queue.family_count; i++) {
2663
struct anv_queue_family *queue_family = &pdevice->queue.families[i];
2664
vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) {
2665
p->queueFamilyProperties = anv_queue_family_properties_template;
2666
p->queueFamilyProperties.queueFlags = queue_family->queueFlags;
2667
p->queueFamilyProperties.queueCount = queue_family->queueCount;
2669
vk_foreach_struct(ext, p->pNext) {
2670
switch (ext->sType) {
2671
case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_EXT: {
2672
VkQueueFamilyGlobalPriorityPropertiesEXT *properties =
2673
(VkQueueFamilyGlobalPriorityPropertiesEXT *)ext;
2675
/* Deliberately sorted low to high */
2676
VkQueueGlobalPriorityEXT all_priorities[] = {
2677
VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT,
2678
VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT,
2679
VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT,
2680
VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT,
2684
for (unsigned i = 0; i < ARRAY_SIZE(all_priorities); i++) {
2685
if (vk_priority_to_gen(all_priorities[i]) >
2686
pdevice->max_context_priority)
2689
properties->priorities[count++] = all_priorities[i];
2691
properties->priorityCount = count;
2696
anv_debug_ignored_stype(ext->sType);
2703
void anv_GetPhysicalDeviceMemoryProperties(
2704
VkPhysicalDevice physicalDevice,
2705
VkPhysicalDeviceMemoryProperties* pMemoryProperties)
2707
ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
2709
pMemoryProperties->memoryTypeCount = physical_device->memory.type_count;
2710
for (uint32_t i = 0; i < physical_device->memory.type_count; i++) {
2711
pMemoryProperties->memoryTypes[i] = (VkMemoryType) {
2712
.propertyFlags = physical_device->memory.types[i].propertyFlags,
2713
.heapIndex = physical_device->memory.types[i].heapIndex,
2717
pMemoryProperties->memoryHeapCount = physical_device->memory.heap_count;
2718
for (uint32_t i = 0; i < physical_device->memory.heap_count; i++) {
2719
pMemoryProperties->memoryHeaps[i] = (VkMemoryHeap) {
2720
.size = physical_device->memory.heaps[i].size,
2721
.flags = physical_device->memory.heaps[i].flags,
2727
anv_get_memory_budget(VkPhysicalDevice physicalDevice,
2728
VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
2730
ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
2732
anv_update_meminfo(device, device->local_fd);
2734
VkDeviceSize total_sys_heaps_size = 0, total_vram_heaps_size = 0;
2735
for (size_t i = 0; i < device->memory.heap_count; i++) {
2736
if (device->memory.heaps[i].is_local_mem) {
2737
total_vram_heaps_size += device->memory.heaps[i].size;
2739
total_sys_heaps_size += device->memory.heaps[i].size;
2743
for (size_t i = 0; i < device->memory.heap_count; i++) {
2744
VkDeviceSize heap_size = device->memory.heaps[i].size;
2745
VkDeviceSize heap_used = device->memory.heaps[i].used;
2746
VkDeviceSize heap_budget, total_heaps_size;
2747
uint64_t mem_available = 0;
2749
if (device->memory.heaps[i].is_local_mem) {
2750
total_heaps_size = total_vram_heaps_size;
2751
mem_available = device->vram.available;
2753
total_heaps_size = total_sys_heaps_size;
2754
mem_available = device->sys.available;
2757
double heap_proportion = (double) heap_size / total_heaps_size;
2758
VkDeviceSize available_prop = mem_available * heap_proportion;
2761
* Let's not incite the app to starve the system: report at most 90% of
2762
* the available heap memory.
2764
uint64_t heap_available = available_prop * 9 / 10;
2765
heap_budget = MIN2(heap_size, heap_used + heap_available);
2768
* Round down to the nearest MB
2770
heap_budget &= ~((1ull << 20) - 1);
2773
* The heapBudget value must be non-zero for array elements less than
2774
* VkPhysicalDeviceMemoryProperties::memoryHeapCount. The heapBudget
2775
* value must be less than or equal to VkMemoryHeap::size for each heap.
2777
assert(0 < heap_budget && heap_budget <= heap_size);
2779
memoryBudget->heapUsage[i] = heap_used;
2780
memoryBudget->heapBudget[i] = heap_budget;
2783
/* The heapBudget and heapUsage values must be zero for array elements
2784
* greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount
2786
for (uint32_t i = device->memory.heap_count; i < VK_MAX_MEMORY_HEAPS; i++) {
2787
memoryBudget->heapBudget[i] = 0;
2788
memoryBudget->heapUsage[i] = 0;
2792
void anv_GetPhysicalDeviceMemoryProperties2(
2793
VkPhysicalDevice physicalDevice,
2794
VkPhysicalDeviceMemoryProperties2* pMemoryProperties)
2796
anv_GetPhysicalDeviceMemoryProperties(physicalDevice,
2797
&pMemoryProperties->memoryProperties);
2799
vk_foreach_struct(ext, pMemoryProperties->pNext) {
2800
switch (ext->sType) {
2801
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT:
2802
anv_get_memory_budget(physicalDevice, (void*)ext);
2805
anv_debug_ignored_stype(ext->sType);
2812
anv_GetDeviceGroupPeerMemoryFeatures(
2815
uint32_t localDeviceIndex,
2816
uint32_t remoteDeviceIndex,
2817
VkPeerMemoryFeatureFlags* pPeerMemoryFeatures)
2819
assert(localDeviceIndex == 0 && remoteDeviceIndex == 0);
2820
*pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT |
2821
VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
2822
VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
2823
VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
2826
PFN_vkVoidFunction anv_GetInstanceProcAddr(
2827
VkInstance _instance,
2830
ANV_FROM_HANDLE(anv_instance, instance, _instance);
2831
return vk_instance_get_proc_addr(&instance->vk,
2832
&anv_instance_entrypoints,
2836
/* With version 1+ of the loader interface the ICD should expose
2837
* vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in apps.
2840
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2841
VkInstance instance,
2845
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2846
VkInstance instance,
2849
return anv_GetInstanceProcAddr(instance, pName);
2852
/* With version 4+ of the loader interface the ICD should expose
2853
* vk_icdGetPhysicalDeviceProcAddr()
2856
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetPhysicalDeviceProcAddr(
2857
VkInstance _instance,
2860
PFN_vkVoidFunction vk_icdGetPhysicalDeviceProcAddr(
2861
VkInstance _instance,
2864
ANV_FROM_HANDLE(anv_instance, instance, _instance);
2865
return vk_instance_get_physical_device_proc_addr(&instance->vk, pName);
2868
static struct anv_state
2869
anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align, const void *p)
2871
struct anv_state state;
2873
state = anv_state_pool_alloc(pool, size, align);
2874
memcpy(state.map, p, size);
2880
anv_device_init_border_colors(struct anv_device *device)
2882
if (device->info.platform == INTEL_PLATFORM_HSW) {
2883
static const struct hsw_border_color border_colors[] = {
2884
[VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
2885
[VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
2886
[VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
2887
[VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } },
2888
[VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } },
2889
[VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } },
2892
device->border_colors =
2893
anv_state_pool_emit_data(&device->dynamic_state_pool,
2894
sizeof(border_colors), 512, border_colors);
2896
static const struct gfx8_border_color border_colors[] = {
2897
[VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
2898
[VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
2899
[VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
2900
[VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } },
2901
[VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } },
2902
[VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } },
2905
device->border_colors =
2906
anv_state_pool_emit_data(&device->dynamic_state_pool,
2907
sizeof(border_colors), 64, border_colors);
2912
anv_device_init_trivial_batch(struct anv_device *device)
2914
VkResult result = anv_device_alloc_bo(device, "trivial-batch", 4096,
2915
ANV_BO_ALLOC_MAPPED,
2916
0 /* explicit_address */,
2917
&device->trivial_batch_bo);
2918
if (result != VK_SUCCESS)
2921
struct anv_batch batch = {
2922
.start = device->trivial_batch_bo->map,
2923
.next = device->trivial_batch_bo->map,
2924
.end = device->trivial_batch_bo->map + 4096,
2927
anv_batch_emit(&batch, GFX7_MI_BATCH_BUFFER_END, bbe);
2928
anv_batch_emit(&batch, GFX7_MI_NOOP, noop);
2930
if (!device->info.has_llc)
2931
intel_clflush_range(batch.start, batch.next - batch.start);
2937
get_bo_from_pool(struct intel_batch_decode_bo *ret,
2938
struct anv_block_pool *pool,
2941
anv_block_pool_foreach_bo(bo, pool) {
2942
uint64_t bo_address = intel_48b_address(bo->offset);
2943
if (address >= bo_address && address < (bo_address + bo->size)) {
2944
*ret = (struct intel_batch_decode_bo) {
2955
/* Finding a buffer for batch decoding */
2956
static struct intel_batch_decode_bo
2957
decode_get_bo(void *v_batch, bool ppgtt, uint64_t address)
2959
struct anv_device *device = v_batch;
2960
struct intel_batch_decode_bo ret_bo = {};
2964
if (get_bo_from_pool(&ret_bo, &device->dynamic_state_pool.block_pool, address))
2966
if (get_bo_from_pool(&ret_bo, &device->instruction_state_pool.block_pool, address))
2968
if (get_bo_from_pool(&ret_bo, &device->binding_table_pool.block_pool, address))
2970
if (get_bo_from_pool(&ret_bo, &device->surface_state_pool.block_pool, address))
2973
if (!device->cmd_buffer_being_decoded)
2974
return (struct intel_batch_decode_bo) { };
2976
struct anv_batch_bo **bo;
2978
u_vector_foreach(bo, &device->cmd_buffer_being_decoded->seen_bbos) {
2979
/* The decoder zeroes out the top 16 bits, so we need to as well */
2980
uint64_t bo_address = (*bo)->bo->offset & (~0ull >> 16);
2982
if (address >= bo_address && address < bo_address + (*bo)->bo->size) {
2983
return (struct intel_batch_decode_bo) {
2985
.size = (*bo)->bo->size,
2986
.map = (*bo)->bo->map,
2991
return (struct intel_batch_decode_bo) { };
2994
struct intel_aux_map_buffer {
2995
struct intel_buffer base;
2996
struct anv_state state;
2999
static struct intel_buffer *
3000
intel_aux_map_buffer_alloc(void *driver_ctx, uint32_t size)
3002
struct intel_aux_map_buffer *buf = malloc(sizeof(struct intel_aux_map_buffer));
3006
struct anv_device *device = (struct anv_device*)driver_ctx;
3007
assert(device->physical->supports_48bit_addresses &&
3008
device->physical->use_softpin);
3010
struct anv_state_pool *pool = &device->dynamic_state_pool;
3011
buf->state = anv_state_pool_alloc(pool, size, size);
3013
buf->base.gpu = pool->block_pool.bo->offset + buf->state.offset;
3014
buf->base.gpu_end = buf->base.gpu + buf->state.alloc_size;
3015
buf->base.map = buf->state.map;
3016
buf->base.driver_bo = &buf->state;
3021
intel_aux_map_buffer_free(void *driver_ctx, struct intel_buffer *buffer)
3023
struct intel_aux_map_buffer *buf = (struct intel_aux_map_buffer*)buffer;
3024
struct anv_device *device = (struct anv_device*)driver_ctx;
3025
struct anv_state_pool *pool = &device->dynamic_state_pool;
3026
anv_state_pool_free(pool, buf->state);
3030
static struct intel_mapped_pinned_buffer_alloc aux_map_allocator = {
3031
.alloc = intel_aux_map_buffer_alloc,
3032
.free = intel_aux_map_buffer_free,
3035
static VkResult anv_device_check_status(struct vk_device *vk_device);
3037
VkResult anv_CreateDevice(
3038
VkPhysicalDevice physicalDevice,
3039
const VkDeviceCreateInfo* pCreateInfo,
3040
const VkAllocationCallbacks* pAllocator,
3043
ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
3045
struct anv_device *device;
3047
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
3049
/* Check enabled features */
3050
bool robust_buffer_access = false;
3051
if (pCreateInfo->pEnabledFeatures) {
3052
if (pCreateInfo->pEnabledFeatures->robustBufferAccess)
3053
robust_buffer_access = true;
3056
vk_foreach_struct_const(ext, pCreateInfo->pNext) {
3057
switch (ext->sType) {
3058
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2: {
3059
const VkPhysicalDeviceFeatures2 *features = (const void *)ext;
3060
if (features->features.robustBufferAccess)
3061
robust_buffer_access = true;
3071
/* Check requested queues and fail if we are requested to create any
3072
* queues with flags we don't support.
3074
assert(pCreateInfo->queueCreateInfoCount > 0);
3075
for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
3076
if (pCreateInfo->pQueueCreateInfos[i].flags != 0)
3077
return vk_error(physical_device, VK_ERROR_INITIALIZATION_FAILED);
3080
/* Check if client specified queue priority. */
3081
const VkDeviceQueueGlobalPriorityCreateInfoEXT *queue_priority =
3082
vk_find_struct_const(pCreateInfo->pQueueCreateInfos[0].pNext,
3083
DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
3085
VkQueueGlobalPriorityEXT priority =
3086
queue_priority ? queue_priority->globalPriority :
3087
VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT;
3089
device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator,
3091
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
3093
return vk_error(physical_device, VK_ERROR_OUT_OF_HOST_MEMORY);
3095
struct vk_device_dispatch_table dispatch_table;
3096
vk_device_dispatch_table_from_entrypoints(&dispatch_table,
3097
anv_genX(&physical_device->info, device_entrypoints), true);
3098
vk_device_dispatch_table_from_entrypoints(&dispatch_table,
3099
&anv_device_entrypoints, false);
3100
vk_device_dispatch_table_from_entrypoints(&dispatch_table,
3101
&wsi_device_entrypoints, false);
3103
result = vk_device_init(&device->vk, &physical_device->vk,
3104
&dispatch_table, pCreateInfo, pAllocator);
3105
if (result != VK_SUCCESS)
3108
if (INTEL_DEBUG(DEBUG_BATCH)) {
3109
const unsigned decode_flags =
3110
INTEL_BATCH_DECODE_FULL |
3111
(INTEL_DEBUG(DEBUG_COLOR) ? INTEL_BATCH_DECODE_IN_COLOR : 0) |
3112
INTEL_BATCH_DECODE_OFFSETS |
3113
INTEL_BATCH_DECODE_FLOATS;
3115
intel_batch_decode_ctx_init(&device->decoder_ctx,
3116
&physical_device->info,
3117
stderr, decode_flags, NULL,
3118
decode_get_bo, NULL, device);
3120
device->decoder_ctx.dynamic_base = DYNAMIC_STATE_POOL_MIN_ADDRESS;
3121
device->decoder_ctx.surface_base = SURFACE_STATE_POOL_MIN_ADDRESS;
3122
device->decoder_ctx.instruction_base =
3123
INSTRUCTION_STATE_POOL_MIN_ADDRESS;
3126
device->physical = physical_device;
3128
/* XXX(chadv): Can we dup() physicalDevice->fd here? */
3129
device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC);
3130
if (device->fd == -1) {
3131
result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3135
device->vk.check_status = anv_device_check_status;
3136
device->vk.create_sync_for_memory = anv_create_sync_for_memory;
3137
vk_device_set_drm_fd(&device->vk, device->fd);
3139
uint32_t num_queues = 0;
3140
for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++)
3141
num_queues += pCreateInfo->pQueueCreateInfos[i].queueCount;
3143
if (device->physical->engine_info) {
3144
/* The kernel API supports at most 64 engines */
3145
assert(num_queues <= 64);
3146
uint16_t engine_classes[64];
3147
int engine_count = 0;
3148
for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
3149
const VkDeviceQueueCreateInfo *queueCreateInfo =
3150
&pCreateInfo->pQueueCreateInfos[i];
3152
assert(queueCreateInfo->queueFamilyIndex <
3153
physical_device->queue.family_count);
3154
struct anv_queue_family *queue_family =
3155
&physical_device->queue.families[queueCreateInfo->queueFamilyIndex];
3157
for (uint32_t j = 0; j < queueCreateInfo->queueCount; j++)
3158
engine_classes[engine_count++] = queue_family->engine_class;
3160
device->context_id =
3161
intel_gem_create_context_engines(device->fd,
3162
physical_device->engine_info,
3163
engine_count, engine_classes);
3165
assert(num_queues == 1);
3166
device->context_id = anv_gem_create_context(device);
3168
if (device->context_id == -1) {
3169
result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3173
/* Here we tell the kernel not to attempt to recover our context but
3174
* immediately (on the next batchbuffer submission) report that the
3175
* context is lost, and we will do the recovery ourselves. In the case
3176
* of Vulkan, recovery means throwing VK_ERROR_DEVICE_LOST and letting
3177
* the client clean up the pieces.
3179
anv_gem_set_context_param(device->fd, device->context_id,
3180
I915_CONTEXT_PARAM_RECOVERABLE, false);
3183
vk_zalloc(&device->vk.alloc, num_queues * sizeof(*device->queues), 8,
3184
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
3185
if (device->queues == NULL) {
3186
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3187
goto fail_context_id;
3190
device->queue_count = 0;
3191
for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
3192
const VkDeviceQueueCreateInfo *queueCreateInfo =
3193
&pCreateInfo->pQueueCreateInfos[i];
3195
for (uint32_t j = 0; j < queueCreateInfo->queueCount; j++) {
3196
/* When using legacy contexts, we use I915_EXEC_RENDER but, with
3197
* engine-based contexts, the bottom 6 bits of exec_flags are used
3198
* for the engine ID.
3200
uint32_t exec_flags = device->physical->engine_info ?
3201
device->queue_count : I915_EXEC_RENDER;
3203
result = anv_queue_init(device, &device->queues[device->queue_count],
3204
exec_flags, queueCreateInfo, j);
3205
if (result != VK_SUCCESS)
3208
device->queue_count++;
3212
if (!anv_use_relocations(physical_device)) {
3213
if (pthread_mutex_init(&device->vma_mutex, NULL) != 0) {
3214
result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3218
/* keep the page with address zero out of the allocator */
3219
util_vma_heap_init(&device->vma_lo,
3220
LOW_HEAP_MIN_ADDRESS, LOW_HEAP_SIZE);
3222
util_vma_heap_init(&device->vma_cva, CLIENT_VISIBLE_HEAP_MIN_ADDRESS,
3223
CLIENT_VISIBLE_HEAP_SIZE);
3225
/* Leave the last 4GiB out of the high vma range, so that no state
3226
* base address + size can overflow 48 bits. For more information see
3227
* the comment about Wa32bitGeneralStateOffset in anv_allocator.c
3229
util_vma_heap_init(&device->vma_hi, HIGH_HEAP_MIN_ADDRESS,
3230
physical_device->gtt_size - (1ull << 32) -
3231
HIGH_HEAP_MIN_ADDRESS);
3234
list_inithead(&device->memory_objects);
3236
/* As per spec, the driver implementation may deny requests to acquire
3237
* a priority above the default priority (MEDIUM) if the caller does not
3238
* have sufficient privileges. In this scenario VK_ERROR_NOT_PERMITTED_EXT
3241
if (physical_device->max_context_priority >= INTEL_CONTEXT_MEDIUM_PRIORITY) {
3242
int err = anv_gem_set_context_param(device->fd, device->context_id,
3243
I915_CONTEXT_PARAM_PRIORITY,
3244
vk_priority_to_gen(priority));
3245
if (err != 0 && priority > VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT) {
3246
result = vk_error(device, VK_ERROR_NOT_PERMITTED_EXT);
3251
device->info = physical_device->info;
3252
device->isl_dev = physical_device->isl_dev;
3254
/* On Broadwell and later, we can use batch chaining to more efficiently
3255
* implement growing command buffers. Prior to Haswell, the kernel
3256
* command parser gets in the way and we have to fall back to growing
3259
device->can_chain_batches = device->info.ver >= 8;
3261
device->robust_buffer_access = robust_buffer_access;
3263
if (pthread_mutex_init(&device->mutex, NULL) != 0) {
3264
result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3268
pthread_condattr_t condattr;
3269
if (pthread_condattr_init(&condattr) != 0) {
3270
result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3273
if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) != 0) {
3274
pthread_condattr_destroy(&condattr);
3275
result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3278
if (pthread_cond_init(&device->queue_submit, &condattr) != 0) {
3279
pthread_condattr_destroy(&condattr);
3280
result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3283
pthread_condattr_destroy(&condattr);
3285
result = anv_bo_cache_init(&device->bo_cache, device);
3286
if (result != VK_SUCCESS)
3287
goto fail_queue_cond;
3289
anv_bo_pool_init(&device->batch_bo_pool, device, "batch");
3291
/* Because scratch is also relative to General State Base Address, we leave
3292
* the base address 0 and start the pool memory at an offset. This way we
3293
* get the correct offsets in the anv_states that get allocated from it.
3295
result = anv_state_pool_init(&device->general_state_pool, device,
3297
0, GENERAL_STATE_POOL_MIN_ADDRESS, 16384);
3298
if (result != VK_SUCCESS)
3299
goto fail_batch_bo_pool;
3301
result = anv_state_pool_init(&device->dynamic_state_pool, device,
3303
DYNAMIC_STATE_POOL_MIN_ADDRESS, 0, 16384);
3304
if (result != VK_SUCCESS)
3305
goto fail_general_state_pool;
3307
if (device->info.ver >= 8) {
3308
/* The border color pointer is limited to 24 bits, so we need to make
3309
* sure that any such color used at any point in the program doesn't
3310
* exceed that limit.
3311
* We achieve that by reserving all the custom border colors we support
3312
* right off the bat, so they are close to the base address.
3314
anv_state_reserved_pool_init(&device->custom_border_colors,
3315
&device->dynamic_state_pool,
3316
MAX_CUSTOM_BORDER_COLORS,
3317
sizeof(struct gfx8_border_color), 64);
3320
result = anv_state_pool_init(&device->instruction_state_pool, device,
3322
INSTRUCTION_STATE_POOL_MIN_ADDRESS, 0, 16384);
3323
if (result != VK_SUCCESS)
3324
goto fail_dynamic_state_pool;
3326
result = anv_state_pool_init(&device->surface_state_pool, device,
3327
"surface state pool",
3328
SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096);
3329
if (result != VK_SUCCESS)
3330
goto fail_instruction_state_pool;
3332
if (device->info.verx10 >= 125) {
3333
/* We're using 3DSTATE_BINDING_TABLE_POOL_ALLOC to give the binding
3334
* table its own base address separately from surface state base.
3336
result = anv_state_pool_init(&device->binding_table_pool, device,
3337
"binding table pool",
3338
BINDING_TABLE_POOL_MIN_ADDRESS, 0,
3339
BINDING_TABLE_POOL_BLOCK_SIZE);
3340
} else if (!anv_use_relocations(physical_device)) {
3341
int64_t bt_pool_offset = (int64_t)BINDING_TABLE_POOL_MIN_ADDRESS -
3342
(int64_t)SURFACE_STATE_POOL_MIN_ADDRESS;
3343
assert(INT32_MIN < bt_pool_offset && bt_pool_offset < 0);
3344
result = anv_state_pool_init(&device->binding_table_pool, device,
3345
"binding table pool",
3346
SURFACE_STATE_POOL_MIN_ADDRESS,
3348
BINDING_TABLE_POOL_BLOCK_SIZE);
3350
if (result != VK_SUCCESS)
3351
goto fail_surface_state_pool;
3353
if (device->info.has_aux_map) {
3354
device->aux_map_ctx = intel_aux_map_init(device, &aux_map_allocator,
3355
&physical_device->info);
3356
if (!device->aux_map_ctx)
3357
goto fail_binding_table_pool;
3360
result = anv_device_alloc_bo(device, "workaround", 4096,
3361
ANV_BO_ALLOC_CAPTURE |
3362
ANV_BO_ALLOC_MAPPED |
3363
ANV_BO_ALLOC_LOCAL_MEM,
3364
0 /* explicit_address */,
3365
&device->workaround_bo);
3366
if (result != VK_SUCCESS)
3367
goto fail_surface_aux_map_pool;
3369
device->workaround_address = (struct anv_address) {
3370
.bo = device->workaround_bo,
3371
.offset = align_u32(
3372
intel_debug_write_identifiers(device->workaround_bo->map,
3373
device->workaround_bo->size,
3377
device->debug_frame_desc =
3378
intel_debug_get_identifier_block(device->workaround_bo->map,
3379
device->workaround_bo->size,
3380
INTEL_DEBUG_BLOCK_TYPE_FRAME);
3382
if (device->vk.enabled_extensions.KHR_ray_query) {
3383
uint32_t ray_queries_size =
3384
align_u32(brw_rt_ray_queries_hw_stacks_size(&device->info), 4096);
3386
result = anv_device_alloc_bo(device, "ray queries",
3388
ANV_BO_ALLOC_LOCAL_MEM,
3389
0 /* explicit_address */,
3390
&device->ray_query_bo);
3391
if (result != VK_SUCCESS)
3392
goto fail_workaround_bo;
3395
result = anv_device_init_trivial_batch(device);
3396
if (result != VK_SUCCESS)
3397
goto fail_ray_query_bo;
3399
if (device->info.ver >= 12 &&
3400
device->vk.enabled_extensions.KHR_fragment_shading_rate) {
3401
uint32_t n_cps_states = 3 * 3; /* All combinaisons of X by Y CP sizes (1, 2, 4) */
3403
if (device->info.has_coarse_pixel_primitive_and_cb)
3404
n_cps_states *= 5 * 5; /* 5 combiners by 2 operators */
3406
n_cps_states += 1; /* Disable CPS */
3408
/* Each of the combinaison must be replicated on all viewports */
3409
n_cps_states *= MAX_VIEWPORTS;
3411
device->cps_states =
3412
anv_state_pool_alloc(&device->dynamic_state_pool,
3413
n_cps_states * CPS_STATE_length(&device->info) * 4,
3415
if (device->cps_states.map == NULL)
3416
goto fail_trivial_batch;
3418
anv_genX(&device->info, init_cps_device_state)(device);
3421
/* Allocate a null surface state at surface state offset 0. This makes
3422
* NULL descriptor handling trivial because we can just memset structures
3423
* to zero and they have a valid descriptor.
3425
device->null_surface_state =
3426
anv_state_pool_alloc(&device->surface_state_pool,
3427
device->isl_dev.ss.size,
3428
device->isl_dev.ss.align);
3429
isl_null_fill_state(&device->isl_dev, device->null_surface_state.map,
3430
.size = isl_extent3d(1, 1, 1) /* This shouldn't matter */);
3431
assert(device->null_surface_state.offset == 0);
3433
anv_scratch_pool_init(device, &device->scratch_pool);
3435
/* TODO(RT): Do we want some sort of data structure for this? */
3436
memset(device->rt_scratch_bos, 0, sizeof(device->rt_scratch_bos));
3438
result = anv_genX(&device->info, init_device_state)(device);
3439
if (result != VK_SUCCESS)
3440
goto fail_trivial_batch_bo_and_scratch_pool;
3442
anv_pipeline_cache_init(&device->default_pipeline_cache, device,
3443
true /* cache_enabled */, false /* external_sync */);
3445
result = anv_device_init_rt_shaders(device);
3446
if (result != VK_SUCCESS)
3447
goto fail_rt_trampoline;
3449
anv_device_init_blorp(device);
3451
anv_device_init_border_colors(device);
3453
anv_device_perf_init(device);
3455
anv_device_utrace_init(device);
3457
*pDevice = anv_device_to_handle(device);
3462
anv_pipeline_cache_finish(&device->default_pipeline_cache);
3463
fail_trivial_batch_bo_and_scratch_pool:
3464
anv_scratch_pool_finish(device, &device->scratch_pool);
3466
anv_device_release_bo(device, device->trivial_batch_bo);
3468
if (device->ray_query_bo)
3469
anv_device_release_bo(device, device->ray_query_bo);
3471
anv_device_release_bo(device, device->workaround_bo);
3472
fail_surface_aux_map_pool:
3473
if (device->info.has_aux_map) {
3474
intel_aux_map_finish(device->aux_map_ctx);
3475
device->aux_map_ctx = NULL;
3477
fail_binding_table_pool:
3478
if (!anv_use_relocations(physical_device))
3479
anv_state_pool_finish(&device->binding_table_pool);
3480
fail_surface_state_pool:
3481
anv_state_pool_finish(&device->surface_state_pool);
3482
fail_instruction_state_pool:
3483
anv_state_pool_finish(&device->instruction_state_pool);
3484
fail_dynamic_state_pool:
3485
if (device->info.ver >= 8)
3486
anv_state_reserved_pool_finish(&device->custom_border_colors);
3487
anv_state_pool_finish(&device->dynamic_state_pool);
3488
fail_general_state_pool:
3489
anv_state_pool_finish(&device->general_state_pool);
3491
anv_bo_pool_finish(&device->batch_bo_pool);
3492
anv_bo_cache_finish(&device->bo_cache);
3494
pthread_cond_destroy(&device->queue_submit);
3496
pthread_mutex_destroy(&device->mutex);
3498
if (!anv_use_relocations(physical_device)) {
3499
util_vma_heap_finish(&device->vma_hi);
3500
util_vma_heap_finish(&device->vma_cva);
3501
util_vma_heap_finish(&device->vma_lo);
3504
for (uint32_t i = 0; i < device->queue_count; i++)
3505
anv_queue_finish(&device->queues[i]);
3506
vk_free(&device->vk.alloc, device->queues);
3508
anv_gem_destroy_context(device, device->context_id);
3512
vk_device_finish(&device->vk);
3514
vk_free(&device->vk.alloc, device);
3519
void anv_DestroyDevice(
3521
const VkAllocationCallbacks* pAllocator)
3523
ANV_FROM_HANDLE(anv_device, device, _device);
3528
anv_device_utrace_finish(device);
3530
anv_device_finish_blorp(device);
3532
anv_device_finish_rt_shaders(device);
3534
anv_pipeline_cache_finish(&device->default_pipeline_cache);
3536
#ifdef HAVE_VALGRIND
3537
/* We only need to free these to prevent valgrind errors. The backing
3538
* BO will go away in a couple of lines so we don't actually leak.
3540
if (device->info.ver >= 8)
3541
anv_state_reserved_pool_finish(&device->custom_border_colors);
3542
anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
3543
anv_state_pool_free(&device->dynamic_state_pool, device->slice_hash);
3544
anv_state_pool_free(&device->dynamic_state_pool, device->cps_states);
3547
for (unsigned i = 0; i < ARRAY_SIZE(device->rt_scratch_bos); i++) {
3548
if (device->rt_scratch_bos[i] != NULL)
3549
anv_device_release_bo(device, device->rt_scratch_bos[i]);
3552
anv_scratch_pool_finish(device, &device->scratch_pool);
3554
if (device->vk.enabled_extensions.KHR_ray_query) {
3555
for (unsigned i = 0; i < ARRAY_SIZE(device->ray_query_shadow_bos); i++) {
3556
if (device->ray_query_shadow_bos[i] != NULL)
3557
anv_device_release_bo(device, device->ray_query_shadow_bos[i]);
3559
anv_device_release_bo(device, device->ray_query_bo);
3561
anv_device_release_bo(device, device->workaround_bo);
3562
anv_device_release_bo(device, device->trivial_batch_bo);
3564
if (device->info.has_aux_map) {
3565
intel_aux_map_finish(device->aux_map_ctx);
3566
device->aux_map_ctx = NULL;
3569
if (!anv_use_relocations(device->physical))
3570
anv_state_pool_finish(&device->binding_table_pool);
3571
anv_state_pool_finish(&device->surface_state_pool);
3572
anv_state_pool_finish(&device->instruction_state_pool);
3573
anv_state_pool_finish(&device->dynamic_state_pool);
3574
anv_state_pool_finish(&device->general_state_pool);
3576
anv_bo_pool_finish(&device->batch_bo_pool);
3578
anv_bo_cache_finish(&device->bo_cache);
3580
if (!anv_use_relocations(device->physical)) {
3581
util_vma_heap_finish(&device->vma_hi);
3582
util_vma_heap_finish(&device->vma_cva);
3583
util_vma_heap_finish(&device->vma_lo);
3586
pthread_cond_destroy(&device->queue_submit);
3587
pthread_mutex_destroy(&device->mutex);
3589
for (uint32_t i = 0; i < device->queue_count; i++)
3590
anv_queue_finish(&device->queues[i]);
3591
vk_free(&device->vk.alloc, device->queues);
3593
anv_gem_destroy_context(device, device->context_id);
3595
if (INTEL_DEBUG(DEBUG_BATCH))
3596
intel_batch_decode_ctx_finish(&device->decoder_ctx);
3600
vk_device_finish(&device->vk);
3601
vk_free(&device->vk.alloc, device);
3604
VkResult anv_EnumerateInstanceLayerProperties(
3605
uint32_t* pPropertyCount,
3606
VkLayerProperties* pProperties)
3608
if (pProperties == NULL) {
3609
*pPropertyCount = 0;
3613
/* None supported at this time */
3614
return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
3618
anv_device_check_status(struct vk_device *vk_device)
3620
struct anv_device *device = container_of(vk_device, struct anv_device, vk);
3622
uint32_t active, pending;
3623
int ret = anv_gem_context_get_reset_stats(device->fd, device->context_id,
3626
/* We don't know the real error. */
3627
return vk_device_set_lost(&device->vk, "get_reset_stats failed: %m");
3631
return vk_device_set_lost(&device->vk, "GPU hung on one of our command buffers");
3632
} else if (pending) {
3633
return vk_device_set_lost(&device->vk, "GPU hung with commands in-flight");
3640
anv_device_wait(struct anv_device *device, struct anv_bo *bo,
3643
int ret = anv_gem_wait(device, bo->gem_handle, &timeout);
3644
if (ret == -1 && errno == ETIME) {
3646
} else if (ret == -1) {
3647
/* We don't know the real error. */
3648
return vk_device_set_lost(&device->vk, "gem wait failed: %m");
3655
anv_vma_alloc(struct anv_device *device,
3656
uint64_t size, uint64_t align,
3657
enum anv_bo_alloc_flags alloc_flags,
3658
uint64_t client_address)
3660
pthread_mutex_lock(&device->vma_mutex);
3664
if (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) {
3665
if (client_address) {
3666
if (util_vma_heap_alloc_addr(&device->vma_cva,
3667
client_address, size)) {
3668
addr = client_address;
3671
addr = util_vma_heap_alloc(&device->vma_cva, size, align);
3673
/* We don't want to fall back to other heaps */
3677
assert(client_address == 0);
3679
if (!(alloc_flags & ANV_BO_ALLOC_32BIT_ADDRESS))
3680
addr = util_vma_heap_alloc(&device->vma_hi, size, align);
3683
addr = util_vma_heap_alloc(&device->vma_lo, size, align);
3686
pthread_mutex_unlock(&device->vma_mutex);
3688
assert(addr == intel_48b_address(addr));
3689
return intel_canonical_address(addr);
3693
anv_vma_free(struct anv_device *device,
3694
uint64_t address, uint64_t size)
3696
const uint64_t addr_48b = intel_48b_address(address);
3698
pthread_mutex_lock(&device->vma_mutex);
3700
if (addr_48b >= LOW_HEAP_MIN_ADDRESS &&
3701
addr_48b <= LOW_HEAP_MAX_ADDRESS) {
3702
util_vma_heap_free(&device->vma_lo, addr_48b, size);
3703
} else if (addr_48b >= CLIENT_VISIBLE_HEAP_MIN_ADDRESS &&
3704
addr_48b <= CLIENT_VISIBLE_HEAP_MAX_ADDRESS) {
3705
util_vma_heap_free(&device->vma_cva, addr_48b, size);
3707
assert(addr_48b >= HIGH_HEAP_MIN_ADDRESS);
3708
util_vma_heap_free(&device->vma_hi, addr_48b, size);
3711
pthread_mutex_unlock(&device->vma_mutex);
3714
VkResult anv_AllocateMemory(
3716
const VkMemoryAllocateInfo* pAllocateInfo,
3717
const VkAllocationCallbacks* pAllocator,
3718
VkDeviceMemory* pMem)
3720
ANV_FROM_HANDLE(anv_device, device, _device);
3721
struct anv_physical_device *pdevice = device->physical;
3722
struct anv_device_memory *mem;
3723
VkResult result = VK_SUCCESS;
3725
assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
3727
/* The Vulkan 1.0.33 spec says "allocationSize must be greater than 0". */
3728
assert(pAllocateInfo->allocationSize > 0);
3730
VkDeviceSize aligned_alloc_size =
3731
align_u64(pAllocateInfo->allocationSize, 4096);
3733
if (aligned_alloc_size > MAX_MEMORY_ALLOCATION_SIZE)
3734
return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3736
assert(pAllocateInfo->memoryTypeIndex < pdevice->memory.type_count);
3737
struct anv_memory_type *mem_type =
3738
&pdevice->memory.types[pAllocateInfo->memoryTypeIndex];
3739
assert(mem_type->heapIndex < pdevice->memory.heap_count);
3740
struct anv_memory_heap *mem_heap =
3741
&pdevice->memory.heaps[mem_type->heapIndex];
3743
uint64_t mem_heap_used = p_atomic_read(&mem_heap->used);
3744
if (mem_heap_used + aligned_alloc_size > mem_heap->size)
3745
return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3747
mem = vk_object_alloc(&device->vk, pAllocator, sizeof(*mem),
3748
VK_OBJECT_TYPE_DEVICE_MEMORY);
3750
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3752
mem->type = mem_type;
3757
mem->host_ptr = NULL;
3759
enum anv_bo_alloc_flags alloc_flags = 0;
3761
const VkExportMemoryAllocateInfo *export_info = NULL;
3762
const VkImportAndroidHardwareBufferInfoANDROID *ahw_import_info = NULL;
3763
const VkImportMemoryFdInfoKHR *fd_info = NULL;
3764
const VkImportMemoryHostPointerInfoEXT *host_ptr_info = NULL;
3765
const VkMemoryDedicatedAllocateInfo *dedicated_info = NULL;
3766
VkMemoryAllocateFlags vk_flags = 0;
3767
uint64_t client_address = 0;
3769
vk_foreach_struct_const(ext, pAllocateInfo->pNext) {
3770
switch (ext->sType) {
3771
case VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO:
3772
export_info = (void *)ext;
3775
case VK_STRUCTURE_TYPE_IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID:
3776
ahw_import_info = (void *)ext;
3779
case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR:
3780
fd_info = (void *)ext;
3783
case VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT:
3784
host_ptr_info = (void *)ext;
3787
case VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO: {
3788
const VkMemoryAllocateFlagsInfo *flags_info = (void *)ext;
3789
vk_flags = flags_info->flags;
3793
case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO:
3794
dedicated_info = (void *)ext;
3797
case VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO_KHR: {
3798
const VkMemoryOpaqueCaptureAddressAllocateInfoKHR *addr_info =
3799
(const VkMemoryOpaqueCaptureAddressAllocateInfoKHR *)ext;
3800
client_address = addr_info->opaqueCaptureAddress;
3805
if (ext->sType != VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA)
3806
/* this isn't a real enum value,
3807
* so use conditional to avoid compiler warn
3809
anv_debug_ignored_stype(ext->sType);
3814
/* By default, we want all VkDeviceMemory objects to support CCS */
3815
if (device->physical->has_implicit_ccs && device->info.has_aux_map)
3816
alloc_flags |= ANV_BO_ALLOC_IMPLICIT_CCS;
3818
if (vk_flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR)
3819
alloc_flags |= ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS;
3821
if ((export_info && export_info->handleTypes) ||
3822
(fd_info && fd_info->handleType) ||
3823
(host_ptr_info && host_ptr_info->handleType)) {
3824
/* Anything imported or exported is EXTERNAL */
3825
alloc_flags |= ANV_BO_ALLOC_EXTERNAL;
3828
/* Check if we need to support Android HW buffer export. If so,
3829
* create AHardwareBuffer and import memory from it.
3831
bool android_export = false;
3832
if (export_info && export_info->handleTypes &
3833
VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID)
3834
android_export = true;
3836
if (ahw_import_info) {
3837
result = anv_import_ahw_memory(_device, mem, ahw_import_info);
3838
if (result != VK_SUCCESS)
3842
} else if (android_export) {
3843
result = anv_create_ahw_memory(_device, mem, pAllocateInfo);
3844
if (result != VK_SUCCESS)
3850
/* The Vulkan spec permits handleType to be 0, in which case the struct is
3853
if (fd_info && fd_info->handleType) {
3854
/* At the moment, we support only the below handle types. */
3855
assert(fd_info->handleType ==
3856
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
3857
fd_info->handleType ==
3858
VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
3860
result = anv_device_import_bo(device, fd_info->fd, alloc_flags,
3861
client_address, &mem->bo);
3862
if (result != VK_SUCCESS)
3865
/* For security purposes, we reject importing the bo if it's smaller
3866
* than the requested allocation size. This prevents a malicious client
3867
* from passing a buffer to a trusted client, lying about the size, and
3868
* telling the trusted client to try and texture from an image that goes
3869
* out-of-bounds. This sort of thing could lead to GPU hangs or worse
3870
* in the trusted client. The trusted client can protect itself against
3871
* this sort of attack but only if it can trust the buffer size.
3873
if (mem->bo->size < aligned_alloc_size) {
3874
result = vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE,
3875
"aligned allocationSize too large for "
3876
"VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: "
3877
"%"PRIu64"B > %"PRIu64"B",
3878
aligned_alloc_size, mem->bo->size);
3879
anv_device_release_bo(device, mem->bo);
3883
/* From the Vulkan spec:
3885
* "Importing memory from a file descriptor transfers ownership of
3886
* the file descriptor from the application to the Vulkan
3887
* implementation. The application must not perform any operations on
3888
* the file descriptor after a successful import."
3890
* If the import fails, we leave the file descriptor open.
3896
if (host_ptr_info && host_ptr_info->handleType) {
3897
if (host_ptr_info->handleType ==
3898
VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_MAPPED_FOREIGN_MEMORY_BIT_EXT) {
3899
result = vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
3903
assert(host_ptr_info->handleType ==
3904
VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
3906
result = anv_device_import_bo_from_host_ptr(device,
3907
host_ptr_info->pHostPointer,
3908
pAllocateInfo->allocationSize,
3912
if (result != VK_SUCCESS)
3915
mem->host_ptr = host_ptr_info->pHostPointer;
3919
/* Set ALLOC_LOCAL_MEM flag if heap has device local bit set and requested
3920
* memory property flag has DEVICE_LOCAL_BIT set.
3922
if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
3923
alloc_flags |= ANV_BO_ALLOC_LOCAL_MEM;
3925
/* Regular allocate (not importing memory). */
3927
result = anv_device_alloc_bo(device, "user", pAllocateInfo->allocationSize,
3928
alloc_flags, client_address, &mem->bo);
3929
if (result != VK_SUCCESS)
3932
if (dedicated_info && dedicated_info->image != VK_NULL_HANDLE) {
3933
ANV_FROM_HANDLE(anv_image, image, dedicated_info->image);
3935
/* Some legacy (non-modifiers) consumers need the tiling to be set on
3936
* the BO. In this case, we have a dedicated allocation.
3938
if (image->vk.wsi_legacy_scanout) {
3939
const struct isl_surf *surf = &image->planes[0].primary_surface.isl;
3940
result = anv_device_set_bo_tiling(device, mem->bo,
3943
if (result != VK_SUCCESS) {
3944
anv_device_release_bo(device, mem->bo);
3951
mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo->size);
3952
if (mem_heap_used > mem_heap->size) {
3953
p_atomic_add(&mem_heap->used, -mem->bo->size);
3954
anv_device_release_bo(device, mem->bo);
3955
result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
3956
"Out of heap memory");
3960
pthread_mutex_lock(&device->mutex);
3961
list_addtail(&mem->link, &device->memory_objects);
3962
pthread_mutex_unlock(&device->mutex);
3964
*pMem = anv_device_memory_to_handle(mem);
3969
vk_object_free(&device->vk, pAllocator, mem);
3974
VkResult anv_GetMemoryFdKHR(
3976
const VkMemoryGetFdInfoKHR* pGetFdInfo,
3979
ANV_FROM_HANDLE(anv_device, dev, device_h);
3980
ANV_FROM_HANDLE(anv_device_memory, mem, pGetFdInfo->memory);
3982
assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
3984
assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
3985
pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
3987
return anv_device_export_bo(dev, mem->bo, pFd);
3990
VkResult anv_GetMemoryFdPropertiesKHR(
3992
VkExternalMemoryHandleTypeFlagBits handleType,
3994
VkMemoryFdPropertiesKHR* pMemoryFdProperties)
3996
ANV_FROM_HANDLE(anv_device, device, _device);
3998
switch (handleType) {
3999
case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
4000
/* dma-buf can be imported as any memory type */
4001
pMemoryFdProperties->memoryTypeBits =
4002
(1 << device->physical->memory.type_count) - 1;
4006
/* The valid usage section for this function says:
4008
* "handleType must not be one of the handle types defined as
4011
* So opaque handle types fall into the default "unsupported" case.
4013
return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
4017
VkResult anv_GetMemoryHostPointerPropertiesEXT(
4019
VkExternalMemoryHandleTypeFlagBits handleType,
4020
const void* pHostPointer,
4021
VkMemoryHostPointerPropertiesEXT* pMemoryHostPointerProperties)
4023
ANV_FROM_HANDLE(anv_device, device, _device);
4025
assert(pMemoryHostPointerProperties->sType ==
4026
VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT);
4028
switch (handleType) {
4029
case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT:
4030
/* Host memory can be imported as any memory type. */
4031
pMemoryHostPointerProperties->memoryTypeBits =
4032
(1ull << device->physical->memory.type_count) - 1;
4037
return VK_ERROR_INVALID_EXTERNAL_HANDLE;
4041
void anv_FreeMemory(
4043
VkDeviceMemory _mem,
4044
const VkAllocationCallbacks* pAllocator)
4046
ANV_FROM_HANDLE(anv_device, device, _device);
4047
ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
4052
pthread_mutex_lock(&device->mutex);
4053
list_del(&mem->link);
4054
pthread_mutex_unlock(&device->mutex);
4057
anv_UnmapMemory(_device, _mem);
4059
p_atomic_add(&device->physical->memory.heaps[mem->type->heapIndex].used,
4062
anv_device_release_bo(device, mem->bo);
4064
#if defined(ANDROID) && ANDROID_API_LEVEL >= 26
4066
AHardwareBuffer_release(mem->ahw);
4069
vk_object_free(&device->vk, pAllocator, mem);
4072
VkResult anv_MapMemory(
4074
VkDeviceMemory _memory,
4075
VkDeviceSize offset,
4077
VkMemoryMapFlags flags,
4080
ANV_FROM_HANDLE(anv_device, device, _device);
4081
ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
4088
if (mem->host_ptr) {
4089
*ppData = mem->host_ptr + offset;
4093
if (size == VK_WHOLE_SIZE)
4094
size = mem->bo->size - offset;
4096
/* From the Vulkan spec version 1.0.32 docs for MapMemory:
4098
* * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0
4099
* assert(size != 0);
4100
* * If size is not equal to VK_WHOLE_SIZE, size must be less than or
4101
* equal to the size of the memory minus offset
4104
assert(offset + size <= mem->bo->size);
4106
if (size != (size_t)size) {
4107
return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
4108
"requested size 0x%"PRIx64" does not fit in %u bits",
4109
size, (unsigned)(sizeof(size_t) * 8));
4112
/* From the Vulkan 1.2.194 spec:
4114
* "memory must not be currently host mapped"
4116
if (mem->map != NULL) {
4117
return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
4118
"Memory object already mapped.");
4121
uint32_t gem_flags = 0;
4123
if (!device->info.has_llc &&
4124
(mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
4125
gem_flags |= I915_MMAP_WC;
4127
/* GEM will fail to map if the offset isn't 4k-aligned. Round down. */
4128
uint64_t map_offset;
4129
if (!device->physical->has_mmap_offset)
4130
map_offset = offset & ~4095ull;
4133
assert(offset >= map_offset);
4134
uint64_t map_size = (offset + size) - map_offset;
4136
/* Let's map whole pages */
4137
map_size = align_u64(map_size, 4096);
4140
VkResult result = anv_device_map_bo(device, mem->bo, map_offset,
4141
map_size, gem_flags, &map);
4142
if (result != VK_SUCCESS)
4146
mem->map_size = map_size;
4147
mem->map_delta = (offset - map_offset);
4148
*ppData = mem->map + mem->map_delta;
4153
void anv_UnmapMemory(
4155
VkDeviceMemory _memory)
4157
ANV_FROM_HANDLE(anv_device, device, _device);
4158
ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
4160
if (mem == NULL || mem->host_ptr)
4163
anv_device_unmap_bo(device, mem->bo, mem->map, mem->map_size);
4171
clflush_mapped_ranges(struct anv_device *device,
4173
const VkMappedMemoryRange *ranges)
4175
for (uint32_t i = 0; i < count; i++) {
4176
ANV_FROM_HANDLE(anv_device_memory, mem, ranges[i].memory);
4177
uint64_t map_offset = ranges[i].offset + mem->map_delta;
4178
if (map_offset >= mem->map_size)
4181
if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
4184
intel_clflush_range(mem->map + map_offset,
4185
MIN2(ranges[i].size, mem->map_size - map_offset));
4189
VkResult anv_FlushMappedMemoryRanges(
4191
uint32_t memoryRangeCount,
4192
const VkMappedMemoryRange* pMemoryRanges)
4194
ANV_FROM_HANDLE(anv_device, device, _device);
4196
if (!device->physical->memory.need_clflush)
4199
/* Make sure the writes we're flushing have landed. */
4200
__builtin_ia32_mfence();
4202
clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges);
4207
VkResult anv_InvalidateMappedMemoryRanges(
4209
uint32_t memoryRangeCount,
4210
const VkMappedMemoryRange* pMemoryRanges)
4212
ANV_FROM_HANDLE(anv_device, device, _device);
4214
if (!device->physical->memory.need_clflush)
4217
clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges);
4219
/* Make sure no reads get moved up above the invalidate. */
4220
__builtin_ia32_mfence();
4225
void anv_GetDeviceMemoryCommitment(
4227
VkDeviceMemory memory,
4228
VkDeviceSize* pCommittedMemoryInBytes)
4230
*pCommittedMemoryInBytes = 0;
4234
anv_bind_buffer_memory(const VkBindBufferMemoryInfo *pBindInfo)
4236
ANV_FROM_HANDLE(anv_device_memory, mem, pBindInfo->memory);
4237
ANV_FROM_HANDLE(anv_buffer, buffer, pBindInfo->buffer);
4239
assert(pBindInfo->sType == VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO);
4242
assert(pBindInfo->memoryOffset < mem->bo->size);
4243
assert(mem->bo->size - pBindInfo->memoryOffset >= buffer->size);
4244
buffer->address = (struct anv_address) {
4246
.offset = pBindInfo->memoryOffset,
4249
buffer->address = ANV_NULL_ADDRESS;
4253
VkResult anv_BindBufferMemory2(
4255
uint32_t bindInfoCount,
4256
const VkBindBufferMemoryInfo* pBindInfos)
4258
for (uint32_t i = 0; i < bindInfoCount; i++)
4259
anv_bind_buffer_memory(&pBindInfos[i]);
4264
VkResult anv_QueueBindSparse(
4266
uint32_t bindInfoCount,
4267
const VkBindSparseInfo* pBindInfo,
4270
ANV_FROM_HANDLE(anv_queue, queue, _queue);
4271
if (vk_device_is_lost(&queue->device->vk))
4272
return VK_ERROR_DEVICE_LOST;
4274
return vk_error(queue, VK_ERROR_FEATURE_NOT_PRESENT);
4279
VkResult anv_CreateEvent(
4281
const VkEventCreateInfo* pCreateInfo,
4282
const VkAllocationCallbacks* pAllocator,
4285
ANV_FROM_HANDLE(anv_device, device, _device);
4286
struct anv_event *event;
4288
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_EVENT_CREATE_INFO);
4290
event = vk_object_alloc(&device->vk, pAllocator, sizeof(*event),
4291
VK_OBJECT_TYPE_EVENT);
4293
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
4295
event->state = anv_state_pool_alloc(&device->dynamic_state_pool,
4296
sizeof(uint64_t), 8);
4297
*(uint64_t *)event->state.map = VK_EVENT_RESET;
4299
*pEvent = anv_event_to_handle(event);
4304
void anv_DestroyEvent(
4307
const VkAllocationCallbacks* pAllocator)
4309
ANV_FROM_HANDLE(anv_device, device, _device);
4310
ANV_FROM_HANDLE(anv_event, event, _event);
4315
anv_state_pool_free(&device->dynamic_state_pool, event->state);
4317
vk_object_free(&device->vk, pAllocator, event);
4320
VkResult anv_GetEventStatus(
4324
ANV_FROM_HANDLE(anv_device, device, _device);
4325
ANV_FROM_HANDLE(anv_event, event, _event);
4327
if (vk_device_is_lost(&device->vk))
4328
return VK_ERROR_DEVICE_LOST;
4330
return *(uint64_t *)event->state.map;
4333
VkResult anv_SetEvent(
4337
ANV_FROM_HANDLE(anv_event, event, _event);
4339
*(uint64_t *)event->state.map = VK_EVENT_SET;
4344
VkResult anv_ResetEvent(
4348
ANV_FROM_HANDLE(anv_event, event, _event);
4350
*(uint64_t *)event->state.map = VK_EVENT_RESET;
4358
anv_get_buffer_memory_requirements(struct anv_device *device,
4360
VkBufferUsageFlags usage,
4361
VkMemoryRequirements2* pMemoryRequirements)
4363
/* The Vulkan spec (git aaed022) says:
4365
* memoryTypeBits is a bitfield and contains one bit set for every
4366
* supported memory type for the resource. The bit `1<<i` is set if and
4367
* only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
4368
* structure for the physical device is supported.
4370
uint32_t memory_types = (1ull << device->physical->memory.type_count) - 1;
4372
/* Base alignment requirement of a cache line */
4373
uint32_t alignment = 16;
4375
if (usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT)
4376
alignment = MAX2(alignment, ANV_UBO_ALIGNMENT);
4378
pMemoryRequirements->memoryRequirements.size = size;
4379
pMemoryRequirements->memoryRequirements.alignment = alignment;
4381
/* Storage and Uniform buffers should have their size aligned to
4382
* 32-bits to avoid boundary checks when last DWord is not complete.
4383
* This would ensure that not internal padding would be needed for
4386
if (device->robust_buffer_access &&
4387
(usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT ||
4388
usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT))
4389
pMemoryRequirements->memoryRequirements.size = align_u64(size, 4);
4391
pMemoryRequirements->memoryRequirements.memoryTypeBits = memory_types;
4393
vk_foreach_struct(ext, pMemoryRequirements->pNext) {
4394
switch (ext->sType) {
4395
case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
4396
VkMemoryDedicatedRequirements *requirements = (void *)ext;
4397
requirements->prefersDedicatedAllocation = false;
4398
requirements->requiresDedicatedAllocation = false;
4403
anv_debug_ignored_stype(ext->sType);
4409
void anv_GetBufferMemoryRequirements2(
4411
const VkBufferMemoryRequirementsInfo2* pInfo,
4412
VkMemoryRequirements2* pMemoryRequirements)
4414
ANV_FROM_HANDLE(anv_device, device, _device);
4415
ANV_FROM_HANDLE(anv_buffer, buffer, pInfo->buffer);
4417
anv_get_buffer_memory_requirements(device,
4420
pMemoryRequirements);
4423
void anv_GetDeviceBufferMemoryRequirementsKHR(
4425
const VkDeviceBufferMemoryRequirementsKHR* pInfo,
4426
VkMemoryRequirements2* pMemoryRequirements)
4428
ANV_FROM_HANDLE(anv_device, device, _device);
4430
anv_get_buffer_memory_requirements(device,
4431
pInfo->pCreateInfo->size,
4432
pInfo->pCreateInfo->usage,
4433
pMemoryRequirements);
4436
VkResult anv_CreateBuffer(
4438
const VkBufferCreateInfo* pCreateInfo,
4439
const VkAllocationCallbacks* pAllocator,
4442
ANV_FROM_HANDLE(anv_device, device, _device);
4443
struct anv_buffer *buffer;
4445
/* Don't allow creating buffers bigger than our address space. The real
4446
* issue here is that we may align up the buffer size and we don't want
4447
* doing so to cause roll-over. However, no one has any business
4448
* allocating a buffer larger than our GTT size.
4450
if (pCreateInfo->size > device->physical->gtt_size)
4451
return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
4453
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
4455
buffer = vk_object_alloc(&device->vk, pAllocator, sizeof(*buffer),
4456
VK_OBJECT_TYPE_BUFFER);
4458
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
4460
buffer->create_flags = pCreateInfo->flags;
4461
buffer->size = pCreateInfo->size;
4462
buffer->usage = pCreateInfo->usage;
4463
buffer->address = ANV_NULL_ADDRESS;
4465
*pBuffer = anv_buffer_to_handle(buffer);
4470
void anv_DestroyBuffer(
4473
const VkAllocationCallbacks* pAllocator)
4475
ANV_FROM_HANDLE(anv_device, device, _device);
4476
ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
4481
vk_object_free(&device->vk, pAllocator, buffer);
4484
VkDeviceAddress anv_GetBufferDeviceAddress(
4486
const VkBufferDeviceAddressInfoKHR* pInfo)
4488
ANV_FROM_HANDLE(anv_buffer, buffer, pInfo->buffer);
4490
assert(!anv_address_is_null(buffer->address));
4491
assert(anv_bo_is_pinned(buffer->address.bo));
4493
return anv_address_physical(buffer->address);
4496
uint64_t anv_GetBufferOpaqueCaptureAddress(
4498
const VkBufferDeviceAddressInfoKHR* pInfo)
4503
uint64_t anv_GetDeviceMemoryOpaqueCaptureAddress(
4505
const VkDeviceMemoryOpaqueCaptureAddressInfoKHR* pInfo)
4507
ANV_FROM_HANDLE(anv_device_memory, memory, pInfo->memory);
4509
assert(anv_bo_is_pinned(memory->bo));
4510
assert(memory->bo->has_client_visible_address);
4512
return intel_48b_address(memory->bo->offset);
4516
anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state,
4517
enum isl_format format,
4518
isl_surf_usage_flags_t usage,
4519
struct anv_address address,
4520
uint32_t range, uint32_t stride)
4522
isl_buffer_fill_state(&device->isl_dev, state.map,
4523
.address = anv_address_physical(address),
4524
.mocs = isl_mocs(&device->isl_dev, usage,
4525
address.bo && address.bo->is_external),
4528
.swizzle = ISL_SWIZZLE_IDENTITY,
4529
.stride_B = stride);
4532
void anv_DestroySampler(
4535
const VkAllocationCallbacks* pAllocator)
4537
ANV_FROM_HANDLE(anv_device, device, _device);
4538
ANV_FROM_HANDLE(anv_sampler, sampler, _sampler);
4543
if (sampler->bindless_state.map) {
4544
anv_state_pool_free(&device->dynamic_state_pool,
4545
sampler->bindless_state);
4548
if (sampler->custom_border_color.map) {
4549
anv_state_reserved_pool_free(&device->custom_border_colors,
4550
sampler->custom_border_color);
4553
vk_object_free(&device->vk, pAllocator, sampler);
4556
static const VkTimeDomainEXT anv_time_domains[] = {
4557
VK_TIME_DOMAIN_DEVICE_EXT,
4558
VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
4559
#ifdef CLOCK_MONOTONIC_RAW
4560
VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
4564
VkResult anv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
4565
VkPhysicalDevice physicalDevice,
4566
uint32_t *pTimeDomainCount,
4567
VkTimeDomainEXT *pTimeDomains)
4570
VK_OUTARRAY_MAKE_TYPED(VkTimeDomainEXT, out, pTimeDomains, pTimeDomainCount);
4572
for (d = 0; d < ARRAY_SIZE(anv_time_domains); d++) {
4573
vk_outarray_append_typed(VkTimeDomainEXT, &out, i) {
4574
*i = anv_time_domains[d];
4578
return vk_outarray_status(&out);
4582
/* Read a POSIX clock and return it in nanoseconds (0 on failure).
 *
 * Falls back from CLOCK_MONOTONIC_RAW to CLOCK_MONOTONIC where the raw
 * clock is unavailable.  Note: the mangled source had `¤t` (an HTML
 * `&curren;` entity) where `&current` belongs — fixed here.
 */
static uint64_t
anv_clock_gettime(clockid_t clock_id)
{
   struct timespec current;
   int ret;

   ret = clock_gettime(clock_id, &current);
#ifdef CLOCK_MONOTONIC_RAW
   if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
      ret = clock_gettime(CLOCK_MONOTONIC, &current);
#endif
   if (ret < 0)
      return 0;

   return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
}
4598
VkResult anv_GetCalibratedTimestampsEXT(
4600
uint32_t timestampCount,
4601
const VkCalibratedTimestampInfoEXT *pTimestampInfos,
4602
uint64_t *pTimestamps,
4603
uint64_t *pMaxDeviation)
4605
ANV_FROM_HANDLE(anv_device, device, _device);
4606
uint64_t timestamp_frequency = device->info.timestamp_frequency;
4609
uint64_t begin, end;
4610
uint64_t max_clock_period = 0;
4612
#ifdef CLOCK_MONOTONIC_RAW
4613
begin = anv_clock_gettime(CLOCK_MONOTONIC_RAW);
4615
begin = anv_clock_gettime(CLOCK_MONOTONIC);
4618
for (d = 0; d < timestampCount; d++) {
4619
switch (pTimestampInfos[d].timeDomain) {
4620
case VK_TIME_DOMAIN_DEVICE_EXT:
4621
ret = anv_gem_reg_read(device->fd, TIMESTAMP | I915_REG_READ_8B_WA,
4625
return vk_device_set_lost(&device->vk, "Failed to read the "
4626
"TIMESTAMP register: %m");
4628
uint64_t device_period = DIV_ROUND_UP(1000000000, timestamp_frequency);
4629
max_clock_period = MAX2(max_clock_period, device_period);
4631
case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
4632
pTimestamps[d] = anv_clock_gettime(CLOCK_MONOTONIC);
4633
max_clock_period = MAX2(max_clock_period, 1);
4636
#ifdef CLOCK_MONOTONIC_RAW
4637
case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
4638
pTimestamps[d] = begin;
4647
#ifdef CLOCK_MONOTONIC_RAW
4648
end = anv_clock_gettime(CLOCK_MONOTONIC_RAW);
4650
end = anv_clock_gettime(CLOCK_MONOTONIC);
4654
* The maximum deviation is the sum of the interval over which we
4655
* perform the sampling and the maximum period of any sampled
4656
* clock. That's because the maximum skew between any two sampled
4657
* clock edges is when the sampled clock with the largest period is
4658
* sampled at the end of that period but right at the beginning of the
4659
* sampling interval and some other clock is sampled right at the
4660
* begining of its sampling period and right at the end of the
4661
* sampling interval. Let's assume the GPU has the longest clock
4662
* period and that the application is sampling GPU and monotonic:
4665
* w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
4666
* Raw -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
4670
* GPU -----_____-----_____-----_____-----_____
4673
* x y z 0 1 2 3 4 5 6 7 8 9 a b c
4674
* Monotonic -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
4676
* Interval <----------------->
4677
* Deviation <-------------------------->
4681
* m = read(monotonic) 2
4684
* We round the sample interval up by one tick to cover sampling error
4685
* in the interval clock
4688
uint64_t sample_interval = end - begin + 1;
4690
*pMaxDeviation = sample_interval + max_clock_period;
4695
void anv_GetPhysicalDeviceMultisamplePropertiesEXT(
4696
VkPhysicalDevice physicalDevice,
4697
VkSampleCountFlagBits samples,
4698
VkMultisamplePropertiesEXT* pMultisampleProperties)
4700
ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
4702
assert(pMultisampleProperties->sType ==
4703
VK_STRUCTURE_TYPE_MULTISAMPLE_PROPERTIES_EXT);
4705
VkExtent2D grid_size;
4706
if (samples & isl_device_get_sample_counts(&physical_device->isl_dev)) {
4707
grid_size.width = 1;
4708
grid_size.height = 1;
4710
grid_size.width = 0;
4711
grid_size.height = 0;
4713
pMultisampleProperties->maxSampleLocationGridSize = grid_size;
4715
vk_foreach_struct(ext, pMultisampleProperties->pNext)
4716
anv_debug_ignored_stype(ext->sType);
4719
/* vk_icd.h does not declare this function, so we declare it here to
4720
* suppress Wmissing-prototypes.
4722
PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
4723
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion);
4725
PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
4726
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion)
4728
/* For the full details on loader interface versioning, see
4729
* <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
4730
* What follows is a condensed summary, to help you navigate the large and
4731
* confusing official doc.
4733
* - Loader interface v0 is incompatible with later versions. We don't
4736
* - In loader interface v1:
4737
* - The first ICD entrypoint called by the loader is
4738
* vk_icdGetInstanceProcAddr(). The ICD must statically expose this
4740
* - The ICD must statically expose no other Vulkan symbol unless it is
4741
* linked with -Bsymbolic.
4742
* - Each dispatchable Vulkan handle created by the ICD must be
4743
* a pointer to a struct whose first member is VK_LOADER_DATA. The
4744
* ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
4745
* - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
4746
* vkDestroySurfaceKHR(). The ICD must be capable of working with
4747
* such loader-managed surfaces.
4749
* - Loader interface v2 differs from v1 in:
4750
* - The first ICD entrypoint called by the loader is
4751
* vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
4752
* statically expose this entrypoint.
4754
* - Loader interface v3 differs from v2 in:
4755
* - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
4756
* vkDestroySurfaceKHR(), and other API which uses VKSurfaceKHR,
4757
* because the loader no longer does so.
4759
* - Loader interface v4 differs from v3 in:
4760
* - The ICD must implement vk_icdGetPhysicalDeviceProcAddr().
4762
* - Loader interface v5 differs from v4 in:
4763
* - The ICD must support Vulkan API version 1.1 and must not return
4764
* VK_ERROR_INCOMPATIBLE_DRIVER from vkCreateInstance() unless a
4765
* Vulkan Loader with interface v4 or smaller is being used and the
4766
* application provides an API version that is greater than 1.0.
4768
*pSupportedVersion = MIN2(*pSupportedVersion, 5u);
4772
VkResult anv_GetPhysicalDeviceFragmentShadingRatesKHR(
4773
VkPhysicalDevice physicalDevice,
4774
uint32_t* pFragmentShadingRateCount,
4775
VkPhysicalDeviceFragmentShadingRateKHR* pFragmentShadingRates)
4777
ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
4778
VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out,
4779
pFragmentShadingRates, pFragmentShadingRateCount);
4781
#define append_rate(_samples, _width, _height) \
4783
vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, __r) { \
4784
__r->sampleCounts = _samples; \
4785
__r->fragmentSize = (VkExtent2D) { \
4787
.height = _height, \
4792
VkSampleCountFlags sample_counts =
4793
isl_device_get_sample_counts(&physical_device->isl_dev);
4795
/* BSpec 47003: There are a number of restrictions on the sample count
4796
* based off the coarse pixel size.
4798
static const VkSampleCountFlags cp_size_sample_limits[] = {
4799
[1] = ISL_SAMPLE_COUNT_16_BIT | ISL_SAMPLE_COUNT_8_BIT |
4800
ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
4801
[2] = ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
4802
[4] = ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
4803
[8] = ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
4804
[16] = ISL_SAMPLE_COUNT_1_BIT,
4807
for (uint32_t x = 4; x >= 1; x /= 2) {
4808
for (uint32_t y = 4; y >= 1; y /= 2) {
4809
if (physical_device->info.has_coarse_pixel_primitive_and_cb) {
4811
* "CPsize 1x4 and 4x1 are not supported"
4813
if ((x == 1 && y == 4) || (x == 4 && y == 1))
4816
/* For size {1, 1}, the sample count must be ~0
4818
* 4x2 is also a specially case.
4820
if (x == 1 && y == 1)
4821
append_rate(~0, x, y);
4822
else if (x == 4 && y == 2)
4823
append_rate(ISL_SAMPLE_COUNT_1_BIT, x, y);
4825
append_rate(cp_size_sample_limits[x * y], x, y);
4827
/* For size {1, 1}, the sample count must be ~0 */
4828
if (x == 1 && y == 1)
4829
append_rate(~0, x, y);
4831
append_rate(sample_counts, x, y);
4838
return vk_outarray_status(&out);