~mmach/netext73/mesa-ryzen

« back to all changes in this revision

Viewing changes to src/intel/vulkan/anv_device.c

  • Committer: mmach
  • Date: 2023-11-02 21:31:35 UTC
  • Revision ID: netbit73@gmail.com-20231102213135-18d4tzh7tj0uz752
2023-11-02 22:11:57

Show diffs side-by-side

added added

removed removed

Lines of Context:
77
77
      DRI_CONF_ANV_GENERATED_INDIRECT_THRESHOLD(4)
78
78
      DRI_CONF_NO_16BIT(false)
79
79
      DRI_CONF_ANV_QUERY_CLEAR_WITH_BLORP_THRESHOLD(6)
 
80
      DRI_CONF_ANV_QUERY_COPY_WITH_SHADER_THRESHOLD(6)
 
81
      DRI_CONF_ANV_FORCE_INDIRECT_DESCRIPTORS(false)
80
82
   DRI_CONF_SECTION_END
81
83
 
82
84
   DRI_CONF_SECTION_DEBUG
83
85
      DRI_CONF_ALWAYS_FLUSH_CACHE(false)
84
86
      DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
 
87
      DRI_CONF_VK_WSI_FORCE_SWAPCHAIN_TO_CURRENT_EXTENT(false)
85
88
      DRI_CONF_LIMIT_TRIG_INPUT_RANGE(false)
86
89
      DRI_CONF_ANV_MESH_CONV_PRIM_ATTRS_TO_VERT_ATTRS(-2)
 
90
      DRI_CONF_FORCE_VK_VENDOR(0)
87
91
   DRI_CONF_SECTION_END
88
92
 
89
93
   DRI_CONF_SECTION_QUALITY
192
196
      (device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT) != 0;
193
197
 
194
198
   const bool rt_enabled = ANV_SUPPORT_RT && device->info.has_ray_tracing;
 
199
 
195
200
   const bool nv_mesh_shading_enabled =
196
201
      debug_get_bool_option("ANV_EXPERIMENTAL_NV_MESH_SHADER", false);
197
202
 
253
258
      .KHR_ray_query                         = rt_enabled,
254
259
      .KHR_ray_tracing_maintenance1          = rt_enabled,
255
260
      .KHR_ray_tracing_pipeline              = rt_enabled,
 
261
      .KHR_ray_tracing_position_fetch        = rt_enabled,
256
262
      .KHR_relaxed_block_layout              = true,
257
263
      .KHR_sampler_mirror_clamp_to_edge      = true,
258
264
      .KHR_sampler_ycbcr_conversion          = true,
280
286
      .KHR_video_queue                       = device->video_decode_enabled,
281
287
      .KHR_video_decode_queue                = device->video_decode_enabled,
282
288
      .KHR_video_decode_h264                 = VIDEO_CODEC_H264DEC && device->video_decode_enabled,
 
289
      .KHR_video_decode_h265                 = VIDEO_CODEC_H265DEC && device->video_decode_enabled,
283
290
      .KHR_vulkan_memory_model               = true,
284
291
      .KHR_workgroup_memory_explicit_layout  = true,
285
292
      .KHR_zero_initialize_workgroup_memory  = true,
291
298
      .EXT_conditional_rendering             = true,
292
299
      .EXT_conservative_rasterization        = true,
293
300
      .EXT_custom_border_color               = true,
 
301
      .EXT_depth_bias_control                = true,
294
302
      .EXT_depth_clamp_zero_one              = true,
295
303
      .EXT_depth_clip_control                = true,
296
304
      .EXT_depth_clip_enable                 = true,
298
306
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
299
307
      .EXT_display_control                   = true,
300
308
#endif
 
309
      .EXT_dynamic_rendering_unused_attachments = true,
301
310
      .EXT_extended_dynamic_state            = true,
302
311
      .EXT_extended_dynamic_state2           = true,
303
312
      .EXT_extended_dynamic_state3           = true,
308
317
                                               VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
309
318
      .EXT_global_priority_query             = device->max_context_priority >=
310
319
                                               VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
 
320
      .EXT_graphics_pipeline_library         = !debug_get_bool_option("ANV_NO_GPL", false),
311
321
      .EXT_host_query_reset                  = true,
312
322
      .EXT_image_2d_view_of_3d               = true,
313
323
      .EXT_image_robustness                  = true,
376
386
   };
377
387
}
378
388
 
 
389
static void
 
390
get_features(const struct anv_physical_device *pdevice,
 
391
             struct vk_features *features)
 
392
{
 
393
   struct vk_app_info *app_info = &pdevice->instance->vk.app_info;
 
394
 
 
395
   /* Just pick one; they're all the same */
 
396
   const bool has_astc_ldr =
 
397
      isl_format_supports_sampling(&pdevice->info,
 
398
                                   ISL_FORMAT_ASTC_LDR_2D_4X4_FLT16);
 
399
 
 
400
   const bool rt_enabled = ANV_SUPPORT_RT && pdevice->info.has_ray_tracing;
 
401
 
 
402
   const bool mesh_shader =
 
403
      pdevice->vk.supported_extensions.EXT_mesh_shader ||
 
404
      pdevice->vk.supported_extensions.NV_mesh_shader;
 
405
 
 
406
   *features = (struct vk_features) {
 
407
      /* Vulkan 1.0 */
 
408
      .robustBufferAccess                       = true,
 
409
      .fullDrawIndexUint32                      = true,
 
410
      .imageCubeArray                           = true,
 
411
      .independentBlend                         = true,
 
412
      .geometryShader                           = true,
 
413
      .tessellationShader                       = true,
 
414
      .sampleRateShading                        = true,
 
415
      .dualSrcBlend                             = true,
 
416
      .logicOp                                  = true,
 
417
      .multiDrawIndirect                        = true,
 
418
      .drawIndirectFirstInstance                = true,
 
419
      .depthClamp                               = true,
 
420
      .depthBiasClamp                           = true,
 
421
      .fillModeNonSolid                         = true,
 
422
      .depthBounds                              = pdevice->info.ver >= 12,
 
423
      .wideLines                                = true,
 
424
      .largePoints                              = true,
 
425
      .alphaToOne                               = true,
 
426
      .multiViewport                            = true,
 
427
      .samplerAnisotropy                        = true,
 
428
      .textureCompressionETC2                   = true,
 
429
      .textureCompressionASTC_LDR               = has_astc_ldr,
 
430
      .textureCompressionBC                     = true,
 
431
      .occlusionQueryPrecise                    = true,
 
432
      .pipelineStatisticsQuery                  = true,
 
433
      /* We can't do image stores in vec4 shaders */
 
434
      .vertexPipelineStoresAndAtomics =
 
435
         pdevice->compiler->scalar_stage[MESA_SHADER_VERTEX] &&
 
436
         pdevice->compiler->scalar_stage[MESA_SHADER_GEOMETRY],
 
437
      .fragmentStoresAndAtomics                 = true,
 
438
      .shaderTessellationAndGeometryPointSize   = true,
 
439
      .shaderImageGatherExtended                = true,
 
440
      .shaderStorageImageExtendedFormats        = true,
 
441
      .shaderStorageImageMultisample            = false,
 
442
      /* Gfx12.5 has all the required formats supported in HW for typed
 
443
       * read/writes
 
444
       */
 
445
      .shaderStorageImageReadWithoutFormat      = pdevice->info.verx10 >= 125,
 
446
      .shaderStorageImageWriteWithoutFormat     = true,
 
447
      .shaderUniformBufferArrayDynamicIndexing  = true,
 
448
      .shaderSampledImageArrayDynamicIndexing   = true,
 
449
      .shaderStorageBufferArrayDynamicIndexing  = true,
 
450
      .shaderStorageImageArrayDynamicIndexing   = true,
 
451
      .shaderClipDistance                       = true,
 
452
      .shaderCullDistance                       = true,
 
453
      .shaderFloat64                            = pdevice->info.has_64bit_float,
 
454
      .shaderInt64                              = true,
 
455
      .shaderInt16                              = true,
 
456
      .shaderResourceMinLod                     = true,
 
457
      .variableMultisampleRate                  = true,
 
458
      .inheritedQueries                         = true,
 
459
 
 
460
      /* Vulkan 1.1 */
 
461
      .storageBuffer16BitAccess            = !pdevice->instance->no_16bit,
 
462
      .uniformAndStorageBuffer16BitAccess  = !pdevice->instance->no_16bit,
 
463
      .storagePushConstant16               = true,
 
464
      .storageInputOutput16                = false,
 
465
      .multiview                           = true,
 
466
      .multiviewGeometryShader             = true,
 
467
      .multiviewTessellationShader         = true,
 
468
      .variablePointersStorageBuffer       = true,
 
469
      .variablePointers                    = true,
 
470
      .protectedMemory                     = false,
 
471
      .samplerYcbcrConversion              = true,
 
472
      .shaderDrawParameters                = true,
 
473
 
 
474
      /* Vulkan 1.2 */
 
475
      .samplerMirrorClampToEdge            = true,
 
476
      .drawIndirectCount                   = true,
 
477
      .storageBuffer8BitAccess             = true,
 
478
      .uniformAndStorageBuffer8BitAccess   = true,
 
479
      .storagePushConstant8                = true,
 
480
      .shaderBufferInt64Atomics            = true,
 
481
      .shaderSharedInt64Atomics            = false,
 
482
      .shaderFloat16                       = !pdevice->instance->no_16bit,
 
483
      .shaderInt8                          = !pdevice->instance->no_16bit,
 
484
 
 
485
      .descriptorIndexing                                 = true,
 
486
      .shaderInputAttachmentArrayDynamicIndexing          = false,
 
487
      .shaderUniformTexelBufferArrayDynamicIndexing       = true,
 
488
      .shaderStorageTexelBufferArrayDynamicIndexing       = true,
 
489
      .shaderUniformBufferArrayNonUniformIndexing         = true,
 
490
      .shaderSampledImageArrayNonUniformIndexing          = true,
 
491
      .shaderStorageBufferArrayNonUniformIndexing         = true,
 
492
      .shaderStorageImageArrayNonUniformIndexing          = true,
 
493
      .shaderInputAttachmentArrayNonUniformIndexing       = false,
 
494
      .shaderUniformTexelBufferArrayNonUniformIndexing    = true,
 
495
      .shaderStorageTexelBufferArrayNonUniformIndexing    = true,
 
496
      .descriptorBindingUniformBufferUpdateAfterBind      = true,
 
497
      .descriptorBindingSampledImageUpdateAfterBind       = true,
 
498
      .descriptorBindingStorageImageUpdateAfterBind       = true,
 
499
      .descriptorBindingStorageBufferUpdateAfterBind      = true,
 
500
      .descriptorBindingUniformTexelBufferUpdateAfterBind = true,
 
501
      .descriptorBindingStorageTexelBufferUpdateAfterBind = true,
 
502
      .descriptorBindingUpdateUnusedWhilePending          = true,
 
503
      .descriptorBindingPartiallyBound                    = true,
 
504
      .descriptorBindingVariableDescriptorCount           = true,
 
505
      .runtimeDescriptorArray                             = true,
 
506
 
 
507
      .samplerFilterMinmax                 = true,
 
508
      .scalarBlockLayout                   = true,
 
509
      .imagelessFramebuffer                = true,
 
510
      .uniformBufferStandardLayout         = true,
 
511
      .shaderSubgroupExtendedTypes         = true,
 
512
      .separateDepthStencilLayouts         = true,
 
513
      .hostQueryReset                      = true,
 
514
      .timelineSemaphore                   = true,
 
515
      .bufferDeviceAddress                 = true,
 
516
      .bufferDeviceAddressCaptureReplay    = true,
 
517
      .bufferDeviceAddressMultiDevice      = false,
 
518
      .vulkanMemoryModel                   = true,
 
519
      .vulkanMemoryModelDeviceScope        = true,
 
520
      .vulkanMemoryModelAvailabilityVisibilityChains = true,
 
521
      .shaderOutputViewportIndex           = true,
 
522
      .shaderOutputLayer                   = true,
 
523
      .subgroupBroadcastDynamicId          = true,
 
524
 
 
525
      /* Vulkan 1.3 */
 
526
      .robustImageAccess = true,
 
527
      .inlineUniformBlock = true,
 
528
      .descriptorBindingInlineUniformBlockUpdateAfterBind = true,
 
529
      .pipelineCreationCacheControl = true,
 
530
      .privateData = true,
 
531
      .shaderDemoteToHelperInvocation = true,
 
532
      .shaderTerminateInvocation = true,
 
533
      .subgroupSizeControl = true,
 
534
      .computeFullSubgroups = true,
 
535
      .synchronization2 = true,
 
536
      .textureCompressionASTC_HDR = false,
 
537
      .shaderZeroInitializeWorkgroupMemory = true,
 
538
      .dynamicRendering = true,
 
539
      .shaderIntegerDotProduct = true,
 
540
      .maintenance4 = true,
 
541
 
 
542
      /* VK_EXT_4444_formats */
 
543
      .formatA4R4G4B4 = true,
 
544
      .formatA4B4G4R4 = false,
 
545
 
 
546
      /* VK_KHR_acceleration_structure */
 
547
      .accelerationStructure = rt_enabled,
 
548
      .accelerationStructureCaptureReplay = false, /* TODO */
 
549
      .accelerationStructureIndirectBuild = false, /* TODO */
 
550
      .accelerationStructureHostCommands = false,
 
551
      .descriptorBindingAccelerationStructureUpdateAfterBind = rt_enabled,
 
552
 
 
553
      /* VK_EXT_border_color_swizzle */
 
554
      .borderColorSwizzle = true,
 
555
      .borderColorSwizzleFromImage = true,
 
556
 
 
557
      /* VK_EXT_color_write_enable */
 
558
      .colorWriteEnable = true,
 
559
 
 
560
      /* VK_EXT_image_2d_view_of_3d  */
 
561
      .image2DViewOf3D = true,
 
562
      .sampler2DViewOf3D = true,
 
563
 
 
564
      /* VK_EXT_image_sliced_view_of_3d */
 
565
      .imageSlicedViewOf3D = true,
 
566
 
 
567
      /* VK_NV_compute_shader_derivatives */
 
568
      .computeDerivativeGroupQuads = true,
 
569
      .computeDerivativeGroupLinear = true,
 
570
 
 
571
      /* VK_EXT_conditional_rendering */
 
572
      .conditionalRendering = true,
 
573
      .inheritedConditionalRendering = true,
 
574
 
 
575
      /* VK_EXT_custom_border_color */
 
576
      .customBorderColors = true,
 
577
      .customBorderColorWithoutFormat = true,
 
578
 
 
579
      /* VK_EXT_depth_clamp_zero_one */
 
580
      .depthClampZeroOne = true,
 
581
 
 
582
      /* VK_EXT_depth_clip_enable */
 
583
      .depthClipEnable = true,
 
584
 
 
585
      /* VK_EXT_fragment_shader_interlock */
 
586
      .fragmentShaderSampleInterlock = true,
 
587
      .fragmentShaderPixelInterlock = true,
 
588
      .fragmentShaderShadingRateInterlock = false,
 
589
 
 
590
      /* VK_EXT_global_priority_query */
 
591
      .globalPriorityQuery = true,
 
592
 
 
593
      /* VK_EXT_graphics_pipeline_library */
 
594
      .graphicsPipelineLibrary =
 
595
         pdevice->vk.supported_extensions.EXT_graphics_pipeline_library,
 
596
 
 
597
      /* VK_KHR_fragment_shading_rate */
 
598
      .pipelineFragmentShadingRate = true,
 
599
      .primitiveFragmentShadingRate =
 
600
         pdevice->info.has_coarse_pixel_primitive_and_cb,
 
601
      .attachmentFragmentShadingRate =
 
602
         pdevice->info.has_coarse_pixel_primitive_and_cb,
 
603
 
 
604
      /* VK_EXT_image_view_min_lod */
 
605
      .minLod = true,
 
606
 
 
607
      /* VK_EXT_index_type_uint8 */
 
608
      .indexTypeUint8 = true,
 
609
 
 
610
      /* VK_EXT_line_rasterization */
 
611
      /* Rectangular lines must use the strict algorithm, which is not
 
612
       * supported for wide lines prior to ICL.  See rasterization_mode for
 
613
       * details and how the HW states are programmed.
 
614
       */
 
615
      .rectangularLines = pdevice->info.ver >= 10,
 
616
      .bresenhamLines = true,
 
617
      /* Support for Smooth lines with MSAA was removed on gfx11.  From the
 
618
       * BSpec section "Multisample ModesState" table for "AA Line Support
 
619
       * Requirements":
 
620
       *
 
621
       *    GFX10:BUG:########  NUM_MULTISAMPLES == 1
 
622
       *
 
623
       * Fortunately, this isn't a case most people care about.
 
624
       */
 
625
      .smoothLines = pdevice->info.ver < 10,
 
626
      .stippledRectangularLines = false,
 
627
      .stippledBresenhamLines = true,
 
628
      .stippledSmoothLines = false,
 
629
 
 
630
      /* VK_NV_mesh_shader */
 
631
      .taskShaderNV = mesh_shader,
 
632
      .meshShaderNV = mesh_shader,
 
633
 
 
634
      /* VK_EXT_mesh_shader */
 
635
      .taskShader = mesh_shader,
 
636
      .meshShader = mesh_shader,
 
637
      .multiviewMeshShader = false,
 
638
      .primitiveFragmentShadingRateMeshShader = mesh_shader,
 
639
      .meshShaderQueries = false,
 
640
 
 
641
      /* VK_EXT_mutable_descriptor_type */
 
642
      .mutableDescriptorType = true,
 
643
 
 
644
      /* VK_KHR_performance_query */
 
645
      .performanceCounterQueryPools = true,
 
646
      /* HW only supports a single configuration at a time. */
 
647
      .performanceCounterMultipleQueryPools = false,
 
648
 
 
649
      /* VK_KHR_pipeline_executable_properties */
 
650
      .pipelineExecutableInfo = true,
 
651
 
 
652
      /* VK_EXT_primitives_generated_query */
 
653
      .primitivesGeneratedQuery = true,
 
654
      .primitivesGeneratedQueryWithRasterizerDiscard = false,
 
655
      .primitivesGeneratedQueryWithNonZeroStreams = false,
 
656
 
 
657
      /* VK_EXT_pipeline_library_group_handles */
 
658
      .pipelineLibraryGroupHandles = true,
 
659
 
 
660
      /* VK_EXT_provoking_vertex */
 
661
      .provokingVertexLast = true,
 
662
      .transformFeedbackPreservesProvokingVertex = true,
 
663
 
 
664
      /* VK_KHR_ray_query */
 
665
      .rayQuery = rt_enabled,
 
666
 
 
667
      /* VK_KHR_ray_tracing_maintenance1 */
 
668
      .rayTracingMaintenance1 = rt_enabled,
 
669
      .rayTracingPipelineTraceRaysIndirect2 = rt_enabled,
 
670
 
 
671
      /* VK_KHR_ray_tracing_pipeline */
 
672
      .rayTracingPipeline = rt_enabled,
 
673
      .rayTracingPipelineShaderGroupHandleCaptureReplay = false,
 
674
      .rayTracingPipelineShaderGroupHandleCaptureReplayMixed = false,
 
675
      .rayTracingPipelineTraceRaysIndirect = rt_enabled,
 
676
      .rayTraversalPrimitiveCulling = rt_enabled,
 
677
 
 
678
      /* VK_EXT_robustness2 */
 
679
      .robustBufferAccess2 = true,
 
680
      .robustImageAccess2 = true,
 
681
      .nullDescriptor = true,
 
682
 
 
683
      /* VK_EXT_shader_atomic_float */
 
684
      .shaderBufferFloat32Atomics =    true,
 
685
      .shaderBufferFloat32AtomicAdd =  pdevice->info.has_lsc,
 
686
      .shaderBufferFloat64Atomics =
 
687
         pdevice->info.has_64bit_float && pdevice->info.has_lsc,
 
688
      .shaderBufferFloat64AtomicAdd =  false,
 
689
      .shaderSharedFloat32Atomics =    true,
 
690
      .shaderSharedFloat32AtomicAdd =  false,
 
691
      .shaderSharedFloat64Atomics =    false,
 
692
      .shaderSharedFloat64AtomicAdd =  false,
 
693
      .shaderImageFloat32Atomics =     true,
 
694
      .shaderImageFloat32AtomicAdd =   false,
 
695
      .sparseImageFloat32Atomics =     false,
 
696
      .sparseImageFloat32AtomicAdd =   false,
 
697
 
 
698
      /* VK_EXT_shader_atomic_float2 */
 
699
      .shaderBufferFloat16Atomics      = pdevice->info.has_lsc,
 
700
      .shaderBufferFloat16AtomicAdd    = false,
 
701
      .shaderBufferFloat16AtomicMinMax = pdevice->info.has_lsc,
 
702
      .shaderBufferFloat32AtomicMinMax = true,
 
703
      .shaderBufferFloat64AtomicMinMax =
 
704
         pdevice->info.has_64bit_float && pdevice->info.has_lsc,
 
705
      .shaderSharedFloat16Atomics      = pdevice->info.has_lsc,
 
706
      .shaderSharedFloat16AtomicAdd    = false,
 
707
      .shaderSharedFloat16AtomicMinMax = pdevice->info.has_lsc,
 
708
      .shaderSharedFloat32AtomicMinMax = true,
 
709
      .shaderSharedFloat64AtomicMinMax = false,
 
710
      .shaderImageFloat32AtomicMinMax  = false,
 
711
      .sparseImageFloat32AtomicMinMax  = false,
 
712
 
 
713
      /* VK_KHR_shader_clock */
 
714
      .shaderSubgroupClock = true,
 
715
      .shaderDeviceClock = false,
 
716
 
 
717
      /* VK_INTEL_shader_integer_functions2 */
 
718
      .shaderIntegerFunctions2 = true,
 
719
 
 
720
      /* VK_EXT_shader_module_identifier */
 
721
      .shaderModuleIdentifier = true,
 
722
 
 
723
      /* VK_KHR_shader_subgroup_uniform_control_flow */
 
724
      .shaderSubgroupUniformControlFlow = true,
 
725
 
 
726
      /* VK_EXT_texel_buffer_alignment */
 
727
      .texelBufferAlignment = true,
 
728
 
 
729
      /* VK_EXT_transform_feedback */
 
730
      .transformFeedback = true,
 
731
      .geometryStreams = true,
 
732
 
 
733
      /* VK_EXT_vertex_attribute_divisor */
 
734
      .vertexAttributeInstanceRateDivisor = true,
 
735
      .vertexAttributeInstanceRateZeroDivisor = true,
 
736
 
 
737
      /* VK_KHR_workgroup_memory_explicit_layout */
 
738
      .workgroupMemoryExplicitLayout = true,
 
739
      .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
 
740
      .workgroupMemoryExplicitLayout8BitAccess = true,
 
741
      .workgroupMemoryExplicitLayout16BitAccess = true,
 
742
 
 
743
      /* VK_EXT_ycbcr_image_arrays */
 
744
      .ycbcrImageArrays = true,
 
745
 
 
746
      /* VK_EXT_extended_dynamic_state */
 
747
      .extendedDynamicState = true,
 
748
 
 
749
      /* VK_EXT_extended_dynamic_state2 */
 
750
      .extendedDynamicState2 = true,
 
751
      .extendedDynamicState2LogicOp = true,
 
752
      .extendedDynamicState2PatchControlPoints = true,
 
753
 
 
754
      /* VK_EXT_extended_dynamic_state3 */
 
755
      .extendedDynamicState3PolygonMode = true,
 
756
      .extendedDynamicState3TessellationDomainOrigin = true,
 
757
      .extendedDynamicState3RasterizationStream = true,
 
758
      .extendedDynamicState3LineStippleEnable = true,
 
759
      .extendedDynamicState3LineRasterizationMode = true,
 
760
      .extendedDynamicState3LogicOpEnable = true,
 
761
      .extendedDynamicState3AlphaToOneEnable = true,
 
762
      .extendedDynamicState3DepthClipEnable = true,
 
763
      .extendedDynamicState3DepthClampEnable = true,
 
764
      .extendedDynamicState3DepthClipNegativeOneToOne = true,
 
765
      .extendedDynamicState3ProvokingVertexMode = true,
 
766
      .extendedDynamicState3ColorBlendEnable = true,
 
767
      .extendedDynamicState3ColorWriteMask = true,
 
768
      .extendedDynamicState3ColorBlendEquation = true,
 
769
      .extendedDynamicState3SampleLocationsEnable = true,
 
770
      .extendedDynamicState3SampleMask = true,
 
771
 
 
772
      .extendedDynamicState3RasterizationSamples = false,
 
773
      .extendedDynamicState3AlphaToCoverageEnable = false,
 
774
      .extendedDynamicState3ConservativeRasterizationMode = false,
 
775
      .extendedDynamicState3ExtraPrimitiveOverestimationSize = false,
 
776
      .extendedDynamicState3ViewportWScalingEnable = false,
 
777
      .extendedDynamicState3ViewportSwizzle = false,
 
778
      .extendedDynamicState3ShadingRateImageEnable = false,
 
779
      .extendedDynamicState3CoverageToColorEnable = false,
 
780
      .extendedDynamicState3CoverageToColorLocation = false,
 
781
      .extendedDynamicState3CoverageModulationMode = false,
 
782
      .extendedDynamicState3CoverageModulationTableEnable = false,
 
783
      .extendedDynamicState3CoverageModulationTable = false,
 
784
      .extendedDynamicState3CoverageReductionMode = false,
 
785
      .extendedDynamicState3RepresentativeFragmentTestEnable = false,
 
786
      .extendedDynamicState3ColorBlendAdvanced = false,
 
787
 
 
788
      /* VK_EXT_multi_draw */
 
789
      .multiDraw = true,
 
790
 
 
791
      /* VK_EXT_non_seamless_cube_map */
 
792
      .nonSeamlessCubeMap = true,
 
793
 
 
794
      /* VK_EXT_primitive_topology_list_restart */
 
795
      .primitiveTopologyListRestart = true,
 
796
      .primitiveTopologyPatchListRestart = true,
 
797
 
 
798
      /* VK_EXT_depth_clip_control */
 
799
      .depthClipControl = true,
 
800
 
 
801
      /* VK_KHR_present_id */
 
802
      .presentId = pdevice->vk.supported_extensions.KHR_present_id,
 
803
 
 
804
      /* VK_KHR_present_wait */
 
805
      .presentWait = pdevice->vk.supported_extensions.KHR_present_wait,
 
806
 
 
807
      /* VK_EXT_vertex_input_dynamic_state */
 
808
      .vertexInputDynamicState = true,
 
809
 
 
810
      /* VK_KHR_ray_tracing_position_fetch */
 
811
      .rayTracingPositionFetch = rt_enabled,
 
812
 
 
813
      /* VK_EXT_dynamic_rendering_unused_attachments */
 
814
      .dynamicRenderingUnusedAttachments = true,
 
815
 
 
816
      /* VK_EXT_depth_bias_control */
 
817
      .depthBiasControl = true,
 
818
      .floatRepresentation = true,
 
819
      .leastRepresentableValueForceUnormRepresentation = false,
 
820
      .depthBiasExact = true,
 
821
   };
 
822
 
 
823
   /* The new DOOM and Wolfenstein games require depthBounds without
 
824
    * checking for it.  They seem to run fine without it so just claim it's
 
825
    * there and accept the consequences.
 
826
    */
 
827
   if (app_info->engine_name && strcmp(app_info->engine_name, "idTech") == 0)
 
828
      features->depthBounds = true;
 
829
}
 
830
 
379
831
static uint64_t
380
832
anv_compute_sys_heap_size(struct anv_physical_device *device,
381
833
                          uint64_t total_ram)
394
846
    */
395
847
   available_ram = MIN2(available_ram, device->gtt_size * 3 / 4);
396
848
 
397
 
   if (available_ram > (2ull << 30) && !device->supports_48bit_addresses) {
398
 
      /* When running with an overridden PCI ID, we may get a GTT size from
399
 
       * the kernel that is greater than 2 GiB but the execbuf check for 48bit
400
 
       * address support can still fail.  Just clamp the address space size to
401
 
       * 2 GiB if we don't have 48-bit support.
402
 
       */
403
 
      mesa_logw("%s:%d: The kernel reported a GTT size larger than 2 GiB but "
404
 
                "not support for 48-bit addresses",
405
 
                __FILE__, __LINE__);
406
 
      available_ram = 2ull << 30;
407
 
   }
408
 
 
409
849
   return available_ram;
410
850
}
411
851
 
442
882
   device->vram_non_mappable.available = devinfo->mem.vram.unmappable.free;
443
883
}
444
884
 
445
 
 
446
885
static VkResult
447
886
anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
448
887
{
484
923
            .is_local_mem = true,
485
924
         };
486
925
      }
487
 
 
488
 
      device->memory.type_count = 3;
489
 
      device->memory.types[0] = (struct anv_memory_type) {
490
 
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
491
 
         .heapIndex = 0,
492
 
      };
493
 
      device->memory.types[1] = (struct anv_memory_type) {
494
 
         .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
495
 
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
496
 
                          VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
497
 
         .heapIndex = 1,
498
 
      };
499
 
      device->memory.types[2] = (struct anv_memory_type) {
500
 
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
501
 
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
502
 
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
503
 
         /* This memory type either comes from heaps[0] if there is only
504
 
          * a mappable vram region, or from heaps[2] if there are both mappable &
505
 
          * non-mappable vram regions.
506
 
          */
507
 
         .heapIndex = device->vram_non_mappable.size > 0 ? 2 : 0,
508
 
      };
509
 
   } else if (device->info.has_llc) {
510
 
      device->memory.heap_count = 1;
511
 
      device->memory.heaps[0] = (struct anv_memory_heap) {
512
 
         .size = device->sys.size,
513
 
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
514
 
         .is_local_mem = false,
515
 
      };
516
 
 
517
 
      /* Big core GPUs share LLC with the CPU and thus one memory type can be
518
 
       * both cached and coherent at the same time.
519
 
       *
520
 
       * But some game engines don't handle a single memory type well:
521
 
       * https://gitlab.freedesktop.org/mesa/mesa/-/issues/7360#note_1719438
522
 
       *
523
 
       * The second memory type w/out HOST_CACHED_BIT will get write-combining.
524
 
       * See anv_AllocateMemory().
525
 
       *
526
 
       * The Intel Vulkan driver for Windows also advertises these memory types.
527
 
       */
528
 
      device->memory.type_count = 3;
529
 
      device->memory.types[0] = (struct anv_memory_type) {
530
 
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
531
 
         .heapIndex = 0,
532
 
      };
533
 
      device->memory.types[1] = (struct anv_memory_type) {
534
 
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
535
 
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
536
 
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
537
 
         .heapIndex = 0,
538
 
      };
539
 
      device->memory.types[2] = (struct anv_memory_type) {
540
 
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
541
 
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
542
 
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
543
 
                          VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
544
 
         .heapIndex = 0,
545
 
      };
546
926
   } else {
547
927
      device->memory.heap_count = 1;
548
928
      device->memory.heaps[0] = (struct anv_memory_heap) {
550
930
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
551
931
         .is_local_mem = false,
552
932
      };
553
 
 
554
 
      /* The spec requires that we expose a host-visible, coherent memory
555
 
       * type, but Atom GPUs don't share LLC. Thus we offer two memory types
556
 
       * to give the application a choice between cached, but not coherent and
557
 
       * coherent but uncached (WC though).
558
 
       */
559
 
      device->memory.type_count = 2;
560
 
      device->memory.types[0] = (struct anv_memory_type) {
561
 
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
562
 
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
563
 
                          VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
564
 
         .heapIndex = 0,
565
 
      };
566
 
      device->memory.types[1] = (struct anv_memory_type) {
567
 
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
568
 
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
569
 
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
570
 
         .heapIndex = 0,
571
 
      };
572
 
   }
 
933
   }
 
934
 
 
935
   switch (device->info.kmd_type) {
 
936
   case INTEL_KMD_TYPE_XE:
 
937
      result = anv_xe_physical_device_init_memory_types(device);
 
938
      break;
 
939
   case INTEL_KMD_TYPE_I915:
 
940
   default:
 
941
      result = anv_i915_physical_device_init_memory_types(device);
 
942
      break;
 
943
   }
 
944
 
 
945
   if (result != VK_SUCCESS)
 
946
      return result;
573
947
 
574
948
   for (unsigned i = 0; i < device->memory.type_count; i++) {
575
949
      VkMemoryPropertyFlags props = device->memory.types[i].propertyFlags;
768
1142
         };
769
1143
      }
770
1144
      if (v_count > 0 && pdevice->video_decode_enabled) {
 
1145
         /* HEVC support on Gfx9 is only available on VCS0. So limit the number of video queues
 
1146
          * to the first VCS engine instance.
 
1147
          *
 
1148
          * We should be able to query HEVC support from the kernel using the engine query uAPI,
 
1149
          * but this appears to be broken:
 
1150
          *    https://gitlab.freedesktop.org/drm/intel/-/issues/8832
 
1151
          *
 
1152
          * When this bug is fixed we should be able to check HEVC support to determine the
 
1153
          * correct number of queues.
 
1154
          */
771
1155
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
772
1156
            .queueFlags = VK_QUEUE_VIDEO_DECODE_BIT_KHR,
773
 
            .queueCount = v_count,
 
1157
            .queueCount = pdevice->info.ver == 9 ? MIN2(1, v_count) : v_count,
774
1158
            .engine_class = INTEL_ENGINE_CLASS_VIDEO,
775
1159
         };
776
1160
      }
 
1161
 
777
1162
      /* Increase count below when other families are added as a reminder to
778
1163
       * increase the ANV_MAX_QUEUE_FAMILIES value.
779
1164
       */
854
1239
      goto fail_fd;
855
1240
   }
856
1241
 
 
1242
   if (!devinfo.has_context_isolation) {
 
1243
      result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
 
1244
                         "Vulkan requires context isolation for %s", devinfo.name);
 
1245
      goto fail_fd;
 
1246
   }
 
1247
 
857
1248
   struct anv_physical_device *device =
858
1249
      vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
859
1250
                VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
869
1260
      &dispatch_table, &wsi_physical_device_entrypoints, false);
870
1261
 
871
1262
   result = vk_physical_device_init(&device->vk, &instance->vk,
872
 
                                    NULL, /* We set up extensions later */
 
1263
                                    NULL, NULL, /* We set up extensions later */
873
1264
                                    &dispatch_table);
874
1265
   if (result != VK_SUCCESS) {
875
1266
      vk_error(instance, result);
890
1281
   device->gtt_size = device->info.gtt_size ? device->info.gtt_size :
891
1282
                                              device->info.aperture_bytes;
892
1283
 
893
 
   /* We only allow 48-bit addresses with softpin because knowing the actual
894
 
    * address is required for the vertex cache flush workaround.
895
 
    */
896
 
   device->supports_48bit_addresses =
897
 
      device->gtt_size > (4ULL << 30 /* GiB */);
 
1284
   if (device->gtt_size < (4ULL << 30 /* GiB */)) {
 
1285
      vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
 
1286
                "GTT size too small: 0x%016"PRIx64, device->gtt_size);
 
1287
      goto fail_base;
 
1288
   }
898
1289
 
899
1290
   /* We currently only have the right bits for instructions in Gen12+. If the
900
1291
    * kernel ever starts supporting that feature on previous generations,
953
1344
 
954
1345
   device->video_decode_enabled = debug_get_bool_option("ANV_VIDEO_DECODE", false);
955
1346
 
 
1347
   device->uses_ex_bso = device->info.verx10 >= 125;
 
1348
 
 
1349
   /* Devices without extended bindless surface offsets always use indirect
 
1350
    * descriptors; others only when force_indirect_descriptors is set.
 
1351
    */
 
1352
   device->indirect_descriptors =
 
1353
      !device->uses_ex_bso ||
 
1354
      driQueryOptionb(&instance->dri_options, "force_indirect_descriptors");
 
1355
 
956
1356
   /* Check if we can read the GPU timestamp register from the CPU */
957
1357
   uint64_t u64_ignore;
958
1358
   device->has_reg_timestamp = intel_gem_read_render_timestamp(fd,
969
1369
   }
970
1370
   device->compiler->shader_debug_log = compiler_debug_log;
971
1371
   device->compiler->shader_perf_log = compiler_perf_log;
972
 
   device->compiler->constant_buffer_0_is_relative =
973
 
      !device->info.has_context_isolation;
 
1372
   device->compiler->constant_buffer_0_is_relative = false;
974
1373
   device->compiler->supports_shader_constants = true;
975
1374
   device->compiler->indirect_ubos_use_sampler = device->info.ver < 12;
 
1375
   device->compiler->extended_bindless_surface_offset = device->uses_ex_bso;
 
1376
   device->compiler->use_bindless_sampler_offset = !device->indirect_descriptors;
976
1377
 
977
1378
   isl_device_init(&device->isl_dev, &device->info);
978
1379
 
980
1381
   if (result != VK_SUCCESS)
981
1382
      goto fail_compiler;
982
1383
 
 
1384
   anv_physical_device_init_va_ranges(device);
 
1385
 
983
1386
   anv_physical_device_init_disk_cache(device);
984
1387
 
985
1388
   if (instance->vk.enabled_extensions.KHR_display) {
1003
1406
   anv_physical_device_init_perf(device, fd);
1004
1407
 
1005
1408
   get_device_extensions(device, &device->vk.supported_extensions);
 
1409
   get_features(device, &device->vk.supported_features);
1006
1410
 
1007
1411
   /* Gather major/minor before WSI. */
1008
1412
   struct stat st;
1117
1521
            driQueryOptioni(&instance->dri_options, "generated_indirect_threshold");
1118
1522
    instance->query_clear_with_blorp_threshold =
1119
1523
       driQueryOptioni(&instance->dri_options, "query_clear_with_blorp_threshold");
 
1524
    instance->query_copy_with_shader_threshold =
 
1525
       driQueryOptioni(&instance->dri_options, "query_copy_with_shader_threshold");
 
1526
    instance->force_vk_vendor =
 
1527
       driQueryOptioni(&instance->dri_options, "force_vk_vendor");
1120
1528
}
1121
1529
 
1122
1530
VkResult anv_CreateInstance(
1182
1590
   vk_free(&instance->vk.alloc, instance);
1183
1591
}
1184
1592
 
1185
 
void anv_GetPhysicalDeviceFeatures2(
1186
 
    VkPhysicalDevice                            physicalDevice,
1187
 
    VkPhysicalDeviceFeatures2*                  pFeatures)
1188
 
{
1189
 
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
1190
 
 
1191
 
   struct vk_app_info *app_info = &pdevice->instance->vk.app_info;
1192
 
 
1193
 
   /* Just pick one; they're all the same */
1194
 
   const bool has_astc_ldr =
1195
 
      isl_format_supports_sampling(&pdevice->info,
1196
 
                                   ISL_FORMAT_ASTC_LDR_2D_4X4_FLT16);
1197
 
 
1198
 
   const bool rt_enabled = ANV_SUPPORT_RT && pdevice->info.has_ray_tracing;
1199
 
 
1200
 
   const bool mesh_shader =
1201
 
      pdevice->vk.supported_extensions.EXT_mesh_shader ||
1202
 
      pdevice->vk.supported_extensions.NV_mesh_shader;
1203
 
 
1204
 
   struct vk_features features = {
1205
 
      /* Vulkan 1.0 */
1206
 
      .robustBufferAccess                       = true,
1207
 
      .fullDrawIndexUint32                      = true,
1208
 
      .imageCubeArray                           = true,
1209
 
      .independentBlend                         = true,
1210
 
      .geometryShader                           = true,
1211
 
      .tessellationShader                       = true,
1212
 
      .sampleRateShading                        = true,
1213
 
      .dualSrcBlend                             = true,
1214
 
      .logicOp                                  = true,
1215
 
      .multiDrawIndirect                        = true,
1216
 
      .drawIndirectFirstInstance                = true,
1217
 
      .depthClamp                               = true,
1218
 
      .depthBiasClamp                           = true,
1219
 
      .fillModeNonSolid                         = true,
1220
 
      .depthBounds                              = pdevice->info.ver >= 12,
1221
 
      .wideLines                                = true,
1222
 
      .largePoints                              = true,
1223
 
      .alphaToOne                               = true,
1224
 
      .multiViewport                            = true,
1225
 
      .samplerAnisotropy                        = true,
1226
 
      .textureCompressionETC2                   = true,
1227
 
      .textureCompressionASTC_LDR               = has_astc_ldr,
1228
 
      .textureCompressionBC                     = true,
1229
 
      .occlusionQueryPrecise                    = true,
1230
 
      .pipelineStatisticsQuery                  = true,
1231
 
      /* We can't do image stores in vec4 shaders */
1232
 
      .vertexPipelineStoresAndAtomics =
1233
 
         pdevice->compiler->scalar_stage[MESA_SHADER_VERTEX] &&
1234
 
         pdevice->compiler->scalar_stage[MESA_SHADER_GEOMETRY],
1235
 
      .fragmentStoresAndAtomics                 = true,
1236
 
      .shaderTessellationAndGeometryPointSize   = true,
1237
 
      .shaderImageGatherExtended                = true,
1238
 
      .shaderStorageImageExtendedFormats        = true,
1239
 
      .shaderStorageImageMultisample            = false,
1240
 
      .shaderStorageImageReadWithoutFormat      = false,
1241
 
      .shaderStorageImageWriteWithoutFormat     = true,
1242
 
      .shaderUniformBufferArrayDynamicIndexing  = true,
1243
 
      .shaderSampledImageArrayDynamicIndexing   = true,
1244
 
      .shaderStorageBufferArrayDynamicIndexing  = true,
1245
 
      .shaderStorageImageArrayDynamicIndexing   = true,
1246
 
      .shaderClipDistance                       = true,
1247
 
      .shaderCullDistance                       = true,
1248
 
      .shaderFloat64                            = pdevice->info.has_64bit_float,
1249
 
      .shaderInt64                              = true,
1250
 
      .shaderInt16                              = true,
1251
 
      .shaderResourceMinLod                     = true,
1252
 
      .variableMultisampleRate                  = true,
1253
 
      .inheritedQueries                         = true,
1254
 
 
1255
 
      /* Vulkan 1.1 */
1256
 
      .storageBuffer16BitAccess            = !pdevice->instance->no_16bit,
1257
 
      .uniformAndStorageBuffer16BitAccess  = !pdevice->instance->no_16bit,
1258
 
      .storagePushConstant16               = true,
1259
 
      .storageInputOutput16                = false,
1260
 
      .multiview                           = true,
1261
 
      .multiviewGeometryShader             = true,
1262
 
      .multiviewTessellationShader         = true,
1263
 
      .variablePointersStorageBuffer       = true,
1264
 
      .variablePointers                    = true,
1265
 
      .protectedMemory                     = false,
1266
 
      .samplerYcbcrConversion              = true,
1267
 
      .shaderDrawParameters                = true,
1268
 
 
1269
 
      /* Vulkan 1.2 */
1270
 
      .samplerMirrorClampToEdge            = true,
1271
 
      .drawIndirectCount                   = true,
1272
 
      .storageBuffer8BitAccess             = true,
1273
 
      .uniformAndStorageBuffer8BitAccess   = true,
1274
 
      .storagePushConstant8                = true,
1275
 
      .shaderBufferInt64Atomics            = true,
1276
 
      .shaderSharedInt64Atomics            = false,
1277
 
      .shaderFloat16                       = !pdevice->instance->no_16bit,
1278
 
      .shaderInt8                          = !pdevice->instance->no_16bit,
1279
 
 
1280
 
      .descriptorIndexing                                 = true,
1281
 
      .shaderInputAttachmentArrayDynamicIndexing          = false,
1282
 
      .shaderUniformTexelBufferArrayDynamicIndexing       = true,
1283
 
      .shaderStorageTexelBufferArrayDynamicIndexing       = true,
1284
 
      .shaderUniformBufferArrayNonUniformIndexing         = false,
1285
 
      .shaderSampledImageArrayNonUniformIndexing          = true,
1286
 
      .shaderStorageBufferArrayNonUniformIndexing         = true,
1287
 
      .shaderStorageImageArrayNonUniformIndexing          = true,
1288
 
      .shaderInputAttachmentArrayNonUniformIndexing       = false,
1289
 
      .shaderUniformTexelBufferArrayNonUniformIndexing    = true,
1290
 
      .shaderStorageTexelBufferArrayNonUniformIndexing    = true,
1291
 
      .descriptorBindingUniformBufferUpdateAfterBind      = true,
1292
 
      .descriptorBindingSampledImageUpdateAfterBind       = true,
1293
 
      .descriptorBindingStorageImageUpdateAfterBind       = true,
1294
 
      .descriptorBindingStorageBufferUpdateAfterBind      = true,
1295
 
      .descriptorBindingUniformTexelBufferUpdateAfterBind = true,
1296
 
      .descriptorBindingStorageTexelBufferUpdateAfterBind = true,
1297
 
      .descriptorBindingUpdateUnusedWhilePending          = true,
1298
 
      .descriptorBindingPartiallyBound                    = true,
1299
 
      .descriptorBindingVariableDescriptorCount           = true,
1300
 
      .runtimeDescriptorArray                             = true,
1301
 
 
1302
 
      .samplerFilterMinmax                 = true,
1303
 
      .scalarBlockLayout                   = true,
1304
 
      .imagelessFramebuffer                = true,
1305
 
      .uniformBufferStandardLayout         = true,
1306
 
      .shaderSubgroupExtendedTypes         = true,
1307
 
      .separateDepthStencilLayouts         = true,
1308
 
      .hostQueryReset                      = true,
1309
 
      .timelineSemaphore                   = true,
1310
 
      .bufferDeviceAddress                 = true,
1311
 
      .bufferDeviceAddressCaptureReplay    = true,
1312
 
      .bufferDeviceAddressMultiDevice      = false,
1313
 
      .vulkanMemoryModel                   = true,
1314
 
      .vulkanMemoryModelDeviceScope        = true,
1315
 
      .vulkanMemoryModelAvailabilityVisibilityChains = true,
1316
 
      .shaderOutputViewportIndex           = true,
1317
 
      .shaderOutputLayer                   = true,
1318
 
      .subgroupBroadcastDynamicId          = true,
1319
 
 
1320
 
      /* Vulkan 1.3 */
1321
 
      .robustImageAccess = true,
1322
 
      .inlineUniformBlock = true,
1323
 
      .descriptorBindingInlineUniformBlockUpdateAfterBind = true,
1324
 
      .pipelineCreationCacheControl = true,
1325
 
      .privateData = true,
1326
 
      .shaderDemoteToHelperInvocation = true,
1327
 
      .shaderTerminateInvocation = true,
1328
 
      .subgroupSizeControl = true,
1329
 
      .computeFullSubgroups = true,
1330
 
      .synchronization2 = true,
1331
 
      .textureCompressionASTC_HDR = false,
1332
 
      .shaderZeroInitializeWorkgroupMemory = true,
1333
 
      .dynamicRendering = true,
1334
 
      .shaderIntegerDotProduct = true,
1335
 
      .maintenance4 = true,
1336
 
 
1337
 
      /* VK_EXT_4444_formats */
1338
 
      .formatA4R4G4B4 = true,
1339
 
      .formatA4B4G4R4 = false,
1340
 
 
1341
 
      /* VK_KHR_acceleration_structure */
1342
 
      .accelerationStructure = rt_enabled,
1343
 
      .accelerationStructureCaptureReplay = false, /* TODO */
1344
 
      .accelerationStructureIndirectBuild = false, /* TODO */
1345
 
      .accelerationStructureHostCommands = false,
1346
 
      .descriptorBindingAccelerationStructureUpdateAfterBind = rt_enabled,
1347
 
 
1348
 
      /* VK_EXT_border_color_swizzle */
1349
 
      .borderColorSwizzle = true,
1350
 
      .borderColorSwizzleFromImage = true,
1351
 
 
1352
 
      /* VK_EXT_color_write_enable */
1353
 
      .colorWriteEnable = true,
1354
 
 
1355
 
      /* VK_EXT_image_2d_view_of_3d  */
1356
 
      .image2DViewOf3D = true,
1357
 
      .sampler2DViewOf3D = true,
1358
 
 
1359
 
      /* VK_EXT_image_sliced_view_of_3d */
1360
 
      .imageSlicedViewOf3D = true,
1361
 
 
1362
 
      /* VK_NV_compute_shader_derivatives */
1363
 
      .computeDerivativeGroupQuads = true,
1364
 
      .computeDerivativeGroupLinear = true,
1365
 
 
1366
 
      /* VK_EXT_conditional_rendering */
1367
 
      .conditionalRendering = true,
1368
 
      .inheritedConditionalRendering = true,
1369
 
 
1370
 
      /* VK_EXT_custom_border_color */
1371
 
      .customBorderColors = true,
1372
 
      .customBorderColorWithoutFormat = true,
1373
 
 
1374
 
      /* VK_EXT_depth_clamp_zero_one */
1375
 
      .depthClampZeroOne = true,
1376
 
 
1377
 
      /* VK_EXT_depth_clip_enable */
1378
 
      .depthClipEnable = true,
1379
 
 
1380
 
      /* VK_EXT_fragment_shader_interlock */
1381
 
      .fragmentShaderSampleInterlock = true,
1382
 
      .fragmentShaderPixelInterlock = true,
1383
 
      .fragmentShaderShadingRateInterlock = false,
1384
 
 
1385
 
      /* VK_EXT_global_priority_query */
1386
 
      .globalPriorityQuery = true,
1387
 
 
1388
 
      /* VK_KHR_fragment_shading_rate */
1389
 
      .pipelineFragmentShadingRate = true,
1390
 
      .primitiveFragmentShadingRate =
1391
 
         pdevice->info.has_coarse_pixel_primitive_and_cb,
1392
 
      .attachmentFragmentShadingRate =
1393
 
         pdevice->info.has_coarse_pixel_primitive_and_cb,
1394
 
 
1395
 
      /* VK_EXT_image_view_min_lod */
1396
 
      .minLod = true,
1397
 
 
1398
 
      /* VK_EXT_index_type_uint8 */
1399
 
      .indexTypeUint8 = true,
1400
 
 
1401
 
      /* VK_EXT_line_rasterization */
1402
 
      /* Rectangular lines must use the strict algorithm, which is not
1403
 
       * supported for wide lines prior to ICL.  See rasterization_mode for
1404
 
       * details and how the HW states are programmed.
1405
 
       */
1406
 
      .rectangularLines = pdevice->info.ver >= 10,
1407
 
      .bresenhamLines = true,
1408
 
      /* Support for Smooth lines with MSAA was removed on gfx11.  From the
1409
 
       * BSpec section "Multisample ModesState" table for "AA Line Support
1410
 
       * Requirements":
1411
 
       *
1412
 
       *    GFX10:BUG:########  NUM_MULTISAMPLES == 1
1413
 
       *
1414
 
       * Fortunately, this isn't a case most people care about.
1415
 
       */
1416
 
      .smoothLines = pdevice->info.ver < 10,
1417
 
      .stippledRectangularLines = false,
1418
 
      .stippledBresenhamLines = true,
1419
 
      .stippledSmoothLines = false,
1420
 
 
1421
 
      /* VK_NV_mesh_shader */
1422
 
      .taskShaderNV = mesh_shader,
1423
 
      .meshShaderNV = mesh_shader,
1424
 
 
1425
 
      /* VK_EXT_mesh_shader */
1426
 
      .taskShader = mesh_shader,
1427
 
      .meshShader = mesh_shader,
1428
 
      .multiviewMeshShader = false,
1429
 
      .primitiveFragmentShadingRateMeshShader = mesh_shader,
1430
 
      .meshShaderQueries = false,
1431
 
 
1432
 
      /* VK_EXT_mutable_descriptor_type */
1433
 
      .mutableDescriptorType = true,
1434
 
 
1435
 
      /* VK_KHR_performance_query */
1436
 
      .performanceCounterQueryPools = true,
1437
 
      /* HW only supports a single configuration at a time. */
1438
 
      .performanceCounterMultipleQueryPools = false,
1439
 
 
1440
 
      /* VK_KHR_pipeline_executable_properties */
1441
 
      .pipelineExecutableInfo = true,
1442
 
 
1443
 
      /* VK_EXT_primitives_generated_query */
1444
 
      .primitivesGeneratedQuery = true,
1445
 
      .primitivesGeneratedQueryWithRasterizerDiscard = false,
1446
 
      .primitivesGeneratedQueryWithNonZeroStreams = false,
1447
 
 
1448
 
      /* VK_EXT_pipeline_library_group_handles */
1449
 
      .pipelineLibraryGroupHandles = true,
1450
 
 
1451
 
      /* VK_EXT_provoking_vertex */
1452
 
      .provokingVertexLast = true,
1453
 
      .transformFeedbackPreservesProvokingVertex = true,
1454
 
 
1455
 
      /* VK_KHR_ray_query */
1456
 
      .rayQuery = rt_enabled,
1457
 
 
1458
 
      /* VK_KHR_ray_tracing_maintenance1 */
1459
 
      .rayTracingMaintenance1 = rt_enabled,
1460
 
      .rayTracingPipelineTraceRaysIndirect2 = rt_enabled,
1461
 
 
1462
 
      /* VK_KHR_ray_tracing_pipeline */
1463
 
      .rayTracingPipeline = rt_enabled,
1464
 
      .rayTracingPipelineShaderGroupHandleCaptureReplay = false,
1465
 
      .rayTracingPipelineShaderGroupHandleCaptureReplayMixed = false,
1466
 
      .rayTracingPipelineTraceRaysIndirect = rt_enabled,
1467
 
      .rayTraversalPrimitiveCulling = rt_enabled,
1468
 
 
1469
 
      /* VK_EXT_robustness2 */
1470
 
      .robustBufferAccess2 = true,
1471
 
      .robustImageAccess2 = true,
1472
 
      .nullDescriptor = true,
1473
 
 
1474
 
      /* VK_EXT_shader_atomic_float */
1475
 
      .shaderBufferFloat32Atomics =    true,
1476
 
      .shaderBufferFloat32AtomicAdd =  pdevice->info.has_lsc,
1477
 
      .shaderBufferFloat64Atomics =
1478
 
         pdevice->info.has_64bit_float && pdevice->info.has_lsc,
1479
 
      .shaderBufferFloat64AtomicAdd =  false,
1480
 
      .shaderSharedFloat32Atomics =    true,
1481
 
      .shaderSharedFloat32AtomicAdd =  false,
1482
 
      .shaderSharedFloat64Atomics =    false,
1483
 
      .shaderSharedFloat64AtomicAdd =  false,
1484
 
      .shaderImageFloat32Atomics =     true,
1485
 
      .shaderImageFloat32AtomicAdd =   false,
1486
 
      .sparseImageFloat32Atomics =     false,
1487
 
      .sparseImageFloat32AtomicAdd =   false,
1488
 
 
1489
 
      /* VK_EXT_shader_atomic_float2 */
1490
 
      .shaderBufferFloat16Atomics      = pdevice->info.has_lsc,
1491
 
      .shaderBufferFloat16AtomicAdd    = false,
1492
 
      .shaderBufferFloat16AtomicMinMax = pdevice->info.has_lsc,
1493
 
      .shaderBufferFloat32AtomicMinMax = true,
1494
 
      .shaderBufferFloat64AtomicMinMax =
1495
 
         pdevice->info.has_64bit_float && pdevice->info.has_lsc,
1496
 
      .shaderSharedFloat16Atomics      = pdevice->info.has_lsc,
1497
 
      .shaderSharedFloat16AtomicAdd    = false,
1498
 
      .shaderSharedFloat16AtomicMinMax = pdevice->info.has_lsc,
1499
 
      .shaderSharedFloat32AtomicMinMax = true,
1500
 
      .shaderSharedFloat64AtomicMinMax = false,
1501
 
      .shaderImageFloat32AtomicMinMax  = false,
1502
 
      .sparseImageFloat32AtomicMinMax  = false,
1503
 
 
1504
 
      /* VK_KHR_shader_clock */
1505
 
      .shaderSubgroupClock = true,
1506
 
      .shaderDeviceClock = false,
1507
 
 
1508
 
      /* VK_INTEL_shader_integer_functions2 */
1509
 
      .shaderIntegerFunctions2 = true,
1510
 
 
1511
 
      /* VK_EXT_shader_module_identifier */
1512
 
      .shaderModuleIdentifier = true,
1513
 
 
1514
 
      /* VK_KHR_shader_subgroup_uniform_control_flow */
1515
 
      .shaderSubgroupUniformControlFlow = true,
1516
 
 
1517
 
      /* VK_EXT_texel_buffer_alignment */
1518
 
      .texelBufferAlignment = true,
1519
 
 
1520
 
      /* VK_EXT_transform_feedback */
1521
 
      .transformFeedback = true,
1522
 
      .geometryStreams = true,
1523
 
 
1524
 
      /* VK_EXT_vertex_attribute_divisor */
1525
 
      .vertexAttributeInstanceRateDivisor = true,
1526
 
      .vertexAttributeInstanceRateZeroDivisor = true,
1527
 
 
1528
 
      /* VK_KHR_workgroup_memory_explicit_layout */
1529
 
      .workgroupMemoryExplicitLayout = true,
1530
 
      .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
1531
 
      .workgroupMemoryExplicitLayout8BitAccess = true,
1532
 
      .workgroupMemoryExplicitLayout16BitAccess = true,
1533
 
 
1534
 
      /* VK_EXT_ycbcr_image_arrays */
1535
 
      .ycbcrImageArrays = true,
1536
 
 
1537
 
      /* VK_EXT_extended_dynamic_state */
1538
 
      .extendedDynamicState = true,
1539
 
 
1540
 
      /* VK_EXT_extended_dynamic_state2 */
1541
 
      .extendedDynamicState2 = true,
1542
 
      .extendedDynamicState2LogicOp = true,
1543
 
      .extendedDynamicState2PatchControlPoints = false,
1544
 
 
1545
 
      /* VK_EXT_extended_dynamic_state3 */
1546
 
      .extendedDynamicState3PolygonMode = true,
1547
 
      .extendedDynamicState3TessellationDomainOrigin = true,
1548
 
      .extendedDynamicState3RasterizationStream = true,
1549
 
      .extendedDynamicState3LineStippleEnable = true,
1550
 
      .extendedDynamicState3LineRasterizationMode = true,
1551
 
      .extendedDynamicState3LogicOpEnable = true,
1552
 
      .extendedDynamicState3AlphaToOneEnable = true,
1553
 
      .extendedDynamicState3DepthClipEnable = true,
1554
 
      .extendedDynamicState3DepthClampEnable = true,
1555
 
      .extendedDynamicState3DepthClipNegativeOneToOne = true,
1556
 
      .extendedDynamicState3ProvokingVertexMode = true,
1557
 
      .extendedDynamicState3ColorBlendEnable = true,
1558
 
      .extendedDynamicState3ColorWriteMask = true,
1559
 
      .extendedDynamicState3ColorBlendEquation = true,
1560
 
      .extendedDynamicState3SampleLocationsEnable = true,
1561
 
      .extendedDynamicState3SampleMask = true,
1562
 
 
1563
 
      .extendedDynamicState3RasterizationSamples = false,
1564
 
      .extendedDynamicState3AlphaToCoverageEnable = false,
1565
 
      .extendedDynamicState3ConservativeRasterizationMode = false,
1566
 
      .extendedDynamicState3ExtraPrimitiveOverestimationSize = false,
1567
 
      .extendedDynamicState3ViewportWScalingEnable = false,
1568
 
      .extendedDynamicState3ViewportSwizzle = false,
1569
 
      .extendedDynamicState3ShadingRateImageEnable = false,
1570
 
      .extendedDynamicState3CoverageToColorEnable = false,
1571
 
      .extendedDynamicState3CoverageToColorLocation = false,
1572
 
      .extendedDynamicState3CoverageModulationMode = false,
1573
 
      .extendedDynamicState3CoverageModulationTableEnable = false,
1574
 
      .extendedDynamicState3CoverageModulationTable = false,
1575
 
      .extendedDynamicState3CoverageReductionMode = false,
1576
 
      .extendedDynamicState3RepresentativeFragmentTestEnable = false,
1577
 
      .extendedDynamicState3ColorBlendAdvanced = false,
1578
 
 
1579
 
      /* VK_EXT_multi_draw */
1580
 
      .multiDraw = true,
1581
 
 
1582
 
      /* VK_EXT_non_seamless_cube_map */
1583
 
      .nonSeamlessCubeMap = true,
1584
 
 
1585
 
      /* VK_EXT_primitive_topology_list_restart */
1586
 
      .primitiveTopologyListRestart = true,
1587
 
      .primitiveTopologyPatchListRestart = true,
1588
 
 
1589
 
      /* VK_EXT_depth_clip_control */
1590
 
      .depthClipControl = true,
1591
 
 
1592
 
      /* VK_KHR_present_id */
1593
 
      .presentId = pdevice->vk.supported_extensions.KHR_present_id,
1594
 
 
1595
 
      /* VK_KHR_present_wait */
1596
 
      .presentWait = pdevice->vk.supported_extensions.KHR_present_wait,
1597
 
 
1598
 
      /* VK_EXT_vertex_input_dynamic_state */
1599
 
      .vertexInputDynamicState = true,
1600
 
   };
1601
 
 
1602
 
   /* The new DOOM and Wolfenstein games require depthBounds without
1603
 
    * checking for it.  They seem to run fine without it so just claim it's
1604
 
    * there and accept the consequences.
1605
 
    */
1606
 
   if (app_info->engine_name && strcmp(app_info->engine_name, "idTech") == 0)
1607
 
      features.depthBounds = true;
1608
 
 
1609
 
   vk_get_physical_device_features(pFeatures, &features);
1610
 
}
1611
 
 
1612
1593
#define MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS   64
1613
1594
 
1614
1595
#define MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS 64
1616
1597
 
1617
1598
#define MAX_CUSTOM_BORDER_COLORS                   4096
1618
1599
 
 
1600
static VkDeviceSize
 
1601
anx_get_physical_device_max_heap_size(struct anv_physical_device *pdevice)
 
1602
{
 
1603
   VkDeviceSize ret = 0;
 
1604
 
 
1605
   for (uint32_t i = 0; i < pdevice->memory.heap_count; i++) {
 
1606
      if (pdevice->memory.heaps[i].size > ret)
 
1607
         ret = pdevice->memory.heaps[i].size;
 
1608
   }
 
1609
 
 
1610
   return ret;
 
1611
}
 
1612
 
1619
1613
void anv_GetPhysicalDeviceProperties(
1620
1614
    VkPhysicalDevice                            physicalDevice,
1621
1615
    VkPhysicalDeviceProperties*                 pProperties)
1627
1621
   const uint32_t max_textures = UINT16_MAX;
1628
1622
   const uint32_t max_samplers = UINT16_MAX;
1629
1623
   const uint32_t max_images = UINT16_MAX;
 
1624
   const VkDeviceSize max_heap_size = anx_get_physical_device_max_heap_size(pdevice);
1630
1625
 
1631
1626
   /* Claim a high per-stage limit since we have bindless. */
1632
1627
   const uint32_t max_per_stage = UINT32_MAX;
1646
1641
      .maxImageArrayLayers                      = (1 << 11),
1647
1642
      .maxTexelBufferElements                   = 128 * 1024 * 1024,
1648
1643
      .maxUniformBufferRange                    = pdevice->compiler->indirect_ubos_use_sampler ? (1u << 27) : (1u << 30),
1649
 
      .maxStorageBufferRange                    = MIN2(pdevice->isl_dev.max_buffer_size, UINT32_MAX),
 
1644
      .maxStorageBufferRange                    = MIN3(pdevice->isl_dev.max_buffer_size, max_heap_size, UINT32_MAX),
1650
1645
      .maxPushConstantsSize                     = MAX_PUSH_CONSTANTS_SIZE,
1651
1646
      .maxMemoryAllocationCount                 = UINT32_MAX,
1652
1647
      .maxSamplerAllocationCount                = 64 * 1024,
1779
1774
      .sparseProperties = {0}, /* Broadwell doesn't do sparse. */
1780
1775
   };
1781
1776
 
 
1777
   if (unlikely(pdevice->instance->force_vk_vendor))
 
1778
      pProperties->vendorID = pdevice->instance->force_vk_vendor;
1782
1779
   snprintf(pProperties->deviceName, sizeof(pProperties->deviceName),
1783
1780
            "%s", pdevice->info.name);
1784
1781
   memcpy(pProperties->pipelineCacheUUID,
1859
1856
   p->conformanceVersion = (VkConformanceVersion) {
1860
1857
      .major = 1,
1861
1858
      .minor = 3,
1862
 
      .subminor = 0,
 
1859
      .subminor = 6,
1863
1860
      .patch = 0,
1864
1861
   };
1865
1862
 
1909
1906
    * twice a bunch of times (or a bunch of null descriptors), we can safely
1910
1907
    * advertise a larger limit here.
1911
1908
    */
1912
 
   const unsigned max_bindless_views = 1 << 20;
 
1909
   const unsigned max_bindless_views =
 
1910
      anv_physical_device_bindless_heap_size(pdevice) / ANV_SURFACE_STATE_SIZE;
1913
1911
   p->maxUpdateAfterBindDescriptorsInAllPools            = max_bindless_views;
1914
1912
   p->shaderUniformBufferArrayNonUniformIndexingNative   = false;
1915
1913
   p->shaderSampledImageArrayNonUniformIndexingNative    = false;
2194
2192
         break;
2195
2193
      }
2196
2194
 
 
2195
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GRAPHICS_PIPELINE_LIBRARY_PROPERTIES_EXT: {
 
2196
         VkPhysicalDeviceGraphicsPipelineLibraryPropertiesEXT *props =
 
2197
            (VkPhysicalDeviceGraphicsPipelineLibraryPropertiesEXT *)ext;
 
2198
         props->graphicsPipelineLibraryFastLinking = true;
 
2199
         props->graphicsPipelineLibraryIndependentInterpolationDecoration = true;
 
2200
         break;
 
2201
      }
 
2202
 
2197
2203
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
2198
2204
         VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
2199
2205
            (VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
2609
2615
            case VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR: {
2610
2616
               VkQueueFamilyVideoPropertiesKHR *prop =
2611
2617
                  (VkQueueFamilyVideoPropertiesKHR *)ext;
2612
 
               if (queue_family->queueFlags & VK_QUEUE_VIDEO_DECODE_BIT_KHR)
2613
 
                  prop->videoCodecOperations = VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR;
 
2618
               if (queue_family->queueFlags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) {
 
2619
                  prop->videoCodecOperations = VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR |
 
2620
                                               VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR;
 
2621
               }
2614
2622
               break;
2615
2623
            }
2616
2624
            default:
2869
2877
      return ret_bo;
2870
2878
   if (get_bo_from_pool(&ret_bo, &device->scratch_surface_state_pool.block_pool, address))
2871
2879
      return ret_bo;
2872
 
   if (get_bo_from_pool(&ret_bo, &device->bindless_surface_state_pool.block_pool, address))
 
2880
   if (device->physical->indirect_descriptors &&
 
2881
       get_bo_from_pool(&ret_bo, &device->bindless_surface_state_pool.block_pool, address))
2873
2882
      return ret_bo;
2874
2883
   if (get_bo_from_pool(&ret_bo, &device->internal_surface_state_pool.block_pool, address))
2875
2884
      return ret_bo;
 
2885
   if (get_bo_from_pool(&ret_bo, &device->push_descriptor_pool.block_pool, address))
 
2886
      return ret_bo;
2876
2887
 
2877
2888
   if (!device->cmd_buffer_being_decoded)
2878
2889
      return (struct intel_batch_decode_bo) { };
2879
2890
 
2880
 
   struct anv_batch_bo **bo;
2881
 
 
2882
 
   u_vector_foreach(bo, &device->cmd_buffer_being_decoded->seen_bbos) {
 
2891
   struct anv_batch_bo **bbo;
 
2892
   u_vector_foreach(bbo, &device->cmd_buffer_being_decoded->seen_bbos) {
2883
2893
      /* The decoder zeroes out the top 16 bits, so we need to as well */
2884
 
      uint64_t bo_address = (*bo)->bo->offset & (~0ull >> 16);
 
2894
      uint64_t bo_address = (*bbo)->bo->offset & (~0ull >> 16);
2885
2895
 
2886
 
      if (address >= bo_address && address < bo_address + (*bo)->bo->size) {
 
2896
      if (address >= bo_address && address < bo_address + (*bbo)->bo->size) {
2887
2897
         return (struct intel_batch_decode_bo) {
2888
2898
            .addr = bo_address,
2889
 
            .size = (*bo)->bo->size,
2890
 
            .map = (*bo)->bo->map,
 
2899
            .size = (*bbo)->bo->size,
 
2900
            .map = (*bbo)->bo->map,
2891
2901
         };
2892
2902
      }
 
2903
 
 
2904
      uint32_t dep_words = (*bbo)->relocs.dep_words;
 
2905
      BITSET_WORD *deps = (*bbo)->relocs.deps;
 
2906
      for (uint32_t w = 0; w < dep_words; w++) {
 
2907
         BITSET_WORD mask = deps[w];
 
2908
         while (mask) {
 
2909
            int i = u_bit_scan(&mask);
 
2910
            uint32_t gem_handle = w * BITSET_WORDBITS + i;
 
2911
            struct anv_bo *bo = anv_device_lookup_bo(device, gem_handle);
 
2912
            assert(bo->refcount > 0);
 
2913
            bo_address = bo->offset & (~0ull >> 16);
 
2914
            if (address >= bo_address && address < bo_address + bo->size) {
 
2915
               return (struct intel_batch_decode_bo) {
 
2916
                  .addr = bo_address,
 
2917
                  .size = bo->size,
 
2918
                  .map = bo->map,
 
2919
               };
 
2920
            }
 
2921
         }
 
2922
      }
2893
2923
   }
2894
2924
 
2895
2925
   return (struct intel_batch_decode_bo) { };
2908
2938
      return NULL;
2909
2939
 
2910
2940
   struct anv_device *device = (struct anv_device*)driver_ctx;
2911
 
   assert(device->physical->supports_48bit_addresses);
2912
2941
 
2913
2942
   struct anv_state_pool *pool = &device->dynamic_state_pool;
2914
2943
   buf->state = anv_state_pool_alloc(pool, size, size);
3040
3069
                                     decode_get_bo, NULL, device);
3041
3070
 
3042
3071
         decoder->engine = physical_device->queue.families[i].engine_class;
3043
 
         decoder->dynamic_base = DYNAMIC_STATE_POOL_MIN_ADDRESS;
3044
 
         decoder->surface_base = INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS;
3045
 
         decoder->instruction_base = INSTRUCTION_STATE_POOL_MIN_ADDRESS;
 
3072
         decoder->dynamic_base = physical_device->va.dynamic_state_pool.addr;
 
3073
         decoder->surface_base = physical_device->va.internal_surface_state_pool.addr;
 
3074
         decoder->instruction_base = physical_device->va.instruction_state_pool.addr;
3046
3075
      }
3047
3076
   }
3048
3077
 
3118
3147
 
3119
3148
   /* keep the page with address zero out of the allocator */
3120
3149
   util_vma_heap_init(&device->vma_lo,
3121
 
                      LOW_HEAP_MIN_ADDRESS, LOW_HEAP_SIZE);
3122
 
 
3123
 
   util_vma_heap_init(&device->vma_cva, CLIENT_VISIBLE_HEAP_MIN_ADDRESS,
3124
 
                      CLIENT_VISIBLE_HEAP_SIZE);
3125
 
 
3126
 
   /* Leave the last 4GiB out of the high vma range, so that no state
3127
 
    * base address + size can overflow 48 bits. For more information see
3128
 
    * the comment about Wa32bitGeneralStateOffset in anv_allocator.c
3129
 
    */
3130
 
   util_vma_heap_init(&device->vma_hi, HIGH_HEAP_MIN_ADDRESS,
3131
 
                      physical_device->gtt_size - (1ull << 32) -
3132
 
                      HIGH_HEAP_MIN_ADDRESS);
 
3150
                      device->physical->va.low_heap.addr,
 
3151
                      device->physical->va.low_heap.size);
 
3152
 
 
3153
   util_vma_heap_init(&device->vma_cva,
 
3154
                      device->physical->va.client_visible_heap.addr,
 
3155
                      device->physical->va.client_visible_heap.size);
 
3156
 
 
3157
   util_vma_heap_init(&device->vma_hi,
 
3158
                      device->physical->va.high_heap.addr,
 
3159
                      device->physical->va.high_heap.size);
 
3160
 
 
3161
   util_vma_heap_init(&device->vma_desc,
 
3162
                      device->physical->va.descriptor_pool.addr,
 
3163
                      device->physical->va.descriptor_pool.size);
3133
3164
 
3134
3165
   list_inithead(&device->memory_objects);
3135
3166
   list_inithead(&device->image_private_objects);
3168
3199
    */
3169
3200
   result = anv_state_pool_init(&device->general_state_pool, device,
3170
3201
                                "general pool",
3171
 
                                0, GENERAL_STATE_POOL_MIN_ADDRESS, 16384);
 
3202
                                0, device->physical->va.general_state_pool.addr, 16384);
3172
3203
   if (result != VK_SUCCESS)
3173
3204
      goto fail_batch_bo_pool;
3174
3205
 
3175
3206
   result = anv_state_pool_init(&device->dynamic_state_pool, device,
3176
3207
                                "dynamic pool",
3177
 
                                DYNAMIC_STATE_POOL_MIN_ADDRESS, 0, 16384);
 
3208
                                device->physical->va.dynamic_state_pool.addr, 0, 16384);
3178
3209
   if (result != VK_SUCCESS)
3179
3210
      goto fail_general_state_pool;
3180
3211
 
3191
3222
 
3192
3223
   result = anv_state_pool_init(&device->instruction_state_pool, device,
3193
3224
                                "instruction pool",
3194
 
                                INSTRUCTION_STATE_POOL_MIN_ADDRESS, 0, 16384);
 
3225
                                device->physical->va.instruction_state_pool.addr,
 
3226
                                0, 16384);
3195
3227
   if (result != VK_SUCCESS)
3196
3228
      goto fail_dynamic_state_pool;
3197
3229
 
3201
3233
       */
3202
3234
      result = anv_state_pool_init(&device->scratch_surface_state_pool, device,
3203
3235
                                   "scratch surface state pool",
3204
 
                                   SCRATCH_SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096);
 
3236
                                   device->physical->va.scratch_surface_state_pool.addr,
 
3237
                                   0, 4096);
3205
3238
      if (result != VK_SUCCESS)
3206
3239
         goto fail_instruction_state_pool;
3207
3240
 
3208
3241
      result = anv_state_pool_init(&device->internal_surface_state_pool, device,
3209
3242
                                   "internal surface state pool",
3210
 
                                   INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS,
3211
 
                                   SCRATCH_SURFACE_STATE_POOL_SIZE, 4096);
 
3243
                                   device->physical->va.internal_surface_state_pool.addr,
 
3244
                                   device->physical->va.scratch_surface_state_pool.size,
 
3245
                                   4096);
3212
3246
   } else {
3213
3247
      result = anv_state_pool_init(&device->internal_surface_state_pool, device,
3214
3248
                                   "internal surface state pool",
3215
 
                                   INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096);
 
3249
                                   device->physical->va.internal_surface_state_pool.addr,
 
3250
                                   0, 4096);
3216
3251
   }
3217
3252
   if (result != VK_SUCCESS)
3218
3253
      goto fail_scratch_surface_state_pool;
3219
3254
 
3220
 
   result = anv_state_pool_init(&device->bindless_surface_state_pool, device,
3221
 
                                "bindless surface state pool",
3222
 
                                BINDLESS_SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096);
3223
 
   if (result != VK_SUCCESS)
3224
 
      goto fail_internal_surface_state_pool;
 
3255
   if (device->physical->indirect_descriptors) {
 
3256
      result = anv_state_pool_init(&device->bindless_surface_state_pool, device,
 
3257
                                   "bindless surface state pool",
 
3258
                                   device->physical->va.bindless_surface_state_pool.addr,
 
3259
                                   0, 4096);
 
3260
      if (result != VK_SUCCESS)
 
3261
         goto fail_internal_surface_state_pool;
 
3262
   }
3225
3263
 
3226
3264
   if (device->info->verx10 >= 125) {
3227
3265
      /* We're using 3DSTATE_BINDING_TABLE_POOL_ALLOC to give the binding
3229
3267
       */
3230
3268
      result = anv_state_pool_init(&device->binding_table_pool, device,
3231
3269
                                   "binding table pool",
3232
 
                                   BINDING_TABLE_POOL_MIN_ADDRESS, 0,
 
3270
                                   device->physical->va.binding_table_pool.addr, 0,
3233
3271
                                   BINDING_TABLE_POOL_BLOCK_SIZE);
3234
3272
   } else {
3235
 
      int64_t bt_pool_offset = (int64_t)BINDING_TABLE_POOL_MIN_ADDRESS -
3236
 
                               (int64_t)INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS;
 
3273
      /* The binding table should be in front of the surface states in virtual
 
3274
       * address space so that all surface states can be expressed as relative
 
3275
       * offsets from the binding table location.
 
3276
       */
 
3277
      assert(device->physical->va.binding_table_pool.addr <
 
3278
             device->physical->va.internal_surface_state_pool.addr);
 
3279
      int64_t bt_pool_offset = (int64_t)device->physical->va.binding_table_pool.addr -
 
3280
                               (int64_t)device->physical->va.internal_surface_state_pool.addr;
3237
3281
      assert(INT32_MIN < bt_pool_offset && bt_pool_offset < 0);
3238
3282
      result = anv_state_pool_init(&device->binding_table_pool, device,
3239
3283
                                   "binding table pool",
3240
 
                                   INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS,
 
3284
                                   device->physical->va.internal_surface_state_pool.addr,
3241
3285
                                   bt_pool_offset,
3242
3286
                                   BINDING_TABLE_POOL_BLOCK_SIZE);
3243
3287
   }
3244
3288
   if (result != VK_SUCCESS)
3245
3289
      goto fail_bindless_surface_state_pool;
3246
3290
 
 
3291
   result = anv_state_pool_init(&device->push_descriptor_pool, device,
 
3292
                                "push descriptor pool",
 
3293
                                device->physical->va.push_descriptor_pool.addr,
 
3294
                                0, 4096);
 
3295
   if (result != VK_SUCCESS)
 
3296
      goto fail_binding_table_pool;
 
3297
 
3247
3298
   if (device->info->has_aux_map) {
3248
3299
      device->aux_map_ctx = intel_aux_map_init(device, &aux_map_allocator,
3249
3300
                                               &physical_device->info);
3250
3301
      if (!device->aux_map_ctx)
3251
 
         goto fail_binding_table_pool;
 
3302
         goto fail_push_descriptor_pool;
3252
3303
   }
3253
3304
 
3254
 
   result = anv_device_alloc_bo(device, "workaround", 4096,
 
3305
   result = anv_device_alloc_bo(device, "workaround", 8192,
3255
3306
                                ANV_BO_ALLOC_CAPTURE |
3256
 
                                ANV_BO_ALLOC_MAPPED |
3257
 
                                (device->info->has_local_mem ?
3258
 
                                 ANV_BO_ALLOC_WRITE_COMBINE : 0),
 
3307
                                ANV_BO_ALLOC_MAPPED,
3259
3308
                                0 /* explicit_address */,
3260
3309
                                &device->workaround_bo);
3261
3310
   if (result != VK_SUCCESS)
3321
3370
      anv_genX(device->info, init_cps_device_state)(device);
3322
3371
   }
3323
3372
 
3324
 
   /* Allocate a null surface state at surface state offset 0.  This makes
3325
 
    * NULL descriptor handling trivial because we can just memset structures
3326
 
    * to zero and they have a valid descriptor.
3327
 
    */
3328
 
   device->null_surface_state =
3329
 
      anv_state_pool_alloc(&device->bindless_surface_state_pool,
3330
 
                           device->isl_dev.ss.size,
3331
 
                           device->isl_dev.ss.align);
3332
 
   isl_null_fill_state(&device->isl_dev, device->null_surface_state.map,
3333
 
                       .size = isl_extent3d(1, 1, 1) /* This shouldn't matter */);
3334
 
   assert(device->null_surface_state.offset == 0);
 
3373
   if (device->physical->indirect_descriptors) {
 
3374
      /* Allocate a null surface state at surface state offset 0. This makes
 
3375
       * NULL descriptor handling trivial because we can just memset
 
3376
       * structures to zero and they have a valid descriptor.
 
3377
       */
 
3378
      device->null_surface_state =
 
3379
         anv_state_pool_alloc(&device->bindless_surface_state_pool,
 
3380
                              device->isl_dev.ss.size,
 
3381
                              device->isl_dev.ss.align);
 
3382
      isl_null_fill_state(&device->isl_dev, device->null_surface_state.map,
 
3383
                          .size = isl_extent3d(1, 1, 1) /* This shouldn't matter */);
 
3384
      assert(device->null_surface_state.offset == 0);
 
3385
   } else {
 
3386
      /* When using direct descriptors, those can hold the null surface state
 
3387
       * directly. We still need a null surface for the binding table entries
 
3388
       * though, but this one can live anywhere in the internal surface state
 
3389
       * pool.
 
3390
       */
 
3391
      device->null_surface_state =
 
3392
         anv_state_pool_alloc(&device->internal_surface_state_pool,
 
3393
                              device->isl_dev.ss.size,
 
3394
                              device->isl_dev.ss.align);
 
3395
      isl_null_fill_state(&device->isl_dev, device->null_surface_state.map,
 
3396
                          .size = isl_extent3d(1, 1, 1) /* This shouldn't matter */);
 
3397
   }
3335
3398
 
3336
3399
   anv_scratch_pool_init(device, &device->scratch_pool);
3337
3400
 
3396
3459
 
3397
3460
   anv_device_init_border_colors(device);
3398
3461
 
3399
 
   anv_device_init_generated_indirect_draws(device);
 
3462
   anv_device_init_internal_kernels(device);
3400
3463
 
3401
3464
   anv_device_perf_init(device);
3402
3465
 
3427
3490
      intel_aux_map_finish(device->aux_map_ctx);
3428
3491
      device->aux_map_ctx = NULL;
3429
3492
   }
 
3493
 fail_push_descriptor_pool:
 
3494
   anv_state_pool_finish(&device->push_descriptor_pool);
3430
3495
 fail_binding_table_pool:
3431
3496
   anv_state_pool_finish(&device->binding_table_pool);
3432
3497
 fail_bindless_surface_state_pool:
3433
 
   anv_state_pool_finish(&device->bindless_surface_state_pool);
 
3498
   if (device->physical->indirect_descriptors)
 
3499
      anv_state_pool_finish(&device->bindless_surface_state_pool);
3434
3500
 fail_internal_surface_state_pool:
3435
3501
   anv_state_pool_finish(&device->internal_surface_state_pool);
3436
3502
 fail_scratch_surface_state_pool:
3451
3517
 fail_mutex:
3452
3518
   pthread_mutex_destroy(&device->mutex);
3453
3519
 fail_vmas:
 
3520
   util_vma_heap_finish(&device->vma_desc);
3454
3521
   util_vma_heap_finish(&device->vma_hi);
3455
3522
   util_vma_heap_finish(&device->vma_cva);
3456
3523
   util_vma_heap_finish(&device->vma_lo);
3487
3554
 
3488
3555
   anv_device_finish_rt_shaders(device);
3489
3556
 
3490
 
   anv_device_finish_generated_indirect_draws(device);
 
3557
   anv_device_finish_internal_kernels(device);
3491
3558
 
3492
3559
   vk_pipeline_cache_destroy(device->internal_cache, NULL);
3493
3560
   vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL);
3527
3594
      device->aux_map_ctx = NULL;
3528
3595
   }
3529
3596
 
 
3597
   anv_state_pool_finish(&device->push_descriptor_pool);
3530
3598
   anv_state_pool_finish(&device->binding_table_pool);
3531
3599
   if (device->info->verx10 >= 125)
3532
3600
      anv_state_pool_finish(&device->scratch_surface_state_pool);
3533
3601
   anv_state_pool_finish(&device->internal_surface_state_pool);
3534
 
   anv_state_pool_finish(&device->bindless_surface_state_pool);
 
3602
   if (device->physical->indirect_descriptors)
 
3603
      anv_state_pool_finish(&device->bindless_surface_state_pool);
3535
3604
   anv_state_pool_finish(&device->instruction_state_pool);
3536
3605
   anv_state_pool_finish(&device->dynamic_state_pool);
3537
3606
   anv_state_pool_finish(&device->general_state_pool);
3540
3609
 
3541
3610
   anv_bo_cache_finish(&device->bo_cache);
3542
3611
 
 
3612
   util_vma_heap_finish(&device->vma_desc);
3543
3613
   util_vma_heap_finish(&device->vma_hi);
3544
3614
   util_vma_heap_finish(&device->vma_cva);
3545
3615
   util_vma_heap_finish(&device->vma_lo);
3592
3662
   }
3593
3663
}
3594
3664
 
 
3665
static struct util_vma_heap *
 
3666
anv_vma_heap_for_flags(struct anv_device *device,
 
3667
                       enum anv_bo_alloc_flags alloc_flags)
 
3668
{
 
3669
   if (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS)
 
3670
      return &device->vma_cva;
 
3671
 
 
3672
   if (alloc_flags & ANV_BO_ALLOC_32BIT_ADDRESS)
 
3673
      return &device->vma_lo;
 
3674
 
 
3675
   if (alloc_flags & ANV_BO_ALLOC_DESCRIPTOR_POOL)
 
3676
      return &device->vma_desc;
 
3677
 
 
3678
   return &device->vma_hi;
 
3679
}
 
3680
 
3595
3681
uint64_t
3596
3682
anv_vma_alloc(struct anv_device *device,
3597
3683
              uint64_t size, uint64_t align,
3598
3684
              enum anv_bo_alloc_flags alloc_flags,
3599
 
              uint64_t client_address)
 
3685
              uint64_t client_address,
 
3686
              struct util_vma_heap **out_vma_heap)
3600
3687
{
3601
3688
   pthread_mutex_lock(&device->vma_mutex);
3602
3689
 
3603
3690
   uint64_t addr = 0;
 
3691
   *out_vma_heap = anv_vma_heap_for_flags(device, alloc_flags);
3604
3692
 
3605
3693
   if (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) {
3606
3694
      if (client_address) {
3607
 
         if (util_vma_heap_alloc_addr(&device->vma_cva,
 
3695
         if (util_vma_heap_alloc_addr(*out_vma_heap,
3608
3696
                                      client_address, size)) {
3609
3697
            addr = client_address;
3610
3698
         }
3611
3699
      } else {
3612
 
         addr = util_vma_heap_alloc(&device->vma_cva, size, align);
 
3700
         addr = util_vma_heap_alloc(*out_vma_heap, size, align);
3613
3701
      }
3614
3702
      /* We don't want to fall back to other heaps */
3615
3703
      goto done;
3617
3705
 
3618
3706
   assert(client_address == 0);
3619
3707
 
3620
 
   if (!(alloc_flags & ANV_BO_ALLOC_32BIT_ADDRESS))
3621
 
      addr = util_vma_heap_alloc(&device->vma_hi, size, align);
3622
 
 
3623
 
   if (addr == 0)
3624
 
      addr = util_vma_heap_alloc(&device->vma_lo, size, align);
 
3708
   addr = util_vma_heap_alloc(*out_vma_heap, size, align);
3625
3709
 
3626
3710
done:
3627
3711
   pthread_mutex_unlock(&device->vma_mutex);
3632
3716
 
3633
3717
void
3634
3718
anv_vma_free(struct anv_device *device,
 
3719
             struct util_vma_heap *vma_heap,
3635
3720
             uint64_t address, uint64_t size)
3636
3721
{
 
3722
   assert(vma_heap == &device->vma_lo ||
 
3723
          vma_heap == &device->vma_cva ||
 
3724
          vma_heap == &device->vma_hi ||
 
3725
          vma_heap == &device->vma_desc);
 
3726
 
3637
3727
   const uint64_t addr_48b = intel_48b_address(address);
3638
3728
 
3639
3729
   pthread_mutex_lock(&device->vma_mutex);
3640
3730
 
3641
 
   if (addr_48b >= LOW_HEAP_MIN_ADDRESS &&
3642
 
       addr_48b <= LOW_HEAP_MAX_ADDRESS) {
3643
 
      util_vma_heap_free(&device->vma_lo, addr_48b, size);
3644
 
   } else if (addr_48b >= CLIENT_VISIBLE_HEAP_MIN_ADDRESS &&
3645
 
              addr_48b <= CLIENT_VISIBLE_HEAP_MAX_ADDRESS) {
3646
 
      util_vma_heap_free(&device->vma_cva, addr_48b, size);
3647
 
   } else {
3648
 
      assert(addr_48b >= HIGH_HEAP_MIN_ADDRESS);
3649
 
      util_vma_heap_free(&device->vma_hi, addr_48b, size);
3650
 
   }
 
3731
   util_vma_heap_free(vma_heap, addr_48b, size);
3651
3732
 
3652
3733
   pthread_mutex_unlock(&device->vma_mutex);
3653
3734
}
3762
3843
   if (!mem_heap->is_local_mem)
3763
3844
      alloc_flags |= ANV_BO_ALLOC_NO_LOCAL_MEM;
3764
3845
 
3765
 
   /* If the allocated buffer might end up in local memory and it's host
3766
 
    * visible and uncached, enable CPU write-combining. It should be faster.
3767
 
    */
3768
 
   if (mem_heap->is_local_mem &&
3769
 
       (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) == 0 &&
3770
 
       (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT))
3771
 
      alloc_flags |= ANV_BO_ALLOC_WRITE_COMBINE;
3772
 
 
3773
3846
   if (mem->vk.alloc_flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT)
3774
3847
      alloc_flags |= ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS;
3775
3848
 
3776
 
   /* Anything imported or exported is EXTERNAL */
 
3849
   /* Anything imported or exported is EXTERNAL. Apply implicit sync to be
 
3850
    * compatible with clients relying on implicit fencing. This matches the
 
3851
    * behavior in iris i915_batch_submit. An example client is VA-API.
 
3852
    */
3777
3853
   if (mem->vk.export_handle_types || mem->vk.import_handle_type)
3778
 
      alloc_flags |= ANV_BO_ALLOC_EXTERNAL;
 
3854
      alloc_flags |= (ANV_BO_ALLOC_EXTERNAL | ANV_BO_ALLOC_IMPLICIT_SYNC);
3779
3855
 
3780
3856
   if (mem->vk.ahardware_buffer) {
3781
3857
      result = anv_import_ahw_memory(_device, mem);
4303
4379
    */
4304
4380
   uint32_t memory_types = (1ull << device->physical->memory.type_count) - 1;
4305
4381
 
4306
 
   /* Base alignment requirement of a cache line */
4307
 
   uint32_t alignment = 16;
4308
 
 
4309
 
   if (usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT)
4310
 
      alignment = MAX2(alignment, ANV_UBO_ALIGNMENT);
 
4382
   /* The GPU appears to write back to main memory in cachelines. Writes to a
 
4383
    * buffer should not clobber writes to another buffer, so make sure
 
4384
    * those are in different cachelines.
 
4385
    */
 
4386
   uint32_t alignment = 64;
4311
4387
 
4312
4388
   pMemoryRequirements->memoryRequirements.size = size;
4313
4389
   pMemoryRequirements->memoryRequirements.alignment = alignment;
4426
4502
}
4427
4503
 
4428
4504
void
4429
 
anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state,
 
4505
anv_fill_buffer_surface_state(struct anv_device *device,
 
4506
                              void *surface_state_ptr,
4430
4507
                              enum isl_format format,
4431
4508
                              struct isl_swizzle swizzle,
4432
4509
                              isl_surf_usage_flags_t usage,
4433
4510
                              struct anv_address address,
4434
4511
                              uint32_t range, uint32_t stride)
4435
4512
{
4436
 
   isl_buffer_fill_state(&device->isl_dev, state.map,
 
4513
   isl_buffer_fill_state(&device->isl_dev, surface_state_ptr,
4437
4514
                         .address = anv_address_physical(address),
4438
4515
                         .mocs = isl_mocs(&device->isl_dev, usage,
4439
4516
                                          address.bo && address.bo->is_external),