~mmach/netext73/mesa-ryzen

Viewing changes to src/imagination/vulkan/pvr_pipeline.c

  • Committer: mmach
  • Date: 2023-11-02 21:31:35 UTC
  • Revision ID: netbit73@gmail.com-20231102213135-18d4tzh7tj0uz752

--- src/imagination/vulkan/pvr_pipeline.c
+++ src/imagination/vulkan/pvr_pipeline.c
@@ -46,8 +46,10 @@
 #include "util/log.h"
 #include "util/macros.h"
 #include "util/ralloc.h"
+#include "util/u_dynarray.h"
 #include "util/u_math.h"
 #include "vk_alloc.h"
+#include "vk_format.h"
 #include "vk_graphics_state.h"
 #include "vk_log.h"
 #include "vk_object.h"
@@ -65,7 +67,8 @@
    const uint32_t *fpu_iterators,
    uint32_t fpu_iterators_count,
    const uint32_t *destinations,
-   struct pvr_pds_upload *const pds_upload_out)
+   struct pvr_pds_upload *const pds_upload_out,
+   uint32_t *const pds_temps_count_out)
 {
    struct pvr_pds_coeff_loading_program program = {
       .num_fpu_iterators = fpu_iterators_count,
@@ -83,12 +86,12 @@
       pds_upload_out->pvr_bo = NULL;
       pds_upload_out->code_size = 0;
       pds_upload_out->data_size = 0;
+      *pds_temps_count_out = 0;

       return VK_SUCCESS;
    }

-   staging_buffer_size =
-      (program.code_size + program.data_size) * sizeof(*staging_buffer);
+   staging_buffer_size = PVR_DW_TO_BYTES(program.code_size + program.data_size);

    staging_buffer = vk_alloc2(&device->vk.alloc,
                               allocator,
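This hunk (and several below) replaces the open-coded `(dwords) * sizeof(*staging_buffer)` arithmetic with a named conversion macro. A minimal sketch of what `PVR_DW_TO_BYTES()` presumably expands to — the real definition lives in a driver header, not in this diff; PDS code and data sizes are counted in 32-bit dwords:

   /* Assumed definition, for illustration only. */
   #define PVR_DW_TO_BYTES(dwords) ((dwords) * sizeof(uint32_t))

   /* Usage, as in the hunk above: */
   staging_buffer_size = PVR_DW_TO_BYTES(program.code_size + program.data_size);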
@@ -127,6 +130,8 @@

    vk_free2(&device->vk.alloc, allocator, staging_buffer);

+   *pds_temps_count_out = program.temps_used;
+
    return VK_SUCCESS;
 }

@@ -135,7 +140,7 @@
 VkResult pvr_pds_fragment_program_create_and_upload(
    struct pvr_device *device,
    const VkAllocationCallbacks *allocator,
-   const struct pvr_bo *fragment_shader_bo,
+   const struct pvr_suballoc_bo *fragment_shader_bo,
    uint32_t fragment_temp_count,
    enum rogue_msaa_mode msaa_mode,
    bool has_phase_rate_change,
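Shader code now lives in suballocated buffer objects, so throughout this file `struct pvr_bo` handles and `pvr_bo_free(device, ...)` calls become `struct pvr_suballoc_bo` and `pvr_bo_suballoc_free(...)`. A sketch of the idiom change, assuming the suballocation caches its device-virtual address directly instead of going through a VMA mapping (both address expressions are taken from the hunks themselves):

   /* Old path: address via the VMA; freeing needs the device. */
   addr = fragment_shader_bo->vma->dev_addr.addr;
   pvr_bo_free(device, bo);

   /* New path (assumed shape): the suballocation carries dev_addr itself. */
   addr = fragment_shader_bo->dev_addr.addr;
   pvr_bo_suballoc_free(suballoc_bo);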
@@ -155,15 +160,14 @@
     * allocating the buffer. The size from pvr_pds_kick_usc() is constant.
     */
    pvr_pds_setup_doutu(&program.usc_task_control,
-                       fragment_shader_bo->vma->dev_addr.addr,
+                       fragment_shader_bo->dev_addr.addr,
                        fragment_temp_count,
                        sample_rate,
                        has_phase_rate_change);

    pvr_pds_kick_usc(&program, NULL, 0, false, PDS_GENERATE_SIZES);

-   staging_buffer_size =
-      (program.code_size + program.data_size) * sizeof(*staging_buffer);
+   staging_buffer_size = PVR_DW_TO_BYTES(program.code_size + program.data_size);

    staging_buffer = vk_alloc2(&device->vk.alloc,
                               allocator,
@@ -353,10 +357,10 @@
    const size_t const_entries_size_in_bytes =
       pvr_pds_get_max_vertex_program_const_map_size_in_bytes(
         &device->pdevice->dev_info,
-         device->features.robustBufferAccess);
+         device->vk.enabled_features.robustBufferAccess);
    struct pvr_pds_upload *const program = &program_out->program;
    struct pvr_pds_info *const info = &program_out->info;
-   struct pvr_const_map_entry *entries_buffer;
+   struct pvr_const_map_entry *new_entries;
    ASSERTED uint32_t code_size_in_dwords;
    size_t staging_buffer_size;
    uint32_t *staging_buffer;
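`device->features` was a driver-local copy of the enabled feature set; the common Vulkan runtime already records this on `vk_device` at device-creation time, so lookups switch to `device->vk.enabled_features`. Illustrative use — the `robustBufferAccess` field name matches the core `VkPhysicalDeviceFeatures` member:

   /* Same check, now answered by the shared vk_device bookkeeping. */
   if (device->vk.enabled_features.robustBufferAccess) {
      /* emit bounds-checked DMA / fall back to the robustness buffer */
   }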
@@ -364,25 +368,27 @@

    memset(info, 0, sizeof(*info));

-   entries_buffer = vk_alloc2(&device->vk.alloc,
-                              allocator,
-                              const_entries_size_in_bytes,
-                              8,
-                              VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
-   if (!entries_buffer)
-      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+   info->entries = vk_alloc2(&device->vk.alloc,
+                             allocator,
+                             const_entries_size_in_bytes,
+                             8,
+                             VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!info->entries) {
+      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      goto err_out;
+   }

-   info->entries = entries_buffer;
    info->entries_size_in_bytes = const_entries_size_in_bytes;

-   pvr_pds_generate_vertex_primary_program(input,
-                                           NULL,
-                                           info,
-                                           device->features.robustBufferAccess,
-                                           &device->pdevice->dev_info);
+   pvr_pds_generate_vertex_primary_program(
+      input,
+      NULL,
+      info,
+      device->vk.enabled_features.robustBufferAccess,
+      &device->pdevice->dev_info);

    code_size_in_dwords = info->code_size_in_dwords;
-   staging_buffer_size = info->code_size_in_dwords * sizeof(*staging_buffer);
+   staging_buffer_size = PVR_DW_TO_BYTES(info->code_size_in_dwords);

    staging_buffer = vk_alloc2(&device->vk.alloc,
                               allocator,
@@ -390,31 +396,32 @@
                               8,
                               VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
    if (!staging_buffer) {
-      vk_free2(&device->vk.alloc, allocator, entries_buffer);
-      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      goto err_free_entries;
    }

    /* This also fills in info->entries. */
-   pvr_pds_generate_vertex_primary_program(input,
-                                           staging_buffer,
-                                           info,
-                                           device->features.robustBufferAccess,
-                                           &device->pdevice->dev_info);
+   pvr_pds_generate_vertex_primary_program(
+      input,
+      staging_buffer,
+      info,
+      device->vk.enabled_features.robustBufferAccess,
+      &device->pdevice->dev_info);

    assert(info->code_size_in_dwords <= code_size_in_dwords);

    /* FIXME: Add a vk_realloc2() ? */
-   entries_buffer = vk_realloc((!allocator) ? &device->vk.alloc : allocator,
-                               entries_buffer,
-                               info->entries_written_size_in_bytes,
-                               8,
-                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
-   if (!entries_buffer) {
-      vk_free2(&device->vk.alloc, allocator, staging_buffer);
-      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+   new_entries = vk_realloc((!allocator) ? &device->vk.alloc : allocator,
+                            info->entries,
+                            info->entries_written_size_in_bytes,
+                            8,
+                            VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!new_entries) {
+      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      goto err_free_staging_buffer;
    }

-   info->entries = entries_buffer;
+   info->entries = new_entries;
    info->entries_size_in_bytes = info->entries_written_size_in_bytes;

    /* FIXME: Figure out the define for alignment of 16. */
@@ -427,15 +434,21 @@
                                16,
                                16,
                                program);
-   if (result != VK_SUCCESS) {
-      vk_free2(&device->vk.alloc, allocator, entries_buffer);
-      vk_free2(&device->vk.alloc, allocator, staging_buffer);
-      return result;
-   }
+   if (result != VK_SUCCESS)
+      goto err_free_staging_buffer;

    vk_free2(&device->vk.alloc, allocator, staging_buffer);

    return VK_SUCCESS;
+
+err_free_staging_buffer:
+   vk_free2(&device->vk.alloc, allocator, staging_buffer);
+
+err_free_entries:
+   vk_free2(&device->vk.alloc, allocator, info->entries);
+
+err_out:
+   return result;
 }

 static inline void pvr_pds_vertex_attrib_program_destroy(
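The repeated "free everything allocated so far, then return" blocks are folded into a single unwinding tail — the usual kernel-style `goto` ladder. A self-contained illustration of the shape this function now has (names invented, not driver code):

   #include <stdbool.h>
   #include <stdlib.h>

   static bool create_thing(void **entries_out)
   {
      void *entries = malloc(64); /* kept on success */
      void *staging = NULL;       /* temporary, always freed */

      if (!entries)
         goto err_out;

      staging = malloc(128);
      if (!staging)
         goto err_free_entries;

      /* ... build into staging, upload, transfer ownership ... */

      free(staging);
      *entries_out = entries;
      return true;

   err_free_entries:
      free(entries);
   err_out:
      return false;
   }

Each label frees exactly what was live at the point of failure, so later allocations only need to jump to the right rung of the ladder.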
@@ -443,7 +456,7 @@
    const struct VkAllocationCallbacks *const allocator,
    struct pvr_pds_attrib_program *const program)
 {
-   pvr_bo_free(device, program->program.pvr_bo);
+   pvr_bo_suballoc_free(program->program.pvr_bo);
    vk_free2(&device->vk.alloc, allocator, program->info.entries);
 }

@@ -453,6 +466,19 @@
 typedef struct pvr_pds_attrib_program (*const pvr_pds_attrib_programs_array_ptr)
    [PVR_PDS_VERTEX_ATTRIB_PROGRAM_COUNT];

+/* Indicates that the special variable is unused and has not been allocated a
+ * register.
+ */
+#define PVR_VERTEX_SPECIAL_VAR_UNUSED (-1)
+
+/* Each special variable gets allocated its own vtxin reg if used. */
+struct pvr_vertex_special_vars {
+   /* VertexIndex built-in. */
+   int16_t vertex_id_offset;
+   /* InstanceIndex built-in. */
+   int16_t instance_id_offset;
+};
+
 /* Generate and uploads a PDS program for DMAing vertex attribs into USC vertex
  * inputs. This will bake the code segment and create a template of the data
  * segment for the command buffer to fill in.
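The new `struct pvr_vertex_special_vars` records which vtxin register, if any, was reserved for the VertexIndex and InstanceIndex built-ins. Hypothetical usage, mirroring the checks added further down in this diff:

   struct pvr_vertex_special_vars layout = {
      .vertex_id_offset = 4, /* VertexIndex lives in vtxin reg 4 */
      .instance_id_offset = PVR_VERTEX_SPECIAL_VAR_UNUSED,
   };

   if (layout.vertex_id_offset != PVR_VERTEX_SPECIAL_VAR_UNUSED) {
      /* ask the HW to deposit VertexIndex into that register */
   }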
@@ -460,26 +486,56 @@
 /* If allocator == NULL, the internal one will be used.
  *
  * programs_out_ptr is a pointer to the array where the outputs will be placed.
- * */
+ */
 static VkResult pvr_pds_vertex_attrib_programs_create_and_upload(
    struct pvr_device *device,
    const VkAllocationCallbacks *const allocator,
    const VkPipelineVertexInputStateCreateInfo *const vertex_input_state,
    uint32_t usc_temp_count,
    const struct rogue_vs_build_data *vs_data,
+
+   /* Needed for the new path. */
+   /* TODO: Remove some of the above once the compiler is hooked up. */
+   const struct pvr_pds_vertex_dma
+      dma_descriptions[static const PVR_MAX_VERTEX_ATTRIB_DMAS],
+   uint32_t dma_count,
+   const struct pvr_vertex_special_vars *special_vars_layout,
+
    pvr_pds_attrib_programs_array_ptr programs_out_ptr)
 {
-   struct pvr_pds_vertex_dma dma_descriptions[PVR_MAX_VERTEX_ATTRIB_DMAS];
+   struct pvr_pds_vertex_dma dma_descriptions_old[PVR_MAX_VERTEX_ATTRIB_DMAS];
+
    struct pvr_pds_attrib_program *const programs_out = *programs_out_ptr;
-   struct pvr_pds_vertex_primary_program_input input = {
-      .dma_list = dma_descriptions,
-   };
+   struct pvr_pds_vertex_primary_program_input input = { 0 };
    VkResult result;

-   pvr_pds_vertex_attrib_init_dma_descriptions(vertex_input_state,
-                                               vs_data,
-                                               &dma_descriptions,
-                                               &input.dma_count);
+   const bool old_path = pvr_has_hard_coded_shaders(&device->pdevice->dev_info);
+
+   if (old_path) {
+      pvr_pds_vertex_attrib_init_dma_descriptions(vertex_input_state,
+                                                  vs_data,
+                                                  &dma_descriptions_old,
+                                                  &input.dma_count);
+
+      input.dma_list = dma_descriptions_old;
+   } else {
+      input.dma_list = dma_descriptions;
+      input.dma_count = dma_count;
+
+      if (special_vars_layout->vertex_id_offset !=
+          PVR_VERTEX_SPECIAL_VAR_UNUSED) {
+         /* Gets filled by the HW and copied into the appropriate reg. */
+         input.flags |= PVR_PDS_VERTEX_FLAGS_VERTEX_ID_REQUIRED;
+         input.vertex_id_register = special_vars_layout->vertex_id_offset;
+      }
+
+      if (special_vars_layout->instance_id_offset !=
+          PVR_VERTEX_SPECIAL_VAR_UNUSED) {
+         /* Gets filled by the HW and copied into the appropriate reg. */
+         input.flags |= PVR_PDS_VERTEX_FLAGS_INSTANCE_ID_REQUIRED;
+         input.instance_id_register = special_vars_layout->instance_id_offset;
+      }
+   }

    pvr_pds_setup_doutu(&input.usc_task_control,
                        0,
@@ -487,30 +543,31 @@
                        PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
                        false);

-   /* TODO: If statements for all the "bRequired"s + ui32ExtraFlags. */
-
    /* Note: programs_out_ptr is a pointer to an array so this is fine. See the
     * typedef.
     */
    for (uint32_t i = 0; i < ARRAY_SIZE(*programs_out_ptr); i++) {
+      uint32_t extra_flags;
+
       switch (i) {
       case PVR_PDS_VERTEX_ATTRIB_PROGRAM_BASIC:
-         input.flags = 0;
+         extra_flags = 0;
         break;

       case PVR_PDS_VERTEX_ATTRIB_PROGRAM_BASE_INSTANCE:
-         input.flags = PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT;
+         extra_flags = PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT;
         break;

       case PVR_PDS_VERTEX_ATTRIB_PROGRAM_DRAW_INDIRECT:
-         /* We unset INSTANCE and set INDIRECT. */
-         input.flags = PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT;
+         extra_flags = PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT;
         break;

       default:
         unreachable("Invalid vertex attrib program type.");
       }

+      input.flags |= extra_flags;
+
       result =
         pvr_pds_vertex_attrib_program_create_and_upload(device,
                                                         allocator,
@@ -525,6 +582,8 @@

          return result;
       }
+
+      input.flags &= ~extra_flags;
    }

    return VK_SUCCESS;
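`input.flags` can now arrive at this loop with persistent bits already set (the vertex/instance ID requirements above), so each iteration ORs in only its variant bit and masks it back out before the next one, instead of overwriting the whole word. A standalone illustration of why the OR / AND-NOT pair is needed (flag values invented):

   #include <stdint.h>
   #include <stdio.h>

   int main(void)
   {
      uint32_t flags = 0x100; /* persistent bit, e.g. VERTEX_ID_REQUIRED */
      const uint32_t variant[] = { 0x0, 0x1, 0x2 };

      for (unsigned i = 0; i < 3; i++) {
         flags |= variant[i];
         printf("variant %u built with flags 0x%03x\n", i, flags);
         flags &= ~variant[i]; /* persistent 0x100 survives every pass */
      }
      return 0;
   }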
@@ -595,7 +654,7 @@
    const struct rogue_ubo_data *ubo_data,
    pvr_pds_descriptor_program_buffer_array_ptr buffers_out_ptr,
    uint32_t *const buffer_count_out,
-   struct pvr_bo **const static_consts_pvr_bo_out)
+   struct pvr_suballoc_bo **const static_consts_pvr_bo_out)
 {
    struct pvr_pds_buffer *const buffers = *buffers_out_ptr;
    uint32_t buffer_count = 0;
@@ -673,7 +732,7 @@
       pvr_pds_get_max_descriptor_upload_const_map_size_in_bytes();
    struct pvr_pds_info *const pds_info = &descriptor_state->pds_info;
    struct pvr_pds_descriptor_program_input program = { 0 };
-   struct pvr_const_map_entry *entries_buffer;
+   struct pvr_const_map_entry *new_entries;
    ASSERTED uint32_t code_size_in_dwords;
    uint32_t staging_buffer_size;
    uint32_t *staging_buffer;
@@ -688,7 +747,7 @@
    if (old_path) {
       result = pvr_pds_descriptor_program_setup_buffers(
         device,
-         device->features.robustBufferAccess,
+         device->vk.enabled_features.robustBufferAccess,
         compile_time_consts_data,
         ubo_data,
         &program.buffers,
@@ -759,28 +818,25 @@
       program.addr_literal_count = addr_literals;
    }

-   entries_buffer = vk_alloc2(&device->vk.alloc,
-                              allocator,
-                              const_entries_size_in_bytes,
-                              8,
-                              VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
-   if (!entries_buffer) {
-      pvr_bo_free(device, descriptor_state->static_consts);
-
-      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+   pds_info->entries = vk_alloc2(&device->vk.alloc,
+                                 allocator,
+                                 const_entries_size_in_bytes,
+                                 8,
+                                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!pds_info->entries) {
+      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      goto err_free_static_consts;
    }

-   pds_info->entries = entries_buffer;
    pds_info->entries_size_in_bytes = const_entries_size_in_bytes;

    pvr_pds_generate_descriptor_upload_program(&program, NULL, pds_info);

    code_size_in_dwords = pds_info->code_size_in_dwords;
-   staging_buffer_size =
-      pds_info->code_size_in_dwords * sizeof(*staging_buffer);
+   staging_buffer_size = PVR_DW_TO_BYTES(pds_info->code_size_in_dwords);

    if (!staging_buffer_size) {
-      vk_free2(&device->vk.alloc, allocator, entries_buffer);
+      vk_free2(&device->vk.alloc, allocator, pds_info->entries);

       *descriptor_state = (struct pvr_stage_allocation_descriptor_state){ 0 };

@@ -793,10 +849,8 @@
                               8,
                               VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
    if (!staging_buffer) {
-      pvr_bo_free(device, descriptor_state->static_consts);
-      vk_free2(&device->vk.alloc, allocator, entries_buffer);
-
-      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      goto err_free_entries;
    }

    pvr_pds_generate_descriptor_upload_program(&program,
@@ -806,19 +860,17 @@
    assert(pds_info->code_size_in_dwords <= code_size_in_dwords);

    /* FIXME: use vk_realloc2() ? */
-   entries_buffer = vk_realloc((!allocator) ? &device->vk.alloc : allocator,
-                               entries_buffer,
-                               pds_info->entries_written_size_in_bytes,
-                               8,
-                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
-   if (!entries_buffer) {
-      pvr_bo_free(device, descriptor_state->static_consts);
-      vk_free2(&device->vk.alloc, allocator, staging_buffer);
-
-      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+   new_entries = vk_realloc((!allocator) ? &device->vk.alloc : allocator,
+                            pds_info->entries,
+                            pds_info->entries_written_size_in_bytes,
+                            8,
+                            VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!new_entries) {
+      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      goto err_free_staging_buffer;
    }

-   pds_info->entries = entries_buffer;
+   pds_info->entries = new_entries;
    pds_info->entries_size_in_bytes = pds_info->entries_written_size_in_bytes;

    /* FIXME: Figure out the define for alignment of 16. */
@@ -831,17 +883,23 @@
                                16,
                                16,
                                &descriptor_state->pds_code);
-   if (result != VK_SUCCESS) {
-      pvr_bo_free(device, descriptor_state->static_consts);
-      vk_free2(&device->vk.alloc, allocator, entries_buffer);
-      vk_free2(&device->vk.alloc, allocator, staging_buffer);
-
-      return result;
-   }
+   if (result != VK_SUCCESS)
+      goto err_free_staging_buffer;

    vk_free2(&device->vk.alloc, allocator, staging_buffer);

    return VK_SUCCESS;
+
+err_free_staging_buffer:
+   vk_free2(&device->vk.alloc, allocator, staging_buffer);
+
+err_free_entries:
+   vk_free2(&device->vk.alloc, allocator, pds_info->entries);
+
+err_free_static_consts:
+   pvr_bo_suballoc_free(descriptor_state->static_consts);
+
+   return result;
 }

 static void pvr_pds_descriptor_program_destroy(
@@ -849,9 +907,12 @@
    const struct VkAllocationCallbacks *const allocator,
    struct pvr_stage_allocation_descriptor_state *const descriptor_state)
 {
-   pvr_bo_free(device, descriptor_state->pds_code.pvr_bo);
+   if (!descriptor_state)
+      return;
+
+   pvr_bo_suballoc_free(descriptor_state->pds_code.pvr_bo);
    vk_free2(&device->vk.alloc, allocator, descriptor_state->pds_info.entries);
-   pvr_bo_free(device, descriptor_state->static_consts);
+   pvr_bo_suballoc_free(descriptor_state->static_consts);
 }

 static void pvr_pds_compute_program_setup(
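The destroy helper is additionally made tolerant of a NULL state, which is what lets error paths elsewhere in the file (see the compute pipeline hunks below) call it unconditionally. The pattern in miniature, illustrative only:

   static void thing_destroy(struct thing *t)
   {
      if (!t)
         return; /* tolerate "never created", like free(NULL) */

      /* release members in reverse creation order */
   }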
@@ -923,8 +984,7 @@
    /* FIXME: According to pvr_device_init_compute_pds_program() the code size
     * is in bytes. Investigate this.
     */
-   staging_buffer_size =
-      (program.code_size + program.data_size) * sizeof(*staging_buffer);
+   staging_buffer_size = PVR_DW_TO_BYTES(program.code_size + program.data_size);

    staging_buffer = vk_alloc2(&device->vk.alloc,
                               allocator,
@@ -980,7 +1040,7 @@
    struct pvr_pds_info *const pds_info)
 {
    /* We don't allocate an entries buffer so we don't need to free it */
-   pvr_bo_free(device, pds_program->pvr_bo);
+   pvr_bo_suballoc_free(pds_program->pvr_bo);
 }

 /* This only uploads the code segment. The data segment will need to be patched
@@ -1014,7 +1074,7 @@
    /* FIXME: According to pvr_device_init_compute_pds_program() the code size
     * is in bytes. Investigate this.
     */
-   buffer_size = MAX2(program.code_size, program.data_size) * sizeof(*buffer);
+   buffer_size = PVR_DW_TO_BYTES(MAX2(program.code_size, program.data_size));

    buffer = vk_alloc2(&device->vk.alloc,
                       allocator,
@@ -1070,7 +1130,7 @@
    const VkAllocationCallbacks *const allocator,
    struct pvr_pds_base_workgroup_program *const state)
 {
-   pvr_bo_free(device, state->code_upload.pvr_bo);
+   pvr_bo_suballoc_free(state->code_upload.pvr_bo);
    vk_free2(&device->vk.alloc, allocator, state->data_section);
 }

@@ -1238,7 +1298,7 @@
       work_group_input_regs,
       barrier_coefficient,
       usc_temps,
-      compute_pipeline->shader_state.bo->vma->dev_addr,
+      compute_pipeline->shader_state.bo->dev_addr,
       &compute_pipeline->primary_program,
       &compute_pipeline->primary_program_info);
    if (result != VK_SUCCESS)
@@ -1260,7 +1320,7 @@
         work_group_input_regs,
         barrier_coefficient,
         usc_temps,
-         compute_pipeline->shader_state.bo->vma->dev_addr,
+         compute_pipeline->shader_state.bo->dev_addr,
         &compute_pipeline->primary_base_workgroup_variant_program);
       if (result != VK_SUCCESS)
         goto err_destroy_compute_program;
@@ -1275,10 +1335,12 @@
                                    &compute_pipeline->primary_program_info);

 err_free_descriptor_program:
-   pvr_bo_free(device, compute_pipeline->descriptor_state.pds_code.pvr_bo);
+   pvr_pds_descriptor_program_destroy(device,
+                                      allocator,
+                                      &compute_pipeline->descriptor_state);

 err_free_shader:
-   pvr_bo_free(device, compute_pipeline->shader_state.bo);
+   pvr_bo_suballoc_free(compute_pipeline->shader_state.bo);

    return result;
 }
@@ -1365,7 +1427,7 @@
    pvr_pds_descriptor_program_destroy(device,
                                       allocator,
                                       &compute_pipeline->descriptor_state);
-   pvr_bo_free(device, compute_pipeline->shader_state.bo);
+   pvr_bo_suballoc_free(compute_pipeline->shader_state.bo);

    pvr_pipeline_finish(&compute_pipeline->base);

@@ -1429,13 +1491,13 @@
       pvr_pds_vertex_attrib_program_destroy(device, allocator, attrib_program);
    }

-   pvr_bo_free(device,
-               gfx_pipeline->shader_state.fragment.pds_fragment_program.pvr_bo);
-   pvr_bo_free(device,
-               gfx_pipeline->shader_state.fragment.pds_coeff_program.pvr_bo);
+   pvr_bo_suballoc_free(
+      gfx_pipeline->shader_state.fragment.pds_fragment_program.pvr_bo);
+   pvr_bo_suballoc_free(
+      gfx_pipeline->shader_state.fragment.pds_coeff_program.pvr_bo);

-   pvr_bo_free(device, gfx_pipeline->shader_state.fragment.bo);
-   pvr_bo_free(device, gfx_pipeline->shader_state.vertex.bo);
+   pvr_bo_suballoc_free(gfx_pipeline->shader_state.fragment.bo);
+   pvr_bo_suballoc_free(gfx_pipeline->shader_state.vertex.bo);

    pvr_pipeline_finish(&gfx_pipeline->base);

@@ -1445,6 +1507,7 @@
 static void
 pvr_vertex_state_init(struct pvr_graphics_pipeline *gfx_pipeline,
                       const struct rogue_common_build_data *common_data,
+                      uint32_t vtxin_regs_used,
                       const struct rogue_vs_build_data *vs_data)
 {
    struct pvr_vertex_shader_state *vertex_state =
@@ -1455,7 +1518,6 @@
     */
    vertex_state->stage_state.const_shared_reg_count = common_data->shareds;
    vertex_state->stage_state.const_shared_reg_offset = 0;
-   vertex_state->stage_state.temps_count = common_data->temps;
    vertex_state->stage_state.coefficient_size = common_data->coeffs;
    vertex_state->stage_state.uses_atomic_ops = false;
    vertex_state->stage_state.uses_texture_rw = false;
@@ -1463,7 +1525,13 @@
    vertex_state->stage_state.has_side_effects = false;
    vertex_state->stage_state.empty_program = false;

-   vertex_state->vertex_input_size = vs_data->num_vertex_input_regs;
+   /* This ends up unused since we'll use the temp_usage for the PDS program we
+    * end up selecting, and the descriptor PDS program doesn't use any temps.
+    * Let's set it to ~0 in case it ever gets used.
+    */
+   vertex_state->stage_state.pds_temps_count = ~0;
+
+   vertex_state->vertex_input_size = vtxin_regs_used;
    vertex_state->vertex_output_size =
       vs_data->num_vertex_outputs * ROGUE_REG_SIZE_BYTES;
    vertex_state->user_clip_planes_mask = 0;
@@ -1502,7 +1570,6 @@
     */
    fragment_state->stage_state.const_shared_reg_count = 0;
    fragment_state->stage_state.const_shared_reg_offset = 0;
-   fragment_state->stage_state.temps_count = common_data->temps;
    fragment_state->stage_state.coefficient_size = common_data->coeffs;
    fragment_state->stage_state.uses_atomic_ops = false;
    fragment_state->stage_state.uses_texture_rw = false;
@@ -1512,6 +1579,11 @@

    fragment_state->pass_type = PVRX(TA_PASSTYPE_OPAQUE);
    fragment_state->entry_offset = 0;
+
+   /* We can't initialize it yet since we still need to generate the PDS
+    * programs so set it to `~0` to make sure that we set this up later on.
+    */
+   fragment_state->stage_state.pds_temps_count = ~0;
 }

 static bool pvr_blend_factor_requires_consts(VkBlendFactor factor)
@@ -1539,17 +1611,15 @@
 static bool pvr_graphics_pipeline_requires_dynamic_blend_consts(
    const struct pvr_graphics_pipeline *gfx_pipeline)
 {
-   const bool has_dynamic_blend_consts =
-      BITSET_TEST(gfx_pipeline->dynamic_state.set,
-                  MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS);
+   const struct vk_dynamic_graphics_state *const state =
+      &gfx_pipeline->dynamic_state;

-   if (!has_dynamic_blend_consts)
+   if (BITSET_TEST(state->set, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS))
       return false;

-   for (uint32_t i = 0; i < gfx_pipeline->dynamic_state.cb.attachment_count;
-        i++) {
+   for (uint32_t i = 0; i < state->cb.attachment_count; i++) {
       const struct vk_color_blend_attachment_state *attachment =
-         &gfx_pipeline->dynamic_state.cb.attachments[i];
+         &state->cb.attachments[i];

       const bool has_color_write =
         attachment->write_mask &
@@ -1627,6 +1697,201 @@

 #undef PVR_DEV_ADDR_SIZE_IN_SH_REGS

+static void pvr_graphics_pipeline_alloc_vertex_inputs(
+   const VkPipelineVertexInputStateCreateInfo *const vs_data,
+   rogue_vertex_inputs *const vertex_input_layout_out,
+   unsigned *num_vertex_input_regs_out,
+   pvr_pds_attrib_dma_descriptions_array_ptr dma_descriptions_out_ptr,
+   uint32_t *const dma_count_out)
+{
+   const VkVertexInputBindingDescription
+      *sorted_bindings[PVR_MAX_VERTEX_INPUT_BINDINGS] = { 0 };
+   const VkVertexInputAttributeDescription
+      *sorted_attributes[PVR_MAX_VERTEX_INPUT_BINDINGS] = { 0 };
+
+   rogue_vertex_inputs build_data = {
+      .num_input_vars = vs_data->vertexAttributeDescriptionCount,
+   };
+   uint32_t next_reg_offset = 0;
+
+   struct pvr_pds_vertex_dma *const dma_descriptions =
+      *dma_descriptions_out_ptr;
+   uint32_t dma_count = 0;
+
+   /* Vertex attributes map to the `layout(location = x)` annotation in the
+    * shader where `x` is the attribute's location.
+    * Vertex bindings have NO relation to the shader. They have nothing to do
+    * with the `layout(set = x, binding = y)` notation. They instead indicate
+    * where the data for a collection of vertex attributes comes from. The
+    * application binds a VkBuffer with vkCmdBindVertexBuffers() to a specific
+    * binding number and based on that we'll know which buffer to DMA the data
+    * from, to fill in the collection of vertex attributes.
+    */
+
+   for (uint32_t i = 0; i < vs_data->vertexBindingDescriptionCount; i++) {
+      const VkVertexInputBindingDescription *binding_desc =
+         &vs_data->pVertexBindingDescriptions[i];
+
+      sorted_bindings[binding_desc->binding] = binding_desc;
+   }
+
+   for (uint32_t i = 0; i < vs_data->vertexAttributeDescriptionCount; i++) {
+      const VkVertexInputAttributeDescription *attribute_desc =
+         &vs_data->pVertexAttributeDescriptions[i];
+
+      sorted_attributes[attribute_desc->location] = attribute_desc;
+   }
+
+   for (uint32_t i = 0, j = 0; i < ARRAY_SIZE(sorted_attributes); i++) {
+      if (sorted_attributes[i])
+         sorted_attributes[j++] = sorted_attributes[i];
+   }
+
+   for (uint32_t i = 0; i < vs_data->vertexAttributeDescriptionCount; i++) {
+      const VkVertexInputAttributeDescription *attribute = sorted_attributes[i];
+      const VkVertexInputBindingDescription *binding =
+         sorted_bindings[attribute->binding];
+      const struct util_format_description *fmt_description =
+         vk_format_description(attribute->format);
+      struct pvr_pds_vertex_dma *dma_desc = &dma_descriptions[dma_count];
+      unsigned vtxin_reg_offset;
+
+      /* Reg allocation. */
+
+      vtxin_reg_offset = next_reg_offset;
+      build_data.base[i] = vtxin_reg_offset;
+
+      if (fmt_description->colorspace != UTIL_FORMAT_COLORSPACE_RGB ||
+          fmt_description->layout != UTIL_FORMAT_LAYOUT_PLAIN ||
+          fmt_description->block.bits % 32 != 0 || !fmt_description->is_array) {
+         /* For now we only support formats with 32 bit components since we
+          * don't need to pack/unpack them.
+          */
+         /* TODO: Support any other format with VERTEX_BUFFER_BIT set that
+          * doesn't have 32 bit components if we're advertising any.
+          */
+         assert(false);
+      }
+
+      /* TODO: Check if this is fine with the compiler. Does it want the amount
+       * of components or does it want a size in dwords to figure out how many
+       * vtxin regs are covered. For formats with 32 bit components the
+       * distinction doesn't change anything.
+       */
+      build_data.components[i] =
+         util_format_get_nr_components(fmt_description->format);
+
+      next_reg_offset += build_data.components[i];
+
+      /* DMA setup. */
+
+      /* The PDS program sets up DDMADs to DMA attributes into vtxin regs.
+       *
+       * DDMAD -> Multiply, add, and DOUTD (i.e. DMA from that address).
+       *          DMA source addr = src0 * src1 + src2
+       *          DMA params = src3
+       *
+       * In the PDS program we setup src0 with the binding's stride and src1
+       * with either the instance id or vertex id (both of which get filled by
+       * the hardware). We setup src2 later on once we know which VkBuffer to
+       * DMA the data from so it's saved for later when we patch the data
+       * section.
+       */
+
+      /* TODO: Right now we're setting up a DMA per attribute. In a case where
+       * there are multiple attributes packed into a single binding with
+       * adjacent locations we'd still be DMAing them separately. This is not
+       * great so the DMA setup should be smarter and could do with some
+       * optimization.
+       */
+
+      *dma_desc = (struct pvr_pds_vertex_dma){ 0 };
+
+      /* In relation to the Vulkan spec. 22.4. Vertex Input Address Calculation
+       * this corresponds to `attribDesc.offset`.
+       * The PDS program doesn't do anything with it but just save it in the
+       * PDS program entry.
+       */
+      dma_desc->offset = attribute->offset;
+
+      /* In relation to the Vulkan spec. 22.4. Vertex Input Address Calculation
+       * this corresponds to `bindingDesc.stride`.
+       * The PDS program will calculate the `effectiveVertexOffset` with this
+       * and add it to the address provided in the patched data segment.
+       */
+      dma_desc->stride = binding->stride;
+
+      if (binding->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE)
+         dma_desc->flags = PVR_PDS_VERTEX_DMA_FLAGS_INSTANCE_RATE;
+      else
+         dma_desc->flags = 0;
+
+      /* Size to DMA per vertex attribute. Used to setup src3 in the DDMAD. */
+      assert(fmt_description->block.bits != 0); /* Likely an unsupported fmt. */
+      dma_desc->size_in_dwords = fmt_description->block.bits / 32;
+
+      /* Vtxin reg offset to start DMAing into. */
+      dma_desc->destination = vtxin_reg_offset;
+
+      /* Will be used by the driver to figure out buffer address to patch in the
+       * data section. I.e. which binding we should DMA from.
+       */
+      dma_desc->binding_index = attribute->binding;
+
+      /* We don't currently support VK_EXT_vertex_attribute_divisor so no
+       * repeating of instance-rate vertex attributes needed. We should always
+       * move on to the next vertex attribute.
+       */
+      dma_desc->divisor = 1;
+
+      /* Will be used to generate PDS code that takes care of robust buffer
+       * access, and later on by the driver to write the correct robustness
+       * buffer address to DMA the fallback values from.
+       */
+      dma_desc->robustness_buffer_offset =
+         pvr_get_robustness_buffer_format_offset(attribute->format);
+
+      /* Used by later on by the driver to figure out if the buffer is being
+       * accessed out of bounds, for robust buffer access.
+       */
+      dma_desc->component_size_in_bytes =
+         fmt_description->block.bits / fmt_description->nr_channels / 8;
+
+      dma_count++;
+   };
+
+   *vertex_input_layout_out = build_data;
+   *num_vertex_input_regs_out = next_reg_offset;
+   *dma_count_out = dma_count;
+}
+
+static void pvr_graphics_pipeline_alloc_vertex_special_vars(
+   unsigned *num_vertex_input_regs,
+   struct pvr_vertex_special_vars *special_vars_layout_out)
+{
+   unsigned next_free_reg = *num_vertex_input_regs;
+   struct pvr_vertex_special_vars layout;
+
+   /* We don't support VK_KHR_shader_draw_parameters or Vulkan 1.1 so no
+    * BaseInstance, BaseVertex, DrawIndex.
+    */
+
+   /* TODO: The shader might not necessarily be using this so we'd just be
+    * wasting regs. Get the info from the compiler about whether or not the
+    * shader uses them and allocate them accordingly. For now we'll set them up
+    * regardless.
+    */
+
+   layout.vertex_id_offset = (int16_t)next_free_reg;
+   next_free_reg++;
+
+   layout.instance_id_offset = (int16_t)next_free_reg;
+   next_free_reg++;
+
+   *num_vertex_input_regs = next_free_reg;
+   *special_vars_layout_out = layout;
+}
+
 /* Compiles and uploads shaders and PDS programs. */
 static VkResult
 pvr_graphics_pipeline_compile(struct pvr_device *const device,
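The big added block above allocates vtxin registers location by location (one register per 32-bit component) and then appends the two special variables. A worked example of that allocation under assumed attribute formats:

   #include <stdio.h>

   int main(void)
   {
      /* location 0: R32G32B32A32_SFLOAT (4 components),
       * location 1: R32G32_SFLOAT (2 components) -- assumed formats.
       */
      const unsigned components[] = { 4, 2 };
      unsigned next_reg = 0;

      for (unsigned loc = 0; loc < 2; loc++) {
         printf("location %u -> vtxin regs %u..%u\n",
                loc, next_reg, next_reg + components[loc] - 1);
         next_reg += components[loc];
      }

      printf("VertexIndex   -> vtxin reg %u\n", next_reg++);
      printf("InstanceIndex -> vtxin reg %u\n", next_reg++);
      return 0; /* regs 0..3 and 4..5, then 6 and 7 */
   }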
@@ -1660,9 +1925,36 @@
    const bool old_path = pvr_has_hard_coded_shaders(&device->pdevice->dev_info);

    /* Vars needed for the new path. */
+   struct pvr_pds_vertex_dma vtx_dma_descriptions[PVR_MAX_VERTEX_ATTRIB_DMAS];
+   uint32_t vtx_dma_count = 0;
+   rogue_vertex_inputs *vertex_input_layout;
+   unsigned *vertex_input_reg_count;
+
+   /* TODO: The compiler should be making use of this to determine where
+    * specific special variables are located in the vtxin reg set.
+    */
+   struct pvr_vertex_special_vars special_vars_layout = { 0 };
+
    uint32_t sh_count[PVR_STAGE_ALLOCATION_COUNT] = { 0 };

-   if (!old_path)
+   /* Setup shared build context. */
+   ctx = rogue_build_context_create(compiler, layout);
+   if (!ctx)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   vertex_input_layout = &ctx->stage_data.vs.inputs;
+   vertex_input_reg_count = &ctx->stage_data.vs.num_vertex_input_regs;
+
+   if (!old_path) {
+      pvr_graphics_pipeline_alloc_vertex_inputs(vertex_input_state,
+                                                vertex_input_layout,
+                                                vertex_input_reg_count,
+                                                &vtx_dma_descriptions,
+                                                &vtx_dma_count);
+
+      pvr_graphics_pipeline_alloc_vertex_special_vars(vertex_input_reg_count,
+                                                      &special_vars_layout);
+
       for (enum pvr_stage_allocation pvr_stage =
               PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY;
            pvr_stage < PVR_STAGE_ALLOCATION_COMPUTE;
@@ -1672,11 +1964,7 @@
             layout,
             pvr_stage,
             &layout->sh_reg_layout_per_stage[pvr_stage]);
-
-   /* Setup shared build context. */
-   ctx = rogue_build_context_create(compiler, layout);
-   if (!ctx)
-      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+   }

    /* NIR middle-end translation. */
    for (gl_shader_stage stage = MESA_SHADER_FRAGMENT; stage > MESA_SHADER_NONE;
@@ -1774,6 +2062,7 @@
    } else {
       pvr_vertex_state_init(gfx_pipeline,
                             &ctx->common_data[MESA_SHADER_VERTEX],
+                            *vertex_input_reg_count,
                             &ctx->stage_data.vs);

       if (!old_path) {
@@ -1786,84 +2075,117 @@
          */
         vertex_state->stage_state.const_shared_reg_count =
            sh_count[PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY];
+
+         gfx_pipeline->shader_state.vertex.vertex_input_size =
+            ctx->stage_data.vs.num_vertex_input_regs;
       }
    }

-   result = pvr_gpu_upload_usc(device,
-                               ctx->binary[MESA_SHADER_VERTEX].data,
-                               ctx->binary[MESA_SHADER_VERTEX].size,
-                               cache_line_size,
-                               &gfx_pipeline->shader_state.vertex.bo);
+   result =
+      pvr_gpu_upload_usc(device,
+                         util_dynarray_begin(&ctx->binary[MESA_SHADER_VERTEX]),
+                         ctx->binary[MESA_SHADER_VERTEX].size,
+                         cache_line_size,
+                         &gfx_pipeline->shader_state.vertex.bo);
    if (result != VK_SUCCESS)
       goto err_free_build_context;

-   if (pvr_has_hard_coded_shaders(&device->pdevice->dev_info) &&
-       pvr_hard_code_graphics_get_flags(&device->pdevice->dev_info) &
-          BITFIELD_BIT(MESA_SHADER_FRAGMENT)) {
-      pvr_hard_code_graphics_fragment_state(
-         &device->pdevice->dev_info,
-         hard_code_pipeline_n,
-         &gfx_pipeline->shader_state.fragment);
-   } else {
-      pvr_fragment_state_init(gfx_pipeline,
-                              &ctx->common_data[MESA_SHADER_FRAGMENT]);
-
-      if (!old_path) {
-         struct pvr_fragment_shader_state *fragment_state =
-            &gfx_pipeline->shader_state.fragment;
-
-         /* FIXME: For now we just overwrite it but the compiler shouldn't be
-          * returning the sh count since the driver is in charge of allocating
-          * them.
-          */
-         fragment_state->stage_state.const_shared_reg_count =
-            sh_count[PVR_STAGE_ALLOCATION_FRAGMENT];
+   if (ctx->nir[MESA_SHADER_FRAGMENT]) {
+      struct pvr_fragment_shader_state *fragment_state =
+         &gfx_pipeline->shader_state.fragment;
+
+      if (pvr_has_hard_coded_shaders(&device->pdevice->dev_info) &&
+          pvr_hard_code_graphics_get_flags(&device->pdevice->dev_info) &
+             BITFIELD_BIT(MESA_SHADER_FRAGMENT)) {
+         pvr_hard_code_graphics_fragment_state(
+            &device->pdevice->dev_info,
+            hard_code_pipeline_n,
+            &gfx_pipeline->shader_state.fragment);
+      } else {
+         pvr_fragment_state_init(gfx_pipeline,
+                                 &ctx->common_data[MESA_SHADER_FRAGMENT]);
+
+         if (!old_path) {
+            /* FIXME: For now we just overwrite it but the compiler shouldn't be
+             * returning the sh count since the driver is in charge of
+             * allocating them.
+             */
+            fragment_state->stage_state.const_shared_reg_count =
+               sh_count[PVR_STAGE_ALLOCATION_FRAGMENT];
+         }
       }
+
+      result = pvr_gpu_upload_usc(
+         device,
+         util_dynarray_begin(&ctx->binary[MESA_SHADER_FRAGMENT]),
+         ctx->binary[MESA_SHADER_FRAGMENT].size,
+         cache_line_size,
+         &gfx_pipeline->shader_state.fragment.bo);
+      if (result != VK_SUCCESS)
+         goto err_free_vertex_bo;
+
+      /* TODO: powervr has an optimization where it attempts to recompile
+       * shaders. See PipelineCompileNoISPFeedbackFragmentStage. Unimplemented
+       * since in our case the optimization doesn't happen.
+       */
+
+      result = pvr_pds_coeff_program_create_and_upload(
+         device,
+         allocator,
+         ctx->stage_data.fs.iterator_args.fpu_iterators,
+         ctx->stage_data.fs.iterator_args.num_fpu_iterators,
+         ctx->stage_data.fs.iterator_args.destination,
+         &fragment_state->pds_coeff_program,
+         &fragment_state->stage_state.pds_temps_count);
+      if (result != VK_SUCCESS)
+         goto err_free_fragment_bo;
+
+      result = pvr_pds_fragment_program_create_and_upload(
+         device,
+         allocator,
+         gfx_pipeline->shader_state.fragment.bo,
+         ctx->common_data[MESA_SHADER_FRAGMENT].temps,
+         ctx->stage_data.fs.msaa_mode,
+         ctx->stage_data.fs.phas,
+         &fragment_state->pds_fragment_program);
+      if (result != VK_SUCCESS)
+         goto err_free_coeff_program;
+
+      /* FIXME: For now we pass in the same explicit_const_usage since it
+       * contains all invalid entries. Fix this by hooking it up to the
+       * compiler.
+       */
+      result = pvr_pds_descriptor_program_create_and_upload(
+         device,
+         allocator,
+         &ctx->common_data[MESA_SHADER_FRAGMENT].compile_time_consts_data,
+         &ctx->common_data[MESA_SHADER_FRAGMENT].ubo_data,
+         &frag_explicit_const_usage,
+         layout,
+         PVR_STAGE_ALLOCATION_FRAGMENT,
+         sh_reg_layout_frag,
+         &fragment_state->descriptor_state);
+      if (result != VK_SUCCESS)
+         goto err_free_frag_program;
+
+      /* If not, we need to MAX2() and set
+       * `fragment_state->stage_state.pds_temps_count` appropriately.
+       */
+      assert(fragment_state->descriptor_state.pds_info.temps_required == 0);
    }

-   result = pvr_gpu_upload_usc(device,
-                               ctx->binary[MESA_SHADER_FRAGMENT].data,
-                               ctx->binary[MESA_SHADER_FRAGMENT].size,
-                               cache_line_size,
-                               &gfx_pipeline->shader_state.fragment.bo);
-   if (result != VK_SUCCESS)
-      goto err_free_vertex_bo;
-
-   /* TODO: powervr has an optimization where it attempts to recompile shaders.
-    * See PipelineCompileNoISPFeedbackFragmentStage. Unimplemented since in our
-    * case the optimization doesn't happen.
-    */
-
-   result = pvr_pds_coeff_program_create_and_upload(
-      device,
-      allocator,
-      ctx->stage_data.fs.iterator_args.fpu_iterators,
-      ctx->stage_data.fs.iterator_args.num_fpu_iterators,
-      ctx->stage_data.fs.iterator_args.destination,
-      &gfx_pipeline->shader_state.fragment.pds_coeff_program);
-   if (result != VK_SUCCESS)
-      goto err_free_fragment_bo;
-
-   result = pvr_pds_fragment_program_create_and_upload(
-      device,
-      allocator,
-      gfx_pipeline->shader_state.fragment.bo,
-      ctx->common_data[MESA_SHADER_FRAGMENT].temps,
-      ctx->stage_data.fs.msaa_mode,
-      ctx->stage_data.fs.phas,
-      &gfx_pipeline->shader_state.fragment.pds_fragment_program);
-   if (result != VK_SUCCESS)
-      goto err_free_coeff_program;
-
    result = pvr_pds_vertex_attrib_programs_create_and_upload(
       device,
       allocator,
       vertex_input_state,
       ctx->common_data[MESA_SHADER_VERTEX].temps,
       &ctx->stage_data.vs,
+      vtx_dma_descriptions,
+      vtx_dma_count,
+      &special_vars_layout,
       &gfx_pipeline->shader_state.vertex.pds_attrib_programs);
    if (result != VK_SUCCESS)
-      goto err_free_frag_program;
+      goto err_free_frag_descriptor_program;

    result = pvr_pds_descriptor_program_create_and_upload(
       device,
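`ctx->binary[stage]` has evidently become a `struct util_dynarray` (note the new `util/u_dynarray.h` include at the top of the file), so raw `.data` access is replaced with the accessor while `.size` stays a plain byte count. A sketch of the utility's basic shape — util_dynarray is Mesa's growable array; this usage is illustrative:

   struct util_dynarray binary;
   util_dynarray_init(&binary, NULL);         /* heap-backed */
   /* ... compiler appends code ... */
   void *code = util_dynarray_begin(&binary); /* equivalent to binary.data */
   size_t bytes = binary.size;                /* bytes in use */
   util_dynarray_fini(&binary);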
@@ -1885,33 +2207,12 @@
    /* assert(pvr_pds_descriptor_program_variables.temp_buff_total_size == 0); */
    /* TODO: Implement spilling with the above. */

-   /* FIXME: For now we pass in the same explicit_const_usage since it contains
-    * all invalid entries. Fix this by hooking it up to the compiler.
-    */
-   result = pvr_pds_descriptor_program_create_and_upload(
-      device,
-      allocator,
-      &ctx->common_data[MESA_SHADER_FRAGMENT].compile_time_consts_data,
-      &ctx->common_data[MESA_SHADER_FRAGMENT].ubo_data,
-      &frag_explicit_const_usage,
-      layout,
-      PVR_STAGE_ALLOCATION_FRAGMENT,
-      sh_reg_layout_frag,
-      &gfx_pipeline->shader_state.fragment.descriptor_state);
-   if (result != VK_SUCCESS)
-      goto err_free_vertex_descriptor_program;
-
    ralloc_free(ctx);

    hard_code_pipeline_n++;

    return VK_SUCCESS;

-err_free_vertex_descriptor_program:
-   pvr_pds_descriptor_program_destroy(
-      device,
-      allocator,
-      &gfx_pipeline->shader_state.vertex.descriptor_state);
 err_free_vertex_attrib_program:
    for (uint32_t i = 0;
        i < ARRAY_SIZE(gfx_pipeline->shader_state.vertex.pds_attrib_programs);
@@ -1921,23 +2222,28 @@

       pvr_pds_vertex_attrib_program_destroy(device, allocator, attrib_program);
    }
+err_free_frag_descriptor_program:
+   pvr_pds_descriptor_program_destroy(
+      device,
+      allocator,
+      &gfx_pipeline->shader_state.fragment.descriptor_state);
 err_free_frag_program:
-   pvr_bo_free(device,
-               gfx_pipeline->shader_state.fragment.pds_fragment_program.pvr_bo);
+   pvr_bo_suballoc_free(
+      gfx_pipeline->shader_state.fragment.pds_fragment_program.pvr_bo);
 err_free_coeff_program:
-   pvr_bo_free(device,
-               gfx_pipeline->shader_state.fragment.pds_coeff_program.pvr_bo);
+   pvr_bo_suballoc_free(
+      gfx_pipeline->shader_state.fragment.pds_coeff_program.pvr_bo);
 err_free_fragment_bo:
-   pvr_bo_free(device, gfx_pipeline->shader_state.fragment.bo);
+   pvr_bo_suballoc_free(gfx_pipeline->shader_state.fragment.bo);
 err_free_vertex_bo:
-   pvr_bo_free(device, gfx_pipeline->shader_state.vertex.bo);
+   pvr_bo_suballoc_free(gfx_pipeline->shader_state.vertex.bo);
 err_free_build_context:
    ralloc_free(ctx);
    return result;
 }

-static struct vk_subpass_info
-pvr_create_subpass_info(const VkGraphicsPipelineCreateInfo *const info)
+static struct vk_render_pass_state
+pvr_create_renderpass_state(const VkGraphicsPipelineCreateInfo *const info)
 {
    PVR_FROM_HANDLE(pvr_render_pass, pass, info->renderPass);
    const struct pvr_render_subpass *const subpass =
@@ -1957,8 +2263,10 @@
          pass->attachments[subpass->depth_stencil_attachment].aspects;
    }

-   return (struct vk_subpass_info){
+   return (struct vk_render_pass_state){
       .attachment_aspects = attachment_aspects,
+      .render_pass = info->renderPass,
+      .subpass = info->subpass,

       /* TODO: This is only needed for VK_KHR_create_renderpass2 (or core 1.2),
        * which is not currently supported.
@@ -1976,7 +2284,7 @@
 {
    struct vk_dynamic_graphics_state *const dynamic_state =
       &gfx_pipeline->dynamic_state;
-   const struct vk_subpass_info sp_info = pvr_create_subpass_info(pCreateInfo);
+   const struct vk_render_pass_state rp_state = pvr_create_renderpass_state(pCreateInfo);

    struct vk_graphics_pipeline_all_state all_state;
    struct vk_graphics_pipeline_state state = { 0 };
@@ -1988,7 +2296,7 @@
    result = vk_graphics_pipeline_state_fill(&device->vk,
                                             &state,
                                             pCreateInfo,
-                                            &sp_info,
+                                            &rp_state,
                                             &all_state,
                                             NULL,
                                             0,
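The driver's subpass-info helper is replaced by the runtime's `vk_render_pass_state`, which `vk_graphics_pipeline_state_fill()` consumes directly; besides the aspect masks it now also carries the render pass handle and subpass index. Assumed shape of the fields touched here — only the members visible in this diff, since the real struct in `vk_graphics_state.h` has more:

   struct vk_render_pass_state {
      VkImageAspectFlags attachment_aspects;
      VkRenderPass render_pass;
      uint32_t subpass;
      /* ... view mask, attachment formats, etc. elided ... */
   };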