2
* Copyright © 2012-2018 Intel Corporation
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24
/** @file brw_eu_compact.c
26
* Instruction compaction is a feature of G45 and newer hardware that allows
27
* for a smaller instruction encoding.
29
* The instruction cache is on the order of 32KB, and many programs generate
30
* far more instructions than that. The instruction cache is built to barely
31
* keep up with instruction dispatch ability in cache hit cases -- L1
32
* instruction cache misses that still hit in the next level could limit
33
* throughput by around 50%.
35
* The idea of instruction compaction is that most instructions use a tiny
36
* subset of the GPU functionality, so we can encode what would be a 16 byte
37
* instruction in 8 bytes using some lookup tables for various fields.
40
* Instruction compaction capabilities vary subtly by generation.
42
* G45's support for instruction compaction is very limited. Jump counts on
43
* this generation are in units of 16-byte uncompacted instructions. As such,
44
* all jump targets must be 16-byte aligned. Also, all instructions must be
45
* naturally aligned, i.e. uncompacted instructions must be 16-byte aligned.
46
* A G45-only instruction, NENOP, must be used to provide padding to align
47
* uncompacted instructions.
49
* Gfx5 removes these restrictions and changes jump counts to be in units of
50
* 8-byte compacted instructions, allowing jump targets to be only 8-byte
51
* aligned. Uncompacted instructions can also be placed on 8-byte boundaries.
53
* Gfx6 adds the ability to compact instructions with a limited range of
54
* immediate values. Compactable immediates have 12 unrestricted bits, and a
55
* 13th bit that's replicated through the high 20 bits, to create the 32-bit
56
* value of DW3 in the uncompacted instruction word.
58
* On Gfx7 we can compact some control flow instructions with a small positive
59
* immediate in the low bits of DW3, like ENDIF with the JIP field. Other
60
* control flow instructions with UIP cannot be compacted, because of the
61
* replicated 13th bit. No control flow instructions can be compacted on Gfx6
62
* since the jump count field is not in DW3.
68
* else JIP (plus UIP on BDW+)
70
* while JIP (must be negative)
72
* Gfx8 adds support for compacting 3-src instructions.
74
* Gfx12 reduces the number of bits that are available to compacted immediates from
75
* 13 to 12, but improves the compaction of floating-point immediates by
76
* allowing the high bits to be encoded (the sign, 8-bit exponent, and the
77
* three most significant bits of the mantissa), rather than the lowest bits of
82
#include "brw_shader.h"
83
#include "brw_disasm_info.h"
84
#include "dev/intel_debug.h"
86
static const uint32_t g45_control_index_table[32] = {
121
/**
 * G45 datatype compaction table: 32 entries, written as 18-bit patterns.
 *
 * NOTE(review): presumably installed as compaction_state::datatype_table by
 * compaction_state_init() (not visible here), in which case an entry's
 * position in the array is the compacted datatype index -- confirm.
 */
static const uint32_t g45_datatype_table[32] = {
   0b001000000000100001,
   0b001011010110101101,
   0b001000001000110001,
   0b001111011110111101,
   0b001011010110101100,
   0b001000000110101101,
   0b001000000000100000,
   0b010100010110110001,
   0b001100011000101101,
   0b001000000000100010,
   0b001000001000110110,
   0b010000001000110001,
   0b001000001000110010,
   0b011000001000110010,
   0b001111011110111100,
   0b001000000100101000,
   0b010100011000110001,
   0b001010010100101001,
   0b001000001000101001,
   0b010000001000110110,
   0b101000001000110001,
   0b001011011000101101,
   0b001000000100001001,
   0b001011011000101100,
   0b110100011000110001,
   0b001000001110111101,
   0b110000001000110001,
   0b011000000100101010,
   0b101000001000101001,
   0b001011010110001100,
   0b001000000110100001,
   0b001010010100001000,
};
156
static const uint16_t g45_subreg_table[32] = {
191
static const uint16_t g45_src_index_table[32] = {
226
static const uint32_t gfx6_control_index_table[32] = {
261
/**
 * Gfx6 datatype compaction table: 32 entries, written as 18-bit patterns.
 *
 * Entries 23 and 25 hold the same pattern.
 *
 * NOTE(review): presumably installed as compaction_state::datatype_table by
 * compaction_state_init() (not visible here), in which case an entry's
 * position in the array is the compacted datatype index -- confirm.
 */
static const uint32_t gfx6_datatype_table[32] = {
   0b001001110000000000,
   0b001000110000100000,
   0b001001110000000001,
   0b001000000001100000,
   0b001010110100101001,
   0b001000000110101101,
   0b001100011000101100,
   0b001011110110101101,
   0b001000000111101100,
   0b001000000001100001,
   0b001000110010100101,
   0b001000000001000001,
   0b001000001000110001,
   0b001000001000101001,
   0b001000000000100000,
   0b001000001000110010,
   0b001010010100101001,
   0b001011010010100101,
   0b001000000110100101,
   0b001100011000101001,
   0b001011011000101100,
   0b001011010110100101,
   0b001011110110100101,
   0b001111011110111101,
   0b001111011110111100,
   0b001111011110111101,
   0b001111011110011101,
   0b001111011110111110,
   0b001000000000100001,
   0b001000000000100010,
   0b001001111111011101,
   0b001000001110111110,
};
296
static const uint16_t gfx6_subreg_table[32] = {
331
static const uint16_t gfx6_src_index_table[32] = {
366
/**
 * Gfx7 control-index compaction table: 32 entries, written as 19-bit
 * patterns.
 *
 * NOTE(review): presumably installed as compaction_state::control_index_table
 * by compaction_state_init() (not visible here), in which case an entry's
 * position in the array is the compacted control index -- confirm.
 */
static const uint32_t gfx7_control_index_table[32] = {
   0b0000000000000000010,
   0b0000100000000000000,
   0b0000100000000000001,
   0b0000100000000000010,
   0b0000100000000000011,
   0b0000100000000000100,
   0b0000100000000000101,
   0b0000100000000000111,
   0b0000100000000001000,
   0b0000100000000001001,
   0b0000100000000001101,
   0b0000110000000000000,
   0b0000110000000000001,
   0b0000110000000000010,
   0b0000110000000000011,
   0b0000110000000000100,
   0b0000110000000000101,
   0b0000110000000000111,
   0b0000110000000001001,
   0b0000110000000001101,
   0b0000110000000010000,
   0b0000110000100000000,
   0b0001000000000000000,
   0b0001000000000000010,
   0b0001000000000000100,
   0b0001000000100000000,
   0b0010110000000000000,
   0b0010110000000010000,
   0b0011000000000000000,
   0b0011000000100000000,
   0b0101000000000000000,
   0b0101000000100000000,
};
401
/**
 * Gfx7 datatype compaction table: 32 entries, written as 18-bit patterns.
 *
 * NOTE(review): presumably installed as compaction_state::datatype_table by
 * compaction_state_init() (not visible here), in which case an entry's
 * position in the array is the compacted datatype index -- confirm.
 */
static const uint32_t gfx7_datatype_table[32] = {
   0b001000000000000001,
   0b001000000000100000,
   0b001000000000100001,
   0b001000000001100001,
   0b001000000010111101,
   0b001000001011111101,
   0b001000001110100001,
   0b001000001110100101,
   0b001000001110111101,
   0b001000010000100001,
   0b001000110000100000,
   0b001000110000100001,
   0b001001010010100101,
   0b001001110010100100,
   0b001001110010100101,
   0b001111001110111101,
   0b001111011110011101,
   0b001111011110111100,
   0b001111011110111101,
   0b001111111110111100,
   0b000000001000001100,
   0b001000000000111101,
   0b001000000010100101,
   0b001000010000100000,
   0b001001010010100100,
   0b001001110010000100,
   0b001010010100001001,
   0b001101111110111101,
   0b001111111110111101,
   0b001011110110101100,
   0b001010010100101000,
   0b001010110100101000,
};
436
static const uint16_t gfx7_subreg_table[32] = {
471
static const uint16_t gfx7_src_index_table[32] = {
506
/**
 * Gfx8 control-index compaction table: 32 entries, written as 19-bit
 * patterns.
 *
 * NOTE(review): presumably installed as compaction_state::control_index_table
 * by compaction_state_init() (not visible here), in which case an entry's
 * position in the array is the compacted control index -- confirm.
 */
static const uint32_t gfx8_control_index_table[32] = {
   0b0000000000000000010,
   0b0000100000000000000,
   0b0000100000000000001,
   0b0000100000000000010,
   0b0000100000000000011,
   0b0000100000000000100,
   0b0000100000000000101,
   0b0000100000000000111,
   0b0000100000000001000,
   0b0000100000000001001,
   0b0000100000000001101,
   0b0000110000000000000,
   0b0000110000000000001,
   0b0000110000000000010,
   0b0000110000000000011,
   0b0000110000000000100,
   0b0000110000000000101,
   0b0000110000000000111,
   0b0000110000000001001,
   0b0000110000000001101,
   0b0000110000000010000,
   0b0000110000100000000,
   0b0001000000000000000,
   0b0001000000000000010,
   0b0001000000000000100,
   0b0001000000100000000,
   0b0010110000000000000,
   0b0010110000000010000,
   0b0011000000000000000,
   0b0011000000100000000,
   0b0101000000000000000,
   0b0101000000100000000,
};
541
/**
 * Gfx8 datatype compaction table: 32 entries, written as 21-bit patterns.
 *
 * NOTE(review): presumably installed as compaction_state::datatype_table by
 * compaction_state_init() (not visible here), in which case an entry's
 * position in the array is the compacted datatype index -- confirm.
 */
static const uint32_t gfx8_datatype_table[32] = {
   0b001000000000000000001,
   0b001000000000001000000,
   0b001000000000001000001,
   0b001000000000011000001,
   0b001000000000101011101,
   0b001000000010111011101,
   0b001000000011101000001,
   0b001000000011101000101,
   0b001000000011101011101,
   0b001000001000001000001,
   0b001000011000001000000,
   0b001000011000001000001,
   0b001000101000101000101,
   0b001000111000101000100,
   0b001000111000101000101,
   0b001011100011101011101,
   0b001011101011100011101,
   0b001011101011101011100,
   0b001011101011101011101,
   0b001011111011101011100,
   0b000000000010000001100,
   0b001000000000001011101,
   0b001000000000101000101,
   0b001000001000001000000,
   0b001000101000101000100,
   0b001000111000100000100,
   0b001001001001000001001,
   0b001010111011101011101,
   0b001011111011101011101,
   0b001001111001101001100,
   0b001001001001001001000,
   0b001001011001001001000,
};
576
static const uint16_t gfx8_subreg_table[32] = {
611
static const uint16_t gfx8_src_index_table[32] = {
646
/**
 * Gfx11 datatype compaction table: 32 entries, written as 21-bit patterns.
 *
 * NOTE(review): presumably installed as compaction_state::datatype_table by
 * compaction_state_init() (not visible here), in which case an entry's
 * position in the array is the compacted datatype index -- confirm.
 */
static const uint32_t gfx11_datatype_table[32] = {
   0b001000000000000000001,
   0b001000000000001000000,
   0b001000000000001000001,
   0b001000000000011000001,
   0b001000000000101100101,
   0b001000000101111100101,
   0b001000000100101000001,
   0b001000000100101000101,
   0b001000000100101100101,
   0b001000001000001000001,
   0b001000011000001000000,
   0b001000011000001000001,
   0b001000101000101000101,
   0b001000111000101000100,
   0b001000111000101000101,
   0b001100100100101100101,
   0b001100101100100100101,
   0b001100101100101100100,
   0b001100101100101100101,
   0b001100111100101100100,
   0b000000000010000001100,
   0b001000000000001100101,
   0b001000000000101000101,
   0b001000001000001000000,
   0b001000101000101000100,
   0b001000111000100000100,
   0b001001001001000001001,
   0b001101111100101100101,
   0b001100111100101100101,
   0b001001111001101001100,
   0b001001001001001001000,
   0b001001011001001001000,
};
681
/**
 * Gfx12 control-index compaction table: 32 entries, written as 21-bit
 * patterns.  The trailing comment on each entry shows the instruction
 * controls that the pattern encodes.
 *
 * NOTE(review): presumably installed as compaction_state::control_index_table
 * by compaction_state_init() (not visible here), in which case an entry's
 * position in the array is the compacted control index -- confirm.
 */
static const uint32_t gfx12_control_index_table[32] = {
   0b000000000000000000100, /* (16|M0) */
   0b000000000000000000011, /* (8|M0) */
   0b000000010000000000000, /* (W) (1|M0) */
   0b000000010000000000100, /* (W) (16|M0) */
   0b000000010000000000011, /* (W) (8|M0) */
   0b010000000000000000100, /* (16|M0) (ge)f0.0 */
   0b000000000000000100100, /* (16|M16) */
   0b010100000000000000100, /* (16|M0) (lt)f0.0 */
   0b000000000000000000000, /* (1|M0) */
   0b000010000000000000100, /* (16|M0) (sat) */
   0b000000000000000010011, /* (8|M8) */
   0b001100000000000000100, /* (16|M0) (gt)f0.0 */
   0b000100000000000000100, /* (16|M0) (eq)f0.0 */
   0b000100010000000000100, /* (W) (16|M0) (eq)f0.0 */
   0b001000000000000000100, /* (16|M0) (ne)f0.0 */
   0b000000000000100000100, /* (f0.0) (16|M0) */
   0b010100000000000000011, /* (8|M0) (lt)f0.0 */
   0b000000000000110000100, /* (f1.0) (16|M0) */
   0b000000010000000000001, /* (W) (2|M0) */
   0b000000000000101000100, /* (f0.1) (16|M0) */
   0b000000000000111000100, /* (f1.1) (16|M0) */
   0b010000010000000000100, /* (W) (16|M0) (ge)f0.0 */
   0b000000000000000100011, /* (8|M16) */
   0b000000000000000110011, /* (8|M24) */
   0b010100010000000000100, /* (W) (16|M0) (lt)f0.0 */
   0b010000000000000000011, /* (8|M0) (ge)f0.0 */
   0b000100010000000000000, /* (W) (1|M0) (eq)f0.0 */
   0b000010000000000000011, /* (8|M0) (sat) */
   0b010100000000010000100, /* (16|M0) (lt)f1.0 */
   0b000100000000000000011, /* (8|M0) (eq)f0.0 */
   0b000001000000000000011, /* (8|M0) {AccWrEn} */
   0b000000010000000100100, /* (W) (16|M16) */
};
716
/**
 * Gfx12 datatype compaction table: 32 entries, written as 20-bit patterns.
 * The trailing comment on each entry shows the register file / type
 * combination that the pattern encodes.
 *
 * NOTE(review): presumably installed as compaction_state::datatype_table by
 * compaction_state_init() (not visible here), in which case an entry's
 * position in the array is the compacted datatype index -- confirm.
 */
static const uint32_t gfx12_datatype_table[32] = {
   0b11010110100101010100, /* grf<1>:f grf:f grf:f */
   0b00000110100101010100, /* grf<1>:f grf:f arf:ub */
   0b00000010101101010100, /* grf<1>:f imm:f arf:ub */
   0b01010110110101010100, /* grf<1>:f grf:f imm:f */
   0b11010100100101010100, /* arf<1>:f grf:f grf:f */
   0b11010010100101010100, /* grf<1>:f arf:f grf:f */
   0b01010100110101010100, /* arf<1>:f grf:f imm:f */
   0b00000000100000000000, /* arf<1>:ub arf:ub arf:ub */
   0b11010000100101010100, /* arf<1>:f arf:f grf:f */
   0b00101110110011001100, /* grf<1>:d grf:d imm:w */
   0b10110110100011001100, /* grf<1>:d grf:d grf:d */
   0b01010010110101010100, /* grf<1>:f arf:f imm:f */
   0b10010110100001000100, /* grf<1>:ud grf:ud grf:ud */
   0b01010000110101010100, /* arf<1>:f arf:f imm:f */
   0b00110110110011001100, /* grf<1>:d grf:d imm:d */
   0b00010110110001000100, /* grf<1>:ud grf:ud imm:ud */
   0b00000111000101010100, /* grf<2>:f grf:f arf:ub */
   0b00101100110011001100, /* arf<1>:d grf:d imm:w */
   0b00000000100000100010, /* arf<1>:uw arf:uw arf:ub */
   0b00000010100001000100, /* grf<1>:ud arf:ud arf:ub */
   0b00100110110000101010, /* grf<1>:w grf:uw imm:uv */
   0b00001110110000100010, /* grf<1>:uw grf:uw imm:uw */
   0b10010111000001000100, /* grf<2>:ud grf:ud grf:ud */
   0b00000110100101001100, /* grf<1>:d grf:f arf:ub */
   0b10001100100011001100, /* arf<1>:d grf:d grf:uw */
   0b00000110100001010100, /* grf<1>:f grf:ud arf:ub */
   0b00101110110001001100, /* grf<1>:d grf:ud imm:w */
   0b00000010100000100010, /* grf<1>:uw arf:uw arf:ub */
   0b00000110100000110100, /* grf<1>:f grf:uw arf:ub */
   0b00000110100000010100, /* grf<1>:f grf:ub arf:ub */
   0b00000110100011010100, /* grf<1>:f grf:d arf:ub */
   0b00000010100101010100, /* grf<1>:f arf:f arf:ub */
};
751
/**
 * Gfx12 subregister compaction table: 32 entries, written as 15-bit
 * patterns.  The trailing comment on each entry lists the three subregister
 * offsets that the pattern encodes.
 *
 * NOTE(review): presumably installed as compaction_state::subreg_table by
 * compaction_state_init() (not visible here), in which case an entry's
 * position in the array is the compacted subreg index -- confirm.
 */
static const uint16_t gfx12_subreg_table[32] = {
   0b000000000000000, /* .0 .0 .0 */
   0b100000000000000, /* .0 .0 .16 */
   0b001000000000000, /* .0 .0 .4 */
   0b011000000000000, /* .0 .0 .12 */
   0b000000010000000, /* .0 .4 .0 */
   0b010000000000000, /* .0 .0 .8 */
   0b101000000000000, /* .0 .0 .20 */
   0b000000000001000, /* .8 .0 .0 */
   0b000000100000000, /* .0 .8 .0 */
   0b110000000000000, /* .0 .0 .24 */
   0b111000000000000, /* .0 .0 .28 */
   0b000001000000000, /* .0 .16 .0 */
   0b000000000000100, /* .4 .0 .0 */
   0b000001100000000, /* .0 .24 .0 */
   0b000001010000000, /* .0 .20 .0 */
   0b000000110000000, /* .0 .12 .0 */
   0b000001110000000, /* .0 .28 .0 */
   0b000000000011100, /* .28 .0 .0 */
   0b000000000010000, /* .16 .0 .0 */
   0b000000000001100, /* .12 .0 .0 */
   0b000000000011000, /* .24 .0 .0 */
   0b000000000010100, /* .20 .0 .0 */
   0b000000000000010, /* .2 .0 .0 */
   0b000000101000000, /* .0 .10 .0 */
   0b000000001000000, /* .0 .2 .0 */
   0b000000010000100, /* .4 .4 .0 */
   0b000000001011100, /* .28 .2 .0 */
   0b000000001000010, /* .2 .2 .0 */
   0b000000110001100, /* .12 .12 .0 */
   0b000000000100000, /* .0 .1 .0 */
   0b000000001100000, /* .0 .3 .0 */
   0b110001100000000, /* .0 .24 .24 */
};
786
/**
 * Gfx12 src0 compaction table: 16 entries, written as 12-bit patterns.  The
 * trailing comment on each entry shows the source region / modifier that the
 * pattern encodes.
 *
 * NOTE(review): presumably installed as compaction_state::src0_index_table by
 * compaction_state_init() (not visible here) -- confirm.
 */
static const uint16_t gfx12_src0_index_table[16] = {
   0b010001100100, /* r<8;8,1> */
   0b000000000000, /* r<0;1,0> */
   0b010001100110, /* -r<8;8,1> */
   0b010001100101, /* (abs)r<8;8,1> */
   0b000000000010, /* -r<0;1,0> */
   0b001000000000, /* r<2;1,0> */
   0b001001000000, /* r<2;4,0> */
   0b001101000000, /* r<4;4,0> */
   0b001000100100, /* r<2;2,1> */
   0b001100000000, /* r<4;1,0> */
   0b001000100110, /* -r<2;2,1> */
   0b001101000100, /* r<4;4,1> */
   0b010001100111, /* -(abs)r<8;8,1> */
   0b000100000000, /* r<1;1,0> */
   0b000000000001, /* (abs)r<0;1,0> */
   0b111100010000, /* r[a]<1,0> */
};
805
/**
 * Gfx12 src1 compaction table: 16 entries, written as 12-bit patterns.  The
 * trailing comment on each entry shows the source region / modifier that the
 * pattern encodes.
 *
 * NOTE(review): presumably installed as compaction_state::src1_index_table by
 * compaction_state_init() (not visible here) -- confirm.
 */
static const uint16_t gfx12_src1_index_table[16] = {
   0b000100011001, /* r<8;8,1> */
   0b000000000000, /* r<0;1,0> */
   0b100100011001, /* -r<8;8,1> */
   0b100000000000, /* -r<0;1,0> */
   0b010100011001, /* (abs)r<8;8,1> */
   0b100011010000, /* -r<4;4,0> */
   0b000010000000, /* r<2;1,0> */
   0b000010001001, /* r<2;2,1> */
   0b100010001001, /* -r<2;2,1> */
   0b000011010000, /* r<4;4,0> */
   0b000011010001, /* r<4;4,1> */
   0b000011000000, /* r<4;1,0> */
   0b110100011001, /* -(abs)r<8;8,1> */
   0b010000000000, /* (abs)r<0;1,0> */
   0b110000000000, /* -(abs)r<0;1,0> */
   0b100011010001, /* -r<4;4,1> */
};
824
/**
 * XeHP src0 compaction table: 16 entries, written as 12-bit patterns.  The
 * trailing comment on each entry shows the source region / modifier that the
 * pattern encodes.
 *
 * NOTE(review): presumably installed as compaction_state::src0_index_table by
 * compaction_state_init() (not visible here) -- confirm.
 */
static const uint16_t xehp_src0_index_table[16] = {
   0b000100000000, /* r<1;1,0> */
   0b000000000000, /* r<0;1,0> */
   0b000100000010, /* -r<1;1,0> */
   0b000100000001, /* (abs)r<1;1,0> */
   0b000000000010, /* -r<0;1,0> */
   0b001000000000, /* r<2;1,0> */
   0b001001000000, /* r<2;4,0> */
   0b001101000000, /* r<4;4,0> */
   0b001100000000, /* r<4;1,0> */
   0b000100000011, /* -(abs)r<1;1,0> */
   0b000000000001, /* (abs)r<0;1,0> */
   0b111100010000, /* r[a]<1,0> */
   0b010001100000, /* r<8;8,0> */
   0b000101000000, /* r<1;4,0> */
   0b010001001000, /* r<8;4,2> */
   0b001000000010, /* -r<2;1,0> */
};
843
/**
 * XeHP src1 compaction table: 16 entries, written as 12-bit patterns.  The
 * trailing comment on each entry shows the source region / modifier that the
 * pattern encodes.
 *
 * NOTE(review): presumably installed as compaction_state::src1_index_table by
 * compaction_state_init() (not visible here) -- confirm.
 */
static const uint16_t xehp_src1_index_table[16] = {
   0b000001000000, /* r<1;1,0> */
   0b000000000000, /* r<0;1,0> */
   0b100001000000, /* -r<1;1,0> */
   0b100000000000, /* -r<0;1,0> */
   0b010001000000, /* (abs)r<1;1,0> */
   0b100011010000, /* -r<4;4,0> */
   0b000010000000, /* r<2;1,0> */
   0b000011010000, /* r<4;4,0> */
   0b000011000000, /* r<4;1,0> */
   0b110001000000, /* -(abs)r<1;1,0> */
   0b010000000000, /* (abs)r<0;1,0> */
   0b110000000000, /* -(abs)r<0;1,0> */
   0b000100011000, /* r<8;8,0> */
   0b100010000000, /* -r<2;1,0> */
   0b100000001001, /* -r<0;2,1> */
   0b100001000100, /* -r[a]<1;1,0> */
};
862
/* This is actually the control index table for Cherryview (26 bits), but the
 * only difference from Broadwell (24 bits) is that it has two extra 0-bits at
 * the most-significant end (the top two bits of every entry below are zero).
 *
 * The low 24 bits have the same mappings on both hardware.
 */
static const uint32_t gfx8_3src_control_index_table[4] = {
   0b00100000000110000000000001,
   0b00000000000110000000000001,
   0b00000000001000000000000001,
   0b00000000001000000000100001,
};
875
/* This is actually the source index table for Cherryview (49 bits), but the
 * only difference from Broadwell (46 bits) is that it has three extra 0-bits
 * at the most-significant end (the top three bits of every entry below are
 * zero).
 *
 * The low 44 bits have the same mappings on both hardware, and since the high
 * three bits on Broadwell are zero, we can reuse Cherryview's table.
 */
static const uint64_t gfx8_3src_source_index_table[4] = {
   0b0000001110010011100100111001000001111000000000000,
   0b0000001110010011100100111001000001111000000000010,
   0b0000001110010011100100111001000001111000000001000,
   0b0000001110010011100100111001000001111000000100000,
};
889
/**
 * Gfx12 3-src control-index compaction table: 32 entries, written as 36-bit
 * patterns (hence the 64-bit element type).  The trailing comment on each
 * entry shows the controls and operand types that the pattern encodes.
 *
 * NOTE(review): the code that searches this table is not visible here;
 * presumably an entry's position is the compacted index -- confirm.
 */
static const uint64_t gfx12_3src_control_index_table[32] = {
   0b000001001010010101000000000000000100, /* (16|M0) grf<1>:f :f :f :f */
   0b000001001010010101000000000000000011, /* (8|M0) grf<1>:f :f :f :f */
   0b000001001000010101000000000000000011, /* (8|M0) arf<1>:f :f :f :f */
   0b000001001010010101000010000000000011, /* (W) (8|M0) grf<1>:f :f :f :f */
   0b000001001000010101000010000000000011, /* (W) (8|M0) arf<1>:f :f :f :f */
   0b000001001000010101000000000000010011, /* (8|M8) arf<1>:f :f :f :f */
   0b000001001010010101000000000000010011, /* (8|M8) grf<1>:f :f :f :f */
   0b000001001000010101000010000000010011, /* (W) (8|M8) arf<1>:f :f :f :f */
   0b000001001010010101000010000000010011, /* (W) (8|M8) grf<1>:f :f :f :f */
   0b000001001010010101000010000000000100, /* (W) (16|M0) grf<1>:f :f :f :f */
   0b000001001000010101000000000000000100, /* (16|M0) arf<1>:f :f :f :f */
   0b000001001010010101010000000000000100, /* (16|M0) (sat)grf<1>:f :f :f :f */
   0b000001001010010101000000000000100100, /* (16|M16) grf<1>:f :f :f :f */
   0b000001001000010101000010000000000100, /* (W) (16|M0) arf<1>:f :f :f :f */
   0b000001001010010101000010000000000000, /* (W) (1|M0) grf<1>:f :f :f :f */
   0b000001001010010101010000000000000011, /* (8|M0) (sat)grf<1>:f :f :f :f */
   0b000001001000010101000010000000110011, /* (W) (8|M24) arf<1>:f :f :f :f */
   0b000001001000010101000010000000100011, /* (W) (8|M16) arf<1>:f :f :f :f */
   0b000001001010010101000010000000110011, /* (W) (8|M24) grf<1>:f :f :f :f */
   0b000001001010010101000010000000100011, /* (W) (8|M16) grf<1>:f :f :f :f */
   0b000001001000010101000000000000100011, /* (8|M16) arf<1>:f :f :f :f */
   0b000001001000010101000000000000110011, /* (8|M24) arf<1>:f :f :f :f */
   0b000001001010010101000000000000100011, /* (8|M16) grf<1>:f :f :f :f */
   0b000001001010010101000000000000110011, /* (8|M24) grf<1>:f :f :f :f */
   0b000001001000010101010000000000000100, /* (16|M0) (sat)arf<1>:f :f :f :f */
   0b000001001010010101010010000000000100, /* (W) (16|M0) (sat)grf<1>:f :f :f :f */
   0b000001001010010101000010000000100100, /* (W) (16|M16) grf<1>:f :f :f :f */
   0b000001001010010001000010000000000000, /* (W) (1|M0) grf<1>:ud :ud :ud :ud */
   0b000001001000010101000000000000100100, /* (16|M16) arf<1>:f :f :f :f */
   0b000001001010010101010000000000100100, /* (16|M16) (sat)grf<1>:f :f :f :f */
   0b000001001010010101000010000000000010, /* (W) (4|M0) grf<1>:f :f :f :f */
   0b000001001000010101010000000000000011, /* (8|M0) (sat)arf<1>:f :f :f :f */
};
924
/**
 * XeHP 3-src control-index compaction table: 32 entries, written as 37-bit
 * patterns (hence the 64-bit element type).  The trailing comment on each
 * entry shows the controls and operand types that the pattern encodes; the
 * last ten entries are annotated as dpas encodings.
 *
 * NOTE(review): the code that searches this table is not visible here;
 * presumably an entry's position is the compacted index -- confirm.
 */
static const uint64_t xehp_3src_control_index_table[32] = {
   0b0000010010100010101000000000000000100, /* (16|M0) grf<1>:f :f :f :f */
   0b0000010010100010101000000000000000011, /* (8|M0) grf<1>:f :f :f :f */
   0b0000010010000010101000000000000000011, /* (8|M0) arf<1>:f :f :f :f */
   0b0000010010100010101000010000000000011, /* (W) (8|M0) grf<1>:f :f :f :f */
   0b0000010010000010101000010000000000011, /* (W) (8|M0) arf<1>:f :f :f :f */
   0b0000010010000010101000000000000010011, /* (8|M8) arf<1>:f :f :f :f */
   0b0000010010100010101000000000000010011, /* (8|M8) grf<1>:f :f :f :f */
   0b0000010010000010101000010000000010011, /* (W) (8|M8) arf<1>:f :f :f :f */
   0b0000010010100010101000010000000010011, /* (W) (8|M8) grf<1>:f :f :f :f */
   0b0000010010100010101000010000000000100, /* (W) (16|M0) grf<1>:f :f :f :f */
   0b0000010010000010101000000000000000100, /* (16|M0) arf<1>:f :f :f :f */
   0b0000010010100010101010000000000000100, /* (16|M0) (sat)grf<1>:f :f :f :f */
   0b0000010010100010101000000000000100100, /* (16|M16) grf<1>:f :f :f :f */
   0b0000010010000010101000010000000000100, /* (W) (16|M0) arf<1>:f :f :f :f */
   0b0000010010100010101000010000000000000, /* (W) (1|M0) grf<1>:f :f :f :f */
   0b0000010010100010101010000000000000011, /* (8|M0) (sat)grf<1>:f :f :f :f */
   0b0000010010000010101000010000000100011, /* (W) (8|M16) arf<1>:f :f :f :f */
   0b0000010010000010101000010000000110011, /* (W) (8|M24) arf<1>:f :f :f :f */
   0b0000010010100010101000010000000100011, /* (W) (8|M16) grf<1>:f :f :f :f */
   0b0000010010100010101000010000000110011, /* (W) (8|M24) grf<1>:f :f :f :f */
   0b0000010010000010101000000000000110011, /* (8|M24) arf<1>:f :f :f :f */
   0b0000010010000010101000000000000100011, /* (8|M16) arf<1>:f :f :f :f */
   0b0000000100111110011000000000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :ub :b */
   0b0000000000111110011000100000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :ub :ub {Atomic} */
   0b0000100100111110011000100000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :b :b {Atomic} */
   0b0000100000111110011000100000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :b :ub {Atomic} */
   0b0000100100111110011000000000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :b :b */
   0b0000000000111110011000000000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :ub :ub */
   0b0000000100111110011000100000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :ub :b {Atomic} */
   0b0000100000111110011000000000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :b :ub */
   0b0000101101111010101000100000000000011, /* dpas.8x* (8|M0) grf<1>:f :f :bf :bf {Atomic} */
   0b0000101101111010101000000000000000011, /* dpas.8x* (8|M0) grf<1>:f :f :bf :bf */
};
959
/**
 * Gfx12 3-src source-index compaction table: 32 entries, written as 21-bit
 * patterns.  The trailing comment on each entry shows the three source
 * operands (register file, region, negation) that the pattern encodes.
 *
 * NOTE(review): the code that searches this table is not visible here;
 * presumably an entry's position is the compacted index -- confirm.
 */
static const uint32_t gfx12_3src_source_index_table[32] = {
   0b100101100001100000000, /* grf<0;0> grf<8;1> grf<0> */
   0b100101100001001000010, /* arf<4;1> grf<8;1> grf<0> */
   0b101101100001101000011, /* grf<8;1> grf<8;1> grf<1> */
   0b100101100001101000011, /* grf<8;1> grf<8;1> grf<0> */
   0b101100000000101000011, /* grf<8;1> grf<0;0> grf<1> */
   0b101101100001101001011, /* -grf<8;1> grf<8;1> grf<1> */
   0b101001100001101000011, /* grf<8;1> arf<8;1> grf<1> */
   0b100001100001100000000, /* grf<0;0> arf<8;1> grf<0> */
   0b101101100001100000000, /* grf<0;0> grf<8;1> grf<1> */
   0b101101100101101000011, /* grf<8;1> grf<8;1> -grf<1> */
   0b101101110001101000011, /* grf<8;1> -grf<8;1> grf<1> */
   0b101100000000100000000, /* grf<0;0> grf<0;0> grf<1> */
   0b100001100001101000011, /* grf<8;1> arf<8;1> grf<0> */
   0b100101110001100000000, /* grf<0;0> -grf<8;1> grf<0> */
   0b100101110001101000011, /* grf<8;1> -grf<8;1> grf<0> */
   0b100101100001101001011, /* -grf<8;1> grf<8;1> grf<0> */
   0b100100000000101000011, /* grf<8;1> grf<0;0> grf<0> */
   0b100101100001100001000, /* -grf<0;0> grf<8;1> grf<0> */
   0b100100000000100000000, /* grf<0;0> grf<0;0> grf<0> */
   0b101101110001100000000, /* grf<0;0> -grf<8;1> grf<1> */
   0b100101100101100000000, /* grf<0;0> grf<8;1> -grf<0> */
   0b101001100001100000000, /* grf<0;0> arf<8;1> grf<1> */
   0b100101100101101000011, /* grf<8;1> grf<8;1> -grf<0> */
   0b101101100101101001011, /* -grf<8;1> grf<8;1> -grf<1> */
   0b101001100001101001011, /* -grf<8;1> arf<8;1> grf<1> */
   0b101101110001101001011, /* -grf<8;1> -grf<8;1> grf<1> */
   0b101100010000101000011, /* grf<8;1> -grf<0;0> grf<1> */
   0b101100000100101000011, /* grf<8;1> grf<0;0> -grf<1> */
   0b101101100001100001000, /* -grf<0;0> grf<8;1> grf<1> */
   0b101101100101100000000, /* grf<0;0> grf<8;1> -grf<1> */
   0b100100000100101000011, /* grf<8;1> grf<0;0> -grf<0> */
   0b101001100101101000011, /* grf<8;1> arf<8;1> -grf<1> */
};
994
/**
 * XeHP 3-src source-index compaction table: 32 entries, written as 21-bit
 * patterns.  The trailing comment on each entry shows the source operands
 * that the pattern encodes; several entries are annotated as dpas encodings.
 *
 * NOTE(review): the code that searches this table is not visible here;
 * presumably an entry's position is the compacted index -- confirm.
 */
static const uint32_t xehp_3src_source_index_table[32] = {
   0b100100000001100000000, /* grf<0;0> grf<1;0> grf<0> */
   0b100100000001000000001, /* arf<1;0> grf<1;0> grf<0> */
   0b101100000001100000001, /* grf<1;0> grf<1;0> grf<1> */
   0b100100000001100000001, /* grf<1;0> grf<1;0> grf<0> */
   0b101100000000100000001, /* grf<1;0> grf<0;0> grf<1> */
   0b101100000001100001001, /* -grf<1;0> grf<1;0> grf<1> */
   0b101000000001100000001, /* grf<1;0> arf<1;0> grf<1> */
   0b101100000001100000000, /* grf<0;0> grf<1;0> grf<1> */
   0b100000000001100000000, /* grf<0;0> arf<1;0> grf<0> */
   0b101100000101100000001, /* grf<1;0> grf<1;0> -grf<1> */
   0b101100010001100000001, /* grf<1;0> -grf<1;0> grf<1> */
   0b101100000000100000000, /* grf<0;0> grf<0;0> grf<1> */
   0b100000000001100000001, /* grf<1;0> arf<1;0> grf<0> */
   0b100100010001100000000, /* grf<0;0> -grf<1;0> grf<0> */
   0b100100010001100000001, /* grf<1;0> -grf<1;0> grf<0> */
   0b100100000001100001001, /* -grf<1;0> grf<1;0> grf<0> */
   0b100100000000100000001, /* grf<1;0> grf<0;0> grf<0> */
   0b100100000001100001000, /* -grf<0;0> grf<1;0> grf<0> */
   0b100100000000100000000, /* grf<0;0> grf<0;0> grf<0>
                             * dpas.*x1 grf:d grf:[ub,b] grf:[ub,b]
                             * dpas.*x1 grf:f grf:bf grf:bf
                             */
   0b101100010001100000000, /* grf<0;0> -grf<1;0> grf<1> */
   0b100100000101100000000, /* grf<0;0> grf<1;0> -grf<0> */
   0b101000000001100000000, /* grf<0;0> arf<1;0> grf<1> */
   0b100100000101100000001, /* grf<1;0> grf<1;0> -grf<0> */
   0b101100000101100001001, /* -grf<1;0> grf<1;0> -grf<1> */
   0b100100010000100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[ub,b] */
   0b100100000100100000000, /* dpas.*x1 grf:d grf:[ub,b] grf:[u2,s2] */
   0b100100010100100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[u2,s2] */
   0b100100001000100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[ub,b] */
   0b100100001100100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[u2,s2] */
   0b100100000010100000000, /* dpas.*x1 grf:d grf:[ub,b] grf:[u4,s4] */
   0b100100001010100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[u4,s4] */
   0b100100010010100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[u4,s4] */
};
1032
/**
 * Gfx12 3-src subregister compaction table: 32 entries, written as 20-bit
 * patterns.  The trailing comment on each entry lists the four subregister
 * offsets that the pattern encodes.
 *
 * NOTE(review): the code that searches this table is not visible here;
 * presumably an entry's position is the compacted index -- confirm.
 */
static const uint32_t gfx12_3src_subreg_table[32] = {
   0b00000000000000000000, /* .0 .0 .0 .0 */
   0b00100000000000000000, /* .0 .0 .0 .4 */
   0b00000000000110000000, /* .0 .12 .0 .0 */
   0b10100000000000000000, /* .0 .0 .0 .20 */
   0b10000000001110000000, /* .0 .28 .0 .16 */
   0b01100000000000000000, /* .0 .0 .0 .12 */
   0b01000000000000000000, /* .0 .0 .0 .8 */
   0b00000010000000000000, /* .0 .0 .8 .0 */
   0b00000001000000000000, /* .0 .0 .4 .0 */
   0b11000000000000000000, /* .0 .0 .0 .24 */
   0b10000000000000000000, /* .0 .0 .0 .16 */
   0b11100000000000000000, /* .0 .0 .0 .28 */
   0b00000110000000000000, /* .0 .0 .24 .0 */
   0b00000000000010000000, /* .0 .4 .0 .0 */
   0b00000100000000000000, /* .0 .0 .16 .0 */
   0b00000011000000000000, /* .0 .0 .12 .0 */
   0b00000101000000000000, /* .0 .0 .20 .0 */
   0b00000111000000000000, /* .0 .0 .28 .0 */
   0b00000000000100000000, /* .0 .8 .0 .0 */
   0b00000000001000000000, /* .0 .16 .0 .0 */
   0b00000000001100000000, /* .0 .24 .0 .0 */
   0b00000000001010000000, /* .0 .20 .0 .0 */
   0b00000000001110000000, /* .0 .28 .0 .0 */
   0b11000000001110000000, /* .0 .28 .0 .24 */
   0b00100000000100000000, /* .0 .8 .0 .4 */
   0b00100000000110000000, /* .0 .12 .0 .4 */
   0b01000000000110000000, /* .0 .12 .0 .8 */
   0b10000000001100000000, /* .0 .24 .0 .16 */
   0b10000000001010000000, /* .0 .20 .0 .16 */
   0b01100000000010000000, /* .0 .4 .0 .12 */
   0b10100000001110000000, /* .0 .28 .0 .20 */
   0b01000000000010000000, /* .0 .4 .0 .8 */
};
1067
/**
 * Per-device lookup state shared by the compaction helpers.
 *
 * The set_*_index() helpers read devinfo->ver to choose a bit layout and then
 * search the matching table below for the packed bit pattern.
 */
struct compaction_state {
   /* Device description; its `ver` field selects the bit layout. */
   const struct intel_device_info *devinfo;
   /* Searched by set_control_index(); 32 entries are scanned. */
   const uint32_t *control_index_table;
   /* Searched by set_datatype_index(); 32 entries are scanned. */
   const uint32_t *datatype_table;
   /* Searched by set_subreg_index(); 32 entries are scanned. */
   const uint16_t *subreg_table;
   /* NOTE(review): consumers of the two source tables are not visible in this
    * part of the file -- presumably searched the same way; confirm.
    */
   const uint16_t *src0_index_table;
   const uint16_t *src1_index_table;
};
1076
/* Forward declaration so code above the definition can reference it.
 * NOTE(review): presumably fills in \p c with the tables matching \p devinfo;
 * the definition is not visible in this part of the file -- confirm.
 */
static void compaction_state_init(struct compaction_state *c,
                                  const struct intel_device_info *devinfo);
1080
set_control_index(const struct compaction_state *c,
1081
brw_compact_inst *dst, const brw_inst *src)
1083
const struct intel_device_info *devinfo = c->devinfo;
1084
uint32_t uncompacted; /* 17b/G45; 19b/IVB+; 21b/TGL+ */
1086
if (devinfo->ver >= 12) {
1087
uncompacted = (brw_inst_bits(src, 95, 92) << 17) | /* 4b */
1088
(brw_inst_bits(src, 34, 34) << 16) | /* 1b */
1089
(brw_inst_bits(src, 33, 33) << 15) | /* 1b */
1090
(brw_inst_bits(src, 32, 32) << 14) | /* 1b */
1091
(brw_inst_bits(src, 31, 31) << 13) | /* 1b */
1092
(brw_inst_bits(src, 28, 28) << 12) | /* 1b */
1093
(brw_inst_bits(src, 27, 24) << 8) | /* 4b */
1094
(brw_inst_bits(src, 23, 22) << 6) | /* 2b */
1095
(brw_inst_bits(src, 21, 19) << 3) | /* 3b */
1096
(brw_inst_bits(src, 18, 16)); /* 3b */
1097
} else if (devinfo->ver >= 8) {
1098
uncompacted = (brw_inst_bits(src, 33, 31) << 16) | /* 3b */
1099
(brw_inst_bits(src, 23, 12) << 4) | /* 12b */
1100
(brw_inst_bits(src, 10, 9) << 2) | /* 2b */
1101
(brw_inst_bits(src, 34, 34) << 1) | /* 1b */
1102
(brw_inst_bits(src, 8, 8)); /* 1b */
1104
uncompacted = (brw_inst_bits(src, 31, 31) << 16) | /* 1b */
1105
(brw_inst_bits(src, 23, 8)); /* 16b */
1107
/* On gfx7, the flag register and subregister numbers are integrated into
1108
* the control index.
1110
if (devinfo->ver == 7)
1111
uncompacted |= brw_inst_bits(src, 90, 89) << 17; /* 2b */
1114
for (int i = 0; i < 32; i++) {
1115
if (c->control_index_table[i] == uncompacted) {
1116
brw_compact_inst_set_control_index(devinfo, dst, i);
1125
set_datatype_index(const struct compaction_state *c, brw_compact_inst *dst,
1126
const brw_inst *src, bool is_immediate)
1128
const struct intel_device_info *devinfo = c->devinfo;
1129
uint32_t uncompacted; /* 18b/G45+; 21b/BDW+; 20b/TGL+ */
1131
if (devinfo->ver >= 12) {
1132
uncompacted = (brw_inst_bits(src, 91, 88) << 15) | /* 4b */
1133
(brw_inst_bits(src, 66, 66) << 14) | /* 1b */
1134
(brw_inst_bits(src, 50, 50) << 13) | /* 1b */
1135
(brw_inst_bits(src, 49, 48) << 11) | /* 2b */
1136
(brw_inst_bits(src, 47, 47) << 10) | /* 1b */
1137
(brw_inst_bits(src, 46, 46) << 9) | /* 1b */
1138
(brw_inst_bits(src, 43, 40) << 5) | /* 4b */
1139
(brw_inst_bits(src, 39, 36) << 1) | /* 4b */
1140
(brw_inst_bits(src, 35, 35)); /* 1b */
1142
/* Src1.RegFile overlaps with the immediate, so ignore it if an immediate
1145
if (!is_immediate) {
1146
uncompacted |= brw_inst_bits(src, 98, 98) << 19; /* 1b */
1148
} else if (devinfo->ver >= 8) {
1149
uncompacted = (brw_inst_bits(src, 63, 61) << 18) | /* 3b */
1150
(brw_inst_bits(src, 94, 89) << 12) | /* 6b */
1151
(brw_inst_bits(src, 46, 35)); /* 12b */
1153
uncompacted = (brw_inst_bits(src, 63, 61) << 15) | /* 3b */
1154
(brw_inst_bits(src, 46, 32)); /* 15b */
1157
for (int i = 0; i < 32; i++) {
1158
if (c->datatype_table[i] == uncompacted) {
1159
brw_compact_inst_set_datatype_index(devinfo, dst, i);
1168
set_subreg_index(const struct compaction_state *c, brw_compact_inst *dst,
1169
const brw_inst *src, bool is_immediate)
1171
const struct intel_device_info *devinfo = c->devinfo;
1172
uint16_t uncompacted; /* 15b */
1174
if (devinfo->ver >= 12) {
1175
uncompacted = (brw_inst_bits(src, 55, 51) << 0) | /* 5b */
1176
(brw_inst_bits(src, 71, 67) << 5); /* 5b */
1179
uncompacted |= brw_inst_bits(src, 103, 99) << 10; /* 5b */
1181
uncompacted = (brw_inst_bits(src, 52, 48) << 0) | /* 5b */
1182
(brw_inst_bits(src, 68, 64) << 5); /* 5b */
1185
uncompacted |= brw_inst_bits(src, 100, 96) << 10; /* 5b */
1188
for (int i = 0; i < 32; i++) {
1189
if (c->subreg_table[i] == uncompacted) {
1190
brw_compact_inst_set_subreg_index(devinfo, dst, i);
1199
set_src0_index(const struct compaction_state *c, brw_compact_inst *dst,
1200
const brw_inst *src)
1202
const struct intel_device_info *devinfo = c->devinfo;
1203
uint16_t uncompacted; /* 12b */
1206
if (devinfo->ver >= 12) {
1207
table_len = ARRAY_SIZE(gfx12_src0_index_table);
1208
uncompacted = (brw_inst_bits(src, 87, 84) << 8) | /* 4b */
1209
(brw_inst_bits(src, 83, 81) << 5) | /* 3b */
1210
(brw_inst_bits(src, 80, 80) << 4) | /* 1b */
1211
(brw_inst_bits(src, 65, 64) << 2) | /* 2b */
1212
(brw_inst_bits(src, 45, 44)); /* 2b */
1214
table_len = ARRAY_SIZE(gfx8_src_index_table);
1215
uncompacted = brw_inst_bits(src, 88, 77); /* 12b */
1218
for (int i = 0; i < table_len; i++) {
1219
if (c->src0_index_table[i] == uncompacted) {
1220
brw_compact_inst_set_src0_index(devinfo, dst, i);
1229
set_src1_index(const struct compaction_state *c, brw_compact_inst *dst,
1230
const brw_inst *src, bool is_immediate, unsigned imm)
1232
const struct intel_device_info *devinfo = c->devinfo;
1234
if (devinfo->ver >= 12) {
1235
/* src1 index takes the low 4 bits of the 12-bit compacted value */
1236
brw_compact_inst_set_src1_index(devinfo, dst, imm & 0xf);
1238
/* src1 index takes the high 5 bits of the 13-bit compacted value */
1239
brw_compact_inst_set_src1_index(devinfo, dst, imm >> 8);
1243
uint16_t uncompacted; /* 12b */
1246
if (devinfo->ver >= 12) {
1247
table_len = ARRAY_SIZE(gfx12_src0_index_table);
1248
uncompacted = (brw_inst_bits(src, 121, 120) << 10) | /* 2b */
1249
(brw_inst_bits(src, 119, 116) << 6) | /* 4b */
1250
(brw_inst_bits(src, 115, 113) << 3) | /* 3b */
1251
(brw_inst_bits(src, 112, 112) << 2) | /* 1b */
1252
(brw_inst_bits(src, 97, 96)); /* 2b */
1254
table_len = ARRAY_SIZE(gfx8_src_index_table);
1255
uncompacted = brw_inst_bits(src, 120, 109); /* 12b */
1258
for (int i = 0; i < table_len; i++) {
1259
if (c->src1_index_table[i] == uncompacted) {
1260
brw_compact_inst_set_src1_index(devinfo, dst, i);
1270
set_3src_control_index(const struct intel_device_info *devinfo,
1271
brw_compact_inst *dst, const brw_inst *src)
1273
assert(devinfo->ver >= 8);
1275
if (devinfo->verx10 >= 125) {
1276
uint64_t uncompacted = /* 37b/XeHP+ */
1277
(brw_inst_bits(src, 95, 92) << 33) | /* 4b */
1278
(brw_inst_bits(src, 90, 88) << 30) | /* 3b */
1279
(brw_inst_bits(src, 82, 80) << 27) | /* 3b */
1280
(brw_inst_bits(src, 50, 50) << 26) | /* 1b */
1281
(brw_inst_bits(src, 49, 48) << 24) | /* 2b */
1282
(brw_inst_bits(src, 42, 40) << 21) | /* 3b */
1283
(brw_inst_bits(src, 39, 39) << 20) | /* 1b */
1284
(brw_inst_bits(src, 38, 36) << 17) | /* 3b */
1285
(brw_inst_bits(src, 34, 34) << 16) | /* 1b */
1286
(brw_inst_bits(src, 33, 33) << 15) | /* 1b */
1287
(brw_inst_bits(src, 32, 32) << 14) | /* 1b */
1288
(brw_inst_bits(src, 31, 31) << 13) | /* 1b */
1289
(brw_inst_bits(src, 28, 28) << 12) | /* 1b */
1290
(brw_inst_bits(src, 27, 24) << 8) | /* 4b */
1291
(brw_inst_bits(src, 23, 23) << 7) | /* 1b */
1292
(brw_inst_bits(src, 22, 22) << 6) | /* 1b */
1293
(brw_inst_bits(src, 21, 19) << 3) | /* 3b */
1294
(brw_inst_bits(src, 18, 16)); /* 3b */
1296
for (unsigned i = 0; i < ARRAY_SIZE(xehp_3src_control_index_table); i++) {
1297
if (xehp_3src_control_index_table[i] == uncompacted) {
1298
brw_compact_inst_set_3src_control_index(devinfo, dst, i);
1302
} else if (devinfo->ver >= 12) {
1303
uint64_t uncompacted = /* 36b/TGL+ */
1304
(brw_inst_bits(src, 95, 92) << 32) | /* 4b */
1305
(brw_inst_bits(src, 90, 88) << 29) | /* 3b */
1306
(brw_inst_bits(src, 82, 80) << 26) | /* 3b */
1307
(brw_inst_bits(src, 50, 50) << 25) | /* 1b */
1308
(brw_inst_bits(src, 48, 48) << 24) | /* 1b */
1309
(brw_inst_bits(src, 42, 40) << 21) | /* 3b */
1310
(brw_inst_bits(src, 39, 39) << 20) | /* 1b */
1311
(brw_inst_bits(src, 38, 36) << 17) | /* 3b */
1312
(brw_inst_bits(src, 34, 34) << 16) | /* 1b */
1313
(brw_inst_bits(src, 33, 33) << 15) | /* 1b */
1314
(brw_inst_bits(src, 32, 32) << 14) | /* 1b */
1315
(brw_inst_bits(src, 31, 31) << 13) | /* 1b */
1316
(brw_inst_bits(src, 28, 28) << 12) | /* 1b */
1317
(brw_inst_bits(src, 27, 24) << 8) | /* 4b */
1318
(brw_inst_bits(src, 23, 23) << 7) | /* 1b */
1319
(brw_inst_bits(src, 22, 22) << 6) | /* 1b */
1320
(brw_inst_bits(src, 21, 19) << 3) | /* 3b */
1321
(brw_inst_bits(src, 18, 16)); /* 3b */
1323
for (unsigned i = 0; i < ARRAY_SIZE(gfx12_3src_control_index_table); i++) {
1324
if (gfx12_3src_control_index_table[i] == uncompacted) {
1325
brw_compact_inst_set_3src_control_index(devinfo, dst, i);
1330
uint32_t uncompacted = /* 24b/BDW; 26b/CHV/SKL+ */
1331
(brw_inst_bits(src, 34, 32) << 21) | /* 3b */
1332
(brw_inst_bits(src, 28, 8)); /* 21b */
1334
if (devinfo->ver >= 9 || devinfo->platform == INTEL_PLATFORM_CHV) {
1336
brw_inst_bits(src, 36, 35) << 24; /* 2b */
1339
for (unsigned i = 0; i < ARRAY_SIZE(gfx8_3src_control_index_table); i++) {
1340
if (gfx8_3src_control_index_table[i] == uncompacted) {
1341
brw_compact_inst_set_3src_control_index(devinfo, dst, i);
1351
set_3src_source_index(const struct intel_device_info *devinfo,
1352
brw_compact_inst *dst, const brw_inst *src)
1354
assert(devinfo->ver >= 8);
1356
if (devinfo->ver >= 12) {
1357
uint32_t uncompacted = /* 21b/TGL+ */
1358
(brw_inst_bits(src, 114, 114) << 20) | /* 1b */
1359
(brw_inst_bits(src, 113, 112) << 18) | /* 2b */
1360
(brw_inst_bits(src, 98, 98) << 17) | /* 1b */
1361
(brw_inst_bits(src, 97, 96) << 15) | /* 2b */
1362
(brw_inst_bits(src, 91, 91) << 14) | /* 1b */
1363
(brw_inst_bits(src, 87, 86) << 12) | /* 2b */
1364
(brw_inst_bits(src, 85, 84) << 10) | /* 2b */
1365
(brw_inst_bits(src, 83, 83) << 9) | /* 1b */
1366
(brw_inst_bits(src, 66, 66) << 8) | /* 1b */
1367
(brw_inst_bits(src, 65, 64) << 6) | /* 2b */
1368
(brw_inst_bits(src, 47, 47) << 5) | /* 1b */
1369
(brw_inst_bits(src, 46, 46) << 4) | /* 1b */
1370
(brw_inst_bits(src, 45, 44) << 2) | /* 2b */
1371
(brw_inst_bits(src, 43, 43) << 1) | /* 1b */
1372
(brw_inst_bits(src, 35, 35)); /* 1b */
1374
const uint32_t *three_src_source_index_table =
1375
devinfo->verx10 >= 125 ?
1376
xehp_3src_source_index_table : gfx12_3src_source_index_table;
1377
const uint32_t three_src_source_index_table_len =
1378
devinfo->verx10 >= 125 ? ARRAY_SIZE(xehp_3src_source_index_table) :
1379
ARRAY_SIZE(gfx12_3src_source_index_table);
1381
for (unsigned i = 0; i < three_src_source_index_table_len; i++) {
1382
if (three_src_source_index_table[i] == uncompacted) {
1383
brw_compact_inst_set_3src_source_index(devinfo, dst, i);
1388
uint64_t uncompacted = /* 46b/BDW; 49b/CHV/SKL+ */
1389
(brw_inst_bits(src, 83, 83) << 43) | /* 1b */
1390
(brw_inst_bits(src, 114, 107) << 35) | /* 8b */
1391
(brw_inst_bits(src, 93, 86) << 27) | /* 8b */
1392
(brw_inst_bits(src, 72, 65) << 19) | /* 8b */
1393
(brw_inst_bits(src, 55, 37)); /* 19b */
1395
if (devinfo->ver >= 9 || devinfo->platform == INTEL_PLATFORM_CHV) {
1397
(brw_inst_bits(src, 126, 125) << 47) | /* 2b */
1398
(brw_inst_bits(src, 105, 104) << 45) | /* 2b */
1399
(brw_inst_bits(src, 84, 84) << 44); /* 1b */
1402
(brw_inst_bits(src, 125, 125) << 45) | /* 1b */
1403
(brw_inst_bits(src, 104, 104) << 44); /* 1b */
1406
for (unsigned i = 0; i < ARRAY_SIZE(gfx8_3src_source_index_table); i++) {
1407
if (gfx8_3src_source_index_table[i] == uncompacted) {
1408
brw_compact_inst_set_3src_source_index(devinfo, dst, i);
1418
set_3src_subreg_index(const struct intel_device_info *devinfo,
1419
brw_compact_inst *dst, const brw_inst *src)
1421
assert(devinfo->ver >= 12);
1423
uint32_t uncompacted = /* 20b/TGL+ */
1424
(brw_inst_bits(src, 119, 115) << 15) | /* 5b */
1425
(brw_inst_bits(src, 103, 99) << 10) | /* 5b */
1426
(brw_inst_bits(src, 71, 67) << 5) | /* 5b */
1427
(brw_inst_bits(src, 55, 51)); /* 5b */
1429
for (unsigned i = 0; i < ARRAY_SIZE(gfx12_3src_subreg_table); i++) {
1430
if (gfx12_3src_subreg_table[i] == uncompacted) {
1431
brw_compact_inst_set_3src_subreg_index(devinfo, dst, i);
1440
has_unmapped_bits(const struct intel_device_info *devinfo, const brw_inst *src)
1442
/* EOT can only be mapped on a send if the src1 is an immediate */
1443
if ((brw_inst_opcode(devinfo, src) == BRW_OPCODE_SENDC ||
1444
brw_inst_opcode(devinfo, src) == BRW_OPCODE_SEND) &&
1445
brw_inst_eot(devinfo, src))
1448
/* Check for instruction bits that don't map to any of the fields of the
1449
* compacted instruction. The instruction cannot be compacted if any of
1450
* them are set. They overlap with:
1451
* - NibCtrl (bit 47 on Gfx7, bit 11 on Gfx8)
1452
* - Dst.AddrImm[9] (bit 47 on Gfx8)
1453
* - Src0.AddrImm[9] (bit 95 on Gfx8)
1454
* - Imm64[27:31] (bits 91-95 on Gfx7, bit 95 on Gfx8)
1455
* - UIP[31] (bit 95 on Gfx8)
1457
if (devinfo->ver >= 12) {
1458
assert(!brw_inst_bits(src, 7, 7));
1460
} else if (devinfo->ver >= 8) {
1461
assert(!brw_inst_bits(src, 7, 7));
1462
return brw_inst_bits(src, 95, 95) ||
1463
brw_inst_bits(src, 47, 47) ||
1464
brw_inst_bits(src, 11, 11);
1466
assert(!brw_inst_bits(src, 7, 7) &&
1467
!(devinfo->ver < 7 && brw_inst_bits(src, 90, 90)));
1468
return brw_inst_bits(src, 95, 91) ||
1469
brw_inst_bits(src, 47, 47);
1474
has_3src_unmapped_bits(const struct intel_device_info *devinfo,
1475
const brw_inst *src)
1477
/* Check for three-source instruction bits that don't map to any of the
1478
* fields of the compacted instruction. All of them seem to be reserved
1481
if (devinfo->ver >= 12) {
1482
assert(!brw_inst_bits(src, 7, 7));
1483
} else if (devinfo->ver >= 9 || devinfo->platform == INTEL_PLATFORM_CHV) {
1484
assert(!brw_inst_bits(src, 127, 127) &&
1485
!brw_inst_bits(src, 7, 7));
1487
assert(devinfo->ver >= 8);
1488
assert(!brw_inst_bits(src, 127, 126) &&
1489
!brw_inst_bits(src, 105, 105) &&
1490
!brw_inst_bits(src, 84, 84) &&
1491
!brw_inst_bits(src, 7, 7));
1493
/* Src1Type and Src2Type, used for mixed-precision floating point */
1494
if (brw_inst_bits(src, 36, 35))
1502
/* Compacts the three-source instruction src into dst.  The visible steps
 * are: consult has_3src_unmapped_bits(), resolve the control, source and
 * (Gfx12+) subregister table indices through the set_3src_*_index() helpers,
 * copy the remaining fields with the local compact()/compact_a16() macros
 * and finally set the 3src cmpt_control field of dst to true.
 * Requires devinfo->ver >= 8 (asserted).
 *
 * NOTE(review): this copy of the function has lost its short lines (return
 * type, braces, the statements guarded by the ifs below) — confirm details
 * that are not shown here against the pristine file.
 */
brw_try_compact_3src_instruction(const struct intel_device_info *devinfo,
1503
brw_compact_inst *dst, const brw_inst *src)
1505
assert(devinfo->ver >= 8);
1507
if (has_3src_unmapped_bits(devinfo, src))
1510
/* compact(f) copies 3src field f from src to dst; compact_a16(f) reads the
 * field through the brw_inst_3src_a16_* accessor instead.
 */
#define compact(field) \
1511
brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_##field(devinfo, src))
1512
#define compact_a16(field) \
1513
brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_a16_##field(devinfo, src))
1517
/* Each index helper is tested for failure (a false result). */
if (!set_3src_control_index(devinfo, dst, src))
1520
if (!set_3src_source_index(devinfo, dst, src))
1523
if (devinfo->ver >= 12) {
1524
/* The subregister index is only looked up on Gfx12+. */
if (!set_3src_subreg_index(devinfo, dst, src))
1528
compact(debug_control);
1529
compact(dst_reg_nr);
1530
compact(src0_reg_nr);
1531
compact(src1_reg_nr);
1532
compact(src2_reg_nr);
1534
/* NOTE(review): the copies from here on use the a16 accessors and look like
 * the pre-Gfx12 branch; the branch boundary itself is not visible — confirm.
 */
compact(dst_reg_nr);
1535
compact_a16(src0_rep_ctrl);
1536
compact(debug_control);
1538
compact_a16(src1_rep_ctrl);
1539
compact_a16(src2_rep_ctrl);
1540
compact(src0_reg_nr);
1541
compact(src1_reg_nr);
1542
compact(src2_reg_nr);
1543
compact_a16(src0_subreg_nr);
1544
compact_a16(src1_subreg_nr);
1545
compact_a16(src2_subreg_nr);
1547
/* Flag dst as a compacted three-source instruction. */
brw_compact_inst_set_3src_cmpt_control(devinfo, dst, true);
1555
/* On SNB through ICL, compacted instructions have 12 bits for immediate
 * sources, and a 13th bit that's replicated through the high 20 bits.
 *
 * Effectively this means we get 12-bit integers, 0.0f, and some limited uses
 * of packed vectors as compactable immediates.
 *
 * On TGL+, the high 12 bits of floating-point values (:f and :hf) are encoded
 * rather than the low 12 bits. For signed integers the 12th bit is
 * replicated, while for unsigned integers it is not.
 *
 * Returns the compacted immediate, or -1 if the immediate cannot be compacted.
 */
1568
/* Computes the compact encoding of the 32-bit immediate imm of type type.
 *
 * Visible behavior on Gfx12+: for W/UW/HF the two 16-bit halves of imm are
 * compared first; F yields bits 31:20 when the low 20 bits are zero; HF
 * yields bits 15:4 when the low 4 bits are zero; the integer and vector
 * types are range-checked against 12 (unsigned) or 11+sign (signed) bits.
 * Before Gfx12 the low 13 bits are returned when imm, read as a signed int,
 * fits in 13 bits.
 *
 * NOTE(review): several short lines (the switch headers and most return
 * statements) are missing from this copy, so the values returned by the
 * integer cases and the failure value are not visible here — confirm against
 * the pristine file.
 */
compact_immediate(const struct intel_device_info *devinfo,
1569
enum brw_reg_type type, unsigned imm)
1571
if (devinfo->ver >= 12) {
1572
/* 16-bit immediates need to be replicated through the 32-bit immediate
1576
case BRW_REGISTER_TYPE_W:
1577
case BRW_REGISTER_TYPE_UW:
1578
case BRW_REGISTER_TYPE_HF:
1579
if ((imm >> 16) != (imm & 0xffff))
1587
case BRW_REGISTER_TYPE_F:
1588
/* We get the high 12-bits as-is; rest must be zero */
1589
if ((imm & 0xfffff) == 0)
1590
return (imm >> 20) & 0xfff;
1592
case BRW_REGISTER_TYPE_HF:
1593
/* We get the high 12-bits as-is; rest must be zero */
1594
if ((imm & 0xf) == 0)
1595
return (imm >> 4) & 0xfff;
1597
case BRW_REGISTER_TYPE_UD:
1598
case BRW_REGISTER_TYPE_VF:
1599
case BRW_REGISTER_TYPE_UV:
1600
case BRW_REGISTER_TYPE_V:
1601
/* We get the low 12-bits as-is; rest must be zero */
1602
if ((imm & 0xfffff000) == 0)
1605
case BRW_REGISTER_TYPE_UW:
1606
/* We get the low 12-bits as-is; rest must be zero */
1607
if ((imm & 0xf000) == 0)
1610
case BRW_REGISTER_TYPE_D:
1611
/* We get the low 11-bits as-is; 12th is replicated */
1612
if (((int)imm >> 11) == 0 || ((int)imm >> 11) == -1)
1615
case BRW_REGISTER_TYPE_W:
1616
/* We get the low 11-bits as-is; 12th is replicated */
1617
if (((short)imm >> 11) == 0 || ((short)imm >> 11) == -1)
1620
case BRW_REGISTER_TYPE_NF:
1621
case BRW_REGISTER_TYPE_DF:
1622
case BRW_REGISTER_TYPE_Q:
1623
case BRW_REGISTER_TYPE_UQ:
1624
case BRW_REGISTER_TYPE_B:
1625
case BRW_REGISTER_TYPE_UB:
1629
/* We get the low 12 bits as-is; 13th is replicated */
1630
if (((int)imm >> 12) == 0 || ((int)imm >> 12 == -1)) {
1631
return imm & 0x1fff;
1639
/* Expands a compacted immediate compact_imm of type type back to the value
 * stored in the full-size instruction.
 *
 * On Gfx12+ the expansion depends on the type (see the cases below); NF, DF,
 * Q, UQ, B and UB are treated as unreachable.  Before Gfx12 the 13-bit value
 * is sign-extended to 32 bits.
 *
 * NOTE(review): some short lines (switch header, braces, at least one return
 * statement) are missing from this copy — confirm the UD/VF/UV/V result
 * against the pristine file.
 */
uncompact_immediate(const struct intel_device_info *devinfo,
1640
enum brw_reg_type type, unsigned compact_imm)
1642
if (devinfo->ver >= 12) {
1644
case BRW_REGISTER_TYPE_F:
1645
/* The 12 bits become the top 12 bits of the float; the rest is zero. */
return compact_imm << 20;
1646
case BRW_REGISTER_TYPE_HF:
1647
/* Top 12 bits of the half float, replicated into both 16-bit halves. */
return (compact_imm << 20) | (compact_imm << 4);
1648
case BRW_REGISTER_TYPE_UD:
1649
case BRW_REGISTER_TYPE_VF:
1650
case BRW_REGISTER_TYPE_UV:
1651
case BRW_REGISTER_TYPE_V:
1653
case BRW_REGISTER_TYPE_UW:
1655
/* The same 12-bit value is placed in both 16-bit halves. */
return compact_imm << 16 | compact_imm;
1656
case BRW_REGISTER_TYPE_D:
1657
/* Extend the 12th bit into the high 20 bits */
1658
return (int)(compact_imm << 20) >> 20;
1659
case BRW_REGISTER_TYPE_W:
1660
/* Extend the 12th bit into the high 4 bits and replicate */
1661
return ((int)(compact_imm << 20) >> 4) |
1662
((unsigned short)((short)(compact_imm << 4) >> 4));
1663
case BRW_REGISTER_TYPE_NF:
1664
case BRW_REGISTER_TYPE_DF:
1665
case BRW_REGISTER_TYPE_Q:
1666
case BRW_REGISTER_TYPE_UQ:
1667
case BRW_REGISTER_TYPE_B:
1668
case BRW_REGISTER_TYPE_UB:
1669
unreachable("not reached");
1672
/* Replicate the 13th bit into the high 19 bits */
1673
return (int)(compact_imm << 19) >> 19;
1676
unreachable("not reached");
1680
has_immediate(const struct intel_device_info *devinfo, const brw_inst *inst,
1681
enum brw_reg_type *type)
1683
if (brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) {
1684
*type = brw_inst_src0_type(devinfo, inst);
1685
return *type != INVALID_REG_TYPE;
1686
} else if (brw_inst_src1_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) {
1687
*type = brw_inst_src1_type(devinfo, inst);
1688
return *type != INVALID_REG_TYPE;
1695
* Applies some small changes to instruction types to increase chances of
1699
precompact(const struct intel_device_info *devinfo, brw_inst inst)
1701
if (brw_inst_src0_reg_file(devinfo, &inst) != BRW_IMMEDIATE_VALUE)
1704
/* The Bspec's section titled "Non-present Operands" claims that if src0
1705
* is an immediate that src1's type must be the same as that of src0.
1707
* The SNB+ DataTypeIndex instruction compaction tables contain mappings
1708
* that do not follow this rule. E.g., from the IVB/HSW table:
1710
* DataTypeIndex 18-Bit Mapping Mapped Meaning
1711
* 3 001000001011111101 r:f | i:vf | a:ud | <1> | dir |
1713
* And from the SNB table:
1715
* DataTypeIndex 18-Bit Mapping Mapped Meaning
1716
* 8 001000000111101100 a:w | i:w | a:ud | <1> | dir |
1718
* Neither of these cause warnings from the simulator when used,
1719
* compacted or otherwise. In fact, all compaction mappings that have an
1720
* immediate in src0 use a:ud for src1.
1722
* The GM45 instruction compaction tables do not contain mapped meanings
1723
* so it's not clear whether it has the restriction. We'll assume it was
1724
* lifted on SNB. (FINISHME: decode the GM45 tables and check.)
1726
* Don't do any of this for 64-bit immediates, since the src1 fields
1727
* overlap with the immediate and setting them would overwrite the
1730
if (devinfo->ver >= 6 &&
1731
!(devinfo->platform == INTEL_PLATFORM_HSW &&
1732
brw_inst_opcode(devinfo, &inst) == BRW_OPCODE_DIM) &&
1733
!(devinfo->ver >= 8 &&
1734
(brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_DF ||
1735
brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_UQ ||
1736
brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_Q))) {
1737
brw_inst_set_src1_reg_hw_type(devinfo, &inst, 0);
1740
/* Compacted instructions only have 12-bits (plus 1 for the other 20)
1741
* for immediate values. Presumably the hardware engineers realized
1742
* that the only useful floating-point value that could be represented
1743
* in this format is 0.0, which can also be represented as a VF-typed
1744
* immediate, so they gave us the previously mentioned mapping on IVB+.
1746
* Strangely, we do have a mapping for imm:f in src1, so we don't need
1749
* If we see a 0.0:F, change the type to VF so that it can be compacted.
1751
* Compaction of floating-point immediates is improved on Gfx12, thus
1752
* removing the need for this.
1754
if (devinfo->ver < 12 &&
1755
brw_inst_imm_ud(devinfo, &inst) == 0x0 &&
1756
brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_F &&
1757
brw_inst_dst_type(devinfo, &inst) == BRW_REGISTER_TYPE_F &&
1758
brw_inst_dst_hstride(devinfo, &inst) == BRW_HORIZONTAL_STRIDE_1) {
1759
enum brw_reg_file file = brw_inst_src0_reg_file(devinfo, &inst);
1760
brw_inst_set_src0_file_type(devinfo, &inst, file, BRW_REGISTER_TYPE_VF);
1763
/* There are no mappings for dst:d | i:d, so if the immediate is suitable
1764
* set the types to :UD so the instruction can be compacted.
1766
* FINISHME: Use dst:f | imm:f on Gfx12
1768
if (devinfo->ver < 12 &&
1769
compact_immediate(devinfo, BRW_REGISTER_TYPE_D,
1770
brw_inst_imm_ud(devinfo, &inst)) != -1 &&
1771
brw_inst_cond_modifier(devinfo, &inst) == BRW_CONDITIONAL_NONE &&
1772
brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_D &&
1773
brw_inst_dst_type(devinfo, &inst) == BRW_REGISTER_TYPE_D) {
1774
enum brw_reg_file src_file = brw_inst_src0_reg_file(devinfo, &inst);
1775
enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, &inst);
1777
brw_inst_set_src0_file_type(devinfo, &inst, src_file, BRW_REGISTER_TYPE_UD);
1778
brw_inst_set_dst_file_type(devinfo, &inst, dst_file, BRW_REGISTER_TYPE_UD);
1785
/**
 * Tries to compact instruction src into dst.
 *
 * It doesn't modify dst unless src is compactable, which is relied on by
 * brw_compact_instructions().
 */
1791
/* Compacts the uncompacted instruction src.  All fields are assembled in the
 * local scratch instruction temp rather than in dst directly.  Three-source
 * opcodes are handed to brw_try_compact_3src_instruction(); everything else
 * goes through the immediate check, has_unmapped_bits(), the five
 * set_*_index() table lookups and a series of direct field copies.
 *
 * NOTE(review): the short lines of this function (return type, braces, the
 * statements guarded by the ifs, and whatever copies temp to dst) are missing
 * from this copy — confirm details not shown here against the pristine file.
 */
try_compact_instruction(const struct compaction_state *c,
1792
brw_compact_inst *dst, const brw_inst *src)
1794
const struct intel_device_info *devinfo = c->devinfo;
1795
brw_compact_inst temp;
1797
/* The input must not already be marked as compacted. */
assert(brw_inst_cmpt_control(devinfo, src) == 0);
1799
if (is_3src(devinfo, brw_inst_opcode(devinfo, src))) {
1800
/* The three-source path is only entered on Gfx8+. */
if (devinfo->ver >= 8) {
1801
memset(&temp, 0, sizeof(temp));
1802
if (brw_try_compact_3src_instruction(devinfo, &temp, src)) {
1813
enum brw_reg_type type;
1814
bool is_immediate = has_immediate(devinfo, src, &type);
1816
unsigned compacted_imm = 0;
1819
/* Instructions with immediates cannot be compacted on Gen < 6 */
1820
if (devinfo->ver < 6)
1823
compacted_imm = compact_immediate(devinfo, type,
1824
brw_inst_imm_ud(devinfo, src));
1825
/* compacted_imm is unsigned, so the -1 on the right-hand side is converted
 * to the all-ones value before the comparison.
 */
if (compacted_imm == -1)
1829
if (has_unmapped_bits(devinfo, src))
1832
/* Start from an all-zero compact instruction. */
memset(&temp, 0, sizeof(temp));
1834
/* compact(f) copies field f from src to temp; compact_reg(f) copies the
 * direct-addressing register number of operand f.
 */
#define compact(field) \
1835
brw_compact_inst_set_##field(devinfo, &temp, brw_inst_##field(devinfo, src))
1836
#define compact_reg(field) \
1837
brw_compact_inst_set_##field##_reg_nr(devinfo, &temp, \
1838
brw_inst_##field##_da_reg_nr(devinfo, src))
1841
compact(debug_control);
1843
/* Each index helper is tested for failure (a false result). */
if (!set_control_index(c, &temp, src))
1845
if (!set_datatype_index(c, &temp, src, is_immediate))
1847
if (!set_subreg_index(c, &temp, src, is_immediate))
1849
if (!set_src0_index(c, &temp, src))
1851
if (!set_src1_index(c, &temp, src, is_immediate, compacted_imm))
1854
if (devinfo->ver >= 12) {
1860
/* src1 reg takes the high 8 bits (of the 12-bit compacted value) */
1861
brw_compact_inst_set_src1_reg_nr(devinfo, &temp, compacted_imm >> 4);
1866
if (devinfo->ver >= 6) {
1867
compact(acc_wr_control);
1869
compact(mask_control_ex);
1872
if (devinfo->ver <= 6)
1873
compact(flag_subreg_nr);
1875
compact(cond_modifier);
1881
/* src1 reg takes the low 8 bits (of the 13-bit compacted value) */
1882
brw_compact_inst_set_src1_reg_nr(devinfo, &temp, compacted_imm & 0xff);
1887
/* Flag the scratch instruction as compacted. */
brw_compact_inst_set_cmpt_control(devinfo, &temp, true);
1898
brw_try_compact_instruction(const struct intel_device_info *devinfo,
1899
brw_compact_inst *dst, const brw_inst *src)
1901
struct compaction_state c;
1902
compaction_state_init(&c, devinfo);
1903
return try_compact_instruction(&c, dst, src);
1907
set_uncompacted_control(const struct compaction_state *c, brw_inst *dst,
1908
brw_compact_inst *src)
1910
const struct intel_device_info *devinfo = c->devinfo;
1911
uint32_t uncompacted =
1912
c->control_index_table[brw_compact_inst_control_index(devinfo, src)];
1914
if (devinfo->ver >= 12) {
1915
brw_inst_set_bits(dst, 95, 92, (uncompacted >> 17));
1916
brw_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
1917
brw_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
1918
brw_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
1919
brw_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
1920
brw_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
1921
brw_inst_set_bits(dst, 27, 24, (uncompacted >> 8) & 0xf);
1922
brw_inst_set_bits(dst, 23, 22, (uncompacted >> 6) & 0x3);
1923
brw_inst_set_bits(dst, 21, 19, (uncompacted >> 3) & 0x7);
1924
brw_inst_set_bits(dst, 18, 16, (uncompacted >> 0) & 0x7);
1925
} else if (devinfo->ver >= 8) {
1926
brw_inst_set_bits(dst, 33, 31, (uncompacted >> 16));
1927
brw_inst_set_bits(dst, 23, 12, (uncompacted >> 4) & 0xfff);
1928
brw_inst_set_bits(dst, 10, 9, (uncompacted >> 2) & 0x3);
1929
brw_inst_set_bits(dst, 34, 34, (uncompacted >> 1) & 0x1);
1930
brw_inst_set_bits(dst, 8, 8, (uncompacted >> 0) & 0x1);
1932
brw_inst_set_bits(dst, 31, 31, (uncompacted >> 16) & 0x1);
1933
brw_inst_set_bits(dst, 23, 8, (uncompacted & 0xffff));
1935
if (devinfo->ver == 7)
1936
brw_inst_set_bits(dst, 90, 89, uncompacted >> 17);
1941
set_uncompacted_datatype(const struct compaction_state *c, brw_inst *dst,
1942
brw_compact_inst *src)
1944
const struct intel_device_info *devinfo = c->devinfo;
1945
uint32_t uncompacted =
1946
c->datatype_table[brw_compact_inst_datatype_index(devinfo, src)];
1948
if (devinfo->ver >= 12) {
1949
brw_inst_set_bits(dst, 98, 98, (uncompacted >> 19));
1950
brw_inst_set_bits(dst, 91, 88, (uncompacted >> 15) & 0xf);
1951
brw_inst_set_bits(dst, 66, 66, (uncompacted >> 14) & 0x1);
1952
brw_inst_set_bits(dst, 50, 50, (uncompacted >> 13) & 0x1);
1953
brw_inst_set_bits(dst, 49, 48, (uncompacted >> 11) & 0x3);
1954
brw_inst_set_bits(dst, 47, 47, (uncompacted >> 10) & 0x1);
1955
brw_inst_set_bits(dst, 46, 46, (uncompacted >> 9) & 0x1);
1956
brw_inst_set_bits(dst, 43, 40, (uncompacted >> 5) & 0xf);
1957
brw_inst_set_bits(dst, 39, 36, (uncompacted >> 1) & 0xf);
1958
brw_inst_set_bits(dst, 35, 35, (uncompacted >> 0) & 0x1);
1959
} else if (devinfo->ver >= 8) {
1960
brw_inst_set_bits(dst, 63, 61, (uncompacted >> 18));
1961
brw_inst_set_bits(dst, 94, 89, (uncompacted >> 12) & 0x3f);
1962
brw_inst_set_bits(dst, 46, 35, (uncompacted >> 0) & 0xfff);
1964
brw_inst_set_bits(dst, 63, 61, (uncompacted >> 15));
1965
brw_inst_set_bits(dst, 46, 32, (uncompacted & 0x7fff));
1970
set_uncompacted_subreg(const struct compaction_state *c, brw_inst *dst,
1971
brw_compact_inst *src)
1973
const struct intel_device_info *devinfo = c->devinfo;
1974
uint16_t uncompacted =
1975
c->subreg_table[brw_compact_inst_subreg_index(devinfo, src)];
1977
if (devinfo->ver >= 12) {
1978
brw_inst_set_bits(dst, 103, 99, (uncompacted >> 10));
1979
brw_inst_set_bits(dst, 71, 67, (uncompacted >> 5) & 0x1f);
1980
brw_inst_set_bits(dst, 55, 51, (uncompacted >> 0) & 0x1f);
1982
brw_inst_set_bits(dst, 100, 96, (uncompacted >> 10));
1983
brw_inst_set_bits(dst, 68, 64, (uncompacted >> 5) & 0x1f);
1984
brw_inst_set_bits(dst, 52, 48, (uncompacted >> 0) & 0x1f);
1989
set_uncompacted_src0(const struct compaction_state *c, brw_inst *dst,
1990
brw_compact_inst *src)
1992
const struct intel_device_info *devinfo = c->devinfo;
1993
uint32_t compacted = brw_compact_inst_src0_index(devinfo, src);
1994
uint16_t uncompacted = c->src0_index_table[compacted];
1996
if (devinfo->ver >= 12) {
1997
brw_inst_set_bits(dst, 87, 84, (uncompacted >> 8));
1998
brw_inst_set_bits(dst, 83, 81, (uncompacted >> 5) & 0x7);
1999
brw_inst_set_bits(dst, 80, 80, (uncompacted >> 4) & 0x1);
2000
brw_inst_set_bits(dst, 65, 64, (uncompacted >> 2) & 0x3);
2001
brw_inst_set_bits(dst, 45, 44, (uncompacted >> 0) & 0x3);
2003
brw_inst_set_bits(dst, 88, 77, uncompacted);
2008
set_uncompacted_src1(const struct compaction_state *c, brw_inst *dst,
2009
brw_compact_inst *src)
2011
const struct intel_device_info *devinfo = c->devinfo;
2012
uint16_t uncompacted =
2013
c->src1_index_table[brw_compact_inst_src1_index(devinfo, src)];
2015
if (devinfo->ver >= 12) {
2016
brw_inst_set_bits(dst, 121, 120, (uncompacted >> 10));
2017
brw_inst_set_bits(dst, 119, 116, (uncompacted >> 6) & 0xf);
2018
brw_inst_set_bits(dst, 115, 113, (uncompacted >> 3) & 0x7);
2019
brw_inst_set_bits(dst, 112, 112, (uncompacted >> 2) & 0x1);
2020
brw_inst_set_bits(dst, 97, 96, (uncompacted >> 0) & 0x3);
2022
brw_inst_set_bits(dst, 120, 109, uncompacted);
2027
set_uncompacted_3src_control_index(const struct compaction_state *c,
2028
brw_inst *dst, brw_compact_inst *src)
2030
const struct intel_device_info *devinfo = c->devinfo;
2031
assert(devinfo->ver >= 8);
2033
if (devinfo->verx10 >= 125) {
2034
uint64_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
2035
uint64_t uncompacted = xehp_3src_control_index_table[compacted];
2037
brw_inst_set_bits(dst, 95, 92, (uncompacted >> 33));
2038
brw_inst_set_bits(dst, 90, 88, (uncompacted >> 30) & 0x7);
2039
brw_inst_set_bits(dst, 82, 80, (uncompacted >> 27) & 0x7);
2040
brw_inst_set_bits(dst, 50, 50, (uncompacted >> 26) & 0x1);
2041
brw_inst_set_bits(dst, 49, 48, (uncompacted >> 24) & 0x3);
2042
brw_inst_set_bits(dst, 42, 40, (uncompacted >> 21) & 0x7);
2043
brw_inst_set_bits(dst, 39, 39, (uncompacted >> 20) & 0x1);
2044
brw_inst_set_bits(dst, 38, 36, (uncompacted >> 17) & 0x7);
2045
brw_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
2046
brw_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
2047
brw_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
2048
brw_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
2049
brw_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
2050
brw_inst_set_bits(dst, 27, 24, (uncompacted >> 8) & 0xf);
2051
brw_inst_set_bits(dst, 23, 23, (uncompacted >> 7) & 0x1);
2052
brw_inst_set_bits(dst, 22, 22, (uncompacted >> 6) & 0x1);
2053
brw_inst_set_bits(dst, 21, 19, (uncompacted >> 3) & 0x7);
2054
brw_inst_set_bits(dst, 18, 16, (uncompacted >> 0) & 0x7);
2056
} else if (devinfo->ver >= 12) {
2057
uint64_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
2058
uint64_t uncompacted = gfx12_3src_control_index_table[compacted];
2060
brw_inst_set_bits(dst, 95, 92, (uncompacted >> 32));
2061
brw_inst_set_bits(dst, 90, 88, (uncompacted >> 29) & 0x7);
2062
brw_inst_set_bits(dst, 82, 80, (uncompacted >> 26) & 0x7);
2063
brw_inst_set_bits(dst, 50, 50, (uncompacted >> 25) & 0x1);
2064
brw_inst_set_bits(dst, 48, 48, (uncompacted >> 24) & 0x1);
2065
brw_inst_set_bits(dst, 42, 40, (uncompacted >> 21) & 0x7);
2066
brw_inst_set_bits(dst, 39, 39, (uncompacted >> 20) & 0x1);
2067
brw_inst_set_bits(dst, 38, 36, (uncompacted >> 17) & 0x7);
2068
brw_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
2069
brw_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
2070
brw_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
2071
brw_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
2072
brw_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
2073
brw_inst_set_bits(dst, 27, 24, (uncompacted >> 8) & 0xf);
2074
brw_inst_set_bits(dst, 23, 23, (uncompacted >> 7) & 0x1);
2075
brw_inst_set_bits(dst, 22, 22, (uncompacted >> 6) & 0x1);
2076
brw_inst_set_bits(dst, 21, 19, (uncompacted >> 3) & 0x7);
2077
brw_inst_set_bits(dst, 18, 16, (uncompacted >> 0) & 0x7);
2079
uint32_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
2080
uint32_t uncompacted = gfx8_3src_control_index_table[compacted];
2082
brw_inst_set_bits(dst, 34, 32, (uncompacted >> 21) & 0x7);
2083
brw_inst_set_bits(dst, 28, 8, (uncompacted >> 0) & 0x1fffff);
2085
if (devinfo->ver >= 9 || devinfo->platform == INTEL_PLATFORM_CHV)
2086
brw_inst_set_bits(dst, 36, 35, (uncompacted >> 24) & 0x3);
2091
set_uncompacted_3src_source_index(const struct intel_device_info *devinfo,
2092
brw_inst *dst, brw_compact_inst *src)
2094
assert(devinfo->ver >= 8);
2096
uint32_t compacted = brw_compact_inst_3src_source_index(devinfo, src);
2098
if (devinfo->ver >= 12) {
2099
const uint32_t *three_src_source_index_table =
2100
devinfo->verx10 >= 125 ?
2101
xehp_3src_source_index_table : gfx12_3src_source_index_table;
2102
uint32_t uncompacted = three_src_source_index_table[compacted];
2104
brw_inst_set_bits(dst, 114, 114, (uncompacted >> 20));
2105
brw_inst_set_bits(dst, 113, 112, (uncompacted >> 18) & 0x3);
2106
brw_inst_set_bits(dst, 98, 98, (uncompacted >> 17) & 0x1);
2107
brw_inst_set_bits(dst, 97, 96, (uncompacted >> 15) & 0x3);
2108
brw_inst_set_bits(dst, 91, 91, (uncompacted >> 14) & 0x1);
2109
brw_inst_set_bits(dst, 87, 86, (uncompacted >> 12) & 0x3);
2110
brw_inst_set_bits(dst, 85, 84, (uncompacted >> 10) & 0x3);
2111
brw_inst_set_bits(dst, 83, 83, (uncompacted >> 9) & 0x1);
2112
brw_inst_set_bits(dst, 66, 66, (uncompacted >> 8) & 0x1);
2113
brw_inst_set_bits(dst, 65, 64, (uncompacted >> 6) & 0x3);
2114
brw_inst_set_bits(dst, 47, 47, (uncompacted >> 5) & 0x1);
2115
brw_inst_set_bits(dst, 46, 46, (uncompacted >> 4) & 0x1);
2116
brw_inst_set_bits(dst, 45, 44, (uncompacted >> 2) & 0x3);
2117
brw_inst_set_bits(dst, 43, 43, (uncompacted >> 1) & 0x1);
2118
brw_inst_set_bits(dst, 35, 35, (uncompacted >> 0) & 0x1);
2120
uint64_t uncompacted = gfx8_3src_source_index_table[compacted];
2122
brw_inst_set_bits(dst, 83, 83, (uncompacted >> 43) & 0x1);
2123
brw_inst_set_bits(dst, 114, 107, (uncompacted >> 35) & 0xff);
2124
brw_inst_set_bits(dst, 93, 86, (uncompacted >> 27) & 0xff);
2125
brw_inst_set_bits(dst, 72, 65, (uncompacted >> 19) & 0xff);
2126
brw_inst_set_bits(dst, 55, 37, (uncompacted >> 0) & 0x7ffff);
2128
if (devinfo->ver >= 9 || devinfo->platform == INTEL_PLATFORM_CHV) {
2129
brw_inst_set_bits(dst, 126, 125, (uncompacted >> 47) & 0x3);
2130
brw_inst_set_bits(dst, 105, 104, (uncompacted >> 45) & 0x3);
2131
brw_inst_set_bits(dst, 84, 84, (uncompacted >> 44) & 0x1);
2133
brw_inst_set_bits(dst, 125, 125, (uncompacted >> 45) & 0x1);
2134
brw_inst_set_bits(dst, 104, 104, (uncompacted >> 44) & 0x1);
2140
set_uncompacted_3src_subreg_index(const struct intel_device_info *devinfo,
2141
brw_inst *dst, brw_compact_inst *src)
2143
assert(devinfo->ver >= 12);
2145
uint32_t compacted = brw_compact_inst_3src_subreg_index(devinfo, src);
2146
uint32_t uncompacted = gfx12_3src_subreg_table[compacted];
2148
brw_inst_set_bits(dst, 119, 115, (uncompacted >> 15));
2149
brw_inst_set_bits(dst, 103, 99, (uncompacted >> 10) & 0x1f);
2150
brw_inst_set_bits(dst, 71, 67, (uncompacted >> 5) & 0x1f);
2151
brw_inst_set_bits(dst, 55, 51, (uncompacted >> 0) & 0x1f);
2155
brw_uncompact_3src_instruction(const struct compaction_state *c,
2156
brw_inst *dst, brw_compact_inst *src)
2158
const struct intel_device_info *devinfo = c->devinfo;
2159
assert(devinfo->ver >= 8);
2161
#define uncompact(field) \
2162
brw_inst_set_3src_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
2163
#define uncompact_a16(field) \
2164
brw_inst_set_3src_a16_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
2166
uncompact(hw_opcode);
2168
if (devinfo->ver >= 12) {
2169
set_uncompacted_3src_control_index(c, dst, src);
2170
set_uncompacted_3src_source_index(devinfo, dst, src);
2171
set_uncompacted_3src_subreg_index(devinfo, dst, src);
2173
uncompact(debug_control);
2175
uncompact(dst_reg_nr);
2176
uncompact(src0_reg_nr);
2177
uncompact(src1_reg_nr);
2178
uncompact(src2_reg_nr);
2180
set_uncompacted_3src_control_index(c, dst, src);
2181
set_uncompacted_3src_source_index(devinfo, dst, src);
2183
uncompact(dst_reg_nr);
2184
uncompact_a16(src0_rep_ctrl);
2185
uncompact(debug_control);
2186
uncompact(saturate);
2187
uncompact_a16(src1_rep_ctrl);
2188
uncompact_a16(src2_rep_ctrl);
2189
uncompact(src0_reg_nr);
2190
uncompact(src1_reg_nr);
2191
uncompact(src2_reg_nr);
2192
uncompact_a16(src0_subreg_nr);
2193
uncompact_a16(src1_subreg_nr);
2194
uncompact_a16(src2_subreg_nr);
2196
brw_inst_set_3src_cmpt_control(devinfo, dst, false);
2199
#undef uncompact_a16
2203
uncompact_instruction(const struct compaction_state *c, brw_inst *dst,
2204
brw_compact_inst *src)
2206
const struct intel_device_info *devinfo = c->devinfo;
2207
memset(dst, 0, sizeof(*dst));
2209
if (devinfo->ver >= 8 &&
2210
is_3src(devinfo, brw_opcode_decode(
2211
devinfo, brw_compact_inst_3src_hw_opcode(devinfo, src)))) {
2212
brw_uncompact_3src_instruction(c, dst, src);
2216
#define uncompact(field) \
2217
brw_inst_set_##field(devinfo, dst, brw_compact_inst_##field(devinfo, src))
2218
#define uncompact_reg(field) \
2219
brw_inst_set_##field##_da_reg_nr(devinfo, dst, \
2220
brw_compact_inst_##field##_reg_nr(devinfo, src))
2222
uncompact(hw_opcode);
2223
uncompact(debug_control);
2225
set_uncompacted_control(c, dst, src);
2226
set_uncompacted_datatype(c, dst, src);
2227
set_uncompacted_subreg(c, dst, src);
2228
set_uncompacted_src0(c, dst, src);
2230
enum brw_reg_type type;
2231
if (has_immediate(devinfo, dst, &type)) {
2232
unsigned imm = uncompact_immediate(devinfo, type,
2233
brw_compact_inst_imm(devinfo, src));
2234
brw_inst_set_imm_ud(devinfo, dst, imm);
2236
set_uncompacted_src1(c, dst, src);
2237
uncompact_reg(src1);
2240
if (devinfo->ver >= 12) {
2243
uncompact_reg(src0);
2245
if (devinfo->ver >= 6) {
2246
uncompact(acc_wr_control);
2248
uncompact(mask_control_ex);
2251
uncompact(cond_modifier);
2253
if (devinfo->ver <= 6)
2254
uncompact(flag_subreg_nr);
2257
uncompact_reg(src0);
2259
brw_inst_set_cmpt_control(devinfo, dst, false);
2262
#undef uncompact_reg
2266
brw_uncompact_instruction(const struct intel_device_info *devinfo,
2267
brw_inst *dst, brw_compact_inst *src)
2269
struct compaction_state c;
2270
compaction_state_init(&c, devinfo);
2271
uncompact_instruction(&c, dst, src);
2274
void brw_debug_compact_uncompact(const struct intel_device_info *devinfo,
2276
brw_inst *uncompacted)
2278
fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n",
2281
fprintf(stderr, " before: ");
2282
brw_disassemble_inst(stderr, devinfo, orig, true, 0, NULL);
2284
fprintf(stderr, " after: ");
2285
brw_disassemble_inst(stderr, devinfo, uncompacted, false, 0, NULL);
2287
uint32_t *before_bits = (uint32_t *)orig;
2288
uint32_t *after_bits = (uint32_t *)uncompacted;
2289
fprintf(stderr, " changed bits:\n");
2290
for (int i = 0; i < 128; i++) {
2291
uint32_t before = before_bits[i / 32] & (1 << (i & 31));
2292
uint32_t after = after_bits[i / 32] & (1 << (i & 31));
2294
if (before != after) {
2295
fprintf(stderr, " bit %d, %s to %s\n", i,
2296
before ? "set" : "unset",
2297
after ? "set" : "unset");
/**
 * Difference between the running compaction counts recorded for two
 * pre-compaction instruction positions.
 *
 * \param old_ip            index of the first position in \p compacted_counts.
 * \param old_target_ip     index of the second (target) position.
 * \param compacted_counts  running count per pre-compaction position; only
 *                          read.
 * \return compacted_counts[old_target_ip] - compacted_counts[old_ip], which
 *         is negative when the target's count is the smaller one.
 */
static int
compacted_between(int old_ip, int old_target_ip, const int *compacted_counts)
{
   int this_compacted_count = compacted_counts[old_ip];
   int target_compacted_count = compacted_counts[old_target_ip];
   return target_compacted_count - this_compacted_count;
}
2311
update_uip_jip(const struct intel_device_info *devinfo, brw_inst *insn,
2312
int this_old_ip, int *compacted_counts)
2314
/* JIP and UIP are in units of:
2315
* - bytes on Gfx8+; and
2316
* - compacted instructions on Gfx6+.
2318
int shift = devinfo->ver >= 8 ? 3 : 0;
2320
int32_t jip_compacted = brw_inst_jip(devinfo, insn) >> shift;
2321
jip_compacted -= compacted_between(this_old_ip,
2322
this_old_ip + (jip_compacted / 2),
2324
brw_inst_set_jip(devinfo, insn, jip_compacted << shift);
2326
if (brw_inst_opcode(devinfo, insn) == BRW_OPCODE_ENDIF ||
2327
brw_inst_opcode(devinfo, insn) == BRW_OPCODE_WHILE ||
2328
(brw_inst_opcode(devinfo, insn) == BRW_OPCODE_ELSE && devinfo->ver <= 7))
2331
int32_t uip_compacted = brw_inst_uip(devinfo, insn) >> shift;
2332
uip_compacted -= compacted_between(this_old_ip,
2333
this_old_ip + (uip_compacted / 2),
2335
brw_inst_set_uip(devinfo, insn, uip_compacted << shift);
2339
update_gfx4_jump_count(const struct intel_device_info *devinfo, brw_inst *insn,
2340
int this_old_ip, int *compacted_counts)
2342
assert(devinfo->ver == 5 || devinfo->platform == INTEL_PLATFORM_G4X);
2344
/* Jump Count is in units of:
2345
* - uncompacted instructions on G45; and
2346
* - compacted instructions on Gfx5.
2348
int shift = devinfo->platform == INTEL_PLATFORM_G4X ? 1 : 0;
2350
int jump_count_compacted = brw_inst_gfx4_jump_count(devinfo, insn) << shift;
2352
int target_old_ip = this_old_ip + (jump_count_compacted / 2);
2354
int this_compacted_count = compacted_counts[this_old_ip];
2355
int target_compacted_count = compacted_counts[target_old_ip];
2357
jump_count_compacted -= (target_compacted_count - this_compacted_count);
2358
brw_inst_set_gfx4_jump_count(devinfo, insn, jump_count_compacted >> shift);
2362
compaction_state_init(struct compaction_state *c,
2363
const struct intel_device_info *devinfo)
2365
assert(g45_control_index_table[ARRAY_SIZE(g45_control_index_table) - 1] != 0);
2366
assert(g45_datatype_table[ARRAY_SIZE(g45_datatype_table) - 1] != 0);
2367
assert(g45_subreg_table[ARRAY_SIZE(g45_subreg_table) - 1] != 0);
2368
assert(g45_src_index_table[ARRAY_SIZE(g45_src_index_table) - 1] != 0);
2369
assert(gfx6_control_index_table[ARRAY_SIZE(gfx6_control_index_table) - 1] != 0);
2370
assert(gfx6_datatype_table[ARRAY_SIZE(gfx6_datatype_table) - 1] != 0);
2371
assert(gfx6_subreg_table[ARRAY_SIZE(gfx6_subreg_table) - 1] != 0);
2372
assert(gfx6_src_index_table[ARRAY_SIZE(gfx6_src_index_table) - 1] != 0);
2373
assert(gfx7_control_index_table[ARRAY_SIZE(gfx7_control_index_table) - 1] != 0);
2374
assert(gfx7_datatype_table[ARRAY_SIZE(gfx7_datatype_table) - 1] != 0);
2375
assert(gfx7_subreg_table[ARRAY_SIZE(gfx7_subreg_table) - 1] != 0);
2376
assert(gfx7_src_index_table[ARRAY_SIZE(gfx7_src_index_table) - 1] != 0);
2377
assert(gfx8_control_index_table[ARRAY_SIZE(gfx8_control_index_table) - 1] != 0);
2378
assert(gfx8_datatype_table[ARRAY_SIZE(gfx8_datatype_table) - 1] != 0);
2379
assert(gfx8_subreg_table[ARRAY_SIZE(gfx8_subreg_table) - 1] != 0);
2380
assert(gfx8_src_index_table[ARRAY_SIZE(gfx8_src_index_table) - 1] != 0);
2381
assert(gfx11_datatype_table[ARRAY_SIZE(gfx11_datatype_table) - 1] != 0);
2382
assert(gfx12_control_index_table[ARRAY_SIZE(gfx12_control_index_table) - 1] != 0);
2383
assert(gfx12_datatype_table[ARRAY_SIZE(gfx12_datatype_table) - 1] != 0);
2384
assert(gfx12_subreg_table[ARRAY_SIZE(gfx12_subreg_table) - 1] != 0);
2385
assert(gfx12_src0_index_table[ARRAY_SIZE(gfx12_src0_index_table) - 1] != 0);
2386
assert(gfx12_src1_index_table[ARRAY_SIZE(gfx12_src1_index_table) - 1] != 0);
2387
assert(xehp_src0_index_table[ARRAY_SIZE(xehp_src0_index_table) - 1] != 0);
2388
assert(xehp_src1_index_table[ARRAY_SIZE(xehp_src1_index_table) - 1] != 0);
2390
c->devinfo = devinfo;
2391
switch (devinfo->ver) {
2393
c->control_index_table = gfx12_control_index_table;;
2394
c->datatype_table = gfx12_datatype_table;
2395
c->subreg_table = gfx12_subreg_table;
2396
if (devinfo->verx10 >= 125) {
2397
c->src0_index_table = xehp_src0_index_table;
2398
c->src1_index_table = xehp_src1_index_table;
2400
c->src0_index_table = gfx12_src0_index_table;
2401
c->src1_index_table = gfx12_src1_index_table;
2405
c->control_index_table = gfx8_control_index_table;
2406
c->datatype_table = gfx11_datatype_table;
2407
c->subreg_table = gfx8_subreg_table;
2408
c->src0_index_table = gfx8_src_index_table;
2409
c->src1_index_table = gfx8_src_index_table;
2413
c->control_index_table = gfx8_control_index_table;
2414
c->datatype_table = gfx8_datatype_table;
2415
c->subreg_table = gfx8_subreg_table;
2416
c->src0_index_table = gfx8_src_index_table;
2417
c->src1_index_table = gfx8_src_index_table;
2420
c->control_index_table = gfx7_control_index_table;
2421
c->datatype_table = gfx7_datatype_table;
2422
c->subreg_table = gfx7_subreg_table;
2423
c->src0_index_table = gfx7_src_index_table;
2424
c->src1_index_table = gfx7_src_index_table;
2427
c->control_index_table = gfx6_control_index_table;
2428
c->datatype_table = gfx6_datatype_table;
2429
c->subreg_table = gfx6_subreg_table;
2430
c->src0_index_table = gfx6_src_index_table;
2431
c->src1_index_table = gfx6_src_index_table;
2435
c->control_index_table = g45_control_index_table;
2436
c->datatype_table = g45_datatype_table;
2437
c->subreg_table = g45_subreg_table;
2438
c->src0_index_table = g45_src_index_table;
2439
c->src1_index_table = g45_src_index_table;
2442
unreachable("unknown generation");
2447
brw_compact_instructions(struct brw_codegen *p, int start_offset,
2448
struct disasm_info *disasm)
2450
if (INTEL_DEBUG(DEBUG_NO_COMPACTION))
2453
const struct intel_device_info *devinfo = p->devinfo;
2454
void *store = p->store + start_offset / 16;
2455
/* For an instruction at byte offset 16*i before compaction, this is the
2456
* number of compacted instructions minus the number of padding NOP/NENOPs
2459
int compacted_counts[(p->next_insn_offset - start_offset) / sizeof(brw_inst)];
2460
/* For an instruction at byte offset 8*i after compaction, this was its IP
2461
* (in 16-byte units) before compaction.
2463
int old_ip[(p->next_insn_offset - start_offset) / sizeof(brw_compact_inst) + 1];
2465
if (devinfo->ver == 4 && devinfo->platform != INTEL_PLATFORM_G4X)
2468
struct compaction_state c;
2469
compaction_state_init(&c, devinfo);
2472
int compacted_count = 0;
2473
for (int src_offset = 0; src_offset < p->next_insn_offset - start_offset;
2474
src_offset += sizeof(brw_inst)) {
2475
brw_inst *src = store + src_offset;
2476
void *dst = store + offset;
2478
old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
2479
compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
2481
brw_inst inst = precompact(devinfo, *src);
2482
brw_inst saved = inst;
2484
if (try_compact_instruction(&c, dst, &inst)) {
2487
if (INTEL_DEBUG(DEBUG_ANY)) {
2488
brw_inst uncompacted;
2489
uncompact_instruction(&c, &uncompacted, dst);
2490
if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
2491
brw_debug_compact_uncompact(devinfo, &saved, &uncompacted);
2495
offset += sizeof(brw_compact_inst);
2497
/* All uncompacted instructions need to be aligned on G45. */
2498
if ((offset & sizeof(brw_compact_inst)) != 0 &&
2499
devinfo->platform == INTEL_PLATFORM_G4X) {
2500
brw_compact_inst *align = store + offset;
2501
memset(align, 0, sizeof(*align));
2502
brw_compact_inst_set_hw_opcode(
2503
devinfo, align, brw_opcode_encode(devinfo, BRW_OPCODE_NENOP));
2504
brw_compact_inst_set_cmpt_control(devinfo, align, true);
2505
offset += sizeof(brw_compact_inst);
2507
compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
2508
old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
2510
dst = store + offset;
2513
/* If we didn't compact this intruction, we need to move it down into
2516
if (offset != src_offset) {
2517
memmove(dst, src, sizeof(brw_inst));
2519
offset += sizeof(brw_inst);
2523
/* Add an entry for the ending offset of the program. This greatly
2524
* simplifies the linked list walk at the end of the function.
2526
old_ip[offset / sizeof(brw_compact_inst)] =
2527
(p->next_insn_offset - start_offset) / sizeof(brw_inst);
2529
/* Fix up control flow offsets. */
2530
p->next_insn_offset = start_offset + offset;
2531
for (offset = 0; offset < p->next_insn_offset - start_offset;
2532
offset = next_offset(devinfo, store, offset)) {
2533
brw_inst *insn = store + offset;
2534
int this_old_ip = old_ip[offset / sizeof(brw_compact_inst)];
2535
int this_compacted_count = compacted_counts[this_old_ip];
2537
switch (brw_inst_opcode(devinfo, insn)) {
2538
case BRW_OPCODE_BREAK:
2539
case BRW_OPCODE_CONTINUE:
2540
case BRW_OPCODE_HALT:
2541
if (devinfo->ver >= 6) {
2542
update_uip_jip(devinfo, insn, this_old_ip, compacted_counts);
2544
update_gfx4_jump_count(devinfo, insn, this_old_ip,
2550
case BRW_OPCODE_IFF:
2551
case BRW_OPCODE_ELSE:
2552
case BRW_OPCODE_ENDIF:
2553
case BRW_OPCODE_WHILE:
2554
if (devinfo->ver >= 7) {
2555
if (brw_inst_cmpt_control(devinfo, insn)) {
2556
brw_inst uncompacted;
2557
uncompact_instruction(&c, &uncompacted,
2558
(brw_compact_inst *)insn);
2560
update_uip_jip(devinfo, &uncompacted, this_old_ip,
2563
bool ret = try_compact_instruction(&c, (brw_compact_inst *)insn,
2565
assert(ret); (void)ret;
2567
update_uip_jip(devinfo, insn, this_old_ip, compacted_counts);
2569
} else if (devinfo->ver == 6) {
2570
assert(!brw_inst_cmpt_control(devinfo, insn));
2572
/* Jump Count is in units of compacted instructions on Gfx6. */
2573
int jump_count_compacted = brw_inst_gfx6_jump_count(devinfo, insn);
2575
int target_old_ip = this_old_ip + (jump_count_compacted / 2);
2576
int target_compacted_count = compacted_counts[target_old_ip];
2577
jump_count_compacted -= (target_compacted_count - this_compacted_count);
2578
brw_inst_set_gfx6_jump_count(devinfo, insn, jump_count_compacted);
2580
update_gfx4_jump_count(devinfo, insn, this_old_ip,
2585
case BRW_OPCODE_ADD:
2586
/* Add instructions modifying the IP register use an immediate src1,
2587
* and Gens that use this cannot compact instructions with immediate
2590
if (brw_inst_cmpt_control(devinfo, insn))
2593
if (brw_inst_dst_reg_file(devinfo, insn) == BRW_ARCHITECTURE_REGISTER_FILE &&
2594
brw_inst_dst_da_reg_nr(devinfo, insn) == BRW_ARF_IP) {
2595
assert(brw_inst_src1_reg_file(devinfo, insn) == BRW_IMMEDIATE_VALUE);
2598
int jump_compacted = brw_inst_imm_d(devinfo, insn) >> shift;
2600
int target_old_ip = this_old_ip + (jump_compacted / 2);
2601
int target_compacted_count = compacted_counts[target_old_ip];
2602
jump_compacted -= (target_compacted_count - this_compacted_count);
2603
brw_inst_set_imm_ud(devinfo, insn, jump_compacted << shift);
2612
/* p->nr_insn is counting the number of uncompacted instructions still, so
2613
* divide. We do want to be sure there's a valid instruction in any
2614
* alignment padding, so that the next compression pass (for the FS 8/16
2615
* compile passes) parses correctly.
2617
if (p->next_insn_offset & sizeof(brw_compact_inst)) {
2618
brw_compact_inst *align = store + offset;
2619
memset(align, 0, sizeof(*align));
2620
brw_compact_inst_set_hw_opcode(
2621
devinfo, align, brw_opcode_encode(devinfo, BRW_OPCODE_NOP));
2622
brw_compact_inst_set_cmpt_control(devinfo, align, true);
2623
p->next_insn_offset += sizeof(brw_compact_inst);
2625
p->nr_insn = p->next_insn_offset / sizeof(brw_inst);
2627
for (int i = 0; i < p->num_relocs; i++) {
2628
if (p->relocs[i].offset < (uint32_t)start_offset)
2631
assert(p->relocs[i].offset % 16 == 0);
2632
unsigned idx = (p->relocs[i].offset - start_offset) / 16;
2633
p->relocs[i].offset -= compacted_counts[idx] * 8;
2636
/* Update the instruction offsets for each group. */
2640
foreach_list_typed(struct inst_group, group, link, &disasm->group_list) {
2641
while (start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
2642
sizeof(brw_inst) != group->offset) {
2643
assert(start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
2644
sizeof(brw_inst) < group->offset);
2645
offset = next_offset(devinfo, store, offset);
2648
group->offset = start_offset + offset;
2650
offset = next_offset(devinfo, store, offset);