2
Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>
4
Permission is hereby granted, free of charge, to any person obtaining a
5
copy of this software and associated documentation files (the "Software"),
6
to deal in the Software without restriction, including without limitation
7
the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
and/or sell copies of the Software, and to permit persons to whom the
9
Software is furnished to do so, subject to the following conditions:
11
The above copyright notice and this permission notice (including the next
12
paragraph) shall be included in all copies or substantial portions of the
15
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29
def __init__(self, name, dests, srcs, imms, is_float, can_eliminate, encoding_16, encoding_32):
34
self.is_float = is_float
35
self.can_eliminate = can_eliminate
36
self.encoding_16 = encoding_16
37
self.encoding_32 = encoding_32
39
class Immediate(object):
40
def __init__(self, name, ctype):
44
class Encoding(object):
45
def __init__(self, description):
46
(exact, mask, length_short, length_long) = description
49
if length_long is None:
50
length_long = length_short
54
self.length_short = length_short
55
self.extensible = length_short != length_long
58
assert(length_long == length_short + (4 if length_short > 8 else 2))
60
def op(name, encoding_32, dests = 1, srcs = 0, imms = None, is_float = False,
       can_eliminate = True, encoding_16 = None):
    """Register an opcode under `name` in the module-level `opcodes` table.

    `encoding_32` and `encoding_16` are each either None or an encoding
    description, which is wrapped in an `Encoding` object before being
    stored. `dests`, `srcs`, `imms`, `is_float` and `can_eliminate` are
    forwarded unchanged to the `Opcode` constructor.

    `imms` is the list of immediates the opcode takes; when omitted, the
    opcode receives its own fresh empty list.
    """
    # A literal `[]` default is evaluated once at definition time, so every
    # opcode registered without immediates would alias the same list object.
    if imms is None:
        imms = []

    encoding_16 = Encoding(encoding_16) if encoding_16 is not None else None
    encoding_32 = Encoding(encoding_32) if encoding_32 is not None else None

    opcodes[name] = Opcode(name, dests, srcs, imms, is_float, can_eliminate,
                           encoding_16, encoding_32)
66
def immediate(name, ctype = "uint32_t"):
    """Create an `Immediate`, register it in `immediates`, and return it.

    `name` is used both as the key in the module-level `immediates` table
    and as the first constructor argument; `ctype` is passed through as the
    second constructor argument and defaults to "uint32_t".

    Returns the newly created `Immediate`, so callers can bind it to a
    constant or list it in an opcode's `imms`.
    """
    imm = Immediate(name, ctype)
    immediates[name] = imm
    # Callers rely on the return value (module constants, `enum`), so the
    # object must be handed back rather than only stored in the table.
    return imm
71
def enum(name, value_dict):
    """Record an enumeration and expose it as an immediate.

    Stores `value_dict` in the module-level `enums` table under `name`, then
    registers an immediate of the same name whose C type is the string
    "enum agx_" followed by `name`. Returns whatever `immediate` returns.
    """
    enums[name] = value_dict

    ctype = "enum agx_" + name
    return immediate(name, ctype)
78
# Immediates available to opcode definitions. Entries that give no explicit
# C type fall back to the default of immediate(), "uint32_t".
FORMAT = immediate("format", "enum agx_format")
IMM = immediate("imm")
WRITEOUT = immediate("writeout")
INDEX = immediate("index")
COMPONENT = immediate("component")
CHANNELS = immediate("channels")
TRUTH_TABLE = immediate("truth_table")
ROUND = immediate("round")
SHIFT = immediate("shift")
MASK = immediate("mask")
BFI_MASK = immediate("bfi_mask")
LOD_MODE = immediate("lod_mode", "enum agx_lod_mode")
DIM = immediate("dim", "enum agx_dim")
SCOREBOARD = immediate("scoreboard")
ICOND = immediate("icond")
FCOND = immediate("fcond")
# "nest" is registered exactly once; a second identical registration would
# only replace the table entry and rebind NEST to an equivalent object.
NEST = immediate("nest")
INVERT_COND = immediate("invert_cond")
TARGET = immediate("target", "agx_block *")
PERSPECTIVE = immediate("perspective", "bool")
100
0: 'threadgroup_position_in_grid.x',
101
1: 'threadgroup_position_in_grid.y',
102
2: 'threadgroup_position_in_grid.z',
103
4: 'threads_per_threadgroup.x',
104
5: 'threads_per_threadgroup.y',
105
6: 'threads_per_threadgroup.z',
106
8: 'dispatch_threads_per_threadgroup.x',
107
9: 'dispatch_threads_per_threadgroup.y',
108
10: 'dispatch_threads_per_threadgroup.z',
109
48: 'thread_position_in_threadgroup.x',
110
49: 'thread_position_in_threadgroup.y',
111
50: 'thread_position_in_threadgroup.z',
112
51: 'thread_index_in_threadgroup',
113
52: 'thread_index_in_subgroup',
114
53: 'subgroup_index_in_threadgroup',
115
56: 'active_thread_index_in_quad',
116
58: 'active_thread_index_in_subgroup',
118
80: 'thread_position_in_grid.x',
119
81: 'thread_position_in_grid.y',
120
82: 'thread_position_in_grid.z',
123
def FUNOP(x):
    """Shift `x` left by 28 bits, where funop() places its opcode field."""
    return x << 28


# Fourteen set bits starting at bit 28: the field funop() matches on.
FUNOP_MASK = FUNOP((1 << 14) - 1)


def funop(name, opcode):
    """Register a one-source float opcode selected by `opcode`.

    The exact bits are 0x0A, the `L` flag and `opcode` shifted into place by
    FUNOP(); the mask covers the low six bits, `L` and FUNOP_MASK. The
    encoding description gives a short length of 6 and passes `_` as the
    long length.
    """
    # Use the shared helpers so the shift and field width are spelled once.
    exact = 0x0A | L | FUNOP(opcode)
    mask = 0x3F | L | FUNOP_MASK
    op(name, (exact, mask, 6, _), srcs = 1, is_float = True)
132
# Opcodes registered through funop(), listed as (name, 6-bit selector) pairs
# and registered in this order. Loop variables carry a leading underscore so
# they are not picked up by star-imports of this module.
for (_funop_name, _funop_selector) in (
    ("floor",     0b000000),
    ("srsqrt",    0b000001),
    ("dfdx",      0b000100),
    ("dfdy",      0b000110),
    ("rcp",       0b001000),
    ("rsqrt",     0b001001),
    ("sin_pt_1",  0b001010),
    ("log2",      0b001100),
    ("exp2",      0b001101),
    ("sin_pt_2",  0b001110),
    ("ceil",      0b010000),
    ("trunc",     0b100000),
    ("roundeven", 0b110000),
):
    funop(_funop_name, _funop_selector)
147
encoding_16 = (0x26 | L, 0x3F | L, 6, _),
148
encoding_32 = (0x2A | L, 0x3F | L, 6, _),
149
srcs = 2, is_float = True)
152
encoding_16 = (0x36, 0x3F, 6, 8),
153
encoding_32 = (0x3A, 0x3F, 6, 8),
154
srcs = 3, is_float = True)
157
encoding_16 = ((0x16 | L), (0x3F | L), 6, _),
158
encoding_32 = ((0x1A | L), (0x3F | L), 6, _),
159
srcs = 2, is_float = True)
162
encoding_32 = (0x62, 0xFF, 6, 8),
163
encoding_16 = (0x62, 0xFF, 4, 6),
167
encoding_32 = (0x0E, 0x3F | L, 8, _),
168
srcs = 2, imms = [SHIFT])
171
encoding_32 = (0x1E, 0x3F | L, 8, _),
172
srcs = 3, imms = [SHIFT])
175
encoding_32 = (0x2E, 0x7F | (0x3 << 26), 8, _),
176
srcs = 3, imms = [BFI_MASK])
179
encoding_32 = (0x2E | L, 0x7F | L | (0x3 << 26), 8, _),
180
srcs = 3, imms = [BFI_MASK])
183
encoding_32 = (0x2E | L | (0x1 << 26), 0x7F | L | (0x3 << 26), 8, _),
187
encoding_32 = (0x12, 0x7F, 8, 10),
188
srcs = 4, imms = [ICOND])
191
encoding_32 = (0x02, 0x7F, 8, 10),
192
srcs = 4, imms = [FCOND])
194
# sources are coordinates, LOD, texture, sampler, offset
195
# TODO: anything else?
197
encoding_32 = (0x32, 0x7F, 8, 10), # XXX WRONG SIZE
198
srcs = 5, imms = [DIM, LOD_MODE, MASK, SCOREBOARD])
200
# sources are base, index
202
encoding_32 = (0x05, 0x7F, 6, 8),
203
srcs = 2, imms = [FORMAT, MASK, SCOREBOARD])
205
# Each encoding description below is (exact, mask, short length, long length).

# Takes a scoreboard immediate and writes no destination.
op(
    "wait",
    (0x38, 0xFF, 2, _),
    dests = 0,
    imms = [SCOREBOARD],
    can_eliminate = False,
)

op(
    "get_sr",
    (0x72, 0x7F | L, 4, _),
    dests = 1,
    imms = [SR],
)

op(
    "sample_mask",
    (0x7fc1, 0xffff, 6, _),
    dests = 0,
    srcs = 1,
    can_eliminate = False,
)

# Essentially same encoding: under the shared 0x7F mask the exact values of
# ld_tile and st_tile differ only in bit 6 (0x49 versus 0x09).
op(
    "ld_tile",
    (0x49, 0x7F, 8, _),
    dests = 1,
    srcs = 0,
    imms = [FORMAT],
    can_eliminate = False,
)

op(
    "st_tile",
    (0x09, 0x7F, 8, _),
    dests = 0,
    srcs = 1,
    imms = [FORMAT],
    can_eliminate = False,
)
219
# jmp_exec_any and jmp_exec_none share one layout: the low 16 bits are
# matched exactly and the only operand is the jump target.
for (name, exact) in (("any", 0xC000), ("none", 0xC200)):
    op(
        "jmp_exec_" + name,
        (exact, 0xFFFF, 6, _),
        dests = 0,
        srcs = 0,
        imms = [TARGET],
        can_eliminate = False,
    )

# TODO: model implicit r0l destinations
# The mask is all 48 bits except the two 2-bit fields at bits 7 and 11.
op(
    "pop_exec",
    (
        0x52 | (0x3 << 9),
        ((1 << 48) - 1) ^ (0x3 << 7) ^ (0x3 << 11),
        6,
        _,
    ),
    dests = 0,
    srcs = 0,
    imms = [NEST],
    can_eliminate = False,
)
227
# Registers {if,else,while}_{i,f}cmp: six opcodes, integer variants first.
for is_float in [False, True]:
    # Only the integer forms add the 2-bit fields at bits 26 and 38 to the
    # mask; the float forms leave them unconstrained.
    if is_float:
        mod_mask = 0
    else:
        mod_mask = (0x3 << 26) | (0x3 << 38)

    for (cf, cf_op) in [("if", 0), ("else", 1), ("while", 2)]:
        name = "{}_{}cmp".format(cf, "f" if is_float else "i")

        # 0x42 selects the float compare, 0x52 (0x42 | 0x10) the integer one;
        # the control-flow kind sits in the 2-bit field at bit 9.
        exact = (0x42 if is_float else 0x52) | (cf_op << 9)
        mask = 0x7F | (0x3 << 9) | mod_mask | (0x3 << 44)

        imms = [NEST, FCOND if is_float else ICOND, INVERT_COND]

        op(
            name,
            (exact, mask, 6, _),
            dests = 0,
            srcs = 2,
            can_eliminate = False,
            imms = imms,
            is_float = is_float,
        )
239
# Remaining opcodes. Each encoding description is
# (exact, mask, short length, long length).
op(
    "bitop",
    (0x7E, 0x7F, 6, _),
    srcs = 2,
    imms = [TRUTH_TABLE],
)
op(
    "convert",
    (0x3E | L, 0x7F | L | (0x3 << 38), 6, _),
    srcs = 2,
    imms = [ROUND],
)

# The two varying loads share mask 0xBF and differ in bit 7 of the exact
# value (0x21 versus 0xA1).
op(
    "ld_vary",
    (0x21, 0xBF, 8, _),
    srcs = 1,
    imms = [CHANNELS, PERSPECTIVE],
)
op(
    "ld_vary_flat",
    (0xA1, 0xBF, 8, _),
    srcs = 1,
    imms = [CHANNELS],
)

# Registered with no 32-bit encoding at all.
op(
    "st_vary",
    None,
    dests = 0,
    srcs = 2,
    can_eliminate = False,
)

op(
    "stop",
    (0x88, 0xFFFF, 2, _),
    dests = 0,
    can_eliminate = False,
)
op(
    "trap",
    (0x08, 0xFFFF, 2, _),
    dests = 0,
    can_eliminate = False,
)
op(
    "writeout",
    (0x48, 0xFF, 4, _),
    dests = 0,
    imms = [WRITEOUT],
    can_eliminate = False,
)

# These two pass `_` in place of an encoding description.
op("p_combine", _, srcs = 4)
op("p_extract", _, srcs = 1, imms = [COMPONENT])