58
58
.. opcode:: ARL - Address Register Load
62
dst.x = (int) \lfloor src.x\rfloor
64
dst.y = (int) \lfloor src.y\rfloor
66
dst.z = (int) \lfloor src.z\rfloor
68
dst.w = (int) \lfloor src.w\rfloor
62
dst.x = (int) \lfloor src.x\rfloor
64
dst.y = (int) \lfloor src.y\rfloor
66
dst.z = (int) \lfloor src.z\rfloor
68
dst.w = (int) \lfloor src.w\rfloor
71
71
.. opcode:: MOV - Move
84
84
.. opcode:: LIT - Light Coefficients
89
dst.y &= max(src.x, 0) \\
90
dst.z &= (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128)} : 0 \\
95
max(src.y, 0)^{clamp(src.w, -128, 128)} & \quad \textrm{if } src.x \gt 0 \\
96
0 & \quad \textrm{otherwise}
94
103
.. opcode:: RCP - Reciprocal
96
This instruction replicates its result.
100
dst = \frac{1}{src.x}
105
This instruction replicates its result.
109
dst = \frac{1}{src.x}
103
112
.. opcode:: RSQ - Reciprocal Square Root
105
This instruction replicates its result. The results are undefined for *src* <= 0.
109
dst = \frac{1}{\sqrt{src.x}}
114
This instruction replicates its result. The results are undefined for *src* <= 0.
118
dst = \frac{1}{\sqrt{src.x}}
112
121
.. opcode:: SQRT - Square Root
114
This instruction replicates its result. The results are undefined for *src* < 0.
123
This instruction replicates its result. The results are undefined for *src* < 0.
121
130
.. opcode:: EXP - Approximate Exponential Base 2
125
dst.x &= 2^{\lfloor src.x\rfloor} \\
126
dst.y &= src.x - \lfloor src.x\rfloor \\
127
dst.z &= 2^{src.x} \\
134
dst.x &= 2^{\lfloor src.x\rfloor} \\
135
dst.y &= src.x - \lfloor src.x\rfloor \\
136
dst.z &= 2^{src.x} \\
131
140
.. opcode:: LOG - Approximate Logarithm Base 2
135
dst.x &= \lfloor\log_2{|src.x|}\rfloor \\
136
dst.y &= \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}} \\
137
dst.z &= \log_2{|src.x|} \\
144
dst.x &= \lfloor\log_2{|src.x|}\rfloor \\
145
dst.y &= \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}} \\
146
dst.z &= \log_2{|src.x|} \\
141
150
.. opcode:: MUL - Multiply
145
dst.x = src0.x \times src1.x
147
dst.y = src0.y \times src1.y
149
dst.z = src0.z \times src1.z
151
dst.w = src0.w \times src1.w
154
dst.x = src0.x \times src1.x
156
dst.y = src0.y \times src1.y
158
dst.z = src0.z \times src1.z
160
dst.w = src0.w \times src1.w
154
163
.. opcode:: ADD - Add
158
dst.x = src0.x + src1.x
160
dst.y = src0.y + src1.y
162
dst.z = src0.z + src1.z
164
dst.w = src0.w + src1.w
167
dst.x = src0.x + src1.x
169
dst.y = src0.y + src1.y
171
dst.z = src0.z + src1.z
173
dst.w = src0.w + src1.w
167
176
.. opcode:: DP3 - 3-component Dot Product
169
This instruction replicates its result.
173
dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
178
This instruction replicates its result.
183
dst = & src0.x \times src1.x +\\
184
& src0.y \times src1.y +\\
185
& src0.z \times src1.z
176
189
.. opcode:: DP4 - 4-component Dot Product
178
This instruction replicates its result.
182
dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
191
This instruction replicates its result.
196
dst = & src0.x \times src1.x +\\
197
& src0.y \times src1.y +\\
198
& src0.z \times src1.z +\\
199
& src0.w \times src1.w
185
203
.. opcode:: DST - Distance Vector
190
dst.y &= src0.y \times src1.y\\
208
dst.y &= src0.y \times src1.y\\
195
213
.. opcode:: MIN - Minimum
199
dst.x = min(src0.x, src1.x)
201
dst.y = min(src0.y, src1.y)
203
dst.z = min(src0.z, src1.z)
205
dst.w = min(src0.w, src1.w)
217
dst.x = min(src0.x, src1.x)
219
dst.y = min(src0.y, src1.y)
221
dst.z = min(src0.z, src1.z)
223
dst.w = min(src0.w, src1.w)
208
226
.. opcode:: MAX - Maximum
212
dst.x = max(src0.x, src1.x)
214
dst.y = max(src0.y, src1.y)
216
dst.z = max(src0.z, src1.z)
218
dst.w = max(src0.w, src1.w)
230
dst.x = max(src0.x, src1.x)
232
dst.y = max(src0.y, src1.y)
234
dst.z = max(src0.z, src1.z)
236
dst.w = max(src0.w, src1.w)
221
239
.. opcode:: SLT - Set On Less Than
225
dst.x = (src0.x < src1.x) ? 1.0F : 0.0F
227
dst.y = (src0.y < src1.y) ? 1.0F : 0.0F
229
dst.z = (src0.z < src1.z) ? 1.0F : 0.0F
231
dst.w = (src0.w < src1.w) ? 1.0F : 0.0F
243
dst.x = (src0.x < src1.x) ? 1.0F : 0.0F
245
dst.y = (src0.y < src1.y) ? 1.0F : 0.0F
247
dst.z = (src0.z < src1.z) ? 1.0F : 0.0F
249
dst.w = (src0.w < src1.w) ? 1.0F : 0.0F
234
252
.. opcode:: SGE - Set On Greater Equal Than
238
dst.x = (src0.x >= src1.x) ? 1.0F : 0.0F
240
dst.y = (src0.y >= src1.y) ? 1.0F : 0.0F
242
dst.z = (src0.z >= src1.z) ? 1.0F : 0.0F
244
dst.w = (src0.w >= src1.w) ? 1.0F : 0.0F
256
dst.x = (src0.x >= src1.x) ? 1.0F : 0.0F
258
dst.y = (src0.y >= src1.y) ? 1.0F : 0.0F
260
dst.z = (src0.z >= src1.z) ? 1.0F : 0.0F
262
dst.w = (src0.w >= src1.w) ? 1.0F : 0.0F
247
265
.. opcode:: MAD - Multiply And Add
249
Perform a * b + c. The implementation is free to decide whether there is an
250
intermediate rounding step or not.
254
dst.x = src0.x \times src1.x + src2.x
256
dst.y = src0.y \times src1.y + src2.y
258
dst.z = src0.z \times src1.z + src2.z
260
dst.w = src0.w \times src1.w + src2.w
267
Perform a * b + c. The implementation is free to decide whether there is an
268
intermediate rounding step or not.
272
dst.x = src0.x \times src1.x + src2.x
274
dst.y = src0.y \times src1.y + src2.y
276
dst.z = src0.z \times src1.z + src2.z
278
dst.w = src0.w \times src1.w + src2.w
263
281
.. opcode:: LRP - Linear Interpolate
267
dst.x = src0.x \times src1.x + (1 - src0.x) \times src2.x
269
dst.y = src0.y \times src1.y + (1 - src0.y) \times src2.y
271
dst.z = src0.z \times src1.z + (1 - src0.z) \times src2.z
273
dst.w = src0.w \times src1.w + (1 - src0.w) \times src2.w
285
dst.x = src0.x \times src1.x + (1 - src0.x) \times src2.x
287
dst.y = src0.y \times src1.y + (1 - src0.y) \times src2.y
289
dst.z = src0.z \times src1.z + (1 - src0.z) \times src2.z
291
dst.w = src0.w \times src1.w + (1 - src0.w) \times src2.w
276
294
.. opcode:: FMA - Fused Multiply-Add
278
Perform a * b + c with no intermediate rounding step.
282
dst.x = src0.x \times src1.x + src2.x
284
dst.y = src0.y \times src1.y + src2.y
286
dst.z = src0.z \times src1.z + src2.z
288
dst.w = src0.w \times src1.w + src2.w
296
Perform a * b + c with no intermediate rounding step.
300
dst.x = src0.x \times src1.x + src2.x
302
dst.y = src0.y \times src1.y + src2.y
304
dst.z = src0.z \times src1.z + src2.z
306
dst.w = src0.w \times src1.w + src2.w
291
309
.. opcode:: FRC - Fraction
295
dst.x = src.x - \lfloor src.x\rfloor
297
dst.y = src.y - \lfloor src.y\rfloor
299
dst.z = src.z - \lfloor src.z\rfloor
301
dst.w = src.w - \lfloor src.w\rfloor
313
dst.x = src.x - \lfloor src.x\rfloor
315
dst.y = src.y - \lfloor src.y\rfloor
317
dst.z = src.z - \lfloor src.z\rfloor
319
dst.w = src.w - \lfloor src.w\rfloor
304
322
.. opcode:: FLR - Floor
308
dst.x = \lfloor src.x\rfloor
310
dst.y = \lfloor src.y\rfloor
312
dst.z = \lfloor src.z\rfloor
314
dst.w = \lfloor src.w\rfloor
326
dst.x = \lfloor src.x\rfloor
328
dst.y = \lfloor src.y\rfloor
330
dst.z = \lfloor src.z\rfloor
332
dst.w = \lfloor src.w\rfloor
317
335
.. opcode:: ROUND - Round
330
348
.. opcode:: EX2 - Exponential Base 2
332
This instruction replicates its result.
350
This instruction replicates its result.
339
357
.. opcode:: LG2 - Logarithm Base 2
341
This instruction replicates its result.
359
This instruction replicates its result.
348
366
.. opcode:: POW - Power
350
This instruction replicates its result.
354
dst = src0.x^{src1.x}
368
This instruction replicates its result.
372
dst = src0.x^{src1.x}
357
375
.. opcode:: LDEXP - Multiply Number by Integral Power of 2
359
*src1* is an integer.
363
dst.x = src0.x * 2^{src1.x}
364
dst.y = src0.y * 2^{src1.y}
365
dst.z = src0.z * 2^{src1.z}
366
dst.w = src0.w * 2^{src1.w}
377
*src1* is an integer.
381
dst.x = src0.x * 2^{src1.x}
383
dst.y = src0.y * 2^{src1.y}
385
dst.z = src0.z * 2^{src1.z}
387
dst.w = src0.w * 2^{src1.w}
369
390
.. opcode:: COS - Cosine
371
This instruction replicates its result.
392
This instruction replicates its result.
378
399
.. opcode:: DDX, DDX_FINE - Derivative Relative To X
380
The fine variant is only used when ``PIPE_CAP_FS_FINE_DERIVATIVE`` is
381
advertised. When it is, the fine version guarantees one derivative per row
382
while DDX is allowed to be the same for the entire 2x2 quad.
386
dst.x = partialx(src.x)
388
dst.y = partialx(src.y)
390
dst.z = partialx(src.z)
392
dst.w = partialx(src.w)
401
The fine variant is only used when ``PIPE_CAP_FS_FINE_DERIVATIVE`` is
402
advertised. When it is, the fine version guarantees one derivative per
403
row while DDX is allowed to be the same for the entire 2x2 quad.
407
dst.x = partialx(src.x)
409
dst.y = partialx(src.y)
411
dst.z = partialx(src.z)
413
dst.w = partialx(src.w)
395
416
.. opcode:: DDY, DDY_FINE - Derivative Relative To Y
397
The fine variant is only used when ``PIPE_CAP_FS_FINE_DERIVATIVE`` is
398
advertised. When it is, the fine version guarantees one derivative per column
399
while DDY is allowed to be the same for the entire 2x2 quad.
403
dst.x = partialy(src.x)
405
dst.y = partialy(src.y)
407
dst.z = partialy(src.z)
409
dst.w = partialy(src.w)
418
The fine variant is only used when ``PIPE_CAP_FS_FINE_DERIVATIVE`` is
419
advertised. When it is, the fine version guarantees one derivative per
420
column while DDY is allowed to be the same for the entire 2x2 quad.
424
dst.x = partialy(src.x)
426
dst.y = partialy(src.y)
428
dst.z = partialy(src.z)
430
dst.w = partialy(src.w)
412
433
.. opcode:: PK2H - Pack Two 16-bit Floats
414
This instruction replicates its result.
418
dst = f32\_to\_f16(src.x) | f32\_to\_f16(src.y) << 16
435
This instruction replicates its result.
440
dst = & f32\_to\_f16(src.x) | \\
441
( & f32\_to\_f16(src.y) \ll 16)
421
444
.. opcode:: PK2US - Pack Two Unsigned 16-bit Scalars
423
This instruction replicates its result.
427
dst = f32\_to\_unorm16(src.x) | f32\_to\_unorm16(src.y) << 16
446
This instruction replicates its result.
451
dst = & f32\_to\_unorm16(src.x) | \\
452
( & f32\_to\_unorm16(src.y) \ll 16)
430
456
.. opcode:: PK4B - Pack Four Signed 8-bit Scalars
432
This instruction replicates its result.
436
dst = f32\_to\_snorm8(src.x) |
437
(f32\_to\_snorm8(src.y) << 8) |
438
(f32\_to\_snorm8(src.z) << 16) |
439
(f32\_to\_snorm8(src.w) << 24)
458
This instruction replicates its result.
463
dst = & f32\_to\_snorm8(src.x) | \\
464
( & f32\_to\_snorm8(src.y) \ll 8) | \\
465
( & f32\_to\_snorm8(src.z) \ll 16) | \\
466
( & f32\_to\_snorm8(src.w) \ll 24)
442
470
.. opcode:: PK4UB - Pack Four Unsigned 8-bit Scalars
444
This instruction replicates its result.
448
dst = f32\_to\_unorm8(src.x) |
449
(f32\_to\_unorm8(src.y) << 8) |
450
(f32\_to\_unorm8(src.z) << 16) |
451
(f32\_to\_unorm8(src.w) << 24)
472
This instruction replicates its result.
477
dst = & f32\_to\_unorm8(src.x) | \\
478
( & f32\_to\_unorm8(src.y) \ll 8) | \\
479
( & f32\_to\_unorm8(src.z) \ll 16) | \\
480
( & f32\_to\_unorm8(src.w) \ll 24)
454
484
.. opcode:: SEQ - Set On Equal
458
dst.x = (src0.x == src1.x) ? 1.0F : 0.0F
460
dst.y = (src0.y == src1.y) ? 1.0F : 0.0F
462
dst.z = (src0.z == src1.z) ? 1.0F : 0.0F
464
dst.w = (src0.w == src1.w) ? 1.0F : 0.0F
488
dst.x = (src0.x == src1.x) ? 1.0F : 0.0F
490
dst.y = (src0.y == src1.y) ? 1.0F : 0.0F
492
dst.z = (src0.z == src1.z) ? 1.0F : 0.0F
494
dst.w = (src0.w == src1.w) ? 1.0F : 0.0F
467
497
.. opcode:: SGT - Set On Greater Than
471
dst.x = (src0.x > src1.x) ? 1.0F : 0.0F
473
dst.y = (src0.y > src1.y) ? 1.0F : 0.0F
475
dst.z = (src0.z > src1.z) ? 1.0F : 0.0F
477
dst.w = (src0.w > src1.w) ? 1.0F : 0.0F
501
dst.x = (src0.x > src1.x) ? 1.0F : 0.0F
503
dst.y = (src0.y > src1.y) ? 1.0F : 0.0F
505
dst.z = (src0.z > src1.z) ? 1.0F : 0.0F
507
dst.w = (src0.w > src1.w) ? 1.0F : 0.0F
480
510
.. opcode:: SIN - Sine
482
This instruction replicates its result.
512
This instruction replicates its result.
489
519
.. opcode:: SLE - Set On Less Equal Than
493
dst.x = (src0.x <= src1.x) ? 1.0F : 0.0F
495
dst.y = (src0.y <= src1.y) ? 1.0F : 0.0F
497
dst.z = (src0.z <= src1.z) ? 1.0F : 0.0F
499
dst.w = (src0.w <= src1.w) ? 1.0F : 0.0F
523
dst.x = (src0.x <= src1.x) ? 1.0F : 0.0F
525
dst.y = (src0.y <= src1.y) ? 1.0F : 0.0F
527
dst.z = (src0.z <= src1.z) ? 1.0F : 0.0F
529
dst.w = (src0.w <= src1.w) ? 1.0F : 0.0F
502
532
.. opcode:: SNE - Set On Not Equal
506
dst.x = (src0.x != src1.x) ? 1.0F : 0.0F
508
dst.y = (src0.y != src1.y) ? 1.0F : 0.0F
510
dst.z = (src0.z != src1.z) ? 1.0F : 0.0F
512
dst.w = (src0.w != src1.w) ? 1.0F : 0.0F
536
dst.x = (src0.x != src1.x) ? 1.0F : 0.0F
538
dst.y = (src0.y != src1.y) ? 1.0F : 0.0F
540
dst.z = (src0.z != src1.z) ? 1.0F : 0.0F
542
dst.w = (src0.w != src1.w) ? 1.0F : 0.0F
515
545
.. opcode:: TEX - Texture Lookup
517
for array textures *src0.y* contains the slice for 1D,
518
and *src0.z* contains the slice for 2D.
520
for shadow textures with no arrays (and not cube map),
521
*src0.z* contains the reference value.
523
for shadow textures with arrays, *src0.z* contains
524
the reference value for 1D arrays, and *src0.w* contains
525
the reference value for 2D arrays and cube maps.
527
for cube map array shadow textures, the reference value
528
cannot be passed in *src0.w*, and TEX2 must be used instead.
534
shadow\_ref = src0.z \textrm{ or } src0.w \textrm{ (optional)}
538
dst = texture\_sample(unit, coord, shadow\_ref)
547
for array textures *src0.y* contains the slice for 1D,
548
and *src0.z* contains the slice for 2D.
550
for shadow textures with no arrays (and not cube map),
551
*src0.z* contains the reference value.
553
for shadow textures with arrays, *src0.z* contains
554
the reference value for 1D arrays, and *src0.w* contains
555
the reference value for 2D arrays and cube maps.
557
for cube map array shadow textures, the reference value
558
cannot be passed in *src0.w*, and TEX2 must be used instead.
564
shadow\_ref = src0.z \textrm{ or } src0.w \textrm{ (optional)}
568
dst = texture\_sample(unit, coord, shadow\_ref)
541
571
.. opcode:: TEX2 - Texture Lookup (for shadow cube map arrays only)
543
this is the same as TEX, but uses another reg to encode the
554
dst = texture\_sample(unit, coord, shadow\_ref)
573
this is the same as TEX, but uses another reg to encode the
584
dst = texture\_sample(unit, coord, shadow\_ref)
559
587
.. opcode:: TXD - Texture Lookup with Derivatives
571
dst = texture\_sample\_deriv(unit, coord, ddx, ddy)
599
dst = texture\_sample\_deriv(unit, coord, ddx, ddy)
574
602
.. opcode:: TXP - Projective Texture Lookup
578
coord.x = src0.x / src0.w
580
coord.y = src0.y / src0.w
582
coord.z = src0.z / src0.w
588
dst = texture\_sample(unit, coord)
606
coord.x = src0.x / src0.w
608
coord.y = src0.y / src0.w
610
coord.z = src0.z / src0.w
616
dst = texture\_sample(unit, coord)
591
619
.. opcode:: UP2H - Unpack Two 16-Bit Floats
595
dst.x = f16\_to\_f32(src0.x \& 0xffff)
597
dst.y = f16\_to\_f32(src0.x >> 16)
599
dst.z = f16\_to\_f32(src0.x \& 0xffff)
601
dst.w = f16\_to\_f32(src0.x >> 16)
605
Considered for removal.
623
dst.x = f16\_to\_f32(src0.x \& 0xffff)
625
dst.y = f16\_to\_f32(src0.x \gg 16)
627
dst.z = f16\_to\_f32(src0.x \& 0xffff)
629
dst.w = f16\_to\_f32(src0.x \gg 16)
633
Considered for removal.
607
635
.. opcode:: UP2US - Unpack Two Unsigned 16-Bit Scalars
613
Considered for removal.
641
Considered for removal.
615
643
.. opcode:: UP4B - Unpack Four Signed 8-Bit Values
621
Considered for removal.
649
Considered for removal.
623
651
.. opcode:: UP4UB - Unpack Four Unsigned 8-Bit Scalars
629
Considered for removal.
657
Considered for removal.
632
660
.. opcode:: ARR - Address Register Load With Round
636
dst.x = (int) round(src.x)
638
dst.y = (int) round(src.y)
640
dst.z = (int) round(src.z)
642
dst.w = (int) round(src.w)
664
dst.x = (int) round(src.x)
666
dst.y = (int) round(src.y)
668
dst.z = (int) round(src.z)
670
dst.w = (int) round(src.w)
645
673
.. opcode:: SSG - Set Sign
649
dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0
651
dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0
653
dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0
655
dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0
677
dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0
679
dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0
681
dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0
683
dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0
658
686
.. opcode:: CMP - Compare
662
dst.x = (src0.x < 0) ? src1.x : src2.x
664
dst.y = (src0.y < 0) ? src1.y : src2.y
666
dst.z = (src0.z < 0) ? src1.z : src2.z
668
dst.w = (src0.w < 0) ? src1.w : src2.w
690
dst.x = (src0.x < 0) ? src1.x : src2.x
692
dst.y = (src0.y < 0) ? src1.y : src2.y
694
dst.z = (src0.z < 0) ? src1.z : src2.z
696
dst.w = (src0.w < 0) ? src1.w : src2.w
671
699
.. opcode:: KILL_IF - Conditional Discard
673
Conditional discard. Allowed in fragment shaders only.
677
if (src.x < 0 || src.y < 0 || src.z < 0 || src.w < 0)
701
Conditional discard. Allowed in fragment shaders only.
705
if (src.x < 0 || src.y < 0 || src.z < 0 || src.w < 0)
682
710
.. opcode:: KILL - Discard
684
Unconditional discard. Allowed in fragment shaders only.
712
Unconditional discard. Allowed in fragment shaders only.
687
715
.. opcode:: DEMOTE - Demote Invocation to a Helper
689
This demotes the current invocation to a helper, but continues
690
execution (while KILL may or may not terminate the
691
invocation). After this runs, all the usual helper invocation rules
692
apply about discarding buffer and render target writes. This is
693
useful for having accurate derivatives in the other invocations
694
which have not been demoted.
717
This demotes the current invocation to a helper, but continues
718
execution (while KILL may or may not terminate the
719
invocation). After this runs, all the usual helper invocation rules
720
apply about discarding buffer and render target writes. This is
721
useful for having accurate derivatives in the other invocations
722
which have not been demoted.
696
Allowed in fragment shaders only.
724
Allowed in fragment shaders only.
699
727
.. opcode:: READ_HELPER - Reads Invocation Helper Status
701
This is identical to ``TGSI_SEMANTIC_HELPER_INVOCATION``, except
702
this will read the current value, which might change as a result of
703
a ``DEMOTE`` instruction.
729
This is identical to ``TGSI_SEMANTIC_HELPER_INVOCATION``, except
730
this will read the current value, which might change as a result of
731
a ``DEMOTE`` instruction.
705
Allowed in fragment shaders only.
733
Allowed in fragment shaders only.
708
736
.. opcode:: TXB - Texture Lookup With Bias
710
for cube map array textures and shadow cube maps, the bias value
711
cannot be passed in *src0.w*, and TXB2 must be used instead.
713
if the target is a shadow texture, the reference value is always
714
in *src.z* (this prevents shadow 3d and shadow 2d arrays from
715
using this instruction, but this is not needed).
731
dst = texture\_sample(unit, coord, bias)
738
for cube map array textures and shadow cube maps, the bias value
739
cannot be passed in *src0.w*, and TXB2 must be used instead.
741
if the target is a shadow texture, the reference value is always
742
in *src.z* (this prevents shadow 3d and shadow 2d arrays from
743
using this instruction, but this is not needed).
759
dst = texture\_sample(unit, coord, bias)
734
762
.. opcode:: TXB2 - Texture Lookup With Bias (some cube maps only)
736
this is the same as TXB, but uses another reg to encode the
737
LOD bias value for cube map arrays and shadow cube maps.
738
Presumably shadow 2d arrays and shadow 3d targets could use
739
this encoding too, but this is not legal.
741
if the target is a shadow cube map array, the reference value is in
752
dst = texture\_sample(unit, coord, bias)
764
this is the same as TXB, but uses another reg to encode the
765
LOD bias value for cube map arrays and shadow cube maps.
766
Presumably shadow 2d arrays and shadow 3d targets could use
767
this encoding too, but this is not legal.
769
if the target is a shadow cube map array, the reference value is in
780
dst = texture\_sample(unit, coord, bias)
755
783
.. opcode:: DIV - Divide
759
dst.x = \frac{src0.x}{src1.x}
761
dst.y = \frac{src0.y}{src1.y}
763
dst.z = \frac{src0.z}{src1.z}
765
dst.w = \frac{src0.w}{src1.w}
787
dst.x = \frac{src0.x}{src1.x}
789
dst.y = \frac{src0.y}{src1.y}
791
dst.z = \frac{src0.z}{src1.z}
793
dst.w = \frac{src0.w}{src1.w}
768
796
.. opcode:: DP2 - 2-component Dot Product
770
This instruction replicates its result.
774
dst = src0.x \times src1.x + src0.y \times src1.y
798
This instruction replicates its result.
803
dst = & src0.x \times src1.x + \\
804
& src0.y \times src1.y
777
807
.. opcode:: TEX_LZ - Texture Lookup With LOD = 0
779
This is the same as TXL with LOD = 0. Like every texture opcode, it obeys
780
pipe_sampler_view::u.tex.first_level and pipe_sampler_state::min_lod.
781
There is no way to override those two in shaders.
797
dst = texture\_sample(unit, coord, lod)
809
This is the same as TXL with LOD = 0. Like every texture opcode, it obeys
810
pipe_sampler_view::u.tex.first_level and pipe_sampler_state::min_lod.
811
There is no way to override those two in shaders.
827
dst = texture\_sample(unit, coord, lod)
800
830
.. opcode:: TXL - Texture Lookup With explicit LOD
802
for cube map array textures, the explicit LOD value
803
cannot be passed in *src0.w*, and TXL2 must be used instead.
805
if the target is a shadow texture, the reference value is always
806
in *src.z* (this prevents shadow 3d / 2d array / cube targets from
807
using this instruction, but this is not needed).
823
dst = texture\_sample(unit, coord, lod)
832
for cube map array textures, the explicit LOD value
833
cannot be passed in *src0.w*, and TXL2 must be used instead.
835
if the target is a shadow texture, the reference value is always
836
in *src.z* (this prevents shadow 3d / 2d array / cube targets from
837
using this instruction, but this is not needed).
853
dst = texture\_sample(unit, coord, lod)
826
856
.. opcode:: TXL2 - Texture Lookup With explicit LOD (for cube map arrays only)
828
this is the same as TXL, but uses another reg to encode the
830
Presumably shadow 3d / 2d array / cube targets could use
831
this encoding too, but this is not legal.
833
if the target is a shadow cube map array, the reference value is in
844
dst = texture\_sample(unit, coord, lod)
858
this is the same as TXL, but uses another reg to encode the
860
Presumably shadow 3d / 2d array / cube targets could use
861
this encoding too, but this is not legal.
863
if the target is a shadow cube map array, the reference value is in
874
dst = texture\_sample(unit, coord, lod)
855
885
.. opcode:: CEIL - Ceiling
859
dst.x = \lceil src.x\rceil
861
dst.y = \lceil src.y\rceil
863
dst.z = \lceil src.z\rceil
865
dst.w = \lceil src.w\rceil
889
dst.x = \lceil src.x\rceil
891
dst.y = \lceil src.y\rceil
893
dst.z = \lceil src.z\rceil
895
dst.w = \lceil src.w\rceil
868
898
.. opcode:: TRUNC - Truncate
881
911
.. opcode:: MOD - Modulus
885
dst.x = src0.x \bmod src1.x
887
dst.y = src0.y \bmod src1.y
889
dst.z = src0.z \bmod src1.z
891
dst.w = src0.w \bmod src1.w
915
dst.x = src0.x \bmod src1.x
917
dst.y = src0.y \bmod src1.y
919
dst.z = src0.z \bmod src1.z
921
dst.w = src0.w \bmod src1.w
894
924
.. opcode:: UARL - Integer Address Register Load
896
Moves the contents of the source register, assumed to be an integer, into the
897
destination register, which is assumed to be an address (ADDR) register.
926
Moves the contents of the source register, assumed to be an integer, into the
927
destination register, which is assumed to be an address (ADDR) register.
900
930
.. opcode:: TXF - Texel Fetch
902
As per :ext:`GL_NV_gpu_program4`, extract a single texel from a specified
903
texture image or PIPE_BUFFER resource. The source sampler may not be a
904
CUBE or SHADOW. *src0* is a
905
four-component signed integer vector used to identify the single texel
906
accessed. 3 components + level. If the texture is multisampled, then
907
the fourth component indicates the sample, not the mipmap level.
908
Just like texture instructions, an optional
909
offset vector is provided, which is subject to various driver restrictions
910
(regarding range, source of offsets). This instruction ignores the sampler
932
As per :ext:`GL_NV_gpu_program4`, extract a single texel from a specified
933
texture image or PIPE_BUFFER resource. The source sampler may not be a
934
CUBE or SHADOW. *src0* is a
935
four-component signed integer vector used to identify the single texel
936
accessed. 3 components + level. If the texture is multisampled, then
937
the fourth component indicates the sample, not the mipmap level.
938
Just like texture instructions, an optional
939
offset vector is provided, which is subject to various driver restrictions
940
(regarding range, source of offsets). This instruction ignores the sampler
913
TXF(uint_vec coord, int_vec offset).
943
TXF(uint_vec coord, int_vec offset).
916
946
.. opcode:: TXQ - Texture Size Query
918
As per :ext:`GL_NV_gpu_program4`, retrieve the dimensions of the texture
919
depending on the target. For 1D (width), 2D/RECT/CUBE (width, height),
920
3D (width, height, depth), 1D array (width, layers), 2D array (width,
921
height, layers). Also return the number of accessible levels
922
(last_level - first_level + 1) in W.
924
For components which don't return a resource dimension, their value
931
dst.x = texture\_width(unit, lod)
933
dst.y = texture\_height(unit, lod)
935
dst.z = texture\_depth(unit, lod)
937
dst.w = texture\_levels(unit)
948
As per :ext:`GL_NV_gpu_program4`, retrieve the dimensions of the texture
949
depending on the target. For 1D (width), 2D/RECT/CUBE (width, height),
950
3D (width, height, depth), 1D array (width, layers), 2D array (width,
951
height, layers). Also return the number of accessible levels
952
(last_level - first_level + 1) in W.
954
For components which don't return a resource dimension, their value
961
dst.x = texture\_width(unit, lod)
963
dst.y = texture\_height(unit, lod)
965
dst.z = texture\_depth(unit, lod)
967
dst.w = texture\_levels(unit)
940
970
.. opcode:: TXQS - Texture Samples Query
942
This retrieves the number of samples in the texture, and stores it
943
into the x component as an unsigned integer. The other components are
944
undefined. If the texture is not multisampled, this function returns
945
(1, undef, undef, undef).
949
dst.x = texture\_samples(unit)
972
This retrieves the number of samples in the texture, and stores it
973
into the x component as an unsigned integer. The other components are
974
undefined. If the texture is not multisampled, this function returns
975
(1, undef, undef, undef).
979
dst.x = texture\_samples(unit)
952
982
.. opcode:: TG4 - Texture Gather
954
As per :ext:`GL_ARB_texture_gather`, gathers the four texels to be used in a
955
bi-linear filtering operation and packs them into a single register.
956
Only works with 2D, 2D array, cubemaps, and cubemap arrays. For 2D
957
textures, only the addressing modes of the sampler and the top level of any
958
mip pyramid are used. Set W to zero. It behaves like the TEX instruction,
959
but a filtered sample is not generated. The four samples that contribute to
960
filtering are placed into XYZW in clockwise order, starting with the (u,v)
961
texture coordinate delta at the following locations (-, +), (+, +), (+, -),
962
(-, -), where the magnitude of the deltas is half a texel.
964
PIPE_CAP_TEXTURE_SM5 enhances this instruction to support shadow per-sample
965
depth compares, single component selection, and a non-constant offset. It
966
doesn't allow support for the GL independent offset to get i0,j0. This would
967
require another CAP if HW can do it natively. For now we lower that before
970
PIPE_CAP_TGSI_TG4_COMPONENT_IN_SWIZZLE changes the encoding so that component
971
is stored in the sampler source swizzle x.
984
As per :ext:`GL_ARB_texture_gather`, gathers the four texels to be used in a
985
bi-linear filtering operation and packs them into a single register.
986
Only works with 2D, 2D array, cubemaps, and cubemap arrays. For 2D
987
textures, only the addressing modes of the sampler and the top level of any
988
mip pyramid are used. Set W to zero. It behaves like the TEX instruction,
989
but a filtered sample is not generated. The four samples that contribute to
990
filtering are placed into XYZW in clockwise order, starting with the (u,v)
991
texture coordinate delta at the following locations (-, +), (+, +), (+, -),
992
(-, -), where the magnitude of the deltas is half a texel.
994
PIPE_CAP_TEXTURE_SM5 enhances this instruction to support shadow per-sample
995
depth compares, single component selection, and a non-constant offset. It
996
doesn't allow support for the GL independent offset to get i0,j0. This would
997
require another CAP if HW can do it natively. For now we lower that before
1000
PIPE_CAP_TGSI_TG4_COMPONENT_IN_SWIZZLE changes the encoding so that component
1001
is stored in the sampler source swizzle x.
977
1003
(without TGSI_TG4_COMPONENT_IN_SWIZZLE)
980
dst = texture\_gather4 (unit, coord, component)
1011
dst = texture\_gather4 (unit, coord, component)
982
1013
(with TGSI_TG4_COMPONENT_IN_SWIZZLE)
983
dst = texture\_gather4 (unit, coord)
984
component is encoded in sampler swizzle.
986
(with SM5 - cube array shadow)
994
dst = texture\_gather (unit, coord, compare)
1019
dst = texture\_gather4 (unit, coord)
1021
\text{component is encoded in sampler swizzle.}
1023
(with SM5 - cube array shadow)
1031
dst = texture\_gather (unit, coord, compare)
996
1033
.. opcode:: LODQ - level of detail query
1125
1162
This instruction works the same for signed and unsigned integers.
1126
1163
The low 32 bits of the result are returned.
1130
dst.x = src0.x \times src1.x
1132
dst.y = src0.y \times src1.y
1134
dst.z = src0.z \times src1.z
1136
dst.w = src0.w \times src1.w
1167
dst.x = src0.x \times src1.x
1169
dst.y = src0.y \times src1.y
1171
dst.z = src0.z \times src1.z
1173
dst.w = src0.w \times src1.w
1139
1176
.. opcode:: IMUL_HI - Signed Integer Multiply High Bits
1141
1178
The high 32 bits of the multiplication of 2 signed integers are returned.
1145
dst.x = (src0.x \times src1.x) >> 32
1147
dst.y = (src0.y \times src1.y) >> 32
1149
dst.z = (src0.z \times src1.z) >> 32
1151
dst.w = (src0.w \times src1.w) >> 32
1182
dst.x = (src0.x \times src1.x) \gg 32
1184
dst.y = (src0.y \times src1.y) \gg 32
1186
dst.z = (src0.z \times src1.z) \gg 32
1188
dst.w = (src0.w \times src1.w) \gg 32
1154
1191
.. opcode:: UMUL_HI - Unsigned Integer Multiply High Bits
1156
1193
The high 32 bits of the multiplication of 2 unsigned integers are returned.
1160
dst.x = (src0.x \times src1.x) >> 32
1162
dst.y = (src0.y \times src1.y) >> 32
1164
dst.z = (src0.z \times src1.z) >> 32
1166
dst.w = (src0.w \times src1.w) >> 32
1197
dst.x = (src0.x \times src1.x) \gg 32
1199
dst.y = (src0.y \times src1.y) \gg 32
1201
dst.z = (src0.z \times src1.z) \gg 32
1203
dst.w = (src0.w \times src1.w) \gg 32
1169
1206
.. opcode:: IDIV - Signed Integer Division
1171
1208
TBD: behavior for division by zero.
1175
dst.x = \frac{src0.x}{src1.x}
1177
dst.y = \frac{src0.y}{src1.y}
1179
dst.z = \frac{src0.z}{src1.z}
1181
dst.w = \frac{src0.w}{src1.w}
1212
dst.x = \frac{src0.x}{src1.x}
1214
dst.y = \frac{src0.y}{src1.y}
1216
dst.z = \frac{src0.z}{src1.z}
1218
dst.w = \frac{src0.w}{src1.w}
1184
1221
.. opcode:: UDIV - Unsigned Integer Division
1186
1223
For division by zero, ``0xffffffff`` is returned.
1190
dst.x = \frac{src0.x}{src1.x}
1192
dst.y = \frac{src0.y}{src1.y}
1194
dst.z = \frac{src0.z}{src1.z}
1196
dst.w = \frac{src0.w}{src1.w}
1227
dst.x = \frac{src0.x}{src1.x}
1229
dst.y = \frac{src0.y}{src1.y}
1231
dst.z = \frac{src0.z}{src1.z}
1233
dst.w = \frac{src0.w}{src1.w}
1199
1236
.. opcode:: UMOD - Unsigned Integer Remainder
1201
1238
If *src1* is zero, ``0xffffffff`` is returned.
1205
dst.x = src0.x \bmod src1.x
1207
dst.y = src0.y \bmod src1.y
1209
dst.z = src0.z \bmod src1.z
1211
dst.w = src0.w \bmod src1.w
1242
dst.x = src0.x \bmod src1.x
1244
dst.y = src0.y \bmod src1.y
1246
dst.z = src0.z \bmod src1.z
1248
dst.w = src0.w \bmod src1.w
1214
1251
.. opcode:: NOT - Bitwise Not
1227
1264
.. opcode:: AND - Bitwise And
1231
dst.x = src0.x \& src1.x
1233
dst.y = src0.y \& src1.y
1235
dst.z = src0.z \& src1.z
1237
dst.w = src0.w \& src1.w
1268
dst.x = src0.x \& src1.x
1270
dst.y = src0.y \& src1.y
1272
dst.z = src0.z \& src1.z
1274
dst.w = src0.w \& src1.w
1240
1277
.. opcode:: OR - Bitwise Or
1244
dst.x = src0.x | src1.x
1246
dst.y = src0.y | src1.y
1248
dst.z = src0.z | src1.z
1250
dst.w = src0.w | src1.w
1281
dst.x = src0.x | src1.x
1283
dst.y = src0.y | src1.y
1285
dst.z = src0.z | src1.z
1287
dst.w = src0.w | src1.w
1253
1290
.. opcode:: XOR - Bitwise Xor
1257
dst.x = src0.x \oplus src1.x
1259
dst.y = src0.y \oplus src1.y
1261
dst.z = src0.z \oplus src1.z
1263
dst.w = src0.w \oplus src1.w
1294
dst.x = src0.x \oplus src1.x
1296
dst.y = src0.y \oplus src1.y
1298
dst.z = src0.z \oplus src1.z
1300
dst.w = src0.w \oplus src1.w
1266
1303
.. opcode:: IMAX - Maximum of Signed Integers
1270
dst.x = max(src0.x, src1.x)
1272
dst.y = max(src0.y, src1.y)
1274
dst.z = max(src0.z, src1.z)
1276
dst.w = max(src0.w, src1.w)
1307
dst.x = max(src0.x, src1.x)
1309
dst.y = max(src0.y, src1.y)
1311
dst.z = max(src0.z, src1.z)
1313
dst.w = max(src0.w, src1.w)
1279
1316
.. opcode:: UMAX - Maximum of Unsigned Integers
1283
dst.x = max(src0.x, src1.x)
1285
dst.y = max(src0.y, src1.y)
1287
dst.z = max(src0.z, src1.z)
1289
dst.w = max(src0.w, src1.w)
1320
dst.x = max(src0.x, src1.x)
1322
dst.y = max(src0.y, src1.y)
1324
dst.z = max(src0.z, src1.z)
1326
dst.w = max(src0.w, src1.w)
1292
1329
.. opcode:: IMIN - Minimum of Signed Integers
1296
dst.x = min(src0.x, src1.x)
1298
dst.y = min(src0.y, src1.y)
1300
dst.z = min(src0.z, src1.z)
1302
dst.w = min(src0.w, src1.w)
1333
dst.x = min(src0.x, src1.x)
1335
dst.y = min(src0.y, src1.y)
1337
dst.z = min(src0.z, src1.z)
1339
dst.w = min(src0.w, src1.w)
1305
1342
.. opcode:: UMIN - Minimum of Unsigned Integers
1309
dst.x = min(src0.x, src1.x)
1311
dst.y = min(src0.y, src1.y)
1313
dst.z = min(src0.z, src1.z)
1315
dst.w = min(src0.w, src1.w)
1346
dst.x = min(src0.x, src1.x)
1348
dst.y = min(src0.y, src1.y)
1350
dst.z = min(src0.z, src1.z)
1352
dst.w = min(src0.w, src1.w)
1318
1355
.. opcode:: SHL - Shift Left
1320
1357
The shift count is masked with ``0x1f`` before the shift is applied.
1324
dst.x = src0.x \ll (0x1f \& src1.x)
1326
dst.y = src0.y \ll (0x1f \& src1.y)
1328
dst.z = src0.z \ll (0x1f \& src1.z)
1330
dst.w = src0.w \ll (0x1f \& src1.w)
1361
dst.x = src0.x \ll (0x1f \& src1.x)
1363
dst.y = src0.y \ll (0x1f \& src1.y)
1365
dst.z = src0.z \ll (0x1f \& src1.z)
1367
dst.w = src0.w \ll (0x1f \& src1.w)
1333
1370
.. opcode:: ISHR - Arithmetic Shift Right (of Signed Integer)
1335
1372
The shift count is masked with ``0x1f`` before the shift is applied.
1339
dst.x = src0.x \gg (0x1f \& src1.x)
1341
dst.y = src0.y \gg (0x1f \& src1.y)
1343
dst.z = src0.z \gg (0x1f \& src1.z)
1345
dst.w = src0.w \gg (0x1f \& src1.w)
1376
dst.x = src0.x \gg (0x1f \& src1.x)
1378
dst.y = src0.y \gg (0x1f \& src1.y)
1380
dst.z = src0.z \gg (0x1f \& src1.z)
1382
dst.w = src0.w \gg (0x1f \& src1.w)
1348
1385
.. opcode:: USHR - Logical Shift Right
1350
1387
The shift count is masked with ``0x1f`` before the shift is applied.
1354
dst.x = src0.x \gg (unsigned) (0x1f \& src1.x)
1356
dst.y = src0.y \gg (unsigned) (0x1f \& src1.y)
1358
dst.z = src0.z \gg (unsigned) (0x1f \& src1.z)
1360
dst.w = src0.w \gg (unsigned) (0x1f \& src1.w)
1391
dst.x = src0.x \gg (unsigned) (0x1f \& src1.x)
1393
dst.y = src0.y \gg (unsigned) (0x1f \& src1.y)
1395
dst.z = src0.z \gg (unsigned) (0x1f \& src1.z)
1397
dst.w = src0.w \gg (unsigned) (0x1f \& src1.w)
1363
1400
.. opcode:: UCMP - Integer Conditional Move
1367
dst.x = src0.x ? src1.x : src2.x
1369
dst.y = src0.y ? src1.y : src2.y
1371
dst.z = src0.z ? src1.z : src2.z
1373
dst.w = src0.w ? src1.w : src2.w
1404
dst.x = src0.x ? src1.x : src2.x
1406
dst.y = src0.y ? src1.y : src2.y
1408
dst.z = src0.z ? src1.z : src2.z
1410
dst.w = src0.w ? src1.w : src2.w
1377
1414
.. opcode:: ISSG - Integer Set Sign
1381
dst.x = (src0.x < 0) ? -1 : (src0.x > 0) ? 1 : 0
1383
dst.y = (src0.y < 0) ? -1 : (src0.y > 0) ? 1 : 0
1385
dst.z = (src0.z < 0) ? -1 : (src0.z > 0) ? 1 : 0
1387
dst.w = (src0.w < 0) ? -1 : (src0.w > 0) ? 1 : 0
1418
dst.x = (src0.x < 0) ? -1 : (src0.x > 0) ? 1 : 0
1420
dst.y = (src0.y < 0) ? -1 : (src0.y > 0) ? 1 : 0
1422
dst.z = (src0.z < 0) ? -1 : (src0.z > 0) ? 1 : 0
1424
dst.w = (src0.w < 0) ? -1 : (src0.w > 0) ? 1 : 0
1393
1430
Same comparison as SLT but returns integer instead of 1.0/0.0 float
1397
dst.x = (src0.x < src1.x) ? \sim 0 : 0
1399
dst.y = (src0.y < src1.y) ? \sim 0 : 0
1401
dst.z = (src0.z < src1.z) ? \sim 0 : 0
1403
dst.w = (src0.w < src1.w) ? \sim 0 : 0
1434
dst.x = (src0.x < src1.x) ? \sim 0 : 0
1436
dst.y = (src0.y < src1.y) ? \sim 0 : 0
1438
dst.z = (src0.z < src1.z) ? \sim 0 : 0
1440
dst.w = (src0.w < src1.w) ? \sim 0 : 0
1406
1443
.. opcode:: ISLT - Signed Integer Set On Less Than
1410
dst.x = (src0.x < src1.x) ? \sim 0 : 0
1412
dst.y = (src0.y < src1.y) ? \sim 0 : 0
1414
dst.z = (src0.z < src1.z) ? \sim 0 : 0
1416
dst.w = (src0.w < src1.w) ? \sim 0 : 0
1447
dst.x = (src0.x < src1.x) ? \sim 0 : 0
1449
dst.y = (src0.y < src1.y) ? \sim 0 : 0
1451
dst.z = (src0.z < src1.z) ? \sim 0 : 0
1453
dst.w = (src0.w < src1.w) ? \sim 0 : 0
1419
1456
.. opcode:: USLT - Unsigned Integer Set On Less Than
1423
dst.x = (src0.x < src1.x) ? \sim 0 : 0
1425
dst.y = (src0.y < src1.y) ? \sim 0 : 0
1427
dst.z = (src0.z < src1.z) ? \sim 0 : 0
1429
dst.w = (src0.w < src1.w) ? \sim 0 : 0
1460
dst.x = (src0.x < src1.x) ? \sim 0 : 0
1462
dst.y = (src0.y < src1.y) ? \sim 0 : 0
1464
dst.z = (src0.z < src1.z) ? \sim 0 : 0
1466
dst.w = (src0.w < src1.w) ? \sim 0 : 0
1432
1469
.. opcode:: FSGE - Float Set On Greater Equal Than (ordered)
1434
1471
Same comparison as SGE but returns integer instead of 1.0/0.0 float
1438
dst.x = (src0.x >= src1.x) ? \sim 0 : 0
1440
dst.y = (src0.y >= src1.y) ? \sim 0 : 0
1442
dst.z = (src0.z >= src1.z) ? \sim 0 : 0
1444
dst.w = (src0.w >= src1.w) ? \sim 0 : 0
1475
dst.x = (src0.x >= src1.x) ? \sim 0 : 0
1477
dst.y = (src0.y >= src1.y) ? \sim 0 : 0
1479
dst.z = (src0.z >= src1.z) ? \sim 0 : 0
1481
dst.w = (src0.w >= src1.w) ? \sim 0 : 0
1447
1484
.. opcode:: ISGE - Signed Integer Set On Greater Equal Than
1451
dst.x = (src0.x >= src1.x) ? \sim 0 : 0
1453
dst.y = (src0.y >= src1.y) ? \sim 0 : 0
1455
dst.z = (src0.z >= src1.z) ? \sim 0 : 0
1457
dst.w = (src0.w >= src1.w) ? \sim 0 : 0
1488
dst.x = (src0.x >= src1.x) ? \sim 0 : 0
1490
dst.y = (src0.y >= src1.y) ? \sim 0 : 0
1492
dst.z = (src0.z >= src1.z) ? \sim 0 : 0
1494
dst.w = (src0.w >= src1.w) ? \sim 0 : 0
1460
1497
.. opcode:: USGE - Unsigned Integer Set On Greater Equal Than
1464
dst.x = (src0.x >= src1.x) ? \sim 0 : 0
1466
dst.y = (src0.y >= src1.y) ? \sim 0 : 0
1468
dst.z = (src0.z >= src1.z) ? \sim 0 : 0
1470
dst.w = (src0.w >= src1.w) ? \sim 0 : 0
1501
dst.x = (src0.x >= src1.x) ? \sim 0 : 0
1503
dst.y = (src0.y >= src1.y) ? \sim 0 : 0
1505
dst.z = (src0.z >= src1.z) ? \sim 0 : 0
1507
dst.w = (src0.w >= src1.w) ? \sim 0 : 0
1473
1510
.. opcode:: FSEQ - Float Set On Equal (ordered)
1475
1512
Same comparison as SEQ but returns integer instead of 1.0/0.0 float
1479
dst.x = (src0.x == src1.x) ? \sim 0 : 0
1481
dst.y = (src0.y == src1.y) ? \sim 0 : 0
1483
dst.z = (src0.z == src1.z) ? \sim 0 : 0
1485
dst.w = (src0.w == src1.w) ? \sim 0 : 0
1516
dst.x = (src0.x == src1.x) ? \sim 0 : 0
1518
dst.y = (src0.y == src1.y) ? \sim 0 : 0
1520
dst.z = (src0.z == src1.z) ? \sim 0 : 0
1522
dst.w = (src0.w == src1.w) ? \sim 0 : 0
1488
1525
.. opcode:: USEQ - Integer Set On Equal
1492
dst.x = (src0.x == src1.x) ? \sim 0 : 0
1494
dst.y = (src0.y == src1.y) ? \sim 0 : 0
1496
dst.z = (src0.z == src1.z) ? \sim 0 : 0
1498
dst.w = (src0.w == src1.w) ? \sim 0 : 0
1529
dst.x = (src0.x == src1.x) ? \sim 0 : 0
1531
dst.y = (src0.y == src1.y) ? \sim 0 : 0
1533
dst.z = (src0.z == src1.z) ? \sim 0 : 0
1535
dst.w = (src0.w == src1.w) ? \sim 0 : 0
1501
1538
.. opcode:: FSNE - Float Set On Not Equal (unordered)
1503
1540
Same comparison as SNE but returns integer instead of 1.0/0.0 float
1507
dst.x = (src0.x != src1.x) ? \sim 0 : 0
1509
dst.y = (src0.y != src1.y) ? \sim 0 : 0
1511
dst.z = (src0.z != src1.z) ? \sim 0 : 0
1513
dst.w = (src0.w != src1.w) ? \sim 0 : 0
1544
dst.x = (src0.x != src1.x) ? \sim 0 : 0
1546
dst.y = (src0.y != src1.y) ? \sim 0 : 0
1548
dst.z = (src0.z != src1.z) ? \sim 0 : 0
1550
dst.w = (src0.w != src1.w) ? \sim 0 : 0
1516
1553
.. opcode:: USNE - Integer Set On Not Equal
1520
dst.x = (src0.x != src1.x) ? \sim 0 : 0
1522
dst.y = (src0.y != src1.y) ? \sim 0 : 0
1524
dst.z = (src0.z != src1.z) ? \sim 0 : 0
1526
dst.w = (src0.w != src1.w) ? \sim 0 : 0
1557
dst.x = (src0.x != src1.x) ? \sim 0 : 0
1559
dst.y = (src0.y != src1.y) ? \sim 0 : 0
1561
dst.z = (src0.z != src1.z) ? \sim 0 : 0
1563
dst.w = (src0.w != src1.w) ? \sim 0 : 0
1529
1566
.. opcode:: INEG - Integer Negate
1531
1568
Two's complement.
1544
1581
.. opcode:: IABS - Integer Absolute Value
1809
1846
.. opcode:: DABS - Absolute
1817
1854
.. opcode:: DADD - Add
1821
dst.xy = src0.xy + src1.xy
1823
dst.zw = src0.zw + src1.zw
1858
dst.xy = src0.xy + src1.xy
1860
dst.zw = src0.zw + src1.zw
1825
1862
.. opcode:: DSEQ - Set on Equal
1829
dst.x = src0.xy == src1.xy ? \sim 0 : 0
1831
dst.z = src0.zw == src1.zw ? \sim 0 : 0
1866
dst.x = src0.xy == src1.xy ? \sim 0 : 0
1868
dst.z = src0.zw == src1.zw ? \sim 0 : 0
1833
1870
.. opcode:: DSNE - Set on Not Equal
1837
dst.x = src0.xy != src1.xy ? \sim 0 : 0
1839
dst.z = src0.zw != src1.zw ? \sim 0 : 0
1874
dst.x = src0.xy != src1.xy ? \sim 0 : 0
1876
dst.z = src0.zw != src1.zw ? \sim 0 : 0
1841
1878
.. opcode:: DSLT - Set on Less than
1845
dst.x = src0.xy < src1.xy ? \sim 0 : 0
1847
dst.z = src0.zw < src1.zw ? \sim 0 : 0
1882
dst.x = src0.xy < src1.xy ? \sim 0 : 0
1884
dst.z = src0.zw < src1.zw ? \sim 0 : 0
1849
1886
.. opcode:: DSGE - Set on Greater equal
1853
dst.x = src0.xy >= src1.xy ? \sim 0 : 0
1855
dst.z = src0.zw >= src1.zw ? \sim 0 : 0
1890
dst.x = src0.xy >= src1.xy ? \sim 0 : 0
1892
dst.z = src0.zw >= src1.zw ? \sim 0 : 0
1857
1894
.. opcode:: DFRAC - Fraction
1861
dst.xy = src.xy - \lfloor src.xy\rfloor
1863
dst.zw = src.zw - \lfloor src.zw\rfloor
1898
dst.xy = src.xy - \lfloor src.xy\rfloor
1900
dst.zw = src.zw - \lfloor src.zw\rfloor
1865
1902
.. opcode:: DTRUNC - Truncate
1869
dst.xy = trunc(src.xy)
1871
dst.zw = trunc(src.zw)
1906
dst.xy = trunc(src.xy)
1908
dst.zw = trunc(src.zw)
1873
1910
.. opcode:: DCEIL - Ceiling
1877
dst.xy = \lceil src.xy\rceil
1879
dst.zw = \lceil src.zw\rceil
1914
dst.xy = \lceil src.xy\rceil
1916
dst.zw = \lceil src.zw\rceil
1881
1918
.. opcode:: DFLR - Floor
1885
dst.xy = \lfloor src.xy\rfloor
1887
dst.zw = \lfloor src.zw\rfloor
1922
dst.xy = \lfloor src.xy\rfloor
1924
dst.zw = \lfloor src.zw\rfloor
1889
1926
.. opcode:: DROUND - Round
1893
dst.xy = round(src.xy)
1895
dst.zw = round(src.zw)
1930
dst.xy = round(src.xy)
1932
dst.zw = round(src.zw)
1897
1934
.. opcode:: DSSG - Set Sign
1901
dst.xy = (src.xy > 0) ? 1.0 : (src.xy < 0) ? -1.0 : 0.0
1903
dst.zw = (src.zw > 0) ? 1.0 : (src.zw < 0) ? -1.0 : 0.0
1938
dst.xy = (src.xy > 0) ? 1.0 : (src.xy < 0) ? -1.0 : 0.0
1940
dst.zw = (src.zw > 0) ? 1.0 : (src.zw < 0) ? -1.0 : 0.0
1905
1942
.. opcode:: DLDEXP - Multiply Number by Integral Power of 2
1907
This opcode is the inverse of frexp. The second
1908
source is an integer.
1912
dst.xy = src0.xy \times 2^{src1.x}
1914
dst.zw = src0.zw \times 2^{src1.z}
1944
This opcode is the inverse of frexp. The second
1945
source is an integer.
1949
dst.xy = src0.xy \times 2^{src1.x}
1951
dst.zw = src0.zw \times 2^{src1.z}
1916
1953
.. opcode:: DMIN - Minimum
1920
dst.xy = min(src0.xy, src1.xy)
1922
dst.zw = min(src0.zw, src1.zw)
1957
dst.xy = min(src0.xy, src1.xy)
1959
dst.zw = min(src0.zw, src1.zw)
1924
1961
.. opcode:: DMAX - Maximum
1928
dst.xy = max(src0.xy, src1.xy)
1930
dst.zw = max(src0.zw, src1.zw)
1965
dst.xy = max(src0.xy, src1.xy)
1967
dst.zw = max(src0.zw, src1.zw)
1932
1969
.. opcode:: DMUL - Multiply
1936
dst.xy = src0.xy \times src1.xy
1938
dst.zw = src0.zw \times src1.zw
1973
dst.xy = src0.xy \times src1.xy
1975
dst.zw = src0.zw \times src1.zw
1941
1978
.. opcode:: DMAD - Multiply And Add
1945
dst.xy = src0.xy \times src1.xy + src2.xy
1947
dst.zw = src0.zw \times src1.zw + src2.zw
1982
dst.xy = src0.xy \times src1.xy + src2.xy
1984
dst.zw = src0.zw \times src1.zw + src2.zw
1950
1987
.. opcode:: DFMA - Fused Multiply-Add
1952
Perform a * b + c with no intermediate rounding step.
1956
dst.xy = src0.xy \times src1.xy + src2.xy
1958
dst.zw = src0.zw \times src1.zw + src2.zw
1989
Perform a * b + c with no intermediate rounding step.
1993
dst.xy = src0.xy \times src1.xy + src2.xy
1995
dst.zw = src0.zw \times src1.zw + src2.zw
1961
1998
.. opcode:: DDIV - Divide
1965
dst.xy = \frac{src0.xy}{src1.xy}
1967
dst.zw = \frac{src0.zw}{src1.zw}
2002
dst.xy = \frac{src0.xy}{src1.xy}
2004
dst.zw = \frac{src0.zw}{src1.zw}
1970
2007
.. opcode:: DRCP - Reciprocal
1974
dst.xy = \frac{1}{src.xy}
1976
dst.zw = \frac{1}{src.zw}
2011
dst.xy = \frac{1}{src.xy}
2013
dst.zw = \frac{1}{src.zw}
1978
2015
.. opcode:: DSQRT - Square Root
1982
dst.xy = \sqrt{src.xy}
1984
dst.zw = \sqrt{src.zw}
2019
dst.xy = \sqrt{src.xy}
2021
dst.zw = \sqrt{src.zw}
1986
2023
.. opcode:: DRSQ - Reciprocal Square Root
1990
dst.xy = \frac{1}{\sqrt{src.xy}}
1992
dst.zw = \frac{1}{\sqrt{src.zw}}
2027
dst.xy = \frac{1}{\sqrt{src.xy}}
2029
dst.zw = \frac{1}{\sqrt{src.zw}}
1994
2031
.. opcode:: F2D - Float to Double
1998
dst.xy = double(src0.x)
2000
dst.zw = double(src0.y)
2035
dst.xy = double(src0.x)
2037
dst.zw = double(src0.y)
2002
2039
.. opcode:: D2F - Double to Float
2006
dst.x = float(src0.xy)
2008
dst.y = float(src0.zw)
2043
dst.x = float(src0.xy)
2045
dst.y = float(src0.zw)
2010
2047
.. opcode:: I2D - Int to Double
2014
dst.xy = double(src0.x)
2016
dst.zw = double(src0.y)
2051
dst.xy = double(src0.x)
2053
dst.zw = double(src0.y)
2018
2055
.. opcode:: D2I - Double to Int
2022
dst.x = int(src0.xy)
2024
dst.y = int(src0.zw)
2059
dst.x = int(src0.xy)
2061
dst.y = int(src0.zw)
2026
2063
.. opcode:: U2D - Unsigned Int to Double
2030
dst.xy = double(src0.x)
2032
dst.zw = double(src0.y)
2067
dst.xy = double(src0.x)
2069
dst.zw = double(src0.y)
2034
2071
.. opcode:: D2U - Double to Unsigned Int
2038
dst.x = unsigned(src0.xy)
2040
dst.y = unsigned(src0.zw)
2075
dst.x = unsigned(src0.xy)
2077
dst.y = unsigned(src0.zw)
2042
2079
64-bit Integer ISA
2043
2080
^^^^^^^^^^^^^^^^^^
2048
2085
.. opcode:: I64ABS - 64-bit Integer Absolute Value
2056
2093
.. opcode:: I64NEG - 64-bit Integer Negate
2066
2103
.. opcode:: I64SSG - 64-bit Integer Set Sign
2070
dst.xy = (src0.xy < 0) ? -1 : (src0.xy > 0) ? 1 : 0
2072
dst.zw = (src0.zw < 0) ? -1 : (src0.zw > 0) ? 1 : 0
2107
dst.xy = (src0.xy < 0) ? -1 : (src0.xy > 0) ? 1 : 0
2109
dst.zw = (src0.zw < 0) ? -1 : (src0.zw > 0) ? 1 : 0
2074
2111
.. opcode:: U64ADD - 64-bit Integer Add
2078
dst.xy = src0.xy + src1.xy
2080
dst.zw = src0.zw + src1.zw
2115
dst.xy = src0.xy + src1.xy
2117
dst.zw = src0.zw + src1.zw
2082
2119
.. opcode:: U64MUL - 64-bit Integer Multiply
2086
dst.xy = src0.xy * src1.xy
2088
dst.zw = src0.zw * src1.zw
2123
dst.xy = src0.xy * src1.xy
2125
dst.zw = src0.zw * src1.zw
2090
2127
.. opcode:: U64SEQ - 64-bit Integer Set on Equal
2094
dst.x = src0.xy == src1.xy ? \sim 0 : 0
2096
dst.z = src0.zw == src1.zw ? \sim 0 : 0
2131
dst.x = src0.xy == src1.xy ? \sim 0 : 0
2133
dst.z = src0.zw == src1.zw ? \sim 0 : 0
2098
2135
.. opcode:: U64SNE - 64-bit Integer Set on Not Equal
2102
dst.x = src0.xy != src1.xy ? \sim 0 : 0
2104
dst.z = src0.zw != src1.zw ? \sim 0 : 0
2139
dst.x = src0.xy != src1.xy ? \sim 0 : 0
2141
dst.z = src0.zw != src1.zw ? \sim 0 : 0
2106
2143
.. opcode:: U64SLT - 64-bit Unsigned Integer Set on Less Than
2110
dst.x = src0.xy < src1.xy ? \sim 0 : 0
2112
dst.z = src0.zw < src1.zw ? \sim 0 : 0
2147
dst.x = src0.xy < src1.xy ? \sim 0 : 0
2149
dst.z = src0.zw < src1.zw ? \sim 0 : 0
2114
2151
.. opcode:: U64SGE - 64-bit Unsigned Integer Set on Greater Equal
2118
dst.x = src0.xy >= src1.xy ? \sim 0 : 0
2120
dst.z = src0.zw >= src1.zw ? \sim 0 : 0
2155
dst.x = src0.xy >= src1.xy ? \sim 0 : 0
2157
dst.z = src0.zw >= src1.zw ? \sim 0 : 0
2122
2159
.. opcode:: I64SLT - 64-bit Signed Integer Set on Less Than
2126
dst.x = src0.xy < src1.xy ? \sim 0 : 0
2128
dst.z = src0.zw < src1.zw ? \sim 0 : 0
2163
dst.x = src0.xy < src1.xy ? \sim 0 : 0
2165
dst.z = src0.zw < src1.zw ? \sim 0 : 0
2130
2167
.. opcode:: I64SGE - 64-bit Signed Integer Set on Greater Equal
2134
dst.x = src0.xy >= src1.xy ? \sim 0 : 0
2136
dst.z = src0.zw >= src1.zw ? \sim 0 : 0
2171
dst.x = src0.xy >= src1.xy ? \sim 0 : 0
2173
dst.z = src0.zw >= src1.zw ? \sim 0 : 0
2138
2175
.. opcode:: I64MIN - Minimum of 64-bit Signed Integers
2142
dst.xy = min(src0.xy, src1.xy)
2144
dst.zw = min(src0.zw, src1.zw)
2179
dst.xy = min(src0.xy, src1.xy)
2181
dst.zw = min(src0.zw, src1.zw)
2146
2183
.. opcode:: U64MIN - Minimum of 64-bit Unsigned Integers
2150
dst.xy = min(src0.xy, src1.xy)
2152
dst.zw = min(src0.zw, src1.zw)
2187
dst.xy = min(src0.xy, src1.xy)
2189
dst.zw = min(src0.zw, src1.zw)
2154
2191
.. opcode:: I64MAX - Maximum of 64-bit Signed Integers
2158
dst.xy = max(src0.xy, src1.xy)
2160
dst.zw = max(src0.zw, src1.zw)
2195
dst.xy = max(src0.xy, src1.xy)
2197
dst.zw = max(src0.zw, src1.zw)
2162
2199
.. opcode:: U64MAX - Maximum of 64-bit Unsigned Integers
2166
dst.xy = max(src0.xy, src1.xy)
2168
dst.zw = max(src0.zw, src1.zw)
2203
dst.xy = max(src0.xy, src1.xy)
2205
dst.zw = max(src0.zw, src1.zw)
2170
2207
.. opcode:: U64SHL - Shift Left 64-bit Unsigned Integer
2172
2209
The shift count is masked with ``0x3f`` before the shift is applied.
2176
dst.xy = src0.xy \ll (0x3f \& src1.x)
2178
dst.zw = src0.zw \ll (0x3f \& src1.y)
2213
dst.xy = src0.xy \ll (0x3f \& src1.x)
2215
dst.zw = src0.zw \ll (0x3f \& src1.y)
2180
2217
.. opcode:: I64SHR - Arithmetic Shift Right (of 64-bit Signed Integer)
2182
2219
The shift count is masked with ``0x3f`` before the shift is applied.
2186
dst.xy = src0.xy \gg (0x3f \& src1.x)
2188
dst.zw = src0.zw \gg (0x3f \& src1.y)
2223
dst.xy = src0.xy \gg (0x3f \& src1.x)
2225
dst.zw = src0.zw \gg (0x3f \& src1.y)
2190
2227
.. opcode:: U64SHR - Logical Shift Right (of 64-bit Unsigned Integer)
2192
2229
The shift count is masked with ``0x3f`` before the shift is applied.
2196
dst.xy = src0.xy \gg (unsigned) (0x3f \& src1.x)
2198
dst.zw = src0.zw \gg (unsigned) (0x3f \& src1.y)
2233
dst.xy = src0.xy \gg (unsigned) (0x3f \& src1.x)
2235
dst.zw = src0.zw \gg (unsigned) (0x3f \& src1.y)
2200
2237
.. opcode:: I64DIV - 64-bit Signed Integer Division
2204
dst.xy = \frac{src0.xy}{src1.xy}
2206
dst.zw = \frac{src0.zw}{src1.zw}
2241
dst.xy = \frac{src0.xy}{src1.xy}
2243
dst.zw = \frac{src0.zw}{src1.zw}
2208
2245
.. opcode:: U64DIV - 64-bit Unsigned Integer Division
2212
dst.xy = \frac{src0.xy}{src1.xy}
2214
dst.zw = \frac{src0.zw}{src1.zw}
2249
dst.xy = \frac{src0.xy}{src1.xy}
2251
dst.zw = \frac{src0.zw}{src1.zw}
2216
2253
.. opcode:: U64MOD - 64-bit Unsigned Integer Remainder
2220
dst.xy = src0.xy \bmod src1.xy
2222
dst.zw = src0.zw \bmod src1.zw
2257
dst.xy = src0.xy \bmod src1.xy
2259
dst.zw = src0.zw \bmod src1.zw
2224
2261
.. opcode:: I64MOD - 64-bit Signed Integer Remainder
2228
dst.xy = src0.xy \bmod src1.xy
2230
dst.zw = src0.zw \bmod src1.zw
2265
dst.xy = src0.xy \bmod src1.xy
2267
dst.zw = src0.zw \bmod src1.zw
2232
2269
.. opcode:: F2U64 - Float to 64-bit Unsigned Int
2236
dst.xy = (uint64_t) src0.x
2238
dst.zw = (uint64_t) src0.y
2273
dst.xy = (uint64_t) src0.x
2275
dst.zw = (uint64_t) src0.y
2240
2277
.. opcode:: F2I64 - Float to 64-bit Int
2244
dst.xy = (int64_t) src0.x
2246
dst.zw = (int64_t) src0.y
2281
dst.xy = (int64_t) src0.x
2283
dst.zw = (int64_t) src0.y
2248
2285
.. opcode:: U2I64 - Unsigned Integer to 64-bit Integer
2250
2287
This is a zero extension.
2254
dst.xy = (int64_t) src0.x
2256
dst.zw = (int64_t) src0.y
2291
dst.xy = (int64_t) src0.x
2293
dst.zw = (int64_t) src0.y
2258
2295
.. opcode:: I2I64 - Signed Integer to 64-bit Integer
2260
2297
This is a sign extension.
2264
dst.xy = (int64_t) src0.x
2266
dst.zw = (int64_t) src0.y
2301
dst.xy = (int64_t) src0.x
2303
dst.zw = (int64_t) src0.y
2268
2305
.. opcode:: D2U64 - Double to 64-bit Unsigned Int
2272
dst.xy = (uint64_t) src0.xy
2274
dst.zw = (uint64_t) src0.zw
2309
dst.xy = (uint64_t) src0.xy
2311
dst.zw = (uint64_t) src0.zw
2276
2313
.. opcode:: D2I64 - Double to 64-bit Int
2280
dst.xy = (int64_t) src0.xy
2282
dst.zw = (int64_t) src0.zw
2317
dst.xy = (int64_t) src0.xy
2319
dst.zw = (int64_t) src0.zw
2284
2321
.. opcode:: U642F - 64-bit unsigned integer to float
2288
dst.x = (float) src0.xy
2290
dst.y = (float) src0.zw
2325
dst.x = (float) src0.xy
2327
dst.y = (float) src0.zw
2292
2329
.. opcode:: I642F - 64-bit Int to Float
2296
dst.x = (float) src0.xy
2298
dst.y = (float) src0.zw
2333
dst.x = (float) src0.xy
2335
dst.y = (float) src0.zw
2300
2337
.. opcode:: U642D - 64-bit unsigned integer to double
2304
dst.xy = (double) src0.xy
2306
dst.zw = (double) src0.zw
2341
dst.xy = (double) src0.xy
2343
dst.zw = (double) src0.zw
2308
2345
.. opcode:: I642D - 64-bit Int to double
2312
dst.xy = (double) src0.xy
2314
dst.zw = (double) src0.zw
2349
dst.xy = (double) src0.xy
2351
dst.zw = (double) src0.zw
2316
2353
.. _samplingopcodes:
2326
2363
.. opcode:: SAMPLE
2328
Using provided address, sample data from the specified texture using the
2329
filtering mode identified by the given sampler. The source data may come from
2330
any resource type other than buffers.
2332
Syntax: ``SAMPLE dst, address, sampler_view, sampler``
2334
Example: ``SAMPLE TEMP[0], TEMP[1], SVIEW[0], SAMP[0]``
2365
Using provided address, sample data from the specified texture using the
2366
filtering mode identified by the given sampler. The source data may come from
2367
any resource type other than buffers.
2369
Syntax: ``SAMPLE dst, address, sampler_view, sampler``
2371
Example: ``SAMPLE TEMP[0], TEMP[1], SVIEW[0], SAMP[0]``
2336
2373
.. opcode:: SAMPLE_I
2338
Simplified alternative to the SAMPLE instruction. Using the provided
2339
integer address, SAMPLE_I fetches data from the specified sampler view
2340
without any filtering. The source data may come from any resource type
2343
Syntax: ``SAMPLE_I dst, address, sampler_view``
2345
Example: ``SAMPLE_I TEMP[0], TEMP[1], SVIEW[0]``
2347
The 'address' is specified as unsigned integers. If the 'address' is out of
2348
range [0...(# texels - 1)] the result of the fetch is always 0 in all
2349
components. As such the instruction doesn't honor address wrap modes; in
2350
cases where that behavior is desirable the 'SAMPLE' instruction should be used.
2351
address.w always provides an unsigned integer mipmap level. If the value is
2352
out of the range then the instruction always returns 0 in all components.
2353
address.yz are ignored for buffers and 1d textures. address.z is ignored
2354
for 1d texture arrays and 2d textures.
2356
For 1D texture arrays address.y provides the array index (also as unsigned
2357
integer). If the value is out of the range of available array indices
2358
[0... (array size - 1)] then the opcode always returns 0 in all components.
2359
For 2D texture arrays address.z provides the array index, otherwise it
2360
exhibits the same behavior as in the case for 1D texture arrays. The exact
2361
semantics of the source address are presented in the table below:
2363
+---------------------------+----+-----+-----+---------+
2364
| resource type | X | Y | Z | W |
2365
+===========================+====+=====+=====+=========+
2366
| ``PIPE_BUFFER`` | x | | | ignored |
2367
+---------------------------+----+-----+-----+---------+
2368
| ``PIPE_TEXTURE_1D`` | x | | | mpl |
2369
+---------------------------+----+-----+-----+---------+
2370
| ``PIPE_TEXTURE_2D`` | x | y | | mpl |
2371
+---------------------------+----+-----+-----+---------+
2372
| ``PIPE_TEXTURE_3D`` | x | y | z | mpl |
2373
+---------------------------+----+-----+-----+---------+
2374
| ``PIPE_TEXTURE_RECT`` | x | y | | mpl |
2375
+---------------------------+----+-----+-----+---------+
2376
| ``PIPE_TEXTURE_CUBE`` | not allowed as source |
2377
+---------------------------+----+-----+-----+---------+
2378
| ``PIPE_TEXTURE_1D_ARRAY`` | x | idx | | mpl |
2379
+---------------------------+----+-----+-----+---------+
2380
| ``PIPE_TEXTURE_2D_ARRAY`` | x | y | idx | mpl |
2381
+---------------------------+----+-----+-----+---------+
2383
Where 'mpl' is a mipmap level and 'idx' is the array index.
2375
Simplified alternative to the SAMPLE instruction. Using the provided
2376
integer address, SAMPLE_I fetches data from the specified sampler view
2377
without any filtering. The source data may come from any resource type
2380
Syntax: ``SAMPLE_I dst, address, sampler_view``
2382
Example: ``SAMPLE_I TEMP[0], TEMP[1], SVIEW[0]``
2384
The 'address' is specified as unsigned integers. If the 'address' is out of
2385
range [0...(# texels - 1)] the result of the fetch is always 0 in all
2386
components. As such the instruction doesn't honor address wrap modes; in
2387
cases where that behavior is desirable the 'SAMPLE' instruction should be used.
2388
address.w always provides an unsigned integer mipmap level. If the value is
2389
out of the range then the instruction always returns 0 in all components.
2390
address.yz are ignored for buffers and 1d textures. address.z is ignored
2391
for 1d texture arrays and 2d textures.
2393
For 1D texture arrays address.y provides the array index (also as unsigned
2394
integer). If the value is out of the range of available array indices
2395
[0... (array size - 1)] then the opcode always returns 0 in all components.
2396
For 2D texture arrays address.z provides the array index, otherwise it
2397
exhibits the same behavior as in the case for 1D texture arrays. The exact
2398
semantics of the source address are presented in the table below:
2400
+---------------------------+----+-----+-----+---------+
2401
| resource type | X | Y | Z | W |
2402
+===========================+====+=====+=====+=========+
2403
| ``PIPE_BUFFER`` | x | | | ignored |
2404
+---------------------------+----+-----+-----+---------+
2405
| ``PIPE_TEXTURE_1D`` | x | | | mpl |
2406
+---------------------------+----+-----+-----+---------+
2407
| ``PIPE_TEXTURE_2D`` | x | y | | mpl |
2408
+---------------------------+----+-----+-----+---------+
2409
| ``PIPE_TEXTURE_3D`` | x | y | z | mpl |
2410
+---------------------------+----+-----+-----+---------+
2411
| ``PIPE_TEXTURE_RECT`` | x | y | | mpl |
2412
+---------------------------+----+-----+-----+---------+
2413
| ``PIPE_TEXTURE_CUBE`` | not allowed as source |
2414
+---------------------------+----+-----+-----+---------+
2415
| ``PIPE_TEXTURE_1D_ARRAY`` | x | idx | | mpl |
2416
+---------------------------+----+-----+-----+---------+
2417
| ``PIPE_TEXTURE_2D_ARRAY`` | x | y | idx | mpl |
2418
+---------------------------+----+-----+-----+---------+
2420
Where 'mpl' is a mipmap level and 'idx' is the array index.
2385
2422
.. opcode:: SAMPLE_I_MS
2387
Just like SAMPLE_I but allows fetching data from multi-sampled surfaces.
2424
Just like SAMPLE_I but allows fetching data from multi-sampled surfaces.
2389
Syntax: ``SAMPLE_I_MS dst, address, sampler_view, sample``
2426
Syntax: ``SAMPLE_I_MS dst, address, sampler_view, sample``
2391
2428
.. opcode:: SAMPLE_B
2393
Just like the SAMPLE instruction with the exception that an additional bias
2394
is applied to the level of detail computed as part of the instruction
2397
Syntax: ``SAMPLE_B dst, address, sampler_view, sampler, lod_bias``
2399
Example: ``SAMPLE_B TEMP[0], TEMP[1], SVIEW[0], SAMP[0], TEMP[2].x``
2430
Just like the SAMPLE instruction with the exception that an additional bias
2431
is applied to the level of detail computed as part of the instruction
2434
Syntax: ``SAMPLE_B dst, address, sampler_view, sampler, lod_bias``
2436
Example: ``SAMPLE_B TEMP[0], TEMP[1], SVIEW[0], SAMP[0], TEMP[2].x``
2401
2438
.. opcode:: SAMPLE_C
2403
Similar to the SAMPLE instruction but it performs a comparison filter. The
2404
operands to SAMPLE_C are identical to SAMPLE, except that there is an
2405
additional float32 operand, reference value, which must be a register with
2406
single-component, or a scalar literal. SAMPLE_C makes the hardware use the
2407
current sampler's compare_func (in pipe_sampler_state) to compare reference
2408
value against the red component value for the source resource at each texel
2409
that the currently configured texture filter covers based on the provided
2412
Syntax: ``SAMPLE_C dst, address, sampler_view.r, sampler, ref_value``
2414
Example: ``SAMPLE_C TEMP[0], TEMP[1], SVIEW[0].r, SAMP[0], TEMP[2].x``
2440
Similar to the SAMPLE instruction but it performs a comparison filter. The
2441
operands to SAMPLE_C are identical to SAMPLE, except that there is an
2442
additional float32 operand, reference value, which must be a register with
2443
single-component, or a scalar literal. SAMPLE_C makes the hardware use the
2444
current sampler's compare_func (in pipe_sampler_state) to compare reference
2445
value against the red component value for the source resource at each texel
2446
that the currently configured texture filter covers based on the provided
2449
Syntax: ``SAMPLE_C dst, address, sampler_view.r, sampler, ref_value``
2451
Example: ``SAMPLE_C TEMP[0], TEMP[1], SVIEW[0].r, SAMP[0], TEMP[2].x``
2416
2453
.. opcode:: SAMPLE_C_LZ
2418
Same as SAMPLE_C, but LOD is 0 and derivatives are ignored. The LZ stands
2421
Syntax: ``SAMPLE_C_LZ dst, address, sampler_view.r, sampler, ref_value``
2423
Example: ``SAMPLE_C_LZ TEMP[0], TEMP[1], SVIEW[0].r, SAMP[0], TEMP[2].x``
2455
Same as SAMPLE_C, but LOD is 0 and derivatives are ignored. The LZ stands
2458
Syntax: ``SAMPLE_C_LZ dst, address, sampler_view.r, sampler, ref_value``
2460
Example: ``SAMPLE_C_LZ TEMP[0], TEMP[1], SVIEW[0].r, SAMP[0], TEMP[2].x``
2426
2463
.. opcode:: SAMPLE_D
2428
SAMPLE_D is identical to the SAMPLE opcode except that the derivatives for
2429
the source address in the x direction and the y direction are provided by
2432
Syntax: ``SAMPLE_D dst, address, sampler_view, sampler, der_x, der_y``
2434
Example: ``SAMPLE_D TEMP[0], TEMP[1], SVIEW[0], SAMP[0], TEMP[2], TEMP[3]``
2465
SAMPLE_D is identical to the SAMPLE opcode except that the derivatives for
2466
the source address in the x direction and the y direction are provided by
2469
Syntax: ``SAMPLE_D dst, address, sampler_view, sampler, der_x, der_y``
2471
Example: ``SAMPLE_D TEMP[0], TEMP[1], SVIEW[0], SAMP[0], TEMP[2], TEMP[3]``
2436
2473
.. opcode:: SAMPLE_L
2438
SAMPLE_L is identical to the SAMPLE opcode except that the LOD is provided
2439
directly as a scalar value, representing no anisotropy.
2441
Syntax: ``SAMPLE_L dst, address, sampler_view, sampler, explicit_lod``
2443
Example: ``SAMPLE_L TEMP[0], TEMP[1], SVIEW[0], SAMP[0], TEMP[2].x``
2475
SAMPLE_L is identical to the SAMPLE opcode except that the LOD is provided
2476
directly as a scalar value, representing no anisotropy.
2478
Syntax: ``SAMPLE_L dst, address, sampler_view, sampler, explicit_lod``
2480
Example: ``SAMPLE_L TEMP[0], TEMP[1], SVIEW[0], SAMP[0], TEMP[2].x``
2445
2482
.. opcode:: GATHER4
2447
Gathers the four texels to be used in a bi-linear filtering operation and
2448
packs them into a single register. Only works with 2D, 2D array, cubemaps,
2449
and cubemap arrays. For 2D textures, only the addressing modes of the
2450
sampler and the top level of any mip pyramid are used. Set W to zero. It
2451
behaves like the SAMPLE instruction, but a filtered sample is not
2452
generated. The four samples that contribute to filtering are placed into
2453
XYZW in counter-clockwise order, starting with the (u,v) texture coordinate
2454
delta at the following locations (-, +), (+, +), (+, -), (-, -), where the
2455
magnitude of the deltas is half a texel.
2484
Gathers the four texels to be used in a bi-linear filtering operation and
2485
packs them into a single register. Only works with 2D, 2D array, cubemaps,
2486
and cubemap arrays. For 2D textures, only the addressing modes of the
2487
sampler and the top level of any mip pyramid are used. Set W to zero. It
2488
behaves like the SAMPLE instruction, but a filtered sample is not
2489
generated. The four samples that contribute to filtering are placed into
2490
XYZW in counter-clockwise order, starting with the (u,v) texture coordinate
2491
delta at the following locations (-, +), (+, +), (+, -), (-, -), where the
2492
magnitude of the deltas is half a texel.
2458
2495
.. opcode:: SVIEWINFO
2460
Query the dimensions of a given sampler view. dst receives width, height,
2461
depth or array size and number of mipmap levels as int4. The dst can have a
2462
writemask which will specify what info the caller is interested in.
2464
Syntax: ``SVIEWINFO dst, src_mip_level, sampler_view``
2466
Example: ``SVIEWINFO TEMP[0], TEMP[1].x, SVIEW[0]``
2468
src_mip_level is an unsigned integer scalar. If it's out of range then
2469
returns 0 for width, height and depth/array size but the total number of
2470
mipmap levels is still returned correctly for the given sampler view. The returned
2471
width, height and depth values are for the mipmap level selected by the
2472
src_mip_level and are in the number of texels. For 1d texture array width
2473
is in dst.x, array size is in dst.y and dst.z is 0. The number of mipmaps is
2474
still in dst.w. In contrast to d3d10 resinfo, there's no way in the tgsi
2475
instruction encoding to specify the return type (float/rcpfloat/uint), hence
2476
always using uint. Also, unlike the SAMPLE instructions, the swizzle on src1
2477
resinfo allowing swizzling dst values is ignored (due to the interaction
2478
with rcpfloat modifier which requires some swizzle handling in the state
2497
Query the dimensions of a given sampler view. dst receives width, height,
2498
depth or array size and number of mipmap levels as int4. The dst can have a
2499
writemask which will specify what info the caller is interested in.
2501
Syntax: ``SVIEWINFO dst, src_mip_level, sampler_view``
2503
Example: ``SVIEWINFO TEMP[0], TEMP[1].x, SVIEW[0]``
2505
src_mip_level is an unsigned integer scalar. If it's out of range then
2506
returns 0 for width, height and depth/array size but the total number of
2507
mipmap levels is still returned correctly for the given sampler view. The returned
2508
width, height and depth values are for the mipmap level selected by the
2509
src_mip_level and are in the number of texels. For 1d texture array width
2510
is in dst.x, array size is in dst.y and dst.z is 0. The number of mipmaps is
2511
still in dst.w. In contrast to d3d10 resinfo, there's no way in the tgsi
2512
instruction encoding to specify the return type (float/rcpfloat/uint), hence
2513
always using uint. Also, unlike the SAMPLE instructions, the swizzle on src1
2514
resinfo allowing swizzling dst values is ignored (due to the interaction
2515
with rcpfloat modifier which requires some swizzle handling in the state
2481
2518
.. opcode:: SAMPLE_POS
2483
Query the position of a sample in the given resource or render target
2484
when per-sample fragment shading is in effect.
2486
Syntax: ``SAMPLE_POS dst, source, sample_index``
2488
dst receives float4 (x, y, undef, undef) indicating where the sample is
2489
located. Sample locations are in the range [0, 1] where 0.5 is the center
2492
source is either a sampler view (to indicate a shader resource) or temp
2493
register (to indicate the render target). The source register may have
2494
an optional swizzle to apply to the returned result.
2496
sample_index is an integer scalar indicating which sample position is to
2499
If per-sample shading is not in effect or the source resource or render
2500
target is not multisampled, the result is (0.5, 0.5, undef, undef).
2502
NOTE: no driver has implemented this opcode yet (and no gallium frontend
2503
emits it). This information is subject to change.
2520
Query the position of a sample in the given resource or render target
2521
when per-sample fragment shading is in effect.
2523
Syntax: ``SAMPLE_POS dst, source, sample_index``
2525
dst receives float4 (x, y, undef, undef) indicating where the sample is
2526
located. Sample locations are in the range [0, 1] where 0.5 is the center
2529
source is either a sampler view (to indicate a shader resource) or temp
2530
register (to indicate the render target). The source register may have
2531
an optional swizzle to apply to the returned result.
2533
sample_index is an integer scalar indicating which sample position is to
2536
If per-sample shading is not in effect or the source resource or render
2537
target is not multisampled, the result is (0.5, 0.5, undef, undef).
2539
NOTE: no driver has implemented this opcode yet (and no gallium frontend
2540
emits it). This information is subject to change.
2505
2542
.. opcode:: SAMPLE_INFO
2507
Query the number of samples in a multisampled resource or render target.
2509
Syntax: ``SAMPLE_INFO dst, source``
2511
dst receives int4 (n, 0, 0, 0) where n is the number of samples in a
2512
resource or the render target.
2514
source is either a sampler view (to indicate a shader resource) or temp
2515
register (to indicate the render target). The source register may have
2516
an optional swizzle to apply to the returned result.
2518
If per-sample shading is not in effect or the source resource or render
2519
target is not multisampled, the result is (1, 0, 0, 0).
2521
NOTE: no driver has implemented this opcode yet (and no gallium frontend
2522
emits it). This information is subject to change.
2544
Query the number of samples in a multisampled resource or render target.
2546
Syntax: ``SAMPLE_INFO dst, source``
2548
dst receives int4 (n, 0, 0, 0) where n is the number of samples in a
2549
resource or the render target.
2551
source is either a sampler view (to indicate a shader resource) or temp
2552
register (to indicate the render target). The source register may have
2553
an optional swizzle to apply to the returned result.
2555
If per-sample shading is not in effect or the source resource or render
2556
target is not multisampled, the result is (1, 0, 0, 0).
2558
NOTE: no driver has implemented this opcode yet (and no gallium frontend
2559
emits it). This information is subject to change.
2524
2561
.. opcode:: LOD - level of detail
2543
2580
.. opcode:: LOAD - Fetch data from a shader buffer or image
2545
Syntax: ``LOAD dst, resource, address``
2547
Example: ``LOAD TEMP[0], BUFFER[0], TEMP[1]``
2549
Using the provided integer address, LOAD fetches data
2550
from the specified buffer or texture without any
2553
The 'address' is specified as a vector of unsigned
2554
integers. If the 'address' is out of range the result
2557
Only the first mipmap level of a resource can be read
2558
from using this instruction.
2560
For 1D or 2D texture arrays, the array index is
2561
provided as an unsigned integer in address.y or
2562
address.z, respectively. address.yz are ignored for
2563
buffers and 1D textures. address.z is ignored for 1D
2564
texture arrays and 2D textures. address.w is always
2567
A swizzle suffix may be added to the resource argument;
2568
this will cause the resource data to be swizzled accordingly.
2582
Syntax: ``LOAD dst, resource, address``
2584
Example: ``LOAD TEMP[0], BUFFER[0], TEMP[1]``
2586
Using the provided integer address, LOAD fetches data from the
2587
specified buffer or texture without any filtering.
2589
The 'address' is specified as a vector of unsigned integers. If the
2590
'address' is out of range the result is unspecified.
2592
Only the first mipmap level of a resource can be read from using this
2595
For 1D or 2D texture arrays, the array index is provided as an
2596
unsigned integer in address.y or address.z, respectively. address.yz
2597
are ignored for buffers and 1D textures. address.z is ignored for 1D
2598
texture arrays and 2D textures. address.w is always ignored.
2600
A swizzle suffix may be added to the resource argument; this will
2601
cause the resource data to be swizzled accordingly.
2570
2603
.. opcode:: STORE - Write data to a shader resource
2572
Syntax: ``STORE resource, address, src``
2574
Example: ``STORE BUFFER[0], TEMP[0], TEMP[1]``
2576
Using the provided integer address, STORE writes data
2577
to the specified buffer or texture.
2579
The 'address' is specified as a vector of unsigned
2580
integers. If the 'address' is out of range the result
2583
Only the first mipmap level of a resource can be
2584
written to using this instruction.
2586
For 1D or 2D texture arrays, the array index is
2587
provided as an unsigned integer in address.y or
2588
address.z, respectively. address.yz are ignored for
2589
buffers and 1D textures. address.z is ignored for 1D
2590
texture arrays and 2D textures. address.w is always
2605
Syntax: ``STORE resource, address, src``
2607
Example: ``STORE BUFFER[0], TEMP[0], TEMP[1]``
2609
Using the provided integer address, STORE writes data to the
2610
specified buffer or texture.
2612
The 'address' is specified as a vector of unsigned integers. If the
2613
'address' is out of range the result is unspecified.
2615
Only the first mipmap level of a resource can be written to using
2618
For 1D or 2D texture arrays, the array index is provided as an
2619
unsigned integer in address.y or address.z, respectively.
2620
address.yz are ignored for buffers and 1D textures. address.z is
2621
ignored for 1D texture arrays and 2D textures. address.w is always
2593
2624
.. opcode:: RESQ - Query information about a resource
2595
Syntax: ``RESQ dst, resource``
2597
Example: ``RESQ TEMP[0], BUFFER[0]``
2599
Returns information about the buffer or image resource. For buffer
2600
resources, the size (in bytes) is returned in the x component. For
2601
image resources, .xyz will contain the width/height/layers of the
2602
image, while .w will contain the number of samples for multi-sampled
2626
Syntax: ``RESQ dst, resource``
2628
Example: ``RESQ TEMP[0], BUFFER[0]``
2630
Returns information about the buffer or image resource. For buffer
2631
resources, the size (in bytes) is returned in the x component. For
2632
image resources, .xyz will contain the width/height/layers of the
2633
image, while .w will contain the number of samples for multi-sampled
2605
2636
.. opcode:: FBFETCH - Load data from framebuffer
2607
Syntax: ``FBFETCH dst, output``
2609
Example: ``FBFETCH TEMP[0], OUT[0]``
2611
This is only valid on ``COLOR`` semantic outputs. Returns the color
2612
of the current position in the framebuffer from before this fragment
2613
shader invocation. May return the same value from multiple calls for
2614
a particular output within a single invocation. Note that the result may
2615
be undefined if a fragment is drawn multiple times without a blend
2638
Syntax: ``FBFETCH dst, output``
2640
Example: ``FBFETCH TEMP[0], OUT[0]``
2642
This is only valid on ``COLOR`` semantic outputs. Returns the color
2643
of the current position in the framebuffer from before this fragment
2644
shader invocation. May return the same value from multiple calls for
2645
a particular output within a single invocation. Note that the result may
2646
be undefined if a fragment is drawn multiple times without a blend
2619
2650
.. _bindlessopcodes:
2696
2727
.. opcode:: ATOMUADD - Atomic integer addition
2698
Syntax: ``ATOMUADD dst, resource, offset, src``
2700
Example: ``ATOMUADD TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
2702
The following operation is performed atomically:
2706
dst_x = resource[offset]
2708
resource[offset] = dst_x + src_x
2729
Syntax: ``ATOMUADD dst, resource, offset, src``
2731
Example: ``ATOMUADD TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
2733
The following operation is performed atomically:
2737
dst_x = resource[offset]
2739
resource[offset] = dst_x + src_x
2711
2742
.. opcode:: ATOMFADD - Atomic floating point addition
2713
Syntax: ``ATOMFADD dst, resource, offset, src``
2715
Example: ``ATOMFADD TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
2717
The following operation is performed atomically:
2721
dst_x = resource[offset]
2723
resource[offset] = dst_x + src_x
2744
Syntax: ``ATOMFADD dst, resource, offset, src``
2746
Example: ``ATOMFADD TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
2748
The following operation is performed atomically:
2752
dst_x = resource[offset]
2754
resource[offset] = dst_x + src_x
2726
2757
.. opcode:: ATOMXCHG - Atomic exchange
2728
Syntax: ``ATOMXCHG dst, resource, offset, src``
2730
Example: ``ATOMXCHG TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
2732
The following operation is performed atomically:
2736
dst_x = resource[offset]
2738
resource[offset] = src_x
2759
Syntax: ``ATOMXCHG dst, resource, offset, src``
2761
Example: ``ATOMXCHG TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
2763
The following operation is performed atomically:
2767
dst_x = resource[offset]
2769
resource[offset] = src_x
2741
2772
.. opcode:: ATOMCAS - Atomic compare-and-exchange
2743
Syntax: ``ATOMCAS dst, resource, offset, cmp, src``
2745
Example: ``ATOMCAS TEMP[0], BUFFER[0], TEMP[1], TEMP[2], TEMP[3]``
2747
The following operation is performed atomically:
2751
dst_x = resource[offset]
2753
resource[offset] = (dst_x == cmp_x ? src_x : dst_x)
2774
Syntax: ``ATOMCAS dst, resource, offset, cmp, src``
2776
Example: ``ATOMCAS TEMP[0], BUFFER[0], TEMP[1], TEMP[2], TEMP[3]``
2778
The following operation is performed atomically:
2782
dst_x = resource[offset]
2784
resource[offset] = (dst_x == cmp_x ? src_x : dst_x)
2756
2787
.. opcode:: ATOMAND - Atomic bitwise And
2758
Syntax: ``ATOMAND dst, resource, offset, src``
2760
Example: ``ATOMAND TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
2762
The following operation is performed atomically:
2766
dst_x = resource[offset]
2768
resource[offset] = dst_x \& src_x
2789
Syntax: ``ATOMAND dst, resource, offset, src``
2791
Example: ``ATOMAND TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
2793
The following operation is performed atomically:
2797
dst_x = resource[offset]
2799
resource[offset] = dst_x \& src_x
2771
2802
.. opcode:: ATOMOR - Atomic bitwise Or
2773
Syntax: ``ATOMOR dst, resource, offset, src``
2775
Example: ``ATOMOR TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
2777
The following operation is performed atomically:
2781
dst_x = resource[offset]
2783
resource[offset] = dst_x | src_x
2804
Syntax: ``ATOMOR dst, resource, offset, src``
2806
Example: ``ATOMOR TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
2808
The following operation is performed atomically:
2812
dst_x = resource[offset]
2814
resource[offset] = dst_x | src_x
2786
2817
.. opcode:: ATOMXOR - Atomic bitwise Xor
2788
Syntax: ``ATOMXOR dst, resource, offset, src``
2790
Example: ``ATOMXOR TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
2792
The following operation is performed atomically:
2796
dst_x = resource[offset]
2798
resource[offset] = dst_x \oplus src_x
2819
Syntax: ``ATOMXOR dst, resource, offset, src``
2821
Example: ``ATOMXOR TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
2823
The following operation is performed atomically:
2827
dst_x = resource[offset]
2829
resource[offset] = dst_x \oplus src_x
2801
2832
.. opcode:: ATOMUMIN - Atomic unsigned minimum
2803
Syntax: ``ATOMUMIN dst, resource, offset, src``
2805
Example: ``ATOMUMIN TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
2807
The following operation is performed atomically:
2811
dst_x = resource[offset]
2813
resource[offset] = (dst_x < src_x ? dst_x : src_x)
2834
Syntax: ``ATOMUMIN dst, resource, offset, src``
2836
Example: ``ATOMUMIN TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
2838
The following operation is performed atomically:
2842
dst_x = resource[offset]
2844
resource[offset] = (dst_x < src_x ? dst_x : src_x)
2816
2847
.. opcode:: ATOMUMAX - Atomic unsigned maximum
2818
Syntax: ``ATOMUMAX dst, resource, offset, src``
2820
Example: ``ATOMUMAX TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
2822
The following operation is performed atomically:
2826
dst_x = resource[offset]
2828
resource[offset] = (dst_x > src_x ? dst_x : src_x)
2849
Syntax: ``ATOMUMAX dst, resource, offset, src``
2851
Example: ``ATOMUMAX TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
2853
The following operation is performed atomically:
2857
dst_x = resource[offset]
2859
resource[offset] = (dst_x > src_x ? dst_x : src_x)
2831
2862
.. opcode:: ATOMIMIN - Atomic signed minimum
2833
Syntax: ``ATOMIMIN dst, resource, offset, src``
2835
Example: ``ATOMIMIN TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
2837
The following operation is performed atomically:
2841
dst_x = resource[offset]
2843
resource[offset] = (dst_x < src_x ? dst_x : src_x)
2864
Syntax: ``ATOMIMIN dst, resource, offset, src``
2866
Example: ``ATOMIMIN TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
2868
The following operation is performed atomically:
2872
dst_x = resource[offset]
2874
resource[offset] = (dst_x < src_x ? dst_x : src_x)
2846
2877
.. opcode:: ATOMIMAX - Atomic signed maximum
2848
Syntax: ``ATOMIMAX dst, resource, offset, src``
2850
Example: ``ATOMIMAX TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
2852
The following operation is performed atomically:
2856
dst_x = resource[offset]
2858
resource[offset] = (dst_x > src_x ? dst_x : src_x)
2879
Syntax: ``ATOMIMAX dst, resource, offset, src``
2881
Example: ``ATOMIMAX TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
2883
The following operation is performed atomically:
2887
dst_x = resource[offset]
2889
resource[offset] = (dst_x > src_x ? dst_x : src_x)
2861
2892
.. opcode:: ATOMINC_WRAP - Atomic increment + wrap around
2863
Syntax: ``ATOMINC_WRAP dst, resource, offset, src``
2865
Example: ``ATOMINC_WRAP TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
2867
The following operation is performed atomically:
2871
dst_x = resource[offset] + 1
2873
resource[offset] = dst_x <= src_x ? dst_x : 0
2894
Syntax: ``ATOMINC_WRAP dst, resource, offset, src``
2896
Example: ``ATOMINC_WRAP TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
2898
The following operation is performed atomically:
2902
dst_x = resource[offset] + 1
2904
resource[offset] = dst_x <= src_x ? dst_x : 0
2876
2907
.. opcode:: ATOMDEC_WRAP - Atomic decrement + wrap around
2878
Syntax: ``ATOMDEC_WRAP dst, resource, offset, src``
2880
Example: ``ATOMDEC_WRAP TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
2882
The following operation is performed atomically:
2886
dst_x = resource[offset]
2888
resource[offset] = (dst_x > 0 \&\& dst_x < src_x) ? dst_x - 1 : 0
2909
Syntax: ``ATOMDEC_WRAP dst, resource, offset, src``
2911
Example: ``ATOMDEC_WRAP TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
2913
The following operation is performed atomically:
2917
dst_x = resource[offset]
2921
\begin{array}{ c l }
2922
dst_x - 1 & \quad \textrm{if } dst_x \gt 0 \textrm{ and } dst_x \lt src_x \\
2923
0 & \quad \textrm{otherwise}
2891
2927
.. _interlaneopcodes: