2
* Copyright 2007 Nouveau Project
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
11
* The above copyright notice and this permission notice shall be included in
12
* all copies or substantial portions of the Software.
14
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17
* THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19
* OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24
#include "nv30_shaders.h"
27
/*
 * NV30_LoadFragProg
 *
 * Places the words of `shader` in a buffer object shared by every call
 * (the function-static fp_mem), records the placement in shader->hw_id and
 * then issues the Nv3D ring commands that select that program.
 *
 * pScrn:  screen; NVPTR(pScrn) supplies the device used for the buffer
 *         allocation, and pScrn->scrnIndex is used for error logging.
 * shader: program to load.  Reads ->data[0 .. size-1] and
 *         ->card_priv.NV30FP.num_regs; updates ->hw_id.
 *
 * NOTE(review): the conditions guarding the allocation/upload steps, any
 * early returns and the store of each word into the mapping are not
 * visible in this listing -- confirm against the complete file.
 */
NV30_LoadFragProg(ScrnInfoPtr pScrn, nv_shader_t *shader)
29
NVPtr pNv = NVPTR(pScrn);
30
/* Persist across calls: the shared program buffer and the offset of its next free slot. */
static struct nouveau_bo *fp_mem = NULL;
31
static int next_hw_id_offset = 0;
34
/* Request a 0x1000-byte buffer object with the VRAM and PIN flags; a non-zero result is logged as an error. */
if (nouveau_bo_new(pNv->dev, NOUVEAU_BO_VRAM | NOUVEAU_BO_PIN,
35
0, 0x1000, &fp_mem)) {
36
xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
37
"Couldn't alloc fragprog buffer!\n");
41
/* Map the buffer for reading and writing so the program words can be copied in by the CPU. */
if (nouveau_bo_map(fp_mem, NOUVEAU_BO_RDWR)) {
42
xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
43
"Couldn't map fragprog buffer!\n");
48
/*
 * Destination for this program inside the mapping.
 * NOTE(review): whether the addition is in bytes depends on the type of
 * fp_mem->map, which is declared elsewhere -- confirm.
 */
uint32_t *map = fp_mem->map + next_hw_id_offset;
51
for (i = 0; i < shader->size; i++) {
52
uint32_t data = shader->data[i];
53
/* On hosts that are not little-endian, swap the two 16-bit halves of each word. */
#if (X_BYTE_ORDER != X_LITTLE_ENDIAN)
54
data = ((data >> 16) | ((data & 0xffff) << 16));
59
/*
 * NOTE(review): this adds to hw_id, whereas NV40_LoadFragProg assigns it;
 * the two agree only when hw_id is 0 on entry -- confirm.
 */
shader->hw_id += next_hw_id_offset;
60
/*
 * Advance the free offset past this program, then round it up to the next
 * multiple of 64.
 * NOTE(review): no check against the 0x1000-byte buffer size is visible.
 */
next_hw_id_offset += (shader->size * sizeof(uint32_t));
61
next_hw_id_offset = (next_hw_id_offset + 63) & ~63;
64
/* Select the program: relocation of fp_mem at offset shader->hw_id. */
BEGIN_RING(Nv3D, NV34TCL_FP_ACTIVE_PROGRAM, 1);
65
OUT_RELOC (fp_mem, shader->hw_id, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
66
NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
67
NV34TCL_FP_ACTIVE_PROGRAM_DMA0, NV34TCL_FP_ACTIVE_PROGRAM_DMA1);
69
/* Fixed values written to FP_REG_CONTROL and MULTISAMPLE_CONTROL (meaning of the constants is not documented here). */
BEGIN_RING(Nv3D, NV34TCL_FP_REG_CONTROL, 1);
70
OUT_RING ((1 << 16)| 0xf);
71
BEGIN_RING(Nv3D, NV34TCL_MULTISAMPLE_CONTROL, 1);
72
OUT_RING (0xffff0000);
74
/* FP_CONTROL receives (num_regs - 1) / 2, using integer division. */
BEGIN_RING(Nv3D,NV34TCL_FP_CONTROL,1);
75
OUT_RING ((shader->card_priv.NV30FP.num_regs-1)/2);
81
/*
 * NV40_LoadVtxProg
 *
 * Sends a vertex program to the Nv3D object through the ring: sets the
 * upload position, pushes the program four words at a time, selects the
 * start position and writes the two attribute-enable words.
 *
 * pScrn:  screen; only NVPTR(pScrn) is taken from it in the visible code.
 * shader: program to load.  Reads ->data[0 .. size-1] and
 *         ->card_priv.NV30VP.vp_in_reg / vp_out_reg; sets ->hw_id.
 */
NV40_LoadVtxProg(ScrnInfoPtr pScrn, nv_shader_t *shader)
83
NVPtr pNv = NVPTR(pScrn);
84
/* NOTE(review): no update of next_hw_id is visible in this listing, so every program may receive id 0 -- confirm. */
static int next_hw_id = 0;
88
shader->hw_id = next_hw_id;
90
BEGIN_RING(Nv3D, NV40TCL_VP_UPLOAD_FROM_ID, 1);
91
OUT_RING ((shader->hw_id));
92
/*
 * Each iteration emits data[i] .. data[i + 3], so shader->size must be a
 * multiple of 4; otherwise the final iteration reads beyond size.
 */
for (i=0; i<shader->size; i+=4) {
93
BEGIN_RING(Nv3D, NV40TCL_VP_UPLOAD_INST(0), 4);
94
OUT_RING (shader->data[i + 0]);
95
OUT_RING (shader->data[i + 1]);
96
OUT_RING (shader->data[i + 2]);
97
OUT_RING (shader->data[i + 3]);
102
/* Start execution from the same id the upload began at. */
BEGIN_RING(Nv3D, NV40TCL_VP_START_FROM_ID, 1);
103
OUT_RING ((shader->hw_id));
105
/* Two consecutive words starting at VP_ATTRIB_EN: vp_in_reg, then vp_out_reg. */
BEGIN_RING(Nv3D, NV40TCL_VP_ATTRIB_EN, 2);
106
OUT_RING (shader->card_priv.NV30VP.vp_in_reg);
107
OUT_RING (shader->card_priv.NV30VP.vp_out_reg);
111
/*
 * NV40_LoadFragProg
 *
 * NV40 counterpart of NV30_LoadFragProg: keeps fragment programs in a
 * function-static buffer object, copies `shader` into it when
 * shader->hw_id is zero, and then emits the FP_ADDRESS / FP_CONTROL ring
 * commands for it.
 *
 * pScrn:  screen; NVPTR(pScrn) supplies the device used for the buffer
 *         allocation, and pScrn->scrnIndex is used for error logging.
 * shader: program to load.  Reads ->data[0 .. size-1] and
 *         ->card_priv.NV30FP.num_regs; may set ->hw_id.
 *
 * NOTE(review): the guard around the allocation, any early returns and
 * the store of each word into the mapping are not visible in this
 * listing -- confirm against the complete file.
 */
NV40_LoadFragProg(ScrnInfoPtr pScrn, nv_shader_t *shader)
113
NVPtr pNv = NVPTR(pScrn);
114
/* Persist across calls: the shared program buffer and the offset of its next free slot. */
static struct nouveau_bo *fp_mem = NULL;
115
static int next_hw_id_offset = 0;
118
/* 0x1000-byte buffer object; flags are VRAM | GART here (the NV30 loader uses VRAM | PIN). */
if (nouveau_bo_new(pNv->dev, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART,
119
0, 0x1000, &fp_mem)) {
120
xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
121
"Couldn't alloc fragprog buffer!\n");
125
if (nouveau_bo_map(fp_mem, NOUVEAU_BO_RDWR)) {
126
xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
127
"Couldn't map fragprog buffer!\n");
131
/*
 * Copy the program only when hw_id is zero.
 * NOTE(review): the first program stored is placed at offset 0 and so
 * keeps hw_id == 0, which this test cannot tell apart from "not yet
 * uploaded" -- confirm whether the repeated upload is intended.
 */
if (!shader->hw_id) {
132
uint32_t *map = fp_mem->map + next_hw_id_offset;
135
for (i = 0; i < shader->size; i++) {
136
uint32_t data = shader->data[i];
137
/* On hosts that are not little-endian, swap the two 16-bit halves of each word. */
#if (X_BYTE_ORDER != X_LITTLE_ENDIAN)
138
data = ((data >> 16) | ((data & 0xffff) << 16));
143
/* Record the placement, then advance the free offset and round it up to a multiple of 64. */
shader->hw_id = next_hw_id_offset;
144
next_hw_id_offset += (shader->size * sizeof(uint32_t));
145
next_hw_id_offset = (next_hw_id_offset + 63) & ~63;
148
/* Point the hardware at the program: relocation of fp_mem at offset shader->hw_id. */
BEGIN_RING(Nv3D, NV40TCL_FP_ADDRESS, 1);
149
OUT_RELOC (fp_mem, shader->hw_id, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
150
NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
151
NV40TCL_FP_ADDRESS_DMA0, NV40TCL_FP_ADDRESS_DMA1);
152
/* FP_CONTROL carries num_regs shifted into the TEMP_COUNT field. */
BEGIN_RING(Nv3D, NV40TCL_FP_CONTROL, 1);
153
OUT_RING (shader->card_priv.NV30FP.num_regs <<
154
NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT);
157
/*******************************************************************************
158
* NV40/G70 vertex shaders
161
/*
 * Vertex program made of three MOV instructions that copy the vertex
 * position and texture coordinates 0 and 1 to the corresponding results.
 * Each instruction is one row of four 32-bit words.  vp_in_reg and
 * vp_out_reg are the two words NV40_LoadVtxProg writes starting at
 * NV40TCL_VP_ATTRIB_EN.
 */
nv_shader_t nv40_vp_exa_render = {
162
.card_priv.NV30VP.vp_in_reg = 0x00000309,
163
.card_priv.NV30VP.vp_out_reg = 0x0000c001,
166
/* MOV result.position, vertex.position */
167
0x40041c6c, 0x0040000d, 0x8106c083, 0x6041ff80,
168
/* MOV result.texcoord[0], vertex.texcoord[0] */
169
0x401f9c6c, 0x0040080d, 0x8106c083, 0x6041ff9c,
170
/* MOV result.texcoord[1], vertex.texcoord[1] */
171
0x401f9c6c, 0x0040090d, 0x8106c083, 0x6041ffa1,
175
/*******************************************************************************
176
* NV30/NV40/G70 fragment shaders
179
/*
 * Fragment program with a single instruction that moves the fragment
 * colour into R0.  num_regs is the value the fragment-program loaders
 * feed into their FP_CONTROL command.
 */
nv_shader_t nv30_fp_pass_col0 = {
180
.card_priv.NV30FP.num_regs = 2,
183
/* MOV R0, fragment.color */
184
0x01403e81, 0x1c9dc801, 0x0001c800, 0x3fe1c800,
188
/*
 * Fragment program that samples texture 0 at texture coordinate 0 into
 * R0.  A second instruction row follows the TEX.
 */
nv_shader_t nv30_fp_pass_tex0 = {
189
.card_priv.NV30FP.num_regs = 2,
192
/* TEX R0, fragment.texcoord[0], texture[0], 2D */
193
0x17009e00, 0x1c9dc801, 0x0001c800, 0x3fe1c800,
195
/* NOTE(review): no description of this row is visible in the listing -- confirm what it encodes. */
0x01401e81, 0x1c9dc800, 0x0001c800, 0x0001c800,
199
/*
 * Fragment program for compositing with a mask, per the instruction
 * comments below: the w component of texture 1 is fetched into R1.w,
 * texture 0 is fetched into R0 under the NE0.wwww condition, and R0 is
 * then multiplied by R1.w.
 */
nv_shader_t nv30_fp_composite_mask = {
200
.card_priv.NV30FP.num_regs = 2,
203
/* TEXC0 R1.w , fragment.texcoord[1], texture[1], 2D */
204
0x1702b102, 0x1c9dc801, 0x0001c800, 0x3fe1c800,
205
/* TEX R0 (NE0.wwww), fragment.texcoord[0], texture[0], 2D */
206
0x17009e00, 0x1ff5c801, 0x0001c800, 0x3fe1c800,
207
/* MUL R0 , R0, R1.w */
208
0x02001e81, 0x1c9dc800, 0x0001fe04, 0x0001c800,
212
/*
 * Variant of nv30_fp_composite_mask with the texture roles exchanged, per
 * the instruction comments below: the w component of texture 0 is fetched
 * into R1.w, texture 1 is fetched into R0 under the NE0.wwww condition,
 * and the result is R1.wwww * R0.
 * NOTE(review): "sa_ca" presumably stands for source-alpha /
 * component-alpha -- confirm against the callers.
 */
nv_shader_t nv30_fp_composite_mask_sa_ca = {
213
.card_priv.NV30FP.num_regs = 2,
216
/* TEXC0 R1.w , fragment.texcoord[0], texture[0], 2D */
217
0x17009102, 0x1c9dc801, 0x0001c800, 0x3fe1c800,
218
/* TEX R0 (NE0.wwww), fragment.texcoord[1], texture[1], 2D */
219
0x1702be00, 0x1ff5c801, 0x0001c800, 0x3fe1c800,
220
/* MUL R0 , R1.wwww, R0 */
221
0x02001e81, 0x1c9dfe04, 0x0001c800, 0x0001c800,
225
/*
 * Mask compositing variant that multiplies all components, per the
 * instruction comments below: texture 0 is fetched into R0, texture 1
 * into R1 under the NE0.xyzw condition, and the result is R0 * R1.
 * NOTE(review): "ca" presumably stands for component-alpha -- confirm
 * against the callers.
 */
nv_shader_t nv30_fp_composite_mask_ca = {
226
.card_priv.NV30FP.num_regs = 2,
229
/* TEXC0 R0 , fragment.texcoord[0], texture[0], 2D */
230
0x17009f00, 0x1c9dc801, 0x0001c800, 0x3fe1c800,
231
/* TEX R1 (NE0.xyzw), fragment.texcoord[1], texture[1], 2D */
232
0x1702be02, 0x1c95c801, 0x0001c800, 0x3fe1c800,
233
/* MUL R0 , R0, R1 */
234
0x02001e81, 0x1c9dc800, 0x0001c804, 0x0001c800,
238
/*
 * Vertex program for the video path: three MOV instructions that copy the
 * vertex position and texture coordinates 0 and 1 to the results.  The
 * visible register masks and instruction words are identical to those of
 * nv40_vp_exa_render.
 */
nv_shader_t nv40_vp_video = {
239
.card_priv.NV30VP.vp_in_reg = 0x00000309,
240
.card_priv.NV30VP.vp_out_reg = 0x0000c001,
243
/* MOV result.position, vertex.position */
244
0x40041c6c, 0x0040000d, 0x8106c083, 0x6041ff80,
245
/* MOV result.texcoord[0], vertex.texcoord[0] */
246
0x401f9c6c, 0x0040080d, 0x8106c083, 0x6041ff9c,
247
/* MOV result.texcoord[1], vertex.texcoord[1] */
248
0x401f9c6c, 0x0040090d, 0x8106c083, 0x6041ffa1,
252
/*
 * NV40 fragment program with 22 instructions (INST 0..21).
 *
 * Layout: each instruction is a row of four 32-bit words.  When an
 * instruction takes an inline constant, the row after it holds that
 * constant as four IEEE-754 single-precision bit patterns (for example
 * 0x3f000000 is 0.5 and 0xbf800000 is -1.0), matching the values named in
 * the instruction's comment.
 *
 * Per the comments, INST 0..18 sample texture[0] and texture[1] several
 * times and blend the results with MADH, INST 17 fetches two components
 * from texture[2], and INST 19..21 apply three constant vectors.
 * NOTE(review): presumably texture[0] holds the bicubic filter weights and
 * the final three instructions are a YV12 (YUV) to RGB conversion with
 * BT.601-style coefficients -- confirm against the code that binds the
 * textures.
 */
nv_shader_t nv40_fp_yv12_bicubic = {
253
.card_priv.NV30FP.num_regs = 4,
256
/* INST 0: MOVR R0.xy (TR0.xyzw), attrib.texcoord[0] */
257
0x01008600, 0x1c9dc801, 0x0001c800, 0x3fe1c800,
258
/* INST 1: ADDR R0.z (TR0.xyzw), R0.yyyy, { 0.50, 0.00, 0.00, 0.00 }.xxxx */
259
0x03000800, 0x1c9caa00, 0x00000002, 0x0001c800,
260
0x3f000000, 0x00000000, 0x00000000, 0x00000000,
261
/* INST 2: ADDR R1.x (TR0.xyzw), R0, { 0.50, 0.00, 0.00, 0.00 }.xxxx */
262
0x03000202, 0x1c9dc800, 0x00000002, 0x0001c800,
263
0x3f000000, 0x00000000, 0x00000000, 0x00000000,
264
/* INST 3: TEXRC0 R1.xyz (TR0.xyzw), R0.zzzz, texture[0] */
265
0x17000f82, 0x1c9d5400, 0x0001c800, 0x0001c800,
266
/* INST 4: MULR R2.yw (TR0.xyzw), R1.xxyy, { -1.00, 1.00, 0.00, 0.00 }.xxyy */
267
0x02001404, 0x1c9ca104, 0x0000a002, 0x0001c800,
268
0xbf800000, 0x3f800000, 0x00000000, 0x00000000,
269
/* INST 5: TEXR R3.xyz (TR0.xyzw), R1, texture[0] */
270
0x17000e86, 0x1c9dc804, 0x0001c800, 0x0001c800,
271
/* INST 6: MULR R2.xz (TR0.xyzw), R3.xxyy, { -1.00, 1.00, 0.00, 0.00 }.xxyy */
272
0x02000a04, 0x1c9ca10c, 0x0000a002, 0x0001c800,
273
0xbf800000, 0x3f800000, 0x00000000, 0x00000000,
274
/* INST 7: ADDR R2 (TR0.xyzw), R0.xyxy, R2 */
275
0x03001e04, 0x1c9c8800, 0x0001c808, 0x0001c800,
276
/* INST 8: TEXR R1.y (TR0.xyzw), R2.zwzz, -texture[1] */
277
0x17020402, 0x1c9d5c08, 0x0001c800, 0x0001c800,
278
/* INST 9: MADH R1.x (TR0.xyzw), -R1.zzzz, R1.yyyy, R1.yyyy */
279
0x04400282, 0x1c9f5504, 0x0000aa04, 0x0000aa04,
280
/* INST 10: TEXR R0.y (TR0.xyzw), R2.xwxw, -texture[1] */
281
0x17020400, 0x1c9d9808, 0x0001c800, 0x0001c800,
282
/* INST 11: MADH R0.w (TR0.xyzw), -R1.zzzz, R0.yyyy, R0.yyyy */
283
0x04401080, 0x1c9f5504, 0x0000aa00, 0x0000aa00,
284
/* INST 12: TEXR R0.x (TR0.xyzw), R2.zyxy, texture[1] */
285
0x17020200, 0x1c9c8c08, 0x0001c800, 0x0001c800,
286
/* INST 13: MADH R1.x (TR0.xyzw), R1.zzzz, R0, R1 */
287
0x04400282, 0x1c9d5504, 0x0001c800, 0x0001c904,
288
/* INST 14: TEXR R0.x (NE0.zzzz), R2, texture[1] */
289
0x17020200, 0x1555c808, 0x0001c800, 0x0001c800,
290
/* INST 15: MADH R0.x (TR0.xyzw), R1.zzzz, R0, R0.wwww */
291
0x04400280, 0x1c9d5504, 0x0001c800, 0x0001ff00,
292
/* INST 16: MADH R0.w (TR0.xyzw), -R3.zzzz, R1.xxxx, R1.xxxx */
293
0x04401080, 0x1c9f550c, 0x00000104, 0x00000104,
294
/* INST 17: TEXR R0.yz (TR0.xyzw), attrib.texcoord[1], abs(texture[2]) */
295
0x1704ac80, 0x1c9dc801, 0x0001c800, 0x3fe1c800,
296
/* INST 18: MADH R0.x (TR0.xyzw), R3.zzzz, R0, R0.wwww */
297
0x04400280, 0x1c9d550c, 0x0001c900, 0x0001ff00,
298
/* INST 19: MADH R1.xyz (TR0.xyzw), R0.xxxx, { 1.16, -0.87, 0.53, -1.08 }.xxxx, { 1.16, -0.87, 0.53, -1.08 }.yzww */
299
0x04400e82, 0x1c9c0100, 0x00000002, 0x0001f202,
300
0x3f9507c8, 0xbf5ee393, 0x3f078fef, 0xbf8a6762,
301
/* INST 20: MADH R1.xyz (TR0.xyzw), R0.yyyy, { 0.00, -0.39, 2.02, 0.00 }, R1 */
302
0x04400e82, 0x1c9cab00, 0x0001c802, 0x0001c904,
303
0x00000000, 0xbec890d6, 0x40011687, 0x00000000,
304
/* INST 21: MADH R0.xyz (TR0.xyzw), R0.zzzz, { 1.60, -0.81, 0.00, 0.00 }, R1 + END */
305
0x04400e81, 0x1c9d5500, 0x0001c802, 0x0001c904,
306
0x3fcc432d, 0xbf501a37, 0x00000000, 0x00000000,
310
/*
 * NV30 fragment program with 18 instructions (INST 0..17).
 *
 * Layout: each instruction is a row of four 32-bit words.  When an
 * instruction takes an inline constant, the row after it holds that
 * constant as four IEEE-754 single-precision bit patterns (for example
 * 0x3f000000 is 0.5 and 0xbf800000 is -1.0), matching the values named in
 * the instruction's comment.
 *
 * Per the comments, this version combines its four texture[1] samples
 * with LRPH where nv40_fp_yv12_bicubic uses MADH, then fetches two
 * components from texture[2] and applies the same three constant vectors
 * (INST 14, 16 and 17).
 * NOTE(review): presumably texture[0] holds the bicubic filter weights and
 * the constant vectors are a YV12 (YUV) to RGB conversion with
 * BT.601-style coefficients -- confirm against the code that binds the
 * textures.
 */
nv_shader_t nv30_fp_yv12_bicubic = {
311
.card_priv.NV30FP.num_regs = 4,
314
/* INST 0: MOVR R2.xy (TR0.xyzw), attrib.texcoord[0] */
315
0x01008604, 0x1c9dc801, 0x0001c800, 0x0001c800,
316
/* INST 1: ADDR R0.xy (TR0.xyzw), R2, { 0.50, 0.00, 0.00, 0.00 }.xxxx */
317
0x03000600, 0x1c9dc808, 0x00000002, 0x0001c800,
318
0x3f000000, 0x00000000, 0x00000000, 0x00000000,
319
/* INST 2: TEXR R3.xyz (TR0.xyzw), R0, texture[0] */
320
0x17000e06, 0x1c9dc800, 0x0001c800, 0x0001c800,
321
/* INST 3: TEXR R0.xyz (TR0.xyzw), R0.yyyy, texture[0] */
322
0x17000e00, 0x1c9caa00, 0x0001c800, 0x0001c800,
323
/* INST 4: MULR R1.xz (TR0.xyzw), R3.xxyy, { -1.00, 1.00, 0.00, 0.00 }.xxyy */
324
0x02000a02, 0x1c9ca00c, 0x0000a002, 0x0001c800,
325
0xbf800000, 0x3f800000, 0x00000000, 0x00000000,
326
/* INST 5: MULR R1.yw (TR0.xyzw), R0.xxyy, { -1.00, 1.00, 0.00, 0.00 }.xxyy */
327
0x02001402, 0x1c9ca000, 0x0000a002, 0x0001c800,
328
0xbf800000, 0x3f800000, 0x00000000, 0x00000000,
329
/* INST 6: ADDR R2 (TR0.xyzw), R2.xyxy, R1 */
330
0x03001e04, 0x1c9c8808, 0x0001c804, 0x0001c800,
331
/* INST 7: TEXR R0.x (TR0.xyzw), R2, texture[1] */
332
0x17020200, 0x1c9dc808, 0x0001c800, 0x0001c800,
333
/* INST 8: TEXR R1.y (TR0.xyzw), R2.xwxw, texture[1] */
334
0x17020402, 0x1c9d9808, 0x0001c800, 0x0001c800,
335
/* INST 9: TEXR R1.x (TR0.xyzw), R2.zyxy, texture[1] */
336
0x17020202, 0x1c9c8c08, 0x0001c800, 0x0001c800,
337
/* INST 10: LRPH R0.x (TR0.xyzw), R0.zzzz, R0, R1.yyyy */
338
0x1f400280, 0x1c9d5400, 0x0001c800, 0x0000aa04,
339
/* INST 11: TEXR R0.y (TR0.xyzw), R2.zwzz, texture[1] */
340
0x17020400, 0x1c9d5c08, 0x0001c800, 0x0001c800,
341
/* INST 12: LRPH R0.y (TR0.xyzw), R0.zzzz, R1.xxxx, R0 */
342
0x1f400480, 0x1c9d5400, 0x00000004, 0x0001c800,
343
/* INST 13: LRPH R0.x (TR0.xyzw), R3.zzzz, R0, R0.yyyy */
344
0x1f400280, 0x1c9d540c, 0x0001c900, 0x0000ab00,
345
/* INST 14: MADH R0.xyz (TR0.xyzw), R0.xxxx, { 1.16, -0.87, 0.53, -1.08 }.xxxx, { 1.16, -0.87, 0.53, -1.08 }.yzww */
346
0x04400e80, 0x1c9c0100, 0x00000002, 0x0001f202,
347
0x3f9507c8, 0xbf5ee393, 0x3f078fef, 0xbf8a6762,
348
/* INST 15: TEXR R1.yz (TR0.xyzw), attrib.texcoord[1], abs(texture[2]) */
349
0x1704ac02, 0x1c9dc801, 0x0001c800, 0x0001c800,
350
/* INST 16: MADH R0.xyz (TR0.xyzw), R1.yyyy, { 0.00, -0.39, 2.02, 0.00 }, R0 */
351
0x04400e80, 0x1c9caa04, 0x0001c802, 0x0001c900,
352
0x00000000, 0xbec890d6, 0x40011687, 0x00000000,
353
/* INST 17: MADH R0.xyz (TR0.xyzw), R1.zzzz, { 1.60, -0.81, 0.00, 0.00 }, R0 + END */
354
0x04400e81, 0x1c9d5404, 0x0001c802, 0x0001c900,
355
0x3fcc432d, 0xbf501a37, 0x00000000, 0x00000000,
359
/*
 * NV30 fragment program with 5 instructions (INST 0..4).
 *
 * Layout: each instruction is a row of four 32-bit words.  The three MADR
 * instructions are each followed by a row holding their inline constant
 * as four IEEE-754 single-precision bit patterns, matching the values
 * named in the instruction's comment.
 *
 * Per the comments, one component is fetched from texture[1] and two from
 * texture[2], and the three MADR instructions combine them with the
 * constant vectors into R0.xyz.
 * NOTE(review): presumably a YV12 (YUV) to RGB conversion with
 * BT.601-style coefficients, relying on the sampler's own filtering --
 * confirm against the code that binds the textures.
 */
nv_shader_t nv30_fp_yv12_bilinear = {
360
.card_priv.NV30FP.num_regs = 2,
363
/* INST 0: TEXR R0.x (TR0.xyzw), attrib.texcoord[0], abs(texture[1]) */
364
0x17028200, 0x1c9dc801, 0x0001c800, 0x3fe1c800,
365
/* INST 1: MADR R1.xyz (TR0.xyzw), R0.xxxx, { 1.16, -0.87, 0.53, -1.08 }.xxxx, { 1.16, -0.87, 0.53, -1.08 }.yzww */
366
0x04000e02, 0x1c9c0000, 0x00000002, 0x0001f202,
367
0x3f9507c8, 0xbf5ee393, 0x3f078fef, 0xbf8a6762,
368
/* INST 2: TEXR R0.yz (TR0.xyzw), attrib.texcoord[1], abs(texture[2]) */
369
0x1704ac80, 0x1c9dc801, 0x0001c800, 0x3fe1c800,
370
/* INST 3: MADR R1.xyz (TR0.xyzw), R0.yyyy, { 0.00, -0.39, 2.02, 0.00 }, R1 */
371
0x04000e02, 0x1c9cab00, 0x0001c802, 0x0001c804,
372
0x00000000, 0xbec890d6, 0x40011687, 0x00000000,
373
/* INST 4: MADR R0.xyz (TR0.xyzw), R0.zzzz, { 1.60, -0.81, 0.00, 0.00 }, R1 + END */
374
0x04000e81, 0x1c9d5500, 0x0001c802, 0x0001c804,
375
0x3fcc432d, 0xbf501a37, 0x00000000, 0x00000000,