~ubuntu-branches/ubuntu/jaunty/mesa/jaunty

Viewing changes to src/mesa/drivers/dri/i965/brw_wm_glsl.c

Committer: Bazaar Package Importer
Author(s): Timo Aaltonen
Date: 2009-01-23 10:20:24 UTC
mfrom: (1.2.14 upstream)
Revision ID: james.westby@ubuntu.com-20090123102024-1f3kmb3aea7wzk67

Tags: 7.3~rc3-1ubuntu1

* Merge with Debian experimental.
* Drop 102_dont_vblank.patch, since the new drm code in the kernel
fixes the bugs that it worked around.
* Bump the build-dependency of libdrm to 2.4.4. It's the first version
with necessary changes to build this.

files added:
Makefile~

aclocal.m4

configs/default~

configs/linux-debug~

configs/linux-dri~

configure

docs/README.MINGW32.orig

docs/memory.html

docs/relnotes-7.2.html

docs/relnotes-7.3.html

progs/demos/bug17404.c

progs/demos/gltc.h

progs/demos/rain.c

progs/demos/readtex.c

progs/demos/readtex.h

progs/demos/showbuffer.c

progs/demos/showbuffer.h

progs/demos/trackball.c

progs/demos/trackball.h

progs/demos/unzoom.c

progs/demos/viewdds.c

progs/glsl/Makefile~

progs/glsl/multinoise.c

progs/glsl/samplers.c

progs/glsl/shader_1.vert

progs/glsl/shader_2.frag

progs/glsl/shaderutil.c

progs/glsl/shadow_sampler.c

progs/glsl/shtest.c

progs/glsl/vert-or-frag-only.c

progs/osdemos/readtex.c

progs/redbook/checker2.c

progs/samples/logo2.c

progs/samples/readtex.c

progs/xdemos/Makefile.orig

progs/xdemos/glthreads-tex.c

progs/xdemos/multictx.c

src/glut/glx/Makefile.mgw.orig

src/glut/glx/Makefile~

src/glut/glx/depend

src/glut/glx/glut_ppm.c

src/glw/depend

src/mesa/depend

src/mesa/drivers/allegro/amesa.h

src/mesa/drivers/dri/r200/radeon_chipset.h

src/mesa/drivers/dri/r200/radeon_screen.c

src/mesa/drivers/dri/r200/radeon_screen.h

src/mesa/drivers/dri/r300/radeon_chipset.h

src/mesa/drivers/dri/r300/radeon_screen.c

src/mesa/drivers/dri/r300/radeon_screen.h

src/mesa/drivers/dri/r300/radeon_span.h

src/mesa/drivers/x11/fxmesa.h

src/mesa/drivers/x11/xmesa.h

src/mesa/drivers/x11/xmesa_x.h

src/mesa/drivers/x11/xmesa_xf86.h

src/mesa/main/dlopen.c

src/mesa/main/dlopen.h

src/mesa/main/mcompiler.h

src/mesa/osmesa.pc.in

src/mesa/shader/prog_noise.c

src/mesa/shader/prog_noise.h

src/mesa/shader/shader_debug.c

windows/VC8/mesa/gdi/gdi_staticCRT.vcproj

windows/VC8/mesa/glu/glu_staticCRT.vcproj

windows/VC8/mesa/mesa/dosfile

windows/VC8/mesa/mesa/dosfile2

windows/VC8/mesa/mesa/mesa_staticCRT.vcproj

windows/VC8/mesa/mesa_staticCRT.sln

windows/VC8/mesa/osmesa/osmesa_staticCRT.vcproj

windows/VC8/progs/demos/gears_staticCRT.vcproj

windows/VC8/progs/glut/glut_staticCRT.vcproj

windows/VC8/progs/progs_staticCRT.sln

files removed:
.gitattributes

configs/autoconf

configs/debian-default

configs/debian-dri-amd64

configs/debian-dri-any

configs/debian-dri-default

configs/debian-dri-i386

configs/debian-dri-i386-i686

configs/debian-dri-optimized-default

configs/debian-indirect-default

configs/debian-indirect-hurd-i386

configs/debian-osmesa

configs/debian-osmesa-default

configs/debian-osmesa-static

configs/debian-osmesa16

configs/debian-osmesa16-static

configs/debian-osmesa32

configs/debian-osmesa32-static

configs/debian-swx11+glu-alpha-ev5

configs/debian-swx11+glu-amd64

configs/debian-swx11+glu-any

configs/debian-swx11+glu-default

configs/debian-swx11+glu-i386

configs/debian-swx11+glu-i386-i686

configs/debian-swx11+glu-powerpc-603

configs/debian-swx11+glu-ppc64

configs/debian-swx11+glu-sparc

configs/debian-swx11+glu-sparc-ultrasparc

configs/debian-swx11+glu-static-amd64

configs/debian-swx11+glu-static-any

configs/debian-swx11+glu-static-i386

configs/debian-swx11+glu-static-ppc64

configs/default-bp

configs/linux-osmesa-static

configs/linux-osmesa32-static

debian/patches/102_dont_vblank.patch

debian/scripts/install-source.sh

docs/gears.png

include/GL/amesa.h

include/GL/fxmesa.h

include/GL/uglglutshapes.h

include/GL/uglmesa.h

include/GL/xmesa.h

include/GL/xmesa_x.h

include/GL/xmesa_xf86.h

include/GLES

include/GLES/egl.h

include/GLES/egltypes.h

include/GLView.h

progs/directfb

progs/directfb/Makefile

progs/directfb/df_gears.c

progs/directfb/df_morph3d.c

progs/directfb/df_reflect.c

progs/directfb/multi_window.c

progs/egl

progs/egl/Makefile

progs/egl/demo1.c

progs/egl/demo2.c

progs/egl/demo3.c

progs/egl/eglgears.c

progs/egl/eglinfo.c

progs/fp

progs/fp/Makefile

progs/fp/point-position.c

progs/fp/tri-abs.c

progs/fp/tri-add.c

progs/fp/tri-cmp.c

progs/fp/tri-cos.c

progs/fp/tri-depth.c

progs/fp/tri-depth2.c

progs/fp/tri-depthwrite.c

progs/fp/tri-depthwrite2.c

progs/fp/tri-dp3.c

progs/fp/tri-dp4.c

progs/fp/tri-dph.c

progs/fp/tri-dst.c

progs/fp/tri-ex2.c

progs/fp/tri-flr.c

progs/fp/tri-fp.c

progs/fp/tri-frc.c

progs/fp/tri-inv.c

progs/fp/tri-kil.c

progs/fp/tri-lg2.c

progs/fp/tri-lit.c

progs/fp/tri-lrp.c

progs/fp/tri-mad.c

progs/fp/tri-max.c

progs/fp/tri-min.c

progs/fp/tri-mov.c

progs/fp/tri-mul.c

progs/fp/tri-param.c

progs/fp/tri-position.c

progs/fp/tri-pow.c

progs/fp/tri-rcp.c

progs/fp/tri-rsq.c

progs/fp/tri-scs.c

progs/fp/tri-sge.c

progs/fp/tri-sge2.c

progs/fp/tri-sin.c

progs/fp/tri-slt.c

progs/fp/tri-sub.c

progs/fp/tri-swz.c

progs/fp/tri-swz2.c

progs/fp/tri-tex.c

progs/fp/tri-xpd.c

progs/miniglx

progs/miniglx/Makefile

progs/miniglx/glfbdevtest.c

progs/miniglx/manytex.c

progs/miniglx/miniglxsample.c

progs/miniglx/miniglxtest.c

progs/miniglx/sample_server.c

progs/miniglx/sample_server2.c

progs/miniglx/texline.c

src/glut/os2/src-glut_os2pm.zip

src/glx/x11/depend

src/mesa/drivers/dri/i965/brw_vs_tnl.c

src/mesa/drivers/windows/gldirect/dx7

src/mesa/drivers/windows/gldirect/dx7/gld_driver_dx7.c

src/mesa/drivers/windows/gldirect/dx7/gld_dx7.h

src/mesa/drivers/windows/gldirect/dx7/gld_dxerr7.h

src/mesa/drivers/windows/gldirect/dx7/gld_ext_dx7.c

src/mesa/drivers/windows/gldirect/dx7/gld_pipeline_dx7.c

src/mesa/drivers/windows/gldirect/dx7/gld_primitive_dx7.c

src/mesa/drivers/windows/gldirect/dx7/gld_texture_dx7.c

src/mesa/drivers/windows/gldirect/dx7/gld_vb_d3d_render_dx7.c

src/mesa/drivers/windows/gldirect/dx7/gld_vb_mesa_render_dx7.c

src/mesa/drivers/windows/gldirect/dx7/gld_wgl_dx7.c

src/mesa/drivers/windows/gldirect/dx8

src/mesa/drivers/windows/gldirect/dx8/gld_driver_dx8.c

src/mesa/drivers/windows/gldirect/dx8/gld_dx8.h

src/mesa/drivers/windows/gldirect/dx8/gld_dxerr8.h

src/mesa/drivers/windows/gldirect/dx8/gld_ext_dx8.c

src/mesa/drivers/windows/gldirect/dx8/gld_pipeline_dx8.c

src/mesa/drivers/windows/gldirect/dx8/gld_primitive_dx8.c

src/mesa/drivers/windows/gldirect/dx8/gld_texture_dx8.c

src/mesa/drivers/windows/gldirect/dx8/gld_vb_d3d_render_dx8.c

src/mesa/drivers/windows/gldirect/dx8/gld_vb_mesa_render_dx8.c

src/mesa/drivers/windows/gldirect/dx8/gld_wgl_dx8.c

src/mesa/drivers/windows/gldirect/dx9

src/mesa/drivers/windows/gldirect/dx9/gld_driver_dx9.c

src/mesa/drivers/windows/gldirect/dx9/gld_dx9.h

src/mesa/drivers/windows/gldirect/dx9/gld_dxerr9.h

src/mesa/drivers/windows/gldirect/dx9/gld_ext_dx9.c

src/mesa/drivers/windows/gldirect/dx9/gld_pipeline_dx9.c

src/mesa/drivers/windows/gldirect/dx9/gld_primitive_dx9.c

src/mesa/drivers/windows/gldirect/dx9/gld_texture_dx9.c

src/mesa/drivers/windows/gldirect/dx9/gld_vb_d3d_render_dx9.c

src/mesa/drivers/windows/gldirect/dx9/gld_vb_mesa_render_dx9.c

src/mesa/drivers/windows/gldirect/dx9/gld_wgl_dx9.c

src/mesa/shader/slang/slang_library_noise.c

src/mesa/shader/slang/slang_library_noise.h

files modified:
Makefile

autogen.sh *

bin/confdiff.sh *

bin/installmesa *

configs/autoconf.in

configs/default

configure.ac

debian/changelog

debian/control

debian/copyright

debian/patches/03_optional-progs-and-install.patch

debian/patches/04_osmesa_version.diff

debian/patches/series

debian/rules

docs/MESA_copy_sub_buffer.spec

docs/cell.html

docs/contents.html

docs/download.html

docs/install.html

docs/news.html

docs/relnotes.html

docs/shading.html

include/GL/Makefile.am

include/GL/glext.h

include/GL/glxext.h

include/GL/internal/dri_interface.h

progs/demos/arbocclude.c

progs/glsl/Makefile

progs/glsl/noise.c

progs/glsl/twoside.c

progs/util/shaderutil.c

src/glu/Makefile

src/glu/glu.pc.in

src/glu/sgi/glu.exports

src/glut/glx/Makefile

src/glut/glx/glut.pc.in

src/glut/glx/glut_init.c

src/glut/glx/glut_swap.c

src/glut/glx/glutint.h

src/glut/mini/Makefile

src/glut/mini/glut.pc.in

src/glw/Makefile

src/glw/glw.pc.in

src/glx/x11/drisw_glx.c

src/glx/x11/glx_pbuffer.c

src/glx/x11/glxcmds.c

src/glx/x11/glxcurrent.c

src/glx/x11/indirect.c

src/mesa/Makefile

src/mesa/drivers/allegro/amesa.c

src/mesa/drivers/dri/Makefile

src/mesa/drivers/dri/common/dri_util.c

src/mesa/drivers/dri/common/dri_util.h

src/mesa/drivers/dri/common/vblank.c

src/mesa/drivers/dri/dri.pc.in

src/mesa/drivers/dri/i915/i830_vtbl.c

src/mesa/drivers/dri/i915/i915_context.c

src/mesa/drivers/dri/i915/i915_fragprog.c

src/mesa/drivers/dri/i915/i915_state.c

src/mesa/drivers/dri/i915/i915_texstate.c

src/mesa/drivers/dri/i915/i915_vtbl.c

src/mesa/drivers/dri/i965/Makefile

src/mesa/drivers/dri/i965/brw_context.c

src/mesa/drivers/dri/i965/brw_context.h

src/mesa/drivers/dri/i965/brw_draw.c

src/mesa/drivers/dri/i965/brw_eu.h

src/mesa/drivers/dri/i965/brw_eu_emit.c

src/mesa/drivers/dri/i965/brw_program.c

src/mesa/drivers/dri/i965/brw_sf.c

src/mesa/drivers/dri/i965/brw_sf_state.c

src/mesa/drivers/dri/i965/brw_state.h

src/mesa/drivers/dri/i965/brw_state_upload.c

src/mesa/drivers/dri/i965/brw_vs_emit.c

src/mesa/drivers/dri/i965/brw_wm.c

src/mesa/drivers/dri/i965/brw_wm_emit.c

src/mesa/drivers/dri/i965/brw_wm_fp.c

src/mesa/drivers/dri/i965/brw_wm_glsl.c

src/mesa/drivers/dri/i965/brw_wm_sampler_state.c

src/mesa/drivers/dri/i965/brw_wm_surface_state.c

src/mesa/drivers/dri/intel/intel_batchbuffer.h

src/mesa/drivers/dri/intel/intel_blit.c

src/mesa/drivers/dri/intel/intel_buffers.c

src/mesa/drivers/dri/intel/intel_buffers.h

src/mesa/drivers/dri/intel/intel_context.c

src/mesa/drivers/dri/intel/intel_context.h

src/mesa/drivers/dri/intel/intel_decode.c

src/mesa/drivers/dri/intel/intel_depthstencil.c

src/mesa/drivers/dri/intel/intel_fbo.c

src/mesa/drivers/dri/intel/intel_fbo.h

src/mesa/drivers/dri/intel/intel_mipmap_tree.c

src/mesa/drivers/dri/intel/intel_pixel.c

src/mesa/drivers/dri/intel/intel_pixel.h

src/mesa/drivers/dri/intel/intel_pixel_bitmap.c

src/mesa/drivers/dri/intel/intel_pixel_copy.c

src/mesa/drivers/dri/intel/intel_pixel_draw.c

src/mesa/drivers/dri/intel/intel_regions.c

src/mesa/drivers/dri/intel/intel_screen.c

src/mesa/drivers/dri/intel/intel_screen.h

src/mesa/drivers/dri/intel/intel_span.c

src/mesa/drivers/dri/intel/intel_tex.c

src/mesa/drivers/dri/intel/intel_tex.h

src/mesa/drivers/dri/intel/intel_tex_copy.c

src/mesa/drivers/dri/intel/intel_tex_format.c

src/mesa/drivers/dri/intel/intel_tex_subimage.c

src/mesa/drivers/dri/mach64/mach64_context.h

src/mesa/drivers/dri/r300/Lindent *

src/mesa/drivers/dri/r300/r300_reg.h

src/mesa/drivers/dri/r300/r300_render.c

src/mesa/drivers/dri/r300/r300_state.c

src/mesa/drivers/dri/r300/radeon_program_pair.c

src/mesa/drivers/dri/swrast/swrast.c

src/mesa/drivers/glslcompiler/Makefile

src/mesa/drivers/osmesa/Makefile

src/mesa/drivers/windows/gdi/mesa.def

src/mesa/drivers/windows/gdi/wgl.c

src/mesa/drivers/x11/fakeglx.c

src/mesa/drivers/x11/xm_api.c

src/mesa/drivers/x11/xm_buffer.c

src/mesa/drivers/x11/xmesaP.h

src/mesa/gl.pc.in

src/mesa/glapi/extension_helper.py

src/mesa/glapi/glX_proto_recv.py

src/mesa/glapi/glX_proto_send.py

src/mesa/glapi/glX_proto_size.py

src/mesa/glapi/gl_API.dtd

src/mesa/glapi/gl_API.xml

src/mesa/glapi/gl_XML.py

src/mesa/glapi/gl_apitemp.py

src/mesa/glapi/gl_x86_asm.py

src/mesa/glapi/glthread.h

src/mesa/glapi/next_available_offset.sh *

src/mesa/main/api_validate.c

src/mesa/main/arrayobj.c

src/mesa/main/attrib.c

src/mesa/main/bufferobj.c

src/mesa/main/colormac.h

src/mesa/main/config.h

src/mesa/main/context.c

src/mesa/main/enable.c

src/mesa/main/ffvertex_prog.c

src/mesa/main/get.c

src/mesa/main/get_gen.py

src/mesa/main/glheader.h

src/mesa/main/image.c

src/mesa/main/image.h

src/mesa/main/imports.c

src/mesa/main/imports.h

src/mesa/main/light.c

src/mesa/main/mipmap.c

src/mesa/main/mtypes.h

src/mesa/main/points.c

src/mesa/main/rastpos.c

src/mesa/main/sources

src/mesa/main/state.c

src/mesa/main/texcompress_s3tc.c

src/mesa/main/texenvprogram.c

src/mesa/main/texformat.c

src/mesa/main/texformat.h

src/mesa/main/texformat_tmp.h

src/mesa/main/texobj.c

src/mesa/main/texrender.c

src/mesa/main/texstate.c

src/mesa/main/texstore.c

src/mesa/main/texstore.h

src/mesa/main/version.h

src/mesa/math/m_matrix.h

src/mesa/shader/arbprogparse.c

src/mesa/shader/arbprogram.c

src/mesa/shader/prog_execute.c

src/mesa/shader/prog_instruction.c

src/mesa/shader/prog_parameter.c

src/mesa/shader/prog_print.c

src/mesa/shader/prog_statevars.h

src/mesa/shader/program.c

src/mesa/shader/slang/library/slang_120_core_gc.h

src/mesa/shader/slang/library/slang_builtin_120_common_gc.h

src/mesa/shader/slang/library/slang_builtin_120_fragment_gc.h

src/mesa/shader/slang/library/slang_common_builtin.gc

src/mesa/shader/slang/library/slang_common_builtin_gc.h

src/mesa/shader/slang/library/slang_core.gc

src/mesa/shader/slang/library/slang_core_gc.h

src/mesa/shader/slang/library/slang_fragment_builtin_gc.h

src/mesa/shader/slang/library/slang_pp_directives.syn

src/mesa/shader/slang/library/slang_pp_directives_syn.h

src/mesa/shader/slang/library/slang_shader.syn

src/mesa/shader/slang/library/slang_shader_syn.h

src/mesa/shader/slang/library/slang_vertex_builtin_gc.h

src/mesa/shader/slang/slang_builtin.c

src/mesa/shader/slang/slang_builtin.h

src/mesa/shader/slang/slang_codegen.c

src/mesa/shader/slang/slang_codegen.h

src/mesa/shader/slang/slang_compile.c

src/mesa/shader/slang/slang_compile.h

src/mesa/shader/slang/slang_compile_function.c

src/mesa/shader/slang/slang_compile_function.h

src/mesa/shader/slang/slang_compile_operation.c

src/mesa/shader/slang/slang_compile_operation.h

src/mesa/shader/slang/slang_compile_variable.c

src/mesa/shader/slang/slang_compile_variable.h

src/mesa/shader/slang/slang_emit.c

src/mesa/shader/slang/slang_emit.h

src/mesa/shader/slang/slang_ir.c

src/mesa/shader/slang/slang_ir.h

src/mesa/shader/slang/slang_link.c

src/mesa/shader/slang/slang_log.c

src/mesa/shader/slang/slang_mem.c

src/mesa/shader/slang/slang_preprocess.c

src/mesa/shader/slang/slang_preprocess.h

src/mesa/shader/slang/slang_print.c

src/mesa/shader/slang/slang_simplify.c

src/mesa/shader/slang/slang_storage.h

src/mesa/shader/slang/slang_typeinfo.c

src/mesa/shader/slang/slang_typeinfo.h

src/mesa/sources

src/mesa/swrast/s_aalinetemp.h

src/mesa/swrast/s_span.c

src/mesa/swrast/s_texfilter.c

src/mesa/tnl/t_context.c

src/mesa/tnl/t_vertex.c

src/mesa/tnl/t_vertex.h

src/mesa/tnl/t_vertex_generic.c

src/mesa/tnl/t_vertex_sse.c

windows/VC7/mesa/mesa/mesa.vcproj

windows/VC8/mesa/gdi/gdi.vcproj

windows/VC8/mesa/glu/glu.vcproj

windows/VC8/mesa/mesa/mesa.vcproj

windows/VC8/mesa/osmesa/osmesa.vcproj

windows/VC8/progs/demos/gears.vcproj

windows/VC8/progs/glut/glut.vcproj

Show diffs side-by-side

added added

removed removed

src/mesa/drivers/dri/i965/brw_wm_glsl.c

267

struct brw_reg src, dst;

268

dst = get_dst_reg(c, inst, i, 1) ;

269

src = get_src_reg(c, &inst->SrcReg[0], i, 1);

270

brw_RNDD(p, dst, src);

270

brw_RNDZ(p, dst, src);

271

}

272

}

273

brw_set_saturate(p, 0);

623

brw_MAC(p, brw_null_reg(), src0[1], src1[1]);

624

brw_MAC(p, dst, src0[2], src1[2]);

625

brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);

626

brw_ADD(p, dst, src0[3], src1[3]);

626

brw_ADD(p, dst, dst, src1[3]);

627

brw_set_saturate(p, 0);

628

}

629

892

}

893

}

894

895

/**

896

* For GLSL shaders, this KIL will be unconditional.

897

* It may be contained inside an IF/ENDIF structure of course.

898

*/

895

899

static void emit_kil(struct brw_wm_compile *c)

896

900

{

897

struct brw_compile *p = &c->func;

898

struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);

899

brw_push_insn_state(p);

900

brw_set_mask_control(p, BRW_MASK_DISABLE);

901

brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK

902

brw_AND(p, depth, c->emit_mask_reg, depth);

903

brw_pop_insn_state(p);

901

struct brw_compile *p = &c->func;

902

struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);

903

brw_push_insn_state(p);

904

brw_set_mask_control(p, BRW_MASK_DISABLE);

905

brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK

906

brw_AND(p, depth, c->emit_mask_reg, depth);

907

brw_pop_insn_state(p);

904

908

}

905

909

906

910

static void emit_mad(struct brw_wm_compile *c,

1095

1099

/* Arrange the two end coordinates into scalars (itmp0/itmp1) to

1096

1100

be hashed. Also compute the remainder (offset within the unit

1097

1101

length), interleaved to reduce register dependency penalties. */

1098

brw_RNDD( p, itmp[ 0 ], param );

1102

brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param );

1099

1103

brw_FRC( p, param, param );

1100

1104

brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 1 ) );

1101

1105

brw_MOV( p, itmp[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */

1220

1224

/* Arrange the four corner coordinates into scalars (itmp0..itmp3) to

1221

1225

be hashed. Also compute the remainders (offsets within the unit

1222

1226

square), interleaved to reduce register dependency penalties. */

1223

brw_RNDD( p, itmp[ 0 ], param0 );

1224

brw_RNDD( p, itmp[ 1 ], param1 );

1227

brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param0 );

1228

brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param1 );

1225

1229

brw_FRC( p, param0, param0 );

1226

1230

brw_FRC( p, param1, param1 );

1227

1231

brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */

1400

1404

/* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to

1401

1405

be hashed. Also compute the remainders (offsets within the unit

1402

1406

cube), interleaved to reduce register dependency penalties. */

1403

brw_RNDD( p, itmp[ 0 ], param0 );

1404

brw_RNDD( p, itmp[ 1 ], param1 );

1405

brw_RNDD( p, itmp[ 2 ], param2 );

1406

brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xBC8F ) ); /* constant used later */

1407

brw_MOV( p, itmp[ 5 ], brw_imm_ud( 0xD0BD ) ); /* constant used later */

1408

brw_MOV( p, itmp[ 6 ], brw_imm_ud( 0x9B93 ) ); /* constant used later */

1407

brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param0 );

1408

brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param1 );

1409

brw_RNDD( p, retype( itmp[ 2 ], BRW_REGISTER_TYPE_D ), param2 );

1409

1410

brw_FRC( p, param0, param0 );

1410

1411

brw_FRC( p, param1, param1 );

1411

1412

brw_FRC( p, param2, param2 );

1412

1413

/* Since we now have only 16 bits of precision in the hash, we must

1413

1414

be more careful about thorough mixing to maintain entropy as we

1414

1415

squash the input vector into a small scalar. */

1415

brw_MUL( p, brw_acc_reg(), itmp[ 4 ], itmp[ 0 ] );

1416

brw_MAC( p, brw_acc_reg(), itmp[ 5 ], itmp[ 1 ] );

1417

brw_MAC( p, itmp[ 0 ], itmp[ 6 ], itmp[ 2 ] );

1416

brw_MUL( p, brw_null_reg(), low_words( itmp[ 0 ] ), brw_imm_uw( 0xBC8F ) );

1417

brw_MAC( p, brw_null_reg(), low_words( itmp[ 1 ] ), brw_imm_uw( 0xD0BD ) );

1418

brw_MAC( p, low_words( itmp[ 0 ] ), low_words( itmp[ 2 ] ),

1419

brw_imm_uw( 0x9B93 ) );

1418

1420

brw_ADD( p, high_words( itmp[ 0 ] ), low_words( itmp[ 0 ] ),

1419

1421

brw_imm_uw( 0xBC8F ) );

1420

1422

1668

1670

release_tmps( c, mark );

1669

1671

}

1670

1672

1673

/* For the four-dimensional case, the little micro-optimisation benefits

1674

we obtain by unrolling all the loops aren't worth the massive bloat it

1675

now causes. Instead, we loop twice around performing a similar operation

1676

to noise3, once for the w=0 cube and once for the w=1, with a bit more

1677

code to glue it all together. */

1678

static void noise4_sub( struct brw_wm_compile *c ) {

1679

1680

struct brw_compile *p = &c->func;

1681

struct brw_reg param[ 4 ],

1682

x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */

1683

w0, /* noise for the w=0 cube */

1684

floors[ 2 ], /* integer coordinates of base corner of hypercube */

1685

interp[ 4 ], /* interpolation coefficients */

1686

t, tmp[ 8 ], /* float temporaries */

1687

itmp[ 8 ], /* unsigned integer temporaries (aliases of floats above) */

1688

wtmp[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */

1689

int i, j;

1690

int mark = mark_tmps( c );

1691

GLuint loop, origin;

1692

1693

x0y0 = alloc_tmp( c );

1694

x0y1 = alloc_tmp( c );

1695

x1y0 = alloc_tmp( c );

1696

x1y1 = alloc_tmp( c );

1697

t = alloc_tmp( c );

1698

w0 = alloc_tmp( c );

1699

floors[ 0 ] = retype( alloc_tmp( c ), BRW_REGISTER_TYPE_UD );

1700

floors[ 1 ] = retype( alloc_tmp( c ), BRW_REGISTER_TYPE_UD );

1701

1702

for( i = 0; i < 4; i++ ) {

1703

param[ i ] = lookup_tmp( c, mark - 5 + i );

1704

interp[ i ] = alloc_tmp( c );

1705

}

1706

1707

for( i = 0; i < 8; i++ ) {

1708

tmp[ i ] = alloc_tmp( c );

1709

itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD );

1710

wtmp[ i ] = brw_uw16_grf( tmp[ i ].nr, 0 );

1711

}

1712

1713

brw_set_access_mode( p, BRW_ALIGN_1 );

1714

1715

/* We only want 16 bits of precision from the integral part of each

1716

co-ordinate, but unfortunately the RNDD semantics would saturate

1717

at 16 bits if we performed the operation directly to a 16-bit

1718

destination. Therefore, we round to 32-bit temporaries where

1719

appropriate, and then store only the lower 16 bits. */

1720

brw_RNDD( p, retype( floors[ 0 ], BRW_REGISTER_TYPE_D ), param[ 0 ] );

1721

brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param[ 1 ] );

1722

brw_RNDD( p, retype( floors[ 1 ], BRW_REGISTER_TYPE_D ), param[ 2 ] );

1723

brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param[ 3 ] );

1724

brw_MOV( p, high_words( floors[ 0 ] ), low_words( itmp[ 0 ] ) );

1725

brw_MOV( p, high_words( floors[ 1 ] ), low_words( itmp[ 1 ] ) );

1726

1727

/* Modify the flag register here, because the side effect is useful

1728

later (see below). We know for certain that all flags will be

1729

cleared, since the FRC instruction cannot possibly generate

1730

negative results. Even for exceptional inputs (infinities, denormals,

1731

NaNs), the architecture guarantees that the L conditional is false. */

1732

brw_set_conditionalmod( p, BRW_CONDITIONAL_L );

1733

brw_FRC( p, param[ 0 ], param[ 0 ] );

1734

brw_set_predicate_control( p, BRW_PREDICATE_NONE );

1735

for( i = 1; i < 4; i++ )

1736

brw_FRC( p, param[ i ], param[ i ] );

1737

1738

/* Calculate the interpolation coefficients (6t^5 - 15t^4 + 10t^3) first

1739

of all. */

1740

for( i = 0; i < 4; i++ )

1741

brw_MUL( p, interp[ i ], param[ i ], brw_imm_f( 6.0 ) );

1742

for( i = 0; i < 4; i++ )

1743

brw_ADD( p, interp[ i ], interp[ i ], brw_imm_f( -15.0 ) );

1744

for( i = 0; i < 4; i++ )

1745

brw_MUL( p, interp[ i ], interp[ i ], param[ i ] );

1746

for( i = 0; i < 4; i++ )

1747

brw_ADD( p, interp[ i ], interp[ i ], brw_imm_f( 10.0 ) );

1748

for( j = 0; j < 3; j++ )

1749

for( i = 0; i < 4; i++ )

1750

brw_MUL( p, interp[ i ], interp[ i ], param[ i ] );

1751

1752

/* Mark the current address, as it will be a jump destination. The

1753

following code will be executed twice: first, with the flag

1754

register clear indicating the w=0 case, and second with the flag

1755

set for w=1. */

1756

loop = p->nr_insn;

1757

1758

/* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to

1759

be hashed. Since we have only 16 bits of precision in the hash, we

1760

must be careful about thorough mixing to maintain entropy as we

1761

squash the input vector into a small scalar. */

1762

brw_MUL( p, brw_null_reg(), low_words( floors[ 0 ] ),

1763

brw_imm_uw( 0xBC8F ) );

1764

brw_MAC( p, brw_null_reg(), high_words( floors[ 0 ] ),

1765

brw_imm_uw( 0xD0BD ) );

1766

brw_MAC( p, brw_null_reg(), low_words( floors[ 1 ] ),

1767

brw_imm_uw( 0x9B93 ) );

1768

brw_MAC( p, low_words( itmp[ 0 ] ), high_words( floors[ 1 ] ),

1769

brw_imm_uw( 0xA359 ) );

1770

brw_ADD( p, high_words( itmp[ 0 ] ), low_words( itmp[ 0 ] ),

1771

brw_imm_uw( 0xBC8F ) );

1772

1773

/* Temporarily disable the execution mask while we work with ExecSize=16

1774

channels (the mask is set for ExecSize=8 and is probably incorrect).

1775

Although this might cause execution of unwanted channels, the code

1776

writes only to temporary registers and has no side effects, so

1777

disabling the mask is harmless. */

1778

brw_push_insn_state( p );

1779

brw_set_mask_control( p, BRW_MASK_DISABLE );

1780

brw_ADD( p, wtmp[ 1 ], wtmp[ 0 ], brw_imm_uw( 0xD0BD ) );

1781

brw_ADD( p, wtmp[ 2 ], wtmp[ 0 ], brw_imm_uw( 0x9B93 ) );

1782

brw_ADD( p, wtmp[ 3 ], wtmp[ 1 ], brw_imm_uw( 0x9B93 ) );

1783

1784

/* We're now ready to perform the hashing. The eight hashes are

1785

interleaved for performance. The hash function used is

1786

designed to rapidly achieve avalanche and require only 16x16

1787

bit multiplication, and 8-bit swizzles (which we get for

1788

free). */

1789

for( i = 0; i < 4; i++ )

1790

brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0x28D9 ) );

1791

for( i = 0; i < 4; i++ )

1792

brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ),

1793

odd_bytes( wtmp[ i ] ) );

1794

for( i = 0; i < 4; i++ )

1795

brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0xC6D5 ) );

1796

for( i = 0; i < 4; i++ )

1797

brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ),

1798

odd_bytes( wtmp[ i ] ) );

1799

brw_pop_insn_state( p );

1800

1801

/* Now we want to initialise the four rear gradients based on the

1802

hashes. Format conversion from signed integer to float leaves

1803

everything scaled too high by a factor of pow( 2, 15 ), but

1804

we correct for that right at the end. */

1805

/* x component */

1806

brw_ADD( p, t, param[ 0 ], brw_imm_f( -1.0 ) );

1807

brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) );

1808

brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) );

1809

brw_MOV( p, x1y0, high_words( tmp[ 0 ] ) );

1810

brw_MOV( p, x1y1, high_words( tmp[ 1 ] ) );

1811

1812

brw_push_insn_state( p );

1813

brw_set_mask_control( p, BRW_MASK_DISABLE );

1814

brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) );

1815

brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) );

1816

brw_pop_insn_state( p );

1817

1818

brw_MUL( p, x1y0, x1y0, t );

1819

brw_MUL( p, x1y1, x1y1, t );

1820

brw_ADD( p, t, param[ 1 ], brw_imm_f( -1.0 ) );

1821

brw_MUL( p, x0y0, x0y0, param[ 0 ] );

1822

brw_MUL( p, x0y1, x0y1, param[ 0 ] );

1823

1824

/* y component */

1825

brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );

1826

brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );

1827

brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );

1828

brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );

1829

1830

brw_push_insn_state( p );

1831

brw_set_mask_control( p, BRW_MASK_DISABLE );

1832

brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) );

1833

brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) );

1834

brw_pop_insn_state( p );

1835

1836

brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );

1837

brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );

1838

/* prepare t for the w component (used below): w the first time through

1839

the loop; w - 1 the second time */

1840

brw_set_predicate_control( p, BRW_PREDICATE_NORMAL );

1841

brw_ADD( p, t, param[ 3 ], brw_imm_f( -1.0 ) );

1842

p->current->header.predicate_inverse = 1;

1843

brw_MOV( p, t, param[ 3 ] );

1844

p->current->header.predicate_inverse = 0;

1845

brw_set_predicate_control( p, BRW_PREDICATE_NONE );

1846

brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 1 ] );

1847

brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 1 ] );

1848

1849

brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );

1850

brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );

1851

brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );

1852

brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );

1853

1854

/* z component */

1855

brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );

1856

brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );

1857

brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );

1858

brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );

1859

1860

brw_push_insn_state( p );

1861

brw_set_mask_control( p, BRW_MASK_DISABLE );

1862

brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) );

1863

brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) );

1864

brw_pop_insn_state( p );

1865

1866

brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 2 ] );

1867

brw_MUL( p, tmp[ 5 ], tmp[ 5 ], param[ 2 ] );

1868

brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 2 ] );

1869

brw_MUL( p, tmp[ 7 ], tmp[ 7 ], param[ 2 ] );

1870

1871

brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );

1872

brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );

1873

brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );

1874

brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );

1875

1876

/* w component */

1877

brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );

1878

brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );

1879

brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );

1880

brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );

1881

1882

brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t );

1883

brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );

1884

brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t );

1885

brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );

1886

brw_ADD( p, t, param[ 0 ], brw_imm_f( -1.0 ) );

1887

1888

brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );

1889

brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );

1890

brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );

1891

brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );

1892

1893

/* Here we interpolate in the y dimension... */

1894

brw_ADD( p, x0y1, x0y1, negate( x0y0 ) );

1895

brw_ADD( p, x1y1, x1y1, negate( x1y0 ) );

1896

brw_MUL( p, x0y1, x0y1, interp[ 1 ] );

1897

brw_MUL( p, x1y1, x1y1, interp[ 1 ] );

1898

brw_ADD( p, x0y0, x0y0, x0y1 );

1899

brw_ADD( p, x1y0, x1y0, x1y1 );

1900

1901

/* And now in x. Leave the result in tmp[ 0 ] (see below)... */

1902

brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );

1903

brw_MUL( p, x1y0, x1y0, interp[ 0 ] );

1904

brw_ADD( p, tmp[ 0 ], x0y0, x1y0 );

1905

1906

/* Now do the same thing for the front four gradients... */

1907

/* x component */

1908

brw_MOV( p, x0y0, low_words( tmp[ 2 ] ) );

1909

brw_MOV( p, x0y1, low_words( tmp[ 3 ] ) );

1910

brw_MOV( p, x1y0, high_words( tmp[ 2 ] ) );

1911

brw_MOV( p, x1y1, high_words( tmp[ 3 ] ) );

1912

1913

brw_push_insn_state( p );

1914

brw_set_mask_control( p, BRW_MASK_DISABLE );

1915

brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) );

1916

brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) );

1917

brw_pop_insn_state( p );

1918

1919

brw_MUL( p, x1y0, x1y0, t );

1920

brw_MUL( p, x1y1, x1y1, t );

1921

brw_ADD( p, t, param[ 1 ], brw_imm_f( -1.0 ) );

1922

brw_MUL( p, x0y0, x0y0, param[ 0 ] );

1923

brw_MUL( p, x0y1, x0y1, param[ 0 ] );

1924

1925

/* y component */

1926

brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );

1927

brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );

1928

brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );

1929

brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );

1930

1931

brw_push_insn_state( p );

1932

brw_set_mask_control( p, BRW_MASK_DISABLE );

1933

brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) );

1934

brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) );

1935

brw_pop_insn_state( p );

1936

1937

brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );

1938

brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );

1939

brw_ADD( p, t, param[ 2 ], brw_imm_f( -1.0 ) );

1940

brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 1 ] );

1941

brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 1 ] );

1942

1943

brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );

1944

brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );

1945

brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );

1946

brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );

1947

1948

/* z component */

1949

brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );

1950

brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );

1951

brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );

1952

brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );

1953

1954

brw_push_insn_state( p );

1955

brw_set_mask_control( p, BRW_MASK_DISABLE );

1956

brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) );

1957

brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) );

1958

brw_pop_insn_state( p );

1959

1960

brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t );

1961

brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );

1962

brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t );

1963

brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );

1964

/* prepare t for the w component (used below): w the first time through

1965

the loop; w - 1 the second time) */

1966

brw_set_predicate_control( p, BRW_PREDICATE_NORMAL );

1967

brw_ADD( p, t, param[ 3 ], brw_imm_f( -1.0 ) );

1968

p->current->header.predicate_inverse = 1;

1969

brw_MOV( p, t, param[ 3 ] );

1970

p->current->header.predicate_inverse = 0;

1971

brw_set_predicate_control( p, BRW_PREDICATE_NONE );

1972

1973

brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );

1974

brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );

1975

brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );

1976

brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );

1977

1978

/* w component */

1979

brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );

1980

brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );

1981

brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );

1982

brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );

1983

1984

brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t );

1985

brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );

1986

brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t );

1987

brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );

1988

1989

brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );

1990

brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );

1991

brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );

1992

brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );

1993

1994

/* Interpolate in the y dimension: */

1995

brw_ADD( p, x0y1, x0y1, negate( x0y0 ) );

1996

brw_ADD( p, x1y1, x1y1, negate( x1y0 ) );

1997

brw_MUL( p, x0y1, x0y1, interp[ 1 ] );

1998

brw_MUL( p, x1y1, x1y1, interp[ 1 ] );

1999

brw_ADD( p, x0y0, x0y0, x0y1 );

2000

brw_ADD( p, x1y0, x1y0, x1y1 );

2001

2002

/* And now in x. The rear face is in tmp[ 0 ] (see above), so this

2003

time put the front face in tmp[ 1 ] and we're nearly there... */

2004

brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );

2005

brw_MUL( p, x1y0, x1y0, interp[ 0 ] );

2006

brw_ADD( p, tmp[ 1 ], x0y0, x1y0 );

2007

2008

/* Another interpolation, in the z dimension: */

2009

brw_ADD( p, tmp[ 1 ], tmp[ 1 ], negate( tmp[ 0 ] ) );

2010

brw_MUL( p, tmp[ 1 ], tmp[ 1 ], interp[ 2 ] );

2011

brw_ADD( p, tmp[ 0 ], tmp[ 0 ], tmp[ 1 ] );

2012

2013

/* Exit the loop if we've computed both cubes... */

2014

origin = p->nr_insn;

2015

brw_push_insn_state( p );

2016

brw_set_predicate_control( p, BRW_PREDICATE_NORMAL );

2017

brw_set_mask_control( p, BRW_MASK_DISABLE );

2018

brw_ADD( p, brw_ip_reg(), brw_ip_reg(), brw_imm_d( 0 ) );

2019

brw_pop_insn_state( p );

2020

2021

/* Save the result for the w=0 case, and increment the w coordinate: */

2022

brw_MOV( p, w0, tmp[ 0 ] );

2023

brw_ADD( p, high_words( floors[ 1 ] ), high_words( floors[ 1 ] ),

2024

brw_imm_uw( 1 ) );

2025

2026

/* Loop around for the other cube. Explicitly set the flag register

2027

(unfortunately we must spend an extra instruction to do this: we

2028

can't rely on a side effect of the previous MOV or ADD because

2029

conditional modifiers which are normally true might be false in

2030

exceptional circumstances, e.g. given a NaN input; the add to

2031

brw_ip_reg() is not suitable because the IP is not an 8-vector). */

2032

brw_push_insn_state( p );

2033

brw_set_mask_control( p, BRW_MASK_DISABLE );

2034

brw_MOV( p, brw_flag_reg(), brw_imm_uw( 0xFF ) );

2035

brw_ADD( p, brw_ip_reg(), brw_ip_reg(),

2036

brw_imm_d( ( loop - p->nr_insn ) << 4 ) );

2037

brw_pop_insn_state( p );

2038

2039

/* Patch the previous conditional branch now that we know the

2040

destination address. */

2041

brw_set_src1( p->store + origin,

2042

brw_imm_d( ( p->nr_insn - origin ) << 4 ) );

2043

2044

/* The very last interpolation. */

2045

brw_ADD( p, tmp[ 0 ], tmp[ 0 ], negate( w0 ) );

2046

brw_MUL( p, tmp[ 0 ], tmp[ 0 ], interp[ 3 ] );

2047

brw_ADD( p, tmp[ 0 ], tmp[ 0 ], w0 );

2048

2049

/* scale by pow( 2, -15 ), as described above */

2050

brw_MUL( p, param[ 0 ], tmp[ 0 ], brw_imm_f( 0.000030517578125 ) );

2051

2052

release_tmps( c, mark );

2053

}

2054

2055

static void emit_noise4( struct brw_wm_compile *c,

2056

struct prog_instruction *inst )

2057

{

2058

struct brw_compile *p = &c->func;

2059

struct brw_reg src0, src1, src2, src3, param0, param1, param2, param3, dst;

2060

GLuint mask = inst->DstReg.WriteMask;

2061

int i;

2062

int mark = mark_tmps( c );

2063

2064

assert( mark == 0 );

2065

2066

src0 = get_src_reg( c, inst->SrcReg, 0, 1 );

2067

src1 = get_src_reg( c, inst->SrcReg, 1, 1 );

2068

src2 = get_src_reg( c, inst->SrcReg, 2, 1 );

2069

src3 = get_src_reg( c, inst->SrcReg, 3, 1 );

2070

2071

param0 = alloc_tmp( c );

2072

param1 = alloc_tmp( c );

2073

param2 = alloc_tmp( c );

2074

param3 = alloc_tmp( c );

2075

2076

brw_MOV( p, param0, src0 );

2077

brw_MOV( p, param1, src1 );

2078

brw_MOV( p, param2, src2 );

2079

brw_MOV( p, param3, src3 );

2080

2081

invoke_subroutine( c, SUB_NOISE4, noise4_sub );

2082

2083

/* Fill in the result: */

2084

brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE );

2085

for (i = 0 ; i < 4; i++) {

2086

if (mask & (1<<i)) {

2087

dst = get_dst_reg(c, inst, i, 1);

2088

brw_MOV( p, dst, param0 );

2089

}

2090

}

2091

if( inst->SaturateMode == SATURATE_ZERO_ONE )

2092

brw_set_saturate( p, 0 );

2093

2094

release_tmps( c, mark );

2095

}

2096

1671

2097

static void emit_wpos_xy(struct brw_wm_compile *c,

1672

2098

struct prog_instruction *inst)

1673

2099

{

1996

2422

case OPCODE_NOISE3:

1997

2423

emit_noise3(c, inst);

1998

2424

break;

1999

/* case OPCODE_NOISE4: */

2000

/* not yet implemented */

2425

case OPCODE_NOISE4:

2426

emit_noise4(c, inst);

2427

break;

2001

2428

case OPCODE_TEX:

2002

2429

emit_tex(c, inst);

2003

2430

break;

Older »