 * Copyright © 2010 Intel Corporation
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * This file drives the GLSL IR -> LIR translation, contains the
 * optimizations on the LIR, and drives the generation of native code
#include "main/macros.h"
#include "brw_fs_live_variables.h"
#include "brw_vec4_gs_visitor.h"
#include "brw_dead_control_flow.h"
#include "brw_private.h"
#include "dev/intel_debug.h"
#include "compiler/glsl_types.h"
#include "compiler/nir/nir_builder.h"
#include "program/prog_parameter.h"
#include "util/u_math.h"
static unsigned get_lowered_simd_width(const struct intel_device_info *devinfo,
fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
const fs_reg *src, unsigned sources)
memset((void*)this, 0, sizeof(*this));
this->src = new fs_reg[MAX2(sources, 3)];
for (unsigned i = 0; i < sources; i++)
this->src[i] = src[i];
this->opcode = opcode;
this->sources = sources;
this->exec_size = exec_size;
assert(dst.file != IMM && dst.file != UNIFORM);
assert(this->exec_size != 0);
this->conditional_mod = BRW_CONDITIONAL_NONE;
/* This will be the case for almost all instructions. */
this->size_written = dst.component_size(exec_size);
this->size_written = 0;
unreachable("Invalid destination register file");
this->writes_accumulator = false;
init(BRW_OPCODE_NOP, 8, dst, NULL, 0);
fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size)
init(opcode, exec_size, reg_undef, NULL, 0);
fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst)
init(opcode, exec_size, dst, NULL, 0);
fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
const fs_reg src[1] = { src0 };
init(opcode, exec_size, dst, src, 1);
fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
const fs_reg &src0, const fs_reg &src1)
const fs_reg src[2] = { src0, src1 };
init(opcode, exec_size, dst, src, 2);
fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
const fs_reg &src0, const fs_reg &src1, const fs_reg &src2)
const fs_reg src[3] = { src0, src1, src2 };
init(opcode, exec_size, dst, src, 3);
fs_inst::fs_inst(enum opcode opcode, uint8_t exec_width, const fs_reg &dst,
const fs_reg src[], unsigned sources)
init(opcode, exec_width, dst, src, sources);
fs_inst::fs_inst(const fs_inst &that)
memcpy((void*)this, &that, sizeof(that));
this->src = new fs_reg[MAX2(that.sources, 3)];
for (unsigned i = 0; i < that.sources; i++)
this->src[i] = that.src[i];
fs_inst::resize_sources(uint8_t num_sources)
if (this->sources != num_sources) {
fs_reg *src = new fs_reg[MAX2(num_sources, 3)];
for (unsigned i = 0; i < MIN2(this->sources, num_sources); ++i)
src[i] = this->src[i];
this->sources = num_sources;
fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
const fs_reg &surf_index,
const fs_reg &varying_offset,
uint32_t const_offset,
/* We have our constant surface use a pitch of 4 bytes, so our index can
 * be any component of a vector, and then we load 4 contiguous
 * components starting from that.
 * We break down the const_offset to a portion added to the variable offset
 * and a portion done using fs_reg::offset, which means that if you have
 * GLSL using something like "uniform vec4 a[20]; gl_FragColor = a[i]",
 * we'll temporarily generate 4 vec4 loads from offset i * 4, and CSE can
 * later notice that those loads are all the same and eliminate the
fs_reg vec4_offset = vgrf(glsl_type::uint_type);
bld.ADD(vec4_offset, varying_offset, brw_imm_ud(const_offset & ~0xf));
/* The pull load message will load a vec4 (16 bytes). If we are loading
 * a double this means we are only loading 2 elements worth of data.
 * We also want to use a 32-bit data type for the dst of the load operation
 * so other parts of the driver don't get confused about the size of the
fs_reg vec4_result = bld.vgrf(BRW_REGISTER_TYPE_F, 4);
fs_inst *inst = bld.emit(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL,
vec4_result, surf_index, vec4_offset,
brw_imm_ud(alignment));
inst->size_written = 4 * vec4_result.component_size(inst->exec_size);
shuffle_from_32bit_read(bld, dst, vec4_result,
(const_offset & 0xf) / type_sz(dst.type), 1);
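/* Illustrative note (added commentary, not upstream code): with
 * const_offset = 28 and a dword-typed dst, the ADD above folds the
 * 16-byte-aligned part (28 & ~0xf == 16) into vec4_offset, and the
 * remaining 12 bytes select component 12 / type_sz(dst.type) == 3 of
 * vec4_result in the shuffle.
 */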
 * A helper for MOV generation for fixing up broken hardware SEND dependency
fs_visitor::DEP_RESOLVE_MOV(const fs_builder &bld, int grf)
/* The caller always wants uncompressed to emit the minimal extra
 * dependencies, and to avoid having to deal with aligning its regs to 2.
const fs_builder ubld = bld.annotate("send dependency resolve")
ubld.MOV(ubld.null_reg_f(), fs_reg(VGRF, grf, BRW_REGISTER_TYPE_F));
fs_inst::is_send_from_grf() const
case SHADER_OPCODE_SEND:
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
case SHADER_OPCODE_URB_WRITE_SIMD8:
case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
case SHADER_OPCODE_URB_READ_SIMD8:
case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
case SHADER_OPCODE_INTERLOCK:
case SHADER_OPCODE_MEMORY_FENCE:
case SHADER_OPCODE_BARRIER:
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
return src[1].file == VGRF;
case FS_OPCODE_FB_WRITE:
case FS_OPCODE_FB_READ:
return src[0].file == VGRF;
return src[0].file == VGRF;
fs_inst::is_control_source(unsigned arg) const
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GFX7:
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GFX4:
case SHADER_OPCODE_BROADCAST:
case SHADER_OPCODE_SHUFFLE:
case SHADER_OPCODE_QUAD_SWIZZLE:
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
case SHADER_OPCODE_GET_BUFFER_SIZE:
case SHADER_OPCODE_MOV_INDIRECT:
case SHADER_OPCODE_CLUSTER_BROADCAST:
case SHADER_OPCODE_TEX:
case SHADER_OPCODE_TXD:
case SHADER_OPCODE_TXF:
case SHADER_OPCODE_TXF_LZ:
case SHADER_OPCODE_TXF_CMS:
case SHADER_OPCODE_TXF_CMS_W:
case SHADER_OPCODE_TXF_UMS:
case SHADER_OPCODE_TXF_MCS:
case SHADER_OPCODE_TXL:
case SHADER_OPCODE_TXL_LZ:
case SHADER_OPCODE_TXS:
case SHADER_OPCODE_LOD:
case SHADER_OPCODE_TG4:
case SHADER_OPCODE_TG4_OFFSET:
case SHADER_OPCODE_SAMPLEINFO:
return arg == 1 || arg == 2;
case SHADER_OPCODE_SEND:
return arg == 0 || arg == 1;
fs_inst::is_payload(unsigned arg) const
case FS_OPCODE_FB_WRITE:
case FS_OPCODE_FB_READ:
case SHADER_OPCODE_URB_WRITE_SIMD8:
case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
case SHADER_OPCODE_URB_READ_SIMD8:
case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
case VEC4_OPCODE_UNTYPED_ATOMIC:
case VEC4_OPCODE_UNTYPED_SURFACE_READ:
case VEC4_OPCODE_UNTYPED_SURFACE_WRITE:
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
case SHADER_OPCODE_INTERLOCK:
case SHADER_OPCODE_MEMORY_FENCE:
case SHADER_OPCODE_BARRIER:
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GFX7:
case SHADER_OPCODE_SEND:
return arg == 2 || arg == 3;
 * Returns true if this instruction's sources and destinations cannot
 * safely be the same register.
 * In most cases, a register can be written over safely by the same
 * instruction that is its last use. For a single instruction, the
 * sources are dereferenced before writing of the destination starts
 * However, there are a few cases where this can be problematic:
 * - Virtual opcodes that translate to multiple instructions in the
 * code generator: if src == dst and one instruction writes the
 * destination before a later instruction reads the source, then
 * src will have been clobbered.
 * - SIMD16 compressed instructions with certain regioning (see below).
 * The register allocator uses this information to set up conflicts between
 * GRF sources and the destination.
fs_inst::has_source_and_destination_hazard() const
case FS_OPCODE_PACK_HALF_2x16_SPLIT:
/* Multiple partial writes to the destination */
case SHADER_OPCODE_SHUFFLE:
/* This instruction returns an arbitrary channel from the source and
 * gets split into smaller instructions in the generator. It's possible
 * that one of the instructions will read from a channel corresponding
 * to an earlier instruction.
case SHADER_OPCODE_SEL_EXEC:
/* This is implemented as
 * mov(16) g4<1>D 0D { align1 WE_all 1H };
 * mov(16) g4<1>D g5<8,8,1>D { align1 1H }
 * Because the source is only read in the second instruction, the first
 * may stomp all over it.
case SHADER_OPCODE_QUAD_SWIZZLE:
case BRW_SWIZZLE_XXXX:
case BRW_SWIZZLE_YYYY:
case BRW_SWIZZLE_ZZZZ:
case BRW_SWIZZLE_WWWW:
case BRW_SWIZZLE_XXZZ:
case BRW_SWIZZLE_YYWW:
case BRW_SWIZZLE_XYXY:
case BRW_SWIZZLE_ZWZW:
/* These can be implemented as a single Align1 region on all
 * platforms, so there's never a hazard between source and
 * destination. C.f. fs_generator::generate_quad_swizzle().
return !is_uniform(src[0]);
/* The SIMD16 compressed instruction
 * add(16) g4<1>F g4<8,8,1>F g6<8,8,1>F
 * is actually decoded in hardware as:
 * add(8) g4<1>F g4<8,8,1>F g6<8,8,1>F
 * add(8) g5<1>F g5<8,8,1>F g7<8,8,1>F
 * Which is safe. However, if we have uniform accesses
 * happening, we get into trouble:
 * add(8) g4<1>F g4<0,1,0>F g6<8,8,1>F
 * add(8) g5<1>F g4<0,1,0>F g7<8,8,1>F
 * Now our destination for the first instruction overwrote the
 * second instruction's src0, and we get garbage for those 8
 * pixels. There's a similar issue for the pre-gfx6
 * pixel_x/pixel_y, which are registers of 16-bit values and thus
 * would get stomped by the first decode as well.
if (exec_size == 16) {
for (int i = 0; i < sources; i++) {
if (src[i].file == VGRF && (src[i].stride == 0 ||
src[i].type == BRW_REGISTER_TYPE_UW ||
src[i].type == BRW_REGISTER_TYPE_W ||
src[i].type == BRW_REGISTER_TYPE_UB ||
src[i].type == BRW_REGISTER_TYPE_B)) {
fs_inst::can_do_source_mods(const struct intel_device_info *devinfo) const
if (devinfo->ver == 6 && is_math())
if (is_send_from_grf())
/* From Wa_1604601757:
 * "When multiplying a DW and any lower precision integer, source modifier
if (devinfo->ver >= 12 && (opcode == BRW_OPCODE_MUL ||
opcode == BRW_OPCODE_MAD)) {
const brw_reg_type exec_type = get_exec_type(this);
const unsigned min_type_sz = opcode == BRW_OPCODE_MAD ?
MIN2(type_sz(src[1].type), type_sz(src[2].type)) :
MIN2(type_sz(src[0].type), type_sz(src[1].type));
if (brw_reg_type_is_integer(exec_type) &&
type_sz(exec_type) >= 4 &&
type_sz(exec_type) != min_type_sz)
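/* Added example for the workaround check above (illustrative, not upstream):
 * a MUL whose sources are D and W has a 4-byte integer execution type but a
 * 2-byte minimum source size, so min_type_sz != type_sz(exec_type) and source
 * modifiers are rejected; a MUL of two D sources is unaffected by this check.
 */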
if (!backend_instruction::can_do_source_mods())
fs_inst::can_do_cmod()
if (!backend_instruction::can_do_cmod())
/* The accumulator result appears to get used for the conditional modifier
 * generation. When negating a UD value, there is a 33rd bit generated for
 * the sign in the accumulator value, so now you can't check, for example,
 * equality with a 32-bit value. See piglit fs-op-neg-uvec4.
for (unsigned i = 0; i < sources; i++) {
if (brw_reg_type_is_unsigned_integer(src[i].type) && src[i].negate)
fs_inst::can_change_types() const
return dst.type == src[0].type &&
!src[0].abs && !src[0].negate && !saturate &&
(opcode == BRW_OPCODE_MOV ||
(opcode == BRW_OPCODE_SEL &&
dst.type == src[1].type &&
predicate != BRW_PREDICATE_NONE &&
!src[1].abs && !src[1].negate));
memset((void*)this, 0, sizeof(*this));
type = BRW_REGISTER_TYPE_UD;
/** Generic unset register constructor. */
this->file = BAD_FILE;
fs_reg::fs_reg(struct ::brw_reg reg) :
if (this->file == IMM &&
(this->type != BRW_REGISTER_TYPE_V &&
this->type != BRW_REGISTER_TYPE_UV &&
this->type != BRW_REGISTER_TYPE_VF)) {
fs_reg::equals(const fs_reg &r) const
return (this->backend_reg::equals(r) &&
fs_reg::negative_equals(const fs_reg &r) const
return (this->backend_reg::negative_equals(r) &&
fs_reg::is_contiguous() const
return hstride == BRW_HORIZONTAL_STRIDE_1 &&
vstride == width + hstride;
unreachable("Invalid register file");
fs_reg::component_size(unsigned width) const
const unsigned stride = ((file != ARF && file != FIXED_GRF) ? this->stride :
return MAX2(width * stride, 1) * type_sz(type);
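/* Added worked example (not upstream): a VGRF float source with stride 1 at
 * width 8 occupies MAX2(8 * 1, 1) * 4 == 32 bytes, i.e. exactly one GRF;
 * a uniform (stride 0) source of the same type occupies just 4 bytes.
 */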
 * Create a MOV to read the timestamp register.
fs_visitor::get_timestamp(const fs_builder &bld)
assert(devinfo->ver >= 7);
fs_reg ts = fs_reg(retype(brw_vec4_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_REGISTER_TYPE_UD));
fs_reg dst = fs_reg(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
/* We want to read the 3 fields we care about even if it's not enabled in
bld.group(4, 0).exec_all().MOV(dst, ts);
fs_visitor::vfail(const char *format, va_list va)
msg = ralloc_vasprintf(mem_ctx, format, va);
msg = ralloc_asprintf(mem_ctx, "SIMD%d %s compile failed: %s\n",
dispatch_width, stage_abbrev, msg);
this->fail_msg = msg;
if (unlikely(debug_enabled)) {
fprintf(stderr, "%s", msg);
fs_visitor::fail(const char *format, ...)
va_start(va, format);
 * Mark this program as impossible to compile with dispatch width greater
 * During the SIMD8 compile (which happens first), we can detect and flag
 * things that are unsupported in SIMD16+ mode, so the compiler can skip the
 * SIMD16+ compile altogether.
 * During a compile of dispatch width greater than n (if one happens anyway),
 * this just calls fail().
fs_visitor::limit_dispatch_width(unsigned n, const char *msg)
if (dispatch_width > n) {
max_dispatch_width = MIN2(max_dispatch_width, n);
brw_shader_perf_log(compiler, log_data,
"Shader dispatch width limited to SIMD%d: %s\n",
 * Returns true if the instruction has a flag that means it won't
 * update an entire destination register.
 * For example, dead code elimination and live variable analysis want to know
 * when a write to a variable screens off any preceding values that were in
fs_inst::is_partial_write() const
return ((this->predicate && this->opcode != BRW_OPCODE_SEL) ||
(this->exec_size * type_sz(this->dst.type)) < 32 ||
!this->dst.is_contiguous() ||
this->dst.offset % REG_SIZE != 0);
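/* Added example (illustrative only): a SIMD8 MOV to a W-typed destination
 * covers 8 * 2 == 16 bytes, less than the 32-byte GRF, so it counts as a
 * partial write; an unpredicated SIMD8 MOV to a packed F destination at a
 * register-aligned offset does not.
 */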
fs_inst::components_read(unsigned i) const
/* Return zero if the source is not present. */
if (src[i].file == BAD_FILE)
case FS_OPCODE_LINTERP:
case FS_OPCODE_PIXEL_X:
case FS_OPCODE_PIXEL_Y:
case FS_OPCODE_FB_WRITE_LOGICAL:
assert(src[FB_WRITE_LOGICAL_SRC_COMPONENTS].file == IMM);
/* First/second FB write color. */
return src[FB_WRITE_LOGICAL_SRC_COMPONENTS].ud;
case SHADER_OPCODE_TEX_LOGICAL:
case SHADER_OPCODE_TXD_LOGICAL:
case SHADER_OPCODE_TXF_LOGICAL:
case SHADER_OPCODE_TXL_LOGICAL:
case SHADER_OPCODE_TXS_LOGICAL:
case SHADER_OPCODE_IMAGE_SIZE_LOGICAL:
case FS_OPCODE_TXB_LOGICAL:
case SHADER_OPCODE_TXF_CMS_LOGICAL:
case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
case SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL:
case SHADER_OPCODE_TXF_UMS_LOGICAL:
case SHADER_OPCODE_TXF_MCS_LOGICAL:
case SHADER_OPCODE_LOD_LOGICAL:
case SHADER_OPCODE_TG4_LOGICAL:
case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
assert(src[TEX_LOGICAL_SRC_COORD_COMPONENTS].file == IMM &&
src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].file == IMM);
/* Texture coordinates. */
if (i == TEX_LOGICAL_SRC_COORDINATE)
return src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud;
/* Texture derivatives. */
else if ((i == TEX_LOGICAL_SRC_LOD || i == TEX_LOGICAL_SRC_LOD2) &&
opcode == SHADER_OPCODE_TXD_LOGICAL)
return src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].ud;
/* Texture offset. */
else if (i == TEX_LOGICAL_SRC_TG4_OFFSET)
else if (i == TEX_LOGICAL_SRC_MCS) {
if (opcode == SHADER_OPCODE_TXF_CMS_W_LOGICAL)
else if (opcode == SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL)
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
assert(src[SURFACE_LOGICAL_SRC_IMM_DIMS].file == IMM);
/* Surface coordinates. */
if (i == SURFACE_LOGICAL_SRC_ADDRESS)
return src[SURFACE_LOGICAL_SRC_IMM_DIMS].ud;
/* Surface operation source (ignored for reads). */
else if (i == SURFACE_LOGICAL_SRC_DATA)
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
assert(src[SURFACE_LOGICAL_SRC_IMM_DIMS].file == IMM &&
src[SURFACE_LOGICAL_SRC_IMM_ARG].file == IMM);
/* Surface coordinates. */
if (i == SURFACE_LOGICAL_SRC_ADDRESS)
return src[SURFACE_LOGICAL_SRC_IMM_DIMS].ud;
/* Surface operation source. */
else if (i == SURFACE_LOGICAL_SRC_DATA)
return src[SURFACE_LOGICAL_SRC_IMM_ARG].ud;
case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL:
case SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL:
case SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL:
assert(src[2].file == IMM);
case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL:
assert(src[2].file == IMM);
if (i == 1) { /* data to write */
const unsigned comps = src[2].ud / exec_size;
case SHADER_OPCODE_OWORD_BLOCK_READ_LOGICAL:
case SHADER_OPCODE_UNALIGNED_OWORD_BLOCK_READ_LOGICAL:
assert(src[SURFACE_LOGICAL_SRC_IMM_ARG].file == IMM);
case SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL:
assert(src[SURFACE_LOGICAL_SRC_IMM_ARG].file == IMM);
if (i == SURFACE_LOGICAL_SRC_DATA) {
const unsigned comps = src[SURFACE_LOGICAL_SRC_IMM_ARG].ud / exec_size;
case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
assert(src[2].file == IMM);
return i == 1 ? src[2].ud : 1;
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL:
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
assert(src[2].file == IMM);
const unsigned op = src[2].ud;
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL:
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL:
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT64_LOGICAL:
assert(src[2].file == IMM);
const unsigned op = src[2].ud;
return op == BRW_AOP_FCMPWR ? 2 : 1;
case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL:
/* Scattered logical opcodes use the following params:
 * src[0] Surface coordinates
 * src[1] Surface operation source (ignored for reads)
 * src[3] IMM with always 1 dimension.
 * src[4] IMM with arg bitsize for scattered read/write 8, 16, 32
assert(src[SURFACE_LOGICAL_SRC_IMM_DIMS].file == IMM &&
src[SURFACE_LOGICAL_SRC_IMM_ARG].file == IMM);
return i == SURFACE_LOGICAL_SRC_DATA ? 0 : 1;
case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL:
assert(src[SURFACE_LOGICAL_SRC_IMM_DIMS].file == IMM &&
src[SURFACE_LOGICAL_SRC_IMM_ARG].file == IMM);
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: {
assert(src[SURFACE_LOGICAL_SRC_IMM_DIMS].file == IMM &&
src[SURFACE_LOGICAL_SRC_IMM_ARG].file == IMM);
const unsigned op = src[SURFACE_LOGICAL_SRC_IMM_ARG].ud;
/* Surface coordinates. */
if (i == SURFACE_LOGICAL_SRC_ADDRESS)
return src[SURFACE_LOGICAL_SRC_IMM_DIMS].ud;
/* Surface operation source. */
else if (i == SURFACE_LOGICAL_SRC_DATA && op == BRW_AOP_CMPWR)
else if (i == SURFACE_LOGICAL_SRC_DATA &&
(op == BRW_AOP_INC || op == BRW_AOP_DEC || op == BRW_AOP_PREDEC))
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
return (i == 0 ? 2 : 1);
case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: {
assert(src[SURFACE_LOGICAL_SRC_IMM_DIMS].file == IMM &&
src[SURFACE_LOGICAL_SRC_IMM_ARG].file == IMM);
const unsigned op = src[SURFACE_LOGICAL_SRC_IMM_ARG].ud;
/* Surface coordinates. */
if (i == SURFACE_LOGICAL_SRC_ADDRESS)
return src[SURFACE_LOGICAL_SRC_IMM_DIMS].ud;
/* Surface operation source. */
else if (i == SURFACE_LOGICAL_SRC_DATA && op == BRW_AOP_FCMPWR)
fs_inst::size_read(int arg) const
case SHADER_OPCODE_SEND:
return mlen * REG_SIZE;
} else if (arg == 3) {
return ex_mlen * REG_SIZE;
case FS_OPCODE_FB_WRITE:
case FS_OPCODE_REP_FB_WRITE:
return src[0].file == BAD_FILE ? 0 : 2 * REG_SIZE;
return mlen * REG_SIZE;
case FS_OPCODE_FB_READ:
case SHADER_OPCODE_URB_WRITE_SIMD8:
case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
case SHADER_OPCODE_URB_READ_SIMD8:
case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
return mlen * REG_SIZE;
case FS_OPCODE_SET_SAMPLE_ID:
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GFX7:
/* The payload is actually stored in src1 */
return mlen * REG_SIZE;
case FS_OPCODE_LINTERP:
case SHADER_OPCODE_LOAD_PAYLOAD:
if (arg < this->header_size)
case CS_OPCODE_CS_TERMINATE:
case SHADER_OPCODE_BARRIER:
case SHADER_OPCODE_MOV_INDIRECT:
assert(src[2].file == IMM);
if (is_tex() && arg == 0 && src[0].file == VGRF)
return mlen * REG_SIZE;
switch (src[arg].file) {
return components_read(arg) * type_sz(src[arg].type);
return components_read(arg) * src[arg].component_size(exec_size);
unreachable("MRF registers are not allowed as sources");
predicate_width(brw_predicate predicate)
case BRW_PREDICATE_NONE: return 1;
case BRW_PREDICATE_NORMAL: return 1;
case BRW_PREDICATE_ALIGN1_ANY2H: return 2;
case BRW_PREDICATE_ALIGN1_ALL2H: return 2;
case BRW_PREDICATE_ALIGN1_ANY4H: return 4;
case BRW_PREDICATE_ALIGN1_ALL4H: return 4;
case BRW_PREDICATE_ALIGN1_ANY8H: return 8;
case BRW_PREDICATE_ALIGN1_ALL8H: return 8;
case BRW_PREDICATE_ALIGN1_ANY16H: return 16;
case BRW_PREDICATE_ALIGN1_ALL16H: return 16;
case BRW_PREDICATE_ALIGN1_ANY32H: return 32;
case BRW_PREDICATE_ALIGN1_ALL32H: return 32;
default: unreachable("Unsupported predicate");
/* Return the subset of flag registers that an instruction could
 * potentially read or write based on the execution controls and flag
 * subregister number of the instruction.
flag_mask(const fs_inst *inst, unsigned width)
assert(util_is_power_of_two_nonzero(width));
const unsigned start = (inst->flag_subreg * 16 + inst->group) &
const unsigned end = start + ALIGN(inst->exec_size, width);
return ((1 << DIV_ROUND_UP(end, 8)) - 1) & ~((1 << (start / 8)) - 1);
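/* Added worked example (not from upstream): a SIMD16 instruction in group 0
 * using flag register f0.1 has flag_subreg == 1, so start == 16 and
 * end == 32, giving a mask of 0xc -- the two bytes covering f0.1.
 */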
bit_mask(unsigned n)
return (n >= CHAR_BIT * sizeof(bit_mask(n)) ? ~0u : (1u << n) - 1);
flag_mask(const fs_reg &r, unsigned sz)
if (r.file == ARF) {
const unsigned start = (r.nr - BRW_ARF_FLAG) * 4 + r.subnr;
const unsigned end = start + sz;
return bit_mask(end) & ~bit_mask(start);
fs_inst::flags_read(const intel_device_info *devinfo) const
if (predicate == BRW_PREDICATE_ALIGN1_ANYV ||
predicate == BRW_PREDICATE_ALIGN1_ALLV) {
/* The vertical predication modes combine corresponding bits from
 * f0.0 and f1.0 on Gfx7+, and f0.0 and f0.1 on older hardware.
const unsigned shift = devinfo->ver >= 7 ? 4 : 2;
return flag_mask(this, 1) << shift | flag_mask(this, 1);
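/* Added example (illustrative): for an ANYV-predicated SIMD16 instruction in
 * group 0 with flag_subreg 0 on Gfx7+, flag_mask(this, 1) is 0x3 (f0.0) and
 * shifting by 4 adds 0x30 (f1.0), so flags_read() reports 0x33.
 */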
} else if (predicate) {
return flag_mask(this, predicate_width(predicate));
for (int i = 0; i < sources; i++) {
mask |= flag_mask(src[i], size_read(i));
fs_inst::flags_written(const intel_device_info *devinfo) const
/* On Gfx4 and Gfx5, sel.l (for min) and sel.ge (for max) are implemented
 * using a separate cmpn and sel instruction. This lowering occurs in
 * fs_visitor::lower_minmax which is called very, very late.
if ((conditional_mod && ((opcode != BRW_OPCODE_SEL || devinfo->ver <= 5) &&
opcode != BRW_OPCODE_CSEL &&
opcode != BRW_OPCODE_IF &&
opcode != BRW_OPCODE_WHILE)) ||
opcode == FS_OPCODE_FB_WRITE) {
return flag_mask(this, 1);
} else if (opcode == SHADER_OPCODE_FIND_LIVE_CHANNEL ||
opcode == SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL ||
opcode == FS_OPCODE_LOAD_LIVE_CHANNELS) {
return flag_mask(this, 32);
return flag_mask(dst, size_written);
 * Returns how many MRFs an FS opcode will write over.
 * Note that this is not the 0 or 1 implied writes in an actual gen
 * instruction -- the FS opcodes often generate MOVs in addition.
fs_inst::implied_mrf_writes() const
case SHADER_OPCODE_RCP:
case SHADER_OPCODE_RSQ:
case SHADER_OPCODE_SQRT:
case SHADER_OPCODE_EXP2:
case SHADER_OPCODE_LOG2:
case SHADER_OPCODE_SIN:
case SHADER_OPCODE_COS:
return 1 * exec_size / 8;
case SHADER_OPCODE_POW:
case SHADER_OPCODE_INT_QUOTIENT:
case SHADER_OPCODE_INT_REMAINDER:
return 2 * exec_size / 8;
case SHADER_OPCODE_TEX:
case SHADER_OPCODE_TXD:
case SHADER_OPCODE_TXF:
case SHADER_OPCODE_TXF_CMS:
case SHADER_OPCODE_TXF_MCS:
case SHADER_OPCODE_TG4:
case SHADER_OPCODE_TG4_OFFSET:
case SHADER_OPCODE_TXL:
case SHADER_OPCODE_TXS:
case SHADER_OPCODE_LOD:
case SHADER_OPCODE_SAMPLEINFO:
case FS_OPCODE_FB_WRITE:
case FS_OPCODE_REP_FB_WRITE:
return src[0].file == BAD_FILE ? 0 : 2;
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
case SHADER_OPCODE_GFX4_SCRATCH_READ:
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GFX4:
case SHADER_OPCODE_GFX4_SCRATCH_WRITE:
unreachable("not reached");
fs_visitor::vgrf(const glsl_type *const type)
int reg_width = dispatch_width / 8;
alloc.allocate(glsl_count_dword_slots(type, false) * reg_width),
brw_type_for_base_type(type));
fs_reg::fs_reg(enum brw_reg_file file, int nr)
this->type = BRW_REGISTER_TYPE_F;
this->stride = (file == UNIFORM ? 0 : 1);
fs_reg::fs_reg(enum brw_reg_file file, int nr, enum brw_reg_type type)
this->stride = (file == UNIFORM ? 0 : 1);
/* For SIMD16, we need to follow from the uniform setup of SIMD8 dispatch.
 * This brings in those uniform definitions
fs_visitor::import_uniforms(fs_visitor *v)
this->push_constant_loc = v->push_constant_loc;
this->uniforms = v->uniforms;
this->subgroup_id = v->subgroup_id;
for (unsigned i = 0; i < ARRAY_SIZE(this->group_size); i++)
this->group_size[i] = v->group_size[i];
fs_visitor::emit_fragcoord_interpolation(fs_reg wpos)
assert(stage == MESA_SHADER_FRAGMENT);
/* gl_FragCoord.x */
bld.MOV(wpos, this->pixel_x);
wpos = offset(wpos, bld, 1);
/* gl_FragCoord.y */
bld.MOV(wpos, this->pixel_y);
wpos = offset(wpos, bld, 1);
/* gl_FragCoord.z */
if (devinfo->ver >= 6) {
bld.MOV(wpos, this->pixel_z);
bld.emit(FS_OPCODE_LINTERP, wpos,
this->delta_xy[BRW_BARYCENTRIC_PERSPECTIVE_PIXEL],
component(interp_reg(VARYING_SLOT_POS, 2), 0));
wpos = offset(wpos, bld, 1);
/* gl_FragCoord.w: Already set up in emit_interpolation */
bld.MOV(wpos, this->wpos_w);
enum brw_barycentric_mode
brw_barycentric_mode(enum glsl_interp_mode mode, nir_intrinsic_op op)
/* Barycentric modes don't make sense for flat inputs. */
assert(mode != INTERP_MODE_FLAT);
case nir_intrinsic_load_barycentric_pixel:
case nir_intrinsic_load_barycentric_at_offset:
bary = BRW_BARYCENTRIC_PERSPECTIVE_PIXEL;
case nir_intrinsic_load_barycentric_centroid:
bary = BRW_BARYCENTRIC_PERSPECTIVE_CENTROID;
case nir_intrinsic_load_barycentric_sample:
case nir_intrinsic_load_barycentric_at_sample:
bary = BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE;
unreachable("invalid intrinsic");
if (mode == INTERP_MODE_NOPERSPECTIVE)
return (enum brw_barycentric_mode) bary;
 * Turn one of the two CENTROID barycentric modes into PIXEL mode.
static enum brw_barycentric_mode
centroid_to_pixel(enum brw_barycentric_mode bary)
assert(bary == BRW_BARYCENTRIC_PERSPECTIVE_CENTROID ||
bary == BRW_BARYCENTRIC_NONPERSPECTIVE_CENTROID);
return (enum brw_barycentric_mode) ((unsigned) bary - 1);
fs_visitor::emit_frontfacing_interpolation()
fs_reg ff = bld.vgrf(BRW_REGISTER_TYPE_D);
if (devinfo->ver >= 12) {
fs_reg g1 = fs_reg(retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_W));
fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_W);
bld.ASR(tmp, g1, brw_imm_d(15));
} else if (devinfo->ver >= 6) {
/* Bit 15 of g0.0 is 0 if the polygon is front facing. We want to create
 * a boolean result from this (~0/true or 0/false).
 * We can use the fact that bit 15 is the MSB of g0.0:W to accomplish
 * this task in only one instruction:
 * - a negation source modifier will flip the bit; and
 * - a W -> D type conversion will sign extend the bit into the high
 * word of the destination.
 * An ASR 15 fills the low word of the destination.
fs_reg g0 = fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_W));
bld.ASR(ff, g0, brw_imm_d(15));
/* Bit 31 of g1.6 is 0 if the polygon is front facing. We want to create
 * a boolean result from this (1/true or 0/false).
 * Like in the above case, since the bit is the MSB of g1.6:UD we can use
 * the negation source modifier to flip it. Unfortunately the SHR
 * instruction only operates on UD (or D with an abs source modifier)
 * sources without negation.
 * Instead, use ASR (which will give ~0/true or 0/false).
fs_reg g1_6 = fs_reg(retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_D));
bld.ASR(ff, g1_6, brw_imm_d(31));
fs_visitor::emit_samplepos_setup()
assert(stage == MESA_SHADER_FRAGMENT);
struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(this->prog_data);
assert(devinfo->ver >= 6);
const fs_builder abld = bld.annotate("compute sample position");
fs_reg pos = abld.vgrf(BRW_REGISTER_TYPE_F, 2);
if (!wm_prog_data->persample_dispatch) {
/* From ARB_sample_shading specification:
 * "When rendering to a non-multisample buffer, or if multisample
 * rasterization is disabled, gl_SamplePosition will always be
bld.MOV(offset(pos, bld, 0), brw_imm_f(0.5f));
bld.MOV(offset(pos, bld, 1), brw_imm_f(0.5f));
/* WM will be run in MSDISPMODE_PERSAMPLE. So, only one of SIMD8 or SIMD16
 * mode will be enabled.
 * From the Ivy Bridge PRM, volume 2 part 1, page 344:
 * R31.1:0 Position Offset X/Y for Slot[3:0]
 * R31.3:2 Position Offset X/Y for Slot[7:4]
 * The X, Y sample positions come in as bytes in thread payload. So, read
 * the positions using vstride=16, width=8, hstride=2.
const fs_reg sample_pos_reg =
fetch_payload_reg(abld, payload.sample_pos_reg, BRW_REGISTER_TYPE_W);
for (unsigned i = 0; i < 2; i++) {
fs_reg tmp_d = bld.vgrf(BRW_REGISTER_TYPE_D);
abld.MOV(tmp_d, subscript(sample_pos_reg, BRW_REGISTER_TYPE_B, i));
/* Convert int_sample_pos to floating point */
fs_reg tmp_f = bld.vgrf(BRW_REGISTER_TYPE_F);
abld.MOV(tmp_f, tmp_d);
/* Scale to the range [0, 1] */
abld.MUL(offset(pos, abld, i), tmp_f, brw_imm_f(1 / 16.0f));
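/* Added note (not upstream): the payload bytes encode sample positions in
 * 1/16ths of a pixel, so e.g. a byte value of 8 becomes 8 * (1 / 16.0) = 0.5
 * after the MUL above, matching the pixel-center default used earlier.
 */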
fs_visitor::emit_sampleid_setup()
assert(stage == MESA_SHADER_FRAGMENT);
brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
assert(devinfo->ver >= 6);
const fs_builder abld = bld.annotate("compute sample id");
fs_reg sample_id = abld.vgrf(BRW_REGISTER_TYPE_UD);
if (!key->multisample_fbo) {
/* As per GL_ARB_sample_shading specification:
 * "When rendering to a non-multisample buffer, or if multisample
 * rasterization is disabled, gl_SampleID will always be zero."
abld.MOV(sample_id, brw_imm_d(0));
} else if (devinfo->ver >= 8) {
/* Sample ID comes in as 4-bit numbers in g1.0:
 * 15:12 Slot 3 SampleID (only used in SIMD16)
 * 11:8 Slot 2 SampleID (only used in SIMD16)
 * 7:4 Slot 1 SampleID
 * 3:0 Slot 0 SampleID
 * Each slot corresponds to four channels, so we want to replicate each
 * half-byte value to 4 channels in a row:
 * dst+0: .7 .6 .5 .4 .3 .2 .1 .0
 * 7:4 7:4 7:4 7:4 3:0 3:0 3:0 3:0
 * dst+1: .7 .6 .5 .4 .3 .2 .1 .0 (if SIMD16)
 * 15:12 15:12 15:12 15:12 11:8 11:8 11:8 11:8
 * First, we read g1.0 with a <1,8,0>UB region, causing the first 8
 * channels to read the first byte (7:0), and the second group of 8
 * channels to read the second byte (15:8). Then, we shift right by
 * a vector immediate of <4, 4, 4, 4, 0, 0, 0, 0>, moving the slot 1 / 3
 * values into place. Finally, we AND with 0xf to keep the low nibble.
 * shr(16) tmp<1>W g1.0<1,8,0>B 0x44440000:V
 * and(16) dst<1>D tmp<8,8,1>W 0xf:W
 * TODO: These payload bits exist on Gfx7 too, but they appear to always
 * be zero, so this code fails to work. We should find out why.
const fs_reg tmp = abld.vgrf(BRW_REGISTER_TYPE_UW);
for (unsigned i = 0; i < DIV_ROUND_UP(dispatch_width, 16); i++) {
const fs_builder hbld = abld.group(MIN2(16, dispatch_width), i);
hbld.SHR(offset(tmp, hbld, i),
stride(retype(brw_vec1_grf(1 + i, 0), BRW_REGISTER_TYPE_UB),
brw_imm_v(0x44440000));
abld.AND(sample_id, tmp, brw_imm_w(0xf));
const fs_reg t1 = component(abld.vgrf(BRW_REGISTER_TYPE_UD), 0);
const fs_reg t2 = abld.vgrf(BRW_REGISTER_TYPE_UW);
/* The PS will be run in MSDISPMODE_PERSAMPLE. For example with
 * 8x multisampling, subspan 0 will represent sample N (where N
 * is 0, 2, 4 or 6), subspan 1 will represent sample 1, 3, 5 or
 * 7. We can find the value of N by looking at R0.0 bits 7:6
 * ("Starting Sample Pair Index (SSPI)") and multiplying by two
 * (since samples are always delivered in pairs). That is, we
 * compute 2*((R0.0 & 0xc0) >> 6) == (R0.0 & 0xc0) >> 5. Then
 * we need to add N to the sequence (0, 0, 0, 0, 1, 1, 1, 1) in
 * case of SIMD8 and sequence (0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2,
 * 2, 3, 3, 3, 3) in case of SIMD16. We compute this sequence by
 * populating a temporary variable with the sequence (0, 1, 2, 3),
 * and then reading from it using vstride=1, width=4, hstride=0.
 * These computations hold good for 4x multisampling as well.
 * For 2x MSAA and SIMD16, we want to use the sequence (0, 1, 0, 1):
 * the first four slots are sample 0 of subspan 0; the next four
 * are sample 1 of subspan 0; the third group is sample 0 of
 * subspan 1, and finally sample 1 of subspan 1.
/* SKL+ has an extra bit for the Starting Sample Pair Index to
 * accommodate 16x MSAA.
abld.exec_all().group(1, 0)
.AND(t1, fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD)),
abld.exec_all().group(1, 0).SHR(t1, t1, brw_imm_d(5));
/* This works for SIMD8-SIMD16. It also works for SIMD32 but only if we
 * can assume 4x MSAA. Disallow it on IVB+
 * FINISHME: One day, we could come up with a way to do this that
 * actually works on gfx7.
if (devinfo->ver >= 7)
limit_dispatch_width(16, "gl_SampleId is unsupported in SIMD32 on gfx7");
abld.exec_all().group(8, 0).MOV(t2, brw_imm_v(0x32103210));
/* This special instruction takes care of setting vstride=1,
 * width=4, hstride=0 of t2 during an ADD instruction.
abld.emit(FS_OPCODE_SET_SAMPLE_ID, sample_id, t1, t2);
fs_visitor::emit_samplemaskin_setup()
assert(stage == MESA_SHADER_FRAGMENT);
struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(this->prog_data);
assert(devinfo->ver >= 6);
fs_reg mask = bld.vgrf(BRW_REGISTER_TYPE_D);
/* The HW doesn't provide us with expected values. */
assert(!wm_prog_data->per_coarse_pixel_dispatch);
fs_reg coverage_mask =
fetch_payload_reg(bld, payload.sample_mask_in_reg, BRW_REGISTER_TYPE_D);
if (wm_prog_data->persample_dispatch) {
/* gl_SampleMaskIn[] comes from two sources: the input coverage mask,
 * and a mask representing which sample is being processed by the
 * current shader invocation.
 * From the OES_sample_variables specification:
 * "When per-sample shading is active due to the use of a fragment input
 * qualified by "sample" or due to the use of the gl_SampleID or
 * gl_SamplePosition variables, only the bit for the current sample is
 * set in gl_SampleMaskIn."
const fs_builder abld = bld.annotate("compute gl_SampleMaskIn");
if (nir_system_values[SYSTEM_VALUE_SAMPLE_ID].file == BAD_FILE)
nir_system_values[SYSTEM_VALUE_SAMPLE_ID] = emit_sampleid_setup();
fs_reg one = vgrf(glsl_type::int_type);
fs_reg enabled_mask = vgrf(glsl_type::int_type);
abld.MOV(one, brw_imm_d(1));
abld.SHL(enabled_mask, one, nir_system_values[SYSTEM_VALUE_SAMPLE_ID]);
abld.AND(mask, enabled_mask, coverage_mask);
/* In per-pixel mode, the coverage mask is sufficient. */
mask = coverage_mask;
fs_visitor::emit_shading_rate_setup()
assert(devinfo->ver >= 11);
const fs_builder abld = bld.annotate("compute fragment shading rate");
fs_reg rate = abld.vgrf(BRW_REGISTER_TYPE_UD);
struct brw_wm_prog_data *wm_prog_data =
brw_wm_prog_data(bld.shader->stage_prog_data);
/* Coarse pixel shading size fields overlap with other fields if not in
 * coarse pixel dispatch mode, so report 0 when that's not the case.
if (wm_prog_data->per_coarse_pixel_dispatch) {
/* The shading rates provided in the shader are the actual 2D shading
 * rate while the SPIR-V built-in is the enum value that has the shading
 * rate encoded as a bitfield. Fortunately, the bitfield value is just
 * the shading rate divided by two and shifted.
/* r1.0 - 0:7 ActualCoarsePixelShadingSize.X */
fs_reg actual_x = fs_reg(retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UB));
/* r1.0 - 15:8 ActualCoarsePixelShadingSize.Y */
fs_reg actual_y = byte_offset(actual_x, 1);
fs_reg int_rate_x = bld.vgrf(BRW_REGISTER_TYPE_UD);
fs_reg int_rate_y = bld.vgrf(BRW_REGISTER_TYPE_UD);
abld.SHR(int_rate_y, actual_y, brw_imm_ud(1));
abld.SHR(int_rate_x, actual_x, brw_imm_ud(1));
abld.SHL(int_rate_x, int_rate_x, brw_imm_ud(2));
abld.OR(rate, int_rate_x, int_rate_y);
abld.MOV(rate, brw_imm_ud(0));
fs_visitor::resolve_source_modifiers(const fs_reg &src)
if (!src.abs && !src.negate)
fs_reg temp = bld.vgrf(src.type);
fs_visitor::emit_gs_thread_end()
assert(stage == MESA_SHADER_GEOMETRY);
struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(prog_data);
if (gs_compile->control_data_header_size_bits > 0) {
emit_gs_control_data_bits(this->final_gs_vertex_count);
const fs_builder abld = bld.annotate("thread end");
if (gs_prog_data->static_vertex_count != -1) {
foreach_in_list_reverse(fs_inst, prev, &this->instructions) {
if (prev->opcode == SHADER_OPCODE_URB_WRITE_SIMD8 ||
prev->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_MASKED ||
prev->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT ||
prev->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT) {
/* Delete now dead instructions. */
foreach_in_list_reverse_safe(exec_node, dead, &this->instructions) {
} else if (prev->is_control_flow() || prev->has_side_effects()) {
fs_reg hdr = abld.vgrf(BRW_REGISTER_TYPE_UD, 1);
abld.MOV(hdr, fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD)));
inst = abld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, hdr);
fs_reg payload = abld.vgrf(BRW_REGISTER_TYPE_UD, 2);
fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 2);
sources[0] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
sources[1] = this->final_gs_vertex_count;
abld.LOAD_PAYLOAD(payload, sources, 2, 2);
inst = abld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload);
fs_visitor::assign_curb_setup()
unsigned uniform_push_length = DIV_ROUND_UP(stage_prog_data->nr_params, 8);
unsigned ubo_push_length = 0;
unsigned ubo_push_start[4];
for (int i = 0; i < 4; i++) {
ubo_push_start[i] = 8 * (ubo_push_length + uniform_push_length);
ubo_push_length += stage_prog_data->ubo_ranges[i].length;
prog_data->curb_read_length = uniform_push_length + ubo_push_length;
bool is_compute = gl_shader_stage_is_compute(stage);
if (is_compute && brw_cs_prog_data(prog_data)->uses_inline_data) {
/* With COMPUTE_WALKER, we can push up to one register worth of data via
 * the inline data parameter in the COMPUTE_WALKER command itself.
 * TODO: Support inline data and push at the same time.
assert(devinfo->verx10 >= 125);
assert(uniform_push_length <= 1);
} else if (is_compute && devinfo->verx10 >= 125) {
fs_builder ubld = bld.exec_all().group(8, 0).at(
cfg->first_block(), cfg->first_block()->start());
/* The base address for our push data is passed in as R0.0[31:6]. We
 * have to mask off the bottom 6 bits.
fs_reg base_addr = ubld.vgrf(BRW_REGISTER_TYPE_UD);
ubld.group(1, 0).AND(base_addr,
retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD),
brw_imm_ud(INTEL_MASK(31, 6)));
fs_reg header0 = ubld.vgrf(BRW_REGISTER_TYPE_UD);
ubld.MOV(header0, brw_imm_ud(0));
ubld.group(1, 0).SHR(component(header0, 2), base_addr, brw_imm_ud(4));
/* On Gfx12-HP we load constants at the start of the program using A32
 * stateless messages.
for (unsigned i = 0; i < uniform_push_length;) {
/* Limit ourselves to HW limit of 8 Owords (8 * 16bytes = 128 bytes
unsigned num_regs = MIN2(uniform_push_length - i, 4);
assert(num_regs > 0);
num_regs = 1 << util_logbase2(num_regs);
header = ubld.vgrf(BRW_REGISTER_TYPE_UD);
ubld.MOV(header, brw_imm_ud(0));
ubld.group(1, 0).ADD(component(header, 2),
component(header0, 2),
brw_imm_ud(0), /* desc */
brw_imm_ud(0), /* ex_desc */
header, /* payload */
fs_reg(), /* payload2 */
fs_reg dest = retype(brw_vec8_grf(payload.num_regs + i, 0),
BRW_REGISTER_TYPE_UD);
/* This instruction has to be run SIMD16 if we're filling more than a
unsigned send_width = MIN2(16, num_regs * 8);
fs_inst *send = ubld.group(send_width, 0).emit(SHADER_OPCODE_SEND,
send->sfid = GFX7_SFID_DATAPORT_DATA_CACHE;
send->desc = brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
GFX7_DATAPORT_DC_OWORD_BLOCK_READ,
BRW_DATAPORT_OWORD_BLOCK_OWORDS(num_regs * 2));
send->header_size = 1;
send->size_written = num_regs * REG_SIZE;
send->send_is_volatile = true;
invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
/* Map the offsets in the UNIFORM file to fixed HW regs. */
foreach_block_and_inst(block, fs_inst, inst, cfg) {
for (unsigned int i = 0; i < inst->sources; i++) {
if (inst->src[i].file == UNIFORM) {
int uniform_nr = inst->src[i].nr + inst->src[i].offset / 4;
if (inst->src[i].nr >= UBO_START) {
/* constant_nr is in 32-bit units, the rest are in bytes */
constant_nr = ubo_push_start[inst->src[i].nr - UBO_START] +
inst->src[i].offset / 4;
} else if (uniform_nr >= 0 && uniform_nr < (int) uniforms) {
constant_nr = push_constant_loc[uniform_nr];
/* Section 5.11 of the OpenGL 4.1 spec says:
 * "Out-of-bounds reads return undefined values, which include
 * values from other variables of the active program or zero."
 * Just return the first push constant.
assert(constant_nr / 8 < 64);
used |= BITFIELD64_BIT(constant_nr / 8);
struct brw_reg brw_reg = brw_vec1_grf(payload.num_regs +
brw_reg.abs = inst->src[i].abs;
brw_reg.negate = inst->src[i].negate;
assert(inst->src[i].stride == 0);
inst->src[i] = byte_offset(
retype(brw_reg, inst->src[i].type),
inst->src[i].offset % 4);
uint64_t want_zero = used & stage_prog_data->zero_push_reg;
fs_builder ubld = bld.exec_all().group(8, 0).at(
cfg->first_block(), cfg->first_block()->start());
/* push_reg_mask_param is in 32-bit units */
unsigned mask_param = stage_prog_data->push_reg_mask_param;
struct brw_reg mask = brw_vec1_grf(payload.num_regs + mask_param / 8,
for (unsigned i = 0; i < 64; i++) {
if (i % 16 == 0 && (want_zero & BITFIELD64_RANGE(i, 16))) {
fs_reg shifted = ubld.vgrf(BRW_REGISTER_TYPE_W, 2);
ubld.SHL(horiz_offset(shifted, 8),
byte_offset(retype(mask, BRW_REGISTER_TYPE_W), i / 8),
brw_imm_v(0x01234567));
ubld.SHL(shifted, horiz_offset(shifted, 8), brw_imm_w(8));
fs_builder ubld16 = ubld.group(16, 0);
b32 = ubld16.vgrf(BRW_REGISTER_TYPE_D);
ubld16.group(16, 0).ASR(b32, shifted, brw_imm_w(15));
if (want_zero & BITFIELD64_BIT(i)) {
assert(i < prog_data->curb_read_length);
struct brw_reg push_reg =
retype(brw_vec8_grf(payload.num_regs + i, 0),
BRW_REGISTER_TYPE_D);
ubld.AND(push_reg, push_reg, component(b32, i % 16));
invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
/* This may be updated in assign_urb_setup or assign_vs_urb_setup. */
this->first_non_payload_grf = payload.num_regs + prog_data->curb_read_length;
 * Build up an array of indices into the urb_setup array that
 * references the active entries of the urb_setup array.
 * Used to accelerate walking the active entries of the urb_setup array
brw_compute_urb_setup_index(struct brw_wm_prog_data *wm_prog_data)
/* TODO(mesh): Review usage of this in the context of Mesh, we may want to
 * skip per-primitive attributes here.
/* Make sure uint8_t is sufficient */
STATIC_ASSERT(VARYING_SLOT_MAX <= 0xff);
for (uint8_t attr = 0; attr < VARYING_SLOT_MAX; attr++) {
if (wm_prog_data->urb_setup[attr] >= 0) {
wm_prog_data->urb_setup_attribs[index++] = attr;
wm_prog_data->urb_setup_attribs_count = index;
calculate_urb_setup(const struct intel_device_info *devinfo,
const struct brw_wm_prog_key *key,
struct brw_wm_prog_data *prog_data,
const nir_shader *nir,
const struct brw_mue_map *mue_map)
memset(prog_data->urb_setup, -1,
sizeof(prog_data->urb_setup[0]) * VARYING_SLOT_MAX);
const uint64_t inputs_read =
nir->info.inputs_read & ~nir->info.per_primitive_inputs;
/* Figure out where each of the incoming setup attributes lands. */
/* Per-Primitive Attributes are laid out by Hardware before the regular
 * attributes, so order them like this to make it easier later to map setup
 * into real HW registers.
if (nir->info.per_primitive_inputs) {
for (unsigned i = 0; i < VARYING_SLOT_MAX; i++) {
if (nir->info.per_primitive_inputs & BITFIELD64_BIT(i)) {
prog_data->urb_setup[i] = urb_next++;
/* The actual setup attributes later must be aligned to a full GRF. */
urb_next = ALIGN(urb_next, 2);
prog_data->num_per_primitive_inputs = urb_next;
const uint64_t clip_dist_bits = VARYING_BIT_CLIP_DIST0 |
VARYING_BIT_CLIP_DIST1;
uint64_t unique_fs_attrs = inputs_read & BRW_FS_VARYING_INPUT_MASK;
if (inputs_read & clip_dist_bits) {
assert(mue_map->per_vertex_header_size_dw > 8);
unique_fs_attrs &= ~clip_dist_bits;
/* In Mesh, CLIP_DIST slots are always at the beginning, because
 * they come from MUE Vertex Header, not Per-Vertex Attributes.
if (inputs_read & clip_dist_bits) {
prog_data->urb_setup[VARYING_SLOT_CLIP_DIST0] = urb_next++;
prog_data->urb_setup[VARYING_SLOT_CLIP_DIST1] = urb_next++;
/* Per-Vertex attributes are laid out in order. Because we always link
 * Mesh and Fragment shaders, which slots are written and read by
 * each of them will match. */
for (unsigned int i = 0; i < VARYING_SLOT_MAX; i++) {
if (unique_fs_attrs & BITFIELD64_BIT(i))
prog_data->urb_setup[i] = urb_next++;
} else if (devinfo->ver >= 6) {
uint64_t vue_header_bits =
VARYING_BIT_PSIZ | VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT;
uint64_t unique_fs_attrs = inputs_read & BRW_FS_VARYING_INPUT_MASK;
/* VUE header fields all live in the same URB slot, so we pass them
 * as a single FS input attribute. We want to only count them once.
if (inputs_read & vue_header_bits) {
unique_fs_attrs &= ~vue_header_bits;
unique_fs_attrs |= VARYING_BIT_PSIZ;
if (util_bitcount64(unique_fs_attrs) <= 16) {
/* The SF/SBE pipeline stage can do arbitrary rearrangement of the
 * first 16 varying inputs, so we can put them wherever we want.
 * Just put them in order.
 * This is useful because it means that (a) inputs not used by the
 * fragment shader won't take up valuable register space, and (b) we
 * won't have to recompile the fragment shader if it gets paired with
 * a different vertex (or geometry) shader.
 * VUE header fields share the same FS input attribute.
if (inputs_read & vue_header_bits) {
if (inputs_read & VARYING_BIT_PSIZ)
prog_data->urb_setup[VARYING_SLOT_PSIZ] = urb_next;
if (inputs_read & VARYING_BIT_LAYER)
prog_data->urb_setup[VARYING_SLOT_LAYER] = urb_next;
if (inputs_read & VARYING_BIT_VIEWPORT)
prog_data->urb_setup[VARYING_SLOT_VIEWPORT] = urb_next;
for (unsigned int i = 0; i < VARYING_SLOT_MAX; i++) {
if (inputs_read & BRW_FS_VARYING_INPUT_MASK & ~vue_header_bits &
BITFIELD64_BIT(i)) {
prog_data->urb_setup[i] = urb_next++;
/* We have enough input varyings that the SF/SBE pipeline stage can't
 * arbitrarily rearrange them to suit our whim; we have to put them
 * in an order that matches the output of the previous pipeline stage
 * (geometry or vertex shader).
/* Re-compute the VUE map here in the case that the one coming from
 * geometry has more than one position slot (used for Primitive
struct brw_vue_map prev_stage_vue_map;
brw_compute_vue_map(devinfo, &prev_stage_vue_map,
key->input_slots_valid,
nir->info.separate_shader, 1);
brw_compute_first_urb_slot_required(inputs_read,
&prev_stage_vue_map);
assert(prev_stage_vue_map.num_slots <= first_slot + 32);
for (int slot = first_slot; slot < prev_stage_vue_map.num_slots;
int varying = prev_stage_vue_map.slot_to_varying[slot];
if (varying != BRW_VARYING_SLOT_PAD &&
(inputs_read & BRW_FS_VARYING_INPUT_MASK &
BITFIELD64_BIT(varying))) {
prog_data->urb_setup[varying] = slot - first_slot;
urb_next = prev_stage_vue_map.num_slots - first_slot;
/* FINISHME: The sf doesn't map VS->FS inputs for us very well. */
for (unsigned int i = 0; i < VARYING_SLOT_MAX; i++) {
/* Point size is packed into the header, not as a general attribute */
if (i == VARYING_SLOT_PSIZ)
if (key->input_slots_valid & BITFIELD64_BIT(i)) {
/* The back color slot is skipped when the front color is
 * also written to. In addition, some slots can be
 * written in the vertex shader and not read in the
 * fragment shader. So the register number must always be
 * incremented, mapped or not.
if (_mesa_varying_slot_in_fs((gl_varying_slot) i))
prog_data->urb_setup[i] = urb_next;
 * It's a FS only attribute, and we did interpolation for this attribute
 * in SF thread. So, count it here, too.
 * See compile_sf_prog() for more info.
if (inputs_read & BITFIELD64_BIT(VARYING_SLOT_PNTC))
prog_data->urb_setup[VARYING_SLOT_PNTC] = urb_next++;
prog_data->num_varying_inputs = urb_next - prog_data->num_per_primitive_inputs;
prog_data->inputs = inputs_read;
brw_compute_urb_setup_index(prog_data);
fs_visitor::assign_urb_setup()
assert(stage == MESA_SHADER_FRAGMENT);
struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data);
int urb_start = payload.num_regs + prog_data->base.curb_read_length;
/* Offset all the urb_setup[] index by the actual position of the
 * setup regs, now that the location of the constants has been chosen.
foreach_block_and_inst(block, fs_inst, inst, cfg) {
for (int i = 0; i < inst->sources; i++) {
if (inst->src[i].file == ATTR) {
/* ATTR regs in the FS are in units of logical scalar inputs each
 * of which consumes half of a GRF register.
assert(inst->src[i].offset < REG_SIZE / 2);
const unsigned grf = urb_start + inst->src[i].nr / 2;
const unsigned offset = (inst->src[i].nr % 2) * (REG_SIZE / 2) +
inst->src[i].offset;
const unsigned width = inst->src[i].stride == 0 ?
1 : MIN2(inst->exec_size, 8);
struct brw_reg reg = stride(
byte_offset(retype(brw_vec8_grf(grf, 0), inst->src[i].type),
width * inst->src[i].stride,
width, inst->src[i].stride);
reg.abs = inst->src[i].abs;
reg.negate = inst->src[i].negate;
/* Each attribute is 4 setup channels, each of which is half a reg. */
this->first_non_payload_grf += prog_data->num_varying_inputs * 2;
/* Unlike regular attributes, per-primitive attributes have all 4 channels
 * in the same slot, so each GRF can store two slots.
assert(prog_data->num_per_primitive_inputs % 2 == 0);
this->first_non_payload_grf += prog_data->num_per_primitive_inputs / 2;
fs_visitor::convert_attr_sources_to_hw_regs(fs_inst *inst)
for (int i = 0; i < inst->sources; i++) {
if (inst->src[i].file == ATTR) {
int grf = payload.num_regs +
prog_data->curb_read_length +
inst->src[i].offset / REG_SIZE;
/* As explained at brw_reg_from_fs_reg, From the Haswell PRM:
 * VertStride must be used to cross GRF register boundaries. This
 * rule implies that elements within a 'Width' cannot cross GRF
 * So, for registers that are large enough, we have to split the exec
 * size in two and trust the compression state to sort it out.
unsigned total_size = inst->exec_size *
inst->src[i].stride *
type_sz(inst->src[i].type);
assert(total_size <= 2 * REG_SIZE);
const unsigned exec_size =
(total_size <= REG_SIZE) ? inst->exec_size : inst->exec_size / 2;
unsigned width = inst->src[i].stride == 0 ? 1 : exec_size;
struct brw_reg reg =
stride(byte_offset(retype(brw_vec8_grf(grf, 0), inst->src[i].type),
inst->src[i].offset % REG_SIZE),
exec_size * inst->src[i].stride,
width, inst->src[i].stride);
reg.abs = inst->src[i].abs;
reg.negate = inst->src[i].negate;
fs_visitor::assign_vs_urb_setup()
struct brw_vs_prog_data *vs_prog_data = brw_vs_prog_data(prog_data);
assert(stage == MESA_SHADER_VERTEX);
/* Each attribute is 4 regs. */
this->first_non_payload_grf += 4 * vs_prog_data->nr_attribute_slots;
assert(vs_prog_data->base.urb_read_length <= 15);
/* Rewrite all ATTR file references to the hw grf that they land in. */
foreach_block_and_inst(block, fs_inst, inst, cfg) {
convert_attr_sources_to_hw_regs(inst);
fs_visitor::assign_tcs_urb_setup()
assert(stage == MESA_SHADER_TESS_CTRL);
/* Rewrite all ATTR file references to HW_REGs. */
foreach_block_and_inst(block, fs_inst, inst, cfg) {
convert_attr_sources_to_hw_regs(inst);
fs_visitor::assign_tes_urb_setup()
assert(stage == MESA_SHADER_TESS_EVAL);
struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(prog_data);
first_non_payload_grf += 8 * vue_prog_data->urb_read_length;
/* Rewrite all ATTR file references to HW_REGs. */
foreach_block_and_inst(block, fs_inst, inst, cfg) {
convert_attr_sources_to_hw_regs(inst);
fs_visitor::assign_gs_urb_setup()
assert(stage == MESA_SHADER_GEOMETRY);
struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(prog_data);
first_non_payload_grf +=
8 * vue_prog_data->urb_read_length * nir->info.gs.vertices_in;
foreach_block_and_inst(block, fs_inst, inst, cfg) {
/* Rewrite all ATTR file references to GRFs. */
convert_attr_sources_to_hw_regs(inst);
2095
/**
 * Split large virtual GRFs into separate components if we can.
 *
 * This pass aggressively splits VGRFs into chunks as small as possible,
 * down to single registers if it can.  If no VGRFs can be split, we return
 * false so this pass can safely be used inside an optimization loop.  We
 * want to split, because virtual GRFs are what we register allocate and
 * spill (due to contiguousness requirements for some instructions), and
 * they're what we naturally generate in the codegen process, but most
 * virtual GRFs don't actually need to be contiguous sets of GRFs.  If we
 * split, we'll end up with reduced live intervals and better dead code
 * elimination and coalescing.
 */
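/* Sketch of the idea (hypothetical sizes): a 4-GRF VGRF written by four
 * independent SIMD8 MOVs has a split point after every GRF and becomes four
 * 1-GRF VGRFs, while a VGRF written by a single SIMD16 instruction keeps its
 * two GRFs fused because the multi-register write marks the second slot as
 * inseparable.
 */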
bool
fs_visitor::split_virtual_grfs()
{
   /* Compact the register file so we eliminate dead vgrfs.  This
    * only defines split points for live registers, so if we have
    * too large dead registers they will hit assertions later.
    */
   compact_virtual_grfs();

   int num_vars = this->alloc.count;

   /* Count the total number of registers */
   int reg_count = 0;
   int vgrf_to_reg[num_vars];
   for (int i = 0; i < num_vars; i++) {
      vgrf_to_reg[i] = reg_count;
      reg_count += alloc.sizes[i];
   }

   /* An array of "split points".  For each register slot, this indicates
    * if this slot can be separated from the previous slot.  Every time an
    * instruction uses multiple elements of a register (as a source or
    * destination), we mark the used slots as inseparable.  Then we go
    * through and split the registers into the smallest pieces we can.
    */
   bool *split_points = new bool[reg_count];
   memset(split_points, 0, reg_count * sizeof(*split_points));
/* Mark all used registers as fully splittable */
2136
foreach_block_and_inst(block, fs_inst, inst, cfg) {
2137
if (inst->dst.file == VGRF) {
2138
int reg = vgrf_to_reg[inst->dst.nr];
2139
for (unsigned j = 1; j < this->alloc.sizes[inst->dst.nr]; j++)
2140
split_points[reg + j] = true;
2143
for (int i = 0; i < inst->sources; i++) {
2144
if (inst->src[i].file == VGRF) {
2145
int reg = vgrf_to_reg[inst->src[i].nr];
2146
for (unsigned j = 1; j < this->alloc.sizes[inst->src[i].nr]; j++)
2147
split_points[reg + j] = true;
2152
foreach_block_and_inst(block, fs_inst, inst, cfg) {
2153
/* We fix up undef instructions later */
2154
if (inst->opcode == SHADER_OPCODE_UNDEF) {
2155
/* UNDEF instructions are currently only used to undef entire
2156
* registers. We need this invariant later when we split them.
2158
assert(inst->dst.file == VGRF);
2159
assert(inst->dst.offset == 0);
2160
assert(inst->size_written == alloc.sizes[inst->dst.nr] * REG_SIZE);
2164
if (inst->dst.file == VGRF) {
2165
int reg = vgrf_to_reg[inst->dst.nr] + inst->dst.offset / REG_SIZE;
2166
for (unsigned j = 1; j < regs_written(inst); j++)
2167
split_points[reg + j] = false;
2169
for (int i = 0; i < inst->sources; i++) {
2170
if (inst->src[i].file == VGRF) {
2171
int reg = vgrf_to_reg[inst->src[i].nr] + inst->src[i].offset / REG_SIZE;
2172
for (unsigned j = 1; j < regs_read(inst, i); j++)
2173
split_points[reg + j] = false;
2178
/* Bitset of which registers have been split */
2179
bool *vgrf_has_split = new bool[num_vars];
2180
memset(vgrf_has_split, 0, num_vars * sizeof(*vgrf_has_split));
2182
int *new_virtual_grf = new int[reg_count];
2183
int *new_reg_offset = new int[reg_count];
2186
bool has_splits = false;
2187
for (int i = 0; i < num_vars; i++) {
2188
/* The first one should always be 0 as a quick sanity check. */
2189
assert(split_points[reg] == false);
2192
new_reg_offset[reg] = 0;
2197
for (unsigned j = 1; j < alloc.sizes[i]; j++) {
2198
/* If this is a split point, reset the offset to 0 and allocate a
2199
* new virtual GRF for the previous offset many registers
2201
if (split_points[reg]) {
2203
vgrf_has_split[i] = true;
2204
assert(offset <= MAX_VGRF_SIZE);
2205
int grf = alloc.allocate(offset);
2206
for (int k = reg - offset; k < reg; k++)
2207
new_virtual_grf[k] = grf;
2210
new_reg_offset[reg] = offset;
2215
/* The last one gets the original register number */
2216
assert(offset <= MAX_VGRF_SIZE);
2217
alloc.sizes[i] = offset;
2218
for (int k = reg - offset; k < reg; k++)
2219
new_virtual_grf[k] = i;
2221
assert(reg == reg_count);
2229
foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
2230
if (inst->opcode == SHADER_OPCODE_UNDEF) {
2231
assert(inst->dst.file == VGRF);
2232
if (vgrf_has_split[inst->dst.nr]) {
2233
const fs_builder ibld(this, block, inst);
2234
assert(inst->size_written % REG_SIZE == 0);
2235
unsigned reg_offset = 0;
2236
while (reg_offset < inst->size_written / REG_SIZE) {
2237
reg = vgrf_to_reg[inst->dst.nr] + reg_offset;
2238
ibld.UNDEF(fs_reg(VGRF, new_virtual_grf[reg], inst->dst.type));
2239
reg_offset += alloc.sizes[new_virtual_grf[reg]];
2241
inst->remove(block);
2243
reg = vgrf_to_reg[inst->dst.nr];
2244
assert(new_reg_offset[reg] == 0);
2245
assert(new_virtual_grf[reg] == (int)inst->dst.nr);
2250
if (inst->dst.file == VGRF) {
2251
reg = vgrf_to_reg[inst->dst.nr] + inst->dst.offset / REG_SIZE;
2252
if (vgrf_has_split[inst->dst.nr]) {
2253
inst->dst.nr = new_virtual_grf[reg];
2254
inst->dst.offset = new_reg_offset[reg] * REG_SIZE +
2255
inst->dst.offset % REG_SIZE;
2256
assert((unsigned)new_reg_offset[reg] <
2257
alloc.sizes[new_virtual_grf[reg]]);
2259
assert(new_reg_offset[reg] == inst->dst.offset / REG_SIZE);
2260
assert(new_virtual_grf[reg] == (int)inst->dst.nr);
2263
for (int i = 0; i < inst->sources; i++) {
2264
if (inst->src[i].file != VGRF)
2267
reg = vgrf_to_reg[inst->src[i].nr] + inst->src[i].offset / REG_SIZE;
2268
if (vgrf_has_split[inst->src[i].nr]) {
2269
inst->src[i].nr = new_virtual_grf[reg];
2270
inst->src[i].offset = new_reg_offset[reg] * REG_SIZE +
2271
inst->src[i].offset % REG_SIZE;
2272
assert((unsigned)new_reg_offset[reg] <
2273
alloc.sizes[new_virtual_grf[reg]]);
2275
assert(new_reg_offset[reg] == inst->src[i].offset / REG_SIZE);
2276
assert(new_virtual_grf[reg] == (int)inst->src[i].nr);
2280
invalidate_analysis(DEPENDENCY_INSTRUCTION_DETAIL | DEPENDENCY_VARIABLES);
2285
delete[] split_points;
2286
delete[] vgrf_has_split;
2287
delete[] new_virtual_grf;
2288
delete[] new_reg_offset;
2294
/**
 * Remove unused virtual GRFs and compact the vgrf_* arrays.
 *
 * During code generation, we create tons of temporary variables, many of
 * which get immediately killed and are never used again.  Yet, in later
 * optimization and analysis passes, such as compute_live_intervals, we need
 * to loop over all the virtual GRFs.  Compacting them can save a lot of
 * overhead.
 */
fs_visitor::compact_virtual_grfs()
2305
bool progress = false;
2306
int *remap_table = new int[this->alloc.count];
2307
memset(remap_table, -1, this->alloc.count * sizeof(int));
2309
/* Mark which virtual GRFs are used. */
2310
foreach_block_and_inst(block, const fs_inst, inst, cfg) {
2311
if (inst->dst.file == VGRF)
2312
remap_table[inst->dst.nr] = 0;
2314
for (int i = 0; i < inst->sources; i++) {
2315
if (inst->src[i].file == VGRF)
2316
remap_table[inst->src[i].nr] = 0;
2320
/* Compact the GRF arrays. */
2322
for (unsigned i = 0; i < this->alloc.count; i++) {
2323
if (remap_table[i] == -1) {
2324
/* We just found an unused register. This means that we are
2325
* actually going to compact something.
2329
remap_table[i] = new_index;
2330
alloc.sizes[new_index] = alloc.sizes[i];
2331
invalidate_analysis(DEPENDENCY_INSTRUCTION_DETAIL | DEPENDENCY_VARIABLES);
2336
this->alloc.count = new_index;
2338
/* Patch all the instructions to use the newly renumbered registers */
2339
foreach_block_and_inst(block, fs_inst, inst, cfg) {
2340
if (inst->dst.file == VGRF)
2341
inst->dst.nr = remap_table[inst->dst.nr];
2343
for (int i = 0; i < inst->sources; i++) {
2344
if (inst->src[i].file == VGRF)
2345
inst->src[i].nr = remap_table[inst->src[i].nr];
2349
/* Patch all the references to delta_xy, since they're used in register
2350
* allocation. If they're unused, switch them to BAD_FILE so we don't
2351
* think some random VGRF is delta_xy.
2353
for (unsigned i = 0; i < ARRAY_SIZE(delta_xy); i++) {
2354
if (delta_xy[i].file == VGRF) {
2355
if (remap_table[delta_xy[i].nr] != -1) {
2356
delta_xy[i].nr = remap_table[delta_xy[i].nr];
2358
delta_xy[i].file = BAD_FILE;
2363
delete[] remap_table;
2369
static int
get_subgroup_id_param_index(const intel_device_info *devinfo,
                            const brw_stage_prog_data *prog_data)
{
   if (prog_data->nr_params == 0)
      return -1;

   if (devinfo->verx10 >= 125)
      return -1;

   /* The local thread id is always the last parameter in the list */
   uint32_t last_param = prog_data->param[prog_data->nr_params - 1];
   if (last_param == BRW_PARAM_BUILTIN_SUBGROUP_ID)
      return prog_data->nr_params - 1;

   return -1;
}
/**
 * Assign UNIFORM file registers to either push constants or pull constants.
 *
 * We allow a fragment shader to have more than the specified minimum
 * maximum number of fragment shader uniform components (64).  If
 * there are too many of these, they'd fill up all of register space.
 * So, this will push some of them out to the pull constant buffer and
 * update the program to load them.
 */
void
fs_visitor::assign_constant_locations()
{
   /* Only the first compile gets to decide on locations. */
   if (push_constant_loc)
      return;

   push_constant_loc = ralloc_array(mem_ctx, int, uniforms);
   for (unsigned u = 0; u < uniforms; u++)
      push_constant_loc[u] = u;
   /* Now that we know how many regular uniforms we'll push, reduce the
    * UBO push ranges so we don't exceed the 3DSTATE_CONSTANT limits.
    *
    * Only allow 16 registers (128 uniform components) as push constants.
    *
    * If changing this value, note the limitation about total_regs in
    * brw_curbe.c/crocus_state.c
    */
   const unsigned max_push_length = compiler->devinfo->ver < 6 ? 16 : 64;
   unsigned push_length = DIV_ROUND_UP(stage_prog_data->nr_params, 8);
   for (int i = 0; i < 4; i++) {
      struct brw_ubo_range *range = &prog_data->ubo_ranges[i];

      if (push_length + range->length > max_push_length)
         range->length = max_push_length - push_length;

      push_length += range->length;
   }
   assert(push_length <= max_push_length);
}
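/* Worked example (hypothetical numbers): with 40 regular uniform params,
 * push_length starts at DIV_ROUND_UP(40, 8) == 5 registers.  On a part with
 * max_push_length == 64, a UBO range of 62 registers would be trimmed to 59
 * so that 5 + 59 stays within the limit, and later ranges only get whatever
 * room remains.
 */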
bool
fs_visitor::get_pull_locs(const fs_reg &src,
                          unsigned *out_surf_index,
                          unsigned *out_pull_index)
{
   assert(src.file == UNIFORM);

   if (src.nr < UBO_START)
      return false;

   const struct brw_ubo_range *range =
      &prog_data->ubo_ranges[src.nr - UBO_START];

   /* If this access is in our (reduced) range, use the push data. */
   if (src.offset / 32 < range->length)
      return false;

   *out_surf_index = range->block;
   *out_pull_index = (32 * range->start + src.offset) / 4;

   prog_data->has_ubo_pull = true;

   return true;
}
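/* Example for get_pull_locs() above (hypothetical range): with
 * range->start == 2 and src.offset == 80 bytes (beyond the pushed part of
 * the range), the pull index becomes (32 * 2 + 80) / 4 == 36 dwords into the
 * bound UBO surface.
 */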
/**
 * Replace UNIFORM register file access with either UNIFORM_PULL_CONSTANT_LOAD
 * or VARYING_PULL_CONSTANT_LOAD instructions which load values into VGRFs.
 */
void
fs_visitor::lower_constant_loads()
{
   unsigned index, pull_index;

   foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
      /* Set up the annotation tracking for new generated instructions. */
      const fs_builder ibld(this, block, inst);

      for (int i = 0; i < inst->sources; i++) {
         if (inst->src[i].file != UNIFORM)
            continue;

         /* We'll handle this case later */
         if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0)
            continue;

         if (!get_pull_locs(inst->src[i], &index, &pull_index))
            continue;

         assert(inst->src[i].stride == 0);

         const unsigned block_sz = 64; /* Fetch one cacheline at a time. */
         const fs_builder ubld = ibld.exec_all().group(block_sz / 4, 0);
         const fs_reg dst = ubld.vgrf(BRW_REGISTER_TYPE_UD);
         const unsigned base = pull_index * 4;

         ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
                   dst, brw_imm_ud(index), brw_imm_ud(base & ~(block_sz - 1)));

         /* Rewrite the instruction to use the temporary VGRF. */
         inst->src[i].file = VGRF;
         inst->src[i].nr = dst.nr;
         inst->src[i].offset = (base & (block_sz - 1)) +
                               inst->src[i].offset % 4;
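         /* Illustrative numbers (hypothetical): pull_index == 19 gives
          * base == 76, so the load fetches the cacheline starting at byte 64
          * of the surface and the source is rewritten to read at offset 12
          * within the temporary VGRF.
          */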
if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT &&
2495
inst->src[0].file == UNIFORM) {
2497
if (!get_pull_locs(inst->src[0], &index, &pull_index))
2500
VARYING_PULL_CONSTANT_LOAD(ibld, inst->dst,
2504
inst->remove(block);
2507
invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
2511
fs_visitor::opt_algebraic()
2513
bool progress = false;
2515
foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
2516
switch (inst->opcode) {
2517
case BRW_OPCODE_MOV:
2518
if (!devinfo->has_64bit_float &&
2519
!devinfo->has_64bit_int &&
2520
(inst->dst.type == BRW_REGISTER_TYPE_DF ||
2521
inst->dst.type == BRW_REGISTER_TYPE_UQ ||
2522
inst->dst.type == BRW_REGISTER_TYPE_Q)) {
2523
assert(inst->dst.type == inst->src[0].type);
2524
assert(!inst->saturate);
2525
assert(!inst->src[0].abs);
2526
assert(!inst->src[0].negate);
2527
const brw::fs_builder ibld(this, block, inst);
2529
ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1),
2530
subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1));
2531
ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0),
2532
subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0));
2534
inst->remove(block);
2538
if ((inst->conditional_mod == BRW_CONDITIONAL_Z ||
2539
inst->conditional_mod == BRW_CONDITIONAL_NZ) &&
2540
inst->dst.is_null() &&
2541
(inst->src[0].abs || inst->src[0].negate)) {
2542
inst->src[0].abs = false;
2543
inst->src[0].negate = false;
2548
if (inst->src[0].file != IMM)
2551
if (inst->saturate) {
2552
            /* Full mixed-type saturates don't happen.  However, we can end up
             * with things like:
             *
             *    mov.sat(8) g21<1>DF -1F
             *
             * Other mixed-size-but-same-base-type cases may also be possible.
             */
if (inst->dst.type != inst->src[0].type &&
2560
inst->dst.type != BRW_REGISTER_TYPE_DF &&
2561
inst->src[0].type != BRW_REGISTER_TYPE_F)
2562
assert(!"unimplemented: saturate mixed types");
2564
if (brw_saturate_immediate(inst->src[0].type,
2565
&inst->src[0].as_brw_reg())) {
2566
inst->saturate = false;
2572
case BRW_OPCODE_MUL:
2573
if (inst->src[1].file != IMM)
2576
if (brw_reg_type_is_floating_point(inst->src[1].type))
2580
if (inst->src[1].is_one()) {
2581
inst->opcode = BRW_OPCODE_MOV;
2582
inst->src[1] = reg_undef;
2588
if (inst->src[1].is_negative_one()) {
2589
inst->opcode = BRW_OPCODE_MOV;
2590
inst->src[0].negate = !inst->src[0].negate;
2591
inst->src[1] = reg_undef;
2597
case BRW_OPCODE_ADD:
2598
if (inst->src[1].file != IMM)
2601
if (brw_reg_type_is_integer(inst->src[1].type) &&
2602
inst->src[1].is_zero()) {
2603
inst->opcode = BRW_OPCODE_MOV;
2604
inst->src[1] = reg_undef;
2609
if (inst->src[0].file == IMM) {
2610
assert(inst->src[0].type == BRW_REGISTER_TYPE_F);
2611
inst->opcode = BRW_OPCODE_MOV;
2612
inst->src[0].f += inst->src[1].f;
2613
inst->src[1] = reg_undef;
2619
if (inst->src[0].equals(inst->src[1]) ||
2620
inst->src[1].is_zero()) {
2621
/* On Gfx8+, the OR instruction can have a source modifier that
2622
* performs logical not on the operand. Cases of 'OR r0, ~r1, 0'
2623
* or 'OR r0, ~r1, ~r1' should become a NOT instead of a MOV.
2625
if (inst->src[0].negate) {
2626
inst->opcode = BRW_OPCODE_NOT;
2627
inst->src[0].negate = false;
2629
inst->opcode = BRW_OPCODE_MOV;
2631
inst->src[1] = reg_undef;
2636
case BRW_OPCODE_CMP:
2637
if ((inst->conditional_mod == BRW_CONDITIONAL_Z ||
2638
inst->conditional_mod == BRW_CONDITIONAL_NZ) &&
2639
inst->src[1].is_zero() &&
2640
(inst->src[0].abs || inst->src[0].negate)) {
2641
inst->src[0].abs = false;
2642
inst->src[0].negate = false;
2647
case BRW_OPCODE_SEL:
2648
if (!devinfo->has_64bit_float &&
2649
!devinfo->has_64bit_int &&
2650
(inst->dst.type == BRW_REGISTER_TYPE_DF ||
2651
inst->dst.type == BRW_REGISTER_TYPE_UQ ||
2652
inst->dst.type == BRW_REGISTER_TYPE_Q)) {
2653
assert(inst->dst.type == inst->src[0].type);
2654
assert(!inst->saturate);
2655
assert(!inst->src[0].abs && !inst->src[0].negate);
2656
assert(!inst->src[1].abs && !inst->src[1].negate);
2657
const brw::fs_builder ibld(this, block, inst);
2659
set_predicate(inst->predicate,
2660
ibld.SEL(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0),
2661
subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0),
2662
subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 0)));
2663
set_predicate(inst->predicate,
2664
ibld.SEL(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1),
2665
subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1),
2666
subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 1)));
2668
inst->remove(block);
2671
if (inst->src[0].equals(inst->src[1])) {
2672
inst->opcode = BRW_OPCODE_MOV;
2673
inst->src[1] = reg_undef;
2674
inst->predicate = BRW_PREDICATE_NONE;
2675
inst->predicate_inverse = false;
2677
} else if (inst->saturate && inst->src[1].file == IMM) {
2678
switch (inst->conditional_mod) {
2679
case BRW_CONDITIONAL_LE:
2680
case BRW_CONDITIONAL_L:
2681
switch (inst->src[1].type) {
2682
case BRW_REGISTER_TYPE_F:
2683
if (inst->src[1].f >= 1.0f) {
2684
inst->opcode = BRW_OPCODE_MOV;
2685
inst->src[1] = reg_undef;
2686
inst->conditional_mod = BRW_CONDITIONAL_NONE;
2694
case BRW_CONDITIONAL_GE:
2695
case BRW_CONDITIONAL_G:
2696
switch (inst->src[1].type) {
2697
case BRW_REGISTER_TYPE_F:
2698
if (inst->src[1].f <= 0.0f) {
2699
inst->opcode = BRW_OPCODE_MOV;
2700
inst->src[1] = reg_undef;
2701
inst->conditional_mod = BRW_CONDITIONAL_NONE;
2713
case BRW_OPCODE_MAD:
2714
if (inst->src[0].type != BRW_REGISTER_TYPE_F ||
2715
inst->src[1].type != BRW_REGISTER_TYPE_F ||
2716
inst->src[2].type != BRW_REGISTER_TYPE_F)
2718
if (inst->src[1].is_one()) {
2719
inst->opcode = BRW_OPCODE_ADD;
2720
inst->src[1] = inst->src[2];
2721
inst->src[2] = reg_undef;
2723
} else if (inst->src[2].is_one()) {
2724
inst->opcode = BRW_OPCODE_ADD;
2725
inst->src[2] = reg_undef;
2729
case SHADER_OPCODE_BROADCAST:
2730
if (is_uniform(inst->src[0])) {
2731
inst->opcode = BRW_OPCODE_MOV;
2733
inst->force_writemask_all = true;
2735
} else if (inst->src[1].file == IMM) {
2736
inst->opcode = BRW_OPCODE_MOV;
2737
            /* It's possible that the selected component will be too large and
             * overflow the register.  This can happen if someone does a
             * readInvocation() from GLSL or SPIR-V and provides an OOB
             * invocationIndex.  If this happens and we somehow manage
             * to constant fold it in and get here, then component() may cause
             * us to start reading outside of the VGRF which will lead to an
             * assert later.  Instead, just let it wrap around if it goes over
             * exec_size.
             */
const unsigned comp = inst->src[1].ud & (inst->exec_size - 1);
2747
inst->src[0] = component(inst->src[0], comp);
2749
inst->force_writemask_all = true;
2754
case SHADER_OPCODE_SHUFFLE:
2755
if (is_uniform(inst->src[0])) {
2756
inst->opcode = BRW_OPCODE_MOV;
2759
} else if (inst->src[1].file == IMM) {
2760
inst->opcode = BRW_OPCODE_MOV;
2761
inst->src[0] = component(inst->src[0],
2772
/* Swap if src[0] is immediate. */
2773
if (progress && inst->is_commutative()) {
2774
if (inst->src[0].file == IMM) {
2775
fs_reg tmp = inst->src[1];
2776
inst->src[1] = inst->src[0];
2783
invalidate_analysis(DEPENDENCY_INSTRUCTION_DATA_FLOW |
2784
DEPENDENCY_INSTRUCTION_DETAIL);
2790
/**
 * Optimize sample messages that have constant zero values for the trailing
 * texture coordinates.  We can just reduce the message length for these
 * instructions instead of reserving a register for it.  Trailing parameters
 * that aren't sent default to zero anyway.  This will cause the dead code
 * eliminator to remove the MOV instruction that would otherwise be emitted to
 * set up the zero value.
 */
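/* Sketch (hypothetical SIMD8 message): a texture lookup whose last payload
 * register holds an explicit LOD of 0.0 can drop that register, shrinking
 * mlen by exec_size / 8 == 1 GRF, and the MOV that zeroed it then becomes
 * dead code.
 */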
fs_visitor::opt_zero_samples()
2800
/* Gfx4 infers the texturing opcode based on the message length so we can't
2801
* change it. Gfx12.5 has restrictions on the number of coordinate
2802
* parameters that have to be provided for some texture types
2805
if (devinfo->ver < 5 || devinfo->verx10 == 125)
2808
bool progress = false;
2810
foreach_block_and_inst(block, fs_inst, inst, cfg) {
2811
if (!inst->is_tex())
2814
fs_inst *load_payload = (fs_inst *) inst->prev;
2816
if (load_payload->is_head_sentinel() ||
2817
load_payload->opcode != SHADER_OPCODE_LOAD_PAYLOAD)
2820
/* We don't want to remove the message header or the first parameter.
2821
* Removing the first parameter is not allowed, see the Haswell PRM
2822
* volume 7, page 149:
2824
* "Parameter 0 is required except for the sampleinfo message, which
2825
* has no parameter 0"
2827
while (inst->mlen > inst->header_size + inst->exec_size / 8 &&
2828
load_payload->src[(inst->mlen - inst->header_size) /
2829
(inst->exec_size / 8) +
2830
inst->header_size - 1].is_zero()) {
2831
inst->mlen -= inst->exec_size / 8;
2837
invalidate_analysis(DEPENDENCY_INSTRUCTION_DETAIL);
2843
fs_visitor::opt_register_renaming()
2845
bool progress = false;
2848
unsigned remap[alloc.count];
2849
memset(remap, ~0u, sizeof(unsigned) * alloc.count);
2851
foreach_block_and_inst(block, fs_inst, inst, cfg) {
2852
if (inst->opcode == BRW_OPCODE_IF || inst->opcode == BRW_OPCODE_DO) {
2854
} else if (inst->opcode == BRW_OPCODE_ENDIF ||
2855
inst->opcode == BRW_OPCODE_WHILE) {
2859
/* Rewrite instruction sources. */
2860
for (int i = 0; i < inst->sources; i++) {
2861
if (inst->src[i].file == VGRF &&
2862
remap[inst->src[i].nr] != ~0u &&
2863
remap[inst->src[i].nr] != inst->src[i].nr) {
2864
inst->src[i].nr = remap[inst->src[i].nr];
2869
const unsigned dst = inst->dst.nr;
2872
inst->dst.file == VGRF &&
2873
alloc.sizes[inst->dst.nr] * REG_SIZE == inst->size_written &&
2874
!inst->is_partial_write()) {
2875
if (remap[dst] == ~0u) {
2878
remap[dst] = alloc.allocate(regs_written(inst));
2879
inst->dst.nr = remap[dst];
2882
} else if (inst->dst.file == VGRF &&
2883
remap[dst] != ~0u &&
2884
remap[dst] != dst) {
2885
inst->dst.nr = remap[dst];
2891
invalidate_analysis(DEPENDENCY_INSTRUCTION_DETAIL |
2892
DEPENDENCY_VARIABLES);
2894
for (unsigned i = 0; i < ARRAY_SIZE(delta_xy); i++) {
2895
if (delta_xy[i].file == VGRF && remap[delta_xy[i].nr] != ~0u) {
2896
delta_xy[i].nr = remap[delta_xy[i].nr];
2905
* Remove redundant or useless halts.
2907
* For example, we can eliminate halts in the following sequence:
2909
* halt (redundant with the next halt)
2910
* halt (useless; jumps to the next instruction)
2914
fs_visitor::opt_redundant_halt()
2916
bool progress = false;
2918
unsigned halt_count = 0;
2919
fs_inst *halt_target = NULL;
2920
bblock_t *halt_target_block = NULL;
2921
foreach_block_and_inst(block, fs_inst, inst, cfg) {
2922
if (inst->opcode == BRW_OPCODE_HALT)
2925
if (inst->opcode == SHADER_OPCODE_HALT_TARGET) {
2927
halt_target_block = block;
2933
assert(halt_count == 0);
2937
/* Delete any HALTs immediately before the halt target. */
2938
for (fs_inst *prev = (fs_inst *) halt_target->prev;
2939
!prev->is_head_sentinel() && prev->opcode == BRW_OPCODE_HALT;
2940
prev = (fs_inst *) halt_target->prev) {
2941
prev->remove(halt_target_block);
2946
if (halt_count == 0) {
2947
halt_target->remove(halt_target_block);
2952
invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
2958
/**
 * Compute a bitmask with GRF granularity with a bit set for each GRF starting
 * from \p r.offset which overlaps the region starting at \p s.offset and
 * spanning \p ds bytes.
 */
static inline unsigned
mask_relative_to(const fs_reg &r, const fs_reg &s, unsigned ds)
{
   const int rel_offset = reg_offset(s) - reg_offset(r);
   const int shift = rel_offset / REG_SIZE;
   const unsigned n = DIV_ROUND_UP(rel_offset % REG_SIZE + ds, REG_SIZE);
   assert(reg_space(r) == reg_space(s) &&
          shift >= 0 && shift < int(8 * sizeof(unsigned)));
   return ((1 << n) - 1) << shift;
}
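/* Worked example (hypothetical offsets): with r at byte 0, s at byte 40 and
 * ds == 32, rel_offset is 40, shift is 1 and n is DIV_ROUND_UP(8 + 32, 32)
 * == 2, so the mask is 0b110: the overlap touches the second and third GRFs
 * of r.
 */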
fs_visitor::compute_to_mrf()
2976
bool progress = false;
2979
/* No MRFs on Gen >= 7. */
2980
if (devinfo->ver >= 7)
2983
const fs_live_variables &live = live_analysis.require();
2985
foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
2989
if (inst->opcode != BRW_OPCODE_MOV ||
2990
inst->is_partial_write() ||
2991
inst->dst.file != MRF || inst->src[0].file != VGRF ||
2992
inst->dst.type != inst->src[0].type ||
2993
inst->src[0].abs || inst->src[0].negate ||
2994
!inst->src[0].is_contiguous() ||
2995
inst->src[0].offset % REG_SIZE != 0)
2998
/* Can't compute-to-MRF this GRF if someone else was going to
3001
if (live.vgrf_end[inst->src[0].nr] > ip)
3004
/* Found a move of a GRF to a MRF. Let's see if we can go rewrite the
3005
* things that computed the value of all GRFs of the source region. The
3006
* regs_left bitset keeps track of the registers we haven't yet found a
3007
* generating instruction for.
3009
unsigned regs_left = (1 << regs_read(inst, 0)) - 1;
3011
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
3012
if (regions_overlap(scan_inst->dst, scan_inst->size_written,
3013
inst->src[0], inst->size_read(0))) {
3014
/* Found the last thing to write our reg we want to turn
3015
* into a compute-to-MRF.
3018
/* If this one instruction didn't populate all the
3019
* channels, bail. We might be able to rewrite everything
3020
* that writes that reg, but it would require smarter
3023
if (scan_inst->is_partial_write())
3026
/* Handling things not fully contained in the source of the copy
3027
* would need us to understand coalescing out more than one MOV at
3030
if (!region_contained_in(scan_inst->dst, scan_inst->size_written,
3031
inst->src[0], inst->size_read(0)))
3034
/* SEND instructions can't have MRF as a destination. */
3035
if (scan_inst->mlen)
3038
if (devinfo->ver == 6) {
3039
/* gfx6 math instructions must have the destination be
3040
* GRF, so no compute-to-MRF for them.
3042
if (scan_inst->is_math()) {
3047
/* Clear the bits for any registers this instruction overwrites. */
3048
regs_left &= ~mask_relative_to(
3049
inst->src[0], scan_inst->dst, scan_inst->size_written);
3054
/* We don't handle control flow here. Most computation of
3055
* values that end up in MRFs are shortly before the MRF
3058
if (block->start() == scan_inst)
3061
/* You can't read from an MRF, so if someone else reads our
3062
* MRF's source GRF that we wanted to rewrite, that stops us.
3064
bool interfered = false;
3065
for (int i = 0; i < scan_inst->sources; i++) {
3066
if (regions_overlap(scan_inst->src[i], scan_inst->size_read(i),
3067
inst->src[0], inst->size_read(0))) {
3074
if (regions_overlap(scan_inst->dst, scan_inst->size_written,
3075
inst->dst, inst->size_written)) {
3076
/* If somebody else writes our MRF here, we can't
3077
* compute-to-MRF before that.
3082
if (scan_inst->mlen > 0 && scan_inst->base_mrf != -1 &&
3083
regions_overlap(fs_reg(MRF, scan_inst->base_mrf), scan_inst->mlen * REG_SIZE,
3084
inst->dst, inst->size_written)) {
3085
/* Found a SEND instruction, which means that there are
3086
* live values in MRFs from base_mrf to base_mrf +
3087
* scan_inst->mlen - 1. Don't go pushing our MRF write up
3097
/* Found all generating instructions of our MRF's source value, so it
3098
* should be safe to rewrite them to point to the MRF directly.
3100
regs_left = (1 << regs_read(inst, 0)) - 1;
3102
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
3103
if (regions_overlap(scan_inst->dst, scan_inst->size_written,
3104
inst->src[0], inst->size_read(0))) {
3105
/* Clear the bits for any registers this instruction overwrites. */
3106
regs_left &= ~mask_relative_to(
3107
inst->src[0], scan_inst->dst, scan_inst->size_written);
3109
const unsigned rel_offset = reg_offset(scan_inst->dst) -
3110
reg_offset(inst->src[0]);
3112
if (inst->dst.nr & BRW_MRF_COMPR4) {
3113
/* Apply the same address transformation done by the hardware
3114
* for COMPR4 MRF writes.
3116
assert(rel_offset < 2 * REG_SIZE);
3117
scan_inst->dst.nr = inst->dst.nr + rel_offset / REG_SIZE * 4;
3119
/* Clear the COMPR4 bit if the generating instruction is not
3122
if (scan_inst->size_written < 2 * REG_SIZE)
3123
scan_inst->dst.nr &= ~BRW_MRF_COMPR4;
3126
/* Calculate the MRF number the result of this instruction is
3127
* ultimately written to.
3129
scan_inst->dst.nr = inst->dst.nr + rel_offset / REG_SIZE;
3132
scan_inst->dst.file = MRF;
3133
scan_inst->dst.offset = inst->dst.offset + rel_offset % REG_SIZE;
3134
scan_inst->saturate |= inst->saturate;
3141
inst->remove(block);
3146
invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
3152
* Eliminate FIND_LIVE_CHANNEL instructions occurring outside any control
3153
* flow. We could probably do better here with some form of divergence
3157
fs_visitor::eliminate_find_live_channel()
3159
bool progress = false;
3162
if (!brw_stage_has_packed_dispatch(devinfo, stage, stage_prog_data)) {
3163
/* The optimization below assumes that channel zero is live on thread
3164
* dispatch, which may not be the case if the fixed function dispatches
3170
foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
3171
switch (inst->opcode) {
3177
case BRW_OPCODE_ENDIF:
3178
case BRW_OPCODE_WHILE:
3182
case BRW_OPCODE_HALT:
3183
/* This can potentially make control flow non-uniform until the end
3188
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
3190
inst->opcode = BRW_OPCODE_MOV;
3191
inst->src[0] = brw_imm_ud(0u);
3193
inst->force_writemask_all = true;
3204
invalidate_analysis(DEPENDENCY_INSTRUCTION_DETAIL);
3210
* Once we've generated code, try to convert normal FS_OPCODE_FB_WRITE
3211
* instructions to FS_OPCODE_REP_FB_WRITE.
3214
fs_visitor::emit_repclear_shader()
3216
brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
3218
int color_mrf = base_mrf + 2;
3222
mov = bld.exec_all().group(4, 0)
3223
.MOV(brw_message_reg(color_mrf),
3224
fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F));
3226
struct brw_reg reg =
3227
brw_reg(BRW_GENERAL_REGISTER_FILE, 2, 3, 0, 0, BRW_REGISTER_TYPE_UD,
3228
BRW_VERTICAL_STRIDE_8, BRW_WIDTH_2, BRW_HORIZONTAL_STRIDE_4,
3229
BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
3231
mov = bld.exec_all().group(4, 0)
3232
.MOV(brw_uvec_mrf(4, color_mrf, 0), fs_reg(reg));
3235
fs_inst *write = NULL;
3236
if (key->nr_color_regions == 1) {
3237
write = bld.emit(FS_OPCODE_REP_FB_WRITE);
3238
write->saturate = key->clamp_fragment_color;
3239
write->base_mrf = color_mrf;
3241
write->header_size = 0;
3244
assume(key->nr_color_regions > 0);
3246
struct brw_reg header =
3247
retype(brw_message_reg(base_mrf), BRW_REGISTER_TYPE_UD);
3248
bld.exec_all().group(16, 0)
3249
.MOV(header, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
3251
for (int i = 0; i < key->nr_color_regions; ++i) {
3253
bld.exec_all().group(1, 0)
3254
.MOV(component(header, 2), brw_imm_ud(i));
3257
write = bld.emit(FS_OPCODE_REP_FB_WRITE);
3258
write->saturate = key->clamp_fragment_color;
3259
write->base_mrf = base_mrf;
3261
write->header_size = 2;
3266
write->last_rt = true;
3270
assign_constant_locations();
3271
assign_curb_setup();
3273
/* Now that we have the uniform assigned, go ahead and force it to a vec4. */
3275
assert(mov->src[0].file == FIXED_GRF);
3276
mov->src[0] = brw_vec4_grf(mov->src[0].nr, 0);
3283
* Walks through basic blocks, looking for repeated MRF writes and
3284
* removing the later ones.
3287
fs_visitor::remove_duplicate_mrf_writes()
3289
fs_inst *last_mrf_move[BRW_MAX_MRF(devinfo->ver)];
3290
bool progress = false;
3292
/* Need to update the MRF tracking for compressed instructions. */
3293
if (dispatch_width >= 16)
3296
memset(last_mrf_move, 0, sizeof(last_mrf_move));
3298
foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
3299
if (inst->is_control_flow()) {
3300
memset(last_mrf_move, 0, sizeof(last_mrf_move));
3303
if (inst->opcode == BRW_OPCODE_MOV &&
3304
inst->dst.file == MRF) {
3305
fs_inst *prev_inst = last_mrf_move[inst->dst.nr];
3306
if (prev_inst && prev_inst->opcode == BRW_OPCODE_MOV &&
3307
inst->dst.equals(prev_inst->dst) &&
3308
inst->src[0].equals(prev_inst->src[0]) &&
3309
inst->saturate == prev_inst->saturate &&
3310
inst->predicate == prev_inst->predicate &&
3311
inst->conditional_mod == prev_inst->conditional_mod &&
3312
inst->exec_size == prev_inst->exec_size) {
3313
inst->remove(block);
3319
/* Clear out the last-write records for MRFs that were overwritten. */
3320
if (inst->dst.file == MRF) {
3321
last_mrf_move[inst->dst.nr] = NULL;
3324
if (inst->mlen > 0 && inst->base_mrf != -1) {
3325
/* Found a SEND instruction, which will include two or fewer
3326
* implied MRF writes. We could do better here.
3328
for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
3329
last_mrf_move[inst->base_mrf + i] = NULL;
3333
/* Clear out any MRF move records whose sources got overwritten. */
3334
for (unsigned i = 0; i < ARRAY_SIZE(last_mrf_move); i++) {
3335
if (last_mrf_move[i] &&
3336
regions_overlap(inst->dst, inst->size_written,
3337
last_mrf_move[i]->src[0],
3338
last_mrf_move[i]->size_read(0))) {
3339
last_mrf_move[i] = NULL;
3343
if (inst->opcode == BRW_OPCODE_MOV &&
3344
inst->dst.file == MRF &&
3345
inst->src[0].file != ARF &&
3346
!inst->is_partial_write()) {
3347
last_mrf_move[inst->dst.nr] = inst;
3352
invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
3358
/**
 * Rounding modes for conversion instructions are included for each
 * conversion, but on the hardware the rounding mode is a piece of state,
 * so once it has been set we don't need to set it again for subsequent
 * conversions.
 *
 * This is useful for vector/matrix conversions, as setting the
 * mode once is enough for the full vector/matrix.
 */
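/* For instance (hypothetical IR): converting a dvec4 to a vec4 lowers to
 * four per-component conversions; only the first one needs a
 * SHADER_OPCODE_RND_MODE, and the duplicates with the same mode that follow
 * are removed by this pass.
 */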
fs_visitor::remove_extra_rounding_modes()
3368
bool progress = false;
3369
unsigned execution_mode = this->nir->info.float_controls_execution_mode;
3371
brw_rnd_mode base_mode = BRW_RND_MODE_UNSPECIFIED;
3372
if ((FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16 |
3373
FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32 |
3374
FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64) &
3376
base_mode = BRW_RND_MODE_RTNE;
3377
if ((FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16 |
3378
FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32 |
3379
FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64) &
3381
base_mode = BRW_RND_MODE_RTZ;
3383
foreach_block (block, cfg) {
3384
brw_rnd_mode prev_mode = base_mode;
3386
foreach_inst_in_block_safe (fs_inst, inst, block) {
3387
if (inst->opcode == SHADER_OPCODE_RND_MODE) {
3388
assert(inst->src[0].file == BRW_IMMEDIATE_VALUE);
3389
const brw_rnd_mode mode = (brw_rnd_mode) inst->src[0].d;
3390
if (mode == prev_mode) {
3391
inst->remove(block);
3401
invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
3407
clear_deps_for_inst_src(fs_inst *inst, bool *deps, int first_grf, int grf_len)
3409
/* Clear the flag for registers that actually got read (as expected). */
3410
for (int i = 0; i < inst->sources; i++) {
3412
if (inst->src[i].file == VGRF || inst->src[i].file == FIXED_GRF) {
3413
grf = inst->src[i].nr;
3418
if (grf >= first_grf &&
3419
grf < first_grf + grf_len) {
3420
deps[grf - first_grf] = false;
3421
if (inst->exec_size == 16)
3422
deps[grf - first_grf + 1] = false;
3428
/**
 * Implements this workaround for the original 965:
 *
 *     "[DevBW, DevCL] Implementation Restrictions: As the hardware does not
 *      check for post destination dependencies on this instruction, software
 *      must ensure that there is no destination hazard for the case of ‘write
 *      followed by a posted write’ shown in the following example.
 *
 *      1. mov r3 0
 *      2. send r3.xy <rest of send instruction>
 *      3. mov r2 r3
 *
 *      Due to no post-destination dependency check on the ‘send’, the above
 *      code sequence could have two instructions (1 and 2) in flight at the
 *      same time that both consider ‘r3’ as the target of their final writes."
 */
fs_visitor::insert_gfx4_pre_send_dependency_workarounds(bblock_t *block,
3447
int write_len = regs_written(inst);
3448
int first_write_grf = inst->dst.nr;
3449
bool needs_dep[BRW_MAX_MRF(devinfo->ver)];
3450
assert(write_len < (int)sizeof(needs_dep) - 1);
3452
memset(needs_dep, false, sizeof(needs_dep));
3453
memset(needs_dep, true, write_len);
3455
clear_deps_for_inst_src(inst, needs_dep, first_write_grf, write_len);
3457
/* Walk backwards looking for writes to registers we're writing which
3458
* aren't read since being written. If we hit the start of the program,
3459
* we assume that there are no outstanding dependencies on entry to the
3462
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
3463
/* If we hit control flow, assume that there *are* outstanding
3464
* dependencies, and force their cleanup before our instruction.
3466
if (block->start() == scan_inst && block->num != 0) {
3467
for (int i = 0; i < write_len; i++) {
3469
DEP_RESOLVE_MOV(fs_builder(this, block, inst),
3470
first_write_grf + i);
3475
/* We insert our reads as late as possible on the assumption that any
3476
* instruction but a MOV that might have left us an outstanding
3477
* dependency has more latency than a MOV.
3479
if (scan_inst->dst.file == VGRF) {
3480
for (unsigned i = 0; i < regs_written(scan_inst); i++) {
3481
int reg = scan_inst->dst.nr + i;
3483
if (reg >= first_write_grf &&
3484
reg < first_write_grf + write_len &&
3485
needs_dep[reg - first_write_grf]) {
3486
DEP_RESOLVE_MOV(fs_builder(this, block, inst), reg);
3487
needs_dep[reg - first_write_grf] = false;
3488
if (scan_inst->exec_size == 16)
3489
needs_dep[reg - first_write_grf + 1] = false;
3494
/* Clear the flag for registers that actually got read (as expected). */
3495
clear_deps_for_inst_src(scan_inst, needs_dep, first_write_grf, write_len);
3497
/* Continue the loop only if we haven't resolved all the dependencies */
3499
for (i = 0; i < write_len; i++) {
3509
* Implements this workaround for the original 965:
3511
* "[DevBW, DevCL] Errata: A destination register from a send can not be
3512
* used as a destination register until after it has been sourced by an
3513
* instruction with a different destination register.
3516
fs_visitor::insert_gfx4_post_send_dependency_workarounds(bblock_t *block, fs_inst *inst)
3518
int write_len = regs_written(inst);
3519
unsigned first_write_grf = inst->dst.nr;
3520
bool needs_dep[BRW_MAX_MRF(devinfo->ver)];
3521
assert(write_len < (int)sizeof(needs_dep) - 1);
3523
memset(needs_dep, false, sizeof(needs_dep));
3524
memset(needs_dep, true, write_len);
3525
/* Walk forwards looking for writes to registers we're writing which aren't
3526
* read before being written.
3528
foreach_inst_in_block_starting_from(fs_inst, scan_inst, inst) {
3529
/* If we hit control flow, force resolve all remaining dependencies. */
3530
if (block->end() == scan_inst && block->num != cfg->num_blocks - 1) {
3531
for (int i = 0; i < write_len; i++) {
3533
DEP_RESOLVE_MOV(fs_builder(this, block, scan_inst),
3534
first_write_grf + i);
3539
/* Clear the flag for registers that actually got read (as expected). */
3540
clear_deps_for_inst_src(scan_inst, needs_dep, first_write_grf, write_len);
3542
/* We insert our reads as late as possible since they're reading the
3543
* result of a SEND, which has massive latency.
3545
if (scan_inst->dst.file == VGRF &&
3546
scan_inst->dst.nr >= first_write_grf &&
3547
scan_inst->dst.nr < first_write_grf + write_len &&
3548
needs_dep[scan_inst->dst.nr - first_write_grf]) {
3549
DEP_RESOLVE_MOV(fs_builder(this, block, scan_inst),
3551
needs_dep[scan_inst->dst.nr - first_write_grf] = false;
3554
/* Continue the loop only if we haven't resolved all the dependencies */
3556
for (i = 0; i < write_len; i++) {
3566
fs_visitor::insert_gfx4_send_dependency_workarounds()
3568
if (devinfo->ver != 4 || devinfo->platform == INTEL_PLATFORM_G4X)
3571
bool progress = false;
3573
foreach_block_and_inst(block, fs_inst, inst, cfg) {
3574
if (inst->mlen != 0 && inst->dst.file == VGRF) {
3575
insert_gfx4_pre_send_dependency_workarounds(block, inst);
3576
insert_gfx4_post_send_dependency_workarounds(block, inst);
3582
invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
3586
/**
 * Turns the generic expression-style uniform pull constant load instruction
 * into a hardware-specific series of instructions for loading a pull
 * constant.
 *
 * The expression style allows the CSE pass before this to optimize out
 * repeated loads from the same offset, and gives the pre-register-allocation
 * scheduling full flexibility, while the conversion to native instructions
 * allows the post-register-allocation scheduler the best information
 * possible.
 *
 * Note that execution masking for setting up pull constant loads is special:
 * the channels that need to be written are unrelated to the current execution
 * mask, since a later instruction will use one of the result channels as a
 * source operand for all 8 or 16 of its channels.
 */
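/* As an illustrative sketch (hypothetical shader): eight reads of the same
 * UBO dword CSE into a single FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD earlier
 * in compilation, and that one load is rewritten here into the
 * hardware-specific send sequence below, whose result register feeds every
 * consumer regardless of the live channel mask.
 */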
fs_visitor::lower_uniform_pull_constant_loads()
3604
foreach_block_and_inst (block, fs_inst, inst, cfg) {
3605
if (inst->opcode != FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD)
3608
const fs_reg& surface = inst->src[0];
3609
const fs_reg& offset_B = inst->src[1];
3610
assert(offset_B.file == IMM);
3612
if (devinfo->has_lsc) {
3613
const fs_builder ubld =
3614
fs_builder(this, block, inst).group(8, 0).exec_all();
3616
const fs_reg payload = ubld.vgrf(BRW_REGISTER_TYPE_UD);
3617
ubld.MOV(payload, offset_B);
3619
inst->sfid = GFX12_SFID_UGM;
3620
inst->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD,
3622
LSC_ADDR_SURFTYPE_BTI,
3624
1 /* num_coordinates */,
3626
inst->size_written / 4,
3627
true /* transpose */,
3628
LSC_CACHE_LOAD_L1STATE_L3MOCS,
3629
true /* has_dest */);
3632
if (surface.file == IMM) {
3633
ex_desc = brw_imm_ud(lsc_bti_ex_desc(devinfo, surface.ud));
3635
/* We only need the first component for the payload so we can use
3636
* one of the other components for the extended descriptor
3638
ex_desc = component(payload, 1);
3639
ubld.group(1, 0).SHL(ex_desc, surface, brw_imm_ud(24));
3642
/* Update the original instruction. */
3643
inst->opcode = SHADER_OPCODE_SEND;
3644
inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc);
3646
inst->header_size = 0;
3647
inst->send_has_side_effects = false;
3648
inst->send_is_volatile = true;
3649
inst->exec_size = 1;
3651
/* Finally, the payload */
3652
inst->resize_sources(3);
3653
inst->src[0] = brw_imm_ud(0); /* desc */
3654
inst->src[1] = ex_desc;
3655
inst->src[2] = payload;
3657
invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
3658
} else if (devinfo->ver >= 7) {
3659
const fs_builder ubld = fs_builder(this, block, inst).exec_all();
3660
const fs_reg payload = ubld.group(8, 0).vgrf(BRW_REGISTER_TYPE_UD);
3662
ubld.group(8, 0).MOV(payload,
3663
retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
3664
ubld.group(1, 0).MOV(component(payload, 2),
3665
brw_imm_ud(offset_B.ud / 16));
3667
inst->opcode = FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GFX7;
3668
inst->src[1] = payload;
3669
inst->header_size = 1;
3672
invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
3674
/* Before register allocation, we didn't tell the scheduler about the
3675
* MRF we use. We know it's safe to use this MRF because nothing
3676
* else does except for register spill/unspill, which generates and
3677
* uses its MRF within a single IR instruction.
3679
inst->base_mrf = FIRST_PULL_LOAD_MRF(devinfo->ver) + 1;
3686
fs_visitor::lower_load_payload()
3688
bool progress = false;
3690
foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
3691
if (inst->opcode != SHADER_OPCODE_LOAD_PAYLOAD)
3694
assert(inst->dst.file == MRF || inst->dst.file == VGRF);
3695
assert(inst->saturate == false);
3696
fs_reg dst = inst->dst;
3698
/* Get rid of COMPR4. We'll add it back in if we need it */
3699
if (dst.file == MRF)
3700
dst.nr = dst.nr & ~BRW_MRF_COMPR4;
3702
const fs_builder ibld(this, block, inst);
3703
const fs_builder ubld = ibld.exec_all();
3705
for (uint8_t i = 0; i < inst->header_size;) {
3706
/* Number of header GRFs to initialize at once with a single MOV
3710
(i + 1 < inst->header_size && inst->src[i].stride == 1 &&
3711
inst->src[i + 1].equals(byte_offset(inst->src[i], REG_SIZE))) ?
3714
if (inst->src[i].file != BAD_FILE)
3715
ubld.group(8 * n, 0).MOV(retype(dst, BRW_REGISTER_TYPE_UD),
3716
retype(inst->src[i], BRW_REGISTER_TYPE_UD));
3718
dst = byte_offset(dst, n * REG_SIZE);
3722
if (inst->dst.file == MRF && (inst->dst.nr & BRW_MRF_COMPR4) &&
3723
inst->exec_size > 8) {
3724
/* In this case, the payload portion of the LOAD_PAYLOAD isn't
3725
* a straightforward copy. Instead, the result of the
3726
* LOAD_PAYLOAD is treated as interleaved and the first four
3727
* non-header sources are unpacked as:
3738
* This is used for gen <= 5 fb writes.
3740
assert(inst->exec_size == 16);
3741
assert(inst->header_size + 4 <= inst->sources);
3742
for (uint8_t i = inst->header_size; i < inst->header_size + 4; i++) {
3743
if (inst->src[i].file != BAD_FILE) {
3744
if (devinfo->has_compr4) {
3745
fs_reg compr4_dst = retype(dst, inst->src[i].type);
3746
compr4_dst.nr |= BRW_MRF_COMPR4;
3747
ibld.MOV(compr4_dst, inst->src[i]);
3749
/* Platform doesn't have COMPR4. We have to fake it */
3750
fs_reg mov_dst = retype(dst, inst->src[i].type);
3751
ibld.quarter(0).MOV(mov_dst, quarter(inst->src[i], 0));
3753
ibld.quarter(1).MOV(mov_dst, quarter(inst->src[i], 1));
3760
/* The loop above only ever incremented us through the first set
3761
* of 4 registers. However, thanks to the magic of COMPR4, we
3762
* actually wrote to the first 8 registers, so we need to take
3763
* that into account now.
3767
/* The COMPR4 code took care of the first 4 sources. We'll let
3768
* the regular path handle any remaining sources. Yes, we are
3769
* modifying the instruction but we're about to delete it so
3770
* this really doesn't hurt anything.
3772
inst->header_size += 4;
3775
for (uint8_t i = inst->header_size; i < inst->sources; i++) {
3776
dst.type = inst->src[i].type;
3777
if (inst->src[i].file != BAD_FILE) {
3778
ibld.MOV(dst, inst->src[i]);
3780
dst = offset(dst, ibld, 1);
3783
inst->remove(block);
3788
invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
3794
fs_visitor::lower_mul_dword_inst(fs_inst *inst, bblock_t *block)
3796
const fs_builder ibld(this, block, inst);
3798
const bool ud = (inst->src[1].type == BRW_REGISTER_TYPE_UD);
3799
if (inst->src[1].file == IMM &&
3800
(( ud && inst->src[1].ud <= UINT16_MAX) ||
3801
(!ud && inst->src[1].d <= INT16_MAX && inst->src[1].d >= INT16_MIN))) {
3802
/* The MUL instruction isn't commutative. On Gen <= 6, only the low
3803
* 16-bits of src0 are read, and on Gen >= 7 only the low 16-bits of
3806
* If multiplying by an immediate value that fits in 16-bits, do a
3807
* single MUL instruction with that value in the proper location.
3809
if (devinfo->ver < 7) {
3810
fs_reg imm(VGRF, alloc.allocate(dispatch_width / 8), inst->dst.type);
3811
ibld.MOV(imm, inst->src[1]);
3812
ibld.MUL(inst->dst, imm, inst->src[0]);
3814
ibld.MUL(inst->dst, inst->src[0],
3815
ud ? brw_imm_uw(inst->src[1].ud)
3816
: brw_imm_w(inst->src[1].d));
3819
/* Gen < 8 (and some Gfx8+ low-power parts like Cherryview) cannot
3820
* do 32-bit integer multiplication in one instruction, but instead
3821
* must do a sequence (which actually calculates a 64-bit result):
3823
* mul(8) acc0<1>D g3<8,8,1>D g4<8,8,1>D
3824
* mach(8) null g3<8,8,1>D g4<8,8,1>D
3825
* mov(8) g2<1>D acc0<8,8,1>D
3827
* But on Gen > 6, the ability to use second accumulator register
3828
* (acc1) for non-float data types was removed, preventing a simple
3829
* implementation in SIMD16. A 16-channel result can be calculated by
3830
* executing the three instructions twice in SIMD8, once with quarter
3831
* control of 1Q for the first eight channels and again with 2Q for
3832
* the second eight channels.
3834
* Which accumulator register is implicitly accessed (by AccWrEnable
3835
* for instance) is determined by the quarter control. Unfortunately
3836
* Ivybridge (and presumably Baytrail) has a hardware bug in which an
3837
* implicit accumulator access by an instruction with 2Q will access
3838
* acc1 regardless of whether the data type is usable in acc1.
3840
* Specifically, the 2Q mach(8) writes acc1 which does not exist for
3841
* integer data types.
3843
* Since we only want the low 32-bits of the result, we can do two
3844
* 32-bit x 16-bit multiplies (like the mul and mach are doing), and
3845
* adjust the high result and add them (like the mach is doing):
3847
* mul(8) g7<1>D g3<8,8,1>D g4.0<8,8,1>UW
3848
* mul(8) g8<1>D g3<8,8,1>D g4.1<8,8,1>UW
3849
* shl(8) g9<1>D g8<8,8,1>D 16D
3850
* add(8) g2<1>D g7<8,8,1>D g8<8,8,1>D
3852
       * We avoid the shl instruction by realizing that we only want to add
       * the low 16-bits of the "high" result to the high 16-bits of the
       * "low" result and using proper regioning on the add:
       *
       *    mul(8)  g7<1>D    g3<8,8,1>D      g4.0<16,8,2>UW
       *    mul(8)  g8<1>D    g3<8,8,1>D      g4.1<16,8,2>UW
       *    add(8)  g7.1<2>UW g7.1<16,8,2>UW  g8<16,8,2>UW
       *
       * Since it does not use the (single) accumulator register, we can
       * schedule multi-component multiplications much better.
       */
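      /* Numeric sketch (hypothetical values): with src0 == 0x00020003 and
       * src1 == 0x00050007, "low" is 0x00020003 * 0x0007 and "high" is
       * 0x00020003 * 0x0005; adding the low 16 bits of "high" into the high
       * 16 bits of "low" gives 0x001D0015, the same low 32 bits as the full
       * 32x32 product.
       */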
bool needs_mov = false;
3865
fs_reg orig_dst = inst->dst;
3867
/* Get a new VGRF for the "low" 32x16-bit multiplication result if
3868
* reusing the original destination is impossible due to hardware
3869
* restrictions, source/destination overlap, or it being the null
3872
fs_reg low = inst->dst;
3873
if (orig_dst.is_null() || orig_dst.file == MRF ||
3874
regions_overlap(inst->dst, inst->size_written,
3875
inst->src[0], inst->size_read(0)) ||
3876
regions_overlap(inst->dst, inst->size_written,
3877
inst->src[1], inst->size_read(1)) ||
3878
inst->dst.stride >= 4) {
3880
low = fs_reg(VGRF, alloc.allocate(regs_written(inst)),
3884
/* Get a new VGRF but keep the same stride as inst->dst */
3885
fs_reg high(VGRF, alloc.allocate(regs_written(inst)), inst->dst.type);
3886
high.stride = inst->dst.stride;
3887
high.offset = inst->dst.offset % REG_SIZE;
3889
if (devinfo->ver >= 7) {
3890
/* From Wa_1604601757:
3892
* "When multiplying a DW and any lower precision integer, source modifier
3893
* is not supported."
3895
* An unsupported negate modifier on src[1] would ordinarily be
3896
* lowered by the subsequent lower_regioning pass. In this case that
3897
* pass would spawn another dword multiply. Instead, lower the
3900
const bool source_mods_unsupported = (devinfo->ver >= 12);
3902
if (inst->src[1].abs || (inst->src[1].negate &&
3903
source_mods_unsupported))
3904
lower_src_modifiers(this, block, inst, 1);
3906
if (inst->src[1].file == IMM) {
3907
ibld.MUL(low, inst->src[0],
3908
brw_imm_uw(inst->src[1].ud & 0xffff));
3909
ibld.MUL(high, inst->src[0],
3910
brw_imm_uw(inst->src[1].ud >> 16));
3912
ibld.MUL(low, inst->src[0],
3913
subscript(inst->src[1], BRW_REGISTER_TYPE_UW, 0));
3914
ibld.MUL(high, inst->src[0],
3915
subscript(inst->src[1], BRW_REGISTER_TYPE_UW, 1));
3918
if (inst->src[0].abs)
3919
lower_src_modifiers(this, block, inst, 0);
3921
ibld.MUL(low, subscript(inst->src[0], BRW_REGISTER_TYPE_UW, 0),
3923
ibld.MUL(high, subscript(inst->src[0], BRW_REGISTER_TYPE_UW, 1),
3927
ibld.ADD(subscript(low, BRW_REGISTER_TYPE_UW, 1),
3928
subscript(low, BRW_REGISTER_TYPE_UW, 1),
3929
subscript(high, BRW_REGISTER_TYPE_UW, 0));
3931
if (needs_mov || inst->conditional_mod)
3932
set_condmod(inst->conditional_mod, ibld.MOV(orig_dst, low));
3937
fs_visitor::lower_mul_qword_inst(fs_inst *inst, bblock_t *block)
3939
const fs_builder ibld(this, block, inst);
3941
   /* Considering two 64-bit integers ab and cd where each letter (a, b, c, d)
    * corresponds to 32 bits, we get a 128-bit result WXYZ.  We only need to
    * provide the YZ part of the result, i.e. the low 64 bits:
    *
    *        ab
    *      * cd
    *     -----
    *      + BD    Only BD needs to be 64 bits.  For AD and BC we only care
    *      + AD    about the lower 32 bits (since they are part of the upper
    *      + BC    32 bits of our result).  AC is not needed since it starts
    *      + AC    on the 65th bit of the result.
    *     -----
    */
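   /* Equivalently (illustrative formula): with src0 = (a << 32) | b and
    * src1 = (c << 32) | d, the low 64 bits of the product are
    * b*d + ((a*d + b*c) << 32), which is exactly what the bd, ad and bc
    * temporaries below compute.
    */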
unsigned int q_regs = regs_written(inst);
3952
unsigned int d_regs = (q_regs + 1) / 2;
3954
fs_reg bd(VGRF, alloc.allocate(q_regs), BRW_REGISTER_TYPE_UQ);
3955
fs_reg ad(VGRF, alloc.allocate(d_regs), BRW_REGISTER_TYPE_UD);
3956
fs_reg bc(VGRF, alloc.allocate(d_regs), BRW_REGISTER_TYPE_UD);
3958
/* Here we need the full 64 bit result for 32b * 32b. */
3959
if (devinfo->has_integer_dword_mul) {
3960
ibld.MUL(bd, subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0),
3961
subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 0));
3963
fs_reg bd_high(VGRF, alloc.allocate(d_regs), BRW_REGISTER_TYPE_UD);
3964
fs_reg bd_low(VGRF, alloc.allocate(d_regs), BRW_REGISTER_TYPE_UD);
3965
fs_reg acc = retype(brw_acc_reg(inst->exec_size), BRW_REGISTER_TYPE_UD);
3967
fs_inst *mul = ibld.MUL(acc,
3968
subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0),
3969
subscript(inst->src[1], BRW_REGISTER_TYPE_UW, 0));
3970
mul->writes_accumulator = true;
3972
ibld.MACH(bd_high, subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0),
3973
subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 0));
3974
ibld.MOV(bd_low, acc);
3976
ibld.MOV(subscript(bd, BRW_REGISTER_TYPE_UD, 0), bd_low);
3977
ibld.MOV(subscript(bd, BRW_REGISTER_TYPE_UD, 1), bd_high);
3980
ibld.MUL(ad, subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1),
3981
subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 0));
3982
ibld.MUL(bc, subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0),
3983
subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 1));
3985
ibld.ADD(ad, ad, bc);
3986
ibld.ADD(subscript(bd, BRW_REGISTER_TYPE_UD, 1),
3987
subscript(bd, BRW_REGISTER_TYPE_UD, 1), ad);
3989
if (devinfo->has_64bit_int) {
3990
ibld.MOV(inst->dst, bd);
3992
ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0),
3993
subscript(bd, BRW_REGISTER_TYPE_UD, 0));
3994
ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1),
3995
subscript(bd, BRW_REGISTER_TYPE_UD, 1));
4000
fs_visitor::lower_mulh_inst(fs_inst *inst, bblock_t *block)
4002
const fs_builder ibld(this, block, inst);
4004
   /* According to the BDW+ BSpec page for the "Multiply Accumulate
    * High" instruction:
    *
    *   "An added preliminary mov is required for source modification on
    *    src1:
    *
    *      mov (8)  r3.0<1>:d   -r3<8;8,1>:d
    *      mul (8)  acc0:d      r2.0<8;8,1>:d   r3.0<16;8,2>:uw
    *      mach (8) r5.0<1>:d   r2.0<8;8,1>:d   r3.0<8;8,1>:d"
    */
if (devinfo->ver >= 8 && (inst->src[1].negate || inst->src[1].abs))
4014
lower_src_modifiers(this, block, inst, 1);
4016
/* Should have been lowered to 8-wide. */
4017
assert(inst->exec_size <= get_lowered_simd_width(devinfo, inst));
4018
const fs_reg acc = retype(brw_acc_reg(inst->exec_size), inst->dst.type);
4019
fs_inst *mul = ibld.MUL(acc, inst->src[0], inst->src[1]);
4020
fs_inst *mach = ibld.MACH(inst->dst, inst->src[0], inst->src[1]);
4022
if (devinfo->ver >= 8) {
4023
/* Until Gfx8, integer multiplies read 32-bits from one source,
4024
* and 16-bits from the other, and relying on the MACH instruction
4025
* to generate the high bits of the result.
4027
* On Gfx8, the multiply instruction does a full 32x32-bit
4028
* multiply, but in order to do a 64-bit multiply we can simulate
4029
* the previous behavior and then use a MACH instruction.
4031
assert(mul->src[1].type == BRW_REGISTER_TYPE_D ||
4032
mul->src[1].type == BRW_REGISTER_TYPE_UD);
4033
mul->src[1].type = BRW_REGISTER_TYPE_UW;
4034
mul->src[1].stride *= 2;
4036
if (mul->src[1].file == IMM) {
4037
mul->src[1] = brw_imm_uw(mul->src[1].ud);
4039
} else if (devinfo->verx10 == 70 &&
4041
/* Among other things the quarter control bits influence which
4042
* accumulator register is used by the hardware for instructions
4043
* that access the accumulator implicitly (e.g. MACH). A
4044
* second-half instruction would normally map to acc1, which
4045
* doesn't exist on Gfx7 and up (the hardware does emulate it for
4046
* floating-point instructions *only* by taking advantage of the
4047
* extra precision of acc0 not normally used for floating point
4050
* HSW and up are careful enough not to try to access an
4051
* accumulator register that doesn't exist, but on earlier Gfx7
4052
* hardware we need to make sure that the quarter control bits are
4053
* zero to avoid non-deterministic behaviour and emit an extra MOV
4054
* to get the result masked correctly according to the current
4058
mach->force_writemask_all = true;
4059
mach->dst = ibld.vgrf(inst->dst.type);
4060
ibld.MOV(inst->dst, mach->dst);
4065
bool
fs_visitor::lower_integer_multiplication()
{
   bool progress = false;

   foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
      if (inst->opcode == BRW_OPCODE_MUL) {
         /* If the instruction is already in a form that does not need lowering,
          * return early.
          */
         if (devinfo->ver >= 7) {
            if (type_sz(inst->src[1].type) < 4 && type_sz(inst->src[0].type) <= 4)
               continue;
         } else {
            if (type_sz(inst->src[0].type) < 4 && type_sz(inst->src[1].type) <= 4)
               continue;
         }

         if ((inst->dst.type == BRW_REGISTER_TYPE_Q ||
              inst->dst.type == BRW_REGISTER_TYPE_UQ) &&
             (inst->src[0].type == BRW_REGISTER_TYPE_Q ||
              inst->src[0].type == BRW_REGISTER_TYPE_UQ) &&
             (inst->src[1].type == BRW_REGISTER_TYPE_Q ||
              inst->src[1].type == BRW_REGISTER_TYPE_UQ)) {
            lower_mul_qword_inst(inst, block);
            inst->remove(block);
            progress = true;
         } else if (!inst->dst.is_accumulator() &&
                    (inst->dst.type == BRW_REGISTER_TYPE_D ||
                     inst->dst.type == BRW_REGISTER_TYPE_UD) &&
                    (!devinfo->has_integer_dword_mul ||
                     devinfo->verx10 >= 125)) {
            lower_mul_dword_inst(inst, block);
            inst->remove(block);
            progress = true;
         }
      } else if (inst->opcode == SHADER_OPCODE_MULH) {
         lower_mulh_inst(inst, block);
         inst->remove(block);
         progress = true;
      }
   }

   if (progress)
      invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);

   return progress;
}
bool
fs_visitor::lower_minmax()
{
   assert(devinfo->ver < 6);

   bool progress = false;

   foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
      const fs_builder ibld(this, block, inst);

      if (inst->opcode == BRW_OPCODE_SEL &&
          inst->predicate == BRW_PREDICATE_NONE) {
         /* If src1 is an immediate value that is not NaN, then it can't be
          * NaN.  In that case, emit CMP because it is much better for cmod
          * propagation.  Likewise if src1 is not float.  Gfx4 and Gfx5 don't
          * support HF or DF, so it is not necessary to check for those.
          */
         if (inst->src[1].type != BRW_REGISTER_TYPE_F ||
             (inst->src[1].file == IMM && !isnan(inst->src[1].f))) {
            ibld.CMP(ibld.null_reg_d(), inst->src[0], inst->src[1],
                     inst->conditional_mod);
         } else {
            ibld.CMPN(ibld.null_reg_d(), inst->src[0], inst->src[1],
                      inst->conditional_mod);
         }

         inst->predicate = BRW_PREDICATE_NORMAL;
         inst->conditional_mod = BRW_CONDITIONAL_NONE;

         progress = true;
      }
   }

   if (progress)
      invalidate_analysis(DEPENDENCY_INSTRUCTIONS);

   return progress;
}
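/**
 * Lower SHADER_OPCODE_USUB_SAT and SHADER_OPCODE_ISUB_SAT into sequences the
 * hardware can execute; see the comment below for why the obvious saturating
 * ADD with a negated source is not sufficient.
 */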
bool
fs_visitor::lower_sub_sat()
{
   bool progress = false;

   foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
      const fs_builder ibld(this, block, inst);

      if (inst->opcode == SHADER_OPCODE_USUB_SAT ||
          inst->opcode == SHADER_OPCODE_ISUB_SAT) {
         /* The fundamental problem is the hardware performs source negation
          * at the bit width of the source.  If the source is 0x80000000, the
          * negation is 0x80000000.  As a result, subtractSaturate(0,
          * 0x80000000) will produce 0x80000000 instead of 0x7fffffff.  There
          * are at least three ways to resolve this:
          *
          * 1. Use the accumulator for the negated source.  The accumulator is
          *    33 bits, so our source 0x80000000 is sign-extended to
          *    0x180000000.  The negation of which is 0x080000000.  This
          *    doesn't help for 64-bit integers (which are already bigger than
          *    33 bits).  There are also only 8 accumulators, so SIMD16 or
          *    SIMD32 instructions would have to be split into multiple SIMD8
          *    instructions.
          *
          * 2. Use slightly different math.  For any n-bit value x, we know (x
          *    >> 1) != -(x >> 1).  We can use this fact to only do
          *    subtractions involving (x >> 1).  subtractSaturate(a, b) ==
          *    subtractSaturate(subtractSaturate(a, (b >> 1)), b - (b >> 1)).
          *
          * 3. For unsigned sources, it is sufficient to replace the
          *    subtractSaturate with (a > b) ? a - b : 0.
          *
          * It may also be possible to use the SUBB instruction.  This
          * implicitly writes the accumulator, so it could only be used in the
          * same situations as #1 above.  It is further limited by only
          * allowing UD sources.
          */
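         /* Option #1 above: route the negated source through the 33-bit
          * accumulator so the negation cannot overflow.
          */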
         if (inst->exec_size == 8 && inst->src[0].type != BRW_REGISTER_TYPE_Q &&
             inst->src[0].type != BRW_REGISTER_TYPE_UQ) {
            fs_reg acc(ARF, BRW_ARF_ACCUMULATOR, inst->src[1].type);

            ibld.MOV(acc, inst->src[1]);
            fs_inst *add = ibld.ADD(inst->dst, acc, inst->src[0]);
            add->saturate = true;
            add->src[0].negate = true;
         } else if (inst->opcode == SHADER_OPCODE_ISUB_SAT) {
            /* tmp = src1 >> 1;
             * dst = add.sat(add.sat(src0, -tmp), -(src1 - tmp));
             */
            fs_reg tmp1 = ibld.vgrf(inst->src[0].type);
            fs_reg tmp2 = ibld.vgrf(inst->src[0].type);
            fs_reg tmp3 = ibld.vgrf(inst->src[0].type);
            fs_inst *add;

            ibld.SHR(tmp1, inst->src[1], brw_imm_d(1));

            add = ibld.ADD(tmp2, inst->src[1], tmp1);
            add->src[1].negate = true;

            add = ibld.ADD(tmp3, inst->src[0], tmp1);
            add->src[1].negate = true;
            add->saturate = true;

            add = ibld.ADD(inst->dst, tmp3, tmp2);
            add->src[1].negate = true;
            add->saturate = true;
         } else {
            /* a > b ? a - b : 0 */
            ibld.CMP(ibld.null_reg_d(), inst->src[0], inst->src[1],
                     BRW_CONDITIONAL_G);

            fs_inst *add = ibld.ADD(inst->dst, inst->src[0], inst->src[1]);
            add->src[1].negate = !add->src[1].negate;

            ibld.SEL(inst->dst, inst->dst, brw_imm_ud(0))
               ->predicate = BRW_PREDICATE_NORMAL;
         }

         inst->remove(block);
         progress = true;
      }
   }

   if (progress)
      invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);

   return progress;
}
/**
 * Get the mask of SIMD channels enabled during dispatch and not yet disabled
 * by discard.  Due to the layout of the sample mask in the fragment shader
 * thread payload, \p bld is required to have a dispatch_width() not greater
 * than 16 for fragment shaders.
 */
static fs_reg
sample_mask_reg(const fs_builder &bld)
{
   const fs_visitor *v = static_cast<const fs_visitor *>(bld.shader);

   if (v->stage != MESA_SHADER_FRAGMENT) {
      return brw_imm_ud(0xffffffff);
   } else if (brw_wm_prog_data(v->stage_prog_data)->uses_kill) {
      assert(bld.dispatch_width() <= 16);
      return brw_flag_subreg(sample_mask_flag_subreg(v) + bld.group() / 16);
   } else {
      assert(v->devinfo->ver >= 6 && bld.dispatch_width() <= 16);
      return retype(brw_vec1_grf((bld.group() >= 16 ? 2 : 1), 7),
                    BRW_REGISTER_TYPE_UW);
   }
}
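/**
 * Set up the color sources of an FB write payload: copy each of
 * \p components components of \p color into consecutive entries of \p dst,
 * clamping to [0, 1] first when the key requires clamped fragment colors.
 */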
static void
setup_color_payload(const fs_builder &bld, const brw_wm_prog_key *key,
                    fs_reg *dst, fs_reg color, unsigned components)
{
   if (key->clamp_fragment_color) {
      fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, 4);
      assert(color.type == BRW_REGISTER_TYPE_F);

      for (unsigned i = 0; i < components; i++)
         set_saturate(true,
                      bld.MOV(offset(tmp, bld, i), offset(color, bld, i)));

      color = tmp;
   }

   for (unsigned i = 0; i < components; i++)
      dst[i] = offset(color, bld, i);
}
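/**
 * Compute the data port message control value for a render-target write,
 * based on whether it is a replicated-data write, a dual-source blend write,
 * and the execution size and group of the instruction.
 */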
uint32_t
brw_fb_write_msg_control(const fs_inst *inst,
                         const struct brw_wm_prog_data *prog_data)
{
   uint32_t mctl;

   if (inst->opcode == FS_OPCODE_REP_FB_WRITE) {
      assert(inst->group == 0 && inst->exec_size == 16);
      mctl = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;
   } else if (prog_data->dual_src_blend) {
      assert(inst->exec_size == 8);

      if (inst->group % 16 == 0)
         mctl = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
      else if (inst->group % 16 == 8)
         mctl = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23;
      else
         unreachable("Invalid dual-source FB write instruction group");
   } else {
      assert(inst->group == 0 || (inst->group == 16 && inst->exec_size == 16));

      if (inst->exec_size == 16)
         mctl = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
      else if (inst->exec_size == 8)
         mctl = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
      else
         unreachable("Invalid FB write execution size");
   }

   return mctl;
}
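/**
 * Turn a logical FB_WRITE into the hardware render-target write message,
 * assembling the payload (header, colors, depth, stencil, sample mask) that
 * the data port expects.
 */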
lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
4317
const struct brw_wm_prog_data *prog_data,
4318
const brw_wm_prog_key *key,
4319
const fs_visitor::thread_payload &payload)
4321
assert(inst->src[FB_WRITE_LOGICAL_SRC_COMPONENTS].file == IMM);
4322
const intel_device_info *devinfo = bld.shader->devinfo;
4323
const fs_reg &color0 = inst->src[FB_WRITE_LOGICAL_SRC_COLOR0];
4324
const fs_reg &color1 = inst->src[FB_WRITE_LOGICAL_SRC_COLOR1];
4325
const fs_reg &src0_alpha = inst->src[FB_WRITE_LOGICAL_SRC_SRC0_ALPHA];
4326
const fs_reg &src_depth = inst->src[FB_WRITE_LOGICAL_SRC_SRC_DEPTH];
4327
const fs_reg &dst_depth = inst->src[FB_WRITE_LOGICAL_SRC_DST_DEPTH];
4328
const fs_reg &src_stencil = inst->src[FB_WRITE_LOGICAL_SRC_SRC_STENCIL];
4329
fs_reg sample_mask = inst->src[FB_WRITE_LOGICAL_SRC_OMASK];
4330
const unsigned components =
4331
inst->src[FB_WRITE_LOGICAL_SRC_COMPONENTS].ud;
4333
assert(inst->target != 0 || src0_alpha.file == BAD_FILE);
4335
   /* We can potentially have a message length of up to 15, so we have to set
    * base_mrf to either 0 or 1 in order to fit in m0..m15.
    */
   fs_reg sources[15];
   int header_size = 2, payload_header_size;
   unsigned length = 0;
if (devinfo->ver < 6) {
4343
/* TODO: Support SIMD32 on gfx4-5 */
4344
assert(bld.group() < 16);
4346
/* For gfx4-5, we always have a header consisting of g0 and g1. We have
4347
* an implied MOV from g0,g1 to the start of the message. The MOV from
4348
* g0 is handled by the hardware and the MOV from g1 is provided by the
4349
* generator. This is required because, on gfx4-5, the generator may
4350
* generate two write messages with different message lengths in order
4351
* to handle AA data properly.
4353
* Also, since the pixel mask goes in the g0 portion of the message and
4354
* since render target writes are the last thing in the shader, we write
4355
* the pixel mask directly into g0 and it will get copied as part of the
4358
if (prog_data->uses_kill) {
4359
bld.exec_all().group(1, 0)
4360
.MOV(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW),
4361
sample_mask_reg(bld));
4364
assert(length == 0);
4366
} else if ((devinfo->verx10 <= 70 &&
4367
prog_data->uses_kill) ||
4368
(devinfo->ver < 11 &&
4369
(color1.file != BAD_FILE || key->nr_color_regions > 1))) {
4370
/* From the Sandy Bridge PRM, volume 4, page 198:
4372
* "Dispatched Pixel Enables. One bit per pixel indicating
4373
* which pixels were originally enabled when the thread was
4374
* dispatched. This field is only required for the end-of-
4375
* thread message and on all dual-source messages."
4377
const fs_builder ubld = bld.exec_all().group(8, 0);
4379
fs_reg header = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2);
4380
if (bld.group() < 16) {
4381
/* The header starts off as g0 and g1 for the first half */
4382
ubld.group(16, 0).MOV(header, retype(brw_vec8_grf(0, 0),
4383
BRW_REGISTER_TYPE_UD));
4385
/* The header starts off as g0 and g2 for the second half */
4386
assert(bld.group() < 32);
4387
const fs_reg header_sources[2] = {
4388
retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD),
4389
retype(brw_vec8_grf(2, 0), BRW_REGISTER_TYPE_UD),
4391
ubld.LOAD_PAYLOAD(header, header_sources, 2, 0);
4393
/* Gfx12 will require additional fix-ups if we ever hit this path. */
4394
assert(devinfo->ver < 12);
4397
uint32_t g00_bits = 0;
4399
/* Set "Source0 Alpha Present to RenderTarget" bit in message
4402
if (src0_alpha.file != BAD_FILE)
4403
g00_bits |= 1 << 11;
4405
/* Set computes stencil to render target */
4406
if (prog_data->computed_stencil)
4407
g00_bits |= 1 << 14;
4410
/* OR extra bits into g0.0 */
4411
ubld.group(1, 0).OR(component(header, 0),
4412
retype(brw_vec1_grf(0, 0),
4413
BRW_REGISTER_TYPE_UD),
4414
brw_imm_ud(g00_bits));
4417
/* Set the render target index for choosing BLEND_STATE. */
4418
if (inst->target > 0) {
4419
ubld.group(1, 0).MOV(component(header, 2), brw_imm_ud(inst->target));
4422
if (prog_data->uses_kill) {
4423
ubld.group(1, 0).MOV(retype(component(header, 15),
4424
BRW_REGISTER_TYPE_UW),
4425
sample_mask_reg(bld));
4428
assert(length == 0);
4429
sources[0] = header;
4430
sources[1] = horiz_offset(header, 8);
4433
assert(length == 0 || length == 2);
4434
header_size = length;
4436
if (payload.aa_dest_stencil_reg[0]) {
4437
assert(inst->group < 16);
4438
sources[length] = fs_reg(VGRF, bld.shader->alloc.allocate(1));
4439
bld.group(8, 0).exec_all().annotate("FB write stencil/AA alpha")
4440
.MOV(sources[length],
4441
fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg[0], 0)));
4445
if (src0_alpha.file != BAD_FILE) {
4446
for (unsigned i = 0; i < bld.dispatch_width() / 8; i++) {
4447
const fs_builder &ubld = bld.exec_all().group(8, i)
4448
.annotate("FB write src0 alpha");
4449
const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_F);
4450
ubld.MOV(tmp, horiz_offset(src0_alpha, i * 8));
4451
setup_color_payload(ubld, key, &sources[length], tmp, 1);
4456
if (sample_mask.file != BAD_FILE) {
4457
sources[length] = fs_reg(VGRF, bld.shader->alloc.allocate(1),
4458
BRW_REGISTER_TYPE_UD);
4460
/* Hand over gl_SampleMask. Only the lower 16 bits of each channel are
4461
* relevant. Since it's unsigned single words one vgrf is always
4462
* 16-wide, but only the lower or higher 8 channels will be used by the
4463
* hardware when doing a SIMD8 write depending on whether we have
4464
* selected the subspans for the first or second half respectively.
4466
assert(sample_mask.file != BAD_FILE && type_sz(sample_mask.type) == 4);
4467
sample_mask.type = BRW_REGISTER_TYPE_UW;
4468
sample_mask.stride *= 2;
4470
bld.exec_all().annotate("FB write oMask")
4471
.MOV(horiz_offset(retype(sources[length], BRW_REGISTER_TYPE_UW),
4477
payload_header_size = length;
4479
setup_color_payload(bld, key, &sources[length], color0, components);
4482
if (color1.file != BAD_FILE) {
4483
setup_color_payload(bld, key, &sources[length], color1, components);
4487
if (src_depth.file != BAD_FILE) {
4488
sources[length] = src_depth;
4492
if (dst_depth.file != BAD_FILE) {
4493
sources[length] = dst_depth;
4497
if (src_stencil.file != BAD_FILE) {
4498
assert(devinfo->ver >= 9);
4499
assert(bld.dispatch_width() == 8);
4501
/* XXX: src_stencil is only available on gfx9+. dst_depth is never
4502
* available on gfx9+. As such it's impossible to have both enabled at the
4503
* same time and therefore length cannot overrun the array.
4505
assert(length < 15);
4507
sources[length] = bld.vgrf(BRW_REGISTER_TYPE_UD);
4508
bld.exec_all().annotate("FB write OS")
4509
.MOV(retype(sources[length], BRW_REGISTER_TYPE_UB),
4510
subscript(src_stencil, BRW_REGISTER_TYPE_UB, 0));
4515
if (devinfo->ver >= 7) {
4516
/* Send from the GRF */
4517
fs_reg payload = fs_reg(VGRF, -1, BRW_REGISTER_TYPE_F);
4518
load = bld.LOAD_PAYLOAD(payload, sources, length, payload_header_size);
4519
payload.nr = bld.shader->alloc.allocate(regs_written(load));
4520
load->dst = payload;
4522
      uint32_t msg_ctl = brw_fb_write_msg_control(inst, prog_data);

      inst->desc =
         (inst->group / 16) << 11 | /* rt slot group */
         brw_fb_write_desc(devinfo, inst->target, msg_ctl, inst->last_rt,
                           prog_data->per_coarse_pixel_dispatch);
uint32_t ex_desc = 0;
4530
if (devinfo->ver >= 11) {
4531
/* Set the "Render Target Index" and "Src0 Alpha Present" fields
4532
* in the extended message descriptor, in lieu of using a header.
4534
ex_desc = inst->target << 12 | (src0_alpha.file != BAD_FILE) << 15;
4536
if (key->nr_color_regions == 0)
4537
ex_desc |= 1 << 20; /* Null Render Target */
4539
inst->ex_desc = ex_desc;
4541
inst->opcode = SHADER_OPCODE_SEND;
4542
inst->resize_sources(3);
4543
inst->sfid = GFX6_SFID_DATAPORT_RENDER_CACHE;
4544
inst->src[0] = brw_imm_ud(0);
4545
inst->src[1] = brw_imm_ud(0);
4546
inst->src[2] = payload;
4547
inst->mlen = regs_written(load);
4549
inst->header_size = header_size;
4550
inst->check_tdr = true;
4551
inst->send_has_side_effects = true;
4553
/* Send from the MRF */
4554
load = bld.LOAD_PAYLOAD(fs_reg(MRF, 1, BRW_REGISTER_TYPE_F),
4555
sources, length, payload_header_size);
4557
/* On pre-SNB, we have to interlace the color values. LOAD_PAYLOAD
4558
* will do this for us if we just give it a COMPR4 destination.
4560
if (devinfo->ver < 6 && bld.dispatch_width() == 16)
4561
load->dst.nr |= BRW_MRF_COMPR4;
4563
if (devinfo->ver < 6) {
4564
/* Set up src[0] for the implied MOV from grf0-1 */
4565
inst->resize_sources(1);
4566
inst->src[0] = brw_vec8_grf(0, 0);
4568
inst->resize_sources(0);
4571
inst->opcode = FS_OPCODE_FB_WRITE;
4572
inst->mlen = regs_written(load);
4573
inst->header_size = header_size;
4578
lower_fb_read_logical_send(const fs_builder &bld, fs_inst *inst)
4580
const intel_device_info *devinfo = bld.shader->devinfo;
4581
const fs_builder &ubld = bld.exec_all().group(8, 0);
4582
const unsigned length = 2;
4583
const fs_reg header = ubld.vgrf(BRW_REGISTER_TYPE_UD, length);
4585
if (bld.group() < 16) {
4586
ubld.group(16, 0).MOV(header, retype(brw_vec8_grf(0, 0),
4587
BRW_REGISTER_TYPE_UD));
4589
assert(bld.group() < 32);
4590
const fs_reg header_sources[] = {
4591
retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD),
4592
retype(brw_vec8_grf(2, 0), BRW_REGISTER_TYPE_UD)
4594
ubld.LOAD_PAYLOAD(header, header_sources, ARRAY_SIZE(header_sources), 0);
4596
if (devinfo->ver >= 12) {
4597
/* On Gfx12 the Viewport and Render Target Array Index fields (AKA
4598
* Poly 0 Info) are provided in r1.1 instead of r0.0, and the render
4599
* target message header format was updated accordingly -- However
4600
* the updated format only works for the lower 16 channels in a
4601
* SIMD32 thread, since the higher 16 channels want the subspan data
4602
* from r2 instead of r1, so we need to copy over the contents of
4603
* r1.1 in order to fix things up.
4605
ubld.group(1, 0).MOV(component(header, 9),
4606
retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_UD));
4610
/* BSpec 12470 (Gfx8-11), BSpec 47842 (Gfx12+) :
4612
* "Must be zero for Render Target Read message."
4615
* - 14 : Stencil Present to Render Target
4616
* - 13 : Source Depth Present to Render Target
4617
* - 12 : oMask to Render Target
4618
* - 11 : Source0 Alpha Present to Render Target
4620
ubld.group(1, 0).AND(component(header, 0),
4621
component(header, 0),
4622
brw_imm_ud(~INTEL_MASK(14, 11)));
4624
inst->resize_sources(1);
4625
inst->src[0] = header;
4626
inst->opcode = FS_OPCODE_FB_READ;
4627
inst->mlen = length;
4628
inst->header_size = length;
4632
lower_sampler_logical_send_gfx4(const fs_builder &bld, fs_inst *inst, opcode op,
4633
const fs_reg &coordinate,
4634
const fs_reg &shadow_c,
4635
const fs_reg &lod, const fs_reg &lod2,
4636
const fs_reg &surface,
4637
const fs_reg &sampler,
4638
unsigned coord_components,
4639
unsigned grad_components)
4641
const bool has_lod = (op == SHADER_OPCODE_TXL || op == FS_OPCODE_TXB ||
4642
op == SHADER_OPCODE_TXF || op == SHADER_OPCODE_TXS);
4643
fs_reg msg_begin(MRF, 1, BRW_REGISTER_TYPE_F);
4644
fs_reg msg_end = msg_begin;
4647
msg_end = offset(msg_end, bld.group(8, 0), 1);
4649
for (unsigned i = 0; i < coord_components; i++)
4650
bld.MOV(retype(offset(msg_end, bld, i), coordinate.type),
4651
offset(coordinate, bld, i));
4653
msg_end = offset(msg_end, bld, coord_components);
4655
/* Messages other than SAMPLE and RESINFO in SIMD16 and TXD in SIMD8
4656
* require all three components to be present and zero if they are unused.
4658
if (coord_components > 0 &&
4659
(has_lod || shadow_c.file != BAD_FILE ||
4660
(op == SHADER_OPCODE_TEX && bld.dispatch_width() == 8))) {
4661
assert(coord_components <= 3);
4662
for (unsigned i = 0; i < 3 - coord_components; i++)
4663
bld.MOV(offset(msg_end, bld, i), brw_imm_f(0.0f));
4665
msg_end = offset(msg_end, bld, 3 - coord_components);
4668
if (op == SHADER_OPCODE_TXD) {
4669
/* TXD unsupported in SIMD16 mode. */
4670
assert(bld.dispatch_width() == 8);
4672
/* the slots for u and v are always present, but r is optional */
4673
if (coord_components < 2)
4674
msg_end = offset(msg_end, bld, 2 - coord_components);
4677
* dPdx = dudx, dvdx, drdx
4678
* dPdy = dudy, dvdy, drdy
4680
* 1-arg: Does not exist.
4682
* 2-arg: dudx dvdx dudy dvdy
4683
* dPdx.x dPdx.y dPdy.x dPdy.y
4686
* 3-arg: dudx dvdx drdx dudy dvdy drdy
4687
* dPdx.x dPdx.y dPdx.z dPdy.x dPdy.y dPdy.z
4688
* m5 m6 m7 m8 m9 m10
4690
for (unsigned i = 0; i < grad_components; i++)
4691
bld.MOV(offset(msg_end, bld, i), offset(lod, bld, i));
4693
msg_end = offset(msg_end, bld, MAX2(grad_components, 2));
4695
for (unsigned i = 0; i < grad_components; i++)
4696
bld.MOV(offset(msg_end, bld, i), offset(lod2, bld, i));
4698
msg_end = offset(msg_end, bld, MAX2(grad_components, 2));
4702
/* Bias/LOD with shadow comparator is unsupported in SIMD16 -- *Without*
4703
* shadow comparator (including RESINFO) it's unsupported in SIMD8 mode.
4705
assert(shadow_c.file != BAD_FILE ? bld.dispatch_width() == 8 :
4706
bld.dispatch_width() == 16);
4708
const brw_reg_type type =
4709
(op == SHADER_OPCODE_TXF || op == SHADER_OPCODE_TXS ?
4710
BRW_REGISTER_TYPE_UD : BRW_REGISTER_TYPE_F);
4711
bld.MOV(retype(msg_end, type), lod);
4712
msg_end = offset(msg_end, bld, 1);
4715
if (shadow_c.file != BAD_FILE) {
4716
if (op == SHADER_OPCODE_TEX && bld.dispatch_width() == 8) {
4717
/* There's no plain shadow compare message, so we use shadow
4718
* compare with a bias of 0.0.
4720
bld.MOV(msg_end, brw_imm_f(0.0f));
4721
msg_end = offset(msg_end, bld, 1);
4724
bld.MOV(msg_end, shadow_c);
4725
msg_end = offset(msg_end, bld, 1);
4729
inst->src[0] = reg_undef;
4730
inst->src[1] = surface;
4731
inst->src[2] = sampler;
4732
inst->resize_sources(3);
4733
inst->base_mrf = msg_begin.nr;
4734
inst->mlen = msg_end.nr - msg_begin.nr;
4735
inst->header_size = 1;
4739
lower_sampler_logical_send_gfx5(const fs_builder &bld, fs_inst *inst, opcode op,
4740
const fs_reg &coordinate,
4741
const fs_reg &shadow_c,
4742
const fs_reg &lod, const fs_reg &lod2,
4743
const fs_reg &sample_index,
4744
const fs_reg &surface,
4745
const fs_reg &sampler,
4746
unsigned coord_components,
4747
unsigned grad_components)
4749
fs_reg message(MRF, 2, BRW_REGISTER_TYPE_F);
4750
fs_reg msg_coords = message;
4751
unsigned header_size = 0;
4753
if (inst->offset != 0) {
4754
/* The offsets set up by the visitor are in the m1 header, so we can't
4761
for (unsigned i = 0; i < coord_components; i++)
4762
bld.MOV(retype(offset(msg_coords, bld, i), coordinate.type),
4763
offset(coordinate, bld, i));
4765
fs_reg msg_end = offset(msg_coords, bld, coord_components);
4766
fs_reg msg_lod = offset(msg_coords, bld, 4);
4768
if (shadow_c.file != BAD_FILE) {
4769
fs_reg msg_shadow = msg_lod;
4770
bld.MOV(msg_shadow, shadow_c);
4771
msg_lod = offset(msg_shadow, bld, 1);
4776
case SHADER_OPCODE_TXL:
4778
bld.MOV(msg_lod, lod);
4779
msg_end = offset(msg_lod, bld, 1);
4781
case SHADER_OPCODE_TXD:
4784
* dPdx = dudx, dvdx, drdx
4785
* dPdy = dudy, dvdy, drdy
4787
* Load up these values:
4788
* - dudx dudy dvdx dvdy drdx drdy
4789
* - dPdx.x dPdy.x dPdx.y dPdy.y dPdx.z dPdy.z
4792
for (unsigned i = 0; i < grad_components; i++) {
4793
bld.MOV(msg_end, offset(lod, bld, i));
4794
msg_end = offset(msg_end, bld, 1);
4796
bld.MOV(msg_end, offset(lod2, bld, i));
4797
msg_end = offset(msg_end, bld, 1);
4800
case SHADER_OPCODE_TXS:
4801
msg_lod = retype(msg_end, BRW_REGISTER_TYPE_UD);
4802
bld.MOV(msg_lod, lod);
4803
msg_end = offset(msg_lod, bld, 1);
4805
case SHADER_OPCODE_TXF:
4806
msg_lod = offset(msg_coords, bld, 3);
4807
bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), lod);
4808
msg_end = offset(msg_lod, bld, 1);
4810
case SHADER_OPCODE_TXF_CMS:
4811
msg_lod = offset(msg_coords, bld, 3);
4813
bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u));
4815
bld.MOV(retype(offset(msg_lod, bld, 1), BRW_REGISTER_TYPE_UD), sample_index);
4816
msg_end = offset(msg_lod, bld, 2);
4823
inst->src[0] = reg_undef;
4824
inst->src[1] = surface;
4825
inst->src[2] = sampler;
4826
inst->resize_sources(3);
4827
inst->base_mrf = message.nr;
4828
inst->mlen = msg_end.nr - message.nr;
4829
inst->header_size = header_size;
4831
/* Message length > MAX_SAMPLER_MESSAGE_SIZE disallowed by hardware. */
4832
   assert(inst->mlen <= MAX_SAMPLER_MESSAGE_SIZE);
}
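/* Sampler indices above 15 cannot be encoded in the 4-bit sampler field of
 * the message descriptor, so such samplers need the sampler state pointer in
 * the message header to be offset instead.
 */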
static bool
is_high_sampler(const struct intel_device_info *devinfo, const fs_reg &sampler)
{
   if (devinfo->verx10 <= 70)
      return false;

   return sampler.file != IMM || sampler.ud >= 16;
}
sampler_msg_type(const intel_device_info *devinfo,
4846
opcode opcode, bool shadow_compare)
4848
assert(devinfo->ver >= 5);
4850
case SHADER_OPCODE_TEX:
4851
return shadow_compare ? GFX5_SAMPLER_MESSAGE_SAMPLE_COMPARE :
4852
GFX5_SAMPLER_MESSAGE_SAMPLE;
4854
return shadow_compare ? GFX5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE :
4855
GFX5_SAMPLER_MESSAGE_SAMPLE_BIAS;
4856
case SHADER_OPCODE_TXL:
4857
return shadow_compare ? GFX5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE :
4858
GFX5_SAMPLER_MESSAGE_SAMPLE_LOD;
4859
case SHADER_OPCODE_TXL_LZ:
4860
return shadow_compare ? GFX9_SAMPLER_MESSAGE_SAMPLE_C_LZ :
4861
GFX9_SAMPLER_MESSAGE_SAMPLE_LZ;
4862
case SHADER_OPCODE_TXS:
4863
case SHADER_OPCODE_IMAGE_SIZE_LOGICAL:
4864
return GFX5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
4865
case SHADER_OPCODE_TXD:
4866
assert(!shadow_compare || devinfo->verx10 >= 75);
4867
return shadow_compare ? HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE :
4868
GFX5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
4869
case SHADER_OPCODE_TXF:
4870
return GFX5_SAMPLER_MESSAGE_SAMPLE_LD;
4871
case SHADER_OPCODE_TXF_LZ:
4872
assert(devinfo->ver >= 9);
4873
return GFX9_SAMPLER_MESSAGE_SAMPLE_LD_LZ;
4874
case SHADER_OPCODE_TXF_CMS_W:
4875
assert(devinfo->ver >= 9);
4876
return GFX9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W;
4877
case SHADER_OPCODE_TXF_CMS:
4878
return devinfo->ver >= 7 ? GFX7_SAMPLER_MESSAGE_SAMPLE_LD2DMS :
4879
GFX5_SAMPLER_MESSAGE_SAMPLE_LD;
4880
case SHADER_OPCODE_TXF_UMS:
4881
assert(devinfo->ver >= 7);
4882
return GFX7_SAMPLER_MESSAGE_SAMPLE_LD2DSS;
4883
case SHADER_OPCODE_TXF_MCS:
4884
assert(devinfo->ver >= 7);
4885
return GFX7_SAMPLER_MESSAGE_SAMPLE_LD_MCS;
4886
case SHADER_OPCODE_LOD:
4887
return GFX5_SAMPLER_MESSAGE_LOD;
4888
case SHADER_OPCODE_TG4:
4889
assert(devinfo->ver >= 7);
4890
return shadow_compare ? GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C :
4891
GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4;
4893
case SHADER_OPCODE_TG4_OFFSET:
4894
assert(devinfo->ver >= 7);
4895
return shadow_compare ? GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C :
4896
GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO;
4897
case SHADER_OPCODE_SAMPLEINFO:
4898
return GFX6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO;
4900
unreachable("not reached");
4905
* Emit a LOAD_PAYLOAD instruction while ensuring the sources are aligned to
4906
* the given requested_alignment_sz.
4909
emit_load_payload_with_padding(const fs_builder &bld, const fs_reg &dst,
4910
const fs_reg *src, unsigned sources,
4911
unsigned header_size,
4912
unsigned requested_alignment_sz)
4914
unsigned length = 0;
4916
sources * DIV_ROUND_UP(requested_alignment_sz, bld.dispatch_width());
4917
fs_reg *src_comps = new fs_reg[num_srcs];
4919
for (unsigned i = 0; i < header_size; i++)
4920
src_comps[length++] = src[i];
4922
for (unsigned i = header_size; i < sources; i++) {
4924
retype(dst, src[i].type).component_size(bld.dispatch_width());
4925
const enum brw_reg_type padding_payload_type =
4926
brw_reg_type_from_bit_size(type_sz(src[i].type) * 8,
4927
BRW_REGISTER_TYPE_UD);
4929
src_comps[length++] = src[i];
4931
/* Expand the real sources if component of requested payload type is
4932
* larger than real source component.
4934
if (src_sz < requested_alignment_sz) {
4935
for (unsigned j = 0; j < (requested_alignment_sz / src_sz) - 1; j++) {
4936
src_comps[length++] = retype(fs_reg(), padding_payload_type);
4941
fs_inst *inst = bld.LOAD_PAYLOAD(dst, src_comps, length, header_size);
4948
lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op,
4949
const fs_reg &coordinate,
4950
const fs_reg &shadow_c,
4951
fs_reg lod, const fs_reg &lod2,
4952
const fs_reg &min_lod,
4953
const fs_reg &sample_index,
4955
const fs_reg &surface,
4956
const fs_reg &sampler,
4957
const fs_reg &surface_handle,
4958
const fs_reg &sampler_handle,
4959
const fs_reg &tg4_offset,
4960
unsigned payload_type_bit_size,
4961
unsigned coord_components,
4962
unsigned grad_components)
4964
const intel_device_info *devinfo = bld.shader->devinfo;
4965
const enum brw_reg_type payload_type =
4966
brw_reg_type_from_bit_size(payload_type_bit_size, BRW_REGISTER_TYPE_F);
4967
const enum brw_reg_type payload_unsigned_type =
4968
brw_reg_type_from_bit_size(payload_type_bit_size, BRW_REGISTER_TYPE_UD);
4969
const enum brw_reg_type payload_signed_type =
4970
brw_reg_type_from_bit_size(payload_type_bit_size, BRW_REGISTER_TYPE_D);
4971
unsigned reg_width = bld.dispatch_width() / 8;
4972
unsigned header_size = 0, length = 0;
4973
fs_reg sources[MAX_SAMPLER_MESSAGE_SIZE];
4974
for (unsigned i = 0; i < ARRAY_SIZE(sources); i++)
4975
sources[i] = bld.vgrf(payload_type);
4977
/* We must have exactly one of surface/sampler and surface/sampler_handle */
4978
assert((surface.file == BAD_FILE) != (surface_handle.file == BAD_FILE));
4979
assert((sampler.file == BAD_FILE) != (sampler_handle.file == BAD_FILE));
4981
if (op == SHADER_OPCODE_TG4 || op == SHADER_OPCODE_TG4_OFFSET ||
4982
inst->offset != 0 || inst->eot ||
4983
op == SHADER_OPCODE_SAMPLEINFO ||
4984
sampler_handle.file != BAD_FILE ||
4985
is_high_sampler(devinfo, sampler)) {
4986
/* For general texture offsets (no txf workaround), we need a header to
4989
* TG4 needs to place its channel select in the header, for interaction
4990
* with ARB_texture_swizzle. The sampler index is only 4-bits, so for
4991
* larger sampler numbers we need to offset the Sampler State Pointer in
4994
fs_reg header = retype(sources[0], BRW_REGISTER_TYPE_UD);
4998
/* If we're requesting fewer than four channels worth of response,
4999
* and we have an explicit header, we need to set up the sampler
5000
* writemask. It's reversed from normal: 1 means "don't write".
5002
if (!inst->eot && regs_written(inst) != 4 * reg_width) {
5003
assert(regs_written(inst) % reg_width == 0);
5004
unsigned mask = ~((1 << (regs_written(inst) / reg_width)) - 1) & 0xf;
5005
inst->offset |= mask << 12;
5008
/* Build the actual header */
5009
const fs_builder ubld = bld.exec_all().group(8, 0);
5010
const fs_builder ubld1 = ubld.group(1, 0);
5011
ubld.MOV(header, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
5013
ubld1.MOV(component(header, 2), brw_imm_ud(inst->offset));
5014
} else if (bld.shader->stage != MESA_SHADER_VERTEX &&
5015
bld.shader->stage != MESA_SHADER_FRAGMENT) {
5016
/* The vertex and fragment stages have g0.2 set to 0, so
5017
* header0.2 is 0 when g0 is copied. Other stages may not, so we
5018
* must set it to 0 to avoid setting undesirable bits in the
5021
ubld1.MOV(component(header, 2), brw_imm_ud(0));
5024
if (sampler_handle.file != BAD_FILE) {
5025
/* Bindless sampler handles aren't relative to the sampler state
5026
* pointer passed into the shader through SAMPLER_STATE_POINTERS_*.
5027
* Instead, it's an absolute pointer relative to dynamic state base
5030
* Sampler states are 16 bytes each and the pointer we give here has
5031
* to be 32-byte aligned. In order to avoid more indirect messages
5032
* than required, we assume that all bindless sampler states are
5033
* 32-byte aligned. This sacrifices a bit of general state base
5034
* address space but means we can do something more efficient in the
5037
ubld1.MOV(component(header, 3), sampler_handle);
5038
} else if (is_high_sampler(devinfo, sampler)) {
5039
fs_reg sampler_state_ptr =
5040
retype(brw_vec1_grf(0, 3), BRW_REGISTER_TYPE_UD);
5042
/* Gfx11+ sampler message headers include bits in 4:0 which conflict
5043
* with the ones included in g0.3 bits 4:0. Mask them out.
5045
if (devinfo->ver >= 11) {
5046
sampler_state_ptr = ubld1.vgrf(BRW_REGISTER_TYPE_UD);
5047
ubld1.AND(sampler_state_ptr,
5048
retype(brw_vec1_grf(0, 3), BRW_REGISTER_TYPE_UD),
5049
brw_imm_ud(INTEL_MASK(31, 5)));
5052
if (sampler.file == BRW_IMMEDIATE_VALUE) {
5053
assert(sampler.ud >= 16);
5054
const int sampler_state_size = 16; /* 16 bytes */
5056
ubld1.ADD(component(header, 3), sampler_state_ptr,
5057
brw_imm_ud(16 * (sampler.ud / 16) * sampler_state_size));
5059
fs_reg tmp = ubld1.vgrf(BRW_REGISTER_TYPE_UD);
5060
ubld1.AND(tmp, sampler, brw_imm_ud(0x0f0));
5061
ubld1.SHL(tmp, tmp, brw_imm_ud(4));
5062
ubld1.ADD(component(header, 3), sampler_state_ptr, tmp);
5064
} else if (devinfo->ver >= 11) {
5065
/* Gfx11+ sampler message headers include bits in 4:0 which conflict
5066
* with the ones included in g0.3 bits 4:0. Mask them out.
5068
ubld1.AND(component(header, 3),
5069
retype(brw_vec1_grf(0, 3), BRW_REGISTER_TYPE_UD),
5070
brw_imm_ud(INTEL_MASK(31, 5)));
5074
if (shadow_c.file != BAD_FILE) {
5075
bld.MOV(sources[length], shadow_c);
5079
bool coordinate_done = false;
5081
/* Set up the LOD info */
5084
case SHADER_OPCODE_TXL:
5085
if (devinfo->ver >= 9 && op == SHADER_OPCODE_TXL && lod.is_zero()) {
5086
op = SHADER_OPCODE_TXL_LZ;
5089
bld.MOV(sources[length], lod);
5092
case SHADER_OPCODE_TXD:
5093
/* TXD should have been lowered in SIMD16 mode. */
5094
assert(bld.dispatch_width() == 8);
5096
/* Load dPdx and the coordinate together:
5097
* [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z
5099
for (unsigned i = 0; i < coord_components; i++) {
5100
bld.MOV(sources[length++], offset(coordinate, bld, i));
5102
/* For cube map array, the coordinate is (u,v,r,ai) but there are
5103
* only derivatives for (u, v, r).
5105
if (i < grad_components) {
5106
bld.MOV(sources[length++], offset(lod, bld, i));
5107
bld.MOV(sources[length++], offset(lod2, bld, i));
5111
coordinate_done = true;
5113
case SHADER_OPCODE_TXS:
5114
bld.MOV(retype(sources[length], payload_unsigned_type), lod);
5117
case SHADER_OPCODE_IMAGE_SIZE_LOGICAL:
5118
/* We need an LOD; just use 0 */
5119
bld.MOV(retype(sources[length], payload_unsigned_type), brw_imm_ud(0));
5122
case SHADER_OPCODE_TXF:
5123
/* Unfortunately, the parameters for LD are intermixed: u, lod, v, r.
5124
* On Gfx9 they are u, v, lod, r
5126
bld.MOV(retype(sources[length++], payload_signed_type), coordinate);
5128
if (devinfo->ver >= 9) {
5129
if (coord_components >= 2) {
5130
bld.MOV(retype(sources[length], payload_signed_type),
5131
offset(coordinate, bld, 1));
5133
sources[length] = brw_imm_d(0);
5138
if (devinfo->ver >= 9 && lod.is_zero()) {
5139
op = SHADER_OPCODE_TXF_LZ;
5141
bld.MOV(retype(sources[length], payload_signed_type), lod);
5145
for (unsigned i = devinfo->ver >= 9 ? 2 : 1; i < coord_components; i++)
5146
bld.MOV(retype(sources[length++], payload_signed_type),
5147
offset(coordinate, bld, i));
5149
coordinate_done = true;
5152
case SHADER_OPCODE_TXF_CMS:
5153
case SHADER_OPCODE_TXF_CMS_W:
5154
case SHADER_OPCODE_TXF_UMS:
5155
case SHADER_OPCODE_TXF_MCS:
5156
if (op == SHADER_OPCODE_TXF_UMS ||
5157
op == SHADER_OPCODE_TXF_CMS ||
5158
op == SHADER_OPCODE_TXF_CMS_W) {
5159
bld.MOV(retype(sources[length++], payload_unsigned_type), sample_index);
5162
/* Data from the multisample control surface. */
5163
if (op == SHADER_OPCODE_TXF_CMS || op == SHADER_OPCODE_TXF_CMS_W) {
5164
unsigned num_mcs_components = 1;
5166
/* From the Gfx12HP BSpec: Render Engine - 3D and GPGPU Programs -
5167
* Shared Functions - 3D Sampler - Messages - Message Format:
5169
* ld2dms_w si mcs0 mcs1 mcs2 mcs3 u v r
5171
if (devinfo->verx10 >= 125 && op == SHADER_OPCODE_TXF_CMS_W)
5172
num_mcs_components = 4;
5173
else if (op == SHADER_OPCODE_TXF_CMS_W)
5174
num_mcs_components = 2;
5176
for (unsigned i = 0; i < num_mcs_components; ++i) {
5177
bld.MOV(retype(sources[length++], payload_unsigned_type),
5178
mcs.file == IMM ? mcs : offset(mcs, bld, i));
5182
/* There is no offsetting for this message; just copy in the integer
5183
* texture coordinates.
5185
for (unsigned i = 0; i < coord_components; i++)
5186
bld.MOV(retype(sources[length++], payload_signed_type),
5187
offset(coordinate, bld, i));
5189
coordinate_done = true;
5191
case SHADER_OPCODE_TG4_OFFSET:
5192
/* More crazy intermixing */
5193
for (unsigned i = 0; i < 2; i++) /* u, v */
5194
bld.MOV(sources[length++], offset(coordinate, bld, i));
5196
for (unsigned i = 0; i < 2; i++) /* offu, offv */
5197
bld.MOV(retype(sources[length++], payload_signed_type),
5198
offset(tg4_offset, bld, i));
5200
if (coord_components == 3) /* r if present */
5201
bld.MOV(sources[length++], offset(coordinate, bld, 2));
5203
coordinate_done = true;
5209
/* Set up the coordinate (except for cases where it was done above) */
5210
if (!coordinate_done) {
5211
for (unsigned i = 0; i < coord_components; i++)
5212
bld.MOV(retype(sources[length++], payload_type),
5213
offset(coordinate, bld, i));
5216
if (min_lod.file != BAD_FILE) {
5217
/* Account for all of the missing coordinate sources */
5218
if (op == SHADER_OPCODE_TXD && devinfo->verx10 >= 125) {
5219
/* On DG2 and newer platforms, sample_d can only be used with 1D and
5220
* 2D surfaces, so the maximum number of gradient components is 2.
5221
* In spite of this limitation, the Bspec lists a mysterious R
5222
* component before the min_lod, so the maximum coordinate components
5227
length += 3 - coord_components;
5228
length += (2 - grad_components) * 2;
5230
length += 4 - coord_components;
5231
if (op == SHADER_OPCODE_TXD)
5232
length += (3 - grad_components) * 2;
5235
bld.MOV(sources[length++], min_lod);
5238
const fs_reg src_payload =
5239
fs_reg(VGRF, bld.shader->alloc.allocate(length * reg_width),
5240
BRW_REGISTER_TYPE_F);
5241
/* In case of 16-bit payload each component takes one full register in
5242
* both SIMD8H and SIMD16H modes. In both cases one reg can hold 16
5243
* elements. In SIMD8H case hardware simply expects the components to be
5244
* padded (i.e., aligned on reg boundary).
5246
fs_inst *load_payload_inst =
5247
emit_load_payload_with_padding(bld, src_payload, sources, length,
5248
header_size, REG_SIZE);
5249
unsigned mlen = load_payload_inst->size_written / REG_SIZE;
5250
unsigned simd_mode = 0;
5251
if (payload_type_bit_size == 16) {
5252
assert(devinfo->ver >= 11);
5253
simd_mode = inst->exec_size <= 8 ? GFX10_SAMPLER_SIMD_MODE_SIMD8H :
5254
GFX10_SAMPLER_SIMD_MODE_SIMD16H;
5256
simd_mode = inst->exec_size <= 8 ? BRW_SAMPLER_SIMD_MODE_SIMD8 :
5257
BRW_SAMPLER_SIMD_MODE_SIMD16;
5260
/* Generate the SEND. */
5261
inst->opcode = SHADER_OPCODE_SEND;
5263
inst->header_size = header_size;
5265
const unsigned msg_type =
5266
sampler_msg_type(devinfo, op, inst->shadow_compare);
5268
inst->sfid = BRW_SFID_SAMPLER;
5269
if (surface.file == IMM &&
5270
(sampler.file == IMM || sampler_handle.file != BAD_FILE)) {
5271
inst->desc = brw_sampler_desc(devinfo, surface.ud,
5272
sampler.file == IMM ? sampler.ud % 16 : 0,
5275
0 /* return_format unused on gfx7+ */);
5276
inst->src[0] = brw_imm_ud(0);
5277
inst->src[1] = brw_imm_ud(0);
5278
} else if (surface_handle.file != BAD_FILE) {
5279
/* Bindless surface */
5280
assert(devinfo->ver >= 9);
5281
inst->desc = brw_sampler_desc(devinfo,
5283
sampler.file == IMM ? sampler.ud % 16 : 0,
5286
0 /* return_format unused on gfx7+ */);
5288
/* For bindless samplers, the entire address is included in the message
5289
* header so we can leave the portion in the message descriptor 0.
5291
if (sampler_handle.file != BAD_FILE || sampler.file == IMM) {
5292
inst->src[0] = brw_imm_ud(0);
5294
const fs_builder ubld = bld.group(1, 0).exec_all();
5295
fs_reg desc = ubld.vgrf(BRW_REGISTER_TYPE_UD);
5296
ubld.SHL(desc, sampler, brw_imm_ud(8));
5297
inst->src[0] = desc;
5300
/* We assume that the driver provided the handle in the top 20 bits so
5301
* we can use the surface handle directly as the extended descriptor.
5303
inst->src[1] = retype(surface_handle, BRW_REGISTER_TYPE_UD);
5305
/* Immediate portion of the descriptor */
5306
inst->desc = brw_sampler_desc(devinfo,
5311
0 /* return_format unused on gfx7+ */);
5312
const fs_builder ubld = bld.group(1, 0).exec_all();
5313
fs_reg desc = ubld.vgrf(BRW_REGISTER_TYPE_UD);
5314
if (surface.equals(sampler)) {
5315
/* This case is common in GL */
5316
ubld.MUL(desc, surface, brw_imm_ud(0x101));
5318
if (sampler_handle.file != BAD_FILE) {
5319
ubld.MOV(desc, surface);
5320
} else if (sampler.file == IMM) {
5321
ubld.OR(desc, surface, brw_imm_ud(sampler.ud << 8));
5323
ubld.SHL(desc, sampler, brw_imm_ud(8));
5324
ubld.OR(desc, desc, surface);
5327
ubld.AND(desc, desc, brw_imm_ud(0xfff));
5329
inst->src[0] = component(desc, 0);
5330
inst->src[1] = brw_imm_ud(0); /* ex_desc */
5335
inst->src[2] = src_payload;
5336
inst->resize_sources(3);
5339
/* EOT sampler messages don't make sense to split because it would
5340
* involve ending half of the thread early.
5342
assert(inst->group == 0);
5343
/* We need to use SENDC for EOT sampler messages */
5344
inst->check_tdr = true;
5345
inst->send_has_side_effects = true;
5348
/* Message length > MAX_SAMPLER_MESSAGE_SIZE disallowed by hardware. */
5349
assert(inst->mlen <= MAX_SAMPLER_MESSAGE_SIZE);
5353
get_sampler_msg_payload_type_bit_size(const intel_device_info *devinfo,
5354
opcode op, const fs_reg *src)
5356
unsigned src_type_size = 0;
5358
/* All sources need to have the same size, therefore seek the first valid
5359
* and take the size from there.
5361
for (unsigned i = 0; i < TEX_LOGICAL_NUM_SRCS; i++) {
5362
if (src[i].file != BAD_FILE) {
5363
src_type_size = brw_reg_type_to_size(src[i].type);
5368
assert(src_type_size == 2 || src_type_size == 4);
5371
/* Make sure all sources agree. On gfx12 this doesn't hold when sampling
5372
* compressed multisampled surfaces. There the payload contains MCS data
5373
* which is already in 16-bits unlike the other parameters that need forced
5376
if (devinfo->verx10 < 125 ||
5377
(op != SHADER_OPCODE_TXF_CMS_W &&
5378
op != SHADER_OPCODE_TXF_CMS)) {
5379
for (unsigned i = 0; i < TEX_LOGICAL_NUM_SRCS; i++) {
5380
assert(src[i].file == BAD_FILE ||
5381
brw_reg_type_to_size(src[i].type) == src_type_size);
5386
if (devinfo->verx10 < 125)
5387
return src_type_size * 8;
5389
/* Force conversion from 32-bit sources to 16-bit payload. From the XeHP Bspec:
5390
* 3D and GPGPU Programs - Shared Functions - 3D Sampler - Messages - Message
5391
* Format [GFX12:HAS:1209977870] *
5393
* ld2dms_w SIMD8H and SIMD16H Only
5394
* ld_mcs SIMD8H and SIMD16H Only
5395
* ld2dms REMOVEDBY(GEN:HAS:1406788836)
5398
if (op == SHADER_OPCODE_TXF_CMS_W ||
5399
op == SHADER_OPCODE_TXF_CMS ||
5400
op == SHADER_OPCODE_TXF_UMS ||
5401
op == SHADER_OPCODE_TXF_MCS)
5404
return src_type_size * 8;
5408
lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op)
5410
const intel_device_info *devinfo = bld.shader->devinfo;
5411
const fs_reg &coordinate = inst->src[TEX_LOGICAL_SRC_COORDINATE];
5412
const fs_reg &shadow_c = inst->src[TEX_LOGICAL_SRC_SHADOW_C];
5413
const fs_reg &lod = inst->src[TEX_LOGICAL_SRC_LOD];
5414
const fs_reg &lod2 = inst->src[TEX_LOGICAL_SRC_LOD2];
5415
const fs_reg &min_lod = inst->src[TEX_LOGICAL_SRC_MIN_LOD];
5416
const fs_reg &sample_index = inst->src[TEX_LOGICAL_SRC_SAMPLE_INDEX];
5417
const fs_reg &mcs = inst->src[TEX_LOGICAL_SRC_MCS];
5418
const fs_reg &surface = inst->src[TEX_LOGICAL_SRC_SURFACE];
5419
const fs_reg &sampler = inst->src[TEX_LOGICAL_SRC_SAMPLER];
5420
const fs_reg &surface_handle = inst->src[TEX_LOGICAL_SRC_SURFACE_HANDLE];
5421
const fs_reg &sampler_handle = inst->src[TEX_LOGICAL_SRC_SAMPLER_HANDLE];
5422
const fs_reg &tg4_offset = inst->src[TEX_LOGICAL_SRC_TG4_OFFSET];
5423
assert(inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].file == IMM);
5424
const unsigned coord_components = inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud;
5425
assert(inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].file == IMM);
5426
const unsigned grad_components = inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].ud;
5428
if (devinfo->ver >= 7) {
5429
const unsigned msg_payload_type_bit_size =
5430
get_sampler_msg_payload_type_bit_size(devinfo, op, inst->src);
5432
/* 16-bit payloads are available only on gfx11+ */
5433
assert(msg_payload_type_bit_size != 16 || devinfo->ver >= 11);
5435
lower_sampler_logical_send_gfx7(bld, inst, op, coordinate,
5436
shadow_c, lod, lod2, min_lod,
5438
mcs, surface, sampler,
5439
surface_handle, sampler_handle,
5441
msg_payload_type_bit_size,
5442
coord_components, grad_components);
5443
} else if (devinfo->ver >= 5) {
5444
lower_sampler_logical_send_gfx5(bld, inst, op, coordinate,
5445
shadow_c, lod, lod2, sample_index,
5447
coord_components, grad_components);
5449
lower_sampler_logical_send_gfx4(bld, inst, op, coordinate,
5450
shadow_c, lod, lod2,
5452
coord_components, grad_components);
5457
/**
 * Predicate the specified instruction on the sample mask.
 */
static void
emit_predicate_on_sample_mask(const fs_builder &bld, fs_inst *inst)
{
assert(bld.shader->stage == MESA_SHADER_FRAGMENT &&
5463
bld.group() == inst->group &&
5464
bld.dispatch_width() == inst->exec_size);
5466
const fs_visitor *v = static_cast<const fs_visitor *>(bld.shader);
5467
const fs_reg sample_mask = sample_mask_reg(bld);
5468
const unsigned subreg = sample_mask_flag_subreg(v);
5470
if (brw_wm_prog_data(v->stage_prog_data)->uses_kill) {
5471
assert(sample_mask.file == ARF &&
5472
sample_mask.nr == brw_flag_subreg(subreg).nr &&
5473
sample_mask.subnr == brw_flag_subreg(
5474
subreg + inst->group / 16).subnr);
5476
bld.group(1, 0).exec_all()
5477
.MOV(brw_flag_subreg(subreg + inst->group / 16), sample_mask);
5480
if (inst->predicate) {
5481
assert(inst->predicate == BRW_PREDICATE_NORMAL);
5482
assert(!inst->predicate_inverse);
5483
assert(inst->flag_subreg == 0);
5484
/* Combine the sample mask with the existing predicate by using a
5485
* vertical predication mode.
5487
inst->predicate = BRW_PREDICATE_ALIGN1_ALLV;
5489
inst->flag_subreg = subreg;
5490
inst->predicate = BRW_PREDICATE_NORMAL;
5491
inst->predicate_inverse = false;
5496
void
fs_visitor::emit_is_helper_invocation(fs_reg result)
{
/* Unlike the regular gl_HelperInvocation, that is defined at dispatch,
5499
* the helperInvocationEXT() (aka SpvOpIsHelperInvocationEXT) takes into
5500
* consideration demoted invocations.
5502
result.type = BRW_REGISTER_TYPE_UD;
5504
bld.MOV(result, brw_imm_ud(0));
5506
/* See sample_mask_reg() for why we split SIMD32 into SIMD16 here. */
5507
unsigned width = bld.dispatch_width();
5508
for (unsigned i = 0; i < DIV_ROUND_UP(width, 16); i++) {
5509
const fs_builder b = bld.group(MIN2(width, 16), i);
5511
fs_inst *mov = b.MOV(offset(result, b, i), brw_imm_ud(~0));
5513
/* The at() ensures that any code emitted to get the predicate happens
5514
* before the mov right above. This is not an issue elsewhere because
5515
* lowering code already set up the builder this way.
5517
emit_predicate_on_sample_mask(b.at(NULL, mov), mov);
5518
mov->predicate_inverse = true;
5523
/**
 * Predicate the specified instruction on the vector mask.
 */
static void
emit_predicate_on_vector_mask(const fs_builder &bld, fs_inst *inst)
{
assert(bld.shader->stage == MESA_SHADER_FRAGMENT &&
5529
bld.group() == inst->group &&
5530
bld.dispatch_width() == inst->exec_size);
5532
const fs_builder ubld = bld.exec_all().group(1, 0);
5534
const fs_visitor *v = static_cast<const fs_visitor *>(bld.shader);
5535
const fs_reg vector_mask = ubld.vgrf(BRW_REGISTER_TYPE_UW);
5536
ubld.emit(SHADER_OPCODE_READ_SR_REG, vector_mask, brw_imm_ud(3));
5537
const unsigned subreg = sample_mask_flag_subreg(v);
5539
ubld.MOV(brw_flag_subreg(subreg + inst->group / 16), vector_mask);
5541
if (inst->predicate) {
5542
assert(inst->predicate == BRW_PREDICATE_NORMAL);
5543
assert(!inst->predicate_inverse);
5544
assert(inst->flag_subreg == 0);
5545
/* Combine the vector mask with the existing predicate by using a
5546
* vertical predication mode.
5548
inst->predicate = BRW_PREDICATE_ALIGN1_ALLV;
5550
inst->flag_subreg = subreg;
5551
inst->predicate = BRW_PREDICATE_NORMAL;
5552
inst->predicate_inverse = false;
5557
static void
setup_surface_descriptors(const fs_builder &bld, fs_inst *inst, uint32_t desc,
                          const fs_reg &surface, const fs_reg &surface_handle)
{
   const ASSERTED intel_device_info *devinfo = bld.shader->devinfo;

   /* We must have exactly one of surface and surface_handle */
   assert((surface.file == BAD_FILE) != (surface_handle.file == BAD_FILE));

   if (surface.file == IMM) {
      inst->desc = desc | (surface.ud & 0xff);
      inst->src[0] = brw_imm_ud(0);
      inst->src[1] = brw_imm_ud(0); /* ex_desc */
   } else if (surface_handle.file != BAD_FILE) {
      /* Bindless surface */
      assert(devinfo->ver >= 9);
      inst->desc = desc | GFX9_BTI_BINDLESS;
      inst->src[0] = brw_imm_ud(0);

      /* We assume that the driver provided the handle in the top 20 bits so
       * we can use the surface handle directly as the extended descriptor.
       */
      inst->src[1] = retype(surface_handle, BRW_REGISTER_TYPE_UD);
   } else {
      const fs_builder ubld = bld.exec_all().group(1, 0);
      fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD);
      ubld.AND(tmp, surface, brw_imm_ud(0xff));
      inst->src[0] = component(tmp, 0);
      inst->src[1] = brw_imm_ud(0); /* ex_desc */
   }
}
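/**
 * Lower a logical untyped/typed surface read, write or atomic into a SEND to
 * the appropriate data port, building the optional header, address and data
 * payloads and filling in the SFID and message descriptor.
 */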
lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
5592
const intel_device_info *devinfo = bld.shader->devinfo;
5594
/* Get the logical send arguments. */
5595
const fs_reg &addr = inst->src[SURFACE_LOGICAL_SRC_ADDRESS];
5596
const fs_reg &src = inst->src[SURFACE_LOGICAL_SRC_DATA];
5597
const fs_reg &surface = inst->src[SURFACE_LOGICAL_SRC_SURFACE];
5598
const fs_reg &surface_handle = inst->src[SURFACE_LOGICAL_SRC_SURFACE_HANDLE];
5599
const UNUSED fs_reg &dims = inst->src[SURFACE_LOGICAL_SRC_IMM_DIMS];
5600
const fs_reg &arg = inst->src[SURFACE_LOGICAL_SRC_IMM_ARG];
5601
const fs_reg &allow_sample_mask =
5602
inst->src[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK];
5603
assert(arg.file == IMM);
5604
assert(allow_sample_mask.file == IMM);
5606
/* Calculate the total number of components of the payload. */
5607
const unsigned addr_sz = inst->components_read(SURFACE_LOGICAL_SRC_ADDRESS);
5608
const unsigned src_sz = inst->components_read(SURFACE_LOGICAL_SRC_DATA);
5610
const bool is_typed_access =
5611
inst->opcode == SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL ||
5612
inst->opcode == SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL ||
5613
inst->opcode == SHADER_OPCODE_TYPED_ATOMIC_LOGICAL;
5615
const bool is_surface_access = is_typed_access ||
5616
inst->opcode == SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL ||
5617
inst->opcode == SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL ||
5618
inst->opcode == SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL;
5620
const bool is_stateless =
5621
surface.file == IMM && (surface.ud == BRW_BTI_STATELESS ||
5622
surface.ud == GFX8_BTI_STATELESS_NON_COHERENT);
5624
const bool has_side_effects = inst->has_side_effects();
5626
fs_reg sample_mask = allow_sample_mask.ud ? sample_mask_reg(bld) :
5627
fs_reg(brw_imm_d(0xffff));
5629
/* From the BDW PRM Volume 7, page 147:
5631
* "For the Data Cache Data Port*, the header must be present for the
5632
* following message types: [...] Typed read/write/atomics"
5634
* Earlier generations have a similar wording. Because of this restriction
5635
* we don't attempt to implement sample masks via predication for such
5636
* messages prior to Gfx9, since we have to provide a header anyway. On
5637
* Gfx11+ the header has been removed so we can only use predication.
5639
* For all stateless A32 messages, we also need a header
5642
if ((devinfo->ver < 9 && is_typed_access) || is_stateless) {
5643
fs_builder ubld = bld.exec_all().group(8, 0);
5644
header = ubld.vgrf(BRW_REGISTER_TYPE_UD);
5646
assert(!is_surface_access);
5647
ubld.emit(SHADER_OPCODE_SCRATCH_HEADER, header);
5649
ubld.MOV(header, brw_imm_d(0));
5650
if (is_surface_access)
5651
ubld.group(1, 0).MOV(component(header, 7), sample_mask);
5654
const unsigned header_sz = header.file != BAD_FILE ? 1 : 0;
5656
fs_reg payload, payload2;
5657
unsigned mlen, ex_mlen = 0;
5658
if (devinfo->ver >= 9 &&
5659
(src.file == BAD_FILE || header.file == BAD_FILE)) {
5660
/* We have split sends on gfx9 and above */
5661
if (header.file == BAD_FILE) {
5662
payload = bld.move_to_vgrf(addr, addr_sz);
5663
payload2 = bld.move_to_vgrf(src, src_sz);
5664
mlen = addr_sz * (inst->exec_size / 8);
5665
ex_mlen = src_sz * (inst->exec_size / 8);
5667
assert(src.file == BAD_FILE);
5669
payload2 = bld.move_to_vgrf(addr, addr_sz);
5671
ex_mlen = addr_sz * (inst->exec_size / 8);
5674
/* Allocate space for the payload. */
5675
const unsigned sz = header_sz + addr_sz + src_sz;
5676
payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz);
5677
fs_reg *const components = new fs_reg[sz];
5680
/* Construct the payload. */
5681
if (header.file != BAD_FILE)
5682
components[n++] = header;
5684
for (unsigned i = 0; i < addr_sz; i++)
5685
components[n++] = offset(addr, bld, i);
5687
for (unsigned i = 0; i < src_sz; i++)
5688
components[n++] = offset(src, bld, i);
5690
bld.LOAD_PAYLOAD(payload, components, sz, header_sz);
5691
mlen = header_sz + (addr_sz + src_sz) * inst->exec_size / 8;
5693
delete[] components;
5696
/* Predicate the instruction on the sample mask if no header is
5699
if ((header.file == BAD_FILE || !is_surface_access) &&
5700
sample_mask.file != BAD_FILE && sample_mask.file != IMM)
5701
emit_predicate_on_sample_mask(bld, inst);
5704
switch (inst->opcode) {
5705
case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
5706
case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
5707
/* Byte scattered opcodes go through the normal data cache */
5708
sfid = GFX7_SFID_DATAPORT_DATA_CACHE;
5711
case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL:
5712
case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL:
5713
sfid = devinfo->ver >= 7 ? GFX7_SFID_DATAPORT_DATA_CACHE :
5714
devinfo->ver >= 6 ? GFX6_SFID_DATAPORT_RENDER_CACHE :
5715
BRW_DATAPORT_READ_TARGET_RENDER_CACHE;
5718
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
5719
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
5720
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
5721
case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
5722
/* Untyped Surface messages go through the data cache but the SFID value
5723
* changed on Haswell.
5725
sfid = (devinfo->verx10 >= 75 ?
5726
HSW_SFID_DATAPORT_DATA_CACHE_1 :
5727
GFX7_SFID_DATAPORT_DATA_CACHE);
5730
case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
5731
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
5732
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
5733
/* Typed surface messages go through the render cache on IVB and the
5734
* data cache on HSW+.
5736
sfid = (devinfo->verx10 >= 75 ?
5737
HSW_SFID_DATAPORT_DATA_CACHE_1 :
5738
GFX6_SFID_DATAPORT_RENDER_CACHE);
5742
unreachable("Unsupported surface opcode");
5746
switch (inst->opcode) {
5747
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
5748
desc = brw_dp_untyped_surface_rw_desc(devinfo, inst->exec_size,
5749
arg.ud, /* num_channels */
5753
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
5754
desc = brw_dp_untyped_surface_rw_desc(devinfo, inst->exec_size,
5755
arg.ud, /* num_channels */
5759
case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
5760
desc = brw_dp_byte_scattered_rw_desc(devinfo, inst->exec_size,
5761
arg.ud, /* bit_size */
5765
case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
5766
desc = brw_dp_byte_scattered_rw_desc(devinfo, inst->exec_size,
5767
arg.ud, /* bit_size */
5771
case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL:
5772
assert(arg.ud == 32); /* bit_size */
5773
desc = brw_dp_dword_scattered_rw_desc(devinfo, inst->exec_size,
5777
case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL:
5778
assert(arg.ud == 32); /* bit_size */
5779
desc = brw_dp_dword_scattered_rw_desc(devinfo, inst->exec_size,
5783
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
5784
desc = brw_dp_untyped_atomic_desc(devinfo, inst->exec_size,
5785
arg.ud, /* atomic_op */
5786
!inst->dst.is_null());
5789
case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
5790
desc = brw_dp_untyped_atomic_float_desc(devinfo, inst->exec_size,
5791
arg.ud, /* atomic_op */
5792
!inst->dst.is_null());
5795
case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
5796
desc = brw_dp_typed_surface_rw_desc(devinfo, inst->exec_size, inst->group,
5797
arg.ud, /* num_channels */
5801
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
5802
desc = brw_dp_typed_surface_rw_desc(devinfo, inst->exec_size, inst->group,
5803
arg.ud, /* num_channels */
5807
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
5808
desc = brw_dp_typed_atomic_desc(devinfo, inst->exec_size, inst->group,
5809
arg.ud, /* atomic_op */
5810
!inst->dst.is_null());
5814
unreachable("Unknown surface logical instruction");
5817
/* Update the original instruction. */
5818
inst->opcode = SHADER_OPCODE_SEND;
5820
inst->ex_mlen = ex_mlen;
5821
inst->header_size = header_sz;
5822
inst->send_has_side_effects = has_side_effects;
5823
inst->send_is_volatile = !has_side_effects;
5825
/* Set up SFID and descriptors */
5827
setup_surface_descriptors(bld, inst, desc, surface, surface_handle);
5829
inst->resize_sources(4);
5831
/* Finally, the payload */
5832
inst->src[2] = payload;
5833
   inst->src[3] = payload2;
}
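/* Translate a legacy BRW_AOP_* integer atomic opcode into the corresponding
 * LSC atomic opcode.
 */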
static enum lsc_opcode
5837
brw_atomic_op_to_lsc_atomic_op(unsigned op)
5841
return LSC_OP_ATOMIC_AND;
5843
return LSC_OP_ATOMIC_OR;
5845
return LSC_OP_ATOMIC_XOR;
5847
return LSC_OP_ATOMIC_STORE;
5849
return LSC_OP_ATOMIC_INC;
5851
return LSC_OP_ATOMIC_DEC;
5853
return LSC_OP_ATOMIC_ADD;
5855
return LSC_OP_ATOMIC_SUB;
5857
return LSC_OP_ATOMIC_MAX;
5859
return LSC_OP_ATOMIC_MIN;
5861
return LSC_OP_ATOMIC_UMAX;
5863
return LSC_OP_ATOMIC_UMIN;
5865
return LSC_OP_ATOMIC_CMPXCHG;
5868
unreachable("invalid atomic opcode");
5872
static enum lsc_opcode
brw_atomic_op_to_lsc_fatomic_op(uint32_t aop)
{
   switch (aop) {
   case BRW_AOP_FMAX:
      return LSC_OP_ATOMIC_FMAX;
   case BRW_AOP_FMIN:
      return LSC_OP_ATOMIC_FMIN;
   case BRW_AOP_FCMPWR:
      return LSC_OP_ATOMIC_FCMPXCHG;
   case BRW_AOP_FADD:
      return LSC_OP_ATOMIC_FADD;
   default:
      unreachable("Unsupported float atomic opcode");
   }
}
static enum lsc_data_size
lsc_bits_to_data_size(unsigned bit_size)
{
   switch (bit_size / 8) {
   case 1: return LSC_DATA_SIZE_D8U32;
   case 2: return LSC_DATA_SIZE_D16U32;
   case 4: return LSC_DATA_SIZE_D32;
   case 8: return LSC_DATA_SIZE_D64;
   default:
      unreachable("Unsupported data size.");
   }
}
5903
lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst)
5905
const intel_device_info *devinfo = bld.shader->devinfo;
5906
assert(devinfo->has_lsc);
5908
/* Get the logical send arguments. */
5909
const fs_reg addr = inst->src[SURFACE_LOGICAL_SRC_ADDRESS];
5910
const fs_reg src = inst->src[SURFACE_LOGICAL_SRC_DATA];
5911
const fs_reg surface = inst->src[SURFACE_LOGICAL_SRC_SURFACE];
5912
const fs_reg surface_handle = inst->src[SURFACE_LOGICAL_SRC_SURFACE_HANDLE];
5913
const UNUSED fs_reg &dims = inst->src[SURFACE_LOGICAL_SRC_IMM_DIMS];
5914
const fs_reg arg = inst->src[SURFACE_LOGICAL_SRC_IMM_ARG];
5915
const fs_reg allow_sample_mask =
5916
inst->src[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK];
5917
assert(arg.file == IMM);
5918
assert(allow_sample_mask.file == IMM);
5920
/* Calculate the total number of components of the payload. */
5921
const unsigned addr_sz = inst->components_read(SURFACE_LOGICAL_SRC_ADDRESS);
5922
const unsigned src_comps = inst->components_read(SURFACE_LOGICAL_SRC_DATA);
5923
const unsigned src_sz = type_sz(src.type);
5925
const bool has_side_effects = inst->has_side_effects();
5927
unsigned ex_mlen = 0;
5928
fs_reg payload, payload2;
5929
payload = bld.move_to_vgrf(addr, addr_sz);
5930
if (src.file != BAD_FILE) {
5931
payload2 = bld.move_to_vgrf(src, src_comps);
5932
ex_mlen = (src_comps * src_sz * inst->exec_size) / REG_SIZE;
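/* For illustration (assuming the usual 32-byte REG_SIZE): a SIMD16 untyped
 * write of four 32-bit components gives
 *
 *    ex_mlen = (4 * 4 * 16) / 32 = 8
 *
 * GRFs of extended payload.
 */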
5935
/* Predicate the instruction on the sample mask if needed */
5936
fs_reg sample_mask = allow_sample_mask.ud ? sample_mask_reg(bld) :
5937
fs_reg(brw_imm_d(0xffff));
5938
if (sample_mask.file != BAD_FILE && sample_mask.file != IMM)
5939
emit_predicate_on_sample_mask(bld, inst);
5941
if (surface.file == IMM && surface.ud == GFX7_BTI_SLM)
5942
inst->sfid = GFX12_SFID_SLM;
5944
inst->sfid = GFX12_SFID_UGM;
5946
/* We must have exactly one of surface and surface_handle */
5947
assert((surface.file == BAD_FILE) != (surface_handle.file == BAD_FILE));
5949
enum lsc_addr_surface_type surf_type;
5950
if (surface_handle.file != BAD_FILE)
5951
surf_type = LSC_ADDR_SURFTYPE_BSS;
5952
else if (surface.file == IMM && surface.ud == GFX7_BTI_SLM)
5953
surf_type = LSC_ADDR_SURFTYPE_FLAT;
5955
surf_type = LSC_ADDR_SURFTYPE_BTI;
5957
switch (inst->opcode) {
5958
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
5959
inst->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD_CMASK, inst->exec_size,
5960
surf_type, LSC_ADDR_SIZE_A32,
5961
1 /* num_coordinates */,
5962
LSC_DATA_SIZE_D32, arg.ud /* num_channels */,
5963
false /* transpose */,
5964
LSC_CACHE_LOAD_L1STATE_L3MOCS,
5965
true /* has_dest */);
5967
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
5968
inst->desc = lsc_msg_desc(devinfo, LSC_OP_STORE_CMASK, inst->exec_size,
5969
surf_type, LSC_ADDR_SIZE_A32,
5970
1 /* num_coordinates */,
5971
LSC_DATA_SIZE_D32, arg.ud /* num_channels */,
5972
false /* transpose */,
5973
LSC_CACHE_STORE_L1STATE_L3MOCS,
5974
false /* has_dest */);
5976
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
5977
case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: {
5978
/* Bspec: Atomic instruction -> Cache section:
 *
 *    Atomic messages are always forced to "un-cacheable" in the L1
 *    cache.
 */
5983
enum lsc_opcode opcode =
5984
inst->opcode == SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL ?
5985
brw_atomic_op_to_lsc_fatomic_op(arg.ud) :
5986
brw_atomic_op_to_lsc_atomic_op(arg.ud);
5987
inst->desc = lsc_msg_desc(devinfo, opcode, inst->exec_size,
5988
surf_type, LSC_ADDR_SIZE_A32,
5989
1 /* num_coordinates */,
5990
lsc_bits_to_data_size(src_sz * 8),
5991
1 /* num_channels */,
5992
false /* transpose */,
5993
LSC_CACHE_STORE_L1UC_L3WB,
5994
!inst->dst.is_null());
5997
case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
5998
inst->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD, inst->exec_size,
5999
surf_type, LSC_ADDR_SIZE_A32,
6000
1 /* num_coordinates */,
6001
lsc_bits_to_data_size(arg.ud),
6002
1 /* num_channels */,
6003
false /* transpose */,
6004
LSC_CACHE_LOAD_L1STATE_L3MOCS,
6005
true /* has_dest */);
6007
case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
6008
inst->desc = lsc_msg_desc(devinfo, LSC_OP_STORE, inst->exec_size,
6009
surf_type, LSC_ADDR_SIZE_A32,
6010
1 /* num_coordinates */,
6011
lsc_bits_to_data_size(arg.ud),
6012
1 /* num_channels */,
6013
false /* transpose */,
6014
LSC_CACHE_STORE_L1STATE_L3MOCS,
6015
false /* has_dest */);
6018
unreachable("Unknown surface logical instruction");
6021
inst->src[0] = brw_imm_ud(0);
6023
/* Set up extended descriptors */
6024
switch (surf_type) {
6025
case LSC_ADDR_SURFTYPE_FLAT:
6026
inst->src[1] = brw_imm_ud(0);
6028
case LSC_ADDR_SURFTYPE_BSS:
6029
/* We assume that the driver provided the handle in the top 20 bits so
6030
* we can use the surface handle directly as the extended descriptor.
6032
inst->src[1] = retype(surface_handle, BRW_REGISTER_TYPE_UD);
6034
case LSC_ADDR_SURFTYPE_BTI:
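/* The binding table index ends up in the upper byte of the extended
 * descriptor: an immediate surface goes through lsc_bti_ex_desc(), while a
 * dynamic surface index is shifted left by 24 below.
 */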
6035
if (surface.file == IMM) {
6036
inst->src[1] = brw_imm_ud(lsc_bti_ex_desc(devinfo, surface.ud));
6038
const fs_builder ubld = bld.exec_all().group(1, 0);
6039
fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD);
6040
ubld.SHL(tmp, surface, brw_imm_ud(24));
6041
inst->src[1] = component(tmp, 0);
6045
unreachable("Unknown surface type");
6048
/* Update the original instruction. */
6049
inst->opcode = SHADER_OPCODE_SEND;
6050
inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc);
6051
inst->ex_mlen = ex_mlen;
6052
inst->header_size = 0;
6053
inst->send_has_side_effects = has_side_effects;
6054
inst->send_is_volatile = !has_side_effects;
6056
inst->resize_sources(4);
6058
/* Finally, the payload */
6059
inst->src[2] = payload;
6060
inst->src[3] = payload2;
6064
lower_surface_block_logical_send(const fs_builder &bld, fs_inst *inst)
6066
const intel_device_info *devinfo = bld.shader->devinfo;
6067
assert(devinfo->ver >= 9);
6069
/* Get the logical send arguments. */
6070
const fs_reg &addr = inst->src[SURFACE_LOGICAL_SRC_ADDRESS];
6071
const fs_reg &src = inst->src[SURFACE_LOGICAL_SRC_DATA];
6072
const fs_reg &surface = inst->src[SURFACE_LOGICAL_SRC_SURFACE];
6073
const fs_reg &surface_handle = inst->src[SURFACE_LOGICAL_SRC_SURFACE_HANDLE];
6074
const fs_reg &arg = inst->src[SURFACE_LOGICAL_SRC_IMM_ARG];
6075
assert(arg.file == IMM);
6076
assert(inst->src[SURFACE_LOGICAL_SRC_IMM_DIMS].file == BAD_FILE);
6077
assert(inst->src[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK].file == BAD_FILE);
6079
const bool is_stateless =
6080
surface.file == IMM && (surface.ud == BRW_BTI_STATELESS ||
6081
surface.ud == GFX8_BTI_STATELESS_NON_COHERENT);
6083
const bool has_side_effects = inst->has_side_effects();
6085
const bool align_16B =
6086
inst->opcode != SHADER_OPCODE_UNALIGNED_OWORD_BLOCK_READ_LOGICAL;
6088
const bool write = inst->opcode == SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL;
6090
/* The address is stored in the header. See MH_A32_GO and MH_BTS_GO. */
6091
fs_builder ubld = bld.exec_all().group(8, 0);
6092
fs_reg header = ubld.vgrf(BRW_REGISTER_TYPE_UD);
6095
ubld.emit(SHADER_OPCODE_SCRATCH_HEADER, header);
6097
ubld.MOV(header, brw_imm_d(0));
6099
/* Address in OWord units when aligned to OWords. */
6101
ubld.group(1, 0).SHR(component(header, 2), addr, brw_imm_ud(4));
6103
ubld.group(1, 0).MOV(component(header, 2), addr);
6106
unsigned ex_mlen = 0;
6108
const unsigned src_sz = inst->components_read(SURFACE_LOGICAL_SRC_DATA);
6109
data = retype(bld.move_to_vgrf(src, src_sz), BRW_REGISTER_TYPE_UD);
6110
ex_mlen = src_sz * type_sz(src.type) * inst->exec_size / REG_SIZE;
6113
inst->opcode = SHADER_OPCODE_SEND;
6115
inst->ex_mlen = ex_mlen;
6116
inst->header_size = 1;
6117
inst->send_has_side_effects = has_side_effects;
6118
inst->send_is_volatile = !has_side_effects;
6120
inst->sfid = GFX7_SFID_DATAPORT_DATA_CACHE;
6122
const uint32_t desc = brw_dp_oword_block_rw_desc(devinfo, align_16B,
6124
setup_surface_descriptors(bld, inst, desc, surface, surface_handle);
6126
inst->resize_sources(4);
6128
inst->src[2] = header;
6129
inst->src[3] = data;
6133
emit_a64_oword_block_header(const fs_builder &bld, const fs_reg &addr)
6135
const fs_builder ubld = bld.exec_all().group(8, 0);
6136
fs_reg header = ubld.vgrf(BRW_REGISTER_TYPE_UD);
6137
ubld.MOV(header, brw_imm_ud(0));
6139
/* Use a 2-wide MOV to fill out the address */
6140
assert(type_sz(addr.type) == 8 && addr.stride == 0);
6141
fs_reg addr_vec2 = addr;
6142
addr_vec2.type = BRW_REGISTER_TYPE_UD;
6143
addr_vec2.stride = 1;
6144
ubld.group(2, 0).MOV(header, addr_vec2);
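/* The group(2, 0) MOV above views the 64-bit address as two packed dwords,
 * so the low and high halves of the address land in the first two dwords of
 * the header.
 */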
6150
emit_fragment_mask(const fs_builder &bld, fs_inst *inst)
6152
assert(inst->src[A64_LOGICAL_ENABLE_HELPERS].file == IMM);
6153
const bool enable_helpers = inst->src[A64_LOGICAL_ENABLE_HELPERS].ud;
6155
/* If we're a fragment shader, we have to predicate with the sample mask to
 * avoid helper invocations in instructions with side effects, unless they
 * are explicitly required.
 *
 * There are also special cases when we actually want to run on helpers.
 */
6162
assert(bld.shader->stage == MESA_SHADER_FRAGMENT);
6164
emit_predicate_on_vector_mask(bld, inst);
6165
else if (inst->has_side_effects())
6166
emit_predicate_on_sample_mask(bld, inst);
6170
lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst)
6172
const intel_device_info *devinfo = bld.shader->devinfo;
6174
/* Get the logical send arguments. */
6175
const fs_reg &addr = inst->src[A64_LOGICAL_ADDRESS];
6176
const fs_reg &src = inst->src[A64_LOGICAL_SRC];
6177
const unsigned src_sz = type_sz(src.type);
6179
const unsigned src_comps = inst->components_read(1);
6180
assert(inst->src[A64_LOGICAL_ARG].file == IMM);
6181
const unsigned arg = inst->src[A64_LOGICAL_ARG].ud;
6182
const bool has_side_effects = inst->has_side_effects();
6184
fs_reg payload = retype(bld.move_to_vgrf(addr, 1), BRW_REGISTER_TYPE_UD);
6185
fs_reg payload2 = retype(bld.move_to_vgrf(src, src_comps),
6186
BRW_REGISTER_TYPE_UD);
6187
unsigned ex_mlen = src_comps * src_sz * inst->exec_size / REG_SIZE;
6189
switch (inst->opcode) {
6190
case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL:
6191
inst->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD_CMASK, inst->exec_size,
6192
LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A64,
6193
1 /* num_coordinates */,
6194
LSC_DATA_SIZE_D32, arg /* num_channels */,
6195
false /* transpose */,
6196
LSC_CACHE_LOAD_L1STATE_L3MOCS,
6197
true /* has_dest */);
6199
case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
6200
inst->desc = lsc_msg_desc(devinfo, LSC_OP_STORE_CMASK, inst->exec_size,
6201
LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A64,
6202
1 /* num_coordinates */,
6203
LSC_DATA_SIZE_D32, arg /* num_channels */,
6204
false /* transpose */,
6205
LSC_CACHE_STORE_L1STATE_L3MOCS,
6206
false /* has_dest */);
6208
case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
6209
inst->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD, inst->exec_size,
6210
LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A64,
6211
1 /* num_coordinates */,
6212
lsc_bits_to_data_size(arg),
6213
1 /* num_channels */,
6214
false /* transpose */,
6215
LSC_CACHE_LOAD_L1STATE_L3MOCS,
6216
true /* has_dest */);
6218
case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
6219
inst->desc = lsc_msg_desc(devinfo, LSC_OP_STORE, inst->exec_size,
6220
LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A64,
6221
1 /* num_coordinates */,
6222
lsc_bits_to_data_size(arg),
6223
1 /* num_channels */,
6224
false /* transpose */,
6225
LSC_CACHE_STORE_L1STATE_L3MOCS,
6226
false /* has_dest */);
6228
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
6229
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL:
6230
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL:
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL:
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT64_LOGICAL: {
6234
/* Bspec: Atomic instruction -> Cache section:
 *
 *    Atomic messages are always forced to "un-cacheable" in the L1
 *    cache.
 */
6239
enum lsc_opcode opcode =
6240
(inst->opcode == SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL ||
6241
inst->opcode == SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL ||
6242
inst->opcode == SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL) ?
6243
brw_atomic_op_to_lsc_atomic_op(arg) :
6244
brw_atomic_op_to_lsc_fatomic_op(arg);
6245
inst->desc = lsc_msg_desc(devinfo, opcode, inst->exec_size,
6246
LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A64,
6247
1 /* num_coordinates */,
6248
lsc_bits_to_data_size(src_sz * 8),
6249
1 /* num_channels */,
6250
false /* transpose */,
6251
LSC_CACHE_STORE_L1UC_L3WB,
6252
!inst->dst.is_null());
6256
unreachable("Unknown A64 logical instruction");
6259
if (bld.shader->stage == MESA_SHADER_FRAGMENT)
6260
emit_fragment_mask(bld, inst);
6262
/* Update the original instruction. */
6263
inst->opcode = SHADER_OPCODE_SEND;
6264
inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc);
6265
inst->ex_mlen = ex_mlen;
6266
inst->header_size = 0;
6267
inst->send_has_side_effects = has_side_effects;
6268
inst->send_is_volatile = !has_side_effects;
6270
/* Set up SFID and descriptors */
6271
inst->sfid = GFX12_SFID_UGM;
6272
inst->resize_sources(4);
6273
inst->src[0] = brw_imm_ud(0); /* desc */
6274
inst->src[1] = brw_imm_ud(0); /* ex_desc */
6275
inst->src[2] = payload;
6276
inst->src[3] = payload2;
6280
lower_a64_logical_send(const fs_builder &bld, fs_inst *inst)
6282
const intel_device_info *devinfo = bld.shader->devinfo;
6284
const fs_reg &addr = inst->src[A64_LOGICAL_ADDRESS];
6285
const fs_reg &src = inst->src[A64_LOGICAL_SRC];
6286
const unsigned src_comps = inst->components_read(1);
6287
assert(inst->src[A64_LOGICAL_ARG].file == IMM);
6288
const unsigned arg = inst->src[A64_LOGICAL_ARG].ud;
6289
const bool has_side_effects = inst->has_side_effects();
6291
fs_reg payload, payload2;
6292
unsigned mlen, ex_mlen = 0, header_size = 0;
6293
if (inst->opcode == SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL ||
6294
inst->opcode == SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL ||
6295
inst->opcode == SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL) {
6296
assert(devinfo->ver >= 9);
6298
/* OWORD messages only take a scalar address in a header */
6301
payload = emit_a64_oword_block_header(bld, addr);
6303
if (inst->opcode == SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL) {
6304
ex_mlen = src_comps * type_sz(src.type) * inst->exec_size / REG_SIZE;
6305
payload2 = retype(bld.move_to_vgrf(src, src_comps),
6306
BRW_REGISTER_TYPE_UD);
6308
} else if (devinfo->ver >= 9) {
6309
/* On Skylake and above, we have SENDS */
6310
mlen = 2 * (inst->exec_size / 8);
6311
ex_mlen = src_comps * type_sz(src.type) * inst->exec_size / REG_SIZE;
6312
payload = retype(bld.move_to_vgrf(addr, 1), BRW_REGISTER_TYPE_UD);
6313
payload2 = retype(bld.move_to_vgrf(src, src_comps),
6314
BRW_REGISTER_TYPE_UD);
6316
/* Add two because the address is 64-bit */
6317
const unsigned dwords = 2 + src_comps;
6318
mlen = dwords * (inst->exec_size / 8);
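/* For illustration: a SIMD8 A64 untyped write of a vec4 on this non-SENDS
 * path uses dwords = 2 + 4 = 6, so mlen = 6 * (8 / 8) = 6 GRFs of payload.
 */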
6324
for (unsigned i = 0; i < src_comps; i++)
6325
sources[1 + i] = offset(src, bld, i);
6327
payload = bld.vgrf(BRW_REGISTER_TYPE_UD, dwords);
6328
bld.LOAD_PAYLOAD(payload, sources, 1 + src_comps, 0);
6332
switch (inst->opcode) {
6333
case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL:
6334
desc = brw_dp_a64_untyped_surface_rw_desc(devinfo, inst->exec_size,
6335
arg, /* num_channels */
6339
case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
6340
desc = brw_dp_a64_untyped_surface_rw_desc(devinfo, inst->exec_size,
6341
arg, /* num_channels */
6345
case SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL:
6346
desc = brw_dp_a64_oword_block_rw_desc(devinfo,
6347
true, /* align_16B */
6348
arg, /* num_dwords */
6352
case SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL:
6353
desc = brw_dp_a64_oword_block_rw_desc(devinfo,
6354
false, /* align_16B */
6355
arg, /* num_dwords */
6359
case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL:
6360
desc = brw_dp_a64_oword_block_rw_desc(devinfo,
6361
true, /* align_16B */
6362
arg, /* num_dwords */
6366
case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
6367
desc = brw_dp_a64_byte_scattered_rw_desc(devinfo, inst->exec_size,
6372
case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
6373
desc = brw_dp_a64_byte_scattered_rw_desc(devinfo, inst->exec_size,
6378
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
6379
desc = brw_dp_a64_untyped_atomic_desc(devinfo, inst->exec_size, 32,
6380
arg, /* atomic_op */
6381
!inst->dst.is_null());
6384
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL:
6385
desc = brw_dp_a64_untyped_atomic_desc(devinfo, inst->exec_size, 16,
6386
arg, /* atomic_op */
6387
!inst->dst.is_null());
6390
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
6391
desc = brw_dp_a64_untyped_atomic_desc(devinfo, inst->exec_size, 64,
6392
arg, /* atomic_op */
6393
!inst->dst.is_null());
6396
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL:
6397
desc = brw_dp_a64_untyped_atomic_float_desc(devinfo, inst->exec_size,
6399
arg, /* atomic_op */
6400
!inst->dst.is_null());
6403
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL:
6404
desc = brw_dp_a64_untyped_atomic_float_desc(devinfo, inst->exec_size,
6406
arg, /* atomic_op */
6407
!inst->dst.is_null());
6411
unreachable("Unknown A64 logical instruction");
6414
if (bld.shader->stage == MESA_SHADER_FRAGMENT)
6415
emit_fragment_mask(bld, inst);
6417
/* Update the original instruction. */
6418
inst->opcode = SHADER_OPCODE_SEND;
6420
inst->ex_mlen = ex_mlen;
6421
inst->header_size = header_size;
6422
inst->send_has_side_effects = has_side_effects;
6423
inst->send_is_volatile = !has_side_effects;
6425
/* Set up SFID and descriptors */
6426
inst->sfid = HSW_SFID_DATAPORT_DATA_CACHE_1;
6428
inst->resize_sources(4);
6429
inst->src[0] = brw_imm_ud(0); /* desc */
6430
inst->src[1] = brw_imm_ud(0); /* ex_desc */
6431
inst->src[2] = payload;
6432
inst->src[3] = payload2;
6436
lower_lsc_varying_pull_constant_logical_send(const fs_builder &bld,
6439
const intel_device_info *devinfo = bld.shader->devinfo;
6440
ASSERTED const brw_compiler *compiler = bld.shader->compiler;
6442
fs_reg index = inst->src[0];
6444
/* We are switching the instruction from an ALU-like instruction to a
6445
* send-from-grf instruction. Since sends can't handle strides or
6446
* source modifiers, we have to make a copy of the offset source.
6448
fs_reg ubo_offset = bld.move_to_vgrf(inst->src[1], 1);
6450
assert(inst->src[2].file == BRW_IMMEDIATE_VALUE);
6451
unsigned alignment = inst->src[2].ud;
6453
inst->opcode = SHADER_OPCODE_SEND;
6454
inst->sfid = GFX12_SFID_UGM;
6455
inst->resize_sources(3);
6456
inst->src[0] = brw_imm_ud(0);
6458
if (index.file == IMM) {
6459
inst->src[1] = brw_imm_ud(lsc_bti_ex_desc(devinfo, index.ud));
6461
const fs_builder ubld = bld.exec_all().group(1, 0);
6462
fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD);
6463
ubld.SHL(tmp, index, brw_imm_ud(24));
6464
inst->src[1] = component(tmp, 0);
6467
assert(!compiler->indirect_ubos_use_sampler);
6469
inst->src[2] = ubo_offset; /* payload */
6470
if (alignment >= 4) {
6471
inst->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD_CMASK, inst->exec_size,
6472
LSC_ADDR_SURFTYPE_BTI, LSC_ADDR_SIZE_A32,
6473
1 /* num_coordinates */,
6475
4 /* num_channels */,
6476
false /* transpose */,
6477
LSC_CACHE_LOAD_L1STATE_L3MOCS,
6478
true /* has_dest */);
6479
inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc);
6481
inst->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD, inst->exec_size,
6482
LSC_ADDR_SURFTYPE_BTI, LSC_ADDR_SIZE_A32,
6483
1 /* num_coordinates */,
6485
1 /* num_channels */,
6486
false /* transpose */,
6487
LSC_CACHE_LOAD_L1STATE_L3MOCS,
6488
true /* has_dest */);
6489
inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc);
6490
/* The byte scattered messages can only read one dword at a time so
 * we have to duplicate the message 4 times to read the full vec4.
 * Hopefully, dead code will clean up the mess if some of them aren't
 * needed.
 */
6495
assert(inst->size_written == 16 * inst->exec_size);
6496
inst->size_written /= 4;
6497
for (unsigned c = 1; c < 4; c++) {
6498
/* Emit a copy of the instruction because we're about to modify
6499
* it. Because this loop starts at 1, we will emit copies for the
6500
* first 3 and the final one will be the modified instruction.
6504
/* Offset the source */
6505
inst->src[2] = bld.vgrf(BRW_REGISTER_TYPE_UD);
6506
bld.ADD(inst->src[2], ubo_offset, brw_imm_ud(c * 4));
6508
/* Offset the destination */
6509
inst->dst = offset(inst->dst, bld, 1);
6515
lower_varying_pull_constant_logical_send(const fs_builder &bld, fs_inst *inst)
6517
const intel_device_info *devinfo = bld.shader->devinfo;
6518
const brw_compiler *compiler = bld.shader->compiler;
6520
if (devinfo->ver >= 7) {
6521
fs_reg index = inst->src[0];
6522
/* We are switching the instruction from an ALU-like instruction to a
6523
* send-from-grf instruction. Since sends can't handle strides or
6524
* source modifiers, we have to make a copy of the offset source.
6526
fs_reg ubo_offset = bld.vgrf(BRW_REGISTER_TYPE_UD);
6527
bld.MOV(ubo_offset, inst->src[1]);
6529
assert(inst->src[2].file == BRW_IMMEDIATE_VALUE);
6530
unsigned alignment = inst->src[2].ud;
6532
inst->opcode = SHADER_OPCODE_SEND;
6533
inst->mlen = inst->exec_size / 8;
6534
inst->resize_sources(3);
6536
if (index.file == IMM) {
6537
inst->desc = index.ud & 0xff;
6538
inst->src[0] = brw_imm_ud(0);
6541
const fs_builder ubld = bld.exec_all().group(1, 0);
6542
fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD);
6543
ubld.AND(tmp, index, brw_imm_ud(0xff));
6544
inst->src[0] = component(tmp, 0);
6546
inst->src[1] = brw_imm_ud(0); /* ex_desc */
6547
inst->src[2] = ubo_offset; /* payload */
6549
if (compiler->indirect_ubos_use_sampler) {
6550
const unsigned simd_mode =
6551
inst->exec_size <= 8 ? BRW_SAMPLER_SIMD_MODE_SIMD8 :
6552
BRW_SAMPLER_SIMD_MODE_SIMD16;
6554
inst->sfid = BRW_SFID_SAMPLER;
6555
inst->desc |= brw_sampler_desc(devinfo, 0, 0,
6556
GFX5_SAMPLER_MESSAGE_SAMPLE_LD,
6558
} else if (alignment >= 4) {
6559
inst->sfid = (devinfo->verx10 >= 75 ?
6560
HSW_SFID_DATAPORT_DATA_CACHE_1 :
6561
GFX7_SFID_DATAPORT_DATA_CACHE);
6562
inst->desc |= brw_dp_untyped_surface_rw_desc(devinfo, inst->exec_size,
6563
4, /* num_channels */
6566
inst->sfid = GFX7_SFID_DATAPORT_DATA_CACHE;
6567
inst->desc |= brw_dp_byte_scattered_rw_desc(devinfo, inst->exec_size,
6570
/* The byte scattered messages can only read one dword at a time so
 * we have to duplicate the message 4 times to read the full vec4.
 * Hopefully, dead code will clean up the mess if some of them aren't
 * needed.
 */
6575
assert(inst->size_written == 16 * inst->exec_size);
6576
inst->size_written /= 4;
6577
for (unsigned c = 1; c < 4; c++) {
6578
/* Emit a copy of the instruction because we're about to modify
6579
* it. Because this loop starts at 1, we will emit copies for the
6580
* first 3 and the final one will be the modified instruction.
6584
/* Offset the source */
6585
inst->src[2] = bld.vgrf(BRW_REGISTER_TYPE_UD);
6586
bld.ADD(inst->src[2], ubo_offset, brw_imm_ud(c * 4));
6588
/* Offset the destination */
6589
inst->dst = offset(inst->dst, bld, 1);
6593
const fs_reg payload(MRF, FIRST_PULL_LOAD_MRF(devinfo->ver),
6594
BRW_REGISTER_TYPE_UD);
6596
bld.MOV(byte_offset(payload, REG_SIZE), inst->src[1]);
6598
inst->opcode = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GFX4;
6599
inst->resize_sources(1);
6600
inst->base_mrf = payload.nr;
6601
inst->header_size = 1;
6602
inst->mlen = 1 + inst->exec_size / 8;
6607
lower_math_logical_send(const fs_builder &bld, fs_inst *inst)
6609
assert(bld.shader->devinfo->ver < 6);
6612
inst->mlen = inst->sources * inst->exec_size / 8;
6614
if (inst->sources > 1) {
6615
/* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13
6616
* "Message Payload":
6618
* "Operand0[7]. For the INT DIV functions, this operand is the
6621
* "Operand1[7]. For the INT DIV functions, this operand is the
6624
const bool is_int_div = inst->opcode != SHADER_OPCODE_POW;
6625
const fs_reg src0 = is_int_div ? inst->src[1] : inst->src[0];
6626
const fs_reg src1 = is_int_div ? inst->src[0] : inst->src[1];
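/* For the INT DIV functions the sources are swapped here so that the
 * denominator becomes message operand 0 and the numerator message operand 1,
 * matching the Ironlake PRM text quoted above; POW keeps its operand order.
 */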
6628
inst->resize_sources(1);
6629
inst->src[0] = src0;
6631
assert(inst->exec_size == 8);
6632
bld.MOV(fs_reg(MRF, inst->base_mrf + 1, src1.type), src1);
6637
lower_btd_logical_send(const fs_builder &bld, fs_inst *inst)
6639
const intel_device_info *devinfo = bld.shader->devinfo;
6640
fs_reg global_addr = inst->src[0];
6641
const fs_reg &btd_record = inst->src[1];
6643
const unsigned mlen = 2;
6644
const fs_builder ubld = bld.exec_all().group(8, 0);
6645
fs_reg header = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2);
6647
ubld.MOV(header, brw_imm_ud(0));
6648
switch (inst->opcode) {
6649
case SHADER_OPCODE_BTD_SPAWN_LOGICAL:
6650
assert(type_sz(global_addr.type) == 8 && global_addr.stride == 0);
6651
global_addr.type = BRW_REGISTER_TYPE_UD;
6652
global_addr.stride = 1;
6653
ubld.group(2, 0).MOV(header, global_addr);
6656
case SHADER_OPCODE_BTD_RETIRE_LOGICAL:
6657
/* The bottom bit is the Stack ID release bit */
6658
ubld.group(1, 0).MOV(header, brw_imm_ud(1));
6662
unreachable("Invalid BTD message");
6665
/* Stack IDs are always in R1 regardless of whether we're coming from a
6666
* bindless shader or a regular compute shader.
6669
retype(byte_offset(header, REG_SIZE), BRW_REGISTER_TYPE_UW);
6670
bld.MOV(stack_ids, retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UW));
6672
unsigned ex_mlen = 0;
6674
if (inst->opcode == SHADER_OPCODE_BTD_SPAWN_LOGICAL) {
6675
ex_mlen = 2 * (inst->exec_size / 8);
6676
payload = bld.move_to_vgrf(btd_record, 1);
6678
assert(inst->opcode == SHADER_OPCODE_BTD_RETIRE_LOGICAL);
6679
/* All these messages take a BTD and things complain if we don't provide
6680
* one for RETIRE. However, it shouldn't ever actually get used so fill
6683
ex_mlen = 2 * (inst->exec_size / 8);
6684
payload = bld.move_to_vgrf(brw_imm_uq(0), 1);
6687
/* Update the original instruction. */
6688
inst->opcode = SHADER_OPCODE_SEND;
6690
inst->ex_mlen = ex_mlen;
6691
inst->header_size = 0; /* HW docs require has_header = false */
6692
inst->send_has_side_effects = true;
6693
inst->send_is_volatile = false;
6695
/* Set up SFID and descriptors */
6696
inst->sfid = GEN_RT_SFID_BINDLESS_THREAD_DISPATCH;
6697
inst->desc = brw_btd_spawn_desc(devinfo, inst->exec_size,
6698
GEN_RT_BTD_MESSAGE_SPAWN);
6699
inst->resize_sources(4);
6700
inst->src[0] = brw_imm_ud(0); /* desc */
6701
inst->src[1] = brw_imm_ud(0); /* ex_desc */
6702
inst->src[2] = header;
6703
inst->src[3] = payload;
6707
lower_trace_ray_logical_send(const fs_builder &bld, fs_inst *inst)
6709
const intel_device_info *devinfo = bld.shader->devinfo;
6710
const fs_reg &globals_addr = inst->src[RT_LOGICAL_SRC_GLOBALS];
6711
const fs_reg &bvh_level =
6712
inst->src[RT_LOGICAL_SRC_BVH_LEVEL].file == BRW_IMMEDIATE_VALUE ?
6713
inst->src[RT_LOGICAL_SRC_BVH_LEVEL] :
6714
bld.move_to_vgrf(inst->src[RT_LOGICAL_SRC_BVH_LEVEL],
6715
inst->components_read(RT_LOGICAL_SRC_BVH_LEVEL));
6716
const fs_reg &trace_ray_control =
6717
inst->src[RT_LOGICAL_SRC_TRACE_RAY_CONTROL].file == BRW_IMMEDIATE_VALUE ?
6718
inst->src[RT_LOGICAL_SRC_TRACE_RAY_CONTROL] :
6719
bld.move_to_vgrf(inst->src[RT_LOGICAL_SRC_TRACE_RAY_CONTROL],
6720
inst->components_read(RT_LOGICAL_SRC_TRACE_RAY_CONTROL));
6721
const fs_reg &synchronous_src = inst->src[RT_LOGICAL_SRC_SYNCHRONOUS];
6722
assert(synchronous_src.file == BRW_IMMEDIATE_VALUE);
6723
const bool synchronous = synchronous_src.ud;
6725
const unsigned mlen = 1;
6726
const fs_builder ubld = bld.exec_all().group(8, 0);
6727
fs_reg header = ubld.vgrf(BRW_REGISTER_TYPE_UD);
6728
ubld.MOV(header, brw_imm_ud(0));
6729
ubld.group(2, 0).MOV(header, retype(globals_addr, BRW_REGISTER_TYPE_UD));
6731
ubld.group(1, 0).MOV(byte_offset(header, 16), brw_imm_ud(synchronous));
6733
const unsigned ex_mlen = inst->exec_size / 8;
6734
fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD);
6735
if (bvh_level.file == BRW_IMMEDIATE_VALUE &&
6736
trace_ray_control.file == BRW_IMMEDIATE_VALUE) {
6737
bld.MOV(payload, brw_imm_ud(SET_BITS(trace_ray_control.ud, 9, 8) |
6738
(bvh_level.ud & 0x7)));
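/* The payload dword packs the trace-ray control value into bits 9:8 and the
 * BVH level into bits 2:0; the SHL/OR pair below builds the same layout when
 * the values are not immediates.
 */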
6740
bld.SHL(payload, trace_ray_control, brw_imm_ud(8));
6741
bld.OR(payload, payload, bvh_level);
6744
/* When doing synchronous traversal, the HW implicitly computes the
 * stack_id using the following formula :
 *
 *    EUID[3:0] & THREAD_ID[2:0] & SIMD_LANE_ID[3:0]
 *
 * Only in the asynchronous case we need to set the stack_id given from the
 * payload register.
 */
6753
bld.AND(subscript(payload, BRW_REGISTER_TYPE_UW, 1),
6754
retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UW),
6758
/* Update the original instruction. */
6759
inst->opcode = SHADER_OPCODE_SEND;
6761
inst->ex_mlen = ex_mlen;
6762
inst->header_size = 0; /* HW docs require has_header = false */
6763
inst->send_has_side_effects = true;
6764
inst->send_is_volatile = false;
6766
/* Set up SFID and descriptors */
6767
inst->sfid = GEN_RT_SFID_RAY_TRACE_ACCELERATOR;
6768
inst->desc = brw_rt_trace_ray_desc(devinfo, inst->exec_size);
6769
inst->resize_sources(4);
6770
inst->src[0] = brw_imm_ud(0); /* desc */
6771
inst->src[1] = brw_imm_ud(0); /* ex_desc */
6772
inst->src[2] = header;
6773
inst->src[3] = payload;
6777
fs_visitor::lower_logical_sends()
6779
bool progress = false;
6781
foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
6782
const fs_builder ibld(this, block, inst);
6784
switch (inst->opcode) {
6785
case FS_OPCODE_FB_WRITE_LOGICAL:
6786
assert(stage == MESA_SHADER_FRAGMENT);
6787
lower_fb_write_logical_send(ibld, inst,
6788
brw_wm_prog_data(prog_data),
6789
(const brw_wm_prog_key *)key,
6793
case FS_OPCODE_FB_READ_LOGICAL:
6794
lower_fb_read_logical_send(ibld, inst);
6797
case SHADER_OPCODE_TEX_LOGICAL:
6798
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TEX);
6801
case SHADER_OPCODE_TXD_LOGICAL:
6802
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXD);
6805
case SHADER_OPCODE_TXF_LOGICAL:
6806
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF);
6809
case SHADER_OPCODE_TXL_LOGICAL:
6810
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXL);
6813
case SHADER_OPCODE_TXS_LOGICAL:
6814
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXS);
6817
case SHADER_OPCODE_IMAGE_SIZE_LOGICAL:
6818
lower_sampler_logical_send(ibld, inst,
6819
SHADER_OPCODE_IMAGE_SIZE_LOGICAL);
6822
case FS_OPCODE_TXB_LOGICAL:
6823
lower_sampler_logical_send(ibld, inst, FS_OPCODE_TXB);
6826
case SHADER_OPCODE_TXF_CMS_LOGICAL:
6827
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_CMS);
6830
case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
6831
case SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL:
6832
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_CMS_W);
6835
case SHADER_OPCODE_TXF_UMS_LOGICAL:
6836
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_UMS);
6839
case SHADER_OPCODE_TXF_MCS_LOGICAL:
6840
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_MCS);
6843
case SHADER_OPCODE_LOD_LOGICAL:
6844
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_LOD);
6847
case SHADER_OPCODE_TG4_LOGICAL:
6848
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TG4);
6851
case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
6852
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TG4_OFFSET);
6855
case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
6856
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_SAMPLEINFO);
6859
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
6860
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
6861
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
6862
case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
6863
case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
6864
case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
6865
if (devinfo->has_lsc) {
6866
lower_lsc_surface_logical_send(ibld, inst);
6869
case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL:
6870
case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL:
6871
case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
6872
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
6873
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
6874
lower_surface_logical_send(ibld, inst);
6877
case SHADER_OPCODE_OWORD_BLOCK_READ_LOGICAL:
6878
case SHADER_OPCODE_UNALIGNED_OWORD_BLOCK_READ_LOGICAL:
6879
case SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL:
6880
lower_surface_block_logical_send(ibld, inst);
6883
case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
6884
case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL:
6885
case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
6886
case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
6887
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
6888
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL:
6889
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
6890
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL:
6891
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL:
6892
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT64_LOGICAL:
6893
if (devinfo->has_lsc) {
6894
lower_lsc_a64_logical_send(ibld, inst);
6897
case SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL:
6898
case SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL:
6899
case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL:
6900
lower_a64_logical_send(ibld, inst);
6903
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
6904
if (devinfo->has_lsc && !compiler->indirect_ubos_use_sampler)
6905
lower_lsc_varying_pull_constant_logical_send(ibld, inst);
6907
lower_varying_pull_constant_logical_send(ibld, inst);
6910
case SHADER_OPCODE_RCP:
6911
case SHADER_OPCODE_RSQ:
6912
case SHADER_OPCODE_SQRT:
6913
case SHADER_OPCODE_EXP2:
6914
case SHADER_OPCODE_LOG2:
6915
case SHADER_OPCODE_SIN:
6916
case SHADER_OPCODE_COS:
6917
case SHADER_OPCODE_POW:
6918
case SHADER_OPCODE_INT_QUOTIENT:
6919
case SHADER_OPCODE_INT_REMAINDER:
6920
/* The math opcodes are overloaded for the send-like and
6921
* expression-like instructions which seems kind of icky. Gfx6+ has
6922
* a native (but rather quirky) MATH instruction so we don't need to
6923
* do anything here. On Gfx4-5 we'll have to lower the Gfx6-like
6924
* logical instructions (which we can easily recognize because they
6925
* have mlen = 0) into send-like virtual instructions.
6927
if (devinfo->ver < 6 && inst->mlen == 0) {
6928
lower_math_logical_send(ibld, inst);
6935
case SHADER_OPCODE_BTD_SPAWN_LOGICAL:
6936
case SHADER_OPCODE_BTD_RETIRE_LOGICAL:
6937
lower_btd_logical_send(ibld, inst);
6940
case RT_OPCODE_TRACE_RAY_LOGICAL:
6941
lower_trace_ray_logical_send(ibld, inst);
6952
invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
6958
is_mixed_float_with_fp32_dst(const fs_inst *inst)
6960
/* This opcode sometimes uses :W type on the source even if the operand is
6961
* a :HF, because in gfx7 there is no support for :HF, and thus it uses :W.
6963
if (inst->opcode == BRW_OPCODE_F16TO32)
6966
if (inst->dst.type != BRW_REGISTER_TYPE_F)
6969
for (int i = 0; i < inst->sources; i++) {
6970
if (inst->src[i].type == BRW_REGISTER_TYPE_HF)
6978
is_mixed_float_with_packed_fp16_dst(const fs_inst *inst)
6980
/* This opcode sometimes uses :W type on the destination even if the
6981
* destination is a :HF, because in gfx7 there is no support for :HF, and
6984
if (inst->opcode == BRW_OPCODE_F32TO16 &&
6985
inst->dst.stride == 1)
6988
if (inst->dst.type != BRW_REGISTER_TYPE_HF ||
6989
inst->dst.stride != 1)
6992
for (int i = 0; i < inst->sources; i++) {
6993
if (inst->src[i].type == BRW_REGISTER_TYPE_F)
7001
* Get the closest allowed SIMD width for instruction \p inst accounting for
7002
* some common regioning and execution control restrictions that apply to FPU
7003
* instructions. These restrictions don't necessarily have any relevance to
7004
* instructions not executed by the FPU pipeline like extended math, control
7005
* flow or send message instructions.
7007
* For virtual opcodes it's really up to the instruction -- In some cases
7008
* (e.g. where a virtual instruction unrolls into a simple sequence of FPU
7009
* instructions) it may simplify virtual instruction lowering if we can
7010
* enforce FPU-like regioning restrictions already on the virtual instruction,
7011
* in other cases (e.g. virtual send-like instructions) this may be
7012
* excessively restrictive.
7015
get_fpu_lowered_simd_width(const struct intel_device_info *devinfo,
7016
const fs_inst *inst)
7018
/* Maximum execution size representable in the instruction controls. */
7019
unsigned max_width = MIN2(32, inst->exec_size);
7021
/* According to the PRMs:
7022
* "A. In Direct Addressing mode, a source cannot span more than 2
7023
* adjacent GRF registers.
7024
* B. A destination cannot span more than 2 adjacent GRF registers."
7026
* Look for the source or destination with the largest register region
7027
* which is the one that is going to limit the overall execution size of
7028
* the instruction due to this rule.
7030
unsigned reg_count = DIV_ROUND_UP(inst->size_written, REG_SIZE);
7032
for (unsigned i = 0; i < inst->sources; i++)
7033
reg_count = MAX2(reg_count, DIV_ROUND_UP(inst->size_read(i), REG_SIZE));
7035
/* Calculate the maximum execution size of the instruction based on the
7036
* factor by which it goes over the hardware limit of 2 GRFs.
7039
max_width = MIN2(max_width, inst->exec_size / DIV_ROUND_UP(reg_count, 2));
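/* For illustration (assuming 32-byte GRFs): a SIMD16 instruction with a
 * 64-bit destination writes 16 * 8 = 128 bytes, i.e. reg_count = 4, so the
 * line above caps it at 16 / DIV_ROUND_UP(4, 2) = SIMD8 to respect the
 * two-GRF span limit.
 */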
7041
/* According to the IVB PRMs:
7042
* "When destination spans two registers, the source MUST span two
7043
* registers. The exception to the above rule:
7045
* - When source is scalar, the source registers are not incremented.
7046
* - When source is packed integer Word and destination is packed
7047
* integer DWord, the source register is not incremented but the
7048
* source sub register is incremented."
7050
* The hardware specs from Gfx4 to Gfx7.5 mention similar regioning
7051
* restrictions. The code below intentionally doesn't check whether the
7052
* destination type is integer because empirically the hardware doesn't
7053
* seem to care what the actual type is as long as it's dword-aligned.
7055
if (devinfo->ver < 8) {
7056
for (unsigned i = 0; i < inst->sources; i++) {
7057
/* IVB implements DF scalars as <0;2,1> regions. */
7058
const bool is_scalar_exception = is_uniform(inst->src[i]) &&
7059
(devinfo->platform == INTEL_PLATFORM_HSW || type_sz(inst->src[i].type) != 8);
7060
const bool is_packed_word_exception =
7061
type_sz(inst->dst.type) == 4 && inst->dst.stride == 1 &&
7062
type_sz(inst->src[i].type) == 2 && inst->src[i].stride == 1;
7064
/* We check size_read(i) against size_written instead of REG_SIZE
7065
* because we want to properly handle SIMD32. In SIMD32, you can end
7066
* up with writes to 4 registers and a source that reads 2 registers
7067
* and we may still need to lower all the way to SIMD8 in that case.
7069
if (inst->size_written > REG_SIZE &&
7070
inst->size_read(i) != 0 &&
7071
inst->size_read(i) < inst->size_written &&
7072
!is_scalar_exception && !is_packed_word_exception) {
7073
const unsigned reg_count = DIV_ROUND_UP(inst->size_written, REG_SIZE);
7074
max_width = MIN2(max_width, inst->exec_size / reg_count);
7079
if (devinfo->ver < 6) {
7080
/* From the G45 PRM, Volume 4 Page 361:
7082
* "Operand Alignment Rule: With the exceptions listed below, a
7083
* source/destination operand in general should be aligned to even
7084
* 256-bit physical register with a region size equal to two 256-bit
7085
* physical registers."
7087
* Normally we enforce this by allocating virtual registers to the
7088
* even-aligned class. But we need to handle payload registers.
7090
for (unsigned i = 0; i < inst->sources; i++) {
7091
if (inst->src[i].file == FIXED_GRF && (inst->src[i].nr & 1) &&
7092
inst->size_read(i) > REG_SIZE) {
7093
max_width = MIN2(max_width, 8);
7098
/* From the IVB PRMs:
7099
* "When an instruction is SIMD32, the low 16 bits of the execution mask
7100
* are applied for both halves of the SIMD32 instruction. If different
7101
* execution mask channels are required, split the instruction into two
7102
* SIMD16 instructions."
7104
* There is similar text in the HSW PRMs. Gfx4-6 don't even implement
7105
* 32-wide control flow support in hardware and will behave similarly.
7107
if (devinfo->ver < 8 && !inst->force_writemask_all)
7108
max_width = MIN2(max_width, 16);
7110
/* From the IVB PRMs (applies to HSW too):
7111
* "Instructions with condition modifiers must not use SIMD32."
7113
* From the BDW PRMs (applies to later hardware too):
7114
* "Ternary instruction with condition modifiers must not use SIMD32."
7116
if (inst->conditional_mod && (devinfo->ver < 8 || inst->is_3src(devinfo)))
7117
max_width = MIN2(max_width, 16);
7119
/* From the IVB PRMs (applies to other devices that don't have the
7120
* intel_device_info::supports_simd16_3src flag set):
7121
* "In Align16 access mode, SIMD16 is not allowed for DW operations and
7122
* SIMD8 is not allowed for DF operations."
7124
if (inst->is_3src(devinfo) && !devinfo->supports_simd16_3src)
7125
max_width = MIN2(max_width, inst->exec_size / reg_count);
7127
/* Pre-Gfx8 EUs are hardwired to use the QtrCtrl+1 (where QtrCtrl is
7128
* the 8-bit quarter of the execution mask signals specified in the
7129
* instruction control fields) for the second compressed half of any
7130
* single-precision instruction (for double-precision instructions
7131
* it's hardwired to use NibCtrl+1, at least on HSW), which means that
7132
* the EU will apply the wrong execution controls for the second
7133
* sequential GRF write if the number of channels per GRF is not exactly
7134
* eight in single-precision mode (or four in double-float mode).
7136
* In this situation we calculate the maximum size of the split
7137
* instructions so they only ever write to a single register.
7139
if (devinfo->ver < 8 && inst->size_written > REG_SIZE &&
7140
!inst->force_writemask_all) {
7141
const unsigned channels_per_grf = inst->exec_size /
7142
DIV_ROUND_UP(inst->size_written, REG_SIZE);
7143
const unsigned exec_type_size = get_exec_type_size(inst);
7144
assert(exec_type_size);
7146
/* The hardware shifts exactly 8 channels per compressed half of the
7147
* instruction in single-precision mode and exactly 4 in double-precision.
7149
if (channels_per_grf != (exec_type_size == 8 ? 4 : 8))
7150
max_width = MIN2(max_width, channels_per_grf);
7152
/* Lower all non-force_writemask_all DF instructions to SIMD4 on IVB/BYT
7153
* because HW applies the same channel enable signals to both halves of
7154
* the compressed instruction which will be just wrong under
7155
* non-uniform control flow.
7157
if (devinfo->verx10 == 70 &&
7158
(exec_type_size == 8 || type_sz(inst->dst.type) == 8))
7159
max_width = MIN2(max_width, 4);
7162
/* From the SKL PRM, Special Restrictions for Handling Mixed Mode
7165
* "No SIMD16 in mixed mode when destination is f32. Instruction
7166
* execution size must be no more than 8."
7168
* FIXME: the simulator doesn't seem to complain if we don't do this and
7169
* empirical testing with existing CTS tests show that they pass just fine
7170
* without implementing this, however, since our interpretation of the PRM
7171
* is that conversion MOVs between HF and F are still mixed-float
7172
* instructions (and therefore subject to this restriction) we decided to
7173
* split them to be safe. Might be useful to do additional investigation to
7174
* lift the restriction if we can ensure that it is safe though, since these
7175
* conversions are common when half-float types are involved since many
7176
* instructions do not support HF types and conversions from/to F are
7179
if (is_mixed_float_with_fp32_dst(inst))
7180
max_width = MIN2(max_width, 8);
7182
/* From the SKL PRM, Special Restrictions for Handling Mixed Mode
7185
* "No SIMD16 in mixed mode when destination is packed f16 for both
7186
* Align1 and Align16."
7188
if (is_mixed_float_with_packed_fp16_dst(inst))
7189
max_width = MIN2(max_width, 8);
7191
/* Only power-of-two execution sizes are representable in the instruction
7194
return 1 << util_logbase2(max_width);
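/* e.g. a computed max_width of 12 is not encodable in the execution size
 * controls, so the rounding above returns 8.
 */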
7198
* Get the maximum allowed SIMD width for instruction \p inst accounting for
7199
* various payload size restrictions that apply to sampler message
7202
* This is only intended to provide a maximum theoretical bound for the
7203
* execution size of the message based on the number of argument components
7204
* alone, which in most cases will determine whether the SIMD8 or SIMD16
7205
* variant of the message can be used, though some messages may have
7206
* additional restrictions not accounted for here (e.g. pre-ILK hardware uses
7207
* the message length to determine the exact SIMD width and argument count,
7208
* which makes a number of sampler message combinations impossible to
7212
get_sampler_lowered_simd_width(const struct intel_device_info *devinfo,
7213
const fs_inst *inst)
7215
/* If we have a min_lod parameter on anything other than a simple sample
7216
* message, it will push it over 5 arguments and we have to fall back to
7219
if (inst->opcode != SHADER_OPCODE_TEX &&
7220
inst->components_read(TEX_LOGICAL_SRC_MIN_LOD))
7223
/* Calculate the number of coordinate components that have to be present
7224
* assuming that additional arguments follow the texel coordinates in the
7225
* message payload. On IVB+ there is no need for padding, on ILK-SNB we
7226
* need to pad to four or three components depending on the message,
7227
* pre-ILK we need to pad to at most three components.
7229
const unsigned req_coord_components =
7230
(devinfo->ver >= 7 ||
7231
!inst->components_read(TEX_LOGICAL_SRC_COORDINATE)) ? 0 :
7232
(devinfo->ver >= 5 && inst->opcode != SHADER_OPCODE_TXF_LOGICAL &&
7233
inst->opcode != SHADER_OPCODE_TXF_CMS_LOGICAL) ? 4 :
7236
/* On Gfx9+ the LOD argument is for free if we're able to use the LZ
7237
* variant of the TXL or TXF message.
7239
const bool implicit_lod = devinfo->ver >= 9 &&
7240
(inst->opcode == SHADER_OPCODE_TXL ||
7241
inst->opcode == SHADER_OPCODE_TXF) &&
7242
inst->src[TEX_LOGICAL_SRC_LOD].is_zero();
7244
/* Calculate the total number of argument components that need to be passed
7245
* to the sampler unit.
7247
const unsigned num_payload_components =
7248
MAX2(inst->components_read(TEX_LOGICAL_SRC_COORDINATE),
7249
req_coord_components) +
7250
inst->components_read(TEX_LOGICAL_SRC_SHADOW_C) +
7251
(implicit_lod ? 0 : inst->components_read(TEX_LOGICAL_SRC_LOD)) +
7252
inst->components_read(TEX_LOGICAL_SRC_LOD2) +
7253
inst->components_read(TEX_LOGICAL_SRC_SAMPLE_INDEX) +
7254
(inst->opcode == SHADER_OPCODE_TG4_OFFSET_LOGICAL ?
7255
inst->components_read(TEX_LOGICAL_SRC_TG4_OFFSET) : 0) +
7256
inst->components_read(TEX_LOGICAL_SRC_MCS);
7258
/* SIMD16 messages with more than five arguments exceed the maximum message
7259
* size supported by the sampler, regardless of whether a header is
7262
return MIN2(inst->exec_size,
7263
num_payload_components > MAX_SAMPLER_MESSAGE_SIZE / 2 ? 8 : 16);
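/* For illustration (assuming the usual MAX_SAMPLER_MESSAGE_SIZE of 11): a
 * Gfx7 shadow-compare TXL with three coordinates needs 3 + 1 (shadow ref) +
 * 1 (LOD) = 5 payload components and can still go out SIMD16; one more
 * component pushes it past 11 / 2 and forces the SIMD8 variant.
 */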
7267
* Get the closest native SIMD width supported by the hardware for instruction
7268
* \p inst. The instruction will be left untouched by
7269
* fs_visitor::lower_simd_width() if the returned value is equal to the
7270
* original execution size.
7273
get_lowered_simd_width(const struct intel_device_info *devinfo,
7274
const fs_inst *inst)
7276
switch (inst->opcode) {
7277
case BRW_OPCODE_MOV:
7278
case BRW_OPCODE_SEL:
7279
case BRW_OPCODE_NOT:
7280
case BRW_OPCODE_AND:
7282
case BRW_OPCODE_XOR:
7283
case BRW_OPCODE_SHR:
7284
case BRW_OPCODE_SHL:
7285
case BRW_OPCODE_ASR:
7286
case BRW_OPCODE_ROR:
7287
case BRW_OPCODE_ROL:
7288
case BRW_OPCODE_CMPN:
7289
case BRW_OPCODE_CSEL:
7290
case BRW_OPCODE_F32TO16:
7291
case BRW_OPCODE_F16TO32:
7292
case BRW_OPCODE_BFREV:
7293
case BRW_OPCODE_BFE:
7294
case BRW_OPCODE_ADD:
7295
case BRW_OPCODE_MUL:
7296
case BRW_OPCODE_AVG:
7297
case BRW_OPCODE_FRC:
7298
case BRW_OPCODE_RNDU:
7299
case BRW_OPCODE_RNDD:
7300
case BRW_OPCODE_RNDE:
7301
case BRW_OPCODE_RNDZ:
7302
case BRW_OPCODE_LZD:
7303
case BRW_OPCODE_FBH:
7304
case BRW_OPCODE_FBL:
7305
case BRW_OPCODE_CBIT:
7306
case BRW_OPCODE_SAD2:
7307
case BRW_OPCODE_MAD:
7308
case BRW_OPCODE_LRP:
7309
case BRW_OPCODE_ADD3:
7310
case FS_OPCODE_PACK:
7311
case SHADER_OPCODE_SEL_EXEC:
7312
case SHADER_OPCODE_CLUSTER_BROADCAST:
7313
case SHADER_OPCODE_MOV_RELOC_IMM:
7314
return get_fpu_lowered_simd_width(devinfo, inst);
7316
case BRW_OPCODE_CMP: {
7317
/* The Ivybridge/BayTrail WaCMPInstFlagDepClearedEarly workaround says that
7318
* when the destination is a GRF the dependency-clear bit on the flag
7319
* register is cleared early.
7321
* Suggested workarounds are to disable coissuing CMP instructions
7322
* or to split CMP(16) instructions into two CMP(8) instructions.
7324
* We choose to split into CMP(8) instructions since disabling
7325
* coissuing would affect CMP instructions not otherwise affected by
7328
const unsigned max_width = (devinfo->verx10 == 70 &&
7329
!inst->dst.is_null() ? 8 : ~0);
7330
return MIN2(max_width, get_fpu_lowered_simd_width(devinfo, inst));
7332
case BRW_OPCODE_BFI1:
7333
case BRW_OPCODE_BFI2:
7334
/* The Haswell WaForceSIMD8ForBFIInstruction workaround says that we
7336
* "Force BFI instructions to be executed always in SIMD8."
7338
return MIN2(devinfo->platform == INTEL_PLATFORM_HSW ? 8 : ~0u,
7339
get_fpu_lowered_simd_width(devinfo, inst));
7342
assert(inst->src[0].file == BAD_FILE || inst->exec_size <= 16);
7343
return inst->exec_size;
7345
case SHADER_OPCODE_RCP:
7346
case SHADER_OPCODE_RSQ:
7347
case SHADER_OPCODE_SQRT:
7348
case SHADER_OPCODE_EXP2:
7349
case SHADER_OPCODE_LOG2:
7350
case SHADER_OPCODE_SIN:
7351
case SHADER_OPCODE_COS: {
7352
/* Unary extended math instructions are limited to SIMD8 on Gfx4 and
7353
* Gfx6. Extended Math Function is limited to SIMD8 with half-float.
7355
if (devinfo->ver == 6 || devinfo->verx10 == 40)
7356
return MIN2(8, inst->exec_size);
7357
if (inst->dst.type == BRW_REGISTER_TYPE_HF)
7358
return MIN2(8, inst->exec_size);
7359
return MIN2(16, inst->exec_size);
7362
case SHADER_OPCODE_POW: {
7363
/* SIMD16 is only allowed on Gfx7+. Extended Math Function is limited
7364
* to SIMD8 with half-float
7366
if (devinfo->ver < 7)
7367
return MIN2(8, inst->exec_size);
7368
if (inst->dst.type == BRW_REGISTER_TYPE_HF)
7369
return MIN2(8, inst->exec_size);
7370
return MIN2(16, inst->exec_size);
7373
case SHADER_OPCODE_USUB_SAT:
7374
case SHADER_OPCODE_ISUB_SAT:
7375
return get_fpu_lowered_simd_width(devinfo, inst);
7377
case SHADER_OPCODE_INT_QUOTIENT:
7378
case SHADER_OPCODE_INT_REMAINDER:
7379
/* Integer division is limited to SIMD8 on all generations. */
7380
return MIN2(8, inst->exec_size);
7382
case FS_OPCODE_LINTERP:
7383
case SHADER_OPCODE_GET_BUFFER_SIZE:
7384
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
7385
case FS_OPCODE_PACK_HALF_2x16_SPLIT:
7386
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
7387
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
7388
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
7389
return MIN2(16, inst->exec_size);
7391
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
7392
/* Pre-ILK hardware doesn't have a SIMD8 variant of the texel fetch
7393
* message used to implement varying pull constant loads, so expand it
7394
* to SIMD16. An alternative with longer message payload length but
7395
* shorter return payload would be to use the SIMD8 sampler message that
7396
* takes (header, u, v, r) as parameters instead of (header, u).
7398
return (devinfo->ver == 4 ? 16 : MIN2(16, inst->exec_size));
7400
case FS_OPCODE_DDX_COARSE:
7401
case FS_OPCODE_DDX_FINE:
7402
case FS_OPCODE_DDY_COARSE:
7403
case FS_OPCODE_DDY_FINE:
7404
/* The implementation of this virtual opcode may require emitting
7405
* compressed Align16 instructions, which are severely limited on some
7408
* From the Ivy Bridge PRM, volume 4 part 3, section 3.3.9 (Register
7409
* Region Restrictions):
7411
* "In Align16 access mode, SIMD16 is not allowed for DW operations
7412
* and SIMD8 is not allowed for DF operations."
7414
* In this context, "DW operations" means "operations acting on 32-bit
7415
* values", so it includes operations on floats.
7417
* Gfx4 has a similar restriction. From the i965 PRM, section 11.5.3
7418
* (Instruction Compression -> Rules and Restrictions):
7420
* "A compressed instruction must be in Align1 access mode. Align16
7421
* mode instructions cannot be compressed."
7423
* Similar text exists in the g45 PRM.
7425
* Empirically, compressed align16 instructions using odd register
7426
* numbers don't appear to work on Sandybridge either.
7428
return (devinfo->ver == 4 || devinfo->ver == 6 ||
7429
(devinfo->verx10 == 70) ?
7430
MIN2(8, inst->exec_size) : MIN2(16, inst->exec_size));
7432
case SHADER_OPCODE_MULH:
7433
/* MULH is lowered to the MUL/MACH sequence using the accumulator, which
7434
* is 8-wide on Gfx7+.
7436
return (devinfo->ver >= 7 ? 8 :
7437
get_fpu_lowered_simd_width(devinfo, inst));
7439
case FS_OPCODE_FB_WRITE_LOGICAL:
7440
/* Gfx6 doesn't support SIMD16 depth writes but we cannot handle them
7443
assert(devinfo->ver != 6 ||
7444
inst->src[FB_WRITE_LOGICAL_SRC_SRC_DEPTH].file == BAD_FILE ||
7445
inst->exec_size == 8);
7446
/* Dual-source FB writes are unsupported in SIMD16 mode. */
7447
return (inst->src[FB_WRITE_LOGICAL_SRC_COLOR1].file != BAD_FILE ?
7448
8 : MIN2(16, inst->exec_size));
7450
case FS_OPCODE_FB_READ_LOGICAL:
7451
return MIN2(16, inst->exec_size);
7453
case SHADER_OPCODE_TEX_LOGICAL:
7454
case SHADER_OPCODE_TXF_CMS_LOGICAL:
7455
case SHADER_OPCODE_TXF_UMS_LOGICAL:
7456
case SHADER_OPCODE_TXF_MCS_LOGICAL:
7457
case SHADER_OPCODE_LOD_LOGICAL:
7458
case SHADER_OPCODE_TG4_LOGICAL:
7459
case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
7460
case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
7461
case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
7462
return get_sampler_lowered_simd_width(devinfo, inst);
7464
/* On gfx12 parameters are fixed to 16-bit values and therefore they always
 * fit regardless of the execution size.
 */
7467
case SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL:
7468
return MIN2(16, inst->exec_size);
7470
case SHADER_OPCODE_TXD_LOGICAL:
7471
/* TXD is unsupported in SIMD16 mode. */
7474
case SHADER_OPCODE_TXL_LOGICAL:
7475
case FS_OPCODE_TXB_LOGICAL:
7476
/* Only one execution size is representable pre-ILK depending on whether
7477
* the shadow reference argument is present.
7479
if (devinfo->ver == 4)
7480
return inst->src[TEX_LOGICAL_SRC_SHADOW_C].file == BAD_FILE ? 16 : 8;
7482
return get_sampler_lowered_simd_width(devinfo, inst);
7484
case SHADER_OPCODE_TXF_LOGICAL:
7485
case SHADER_OPCODE_TXS_LOGICAL:
7486
/* Gfx4 doesn't have SIMD8 variants for the RESINFO and LD-with-LOD
7487
* messages. Use SIMD16 instead.
7489
if (devinfo->ver == 4)
7492
return get_sampler_lowered_simd_width(devinfo, inst);
7494
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
7495
case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
7496
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
7499
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
7500
case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
7501
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
7502
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
7503
case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
7504
case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
7505
case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL:
7506
case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL:
7507
return MIN2(16, inst->exec_size);
7509
case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
7510
case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL:
7511
case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
7512
case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
7513
return devinfo->ver <= 8 ? 8 : MIN2(16, inst->exec_size);
7515
case SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL:
7516
case SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL:
7517
case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL:
7518
assert(inst->exec_size <= 16);
7519
return inst->exec_size;
7521
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
7522
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL:
7523
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
7524
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL:
7525
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL:
7526
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT64_LOGICAL:
7529
case SHADER_OPCODE_URB_READ_SIMD8:
7530
case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
7531
case SHADER_OPCODE_URB_WRITE_SIMD8:
7532
case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
7533
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
7534
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
7535
return MIN2(8, inst->exec_size);
7537
case SHADER_OPCODE_QUAD_SWIZZLE: {
7538
const unsigned swiz = inst->src[1].ud;
7539
return (is_uniform(inst->src[0]) ?
7540
get_fpu_lowered_simd_width(devinfo, inst) :
7541
devinfo->ver < 11 && type_sz(inst->src[0].type) == 4 ? 8 :
7542
swiz == BRW_SWIZZLE_XYXY || swiz == BRW_SWIZZLE_ZWZW ? 4 :
7543
get_fpu_lowered_simd_width(devinfo, inst));
7545
case SHADER_OPCODE_MOV_INDIRECT: {
7546
/* From IVB and HSW PRMs:
7548
* "2.When the destination requires two registers and the sources are
7549
* indirect, the sources must use 1x1 regioning mode.
7551
* In case of DF instructions in HSW/IVB, the exec_size is limited by
7552
* the EU decompression logic not handling VxH indirect addressing
7555
const unsigned max_size = (devinfo->ver >= 8 ? 2 : 1) * REG_SIZE;
7556
/* Prior to Broadwell, we only have 8 address subregisters. */
7557
return MIN3(devinfo->ver >= 8 ? 16 : 8,
7558
max_size / (inst->dst.stride * type_sz(inst->dst.type)),
7562
case SHADER_OPCODE_LOAD_PAYLOAD: {
7563
const unsigned reg_count =
7564
DIV_ROUND_UP(inst->dst.component_size(inst->exec_size), REG_SIZE);
7566
if (reg_count > 2) {
7567
/* Only LOAD_PAYLOAD instructions with per-channel destination region
7568
* can be easily lowered (which excludes headers and heterogeneous
7571
assert(!inst->header_size);
7572
for (unsigned i = 0; i < inst->sources; i++)
7573
assert(type_sz(inst->dst.type) == type_sz(inst->src[i].type) ||
7574
inst->src[i].file == BAD_FILE);
7576
return inst->exec_size / DIV_ROUND_UP(reg_count, 2);
7578
return inst->exec_size;
7582
return inst->exec_size;
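/* A couple of concrete cases of the above, for illustration: a SIMD32
 * untyped surface read reports MIN2(16, 32) == 16 here and is therefore
 * split into two SIMD16 halves by lower_simd_width(), while a SIMD16 A64
 * byte scattered read on Gfx8 reports 8 and is split into two SIMD8 halves.
 */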
* Return true if splitting out the group of channels of instruction \p inst
7588
* given by lbld.group() requires allocating a temporary for the i-th source
7589
* of the lowered instruction.
7592
needs_src_copy(const fs_builder &lbld, const fs_inst *inst, unsigned i)
7594
return !(is_periodic(inst->src[i], lbld.dispatch_width()) ||
7595
(inst->components_read(i) == 1 &&
7596
lbld.dispatch_width() <= inst->exec_size)) ||
7597
(inst->flags_written(lbld.shader->devinfo) &
7598
flag_mask(inst->src[i], type_sz(inst->src[i].type)));
7602
* Extract the data that would be consumed by the channel group given by
7603
* lbld.group() from the i-th source region of instruction \p inst and return
7604
* it as result in packed form.
7607
emit_unzip(const fs_builder &lbld, fs_inst *inst, unsigned i)
7609
assert(lbld.group() >= inst->group);
7611
/* Specified channel group from the source region. */
7612
const fs_reg src = horiz_offset(inst->src[i], lbld.group() - inst->group);
7614
if (needs_src_copy(lbld, inst, i)) {
7615
/* Builder of the right width to perform the copy avoiding uninitialized
7616
* data if the lowered execution size is greater than the original
7617
* execution size of the instruction.
7619
const fs_builder cbld = lbld.group(MIN2(lbld.dispatch_width(),
7620
inst->exec_size), 0);
7621
const fs_reg tmp = lbld.vgrf(inst->src[i].type, inst->components_read(i));
7623
for (unsigned k = 0; k < inst->components_read(i); ++k)
7624
cbld.MOV(offset(tmp, lbld, k), offset(src, inst->exec_size, k));
7628
} else if (is_periodic(inst->src[i], lbld.dispatch_width())) {
7629
/* The source is invariant for all dispatch_width-wide groups of the
7632
return inst->src[i];
7635
/* We can just point the lowered instruction at the right channel group
7636
* from the original region.
7643
* Return true if splitting out the group of channels of instruction \p inst
7644
* given by lbld.group() requires allocating a temporary for the destination
7645
* of the lowered instruction and copying the data back to the original
7646
* destination region.
7649
needs_dst_copy(const fs_builder &lbld, const fs_inst *inst)
7651
/* If the instruction writes more than one component we'll have to shuffle
7652
* the results of multiple lowered instructions in order to make sure that
7653
* they end up arranged correctly in the original destination region.
7655
if (inst->size_written > inst->dst.component_size(inst->exec_size))
7658
/* If the lowered execution size is larger than the original the result of
7659
* the instruction won't fit in the original destination, so we'll have to
7660
* allocate a temporary in any case.
7662
if (lbld.dispatch_width() > inst->exec_size)
7665
for (unsigned i = 0; i < inst->sources; i++) {
7666
/* If we already made a copy of the source for other reasons there won't
7667
* be any overlap with the destination.
7669
if (needs_src_copy(lbld, inst, i))
7672
/* In order to keep the logic simple we emit a copy whenever the
7673
* destination region doesn't exactly match an overlapping source, which
7674
* may point at the source and destination not being aligned group by
7675
* group which could cause one of the lowered instructions to overwrite
7676
* the data read from the same source by other lowered instructions.
7678
if (regions_overlap(inst->dst, inst->size_written,
7679
inst->src[i], inst->size_read(i)) &&
7680
!inst->dst.equals(inst->src[i]))
7688
* Insert data from a packed temporary into the channel group given by
7689
* lbld.group() of the destination region of instruction \p inst and return
7690
* the temporary as result. Any copy instructions that are required for
7691
* unzipping the previous value (in the case of partial writes) will be
7692
* inserted using \p lbld_before and any copy instructions required for
7693
* zipping up the destination of \p inst will be inserted using \p lbld_after.
7696
emit_zip(const fs_builder &lbld_before, const fs_builder &lbld_after,
7699
assert(lbld_before.dispatch_width() == lbld_after.dispatch_width());
7700
assert(lbld_before.group() == lbld_after.group());
7701
assert(lbld_after.group() >= inst->group);
7703
/* Specified channel group from the destination region. */
7704
const fs_reg dst = horiz_offset(inst->dst, lbld_after.group() - inst->group);
7705
const unsigned dst_size = inst->size_written /
7706
inst->dst.component_size(inst->exec_size);
7708
if (needs_dst_copy(lbld_after, inst)) {
7709
const fs_reg tmp = lbld_after.vgrf(inst->dst.type, dst_size);
7711
if (inst->predicate) {
7712
/* Handle predication by copying the original contents of
7713
* the destination into the temporary before emitting the
7714
* lowered instruction.
7716
const fs_builder gbld_before =
7717
lbld_before.group(MIN2(lbld_before.dispatch_width(),
7718
inst->exec_size), 0);
7719
for (unsigned k = 0; k < dst_size; ++k) {
7720
gbld_before.MOV(offset(tmp, lbld_before, k),
7721
offset(dst, inst->exec_size, k));
7725
const fs_builder gbld_after =
7726
lbld_after.group(MIN2(lbld_after.dispatch_width(),
7727
inst->exec_size), 0);
7728
for (unsigned k = 0; k < dst_size; ++k) {
7729
/* Use a builder of the right width to perform the copy avoiding
7730
* uninitialized data if the lowered execution size is greater than
7731
* the original execution size of the instruction.
7733
gbld_after.MOV(offset(dst, inst->exec_size, k),
7734
offset(tmp, lbld_after, k));
7740
/* No need to allocate a temporary for the lowered instruction, just
7741
* take the right group of channels from the original region.
7748
fs_visitor::lower_simd_width()
7750
bool progress = false;
7752
foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
7753
const unsigned lower_width = get_lowered_simd_width(devinfo, inst);
7755
if (lower_width != inst->exec_size) {
7756
/* Builder matching the original instruction. We may also need to
7757
* emit an instruction of width larger than the original, set the
7758
* execution size of the builder to the highest of both for now so
7759
* we're sure that both cases can be handled.
7761
const unsigned max_width = MAX2(inst->exec_size, lower_width);
7762
const fs_builder ibld = bld.at(block, inst)
7763
.exec_all(inst->force_writemask_all)
7764
.group(max_width, inst->group / max_width);
7766
/* Split the copies in chunks of the execution width of either the
7767
* original or the lowered instruction, whichever is lower.
7769
const unsigned n = DIV_ROUND_UP(inst->exec_size, lower_width);
7770
const unsigned dst_size = inst->size_written /
7771
inst->dst.component_size(inst->exec_size);
7773
assert(!inst->writes_accumulator && !inst->mlen);
7775
/* Inserting the zip, unzip, and duplicated instructions in all of
7776
* the right spots is somewhat tricky. All of the unzip and any
7777
* instructions from the zip which unzip the destination prior to
7778
* writing need to happen before all of the per-group instructions
7779
* and the zip instructions need to happen after. In order to sort
7780
* this all out, we insert the unzip instructions before \p inst,
7781
* insert the per-group instructions after \p inst (i.e. before
7782
* inst->next), and insert the zip instructions before the
7783
* instruction after \p inst. Since we are inserting instructions
7784
* after \p inst, inst->next is a moving target and we need to save
7785
* it off here so that we insert the zip instructions in the right
7788
* Since we're inserting split instructions after after_inst, the
7789
* instructions will end up in the reverse order that we insert them.
7790
* However, certain render target writes require that the low group
7791
* instructions come before the high group. From the Ivy Bridge PRM
7792
* Vol. 4, Pt. 1, Section 3.9.11:
7794
* "If multiple SIMD8 Dual Source messages are delivered by the
7795
* pixel shader thread, each SIMD8_DUALSRC_LO message must be
7796
* issued before the SIMD8_DUALSRC_HI message with the same Slot
7797
* Group Select setting."
7799
* And, from Section 3.9.11.1 of the same PRM:
7801
* "When SIMD32 or SIMD16 PS threads send render target writes
7802
* with multiple SIMD8 and SIMD16 messages, the following must
7805
* All the slots (as described above) must have a corresponding
7806
* render target write irrespective of the slot's validity. A slot
7807
* is considered valid when at least one sample is enabled. For
7808
* example, a SIMD16 PS thread must send two SIMD8 render target
7809
* writes to cover all the slots.
7811
* PS thread must send SIMD render target write messages with
7812
* increasing slot numbers. For example, SIMD16 thread has
7813
* Slot[15:0] and if two SIMD8 render target writes are used, the
7814
* first SIMD8 render target write must send Slot[7:0] and the
7815
* next one must send Slot[15:8]."
7817
* In order to make low group instructions come before high group
7818
* instructions (this is required for some render target writes), we
7819
* split from the highest group to lowest.
7821
exec_node *const after_inst = inst->next;
7822
for (int i = n - 1; i >= 0; i--) {
7823
/* Emit a copy of the original instruction with the lowered width.
7824
* If the EOT flag was set throw it away except for the last
7825
* instruction to avoid killing the thread prematurely.
7827
fs_inst split_inst = *inst;
7828
split_inst.exec_size = lower_width;
7829
split_inst.eot = inst->eot && i == int(n - 1);
7831
/* Select the correct channel enables for the i-th group, then
7832
* transform the sources and destination and emit the lowered
7835
const fs_builder lbld = ibld.group(lower_width, i);
7837
for (unsigned j = 0; j < inst->sources; j++)
7838
split_inst.src[j] = emit_unzip(lbld.at(block, inst), inst, j);
7840
split_inst.dst = emit_zip(lbld.at(block, inst),
7841
lbld.at(block, after_inst), inst);
7842
split_inst.size_written =
7843
split_inst.dst.component_size(lower_width) * dst_size;
7845
lbld.at(block, inst->next).emit(split_inst);
7848
inst->remove(block);
7854
invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
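/* For reference, lowering a single SIMD16 instruction to two SIMD8 groups
 * leaves the stream looking roughly like:
 *
 *    <unzip copies for both groups>      (inserted before the original inst)
 *    split_inst, group 0 (channels 0-7)
 *    split_inst, group 1 (channels 8-15)
 *    <zip copies for both groups>        (inserted before after_inst)
 *
 * Because each per-group copy is inserted right after the original
 * instruction, iterating from the highest group down is what makes group 0
 * end up first in program order, which satisfies the
 * SIMD8_DUALSRC_LO-before-SIMD8_DUALSRC_HI requirement quoted above.
 */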
* Transform barycentric vectors into the interleaved form expected by the PLN
7861
* instruction and returned by the Gfx7+ PI shared function.
7863
* For channels 0-15 in SIMD16 mode they are expected to be laid out as
7864
* follows in the register file:
7871
* There is no need to handle SIMD32 here -- This is expected to be run after
7872
* SIMD lowering, since SIMD lowering relies on vectors having the standard
7876
fs_visitor::lower_barycentrics()
7878
const bool has_interleaved_layout = devinfo->has_pln || devinfo->ver >= 7;
7879
bool progress = false;
7881
if (stage != MESA_SHADER_FRAGMENT || !has_interleaved_layout)
7884
foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
7885
if (inst->exec_size < 16)
7888
const fs_builder ibld(this, block, inst);
7889
const fs_builder ubld = ibld.exec_all().group(8, 0);
7891
switch (inst->opcode) {
7892
case FS_OPCODE_LINTERP : {
7893
assert(inst->exec_size == 16);
7894
const fs_reg tmp = ibld.vgrf(inst->src[0].type, 2);
7897
for (unsigned i = 0; i < ARRAY_SIZE(srcs); i++)
7898
srcs[i] = horiz_offset(offset(inst->src[0], ibld, i % 2),
7901
ubld.LOAD_PAYLOAD(tmp, srcs, ARRAY_SIZE(srcs), ARRAY_SIZE(srcs));
7907
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
7908
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
7909
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: {
7910
assert(inst->exec_size == 16);
7911
const fs_reg tmp = ibld.vgrf(inst->dst.type, 2);
7913
for (unsigned i = 0; i < 2; i++) {
7914
for (unsigned g = 0; g < inst->exec_size / 8; g++) {
7915
fs_inst *mov = ibld.at(block, inst->next).group(8, g)
7916
.MOV(horiz_offset(offset(inst->dst, ibld, i),
7918
offset(tmp, ubld, 2 * g + i));
7919
mov->predicate = inst->predicate;
7920
mov->predicate_inverse = inst->predicate_inverse;
7921
mov->flag_subreg = inst->flag_subreg;
7935
invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
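/* In other words, a SIMD16 LINTERP gets its barycentric source repacked by
 * the LOAD_PAYLOAD above into the (a0-7, b0-7, a8-15, b8-15) ordering that
 * PLN and the Gfx7+ pixel interpolator expect, and the interpolator message
 * results are shuffled back out of that interleaved layout into the
 * original per-component destination layout by the MOVs emitted after the
 * instruction.
 */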
* Lower a derivative instruction as the floating-point difference of two
7942
* swizzles of the source, specified as \p swz0 and \p swz1.
7945
lower_derivative(fs_visitor *v, bblock_t *block, fs_inst *inst,
7946
unsigned swz0, unsigned swz1)
7948
const fs_builder ibld(v, block, inst);
7949
const fs_reg tmp0 = ibld.vgrf(inst->src[0].type);
7950
const fs_reg tmp1 = ibld.vgrf(inst->src[0].type);
7952
ibld.emit(SHADER_OPCODE_QUAD_SWIZZLE, tmp0, inst->src[0], brw_imm_ud(swz0));
7953
ibld.emit(SHADER_OPCODE_QUAD_SWIZZLE, tmp1, inst->src[0], brw_imm_ud(swz1));
7955
inst->resize_sources(2);
7956
inst->src[0] = negate(tmp0);
7957
inst->src[1] = tmp1;
7958
inst->opcode = BRW_OPCODE_ADD;
7964
* Lower derivative instructions on platforms where codegen cannot implement
7965
* them efficiently (i.e. XeHP).
7968
fs_visitor::lower_derivatives()
7970
bool progress = false;
7972
if (devinfo->verx10 < 125)
7975
foreach_block_and_inst(block, fs_inst, inst, cfg) {
7976
if (inst->opcode == FS_OPCODE_DDX_COARSE)
7977
progress |= lower_derivative(this, block, inst,
7978
BRW_SWIZZLE_XXXX, BRW_SWIZZLE_YYYY);
7980
else if (inst->opcode == FS_OPCODE_DDX_FINE)
7981
progress |= lower_derivative(this, block, inst,
7982
BRW_SWIZZLE_XXZZ, BRW_SWIZZLE_YYWW);
7984
else if (inst->opcode == FS_OPCODE_DDY_COARSE)
7985
progress |= lower_derivative(this, block, inst,
7986
BRW_SWIZZLE_XXXX, BRW_SWIZZLE_ZZZZ);
7988
else if (inst->opcode == FS_OPCODE_DDY_FINE)
7989
progress |= lower_derivative(this, block, inst,
7990
BRW_SWIZZLE_XYXY, BRW_SWIZZLE_ZWZW);
7994
invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
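/* As an example of the lowering above, FS_OPCODE_DDX_FINE on a 2x2 quad
 * with channels (x0, x1, x2, x3) becomes roughly:
 *
 *    tmp0 = quad_swizzle(src, XXZZ)    -> { x0, x0, x2, x2 }
 *    tmp1 = quad_swizzle(src, YYWW)    -> { x1, x1, x3, x3 }
 *    dst  = add(-tmp0, tmp1)
 *
 * i.e. the right-minus-left difference within each row of the quad.
 */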
fs_visitor::dump_instructions() const
8002
dump_instructions(NULL);
8006
fs_visitor::dump_instructions(const char *name) const
8008
FILE *file = stderr;
8009
if (name && geteuid() != 0) {
8010
file = fopen(name, "w");
8016
const register_pressure &rp = regpressure_analysis.require();
8017
unsigned ip = 0, max_pressure = 0;
8018
foreach_block_and_inst(block, backend_instruction, inst, cfg) {
8019
max_pressure = MAX2(max_pressure, rp.regs_live_at_ip[ip]);
8020
fprintf(file, "{%3d} %4d: ", rp.regs_live_at_ip[ip], ip);
8021
dump_instruction(inst, file);
8024
fprintf(file, "Maximum %3d registers live at once.\n", max_pressure);
8027
foreach_in_list(backend_instruction, inst, &instructions) {
8028
fprintf(file, "%4d: ", ip++);
8029
dump_instruction(inst, file);
8033
if (file != stderr) {
8039
fs_visitor::dump_instruction(const backend_instruction *be_inst) const
8041
dump_instruction(be_inst, stderr);
8045
fs_visitor::dump_instruction(const backend_instruction *be_inst, FILE *file) const
8047
const fs_inst *inst = (const fs_inst *)be_inst;
8049
if (inst->predicate) {
8050
fprintf(file, "(%cf%d.%d) ",
8051
inst->predicate_inverse ? '-' : '+',
8052
inst->flag_subreg / 2,
8053
inst->flag_subreg % 2);
8056
fprintf(file, "%s", brw_instruction_name(devinfo, inst->opcode));
8058
fprintf(file, ".sat");
8059
if (inst->conditional_mod) {
8060
fprintf(file, "%s", conditional_modifier[inst->conditional_mod]);
8061
if (!inst->predicate &&
8062
(devinfo->ver < 5 || (inst->opcode != BRW_OPCODE_SEL &&
8063
inst->opcode != BRW_OPCODE_CSEL &&
8064
inst->opcode != BRW_OPCODE_IF &&
8065
inst->opcode != BRW_OPCODE_WHILE))) {
8066
fprintf(file, ".f%d.%d", inst->flag_subreg / 2,
8067
inst->flag_subreg % 2);
8070
fprintf(file, "(%d) ", inst->exec_size);
8073
fprintf(file, "(mlen: %d) ", inst->mlen);
8076
if (inst->ex_mlen) {
8077
fprintf(file, "(ex_mlen: %d) ", inst->ex_mlen);
8081
fprintf(file, "(EOT) ");
8084
switch (inst->dst.file) {
8086
fprintf(file, "vgrf%d", inst->dst.nr);
8089
fprintf(file, "g%d", inst->dst.nr);
8092
fprintf(file, "m%d", inst->dst.nr);
8095
fprintf(file, "(null)");
8098
fprintf(file, "***u%d***", inst->dst.nr);
8101
fprintf(file, "***attr%d***", inst->dst.nr);
8104
switch (inst->dst.nr) {
8106
fprintf(file, "null");
8108
case BRW_ARF_ADDRESS:
8109
fprintf(file, "a0.%d", inst->dst.subnr);
8111
case BRW_ARF_ACCUMULATOR:
8112
fprintf(file, "acc%d", inst->dst.subnr);
8115
fprintf(file, "f%d.%d", inst->dst.nr & 0xf, inst->dst.subnr);
8118
fprintf(file, "arf%d.%d", inst->dst.nr & 0xf, inst->dst.subnr);
8123
unreachable("not reached");
8126
if (inst->dst.offset ||
8127
(inst->dst.file == VGRF &&
8128
alloc.sizes[inst->dst.nr] * REG_SIZE != inst->size_written)) {
8129
const unsigned reg_size = (inst->dst.file == UNIFORM ? 4 : REG_SIZE);
8130
fprintf(file, "+%d.%d", inst->dst.offset / reg_size,
8131
inst->dst.offset % reg_size);
8134
if (inst->dst.stride != 1)
8135
fprintf(file, "<%u>", inst->dst.stride);
8136
fprintf(file, ":%s, ", brw_reg_type_to_letters(inst->dst.type));
8138
for (int i = 0; i < inst->sources; i++) {
8139
if (inst->src[i].negate)
8141
if (inst->src[i].abs)
8143
switch (inst->src[i].file) {
8145
fprintf(file, "vgrf%d", inst->src[i].nr);
8148
fprintf(file, "g%d", inst->src[i].nr);
8151
fprintf(file, "***m%d***", inst->src[i].nr);
8154
fprintf(file, "attr%d", inst->src[i].nr);
8157
fprintf(file, "u%d", inst->src[i].nr);
8160
fprintf(file, "(null)");
8163
switch (inst->src[i].type) {
8164
case BRW_REGISTER_TYPE_HF:
8165
fprintf(file, "%-ghf", _mesa_half_to_float(inst->src[i].ud & 0xffff));
8167
case BRW_REGISTER_TYPE_F:
8168
fprintf(file, "%-gf", inst->src[i].f);
8170
case BRW_REGISTER_TYPE_DF:
8171
fprintf(file, "%fdf", inst->src[i].df);
8173
case BRW_REGISTER_TYPE_W:
8174
case BRW_REGISTER_TYPE_D:
8175
fprintf(file, "%dd", inst->src[i].d);
8177
case BRW_REGISTER_TYPE_UW:
8178
case BRW_REGISTER_TYPE_UD:
8179
fprintf(file, "%uu", inst->src[i].ud);
8181
case BRW_REGISTER_TYPE_Q:
8182
fprintf(file, "%" PRId64 "q", inst->src[i].d64);
8184
case BRW_REGISTER_TYPE_UQ:
8185
fprintf(file, "%" PRIu64 "uq", inst->src[i].u64);
8187
case BRW_REGISTER_TYPE_VF:
8188
fprintf(file, "[%-gF, %-gF, %-gF, %-gF]",
8189
brw_vf_to_float((inst->src[i].ud >> 0) & 0xff),
8190
brw_vf_to_float((inst->src[i].ud >> 8) & 0xff),
8191
brw_vf_to_float((inst->src[i].ud >> 16) & 0xff),
8192
brw_vf_to_float((inst->src[i].ud >> 24) & 0xff));
8194
case BRW_REGISTER_TYPE_V:
8195
case BRW_REGISTER_TYPE_UV:
8196
fprintf(file, "%08x%s", inst->src[i].ud,
8197
inst->src[i].type == BRW_REGISTER_TYPE_V ? "V" : "UV");
8200
fprintf(file, "???");
8205
switch (inst->src[i].nr) {
8207
fprintf(file, "null");
8209
case BRW_ARF_ADDRESS:
8210
fprintf(file, "a0.%d", inst->src[i].subnr);
8212
case BRW_ARF_ACCUMULATOR:
8213
fprintf(file, "acc%d", inst->src[i].subnr);
8216
fprintf(file, "f%d.%d", inst->src[i].nr & 0xf, inst->src[i].subnr);
8219
fprintf(file, "arf%d.%d", inst->src[i].nr & 0xf, inst->src[i].subnr);
8225
if (inst->src[i].offset ||
8226
(inst->src[i].file == VGRF &&
8227
alloc.sizes[inst->src[i].nr] * REG_SIZE != inst->size_read(i))) {
8228
const unsigned reg_size = (inst->src[i].file == UNIFORM ? 4 : REG_SIZE);
8229
fprintf(file, "+%d.%d", inst->src[i].offset / reg_size,
8230
inst->src[i].offset % reg_size);
8233
if (inst->src[i].abs)
8236
if (inst->src[i].file != IMM) {
8238
if (inst->src[i].file == ARF || inst->src[i].file == FIXED_GRF) {
8239
unsigned hstride = inst->src[i].hstride;
8240
stride = (hstride == 0 ? 0 : (1 << (hstride - 1)));
8242
stride = inst->src[i].stride;
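/* ARF and FIXED_GRF sources encode the horizontal stride logarithmically
 * (0 means stride 0, n means 1 << (n - 1), so encodings 0..4 correspond to
 * strides 0, 1, 2, 4 and 8), whereas virtual registers store the stride
 * directly.
 */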
fprintf(file, "<%u>", stride);
8247
fprintf(file, ":%s", brw_reg_type_to_letters(inst->src[i].type));
8250
if (i < inst->sources - 1 && inst->src[i + 1].file != BAD_FILE)
8251
fprintf(file, ", ");
8256
if (inst->force_writemask_all)
8257
fprintf(file, "NoMask ");
8259
if (inst->exec_size != dispatch_width)
8260
fprintf(file, "group%d ", inst->group);
8262
fprintf(file, "\n");
8266
fs_visitor::setup_fs_payload_gfx6()
8268
assert(stage == MESA_SHADER_FRAGMENT);
8269
struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data);
8270
const unsigned payload_width = MIN2(16, dispatch_width);
8271
assert(dispatch_width % payload_width == 0);
8272
assert(devinfo->ver >= 6);
8274
/* R0: PS thread payload header. */
8277
for (unsigned j = 0; j < dispatch_width / payload_width; j++) {
8278
/* R1: masks, pixel X/Y coordinates. */
8279
payload.subspan_coord_reg[j] = payload.num_regs++;
8282
for (unsigned j = 0; j < dispatch_width / payload_width; j++) {
8283
/* R3-26: barycentric interpolation coordinates. These appear in the
8284
* same order that they appear in the brw_barycentric_mode enum. Each
8285
* set of coordinates occupies 2 registers if dispatch width == 8 and 4
8286
* registers if dispatch width == 16. Coordinates only appear if they
8287
* were enabled using the "Barycentric Interpolation Mode" bits in
8290
for (int i = 0; i < BRW_BARYCENTRIC_MODE_COUNT; ++i) {
8291
if (prog_data->barycentric_interp_modes & (1 << i)) {
8292
payload.barycentric_coord_reg[i][j] = payload.num_regs;
8293
payload.num_regs += payload_width / 4;
8297
/* R27-28: interpolated depth if uses source depth */
8298
if (prog_data->uses_src_depth) {
8299
payload.source_depth_reg[j] = payload.num_regs;
8300
payload.num_regs += payload_width / 8;
8303
/* R29-30: interpolated W set if GFX6_WM_USES_SOURCE_W. */
8304
if (prog_data->uses_src_w) {
8305
payload.source_w_reg[j] = payload.num_regs;
8306
payload.num_regs += payload_width / 8;
8309
/* R31: MSAA position offsets. */
8310
if (prog_data->uses_pos_offset) {
8311
payload.sample_pos_reg[j] = payload.num_regs;
8315
/* R32-33: MSAA input coverage mask */
8316
if (prog_data->uses_sample_mask) {
8317
assert(devinfo->ver >= 7);
8318
payload.sample_mask_in_reg[j] = payload.num_regs;
8319
payload.num_regs += payload_width / 8;
8322
/* R66: Source Depth and/or W Attribute Vertex Deltas */
8323
if (prog_data->uses_depth_w_coefficients) {
8324
payload.depth_w_coef_reg[j] = payload.num_regs;
8329
if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
8330
source_depth_to_render_target = true;
8335
fs_visitor::setup_vs_payload()
8337
/* R0: thread header, R1: urb handles */
8338
payload.num_regs = 2;
8342
fs_visitor::setup_gs_payload()
8344
assert(stage == MESA_SHADER_GEOMETRY);
8346
struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(prog_data);
8347
struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(prog_data);
8349
/* R0: thread header, R1: output URB handles */
8350
payload.num_regs = 2;
8352
if (gs_prog_data->include_primitive_id) {
8353
/* R2: Primitive ID 0..7 */
8357
/* Always enable VUE handles so we can safely use pull model if needed.
8359
* The push model for a GS uses a ton of register space even for trivial
8360
* scenarios with just a few inputs, so just make things easier and a bit
8361
* safer by always having pull model available.
8363
gs_prog_data->base.include_vue_handles = true;
8365
/* R3..RN: ICP Handles for each incoming vertex (when using pull model) */
8366
payload.num_regs += nir->info.gs.vertices_in;
8368
/* Use a maximum of 24 registers for push-model inputs. */
8369
const unsigned max_push_components = 24;
8371
/* If pushing our inputs would take too many registers, reduce the URB read
8372
* length (which is in HWords, or 8 registers), and resort to pulling.
8374
* Note that the GS reads <URB Read Length> HWords for every vertex - so we
8375
* have to multiply by VerticesIn to obtain the total storage requirement.
8377
if (8 * vue_prog_data->urb_read_length * nir->info.gs.vertices_in >
8378
max_push_components) {
8379
vue_prog_data->urb_read_length =
8380
ROUND_DOWN_TO(max_push_components / nir->info.gs.vertices_in, 8) / 8;
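/* For example, with three incoming vertices and a URB read length of two
 * HWords per vertex the inputs would need 8 * 2 * 3 = 48 registers, which
 * exceeds the 24-register budget above, so the read length gets clamped to
 * ROUND_DOWN_TO(24 / 3, 8) / 8 = 1 HWord (8 registers) per vertex and the
 * remaining inputs are fetched with the pull model.
 */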
fs_visitor::setup_cs_payload()
8387
assert(devinfo->ver >= 7);
8388
/* TODO: Fill out uses_btd_stack_ids automatically */
8389
payload.num_regs = 1 + brw_cs_prog_data(prog_data)->uses_btd_stack_ids;
8392
brw::register_pressure::register_pressure(const fs_visitor *v)
8394
const fs_live_variables &live = v->live_analysis.require();
8395
const unsigned num_instructions = v->cfg->num_blocks ?
8396
v->cfg->blocks[v->cfg->num_blocks - 1]->end_ip + 1 : 0;
8398
regs_live_at_ip = new unsigned[num_instructions]();
8400
for (unsigned reg = 0; reg < v->alloc.count; reg++) {
8401
for (int ip = live.vgrf_start[reg]; ip <= live.vgrf_end[reg]; ip++)
8402
regs_live_at_ip[ip] += v->alloc.sizes[reg];
8406
brw::register_pressure::~register_pressure()
8408
delete[] regs_live_at_ip;
8412
fs_visitor::invalidate_analysis(brw::analysis_dependency_class c)
8414
backend_shader::invalidate_analysis(c);
8415
live_analysis.invalidate(c);
8416
regpressure_analysis.invalidate(c);
8420
fs_visitor::optimize()
8422
/* Start by validating the shader we currently have. */
8425
/* bld is the common builder object pointing at the end of the program we
8426
* used to translate it into i965 IR. For the optimization and lowering
8427
* passes coming next, any code added after the end of the program without
8428
* having explicitly called fs_builder::at() clearly points at a mistake.
8429
* Ideally optimization passes wouldn't be part of the visitor so they
8430
* wouldn't have access to bld at all, but they do, so just in case some
8431
* pass forgets to ask for a location explicitly set it to NULL here to
8432
* make it trip. The dispatch width is initialized to a bogus value to
8433
* make sure that optimizations set the execution controls explicitly to
8434
* match the code they are manipulating instead of relying on the defaults.
8436
bld = fs_builder(this, 64);
8438
assign_constant_locations();
8439
lower_constant_loads();
8443
#define OPT(pass, args...) ({ \
8445
bool this_progress = pass(args); \
8447
if (INTEL_DEBUG(DEBUG_OPTIMIZER) && this_progress) { \
8448
char filename[64]; \
8449
snprintf(filename, 64, "%s%d-%s-%02d-%02d-" #pass, \
8450
stage_abbrev, dispatch_width, nir->info.name, iteration, pass_num); \
8452
backend_shader::dump_instructions(filename); \
8457
progress = progress || this_progress; \
8461
if (INTEL_DEBUG(DEBUG_OPTIMIZER)) {
8463
snprintf(filename, 64, "%s%d-%s-00-00-start",
8464
stage_abbrev, dispatch_width, nir->info.name);
8466
backend_shader::dump_instructions(filename);
8469
bool progress = false;
8473
OPT(split_virtual_grfs);
8475
/* Before anything else, eliminate dead code. The results of some NIR
8476
* instructions may effectively be calculated twice. Once when the
8477
* instruction is encountered, and again when the user of that result is
8478
* encountered. Wipe those away before algebraic optimizations and
8479
* especially copy propagation can mix things up.
8481
OPT(dead_code_eliminate);
8483
OPT(remove_extra_rounding_modes);
8490
OPT(remove_duplicate_mrf_writes);
8494
OPT(opt_copy_propagation);
8495
OPT(opt_predicated_break, this);
8496
OPT(opt_cmod_propagation);
8497
OPT(dead_code_eliminate);
8498
OPT(opt_peephole_sel);
8499
OPT(dead_control_flow_eliminate, this);
8500
OPT(opt_register_renaming);
8501
OPT(opt_saturate_propagation);
8502
OPT(register_coalesce);
8503
OPT(compute_to_mrf);
8504
OPT(eliminate_find_live_channel);
8506
OPT(compact_virtual_grfs);
8512
if (OPT(lower_pack)) {
8513
OPT(register_coalesce);
8514
OPT(dead_code_eliminate);
8517
OPT(lower_simd_width);
8518
OPT(lower_barycentrics);
8519
OPT(lower_logical_sends);
8521
/* After logical SEND lowering. */
8522
OPT(fixup_nomask_control_flow);
8525
OPT(opt_copy_propagation);
8526
/* Only run after logical send lowering because it's easier to implement
8527
* in terms of physical sends.
8529
if (OPT(opt_zero_samples))
8530
OPT(opt_copy_propagation);
8531
/* Run after logical send lowering to give it a chance to CSE the
8532
* LOAD_PAYLOAD instructions created to construct the payloads of
8533
* e.g. texturing messages in cases where it wasn't possible to CSE the
8534
* whole logical instruction.
8537
OPT(register_coalesce);
8538
OPT(compute_to_mrf);
8539
OPT(dead_code_eliminate);
8540
OPT(remove_duplicate_mrf_writes);
8541
OPT(opt_peephole_sel);
8544
OPT(opt_redundant_halt);
8546
if (OPT(lower_load_payload)) {
8547
OPT(split_virtual_grfs);
8549
/* Lower 64 bit MOVs generated by payload lowering. */
8550
if (!devinfo->has_64bit_float && !devinfo->has_64bit_int)
8553
OPT(register_coalesce);
8554
OPT(lower_simd_width);
8555
OPT(compute_to_mrf);
8556
OPT(dead_code_eliminate);
8559
OPT(opt_combine_constants);
8560
if (OPT(lower_integer_multiplication)) {
8561
/* If lower_integer_multiplication made progress, it may have produced
8562
* some 32x32-bit MULs in the process of lowering 64-bit MULs. Run it
8563
* one more time to clean those up if they exist.
8565
OPT(lower_integer_multiplication);
8569
if (devinfo->ver <= 5 && OPT(lower_minmax)) {
8570
OPT(opt_cmod_propagation);
8572
OPT(opt_copy_propagation);
8573
OPT(dead_code_eliminate);
8577
OPT(lower_derivatives);
8578
OPT(lower_regioning);
8580
OPT(opt_copy_propagation);
8581
OPT(dead_code_eliminate);
8582
OPT(lower_simd_width);
8585
OPT(fixup_sends_duplicate_payload);
8587
lower_uniform_pull_constant_loads();
8593
* From the Skylake PRM Vol. 2a docs for sends:
8595
* "It is required that the second block of GRFs does not overlap with the
8598
* There are plenty of cases where we may accidentally violate this due to
8599
* having, for instance, both sources be the constant 0. This little pass
8600
* just adds a new vgrf for the second payload and copies it over.
8603
fs_visitor::fixup_sends_duplicate_payload()
8605
bool progress = false;
8607
foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
8608
if (inst->opcode == SHADER_OPCODE_SEND && inst->ex_mlen > 0 &&
8609
regions_overlap(inst->src[2], inst->mlen * REG_SIZE,
8610
inst->src[3], inst->ex_mlen * REG_SIZE)) {
8611
fs_reg tmp = fs_reg(VGRF, alloc.allocate(inst->ex_mlen),
8612
BRW_REGISTER_TYPE_UD);
8613
/* Sadly, we've lost all notion of channels and bit sizes at this
8614
* point. Just WE_all it.
8616
const fs_builder ibld = bld.at(block, inst).exec_all().group(16, 0);
8617
fs_reg copy_src = retype(inst->src[3], BRW_REGISTER_TYPE_UD);
8618
fs_reg copy_dst = tmp;
8619
for (unsigned i = 0; i < inst->ex_mlen; i += 2) {
8620
if (inst->ex_mlen == i + 1) {
8621
/* Only one register left; do SIMD8 */
8622
ibld.group(8, 0).MOV(copy_dst, copy_src);
8624
ibld.MOV(copy_dst, copy_src);
8626
copy_src = offset(copy_src, ibld, 1);
8627
copy_dst = offset(copy_dst, ibld, 1);
8635
invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
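/* A typical trigger for the pass above is a SEND whose primary and extended
 * payloads ended up in the same register, e.g. because both happen to hold
 * the constant 0 as mentioned above; the fix materializes a fresh
 * ex_mlen-register VGRF and fills it two registers at a time with SIMD16 UD
 * MOVs, falling back to a single SIMD8 MOV for an odd trailing register.
 */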
* Three source instruction must have a GRF/MRF destination register.
8642
* ARF NULL is not allowed. Fix that up by allocating a temporary GRF.
8645
fs_visitor::fixup_3src_null_dest()
8647
bool progress = false;
8649
foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
8650
if (inst->is_3src(devinfo) && inst->dst.is_null()) {
8651
inst->dst = fs_reg(VGRF, alloc.allocate(dispatch_width / 8),
8658
invalidate_analysis(DEPENDENCY_INSTRUCTION_DETAIL |
8659
DEPENDENCY_VARIABLES);
8663
needs_dummy_fence(const intel_device_info *devinfo, fs_inst *inst)
8665
/* This workaround is about making sure that any instruction writing
8666
* through UGM has completed before we hit EOT.
8668
* The workaround talks about UGM writes or atomic message but what is
8669
* important is anything that hasn't completed. Usually any SEND
8670
* instruction that has a destination register will be read by something
8671
* else so we don't need to care about those as they will be synchronized
8672
* by other parts of the shader or optimized away. What is left are
8673
* instructions that don't have a destination register.
8675
if (inst->sfid != GFX12_SFID_UGM)
8678
return inst->dst.file == BAD_FILE;
8683
* We need to emit UGM fence message before EOT, if shader has any UGM write
8684
* or atomic message.
8686
* TODO/FINISHME: According to Curro we could avoid the fence in some cases.
8687
* We probably need a better criteria in needs_dummy_fence().
8690
fs_visitor::emit_dummy_memory_fence_before_eot()
8692
bool progress = false;
8693
bool has_ugm_write_or_atomic = false;
8695
if (!intel_device_info_is_dg2(devinfo))
8698
foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
8700
if (needs_dummy_fence(devinfo, inst))
8701
has_ugm_write_or_atomic = true;
8705
if (!has_ugm_write_or_atomic)
8708
const fs_builder ibld(this, block, inst);
8709
const fs_builder ubld = ibld.exec_all().group(1, 0);
8711
fs_reg dst = ubld.vgrf(BRW_REGISTER_TYPE_UD);
8712
fs_inst *dummy_fence = ubld.emit(SHADER_OPCODE_MEMORY_FENCE,
8713
dst, brw_vec8_grf(0, 0),
8714
/* commit enable */ brw_imm_ud(1),
8715
/* bti */ brw_imm_ud(0));
8716
dummy_fence->sfid = GFX12_SFID_UGM;
8717
dummy_fence->desc = lsc_fence_msg_desc(devinfo, LSC_FENCE_TILE,
8718
LSC_FLUSH_TYPE_NONE_6, false);
8719
ubld.emit(FS_OPCODE_SCHEDULING_FENCE, ubld.null_reg_ud(), dst);
8721
/* TODO: remove this break if we ever have shader with multiple EOT. */
8726
invalidate_analysis(DEPENDENCY_INSTRUCTIONS |
8727
DEPENDENCY_VARIABLES);
8732
* Find the first instruction in the program that might start a region of
8733
* divergent control flow due to a HALT jump. There is no
8734
* find_halt_control_flow_region_end(), the region of divergence extends until
8735
* the only SHADER_OPCODE_HALT_TARGET in the program.
8737
static const fs_inst *
8738
find_halt_control_flow_region_start(const fs_visitor *v)
8740
foreach_block_and_inst(block, fs_inst, inst, v->cfg) {
8741
if (inst->opcode == BRW_OPCODE_HALT ||
8742
inst->opcode == SHADER_OPCODE_HALT_TARGET)
8750
* Work around the Gfx12 hardware bug filed as Wa_1407528679. EU fusion
8751
* can cause a BB to be executed with all channels disabled, which will lead
8752
* to the execution of any NoMask instructions in it, even though any
8753
* execution-masked instructions will be correctly shot down. This may break
8754
* assumptions of some NoMask SEND messages whose descriptor depends on data
8755
* generated by live invocations of the shader.
8757
* This avoids the problem by predicating certain instructions on an ANY
8758
* horizontal predicate that makes sure that their execution is omitted when
8759
* all channels of the program are disabled.
8762
fs_visitor::fixup_nomask_control_flow()
8764
if (devinfo->ver != 12)
8767
const brw_predicate pred = dispatch_width > 16 ? BRW_PREDICATE_ALIGN1_ANY32H :
8768
dispatch_width > 8 ? BRW_PREDICATE_ALIGN1_ANY16H :
8769
BRW_PREDICATE_ALIGN1_ANY8H;
8770
const fs_inst *halt_start = find_halt_control_flow_region_start(this);
8772
bool progress = false;
8774
const fs_live_variables &live_vars = live_analysis.require();
8776
/* Scan the program backwards in order to be able to easily determine
8777
* whether the flag register is live at any point.
8779
foreach_block_reverse_safe(block, cfg) {
8780
BITSET_WORD flag_liveout = live_vars.block_data[block->num]
8782
STATIC_ASSERT(ARRAY_SIZE(live_vars.block_data[0].flag_liveout) == 1);
8784
foreach_inst_in_block_reverse_safe(fs_inst, inst, block) {
8785
if (!inst->predicate && inst->exec_size >= 8)
8786
flag_liveout &= ~inst->flags_written(devinfo);
8788
switch (inst->opcode) {
8791
/* Note that this doesn't handle BRW_OPCODE_HALT since only
8792
* the first one in the program closes the region of divergent
8793
* control flow due to any HALT instructions -- Instead this is
8794
* handled with the halt_start check below.
8799
case BRW_OPCODE_WHILE:
8800
case BRW_OPCODE_ENDIF:
8801
case SHADER_OPCODE_HALT_TARGET:
8806
/* Note that the vast majority of NoMask SEND instructions in the
8807
* program are harmless while executed in a block with all
8808
* channels disabled, since any instructions with side effects we
8809
* could hit here should be execution-masked.
8811
* The main concern is NoMask SEND instructions where the message
8812
* descriptor or header depends on data generated by live
8813
* invocations of the shader (RESINFO and
8814
* FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD with a dynamically
8815
* computed surface index seem to be the only examples right now
8816
* where this could easily lead to GPU hangs). Unfortunately we
8817
* have no straightforward way to detect that currently, so just
8818
* predicate any NoMask SEND instructions we find under control
8821
* If this proves to have a measurable performance impact it can
8822
* be easily extended with a whitelist of messages we know we can
8823
* safely omit the predication for.
8825
if (depth && inst->force_writemask_all &&
8826
is_send(inst) && !inst->predicate) {
8827
/* We need to load the execution mask into the flag register by
8828
* using a builder with channel group matching the whole shader
8829
* (rather than the default which is derived from the original
8830
* instruction), in order to avoid getting a right-shifted
8833
const fs_builder ubld = fs_builder(this, block, inst)
8834
.exec_all().group(dispatch_width, 0);
8835
const fs_reg flag = retype(brw_flag_reg(0, 0),
8836
BRW_REGISTER_TYPE_UD);
8838
/* Due to the lack of flag register allocation we need to save
8839
* and restore the flag register if it's live.
8841
const bool save_flag = flag_liveout &
8842
flag_mask(flag, dispatch_width / 8);
8843
const fs_reg tmp = ubld.group(1, 0).vgrf(flag.type);
8846
ubld.group(1, 0).MOV(tmp, flag);
8848
ubld.emit(FS_OPCODE_LOAD_LIVE_CHANNELS);
8850
set_predicate(pred, inst);
8851
inst->flag_subreg = 0;
8854
ubld.group(1, 0).at(block, inst->next).MOV(flag, tmp);
8861
if (inst == halt_start)
8864
flag_liveout |= inst->flags_read(devinfo);
8869
invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
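/* The net effect on an affected NoMask SEND inside divergent control flow
 * is roughly the following (SIMD16 shown):
 *
 *    mov(1)   tmp, f0.0              (only if the flag is live here)
 *    load_live_channels              (FS_OPCODE_LOAD_LIVE_CHANNELS -> f0.0)
 *    (+f0.0.any16h) send(16) ...     (the original NoMask instruction)
 *    mov(1)   f0.0, tmp              (restore the flag if it was live)
 */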
fs_visitor::allocate_registers(bool allow_spilling)
8879
static const enum instruction_scheduler_mode pre_modes[] = {
8881
SCHEDULE_PRE_NON_LIFO,
8886
static const char *scheduler_mode_name[] = {
8893
bool spill_all = allow_spilling && INTEL_DEBUG(DEBUG_SPILL_FS);
8895
/* Before we schedule anything, stash off the instruction order as an array
8896
* of fs_inst *. This way, we can reset it between scheduling passes to
8897
* prevent dependencies between the different scheduling modes.
8899
int num_insts = cfg->last_block()->end_ip + 1;
8900
fs_inst **inst_arr = ralloc_array(mem_ctx, fs_inst *, num_insts);
8903
foreach_block_and_inst(block, fs_inst, inst, cfg) {
8904
assert(ip >= block->start_ip && ip <= block->end_ip);
8905
inst_arr[ip++] = inst;
8907
assert(ip == num_insts);
8909
/* Try each scheduling heuristic to see if it can successfully register
8910
* allocate without spilling. They should be ordered by decreasing
8911
* performance but increasing likelihood of allocating.
8913
for (unsigned i = 0; i < ARRAY_SIZE(pre_modes); i++) {
8915
/* Unless we're the first pass, reset back to the original order */
8917
foreach_block (block, cfg) {
8918
block->instructions.make_empty();
8920
assert(ip == block->start_ip);
8921
for (; ip <= block->end_ip; ip++)
8922
block->instructions.push_tail(inst_arr[ip]);
8924
assert(ip == num_insts);
8926
invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
8929
if (pre_modes[i] != SCHEDULE_NONE)
8930
schedule_instructions(pre_modes[i]);
8931
this->shader_stats.scheduler_mode = scheduler_mode_name[i];
8934
assign_regs_trivial();
8939
bool can_spill = allow_spilling &&
8940
(i == ARRAY_SIZE(pre_modes) - 1);
8942
/* We should only spill registers on the last scheduling. */
8943
assert(!spilled_any_registers);
8945
allocated = assign_regs(can_spill, spill_all);
8951
fail("Failure to register allocate. Reduce number of "
8952
"live scalar values to avoid this.");
8953
} else if (spilled_any_registers) {
8954
brw_shader_perf_log(compiler, log_data,
8955
"%s shader triggered register spilling. "
8956
"Try reducing the number of live scalar "
8957
"values to improve performance.\n",
8961
/* This must come after all optimization and register allocation, since
8962
* it inserts dead code that happens to have side effects, and it does
8963
* so based on the actual physical registers in use.
8965
insert_gfx4_send_dependency_workarounds();
8970
opt_bank_conflicts();
8972
schedule_instructions(SCHEDULE_POST);
8974
if (last_scratch > 0) {
8975
ASSERTED unsigned max_scratch_size = 2 * 1024 * 1024;
8977
/* Take the max of any previously compiled variant of the shader. In the
8978
* case of bindless shaders with return parts, this will also take the
8981
prog_data->total_scratch = MAX2(brw_get_scratch_size(last_scratch),
8982
prog_data->total_scratch);
8984
if (gl_shader_stage_is_compute(stage)) {
8985
if (devinfo->platform == INTEL_PLATFORM_HSW) {
8986
/* According to the MEDIA_VFE_STATE's "Per Thread Scratch Space"
8987
* field documentation, Haswell supports a minimum of 2kB of
8988
* scratch space for compute shaders, unlike every other stage
8991
prog_data->total_scratch = MAX2(prog_data->total_scratch, 2048);
8992
} else if (devinfo->ver <= 7) {
8993
/* According to the MEDIA_VFE_STATE's "Per Thread Scratch Space"
8994
* field documentation, platforms prior to Haswell measure scratch
8995
* size linearly with a range of [1kB, 12kB] and 1kB granularity.
8997
prog_data->total_scratch = ALIGN(last_scratch, 1024);
8998
max_scratch_size = 12 * 1024;
9002
/* We currently only support up to 2MB of scratch space. If we
9003
* need to support more eventually, the documentation suggests
9004
* that we could allocate a larger buffer, and partition it out
9005
* ourselves. We'd just have to undo the hardware's address
9006
* calculation by subtracting (FFTID * Per Thread Scratch Space)
9007
* and then add FFTID * (Larger Per Thread Scratch Space).
9009
* See 3D-Media-GPGPU Engine > Media GPGPU Pipeline >
9010
* Thread Group Tracking > Local Memory/Scratch Space.
9012
assert(prog_data->total_scratch < max_scratch_size);
9019
fs_visitor::run_vs()
9021
assert(stage == MESA_SHADER_VERTEX);
9036
assign_curb_setup();
9037
assign_vs_urb_setup();
9039
fixup_3src_null_dest();
9040
emit_dummy_memory_fence_before_eot();
9041
allocate_registers(true /* allow_spilling */);
9047
fs_visitor::set_tcs_invocation_id()
9049
struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(prog_data);
9050
struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
9052
const bool dg2_plus =
9053
devinfo->ver > 12 || intel_device_info_is_dg2(devinfo);
9054
const unsigned instance_id_mask =
9055
dg2_plus ? INTEL_MASK(7, 0) :
9056
(devinfo->ver >= 11) ? INTEL_MASK(22, 16) : INTEL_MASK(23, 17);
9057
const unsigned instance_id_shift =
9058
dg2_plus ? 0 : (devinfo->ver >= 11) ? 16 : 17;
9060
/* Get instance number from g0.2 bits:
9065
fs_reg t = bld.vgrf(BRW_REGISTER_TYPE_UD);
9066
bld.AND(t, fs_reg(retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD)),
9067
brw_imm_ud(instance_id_mask));
9069
invocation_id = bld.vgrf(BRW_REGISTER_TYPE_UD);
9071
if (vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_8_PATCH) {
9072
/* gl_InvocationID is just the thread number */
9073
bld.SHR(invocation_id, t, brw_imm_ud(instance_id_shift));
9077
assert(vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_SINGLE_PATCH);
9079
fs_reg channels_uw = bld.vgrf(BRW_REGISTER_TYPE_UW);
9080
fs_reg channels_ud = bld.vgrf(BRW_REGISTER_TYPE_UD);
9081
bld.MOV(channels_uw, fs_reg(brw_imm_uv(0x76543210)));
9082
bld.MOV(channels_ud, channels_uw);
9084
if (tcs_prog_data->instances == 1) {
9085
invocation_id = channels_ud;
9087
fs_reg instance_times_8 = bld.vgrf(BRW_REGISTER_TYPE_UD);
9088
bld.SHR(instance_times_8, t, brw_imm_ud(instance_id_shift - 3));
9089
bld.ADD(invocation_id, instance_times_8, channels_ud);
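/* Note that shifting the raw g0.2 value right by (instance_id_shift - 3)
 * extracts the instance number already multiplied by 8, so the ADD above
 * yields invocation_id = 8 * instance + channel index within the 8-wide
 * group.
 */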
fs_visitor::run_tcs()
9096
assert(stage == MESA_SHADER_TESS_CTRL);
9098
struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(prog_data);
9099
struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(prog_data);
9100
struct brw_tcs_prog_key *tcs_key = (struct brw_tcs_prog_key *) key;
9102
assert(vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_SINGLE_PATCH ||
9103
vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_8_PATCH);
9105
if (vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_SINGLE_PATCH) {
9106
/* r1-r4 contain the ICP handles. */
9107
payload.num_regs = 5;
9109
assert(vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_8_PATCH);
9110
assert(tcs_key->input_vertices > 0);
9111
/* r1 contains output handles, r2 may contain primitive ID, then the
9112
* ICP handles occupy the next 1-32 registers.
9114
payload.num_regs = 2 + tcs_prog_data->include_primitive_id +
9115
tcs_key->input_vertices;
9118
/* Initialize gl_InvocationID */
9119
set_tcs_invocation_id();
9121
const bool fix_dispatch_mask =
9122
vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_SINGLE_PATCH &&
9123
(nir->info.tess.tcs_vertices_out % 8) != 0;
9125
/* Fix the dispatch mask */
9126
if (fix_dispatch_mask) {
9127
bld.CMP(bld.null_reg_ud(), invocation_id,
9128
brw_imm_ud(nir->info.tess.tcs_vertices_out), BRW_CONDITIONAL_L);
9129
bld.IF(BRW_PREDICATE_NORMAL);
9134
if (fix_dispatch_mask) {
9135
bld.emit(BRW_OPCODE_ENDIF);
9138
/* Emit EOT write; set TR DS Cache bit */
9140
fs_reg(get_tcs_output_urb_handle()),
9141
fs_reg(brw_imm_ud(WRITEMASK_X << 16)),
9142
fs_reg(brw_imm_ud(0)),
9144
fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 3);
9145
bld.LOAD_PAYLOAD(payload, srcs, 3, 2);
9147
fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_SIMD8_MASKED,
9148
bld.null_reg_ud(), payload);
9159
assign_curb_setup();
9160
assign_tcs_urb_setup();
9162
fixup_3src_null_dest();
9163
emit_dummy_memory_fence_before_eot();
9164
allocate_registers(true /* allow_spilling */);
9170
fs_visitor::run_tes()
9172
assert(stage == MESA_SHADER_TESS_EVAL);
9174
/* R0: thread header, R1-3: gl_TessCoord.xyz, R4: URB handles */
9175
payload.num_regs = 5;
9188
assign_curb_setup();
9189
assign_tes_urb_setup();
9191
fixup_3src_null_dest();
9192
emit_dummy_memory_fence_before_eot();
9193
allocate_registers(true /* allow_spilling */);
9199
fs_visitor::run_gs()
9201
assert(stage == MESA_SHADER_GEOMETRY);
9205
this->final_gs_vertex_count = vgrf(glsl_type::uint_type);
9207
if (gs_compile->control_data_header_size_bits > 0) {
9208
/* Create a VGRF to store accumulated control data bits. */
9209
this->control_data_bits = vgrf(glsl_type::uint_type);
9211
/* If we're outputting more than 32 control data bits, then EmitVertex()
9212
* will set control_data_bits to 0 after emitting the first vertex.
9213
* Otherwise, we need to initialize it to 0 here.
9215
if (gs_compile->control_data_header_size_bits <= 32) {
9216
const fs_builder abld = bld.annotate("initialize control data bits");
9217
abld.MOV(this->control_data_bits, brw_imm_ud(0u));
9223
emit_gs_thread_end();
9232
assign_curb_setup();
9233
assign_gs_urb_setup();
9235
fixup_3src_null_dest();
9236
emit_dummy_memory_fence_before_eot();
9237
allocate_registers(true /* allow_spilling */);
9242
/* From the SKL PRM, Volume 16, Workarounds:
9244
* 0877 3D Pixel Shader Hang possible when pixel shader dispatched with
9245
* only header phases (R0-R2)
9247
* WA: Enable a non-header phase (e.g. push constant) when dispatch would
9248
* have been header only.
9250
* Instead of enabling push constants one can alternatively enable one of the
9251
* inputs. Here one simply chooses "layer" which shouldn't impose much
9255
gfx9_ps_header_only_workaround(struct brw_wm_prog_data *wm_prog_data)
9257
if (wm_prog_data->num_varying_inputs)
9260
if (wm_prog_data->base.curb_read_length)
9263
wm_prog_data->urb_setup[VARYING_SLOT_LAYER] = 0;
9264
wm_prog_data->num_varying_inputs = 1;
9266
brw_compute_urb_setup_index(wm_prog_data);
9270
fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
9272
struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(this->prog_data);
9273
brw_wm_prog_key *wm_key = (brw_wm_prog_key *) this->key;
9275
assert(stage == MESA_SHADER_FRAGMENT);
9277
if (devinfo->ver >= 6)
9278
setup_fs_payload_gfx6();
9280
setup_fs_payload_gfx4();
9284
} else if (do_rep_send) {
9285
assert(dispatch_width == 16);
9286
emit_repclear_shader();
9288
if (nir->info.inputs_read > 0 ||
9289
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) ||
9290
(nir->info.outputs_read > 0 && !wm_key->coherent_fb_fetch)) {
9291
if (devinfo->ver < 6)
9292
emit_interpolation_setup_gfx4();
9294
emit_interpolation_setup_gfx6();
9297
/* We handle discards by keeping track of the still-live pixels in f0.1.
9298
* Initialize it with the dispatched pixels.
9300
if (wm_prog_data->uses_kill) {
9301
const unsigned lower_width = MIN2(dispatch_width, 16);
9302
for (unsigned i = 0; i < dispatch_width / lower_width; i++) {
9303
const fs_reg dispatch_mask =
9304
devinfo->ver >= 6 ? brw_vec1_grf((i ? 2 : 1), 7) :
9306
bld.exec_all().group(1, 0)
9307
.MOV(sample_mask_reg(bld.group(lower_width, i)),
9308
retype(dispatch_mask, BRW_REGISTER_TYPE_UW));
9312
if (nir->info.writes_memory)
9313
wm_prog_data->has_side_effects = true;
9320
if (wm_key->emit_alpha_test)
9329
assign_curb_setup();
9331
if (devinfo->ver == 9)
9332
gfx9_ps_header_only_workaround(wm_prog_data);
9336
fixup_3src_null_dest();
9337
emit_dummy_memory_fence_before_eot();
9339
allocate_registers(allow_spilling);
9346
fs_visitor::run_cs(bool allow_spilling)
9348
assert(gl_shader_stage_is_compute(stage));
9352
if (devinfo->platform == INTEL_PLATFORM_HSW && prog_data->total_shared > 0) {
9353
/* Move SLM index from g0.0[27:24] to sr0.1[11:8] */
9354
const fs_builder abld = bld.exec_all().group(1, 0);
9355
abld.MOV(retype(brw_sr0_reg(1), BRW_REGISTER_TYPE_UW),
9356
suboffset(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW), 1));
9364
emit_cs_terminate();
9370
assign_curb_setup();
9372
fixup_3src_null_dest();
9373
emit_dummy_memory_fence_before_eot();
9374
allocate_registers(allow_spilling);
9380
fs_visitor::run_bs(bool allow_spilling)
9382
assert(stage >= MESA_SHADER_RAYGEN && stage <= MESA_SHADER_CALLABLE);
9384
/* R0: thread header, R1: stack IDs, R2: argument addresses */
9385
payload.num_regs = 3;
9392
/* TODO(RT): Perhaps rename this? */
9393
emit_cs_terminate();
9399
assign_curb_setup();
9401
fixup_3src_null_dest();
9402
emit_dummy_memory_fence_before_eot();
9403
allocate_registers(allow_spilling);
9409
fs_visitor::run_task(bool allow_spilling)
9411
assert(stage == MESA_SHADER_TASK);
9413
/* Task Shader Payloads (SIMD8 and SIMD16)
9416
* R1: Local_ID.X[0-7 or 0-15]
9417
* R2: Inline Parameter
9419
* Task Shader Payloads (SIMD32)
9422
* R1: Local_ID.X[0-15]
9423
* R2: Local_ID.X[16-31]
9424
* R3: Inline Parameter
9426
* Local_ID.X values are 16 bits.
9428
* Inline parameter is optional but always present since we use it to pass
9429
* the address to descriptors.
9431
payload.num_regs = dispatch_width == 32 ? 4 : 3;
9438
emit_cs_terminate();
9444
assign_curb_setup();
9446
fixup_3src_null_dest();
9447
emit_dummy_memory_fence_before_eot();
9448
allocate_registers(allow_spilling);
9454
fs_visitor::run_mesh(bool allow_spilling)
9456
assert(stage == MESA_SHADER_MESH);
9458
/* Mesh Shader Payloads (SIMD8 and SIMD16)
9461
* R1: Local_ID.X[0-7 or 0-15]
9462
* R2: Inline Parameter
9464
* Mesh Shader Payloads (SIMD32)
9467
* R1: Local_ID.X[0-15]
9468
* R2: Local_ID.X[16-31]
9469
* R3: Inline Parameter
9471
* Local_ID.X values are 16 bits.
9473
* Inline parameter is optional but always present since we use it to pass
9474
* the address to descriptors.
9476
payload.num_regs = dispatch_width == 32 ? 4 : 3;
9483
emit_cs_terminate();
9489
assign_curb_setup();
9491
fixup_3src_null_dest();
9492
emit_dummy_memory_fence_before_eot();
9493
allocate_registers(allow_spilling);
9499
is_used_in_not_interp_frag_coord(nir_ssa_def *def)
9501
nir_foreach_use(src, def) {
9502
if (src->parent_instr->type != nir_instr_type_intrinsic)
9505
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(src->parent_instr);
9506
if (intrin->intrinsic != nir_intrinsic_load_frag_coord)
9510
nir_foreach_if_use(src, def)
9517
* Return a bitfield where bit n is set if barycentric interpolation mode n
9518
* (see enum brw_barycentric_mode) is needed by the fragment shader.
9520
* We examine the load_barycentric intrinsics rather than looking at input
9521
* variables so that we catch interpolateAtCentroid() messages too, which
9522
* also need the BRW_BARYCENTRIC_[NON]PERSPECTIVE_CENTROID mode set up.
9525
brw_compute_barycentric_interp_modes(const struct intel_device_info *devinfo,
9526
const nir_shader *shader)
9528
unsigned barycentric_interp_modes = 0;
9530
nir_foreach_function(f, shader) {
9534
nir_foreach_block(block, f->impl) {
9535
nir_foreach_instr(instr, block) {
9536
if (instr->type != nir_instr_type_intrinsic)
9539
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
9540
switch (intrin->intrinsic) {
9541
case nir_intrinsic_load_barycentric_pixel:
9542
case nir_intrinsic_load_barycentric_centroid:
9543
case nir_intrinsic_load_barycentric_sample:
9549
/* Ignore WPOS; it doesn't require interpolation. */
9550
assert(intrin->dest.is_ssa);
9551
if (!is_used_in_not_interp_frag_coord(&intrin->dest.ssa))
9554
enum glsl_interp_mode interp = (enum glsl_interp_mode)
9555
nir_intrinsic_interp_mode(intrin);
9556
nir_intrinsic_op bary_op = intrin->intrinsic;
9557
enum brw_barycentric_mode bary =
9558
brw_barycentric_mode(interp, bary_op);
9560
barycentric_interp_modes |= 1 << bary;
9562
if (devinfo->needs_unlit_centroid_workaround &&
9563
bary_op == nir_intrinsic_load_barycentric_centroid)
9564
barycentric_interp_modes |= 1 << centroid_to_pixel(bary);
9569
return barycentric_interp_modes;
9573
brw_compute_flat_inputs(struct brw_wm_prog_data *prog_data,
9574
const nir_shader *shader)
9576
prog_data->flat_inputs = 0;
9578
nir_foreach_shader_in_variable(var, shader) {
9580
if (var->data.interpolation != INTERP_MODE_FLAT)
9583
if (var->data.per_primitive)
9586
unsigned slots = glsl_count_attribute_slots(var->type, false);
9587
for (unsigned s = 0; s < slots; s++) {
9588
int input_index = prog_data->urb_setup[var->data.location + s];
9590
if (input_index >= 0)
9591
prog_data->flat_inputs |= 1 << input_index;
9597
computed_depth_mode(const nir_shader *shader)
9599
if (shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
9600
switch (shader->info.fs.depth_layout) {
9601
case FRAG_DEPTH_LAYOUT_NONE:
9602
case FRAG_DEPTH_LAYOUT_ANY:
9603
return BRW_PSCDEPTH_ON;
9604
case FRAG_DEPTH_LAYOUT_GREATER:
9605
return BRW_PSCDEPTH_ON_GE;
9606
case FRAG_DEPTH_LAYOUT_LESS:
9607
return BRW_PSCDEPTH_ON_LE;
9608
case FRAG_DEPTH_LAYOUT_UNCHANGED:
9609
return BRW_PSCDEPTH_OFF;
9612
return BRW_PSCDEPTH_OFF;
9616
* Move load_interpolated_input with simple (payload-based) barycentric modes
9617
* to the top of the program so we don't emit multiple PLNs for the same input.
9619
* This works around CSE not being able to handle non-dominating cases
9625
* interpolate the same exact input
9628
* This should be replaced by global value numbering someday.
9631
brw_nir_move_interpolation_to_top(nir_shader *nir)
9633
bool progress = false;
9635
nir_foreach_function(f, nir) {
9639
nir_block *top = nir_start_block(f->impl);
9640
exec_node *cursor_node = NULL;
9642
nir_foreach_block(block, f->impl) {
9646
nir_foreach_instr_safe(instr, block) {
9647
if (instr->type != nir_instr_type_intrinsic)
9650
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
9651
if (intrin->intrinsic != nir_intrinsic_load_interpolated_input)
9653
nir_intrinsic_instr *bary_intrinsic =
9654
nir_instr_as_intrinsic(intrin->src[0].ssa->parent_instr);
9655
nir_intrinsic_op op = bary_intrinsic->intrinsic;
9657
/* Leave interpolateAtSample/Offset() where they are. */
9658
if (op == nir_intrinsic_load_barycentric_at_sample ||
9659
op == nir_intrinsic_load_barycentric_at_offset)
9662
nir_instr *move[3] = {
9663
&bary_intrinsic->instr,
9664
intrin->src[1].ssa->parent_instr,
9668
for (unsigned i = 0; i < ARRAY_SIZE(move); i++) {
9669
if (move[i]->block != top) {
9670
move[i]->block = top;
9671
exec_node_remove(&move[i]->node);
9673
exec_node_insert_after(cursor_node, &move[i]->node);
9675
exec_list_push_head(&top->instr_list, &move[i]->node);
9677
cursor_node = &move[i]->node;
9683
nir_metadata_preserve(f->impl, nir_metadata_block_index |
9684
nir_metadata_dominance);
9691
static bool
brw_nir_demote_sample_qualifiers_instr(nir_builder *b,
                                       nir_instr *instr,
                                       UNUSED void *cb_data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   if (intrin->intrinsic != nir_intrinsic_load_barycentric_sample &&
       intrin->intrinsic != nir_intrinsic_load_barycentric_at_sample)
      return false;

   b->cursor = nir_before_instr(instr);
   nir_ssa_def *centroid =
      nir_load_barycentric(b, nir_intrinsic_load_barycentric_centroid,
                           nir_intrinsic_interp_mode(intrin));
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, centroid);
   nir_instr_remove(instr);
   return true;
}

/**
 * Demote per-sample barycentric intrinsics to centroid.
 *
 * Useful when rendering to a non-multisampled buffer.
 */
static bool
brw_nir_demote_sample_qualifiers(nir_shader *nir)
{
   return nir_shader_instructions_pass(nir,
                                       brw_nir_demote_sample_qualifiers_instr,
                                       nir_metadata_block_index |
                                          nir_metadata_dominance,
                                       NULL);
}

void
brw_nir_populate_wm_prog_data(const nir_shader *shader,
                              const struct intel_device_info *devinfo,
                              const struct brw_wm_prog_key *key,
                              struct brw_wm_prog_data *prog_data,
                              const struct brw_mue_map *mue_map)
{
   /* key->alpha_test_func means simulating alpha testing via discards,
    * so the shader definitely kills pixels.
    */
   prog_data->uses_kill = shader->info.fs.uses_discard ||
                          shader->info.fs.uses_demote ||
                          key->emit_alpha_test;
   prog_data->uses_omask = !key->ignore_sample_mask_out &&
      (shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK));
   prog_data->color_outputs_written = key->color_outputs_valid;
   prog_data->computed_depth_mode = computed_depth_mode(shader);
   prog_data->computed_stencil =
      shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL);

   prog_data->persample_dispatch =
      key->multisample_fbo &&
      (key->persample_interp ||
       BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
       BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS) ||
       shader->info.fs.uses_sample_qualifier ||
       shader->info.outputs_read);

   if (devinfo->ver >= 6) {
      prog_data->uses_sample_mask =
         BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);

      /* From the Ivy Bridge PRM documentation for 3DSTATE_PS:
       *
       *    "MSDISPMODE_PERSAMPLE is required in order to select
       *     POSOFFSET_SAMPLE"
       *
       * So we can only really get sample positions if we are doing real
       * per-sample dispatch.  If we need gl_SamplePosition and we don't have
       * persample dispatch, we hard-code it to 0.5.
       */
      prog_data->uses_pos_offset = prog_data->persample_dispatch &&
         (BITSET_TEST(shader->info.system_values_read,
                      SYSTEM_VALUE_SAMPLE_POS) ||
          BITSET_TEST(shader->info.system_values_read,
                      SYSTEM_VALUE_SAMPLE_POS_OR_CENTER));
   }

   prog_data->has_render_target_reads = shader->info.outputs_read != 0ull;

   prog_data->early_fragment_tests = shader->info.fs.early_fragment_tests;
   prog_data->post_depth_coverage = shader->info.fs.post_depth_coverage;
   prog_data->inner_coverage = shader->info.fs.inner_coverage;

   prog_data->barycentric_interp_modes =
      brw_compute_barycentric_interp_modes(devinfo, shader);

   prog_data->per_coarse_pixel_dispatch =
      key->coarse_pixel &&
      !prog_data->uses_omask &&
      !prog_data->persample_dispatch &&
      !prog_data->uses_sample_mask &&
      (prog_data->computed_depth_mode == BRW_PSCDEPTH_OFF) &&
      !prog_data->computed_stencil;

   prog_data->uses_src_w =
      BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD);
   prog_data->uses_src_depth =
      BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) &&
      !prog_data->per_coarse_pixel_dispatch;
   prog_data->uses_depth_w_coefficients =
      BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) &&
      prog_data->per_coarse_pixel_dispatch;

   calculate_urb_setup(devinfo, key, prog_data, shader, mue_map);
   brw_compute_flat_inputs(prog_data, shader);
}

/**
 * Pre-gfx6, the register file of the EUs was shared between threads,
 * and each thread used some subset allocated on a 16-register block
 * granularity.  The unit states wanted these block counts.
 */
static inline int
brw_register_blocks(int reg_count)
{
   return ALIGN(reg_count, 16) / 16 - 1;
}

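/* Worked example (illustrative, not from the original source): a shader that
 * uses 20 GRFs rounds up to 32, so ALIGN(20, 16) / 16 - 1 == 1, i.e. the unit
 * state is programmed with "number of 16-register blocks minus one".
 */
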
const unsigned *
brw_compile_fs(const struct brw_compiler *compiler,
               void *mem_ctx,
               struct brw_compile_fs_params *params)
{
   struct nir_shader *nir = params->nir;
   const struct brw_wm_prog_key *key = params->key;
   struct brw_wm_prog_data *prog_data = params->prog_data;
   bool allow_spilling = params->allow_spilling;
   const bool debug_enabled =
      INTEL_DEBUG(params->debug_flag ? params->debug_flag : DEBUG_WM);

   prog_data->base.stage = MESA_SHADER_FRAGMENT;
   prog_data->base.ray_queries = nir->info.ray_queries;
   prog_data->base.total_scratch = 0;

   const struct intel_device_info *devinfo = compiler->devinfo;
   const unsigned max_subgroup_size = compiler->devinfo->ver >= 6 ? 32 : 16;

   brw_nir_apply_key(nir, compiler, &key->base, max_subgroup_size, true);
   brw_nir_lower_fs_inputs(nir, devinfo, key);
   brw_nir_lower_fs_outputs(nir);

   if (devinfo->ver < 6)
      brw_setup_vue_interpolation(params->vue_map, nir, prog_data);

   /* From the SKL PRM, Volume 7, "Alpha Coverage":
    *  "If Pixel Shader outputs oMask, AlphaToCoverage is disabled in
    *   hardware, regardless of the state setting for this feature."
    */
   if (devinfo->ver > 6 && key->alpha_to_coverage) {
      /* Run constant fold optimization in order to get the correct source
       * offset to determine render target 0 store instruction in
       * emit_alpha_to_coverage pass.
       */
      NIR_PASS_V(nir, nir_opt_constant_folding);
      NIR_PASS_V(nir, brw_nir_lower_alpha_to_coverage);
   }

   if (!key->multisample_fbo)
      NIR_PASS_V(nir, brw_nir_demote_sample_qualifiers);
   NIR_PASS_V(nir, brw_nir_move_interpolation_to_top);
   brw_postprocess_nir(nir, compiler, true, debug_enabled,
                       key->base.robust_buffer_access);

   brw_nir_populate_wm_prog_data(nir, compiler->devinfo, key, prog_data,
                                 params->mue_map);

   fs_visitor *v8 = NULL, *v16 = NULL, *v32 = NULL;
   cfg_t *simd8_cfg = NULL, *simd16_cfg = NULL, *simd32_cfg = NULL;
   float throughput = 0;
   bool has_spilled = false;

   v8 = new fs_visitor(compiler, params->log_data, mem_ctx, &key->base,
                       &prog_data->base, nir, 8,
                       debug_enabled);
   if (!v8->run_fs(allow_spilling, false /* do_rep_send */)) {
      params->error_str = ralloc_strdup(mem_ctx, v8->fail_msg);
      delete v8;
      return NULL;
   } else if (!INTEL_DEBUG(DEBUG_NO8)) {
      simd8_cfg = v8->cfg;
      prog_data->base.dispatch_grf_start_reg = v8->payload.num_regs;
      prog_data->reg_blocks_8 = brw_register_blocks(v8->grf_used);
      const performance &perf = v8->performance_analysis.require();
      throughput = MAX2(throughput, perf.throughput);
      has_spilled = v8->spilled_any_registers;
      allow_spilling = false;
   }

   /* Limit dispatch width to simd8 with dual source blending on gfx8.
    * See: https://gitlab.freedesktop.org/mesa/mesa/-/issues/1917
    */
   if (devinfo->ver == 8 && prog_data->dual_src_blend &&
       !INTEL_DEBUG(DEBUG_NO8)) {
      assert(!params->use_rep_send);
      v8->limit_dispatch_width(8, "gfx8 workaround: "
                               "using SIMD8 when dual src blending.\n");
   }

   if (key->coarse_pixel) {
      if (prog_data->dual_src_blend) {
         v8->limit_dispatch_width(8, "SIMD16 coarse pixel shading cannot"
                                  " use SIMD8 messages.\n");
      }
      v8->limit_dispatch_width(16, "SIMD32 not supported with coarse"
                               " pixel shading.\n");
   }

   if (nir->info.ray_queries > 0)
      v8->limit_dispatch_width(16, "SIMD32 with ray queries.\n");

   if (!has_spilled &&
       v8->max_dispatch_width >= 16 &&
       (!INTEL_DEBUG(DEBUG_NO16) || params->use_rep_send)) {
      /* Try a SIMD16 compile */
      v16 = new fs_visitor(compiler, params->log_data, mem_ctx, &key->base,
                           &prog_data->base, nir, 16,
                           debug_enabled);
      v16->import_uniforms(v8);
      if (!v16->run_fs(allow_spilling, params->use_rep_send)) {
         brw_shader_perf_log(compiler, params->log_data,
                             "SIMD16 shader failed to compile: %s\n",
                             v16->fail_msg);
      } else {
         simd16_cfg = v16->cfg;
         prog_data->dispatch_grf_start_reg_16 = v16->payload.num_regs;
         prog_data->reg_blocks_16 = brw_register_blocks(v16->grf_used);
         const performance &perf = v16->performance_analysis.require();
         throughput = MAX2(throughput, perf.throughput);
         has_spilled = v16->spilled_any_registers;
         allow_spilling = false;
      }
   }

   const bool simd16_failed = v16 && !simd16_cfg;

   /* Currently, the compiler only supports SIMD32 on SNB+ */
   if (!has_spilled &&
       v8->max_dispatch_width >= 32 && !params->use_rep_send &&
       devinfo->ver >= 6 && !simd16_failed &&
       !INTEL_DEBUG(DEBUG_NO32)) {
      /* Try a SIMD32 compile */
      v32 = new fs_visitor(compiler, params->log_data, mem_ctx, &key->base,
                           &prog_data->base, nir, 32,
                           debug_enabled);
      v32->import_uniforms(v8);
      if (!v32->run_fs(allow_spilling, false)) {
         brw_shader_perf_log(compiler, params->log_data,
                             "SIMD32 shader failed to compile: %s\n",
                             v32->fail_msg);
      } else {
         const performance &perf = v32->performance_analysis.require();

         if (!INTEL_DEBUG(DEBUG_DO32) && throughput >= perf.throughput) {
            brw_shader_perf_log(compiler, params->log_data,
                                "SIMD32 shader inefficient\n");
         } else {
            simd32_cfg = v32->cfg;
            prog_data->dispatch_grf_start_reg_32 = v32->payload.num_regs;
            prog_data->reg_blocks_32 = brw_register_blocks(v32->grf_used);
            throughput = MAX2(throughput, perf.throughput);
         }
      }
   }

   /* When the caller requests a repclear shader, they want SIMD16-only */
   if (params->use_rep_send)
      simd8_cfg = NULL;

   /* Prior to Iron Lake, the PS had a single shader offset with a jump table
    * at the top to select the shader.  We've never implemented that.
    * Instead, we just give them exactly one shader and we pick the widest one
    * available.
    */
   if (compiler->devinfo->ver < 5) {
      if (simd32_cfg || simd16_cfg)
         simd8_cfg = NULL;
      if (simd32_cfg)
         simd16_cfg = NULL;
   }

   /* If computed depth is enabled SNB only allows SIMD8. */
   if (compiler->devinfo->ver == 6 &&
       prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF)
      assert(simd16_cfg == NULL && simd32_cfg == NULL);

   if (compiler->devinfo->ver <= 5 && !simd8_cfg) {
      /* Iron lake and earlier only have one Dispatch GRF start field.  Make
       * the data available in the base prog data struct for convenience.
       */
      if (simd16_cfg) {
         prog_data->base.dispatch_grf_start_reg =
            prog_data->dispatch_grf_start_reg_16;
      } else if (simd32_cfg) {
         prog_data->base.dispatch_grf_start_reg =
            prog_data->dispatch_grf_start_reg_32;
      }
   }

   if (prog_data->persample_dispatch) {
      /* Starting with SandyBridge (where we first get MSAA), the different
       * pixel dispatch combinations are grouped into classifications A
       * through F (SNB PRM Vol. 2 Part 1 Section 7.7.1).  On most hardware
       * generations, the only configurations supporting persample dispatch
       * are those in which only one dispatch width is enabled.
       *
       * The Gfx12 hardware spec has a similar dispatch grouping table, but
       * the following conflicting restriction applies (from the page on
       * "Structure_3DSTATE_PS_BODY"), so we need to keep the SIMD16 shader:
       *
       *    "SIMD32 may only be enabled if SIMD16 or (dual)SIMD8 is also
       *     enabled."
       */
      if (simd32_cfg || simd16_cfg)
         simd8_cfg = NULL;
      if (simd32_cfg && devinfo->ver < 12)
         simd16_cfg = NULL;
   }

   fs_generator g(compiler, params->log_data, mem_ctx, &prog_data->base,
                  v8->runtime_check_aads_emit, MESA_SHADER_FRAGMENT);

   if (unlikely(debug_enabled)) {
      g.enable_debug(ralloc_asprintf(mem_ctx, "%s fragment shader %s",
                                     nir->info.label ?
                                        nir->info.label : "unnamed",
                                     nir->info.name));
   }

   struct brw_compile_stats *stats = params->stats;

   if (simd8_cfg) {
      prog_data->dispatch_8 = true;
      g.generate_code(simd8_cfg, 8, v8->shader_stats,
                      v8->performance_analysis.require(), stats);
      stats = stats ? stats + 1 : NULL;
   }

   if (simd16_cfg) {
      prog_data->dispatch_16 = true;
      prog_data->prog_offset_16 = g.generate_code(
         simd16_cfg, 16, v16->shader_stats,
         v16->performance_analysis.require(), stats);
      stats = stats ? stats + 1 : NULL;
   }

   if (simd32_cfg) {
      prog_data->dispatch_32 = true;
      prog_data->prog_offset_32 = g.generate_code(
         simd32_cfg, 32, v32->shader_stats,
         v32->performance_analysis.require(), stats);
      stats = stats ? stats + 1 : NULL;
   }

   g.add_const_data(nir->constant_data, nir->constant_data_size);

   delete v8;
   delete v16;
   delete v32;

   return g.get_assembly();
}

fs_reg
fs_visitor::emit_work_group_id_setup()
{
   assert(gl_shader_stage_uses_workgroup(stage));

   fs_reg id = bld.vgrf(BRW_REGISTER_TYPE_UD, 3);

   struct brw_reg r0_1(retype(brw_vec1_grf(0, 1), BRW_REGISTER_TYPE_UD));
   bld.MOV(id, r0_1);

   if (gl_shader_stage_is_compute(stage)) {
      struct brw_reg r0_6(retype(brw_vec1_grf(0, 6), BRW_REGISTER_TYPE_UD));
      struct brw_reg r0_7(retype(brw_vec1_grf(0, 7), BRW_REGISTER_TYPE_UD));
      bld.MOV(offset(id, bld, 1), r0_6);
      bld.MOV(offset(id, bld, 2), r0_7);
   } else {
      /* Task/Mesh have a single Workgroup ID dimension in the HW. */
      bld.MOV(offset(id, bld, 1), brw_imm_ud(0));
      bld.MOV(offset(id, bld, 2), brw_imm_ud(0));
   }

   return id;
}

unsigned
brw_cs_push_const_total_size(const struct brw_cs_prog_data *cs_prog_data,
                             unsigned threads)
{
   assert(cs_prog_data->push.per_thread.size % REG_SIZE == 0);
   assert(cs_prog_data->push.cross_thread.size % REG_SIZE == 0);
   return cs_prog_data->push.per_thread.size * threads +
          cs_prog_data->push.cross_thread.size;
}

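/* Worked example (illustrative, not from the original source): with a
 * 32-byte (one GRF) per-thread block, a 64-byte cross-thread block and
 * 8 threads, the total push constant size is 32 * 8 + 64 = 320 bytes.
 */
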
static void
fill_push_const_block_info(struct brw_push_const_block *block, unsigned dwords)
{
   block->dwords = dwords;
   block->regs = DIV_ROUND_UP(dwords, 8);
   block->size = block->regs * 32;
}

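/* Worked example (illustrative, not from the original source): 13 push
 * constant dwords occupy DIV_ROUND_UP(13, 8) = 2 registers, so
 * block->size = 2 * 32 = 64 bytes.
 */
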
static void
cs_fill_push_const_info(const struct intel_device_info *devinfo,
                        struct brw_cs_prog_data *cs_prog_data)
{
   const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
   int subgroup_id_index = get_subgroup_id_param_index(devinfo, prog_data);
   bool cross_thread_supported = devinfo->verx10 >= 75;

   /* The thread ID should be stored in the last param dword */
   assert(subgroup_id_index == -1 ||
          subgroup_id_index == (int)prog_data->nr_params - 1);

   unsigned cross_thread_dwords, per_thread_dwords;
   if (!cross_thread_supported) {
      cross_thread_dwords = 0u;
      per_thread_dwords = prog_data->nr_params;
   } else if (subgroup_id_index >= 0) {
      /* Fill all but the last register with cross-thread payload */
      cross_thread_dwords = 8 * (subgroup_id_index / 8);
      per_thread_dwords = prog_data->nr_params - cross_thread_dwords;
      assert(per_thread_dwords > 0 && per_thread_dwords <= 8);
   } else {
      /* Fill all data using cross-thread payload */
      cross_thread_dwords = prog_data->nr_params;
      per_thread_dwords = 0u;
   }

   fill_push_const_block_info(&cs_prog_data->push.cross_thread, cross_thread_dwords);
   fill_push_const_block_info(&cs_prog_data->push.per_thread, per_thread_dwords);

   assert(cs_prog_data->push.cross_thread.dwords % 8 == 0 ||
          cs_prog_data->push.per_thread.size == 0);
   assert(cs_prog_data->push.cross_thread.dwords +
          cs_prog_data->push.per_thread.dwords ==
             prog_data->nr_params);
}

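/* Worked example (illustrative, not from the original source): with
 * nr_params == 18 and the subgroup ID stored in the last param
 * (subgroup_id_index == 17), the cross-thread block gets 8 * (17 / 8) == 16
 * dwords and the per-thread block gets the remaining 2 dwords, which include
 * the per-thread subgroup ID.
 */
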
static bool
filter_simd(const nir_instr *instr, const void * /* options */)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   switch (nir_instr_as_intrinsic(instr)->intrinsic) {
   case nir_intrinsic_load_simd_width_intel:
   case nir_intrinsic_load_subgroup_id:
      return true;

   default:
      return false;
   }
}

static nir_ssa_def *
lower_simd(nir_builder *b, nir_instr *instr, void *options)
{
   uintptr_t simd_width = (uintptr_t)options;

   switch (nir_instr_as_intrinsic(instr)->intrinsic) {
   case nir_intrinsic_load_simd_width_intel:
      return nir_imm_int(b, simd_width);

   case nir_intrinsic_load_subgroup_id:
      /* If the whole workgroup fits in one thread, we can lower subgroup_id
       * to a constant zero.
       */
      if (!b->shader->info.workgroup_size_variable) {
         unsigned local_workgroup_size = b->shader->info.workgroup_size[0] *
                                         b->shader->info.workgroup_size[1] *
                                         b->shader->info.workgroup_size[2];
         if (local_workgroup_size <= simd_width)
            return nir_imm_int(b, 0);
      }
      return NULL;

   default:
      return NULL;
   }
}

void
brw_nir_lower_simd(nir_shader *nir, unsigned dispatch_width)
{
   nir_shader_lower_instructions(nir, filter_simd, lower_simd,
                                 (void *)(uintptr_t)dispatch_width);
}

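/* Illustrative effect (not from the original source): for a fixed 16x1x1
 * workgroup compiled at dispatch_width == 16, load_simd_width_intel folds to
 * the constant 16 and load_subgroup_id folds to 0, since the whole workgroup
 * fits in a single thread.
 */
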
const unsigned *
brw_compile_cs(const struct brw_compiler *compiler,
               void *mem_ctx,
               struct brw_compile_cs_params *params)
{
   const nir_shader *nir = params->nir;
   const struct brw_cs_prog_key *key = params->key;
   struct brw_cs_prog_data *prog_data = params->prog_data;

   const bool debug_enabled =
      INTEL_DEBUG(params->debug_flag ? params->debug_flag : DEBUG_CS);

   prog_data->base.stage = MESA_SHADER_COMPUTE;
   prog_data->base.total_shared = nir->info.shared_size;
   prog_data->base.ray_queries = nir->info.ray_queries;
   prog_data->base.total_scratch = 0;

   if (!nir->info.workgroup_size_variable) {
      prog_data->local_size[0] = nir->info.workgroup_size[0];
      prog_data->local_size[1] = nir->info.workgroup_size[1];
      prog_data->local_size[2] = nir->info.workgroup_size[2];
   }

   const unsigned required_dispatch_width =
      brw_required_dispatch_width(&nir->info, key->base.subgroup_size_type);

   fs_visitor *v[3] = {0};
   const char *error[3] = {0};

   for (unsigned simd = 0; simd < 3; simd++) {
      if (!brw_simd_should_compile(mem_ctx, simd, compiler->devinfo, prog_data,
                                   required_dispatch_width, &error[simd]))
         continue;

      const unsigned dispatch_width = 8u << simd;

      nir_shader *shader = nir_shader_clone(mem_ctx, nir);
      brw_nir_apply_key(shader, compiler, &key->base,
                        dispatch_width, true /* is_scalar */);

      NIR_PASS_V(shader, brw_nir_lower_simd, dispatch_width);

      /* Clean up after the local index and ID calculations. */
      NIR_PASS_V(shader, nir_opt_constant_folding);
      NIR_PASS_V(shader, nir_opt_dce);

      brw_postprocess_nir(shader, compiler, true, debug_enabled,
                          key->base.robust_buffer_access);

      v[simd] = new fs_visitor(compiler, params->log_data, mem_ctx, &key->base,
                               &prog_data->base, shader, dispatch_width,
                               debug_enabled);

      if (prog_data->prog_mask) {
         unsigned first = ffs(prog_data->prog_mask) - 1;
         v[simd]->import_uniforms(v[first]);
      }

      const bool allow_spilling = !prog_data->prog_mask ||
                                  nir->info.workgroup_size_variable;

      if (v[simd]->run_cs(allow_spilling)) {
         /* We should always be able to do SIMD32 for compute shaders. */
         assert(v[simd]->max_dispatch_width >= 32);

         cs_fill_push_const_info(compiler->devinfo, prog_data);

         brw_simd_mark_compiled(simd, prog_data, v[simd]->spilled_any_registers);
      } else {
         error[simd] = ralloc_strdup(mem_ctx, v[simd]->fail_msg);
         if (simd > 0) {
            brw_shader_perf_log(compiler, params->log_data,
                                "SIMD%u shader failed to compile: %s\n",
                                dispatch_width, v[simd]->fail_msg);
         }
      }
   }

   const int selected_simd = brw_simd_select(prog_data);
   if (selected_simd < 0) {
      params->error_str = ralloc_asprintf(mem_ctx, "Can't compile shader: %s, %s and %s.\n",
                                          error[0], error[1], error[2]);
      return NULL;
   }

   assert(selected_simd < 3);
   fs_visitor *selected = v[selected_simd];

   if (!nir->info.workgroup_size_variable)
      prog_data->prog_mask = 1 << selected_simd;

   const unsigned *ret = NULL;

   fs_generator g(compiler, params->log_data, mem_ctx, &prog_data->base,
                  selected->runtime_check_aads_emit, MESA_SHADER_COMPUTE);
   if (unlikely(debug_enabled)) {
      char *name = ralloc_asprintf(mem_ctx, "%s compute shader %s",
                                   nir->info.label ?
                                      nir->info.label : "unnamed",
                                   nir->info.name);
      g.enable_debug(name);
   }

   struct brw_compile_stats *stats = params->stats;
   for (unsigned simd = 0; simd < 3; simd++) {
      if (prog_data->prog_mask & (1u << simd)) {
         assert(v[simd]);
         prog_data->prog_offset[simd] =
            g.generate_code(v[simd]->cfg, 8u << simd, v[simd]->shader_stats,
                            v[simd]->performance_analysis.require(), stats);
         stats = stats ? stats + 1 : NULL;
      }
   }

   g.add_const_data(nir->constant_data, nir->constant_data_size);

   ret = g.get_assembly();

   delete v[0];
   delete v[1];
   delete v[2];

   return ret;
}

struct brw_cs_dispatch_info
brw_cs_get_dispatch_info(const struct intel_device_info *devinfo,
                         const struct brw_cs_prog_data *prog_data,
                         const unsigned *override_local_size)
{
   struct brw_cs_dispatch_info info = {};

   const unsigned *sizes =
      override_local_size ? override_local_size :
                            prog_data->local_size;

   const int simd =
      override_local_size ? brw_simd_select_for_workgroup_size(devinfo, prog_data, sizes) :
                            brw_simd_select(prog_data);
   assert(simd >= 0 && simd < 3);

   info.group_size = sizes[0] * sizes[1] * sizes[2];
   info.simd_size = 8u << simd;
   info.threads = DIV_ROUND_UP(info.group_size, info.simd_size);

   const uint32_t remainder = info.group_size & (info.simd_size - 1);
   if (remainder > 0)
      info.right_mask = ~0u >> (32 - remainder);
   else
      info.right_mask = ~0u >> (32 - info.simd_size);

   return info;
}

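/* Worked example (illustrative, not from the original source): a 7x3x1
 * workgroup at SIMD8 has group_size == 21 and threads == DIV_ROUND_UP(21, 8)
 * == 3; the last thread covers only 21 & 7 == 5 invocations, so
 * right_mask == ~0u >> (32 - 5) == 0x1f.
 */
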
static uint8_t
compile_single_bs(const struct brw_compiler *compiler, void *log_data,
                  void *mem_ctx,
                  const struct brw_bs_prog_key *key,
                  struct brw_bs_prog_data *prog_data,
                  nir_shader *shader,
                  fs_generator *g,
                  struct brw_compile_stats *stats,
                  int *prog_offset,
                  char **error_str)
{
   const bool debug_enabled = INTEL_DEBUG(DEBUG_RT);

   prog_data->base.stage = shader->info.stage;
   prog_data->max_stack_size = MAX2(prog_data->max_stack_size,
                                    shader->scratch_size);

   const unsigned max_dispatch_width = 16;
   brw_nir_apply_key(shader, compiler, &key->base, max_dispatch_width, true);
   brw_postprocess_nir(shader, compiler, true, debug_enabled,
                       key->base.robust_buffer_access);

   fs_visitor *v = NULL, *v8 = NULL, *v16 = NULL;
   bool has_spilled = false;

   uint8_t simd_size = 0;
   if (!INTEL_DEBUG(DEBUG_NO8)) {
      v8 = new fs_visitor(compiler, log_data, mem_ctx, &key->base,
                          &prog_data->base, shader,
                          8, debug_enabled);
      const bool allow_spilling = true;
      if (!v8->run_bs(allow_spilling)) {
         *error_str = ralloc_strdup(mem_ctx, v8->fail_msg);
         delete v8;
         return 0;
      } else {
         v = v8;
         simd_size = 8;
         if (v8->spilled_any_registers)
            has_spilled = true;
      }
   }

   if (!has_spilled && !INTEL_DEBUG(DEBUG_NO16)) {
      v16 = new fs_visitor(compiler, log_data, mem_ctx, &key->base,
                           &prog_data->base, shader,
                           16, debug_enabled);
      const bool allow_spilling = (v == NULL);
      if (!v16->run_bs(allow_spilling)) {
         brw_shader_perf_log(compiler, log_data,
                             "SIMD16 shader failed to compile: %s\n",
                             v16->fail_msg);
         if (v == NULL) {
            assert(v8 == NULL);
            *error_str = ralloc_asprintf(
               mem_ctx, "SIMD8 disabled and couldn't generate SIMD16: %s",
               v16->fail_msg);
            delete v16;
            return 0;
         }
      } else {
         v = v16;
         simd_size = 16;
         if (v16->spilled_any_registers)
            has_spilled = true;
      }
   }

   if (unlikely(v == NULL)) {
      assert(INTEL_DEBUG(DEBUG_NO8 | DEBUG_NO16));
      *error_str = ralloc_strdup(mem_ctx,
         "Cannot satisfy INTEL_DEBUG flags SIMD restrictions");
      return 0;
   }

   int offset = g->generate_code(v->cfg, simd_size, v->shader_stats,
                                 v->performance_analysis.require(), stats);
   if (prog_offset)
      *prog_offset = offset;
   else
      assert(offset == 0);

   delete v8;
   delete v16;

   return simd_size;
}

uint64_t
brw_bsr(const struct intel_device_info *devinfo,
        uint32_t offset, uint8_t simd_size, uint8_t local_arg_offset)
{
   assert(offset % 64 == 0);
   assert(simd_size == 8 || simd_size == 16);
   assert(local_arg_offset % 8 == 0);

   return offset |
          SET_BITS(simd_size == 8, 4, 4) |
          SET_BITS(local_arg_offset / 8, 2, 0);
}

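/* Worked example (illustrative, not from the original source): a SIMD16
 * shader at byte offset 0x1000 with no local arguments packs to
 * 0x1000 | SET_BITS(0, 4, 4) | SET_BITS(0, 2, 0) == 0x1000; the same shader
 * compiled SIMD8 sets bit 4 and yields 0x1010.
 */
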
const unsigned *
brw_compile_bs(const struct brw_compiler *compiler,
               void *mem_ctx,
               struct brw_compile_bs_params *params)
{
   nir_shader *shader = params->nir;
   struct brw_bs_prog_data *prog_data = params->prog_data;
   unsigned num_resume_shaders = params->num_resume_shaders;
   nir_shader **resume_shaders = params->resume_shaders;
   const bool debug_enabled = INTEL_DEBUG(DEBUG_RT);

   prog_data->base.stage = shader->info.stage;
   prog_data->base.ray_queries = shader->info.ray_queries;
   prog_data->base.total_scratch = 0;

   prog_data->max_stack_size = 0;

   fs_generator g(compiler, params->log_data, mem_ctx, &prog_data->base,
                  false, shader->info.stage);
   if (unlikely(debug_enabled)) {
      char *name = ralloc_asprintf(mem_ctx, "%s %s shader %s",
                                   shader->info.label ?
                                      shader->info.label : "unnamed",
                                   gl_shader_stage_name(shader->info.stage),
                                   shader->info.name);
      g.enable_debug(name);
   }

   prog_data->simd_size =
      compile_single_bs(compiler, params->log_data, mem_ctx,
                        params->key, prog_data,
                        shader, &g, params->stats, NULL, &params->error_str);
   if (prog_data->simd_size == 0)
      return NULL;

   uint64_t *resume_sbt = ralloc_array(mem_ctx, uint64_t, num_resume_shaders);
   for (unsigned i = 0; i < num_resume_shaders; i++) {
      if (INTEL_DEBUG(DEBUG_RT)) {
         char *name = ralloc_asprintf(mem_ctx, "%s %s resume(%u) shader %s",
                                      shader->info.label ?
                                         shader->info.label : "unnamed",
                                      gl_shader_stage_name(shader->info.stage),
                                      i, shader->info.name);
         g.enable_debug(name);
      }

      /* TODO: Figure out shader stats etc. for resume shaders */
      int offset = 0;
      uint8_t simd_size =
         compile_single_bs(compiler, params->log_data, mem_ctx, params->key,
                           prog_data, resume_shaders[i], &g, NULL, &offset,
                           &params->error_str);
      if (simd_size == 0)
         return NULL;

      assert(offset > 0);
      resume_sbt[i] = brw_bsr(compiler->devinfo, offset, simd_size, 0);
   }

   /* We only have one constant data so we want to make sure they're all the
    * same.
    */
   for (unsigned i = 0; i < num_resume_shaders; i++) {
      assert(resume_shaders[i]->constant_data_size ==
             shader->constant_data_size);
      assert(memcmp(resume_shaders[i]->constant_data,
                    shader->constant_data,
                    shader->constant_data_size) == 0);
   }

   g.add_const_data(shader->constant_data, shader->constant_data_size);
   g.add_resume_sbt(num_resume_shaders, resume_sbt);

   return g.get_assembly();
}

/**
 * Test the dispatch mask packing assumptions of
 * brw_stage_has_packed_dispatch().  Call this from e.g. the top of
 * fs_visitor::emit_nir_code() to cause a GPU hang if any shader invocation is
 * executed with an unexpected dispatch mask.
 */
void
brw_fs_test_dispatch_packing(const fs_builder &bld)
{
   const gl_shader_stage stage = bld.shader->stage;

   if (brw_stage_has_packed_dispatch(bld.shader->devinfo, stage,
                                     bld.shader->stage_prog_data)) {
      const fs_builder ubld = bld.exec_all().group(1, 0);
      const fs_reg tmp = component(bld.vgrf(BRW_REGISTER_TYPE_UD), 0);
      const fs_reg mask = (stage == MESA_SHADER_FRAGMENT ? brw_vmask_reg() :
                           brw_dmask_reg());

      ubld.ADD(tmp, mask, brw_imm_ud(1));
      ubld.AND(tmp, mask, tmp);

      /* This will loop forever if the dispatch mask doesn't have the expected
       * form '2^n-1', in which case tmp will be non-zero.
       */
      bld.emit(BRW_OPCODE_DO);
      bld.CMP(bld.null_reg_ud(), tmp, brw_imm_ud(0), BRW_CONDITIONAL_NZ);
      set_predicate(BRW_PREDICATE_NORMAL, bld.emit(BRW_OPCODE_WHILE));
   }
}

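/* Worked example (illustrative, not from the original source): a fully packed
 * SIMD16 dispatch mask of 0xffff gives tmp = (0xffff + 1) & 0xffff = 0, so
 * the loop exits immediately; a sparse mask such as 0xfff7 gives a non-zero
 * tmp and the DO/WHILE loop hangs the EU, flagging the broken packing
 * assumption.
 */
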
unsigned
fs_visitor::workgroup_size() const
{
   assert(gl_shader_stage_uses_workgroup(stage));
   const struct brw_cs_prog_data *cs = brw_cs_prog_data(prog_data);
   return cs->local_size[0] * cs->local_size[1] * cs->local_size[2];
}