/*
 * Copyright © 2021 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_private.h"
#include "compiler/shader_info.h"
#include "intel/dev/intel_debug.h"
#include "intel/dev/intel_device_info.h"
#include "util/ralloc.h"
unsigned
brw_required_dispatch_width(const struct shader_info *info,
                            enum brw_subgroup_size_type subgroup_size_type)
{
   unsigned required = 0;

   if ((int)subgroup_size_type >= (int)BRW_SUBGROUP_SIZE_REQUIRE_8) {
      assert(gl_shader_stage_uses_workgroup(info->stage));
      /* These enum values are expressly chosen to be equal to the subgroup
       * size that they require.
       */
      required = (unsigned)subgroup_size_type;
   }

   if (gl_shader_stage_is_compute(info->stage) && info->cs.subgroup_size > 0) {
      assert(required == 0 || required == info->cs.subgroup_size);
      required = info->cs.subgroup_size;
   }

   return required;
}
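
/* Returns true when the given bit is set in the mask. */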
static bool
test_bit(unsigned mask, unsigned bit) {
   return mask & (1u << bit);
}
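
/* Decides whether a SIMD variant (simd 0 => SIMD8, 1 => SIMD16, 2 => SIMD32)
 * is worth compiling for this shader.  On a negative answer, *error is set
 * to a string allocated out of mem_ctx explaining why the variant was
 * skipped.
 */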
bool
brw_simd_should_compile(void *mem_ctx,
                        unsigned simd,
                        const struct intel_device_info *devinfo,
                        struct brw_cs_prog_data *prog_data,
                        unsigned required,
                        const char **error)
{
   assert(!test_bit(prog_data->prog_mask, simd));

   const unsigned width = 8u << simd;

   /* For shaders with a variable workgroup size, we always compile all the
    * variants, since the choice happens only at dispatch time.
    */
   const bool workgroup_size_variable = prog_data->local_size[0] == 0;

   if (!workgroup_size_variable) {
      if (test_bit(prog_data->prog_spilled, simd)) {
         *error = ralloc_asprintf(
            mem_ctx, "SIMD%u skipped because would spill", width);
         return false;
      }

      const unsigned workgroup_size = prog_data->local_size[0] *
                                      prog_data->local_size[1] *
                                      prog_data->local_size[2];

      unsigned max_threads = devinfo->max_cs_workgroup_threads;

      if (required && required != width) {
         *error = ralloc_asprintf(
            mem_ctx, "SIMD%u skipped because required dispatch width is %u",
            width, required);
         return false;
      }

      if (simd > 0 && test_bit(prog_data->prog_mask, simd - 1) &&
          workgroup_size <= (width / 2)) {
         *error = ralloc_asprintf(
            mem_ctx, "SIMD%u skipped because workgroup size %u already fits in SIMD%u",
            width, workgroup_size, width / 2);
         return false;
      }

      if (DIV_ROUND_UP(workgroup_size, width) > max_threads) {
         *error = ralloc_asprintf(
            mem_ctx, "SIMD%u can't fit all %u invocations in %u threads",
            width, workgroup_size, max_threads);
         return false;
      }

      /* SIMD32 is only enabled when it is needed, unless forced.
       *
       * TODO: Use performance_analysis and drop this rule.
       */
      if (width == 32) {
         if (!INTEL_DEBUG(DEBUG_DO32) && prog_data->prog_mask) {
            *error = ralloc_strdup(
               mem_ctx, "SIMD32 skipped because not required");
            return false;
         }
      }
   }

   const bool env_skip[3] = {
      INTEL_DEBUG(DEBUG_NO8),
      INTEL_DEBUG(DEBUG_NO16),
      INTEL_DEBUG(DEBUG_NO32),
   };

   if (unlikely(env_skip[simd])) {
      *error = ralloc_asprintf(
         mem_ctx, "SIMD%u skipped because INTEL_DEBUG=no%u",
         width, width);
      return false;
   }

   return true;
}
void
brw_simd_mark_compiled(unsigned simd, struct brw_cs_prog_data *prog_data, bool spilled)
{
   assert(!test_bit(prog_data->prog_mask, simd));

   prog_data->prog_mask |= 1u << simd;

   /* If a SIMD variant spilled, all the wider ones would spill too. */
   if (spilled) {
      for (unsigned i = simd; i < 3; i++)
         prog_data->prog_spilled |= 1u << i;
   }
}
int
brw_simd_select(const struct brw_cs_prog_data *prog_data)
{
   assert((prog_data->prog_mask & ~0x7u) == 0);
   const unsigned not_spilled_mask =
      prog_data->prog_mask & ~prog_data->prog_spilled;

   /* Util functions index bits from 1 instead of 0, adjust before return. */
   if (not_spilled_mask)
      return util_last_bit(not_spilled_mask) - 1;
   else if (prog_data->prog_mask)
      return ffs(prog_data->prog_mask) - 1;
   else
      return -1;
}
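
/* Variant of brw_simd_select() for a workgroup size that may differ from the
 * one the program was compiled with, e.g. a variable workgroup size shader
 * being dispatched with concrete sizes.  Replays the should-compile decisions
 * on a clone of prog_data with the new sizes, then selects among the variants
 * that were actually compiled.
 */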
int
brw_simd_select_for_workgroup_size(const struct intel_device_info *devinfo,
                                   const struct brw_cs_prog_data *prog_data,
                                   const unsigned *sizes)
{
   if (prog_data->local_size[0] == sizes[0] &&
       prog_data->local_size[1] == sizes[1] &&
       prog_data->local_size[2] == sizes[2])
      return brw_simd_select(prog_data);

   void *mem_ctx = ralloc_context(NULL);

   struct brw_cs_prog_data cloned = *prog_data;
   for (unsigned i = 0; i < 3; i++)
      cloned.local_size[i] = sizes[i];

   cloned.prog_mask = 0;
   cloned.prog_spilled = 0;

   const char *error[3] = {0};

   for (unsigned simd = 0; simd < 3; simd++) {
      /* We are not recompiling, so reuse the original prog_mask and
       * prog_spilled results; they already cover every variant that could
       * have been compiled.
       */
      if (brw_simd_should_compile(mem_ctx, simd, devinfo, &cloned,
                                  0 /* required_dispatch_width */, &error[simd]) &&
          test_bit(prog_data->prog_mask, simd)) {
         brw_simd_mark_compiled(simd, &cloned, test_bit(prog_data->prog_spilled, simd));
      }
   }

   ralloc_free(mem_ctx);

   return brw_simd_select(&cloned);
}
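
/* A minimal sketch of the intended calling pattern, for reference.  The
 * compile_cs_variant() helper here is hypothetical; the real per-SIMD
 * compile lives in the CS compiler proper.
 *
 *    for (unsigned simd = 0; simd < 3; simd++) {
 *       const char *error = NULL;
 *
 *       if (!brw_simd_should_compile(mem_ctx, simd, devinfo, prog_data,
 *                                    required_width, &error))
 *          continue;
 *
 *       bool spilled;
 *       if (compile_cs_variant(simd, &spilled))
 *          brw_simd_mark_compiled(simd, prog_data, spilled);
 *    }
 *
 *    int selected = brw_simd_select(prog_data);
 *    if (selected < 0)
 *       fail("no SIMD variant could be compiled");
 */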