2
* Copyright © 2021 Intel Corporation
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23
#ifndef INTEL_PIXEL_HASH_H
24
#define INTEL_PIXEL_HASH_H
27
* Compute an \p n x \p m pixel hashing table usable as slice, subslice or
28
* pixel pipe hashing table. The resulting table is the cyclic repetition of
29
* a fixed pattern with periodicity equal to \p period.
31
* If \p index is specified to be equal to \p period, a 2-way hashing table
32
* will be generated such that indices 0 and 1 are returned for the following
33
* fractions of entries respectively:
35
* p_0 = ceil(period / 2) / period
36
* p_1 = floor(period / 2) / period
38
* If \p index is even and less than \p period, a 3-way hashing table will be
39
* generated such that indices 0, 1 and 2 are returned for the following
40
* fractions of entries:
42
* p_0 = (ceil(period / 2) - 1) / period
43
* p_1 = floor(period / 2) / period
46
* The equations above apply if \p flip is equal to 0, if it is equal to 1 p_0
47
* and p_1 will be swapped for the result. Note that in the context of pixel
48
* pipe hashing this can be always 0 on Gfx12 platforms, since the hardware
49
* transparently remaps logical indices found on the table to physical pixel
50
* pipe indices from the highest to lowest EU count.
53
intel_compute_pixel_hash_table_3way(unsigned n, unsigned m,
54
unsigned period, unsigned index, bool flip,
57
for (unsigned i = 0; i < n; i++) {
58
for (unsigned j = 0; j < m; j++) {
59
const unsigned k = (i + j) % period;
60
p[j + m * i] = (k == index ? 2 : (k & 1) ^ flip);
66
* Compute an \p n x \p m pixel hashing table usable as slice,
67
* subslice or pixel pipe hashing table. This generalizes the
68
* previous 3-way hash table function to an arbitrary number of ways
69
* given by the number of bits set in the \p mask argument, but
70
* doesn't allow the specification of different frequencies for
71
* different table indices.
74
intel_compute_pixel_hash_table_nway(unsigned n, unsigned m, uint32_t mask,
77
/* Construct a table mapping consecutive indices to the physical
78
* indices given by the bits set on the mask argument.
80
unsigned phys_ids[sizeof(mask) * CHAR_BIT];
83
u_foreach_bit(i, mask)
84
phys_ids[num_ids++] = i;
88
/* Compute a permutation of the above indices that assigns indices
89
* as far as possible to adjacent entries. This permutation is
90
* designed to be equivalent to the bit reversal of each index in
91
* cases where num_ids is a power of two, but doesn't actually
92
* require it to be a power of two in order to satisfy the required
93
* properties (which is necessary to handle configurations with
94
* arbitrary non-power of two fusing). By construction, flipping
95
* bit l of its input will lead to a change in its result of the
96
* order of num_ids/2^(l+1) (see variable t below). The
97
* bijectivity of this permutation can be verified easily by
100
const unsigned bits = util_logbase2_ceil(num_ids);
101
unsigned swz[ARRAY_SIZE(phys_ids)];
103
for (unsigned k = 0; k < num_ids; k++) {
104
unsigned t = num_ids;
107
for (unsigned l = 0; l < bits; l++) {
119
/* Initialize the table with the cyclic repetition of a
120
* num_ids-periodic pattern.
122
* Note that the swz permutation only affects the ordering of rows.
123
* This is intentional in order to minimize the size of the
124
* contiguous area that needs to be rendered in parallel in order
125
* to utilize the whole GPU: A rendering rectangle of width W will
126
* need to be at least H blocks high, where H is bounded by
127
* 2^ceil(log2(num_ids/W)) thanks to the above definition of the swz
130
for (unsigned i = 0; i < n; i++) {
131
const unsigned k = i % num_ids;
132
assert(swz[k] < num_ids);
133
for (unsigned j = 0; j < m; j++) {
134
p[j + m * i] = phys_ids[(j + swz[k]) % num_ids];