~mmach/netext73/mesa-haswell

1 by mmach
1
1
/*
2
 * Copyright © 2015 Intel Corporation
3
 * Copyright © 2019 Valve Corporation
4
 *
5
 * Permission is hereby granted, free of charge, to any person obtaining a
6
 * copy of this software and associated documentation files (the "Software"),
7
 * to deal in the Software without restriction, including without limitation
8
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
 * and/or sell copies of the Software, and to permit persons to whom the
10
 * Software is furnished to do so, subject to the following conditions:
11
 *
12
 * The above copyright notice and this permission notice (including the next
13
 * paragraph) shall be included in all copies or substantial portions of the
14
 * Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22
 * IN THE SOFTWARE.
23
 *
24
 * Authors:
25
 *    Jason Ekstrand (jason@jlekstrand.net)
26
 *    Samuel Pitoiset (samuel.pitoiset@gmail.com)
27
 */
28
29
#include "nir.h"
30
#include "nir_builder.h"
31
32
static nir_ssa_def *
33
lower_frexp_sig(nir_builder *b, nir_ssa_def *x)
34
{
35
   nir_ssa_def *abs_x = nir_fabs(b, x);
36
   nir_ssa_def *zero = nir_imm_floatN_t(b, 0, x->bit_size);
37
   nir_ssa_def *sign_mantissa_mask, *exponent_value;
38
   nir_ssa_def *is_not_zero = nir_fneu(b, abs_x, zero);
39
40
   switch (x->bit_size) {
41
   case 16:
42
      /* Half-precision floating-point values are stored as
43
       *   1 sign bit;
44
       *   5 exponent bits;
45
       *   10 mantissa bits.
46
       *
47
       * An exponent shift of 10 will shift the mantissa out, leaving only the
48
       * exponent and sign bit (which itself may be zero, if the absolute value
49
       * was taken before the bitcast and shift).
50
       */
51
      sign_mantissa_mask = nir_imm_intN_t(b, 0x83ffu, 16);
52
      /* Exponent of floating-point values in the range [0.5, 1.0). */
53
      exponent_value = nir_imm_intN_t(b, 0x3800u, 16);
54
      break;
55
   case 32:
56
      /* Single-precision floating-point values are stored as
57
       *   1 sign bit;
58
       *   8 exponent bits;
59
       *   23 mantissa bits.
60
       *
61
       * An exponent shift of 23 will shift the mantissa out, leaving only the
62
       * exponent and sign bit (which itself may be zero, if the absolute value
63
       * was taken before the bitcast and shift.
64
       */
65
      sign_mantissa_mask = nir_imm_int(b, 0x807fffffu);
66
      /* Exponent of floating-point values in the range [0.5, 1.0). */
67
      exponent_value = nir_imm_int(b, 0x3f000000u);
68
      break;
69
   case 64:
70
      /* Double-precision floating-point values are stored as
71
       *   1 sign bit;
72
       *   11 exponent bits;
73
       *   52 mantissa bits.
74
       *
75
       * An exponent shift of 20 will shift the remaining mantissa bits out,
76
       * leaving only the exponent and sign bit (which itself may be zero, if
77
       * the absolute value was taken before the bitcast and shift.
78
       */
79
      sign_mantissa_mask = nir_imm_int(b, 0x800fffffu);
80
      /* Exponent of floating-point values in the range [0.5, 1.0). */
81
      exponent_value = nir_imm_int(b, 0x3fe00000u);
82
      break;
83
   default:
84
      unreachable("Invalid bitsize");
85
   }
86
87
   if (x->bit_size == 64) {
88
      /* We only need to deal with the exponent so first we extract the upper
89
       * 32 bits using nir_unpack_64_2x32_split_y.
90
       */
91
      nir_ssa_def *upper_x = nir_unpack_64_2x32_split_y(b, x);
92
      nir_ssa_def *zero32 = nir_imm_int(b, 0);
93
94
      nir_ssa_def *new_upper =
95
         nir_ior(b, nir_iand(b, upper_x, sign_mantissa_mask),
96
                    nir_bcsel(b, is_not_zero, exponent_value, zero32));
97
98
      nir_ssa_def *lower_x = nir_unpack_64_2x32_split_x(b, x);
99
100
      return nir_pack_64_2x32_split(b, lower_x, new_upper);
101
   } else {
102
      return nir_ior(b, nir_iand(b, x, sign_mantissa_mask),
103
                        nir_bcsel(b, is_not_zero, exponent_value, zero));
104
   }
105
}
106
107
static nir_ssa_def *
108
lower_frexp_exp(nir_builder *b, nir_ssa_def *x)
109
{
110
   nir_ssa_def *abs_x = nir_fabs(b, x);
111
   nir_ssa_def *zero = nir_imm_floatN_t(b, 0, x->bit_size);
112
   nir_ssa_def *is_not_zero = nir_fneu(b, abs_x, zero);
113
   nir_ssa_def *exponent;
114
115
   switch (x->bit_size) {
116
   case 16: {
117
      nir_ssa_def *exponent_shift = nir_imm_int(b, 10);
118
      nir_ssa_def *exponent_bias = nir_imm_intN_t(b, -14, 16);
119
120
      /* Significand return must be of the same type as the input, but the
121
       * exponent must be a 32-bit integer.
122
       */
123
      exponent = nir_i2i32(b, nir_iadd(b, nir_ushr(b, abs_x, exponent_shift),
124
                              nir_bcsel(b, is_not_zero, exponent_bias, zero)));
125
      break;
126
   }
127
   case 32: {
128
      nir_ssa_def *exponent_shift = nir_imm_int(b, 23);
129
      nir_ssa_def *exponent_bias = nir_imm_int(b, -126);
130
131
      exponent = nir_iadd(b, nir_ushr(b, abs_x, exponent_shift),
132
                             nir_bcsel(b, is_not_zero, exponent_bias, zero));
133
      break;
134
   }
135
   case 64: {
136
      nir_ssa_def *exponent_shift = nir_imm_int(b, 20);
137
      nir_ssa_def *exponent_bias = nir_imm_int(b, -1022);
138
139
      nir_ssa_def *zero32 = nir_imm_int(b, 0);
140
      nir_ssa_def *abs_upper_x = nir_unpack_64_2x32_split_y(b, abs_x);
141
142
      exponent = nir_iadd(b, nir_ushr(b, abs_upper_x, exponent_shift),
143
                             nir_bcsel(b, is_not_zero, exponent_bias, zero32));
144
      break;
145
   }
146
   default:
147
      unreachable("Invalid bitsize");
148
   }
149
150
   return exponent;
151
}
152
153
static bool
154
lower_frexp_impl(nir_function_impl *impl)
155
{
156
   bool progress = false;
157
158
   nir_builder b;
159
   nir_builder_init(&b, impl);
160
161
   nir_foreach_block(block, impl) {
162
      nir_foreach_instr_safe(instr, block) {
163
         if (instr->type != nir_instr_type_alu)
164
            continue;
165
166
         nir_alu_instr *alu_instr = nir_instr_as_alu(instr);
167
         nir_ssa_def *lower;
168
169
         b.cursor = nir_before_instr(instr);
170
171
         switch (alu_instr->op) {
172
         case nir_op_frexp_sig:
173
            lower = lower_frexp_sig(&b, nir_ssa_for_alu_src(&b, alu_instr, 0));
174
            break;
175
         case nir_op_frexp_exp:
176
            lower = lower_frexp_exp(&b, nir_ssa_for_alu_src(&b, alu_instr, 0));
177
            break;
178
         default:
179
            continue;
180
         }
181
182
         nir_ssa_def_rewrite_uses(&alu_instr->dest.dest.ssa,
183
                                  nir_src_for_ssa(lower));
184
         nir_instr_remove(instr);
185
         progress = true;
186
      }
187
   }
188
189
   if (progress) {
190
      nir_metadata_preserve(impl, nir_metadata_block_index |
191
                                  nir_metadata_dominance);
192
   }
193
194
   return progress;
195
}
196
197
bool
198
nir_lower_frexp(nir_shader *shader)
199
{
200
   bool progress = false;
201
202
   nir_foreach_function(function, shader) {
203
      if (function->impl)
204
         progress |= lower_frexp_impl(function->impl);
205
   }
206
207
   return progress;
208
}