~mmach/netext73/mesa-haswell : contents of src/compiler/nir/nir_lower

~mmach/netext73/mesa-haswell : (revision 1)

/*
 * Copyright © 2015 Intel Corporation
 * Copyright © 2019 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 *    Samuel Pitoiset (samuel.pitoiset@gmail.com)
 */

#include "nir.h"
#include "nir_builder.h"

/**
 * Build the significand half of a lowered frexp().
 *
 * The emitted code keeps the sign and mantissa bits of x and overwrites the
 * exponent field: with the exponent shared by all values in [0.5, 1.0) when
 * x compares not-equal to zero, and with an all-zero exponent field
 * otherwise, so a +/-0 input is passed through unchanged.  No separate
 * handling is emitted for Inf or NaN inputs.
 *
 * \param b  builder whose cursor marks where the new instructions are placed
 * \param x  16-, 32- or 64-bit floating-point value to decompose
 * \return   SSA value with the same bit size as x
 */
static nir_ssa_def *
lower_frexp_sig(nir_builder *b, nir_ssa_def *x)
{
   const unsigned bits = x->bit_size;

   nir_ssa_def *magnitude = nir_fabs(b, x);
   nir_ssa_def *fzero = nir_imm_floatN_t(b, 0, bits);
   nir_ssa_def *nonzero = nir_fneu(b, magnitude, fzero);

   if (bits == 64) {
      /* Double precision: 1 sign bit, 11 exponent bits, 52 mantissa bits.
       * The sign, the whole exponent and the top 20 mantissa bits sit in the
       * upper 32-bit word, so only that word has to be rewritten.  The mask
       * selects the sign plus those 20 mantissa bits.
       */
      nir_ssa_def *keep_mask = nir_imm_int(b, 0x800fffffu);
      /* Upper word of the exponent used by values in [0.5, 1.0). */
      nir_ssa_def *half_exp = nir_imm_int(b, 0x3fe00000u);

      nir_ssa_def *hi = nir_unpack_64_2x32_split_y(b, x);
      nir_ssa_def *izero = nir_imm_int(b, 0);

      nir_ssa_def *kept = nir_iand(b, hi, keep_mask);
      nir_ssa_def *new_exp = nir_bcsel(b, nonzero, half_exp, izero);
      nir_ssa_def *new_hi = nir_ior(b, kept, new_exp);

      /* The low word holds mantissa bits only and is repacked untouched. */
      nir_ssa_def *lo = nir_unpack_64_2x32_split_x(b, x);
      return nir_pack_64_2x32_split(b, lo, new_hi);
   }

   nir_ssa_def *keep_mask;
   nir_ssa_def *half_exp;

   switch (bits) {
   case 16:
      /* Half precision: 1 sign bit, 5 exponent bits, 10 mantissa bits.
       * The mask selects the sign bit and the mantissa.
       */
      keep_mask = nir_imm_intN_t(b, 0x83ffu, 16);
      /* Exponent field of values in [0.5, 1.0). */
      half_exp = nir_imm_intN_t(b, 0x3800u, 16);
      break;
   case 32:
      /* Single precision: 1 sign bit, 8 exponent bits, 23 mantissa bits.
       * The mask selects the sign bit and the mantissa.
       */
      keep_mask = nir_imm_int(b, 0x807fffffu);
      /* Exponent field of values in [0.5, 1.0). */
      half_exp = nir_imm_int(b, 0x3f000000u);
      break;
   default:
      unreachable("Invalid bitsize");
   }

   /* The float zero constant has an all-zero bit pattern, so it doubles as
    * the "leave the exponent field cleared" operand of the select.
    */
   nir_ssa_def *kept = nir_iand(b, x, keep_mask);
   nir_ssa_def *new_exp = nir_bcsel(b, nonzero, half_exp, fzero);
   return nir_ior(b, kept, new_exp);
}

/**
 * Build the exponent half of a lowered frexp().
 *
 * The emitted code shifts the biased exponent field of |x| down to bit 0 and
 * adds a per-format bias (-14, -126 or -1022) when x compares not-equal to
 * zero; for a zero input nothing is added, so the result is the raw (zero)
 * exponent field.
 *
 * \param b  builder whose cursor marks where the new instructions are placed
 * \param x  16-, 32- or 64-bit floating-point value to decompose
 * \return   32-bit integer SSA value holding the exponent
 */
static nir_ssa_def *
lower_frexp_exp(nir_builder *b, nir_ssa_def *x)
{
   /* Taking the absolute value first clears the sign bit, so the right
    * shift below leaves nothing but the exponent field.
    */
   nir_ssa_def *magnitude = nir_fabs(b, x);
   nir_ssa_def *fzero = nir_imm_floatN_t(b, 0, x->bit_size);
   nir_ssa_def *nonzero = nir_fneu(b, magnitude, fzero);

   nir_ssa_def *shift;    /* number of mantissa bits below the exponent */
   nir_ssa_def *bias;     /* added to the raw exponent of nonzero inputs */
   nir_ssa_def *no_bias;  /* added instead of the bias for zero inputs */
   nir_ssa_def *exp_word; /* integer word that contains the exponent field */

   switch (x->bit_size) {
   case 16:
      shift = nir_imm_int(b, 10);
      bias = nir_imm_intN_t(b, -14, 16);
      /* The 16-bit float zero is bitwise zero and has the right width. */
      no_bias = fzero;
      exp_word = magnitude;
      break;
   case 32:
      shift = nir_imm_int(b, 23);
      bias = nir_imm_int(b, -126);
      no_bias = fzero;
      exp_word = magnitude;
      break;
   case 64:
      /* Only the upper 32 bits carry the exponent; 20 mantissa bits remain
       * below it in that word.
       */
      shift = nir_imm_int(b, 20);
      bias = nir_imm_int(b, -1022);
      no_bias = nir_imm_int(b, 0);
      exp_word = nir_unpack_64_2x32_split_y(b, magnitude);
      break;
   default:
      unreachable("Invalid bitsize");
   }

   nir_ssa_def *raw_exp = nir_ushr(b, exp_word, shift);
   nir_ssa_def *adjust = nir_bcsel(b, nonzero, bias, no_bias);
   nir_ssa_def *exponent = nir_iadd(b, raw_exp, adjust);

   /* The significand has the input's type, but the exponent must always be
    * a 32-bit integer, so the 16-bit result is sign-extended.
    */
   if (x->bit_size == 16)
      return nir_i2i32(b, exponent);

   return exponent;
}

/**
 * Lower every frexp_sig / frexp_exp ALU instruction in one function body.
 *
 * Each matching instruction is replaced by the sequence built by
 * lower_frexp_sig() or lower_frexp_exp(), its uses are redirected to the new
 * value and the instruction itself is removed.
 *
 * \param impl  function implementation to process
 * \return      true if at least one instruction was lowered
 */
static bool
lower_frexp_impl(nir_function_impl *impl)
{
   nir_builder b;
   nir_builder_init(&b, impl);

   bool lowered_any = false;

   nir_foreach_block(block, impl) {
      /* The "safe" iterator is needed because matching instructions are
       * removed while walking the block.
       */
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_alu)
            continue;

         nir_alu_instr *alu = nir_instr_as_alu(instr);
         const bool wants_sig = alu->op == nir_op_frexp_sig;
         if (!wants_sig && alu->op != nir_op_frexp_exp)
            continue;

         /* New code goes immediately before the instruction it replaces. */
         b.cursor = nir_before_instr(instr);

         nir_ssa_def *src = nir_ssa_for_alu_src(&b, alu, 0);
         nir_ssa_def *replacement;
         if (wants_sig)
            replacement = lower_frexp_sig(&b, src);
         else
            replacement = lower_frexp_exp(&b, src);

         nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
                                  nir_src_for_ssa(replacement));
         nir_instr_remove(instr);
         lowered_any = true;
      }
   }

   if (!lowered_any)
      return false;

   /* Instructions were only swapped inside existing blocks, so block indices
    * and dominance information are declared preserved.
    */
   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);
   return true;
}

/**
 * Lower frexp_sig and frexp_exp in every function of the shader that has an
 * implementation.
 *
 * \param shader  shader to process
 * \return        true if any function was changed
 */
bool
nir_lower_frexp(nir_shader *shader)
{
   bool changed = false;

   nir_foreach_function(func, shader) {
      /* Functions without a body have nothing to lower. */
      if (!func->impl)
         continue;

      if (lower_frexp_impl(func->impl))
         changed = true;
   }

   return changed;
}

1 by mmach 1	1	/*
	2	* Copyright © 2015 Intel Corporation
	3	* Copyright © 2019 Valve Corporation
	4	*
	5	* Permission is hereby granted, free of charge, to any person obtaining a
	6	* copy of this software and associated documentation files (the "Software"),
	7	* to deal in the Software without restriction, including without limitation
	8	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
	9	* and/or sell copies of the Software, and to permit persons to whom the
	10	* Software is furnished to do so, subject to the following conditions:
	11	*
	12	* The above copyright notice and this permission notice (including the next
	13	* paragraph) shall be included in all copies or substantial portions of the
	14	* Software.
	15	*
	16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
	19	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
	21	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
	22	* IN THE SOFTWARE.
	23	*
	24	* Authors:
	25	* Jason Ekstrand (jason@jlekstrand.net)
	26	* Samuel Pitoiset (samuel.pitoiset@gmail.com>
	27	*/
	28
	29	#include "nir.h"
	30	#include "nir_builder.h"
	31
	32	static nir_ssa_def *
	33	lower_frexp_sig(nir_builder *b, nir_ssa_def *x)
	34	{
	35	nir_ssa_def *abs_x = nir_fabs(b, x);
	36	nir_ssa_def *zero = nir_imm_floatN_t(b, 0, x->bit_size);
	37	nir_ssa_def *sign_mantissa_mask, *exponent_value;
	38	nir_ssa_def *is_not_zero = nir_fneu(b, abs_x, zero);
	39
	40	switch (x->bit_size) {
	41	case 16:
	42	/* Half-precision floating-point values are stored as
	43	* 1 sign bit;
	44	* 5 exponent bits;
	45	* 10 mantissa bits.
	46	*
	47	* An exponent shift of 10 will shift the mantissa out, leaving only the
	48	* exponent and sign bit (which itself may be zero, if the absolute value
	49	* was taken before the bitcast and shift).
	50	*/
	51	sign_mantissa_mask = nir_imm_intN_t(b, 0x83ffu, 16);
	52	/* Exponent of floating-point values in the range [0.5, 1.0). */
	53	exponent_value = nir_imm_intN_t(b, 0x3800u, 16);
	54	break;
	55	case 32:
	56	/* Single-precision floating-point values are stored as
	57	* 1 sign bit;
	58	* 8 exponent bits;
	59	* 23 mantissa bits.
	60	*
	61	* An exponent shift of 23 will shift the mantissa out, leaving only the
	62	* exponent and sign bit (which itself may be zero, if the absolute value
	63	* was taken before the bitcast and shift.
	64	*/
65	sign_mantissa_mask = nir_imm_int(b, 0x807fffffu);
66	/* Exponent of floating-point values in the range [0.5, 1.0). */
67	exponent_value = nir_imm_int(b, 0x3f000000u);
68	break;
69	case 64:
70	/* Double-precision floating-point values are stored as
71	* 1 sign bit;
72	* 11 exponent bits;
73	* 52 mantissa bits.
74	*
75	* An exponent shift of 20 will shift the remaining mantissa bits out,
76	* leaving only the exponent and sign bit (which itself may be zero, if
77	* the absolute value was taken before the bitcast and shift.
78	*/
79	sign_mantissa_mask = nir_imm_int(b, 0x800fffffu);
80	/* Exponent of floating-point values in the range [0.5, 1.0). */
81	exponent_value = nir_imm_int(b, 0x3fe00000u);
82	break;
83	default:
84	unreachable("Invalid bitsize");
85	}
86
87	if (x->bit_size == 64) {
88	/* We only need to deal with the exponent so first we extract the upper
89	* 32 bits using nir_unpack_64_2x32_split_y.
90	*/
91	nir_ssa_def *upper_x = nir_unpack_64_2x32_split_y(b, x);
92	nir_ssa_def *zero32 = nir_imm_int(b, 0);
93
94	nir_ssa_def *new_upper =
95	nir_ior(b, nir_iand(b, upper_x, sign_mantissa_mask),
96	nir_bcsel(b, is_not_zero, exponent_value, zero32));
97
98	nir_ssa_def *lower_x = nir_unpack_64_2x32_split_x(b, x);
99
100	return nir_pack_64_2x32_split(b, lower_x, new_upper);
101	} else {
102	return nir_ior(b, nir_iand(b, x, sign_mantissa_mask),
103	nir_bcsel(b, is_not_zero, exponent_value, zero));
104	}
105	}
106
107	static nir_ssa_def *
	108	lower_frexp_exp(nir_builder *b, nir_ssa_def *x)
109	{
110	nir_ssa_def *abs_x = nir_fabs(b, x);
111	nir_ssa_def *zero = nir_imm_floatN_t(b, 0, x->bit_size);
112	nir_ssa_def *is_not_zero = nir_fneu(b, abs_x, zero);
113	nir_ssa_def *exponent;
114
115	switch (x->bit_size) {
116	case 16: {
117	nir_ssa_def *exponent_shift = nir_imm_int(b, 10);
118	nir_ssa_def *exponent_bias = nir_imm_intN_t(b, -14, 16);
119
120	/* Significand return must be of the same type as the input, but the
121	* exponent must be a 32-bit integer.
122	*/
123	exponent = nir_i2i32(b, nir_iadd(b, nir_ushr(b, abs_x, exponent_shift),
124	nir_bcsel(b, is_not_zero, exponent_bias, zero)));
125	break;
126	}
127	case 32: {
128	nir_ssa_def *exponent_shift = nir_imm_int(b, 23);
129	nir_ssa_def *exponent_bias = nir_imm_int(b, -126);
130
131	exponent = nir_iadd(b, nir_ushr(b, abs_x, exponent_shift),
132	nir_bcsel(b, is_not_zero, exponent_bias, zero));
133	break;
134	}
135	case 64: {
136	nir_ssa_def *exponent_shift = nir_imm_int(b, 20);
137	nir_ssa_def *exponent_bias = nir_imm_int(b, -1022);
138
139	nir_ssa_def *zero32 = nir_imm_int(b, 0);
140	nir_ssa_def *abs_upper_x = nir_unpack_64_2x32_split_y(b, abs_x);
141
142	exponent = nir_iadd(b, nir_ushr(b, abs_upper_x, exponent_shift),
143	nir_bcsel(b, is_not_zero, exponent_bias, zero32));
144	break;
145	}
146	default:
147	unreachable("Invalid bitsize");
148	}
149
150	return exponent;
151	}
152
153	static bool
154	lower_frexp_impl(nir_function_impl *impl)
155	{
156	bool progress = false;
157
158	nir_builder b;
159	nir_builder_init(&b, impl);
160
161	nir_foreach_block(block, impl) {
162	nir_foreach_instr_safe(instr, block) {
163	if (instr->type != nir_instr_type_alu)
164	continue;
165
166	nir_alu_instr *alu_instr = nir_instr_as_alu(instr);
167	nir_ssa_def *lower;
168
169	b.cursor = nir_before_instr(instr);
170
171	switch (alu_instr->op) {
172	case nir_op_frexp_sig:
173	lower = lower_frexp_sig(&b, nir_ssa_for_alu_src(&b, alu_instr, 0));
174	break;
175	case nir_op_frexp_exp:
176	lower = lower_frexp_exp(&b, nir_ssa_for_alu_src(&b, alu_instr, 0));
177	break;
178	default:
179	continue;
180	}
181
182	nir_ssa_def_rewrite_uses(&alu_instr->dest.dest.ssa,
183	nir_src_for_ssa(lower));
184	nir_instr_remove(instr);
185	progress = true;
186	}
187	}
188
189	if (progress) {
	190	nir_metadata_preserve(impl, nir_metadata_block_index |
191	nir_metadata_dominance);
192	}
193
194	return progress;
195	}
196
197	bool
198	nir_lower_frexp(nir_shader *shader)
199	{
200	bool progress = false;
201
202	nir_foreach_function(function, shader) {
203	if (function->impl)
	204	progress |= lower_frexp_impl(function->impl);
205	}
206
207	return progress;
208	}