/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file ir_div_to_mul_rcp.cpp
 *
 * Breaks an ir_binop_div expression down to op0 * (rcp(op1)).
 *
 * Many GPUs don't have a divide instruction (945 and 965 included),
 * but they do have an RCP instruction to compute an approximate
 * reciprocal.  By breaking the operation down, constant reciprocals
 * can get constant folded.
 */

#include "glsl_types.h"
38
class ir_div_to_mul_rcp_visitor : public ir_hierarchical_visitor {
40
ir_div_to_mul_rcp_visitor()
42
this->made_progress = false;
45
ir_visitor_status visit_leave(ir_expression *);
51
do_div_to_mul_rcp(exec_list *instructions)
53
ir_div_to_mul_rcp_visitor v;
55
visit_list_elements(&v, instructions);
56
return v.made_progress;
60
ir_div_to_mul_rcp_visitor::visit_leave(ir_expression *ir)
62
if (ir->operation != ir_binop_div)
63
return visit_continue;
65
if (ir->operands[1]->type->base_type != GLSL_TYPE_INT &&
66
ir->operands[1]->type->base_type != GLSL_TYPE_UINT) {
67
/* New expression for the 1.0 / op1 */
69
expr = new(ir) ir_expression(ir_unop_rcp,
70
ir->operands[1]->type,
74
/* op0 / op1 -> op0 * (1.0 / op1) */
75
ir->operation = ir_binop_mul;
76
ir->operands[1] = expr;
78
/* Be careful with integer division -- we need to do it as a
79
* float and re-truncate, since rcp(n > 1) of an integer would
83
const struct glsl_type *vec_type;
85
vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
86
ir->operands[1]->type->vector_elements,
87
ir->operands[1]->type->matrix_columns);
89
if (ir->operands[1]->type->base_type == GLSL_TYPE_INT)
90
op1 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[1], NULL);
92
op1 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[1], NULL);
94
op1 = new(ir) ir_expression(ir_unop_rcp, op1->type, op1, NULL);
96
vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
97
ir->operands[0]->type->vector_elements,
98
ir->operands[0]->type->matrix_columns);
100
if (ir->operands[0]->type->base_type == GLSL_TYPE_INT)
101
op0 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[0], NULL);
103
op0 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[0], NULL);
105
op0 = new(ir) ir_expression(ir_binop_mul, vec_type, op0, op1);
107
ir->operation = ir_unop_f2i;
108
ir->operands[0] = op0;
109
ir->operands[1] = NULL;
112
this->made_progress = true;
114
return visit_continue;