1
# Copyright 2010 Luca Barbieri
3
# Permission is hereby granted, free of charge, to any person obtaining
4
# a copy of this software and associated documentation files (the
5
# "Software"), to deal in the Software without restriction, including
6
# without limitation the rights to use, copy, modify, merge, publish,
7
# distribute, sublicense, and/or sell copies of the Software, and to
8
# permit persons to whom the Software is furnished to do so, subject to
9
# the following conditions:
11
# The above copyright notice and this permission notice (including the
12
# next paragraph) shall be included in all copies or substantial
13
# portions of the Software.
15
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18
# IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
19
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
# *************************************************************************
25
# The code is a reimplementation of the algorithm in
26
# www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
27
# "Fast Half Float Conversions" by Jeroen van der Zijp, Nov 2008
29
# The table contents have been slightly changed so that the exponent
30
# bias is now in the exponent table instead of the mantissa table (mostly
31
# for cosmetic reasons, and because it theoretically allows a variant
32
# that flushes denormals to zero but uses a mantissa table with 24-bit
# entries).
35
# The tables are also constructed slightly differently.
38
# Note that using a 64K * 4 table is a terrible idea since it will not fit
39
# in the L1 cache and will massively pollute the L2 cache as well
41
# The half-to-float tables generated by this script should instead fit in
# the L1 cache.
43
# TODO: we could use a denormal bias table instead of the mantissa/offset
44
# tables: this would reduce the L1 cache usage from 8704 to 2304 bytes
45
# but would involve more computation
47
# Note however that if denormals are never encountered, the L1 cache usage
48
# is only about 4608 bytes anyway.
59
# Open a C array definition of the form "const <t> <n>[<l>] = {".
# NOTE(review): the enclosing `def` line is not visible here; the
# three-argument begin(type, name, length) calls further down suggest this
# is the body of begin(t, n, l) -- confirm.
# Parenthesised single-argument form: prints the same text on Python 2 and
# is also valid on Python 3.
print("const " + t + " " + n + "[" + str(l) + "] = {")
64
# Emit one table element: a tab, the value as a hex literal, and a comma.
# NOTE(review): the enclosing `def` line is not visible here; the
# one-argument value(...) calls further down suggest this is the body of
# value(v) -- confirm.
# Parenthesised single-argument form: prints the same text on Python 2 and
# is also valid on Python 3.
print("\t" + hex(v) + ",")
70
# Consistency check: fail if table_index differs from table_length.
# NOTE(review): presumably table_index counts the elements emitted so far
# and table_length is the size passed when the table was opened; neither
# assignment is visible here -- confirm.
assert table_index == table_length
72
# Preamble of the generated C source: an "autogenerated" banner comment
# followed by the include of util/u_half.h.
# Parenthesised single-argument form: prints the same text on Python 2 and
# is also valid on Python 3.
print("/* This file is autogenerated by u_half.py. Do not edit directly. */")
73
print("#include \"util/u_half.h\"")
75
begin("uint32_t", "util_half_to_float_mantissa_table", 2048)
80
for i in xrange(1, 1024):
85
while (m & 0x00800000) == 0:
94
for i in xrange(1024, 2048):
95
value((i - 1024) << 13)
98
begin("uint32_t", "util_half_to_float_exponent_table", 64)
99
# positive zero or denormals
103
for i in xrange(1, 31):
104
value(0x38000000 + (i << 23))
106
# positive infinity/NaN
109
# negative zero or denormals
113
for i in range(33, 63):
114
value(0xb8000000 + ((i - 32) << 23))
116
# negative infinity/NaN
120
begin("uint32_t", "util_half_to_float_offset_table", 64)
121
# positive zero or denormals
125
for i in range(1, 32):
128
# negative zero or denormals
132
for i in xrange(33, 64):
136
begin("uint16_t", "util_float_to_half_base_table", 512)
137
for sign in (0, 0x8000):
138
# very small numbers mapping to zero
139
for i in xrange(-127, -24):
142
# small numbers mapping to denormals
143
for i in xrange(-24, -14):
144
value(sign | (0x400 >> (-14 -i)))
147
for i in xrange(-14, 16):
148
value(sign | ((i + 15) << 10))
150
# large numbers mapping to infinity
151
for i in xrange(16, 128):
158
begin("uint8_t", "util_float_to_half_shift_table", 512)
159
for sign in (0, 0x8000):
160
# very small numbers mapping to zero
161
for i in xrange(-127, -24):
164
# small numbers mapping to denormals
165
for i in xrange(-24, -14):
169
for i in xrange(-14, 16):
172
# large numbers mapping to infinity
173
for i in xrange(16, 128):