2
* Copyright (C) 2014 Intel Corporation
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
* DEALINGS IN THE SOFTWARE.
25
* Included by texcompress_bptc and gallium to define BPTC decoding routines.
28
#ifndef TEXCOMPRESS_BPTC_TMP_H
29
#define TEXCOMPRESS_BPTC_TMP_H
31
#include "util/format_srgb.h"
32
#include "util/half_float.h"
36
#define N_PARTITIONS 64
37
#define BLOCK_BYTES 16
39
struct bptc_unorm_mode {
42
bool has_rotation_bits;
43
bool has_index_selection_bit;
46
bool has_endpoint_pbits;
47
bool has_shared_pbits;
49
int n_secondary_index_bits;
52
struct bptc_float_bitfield {
60
struct bptc_float_mode {
62
bool transformed_endpoints;
67
struct bptc_float_bitfield bitfields[24];
76
static const struct bptc_unorm_mode
77
bptc_unorm_modes[] = {
78
/* 0 */ { 3, 4, false, false, 4, 0, true, false, 3, 0 },
79
/* 1 */ { 2, 6, false, false, 6, 0, false, true, 3, 0 },
80
/* 2 */ { 3, 6, false, false, 5, 0, false, false, 2, 0 },
81
/* 3 */ { 2, 6, false, false, 7, 0, true, false, 2, 0 },
82
/* 4 */ { 1, 0, true, true, 5, 6, false, false, 2, 3 },
83
/* 5 */ { 1, 0, true, false, 7, 8, false, false, 2, 2 },
84
/* 6 */ { 1, 0, false, false, 7, 7, true, false, 4, 0 },
85
/* 7 */ { 2, 6, false, false, 5, 5, true, false, 2, 0 }
88
static const struct bptc_float_mode
89
bptc_float_modes[] = {
91
{ false, true, 5, 10, 3, { 5, 5, 5 },
92
{ { 2, 1, 4, 1, false }, { 2, 2, 4, 1, false }, { 3, 2, 4, 1, false },
93
{ 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
94
{ 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
95
{ 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
96
{ 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
97
{ 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
98
{ 3, 2, 3, 1, false },
102
{ false, true, 5, 7, 3, { 6, 6, 6 },
103
{ { 2, 1, 5, 1, false }, { 3, 1, 4, 1, false }, { 3, 1, 5, 1, false },
104
{ 0, 0, 0, 7, false }, { 3, 2, 0, 1, false }, { 3, 2, 1, 1, false },
105
{ 2, 2, 4, 1, false }, { 0, 1, 0, 7, false }, { 2, 2, 5, 1, false },
106
{ 3, 2, 2, 1, false }, { 2, 1, 4, 1, false }, { 0, 2, 0, 7, false },
107
{ 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
108
{ 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
109
{ 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
110
{ 2, 0, 0, 6, false },
111
{ 3, 0, 0, 6, false },
115
{ false, true, 5, 11, 3, { 5, 4, 4 },
116
{ { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
117
{ 1, 0, 0, 5, false }, { 0, 0, 10, 1, false }, { 2, 1, 0, 4, false },
118
{ 1, 1, 0, 4, false }, { 0, 1, 10, 1, false }, { 3, 2, 0, 1, false },
119
{ 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
120
{ 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
121
{ 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
125
{ false, false, 0, 10, 4, { 10, 10, 10 },
126
{ { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
127
{ 1, 0, 0, 10, false }, { 1, 1, 0, 10, false }, { 1, 2, 0, 10, false },
131
{ false, true, 5, 11, 3, { 4, 5, 4 },
132
{ { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
133
{ 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 3, 1, 4, 1, false },
134
{ 2, 1, 0, 4, false }, { 1, 1, 0, 5, false }, { 0, 1, 10, 1, false },
135
{ 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
136
{ 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
137
{ 3, 2, 0, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
138
{ 2, 1, 4, 1, false }, { 3, 2, 3, 1, false },
142
{ false, true, 0, 11, 4, { 9, 9, 9 },
143
{ { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
144
{ 1, 0, 0, 9, false }, { 0, 0, 10, 1, false }, { 1, 1, 0, 9, false },
145
{ 0, 1, 10, 1, false }, { 1, 2, 0, 9, false }, { 0, 2, 10, 1, false },
149
{ false, true, 5, 11, 3, { 4, 4, 5 },
150
{ { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
151
{ 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 2, 2, 4, 1, false },
152
{ 2, 1, 0, 4, false }, { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false },
153
{ 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
154
{ 0, 2, 10, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
155
{ 3, 2, 1, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
156
{ 3, 2, 4, 1, false }, { 3, 2, 3, 1, false },
160
{ false, true, 0, 12, 4, { 8, 8, 8 },
161
{ { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
162
{ 1, 0, 0, 8, false }, { 0, 0, 10, 2, true }, { 1, 1, 0, 8, false },
163
{ 0, 1, 10, 2, true }, { 1, 2, 0, 8, false }, { 0, 2, 10, 2, true },
167
{ false, true, 5, 9, 3, { 5, 5, 5 },
168
{ { 0, 0, 0, 9, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 9, false },
169
{ 2, 1, 4, 1, false }, { 0, 2, 0, 9, false }, { 3, 2, 4, 1, false },
170
{ 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
171
{ 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
172
{ 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
173
{ 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
174
{ 3, 2, 3, 1, false },
178
{ false, true, 0, 16, 4, { 4, 4, 4 },
179
{ { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
180
{ 1, 0, 0, 4, false }, { 0, 0, 10, 6, true }, { 1, 1, 0, 4, false },
181
{ 0, 1, 10, 6, true }, { 1, 2, 0, 4, false }, { 0, 2, 10, 6, true },
185
{ false, true, 5, 8, 3, { 6, 5, 5 },
186
{ { 0, 0, 0, 8, false }, { 3, 1, 4, 1, false }, { 2, 2, 4, 1, false },
187
{ 0, 1, 0, 8, false }, { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false },
188
{ 0, 2, 0, 8, false }, { 3, 2, 3, 1, false }, { 3, 2, 4, 1, false },
189
{ 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false },
190
{ 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
191
{ 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 6, false },
192
{ 3, 0, 0, 6, false },
196
{ true /* reserved */ },
198
{ false, true, 5, 8, 3, { 5, 6, 5 },
199
{ { 0, 0, 0, 8, false }, { 3, 2, 0, 1, false }, { 2, 2, 4, 1, false },
200
{ 0, 1, 0, 8, false }, { 2, 1, 5, 1, false }, { 2, 1, 4, 1, false },
201
{ 0, 2, 0, 8, false }, { 3, 1, 5, 1, false }, { 3, 2, 4, 1, false },
202
{ 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
203
{ 1, 1, 0, 6, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
204
{ 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
205
{ 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
209
{ true /* reserved */ },
211
{ false, true, 5, 8, 3, { 5, 5, 6 },
212
{ { 0, 0, 0, 8, false }, { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false },
213
{ 0, 1, 0, 8, false }, { 2, 2, 5, 1, false }, { 2, 1, 4, 1, false },
214
{ 0, 2, 0, 8, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
215
{ 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
216
{ 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
217
{ 1, 2, 0, 6, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
218
{ 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
222
{ true /* reserved */ },
224
{ false, false, 5, 6, 3, { 6, 6, 6 },
225
{ { 0, 0, 0, 6, false }, { 3, 1, 4, 1, false }, { 3, 2, 0, 1, false },
226
{ 3, 2, 1, 1, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 6, false },
227
{ 2, 1, 5, 1, false }, { 2, 2, 5, 1, false }, { 3, 2, 2, 1, false },
228
{ 2, 1, 4, 1, false }, { 0, 2, 0, 6, false }, { 3, 1, 5, 1, false },
229
{ 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
230
{ 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
231
{ 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
232
{ 2, 0, 0, 6, false }, { 3, 0, 0, 6, false },
236
{ true /* reserved */ },
239
/* This partition table is used when the mode has two subsets. Each
240
* partition is represented by a 32-bit value which gives 2 bits per texel
241
* within the block. The value of the two bits represents which subset to use
244
static const uint32_t
245
partition_table1[N_PARTITIONS] = {
246
0x50505050U, 0x40404040U, 0x54545454U, 0x54505040U,
247
0x50404000U, 0x55545450U, 0x55545040U, 0x54504000U,
248
0x50400000U, 0x55555450U, 0x55544000U, 0x54400000U,
249
0x55555440U, 0x55550000U, 0x55555500U, 0x55000000U,
250
0x55150100U, 0x00004054U, 0x15010000U, 0x00405054U,
251
0x00004050U, 0x15050100U, 0x05010000U, 0x40505054U,
252
0x00404050U, 0x05010100U, 0x14141414U, 0x05141450U,
253
0x01155440U, 0x00555500U, 0x15014054U, 0x05414150U,
254
0x44444444U, 0x55005500U, 0x11441144U, 0x05055050U,
255
0x05500550U, 0x11114444U, 0x41144114U, 0x44111144U,
256
0x15055054U, 0x01055040U, 0x05041050U, 0x05455150U,
257
0x14414114U, 0x50050550U, 0x41411414U, 0x00141400U,
258
0x00041504U, 0x00105410U, 0x10541000U, 0x04150400U,
259
0x50410514U, 0x41051450U, 0x05415014U, 0x14054150U,
260
0x41050514U, 0x41505014U, 0x40011554U, 0x54150140U,
261
0x50505500U, 0x00555050U, 0x15151010U, 0x54540404U,
264
/* This partition table is used when the mode has three subsets. In this case
265
* the values can be 0, 1 or 2.
267
static const uint32_t
268
partition_table2[N_PARTITIONS] = {
269
0xaa685050U, 0x6a5a5040U, 0x5a5a4200U, 0x5450a0a8U,
270
0xa5a50000U, 0xa0a05050U, 0x5555a0a0U, 0x5a5a5050U,
271
0xaa550000U, 0xaa555500U, 0xaaaa5500U, 0x90909090U,
272
0x94949494U, 0xa4a4a4a4U, 0xa9a59450U, 0x2a0a4250U,
273
0xa5945040U, 0x0a425054U, 0xa5a5a500U, 0x55a0a0a0U,
274
0xa8a85454U, 0x6a6a4040U, 0xa4a45000U, 0x1a1a0500U,
275
0x0050a4a4U, 0xaaa59090U, 0x14696914U, 0x69691400U,
276
0xa08585a0U, 0xaa821414U, 0x50a4a450U, 0x6a5a0200U,
277
0xa9a58000U, 0x5090a0a8U, 0xa8a09050U, 0x24242424U,
278
0x00aa5500U, 0x24924924U, 0x24499224U, 0x50a50a50U,
279
0x500aa550U, 0xaaaa4444U, 0x66660000U, 0xa5a0a5a0U,
280
0x50a050a0U, 0x69286928U, 0x44aaaa44U, 0x66666600U,
281
0xaa444444U, 0x54a854a8U, 0x95809580U, 0x96969600U,
282
0xa85454a8U, 0x80959580U, 0xaa141414U, 0x96960000U,
283
0xaaaa1414U, 0xa05050a0U, 0xa0a5a5a0U, 0x96000000U,
284
0x40804080U, 0xa9a8a9a8U, 0xaaaaaa44U, 0x2a4a5254U
288
anchor_indices[][N_PARTITIONS] = {
289
/* Anchor index values for the second subset of two-subset partitioning */
291
0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
292
0xf,0x2,0x8,0x2,0x2,0x8,0x8,0xf,0x2,0x8,0x2,0x2,0x8,0x8,0x2,0x2,
293
0xf,0xf,0x6,0x8,0x2,0x8,0xf,0xf,0x2,0x8,0x2,0x2,0x2,0xf,0xf,0x6,
294
0x6,0x2,0x6,0x8,0xf,0xf,0x2,0x2,0xf,0xf,0xf,0xf,0xf,0x2,0x2,0xf
297
/* Anchor index values for the second subset of three-subset partitioning */
299
0x3,0x3,0xf,0xf,0x8,0x3,0xf,0xf,0x8,0x8,0x6,0x6,0x6,0x5,0x3,0x3,
300
0x3,0x3,0x8,0xf,0x3,0x3,0x6,0xa,0x5,0x8,0x8,0x6,0x8,0x5,0xf,0xf,
301
0x8,0xf,0x3,0x5,0x6,0xa,0x8,0xf,0xf,0x3,0xf,0x5,0xf,0xf,0xf,0xf,
302
0x3,0xf,0x5,0x5,0x5,0x8,0x5,0xa,0x5,0xa,0x8,0xd,0xf,0xc,0x3,0x3
305
/* Anchor index values for the third subset of three-subset
309
0xf,0x8,0x8,0x3,0xf,0xf,0x3,0x8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x8,
310
0xf,0x8,0xf,0x3,0xf,0x8,0xf,0x8,0x3,0xf,0x6,0xa,0xf,0xf,0xa,0x8,
311
0xf,0x3,0xf,0xa,0xa,0x8,0x9,0xa,0x6,0xf,0x8,0xf,0x3,0x6,0x6,0x8,
312
0xf,0x3,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x3,0xf,0xf,0x8
317
extract_bits(const uint8_t *block,
321
int byte_index = offset / 8;
322
int bit_index = offset % 8;
323
int n_bits_in_byte = MIN2(n_bits, 8 - bit_index);
328
result |= ((block[byte_index] >> bit_index) &
329
((1 << n_bits_in_byte) - 1)) << bit;
331
n_bits -= n_bits_in_byte;
336
bit += n_bits_in_byte;
339
n_bits_in_byte = MIN2(n_bits, 8);
344
expand_component(uint8_t byte,
347
/* Expands an n-bit quantity into a byte by copying the most-significant
348
* bits into the unused least-significant bits.
350
return byte << (8 - n_bits) | (byte >> (2 * n_bits - 8));
354
extract_unorm_endpoints(const struct bptc_unorm_mode *mode,
355
const uint8_t *block,
357
uint8_t endpoints[][4])
365
/* Extract each color component */
366
for (component = 0; component < 3; component++) {
367
for (subset = 0; subset < mode->n_subsets; subset++) {
368
for (endpoint = 0; endpoint < 2; endpoint++) {
369
endpoints[subset * 2 + endpoint][component] =
370
extract_bits(block, bit_offset, mode->n_color_bits);
371
bit_offset += mode->n_color_bits;
376
/* Extract the alpha values */
377
if (mode->n_alpha_bits > 0) {
378
for (subset = 0; subset < mode->n_subsets; subset++) {
379
for (endpoint = 0; endpoint < 2; endpoint++) {
380
endpoints[subset * 2 + endpoint][3] =
381
extract_bits(block, bit_offset, mode->n_alpha_bits);
382
bit_offset += mode->n_alpha_bits;
388
for (subset = 0; subset < mode->n_subsets; subset++)
389
for (endpoint = 0; endpoint < 2; endpoint++)
390
endpoints[subset * 2 + endpoint][3] = 255;
395
/* Add in the p-bits */
396
if (mode->has_endpoint_pbits) {
397
for (subset = 0; subset < mode->n_subsets; subset++) {
398
for (endpoint = 0; endpoint < 2; endpoint++) {
399
pbit = extract_bits(block, bit_offset, 1);
402
for (component = 0; component < n_components; component++) {
403
endpoints[subset * 2 + endpoint][component] <<= 1;
404
endpoints[subset * 2 + endpoint][component] |= pbit;
408
} else if (mode->has_shared_pbits) {
409
for (subset = 0; subset < mode->n_subsets; subset++) {
410
pbit = extract_bits(block, bit_offset, 1);
413
for (endpoint = 0; endpoint < 2; endpoint++) {
414
for (component = 0; component < n_components; component++) {
415
endpoints[subset * 2 + endpoint][component] <<= 1;
416
endpoints[subset * 2 + endpoint][component] |= pbit;
422
/* Expand the n-bit values to a byte */
423
for (subset = 0; subset < mode->n_subsets; subset++) {
424
for (endpoint = 0; endpoint < 2; endpoint++) {
425
for (component = 0; component < 3; component++) {
426
endpoints[subset * 2 + endpoint][component] =
427
expand_component(endpoints[subset * 2 + endpoint][component],
429
mode->has_endpoint_pbits +
430
mode->has_shared_pbits);
433
if (mode->n_alpha_bits > 0) {
434
endpoints[subset * 2 + endpoint][3] =
435
expand_component(endpoints[subset * 2 + endpoint][3],
437
mode->has_endpoint_pbits +
438
mode->has_shared_pbits);
447
is_anchor(int n_subsets,
458
return anchor_indices[0][partition_num] == texel;
460
return (anchor_indices[1][partition_num] == texel ||
461
anchor_indices[2][partition_num] == texel);
469
count_anchors_before_texel(int n_subsets,
482
if (texel > anchor_indices[0][partition_num])
486
if (texel > anchor_indices[1][partition_num])
488
if (texel > anchor_indices[2][partition_num])
500
interpolate(int32_t a, int32_t b,
504
static const uint8_t weights2[] = { 0, 21, 43, 64 };
505
static const uint8_t weights3[] = { 0, 9, 18, 27, 37, 46, 55, 64 };
506
static const uint8_t weights4[] =
507
{ 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };
508
static const uint8_t *weights[] = {
509
NULL, NULL, weights2, weights3, weights4
513
weight = weights[index_bits][index];
515
return ((64 - weight) * a + weight * b + 32) >> 6;
519
apply_rotation(int rotation,
529
t = result[rotation];
530
result[rotation] = result[3];
535
fetch_rgba_unorm_from_block(const uint8_t *block,
539
int mode_num = ffs(block[0]);
540
const struct bptc_unorm_mode *mode;
541
int bit_offset, secondary_bit_offset;
549
int anchors_before_texel;
551
uint8_t endpoints[3 * 2][4];
556
/* According to the spec this mode is reserved and shouldn't be used. */
557
memset(result, 0, 4);
561
mode = bptc_unorm_modes + mode_num - 1;
562
bit_offset = mode_num;
564
partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
565
bit_offset += mode->n_partition_bits;
567
switch (mode->n_subsets) {
572
subsets = partition_table1[partition_num];
575
subsets = partition_table2[partition_num];
582
if (mode->has_rotation_bits) {
583
rotation = extract_bits(block, bit_offset, 2);
589
if (mode->has_index_selection_bit) {
590
index_selection = extract_bits(block, bit_offset, 1);
596
bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints);
598
anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
599
partition_num, texel);
601
/* Calculate the offset to the secondary index */
602
secondary_bit_offset = (bit_offset +
603
BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
605
mode->n_secondary_index_bits * texel -
606
anchors_before_texel);
608
/* Calculate the offset to the primary index for this texel */
609
bit_offset += mode->n_index_bits * texel - anchors_before_texel;
611
subset_num = (subsets >> (texel * 2)) & 3;
613
anchor = is_anchor(mode->n_subsets, partition_num, texel);
615
index_bits = mode->n_index_bits;
618
indices[0] = extract_bits(block, bit_offset, index_bits);
620
if (mode->n_secondary_index_bits) {
621
index_bits = mode->n_secondary_index_bits;
624
indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
627
index = indices[index_selection];
628
index_bits = (index_selection ?
629
mode->n_secondary_index_bits :
632
for (component = 0; component < 3; component++)
633
result[component] = interpolate(endpoints[subset_num * 2][component],
634
endpoints[subset_num * 2 + 1][component],
638
/* Alpha uses the opposite index from the color components */
639
if (mode->n_secondary_index_bits && !index_selection) {
641
index_bits = mode->n_secondary_index_bits;
644
index_bits = mode->n_index_bits;
647
result[3] = interpolate(endpoints[subset_num * 2][3],
648
endpoints[subset_num * 2 + 1][3],
652
apply_rotation(rotation, result);
655
#ifdef BPTC_BLOCK_DECODE
657
decompress_rgba_unorm_block(int src_width, int src_height,
658
const uint8_t *block,
659
uint8_t *dst_row, int dst_rowstride)
661
int mode_num = ffs(block[0]);
662
const struct bptc_unorm_mode *mode;
663
int bit_offset_head, bit_offset, secondary_bit_offset;
671
int anchors_before_texel;
673
uint8_t endpoints[3 * 2][4];
679
/* According to the spec this mode is reserved and shouldn't be used. */
680
for(y = 0; y < src_height; y += 1) {
681
uint8_t *result = dst_row;
682
memset(result, 0, 4 * src_width);
683
dst_row += dst_rowstride;
688
mode = bptc_unorm_modes + mode_num - 1;
689
bit_offset_head = mode_num;
691
partition_num = extract_bits(block, bit_offset_head, mode->n_partition_bits);
692
bit_offset_head += mode->n_partition_bits;
694
switch (mode->n_subsets) {
699
subsets = partition_table1[partition_num];
702
subsets = partition_table2[partition_num];
709
if (mode->has_rotation_bits) {
710
rotation = extract_bits(block, bit_offset_head, 2);
711
bit_offset_head += 2;
716
if (mode->has_index_selection_bit) {
717
index_selection = extract_bits(block, bit_offset_head, 1);
723
bit_offset_head = extract_unorm_endpoints(mode, block, bit_offset_head, endpoints);
725
for(y = 0; y < src_height; y += 1) {
726
uint8_t *result = dst_row;
727
for(x = 0; x < src_width; x += 1) {
730
bit_offset = bit_offset_head;
732
anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
736
/* Calculate the offset to the secondary index */
737
secondary_bit_offset = (bit_offset +
738
BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
740
mode->n_secondary_index_bits * texel -
741
anchors_before_texel);
743
/* Calculate the offset to the primary index for this texel */
744
bit_offset += mode->n_index_bits * texel - anchors_before_texel;
746
subset_num = (subsets >> (texel * 2)) & 3;
748
anchor = is_anchor(mode->n_subsets, partition_num, texel);
750
index_bits = mode->n_index_bits;
753
indices[0] = extract_bits(block, bit_offset, index_bits);
755
if (mode->n_secondary_index_bits) {
756
index_bits = mode->n_secondary_index_bits;
759
indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
762
index = indices[index_selection];
763
index_bits = (index_selection ?
764
mode->n_secondary_index_bits :
767
for (component = 0; component < 3; component++)
768
result[component] = interpolate(endpoints[subset_num * 2][component],
769
endpoints[subset_num * 2 + 1][component],
773
/* Alpha uses the opposite index from the color components */
774
if (mode->n_secondary_index_bits && !index_selection) {
776
index_bits = mode->n_secondary_index_bits;
779
index_bits = mode->n_index_bits;
782
result[3] = interpolate(endpoints[subset_num * 2][3],
783
endpoints[subset_num * 2 + 1][3],
787
apply_rotation(rotation, result);
790
dst_row += dst_rowstride;
795
decompress_rgba_unorm(int width, int height,
796
const uint8_t *src, int src_rowstride,
797
uint8_t *dst, int dst_rowstride)
802
if (src_rowstride >= width * 4)
803
src_row_diff = src_rowstride - ((width + 3) & ~3) * 4;
807
for (y = 0; y < height; y += BLOCK_SIZE) {
808
for (x = 0; x < width; x += BLOCK_SIZE) {
809
decompress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
810
MIN2(height - y, BLOCK_SIZE),
812
dst + x * 4 + y * dst_rowstride,
819
#endif // BPTC_BLOCK_DECODE
822
sign_extend(int32_t value,
825
assert(n_bits > 0 && n_bits < 32);
827
const unsigned n = 32 - n_bits;
828
return (int32_t)((uint32_t)value << n) >> n;
832
signed_unquantize(int value, int n_endpoint_bits)
836
if (n_endpoint_bits >= 16)
849
if (value >= (1 << (n_endpoint_bits - 1)) - 1)
852
value = ((value << 15) + 0x4000) >> (n_endpoint_bits - 1);
861
unsigned_unquantize(int value, int n_endpoint_bits)
863
if (n_endpoint_bits >= 15)
869
if (value == (1 << n_endpoint_bits) - 1)
872
return ((value << 15) + 0x4000) >> (n_endpoint_bits - 1);
876
extract_float_endpoints(const struct bptc_float_mode *mode,
877
const uint8_t *block,
879
int32_t endpoints[][3],
882
const struct bptc_float_bitfield *bitfield;
883
int endpoint, component;
888
if (mode->n_partition_bits)
893
memset(endpoints, 0, sizeof endpoints[0][0] * n_endpoints * 3);
895
for (bitfield = mode->bitfields; bitfield->endpoint != -1; bitfield++) {
896
value = extract_bits(block, bit_offset, bitfield->n_bits);
897
bit_offset += bitfield->n_bits;
899
if (bitfield->reverse) {
900
for (i = 0; i < bitfield->n_bits; i++) {
901
if (value & (1 << i))
902
endpoints[bitfield->endpoint][bitfield->component] |=
903
1 << ((bitfield->n_bits - 1 - i) + bitfield->offset);
906
endpoints[bitfield->endpoint][bitfield->component] |=
907
value << bitfield->offset;
911
if (mode->transformed_endpoints) {
912
/* The endpoints are specified as signed offsets from e0 */
913
for (endpoint = 1; endpoint < n_endpoints; endpoint++) {
914
for (component = 0; component < 3; component++) {
915
value = sign_extend(endpoints[endpoint][component],
916
mode->n_delta_bits[component]);
917
endpoints[endpoint][component] =
918
((endpoints[0][component] + value) &
919
((1 << mode->n_endpoint_bits) - 1));
925
for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
926
for (component = 0; component < 3; component++) {
927
value = sign_extend(endpoints[endpoint][component],
928
mode->n_endpoint_bits);
929
endpoints[endpoint][component] =
930
signed_unquantize(value, mode->n_endpoint_bits);
934
for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
935
for (component = 0; component < 3; component++) {
936
endpoints[endpoint][component] =
937
unsigned_unquantize(endpoints[endpoint][component],
938
mode->n_endpoint_bits);
947
finish_unsigned_unquantize(int32_t value)
949
return value * 31 / 64;
953
finish_signed_unquantize(int32_t value)
956
return (-value * 31 / 32) | 0x8000;
958
return value * 31 / 32;
962
fetch_rgb_float_from_block(const uint8_t *block,
968
const struct bptc_float_mode *mode;
974
int anchors_before_texel;
975
int32_t endpoints[2 * 2][3];
981
if (block[0] & 0x2) {
982
mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
985
mode_num = block[0] & 3;
989
mode = bptc_float_modes + mode_num;
991
if (mode->reserved) {
992
memset(result, 0, sizeof result[0] * 3);
997
bit_offset = extract_float_endpoints(mode, block, bit_offset,
998
endpoints, is_signed);
1000
if (mode->n_partition_bits) {
1001
partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
1002
bit_offset += mode->n_partition_bits;
1004
subsets = partition_table1[partition_num];
1012
anchors_before_texel =
1013
count_anchors_before_texel(n_subsets, partition_num, texel);
1015
/* Calculate the offset to the primary index for this texel */
1016
bit_offset += mode->n_index_bits * texel - anchors_before_texel;
1018
subset_num = (subsets >> (texel * 2)) & 3;
1020
index_bits = mode->n_index_bits;
1021
if (is_anchor(n_subsets, partition_num, texel))
1023
index = extract_bits(block, bit_offset, index_bits);
1025
for (component = 0; component < 3; component++) {
1026
value = interpolate(endpoints[subset_num * 2][component],
1027
endpoints[subset_num * 2 + 1][component],
1029
mode->n_index_bits);
1032
value = finish_signed_unquantize(value);
1034
value = finish_unsigned_unquantize(value);
1036
result[component] = _mesa_half_to_float(value);
1042
#ifdef BPTC_BLOCK_DECODE
1044
decompress_rgb_float_block(unsigned src_width, unsigned src_height,
1045
const uint8_t *block,
1046
float *dst_row, unsigned dst_rowstride,
1050
const struct bptc_float_mode *mode;
1051
int bit_offset_head, bit_offset;
1056
int anchors_before_texel;
1057
int32_t endpoints[2 * 2][3];
1064
if (block[0] & 0x2) {
1065
mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
1066
bit_offset_head = 5;
1068
mode_num = block[0] & 3;
1069
bit_offset_head = 2;
1072
mode = bptc_float_modes + mode_num;
1074
if (mode->reserved) {
1075
for(y = 0; y < src_height; y += 1) {
1076
float *result = dst_row;
1077
memset(result, 0, sizeof result[0] * 4 * src_width);
1078
for(x = 0; x < src_width; x += 1) {
1082
dst_row += dst_rowstride / sizeof dst_row[0];
1087
bit_offset_head = extract_float_endpoints(mode, block, bit_offset_head,
1088
endpoints, is_signed);
1090
if (mode->n_partition_bits) {
1091
partition_num = extract_bits(block, bit_offset_head, mode->n_partition_bits);
1092
bit_offset_head += mode->n_partition_bits;
1094
subsets = partition_table1[partition_num];
1102
for(y = 0; y < src_height; y += 1) {
1103
float *result = dst_row;
1104
for(x = 0; x < src_width; x += 1) {
1107
bit_offset = bit_offset_head;
1111
anchors_before_texel =
1112
count_anchors_before_texel(n_subsets, partition_num, texel);
1114
/* Calculate the offset to the primary index for this texel */
1115
bit_offset += mode->n_index_bits * texel - anchors_before_texel;
1117
subset_num = (subsets >> (texel * 2)) & 3;
1119
index_bits = mode->n_index_bits;
1120
if (is_anchor(n_subsets, partition_num, texel))
1122
index = extract_bits(block, bit_offset, index_bits);
1124
for (component = 0; component < 3; component++) {
1125
value = interpolate(endpoints[subset_num * 2][component],
1126
endpoints[subset_num * 2 + 1][component],
1128
mode->n_index_bits);
1131
value = finish_signed_unquantize(value);
1133
value = finish_unsigned_unquantize(value);
1135
result[component] = _mesa_half_to_float(value);
1141
dst_row += dst_rowstride / sizeof dst_row[0];
1146
decompress_rgb_float(int width, int height,
1147
const uint8_t *src, int src_rowstride,
1148
float *dst, int dst_rowstride, bool is_signed)
1153
if (src_rowstride >= width * 4)
1154
src_row_diff = src_rowstride - ((width + 3) & ~3) * 4;
1158
for (y = 0; y < height; y += BLOCK_SIZE) {
1159
for (x = 0; x < width; x += BLOCK_SIZE) {
1160
decompress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
1161
MIN2(height - y, BLOCK_SIZE),
1164
(y * dst_rowstride / sizeof dst[0])),
1165
dst_rowstride, is_signed);
1168
src += src_row_diff;
1171
#endif // BPTC_BLOCK_DECODE
1174
write_bits(struct bit_writer *writer, int n_bits, int value)
1177
if (n_bits + writer->pos >= 8) {
1178
*(writer->dst++) = writer->buf | (value << writer->pos);
1180
value >>= (8 - writer->pos);
1181
n_bits -= (8 - writer->pos);
1184
writer->buf |= value << writer->pos;
1185
writer->pos += n_bits;
1188
} while (n_bits > 0);
1192
get_average_luminance_alpha_unorm(int width, int height,
1193
const uint8_t *src, int src_rowstride,
1194
int *average_luminance, int *average_alpha)
1196
int luminance_sum = 0, alpha_sum = 0;
1199
for (y = 0; y < height; y++) {
1200
for (x = 0; x < width; x++) {
1201
luminance_sum += src[0] + src[1] + src[2];
1202
alpha_sum += src[3];
1205
src += src_rowstride - width * 4;
1208
*average_luminance = luminance_sum / (width * height);
1209
*average_alpha = alpha_sum / (width * height);
1213
/**
 * Choose the two RGBA endpoints used to encode a block of texels.
 *
 * The texels are split into two buckets and each endpoint is the
 * per-channel mean of one bucket. Colour and alpha are bucketed
 * independently:
 *
 *  - colour: a texel whose luminance (r + g + b) is below
 *    \p average_luminance goes to bucket 0, otherwise to bucket 1;
 *  - alpha: a texel whose alpha is below \p average_alpha goes to
 *    bucket 0, otherwise to bucket 1.
 *
 * If either bucket ends up empty, both endpoints are set to the mean of
 * the whole block for that part (colour or alpha).
 *
 * \param width              number of texels per row to read
 * \param height             number of rows to read
 * \param src                first texel; 4 bytes per texel, bytes 0..2 are
 *                           the colour channels and byte 3 is alpha
 * \param src_rowstride      distance in bytes between the start of two rows
 * \param average_luminance  threshold for the colour split, in the same
 *                           r + g + b units as computed here
 * \param average_alpha      threshold for the alpha split
 * \param endpoints          receives endpoints[0] and endpoints[1]
 *
 * width * height must be non-zero; it is used as a divisor.
 */
static void
get_rgba_endpoints_unorm(int width, int height,
                         const uint8_t *src, int src_rowstride,
                         int average_luminance, int average_alpha,
                         uint8_t endpoints[][4])
{
   int endpoint_luminances[2];
   int midpoint;
   int sums[2][4];
   int endpoint;
   int luminance;
   uint8_t temp[3];
   const uint8_t *p = src;
   int rgb_left_endpoint_count = 0;
   int alpha_left_endpoint_count = 0;
   int x, y, i;

   memset(sums, 0, sizeof sums);

   for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
         /* Colour bucket is picked by luminance */
         luminance = p[0] + p[1] + p[2];
         if (luminance < average_luminance) {
            endpoint = 0;
            rgb_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         for (i = 0; i < 3; i++)
            sums[endpoint][i] += p[i];

         /* Alpha bucket is picked by the alpha channel itself (p[3]).
          * Comparing any other channel against average_alpha would bucket
          * alpha values by unrelated data.
          */
         if (p[3] < average_alpha) {
            endpoint = 0;
            alpha_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         sums[endpoint][3] += p[3];

         p += 4;
      }

      /* p has already advanced width * 4 bytes along this row */
      p += src_rowstride - width * 4;
   }

   if (rgb_left_endpoint_count == 0 ||
       rgb_left_endpoint_count == width * height) {
      /* One colour bucket is empty: use the block mean for both endpoints */
      for (i = 0; i < 3; i++)
         endpoints[0][i] = endpoints[1][i] =
            (sums[0][i] + sums[1][i]) / (width * height);
   } else {
      for (i = 0; i < 3; i++) {
         endpoints[0][i] = sums[0][i] / rgb_left_endpoint_count;
         endpoints[1][i] = (sums[1][i] /
                            (width * height - rgb_left_endpoint_count));
      }
   }

   if (alpha_left_endpoint_count == 0 ||
       alpha_left_endpoint_count == width * height) {
      /* One alpha bucket is empty: use the block mean for both endpoints */
      endpoints[0][3] = endpoints[1][3] =
         (sums[0][3] + sums[1][3]) / (width * height);
   } else {
      endpoints[0][3] = sums[0][3] / alpha_left_endpoint_count;
      endpoints[1][3] = (sums[1][3] /
                         (width * height - alpha_left_endpoint_count));
   }

   /* We may need to swap the endpoints to ensure the most-significant bit of
    * the first index is zero */

   for (endpoint = 0; endpoint < 2; endpoint++) {
      endpoint_luminances[endpoint] =
         endpoints[endpoint][0] +
         endpoints[endpoint][1] +
         endpoints[endpoint][2];
   }
   midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2;

   /* Swap when the first texel and endpoint 0 fall on opposite sides of the
    * luminance midpoint.
    */
   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (endpoint_luminances[0] <= midpoint)) {
      memcpy(temp, endpoints[0], 3);
      memcpy(endpoints[0], endpoints[1], 3);
      memcpy(endpoints[1], temp, 3);
   }

   /* Same for the alpha endpoints */

   midpoint = (endpoints[0][3] + endpoints[1][3]) / 2;

   if ((src[3] <= midpoint) != (endpoints[0][3] <= midpoint)) {
      temp[0] = endpoints[0][3];
      endpoints[0][3] = endpoints[1][3];
      endpoints[1][3] = temp[0];
   }
}
1310
write_rgb_indices_unorm(struct bit_writer *writer,
1311
int src_width, int src_height,
1312
const uint8_t *src, int src_rowstride,
1313
uint8_t endpoints[][4])
1316
int endpoint_luminances[2];
1321
for (endpoint = 0; endpoint < 2; endpoint++) {
1322
endpoint_luminances[endpoint] =
1323
endpoints[endpoint][0] +
1324
endpoints[endpoint][1] +
1325
endpoints[endpoint][2];
1328
/* If the endpoints have the same luminance then we'll just use index 0 for
1329
* all of the texels */
1330
if (endpoint_luminances[0] == endpoint_luminances[1]) {
1331
write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 2 - 1, 0);
1335
for (y = 0; y < src_height; y++) {
1336
for (x = 0; x < src_width; x++) {
1337
luminance = src[0] + src[1] + src[2];
1339
index = ((luminance - endpoint_luminances[0]) * 3 /
1340
(endpoint_luminances[1] - endpoint_luminances[0]));
1346
assert(x != 0 || y != 0 || index < 2);
1348
write_bits(writer, (x == 0 && y == 0) ? 1 : 2, index);
1353
/* Pad the indices out to the block size */
1354
if (src_width < BLOCK_SIZE)
1355
write_bits(writer, 2 * (BLOCK_SIZE - src_width), 0);
1357
src += src_rowstride - src_width * 4;
1360
/* Pad the indices out to the block size */
1361
if (src_height < BLOCK_SIZE)
1362
write_bits(writer, 2 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1366
write_alpha_indices_unorm(struct bit_writer *writer,
1367
int src_width, int src_height,
1368
const uint8_t *src, int src_rowstride,
1369
uint8_t endpoints[][4])
1374
/* If the endpoints have the same alpha then we'll just use index 0 for
1375
* all of the texels */
1376
if (endpoints[0][3] == endpoints[1][3]) {
1377
write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 3 - 1, 0);
1381
for (y = 0; y < src_height; y++) {
1382
for (x = 0; x < src_width; x++) {
1383
index = (((int) src[3] - (int) endpoints[0][3]) * 7 /
1384
((int) endpoints[1][3] - endpoints[0][3]));
1390
assert(x != 0 || y != 0 || index < 4);
1392
/* The first index has one less bit */
1393
write_bits(writer, (x == 0 && y == 0) ? 2 : 3, index);
1398
/* Pad the indices out to the block size */
1399
if (src_width < BLOCK_SIZE)
1400
write_bits(writer, 3 * (BLOCK_SIZE - src_width), 0);
1402
src += src_rowstride - src_width * 4;
1405
/* Pad the indices out to the block size */
1406
if (src_height < BLOCK_SIZE)
1407
write_bits(writer, 3 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1411
compress_rgba_unorm_block(int src_width, int src_height,
1412
const uint8_t *src, int src_rowstride,
1415
int average_luminance, average_alpha;
1416
uint8_t endpoints[2][4];
1417
struct bit_writer writer;
1418
int component, endpoint;
1420
get_average_luminance_alpha_unorm(src_width, src_height, src, src_rowstride,
1421
&average_luminance, &average_alpha);
1422
get_rgba_endpoints_unorm(src_width, src_height, src, src_rowstride,
1423
average_luminance, average_alpha,
1430
write_bits(&writer, 5, 0x10); /* mode 4 */
1431
write_bits(&writer, 2, 0); /* rotation 0 */
1432
write_bits(&writer, 1, 0); /* index selection bit */
1434
/* Write the color endpoints */
1435
for (component = 0; component < 3; component++)
1436
for (endpoint = 0; endpoint < 2; endpoint++)
1437
write_bits(&writer, 5, endpoints[endpoint][component] >> 3);
1439
/* Write the alpha endpoints */
1440
for (endpoint = 0; endpoint < 2; endpoint++)
1441
write_bits(&writer, 6, endpoints[endpoint][3] >> 2);
1443
write_rgb_indices_unorm(&writer,
1444
src_width, src_height,
1447
write_alpha_indices_unorm(&writer,
1448
src_width, src_height,
1454
compress_rgba_unorm(int width, int height,
1455
const uint8_t *src, int src_rowstride,
1456
uint8_t *dst, int dst_rowstride)
1461
if (dst_rowstride >= width * 4)
1462
dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1466
for (y = 0; y < height; y += BLOCK_SIZE) {
1467
for (x = 0; x < width; x += BLOCK_SIZE) {
1468
compress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
1469
MIN2(height - y, BLOCK_SIZE),
1470
src + x * 4 + y * src_rowstride,
1475
dst += dst_row_diff;
1480
/**
 * Return the mean of R+G+B over a rectangle of float RGB texels.
 *
 * \param width          number of texels per row
 * \param height         number of rows
 * \param src            first texel, 3 floats per texel
 * \param src_rowstride  distance in bytes between source rows
 */
static float
get_average_luminance_float(int width, int height,
                            const float *src, int src_rowstride)
{
   float luminance_sum = 0;
   int y, x;

   for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
         luminance_sum += src[0] + src[1] + src[2];
         src += 3;
      }

      /* Skip the row padding. Keep the arithmetic signed: sizeof yields a
       * size_t, which would otherwise turn a negative difference into a
       * huge unsigned value. */
      src += (src_rowstride - width * 3 * (int) sizeof (float)) /
             (int) sizeof (float);
   }

   return luminance_sum / (width * height);
}
1498
/**
 * Clamp a value to the finite range used for the endpoints.
 *
 * Values above 65504.0 are reduced to 65504.0. When is_signed is set,
 * values below -65504.0 are raised to -65504.0; otherwise negative values
 * are raised to zero. Anything else is returned unchanged.
 */
static float
clamp_value(float value, bool is_signed)
{
   if (value > 65504.0f)
      return 65504.0f;

   if (is_signed) {
      if (value < -65504.0f)
         return -65504.0f;
      else
         return value;
   }

   if (value < 0.0f)
      return 0.0f;

   return value;
}
1517
/**
 * Choose the two RGB endpoints for a block of float texels.
 *
 * Texels are split into two groups according to whether their R+G+B sum is
 * below average_luminance; each endpoint is the per-component mean of one
 * group. If every texel falls in the same group both endpoints are set to
 * the mean of the whole block. The endpoints are then clamped with
 * clamp_value() and, if necessary, swapped so that the first texel lies on
 * the same side of the midpoint as endpoint 0.
 *
 * \param width              number of texels per row
 * \param height             number of rows
 * \param src                first texel, 3 floats per texel
 * \param src_rowstride      distance in bytes between source rows
 * \param average_luminance  threshold used to split the texels
 * \param endpoints          receives the two endpoints
 * \param is_signed          passed through to clamp_value()
 */
static void
get_endpoints_float(int width, int height,
                    const float *src, int src_rowstride,
                    float average_luminance, float endpoints[][3],
                    bool is_signed)
{
   float endpoint_luminances[2];
   float midpoint;
   float sums[2][3];
   int endpoint, component;
   float luminance;
   float temp[3];
   const float *p = src;
   int left_endpoint_count = 0;
   int y, x, i;

   memset(sums, 0, sizeof sums);

   for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
         luminance = p[0] + p[1] + p[2];
         if (luminance < average_luminance) {
            endpoint = 0;
            left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         for (i = 0; i < 3; i++)
            sums[endpoint][i] += p[i];

         p += 3;
      }

      /* Skip the row padding. Keep the arithmetic signed: sizeof yields a
       * size_t, which would otherwise turn a negative difference into a
       * huge unsigned value. */
      p += (src_rowstride - width * 3 * (int) sizeof (float)) /
           (int) sizeof (float);
   }

   if (left_endpoint_count == 0 ||
       left_endpoint_count == width * height) {
      /* One of the groups is empty so use the overall average for both */
      for (i = 0; i < 3; i++)
         endpoints[0][i] = endpoints[1][i] =
            (sums[0][i] + sums[1][i]) / (width * height);
   } else {
      for (i = 0; i < 3; i++) {
         endpoints[0][i] = sums[0][i] / left_endpoint_count;
         endpoints[1][i] = sums[1][i] / (width * height - left_endpoint_count);
      }
   }

   /* Clamp the endpoints to the range of a half float and strip out
    * infinities */
   for (endpoint = 0; endpoint < 2; endpoint++) {
      for (component = 0; component < 3; component++) {
         endpoints[endpoint][component] =
            clamp_value(endpoints[endpoint][component], is_signed);
      }
   }

   /* We may need to swap the endpoints to ensure the most-significant bit of
    * the first index is zero */

   for (endpoint = 0; endpoint < 2; endpoint++) {
      endpoint_luminances[endpoint] =
         endpoints[endpoint][0] +
         endpoints[endpoint][1] +
         endpoints[endpoint][2];
   }
   midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2.0f;

   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (endpoint_luminances[0] <= midpoint)) {
      memcpy(temp, endpoints[0], sizeof temp);
      memcpy(endpoints[0], endpoints[1], sizeof temp);
      memcpy(endpoints[1], temp, sizeof temp);
   }
}
1593
write_rgb_indices_float(struct bit_writer *writer,
1594
int src_width, int src_height,
1595
const float *src, int src_rowstride,
1596
float endpoints[][3])
1599
float endpoint_luminances[2];
1604
for (endpoint = 0; endpoint < 2; endpoint++) {
1605
endpoint_luminances[endpoint] =
1606
endpoints[endpoint][0] +
1607
endpoints[endpoint][1] +
1608
endpoints[endpoint][2];
1611
/* If the endpoints have the same luminance then we'll just use index 0 for
1612
* all of the texels */
1613
if (endpoint_luminances[0] == endpoint_luminances[1]) {
1614
write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 4 - 1, 0);
1618
for (y = 0; y < src_height; y++) {
1619
for (x = 0; x < src_width; x++) {
1620
luminance = src[0] + src[1] + src[2];
1622
index = ((luminance - endpoint_luminances[0]) * 15 /
1623
(endpoint_luminances[1] - endpoint_luminances[0]));
1626
else if (index > 15)
1629
assert(x != 0 || y != 0 || index < 8);
1631
write_bits(writer, (x == 0 && y == 0) ? 3 : 4, index);
1636
/* Pad the indices out to the block size */
1637
if (src_width < BLOCK_SIZE)
1638
write_bits(writer, 4 * (BLOCK_SIZE - src_width), 0);
1640
src += (src_rowstride - src_width * 3 * sizeof (float)) / sizeof (float);
1643
/* Pad the indices out to the block size */
1644
if (src_height < BLOCK_SIZE)
1645
write_bits(writer, 4 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1649
/**
 * Convert a float endpoint component into the 10-bit value that is stored
 * in the block.
 *
 * The value is first converted to a half float with _mesa_float_to_half().
 * In the signed case the magnitude is rescaled by 32/31 and reduced by six
 * bits, and a negative result is stored in 10-bit two's complement. In the
 * unsigned case values <= 0 yield 0 and everything else is rescaled by
 * 64/31 and reduced by six bits.
 */
static int
get_endpoint_value(float value, bool is_signed)
{
   bool sign = false;
   int half;

   if (is_signed) {
      half = _mesa_float_to_half(value);

      /* Separate the sign bit so that only the magnitude is rescaled */
      if (half & 0x8000) {
         half &= 0x7fff;
         sign = true;
      }

      half = (32 * half / 31) >> 6;

      /* Negate and keep only the low 10 bits */
      if (sign)
         half = -half & ((1 << 10) - 1);

      return half;
   } else {
      if (value <= 0.0f)
         return 0;

      half = _mesa_float_to_half(value);

      return (64 * half / 31) >> 6;
   }
}
1679
compress_rgb_float_block(int src_width, int src_height,
1680
const float *src, int src_rowstride,
1684
float average_luminance;
1685
float endpoints[2][3];
1686
struct bit_writer writer;
1687
int component, endpoint;
1691
get_average_luminance_float(src_width, src_height, src, src_rowstride);
1692
get_endpoints_float(src_width, src_height, src, src_rowstride,
1693
average_luminance, endpoints, is_signed);
1699
write_bits(&writer, 5, 3); /* mode 3 */
1701
/* Write the endpoints */
1702
for (endpoint = 0; endpoint < 2; endpoint++) {
1703
for (component = 0; component < 3; component++) {
1705
get_endpoint_value(endpoints[endpoint][component], is_signed);
1706
write_bits(&writer, 10, endpoint_value);
1710
write_rgb_indices_float(&writer,
1711
src_width, src_height,
1717
compress_rgb_float(int width, int height,
1718
const float *src, int src_rowstride,
1719
uint8_t *dst, int dst_rowstride,
1725
if (dst_rowstride >= width * 4)
1726
dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1730
for (y = 0; y < height; y += BLOCK_SIZE) {
1731
for (x = 0; x < width; x += BLOCK_SIZE) {
1732
compress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
1733
MIN2(height - y, BLOCK_SIZE),
1735
y * src_rowstride / sizeof (float),
1741
dst += dst_row_diff;