/*
 * Copyright 2015 Philip Taylor <philip@zaynar.co.uk>
 * Copyright 2018 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
/**
 * \file texcompress_astc.c
 *
 * Decompression code for GL_KHR_texture_compression_astc_ldr, which is just
 * ASTC 2D LDR.
 *
 * The ASTC 2D LDR decoder (without the sRGB part) was copied from the OASTC
 * library written by Philip Taylor. I added sRGB support and adjusted it for
 * Mesa.
 */
36
#include "texcompress_astc.h"
38
#include "util/half_float.h"
40
#include <cstdlib> // for abort() on windows
42
static bool VERBOSE_DECODE = false;
43
static bool VERBOSE_WRITE = false;
46
uint16_div_64k_to_half_to_unorm8(uint16_t v)
48
return _mesa_half_to_unorm8(_mesa_uint16_div_64k_to_half(v));
56
unsupported_hdr_void_extent,
57
reserved_block_mode_1,
58
reserved_block_mode_2,
59
dual_plane_and_too_many_partitions,
60
invalid_range_in_void_extent,
61
weight_grid_exceeds_block_size,
62
invalid_colour_endpoints_size,
63
invalid_colour_endpoints_count,
75
/* Based on the Color Unquantization Parameters table,
76
* plus the bit-only representations, sorted by increasing size
78
static cem_range cem_ranges[] = {
/*
 * Concatenate single-bit values into one integer, most significant bit
 * first: CAT_BITS_3(a, b, c) yields the 3-bit value "abc".
 * Every argument is expected to be 0 or 1.
 */
#define CAT_BITS_2(a, b) ( ((a) << 1) | (b) )
#define CAT_BITS_3(a, b, c) ( ((a) << 2) | ((b) << 1) | (c) )
#define CAT_BITS_4(a, b, c, d) ( ((a) << 3) | ((b) << 2) | ((c) << 1) | (d) )
#define CAT_BITS_5(a, b, c, d, e) ( ((a) << 4) | ((b) << 3) | ((c) << 2) | ((d) << 1) | (e) )

/**
 * Unpack 5n+8 bits from 'in' into 5 output values.
 * If n <= 4 then T should be uint32_t, else it must be uint64_t.
 *
 * Each output value is (trit << n) | m, where m is an n-bit field and the
 * trit is in the range 0..2. The five trits are packed together into the
 * eight bits T0..T7. Reading 'in' from bit 0 upwards the fields are:
 *
 *    m0, T0, T1, m1, T2, T3, m2, T4, m3, T5, T6, m4, T7
 *
 * \param n    number of plain bits per value (0..6)
 * \param in   the packed block, least significant bit first
 * \param out  receives the 5 decoded values
 */
template <typename T>
static void unpack_trit_block(int n, T in, uint8_t *out)
{
   assert(n >= 0);
   assert(n <= 6); /* else output will overflow uint8_t */

   uint8_t T0 = (in >> (n)) & 0x1;
   uint8_t T1 = (in >> (n+1)) & 0x1;
   uint8_t T2 = (in >> (2*n+2)) & 0x1;
   uint8_t T3 = (in >> (2*n+3)) & 0x1;
   uint8_t T4 = (in >> (3*n+4)) & 0x1;
   uint8_t T5 = (in >> (4*n+5)) & 0x1;
   uint8_t T6 = (in >> (4*n+6)) & 0x1;
   uint8_t T7 = (in >> (5*n+7)) & 0x1;
   uint8_t mmask = (1 << n) - 1;
   uint8_t m0 = (in >> (0)) & mmask;
   uint8_t m1 = (in >> (n+2)) & mmask;
   uint8_t m2 = (in >> (2*n+4)) & mmask;
   uint8_t m3 = (in >> (3*n+5)) & mmask;
   uint8_t m4 = (in >> (4*n+7)) & mmask;

   /* First recover the two high trits and the 5-bit code C that holds the
    * three low trits.
    */
   uint8_t C;
   uint8_t t4, t3, t2, t1, t0;
   if (CAT_BITS_3(T4, T3, T2) == 0x7) {
      C = CAT_BITS_5(T7, T6, T5, T1, T0);
      t4 = t3 = 2;
   } else {
      C = CAT_BITS_5(T4, T3, T2, T1, T0);
      if (CAT_BITS_2(T6, T5) == 0x3) {
         t4 = 2;
         t3 = T7;
      } else {
         t4 = T7;
         t3 = CAT_BITS_2(T6, T5);
      }
   }

   /* Then split C into t2, t1 and t0. */
   if ((C & 0x3) == 0x3) {
      t2 = 2;
      t1 = (C >> 4) & 0x1;
      uint8_t C3 = (C >> 3) & 0x1;
      uint8_t C2 = (C >> 2) & 0x1;
      t0 = (C3 << 1) | (C2 & ~C3);
   } else if (((C >> 2) & 0x3) == 0x3) {
      t2 = 2;
      t1 = 2;
      t0 = C & 0x3;
   } else {
      t2 = (C >> 4) & 0x1;
      t1 = (C >> 2) & 0x3;
      uint8_t C1 = (C >> 1) & 0x1;
      uint8_t C0 = (C >> 0) & 0x1;
      t0 = (C1 << 1) | (C0 & ~C1);
   }

   out[0] = (t0 << n) | m0;
   out[1] = (t1 << n) | m1;
   out[2] = (t2 << n) | m2;
   out[3] = (t3 << n) | m3;
   out[4] = (t4 << n) | m4;
}

/**
 * Unpack 3n+7 bits from 'in' into 3 output values
 *
 * Each output value is (quint << n) | m, where m is an n-bit field and the
 * quint is in the range 0..4. The three quints are packed together into the
 * seven bits Q0..Q6. Reading 'in' from bit 0 upwards the fields are:
 *
 *    m0, Q0, Q1, Q2, m1, Q3, Q4, m2, Q5, Q6
 *
 * \param n    number of plain bits per value (0..5)
 * \param in   the packed block, least significant bit first
 * \param out  receives the 3 decoded values
 */
static void unpack_quint_block(int n, uint32_t in, uint8_t *out)
{
   assert(n >= 0);
   assert(n <= 5); /* else output will overflow uint8_t */

   uint8_t Q0 = (in >> (n)) & 0x1;
   uint8_t Q1 = (in >> (n+1)) & 0x1;
   uint8_t Q2 = (in >> (n+2)) & 0x1;
   uint8_t Q3 = (in >> (2*n+3)) & 0x1;
   uint8_t Q4 = (in >> (2*n+4)) & 0x1;
   uint8_t Q5 = (in >> (3*n+5)) & 0x1;
   uint8_t Q6 = (in >> (3*n+6)) & 0x1;
   uint8_t mmask = (1 << n) - 1;
   uint8_t m0 = (in >> (0)) & mmask;
   uint8_t m1 = (in >> (n+3)) & mmask;
   uint8_t m2 = (in >> (2*n+5)) & mmask;

   uint8_t C;
   uint8_t q2, q1, q0;
   if (CAT_BITS_4(Q6, Q5, Q2, Q1) == 0x3) {
      /* Special case: both low quints are 4, C is not needed. */
      q2 = CAT_BITS_3(Q0, Q4 & ~Q0, Q3 & ~Q0);
      q1 = 4;
      q0 = 4;
   } else {
      if (CAT_BITS_2(Q2, Q1) == 0x3) {
         q2 = 4;
         C = CAT_BITS_5(Q4, Q3, 0x1 & ~Q6, 0x1 & ~Q5, Q0);
      } else {
         q2 = CAT_BITS_2(Q6, Q5);
         C = CAT_BITS_5(Q4, Q3, Q2, Q1, Q0);
      }
      if ((C & 0x7) == 0x5) {
         q1 = 4;
         q0 = (C >> 3) & 0x3;
      } else {
         q1 = (C >> 3) & 0x3;
         q0 = C & 0x7;
      }
   }

   out[0] = (q0 << n) | m0;
   out[1] = (q1 << n) | m1;
   out[2] = (q2 << n) | m2;
}
221
uint8x4_t(int a, int b, int c, int d)
223
assert(0 <= a && a <= 255);
224
assert(0 <= b && b <= 255);
225
assert(0 <= c && c <= 255);
226
assert(0 <= d && d <= 255);
233
static uint8x4_t clamped(int a, int b, int c, int d)
236
r.v[0] = MAX2(0, MIN2(255, a));
237
r.v[1] = MAX2(0, MIN2(255, b));
238
r.v[2] = MAX2(0, MIN2(255, c));
239
r.v[3] = MAX2(0, MIN2(255, d));
244
static uint8x4_t blue_contract(int r, int g, int b, int a)
246
return uint8x4_t((r+b) >> 1, (g+b) >> 1, b, a);
249
static uint8x4_t blue_contract_clamped(int r, int g, int b, int a)
251
return uint8x4_t::clamped((r+b) >> 1, (g+b) >> 1, b, a);
254
static void bit_transfer_signed(int &a, int &b)
264
static uint32_t hash52(uint32_t p)
279
static int select_partition(int seed, int x, int y, int z, int partitioncount,
287
seed += (partitioncount - 1) * 1024;
288
uint32_t rnum = hash52(seed);
289
uint8_t seed1 = rnum & 0xF;
290
uint8_t seed2 = (rnum >> 4) & 0xF;
291
uint8_t seed3 = (rnum >> 8) & 0xF;
292
uint8_t seed4 = (rnum >> 12) & 0xF;
293
uint8_t seed5 = (rnum >> 16) & 0xF;
294
uint8_t seed6 = (rnum >> 20) & 0xF;
295
uint8_t seed7 = (rnum >> 24) & 0xF;
296
uint8_t seed8 = (rnum >> 28) & 0xF;
297
uint8_t seed9 = (rnum >> 18) & 0xF;
298
uint8_t seed10 = (rnum >> 22) & 0xF;
299
uint8_t seed11 = (rnum >> 26) & 0xF;
300
uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;
317
sh1 = (seed & 2 ? 4 : 5);
318
sh2 = (partitioncount == 3 ? 6 : 5);
320
sh1 = (partitioncount == 3 ? 6 : 5);
321
sh2 = (seed & 2 ? 4 : 5);
323
sh3 = (seed & 0x10) ? sh1 : sh2;
338
int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
339
int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
340
int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
341
int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
348
if (partitioncount < 4)
350
if (partitioncount < 3)
353
if (a >= b && a >= c && a >= d)
355
else if (b >= c && b >= d)
364
struct InputBitVector
368
void printf_bits(int offset, int count, const char *fmt = "", ...)
371
memset(out, '.', 128);
374
for (int i = 0; i < count; ++i) {
375
out[127 - idx] = ((data[idx >> 5] >> (idx & 31)) & 1) ? '1' : '0';
386
uint32_t get_bits(int offset, int count)
388
assert(count >= 0 && count < 32);
392
out |= data[0] >> offset;
394
if (0 < offset && offset <= 32)
395
out |= data[1] << (32 - offset);
396
if (32 < offset && offset < 64)
397
out |= data[1] >> (offset - 32);
399
if (32 < offset && offset <= 64)
400
out |= data[2] << (64 - offset);
401
if (64 < offset && offset < 96)
402
out |= data[2] >> (offset - 64);
404
if (64 < offset && offset <= 96)
405
out |= data[3] << (96 - offset);
406
if (96 < offset && offset < 128)
407
out |= data[3] >> (offset - 96);
409
out &= (1 << count) - 1;
413
uint64_t get_bits64(int offset, int count)
415
assert(count >= 0 && count < 64);
419
out |= data[0] >> offset;
422
out |= (uint64_t)data[1] << (32 - offset);
423
if (32 < offset && offset < 64)
424
out |= data[1] >> (offset - 32);
426
if (0 < offset && offset <= 64)
427
out |= (uint64_t)data[2] << (64 - offset);
428
if (64 < offset && offset < 96)
429
out |= data[2] >> (offset - 64);
431
if (32 < offset && offset <= 96)
432
out |= (uint64_t)data[3] << (96 - offset);
433
if (96 < offset && offset < 128)
434
out |= data[3] >> (offset - 96);
436
out &= ((uint64_t)1 << count) - 1;
440
uint32_t get_bits_rev(int offset, int count)
442
assert(offset >= count);
443
uint32_t tmp = get_bits(offset - count, count);
445
for (int i = 0; i < count; ++i)
446
out |= ((tmp >> i) & 1) << (count - 1 - i);
451
struct OutputBitVector
459
memset(data, 0, sizeof(data));
462
void append(uint32_t value, int size)
465
printf("append offset=%d size=%d values=0x%x\n", offset, size, value);
467
assert(offset + size <= 128);
471
assert((value >> size) == 0);
474
int c = MIN2(size, 32 - (offset & 31));
475
data[offset >> 5] |= (value << (offset & 31));
482
void append64(uint64_t value, int size)
485
printf("append offset=%d size=%d values=0x%llx\n", offset, size, (unsigned long long)value);
487
assert(offset + size <= 128);
491
assert((value >> size) == 0);
494
int c = MIN2(size, 32 - (offset & 31));
495
data[offset >> 5] |= (value << (offset & 31));
502
void append(OutputBitVector &v, int size)
505
printf("append vector offset=%d size=%d\n", offset, size);
507
assert(offset + size <= 128);
510
append(v.data[i++], 32);
514
append(v.data[i] & ((1 << size) - 1), size);
517
void append_end(OutputBitVector &v, int size)
519
for (int i = 0; i < size; ++i)
520
data[(127 - i) >> 5] |= ((v.data[i >> 5] >> (i & 31)) & 1) << ((127 - i) & 31);
523
/* Insert the given number of '1' bits. (We could use 0s instead, but 1s are
524
* more likely to flush out bugs where we accidentally read undefined bits.)
529
printf("skip offset=%d size=%d\n", offset, size);
531
assert(offset + size <= 128);
533
append(0xffffffff, 32);
537
append(0xffffffff >> (32 - size), size);
545
Decoder(int block_w, int block_h, int block_d, bool srgb, bool output_unorm8)
546
: block_w(block_w), block_h(block_h), block_d(block_d), srgb(srgb),
547
output_unorm8(output_unorm8) {}
549
decode_error::type decode(const uint8_t *in, uint16_t *output) const;
551
int block_w, block_h, block_d;
552
bool srgb, output_unorm8;
558
bool bogus_colour_endpoints;
563
int colour_component_selector;
565
int wt_w, wt_h, wt_d;
571
int void_extent_min_s;
572
int void_extent_max_s;
573
int void_extent_min_t;
574
int void_extent_max_t;
575
uint16_t void_extent_colour_r;
576
uint16_t void_extent_colour_g;
577
uint16_t void_extent_colour_b;
578
uint16_t void_extent_colour_a;
581
int num_extra_cem_bits;
582
int colour_endpoint_data_offset;
589
/* Calculated by unpack_weights(): */
590
uint8_t weights_quant[64 + 4]; /* max 64 values, plus padding for overflows in trit parsing */
592
/* Calculated by unquantise_weights(): */
593
uint8_t weights[64 + 18]; /* max 64 values, plus padding for the infill interpolation */
595
/* Calculated by unpack_colour_endpoints(): */
596
uint8_t colour_endpoints_quant[18 + 4]; /* max 18 values, plus padding for overflows in trit parsing */
598
/* Calculated by unquantise_colour_endpoints(): */
599
uint8_t colour_endpoints[18];
601
/* Calculated by calculate_from_weights(): */
609
/* Calculated by calculate_remaining_bits(): */
612
/* Calculated by calculate_colour_endpoints_size(): */
613
int colour_endpoint_bits;
619
/* Calculated by compute_infill_weights(); */
620
uint8_t infill_weights[2][216]; /* large enough for 6x6x6 */
622
/* Calculated by decode_colour_endpoints(); */
623
uint8x4_t endpoints_decoded[2][4];
625
void calculate_from_weights();
626
void calculate_remaining_bits();
627
decode_error::type calculate_colour_endpoints_size();
629
void unquantise_weights();
630
void unquantise_colour_endpoints();
632
decode_error::type decode(const Decoder &decoder, InputBitVector in);
634
decode_error::type decode_block_mode(InputBitVector in);
635
decode_error::type decode_void_extent(InputBitVector in);
636
void decode_cem(InputBitVector in);
637
void unpack_colour_endpoints(InputBitVector in);
638
void decode_colour_endpoints();
639
void unpack_weights(InputBitVector in);
640
void compute_infill_weights(int block_w, int block_h, int block_d);
642
void write_decoded(const Decoder &decoder, uint16_t *output);
646
decode_error::type Decoder::decode(const uint8_t *in, uint16_t *output) const
649
InputBitVector in_vec;
650
memcpy(&in_vec.data, in, 16);
651
decode_error::type err = blk.decode(*this, in_vec);
652
if (err == decode_error::ok) {
653
blk.write_decoded(*this, output);
655
/* Fill output with the error colour */
656
for (int i = 0; i < block_w * block_h * block_d; ++i) {
658
output[i*4+0] = 0xff;
660
output[i*4+2] = 0xff;
661
output[i*4+3] = 0xff;
663
assert(!srgb); /* srgb must use unorm8 */
665
output[i*4+0] = FP16_ONE;
666
output[i*4+1] = FP16_ZERO;
667
output[i*4+2] = FP16_ONE;
668
output[i*4+3] = FP16_ONE;
676
decode_error::type Block::decode_void_extent(InputBitVector block)
680
is_void_extent = true;
681
void_extent_d = block.get_bits(9, 1);
682
void_extent_min_s = block.get_bits(12, 13);
683
void_extent_max_s = block.get_bits(25, 13);
684
void_extent_min_t = block.get_bits(38, 13);
685
void_extent_max_t = block.get_bits(51, 13);
686
void_extent_colour_r = block.get_bits(64, 16);
687
void_extent_colour_g = block.get_bits(80, 16);
688
void_extent_colour_b = block.get_bits(96, 16);
689
void_extent_colour_a = block.get_bits(112, 16);
691
/* TODO: maybe we should do something useful with the extent coordinates? */
694
return decode_error::unsupported_hdr_void_extent;
697
if (void_extent_min_s == 0x1fff && void_extent_max_s == 0x1fff
698
&& void_extent_min_t == 0x1fff && void_extent_max_t == 0x1fff) {
704
/* Check for illegal encoding */
705
if (void_extent_min_s >= void_extent_max_s || void_extent_min_t >= void_extent_max_t) {
706
return decode_error::invalid_range_in_void_extent;
710
return decode_error::ok;
713
decode_error::type Block::decode_block_mode(InputBitVector in)
715
dual_plane = in.get_bits(10, 1);
716
high_prec = in.get_bits(9, 1);
718
if (in.get_bits(0, 2) != 0x0) {
719
wt_range = (in.get_bits(0, 2) << 1) | in.get_bits(4, 1);
720
int a = in.get_bits(5, 2);
721
int b = in.get_bits(7, 2);
722
switch (in.get_bits(2, 2)) {
725
in.printf_bits(0, 11, "DHBBAAR00RR");
731
in.printf_bits(0, 11, "DHBBAAR01RR");
737
in.printf_bits(0, 11, "DHBBAAR10RR");
742
if ((b & 0x2) == 0) {
744
in.printf_bits(0, 11, "DH0BAAR11RR");
749
in.printf_bits(0, 11, "DH1BAAR11RR");
750
wt_w = (b & 0x1) + 2;
756
if (in.get_bits(6, 3) == 0x7) {
757
if (in.get_bits(0, 9) == 0x1fc) {
759
in.printf_bits(0, 11, "xx111111100 (void extent)");
760
return decode_void_extent(in);
763
in.printf_bits(0, 11, "xx111xxxx00");
764
return decode_error::reserved_block_mode_1;
767
if (in.get_bits(0, 4) == 0x0) {
769
in.printf_bits(0, 11, "xxxxxxx0000");
770
return decode_error::reserved_block_mode_2;
773
wt_range = in.get_bits(1, 3) | in.get_bits(4, 1);
774
int a = in.get_bits(5, 2);
777
switch (in.get_bits(7, 2)) {
780
in.printf_bits(0, 11, "DH00AARRR00");
786
in.printf_bits(0, 11, "DH01AARRR00");
791
if (in.get_bits(5, 1) == 0) {
793
in.printf_bits(0, 11, "DH1100RRR00");
798
in.printf_bits(0, 11, "DH1101RRR00");
805
in.printf_bits(0, 11, "BB10AARRR00");
806
b = in.get_bits(9, 2);
814
return decode_error::ok;
817
void Block::decode_cem(InputBitVector in)
819
cems[0] = cems[1] = cems[2] = cems[3] = -1;
821
num_extra_cem_bits = 0;
826
partition_index = in.get_bits(13, 10);
828
in.printf_bits(13, 10, "partition ID (%d)", partition_index);
830
uint32_t cem = in.get_bits(23, 6);
832
if ((cem & 0x3) == 0x0) {
834
cem_base_class = cem >> 2;
835
is_multi_cem = false;
837
for (int i = 0; i < num_parts; ++i)
841
in.printf_bits(23, 6, "CEM (single, %d)", cem);
844
cem_base_class = (cem & 0x3) - 1;
848
in.printf_bits(23, 6, "CEM (multi, base class %d)", cem_base_class);
850
int offset = 128 - weight_bits;
852
if (num_parts == 2) {
853
if (VERBOSE_DECODE) {
854
in.printf_bits(25, 4, "M0M0 C1 C0");
855
in.printf_bits(offset - 2, 2, "M1M1");
858
uint32_t c0 = in.get_bits(25, 1);
859
uint32_t c1 = in.get_bits(26, 1);
861
extra_cem_bits = c0 + c1;
863
num_extra_cem_bits = 2;
865
uint32_t m0 = in.get_bits(27, 2);
866
uint32_t m1 = in.get_bits(offset - 2, 2);
868
cems[0] = ((cem_base_class + c0) << 2) | m0;
869
cems[1] = ((cem_base_class + c1) << 2) | m1;
871
} else if (num_parts == 3) {
872
if (VERBOSE_DECODE) {
873
in.printf_bits(25, 4, "M0 C2 C1 C0");
874
in.printf_bits(offset - 5, 5, "M2M2 M1M1 M0");
877
uint32_t c0 = in.get_bits(25, 1);
878
uint32_t c1 = in.get_bits(26, 1);
879
uint32_t c2 = in.get_bits(27, 1);
881
extra_cem_bits = c0 + c1 + c2;
883
num_extra_cem_bits = 5;
885
uint32_t m0 = in.get_bits(28, 1) | (in.get_bits(128 - weight_bits - 5, 1) << 1);
886
uint32_t m1 = in.get_bits(offset - 4, 2);
887
uint32_t m2 = in.get_bits(offset - 2, 2);
889
cems[0] = ((cem_base_class + c0) << 2) | m0;
890
cems[1] = ((cem_base_class + c1) << 2) | m1;
891
cems[2] = ((cem_base_class + c2) << 2) | m2;
893
} else if (num_parts == 4) {
894
if (VERBOSE_DECODE) {
895
in.printf_bits(25, 4, "C3 C2 C1 C0");
896
in.printf_bits(offset - 8, 8, "M3M3 M2M2 M1M1 M0M0");
899
uint32_t c0 = in.get_bits(25, 1);
900
uint32_t c1 = in.get_bits(26, 1);
901
uint32_t c2 = in.get_bits(27, 1);
902
uint32_t c3 = in.get_bits(28, 1);
904
extra_cem_bits = c0 + c1 + c2 + c3;
906
num_extra_cem_bits = 8;
908
uint32_t m0 = in.get_bits(offset - 8, 2);
909
uint32_t m1 = in.get_bits(offset - 6, 2);
910
uint32_t m2 = in.get_bits(offset - 4, 2);
911
uint32_t m3 = in.get_bits(offset - 2, 2);
913
cems[0] = ((cem_base_class + c0) << 2) | m0;
914
cems[1] = ((cem_base_class + c1) << 2) | m1;
915
cems[2] = ((cem_base_class + c2) << 2) | m2;
916
cems[3] = ((cem_base_class + c3) << 2) | m3;
922
colour_endpoint_data_offset = 29;
925
uint32_t cem = in.get_bits(13, 4);
927
cem_base_class = cem >> 2;
928
is_multi_cem = false;
932
partition_index = -1;
935
in.printf_bits(13, 4, "CEM = %d (class %d)", cem, cem_base_class);
937
colour_endpoint_data_offset = 17;
941
void Block::unpack_colour_endpoints(InputBitVector in)
944
int offset = colour_endpoint_data_offset;
945
int bits_left = colour_endpoint_bits;
946
for (int i = 0; i < num_cem_values; i += 5) {
947
int bits_to_read = MIN2(bits_left, 8 + ce_bits * 5);
948
/* If ce_trits then ce_bits <= 6, so bits_to_read <= 38 and we have to use uint64_t */
949
uint64_t raw = in.get_bits64(offset, bits_to_read);
950
unpack_trit_block(ce_bits, raw, &colour_endpoints_quant[i]);
953
in.printf_bits(offset, bits_to_read,
954
"trits [%d,%d,%d,%d,%d]",
955
colour_endpoints_quant[i+0], colour_endpoints_quant[i+1],
956
colour_endpoints_quant[i+2], colour_endpoints_quant[i+3],
957
colour_endpoints_quant[i+4]);
959
offset += 8 + ce_bits * 5;
960
bits_left -= 8 + ce_bits * 5;
962
} else if (ce_quints) {
963
int offset = colour_endpoint_data_offset;
964
int bits_left = colour_endpoint_bits;
965
for (int i = 0; i < num_cem_values; i += 3) {
966
int bits_to_read = MIN2(bits_left, 7 + ce_bits * 3);
967
/* If ce_quints then ce_bits <= 5, so bits_to_read <= 22 and we can use uint32_t */
968
uint32_t raw = in.get_bits(offset, bits_to_read);
969
unpack_quint_block(ce_bits, raw, &colour_endpoints_quant[i]);
972
in.printf_bits(offset, bits_to_read,
974
colour_endpoints_quant[i], colour_endpoints_quant[i+1], colour_endpoints_quant[i+2]);
976
offset += 7 + ce_bits * 3;
977
bits_left -= 7 + ce_bits * 3;
980
assert((colour_endpoint_bits % ce_bits) == 0);
981
int offset = colour_endpoint_data_offset;
982
for (int i = 0; i < num_cem_values; i++) {
983
colour_endpoints_quant[i] = in.get_bits(offset, ce_bits);
986
in.printf_bits(offset, ce_bits, "bits [%d]", colour_endpoints_quant[i]);
993
void Block::decode_colour_endpoints()
995
int cem_values_idx = 0;
996
for (int part = 0; part < num_parts; ++part) {
997
uint8_t *v = &colour_endpoints[cem_values_idx];
1006
cem_values_idx += ((cems[part] >> 2) + 1) * 2;
1014
e0 = uint8x4_t(v0, v0, v0, 0xff);
1015
e1 = uint8x4_t(v1, v1, v1, 0xff);
1018
L0 = (v0 >> 2) | (v1 & 0xc0);
1019
L1 = L0 + (v1 & 0x3f);
1022
e0 = uint8x4_t(L0, L0, L0, 0xff);
1023
e1 = uint8x4_t(L1, L1, L1, 0xff);
1026
e0 = uint8x4_t(v0, v0, v0, v2);
1027
e1 = uint8x4_t(v1, v1, v1, v3);
1030
bit_transfer_signed(v1, v0);
1031
bit_transfer_signed(v3, v2);
1032
e0 = uint8x4_t(v0, v0, v0, v2);
1033
e1 = uint8x4_t::clamped(v0+v1, v0+v1, v0+v1, v2+v3);
1036
e0 = uint8x4_t(v0*v3 >> 8, v1*v3 >> 8, v2*v3 >> 8, 0xff);
1037
e1 = uint8x4_t(v0, v1, v2, 0xff);
1043
e0 = uint8x4_t(v0, v2, v4, 0xff);
1044
e1 = uint8x4_t(v1, v3, v5, 0xff);
1046
e0 = blue_contract(v1, v3, v5, 0xff);
1047
e1 = blue_contract(v0, v2, v4, 0xff);
1051
bit_transfer_signed(v1, v0);
1052
bit_transfer_signed(v3, v2);
1053
bit_transfer_signed(v5, v4);
1054
if (v1 + v3 + v5 >= 0) {
1055
e0 = uint8x4_t(v0, v2, v4, 0xff);
1056
e1 = uint8x4_t::clamped(v0+v1, v2+v3, v4+v5, 0xff);
1058
e0 = blue_contract_clamped(v0+v1, v2+v3, v4+v5, 0xff);
1059
e1 = blue_contract(v0, v2, v4, 0xff);
1063
e0 = uint8x4_t(v0*v3 >> 8, v1*v3 >> 8, v2*v3 >> 8, v4);
1064
e1 = uint8x4_t(v0, v1, v2, v5);
1070
e0 = uint8x4_t(v0, v2, v4, v6);
1071
e1 = uint8x4_t(v1, v3, v5, v7);
1073
e0 = blue_contract(v1, v3, v5, v7);
1074
e1 = blue_contract(v0, v2, v4, v6);
1078
bit_transfer_signed(v1, v0);
1079
bit_transfer_signed(v3, v2);
1080
bit_transfer_signed(v5, v4);
1081
bit_transfer_signed(v7, v6);
1082
if (v1 + v3 + v5 >= 0) {
1083
e0 = uint8x4_t(v0, v2, v4, v6);
1084
e1 = uint8x4_t::clamped(v0+v1, v2+v3, v4+v5, v6+v7);
1086
e0 = blue_contract_clamped(v0+v1, v2+v3, v4+v5, v6+v7);
1087
e1 = blue_contract(v0, v2, v4, v6);
1091
/* HDR endpoints not supported; return error colour */
1092
e0 = uint8x4_t(255, 0, 255, 255);
1093
e1 = uint8x4_t(255, 0, 255, 255);
1097
endpoints_decoded[0][part] = e0;
1098
endpoints_decoded[1][part] = e1;
1100
if (VERBOSE_DECODE) {
1101
printf("cems[%d]=%d v=[", part, cems[part]);
1102
for (int i = 0; i < (cems[part] >> 2) + 1; ++i) {
1105
printf("%3d", v[i]);
1107
printf("] e0=[%3d,%4d,%4d,%4d] e1=[%3d,%4d,%4d,%4d]\n",
1108
e0.v[0], e0.v[1], e0.v[2], e0.v[3],
1109
e1.v[0], e1.v[1], e1.v[2], e1.v[3]);
1114
void Block::unpack_weights(InputBitVector in)
1118
int bits_left = weight_bits;
1119
for (int i = 0; i < num_weights; i += 5) {
1120
int bits_to_read = MIN2(bits_left, 8 + 5*wt_bits);
1121
/* If wt_trits then wt_bits <= 3, so bits_to_read <= 23 and we can use uint32_t */
1122
uint32_t raw = in.get_bits_rev(offset, bits_to_read);
1123
unpack_trit_block(wt_bits, raw, &weights_quant[i]);
1126
in.printf_bits(offset - bits_to_read, bits_to_read, "weight trits [%d,%d,%d,%d,%d]",
1127
weights_quant[i+0], weights_quant[i+1],
1128
weights_quant[i+2], weights_quant[i+3],
1129
weights_quant[i+4]);
1131
offset -= 8 + wt_bits * 5;
1132
bits_left -= 8 + wt_bits * 5;
1135
} else if (wt_quints) {
1138
int bits_left = weight_bits;
1139
for (int i = 0; i < num_weights; i += 3) {
1140
int bits_to_read = MIN2(bits_left, 7 + 3*wt_bits);
1141
/* If wt_quints then wt_bits <= 2, so bits_to_read <= 13 and we can use uint32_t */
1142
uint32_t raw = in.get_bits_rev(offset, bits_to_read);
1143
unpack_quint_block(wt_bits, raw, &weights_quant[i]);
1146
in.printf_bits(offset - bits_to_read, bits_to_read, "weight quints [%d,%d,%d]",
1147
weights_quant[i], weights_quant[i+1], weights_quant[i+2]);
1149
offset -= 7 + wt_bits * 3;
1150
bits_left -= 7 + wt_bits * 3;
1155
assert((weight_bits % wt_bits) == 0);
1156
for (int i = 0; i < num_weights; ++i) {
1157
weights_quant[i] = in.get_bits_rev(offset, wt_bits);
1160
in.printf_bits(offset - wt_bits, wt_bits, "weight bits [%d]", weights_quant[i]);
1167
void Block::unquantise_weights()
1169
assert(num_weights <= (int)ARRAY_SIZE(weights_quant));
1170
assert(num_weights <= (int)ARRAY_SIZE(weights));
1172
memset(weights, 0, sizeof(weights));
1174
for (int i = 0; i < num_weights; ++i) {
1176
uint8_t v = weights_quant[i];
1185
A = (v & 0x1) ? 0x7F : 0x00;
1193
B = (v & 0x2) ? 0x45 : 0x00;
1198
B = ((v & 0x6) >> 1) | ((v & 0x6) << 4);
1205
uint16_t T = D * C + B;
1207
T = (A & 0x20) | (T >> 2);
1214
} else if (wt_quints) {
1220
A = (v & 0x1) ? 0x7F : 0x00;
1228
B = (v & 0x2) ? 0x42 : 0x00;
1235
uint16_t T = D * C + B;
1237
T = (A & 0x20) | (T >> 2);
1248
case 1: w = v ? 0x3F : 0x00; break;
1249
case 2: w = v | (v << 2) | (v << 4); break;
1250
case 3: w = v | (v << 3); break;
1251
case 4: w = (v >> 2) | (v << 2); break;
1252
case 5: w = (v >> 4) | (v << 1); break;
1253
default: unreachable("");
1263
void Block::compute_infill_weights(int block_w, int block_h, int block_d)
1265
int Ds = block_w <= 1 ? 0 : (1024 + block_w / 2) / (block_w - 1);
1266
int Dt = block_h <= 1 ? 0 : (1024 + block_h / 2) / (block_h - 1);
1267
int Dr = block_d <= 1 ? 0 : (1024 + block_d / 2) / (block_d - 1);
1268
for (int r = 0; r < block_d; ++r) {
1269
for (int t = 0; t < block_h; ++t) {
1270
for (int s = 0; s < block_w; ++s) {
1274
int gs = (cs * (wt_w - 1) + 32) >> 6;
1275
int gt = (ct * (wt_h - 1) + 32) >> 6;
1276
int gr = (cr * (wt_d - 1) + 32) >> 6;
1277
assert(gs >= 0 && gs <= 176);
1278
assert(gt >= 0 && gt <= 176);
1279
assert(gr >= 0 && gr <= 176);
1291
int w11 = (fs * ft + 8) >> 4;
1294
int w00 = 16 - fs - ft + w11;
1297
int p00, p01, p10, p11, i0, i1;
1298
int v0 = js + jt * wt_w;
1299
p00 = weights[(v0) * 2];
1300
p01 = weights[(v0 + 1) * 2];
1301
p10 = weights[(v0 + wt_w) * 2];
1302
p11 = weights[(v0 + wt_w + 1) * 2];
1303
i0 = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4;
1304
p00 = weights[(v0) * 2 + 1];
1305
p01 = weights[(v0 + 1) * 2 + 1];
1306
p10 = weights[(v0 + wt_w) * 2 + 1];
1307
p11 = weights[(v0 + wt_w + 1) * 2 + 1];
1308
assert((v0 + wt_w + 1) * 2 + 1 < (int)ARRAY_SIZE(weights));
1309
i1 = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4;
1310
assert(0 <= i0 && i0 <= 64);
1311
infill_weights[0][s + t*block_w + r*block_w*block_h] = i0;
1312
infill_weights[1][s + t*block_w + r*block_w*block_h] = i1;
1314
int p00, p01, p10, p11, i;
1315
int v0 = js + jt * wt_w;
1317
p01 = weights[v0 + 1];
1318
p10 = weights[v0 + wt_w];
1319
p11 = weights[v0 + wt_w + 1];
1320
assert(v0 + wt_w + 1 < (int)ARRAY_SIZE(weights));
1321
i = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4;
1322
assert(0 <= i && i <= 64);
1323
infill_weights[0][s + t*block_w + r*block_w*block_h] = i;
1330
void Block::unquantise_colour_endpoints()
1332
assert(num_cem_values <= (int)ARRAY_SIZE(colour_endpoints_quant));
1333
assert(num_cem_values <= (int)ARRAY_SIZE(colour_endpoints));
1335
for (int i = 0; i < num_cem_values; ++i) {
1336
uint8_t v = colour_endpoints_quant[i];
1339
uint16_t A, B, C, D;
1341
A = (v & 0x1) ? 0x1FF : 0x000;
1349
B = (v & 0x2) ? 0x116 : 0x000;
1354
t = ((v >> 1) & 0x3);
1355
B = t | (t << 2) | (t << 7);
1360
t = ((v >> 1) & 0x7);
1366
t = ((v >> 1) & 0xF);
1367
B = (t >> 2) | (t << 5);
1372
B = ((v & 0x3E) << 3) | ((v >> 5) & 0x1);
1379
uint16_t T = D * C + B;
1381
T = (A & 0x80) | (T >> 2);
1383
colour_endpoints[i] = T;
1384
} else if (ce_quints) {
1385
uint16_t A, B, C, D;
1387
A = (v & 0x1) ? 0x1FF : 0x000;
1395
B = (v & 0x2) ? 0x10C : 0x000;
1400
t = ((v >> 1) & 0x3);
1401
B = (t >> 1) | (t << 1) | (t << 7);
1406
t = ((v >> 1) & 0x7);
1407
B = (t >> 1) | (t << 6);
1412
t = ((v >> 1) & 0xF);
1413
B = (t >> 4) | (t << 5);
1420
uint16_t T = D * C + B;
1422
T = (A & 0x80) | (T >> 2);
1424
colour_endpoints[i] = T;
1427
case 1: v = v ? 0xFF : 0x00; break;
1428
case 2: v = (v << 6) | (v << 4) | (v << 2) | v; break;
1429
case 3: v = (v << 5) | (v << 2) | (v >> 1); break;
1430
case 4: v = (v << 4) | v; break;
1431
case 5: v = (v << 3) | (v >> 2); break;
1432
case 6: v = (v << 2) | (v >> 4); break;
1433
case 7: v = (v << 1) | (v >> 6); break;
1435
default: unreachable("");
1437
colour_endpoints[i] = v;
1442
decode_error::type Block::decode(const Decoder &decoder, InputBitVector in)
1444
decode_error::type err;
1447
bogus_colour_endpoints = false;
1448
bogus_weights = false;
1449
is_void_extent = false;
1454
/* TODO: test for all the illegal encodings */
1457
in.printf_bits(0, 128);
1459
err = decode_block_mode(in);
1460
if (err != decode_error::ok)
1464
return decode_error::ok;
1468
calculate_from_weights();
1471
printf("weights_grid=%dx%dx%d dual_plane=%d num_weights=%d high_prec=%d r=%d range=0..%d (%dt %dq %db) weight_bits=%d\n",
1472
wt_w, wt_h, wt_d, dual_plane, num_weights, high_prec, wt_range, wt_max, wt_trits, wt_quints, wt_bits, weight_bits);
1474
if (wt_w > decoder.block_w || wt_h > decoder.block_h || wt_d > decoder.block_d)
1475
return decode_error::weight_grid_exceeds_block_size;
1477
num_parts = in.get_bits(11, 2) + 1;
1480
in.printf_bits(11, 2, "partitions = %d", num_parts);
1482
if (dual_plane && num_parts > 3)
1483
return decode_error::dual_plane_and_too_many_partitions;
1488
printf("cem=[%d,%d,%d,%d] base_cem_class=%d\n", cems[0], cems[1], cems[2], cems[3], cem_base_class);
1490
int num_cem_pairs = (cem_base_class + 1) * num_parts + extra_cem_bits;
1491
num_cem_values = num_cem_pairs * 2;
1493
calculate_remaining_bits();
1494
err = calculate_colour_endpoints_size();
1495
if (err != decode_error::ok)
1499
in.printf_bits(colour_endpoint_data_offset, colour_endpoint_bits,
1500
"endpoint data (%d bits, %d vals, %dt %dq %db)",
1501
colour_endpoint_bits, num_cem_values, ce_trits, ce_quints, ce_bits);
1503
unpack_colour_endpoints(in);
1505
if (VERBOSE_DECODE) {
1506
printf("cem values raw =[");
1507
for (int i = 0; i < num_cem_values; i++) {
1510
printf("%3d", colour_endpoints_quant[i]);
1515
if (num_cem_values > 18)
1516
return decode_error::invalid_colour_endpoints_count;
1518
unquantise_colour_endpoints();
1520
if (VERBOSE_DECODE) {
1521
printf("cem values norm=[");
1522
for (int i = 0; i < num_cem_values; i++) {
1525
printf("%3d", colour_endpoints[i]);
1530
decode_colour_endpoints();
1533
int ccs_offset = 128 - weight_bits - num_extra_cem_bits - 2;
1534
colour_component_selector = in.get_bits(ccs_offset, 2);
1537
in.printf_bits(ccs_offset, 2, "colour component selector = %d", colour_component_selector);
1539
colour_component_selector = 0;
1544
in.printf_bits(128 - weight_bits, weight_bits, "weights (%d bits)", weight_bits);
1546
if (num_weights > 64)
1547
return decode_error::invalid_num_weights;
1549
if (weight_bits < 24 || weight_bits > 96)
1550
return decode_error::invalid_weight_bits;
1554
unquantise_weights();
1556
if (VERBOSE_DECODE) {
1557
printf("weights=[");
1558
for (int i = 0; i < num_weights; ++i) {
1561
printf("%d", weights[i]);
1565
for (int plane = 0; plane <= dual_plane; ++plane) {
1566
printf("weights (plane %d):\n", plane);
1570
for (int r = 0; r < wt_d; ++r) {
1571
for (int t = 0; t < wt_h; ++t) {
1572
for (int s = 0; s < wt_w; ++s) {
1573
printf("%3d", weights[i++ * (1 + dual_plane) + plane]);
1583
compute_infill_weights(decoder.block_w, decoder.block_h, decoder.block_d);
1585
if (VERBOSE_DECODE) {
1586
for (int plane = 0; plane <= dual_plane; ++plane) {
1587
printf("infilled weights (plane %d):\n", plane);
1591
for (int r = 0; r < decoder.block_d; ++r) {
1592
for (int t = 0; t < decoder.block_h; ++t) {
1593
for (int s = 0; s < decoder.block_w; ++s) {
1594
printf("%3d", infill_weights[plane][i++]);
1598
if (r < decoder.block_d - 1)
1606
return decode_error::ok;
1609
/*
 * Write the texels of this block into `output`, four components per texel
 * (output[idx*4 + 0] .. output[idx*4 + 3]).
 *
 * When decoder.output_unorm8 is set the stored values are 8-bit results,
 * otherwise each component is stored as FP16.  sRGB output requires the
 * unorm8 mode (see the assert below).
 *
 * NOTE(review): this listing is missing lines (brace-only lines, else
 * branches and some declarations such as idx, partition, w and c), so the
 * nesting described in the comments below is presumed from the surviving
 * statements - confirm against the complete file.
 */
void Block::write_decoded(const Decoder &decoder, uint16_t *output)
1611
/* sRGB can only be stored as unorm8. */
1612
assert(!decoder.srgb || decoder.output_unorm8);
1614
/* Void-extent block: every texel receives the same void_extent_colour_* values. */
if (is_void_extent) {
1615
for (int idx = 0; idx < decoder.block_w*decoder.block_h*decoder.block_d; ++idx) {
1616
if (decoder.output_unorm8) {
1618
/* RGB variant 1: shift each colour right by 8.
 * NOTE(review): presumably the sRGB path; the condition that selects
 * between this and the conversion below is not visible here. */
output[idx*4+0] = void_extent_colour_r >> 8;
1619
output[idx*4+1] = void_extent_colour_g >> 8;
1620
output[idx*4+2] = void_extent_colour_b >> 8;
1622
/* RGB variant 2: go through half-float and then to unorm8. */
output[idx*4+0] = uint16_div_64k_to_half_to_unorm8(void_extent_colour_r);
1623
output[idx*4+1] = uint16_div_64k_to_half_to_unorm8(void_extent_colour_g);
1624
output[idx*4+2] = uint16_div_64k_to_half_to_unorm8(void_extent_colour_b);
1626
/* Alpha uses the half-float conversion. */
output[idx*4+3] = uint16_div_64k_to_half_to_unorm8(void_extent_colour_a);
1628
/* Store the color as FP16. */
1629
output[idx*4+0] = _mesa_uint16_div_64k_to_half(void_extent_colour_r);
1630
output[idx*4+1] = _mesa_uint16_div_64k_to_half(void_extent_colour_g);
1631
output[idx*4+2] = _mesa_uint16_div_64k_to_half(void_extent_colour_b);
1632
output[idx*4+3] = _mesa_uint16_div_64k_to_half(void_extent_colour_a);
1638
/* Flag handed to select_partition(): set when the block has fewer than 31 texels. */
int small_block = (decoder.block_w * decoder.block_h * decoder.block_d) < 31;
1641
/* Visit every texel of the block, z outermost and x innermost. */
for (int z = 0; z < decoder.block_d; ++z) {
1642
for (int y = 0; y < decoder.block_h; ++y) {
1643
for (int x = 0; x < decoder.block_w; ++x) {
1646
/* With more than one partition, look up the partition of texel (x, y, z). */
if (num_parts > 1) {
1647
partition = select_partition(partition_index, x, y, z, num_parts, small_block);
1648
assert(partition < num_parts);
1655
/* The two decoded endpoint colours of the chosen partition. */
uint8x4_t e0 = endpoints_decoded[0][partition];
1656
uint8x4_t e1 = endpoints_decoded[1][partition];
1657
uint16_t c0[4], c1[4];
1659
/* Expand to 16 bits. */
1661
/* Variant 1: endpoint value in the high byte, 0x80 in the low byte.
 * NOTE(review): presumably the sRGB case; the selecting condition is
 * not visible in this listing. */
c0[0] = (uint16_t)((e0.v[0] << 8) | 0x80);
1662
c0[1] = (uint16_t)((e0.v[1] << 8) | 0x80);
1663
c0[2] = (uint16_t)((e0.v[2] << 8) | 0x80);
1664
c0[3] = (uint16_t)((e0.v[3] << 8) | 0x80);
1666
c1[0] = (uint16_t)((e1.v[0] << 8) | 0x80);
1667
c1[1] = (uint16_t)((e1.v[1] << 8) | 0x80);
1668
c1[2] = (uint16_t)((e1.v[2] << 8) | 0x80);
1669
c1[3] = (uint16_t)((e1.v[3] << 8) | 0x80);
1671
/* Variant 2: endpoint value replicated into both bytes. */
c0[0] = (uint16_t)((e0.v[0] << 8) | e0.v[0]);
1672
c0[1] = (uint16_t)((e0.v[1] << 8) | e0.v[1]);
1673
c0[2] = (uint16_t)((e0.v[2] << 8) | e0.v[2]);
1674
c0[3] = (uint16_t)((e0.v[3] << 8) | e0.v[3]);
1676
c1[0] = (uint16_t)((e1.v[0] << 8) | e1.v[0]);
1677
c1[1] = (uint16_t)((e1.v[1] << 8) | e1.v[1]);
1678
c1[2] = (uint16_t)((e1.v[2] << 8) | e1.v[2]);
1679
c1[3] = (uint16_t)((e1.v[3] << 8) | e1.v[3]);
1684
/* Two weights per texel: all components take w0 except component
 * colour_component_selector, which takes w1.
 * NOTE(review): presumably guarded by dual_plane - confirm. */
int w0 = infill_weights[0][idx];
1685
int w1 = infill_weights[1][idx];
1686
w[0] = w[1] = w[2] = w[3] = w0;
1687
w[colour_component_selector] = w1;
1689
/* One weight per texel: the same weight is used for all four components. */
int w0 = infill_weights[0][idx];
1690
w[0] = w[1] = w[2] = w[3] = w0;
1693
/* Interpolate to produce UNORM16, applying weights. */
1695
/* Each component blends c0 and c1 with weights (64 - w) and w, adds 32
 * for rounding and shifts right by 6. */
(uint16_t)((c0[0] * (64 - w[0]) + c1[0] * w[0] + 32) >> 6),
1696
(uint16_t)((c0[1] * (64 - w[1]) + c1[1] * w[1] + 32) >> 6),
1697
(uint16_t)((c0[2] * (64 - w[2]) + c1[2] * w[2] + 32) >> 6),
1698
(uint16_t)((c0[3] * (64 - w[3]) + c1[3] * w[3] + 32) >> 6),
1701
if (decoder.output_unorm8) {
1703
/* RGB variant 1: shift each interpolated value right by 8.
 * NOTE(review): presumably the sRGB path - the condition is not visible. */
output[idx*4+0] = c[0] >> 8;
1704
output[idx*4+1] = c[1] >> 8;
1705
output[idx*4+2] = c[2] >> 8;
1707
/* RGB variant 2: 65535 is special-cased to 0xff; every other value goes
 * through the half-float conversion. */
output[idx*4+0] = c[0] == 65535 ? 0xff : uint16_div_64k_to_half_to_unorm8(c[0]);
1708
output[idx*4+1] = c[1] == 65535 ? 0xff : uint16_div_64k_to_half_to_unorm8(c[1]);
1709
output[idx*4+2] = c[2] == 65535 ? 0xff : uint16_div_64k_to_half_to_unorm8(c[2]);
1711
/* Alpha uses the half-float conversion, with the same 65535 special case. */
output[idx*4+3] = c[3] == 65535 ? 0xff : uint16_div_64k_to_half_to_unorm8(c[3]);
1713
/* Store the color as FP16. */
1714
/* 65535 is special-cased to FP16_ONE. */
output[idx*4+0] = c[0] == 65535 ? FP16_ONE : _mesa_uint16_div_64k_to_half(c[0]);
1715
output[idx*4+1] = c[1] == 65535 ? FP16_ONE : _mesa_uint16_div_64k_to_half(c[1]);
1716
output[idx*4+2] = c[2] == 65535 ? FP16_ONE : _mesa_uint16_div_64k_to_half(c[2]);
1717
output[idx*4+3] = c[3] == 65535 ? FP16_ONE : _mesa_uint16_div_64k_to_half(c[3]);
1726
/*
 * Set the weight encoding parameters (wt_max together with the wt_trits,
 * wt_quints and wt_bits counts) and compute num_weights from the weight
 * grid dimensions wt_w, wt_h and wt_d.
 *
 * NOTE(review): this listing is missing lines.  The case labels of the
 * outer switch, the expression of the inner switches, any default cases
 * and any initialisation of the counters are not visible - confirm
 * against the complete file.
 */
void Block::calculate_from_weights()
1731
switch (high_prec) {
1734
/* First group of cases, wt_max 1..7.  In each case wt_max equals
 * 3^wt_trits * 5^wt_quints * 2^wt_bits - 1, treating the counters that
 * the case does not assign as zero. */
case 0x2: wt_max = 1; wt_bits = 1; break;
1735
case 0x3: wt_max = 2; wt_trits = 1; break;
1736
case 0x4: wt_max = 3; wt_bits = 2; break;
1737
case 0x5: wt_max = 4; wt_quints = 1; break;
1738
case 0x6: wt_max = 5; wt_trits = 1; wt_bits = 1; break;
1739
case 0x7: wt_max = 7; wt_bits = 3; break;
1745
/* Second group of cases, wt_max 9..31, same relation as above. */
case 0x2: wt_max = 9; wt_quints = 1; wt_bits = 1; break;
1746
case 0x3: wt_max = 11; wt_trits = 1; wt_bits = 2; break;
1747
case 0x4: wt_max = 15; wt_bits = 4; break;
1748
case 0x5: wt_max = 19; wt_quints = 1; wt_bits = 2; break;
1749
case 0x6: wt_max = 23; wt_trits = 1; wt_bits = 3; break;
1750
case 0x7: wt_max = 31; wt_bits = 5; break;
1756
/* At least one of the three counters must have been set. */
assert(wt_trits || wt_quints || wt_bits);
1758
num_weights = wt_w * wt_h * wt_d;
1764
/* Sum of three terms: num_weights * 8 * wt_trits divided by 5 rounded up,
 * num_weights * 7 * wt_quints divided by 3 rounded up, and
 * num_weights * wt_bits.
 * NOTE(review): the assignment target is not visible here; presumably
 * weight_bits, which calculate_remaining_bits() reads - confirm. */
(num_weights * 8 * wt_trits + 4) / 5
1765
+ (num_weights * 7 * wt_quints + 2) / 3
1766
+ num_weights * wt_bits;
1769
/*
 * Compute remaining_bits: what is left of 128 bits after subtracting
 * config_bits and weight_bits.
 *
 * NOTE(review): this listing is missing lines.  Only one of the
 * assignments to config_bits is visible; the declaration of config_bits
 * and its value on the other paths must be confirmed against the
 * complete file.
 */
void Block::calculate_remaining_bits()
1772
if (num_parts > 1) {
1776
/* Visible multi-partition case: 25 bits plus 3 per partition. */
config_bits = 25 + 3 * num_parts;
1784
remaining_bits = 128 - config_bits - weight_bits;
1787
/*
 * Pick the colour endpoint encoding for this block.
 *
 * Scans cem_ranges from its last entry to its first and takes the first
 * entry whose encoded size for num_cem_values values fits within
 * remaining_bits.  On success colour_endpoint_bits, ce_max, ce_trits,
 * ce_quints and ce_bits are set from that entry and decode_error::ok is
 * returned; otherwise decode_error::invalid_colour_endpoints_size is
 * returned.
 *
 * NOTE(review): brace-only lines and the declaration of cem_bits are
 * missing from this listing.
 */
decode_error::type Block::calculate_colour_endpoints_size()
1789
/* Specified as illegal */
1790
/* Fewer bits available than 13 * num_cem_values divided by 5, rounded up:
 * zero all endpoint fields and fail. */
if (remaining_bits < (13 * num_cem_values + 4) / 5) {
1791
colour_endpoint_bits = ce_max = ce_trits = ce_quints = ce_bits = 0;
1792
return decode_error::invalid_colour_endpoints_size;
1795
/* Find the largest cem_ranges that fits within remaining_bits */
1796
for (int i = ARRAY_SIZE(cem_ranges)-1; i >= 0; --i) {
1798
/* Encoded size of num_cem_values values with entry i: the trit term is
 * divided by 5 rounded up, the quint term by 3 rounded up, plus the
 * plain bits. */
cem_bits = (num_cem_values * 8 * cem_ranges[i].t + 4) / 5
1799
+ (num_cem_values * 7 * cem_ranges[i].q + 2) / 3
1800
+ num_cem_values * cem_ranges[i].b;
1802
if (cem_bits <= remaining_bits)
1804
/* Entry i fits: record its parameters and stop searching. */
colour_endpoint_bits = cem_bits;
1805
ce_max = cem_ranges[i].max;
1806
ce_trits = cem_ranges[i].t;
1807
ce_quints = cem_ranges[i].q;
1808
ce_bits = cem_ranges[i].b;
1809
return decode_error::ok;
1814
/* No entry of cem_ranges fits. */
return decode_error::invalid_colour_endpoints_size;
1818
* Decode ASTC 2D LDR texture data.
1820
* \param src_width in pixels
1821
* \param src_height in pixels
1822
* \param dst_stride in bytes
1825
_mesa_unpack_astc_2d_ldr(uint8_t *dst_row,
1826
unsigned dst_stride,
1827
const uint8_t *src_row,
1828
unsigned src_stride,
1830
unsigned src_height,
1833
/* NOTE(review): this listing is missing lines - the return type, the
 * src_width and format parameters, the statements that copy each texel
 * from src to dst, and the closing braces are not visible.  Confirm
 * against the complete file. */
assert(_mesa_is_format_astc_2d(format));
1834
bool srgb = _mesa_is_format_srgb(format);
1836
unsigned blk_w, blk_h;
1837
_mesa_get_format_block_size(format, &blk_w, &blk_h);
1839
/* Distance in bytes between consecutive compressed blocks of a source row
 * (see the src_row + x * block_size read below). */
const unsigned block_size = 16;
1840
/* Number of blocks needed to cover the image in each direction, rounded up. */
unsigned x_blocks = (src_width + blk_w - 1) / blk_w;
1841
unsigned y_blocks = (src_height + blk_h - 1) / blk_h;
1843
/* Block depth is 1.  NOTE(review): the last argument is presumably the
 * output_unorm8 flag - confirm against the Decoder constructor. */
Decoder dec(blk_w, blk_h, 1, srgb, true);
1845
for (unsigned y = 0; y < y_blocks; ++y) {
1846
for (unsigned x = 0; x < x_blocks; ++x) {
1847
/* Same size as the largest block. */
1848
uint16_t block_out[12 * 12 * 4];
1850
dec.decode(src_row + x * block_size, block_out);
1852
/* This can be smaller with NPOT dimensions. */
1853
unsigned dst_blk_w = MIN2(blk_w, src_width - x*blk_w);
1854
unsigned dst_blk_h = MIN2(blk_h, src_height - y*blk_h);
1856
/* Walk only the texels of the block that lie inside the image. */
for (unsigned sub_y = 0; sub_y < dst_blk_h; ++sub_y) {
1857
for (unsigned sub_x = 0; sub_x < dst_blk_w; ++sub_x) {
1858
/* Destination texel: 4 bytes per texel, rows dst_stride bytes apart. */
uint8_t *dst = dst_row + sub_y * dst_stride +
1859
(x * blk_w + sub_x) * 4;
1860
/* Decoded texel: 4 uint16_t values per texel, rows blk_w texels wide. */
const uint16_t *src = &block_out[(sub_y * blk_w + sub_x) * 4];
1869
/* Advance to the next row of blocks: src_stride bytes of source data and
 * blk_h destination rows. */
src_row += src_stride;
1870
dst_row += dst_stride * blk_h;