26
26
**************************************************************************/
29
#include "pipe/p_defines.h"
29
31
#include "util/u_format.h"
32
#include "util/u_memory.h"
33
#include "util/u_string.h"
31
35
#include "lp_bld_type.h"
32
36
#include "lp_bld_const.h"
33
37
#include "lp_bld_conv.h"
38
#include "lp_bld_swizzle.h"
39
#include "lp_bld_gather.h"
34
40
#include "lp_bld_format.h"
38
lp_build_format_swizzle_chan_soa(struct lp_type type,
39
const LLVMValueRef *unswizzled,
40
enum util_format_swizzle swizzle)
43
case UTIL_FORMAT_SWIZZLE_X:
44
case UTIL_FORMAT_SWIZZLE_Y:
45
case UTIL_FORMAT_SWIZZLE_Z:
46
case UTIL_FORMAT_SWIZZLE_W:
47
return unswizzled[swizzle];
48
case UTIL_FORMAT_SWIZZLE_0:
49
return lp_build_zero(type);
50
case UTIL_FORMAT_SWIZZLE_1:
51
return lp_build_one(type);
52
case UTIL_FORMAT_SWIZZLE_NONE:
53
return lp_build_undef(type);
56
return lp_build_undef(type);
62
44
lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
45
struct lp_build_context *bld,
64
46
const LLVMValueRef *unswizzled,
65
LLVMValueRef *swizzled)
47
LLVMValueRef swizzled_out[4])
67
if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
49
assert(UTIL_FORMAT_SWIZZLE_0 == PIPE_SWIZZLE_ZERO);
50
assert(UTIL_FORMAT_SWIZZLE_1 == PIPE_SWIZZLE_ONE);
52
if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
54
* Return zzz1 for depth-stencil formats.
56
* XXX: Allow the depth swizzle to be controlled with an additional parameter,
57
* as the caller may wish another depth swizzle, or retain the stencil
68
60
enum util_format_swizzle swizzle = format_desc->swizzle[0];
69
LLVMValueRef depth = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle);
70
swizzled[2] = swizzled[1] = swizzled[0] = depth;
71
swizzled[3] = lp_build_one(type);
61
LLVMValueRef depth = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
62
swizzled_out[2] = swizzled_out[1] = swizzled_out[0] = depth;
63
swizzled_out[3] = bld->one;
75
67
for (chan = 0; chan < 4; ++chan) {
76
68
enum util_format_swizzle swizzle = format_desc->swizzle[chan];
77
swizzled[chan] = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle);
69
swizzled_out[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
76
* Unpack several pixels in SoA.
78
* It takes a vector of packed pixels:
80
* packed = {P0, P1, P2, P3, ..., Pn}
82
* And will produce four vectors:
84
* red = {R0, R1, R2, R3, ..., Rn}
85
* green = {G0, G1, G2, G3, ..., Gn}
86
* blue = {B0, B1, B2, B3, ..., Bn}
87
* alpha = {A0, A1, A2, A3, ..., An}
89
* It requires that a packed pixel fits into an element of the output
90
* channels. The common case is when converting pixels with a depth of 32 bits or
93
* \param format_desc the format of the 'packed' incoming pixel vector
94
* \param type the desired type for rgba_out (type.length = n, above)
95
* \param packed the incoming vector of packed pixels
96
* \param rgba_out returns the SoA R,G,B,A vectors
84
99
lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
85
100
const struct util_format_description *format_desc,
86
101
struct lp_type type,
87
102
LLVMValueRef packed,
103
LLVMValueRef rgba_out[4])
105
struct lp_build_context bld;
90
106
LLVMValueRef inputs[4];
94
/* FIXME: Support more formats */
95
110
assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
96
111
assert(format_desc->block.width == 1);
97
112
assert(format_desc->block.height == 1);
98
assert(format_desc->block.bits <= 32);
113
assert(format_desc->block.bits <= type.width);
114
/* FIXME: Support more output types */
115
assert(type.floating);
116
assert(type.width == 32);
118
lp_build_context_init(&bld, builder, type);
100
120
/* Decode the input vector components */
102
for (chan = 0; chan < 4; ++chan) {
103
unsigned width = format_desc->channel[chan].size;
104
unsigned stop = start + width;
122
for (chan = 0; chan < format_desc->nr_channels; ++chan) {
123
const unsigned width = format_desc->channel[chan].size;
124
const unsigned stop = start + width;
105
125
LLVMValueRef input;
109
129
switch(format_desc->channel[chan].type) {
110
130
case UTIL_FORMAT_TYPE_VOID:
131
input = lp_build_undef(type);
114
134
case UTIL_FORMAT_TYPE_UNSIGNED:
117
input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(type, start), "");
118
if(stop < format_desc->block.bits) {
119
unsigned mask = ((unsigned long long)1 << width) - 1;
120
input = LLVMBuildAnd(builder, input, lp_build_int_const_scalar(type, mask), "");
140
input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(type, start), "");
147
if (stop < format_desc->block.bits) {
148
unsigned mask = ((unsigned long long)1 << width) - 1;
149
input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(type, mask), "");
123
157
if(format_desc->channel[chan].normalized)
124
158
input = lp_build_unsigned_norm_to_float(builder, width, type, input);
126
input = LLVMBuildFPToSI(builder, input, lp_build_vec_type(type), "");
160
input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
165
input = lp_build_undef(type);
170
case UTIL_FORMAT_TYPE_SIGNED:
172
* Align the sign bit first.
175
if (stop < type.width) {
176
unsigned bits = type.width - stop;
177
LLVMValueRef bits_val = lp_build_const_int_vec(type, bits);
178
input = LLVMBuildShl(builder, input, bits_val, "");
182
* Align the LSB (with an arithmetic shift to preserve the sign)
185
if (format_desc->channel[chan].size < type.width) {
186
unsigned bits = type.width - format_desc->channel[chan].size;
187
LLVMValueRef bits_val = lp_build_const_int_vec(type, bits);
188
input = LLVMBuildAShr(builder, input, bits_val, "");
196
input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
197
if (format_desc->channel[chan].normalized) {
198
double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
199
LLVMValueRef scale_val = lp_build_const_vec(type, scale);
200
input = LLVMBuildFMul(builder, input, scale_val, "");
206
input = lp_build_undef(type);
211
case UTIL_FORMAT_TYPE_FLOAT:
215
assert(type.width == 32);
216
input = LLVMBuildBitCast(builder, input, lp_build_vec_type(type), "");
221
input = lp_build_undef(type);
225
case UTIL_FORMAT_TYPE_FIXED:
227
double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
228
LLVMValueRef scale_val = lp_build_const_vec(type, scale);
229
input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
230
input = LLVMBuildFMul(builder, input, scale_val, "");
146
lp_build_format_swizzle_soa(format_desc, type, inputs, rgba);
250
lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out);
255
lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
256
struct lp_type dst_type,
260
LLVMValueRef mask = lp_build_const_int_vec(dst_type, 0xff);
263
packed = LLVMBuildBitCast(builder, packed,
264
lp_build_int_vec_type(dst_type), "");
266
/* Decode the input vector components */
267
for (chan = 0; chan < 4; ++chan) {
268
unsigned start = chan*8;
269
unsigned stop = start + 8;
275
input = LLVMBuildLShr(builder, input,
276
lp_build_const_int_vec(dst_type, start), "");
279
input = LLVMBuildAnd(builder, input, mask, "");
281
input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input);
290
* Fetch texels from a texture, returning them in SoA layout.
292
* \param type the desired return type for 'rgba'. The vector length
293
* is the number of texels to fetch
295
* \param base_ptr points to start of the texture image block. For non-
296
* compressed formats, this simply points to the texel.
297
* For compressed formats, it points to the start of the
298
* compressed data block.
300
* \param i, j the sub-block pixel coordinates. For non-compressed formats
301
* these will always be (0,0). For compressed formats, i will
302
* be in [0, block_width-1] and j will be in [0, block_height-1].
305
lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
306
const struct util_format_description *format_desc,
308
LLVMValueRef base_ptr,
312
LLVMValueRef rgba_out[4])
315
if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
316
(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
317
format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
318
format_desc->block.width == 1 &&
319
format_desc->block.height == 1 &&
320
format_desc->block.bits <= type.width &&
321
(format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
322
format_desc->channel[0].size == 32))
325
* The packed pixel fits into an element of the destination format. Put
326
* the packed pixels into a vector and extract each component for all
327
* vector elements in parallel.
333
* gather the texels from the texture
334
* Ex: packed = {BGRA, BGRA, BGRA, BGRA}.
336
packed = lp_build_gather(builder,
338
format_desc->block.bits,
343
* convert texels to float rgba
345
lp_build_unpack_rgba_soa(builder,
353
* Try calling lp_build_fetch_rgba_aos for all pixels.
356
if (util_format_fits_8unorm(format_desc) &&
357
type.floating && type.width == 32 && type.length == 4) {
358
struct lp_type tmp_type;
361
memset(&tmp_type, 0, sizeof tmp_type);
363
tmp_type.length = type.length * 4;
364
tmp_type.norm = TRUE;
366
tmp = lp_build_fetch_rgba_aos(builder, format_desc, tmp_type,
367
base_ptr, offset, i, j);
369
lp_build_rgba8_to_f32_soa(builder,
378
* Fallback to calling lp_build_fetch_rgba_aos for each pixel.
380
* This is not the most efficient way of fetching pixels, as we
381
* miss some opportunities to do vectorization, but this is
382
* convenient for formats or scenarios for which there was no
383
* opportunity or incentive to optimize.
388
struct lp_type tmp_type;
393
for (chan = 0; chan < 4; ++chan) {
394
rgba_out[chan] = lp_build_undef(type);
397
/* loop over number of pixels */
398
for(k = 0; k < type.length; ++k) {
399
LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0);
400
LLVMValueRef offset_elem;
401
LLVMValueRef i_elem, j_elem;
404
offset_elem = LLVMBuildExtractElement(builder, offset, index, "");
406
i_elem = LLVMBuildExtractElement(builder, i, index, "");
407
j_elem = LLVMBuildExtractElement(builder, j, index, "");
409
/* Get a single float[4]={R,G,B,A} pixel */
410
tmp = lp_build_fetch_rgba_aos(builder, format_desc, tmp_type,
411
base_ptr, offset_elem,
415
* Insert the AoS tmp value channels into the SoA result vectors at
416
* position = 'index'.
418
for (chan = 0; chan < 4; ++chan) {
419
LLVMValueRef chan_val = LLVMConstInt(LLVMInt32Type(), chan, 0),
420
tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
421
rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan],
422
tmp_chan, index, "");