1
/**************************************************************************
3
* Copyright 2010 VMware, Inc.
6
* Permission is hereby granted, free of charge, to any person obtaining a
7
* copy of this software and associated documentation files (the
8
* "Software"), to deal in the Software without restriction, including
9
* without limitation the rights to use, copy, modify, merge, publish,
10
* distribute, sub license, and/or sell copies of the Software, and to
11
* permit persons to whom the Software is furnished to do so, subject to
12
* the following conditions:
14
* The above copyright notice and this permission notice (including the
15
* next paragraph) shall be included in all copies or substantial portions
18
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
**************************************************************************/
30
* Texture sampling -- AoS.
32
* @author Jose Fonseca <jfonseca@vmware.com>
33
* @author Brian Paul <brianp@vmware.com>
36
#include "pipe/p_defines.h"
37
#include "pipe/p_state.h"
38
#include "util/u_debug.h"
39
#include "util/u_dump.h"
40
#include "util/u_memory.h"
41
#include "util/u_math.h"
42
#include "util/format/u_format.h"
43
#include "util/u_cpu_detect.h"
44
#include "lp_bld_debug.h"
45
#include "lp_bld_type.h"
46
#include "lp_bld_const.h"
47
#include "lp_bld_conv.h"
48
#include "lp_bld_arit.h"
49
#include "lp_bld_bitarit.h"
50
#include "lp_bld_logic.h"
51
#include "lp_bld_swizzle.h"
52
#include "lp_bld_pack.h"
53
#include "lp_bld_flow.h"
54
#include "lp_bld_gather.h"
55
#include "lp_bld_format.h"
56
#include "lp_bld_init.h"
57
#include "lp_bld_sample.h"
58
#include "lp_bld_sample_aos.h"
59
#include "lp_bld_quad.h"
63
* Build LLVM code for texture coord wrapping, for nearest filtering,
64
* for scaled integer texcoords.
65
* \param block_length is the length of the pixel block along the coordinate axis
67
* \param coord the incoming texcoord (s,t or r) scaled to the texture size
68
* \param coord_f the incoming texcoord (s,t or r) as float vec
69
* \param length the texture size along one dimension
70
* \param stride pixel stride along the coordinate axis (in bytes)
71
* \param offset the texel offset along the coord axis
72
* \param is_pot if TRUE, length is a power of two
73
* \param wrap_mode one of PIPE_TEX_WRAP_x
74
* \param out_offset byte offset for the wrapped coordinate
75
* \param out_i resulting sub-block pixel coordinate for coord
78
lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
79
unsigned block_length,
87
LLVMValueRef *out_offset,
90
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
91
LLVMBuilderRef builder = bld->gallivm->builder;
92
LLVMValueRef length_minus_one;
94
length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
97
case PIPE_TEX_WRAP_REPEAT:
99
coord = LLVMBuildAnd(builder, coord, length_minus_one, "");
101
struct lp_build_context *coord_bld = &bld->coord_bld;
102
LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
104
offset = lp_build_int_to_float(coord_bld, offset);
105
offset = lp_build_div(coord_bld, offset, length_f);
106
coord_f = lp_build_add(coord_bld, coord_f, offset);
108
coord = lp_build_fract_safe(coord_bld, coord_f);
109
coord = lp_build_mul(coord_bld, coord, length_f);
110
coord = lp_build_itrunc(coord_bld, coord);
114
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
115
coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
116
coord = lp_build_min(int_coord_bld, coord, length_minus_one);
119
case PIPE_TEX_WRAP_CLAMP:
120
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
121
case PIPE_TEX_WRAP_MIRROR_REPEAT:
122
case PIPE_TEX_WRAP_MIRROR_CLAMP:
123
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
124
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
129
lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride,
135
* Helper to compute the first coord and the weight for
136
* linear wrap repeat npot textures
139
lp_build_coord_repeat_npot_linear_int(struct lp_build_sample_context *bld,
140
LLVMValueRef coord_f,
141
LLVMValueRef length_i,
142
LLVMValueRef length_f,
143
LLVMValueRef *coord0_i,
144
LLVMValueRef *weight_i)
146
struct lp_build_context *coord_bld = &bld->coord_bld;
147
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
148
struct lp_build_context abs_coord_bld;
149
struct lp_type abs_type;
150
LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i,
152
LLVMValueRef mask, i32_c8, i32_c128, i32_c255;
154
/* wrap with normalized floats is just fract */
155
coord_f = lp_build_fract(coord_bld, coord_f);
157
coord_f = lp_build_mul(coord_bld, coord_f, length_f);
158
/* convert to int, compute lerp weight */
159
coord_f = lp_build_mul_imm(&bld->coord_bld, coord_f, 256);
161
/* At this point we don't have any negative numbers so use non-signed
162
* build context which might help on some archs.
164
abs_type = coord_bld->type;
166
lp_build_context_init(&abs_coord_bld, bld->gallivm, abs_type);
167
*coord0_i = lp_build_iround(&abs_coord_bld, coord_f);
169
/* subtract 0.5 (add -128) */
170
i32_c128 = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, -128);
171
*coord0_i = LLVMBuildAdd(bld->gallivm->builder, *coord0_i, i32_c128, "");
173
/* compute fractional part (AND with 0xff) */
174
i32_c255 = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 255);
175
*weight_i = LLVMBuildAnd(bld->gallivm->builder, *coord0_i, i32_c255, "");
177
/* compute floor (shift right 8) */
178
i32_c8 = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 8);
179
*coord0_i = LLVMBuildAShr(bld->gallivm->builder, *coord0_i, i32_c8, "");
181
* we avoided the 0.5/length division before the repeat wrap,
182
* now need to fix up edge cases with selects
184
mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
185
PIPE_FUNC_LESS, *coord0_i, int_coord_bld->zero);
186
*coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i);
188
* We should never get values too large - except if coord was nan or inf,
189
* in which case things go terribly wrong...
190
* Alternatively, could use fract_safe above...
192
*coord0_i = lp_build_min(int_coord_bld, *coord0_i, length_minus_one);
197
* Build LLVM code for texture coord wrapping, for linear filtering,
198
* for scaled integer texcoords.
199
* \param block_length is the length of the pixel block along the coordinate axis
201
* \param coord0 the incoming texcoord (s,t or r) scaled to the texture size
202
* \param coord_f the incoming texcoord (s,t or r) as float vec
203
* \param length the texture size along one dimension
204
* \param stride pixel stride along the coordinate axis (in bytes)
205
* \param offset the texel offset along the coord axis
206
* \param is_pot if TRUE, length is a power of two
207
* \param wrap_mode one of PIPE_TEX_WRAP_x
208
* \param offset0 resulting relative offset for coord0
209
* \param offset1 resulting relative offset for coord0 + 1
210
* \param i0 resulting sub-block pixel coordinate for coord0
211
* \param i1 resulting sub-block pixel coordinate for coord0 + 1
214
lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
215
unsigned block_length,
217
LLVMValueRef *weight_i,
218
LLVMValueRef coord_f,
224
LLVMValueRef *offset0,
225
LLVMValueRef *offset1,
229
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
230
LLVMBuilderRef builder = bld->gallivm->builder;
231
LLVMValueRef length_minus_one;
232
LLVMValueRef lmask, umask, mask;
235
* If the pixel block covers more than one pixel then there is no easy
236
* way to calculate offset1 relative to offset0. Instead, compute them
237
* independently. Otherwise, try to compute offset0 and offset1 with
238
* a single stride multiplication.
241
length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
243
if (block_length != 1) {
246
case PIPE_TEX_WRAP_REPEAT:
248
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
249
coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
250
coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
254
LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
256
offset = lp_build_int_to_float(&bld->coord_bld, offset);
257
offset = lp_build_div(&bld->coord_bld, offset, length_f);
258
coord_f = lp_build_add(&bld->coord_bld, coord_f, offset);
260
lp_build_coord_repeat_npot_linear_int(bld, coord_f,
263
mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
264
PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
265
coord1 = LLVMBuildAnd(builder,
266
lp_build_add(int_coord_bld, coord0,
272
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
273
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
274
coord0 = lp_build_clamp(int_coord_bld, coord0, int_coord_bld->zero,
276
coord1 = lp_build_clamp(int_coord_bld, coord1, int_coord_bld->zero,
280
case PIPE_TEX_WRAP_CLAMP:
281
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
282
case PIPE_TEX_WRAP_MIRROR_REPEAT:
283
case PIPE_TEX_WRAP_MIRROR_CLAMP:
284
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
285
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
288
coord0 = int_coord_bld->zero;
289
coord1 = int_coord_bld->zero;
292
lp_build_sample_partial_offset(int_coord_bld, block_length, coord0, stride,
294
lp_build_sample_partial_offset(int_coord_bld, block_length, coord1, stride,
299
*i0 = int_coord_bld->zero;
300
*i1 = int_coord_bld->zero;
303
case PIPE_TEX_WRAP_REPEAT:
305
coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
308
LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
310
offset = lp_build_int_to_float(&bld->coord_bld, offset);
311
offset = lp_build_div(&bld->coord_bld, offset, length_f);
312
coord_f = lp_build_add(&bld->coord_bld, coord_f, offset);
314
lp_build_coord_repeat_npot_linear_int(bld, coord_f,
319
mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
320
PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
322
*offset0 = lp_build_mul(int_coord_bld, coord0, stride);
323
*offset1 = LLVMBuildAnd(builder,
324
lp_build_add(int_coord_bld, *offset0, stride),
328
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
329
/* XXX this might be slower than the separate path
330
* on some newer cpus. With sse41 this is 8 instructions vs. 7
331
* - at least on SNB this is almost certainly slower since
332
* min/max are cheaper than selects, and the muls aren't bad.
334
lmask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
335
PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
336
umask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
337
PIPE_FUNC_LESS, coord0, length_minus_one);
339
coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
340
coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);
342
mask = LLVMBuildAnd(builder, lmask, umask, "");
344
*offset0 = lp_build_mul(int_coord_bld, coord0, stride);
345
*offset1 = lp_build_add(int_coord_bld,
347
LLVMBuildAnd(builder, stride, mask, ""));
350
case PIPE_TEX_WRAP_CLAMP:
351
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
352
case PIPE_TEX_WRAP_MIRROR_REPEAT:
353
case PIPE_TEX_WRAP_MIRROR_CLAMP:
354
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
355
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
358
*offset0 = int_coord_bld->zero;
359
*offset1 = int_coord_bld->zero;
366
* Fetch texels for image with nearest sampling.
367
* Return filtered color as two vectors of 16-bit fixed point values.
370
lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld,
371
LLVMValueRef data_ptr,
373
LLVMValueRef x_subcoord,
374
LLVMValueRef y_subcoord,
375
LLVMValueRef *colors)
378
* Fetch the pixels as 4 x 32bit (rgba order might differ):
380
* rgba0 rgba1 rgba2 rgba3
382
* bit cast them into 16 x u8
384
* r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
386
* unpack them into two 8 x i16:
388
* r0 g0 b0 a0 r1 g1 b1 a1
389
* r2 g2 b2 a2 r3 g3 b3 a3
391
* The higher 8 bits of the resulting elements will be zero.
393
LLVMBuilderRef builder = bld->gallivm->builder;
395
struct lp_build_context u8n;
396
LLVMTypeRef u8n_vec_type;
397
struct lp_type fetch_type;
399
lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
400
u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);
402
fetch_type = lp_type_uint(bld->texel_type.width);
403
if (util_format_is_rgba8_variant(bld->format_desc)) {
405
* Given the format is a rgba8, just read the pixels as is,
406
* without any swizzling. Swizzling will be done later.
408
rgba8 = lp_build_gather(bld->gallivm,
409
bld->texel_type.length,
410
bld->format_desc->block.bits,
413
data_ptr, offset, TRUE);
415
rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
418
rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
433
* Sample a single texture image with nearest sampling.
434
* If sampling a cube texture, r = cube face in [0,5].
435
* Return filtered color as two vectors of 16-bit fixed point values.
438
lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
439
LLVMValueRef int_size,
440
LLVMValueRef row_stride_vec,
441
LLVMValueRef img_stride_vec,
442
LLVMValueRef data_ptr,
443
LLVMValueRef mipoffsets,
447
const LLVMValueRef *offsets,
448
LLVMValueRef *colors)
450
const unsigned dims = bld->dims;
451
struct lp_build_context i32;
452
LLVMValueRef width_vec, height_vec, depth_vec;
453
LLVMValueRef s_ipart, t_ipart = NULL, r_ipart = NULL;
454
LLVMValueRef s_float, t_float = NULL, r_float = NULL;
455
LLVMValueRef x_stride;
456
LLVMValueRef x_offset, offset;
457
LLVMValueRef x_subcoord, y_subcoord = NULL, z_subcoord;
459
lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));
461
lp_build_extract_image_sizes(bld,
469
s_float = s; t_float = t; r_float = r;
471
if (bld->static_sampler_state->normalized_coords) {
472
LLVMValueRef flt_size;
474
flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);
476
lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
479
/* convert float to int */
480
/* For correct rounding, need floor, not truncation here.
481
* Note that in some cases (clamp to edge, no texel offsets) we
482
* could use a non-signed build context, which would greatly help archs
483
* that don't have arch rounding.
485
s_ipart = lp_build_ifloor(&bld->coord_bld, s);
487
t_ipart = lp_build_ifloor(&bld->coord_bld, t);
489
r_ipart = lp_build_ifloor(&bld->coord_bld, r);
491
/* add texel offsets */
493
s_ipart = lp_build_add(&i32, s_ipart, offsets[0]);
495
t_ipart = lp_build_add(&i32, t_ipart, offsets[1]);
497
r_ipart = lp_build_add(&i32, r_ipart, offsets[2]);
502
/* get pixel, row, image strides */
503
x_stride = lp_build_const_vec(bld->gallivm,
504
bld->int_coord_bld.type,
505
bld->format_desc->block.bits/8);
507
/* Do texcoord wrapping, compute texel offset */
508
lp_build_sample_wrap_nearest_int(bld,
509
bld->format_desc->block.width,
511
width_vec, x_stride, offsets[0],
512
bld->static_texture_state->pot_width,
513
bld->static_sampler_state->wrap_s,
514
&x_offset, &x_subcoord);
517
LLVMValueRef y_offset;
518
lp_build_sample_wrap_nearest_int(bld,
519
bld->format_desc->block.height,
521
height_vec, row_stride_vec, offsets[1],
522
bld->static_texture_state->pot_height,
523
bld->static_sampler_state->wrap_t,
524
&y_offset, &y_subcoord);
525
offset = lp_build_add(&bld->int_coord_bld, offset, y_offset);
527
LLVMValueRef z_offset;
528
lp_build_sample_wrap_nearest_int(bld,
529
1, /* block length (depth) */
531
depth_vec, img_stride_vec, offsets[2],
532
bld->static_texture_state->pot_depth,
533
bld->static_sampler_state->wrap_r,
534
&z_offset, &z_subcoord);
535
offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
538
if (has_layer_coord(bld->static_texture_state->target)) {
539
LLVMValueRef z_offset;
540
/* The r coord is the cube face in [0,5] or array layer */
541
z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
542
offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
545
offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
548
lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
549
x_subcoord, y_subcoord,
555
* Fetch texels for image with linear sampling.
556
* Return filtered color as two vectors of 16-bit fixed point values.
559
lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld,
560
LLVMValueRef data_ptr,
561
LLVMValueRef offset[2][2][2],
562
LLVMValueRef x_subcoord[2],
563
LLVMValueRef y_subcoord[2],
564
LLVMValueRef s_fpart,
565
LLVMValueRef t_fpart,
566
LLVMValueRef r_fpart,
567
LLVMValueRef *colors)
569
const unsigned dims = bld->dims;
570
LLVMBuilderRef builder = bld->gallivm->builder;
571
struct lp_build_context u8n;
572
LLVMTypeRef u8n_vec_type;
573
LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
574
LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
575
LLVMValueRef shuffle;
576
LLVMValueRef neighbors[2][2][2]; /* [z][y][x] */
581
lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
582
u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);
585
* Transform 4 x i32 in
587
* s_fpart = {s0, s1, s2, s3}
589
* where each value is between 0 and 0xff, into 16 x i8
593
* s_fpart = {s0, s0, s0, s0, s1, s1, s1, s1, s2, s2, s2, s2, s3, s3, s3, s3}
595
* and likewise for t_fpart. There is no risk of losing precision here
596
* since the fractional parts only use the lower 8bits.
598
s_fpart = LLVMBuildBitCast(builder, s_fpart, u8n_vec_type, "");
600
t_fpart = LLVMBuildBitCast(builder, t_fpart, u8n_vec_type, "");
602
r_fpart = LLVMBuildBitCast(builder, r_fpart, u8n_vec_type, "");
604
for (j = 0; j < u8n.type.length; j += 4) {
605
#if UTIL_ARCH_LITTLE_ENDIAN
606
unsigned subindex = 0;
608
unsigned subindex = 3;
612
index = LLVMConstInt(elem_type, j + subindex, 0);
613
for (i = 0; i < 4; ++i)
614
shuffles[j + i] = index;
617
shuffle = LLVMConstVector(shuffles, u8n.type.length);
619
s_fpart = LLVMBuildShuffleVector(builder, s_fpart, u8n.undef,
622
t_fpart = LLVMBuildShuffleVector(builder, t_fpart, u8n.undef,
626
r_fpart = LLVMBuildShuffleVector(builder, r_fpart, u8n.undef,
631
* Fetch the pixels as 4 x 32bit (rgba order might differ):
633
* rgba0 rgba1 rgba2 rgba3
635
* bit cast them into 16 x u8
637
* r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
639
* unpack them into two 8 x i16:
641
* r0 g0 b0 a0 r1 g1 b1 a1
642
* r2 g2 b2 a2 r3 g3 b3 a3
644
* The higher 8 bits of the resulting elements will be zero.
646
numj = 1 + (dims >= 2);
647
numk = 1 + (dims >= 3);
649
for (k = 0; k < numk; k++) {
650
for (j = 0; j < numj; j++) {
651
for (i = 0; i < 2; i++) {
654
if (util_format_is_rgba8_variant(bld->format_desc)) {
655
struct lp_type fetch_type;
657
* Given the format is a rgba8, just read the pixels as is,
658
* without any swizzling. Swizzling will be done later.
660
fetch_type = lp_type_uint(bld->texel_type.width);
661
rgba8 = lp_build_gather(bld->gallivm,
662
bld->texel_type.length,
663
bld->format_desc->block.bits,
666
data_ptr, offset[k][j][i], TRUE);
668
rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
671
rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
675
data_ptr, offset[k][j][i],
681
neighbors[k][j][i] = rgba8;
687
* Linear interpolation with 8.8 fixed point.
690
/* general 1/2/3-D lerping */
692
lp_build_reduce_filter(&u8n,
693
bld->static_sampler_state->reduction_mode,
694
LP_BLD_LERP_PRESCALED_WEIGHTS,
700
} else if (dims == 2) {
702
lp_build_reduce_filter_2d(&u8n,
703
bld->static_sampler_state->reduction_mode,
704
LP_BLD_LERP_PRESCALED_WEIGHTS,
715
lp_build_reduce_filter_3d(&u8n,
716
bld->static_sampler_state->reduction_mode,
717
LP_BLD_LERP_PRESCALED_WEIGHTS,
719
s_fpart, t_fpart, r_fpart,
735
* Sample a single texture image with (bi-)(tri-)linear sampling.
736
* Return filtered color as two vectors of 16-bit fixed point values.
739
lp_build_sample_image_linear(struct lp_build_sample_context *bld,
740
LLVMValueRef int_size,
741
LLVMValueRef row_stride_vec,
742
LLVMValueRef img_stride_vec,
743
LLVMValueRef data_ptr,
744
LLVMValueRef mipoffsets,
748
const LLVMValueRef *offsets,
749
LLVMValueRef *colors)
751
const unsigned dims = bld->dims;
752
LLVMBuilderRef builder = bld->gallivm->builder;
753
struct lp_build_context i32;
754
LLVMValueRef i32_c8, i32_c128, i32_c255;
755
LLVMValueRef width_vec, height_vec, depth_vec;
756
LLVMValueRef s_ipart, s_fpart, s_float;
757
LLVMValueRef t_ipart = NULL, t_fpart = NULL, t_float = NULL;
758
LLVMValueRef r_ipart = NULL, r_fpart = NULL, r_float = NULL;
759
LLVMValueRef x_stride, y_stride, z_stride;
760
LLVMValueRef x_offset0, x_offset1;
761
LLVMValueRef y_offset0, y_offset1;
762
LLVMValueRef z_offset0, z_offset1;
763
LLVMValueRef offset[2][2][2]; /* [z][y][x] */
764
LLVMValueRef x_subcoord[2], y_subcoord[2] = {NULL, NULL}, z_subcoord[2];
767
lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));
769
lp_build_extract_image_sizes(bld,
777
s_float = s; t_float = t; r_float = r;
779
if (bld->static_sampler_state->normalized_coords) {
780
LLVMValueRef scaled_size;
781
LLVMValueRef flt_size;
783
/* scale size by 256 (8 fractional bits) */
784
scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);
786
flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);
788
lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
791
/* scale coords by 256 (8 fractional bits) */
792
s = lp_build_mul_imm(&bld->coord_bld, s, 256);
794
t = lp_build_mul_imm(&bld->coord_bld, t, 256);
796
r = lp_build_mul_imm(&bld->coord_bld, r, 256);
799
/* convert float to int */
800
/* For correct rounding, need round to nearest, not truncation here.
801
* Note that in some cases (clamp to edge, no texel offsets) we
802
* could use a non-signed build context which would help archs which
803
* don't have fptosi intrinsic with nearest rounding implemented.
805
s = lp_build_iround(&bld->coord_bld, s);
807
t = lp_build_iround(&bld->coord_bld, t);
809
r = lp_build_iround(&bld->coord_bld, r);
811
/* subtract 0.5 (add -128) */
812
i32_c128 = lp_build_const_int_vec(bld->gallivm, i32.type, -128);
814
s = LLVMBuildAdd(builder, s, i32_c128, "");
816
t = LLVMBuildAdd(builder, t, i32_c128, "");
819
r = LLVMBuildAdd(builder, r, i32_c128, "");
822
/* compute floor (shift right 8) */
823
i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
824
s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
826
t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
828
r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
830
/* add texel offsets */
832
s_ipart = lp_build_add(&i32, s_ipart, offsets[0]);
834
t_ipart = lp_build_add(&i32, t_ipart, offsets[1]);
836
r_ipart = lp_build_add(&i32, r_ipart, offsets[2]);
841
/* compute fractional part (AND with 0xff) */
842
i32_c255 = lp_build_const_int_vec(bld->gallivm, i32.type, 255);
843
s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
845
t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
847
r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");
849
/* get pixel, row and image strides */
850
x_stride = lp_build_const_vec(bld->gallivm, bld->int_coord_bld.type,
851
bld->format_desc->block.bits/8);
852
y_stride = row_stride_vec;
853
z_stride = img_stride_vec;
855
/* do texcoord wrapping and compute texel offsets */
856
lp_build_sample_wrap_linear_int(bld,
857
bld->format_desc->block.width,
858
s_ipart, &s_fpart, s_float,
859
width_vec, x_stride, offsets[0],
860
bld->static_texture_state->pot_width,
861
bld->static_sampler_state->wrap_s,
862
&x_offset0, &x_offset1,
863
&x_subcoord[0], &x_subcoord[1]);
865
/* add potential cube/array/mip offsets now as they are constant per pixel */
866
if (has_layer_coord(bld->static_texture_state->target)) {
867
LLVMValueRef z_offset;
868
z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
869
/* The r coord is the cube face in [0,5] or array layer */
870
x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, z_offset);
871
x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, z_offset);
874
x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, mipoffsets);
875
x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, mipoffsets);
878
for (z = 0; z < 2; z++) {
879
for (y = 0; y < 2; y++) {
880
offset[z][y][0] = x_offset0;
881
offset[z][y][1] = x_offset1;
886
lp_build_sample_wrap_linear_int(bld,
887
bld->format_desc->block.height,
888
t_ipart, &t_fpart, t_float,
889
height_vec, y_stride, offsets[1],
890
bld->static_texture_state->pot_height,
891
bld->static_sampler_state->wrap_t,
892
&y_offset0, &y_offset1,
893
&y_subcoord[0], &y_subcoord[1]);
895
for (z = 0; z < 2; z++) {
896
for (x = 0; x < 2; x++) {
897
offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
898
offset[z][0][x], y_offset0);
899
offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
900
offset[z][1][x], y_offset1);
906
lp_build_sample_wrap_linear_int(bld,
907
1, /* block length (depth) */
908
r_ipart, &r_fpart, r_float,
909
depth_vec, z_stride, offsets[2],
910
bld->static_texture_state->pot_depth,
911
bld->static_sampler_state->wrap_r,
912
&z_offset0, &z_offset1,
913
&z_subcoord[0], &z_subcoord[1]);
914
for (y = 0; y < 2; y++) {
915
for (x = 0; x < 2; x++) {
916
offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
917
offset[0][y][x], z_offset0);
918
offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
919
offset[1][y][x], z_offset1);
924
lp_build_sample_fetch_image_linear(bld, data_ptr, offset,
925
x_subcoord, y_subcoord,
926
s_fpart, t_fpart, r_fpart,
932
* Sample the texture/mipmap using given image filter and mip filter.
933
* data0_ptr and data1_ptr point to the two mipmap levels to sample
934
* from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
935
* If we're using nearest miplevel sampling the '1' values will be null/unused.
938
lp_build_sample_mipmap(struct lp_build_sample_context *bld,
944
const LLVMValueRef *offsets,
945
LLVMValueRef ilevel0,
946
LLVMValueRef ilevel1,
947
LLVMValueRef lod_fpart,
948
LLVMValueRef colors_var)
950
LLVMBuilderRef builder = bld->gallivm->builder;
953
LLVMValueRef row_stride0_vec = NULL;
954
LLVMValueRef row_stride1_vec = NULL;
955
LLVMValueRef img_stride0_vec = NULL;
956
LLVMValueRef img_stride1_vec = NULL;
957
LLVMValueRef data_ptr0;
958
LLVMValueRef data_ptr1;
959
LLVMValueRef mipoff0 = NULL;
960
LLVMValueRef mipoff1 = NULL;
961
LLVMValueRef colors0;
962
LLVMValueRef colors1;
964
/* sample the first mipmap level */
965
lp_build_mipmap_level_sizes(bld, ilevel0,
967
&row_stride0_vec, &img_stride0_vec);
968
if (bld->num_mips == 1) {
969
data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
972
/* This path should work for num_lods 1 too but slightly less efficient */
973
data_ptr0 = bld->base_ptr;
974
mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
977
if (img_filter == PIPE_TEX_FILTER_NEAREST) {
978
lp_build_sample_image_nearest(bld,
980
row_stride0_vec, img_stride0_vec,
981
data_ptr0, mipoff0, s, t, r, offsets,
985
assert(img_filter == PIPE_TEX_FILTER_LINEAR);
986
lp_build_sample_image_linear(bld,
988
row_stride0_vec, img_stride0_vec,
989
data_ptr0, mipoff0, s, t, r, offsets,
993
/* Store the first level's colors in the output variables */
994
LLVMBuildStore(builder, colors0, colors_var);
996
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
997
LLVMValueRef h16vec_scale = lp_build_const_vec(bld->gallivm,
998
bld->lodf_bld.type, 256.0);
999
LLVMTypeRef i32vec_type = bld->lodi_bld.vec_type;
1000
struct lp_build_if_state if_ctx;
1001
LLVMValueRef need_lerp;
1002
unsigned num_quads = bld->coord_bld.type.length / 4;
1005
lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16vec_scale, "");
1006
lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32vec_type, "lod_fpart.fixed16");
1008
/* need_lerp = lod_fpart > 0 */
1009
if (bld->num_lods == 1) {
1010
need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
1011
lod_fpart, bld->lodi_bld.zero,
1016
* We'll do mip filtering if any of the quads need it.
1017
* It might be better to split the vectors here and only fetch/filter
1018
* quads which need it.
1021
* We need to clamp lod_fpart here since we can get negative
1022
* values which would screw up filtering if not all
1023
* lod_fpart values have same sign.
1024
* We can however then skip the greater than comparison.
1026
lod_fpart = lp_build_max(&bld->lodi_bld, lod_fpart,
1027
bld->lodi_bld.zero);
1028
need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_fpart);
1031
lp_build_if(&if_ctx, bld->gallivm, need_lerp);
1033
struct lp_build_context u8n_bld;
1035
lp_build_context_init(&u8n_bld, bld->gallivm, lp_type_unorm(8, bld->vector_width));
1037
/* sample the second mipmap level */
1038
lp_build_mipmap_level_sizes(bld, ilevel1,
1040
&row_stride1_vec, &img_stride1_vec);
1041
if (bld->num_mips == 1) {
1042
data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
1045
data_ptr1 = bld->base_ptr;
1046
mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
1049
if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1050
lp_build_sample_image_nearest(bld,
1052
row_stride1_vec, img_stride1_vec,
1053
data_ptr1, mipoff1, s, t, r, offsets,
1057
lp_build_sample_image_linear(bld,
1059
row_stride1_vec, img_stride1_vec,
1060
data_ptr1, mipoff1, s, t, r, offsets,
1064
/* interpolate samples from the two mipmap levels */
1066
if (num_quads == 1 && bld->num_lods == 1) {
1067
lod_fpart = LLVMBuildTrunc(builder, lod_fpart, u8n_bld.elem_type, "");
1068
lod_fpart = lp_build_broadcast_scalar(&u8n_bld, lod_fpart);
1071
unsigned num_chans_per_lod = 4 * bld->coord_type.length / bld->num_lods;
1072
LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type, bld->lodi_bld.type.length);
1073
LLVMValueRef shuffle[LP_MAX_VECTOR_LENGTH];
1075
/* Take the LSB of lod_fpart */
1076
lod_fpart = LLVMBuildTrunc(builder, lod_fpart, tmp_vec_type, "");
1078
/* Broadcast each lod weight into their respective channels */
1079
for (i = 0; i < u8n_bld.type.length; ++i) {
1080
shuffle[i] = lp_build_const_int32(bld->gallivm, i / num_chans_per_lod);
1082
lod_fpart = LLVMBuildShuffleVector(builder, lod_fpart, LLVMGetUndef(tmp_vec_type),
1083
LLVMConstVector(shuffle, u8n_bld.type.length), "");
1086
lp_build_reduce_filter(&u8n_bld,
1087
bld->static_sampler_state->reduction_mode,
1088
LP_BLD_LERP_PRESCALED_WEIGHTS,
1095
LLVMBuildStore(builder, colors0, colors_var);
1097
lp_build_endif(&if_ctx);
1104
* Texture sampling in AoS format. Used when sampling common 32-bit/texel
1105
* formats. 1D/2D/3D/cube texture supported. All mipmap sampling modes
1106
* but only limited texture coord wrap modes.
1109
lp_build_sample_aos(struct lp_build_sample_context *bld,
1110
unsigned sampler_unit,
1114
const LLVMValueRef *offsets,
1115
LLVMValueRef lod_positive,
1116
LLVMValueRef lod_fpart,
1117
LLVMValueRef ilevel0,
1118
LLVMValueRef ilevel1,
1119
LLVMValueRef texel_out[4])
1121
LLVMBuilderRef builder = bld->gallivm->builder;
1122
const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
1123
const unsigned min_filter = bld->static_sampler_state->min_img_filter;
1124
const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
1125
const unsigned dims = bld->dims;
1126
LLVMValueRef packed_var, packed;
1127
LLVMValueRef unswizzled[4];
1128
struct lp_build_context u8n_bld;
1130
/* we only support the common/simple wrap modes at this time */
1131
assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_s));
1133
assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_t));
1135
assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_r));
1138
/* make 8-bit unorm builder context */
1139
lp_build_context_init(&u8n_bld, bld->gallivm, lp_type_unorm(8, bld->vector_width));
1142
* Get/interpolate texture colors.
1145
packed_var = lp_build_alloca(bld->gallivm, u8n_bld.vec_type, "packed_var");
1147
if (min_filter == mag_filter) {
1148
/* no need to distinguish between minification and magnification */
1149
lp_build_sample_mipmap(bld,
1150
min_filter, mip_filter,
1152
ilevel0, ilevel1, lod_fpart,
1156
/* Emit conditional to choose min image filter or mag image filter
1157
* depending on the lod being > 0 or <= 0, respectively.
1159
struct lp_build_if_state if_ctx;
1162
* FIXME this should take all lods into account, if some are min,
1163
* some mag, we could probably hack up the weights in the linear
1164
* path with selects to work for nearest.
1166
if (bld->num_lods > 1)
1167
lod_positive = LLVMBuildExtractElement(builder, lod_positive,
1168
lp_build_const_int32(bld->gallivm, 0), "");
1170
lod_positive = LLVMBuildTrunc(builder, lod_positive,
1171
LLVMInt1TypeInContext(bld->gallivm->context), "");
1173
lp_build_if(&if_ctx, bld->gallivm, lod_positive);
1175
/* Use the minification filter */
1176
lp_build_sample_mipmap(bld,
1177
min_filter, mip_filter,
1179
ilevel0, ilevel1, lod_fpart,
1182
lp_build_else(&if_ctx);
1184
/* Use the magnification filter */
1185
lp_build_sample_mipmap(bld,
1186
mag_filter, PIPE_TEX_MIPFILTER_NONE,
1188
ilevel0, NULL, NULL,
1191
lp_build_endif(&if_ctx);
1194
packed = LLVMBuildLoad(builder, packed_var, "");
1197
* Convert to SoA and swizzle.
1199
lp_build_rgba8_to_fi32_soa(bld->gallivm,
1201
packed, unswizzled);
1203
if (util_format_is_rgba8_variant(bld->format_desc)) {
1204
lp_build_format_swizzle_soa(bld->format_desc,
1206
unswizzled, texel_out);
1209
texel_out[0] = unswizzled[0];
1210
texel_out[1] = unswizzled[1];
1211
texel_out[2] = unswizzled[2];
1212
texel_out[3] = unswizzled[3];