165
165
if(dst_type.sign && src_type.sign) {
166
166
/* Replicate the sign bit in the most significant bits */
167
msb = LLVMBuildAShr(builder, src, lp_build_int_const_scalar(src_type, src_type.width - 1), "");
167
msb = LLVMBuildAShr(builder, src, lp_build_const_int_vec(src_type, src_type.width - 1), "");
170
170
/* Most significant bits always zero */
171
171
msb = lp_build_zero(src_type);
173
173
/* Interleave bits */
174
if(util_cpu_caps.little_endian) {
174
#ifdef PIPE_ARCH_LITTLE_ENDIAN
175
175
*dst_lo = lp_build_interleave2(builder, src_type, src, msb, 0);
176
176
*dst_hi = lp_build_interleave2(builder, src_type, src, msb, 1);
179
178
*dst_lo = lp_build_interleave2(builder, src_type, msb, src, 0);
180
179
*dst_hi = lp_build_interleave2(builder, src_type, msb, src, 1);
183
182
/* Cast the result into the new type (twice as wide) */
258
#if HAVE_LLVM < 0x0207
259
259
LLVMTypeRef src_vec_type = lp_build_vec_type(src_type);
260
261
LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type);
261
262
LLVMValueRef shuffle;
264
dst_vec_type = lp_build_vec_type(dst_type);
263
LLVMValueRef res = NULL;
266
265
assert(!src_type.floating);
267
266
assert(!dst_type.floating);
268
267
assert(src_type.width == dst_type.width * 2);
269
268
assert(src_type.length * 2 == dst_type.length);
270
/* Check for special cases first */
271
271
if(util_cpu_caps.has_sse2 && src_type.width * src_type.length == 128) {
272
272
switch(src_type.width) {
274
274
if(dst_type.sign) {
275
#if HAVE_LLVM >= 0x0207
276
res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", dst_vec_type, lo, hi);
275
278
res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", src_vec_type, lo, hi);
278
282
if (util_cpu_caps.has_sse4_1) {
279
/* PACKUSDW is the only intrinsic with a consistent signature */
280
283
return lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", dst_vec_type, lo, hi);
284
return LLVMGetUndef(dst_vec_type);
286
/* use generic shuffle below */
290
293
if(dst_type.sign)
294
#if HAVE_LLVM >= 0x0207
295
res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", dst_vec_type, lo, hi);
291
297
res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", src_vec_type, lo, hi);
300
#if HAVE_LLVM >= 0x0207
301
res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", dst_vec_type, lo, hi);
293
303
res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", src_vec_type, lo, hi);
349
363
struct lp_build_context bld;
350
364
unsigned dst_bits = dst_type.sign ? dst_type.width - 1 : dst_type.width;
351
LLVMValueRef dst_max = lp_build_int_const_scalar(src_type, ((unsigned long long)1 << dst_bits) - 1);
365
LLVMValueRef dst_max = lp_build_const_int_vec(src_type, ((unsigned long long)1 << dst_bits) - 1);
352
366
lp_build_context_init(&bld, builder, src_type);
353
367
lo = lp_build_min(&bld, lo, dst_max);
354
368
hi = lp_build_min(&bld, hi, dst_max);
436
* Truncate or expand the bitwidth.
438
* NOTE: Getting the right sign flags is crucial here, as we employ some
439
* intrinsics that do saturation.
442
lp_build_resize(LLVMBuilderRef builder,
443
struct lp_type src_type,
444
struct lp_type dst_type,
445
const LLVMValueRef *src, unsigned num_srcs,
446
LLVMValueRef *dst, unsigned num_dsts)
448
LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
452
* We don't support float <-> int conversion here. That must be done
453
* before/after calling this function.
455
assert(src_type.floating == dst_type.floating);
458
* We don't support double <-> float conversion yet, although it could be
459
* added with little effort.
461
assert((!src_type.floating && !dst_type.floating) ||
462
src_type.width == dst_type.width);
464
/* We must not lose or gain channels, only precision. */
465
assert(src_type.length * num_srcs == dst_type.length * num_dsts);
467
/* We don't support M:N conversion, only 1:N, M:1, or 1:1 */
468
assert(num_srcs == 1 || num_dsts == 1);
470
assert(src_type.length <= LP_MAX_VECTOR_LENGTH);
471
assert(dst_type.length <= LP_MAX_VECTOR_LENGTH);
472
assert(num_srcs <= LP_MAX_VECTOR_LENGTH);
473
assert(num_dsts <= LP_MAX_VECTOR_LENGTH);
475
if (src_type.width > dst_type.width) {
477
* Truncate bit width.
480
assert(num_dsts == 1);
482
if (src_type.width * src_type.length == dst_type.width * dst_type.length) {
484
* Register width remains constant -- use vector packing intrinsics
487
tmp[0] = lp_build_pack(builder, src_type, dst_type, TRUE, src, num_srcs);
491
* Do it element-wise.
494
assert(src_type.length == dst_type.length);
495
tmp[0] = lp_build_undef(dst_type);
496
for (i = 0; i < dst_type.length; ++i) {
497
LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
498
LLVMValueRef val = LLVMBuildExtractElement(builder, src[0], index, "");
499
val = LLVMBuildTrunc(builder, val, lp_build_elem_type(dst_type), "");
500
tmp[0] = LLVMBuildInsertElement(builder, tmp[0], val, index, "");
504
else if (src_type.width < dst_type.width) {
509
assert(num_srcs == 1);
511
if (src_type.width * src_type.length == dst_type.width * dst_type.length) {
513
* Register width remains constant -- use vector unpack intrinsics
515
lp_build_unpack(builder, src_type, dst_type, src[0], tmp, num_dsts);
519
* Do it element-wise.
522
assert(src_type.length == dst_type.length);
523
tmp[0] = lp_build_undef(dst_type);
524
for (i = 0; i < dst_type.length; ++i) {
525
LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
526
LLVMValueRef val = LLVMBuildExtractElement(builder, src[0], index, "");
528
if (src_type.sign && dst_type.sign) {
529
val = LLVMBuildSExt(builder, val, lp_build_elem_type(dst_type), "");
531
val = LLVMBuildZExt(builder, val, lp_build_elem_type(dst_type), "");
533
tmp[0] = LLVMBuildInsertElement(builder, tmp[0], val, index, "");
542
assert(num_srcs == 1);
543
assert(num_dsts == 1);
548
for(i = 0; i < num_dsts; ++i)