#define COMPUTE_SHIFT_MASKC(dest, source, mask)			\
    mask ## _mask = vec_lvsl (0, mask);				\
    source ## _mask = vec_lvsl (0, source);

#define LOAD_VECTOR(source)					\
do								\
{								\
    vector unsigned char tmp1, tmp2;				\
    tmp1 = (typeof(tmp1))vec_ld (0, source);			\
    tmp2 = (typeof(tmp2))vec_ld (15, source);			\
    v ## source = (typeof(v ## source))				\
	vec_perm (tmp1, tmp2, source ## _mask);			\
} while (0)

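/* A minimal usage sketch (illustrative): for a pointer named src, a
 * caller declares "vector unsigned int vsrc;" plus DECLARE_SRC_MASK_VAR
 * and then does
 *
 *     COMPUTE_SHIFT_MASK (src);
 *     LOAD_VECTOR (src);
 *
 * vec_lvsl builds a permute pattern from the low address bits, and
 * vec_perm merges the two aligned vec_ld reads that straddle the target;
 * this is how big-endian AltiVec performs an unaligned 16-byte load.
 * load_128_unaligned below is exactly this sequence. */
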
#define LOAD_VECTORS(dest, source)				\
do								\
{								\
    LOAD_VECTOR(source);					\
    v ## dest = (typeof(v ## dest))vec_ld (0, dest);		\
} while (0)

#define LOAD_VECTORSC(dest, source, mask)			\
do								\
{								\
    LOAD_VECTORS(dest, source);					\
    LOAD_VECTOR(mask);						\
} while (0)

#define DECLARE_SRC_MASK_VAR vector unsigned char src_mask
#define DECLARE_MASK_MASK_VAR vector unsigned char mask_mask

#else

/* Now the COMPUTE_SHIFT_{MASK, MASKS, MASKC} below are just no-ops.
 * They are defined that way because little-endian AltiVec can do unaligned
 * reads natively and has no need for constructing the permutation pattern
 * variables.
 */
#define COMPUTE_SHIFT_MASK(source)

#define COMPUTE_SHIFT_MASKS(dest, source)

#define COMPUTE_SHIFT_MASKC(dest, source, mask)

# define LOAD_VECTOR(source)				\
    v ## source = *((typeof(v ## source)*)source);

# define LOAD_VECTORS(dest, source)			\
    LOAD_VECTOR(source);				\
    LOAD_VECTOR(dest);

# define LOAD_VECTORSC(dest, source, mask)		\
    LOAD_VECTORS(dest, source);				\
    LOAD_VECTOR(mask);

#define DECLARE_SRC_MASK_VAR
#define DECLARE_MASK_MASK_VAR

#endif /* WORDS_BIGENDIAN */

#define LOAD_VECTORSM(dest, source, mask)			\
    LOAD_VECTORSC (dest, source, mask);				\
    v ## source = pix_multiply (v ## source,			\
				splat_alpha (v ## mask));

#define STORE_VECTOR(dest)					\
    vec_st ((vector unsigned int) v ## dest, 0, dest);

/* load 4 pixels from a 16-byte boundary aligned address */
static force_inline vector unsigned int
load_128_aligned (const uint32_t* src)
{
    return *((vector unsigned int *) src);
}

/* load 4 pixels from an unaligned address */
static force_inline vector unsigned int
load_128_unaligned (const uint32_t* src)
{
    vector unsigned int vsrc;
    DECLARE_SRC_MASK_VAR;

    COMPUTE_SHIFT_MASK (src);
    LOAD_VECTOR (src);

    return vsrc;
}

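/* Typical call pattern (an illustrative sketch): the fast paths below peel
 * leading pixels until the destination is 16-byte aligned and then run
 *
 *     while (w >= 4)
 *     {
 *         save_128_aligned (dst, load_128_unaligned (src));
 *         dst += 4; src += 4; w -= 4;
 *     }
 *
 * (save_128_aligned is defined next), so only the source ever takes the
 * unaligned vec_perm path. */
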
/* save 4 pixels on a 16-byte boundary aligned address */
static force_inline void
save_128_aligned (uint32_t* data,
		  vector unsigned int vdata)
{
    STORE_VECTOR(data);
}

static force_inline vector unsigned int
create_mask_16_128 (uint16_t mask)
{
    uint16_t *src;
    vector unsigned short vsrc;
    DECLARE_SRC_MASK_VAR;

    src = &mask;

    COMPUTE_SHIFT_MASK (src);
    LOAD_VECTOR (src);
    return (vector unsigned int) vec_splat(vsrc, 0);
}

static force_inline vector unsigned int
create_mask_1x32_128 (const uint32_t *src)
{
    vector unsigned int vsrc;
    DECLARE_SRC_MASK_VAR;

    COMPUTE_SHIFT_MASK (src);
    LOAD_VECTOR (src);
    return vec_splat(vsrc, 0);
}

static force_inline vector unsigned int
create_mask_32_128 (uint32_t mask)
{
    return create_mask_1x32_128(&mask);
}

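/* For example, create_mask_32_128 (0xff000000) yields a vector whose four
 * 32-bit lanes all hold 0xff000000; the mask_* globals initialized in
 * _pixman_implementation_create_vmx at the bottom of this file are built
 * this way. */
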
static force_inline vector unsigned int
unpack_32_1x128 (uint32_t data)
{
    vector unsigned int vdata = {0, 0, 0, data};
    vector unsigned short lo;

    lo = (vector unsigned short)
#ifdef WORDS_BIGENDIAN
	vec_mergel ((vector unsigned char) AVV(0),
		    (vector unsigned char) vdata);
#else
	vec_mergel ((vector unsigned char) vdata,
		    (vector unsigned char) AVV(0));
#endif

    return (vector unsigned int) lo;
}

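/* Example (big-endian): unpack_32_1x128 (0x80ff0040) returns the eight
 * 16-bit lanes {0, 0, 0, 0, 0x0080, 0x00ff, 0x0000, 0x0040}; each channel
 * is zero-extended to 16 bits so per-channel products cannot overflow. */
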
static force_inline vector unsigned int
unpacklo_128_16x8 (vector unsigned int data1, vector unsigned int data2)
{
    vector unsigned char lo;

    /* unpack to short */
    lo = (vector unsigned char)
#ifdef WORDS_BIGENDIAN
	vec_mergel ((vector unsigned char) data2,
		    (vector unsigned char) data1);
#else
	vec_mergel ((vector unsigned char) data1,
		    (vector unsigned char) data2);
#endif

    return (vector unsigned int) lo;
}

static force_inline vector unsigned int
unpackhi_128_16x8 (vector unsigned int data1, vector unsigned int data2)
{
    vector unsigned char hi;

    /* unpack to short */
    hi = (vector unsigned char)
#ifdef WORDS_BIGENDIAN
	vec_mergeh ((vector unsigned char) data2,
		    (vector unsigned char) data1);
#else
	vec_mergeh ((vector unsigned char) data1,
		    (vector unsigned char) data2);
#endif

    return (vector unsigned int) hi;
}

static force_inline vector unsigned int
unpacklo_128_8x16 (vector unsigned int data1, vector unsigned int data2)
{
    vector unsigned short lo;

    lo = (vector unsigned short)
#ifdef WORDS_BIGENDIAN
	vec_mergel ((vector unsigned short) data2,
		    (vector unsigned short) data1);
#else
	vec_mergel ((vector unsigned short) data1,
		    (vector unsigned short) data2);
#endif

    return (vector unsigned int) lo;
}

static force_inline vector unsigned int
unpackhi_128_8x16 (vector unsigned int data1, vector unsigned int data2)
{
    vector unsigned short hi;

    hi = (vector unsigned short)
#ifdef WORDS_BIGENDIAN
	vec_mergeh ((vector unsigned short) data2,
		    (vector unsigned short) data1);
#else
	vec_mergeh ((vector unsigned short) data1,
		    (vector unsigned short) data2);
#endif

    return (vector unsigned int) hi;
}

static force_inline void
unpack_128_2x128 (vector unsigned int data1, vector unsigned int data2,
		  vector unsigned int* data_lo, vector unsigned int* data_hi)
{
    *data_lo = unpacklo_128_16x8(data1, data2);
    *data_hi = unpackhi_128_16x8(data1, data2);
}

static force_inline void
unpack_128_2x128_16 (vector unsigned int data1, vector unsigned int data2,
		     vector unsigned int* data_lo, vector unsigned int* data_hi)
{
    *data_lo = unpacklo_128_8x16(data1, data2);
    *data_hi = unpackhi_128_8x16(data1, data2);
}

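/* Together these split four packed 8888 pixels into two vectors of eight
 * 16-bit channels each (the _16x8 pair) or widen 16-bit lanes further to
 * 32 bits (the _8x16 pair); pack_2x128_128 below performs the inverse
 * narrowing. */
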
static force_inline vector unsigned int
unpack_565_to_8888 (vector unsigned int lo)
{
    vector unsigned int r, g, b, rb, t;

    r = vec_and (vec_sl(lo, create_mask_32_128(8)), mask_red);
    g = vec_and (vec_sl(lo, create_mask_32_128(5)), mask_green);
    b = vec_and (vec_sl(lo, create_mask_32_128(3)), mask_blue);

    rb = vec_or (r, b);
    t  = vec_and (rb, mask_565_fix_rb);
    t  = vec_sr (t, create_mask_32_128(5));
    rb = vec_or (rb, t);

    t  = vec_and (g, mask_565_fix_g);
    t  = vec_sr (t, create_mask_32_128(6));
    g  = vec_or (g, t);

    return vec_or (rb, g);
}

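/* A scalar reference for the same conversion (an illustrative sketch; the
 * helper name is hypothetical and nothing in pixman calls it). Each 565
 * channel is shifted into place and its top bits are replicated into the
 * low bits, which is what the mask_565_fix_rb / mask_565_fix_g steps
 * above implement in vector form. */
static force_inline uint32_t
scalar_565_to_8888 (uint16_t p)
{
    uint32_t r5 = (p >> 11) & 0x1f;
    uint32_t g6 = (p >>  5) & 0x3f;
    uint32_t b5 =  p        & 0x1f;

    uint32_t r8 = (r5 << 3) | (r5 >> 2);	/* replicate top 3 bits */
    uint32_t g8 = (g6 << 2) | (g6 >> 4);	/* replicate top 2 bits */
    uint32_t b8 = (b5 << 3) | (b5 >> 2);

    return (r8 << 16) | (g8 << 8) | b8;
}
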
static force_inline uint32_t
pack_1x128_32 (vector unsigned int data)
{
    vector unsigned char vpack;

    vpack = vec_packsu((vector unsigned short) data,
		       (vector unsigned short) AVV(0));

    return vec_extract((vector unsigned int) vpack, 1);
}

static force_inline vector unsigned int
pack_2x128_128 (vector unsigned int lo, vector unsigned int hi)
{
    vector unsigned char vpack;

    vpack = vec_packsu((vector unsigned short) hi,
		       (vector unsigned short) lo);

    return (vector unsigned int) vpack;
}

static force_inline void
negate_2x128 (vector unsigned int  data_lo,
	      vector unsigned int  data_hi,
	      vector unsigned int* neg_lo,
	      vector unsigned int* neg_hi)
{
    *neg_lo = vec_xor (data_lo, mask_00ff);
    *neg_hi = vec_xor (data_hi, mask_00ff);
}

static force_inline int
is_opaque (vector unsigned int x)
{
    uint32_t cmp_result;
    vector bool int ffs = vec_cmpeq(x, x);

    cmp_result = vec_all_eq(x, ffs);

    return (cmp_result & 0x8888) == 0x8888;
}

static force_inline int
is_zero (vector unsigned int x)
{
    uint32_t cmp_result;

    cmp_result = vec_all_eq(x, (vector unsigned int) AVV(0));

    return cmp_result == 0xffff;
}

static force_inline int
is_transparent (vector unsigned int x)
{
    uint32_t cmp_result;

    cmp_result = vec_all_eq(x, (vector unsigned int) AVV(0));

    return (cmp_result & 0x8888) == 0x8888;
}

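/* These predicates test four pixels at once so the blend loops can take
 * shortcuts: is_opaque lets an OVER loop store the source unmodified,
 * is_zero lets it skip the store entirely, and is_transparent short-cuts
 * a fully zero mask, as in combine4 and the scaled-nearest loop below. */
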
static force_inline vector unsigned int
expand_pixel_8_1x128 (uint8_t data)
{
    vector unsigned int vdata;

    vdata = unpack_32_1x128 ((uint32_t) data);

#ifdef WORDS_BIGENDIAN
    return vec_perm (vdata, vdata,
		     (vector unsigned char)AVV (
			 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
			 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F));
#else
    return vec_perm (vdata, vdata,
		     (vector unsigned char)AVV (
			 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
			 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09));
#endif
}

static force_inline vector unsigned int
expand_alpha_1x128 (vector unsigned int data)
{
#ifdef WORDS_BIGENDIAN
    return vec_perm (data, data,
		     (vector unsigned char)AVV (
			 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01,
			 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09));
#else
    return vec_perm (data, data,
		     (vector unsigned char)AVV (
			 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07,
			 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F));
#endif
}

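/* expand_alpha_1x128 splats each pixel's 16-bit alpha lane across that
 * pixel's four channel lanes, e.g. an unpacked pixel {A, R, G, B} becomes
 * {A, A, A, A}; this is the per-channel alpha factor consumed by over
 * and in_over. */
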
static force_inline void
expand_alpha_2x128 (vector unsigned int  data_lo,
		    vector unsigned int  data_hi,
		    vector unsigned int* alpha_lo,
		    vector unsigned int* alpha_hi)
{
    *alpha_lo = expand_alpha_1x128(data_lo);
    *alpha_hi = expand_alpha_1x128(data_hi);
}

static force_inline void
expand_alpha_rev_2x128 (vector unsigned int  data_lo,
			vector unsigned int  data_hi,
			vector unsigned int* alpha_lo,
			vector unsigned int* alpha_hi)
{
#ifdef WORDS_BIGENDIAN
    *alpha_lo = vec_perm (data_lo, data_lo,
			  (vector unsigned char)AVV (
			      0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07,
			      0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F));

    *alpha_hi = vec_perm (data_hi, data_hi,
			  (vector unsigned char)AVV (
			      0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07,
			      0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F));
#else
    *alpha_lo = vec_perm (data_lo, data_lo,
			  (vector unsigned char)AVV (
			      0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01,
			      0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09));

    *alpha_hi = vec_perm (data_hi, data_hi,
			  (vector unsigned char)AVV (
			      0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01,
			      0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09));
#endif
}

static force_inline void
pix_multiply_2x128 (vector unsigned int* data_lo,
		    vector unsigned int* data_hi,
		    vector unsigned int* alpha_lo,
		    vector unsigned int* alpha_hi,
		    vector unsigned int* ret_lo,
		    vector unsigned int* ret_hi)
{
    *ret_lo = pix_multiply(*data_lo, *alpha_lo);
    *ret_hi = pix_multiply(*data_hi, *alpha_hi);
}

static force_inline void
over_2x128 (vector unsigned int* src_lo,
	    vector unsigned int* src_hi,
	    vector unsigned int* alpha_lo,
	    vector unsigned int* alpha_hi,
	    vector unsigned int* dst_lo,
	    vector unsigned int* dst_hi)
{
    vector unsigned int t1, t2;

    negate_2x128 (*alpha_lo, *alpha_hi, &t1, &t2);

    pix_multiply_2x128 (dst_lo, dst_hi, &t1, &t2, dst_lo, dst_hi);

    *dst_lo = (vector unsigned int)
	vec_adds ((vector unsigned char) *src_lo,
		  (vector unsigned char) *dst_lo);

    *dst_hi = (vector unsigned int)
	vec_adds ((vector unsigned char) *src_hi,
		  (vector unsigned char) *dst_hi);
}

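/* over_2x128 is the Porter-Duff OVER operator on unpacked channels:
 *
 *     dst = src + (255 - alpha) * dst / 255    (per 8-bit channel, rounded)
 *
 * negate_2x128 supplies (255 - alpha) as an xor with 0x00ff, pix_multiply
 * performs the rounded division by 255, and vec_adds saturates the final
 * sum to 8 bits. */
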
static force_inline void
in_over_2x128 (vector unsigned int* src_lo,
	       vector unsigned int* src_hi,
	       vector unsigned int* alpha_lo,
	       vector unsigned int* alpha_hi,
	       vector unsigned int* mask_lo,
	       vector unsigned int* mask_hi,
	       vector unsigned int* dst_lo,
	       vector unsigned int* dst_hi)
{
    vector unsigned int s_lo, s_hi;
    vector unsigned int a_lo, a_hi;

    pix_multiply_2x128 (src_lo, src_hi, mask_lo, mask_hi, &s_lo, &s_hi);
    pix_multiply_2x128 (alpha_lo, alpha_hi, mask_lo, mask_hi, &a_lo, &a_hi);

    over_2x128 (&s_lo, &s_hi, &a_lo, &a_hi, dst_lo, dst_hi);
}

static force_inline uint32_t
core_combine_over_u_pixel_vmx (uint32_t src, uint32_t dst)
{
    uint8_t a;
    vector unsigned int vmxs;

    a = src >> 24;

    if (a == 0xff)
    {
	return src;
    }
    else if (src)
    {
	vmxs = unpack_32_1x128 (src);
	return pack_1x128_32(
	    over(vmxs, expand_alpha_1x128 (vmxs), unpack_32_1x128 (dst)));
    }

    return dst;
}

static force_inline uint32_t
combine1 (const uint32_t *ps, const uint32_t *pm)
{
    uint32_t s = *ps;

    if (pm)
    {
	vector unsigned int ms, mm;

	mm = unpack_32_1x128 (*pm);
	mm = expand_alpha_1x128 (mm);

	ms = unpack_32_1x128 (s);
	ms = pix_multiply (ms, mm);

	s = pack_1x128_32 (ms);
    }

    return s;
}

static force_inline vector unsigned int
combine4 (const uint32_t* ps, const uint32_t* pm)
{
    vector unsigned int vmx_src_lo, vmx_src_hi;
    vector unsigned int vmx_msk_lo, vmx_msk_hi;
    vector unsigned int s;

    if (pm)
    {
	vmx_msk_lo = load_128_unaligned(pm);

	if (is_transparent(vmx_msk_lo))
	    return (vector unsigned int) AVV(0);
    }

    s = load_128_unaligned(ps);

    if (pm)
    {
	unpack_128_2x128(s, (vector unsigned int) AVV(0),
			 &vmx_src_lo, &vmx_src_hi);

	unpack_128_2x128(vmx_msk_lo, (vector unsigned int) AVV(0),
			 &vmx_msk_lo, &vmx_msk_hi);

	expand_alpha_2x128(vmx_msk_lo, vmx_msk_hi, &vmx_msk_lo, &vmx_msk_hi);

	pix_multiply_2x128(&vmx_src_lo, &vmx_src_hi,
			   &vmx_msk_lo, &vmx_msk_hi,
			   &vmx_src_lo, &vmx_src_hi);

	s = pack_2x128_128(vmx_src_lo, vmx_src_hi);
    }

    return s;
}

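/* combine1/combine4 implement the "unified" source fetch: with a non-NULL
 * pm the source pixels are first multiplied by the mask's expanded alpha;
 * with pm == NULL they reduce to a plain load, which is how the nearest
 * scaling loop below uses them. */
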
static void
vmx_combine_over_u_no_mask (uint32_t *      dest,
			    const uint32_t *src,
			    int             width)

static pixman_bool_t
vmx_fill (pixman_implementation_t *imp,
	  uint32_t *               bits,
	  int                      stride,
	  int                      bpp,
	  int                      x,
	  int                      y,
	  int                      width,
	  int                      height,
	  uint32_t                 filler)
{
    uint32_t byte_width;
    uint8_t *byte_line;

    vector unsigned int vfiller;

    if (bpp == 8)
    {
	uint8_t b;
	uint16_t w;

	stride = stride * (int) sizeof (uint32_t) / 1;
	byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x);
	byte_width = width;
	stride *= 1;

	b = filler & 0xff;
	w = (b << 8) | b;
	filler = (w << 16) | w;
    }
    else if (bpp == 16)
    {
	stride = stride * (int) sizeof (uint32_t) / 2;
	byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
	byte_width = 2 * width;
	stride *= 2;

	filler = (filler & 0xffff) * 0x00010001;
    }
    else if (bpp == 32)
    {
	stride = stride * (int) sizeof (uint32_t) / 4;
	byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
	byte_width = 4 * width;
	stride *= 4;
    }
    else
    {
	return FALSE;
    }

    vfiller = create_mask_1x32_128(&filler);

    while (height--)
    {
	int w;
	uint8_t *d = byte_line;
	byte_line += stride;
	w = byte_width;

	if (w >= 1 && ((uintptr_t)d & 1))
	{
	    *(uint8_t *)d = filler;
	    w -= 1;
	    d += 1;
	}

	while (w >= 2 && ((uintptr_t)d & 3))
	{
	    *(uint16_t *)d = filler;
	    w -= 2;
	    d += 2;
	}

	while (w >= 4 && ((uintptr_t)d & 15))
	{
	    *(uint32_t *)d = filler;
	    w -= 4;
	    d += 4;
	}

	while (w >= 128)
	{
	    vec_st(vfiller, 0, (uint32_t *) d);
	    vec_st(vfiller, 0, (uint32_t *) d + 4);
	    vec_st(vfiller, 0, (uint32_t *) d + 8);
	    vec_st(vfiller, 0, (uint32_t *) d + 12);
	    vec_st(vfiller, 0, (uint32_t *) d + 16);
	    vec_st(vfiller, 0, (uint32_t *) d + 20);
	    vec_st(vfiller, 0, (uint32_t *) d + 24);
	    vec_st(vfiller, 0, (uint32_t *) d + 28);

	    d += 128;
	    w -= 128;
	}

	if (w >= 64)
	{
	    vec_st(vfiller, 0, (uint32_t *) d);
	    vec_st(vfiller, 0, (uint32_t *) d + 4);
	    vec_st(vfiller, 0, (uint32_t *) d + 8);
	    vec_st(vfiller, 0, (uint32_t *) d + 12);

	    d += 64;
	    w -= 64;
	}

	if (w >= 32)
	{
	    vec_st(vfiller, 0, (uint32_t *) d);
	    vec_st(vfiller, 0, (uint32_t *) d + 4);

	    d += 32;
	    w -= 32;
	}

	if (w >= 16)
	{
	    vec_st(vfiller, 0, (uint32_t *) d);

	    d += 16;
	    w -= 16;
	}

	while (w >= 4)
	{
	    *(uint32_t *)d = filler;
	    w -= 4;
	    d += 4;
	}

	if (w >= 2)
	{
	    *(uint16_t *)d = filler;
	    w -= 2;
	    d += 2;
	}

	if (w >= 1)
	{
	    *(uint8_t *)d = filler;
	}
    }

    return TRUE;
}

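/* The fill loop narrows progressively: peel single bytes/halfwords/words
 * until d is 16-byte aligned, stream 128/64/32/16-byte bursts of vec_st,
 * then finish the remainder with scalar stores, so every vec_st hits an
 * aligned address. */
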
static void
vmx_composite_src_x888_8888 (pixman_implementation_t *imp,
			     pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t *dst_line, *dst;
    uint32_t *src_line, *src;
    int32_t w;
    int dst_stride, src_stride;

    PIXMAN_IMAGE_GET_LINE (
	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (
	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	src = src_line;
	src_line += src_stride;
	w = width;

	while (w && (uintptr_t)dst & 15)
	{
	    *dst++ = *src++ | 0xff000000;
	    w--;
	}

	while (w >= 16)
	{
	    vector unsigned int vmx_src1, vmx_src2, vmx_src3, vmx_src4;

	    vmx_src1 = load_128_unaligned (src);
	    vmx_src2 = load_128_unaligned (src + 4);
	    vmx_src3 = load_128_unaligned (src + 8);
	    vmx_src4 = load_128_unaligned (src + 12);

	    save_128_aligned (dst, vec_or (vmx_src1, mask_ff000000));
	    save_128_aligned (dst + 4, vec_or (vmx_src2, mask_ff000000));
	    save_128_aligned (dst + 8, vec_or (vmx_src3, mask_ff000000));
	    save_128_aligned (dst + 12, vec_or (vmx_src4, mask_ff000000));

	    dst += 16;
	    src += 16;
	    w -= 16;
	}

	while (w)
	{
	    *dst++ = *src++ | 0xff000000;
	    w--;
	}
    }
}

static void
vmx_composite_over_8888_8888 (pixman_implementation_t *imp,
			      pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    int dst_stride, src_stride;
    uint32_t *dst_line, *dst;
    uint32_t *src_line, *src;

    PIXMAN_IMAGE_GET_LINE (
	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (
	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	src = src_line;
	src_line += src_stride;

	vmx_combine_over_u (imp, op, dst, src, NULL, width);
    }
}

static void
vmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
				   pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src;
    uint32_t *dst_line, d;
    uint32_t *mask_line, m;
    uint32_t pack_cmp;
    int dst_stride, mask_stride;

    vector unsigned int vsrc, valpha, vmask, vdest;

    vector unsigned int vmx_dst, vmx_dst_lo, vmx_dst_hi;
    vector unsigned int vmx_mask, vmx_mask_lo, vmx_mask_hi;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    if (src == 0)
	return;

    PIXMAN_IMAGE_GET_LINE (
	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (
	mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);

    vsrc = unpacklo_128_16x8(create_mask_1x32_128 (&src),
			     (vector unsigned int) AVV(0));

    valpha = expand_alpha_1x128(vsrc);

    while (height--)
    {
	int w = width;
	const uint32_t *pm = (uint32_t *)mask_line;
	uint32_t *pd = (uint32_t *)dst_line;

	dst_line += dst_stride;
	mask_line += mask_stride;

	while (w && (uintptr_t)pd & 15)
	{
	    m = *pm++;

	    if (m)
	    {
		d = *pd;
		vmask = unpack_32_1x128(m);
		vdest = unpack_32_1x128(d);

		*pd = pack_1x128_32(in_over (vsrc, valpha, vmask, vdest));
	    }

	    pd++;
	    w--;
	}

	while (w >= 4)
	{
	    /* pm is NOT necessarily 16-byte aligned */
	    vmx_mask = load_128_unaligned (pm);

	    pack_cmp = vec_all_eq(vmx_mask, (vector unsigned int) AVV(0));

	    /* if all bits in the mask are zero, pack_cmp is non-zero and
	     * the store can be skipped */
	    if (pack_cmp == 0)
	    {
		/* pd is 16-byte aligned */
		vmx_dst = load_128_aligned (pd);

		unpack_128_2x128 (vmx_mask, (vector unsigned int) AVV(0),
				  &vmx_mask_lo, &vmx_mask_hi);

		unpack_128_2x128 (vmx_dst, (vector unsigned int) AVV(0),
				  &vmx_dst_lo, &vmx_dst_hi);

		in_over_2x128 (&vsrc, &vsrc,
			       &valpha, &valpha,
			       &vmx_mask_lo, &vmx_mask_hi,
			       &vmx_dst_lo, &vmx_dst_hi);

		save_128_aligned(pd, pack_2x128_128(vmx_dst_lo, vmx_dst_hi));
	    }

	    pd += 4;
	    pm += 4;
	    w -= 4;
	}

	while (w)
	{
	    m = *pm++;

	    if (m)
	    {
		d = *pd;
		vmask = unpack_32_1x128(m);
		vdest = unpack_32_1x128(d);

		*pd = pack_1x128_32(in_over (vsrc, valpha, vmask, vdest));
	    }

	    pd++;
	    w--;
	}
    }
}

static void
vmx_composite_add_8_8 (pixman_implementation_t *imp,
		       pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint8_t *dst_line, *dst;
    uint8_t *src_line, *src;
    int dst_stride, src_stride;
    int32_t w;
    uint16_t t;

    PIXMAN_IMAGE_GET_LINE (
	src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
    PIXMAN_IMAGE_GET_LINE (
	dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);

    while (height--)
    {
	dst = dst_line;
	src = src_line;

	dst_line += dst_stride;
	src_line += src_stride;
	w = width;

	/* Small head */
	while (w && (uintptr_t)dst & 3)
	{
	    t = (*dst) + (*src++);
	    *dst++ = t | (0 - (t >> 8));
	    w--;
	}

	vmx_combine_add_u (imp, op,
			   (uint32_t*)dst, (uint32_t*)src, NULL, w >> 2);

	dst += w & 0xfffc;
	src += w & 0xfffc;

	w &= 3;

	/* Small tail */
	while (w)
	{
	    t = (*dst) + (*src++);
	    *dst++ = t | (0 - (t >> 8));
	    w--;
	}
    }
}

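/* The scalar head and tail rely on a branch-free saturating add: t holds
 * a 9-bit sum, so (t >> 8) is 1 exactly on overflow, 0 - (t >> 8) is then
 * all ones, and the OR forces the stored byte to 0xff. */
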
static void
vmx_composite_add_8888_8888 (pixman_implementation_t *imp,
			     pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t *dst_line, *dst;
    uint32_t *src_line, *src;
    int dst_stride, src_stride;

    PIXMAN_IMAGE_GET_LINE (
	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
    PIXMAN_IMAGE_GET_LINE (
	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	src = src_line;
	src_line += src_stride;

	vmx_combine_add_u (imp, op, dst, src, NULL, width);
    }
}

static force_inline void
scaled_nearest_scanline_vmx_8888_8888_OVER (uint32_t*       pd,
					    const uint32_t* ps,
					    int32_t         w,
					    pixman_fixed_t  vx,
					    pixman_fixed_t  unit_x,
					    pixman_fixed_t  src_width_fixed,
					    pixman_bool_t   fully_transparent_src)
{
    uint32_t s, d;
    const uint32_t* pm = NULL;

    vector unsigned int vmx_dst_lo, vmx_dst_hi;
    vector unsigned int vmx_src_lo, vmx_src_hi;
    vector unsigned int vmx_alpha_lo, vmx_alpha_hi;

    if (fully_transparent_src)
	return;

    /* Align dst on a 16-byte boundary */
    while (w && ((uintptr_t)pd & 15))
    {
	d = *pd;
	s = combine1 (ps + pixman_fixed_to_int (vx), pm);
	vx += unit_x;
	while (vx >= 0)
	    vx -= src_width_fixed;

	*pd++ = core_combine_over_u_pixel_vmx (s, d);
	w--;
    }

    while (w >= 4)
    {
	vector unsigned int tmp;
	uint32_t tmp1, tmp2, tmp3, tmp4;

	tmp1 = *(ps + pixman_fixed_to_int (vx));
	vx += unit_x;
	while (vx >= 0)
	    vx -= src_width_fixed;
	tmp2 = *(ps + pixman_fixed_to_int (vx));
	vx += unit_x;
	while (vx >= 0)
	    vx -= src_width_fixed;
	tmp3 = *(ps + pixman_fixed_to_int (vx));
	vx += unit_x;
	while (vx >= 0)
	    vx -= src_width_fixed;
	tmp4 = *(ps + pixman_fixed_to_int (vx));
	vx += unit_x;
	while (vx >= 0)
	    vx -= src_width_fixed;

	tmp[0] = tmp1;
	tmp[1] = tmp2;
	tmp[2] = tmp3;
	tmp[3] = tmp4;

	vmx_src_hi = combine4 ((const uint32_t *) &tmp, pm);

	if (is_opaque (vmx_src_hi))
	{
	    save_128_aligned (pd, vmx_src_hi);
	}
	else if (!is_zero (vmx_src_hi))
	{
	    vmx_dst_hi = load_128_aligned (pd);

	    unpack_128_2x128 (vmx_src_hi, (vector unsigned int) AVV(0),
			      &vmx_src_lo, &vmx_src_hi);

	    unpack_128_2x128 (vmx_dst_hi, (vector unsigned int) AVV(0),
			      &vmx_dst_lo, &vmx_dst_hi);

	    expand_alpha_2x128 (
		vmx_src_lo, vmx_src_hi, &vmx_alpha_lo, &vmx_alpha_hi);

	    over_2x128 (&vmx_src_lo, &vmx_src_hi,
			&vmx_alpha_lo, &vmx_alpha_hi,
			&vmx_dst_lo, &vmx_dst_hi);

	    /* rebuild the 4 pixel data and save */
	    save_128_aligned (pd, pack_2x128_128 (vmx_dst_lo, vmx_dst_hi));
	}

	w -= 4;
	pd += 4;
    }

    while (w)
    {
	d = *pd;
	s = combine1 (ps + pixman_fixed_to_int (vx), pm);
	vx += unit_x;
	while (vx >= 0)
	    vx -= src_width_fixed;

	*pd++ = core_combine_over_u_pixel_vmx (s, d);
	w--;
    }
}

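/* vx is a 16.16 fixed-point source x coordinate: pixman_fixed_to_int
 * selects the nearest source pixel, and the repeated subtraction of
 * src_width_fixed keeps the coordinate within one source width for
 * repeating sources. */
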
FAST_NEAREST_MAINLOOP (vmx_8888_8888_cover_OVER,
		       scaled_nearest_scanline_vmx_8888_8888_OVER,
		       uint32_t, uint32_t, COVER)
FAST_NEAREST_MAINLOOP (vmx_8888_8888_none_OVER,
		       scaled_nearest_scanline_vmx_8888_8888_OVER,
		       uint32_t, uint32_t, NONE)
FAST_NEAREST_MAINLOOP (vmx_8888_8888_pad_OVER,
		       scaled_nearest_scanline_vmx_8888_8888_OVER,
		       uint32_t, uint32_t, PAD)
FAST_NEAREST_MAINLOOP (vmx_8888_8888_normal_OVER,
		       scaled_nearest_scanline_vmx_8888_8888_OVER,
		       uint32_t, uint32_t, NORMAL)

static const pixman_fast_path_t vmx_fast_paths[] =
{
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, vmx_composite_over_8888_8888),
3083
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, vmx_composite_over_8888_8888),
3084
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, vmx_composite_over_8888_8888),
3085
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, vmx_composite_over_8888_8888),
3086
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, vmx_composite_over_n_8888_8888_ca),
3087
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, vmx_composite_over_n_8888_8888_ca),
3088
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, vmx_composite_over_n_8888_8888_ca),
3089
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, vmx_composite_over_n_8888_8888_ca),
3092
PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, vmx_composite_add_8_8),
3093
PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, vmx_composite_add_8888_8888),
3094
PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, vmx_composite_add_8888_8888),
3097
PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, vmx_composite_src_x888_8888),
3098
PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, vmx_composite_src_x888_8888),
3100
SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, vmx_8888_8888),
3101
SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, vmx_8888_8888),
3102
SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, vmx_8888_8888),
3103
SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, vmx_8888_8888),
1991
3105
{ PIXMAN_OP_NONE },
static uint32_t *
vmx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
{
    int w = iter->width;
    vector unsigned int ff000000 = mask_ff000000;
    uint32_t *dst = iter->buffer;
    uint32_t *src = (uint32_t *)iter->bits;

    iter->bits += iter->stride;

    while (w && ((uintptr_t)dst) & 0x0f)
    {
	*dst++ = (*src++) | 0xff000000;
	w--;
    }

    while (w >= 4)
    {
	save_128_aligned(dst, vec_or(load_128_unaligned(src), ff000000));

	dst += 4;
	src += 4;
	w -= 4;
    }

    while (w)
    {
	*dst++ = (*src++) | 0xff000000;
	w--;
    }

    return iter->buffer;
}

static uint32_t *
vmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
{
    int w = iter->width;
    uint32_t *dst = iter->buffer;
    uint8_t *src = iter->bits;
    vector unsigned int vmx0, vmx1, vmx2, vmx3, vmx4, vmx5, vmx6;

    iter->bits += iter->stride;

    while (w && (((uintptr_t)dst) & 15))
    {
	*dst++ = *(src++) << 24;
	w--;
    }

    while (w >= 16)
    {
	vmx0 = load_128_unaligned((uint32_t *) src);

	unpack_128_2x128((vector unsigned int) AVV(0), vmx0, &vmx1, &vmx2);
	unpack_128_2x128_16((vector unsigned int) AVV(0), vmx1, &vmx3, &vmx4);
	unpack_128_2x128_16((vector unsigned int) AVV(0), vmx2, &vmx5, &vmx6);

	save_128_aligned(dst, vmx6);
	save_128_aligned((dst +  4), vmx5);
	save_128_aligned((dst +  8), vmx4);
	save_128_aligned((dst + 12), vmx3);

	dst += 16;
	src += 16;
	w -= 16;
    }

    while (w)
    {
	*dst++ = *(src++) << 24;
	w--;
    }

    return iter->buffer;
}

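/* Each 16-byte run of a8 coverage is widened in two unpack stages
 * (8 -> 16 -> 32 bits) and stored as alpha-only 8888 pixels, i.e.
 * a << 24 for every source byte, matching the scalar head and tail
 * loops. */
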
#define IMAGE_FLAGS							\
    (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |		\
     FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)

static const pixman_iter_info_t vmx_iters[] =
{
    { PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW,
      _pixman_iter_init_bits_stride, vmx_fetch_x8r8g8b8, NULL
    },
    { PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW,
      _pixman_iter_init_bits_stride, vmx_fetch_a8, NULL
    },
    { PIXMAN_null },
};

3200
pixman_implementation_t *
1995
3201
_pixman_implementation_create_vmx (pixman_implementation_t *fallback)
1997
3203
pixman_implementation_t *imp = _pixman_implementation_create (fallback, vmx_fast_paths);
3206
mask_00ff = create_mask_16_128 (0x00ff);
3207
mask_ff000000 = create_mask_32_128 (0xff000000);
3208
mask_red = create_mask_32_128 (0x00f80000);
3209
mask_green = create_mask_32_128 (0x0000fc00);
3210
mask_blue = create_mask_32_128 (0x000000f8);
3211
mask_565_fix_rb = create_mask_32_128 (0x00e000e0);
3212
mask_565_fix_g = create_mask_32_128 (0x0000c000);
1999
3214
/* Set up function pointers */
2001
3216
imp->combine_32[PIXMAN_OP_OVER] = vmx_combine_over_u;