    return vec_nor (src, src);
}

/* dest*~srca + src */
static force_inline vector unsigned int
over (vector unsigned int src,
      vector unsigned int srca,
      vector unsigned int dest)
{
    vector unsigned char tmp = (vector unsigned char)
        pix_multiply (dest, negate (srca));

    tmp = vec_adds ((vector unsigned char)src, tmp);
    return (vector unsigned int)tmp;
}
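/*
 * For reference: a scalar sketch of the same OVER step on one
 * premultiplied a8r8g8b8 pixel.  This helper is illustrative only and
 * not part of the original file; it mirrors dest*~srca + src with a
 * pixman-style rounded UN8 multiply and the per-byte saturation that
 * vec_adds performs.
 */
static inline uint32_t
over_scalar_sketch (uint32_t src, uint32_t dest)
{
    uint32_t ia = 255 - (src >> 24);  /* inverse source alpha */
    uint32_t result = 0;
    int shift;

    for (shift = 0; shift < 32; shift += 8)
    {
        uint32_t d = (dest >> shift) & 0xff;
        uint32_t s = (src >> shift) & 0xff;
        uint32_t t = d * ia + 0x80;   /* rounded divide by 255 */

        t = (t + (t >> 8)) >> 8;
        t += s;
        if (t > 0xff)                 /* saturate like vec_adds */
            t = 0xff;
        result |= t << shift;
    }

    return result;
}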
/* in == pix_multiply */
#define in_over(src, srca, mask, dest) \
    over (pix_multiply (src, mask), \
          pix_multiply (srca, mask), dest)

#define COMPUTE_SHIFT_MASK(source) \
    source ## _mask = vec_lvsl (0, source);

#define COMPUTE_SHIFT_MASKS(dest, source) \
    dest ## _mask = vec_lvsl (0, dest); \
    source ## _mask = vec_lvsl (0, source); \
    store_mask = vec_lvsr (0, dest);

#define COMPUTE_SHIFT_MASKC(dest, source, mask) \
    mask ## _mask = vec_lvsl (0, mask); \
    dest ## _mask = vec_lvsl (0, dest); \
    source ## _mask = vec_lvsl (0, source); \
    store_mask = vec_lvsr (0, dest);
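/*
 * A note for readers new to AltiVec: vec_lvsl and vec_lvsr build
 * permute (shift) vectors from the low four bits of a pointer.  The
 * *_mask vectors computed above are later fed to vec_perm so that
 * data at unaligned addresses can be loaded from, and stored back
 * into, 16-byte-aligned slots.
 */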
/* notice you have to declare temp vars...
 * Note: tmp3 and tmp4 must remain untouched!
 */
#define LOAD_VECTORS(dest, source) \
    tmp1 = (typeof(tmp1))vec_ld(0, source); \
    tmp2 = (typeof(tmp2))vec_ld(15, source); \
    tmp3 = (typeof(tmp3))vec_ld(0, dest); \
    v ## source = (typeof(v ## source)) \
        vec_perm(tmp1, tmp2, source ## _mask); \
    tmp4 = (typeof(tmp4))vec_ld(15, dest); \
    v ## dest = (typeof(v ## dest)) \
        vec_perm(tmp3, tmp4, dest ## _mask);

#define LOAD_VECTORSC(dest, source, mask) \
    tmp1 = (typeof(tmp1))vec_ld(0, source); \
    tmp2 = (typeof(tmp2))vec_ld(15, source); \
    tmp3 = (typeof(tmp3))vec_ld(0, dest); \
    v ## source = (typeof(v ## source)) \
        vec_perm(tmp1, tmp2, source ## _mask); \
    tmp4 = (typeof(tmp4))vec_ld(15, dest); \
    tmp1 = (typeof(tmp1))vec_ld(0, mask); \
    v ## dest = (typeof(v ## dest)) \
        vec_perm(tmp3, tmp4, dest ## _mask); \
    tmp2 = (typeof(tmp2))vec_ld(15, mask); \
    v ## mask = (typeof(v ## mask)) \
        vec_perm(tmp1, tmp2, mask ## _mask);

#define STORE_VECTOR(dest) \
    edges = vec_perm (tmp4, tmp3, dest ## _mask); \
    tmp3 = vec_perm ((vector unsigned char)v ## dest, edges, store_mask); \
    tmp1 = vec_perm (edges, (vector unsigned char)v ## dest, store_mask); \
    vec_st ((vector unsigned int) tmp3, 15, dest); \
    vec_st ((vector unsigned int) tmp1, 0, dest);
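/*
 * How the unaligned-access idiom above works (a reading note, not
 * original commentary): vec_ld(0, p) and vec_ld(15, p) fetch the two
 * aligned 16-byte blocks that a 16-byte span starting at p can
 * straddle, and vec_perm stitches the wanted bytes together.  On the
 * store side, `edges` recovers the bytes that live before and after
 * the destination span, so STORE_VECTOR can write two full aligned
 * vectors without clobbering neighbouring memory.  This is also why
 * tmp3 and tmp4 must still hold the destination's original blocks
 * when STORE_VECTOR runs.
 */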
vmxCombineMaskU (uint32_t *src, const uint32_t *msk, int width)
    vector unsigned int vsrc, vmsk;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         src_mask, msk_mask, store_mask;

    COMPUTE_SHIFT_MASKS(src, msk)

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width/4; i > 0; i--) {
        LOAD_VECTORS(src, msk)
        vsrc = pix_multiply (vsrc, splat_alpha (vmsk));

    for (i = width%4; --i >= 0;) {
        uint32_t a = msk[i] >> 24;
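/*
 * Reading note: every combiner in this file processes four pixels per
 * vector iteration (width / 4 rounds through the LOAD_VECTORS and
 * STORE_VECTOR macros) and then handles the width % 4 leftover pixels
 * with plain scalar code, so narrow spans and edge remainders stay
 * correct.
 */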
vmxCombineOverU (uint32_t *dest, const uint32_t *src, int width)
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS(dest, src)

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width/4; i > 0; i--) {
        LOAD_VECTORS(dest, src)
        vdest = over (vsrc, splat_alpha (vsrc), vdest);

    for (i = width%4; --i >= 0;) {
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t ia = Alpha (~s);

        FbByteMulAdd (d, ia, s);
vmxCombineOverReverseU (uint32_t *dest, const uint32_t *src, int width)
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS(dest, src)

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width/4; i > 0; i--) {
        LOAD_VECTORS(dest, src)
        vdest = over (vdest, splat_alpha (vdest), vsrc);

    for (i = width%4; --i >= 0;) {
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t ia = Alpha (~dest[i]);

        FbByteMulAdd (s, ia, d);
vmxCombineInU (uint32_t *dest, const uint32_t *src, int width)
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS(dest, src)

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width/4; i > 0; i--) {
        LOAD_VECTORS(dest, src)
        vdest = pix_multiply (vsrc, splat_alpha (vdest));

    for (i = width%4; --i >= 0;) {
        uint32_t a = Alpha (dest[i]);
vmxCombineInReverseU (uint32_t *dest, const uint32_t *src, int width)
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS(dest, src)

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width/4; i > 0; i--) {
        LOAD_VECTORS(dest, src)
        vdest = pix_multiply (vdest, splat_alpha (vsrc));

    for (i = width%4; --i >= 0;) {
        uint32_t d = dest[i];
        uint32_t a = Alpha (src[i]);
vmxCombineOutU (uint32_t *dest, const uint32_t *src, int width)
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS(dest, src)

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width/4; i > 0; i--) {
        LOAD_VECTORS(dest, src)
        vdest = pix_multiply (vsrc, splat_alpha (negate (vdest)));

    for (i = width%4; --i >= 0;) {
        uint32_t a = Alpha (~dest[i]);
vmxCombineOutReverseU (uint32_t *dest, const uint32_t *src, int width)
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS(dest, src)

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width/4; i > 0; i--) {
        LOAD_VECTORS(dest, src)
        vdest = pix_multiply (vdest, splat_alpha (negate (vsrc)));

    for (i = width%4; --i >= 0;) {
        uint32_t d = dest[i];
        uint32_t a = Alpha (~src[i]);
vmxCombineAtopU (uint32_t *dest, const uint32_t *src, int width)
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS(dest, src)

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width/4; i > 0; i--) {
        LOAD_VECTORS(dest, src)
        vdest = pix_add_mul (vsrc, splat_alpha (vdest),
                             vdest, splat_alpha (negate (vsrc)));

    for (i = width%4; --i >= 0;) {
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t dest_a = Alpha (d);
        uint32_t src_ia = Alpha (~s);

        FbByteAddMul (s, dest_a, d, src_ia);
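/*
 * Reading note: ATOP keeps the destination's shape, so the result is
 * src * dest_alpha + dest * (1 - src_alpha); pix_add_mul evaluates
 * both products and the saturating sum in one pass, which is why the
 * ATOP, ATOP_REVERSE and XOR combiners all funnel through it.
 */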
vmxCombineAtopReverseU (uint32_t *dest, const uint32_t *src, int width)
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS(dest, src)

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width/4; i > 0; i--) {
        LOAD_VECTORS(dest, src)
        vdest = pix_add_mul (vdest, splat_alpha (vsrc),
                             vsrc, splat_alpha (negate (vdest)));

    for (i = width%4; --i >= 0;) {
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t src_a = Alpha (s);
        uint32_t dest_ia = Alpha (~d);

        FbByteAddMul (s, dest_ia, d, src_a);
vmxCombineXorU (uint32_t *dest, const uint32_t *src, int width)
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS(dest, src)

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width/4; i > 0; i--) {
        LOAD_VECTORS (dest, src)
        vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)),
                             vdest, splat_alpha (negate (vsrc)));

    for (i = width%4; --i >= 0;) {
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t src_ia = Alpha (~s);
        uint32_t dest_ia = Alpha (~d);

        FbByteAddMul (s, dest_ia, d, src_ia);
vmxCombineAddU (uint32_t *dest, const uint32_t *src, int width)
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS(dest, src)
    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width/4; i > 0; i--) {
        LOAD_VECTORS(dest, src)
        vdest = pix_add (vsrc, vdest);

    for (i = width%4; --i >= 0;) {
        uint32_t d = dest[i];
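/*
 * For reference: the ADD operator is a per-byte saturating add.  A
 * scalar sketch of what pix_add does for one pixel (hypothetical
 * helper, not part of the original file):
 */
static inline uint32_t
pix_add_scalar_sketch (uint32_t src, uint32_t dest)
{
    uint32_t result = 0;
    int shift;

    for (shift = 0; shift < 32; shift += 8)
    {
        uint32_t t = ((src >> shift) & 0xff) + ((dest >> shift) & 0xff);

        if (t > 0xff)   /* vec_adds clamps each byte at 255 */
            t = 0xff;
        result |= t << shift;
    }

    return result;
}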
vmxCombineSrcC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC(dest, src, mask);
    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width/4; i > 0; i--) {
        LOAD_VECTORSC(dest, src, mask)
        vdest = pix_multiply (vsrc, vmask);

    for (i = width%4; --i >= 0;) {
        uint32_t a = mask[i];
vmxCombineOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC(dest, src, mask);
    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width/4; i > 0; i--) {
        LOAD_VECTORSC(dest, src, mask)
        vdest = in_over (vsrc, splat_alpha (vsrc), vmask, vdest);

    for (i = width%4; --i >= 0;) {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];

        FbByteMulAddC (d, ~a, s);
vmxCombineOverReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC(dest, src, mask);
#define LOAD_VECTORS(dest, source) \
    tmp1 = (typeof(tmp1))vec_ld (0, source); \
    tmp2 = (typeof(tmp2))vec_ld (15, source); \
    tmp3 = (typeof(tmp3))vec_ld (0, dest); \
    v ## source = (typeof(v ## source)) \
        vec_perm (tmp1, tmp2, source ## _mask); \
    tmp4 = (typeof(tmp4))vec_ld (15, dest); \
    v ## dest = (typeof(v ## dest)) \
        vec_perm (tmp3, tmp4, dest ## _mask);

#define LOAD_VECTORSC(dest, source, mask) \
    tmp1 = (typeof(tmp1))vec_ld (0, source); \
    tmp2 = (typeof(tmp2))vec_ld (15, source); \
    tmp3 = (typeof(tmp3))vec_ld (0, dest); \
    v ## source = (typeof(v ## source)) \
        vec_perm (tmp1, tmp2, source ## _mask); \
    tmp4 = (typeof(tmp4))vec_ld (15, dest); \
    tmp1 = (typeof(tmp1))vec_ld (0, mask); \
    v ## dest = (typeof(v ## dest)) \
        vec_perm (tmp3, tmp4, dest ## _mask); \
    tmp2 = (typeof(tmp2))vec_ld (15, mask); \
    v ## mask = (typeof(v ## mask)) \
        vec_perm (tmp1, tmp2, mask ## _mask);

#define LOAD_VECTORSM(dest, source, mask) \
    LOAD_VECTORSC (dest, source, mask) \
    v ## source = pix_multiply (v ## source, \
                                splat_alpha (v ## mask));

#define STORE_VECTOR(dest) \
    edges = vec_perm (tmp4, tmp3, dest ## _mask); \
    tmp3 = vec_perm ((vector unsigned char)v ## dest, edges, store_mask); \
    tmp1 = vec_perm (edges, (vector unsigned char)v ## dest, store_mask); \
    vec_st ((vector unsigned int) tmp3, 15, dest); \
    vec_st ((vector unsigned int) tmp1, 0, dest);
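/*
 * Reading note: LOAD_VECTORSM is the "unified mask" variant.  It
 * performs the same gather as LOAD_VECTORSC and then pre-multiplies
 * the source vector by the mask's alpha, so the *_mask combiners
 * below can share the mask-free arithmetic of their *_no_mask twins.
 */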
vmx_combine_over_u_no_mask (uint32_t * dest,
                            const uint32_t *src,
                            int width)
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
        LOAD_VECTORS (dest, src);
        vdest = over (vsrc, splat_alpha (vsrc), vdest);

    for (i = width % 4; --i >= 0;)
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t ia = ALPHA_8 (~s);

        UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
vmx_combine_over_u_mask (uint32_t * dest,
                         const uint32_t *src,
                         const uint32_t *mask,
                         int width)
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
        LOAD_VECTORSM (dest, src, mask);
        vdest = over (vsrc, splat_alpha (vsrc), vdest);

    for (i = width % 4; --i >= 0;)
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t ia;

        UN8x4_MUL_UN8 (s, m);

        ia = ALPHA_8 (~s);

        UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
vmx_combine_over_u (pixman_implementation_t *imp,
                    pixman_op_t op,
                    uint32_t * dest,
                    const uint32_t * src,
                    const uint32_t * mask,
                    int width)
    if (mask)
        vmx_combine_over_u_mask (dest, src, mask, width);
    else
        vmx_combine_over_u_no_mask (dest, src, width);
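/*
 * Reading note: every *_u entry point follows this shape.  The
 * unified-alpha combiners split into a _mask variant (driven by
 * LOAD_VECTORSM) and a _no_mask variant, so the hot loops never test
 * for a mask per pixel.
 */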
vmx_combine_over_reverse_u_no_mask (uint32_t * dest,
                                    const uint32_t *src,
                                    int width)
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
        LOAD_VECTORS (dest, src);
        vdest = over (vdest, splat_alpha (vdest), vsrc);

    for (i = width % 4; --i >= 0;)
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t ia = ALPHA_8 (~dest[i]);

        UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
vmx_combine_over_reverse_u_mask (uint32_t * dest,
                                 const uint32_t *src,
                                 const uint32_t *mask,
                                 int width)
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
        LOAD_VECTORSM (dest, src, mask);
        vdest = over (vdest, splat_alpha (vdest), vsrc);

    for (i = width % 4; --i >= 0;)
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t ia = ALPHA_8 (~dest[i]);

        UN8x4_MUL_UN8 (s, m);
        UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
vmx_combine_over_reverse_u (pixman_implementation_t *imp,
                            pixman_op_t op,
                            uint32_t * dest,
                            const uint32_t * src,
                            const uint32_t * mask,
                            int width)
    if (mask)
        vmx_combine_over_reverse_u_mask (dest, src, mask, width);
    else
        vmx_combine_over_reverse_u_no_mask (dest, src, width);
vmx_combine_in_u_no_mask (uint32_t * dest,
                          const uint32_t *src,
                          int width)
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
        LOAD_VECTORS (dest, src);
        vdest = pix_multiply (vsrc, splat_alpha (vdest));

    for (i = width % 4; --i >= 0;)
        uint32_t s = src[i];
        uint32_t a = ALPHA_8 (dest[i]);

        UN8x4_MUL_UN8 (s, a);
vmx_combine_in_u_mask (uint32_t * dest,
                       const uint32_t *src,
                       const uint32_t *mask,
                       int width)
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
        LOAD_VECTORSM (dest, src, mask);
        vdest = pix_multiply (vsrc, splat_alpha (vdest));

    for (i = width % 4; --i >= 0;)
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t a = ALPHA_8 (dest[i]);

        UN8x4_MUL_UN8 (s, m);
        UN8x4_MUL_UN8 (s, a);
vmx_combine_in_u (pixman_implementation_t *imp,
                  pixman_op_t op,
                  uint32_t * dest,
                  const uint32_t * src,
                  const uint32_t * mask,
                  int width)
    if (mask)
        vmx_combine_in_u_mask (dest, src, mask, width);
    else
        vmx_combine_in_u_no_mask (dest, src, width);
vmx_combine_in_reverse_u_no_mask (uint32_t * dest,
                                  const uint32_t *src,
                                  int width)
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
        LOAD_VECTORS (dest, src);
        vdest = pix_multiply (vdest, splat_alpha (vsrc));

    for (i = width % 4; --i >= 0;)
        uint32_t d = dest[i];
        uint32_t a = ALPHA_8 (src[i]);

        UN8x4_MUL_UN8 (d, a);
vmx_combine_in_reverse_u_mask (uint32_t * dest,
                               const uint32_t *src,
                               const uint32_t *mask,
                               int width)
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
        LOAD_VECTORSM (dest, src, mask);
        vdest = pix_multiply (vdest, splat_alpha (vsrc));

    for (i = width % 4; --i >= 0;)
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t d = dest[i];
        uint32_t a = ALPHA_8 (src[i]);

        UN8x4_MUL_UN8 (a, m);
        UN8x4_MUL_UN8 (d, a);
vmx_combine_in_reverse_u (pixman_implementation_t *imp,
                          pixman_op_t op,
                          uint32_t * dest,
                          const uint32_t * src,
                          const uint32_t * mask,
                          int width)
    if (mask)
        vmx_combine_in_reverse_u_mask (dest, src, mask, width);
    else
        vmx_combine_in_reverse_u_no_mask (dest, src, width);
vmx_combine_out_u_no_mask (uint32_t * dest,
                           const uint32_t *src,
                           int width)
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
        LOAD_VECTORS (dest, src);
        vdest = pix_multiply (vsrc, splat_alpha (negate (vdest)));

    for (i = width % 4; --i >= 0;)
        uint32_t s = src[i];
        uint32_t a = ALPHA_8 (~dest[i]);

        UN8x4_MUL_UN8 (s, a);
vmx_combine_out_u_mask (uint32_t * dest,
                        const uint32_t *src,
                        const uint32_t *mask,
                        int width)
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
        LOAD_VECTORSM (dest, src, mask);
        vdest = pix_multiply (vsrc, splat_alpha (negate (vdest)));

    for (i = width % 4; --i >= 0;)
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t a = ALPHA_8 (~dest[i]);

        UN8x4_MUL_UN8 (s, m);
        UN8x4_MUL_UN8 (s, a);
vmx_combine_out_u (pixman_implementation_t *imp,
                   pixman_op_t op,
                   uint32_t * dest,
                   const uint32_t * src,
                   const uint32_t * mask,
                   int width)
    if (mask)
        vmx_combine_out_u_mask (dest, src, mask, width);
    else
        vmx_combine_out_u_no_mask (dest, src, width);
vmx_combine_out_reverse_u_no_mask (uint32_t * dest,
                                   const uint32_t *src,
                                   int width)
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
        LOAD_VECTORS (dest, src);
        vdest = pix_multiply (vdest, splat_alpha (negate (vsrc)));

    for (i = width % 4; --i >= 0;)
        uint32_t d = dest[i];
        uint32_t a = ALPHA_8 (~src[i]);

        UN8x4_MUL_UN8 (d, a);
vmx_combine_out_reverse_u_mask (uint32_t * dest,
                                const uint32_t *src,
                                const uint32_t *mask,
                                int width)
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
        LOAD_VECTORSM (dest, src, mask);
        vdest = pix_multiply (vdest, splat_alpha (negate (vsrc)));

    for (i = width % 4; --i >= 0;)
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t d = dest[i];
        uint32_t a = ALPHA_8 (~src[i]);

        UN8x4_MUL_UN8 (a, m);
        UN8x4_MUL_UN8 (d, a);
vmx_combine_out_reverse_u (pixman_implementation_t *imp,
                           pixman_op_t op,
                           uint32_t * dest,
                           const uint32_t * src,
                           const uint32_t * mask,
                           int width)
    if (mask)
        vmx_combine_out_reverse_u_mask (dest, src, mask, width);
    else
        vmx_combine_out_reverse_u_no_mask (dest, src, width);
vmx_combine_atop_u_no_mask (uint32_t * dest,
                            const uint32_t *src,
                            int width)
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
        LOAD_VECTORS (dest, src);
        vdest = pix_add_mul (vsrc, splat_alpha (vdest),
                             vdest, splat_alpha (negate (vsrc)));

    for (i = width % 4; --i >= 0;)
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t dest_a = ALPHA_8 (d);
        uint32_t src_ia = ALPHA_8 (~s);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);
vmx_combine_atop_u_mask (uint32_t * dest,
                         const uint32_t *src,
                         const uint32_t *mask,
                         int width)
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
        LOAD_VECTORSM (dest, src, mask);
        vdest = pix_add_mul (vsrc, splat_alpha (vdest),
                             vdest, splat_alpha (negate (vsrc)));

    for (i = width % 4; --i >= 0;)
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t dest_a = ALPHA_8 (d);
        uint32_t src_ia;

        UN8x4_MUL_UN8 (s, m);

        src_ia = ALPHA_8 (~s);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);
vmx_combine_atop_u (pixman_implementation_t *imp,
                    pixman_op_t op,
                    uint32_t * dest,
                    const uint32_t * src,
                    const uint32_t * mask,
                    int width)
    if (mask)
        vmx_combine_atop_u_mask (dest, src, mask, width);
    else
        vmx_combine_atop_u_no_mask (dest, src, width);
vmx_combine_atop_reverse_u_no_mask (uint32_t * dest,
                                    const uint32_t *src,
                                    int width)
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
        LOAD_VECTORS (dest, src);
        vdest = pix_add_mul (vdest, splat_alpha (vsrc),
                             vsrc, splat_alpha (negate (vdest)));

    for (i = width % 4; --i >= 0;)
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t src_a = ALPHA_8 (s);
        uint32_t dest_ia = ALPHA_8 (~d);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);
vmx_combine_atop_reverse_u_mask (uint32_t * dest,
                                 const uint32_t *src,
                                 const uint32_t *mask,
                                 int width)
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
        LOAD_VECTORSM (dest, src, mask);
        vdest = pix_add_mul (vdest, splat_alpha (vsrc),
                             vsrc, splat_alpha (negate (vdest)));

    for (i = width % 4; --i >= 0;)
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t src_a;
        uint32_t dest_ia = ALPHA_8 (~d);

        UN8x4_MUL_UN8 (s, m);

        src_a = ALPHA_8 (s);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);
vmx_combine_atop_reverse_u (pixman_implementation_t *imp,
                            pixman_op_t op,
                            uint32_t * dest,
                            const uint32_t * src,
                            const uint32_t * mask,
                            int width)
    if (mask)
        vmx_combine_atop_reverse_u_mask (dest, src, mask, width);
    else
        vmx_combine_atop_reverse_u_no_mask (dest, src, width);
vmx_combine_xor_u_no_mask (uint32_t * dest,
                           const uint32_t *src,
                           int width)
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
        LOAD_VECTORS (dest, src);
        vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)),
                             vdest, splat_alpha (negate (vsrc)));

    for (i = width % 4; --i >= 0;)
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t src_ia = ALPHA_8 (~s);
        uint32_t dest_ia = ALPHA_8 (~d);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);
vmx_combine_xor_u_mask (uint32_t * dest,
                        const uint32_t *src,
                        const uint32_t *mask,
                        int width)
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
        LOAD_VECTORSM (dest, src, mask);
        vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)),
                             vdest, splat_alpha (negate (vsrc)));

    for (i = width % 4; --i >= 0;)
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t src_ia;
        uint32_t dest_ia = ALPHA_8 (~d);

        UN8x4_MUL_UN8 (s, m);

        src_ia = ALPHA_8 (~s);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);
vmx_combine_xor_u (pixman_implementation_t *imp,
                   pixman_op_t op,
                   uint32_t * dest,
                   const uint32_t * src,
                   const uint32_t * mask,
                   int width)
    if (mask)
        vmx_combine_xor_u_mask (dest, src, mask, width);
    else
        vmx_combine_xor_u_no_mask (dest, src, width);
vmx_combine_add_u_no_mask (uint32_t * dest,
                           const uint32_t *src,
                           int width)
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);
    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
        LOAD_VECTORS (dest, src);
        vdest = pix_add (vsrc, vdest);
        STORE_VECTOR (dest);

    for (i = width % 4; --i >= 0;)
        uint32_t s = src[i];
        uint32_t d = dest[i];

        UN8x4_ADD_UN8x4 (d, s);
vmx_combine_add_u_mask (uint32_t * dest,
                        const uint32_t *src,
                        const uint32_t *mask,
                        int width)
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
        LOAD_VECTORSM (dest, src, mask);
        vdest = pix_add (vsrc, vdest);
        STORE_VECTOR (dest);

    for (i = width % 4; --i >= 0;)
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t d = dest[i];

        UN8x4_MUL_UN8 (s, m);
        UN8x4_ADD_UN8x4 (d, s);
vmx_combine_add_u (pixman_implementation_t *imp,
                   pixman_op_t op,
                   uint32_t * dest,
                   const uint32_t * src,
                   const uint32_t * mask,
                   int width)
    if (mask)
        vmx_combine_add_u_mask (dest, src, mask, width);
    else
        vmx_combine_add_u_no_mask (dest, src, width);
vmx_combine_src_ca (pixman_implementation_t *imp,
                    pixman_op_t op,
                    uint32_t * dest,
                    const uint32_t * src,
                    const uint32_t * mask,
                    int width)
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
        LOAD_VECTORSC (dest, src, mask);
        vdest = pix_multiply (vsrc, vmask);
        STORE_VECTOR (dest);

    for (i = width % 4; --i >= 0;)
        uint32_t a = mask[i];
        uint32_t s = src[i];

        UN8x4_MUL_UN8x4 (s, a);
vmx_combine_over_ca (pixman_implementation_t *imp,
                     pixman_op_t op,
                     uint32_t * dest,
                     const uint32_t * src,
                     const uint32_t * mask,
                     int width)
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
        LOAD_VECTORSC (dest, src, mask);
        vdest = in_over (vsrc, splat_alpha (vsrc), vmask, vdest);
        STORE_VECTOR (dest);

    for (i = width % 4; --i >= 0;)
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t sa = ALPHA_8 (s);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ~a, s);
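/*
 * Reading note: with component alpha the mask carries one alpha per
 * channel, so OVER becomes s = s*m, a = m*sa, d = d*~a + s, evaluated
 * channel-wise.  Those are exactly the three UN8x4 steps above, and
 * the in_over() used in the vector loop computes the same thing four
 * pixels at a time.
 */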
vmx_combine_over_reverse_ca (pixman_implementation_t *imp,
                             pixman_op_t op,
                             uint32_t * dest,
                             const uint32_t * src,
                             const uint32_t * mask,
                             int width)
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);
    /* printf("%s\n",__PRETTY_FUNCTION__); */
    for (i = width/4; i > 0; i--) {
        LOAD_VECTORSC (dest, src, mask)
        vdest = over (vdest, splat_alpha (vdest), pix_multiply (vsrc, vmask));

    for (i = width%4; --i >= 0;) {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t da = Alpha (d);

        FbByteMulAddC (s, ~da, d);
vmxCombineInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC(dest, src, mask)

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width/4; i > 0; i--) {
        LOAD_VECTORSC(dest, src, mask)
        vdest = pix_multiply (pix_multiply (vsrc, vmask), splat_alpha (vdest));

    for (i = width%4; --i >= 0;) {
        uint32_t a = mask[i];
        uint32_t da = Alpha (dest[i]);
vmxCombineInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC(dest, src, mask)

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width/4; i > 0; i--) {
        LOAD_VECTORSC(dest, src, mask)
        vdest = pix_multiply (vdest, pix_multiply (vmask, splat_alpha (vsrc)));

    for (i = width%4; --i >= 0;) {
        uint32_t a = mask[i];
        uint32_t d = dest[i];
        uint32_t sa = Alpha (src[i]);
vmxCombineOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC(dest, src, mask)

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width/4; i > 0; i--) {
        LOAD_VECTORSC(dest, src, mask)
        vdest = pix_multiply (pix_multiply (vsrc, vmask),
                              splat_alpha (negate (vdest)));

    for (i = width%4; --i >= 0;) {
        uint32_t a = mask[i];
        uint32_t d = dest[i];
        uint32_t da = Alpha (~d);
vmxCombineOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC(dest, src, mask)

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width/4; i > 0; i--) {
        LOAD_VECTORSC(dest, src, mask)
        vdest = pix_multiply (vdest,
                              negate (pix_multiply (vmask, splat_alpha (vsrc))));

    for (i = width%4; --i >= 0;) {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t sa = Alpha (s);
vmxCombineAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC(dest, src, mask)

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width/4; i > 0; i--) {
        LOAD_VECTORSC(dest, src, mask)
        vdest = pix_add_mul (pix_multiply (vsrc, vmask), splat_alpha (vdest),
                             vdest,
                             negate (pix_multiply (vmask,
                                                   splat_alpha (vsrc))));

    for (i = width%4; --i >= 0;) {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t sa = Alpha (s);
        uint32_t da = Alpha (d);

        FbByteAddMulC (d, ~a, s, da);
vmxCombineAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC(dest, src, mask)

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width/4; i > 0; i--) {
        LOAD_VECTORSC(dest, src, mask)
        vdest = pix_add_mul (vdest,
                             pix_multiply (vmask, splat_alpha (vsrc)),
                             pix_multiply (vsrc, vmask),
                             negate (splat_alpha (vdest)));

    for (i = width%4; --i >= 0;) {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t sa = Alpha (s);
        uint32_t da = Alpha (d);

        FbByteAddMulC (d, a, s, ~da);
vmxCombineXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC(dest, src, mask)

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width/4; i > 0; i--) {
        LOAD_VECTORSC(dest, src, mask)
        vdest = pix_add_mul (vdest,
                             negate (pix_multiply (vmask, splat_alpha (vsrc))),
                             pix_multiply (vsrc, vmask),
                             negate (splat_alpha (vdest)));

    for (i = width%4; --i >= 0;) {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t sa = Alpha (s);
        uint32_t da = Alpha (d);

        FbByteAddMulC (d, ~a, s, ~da);
vmxCombineAddC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC(dest, src, mask)

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width/4; i > 0; i--) {
        LOAD_VECTORSC(dest, src, mask)
        vdest = pix_add (pix_multiply (vsrc, vmask), vdest);

    for (i = width%4; --i >= 0;) {
        uint32_t a = mask[i];
        uint32_t d = dest[i];
fbCompositeSolid_nx8888vmx (pixman_operator_t op,
                            pixman_image_t * pSrc,
                            pixman_image_t * pMask,
                            pixman_image_t * pDst,
    uint32_t *dstLine, *dst;

    fbComposeGetSolid (pSrc, pDst, src);

    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);

        dstLine += dstStride;
        /* XXX vmxCombineOverU (dst, src, width); */
fbCompositeSolid_nx0565vmx (pixman_operator_t op,
                            pixman_image_t * pSrc,
                            pixman_image_t * pMask,
                            pixman_image_t * pDst,
    uint16_t *dstLine, *dst;

    fbComposeGetSolid (pSrc, pDst, src);

    fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);

        dstLine += dstStride;
        vmxCombineOverU565 (dst, src, width);
void fbComposeSetupVMX (void)
{
    /* check if we have VMX support and initialize accordingly */
    if (pixman_have_vmx ()) {
        pixman_composeFunctions.combineU[PIXMAN_OP_OVER] = vmxCombineOverU;
        pixman_composeFunctions.combineU[PIXMAN_OP_OVER_REVERSE] = vmxCombineOverReverseU;
        pixman_composeFunctions.combineU[PIXMAN_OP_IN] = vmxCombineInU;
        pixman_composeFunctions.combineU[PIXMAN_OP_IN_REVERSE] = vmxCombineInReverseU;
        pixman_composeFunctions.combineU[PIXMAN_OP_OUT] = vmxCombineOutU;
        pixman_composeFunctions.combineU[PIXMAN_OP_OUT_REVERSE] = vmxCombineOutReverseU;
        pixman_composeFunctions.combineU[PIXMAN_OP_ATOP] = vmxCombineAtopU;
        pixman_composeFunctions.combineU[PIXMAN_OP_ATOP_REVERSE] = vmxCombineAtopReverseU;
        pixman_composeFunctions.combineU[PIXMAN_OP_XOR] = vmxCombineXorU;
        pixman_composeFunctions.combineU[PIXMAN_OP_ADD] = vmxCombineAddU;

        pixman_composeFunctions.combineC[PIXMAN_OP_SRC] = vmxCombineSrcC;
        pixman_composeFunctions.combineC[PIXMAN_OP_OVER] = vmxCombineOverC;
        pixman_composeFunctions.combineC[PIXMAN_OP_OVER_REVERSE] = vmxCombineOverReverseC;
        pixman_composeFunctions.combineC[PIXMAN_OP_IN] = vmxCombineInC;
        pixman_composeFunctions.combineC[PIXMAN_OP_IN_REVERSE] = vmxCombineInReverseC;
        pixman_composeFunctions.combineC[PIXMAN_OP_OUT] = vmxCombineOutC;
        pixman_composeFunctions.combineC[PIXMAN_OP_OUT_REVERSE] = vmxCombineOutReverseC;
        pixman_composeFunctions.combineC[PIXMAN_OP_ATOP] = vmxCombineAtopC;
        pixman_composeFunctions.combineC[PIXMAN_OP_ATOP_REVERSE] = vmxCombineAtopReverseC;
        pixman_composeFunctions.combineC[PIXMAN_OP_XOR] = vmxCombineXorC;
        pixman_composeFunctions.combineC[PIXMAN_OP_ADD] = vmxCombineAddC;

        pixman_composeFunctions.combineMaskU = vmxCombineMaskU;
    }
}
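/*
 * Reading note: registration is gated on a runtime pixman_have_vmx()
 * check, so the same binary stays correct on PowerPC CPUs without
 * AltiVec; in that case the generic combiners simply remain in place.
 */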
    for (i = width / 4; i > 0; i--)
        LOAD_VECTORSC (dest, src, mask);
        vdest = over (vdest, splat_alpha (vdest), pix_multiply (vsrc, vmask));
        STORE_VECTOR (dest);

    for (i = width % 4; --i >= 0;)
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t ida = ALPHA_8 (~d);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8_ADD_UN8x4 (s, ida, d);
vmx_combine_in_ca (pixman_implementation_t *imp,
                   pixman_op_t op,
                   uint32_t * dest,
                   const uint32_t * src,
                   const uint32_t * mask,
                   int width)
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
        LOAD_VECTORSC (dest, src, mask);
        vdest = pix_multiply (pix_multiply (vsrc, vmask), splat_alpha (vdest));
        STORE_VECTOR (dest);

    for (i = width % 4; --i >= 0;)
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t da = ALPHA_8 (dest[i]);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (s, da);
vmx_combine_in_reverse_ca (pixman_implementation_t *imp,
                           pixman_op_t op,
                           uint32_t * dest,
                           const uint32_t * src,
                           const uint32_t * mask,
                           int width)
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
        LOAD_VECTORSC (dest, src, mask);
        vdest = pix_multiply (vdest, pix_multiply (vmask, splat_alpha (vsrc)));
        STORE_VECTOR (dest);

    for (i = width % 4; --i >= 0;)
        uint32_t a = mask[i];
        uint32_t d = dest[i];
        uint32_t sa = ALPHA_8 (src[i]);

        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4 (d, a);
vmx_combine_out_ca (pixman_implementation_t *imp,
                    pixman_op_t op,
                    uint32_t * dest,
                    const uint32_t * src,
                    const uint32_t * mask,
                    int width)
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
        LOAD_VECTORSC (dest, src, mask);
        vdest = pix_multiply (
            pix_multiply (vsrc, vmask), splat_alpha (negate (vdest)));
        STORE_VECTOR (dest);

    for (i = width % 4; --i >= 0;)
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t da = ALPHA_8 (~d);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (s, da);
vmx_combine_out_reverse_ca (pixman_implementation_t *imp,
                            pixman_op_t op,
                            uint32_t * dest,
                            const uint32_t * src,
                            const uint32_t * mask,
                            int width)
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
        LOAD_VECTORSC (dest, src, mask);
        vdest = pix_multiply (
            vdest, negate (pix_multiply (vmask, splat_alpha (vsrc))));
        STORE_VECTOR (dest);

    for (i = width % 4; --i >= 0;)
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t sa = ALPHA_8 (s);

        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4 (d, ~a);
vmx_combine_atop_ca (pixman_implementation_t *imp,
                     pixman_op_t op,
                     uint32_t * dest,
                     const uint32_t * src,
                     const uint32_t * mask,
                     int width)
    vector unsigned int vdest, vsrc, vmask, vsrca;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
        LOAD_VECTORSC (dest, src, mask);

        vsrca = splat_alpha (vsrc);

        vsrc = pix_multiply (vsrc, vmask);
        vmask = pix_multiply (vmask, vsrca);

        vdest = pix_add_mul (vsrc, splat_alpha (vdest),
                             negate (vmask), vdest);

        STORE_VECTOR (dest);

    for (i = width % 4; --i >= 0;)
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t sa = ALPHA_8 (s);
        uint32_t da = ALPHA_8 (d);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);
vmx_combine_atop_reverse_ca (pixman_implementation_t *imp,
                             pixman_op_t op,
                             uint32_t * dest,
                             const uint32_t * src,
                             const uint32_t * mask,
                             int width)
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
        LOAD_VECTORSC (dest, src, mask);
        vdest = pix_add_mul (vdest,
                             pix_multiply (vmask, splat_alpha (vsrc)),
                             pix_multiply (vsrc, vmask),
                             negate (splat_alpha (vdest)));
        STORE_VECTOR (dest);

    for (i = width % 4; --i >= 0;)
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t sa = ALPHA_8 (s);
        uint32_t da = ALPHA_8 (~d);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, a, s, da);
vmx_combine_xor_ca (pixman_implementation_t *imp,
                    pixman_op_t op,
                    uint32_t * dest,
                    const uint32_t * src,
                    const uint32_t * mask,
                    int width)
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
        LOAD_VECTORSC (dest, src, mask);
        vdest = pix_add_mul (vdest,
                             negate (pix_multiply (vmask, splat_alpha (vsrc))),
                             pix_multiply (vsrc, vmask),
                             negate (splat_alpha (vdest)));
        STORE_VECTOR (dest);

    for (i = width % 4; --i >= 0;)
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t sa = ALPHA_8 (s);
        uint32_t da = ALPHA_8 (~d);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);
vmx_combine_add_ca (pixman_implementation_t *imp,
                    pixman_op_t op,
                    uint32_t * dest,
                    const uint32_t * src,
                    const uint32_t * mask,
                    int width)
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
                         dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
        LOAD_VECTORSC (dest, src, mask);
        vdest = pix_add (pix_multiply (vsrc, vmask), vdest);
        STORE_VECTOR (dest);

    for (i = width % 4; --i >= 0;)
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_ADD_UN8x4 (s, d);
pixman_implementation_t *
_pixman_implementation_create_vmx (void)
{
    pixman_implementation_t *fast = _pixman_implementation_create_fast_path ();
    pixman_implementation_t *imp = _pixman_implementation_create (fast);

    /* Set up function pointers */
    imp->combine_32[PIXMAN_OP_OVER] = vmx_combine_over_u;
    imp->combine_32[PIXMAN_OP_OVER_REVERSE] = vmx_combine_over_reverse_u;
    imp->combine_32[PIXMAN_OP_IN] = vmx_combine_in_u;
    imp->combine_32[PIXMAN_OP_IN_REVERSE] = vmx_combine_in_reverse_u;
    imp->combine_32[PIXMAN_OP_OUT] = vmx_combine_out_u;
    imp->combine_32[PIXMAN_OP_OUT_REVERSE] = vmx_combine_out_reverse_u;
    imp->combine_32[PIXMAN_OP_ATOP] = vmx_combine_atop_u;
    imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = vmx_combine_atop_reverse_u;
    imp->combine_32[PIXMAN_OP_XOR] = vmx_combine_xor_u;
    imp->combine_32[PIXMAN_OP_ADD] = vmx_combine_add_u;

    imp->combine_32_ca[PIXMAN_OP_SRC] = vmx_combine_src_ca;
    imp->combine_32_ca[PIXMAN_OP_OVER] = vmx_combine_over_ca;
    imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = vmx_combine_over_reverse_ca;
    imp->combine_32_ca[PIXMAN_OP_IN] = vmx_combine_in_ca;
    imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = vmx_combine_in_reverse_ca;
    imp->combine_32_ca[PIXMAN_OP_OUT] = vmx_combine_out_ca;
    imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = vmx_combine_out_reverse_ca;
    imp->combine_32_ca[PIXMAN_OP_ATOP] = vmx_combine_atop_ca;
    imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = vmx_combine_atop_reverse_ca;
    imp->combine_32_ca[PIXMAN_OP_XOR] = vmx_combine_xor_ca;
    imp->combine_32_ca[PIXMAN_OP_ADD] = vmx_combine_add_ca;

    return imp;
}
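/*
 * Reading note (an assumption about the surrounding framework, not
 * shown in this file): the implementation is created on top of the
 * generic fast-path implementation, so any operation the VMX
 * combiners do not override falls through to portable code.
 */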