32
31
#ifdef COMPILE_ALTIVEC_IS_OKAY
37
/* Paper over differences between official gcc and Apple's weird gcc */
39
/* NOTE(review): INIT_VECTOR/CONST_BUFFER are each defined twice below.
 * An #if/#else (presumably distinguishing Apple's gcc from FSF gcc, per
 * the comment above) has evidently been lost from this chunk, together
 * with the matching #endif -- TODO: restore from the original file.
 * The bare numeric lines interleaved throughout this chunk look like
 * line-number residue from a bad extraction; they are not code. */
#define INIT_VECTOR(v...) {v}
40
#define CONST_BUFFER(b) (b)
42
#define INIT_VECTOR(v...) (v)
43
#define CONST_BUFFER(b) ((guchar *)(b))
46
/* Constant vectors shared by the composite kernels below.
 * NOTE(review): the bare numeric lines between each declaration and its
 * INIT_VECTOR initializer are extraction residue, not code; code left
 * byte-identical pending recovery of the original file. */
/* 0xff in every 4th byte: selects the alpha byte of each RGBA8 pixel */
static const vector unsigned char alphamask = (const vector unsigned char)
47
INIT_VECTOR(0,0,0,0xff,0,0,0,0xff,0,0,0,0xff,0,0,0,0xff);
48
/* vec_perm pattern taking the high byte of each 16-bit lane,
 * alternating between the two source vectors (0,16,2,18,...) */
static const vector unsigned char combine_high_bytes = (const vector unsigned char)
49
INIT_VECTOR(0,16,2,18,4,20,6,22,8,24,10,26,12,28,14,30);
50
/* 0x80 in every 16-bit lane -- rounding bias for the divide-by-255 trick */
static const vector unsigned short ox0080 = (const vector unsigned short)
51
INIT_VECTOR(0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80);
52
/* shift count of 8 in every 16-bit lane, used with vec_sr/vec_sl */
static const vector unsigned short ox0008 = (const vector unsigned short)
53
INIT_VECTOR(0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8);
54
/* low-byte mask for every signed 16-bit lane */
static const vector signed short ox00ff = (const vector signed short)
55
INIT_VECTOR(0x00ff,0x00ff,0x00ff,0x00ff,0x00ff,0x00ff,0x00ff,0x00ff);
56
/* 0xff80 (-128 as signed short) in every lane -- grain merge/extract offset */
static const vector signed short oxff80 = (const vector signed short)
57
INIT_VECTOR(0xff80,0xff80,0xff80,0xff80,0xff80,0xff80,0xff80,0xff80);
59
/* Load a vector from an unaligned location in memory */
60
/* NOTE(review): fragment is corrupted -- the opening brace, the
 * alignment test (presumably `if ((long)v & 0x0f)`) and the else
 * branch structure are missing, as the jump in the residual line
 * numbers (61 -> 65, 68 -> 71) suggests.  Left byte-identical. */
static inline vector unsigned char
61
LoadUnaligned(const guchar *v)
65
/* lvsl + two aligned loads + perm: the classic AltiVec
 * misaligned-load sequence */
vector unsigned char permuteVector = vec_lvsl(0, v);
66
vector unsigned char low = vec_ld(0, v);
67
vector unsigned char high = vec_ld(16, v);
68
return vec_perm(low, high, permuteVector);
71
/* aligned case: a single load suffices and avoids reading past the end */
return vec_ld(0, v); /* don't want overflow */
74
/* Load less than a vector from an unaligned location in memory */
75
/* NOTE(review): fragment is corrupted -- the second parameter's
 * declaration (a byte count, judging by the `n` used below), the
 * opening brace and the if/else braces are missing. */
static inline vector unsigned char
76
LoadUnalignedLess(const guchar *v,
79
vector unsigned char permuteVector = vec_lvsl(0, v);
80
/* does the n-byte span cross a 16-byte boundary? */
if (((long)v&0x0f)+n > 15)
82
vector unsigned char low = vec_ld(0, v);
83
vector unsigned char high = vec_ld(16, v);
84
return vec_perm(low, high, permuteVector);
88
/* span fits in one aligned vector: single load, rotate into place */
vector unsigned char tmp = vec_ld(0, v);
89
return vec_perm(tmp, tmp, permuteVector); /* don't want overflow */
93
/* Store a vector to an unaligned location in memory */
95
/* NOTE(review): fragment is corrupted -- the return type line
 * (presumably `static inline void`), the `where` parameter
 * declaration and the braces are missing.  Left byte-identical. */
StoreUnaligned (vector unsigned char v,
98
if ((unsigned long)where & 0x0f)
100
/* Load the surrounding area */
101
vector unsigned char low = vec_ld(0, where);
102
vector unsigned char high = vec_ld(16, where);
103
/* Prepare the constants that we need */
104
vector unsigned char permuteVector = vec_lvsr(0, where);
105
vector signed char oxFF = vec_splat_s8(-1);
106
vector signed char ox00 = vec_splat_s8(0);
107
/* Make a mask for which parts of the vectors to swap out */
108
vector unsigned char mask = (vector unsigned char)vec_perm(ox00, oxFF, permuteVector);
109
v = vec_perm(v, v, permuteVector);
110
/* Insert our data into the low and high vectors */
111
low = vec_sel(low, v, mask);
112
high = vec_sel(v, high, mask);
113
/* Store the two aligned result vectors */
114
vec_st(low, 0, CONST_BUFFER(where));
115
vec_st(high, 16, CONST_BUFFER(where));
118
{ /* prevent overflow */
119
vec_st(v, 0, CONST_BUFFER(where));
123
/* Store less than a vector to an unaligned location in memory */
125
/* NOTE(review): fragment is corrupted -- the return type, the tail of
 * the parameter list (`where` and a byte count), the braces and the
 * loop header declaring `i` (used below with vec_ste) are missing. */
StoreUnalignedLess (vector unsigned char v,
130
vector unsigned char permuteVector = vec_lvsr(0, where);
131
v = vec_perm(v, v, permuteVector);
133
/* byte-wise store -- writes only the requested bytes */
vec_ste(v, i, CONST_BUFFER(where));
137
/* AltiVec 'addition' composite for RGBA8 buffers.
 * Visible logic: result alpha = min(alpha_a, alpha_b); colour bytes are
 * masked off before the (missing) arithmetic core.
 * NOTE(review): fragment is corrupted -- the return type, braces, the
 * main pixel loop, the `D` destination pointer and the actual
 * saturating-add of the colour channels are not visible here. */
gimp_composite_addition_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
139
const guchar *A = ctx->A;
140
const guchar *B = ctx->B;
142
guint length = ctx->n_pixels;
143
vector unsigned char a,b,d,alpha_a,alpha_b;
150
alpha_a=vec_and(a, alphamask);
151
alpha_b=vec_and(b, alphamask);
152
/* destination alpha = min of the two source alphas */
d=vec_min(alpha_a, alpha_b);
154
a=vec_andc(a, alphamask);
156
b=vec_andc(b, alphamask);
159
StoreUnaligned(d, D);
166
/* process last pixels */
168
a=LoadUnalignedLess(A, length);
169
b=LoadUnalignedLess(B, length);
171
alpha_a=vec_and(a,alphamask);
172
alpha_b=vec_and(b,alphamask);
173
d=vec_min(alpha_a,alpha_b);
175
a=vec_andc(a,alphamask);
177
b=vec_andc(b,alphamask);
180
StoreUnalignedLess(d, D, length);
184
/* AltiVec 'subtract' composite for RGBA8 buffers.
 * Same alpha handling as the addition kernel: result alpha =
 * min(alpha_a, alpha_b), colour bytes masked off beforehand.
 * NOTE(review): fragment is corrupted -- return type, braces, main
 * loop, `D` pointer and the saturating-subtract core are missing. */
gimp_composite_subtract_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
186
const guchar *A = ctx->A;
187
const guchar *B = ctx->B;
189
guint length = ctx->n_pixels;
190
vector unsigned char a,b,d,alpha_a,alpha_b;
197
alpha_a=vec_and(a, alphamask);
198
alpha_b=vec_and(b, alphamask);
199
d=vec_min(alpha_a, alpha_b);
201
a=vec_andc(a, alphamask);
203
b=vec_andc(b, alphamask);
206
StoreUnaligned(d, D);
213
/* process last pixels */
215
a=LoadUnalignedLess(A, length);
216
b=LoadUnalignedLess(B, length);
218
alpha_a=vec_and(a,alphamask);
219
alpha_b=vec_and(b,alphamask);
220
d=vec_min(alpha_a,alpha_b);
222
a=vec_andc(a,alphamask);
224
b=vec_andc(b,alphamask);
227
StoreUnalignedLess(d, D, length);
231
/* AltiVec 'swap' composite: exchanges the contents of buffers A and B
 * in place (note both loads happen before either store).
 * NOTE(review): fragment is corrupted -- return type, braces, the main
 * loop and the loads feeding the first pair of stores are missing. */
gimp_composite_swap_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
233
const guchar *A = ctx->A;
234
const guchar *B = ctx->B;
235
guint length = ctx->n_pixels;
236
vector unsigned char a,b;
242
StoreUnaligned(b, A);
243
StoreUnaligned(a, B);
248
/* process last pixels */
250
a=LoadUnalignedLess(A, length);
251
b=LoadUnalignedLess(B, length);
252
StoreUnalignedLess(a, B, length);
253
StoreUnalignedLess(b, A, length);
257
/* AltiVec 'difference' composite for RGBA8 buffers.
 * Result alpha = min(alpha_a, alpha_b); colour bytes masked off before
 * the (missing) |a-b| computation -- the unused-looking `e` temporary
 * presumably belonged to it.
 * NOTE(review): fragment is corrupted -- return type, braces, main
 * loop, `D` pointer and the difference core are missing. */
gimp_composite_difference_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
259
const guchar *A = ctx->A;
260
const guchar *B = ctx->B;
262
guint length = ctx->n_pixels;
263
vector unsigned char a,b,d,e,alpha_a,alpha_b;
270
alpha_a=vec_and(a, alphamask);
271
alpha_b=vec_and(b, alphamask);
272
d=vec_min(alpha_a, alpha_b);
274
a=vec_andc(a, alphamask);
276
b=vec_andc(b, alphamask);
281
StoreUnaligned(d, D);
288
/* process last pixels */
290
a=LoadUnalignedLess(A, length);
291
b=LoadUnalignedLess(B, length);
293
alpha_a=vec_and(a,alphamask);
294
alpha_b=vec_and(b,alphamask);
295
d=vec_min(alpha_a,alpha_b);
297
a=vec_andc(a,alphamask);
299
b=vec_andc(b,alphamask);
304
StoreUnalignedLess(d, D, length);
308
/* AltiVec 'darken' composite for RGBA8 buffers.
 * NOTE(review): fragment is corrupted -- return type, braces, the main
 * loop, the `D` pointer and the per-byte minimum computation producing
 * `d` (presumably vec_min(a, b)) are all missing from this view. */
gimp_composite_darken_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
310
const guchar *A = ctx->A;
311
const guchar *B = ctx->B;
313
guint length = ctx->n_pixels;
314
vector unsigned char a,b,d;
323
StoreUnaligned(d, D);
330
/* process last pixels */
332
a=LoadUnalignedLess(A, length);
333
b=LoadUnalignedLess(B, length);
337
StoreUnalignedLess(d, D, length);
341
/* AltiVec 'lighten' composite for RGBA8 buffers.
 * Result alpha = min(alpha_a, alpha_b); colour bytes masked off before
 * the (missing) per-byte maximum computation.
 * NOTE(review): fragment is corrupted -- return type, braces, main
 * loop, `D` pointer and the lighten core are missing. */
gimp_composite_lighten_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
343
const guchar *A = ctx->A;
344
const guchar *B = ctx->B;
346
guint length = ctx->n_pixels;
347
vector unsigned char a,b,d,alpha_a,alpha_b;
354
alpha_a=vec_and(a, alphamask);
355
alpha_b=vec_and(b, alphamask);
356
d=vec_min(alpha_a, alpha_b);
358
a=vec_andc(a, alphamask);
360
b=vec_andc(b, alphamask);
363
StoreUnaligned(d, D);
370
/* process last pixels */
372
a=LoadUnalignedLess(A, length);
373
b=LoadUnalignedLess(B, length);
375
alpha_a=vec_and(a,alphamask);
376
alpha_b=vec_and(b,alphamask);
377
d=vec_min(alpha_a,alpha_b);
379
a=vec_andc(a,alphamask);
381
b=vec_andc(b,alphamask);
384
StoreUnalignedLess(d, D, length);
388
/* AltiVec 'multiply' composite for RGBA8 buffers.
 * Visible logic: 16-bit products (whose vec_mule/vec_mulo computation
 * is missing from this fragment) are divided by 255 via the
 * add-0x80-then-fold trick, repacked to bytes with combine_high_bytes,
 * and the alpha lanes replaced with min(alpha_a, alpha_b).
 * NOTE(review): fragment is corrupted -- return type, braces, main
 * loop, `D` pointer, the multiply itself and the final merge of
 * `alpha` into `d` are missing. */
gimp_composite_multiply_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
390
const guchar *A = ctx->A;
391
const guchar *B = ctx->B;
393
guint length = ctx->n_pixels;
394
vector unsigned char a,b,d,alpha_a,alpha_b,alpha;
395
vector unsigned short al,ah;
403
/* divide by 255: n/255 ~= ((n+0x80) + ((n+0x80)>>8)) >> 8 */
al=vec_add(al,ox0080);
405
ah=vec_add(ah,ox0080);
406
al=vec_add(al,vec_sr(al,ox0008));
407
ah=vec_add(ah,vec_sr(ah,ox0008));
408
d=vec_perm((vector unsigned char)al,(vector unsigned char)ah,combine_high_bytes);
410
alpha_a=vec_and(a, alphamask);
411
alpha_b=vec_and(b, alphamask);
412
alpha=vec_min(alpha_a, alpha_b);
414
d=vec_andc(d, alphamask);
417
StoreUnaligned(d, D);
424
/* process last pixels */
426
a=LoadUnalignedLess(A, length);
427
b=LoadUnalignedLess(B, length);
430
al=vec_add(al,ox0080);
432
ah=vec_add(ah,ox0080);
433
al=vec_add(al,vec_sr(al,ox0008));
434
ah=vec_add(ah,vec_sr(ah,ox0008));
435
d=vec_perm((vector unsigned char)al,(vector unsigned char)ah,combine_high_bytes);
437
alpha_a=vec_and(a, alphamask);
438
alpha_b=vec_and(b, alphamask);
439
alpha=vec_min(alpha_a, alpha_b);
441
d=vec_andc(d, alphamask);
444
StoreUnalignedLess(d, D, length);
448
/* AltiVec 'blend' composite: dest = (A*(255-blend) + B*blend) / 255,
 * with the blend factor taken from ctx->blend.blend and splatted into
 * a vector via the (partially missing) vblend union.
 * NOTE(review): fragment is corrupted -- the union declaration wrapping
 * the visible `v`/`u8[16]` members, the `tmp` loop variable, the `D`
 * pointer, the main loop, the additions combining al/bl and ah/bh, and
 * several statements of the divide-by-255 sequence are missing. */
gimp_composite_blend_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
450
const guchar *A = ctx->A;
451
const guchar *B = ctx->B;
453
guint length = ctx->n_pixels;
454
guchar blend = ctx->blend.blend;
457
vector unsigned char v;
458
unsigned char u8[16];
461
vector unsigned char vblendc;
462
vector unsigned char a,b,d;
463
vector unsigned short al,ah,bl,bh,one=vec_splat_u16(1);
466
/* splat the scalar blend factor into all 16 bytes of vblend */
for (tmp=0; tmp<16; tmp++ )
467
vblend.u8[tmp]=blend;
468
/* vblendc = ~vblend = 255 - blend per byte */
vblendc=vec_nor(vblend.v,vblend.v);
475
/* dest[b] = (src1[b] * blend2 + src2[b] * blend) / 255;
476
* to divide by 255 we use ((n+1)+(n+1)>>8)>>8
477
* It works for all value but 0xffff
478
* happily blending formula can't give this value */
480
al=vec_mule(a,vblendc);
481
ah=vec_mulo(a,vblendc);
483
bl=vec_mule(b,vblend.v);
484
bh=vec_mulo(b,vblend.v);
488
al=vec_add(al,vec_sr(al,ox0008));
492
ah=vec_add(ah,vec_sr(ah,ox0008));
494
d=vec_perm((vector unsigned char)al,(vector unsigned char)ah,combine_high_bytes);
496
StoreUnaligned(d, D);
503
/* process last pixels */
505
a=LoadUnalignedLess(A, length);
506
b=LoadUnalignedLess(B, length);
508
al=vec_mule(a,vblendc);
509
ah=vec_mulo(a,vblendc);
511
bl=vec_mule(b,vblend.v);
512
bh=vec_mulo(b,vblend.v);
516
al=vec_add(al,vec_sr(al,ox0008));
520
ah=vec_add(ah,vec_sr(ah,ox0008));
522
d=vec_perm((vector unsigned char)al,(vector unsigned char)ah,combine_high_bytes);
524
StoreUnalignedLess(d, D, length);
528
/* AltiVec 'screen' composite for RGBA8 buffers.
 * Visible logic: the complemented product (whose multiply is missing
 * here) is divided by 255 with the 0x80 rounding trick, repacked to
 * bytes, and the alpha lanes replaced with min(alpha_a, alpha_b).
 * NOTE(review): fragment is corrupted -- return type, braces, main
 * loop, `D` pointer, the nor/multiply core and the final merge of
 * `alpha` into `d` are missing. */
gimp_composite_screen_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
530
const guchar *A = ctx->A;
531
const guchar *B = ctx->B;
533
guint length = ctx->n_pixels;
534
vector unsigned char a,b,d,alpha_a,alpha_b,alpha;
535
vector unsigned short ah,al;
542
alpha_a=vec_and(a, alphamask);
543
alpha_b=vec_and(b, alphamask);
544
alpha=vec_min(alpha_a, alpha_b);
549
/* divide by 255: n/255 ~= ((n+0x80) + ((n+0x80)>>8)) >> 8 */
al=vec_add(al,ox0080);
551
ah=vec_add(ah,ox0080);
553
al=vec_add(al,vec_sr(al,ox0008));
554
ah=vec_add(ah,vec_sr(ah,ox0008));
556
d=vec_perm((vector unsigned char)al,(vector unsigned char)ah,combine_high_bytes);
559
d=vec_andc(d, alphamask);
562
StoreUnaligned(d, D);
569
/* process last pixels */
571
a=LoadUnalignedLess(A, length);
572
b=LoadUnalignedLess(B, length);
574
alpha_a=vec_and(a, alphamask);
575
alpha_b=vec_and(b, alphamask);
576
alpha=vec_min(alpha_a, alpha_b);
581
al=vec_add(al,ox0080);
583
ah=vec_add(ah,ox0080);
585
al=vec_add(al,vec_sr(al,ox0008));
586
ah=vec_add(ah,vec_sr(ah,ox0008));
588
d=vec_perm((vector unsigned char)al,(vector unsigned char)ah,combine_high_bytes);
591
d=vec_andc(d, alphamask);
594
StoreUnalignedLess(d, D, length);
598
/* AltiVec 'grain merge' composite: bytes are widened to signed 16-bit
 * lanes, a and b are summed (the vec_add is missing from this
 * fragment), offset by -128 (oxff80), saturate-packed back to bytes,
 * and the alpha lanes replaced with min(alpha_a, alpha_b).
 * NOTE(review): fragment is corrupted -- return type, braces, main
 * loop, `D` pointer, the a+b additions, the bh/bl offset lines, the
 * vec_packs and the final alpha merge are missing. */
gimp_composite_grain_merge_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
600
const guchar *A = ctx->A;
601
const guchar *B = ctx->B;
603
guint length = ctx->n_pixels;
604
vector unsigned char a,b,d,alpha_a,alpha_b,alpha;
605
vector signed short ah,al,bh,bl;
612
alpha_a=vec_and(a, alphamask);
613
alpha_b=vec_and(b, alphamask);
614
alpha=vec_min(alpha_a, alpha_b);
616
/* widen bytes to signed shorts; the vec_and with ox00ff undoes the
 * sign extension so each lane holds the original unsigned byte */
ah=vec_unpackh((vector signed char)a);
617
ah=vec_and(ah,ox00ff);
618
al=vec_unpackl((vector signed char)a);
619
al=vec_and(al,ox00ff);
620
bh=vec_unpackh((vector signed char)b);
621
bh=vec_and(bh,ox00ff);
622
bl=vec_unpackl((vector signed char)b);
623
bl=vec_and(bl,ox00ff);
627
/* grain merge offset: subtract 128 (add 0xff80) per lane */
ah=vec_add(ah,oxff80);
628
al=vec_add(al,oxff80);
632
d=vec_andc(d, alphamask);
635
StoreUnaligned(d, D);
642
/* process last pixels */
644
a=LoadUnalignedLess(A, length);
645
b=LoadUnalignedLess(B, length);
647
alpha_a=vec_and(a, alphamask);
648
alpha_b=vec_and(b, alphamask);
649
alpha=vec_min(alpha_a, alpha_b);
651
ah=vec_unpackh((vector signed char)a);
652
ah=vec_and(ah,ox00ff);
653
al=vec_unpackl((vector signed char)a);
654
al=vec_and(al,ox00ff);
655
bh=vec_unpackh((vector signed char)b);
656
bh=vec_and(bh,ox00ff);
657
bl=vec_unpackl((vector signed char)b);
658
bl=vec_and(bl,ox00ff);
662
ah=vec_add(ah,oxff80);
663
al=vec_add(al,oxff80);
667
d=vec_andc(d, alphamask);
670
StoreUnalignedLess(d, D, length);
674
/* AltiVec 'grain extract' composite: mirror image of grain merge --
 * widen to signed 16-bit, subtract (the a-b vec_sub is missing from
 * this fragment), remove the -128 offset via vec_sub(..., oxff80),
 * saturate-pack, and replace alpha with min(alpha_a, alpha_b).
 * NOTE(review): fragment is corrupted -- return type, braces, main
 * loop, `D` pointer, the a-b subtraction, the vec_packs and the final
 * alpha merge are missing. */
gimp_composite_grain_extract_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
676
const guchar *A = ctx->A;
677
const guchar *B = ctx->B;
679
guint length = ctx->n_pixels;
680
vector unsigned char a,b,d,alpha_a,alpha_b,alpha;
681
vector signed short ah,al,bh,bl;
688
alpha_a=vec_and(a, alphamask);
689
alpha_b=vec_and(b, alphamask);
690
alpha=vec_min(alpha_a, alpha_b);
692
/* widen bytes to signed shorts, masking off the sign extension */
ah=vec_unpackh((vector signed char)a);
693
ah=vec_and(ah,ox00ff);
694
al=vec_unpackl((vector signed char)a);
695
al=vec_and(al,ox00ff);
696
bh=vec_unpackh((vector signed char)b);
697
bh=vec_and(bh,ox00ff);
698
bl=vec_unpackl((vector signed char)b);
699
bl=vec_and(bl,ox00ff);
703
/* grain extract offset: add 128 (subtract 0xff80) per lane */
ah=vec_sub(ah,oxff80);
704
al=vec_sub(al,oxff80);
708
d=vec_andc(d, alphamask);
711
StoreUnaligned(d, D);
718
/* process last pixels */
720
a=LoadUnalignedLess(A, length);
721
b=LoadUnalignedLess(B, length);
723
alpha_a=vec_and(a, alphamask);
724
alpha_b=vec_and(b, alphamask);
725
alpha=vec_min(alpha_a, alpha_b);
727
ah=vec_unpackh((vector signed char)a);
728
ah=vec_and(ah,ox00ff);
729
al=vec_unpackl((vector signed char)a);
730
al=vec_and(al,ox00ff);
731
bh=vec_unpackh((vector signed char)b);
732
bh=vec_and(bh,ox00ff);
733
bl=vec_unpackl((vector signed char)b);
734
bl=vec_and(bl,ox00ff);
738
ah=vec_sub(ah,oxff80);
739
al=vec_sub(al,oxff80);
743
d=vec_andc(d, alphamask);
746
StoreUnalignedLess(d, D, length);
750
/* AltiVec 'divide' composite: numerator lanes are widened and scaled
 * by 256 (vec_sl by ox0008); denominator lanes get +1 (ox0001) to
 * avoid division by zero; the divisions themselves drop to scalar
 * code through a union (AltiVec has no integer divide), then the
 * quotients are saturate-packed and alpha replaced with
 * min(alpha_a, alpha_b).
 * NOTE(review): fragment is corrupted -- return type, braces, main
 * loop, `D` pointer and the union declarations wrapping the visible
 * `v`/`vu` members (used below as ah.v, ah.u16, ah.vu, etc.) are
 * missing, as is the final merge of `alpha` into `d`. */
gimp_composite_divide_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
752
const guchar *A = ctx->A;
753
const guchar *B = ctx->B;
755
guint length = ctx->n_pixels;
756
vector unsigned char a,b,d;
757
vector unsigned char alpha_a,alpha_b,alpha;
758
vector signed short ox0001=vec_splat_s16(1);
761
vector signed short v;
762
vector unsigned short vu;
771
alpha_a=vec_and(a, alphamask);
772
alpha_b=vec_and(b, alphamask);
773
alpha=vec_min(alpha_a, alpha_b);
775
ah.v=vec_unpackh((vector signed char)a);
776
ah.v=vec_sl(ah.v,ox0008);
777
al.v=vec_unpackl((vector signed char)a);
778
al.v=vec_sl(al.v,ox0008);
780
bh.v=vec_unpackh((vector signed char)b);
781
bh.v=vec_and(bh.v,ox00ff);
782
bh.v=vec_add(bh.v,ox0001);
783
bl.v=vec_unpackl((vector signed char)b);
784
bl.v=vec_and(bl.v,ox00ff);
785
bl.v=vec_add(bl.v,ox0001);
787
/* scalar divides; lanes 3 and 7 are skipped -- presumably the alpha
 * lanes, which are overwritten afterwards */
ah.u16[0]=ah.u16[0]/bh.u16[0];
788
ah.u16[1]=ah.u16[1]/bh.u16[1];
789
ah.u16[2]=ah.u16[2]/bh.u16[2];
790
ah.u16[4]=ah.u16[4]/bh.u16[4];
791
ah.u16[5]=ah.u16[5]/bh.u16[5];
792
ah.u16[6]=ah.u16[6]/bh.u16[6];
794
al.u16[0]=al.u16[0]/bl.u16[0];
795
al.u16[1]=al.u16[1]/bl.u16[1];
796
al.u16[2]=al.u16[2]/bl.u16[2];
797
al.u16[4]=al.u16[4]/bl.u16[4];
798
al.u16[5]=al.u16[5]/bl.u16[5];
799
al.u16[6]=al.u16[6]/bl.u16[6];
801
d=vec_packs(ah.vu,al.vu);
803
d=vec_andc(d, alphamask);
806
StoreUnaligned(d, D);
813
a=LoadUnalignedLess(A, length);
814
b=LoadUnalignedLess(B, length);
816
alpha_a=vec_and(a, alphamask);
817
alpha_b=vec_and(b, alphamask);
818
alpha=vec_min(alpha_a, alpha_b);
820
ah.v=vec_unpackh((vector signed char)a);
821
ah.v=vec_sl(ah.v,ox0008);
822
al.v=vec_unpackl((vector signed char)a);
823
al.v=vec_sl(al.v,ox0008);
825
bh.v=vec_unpackh((vector signed char)b);
826
bh.v=vec_and(bh.v,ox00ff);
827
bh.v=vec_add(bh.v,ox0001);
828
bl.v=vec_unpackl((vector signed char)b);
829
bl.v=vec_and(bl.v,ox00ff);
830
bl.v=vec_add(bl.v,ox0001);
832
ah.u16[0]=ah.u16[0]/bh.u16[0];
833
ah.u16[1]=ah.u16[1]/bh.u16[1];
834
ah.u16[2]=ah.u16[2]/bh.u16[2];
835
ah.u16[4]=ah.u16[4]/bh.u16[4];
836
ah.u16[5]=ah.u16[5]/bh.u16[5];
837
ah.u16[6]=ah.u16[6]/bh.u16[6];
839
al.u16[0]=al.u16[0]/bl.u16[0];
840
al.u16[1]=al.u16[1]/bl.u16[1];
841
al.u16[2]=al.u16[2]/bl.u16[2];
842
al.u16[4]=al.u16[4]/bl.u16[4];
843
al.u16[5]=al.u16[5]/bl.u16[5];
844
al.u16[6]=al.u16[6]/bl.u16[6];
846
d=vec_packs(ah.vu,al.vu);
848
d=vec_andc(d, alphamask);
851
StoreUnalignedLess(d, D, length);
855
/* AltiVec 'dodge' composite.  The visible skeleton is identical to the
 * divide kernel: widen A's lanes scaled by 256, add 1 to B's lanes,
 * scalar-divide (no AltiVec integer divide), saturate-pack, and
 * replace alpha with min(alpha_a, alpha_b).  Whatever distinguishes
 * dodge from divide (presumably a complement of B before dividing) is
 * among the missing lines.
 * NOTE(review): fragment is corrupted -- return type, braces, main
 * loop, `D` pointer, the ah/al/bh/bl union declarations and the final
 * alpha merge are missing. */
gimp_composite_dodge_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
857
const guchar *A = ctx->A;
858
const guchar *B = ctx->B;
860
guint length = ctx->n_pixels;
861
vector unsigned char a,b,d;
862
vector unsigned char alpha_a,alpha_b,alpha;
863
vector signed short ox0001=vec_splat_s16(1);
866
vector signed short v;
867
vector unsigned short vu;
876
alpha_a=vec_and(a, alphamask);
877
alpha_b=vec_and(b, alphamask);
878
alpha=vec_min(alpha_a, alpha_b);
880
ah.v=vec_unpackh((vector signed char)a);
881
ah.v=vec_sl(ah.v,ox0008);
882
al.v=vec_unpackl((vector signed char)a);
883
al.v=vec_sl(al.v,ox0008);
886
bh.v=vec_unpackh((vector signed char)b);
887
bh.v=vec_and(bh.v,ox00ff);
888
bh.v=vec_add(bh.v,ox0001);
889
bl.v=vec_unpackl((vector signed char)b);
890
bl.v=vec_and(bl.v,ox00ff);
891
bl.v=vec_add(bl.v,ox0001);
893
/* scalar divides; alpha lanes 3 and 7 skipped (overwritten later) */
ah.u16[0]=ah.u16[0]/bh.u16[0];
894
ah.u16[1]=ah.u16[1]/bh.u16[1];
895
ah.u16[2]=ah.u16[2]/bh.u16[2];
896
ah.u16[4]=ah.u16[4]/bh.u16[4];
897
ah.u16[5]=ah.u16[5]/bh.u16[5];
898
ah.u16[6]=ah.u16[6]/bh.u16[6];
900
al.u16[0]=al.u16[0]/bl.u16[0];
901
al.u16[1]=al.u16[1]/bl.u16[1];
902
al.u16[2]=al.u16[2]/bl.u16[2];
903
al.u16[4]=al.u16[4]/bl.u16[4];
904
al.u16[5]=al.u16[5]/bl.u16[5];
905
al.u16[6]=al.u16[6]/bl.u16[6];
907
d=vec_packs(ah.vu,al.vu);
909
d=vec_andc(d, alphamask);
912
StoreUnaligned(d, D);
919
a=LoadUnalignedLess(A, length);
920
b=LoadUnalignedLess(B, length);
922
alpha_a=vec_and(a, alphamask);
923
alpha_b=vec_and(b, alphamask);
924
alpha=vec_min(alpha_a, alpha_b);
926
ah.v=vec_unpackh((vector signed char)a);
927
ah.v=vec_sl(ah.v,ox0008);
928
al.v=vec_unpackl((vector signed char)a);
929
al.v=vec_sl(al.v,ox0008);
932
bh.v=vec_unpackh((vector signed char)b);
933
bh.v=vec_and(bh.v,ox00ff);
934
bh.v=vec_add(bh.v,ox0001);
935
bl.v=vec_unpackl((vector signed char)b);
936
bl.v=vec_and(bl.v,ox00ff);
937
bl.v=vec_add(bl.v,ox0001);
939
ah.u16[0]=ah.u16[0]/bh.u16[0];
940
ah.u16[1]=ah.u16[1]/bh.u16[1];
941
ah.u16[2]=ah.u16[2]/bh.u16[2];
942
ah.u16[4]=ah.u16[4]/bh.u16[4];
943
ah.u16[5]=ah.u16[5]/bh.u16[5];
944
ah.u16[6]=ah.u16[6]/bh.u16[6];
946
al.u16[0]=al.u16[0]/bl.u16[0];
947
al.u16[1]=al.u16[1]/bl.u16[1];
948
al.u16[2]=al.u16[2]/bl.u16[2];
949
al.u16[4]=al.u16[4]/bl.u16[4];
950
al.u16[5]=al.u16[5]/bl.u16[5];
951
al.u16[6]=al.u16[6]/bl.u16[6];
953
d=vec_packs(ah.vu,al.vu);
955
d=vec_andc(d, alphamask);
958
StoreUnalignedLess(d, D, length);
961
#endif /* COMPILE_ALTIVEC_IS_OKAY */