63
void ff_snow_horizontal_compose97i_altivec(DWTELEM *b, int width)
61
void ff_snow_horizontal_compose97i_altivec(IDWTELEM *b, int width)
65
64
const int w2= (width+1)>>1;
66
DECLARE_ALIGNED_16(DWTELEM, temp[(width>>1)]);
65
DECLARE_ALIGNED_16(IDWTELEM, temp[(width>>1)]);
67
66
const int w_l= (width>>1);
68
67
const int w_r= w2 - 1;
70
vector signed int t1, t2, x, y, tmp1, tmp2;
71
vector signed int *vbuf, *vtmp;
69
vector signed short t1, t2, x, y, tmp1, tmp2;
70
vector signed short *vbuf, *vtmp;
72
71
vector unsigned char align;
77
DWTELEM * const ref = b + w2 - 1;
79
vbuf = (vector signed int *)b;
74
IDWTELEM * const ref = b + w2 - 1;
76
vector signed short v7 = vec_splat_s16(7);
77
vbuf = (vector signed short *)b;
81
79
tmp1 = vec_ld (0, ref);
82
80
align = vec_lvsl (0, ref);
83
81
tmp2 = vec_ld (15, ref);
84
t1= vec_perm(tmp1, tmp2, align);
82
t1 = vec_perm(tmp1, tmp2, align);
88
84
for (i=0; i<w_l-15; i+=16) {
90
b[i+0] = b[i+0] - ((3 * (ref[i+0] + ref[i+1]) + 4) >> 3);
86
/* b[i+0] = b[i+0] - ((3 * (ref[i+0] + ref[i+1]) + 4) >> 3);
91
87
b[i+1] = b[i+1] - ((3 * (ref[i+1] + ref[i+2]) + 4) >> 3);
92
88
b[i+2] = b[i+2] - ((3 * (ref[i+2] + ref[i+3]) + 4) >> 3);
93
b[i+3] = b[i+3] - ((3 * (ref[i+3] + ref[i+4]) + 4) >> 3);
89
b[i+3] = b[i+3] - ((3 * (ref[i+3] + ref[i+4]) + 4) >> 3);*/
90
b[i+0] = b[i+0] + ((7 * (ref[i+0] + ref[i+1])-1) >> 8);
96
tmp1 = vec_ld (0, ref+4+i);
97
tmp2 = vec_ld (15, ref+4+i);
99
t2 = vec_perm(tmp1, tmp2, align);
101
y = vec_add(t1,vec_sld(t1,t2,4));
102
y = vec_add(vec_add(y,y),y);
104
93
tmp1 = vec_ld (0, ref+8+i);
106
y = vec_add(y, vec_splat_s32(4));
107
y = vec_sra(y, vec_splat_u32(3));
109
94
tmp2 = vec_ld (15, ref+8+i);
96
t2 = vec_perm(tmp1, tmp2, align);
98
y = vec_add(t1, vec_sld(t1,t2,2));
99
// y = vec_add(vec_add(y,y),y);
101
tmp1 = vec_ld (0, ref+12+i);
103
y = vec_add(y, vec_splat_s32(4));
104
y = vec_sra(y, vec_splat_u32(3));
106
tmp2 = vec_ld (15, ref+12+i);
111
108
*vbuf = vec_sub(*vbuf, y);