41
41
b3 = vec_sub( b3, a1 )
43
43
void x264_sub4x4_dct_altivec( int16_t dct[4][4],
44
uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
44
uint8_t *pix1, uint8_t *pix2 )
48
48
vec_s16_t dct0v, dct1v, dct2v, dct3v;
49
49
vec_s16_t tmp0v, tmp1v, tmp2v, tmp3v;
51
VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 4, dct0v );
52
VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 4, dct1v );
53
VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 4, dct2v );
54
VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 4, dct3v );
55
VEC_DCT( dct0v, dct1v, dct2v, dct3v, tmp0v, tmp1v, tmp2v, tmp3v );
56
VEC_TRANSPOSE_4( tmp0v, tmp1v, tmp2v, tmp3v,
57
dct0v, dct1v, dct2v, dct3v );
58
VEC_DCT( dct0v, dct1v, dct2v, dct3v, tmp0v, tmp1v, tmp2v, tmp3v );
59
VEC_TRANSPOSE_4( tmp0v, tmp1v, tmp2v, tmp3v,
60
dct0v, dct1v, dct2v, dct3v );
61
VEC_STORE8( dct0v, dct[0] );
62
VEC_STORE8( dct1v, dct[1] );
63
VEC_STORE8( dct2v, dct[2] );
64
VEC_STORE8( dct3v, dct[3] );
51
VEC_DIFF_H( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, 4, dct0v );
52
VEC_DIFF_H( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, 4, dct1v );
53
VEC_DIFF_H( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, 4, dct2v );
54
VEC_DIFF_H( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, 4, dct3v );
55
VEC_DCT( dct0v, dct1v, dct2v, dct3v, tmp0v, tmp1v, tmp2v, tmp3v );
56
VEC_TRANSPOSE_4( tmp0v, tmp1v, tmp2v, tmp3v,
57
dct0v, dct1v, dct2v, dct3v );
58
VEC_DCT( dct0v, dct1v, dct2v, dct3v, tmp0v, tmp1v, tmp2v, tmp3v );
59
VEC_STORE8( tmp0v, dct[0] );
60
VEC_STORE8( tmp1v, dct[1] );
61
VEC_STORE8( tmp2v, dct[2] );
62
VEC_STORE8( tmp3v, dct[3] );
67
65
void x264_sub8x8_dct_altivec( int16_t dct[4][4][4],
68
uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
66
uint8_t *pix1, uint8_t *pix2 )
72
70
vec_s16_t dct0v, dct1v, dct2v, dct3v, dct4v, dct5v, dct6v, dct7v;
73
71
vec_s16_t tmp0v, tmp1v, tmp2v, tmp3v, tmp4v, tmp5v, tmp6v, tmp7v;
75
VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, dct0v );
76
VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, dct1v );
77
VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, dct2v );
78
VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, dct3v );
79
VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, dct4v );
80
VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, dct5v );
81
VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, dct6v );
82
VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, dct7v );
83
VEC_DCT( dct0v, dct1v, dct2v, dct3v, tmp0v, tmp1v, tmp2v, tmp3v );
84
VEC_DCT( dct4v, dct5v, dct6v, dct7v, tmp4v, tmp5v, tmp6v, tmp7v );
85
VEC_TRANSPOSE_8( tmp0v, tmp1v, tmp2v, tmp3v,
86
tmp4v, tmp5v, tmp6v, tmp7v,
87
dct0v, dct1v, dct2v, dct3v,
88
dct4v, dct5v, dct6v, dct7v );
89
VEC_DCT( dct0v, dct1v, dct2v, dct3v, tmp0v, tmp1v, tmp2v, tmp3v );
90
VEC_DCT( dct4v, dct5v, dct6v, dct7v, tmp4v, tmp5v, tmp6v, tmp7v );
91
VEC_TRANSPOSE_8( tmp0v, tmp1v, tmp2v, tmp3v,
92
tmp4v, tmp5v, tmp6v, tmp7v,
93
dct0v, dct1v, dct2v, dct3v,
94
dct4v, dct5v, dct6v, dct7v );
95
VEC_STORE8_H( dct0v, dct[0][0] );
96
VEC_STORE8_L( dct0v, dct[1][0] );
97
VEC_STORE8_H( dct1v, dct[0][1] );
98
VEC_STORE8_L( dct1v, dct[1][1] );
99
VEC_STORE8_H( dct2v, dct[0][2] );
100
VEC_STORE8_L( dct2v, dct[1][2] );
101
VEC_STORE8_H( dct3v, dct[0][3] );
102
VEC_STORE8_L( dct3v, dct[1][3] );
103
VEC_STORE8_H( dct4v, dct[2][0] );
104
VEC_STORE8_L( dct4v, dct[3][0] );
105
VEC_STORE8_H( dct5v, dct[2][1] );
106
VEC_STORE8_L( dct5v, dct[3][1] );
107
VEC_STORE8_H( dct6v, dct[2][2] );
108
VEC_STORE8_L( dct6v, dct[3][2] );
109
VEC_STORE8_H( dct7v, dct[2][3] );
110
VEC_STORE8_L( dct7v, dct[3][3] );
73
VEC_DIFF_H( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, 8, dct0v );
74
VEC_DIFF_H( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, 8, dct1v );
75
VEC_DIFF_H( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, 8, dct2v );
76
VEC_DIFF_H( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, 8, dct3v );
77
VEC_DIFF_H( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, 8, dct4v );
78
VEC_DIFF_H( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, 8, dct5v );
79
VEC_DIFF_H( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, 8, dct6v );
80
VEC_DIFF_H( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, 8, dct7v );
81
VEC_DCT( dct0v, dct1v, dct2v, dct3v, tmp0v, tmp1v, tmp2v, tmp3v );
82
VEC_DCT( dct4v, dct5v, dct6v, dct7v, tmp4v, tmp5v, tmp6v, tmp7v );
83
VEC_TRANSPOSE_8( tmp0v, tmp1v, tmp2v, tmp3v,
84
tmp4v, tmp5v, tmp6v, tmp7v,
85
dct0v, dct1v, dct2v, dct3v,
86
dct4v, dct5v, dct6v, dct7v );
87
VEC_DCT( dct0v, dct1v, dct2v, dct3v, tmp0v, tmp1v, tmp2v, tmp3v );
88
VEC_STORE8_H( tmp0v, dct[0][0] );
89
VEC_STORE8_H( tmp1v, dct[0][1] );
90
VEC_STORE8_H( tmp2v, dct[0][2] );
91
VEC_STORE8_H( tmp3v, dct[0][3] );
92
VEC_STORE8_L( tmp0v, dct[2][0] );
93
VEC_STORE8_L( tmp1v, dct[2][1] );
94
VEC_STORE8_L( tmp2v, dct[2][2] );
95
VEC_STORE8_L( tmp3v, dct[2][3] );
96
VEC_DCT( dct4v, dct5v, dct6v, dct7v, tmp4v, tmp5v, tmp6v, tmp7v );
97
VEC_STORE8_H( tmp4v, dct[1][0] );
98
VEC_STORE8_H( tmp5v, dct[1][1] );
99
VEC_STORE8_H( tmp6v, dct[1][2] );
100
VEC_STORE8_H( tmp7v, dct[1][3] );
101
VEC_STORE8_L( tmp4v, dct[3][0] );
102
VEC_STORE8_L( tmp5v, dct[3][1] );
103
VEC_STORE8_L( tmp6v, dct[3][2] );
104
VEC_STORE8_L( tmp7v, dct[3][3] );
113
107
void x264_sub16x16_dct_altivec( int16_t dct[16][4][4],
114
uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
108
uint8_t *pix1, uint8_t *pix2 )
122
116
vec_s16_t temp0v, temp1v, temp2v, temp3v,
123
117
temp4v, temp5v, temp6v, temp7v;
125
VEC_DIFF_HL( pix1, i_pix1, pix2, i_pix2, dcth0v, dctl0v );
126
VEC_DIFF_HL( pix1, i_pix1, pix2, i_pix2, dcth1v, dctl1v );
127
VEC_DIFF_HL( pix1, i_pix1, pix2, i_pix2, dcth2v, dctl2v );
128
VEC_DIFF_HL( pix1, i_pix1, pix2, i_pix2, dcth3v, dctl3v );
129
VEC_DIFF_HL( pix1, i_pix1, pix2, i_pix2, dcth4v, dctl4v );
130
VEC_DIFF_HL( pix1, i_pix1, pix2, i_pix2, dcth5v, dctl5v );
131
VEC_DIFF_HL( pix1, i_pix1, pix2, i_pix2, dcth6v, dctl6v );
132
VEC_DIFF_HL( pix1, i_pix1, pix2, i_pix2, dcth7v, dctl7v );
134
VEC_DCT( dcth0v, dcth1v, dcth2v, dcth3v,
135
temp0v, temp1v, temp2v, temp3v );
136
VEC_DCT( dcth4v, dcth5v, dcth6v, dcth7v,
137
temp4v, temp5v, temp6v, temp7v );
138
VEC_TRANSPOSE_8( temp0v, temp1v, temp2v, temp3v,
139
temp4v, temp5v, temp6v, temp7v,
140
dcth0v, dcth1v, dcth2v, dcth3v,
141
dcth4v, dcth5v, dcth6v, dcth7v );
142
VEC_DCT( dcth0v, dcth1v, dcth2v, dcth3v,
143
temp0v, temp1v, temp2v, temp3v );
144
VEC_DCT( dcth4v, dcth5v, dcth6v, dcth7v,
145
temp4v, temp5v, temp6v, temp7v );
146
VEC_TRANSPOSE_8( temp0v, temp1v, temp2v, temp3v,
147
temp4v, temp5v, temp6v, temp7v,
148
dcth0v, dcth1v, dcth2v, dcth3v,
149
dcth4v, dcth5v, dcth6v, dcth7v );
150
VEC_STORE8_H( dcth0v, dct[0][0] );
151
VEC_STORE8_L( dcth0v, dct[1][0] );
152
VEC_STORE8_H( dcth1v, dct[0][1] );
153
VEC_STORE8_L( dcth1v, dct[1][1] );
154
VEC_STORE8_H( dcth2v, dct[0][2] );
155
VEC_STORE8_L( dcth2v, dct[1][2] );
156
VEC_STORE8_H( dcth3v, dct[0][3] );
157
VEC_STORE8_L( dcth3v, dct[1][3] );
158
VEC_STORE8_H( dcth4v, dct[2][0] );
159
VEC_STORE8_L( dcth4v, dct[3][0] );
160
VEC_STORE8_H( dcth5v, dct[2][1] );
161
VEC_STORE8_L( dcth5v, dct[3][1] );
162
VEC_STORE8_H( dcth6v, dct[2][2] );
163
VEC_STORE8_L( dcth6v, dct[3][2] );
164
VEC_STORE8_H( dcth7v, dct[2][3] );
165
VEC_STORE8_L( dcth7v, dct[3][3] );
167
VEC_DCT( dctl0v, dctl1v, dctl2v, dctl3v,
168
temp0v, temp1v, temp2v, temp3v );
169
VEC_DCT( dctl4v, dctl5v, dctl6v, dctl7v,
170
temp4v, temp5v, temp6v, temp7v );
171
VEC_TRANSPOSE_8( temp0v, temp1v, temp2v, temp3v,
172
temp4v, temp5v, temp6v, temp7v,
173
dctl0v, dctl1v, dctl2v, dctl3v,
174
dctl4v, dctl5v, dctl6v, dctl7v );
175
VEC_DCT( dctl0v, dctl1v, dctl2v, dctl3v,
176
temp0v, temp1v, temp2v, temp3v );
177
VEC_DCT( dctl4v, dctl5v, dctl6v, dctl7v,
178
temp4v, temp5v, temp6v, temp7v );
179
VEC_TRANSPOSE_8( temp0v, temp1v, temp2v, temp3v,
180
temp4v, temp5v, temp6v, temp7v,
181
dctl0v, dctl1v, dctl2v, dctl3v,
182
dctl4v, dctl5v, dctl6v, dctl7v );
183
VEC_STORE8_H( dctl0v, dct[4][0] );
184
VEC_STORE8_L( dctl0v, dct[5][0] );
185
VEC_STORE8_H( dctl1v, dct[4][1] );
186
VEC_STORE8_L( dctl1v, dct[5][1] );
187
VEC_STORE8_H( dctl2v, dct[4][2] );
188
VEC_STORE8_L( dctl2v, dct[5][2] );
189
VEC_STORE8_H( dctl3v, dct[4][3] );
190
VEC_STORE8_L( dctl3v, dct[5][3] );
191
VEC_STORE8_H( dctl4v, dct[6][0] );
192
VEC_STORE8_L( dctl4v, dct[7][0] );
193
VEC_STORE8_H( dctl5v, dct[6][1] );
194
VEC_STORE8_L( dctl5v, dct[7][1] );
195
VEC_STORE8_H( dctl6v, dct[6][2] );
196
VEC_STORE8_L( dctl6v, dct[7][2] );
197
VEC_STORE8_H( dctl7v, dct[6][3] );
198
VEC_STORE8_L( dctl7v, dct[7][3] );
200
VEC_DIFF_HL( pix1, i_pix1, pix2, i_pix2, dcth0v, dctl0v );
201
VEC_DIFF_HL( pix1, i_pix1, pix2, i_pix2, dcth1v, dctl1v );
202
VEC_DIFF_HL( pix1, i_pix1, pix2, i_pix2, dcth2v, dctl2v );
203
VEC_DIFF_HL( pix1, i_pix1, pix2, i_pix2, dcth3v, dctl3v );
204
VEC_DIFF_HL( pix1, i_pix1, pix2, i_pix2, dcth4v, dctl4v );
205
VEC_DIFF_HL( pix1, i_pix1, pix2, i_pix2, dcth5v, dctl5v );
206
VEC_DIFF_HL( pix1, i_pix1, pix2, i_pix2, dcth6v, dctl6v );
207
VEC_DIFF_HL( pix1, i_pix1, pix2, i_pix2, dcth7v, dctl7v );
209
VEC_DCT( dcth0v, dcth1v, dcth2v, dcth3v,
210
temp0v, temp1v, temp2v, temp3v );
211
VEC_DCT( dcth4v, dcth5v, dcth6v, dcth7v,
212
temp4v, temp5v, temp6v, temp7v );
213
VEC_TRANSPOSE_8( temp0v, temp1v, temp2v, temp3v,
214
temp4v, temp5v, temp6v, temp7v,
215
dcth0v, dcth1v, dcth2v, dcth3v,
216
dcth4v, dcth5v, dcth6v, dcth7v );
217
VEC_DCT( dcth0v, dcth1v, dcth2v, dcth3v,
218
temp0v, temp1v, temp2v, temp3v );
219
VEC_DCT( dcth4v, dcth5v, dcth6v, dcth7v,
220
temp4v, temp5v, temp6v, temp7v );
221
VEC_TRANSPOSE_8( temp0v, temp1v, temp2v, temp3v,
222
temp4v, temp5v, temp6v, temp7v,
223
dcth0v, dcth1v, dcth2v, dcth3v,
224
dcth4v, dcth5v, dcth6v, dcth7v );
225
VEC_STORE8_H( dcth0v, dct[8][0] );
226
VEC_STORE8_L( dcth0v, dct[9][0] );
227
VEC_STORE8_H( dcth1v, dct[8][1] );
228
VEC_STORE8_L( dcth1v, dct[9][1] );
229
VEC_STORE8_H( dcth2v, dct[8][2] );
230
VEC_STORE8_L( dcth2v, dct[9][2] );
231
VEC_STORE8_H( dcth3v, dct[8][3] );
232
VEC_STORE8_L( dcth3v, dct[9][3] );
233
VEC_STORE8_H( dcth4v, dct[10][0] );
234
VEC_STORE8_L( dcth4v, dct[11][0] );
235
VEC_STORE8_H( dcth5v, dct[10][1] );
236
VEC_STORE8_L( dcth5v, dct[11][1] );
237
VEC_STORE8_H( dcth6v, dct[10][2] );
238
VEC_STORE8_L( dcth6v, dct[11][2] );
239
VEC_STORE8_H( dcth7v, dct[10][3] );
240
VEC_STORE8_L( dcth7v, dct[11][3] );
242
VEC_DCT( dctl0v, dctl1v, dctl2v, dctl3v,
243
temp0v, temp1v, temp2v, temp3v );
244
VEC_DCT( dctl4v, dctl5v, dctl6v, dctl7v,
245
temp4v, temp5v, temp6v, temp7v );
246
VEC_TRANSPOSE_8( temp0v, temp1v, temp2v, temp3v,
247
temp4v, temp5v, temp6v, temp7v,
248
dctl0v, dctl1v, dctl2v, dctl3v,
249
dctl4v, dctl5v, dctl6v, dctl7v );
250
VEC_DCT( dctl0v, dctl1v, dctl2v, dctl3v,
251
temp0v, temp1v, temp2v, temp3v );
252
VEC_DCT( dctl4v, dctl5v, dctl6v, dctl7v,
253
temp4v, temp5v, temp6v, temp7v );
254
VEC_TRANSPOSE_8( temp0v, temp1v, temp2v, temp3v,
255
temp4v, temp5v, temp6v, temp7v,
256
dctl0v, dctl1v, dctl2v, dctl3v,
257
dctl4v, dctl5v, dctl6v, dctl7v );
258
VEC_STORE8_H( dctl0v, dct[12][0] );
259
VEC_STORE8_L( dctl0v, dct[13][0] );
260
VEC_STORE8_H( dctl1v, dct[12][1] );
261
VEC_STORE8_L( dctl1v, dct[13][1] );
262
VEC_STORE8_H( dctl2v, dct[12][2] );
263
VEC_STORE8_L( dctl2v, dct[13][2] );
264
VEC_STORE8_H( dctl3v, dct[12][3] );
265
VEC_STORE8_L( dctl3v, dct[13][3] );
266
VEC_STORE8_H( dctl4v, dct[14][0] );
267
VEC_STORE8_L( dctl4v, dct[15][0] );
268
VEC_STORE8_H( dctl5v, dct[14][1] );
269
VEC_STORE8_L( dctl5v, dct[15][1] );
270
VEC_STORE8_H( dctl6v, dct[14][2] );
271
VEC_STORE8_L( dctl6v, dct[15][2] );
272
VEC_STORE8_H( dctl7v, dct[14][3] );
273
VEC_STORE8_L( dctl7v, dct[15][3] );
119
VEC_DIFF_HL( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, dcth0v, dctl0v );
120
VEC_DIFF_HL( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, dcth1v, dctl1v );
121
VEC_DIFF_HL( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, dcth2v, dctl2v );
122
VEC_DIFF_HL( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, dcth3v, dctl3v );
123
VEC_DIFF_HL( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, dcth4v, dctl4v );
124
VEC_DIFF_HL( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, dcth5v, dctl5v );
125
VEC_DIFF_HL( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, dcth6v, dctl6v );
126
VEC_DIFF_HL( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, dcth7v, dctl7v );
128
VEC_DCT( dcth0v, dcth1v, dcth2v, dcth3v,
129
temp0v, temp1v, temp2v, temp3v );
130
VEC_DCT( dcth4v, dcth5v, dcth6v, dcth7v,
131
temp4v, temp5v, temp6v, temp7v );
132
VEC_TRANSPOSE_8( temp0v, temp1v, temp2v, temp3v,
133
temp4v, temp5v, temp6v, temp7v,
134
dcth0v, dcth1v, dcth2v, dcth3v,
135
dcth4v, dcth5v, dcth6v, dcth7v );
136
VEC_DCT( dcth0v, dcth1v, dcth2v, dcth3v,
137
temp0v, temp1v, temp2v, temp3v );
138
VEC_STORE8_H( temp0v, dct[0][0] );
139
VEC_STORE8_H( temp1v, dct[0][1] );
140
VEC_STORE8_H( temp2v, dct[0][2] );
141
VEC_STORE8_H( temp3v, dct[0][3] );
142
VEC_STORE8_L( temp0v, dct[2][0] );
143
VEC_STORE8_L( temp1v, dct[2][1] );
144
VEC_STORE8_L( temp2v, dct[2][2] );
145
VEC_STORE8_L( temp3v, dct[2][3] );
146
VEC_DCT( dcth4v, dcth5v, dcth6v, dcth7v,
147
temp4v, temp5v, temp6v, temp7v );
148
VEC_STORE8_H( temp4v, dct[1][0] );
149
VEC_STORE8_H( temp5v, dct[1][1] );
150
VEC_STORE8_H( temp6v, dct[1][2] );
151
VEC_STORE8_H( temp7v, dct[1][3] );
152
VEC_STORE8_L( temp4v, dct[3][0] );
153
VEC_STORE8_L( temp5v, dct[3][1] );
154
VEC_STORE8_L( temp6v, dct[3][2] );
155
VEC_STORE8_L( temp7v, dct[3][3] );
157
VEC_DCT( dctl0v, dctl1v, dctl2v, dctl3v,
158
temp0v, temp1v, temp2v, temp3v );
159
VEC_DCT( dctl4v, dctl5v, dctl6v, dctl7v,
160
temp4v, temp5v, temp6v, temp7v );
161
VEC_TRANSPOSE_8( temp0v, temp1v, temp2v, temp3v,
162
temp4v, temp5v, temp6v, temp7v,
163
dctl0v, dctl1v, dctl2v, dctl3v,
164
dctl4v, dctl5v, dctl6v, dctl7v );
165
VEC_DCT( dctl0v, dctl1v, dctl2v, dctl3v,
166
temp0v, temp1v, temp2v, temp3v );
167
VEC_STORE8_H( temp0v, dct[4][0] );
168
VEC_STORE8_H( temp1v, dct[4][1] );
169
VEC_STORE8_H( temp2v, dct[4][2] );
170
VEC_STORE8_H( temp3v, dct[4][3] );
171
VEC_STORE8_L( temp0v, dct[6][0] );
172
VEC_STORE8_L( temp1v, dct[6][1] );
173
VEC_STORE8_L( temp2v, dct[6][2] );
174
VEC_STORE8_L( temp3v, dct[6][3] );
175
VEC_DCT( dctl4v, dctl5v, dctl6v, dctl7v,
176
temp4v, temp5v, temp6v, temp7v );
177
VEC_STORE8_H( temp4v, dct[5][0] );
178
VEC_STORE8_H( temp5v, dct[5][1] );
179
VEC_STORE8_H( temp6v, dct[5][2] );
180
VEC_STORE8_H( temp7v, dct[5][3] );
181
VEC_STORE8_L( temp4v, dct[7][0] );
182
VEC_STORE8_L( temp5v, dct[7][1] );
183
VEC_STORE8_L( temp6v, dct[7][2] );
184
VEC_STORE8_L( temp7v, dct[7][3] );
186
VEC_DIFF_HL( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, dcth0v, dctl0v );
187
VEC_DIFF_HL( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, dcth1v, dctl1v );
188
VEC_DIFF_HL( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, dcth2v, dctl2v );
189
VEC_DIFF_HL( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, dcth3v, dctl3v );
190
VEC_DIFF_HL( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, dcth4v, dctl4v );
191
VEC_DIFF_HL( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, dcth5v, dctl5v );
192
VEC_DIFF_HL( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, dcth6v, dctl6v );
193
VEC_DIFF_HL( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, dcth7v, dctl7v );
195
VEC_DCT( dcth0v, dcth1v, dcth2v, dcth3v,
196
temp0v, temp1v, temp2v, temp3v );
197
VEC_DCT( dcth4v, dcth5v, dcth6v, dcth7v,
198
temp4v, temp5v, temp6v, temp7v );
199
VEC_TRANSPOSE_8( temp0v, temp1v, temp2v, temp3v,
200
temp4v, temp5v, temp6v, temp7v,
201
dcth0v, dcth1v, dcth2v, dcth3v,
202
dcth4v, dcth5v, dcth6v, dcth7v );
203
VEC_DCT( dcth0v, dcth1v, dcth2v, dcth3v,
204
temp0v, temp1v, temp2v, temp3v );
205
VEC_STORE8_H( temp0v, dct[8][0] );
206
VEC_STORE8_H( temp1v, dct[8][1] );
207
VEC_STORE8_H( temp2v, dct[8][2] );
208
VEC_STORE8_H( temp3v, dct[8][3] );
209
VEC_STORE8_L( temp0v, dct[10][0] );
210
VEC_STORE8_L( temp1v, dct[10][1] );
211
VEC_STORE8_L( temp2v, dct[10][2] );
212
VEC_STORE8_L( temp3v, dct[10][3] );
213
VEC_DCT( dcth4v, dcth5v, dcth6v, dcth7v,
214
temp4v, temp5v, temp6v, temp7v );
215
VEC_STORE8_H( temp4v, dct[9][0] );
216
VEC_STORE8_H( temp5v, dct[9][1] );
217
VEC_STORE8_H( temp6v, dct[9][2] );
218
VEC_STORE8_H( temp7v, dct[9][3] );
219
VEC_STORE8_L( temp4v, dct[11][0] );
220
VEC_STORE8_L( temp5v, dct[11][1] );
221
VEC_STORE8_L( temp6v, dct[11][2] );
222
VEC_STORE8_L( temp7v, dct[11][3] );
224
VEC_DCT( dctl0v, dctl1v, dctl2v, dctl3v,
225
temp0v, temp1v, temp2v, temp3v );
226
VEC_DCT( dctl4v, dctl5v, dctl6v, dctl7v,
227
temp4v, temp5v, temp6v, temp7v );
228
VEC_TRANSPOSE_8( temp0v, temp1v, temp2v, temp3v,
229
temp4v, temp5v, temp6v, temp7v,
230
dctl0v, dctl1v, dctl2v, dctl3v,
231
dctl4v, dctl5v, dctl6v, dctl7v );
232
VEC_DCT( dctl0v, dctl1v, dctl2v, dctl3v,
233
temp0v, temp1v, temp2v, temp3v );
234
VEC_STORE8_H( temp0v, dct[12][0] );
235
VEC_STORE8_H( temp1v, dct[12][1] );
236
VEC_STORE8_H( temp2v, dct[12][2] );
237
VEC_STORE8_H( temp3v, dct[12][3] );
238
VEC_STORE8_L( temp0v, dct[14][0] );
239
VEC_STORE8_L( temp1v, dct[14][1] );
240
VEC_STORE8_L( temp2v, dct[14][2] );
241
VEC_STORE8_L( temp3v, dct[14][3] );
242
VEC_DCT( dctl4v, dctl5v, dctl6v, dctl7v,
243
temp4v, temp5v, temp6v, temp7v );
244
VEC_STORE8_H( temp4v, dct[13][0] );
245
VEC_STORE8_H( temp5v, dct[13][1] );
246
VEC_STORE8_H( temp6v, dct[13][2] );
247
VEC_STORE8_H( temp7v, dct[13][3] );
248
VEC_STORE8_L( temp4v, dct[15][0] );
249
VEC_STORE8_L( temp5v, dct[15][1] );
250
VEC_STORE8_L( temp6v, dct[15][2] );
251
VEC_STORE8_L( temp7v, dct[15][3] );