38
38
;-----------------------------------------------------------------------------
39
; void h264_weight(uint8_t *dst, int stride, int log2_denom,
39
; void h264_weight(uint8_t *dst, int stride, int height, int log2_denom,
40
40
; int weight, int offset);
41
41
;-----------------------------------------------------------------------------
48
%macro WEIGHT_PROLOGUE 1
42
%macro WEIGHT_PROLOGUE 0
58
52
%macro WEIGHT_SETUP 1
61
55
pslld m0, m2 ; 1<<log2_denom
63
shl r4, 19 ; *8, move to upper half of dword
64
lea r4, [r4+r3*2+0x10000]
65
movd m3, r4d ; weight<<1 | 1+(offset<<(3))
57
shl r5, 19 ; *8, move to upper half of dword
58
lea r5, [r5+r4*2+0x10000]
59
movd m3, r5d ; weight<<1 | 1+(offset<<(3))
67
61
mova m4, [pw_pixel_max]
68
62
paddw m2, [sq_1] ; log2_denom+1
122
112
%macro WEIGHT_FUNC_MM 1
123
cglobal h264_weight_8x16_10_%1
113
cglobal h264_weight_8_10_%1
134
cglobal h264_weight_8x8_10_%1
136
jmp mangle(ff_h264_weight_8x16_10_%1.prologue)
138
cglobal h264_weight_8x4_10_%1
140
jmp mangle(ff_h264_weight_8x16_10_%1.prologue)
176
151
;-----------------------------------------------------------------------------
177
; void h264_biweight(uint8_t *dst, uint8_t *src, int stride, int log2_denom,
178
; int weightd, int weights, int offset);
152
; void h264_biweight(uint8_t *dst, uint8_t *src, int stride, int height,
153
; int log2_denom, int weightd, int weights, int offset);
179
154
;-----------------------------------------------------------------------------
180
155
%ifdef ARCH_X86_32
186
%macro BIWEIGHT_PROLOGUE 1
161
%macro BIWEIGHT_PROLOGUE 0
190
164
movifnidn r0, r0mp
191
165
movifnidn r1, r1mp
194
167
movifnidn r5d, r5m
195
168
movifnidn r6d, r6m
198
172
%macro BIWEIGHT_SETUP 1
199
lea r6, [r6*4+1] ; (offset<<2)+1
203
movd m4, r4d ; weightd | weights
204
movd m5, r6d ; (offset+1)|1
205
movd m6, r3m ; log2_denom
173
lea t0, [t0*4+1] ; (offset<<2)+1
177
movd m4, r5d ; weightd | weights
178
movd m5, t0d ; (offset+1)|1
179
movd m6, r4m ; log2_denom
206
180
pslld m5, m6 ; (((offset<<2)+1)|1)<<log2_denom
210
184
mova m3, [pw_pixel_max]
245
220
%macro BIWEIGHT_FUNC_DBL 1
246
cglobal h264_biweight_16x16_10_%1
221
cglobal h264_biweight_16_10_%1
248
223
BIWEIGHT_SETUP %1
260
cglobal h264_biweight_16x8_10_%1
262
jmp mangle(ff_h264_biweight_16x16_10_%1.prologue)
267
238
BIWEIGHT_FUNC_DBL sse4
269
240
%macro BIWEIGHT_FUNC 1
270
cglobal h264_biweight_8x16_10_%1
241
cglobal h264_biweight_8_10_%1
272
243
BIWEIGHT_SETUP %1
282
cglobal h264_biweight_8x8_10_%1
284
jmp mangle(ff_h264_biweight_8x16_10_%1.prologue)
286
cglobal h264_biweight_8x4_10_%1
288
jmp mangle(ff_h264_biweight_8x16_10_%1.prologue)
293
256
BIWEIGHT_FUNC sse4
295
258
%macro BIWEIGHT_FUNC_HALF 1
296
cglobal h264_biweight_4x8_10_%1
259
cglobal h264_biweight_4_10_%1
298
261
BIWEIGHT_SETUP %1
310
cglobal h264_biweight_4x4_10_%1
312
jmp mangle(ff_h264_biweight_4x8_10_%1.prologue)
314
cglobal h264_biweight_4x2_10_%1
316
jmp mangle(ff_h264_biweight_4x8_10_%1.prologue)