290
290
;-----------------------------------------------------------------------------
291
; void x264_mc_copy_w4_mmxext( uint8_t *src, int i_src_stride,
292
; uint8_t *dst, int i_dst_stride, int i_height )
291
; void x264_mc_copy_w4_mmxext( uint8_t *dst, int i_dst_stride,
292
; uint8_t *src, int i_src_stride, int i_height )
293
293
;-----------------------------------------------------------------------------
294
294
x264_mc_copy_w4_mmxext:
; NOTE(review): the bare numeric lines in this block (295, 300, 302, ...) are
; not instructions. They look like line numbers left over from a rendered
; diff -- confirm and strip them before assembling.
; NOTE(review): two variants of the same copy are interleaved below, which
; lines up with the two signatures shown in the header comment (src-first and
; dst-first). Variant A reads through parm1q/parm2q and writes through
; parm3q/parm4q; variant B does the reverse. Presumably only one variant is
; meant to survive -- confirm against the real file.
; Only the access at offset +stride is visible here; no access at offset 0,
; no loop label, no counter decrement, no branch and no ret appear in this
; block.
295
295
mov eax, parm5d ; i_height
300
; Variant A: 32-bit (4-byte) load from parm1q + parm2q ...
mov r11d, [parm1q+parm2q]
302
; ... and store of those 4 bytes to parm3q + parm4q.
mov [parm3q+parm4q], r11d
300
; Variant B: 32-bit (4-byte) load from parm3q + parm4q ...
mov r11d, [parm3q+parm4q]
302
; ... and store of those 4 bytes to parm1q + parm2q.
mov [parm1q+parm2q], r11d
303
; Pointer advance: parm3q += 2 * parm4q.
lea parm3q, [parm3q+parm4q*2]
303
304
; Pointer advance: parm1q += 2 * parm2q.
lea parm1q, [parm1q+parm2q*2]
304
; Second occurrence of the parm3q advance (the other variant's copy of the
; same line). Taken literally, parm3q would be advanced twice.
lea parm3q, [parm3q+parm4q*2]
312
312
;-----------------------------------------------------------------------------
313
; void x264_mc_copy_w8_mmxext( uint8_t *src, int i_src_stride,
314
; uint8_t *dst, int i_dst_stride, int i_height )
313
; void x264_mc_copy_w8_mmxext( uint8_t *dst, int i_dst_stride,
314
; uint8_t *src, int i_src_stride, int i_height )
315
315
;-----------------------------------------------------------------------------
316
316
x264_mc_copy_w8_mmxext:
; NOTE(review): the bare numeric lines in this block (317, 319, 320, ...) are
; not instructions. They look like line numbers left over from a rendered
; diff -- confirm and strip them before assembling.
; NOTE(review): two variants of the same copy are interleaved below, which
; lines up with the two signatures shown in the header comment (src-first and
; dst-first). Variant A reads through parm1q/parm2q and writes through
; parm3q/parm4q; variant B does the reverse. Presumably only one variant is
; meant to survive -- confirm against the real file.
; Only the accesses at offsets stride, 2*stride and 3*stride (mm1..mm3) are
; visible; no access at offset 0, no loop label, no counter decrement, no
; branch and no ret appear in this block.
317
317
mov eax, parm5d ; i_height
319
; Variant A: r10 = 3 * parm2q, r11 = 3 * parm4q.
lea r10, [parm2q+parm2q*2] ; 3 * i_src_stride
320
lea r11, [parm4q+parm4q*2] ; 3 * i_dst_stride
319
; Variant B: r10 = 3 * parm4q, r11 = 3 * parm2q.
; Taken literally, these two lines overwrite the r10/r11 values computed just
; above before any instruction has used them.
lea r10, [parm4q+parm4q*2] ; 3 * i_src_stride
320
lea r11, [parm2q+parm2q*2] ; 3 * i_dst_stride
325
; Variant A loads: three 8-byte reads from parm1q at +parm2q, +2*parm2q, +r10.
movq mm1, [parm1q+parm2q]
326
movq mm2, [parm1q+parm2q*2]
327
movq mm3, [parm1q+r10]
329
; Variant A stores: the same three 8-byte values written to parm3q at
; +parm4q, +2*parm4q, +r11.
movq [parm3q+parm4q], mm1
330
movq [parm3q+parm4q*2], mm2
331
movq [parm3q+r11], mm3
325
; Variant B loads: three 8-byte reads from parm3q at +parm4q, +2*parm4q, +r10.
movq mm1, [parm3q+parm4q]
326
movq mm2, [parm3q+parm4q*2]
327
movq mm3, [parm3q+r10]
329
; Variant B stores: the same three 8-byte values written to parm1q at
; +parm2q, +2*parm2q, +r11.
movq [parm1q+parm2q], mm1
330
movq [parm1q+parm2q*2], mm2
331
movq [parm1q+r11], mm3
332
; Pointer advance: parm3q += 4 * parm4q.
lea parm3q, [parm3q+parm4q*4]
332
333
; Pointer advance: parm1q += 4 * parm2q.
lea parm1q, [parm1q+parm2q*4]
333
; Second occurrence of the parm3q advance (the other variant's copy of the
; same line). Taken literally, parm3q would be advanced twice.
lea parm3q, [parm3q+parm4q*4]
341
341
;-----------------------------------------------------------------------------
342
; void x264_mc_copy_w16_mmxext( uint8_t *src, int i_src_stride,
343
; uint8_t *dst, int i_dst_stride, int i_height )
342
; void x264_mc_copy_w16_mmxext( uint8_t *dst, int i_dst_stride,
343
; uint8_t *src, int i_src_stride, int i_height )
344
344
;-----------------------------------------------------------------------------
345
345
x264_mc_copy_w16_mmxext:
; NOTE(review): the bare numeric lines in this block (346, 348, 349, ...) are
; not instructions. They look like line numbers left over from a rendered
; diff -- confirm and strip them before assembling.
; NOTE(review): two variants of the same copy are interleaved below, which
; lines up with the two signatures shown in the header comment (src-first and
; dst-first). Variant A reads through parm1q/parm2q and writes through
; parm3q/parm4q; variant B does the reverse. Presumably only one variant is
; meant to survive -- confirm against the real file.
; Each visible row is moved as two 8-byte halves (offsets +0 and +8) using
; mm2..mm7. Only the rows at offsets stride, 2*stride and 3*stride are
; visible; no access at offset 0, no loop label, no counter decrement, no
; branch and no ret appear in this block.
346
346
mov eax, parm5d ; i_height
348
; Variant A: r10 = 3 * parm2q, r11 = 3 * parm4q.
lea r10, [parm2q+parm2q*2] ; 3 * i_src_stride
349
lea r11, [parm4q+parm4q*2] ; 3 * i_dst_stride
348
; Variant B: r10 = 3 * parm4q, r11 = 3 * parm2q.
; Taken literally, these two lines overwrite the r10/r11 values computed just
; above before any instruction has used them.
lea r10, [parm4q+parm4q*2] ; 3 * i_src_stride
349
lea r11, [parm2q+parm2q*2] ; 3 * i_dst_stride
355
; Variant A loads: 8-byte reads from parm1q at +parm2q, +2*parm2q and +r10,
; each followed by the read 8 bytes further on.
movq mm2, [parm1q+parm2q]
356
movq mm3, [parm1q+parm2q+8]
357
movq mm4, [parm1q+parm2q*2]
358
movq mm5, [parm1q+parm2q*2+8]
359
movq mm6, [parm1q+r10]
360
movq mm7, [parm1q+r10+8]
363
; Variant A stores: the same six values written to parm3q at +parm4q,
; +2*parm4q and +r11 (again at +0 and +8 within each row).
movq [parm3q+parm4q], mm2
364
movq [parm3q+parm4q+8], mm3
365
movq [parm3q+parm4q*2], mm4
366
movq [parm3q+parm4q*2+8], mm5
367
movq [parm3q+r11], mm6
368
movq [parm3q+r11+8], mm7
355
; Variant B loads: 8-byte reads from parm3q at +parm4q, +2*parm4q and +r10,
; each followed by the read 8 bytes further on.
movq mm2, [parm3q+parm4q]
356
movq mm3, [parm3q+parm4q+8]
357
movq mm4, [parm3q+parm4q*2]
358
movq mm5, [parm3q+parm4q*2+8]
359
movq mm6, [parm3q+r10]
360
movq mm7, [parm3q+r10+8]
363
; Variant B stores: the same six values written to parm1q at +parm2q,
; +2*parm2q and +r11 (again at +0 and +8 within each row).
movq [parm1q+parm2q], mm2
364
movq [parm1q+parm2q+8], mm3
365
movq [parm1q+parm2q*2], mm4
366
movq [parm1q+parm2q*2+8], mm5
367
movq [parm1q+r11], mm6
368
movq [parm1q+r11+8], mm7
369
; Pointer advance: parm3q += 4 * parm4q.
lea parm3q, [parm3q+parm4q*4]
369
370
; Pointer advance: parm1q += 4 * parm2q.
lea parm1q, [parm1q+parm2q*4]
370
; Second occurrence of the parm3q advance (the other variant's copy of the
; same line). Taken literally, parm3q would be advanced twice.
lea parm3q, [parm3q+parm4q*4]
378
378
;-----------------------------------------------------------------------------
379
; void x264_mc_copy_w16_sse2( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height )
379
; void x264_mc_copy_w16_sse2( uint8_t *dst, int i_dst_stride, uint8_t *src, int i_src_stride, int i_height )
380
380
;-----------------------------------------------------------------------------
381
381
x264_mc_copy_w16_sse2:
; NOTE(review): the bare numeric lines in this block (381, 382, 386, ...) are
; not instructions. They look like line numbers left over from a rendered
; diff -- confirm and strip them before assembling.
; NOTE(review): two variants of the same copy are interleaved below, which
; lines up with the two signatures shown in the header comment (src-first and
; dst-first). Variant A reads through parm1q/parm2q and writes through
; parm3q/parm4q; variant B does the reverse. Presumably only one variant is
; meant to survive -- confirm against the real file.
; Each variant moves two 16-byte rows (offset 0 and offset +stride) with
; unaligned SSE2 loads/stores. No loop label, counter decrement, branch or
; ret is visible in this block.
381
381
382
382
mov eax, parm5d ; i_height
386
; Variant A loads: 16 bytes from parm1q and 16 bytes from parm1q + parm2q.
movdqu xmm0, [parm1q]
387
movdqu xmm1, [parm1q+parm2q]
388
; Variant A stores: those two values to parm3q and parm3q + parm4q.
movdqu [parm3q], xmm0
389
movdqu [parm3q+parm4q], xmm1
386
; Variant B loads: 16 bytes from parm3q and 16 bytes from parm3q + parm4q.
movdqu xmm0, [parm3q]
387
movdqu xmm1, [parm3q+parm4q]
388
; Variant B stores: those two values to parm1q and parm1q + parm2q.
movdqu [parm1q], xmm0
389
movdqu [parm1q+parm2q], xmm1
392
; Pointer advance: parm3q += 2 * parm4q.
lea parm3q, [parm3q+parm4q*2]
392
393
; Pointer advance: parm1q += 2 * parm2q.
lea parm1q, [parm1q+parm2q*2]
393
; Second occurrence of the parm3q advance (the other variant's copy of the
; same line). Taken literally, parm3q would be advanced twice.
lea parm3q, [parm3q+parm4q*2]