4
;;; Extended MMX prediction composition
5
;;; routines handling the four different interpolation cases...
7
;;; Copyright (C) 2000 Andrew Stevens <as@comlab.ox.ac.uk>
10
;;; This program is free software; you can redistribute it and/or
11
;;; modify it under the terms of the GNU General Public License
12
;;; as published by the Free Software Foundation; either version 2
13
;;; of the License, or (at your option) any later version.
15
;;; This program is distributed in the hope that it will be useful,
16
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
;;; GNU General Public License for more details.
20
;;; You should have received a copy of the GNU General Public License
21
;;; along with this program; if not, write to the Free Software
22
;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
28
;;; The no interpolation case: in the instructions visible here only the
;;; source row itself is loaded (8 bytes at [ebx], then 8 bytes at [ebx+8]).
30
global predcomp_00_mmx
32
;;; void predcomp_<ix><iy>_mmx(char *src, char *dst, int lx, int w, int h, int addflag);
34
;;; ix - interpolation in x (1 = yes), iy - interpolation in y (1 = yes)
43
;;; mm1 = one's mask for src
44
;;; mm0 = zero mask for src...
50
;;; Register roles after the argument loads below:
;;;   ebx = src pointer, eax = dst pointer, edx = lx (added to eax per row),
;;;   edi = w, ecx = h (decremented by 1 per row), esi = addflag.
;;; NOTE(review): ebx, esi and edi are overwritten here; they are callee-saved
;;; in the usual 32-bit C calling conventions, so confirm that they are saved
;;; and restored in lines not shown in this excerpt.
push ebp ; save caller's frame pointer
64
;; Arguments are read relative to ebp; this assumes ebp was loaded from esp
;; right after the push (that instruction is not visible here - TODO confirm).
mov ebx, [ebp+8] ; ebx = src (1st argument)
65
mov eax, [ebp+12] ; eax = dst (2nd argument)
66
mov edx, [ebp+16] ; edx = lx (3rd argument)
67
mov edi, [ebp+20] ; edi = w (4th argument)
68
mov ecx, [ebp+24] ; ecx = h (5th argument), used as the row counter
69
mov esi, [ebp+28] ; esi = addflag (6th argument)
70
;; Extend addflag into bit-mask (the instructions doing so are not visible here)
72
jmp predrow00m ; enter the row loop; presumably skips alignment padding placed before the label
75
movq mm4, [ebx] ; load first 8 bytes of the src row
100
movq mm4, [ebx+8] ; load second 8 bytes of the src row
123
add eax, edx ; dst += lx: advance dst pointer to the next row
126
sub ecx, 1 ; one row done: decrement remaining-row count (branch not visible here)
140
;;; The x-axis interpolation case...
;;; Arguments follow the predcomp_<ix><iy>_mmx prototype given earlier in
;;; this file: (src, dst, lx, w, h, addflag), read from the stack via ebp.
142
global predcomp_10_mmx
147
;;; Register roles after the argument loads below:
;;;   ebx = src pointer, eax = dst pointer, edx = lx (added to eax per row),
;;;   edi = w, ecx = h (decremented by 1 per row), esi = addflag.
;;; NOTE(review): ebx, esi and edi are overwritten here; confirm they are
;;; saved and restored in lines not shown in this excerpt.
push ebp ; save caller's frame pointer
161
;; Assumes ebp was loaded from esp right after the push (not visible here -
;; TODO confirm).
mov ebx, [ebp+8] ; ebx = src (1st argument)
162
mov eax, [ebp+12] ; eax = dst (2nd argument)
163
mov edx, [ebp+16] ; edx = lx (3rd argument)
164
mov edi, [ebp+20] ; edi = w (4th argument)
165
mov ecx, [ebp+24] ; ecx = h (5th argument), used as the row counter
166
mov esi, [ebp+28] ; esi = addflag (6th argument)
167
;; Extend addflag into bit-mask (the instructions doing so are not visible here)
169
jmp predrow10m ; enter the row loop; presumably skips alignment padding placed before the label
172
;; --- first 8 bytes of the row ---
movq mm4, [ebx] ; load first 8 bytes of the src row
181
paddw mm4, mm2 ; mm4 += mm2 (16-bit lanes): summing step of averaging mm4/mm5 with mm2/mm3
191
movq mm2, [eax] ; "Add" step: load first 8 bytes of the existing dst row
195
paddw mm4, mm2 ; mm4 += mm2 (16-bit lanes): summing step of averaging with the dst data; unpack/round/shift not visible here
208
;; --- second 8 bytes of the row ---
movq mm4, [ebx+8] ; load second 8 bytes of the src row
217
paddw mm4, mm2 ; mm4 += mm2 (16-bit lanes): summing step of averaging mm4/mm5 with mm2/mm3
227
movq mm2, [eax+8] ; "Add" step: load second 8 bytes of the existing dst row
231
paddw mm4, mm2 ; mm4 += mm2 (16-bit lanes): summing step of averaging with the dst data; unpack/round/shift not visible here
243
add eax, edx ; dst += lx: advance dst pointer to the next row
246
sub ecx, 1 ; one row done: decrement remaining-row count (branch not visible here)
259
;;; The y-axis interpolation case...
;;; Arguments follow the predcomp_<ix><iy>_mmx prototype given earlier in
;;; this file: (src, dst, lx, w, h, addflag), read from the stack via ebp.
;;; Within one row the src pointer is stepped down by lx and back up again
;;; so that each 8-byte half can be paired with the row below it.
261
global predcomp_01_mmx
266
;;; Register roles after the argument loads below:
;;;   ebx = src pointer, eax = dst pointer, edx = lx (row stride for ebx),
;;;   edi = w, ecx = h (decremented by 1 per row), esi = addflag.
;;; NOTE(review): ebx, esi and edi are overwritten here; confirm they are
;;; saved and restored in lines not shown in this excerpt.
push ebp ; save caller's frame pointer
280
;; Assumes ebp was loaded from esp right after the push (not visible here -
;; TODO confirm).
mov ebx, [ebp+8] ; ebx = src (1st argument)
281
mov eax, [ebp+12] ; eax = dst (2nd argument)
282
mov edx, [ebp+16] ; edx = lx (3rd argument)
283
mov edi, [ebp+20] ; edi = w (4th argument)
284
mov ecx, [ebp+24] ; ecx = h (5th argument), used as the row counter
285
mov esi, [ebp+28] ; esi = addflag (6th argument)
287
jmp predrow01m ; enter the row loop; presumably skips alignment padding placed before the label
291
;; --- first 8 bytes of the row ---
movq mm4, [ebx] ; load first 8 bytes of the current src row
293
add ebx, edx ; src += lx: step to the row below for the vertical pair
302
paddw mm4, mm2 ; mm4 += mm2 (16-bit lanes): summing step of averaging mm4/mm5 with mm2/mm3
312
movq mm2, [eax] ; "Add" step: load first 8 bytes of the existing dst row
316
paddw mm4, mm2 ; mm4 += mm2 (16-bit lanes): summing step of averaging with the dst data; unpack/round/shift not visible here
329
;; --- second 8 bytes of the row ---
sub ebx, edx ; src -= lx: back to the upper row for the second half
330
movq mm4, [ebx+8] ; load second 8 bytes of the current src row
332
add ebx, edx ; src += lx: step to the row below again for the vertical pair
340
paddw mm4, mm2 ; mm4 += mm2 (16-bit lanes): summing step of averaging mm4/mm5 with mm2/mm3
350
movq mm2, [eax+8] ; "Add" step: load second 8 bytes of the existing dst row
354
paddw mm4, mm2 ; mm4 += mm2 (16-bit lanes): summing step of averaging with the dst data; unpack/round/shift not visible here
368
sub ecx, 1 ; one row done: decrement remaining-row count (branch not visible here)
382
;;; The x-axis and y-axis interpolation case...
;;; Arguments follow the predcomp_<ix><iy>_mmx prototype given earlier in
;;; this file: (src, dst, lx, w, h, addflag), read from the stack via ebp.
;;; Each 8-byte half of a row is loaded from the current row and from the
;;; row lx bytes below it; the remaining interpolation arithmetic is not
;;; visible in this excerpt.
384
global predcomp_11_mmx
391
;;; Register roles after the argument loads below:
;;;   ebx = src pointer, eax = dst pointer, edx = lx (row stride),
;;;   edi = w, ecx = h (decremented by 1 per row), esi = addflag.
;;; NOTE(review): ebx, esi and edi are overwritten here; confirm they are
;;; saved and restored in lines not shown in this excerpt.
push ebp ; save caller's frame pointer
409
;; Assumes ebp was loaded from esp right after the push (not visible here -
;; TODO confirm).
mov ebx, [ebp+8] ; ebx = src (1st argument)
410
mov eax, [ebp+12] ; eax = dst (2nd argument)
411
mov edx, [ebp+16] ; edx = lx (3rd argument)
412
mov edi, [ebp+20] ; edi = w (4th argument)
413
mov ecx, [ebp+24] ; ecx = h (5th argument), used as the row counter
414
mov esi, [ebp+28] ; esi = addflag (6th argument)
415
;; Extend addflag into bit-mask (the instructions doing so are not visible here)
418
jmp predrow11 ; enter the row loop; presumably skips alignment padding placed before the label
421
;; --- first 8 bytes of the row ---
movq mm4, [ebx] ; first 8 bytes of the current row; mm4 and mm6 accumulate partial sums for interp.
433
add ebx, edx ; src += lx: step to the row below
435
movq mm5, [ebx] ; first 8 bytes of the row below (ebx was advanced by lx above)
437
punpcklbw mm5, mm0 ; interleave low 4 bytes of mm5 with mm0; widens them to words if mm0 is zero (set-up not visible - TODO confirm)
458
movq mm5, [eax] ; "Add" step: load first 8 bytes of the existing dst row
462
paddw mm4, mm5 ; mm4 += mm5 (16-bit lanes): summing step of averaging mm4/mm6 with mm5/mm7
476
;; --- second 8 bytes of the row ---
sub ebx, edx ; src -= lx: back to the upper row for the second half
478
movq mm4, [ebx+8] ; second 8 bytes of the current row; mm4 and mm6 accumulate partial sums for interp.
490
add ebx, edx ; src += lx: step to the row below again
492
movq mm5, [ebx+8] ; second 8 bytes of the row below (ebx was advanced by lx above)
494
punpcklbw mm5, mm0 ; interleave low 4 bytes of mm5 with mm0; widens them to words if mm0 is zero (set-up not visible - TODO confirm)
515
movq mm5, [eax+8] ; "Add and average" step: load second 8 bytes of the existing dst row
519
paddw mm4, mm5 ; mm4 += mm5 (16-bit lanes): summing step of averaging mm4/mm6 with mm5/mm7
530
add eax, edx ; dst += lx: advance dst pointer to the next row
533
sub ecx, 1 ; one row done: decrement remaining-row count (branch not visible here)