2
;;; predcomp_00_mmxe.s:
4
;;; Extended MMX prediction composition
5
;;; routines handling the four different interpolation cases...
7
;;; Copyright (C) 2000 Andrew Stevens <as@comlab.ox.ac.uk>
10
;;; This program is free software; you can redistribute it and/or
11
;;; modify it under the terms of the GNU General Public License
12
;;; as published by the Free Software Foundation; either version 2
13
;;; of the License, or (at your option) any later version.
15
;;; This program is distributed in the hope that it will be useful,
16
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
;;; GNU General Public License for more details.
20
;;; You should have received a copy of the GNU General Public License
21
;;; along with this program; if not, write to the Free Software
22
;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
28
;;; predcomp_00_mmxe: the no-interpolation case.
;;; NOTE(review): this listing has gaps (the entry label, the rest of the
;;; prologue, the stores to dst and the loop branch are not visible), so the
;;; comments below describe only the instructions that are shown.
30
global predcomp_00_mmxe
32
;;; C prototype common to the predcomp_<ix><iy>_mmxe routines:
;;; void predcomp_<ix><iy>_mmxe(char *src,char *dst,int lx, int w, int h, int mask);
34
;;; ix - Interpolation in x iy - Interpolation in y
43
;;; mm1 = one's mask for src
44
;;; mm0 = zero mask for src...
50
push ebp ; save caller's frame pointer
59
;; Load the stack arguments. NOTE(review): the [ebp+N] offsets assume ebp
;; was set up as the frame pointer after the push; that instruction is not
;; visible in this listing.
mov ebx, [ebp+8] ; ebx = src pointer (1st argument)
60
mov eax, [ebp+12] ; eax = dst pointer (2nd argument)
61
mov edx, [ebp+16] ; edx = lx (3rd argument), used below as the row step
62
mov edi, [ebp+20] ; edi = w (4th argument)
63
mov ecx, [ebp+24] ; ecx = h (5th argument), counted down below
65
;; Extend addflag into bit-mask
;; NOTE(review): the instructions that do this are not visible here;
;; presumably addflag is the prototype's `mask` argument -- confirm.
72
jmp predrow00 ; enter the row loop; presumably hops over alignment padding ("align for speed") -- label not visible here
75
movq mm4, [ebx] ; mm4 = source bytes 0..7 of the current row
88
movq mm4, [ebx+8] ; mm4 = source bytes 8..15 of the current row (second 8 bytes)
99
add eax, edx ; dst pointer += lx: move to the next destination row
102
sub ecx, 1 ; one fewer row left; sets flags, presumably for a loop branch not visible here
115
;;; predcomp_10_mmxe: the x-axis interpolation case.
;;; NOTE(review): this listing has gaps (the entry label, the rest of the
;;; prologue, the loads of mm2, the stores to dst and the loop branch are not
;;; visible), so the comments below describe only the instructions shown.
117
global predcomp_10_mmxe
122
push ebp ; save caller's frame pointer
131
;; Load the stack arguments. NOTE(review): the [ebp+N] offsets assume ebp
;; was set up as the frame pointer after the push; that instruction is not
;; visible in this listing.
mov ebx, [ebp+8] ; ebx = src pointer (1st argument)
132
mov eax, [ebp+12] ; eax = dst pointer (2nd argument)
133
mov edx, [ebp+16] ; edx = lx (3rd argument), used below as the row step
134
mov edi, [ebp+20] ; edi = w (4th argument)
135
mov ecx, [ebp+24] ; ecx = h (5th argument), counted down below
137
;; Extend addflag into bit-mask
;; NOTE(review): the instructions that do this are not visible here;
;; presumably addflag is the prototype's `mask` argument -- confirm.
144
jmp predrow10 ; enter the row loop; presumably hops over alignment padding ("align for speed") -- label not visible here
147
movq mm4, [ebx] ; mm4 = source bytes 0..7 of the row (first half of the x average)
154
pavgb mm4, mm2 ; mm4 = rounded per-byte unsigned average of mm4 and mm2 (mm2 is loaded outside the visible lines)
161
movq mm4, [ebx+8] ; mm4 = source bytes 8..15 of the row (second 8 bytes)
168
pavgb mm4, mm2 ; same rounded byte average for the second 8 bytes
173
add eax, edx ; dst pointer += lx: move to the next destination row
177
sub ecx, 1 ; one fewer row left; sets flags, presumably for a loop branch not visible here
190
;;; predcomp_11_mmxe: the x-axis and y-axis interpolation case.
;;; NOTE(review): this listing has gaps (the entry label, the rest of the
;;; prologue, most of the unpack/add/round/pack arithmetic, the stores to dst
;;; and the loop branch are not visible), so the comments below describe only
;;; the instructions shown.
192
global predcomp_11_mmxe
198
push ebp ; save caller's frame pointer
210
;; Load the stack arguments. NOTE(review): the [ebp+N] offsets assume ebp
;; was set up as the frame pointer after the push; that instruction is not
;; visible in this listing.
mov ebx, [ebp+8] ; ebx = src pointer (1st argument)
211
mov eax, [ebp+12] ; eax = dst pointer (2nd argument)
212
mov edx, [ebp+16] ; edx = lx (3rd argument), used below as the row step
213
mov edi, [ebp+20] ; edi = w (4th argument)
214
mov ecx, [ebp+24] ; ecx = h (5th argument), counted down below
216
;; Extend addflag into bit-mask
;; NOTE(review): the instructions that do this are not visible here;
;; presumably addflag is the prototype's `mask` argument -- confirm.
224
jmp predrow11 ; enter the row loop; presumably hops over alignment padding ("align for speed") -- label not visible here
227
movq mm4, [ebx] ; mm4 = source bytes 0..7 of the current row; mm4 and mm6 accumulate partial sums for interp.
239
add ebx, edx ; src pointer += lx: step down to the next source row
241
movq mm5, [ebx] ; mm5 = source bytes 0..7 of the next row (ebx was just advanced by lx)
243
punpcklbw mm5, mm2 ; interleave low bytes of mm5 with mm2 (widens bytes to words if mm2 is zero -- presumably so; confirm) for accumulation
255
;; Now round and repack...
273
sub ebx, edx ; src pointer -= lx: back to the current (first) row
275
movq mm4, [ebx+8] ; mm4 = source bytes 8..15 of the current row; mm4 and mm6 accumulate partial sums for interp.
287
add ebx, edx ; src pointer += lx again; among the visible lines ebx is left on the next row
289
movq mm5, [ebx+8] ; mm5 = source bytes 8..15 of the next row (second 8 bytes)
291
punpcklbw mm5, mm2 ; interleave low bytes of mm5 with mm2, as for the first 8 bytes
303
;; Now round and repack...
319
add eax, edx ; dst pointer += lx: move to the next destination row
322
sub ecx, 1 ; one fewer row left; sets flags, presumably for a loop branch not visible here
336
;;; predcomp_01_mmxe: the y-axis interpolation case.
;;; NOTE(review): this listing has gaps (the entry label, the rest of the
;;; prologue, the stores to dst and the loop branch are not visible), so the
;;; comments below describe only the instructions shown.
338
global predcomp_01_mmxe
342
push ebp ; save caller's frame pointer
351
;; Load the stack arguments. NOTE(review): the [ebp+N] offsets assume ebp
;; was set up as the frame pointer after the push; that instruction is not
;; visible in this listing.
mov ebx, [ebp+8] ; ebx = src pointer (1st argument)
352
mov eax, [ebp+12] ; eax = dst pointer (2nd argument)
353
mov edx, [ebp+16] ; edx = lx (3rd argument), used below as the row step
354
mov edi, [ebp+20] ; edi = w (4th argument)
355
mov ecx, [ebp+24] ; ecx = h (5th argument), counted down below
357
;; Extend addflag into bit-mask
;; NOTE(review): the instructions that do this are not visible here;
;; presumably addflag is the prototype's `mask` argument -- confirm.
364
jmp predrow01 ; enter the row loop; presumably hops over alignment padding ("align for speed") -- label not visible here
367
movq mm4, [ebx] ; mm4 = source bytes 0..7 of the current row
368
add ebx, edx ; src pointer += lx: step down to the next source row
369
pavgb mm4, [ebx] ; rounded per-byte unsigned average with bytes 0..7 of the next row (average in y)
382
sub ebx, edx ; src pointer -= lx: back to the current row
383
movq mm4, [ebx+8] ; mm4 = source bytes 8..15 of the current row (second 8 bytes)
384
add ebx, edx ; src pointer += lx again; among the visible lines ebx is left on the next row
385
pavgb mm4, [ebx+8] ; rounded per-byte unsigned average with bytes 8..15 of the next row (average in y)
396
add eax, edx ; dst pointer += lx: move to the next destination row
399
sub ecx, 1 ; one fewer row left; sets flags, presumably for a loop branch not visible here