2
* Alpha optimized DSP utils
3
* Copyright (c) 2002 Falk Hueffner <falk@debian.org>
5
* This program is free software; you can redistribute it and/or modify
6
* it under the terms of the GNU General Public License as published by
7
* the Free Software Foundation; either version 2 of the License, or
8
* (at your option) any later version.
10
* This program is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
* GNU General Public License for more details.
15
* You should have received a copy of the GNU General Public License
16
* along with this program; if not, write to the Free Software
17
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21
#ifdef HAVE_AV_CONFIG_H
25
/* Some nicer register names. */
30
/* Danger: these overlap with the argument list and the return value */
41
/*****************************************************************************
42
* int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size)
44
* This code is written with a pca56 in mind. For ev6, one should
45
* really take the increased latency of 3 cycles for MVI instructions into account.
48
* It is important to keep the loading and first use of a register as
49
* far apart as possible, because if a register is accessed before it
50
* has been fetched from memory, the CPU will stall.
53
/*
 * pix_abs16x16_mvi_asm
 *
 * Adds perr results into v0 for 16-byte rows addressed by a0 (pix1) and
 * a1 (pix2). perr is the Alpha MVI "pixel error" instruction: the sum of
 * absolute differences of the eight bytes in its two source quadwords.
 * Both pointers are advanced by a2 after each row; a3 is decremented as
 * the remaining-row counter ("h" in the comments below).
 *
 * Two instruction sequences are visible:
 *   - pix2 fetched with ldq_u + extql/extqh, two rows per pass (h -= 2)
 *   - pix2 fetched with plain ldq, four rows per pass (h -= 4)
 *
 * NOTE(review): this listing shows no label, branch, return, or
 * initialization of v0 / a3, and several registers are read without a
 * visible load (flagged inline). Presumably those lines are missing from
 * this view rather than from the real file; confirm against the full
 * source before changing any instruction.
 */
.globl pix_abs16x16_mvi_asm
54
.ent pix_abs16x16_mvi_asm
72
/* Register roles in the ldq_u sequence: t0..t4 hold row 0, t5..t9 row 1.
   ta/tb/tc/td are temporaries (left hi, right lo, error left, error right
   according to the trailing comments below). */
t0: left_u -> left lo -> left
74
t2: right_u -> right hi -> right
78
t5: left_u -> left lo -> left
80
t7: right_u -> right hi -> right
90
/* ---- ldq_u sequence: load row 0 ---- */
ldq_u t0, 0(a1) # left_u: aligned quadword containing pix2[0]
92
ldq_u t2, 16(a1) # right_u: aligned quadword containing pix2[16]
93
ldq t3, 0(a0) # ref left: pix1[0..7]
94
ldq t4, 8(a0) # ref right: pix1[8..15]
95
addq a0, a2, a0 # pix1 += a2
96
addq a1, a2, a1 # pix2 += a2
98
/* ---- load row 1 ---- */
ldq_u t5, 0(a1) # left_u
100
ldq_u t7, 16(a1) # right_u
101
ldq t8, 0(a0) # ref left
102
ldq t9, 8(a0) # ref right
103
addq a0, a2, a0 # pix1 += a2
104
addq a1, a2, a1 # pix2 += a2
106
/* ---- compute row 0 ----
   extql/extqh take their byte shift from the low 3 bits of a1.
   NOTE(review): a1 has already been advanced twice by a2 at this point,
   so this presumably relies on a2 being a multiple of 8; confirm.
   NOTE(review): t1 is read below but no load of it is visible here, and
   no visible instruction merges ta (left hi) into t0 before the perr. */
extql t0, a1, t0 # left lo
107
extqh t1, a1, ta # left hi
108
extql t1, a1, tb # right lo
110
extqh t2, a1, t2 # right hi
111
perr t3, t0, tc # error left
112
or t2, tb, t2 # right = right hi | right lo
113
perr t4, t2, td # error right
114
addq v0, tc, v0 # add error left
115
addq v0, td, v0 # add error right
117
/* ---- compute row 1 ----
   NOTE(review): same gaps as row 0: t6 has no visible load and ta is
   not merged into t5 in the visible lines. */
extql t5, a1, t5 # left lo
118
extqh t6, a1, ta # left hi
119
extql t6, a1, tb # right lo
121
extqh t7, a1, t7 # right hi
122
perr t8, t5, tc # error left
123
or t7, tb, t7 # right = right hi | right lo
124
perr t9, t7, td # error right
125
addq v0, tc, v0 # add error left
126
addq v0, td, v0 # add error right
128
subq a3, 2, a3 # h -= 2 (two rows consumed)
136
/* ---- plain ldq sequence: four rows per pass ----
   Row 0 uses t0..t3, row 1 t4..t7, row 2 t8..tb, row 3 tc..tf.
   NOTE(review): only the "right" pix2 load of each row is visible; t0, t4,
   t8 and tc are read by the perr instructions below without a visible
   load (presumably an ldq from 0(a1) per row); confirm. */
ldq t1, 8(a1) # right (row 0)
137
addq a1, a2, a1 # pix2 += a2
138
ldq t2, 0(a0) # ref left
139
ldq t3, 8(a0) # ref right
140
addq a0, a2, a0 # pix1 += a2
143
ldq t5, 8(a1) # right (row 1)
144
addq a1, a2, a1 # pix2 += a2
145
ldq t6, 0(a0) # ref left
146
ldq t7, 8(a0) # ref right
147
addq a0, a2, a0 # pix1 += a2
150
ldq t9, 8(a1) # right (row 2)
151
addq a1, a2, a1 # pix2 += a2
152
ldq ta, 0(a0) # ref left
153
ldq tb, 8(a0) # ref right
154
addq a0, a2, a0 # pix1 += a2
157
ldq td, 8(a1) # right (row 3)
158
addq a1, a2, a1 # pix2 += a2
159
ldq te, 0(a0) # ref left
160
ldq tf, 8(a0) # ref right
162
/* Error accumulation for the four rows. t0 and t1 are reused as the perr
   destinations for every row, and each perr is separated from the addq
   that consumes its result by one unrelated instruction. */
perr t0, t2, t0 # error left (row 0)
163
addq a0, a2, a0 # pix1 += a2 (row 3 advance, placed between the perrs)
164
perr t1, t3, t1 # error right (row 0)
165
addq v0, t0, v0 # add error left
167
perr t4, t6, t0 # error left (row 1)
168
addq v0, t1, v0 # add error right
169
perr t5, t7, t1 # error right (row 1)
170
addq v0, t0, v0 # add error left
172
perr t8, ta, t0 # error left (row 2)
173
addq v0, t1, v0 # add error right
174
perr t9, tb, t1 # error right (row 2)
175
addq v0, t0, v0 # add error left
177
perr tc, te, t0 # error left (row 3)
178
addq v0, t1, v0 # add error right
179
perr td, tf, t1 # error right (row 3)
180
addq v0, t0, v0 # add error left
181
addq v0, t1, v0 # add error right
183
subq a3, 4, a3 # h -= 4 (four rows consumed)
186
.end pix_abs16x16_mvi_asm