1
# Pure Data Packet mmx routine.
2
# Copyright (c) by Tom Schouten <pdp@zzz.kotnet.org>
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation; either version 2 of the License, or
7
# (at your option) any later version.
9
# This program is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
# GNU General Public License for more details.
14
# You should have received a copy of the GNU General Public License
15
# along with this program; if not, write to the Free Software
16
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20
# DIRECT FORM II BIQUAD
22
# y[k] = b0 * x[k] + u1[k-1]
23
# u1[k] = b1 * x[k] + u2[k-1] - a1 * y[k]
24
# u2[k] = b2 * x[k] - a2 * y[k]
27
# computes a direct form 2 biquad
28
# does not use {mm0-mm3}\<inreg>
30
# input: <reg> == input
33
# (%esi) == biquad coefs (-a1 -a2 b0 b1 b2) in s1.14
34
# output: <reg> == output
39
# --- df2 macro body: one direct-form-II biquad step on 4 x 16-bit words ---
# \reg holds the packed input x[k]; on entry %mm4 = u1[k-1], %mm5 = u2[k-1].
# (%esi) holds the 5 coefficients (-a1 -a2 b0 b1 b2) in s1.14 fixed point,
# one quadword (8 bytes) apart.  On exit \reg = y[k], %mm4 = u1[k],
# %mm5 = u2[k]; only %mm4-%mm7 are touched besides \reg.
# NOTE(review): the ".macro df2 ..." header line and the matching ".endm"
# are not present in this listing (surrounding lines appear dropped) —
# restore them from the original pdp source before assembling.
movq \reg, %mm6 # mm6 == x[k]
40
movq \reg, %mm7 # mm7 == x[k]
41
pmulhw 16(%esi), %mm6 # mm6 == x[k] * b0
42
pmulhw 24(%esi), %mm7 # mm7 == x[k] * b1
43
paddw %mm4, %mm6 # mm6 == x[k] * b0 + u1[k-1] == y[k]
44
paddw %mm5, %mm7 # mm7 == x[k] * b1 + u2[k-1]
45
# pmulhw yields (a*b)>>16; with s1.14 coefficients the product is y[k]/4,
# so double twice with signed saturation to restore the scale:
paddsw %mm6, %mm6 # compensate for mul = x*y/4 (coefs are s1.14 fixed point)
46
paddsw %mm6, %mm6 # paddsw ensures saturation
47
movq \reg, %mm5 # mm5 == x[k]
48
movq %mm6, %mm4 # mm4 == y[k]
49
movq %mm6, \reg # reg == y[k] --------------------
50
pmulhw 0(%esi), %mm4 # mm4 == y[k] * (-a1)
51
pmulhw 8(%esi), %mm6 # mm6 == y[k] * (-a2)
52
pmulhw 32(%esi), %mm5 # mm5 == x[k] * b2
53
paddw %mm7, %mm4 # mm4 == u1[k] --------------------
54
paddw %mm6, %mm5 # mm5 == u2[k] --------------------
59
# %mm0-mm3: input 4x4 pixels {x0 x1 x2 x3}
60
# %esi: coef memory (-a1, -a2, b0, b1, b2) in s1.14
61
# %edi: state memory (u1, u2)
64
# %mm0-mm4: 4x4 pixels result
69
.macro biquad_4x4_pixels
# Apply the df2 biquad to a 4x4 pixel block held in %mm0-%mm3, with the
# filter state kept at (%edi) and the s1.14 coefficients at (%esi).
# NOTE(review): only the %mm0 column survives in this listing — the load
# of the second state word into %mm5, the df2 invocations for %mm1-%mm3,
# the second state store, and the closing ".endm" appear to have been
# dropped; confirm against the original pdp source.
71
movq 0(%edi), %mm4 # get state
73
df2 %mm0 # compute 4 biquads
77
movq %mm4, 0(%edi) # store state
83
# in order to use the 4 line parallel biquad routine on horizontal
84
# lines, we need to reorder (rotate or transpose) the matrix, since
85
# images are scanline encoded, and we want to work in parallel
88
# since the 4 lines are independent, it doesn't matter in which order
89
# the vector elements are present.
91
# this allows us to use the same routine for left->right and right->left
94
# some comments on the non-abelian group of square isometries consisting of
96
# (H) horizontal axis mirror
97
# (V) vertical axis mirror
98
# (T) transpose (diagonal axis mirror)
99
# (A) antitranspose (antidiagonal axis mirror)
100
# (R1) 90deg anticlockwise rotation
101
# (R2) 180deg rotation
102
# (R3) 90deg clockwise rotation
105
# we basically have two options: (R1,R3) or (T,A)
106
# we opt for T and A because they are self inverting, which improves locality
108
# use antitranspose for right to left and transpose
109
# for left to right (little endian)
115
# %mm3 == {d0 d1 d2 d3}
116
# %mm2 == {c0 c1 c2 c3}
117
# %mm1 == {b0 b1 b2 b3}
118
# %mm0 == {a0 a1 a2 a3}
121
# %mm3 == {a3 b3 c3 d3}
122
# %mm2 == {a2 b2 c2 d2}
123
# %mm1 == {a1 b1 c1 d1}
124
# %mm0 == {a0 b0 c0 d0}
127
.macro antitranspose_4x4:
# Antitranspose (mirror about the antidiagonal) a 4x4 matrix of 16-bit
# words held in %mm0-%mm3, using %mm4-%mm7 as scratch.
# NOTE(review): a trailing ':' after a GAS ".macro" name is suspicious —
# likely an artifact; also the movq instructions that stage %mm4-%mm7
# (and reload between unpack stages) appear to have been dropped from
# this listing, so the register comments below describe the intended
# intermediate values, not what these lines alone would produce.
129
punpcklwd %mm1, %mm4 # mm4 <- {b2 d2 b3 d3}
131
punpckhwd %mm1, %mm5 # mm5 <- {b0 d0 b1 d1}
134
punpcklwd %mm0, %mm6 # mm6 <- {a2 c2 a3 c3}
136
punpckhwd %mm0, %mm7 # mm7 <- {a0 c0 a1 c1}
139
punpcklwd %mm6, %mm3 # mm3 <- {a3 b3 c3 d3}
141
punpckhwd %mm6, %mm2 # mm2 <- {a2 b2 c2 d2}
144
punpcklwd %mm7, %mm1 # mm1 <- {a1 b1 c1 d1}
146
punpckhwd %mm7, %mm0 # mm0 <- {a0 b0 c0 d0}
154
# %mm3 == {d3 d2 d1 d0}
155
# %mm2 == {c3 c2 c1 c0}
156
# %mm1 == {b3 b2 b1 b0}
157
# %mm0 == {a3 a2 a1 a0}
160
# %mm3 == {d3 c3 b3 a3}
161
# %mm2 == {d2 c2 b2 a2}
162
# %mm1 == {d1 c1 b1 a1}
163
# %mm0 == {d0 c0 b0 a0}
166
.macro transpose_4x4:
# Transpose (mirror about the main diagonal) a 4x4 matrix of 16-bit
# words held in %mm0-%mm3, using %mm4-%mm7 as scratch.
# NOTE(review): as with antitranspose_4x4 above — the trailing ':' after
# the macro name, the missing movq staging instructions, and the missing
# ".endm" all suggest dropped lines in this listing; the comments below
# describe the intended intermediates.
168
punpcklwd %mm2, %mm4 # mm4 <- {c1 a1 c0 a0}
170
punpckhwd %mm2, %mm5 # mm5 <- {c3 a3 c2 a2}
173
punpcklwd %mm3, %mm6 # mm6 <- {d1 b1 d0 b0}
175
punpckhwd %mm3, %mm7 # mm7 <- {d3 b3 d2 b2}
178
punpcklwd %mm6, %mm0 # mm0 <- {d0 c0 b0 a0}
180
punpckhwd %mm6, %mm1 # mm1 <- {d1 c1 b1 a1}
183
punpcklwd %mm7, %mm2 # mm2 <- {d2 c2 b2 a2}
185
punpckhwd %mm7, %mm3 # mm3 <- {d3 c3 b3 a3}
189
.globl pixel_biquad_vertb_s16
190
.type pixel_biquad_vertb_s16,@function
193
# pixel_biquad_vertb_s16(char *pixel_array, int nb_rows, int linewidth, short int coef[20], short int state[8])
196
# Top-to-bottom vertical biquad over 16-bit pixels, processed in strips
# of 4 rows x 4 pixels.  cdecl stack args (via %ebp frame):
#   8(%ebp) = pixel array, 12(%ebp) = 4x4-block count, 16(%ebp) = line
#   width, 20(%ebp) = s1.14 coefs, 24(%ebp) = filter state.
# NOTE(review): this listing is heavily truncated — the prologue that
# sets up %ebp and saves callee-saved %ebx/%esi/%edi, the %eax
# initialisation (presumably %eax = 2*%edx) before "addl %edx, %eax",
# the row-0 load/store and biquad_4x4_pixels call inside the loop, the
# pointer advance, the "decl %ecx" that feeds jnz, and the
# epilogue/emms/ret are all absent; restore from the original source.
pixel_biquad_vertb_s16:
205
movl 8(%ebp), %ebx # pixel array offset
206
movl 12(%ebp), %ecx # nb of 4x4 pixblocks
207
movl 16(%ebp), %edx # line width
209
movl 20(%ebp), %esi # coefs
210
movl 24(%ebp), %edi # state
212
shll $1, %edx # short int addressing: width in bytes
215
addl %edx, %eax # eax = 3 * edx (assumes %eax already held 2*edx)
218
.biquad_vertb_line_loop:
220
movq (%ebx,%edx,1), %mm1 # load rows 1..3 of the 4x4 block
221
movq (%ebx,%edx,2), %mm2
222
movq (%ebx,%eax,1), %mm3
225
movq %mm1, (%ebx,%edx,1) # store filtered rows back
226
movq %mm2, (%ebx,%edx,2)
227
movq %mm3, (%ebx,%eax,1)
231
jnz .biquad_vertb_line_loop # flags come from a dropped "decl %ecx"
240
.globl pixel_biquad_verbt_s16
241
.type pixel_biquad_verbt_s16,@function
244
# pixel_biquad_verbt_s16(char *pixel_array, int nb_rows, int linewidth, short int coef[20], short int state[8])
247
# Bottom-to-top vertical biquad (walks the image upward); same argument
# layout as pixel_biquad_vertb_s16 above.
# NOTE(review): the exported symbol spells "verbt" where comments
# elsewhere say "vertbt" — looks like a historical typo baked into the
# symbol; do not rename, callers link against it.  As with the other
# routines in this listing, the prologue, the scaling of %eax by the
# block count before "addl %eax, %ebx", the loop's row-0 access and
# biquad call, the pointer retreat, the "decl %ecx", and the epilogue
# appear to have been dropped.
pixel_biquad_verbt_s16:
256
movl 8(%ebp), %ebx # pixel array offset
257
movl 12(%ebp), %ecx # nb of 4x4 pixblocks
258
movl 16(%ebp), %eax # line width
260
shll $3, %eax # 4 line byte spacing (4 rows * 2 bytes per pixel)
264
addl %eax, %ebx # ebx points to last pixblock
266
movl 16(%ebp), %edx # line width
268
movl 20(%ebp), %esi # coefs
269
movl 24(%ebp), %edi # state
271
shll $1, %edx # short int addressing
274
addl %edx, %eax # eax = 3 * edx (assumes %eax recomputed in dropped lines)
277
.biquad_verbt_line_loop:
279
movq (%ebx,%edx,1), %mm2 # rows loaded in reverse order vs vertb
280
movq (%ebx,%edx,2), %mm1
281
movq (%ebx,%eax,1), %mm0
284
movq %mm2, (%ebx,%edx,1)
285
movq %mm1, (%ebx,%edx,2)
286
movq %mm0, (%ebx,%eax,1)
290
jnz .biquad_verbt_line_loop # flags come from a dropped "decl %ecx"
300
.globl pixel_biquad_horlr_s16
301
.type pixel_biquad_horlr_s16,@function
302
# pixel_biquad_horlr_s16(char *pixel_array, int nb_rows, int linewidth, short int coef[20], short int state[8])
304
# Left-to-right horizontal biquad.  Per the transpose discussion above,
# this is meant to transpose each 4x4 block, run the 4-line-parallel
# biquad, and transpose back.
# NOTE(review): truncated listing — the prologue, the %eax initialisation
# before "addl %edx, %eax", the transpose_4x4 / biquad_4x4_pixels calls
# and row-0 access inside the loop, the pointer advance, the "decl %ecx",
# and the epilogue are all absent here.
pixel_biquad_horlr_s16:
313
movl 8(%ebp), %ebx # pixel array offset
314
movl 12(%ebp), %ecx # nb of 4x4 pixblocks
315
movl 16(%ebp), %edx # line width
317
movl 20(%ebp), %esi # coefs
318
movl 24(%ebp), %edi # state
320
shll $1, %edx # short int addressing
323
addl %edx, %eax # eax = 3 * edx (assumes %eax already held 2*edx)
326
.biquad_horlr_line_loop:
328
movq (%ebx,%edx,1), %mm1 # load rows 1..3 of the 4x4 block
329
movq (%ebx,%edx,2), %mm2
330
movq (%ebx,%eax,1), %mm3
335
movq %mm1, (%ebx,%edx,1) # store filtered rows back
336
movq %mm2, (%ebx,%edx,2)
337
movq %mm3, (%ebx,%eax,1)
340
jnz .biquad_horlr_line_loop # flags come from a dropped "decl %ecx"
351
.globl pixel_biquad_horrl_s16
352
.type pixel_biquad_horrl_s16,@function
353
# pixel_biquad_horrl_s16(char *pixel_array, int nb_rows, int linewidth, short int coef[20], short int state[8])
355
# Right-to-left horizontal biquad.  Per the discussion above, this is
# meant to antitranspose each 4x4 block, run the 4-line-parallel biquad,
# and antitranspose back, walking blocks from the end of each line.
# NOTE(review): truncated listing — the prologue, the computation of
# %eax before "addl %eax, %ebx" (it is used uninitialised in the visible
# lines), the antitranspose_4x4 / biquad calls and row-0 access in the
# loop, the pointer retreat, the "decl %ecx", and the epilogue are all
# absent here.
pixel_biquad_horrl_s16:
363
movl 8(%ebp), %ebx # pixel array offset
364
movl 12(%ebp), %ecx # nb of 4x4 pixblocks
365
movl 16(%ebp), %edx # line width
371
addl %eax, %ebx # ebx points to last pixblock
374
movl 20(%ebp), %esi # coefs
375
movl 24(%ebp), %edi # state
377
shll $1, %edx # short int addressing
380
addl %edx, %eax # eax = 3 * edx (assumes %eax already held 2*edx)
383
.biquad_horrl_line_loop:
385
movq (%ebx,%edx,1), %mm1 # load rows 1..3 of the 4x4 block
386
movq (%ebx,%edx,2), %mm2
387
movq (%ebx,%eax,1), %mm3
392
movq %mm1, (%ebx,%edx,1) # store filtered rows back
393
movq %mm2, (%ebx,%edx,2)
394
movq %mm3, (%ebx,%eax,1)
397
jnz .biquad_horrl_line_loop # flags come from a dropped "decl %ecx"
408
.globl pixel_biquad_time_s16
409
.type pixel_biquad_time_s16,@function
410
# pixel_biquad_time_s16(short int *pixel_array, short int *s1, short int *s2, short int *coefs, int nb_4_pix_vectors)
412
# Temporal biquad: filters each pixel across successive frames, 4 pixels
# per iteration, keeping per-pixel filter state in the s1/s2 arrays.
# NOTE(review): truncated listing — the prologue, the
# ".biquad_time_loop:" label itself (the jnz below targets a label not
# visible here), the three pointer advances, the "decl %ecx", and the
# epilogue/emms/ret are all absent; restore from the original source.
pixel_biquad_time_s16:
420
movl 8(%ebp), %ebx # pixel array offset
421
movl 12(%ebp), %edx # state 1 array
422
movl 16(%ebp), %edi # state 2 array
424
movl 20(%ebp), %esi # coefs
425
movl 24(%ebp), %ecx # nb of 4 pixel vectors
430
movq (%ebx), %mm0 # get input
431
movq (%edx), %mm4 # get state 1 (u1[k-1] for df2)
432
movq (%edi), %mm5 # get state 2 (u2[k-1] for df2)
433
df2 %mm0 # compute direct form 2
434
movq %mm0, (%ebx) # write output
435
movq %mm5, (%edi) # write state 2
436
movq %mm4, (%edx) # write state 1
441
jnz .biquad_time_loop # flags come from a dropped "decl %ecx"