1
From 2ca1dc1b533f6916ac593d435e1af8a299944d2c Mon Sep 17 00:00:00 2001
2
From: conrad <conrad@9553f0bf-9b14-0410-a0b8-cfaf0461ba5b>
3
Date: Sat, 23 May 2009 18:47:26 +0000
4
Subject: [PATCH 09/27] ARM: actually add VP3 loop filter
6
git-svn-id: file:///var/local/repositories/ffmpeg/trunk@18917 9553f0bf-9b14-0410-a0b8-cfaf0461ba5b
8
libavcodec/arm/vp3dsp_neon.S | 94 ++++++++++++++++++++++++++++++++++++++++++
9
1 files changed, 94 insertions(+), 0 deletions(-)
10
create mode 100644 libavcodec/arm/vp3dsp_neon.S
12
diff --git a/libavcodec/arm/vp3dsp_neon.S b/libavcodec/arm/vp3dsp_neon.S
14
index 0000000..cce80dd
16
+++ b/libavcodec/arm/vp3dsp_neon.S
19
+ * Copyright (c) 2009 David Conrad
21
+ * This file is part of FFmpeg.
23
+ * FFmpeg is free software; you can redistribute it and/or
24
+ * modify it under the terms of the GNU Lesser General Public
25
+ * License as published by the Free Software Foundation; either
26
+ * version 2.1 of the License, or (at your option) any later version.
28
+ * FFmpeg is distributed in the hope that it will be useful,
29
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
30
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
31
+ * Lesser General Public License for more details.
33
+ * You should have received a copy of the GNU Lesser General Public
34
+ * License along with FFmpeg; if not, write to the Free Software
35
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
40
+.macro vp3_loop_filter
41
+ vsubl.u8 q3, d18, d17
42
+ vsubl.u8 q2, d16, d19
46
+ vrshr.s16 q0, q0, #3
51
+ vshr.s16 q0, q0, #15
52
+ vqsub.u16 q2, q15, q1
53
+ vqsub.u16 q3, q2, q1
58
+ vaddw.u8 q2, q0, d17
64
+function ff_vp3_v_loop_filter_neon, export=1
66
+ sub r0, r0, r1, lsl #1
67
+ vld1.64 {d16}, [r0,:64], r1
68
+ vld1.64 {d17}, [r0,:64], r1
69
+ vld1.64 {d18}, [r0,:64], r1
70
+ vld1.64 {d19}, [r0,:64], r1
71
+ ldrb r2, [r2, #129*4]
75
+ vst1.64 {d0}, [ip,:64], r1
76
+ vst1.64 {d1}, [ip,:64], r1
80
+function ff_vp3_h_loop_filter_neon, export=1
83
+ vld1.32 {d16[]}, [r0], r1
84
+ vld1.32 {d17[]}, [r0], r1
85
+ vld1.32 {d18[]}, [r0], r1
86
+ vld1.32 {d19[]}, [r0], r1
87
+ vld1.32 {d16[1]}, [r0], r1
88
+ vld1.32 {d17[1]}, [r0], r1
89
+ vld1.32 {d18[1]}, [r0], r1
90
+ vld1.32 {d19[1]}, [r0], r1
91
+ ldrb r2, [r2, #129*4]
102
+ vst1.16 {d0[0]}, [ip], r1
103
+ vst1.16 {d1[0]}, [ip], r1
104
+ vst1.16 {d0[1]}, [ip], r1
105
+ vst1.16 {d1[1]}, [ip], r1
106
+ vst1.16 {d0[2]}, [ip], r1
107
+ vst1.16 {d1[2]}, [ip], r1
108
+ vst1.16 {d0[3]}, [ip], r1
109
+ vst1.16 {d1[3]}, [ip], r1