/*
 * Copyright (c) 2012 Mans Rullgard
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21
#include "libavutil/arm/asm.S"
23
/*
 * ff_sbr_sum64x5_neon
 *
 * Visible data flow: one 128-bit vector is read through each of the five
 * pointers r0, r1, r2, r3 and lr, and q0 is written back through r0.
 * Every access carries a 128-bit alignment hint (:128).
 *
 * NOTE(review): only the loads and the store are visible here.  The setup
 * of r1-r3/lr, the arithmetic on q0-q3/q8, the loop control and the end of
 * the function are not shown.  The bare decimal lines look like leftover
 * listing line numbers rather than instructions -- TODO confirm and remove.
 */
function ff_sbr_sum64x5_neon, export=1
31
/* No writeback: r0 must still point at this vector for the store below. */
vld1.32 {q0}, [r0,:128]
32
/* The four remaining source pointers each advance by 16 bytes. */
vld1.32 {q1}, [r1,:128]!
34
vld1.32 {q2}, [r2,:128]!
36
vld1.32 {q3}, [r3,:128]!
38
vld1.32 {q8}, [lr,:128]!
40
/* Write q0 to the address it was loaded from, then advance r0 by 16. */
vst1.32 {q0}, [r0,:128]!
46
/*
 * ff_sbr_sum_square_neon
 *
 * Visible: a 128-bit aligned load through r0 with post-increment, and a
 * move of lane 0 of d0 into r0.
 *
 * NOTE(review): the accumulation that produces d0 and the loop control are
 * not visible here.  The bare decimal lines look like leftover listing line
 * numbers -- TODO confirm and remove.
 */
function ff_sbr_sum_square_neon, export=1
49
/* Fetch 16 bytes from [r0] and advance r0 by 16. */
vld1.32 {q1}, [r0,:128]!
55
/*
 * Copy the low 32-bit lane of d0 into r0.  NOVFP is a macro defined outside
 * this view; presumably it emits the instruction only when float results
 * are not returned in VFP registers -- TODO confirm against the macro.
 */
NOVFP vmov.32 r0, d0[0]
59
/*
 * ff_sbr_neg_odd_64_neon
 *
 * Visible data flow: 32-byte groups are read through r0 with vld2.32, which
 * splits the even-indexed 32-bit words into the first Q register and the
 * odd-indexed words into the second, and are written back interleaved
 * through r1 with vst2.32.  Two register pairs (q0/q1 and q2/q3) alternate
 * so that loads and stores overlap.
 *
 * NOTE(review): the initialisation of r1, the instructions that modify the
 * odd-word registers (q1/q3) between load and store, and the loop control
 * are not visible here.  The bare decimal lines look like leftover listing
 * line numbers -- TODO confirm and remove.
 */
function ff_sbr_neg_odd_64_neon, export=1
62
/* q0 = even words, q1 = odd words of the next 8 words; r0 += 32. */
vld2.32 {q0,q1}, [r0,:128]!
64
/* Second register pair: q2 = even words, q3 = odd words; r0 += 32. */
vld2.32 {q2,q3}, [r0,:128]!
66
/* Re-interleave q0/q1 and store 32 bytes through r1; r1 += 32. */
vst2.32 {q0,q1}, [r1,:128]!
68
/* q0/q1 are free again: refill them while q2/q3 are still pending. */
vld2.32 {q0,q1}, [r0,:128]!
69
vst2.32 {q2,q3}, [r1,:128]!
71
vld2.32 {q2,q3}, [r0,:128]!
74
/* Write out both register pairs. */
vst2.32 {q0,q1}, [r1,:128]!
75
vst2.32 {q2,q3}, [r1,:128]!
79
/*
 * ff_sbr_qmf_pre_shuffle_neon
 *
 * Visible data flow: data is read forward through r0 (post-increment by the
 * transfer size) and through r1 with a register post-increment of r3, and
 * is written through r2, mostly as vst2.32 interleaved stores.
 *
 * NOTE(review): the setup of r0-r3, all instructions that transform the
 * loaded registers (including whatever fills q10), and the loop control are
 * not visible here, so the exact element ordering produced cannot be stated
 * from this listing.  The bare decimal lines look like leftover listing
 * line numbers -- TODO confirm and remove.
 */
function ff_sbr_qmf_pre_shuffle_neon, export=1
82
/* Copy 8 bytes straight from [r0] to [r2]; both pointers advance by 8. */
vld1.32 {d0}, [r0,:64]!
83
vst1.32 {d0}, [r2,:64]!
87
/* 16 bytes from [r1]; r1 then moves by the value in r3. */
vld1.32 {q0}, [r1,:128], r3
88
vld1.32 {d2}, [r0,:64]!
90
/* 16 bytes into d3 (high half of q1) and d4 (low half of q2). */
vld1.32 {d3,d4}, [r0,:128]!
92
vld1.32 {q9}, [r1,:128], r3
94
/* 16 bytes into d5 (high half of q2) and d6 (low half of q3). */
vld1.32 {d5,d6}, [r0,:128]!
97
/* Store the words of q0 and q1 interleaved (32 bytes); r2 += 32. */
vst2.32 {q0,q1}, [r2,:64]!
102
vld1.32 {q0}, [r1,:128], r3
103
/* Store the words of q9 and q10 interleaved (32 bytes); r2 += 32. */
vst2.32 {q9,q10}, [r2,:64]!
106
vld1.32 {d3,d4}, [r0,:128]!
108
vld1.32 {q9}, [r1,:128], r3
110
vld1.32 {d5}, [r0,:64]!
113
vst2.32 {q0,q1}, [r2,:64]!
116
/* Two-register form with a stride of two: interleave d3 with d5 (16 bytes). */
vst2.32 {d3,d5}, [r2,:64]!
117
/* Single-lane form: store only lane 0 of d2 and lane 0 of d4 (8 bytes). */
vst2.32 {d2[0],d4[0]}, [r2,:64]!
121
/*
 * ff_sbr_qmf_post_shuffle_neon
 *
 * Visible data flow: 16-byte vectors are read through r2 (stepping by the
 * value in r3) and through r1 (stepping forward by 16), and are written
 * through r0 as interleaved pairs of D registers.  For each pair of Q
 * registers the high halves are stored before the low halves.
 *
 * NOTE(review): the setup of r2/r3, any instructions that transform the
 * loaded registers between load and store, and the loop control are not
 * visible here.  The bare decimal lines look like leftover listing line
 * numbers -- TODO confirm and remove.
 */
function ff_sbr_qmf_post_shuffle_neon, export=1
126
/* First register pair: q0 from [r2] (r2 += r3), q1 from [r1] (r1 += 16). */
vld1.32 {q0}, [r2,:128], r3
127
vld1.32 {q1}, [r1,:128]!
133
/* Second register pair: q2 from [r2], q3 from [r1]. */
vld1.32 {q2}, [r2,:128], r3
134
vld1.32 {q3}, [r1,:128]!
135
/* High halves of q0/q1 interleaved, then their low halves; 16 bytes each. */
vst2.32 {d1,d3}, [r0,:128]!
136
vst2.32 {d0,d2}, [r0,:128]!
141
/* Refill the first pair while the second pair is written out. */
vld1.32 {q0}, [r2,:128], r3
142
vld1.32 {q1}, [r1,:128]!
143
/* High halves of q2/q3 interleaved, then their low halves. */
vst2.32 {d5,d7}, [r0,:128]!
144
vst2.32 {d4,d6}, [r0,:128]!
150
/*
 * ff_sbr_qmf_deint_neg_neon
 *
 * Visible data flow: four 32-bit words are read through r1 and split into
 * even-indexed words (d0) and odd-indexed words (d1); d0 is written through
 * r2 and d1 through r0.
 *
 * NOTE(review): the setup of r0-r3, any instructions that modify d0/d1
 * between load and store, the update of r2 (its store has no writeback),
 * and the loop control are not visible here.  The bare decimal lines look
 * like leftover listing line numbers -- TODO confirm and remove.
 */
function ff_sbr_qmf_deint_neg_neon, export=1
157
/* De-interleave 16 bytes: d0 = words 0 and 2, d1 = words 1 and 3; r1 += r3. */
vld2.32 {d0,d1}, [r1,:128], r3
160
/* No writeback on r2 in this instruction. */
vst1.32 {d0}, [r2,:64]
161
/* r0 advances by 8 after the store. */
vst1.32 {d1}, [r0,:64]!
168
/*
 * ff_sbr_qmf_deint_bfly_neon
 *
 * Visible data flow: q0 is read through r1 (forward by 16) and q1 through
 * r2 (stepping by the value in lr); q1 is written through r3 (stepping by
 * lr) and q0 through r0 (forward by 16).
 *
 * NOTE(review): the setup of r2/r3/lr, the instructions that compute the
 * stored values from the loaded ones, and the loop control are not visible
 * here.  The bare decimal lines look like leftover listing line numbers --
 * TODO confirm and remove.
 */
function ff_sbr_qmf_deint_bfly_neon, export=1
175
vld1.32 {q0}, [r1,:128]!
176
/* r2 moves by the value in lr after the load. */
vld1.32 {q1}, [r2,:128], lr
183
/* r3 uses the same lr step as r2 above. */
vst1.32 {q1}, [r3,:128], lr
184
vst1.32 {q0}, [r0,:128]!
190
/*
 * ff_sbr_hf_g_filt_neon
 *
 * Visible data flow: r1 is offset by r12 * 8 bytes, 8-byte values are then
 * read through r1 with a register step of r12, pairs of 32-bit words are
 * read through r2 and broadcast across d2/d3, and results are written
 * through r0.
 *
 * NOTE(review): how r12 is loaded (and whether it is changed between its
 * use as an index and its use as a step), the multiplies that produce the
 * stored values, and the loop control are not visible here.  The bare
 * decimal lines look like leftover listing line numbers -- TODO confirm and
 * remove.
 */
function ff_sbr_hf_g_filt_neon, export=1
192
/* r1 += r12 * 8. */
add r1, r1, r12, lsl #3
195
/*
 * Load two consecutive words from [r2]: the first is replicated into both
 * lanes of d2, the second into both lanes of d3; r2 += 8.
 */
vld2.32 {d2[],d3[]},[r2,:64]!
196
/* 8 bytes from [r1]; r1 then moves by the value in r12. */
vld1.32 {d0}, [r1,:64], r12
198
vld1.32 {d1}, [r1,:64], r12
200
vld2.32 {d2[],d3[]},[r2,:64]!
201
vld1.32 {d0}, [r1,:64], r12
202
/* 16-byte store of q3 with only a 64-bit alignment hint; r0 += 16. */
vst1.32 {q3}, [r0,:64]!
208
/* 8-byte store of d0; r0 += 8. */
vst1.32 {d0}, [r0,:64]!
212
/*
 * ff_sbr_hf_gen_neon
 *
 * Visible data flow: a scalar is broadcast into d1 (from the stack or from
 * d0[0], depending on the NOVFP/VFP macros), 8-byte values are read through
 * r2 and r3, two further words are then loaded from the stack into r2/r3,
 * r0 and r1 are both offset by r2 * 8 bytes, and a multiply-accumulate
 * chain on q3 is stored through r0.
 *
 * NOTE(review): NOVFP, VFP and HAVE_VFP_ARGS are defined outside this view;
 * presumably they select between float arguments passed on the stack and
 * in VFP registers -- TODO confirm.  The instructions that set up d0, d16,
 * d18 and d20, the loop control and the end of the function are not
 * visible.  The bare decimal lines look like leftover listing line numbers
 * -- TODO confirm and remove.
 */
function ff_sbr_hf_gen_neon, export=1
213
/* Load one word from [sp] and replicate it into both lanes of d1. */
NOVFP vld1.32 {d1[]}, [sp,:32]
214
/* Replicate lane 0 of d0 into both lanes of d1. */
VFP vdup.32 d1, d0[0]
216
/* Read 8 bytes through each of r2 and r3 before those registers are reused. */
vld1.32 {d3}, [r2,:64]
217
vld1.32 {d2}, [r3,:64]
219
/*
 * Overwrite r2/r3 with two words from the stack.  The offset is written as
 * 4*!HAVE_VFP_ARGS; assuming `!` is logical NOT here, that is 4 when
 * HAVE_VFP_ARGS is 0 and 0 otherwise -- TODO confirm.
 */
ldrd r2, r3, [sp, #4*!HAVE_VFP_ARGS]
223
/* Offset both r0 and r1 by r2 * 8 bytes. */
add r0, r0, r2, lsl #3
224
add r1, r1, r2, lsl #3
227
/* Two consecutive 16-byte loads through r1 (q1 is overwritten here). */
vld1.32 {q1}, [r1,:128]!
229
vld1.32 {q3}, [r1,:128]!
234
/* q3 += q1 * d0[0] (vector times scalar). */
vmla.f32 q3, q1, d0[0]
236
/* d7 += d20 * d18 (element-wise). */
vmla.f32 d7, d20, d18
237
/* d6 += d3 * d0[1] and d7 += d16 * d0[1] (vector times scalar). */
vmla.f32 d6, d3, d0[1]
238
vmla.f32 d7, d16, d0[1]
242
/* Store the accumulated q3 through r0; r0 += 16. */
vst1.32 {q3}, [r0,:128]!
248
/*
 * ff_sbr_autocorrelate_neon
 *
 * Visible data flow: 16-byte vectors are read forward through r0, partial
 * sums are combined in d0, d1 and d20, and the results are written through
 * r1 as one 16-byte vector followed by 4- and 8-byte pieces.
 *
 * NOTE(review): most of the multiply-accumulate work, the loop control and
 * any adjustment of r1 between the final stores are not visible here; the
 * last three stores have no writeback, so as shown they all address the
 * same r1.  The bare decimal lines look like leftover listing line numbers
 * -- TODO confirm and remove.
 */
function ff_sbr_autocorrelate_neon, export=1
249
vld1.32 {q0}, [r0,:128]!
258
vld1.32 {q2}, [r0,:128]!
272
vld1.32 {q2}, [r0,:128]!
278
/* d20 = d20 + d21 (element-wise). */
vadd.f32 d20, d20, d21
283
/* d0 += d16 * d17 and d1 += d16 * d18 (element-wise). */
vmla.f32 d0, d16, d17
284
vmla.f32 d1, d16, d18
293
/* d20 += d22 * d22 (element-wise square accumulated into d20). */
vmla.f32 d20, d22, d22
296
/* Pairwise add: d20[0] = d20[0] + d20[1], d20[1] = d21[0] + d21[1]. */
vpadd.f32 d20, d20, d21
297
/* 16-byte store of q3; r1 += 16. */
vst1.32 {q3}, [r1,:128]!
298
/* Single-lane and 8-byte stores below use r1 without writeback. */
vst1.32 {d20[1]}, [r1,:32]
300
vst1.32 {d0}, [r1,:64]
302
vst1.32 {d20[0]}, [r1,:32]
306
/*
 * ff_sbr_hf_apply_noise_0_neon
 *
 * Visible data flow, in two similar sections:
 *   - a 16-byte section that updates [r0] in place using 16 bytes read at
 *     r4 + r3 * 8 and 8 bytes from each of r1 and r2;
 *   - an 8-byte section that updates [r0] in place using 8 bytes read at
 *     r4 + r3 * 8 and one broadcast word from each of r1 and r2.
 *
 * NOTE(review): movrelx and X() are macros defined outside this view;
 * presumably r4 receives the address of ff_sbr_noise_table -- TODO confirm.
 * The register saves, the update of r3, the arithmetic between loads and
 * stores, the labels/branches that select between the two sections, and
 * the end of the function are not visible.  The bare decimal lines look
 * like leftover listing line numbers -- TODO confirm and remove.
 */
function ff_sbr_hf_apply_noise_0_neon, export=1
310
movrelx r4, X(ff_sbr_noise_table)
316
/* lr = r4 + r3 * 8. */
add lr, r4, r3, lsl #3
317
/* De-interleave 16 bytes at [r0] into d0 (even words) and d1 (odd words). */
vld2.32 {q0}, [r0,:64]
318
/* Same split for 16 bytes at [lr]: d6 = even words, d7 = odd words. */
vld2.32 {q3}, [lr,:64]
319
/* 8 bytes from each of r1 and r2; both pointers advance by 8. */
vld1.32 {d2}, [r1,:64]!
320
vld1.32 {d18}, [r2,:64]!
331
/* Re-interleave d0/d1 and write back to the address loaded above; r0 += 16. */
vst2.32 {q0}, [r0,:64]!
335
/* 8-byte section: recompute lr = r4 + r3 * 8. */
add lr, r4, r3, lsl #3
336
vld1.32 {d0}, [r0,:64]
337
vld1.32 {d6}, [lr,:64]
338
/* One word from [r1] replicated into both lanes of d2; r1 += 4. */
vld1.32 {d2[]}, [r1,:32]!
339
/* One word from [r2] replicated into both lanes of d3; r2 += 4. */
vld1.32 {d3[]}, [r2,:32]!
346
/* Write d0 back to the address loaded above; r0 += 8. */
vst1.32 {d0}, [r0,:64]!
351
/*
 * ff_sbr_hf_apply_noise_1_neon
 *
 * Visible data flow, in two similar sections:
 *   - a 16-byte section that updates [r0] in place using 16 bytes read at
 *     r4 + r3 * 8 and 8 bytes from each of r1 and r2;
 *   - an 8-byte section that updates [r0] in place using 8 bytes read at
 *     r4 + r3 * 8 and one broadcast word from each of r1 and r2.
 *
 * NOTE(review): movrelx and X() are macros defined outside this view;
 * presumably r4 receives the address of ff_sbr_noise_table -- TODO confirm.
 * The register saves, the update of r3, the arithmetic between loads and
 * stores, the labels/branches that select between the two sections, and
 * the end of the function are not visible.  The bare decimal lines look
 * like leftover listing line numbers -- TODO confirm and remove.
 */
function ff_sbr_hf_apply_noise_1_neon, export=1
358
movrelx r4, X(ff_sbr_noise_table)
364
/* lr = r4 + r3 * 8. */
add lr, r4, r3, lsl #3
365
/* De-interleave 16 bytes at [r0] into d0 (even words) and d1 (odd words). */
vld2.32 {q0}, [r0,:64]
366
/* Same split for 16 bytes at [lr]: d6 = even words, d7 = odd words. */
vld2.32 {q3}, [lr,:64]
367
/* 8 bytes from each of r1 and r2; both pointers advance by 8. */
vld1.32 {d2}, [r1,:64]!
368
vld1.32 {d18}, [r2,:64]!
379
/* Re-interleave d0/d1 and write back to the address loaded above; r0 += 16. */
vst2.32 {q0}, [r0,:64]!
383
/* 8-byte section: recompute lr = r4 + r3 * 8. */
add lr, r4, r3, lsl #3
384
vld1.32 {d0}, [r0,:64]
385
vld1.32 {d6}, [lr,:64]
386
/* One word from [r1] replicated into both lanes of d2; r1 += 4. */
vld1.32 {d2[]}, [r1,:32]!
387
/* One word from [r2] replicated into both lanes of d18; r2 += 4. */
vld1.32 {d18[]}, [r2,:32]!
394
/* Write d0 back to the address loaded above; r0 += 8. */
vst1.32 {d0}, [r0,:64]!
399
function ff_sbr_hf_apply_noise_2_neon, export=1
404
function ff_sbr_hf_apply_noise_3_neon, export=1