3
/*
 *
 * Bluetooth low-complexity, subband codec (SBC) library
 *
 * Copyright (C) 2008-2010 Nokia Corporation
 * Copyright (C) 2004-2010 Marcel Holtmann <marcel@holtmann.org>
 * Copyright (C) 2004-2005 Henryk Ploetz <henryk@ploetzli.ch>
 * Copyright (C) 2005-2006 Brad Midgley <bmidgley@xmission.com>
 *
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 */
31
#include "sbc_tables.h"
33
#include "sbc_primitives_armv6.h"
36
/*
 * ARMv6 optimizations. The instructions are scheduled for the ARM11 pipeline.
 */
39
#ifdef SBC_BUILD_WITH_ARMV6_SUPPORT
41
/*
 * Four-subband analysis of one block, hand-written with ARMv6 SIMD
 * instructions.
 *
 * The function is naked, so the compiler emits no prologue or epilogue and
 * the assembly below is the entire body.  It is declared without parameters
 * and is meant to be called only through the sbc_analyze_four() cast macro;
 * the three arguments then arrive in the first AAPCS argument registers:
 *
 *   r0 = in      bytes 0..79 are read as packed pairs of 16-bit values
 *   r1 = out     four 32-bit words are stored here
 *   r2 = consts  bytes 0..79 are coefficients multiplied against the input,
 *                bytes 80..111 are the cosine table
 *
 * Outline:
 *   1. t1[0..3] are accumulated with smlad (dual 16x16 multiply-accumulate),
 *      each seeded with the rounding constant 0x8000.
 *   2. pkhtb packs the upper halfwords of two accumulators into one register,
 *      giving r3 = t1[0]:t1[1] and r12 = t1[2]:t1[3].
 *   3. Each output word is smuad(r3, cos) + smlad(r12, cos).
 *
 * r4-r11 are callee-saved under the AAPCS, so every one of them that is
 * clobbered here is pushed on entry and popped before returning.
 */
static void __attribute__((naked)) sbc_analyze_four_armv6()
{
	/* r0 = in, r1 = out, r2 = consts */
	__asm__ volatile (
		"push   {r1, r4-r7, lr}\n"
		"push   {r8-r11}\n"               /* also callee-saved */
		"ldrd   r4,  r5,  [r0, #0]\n"
		"ldrd   r6,  r7,  [r2, #0]\n"
		"ldrd   r8,  r9,  [r0, #16]\n"
		"ldrd   r10, r11, [r2, #16]\n"
		"mov    r14, #0x8000\n"           /* rounding seed for the sums */
		"smlad  r3,  r4,  r6,  r14\n"
		"smlad  r12, r5,  r7,  r14\n"
		"ldrd   r4,  r5,  [r0, #32]\n"
		"ldrd   r6,  r7,  [r2, #32]\n"
		"smlad  r3,  r8,  r10, r3\n"
		"smlad  r12, r9,  r11, r12\n"
		"ldrd   r8,  r9,  [r0, #48]\n"
		"ldrd   r10, r11, [r2, #48]\n"
		"smlad  r3,  r4,  r6,  r3\n"
		"smlad  r12, r5,  r7,  r12\n"
		"ldrd   r4,  r5,  [r0, #64]\n"
		"ldrd   r6,  r7,  [r2, #64]\n"
		"smlad  r3,  r8,  r10, r3\n"
		"smlad  r12, r9,  r11, r12\n"
		"ldrd   r8,  r9,  [r0, #8]\n"
		"ldrd   r10, r11, [r2, #8]\n"
		"smlad  r3,  r4,  r6,  r3\n"      /* t1[0] is done */
		"smlad  r12, r5,  r7,  r12\n"     /* t1[1] is done */
		"ldrd   r4,  r5,  [r0, #24]\n"
		"ldrd   r6,  r7,  [r2, #24]\n"
		"pkhtb  r3,  r12, r3, asr #16\n"  /* combine t1[0] and t1[1] */
		"smlad  r12, r8,  r10, r14\n"
		"smlad  r14, r9,  r11, r14\n"     /* r14 now becomes an accumulator */
		"ldrd   r8,  r9,  [r0, #40]\n"
		"ldrd   r10, r11, [r2, #40]\n"
		"smlad  r12, r4,  r6,  r12\n"
		"smlad  r14, r5,  r7,  r14\n"
		"ldrd   r4,  r5,  [r0, #56]\n"
		"ldrd   r6,  r7,  [r2, #56]\n"
		"smlad  r12, r8,  r10, r12\n"
		"smlad  r14, r9,  r11, r14\n"
		"ldrd   r8,  r9,  [r0, #72]\n"
		"ldrd   r10, r11, [r2, #72]\n"
		"smlad  r12, r4,  r6,  r12\n"
		"smlad  r14, r5,  r7,  r14\n"
		"ldrd   r4,  r5,  [r2, #80]\n"    /* start loading cos table */
		"smlad  r12, r8,  r10, r12\n"     /* t1[2] is done */
		"smlad  r14, r9,  r11, r14\n"     /* t1[3] is done */
		"ldrd   r6,  r7,  [r2, #88]\n"
		"ldrd   r8,  r9,  [r2, #96]\n"
		"ldrd   r10, r11, [r2, #104]\n"   /* cos table fully loaded */
		"pkhtb  r12, r14, r12, asr #16\n" /* combine t1[2] and t1[3] */
		"smuad  r4,  r3,  r4\n"           /* t1[0:1] part of out[0] */
		"smuad  r5,  r3,  r5\n"           /* t1[0:1] part of out[1] */
		"smlad  r4,  r12, r8,  r4\n"
		"smlad  r5,  r12, r9,  r5\n"
		"smuad  r6,  r3,  r6\n"           /* t1[0:1] part of out[2] */
		"smuad  r7,  r3,  r7\n"           /* t1[0:1] part of out[3] */
		"smlad  r6,  r12, r10, r6\n"
		"smlad  r7,  r12, r11, r7\n"
		"pop    {r8-r11}\n"
		"stmia  r1, {r4, r5, r6, r7}\n"
		"pop    {r1, r4-r7, pc}\n"
	);
}
108
#define sbc_analyze_four(in, out, consts) \
109
((void (*)(int16_t *, int32_t *, const FIXED_T*)) \
110
sbc_analyze_four_armv6)((in), (out), (consts))
112
/*
 * Eight-subband analysis of one block, hand-written with ARMv6 SIMD
 * instructions.
 *
 * The function is naked, so the compiler emits no prologue or epilogue and
 * the assembly below is the entire body.  It is declared without parameters
 * and is meant to be called only through the sbc_analyze_eight() cast macro;
 * the three arguments then arrive in the first AAPCS argument registers:
 *
 *   r0 = in      bytes 0..159 are read as packed pairs of 16-bit values
 *   r1 = out     eight 32-bit words are stored here
 *   r2 = consts  bytes 0..159 are coefficients multiplied against the input,
 *                the cosine table starts at byte 160
 *
 * Outline:
 *   1. t1[0..7] are accumulated with smlad (dual 16x16 multiply-accumulate),
 *      each seeded with the rounding constant 0x8000, and packed in pairs
 *      with pkhtb.  The pairs t1[6:7] and t1[4:5] are parked on the stack
 *      because there are not enough free registers to keep them live.
 *   2. The last coefficient load writes back r2 += 136, so every cosine
 *      table offset is written relative to that (160 - 136 + n); this keeps
 *      the offsets within the immediate range of ldrd.
 *   3. Each output word is smuad(t2[0:1], cos) followed by three smlad steps
 *      for t2[2:3], t2[4:5] and t2[6:7].  The first four words are stored
 *      while the last four are still being computed.
 *
 * r4-r11 are callee-saved under the AAPCS, so every one of them that is
 * clobbered here is pushed on entry and popped before returning.
 */
static void __attribute__((naked)) sbc_analyze_eight_armv6()
{
	/* r0 = in, r1 = out, r2 = consts */
	__asm__ volatile (
		"push   {r1, r4-r7, lr}\n"
		"push   {r8-r11}\n"               /* also callee-saved */
		"ldrd   r4,  r5,  [r0, #24]\n"
		"ldrd   r6,  r7,  [r2, #24]\n"
		"ldrd   r8,  r9,  [r0, #56]\n"
		"ldrd   r10, r11, [r2, #56]\n"
		"mov    r14, #0x8000\n"           /* rounding seed for the sums */
		"smlad  r3,  r4,  r6,  r14\n"
		"smlad  r12, r5,  r7,  r14\n"
		"ldrd   r4,  r5,  [r0, #88]\n"
		"ldrd   r6,  r7,  [r2, #88]\n"
		"smlad  r3,  r8,  r10, r3\n"
		"smlad  r12, r9,  r11, r12\n"
		"ldrd   r8,  r9,  [r0, #120]\n"
		"ldrd   r10, r11, [r2, #120]\n"
		"smlad  r3,  r4,  r6,  r3\n"
		"smlad  r12, r5,  r7,  r12\n"
		"ldrd   r4,  r5,  [r0, #152]\n"
		"ldrd   r6,  r7,  [r2, #152]\n"
		"smlad  r3,  r8,  r10, r3\n"
		"smlad  r12, r9,  r11, r12\n"
		"ldrd   r8,  r9,  [r0, #16]\n"
		"ldrd   r10, r11, [r2, #16]\n"
		"smlad  r3,  r4,  r6,  r3\n"      /* t1[6] is done */
		"smlad  r12, r5,  r7,  r12\n"     /* t1[7] is done */
		"ldrd   r4,  r5,  [r0, #48]\n"
		"ldrd   r6,  r7,  [r2, #48]\n"
		"pkhtb  r3,  r12, r3, asr #16\n"  /* combine t1[6] and t1[7] */
		"str    r3,  [sp, #-4]!\n"        /* save to stack */
		"smlad  r3,  r8,  r10, r14\n"
		"smlad  r12, r9,  r11, r14\n"
		"ldrd   r8,  r9,  [r0, #80]\n"
		"ldrd   r10, r11, [r2, #80]\n"
		"smlad  r3,  r4,  r6,  r3\n"
		"smlad  r12, r5,  r7,  r12\n"
		"ldrd   r4,  r5,  [r0, #112]\n"
		"ldrd   r6,  r7,  [r2, #112]\n"
		"smlad  r3,  r8,  r10, r3\n"
		"smlad  r12, r9,  r11, r12\n"
		"ldrd   r8,  r9,  [r0, #144]\n"
		"ldrd   r10, r11, [r2, #144]\n"
		"smlad  r3,  r4,  r6,  r3\n"
		"smlad  r12, r5,  r7,  r12\n"
		"ldrd   r4,  r5,  [r0, #0]\n"
		"ldrd   r6,  r7,  [r2, #0]\n"
		"smlad  r3,  r8,  r10, r3\n"      /* t1[4] is done */
		"smlad  r12, r9,  r11, r12\n"     /* t1[5] is done */
		"ldrd   r8,  r9,  [r0, #32]\n"
		"ldrd   r10, r11, [r2, #32]\n"
		"pkhtb  r3,  r12, r3, asr #16\n"  /* combine t1[4] and t1[5] */
		"str    r3,  [sp, #-4]!\n"        /* save to stack */
		"smlad  r3,  r4,  r6,  r14\n"
		"smlad  r12, r5,  r7,  r14\n"
		"ldrd   r4,  r5,  [r0, #64]\n"
		"ldrd   r6,  r7,  [r2, #64]\n"
		"smlad  r3,  r8,  r10, r3\n"
		"smlad  r12, r9,  r11, r12\n"
		"ldrd   r8,  r9,  [r0, #96]\n"
		"ldrd   r10, r11, [r2, #96]\n"
		"smlad  r3,  r4,  r6,  r3\n"
		"smlad  r12, r5,  r7,  r12\n"
		"ldrd   r4,  r5,  [r0, #128]\n"
		"ldrd   r6,  r7,  [r2, #128]\n"
		"smlad  r3,  r8,  r10, r3\n"
		"smlad  r12, r9,  r11, r12\n"
		"ldrd   r8,  r9,  [r0, #8]\n"
		"ldrd   r10, r11, [r2, #8]\n"
		"smlad  r3,  r4,  r6,  r3\n"      /* t1[0] is done */
		"smlad  r12, r5,  r7,  r12\n"     /* t1[1] is done */
		"ldrd   r4,  r5,  [r0, #40]\n"
		"ldrd   r6,  r7,  [r2, #40]\n"
		"pkhtb  r3,  r12, r3, asr #16\n"  /* combine t1[0] and t1[1] */
		"smlad  r12, r8,  r10, r14\n"
		"smlad  r14, r9,  r11, r14\n"     /* r14 now becomes an accumulator */
		"ldrd   r8,  r9,  [r0, #72]\n"
		"ldrd   r10, r11, [r2, #72]\n"
		"smlad  r12, r4,  r6,  r12\n"
		"smlad  r14, r5,  r7,  r14\n"
		"ldrd   r4,  r5,  [r0, #104]\n"
		"ldrd   r6,  r7,  [r2, #104]\n"
		"smlad  r12, r8,  r10, r12\n"
		"smlad  r14, r9,  r11, r14\n"
		"ldrd   r8,  r9,  [r0, #136]\n"
		"ldrd   r10, r11, [r2, #136]!\n"  /* writeback: r2 += 136 */
		"smlad  r12, r4,  r6,  r12\n"
		"smlad  r14, r5,  r7,  r14\n"
		"ldrd   r4,  r5,  [r2, #(160 - 136 + 0)]\n"
		"smlad  r12, r8,  r10, r12\n"     /* t1[2] is done */
		"smlad  r14, r9,  r11, r14\n"     /* t1[3] is done */
		"ldrd   r6,  r7,  [r2, #(160 - 136 + 8)]\n"
		"smuad  r4,  r3,  r4\n"
		"smuad  r5,  r3,  r5\n"
		"pkhtb  r12, r14, r12, asr #16\n" /* combine t1[2] and t1[3] */
						  /* r3  = t2[0:1] */
						  /* r12 = t2[2:3] */
		"pop    {r0, r14}\n"              /* t2[4:5], t2[6:7] */
		"ldrd   r8,  r9,  [r2, #(160 - 136 + 32)]\n"
		"smuad  r6,  r3,  r6\n"
		"smuad  r7,  r3,  r7\n"
		"ldrd   r10, r11, [r2, #(160 - 136 + 40)]\n"
		"smlad  r4,  r12, r8,  r4\n"
		"smlad  r5,  r12, r9,  r5\n"
		"ldrd   r8,  r9,  [r2, #(160 - 136 + 64)]\n"
		"smlad  r6,  r12, r10, r6\n"
		"smlad  r7,  r12, r11, r7\n"
		"ldrd   r10, r11, [r2, #(160 - 136 + 72)]\n"
		"smlad  r4,  r0,  r8,  r4\n"
		"smlad  r5,  r0,  r9,  r5\n"
		"ldrd   r8,  r9,  [r2, #(160 - 136 + 96)]\n"
		"smlad  r6,  r0,  r10, r6\n"
		"smlad  r7,  r0,  r11, r7\n"
		"ldrd   r10, r11, [r2, #(160 - 136 + 104)]\n"
		"smlad  r4,  r14, r8,  r4\n"
		"smlad  r5,  r14, r9,  r5\n"
		"ldrd   r8,  r9,  [r2, #(160 - 136 + 16 + 0)]\n"
		"smlad  r6,  r14, r10, r6\n"
		"smlad  r7,  r14, r11, r7\n"
		"ldrd   r10, r11, [r2, #(160 - 136 + 16 + 8)]\n"
		"stmia  r1!, {r4, r5}\n"          /* first two output words */
		"smuad  r4,  r3,  r8\n"
		"smuad  r5,  r3,  r9\n"
		"ldrd   r8,  r9,  [r2, #(160 - 136 + 16 + 32)]\n"
		"stmia  r1!, {r6, r7}\n"          /* next two output words */
		"smuad  r6,  r3,  r10\n"
		"smuad  r7,  r3,  r11\n"
		"ldrd   r10, r11, [r2, #(160 - 136 + 16 + 40)]\n"
		"smlad  r4,  r12, r8,  r4\n"
		"smlad  r5,  r12, r9,  r5\n"
		"ldrd   r8,  r9,  [r2, #(160 - 136 + 16 + 64)]\n"
		"smlad  r6,  r12, r10, r6\n"
		"smlad  r7,  r12, r11, r7\n"
		"ldrd   r10, r11, [r2, #(160 - 136 + 16 + 72)]\n"
		"smlad  r4,  r0,  r8,  r4\n"
		"smlad  r5,  r0,  r9,  r5\n"
		"ldrd   r8,  r9,  [r2, #(160 - 136 + 16 + 96)]\n"
		"smlad  r6,  r0,  r10, r6\n"
		"smlad  r7,  r0,  r11, r7\n"
		"ldrd   r10, r11, [r2, #(160 - 136 + 16 + 104)]\n"
		"smlad  r4,  r14, r8,  r4\n"
		"smlad  r5,  r14, r9,  r5\n"
		"smlad  r6,  r14, r10, r6\n"
		"smlad  r7,  r14, r11, r7\n"
		"pop    {r8-r11}\n"
		"stmia  r1!, {r4, r5, r6, r7}\n"  /* last four output words */
		"pop    {r1, r4-r7, pc}\n"
	);
}
264
#define sbc_analyze_eight(in, out, consts) \
265
((void (*)(int16_t *, int32_t *, const FIXED_T*)) \
266
sbc_analyze_eight_armv6)((in), (out), (consts))
268
static void sbc_analyze_4b_4s_armv6(int16_t *x, int32_t *out, int out_stride)
271
sbc_analyze_four(x + 12, out, analysis_consts_fixed4_simd_odd);
273
sbc_analyze_four(x + 8, out, analysis_consts_fixed4_simd_even);
275
sbc_analyze_four(x + 4, out, analysis_consts_fixed4_simd_odd);
277
sbc_analyze_four(x + 0, out, analysis_consts_fixed4_simd_even);
280
static void sbc_analyze_4b_8s_armv6(int16_t *x, int32_t *out, int out_stride)
283
sbc_analyze_eight(x + 24, out, analysis_consts_fixed8_simd_odd);
285
sbc_analyze_eight(x + 16, out, analysis_consts_fixed8_simd_even);
287
sbc_analyze_eight(x + 8, out, analysis_consts_fixed8_simd_odd);
289
sbc_analyze_eight(x + 0, out, analysis_consts_fixed8_simd_even);
292
void sbc_init_primitives_armv6(struct sbc_encoder_state *state)
294
state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_armv6;
295
state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_armv6;
296
state->implementation_info = "ARMv6 SIMD";