/*
 * Copyright (c) 2013 RISC OS Open Ltd
 * Author: Ben Avison <bavison@riscosopen.org>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
22
#include "libavutil/arm/asm.S"
38
/*
 * Preload look-ahead distance. Every use in this file multiplies it by 32
 * (for example pld [PTR, #PRELOAD_DISTANCE*32]), and the comments in this
 * file count it in cachelines.
 */
#define PRELOAD_DISTANCE 4
42
subs SIZE, SIZE, #4 @ C flag survives rest of macro
43
sub TMP0, DAT0, PATTERN, lsr #14
45
ands TMP0, TMP0, PATTERN
48
/*
 * innerloop16 decrement, do_preload
 *
 * Loads four consecutive words from PTR into DAT0-DAT3 (PTR is written
 * back, so it advances by 16 bytes) and tests them against PATTERN.
 *
 *   decrement  - immediate subtracted from SIZE, with the flags updated.
 *   do_preload - when non-empty, a pld is issued PRELOAD_DISTANCE*32
 *                bytes beyond the already-advanced PTR.
 *
 * Going by the instructions visible here, Z is set on exit only if all
 * four (TMPn AND PATTERN) results are zero: each andseq executes only
 * while the previous result was zero, so the first non-zero word leaves
 * NE and the later TMP registers are left unmasked.
 *
 * NOTE(review): the bare numeric lines and the absence of a closing
 * .endif/.endm suggest lines were lost in extraction - confirm against
 * the upstream file before assembling.
 * NOTE(review): one visible call site passes an empty decrement, so the
 * subs is presumably wrapped in a conditional that is not shown here -
 * confirm.
 */
.macro innerloop16 decrement, do_preload
49
/* Fetch 16 bytes; the ! writes the incremented address back to PTR. */
ldmia PTR!, {DAT0,DAT1,DAT2,DAT3}
50
/* The preload is emitted only when a second macro argument was given. */
.ifnc "\do_preload",""
51
pld [PTR, #PRELOAD_DISTANCE*32]
54
subs SIZE, SIZE, #\decrement @ C flag survives rest of macro
56
/*
 * TMPn = DATn - (PATTERN >> 14). With PATTERN = 0x80008000, as loaded in
 * ff_startcode_find_candidate_armv6, the subtrahend is 0x00020002.
 */
sub TMP0, DAT0, PATTERN, lsr #14
57
sub TMP1, DAT1, PATTERN, lsr #14
60
sub TMP2, DAT2, PATTERN, lsr #14
61
sub TMP3, DAT3, PATTERN, lsr #14
62
/*
 * Mask each result with PATTERN. Only the first AND is unconditional; the
 * rest run only while Z is still set, so a non-zero result ends the chain.
 */
ands TMP0, TMP0, PATTERN
65
andseq TMP1, TMP1, PATTERN
68
andseq TMP2, TMP2, PATTERN
69
andseq TMP3, TMP3, PATTERN
72
/* int ff_startcode_find_candidate_armv6(const uint8_t *buf, int size) */
73
function ff_startcode_find_candidate_armv6, export=1
76
@ Ensure there are at least (PRELOAD_DISTANCE+2) complete cachelines to go
77
@ before using code that does preloads
78
cmp SIZE, #(PRELOAD_DISTANCE+3)*32 - 1
81
@ Get to word-alignment, 1 byte at a time
84
1: ldrb DAT0, [PTR], #1
90
2: @ Get to 4-word alignment, 1 word at a time
91
ldr PATTERN, =0x80008000
99
4: @ Get to cacheline (8-word) alignment
104
5: @ Check complete cachelines, with preloading
105
@ We need to stop when there are still (PRELOAD_DISTANCE+1)
106
@ complete cachelines to go
107
sub SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32
108
6: innerloop16 , do_preload
113
@ Preload trailing part-cacheline, if any
116
pld [PTR, #(PRELOAD_DISTANCE+1)*32]
117
@ Check remaining data without doing any more preloads. First
118
@ do in chunks of 4 words:
119
7: adds SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32 - 16
125
9: adds SIZE, SIZE, #16 - 4
131
@ Check second byte of final halfword
132
ldrb DAT0, [PTR, #-1]
135
@ Check any remaining bytes
138
12: ldrb DAT0, [PTR], #1
145
13: sub RESULT, PTR, BUF
148
60: @ Small buffer - simply check by looping over bytes
151
61: ldrb DAT0, [PTR], #1
160
90: @ Found a candidate at the preceding byte
162
sub RESULT, RESULT, #1
165
91: @ Found a candidate somewhere in the preceding 4 bytes
167
sub RESULT, RESULT, #4
168
sub TMP0, DAT0, #0x20000
169
bics TMP0, TMP0, DAT0
171
ldrbpl DAT0, [PTR, #-3]
172
addpl RESULT, RESULT, #2
175
beq 98f @ don't look back a byte if found at first byte in buffer
176
ldrb DAT0, [PTR, #-5]
179
subeq RESULT, RESULT, #1
182
93: @ Found a candidate somewhere in the preceding 16 bytes
184
sub RESULT, RESULT, #16
186
beq 95f @ not in first 4 bytes
187
sub TMP0, DAT0, #0x20000
188
bics TMP0, TMP0, DAT0
190
ldrbpl DAT0, [PTR, #-15]
191
addpl RESULT, RESULT, #2
194
beq 98f @ don't look back a byte if found at first byte in buffer
195
ldrb DAT0, [PTR, #-17]
198
subeq RESULT, RESULT, #1
200
95: add RESULT, RESULT, #4
202
beq 96f @ not in next 4 bytes
203
sub TMP1, DAT1, #0x20000
204
bics TMP1, TMP1, DAT1
206
ldrbmi DAT0, [PTR, #-13]
207
ldrbpl DAT0, [PTR, #-11]
208
addpl RESULT, RESULT, #2
211
subeq RESULT, RESULT, #1
213
96: add RESULT, RESULT, #4
215
beq 97f @ not in next 4 bytes
216
sub TMP2, DAT2, #0x20000
217
bics TMP2, TMP2, DAT2
219
ldrbmi DAT0, [PTR, #-9]
220
ldrbpl DAT0, [PTR, #-7]
221
addpl RESULT, RESULT, #2
224
subeq RESULT, RESULT, #1
226
97: add RESULT, RESULT, #4
227
sub TMP3, DAT3, #0x20000
228
bics TMP3, TMP3, DAT3
230
ldrbmi DAT0, [PTR, #-5]
231
ldrbpl DAT0, [PTR, #-3]
232
addpl RESULT, RESULT, #2
235
subeq RESULT, RESULT, #1
236
@ drop through to 98f