2
* Copyright (C) 2008 The Android Open Source Project
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions
8
* * Redistributions of source code must retain the above copyright
9
* notice, this list of conditions and the following disclaimer.
10
* * Redistributions in binary form must reproduce the above copyright
11
* notice, this list of conditions and the following disclaimer in
12
* the documentation and/or other materials provided with the
15
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
18
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
22
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
25
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29
#include <machine/cpu-features.h>
34
.type __memcmp16, %function
38
* Optimized memcmp16() for ARM9.
39
* This would not be optimal on XScale or ARM11, where more prefetching
40
* and use of PLD will be needed.
41
* The 2 major optimizations here are:
42
* (1) The main loop compares 16 bytes at a time
43
* (2) The loads are scheduled in such a way that they won't stall
51
/* take care of the case where length is zero or the buffers are the same */
57
/* since r0 holds the result, move the first source
58
* pointer somewhere else
63
/* make sure we have at least 12 words; this simplifies things below
64
* and avoids some overhead for small blocks
70
/* small blocks (less than 12 words) */
85
0: stmfd sp!, {r4, lr}
87
/* align first pointer to word boundary */
95
/* restore registers and return */
102
0: /* here the first pointer is aligned, and we have at least 3 words
106
/* see if the pointers are congruent */
111
/* congruent case, 16 half-words per iteration
112
* We need to make sure there are at least 16+2 words left
113
* because we effectively read ahead one long word, and we could
114
* read past the buffer (and segfault) if we're not careful.
118
subs r2, r2, #(16 + 2)
152
/* do we have at least 2 words left? */
153
1: adds r2, r2, #(16 - 2 + 2)
156
/* finish off 2 words at a time */
167
/* restore registers and return */
172
2: /* the last 2 words are different, restart them */
179
/* restore registers and return */
183
/* process the last few words */
191
9: /* restore registers and return */
196
5: /*************** non-congruent case ***************/
198
/* align the unaligned pointer */
209
orr ip, ip, lr, lsl #16
211
moveq ip, lr, lsr #16
214
orreq ip, ip, lr, lsl #16
216
moveq ip, lr, lsr #16
219
orreq ip, ip, lr, lsl #16
221
moveq ip, lr, lsr #16
224
orreq ip, ip, lr, lsl #16
234
/* finish off the remaining bytes */
237
7: /* fix up the 2 pointers and fallthrough... */