2
* Copyright (C) 2008 The Android Open Source Project
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions
8
* * Redistributions of source code must retain the above copyright
9
* notice, this list of conditions and the following disclaimer.
10
* * Redistributions in binary form must reproduce the above copyright
11
* notice, this list of conditions and the following disclaimer in
12
* the documentation and/or other materials provided with the
15
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
18
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
22
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
25
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30
.type memcmp, %function
34
* Optimized memcmp() for ARM9.
35
* This would not be optimal on XScale or ARM11, where more prefetching
36
* and use of PLD will be needed.
37
* The 2 major optimizations here are
38
* (1) The main loop compares 16 bytes at a time
39
* (2) The loads are scheduled in a way they won't stall
47
/* take care of the case where length is 0 or the buffers are the same */
60
/* since r0 holds the result, move the first source
61
* pointer somewhere else
66
/* make sure we have at least 8+4 bytes, this simplifies things below
67
* and avoids some overhead for small blocks
72
/* align first pointer to word boundary
79
/* align first pointer */
89
0: /* here the first pointer is aligned, and we have at least 4 bytes
93
/* see if the pointers are congruent */
98
/* congruent case, 32 bytes per iteration
99
* We need to make sure there are at least 32+4 bytes left
100
* because we effectively read ahead one word, and we could
101
* read past the buffer (and segfault) if we're not careful.
105
subs r2, r2, #(32 + 4)
138
/* do we have at least 4 bytes left? */
139
1: adds r2, r2, #(32 - 4 + 4)
142
/* finish off 4 bytes at a time */
155
/* finish off the remaining bytes */
158
2: /* the last 4 bytes are different, restart them */
163
/* process the last few bytes */
172
9: /* restore registers and return */
181
5: /*************** non-congruent case ***************/
186
/* here, offset is 2 (16-bits aligned, special cased) */
188
/* make sure we have at least 16 bytes to process */
193
/* align the unaligned pointer */
202
orr ip, ip, lr, lsl #16
204
moveq ip, lr, lsr #16
207
orreq ip, ip, lr, lsl #16
209
moveq ip, lr, lsr #16
212
orreq ip, ip, lr, lsl #16
214
moveq ip, lr, lsr #16
217
orreq ip, ip, lr, lsl #16
227
/* finish off the remaining bytes */
230
7: /* fix up the 2 pointers and fallthrough... */
237
4: /*************** offset is 1 or 3 (less optimized) ***************/
239
stmfd sp!, {r5, r6, r7}
245
mov r5, r0, lsl #3 /* r5 = right shift */
246
rsb r6, r5, #32 /* r6 = left shift */
248
/* align the unaligned pointer */
253
6: mov ip, r7, lsr r5
256
orr ip, ip, r7, lsl r6
261
orreq ip, ip, r7, lsl r6
267
sub r1, r1, r6, lsr #3
268
ldmfd sp!, {r5, r6, r7}
275
/* finish off the remaining bytes */
278
7: /* fix up the 2 pointers and fallthrough... */
280
sub r1, r1, r6, lsr #3
283
ldmfd sp!, {r5, r6, r7}