2
* Copyright (c) 2008 ARM Ltd
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions
8
* 1. Redistributions of source code must retain the above copyright
9
* notice, this list of conditions and the following disclaimer.
10
* 2. Redistributions in binary form must reproduce the above copyright
11
* notice, this list of conditions and the following disclaimer in the
12
* documentation and/or other materials provided with the distribution.
13
* 3. The name of the company may not be used to endorse or promote
14
* products derived from this software without specific prior written
17
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
18
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
22
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34
/* Endianness helper macros.  Each one expands to a string literal that is
   spliced into the inline-assembly text by adjacent string concatenation:
     SHFT2LSB / SHFT2MSB - shift mnemonic ("lsl" or "lsr");
     MSB / LSB           - byte-mask immediate, without the leading '#'.
   Two alternative sets follow; the preprocessor conditional that selects
   between them is not visible in this excerpt.
   NOTE(review): this first set looks like the big-endian variant (the
   strcmp comment about r0 holding the byte in bits 0-7 matches
   SHFT2MSB == "lsr") - confirm against the full file.  */
#define SHFT2LSB "lsl"
35
#define SHFT2MSB "lsr"
36
#define MSB "0x000000ff"
37
#define LSB "0xff000000"
39
/* Alternative set: shift directions and masks swapped relative to the set
   above.  NOTE(review): presumably the little-endian variant - confirm.  */
#define SHFT2LSB "lsr"
40
#define SHFT2MSB "lsl"
41
#define MSB "0xff000000"
42
#define LSB "0x000000ff"
46
/* Operand text for the two constants used in the zero-byte test.
   Two alternative definitions follow; the preprocessor conditional that
   selects between them is not visible in this excerpt.
   This first form ignores REG and yields immediate operands.  */
#define magic1(REG) "#0x01010101"
47
#define magic2(REG) "#0x80808080"
49
/* Alternative form: stringify the register name so the constant is taken
   from REG.  magic2 appends "lsl #7", so a register holding 0x01010101
   supplies 0x80808080 as the shifted operand.  */
#define magic1(REG) #REG
50
#define magic2(REG) #REG ", lsl #7"
54
__attribute__((naked)) strcmp (const char* s1, const char* s2)
57
#if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \
58
(defined (__thumb__) && !defined (__thumb2__)))
63
/* Strings not at same byte offset from a word boundary. */
64
"bne strcmp_unaligned\n\t"
68
"ldr ip, [r0], #4\n\t"
70
"ldreq r3, [r1], #4\n\t"
72
/* Although s1 and s2 have identical initial alignment, they are
73
not currently word aligned. Rather than comparing bytes,
74
make sure that any bytes fetched from before the addressed
75
bytes are forced to 0xff. Then they will always compare
80
SHFT2LSB" r2, r3, r2\n\t"
81
"ldr r3, [r1], #4\n\t"
86
/* Load the 'magic' constant 0x01010101. */
87
"str r4, [sp, #-4]!\n\t"
89
"orr r4, r4, r4, lsl #8\n\t"
90
"orr r4, r4, r4, lsl #16\n"
96
"sub r2, ip, "magic1(r4)"\n\t"
99
/* check for any zero bytes in first word */
100
"biceq r2, r2, ip\n\t"
101
"tsteq r2, "magic2(r4)"\n\t"
102
"ldreq ip, [r0], #4\n\t"
103
"ldreq r3, [r1], #4\n\t"
106
/* There's a zero or a different byte in the word */
107
SHFT2MSB" r0, ip, #24\n\t"
108
SHFT2LSB" ip, ip, #8\n\t"
111
"cmpcs r0, r3, "SHFT2MSB" #24\n\t"
113
SHFT2LSB"eq r3, r3, #8\n\t"
115
/* On a big-endian machine, r0 contains the desired byte in bits
116
0-7; on a little-endian machine they are in bits 24-31. In
117
both cases the other bits in r0 are all zero. For r3 the
118
interesting byte is at the other end of the word, but the
119
other bits are not necessarily zero. We need a signed result
120
representing the difference in the unsigned bytes, so for the
121
little-endian case we can't just shift the interesting bits
124
"sub r0, r0, r3, lsr #24\n\t"
126
"and r3, r3, #255\n\t"
128
/* No RSB instruction in Thumb2 */
129
"lsr r0, r0, #24\n\t"
132
"rsb r0, r3, r0, lsr #24\n\t"
136
"ldr r4, [sp], #4\n\t"
139
#elif (defined (__thumb__) && !defined (__thumb2__))
154
"ldrb r2, [r0], #1\n\t"
155
"ldrb r3, [r1], #1\n\t"
166
#if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \
167
(defined (__thumb__) && !defined (__thumb2__)))
168
static int __attribute__((naked, used))
169
strcmp_unaligned(const char* s1, const char* s2)
172
/* The assembly code below is based on the following algorithm. */
181
#define body(shift) \
182
mask = 0xffffffffU RSHIFT shift; \
188
if (__builtin_expect(t1 != w2 RSHIFT shift, 0)) \
193
if (__builtin_expect(((w1 - b1) & ~w1) & (b1 << 7), 0)) \
195
/* See comment in assembler below re syndrome on big-endian */\
196
if ((((w1 - b1) & ~w1) & (b1 << 7)) & mask) \
201
t1 = w1 RSHIFT (32 - shift); \
202
w2 = (w2 LSHIFT (32 - shift)) RSHIFT (32 - shift); \
208
if (__builtin_expect(t1 != w2 LSHIFT (32 - shift), 0)) \
210
t1 = w1 >> (32 - shift); \
211
w2 = (w2 << (32 - shift)) RSHIFT (32 - shift); \
222
unsigned b1 = 0x01010101;
226
while (((unsigned) s1) & 3)
230
if (c1 == 0 || c1 != c2)
233
wp1 = (unsigned*) (((unsigned)s1) & ~3);
234
wp2 = (unsigned*) (((unsigned)s2) & ~3);
235
t1 = ((unsigned) s2) & 3;
252
c1 = (char) t1 >> 24;
253
c2 = (char) w2 >> 24;
260
} while (c1 != 0 && c1 == c2);
264
asm("wp1 .req r0\n\t"
272
/* First of all, compare bytes until wp1(sp1) is word-aligned. */
276
"ldrb r2, [wp1], #1\n\t"
277
"ldrb r3, [wp2], #1\n\t"
286
"str r5, [sp, #-4]!\n\t"
287
"str r4, [sp, #-4]!\n\t"
288
// "stmfd sp!, {r4, r5}\n\t"
290
"orr b1, b1, b1, lsl #8\n\t"
291
"orr b1, b1, b1, lsl #16\n\t"
293
"and t1, wp2, #3\n\t"
294
"bic wp2, wp2, #3\n\t"
295
"ldr w1, [wp1], #4\n\t"
296
"ldr w2, [wp2], #4\n\t"
301
/* Critical inner Loop: Block with 3 bytes initial overlap */
304
"bic t1, w1, #"MSB"\n\t"
305
"cmp t1, w2, "SHFT2LSB" #8\n\t"
309
"ands r3, r3, b1, lsl #7\n\t"
311
"ldreq w2, [wp2], #4\n\t"
314
"cmp t1, w2, "SHFT2MSB" #24\n\t"
316
"ldr w1, [wp1], #4\n\t"
319
SHFT2LSB" w2, w2, #8\n\t"
324
/* The syndrome value may contain false ones if the string ends
325
with the bytes 0x01 0x00 */
326
"tst w1, #0xff000000\n\t"
328
"tstne w1, #0x00ff0000\n\t"
329
"tstne w1, #0x0000ff00\n\t"
332
"bics r3, r3, #0xff000000\n\t"
336
SHFT2LSB" t1, w1, #24\n\t"
338
"lsl w2, w2, #24\n\t"
343
SHFT2LSB" t1, w1, #24\n\t"
344
"and w2, w2, #"LSB"\n\t"
347
/* Critical inner Loop: Block with 2 bytes initial overlap */
350
SHFT2MSB" t1, w1, #16\n\t"
352
SHFT2LSB" t1, t1, #16\n\t"
354
"cmp t1, w2, "SHFT2LSB" #16\n\t"
356
"ands r3, r3, b1, lsl #7\n\t"
358
"ldreq w2, [wp2], #4\n\t"
361
"cmp t1, w2, "SHFT2MSB" #16\n\t"
363
"ldr w1, [wp1], #4\n\t"
368
/* The syndrome value may contain false ones if the string ends
369
with the bytes 0x01 0x00 */
370
"tst w1, #0xff000000\n\t"
372
"tstne w1, #0x00ff0000\n\t"
375
"lsls r3, r3, #16\n\t"
379
SHFT2LSB" t1, w1, #16\n\t"
381
"lsl w2, w2, #16\n\t"
386
SHFT2MSB" w2, w2, #16\n\t"
387
SHFT2LSB" t1, w1, #16\n\t"
389
SHFT2LSB" w2, w2, #16\n\t"
392
/* Critical inner Loop: Block with 1 byte initial overlap */
395
"and t1, w1, #"LSB"\n\t"
396
"cmp t1, w2, "SHFT2LSB" #24\n\t"
400
"ands r3, r3, b1, lsl #7\n\t"
402
"ldreq w2, [wp2], #4\n\t"
405
"cmp t1, w2, "SHFT2MSB" #8\n\t"
407
"ldr w1, [wp1], #4\n\t"
410
SHFT2LSB" w2, w2, #24\n\t"
413
/* The syndrome value may contain false ones if the string ends
414
with the bytes 0x01 0x00 */
417
"ldr w2, [wp2], #4\n"
419
SHFT2LSB" t1, w1, #8\n\t"
420
"bic w2, w2, #"MSB"\n\t"
424
// "ldmfd sp!, {r4, r5}\n\t"
425
"ldr r4, [sp], #4\n\t"
426
"ldr r5, [sp], #4\n\t"
429
"and r2, t1, #"LSB"\n\t"
430
"and r0, w2, #"LSB"\n\t"
435
SHFT2LSB"eq t1, t1, #8\n\t"
436
SHFT2LSB"eq w2, w2, #8\n\t"
439
// "ldmfd sp!, {r4, r5}\n\t"
440
"ldr r4, [sp], #4\n\t"
441
"ldr r5, [sp], #4\n\t"