2
* Copyright (c) 2008 ARM Ltd
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions
8
* 1. Redistributions of source code must retain the above copyright
9
* notice, this list of conditions and the following disclaimer.
10
* 2. Redistributions in binary form must reproduce the above copyright
11
* notice, this list of conditions and the following disclaimer in the
12
* documentation and/or other materials provided with the distribution.
13
* 3. The name of the company may not be used to endorse or promote
14
* products derived from this software without specific prior written
17
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
18
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
22
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37
#define SHFT2LSB "lsl"
38
#define SHFT2MSB "lsr"
39
#define MSB "0x000000ff"
40
#define LSB "0xff000000"
42
#define SHFT2LSB "lsr"
43
#define SHFT2MSB "lsl"
44
#define MSB "0xff000000"
45
#define LSB "0x000000ff"
49
#define magic1(REG) "#0x01010101"
50
#define magic2(REG) "#0x80808080"
52
#define magic1(REG) #REG
53
#define magic2(REG) #REG ", lsl #7"
57
__attribute__((naked)) strcmp (const char* s1, const char* s2)
60
#if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \
61
(defined (__thumb__) && !defined (__thumb2__)))
66
/* Strings not at same byte offset from a word boundary. */
67
"bne strcmp_unaligned\n\t"
71
"ldr ip, [r0], #4\n\t"
73
"ldreq r3, [r1], #4\n\t"
75
/* Although s1 and s2 have identical initial alignment, they are
76
not currently word aligned. Rather than comparing bytes,
77
make sure that any bytes fetched from before the addressed
78
bytes are forced to 0xff. Then they will always compare
83
SHFT2LSB" r2, r3, r2\n\t"
84
"ldr r3, [r1], #4\n\t"
89
/* Load the 'magic' constant 0x01010101. */
90
"str r4, [sp, #-4]!\n\t"
92
"orr r4, r4, r4, lsl #8\n\t"
93
"orr r4, r4, r4, lsl #16\n"
99
"sub r2, ip, "magic1(r4)"\n\t"
102
/* check for any zero bytes in first word */
103
"biceq r2, r2, ip\n\t"
104
"tsteq r2, "magic2(r4)"\n\t"
105
"ldreq ip, [r0], #4\n\t"
106
"ldreq r3, [r1], #4\n\t"
109
/* There's a zero or a different byte in the word */
110
SHFT2MSB" r0, ip, #24\n\t"
111
SHFT2LSB" ip, ip, #8\n\t"
114
"cmpcs r0, r3, "SHFT2MSB" #24\n\t"
116
SHFT2LSB"eq r3, r3, #8\n\t"
118
/* On a big-endian machine, r0 contains the desired byte in bits
119
0-7; on a little-endian machine they are in bits 24-31. In
120
both cases the other bits in r0 are all zero. For r3 the
121
interesting byte is at the other end of the word, but the
122
other bits are not necessarily zero. We need a signed result
123
representing the difference in the unsigned bytes, so for the
124
little-endian case we can't just shift the interesting bits
127
"sub r0, r0, r3, lsr #24\n\t"
129
"and r3, r3, #255\n\t"
131
/* No RSB instruction in Thumb2 */
132
"lsr r0, r0, #24\n\t"
135
"rsb r0, r3, r0, lsr #24\n\t"
139
"ldr r4, [sp], #4\n\t"
142
#elif (defined (__thumb__) && !defined (__thumb2__))
157
"ldrb r2, [r0], #1\n\t"
158
"ldrb r3, [r1], #1\n\t"
169
#if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \
170
(defined (__thumb__) && !defined (__thumb2__)))
171
static int __attribute__((naked, used))
172
strcmp_unaligned(const char* s1, const char* s2)
175
/* The assembly code below is based on the following algorithm. */
184
#define body(shift) \
185
mask = 0xffffffffU RSHIFT shift; \
191
if (__builtin_expect(t1 != w2 RSHIFT shift, 0)) \
196
if (__builtin_expect(((w1 - b1) & ~w1) & (b1 << 7), 0)) \
198
/* See comment in assembler below re syndrome on big-endian */\
199
if ((((w1 - b1) & ~w1) & (b1 << 7)) & mask) \
204
t1 = w1 RSHIFT (32 - shift); \
205
w2 = (w2 LSHIFT (32 - shift)) RSHIFT (32 - shift); \
211
if (__builtin_expect(t1 != w2 LSHIFT (32 - shift), 0)) \
213
t1 = w1 >> (32 - shift); \
214
w2 = (w2 << (32 - shift)) RSHIFT (32 - shift); \
225
unsigned b1 = 0x01010101;
229
while (((unsigned) s1) & 3)
233
if (c1 == 0 || c1 != c2)
236
wp1 = (unsigned*) (((unsigned)s1) & ~3);
237
wp2 = (unsigned*) (((unsigned)s2) & ~3);
238
t1 = ((unsigned) s2) & 3;
255
c1 = (char) t1 >> 24;
256
c2 = (char) w2 >> 24;
263
} while (c1 != 0 && c1 == c2);
267
asm("wp1 .req r0\n\t"
275
/* First of all, compare bytes until wp1(sp1) is word-aligned. */
279
"ldrb r2, [wp1], #1\n\t"
280
"ldrb r3, [wp2], #1\n\t"
289
"str r5, [sp, #-4]!\n\t"
290
"str r4, [sp, #-4]!\n\t"
291
// "stmfd sp!, {r4, r5}\n\t"
293
"orr b1, b1, b1, lsl #8\n\t"
294
"orr b1, b1, b1, lsl #16\n\t"
296
"and t1, wp2, #3\n\t"
297
"bic wp2, wp2, #3\n\t"
298
"ldr w1, [wp1], #4\n\t"
299
"ldr w2, [wp2], #4\n\t"
304
/* Critical inner Loop: Block with 3 bytes initial overlap */
307
"bic t1, w1, #"MSB"\n\t"
308
"cmp t1, w2, "SHFT2LSB" #8\n\t"
312
"ands r3, r3, b1, lsl #7\n\t"
314
"ldreq w2, [wp2], #4\n\t"
317
"cmp t1, w2, "SHFT2MSB" #24\n\t"
319
"ldr w1, [wp1], #4\n\t"
322
SHFT2LSB" w2, w2, #8\n\t"
327
/* The syndrome value may contain false ones if the string ends
328
with the bytes 0x01 0x00 */
329
"tst w1, #0xff000000\n\t"
331
"tstne w1, #0x00ff0000\n\t"
332
"tstne w1, #0x0000ff00\n\t"
335
"bics r3, r3, #0xff000000\n\t"
339
SHFT2LSB" t1, w1, #24\n\t"
341
"lsl w2, w2, #24\n\t"
346
SHFT2LSB" t1, w1, #24\n\t"
347
"and w2, w2, #"LSB"\n\t"
350
/* Critical inner Loop: Block with 2 bytes initial overlap */
353
SHFT2MSB" t1, w1, #16\n\t"
355
SHFT2LSB" t1, t1, #16\n\t"
357
"cmp t1, w2, "SHFT2LSB" #16\n\t"
359
"ands r3, r3, b1, lsl #7\n\t"
361
"ldreq w2, [wp2], #4\n\t"
364
"cmp t1, w2, "SHFT2MSB" #16\n\t"
366
"ldr w1, [wp1], #4\n\t"
371
/* The syndrome value may contain false ones if the string ends
372
with the bytes 0x01 0x00 */
373
"tst w1, #0xff000000\n\t"
375
"tstne w1, #0x00ff0000\n\t"
378
"lsls r3, r3, #16\n\t"
382
SHFT2LSB" t1, w1, #16\n\t"
384
"lsl w2, w2, #16\n\t"
389
SHFT2MSB" w2, w2, #16\n\t"
390
SHFT2LSB" t1, w1, #16\n\t"
392
SHFT2LSB" w2, w2, #16\n\t"
395
/* Critical inner Loop: Block with 1 byte initial overlap */
398
"and t1, w1, #"LSB"\n\t"
399
"cmp t1, w2, "SHFT2LSB" #24\n\t"
403
"ands r3, r3, b1, lsl #7\n\t"
405
"ldreq w2, [wp2], #4\n\t"
408
"cmp t1, w2, "SHFT2MSB" #8\n\t"
410
"ldr w1, [wp1], #4\n\t"
413
SHFT2LSB" w2, w2, #24\n\t"
416
/* The syndrome value may contain false ones if the string ends
417
with the bytes 0x01 0x00 */
420
"ldr w2, [wp2], #4\n"
422
SHFT2LSB" t1, w1, #8\n\t"
423
"bic w2, w2, #"MSB"\n\t"
427
// "ldmfd sp!, {r4, r5}\n\t"
428
"ldr r4, [sp], #4\n\t"
429
"ldr r5, [sp], #4\n\t"
432
"and r2, t1, #"LSB"\n\t"
433
"and r0, w2, #"LSB"\n\t"
438
SHFT2LSB"eq t1, t1, #8\n\t"
439
SHFT2LSB"eq w2, w2, #8\n\t"
442
// "ldmfd sp!, {r4, r5}\n\t"
443
"ldr r4, [sp], #4\n\t"
444
"ldr r5, [sp], #4\n\t"
450
/* For GLIBC: libc_hidden_builtin_def (strcmp) */