2
* Copyright (c) 2008 ARM Ltd
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions
8
* 1. Redistributions of source code must retain the above copyright
9
* notice, this list of conditions and the following disclaimer.
10
* 2. Redistributions in binary form must reproduce the above copyright
11
* notice, this list of conditions and the following disclaimer in the
12
* documentation and/or other materials provided with the distribution.
13
* 3. The name of the company may not be used to endorse or promote
14
* products derived from this software without specific prior written
17
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
18
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
22
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
/* String fragments that are pasted into the inline-assembly text below.
   SHFT2LSB / SHFT2MSB expand to a shift mnemonic ("lsl" or "lsr");
   MSB / LSB expand to a one-byte mask constant used as an immediate.
   Each macro is defined twice with opposite values.
   NOTE(review): the preprocessor conditional that selects one set is not
   visible in this excerpt -- confirm it before editing either set.

   First set: SHFT2MSB is "lsr", so a shift by 24 leaves a byte in bits
   0-7; presumably the big-endian variant -- TODO confirm.  */
#define SHFT2LSB "lsl"
31
#define SHFT2MSB "lsr"
32
#define MSB "0x000000ff"
33
#define LSB "0xff000000"
35
/* Second set: SHFT2MSB is "lsl", so a shift by 24 leaves a byte in bits
   24-31; presumably the little-endian variant -- TODO confirm.  */
#define SHFT2LSB "lsr"
36
#define SHFT2MSB "lsl"
37
#define MSB "0xff000000"
38
#define LSB "0x000000ff"
42
/* Operand text for the two constants used by the zero-byte test.
   This variant ignores REG and expands to the immediates 0x01010101
   and 0x80808080.
   NOTE(review): the condition choosing between this variant and the
   next one is not visible in this excerpt.  */
#define magic1(REG) "#0x01010101"
43
#define magic2(REG) "#0x80808080"
45
/* This variant stringizes REG: magic1 names the register itself and
   magic2 names the same register shifted left by 7.  With the register
   holding 0x01010101 the shifted operand equals 0x80808080.  */
#define magic1(REG) #REG
46
#define magic2(REG) #REG ", lsl #7"
50
__attribute__((naked)) strcmp (const char* s1, const char* s2)
53
#if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \
54
(defined (__thumb__) && !defined (__thumb2__)))
59
/* Strings not at same byte offset from a word boundary. */
60
"bne strcmp_unaligned\n\t"
64
"ldr ip, [r0], #4\n\t"
66
"ldreq r3, [r1], #4\n\t"
68
/* Although s1 and s2 have identical initial alignment, they are
69
not currently word aligned. Rather than comparing bytes,
70
make sure that any bytes fetched from before the addressed
71
bytes are forced to 0xff. Then they will always compare
76
SHFT2LSB" r2, r3, r2\n\t"
77
"ldr r3, [r1], #4\n\t"
82
/* Load the 'magic' constant 0x01010101. */
83
"str r4, [sp, #-4]!\n\t"
85
"orr r4, r4, r4, lsl #8\n\t"
86
"orr r4, r4, r4, lsl #16\n"
92
"sub r2, ip, "magic1(r4)"\n\t"
95
/* check for any zero bytes in first word */
96
"biceq r2, r2, ip\n\t"
97
"tsteq r2, "magic2(r4)"\n\t"
98
"ldreq ip, [r0], #4\n\t"
99
"ldreq r3, [r1], #4\n\t"
102
/* There's a zero or a different byte in the word */
103
SHFT2MSB" r0, ip, #24\n\t"
104
SHFT2LSB" ip, ip, #8\n\t"
107
"cmpcs r0, r3, "SHFT2MSB" #24\n\t"
109
SHFT2LSB"eq r3, r3, #8\n\t"
111
/* On a big-endian machine, r0 contains the desired byte in bits
112
0-7; on a little-endian machine it is in bits 24-31. In
113
both cases the other bits in r0 are all zero. For r3 the
114
interesting byte is at the other end of the word, but the
115
other bits are not necessarily zero. We need a signed result
116
representing the difference in the unsigned bytes, so for the
117
little-endian case we can't just shift the interesting bits
120
"sub r0, r0, r3, lsr #24\n\t"
122
"and r3, r3, #255\n\t"
124
/* No RSB instruction in Thumb2 */
125
"lsr r0, r0, #24\n\t"
128
"rsb r0, r3, r0, lsr #24\n\t"
132
"ldr r4, [sp], #4\n\t"
135
#elif (defined (__thumb__) && !defined (__thumb2__))
150
"ldrb r2, [r0], #1\n\t"
151
"ldrb r3, [r1], #1\n\t"
162
#if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \
163
(defined (__thumb__) && !defined (__thumb2__)))
164
static int __attribute__((naked, used))
165
strcmp_unaligned(const char* s1, const char* s2)
168
/* The assembly code below is based on the following algorithm. */
177
#define body(shift) \
178
mask = 0xffffffffU RSHIFT shift; \
184
if (__builtin_expect(t1 != w2 RSHIFT shift, 0)) \
189
if (__builtin_expect(((w1 - b1) & ~w1) & (b1 << 7), 0)) \
191
/* See comment in assembler below re syndrome on big-endian */\
192
if ((((w1 - b1) & ~w1) & (b1 << 7)) & mask) \
197
t1 = w1 RSHIFT (32 - shift); \
198
w2 = (w2 LSHIFT (32 - shift)) RSHIFT (32 - shift); \
204
if (__builtin_expect(t1 != w2 LSHIFT (32 - shift), 0)) \
206
t1 = w1 >> (32 - shift); \
207
w2 = (w2 << (32 - shift)) RSHIFT (32 - shift); \
218
unsigned b1 = 0x01010101;
222
while (((unsigned) s1) & 3)
226
if (c1 == 0 || c1 != c2)
229
wp1 = (unsigned*) (((unsigned)s1) & ~3);
230
wp2 = (unsigned*) (((unsigned)s2) & ~3);
231
t1 = ((unsigned) s2) & 3;
248
c1 = (char) t1 >> 24;
249
c2 = (char) w2 >> 24;
256
} while (c1 != 0 && c1 == c2);
260
asm("wp1 .req r0\n\t"
268
/* First of all, compare bytes until wp1(sp1) is word-aligned. */
272
"ldrb r2, [wp1], #1\n\t"
273
"ldrb r3, [wp2], #1\n\t"
282
"str r5, [sp, #-4]!\n\t"
283
"str r4, [sp, #-4]!\n\t"
284
// "stmfd sp!, {r4, r5}\n\t"
286
"orr b1, b1, b1, lsl #8\n\t"
287
"orr b1, b1, b1, lsl #16\n\t"
289
"and t1, wp2, #3\n\t"
290
"bic wp2, wp2, #3\n\t"
291
"ldr w1, [wp1], #4\n\t"
292
"ldr w2, [wp2], #4\n\t"
297
/* Critical inner Loop: Block with 3 bytes initial overlap */
300
"bic t1, w1, #"MSB"\n\t"
301
"cmp t1, w2, "SHFT2LSB" #8\n\t"
305
"ands r3, r3, b1, lsl #7\n\t"
307
"ldreq w2, [wp2], #4\n\t"
310
"cmp t1, w2, "SHFT2MSB" #24\n\t"
312
"ldr w1, [wp1], #4\n\t"
315
SHFT2LSB" w2, w2, #8\n\t"
320
/* The syndrome value may contain false ones if the string ends
321
with the bytes 0x01 0x00 */
322
"tst w1, #0xff000000\n\t"
324
"tstne w1, #0x00ff0000\n\t"
325
"tstne w1, #0x0000ff00\n\t"
328
"bics r3, r3, #0xff000000\n\t"
332
SHFT2LSB" t1, w1, #24\n\t"
334
"lsl w2, w2, #24\n\t"
339
SHFT2LSB" t1, w1, #24\n\t"
340
"and w2, w2, #"LSB"\n\t"
343
/* Critical inner Loop: Block with 2 bytes initial overlap */
346
SHFT2MSB" t1, w1, #16\n\t"
348
SHFT2LSB" t1, t1, #16\n\t"
350
"cmp t1, w2, "SHFT2LSB" #16\n\t"
352
"ands r3, r3, b1, lsl #7\n\t"
354
"ldreq w2, [wp2], #4\n\t"
357
"cmp t1, w2, "SHFT2MSB" #16\n\t"
359
"ldr w1, [wp1], #4\n\t"
364
/* The syndrome value may contain false ones if the string ends
365
with the bytes 0x01 0x00 */
366
"tst w1, #0xff000000\n\t"
368
"tstne w1, #0x00ff0000\n\t"
371
"lsls r3, r3, #16\n\t"
375
SHFT2LSB" t1, w1, #16\n\t"
377
"lsl w2, w2, #16\n\t"
382
SHFT2MSB" w2, w2, #16\n\t"
383
SHFT2LSB" t1, w1, #16\n\t"
385
SHFT2LSB" w2, w2, #16\n\t"
388
/* Critical inner Loop: Block with 1 byte initial overlap */
391
"and t1, w1, #"LSB"\n\t"
392
"cmp t1, w2, "SHFT2LSB" #24\n\t"
396
"ands r3, r3, b1, lsl #7\n\t"
398
"ldreq w2, [wp2], #4\n\t"
401
"cmp t1, w2, "SHFT2MSB" #8\n\t"
403
"ldr w1, [wp1], #4\n\t"
406
SHFT2LSB" w2, w2, #24\n\t"
409
/* The syndrome value may contain false ones if the string ends
410
with the bytes 0x01 0x00 */
413
"ldr w2, [wp2], #4\n"
415
SHFT2LSB" t1, w1, #8\n\t"
416
"bic w2, w2, #"MSB"\n\t"
420
// "ldmfd sp!, {r4, r5}\n\t"
421
"ldr r4, [sp], #4\n\t"
422
"ldr r5, [sp], #4\n\t"
425
"and r2, t1, #"LSB"\n\t"
426
"and r0, w2, #"LSB"\n\t"
431
SHFT2LSB"eq t1, t1, #8\n\t"
432
SHFT2LSB"eq w2, w2, #8\n\t"
435
// "ldmfd sp!, {r4, r5}\n\t"
436
"ldr r4, [sp], #4\n\t"
437
"ldr r5, [sp], #4\n\t"