/* ~linaro-toolchain-dev/cortex-strings/trunk
 *
 * 1 by Michael Hope
 * Pulled in the initial versions
 */
/*
 * Copyright (c) 2008 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "arm_asm.h"
#include <_ansi.h>
#include <string.h>

/* Endianness-dependent fragments spliced into the assembly strings below.
   The names refer to string order in memory, not to numeric significance:
     LSB       mask selecting the byte of a loaded word that came from the
               lowest address (the next string byte to compare);
     MSB       mask selecting the byte that came from the highest address;
     SHFT2LSB  shift mnemonic that moves later string bytes towards the
               LSB position (discarding the byte just compared);
     SHFT2MSB  shift mnemonic for the opposite direction.
   On big-endian (__ARMEB__) the lowest-address byte lives in bits 24-31,
   so the masks and shift directions are swapped.  */
#ifdef __ARMEB__
#define SHFT2LSB "lsl"
#define SHFT2MSB "lsr"
#define MSB "0x000000ff"
#define LSB "0xff000000"
#else
#define SHFT2LSB "lsr"
#define SHFT2MSB "lsl"
#define MSB "0xff000000"
#define LSB "0x000000ff"
#endif

/* Operands for the "word contains a zero byte" test
   (w - 0x01010101) & ~w & 0x80808080.
   magic1 yields the 0x01010101 operand and magic2 the 0x80808080 operand.
   Under Thumb-2 both are written as immediates and REG is ignored.
   Otherwise REG names a register that the caller has already loaded with
   0x01010101; magic2 derives 0x80808080 from it with "lsl #7".  */
#ifdef __thumb2__
#define magic1(REG) "#0x01010101"
#define magic2(REG) "#0x80808080"
#else
#define magic1(REG) #REG
#define magic2(REG) #REG ", lsl #7"
#endif

int 
54
__attribute__((naked)) strcmp (const char* s1, const char* s2)
55
{
56
  asm(
57
#if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \
58
      (defined (__thumb__) && !defined (__thumb2__)))
59
      "optpld	r0\n\t"
60
      "optpld	r1\n\t"
61
      "eor	r2, r0, r1\n\t"
62
      "tst	r2, #3\n\t"
63
      /* Strings not at same byte offset from a word boundary.  */
64
      "bne	strcmp_unaligned\n\t"
65
      "ands	r2, r0, #3\n\t"
66
      "bic	r0, r0, #3\n\t"
67
      "bic	r1, r1, #3\n\t"
68
      "ldr	ip, [r0], #4\n\t"
69
      "it	eq\n\t"
70
      "ldreq	r3, [r1], #4\n\t"
71
      "beq	1f\n\t"
72
      /* Although s1 and s2 have identical initial alignment, they are
73
	 not currently word aligned.  Rather than comparing bytes,
74
	 make sure that any bytes fetched from before the addressed
75
	 bytes are forced to 0xff.  Then they will always compare
76
	 equal.  */
77
      "eor	r2, r2, #3\n\t"
78
      "lsl	r2, r2, #3\n\t"
79
      "mvn	r3, #"MSB"\n\t"
80
      SHFT2LSB"	r2, r3, r2\n\t"
81
      "ldr	r3, [r1], #4\n\t"
82
      "orr	ip, ip, r2\n\t"
83
      "orr	r3, r3, r2\n"
84
 "1:\n\t"
85
#ifndef __thumb2__
86
      /* Load the 'magic' constant 0x01010101.  */
87
      "str	r4, [sp, #-4]!\n\t"
88
      "mov	r4, #1\n\t"
89
      "orr	r4, r4, r4, lsl #8\n\t"
90
      "orr	r4, r4, r4, lsl #16\n"
91
#endif
92
      ".p2align	2\n"
93
 "4:\n\t"
94
      "optpld	r0, #8\n\t"
95
      "optpld	r1, #8\n\t"
96
      "sub	r2, ip, "magic1(r4)"\n\t"
97
      "cmp	ip, r3\n\t"
98
      "itttt	eq\n\t"
99
      /* check for any zero bytes in first word */
100
      "biceq	r2, r2, ip\n\t"
101
      "tsteq	r2, "magic2(r4)"\n\t"
102
      "ldreq	ip, [r0], #4\n\t"
103
      "ldreq	r3, [r1], #4\n\t"
104
      "beq	4b\n"
105
 "2:\n\t"
106
      /* There's a zero or a different byte in the word */
107
      SHFT2MSB"	r0, ip, #24\n\t"
108
      SHFT2LSB"	ip, ip, #8\n\t"
109
      "cmp	r0, #1\n\t"
110
      "it	cs\n\t"
111
      "cmpcs	r0, r3, "SHFT2MSB" #24\n\t"
112
      "it	eq\n\t"
113
      SHFT2LSB"eq r3, r3, #8\n\t"
114
      "beq	2b\n\t"
115
      /* On a big-endian machine, r0 contains the desired byte in bits
116
	 0-7; on a little-endian machine they are in bits 24-31.  In
117
	 both cases the other bits in r0 are all zero.  For r3 the
118
	 interesting byte is at the other end of the word, but the
119
	 other bits are not necessarily zero.  We need a signed result
120
	 representing the differnece in the unsigned bytes, so for the
121
	 little-endian case we can't just shift the interesting bits
122
	 up.  */
123
#ifdef __ARMEB__
124
      "sub	r0, r0, r3, lsr #24\n\t"
125
#else
126
      "and	r3, r3, #255\n\t"
127
#ifdef __thumb2__
128
      /* No RSB instruction in Thumb2 */
129
      "lsr	r0, r0, #24\n\t"
130
      "sub	r0, r0, r3\n\t"
131
#else
132
      "rsb	r0, r3, r0, lsr #24\n\t"
133
#endif
134
#endif
135
#ifndef __thumb2__
136
      "ldr	r4, [sp], #4\n\t"
137
#endif
138
      "RETURN"
139
#elif (defined (__thumb__) && !defined (__thumb2__))
140
  "1:\n\t"
141
      "ldrb	r2, [r0]\n\t"
142
      "ldrb	r3, [r1]\n\t"
143
      "add	r0, r0, #1\n\t"
144
      "add	r1, r1, #1\n\t"
145
      "cmp	r2, #0\n\t"
146
      "beq	2f\n\t"
147
      "cmp	r2, r3\n\t"
148
      "beq	1b\n\t"
149
  "2:\n\t"
150
      "sub	r0, r2, r3\n\t"
151
      "bx	lr"
152
#else
153
 "3:\n\t"
154
      "ldrb	r2, [r0], #1\n\t"
155
      "ldrb	r3, [r1], #1\n\t"
156
      "cmp	r2, #1\n\t"
157
      "it	cs\n\t"
158
      "cmpcs	r2, r3\n\t"
159
      "beq	3b\n\t"
160
      "sub	r0, r2, r3\n\t"
161
      "RETURN"
162
#endif
163
      );
164
}
165
166
#if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \
167
      (defined (__thumb__) && !defined (__thumb2__)))
168
static int __attribute__((naked, used)) 
169
strcmp_unaligned(const char* s1, const char* s2)
170
{
171
#if 0
172
  /* The assembly code below is based on the following alogrithm.  */
173
#ifdef __ARMEB__
174
#define RSHIFT <<
175
#define LSHIFT >>
176
#else
177
#define RSHIFT >>
178
#define LSHIFT <<
179
#endif
180
181
#define body(shift)							\
182
  mask = 0xffffffffU RSHIFT shift;					\
183
  w1 = *wp1++;								\
184
  w2 = *wp2++;								\
185
  do									\
186
    {									\
187
      t1 = w1 & mask;							\
188
      if (__builtin_expect(t1 != w2 RSHIFT shift, 0))			\
189
	{								\
190
	  w2 RSHIFT= shift;						\
191
	  break;							\
192
	}								\
193
      if (__builtin_expect(((w1 - b1) & ~w1) & (b1 << 7), 0))		\
194
	{								\
195
	  /* See comment in assembler below re syndrome on big-endian */\
196
	  if ((((w1 - b1) & ~w1) & (b1 << 7)) & mask)			\
197
	    w2 RSHIFT= shift;						\
198
	  else								\
199
	    {								\
200
	      w2 = *wp2;						\
201
	      t1 = w1 RSHIFT (32 - shift);				\
202
	      w2 = (w2 LSHIFT (32 - shift)) RSHIFT (32 - shift);	\
203
	    }								\
204
	  break;							\
205
	}								\
206
      w2 = *wp2++;							\
207
      t1 ^= w1;								\
208
      if (__builtin_expect(t1 != w2 LSHIFT (32 - shift), 0))		\
209
	{								\
210
	  t1 = w1 >> (32 - shift);					\
211
	  w2 = (w2 << (32 - shift)) RSHIFT (32 - shift);		\
212
	  break;							\
213
	}								\
214
      w1 = *wp1++;							\
215
    } while (1)
216
217
  const unsigned* wp1;
218
  const unsigned* wp2;
219
  unsigned w1, w2;
220
  unsigned mask;
221
  unsigned shift;
222
  unsigned b1 = 0x01010101;
223
  char c1, c2;
224
  unsigned t1;
225
226
  while (((unsigned) s1) & 3)
227
    {
228
      c1 = *s1++;
229
      c2 = *s2++;
230
      if (c1 == 0 || c1 != c2)
231
	return c1 - (int)c2;
232
    }
233
  wp1 = (unsigned*) (((unsigned)s1) & ~3);
234
  wp2 = (unsigned*) (((unsigned)s2) & ~3);
235
  t1 = ((unsigned) s2) & 3;
236
  if (t1 == 1)
237
    {
238
      body(8);
239
    }
240
  else if (t1 == 2)
241
    {
242
      body(16);
243
    }
244
  else
245
    {
246
      body (24);
247
    }
248
  
249
  do
250
    {
251
#ifdef __ARMEB__
252
      c1 = (char) t1 >> 24;
253
      c2 = (char) w2 >> 24;
254
#else
255
      c1 = (char) t1;
256
      c2 = (char) w2;
257
#endif
258
      t1 RSHIFT= 8;
259
      w2 RSHIFT= 8;
260
    } while (c1 != 0 && c1 == c2);
261
  return c1 - c2;
262
#endif
263
264
  asm("wp1 .req r0\n\t"
265
      "wp2 .req r1\n\t"
266
      "b1  .req r2\n\t"
267
      "w1  .req r4\n\t"
268
      "w2  .req r5\n\t"
269
      "t1  .req ip\n\t"
270
      "@ r3 is scratch\n"
271
272
      /* First of all, compare bytes until wp1(sp1) is word-aligned. */
273
 "1:\n\t"
274
      "tst	wp1, #3\n\t"
275
      "beq	2f\n\t"
276
      "ldrb	r2, [wp1], #1\n\t"
277
      "ldrb	r3, [wp2], #1\n\t"
278
      "cmp	r2, #1\n\t"
279
      "it	cs\n\t"
280
      "cmpcs	r2, r3\n\t"
281
      "beq	1b\n\t"
282
      "sub	r0, r2, r3\n\t"
283
      "RETURN\n"
284
285
 "2:\n\t"
286
      "str	r5, [sp, #-4]!\n\t"
287
      "str	r4, [sp, #-4]!\n\t"
288
      //      "stmfd	sp!, {r4, r5}\n\t"
289
      "mov	b1, #1\n\t"
290
      "orr	b1, b1, b1, lsl #8\n\t"
291
      "orr	b1, b1, b1, lsl #16\n\t"
292
293
      "and	t1, wp2, #3\n\t"
294
      "bic	wp2, wp2, #3\n\t"
295
      "ldr	w1, [wp1], #4\n\t"
296
      "ldr	w2, [wp2], #4\n\t"
297
      "cmp	t1, #2\n\t"
298
      "beq	2f\n\t"
299
      "bhi	3f\n"
300
301
      /* Critical inner Loop: Block with 3 bytes initial overlap */
302
      ".p2align	2\n"
303
 "1:\n\t"
304
      "bic	t1, w1, #"MSB"\n\t"
305
      "cmp	t1, w2, "SHFT2LSB" #8\n\t"
306
      "sub	r3, w1, b1\n\t"
307
      "bic	r3, r3, w1\n\t"
308
      "bne	4f\n\t"
309
      "ands	r3, r3, b1, lsl #7\n\t"
310
      "it	eq\n\t"
311
      "ldreq	w2, [wp2], #4\n\t"
312
      "bne	5f\n\t"
313
      "eor	t1, t1, w1\n\t"
314
      "cmp	t1, w2, "SHFT2MSB" #24\n\t"
315
      "bne	6f\n\t"
316
      "ldr	w1, [wp1], #4\n\t"
317
      "b	1b\n"
318
 "4:\n\t"
319
      SHFT2LSB"	w2, w2, #8\n\t"
320
      "b	8f\n"
321
322
 "5:\n\t"
323
#ifdef __ARMEB__
324
      /* The syndrome value may contain false ones if the string ends
325
	 with the bytes 0x01 0x00 */
326
      "tst	w1, #0xff000000\n\t"
327
      "itt	ne\n\t"
328
      "tstne	w1, #0x00ff0000\n\t"
329
      "tstne	w1, #0x0000ff00\n\t"
330
      "beq	7f\n\t"
331
#else
332
      "bics	r3, r3, #0xff000000\n\t"
333
      "bne	7f\n\t"
334
#endif
335
      "ldrb	w2, [wp2]\n\t"
336
      SHFT2LSB"	t1, w1, #24\n\t"
337
#ifdef __ARMEB__
338
      "lsl	w2, w2, #24\n\t"
339
#endif
340
      "b	8f\n"
341
342
 "6:\n\t"
343
      SHFT2LSB"	t1, w1, #24\n\t"
344
      "and	w2, w2, #"LSB"\n\t"
345
      "b	8f\n"
346
347
      /* Critical inner Loop: Block with 2 bytes initial overlap */
348
      ".p2align	2\n"
349
 "2:\n\t"
350
      SHFT2MSB"	t1, w1, #16\n\t"
351
      "sub	r3, w1, b1\n\t"
352
      SHFT2LSB"	t1, t1, #16\n\t"
353
      "bic	r3, r3, w1\n\t"
354
      "cmp	t1, w2, "SHFT2LSB" #16\n\t"
355
      "bne	4f\n\t"
356
      "ands	r3, r3, b1, lsl #7\n\t"
357
      "it	eq\n\t"
358
      "ldreq	w2, [wp2], #4\n\t"
359
      "bne	5f\n\t"
360
      "eor	t1, t1, w1\n\t"
361
      "cmp	t1, w2, "SHFT2MSB" #16\n\t"
362
      "bne	6f\n\t"
363
      "ldr	w1, [wp1], #4\n\t"
364
      "b	2b\n"
365
366
 "5:\n\t"
367
#ifdef __ARMEB__
368
      /* The syndrome value may contain false ones if the string ends
369
	 with the bytes 0x01 0x00 */
370
      "tst	w1, #0xff000000\n\t"
371
      "it	ne\n\t"
372
      "tstne	w1, #0x00ff0000\n\t"
373
      "beq	7f\n\t"
374
#else
375
      "lsls	r3, r3, #16\n\t"
376
      "bne	7f\n\t"
377
#endif
378
      "ldrh	w2, [wp2]\n\t"
379
      SHFT2LSB"	t1, w1, #16\n\t"
380
#ifdef __ARMEB__
381
      "lsl	w2, w2, #16\n\t"
382
#endif
383
      "b	8f\n"
384
385
 "6:\n\t"
386
      SHFT2MSB"	w2, w2, #16\n\t"
387
      SHFT2LSB"	t1, w1, #16\n\t"
388
 "4:\n\t"
389
      SHFT2LSB"	w2, w2, #16\n\t"
390
      "b	8f\n\t"
391
392
      /* Critical inner Loop: Block with 1 byte initial overlap */
393
      ".p2align	2\n"
394
 "3:\n\t"
395
      "and	t1, w1, #"LSB"\n\t"
396
      "cmp	t1, w2, "SHFT2LSB" #24\n\t"
397
      "sub	r3, w1, b1\n\t"
398
      "bic	r3, r3, w1\n\t"
399
      "bne	4f\n\t"
400
      "ands	r3, r3, b1, lsl #7\n\t"
401
      "it	eq\n\t"
402
      "ldreq	w2, [wp2], #4\n\t"
403
      "bne	5f\n\t"
404
      "eor	t1, t1, w1\n\t"
405
      "cmp	t1, w2, "SHFT2MSB" #8\n\t"
406
      "bne	6f\n\t"
407
      "ldr	w1, [wp1], #4\n\t"
408
      "b	3b\n"
409
 "4:\n\t"
410
      SHFT2LSB"	w2, w2, #24\n\t"
411
      "b	8f\n"
412
 "5:\n\t"
413
      /* The syndrome value may contain false ones if the string ends
414
	 with the bytes 0x01 0x00 */
415
      "tst	w1, #"LSB"\n\t"
416
      "beq	7f\n\t"
417
      "ldr	w2, [wp2], #4\n"
418
 "6:\n\t"
419
      SHFT2LSB"	t1, w1, #8\n\t"
420
      "bic	w2, w2, #"MSB"\n\t"
421
      "b	8f\n"
422
 "7:\n\t"
423
      "mov	r0, #0\n\t"
424
      //      "ldmfd	sp!, {r4, r5}\n\t"
425
      "ldr	r4, [sp], #4\n\t"
426
      "ldr	r5, [sp], #4\n\t"
427
      "RETURN\n"
428
 "8:\n\t"
429
      "and	r2, t1, #"LSB"\n\t"
430
      "and	r0, w2, #"LSB"\n\t"
431
      "cmp	r0, #1\n\t"
432
      "it	cs\n\t"
433
      "cmpcs	r0, r2\n\t"
434
      "itt	eq\n\t"
435
      SHFT2LSB"eq	t1, t1, #8\n\t"
436
      SHFT2LSB"eq	w2, w2, #8\n\t"
437
      "beq	8b\n\t"
438
      "sub	r0, r2, r0\n\t"
439
      //      "ldmfd	sp!, {r4, r5}\n\t"
440
      "ldr	r4, [sp], #4\n\t"
441
      "ldr	r5, [sp], #4\n\t"
442
      "RETURN");
443
}
444
445
#endif