~linaro-toolchain-dev/cortex-strings/trunk

103 by Will Newton
Split bionic reference code into A15 and A9 versions.
1
/*
2
 * Copyright (C) 2008 The Android Open Source Project
3
 * All rights reserved.
4
 *
5
 * Redistribution and use in source and binary forms, with or without
6
 * modification, are permitted provided that the following conditions
7
 * are met:
8
 *  * Redistributions of source code must retain the above copyright
9
 *    notice, this list of conditions and the following disclaimer.
10
 *  * Redistributions in binary form must reproduce the above copyright
11
 *    notice, this list of conditions and the following disclaimer in
12
 *    the documentation and/or other materials provided with the
13
 *    distribution.
14
 *
15
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
18
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
22
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
25
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26
 * SUCH DAMAGE.
27
 */
28
29
/*
 * This code assumes it is running on a processor that supports all ARMv7
 * instructions and that supports NEON instructions.
 */
33
34
	.text
	.fpu    neon
	.syntax divided         /* pre-UAL mnemonics (strmib, stmcsia, ...) are used below */
	.arm                    /* conditional execution without IT blocks: ARM state only */

/*
 * void *memset(void *dst, int c, size_t n)
 *
 * In:    r0 = dst, r1 = c (only the low byte is stored), r2 = n
 * Out:   r0 = dst (memset() returns its first argument)
 * Uses:  r1-r3, ip (r12), q0-q1 and the flags as scratch.  r4-r7 and lr
 *        are pushed and popped on the path that uses them.
 *
 * n <= 132: NEON stores; no destination alignment is assumed.
 * n >  132: byte stores up to a 4-byte boundary, word stores up to a
 *           32-byte boundary, then 32 bytes per stm, then a 0-31 byte tail.
 *
 * NOTE(review): the function carries two .save directives (one per path)
 * inside a single .fnstart/.fnend region; confirm that the resulting
 * unwind information is what is intended.
 */
	.globl memset
	.type memset,%function
memset:
	.fnstart
        /* The NEON path is only used for 132 bytes or fewer. */
        cmp         r2, #132
        bhi         .Lmemset_large

        .save       {r0}
        stmfd       sp!, {r0}               /* keep dst for the return value */

        vdup.8      q0, r1                  /* q0 = fill byte in every lane */

        /* Make sure there are at least 32 bytes to write. */
        subs        r2, r2, #32
        blo         .Lmemset_neon_tail
        vmov        q1, q0                  /* q0:q1 = 32 bytes of fill */

.Lmemset_neon_loop:
        /* Main loop: 32 bytes per iteration.  r2 is kept 32 below the
         * real remaining count, so "no borrow" means another full block. */
        subs        r2, r2, #32
        vst1.8      {d0 - d3}, [r0]!
        bhs         .Lmemset_neon_loop

.Lmemset_neon_tail:
        /* Fewer than 32 bytes left; undo the bias so r2 is the count. */
        add         r2, r2, #32
        tst         r2, #0x10
        beq         .Lmemset_neon_tail15

        /* 16 bytes.  No alignment qualifier is given on the store, so
         * dst does not have to be 128-bit aligned here. */
        vst1.8      {d0, d1}, [r0]!

.Lmemset_neon_tail15:
        /* Up to 15 bytes left (low four bits of r2).
         * lsl #29 moves bit 3 into C and bit 2 into N. */
        movs        ip, r2, lsl #29
        bcc         .Lmemset_neon_tail7
        vst1.8      {d0}, [r0]!             /* 8 bytes */
.Lmemset_neon_tail7:
        /* Test N only.  The shift above does not define V, so a signed
         * condition (bge) would depend on a stale V flag. */
        bpl         .Lmemset_neon_tail3
        vst1.32     {d0[0]}, [r0]!          /* 4 bytes */
.Lmemset_neon_tail3:
        /* lsl #31 moves bit 1 into C and bit 0 into N. */
        movs        ip, r2, lsl #31
        strmib      r1, [r0], #1            /* 1 byte if bit 0 set */
        strcsb      r1, [r0], #1            /* 2 bytes if bit 1 set */
        strcsb      r1, [r0], #1
        ldmfd       sp!, {r0}
        bx          lr

.Lmemset_large:
        /* n > 132 on this path.
         * Compute the number of bytes needed to word-align the destination:
         * offset = (4 - (dst & 3)) & 3 = -dst & 3
         */

        .save       {r0, r4-r7, lr}
        stmfd       sp!, {r0, r4-r7, lr}
        rsb         r3, r0, #0
        ands        r3, r3, #3
        cmp         r3, r2
        movhi       r3, r2                  /* clamp to n (r3 <= 3 < n on this path) */

        /* splat the fill byte across all four bytes of r1 */
        mov         r1, r1, lsl #24
        orr         r1, r1, r1, lsr #8
        orr         r1, r1, r1, lsr #16

        /* Store 0-3 bytes: C = bit 1 of r3, N = bit 0 of r3. */
        movs        r12, r3, lsl #31
        strcsb      r1, [r0], #1    /* can't use strh (alignment unknown) */
        strcsb      r1, [r0], #1
        strmib      r1, [r0], #1
        subs        r2, r2, r3
        ldmlsfd     sp!, {r0, r4-r7, lr}   /* nothing left: return */
        bxls        lr

        /* Load the fill word into every register used by the stm stores. */
        mov         r12, r1
        mov         lr, r1
        mov         r4, r1
        mov         r5, r1
        mov         r6, r1
        mov         r7, r1

        /* Align the destination to a 32-byte boundary (the cache line
         * size this code was written for).  r3 = -dst & 0x1C is the
         * distance in bytes, always a multiple of 4. */
        rsb         r3, r0, #0
        ands        r3, r3, #0x1C
        beq         .Lmemset_large_aligned
        cmp         r3, r2
        andhi       r3, r2, #0x1C           /* clamp to whole words of n */
        sub         r2, r2, r3

        /* Conditionally write 0 to 7 words (length in r3).
         * lsl #28 moves bit 4 (16 bytes) into C and bit 3 (8 bytes) into N. */
        movs        r3, r3, lsl #28
        stmcsia     r0!, {r1, lr}
        stmcsia     r0!, {r1, lr}
        stmmiia     r0!, {r1, lr}
        movs        r3, r3, lsl #2          /* C = original bit 2 (4 bytes) */
        strcs       r1, [r0], #4

.Lmemset_large_aligned:
        subs        r2, r2, #32
        mov         r3, r1                  /* eighth store register; flags untouched */
        /* The count is unsigned: branch on borrow, not on the sign bit,
         * so that counts of 2 GiB and above still take the bulk loop. */
        blo         .Lmemset_large_tail
.Lmemset_large_loop:
        subs        r2, r2, #32
        stmia       r0!, {r1,r3,r4,r5,r6,r7,r12,lr}
        bhs         .Lmemset_large_loop
.Lmemset_large_tail:
        add         r2, r2, #32             /* undo the bias: r2 = bytes left (< 32) */

        /* Conditionally store 0 to 31 bytes.
         * lsl #28: C = bit 4 (16 bytes), N = bit 3 (8 bytes). */
        movs        r2, r2, lsl #28
        stmcsia     r0!, {r1,r3,r12,lr}
        stmmiia     r0!, {r1, lr}
        movs        r2, r2, lsl #2          /* C = bit 2 (4 bytes), N = bit 1 (2 bytes) */
        strcs       r1, [r0], #4
        strmih      r1, [r0], #2
        movs        r2, r2, lsl #2          /* C = bit 0 (1 byte) */
        strcsb      r1, [r0]
        ldmfd       sp!, {r0, r4-r7, lr}
        bx          lr
	.fnend
	.size memset, .-memset