103
by Will Newton
Split bionic reference code into A15 and A9 versions. |
1 |
/*
 * Copyright (C) 2008 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
|
|
28 |
||
29 |
/*
 * This code assumes it is running on a processor that supports all ARMv7
 * instructions and that supports NEON instructions.
 */
|
|
33 |
||
34 |
.text
.fpu neon

/*
 * void *memset(void *dst, int c, size_t n)
 *
 * Syntax: GNU as, ARM (A32) state, pre-UAL conditional mnemonics
 *         (strcsb, stmcsia, ldmlsfd, ...), matching the original file.
 *
 * In:     r0 = dst
 *         r1 = c   (only the low 8 bits are stored)
 *         r2 = n   (unsigned byte count)
 * Out:    r0 = dst (memset() returns its first argument; the incoming r0 is
 *                   pushed on entry to either path and popped before return)
 * Scratch: r1, r2, r3, r12 (ip), q0-q1, flags.
 *          r4-r7 and lr are used by the large path and are saved/restored
 *          around it.
 *
 * Two strategies, selected on n:
 *   n <= 132 : NEON stores (vst1) of a byte-replicated q register.
 *   n >  132 : align dst, then integer store-multiple (stmia) of 32 bytes
 *              per iteration.
 * The original author's note states that the NEON variant only wins for
 * small sizes; the threshold value 132 is taken from the original code.
 *
 * Each path has its own .fnstart/.fnend unwind region because the two paths
 * push different register sets; a single region containing both .save
 * directives would describe a stack layout that neither path produces.
 */
.globl memset
.type memset, %function
memset:
        .fnstart
        /* n <= 132 stays here (NEON); n > 132 goes to the integer path. */
        cmp         r2, #132
        bhi         11f

        .save       {r0}
        stmfd       sp!, {r0}               /* keep dst for the return value */

        vdup.8      q0, r1                  /* q0 = fill byte in all 16 lanes */

        /* make sure we have at least 32 bytes to write */
        subs        r2, r2, #32
        blo         2f
        vmov        q1, q0                  /* q0:q1 = 32 bytes of fill */

1:      /* The main loop writes 32 bytes at a time */
        subs        r2, r2, #32
        vst1.8      {d0 - d3}, [r0]!        /* vst1 leaves the flags alone */
        bhs         1b

2:      /* less than 32 left; undo the bias so r2 = remaining bytes */
        add         r2, r2, #32
        tst         r2, #0x10
        beq         3f

        /* writes 16 bytes (no alignment qualifier: dst alignment is unknown) */
        vst1.8      {d0, d1}, [r0]!

3:      /* write up to 15 bytes (count in r2) */
        movs        ip, r2, lsl #29         /* C = bit 3 of r2, N = bit 2 */
        bcc         1f
        vst1.8      {d0}, [r0]!             /* 8 bytes */
1:      bpl         2f                      /* N only; movs does not write V */
        vst1.32     {d0[0]}, [r0]!          /* 4 bytes */
2:      movs        ip, r2, lsl #31         /* C = bit 1 of r2, N = bit 0 */
        strmib      r1, [r0], #1            /* 1 byte  */
        strcsb      r1, [r0], #1            /* 2 bytes */
        strcsb      r1, [r0], #1
        ldmfd       sp!, {r0}               /* return value = original dst */
        bx          lr
        .fnend

        .fnstart
11:
        /* Large fill (n > 132).
         * Compute the offset needed to word-align the destination:
         * offset = (4-(dst&3))&3 = -dst & 3
         */

        .save       {r0, r4-r7, lr}
        stmfd       sp!, {r0, r4-r7, lr}
        rsb         r3, r0, #0
        ands        r3, r3, #3
        cmp         r3, r2
        movhi       r3, r2                  /* never write more than n bytes */

        /* splat r1: replicate the low byte into all four bytes of r1 */
        mov         r1, r1, lsl #24
        orr         r1, r1, r1, lsr #8
        orr         r1, r1, r1, lsr #16

        /* store 0-3 bytes to reach word alignment */
        movs        r12, r3, lsl #31        /* C = bit 1 of r3, N = bit 0 */
        strcsb      r1, [r0], #1    /* can't use strh (alignment unknown) */
        strcsb      r1, [r0], #1
        strmib      r1, [r0], #1
        subs        r2, r2, r3
        /* Early return if nothing is left after alignment (kept from the
         * original; with n > 132 and r3 <= 3 this is not taken). */
        ldmlsfd     sp!, {r0, r4-r7, lr}    /* return */
        bxls        lr

        /* align the destination to a cache-line (32-byte boundary, per the
         * 0x1C mask below); first fill the registers used by stmia */
        mov         r12, r1
        mov         lr, r1
        mov         r4, r1
        mov         r5, r1
        mov         r6, r1
        mov         r7, r1

        rsb         r3, r0, #0
        ands        r3, r3, #0x1C           /* bytes (multiple of 4) to boundary */
        beq         3f
        cmp         r3, r2
        andhi       r3, r2, #0x1C
        sub         r2, r2, r3

        /* conditionally writes 0 to 7 words (length in r3) */
        movs        r3, r3, lsl #28         /* C = bit 4 of r3, N = bit 3 */
        stmcsia     r0!, {r1, lr}           /* 16 bytes as two 8-byte stores */
        stmcsia     r0!, {r1, lr}
        stmmiia     r0!, {r1, lr}           /* 8 bytes */
        movs        r3, r3, lsl #2          /* C = original bit 2 */
        strcs       r1, [r0], #4            /* 4 bytes */

3:
        subs        r2, r2, #32
        mov         r3, r1                  /* mov (no S) keeps the subs flags */
        /* Unsigned "fewer than 32 left" test.  The count is a size_t, so a
         * signed test (bmi) would wrongly skip the bulk loop whenever the
         * remaining count is 0x80000020 or more. */
        blo         2f
1:      subs        r2, r2, #32
        stmia       r0!, {r1,r3,r4,r5,r6,r7,r12,lr}    /* 32 bytes */
        bhs         1b
2:      add         r2, r2, #32             /* r2 = remaining bytes (0-31) */

        /* conditionally stores 0 to 31 bytes */
        movs        r2, r2, lsl #28         /* C = bit 4 of r2, N = bit 3 */
        stmcsia     r0!, {r1,r3,r12,lr}     /* 16 bytes */
        stmmiia     r0!, {r1, lr}           /* 8 bytes */
        movs        r2, r2, lsl #2          /* C = original bit 2, N = bit 1 */
        strcs       r1, [r0], #4            /* 4 bytes */
        strmih      r1, [r0], #2            /* 2 bytes */
        movs        r2, r2, lsl #2          /* C = original bit 0 */
        strcsb      r1, [r0]                /* 1 byte */
        ldmfd       sp!, {r0, r4-r7, lr}    /* r0 = original dst */
        bx          lr
        .fnend
        .size memset, .-memset