2
* Copyright (C) 2013 The Android Open Source Project
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions
8
* * Redistributions of source code must retain the above copyright
9
* notice, this list of conditions and the following disclaimer.
10
* * Redistributions in binary form must reproduce the above copyright
11
* notice, this list of conditions and the following disclaimer in
12
* the documentation and/or other materials provided with the
15
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
18
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
22
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
25
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30
* Optimized memset() for ARM.
32
* memset() returns its first argument.
39
// memset -- per the file header: optimized memset() for ARM, returning its
// first argument.
// NOTE(review): this listing looks incomplete. Bare numbers are interleaved
// with the code, and the `memset:` entry label, the flag-setting instructions
// ahead of each conditional branch, the `.endr` that closes each `.irp`, and
// the `double_word_aligned` label are not visible here. The comments below
// describe only what is shown; confirm the rest against the full file.
// Mark the memset symbol as a function.
.type memset,%function
45
// The new algorithm is slower for sets of fewer than 16 bytes, so use the
46
// old neon code in that case.
48
// blo = branch if unsigned lower. The comparison that sets the flags is not
// visible here (presumably the byte count against 16 -- confirm).
blo set_less_than_16_unknown_align
50
// Use strd, which requires an even/odd register pair, so move the values so that:
52
// r0 and r1 contain the memset value
53
// r2 is the number of bytes to set
54
// r3 is the destination pointer
57
// Copy the byte value in every byte of r1.
// NOTE(review): both orr instructions below fold r1 onto itself using right
// shifts, which replicates a byte only if it already sits in bits 31:24. The
// instruction that would place it there is not visible here -- confirm.
59
// r1 |= r1 >> 8
orr r1, r1, r1, lsr #8
60
// r1 |= r1 >> 16
orr r1, r1, r1, lsr #16
63
// Align destination to a double word to avoid the strd crossing
64
// a cache line boundary.
66
// Taken when Z is clear after the alignment test (the test itself is not
// visible here).
bne do_double_word_align
74
1: // Main loop sets 64 bytes at a time.
75
// .irp repeats its body once per listed offset: eight strd stores, each
// writing the r0/r1 pair (8 bytes) at r3 + offset without updating r3.
.irp offset, #0, #8, #16, #24, #32, #40, #48, #56
76
strd r0, r1, [r3, \offset]
84
// Restore r2 to the count of bytes left to set.
89
// Four strd stores of 8 bytes each: 32 bytes.
.irp offset, #0, #8, #16, #24
90
strd r0, r1, [r3, \offset]
98
// NOTE(review): \offset is only defined inside an .irp body, so this store
// presumably belongs to a further .irp whose header is not visible here.
strd r0, r1, [r3, \offset]
103
// Less than 16 bytes to set.
108
// Post-indexed store: write the r0/r1 pair (8 bytes) at r3, then advance r3
// by 8.
strd r0, r1, [r3], #8
126
// Target of the bne above (destination not yet double-word aligned, per the
// comment at that branch).
do_double_word_align:
136
// Dst is at least word aligned by this point.
138
// Both branches below go to double_word_aligned, a label that is not defined
// in the visible lines; the compare feeding blo and whatever sits between the
// two branches are not visible either.
blo double_word_aligned
140
b double_word_aligned
142
// Small-size path: target of the first blo in this function.
set_less_than_16_unknown_align:
143
// Set up to 15 bytes.
149
// Store 32-bit lane 0 of d0 at [r0], then advance r0 by 4 (writeback).
// NOTE(review): this path stores through r0 and modifies it, while the file
// header says memset returns its first argument -- confirm r0 is restored
// before the return, which is not visible here.
vst1.32 {d0[0]}, [r0]!
150
// ip = r2 << 31 with flags updated: N = bit 0 of r2, C = bit 1 of r2.
// Presumably consumed by conditional stores of the last 1-3 bytes, which are
// not visible here.
2: movs ip, r2, lsl #31
159
// Record the size of memset as the distance from its label to this point.
.size memset, .-memset