~linaro-toolchain-dev/cortex-strings/trunk

« back to all changes in this revision

Viewing changes to src/aarch64/strnlen.S

Committer: Matthew Gretton-Dann
Author(s): Marcus Shawcroft
Date: 2013-01-16 20:55:59 UTC
mto: This revision was merged to the branch mainline in revision 99.
Revision ID: matthew.gretton-dann@linaro.org-20130116205559-7nye0l7d8fvzdye3

This patch fixes an issue in the AArch64 strnlen implementation which
occurs if ULONG_MAX-15 <= n <= ULONG_MAX.

files modified:
src/aarch64/strnlen.S

Show diffs side-by-side

added added

removed removed

src/aarch64/strnlen.S

bic src, srcin, #15

ands tmp1, srcin, #15

b.ne .Lmisaligned

add limit_wd, limit, #15

lsr limit_wd, limit_wd, #4

/* Calculate the number of full and partial words -1. */

sub limit_wd, limit, #1 /* Limit != 0, so no underflow. */

lsr limit_wd, limit_wd, #4 /* Convert to Qwords. */

/* NUL detection works on the principle that (X - 1) & (~X) & 0x80

(=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and

can be done in parallel across the entire word. */

103

105

bic has_nul2, tmp3, tmp4

104

106

subs limit_wd, limit_wd, #1

105

107

orr tmp1, has_nul1, has_nul2

106

ccmp tmp1, #0, #0, ne /* NZCV = 0000 */

108

ccmp tmp1, #0, #0, pl /* NZCV = 0000 */

107

109

b.eq .Lloop

108

110

/* End of critical section -- keep to one 64Byte cache line. */

109

111

141

143

ret

142

144

143

145

.Lmisaligned:

144

add tmp3, limit, tmp1

146

/* Deal with a partial first word.

147

We're doing two things in parallel here;

148

1) Calculate the number of words (but avoiding overflow if

149

limit is near ULONG_MAX) - to do this we need to work out

150

limit + tmp1 - 1 as a 65-bit value before shifting it;

151

2) Load and mask the initial data words - we force the bytes

152

before the ones we are interested in to 0xff - this ensures

153

early bytes will not hit any zero detection. */

154

sub limit_wd, limit, #1

155

neg tmp4, tmp1

145

156

cmp tmp1, #8

146

neg tmp1, tmp1

147

ldp data1, data2, [src], #16

148

add limit_wd, tmp3, #15

149

lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */

157

158

and tmp3, limit_wd, #15

159

lsr limit_wd, limit_wd, #4

150

160

mov tmp2, #~0

151

lsr limit_wd, limit_wd, #4

161

162

ldp data1, data2, [src], #16

163

lsl tmp4, tmp4, #3 /* Bytes beyond alignment -> bits. */

164

add tmp3, tmp3, tmp1

165

152

166

#ifdef __AARCH64EB__

153

167

/* Big-endian. Early bytes are at MSB. */

154

lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */

168

lsl tmp2, tmp2, tmp4 /* Shift (tmp4 & 63). */

155

169

#else

156

170

/* Little-endian. Early bytes are at LSB. */

157

lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */

171

lsr tmp2, tmp2, tmp4 /* Shift (tmp4 & 63). */

158

172

#endif

173

add limit_wd, limit_wd, tmp3, lsr #4

174

159

175

orr data1, data1, tmp2

160

176

orr data2a, data2, tmp2

177

161

178

csinv data1, data1, xzr, le

162

179

csel data2, data2, data2a, le

163

180

b .Lrealigned

Older »