1
1
/* strspn with SSE4.2 intrinsics
2
Copyright (C) 2009 Free Software Foundation, Inc.
2
Copyright (C) 2009, 2010 Free Software Foundation, Inc.
3
3
Contributed by Intel Corporation.
4
4
This file is part of the GNU C Library.
71
72
aligned = (const char *) ((size_t) a & -16L);
72
73
__m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
77
mask = _mm_srli_si128 (mask0, 1);
80
mask = _mm_srli_si128 (mask0, 2);
83
mask = _mm_srli_si128 (mask0, 3);
86
mask = _mm_srli_si128 (mask0, 4);
89
mask = _mm_srli_si128 (mask0, 5);
92
mask = _mm_srli_si128 (mask0, 6);
95
mask = _mm_srli_si128 (mask0, 7);
98
mask = _mm_srli_si128 (mask0, 8);
101
mask = _mm_srli_si128 (mask0, 9);
104
mask = _mm_srli_si128 (mask0, 10);
107
mask = _mm_srli_si128 (mask0, 11);
110
mask = _mm_srli_si128 (mask0, 12);
113
mask = _mm_srli_si128 (mask0, 13);
116
mask = _mm_srli_si128 (mask0, 14);
119
mask = _mm_srli_si128 (mask0, 15);
75
mask = __m128i_shift_right (mask0, offset);
123
77
/* Find where the NUL terminator is. */
124
78
int length = _mm_cmpistri (mask, mask, 0x3a);
138
/* Combine mask0 and mask1. */
142
mask = _mm_alignr_epi8 (mask1, mask0, 1);
145
mask = _mm_alignr_epi8 (mask1, mask0, 2);
148
mask = _mm_alignr_epi8 (mask1, mask0, 3);
151
mask = _mm_alignr_epi8 (mask1, mask0, 4);
154
mask = _mm_alignr_epi8 (mask1, mask0, 5);
157
mask = _mm_alignr_epi8 (mask1, mask0, 6);
160
mask = _mm_alignr_epi8 (mask1, mask0, 7);
163
mask = _mm_alignr_epi8 (mask1, mask0, 8);
166
mask = _mm_alignr_epi8 (mask1, mask0, 9);
169
mask = _mm_alignr_epi8 (mask1, mask0, 10);
172
mask = _mm_alignr_epi8 (mask1, mask0, 11);
175
mask = _mm_alignr_epi8 (mask1, mask0, 12);
178
mask = _mm_alignr_epi8 (mask1, mask0, 13);
181
mask = _mm_alignr_epi8 (mask1, mask0, 14);
184
mask = _mm_alignr_epi8 (mask1, mask0, 15);
92
/* Combine mask0 and mask1. We could play games with
93
palignr, but frankly this data should be in L1 now
94
so do the merge via an unaligned load. */
95
mask = _mm_loadu_si128 ((__m128i *) a);
210
119
aligned = (const char *) ((size_t) s & -16L);
211
120
__m128i value = _mm_load_si128 ((__m128i *) aligned);
216
value = _mm_srli_si128 (value, 1);
219
value = _mm_srli_si128 (value, 2);
222
value = _mm_srli_si128 (value, 3);
225
value = _mm_srli_si128 (value, 4);
228
value = _mm_srli_si128 (value, 5);
231
value = _mm_srli_si128 (value, 6);
234
value = _mm_srli_si128 (value, 7);
237
value = _mm_srli_si128 (value, 8);
240
value = _mm_srli_si128 (value, 9);
243
value = _mm_srli_si128 (value, 10);
246
value = _mm_srli_si128 (value, 11);
249
value = _mm_srli_si128 (value, 12);
252
value = _mm_srli_si128 (value, 13);
255
value = _mm_srli_si128 (value, 14);
258
value = _mm_srli_si128 (value, 15);
122
value = __m128i_shift_right (value, offset);
262
124
int length = _mm_cmpistri (mask, value, 0x12);
263
125
/* No need to check CFlag since it is always 1. */