1
2012-05-17 Andreas Jaeger <aj@suse.de>
2
Carlos O'Donell <carlos_odonell@mentor.com>
5
* sysdeps/x86_64/multiarch/init-arch.h
6
(bit_YMM_Usable): Rename to...
7
(bit_AVX_Usable): ... this.
8
(bit_FMA4_Usable): New macro.
9
(bit_XMM_state): New macro.
10
(bit_YMM_state): New macro.
11
[__ASSEMBLER__] (index_YMM_Usable): Rename to...
12
[__ASSEMBLER__] (index_AVX_Usable): ... this.
13
[__ASSEMBLER__] (index_FMA4_Usable): New macro.
14
(CPUID_OSXSAVE): New macro.
15
(CPUID_AVX): New macro.
16
(CPUID_FMA4): New macro.
17
(index_YMM_Usable): Rename to...
18
(index_AVX_Usable): ... this.
19
(HAS_AVX): Use HAS_ARCH_FEATURE.
21
(HAS_YMM_USABLE): Remove.
22
* sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features):
23
Enable AVX or FMA4 IFF YMM and XMM states are usable and the features are supported.
25
* sysdeps/x86_64/multiarch/strcmp.S: Use bit_AVX_Usable.
26
* sysdeps/i386/i686/multiarch/Makefile: Add test-multiarch to tests.
27
* sysdeps/x86_64/multiarch/Makefile: Likewise.
28
* sysdeps/i386/i686/multiarch/test-multiarch.c: New file.
29
* sysdeps/x86_64/multiarch/test-multiarch.c: New file.
31
2012-01-26 Ulrich Drepper <drepper@gmail.com>
34
* sysdeps/x86_64/multiarch/init-arch.h: Define bit_OSXSAVE.
35
Clean up HAS_* macros.
36
* sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features): If
37
bit_AVX is set also check OSXSAVE/XCR0 and set bit_YMM_Usable if possible.
39
* sysdeps/x86_64/fpu/multiarch/e_atan2.c: Use HAS_YMM_USABLE, not HAS_AVX.
41
* sysdeps/x86_64/fpu/multiarch/e_exp.c: Likewise.
42
* sysdeps/x86_64/fpu/multiarch/e_log.c: Likewise.
43
* sysdeps/x86_64/fpu/multiarch/s_atan.c: Likewise.
44
* sysdeps/x86_64/fpu/multiarch/s_sin.c: Likewise.
45
* sysdeps/x86_64/fpu/multiarch/s_tan.c: Likewise.
47
diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile
48
index b764e5b..8946bfa 100644
49
--- a/sysdeps/i386/i686/multiarch/Makefile
50
+++ b/sysdeps/i386/i686/multiarch/Makefile
54
+tests += test-multiarch
55
gen-as-const-headers += ifunc-defines.sym
58
diff --git a/sysdeps/i386/i686/multiarch/test-multiarch.c b/sysdeps/i386/i686/multiarch/test-multiarch.c
60
index 0000000..593cfec
62
+++ b/sysdeps/i386/i686/multiarch/test-multiarch.c
64
+#include <sysdeps/x86_64/multiarch/test-multiarch.c>
65
diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2.c b/sysdeps/x86_64/fpu/multiarch/e_atan2.c
66
index 6867c6e..3a615fc 100644
67
--- a/sysdeps/x86_64/fpu/multiarch/e_atan2.c
68
+++ b/sysdeps/x86_64/fpu/multiarch/e_atan2.c
69
@@ -14,7 +14,7 @@ extern double __ieee754_atan2_fma4 (double, double);
71
libm_ifunc (__ieee754_atan2,
72
HAS_FMA4 ? __ieee754_atan2_fma4
73
- : (HAS_AVX ? __ieee754_atan2_avx : __ieee754_atan2_sse2));
74
+ : (HAS_YMM_USABLE ? __ieee754_atan2_avx : __ieee754_atan2_sse2));
75
strong_alias (__ieee754_atan2, __atan2_finite)
77
# define __ieee754_atan2 __ieee754_atan2_sse2
78
diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp.c b/sysdeps/x86_64/fpu/multiarch/e_exp.c
79
index 3c65028..7b2320a 100644
80
--- a/sysdeps/x86_64/fpu/multiarch/e_exp.c
81
+++ b/sysdeps/x86_64/fpu/multiarch/e_exp.c
82
@@ -14,7 +14,7 @@ extern double __ieee754_exp_fma4 (double);
84
libm_ifunc (__ieee754_exp,
85
HAS_FMA4 ? __ieee754_exp_fma4
86
- : (HAS_AVX ? __ieee754_exp_avx : __ieee754_exp_sse2));
87
+ : (HAS_YMM_USABLE ? __ieee754_exp_avx : __ieee754_exp_sse2));
88
strong_alias (__ieee754_exp, __exp_finite)
90
# define __ieee754_exp __ieee754_exp_sse2
91
diff --git a/sysdeps/x86_64/fpu/multiarch/e_log.c b/sysdeps/x86_64/fpu/multiarch/e_log.c
92
index 3b468d0..ab277d6 100644
93
--- a/sysdeps/x86_64/fpu/multiarch/e_log.c
94
+++ b/sysdeps/x86_64/fpu/multiarch/e_log.c
95
@@ -14,7 +14,7 @@ extern double __ieee754_log_fma4 (double);
97
libm_ifunc (__ieee754_log,
98
HAS_FMA4 ? __ieee754_log_fma4
99
- : (HAS_AVX ? __ieee754_log_avx
100
+ : (HAS_YMM_USABLE ? __ieee754_log_avx
101
: __ieee754_log_sse2));
102
strong_alias (__ieee754_log, __log_finite)
104
diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan.c b/sysdeps/x86_64/fpu/multiarch/s_atan.c
105
index 3160201..78c7e09 100644
106
--- a/sysdeps/x86_64/fpu/multiarch/s_atan.c
107
+++ b/sysdeps/x86_64/fpu/multiarch/s_atan.c
108
@@ -12,7 +12,8 @@ extern double __atan_fma4 (double);
109
# define __atan_fma4 ((void *) 0)
112
-libm_ifunc (atan, HAS_FMA4 ? __atan_fma4 : HAS_AVX ? __atan_avx : __atan_sse2);
113
+libm_ifunc (atan, (HAS_FMA4 ? __atan_fma4 :
114
+ HAS_YMM_USABLE ? __atan_avx : __atan_sse2));
116
# define atan __atan_sse2
118
diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin.c b/sysdeps/x86_64/fpu/multiarch/s_sin.c
119
index 1ba9dbc..417acd0 100644
120
--- a/sysdeps/x86_64/fpu/multiarch/s_sin.c
121
+++ b/sysdeps/x86_64/fpu/multiarch/s_sin.c
122
@@ -17,10 +17,12 @@ extern double __sin_fma4 (double);
123
# define __sin_fma4 ((void *) 0)
126
-libm_ifunc (__cos, HAS_FMA4 ? __cos_fma4 : HAS_AVX ? __cos_avx : __cos_sse2);
127
+libm_ifunc (__cos, (HAS_FMA4 ? __cos_fma4 :
128
+ HAS_YMM_USABLE ? __cos_avx : __cos_sse2));
129
weak_alias (__cos, cos)
131
-libm_ifunc (__sin, HAS_FMA4 ? __sin_fma4 : HAS_AVX ? __sin_avx : __sin_sse2);
132
+libm_ifunc (__sin, (HAS_FMA4 ? __sin_fma4 :
133
+ HAS_YMM_USABLE ? __sin_avx : __sin_sse2));
134
weak_alias (__sin, sin)
136
# define __cos __cos_sse2
137
diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan.c b/sysdeps/x86_64/fpu/multiarch/s_tan.c
138
index 8f6601e..3047155 100644
139
--- a/sysdeps/x86_64/fpu/multiarch/s_tan.c
140
+++ b/sysdeps/x86_64/fpu/multiarch/s_tan.c
141
@@ -12,7 +12,8 @@ extern double __tan_fma4 (double);
142
# define __tan_fma4 ((void *) 0)
145
-libm_ifunc (tan, HAS_FMA4 ? __tan_fma4 : HAS_AVX ? __tan_avx : __tan_sse2);
146
+libm_ifunc (tan, (HAS_FMA4 ? __tan_fma4 :
147
+ HAS_YMM_USABLE ? __tan_avx : __tan_sse2));
149
# define tan __tan_sse2
151
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
152
index 9a183f0..dd6c27d 100644
153
--- a/sysdeps/x86_64/multiarch/Makefile
154
+++ b/sysdeps/x86_64/multiarch/Makefile
158
+tests += test-multiarch
159
gen-as-const-headers += ifunc-defines.sym
162
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
163
index 65b0ee9..df0fe55c 100644
164
--- a/sysdeps/x86_64/multiarch/init-arch.c
165
+++ b/sysdeps/x86_64/multiarch/init-arch.c
167
/* Initialize CPU feature data.
168
This file is part of the GNU C Library.
169
- Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
170
+ Copyright (C) 2008-2012 Free Software Foundation, Inc.
171
Contributed by Ulrich Drepper <drepper@redhat.com>.
173
The GNU C Library is free software; you can redistribute it and/or
174
@@ -144,6 +144,25 @@ __init_cpu_features (void)
176
kind = arch_kind_other;
178
+ /* Can we call xgetbv? */
181
+ unsigned int xcrlow;
182
+ unsigned int xcrhigh;
183
+ asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
184
+ /* Are the YMM and XMM states usable? */
185
+ if ((xcrlow & (bit_YMM_state | bit_XMM_state)) ==
186
+ (bit_YMM_state | bit_XMM_state))
188
+ /* Determine if AVX is usable. */
190
+ __cpu_features.feature[index_AVX_Usable] |= bit_AVX_Usable;
191
+ /* Determine if FMA4 is usable. */
193
+ __cpu_features.feature[index_FMA4_Usable] |= bit_FMA4_Usable;
197
__cpu_features.family = family;
198
__cpu_features.model = model;
199
atomic_write_barrier ();
200
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
201
index 2a1df39..37566c8 100644
202
--- a/sysdeps/x86_64/multiarch/init-arch.h
203
+++ b/sysdeps/x86_64/multiarch/init-arch.h
205
/* This file is part of the GNU C Library.
206
- Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
207
+ Copyright (C) 2008-2012 Free Software Foundation, Inc.
209
The GNU C Library is free software; you can redistribute it and/or
210
modify it under the terms of the GNU Lesser General Public
212
#define bit_Prefer_SSE_for_memop (1 << 3)
213
#define bit_Fast_Unaligned_Load (1 << 4)
214
#define bit_Prefer_PMINUB_for_stringop (1 << 5)
215
+#define bit_AVX_Usable (1 << 6)
216
+#define bit_FMA4_Usable (1 << 7)
218
+/* CPUID Feature flags. */
219
#define bit_SSE2 (1 << 26)
220
#define bit_SSSE3 (1 << 9)
221
#define bit_SSE4_1 (1 << 19)
222
#define bit_SSE4_2 (1 << 20)
223
+#define bit_OSXSAVE (1 << 27)
224
#define bit_AVX (1 << 28)
225
#define bit_POPCOUNT (1 << 23)
226
#define bit_FMA (1 << 12)
227
#define bit_FMA4 (1 << 16)
229
+/* XCR0 Feature flags. */
230
+#define bit_XMM_state (1 << 1)
231
+#define bit_YMM_state (2 << 1)
235
# include <ifunc-defines.h>
237
# define index_Prefer_SSE_for_memop FEATURE_INDEX_1*FEATURE_SIZE
238
# define index_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE
239
# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE
240
+# define index_AVX_Usable FEATURE_INDEX_1*FEATURE_SIZE
241
+# define index_FMA4_Usable FEATURE_INDEX_1*FEATURE_SIZE
243
#else /* __ASSEMBLER__ */
245
@@ -92,7 +102,7 @@ extern struct cpu_features
248
extern void __init_cpu_features (void) attribute_hidden;
249
-#define INIT_ARCH()\
250
+# define INIT_ARCH() \
252
if (__cpu_features.kind == arch_kind_unknown) \
253
__init_cpu_features (); \
254
@@ -111,37 +121,45 @@ extern const struct cpu_features *__get_cpu_features (void)
256
/* Following are the feature tests used throughout libc. */
258
+/* CPUID_* evaluates to true if the feature flag is enabled.
259
+ We always use &__cpu_features because the CPUID_* macros
260
+ are called only within __init_cpu_features, where we can't
261
+ call __get_cpu_features without infinite recursion. */
262
+# define HAS_CPUID_FLAG(idx, reg, bit) \
263
+ (((&__cpu_features)->cpuid[idx].reg & (bit)) != 0)
265
+# define CPUID_OSXSAVE \
266
+ HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_OSXSAVE)
267
+# define CPUID_AVX \
268
+ HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_AVX)
269
+# define CPUID_FMA4 \
270
+ HAS_CPUID_FLAG (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4)
272
+/* HAS_* evaluates to true if we may use the feature at runtime. */
273
# define HAS_SSE2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, edx, bit_SSE2)
274
# define HAS_POPCOUNT HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_POPCOUNT)
275
# define HAS_SSSE3 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSSE3)
276
# define HAS_SSE4_1 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_1)
277
# define HAS_SSE4_2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_2)
278
# define HAS_FMA HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_FMA)
279
-# define HAS_AVX HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_AVX)
280
-# define HAS_FMA4 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4)
282
# define index_Fast_Rep_String FEATURE_INDEX_1
283
# define index_Fast_Copy_Backward FEATURE_INDEX_1
284
# define index_Slow_BSF FEATURE_INDEX_1
285
# define index_Prefer_SSE_for_memop FEATURE_INDEX_1
286
# define index_Fast_Unaligned_Load FEATURE_INDEX_1
288
-#define HAS_ARCH_FEATURE(idx, bit) \
289
- ((__get_cpu_features ()->feature[idx] & (bit)) != 0)
291
-#define HAS_FAST_REP_STRING \
292
- HAS_ARCH_FEATURE (index_Fast_Rep_String, bit_Fast_Rep_String)
294
-#define HAS_FAST_COPY_BACKWARD \
295
- HAS_ARCH_FEATURE (index_Fast_Copy_Backward, bit_Fast_Copy_Backward)
297
-#define HAS_SLOW_BSF \
298
- HAS_ARCH_FEATURE (index_Slow_BSF, bit_Slow_BSF)
300
-#define HAS_PREFER_SSE_FOR_MEMOP \
301
- HAS_ARCH_FEATURE (index_Prefer_SSE_for_memop, bit_Prefer_SSE_for_memop)
303
-#define HAS_FAST_UNALIGNED_LOAD \
304
- HAS_ARCH_FEATURE (index_Fast_Unaligned_Load, bit_Fast_Unaligned_Load)
305
+# define index_AVX_Usable FEATURE_INDEX_1
306
+# define index_FMA4_Usable FEATURE_INDEX_1
308
+# define HAS_ARCH_FEATURE(name) \
309
+ ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
311
+# define HAS_FAST_REP_STRING HAS_ARCH_FEATURE (Fast_Rep_String)
312
+# define HAS_FAST_COPY_BACKWARD HAS_ARCH_FEATURE (Fast_Copy_Backward)
313
+# define HAS_SLOW_BSF HAS_ARCH_FEATURE (Slow_BSF)
314
+# define HAS_PREFER_SSE_FOR_MEMOP HAS_ARCH_FEATURE (Prefer_SSE_for_memop)
315
+# define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load)
316
+# define HAS_AVX HAS_ARCH_FEATURE (AVX_Usable)
317
+# define HAS_FMA4 HAS_ARCH_FEATURE (FMA4_Usable)
319
#endif /* __ASSEMBLER__ */
320
diff --git a/sysdeps/x86_64/multiarch/strcmp.S b/sysdeps/x86_64/multiarch/strcmp.S
321
index f93c83d..d036160 100644
322
--- a/sysdeps/x86_64/multiarch/strcmp.S
323
+++ b/sysdeps/x86_64/multiarch/strcmp.S
325
/* strcmp with SSE4.2
326
- Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
327
+ Copyright (C) 2009-2012 Free Software Foundation, Inc.
328
Contributed by Intel Corporation.
329
This file is part of the GNU C Library.
334
.type STRCMP, @gnu_indirect_function
335
+ /* Manually inlined call to __get_cpu_features. */
336
cmpl $0, __cpu_features+KIND_OFFSET(%rip)
338
call __init_cpu_features
339
@@ -101,13 +102,14 @@ END(STRCMP)
340
# ifdef USE_AS_STRCASECMP_L
342
.type __strcasecmp, @gnu_indirect_function
343
+ /* Manually inlined call to __get_cpu_features. */
344
cmpl $0, __cpu_features+KIND_OFFSET(%rip)
346
call __init_cpu_features
348
# ifdef HAVE_AVX_SUPPORT
349
leaq __strcasecmp_avx(%rip), %rax
350
- testl $bit_AVX, __cpu_features+CPUID_OFFSET+index_AVX(%rip)
351
+ testl $bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
354
leaq __strcasecmp_sse42(%rip), %rax
355
@@ -124,13 +126,14 @@ weak_alias (__strcasecmp, strcasecmp)
356
# ifdef USE_AS_STRNCASECMP_L
358
.type __strncasecmp, @gnu_indirect_function
359
+ /* Manually inlined call to __get_cpu_features. */
360
cmpl $0, __cpu_features+KIND_OFFSET(%rip)
362
call __init_cpu_features
364
# ifdef HAVE_AVX_SUPPORT
365
leaq __strncasecmp_avx(%rip), %rax
366
- testl $bit_AVX, __cpu_features+CPUID_OFFSET+index_AVX(%rip)
367
+ testl $bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
370
leaq __strncasecmp_sse42(%rip), %rax
371
diff --git a/sysdeps/x86_64/multiarch/test-multiarch.c b/sysdeps/x86_64/multiarch/test-multiarch.c
373
index 0000000..76b1af2
375
+++ b/sysdeps/x86_64/multiarch/test-multiarch.c
377
+/* Test CPU feature data.
378
+ This file is part of the GNU C Library.
379
+ Copyright (C) 2012 Free Software Foundation, Inc.
381
+ The GNU C Library is free software; you can redistribute it and/or
382
+ modify it under the terms of the GNU Lesser General Public
383
+ License as published by the Free Software Foundation; either
384
+ version 2.1 of the License, or (at your option) any later version.
386
+ The GNU C Library is distributed in the hope that it will be useful,
387
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
388
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
389
+ Lesser General Public License for more details.
391
+ You should have received a copy of the GNU Lesser General Public
392
+ License along with the GNU C Library; if not, see
393
+ <http://www.gnu.org/licenses/>. */
395
+#include <init-arch.h>
400
+static char *cpu_flags;
402
+/* Search for flags in /proc/cpuinfo and store line
412
+ f = fopen ("/proc/cpuinfo", "r");
415
+ printf ("cannot open /proc/cpuinfo");
419
+ while ((read = getline (&line, &len, f)) != -1)
421
+ if (strncmp (line, "flags", 5) == 0)
423
+ cpu_flags = strdup (line);
432
+check_proc (const char *proc_name, int flag, const char *name)
436
+ printf ("Checking %s:\n", name);
437
+ printf (" init-arch %d\n", flag);
438
+ if (strstr (cpu_flags, proc_name) != NULL)
440
+ printf (" cpuinfo (%s) %d\n", proc_name, found);
443
+ printf (" *** failure ***\n");
445
+ return (found != flag);
449
+do_test (int argc, char **argv)
454
+ fails = check_proc ("avx", HAS_AVX, "HAS_AVX");
455
+ fails += check_proc ("fma4", HAS_FMA4, "HAS_FMA4");
456
+ fails += check_proc ("sse4_2", HAS_SSE4_2, "HAS_SSE4_2");
457
+ fails += check_proc ("sse4_1", HAS_SSE4_1, "HAS_SSE4_1");
458
+ fails += check_proc ("ssse3", HAS_SSSE3, "HAS_SSSE3");
459
+ fails += check_proc ("popcnt", HAS_POPCOUNT, "HAS_POPCOUNT");
461
+ printf ("%d differences between /proc/cpuinfo and glibc code.\n", fails);
463
+ return (fails != 0);
466
+#include "../../../test-skeleton.c"