23
23
#include <stdlib.h>
24
24
#include <string.h>
25
#include "libavutil/x86_cpu.h"
26
#include "libavutil/x86/asm.h"
27
#include "libavutil/x86/cpu.h"
26
28
#include "libavutil/cpu.h"
32
#define cpuid(index, eax, ebx, ecx, edx) \
33
ff_cpu_cpuid(index, &eax, &ebx, &ecx, &edx)
35
#define xgetbv(index, eax, edx) \
36
ff_cpu_xgetbv(index, &eax, &edx)
28
40
/* ebx saving is necessary for PIC. gcc seems unable to see it alone */
29
#define cpuid(index,eax,ebx,ecx,edx)\
31
("mov %%"REG_b", %%"REG_S"\n\t"\
33
"xchg %%"REG_b", %%"REG_S\
34
: "=a" (eax), "=S" (ebx),\
35
"=c" (ecx), "=d" (edx)\
41
#define cpuid(index, eax, ebx, ecx, edx) \
43
"mov %%"REG_b", %%"REG_S" \n\t" \
45
"xchg %%"REG_b", %%"REG_S \
46
: "=a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx) \
38
#define xgetbv(index,eax,edx) \
49
#define xgetbv(index, eax, edx) \
39
50
__asm__ (".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c" (index))
52
#define get_eflags(x) \
53
__asm__ volatile ("pushfl \n" \
57
#define set_eflags(x) \
58
__asm__ volatile ("push %0 \n" \
62
#endif /* HAVE_INLINE_ASM */
66
#define cpuid_test() 1
70
#define cpuid_test ff_cpu_cpuid_test
74
static int cpuid_test(void)
78
/* Check if CPUID is supported by attempting to toggle the ID bit in
79
* the EFLAGS register. */
81
set_eflags(a ^ 0x200000);
41
88
/* Function to test if multimedia instructions are supported... */
42
89
int ff_get_cpu_flags_x86(void)
45
95
int eax, ebx, ecx, edx;
46
int max_std_level, max_ext_level, std_caps=0, ext_caps=0;
47
int family=0, model=0;
96
int max_std_level, max_ext_level, std_caps = 0, ext_caps = 0;
97
int family = 0, model = 0;
48
98
union { int i[3]; char c[12]; } vendor;
53
/* See if CPUID instruction is supported ... */
54
/* ... Get copies of EFLAGS into eax and ecx */
59
/* ... Toggle the ID bit in one copy and store */
60
/* to the EFLAGS reg */
61
"xor $0x200000, %0\n\t"
65
/* ... Get the (hopefully modified) EFLAGS */
74
101
return 0; /* CPUID not supported */
77
cpuid(0, max_std_level, ebx, ecx, edx);
82
if(max_std_level >= 1){
103
cpuid(0, max_std_level, vendor.i[0], vendor.i[2], vendor.i[1]);
105
if (max_std_level >= 1) {
83
106
cpuid(1, eax, ebx, ecx, std_caps);
84
family = ((eax>>8)&0xf) + ((eax>>20)&0xff);
85
model = ((eax>>4)&0xf) + ((eax>>12)&0xf0);
86
if (std_caps & (1<<23))
107
family = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
108
model = ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0);
109
if (std_caps & (1 << 15))
110
rval |= AV_CPU_FLAG_CMOV;
111
if (std_caps & (1 << 23))
87
112
rval |= AV_CPU_FLAG_MMX;
88
if (std_caps & (1<<25))
89
rval |= AV_CPU_FLAG_MMX2
113
if (std_caps & (1 << 25))
114
rval |= AV_CPU_FLAG_MMXEXT;
92
if (std_caps & (1<<26))
116
if (std_caps & (1 << 25))
117
rval |= AV_CPU_FLAG_SSE;
118
if (std_caps & (1 << 26))
93
119
rval |= AV_CPU_FLAG_SSE2;
95
121
rval |= AV_CPU_FLAG_SSE3;
107
133
if ((eax & 0x6) == 0x6)
108
134
rval |= AV_CPU_FLAG_AVX;
136
#endif /* HAVE_AVX */
137
#endif /* HAVE_SSE */
115
140
cpuid(0x80000000, max_ext_level, ebx, ecx, edx);
117
if(max_ext_level >= 0x80000001){
142
if (max_ext_level >= 0x80000001) {
118
143
cpuid(0x80000001, eax, ebx, ecx, ext_caps);
119
if (ext_caps & (1U<<31))
144
if (ext_caps & (1U << 31))
120
145
rval |= AV_CPU_FLAG_3DNOW;
121
if (ext_caps & (1<<30))
146
if (ext_caps & (1 << 30))
122
147
rval |= AV_CPU_FLAG_3DNOWEXT;
123
if (ext_caps & (1<<23))
148
if (ext_caps & (1 << 23))
124
149
rval |= AV_CPU_FLAG_MMX;
125
if (ext_caps & (1<<22))
126
rval |= AV_CPU_FLAG_MMX2;
150
if (ext_caps & (1 << 22))
151
rval |= AV_CPU_FLAG_MMXEXT;
128
153
/* Allow for selectively disabling SSE2 functions on AMD processors
129
154
with SSE2 support but not SSE4a. This includes Athlon64, some
150
175
if (!strncmp(vendor.c, "GenuineIntel", 12)) {
151
176
if (family == 6 && (model == 9 || model == 13 || model == 14)) {
152
/* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and 6/14 (core1 "yonah")
153
* theoretically support sse2, but it's usually slower than mmx,
154
* so let's just pretend they don't. AV_CPU_FLAG_SSE2 is disabled and
155
* AV_CPU_FLAG_SSE2SLOW is enabled so that SSE2 is not used unless
156
* explicitly enabled by checking AV_CPU_FLAG_SSE2SLOW. The same
157
* situation applies for AV_CPU_FLAG_SSE3 and AV_CPU_FLAG_SSE3SLOW. */
158
if (rval & AV_CPU_FLAG_SSE2) rval ^= AV_CPU_FLAG_SSE2SLOW|AV_CPU_FLAG_SSE2;
159
if (rval & AV_CPU_FLAG_SSE3) rval ^= AV_CPU_FLAG_SSE3SLOW|AV_CPU_FLAG_SSE3;
177
/* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and
178
* 6/14 (core1 "yonah") theoretically support sse2, but it's
179
* usually slower than mmx, so let's just pretend they don't.
180
* AV_CPU_FLAG_SSE2 is disabled and AV_CPU_FLAG_SSE2SLOW is
181
* enabled so that SSE2 is not used unless explicitly enabled
182
* by checking AV_CPU_FLAG_SSE2SLOW. The same situation
183
* applies for AV_CPU_FLAG_SSE3 and AV_CPU_FLAG_SSE3SLOW. */
184
if (rval & AV_CPU_FLAG_SSE2)
185
rval ^= AV_CPU_FLAG_SSE2SLOW | AV_CPU_FLAG_SSE2;
186
if (rval & AV_CPU_FLAG_SSE3)
187
rval ^= AV_CPU_FLAG_SSE3SLOW | AV_CPU_FLAG_SSE3;
161
189
/* The Atom processor has SSSE3 support, which is useful in many cases,
162
190
* but sometimes the SSSE3 version is slower than the SSE2 equivalent