122
/* CPU capability flags, written as hex literals. Every literal below has
 * exactly one bit set, running from 0x0000001 up to 0x0010000;
 * X264_CPU_MMXEXT is an alias for X264_CPU_MMX2 rather than a bit of its own.
 * NOTE(review): the same names are defined again further down using (1<<n)
 * values with different bit positions (X264_CPU_MMX is 0x0000002 here but
 * (1<<0) there; X264_CPU_CMOV exists only in this set), and bare numbers sit
 * between the defines. This looks like two revisions of the header
 * interleaved -- confirm which set is meant to survive. */
#define X264_CPU_CMOV 0x0000001
123
#define X264_CPU_MMX 0x0000002
124
#define X264_CPU_MMX2 0x0000004 /* MMX2 aka MMXEXT aka ISSE */
125
#define X264_CPU_MMXEXT X264_CPU_MMX2
126
#define X264_CPU_SSE 0x0000008
127
#define X264_CPU_SSE2 0x0000010
128
#define X264_CPU_SSE3 0x0000020
129
#define X264_CPU_SSSE3 0x0000040
130
#define X264_CPU_SSE4 0x0000080 /* SSE4.1 */
131
#define X264_CPU_SSE42 0x0000100 /* SSE4.2 */
132
#define X264_CPU_LZCNT 0x0000200 /* Phenom support for "leading zero count" instruction. */
133
#define X264_CPU_AVX 0x0000400 /* AVX support: requires OS support even if YMM registers aren't used. */
134
#define X264_CPU_XOP 0x0000800 /* AMD XOP */
135
#define X264_CPU_FMA4 0x0001000 /* AMD FMA4 */
136
#define X264_CPU_FMA3 0x0002000 /* FMA3 */
137
#define X264_CPU_AVX2 0x0004000 /* AVX2 */
138
#define X264_CPU_BMI1 0x0008000 /* BMI1 */
139
#define X264_CPU_BMI2 0x0010000 /* BMI2 */
122
/* CPU capability flags, written as single-bit shifts (1<<0) .. (1<<16).
 * X264_CPU_MMXEXT is an alias for X264_CPU_MMX2 rather than a bit of its own.
 * Compared with the hex-literal definitions of the same names earlier in
 * this file, this set has no X264_CPU_CMOV, places X264_CPU_LZCNT at bit 4
 * and X264_CPU_AVX2 at bit 15, and adds X264_CPU_AVX512 at bit 16.
 * NOTE(review): because the names collide with the hex set and the values
 * differ, both sets cannot be active at once -- presumably this is the newer
 * revision; confirm and drop the other. */
#define X264_CPU_MMX (1<<0)
123
#define X264_CPU_MMX2 (1<<1) /* MMX2 aka MMXEXT aka ISSE */
124
#define X264_CPU_MMXEXT X264_CPU_MMX2
125
#define X264_CPU_SSE (1<<2)
126
#define X264_CPU_SSE2 (1<<3)
127
#define X264_CPU_LZCNT (1<<4)
128
#define X264_CPU_SSE3 (1<<5)
129
#define X264_CPU_SSSE3 (1<<6)
130
#define X264_CPU_SSE4 (1<<7) /* SSE4.1 */
131
#define X264_CPU_SSE42 (1<<8) /* SSE4.2 */
132
#define X264_CPU_AVX (1<<9) /* Requires OS support even if YMM registers aren't used */
133
#define X264_CPU_XOP (1<<10) /* AMD XOP */
134
#define X264_CPU_FMA4 (1<<11) /* AMD FMA4 */
135
#define X264_CPU_FMA3 (1<<12)
136
#define X264_CPU_BMI1 (1<<13)
137
#define X264_CPU_BMI2 (1<<14)
138
#define X264_CPU_AVX2 (1<<15)
139
#define X264_CPU_AVX512 (1<<16) /* AVX-512 {F, CD, BW, DQ, VL}, requires OS support */
140
140
/* x86 modifiers
 * These flags take the bits directly after the feature flags. Going by their
 * own comments they describe performance characteristics (cacheline size,
 * slow execution units, stack alignment) rather than instruction-set support.
 * NOTE(review): two variants are interleaved below -- hex literals
 * (0x0020000 .. 0x4000000) and shifts ((1<<17) .. (1<<25)). Their bit
 * positions disagree (X264_CPU_SLOW_ATOM is 0x1000000 in one and (1<<23) in
 * the other) and X264_CPU_SLOW_CTZ exists only in the hex variant.
 * NOTE(review): the comment opened on the hex X264_CPU_SLOW_ATOM line is not
 * closed on that line; as the text stands it runs on through the following
 * X264_CPU_CACHELINE_32 (1<<17) line. Confirm which variant is intended
 * before relying on any of these values. */
141
#define X264_CPU_CACHELINE_32 0x0020000 /* avoid memory loads that span the border between two cachelines */
142
#define X264_CPU_CACHELINE_64 0x0040000 /* 32/64 is the size of a cacheline in bytes */
143
#define X264_CPU_SSE2_IS_SLOW 0x0080000 /* avoid most SSE2 functions on Athlon64 */
144
#define X264_CPU_SSE2_IS_FAST 0x0100000 /* a few functions are only faster on Core2 and Phenom */
145
#define X264_CPU_SLOW_SHUFFLE 0x0200000 /* The Conroe has a slow shuffle unit (relative to overall SSE performance) */
146
#define X264_CPU_STACK_MOD4 0x0400000 /* if stack is only mod4 and not mod16 */
147
#define X264_CPU_SLOW_CTZ 0x0800000 /* BSR/BSF x86 instructions are really slow on some CPUs */
148
#define X264_CPU_SLOW_ATOM 0x1000000 /* The Atom is terrible: slow SSE unaligned loads, slow
141
#define X264_CPU_CACHELINE_32 (1<<17) /* avoid memory loads that span the border between two cachelines */
142
#define X264_CPU_CACHELINE_64 (1<<18) /* 32/64 is the size of a cacheline in bytes */
143
#define X264_CPU_SSE2_IS_SLOW (1<<19) /* avoid most SSE2 functions on Athlon64 */
144
#define X264_CPU_SSE2_IS_FAST (1<<20) /* a few functions are only faster on Core2 and Phenom */
145
#define X264_CPU_SLOW_SHUFFLE (1<<21) /* The Conroe has a slow shuffle unit (relative to overall SSE performance) */
146
#define X264_CPU_STACK_MOD4 (1<<22) /* if stack is only mod4 and not mod16 */
147
#define X264_CPU_SLOW_ATOM (1<<23) /* The Atom is terrible: slow SSE unaligned loads, slow
149
148
* SIMD multiplies, slow SIMD variable shifts, slow pshufb,
150
149
* cacheline split penalties -- gather everything here that
151
150
* isn't shared by other CPUs to avoid making half a dozen
152
151
* new SLOW flags. */
153
#define X264_CPU_SLOW_PSHUFB 0x2000000 /* such as on the Intel Atom */
154
#define X264_CPU_SLOW_PALIGNR 0x4000000 /* such as on the AMD Bobcat */
152
#define X264_CPU_SLOW_PSHUFB (1<<24) /* such as on the Intel Atom */
153
#define X264_CPU_SLOW_PALIGNR (1<<25) /* such as on the AMD Bobcat */
157
156
/* ALTIVEC capability flag: bit 0.
 * NOTE(review): numerically equal to the lowest x86 flag bit defined earlier
 * in this file -- presumably the two sets are never combined on one target;
 * confirm. */
#define X264_CPU_ALTIVEC (1<<0)