130
130
/*
 * MSVC compiler intrinsics for bit scanning and single-bit testing.
 *
 * NOTE(review): the bare numeric lines throughout this chunk look like
 * diff/extraction artifacts (old/new line-number columns), not real
 * source — verify against the original file.
 */

/* Scan for the index of the most-significant set bit. */
unsigned char _BitScanReverse(unsigned long *, unsigned long);
131
131
/* Force intrinsic expansion instead of a library call. */
#pragma intrinsic(_BitScanForward, _BitScanReverse)
133
/* Read a single bit of a 32-bit word; the word is not modified. */
unsigned char _bittest(const long *, long);
133
134
/* Test-and-set one bit; returns the bit's previous value. */
unsigned char _bittestandset(long *, long);
134
135
/* Test-and-clear one bit; returns the bit's previous value. */
unsigned char _bittestandreset(long *, long);
135
#pragma intrinsic(_bittestandset, _bittestandreset)
136
/* Test-and-flip one bit; returns the bit's previous value. */
unsigned char _bittestandcomplement(long *, long);
137
#pragma intrinsic(_bittest, _bittestandset, _bittestandreset, _bittestandcomplement)
137
139
/* 64-bit variants (MSVC provides these on x64 targets). */
unsigned char _bittestandset64(__int64 *, __int64);
138
140
unsigned char _bittestandreset64(__int64 *, __int64);
804
*----------------------------------------------------------------------
806
* COMPILER_FORCED_LOAD_AND_MEM_BARRIER --
808
* This macro prevents the compiler from re-ordering memory references
809
* across the barrier. In addition it emits a forced load from the given
810
* memory reference. The memory reference has to be either 1, 2, 4 or 8
812
 * The forced load of a memory reference can be used to exploit details of a
813
 * given CPU's memory model. For example, x86 CPUs won't reorder stores to
814
* a memory location x with loads from a memory location x.
815
* NOTE: It does not generate any fencing instruction, so the CPU is free
816
* to reorder instructions according to its memory model.
824
*----------------------------------------------------------------------
830
/*
 * NOTE(review): interior lines of this macro (the enclosing
 * do { ... } while (0) / statement wrapper, the asm closing paren and
 * any clobber list) are not visible in this chunk; the fragment is
 * reproduced as-is. The visible part emits a plain register load
 * ("mov" with an "m" input and "=r" output) — it forces the compiler
 * to materialize a load and acts as a compiler-only barrier; no CPU
 * fence instruction is generated.
 */
#define COMPILER_FORCED_LOAD_AND_MEM_BARRIER(_memory_reference) \
832
typeof(_memory_reference) _dummy; \
834
asm volatile("mov %1, %0\n\t" \
835
: "=r" (_dummy) /* Let compiler choose reg for _dummy */ \
836
: "m" (_memory_reference) \
840
#endif /* __GNUC__ */
841
#endif /* VM_X86_64 */
802
845
* PAUSE is a P4 instruction that improves spinlock power+performance;
803
846
* on non-P4 IA32 systems, the encoding is interpreted as a REPZ-NOP.
804
847
* Use volatile to avoid NOP removal.
945
988
*-----------------------------------------------------------------------------
992
/* ARM: 'bkpt' is the ARM breakpoint instruction. */
#define DEBUGBREAK() __asm__("bkpt")
949
/* MSVC: compiler-intrinsic breakpoint. */
#define DEBUGBREAK() __debugbreak()
995
#define DEBUGBREAK() __debugbreak()
951
/* x86/x64 GNU toolchains: 'int $3' is the x86 breakpoint trap. */
#define DEBUGBREAK() __asm__ (" int $3 ")
997
#define DEBUGBREAK() __asm__("int $3")
953
1000
/*
 * NOTE(review): the #if/#elif guards that pick exactly one DEBUGBREAK
 * definition are missing here; several alternatives appear back to back
 * (apparently old/new diff lines). Reproduced as-is.
 */
#endif // defined(__i386__) || defined(__x86_64__) || defined(__arm__)
957
1004
*-----------------------------------------------------------------------------
959
* {Clear,Set}Bit{32,64} --
1006
* {Clear,Set,Test}Bit{32,64} --
961
1008
* Sets or clears a specified single bit in the provided variable.
962
1009
* The index input value specifies which bit to modify and is 0-based.
1071
1118
*-----------------------------------------------------------------------------
1120
* {Clear,Set,Complement,Test}BitVector --
1122
* Sets, clears, complements, or tests a specified single bit in the
1123
* provided array. The index input value specifies which bit to modify
1124
 * and is 0-based. Bit number can be +-2Gb (+-256MB) relative from 'var'
1127
* All functions return value of the bit before modification was performed.
1129
*-----------------------------------------------------------------------------
1133
/*
 * SetBitVector --
 *
 *    Sets bit 'index' (0-based) in the array at 'var' and returns the
 *    bit's previous value — the MSVC path below uses _bittestandset,
 *    which does exactly that.
 *
 * NOTE(review): the return-type line, the asm instruction, the #if
 * guards and the braces are missing from this chunk; only fragments
 * are visible and are reproduced as-is.
 */
SetBitVector(void *var, int32 index)
1140
: "=rm" (bit), "+m" (*(volatile uint32 *)var)
1145
#elif defined(_MSC_VER)
return _bittestandset((long *)var, index) != 0;
1148
#error No compiler defined for SetBitVector
1153
/*
 * ClearBitVector --
 *
 *    Clears bit 'index' (0-based) in the array at 'var' and returns the
 *    bit's previous value — the MSVC path below uses _bittestandreset,
 *    which does exactly that.
 *
 * NOTE(review): the return-type line, the asm instruction, the #if
 * guards and the braces are missing from this chunk; only fragments
 * are visible and are reproduced as-is.
 */
ClearBitVector(void *var, int32 index)
1160
: "=rm" (bit), "+m" (*(volatile uint32 *)var)
1165
#elif defined(_MSC_VER)
return _bittestandreset((long *)var, index) != 0;
1168
#error No compiler defined for ClearBitVector
1173
/*
 * ComplementBitVector --
 *
 *    Flips bit 'index' (0-based) in the array at 'var' and returns the
 *    bit's previous value — the MSVC path below uses
 *    _bittestandcomplement, which does exactly that.
 *
 * NOTE(review): the return-type line, the asm instruction, the #if
 * guards and the braces are missing from this chunk; only fragments
 * are visible and are reproduced as-is.
 */
ComplementBitVector(void *var, int32 index)
1180
: "=rm" (bit), "+m" (*(volatile uint32 *)var)
1185
#elif defined(_MSC_VER)
return _bittestandcomplement((long *)var, index) != 0;
1188
#error No compiler defined for ComplementBitVector
1193
/*
 * TestBitVector --
 *
 *    Returns the value of bit 'index' (0-based) in the array at 'var'.
 *    Read-only: 'var' is const-qualified and is never written (the GNU
 *    path uses a plain "m" input, the MSVC path uses _bittest, whose
 *    prototype takes a const long *).
 *
 * NOTE(review): the return-type line, the asm instruction, the #if
 * guards and the braces are missing from this chunk and cannot be
 * reconstructed here; visible fragments reproduced otherwise as-is.
 */
TestBitVector(const void *var, int32 index)
1201
: "m" (*(const uint32 *)var), "rI" (index)
1205
#elif defined _MSC_VER
/* Keep the const qualifier: _bittest takes const long * and the cast
 * from (long *) was discarding const from the const void * parameter. */
return _bittest((const long *)var, index) != 0;
1208
#error No compiler defined for TestBitVector
1213
*-----------------------------------------------------------------------------
1072
1214
* RoundUpPow2_{64,32} --
1074
1216
* Rounds a value up to the next higher power of 2. Returns the original
1135
1277
/*
 * RoundUpPow2Asm32 --
 *
 *    Rounds a 32-bit value up to the next power of 2 using inline
 *    assembly. Two alternative bodies are visible below: an ARM
 *    sub/clz/ror sequence and an x86 lea/bsr/cmovz sequence.
 *
 * NOTE(review): the #if/#elif lines selecting between the two asm
 * bodies, the function braces, part of the 'out' handling and the
 * return statement are missing from this chunk (old/new diff lines are
 * interleaved — note 'out' is used at the ror line before the visible
 * 'uint32 out = 2;' declaration). Reproduced as-is.
 */
static INLINE uint32
1136
1278
RoundUpPow2Asm32(uint32 value)
1282
// Note: ARM mode only (not Thumb).
1283
// The value of the argument "value"
1284
// will be affected!
1285
__asm__("sub %[in], %[in], #1;" // r1 = value - 1 . if value == 0 then r1 = 0xFFFFFFFF
1286
"clz %[in], %[in];" // r1 = log2(value - 1) if value != 1
1287
// if value == 0 then r1 = 0
1288
// if value == 1 then r1 = 32
1289
"mov %[out], %[out], ror %[in]" // out = 2^(32 - r1)
1290
// if out == 2^32 then out = 1 as it is right rotate
1291
: [in]"+r"(value),[out]"+r"(out));
1138
1293
uint32 out = 2;
1139
1295
__asm__("lea -1(%[in]), %%ecx;" // ecx = value - 1. Preserve original.
1140
1296
"bsr %%ecx, %%ecx;" // ecx = log2(value - 1) if value != 1
1141
1297
// if value == 0, then ecx = 31
1146
1302
// zf is always unmodified
1147
1303
"cmovz %[in], %[out]" // if value == 1 (zf == 1), write 1 to out.
1148
1304
: [out]"+r"(out) : [in]"r"(value) : "%ecx", "cc");
1151
1308
#endif // __GNUC__