95
114
#if defined(__GNUC__)
97
116
static INLINE void
98
FXSAVE_ES1(uint8 *save)
100
__asm__ __volatile__ ("fxsaveq %0 \n" : "=m" (*save) : : "memory");
104
FXSAVE_COMPAT_ES1(uint8 *save)
106
__asm__ __volatile__ ("fxsave %0 \n" : "=m" (*save) : : "memory");
110
FXRSTOR_ES1(const uint8 *load)
112
__asm__ __volatile__ ("fxrstorq %0 \n" : : "m" (*load) : "memory");
116
FXRSTOR_COMPAT_ES1(const uint8 *load)
118
__asm__ __volatile__ ("fxrstor %0 \n" : : "m" (*load) : "memory");
122
FXRSTOR_AMD_ES0(const uint8 *load)
117
FXSAVE_ES1(void *save)
119
__asm__ __volatile__ ("fxsaveq %0 \n" : "=m" (*(uint8 *)save) : : "memory");
123
FXSAVE_COMPAT_ES1(void *save)
125
__asm__ __volatile__ ("fxsave %0 \n" : "=m" (*(uint8 *)save) : : "memory");
129
FXRSTOR_ES1(const void *load)
131
__asm__ __volatile__ ("fxrstorq %0 \n"
132
: : "m" (*(const uint8 *)load) : "memory");
136
FXRSTOR_COMPAT_ES1(const void *load)
138
__asm__ __volatile__ ("fxrstor %0 \n"
139
: : "m" (*(const uint8 *)load) : "memory");
143
FXRSTOR_AMD_ES0(const void *load)
124
145
uint64 dummy = 0;
126
147
__asm__ __volatile__
127
148
("fnstsw %%ax \n" // Grab x87 ES bit
128
149
"bt $7,%%ax \n" // Test ES bit
134
155
// x87 exception pointers.
137
: "m" (dummy), "m" (*load)
158
: "m" (dummy), "m" (*(const uint8 *)load)
138
159
: "ax", "memory");
141
162
#endif /* __GNUC__ */
166
* save/restore GSSE/SIMD/MMX fpu state
168
* The pointer passed in must be 64-byte aligned.
169
* See above comment for more information.
171
#if defined(__GNUC__) && (defined(VMM) || defined(VMKERNEL) || defined(FROBOS))
174
XSAVE_ES1(void *save, uint64 mask)
176
#if __GNUC__ < 4 || __GNUC__ == 4 && __GNUC_MINOR__ == 1
177
__asm__ __volatile__ (
178
".byte 0x48, 0x0f, 0xae, 0x21 \n"
180
: "c" ((uint8 *)save), "a" ((uint32)mask), "d" ((uint32)(mask >> 32))
183
__asm__ __volatile__ (
185
: "=m" (*(uint8 *)save)
186
: "a" ((uint32)mask), "d" ((uint32)(mask >> 32))
192
XSAVE_COMPAT_ES1(void *save, uint64 mask)
194
#if __GNUC__ < 4 || __GNUC__ == 4 && __GNUC_MINOR__ == 1
195
__asm__ __volatile__ (
196
".byte 0x0f, 0xae, 0x21 \n"
198
: "c" ((uint8 *)save), "a" ((uint32)mask), "d" ((uint32)(mask >> 32))
201
__asm__ __volatile__ (
203
: "=m" (*(uint8 *)save)
204
: "a" ((uint32)mask), "d" ((uint32)(mask >> 32))
210
XSAVEOPT_ES1(void *save, uint64 mask)
212
__asm__ __volatile__ (
213
".byte 0x48, 0x0f, 0xae, 0x31 \n"
215
: "c" ((uint8 *)save), "a" ((uint32)mask), "d" ((uint32)(mask >> 32))
220
XRSTOR_ES1(const void *load, uint64 mask)
222
#if __GNUC__ < 4 || __GNUC__ == 4 && __GNUC_MINOR__ == 1
223
__asm__ __volatile__ (
224
".byte 0x48, 0x0f, 0xae, 0x29 \n"
226
: "c" ((const uint8 *)load),
227
"a" ((uint32)mask), "d" ((uint32)(mask >> 32))
230
__asm__ __volatile__ (
233
: "m" (*(const uint8 *)load),
234
"a" ((uint32)mask), "d" ((uint32)(mask >> 32))
240
XRSTOR_COMPAT_ES1(const void *load, uint64 mask)
242
#if __GNUC__ < 4 || __GNUC__ == 4 && __GNUC_MINOR__ == 1
243
__asm__ __volatile__ (
244
".byte 0x0f, 0xae, 0x29 \n"
246
: "c" ((const uint8 *)load),
247
"a" ((uint32)mask), "d" ((uint32)(mask >> 32))
250
__asm__ __volatile__ (
253
: "m" (*(const uint8 *)load),
254
"a" ((uint32)mask), "d" ((uint32)(mask >> 32))
260
XRSTOR_AMD_ES0(const void *load, uint64 mask)
265
("fnstsw %%ax \n" // Grab x87 ES bit
266
"bt $7,%%ax \n" // Test ES bit
267
"jnc 1f \n" // Jump if ES=0
268
"fnclex \n" // ES=1. Clear it so fild doesn't trap
270
"ffree %%st(7) \n" // Clear tag bit - avoid poss. stack overflow
271
"fildl %0 \n" // Dummy Load from "safe address" changes all
272
// x87 exception pointers.
273
"mov %%ebx, %%eax \n"
274
#if __GNUC__ < 4 || __GNUC__ == 4 && __GNUC_MINOR__ == 1
275
".byte 0x48, 0x0f, 0xae, 0x29 \n"
277
: "m" (dummy), "c" ((const uint8 *)load),
278
"b" ((uint32)mask), "d" ((uint32)(mask >> 32))
282
: "m" (dummy), "m" (*(const uint8 *)load),
283
"b" ((uint32)mask), "d" ((uint32)(mask >> 32))
288
#endif /* __GNUC__ */
145
292
*-----------------------------------------------------------------------------
149
296
* Unsigned integer by fixed point multiplication:
150
297
* result = multiplicand * multiplier >> shift
152
299
* Unsigned 64-bit integer multiplicand.
153
* Unsigned 32-bit fixed point multiplier, represented as
300
* Unsigned 64-bit fixed point multiplier, represented as
154
301
* multiplier >> shift, where shift < 64.
155
302
* Unsigned 64-bit integer product.
248
393
#elif defined(_MSC_VER)
250
395
static INLINE int64
251
Muls64x32s64(int64 multiplicand, uint32 multiplier, uint32 shift)
396
Muls64x64s64(int64 multiplicand, int64 multiplier, uint32 shift)
253
398
int64 tmplo, tmphi;
254
400
tmplo = _mul128(multiplicand, multiplier, &tmphi);
255
401
return __shiftright128(tmplo, tmphi, (uint8) shift);
407
*-----------------------------------------------------------------------------
411
* Unsigned integer by fixed point multiplication:
412
* result = multiplicand * multiplier >> shift
414
* Unsigned 64-bit integer multiplicand.
415
* Unsigned 32-bit fixed point multiplier, represented as
416
* multiplier >> shift, where shift < 64.
417
* Unsigned 64-bit integer product.
420
* Multiply 64x64 bits to yield a full 128-bit product.
421
* Shift result in RDX:RAX right by "shift".
422
* Return the low-order 64 bits of the above.
425
* Return the low-order 64 bits of ((multiplicand * multiplier) >> shift)
427
*-----------------------------------------------------------------------------
431
Mul64x3264(uint64 multiplicand, uint32 multiplier, uint32 shift)
433
return Mul64x6464(multiplicand, multiplier, shift);
437
*-----------------------------------------------------------------------------
441
* Signed integer by fixed point multiplication:
442
* result = (multiplicand * multiplier) >> shift
444
* Signed 64-bit integer multiplicand.
445
* Unsigned 32-bit fixed point multiplier, represented as
446
* multiplier >> shift, where shift < 64.
447
* Signed 64-bit integer product.
450
* Multiply 64x64 bits to yield a full 128-bit product.
451
* Shift result in RDX:RAX right by "shift".
452
* Return the low-order 64 bits of the above.
455
* Return the low-order 64 bits of ((multiplicand * multiplier) >> shift)
457
*-----------------------------------------------------------------------------
461
Muls64x32s64(int64 multiplicand, uint32 multiplier, uint32 shift)
463
return Muls64x64s64(multiplicand, multiplier, shift);
261
467
#if defined(__GNUC__)