   volatile uint64 value;
} Atomic_uint64 ALIGNED(8);

EXTERN Atomic_uint32 atomicLocked64bit;
/*
 * Definitions for kernel function call which attempts an
 * atomic exchange, returning 0 only upon success.
 * The code actually called is put in memory by the kernel,
 * and is in fact what the kernel uses for this atomic
 * instruction. This does not work for Linux versions
 * before 2.6 or (obviously) for non-Linux implementations.
 * For other implementations on ARMv6 and up, use an
 * LDREX/SUBS/STREXEQ/LDRNE/ADDS/BNE spin-lock; for pre-ARMv6,
 * use a SWP-based spin-lock.
 */
#if !defined(__linux__)
#define __kernel_cmpxchg(x, y, z) _fn__kernel_cmpxchgNotImplementedOnNonLinuxARM
#else
typedef int (__kernel_cmpxchg_t)(uint32 oldVal,
                                 uint32 newVal,
                                 volatile uint32 *mem);
#define __kernel_cmpxchg (*(__kernel_cmpxchg_t *)0xffff0fc0)
#endif
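/*
 * Illustrative sketch (not part of the original header): the helper
 * returns 0 only when *mem still held oldVal and was replaced, so it is
 * typically wrapped in a read-modify-retry loop. The function name and
 * 'counter' argument below are hypothetical.
 */
#if 0
static INLINE void
AtomicIncSketch(volatile uint32 *counter)
{
   uint32 old;

   do {
      old = *counter;                                      // snapshot current value
   } while (__kernel_cmpxchg(old, old + 1, counter) != 0); // retry if it changed
}
#endif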
/*
 * Prototypes for msft atomics. These are defined & inlined by the
 * compiler so no function definition is needed. The prototypes are
 * needed for C++. Since the amd64 compiler doesn't support inline asm we
 * have to use these. Unfortunately, we still have to use some inline asm
 * for the 32-bit code since the and/or/xor implementations didn't show up
 * until XP or 2k3.
 *
 * The declarations for the intrinsic functions were taken from ntddk.h
 * in the DDK. The declarations must match, otherwise the 64-bit C++
 * compiler will complain about second linkage of the intrinsic functions.
 * We define the intrinsics using the basic types corresponding to the
 * Windows typedefs. This avoids having to include Windows header files
 * to get to the Windows types.
 */
#endif /* _MSC_VER */

#if defined(__arm__)
# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \
     defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__)
#  define VM_ARM_V7
# else
#  error Only ARMv7 extends the synchronization primitives ldrex/strex. \
         For lower ARM versions, please implement the atomic functions \
         by kernel APIs.
# endif

/* Data Memory Barrier */
#define dmb() __asm__ __volatile__("dmb" : : : "memory")
#endif /* __arm__ */
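/*
 * Illustrative sketch (not part of the original header): dmb orders all
 * memory accesses issued before the barrier against those issued after
 * it. A typical use is publishing data through a flag; 'payload' and
 * 'ready' below are hypothetical.
 */
#if 0
static INLINE void
PublishSketch(volatile uint32 *payload, volatile uint32 *ready)
{
   *payload = 42;   // write the data first
   dmb();           // make the payload visible before the flag
   *ready = 1;      // readers that issue dmb after observing 'ready'
                    // are guaranteed to observe the payload
}
#endif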
/* Convert a volatile uint32 to Atomic_uint32. */
static INLINE Atomic_uint32 *
Atomic_VolatileToAtomic(volatile uint32 *var) // IN
{
   return (Atomic_uint32 *)var;
}
/*
 * Atomic_SetFence sets AtomicUseFence to the given value.
 *
 * Atomic_Init computes and sets AtomicUseFence for x86.
 * It does not take into account the number of processors.
 *
 * The rationale for all this complexity is that Atomic_Init
 */
static INLINE uint32
Atomic_ReadWrite(Atomic_uint32 *var, // IN
                 uint32 val)         // IN
{
#if defined(FAKE_ATOMIC)
   uint32 retval = var->value;

   var->value = val;
   return retval;
#elif defined(__GNUC__)
#if defined(__arm__) && !defined(VM_ARM_V7)
   register uint32 retval;
   register volatile uint32 *mem = &(var->value);

   /* XXX - ARMv5 only: for ARMv6, use LDREX/STREX/CMP/BEQ spin-lock */
   __asm__ __volatile__("swp %0, %1, [%2]"
                        : "=&r,&r" (retval)
                        : "r,0" (val), "r,r" (mem) : "memory");
   return retval;
#elif defined(VM_ARM_V7)
   register volatile uint32 retVal;
   register volatile uint32 res;

   dmb();
   __asm__ __volatile__(
   "1: ldrex %[retVal], [%[var]] \n\t"
      "strex %[res], %[val], [%[var]] \n\t"
      "teq %[res], #0 \n\t"
      "bne 1b"
      : [retVal] "=&r" (retVal), [res] "=&r" (res)
      : [var] "r" (&var->value), [val] "r" (val)
      : "cc"
   );
   dmb();
   return retVal;
#else // VM_ARM_V7 (assume x86*)
   /* Checked against the Intel manual and GCC --walken */
   __asm__ __volatile__(
      "xchgl %0, %1"
      : "=r" (val),
        "+m" (var->value)
      : "0" (val)
   );
   AtomicEpilogue();
   return val;
#endif // VM_ARM_V7
#elif defined _MSC_VER
#if _MSC_VER >= 1310
   return _InterlockedExchange((long *)&var->value, (long)val);
#else
#pragma warning(push)
#pragma warning(disable : 4035)         // disable no-return warning
   {
      __asm mov eax, val
      __asm mov ebx, var
      __asm xchg [ebx]Atomic_uint32.value, eax
      // eax is the return value, this is documented to work - edward
   }
#pragma warning(pop)
#endif // _MSC_VER >= 1310
#else
#error No compiler defined for Atomic_ReadWrite
#endif // __GNUC__
}
#define Atomic_ReadWrite32 Atomic_ReadWrite
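/*
 * Illustrative sketch (not part of the original header): an atomic
 * exchange is sufficient to build a test-and-set spinlock. The names
 * below are hypothetical; Atomic_Write is defined elsewhere in this
 * header.
 */
#if 0
static INLINE void
SpinLockSketch(Atomic_uint32 *lock)
{
   while (Atomic_ReadWrite(lock, 1) != 0) {
      /* previous value was 1: someone else holds the lock; spin */
   }
   /* previous value was 0: we now own the lock */
}

static INLINE void
SpinUnlockSketch(Atomic_uint32 *lock)
{
   Atomic_Write(lock, 0);
}
#endif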
static INLINE uint32
Atomic_ReadIfEqualWrite(Atomic_uint32 *var, // IN
                        uint32 oldVal,      // IN
                        uint32 newVal)      // IN
{
#if defined(FAKE_ATOMIC)
   uint32 readVal = var->value;

   if (oldVal == readVal) {
      var->value = newVal;
   }
   return readVal;
#elif defined(__GNUC__)
#if defined(__arm__) && !defined(VM_ARM_V7)
   uint32 readVal;
   register volatile uint32 *mem = &(var->value);

   // loop until var != oldVal, or var is successfully replaced while var == oldVal
   do {
      readVal = Atomic_Read(var);
      if (oldVal != readVal) {
         return readVal; // fail
      }
   } while (__kernel_cmpxchg(oldVal, newVal, mem) != 0);
   return oldVal; // success
#elif defined(VM_ARM_V7)
   register uint32 retVal;
   register uint32 res;

   dmb();
   __asm__ __volatile__(
   "1: ldrex %[retVal], [%[var]] \n\t"
      "mov %[res], #0 \n\t"
      "teq %[retVal], %[oldVal] \n\t"
      "strexeq %[res], %[newVal], [%[var]] \n\t"
      "teq %[res], #0 \n\t"
      "bne 1b"
      : [retVal] "=&r" (retVal), [res] "=&r" (res)
      : [var] "r" (&var->value), [oldVal] "r" (oldVal), [newVal] "r" (newVal)
      : "cc"
   );
   dmb();
   return retVal;
#else // VM_ARM_V7 (assume x86*)
   uint32 val;

   /* Checked against the Intel manual and GCC --walken */
   __asm__ __volatile__(
      "lock; cmpxchgl %2, %1"
      : "=a" (val),
        "+m" (var->value)
      : "r" (newVal),
        "0" (oldVal)
      /*
       * "1" (var->value): results in inconsistent constraints on gcc 2.7.2.3
       * when compiling enterprise-2.2.17-14-RH7.0-update.
       * The constraint has been commented out for now. We may consider doing
       * this systematically, but we need to be sure it is the right thing to
       * do. However, it is also possible that the offending use of this asm
       * function will be removed in the near future in which case we may
       * decide to reintroduce the constraint instead. hpreg & agesen.
       */
      : "cc"
   );
   AtomicEpilogue();
   return val;
#endif // VM_ARM_V7
#elif defined _MSC_VER
#if _MSC_VER >= 1310
   return _InterlockedCompareExchange((long *)&var->value,
                                      (long)newVal,
                                      (long)oldVal);
#endif // _MSC_VER >= 1310
#else
#error No compiler defined for Atomic_ReadIfEqualWrite
#endif
}
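/*
 * Illustrative sketch (not part of the original header): compare-and-swap
 * supports read-modify-write operations that a plain atomic add cannot
 * express, e.g. incrementing only while below a limit. Names below are
 * hypothetical.
 */
#if 0
static INLINE Bool
BoundedIncSketch(Atomic_uint32 *var, uint32 limit)
{
   uint32 old;

   do {
      old = Atomic_Read(var);
      if (old >= limit) {
         return FALSE;          // give up: limit reached
      }
      // retry if another thread changed var between the read and the CAS
   } while (Atomic_ReadIfEqualWrite(var, old, old + 1) != old);
   return TRUE;
}
#endif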
static INLINE void
Atomic_And(Atomic_uint32 *var, // IN
           uint32 val)         // IN
{
#if defined(FAKE_ATOMIC)
   var->value &= val;
#elif defined(__GNUC__)
#if defined(__arm__) && !defined(VM_ARM_V7)
   /* same as Atomic_FetchAndAnd without return value */
   uint32 res;
   register volatile uint32 *mem = &(var->value);

   do {
      res = Atomic_Read(var);
   } while (__kernel_cmpxchg(res, res & val, mem) != 0);
#elif defined(VM_ARM_V7)
   register volatile uint32 res;
   register volatile uint32 tmp;

   dmb();
   __asm__ __volatile__(
   "1: ldrex %[tmp], [%[var]] \n\t"
      "and %[tmp], %[val] \n\t"
      "strex %[res], %[tmp], [%[var]] \n\t"
      "teq %[res], #0 \n\t"
      "bne 1b"
      : [res] "=&r" (res), [tmp] "=&r" (tmp)
      : [var] "r" (&var->value), [val] "r" (val)
      : "cc"
   );
   dmb();
#else /* VM_ARM_V7 (assume x86*) */
   /* Checked against the Intel manual and GCC --walken */
   __asm__ __volatile__(
      "lock; andl %1, %0"
      : "+m" (var->value)
      : "ri" (val)
      : "cc"
   );
   AtomicEpilogue();
#endif // VM_ARM_V7
#elif defined _MSC_VER
#if defined(__x86_64__)
   _InterlockedAnd((long *)&var->value, (long)val);
#endif // __x86_64__
#endif
}
static INLINE void
Atomic_Or(Atomic_uint32 *var, // IN
          uint32 val)         // IN
{
#if defined(FAKE_ATOMIC)
   var->value |= val;
#elif defined(__GNUC__)
#if defined(__arm__) && !defined(VM_ARM_V7)
   /* same as Atomic_FetchAndOr without return value */
   uint32 res;
   register volatile uint32 *mem = &(var->value);

   do {
      res = Atomic_Read(var);
   } while (__kernel_cmpxchg(res, res | val, mem) != 0);
#elif defined(VM_ARM_V7)
   register volatile uint32 res;
   register volatile uint32 tmp;

   dmb();
   __asm__ __volatile__(
   "1: ldrex %[tmp], [%[var]] \n\t"
      "orr %[tmp], %[val] \n\t"
      "strex %[res], %[tmp], [%[var]] \n\t"
      "teq %[res], #0 \n\t"
      "bne 1b"
      : [res] "=&r" (res), [tmp] "=&r" (tmp)
      : [var] "r" (&var->value), [val] "r" (val)
      : "cc"
   );
   dmb();
#else /* VM_ARM_V7 (assume x86*) */
   /* Checked against the Intel manual and GCC --walken */
   __asm__ __volatile__(
      "lock; orl %1, %0"
      : "+m" (var->value)
      : "ri" (val)
      : "cc"
   );
   AtomicEpilogue();
#endif // VM_ARM_V7
#elif defined _MSC_VER
#if defined(__x86_64__)
   _InterlockedOr((long *)&var->value, (long)val);
#endif // __x86_64__
#endif
}
static INLINE void
Atomic_Xor(Atomic_uint32 *var, // IN
           uint32 val)         // IN
{
#if defined(FAKE_ATOMIC)
   var->value ^= val;
#elif defined(__GNUC__)
#if defined(__arm__) && !defined(VM_ARM_V7)
   uint32 res;
   register volatile uint32 *mem = &(var->value);

   do {
      res = Atomic_Read(var);
   } while (__kernel_cmpxchg(res, res ^ val, mem) != 0);
#elif defined(VM_ARM_V7)
   register volatile uint32 res;
   register volatile uint32 tmp;

   dmb();
   __asm__ __volatile__(
   "1: ldrex %[tmp], [%[var]] \n\t"
      "eor %[tmp], %[val] \n\t"
      "strex %[res], %[tmp], [%[var]] \n\t"
      "teq %[res], #0 \n\t"
      "bne 1b"
      : [res] "=&r" (res), [tmp] "=&r" (tmp)
      : [var] "r" (&var->value), [val] "r" (val)
      : "cc"
   );
   dmb();
#else /* VM_ARM_V7 (assume x86*) */
   /* Checked against the Intel manual and GCC --walken */
   __asm__ __volatile__(
      "lock; xorl %1, %0"
      : "+m" (var->value)
      : "ri" (val)
      : "cc"
   );
   AtomicEpilogue();
#endif // VM_ARM_V7
#elif defined _MSC_VER
#if defined(__x86_64__)
   _InterlockedXor((long *)&var->value, (long)val);
#endif // __x86_64__
#endif
}
static INLINE void
Atomic_Add(Atomic_uint32 *var, // IN
           uint32 val)         // IN
{
#if defined(FAKE_ATOMIC)
   var->value += val;
#elif defined(__GNUC__)
#if defined(__arm__) && !defined(VM_ARM_V7)
   /* same as Atomic_FetchAndAddUnfenced without return value */
   uint32 res;
   register volatile uint32 *mem = &(var->value);

   do {
      res = Atomic_Read(var);
   } while (__kernel_cmpxchg(res, res + val, mem) != 0);
#elif defined(VM_ARM_V7)
   register volatile uint32 res;
   register volatile uint32 tmp;

   dmb();
   __asm__ __volatile__(
   "1: ldrex %[tmp], [%[var]] \n\t"
      "add %[tmp], %[val] \n\t"
      "strex %[res], %[tmp], [%[var]] \n\t"
      "teq %[res], #0 \n\t"
      "bne 1b"
      : [res] "=&r" (res), [tmp] "=&r" (tmp)
      : [var] "r" (&var->value), [val] "r" (val)
      : "cc"
   );
   dmb();
#else /* VM_ARM_V7 (assume x86*) */
   /* Checked against the Intel manual and GCC --walken */
   __asm__ __volatile__(
      "lock; addl %1, %0"
      : "+m" (var->value)
      : "ri" (val)
      : "cc"
   );
   AtomicEpilogue();
#endif // VM_ARM_V7
#elif defined _MSC_VER
#if _MSC_VER >= 1310
   _InterlockedExchangeAdd((long *)&var->value, (long)val);
#endif // _MSC_VER >= 1310
#endif
}
static INLINE void
Atomic_Sub(Atomic_uint32 *var, // IN
           uint32 val)         // IN
{
#if defined(FAKE_ATOMIC)
   var->value -= val;
#elif defined(__GNUC__)
#if defined(__arm__) && !defined(VM_ARM_V7)
   uint32 res;
   register volatile uint32 *mem = &(var->value);

   do {
      res = Atomic_Read(var);
   } while (__kernel_cmpxchg(res, res - val, mem) != 0);
#elif defined(VM_ARM_V7)
   register volatile uint32 res;
   register volatile uint32 tmp;

   dmb();
   __asm__ __volatile__(
   "1: ldrex %[tmp], [%[var]] \n\t"
      "sub %[tmp], %[val] \n\t"
      "strex %[res], %[tmp], [%[var]] \n\t"
      "teq %[res], #0 \n\t"
      "bne 1b"
      : [res] "=&r" (res), [tmp] "=&r" (tmp)
      : [var] "r" (&var->value), [val] "r" (val)
      : "cc"
   );
   dmb();
#else /* VM_ARM_V7 (assume x86*) */
   /* Checked against the Intel manual and GCC --walken */
   __asm__ __volatile__(
      "lock; subl %1, %0"
      : "+m" (var->value)
      : "ri" (val)
      : "cc"
   );
   AtomicEpilogue();
#endif // VM_ARM_V7
#elif defined _MSC_VER
#if _MSC_VER >= 1310
   _InterlockedExchangeAdd((long *)&var->value, (long)-val);
#endif // _MSC_VER >= 1310
#endif
}
static INLINE uint32
Atomic_FetchAndOr(Atomic_uint32 *var, // IN
                  uint32 val)         // IN
{
   uint32 res;

#if defined(__arm__) && !defined(FAKE_ATOMIC)
   register volatile uint32 *mem = &(var->value);

   do {
      res = Atomic_Read(var);
   } while (__kernel_cmpxchg(res, res | val, mem) != 0);
#else
   do {
      res = Atomic_Read(var);
   } while (res != Atomic_ReadIfEqualWrite(var, res, res | val));
#endif

   return res;
}

static INLINE uint32
Atomic_FetchAndAnd(Atomic_uint32 *var, // IN
                   uint32 val)         // IN
{
   uint32 res;

#if defined(__arm__) && !defined(FAKE_ATOMIC)
   register volatile uint32 *mem = &(var->value);

   do {
      res = Atomic_Read(var);
   } while (__kernel_cmpxchg(res, res & val, mem) != 0);
#else
   do {
      res = Atomic_Read(var);
   } while (res != Atomic_ReadIfEqualWrite(var, res, res & val));
#endif

   return res;
}
#define Atomic_ReadOr32 Atomic_FetchAndOr
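/*
 * Illustrative sketch (not part of the original header): because
 * Atomic_FetchAndOr returns the value before the OR, it can tell the
 * caller whether it was the one that set a flag bit. Names below are
 * hypothetical.
 */
#if 0
#define FLAG_INITIALIZED 0x1

static INLINE Bool
SetInitializedSketch(Atomic_uint32 *flags)
{
   // TRUE only for the single caller that flipped the bit from 0 to 1
   return (Atomic_FetchAndOr(flags, FLAG_INITIALIZED) & FLAG_INITIALIZED) == 0;
}
#endif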
/*
 *-----------------------------------------------------------------------------
 *
 * Atomic_ReadAnd64 --
 *
 *      Atomic read (returned), bitwise AND with a value, write.
 *
 * Results:
 *      The value of the variable before the operation.
 *
 *-----------------------------------------------------------------------------
 */

static INLINE uint64
Atomic_ReadAnd64(Atomic_uint64 *var, // IN
                 uint64 val)         // IN
{
   uint64 res;

   do {
      res = Atomic_Read64(var);
   } while (res != Atomic_ReadIfEqualWrite64(var, res, res & val));

   return res;
}
#endif // __x86_64__
static INLINE uint32
Atomic_FetchAndAddUnfenced(Atomic_uint32 *var, // IN
                           uint32 val)         // IN
{
#if defined(FAKE_ATOMIC)
   uint32 res = var->value;

   var->value = res + val;
   return res;
#elif defined(__GNUC__)
#if defined(__arm__) && !defined(VM_ARM_V7)
   uint32 res;
   register volatile uint32 *mem = &(var->value);

   do {
      res = Atomic_Read(var);
   } while (__kernel_cmpxchg(res, res + val, mem) != 0);
   return res;
#elif defined(VM_ARM_V7)
   register volatile uint32 res;
   register volatile uint32 retVal;

   dmb();
   __asm__ __volatile__(
   "1: ldrex %[retVal], [%[var]] \n\t"
      "add %[val], %[retVal] \n\t"
      "strex %[res], %[val], [%[var]] \n\t"
      "teq %[res], #0 \n\t"
      "bne 1b"
      : [res] "=&r" (res), [retVal] "=&r" (retVal)
      : [var] "r" (&var->value), [val] "r" (val)
      : "cc"
   );
   dmb();
   return retVal;
#else // VM_ARM_V7 (assume x86*)
   /* Checked against the Intel manual and GCC --walken */
   __asm__ __volatile__(
      "lock; xaddl %0, %1"
      : "=r" (val),
        "+m" (var->value)
      : "0" (val)
      : "cc"
   );
   return val;
#endif // VM_ARM_V7
#elif defined _MSC_VER
#if _MSC_VER >= 1310
   return _InterlockedExchangeAdd((long *)&var->value, (long)val);
#endif // _MSC_VER >= 1310
#endif
}
static INLINE uint32
Atomic_FetchAndAdd(Atomic_uint32 *var, // IN
                   uint32 val)         // IN
{
#if defined(__GNUC__) && !defined(VM_ARM_V7)
   val = Atomic_FetchAndAddUnfenced(var, val);
   AtomicEpilogue();
   return val;
#else
   return Atomic_FetchAndAddUnfenced(var, val);
#endif
}
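/*
 * Illustrative sketch (not part of the original header): because
 * Atomic_FetchAndAdd returns the value before the addition, it is a
 * natural building block for unique-ID allocation. 'nextId' and the
 * function name are hypothetical.
 */
#if 0
static Atomic_uint32 nextId;

static INLINE uint32
AllocIdSketch(void)
{
   return Atomic_FetchAndAdd(&nextId, 1); // old value doubles as the new unique ID
}
#endif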
static INLINE Bool
Atomic_CMPXCHG64(Atomic_uint64 *var,   // IN/OUT
                 uint64 const *oldVal, // IN
                 uint64 const *newVal) // IN
{
#if defined(FAKE_ATOMIC)
   uint64 readVal = var->value;

   if (*oldVal == readVal) {
      var->value = *newVal;
   }
   return (*oldVal == readVal);
#elif defined(__GNUC__)
#if defined(VM_ARM_V7)
   volatile uint64 tmp;
   int equal;

   dmb();
   __asm__ __volatile__(
      "ldrexd %[tmp], %H[tmp], [%[var]] \n\t"
      "mov %[equal], #1 \n\t"
      "teq %[tmp], %[oldVal] \n\t"
      "teqeq %H[tmp], %H[oldVal] \n\t"
      "strexdeq %[equal], %[newVal], %H[newVal], [%[var]]"
      : [equal] "=&r" (equal), [tmp] "=&r" (tmp)
      : [var] "r" (&var->value), [oldVal] "r" (*oldVal), [newVal] "r" (*newVal)
      : "cc"
   );
   dmb();
   return equal == 0;
#else // assume x86*
   Bool equal;

   /* Checked against the Intel manual and GCC --walken */
   /* ... lock; cmpxchgq (64-bit) / cmpxchg8b (32-bit) sequence ... */
   AtomicEpilogue();
   return equal;
#endif /* 32-bit version */
#elif defined _MSC_VER
#if defined(__x86_64__)
   return (__int64)*oldVal == _InterlockedCompareExchange64((__int64 *)&var->value,
                                                            (__int64)*newVal,
                                                            (__int64)*oldVal);
#endif // __x86_64__
#endif
}
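/*
 * Illustrative sketch (not part of the original header): a 64-bit CAS can
 * update two 32-bit fields in one atomic step, e.g. a counter paired with
 * a generation tag that guards against ABA. Names and layout below are
 * hypothetical.
 */
#if 0
static INLINE void
TaggedIncSketch(Atomic_uint64 *var)
{
   uint64 old, next;

   do {
      old  = Atomic_Read64(var);
      next = ((old + 1) & 0xffffffffULL) |  // low word: value + 1
             (((old >> 32) + 1) << 32);     // high word: generation + 1
   } while (!Atomic_CMPXCHG64(var, &old, &next));
}
#endif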
static INLINE Bool
Atomic_CMPXCHG32(Atomic_uint32 *var, // IN/OUT
                 uint32 oldVal,      // IN
                 uint32 newVal)      // IN
{
#if defined(FAKE_ATOMIC)
   uint32 readVal = var->value;

   if (oldVal == readVal) {
      var->value = newVal;
   }
   return (oldVal == readVal);
#elif defined(__GNUC__)
#if defined(__arm__) && !defined(VM_ARM_V7)
   register volatile uint32 *mem = &(var->value);

   return !__kernel_cmpxchg(oldVal, newVal, mem);
#elif defined(VM_ARM_V7)
   volatile uint32 tmp;
   int equal;

   dmb();
   __asm__ __volatile__(
      "ldrex %[tmp], [%[var]] \n\t"
      "mov %[equal], #1 \n\t"
      "teq %[tmp], %[oldVal] \n\t"
      "strexeq %[equal], %[newVal], [%[var]]"
      : [equal] "=&r" (equal), [tmp] "=&r" (tmp)
      : [var] "r" (&var->value), [oldVal] "r" (oldVal), [newVal] "r" (newVal)
      : "cc"
   );
   dmb();
   return equal == 0;
#else // assume x86*
   Bool equal;
   uint32 dummy;

   __asm__ __volatile__(
      "lock; cmpxchgl %3, %0" "\n\t"
      "sete %1"
      : "+m" (var->value),
        "=qm" (equal),
        "=a" (dummy)
      : "r" (newVal),
        "2" (oldVal)
      : "cc"
   );
   AtomicEpilogue();
   return equal;
#endif // VM_ARM_V7
#else // defined(__GNUC__)
   return (Atomic_ReadIfEqualWrite(var, oldVal, newVal) == oldVal);
#endif // !defined(__GNUC__)
}
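/*
 * Illustrative sketch (not part of the original header): CMPXCHG32's Bool
 * result makes retry loops concise, e.g. a lock-free "store maximum". The
 * function name below is hypothetical.
 */
#if 0
static INLINE void
AtomicMaxSketch(Atomic_uint32 *var, uint32 val)
{
   uint32 old = Atomic_Read(var);

   // stop once var already holds a value >= val, or once our CAS lands
   while (old < val && !Atomic_CMPXCHG32(var, old, val)) {
      old = Atomic_Read(var);
   }
}
#endif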
#if defined(__arm__)
#define Atomic_Read64(x) _fnAtomic_Read64_NotImplementedOnARM
#define Atomic_FetchAndAdd64(x,y) _fnAtomic_FetchAndAdd64_NotImplementedOnARM
#define Atomic_FetchAndInc64(x) _fnAtomic_FetchAndInc64_NotImplementedOnARM
#define Atomic_FetchAndDec64(x) _fnAtomic_FetchAndDec64_NotImplementedOnARM
#define Atomic_Inc64(x) _fnAtomic_Inc64_NotImplementedOnARM
#define Atomic_Dec64(x) _fnAtomic_Dec64_NotImplementedOnARM
#define Atomic_ReadWrite64(x,y) _fnAtomic_ReadWrite64_NotImplementedOnARM
#define Atomic_Write64(x,y) _fnAtomic_Write64_NotImplementedOnARM
#define Atomic_And64(x) _fnAtomic_And64_NotImplementedOnARM
#define Atomic_Or64(x) _fnAtomic_Or64_NotImplementedOnARM
#endif // __arm__
/*
 *-----------------------------------------------------------------------------
 *
 * Atomic_Read64 --
 *
 *      Atomically read a 64-bit integer.
 *
 * Results:
 *      The value of the atomic variable.
 *
 *-----------------------------------------------------------------------------
 */

static INLINE uint64
Atomic_Read64(Atomic_uint64 const *var) // IN
{
#if defined(FAKE_ATOMIC)
   return var->value;
#elif defined(__GNUC__) && defined(__x86_64__)
   uint64 value;

   ASSERT((uintptr_t)var % 8 == 0);
   /*
    * Use asm to ensure we emit a single load.
    */
   __asm__ __volatile__(
      "movq %1, %0"
      : "=r" (value)
      : "m" (var->value)
   );
   return value;
#elif defined(__GNUC__) && defined(__i386__) /* GCC on x86 */
   uint64 value;

   /*
    * Since cmpxchg8b will replace the contents of EDX:EAX with the
    * value in memory if there is no match, a single execution of the
    * instruction suffices to read the variable atomically.
    */
   __asm__ __volatile__(
      "mov %%ebx, %%eax"   "\n\t"
      "mov %%ecx, %%edx"   "\n\t"
      "lock; cmpxchg8b %1"
      : "=&A" (value)
      : "m" (*var)
      : "cc"
   );
   AtomicEpilogue();
   return value;
#elif defined (_MSC_VER) && defined(__x86_64__)
   /*
    * Microsoft docs guarantee "Simple reads and writes to properly
    * aligned 64-bit variables are atomic on 64-bit Windows."
    * http://msdn.microsoft.com/en-us/library/ms684122%28VS.85%29.aspx
    *
    * XXX Verify that value is properly aligned. Bug 61315.
    */
   return var->value;
#elif defined (_MSC_VER) && defined(__i386__)
#   pragma warning(push)
#   pragma warning(disable : 4035)      // disable no-return warning
   {
      __asm mov ecx, var
      __asm mov edx, ecx
      __asm mov eax, ebx
      __asm lock cmpxchg8b [ecx]
      // edx:eax is the return value; this is documented to work. --mann
   }
#   pragma warning(pop)
#elif defined(__GNUC__) && defined (VM_ARM_V7)
   uint64 value;

   __asm__ __volatile__(
      "ldrexd %[value], %H[value], [%[var]] \n\t"
      : [value] "=&r" (value)
      : [var] "r" (&var->value)
   );
   return value;
#else
#   error No compiler defined for Atomic_Read64
#endif
}
/*
 *----------------------------------------------------------------------
 *
 * Atomic_ReadUnaligned64 --
 *
 *      Atomically read a 64 bit integer, possibly misaligned.
 *      This function can be *very* expensive, costing over 50 kcycles.
 *
 *      Note that "var" needs to be writable, even though it will not
 *      be modified.
 *
 * Results:
 *      The value of the atomic variable.
 *
 * Side effects:
 *      None.
 *
 *----------------------------------------------------------------------
 */
#if defined(__x86_64__)
static INLINE uint64
Atomic_ReadUnaligned64(Atomic_uint64 const *var) // IN
{
   return Atomic_ReadIfEqualWrite64((Atomic_uint64*)var, 0, 0);
}
#endif // __x86_64__
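/*
 * Explanatory note (an addition to this excerpt): a locked cmpxchg
 * tolerates unaligned operands. With oldVal == newVal == 0, the CAS
 * either writes 0 back over 0 or fails and returns the current contents;
 * either way the returned snapshot was read atomically. The cost is a
 * locked, possibly cache-line-splitting, read-modify-write cycle, which
 * is why "var" must be writable and why the operation is so expensive.
 */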
   /*
    * Microsoft docs guarantee "Simple reads and writes to properly aligned
    * 64-bit variables are atomic on 64-bit Windows."
    * http://msdn.microsoft.com/en-us/library/ms684122%28VS.85%29.aspx
    *
    * XXX Verify that value is properly aligned. Bug 61315.
    */
   var->value = val;
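/*
 * Illustrative sketch (not part of the original header): the guarantee
 * above is why a plain store suffices on 64-bit Windows, whereas a 32-bit
 * environment would tear the store into two 32-bit halves and must fall
 * back on a primitive such as Atomic_ReadWrite64. The function name is
 * hypothetical.
 */
#if 0
static INLINE void
Write64Sketch(Atomic_uint64 *var, uint64 val)
{
#if defined(__x86_64__)
   var->value = val;             // single aligned 8-byte store: atomic
#else
   Atomic_ReadWrite64(var, val); // exchange-based: atomic on 32-bit too
#endif
}
#endif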