/*
 * Copyright (c) 1991-1994 by Xerox Corporation.  All rights reserved.
 * Copyright (c) 1996-1999 by Silicon Graphics.  All rights reserved.
 * Copyright (c) 1999-2004 Hewlett-Packard Development Company, L.P.
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */
/* FIXME.  Incomplete.  No support for 64 bits.                         */

/* Memory model documented at http://www-106.ibm.com/developerworks/    */
/* eserver/articles/archguide.html and (clearer)                        */
/* http://www-106.ibm.com/developerworks/eserver/articles/powerpc.html. */
/* There appears to be no implicit ordering between any kind of         */
/* independent memory references.                                       */
/* Architecture enforces some ordering based on control dependence.     */
/* I don't know if that could help.                                     */
/* Data-dependent loads are always ordered.                             */
/* Based on the above references, eieio is intended for use on          */
/* uncached memory, which we don't support.  It does not order loads    */
/* from cached memory.                                                  */
/* Thanks to Maged Michael, Doug Lea, and Roger Hoover for helping to   */
/* track some of this down and correcting my misunderstandings. -HB     */
33
#include "../all_aligned_atomic_load_store.h"
35
#include "../test_and_set_t_is_ao_t.h"
36
/* There seems to be no byte equivalent of lwarx, so this */
37
/* may really be what we want, at least in the 32-bit case. */
42
__asm__ __volatile__("sync" : : : "memory");
45
#define AO_HAVE_nop_full
47
/* lwsync apparently works for everything but a StoreLoad barrier. */
51
__asm__ __volatile__("lwsync" : : : "memory");
54
#define AO_nop_write() AO_lwsync()
55
#define AO_HAVE_nop_write
57
#define AO_nop_read() AO_lwsync()
58
#define AO_HAVE_nop_read
/* We explicitly specify load_acquire, since it is important, and can   */
/* be implemented relatively cheaply.  It could be implemented          */
/* with an ordinary load followed by a lwsync.  But the general wisdom  */
/* seems to be that a data-dependent branch followed by an isync is     */
/* cheaper.  And the documentation is fairly explicit that this also    */
/* has acquire semantics.                                               */
/* ppc64 uses ld not lwz.                                               */
#if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
69
AO_load_acquire(volatile AO_t *addr)
73
/* FIXME: We should get gcc to allocate one of the condition */
74
/* registers. I always got "impossible constraint" when I */
75
/* tried the "y" constraint. */
76
__asm__ __volatile__ (
82
: "m"(*addr) : "memory", "cc");
87
AO_load_acquire(volatile AO_t *addr)
91
/* FIXME: We should get gcc to allocate one of the condition */
92
/* registers. I always got "impossible constraint" when I */
93
/* tried the "y" constraint. */
94
__asm__ __volatile__ (
100
: "m"(*addr) : "memory", "cc");
104
#define AO_HAVE_load_acquire
106
/* We explicitly specify store_release, since it relies */
107
/* on the fact that lwsync is also a LoadStore barrier. */
109
AO_store_release(volatile AO_t *addr, AO_t value)
115
#define AO_HAVE_load_acquire
117
/* This is similar to the code in the garbage collector. Deleting */
118
/* this and having it synthesized from compare_and_swap would probably */
119
/* only cost us a load immediate instruction. */
120
#if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
121
/* Completely untested. And we should be using smaller objects anyway. */
122
AO_INLINE AO_TS_VAL_t
123
AO_test_and_set(volatile AO_TS_t *addr) {
124
unsigned long oldval;
125
unsigned long temp = 1; /* locked value */
127
__asm__ __volatile__(
128
"1:ldarx %0,0,%1\n" /* load and reserve */
129
"cmpdi %0, 0\n" /* if load is */
130
"bne 2f\n" /* non-zero, return already set */
131
"stdcx. %2,0,%1\n" /* else store conditional */
132
"bne- 1b\n" /* retry if lost reservation */
133
"2:\n" /* oldval is zero if we set */
135
: "r"(addr), "r"(temp)
138
return (AO_TS_VAL_t)oldval;
143
AO_INLINE AO_TS_VAL_t
144
AO_test_and_set(volatile AO_TS_t *addr) {
146
int temp = 1; /* locked value */
148
__asm__ __volatile__(
149
"1:lwarx %0,0,%1\n" /* load and reserve */
150
"cmpwi %0, 0\n" /* if load is */
151
"bne 2f\n" /* non-zero, return already set */
152
"stwcx. %2,0,%1\n" /* else store conditional */
153
"bne- 1b\n" /* retry if lost reservation */
154
"2:\n" /* oldval is zero if we set */
156
: "r"(addr), "r"(temp)
159
return (AO_TS_VAL_t)oldval;
164
#define AO_have_test_and_set
166
AO_INLINE AO_TS_VAL_t
167
AO_test_and_set_acquire(volatile AO_TS_t *addr) {
168
AO_TS_VAL_t result = AO_test_and_set(addr);
173
#define AO_HAVE_test_and_set_acquire
175
AO_INLINE AO_TS_VAL_t
176
AO_test_and_set_release(volatile AO_TS_t *addr) {
178
return AO_test_and_set(addr);
181
#define AO_HAVE_test_and_set_release
183
AO_INLINE AO_TS_VAL_t
184
AO_test_and_set_full(volatile AO_TS_t *addr) {
187
result = AO_test_and_set(addr);
192
#define AO_HAVE_test_and_set_full
194
#if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
195
/* FIXME: Completely untested. */
197
AO_compare_and_swap(volatile AO_t *addr, AO_t old, AO_t new_val) {
201
__asm__ __volatile__(
202
"1:ldarx %0,0,%2\n" /* load and reserve */
203
"cmpd %0, %4\n" /* if load is not equal to */
204
"bne 2f\n" /* old, fail */
205
"stdcx. %3,0,%2\n" /* else store conditional */
206
"bne- 1b\n" /* retry if lost reservation */
207
"li %1,1\n" /* result = 1; */
209
: "=&r"(oldval), "=&r"(result)
210
: "r"(addr), "r"(new_val), "r"(old), "1"(result)
219
AO_compare_and_swap(volatile AO_t *addr, AO_t old, AO_t new_val) {
223
__asm__ __volatile__(
224
"1:lwarx %0,0,%2\n" /* load and reserve */
225
"cmpw %0, %4\n" /* if load is not equal to */
226
"bne 2f\n" /* old, fail */
227
"stwcx. %3,0,%2\n" /* else store conditional */
228
"bne- 1b\n" /* retry if lost reservation */
229
"li %1,1\n" /* result = 1; */
231
: "=&r"(oldval), "=&r"(result)
232
: "r"(addr), "r"(new_val), "r"(old), "1"(result)
239
#define AO_HAVE_compare_and_swap
242
AO_compare_and_swap_acquire(volatile AO_t *addr, AO_t old, AO_t new_val) {
243
int result = AO_compare_and_swap(addr, old, new_val);
248
#define AO_HAVE_compare_and_swap_acquire
251
AO_compare_and_swap_release(volatile AO_t *addr, AO_t old, AO_t new_val) {
253
return AO_compare_and_swap(addr, old, new_val);
256
#define AO_HAVE_compare_and_swap_release
259
AO_compare_and_swap_full(volatile AO_t *addr, AO_t old, AO_t new_val) {
262
result = AO_compare_and_swap(addr, old, new_val);
267
#define AO_HAVE_compare_and_swap_full
/* FIXME: We should also implement fetch_and_add and or primitives      */