/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation, version 2 of the
 * License.
 */

/*
 * mctelem.c - x86 Machine Check Telemetry Transport
 */
#include <xen/types.h>
#include <xen/kernel.h>
#include <xen/config.h>
#include <xen/errno.h>
#include <xen/sched.h>
#include <xen/sched-if.h>
#include <xen/cpumask.h>
#include <xen/event.h>

#include <asm/processor.h>
#include <asm/system.h>
struct mctelem_ent *mcte_next; /* next in chronological order */
34
struct mctelem_ent *mcte_prev; /* previous in chronological order */
35
uint32_t mcte_flags; /* See MCTE_F_* below */
36
uint32_t mcte_refcnt; /* Reference count */
37
void *mcte_data; /* corresponding data payload */
40
/* mcte_flags is partitioned into three independent bit groups:
 * HOME (which freelist the entry is returned to), CLASS (urgency of the
 * error currently stored), and STATE (which list, if any, it is on). */
#define MCTE_F_HOME_URGENT       0x0001U /* free to urgent freelist */
#define MCTE_F_HOME_NONURGENT    0x0002U /* free to nonurgent freelist */
#define MCTE_F_CLASS_URGENT      0x0004U /* in use - urgent errors */
#define MCTE_F_CLASS_NONURGENT   0x0008U /* in use - nonurgent errors */
#define MCTE_F_STATE_FREE        0x0010U /* on a freelist */
#define MCTE_F_STATE_UNCOMMITTED 0x0020U /* reserved; on no list */
#define MCTE_F_STATE_COMMITTED   0x0040U /* on a committed list */
#define MCTE_F_STATE_PROCESSING  0x0080U /* on a processing list */

#define MCTE_F_MASK_HOME  (MCTE_F_HOME_URGENT | MCTE_F_HOME_NONURGENT)
#define MCTE_F_MASK_CLASS (MCTE_F_CLASS_URGENT | MCTE_F_CLASS_NONURGENT)
#define MCTE_F_MASK_STATE (MCTE_F_STATE_FREE | \
                           MCTE_F_STATE_UNCOMMITTED | \
                           MCTE_F_STATE_COMMITTED | \
                           MCTE_F_STATE_PROCESSING)

#define MCTE_HOME(tep) ((tep)->mcte_flags & MCTE_F_MASK_HOME)

#define MCTE_CLASS(tep) ((tep)->mcte_flags & MCTE_F_MASK_CLASS)
#define MCTE_SET_CLASS(tep, new) do { \
    (tep)->mcte_flags &= ~MCTE_F_MASK_CLASS; \
    (tep)->mcte_flags |= MCTE_F_CLASS_##new; } while (0)

#define MCTE_STATE(tep) ((tep)->mcte_flags & MCTE_F_MASK_STATE)
/* Assert the expected current state, then move to the new one; a
 * mismatch indicates list corruption, so BUG rather than continue. */
#define MCTE_TRANSITION_STATE(tep, old, new) do { \
    BUG_ON(MCTE_STATE(tep) != (MCTE_F_STATE_##old)); \
    (tep)->mcte_flags &= ~MCTE_F_MASK_STATE; \
    (tep)->mcte_flags |= (MCTE_F_STATE_##new); } while (0)

/* Pool sizes for the two telemetry classes. */
#define MC_URGENT_NENT    10
#define MC_NONURGENT_NENT 20

#define MC_NCLASSES (MC_NONURGENT + 1)

/* Cookies handed to callers are simply opaque entry pointers. */
#define COOKIE2MCTE(c)   ((struct mctelem_ent *)(c))
#define MCTE2COOKIE(tep) ((mctelem_cookie_t)(tep))
static struct mc_telem_ctl {
78
/* Linked lists that thread the array members together.
80
* The free lists are singly-linked via mcte_next, and we allocate
81
* from them by atomically unlinking an element from the head.
82
* Consumed entries are returned to the head of the free list.
83
* When an entry is reserved off the free list it is not linked
84
* on any list until it is committed or dismissed.
86
* The committed list grows at the head and we do not maintain a
87
* tail pointer; insertions are performed atomically. The head
88
* thus has the most-recently committed telemetry, i.e. the
89
* list is in reverse chronological order. The committed list
90
* is singly-linked via mcte_prev pointers, and mcte_next is NULL.
91
* When we move telemetry from the committed list to the processing
92
* list we atomically unlink the committed list and keep a pointer
93
* to the head of that list; we then traverse the list following
94
* mcte_prev and fill in mcte_next to doubly-link the list, and then
95
* append the tail of the list onto the processing list. If we panic
96
* during this manipulation of the committed list we still have
97
* the pointer to its head so we can recover all entries during
98
* the panic flow (albeit in reverse chronological order).
100
* The processing list is updated in a controlled context, and
101
* we can lock it for updates. The head of the processing list
102
* always has the oldest telemetry, and we append (as above)
103
* at the tail of the processing list. */
104
struct mctelem_ent *mctc_free[MC_NCLASSES];
105
struct mctelem_ent *mctc_committed[MC_NCLASSES];
106
struct mctelem_ent *mctc_processing_head[MC_NCLASSES];
107
struct mctelem_ent *mctc_processing_tail[MC_NCLASSES];
111
struct mctelem_ent *mctc_elems;
113
* Per-CPU processing lists, used for deferred (softirq)
114
* processing of telemetry. mctc_cpu is indexed by the
115
* CPU that the telemetry belongs to. mctc_cpu_processing
116
* is indexed by the CPU that is processing the telemetry.
118
struct mctelem_ent *mctc_cpu[NR_CPUS];
119
struct mctelem_ent *mctc_cpu_processing[NR_CPUS];
122
/* Lock protecting all processing lists */
static DEFINE_SPINLOCK(processing_lock);
/* Lock-free push/exchange on a list head: repeatedly snapshot the head
 * into *old and attempt to replace it with new via cmpxchgptr, retrying
 * on contention.  Callers use this both to push a single entry (old is
 * &tep->mcte_next or &tep->mcte_prev, so the entry links to the old
 * head) and to atomically unlink a whole list (new == NULL).
 * NOTE(review): the snapshot/retry loop body was lost in extraction and
 * is reconstructed here — confirm against the original (including the
 * write barrier between the snapshot and the cmpxchg). */
static void mctelem_xchg_head(struct mctelem_ent **headp,
                              struct mctelem_ent **old,
                              struct mctelem_ent *new)
{
    for (;;) {
        *old = *headp;
        wmb();
        if (cmpxchgptr(headp, *old, new) == *old)
            break;
    }
}
/* Defer processing of the given telemetry entry: push it onto the
 * current CPU's deferred list for later softirq handling by
 * mctelem_process_deferred(). */
void mctelem_defer(mctelem_cookie_t cookie)
{
    struct mctelem_ent *tep = COOKIE2MCTE(cookie);

    mctelem_xchg_head(&mctctl.mctc_cpu[smp_processor_id()],
        &tep->mcte_next, tep);
}
void mctelem_process_deferred(unsigned int cpu,
147
int (*fn)(mctelem_cookie_t))
149
struct mctelem_ent *tep;
150
struct mctelem_ent *head, *prev;
154
* First, unhook the list of telemetry structures, and
155
* hook it up to the processing list head for this CPU.
157
mctelem_xchg_head(&mctctl.mctc_cpu[cpu],
158
&mctctl.mctc_cpu_processing[smp_processor_id()], NULL);
160
head = mctctl.mctc_cpu_processing[smp_processor_id()];
163
* Then, fix up the list to include prev pointers, to make
164
* things a little easier, as the list must be traversed in
165
* chronological order, which is backward from the order they
168
for (tep = head, prev = NULL; tep != NULL; tep = tep->mcte_next) {
169
tep->mcte_prev = prev;
174
* Now walk the list of telemetry structures, handling each
175
* one of them. Unhooking the structure here does not need to
176
* be atomic, as this list is only accessed from a softirq
177
* context; the MCE handler does not touch it.
179
for (tep = prev; tep != NULL; tep = prev) {
180
prev = tep->mcte_prev;
181
tep->mcte_next = tep->mcte_prev = NULL;
183
ret = fn(MCTE2COOKIE(tep));
185
prev->mcte_next = NULL;
186
tep->mcte_prev = tep->mcte_next = NULL;
188
mctelem_commit(MCTE2COOKIE(tep));
190
mctelem_dismiss(MCTE2COOKIE(tep));
194
int mctelem_has_deferred(unsigned int cpu)
196
if (mctctl.mctc_cpu[cpu] != NULL)
201
/* Free an entry to its native free list; the entry must not be linked on
204
static void mctelem_free(struct mctelem_ent *tep)
206
mctelem_class_t target = MCTE_HOME(tep) == MCTE_F_HOME_URGENT ?
207
MC_URGENT : MC_NONURGENT;
209
BUG_ON(tep->mcte_refcnt != 0);
210
BUG_ON(MCTE_STATE(tep) != MCTE_F_STATE_FREE);
212
tep->mcte_prev = NULL;
213
mctelem_xchg_head(&mctctl.mctc_free[target], &tep->mcte_next, tep);
216
/* Increment the reference count of an entry that is not linked on to
217
* any list and which only the caller has a pointer to.
219
static void mctelem_hold(struct mctelem_ent *tep)
224
/* Increment the reference count on an entry that is linked at the head of
225
* a processing list. The caller is responsible for locking the list.
227
static void mctelem_processing_hold(struct mctelem_ent *tep)
229
int which = MCTE_CLASS(tep) == MCTE_F_CLASS_URGENT ?
230
MC_URGENT : MC_NONURGENT;
232
BUG_ON(tep != mctctl.mctc_processing_head[which]);
236
/* Decrement the reference count on an entry that is linked at the head of
237
* a processing list. The caller is responsible for locking the list.
239
static void mctelem_processing_release(struct mctelem_ent *tep)
241
int which = MCTE_CLASS(tep) == MCTE_F_CLASS_URGENT ?
242
MC_URGENT : MC_NONURGENT;
244
BUG_ON(tep != mctctl.mctc_processing_head[which]);
245
if (--tep->mcte_refcnt == 0) {
246
MCTE_TRANSITION_STATE(tep, PROCESSING, FREE);
247
mctctl.mctc_processing_head[which] = tep->mcte_next;
252
void mctelem_init(int reqdatasz)
254
static int called = 0;
255
static int datasz = 0, realdatasz = 0;
259
BUG_ON(MC_URGENT != 0 || MC_NONURGENT != 1 || MC_NCLASSES != 2);
261
/* Called from mcheck_init for all processors; initialize for the
262
* first call only (no race here since the boot cpu completes
263
* init before others start up). */
265
realdatasz = reqdatasz;
266
datasz = (reqdatasz & ~0xf) + 0x10; /* 16 byte roundup */
268
BUG_ON(reqdatasz != realdatasz);
272
if ((mctctl.mctc_elems = xmalloc_array(struct mctelem_ent,
273
MC_URGENT_NENT + MC_NONURGENT_NENT)) == NULL ||
274
(datarr = xmalloc_bytes((MC_URGENT_NENT + MC_NONURGENT_NENT) *
276
if (mctctl.mctc_elems)
277
xfree(mctctl.mctc_elems);
278
printk("Allocations for MCA telemetry failed\n");
282
for (i = 0; i < MC_URGENT_NENT + MC_NONURGENT_NENT; i++) {
283
struct mctelem_ent *tep, **tepp;
285
tep = mctctl.mctc_elems + i;
286
tep->mcte_flags = MCTE_F_STATE_FREE;
287
tep->mcte_refcnt = 0;
288
tep->mcte_data = datarr + i * datasz;
290
if (i < MC_URGENT_NENT) {
291
tepp = &mctctl.mctc_free[MC_URGENT];
292
tep->mcte_flags |= MCTE_F_HOME_URGENT;
294
tepp = &mctctl.mctc_free[MC_NONURGENT];
295
tep->mcte_flags |= MCTE_F_HOME_NONURGENT;
298
tep->mcte_next = *tepp;
299
tep->mcte_prev = NULL;
304
/* incremented non-atomically when reserve fails */
static int mctelem_drop_count;
/* Reserve a telemetry entry, or return NULL if none available.
308
* If we return an entry then the caller must subsequently call exactly one of
309
* mctelem_unreserve or mctelem_commit for that entry.
311
mctelem_cookie_t mctelem_reserve(mctelem_class_t which)
313
struct mctelem_ent **freelp;
314
struct mctelem_ent *oldhead, *newhead;
315
mctelem_class_t target = (which == MC_URGENT) ?
316
MC_URGENT : MC_NONURGENT;
318
freelp = &mctctl.mctc_free[target];
320
if ((oldhead = *freelp) == NULL) {
321
if (which == MC_URGENT && target == MC_URGENT) {
322
/* raid the non-urgent freelist */
323
target = MC_NONURGENT;
324
freelp = &mctctl.mctc_free[target];
327
mctelem_drop_count++;
332
newhead = oldhead->mcte_next;
333
if (cmpxchgptr(freelp, oldhead, newhead) == oldhead) {
334
struct mctelem_ent *tep = oldhead;
337
MCTE_TRANSITION_STATE(tep, FREE, UNCOMMITTED);
338
tep->mcte_next = NULL;
339
tep->mcte_prev = NULL;
340
if (which == MC_URGENT)
341
MCTE_SET_CLASS(tep, URGENT);
343
MCTE_SET_CLASS(tep, NONURGENT);
344
return MCTE2COOKIE(tep);
349
/* Return the payload buffer associated with a reserved entry. */
void *mctelem_dataptr(mctelem_cookie_t cookie)
{
    struct mctelem_ent *tep = COOKIE2MCTE(cookie);

    return tep->mcte_data;
}
/* Release a previously reserved entry back to the freelist without
357
* submitting it for logging. The entry must not be linked on to any
358
* list - that's how mctelem_reserve handed it out.
360
void mctelem_dismiss(mctelem_cookie_t cookie)
362
struct mctelem_ent *tep = COOKIE2MCTE(cookie);
365
MCTE_TRANSITION_STATE(tep, UNCOMMITTED, FREE);
369
/* Commit an entry with completed telemetry for logging. The caller must
370
* not reference the entry after this call. Note that we add entries
371
* at the head of the committed list, so that list therefore has entries
372
* in reverse chronological order.
374
void mctelem_commit(mctelem_cookie_t cookie)
376
struct mctelem_ent *tep = COOKIE2MCTE(cookie);
377
mctelem_class_t target = MCTE_CLASS(tep) == MCTE_F_CLASS_URGENT ?
378
MC_URGENT : MC_NONURGENT;
380
BUG_ON(tep->mcte_next != NULL || tep->mcte_prev != NULL);
381
MCTE_TRANSITION_STATE(tep, UNCOMMITTED, COMMITTED);
383
mctelem_xchg_head(&mctctl.mctc_committed[target], &tep->mcte_prev, tep);
386
/* Move telemetry from committed list to processing list, reversing the
387
* list into chronological order. The processing list has been
388
* locked by the caller, and may be non-empty. We append the
389
* reversed committed list on to the tail of the processing list.
390
* The committed list may grow even while we run, so use atomic
391
* operations to swap NULL to the freelist head.
393
* Note that "chronological order" means the order in which producers
394
* won additions to the processing list, which may not reflect the
395
* strict chronological order of the associated events if events are
396
* closely spaced in time and contend for the processing list at once.
399
static struct mctelem_ent *dangling[MC_NCLASSES];
401
/* Append the committed list for @which onto the tail of the processing
 * list, reversing it into chronological order on the way.  Caller holds
 * processing_lock.
 * NOTE(review): the empty-list guard and two interior assignments were
 * lost in extraction and are reconstructed — confirm against the
 * original. */
static void mctelem_append_processing(mctelem_class_t which)
{
    mctelem_class_t target = which == MC_URGENT ?
        MC_URGENT : MC_NONURGENT;
    struct mctelem_ent **commlp = &mctctl.mctc_committed[target];
    struct mctelem_ent **proclhp = &mctctl.mctc_processing_head[target];
    struct mctelem_ent **procltp = &mctctl.mctc_processing_tail[target];
    struct mctelem_ent *tep, *ltep;

    /* Check for an empty list; no race since we hold the processing lock */
    if (*commlp == NULL)
        return;

    /* Atomically unlink the committed list, and keep a pointer to
     * the list we unlink in a well-known location so it can be
     * picked up in panic code should we panic between this unlink
     * and the append to the processing list. */
    mctelem_xchg_head(commlp, &dangling[target], NULL);

    if (dangling[target] == NULL)
        return;

    /* Traverse the list following the previous pointers (reverse
     * chronological order).  For each entry fill in the next pointer
     * and transition the element state. */
    for (tep = dangling[target], ltep = NULL; tep != NULL;
        tep = tep->mcte_prev) {
        MCTE_TRANSITION_STATE(tep, COMMITTED, PROCESSING);
        tep->mcte_next = ltep;
        ltep = tep;
    }

    /* ltep points to the head of a chronologically ordered linked
     * list of telemetry entries ending at the most recent entry
     * dangling[target] if mcte_next is followed; tack this on to
     * the processing list.
     */
    if (*proclhp == NULL) {
        *proclhp = ltep;
        *procltp = dangling[target];
    } else {
        (*procltp)->mcte_next = ltep;
        ltep->mcte_prev = *procltp;
        *procltp = dangling[target];
    }

    dangling[target] = NULL;
}
/* Begin consuming the oldest telemetry of class @which: drain any
 * committed entries onto the processing list, then take a hold on the
 * processing-list head and return it as a cookie (NULL if none).  The
 * caller must end the transaction with mctelem_consume_oldest_end. */
mctelem_cookie_t mctelem_consume_oldest_begin(mctelem_class_t which)
{
    mctelem_class_t target = (which == MC_URGENT) ?
        MC_URGENT : MC_NONURGENT;
    struct mctelem_ent *tep;

    spin_lock(&processing_lock);
    mctelem_append_processing(target);
    if ((tep = mctctl.mctc_processing_head[target]) == NULL) {
        spin_unlock(&processing_lock);
        return NULL;
    }

    mctelem_processing_hold(tep);
    spin_unlock(&processing_lock);
    return MCTE2COOKIE(tep);
}
/* End a consume-oldest transaction: drop the hold taken by
 * mctelem_consume_oldest_begin, freeing the entry on last release. */
void mctelem_consume_oldest_end(mctelem_cookie_t cookie)
{
    struct mctelem_ent *tep = COOKIE2MCTE(cookie);

    spin_lock(&processing_lock);
    mctelem_processing_release(tep);
    spin_unlock(&processing_lock);
}
/* Acknowledge (release) a consumed entry of class @which, but only if it
 * is still the head of that class's processing list — a stale cookie for
 * an entry that has already moved on is silently ignored.
 * NOTE(review): lines between the locals and the lock were lost in
 * extraction; a NULL-cookie guard is the presumed content — confirm
 * against the original. */
void mctelem_ack(mctelem_class_t which, mctelem_cookie_t cookie)
{
    mctelem_class_t target = (which == MC_URGENT) ?
        MC_URGENT : MC_NONURGENT;
    struct mctelem_ent *tep = COOKIE2MCTE(cookie);

    if (tep == NULL)
        return;

    spin_lock(&processing_lock);
    if (tep == mctctl.mctc_processing_head[target])
        mctelem_processing_release(tep);
    spin_unlock(&processing_lock);
}