/*
 * Non Fatal Machine Check Exception Reporting
 *
 * (C) Copyright 2002 Dave Jones. <davej@codemonkey.org.uk>
 *
 * This file contains routines to check for non-fatal MCEs periodically
 * (every MCE_PERIOD, 8s by default).
 */

#include <xen/config.h>
12
#include <xen/types.h>
13
#include <xen/kernel.h>
15
#include <xen/timer.h>
16
#include <xen/errno.h>
17
#include <xen/event.h>
18
#include <xen/sched.h>
19
#include <asm/processor.h>
20
#include <asm/system.h>
25
DEFINE_PER_CPU(cpu_banks_t, poll_bankmask);
26
static struct timer mce_timer;
28
#define MCE_PERIOD MILLISECS(8000)
29
#define MCE_PERIOD_MIN MILLISECS(2000)
30
#define MCE_PERIOD_MAX MILLISECS(16000)
32
static uint64_t period = MCE_PERIOD;
33
static int adjust = 0;
34
static int variable_period = 1;
36
static void mce_checkregs (void *info)
38
mctelem_cookie_t mctc;
39
struct mca_summary bs;
40
static uint64_t dumpcount = 0;
42
mctc = mcheck_mca_logout(MCA_POLLER, __get_cpu_var(poll_bankmask), &bs, NULL);
44
if (bs.errcnt && mctc != NULL) {
47
/* If Dom0 enabled the VIRQ_MCA event, then notify it.
48
* Otherwise, if dom0 has had plenty of time to register
49
* the virq handler but still hasn't then dump telemetry
50
* to the Xen console. The call count may be incremented
51
* on multiple cpus at once and is indicative only - just
52
* a simple-minded attempt to avoid spamming the console
53
* for corrected errors in early startup.
56
if (dom0_vmce_enabled()) {
58
send_guest_global_virq(dom0, VIRQ_MCA);
59
} else if (++dumpcount >= 10) {
60
x86_mcinfo_dump((struct mc_info *)mctelem_dataptr(mctc));
61
mctelem_dismiss(mctc);
63
mctelem_dismiss(mctc);
65
} else if (mctc != NULL) {
66
mctelem_dismiss(mctc);
70
static void mce_work_fn(void *data)
72
on_each_cpu(mce_checkregs, NULL, 1);
74
if (variable_period) {
76
period /= (adjust + 1);
79
if (period > MCE_PERIOD_MAX)
80
period = MCE_PERIOD_MAX;
81
if (period < MCE_PERIOD_MIN)
82
period = MCE_PERIOD_MIN;
85
set_timer(&mce_timer, NOW() + period);
89
/*
 * Boot-time setup of the non-fatal machine check poller.
 *
 * Returns -ENODEV when mce_available() rejects the boot CPU, 0 otherwise.
 */
static int __init init_nonfatal_mce_checker(void)
{
	struct cpuinfo_x86 *c = &boot_cpu_data;

	/* Check for MCE support */
	if (!mce_available(c)) {
		return -ENODEV;
	}

	/*
	 * Check for non-fatal errors every MCE_PERIOD (initial interval).
	 */
	switch (c->x86_vendor) {
	case X86_VENDOR_AMD:
		if (c->x86 == 6) { /* K7 */
			init_timer(&mce_timer, mce_work_fn, NULL, 0);
			set_timer(&mce_timer, NOW() + MCE_PERIOD);
			break;
		}

		/* Assume we are on K8 or newer AMD CPU here */
		amd_nonfatal_mcheck_init(c);
		break;

	case X86_VENDOR_INTEL:
		/*
		 * The P5 family is different. P4/P6 and latest CPUs share the
		 * same polling methods.
		 * NOTE(review): the family-5 exclusion is reconstructed from
		 * this comment - confirm against the intended behaviour.
		 */
		if (c->x86 != 5) {
			init_timer(&mce_timer, mce_work_fn, NULL, 0);
			set_timer(&mce_timer, NOW() + MCE_PERIOD);
		}
		break;

	default:
		/* No polling set up for other vendors. */
		break;
	}

	printk(KERN_INFO "mcheck_poll: Machine check polling timer started.\n");
	return 0;
}
__initcall(init_nonfatal_mce_checker);