1
/* Generic MTRR (Memory Type Range Register) driver.
3
Copyright (C) 1997-2000 Richard Gooch
4
Copyright (c) 2002 Patrick Mochel
6
This library is free software; you can redistribute it and/or
7
modify it under the terms of the GNU Library General Public
8
License as published by the Free Software Foundation; either
9
version 2 of the License, or (at your option) any later version.
11
This library is distributed in the hope that it will be useful,
12
but WITHOUT ANY WARRANTY; without even the implied warranty of
13
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
Library General Public License for more details.
16
You should have received a copy of the GNU Library General Public
17
License along with this library; if not, write to the Free
18
Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20
Richard Gooch may be reached by email at rgooch@atnf.csiro.au
21
The postal address is:
22
Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
24
Source: "Pentium Pro Family Developer's Manual, Volume 3:
25
Operating System Writer's Guide" (Intel document number 242692),
28
This was cleaned and made readable by Patrick Mochel <mochel@osdl.org>
30
Source: Intel Architecture Software Developers Manual, Volume 3:
31
System Programming Guide; Section 9.11. (1997 edition - PPro).
36
#include <linux/types.h> /* FIXME: kvm_para.h needs this */
38
#include <linux/stop_machine.h>
39
#include <linux/kvm_para.h>
40
#include <linux/uaccess.h>
41
#include <linux/module.h>
42
#include <linux/mutex.h>
43
#include <linux/init.h>
44
#include <linux/sort.h>
45
#include <linux/cpu.h>
46
#include <linux/pci.h>
47
#include <linux/smp.h>
48
#include <linux/syscore_ops.h>
50
#include <asm/processor.h>
59
unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
60
static DEFINE_MUTEX(mtrr_mutex);
62
u64 size_or_mask, size_and_mask;
63
static bool mtrr_aps_delayed_init;
65
static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM];
67
const struct mtrr_ops *mtrr_if;
69
static void set_mtrr(unsigned int reg, unsigned long base,
70
unsigned long size, mtrr_type type);
72
/*
 * Register a vendor-specific set of MTRR operations.
 *
 * @ops is stored in the file-local mtrr_ops[] table at index ops->vendor.
 * Entries whose vendor id is zero or not below X86_VENDOR_NUM are silently
 * ignored, so slot 0 of the table is never filled through this function.
 */
void set_mtrr_ops(const struct mtrr_ops *ops)
74
if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
75
mtrr_ops[ops->vendor] = ops;
78
/* Returns non-zero if we have the write-combining memory type */
79
static int have_wrcomb(void)
83
dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL);
86
* ServerWorks LE chipsets < rev 6 have problems with
87
* write-combining. Don't allow it and leave room for other
88
* chipsets to be tagged
90
if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
91
dev->device == PCI_DEVICE_ID_SERVERWORKS_LE &&
93
pr_info("mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n");
98
* Intel 450NX errata # 23. Non ascending cacheline evictions to
99
* write combining memory may result in data corruption
101
if (dev->vendor == PCI_VENDOR_ID_INTEL &&
102
dev->device == PCI_DEVICE_ID_INTEL_82451NX) {
103
pr_info("mtrr: Intel 450NX MMC detected. Write-combining disabled.\n");
109
return mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0;
112
/*
 * Set num_var_ranges, the number of variable MTRRs.
 *
 * The function returns nothing; the count is taken from the low 8 bits of
 * 'config'. One branch fills 'config' by reading MSR_MTRRcap with rdmsr();
 * the AMD and Cyrix/Centaur branches choose their own value.
 * NOTE(review): the bodies of the vendor-specific branches are not visible
 * in this view - confirm the values they assign.
 */
113
static void __init set_num_var_ranges(void)
115
unsigned long config = 0, dummy;
118
rdmsr(MSR_MTRRcap, config, dummy);
119
else if (is_cpu(AMD))
121
else if (is_cpu(CYRIX) || is_cpu(CENTAUR))
124
num_var_ranges = config & 0xff;
127
/*
 * Initialise the usage counters: every variable-range register in
 * [0, num_var_ranges) starts with a count of 1 in mtrr_usage_table[].
 */
static void __init init_table(void)
131
max = num_var_ranges;
132
for (i = 0; i < max; i++)
133
mtrr_usage_table[i] = 1;
136
/*
 * Argument bundle passed (as the void *info pointer) to
 * mtrr_rendezvous_handler(). It is filled in by set_mtrr() and
 * set_mtrr_from_inactive_cpu(). The handler forwards smp_reg, smp_base,
 * smp_size and smp_type to mtrr_if->set(), unless smp_reg is ~0U, which
 * selects the handler's alternative branch instead of a single-register
 * update.
 */
struct set_mtrr_data {
137
unsigned long smp_base;
138
unsigned long smp_size;
139
unsigned int smp_reg;
144
* mtrr_rendezvous_handler - Work done in the synchronization handler. Executed
146
* @info: pointer to mtrr configuration data
150
static int mtrr_rendezvous_handler(void *info)
152
struct set_mtrr_data *data = info;
155
* We use this same function to initialize the mtrrs during boot,
156
* resume, runtime cpu online and on an explicit request to set a
159
* During boot or suspend, the state of the boot cpu's mtrrs has been
160
* saved, and we want to replicate that across all the cpus that come
161
* online (either at the end of boot or resume or during a runtime cpu
162
* online). If we're doing that, @reg is set to something special and on
163
* all the cpu's we do mtrr_if->set_all() (On the logical cpu that
164
* started the boot/resume sequence, this might be a duplicate
167
if (data->smp_reg != ~0U) {
168
mtrr_if->set(data->smp_reg, data->smp_base,
169
data->smp_size, data->smp_type);
170
} else if (mtrr_aps_delayed_init || !cpu_online(smp_processor_id())) {
176
/*
 * Decide whether two memory types may overlap.
 *
 * Returns non-zero when either type is MTRR_TYPE_UNCACHABLE, or when the
 * pair is write-through/write-back in either order; returns 0 for every
 * other combination (including two identical non-uncachable types).
 */
static inline int types_compatible(mtrr_type type1, mtrr_type type2)
178
return type1 == MTRR_TYPE_UNCACHABLE ||
179
type2 == MTRR_TYPE_UNCACHABLE ||
180
(type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) ||
181
(type1 == MTRR_TYPE_WRBACK && type2 == MTRR_TYPE_WRTHROUGH);
185
* set_mtrr - update mtrrs on all processors
186
* @reg: mtrr in question
191
* This is kinda tricky, but fortunately, Intel spelled it out for us cleanly:
193
* 1. Queue work to do the following on all processors:
194
* 2. Disable Interrupts
195
* 3. Wait for all procs to do so
196
* 4. Enter no-fill cache mode
200
* 8. Disable all range registers
201
* 9. Update the MTRRs
202
* 10. Enable all range registers
203
* 11. Flush all TLBs and caches again
204
* 12. Enter normal cache mode and reenable caching
206
* 14. Wait for buddies to catch up
207
* 15. Enable interrupts.
209
* What does that mean for us? Well, stop_machine() will ensure that
210
* the rendezvous handler is started on each CPU. And in lockstep they
211
* do the state transition of disabling interrupts, updating MTRR's
212
* (the CPU vendors may each do it differently, so we call mtrr_if->set()
213
* callback and let them take care of it.) and enabling interrupts.
215
* Note that the mechanism is the same for UP systems, too; all the SMP stuff
219
set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type)
221
struct set_mtrr_data data = { .smp_reg = reg,
227
stop_machine(mtrr_rendezvous_handler, &data, cpu_online_mask);
230
/*
 * Variant of set_mtrr() that goes through stop_machine_from_inactive_cpu()
 * rather than stop_machine(). The arguments are packed into a
 * struct set_mtrr_data and handed to mtrr_rendezvous_handler().
 * mtrr_ap_init() calls this with reg == ~0U.
 */
static void set_mtrr_from_inactive_cpu(unsigned int reg, unsigned long base,
231
unsigned long size, mtrr_type type)
233
struct set_mtrr_data data = { .smp_reg = reg,
239
stop_machine_from_inactive_cpu(mtrr_rendezvous_handler, &data,
244
* mtrr_add_page - Add a memory type region
245
* @base: Physical base address of region in pages (in units of 4 kB!)
246
* @size: Physical size of region in pages (4 kB)
247
* @type: Type of MTRR desired
248
* @increment: If this is true do usage counting on the region
250
* Memory type region registers control the caching on newer Intel and
251
* non Intel processors. This function allows drivers to request an
252
* MTRR is added. The details and hardware specifics of each processor's
253
* implementation are hidden from the caller, but nevertheless the
254
* caller should expect to need to provide a power of two size on an
255
* equivalent power of two boundary.
257
* If the region cannot be added either because all regions are in use
258
* or the CPU cannot support it a negative value is returned. On success
259
* the register number for this entry is returned, but should be treated
262
* On a multiprocessor machine the changes are made to all processors.
263
* This is required on x86 by the Intel processors.
265
* The available types are
267
* %MTRR_TYPE_UNCACHABLE - No caching
269
* %MTRR_TYPE_WRBACK - Write data back in bursts whenever
271
* %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
273
* %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
275
* BUGS: Needs a quiet flag for the cases where drivers do not mind
276
* failures and do not wish system log messages to be sent.
278
int mtrr_add_page(unsigned long base, unsigned long size,
279
unsigned int type, bool increment)
281
unsigned long lbase, lsize;
282
int i, replace, error;
288
error = mtrr_if->validate_add_page(base, size, type);
292
if (type >= MTRR_NUM_TYPES) {
293
pr_warning("mtrr: type: %u invalid\n", type);
297
/* If the type is WC, check that this processor supports it */
298
if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
299
pr_warning("mtrr: your processor doesn't support write-combining\n");
304
pr_warning("mtrr: zero sized request\n");
308
if (base & size_or_mask || size & size_or_mask) {
309
pr_warning("mtrr: base or size exceeds the MTRR width\n");
316
/* No CPU hotplug when we change MTRR entries */
319
/* Search for existing MTRR */
320
mutex_lock(&mtrr_mutex);
321
for (i = 0; i < num_var_ranges; ++i) {
322
mtrr_if->get(i, &lbase, &lsize, &ltype);
323
if (!lsize || base > lbase + lsize - 1 ||
324
base + size - 1 < lbase)
327
* At this point we know there is some kind of
330
if (base < lbase || base + size - 1 > lbase + lsize - 1) {
332
base + size - 1 >= lbase + lsize - 1) {
333
/* New region encloses an existing region */
335
replace = replace == -1 ? i : -2;
337
} else if (types_compatible(type, ltype))
340
pr_warning("mtrr: 0x%lx000,0x%lx000 overlaps existing"
341
" 0x%lx000,0x%lx000\n", base, size, lbase,
345
/* New region is enclosed by an existing region */
347
if (types_compatible(type, ltype))
349
pr_warning("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
350
base, size, mtrr_attrib_to_str(ltype),
351
mtrr_attrib_to_str(type));
355
++mtrr_usage_table[i];
359
/* Search for an empty MTRR */
360
i = mtrr_if->get_free_region(base, size, replace);
362
set_mtrr(i, base, size, type);
363
if (likely(replace < 0)) {
364
mtrr_usage_table[i] = 1;
366
mtrr_usage_table[i] = mtrr_usage_table[replace];
368
mtrr_usage_table[i]++;
369
if (unlikely(replace != i)) {
370
set_mtrr(replace, 0, 0, 0);
371
mtrr_usage_table[replace] = 0;
375
pr_info("mtrr: no more MTRRs available\n");
379
mutex_unlock(&mtrr_mutex);
384
/*
 * Sanity-check a byte-granular request before it is converted to pages.
 *
 * Logs a warning (plus the offending values at debug level) when @base or
 * @size is not a multiple of PAGE_SIZE. mtrr_add() and mtrr_del() test the
 * result before shifting base/size right by PAGE_SHIFT.
 * NOTE(review): the return statements are not visible in this view;
 * presumably non-zero means "misaligned" - confirm.
 */
static int mtrr_check(unsigned long base, unsigned long size)
386
if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
387
pr_warning("mtrr: size and base must be multiples of 4 kiB\n");
388
pr_debug("mtrr: size: 0x%lx base: 0x%lx\n", size, base);
396
* mtrr_add - Add a memory type region
397
* @base: Physical base address of region
398
* @size: Physical size of region
399
* @type: Type of MTRR desired
400
* @increment: If this is true do usage counting on the region
402
* Memory type region registers control the caching on newer Intel and
403
* non Intel processors. This function allows drivers to request an
404
* MTRR is added. The details and hardware specifics of each processor's
405
* implementation are hidden from the caller, but nevertheless the
406
* caller should expect to need to provide a power of two size on an
407
* equivalent power of two boundary.
409
* If the region cannot be added either because all regions are in use
410
* or the CPU cannot support it a negative value is returned. On success
411
* the register number for this entry is returned, but should be treated
414
* On a multiprocessor machine the changes are made to all processors.
415
* This is required on x86 by the Intel processors.
417
* The available types are
419
* %MTRR_TYPE_UNCACHABLE - No caching
421
* %MTRR_TYPE_WRBACK - Write data back in bursts whenever
423
* %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
425
* %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
427
* BUGS: Needs a quiet flag for the cases where drivers do not mind
428
* failures and do not wish system log messages to be sent.
430
int mtrr_add(unsigned long base, unsigned long size, unsigned int type,
433
if (mtrr_check(base, size))
435
return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
438
EXPORT_SYMBOL(mtrr_add);
441
* mtrr_del_page - delete a memory type region
442
* @reg: Register returned by mtrr_add
443
* @base: Physical base address
444
* @size: Size of region
446
* If register is supplied then base and size are ignored. This is
447
* how drivers should call it.
449
* Releases an MTRR region. If the usage count drops to zero the
450
* register is freed and the region returns to default state.
451
* On success the register is returned, on failure a negative error
454
int mtrr_del_page(int reg, unsigned long base, unsigned long size)
458
unsigned long lbase, lsize;
464
max = num_var_ranges;
465
/* No CPU hotplug when we change MTRR entries */
467
mutex_lock(&mtrr_mutex);
469
/* Search for existing MTRR */
470
for (i = 0; i < max; ++i) {
471
mtrr_if->get(i, &lbase, &lsize, &ltype);
472
if (lbase == base && lsize == size) {
478
pr_debug("mtrr: no MTRR for %lx000,%lx000 found\n",
484
pr_warning("mtrr: register: %d too big\n", reg);
487
mtrr_if->get(reg, &lbase, &lsize, &ltype);
489
pr_warning("mtrr: MTRR %d not used\n", reg);
492
if (mtrr_usage_table[reg] < 1) {
493
pr_warning("mtrr: reg: %d has count=0\n", reg);
496
if (--mtrr_usage_table[reg] < 1)
497
set_mtrr(reg, 0, 0, 0);
500
mutex_unlock(&mtrr_mutex);
506
* mtrr_del - delete a memory type region
507
* @reg: Register returned by mtrr_add
508
* @base: Physical base address
509
* @size: Size of region
511
* If register is supplied then base and size are ignored. This is
512
* how drivers should call it.
514
* Releases an MTRR region. If the usage count drops to zero the
515
* register is freed and the region returns to default state.
516
* On success the register is returned, on failure a negative error
519
int mtrr_del(int reg, unsigned long base, unsigned long size)
521
if (mtrr_check(base, size))
523
return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
525
EXPORT_SYMBOL(mtrr_del);
529
* These should be called implicitly, but we can't yet until all the initcall
532
static void __init init_ifs(void)
534
#ifndef CONFIG_X86_64
541
/* The suspend/resume methods are only for CPU without MTRR. CPU using generic
542
* MTRR driver doesn't require this
550
static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES];
552
/*
 * Suspend hook (installed as mtrr_syscore_ops.suspend): read every
 * variable-range register through mtrr_if->get() and stash its base, size
 * and type in mtrr_value[] so mtrr_restore() can re-program them.
 */
static int mtrr_save(void)
556
for (i = 0; i < num_var_ranges; i++) {
557
mtrr_if->get(i, &mtrr_value[i].lbase,
558
&mtrr_value[i].lsize,
559
&mtrr_value[i].ltype);
564
/*
 * Resume hook (installed as mtrr_syscore_ops.resume): walk mtrr_value[] and
 * re-program, via set_mtrr(), every register that was saved with a
 * non-zero size. Entries whose saved lsize is 0 are left untouched.
 */
static void mtrr_restore(void)
568
for (i = 0; i < num_var_ranges; i++) {
569
if (mtrr_value[i].lsize) {
570
set_mtrr(i, mtrr_value[i].lbase,
572
mtrr_value[i].ltype);
579
/*
 * Syscore callbacks: mtrr_save() runs on suspend and mtrr_restore() on
 * resume. The structure is handed to register_syscore_ops() from
 * mtrr_init_finialize().
 */
static struct syscore_ops mtrr_syscore_ops = {
580
.suspend = mtrr_save,
581
.resume = mtrr_restore,
584
int __initdata changed_by_mtrr_cleanup;
587
* mtrr_bp_init - initialize mtrrs on the boot CPU
589
* This needs to be called early; before any of the other CPUs are
590
* initialized (i.e. before smp_init()).
593
void __init mtrr_bp_init(void)
602
mtrr_if = &generic_mtrr_ops;
603
size_or_mask = 0xff000000; /* 36 bits */
604
size_and_mask = 0x00f00000;
608
* This is an AMD specific MSR, but we assume(hope?) that
609
* Intel will implement it too when they extend the address
612
if (cpuid_eax(0x80000000) >= 0x80000008) {
613
phys_addr = cpuid_eax(0x80000008) & 0xff;
614
/* CPUID workaround for Intel 0F33/0F34 CPU */
615
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
616
boot_cpu_data.x86 == 0xF &&
617
boot_cpu_data.x86_model == 0x3 &&
618
(boot_cpu_data.x86_mask == 0x3 ||
619
boot_cpu_data.x86_mask == 0x4))
622
size_or_mask = ~((1ULL << (phys_addr - PAGE_SHIFT)) - 1);
623
size_and_mask = ~size_or_mask & 0xfffff00000ULL;
624
} else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
625
boot_cpu_data.x86 == 6) {
627
* VIA C* family have Intel style MTRRs,
628
* but don't support PAE
630
size_or_mask = 0xfff00000; /* 32 bits */
635
switch (boot_cpu_data.x86_vendor) {
637
if (cpu_has_k6_mtrr) {
638
/* Pre-Athlon (K6) AMD CPU MTRRs */
639
mtrr_if = mtrr_ops[X86_VENDOR_AMD];
640
size_or_mask = 0xfff00000; /* 32 bits */
644
case X86_VENDOR_CENTAUR:
645
if (cpu_has_centaur_mcr) {
646
mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR];
647
size_or_mask = 0xfff00000; /* 32 bits */
651
case X86_VENDOR_CYRIX:
652
if (cpu_has_cyrix_arr) {
653
mtrr_if = mtrr_ops[X86_VENDOR_CYRIX];
654
size_or_mask = 0xfff00000; /* 32 bits */
664
set_num_var_ranges();
669
if (mtrr_cleanup(phys_addr)) {
670
changed_by_mtrr_cleanup = 1;
677
void mtrr_ap_init(void)
679
if (!use_intel() || mtrr_aps_delayed_init)
682
* Ideally we should hold mtrr_mutex here to avoid mtrr entries
683
* changed, but this routine will be called in cpu boot time,
684
* holding the lock breaks it.
686
* This routine is called in two cases:
688
* 1. very early time of software resume, when there absolutely
689
* isn't mtrr entry changes;
691
* 2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug
692
* lock to prevent mtrr entry changes
694
set_mtrr_from_inactive_cpu(~0U, 0, 0, 0);
698
* Save current fixed-range MTRR state of the BSP
700
void mtrr_save_state(void)
702
smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1);
705
void set_mtrr_aps_delayed_init(void)
710
mtrr_aps_delayed_init = true;
714
* Delayed MTRR initialization for all AP's
716
void mtrr_aps_init(void)
722
* Check if someone has requested the delay of AP MTRR initialization,
723
* by doing set_mtrr_aps_delayed_init(), prior to this point. If not,
726
if (!mtrr_aps_delayed_init)
729
set_mtrr(~0U, 0, 0, 0);
730
mtrr_aps_delayed_init = false;
733
void mtrr_bp_restore(void)
741
static int __init mtrr_init_finialize(void)
747
if (!changed_by_mtrr_cleanup)
753
* The CPU has no MTRR and seems to not support SMP. They have
754
* specific drivers, we use a tricky method to support
755
* suspend/resume for them.
757
* TBD: is there any system with such CPU which supports
758
* suspend/resume? If no, we should remove the code.
760
register_syscore_ops(&mtrr_syscore_ops);
764
subsys_initcall(mtrr_init_finialize);