2
* Routines to identify caches on Intel CPU.
5
* Venkatesh Pallipadi : Adding cache identification through cpuid(4)
6
* Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
7
* Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD.
10
#include <xen/config.h>
13
#include <xen/errno.h>
14
#include <asm/processor.h>
24
unsigned char descriptor;
29
/* all the cache descriptor types we care about (no TLB or trace cache entries) */
30
static struct _cache_table cache_table[] __cpuinitdata =
32
{ 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */
33
{ 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */
34
{ 0x0a, LVL_1_DATA, 8 }, /* 2 way set assoc, 32 byte line size */
35
{ 0x0c, LVL_1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */
36
{ 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */
37
{ 0x23, LVL_3, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */
38
{ 0x25, LVL_3, 2048 }, /* 8-way set assoc, sectored cache, 64 byte line size */
39
{ 0x29, LVL_3, 4096 }, /* 8-way set assoc, sectored cache, 64 byte line size */
40
{ 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */
41
{ 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */
42
{ 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */
43
{ 0x3a, LVL_2, 192 }, /* 6-way set assoc, sectored cache, 64 byte line size */
44
{ 0x3b, LVL_2, 128 }, /* 2-way set assoc, sectored cache, 64 byte line size */
45
{ 0x3c, LVL_2, 256 }, /* 4-way set assoc, sectored cache, 64 byte line size */
46
{ 0x3d, LVL_2, 384 }, /* 6-way set assoc, sectored cache, 64 byte line size */
47
{ 0x3e, LVL_2, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */
48
{ 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */
49
{ 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */
50
{ 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */
51
{ 0x44, LVL_2, 1024 }, /* 4-way set assoc, 32 byte line size */
52
{ 0x45, LVL_2, 2048 }, /* 4-way set assoc, 32 byte line size */
53
{ 0x46, LVL_3, 4096 }, /* 4-way set assoc, 64 byte line size */
54
{ 0x47, LVL_3, 8192 }, /* 8-way set assoc, 64 byte line size */
55
{ 0x49, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */
56
{ 0x4a, LVL_3, 6144 }, /* 12-way set assoc, 64 byte line size */
57
{ 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */
58
{ 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */
59
{ 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */
60
{ 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */
61
{ 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */
62
{ 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */
63
{ 0x68, LVL_1_DATA, 32 }, /* 4-way set assoc, sectored cache, 64 byte line size */
64
{ 0x70, LVL_TRACE, 12 }, /* 8-way set assoc */
65
{ 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */
66
{ 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */
67
{ 0x73, LVL_TRACE, 64 }, /* 8-way set assoc */
68
{ 0x78, LVL_2, 1024 }, /* 4-way set assoc, 64 byte line size */
69
{ 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */
70
{ 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */
71
{ 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */
72
{ 0x7c, LVL_2, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */
73
{ 0x7d, LVL_2, 2048 }, /* 8-way set assoc, 64 byte line size */
74
{ 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */
75
{ 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */
76
{ 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */
77
{ 0x84, LVL_2, 1024 }, /* 8-way set assoc, 32 byte line size */
78
{ 0x85, LVL_2, 2048 }, /* 8-way set assoc, 32 byte line size */
79
{ 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */
80
{ 0x87, LVL_2, 1024 }, /* 8-way set assoc, 64 byte line size */
90
CACHE_TYPE_UNIFIED = 3
93
union _cpuid4_leaf_eax {
95
enum _cache_type type:5;
97
unsigned int is_self_initializing:1;
98
unsigned int is_fully_associative:1;
99
unsigned int reserved:4;
100
unsigned int num_threads_sharing:12;
101
unsigned int num_cores_on_die:6;
106
union _cpuid4_leaf_ebx {
108
unsigned int coherency_line_size:12;
109
unsigned int physical_line_partition:10;
110
unsigned int ways_of_associativity:10;
115
union _cpuid4_leaf_ecx {
117
unsigned int number_of_sets:32;
122
struct _cpuid4_info {
123
union _cpuid4_leaf_eax eax;
124
union _cpuid4_leaf_ebx ebx;
125
union _cpuid4_leaf_ecx ecx;
127
cpumask_t shared_cpu_map;
130
unsigned short num_cache_leaves;
132
/* AMD doesn't have CPUID4. Emulate it here to report the same
133
information to the user. This makes some assumptions about the machine:
134
L2 not shared, no SMT etc. that is currently true on AMD CPUs.
136
In theory the TLBs could be reported as fake type (they are in "dummy").
140
unsigned line_size : 8;
141
unsigned lines_per_tag : 8;
143
unsigned size_in_kb : 8;
150
unsigned line_size : 8;
151
unsigned lines_per_tag : 4;
153
unsigned size_in_kb : 16;
160
unsigned line_size : 8;
161
unsigned lines_per_tag : 4;
164
unsigned size_encoded : 14;
169
static const unsigned short assocs[] = {
170
[1] = 1, [2] = 2, [4] = 4, [6] = 8,
171
[8] = 16, [0xa] = 32, [0xb] = 48,
176
static const unsigned char levels[] = { 1, 1, 2, 3 };
177
static const unsigned char types[] = { 1, 2, 3, 3 };
179
static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
180
union _cpuid4_leaf_ebx *ebx,
181
union _cpuid4_leaf_ecx *ecx)
184
unsigned line_size, lines_per_tag, assoc, size_in_kb;
185
union l1_cache l1i, l1d;
188
union l1_cache *l1 = &l1d;
194
cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
195
cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
204
line_size = l1->line_size;
205
lines_per_tag = l1->lines_per_tag;
206
size_in_kb = l1->size_in_kb;
212
line_size = l2.line_size;
213
lines_per_tag = l2.lines_per_tag;
214
/* cpu_data has errata corrections for K7 applied */
215
size_in_kb = current_cpu_data.x86_cache_size;
221
line_size = l3.line_size;
222
lines_per_tag = l3.lines_per_tag;
223
size_in_kb = l3.size_encoded * 512;
229
eax->split.is_self_initializing = 1;
230
eax->split.type = types[leaf];
231
eax->split.level = levels[leaf];
233
eax->split.num_threads_sharing = current_cpu_data.x86_max_cores - 1;
235
eax->split.num_threads_sharing = 0;
236
eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
240
eax->split.is_fully_associative = 1;
241
ebx->split.coherency_line_size = line_size - 1;
242
ebx->split.ways_of_associativity = assocs[assoc] - 1;
243
ebx->split.physical_line_partition = lines_per_tag - 1;
244
ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
245
(ebx->split.ways_of_associativity + 1) - 1;
248
static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
250
union _cpuid4_leaf_eax eax;
251
union _cpuid4_leaf_ebx ebx;
252
union _cpuid4_leaf_ecx ecx;
255
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
256
amd_cpuid4(index, &eax, &ebx, &ecx);
258
cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
259
if (eax.split.type == CACHE_TYPE_NULL)
260
return -EIO; /* better error ? */
262
this_leaf->eax = eax;
263
this_leaf->ebx = ebx;
264
this_leaf->ecx = ecx;
265
this_leaf->size = (ecx.split.number_of_sets + 1) *
266
(ebx.split.coherency_line_size + 1) *
267
(ebx.split.physical_line_partition + 1) *
268
(ebx.split.ways_of_associativity + 1);
272
static int __cpuinit find_num_cache_leaves(void)
274
unsigned int eax, ebx, ecx, edx;
275
union _cpuid4_leaf_eax cache_eax;
280
/* Do cpuid(4) loop to find out num_cache_leaves */
281
cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
282
cache_eax.full = eax;
283
} while (cache_eax.split.type != CACHE_TYPE_NULL);
287
unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
289
unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */
290
unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
291
unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
292
unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
294
if (c->cpuid_level > 3) {
295
static int is_initialized;
297
if (is_initialized == 0) {
298
/* Init num_cache_leaves from boot CPU */
299
num_cache_leaves = find_num_cache_leaves();
304
* Whenever possible use cpuid(4), deterministic cache
305
* parameters cpuid leaf to find the cache details
307
for (i = 0; i < num_cache_leaves; i++) {
308
struct _cpuid4_info this_leaf;
312
retval = cpuid4_cache_lookup(i, &this_leaf);
314
switch(this_leaf.eax.split.level) {
316
if (this_leaf.eax.split.type ==
318
new_l1d = this_leaf.size/1024;
319
else if (this_leaf.eax.split.type ==
321
new_l1i = this_leaf.size/1024;
324
new_l2 = this_leaf.size/1024;
325
num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
326
index_msb = get_count_order(num_threads_sharing);
327
l2_id = c->apicid >> index_msb;
330
new_l3 = this_leaf.size/1024;
331
num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
332
index_msb = get_count_order(num_threads_sharing);
333
l3_id = c->apicid >> index_msb;
342
* Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
345
if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
346
/* supports eax=2 call */
349
unsigned char *dp = (unsigned char *)regs;
352
if (num_cache_leaves != 0 && c->x86 == 15)
355
/* Number of times to iterate */
356
n = cpuid_eax(2) & 0xFF;
358
for ( i = 0 ; i < n ; i++ ) {
359
cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
361
/* If bit 31 is set, this is an unknown format */
362
for ( j = 0 ; j < 3 ; j++ ) {
363
if ( regs[j] < 0 ) regs[j] = 0;
366
/* Byte 0 is level count, not a descriptor */
367
for ( j = 1 ; j < 16 ; j++ ) {
368
unsigned char des = dp[j];
371
/* look up this descriptor in the table */
372
while (cache_table[k].descriptor != 0)
374
if (cache_table[k].descriptor == des) {
375
if (only_trace && cache_table[k].cache_type != LVL_TRACE)
377
switch (cache_table[k].cache_type) {
379
l1i += cache_table[k].size;
382
l1d += cache_table[k].size;
385
l2 += cache_table[k].size;
388
l3 += cache_table[k].size;
391
trace += cache_table[k].size;
420
printk("CPU: Trace cache: %dK uops", trace);
422
printk("CPU: L1 I cache: %dK", l1i);
425
printk(", L1 D cache: %dK\n", l1d);
430
printk("CPU: L2 cache: %dK\n", l2);
433
printk("CPU: L3 cache: %dK\n", l3);
436
c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));