~ubuntu-branches/ubuntu/hardy/kvm/hardy-backports


Viewing changes to kernel/tmp/mmu.c

  • Committer: Bazaar Package Importer
  • Author(s): Soren Hansen
  • Date: 2008-01-03 10:39:25 UTC
  • mfrom: (1.1.16 upstream)
  • Revision ID: james.westby@ubuntu.com-20080103103925-8480u7sq2646hvbh
Tags: 1:59+dfsg-0ubuntu1
* New upstream release
* Build with alsa support (cherry pick from 57+dfsg-2)

/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * MMU support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "vmx.h"
#include "mmu.h"

#include <linux/kvm_host.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/swap.h>

#include <asm/page.h>
#include <asm/cmpxchg.h>
#include <asm/io.h>

#undef MMU_DEBUG

#undef AUDIT

#ifdef AUDIT
static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg);
#else
static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) {}
#endif

#ifdef MMU_DEBUG

#define pgprintk(x...) do { if (dbg) printk(x); } while (0)
#define rmap_printk(x...) do { if (dbg) printk(x); } while (0)

#else

#define pgprintk(x...) do { } while (0)
#define rmap_printk(x...) do { } while (0)

#endif

#if defined(MMU_DEBUG) || defined(AUDIT)
static int dbg = 1;
#endif

#ifndef MMU_DEBUG
#define ASSERT(x) do { } while (0)
#else
#define ASSERT(x)                                                       \
        if (!(x)) {                                                     \
                printk(KERN_WARNING "assertion failed %s:%d: %s\n",     \
                       __FILE__, __LINE__, #x);                         \
        }
#endif

#define PT64_PT_BITS 9
#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
#define PT32_PT_BITS 10
#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS)

#define PT_WRITABLE_SHIFT 1

#define PT_PRESENT_MASK (1ULL << 0)
#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
#define PT_USER_MASK (1ULL << 2)
#define PT_PWT_MASK (1ULL << 3)
#define PT_PCD_MASK (1ULL << 4)
#define PT_ACCESSED_MASK (1ULL << 5)
#define PT_DIRTY_MASK (1ULL << 6)
#define PT_PAGE_SIZE_MASK (1ULL << 7)
#define PT_PAT_MASK (1ULL << 7)
#define PT_GLOBAL_MASK (1ULL << 8)
#define PT64_NX_SHIFT 63
#define PT64_NX_MASK (1ULL << PT64_NX_SHIFT)

#define PT_PAT_SHIFT 7
#define PT_DIR_PAT_SHIFT 12
#define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT)

#define PT32_DIR_PSE36_SIZE 4
#define PT32_DIR_PSE36_SHIFT 13
#define PT32_DIR_PSE36_MASK \
        (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)


#define PT_FIRST_AVAIL_BITS_SHIFT 9
#define PT64_SECOND_AVAIL_BITS_SHIFT 52

#define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)

#define VALID_PAGE(x) ((x) != INVALID_PAGE)

#define PT64_LEVEL_BITS 9

#define PT64_LEVEL_SHIFT(level) \
                (PAGE_SHIFT + (level - 1) * PT64_LEVEL_BITS)

#define PT64_LEVEL_MASK(level) \
                (((1ULL << PT64_LEVEL_BITS) - 1) << PT64_LEVEL_SHIFT(level))

#define PT64_INDEX(address, level)\
        (((address) >> PT64_LEVEL_SHIFT(level)) & ((1 << PT64_LEVEL_BITS) - 1))


#define PT32_LEVEL_BITS 10

#define PT32_LEVEL_SHIFT(level) \
                (PAGE_SHIFT + (level - 1) * PT32_LEVEL_BITS)

#define PT32_LEVEL_MASK(level) \
                (((1ULL << PT32_LEVEL_BITS) - 1) << PT32_LEVEL_SHIFT(level))

#define PT32_INDEX(address, level)\
        (((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1))


#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
#define PT64_DIR_BASE_ADDR_MASK \
        (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + PT64_LEVEL_BITS)) - 1))

#define PT32_BASE_ADDR_MASK PAGE_MASK
#define PT32_DIR_BASE_ADDR_MASK \
        (PAGE_MASK & ~((1ULL << (PAGE_SHIFT + PT32_LEVEL_BITS)) - 1))

#define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \
                        | PT64_NX_MASK)

#define PFERR_PRESENT_MASK (1U << 0)
#define PFERR_WRITE_MASK (1U << 1)
#define PFERR_USER_MASK (1U << 2)
#define PFERR_FETCH_MASK (1U << 4)

#define PT64_ROOT_LEVEL 4
#define PT32_ROOT_LEVEL 2
#define PT32E_ROOT_LEVEL 3

#define PT_DIRECTORY_LEVEL 2
#define PT_PAGE_TABLE_LEVEL 1

#define RMAP_EXT 4

#define ACC_EXEC_MASK    1
#define ACC_WRITE_MASK   PT_WRITABLE_MASK
#define ACC_USER_MASK    PT_USER_MASK
#define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)

162
 
struct kvm_rmap_desc {
163
 
        u64 *shadow_ptes[RMAP_EXT];
164
 
        struct kvm_rmap_desc *more;
165
 
};
166
 
 
167
 
static struct kmem_cache *pte_chain_cache;
168
 
static struct kmem_cache *rmap_desc_cache;
169
 
static struct kmem_cache *mmu_page_header_cache;
170
 
 
171
 
static u64 __read_mostly shadow_trap_nonpresent_pte;
172
 
static u64 __read_mostly shadow_notrap_nonpresent_pte;
173
 
 
174
 
void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
175
 
{
176
 
        shadow_trap_nonpresent_pte = trap_pte;
177
 
        shadow_notrap_nonpresent_pte = notrap_pte;
178
 
}
179
 
EXPORT_SYMBOL_GPL(kvm_mmu_set_nonpresent_ptes);
180
 
 
181
 
static int is_write_protection(struct kvm_vcpu *vcpu)
182
 
{
183
 
        return vcpu->arch.cr0 & X86_CR0_WP;
184
 
}
185
 
 
186
 
static int is_cpuid_PSE36(void)
187
 
{
188
 
        return 1;
189
 
}
190
 
 
191
 
static int is_nx(struct kvm_vcpu *vcpu)
192
 
{
193
 
        return vcpu->arch.shadow_efer & EFER_NX;
194
 
}
195
 
 
196
 
static int is_present_pte(unsigned long pte)
197
 
{
198
 
        return pte & PT_PRESENT_MASK;
199
 
}
200
 
 
201
 
static int is_shadow_present_pte(u64 pte)
202
 
{
203
 
        pte &= ~PT_SHADOW_IO_MARK;
204
 
        return pte != shadow_trap_nonpresent_pte
205
 
                && pte != shadow_notrap_nonpresent_pte;
206
 
}
207
 
 
208
 
static int is_writeble_pte(unsigned long pte)
209
 
{
210
 
        return pte & PT_WRITABLE_MASK;
211
 
}
212
 
 
213
 
static int is_dirty_pte(unsigned long pte)
214
 
{
215
 
        return pte & PT_DIRTY_MASK;
216
 
}
217
 
 
218
 
static int is_io_pte(unsigned long pte)
219
 
{
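        /*
         * PT_SHADOW_IO_MARK (software-available bit 9 of the spte) tags
         * shadow ptes for guest frames with no backing page (typically
         * MMIO); mmu_set_spte() below sets it when gfn_to_page() returns
         * the error page.
         */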
220
 
        return pte & PT_SHADOW_IO_MARK;
221
 
}
222
 
 
223
 
static int is_rmap_pte(u64 pte)
224
 
{
225
 
        return pte != shadow_trap_nonpresent_pte
226
 
                && pte != shadow_notrap_nonpresent_pte;
227
 
}
228
 
 
229
 
static gfn_t pse36_gfn_delta(u32 gpte)
230
 
{
231
 
        int shift = 32 - PT32_DIR_PSE36_SHIFT - PAGE_SHIFT;
232
 
 
233
 
        return (gpte & PT32_DIR_PSE36_MASK) << shift;
234
 
}
235
 
 
236
 
static void set_shadow_pte(u64 *sptep, u64 spte)
237
 
{
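        /*
         * Use set_64bit() so the 64-bit spte is updated atomically; on a
         * 32-bit host a plain assignment would be two stores, and another
         * vcpu (or the hardware page walker) could observe a torn entry.
         */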
238
 
#ifdef CONFIG_X86_64
239
 
        set_64bit((unsigned long *)sptep, spte);
240
 
#else
241
 
        set_64bit((unsigned long long *)sptep, spte);
242
 
#endif
243
 
}
244
 
 
245
 
static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
246
 
                                  struct kmem_cache *base_cache, int min)
247
 
{
248
 
        void *obj;
249
 
 
250
 
        if (cache->nobjs >= min)
251
 
                return 0;
252
 
        while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
253
 
                obj = kmem_cache_zalloc(base_cache, GFP_KERNEL);
254
 
                if (!obj)
255
 
                        return -ENOMEM;
256
 
                cache->objects[cache->nobjs++] = obj;
257
 
        }
258
 
        return 0;
259
 
}
260
 
 
261
 
static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
262
 
{
263
 
        while (mc->nobjs)
264
 
                kfree(mc->objects[--mc->nobjs]);
265
 
}
266
 
 
267
 
static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache,
268
 
                                       int min)
269
 
{
270
 
        struct page *page;
271
 
 
272
 
        if (cache->nobjs >= min)
273
 
                return 0;
274
 
        while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
275
 
                page = alloc_page(GFP_KERNEL);
276
 
                if (!page)
277
 
                        return -ENOMEM;
278
 
                set_page_private(page, 0);
279
 
                cache->objects[cache->nobjs++] = page_address(page);
280
 
        }
281
 
        return 0;
282
 
}
283
 
 
284
 
static void mmu_free_memory_cache_page(struct kvm_mmu_memory_cache *mc)
285
 
{
286
 
        while (mc->nobjs)
287
 
                free_page((unsigned long)mc->objects[--mc->nobjs]);
288
 
}
289
 
 
290
 
static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
291
 
{
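        /*
         * Pre-fill the per-vcpu object caches (pte chains, rmap descriptors,
         * shadow pages and page headers) up front, so the fault and
         * pte-write paths can pull objects out of them without having an
         * allocation fail in the middle of a shadow page table update.
         */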
292
 
        int r;
293
 
 
294
 
        kvm_mmu_free_some_pages(vcpu);
295
 
        r = mmu_topup_memory_cache(&vcpu->arch.mmu_pte_chain_cache,
296
 
                                   pte_chain_cache, 4);
297
 
        if (r)
298
 
                goto out;
299
 
        r = mmu_topup_memory_cache(&vcpu->arch.mmu_rmap_desc_cache,
300
 
                                   rmap_desc_cache, 1);
301
 
        if (r)
302
 
                goto out;
303
 
        r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8);
304
 
        if (r)
305
 
                goto out;
306
 
        r = mmu_topup_memory_cache(&vcpu->arch.mmu_page_header_cache,
307
 
                                   mmu_page_header_cache, 4);
308
 
out:
309
 
        return r;
310
 
}
311
 
 
312
 
static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
313
 
{
314
 
        mmu_free_memory_cache(&vcpu->arch.mmu_pte_chain_cache);
315
 
        mmu_free_memory_cache(&vcpu->arch.mmu_rmap_desc_cache);
316
 
        mmu_free_memory_cache_page(&vcpu->arch.mmu_page_cache);
317
 
        mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache);
318
 
}
319
 
 
320
 
static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc,
321
 
                                    size_t size)
322
 
{
323
 
        void *p;
324
 
 
325
 
        BUG_ON(!mc->nobjs);
326
 
        p = mc->objects[--mc->nobjs];
327
 
        memset(p, 0, size);
328
 
        return p;
329
 
}
330
 
 
331
 
static struct kvm_pte_chain *mmu_alloc_pte_chain(struct kvm_vcpu *vcpu)
332
 
{
333
 
        return mmu_memory_cache_alloc(&vcpu->arch.mmu_pte_chain_cache,
334
 
                                      sizeof(struct kvm_pte_chain));
335
 
}
336
 
 
337
 
static void mmu_free_pte_chain(struct kvm_pte_chain *pc)
338
 
{
339
 
        kfree(pc);
340
 
}
341
 
 
342
 
static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu)
343
 
{
344
 
        return mmu_memory_cache_alloc(&vcpu->arch.mmu_rmap_desc_cache,
345
 
                                      sizeof(struct kvm_rmap_desc));
346
 
}
347
 
 
348
 
static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd)
349
 
{
350
 
        kfree(rd);
351
 
}
352
 
 
353
 
/*
354
 
 * Take gfn and return the reverse mapping to it.
355
 
 * Note: gfn must be unaliased before this function gets called
356
 
 */
357
 
 
358
 
static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn)
359
 
{
360
 
        struct kvm_memory_slot *slot;
361
 
 
362
 
        slot = gfn_to_memslot(kvm, gfn);
363
 
        return &slot->rmap[gfn - slot->base_gfn];
364
 
}
365
 
 
366
 
/*
367
 
 * Reverse mapping data structures:
368
 
 *
369
 
 * If rmapp bit zero is zero, then rmapp points to the shadow page table entry
370
 
 * that points to page_address(page).
371
 
 *
372
 
 * If rmapp bit zero is one, then (rmapp & ~1) points to a struct kvm_rmap_desc
373
 
 * containing more mappings.
374
 
 */
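/*
 * For example (illustrative sketch only), a reader of rmapp decodes it as
 *
 *      if (!(*rmapp & 1))
 *              spte = (u64 *)*rmapp;                            single spte
 *      else
 *              desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);  desc chain
 *
 * which is exactly the test used by rmap_add(), rmap_remove() and
 * rmap_next() below.
 */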
375
 
static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
376
 
{
377
 
        struct kvm_mmu_page *sp;
378
 
        struct kvm_rmap_desc *desc;
379
 
        unsigned long *rmapp;
380
 
        int i;
381
 
 
382
 
        if (!is_rmap_pte(*spte))
383
 
                return;
384
 
        gfn = unalias_gfn(vcpu->kvm, gfn);
385
 
        sp = page_header(__pa(spte));
386
 
        sp->gfns[spte - sp->spt] = gfn;
387
 
        rmapp = gfn_to_rmap(vcpu->kvm, gfn);
388
 
        if (!*rmapp) {
389
 
                rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
390
 
                *rmapp = (unsigned long)spte;
391
 
        } else if (!(*rmapp & 1)) {
392
 
                rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte);
393
 
                desc = mmu_alloc_rmap_desc(vcpu);
394
 
                desc->shadow_ptes[0] = (u64 *)*rmapp;
395
 
                desc->shadow_ptes[1] = spte;
396
 
                *rmapp = (unsigned long)desc | 1;
397
 
        } else {
398
 
                rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
399
 
                desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
400
 
                while (desc->shadow_ptes[RMAP_EXT-1] && desc->more)
401
 
                        desc = desc->more;
402
 
                if (desc->shadow_ptes[RMAP_EXT-1]) {
403
 
                        desc->more = mmu_alloc_rmap_desc(vcpu);
404
 
                        desc = desc->more;
405
 
                }
406
 
                for (i = 0; desc->shadow_ptes[i]; ++i)
407
 
                        ;
408
 
                desc->shadow_ptes[i] = spte;
409
 
        }
410
 
}
411
 
 
412
 
static void rmap_desc_remove_entry(unsigned long *rmapp,
413
 
                                   struct kvm_rmap_desc *desc,
414
 
                                   int i,
415
 
                                   struct kvm_rmap_desc *prev_desc)
416
 
{
417
 
        int j;
418
 
 
419
 
        for (j = RMAP_EXT - 1; !desc->shadow_ptes[j] && j > i; --j)
420
 
                ;
421
 
        desc->shadow_ptes[i] = desc->shadow_ptes[j];
422
 
        desc->shadow_ptes[j] = NULL;
423
 
        if (j != 0)
424
 
                return;
425
 
        if (!prev_desc && !desc->more)
426
 
                *rmapp = (unsigned long)desc->shadow_ptes[0];
427
 
        else
428
 
                if (prev_desc)
429
 
                        prev_desc->more = desc->more;
430
 
                else
431
 
                        *rmapp = (unsigned long)desc->more | 1;
432
 
        mmu_free_rmap_desc(desc);
433
 
}
434
 
 
435
 
static void rmap_remove(struct kvm *kvm, u64 *spte)
436
 
{
437
 
        struct kvm_rmap_desc *desc;
438
 
        struct kvm_rmap_desc *prev_desc;
439
 
        struct kvm_mmu_page *sp;
440
 
        struct page *page;
441
 
        unsigned long *rmapp;
442
 
        int i;
443
 
 
444
 
        if (!is_rmap_pte(*spte))
445
 
                return;
446
 
        sp = page_header(__pa(spte));
447
 
        page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
448
 
        mark_page_accessed(page);
449
 
        if (is_writeble_pte(*spte))
450
 
                kvm_release_page_dirty(page);
451
 
        else
452
 
                kvm_release_page_clean(page);
453
 
        rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt]);
454
 
        if (!*rmapp) {
455
 
                printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
456
 
                BUG();
457
 
        } else if (!(*rmapp & 1)) {
458
 
                rmap_printk("rmap_remove:  %p %llx 1->0\n", spte, *spte);
459
 
                if ((u64 *)*rmapp != spte) {
460
 
                        printk(KERN_ERR "rmap_remove:  %p %llx 1->BUG\n",
461
 
                               spte, *spte);
462
 
                        BUG();
463
 
                }
464
 
                *rmapp = 0;
465
 
        } else {
466
 
                rmap_printk("rmap_remove:  %p %llx many->many\n", spte, *spte);
467
 
                desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
468
 
                prev_desc = NULL;
469
 
                while (desc) {
470
 
                        for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i)
471
 
                                if (desc->shadow_ptes[i] == spte) {
472
 
                                        rmap_desc_remove_entry(rmapp,
473
 
                                                               desc, i,
474
 
                                                               prev_desc);
475
 
                                        return;
476
 
                                }
477
 
                        prev_desc = desc;
478
 
                        desc = desc->more;
479
 
                }
480
 
                BUG();
481
 
        }
482
 
}
483
 
 
484
 
static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)
485
 
{
486
 
        struct kvm_rmap_desc *desc;
487
 
        struct kvm_rmap_desc *prev_desc;
488
 
        u64 *prev_spte;
489
 
        int i;
490
 
 
491
 
        if (!*rmapp)
492
 
                return NULL;
493
 
        else if (!(*rmapp & 1)) {
494
 
                if (!spte)
495
 
                        return (u64 *)*rmapp;
496
 
                return NULL;
497
 
        }
498
 
        desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
499
 
        prev_desc = NULL;
500
 
        prev_spte = NULL;
501
 
        while (desc) {
502
 
                for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i) {
503
 
                        if (prev_spte == spte)
504
 
                                return desc->shadow_ptes[i];
505
 
                        prev_spte = desc->shadow_ptes[i];
506
 
                }
507
 
                desc = desc->more;
508
 
        }
509
 
        return NULL;
510
 
}
511
 
 
512
 
static void rmap_write_protect(struct kvm *kvm, u64 gfn)
513
 
{
514
 
        unsigned long *rmapp;
515
 
        u64 *spte;
516
 
        int write_protected = 0;
517
 
 
518
 
        gfn = unalias_gfn(kvm, gfn);
519
 
        rmapp = gfn_to_rmap(kvm, gfn);
520
 
 
521
 
        spte = rmap_next(kvm, rmapp, NULL);
522
 
        while (spte) {
523
 
                BUG_ON(!spte);
524
 
                BUG_ON(!(*spte & PT_PRESENT_MASK));
525
 
                rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
526
 
                if (is_writeble_pte(*spte)) {
527
 
                        set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK);
528
 
                        write_protected = 1;
529
 
                }
530
 
                spte = rmap_next(kvm, rmapp, spte);
531
 
        }
532
 
        if (write_protected)
533
 
                kvm_flush_remote_tlbs(kvm);
534
 
}
535
 
 
536
 
#ifdef MMU_DEBUG
537
 
static int is_empty_shadow_page(u64 *spt)
538
 
{
539
 
        u64 *pos;
540
 
        u64 *end;
541
 
 
542
 
        for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++)
543
 
                if ((*pos & ~PT_SHADOW_IO_MARK) != shadow_trap_nonpresent_pte) {
544
 
                        printk(KERN_ERR "%s: %p %llx\n", __FUNCTION__,
545
 
                               pos, *pos);
546
 
                        return 0;
547
 
                }
548
 
        return 1;
549
 
}
550
 
#endif
551
 
 
552
 
static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp)
553
 
{
554
 
        ASSERT(is_empty_shadow_page(sp->spt));
555
 
        list_del(&sp->link);
556
 
        __free_page(virt_to_page(sp->spt));
557
 
        __free_page(virt_to_page(sp->gfns));
558
 
        kfree(sp);
559
 
        ++kvm->arch.n_free_mmu_pages;
560
 
}
561
 
 
562
 
static unsigned kvm_page_table_hashfn(gfn_t gfn)
563
 
{
564
 
        return gfn;
565
 
}
566
 
 
567
 
static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
568
 
                                               u64 *parent_pte)
569
 
{
570
 
        struct kvm_mmu_page *sp;
571
 
 
572
 
        if (!vcpu->kvm->arch.n_free_mmu_pages)
573
 
                return NULL;
574
 
 
575
 
        sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache, sizeof *sp);
576
 
        sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE);
577
 
        sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE);
578
 
        set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
579
 
        list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
580
 
        ASSERT(is_empty_shadow_page(sp->spt));
581
 
        sp->slot_bitmap = 0;
582
 
        sp->multimapped = 0;
583
 
        sp->parent_pte = parent_pte;
584
 
        --vcpu->kvm->arch.n_free_mmu_pages;
585
 
        return sp;
586
 
}
587
 
 
588
 
static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu,
589
 
                                    struct kvm_mmu_page *sp, u64 *parent_pte)
590
 
{
591
 
        struct kvm_pte_chain *pte_chain;
592
 
        struct hlist_node *node;
593
 
        int i;
594
 
 
595
 
        if (!parent_pte)
596
 
                return;
597
 
        if (!sp->multimapped) {
598
 
                u64 *old = sp->parent_pte;
599
 
 
600
 
                if (!old) {
601
 
                        sp->parent_pte = parent_pte;
602
 
                        return;
603
 
                }
604
 
                sp->multimapped = 1;
605
 
                pte_chain = mmu_alloc_pte_chain(vcpu);
606
 
                INIT_HLIST_HEAD(&sp->parent_ptes);
607
 
                hlist_add_head(&pte_chain->link, &sp->parent_ptes);
608
 
                pte_chain->parent_ptes[0] = old;
609
 
        }
610
 
        hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) {
611
 
                if (pte_chain->parent_ptes[NR_PTE_CHAIN_ENTRIES-1])
612
 
                        continue;
613
 
                for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i)
614
 
                        if (!pte_chain->parent_ptes[i]) {
615
 
                                pte_chain->parent_ptes[i] = parent_pte;
616
 
                                return;
617
 
                        }
618
 
        }
619
 
        pte_chain = mmu_alloc_pte_chain(vcpu);
620
 
        BUG_ON(!pte_chain);
621
 
        hlist_add_head(&pte_chain->link, &sp->parent_ptes);
622
 
        pte_chain->parent_ptes[0] = parent_pte;
623
 
}
624
 
 
625
 
static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp,
626
 
                                       u64 *parent_pte)
627
 
{
628
 
        struct kvm_pte_chain *pte_chain;
629
 
        struct hlist_node *node;
630
 
        int i;
631
 
 
632
 
        if (!sp->multimapped) {
633
 
                BUG_ON(sp->parent_pte != parent_pte);
634
 
                sp->parent_pte = NULL;
635
 
                return;
636
 
        }
637
 
        hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link)
638
 
                for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) {
639
 
                        if (!pte_chain->parent_ptes[i])
640
 
                                break;
641
 
                        if (pte_chain->parent_ptes[i] != parent_pte)
642
 
                                continue;
643
 
                        while (i + 1 < NR_PTE_CHAIN_ENTRIES
644
 
                                && pte_chain->parent_ptes[i + 1]) {
645
 
                                pte_chain->parent_ptes[i]
646
 
                                        = pte_chain->parent_ptes[i + 1];
647
 
                                ++i;
648
 
                        }
649
 
                        pte_chain->parent_ptes[i] = NULL;
650
 
                        if (i == 0) {
651
 
                                hlist_del(&pte_chain->link);
652
 
                                mmu_free_pte_chain(pte_chain);
653
 
                                if (hlist_empty(&sp->parent_ptes)) {
654
 
                                        sp->multimapped = 0;
655
 
                                        sp->parent_pte = NULL;
656
 
                                }
657
 
                        }
658
 
                        return;
659
 
                }
660
 
        BUG();
661
 
}
662
 
 
663
 
static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
664
 
{
665
 
        unsigned index;
666
 
        struct hlist_head *bucket;
667
 
        struct kvm_mmu_page *sp;
668
 
        struct hlist_node *node;
669
 
 
670
 
        pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
671
 
        index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
672
 
        bucket = &kvm->arch.mmu_page_hash[index];
673
 
        hlist_for_each_entry(sp, node, bucket, hash_link)
674
 
                if (sp->gfn == gfn && !sp->role.metaphysical) {
675
 
                        pgprintk("%s: found role %x\n",
676
 
                                 __FUNCTION__, sp->role.word);
677
 
                        return sp;
678
 
                }
679
 
        return NULL;
680
 
}
681
 
 
682
 
static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
683
 
                                             gfn_t gfn,
684
 
                                             gva_t gaddr,
685
 
                                             unsigned level,
686
 
                                             int metaphysical,
687
 
                                             unsigned access,
688
 
                                             u64 *parent_pte,
689
 
                                             bool *new_page)
690
 
{
691
 
        union kvm_mmu_page_role role;
692
 
        unsigned index;
693
 
        unsigned quadrant;
694
 
        struct hlist_head *bucket;
695
 
        struct kvm_mmu_page *sp;
696
 
        struct hlist_node *node;
697
 
 
698
 
        role.word = 0;
699
 
        role.glevels = vcpu->arch.mmu.root_level;
700
 
        role.level = level;
701
 
        role.metaphysical = metaphysical;
702
 
        role.access = access;
703
 
        if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) {
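                /*
                 * With a 32-bit guest, each guest page table holds 1024
                 * 4-byte entries while a shadow page holds only 512 8-byte
                 * entries, so one guest table is shadowed by more than one
                 * shadow page.  The quadrant says which part of the guest
                 * table this shadow page covers and is part of the role
                 * (and therefore of the hash lookup below).
                 */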
704
 
                quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
705
 
                quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
706
 
                role.quadrant = quadrant;
707
 
        }
708
 
        pgprintk("%s: looking gfn %lx role %x\n", __FUNCTION__,
709
 
                 gfn, role.word);
710
 
        index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
711
 
        bucket = &vcpu->kvm->arch.mmu_page_hash[index];
712
 
        hlist_for_each_entry(sp, node, bucket, hash_link)
713
 
                if (sp->gfn == gfn && sp->role.word == role.word) {
714
 
                        mmu_page_add_parent_pte(vcpu, sp, parent_pte);
715
 
                        pgprintk("%s: found\n", __FUNCTION__);
716
 
                        return sp;
717
 
                }
718
 
        sp = kvm_mmu_alloc_page(vcpu, parent_pte);
719
 
        if (!sp)
720
 
                return sp;
721
 
        pgprintk("%s: adding gfn %lx role %x\n", __FUNCTION__, gfn, role.word);
722
 
        sp->gfn = gfn;
723
 
        sp->role = role;
724
 
        hlist_add_head(&sp->hash_link, bucket);
725
 
        vcpu->arch.mmu.prefetch_page(vcpu, sp);
726
 
        if (!metaphysical)
727
 
                rmap_write_protect(vcpu->kvm, gfn);
728
 
        if (new_page)
729
 
                *new_page = 1;
730
 
        return sp;
731
 
}
732
 
 
733
 
static void kvm_mmu_page_unlink_children(struct kvm *kvm,
734
 
                                         struct kvm_mmu_page *sp)
735
 
{
736
 
        unsigned i;
737
 
        u64 *pt;
738
 
        u64 ent;
739
 
 
740
 
        pt = sp->spt;
741
 
 
742
 
        if (sp->role.level == PT_PAGE_TABLE_LEVEL) {
743
 
                for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
744
 
                        if (is_shadow_present_pte(pt[i]))
745
 
                                rmap_remove(kvm, &pt[i]);
746
 
                        pt[i] = shadow_trap_nonpresent_pte;
747
 
                }
748
 
                kvm_flush_remote_tlbs(kvm);
749
 
                return;
750
 
        }
751
 
 
752
 
        for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
753
 
                ent = pt[i];
754
 
 
755
 
                pt[i] = shadow_trap_nonpresent_pte;
756
 
                if (!is_shadow_present_pte(ent))
757
 
                        continue;
758
 
                ent &= PT64_BASE_ADDR_MASK;
759
 
                mmu_page_remove_parent_pte(page_header(ent), &pt[i]);
760
 
        }
761
 
        kvm_flush_remote_tlbs(kvm);
762
 
}
763
 
 
764
 
static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte)
765
 
{
766
 
        mmu_page_remove_parent_pte(sp, parent_pte);
767
 
}
768
 
 
769
 
static void kvm_mmu_reset_last_pte_updated(struct kvm *kvm)
770
 
{
771
 
        int i;
772
 
 
773
 
        for (i = 0; i < KVM_MAX_VCPUS; ++i)
774
 
                if (kvm->vcpus[i])
775
 
                        kvm->vcpus[i]->arch.last_pte_updated = NULL;
776
 
}
777
 
 
778
 
static void kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
779
 
{
780
 
        u64 *parent_pte;
781
 
 
782
 
        ++kvm->stat.mmu_shadow_zapped;
783
 
        while (sp->multimapped || sp->parent_pte) {
784
 
                if (!sp->multimapped)
785
 
                        parent_pte = sp->parent_pte;
786
 
                else {
787
 
                        struct kvm_pte_chain *chain;
788
 
 
789
 
                        chain = container_of(sp->parent_ptes.first,
790
 
                                             struct kvm_pte_chain, link);
791
 
                        parent_pte = chain->parent_ptes[0];
792
 
                }
793
 
                BUG_ON(!parent_pte);
794
 
                kvm_mmu_put_page(sp, parent_pte);
795
 
                set_shadow_pte(parent_pte, shadow_trap_nonpresent_pte);
796
 
        }
797
 
        kvm_mmu_page_unlink_children(kvm, sp);
798
 
        if (!sp->root_count) {
799
 
                hlist_del(&sp->hash_link);
800
 
                kvm_mmu_free_page(kvm, sp);
801
 
        } else
802
 
                list_move(&sp->link, &kvm->arch.active_mmu_pages);
803
 
        kvm_mmu_reset_last_pte_updated(kvm);
804
 
}
805
 
 
806
 
/*
807
 
 * Changing the number of mmu pages allocated to the vm
808
 
 * Note: if kvm_nr_mmu_pages is too small, you will get a deadlock
809
 
 */
810
 
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
811
 
{
812
 
        /*
813
 
         * If we set the number of mmu pages to be smaller than the
814
 
         * number of active pages, we must free some mmu pages before we
815
 
         * change the value
816
 
         */
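        /*
         * For example (illustrative numbers only): with n_alloc_mmu_pages =
         * 512, n_free_mmu_pages = 112 and a new limit of 300, 400 pages are
         * in use, so the loop below zaps 100 pages from the tail of
         * active_mmu_pages before the new limit is recorded.
         */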
817
 
 
818
 
        if ((kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages) >
819
 
            kvm_nr_mmu_pages) {
820
 
                int n_used_mmu_pages = kvm->arch.n_alloc_mmu_pages
821
 
                                       - kvm->arch.n_free_mmu_pages;
822
 
 
823
 
                while (n_used_mmu_pages > kvm_nr_mmu_pages) {
824
 
                        struct kvm_mmu_page *page;
825
 
 
826
 
                        page = container_of(kvm->arch.active_mmu_pages.prev,
827
 
                                            struct kvm_mmu_page, link);
828
 
                        kvm_mmu_zap_page(kvm, page);
829
 
                        n_used_mmu_pages--;
830
 
                }
831
 
                kvm->arch.n_free_mmu_pages = 0;
832
 
        } else
834
 
                kvm->arch.n_free_mmu_pages += kvm_nr_mmu_pages
835
 
                                         - kvm->arch.n_alloc_mmu_pages;
836
 
 
837
 
        kvm->arch.n_alloc_mmu_pages = kvm_nr_mmu_pages;
838
 
}
839
 
 
840
 
static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
841
 
{
842
 
        unsigned index;
843
 
        struct hlist_head *bucket;
844
 
        struct kvm_mmu_page *sp;
845
 
        struct hlist_node *node, *n;
846
 
        int r;
847
 
 
848
 
        pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
849
 
        r = 0;
850
 
        index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
851
 
        bucket = &kvm->arch.mmu_page_hash[index];
852
 
        hlist_for_each_entry_safe(sp, node, n, bucket, hash_link)
853
 
                if (sp->gfn == gfn && !sp->role.metaphysical) {
854
 
                        pgprintk("%s: gfn %lx role %x\n", __FUNCTION__, gfn,
855
 
                                 sp->role.word);
856
 
                        kvm_mmu_zap_page(kvm, sp);
857
 
                        r = 1;
858
 
                }
859
 
        return r;
860
 
}
861
 
 
862
 
static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
863
 
{
864
 
        struct kvm_mmu_page *sp;
865
 
 
866
 
        while ((sp = kvm_mmu_lookup_page(kvm, gfn)) != NULL) {
867
 
                pgprintk("%s: zap %lx %x\n", __FUNCTION__, gfn, sp->role.word);
868
 
                kvm_mmu_zap_page(kvm, sp);
869
 
        }
870
 
}
871
 
 
872
 
static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
873
 
{
874
 
        int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn));
875
 
        struct kvm_mmu_page *sp = page_header(__pa(pte));
876
 
 
877
 
        __set_bit(slot, &sp->slot_bitmap);
878
 
}
879
 
 
880
 
struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
881
 
{
882
 
        gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
883
 
 
884
 
        if (gpa == UNMAPPED_GVA)
885
 
                return NULL;
886
 
        return gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
887
 
}
888
 
 
889
 
static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
890
 
                         unsigned pt_access, unsigned pte_access,
891
 
                         int user_fault, int write_fault, int dirty,
892
 
                         int *ptwrite, gfn_t gfn)
893
 
{
894
 
        u64 spte;
895
 
        int was_rmapped = is_rmap_pte(*shadow_pte);
896
 
        struct page *page;
897
 
 
898
 
        pgprintk("%s: spte %llx access %x write_fault %d"
899
 
                 " user_fault %d gfn %lx\n",
900
 
                 __FUNCTION__, *shadow_pte, pt_access,
901
 
                 write_fault, user_fault, gfn);
902
 
 
903
 
        /*
904
 
         * We don't set the accessed bit, since we sometimes want to see
905
 
         * whether the guest actually used the pte (in order to detect
906
 
         * demand paging).
907
 
         */
908
 
        spte = PT_PRESENT_MASK | PT_DIRTY_MASK;
909
 
        if (!dirty)
910
 
                pte_access &= ~ACC_WRITE_MASK;
911
 
        if (!(pte_access & ACC_EXEC_MASK))
912
 
                spte |= PT64_NX_MASK;
913
 
 
914
 
        page = gfn_to_page(vcpu->kvm, gfn);
915
 
 
916
 
        spte |= PT_PRESENT_MASK;
917
 
        if (pte_access & ACC_USER_MASK)
918
 
                spte |= PT_USER_MASK;
919
 
 
920
 
        if (is_error_page(page)) {
921
 
                set_shadow_pte(shadow_pte,
922
 
                               shadow_trap_nonpresent_pte | PT_SHADOW_IO_MARK);
923
 
                kvm_release_page_clean(page);
924
 
                return;
925
 
        }
926
 
 
927
 
        spte |= page_to_phys(page);
928
 
 
929
 
        if ((pte_access & ACC_WRITE_MASK)
930
 
            || (write_fault && !is_write_protection(vcpu) && !user_fault)) {
931
 
                struct kvm_mmu_page *shadow;
932
 
 
933
 
                spte |= PT_WRITABLE_MASK;
934
 
                if (user_fault) {
935
 
                        mmu_unshadow(vcpu->kvm, gfn);
936
 
                        goto unshadowed;
937
 
                }
938
 
 
939
 
                shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn);
940
 
                if (shadow) {
941
 
                        pgprintk("%s: found shadow page for %lx, marking ro\n",
942
 
                                 __FUNCTION__, gfn);
943
 
                        pte_access &= ~ACC_WRITE_MASK;
944
 
                        if (is_writeble_pte(spte)) {
945
 
                                spte &= ~PT_WRITABLE_MASK;
946
 
                                kvm_x86_ops->tlb_flush(vcpu);
947
 
                        }
948
 
                        if (write_fault)
949
 
                                *ptwrite = 1;
950
 
                }
951
 
        }
952
 
 
953
 
unshadowed:
954
 
 
955
 
        if (pte_access & ACC_WRITE_MASK)
956
 
                mark_page_dirty(vcpu->kvm, gfn);
957
 
 
958
 
        pgprintk("%s: setting spte %llx\n", __FUNCTION__, spte);
959
 
        set_shadow_pte(shadow_pte, spte);
960
 
        page_header_update_slot(vcpu->kvm, shadow_pte, gfn);
961
 
        if (!was_rmapped) {
962
 
                rmap_add(vcpu, shadow_pte, gfn);
963
 
                if (!is_rmap_pte(*shadow_pte))
964
 
                        kvm_release_page_clean(page);
965
 
        }
966
 
        else
967
 
                kvm_release_page_clean(page);
968
 
        if (!ptwrite || !*ptwrite)
969
 
                vcpu->arch.last_pte_updated = shadow_pte;
970
 
}
971
 
 
972
 
static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
973
 
{
974
 
}
975
 
 
976
 
static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
977
 
{
978
 
        int level = PT32E_ROOT_LEVEL;
979
 
        hpa_t table_addr = vcpu->arch.mmu.root_hpa;
980
 
        int pt_write = 0;
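        /*
         * The loop below walks the shadow page table from the root,
         * allocating any missing intermediate shadow pages with
         * kvm_mmu_get_page(); at level 1 it installs the leaf spte via
         * mmu_set_spte() and returns nonzero when the access should be
         * emulated (a write to a shadowed guest page table, or MMIO).
         */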
981
 
 
982
 
        for (; ; level--) {
983
 
                u32 index = PT64_INDEX(v, level);
984
 
                u64 *table;
985
 
 
986
 
                ASSERT(VALID_PAGE(table_addr));
987
 
                table = __va(table_addr);
988
 
 
989
 
                if (level == 1) {
990
 
                        mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL,
991
 
                                     0, write, 1, &pt_write, gfn);
992
 
                        return pt_write || is_io_pte(table[index]);
993
 
                }
994
 
 
995
 
                if (table[index] == shadow_trap_nonpresent_pte) {
996
 
                        struct kvm_mmu_page *new_table;
997
 
                        gfn_t pseudo_gfn;
998
 
 
999
 
                        pseudo_gfn = (v & PT64_DIR_BASE_ADDR_MASK)
1000
 
                                >> PAGE_SHIFT;
1001
 
                        new_table = kvm_mmu_get_page(vcpu, pseudo_gfn,
1002
 
                                                     v, level - 1,
1003
 
                                                     1, ACC_ALL, &table[index],
1004
 
                                                     NULL);
1005
 
                        if (!new_table) {
1006
 
                                pgprintk("nonpaging_map: ENOMEM\n");
1007
 
                                return -ENOMEM;
1008
 
                        }
1009
 
 
1010
 
                        table[index] = __pa(new_table->spt) | PT_PRESENT_MASK
1011
 
                                | PT_WRITABLE_MASK | PT_USER_MASK;
1012
 
                }
1013
 
                table_addr = table[index] & PT64_BASE_ADDR_MASK;
1014
 
        }
1015
 
}
1016
 
 
1017
 
static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu,
1018
 
                                    struct kvm_mmu_page *sp)
1019
 
{
1020
 
        int i;
1021
 
 
1022
 
        for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
1023
 
                sp->spt[i] = shadow_trap_nonpresent_pte;
1024
 
}
1025
 
 
1026
 
static void mmu_free_roots(struct kvm_vcpu *vcpu)
1027
 
{
1028
 
        int i;
1029
 
        struct kvm_mmu_page *sp;
1030
 
 
1031
 
        if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
1032
 
                return;
1033
 
#ifdef CONFIG_X86_64
1034
 
        if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
1035
 
                hpa_t root = vcpu->arch.mmu.root_hpa;
1036
 
 
1037
 
                sp = page_header(root);
1038
 
                --sp->root_count;
1039
 
                vcpu->arch.mmu.root_hpa = INVALID_PAGE;
1040
 
                return;
1041
 
        }
1042
 
#endif
1043
 
        for (i = 0; i < 4; ++i) {
1044
 
                hpa_t root = vcpu->arch.mmu.pae_root[i];
1045
 
 
1046
 
                if (root) {
1047
 
                        root &= PT64_BASE_ADDR_MASK;
1048
 
                        sp = page_header(root);
1049
 
                        --sp->root_count;
1050
 
                }
1051
 
                vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
1052
 
        }
1053
 
        vcpu->arch.mmu.root_hpa = INVALID_PAGE;
1054
 
}
1055
 
 
1056
 
static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
1057
 
{
1058
 
        int i;
1059
 
        gfn_t root_gfn;
1060
 
        struct kvm_mmu_page *sp;
1061
 
 
1062
 
        root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT;
1063
 
 
1064
 
#ifdef CONFIG_X86_64
1065
 
        if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
1066
 
                hpa_t root = vcpu->arch.mmu.root_hpa;
1067
 
 
1068
 
                ASSERT(!VALID_PAGE(root));
1069
 
                sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
1070
 
                                      PT64_ROOT_LEVEL, 0, ACC_ALL, NULL, NULL);
1071
 
                root = __pa(sp->spt);
1072
 
                ++sp->root_count;
1073
 
                vcpu->arch.mmu.root_hpa = root;
1074
 
                return;
1075
 
        }
1076
 
#endif
1077
 
        for (i = 0; i < 4; ++i) {
1078
 
                hpa_t root = vcpu->arch.mmu.pae_root[i];
1079
 
 
1080
 
                ASSERT(!VALID_PAGE(root));
1081
 
                if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) {
1082
 
                        if (!is_present_pte(vcpu->arch.pdptrs[i])) {
1083
 
                                vcpu->arch.mmu.pae_root[i] = 0;
1084
 
                                continue;
1085
 
                        }
1086
 
                        root_gfn = vcpu->arch.pdptrs[i] >> PAGE_SHIFT;
1087
 
                } else if (vcpu->arch.mmu.root_level == 0)
1088
 
                        root_gfn = 0;
1089
 
                sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
1090
 
                                      PT32_ROOT_LEVEL, !is_paging(vcpu),
1091
 
                                      ACC_ALL, NULL, NULL);
1092
 
                root = __pa(sp->spt);
1093
 
                ++sp->root_count;
1094
 
                vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK;
1095
 
        }
1096
 
        vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
1097
 
}
1098
 
 
1099
 
static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
1100
 
{
1101
 
        return vaddr;
1102
 
}
1103
 
 
1104
 
static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
1105
 
                                u32 error_code)
1106
 
{
1107
 
        gfn_t gfn;
1108
 
        int r;
1109
 
 
1110
 
        pgprintk("%s: gva %lx error %x\n", __FUNCTION__, gva, error_code);
1111
 
        r = mmu_topup_memory_caches(vcpu);
1112
 
        if (r)
1113
 
                return r;
1114
 
 
1115
 
        ASSERT(vcpu);
1116
 
        ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
1117
 
 
1118
 
        gfn = gva >> PAGE_SHIFT;
1119
 
 
1120
 
        return nonpaging_map(vcpu, gva & PAGE_MASK,
1121
 
                             error_code & PFERR_WRITE_MASK, gfn);
1122
 
}
1123
 
 
1124
 
static void nonpaging_free(struct kvm_vcpu *vcpu)
1125
 
{
1126
 
        mmu_free_roots(vcpu);
1127
 
}
1128
 
 
1129
 
static int nonpaging_init_context(struct kvm_vcpu *vcpu)
1130
 
{
1131
 
        struct kvm_mmu *context = &vcpu->arch.mmu;
1132
 
 
1133
 
        context->new_cr3 = nonpaging_new_cr3;
1134
 
        context->page_fault = nonpaging_page_fault;
1135
 
        context->gva_to_gpa = nonpaging_gva_to_gpa;
1136
 
        context->free = nonpaging_free;
1137
 
        context->prefetch_page = nonpaging_prefetch_page;
1138
 
        context->root_level = 0;
1139
 
        context->shadow_root_level = PT32E_ROOT_LEVEL;
1140
 
        context->root_hpa = INVALID_PAGE;
1141
 
        return 0;
1142
 
}
1143
 
 
1144
 
void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
1145
 
{
1146
 
        ++vcpu->stat.tlb_flush;
1147
 
        kvm_x86_ops->tlb_flush(vcpu);
1148
 
}
1149
 
 
1150
 
static void paging_new_cr3(struct kvm_vcpu *vcpu)
1151
 
{
1152
 
        pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->cr3);
1153
 
        mmu_free_roots(vcpu);
1154
 
}
1155
 
 
1156
 
static void inject_page_fault(struct kvm_vcpu *vcpu,
1157
 
                              u64 addr,
1158
 
                              u32 err_code)
1159
 
{
1160
 
        kvm_inject_page_fault(vcpu, addr, err_code);
1161
 
}
1162
 
 
1163
 
static void paging_free(struct kvm_vcpu *vcpu)
1164
 
{
1165
 
        nonpaging_free(vcpu);
1166
 
}
1167
 
 
1168
 
#define PTTYPE 64
1169
 
#include "paging_tmpl.h"
1170
 
#undef PTTYPE
1171
 
 
1172
 
#define PTTYPE 32
1173
 
#include "paging_tmpl.h"
1174
 
#undef PTTYPE
1175
 
 
1176
 
static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
1177
 
{
1178
 
        struct kvm_mmu *context = &vcpu->arch.mmu;
1179
 
 
1180
 
        ASSERT(is_pae(vcpu));
1181
 
        context->new_cr3 = paging_new_cr3;
1182
 
        context->page_fault = paging64_page_fault;
1183
 
        context->gva_to_gpa = paging64_gva_to_gpa;
1184
 
        context->prefetch_page = paging64_prefetch_page;
1185
 
        context->free = paging_free;
1186
 
        context->root_level = level;
1187
 
        context->shadow_root_level = level;
1188
 
        context->root_hpa = INVALID_PAGE;
1189
 
        return 0;
1190
 
}
1191
 
 
1192
 
static int paging64_init_context(struct kvm_vcpu *vcpu)
1193
 
{
1194
 
        return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL);
1195
 
}
1196
 
 
1197
 
static int paging32_init_context(struct kvm_vcpu *vcpu)
1198
 
{
1199
 
        struct kvm_mmu *context = &vcpu->arch.mmu;
1200
 
 
1201
 
        context->new_cr3 = paging_new_cr3;
1202
 
        context->page_fault = paging32_page_fault;
1203
 
        context->gva_to_gpa = paging32_gva_to_gpa;
1204
 
        context->free = paging_free;
1205
 
        context->prefetch_page = paging32_prefetch_page;
1206
 
        context->root_level = PT32_ROOT_LEVEL;
1207
 
        context->shadow_root_level = PT32E_ROOT_LEVEL;
1208
 
        context->root_hpa = INVALID_PAGE;
1209
 
        return 0;
1210
 
}
1211
 
 
1212
 
static int paging32E_init_context(struct kvm_vcpu *vcpu)
1213
 
{
1214
 
        return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL);
1215
 
}
1216
 
 
1217
 
static int init_kvm_mmu(struct kvm_vcpu *vcpu)
1218
 
{
1219
 
        ASSERT(vcpu);
1220
 
        ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
1221
 
 
1222
 
        if (!is_paging(vcpu))
1223
 
                return nonpaging_init_context(vcpu);
1224
 
        else if (is_long_mode(vcpu))
1225
 
                return paging64_init_context(vcpu);
1226
 
        else if (is_pae(vcpu))
1227
 
                return paging32E_init_context(vcpu);
1228
 
        else
1229
 
                return paging32_init_context(vcpu);
1230
 
}
1231
 
 
1232
 
static void destroy_kvm_mmu(struct kvm_vcpu *vcpu)
1233
 
{
1234
 
        ASSERT(vcpu);
1235
 
        if (VALID_PAGE(vcpu->arch.mmu.root_hpa)) {
1236
 
                vcpu->arch.mmu.free(vcpu);
1237
 
                vcpu->arch.mmu.root_hpa = INVALID_PAGE;
1238
 
        }
1239
 
}
1240
 
 
1241
 
int kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
1242
 
{
1243
 
        destroy_kvm_mmu(vcpu);
1244
 
        return init_kvm_mmu(vcpu);
1245
 
}
1246
 
EXPORT_SYMBOL_GPL(kvm_mmu_reset_context);
1247
 
 
1248
 
int kvm_mmu_load(struct kvm_vcpu *vcpu)
1249
 
{
1250
 
        int r;
1251
 
 
1252
 
        mutex_lock(&vcpu->kvm->lock);
1253
 
        r = mmu_topup_memory_caches(vcpu);
1254
 
        if (r)
1255
 
                goto out;
1256
 
        mmu_alloc_roots(vcpu);
1257
 
        kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
1258
 
        kvm_mmu_flush_tlb(vcpu);
1259
 
out:
1260
 
        mutex_unlock(&vcpu->kvm->lock);
1261
 
        return r;
1262
 
}
1263
 
EXPORT_SYMBOL_GPL(kvm_mmu_load);
1264
 
 
1265
 
void kvm_mmu_unload(struct kvm_vcpu *vcpu)
1266
 
{
1267
 
        mmu_free_roots(vcpu);
1268
 
}
1269
 
 
1270
 
static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
1271
 
                                  struct kvm_mmu_page *sp,
1272
 
                                  u64 *spte)
1273
 
{
1274
 
        u64 pte;
1275
 
        struct kvm_mmu_page *child;
1276
 
 
1277
 
        pte = *spte;
1278
 
        if (is_shadow_present_pte(pte)) {
1279
 
                if (sp->role.level == PT_PAGE_TABLE_LEVEL)
1280
 
                        rmap_remove(vcpu->kvm, spte);
1281
 
                else {
1282
 
                        child = page_header(pte & PT64_BASE_ADDR_MASK);
1283
 
                        mmu_page_remove_parent_pte(child, spte);
1284
 
                }
1285
 
        }
1286
 
        set_shadow_pte(spte, shadow_trap_nonpresent_pte);
1287
 
}
1288
 
 
1289
 
static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
1290
 
                                  struct kvm_mmu_page *sp,
1291
 
                                  u64 *spte,
1292
 
                                  const void *new, int bytes,
1293
 
                                  int offset_in_pte)
1294
 
{
1295
 
        if (sp->role.level != PT_PAGE_TABLE_LEVEL) {
1296
 
                ++vcpu->kvm->stat.mmu_pde_zapped;
1297
 
                return;
1298
 
        }
1299
 
 
1300
 
        ++vcpu->kvm->stat.mmu_pte_updated;
1301
 
        if (sp->role.glevels == PT32_ROOT_LEVEL)
1302
 
                paging32_update_pte(vcpu, sp, spte, new, bytes, offset_in_pte);
1303
 
        else
1304
 
                paging64_update_pte(vcpu, sp, spte, new, bytes, offset_in_pte);
1305
 
}
1306
 
 
1307
 
static bool need_remote_flush(u64 old, u64 new)
1308
 
{
1309
 
        if (!is_shadow_present_pte(old))
1310
 
                return false;
1311
 
        if (!is_shadow_present_pte(new))
1312
 
                return true;
1313
 
        if ((old ^ new) & PT64_BASE_ADDR_MASK)
1314
 
                return true;
1315
 
        old ^= PT64_NX_MASK;
1316
 
        new ^= PT64_NX_MASK;
1317
 
        return (old & ~new & PT64_PERM_MASK) != 0;
1318
 
}
1319
 
 
1320
 
static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, u64 old, u64 new)
1321
 
{
1322
 
        if (need_remote_flush(old, new))
1323
 
                kvm_flush_remote_tlbs(vcpu->kvm);
1324
 
        else
1325
 
                kvm_mmu_flush_tlb(vcpu);
1326
 
}
1327
 
 
1328
 
static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu)
1329
 
{
1330
 
        u64 *spte = vcpu->arch.last_pte_updated;
1331
 
 
1332
 
        return !!(spte && (*spte & PT_ACCESSED_MASK));
1333
 
}
1334
 
 
1335
 
void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
1336
 
                       const u8 *new, int bytes)
1337
 
{
1338
 
        gfn_t gfn = gpa >> PAGE_SHIFT;
1339
 
        struct kvm_mmu_page *sp;
1340
 
        struct hlist_node *node, *n;
1341
 
        struct hlist_head *bucket;
1342
 
        unsigned index;
1343
 
        u64 entry;
1344
 
        u64 *spte;
1345
 
        unsigned offset = offset_in_page(gpa);
1346
 
        unsigned pte_size;
1347
 
        unsigned page_offset;
1348
 
        unsigned misaligned;
1349
 
        unsigned quadrant;
1350
 
        int level;
1351
 
        int flooded = 0;
1352
 
        int npte;
1353
 
 
1354
 
        pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes);
1355
 
        ++vcpu->kvm->stat.mmu_pte_write;
1356
 
        kvm_mmu_audit(vcpu, "pre pte write");
1357
 
        if (gfn == vcpu->arch.last_pt_write_gfn
1358
 
            && !last_updated_pte_accessed(vcpu)) {
1359
 
                ++vcpu->arch.last_pt_write_count;
1360
 
                if (vcpu->arch.last_pt_write_count >= 3)
1361
 
                        flooded = 1;
1362
 
        } else {
1363
 
                vcpu->arch.last_pt_write_gfn = gfn;
1364
 
                vcpu->arch.last_pt_write_count = 1;
1365
 
                vcpu->arch.last_pte_updated = NULL;
1366
 
        }
1367
 
        index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
1368
 
        bucket = &vcpu->kvm->arch.mmu_page_hash[index];
1369
 
        hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) {
1370
 
                if (sp->gfn != gfn || sp->role.metaphysical)
1371
 
                        continue;
1372
 
                pte_size = sp->role.glevels == PT32_ROOT_LEVEL ? 4 : 8;
1373
 
                misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
1374
 
                misaligned |= bytes < 4;
1375
 
                if (misaligned || flooded) {
1376
 
                        /*
1377
 
                         * Misaligned accesses are too much trouble to fix
1378
 
                         * up; also, they usually indicate a page is not used
1379
 
                         * as a page table.
1380
 
                         *
1381
 
                         * If we're seeing too many writes to a page,
1382
 
                         * it may no longer be a page table, or we may be
1383
 
                         * forking, in which case it is better to unmap the
1384
 
                         * page.
1385
 
                         */
1386
 
                        pgprintk("misaligned: gpa %llx bytes %d role %x\n",
1387
 
                                 gpa, bytes, sp->role.word);
1388
 
                        kvm_mmu_zap_page(vcpu->kvm, sp);
1389
 
                        ++vcpu->kvm->stat.mmu_flooded;
1390
 
                        continue;
1391
 
                }
1392
 
                page_offset = offset;
1393
 
                level = sp->role.level;
1394
 
                npte = 1;
1395
 
                if (sp->role.glevels == PT32_ROOT_LEVEL) {
1396
 
                        page_offset <<= 1;      /* 32->64 */
1397
 
                        /*
1398
 
                         * A 32-bit pde maps 4MB while the shadow pdes map
1399
 
                         * only 2MB.  So we need to double the offset again
1400
 
                         * and zap two pdes instead of one.
1401
 
                         */
1402
 
                        if (level == PT32_ROOT_LEVEL) {
1403
 
                                page_offset &= ~7; /* kill rounding error */
1404
 
                                page_offset <<= 1;
1405
 
                                npte = 2;
1406
 
                        }
1407
 
                        quadrant = page_offset >> PAGE_SHIFT;
1408
 
                        page_offset &= ~PAGE_MASK;
1409
 
                        if (quadrant != sp->role.quadrant)
1410
 
                                continue;
1411
 
                }
1412
 
                spte = &sp->spt[page_offset / sizeof(*spte)];
1413
 
                while (npte--) {
1414
 
                        entry = *spte;
1415
 
                        mmu_pte_write_zap_pte(vcpu, sp, spte);
1416
 
                        mmu_pte_write_new_pte(vcpu, sp, spte, new, bytes,
1417
 
                                              page_offset & (pte_size - 1));
1418
 
                        mmu_pte_write_flush_tlb(vcpu, entry, *spte);
1419
 
                        ++spte;
1420
 
                }
1421
 
        }
1422
 
        kvm_mmu_audit(vcpu, "post pte write");
1423
 
}
1424
 
 
1425
 
int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
1426
 
{
1427
 
        gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
1428
 
 
1429
 
        return kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
1430
 
}
1431
 
 
1432
 
void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
1433
 
{
1434
 
        while (vcpu->kvm->arch.n_free_mmu_pages < KVM_REFILL_PAGES) {
1435
 
                struct kvm_mmu_page *sp;
1436
 
 
1437
 
                sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev,
1438
 
                                  struct kvm_mmu_page, link);
1439
 
                kvm_mmu_zap_page(vcpu->kvm, sp);
1440
 
                ++vcpu->kvm->stat.mmu_recycled;
1441
 
        }
1442
 
}

int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
{
        int r;
        enum emulation_result er;

        mutex_lock(&vcpu->kvm->lock);
        r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code);
        if (r < 0)
                goto out;

        if (!r) {
                r = 1;
                goto out;
        }

        r = mmu_topup_memory_caches(vcpu);
        if (r)
                goto out;

        er = emulate_instruction(vcpu, vcpu->run, cr2, error_code, 0);
        mutex_unlock(&vcpu->kvm->lock);

        switch (er) {
        case EMULATE_DONE:
                return 1;
        case EMULATE_DO_MMIO:
                ++vcpu->stat.mmio_exits;
                return 0;
        case EMULATE_FAIL:
                kvm_report_emulation_failure(vcpu, "pagetable");
                return 1;
        default:
                BUG();
        }
out:
        mutex_unlock(&vcpu->kvm->lock);
        return r;
}
EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
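
/*
 * Added note (editor's illustration, not from the original source): the
 * return value of kvm_mmu_page_fault() above follows the usual exit-handler
 * convention visible in the code: a negative value propagates an internal
 * error, 1 means the fault was handled (or the faulting instruction was
 * emulated, or emulation failure was reported) and the guest can simply be
 * re-entered, and 0 means the access turned out to be MMIO and the exit must
 * be completed in userspace, with mmio_exits incremented for that case.
 */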
1483
 
 
1484
 
static void free_mmu_pages(struct kvm_vcpu *vcpu)
1485
 
{
1486
 
        struct kvm_mmu_page *sp;
1487
 
 
1488
 
        while (!list_empty(&vcpu->kvm->arch.active_mmu_pages)) {
1489
 
                sp = container_of(vcpu->kvm->arch.active_mmu_pages.next,
1490
 
                                  struct kvm_mmu_page, link);
1491
 
                kvm_mmu_zap_page(vcpu->kvm, sp);
1492
 
        }
1493
 
        free_page((unsigned long)vcpu->arch.mmu.pae_root);
1494
 
}
1495
 
 
1496
 
static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
1497
 
{
1498
 
        struct page *page;
1499
 
        int i;
1500
 
 
1501
 
        ASSERT(vcpu);
1502
 
 
1503
 
        if (vcpu->kvm->arch.n_requested_mmu_pages)
1504
 
                vcpu->kvm->arch.n_free_mmu_pages =
1505
 
                                        vcpu->kvm->arch.n_requested_mmu_pages;
1506
 
        else
1507
 
                vcpu->kvm->arch.n_free_mmu_pages =
1508
 
                                        vcpu->kvm->arch.n_alloc_mmu_pages;
1509
 
        /*
1510
 
         * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64.
1511
 
         * Therefore we need to allocate shadow page tables in the first
1512
 
         * 4GB of memory, which happens to fit the DMA32 zone.
1513
 
         */
1514
 
        page = alloc_page(GFP_KERNEL | __GFP_DMA32);
1515
 
        if (!page)
1516
 
                goto error_1;
1517
 
        vcpu->arch.mmu.pae_root = page_address(page);
1518
 
        for (i = 0; i < 4; ++i)
1519
 
                vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
1520
 
 
1521
 
        return 0;
1522
 
 
1523
 
error_1:
1524
 
        free_mmu_pages(vcpu);
1525
 
        return -ENOMEM;
1526
 
}
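
/*
 * Worked note for the __GFP_DMA32 allocation above (editor's illustration,
 * not part of the original source; the address is hypothetical): per the
 * comment in alloc_mmu_pages(), a pae_root page that landed at, say,
 * physical address 0x123456000 (above 4GB) could not be expressed in a
 * 32-bit cr3 while emulating 32-bit mode, so the page is taken from
 * ZONE_DMA32, which guarantees a physical address below 4GB.
 */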

int kvm_mmu_create(struct kvm_vcpu *vcpu)
{
        ASSERT(vcpu);
        ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));

        return alloc_mmu_pages(vcpu);
}

int kvm_mmu_setup(struct kvm_vcpu *vcpu)
{
        ASSERT(vcpu);
        ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));

        return init_kvm_mmu(vcpu);
}

void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
{
        ASSERT(vcpu);

        destroy_kvm_mmu(vcpu);
        free_mmu_pages(vcpu);
        mmu_free_memory_caches(vcpu);
}

void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
{
        struct kvm_mmu_page *sp;

        list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) {
                int i;
                u64 *pt;

                if (!test_bit(slot, &sp->slot_bitmap))
                        continue;

                pt = sp->spt;
                for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
                        /* avoid RMW */
                        if (pt[i] & PT_WRITABLE_MASK)
                                pt[i] &= ~PT_WRITABLE_MASK;
        }
}
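
/*
 * Added clarification (editor's note, not in the original source): the
 * "avoid RMW" test above skips the read-modify-write of sptes that are
 * already read-only, so only entries that actually have PT_WRITABLE_MASK set
 * are rewritten when a memory slot is write-protected (e.g. when dirty
 * logging is enabled for that slot).
 */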

void kvm_mmu_zap_all(struct kvm *kvm)
{
        struct kvm_mmu_page *sp, *node;

        list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link)
                kvm_mmu_zap_page(kvm, sp);

        kvm_flush_remote_tlbs(kvm);
}

void kvm_mmu_module_exit(void)
{
        if (pte_chain_cache)
                kmem_cache_destroy(pte_chain_cache);
        if (rmap_desc_cache)
                kmem_cache_destroy(rmap_desc_cache);
        if (mmu_page_header_cache)
                kmem_cache_destroy(mmu_page_header_cache);
}

int kvm_mmu_module_init(void)
{
        pte_chain_cache = kmem_cache_create("kvm_pte_chain",
                                            sizeof(struct kvm_pte_chain),
                                            0, 0, NULL);
        if (!pte_chain_cache)
                goto nomem;
        rmap_desc_cache = kmem_cache_create("kvm_rmap_desc",
                                            sizeof(struct kvm_rmap_desc),
                                            0, 0, NULL);
        if (!rmap_desc_cache)
                goto nomem;

        mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header",
                                                  sizeof(struct kvm_mmu_page),
                                                  0, 0, NULL);
        if (!mmu_page_header_cache)
                goto nomem;

        return 0;

nomem:
        kvm_mmu_module_exit();
        return -ENOMEM;
}

/*
 * Calculate mmu pages needed for kvm.
 */
unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
{
        int i;
        unsigned int nr_mmu_pages;
        unsigned int nr_pages = 0;

        for (i = 0; i < kvm->nmemslots; i++)
                nr_pages += kvm->memslots[i].npages;

        nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
        nr_mmu_pages = max(nr_mmu_pages,
                        (unsigned int) KVM_MIN_ALLOC_MMU_PAGES);

        return nr_mmu_pages;
}
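
/*
 * Worked example of the calculation above (editor's illustration, not from
 * the original source; the constants 20 and 64 stand in for
 * KVM_PERMILLE_MMU_PAGES and KVM_MIN_ALLOC_MMU_PAGES and are assumed
 * defaults): a 1GB guest has 262144 4KB pages, giving
 * 262144 * 20 / 1000 = 5242 shadow pages; an 8MB guest (2048 pages) would
 * compute 40 and be clamped up to the 64-page minimum.
 */
#if 0   /* illustrative sketch only, never compiled */
static unsigned int example_nr_mmu_pages(void)
{
        unsigned int nr_pages = 262144;         /* 1GB guest */
        unsigned int nr_mmu_pages;

        nr_mmu_pages = nr_pages * 20 / 1000;    /* 5242 */
        if (nr_mmu_pages < 64)                  /* minimum allocation */
                nr_mmu_pages = 64;
        return nr_mmu_pages;
}
#endif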

#ifdef AUDIT

static const char *audit_msg;

static gva_t canonicalize(gva_t gva)
{
#ifdef CONFIG_X86_64
        gva = (long long)(gva << 16) >> 16;
#endif
        return gva;
}
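
/*
 * Worked example (editor's illustration, not part of the original source):
 * on x86_64 an address is canonical when bits 63:48 are copies of bit 47,
 * and the shift pair above sign-extends bit 47.  For instance the
 * non-canonical value 0x0000800000000000 becomes 0x8000000000000000 after
 * the left shift and 0xffff800000000000 after the arithmetic right shift,
 * which is the canonical form the audit walk then translates and prints.
 */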

static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
                                gva_t va, int level)
{
        u64 *pt = __va(page_pte & PT64_BASE_ADDR_MASK);
        int i;
        gva_t va_delta = 1ul << (PAGE_SHIFT + 9 * (level - 1));

        for (i = 0; i < PT64_ENT_PER_PAGE; ++i, va += va_delta) {
                u64 ent = pt[i];

                if (ent == shadow_trap_nonpresent_pte)
                        continue;

                va = canonicalize(va);
                if (level > 1) {
                        if (ent == shadow_notrap_nonpresent_pte)
                                printk(KERN_ERR "audit: (%s) nontrapping pte"
                                       " in nonleaf level: levels %d gva %lx"
                                       " level %d pte %llx\n", audit_msg,
                                       vcpu->arch.mmu.root_level, va, level, ent);

                        audit_mappings_page(vcpu, ent, va, level - 1);
                } else {
                        gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va);
                        struct page *page = gpa_to_page(vcpu, gpa);
                        hpa_t hpa = page_to_phys(page);

                        if (is_shadow_present_pte(ent)
                            && (ent & PT64_BASE_ADDR_MASK) != hpa)
                                printk(KERN_ERR "xx audit error: (%s) levels %d"
                                       " gva %lx gpa %llx hpa %llx ent %llx %d\n",
                                       audit_msg, vcpu->arch.mmu.root_level,
                                       va, gpa, hpa, ent,
                                       is_shadow_present_pte(ent));
                        else if (ent == shadow_notrap_nonpresent_pte
                                 && !is_error_hpa(hpa))
                                printk(KERN_ERR "audit: (%s) notrap shadow,"
                                       " valid guest gva %lx\n", audit_msg, va);
                        kvm_release_page_clean(page);

                }
        }
}

static void audit_mappings(struct kvm_vcpu *vcpu)
{
        unsigned i;

        if (vcpu->arch.mmu.root_level == 4)
                audit_mappings_page(vcpu, vcpu->arch.mmu.root_hpa, 0, 4);
        else
                for (i = 0; i < 4; ++i)
                        if (vcpu->arch.mmu.pae_root[i] & PT_PRESENT_MASK)
                                audit_mappings_page(vcpu,
                                                    vcpu->arch.mmu.pae_root[i],
                                                    i << 30,
                                                    2);
}
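
/*
 * Added note (editor's illustration, not in the original source): for a PAE
 * guest the walk above starts from each present pae_root entry with a base
 * address of i << 30, since each of the four root entries covers 1GB of
 * guest virtual address space; with 4-level paging the single root_hpa is
 * walked from level 4 instead.
 */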

static int count_rmaps(struct kvm_vcpu *vcpu)
{
        int nmaps = 0;
        int i, j, k;

        for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
                struct kvm_memory_slot *m = &vcpu->kvm->memslots[i];
                struct kvm_rmap_desc *d;

                for (j = 0; j < m->npages; ++j) {
                        unsigned long *rmapp = &m->rmap[j];

                        if (!*rmapp)
                                continue;
                        if (!(*rmapp & 1)) {
                                ++nmaps;
                                continue;
                        }
                        d = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
                        while (d) {
                                for (k = 0; k < RMAP_EXT; ++k)
                                        if (d->shadow_ptes[k])
                                                ++nmaps;
                                        else
                                                break;
                                d = d->more;
                        }
                }
        }
        return nmaps;
}

static int count_writable_mappings(struct kvm_vcpu *vcpu)
{
        int nmaps = 0;
        struct kvm_mmu_page *sp;
        int i;

        list_for_each_entry(sp, &vcpu->kvm->arch.active_mmu_pages, link) {
                u64 *pt = sp->spt;

                if (sp->role.level != PT_PAGE_TABLE_LEVEL)
                        continue;

                for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
                        u64 ent = pt[i];

                        if (!(ent & PT_PRESENT_MASK))
                                continue;
                        if (!(ent & PT_WRITABLE_MASK))
                                continue;
                        ++nmaps;
                }
        }
        return nmaps;
}

static void audit_rmap(struct kvm_vcpu *vcpu)
{
        int n_rmap = count_rmaps(vcpu);
        int n_actual = count_writable_mappings(vcpu);

        if (n_rmap != n_actual)
                printk(KERN_ERR "%s: (%s) rmap %d actual %d\n",
                       __FUNCTION__, audit_msg, n_rmap, n_actual);
}

static void audit_write_protection(struct kvm_vcpu *vcpu)
{
        struct kvm_mmu_page *sp;
        struct kvm_memory_slot *slot;
        unsigned long *rmapp;
        gfn_t gfn;

        list_for_each_entry(sp, &vcpu->kvm->arch.active_mmu_pages, link) {
                if (sp->role.metaphysical)
                        continue;

                slot = gfn_to_memslot(vcpu->kvm, sp->gfn);
                gfn = unalias_gfn(vcpu->kvm, sp->gfn);
                rmapp = &slot->rmap[gfn - slot->base_gfn];
                if (*rmapp)
                        printk(KERN_ERR "%s: (%s) shadow page has writable"
                               " mappings: gfn %lx role %x\n",
                               __FUNCTION__, audit_msg, sp->gfn,
                               sp->role.word);
        }
}

static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg)
{
        int olddbg = dbg;

        dbg = 0;
        audit_msg = msg;
        audit_rmap(vcpu);
        audit_write_protection(vcpu);
        audit_mappings(vcpu);
        dbg = olddbg;
}

#endif