~ubuntu-branches/ubuntu/hardy/kvm/hardy-backports

Viewing changes to kernel/x86.c

  • Committer: Bazaar Package Importer
  • Author(s): Soren Hansen
  • Date: 2007-11-15 02:21:55 UTC
  • mfrom: (1.1.10 upstream)
  • Revision ID: james.westby@ubuntu.com-20071115022155-pxoxb8kfcrkn72mi
Tags: 1:52+dfsg-0ubuntu1
* New upstream release.
* 08_default_tdf.patch
  - Make -tdf the default and add a -no-tdf option.

@@ -16 +16 @@
 
 #include "kvm.h"
 #include "x86.h"
+#include "segment_descriptor.h"
 #include "irq.h"
 
 #include <linux/kvm.h>
 #include <linux/fs.h>
 #include <linux/vmalloc.h>
+#include <linux/module.h>
 
 #include <asm/uaccess.h>
 
 #define MAX_IO_MSRS 256
 
+#define CR0_RESERVED_BITS                                               \
+        (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
+                          | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
+                          | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
+#define CR4_RESERVED_BITS                                               \
+        (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
+                          | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
+                          | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR  \
+                          | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
+
+#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
+#define EFER_RESERVED_BITS 0xfffffffffffff2fe
+
+#define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x)
+
+struct kvm_stats_debugfs_item debugfs_entries[] = {
+        { "pf_fixed", STAT_OFFSET(pf_fixed) },
+        { "pf_guest", STAT_OFFSET(pf_guest) },
+        { "tlb_flush", STAT_OFFSET(tlb_flush) },
+        { "invlpg", STAT_OFFSET(invlpg) },
+        { "exits", STAT_OFFSET(exits) },
+        { "io_exits", STAT_OFFSET(io_exits) },
+        { "mmio_exits", STAT_OFFSET(mmio_exits) },
+        { "signal_exits", STAT_OFFSET(signal_exits) },
+        { "irq_window", STAT_OFFSET(irq_window_exits) },
+        { "halt_exits", STAT_OFFSET(halt_exits) },
+        { "halt_wakeup", STAT_OFFSET(halt_wakeup) },
+        { "request_irq", STAT_OFFSET(request_irq_exits) },
+        { "irq_exits", STAT_OFFSET(irq_exits) },
+        { "light_exits", STAT_OFFSET(light_exits) },
+        { "efer_reload", STAT_OFFSET(efer_reload) },
+        { NULL }
+};
+
+
+unsigned long segment_base(u16 selector)
+{
+        struct descriptor_table gdt;
+        struct segment_descriptor *d;
+        unsigned long table_base;
+        unsigned long v;
+
+        if (selector == 0)
+                return 0;
+
+        asm("sgdt %0" : "=m"(gdt));
+        table_base = gdt.base;
+
+        if (selector & 4) {           /* from ldt */
+                u16 ldt_selector;
+
+                asm("sldt %0" : "=g"(ldt_selector));
+                table_base = segment_base(ldt_selector);
+        }
+        d = (struct segment_descriptor *)(table_base + (selector & ~7));
+        v = d->base_low | ((unsigned long)d->base_mid << 16) |
+                ((unsigned long)d->base_high << 24);
+#ifdef CONFIG_X86_64
+        if (d->system == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
+                v |= ((unsigned long) \
+                      ((struct segment_descriptor_64 *)d)->base_higher) << 32;
+#endif
+        return v;
+}
+EXPORT_SYMBOL_GPL(segment_base);
+
+u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
+{
+        if (irqchip_in_kernel(vcpu->kvm))
+                return vcpu->apic_base;
+        else
+                return vcpu->apic_base;
+}
+EXPORT_SYMBOL_GPL(kvm_get_apic_base);
+
+void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
+{
+        /* TODO: reserve bits check */
+        if (irqchip_in_kernel(vcpu->kvm))
+                kvm_lapic_set_base(vcpu, data);
+        else
+                vcpu->apic_base = data;
+}
+EXPORT_SYMBOL_GPL(kvm_set_apic_base);
+
+static void inject_gp(struct kvm_vcpu *vcpu)
+{
+        kvm_x86_ops->inject_gp(vcpu, 0);
+}
+
+/*
+ * Load the pae pdptrs.  Return true is they are all valid.
+ */
+int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
+{
+        gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
+        unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
+        int i;
+        int ret;
+        u64 pdpte[ARRAY_SIZE(vcpu->pdptrs)];
+
+        mutex_lock(&vcpu->kvm->lock);
+        ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte,
+                                  offset * sizeof(u64), sizeof(pdpte));
+        if (ret < 0) {
+                ret = 0;
+                goto out;
+        }
+        for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
+                if ((pdpte[i] & 1) && (pdpte[i] & 0xfffffff0000001e6ull)) {
+                        ret = 0;
+                        goto out;
+                }
+        }
+        ret = 1;
+
+        memcpy(vcpu->pdptrs, pdpte, sizeof(vcpu->pdptrs));
+out:
+        mutex_unlock(&vcpu->kvm->lock);
+
+        return ret;
+}
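The `((cr3 & (PAGE_SIZE-1)) >> 5) << 2` expression above is easy to misread: a PAE PDPT is 32-byte aligned, so the low CR3 bits pick one of the 128 possible 32-byte tables within a page, and the shifts turn that into an index in u64 units for kvm_read_guest_page(). A standalone sketch of the arithmetic, not part of the kernel source; the CR3 value is an arbitrary example:

/* Illustration only (not kernel code): how load_pdptrs() turns the low bits
 * of a PAE CR3 into a byte offset.  The CR3 value below is made up. */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096ul

int main(void)
{
        unsigned long cr3 = 0x12345ae0;                 /* PAE CR3 is 32-byte aligned */
        unsigned long pdpt_gfn = cr3 / PAGE_SIZE;       /* guest page holding the PDPT */
        unsigned offset = ((cr3 & (PAGE_SIZE - 1)) >> 5) << 2; /* index in u64 units */

        /* offset * sizeof(u64) recovers the 32-byte-aligned offset inside the page */
        printf("gfn=%#lx  byte offset=%#zx\n", pdpt_gfn, offset * sizeof(uint64_t));
        return 0;                                       /* prints gfn=0x12345  byte offset=0xae0 */
}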
 
+
+void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+{
+        if (cr0 & CR0_RESERVED_BITS) {
+                printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
+                       cr0, vcpu->cr0);
+                inject_gp(vcpu);
+                return;
+        }
+
+        if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
+                printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
+                inject_gp(vcpu);
+                return;
+        }
+
+        if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) {
+                printk(KERN_DEBUG "set_cr0: #GP, set PG flag "
+                       "and a clear PE flag\n");
+                inject_gp(vcpu);
+                return;
+        }
+
+        if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
+#ifdef CONFIG_X86_64
+                if ((vcpu->shadow_efer & EFER_LME)) {
+                        int cs_db, cs_l;
+
+                        if (!is_pae(vcpu)) {
+                                printk(KERN_DEBUG "set_cr0: #GP, start paging "
+                                       "in long mode while PAE is disabled\n");
+                                inject_gp(vcpu);
+                                return;
+                        }
+                        kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
+                        if (cs_l) {
+                                printk(KERN_DEBUG "set_cr0: #GP, start paging "
+                                       "in long mode while CS.L == 1\n");
+                                inject_gp(vcpu);
+                                return;
+
+                        }
+                } else
+#endif
+                if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->cr3)) {
+                        printk(KERN_DEBUG "set_cr0: #GP, pdptrs "
+                               "reserved bits\n");
+                        inject_gp(vcpu);
+                        return;
+                }
+
+        }
+
+        kvm_x86_ops->set_cr0(vcpu, cr0);
+        vcpu->cr0 = cr0;
+
+        mutex_lock(&vcpu->kvm->lock);
+        kvm_mmu_reset_context(vcpu);
+        mutex_unlock(&vcpu->kvm->lock);
+        return;
+}
+EXPORT_SYMBOL_GPL(set_cr0);
+
+void lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
+{
+        set_cr0(vcpu, (vcpu->cr0 & ~0x0ful) | (msw & 0x0f));
+}
+EXPORT_SYMBOL_GPL(lmsw);
+
+void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+{
+        if (cr4 & CR4_RESERVED_BITS) {
+                printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
+                inject_gp(vcpu);
+                return;
+        }
+
+        if (is_long_mode(vcpu)) {
+                if (!(cr4 & X86_CR4_PAE)) {
+                        printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while "
+                               "in long mode\n");
+                        inject_gp(vcpu);
+                        return;
+                }
+        } else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & X86_CR4_PAE)
+                   && !load_pdptrs(vcpu, vcpu->cr3)) {
+                printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
+                inject_gp(vcpu);
+                return;
+        }
+
+        if (cr4 & X86_CR4_VMXE) {
+                printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n");
+                inject_gp(vcpu);
+                return;
+        }
+        kvm_x86_ops->set_cr4(vcpu, cr4);
+        vcpu->cr4 = cr4;
+        mutex_lock(&vcpu->kvm->lock);
+        kvm_mmu_reset_context(vcpu);
+        mutex_unlock(&vcpu->kvm->lock);
+}
+EXPORT_SYMBOL_GPL(set_cr4);
+
+void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
+{
+        if (is_long_mode(vcpu)) {
+                if (cr3 & CR3_L_MODE_RESERVED_BITS) {
+                        printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
+                        inject_gp(vcpu);
+                        return;
+                }
+        } else {
+                if (is_pae(vcpu)) {
+                        if (cr3 & CR3_PAE_RESERVED_BITS) {
+                                printk(KERN_DEBUG
+                                       "set_cr3: #GP, reserved bits\n");
+                                inject_gp(vcpu);
+                                return;
+                        }
+                        if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) {
+                                printk(KERN_DEBUG "set_cr3: #GP, pdptrs "
+                                       "reserved bits\n");
+                                inject_gp(vcpu);
+                                return;
+                        }
+                }
+                /*
+                 * We don't check reserved bits in nonpae mode, because
+                 * this isn't enforced, and VMware depends on this.
+                 */
+        }
+
+        mutex_lock(&vcpu->kvm->lock);
+        /*
+         * Does the new cr3 value map to physical memory? (Note, we
+         * catch an invalid cr3 even in real-mode, because it would
+         * cause trouble later on when we turn on paging anyway.)
+         *
+         * A real CPU would silently accept an invalid cr3 and would
+         * attempt to use it - with largely undefined (and often hard
+         * to debug) behavior on the guest side.
+         */
+        if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT)))
+                inject_gp(vcpu);
+        else {
+                vcpu->cr3 = cr3;
+                vcpu->mmu.new_cr3(vcpu);
+        }
+        mutex_unlock(&vcpu->kvm->lock);
+}
+EXPORT_SYMBOL_GPL(set_cr3);
+
+void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
+{
+        if (cr8 & CR8_RESERVED_BITS) {
+                printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
+                inject_gp(vcpu);
+                return;
+        }
+        if (irqchip_in_kernel(vcpu->kvm))
+                kvm_lapic_set_tpr(vcpu, cr8);
+        else
+                vcpu->cr8 = cr8;
+}
+EXPORT_SYMBOL_GPL(set_cr8);
+
+unsigned long get_cr8(struct kvm_vcpu *vcpu)
+{
+        if (irqchip_in_kernel(vcpu->kvm))
+                return kvm_lapic_get_cr8(vcpu);
+        else
+                return vcpu->cr8;
+}
+EXPORT_SYMBOL_GPL(get_cr8);
 
 /*
  * List of msr numbers which we expose to userspace through KVM_GET_MSRS
@@ -48 +347 @@
         MSR_IA32_MISC_ENABLE,
 };
 
 
350
#ifdef CONFIG_X86_64
 
351
 
 
352
static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
 
353
{
 
354
        if (efer & EFER_RESERVED_BITS) {
 
355
                printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n",
 
356
                       efer);
 
357
                inject_gp(vcpu);
 
358
                return;
 
359
        }
 
360
 
 
361
        if (is_paging(vcpu)
 
362
            && (vcpu->shadow_efer & EFER_LME) != (efer & EFER_LME)) {
 
363
                printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n");
 
364
                inject_gp(vcpu);
 
365
                return;
 
366
        }
 
367
 
 
368
        kvm_x86_ops->set_efer(vcpu, efer);
 
369
 
 
370
        efer &= ~EFER_LMA;
 
371
        efer |= vcpu->shadow_efer & EFER_LMA;
 
372
 
 
373
        vcpu->shadow_efer = efer;
 
374
}
 
375
 
 
376
#endif
 
377
 
 
378
/*
 
379
 * Writes msr value into into the appropriate "register".
 
380
 * Returns 0 on success, non-0 otherwise.
 
381
 * Assumes vcpu_load() was already called.
 
382
 */
 
383
int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 
384
{
 
385
        return kvm_x86_ops->set_msr(vcpu, msr_index, data);
 
386
}
 
387
 
51
388
/*
52
389
 * Adapt set_msr() to msr_io()'s calling convention
53
390
 */
56
393
        return kvm_set_msr(vcpu, index, *data);
57
394
}
58
395
 
 
396
 
 
397
int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 
398
{
 
399
        switch (msr) {
 
400
#ifdef CONFIG_X86_64
 
401
        case MSR_EFER:
 
402
                set_efer(vcpu, data);
 
403
                break;
 
404
#endif
 
405
        case MSR_IA32_MC0_STATUS:
 
406
                pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n",
 
407
                       __FUNCTION__, data);
 
408
                break;
 
409
        case MSR_IA32_MCG_STATUS:
 
410
                pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n",
 
411
                        __FUNCTION__, data);
 
412
                break;
 
413
        case MSR_IA32_UCODE_REV:
 
414
        case MSR_IA32_UCODE_WRITE:
 
415
        case 0x200 ... 0x2ff: /* MTRRs */
 
416
                break;
 
417
        case MSR_IA32_APICBASE:
 
418
                kvm_set_apic_base(vcpu, data);
 
419
                break;
 
420
        case MSR_IA32_MISC_ENABLE:
 
421
                vcpu->ia32_misc_enable_msr = data;
 
422
                break;
 
423
        default:
 
424
                pr_unimpl(vcpu, "unhandled wrmsr: 0x%x\n", msr);
 
425
                return 1;
 
426
        }
 
427
        return 0;
 
428
}
 
429
EXPORT_SYMBOL_GPL(kvm_set_msr_common);
 
430
 
 
431
 
 
432
/*
 
433
 * Reads an msr value (of 'msr_index') into 'pdata'.
 
434
 * Returns 0 on success, non-0 otherwise.
 
435
 * Assumes vcpu_load() was already called.
 
436
 */
 
437
int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 
438
{
 
439
        return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
 
440
}
 
441
 
 
442
int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 
443
{
 
444
        u64 data;
 
445
 
 
446
        switch (msr) {
 
447
        case 0xc0010010: /* SYSCFG */
 
448
        case 0xc0010015: /* HWCR */
 
449
        case MSR_IA32_PLATFORM_ID:
 
450
        case MSR_IA32_P5_MC_ADDR:
 
451
        case MSR_IA32_P5_MC_TYPE:
 
452
        case MSR_IA32_MC0_CTL:
 
453
        case MSR_IA32_MCG_STATUS:
 
454
        case MSR_IA32_MCG_CAP:
 
455
        case MSR_IA32_MC0_MISC:
 
456
        case MSR_IA32_MC0_MISC+4:
 
457
        case MSR_IA32_MC0_MISC+8:
 
458
        case MSR_IA32_MC0_MISC+12:
 
459
        case MSR_IA32_MC0_MISC+16:
 
460
        case MSR_IA32_UCODE_REV:
 
461
        case MSR_IA32_PERF_STATUS:
 
462
        case MSR_IA32_EBL_CR_POWERON:
 
463
                /* MTRR registers */
 
464
        case 0xfe:
 
465
        case 0x200 ... 0x2ff:
 
466
                data = 0;
 
467
                break;
 
468
        case 0xcd: /* fsb frequency */
 
469
                data = 3;
 
470
                break;
 
471
        case MSR_IA32_APICBASE:
 
472
                data = kvm_get_apic_base(vcpu);
 
473
                break;
 
474
        case MSR_IA32_MISC_ENABLE:
 
475
                data = vcpu->ia32_misc_enable_msr;
 
476
                break;
 
477
#ifdef CONFIG_X86_64
 
478
        case MSR_EFER:
 
479
                data = vcpu->shadow_efer;
 
480
                break;
 
481
#endif
 
482
        default:
 
483
                pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
 
484
                return 1;
 
485
        }
 
486
        *pdata = data;
 
487
        return 0;
 
488
}
 
489
EXPORT_SYMBOL_GPL(kvm_get_msr_common);
 
490
 
 /*
  * Read or write a bunch of msrs. All parameters are kernel addresses.
  *
@@ -251 +683 @@
 
         switch (ioctl) {
         case KVM_GET_LAPIC: {
-                struct kvm_lapic_state lapic;
+                struct kvm_lapic_state l_apic;
 
-                memset(&lapic, 0, sizeof lapic);
-                r = kvm_vcpu_ioctl_get_lapic(vcpu, &lapic);
+                memset(&l_apic, 0, sizeof l_apic);
+                r = kvm_vcpu_ioctl_get_lapic(vcpu, &l_apic);
                 if (r)
                         goto out;
                 r = -EFAULT;
-                if (copy_to_user(argp, &lapic, sizeof lapic))
+                if (copy_to_user(argp, &l_apic, sizeof l_apic))
                         goto out;
                 r = 0;
                 break;
         }
         case KVM_SET_LAPIC: {
-                struct kvm_lapic_state lapic;
+                struct kvm_lapic_state l_apic;
 
                 r = -EFAULT;
-                if (copy_from_user(&lapic, argp, sizeof lapic))
+                if (copy_from_user(&l_apic, argp, sizeof l_apic))
                         goto out;
-                r = kvm_vcpu_ioctl_set_lapic(vcpu, &lapic);;
+                r = kvm_vcpu_ioctl_set_lapic(vcpu, &l_apic);;
                 if (r)
                         goto out;
                 r = 0;
@@ -300 +732 @@
         return r;
 }
 
 
+static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
+{
+        int ret;
+
+        if (addr > (unsigned int)(-3 * PAGE_SIZE))
+                return -1;
+        ret = kvm_x86_ops->set_tss_addr(kvm, addr);
+        return ret;
+}
+
+static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
+                                          u32 kvm_nr_mmu_pages)
+{
+        if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
+                return -EINVAL;
+
+        mutex_lock(&kvm->lock);
+
+        kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
+        kvm->n_requested_mmu_pages = kvm_nr_mmu_pages;
+
+        mutex_unlock(&kvm->lock);
+        return 0;
+}
+
+static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
+{
+        return kvm->n_alloc_mmu_pages;
+}
+
+/*
+ * Set a new alias region.  Aliases map a portion of physical memory into
+ * another portion.  This is useful for memory windows, for example the PC
+ * VGA region.
+ */
+static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
+                                         struct kvm_memory_alias *alias)
+{
+        int r, n;
+        struct kvm_mem_alias *p;
+
+        r = -EINVAL;
+        /* General sanity checks */
+        if (alias->memory_size & (PAGE_SIZE - 1))
+                goto out;
+        if (alias->guest_phys_addr & (PAGE_SIZE - 1))
+                goto out;
+        if (alias->slot >= KVM_ALIAS_SLOTS)
+                goto out;
+        if (alias->guest_phys_addr + alias->memory_size
+            < alias->guest_phys_addr)
+                goto out;
+        if (alias->target_phys_addr + alias->memory_size
+            < alias->target_phys_addr)
+                goto out;
+
+        mutex_lock(&kvm->lock);
+
+        p = &kvm->aliases[alias->slot];
+        p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
+        p->npages = alias->memory_size >> PAGE_SHIFT;
+        p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;
+
+        for (n = KVM_ALIAS_SLOTS; n > 0; --n)
+                if (kvm->aliases[n - 1].npages)
+                        break;
+        kvm->naliases = n;
+
+        kvm_mmu_zap_all(kvm);
+
+        mutex_unlock(&kvm->lock);
+
+        return 0;
+
+out:
+        return r;
+}
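kvm_vm_ioctl_set_memory_alias() is reached from the KVM_SET_MEMORY_ALIAS case in kvm_arch_vm_ioctl() further down. A hedged userspace sketch of installing such an alias follows; only the ioctl name and the struct kvm_memory_alias fields are taken from this file, while the VM fd handling and the example addresses are assumptions:

/* Illustrative userspace sketch, not from the kvm source.  Assumes a VM fd
 * obtained via KVM_CREATE_VM; the addresses below are made-up examples. */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int set_vga_alias(int vm_fd)
{
        struct kvm_memory_alias alias;

        memset(&alias, 0, sizeof(alias));
        alias.slot = 0;                        /* alias slot, must be < KVM_ALIAS_SLOTS */
        alias.guest_phys_addr = 0xa0000;       /* where the window appears to the guest */
        alias.memory_size = 0x20000;           /* page aligned, per the sanity checks above */
        alias.target_phys_addr = 0x1000000;    /* guest-physical region it maps onto */

        /* The kernel side validates alignment/overflow, fills kvm->aliases[]
         * and zaps the MMU so the new mapping takes effect. */
        return ioctl(vm_fd, KVM_SET_MEMORY_ALIAS, &alias);
}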
 
812
 
 
813
static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
 
814
{
 
815
        int r;
 
816
 
 
817
        r = 0;
 
818
        switch (chip->chip_id) {
 
819
        case KVM_IRQCHIP_PIC_MASTER:
 
820
                memcpy(&chip->chip.pic,
 
821
                        &pic_irqchip(kvm)->pics[0],
 
822
                        sizeof(struct kvm_pic_state));
 
823
                break;
 
824
        case KVM_IRQCHIP_PIC_SLAVE:
 
825
                memcpy(&chip->chip.pic,
 
826
                        &pic_irqchip(kvm)->pics[1],
 
827
                        sizeof(struct kvm_pic_state));
 
828
                break;
 
829
        case KVM_IRQCHIP_IOAPIC:
 
830
                memcpy(&chip->chip.ioapic,
 
831
                        ioapic_irqchip(kvm),
 
832
                        sizeof(struct kvm_ioapic_state));
 
833
                break;
 
834
        default:
 
835
                r = -EINVAL;
 
836
                break;
 
837
        }
 
838
        return r;
 
839
}
 
840
 
 
841
static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
 
842
{
 
843
        int r;
 
844
 
 
845
        r = 0;
 
846
        switch (chip->chip_id) {
 
847
        case KVM_IRQCHIP_PIC_MASTER:
 
848
                memcpy(&pic_irqchip(kvm)->pics[0],
 
849
                        &chip->chip.pic,
 
850
                        sizeof(struct kvm_pic_state));
 
851
                break;
 
852
        case KVM_IRQCHIP_PIC_SLAVE:
 
853
                memcpy(&pic_irqchip(kvm)->pics[1],
 
854
                        &chip->chip.pic,
 
855
                        sizeof(struct kvm_pic_state));
 
856
                break;
 
857
        case KVM_IRQCHIP_IOAPIC:
 
858
                memcpy(ioapic_irqchip(kvm),
 
859
                        &chip->chip.ioapic,
 
860
                        sizeof(struct kvm_ioapic_state));
 
861
                break;
 
862
        default:
 
863
                r = -EINVAL;
 
864
                break;
 
865
        }
 
866
        kvm_pic_update_irq(pic_irqchip(kvm));
 
867
        return r;
 
868
}
 
869
 
 
870
long kvm_arch_vm_ioctl(struct file *filp,
 
871
                       unsigned int ioctl, unsigned long arg)
 
872
{
 
873
        struct kvm *kvm = filp->private_data;
 
874
        void __user *argp = (void __user *)arg;
 
875
        int r = -EINVAL;
 
876
 
 
877
        switch (ioctl) {
 
878
        case KVM_SET_TSS_ADDR:
 
879
                r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
 
880
                if (r < 0)
 
881
                        goto out;
 
882
                break;
 
883
        case KVM_SET_MEMORY_REGION: {
 
884
                struct kvm_memory_region kvm_mem;
 
885
                struct kvm_userspace_memory_region kvm_userspace_mem;
 
886
 
 
887
                r = -EFAULT;
 
888
                if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
 
889
                        goto out;
 
890
                kvm_userspace_mem.slot = kvm_mem.slot;
 
891
                kvm_userspace_mem.flags = kvm_mem.flags;
 
892
                kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr;
 
893
                kvm_userspace_mem.memory_size = kvm_mem.memory_size;
 
894
                r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0);
 
895
                if (r)
 
896
                        goto out;
 
897
                break;
 
898
        }
 
899
        case KVM_SET_NR_MMU_PAGES:
 
900
                r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
 
901
                if (r)
 
902
                        goto out;
 
903
                break;
 
904
        case KVM_GET_NR_MMU_PAGES:
 
905
                r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
 
906
                break;
 
907
        case KVM_SET_MEMORY_ALIAS: {
 
908
                struct kvm_memory_alias alias;
 
909
 
 
910
                r = -EFAULT;
 
911
                if (copy_from_user(&alias, argp, sizeof alias))
 
912
                        goto out;
 
913
                r = kvm_vm_ioctl_set_memory_alias(kvm, &alias);
 
914
                if (r)
 
915
                        goto out;
 
916
                break;
 
917
        }
 
918
        case KVM_CREATE_IRQCHIP:
 
919
                r = -ENOMEM;
 
920
                kvm->vpic = kvm_create_pic(kvm);
 
921
                if (kvm->vpic) {
 
922
                        r = kvm_ioapic_init(kvm);
 
923
                        if (r) {
 
924
                                kfree(kvm->vpic);
 
925
                                kvm->vpic = NULL;
 
926
                                goto out;
 
927
                        }
 
928
                } else
 
929
                        goto out;
 
930
                break;
 
931
        case KVM_IRQ_LINE: {
 
932
                struct kvm_irq_level irq_event;
 
933
 
 
934
                r = -EFAULT;
 
935
                if (copy_from_user(&irq_event, argp, sizeof irq_event))
 
936
                        goto out;
 
937
                if (irqchip_in_kernel(kvm)) {
 
938
                        mutex_lock(&kvm->lock);
 
939
                        if (irq_event.irq < 16)
 
940
                                kvm_pic_set_irq(pic_irqchip(kvm),
 
941
                                        irq_event.irq,
 
942
                                        irq_event.level);
 
943
                        kvm_ioapic_set_irq(kvm->vioapic,
 
944
                                        irq_event.irq,
 
945
                                        irq_event.level);
 
946
                        mutex_unlock(&kvm->lock);
 
947
                        r = 0;
 
948
                }
 
949
                break;
 
950
        }
 
951
        case KVM_GET_IRQCHIP: {
 
952
                /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
 
953
                struct kvm_irqchip chip;
 
954
 
 
955
                r = -EFAULT;
 
956
                if (copy_from_user(&chip, argp, sizeof chip))
 
957
                        goto out;
 
958
                r = -ENXIO;
 
959
                if (!irqchip_in_kernel(kvm))
 
960
                        goto out;
 
961
                r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
 
962
                if (r)
 
963
                        goto out;
 
964
                r = -EFAULT;
 
965
                if (copy_to_user(argp, &chip, sizeof chip))
 
966
                        goto out;
 
967
                r = 0;
 
968
                break;
 
969
        }
 
970
        case KVM_SET_IRQCHIP: {
 
971
                /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
 
972
                struct kvm_irqchip chip;
 
973
 
 
974
                r = -EFAULT;
 
975
                if (copy_from_user(&chip, argp, sizeof chip))
 
976
                        goto out;
 
977
                r = -ENXIO;
 
978
                if (!irqchip_in_kernel(kvm))
 
979
                        goto out;
 
980
                r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
 
981
                if (r)
 
982
                        goto out;
 
983
                r = 0;
 
984
                break;
 
985
        }
 
986
        default:
 
987
                ;
 
988
        }
 
989
out:
 
990
        return r;
 
991
}
 
992
 
303
993
static __init void kvm_init_msr_list(void)
304
994
{
305
995
        u32 dummy[2];
315
1005
        num_msrs_to_save = j;
316
1006
}
317
1007
 
 
1008
/*
 
1009
 * Only apic need an MMIO device hook, so shortcut now..
 
1010
 */
 
1011
static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu,
 
1012
                                                gpa_t addr)
 
1013
{
 
1014
        struct kvm_io_device *dev;
 
1015
 
 
1016
        if (vcpu->apic) {
 
1017
                dev = &vcpu->apic->dev;
 
1018
                if (dev->in_range(dev, addr))
 
1019
                        return dev;
 
1020
        }
 
1021
        return NULL;
 
1022
}
 
1023
 
 
1024
 
 
1025
static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
 
1026
                                                gpa_t addr)
 
1027
{
 
1028
        struct kvm_io_device *dev;
 
1029
 
 
1030
        dev = vcpu_find_pervcpu_dev(vcpu, addr);
 
1031
        if (dev == NULL)
 
1032
                dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr);
 
1033
        return dev;
 
1034
}
 
1035
 
 
1036
int emulator_read_std(unsigned long addr,
 
1037
                             void *val,
 
1038
                             unsigned int bytes,
 
1039
                             struct kvm_vcpu *vcpu)
 
1040
{
 
1041
        void *data = val;
 
1042
 
 
1043
        while (bytes) {
 
1044
                gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
 
1045
                unsigned offset = addr & (PAGE_SIZE-1);
 
1046
                unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset);
 
1047
                int ret;
 
1048
 
 
1049
                if (gpa == UNMAPPED_GVA)
 
1050
                        return X86EMUL_PROPAGATE_FAULT;
 
1051
                ret = kvm_read_guest(vcpu->kvm, gpa, data, tocopy);
 
1052
                if (ret < 0)
 
1053
                        return X86EMUL_UNHANDLEABLE;
 
1054
 
 
1055
                bytes -= tocopy;
 
1056
                data += tocopy;
 
1057
                addr += tocopy;
 
1058
        }
 
1059
 
 
1060
        return X86EMUL_CONTINUE;
 
1061
}
 
1062
EXPORT_SYMBOL_GPL(emulator_read_std);
 
1063
 
 
1064
static int emulator_write_std(unsigned long addr,
 
1065
                              const void *val,
 
1066
                              unsigned int bytes,
 
1067
                              struct kvm_vcpu *vcpu)
 
1068
{
 
1069
        pr_unimpl(vcpu, "emulator_write_std: addr %lx n %d\n", addr, bytes);
 
1070
        return X86EMUL_UNHANDLEABLE;
 
1071
}
 
1072
 
 
1073
static int emulator_read_emulated(unsigned long addr,
 
1074
                                  void *val,
 
1075
                                  unsigned int bytes,
 
1076
                                  struct kvm_vcpu *vcpu)
 
1077
{
 
1078
        struct kvm_io_device *mmio_dev;
 
1079
        gpa_t                 gpa;
 
1080
 
 
1081
        if (vcpu->mmio_read_completed) {
 
1082
                memcpy(val, vcpu->mmio_data, bytes);
 
1083
                vcpu->mmio_read_completed = 0;
 
1084
                return X86EMUL_CONTINUE;
 
1085
        }
 
1086
 
 
1087
        gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
 
1088
 
 
1089
        /* For APIC access vmexit */
 
1090
        if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
 
1091
                goto mmio;
 
1092
 
 
1093
        if (emulator_read_std(addr, val, bytes, vcpu)
 
1094
                        == X86EMUL_CONTINUE)
 
1095
                return X86EMUL_CONTINUE;
 
1096
        if (gpa == UNMAPPED_GVA)
 
1097
                return X86EMUL_PROPAGATE_FAULT;
 
1098
 
 
1099
mmio:
 
1100
        /*
 
1101
         * Is this MMIO handled locally?
 
1102
         */
 
1103
        mmio_dev = vcpu_find_mmio_dev(vcpu, gpa);
 
1104
        if (mmio_dev) {
 
1105
                kvm_iodevice_read(mmio_dev, gpa, bytes, val);
 
1106
                return X86EMUL_CONTINUE;
 
1107
        }
 
1108
 
 
1109
        vcpu->mmio_needed = 1;
 
1110
        vcpu->mmio_phys_addr = gpa;
 
1111
        vcpu->mmio_size = bytes;
 
1112
        vcpu->mmio_is_write = 0;
 
1113
 
 
1114
        return X86EMUL_UNHANDLEABLE;
 
1115
}
 
1116
 
 
1117
static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 
1118
                               const void *val, int bytes)
 
1119
{
 
1120
        int ret;
 
1121
 
 
1122
        ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
 
1123
        if (ret < 0)
 
1124
                return 0;
 
1125
        kvm_mmu_pte_write(vcpu, gpa, val, bytes);
 
1126
        return 1;
 
1127
}
 
1128
 
 
1129
static int emulator_write_emulated_onepage(unsigned long addr,
 
1130
                                           const void *val,
 
1131
                                           unsigned int bytes,
 
1132
                                           struct kvm_vcpu *vcpu)
 
1133
{
 
1134
        struct kvm_io_device *mmio_dev;
 
1135
        gpa_t                 gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
 
1136
 
 
1137
        if (gpa == UNMAPPED_GVA) {
 
1138
                kvm_x86_ops->inject_page_fault(vcpu, addr, 2);
 
1139
                return X86EMUL_PROPAGATE_FAULT;
 
1140
        }
 
1141
 
 
1142
        /* For APIC access vmexit */
 
1143
        if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
 
1144
                goto mmio;
 
1145
 
 
1146
        if (emulator_write_phys(vcpu, gpa, val, bytes))
 
1147
                return X86EMUL_CONTINUE;
 
1148
 
 
1149
mmio:
 
1150
        /*
 
1151
         * Is this MMIO handled locally?
 
1152
         */
 
1153
        mmio_dev = vcpu_find_mmio_dev(vcpu, gpa);
 
1154
        if (mmio_dev) {
 
1155
                kvm_iodevice_write(mmio_dev, gpa, bytes, val);
 
1156
                return X86EMUL_CONTINUE;
 
1157
        }
 
1158
 
 
1159
        vcpu->mmio_needed = 1;
 
1160
        vcpu->mmio_phys_addr = gpa;
 
1161
        vcpu->mmio_size = bytes;
 
1162
        vcpu->mmio_is_write = 1;
 
1163
        memcpy(vcpu->mmio_data, val, bytes);
 
1164
 
 
1165
        return X86EMUL_CONTINUE;
 
1166
}
 
1167
 
 
+int emulator_write_emulated(unsigned long addr,
+                                   const void *val,
+                                   unsigned int bytes,
+                                   struct kvm_vcpu *vcpu)
+{
+        /* Crossing a page boundary? */
+        if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
+                int rc, now;
+
+                now = -addr & ~PAGE_MASK;
+                rc = emulator_write_emulated_onepage(addr, val, now, vcpu);
+                if (rc != X86EMUL_CONTINUE)
+                        return rc;
+                addr += now;
+                val += now;
+                bytes -= now;
+        }
+        return emulator_write_emulated_onepage(addr, val, bytes, vcpu);
+}
+EXPORT_SYMBOL_GPL(emulator_write_emulated);
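The `now = -addr & ~PAGE_MASK` line above computes how many bytes remain in the current page, so a write that straddles a page boundary is forwarded as two single-page writes. A small standalone sketch of that split, with an arbitrary example address and length:

/* Illustration only (not kernel code) of the split done by
 * emulator_write_emulated(); the guest address and length are made up. */
#include <stdio.h>

#define PAGE_SIZE 4096ul
#define PAGE_MASK (~(PAGE_SIZE - 1))

int main(void)
{
        unsigned long addr = 0x1ffe;   /* 2 bytes before a page boundary */
        unsigned int bytes = 4;

        if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {          /* crosses a page? */
                unsigned int now = -addr & ~PAGE_MASK;          /* bytes left in this page */
                printf("write %u bytes at %#lx, then %u bytes at %#lx\n",
                       now, addr, bytes - now, addr + now);     /* 2 at 0x1ffe, 2 at 0x2000 */
        }
        return 0;
}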
 
1188
 
 
1189
static int emulator_cmpxchg_emulated(unsigned long addr,
 
1190
                                     const void *old,
 
1191
                                     const void *new,
 
1192
                                     unsigned int bytes,
 
1193
                                     struct kvm_vcpu *vcpu)
 
1194
{
 
1195
        static int reported;
 
1196
 
 
1197
        if (!reported) {
 
1198
                reported = 1;
 
1199
                printk(KERN_WARNING "kvm: emulating exchange as write\n");
 
1200
        }
 
1201
        return emulator_write_emulated(addr, new, bytes, vcpu);
 
1202
}
 
1203
 
 
1204
static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
 
1205
{
 
1206
        return kvm_x86_ops->get_segment_base(vcpu, seg);
 
1207
}
 
1208
 
 
1209
int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
 
1210
{
 
1211
        return X86EMUL_CONTINUE;
 
1212
}
 
1213
 
 
1214
int emulate_clts(struct kvm_vcpu *vcpu)
 
1215
{
 
1216
        vcpu->cr0 &= ~X86_CR0_TS;
 
1217
        kvm_x86_ops->set_cr0(vcpu, vcpu->cr0);
 
1218
        return X86EMUL_CONTINUE;
 
1219
}
 
1220
 
 
1221
int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
 
1222
{
 
1223
        struct kvm_vcpu *vcpu = ctxt->vcpu;
 
1224
 
 
1225
        switch (dr) {
 
1226
        case 0 ... 3:
 
1227
                *dest = kvm_x86_ops->get_dr(vcpu, dr);
 
1228
                return X86EMUL_CONTINUE;
 
1229
        default:
 
1230
                pr_unimpl(vcpu, "%s: unexpected dr %u\n", __FUNCTION__, dr);
 
1231
                return X86EMUL_UNHANDLEABLE;
 
1232
        }
 
1233
}
 
1234
 
 
1235
int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
 
1236
{
 
1237
        unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U;
 
1238
        int exception;
 
1239
 
 
1240
        kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception);
 
1241
        if (exception) {
 
1242
                /* FIXME: better handling */
 
1243
                return X86EMUL_UNHANDLEABLE;
 
1244
        }
 
1245
        return X86EMUL_CONTINUE;
 
1246
}
 
1247
 
 
1248
void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
 
1249
{
 
1250
        static int reported;
 
1251
        u8 opcodes[4];
 
1252
        unsigned long rip = vcpu->rip;
 
1253
        unsigned long rip_linear;
 
1254
 
 
1255
        rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
 
1256
 
 
1257
        if (reported)
 
1258
                return;
 
1259
 
 
1260
        emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu);
 
1261
 
 
1262
        printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
 
1263
               context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
 
1264
        reported = 1;
 
1265
}
 
1266
EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
 
1267
 
 
1268
struct x86_emulate_ops emulate_ops = {
 
1269
        .read_std            = emulator_read_std,
 
1270
        .write_std           = emulator_write_std,
 
1271
        .read_emulated       = emulator_read_emulated,
 
1272
        .write_emulated      = emulator_write_emulated,
 
1273
        .cmpxchg_emulated    = emulator_cmpxchg_emulated,
 
1274
};
 
1275
 
 
1276
int emulate_instruction(struct kvm_vcpu *vcpu,
 
1277
                        struct kvm_run *run,
 
1278
                        unsigned long cr2,
 
1279
                        u16 error_code,
 
1280
                        int no_decode)
 
1281
{
 
1282
        int r;
 
1283
 
 
1284
        vcpu->mmio_fault_cr2 = cr2;
 
1285
        kvm_x86_ops->cache_regs(vcpu);
 
1286
 
 
1287
        vcpu->mmio_is_write = 0;
 
1288
        vcpu->pio.string = 0;
 
1289
 
 
1290
        if (!no_decode) {
 
1291
                int cs_db, cs_l;
 
1292
                kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
 
1293
 
 
1294
                vcpu->emulate_ctxt.vcpu = vcpu;
 
1295
                vcpu->emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
 
1296
                vcpu->emulate_ctxt.cr2 = cr2;
 
1297
                vcpu->emulate_ctxt.mode =
 
1298
                        (vcpu->emulate_ctxt.eflags & X86_EFLAGS_VM)
 
1299
                        ? X86EMUL_MODE_REAL : cs_l
 
1300
                        ? X86EMUL_MODE_PROT64 : cs_db
 
1301
                        ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
 
1302
 
 
1303
                if (vcpu->emulate_ctxt.mode == X86EMUL_MODE_PROT64) {
 
1304
                        vcpu->emulate_ctxt.cs_base = 0;
 
1305
                        vcpu->emulate_ctxt.ds_base = 0;
 
1306
                        vcpu->emulate_ctxt.es_base = 0;
 
1307
                        vcpu->emulate_ctxt.ss_base = 0;
 
1308
                } else {
 
1309
                        vcpu->emulate_ctxt.cs_base =
 
1310
                                        get_segment_base(vcpu, VCPU_SREG_CS);
 
1311
                        vcpu->emulate_ctxt.ds_base =
 
1312
                                        get_segment_base(vcpu, VCPU_SREG_DS);
 
1313
                        vcpu->emulate_ctxt.es_base =
 
1314
                                        get_segment_base(vcpu, VCPU_SREG_ES);
 
1315
                        vcpu->emulate_ctxt.ss_base =
 
1316
                                        get_segment_base(vcpu, VCPU_SREG_SS);
 
1317
                }
 
1318
 
 
1319
                vcpu->emulate_ctxt.gs_base =
 
1320
                                        get_segment_base(vcpu, VCPU_SREG_GS);
 
1321
                vcpu->emulate_ctxt.fs_base =
 
1322
                                        get_segment_base(vcpu, VCPU_SREG_FS);
 
1323
 
 
1324
                r = x86_decode_insn(&vcpu->emulate_ctxt, &emulate_ops);
 
1325
                if (r)  {
 
1326
                        if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
 
1327
                                return EMULATE_DONE;
 
1328
                        return EMULATE_FAIL;
 
1329
                }
 
1330
        }
 
1331
 
 
1332
        r = x86_emulate_insn(&vcpu->emulate_ctxt, &emulate_ops);
 
1333
 
 
1334
        if (vcpu->pio.string)
 
1335
                return EMULATE_DO_MMIO;
 
1336
 
 
1337
        if ((r || vcpu->mmio_is_write) && run) {
 
1338
                run->exit_reason = KVM_EXIT_MMIO;
 
1339
                run->mmio.phys_addr = vcpu->mmio_phys_addr;
 
1340
                memcpy(run->mmio.data, vcpu->mmio_data, 8);
 
1341
                run->mmio.len = vcpu->mmio_size;
 
1342
                run->mmio.is_write = vcpu->mmio_is_write;
 
1343
        }
 
1344
 
 
1345
        if (r) {
 
1346
                if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
 
1347
                        return EMULATE_DONE;
 
1348
                if (!vcpu->mmio_needed) {
 
1349
                        kvm_report_emulation_failure(vcpu, "mmio");
 
1350
                        return EMULATE_FAIL;
 
1351
                }
 
1352
                return EMULATE_DO_MMIO;
 
1353
        }
 
1354
 
 
1355
        kvm_x86_ops->decache_regs(vcpu);
 
1356
        kvm_x86_ops->set_rflags(vcpu, vcpu->emulate_ctxt.eflags);
 
1357
 
 
1358
        if (vcpu->mmio_is_write) {
 
1359
                vcpu->mmio_needed = 0;
 
1360
                return EMULATE_DO_MMIO;
 
1361
        }
 
1362
 
 
1363
        return EMULATE_DONE;
 
1364
}
 
1365
EXPORT_SYMBOL_GPL(emulate_instruction);
 
1366
 
 
1367
static void free_pio_guest_pages(struct kvm_vcpu *vcpu)
 
1368
{
 
1369
        int i;
 
1370
 
 
1371
        for (i = 0; i < ARRAY_SIZE(vcpu->pio.guest_pages); ++i)
 
1372
                if (vcpu->pio.guest_pages[i]) {
 
1373
                        kvm_release_page(vcpu->pio.guest_pages[i]);
 
1374
                        vcpu->pio.guest_pages[i] = NULL;
 
1375
                }
 
1376
}
 
1377
 
 
1378
static int pio_copy_data(struct kvm_vcpu *vcpu)
 
1379
{
 
1380
        void *p = vcpu->pio_data;
 
1381
        void *q;
 
1382
        unsigned bytes;
 
1383
        int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1;
 
1384
 
 
1385
        q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE,
 
1386
                 PAGE_KERNEL);
 
1387
        if (!q) {
 
1388
                free_pio_guest_pages(vcpu);
 
1389
                return -ENOMEM;
 
1390
        }
 
1391
        q += vcpu->pio.guest_page_offset;
 
1392
        bytes = vcpu->pio.size * vcpu->pio.cur_count;
 
1393
        if (vcpu->pio.in)
 
1394
                memcpy(q, p, bytes);
 
1395
        else
 
1396
                memcpy(p, q, bytes);
 
1397
        q -= vcpu->pio.guest_page_offset;
 
1398
        vunmap(q);
 
1399
        free_pio_guest_pages(vcpu);
 
1400
        return 0;
 
1401
}
 
1402
 
 
1403
int complete_pio(struct kvm_vcpu *vcpu)
 
1404
{
 
1405
        struct kvm_pio_request *io = &vcpu->pio;
 
1406
        long delta;
 
1407
        int r;
 
1408
 
 
1409
        kvm_x86_ops->cache_regs(vcpu);
 
1410
 
 
1411
        if (!io->string) {
 
1412
                if (io->in)
 
1413
                        memcpy(&vcpu->regs[VCPU_REGS_RAX], vcpu->pio_data,
 
1414
                               io->size);
 
1415
        } else {
 
1416
                if (io->in) {
 
1417
                        r = pio_copy_data(vcpu);
 
1418
                        if (r) {
 
1419
                                kvm_x86_ops->cache_regs(vcpu);
 
1420
                                return r;
 
1421
                        }
 
1422
                }
 
1423
 
 
1424
                delta = 1;
 
1425
                if (io->rep) {
 
1426
                        delta *= io->cur_count;
 
1427
                        /*
 
1428
                         * The size of the register should really depend on
 
1429
                         * current address size.
 
1430
                         */
 
1431
                        vcpu->regs[VCPU_REGS_RCX] -= delta;
 
1432
                }
 
1433
                if (io->down)
 
1434
                        delta = -delta;
 
1435
                delta *= io->size;
 
1436
                if (io->in)
 
1437
                        vcpu->regs[VCPU_REGS_RDI] += delta;
 
1438
                else
 
1439
                        vcpu->regs[VCPU_REGS_RSI] += delta;
 
1440
        }
 
1441
 
 
1442
        kvm_x86_ops->decache_regs(vcpu);
 
1443
 
 
1444
        io->count -= io->cur_count;
 
1445
        io->cur_count = 0;
 
1446
 
 
1447
        return 0;
 
1448
}
 
1449
 
 
1450
static void kernel_pio(struct kvm_io_device *pio_dev,
 
1451
                       struct kvm_vcpu *vcpu,
 
1452
                       void *pd)
 
1453
{
 
1454
        /* TODO: String I/O for in kernel device */
 
1455
 
 
1456
        mutex_lock(&vcpu->kvm->lock);
 
1457
        if (vcpu->pio.in)
 
1458
                kvm_iodevice_read(pio_dev, vcpu->pio.port,
 
1459
                                  vcpu->pio.size,
 
1460
                                  pd);
 
1461
        else
 
1462
                kvm_iodevice_write(pio_dev, vcpu->pio.port,
 
1463
                                   vcpu->pio.size,
 
1464
                                   pd);
 
1465
        mutex_unlock(&vcpu->kvm->lock);
 
1466
}
 
1467
 
 
1468
static void pio_string_write(struct kvm_io_device *pio_dev,
 
1469
                             struct kvm_vcpu *vcpu)
 
1470
{
 
1471
        struct kvm_pio_request *io = &vcpu->pio;
 
1472
        void *pd = vcpu->pio_data;
 
1473
        int i;
 
1474
 
 
1475
        mutex_lock(&vcpu->kvm->lock);
 
1476
        for (i = 0; i < io->cur_count; i++) {
 
1477
                kvm_iodevice_write(pio_dev, io->port,
 
1478
                                   io->size,
 
1479
                                   pd);
 
1480
                pd += io->size;
 
1481
        }
 
1482
        mutex_unlock(&vcpu->kvm->lock);
 
1483
}
 
1484
 
 
1485
static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
 
1486
                                               gpa_t addr)
 
1487
{
 
1488
        return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr);
 
1489
}
 
1490
 
 
1491
int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 
1492
                  int size, unsigned port)
 
1493
{
 
1494
        struct kvm_io_device *pio_dev;
 
1495
 
 
1496
        vcpu->run->exit_reason = KVM_EXIT_IO;
 
1497
        vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
 
1498
        vcpu->run->io.size = vcpu->pio.size = size;
 
1499
        vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
 
1500
        vcpu->run->io.count = vcpu->pio.count = vcpu->pio.cur_count = 1;
 
1501
        vcpu->run->io.port = vcpu->pio.port = port;
 
1502
        vcpu->pio.in = in;
 
1503
        vcpu->pio.string = 0;
 
1504
        vcpu->pio.down = 0;
 
1505
        vcpu->pio.guest_page_offset = 0;
 
1506
        vcpu->pio.rep = 0;
 
1507
 
 
1508
        kvm_x86_ops->cache_regs(vcpu);
 
1509
        memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4);
 
1510
        kvm_x86_ops->decache_regs(vcpu);
 
1511
 
 
1512
        kvm_x86_ops->skip_emulated_instruction(vcpu);
 
1513
 
 
1514
        pio_dev = vcpu_find_pio_dev(vcpu, port);
 
1515
        if (pio_dev) {
 
1516
                kernel_pio(pio_dev, vcpu, vcpu->pio_data);
 
1517
                complete_pio(vcpu);
 
1518
                return 1;
 
1519
        }
 
1520
        return 0;
 
1521
}
 
1522
EXPORT_SYMBOL_GPL(kvm_emulate_pio);
 
1523
 
 
1524
int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 
1525
                  int size, unsigned long count, int down,
 
1526
                  gva_t address, int rep, unsigned port)
 
1527
{
 
1528
        unsigned now, in_page;
 
1529
        int i, ret = 0;
 
1530
        int nr_pages = 1;
 
1531
        struct page *page;
 
1532
        struct kvm_io_device *pio_dev;
 
1533
 
 
1534
        vcpu->run->exit_reason = KVM_EXIT_IO;
 
1535
        vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
 
1536
        vcpu->run->io.size = vcpu->pio.size = size;
 
1537
        vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
 
1538
        vcpu->run->io.count = vcpu->pio.count = vcpu->pio.cur_count = count;
 
1539
        vcpu->run->io.port = vcpu->pio.port = port;
 
1540
        vcpu->pio.in = in;
 
1541
        vcpu->pio.string = 1;
 
1542
        vcpu->pio.down = down;
 
1543
        vcpu->pio.guest_page_offset = offset_in_page(address);
 
1544
        vcpu->pio.rep = rep;
 
1545
 
 
1546
        if (!count) {
 
1547
                kvm_x86_ops->skip_emulated_instruction(vcpu);
 
1548
                return 1;
 
1549
        }
 
1550
 
 
1551
        if (!down)
 
1552
                in_page = PAGE_SIZE - offset_in_page(address);
 
1553
        else
 
1554
                in_page = offset_in_page(address) + size;
 
1555
        now = min(count, (unsigned long)in_page / size);
 
1556
        if (!now) {
 
1557
                /*
 
1558
                 * String I/O straddles page boundary.  Pin two guest pages
 
1559
                 * so that we satisfy atomicity constraints.  Do just one
 
1560
                 * transaction to avoid complexity.
 
1561
                 */
 
1562
                nr_pages = 2;
 
1563
                now = 1;
 
1564
        }
 
1565
        if (down) {
 
1566
                /*
 
1567
                 * String I/O in reverse.  Yuck.  Kill the guest, fix later.
 
1568
                 */
 
1569
                pr_unimpl(vcpu, "guest string pio down\n");
 
1570
                inject_gp(vcpu);
 
1571
                return 1;
 
1572
        }
 
1573
        vcpu->run->io.count = now;
 
1574
        vcpu->pio.cur_count = now;
 
1575
 
 
1576
        if (vcpu->pio.cur_count == vcpu->pio.count)
 
1577
                kvm_x86_ops->skip_emulated_instruction(vcpu);
 
1578
 
 
1579
        for (i = 0; i < nr_pages; ++i) {
 
1580
                mutex_lock(&vcpu->kvm->lock);
 
1581
                page = gva_to_page(vcpu, address + i * PAGE_SIZE);
 
1582
                vcpu->pio.guest_pages[i] = page;
 
1583
                mutex_unlock(&vcpu->kvm->lock);
 
1584
                if (!page) {
 
1585
                        inject_gp(vcpu);
 
1586
                        free_pio_guest_pages(vcpu);
 
1587
                        return 1;
 
1588
                }
 
1589
        }
 
1590
 
 
1591
        pio_dev = vcpu_find_pio_dev(vcpu, port);
 
1592
        if (!vcpu->pio.in) {
 
1593
                /* string PIO write */
 
1594
                ret = pio_copy_data(vcpu);
 
1595
                if (ret >= 0 && pio_dev) {
 
1596
                        pio_string_write(pio_dev, vcpu);
 
1597
                        complete_pio(vcpu);
 
1598
                        if (vcpu->pio.count == 0)
 
1599
                                ret = 1;
 
1600
                }
 
1601
        } else if (pio_dev)
 
1602
                pr_unimpl(vcpu, "no string pio read support yet, "
 
1603
                       "port %x size %d count %ld\n",
 
1604
                        port, size, count);
 
1605
 
 
1606
        return ret;
 
1607
}
 
1608
EXPORT_SYMBOL_GPL(kvm_emulate_pio_string);
 
1609
 
318
1610
__init void kvm_arch_init(void)
319
1611
{
320
1612
        kvm_init_msr_list();
321
1613
}
 
1614
 
 
1615
int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 
1616
{
 
1617
        ++vcpu->stat.halt_exits;
 
1618
        if (irqchip_in_kernel(vcpu->kvm)) {
 
1619
                vcpu->mp_state = VCPU_MP_STATE_HALTED;
 
1620
                kvm_vcpu_block(vcpu);
 
1621
                if (vcpu->mp_state != VCPU_MP_STATE_RUNNABLE)
 
1622
                        return -EINTR;
 
1623
                return 1;
 
1624
        } else {
 
1625
                vcpu->run->exit_reason = KVM_EXIT_HLT;
 
1626
                return 0;
 
1627
        }
 
1628
}
 
1629
EXPORT_SYMBOL_GPL(kvm_emulate_halt);
 
1630
 
 
1631
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 
1632
{
 
1633
        unsigned long nr, a0, a1, a2, a3, ret;
 
1634
 
 
1635
        kvm_x86_ops->cache_regs(vcpu);
 
1636
 
 
1637
        nr = vcpu->regs[VCPU_REGS_RAX];
 
1638
        a0 = vcpu->regs[VCPU_REGS_RBX];
 
1639
        a1 = vcpu->regs[VCPU_REGS_RCX];
 
1640
        a2 = vcpu->regs[VCPU_REGS_RDX];
 
1641
        a3 = vcpu->regs[VCPU_REGS_RSI];
 
1642
 
 
1643
        if (!is_long_mode(vcpu)) {
 
1644
                nr &= 0xFFFFFFFF;
 
1645
                a0 &= 0xFFFFFFFF;
 
1646
                a1 &= 0xFFFFFFFF;
 
1647
                a2 &= 0xFFFFFFFF;
 
1648
                a3 &= 0xFFFFFFFF;
 
1649
        }
 
1650
 
 
1651
        switch (nr) {
 
1652
        default:
 
1653
                ret = -KVM_ENOSYS;
 
1654
                break;
 
1655
        }
 
1656
        vcpu->regs[VCPU_REGS_RAX] = ret;
 
1657
        kvm_x86_ops->decache_regs(vcpu);
 
1658
        return 0;
 
1659
}
 
1660
EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
 
1661
 
 
1662
int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
 
1663
{
 
1664
        char instruction[3];
 
1665
        int ret = 0;
 
1666
 
 
1667
        mutex_lock(&vcpu->kvm->lock);
 
1668
 
 
1669
        /*
 
1670
         * Blow out the MMU to ensure that no other VCPU has an active mapping
 
1671
         * to ensure that the updated hypercall appears atomically across all
 
1672
         * VCPUs.
 
1673
         */
 
1674
        kvm_mmu_zap_all(vcpu->kvm);
 
1675
 
 
1676
        kvm_x86_ops->cache_regs(vcpu);
 
1677
        kvm_x86_ops->patch_hypercall(vcpu, instruction);
 
1678
        if (emulator_write_emulated(vcpu->rip, instruction, 3, vcpu)
 
1679
            != X86EMUL_CONTINUE)
 
1680
                ret = -EFAULT;
 
1681
 
 
1682
        mutex_unlock(&vcpu->kvm->lock);
 
1683
 
 
1684
        return ret;
 
1685
}
 
static u64 mk_cr_64(u64 curr_cr, u32 new_val)
{
        return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
}

void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
{
        struct descriptor_table dt = { limit, base };

        kvm_x86_ops->set_gdt(vcpu, &dt);
}

void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
{
        struct descriptor_table dt = { limit, base };

        kvm_x86_ops->set_idt(vcpu, &dt);
}

void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
                   unsigned long *rflags)
{
        lmsw(vcpu, msw);
        *rflags = kvm_x86_ops->get_rflags(vcpu);
}

unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
{
        kvm_x86_ops->decache_cr4_guest_bits(vcpu);
        switch (cr) {
        case 0:
                return vcpu->cr0;
        case 2:
                return vcpu->cr2;
        case 3:
                return vcpu->cr3;
        case 4:
                return vcpu->cr4;
        default:
                vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
                return 0;
        }
}

void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
                     unsigned long *rflags)
{
        switch (cr) {
        case 0:
                set_cr0(vcpu, mk_cr_64(vcpu->cr0, val));
                *rflags = kvm_x86_ops->get_rflags(vcpu);
                break;
        case 2:
                vcpu->cr2 = val;
                break;
        case 3:
                set_cr3(vcpu, val);
                break;
        case 4:
                set_cr4(vcpu, mk_cr_64(vcpu->cr4, val));
                break;
        default:
                vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
        }
}
 
void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
{
        int i;
        u32 function;
        struct kvm_cpuid_entry *e, *best;

        kvm_x86_ops->cache_regs(vcpu);
        function = vcpu->regs[VCPU_REGS_RAX];
        vcpu->regs[VCPU_REGS_RAX] = 0;
        vcpu->regs[VCPU_REGS_RBX] = 0;
        vcpu->regs[VCPU_REGS_RCX] = 0;
        vcpu->regs[VCPU_REGS_RDX] = 0;
        best = NULL;
        for (i = 0; i < vcpu->cpuid_nent; ++i) {
                e = &vcpu->cpuid_entries[i];
                if (e->function == function) {
                        best = e;
                        break;
                }
                /*
                 * Both basic or both extended?
                 */
                if (((e->function ^ function) & 0x80000000) == 0)
                        if (!best || e->function > best->function)
                                best = e;
        }
        if (best) {
                vcpu->regs[VCPU_REGS_RAX] = best->eax;
                vcpu->regs[VCPU_REGS_RBX] = best->ebx;
                vcpu->regs[VCPU_REGS_RCX] = best->ecx;
                vcpu->regs[VCPU_REGS_RDX] = best->edx;
        }
        kvm_x86_ops->decache_regs(vcpu);
        kvm_x86_ops->skip_emulated_instruction(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
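
The cpuid_entries array walked above is supplied by userspace. A rough sketch of how a VMM might install a single leaf through the KVM_SET_CPUID ioctl (assumes a vcpu file descriptor from KVM_CREATE_VCPU; error handling kept minimal):

#include <linux/kvm.h>
#include <stdlib.h>
#include <sys/ioctl.h>

static int set_one_cpuid_leaf(int vcpu_fd)
{
        struct kvm_cpuid *cpuid;
        int r;

        cpuid = calloc(1, sizeof(*cpuid) + sizeof(struct kvm_cpuid_entry));
        if (!cpuid)
                return -1;
        cpuid->nent = 1;
        cpuid->entries[0].function = 0x80000001;   /* extended leaf */
        cpuid->entries[0].edx = 1u << 29;          /* e.g. advertise long mode */
        r = ioctl(vcpu_fd, KVM_SET_CPUID, cpuid);
        free(cpuid);
        return r;
}

Because the loop falls back to the highest installed leaf in the same (basic or extended) range, the guest still gets a sensible answer when it queries a function that was never installed.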
 
/*
 * Check if userspace requested an interrupt window, and that the
 * interrupt window is open.
 *
 * No need to exit to userspace if we already have an interrupt queued.
 */
static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
                                          struct kvm_run *kvm_run)
{
        return (!vcpu->irq_summary &&
                kvm_run->request_interrupt_window &&
                vcpu->interrupt_window_open &&
                (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF));
}

static void post_kvm_run_save(struct kvm_vcpu *vcpu,
                              struct kvm_run *kvm_run)
{
        kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
        kvm_run->cr8 = get_cr8(vcpu);
        kvm_run->apic_base = kvm_get_apic_base(vcpu);
        if (irqchip_in_kernel(vcpu->kvm))
                kvm_run->ready_for_interrupt_injection = 1;
        else
                kvm_run->ready_for_interrupt_injection =
                                        (vcpu->interrupt_window_open &&
                                         vcpu->irq_summary == 0);
}
 
static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
        int r;

        if (unlikely(vcpu->mp_state == VCPU_MP_STATE_SIPI_RECEIVED)) {
                pr_debug("vcpu %d received sipi with vector # %x\n",
                       vcpu->vcpu_id, vcpu->sipi_vector);
                kvm_lapic_reset(vcpu);
                r = kvm_x86_ops->vcpu_reset(vcpu);
                if (r)
                        return r;
                vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
        }

preempted:
        if (vcpu->guest_debug.enabled)
                kvm_x86_ops->guest_debug_pre(vcpu);

again:
        r = kvm_mmu_reload(vcpu);
        if (unlikely(r))
                goto out;

        kvm_inject_pending_timer_irqs(vcpu);

        preempt_disable();
        in_special_section();

        kvm_x86_ops->prepare_guest_switch(vcpu);
        kvm_load_guest_fpu(vcpu);

        local_irq_disable();

        if (signal_pending(current)) {
                local_irq_enable();
                preempt_enable();
                r = -EINTR;
                kvm_run->exit_reason = KVM_EXIT_INTR;
                ++vcpu->stat.signal_exits;
                goto out;
        }

        if (irqchip_in_kernel(vcpu->kvm))
                kvm_x86_ops->inject_pending_irq(vcpu);
        else if (!vcpu->mmio_read_completed)
                kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run);

        vcpu->guest_mode = 1;
        kvm_guest_enter();

        if (vcpu->requests)
                if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
                        kvm_x86_ops->tlb_flush(vcpu);

        kvm_x86_ops->run(vcpu, kvm_run);
        special_reload_dr7();

        vcpu->guest_mode = 0;
        local_irq_enable();

        ++vcpu->stat.exits;

        /*
         * We must have an instruction between local_irq_enable() and
         * kvm_guest_exit(), so the timer interrupt isn't delayed by
         * the interrupt shadow.  The stat.exits increment will do nicely.
         * But we need to prevent reordering, hence this barrier():
         */
        barrier();

        kvm_guest_exit();

        preempt_enable();

        /*
         * Profile KVM exit RIPs:
         */
        if (unlikely(prof_on == KVM_PROFILING)) {
                kvm_x86_ops->cache_regs(vcpu);
                profile_hit(KVM_PROFILING, (void *)vcpu->rip);
        }

        r = kvm_x86_ops->handle_exit(kvm_run, vcpu);

        if (r > 0) {
                if (dm_request_for_irq_injection(vcpu, kvm_run)) {
                        r = -EINTR;
                        kvm_run->exit_reason = KVM_EXIT_INTR;
                        ++vcpu->stat.request_irq_exits;
                        goto out;
                }
                if (!need_resched()) {
                        ++vcpu->stat.light_exits;
                        goto again;
                }
        }

out:
        if (r > 0) {
                kvm_resched(vcpu);
                goto preempted;
        }

        post_kvm_run_save(vcpu, kvm_run);

        return r;
}
 
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
        int r;
        sigset_t sigsaved;

        vcpu_load(vcpu);

        if (unlikely(vcpu->mp_state == VCPU_MP_STATE_UNINITIALIZED)) {
                kvm_vcpu_block(vcpu);
                vcpu_put(vcpu);
                return -EAGAIN;
        }

        if (vcpu->sigset_active)
                sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

        /* re-sync apic's tpr */
        if (!irqchip_in_kernel(vcpu->kvm))
                set_cr8(vcpu, kvm_run->cr8);

        if (vcpu->pio.cur_count) {
                r = complete_pio(vcpu);
                if (r)
                        goto out;
        }
#ifdef CONFIG_HAS_IOMEM
        if (vcpu->mmio_needed) {
                memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
                vcpu->mmio_read_completed = 1;
                vcpu->mmio_needed = 0;
                r = emulate_instruction(vcpu, kvm_run,
                                        vcpu->mmio_fault_cr2, 0, 1);
                if (r == EMULATE_DO_MMIO) {
                        /*
                         * Read-modify-write.  Back to userspace.
                         */
                        r = 0;
                        goto out;
                }
        }
#endif
        if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) {
                kvm_x86_ops->cache_regs(vcpu);
                vcpu->regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret;
                kvm_x86_ops->decache_regs(vcpu);
        }

        r = __vcpu_run(vcpu, kvm_run);

out:
        if (vcpu->sigset_active)
                sigprocmask(SIG_SETMASK, &sigsaved, NULL);

        vcpu_put(vcpu);
        return r;
}
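
This function backs the KVM_RUN ioctl, so the userspace half is a loop around that ioctl plus a switch on the exit reason written back through the mmap()ed kvm_run area. A rough sketch, assuming run points at the region mapped from the vcpu fd with the size reported by KVM_GET_VCPU_MMAP_SIZE and that only a couple of exit reasons matter:

#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>

static int run_vcpu(int vcpu_fd, struct kvm_run *run)
{
        for (;;) {
                if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
                        return -1;              /* -EINTR, -EAGAIN, ... */

                switch (run->exit_reason) {
                case KVM_EXIT_HLT:
                        return 0;               /* guest executed hlt, see above */
                case KVM_EXIT_IO:
                case KVM_EXIT_MMIO:
                        /* emulate the access, then re-enter the guest */
                        break;
                default:
                        fprintf(stderr, "unhandled exit %u\n", run->exit_reason);
                        return -1;
                }
        }
}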
 
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
        vcpu_load(vcpu);

        kvm_x86_ops->cache_regs(vcpu);

        regs->rax = vcpu->regs[VCPU_REGS_RAX];
        regs->rbx = vcpu->regs[VCPU_REGS_RBX];
        regs->rcx = vcpu->regs[VCPU_REGS_RCX];
        regs->rdx = vcpu->regs[VCPU_REGS_RDX];
        regs->rsi = vcpu->regs[VCPU_REGS_RSI];
        regs->rdi = vcpu->regs[VCPU_REGS_RDI];
        regs->rsp = vcpu->regs[VCPU_REGS_RSP];
        regs->rbp = vcpu->regs[VCPU_REGS_RBP];
#ifdef CONFIG_X86_64
        regs->r8 = vcpu->regs[VCPU_REGS_R8];
        regs->r9 = vcpu->regs[VCPU_REGS_R9];
        regs->r10 = vcpu->regs[VCPU_REGS_R10];
        regs->r11 = vcpu->regs[VCPU_REGS_R11];
        regs->r12 = vcpu->regs[VCPU_REGS_R12];
        regs->r13 = vcpu->regs[VCPU_REGS_R13];
        regs->r14 = vcpu->regs[VCPU_REGS_R14];
        regs->r15 = vcpu->regs[VCPU_REGS_R15];
#endif

        regs->rip = vcpu->rip;
        regs->rflags = kvm_x86_ops->get_rflags(vcpu);

        /*
         * Don't leak debug flags in case they were set for guest debugging
         */
        if (vcpu->guest_debug.enabled && vcpu->guest_debug.singlestep)
                regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);

        vcpu_put(vcpu);

        return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
        vcpu_load(vcpu);

        vcpu->regs[VCPU_REGS_RAX] = regs->rax;
        vcpu->regs[VCPU_REGS_RBX] = regs->rbx;
        vcpu->regs[VCPU_REGS_RCX] = regs->rcx;
        vcpu->regs[VCPU_REGS_RDX] = regs->rdx;
        vcpu->regs[VCPU_REGS_RSI] = regs->rsi;
        vcpu->regs[VCPU_REGS_RDI] = regs->rdi;
        vcpu->regs[VCPU_REGS_RSP] = regs->rsp;
        vcpu->regs[VCPU_REGS_RBP] = regs->rbp;
#ifdef CONFIG_X86_64
        vcpu->regs[VCPU_REGS_R8] = regs->r8;
        vcpu->regs[VCPU_REGS_R9] = regs->r9;
        vcpu->regs[VCPU_REGS_R10] = regs->r10;
        vcpu->regs[VCPU_REGS_R11] = regs->r11;
        vcpu->regs[VCPU_REGS_R12] = regs->r12;
        vcpu->regs[VCPU_REGS_R13] = regs->r13;
        vcpu->regs[VCPU_REGS_R14] = regs->r14;
        vcpu->regs[VCPU_REGS_R15] = regs->r15;
#endif

        vcpu->rip = regs->rip;
        kvm_x86_ops->set_rflags(vcpu, regs->rflags);

        kvm_x86_ops->decache_regs(vcpu);

        vcpu_put(vcpu);

        return 0;
}
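
These two handlers back the KVM_GET_REGS and KVM_SET_REGS ioctls. A small userspace sketch that reads the register file, advances RIP and writes it back; nudge_rip is a hypothetical helper, shown only to illustrate the round trip:

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int nudge_rip(int vcpu_fd, long delta)
{
        struct kvm_regs regs;

        if (ioctl(vcpu_fd, KVM_GET_REGS, &regs) < 0)
                return -1;
        regs.rip += delta;
        return ioctl(vcpu_fd, KVM_SET_REGS, &regs);
}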
 
static void get_segment(struct kvm_vcpu *vcpu,
                        struct kvm_segment *var, int seg)
{
        return kvm_x86_ops->get_segment(vcpu, var, seg);
}

void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
{
        struct kvm_segment cs;

        get_segment(vcpu, &cs, VCPU_SREG_CS);
        *db = cs.db;
        *l = cs.l;
}
EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
                                  struct kvm_sregs *sregs)
{
        struct descriptor_table dt;
        int pending_vec;

        vcpu_load(vcpu);

        get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
        get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
        get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
        get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
        get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
        get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);

        get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
        get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);

        kvm_x86_ops->get_idt(vcpu, &dt);
        sregs->idt.limit = dt.limit;
        sregs->idt.base = dt.base;
        kvm_x86_ops->get_gdt(vcpu, &dt);
        sregs->gdt.limit = dt.limit;
        sregs->gdt.base = dt.base;

        kvm_x86_ops->decache_cr4_guest_bits(vcpu);
        sregs->cr0 = vcpu->cr0;
        sregs->cr2 = vcpu->cr2;
        sregs->cr3 = vcpu->cr3;
        sregs->cr4 = vcpu->cr4;
        sregs->cr8 = get_cr8(vcpu);
        sregs->efer = vcpu->shadow_efer;
        sregs->apic_base = kvm_get_apic_base(vcpu);

        if (irqchip_in_kernel(vcpu->kvm)) {
                memset(sregs->interrupt_bitmap, 0,
                       sizeof sregs->interrupt_bitmap);
                pending_vec = kvm_x86_ops->get_irq(vcpu);
                if (pending_vec >= 0)
                        set_bit(pending_vec,
                                (unsigned long *)sregs->interrupt_bitmap);
        } else
                memcpy(sregs->interrupt_bitmap, vcpu->irq_pending,
                       sizeof sregs->interrupt_bitmap);

        vcpu_put(vcpu);

        return 0;
}

static void set_segment(struct kvm_vcpu *vcpu,
                        struct kvm_segment *var, int seg)
{
        return kvm_x86_ops->set_segment(vcpu, var, seg);
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
                                  struct kvm_sregs *sregs)
{
        int mmu_reset_needed = 0;
        int i, pending_vec, max_bits;
        struct descriptor_table dt;

        vcpu_load(vcpu);

        dt.limit = sregs->idt.limit;
        dt.base = sregs->idt.base;
        kvm_x86_ops->set_idt(vcpu, &dt);
        dt.limit = sregs->gdt.limit;
        dt.base = sregs->gdt.base;
        kvm_x86_ops->set_gdt(vcpu, &dt);

        vcpu->cr2 = sregs->cr2;
        mmu_reset_needed |= vcpu->cr3 != sregs->cr3;
        vcpu->cr3 = sregs->cr3;

        set_cr8(vcpu, sregs->cr8);

        mmu_reset_needed |= vcpu->shadow_efer != sregs->efer;
#ifdef CONFIG_X86_64
        kvm_x86_ops->set_efer(vcpu, sregs->efer);
#endif
        kvm_set_apic_base(vcpu, sregs->apic_base);

        kvm_x86_ops->decache_cr4_guest_bits(vcpu);

        mmu_reset_needed |= vcpu->cr0 != sregs->cr0;
        vcpu->cr0 = sregs->cr0;
        kvm_x86_ops->set_cr0(vcpu, sregs->cr0);

        mmu_reset_needed |= vcpu->cr4 != sregs->cr4;
        kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
        if (!is_long_mode(vcpu) && is_pae(vcpu))
                load_pdptrs(vcpu, vcpu->cr3);

        if (mmu_reset_needed)
                kvm_mmu_reset_context(vcpu);

        if (!irqchip_in_kernel(vcpu->kvm)) {
                memcpy(vcpu->irq_pending, sregs->interrupt_bitmap,
                       sizeof vcpu->irq_pending);
                vcpu->irq_summary = 0;
                for (i = 0; i < ARRAY_SIZE(vcpu->irq_pending); ++i)
                        if (vcpu->irq_pending[i])
                                __set_bit(i, &vcpu->irq_summary);
        } else {
                max_bits = (sizeof sregs->interrupt_bitmap) << 3;
                pending_vec = find_first_bit(
                        (const unsigned long *)sregs->interrupt_bitmap,
                        max_bits);
                /* Only pending external irq is handled here */
                if (pending_vec < max_bits) {
                        kvm_x86_ops->set_irq(vcpu, pending_vec);
                        pr_debug("Set back pending irq %d\n",
                                 pending_vec);
                }
        }

        set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
        set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
        set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
        set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
        set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
        set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);

        set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
        set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);

        vcpu_put(vcpu);

        return 0;
}

int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
                                    struct kvm_debug_guest *dbg)
{
        int r;

        vcpu_load(vcpu);

        r = kvm_x86_ops->set_guest_debug(vcpu, dbg);

        vcpu_put(vcpu);

        return r;
}
 
/*
 * fxsave fpu state.  Taken from x86_64/processor.h.  To be killed when
 * we have asm/x86/processor.h
 */
struct fxsave {
        u16     cwd;
        u16     swd;
        u16     twd;
        u16     fop;
        u64     rip;
        u64     rdp;
        u32     mxcsr;
        u32     mxcsr_mask;
        u32     st_space[32];   /* 8*16 bytes for each FP-reg = 128 bytes */
#ifdef CONFIG_X86_64
        u32     xmm_space[64];  /* 16*16 bytes for each XMM-reg = 256 bytes */
#else
        u32     xmm_space[32];  /* 8*16 bytes for each XMM-reg = 128 bytes */
#endif
};

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
        struct fxsave *fxsave = (struct fxsave *)&vcpu->guest_fx_image;

        vcpu_load(vcpu);

        memcpy(fpu->fpr, fxsave->st_space, 128);
        fpu->fcw = fxsave->cwd;
        fpu->fsw = fxsave->swd;
        fpu->ftwx = fxsave->twd;
        fpu->last_opcode = fxsave->fop;
        fpu->last_ip = fxsave->rip;
        fpu->last_dp = fxsave->rdp;
        memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);

        vcpu_put(vcpu);

        return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
        struct fxsave *fxsave = (struct fxsave *)&vcpu->guest_fx_image;

        vcpu_load(vcpu);

        memcpy(fxsave->st_space, fpu->fpr, 128);
        fxsave->cwd = fpu->fcw;
        fxsave->swd = fpu->fsw;
        fxsave->twd = fpu->ftwx;
        fxsave->fop = fpu->last_opcode;
        fxsave->rip = fpu->last_ip;
        fxsave->rdp = fpu->last_dp;
        memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);

        vcpu_put(vcpu);

        return 0;
}
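
These two handlers sit behind the KVM_GET_FPU and KVM_SET_FPU ioctls, which a VMM normally touches only when saving or restoring a guest. A minimal sketch of the userspace side, assuming a vcpu fd as in the earlier examples:

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int save_fpu(int vcpu_fd, struct kvm_fpu *out)
{
        /* out receives the fxsave-format image copied above */
        return ioctl(vcpu_fd, KVM_GET_FPU, out);
}

static int restore_fpu(int vcpu_fd, const struct kvm_fpu *in)
{
        return ioctl(vcpu_fd, KVM_SET_FPU, (void *)in);
}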
 
void fx_init(struct kvm_vcpu *vcpu)
{
        unsigned after_mxcsr_mask;

        /* Initialize guest FPU by resetting ours and saving into guest's */
        preempt_disable();
        fx_save(&vcpu->host_fx_image);
        fpu_init();
        fx_save(&vcpu->guest_fx_image);
        fx_restore(&vcpu->host_fx_image);
        preempt_enable();

        vcpu->cr0 |= X86_CR0_ET;
        after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space);
        vcpu->guest_fx_image.mxcsr = 0x1f80;
        memset((void *)&vcpu->guest_fx_image + after_mxcsr_mask,
               0, sizeof(struct i387_fxsave_struct) - after_mxcsr_mask);
}
EXPORT_SYMBOL_GPL(fx_init);

void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
{
        if (!vcpu->fpu_active || vcpu->guest_fpu_loaded)
                return;

        vcpu->guest_fpu_loaded = 1;
        fx_save(&vcpu->host_fx_image);
        fx_restore(&vcpu->guest_fx_image);
}
EXPORT_SYMBOL_GPL(kvm_load_guest_fpu);

void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
{
        if (!vcpu->guest_fpu_loaded)
                return;

        vcpu->guest_fpu_loaded = 0;
        fx_save(&vcpu->guest_fx_image);
        fx_restore(&vcpu->host_fx_image);
}
EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);