~ubuntu-branches/ubuntu/hardy/kvm/hardy-backports

Viewing changes to kernel/vmx.c

  • Committer: Bazaar Package Importer
  • Author(s): Soren Hansen
  • Date: 2007-11-15 02:21:55 UTC
  • mfrom: (1.1.10 upstream)
  • Revision ID: james.westby@ubuntu.com-20071115022155-pxoxb8kfcrkn72mi
Tags: 1:52+dfsg-0ubuntu1
* New upstream release.
* 08_default_tdf.patch
  - Make -tdf the default and add a -no-tdf option.
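
  The changelog line above is the whole description of 08_default_tdf.patch. As a rough illustration of what such a default flip looks like in qemu/kvm's option parsing, a sketch follows; the variable name time_drift_fix and the parsing shape are assumptions, not taken from the patch itself:

        #include <string.h>

        static int time_drift_fix = 1;      /* default now on; was opt-in */

        static void parse_flag(const char *arg)
        {
                if (!strcmp(arg, "-tdf"))
                        time_drift_fix = 1; /* kept for compatibility */
                else if (!strcmp(arg, "-no-tdf"))
                        time_drift_fix = 0; /* the new opt-out */
        }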

=== modified file 'kernel/vmx.c'
--- kernel/vmx.c
+++ kernel/vmx.c
@@ -16,6 +16,7 @@
  */
 
 #include "kvm.h"
+#include "x86.h"
 #include "x86_emulate.h"
 #include "irq.h"
 #include "vmx.h"
@@ -31,7 +32,7 @@
 #include <asm/io.h>
 #include <asm/desc.h>
 
-MODULE_INFO(version, "kvm-48");
+MODULE_INFO(version, "kvm-52");
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
@@ -86,6 +87,7 @@
         u32 revision_id;
         u32 pin_based_exec_ctrl;
         u32 cpu_based_exec_ctrl;
+        u32 cpu_based_2nd_exec_ctrl;
         u32 vmexit_ctrl;
         u32 vmentry_ctrl;
 } vmcs_config;
@@ -179,6 +181,29 @@
         return ((cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)));
 }
 
+static inline int cpu_has_secondary_exec_ctrls(void)
+{
+        return (vmcs_config.cpu_based_exec_ctrl &
+                CPU_BASED_ACTIVATE_SECONDARY_CONTROLS);
+}
+
+static inline int vm_need_secondary_exec_ctrls(struct kvm *kvm)
+{
+        return ((cpu_has_secondary_exec_ctrls()) && (irqchip_in_kernel(kvm)));
+}
+
+static inline int cpu_has_vmx_virtualize_apic_accesses(void)
+{
+        return (vmcs_config.cpu_based_2nd_exec_ctrl &
+                SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
+}
+
+static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
+{
+        return ((cpu_has_vmx_virtualize_apic_accesses()) &&
+                (irqchip_in_kernel(kvm)));
+}
+
 static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
 {
         int i;
@@ -350,6 +375,8 @@
         u64 guest_efer = vmx->guest_msrs[efer_offset].data;
         u64 ignore_bits;
 
+        if (efer_offset < 0)
+                return;
         /*
          * NX is emulated; LMA and LME handled by hardware; SCE meaningless
          * outside long mode
@@ -922,6 +949,7 @@
         u32 min, opt;
         u32 _pin_based_exec_control = 0;
         u32 _cpu_based_exec_control = 0;
+        u32 _cpu_based_2nd_exec_control = 0;
         u32 _vmexit_control = 0;
         u32 _vmentry_control = 0;
 
@@ -939,11 +967,8 @@
               CPU_BASED_USE_IO_BITMAPS |
               CPU_BASED_MOV_DR_EXITING |
               CPU_BASED_USE_TSC_OFFSETING;
-#ifdef CONFIG_X86_64
-        opt = CPU_BASED_TPR_SHADOW;
-#else
-        opt = 0;
-#endif
+        opt = CPU_BASED_TPR_SHADOW |
+              CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
         if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
                                 &_cpu_based_exec_control) < 0)
                 return -EIO;
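
The min/opt calls above rely on adjust_vmx_controls(), which is defined earlier in vmx.c and is not part of this diff. The sketch below reconstructs its contract from these call sites and the VMX capability-MSR convention (low word: bits that must be 1; high word: bits that may be 1); treat it as an approximation of the real helper, not a quote of it.

        static int adjust_vmx_controls(u32 min, u32 opt, u32 msr, u32 *result)
        {
                u32 vmx_msr_low, vmx_msr_high;
                u32 ctl = min | opt;

                rdmsr(msr, vmx_msr_low, vmx_msr_high);

                ctl &= vmx_msr_high;    /* bit == 0 in high word ==> must be zero */
                ctl |= vmx_msr_low;     /* bit == 1 in low word  ==> must be one  */

                /* Every required (min) bit must be supported by the CPU. */
                if (min & ~ctl)
                        return -EIO;

                *result = ctl;
                return 0;
        }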
@@ -952,6 +977,19 @@
                 _cpu_based_exec_control &= ~CPU_BASED_CR8_LOAD_EXITING &
                                            ~CPU_BASED_CR8_STORE_EXITING;
 #endif
+        if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
+                min = 0;
+                opt = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+                        SECONDARY_EXEC_WBINVD_EXITING;
+                if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS2,
+                                        &_cpu_based_2nd_exec_control) < 0)
+                        return -EIO;
+        }
+#ifndef CONFIG_X86_64
+        if (!(_cpu_based_2nd_exec_control &
+                                SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
+                _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
+#endif
 
         min = 0;
 #ifdef CONFIG_X86_64
@@ -989,6 +1027,7 @@
 
         vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control;
         vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control;
+        vmcs_conf->cpu_based_2nd_exec_ctrl = _cpu_based_2nd_exec_control;
         vmcs_conf->vmexit_ctrl         = _vmexit_control;
         vmcs_conf->vmentry_ctrl        = _vmentry_control;
 
@@ -1109,8 +1148,12 @@
 
 static gva_t rmode_tss_base(struct kvm *kvm)
 {
-        gfn_t base_gfn = kvm->memslots[0].base_gfn + kvm->memslots[0].npages - 3;
-        return base_gfn << PAGE_SHIFT;
+        if (!kvm->tss_addr) {
+                gfn_t base_gfn = kvm->memslots[0].base_gfn +
+                                 kvm->memslots[0].npages - 3;
+                return base_gfn << PAGE_SHIFT;
+        }
+        return kvm->tss_addr;
 }
 
 static void fix_rmode_seg(int seg, struct kvm_save_segment *save)
@@ -1427,6 +1470,27 @@
         vmcs_write32(sf->ar_bytes, 0x93);
 }
 
+static int alloc_apic_access_page(struct kvm *kvm)
+{
+        struct kvm_userspace_memory_region kvm_userspace_mem;
+        int r = 0;
+
+        mutex_lock(&kvm->lock);
+        if (kvm->apic_access_page)
+                goto out;
+        kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
+        kvm_userspace_mem.flags = 0;
+        kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL;
+        kvm_userspace_mem.memory_size = PAGE_SIZE;
+        r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0);
+        if (r)
+                goto out;
+        kvm->apic_access_page = gfn_to_page(kvm, 0xfee00);
+out:
+        mutex_unlock(&kvm->lock);
+        return r;
+}
+
 /*
  * Sets up the vmcs for emulated real mode.
  */
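
A note on the constants in alloc_apic_access_page(): 0xfee00 is just the default APIC MMIO base 0xfee00000 expressed as a guest frame number. A self-contained check of that arithmetic, assuming the usual x86 PAGE_SHIFT of 12 (4 KiB pages):

        #include <assert.h>
        #include <stdint.h>

        int main(void)
        {
                const uint64_t apic_base = 0xfee00000ULL; /* guest-physical */
                const int page_shift = 12;                /* 4 KiB pages */
                /* the gfn passed to gfn_to_page() above */
                assert((apic_base >> page_shift) == 0xfee00);
                return 0;
        }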
@@ -1437,92 +1501,15 @@
         unsigned long a;
         struct descriptor_table dt;
         int i;
-        int ret = 0;
         unsigned long kvm_vmx_return;
-        u64 msr;
         u32 exec_control;
 
-        if (!init_rmode_tss(vmx->vcpu.kvm)) {
-                ret = -ENOMEM;
-                goto out;
-        }
-
-        vmx->vcpu.rmode.active = 0;
-
-        vmx->vcpu.regs[VCPU_REGS_RDX] = get_rdx_init_val();
-        set_cr8(&vmx->vcpu, 0);
-        msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
-        if (vmx->vcpu.vcpu_id == 0)
-                msr |= MSR_IA32_APICBASE_BSP;
-        kvm_set_apic_base(&vmx->vcpu, msr);
-
-        fx_init(&vmx->vcpu);
-
-        /*
-         * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
-         * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4.  Sigh.
-         */
-        if (vmx->vcpu.vcpu_id == 0) {
-                vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
-                vmcs_writel(GUEST_CS_BASE, 0x000f0000);
-        } else {
-                vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.sipi_vector << 8);
-                vmcs_writel(GUEST_CS_BASE, vmx->vcpu.sipi_vector << 12);
-        }
-        vmcs_write32(GUEST_CS_LIMIT, 0xffff);
-        vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
-
-        seg_setup(VCPU_SREG_DS);
-        seg_setup(VCPU_SREG_ES);
-        seg_setup(VCPU_SREG_FS);
-        seg_setup(VCPU_SREG_GS);
-        seg_setup(VCPU_SREG_SS);
-
-        vmcs_write16(GUEST_TR_SELECTOR, 0);
-        vmcs_writel(GUEST_TR_BASE, 0);
-        vmcs_write32(GUEST_TR_LIMIT, 0xffff);
-        vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
-
-        vmcs_write16(GUEST_LDTR_SELECTOR, 0);
-        vmcs_writel(GUEST_LDTR_BASE, 0);
-        vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
-        vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);
-
-        vmcs_write32(GUEST_SYSENTER_CS, 0);
-        vmcs_writel(GUEST_SYSENTER_ESP, 0);
-        vmcs_writel(GUEST_SYSENTER_EIP, 0);
-
-        vmcs_writel(GUEST_RFLAGS, 0x02);
-        if (vmx->vcpu.vcpu_id == 0)
-                vmcs_writel(GUEST_RIP, 0xfff0);
-        else
-                vmcs_writel(GUEST_RIP, 0);
-        vmcs_writel(GUEST_RSP, 0);
-
-        /* todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0 */
-        vmcs_writel(GUEST_DR7, 0x400);
-
-        vmcs_writel(GUEST_GDTR_BASE, 0);
-        vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
-
-        vmcs_writel(GUEST_IDTR_BASE, 0);
-        vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
-
-        vmcs_write32(GUEST_ACTIVITY_STATE, 0);
-        vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
-        vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
-
         /* I/O */
         vmcs_write64(IO_BITMAP_A, page_to_phys(vmx_io_bitmap_a));
         vmcs_write64(IO_BITMAP_B, page_to_phys(vmx_io_bitmap_b));
 
-        guest_write_tsc(0);
-
         vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
 
-        /* Special registers */
-        vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
-
         /* Control */
         vmcs_write32(PIN_BASED_VM_EXEC_CONTROL,
                 vmcs_config.pin_based_exec_ctrl);
@@ -1535,8 +1522,14 @@
                                 CPU_BASED_CR8_LOAD_EXITING;
 #endif
         }
+        if (!vm_need_secondary_exec_ctrls(vmx->vcpu.kvm))
+                exec_control &= ~CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
         vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control);
 
+        if (vm_need_secondary_exec_ctrls(vmx->vcpu.kvm))
+                vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
+                             vmcs_config.cpu_based_2nd_exec_ctrl);
+
         vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, !!bypass_guest_pf);
         vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf);
         vmcs_write32(CR3_TARGET_COUNT, 0);           /* 22.2.1 */
@@ -1597,26 +1590,118 @@
                 ++vmx->nmsrs;
         }
 
-        setup_msrs(vmx);
-
         vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
 
         /* 22.2.1, 20.8.1 */
         vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl);
 
-        vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);  /* 22.2.1 */
-
-#ifdef CONFIG_X86_64
-        vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
-        if (vm_need_tpr_shadow(vmx->vcpu.kvm))
-                vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
-                             page_to_phys(vmx->vcpu.apic->regs_page));
-        vmcs_write32(TPR_THRESHOLD, 0);
-#endif
-
         vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
         vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK);
 
+        if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))
+                if (alloc_apic_access_page(vmx->vcpu.kvm) != 0)
+                        return -ENOMEM;
+
+        return 0;
+}
+
+static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
+{
+        struct vcpu_vmx *vmx = to_vmx(vcpu);
+        u64 msr;
+        int ret;
+
+        if (!init_rmode_tss(vmx->vcpu.kvm)) {
+                ret = -ENOMEM;
+                goto out;
+        }
+
+        vmx->vcpu.rmode.active = 0;
+
+        vmx->vcpu.regs[VCPU_REGS_RDX] = get_rdx_init_val();
+        set_cr8(&vmx->vcpu, 0);
+        msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
+        if (vmx->vcpu.vcpu_id == 0)
+                msr |= MSR_IA32_APICBASE_BSP;
+        kvm_set_apic_base(&vmx->vcpu, msr);
+
+        fx_init(&vmx->vcpu);
+
+        /*
+         * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
+         * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4.  Sigh.
+         */
+        if (vmx->vcpu.vcpu_id == 0) {
+                vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
+                vmcs_writel(GUEST_CS_BASE, 0x000f0000);
+        } else {
+                vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.sipi_vector << 8);
+                vmcs_writel(GUEST_CS_BASE, vmx->vcpu.sipi_vector << 12);
+        }
+        vmcs_write32(GUEST_CS_LIMIT, 0xffff);
+        vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
+
+        seg_setup(VCPU_SREG_DS);
+        seg_setup(VCPU_SREG_ES);
+        seg_setup(VCPU_SREG_FS);
+        seg_setup(VCPU_SREG_GS);
+        seg_setup(VCPU_SREG_SS);
+
+        vmcs_write16(GUEST_TR_SELECTOR, 0);
+        vmcs_writel(GUEST_TR_BASE, 0);
+        vmcs_write32(GUEST_TR_LIMIT, 0xffff);
+        vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
+
+        vmcs_write16(GUEST_LDTR_SELECTOR, 0);
+        vmcs_writel(GUEST_LDTR_BASE, 0);
+        vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
+        vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);
+
+        vmcs_write32(GUEST_SYSENTER_CS, 0);
+        vmcs_writel(GUEST_SYSENTER_ESP, 0);
+        vmcs_writel(GUEST_SYSENTER_EIP, 0);
+
+        vmcs_writel(GUEST_RFLAGS, 0x02);
+        if (vmx->vcpu.vcpu_id == 0)
+                vmcs_writel(GUEST_RIP, 0xfff0);
+        else
+                vmcs_writel(GUEST_RIP, 0);
+        vmcs_writel(GUEST_RSP, 0);
+
+        /* todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0 */
+        vmcs_writel(GUEST_DR7, 0x400);
+
+        vmcs_writel(GUEST_GDTR_BASE, 0);
+        vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
+
+        vmcs_writel(GUEST_IDTR_BASE, 0);
+        vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
+
+        vmcs_write32(GUEST_ACTIVITY_STATE, 0);
+        vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
+        vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
+
+        guest_write_tsc(0);
+
+        /* Special registers */
+        vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
+
+        setup_msrs(vmx);
+
+        vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);  /* 22.2.1 */
+
+        if (cpu_has_vmx_tpr_shadow()) {
+                vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
+                if (vm_need_tpr_shadow(vmx->vcpu.kvm))
+                        vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
+                                     page_to_phys(vmx->vcpu.apic->regs_page));
+                vmcs_write32(TPR_THRESHOLD, 0);
+        }
+
+        if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))
+                vmcs_write64(APIC_ACCESS_ADDR,
+                             page_to_phys(vmx->vcpu.kvm->apic_access_page));
+
         vmx->vcpu.cr0 = 0x60000010;
         vmx_set_cr0(&vmx->vcpu, vmx->vcpu.cr0); /* enter rmode */
         vmx_set_cr4(&vmx->vcpu, 0);
@@ -1632,65 +1717,13 @@
         return ret;
 }
 
-static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
-{
-        struct vcpu_vmx *vmx = to_vmx(vcpu);
-
-        vmx_vcpu_setup(vmx);
-}
-
-static void inject_rmode_irq(struct kvm_vcpu *vcpu, int irq)
-{
-        u16 ent[2];
-        u16 cs;
-        u16 ip;
-        unsigned long flags;
-        unsigned long ss_base = vmcs_readl(GUEST_SS_BASE);
-        u16 sp =  vmcs_readl(GUEST_RSP);
-        u32 ss_limit = vmcs_read32(GUEST_SS_LIMIT);
-
-        if (sp > ss_limit || sp < 6) {
-                vcpu_printf(vcpu, "%s: #SS, rsp 0x%lx ss 0x%lx limit 0x%x\n",
-                            __FUNCTION__,
-                            vmcs_readl(GUEST_RSP),
-                            vmcs_readl(GUEST_SS_BASE),
-                            vmcs_read32(GUEST_SS_LIMIT));
-                return;
-        }
-
-        if (emulator_read_std(irq * sizeof(ent), &ent, sizeof(ent), vcpu) !=
-                                                        X86EMUL_CONTINUE) {
-                vcpu_printf(vcpu, "%s: read guest err\n", __FUNCTION__);
-                return;
-        }
-
-        flags =  vmcs_readl(GUEST_RFLAGS);
-        cs =  vmcs_readl(GUEST_CS_BASE) >> 4;
-        ip =  vmcs_readl(GUEST_RIP);
-
-
-        if (emulator_write_emulated(
-                    ss_base + sp - 2, &flags, 2, vcpu) != X86EMUL_CONTINUE ||
-            emulator_write_emulated(
-                    ss_base + sp - 4, &cs, 2, vcpu) != X86EMUL_CONTINUE ||
-            emulator_write_emulated(
-                    ss_base + sp - 6, &ip, 2, vcpu) != X86EMUL_CONTINUE) {
-                vcpu_printf(vcpu, "%s: write guest err\n", __FUNCTION__);
-                return;
-        }
-
-        vmcs_writel(GUEST_RFLAGS, flags &
-                    ~(X86_EFLAGS_IF | X86_EFLAGS_AC | X86_EFLAGS_TF));
-        vmcs_write16(GUEST_CS_SELECTOR, ent[1]) ;
-        vmcs_writel(GUEST_CS_BASE, ent[1] << 4);
-        vmcs_writel(GUEST_RIP, ent[0]);
-        vmcs_writel(GUEST_RSP, (vmcs_readl(GUEST_RSP) & ~0xffff) | (sp - 6));
-}
-
 static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
 {
         if (vcpu->rmode.active) {
-                inject_rmode_irq(vcpu, irq);
+                vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+                             irq | INTR_TYPE_SOFT_INTR | INTR_INFO_VALID_MASK);
+                vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
+                vmcs_writel(GUEST_RIP, vmcs_readl(GUEST_RIP) - 1);
                 return;
         }
         vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
@@ -1739,6 +1772,23 @@
         vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
 }
 
+static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
+{
+        int ret;
+        struct kvm_userspace_memory_region tss_mem = {
+                .slot = 8,
+                .guest_phys_addr = addr,
+                .memory_size = PAGE_SIZE * 3,
+                .flags = 0,
+        };
+
+        ret = kvm_set_memory_region(kvm, &tss_mem, 0);
+        if (ret)
+                return ret;
+        kvm->tss_addr = addr;
+        return 0;
+}
+
 static void kvm_guest_debug_pre(struct kvm_vcpu *vcpu)
 {
         struct kvm_guest_debug *dbg = &vcpu->guest_debug;
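
vmx_set_tss_addr() is reached from userspace through the KVM_SET_TSS_ADDR vm ioctl; the generic plumbing is outside this diff. A minimal caller sketch: the fd handling is elided, and the address shown is only the conventional qemu choice, not anything mandated by the API.

        #include <sys/ioctl.h>
        #include <linux/kvm.h>

        int configure_tss(int vm_fd)
        {
                /* Any unused guest-physical range below 4G with room for
                 * the three TSS pages works; 0xfffbd000 is merely a
                 * common choice. */
                return ioctl(vm_fd, KVM_SET_TSS_ADDR, 0xfffbd000UL);
        }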
@@ -1779,7 +1829,6 @@
         unsigned long cr2, rip;
         u32 vect_info;
         enum emulation_result er;
-        int r;
 
         vect_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
         intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
@@ -1817,33 +1866,7 @@
                 error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
         if (is_page_fault(intr_info)) {
                 cr2 = vmcs_readl(EXIT_QUALIFICATION);
-
-                mutex_lock(&vcpu->kvm->lock);
-                r = kvm_mmu_page_fault(vcpu, cr2, error_code);
-                if (r < 0) {
-                        mutex_unlock(&vcpu->kvm->lock);
-                        return r;
-                }
-                if (!r) {
-                        mutex_unlock(&vcpu->kvm->lock);
-                        return 1;
-                }
-
-                er = emulate_instruction(vcpu, kvm_run, cr2, error_code, 0);
-                mutex_unlock(&vcpu->kvm->lock);
-
-                switch (er) {
-                case EMULATE_DONE:
-                        return 1;
-                case EMULATE_DO_MMIO:
-                        ++vcpu->stat.mmio_exits;
-                        return 0;
-                case EMULATE_FAIL:
-                        kvm_report_emulation_failure(vcpu, "pagetable");
-                        break;
-                default:
-                        BUG();
-                }
+                return kvm_mmu_page_fault(vcpu, cr2, error_code);
         }
 
         if (vcpu->rmode.active &&
@@ -2104,6 +2127,33 @@
         return 1;
 }
 
+static int handle_wbinvd(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+        skip_emulated_instruction(vcpu);
+        /* TODO: Add support for VT-d/pass-through device */
+        return 1;
+}
+
+static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+        u64 exit_qualification;
+        enum emulation_result er;
+        unsigned long offset;
+
+        exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
+        offset = exit_qualification & 0xffful;
+
+        er = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
+
+        if (er != EMULATE_DONE) {
+                printk(KERN_ERR
+                       "Fail to handle apic access vmexit! Offset is 0x%lx\n",
+                       offset);
+                return -ENOTSUPP;
+        }
+        return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -2123,7 +2173,9 @@
         [EXIT_REASON_PENDING_INTERRUPT]       = handle_interrupt_window,
         [EXIT_REASON_HLT]                     = handle_halt,
         [EXIT_REASON_VMCALL]                  = handle_vmcall,
-        [EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold
+        [EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold,
+        [EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
+        [EXIT_REASON_WBINVD]                  = handle_wbinvd,
 };
 
 static const int kvm_vmx_max_exit_handlers =
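
For context, the two new entries are reached through the table lookup in kvm_handle_exit(), which is not part of this hunk. Roughly, paraphrasing that dispatch (the fallback shown is a simplification of the real error path):

        static int dispatch_exit(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run,
                                 u32 exit_reason)
        {
                if (exit_reason < kvm_vmx_max_exit_handlers
                    && kvm_vmx_exit_handlers[exit_reason])
                        return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run);
                kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
                return 0;
        }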
@@ -2248,16 +2300,13 @@
         asm(
                 /* Store host registers */
 #ifdef CONFIG_X86_64
-                "push %%rax; push %%rbx; push %%rdx;"
-                "push %%rsi; push %%rdi; push %%rbp;"
-                "push %%r8;  push %%r9;  push %%r10; push %%r11;"
-                "push %%r12; push %%r13; push %%r14; push %%r15;"
+                "push %%rdx; push %%rbp;"
                 "push %%rcx \n\t"
-                ASM_VMX_VMWRITE_RSP_RDX "\n\t"
 #else
-                "pusha; push %%ecx \n\t"
-                ASM_VMX_VMWRITE_RSP_RDX "\n\t"
+                "push %%edx; push %%ebp;"
+                "push %%ecx \n\t"
 #endif
+                ASM_VMX_VMWRITE_RSP_RDX "\n\t"
                 /* Check if vmlaunch or vmresume is needed */
                 "cmp $0, %1 \n\t"
                 /* Load guest registers.  Don't clobber flags. */
@@ -2316,12 +2365,8 @@
                 "mov %%r15, %c[r15](%3) \n\t"
                 "mov %%cr2, %%rax   \n\t"
                 "mov %%rax, %c[cr2](%3) \n\t"
-                "mov (%%rsp), %3 \n\t"
 
-                "pop  %%rcx; pop  %%r15; pop  %%r14; pop  %%r13; pop  %%r12;"
-                "pop  %%r11; pop  %%r10; pop  %%r9;  pop  %%r8;"
-                "pop  %%rbp; pop  %%rdi; pop  %%rsi;"
-                "pop  %%rdx; pop  %%rbx; pop  %%rax \n\t"
+                "pop  %%rcx; pop  %%rbp; pop  %%rdx \n\t"
 #else
                 "xchg %3, (%%esp) \n\t"
                 "mov %%eax, %c[rax](%3) \n\t"
2326
2371
                "xchg %3, (%%esp) \n\t"
2327
2372
                "mov %%eax, %c[rax](%3) \n\t"
2333
2378
                "mov %%ebp, %c[rbp](%3) \n\t"
2334
2379
                "mov %%cr2, %%eax  \n\t"
2335
2380
                "mov %%eax, %c[cr2](%3) \n\t"
2336
 
                "mov (%%esp), %3 \n\t"
2337
2381
 
2338
 
                "pop %%ecx; popa \n\t"
 
2382
                "pop %%ecx; pop %%ebp; pop %%edx \n\t"
2339
2383
#endif
2340
2384
                "setbe %0 \n\t"
2341
2385
              : "=q" (vmx->fail)
@@ -2359,7 +2403,14 @@
                 [r15]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R15])),
 #endif
                 [cr2]"i"(offsetof(struct kvm_vcpu, cr2))
-              : "cc", "memory");
+              : "cc", "memory"
+#ifdef CONFIG_X86_64
+                , "rbx", "rdi", "rsi"
+                , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
+#else
+                , "ebx", "edi", "esi"
+#endif
+              );
 
         vcpu->interrupt_window_open =
                 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0;
@@ -2371,7 +2422,7 @@
 
         /* We need to handle NMIs before interrupts are enabled */
         if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */
-                asm ("int $2");
+                asm("int $2");
 }
 
 static void vmx_inject_page_fault(struct kvm_vcpu *vcpu,
@@ -2430,6 +2481,7 @@
 {
         int err;
         struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+        int cpu;
 
         if (!vmx)
                 return ERR_PTR(-ENOMEM);
@@ -2454,9 +2506,12 @@
 
         vmcs_clear(vmx->vmcs);
 
-        vcpu_load(&vmx->vcpu);
+        cpu = get_cpu();
+        in_special_section();
+        vmx_vcpu_load(&vmx->vcpu, cpu);
         err = vmx_vcpu_setup(vmx);
-        vcpu_put(&vmx->vcpu);
+        vmx_vcpu_put(&vmx->vcpu);
+        put_cpu();
         if (err)
                 goto free_vmcs;
 
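The vcpu_load() to get_cpu()/vmx_vcpu_load() change above pins the task to one processor while the VMCS is bound. The bracket it uses, shown in isolation (in_special_section() is specific to this external-module tree and left out here):

        #include <linux/preempt.h>
        #include <linux/smp.h>

        /* get_cpu() disables preemption and returns the current processor
         * id; put_cpu() re-enables preemption.  vmx_vcpu_load() needs this
         * because it binds a per-cpu VMCS with VMPTRLD. */
        static void run_pinned(void (*work)(int cpu))
        {
                int cpu = get_cpu();
                work(cpu);      /* cannot be migrated off this cpu here */
                put_cpu();
        }
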
@@ -2544,6 +2599,8 @@
         .set_irq = vmx_inject_irq,
         .inject_pending_irq = vmx_intr_assist,
         .inject_pending_vectors = do_interrupt_requests,
+
+        .set_tss_addr = vmx_set_tss_addr,
 };
 
 static int __init vmx_init(void)