1437
1501
unsigned long a;
1438
1502
struct descriptor_table dt;
1441
1504
unsigned long kvm_vmx_return;
1443
1505
u32 exec_control;
1445
if (!init_rmode_tss(vmx->vcpu.kvm)) {
1450
vmx->vcpu.rmode.active = 0;
1452
vmx->vcpu.regs[VCPU_REGS_RDX] = get_rdx_init_val();
1453
set_cr8(&vmx->vcpu, 0);
1454
msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
1455
if (vmx->vcpu.vcpu_id == 0)
1456
msr |= MSR_IA32_APICBASE_BSP;
1457
kvm_set_apic_base(&vmx->vcpu, msr);
1459
fx_init(&vmx->vcpu);
1462
* GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
1463
* insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4. Sigh.
1465
if (vmx->vcpu.vcpu_id == 0) {
1466
vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
1467
vmcs_writel(GUEST_CS_BASE, 0x000f0000);
1469
vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.sipi_vector << 8);
1470
vmcs_writel(GUEST_CS_BASE, vmx->vcpu.sipi_vector << 12);
1472
vmcs_write32(GUEST_CS_LIMIT, 0xffff);
1473
vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
1475
seg_setup(VCPU_SREG_DS);
1476
seg_setup(VCPU_SREG_ES);
1477
seg_setup(VCPU_SREG_FS);
1478
seg_setup(VCPU_SREG_GS);
1479
seg_setup(VCPU_SREG_SS);
1481
vmcs_write16(GUEST_TR_SELECTOR, 0);
1482
vmcs_writel(GUEST_TR_BASE, 0);
1483
vmcs_write32(GUEST_TR_LIMIT, 0xffff);
1484
vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
1486
vmcs_write16(GUEST_LDTR_SELECTOR, 0);
1487
vmcs_writel(GUEST_LDTR_BASE, 0);
1488
vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
1489
vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);
1491
vmcs_write32(GUEST_SYSENTER_CS, 0);
1492
vmcs_writel(GUEST_SYSENTER_ESP, 0);
1493
vmcs_writel(GUEST_SYSENTER_EIP, 0);
1495
vmcs_writel(GUEST_RFLAGS, 0x02);
1496
if (vmx->vcpu.vcpu_id == 0)
1497
vmcs_writel(GUEST_RIP, 0xfff0);
1499
vmcs_writel(GUEST_RIP, 0);
1500
vmcs_writel(GUEST_RSP, 0);
1502
/* todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0 */
1503
vmcs_writel(GUEST_DR7, 0x400);
1505
vmcs_writel(GUEST_GDTR_BASE, 0);
1506
vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
1508
vmcs_writel(GUEST_IDTR_BASE, 0);
1509
vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
1511
vmcs_write32(GUEST_ACTIVITY_STATE, 0);
1512
vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
1513
vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
1516
1508
vmcs_write64(IO_BITMAP_A, page_to_phys(vmx_io_bitmap_a));
1517
1509
vmcs_write64(IO_BITMAP_B, page_to_phys(vmx_io_bitmap_b));
1521
1511
vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
1523
/* Special registers */
1524
vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
1527
1514
vmcs_write32(PIN_BASED_VM_EXEC_CONTROL,
1528
1515
vmcs_config.pin_based_exec_ctrl);
1602
1593
vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
1604
1595
/* 22.2.1, 20.8.1 */
1605
1596
vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl);
1607
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */
1609
#ifdef CONFIG_X86_64
1610
vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
1611
if (vm_need_tpr_shadow(vmx->vcpu.kvm))
1612
vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
1613
page_to_phys(vmx->vcpu.apic->regs_page));
1614
vmcs_write32(TPR_THRESHOLD, 0);
1617
1598
vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
1618
1599
vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK);
1601
if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))
1602
if (alloc_apic_access_page(vmx->vcpu.kvm) != 0)
1608
static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
1610
struct vcpu_vmx *vmx = to_vmx(vcpu);
1614
if (!init_rmode_tss(vmx->vcpu.kvm)) {
1619
vmx->vcpu.rmode.active = 0;
1621
vmx->vcpu.regs[VCPU_REGS_RDX] = get_rdx_init_val();
1622
set_cr8(&vmx->vcpu, 0);
1623
msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
1624
if (vmx->vcpu.vcpu_id == 0)
1625
msr |= MSR_IA32_APICBASE_BSP;
1626
kvm_set_apic_base(&vmx->vcpu, msr);
1628
fx_init(&vmx->vcpu);
1631
* GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
1632
* insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4. Sigh.
1634
if (vmx->vcpu.vcpu_id == 0) {
1635
vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
1636
vmcs_writel(GUEST_CS_BASE, 0x000f0000);
1638
vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.sipi_vector << 8);
1639
vmcs_writel(GUEST_CS_BASE, vmx->vcpu.sipi_vector << 12);
1641
vmcs_write32(GUEST_CS_LIMIT, 0xffff);
1642
vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
1644
seg_setup(VCPU_SREG_DS);
1645
seg_setup(VCPU_SREG_ES);
1646
seg_setup(VCPU_SREG_FS);
1647
seg_setup(VCPU_SREG_GS);
1648
seg_setup(VCPU_SREG_SS);
1650
vmcs_write16(GUEST_TR_SELECTOR, 0);
1651
vmcs_writel(GUEST_TR_BASE, 0);
1652
vmcs_write32(GUEST_TR_LIMIT, 0xffff);
1653
vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
1655
vmcs_write16(GUEST_LDTR_SELECTOR, 0);
1656
vmcs_writel(GUEST_LDTR_BASE, 0);
1657
vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
1658
vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);
1660
vmcs_write32(GUEST_SYSENTER_CS, 0);
1661
vmcs_writel(GUEST_SYSENTER_ESP, 0);
1662
vmcs_writel(GUEST_SYSENTER_EIP, 0);
1664
vmcs_writel(GUEST_RFLAGS, 0x02);
1665
if (vmx->vcpu.vcpu_id == 0)
1666
vmcs_writel(GUEST_RIP, 0xfff0);
1668
vmcs_writel(GUEST_RIP, 0);
1669
vmcs_writel(GUEST_RSP, 0);
1671
/* todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0 */
1672
vmcs_writel(GUEST_DR7, 0x400);
1674
vmcs_writel(GUEST_GDTR_BASE, 0);
1675
vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
1677
vmcs_writel(GUEST_IDTR_BASE, 0);
1678
vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
1680
vmcs_write32(GUEST_ACTIVITY_STATE, 0);
1681
vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
1682
vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
1686
/* Special registers */
1687
vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
1691
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */
1693
if (cpu_has_vmx_tpr_shadow()) {
1694
vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
1695
if (vm_need_tpr_shadow(vmx->vcpu.kvm))
1696
vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
1697
page_to_phys(vmx->vcpu.apic->regs_page));
1698
vmcs_write32(TPR_THRESHOLD, 0);
1701
if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))
1702
vmcs_write64(APIC_ACCESS_ADDR,
1703
page_to_phys(vmx->vcpu.kvm->apic_access_page));
1620
1705
vmx->vcpu.cr0 = 0x60000010;
1621
1706
vmx_set_cr0(&vmx->vcpu, vmx->vcpu.cr0); /* enter rmode */
1622
1707
vmx_set_cr4(&vmx->vcpu, 0);
1635
static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
1637
struct vcpu_vmx *vmx = to_vmx(vcpu);
1639
vmx_vcpu_setup(vmx);
1642
static void inject_rmode_irq(struct kvm_vcpu *vcpu, int irq)
1647
unsigned long flags;
1648
unsigned long ss_base = vmcs_readl(GUEST_SS_BASE);
1649
u16 sp = vmcs_readl(GUEST_RSP);
1650
u32 ss_limit = vmcs_read32(GUEST_SS_LIMIT);
1652
if (sp > ss_limit || sp < 6) {
1653
vcpu_printf(vcpu, "%s: #SS, rsp 0x%lx ss 0x%lx limit 0x%x\n",
1655
vmcs_readl(GUEST_RSP),
1656
vmcs_readl(GUEST_SS_BASE),
1657
vmcs_read32(GUEST_SS_LIMIT));
1661
if (emulator_read_std(irq * sizeof(ent), &ent, sizeof(ent), vcpu) !=
1663
vcpu_printf(vcpu, "%s: read guest err\n", __FUNCTION__);
1667
flags = vmcs_readl(GUEST_RFLAGS);
1668
cs = vmcs_readl(GUEST_CS_BASE) >> 4;
1669
ip = vmcs_readl(GUEST_RIP);
1672
if (emulator_write_emulated(
1673
ss_base + sp - 2, &flags, 2, vcpu) != X86EMUL_CONTINUE ||
1674
emulator_write_emulated(
1675
ss_base + sp - 4, &cs, 2, vcpu) != X86EMUL_CONTINUE ||
1676
emulator_write_emulated(
1677
ss_base + sp - 6, &ip, 2, vcpu) != X86EMUL_CONTINUE) {
1678
vcpu_printf(vcpu, "%s: write guest err\n", __FUNCTION__);
1682
vmcs_writel(GUEST_RFLAGS, flags &
1683
~(X86_EFLAGS_IF | X86_EFLAGS_AC | X86_EFLAGS_TF));
1684
vmcs_write16(GUEST_CS_SELECTOR, ent[1]) ;
1685
vmcs_writel(GUEST_CS_BASE, ent[1] << 4);
1686
vmcs_writel(GUEST_RIP, ent[0]);
1687
vmcs_writel(GUEST_RSP, (vmcs_readl(GUEST_RSP) & ~0xffff) | (sp - 6));
1690
1720
static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
1692
1722
if (vcpu->rmode.active) {
1693
inject_rmode_irq(vcpu, irq);
1723
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
1724
irq | INTR_TYPE_SOFT_INTR | INTR_INFO_VALID_MASK);
1725
vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
1726
vmcs_writel(GUEST_RIP, vmcs_readl(GUEST_RIP) - 1);
1696
1729
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
1817
1866
error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
1818
1867
if (is_page_fault(intr_info)) {
1819
1868
cr2 = vmcs_readl(EXIT_QUALIFICATION);
1821
mutex_lock(&vcpu->kvm->lock);
1822
r = kvm_mmu_page_fault(vcpu, cr2, error_code);
1824
mutex_unlock(&vcpu->kvm->lock);
1828
mutex_unlock(&vcpu->kvm->lock);
1832
er = emulate_instruction(vcpu, kvm_run, cr2, error_code, 0);
1833
mutex_unlock(&vcpu->kvm->lock);
1838
case EMULATE_DO_MMIO:
1839
++vcpu->stat.mmio_exits;
1842
kvm_report_emulation_failure(vcpu, "pagetable");
1869
return kvm_mmu_page_fault(vcpu, cr2, error_code);
1849
1872
if (vcpu->rmode.active &&
2130
static int handle_wbinvd(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2132
skip_emulated_instruction(vcpu);
2133
/* TODO: Add support for VT-d/pass-through device */
2137
static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2139
u64 exit_qualification;
2140
enum emulation_result er;
2141
unsigned long offset;
2143
exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
2144
offset = exit_qualification & 0xffful;
2146
er = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
2148
if (er != EMULATE_DONE) {
2150
"Fail to handle apic access vmexit! Offset is 0x%lx\n",
2108
2158
* The exit handlers return 1 if the exit was handled fully and guest execution
2109
2159
* may resume. Otherwise they set the kvm_run parameter to indicate what needs