93
93
* These 2 parameters are used to config the controls for Pause-Loop Exiting:
94
94
* ple_gap: upper bound on the amount of time between two successive
95
95
* executions of PAUSE in a loop. Also indicates whether PLE is enabled.
96
* According to test, this time is usually smaller than 41 cycles.
96
* According to test, this time is usually smaller than 128 cycles.
97
97
* ple_window: upper bound on the amount of time a guest is allowed to execute
98
98
* in a PAUSE loop. Tests indicate that most spinlocks are held for
99
99
* less than 2^12 cycles
100
100
* Time is measured based on a counter that runs at the same rate as the TSC,
101
101
* refer to SDM volume 3b, sections 21.6.13 & 22.1.3.
103
#define KVM_VMX_DEFAULT_PLE_GAP 41
103
#define KVM_VMX_DEFAULT_PLE_GAP 128
104
104
#define KVM_VMX_DEFAULT_PLE_WINDOW 4096
105
105
static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP;
106
106
module_param(ple_gap, int, S_IRUGO);
176
176
return container_of(vcpu, struct vcpu_vmx, vcpu);
179
static int init_rmode(struct kvm *kvm);
180
179
static u64 construct_eptp(unsigned long root_hpa);
181
180
static void kvm_cpu_vmxon(u64 addr);
182
181
static void kvm_cpu_vmxoff(void);
183
182
static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
183
static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
185
185
static DEFINE_PER_CPU(struct vmcs *, vmxarea);
186
186
static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
1334
1334
rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
1335
1335
if (msr & FEATURE_CONTROL_LOCKED) {
1336
/* launched w/ TXT and VMX disabled */
1336
1337
if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
1337
1338
&& tboot_enabled())
1340
/* launched w/o TXT and VMX only enabled w/ TXT */
1339
1341
if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
1342
&& (msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
1340
1343
&& !tboot_enabled()) {
1341
1344
printk(KERN_WARNING "kvm: disable TXT in the BIOS or "
1342
" activate TXT before enabling KVM\n");
1345
"activate TXT before enabling KVM\n");
1348
/* launched w/o TXT and VMX disabled */
1349
if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
1350
&& !tboot_enabled())
1348
/* locked but not enabled */
1351
1357
static void kvm_cpu_vmxon(u64 addr)
1683
1689
vmx->emulation_required = 1;
1684
1690
vmx->rmode.vm86_active = 0;
1692
vmcs_write16(GUEST_TR_SELECTOR, vmx->rmode.tr.selector);
1686
1693
vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base);
1687
1694
vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit);
1688
1695
vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar);
1756
1763
vmx->emulation_required = 1;
1757
1764
vmx->rmode.vm86_active = 1;
1767
* Very old userspace does not call KVM_SET_TSS_ADDR before entering
1768
* vcpu. Call it here with phys address pointing 16M below 4G.
1770
if (!vcpu->kvm->arch.tss_addr) {
1771
printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be "
1772
"called before entering vcpu\n");
1773
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
1774
vmx_set_tss_addr(vcpu->kvm, 0xfeffd000);
1775
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
1778
vmx->rmode.tr.selector = vmcs_read16(GUEST_TR_SELECTOR);
1759
1779
vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
1760
1780
vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));
2030
2049
vmcs_writel(GUEST_CR4, hw_cr4);
2033
static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
2035
struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
2037
return vmcs_readl(sf->base);
2040
2052
static void vmx_get_segment(struct kvm_vcpu *vcpu,
2041
2053
struct kvm_segment *var, int seg)
2055
struct vcpu_vmx *vmx = to_vmx(vcpu);
2043
2056
struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
2057
struct kvm_save_segment *save;
2060
if (vmx->rmode.vm86_active
2061
&& (seg == VCPU_SREG_TR || seg == VCPU_SREG_ES
2062
|| seg == VCPU_SREG_DS || seg == VCPU_SREG_FS
2063
|| seg == VCPU_SREG_GS)
2064
&& !emulate_invalid_guest_state) {
2066
case VCPU_SREG_TR: save = &vmx->rmode.tr; break;
2067
case VCPU_SREG_ES: save = &vmx->rmode.es; break;
2068
case VCPU_SREG_DS: save = &vmx->rmode.ds; break;
2069
case VCPU_SREG_FS: save = &vmx->rmode.fs; break;
2070
case VCPU_SREG_GS: save = &vmx->rmode.gs; break;
2073
var->selector = save->selector;
2074
var->base = save->base;
2075
var->limit = save->limit;
2077
if (seg == VCPU_SREG_TR
2078
|| var->selector == vmcs_read16(sf->selector))
2079
goto use_saved_rmode_seg;
2046
2081
var->base = vmcs_readl(sf->base);
2047
2082
var->limit = vmcs_read32(sf->limit);
2048
2083
var->selector = vmcs_read16(sf->selector);
2049
2084
ar = vmcs_read32(sf->ar_bytes);
2085
use_saved_rmode_seg:
2050
2086
if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state)
2052
2088
var->type = ar & 15;
2060
2096
var->unusable = (ar >> 16) & 1;
2099
static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
2101
struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
2102
struct kvm_segment s;
2104
if (to_vmx(vcpu)->rmode.vm86_active) {
2105
vmx_get_segment(vcpu, &s, seg);
2108
return vmcs_readl(sf->base);
2063
2111
static int vmx_get_cpl(struct kvm_vcpu *vcpu)
2065
2113
if (!is_protmode(vcpu))
2103
2151
if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) {
2152
vmcs_write16(sf->selector, var->selector);
2104
2153
vmx->rmode.tr.selector = var->selector;
2105
2154
vmx->rmode.tr.base = var->base;
2106
2155
vmx->rmode.tr.limit = var->limit;
2362
2411
static int init_rmode_tss(struct kvm *kvm)
2364
gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT;
2415
int r, idx, ret = 0;
2417
idx = srcu_read_lock(&kvm->srcu);
2418
fn = rmode_tss_base(kvm) >> PAGE_SHIFT;
2369
2419
r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
2411
2462
identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT;
2463
idx = srcu_read_lock(&kvm->srcu);
2412
2464
r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
2702
static int init_rmode(struct kvm *kvm)
2706
idx = srcu_read_lock(&kvm->srcu);
2707
if (!init_rmode_tss(kvm))
2709
if (!init_rmode_identity_map(kvm))
2714
srcu_read_unlock(&kvm->srcu, idx);
2718
2755
static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
2720
2757
struct vcpu_vmx *vmx = to_vmx(vcpu);
2805
2838
vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
2806
2839
if (vm_need_tpr_shadow(vmx->vcpu.kvm))
2807
2840
vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
2808
page_to_phys(vmx->vcpu.arch.apic->regs_page));
2841
__pa(vmx->vcpu.arch.apic->regs));
2809
2842
vmcs_write32(TPR_THRESHOLD, 0);
3992
4028
/* Store host registers */
3993
4029
"push %%"R"dx; push %%"R"bp;"
4030
"push %%"R"cx \n\t" /* placeholder for guest rcx */
3994
4031
"push %%"R"cx \n\t"
3995
4032
"cmp %%"R"sp, %c[host_rsp](%0) \n\t"
4032
4069
".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t"
4033
4070
".Lkvm_vmx_return: "
4034
4071
/* Save guest registers, load host registers, keep flags */
4035
"xchg %0, (%%"R"sp) \n\t"
4072
"mov %0, %c[wordsize](%%"R"sp) \n\t"
4036
4074
"mov %%"R"ax, %c[rax](%0) \n\t"
4037
4075
"mov %%"R"bx, %c[rbx](%0) \n\t"
4038
"push"Q" (%%"R"sp); pop"Q" %c[rcx](%0) \n\t"
4076
"pop"Q" %c[rcx](%0) \n\t"
4039
4077
"mov %%"R"dx, %c[rdx](%0) \n\t"
4040
4078
"mov %%"R"si, %c[rsi](%0) \n\t"
4041
4079
"mov %%"R"di, %c[rdi](%0) \n\t"
4053
4091
"mov %%cr2, %%"R"ax \n\t"
4054
4092
"mov %%"R"ax, %c[cr2](%0) \n\t"
4056
"pop %%"R"bp; pop %%"R"bp; pop %%"R"dx \n\t"
4094
"pop %%"R"bp; pop %%"R"dx \n\t"
4057
4095
"setbe %c[fail](%0) \n\t"
4058
4096
: : "c"(vmx), "d"((unsigned long)HOST_RSP),
4059
4097
[launched]"i"(offsetof(struct vcpu_vmx, launched)),
4076
4114
[r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])),
4077
4115
[r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])),
4079
[cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2))
4117
[cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)),
4118
[wordsize]"i"(sizeof(ulong))
4080
4119
: "cc", "memory"
4081
4120
, R"ax", R"bx", R"di", R"si"
4082
4121
#ifdef CONFIG_X86_64
4183
4222
if (!kvm->arch.ept_identity_map_addr)
4184
4223
kvm->arch.ept_identity_map_addr =
4185
4224
VMX_EPT_IDENTITY_PAGETABLE_ADDR;
4186
4226
if (alloc_identity_pagetable(kvm) != 0)
4187
4227
goto free_vmcs;
4228
if (!init_rmode_identity_map(kvm))
4190
4232
return &vmx->vcpu;