~ubuntu-branches/debian/wheezy/linux-2.6/wheezy


Viewing changes to arch/x86/kvm/vmx.c

  • Committer: Bazaar Package Importer
  • Author(s): Ben Hutchings, Ben Hutchings, Aurelien Jarno
  • Date: 2011-06-07 12:14:05 UTC
  • mfrom: (43.1.9 sid)
  • Revision ID: james.westby@ubuntu.com-20110607121405-i3h1rd7nrnd2b73h
Tags: 2.6.39-2
[ Ben Hutchings ]
* [x86] Enable BACKLIGHT_APPLE, replacing BACKLIGHT_MBP_NVIDIA
  (Closes: #627492)
* cgroups: Disable memory resource controller by default. Allow it
  to be enabled using kernel parameter 'cgroup_enable=memory'.
* rt2800usb: Enable support for more USB devices including
  Linksys WUSB600N (Closes: #596626) (this change was accidentally
  omitted from 2.6.39-1)
* [x86] Remove Celeron from list of processors supporting PAE. Most
  'Celeron M' models do not.
* Update debconf template translations:
  - Swedish (Martin Bagge) (Closes: #628932)
  - French (David Prévot) (Closes: #628191)
* aufs: Update for 2.6.39 (Closes: #627837)
* Add stable 2.6.39.1, including:
  - ext4: don't set PageUptodate in ext4_end_bio()
  - pata_cmd64x: fix boot crash on parisc (Closes: #622997, #622745)
  - ext3: Fix fs corruption when make_indexed_dir() fails
  - netfilter: nf_ct_sip: validate Content-Length in TCP SIP messages
  - sctp: fix race between sctp_bind_addr_free() and
    sctp_bind_addr_conflict()
  - sctp: fix memory leak of the ASCONF queue when free asoc
  - md/bitmap: fix saving of events_cleared and other state
  - cdc_acm: Fix oops when Droids MuIn LCD is connected
  - cx88: Fix conversion from BKL to fine-grained locks (Closes: #619827)
  - keys: Set cred->user_ns in key_replace_session_keyring (CVE-2011-2184)
  - tmpfs: fix race between truncate and writepage
  - nfs41: Correct offset for LAYOUTCOMMIT
  - xen/mmu: fix a race window causing leave_mm BUG()
  - ext4: fix possible use-after-free in ext4_remove_li_request()
  For the complete list of changes, see:
   http://www.kernel.org/pub/linux/kernel/v2.6/ChangeLog-2.6.39.1
* Bump ABI to 2
* netfilter: Enable IP_SET, IP_SET_BITMAP_IP, IP_SET_BITMAP_IPMAC,
  IP_SET_BITMAP_PORT, IP_SET_HASH_IP, IP_SET_HASH_IPPORT,
  IP_SET_HASH_IPPORTIP, IP_SET_HASH_IPPORTNET, IP_SET_HASH_NET,
  IP_SET_HASH_NETPORT, IP_SET_LIST_SET, NETFILTER_XT_SET as modules
  (Closes: #629401)

[ Aurelien Jarno ]
* [mipsel/loongson-2f] Disable SCSI_LPFC to work around a GCC ICE.

--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -93,14 +93,14 @@
  * These 2 parameters are used to config the controls for Pause-Loop Exiting:
  * ple_gap:    upper bound on the amount of time between two successive
  *             executions of PAUSE in a loop. Also indicate if ple enabled.
- *             According to test, this time is usually small than 41 cycles.
+ *             According to test, this time is usually smaller than 128 cycles.
  * ple_window: upper bound on the amount of time a guest is allowed to execute
  *             in a PAUSE loop. Tests indicate that most spinlocks are held for
  *             less than 2^12 cycles
  * Time is measured based on a counter that runs at the same rate as the TSC,
  * refer SDM volume 3b section 21.6.13 & 22.1.3.
  */
-#define KVM_VMX_DEFAULT_PLE_GAP    41
+#define KVM_VMX_DEFAULT_PLE_GAP    128
 #define KVM_VMX_DEFAULT_PLE_WINDOW 4096
 static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP;
 module_param(ple_gap, int, S_IRUGO);
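These two module parameters are consumed later in vmx.c, outside this diff, when each vCPU's VMCS is programmed. A minimal sketch of that use, assuming the PLE_GAP and PLE_WINDOW VMCS field names used elsewhere in this file:

    /* Sketch (not part of this hunk): how the parameters reach the VMCS
     * during vCPU setup. ple_gap doubles as the enable flag, per the
     * "Also indicate if ple enabled" comment above. */
    if (ple_gap) {
            vmcs_write32(PLE_GAP, ple_gap);
            vmcs_write32(PLE_WINDOW, ple_window);
    }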
@@ -176,11 +176,11 @@
         return container_of(vcpu, struct vcpu_vmx, vcpu);
 }
 
-static int init_rmode(struct kvm *kvm);
 static u64 construct_eptp(unsigned long root_hpa);
 static void kvm_cpu_vmxon(u64 addr);
 static void kvm_cpu_vmxoff(void);
 static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
+static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
 
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -1333,19 +1333,25 @@
 
         rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
         if (msr & FEATURE_CONTROL_LOCKED) {
+                /* launched w/ TXT and VMX disabled */
                 if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
                         && tboot_enabled())
                         return 1;
+                /* launched w/o TXT and VMX only enabled w/ TXT */
                 if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
+                        && (msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
                         && !tboot_enabled()) {
                         printk(KERN_WARNING "kvm: disable TXT in the BIOS or "
-                                " activate TXT before enabling KVM\n");
+                                "activate TXT before enabling KVM\n");
                         return 1;
                 }
+                /* launched w/o TXT and VMX disabled */
+                if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
+                        && !tboot_enabled())
+                        return 1;
         }
 
         return 0;
-        /* locked but not enabled */
 }
 
 static void kvm_cpu_vmxon(u64 addr)
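Read as a whole, the patched function now covers all three BIOS-disabled cases: a TXT launch with the inside-SMX bit clear, a non-TXT launch with VMX enabled only for TXT, and a non-TXT launch with VMX disabled outright. A condensed restatement of the post-patch logic as a hypothetical helper, using the MSR bit names from the hunk:

    /* Sketch, not kernel code: with the feature-control MSR locked, VMX is
     * BIOS-disabled unless the VMXON-enable bit matching the boot
     * environment (TXT launch or not) is set. */
    static int vmx_bios_disabled_sketch(u64 msr, bool txt_launched)
    {
            if (!(msr & FEATURE_CONTROL_LOCKED))
                    return 0;       /* unlocked: KVM can enable VMXON itself */
            if (txt_launched)
                    return !(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX);
            return !(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX);
    }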
@@ -1683,6 +1689,7 @@
         vmx->emulation_required = 1;
         vmx->rmode.vm86_active = 0;
 
+        vmcs_write16(GUEST_TR_SELECTOR, vmx->rmode.tr.selector);
         vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base);
         vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit);
         vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar);
@@ -1756,6 +1763,19 @@
         vmx->emulation_required = 1;
         vmx->rmode.vm86_active = 1;
 
+        /*
+         * Very old userspace does not call KVM_SET_TSS_ADDR before entering
+         * vcpu. Call it here with phys address pointing 16M below 4G.
+         */
+        if (!vcpu->kvm->arch.tss_addr) {
+                printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be "
+                             "called before entering vcpu\n");
+                srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+                vmx_set_tss_addr(vcpu->kvm, 0xfeffd000);
+                vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+        }
+
+        vmx->rmode.tr.selector = vmcs_read16(GUEST_TR_SELECTOR);
         vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
         vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));
 
@@ -1794,7 +1814,6 @@
 
 continue_rmode:
         kvm_mmu_reset_context(vcpu);
-        init_rmode(vcpu->kvm);
 }
 
 static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
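The unlock/relock pair in the enter_rmode() hunk above is load-bearing: vmx_set_tss_addr() ends up in the memslot-update path, which waits for all kvm->srcu readers to finish, so calling it while this vCPU still held its own SRCU read lock would deadlock. A sketch of the constraint (annotated excerpt, not new diff content):

    /* Memslot updates synchronize kvm->srcu; drop the read lock first. */
    srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
    vmx_set_tss_addr(vcpu->kvm, 0xfeffd000);   /* default TSS ~16M below 4G */
    vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);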
@@ -2030,23 +2049,40 @@
         vmcs_writel(GUEST_CR4, hw_cr4);
 }
 
-static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
-{
-        struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
-
-        return vmcs_readl(sf->base);
-}
-
 static void vmx_get_segment(struct kvm_vcpu *vcpu,
                             struct kvm_segment *var, int seg)
 {
+        struct vcpu_vmx *vmx = to_vmx(vcpu);
         struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+        struct kvm_save_segment *save;
         u32 ar;
 
+        if (vmx->rmode.vm86_active
+            && (seg == VCPU_SREG_TR || seg == VCPU_SREG_ES
+                || seg == VCPU_SREG_DS || seg == VCPU_SREG_FS
+                || seg == VCPU_SREG_GS)
+            && !emulate_invalid_guest_state) {
+                switch (seg) {
+                case VCPU_SREG_TR: save = &vmx->rmode.tr; break;
+                case VCPU_SREG_ES: save = &vmx->rmode.es; break;
+                case VCPU_SREG_DS: save = &vmx->rmode.ds; break;
+                case VCPU_SREG_FS: save = &vmx->rmode.fs; break;
+                case VCPU_SREG_GS: save = &vmx->rmode.gs; break;
+                default: BUG();
+                }
+                var->selector = save->selector;
+                var->base = save->base;
+                var->limit = save->limit;
+                ar = save->ar;
+                if (seg == VCPU_SREG_TR
+                    || var->selector == vmcs_read16(sf->selector))
+                        goto use_saved_rmode_seg;
+        }
         var->base = vmcs_readl(sf->base);
         var->limit = vmcs_read32(sf->limit);
         var->selector = vmcs_read16(sf->selector);
         ar = vmcs_read32(sf->ar_bytes);
+use_saved_rmode_seg:
         if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state)
                 ar = 0;
         var->type = ar & 15;
@@ -2060,6 +2096,18 @@
         var->unusable = (ar >> 16) & 1;
 }
 
+static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
+{
+        struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+        struct kvm_segment s;
+
+        if (to_vmx(vcpu)->rmode.vm86_active) {
+                vmx_get_segment(vcpu, &s, seg);
+                return s.base;
+        }
+        return vmcs_readl(sf->base);
+}
+
 static int vmx_get_cpl(struct kvm_vcpu *vcpu)
 {
         if (!is_protmode(vcpu))
@@ -2101,6 +2149,7 @@
         u32 ar;
 
         if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) {
+                vmcs_write16(sf->selector, var->selector);
                 vmx->rmode.tr.selector = var->selector;
                 vmx->rmode.tr.base = var->base;
                 vmx->rmode.tr.limit = var->limit;
@@ -2361,11 +2410,12 @@
 
 static int init_rmode_tss(struct kvm *kvm)
 {
-        gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT;
+        gfn_t fn;
         u16 data = 0;
-        int ret = 0;
-        int r;
+        int r, idx, ret = 0;
 
+        idx = srcu_read_lock(&kvm->srcu);
+        fn = rmode_tss_base(kvm) >> PAGE_SHIFT;
         r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
         if (r < 0)
                 goto out;
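KVM memslots are SRCU-protected, so guest-memory helpers such as kvm_clear_guest_page() must run inside a kvm->srcu read-side critical section. The init_rmode() wrapper (removed later in this diff) used to provide that section around these calls; this and the following hunks make init_rmode_tss() and init_rmode_identity_map() take it for themselves. The pattern being introduced, as a sketch:

    /* SRCU read-side pattern around guest-memory access (sketch). */
    int idx, r;

    idx = srcu_read_lock(&kvm->srcu);
    r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);   /* touches memslots */
    srcu_read_unlock(&kvm->srcu, idx);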
@@ -2389,12 +2439,13 @@
 
         ret = 1;
 out:
+        srcu_read_unlock(&kvm->srcu, idx);
         return ret;
 }
 
 static int init_rmode_identity_map(struct kvm *kvm)
 {
-        int i, r, ret;
+        int i, idx, r, ret;
         pfn_t identity_map_pfn;
         u32 tmp;
 
@@ -2409,6 +2460,7 @@
                 return 1;
         ret = 0;
         identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT;
+        idx = srcu_read_lock(&kvm->srcu);
         r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
         if (r < 0)
                 goto out;
@@ -2424,6 +2476,7 @@
         kvm->arch.ept_identity_pagetable_done = true;
         ret = 1;
 out:
+        srcu_read_unlock(&kvm->srcu, idx);
         return ret;
 }
 
@@ -2699,22 +2752,6 @@
         return 0;
 }
 
-static int init_rmode(struct kvm *kvm)
-{
-        int idx, ret = 0;
-
-        idx = srcu_read_lock(&kvm->srcu);
-        if (!init_rmode_tss(kvm))
-                goto exit;
-        if (!init_rmode_identity_map(kvm))
-                goto exit;
-
-        ret = 1;
-exit:
-        srcu_read_unlock(&kvm->srcu, idx);
-        return ret;
-}
-
 static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 {
         struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2722,10 +2759,6 @@
         int ret;
 
         vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
-        if (!init_rmode(vmx->vcpu.kvm)) {
-                ret = -ENOMEM;
-                goto out;
-        }
 
         vmx->rmode.vm86_active = 0;
 
@@ -2805,7 +2838,7 @@
                 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
                 if (vm_need_tpr_shadow(vmx->vcpu.kvm))
                         vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
-                                page_to_phys(vmx->vcpu.arch.apic->regs_page));
+                                     __pa(vmx->vcpu.arch.apic->regs));
                 vmcs_write32(TPR_THRESHOLD, 0);
         }
 
@@ -2971,5 +3004,8 @@
         if (ret)
                 return ret;
         kvm->arch.tss_addr = addr;
+        if (!init_rmode_tss(kvm))
+                return  -ENOMEM;
+
         return 0;
 }
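With this hunk, the real-mode TSS is formatted once, when userspace registers its address, instead of on every vCPU reset (the old per-reset init_rmode() call is removed earlier in this diff, and a TSS-init failure now surfaces as -ENOMEM from the ioctl). A call-flow sketch:

    /* Call-flow sketch (not diff content):
     * KVM_SET_TSS_ADDR ioctl
     *   -> vmx_set_tss_addr(kvm, addr)   // registers the memory slot for the TSS
     *        -> init_rmode_tss(kvm)      // formats the TSS pages once
     */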
@@ -3962,7 +3998,7 @@
 #define Q "l"
 #endif
 
-static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
+static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 {
         struct vcpu_vmx *vmx = to_vmx(vcpu);
 
@@ -3991,6 +4027,7 @@
         asm(
                 /* Store host registers */
                 "push %%"R"dx; push %%"R"bp;"
+                "push %%"R"cx \n\t" /* placeholder for guest rcx */
                 "push %%"R"cx \n\t"
                 "cmp %%"R"sp, %c[host_rsp](%0) \n\t"
                 "je 1f \n\t"
@@ -4032,10 +4069,11 @@
                 ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t"
                 ".Lkvm_vmx_return: "
                 /* Save guest registers, load host registers, keep flags */
-                "xchg %0,     (%%"R"sp) \n\t"
+                "mov %0, %c[wordsize](%%"R"sp) \n\t"
+                "pop %0 \n\t"
                 "mov %%"R"ax, %c[rax](%0) \n\t"
                 "mov %%"R"bx, %c[rbx](%0) \n\t"
-                "push"Q" (%%"R"sp); pop"Q" %c[rcx](%0) \n\t"
+                "pop"Q" %c[rcx](%0) \n\t"
                 "mov %%"R"dx, %c[rdx](%0) \n\t"
                 "mov %%"R"si, %c[rsi](%0) \n\t"
                 "mov %%"R"di, %c[rdi](%0) \n\t"
@@ -4053,7 +4091,7 @@
                 "mov %%cr2, %%"R"ax   \n\t"
                 "mov %%"R"ax, %c[cr2](%0) \n\t"
 
-                "pop  %%"R"bp; pop  %%"R"bp; pop  %%"R"dx \n\t"
+                "pop  %%"R"bp; pop  %%"R"dx \n\t"
                 "setbe %c[fail](%0) \n\t"
               : : "c"(vmx), "d"((unsigned long)HOST_RSP),
                 [launched]"i"(offsetof(struct vcpu_vmx, launched)),
@@ -4076,7 +4114,8 @@
                 [r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])),
                 [r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])),
 #endif
-                [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2))
+                [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)),
+                [wordsize]"i"(sizeof(ulong))
               : "cc", "memory"
                 , R"ax", R"bx", R"di", R"si"
 #ifdef CONFIG_X86_64
@@ -4183,8 +4222,11 @@
                 if (!kvm->arch.ept_identity_map_addr)
                         kvm->arch.ept_identity_map_addr =
                                 VMX_EPT_IDENTITY_PAGETABLE_ADDR;
+                err = -ENOMEM;
                 if (alloc_identity_pagetable(kvm) != 0)
                         goto free_vmcs;
+                if (!init_rmode_identity_map(kvm))
+                        goto free_vmcs;
         }
 
         return &vmx->vcpu;