~ubuntu-branches/ubuntu/precise/linux-ti-omap4/precise

Viewing changes to arch/x86/kvm/x86.c

  • Committer: Bazaar Package Importer
  • Author(s): Paolo Pisati
  • Date: 2011-06-29 15:23:51 UTC
  • mfrom: (26.1.1 natty-proposed)
  • Revision ID: james.westby@ubuntu.com-20110629152351-xs96tm303d95rpbk
  • Tags: 3.0.0-1200.2
  • Changelog:
    * Rebased against 3.0.0-6.7
    * BSP from TI based on 3.0.0

@@ -47,7 +47,7 @@
 #include <trace/events/kvm.h>
 
 #define CREATE_TRACE_POINTS
-#include <asm/kvm-trace.h>
+#include "trace.h"
 
 #include <asm/debugreg.h>
 #include <asm/msr.h>
@@ -60,30 +60,21 @@
 #include <asm/div64.h>
 
 #define MAX_IO_MSRS 256
-#define CR0_RESERVED_BITS                                               \
-        (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
-                          | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
-                          | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
-#define CR4_RESERVED_BITS                                               \
-        (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
-                          | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
-                          | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR  \
-                          | X86_CR4_OSXSAVE \
-                          | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
-
-#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
-
 #define KVM_MAX_MCE_BANKS 32
 #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)
 
+#define emul_to_vcpu(ctxt) \
+        container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt)
+
 /* EFER defaults:
  * - enable syscall per default because its emulated by KVM
  * - enable LME and LMA per default on 64 bit KVM
  */
 #ifdef CONFIG_X86_64
-static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffafeULL;
+static
+u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
 #else
-static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL;
+static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
 #endif
 
 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
@@ -99,6 +90,11 @@
 int ignore_msrs = 0;
 module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR);
 
+bool kvm_has_tsc_control;
+EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
+u32  kvm_max_guest_tsc_khz;
+EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
+
 #define KVM_NR_SHARED_MSRS 16
 
 struct kvm_shared_msrs_global {
@@ -156,6 +152,8 @@
 
 u64 __read_mostly host_xcr0;
 
+int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
+
 static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
 {
         int i;
@@ -525,8 +523,10 @@
 
         kvm_x86_ops->set_cr0(vcpu, cr0);
 
-        if ((cr0 ^ old_cr0) & X86_CR0_PG)
+        if ((cr0 ^ old_cr0) & X86_CR0_PG) {
                 kvm_clear_async_pf_completion_queue(vcpu);
+                kvm_async_pf_hash_reset(vcpu);
+        }
 
         if ((cr0 ^ old_cr0) & update_bits)
                 kvm_mmu_reset_context(vcpu);
@@ -979,7 +979,15 @@
         return ret;
 }
 
-static inline u64 nsec_to_cycles(u64 nsec)
+static u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu)
+{
+        if (vcpu->arch.virtual_tsc_khz)
+                return vcpu->arch.virtual_tsc_khz;
+        else
+                return __this_cpu_read(cpu_tsc_khz);
+}
+
+static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
 {
         u64 ret;
 
@@ -987,25 +995,24 @@
         if (kvm_tsc_changes_freq())
                 printk_once(KERN_WARNING
                  "kvm: unreliable cycle conversion on adjustable rate TSC\n");
-        ret = nsec * __this_cpu_read(cpu_tsc_khz);
+        ret = nsec * vcpu_tsc_khz(vcpu);
         do_div(ret, USEC_PER_SEC);
         return ret;
 }
 
-static void kvm_arch_set_tsc_khz(struct kvm *kvm, u32 this_tsc_khz)
+static void kvm_init_tsc_catchup(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
 {
         /* Compute a scale to convert nanoseconds in TSC cycles */
         kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000,
-                           &kvm->arch.virtual_tsc_shift,
-                           &kvm->arch.virtual_tsc_mult);
-        kvm->arch.virtual_tsc_khz = this_tsc_khz;
+                           &vcpu->arch.tsc_catchup_shift,
+                           &vcpu->arch.tsc_catchup_mult);
 }
 
 static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
 {
         u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec,
-                                      vcpu->kvm->arch.virtual_tsc_mult,
-                                      vcpu->kvm->arch.virtual_tsc_shift);
+                                      vcpu->arch.tsc_catchup_mult,
+                                      vcpu->arch.tsc_catchup_shift);
         tsc += vcpu->arch.last_tsc_write;
         return tsc;
 }
@@ -1017,8 +1024,8 @@
         unsigned long flags;
         s64 sdiff;
 
-        spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
-        offset = data - native_read_tsc();
+        raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
+        offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
         ns = get_kernel_ns();
         elapsed = ns - kvm->arch.last_tsc_nsec;
         sdiff = data - kvm->arch.last_tsc_write;
@@ -1028,19 +1035,19 @@
         /*
          * Special case: close write to TSC within 5 seconds of
          * another CPU is interpreted as an attempt to synchronize
-         * The 5 seconds is to accomodate host load / swapping as
+         * The 5 seconds is to accommodate host load / swapping as
          * well as any reset of TSC during the boot process.
          *
         * In that case, for a reliable TSC, we can match TSC offsets,
         * or make a best guest using elapsed value.
         */
-        if (sdiff < nsec_to_cycles(5ULL * NSEC_PER_SEC) &&
+        if (sdiff < nsec_to_cycles(vcpu, 5ULL * NSEC_PER_SEC) &&
             elapsed < 5ULL * NSEC_PER_SEC) {
                 if (!check_tsc_unstable()) {
                         offset = kvm->arch.last_tsc_offset;
                         pr_debug("kvm: matched tsc offset for %llu\n", data);
                 } else {
-                        u64 delta = nsec_to_cycles(elapsed);
+                        u64 delta = nsec_to_cycles(vcpu, elapsed);
                         offset += delta;
                         pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
                 }
@@ -1050,7 +1057,7 @@
         kvm->arch.last_tsc_write = data;
         kvm->arch.last_tsc_offset = offset;
         kvm_x86_ops->write_tsc_offset(vcpu, offset);
-        spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
+        raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
 
         /* Reset of TSC must disable overshoot protection below */
         vcpu->arch.hv_clock.tsc_timestamp = 0;
@@ -1072,8 +1079,7 @@
         local_irq_save(flags);
         kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp);
         kernel_ns = get_kernel_ns();
-        this_tsc_khz = __this_cpu_read(cpu_tsc_khz);
-
+        this_tsc_khz = vcpu_tsc_khz(v);
         if (unlikely(this_tsc_khz == 0)) {
                 local_irq_restore(flags);
                 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
@@ -1453,6 +1459,14 @@
         return 0;
 }
 
+static void kvmclock_reset(struct kvm_vcpu *vcpu)
+{
+        if (vcpu->arch.time_page) {
+                kvm_release_page_dirty(vcpu->arch.time_page);
+                vcpu->arch.time_page = NULL;
+        }
+}
+
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
         switch (msr) {
@@ -1510,10 +1524,7 @@
                 break;
         case MSR_KVM_SYSTEM_TIME_NEW:
         case MSR_KVM_SYSTEM_TIME: {
-                if (vcpu->arch.time_page) {
-                        kvm_release_page_dirty(vcpu->arch.time_page);
-                        vcpu->arch.time_page = NULL;
-                }
+                kvmclock_reset(vcpu);
 
                 vcpu->arch.time = data;
                 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
@@ -1592,6 +1603,12 @@
                 } else
                         return set_msr_hyperv(vcpu, msr, data);
                 break;
+        case MSR_IA32_BBL_CR_CTL3:
+                /* Drop writes to this legacy MSR -- see rdmsr
+                 * counterpart for further detail.
+                 */
+                pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data);
+                break;
         default:
                 if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
                         return xen_hvm_config(vcpu, data);
@@ -1846,6 +1863,19 @@
                 } else
                         return get_msr_hyperv(vcpu, msr, pdata);
                 break;
+        case MSR_IA32_BBL_CR_CTL3:
+                /* This legacy MSR exists but isn't fully documented in current
+                 * silicon.  It is however accessed by winxp in very narrow
+                 * scenarios where it sets bit #19, itself documented as
+                 * a "reserved" bit.  Best effort attempt to source coherent
+                 * read data here should the balance of the register be
+                 * interpreted by the guest:
+                 *
+                 * L2 cache control register 3: 64GB range, 256KB size,
+                 * enabled, latency 0x1, configured
+                 */
+                data = 0xbe702111;
+                break;
         default:
                 if (!ignore_msrs) {
                         pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
@@ -1966,6 +1996,7 @@
         case KVM_CAP_X86_ROBUST_SINGLESTEP:
         case KVM_CAP_XSAVE:
         case KVM_CAP_ASYNC_PF:
+        case KVM_CAP_GET_TSC_KHZ:
                 r = 1;
                 break;
         case KVM_CAP_COALESCED_MMIO:
@@ -1992,6 +2023,9 @@
         case KVM_CAP_XCRS:
                 r = cpu_has_xsave;
                 break;
+        case KVM_CAP_TSC_CONTROL:
+                r = kvm_has_tsc_control;
+                break;
         default:
                 r = 0;
                 break;
@@ -2093,15 +2127,20 @@
         kvm_x86_ops->vcpu_load(vcpu, cpu);
         if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
                 /* Make sure TSC doesn't go backwards */
-                s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
-                                native_read_tsc() - vcpu->arch.last_host_tsc;
+                s64 tsc_delta;
+                u64 tsc;
+
+                kvm_get_msr(vcpu, MSR_IA32_TSC, &tsc);
+                tsc_delta = !vcpu->arch.last_guest_tsc ? 0 :
+                             tsc - vcpu->arch.last_guest_tsc;
+
                 if (tsc_delta < 0)
                         mark_tsc_unstable("KVM discovered backwards TSC");
                 if (check_tsc_unstable()) {
                         kvm_x86_ops->adjust_tsc_offset(vcpu, -tsc_delta);
                         vcpu->arch.tsc_catchup = 1;
-                        kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
                 }
+                kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
                 if (vcpu->cpu != cpu)
                         kvm_migrate_timers(vcpu);
                 vcpu->cpu = cpu;
@@ -2112,7 +2151,7 @@
 {
         kvm_x86_ops->vcpu_put(vcpu);
         kvm_put_guest_fpu(vcpu);
-        vcpu->arch.last_host_tsc = native_read_tsc();
+        kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc);
 }
 
 static int is_efer_nx(void)
@@ -2297,6 +2336,12 @@
                 F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) |
                 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);
 
+        /* cpuid 0xC0000001.edx */
+        const u32 kvm_supported_word5_x86_features =
+                F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
+                F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
+                F(PMM) | F(PMM_EN);
+
         /* all calls to cpuid_count() should be made on the same cpu */
         get_cpu();
         do_cpuid_1_ent(entry, function, index);
@@ -2368,9 +2413,9 @@
                 int i;
 
                 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
-                for (i = 1; *nent < maxnent; ++i) {
-                        if (entry[i - 1].eax == 0 && i != 2)
-                                break;
+                for (i = 1; *nent < maxnent && i < 64; ++i) {
+                        if (entry[i].eax == 0)
+                                continue;
                         do_cpuid_1_ent(&entry[i], function, i);
                         entry[i].flags |=
                                KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
@@ -2391,6 +2436,7 @@
                 entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) |
                              (1 << KVM_FEATURE_NOP_IO_DELAY) |
                              (1 << KVM_FEATURE_CLOCKSOURCE2) |
+                             (1 << KVM_FEATURE_ASYNC_PF) |
                              (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
                 entry->ebx = 0;
                 entry->ecx = 0;
@@ -2405,6 +2451,20 @@
                 entry->ecx &= kvm_supported_word6_x86_features;
                 cpuid_mask(&entry->ecx, 6);
                 break;
+        /*Add support for Centaur's CPUID instruction*/
+        case 0xC0000000:
+                /*Just support up to 0xC0000004 now*/
+                entry->eax = min(entry->eax, 0xC0000004);
+                break;
+        case 0xC0000001:
+                entry->edx &= kvm_supported_word5_x86_features;
+                cpuid_mask(&entry->edx, 5);
+                break;
+        case 0xC0000002:
+        case 0xC0000003:
+        case 0xC0000004:
+                /*Now nothing to do, reserved for the future*/
+                break;
         }
 
         kvm_x86_ops->set_supported_cpuid(function, entry);
@@ -2451,6 +2511,26 @@
         if (nent >= cpuid->nent)
                 goto out_free;
 
+        /* Add support for Centaur's CPUID instruction. */
+        if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR) {
+                do_cpuid_ent(&cpuid_entries[nent], 0xC0000000, 0,
+                                &nent, cpuid->nent);
+
+                r = -E2BIG;
+                if (nent >= cpuid->nent)
+                        goto out_free;
+
+                limit = cpuid_entries[nent - 1].eax;
+                for (func = 0xC0000001;
+                        func <= limit && nent < cpuid->nent; ++func)
+                        do_cpuid_ent(&cpuid_entries[nent], func, 0,
+                                        &nent, cpuid->nent);
+
+                r = -E2BIG;
+                if (nent >= cpuid->nent)
+                        goto out_free;
+        }
+
         do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent,
                      cpuid->nent);
 
@@ -2575,9 +2655,6 @@
         if (mce->status & MCI_STATUS_UC) {
                 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
                     !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
-                        printk(KERN_DEBUG "kvm: set_mce: "
-                               "injects mce exception while "
-                               "previous one is in progress!\n");
                         kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
                         return 0;
                 }
@@ -2648,8 +2725,6 @@
         vcpu->arch.interrupt.pending = events->interrupt.injected;
         vcpu->arch.interrupt.nr = events->interrupt.nr;
         vcpu->arch.interrupt.soft = events->interrupt.soft;
-        if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm))
-                kvm_pic_clear_isr_ack(vcpu->kvm);
         if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
                 kvm_x86_ops->set_interrupt_shadow(vcpu,
                                                   events->interrupt.shadow);
@@ -3024,6 +3099,32 @@
                 r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
                 break;
         }
+        case KVM_SET_TSC_KHZ: {
+                u32 user_tsc_khz;
+
+                r = -EINVAL;
+                if (!kvm_has_tsc_control)
+                        break;
+
+                user_tsc_khz = (u32)arg;
+
+                if (user_tsc_khz >= kvm_max_guest_tsc_khz)
+                        goto out;
+
+                kvm_x86_ops->set_tsc_khz(vcpu, user_tsc_khz);
+
+                r = 0;
+                goto out;
+        }
+        case KVM_GET_TSC_KHZ: {
+                r = -EIO;
+                if (check_tsc_unstable())
+                        goto out;
+
+                r = vcpu_tsc_khz(vcpu);
+
+                goto out;
+        }
         default:
                 r = -EINVAL;
         }
@@ -3573,20 +3674,43 @@
 static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
                            const void *v)
 {
-        if (vcpu->arch.apic &&
-            !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v))
-                return 0;
-
-        return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
+        int handled = 0;
+        int n;
+
+        do {
+                n = min(len, 8);
+                if (!(vcpu->arch.apic &&
+                      !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v))
+                    && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
+                        break;
+                handled += n;
+                addr += n;
+                len -= n;
+                v += n;
+        } while (len);
+
+        return handled;
 }
 
 static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
 {
-        if (vcpu->arch.apic &&
-            !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v))
-                return 0;
-
-        return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
+        int handled = 0;
+        int n;
+
+        do {
+                n = min(len, 8);
+                if (!(vcpu->arch.apic &&
+                      !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v))
+                    && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
+                        break;
+                trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
+                handled += n;
+                addr += n;
+                len -= n;
+                v += n;
+        } while (len);
+
+        return handled;
 }
 
 static void kvm_set_segment(struct kvm_vcpu *vcpu,
@@ -3681,37 +3805,43 @@
 }
 
 /* used for instruction fetching */
-static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes,
-                                struct kvm_vcpu *vcpu,
+static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
+                                gva_t addr, void *val, unsigned int bytes,
                                 struct x86_exception *exception)
 {
+        struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
         u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+
         return kvm_read_guest_virt_helper(addr, val, bytes, vcpu,
                                           access | PFERR_FETCH_MASK,
                                           exception);
 }
 
-static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
-                               struct kvm_vcpu *vcpu,
+static int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
+                               gva_t addr, void *val, unsigned int bytes,
                                struct x86_exception *exception)
 {
+        struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
        u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+
         return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
                                           exception);
 }
 
-static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes,
-                                      struct kvm_vcpu *vcpu,
+static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt,
+                                      gva_t addr, void *val, unsigned int bytes,
                                       struct x86_exception *exception)
 {
+        struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
         return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
 }
 
-static int kvm_write_guest_virt_system(gva_t addr, void *val,
+static int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
+                                       gva_t addr, void *val,
                                        unsigned int bytes,
-                                       struct kvm_vcpu *vcpu,
                                        struct x86_exception *exception)
 {
+        struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
         void *data = val;
         int r = X86EMUL_CONTINUE;
 
@@ -3739,13 +3869,15 @@
         return r;
 }
 
-static int emulator_read_emulated(unsigned long addr,
+static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
+                                  unsigned long addr,
                                   void *val,
                                   unsigned int bytes,
-                                  struct x86_exception *exception,
-                                  struct kvm_vcpu *vcpu)
+                                  struct x86_exception *exception)
 {
+        struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
         gpa_t                 gpa;
+        int handled;
 
         if (vcpu->mmio_read_completed) {
                 memcpy(val, vcpu->mmio_data, bytes);
@@ -3764,7 +3896,7 @@
         if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
                 goto mmio;
 
-        if (kvm_read_guest_virt(addr, val, bytes, vcpu, exception)
+        if (kvm_read_guest_virt(ctxt, addr, val, bytes, exception)
             == X86EMUL_CONTINUE)
                 return X86EMUL_CONTINUE;
 
@@ -3772,18 +3904,24 @@
         /*
          * Is this MMIO handled locally?
          */
-        if (!vcpu_mmio_read(vcpu, gpa, bytes, val)) {
-                trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, gpa, *(u64 *)val);
+        handled = vcpu_mmio_read(vcpu, gpa, bytes, val);
+
+        if (handled == bytes)
                 return X86EMUL_CONTINUE;
-        }
+
+        gpa += handled;
+        bytes -= handled;
+        val += handled;
 
         trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
 
         vcpu->mmio_needed = 1;
         vcpu->run->exit_reason = KVM_EXIT_MMIO;
         vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
-        vcpu->run->mmio.len = vcpu->mmio_size = bytes;
+        vcpu->mmio_size = bytes;
+        vcpu->run->mmio.len = min(vcpu->mmio_size, 8);
         vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0;
+        vcpu->mmio_index = 0;
 
         return X86EMUL_IO_NEEDED;
 }
@@ -3807,6 +3945,7 @@
                                            struct kvm_vcpu *vcpu)
 {
         gpa_t                 gpa;
+        int handled;
 
         gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception);
 
@@ -3825,25 +3964,35 @@
         /*
          * Is this MMIO handled locally?
          */
-        if (!vcpu_mmio_write(vcpu, gpa, bytes, val))
+        handled = vcpu_mmio_write(vcpu, gpa, bytes, val);
+        if (handled == bytes)
                 return X86EMUL_CONTINUE;
 
+        gpa += handled;
+        bytes -= handled;
+        val += handled;
+
         vcpu->mmio_needed = 1;
+        memcpy(vcpu->mmio_data, val, bytes);
         vcpu->run->exit_reason = KVM_EXIT_MMIO;
         vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
-        vcpu->run->mmio.len = vcpu->mmio_size = bytes;
+        vcpu->mmio_size = bytes;
+        vcpu->run->mmio.len = min(vcpu->mmio_size, 8);
         vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1;
-        memcpy(vcpu->run->mmio.data, val, bytes);
+        memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8);
+        vcpu->mmio_index = 0;
 
         return X86EMUL_CONTINUE;
 }
 
-int emulator_write_emulated(unsigned long addr,
+int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
+                            unsigned long addr,
                             const void *val,
                             unsigned int bytes,
-                            struct x86_exception *exception,
-                            struct kvm_vcpu *vcpu)
+                            struct x86_exception *exception)
 {
+        struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+
         /* Crossing a page boundary? */
         if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
                 int rc, now;
@@ -3871,13 +4020,14 @@
         (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old))
 #endif
 
-static int emulator_cmpxchg_emulated(unsigned long addr,
+static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
+                                     unsigned long addr,
                                      const void *old,
                                      const void *new,
                                      unsigned int bytes,
-                                     struct x86_exception *exception,
-                                     struct kvm_vcpu *vcpu)
+                                     struct x86_exception *exception)
 {
+        struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
         gpa_t gpa;
         struct page *page;
         char *kaddr;
@@ -3933,7 +4083,7 @@
 emul_write:
         printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
 
-        return emulator_write_emulated(addr, new, bytes, exception, vcpu);
+        return emulator_write_emulated(ctxt, addr, new, bytes, exception);
 }
 
 static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
@@ -3952,9 +4102,12 @@
 }
 
 
-static int emulator_pio_in_emulated(int size, unsigned short port, void *val,
-                             unsigned int count, struct kvm_vcpu *vcpu)
+static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
+                                    int size, unsigned short port, void *val,
+                                    unsigned int count)
 {
+        struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+
         if (vcpu->arch.pio.count)
                 goto data_avail;
 
@@ -3982,10 +4135,12 @@
         return 0;
 }
 
-static int emulator_pio_out_emulated(int size, unsigned short port,
-                              const void *val, unsigned int count,
-                              struct kvm_vcpu *vcpu)
+static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
+                                     int size, unsigned short port,
+                                     const void *val, unsigned int count)
 {
+        struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+
         trace_kvm_pio(1, port, size, count);
 
         vcpu->arch.pio.port = port;
@@ -4015,10 +4170,9 @@
         return kvm_x86_ops->get_segment_base(vcpu, seg);
 }
 
-int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
+static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
 {
-        kvm_mmu_invlpg(vcpu, address);
-        return X86EMUL_CONTINUE;
+        kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
 }
 
 int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
@@ -4040,22 +4194,20 @@
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
 
-int emulate_clts(struct kvm_vcpu *vcpu)
-{
-        kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
-        kvm_x86_ops->fpu_activate(vcpu);
-        return X86EMUL_CONTINUE;
-}
-
-int emulator_get_dr(int dr, unsigned long *dest, struct kvm_vcpu *vcpu)
-{
-        return _kvm_get_dr(vcpu, dr, dest);
-}
-
-int emulator_set_dr(int dr, unsigned long value, struct kvm_vcpu *vcpu)
-{
-
-        return __kvm_set_dr(vcpu, dr, value);
+static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
+{
+        kvm_emulate_wbinvd(emul_to_vcpu(ctxt));
+}
+
+int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
+{
+        return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
+}
+
+int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
+{
+
+        return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
 }
 
 static u64 mk_cr_64(u64 curr_cr, u32 new_val)
@@ -4063,8 +4215,9 @@
         return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
 }
 
-static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu)
+static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr)
 {
+        struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
         unsigned long value;
 
         switch (cr) {
@@ -4091,8 +4244,9 @@
         return value;
 }
 
-static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu)
+static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
 {
+        struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
         int res = 0;
 
         switch (cr) {
@@ -4119,33 +4273,45 @@
         return res;
 }
 
-static int emulator_get_cpl(struct kvm_vcpu *vcpu)
-{
-        return kvm_x86_ops->get_cpl(vcpu);
-}
-
-static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu)
-{
-        kvm_x86_ops->get_gdt(vcpu, dt);
-}
-
-static void emulator_get_idt(struct desc_ptr *dt, struct kvm_vcpu *vcpu)
-{
-        kvm_x86_ops->get_idt(vcpu, dt);
-}
-
-static unsigned long emulator_get_cached_segment_base(int seg,
-                                                      struct kvm_vcpu *vcpu)
-{
-        return get_segment_base(vcpu, seg);
-}
-
-static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg,
-                                           struct kvm_vcpu *vcpu)
+static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
+{
+        return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt));
+}
+
+static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
+{
+        kvm_x86_ops->get_gdt(emul_to_vcpu(ctxt), dt);
+}
+
+static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
+{
+        kvm_x86_ops->get_idt(emul_to_vcpu(ctxt), dt);
+}
+
+static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
+{
+        kvm_x86_ops->set_gdt(emul_to_vcpu(ctxt), dt);
+}
+
+static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
+{
+        kvm_x86_ops->set_idt(emul_to_vcpu(ctxt), dt);
+}
+
+static unsigned long emulator_get_cached_segment_base(
+        struct x86_emulate_ctxt *ctxt, int seg)
+{
+        return get_segment_base(emul_to_vcpu(ctxt), seg);
+}
+
+static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
+                                 struct desc_struct *desc, u32 *base3,
+                                 int seg)
 {
         struct kvm_segment var;
 
-        kvm_get_segment(vcpu, &var, seg);
+        kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
+        *selector = var.selector;
 
         if (var.unusable)
                 return false;
@@ -4154,6 +4320,10 @@
                 var.limit >>= 12;
         set_desc_limit(desc, var.limit);
         set_desc_base(desc, (unsigned long)var.base);
+#ifdef CONFIG_X86_64
+        if (base3)
+                *base3 = var.base >> 32;
+#endif
         desc->type = var.type;
         desc->s = var.s;
         desc->dpl = var.dpl;
@@ -4166,15 +4336,18 @@
         return true;
 }
 
-static void emulator_set_cached_descriptor(struct desc_struct *desc, int seg,
-                                           struct kvm_vcpu *vcpu)
+static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
+                                 struct desc_struct *desc, u32 base3,
+                                 int seg)
 {
+        struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
         struct kvm_segment var;
 
-        /* needed to preserve selector */
-        kvm_get_segment(vcpu, &var, seg);
-
+        var.selector = selector;
         var.base = get_desc_base(desc);
+#ifdef CONFIG_X86_64
+        var.base |= ((u64)base3) << 32;
+#endif
         var.limit = get_desc_limit(desc);
         if (desc->g)
                 var.limit = (var.limit << 12) | 0xfff;
@@ -4194,22 +4367,44 @@
         return;
 }
 
-static u16 emulator_get_segment_selector(int seg, struct kvm_vcpu *vcpu)
-{
-        struct kvm_segment kvm_seg;
-
-        kvm_get_segment(vcpu, &kvm_seg, seg);
-        return kvm_seg.selector;
-}
-
-static void emulator_set_segment_selector(u16 sel, int seg,
-                                          struct kvm_vcpu *vcpu)
-{
-        struct kvm_segment kvm_seg;
-
-        kvm_get_segment(vcpu, &kvm_seg, seg);
-        kvm_seg.selector = sel;
-        kvm_set_segment(vcpu, &kvm_seg, seg);
+static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
+                            u32 msr_index, u64 *pdata)
+{
+        return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
+}
+
+static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
+                            u32 msr_index, u64 data)
+{
+        return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data);
+}
+
+static void emulator_halt(struct x86_emulate_ctxt *ctxt)
+{
+        emul_to_vcpu(ctxt)->arch.halt_request = 1;
+}
+
+static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
+{
+        preempt_disable();
+        kvm_load_guest_fpu(emul_to_vcpu(ctxt));
+        /*
+         * CR0.TS may reference the host fpu state, not the guest fpu state,
+         * so it may be clear at this point.
+         */
+        clts();
+}
+
+static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
+{
+        preempt_enable();
+}
+
+static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
+                              struct x86_instruction_info *info,
+                              enum x86_intercept_stage stage)
+{
+        return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage);
 }
 
 static struct x86_emulate_ops emulate_ops = {
@@ -4219,22 +4414,29 @@
         .read_emulated       = emulator_read_emulated,
         .write_emulated      = emulator_write_emulated,
         .cmpxchg_emulated    = emulator_cmpxchg_emulated,
+        .invlpg              = emulator_invlpg,
         .pio_in_emulated     = emulator_pio_in_emulated,
         .pio_out_emulated    = emulator_pio_out_emulated,
-        .get_cached_descriptor = emulator_get_cached_descriptor,
-        .set_cached_descriptor = emulator_set_cached_descriptor,
-        .get_segment_selector = emulator_get_segment_selector,
-        .set_segment_selector = emulator_set_segment_selector,
+        .get_segment         = emulator_get_segment,
+        .set_segment         = emulator_set_segment,
         .get_cached_segment_base = emulator_get_cached_segment_base,
         .get_gdt             = emulator_get_gdt,
         .get_idt             = emulator_get_idt,
+        .set_gdt             = emulator_set_gdt,
+        .set_idt             = emulator_set_idt,
         .get_cr              = emulator_get_cr,
         .set_cr              = emulator_set_cr,
         .cpl                 = emulator_get_cpl,
         .get_dr              = emulator_get_dr,
         .set_dr              = emulator_set_dr,
-        .set_msr             = kvm_set_msr,
-        .get_msr             = kvm_get_msr,
+        .set_msr             = emulator_set_msr,
+        .get_msr             = emulator_get_msr,
+        .halt                = emulator_halt,
+        .wbinvd              = emulator_wbinvd,
+        .fix_hypercall       = emulator_fix_hypercall,
+        .get_fpu             = emulator_get_fpu,
+        .put_fpu             = emulator_put_fpu,
+        .intercept           = emulator_intercept,
 };
 
 static void cache_all_regs(struct kvm_vcpu *vcpu)
@@ -4276,12 +4478,17 @@
         struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
         int cs_db, cs_l;
 
+        /*
+         * TODO: fix emulate.c to use guest_read/write_register
+         * instead of direct ->regs accesses, can save hundred cycles
+         * on Intel for instructions that don't read/change RSP, for
+         * for example.
+         */
         cache_all_regs(vcpu);
 
         kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
 
-        vcpu->arch.emulate_ctxt.vcpu = vcpu;
-        vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
+        vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu);
         vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu);
         vcpu->arch.emulate_ctxt.mode =
                 (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
@@ -4289,8 +4496,10 @@
                 ? X86EMUL_MODE_VM86 : cs_l
                 ? X86EMUL_MODE_PROT64 : cs_db
                 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
+        vcpu->arch.emulate_ctxt.guest_mode = is_guest_mode(vcpu);
         memset(c, 0, sizeof(struct decode_cache));
         memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
+        vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
 }
 
 int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
@@ -4312,7 +4521,7 @@
         vcpu->arch.emulate_ctxt.eip = c->eip;
         memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
         kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
-        kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
+        kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
 
         if (irq == NMI_VECTOR)
                 vcpu->arch.nmi_pending = false;
@@ -4374,16 +4583,9 @@
 {
         int r;
         struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
+        bool writeback = true;
 
         kvm_clear_exception_queue(vcpu);
-        vcpu->arch.mmio_fault_cr2 = cr2;
-        /*
-         * TODO: fix emulate.c to use guest_read/write_register
-         * instead of direct ->regs accesses, can save hundred cycles
-         * on Intel for instructions that don't read/change RSP, for
-         * for example.
-         */
-        cache_all_regs(vcpu);
 
         if (!(emulation_type & EMULTYPE_NO_DECODE)) {
                 init_emulate_ctxt(vcpu);
@@ -4391,41 +4593,16 @@
                 vcpu->arch.emulate_ctxt.have_exception = false;
                 vcpu->arch.emulate_ctxt.perm_ok = false;
 
+                vcpu->arch.emulate_ctxt.only_vendor_specific_insn
+                        = emulation_type & EMULTYPE_TRAP_UD;
+
                 r = x86_decode_insn(&vcpu->arch.emulate_ctxt, insn, insn_len);
-                if (r == X86EMUL_PROPAGATE_FAULT)
-                        goto done;
 
                 trace_kvm_emulate_insn_start(vcpu);
-
-                /* Only allow emulation of specific instructions on #UD
-                 * (namely VMMCALL, sysenter, sysexit, syscall)*/
-                if (emulation_type & EMULTYPE_TRAP_UD) {
-                        if (!c->twobyte)
-                                return EMULATE_FAIL;
-                        switch (c->b) {
-                        case 0x01: /* VMMCALL */
-                                if (c->modrm_mod != 3 || c->modrm_rm != 1)
-                                        return EMULATE_FAIL;
-                                break;
-                        case 0x34: /* sysenter */
-                        case 0x35: /* sysexit */
-                                if (c->modrm_mod != 0 || c->modrm_rm != 0)
-                                        return EMULATE_FAIL;
-                                break;
-                        case 0x05: /* syscall */
-                                if (c->modrm_mod != 0 || c->modrm_rm != 0)
-                                        return EMULATE_FAIL;
-                                break;
-                        default:
-                                return EMULATE_FAIL;
-                        }
-
-                        if (!(c->modrm_reg == 0 || c->modrm_reg == 3))
-                                return EMULATE_FAIL;
-                }
-
                 ++vcpu->stat.insn_emulation;
                 if (r)  {
+                        if (emulation_type & EMULTYPE_TRAP_UD)
+                                return EMULATE_FAIL;
                         if (reexecute_instruction(vcpu, cr2))
                                 return EMULATE_DONE;
                         if (emulation_type & EMULTYPE_SKIP)
@@ -4439,13 +4616,19 @@
                 return EMULATE_DONE;
         }
 
-        /* this is needed for vmware backdor interface to work since it
+        /* this is needed for vmware backdoor interface to work since it
            changes registers values  during IO operation */
-        memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
+        if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
+                vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
+                memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
+        }
 
 restart:
         r = x86_emulate_insn(&vcpu->arch.emulate_ctxt);
 
+        if (r == EMULATION_INTERCEPTED)
+                return EMULATE_DONE;
+
         if (r == EMULATION_FAILED) {
                 if (reexecute_instruction(vcpu, cr2))
                         return EMULATE_DONE;
@@ -4453,28 +4636,34 @@
                 return handle_emulation_failure(vcpu);
         }
 
-done:
         if (vcpu->arch.emulate_ctxt.have_exception) {
                 inject_emulated_exception(vcpu);
                 r = EMULATE_DONE;
         } else if (vcpu->arch.pio.count) {
                 if (!vcpu->arch.pio.in)
                         vcpu->arch.pio.count = 0;
+                else
+                        writeback = false;
                 r = EMULATE_DO_MMIO;
         } else if (vcpu->mmio_needed) {
-                if (vcpu->mmio_is_write)
-                        vcpu->mmio_needed = 0;
+                if (!vcpu->mmio_is_write)
+                        writeback = false;
                 r = EMULATE_DO_MMIO;
         } else if (r == EMULATION_RESTART)
                 goto restart;
         else
                 r = EMULATE_DONE;
 
-        toggle_interruptibility(vcpu, vcpu->arch.emulate_ctxt.interruptibility);
-        kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
-        kvm_make_request(KVM_REQ_EVENT, vcpu);
-        memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
-        kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
+        if (writeback) {
+                toggle_interruptibility(vcpu,
+                                vcpu->arch.emulate_ctxt.interruptibility);
+                kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
+                kvm_make_request(KVM_REQ_EVENT, vcpu);
+                memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
+                vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
+                kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
+        } else
+                vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
 
         return r;
 }
@@ -4483,7 +4672,8 @@
 int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
 {
         unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
-        int ret = emulator_pio_out_emulated(size, port, &val, 1, vcpu);
+        int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
+                                            size, port, &val, 1);
         /* do not return to emulator after return from userspace */
         vcpu->arch.pio.count = 0;
         return ret;
@@ -4563,7 +4753,7 @@
 
         smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
 
-        spin_lock(&kvm_lock);
+        raw_spin_lock(&kvm_lock);
         list_for_each_entry(kvm, &vm_list, vm_list) {
                 kvm_for_each_vcpu(i, vcpu, kvm) {
                         if (vcpu->cpu != freq->cpu)
@@ -4573,7 +4763,7 @@
                                 send_ipi = 1;
                 }
         }
-        spin_unlock(&kvm_lock);
+        raw_spin_unlock(&kvm_lock);
 
         if (freq->old < freq->new && send_ipi) {
                 /*
@@ -4877,8 +5067,9 @@
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
 
-int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
+int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
 {
+        struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
         char instruction[3];
         unsigned long rip = kvm_rip_read(vcpu);
 
@@ -4891,21 +5082,8 @@
 
         kvm_x86_ops->patch_hypercall(vcpu, instruction);
 
-        return emulator_write_emulated(rip, instruction, 3, NULL, vcpu);
-}
-
-void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
-{
-        struct desc_ptr dt = { limit, base };
-
-        kvm_x86_ops->set_gdt(vcpu, &dt);
-}
-
-void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
-{
-        struct desc_ptr dt = { limit, base };
-
-        kvm_x86_ops->set_idt(vcpu, &dt);
+        return emulator_write_emulated(&vcpu->arch.emulate_ctxt,
+                                       rip, instruction, 3, NULL);
 }
 
 static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
@@ -4956,12 +5134,6 @@
                         best = e;
                         break;
                 }
-                /*
-                 * Both basic or both extended?
-                 */
-                if (((e->function ^ function) & 0x80000000) == 0)
-                        if (!best || e->function > best->function)
-                                best = e;
         }
         return best;
 }
@@ -4981,6 +5153,27 @@
         return 36;
 }
 
+/*
+ * If no match is found, check whether we exceed the vCPU's limit
+ * and return the content of the highest valid _standard_ leaf instead.
+ * This is to satisfy the CPUID specification.
+ */
+static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu,
+                                                  u32 function, u32 index)
+{
+        struct kvm_cpuid_entry2 *maxlevel;
+
+        maxlevel = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0);
+        if (!maxlevel || maxlevel->eax >= function)
+                return NULL;
+        if (function & 0x80000000) {
+                maxlevel = kvm_find_cpuid_entry(vcpu, 0, 0);
+                if (!maxlevel)
+                        return NULL;
+        }
+        return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index);
+}
+
 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 {
         u32 function, index;
@@ -4993,6 +5186,10 @@
         kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
         kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
         best = kvm_find_cpuid_entry(vcpu, function, index);
+
+        if (!best)
+                best = check_cpuid_limit(vcpu, function, index);
+
         if (best) {
                 kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax);
                 kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx);
@@ -5149,6 +5346,7 @@
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 {
         int r;
+        bool nmi_pending;
         bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
                 vcpu->run->request_interrupt_window;
 
@@ -5192,11 +5390,19 @@
         if (unlikely(r))
                 goto out;
 
+        /*
+         * An NMI can be injected between local nmi_pending read and
+         * vcpu->arch.nmi_pending read inside inject_pending_event().
+         * But in that case, KVM_REQ_EVENT will be set, which makes
+         * the race described above benign.
+         */
+        nmi_pending = ACCESS_ONCE(vcpu->arch.nmi_pending);
+
         if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
                 inject_pending_event(vcpu);
 
                 /* enable NMI/IRQ window open exits if needed */
-                if (vcpu->arch.nmi_pending)
+                if (nmi_pending)
                         kvm_x86_ops->enable_nmi_window(vcpu);
                 else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
                         kvm_x86_ops->enable_irq_window(vcpu);
@@ -5214,14 +5420,18 @@
                 kvm_load_guest_fpu(vcpu);
         kvm_load_guest_xcr0(vcpu);
 
-        atomic_set(&vcpu->guest_mode, 1);
-        smp_wmb();
+        vcpu->mode = IN_GUEST_MODE;
+
+        /* We should set ->mode before check ->requests,
+         * see the comment in make_all_cpus_request.
+         */
+        smp_mb();
 
         local_irq_disable();
 
-        if (!atomic_read(&vcpu->guest_mode) || vcpu->requests
+        if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests
             || need_resched() || signal_pending(current)) {
-                atomic_set(&vcpu->guest_mode, 0);
+                vcpu->mode = OUTSIDE_GUEST_MODE;
                 smp_wmb();
                 local_irq_enable();
                 preempt_enable();
5257
5467
 
5258
5468
        kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc);
5259
5469
 
5260
 
        atomic_set(&vcpu->guest_mode, 0);
 
5470
        vcpu->mode = OUTSIDE_GUEST_MODE;
5261
5471
        smp_wmb();
5262
5472
        local_irq_enable();
5263
5473
 
5372
5582
        return r;
5373
5583
}
5374
5584
 
 
5585
static int complete_mmio(struct kvm_vcpu *vcpu)
 
5586
{
 
5587
        struct kvm_run *run = vcpu->run;
 
5588
        int r;
 
5589
 
 
5590
        if (!(vcpu->arch.pio.count || vcpu->mmio_needed))
 
5591
                return 1;
 
5592
 
 
5593
        if (vcpu->mmio_needed) {
 
5594
                vcpu->mmio_needed = 0;
 
5595
                if (!vcpu->mmio_is_write)
 
5596
                        memcpy(vcpu->mmio_data + vcpu->mmio_index,
 
5597
                               run->mmio.data, 8);
 
5598
                vcpu->mmio_index += 8;
 
5599
                if (vcpu->mmio_index < vcpu->mmio_size) {
 
5600
                        run->exit_reason = KVM_EXIT_MMIO;
 
5601
                        run->mmio.phys_addr = vcpu->mmio_phys_addr + vcpu->mmio_index;
 
5602
                        memcpy(run->mmio.data, vcpu->mmio_data + vcpu->mmio_index, 8);
 
5603
                        run->mmio.len = min(vcpu->mmio_size - vcpu->mmio_index, 8);
 
5604
                        run->mmio.is_write = vcpu->mmio_is_write;
 
5605
                        vcpu->mmio_needed = 1;
 
5606
                        return 0;
 
5607
                }
 
5608
                if (vcpu->mmio_is_write)
 
5609
                        return 1;
 
5610
                vcpu->mmio_read_completed = 1;
 
5611
        }
 
5612
        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 
5613
        r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
 
5614
        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 
5615
        if (r != EMULATE_DONE)
 
5616
                return 0;
 
5617
        return 1;
 
5618
}
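complete_mmio() above drives an MMIO access that is wider than the 8-byte window in the kvm_run structure: each KVM_EXIT_MMIO round-trip moves one 8-byte chunk and advances mmio_index until mmio_size is reached, at which point emulation resumes with EMULTYPE_NO_DECODE. A rough userspace model of the chunking, with simplified stand-in fields rather than the real kvm_run layout:

/* Userspace sketch only: models complete_mmio()'s 8-byte chunking. */
#include <stdio.h>
#include <string.h>

struct fake_mmio {
        unsigned char data[32];    /* whole guest access, like vcpu->mmio_data */
        int index;                 /* like vcpu->mmio_index */
        int size;                  /* like vcpu->mmio_size */
        unsigned char run_buf[8];  /* like run->mmio.data, 8 bytes per exit */
};

/* Returns 0 while another KVM_EXIT_MMIO-style round-trip is still needed. */
static int complete_one_chunk(struct fake_mmio *m)
{
        memcpy(m->data + m->index, m->run_buf, 8); /* read case: pull chunk in */
        m->index += 8;
        if (m->index < m->size) {
                /* expose the next chunk to "userspace" and exit again */
                memcpy(m->run_buf, m->data + m->index, 8);
                return 0;
        }
        return 1;                                  /* whole access completed */
}

int main(void)
{
        struct fake_mmio m = { .index = 0, .size = 24 };
        int exits = 1;                             /* the initial exit */

        while (!complete_one_chunk(&m))
                exits++;
        printf("a 24-byte access took %d exits\n", exits);
        return 0;
}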
 
5619
 
5375
5620
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
5376
5621
{
5377
5622
        int r;
5398
5643
                }
5399
5644
        }
5400
5645
 
5401
 
        if (vcpu->arch.pio.count || vcpu->mmio_needed) {
5402
 
                if (vcpu->mmio_needed) {
5403
 
                        memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
5404
 
                        vcpu->mmio_read_completed = 1;
5405
 
                        vcpu->mmio_needed = 0;
5406
 
                }
5407
 
                vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
5408
 
                r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
5409
 
                srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
5410
 
                if (r != EMULATE_DONE) {
5411
 
                        r = 0;
5412
 
                        goto out;
5413
 
                }
5414
 
        }
 
5646
        r = complete_mmio(vcpu);
 
5647
        if (r <= 0)
 
5648
                goto out;
 
5649
 
5415
5650
        if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL)
5416
5651
                kvm_register_write(vcpu, VCPU_REGS_RAX,
5417
5652
                                     kvm_run->hypercall.ret);
5428
5663
 
5429
5664
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
5430
5665
{
 
5666
        if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
 
5667
                /*
 
5668
                 * We are here if userspace calls get_regs() in the middle of
 
5669
                 * instruction emulation. Register state needs to be copied
 
5670
                 * back from emulation context to vcpu. Userspace shouldn't do
 
5671
                 * that usually, but some badly designed PV devices (vmware
 
5672
                 * backdoor interface) need this to work
 
5673
                 */
 
5674
                struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
 
5675
                memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
 
5676
                vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
 
5677
        }
5431
5678
        regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
5432
5679
        regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
5433
5680
        regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
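The emulate_regs_need_sync_to_vcpu branch above handles userspace reading registers in the middle of emulation (the VMware backdoor case the comment mentions): the freshest values still live in the emulation context's decode cache and are copied back lazily before being reported. A small userspace sketch of that lazy sync, using hypothetical names (fake_vcpu, get_regs):

/* Userspace sketch only: models the lazy register sync, not the real ABI. */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define NR_REGS 16

struct fake_vcpu {
        unsigned long regs[NR_REGS];       /* like vcpu->arch.regs */
        unsigned long emul_regs[NR_REGS];  /* like the emulation context copy */
        bool regs_need_sync_to_vcpu;       /* emul_regs holds the newer values */
};

static void get_regs(struct fake_vcpu *v, unsigned long *out)
{
        if (v->regs_need_sync_to_vcpu) {
                /* mid-emulation read: copy back from the emulation context */
                memcpy(v->regs, v->emul_regs, sizeof(v->regs));
                v->regs_need_sync_to_vcpu = false;
        }
        memcpy(out, v->regs, sizeof(v->regs));
}

int main(void)
{
        struct fake_vcpu v = { .regs_need_sync_to_vcpu = true };
        unsigned long snapshot[NR_REGS];

        v.emul_regs[0] = 0x1234;           /* "RAX" as updated by the emulator */
        get_regs(&v, snapshot);
        printf("RAX seen by userspace: %#lx\n", snapshot[0]);
        return 0;
}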
5455
5702
 
5456
5703
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
5457
5704
{
 
5705
        vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
 
5706
        vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
 
5707
 
5458
5708
        kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
5459
5709
        kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
5460
5710
        kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
5565
5815
 
5566
5816
        memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
5567
5817
        kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
5568
 
        kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
 
5818
        kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
5569
5819
        kvm_make_request(KVM_REQ_EVENT, vcpu);
5570
5820
        return EMULATE_DONE;
5571
5821
}
5575
5825
                                  struct kvm_sregs *sregs)
5576
5826
{
5577
5827
        int mmu_reset_needed = 0;
5578
 
        int pending_vec, max_bits;
 
5828
        int pending_vec, max_bits, idx;
5579
5829
        struct desc_ptr dt;
5580
5830
 
5581
5831
        dt.size = sregs->idt.limit;
5604
5854
        kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
5605
5855
        if (sregs->cr4 & X86_CR4_OSXSAVE)
5606
5856
                update_cpuid(vcpu);
 
5857
 
 
5858
        idx = srcu_read_lock(&vcpu->kvm->srcu);
5607
5859
        if (!is_long_mode(vcpu) && is_pae(vcpu)) {
5608
5860
                load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
5609
5861
                mmu_reset_needed = 1;
5610
5862
        }
 
5863
        srcu_read_unlock(&vcpu->kvm->srcu, idx);
5611
5864
 
5612
5865
        if (mmu_reset_needed)
5613
5866
                kvm_mmu_reset_context(vcpu);
5618
5871
        if (pending_vec < max_bits) {
5619
5872
                kvm_queue_interrupt(vcpu, pending_vec, false);
5620
5873
                pr_debug("Set back pending irq %d\n", pending_vec);
5621
 
                if (irqchip_in_kernel(vcpu->kvm))
5622
 
                        kvm_pic_clear_isr_ack(vcpu->kvm);
5623
5874
        }
5624
5875
 
5625
5876
        kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
5815
6066
 
5816
6067
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
5817
6068
{
5818
 
        if (vcpu->arch.time_page) {
5819
 
                kvm_release_page_dirty(vcpu->arch.time_page);
5820
 
                vcpu->arch.time_page = NULL;
5821
 
        }
 
6069
        kvmclock_reset(vcpu);
5822
6070
 
5823
6071
        free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
5824
6072
        fx_free(vcpu);
5879
6127
        kvm_make_request(KVM_REQ_EVENT, vcpu);
5880
6128
        vcpu->arch.apf.msr_val = 0;
5881
6129
 
 
6130
        kvmclock_reset(vcpu);
 
6131
 
5882
6132
        kvm_clear_async_pf_completion_queue(vcpu);
5883
6133
        kvm_async_pf_hash_reset(vcpu);
5884
6134
        vcpu->arch.apf.halted = false;
5947
6197
        }
5948
6198
        vcpu->arch.pio_data = page_address(page);
5949
6199
 
5950
 
        if (!kvm->arch.virtual_tsc_khz)
5951
 
                kvm_arch_set_tsc_khz(kvm, max_tsc_khz);
 
6200
        kvm_init_tsc_catchup(vcpu, max_tsc_khz);
5952
6201
 
5953
6202
        r = kvm_mmu_create(vcpu);
5954
6203
        if (r < 0)
6006
6255
        /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
6007
6256
        set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
6008
6257
 
6009
 
        spin_lock_init(&kvm->arch.tsc_write_lock);
 
6258
        raw_spin_lock_init(&kvm->arch.tsc_write_lock);
6010
6259
 
6011
6260
        return 0;
6012
6261
}
6104
6353
                                int user_alloc)
6105
6354
{
6106
6355
 
6107
 
        int npages = mem->memory_size >> PAGE_SHIFT;
 
6356
        int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT;
6108
6357
 
6109
6358
        if (!user_alloc && !old.user_alloc && old.rmap && !npages) {
6110
6359
                int ret;
6119
6368
                               "failed to munmap memory\n");
6120
6369
        }
6121
6370
 
 
6371
        if (!kvm->arch.n_requested_mmu_pages)
 
6372
                nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
 
6373
 
6122
6374
        spin_lock(&kvm->mmu_lock);
6123
 
        if (!kvm->arch.n_requested_mmu_pages) {
6124
 
                unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
 
6375
        if (nr_mmu_pages)
6125
6376
                kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
6126
 
        }
6127
 
 
6128
6377
        kvm_mmu_slot_remove_write_access(kvm, mem->slot);
6129
6378
        spin_unlock(&kvm->mmu_lock);
6130
6379
}
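The change above hoists the kvm_mmu_calculate_mmu_pages() call out from under mmu_lock, leaving only the cheap kvm_mmu_change_mmu_pages() update inside the critical section. A generic userspace sketch of that compute-outside, commit-inside pattern, using a pthread mutex and a made-up page heuristic:

/* Userspace sketch only: compute outside the lock, commit inside it. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t mmu_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int current_mmu_pages;

/* stands in for kvm_mmu_calculate_mmu_pages(); the heuristic is made up */
static unsigned int calculate_mmu_pages(unsigned long npages)
{
        return (unsigned int)(npages / 512 + 64);
}

static void commit_memory_region(unsigned long npages, int user_requested)
{
        unsigned int nr_mmu_pages = 0;

        if (!user_requested)
                nr_mmu_pages = calculate_mmu_pages(npages);  /* outside the lock */

        pthread_mutex_lock(&mmu_lock);
        if (nr_mmu_pages)
                current_mmu_pages = nr_mmu_pages;            /* cheap update only */
        pthread_mutex_unlock(&mmu_lock);
}

int main(void)
{
        commit_memory_region(1UL << 18, 0);
        printf("mmu page limit now %u\n", current_mmu_pages);
        return 0;
}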
6158
6407
 
6159
6408
        me = get_cpu();
6160
6409
        if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
6161
 
                if (atomic_xchg(&vcpu->guest_mode, 0))
 
6410
                if (kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE)
6162
6411
                        smp_send_reschedule(cpu);
6163
6412
        put_cpu();
6164
6413
}
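The kick path above replaces atomic_xchg(&vcpu->guest_mode, 0) with kvm_vcpu_exiting_guest_mode(), which compare-and-exchanges vcpu->mode from IN_GUEST_MODE to EXITING_GUEST_MODE, so only the kicker that wins the transition sends the reschedule IPI. A minimal userspace model of that transition with a C11 compare-and-swap; the names and printed strings are illustrative only:

/* Userspace sketch only: models the IN_GUEST_MODE -> EXITING_GUEST_MODE kick. */
#include <stdatomic.h>
#include <stdio.h>

enum mode { OUTSIDE_GUEST_MODE, IN_GUEST_MODE, EXITING_GUEST_MODE };

/* Mirrors the idea of kvm_vcpu_exiting_guest_mode(): try the transition,
 * return whatever the mode was before. */
static int exiting_guest_mode(_Atomic int *mode)
{
        int expected = IN_GUEST_MODE;

        atomic_compare_exchange_strong(mode, &expected, EXITING_GUEST_MODE);
        return expected;                /* old value on success or failure */
}

int main(void)
{
        _Atomic int mode = IN_GUEST_MODE;

        if (exiting_guest_mode(&mode) == IN_GUEST_MODE)
                printf("first kicker wins: send the reschedule IPI\n");
        if (exiting_guest_mode(&mode) == IN_GUEST_MODE)
                printf("not reached: later kicks see EXITING_GUEST_MODE\n");
        return 0;
}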