~ubuntu-branches/ubuntu/hardy/kvm/hardy-backports

Viewing changes to patches/kvm-57.patch

  • Committer: Bazaar Package Importer
  • Author(s): Soren Hansen
  • Date: 2008-01-03 10:39:25 UTC
  • mfrom: (1.1.16 upstream)
  • Revision ID: james.westby@ubuntu.com-20080103103925-8480u7sq2646hvbh
Tags: 1:59+dfsg-0ubuntu1
* New upstream release
* Build with ALSA support (cherry-pick from 57+dfsg-2)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 80b7ba4..ab2df55 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -116,6 +116,10 @@ config ARCH_SUPPORTS_OPROFILE
        bool
        default y
 
+config ARCH_SUPPORTS_KVM
+       bool
+       default y
+
 
 config ZONE_DMA32
        bool
@@ -1619,4 +1623,6 @@ source "security/Kconfig"
 
 source "crypto/Kconfig"
 
+source "arch/x86/kvm/Kconfig"
+
 source "lib/Kconfig"
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 7aa1dc6..96f79eb 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -17,3 +17,5 @@ else
         UTS_MACHINE := x86_64
         include $(srctree)/arch/x86/Makefile_64
 endif
+
+core-$(CONFIG_KVM) += arch/x86/kvm/
diff --git a/drivers/kvm/Kconfig b/arch/x86/kvm/Kconfig
similarity index 94%
rename from drivers/kvm/Kconfig
rename to arch/x86/kvm/Kconfig
index 6569206..4086080 100644
--- a/drivers/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -3,7 +3,7 @@
 #
 menuconfig VIRTUALIZATION
        bool "Virtualization"
-       depends on X86
+       depends on ARCH_SUPPORTS_KVM || X86
        default y
        ---help---
          Say Y here to get to see options for using your Linux host to run other
@@ -16,7 +16,7 @@ if VIRTUALIZATION
 
 config KVM
        tristate "Kernel-based Virtual Machine (KVM) support"
-       depends on X86 && EXPERIMENTAL
+       depends on ARCH_SUPPORTS_KVM && EXPERIMENTAL
        select PREEMPT_NOTIFIERS
        select ANON_INODES
        ---help---
diff --git a/drivers/kvm/Makefile b/arch/x86/kvm/Makefile
similarity index 51%
rename from drivers/kvm/Makefile
rename to arch/x86/kvm/Makefile
index e5a8f4d..ffdd0b3 100644
--- a/drivers/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -2,7 +2,11 @@
 # Makefile for Kernel-based Virtual Machine module
 #
 
-kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o irq.o lapic.o ioapic.o
+common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o)
+
+EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
+
+kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/drivers/kvm/i8259.c b/arch/x86/kvm/i8259.c
similarity index 98%
rename from drivers/kvm/i8259.c
rename to arch/x86/kvm/i8259.c
index a679157..ab29cf2 100644
--- a/drivers/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -28,6 +28,8 @@
 #include <linux/mm.h>
 #include "irq.h"
 
+#include <linux/kvm_host.h>
+
 /*
  * set irq level. If an edge is detected, then the IRR is set to 1
  */
@@ -181,10 +183,8 @@ int kvm_pic_read_irq(struct kvm_pic *s)
        return intno;
 }
 
-static void pic_reset(void *opaque)
+void kvm_pic_reset(struct kvm_kpic_state *s)
 {
-       struct kvm_kpic_state *s = opaque;
-
        s->last_irr = 0;
        s->irr = 0;
        s->imr = 0;
@@ -209,7 +209,7 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val)
        addr &= 1;
        if (addr == 0) {
                if (val & 0x10) {
-                       pic_reset(s);   /* init */
+                       kvm_pic_reset(s);       /* init */
                        /*
                         * deassert a pending interrupt
                         */
diff --git a/drivers/kvm/irq.c b/arch/x86/kvm/irq.c
similarity index 81%
rename from drivers/kvm/irq.c
rename to arch/x86/kvm/irq.c
index 7628c7f..e571475 100644
--- a/drivers/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -20,8 +20,8 @@
  */
 
 #include <linux/module.h>
+#include <linux/kvm_host.h>
 
-#include "kvm.h"
 #include "irq.h"
 
 /*
@@ -63,26 +63,6 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
 }
 EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
 
-static void vcpu_kick_intr(void *info)
-{
-#ifdef DEBUG
-       struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
-       printk(KERN_DEBUG "vcpu_kick_intr %p \n", vcpu);
-#endif
-}
-
-void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
-{
-       int ipi_pcpu = vcpu->cpu;
-
-       if (waitqueue_active(&vcpu->wq)) {
-               wake_up_interruptible(&vcpu->wq);
-               ++vcpu->stat.halt_wakeup;
-       }
-       if (vcpu->guest_mode)
-               smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0);
-}
-
 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
 {
        kvm_inject_apic_timer_irqs(vcpu);
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
new file mode 100644
index 0000000..53c7f48
--- /dev/null
+++ b/arch/x86/kvm/irq.h
@@ -0,0 +1,88 @@
+/*
+ * irq.h: in kernel interrupt controller related definitions
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ * Authors:
+ *   Yaozu (Eddie) Dong <Eddie.dong@intel.com>
+ *
+ */
+
+#ifndef __IRQ_H
+#define __IRQ_H
+
+#include <linux/mm_types.h>
+#include <linux/hrtimer.h>
+#include <linux/kvm_host.h>
+
+#include "iodev.h"
+#include "ioapic.h"
+#include "lapic.h"
+
+struct kvm;
+struct kvm_vcpu;
+
+typedef void irq_request_func(void *opaque, int level);
+
+struct kvm_kpic_state {
+       u8 last_irr;    /* edge detection */
+       u8 irr;         /* interrupt request register */
+       u8 imr;         /* interrupt mask register */
+       u8 isr;         /* interrupt service register */
+       u8 priority_add;        /* highest irq priority */
+       u8 irq_base;
+       u8 read_reg_select;
+       u8 poll;
+       u8 special_mask;
+       u8 init_state;
+       u8 auto_eoi;
+       u8 rotate_on_auto_eoi;
+       u8 special_fully_nested_mode;
+       u8 init4;               /* true if 4 byte init */
+       u8 elcr;                /* PIIX edge/trigger selection */
+       u8 elcr_mask;
+       struct kvm_pic *pics_state;
+};
+
+struct kvm_pic {
+       struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
+       irq_request_func *irq_request;
+       void *irq_request_opaque;
+       int output;             /* intr from master PIC */
+       struct kvm_io_device dev;
+};
+
+struct kvm_pic *kvm_create_pic(struct kvm *kvm);
+void kvm_pic_set_irq(void *opaque, int irq, int level);
+int kvm_pic_read_irq(struct kvm_pic *s);
+void kvm_pic_update_irq(struct kvm_pic *s);
+
+static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
+{
+       return kvm->arch.vpic;
+}
+
+static inline int irqchip_in_kernel(struct kvm *kvm)
+{
+       return pic_irqchip(kvm) != NULL;
+}
+
+void kvm_pic_reset(struct kvm_kpic_state *s);
+
+void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
+void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
+void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
+void kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
+
+#endif
diff --git a/drivers/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h
similarity index 96%
rename from drivers/kvm/kvm_svm.h
rename to arch/x86/kvm/kvm_svm.h
index a0e415d..ecdfe97 100644
--- a/drivers/kvm/kvm_svm.h
+++ b/arch/x86/kvm/kvm_svm.h
@@ -4,10 +4,10 @@
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/list.h>
+#include <linux/kvm_host.h>
 #include <asm/msr.h>
 
 #include "svm.h"
-#include "kvm.h"
 
 static const u32 host_save_user_msrs[] = {
 #ifdef CONFIG_X86_64
diff --git a/drivers/kvm/lapic.c b/arch/x86/kvm/lapic.c
similarity index 89%
rename from drivers/kvm/lapic.c
rename to arch/x86/kvm/lapic.c
index 238fcad..4076331 100644
--- a/drivers/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -17,7 +17,7 @@
  * the COPYING file in the top-level directory.
  */
 
-#include "kvm.h"
+#include <linux/kvm_host.h>
 #include <linux/kvm.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
@@ -56,6 +56,7 @@
 
 #define VEC_POS(v) ((v) & (32 - 1))
 #define REG_POS(v) (((v) >> 5) << 4)
+
 static inline u32 apic_get_reg(struct kvm_lapic *apic, int reg_off)
 {
        return *((u32 *) (apic->regs + reg_off));
@@ -88,7 +89,7 @@ static inline void apic_clear_vector(int vec, void *bitmap)
 
 static inline int apic_hw_enabled(struct kvm_lapic *apic)
 {
-       return (apic)->vcpu->apic_base & MSR_IA32_APICBASE_ENABLE;
+       return (apic)->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE;
 }
 
 static inline int  apic_sw_enabled(struct kvm_lapic *apic)
@@ -172,7 +173,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic)
 
 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
 {
-       struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+       struct kvm_lapic *apic = vcpu->arch.apic;
        int highest_irr;
 
        if (!apic)
@@ -183,8 +184,10 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);
 
-int kvm_apic_set_irq(struct kvm_lapic *apic, u8 vec, u8 trig)
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig)
 {
+       struct kvm_lapic *apic = vcpu->arch.apic;
+
        if (!apic_test_and_set_irr(vec, apic)) {
                /* a new pending irq is set in IRR */
                if (trig)
@@ -268,7 +271,7 @@ static int apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
                           int short_hand, int dest, int dest_mode)
 {
        int result = 0;
-       struct kvm_lapic *target = vcpu->apic;
+       struct kvm_lapic *target = vcpu->arch.apic;
 
        apic_debug("target %p, source %p, dest 0x%x, "
                   "dest_mode 0x%x, short_hand 0x%x",
@@ -335,10 +338,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
                } else
                        apic_clear_vector(vector, apic->regs + APIC_TMR);
 
-               if (vcpu->mp_state == VCPU_MP_STATE_RUNNABLE)
+               if (vcpu->arch.mp_state == VCPU_MP_STATE_RUNNABLE)
                        kvm_vcpu_kick(vcpu);
-               else if (vcpu->mp_state == VCPU_MP_STATE_HALTED) {
-                       vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
+               else if (vcpu->arch.mp_state == VCPU_MP_STATE_HALTED) {
+                       vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE;
                        if (waitqueue_active(&vcpu->wq))
                                wake_up_interruptible(&vcpu->wq);
                }
@@ -359,11 +362,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 
        case APIC_DM_INIT:
                if (level) {
-                       if (vcpu->mp_state == VCPU_MP_STATE_RUNNABLE)
+                       if (vcpu->arch.mp_state == VCPU_MP_STATE_RUNNABLE)
                                printk(KERN_DEBUG
                                       "INIT on a runnable vcpu %d\n",
                                       vcpu->vcpu_id);
-                       vcpu->mp_state = VCPU_MP_STATE_INIT_RECEIVED;
+                       vcpu->arch.mp_state = VCPU_MP_STATE_INIT_RECEIVED;
                        kvm_vcpu_kick(vcpu);
                } else {
                        printk(KERN_DEBUG
@@ -376,9 +379,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
        case APIC_DM_STARTUP:
                printk(KERN_DEBUG "SIPI to vcpu %d vector 0x%02x\n",
                       vcpu->vcpu_id, vector);
-               if (vcpu->mp_state == VCPU_MP_STATE_INIT_RECEIVED) {
-                       vcpu->sipi_vector = vector;
-                       vcpu->mp_state = VCPU_MP_STATE_SIPI_RECEIVED;
+               if (vcpu->arch.mp_state == VCPU_MP_STATE_INIT_RECEIVED) {
+                       vcpu->arch.sipi_vector = vector;
+                       vcpu->arch.mp_state = VCPU_MP_STATE_SIPI_RECEIVED;
                        if (waitqueue_active(&vcpu->wq))
                                wake_up_interruptible(&vcpu->wq);
                }
@@ -392,15 +395,14 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
        return result;
 }
 
-struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector,
+static struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector,
                                       unsigned long bitmap)
 {
-       int vcpu_id;
        int last;
        int next;
-       struct kvm_lapic *apic;
+       struct kvm_lapic *apic = NULL;
 
-       last = kvm->round_robin_prev_vcpu;
+       last = kvm->arch.round_robin_prev_vcpu;
        next = last;
 
        do {
@@ -408,25 +410,30 @@ struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector,
                        next = 0;
                if (kvm->vcpus[next] == NULL || !test_bit(next, &bitmap))
                        continue;
-               apic = kvm->vcpus[next]->apic;
+               apic = kvm->vcpus[next]->arch.apic;
                if (apic && apic_enabled(apic))
                        break;
                apic = NULL;
        } while (next != last);
-       kvm->round_robin_prev_vcpu = next;
+       kvm->arch.round_robin_prev_vcpu = next;
 
-       if (!apic) {
-               vcpu_id = ffs(bitmap) - 1;
-               if (vcpu_id < 0) {
-                       vcpu_id = 0;
-                       printk(KERN_DEBUG "vcpu not ready for apic_round_robin\n");
-               }
-               apic = kvm->vcpus[vcpu_id]->apic;
-       }
+       if (!apic)
+               printk(KERN_DEBUG "vcpu not ready for apic_round_robin\n");
 
        return apic;
 }
 
+struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
+               unsigned long bitmap)
+{
+       struct kvm_lapic *apic;
+
+       apic = kvm_apic_round_robin(kvm, vector, bitmap);
+       if (apic)
+               return apic->vcpu;
+       return NULL;
+}
+
 static void apic_set_eoi(struct kvm_lapic *apic)
 {
        int vector = apic_find_highest_isr(apic);
@@ -458,7 +465,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
        unsigned int delivery_mode = icr_low & APIC_MODE_MASK;
        unsigned int vector = icr_low & APIC_VECTOR_MASK;
 
-       struct kvm_lapic *target;
+       struct kvm_vcpu *target;
        struct kvm_vcpu *vcpu;
        unsigned long lpr_map = 0;
        int i;
@@ -474,20 +481,20 @@ static void apic_send_ipi(struct kvm_lapic *apic)
                if (!vcpu)
                        continue;
 
-               if (vcpu->apic &&
+               if (vcpu->arch.apic &&
                    apic_match_dest(vcpu, apic, short_hand, dest, dest_mode)) {
                        if (delivery_mode == APIC_DM_LOWEST)
                                set_bit(vcpu->vcpu_id, &lpr_map);
                        else
-                               __apic_accept_irq(vcpu->apic, delivery_mode,
+                               __apic_accept_irq(vcpu->arch.apic, delivery_mode,
                                                  vector, level, trig_mode);
                }
        }
 
        if (delivery_mode == APIC_DM_LOWEST) {
-               target = kvm_apic_round_robin(vcpu->kvm, vector, lpr_map);
+               target = kvm_get_lowest_prio_vcpu(vcpu->kvm, vector, lpr_map);
                if (target != NULL)
-                       __apic_accept_irq(target, delivery_mode,
+                       __apic_accept_irq(target->arch.apic, delivery_mode,
                                          vector, level, trig_mode);
        }
 }
@@ -762,19 +769,17 @@ static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr)
        return ret;
 }
 
-void kvm_free_apic(struct kvm_lapic *apic)
+void kvm_free_lapic(struct kvm_vcpu *vcpu)
 {
-       if (!apic)
+       if (!vcpu->arch.apic)
                return;
 
-       hrtimer_cancel(&apic->timer.dev);
+       hrtimer_cancel(&vcpu->arch.apic->timer.dev);
 
-       if (apic->regs_page) {
-               __free_page(apic->regs_page);
-               apic->regs_page = 0;
-       }
+       if (vcpu->arch.apic->regs_page)
+               __free_page(vcpu->arch.apic->regs_page);
 
-       kfree(apic);
+       kfree(vcpu->arch.apic);
 }
 
 /*
@@ -785,7 +790,7 @@ void kvm_free_apic(struct kvm_lapic *apic)
 
 void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
 {
-       struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+       struct kvm_lapic *apic = vcpu->arch.apic;
 
        if (!apic)
                return;
@@ -794,7 +799,7 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
 
 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
 {
-       struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+       struct kvm_lapic *apic = vcpu->arch.apic;
        u64 tpr;
 
        if (!apic)
@@ -807,29 +812,29 @@ EXPORT_SYMBOL_GPL(kvm_lapic_get_cr8);
 
 void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 {
-       struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+       struct kvm_lapic *apic = vcpu->arch.apic;
 
        if (!apic) {
                value |= MSR_IA32_APICBASE_BSP;
-               vcpu->apic_base = value;
+               vcpu->arch.apic_base = value;
                return;
        }
        if (apic->vcpu->vcpu_id)
                value &= ~MSR_IA32_APICBASE_BSP;
 
-       vcpu->apic_base = value;
-       apic->base_address = apic->vcpu->apic_base &
+       vcpu->arch.apic_base = value;
+       apic->base_address = apic->vcpu->arch.apic_base &
                             MSR_IA32_APICBASE_BASE;
 
        /* with FSB delivery interrupt, we can restart APIC functionality */
        apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is "
-                  "0x%lx.\n", apic->apic_base, apic->base_address);
+                  "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address);
 
 }
 
 u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu)
 {
-       return vcpu->apic_base;
+       return vcpu->arch.apic_base;
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_get_base);
 
@@ -841,7 +846,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
        apic_debug("%s\n", __FUNCTION__);
 
        ASSERT(vcpu);
-       apic = vcpu->apic;
+       apic = vcpu->arch.apic;
        ASSERT(apic != NULL);
 
        /* Stop the timer in case it's a reset to an active apic */
@@ -872,19 +877,19 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
        update_divide_count(apic);
        atomic_set(&apic->timer.pending, 0);
        if (vcpu->vcpu_id == 0)
-               vcpu->apic_base |= MSR_IA32_APICBASE_BSP;
+               vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
        apic_update_ppr(apic);
 
        apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr="
                   "0x%016" PRIx64 ", base_address=0x%0lx.\n", __FUNCTION__,
                   vcpu, kvm_apic_id(apic),
-                  vcpu->apic_base, apic->base_address);
+                  vcpu->arch.apic_base, apic->base_address);
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_reset);
 
 int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
 {
-       struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+       struct kvm_lapic *apic = vcpu->arch.apic;
        int ret = 0;
 
        if (!apic)
@@ -908,9 +913,8 @@ static int __apic_timer_fn(struct kvm_lapic *apic)
        wait_queue_head_t *q = &apic->vcpu->wq;
 
        atomic_inc(&apic->timer.pending);
-       if (waitqueue_active(q))
-       {
-               apic->vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
+       if (waitqueue_active(q)) {
+               apic->vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE;
                wake_up_interruptible(q);
        }
        if (apic_lvtt_period(apic)) {
@@ -956,13 +960,13 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
        if (!apic)
                goto nomem;
 
-       vcpu->apic = apic;
+       vcpu->arch.apic = apic;
 
        apic->regs_page = alloc_page(GFP_KERNEL);
        if (apic->regs_page == NULL) {
                printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
                       vcpu->vcpu_id);
-               goto nomem;
+               goto nomem_free_apic;
        }
        apic->regs = page_address(apic->regs_page);
        memset(apic->regs, 0, PAGE_SIZE);
@@ -971,7 +975,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
        hrtimer_init(&apic->timer.dev, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
        apic->timer.dev.function = apic_timer_fn;
        apic->base_address = APIC_DEFAULT_PHYS_BASE;
-       vcpu->apic_base = APIC_DEFAULT_PHYS_BASE;
+       vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE;
 
        kvm_lapic_reset(vcpu);
        apic->dev.read = apic_mmio_read;
@@ -980,15 +984,16 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
        apic->dev.private = apic;
 
        return 0;
+nomem_free_apic:
+       kfree(apic);
 nomem:
-       kvm_free_apic(apic);
        return -ENOMEM;
 }
 EXPORT_SYMBOL_GPL(kvm_create_lapic);
 
 int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
 {
-       struct kvm_lapic *apic = vcpu->apic;
+       struct kvm_lapic *apic = vcpu->arch.apic;
        int highest_irr;
 
        if (!apic || !apic_enabled(apic))
@@ -1004,11 +1009,11 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
 
 int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
 {
-       u32 lvt0 = apic_get_reg(vcpu->apic, APIC_LVT0);
+       u32 lvt0 = apic_get_reg(vcpu->arch.apic, APIC_LVT0);
        int r = 0;
 
        if (vcpu->vcpu_id == 0) {
-               if (!apic_hw_enabled(vcpu->apic))
+               if (!apic_hw_enabled(vcpu->arch.apic))
                        r = 1;
                if ((lvt0 & APIC_LVT_MASKED) == 0 &&
                    GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
@@ -1019,7 +1024,7 @@ int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
 
 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
 {
-       struct kvm_lapic *apic = vcpu->apic;
+       struct kvm_lapic *apic = vcpu->arch.apic;
 
        if (apic && apic_lvt_enabled(apic, APIC_LVTT) &&
                atomic_read(&apic->timer.pending) > 0) {
@@ -1030,7 +1035,7 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
 
 void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
 {
-       struct kvm_lapic *apic = vcpu->apic;
+       struct kvm_lapic *apic = vcpu->arch.apic;
 
        if (apic && apic_lvt_vector(apic, APIC_LVTT) == vec)
                apic->timer.last_update = ktime_add_ns(
@@ -1041,7 +1046,7 @@ void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
 int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
 {
        int vector = kvm_apic_has_interrupt(vcpu);
-       struct kvm_lapic *apic = vcpu->apic;
+       struct kvm_lapic *apic = vcpu->arch.apic;
 
        if (vector == -1)
                return -1;
@@ -1054,9 +1059,9 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
 
 void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
 {
-       struct kvm_lapic *apic = vcpu->apic;
+       struct kvm_lapic *apic = vcpu->arch.apic;
 
-       apic->base_address = vcpu->apic_base &
+       apic->base_address = vcpu->arch.apic_base &
                             MSR_IA32_APICBASE_BASE;
        apic_set_reg(apic, APIC_LVR, APIC_VERSION);
        apic_update_ppr(apic);
@@ -1067,7 +1072,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
 
 void kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
 {
-       struct kvm_lapic *apic = vcpu->apic;
+       struct kvm_lapic *apic = vcpu->arch.apic;
        struct hrtimer *timer;
 
        if (!apic)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
new file mode 100644
index 0000000..447b654
--- /dev/null
+++ b/arch/x86/kvm/lapic.h
@@ -0,0 +1,44 @@
+#ifndef __KVM_X86_LAPIC_H
+#define __KVM_X86_LAPIC_H
+
+#include "iodev.h"
+
+#include <linux/kvm_host.h>
+
+struct kvm_lapic {
+       unsigned long base_address;
+       struct kvm_io_device dev;
+       struct {
+               atomic_t pending;
+               s64 period;     /* unit: ns */
+               u32 divide_count;
+               ktime_t last_update;
+               struct hrtimer dev;
+       } timer;
+       struct kvm_vcpu *vcpu;
+       struct page *regs_page;
+       void *regs;
+};
+int kvm_create_lapic(struct kvm_vcpu *vcpu);
+void kvm_free_lapic(struct kvm_vcpu *vcpu);
+
+int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
+int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu);
+int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
+void kvm_lapic_reset(struct kvm_vcpu *vcpu);
+u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
+void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
+void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
+
+int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
+int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig);
+
+u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
+void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
+void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
+int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
+int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
+void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
+
+#endif
diff --git a/drivers/kvm/mmu.c b/arch/x86/kvm/mmu.c
751
 
similarity index 50%
752
 
rename from drivers/kvm/mmu.c
753
 
rename to arch/x86/kvm/mmu.c
754
 
index feb5ac9..9a57e1a 100644
755
 
--- a/drivers/kvm/mmu.c
756
 
+++ b/arch/x86/kvm/mmu.c
757
 
@@ -18,16 +18,19 @@
758
 
  */
759
 
 
760
 
 #include "vmx.h"
761
 
-#include "kvm.h"
762
 
+#include "mmu.h"
763
 
 
764
 
+#include <linux/kvm_host.h>
765
 
 #include <linux/types.h>
766
 
 #include <linux/string.h>
767
 
 #include <linux/mm.h>
768
 
 #include <linux/highmem.h>
769
 
 #include <linux/module.h>
770
 
+#include <linux/swap.h>
771
 
 
772
 
 #include <asm/page.h>
773
 
 #include <asm/cmpxchg.h>
774
 
+#include <asm/io.h>
775
 
 
776
 
 #undef MMU_DEBUG
777
 
 
778
 
@@ -82,7 +85,8 @@ static int dbg = 1;
779
 
 #define PT_PAGE_SIZE_MASK (1ULL << 7)
780
 
 #define PT_PAT_MASK (1ULL << 7)
781
 
 #define PT_GLOBAL_MASK (1ULL << 8)
782
 
-#define PT64_NX_MASK (1ULL << 63)
783
 
+#define PT64_NX_SHIFT 63
784
 
+#define PT64_NX_MASK (1ULL << PT64_NX_SHIFT)
785
 
 
786
 
 #define PT_PAT_SHIFT 7
787
 
 #define PT_DIR_PAT_SHIFT 12
788
 
@@ -90,7 +94,8 @@ static int dbg = 1;
789
 
 
790
 
 #define PT32_DIR_PSE36_SIZE 4
791
 
 #define PT32_DIR_PSE36_SHIFT 13
792
 
-#define PT32_DIR_PSE36_MASK (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)
793
 
+#define PT32_DIR_PSE36_MASK \
794
 
+       (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)
795
 
 
796
 
 
797
 
 #define PT_FIRST_AVAIL_BITS_SHIFT 9
798
 
@@ -103,7 +108,7 @@ static int dbg = 1;
799
 
 #define PT64_LEVEL_BITS 9
800
 
 
801
 
 #define PT64_LEVEL_SHIFT(level) \
802
 
-               ( PAGE_SHIFT + (level - 1) * PT64_LEVEL_BITS )
803
 
+               (PAGE_SHIFT + (level - 1) * PT64_LEVEL_BITS)
804
 
 
805
 
 #define PT64_LEVEL_MASK(level) \
806
 
                (((1ULL << PT64_LEVEL_BITS) - 1) << PT64_LEVEL_SHIFT(level))
807
 
@@ -115,7 +120,7 @@ static int dbg = 1;
808
 
 #define PT32_LEVEL_BITS 10
809
 
 
810
 
 #define PT32_LEVEL_SHIFT(level) \
811
 
-               ( PAGE_SHIFT + (level - 1) * PT32_LEVEL_BITS )
812
 
+               (PAGE_SHIFT + (level - 1) * PT32_LEVEL_BITS)
813
 
 
814
 
 #define PT32_LEVEL_MASK(level) \
815
 
                (((1ULL << PT32_LEVEL_BITS) - 1) << PT32_LEVEL_SHIFT(level))
816
 
@@ -132,6 +137,8 @@ static int dbg = 1;
817
 
 #define PT32_DIR_BASE_ADDR_MASK \
818
 
        (PAGE_MASK & ~((1ULL << (PAGE_SHIFT + PT32_LEVEL_BITS)) - 1))
819
 
 
820
 
+#define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \
821
 
+                       | PT64_NX_MASK)
822
 
 
823
 
 #define PFERR_PRESENT_MASK (1U << 0)
824
 
 #define PFERR_WRITE_MASK (1U << 1)
825
 
@@ -147,6 +154,11 @@ static int dbg = 1;
826
 
 
827
 
 #define RMAP_EXT 4
828
 
 
829
 
+#define ACC_EXEC_MASK    1
830
 
+#define ACC_WRITE_MASK   PT_WRITABLE_MASK
831
 
+#define ACC_USER_MASK    PT_USER_MASK
832
 
+#define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
833
 
+
834
 
 struct kvm_rmap_desc {
835
 
        u64 *shadow_ptes[RMAP_EXT];
836
 
        struct kvm_rmap_desc *more;
837
 
@@ -156,9 +168,19 @@ static struct kmem_cache *pte_chain_cache;
838
 
 static struct kmem_cache *rmap_desc_cache;
839
 
 static struct kmem_cache *mmu_page_header_cache;
840
 
 
841
 
+static u64 __read_mostly shadow_trap_nonpresent_pte;
842
 
+static u64 __read_mostly shadow_notrap_nonpresent_pte;
843
 
+
844
 
+void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
845
 
+{
846
 
+       shadow_trap_nonpresent_pte = trap_pte;
847
 
+       shadow_notrap_nonpresent_pte = notrap_pte;
848
 
+}
849
 
+EXPORT_SYMBOL_GPL(kvm_mmu_set_nonpresent_ptes);
850
 
+
851
 
 static int is_write_protection(struct kvm_vcpu *vcpu)
852
 
 {
853
 
-       return vcpu->cr0 & X86_CR0_WP;
854
 
+       return vcpu->arch.cr0 & X86_CR0_WP;
855
 
 }
856
 
 
857
 
 static int is_cpuid_PSE36(void)
858
 
@@ -168,7 +190,7 @@ static int is_cpuid_PSE36(void)
859
 
 
860
 
 static int is_nx(struct kvm_vcpu *vcpu)
861
 
 {
862
 
-       return vcpu->shadow_efer & EFER_NX;
863
 
+       return vcpu->arch.shadow_efer & EFER_NX;
864
 
 }
865
 
 
866
 
 static int is_present_pte(unsigned long pte)
867
 
@@ -176,11 +198,23 @@ static int is_present_pte(unsigned long pte)
868
 
        return pte & PT_PRESENT_MASK;
869
 
 }
870
 
 
871
 
+static int is_shadow_present_pte(u64 pte)
872
 
+{
873
 
+       pte &= ~PT_SHADOW_IO_MARK;
874
 
+       return pte != shadow_trap_nonpresent_pte
875
 
+               && pte != shadow_notrap_nonpresent_pte;
876
 
+}
877
 
+
878
 
 static int is_writeble_pte(unsigned long pte)
879
 
 {
880
 
        return pte & PT_WRITABLE_MASK;
881
 
 }
882
 
 
883
 
+static int is_dirty_pte(unsigned long pte)
884
 
+{
885
 
+       return pte & PT_DIRTY_MASK;
886
 
+}
887
 
+
888
 
 static int is_io_pte(unsigned long pte)
889
 
 {
890
 
        return pte & PT_SHADOW_IO_MARK;
891
 
@@ -188,8 +222,15 @@ static int is_io_pte(unsigned long pte)
892
 
 
893
 
 static int is_rmap_pte(u64 pte)
894
 
 {
895
 
-       return (pte & (PT_WRITABLE_MASK | PT_PRESENT_MASK))
896
 
-               == (PT_WRITABLE_MASK | PT_PRESENT_MASK);
897
 
+       return pte != shadow_trap_nonpresent_pte
898
 
+               && pte != shadow_notrap_nonpresent_pte;
899
 
+}
900
 
+
901
 
+static gfn_t pse36_gfn_delta(u32 gpte)
902
 
+{
903
 
+       int shift = 32 - PT32_DIR_PSE36_SHIFT - PAGE_SHIFT;
904
 
+
905
 
+       return (gpte & PT32_DIR_PSE36_MASK) << shift;
906
 
 }
907
 
 
908
 
 static void set_shadow_pte(u64 *sptep, u64 spte)
909
 
@@ -251,18 +292,18 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
910
 
        int r;
911
 
 
912
 
        kvm_mmu_free_some_pages(vcpu);
913
 
-       r = mmu_topup_memory_cache(&vcpu->mmu_pte_chain_cache,
914
 
+       r = mmu_topup_memory_cache(&vcpu->arch.mmu_pte_chain_cache,
915
 
                                   pte_chain_cache, 4);
916
 
        if (r)
917
 
                goto out;
918
 
-       r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache,
919
 
+       r = mmu_topup_memory_cache(&vcpu->arch.mmu_rmap_desc_cache,
920
 
                                   rmap_desc_cache, 1);
921
 
        if (r)
922
 
                goto out;
923
 
-       r = mmu_topup_memory_cache_page(&vcpu->mmu_page_cache, 4);
924
 
+       r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8);
925
 
        if (r)
926
 
                goto out;
927
 
-       r = mmu_topup_memory_cache(&vcpu->mmu_page_header_cache,
928
 
+       r = mmu_topup_memory_cache(&vcpu->arch.mmu_page_header_cache,
929
 
                                   mmu_page_header_cache, 4);
930
 
 out:
931
 
        return r;
932
 
@@ -270,10 +311,10 @@ out:
933
 
 
934
 
 static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
935
 
 {
936
 
-       mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache);
937
 
-       mmu_free_memory_cache(&vcpu->mmu_rmap_desc_cache);
938
 
-       mmu_free_memory_cache_page(&vcpu->mmu_page_cache);
939
 
-       mmu_free_memory_cache(&vcpu->mmu_page_header_cache);
940
 
+       mmu_free_memory_cache(&vcpu->arch.mmu_pte_chain_cache);
941
 
+       mmu_free_memory_cache(&vcpu->arch.mmu_rmap_desc_cache);
942
 
+       mmu_free_memory_cache_page(&vcpu->arch.mmu_page_cache);
943
 
+       mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache);
944
 
 }
945
 
 
946
 
 static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc,
947
 
@@ -289,7 +330,7 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc,
948
 
 
949
 
 static struct kvm_pte_chain *mmu_alloc_pte_chain(struct kvm_vcpu *vcpu)
950
 
 {
951
 
-       return mmu_memory_cache_alloc(&vcpu->mmu_pte_chain_cache,
952
 
+       return mmu_memory_cache_alloc(&vcpu->arch.mmu_pte_chain_cache,
953
 
                                      sizeof(struct kvm_pte_chain));
954
 
 }
955
 
 
956
 
@@ -300,7 +341,7 @@ static void mmu_free_pte_chain(struct kvm_pte_chain *pc)
957
 
 
958
 
 static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu)
959
 
 {
960
 
-       return mmu_memory_cache_alloc(&vcpu->mmu_rmap_desc_cache,
961
 
+       return mmu_memory_cache_alloc(&vcpu->arch.mmu_rmap_desc_cache,
962
 
                                      sizeof(struct kvm_rmap_desc));
963
 
 }
964
 
 
965
 
@@ -310,35 +351,52 @@ static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd)
966
 
 }
967
 
 
968
 
 /*
969
 
+ * Take gfn and return the reverse mapping to it.
970
 
+ * Note: gfn must be unaliased before this function get called
971
 
+ */
972
 
+
973
 
+static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn)
974
 
+{
975
 
+       struct kvm_memory_slot *slot;
976
 
+
977
 
+       slot = gfn_to_memslot(kvm, gfn);
978
 
+       return &slot->rmap[gfn - slot->base_gfn];
979
 
+}
980
 
+
981
 
+/*
982
 
  * Reverse mapping data structures:
983
 
  *
984
 
- * If page->private bit zero is zero, then page->private points to the
985
 
- * shadow page table entry that points to page_address(page).
986
 
+ * If rmapp bit zero is zero, then rmapp point to the shadw page table entry
987
 
+ * that points to page_address(page).
988
 
  *
989
 
- * If page->private bit zero is one, (then page->private & ~1) points
990
 
- * to a struct kvm_rmap_desc containing more mappings.
991
 
+ * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc
992
 
+ * containing more mappings.
993
 
  */
994
 
-static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte)
995
 
+static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
996
 
 {
997
 
-       struct page *page;
998
 
+       struct kvm_mmu_page *sp;
999
 
        struct kvm_rmap_desc *desc;
1000
 
+       unsigned long *rmapp;
1001
 
        int i;
1002
 
 
1003
 
        if (!is_rmap_pte(*spte))
1004
 
                return;
1005
 
-       page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
1006
 
-       if (!page_private(page)) {
1007
 
+       gfn = unalias_gfn(vcpu->kvm, gfn);
1008
 
+       sp = page_header(__pa(spte));
1009
 
+       sp->gfns[spte - sp->spt] = gfn;
1010
 
+       rmapp = gfn_to_rmap(vcpu->kvm, gfn);
1011
 
+       if (!*rmapp) {
1012
 
                rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
1013
 
-               set_page_private(page,(unsigned long)spte);
1014
 
-       } else if (!(page_private(page) & 1)) {
1015
 
+               *rmapp = (unsigned long)spte;
1016
 
+       } else if (!(*rmapp & 1)) {
1017
 
                rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte);
1018
 
                desc = mmu_alloc_rmap_desc(vcpu);
1019
 
-               desc->shadow_ptes[0] = (u64 *)page_private(page);
1020
 
+               desc->shadow_ptes[0] = (u64 *)*rmapp;
1021
 
                desc->shadow_ptes[1] = spte;
1022
 
-               set_page_private(page,(unsigned long)desc | 1);
1023
 
+               *rmapp = (unsigned long)desc | 1;
1024
 
        } else {
1025
 
                rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
1026
 
-               desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
1027
 
+               desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
1028
 
                while (desc->shadow_ptes[RMAP_EXT-1] && desc->more)
1029
 
                        desc = desc->more;
1030
 
                if (desc->shadow_ptes[RMAP_EXT-1]) {
1031
 
@@ -351,7 +409,7 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte)
1032
 
        }
1033
 
 }
1034
 
 
1035
 
-static void rmap_desc_remove_entry(struct page *page,
1036
 
+static void rmap_desc_remove_entry(unsigned long *rmapp,
1037
 
                                   struct kvm_rmap_desc *desc,
1038
 
                                   int i,
1039
 
                                   struct kvm_rmap_desc *prev_desc)
1040
 
@@ -365,44 +423,53 @@ static void rmap_desc_remove_entry(struct page *page,
1041
 
        if (j != 0)
1042
 
                return;
1043
 
        if (!prev_desc && !desc->more)
1044
 
-               set_page_private(page,(unsigned long)desc->shadow_ptes[0]);
1045
 
+               *rmapp = (unsigned long)desc->shadow_ptes[0];
1046
 
        else
1047
 
                if (prev_desc)
1048
 
                        prev_desc->more = desc->more;
1049
 
                else
1050
 
-                       set_page_private(page,(unsigned long)desc->more | 1);
1051
 
+                       *rmapp = (unsigned long)desc->more | 1;
1052
 
        mmu_free_rmap_desc(desc);
1053
 
 }
1054
 
 
1055
 
-static void rmap_remove(u64 *spte)
1056
 
+static void rmap_remove(struct kvm *kvm, u64 *spte)
1057
 
 {
1058
 
-       struct page *page;
1059
 
        struct kvm_rmap_desc *desc;
1060
 
        struct kvm_rmap_desc *prev_desc;
1061
 
+       struct kvm_mmu_page *sp;
1062
 
+       struct page *page;
1063
 
+       unsigned long *rmapp;
1064
 
        int i;
1065
 
 
1066
 
        if (!is_rmap_pte(*spte))
1067
 
                return;
1068
 
+       sp = page_header(__pa(spte));
1069
 
        page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
1070
 
-       if (!page_private(page)) {
1071
 
+       mark_page_accessed(page);
1072
 
+       if (is_writeble_pte(*spte))
1073
 
+               kvm_release_page_dirty(page);
1074
 
+       else
1075
 
+               kvm_release_page_clean(page);
1076
 
+       rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt]);
1077
 
+       if (!*rmapp) {
1078
 
                printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
1079
 
                BUG();
1080
 
-       } else if (!(page_private(page) & 1)) {
1081
 
+       } else if (!(*rmapp & 1)) {
1082
 
                rmap_printk("rmap_remove:  %p %llx 1->0\n", spte, *spte);
1083
 
-               if ((u64 *)page_private(page) != spte) {
1084
 
+               if ((u64 *)*rmapp != spte) {
1085
 
                        printk(KERN_ERR "rmap_remove:  %p %llx 1->BUG\n",
1086
 
                               spte, *spte);
1087
 
                        BUG();
1088
 
                }
1089
 
-               set_page_private(page,0);
1090
 
+               *rmapp = 0;
1091
 
        } else {
1092
 
                rmap_printk("rmap_remove:  %p %llx many->many\n", spte, *spte);
1093
 
-               desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
1094
 
+               desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
1095
 
                prev_desc = NULL;
1096
 
                while (desc) {
1097
 
                        for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i)
1098
 
                                if (desc->shadow_ptes[i] == spte) {
1099
 
-                                       rmap_desc_remove_entry(page,
1100
 
+                                       rmap_desc_remove_entry(rmapp,
1101
 
                                                               desc, i,
1102
 
                                                               prev_desc);
1103
 
                                        return;
1104
 
@@ -414,33 +481,56 @@ static void rmap_remove(u64 *spte)
1105
 
        }
1106
 
 }
1107
 
 
1108
 
-static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
1109
 
+static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)
1110
 
 {
1111
 
-       struct kvm *kvm = vcpu->kvm;
1112
 
-       struct page *page;
1113
 
        struct kvm_rmap_desc *desc;
1114
 
+       struct kvm_rmap_desc *prev_desc;
1115
 
+       u64 *prev_spte;
1116
 
+       int i;
1117
 
+
1118
 
+       if (!*rmapp)
1119
 
+               return NULL;
1120
 
+       else if (!(*rmapp & 1)) {
1121
 
+               if (!spte)
1122
 
+                       return (u64 *)*rmapp;
1123
 
+               return NULL;
1124
 
+       }
1125
 
+       desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
1126
 
+       prev_desc = NULL;
1127
 
+       prev_spte = NULL;
1128
 
+       while (desc) {
1129
 
+               for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i) {
1130
 
+                       if (prev_spte == spte)
1131
 
+                               return desc->shadow_ptes[i];
1132
 
+                       prev_spte = desc->shadow_ptes[i];
1133
 
+               }
1134
 
+               desc = desc->more;
1135
 
+       }
1136
 
+       return NULL;
1137
 
+}
1138
 
+
1139
 
+static void rmap_write_protect(struct kvm *kvm, u64 gfn)
1140
 
+{
1141
 
+       unsigned long *rmapp;
1142
 
        u64 *spte;
1143
 
+       int write_protected = 0;
1144
 
 
1145
 
-       page = gfn_to_page(kvm, gfn);
1146
 
-       BUG_ON(!page);
1147
 
+       gfn = unalias_gfn(kvm, gfn);
1148
 
+       rmapp = gfn_to_rmap(kvm, gfn);
1149
 
 
1150
 
-       while (page_private(page)) {
1151
 
-               if (!(page_private(page) & 1))
1152
 
-                       spte = (u64 *)page_private(page);
1153
 
-               else {
1154
 
-                       desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
1155
 
-                       spte = desc->shadow_ptes[0];
1156
 
-               }
1157
 
+       spte = rmap_next(kvm, rmapp, NULL);
1158
 
+       while (spte) {
1159
 
                BUG_ON(!spte);
1160
 
-               BUG_ON((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT
1161
 
-                      != page_to_pfn(page));
1162
 
                BUG_ON(!(*spte & PT_PRESENT_MASK));
1163
 
-               BUG_ON(!(*spte & PT_WRITABLE_MASK));
1164
 
                rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
1165
 
-               rmap_remove(spte);
1166
 
-               set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK);
1167
 
-               kvm_flush_remote_tlbs(vcpu->kvm);
1168
 
+               if (is_writeble_pte(*spte)) {
1169
 
+                       set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK);
1170
 
+                       write_protected = 1;
1171
 
+               }
1172
 
+               spte = rmap_next(kvm, rmapp, spte);
1173
 
        }
1174
 
+       if (write_protected)
1175
 
+               kvm_flush_remote_tlbs(kvm);
1176
 
 }
1177
 
 
1178
 
 #ifdef MMU_DEBUG
1179
 
@@ -450,7 +540,7 @@ static int is_empty_shadow_page(u64 *spt)
1180
 
        u64 *end;
1181
 
 
1182
 
        for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++)
1183
 
-               if (*pos != 0) {
1184
 
+               if ((*pos & ~PT_SHADOW_IO_MARK) != shadow_trap_nonpresent_pte) {
1185
 
                        printk(KERN_ERR "%s: %p %llx\n", __FUNCTION__,
1186
 
                               pos, *pos);
1187
 
                        return 0;
1188
 
@@ -459,14 +549,14 @@ static int is_empty_shadow_page(u64 *spt)
1189
 
 }
1190
 
 #endif
1191
 
 
1192
 
-static void kvm_mmu_free_page(struct kvm *kvm,
1193
 
-                             struct kvm_mmu_page *page_head)
1194
 
+static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp)
1195
 
 {
1196
 
-       ASSERT(is_empty_shadow_page(page_head->spt));
1197
 
-       list_del(&page_head->link);
1198
 
-       __free_page(virt_to_page(page_head->spt));
1199
 
-       kfree(page_head);
1200
 
-       ++kvm->n_free_mmu_pages;
1201
 
+       ASSERT(is_empty_shadow_page(sp->spt));
1202
 
+       list_del(&sp->link);
1203
 
+       __free_page(virt_to_page(sp->spt));
1204
 
+       __free_page(virt_to_page(sp->gfns));
1205
 
+       kfree(sp);
1206
 
+       ++kvm->arch.n_free_mmu_pages;
1207
 
 }
1208
 
 
1209
 
 static unsigned kvm_page_table_hashfn(gfn_t gfn)
1210
 
@@ -477,26 +567,26 @@ static unsigned kvm_page_table_hashfn(gfn_t gfn)
1211
 
 static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
1212
 
                                               u64 *parent_pte)
1213
 
 {
1214
 
-       struct kvm_mmu_page *page;
1215
 
+       struct kvm_mmu_page *sp;
1216
 
 
1217
 
-       if (!vcpu->kvm->n_free_mmu_pages)
1218
 
+       if (!vcpu->kvm->arch.n_free_mmu_pages)
1219
 
                return NULL;
1220
 
 
1221
 
-       page = mmu_memory_cache_alloc(&vcpu->mmu_page_header_cache,
1222
 
-                                     sizeof *page);
1223
 
-       page->spt = mmu_memory_cache_alloc(&vcpu->mmu_page_cache, PAGE_SIZE);
1224
 
-       set_page_private(virt_to_page(page->spt), (unsigned long)page);
1225
 
-       list_add(&page->link, &vcpu->kvm->active_mmu_pages);
1226
 
-       ASSERT(is_empty_shadow_page(page->spt));
1227
 
-       page->slot_bitmap = 0;
1228
 
-       page->multimapped = 0;
1229
 
-       page->parent_pte = parent_pte;
1230
 
-       --vcpu->kvm->n_free_mmu_pages;
1231
 
-       return page;
1232
 
+       sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache, sizeof *sp);
1233
 
+       sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE);
1234
 
+       sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE);
1235
 
+       set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
1236
 
+       list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
1237
 
+       ASSERT(is_empty_shadow_page(sp->spt));
1238
 
+       sp->slot_bitmap = 0;
1239
 
+       sp->multimapped = 0;
1240
 
+       sp->parent_pte = parent_pte;
1241
 
+       --vcpu->kvm->arch.n_free_mmu_pages;
1242
 
+       return sp;
1243
 
 }
1244
 
 
1245
 
 static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu,
1246
 
-                                   struct kvm_mmu_page *page, u64 *parent_pte)
1247
 
+                                   struct kvm_mmu_page *sp, u64 *parent_pte)
1248
 
 {
1249
 
        struct kvm_pte_chain *pte_chain;
1250
 
        struct hlist_node *node;
1251
 
@@ -504,20 +594,20 @@ static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu,
1252
 
 
1253
 
        if (!parent_pte)
1254
 
                return;
1255
 
-       if (!page->multimapped) {
1256
 
-               u64 *old = page->parent_pte;
1257
 
+       if (!sp->multimapped) {
1258
 
+               u64 *old = sp->parent_pte;
1259
 
 
1260
 
                if (!old) {
1261
 
-                       page->parent_pte = parent_pte;
1262
 
+                       sp->parent_pte = parent_pte;
1263
 
                        return;
1264
 
                }
1265
 
-               page->multimapped = 1;
1266
 
+               sp->multimapped = 1;
1267
 
                pte_chain = mmu_alloc_pte_chain(vcpu);
1268
 
-               INIT_HLIST_HEAD(&page->parent_ptes);
1269
 
-               hlist_add_head(&pte_chain->link, &page->parent_ptes);
1270
 
+               INIT_HLIST_HEAD(&sp->parent_ptes);
1271
 
+               hlist_add_head(&pte_chain->link, &sp->parent_ptes);
1272
 
                pte_chain->parent_ptes[0] = old;
1273
 
        }
1274
 
-       hlist_for_each_entry(pte_chain, node, &page->parent_ptes, link) {
1275
 
+       hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) {
1276
 
                if (pte_chain->parent_ptes[NR_PTE_CHAIN_ENTRIES-1])
1277
 
                        continue;
1278
 
                for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i)
1279
 
@@ -528,23 +618,23 @@ static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu,
1280
 
        }
1281
 
        pte_chain = mmu_alloc_pte_chain(vcpu);
1282
 
        BUG_ON(!pte_chain);
1283
 
-       hlist_add_head(&pte_chain->link, &page->parent_ptes);
1284
 
+       hlist_add_head(&pte_chain->link, &sp->parent_ptes);
1285
 
        pte_chain->parent_ptes[0] = parent_pte;
1286
 
 }
1287
 
 
1288
 
-static void mmu_page_remove_parent_pte(struct kvm_mmu_page *page,
1289
 
+static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp,
1290
 
                                       u64 *parent_pte)
1291
 
 {
1292
 
        struct kvm_pte_chain *pte_chain;
1293
 
        struct hlist_node *node;
1294
 
        int i;
1295
 
 
1296
 
-       if (!page->multimapped) {
1297
 
-               BUG_ON(page->parent_pte != parent_pte);
1298
 
-               page->parent_pte = NULL;
1299
 
+       if (!sp->multimapped) {
1300
 
+               BUG_ON(sp->parent_pte != parent_pte);
1301
 
+               sp->parent_pte = NULL;
1302
 
                return;
1303
 
        }
1304
 
-       hlist_for_each_entry(pte_chain, node, &page->parent_ptes, link)
1305
 
+       hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link)
1306
 
                for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) {
1307
 
                        if (!pte_chain->parent_ptes[i])
1308
 
                                break;
1309
 
@@ -560,9 +650,9 @@ static void mmu_page_remove_parent_pte(struct kvm_mmu_page *page,
1310
 
                        if (i == 0) {
1311
 
                                hlist_del(&pte_chain->link);
1312
 
                                mmu_free_pte_chain(pte_chain);
1313
 
-                               if (hlist_empty(&page->parent_ptes)) {
1314
 
-                                       page->multimapped = 0;
1315
 
-                                       page->parent_pte = NULL;
1316
 
+                               if (hlist_empty(&sp->parent_ptes)) {
1317
 
+                                       sp->multimapped = 0;
1318
 
+                                       sp->parent_pte = NULL;
1319
 
                                }
1320
 
                        }
1321
 
                        return;
1322
 
@@ -570,22 +660,21 @@ static void mmu_page_remove_parent_pte(struct kvm_mmu_page *page,
1323
 
        BUG();
1324
 
 }
1325
 
 
1326
 
-static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm_vcpu *vcpu,
1327
 
-                                               gfn_t gfn)
1328
 
+static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
1329
 
 {
1330
 
        unsigned index;
1331
 
        struct hlist_head *bucket;
1332
 
-       struct kvm_mmu_page *page;
1333
 
+       struct kvm_mmu_page *sp;
1334
 
        struct hlist_node *node;
1335
 
 
1336
 
        pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
1337
 
        index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
1338
 
-       bucket = &vcpu->kvm->mmu_page_hash[index];
1339
 
-       hlist_for_each_entry(page, node, bucket, hash_link)
1340
 
-               if (page->gfn == gfn && !page->role.metaphysical) {
1341
 
+       bucket = &kvm->arch.mmu_page_hash[index];
1342
 
+       hlist_for_each_entry(sp, node, bucket, hash_link)
1343
 
+               if (sp->gfn == gfn && !sp->role.metaphysical) {
1344
 
                        pgprintk("%s: found role %x\n",
1345
 
-                                __FUNCTION__, page->role.word);
1346
 
-                       return page;
1347
 
+                                __FUNCTION__, sp->role.word);
1348
 
+                       return sp;
1349
 
                }
1350
 
        return NULL;
1351
 
 }
1352
 
@@ -595,22 +684,23 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
                                             gva_t gaddr,
                                             unsigned level,
                                             int metaphysical,
-                                            unsigned hugepage_access,
-                                            u64 *parent_pte)
+                                            unsigned access,
+                                            u64 *parent_pte,
+                                            bool *new_page)
 {
        union kvm_mmu_page_role role;
        unsigned index;
        unsigned quadrant;
        struct hlist_head *bucket;
-       struct kvm_mmu_page *page;
+       struct kvm_mmu_page *sp;
        struct hlist_node *node;
 
        role.word = 0;
-       role.glevels = vcpu->mmu.root_level;
+       role.glevels = vcpu->arch.mmu.root_level;
        role.level = level;
        role.metaphysical = metaphysical;
-       role.hugepage_access = hugepage_access;
-       if (vcpu->mmu.root_level <= PT32_ROOT_LEVEL) {
+       role.access = access;
+       if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) {
                quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
                quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
                role.quadrant = quadrant;
@@ -618,39 +708,42 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
        pgprintk("%s: looking gfn %lx role %x\n", __FUNCTION__,
                 gfn, role.word);
        index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
-       bucket = &vcpu->kvm->mmu_page_hash[index];
-       hlist_for_each_entry(page, node, bucket, hash_link)
-               if (page->gfn == gfn && page->role.word == role.word) {
-                       mmu_page_add_parent_pte(vcpu, page, parent_pte);
+       bucket = &vcpu->kvm->arch.mmu_page_hash[index];
+       hlist_for_each_entry(sp, node, bucket, hash_link)
+               if (sp->gfn == gfn && sp->role.word == role.word) {
+                       mmu_page_add_parent_pte(vcpu, sp, parent_pte);
                        pgprintk("%s: found\n", __FUNCTION__);
-                       return page;
+                       return sp;
                }
-       page = kvm_mmu_alloc_page(vcpu, parent_pte);
-       if (!page)
-               return page;
+       sp = kvm_mmu_alloc_page(vcpu, parent_pte);
+       if (!sp)
+               return sp;
        pgprintk("%s: adding gfn %lx role %x\n", __FUNCTION__, gfn, role.word);
-       page->gfn = gfn;
-       page->role = role;
-       hlist_add_head(&page->hash_link, bucket);
+       sp->gfn = gfn;
+       sp->role = role;
+       hlist_add_head(&sp->hash_link, bucket);
+       vcpu->arch.mmu.prefetch_page(vcpu, sp);
        if (!metaphysical)
-               rmap_write_protect(vcpu, gfn);
-       return page;
+               rmap_write_protect(vcpu->kvm, gfn);
+       if (new_page)
+               *new_page = 1;
+       return sp;
 }
 
 static void kvm_mmu_page_unlink_children(struct kvm *kvm,
1422
 
-                                        struct kvm_mmu_page *page)
1423
 
+                                        struct kvm_mmu_page *sp)
1424
 
 {
1425
 
        unsigned i;
1426
 
        u64 *pt;
1427
 
        u64 ent;
1428
 
 
1429
 
-       pt = page->spt;
1430
 
+       pt = sp->spt;
1431
 
 
1432
 
-       if (page->role.level == PT_PAGE_TABLE_LEVEL) {
1433
 
+       if (sp->role.level == PT_PAGE_TABLE_LEVEL) {
1434
 
                for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
1435
 
-                       if (pt[i] & PT_PRESENT_MASK)
1436
 
-                               rmap_remove(&pt[i]);
1437
 
-                       pt[i] = 0;
1438
 
+                       if (is_shadow_present_pte(pt[i]))
1439
 
+                               rmap_remove(kvm, &pt[i]);
1440
 
+                       pt[i] = shadow_trap_nonpresent_pte;
1441
 
                }
1442
 
                kvm_flush_remote_tlbs(kvm);
1443
 
                return;
1444
 
@@ -659,8 +752,8 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm,
1445
 
        for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
1446
 
                ent = pt[i];
1447
 
 
1448
 
-               pt[i] = 0;
1449
 
-               if (!(ent & PT_PRESENT_MASK))
1450
 
+               pt[i] = shadow_trap_nonpresent_pte;
1451
 
+               if (!is_shadow_present_pte(ent))
1452
 
                        continue;
1453
 
                ent &= PT64_BASE_ADDR_MASK;
1454
 
                mmu_page_remove_parent_pte(page_header(ent), &pt[i]);
1455
 
@@ -668,147 +761,238 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm,
1456
 
        kvm_flush_remote_tlbs(kvm);
1457
 
 }
1458
 
 
1459
 
-static void kvm_mmu_put_page(struct kvm_mmu_page *page,
1460
 
-                            u64 *parent_pte)
1461
 
+static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte)
1462
 
 {
1463
 
-       mmu_page_remove_parent_pte(page, parent_pte);
1464
 
+       mmu_page_remove_parent_pte(sp, parent_pte);
1465
 
 }
1466
 
 
1467
 
-static void kvm_mmu_zap_page(struct kvm *kvm,
1468
 
-                            struct kvm_mmu_page *page)
1469
 
+static void kvm_mmu_reset_last_pte_updated(struct kvm *kvm)
1470
 
+{
1471
 
+       int i;
1472
 
+
1473
 
+       for (i = 0; i < KVM_MAX_VCPUS; ++i)
1474
 
+               if (kvm->vcpus[i])
1475
 
+                       kvm->vcpus[i]->arch.last_pte_updated = NULL;
1476
 
+}
1477
 
+
1478
 
+static void kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
1479
 
 {
1480
 
        u64 *parent_pte;
1481
 
 
1482
 
-       while (page->multimapped || page->parent_pte) {
1483
 
-               if (!page->multimapped)
1484
 
-                       parent_pte = page->parent_pte;
1485
 
+       ++kvm->stat.mmu_shadow_zapped;
1486
 
+       while (sp->multimapped || sp->parent_pte) {
1487
 
+               if (!sp->multimapped)
1488
 
+                       parent_pte = sp->parent_pte;
1489
 
                else {
1490
 
                        struct kvm_pte_chain *chain;
1491
 
 
1492
 
-                       chain = container_of(page->parent_ptes.first,
1493
 
+                       chain = container_of(sp->parent_ptes.first,
1494
 
                                             struct kvm_pte_chain, link);
1495
 
                        parent_pte = chain->parent_ptes[0];
1496
 
                }
1497
 
                BUG_ON(!parent_pte);
1498
 
-               kvm_mmu_put_page(page, parent_pte);
1499
 
-               set_shadow_pte(parent_pte, 0);
1500
 
+               kvm_mmu_put_page(sp, parent_pte);
1501
 
+               set_shadow_pte(parent_pte, shadow_trap_nonpresent_pte);
1502
 
        }
1503
 
-       kvm_mmu_page_unlink_children(kvm, page);
1504
 
-       if (!page->root_count) {
1505
 
-               hlist_del(&page->hash_link);
1506
 
-               kvm_mmu_free_page(kvm, page);
1507
 
+       kvm_mmu_page_unlink_children(kvm, sp);
1508
 
+       if (!sp->root_count) {
1509
 
+               hlist_del(&sp->hash_link);
1510
 
+               kvm_mmu_free_page(kvm, sp);
1511
 
        } else
1512
 
-               list_move(&page->link, &kvm->active_mmu_pages);
1513
 
+               list_move(&sp->link, &kvm->arch.active_mmu_pages);
1514
 
+       kvm_mmu_reset_last_pte_updated(kvm);
1515
 
+}
1516
 
+
1517
 
+/*
1518
 
+ * Changing the number of mmu pages allocated to the vm
1519
 
+ * Note: if kvm_nr_mmu_pages is too small, you will get dead lock
1520
 
+ */
1521
 
+void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
1522
 
+{
1523
 
+       /*
1524
 
+        * If we set the number of mmu pages to be smaller be than the
1525
 
+        * number of actived pages , we must to free some mmu pages before we
1526
 
+        * change the value
1527
 
+        */
1528
 
+
1529
 
+       if ((kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages) >
1530
 
+           kvm_nr_mmu_pages) {
1531
 
+               int n_used_mmu_pages = kvm->arch.n_alloc_mmu_pages
1532
 
+                                      - kvm->arch.n_free_mmu_pages;
1533
 
+
1534
 
+               while (n_used_mmu_pages > kvm_nr_mmu_pages) {
1535
 
+                       struct kvm_mmu_page *page;
1536
 
+
1537
 
+                       page = container_of(kvm->arch.active_mmu_pages.prev,
1538
 
+                                           struct kvm_mmu_page, link);
1539
 
+                       kvm_mmu_zap_page(kvm, page);
1540
 
+                       n_used_mmu_pages--;
1541
 
+               }
1542
 
+               kvm->arch.n_free_mmu_pages = 0;
1543
 
+       }
1544
 
+       else
1545
 
+               kvm->arch.n_free_mmu_pages += kvm_nr_mmu_pages
1546
 
+                                        - kvm->arch.n_alloc_mmu_pages;
1547
 
+
1548
 
+       kvm->arch.n_alloc_mmu_pages = kvm_nr_mmu_pages;
1549
 
 }
1550
 
 
1551
 
-static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn)
1552
 
+static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
1553
 
 {
1554
 
        unsigned index;
1555
 
        struct hlist_head *bucket;
1556
 
-       struct kvm_mmu_page *page;
1557
 
+       struct kvm_mmu_page *sp;
1558
 
        struct hlist_node *node, *n;
1559
 
        int r;
1560
 
 
1561
 
        pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
1562
 
        r = 0;
1563
 
        index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
1564
 
-       bucket = &vcpu->kvm->mmu_page_hash[index];
1565
 
-       hlist_for_each_entry_safe(page, node, n, bucket, hash_link)
1566
 
-               if (page->gfn == gfn && !page->role.metaphysical) {
1567
 
+       bucket = &kvm->arch.mmu_page_hash[index];
1568
 
+       hlist_for_each_entry_safe(sp, node, n, bucket, hash_link)
1569
 
+               if (sp->gfn == gfn && !sp->role.metaphysical) {
1570
 
                        pgprintk("%s: gfn %lx role %x\n", __FUNCTION__, gfn,
1571
 
-                                page->role.word);
1572
 
-                       kvm_mmu_zap_page(vcpu->kvm, page);
1573
 
+                                sp->role.word);
1574
 
+                       kvm_mmu_zap_page(kvm, sp);
1575
 
                        r = 1;
1576
 
                }
1577
 
        return r;
1578
 
 }
1579
 
 
1580
 
-static void mmu_unshadow(struct kvm_vcpu *vcpu, gfn_t gfn)
1581
 
+static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
1582
 
 {
1583
 
-       struct kvm_mmu_page *page;
1584
 
+       struct kvm_mmu_page *sp;
1585
 
 
1586
 
-       while ((page = kvm_mmu_lookup_page(vcpu, gfn)) != NULL) {
1587
 
-               pgprintk("%s: zap %lx %x\n",
1588
 
-                        __FUNCTION__, gfn, page->role.word);
1589
 
-               kvm_mmu_zap_page(vcpu->kvm, page);
1590
 
+       while ((sp = kvm_mmu_lookup_page(kvm, gfn)) != NULL) {
1591
 
+               pgprintk("%s: zap %lx %x\n", __FUNCTION__, gfn, sp->role.word);
1592
 
+               kvm_mmu_zap_page(kvm, sp);
1593
 
        }
1594
 
 }
1595
 
 
1596
 
-static void page_header_update_slot(struct kvm *kvm, void *pte, gpa_t gpa)
1597
 
+static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
1598
 
 {
1599
 
-       int slot = memslot_id(kvm, gfn_to_memslot(kvm, gpa >> PAGE_SHIFT));
1600
 
-       struct kvm_mmu_page *page_head = page_header(__pa(pte));
1601
 
+       int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn));
1602
 
+       struct kvm_mmu_page *sp = page_header(__pa(pte));
1603
 
 
1604
 
-       __set_bit(slot, &page_head->slot_bitmap);
1605
 
+       __set_bit(slot, &sp->slot_bitmap);
1606
 
 }
1607
 
 
1608
 
-hpa_t safe_gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa)
1609
 
+struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
1610
 
 {
1611
 
-       hpa_t hpa = gpa_to_hpa(vcpu, gpa);
1612
 
+       gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
1613
 
 
1614
 
-       return is_error_hpa(hpa) ? bad_page_address | (gpa & ~PAGE_MASK): hpa;
1615
 
+       if (gpa == UNMAPPED_GVA)
1616
 
+               return NULL;
1617
 
+       return gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
1618
 
 }
1619
 
 
1620
 
-hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa)
1621
 
+static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1622
 
+                        unsigned pt_access, unsigned pte_access,
1623
 
+                        int user_fault, int write_fault, int dirty,
1624
 
+                        int *ptwrite, gfn_t gfn)
1625
 
 {
1626
 
+       u64 spte;
1627
 
+       int was_rmapped = is_rmap_pte(*shadow_pte);
1628
 
        struct page *page;
1629
 
 
1630
 
-       ASSERT((gpa & HPA_ERR_MASK) == 0);
1631
 
-       page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
1632
 
-       if (!page)
1633
 
-               return gpa | HPA_ERR_MASK;
1634
 
-       return ((hpa_t)page_to_pfn(page) << PAGE_SHIFT)
1635
 
-               | (gpa & (PAGE_SIZE-1));
1636
 
-}
1637
 
+       pgprintk("%s: spte %llx access %x write_fault %d"
1638
 
+                " user_fault %d gfn %lx\n",
1639
 
+                __FUNCTION__, *shadow_pte, pt_access,
1640
 
+                write_fault, user_fault, gfn);
1641
 
 
1642
 
-hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva)
1643
 
-{
1644
 
-       gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);
1645
 
+       /*
1646
 
+        * We don't set the accessed bit, since we sometimes want to see
1647
 
+        * whether the guest actually used the pte (in order to detect
1648
 
+        * demand paging).
1649
 
+        */
1650
 
+       spte = PT_PRESENT_MASK | PT_DIRTY_MASK;
1651
 
+       if (!dirty)
1652
 
+               pte_access &= ~ACC_WRITE_MASK;
1653
 
+       if (!(pte_access & ACC_EXEC_MASK))
1654
 
+               spte |= PT64_NX_MASK;
1655
 
+
1656
 
+       page = gfn_to_page(vcpu->kvm, gfn);
1657
 
+
1658
 
+       spte |= PT_PRESENT_MASK;
1659
 
+       if (pte_access & ACC_USER_MASK)
1660
 
+               spte |= PT_USER_MASK;
1661
 
+
1662
 
+       if (is_error_page(page)) {
1663
 
+               set_shadow_pte(shadow_pte,
1664
 
+                              shadow_trap_nonpresent_pte | PT_SHADOW_IO_MARK);
1665
 
+               kvm_release_page_clean(page);
1666
 
+               return;
1667
 
+       }
1668
 
 
1669
 
-       if (gpa == UNMAPPED_GVA)
1670
 
-               return UNMAPPED_GVA;
1671
 
-       return gpa_to_hpa(vcpu, gpa);
1672
 
-}
1673
 
+       spte |= page_to_phys(page);
1674
 
 
1675
 
-struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
1676
 
-{
1677
 
-       gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);
1678
 
+       if ((pte_access & ACC_WRITE_MASK)
1679
 
+           || (write_fault && !is_write_protection(vcpu) && !user_fault)) {
1680
 
+               struct kvm_mmu_page *shadow;
1681
 
 
1682
 
-       if (gpa == UNMAPPED_GVA)
1683
 
-               return NULL;
1684
 
-       return pfn_to_page(gpa_to_hpa(vcpu, gpa) >> PAGE_SHIFT);
1685
 
+               spte |= PT_WRITABLE_MASK;
1686
 
+               if (user_fault) {
1687
 
+                       mmu_unshadow(vcpu->kvm, gfn);
1688
 
+                       goto unshadowed;
1689
 
+               }
1690
 
+
1691
 
+               shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn);
1692
 
+               if (shadow) {
1693
 
+                       pgprintk("%s: found shadow page for %lx, marking ro\n",
1694
 
+                                __FUNCTION__, gfn);
1695
 
+                       pte_access &= ~ACC_WRITE_MASK;
1696
 
+                       if (is_writeble_pte(spte)) {
1697
 
+                               spte &= ~PT_WRITABLE_MASK;
1698
 
+                               kvm_x86_ops->tlb_flush(vcpu);
1699
 
+                       }
1700
 
+                       if (write_fault)
1701
 
+                               *ptwrite = 1;
1702
 
+               }
1703
 
+       }
1704
 
+
1705
 
+unshadowed:
1706
 
+
1707
 
+       if (pte_access & ACC_WRITE_MASK)
1708
 
+               mark_page_dirty(vcpu->kvm, gfn);
1709
 
+
1710
 
+       pgprintk("%s: setting spte %llx\n", __FUNCTION__, spte);
1711
 
+       set_shadow_pte(shadow_pte, spte);
1712
 
+       page_header_update_slot(vcpu->kvm, shadow_pte, gfn);
1713
 
+       if (!was_rmapped) {
1714
 
+               rmap_add(vcpu, shadow_pte, gfn);
1715
 
+               if (!is_rmap_pte(*shadow_pte))
1716
 
+                       kvm_release_page_clean(page);
1717
 
+       }
1718
 
+       else
1719
 
+               kvm_release_page_clean(page);
1720
 
+       if (!ptwrite || !*ptwrite)
1721
 
+               vcpu->arch.last_pte_updated = shadow_pte;
1722
 
 }
1723
 
 
1724
 
 static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
1725
 
 {
1726
 
 }
1727
 
 
1728
 
-static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
1729
 
+static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
1730
 
 {
1731
 
        int level = PT32E_ROOT_LEVEL;
1732
 
-       hpa_t table_addr = vcpu->mmu.root_hpa;
1733
 
+       hpa_t table_addr = vcpu->arch.mmu.root_hpa;
1734
 
+       int pt_write = 0;
1735
 
 
1736
 
        for (; ; level--) {
1737
 
                u32 index = PT64_INDEX(v, level);
1738
 
                u64 *table;
1739
 
-               u64 pte;
1740
 
 
1741
 
                ASSERT(VALID_PAGE(table_addr));
1742
 
                table = __va(table_addr);
1743
 
 
1744
 
                if (level == 1) {
1745
 
-                       pte = table[index];
1746
 
-                       if (is_present_pte(pte) && is_writeble_pte(pte))
1747
 
-                               return 0;
1748
 
-                       mark_page_dirty(vcpu->kvm, v >> PAGE_SHIFT);
1749
 
-                       page_header_update_slot(vcpu->kvm, table, v);
1750
 
-                       table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK |
1751
 
-                                                               PT_USER_MASK;
1752
 
-                       rmap_add(vcpu, &table[index]);
1753
 
-                       return 0;
1754
 
+                       mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL,
1755
 
+                                    0, write, 1, &pt_write, gfn);
1756
 
+                       return pt_write || is_io_pte(table[index]);
1757
 
                }
1758
 
 
1759
 
-               if (table[index] == 0) {
1760
 
+               if (table[index] == shadow_trap_nonpresent_pte) {
1761
 
                        struct kvm_mmu_page *new_table;
1762
 
                        gfn_t pseudo_gfn;
1763
 
 
1764
 
@@ -816,7 +1000,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
1765
 
                                >> PAGE_SHIFT;
1766
 
                        new_table = kvm_mmu_get_page(vcpu, pseudo_gfn,
1767
 
                                                     v, level - 1,
1768
 
-                                                    1, 0, &table[index]);
1769
 
+                                                    1, ACC_ALL, &table[index],
1770
 
+                                                    NULL);
1771
 
                        if (!new_table) {
1772
 
                                pgprintk("nonpaging_map: ENOMEM\n");
1773
 
                                return -ENOMEM;
1774
 
@@ -829,77 +1014,86 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
1775
 
        }
1776
 
 }
1777
 
 
1778
 
+static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu,
1779
 
+                                   struct kvm_mmu_page *sp)
1780
 
+{
1781
 
+       int i;
1782
 
+
1783
 
+       for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
1784
 
+               sp->spt[i] = shadow_trap_nonpresent_pte;
1785
 
+}
1786
 
+
1787
 
 static void mmu_free_roots(struct kvm_vcpu *vcpu)
1788
 
 {
1789
 
        int i;
1790
 
-       struct kvm_mmu_page *page;
1791
 
+       struct kvm_mmu_page *sp;
1792
 
 
1793
 
-       if (!VALID_PAGE(vcpu->mmu.root_hpa))
1794
 
+       if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
1795
 
                return;
1796
 
 #ifdef CONFIG_X86_64
1797
 
-       if (vcpu->mmu.shadow_root_level == PT64_ROOT_LEVEL) {
1798
 
-               hpa_t root = vcpu->mmu.root_hpa;
1799
 
+       if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
1800
 
+               hpa_t root = vcpu->arch.mmu.root_hpa;
1801
 
 
1802
 
-               page = page_header(root);
1803
 
-               --page->root_count;
1804
 
-               vcpu->mmu.root_hpa = INVALID_PAGE;
1805
 
+               sp = page_header(root);
1806
 
+               --sp->root_count;
1807
 
+               vcpu->arch.mmu.root_hpa = INVALID_PAGE;
1808
 
                return;
1809
 
        }
1810
 
 #endif
1811
 
        for (i = 0; i < 4; ++i) {
1812
 
-               hpa_t root = vcpu->mmu.pae_root[i];
1813
 
+               hpa_t root = vcpu->arch.mmu.pae_root[i];
1814
 
 
1815
 
                if (root) {
1816
 
                        root &= PT64_BASE_ADDR_MASK;
1817
 
-                       page = page_header(root);
1818
 
-                       --page->root_count;
1819
 
+                       sp = page_header(root);
1820
 
+                       --sp->root_count;
1821
 
                }
1822
 
-               vcpu->mmu.pae_root[i] = INVALID_PAGE;
1823
 
+               vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
1824
 
        }
1825
 
-       vcpu->mmu.root_hpa = INVALID_PAGE;
1826
 
+       vcpu->arch.mmu.root_hpa = INVALID_PAGE;
1827
 
 }
1828
 
 
1829
 
 static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
1830
 
 {
1831
 
        int i;
1832
 
        gfn_t root_gfn;
1833
 
-       struct kvm_mmu_page *page;
1834
 
+       struct kvm_mmu_page *sp;
1835
 
 
1836
 
-       root_gfn = vcpu->cr3 >> PAGE_SHIFT;
1837
 
+       root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT;
1838
 
 
1839
 
 #ifdef CONFIG_X86_64
1840
 
-       if (vcpu->mmu.shadow_root_level == PT64_ROOT_LEVEL) {
1841
 
-               hpa_t root = vcpu->mmu.root_hpa;
1842
 
+       if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
1843
 
+               hpa_t root = vcpu->arch.mmu.root_hpa;
1844
 
 
1845
 
                ASSERT(!VALID_PAGE(root));
1846
 
-               page = kvm_mmu_get_page(vcpu, root_gfn, 0,
1847
 
-                                       PT64_ROOT_LEVEL, 0, 0, NULL);
1848
 
-               root = __pa(page->spt);
1849
 
-               ++page->root_count;
1850
 
-               vcpu->mmu.root_hpa = root;
1851
 
+               sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
1852
 
+                                     PT64_ROOT_LEVEL, 0, ACC_ALL, NULL, NULL);
1853
 
+               root = __pa(sp->spt);
1854
 
+               ++sp->root_count;
1855
 
+               vcpu->arch.mmu.root_hpa = root;
1856
 
                return;
1857
 
        }
1858
 
 #endif
1859
 
        for (i = 0; i < 4; ++i) {
1860
 
-               hpa_t root = vcpu->mmu.pae_root[i];
1861
 
+               hpa_t root = vcpu->arch.mmu.pae_root[i];
1862
 
 
1863
 
                ASSERT(!VALID_PAGE(root));
1864
 
-               if (vcpu->mmu.root_level == PT32E_ROOT_LEVEL) {
1865
 
-                       if (!is_present_pte(vcpu->pdptrs[i])) {
1866
 
-                               vcpu->mmu.pae_root[i] = 0;
1867
 
+               if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) {
1868
 
+                       if (!is_present_pte(vcpu->arch.pdptrs[i])) {
1869
 
+                               vcpu->arch.mmu.pae_root[i] = 0;
1870
 
                                continue;
1871
 
                        }
1872
 
-                       root_gfn = vcpu->pdptrs[i] >> PAGE_SHIFT;
1873
 
-               } else if (vcpu->mmu.root_level == 0)
1874
 
+                       root_gfn = vcpu->arch.pdptrs[i] >> PAGE_SHIFT;
1875
 
+               } else if (vcpu->arch.mmu.root_level == 0)
1876
 
                        root_gfn = 0;
1877
 
-               page = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
1878
 
-                                       PT32_ROOT_LEVEL, !is_paging(vcpu),
1879
 
-                                       0, NULL);
1880
 
-               root = __pa(page->spt);
1881
 
-               ++page->root_count;
1882
 
-               vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK;
1883
 
+               sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
1884
 
+                                     PT32_ROOT_LEVEL, !is_paging(vcpu),
1885
 
+                                     ACC_ALL, NULL, NULL);
1886
 
+               root = __pa(sp->spt);
1887
 
+               ++sp->root_count;
1888
 
+               vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK;
1889
 
        }
1890
 
-       vcpu->mmu.root_hpa = __pa(vcpu->mmu.pae_root);
1891
 
+       vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
1892
 
 }
1893
 
 
1894
 
 static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
1895
 
@@ -908,26 +1102,23 @@ static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
1896
 
 }
1897
 
 
1898
 
 static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
1899
 
-                              u32 error_code)
1900
 
+                               u32 error_code)
1901
 
 {
1902
 
-       gpa_t addr = gva;
1903
 
-       hpa_t paddr;
1904
 
+       gfn_t gfn;
1905
 
        int r;
1906
 
 
1907
 
+       pgprintk("%s: gva %lx error %x\n", __FUNCTION__, gva, error_code);
1908
 
        r = mmu_topup_memory_caches(vcpu);
1909
 
        if (r)
1910
 
                return r;
1911
 
 
1912
 
        ASSERT(vcpu);
1913
 
-       ASSERT(VALID_PAGE(vcpu->mmu.root_hpa));
1914
 
+       ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
1915
 
 
1916
 
+       gfn = gva >> PAGE_SHIFT;
1917
 
 
1918
 
-       paddr = gpa_to_hpa(vcpu , addr & PT64_BASE_ADDR_MASK);
1919
 
-
1920
 
-       if (is_error_hpa(paddr))
1921
 
-               return 1;
1922
 
-
1923
 
-       return nonpaging_map(vcpu, addr & PAGE_MASK, paddr);
1924
 
+       return nonpaging_map(vcpu, gva & PAGE_MASK,
1925
 
+                            error_code & PFERR_WRITE_MASK, gfn);
1926
 
 }
1927
 
 
1928
 
 static void nonpaging_free(struct kvm_vcpu *vcpu)
1929
 
@@ -937,19 +1128,20 @@ static void nonpaging_free(struct kvm_vcpu *vcpu)
1930
 
 
1931
 
 static int nonpaging_init_context(struct kvm_vcpu *vcpu)
1932
 
 {
1933
 
-       struct kvm_mmu *context = &vcpu->mmu;
1934
 
+       struct kvm_mmu *context = &vcpu->arch.mmu;
1935
 
 
1936
 
        context->new_cr3 = nonpaging_new_cr3;
1937
 
        context->page_fault = nonpaging_page_fault;
1938
 
        context->gva_to_gpa = nonpaging_gva_to_gpa;
1939
 
        context->free = nonpaging_free;
1940
 
+       context->prefetch_page = nonpaging_prefetch_page;
1941
 
        context->root_level = 0;
1942
 
        context->shadow_root_level = PT32E_ROOT_LEVEL;
1943
 
        context->root_hpa = INVALID_PAGE;
1944
 
        return 0;
1945
 
 }
1946
 
 
1947
 
-static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
1948
 
+void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
1949
 
 {
1950
 
        ++vcpu->stat.tlb_flush;
1951
 
        kvm_x86_ops->tlb_flush(vcpu);
1952
 
@@ -965,7 +1157,7 @@ static void inject_page_fault(struct kvm_vcpu *vcpu,
1953
 
                              u64 addr,
1954
 
                              u32 err_code)
1955
 
 {
1956
 
-       kvm_x86_ops->inject_page_fault(vcpu, addr, err_code);
1957
 
+       kvm_inject_page_fault(vcpu, addr, err_code);
1958
 
 }
1959
 
 
1960
 
 static void paging_free(struct kvm_vcpu *vcpu)
1961
 
@@ -983,12 +1175,13 @@ static void paging_free(struct kvm_vcpu *vcpu)
1962
 
 
1963
 
 static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
1964
 
 {
1965
 
-       struct kvm_mmu *context = &vcpu->mmu;
1966
 
+       struct kvm_mmu *context = &vcpu->arch.mmu;
1967
 
 
1968
 
        ASSERT(is_pae(vcpu));
1969
 
        context->new_cr3 = paging_new_cr3;
1970
 
        context->page_fault = paging64_page_fault;
1971
 
        context->gva_to_gpa = paging64_gva_to_gpa;
1972
 
+       context->prefetch_page = paging64_prefetch_page;
1973
 
        context->free = paging_free;
1974
 
        context->root_level = level;
1975
 
        context->shadow_root_level = level;
1976
 
@@ -1003,12 +1196,13 @@ static int paging64_init_context(struct kvm_vcpu *vcpu)
1977
 
 
1978
 
 static int paging32_init_context(struct kvm_vcpu *vcpu)
1979
 
 {
1980
 
-       struct kvm_mmu *context = &vcpu->mmu;
1981
 
+       struct kvm_mmu *context = &vcpu->arch.mmu;
1982
 
 
1983
 
        context->new_cr3 = paging_new_cr3;
1984
 
        context->page_fault = paging32_page_fault;
1985
 
        context->gva_to_gpa = paging32_gva_to_gpa;
1986
 
        context->free = paging_free;
1987
 
+       context->prefetch_page = paging32_prefetch_page;
1988
 
        context->root_level = PT32_ROOT_LEVEL;
1989
 
        context->shadow_root_level = PT32E_ROOT_LEVEL;
1990
 
        context->root_hpa = INVALID_PAGE;
1991
 
@@ -1023,7 +1217,7 @@ static int paging32E_init_context(struct kvm_vcpu *vcpu)
1992
 
 static int init_kvm_mmu(struct kvm_vcpu *vcpu)
1993
 
 {
1994
 
        ASSERT(vcpu);
1995
 
-       ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));
1996
 
+       ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
1997
 
 
1998
 
        if (!is_paging(vcpu))
1999
 
                return nonpaging_init_context(vcpu);
2000
 
@@ -1038,9 +1232,9 @@ static int init_kvm_mmu(struct kvm_vcpu *vcpu)
2001
 
 static void destroy_kvm_mmu(struct kvm_vcpu *vcpu)
2002
 
 {
2003
 
        ASSERT(vcpu);
2004
 
-       if (VALID_PAGE(vcpu->mmu.root_hpa)) {
2005
 
-               vcpu->mmu.free(vcpu);
2006
 
-               vcpu->mmu.root_hpa = INVALID_PAGE;
2007
 
+       if (VALID_PAGE(vcpu->arch.mmu.root_hpa)) {
2008
 
+               vcpu->arch.mmu.free(vcpu);
2009
 
+               vcpu->arch.mmu.root_hpa = INVALID_PAGE;
2010
 
        }
2011
 
 }
2012
 
 
2013
 
@@ -1060,7 +1254,7 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
2014
 
        if (r)
2015
 
                goto out;
2016
 
        mmu_alloc_roots(vcpu);
2017
 
-       kvm_x86_ops->set_cr3(vcpu, vcpu->mmu.root_hpa);
2018
 
+       kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
2019
 
        kvm_mmu_flush_tlb(vcpu);
2020
 
 out:
2021
 
        mutex_unlock(&vcpu->kvm->lock);
2022
 
@@ -1074,47 +1268,79 @@ void kvm_mmu_unload(struct kvm_vcpu *vcpu)
2023
 
 }
2024
 
 
2025
 
 static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
2026
 
-                                 struct kvm_mmu_page *page,
2027
 
+                                 struct kvm_mmu_page *sp,
2028
 
                                  u64 *spte)
2029
 
 {
2030
 
        u64 pte;
2031
 
        struct kvm_mmu_page *child;
2032
 
 
2033
 
        pte = *spte;
2034
 
-       if (is_present_pte(pte)) {
2035
 
-               if (page->role.level == PT_PAGE_TABLE_LEVEL)
2036
 
-                       rmap_remove(spte);
2037
 
+       if (is_shadow_present_pte(pte)) {
2038
 
+               if (sp->role.level == PT_PAGE_TABLE_LEVEL)
2039
 
+                       rmap_remove(vcpu->kvm, spte);
2040
 
                else {
2041
 
                        child = page_header(pte & PT64_BASE_ADDR_MASK);
2042
 
                        mmu_page_remove_parent_pte(child, spte);
2043
 
                }
2044
 
        }
2045
 
-       set_shadow_pte(spte, 0);
2046
 
-       kvm_flush_remote_tlbs(vcpu->kvm);
2047
 
+       set_shadow_pte(spte, shadow_trap_nonpresent_pte);
2048
 
 }
2049
 
 
2050
 
 static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
2051
 
-                                 struct kvm_mmu_page *page,
2052
 
+                                 struct kvm_mmu_page *sp,
2053
 
                                  u64 *spte,
2054
 
-                                 const void *new, int bytes)
2055
 
+                                 const void *new, int bytes,
2056
 
+                                 int offset_in_pte)
2057
 
 {
2058
 
-       if (page->role.level != PT_PAGE_TABLE_LEVEL)
2059
 
+       if (sp->role.level != PT_PAGE_TABLE_LEVEL) {
2060
 
+               ++vcpu->kvm->stat.mmu_pde_zapped;
2061
 
                return;
2062
 
+       }
2063
 
 
2064
 
-       if (page->role.glevels == PT32_ROOT_LEVEL)
2065
 
-               paging32_update_pte(vcpu, page, spte, new, bytes);
2066
 
+       ++vcpu->kvm->stat.mmu_pte_updated;
2067
 
+       if (sp->role.glevels == PT32_ROOT_LEVEL)
2068
 
+               paging32_update_pte(vcpu, sp, spte, new, bytes, offset_in_pte);
2069
 
        else
2070
 
-               paging64_update_pte(vcpu, page, spte, new, bytes);
2071
 
+               paging64_update_pte(vcpu, sp, spte, new, bytes, offset_in_pte);
2072
 
+}
2073
 
+
2074
 
+static bool need_remote_flush(u64 old, u64 new)
2075
 
+{
2076
 
+       if (!is_shadow_present_pte(old))
2077
 
+               return false;
2078
 
+       if (!is_shadow_present_pte(new))
2079
 
+               return true;
2080
 
+       if ((old ^ new) & PT64_BASE_ADDR_MASK)
2081
 
+               return true;
2082
 
+       old ^= PT64_NX_MASK;
2083
 
+       new ^= PT64_NX_MASK;
2084
 
+       return (old & ~new & PT64_PERM_MASK) != 0;
2085
 
+}
2086
 
+
2087
 
+static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, u64 old, u64 new)
2088
 
+{
2089
 
+       if (need_remote_flush(old, new))
2090
 
+               kvm_flush_remote_tlbs(vcpu->kvm);
2091
 
+       else
2092
 
+               kvm_mmu_flush_tlb(vcpu);
2093
 
+}
2094
 
+
2095
 
+static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu)
2096
 
+{
2097
 
+       u64 *spte = vcpu->arch.last_pte_updated;
2098
 
+
2099
 
+       return !!(spte && (*spte & PT_ACCESSED_MASK));
2100
 
 }
2101
 
 
2102
 
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
2103
 
                       const u8 *new, int bytes)
2104
 
 {
2105
 
        gfn_t gfn = gpa >> PAGE_SHIFT;
2106
 
-       struct kvm_mmu_page *page;
2107
 
+       struct kvm_mmu_page *sp;
2108
 
        struct hlist_node *node, *n;
2109
 
        struct hlist_head *bucket;
2110
 
        unsigned index;
2111
 
+       u64 entry;
2112
 
        u64 *spte;
2113
 
        unsigned offset = offset_in_page(gpa);
2114
 
        unsigned pte_size;
2115
 
@@ -1126,20 +1352,24 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
2116
 
        int npte;
2117
 
 
2118
 
        pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes);
2119
 
-       if (gfn == vcpu->last_pt_write_gfn) {
2120
 
-               ++vcpu->last_pt_write_count;
2121
 
-               if (vcpu->last_pt_write_count >= 3)
2122
 
+       ++vcpu->kvm->stat.mmu_pte_write;
2123
 
+       kvm_mmu_audit(vcpu, "pre pte write");
2124
 
+       if (gfn == vcpu->arch.last_pt_write_gfn
2125
 
+           && !last_updated_pte_accessed(vcpu)) {
2126
 
+               ++vcpu->arch.last_pt_write_count;
2127
 
+               if (vcpu->arch.last_pt_write_count >= 3)
2128
 
                        flooded = 1;
2129
 
        } else {
2130
 
-               vcpu->last_pt_write_gfn = gfn;
2131
 
-               vcpu->last_pt_write_count = 1;
2132
 
+               vcpu->arch.last_pt_write_gfn = gfn;
2133
 
+               vcpu->arch.last_pt_write_count = 1;
2134
 
+               vcpu->arch.last_pte_updated = NULL;
2135
 
        }
2136
 
        index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
2137
 
-       bucket = &vcpu->kvm->mmu_page_hash[index];
2138
 
-       hlist_for_each_entry_safe(page, node, n, bucket, hash_link) {
2139
 
-               if (page->gfn != gfn || page->role.metaphysical)
2140
 
+       bucket = &vcpu->kvm->arch.mmu_page_hash[index];
2141
 
+       hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) {
2142
 
+               if (sp->gfn != gfn || sp->role.metaphysical)
2143
 
                        continue;
2144
 
-               pte_size = page->role.glevels == PT32_ROOT_LEVEL ? 4 : 8;
2145
 
+               pte_size = sp->role.glevels == PT32_ROOT_LEVEL ? 4 : 8;
2146
 
                misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
2147
 
                misaligned |= bytes < 4;
2148
 
                if (misaligned || flooded) {
2149
 
@@ -1154,14 +1384,15 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
2150
 
                         * page.
2151
 
                         */
2152
 
                        pgprintk("misaligned: gpa %llx bytes %d role %x\n",
2153
 
-                                gpa, bytes, page->role.word);
2154
 
-                       kvm_mmu_zap_page(vcpu->kvm, page);
2155
 
+                                gpa, bytes, sp->role.word);
2156
 
+                       kvm_mmu_zap_page(vcpu->kvm, sp);
2157
 
+                       ++vcpu->kvm->stat.mmu_flooded;
2158
 
                        continue;
2159
 
                }
2160
 
                page_offset = offset;
2161
 
-               level = page->role.level;
2162
 
+               level = sp->role.level;
2163
 
                npte = 1;
2164
 
-               if (page->role.glevels == PT32_ROOT_LEVEL) {
2165
 
+               if (sp->role.glevels == PT32_ROOT_LEVEL) {
2166
 
                        page_offset <<= 1;      /* 32->64 */
2167
 
                        /*
2168
 
                         * A 32-bit pde maps 4MB while the shadow pdes map
2169
 
@@ -1175,46 +1406,91 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
2170
 
                        }
2171
 
                        quadrant = page_offset >> PAGE_SHIFT;
2172
 
                        page_offset &= ~PAGE_MASK;
2173
 
-                       if (quadrant != page->role.quadrant)
2174
 
+                       if (quadrant != sp->role.quadrant)
2175
 
                                continue;
2176
 
                }
2177
 
-               spte = &page->spt[page_offset / sizeof(*spte)];
2178
 
+               spte = &sp->spt[page_offset / sizeof(*spte)];
2179
 
                while (npte--) {
2180
 
-                       mmu_pte_write_zap_pte(vcpu, page, spte);
2181
 
-                       mmu_pte_write_new_pte(vcpu, page, spte, new, bytes);
2182
 
+                       entry = *spte;
2183
 
+                       mmu_pte_write_zap_pte(vcpu, sp, spte);
2184
 
+                       mmu_pte_write_new_pte(vcpu, sp, spte, new, bytes,
2185
 
+                                             page_offset & (pte_size - 1));
2186
 
+                       mmu_pte_write_flush_tlb(vcpu, entry, *spte);
2187
 
                        ++spte;
2188
 
                }
2189
 
        }
2190
 
+       kvm_mmu_audit(vcpu, "post pte write");
2191
 
 }
2192
 
 
2193
 
 int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
2194
 
 {
2195
 
-       gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);
2196
 
+       gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
2197
 
 
2198
 
-       return kvm_mmu_unprotect_page(vcpu, gpa >> PAGE_SHIFT);
2199
 
+       return kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
2200
 
 }
2201
 
 
2202
 
 void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
2203
 
 {
2204
 
-       while (vcpu->kvm->n_free_mmu_pages < KVM_REFILL_PAGES) {
2205
 
-               struct kvm_mmu_page *page;
2206
 
+       while (vcpu->kvm->arch.n_free_mmu_pages < KVM_REFILL_PAGES) {
2207
 
+               struct kvm_mmu_page *sp;
2208
 
 
2209
 
-               page = container_of(vcpu->kvm->active_mmu_pages.prev,
2210
 
-                                   struct kvm_mmu_page, link);
2211
 
-               kvm_mmu_zap_page(vcpu->kvm, page);
2212
 
+               sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev,
2213
 
+                                 struct kvm_mmu_page, link);
2214
 
+               kvm_mmu_zap_page(vcpu->kvm, sp);
2215
 
+               ++vcpu->kvm->stat.mmu_recycled;
2216
 
        }
2217
 
 }
2218
 
 
2219
 
+int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
2220
 
+{
2221
 
+       int r;
2222
 
+       enum emulation_result er;
2223
 
+
2224
 
+       mutex_lock(&vcpu->kvm->lock);
2225
 
+       r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code);
2226
 
+       if (r < 0)
2227
 
+               goto out;
2228
 
+
2229
 
+       if (!r) {
2230
 
+               r = 1;
2231
 
+               goto out;
2232
 
+       }
2233
 
+
2234
 
+       r = mmu_topup_memory_caches(vcpu);
2235
 
+       if (r)
2236
 
+               goto out;
2237
 
+
2238
 
+       er = emulate_instruction(vcpu, vcpu->run, cr2, error_code, 0);
2239
 
+       mutex_unlock(&vcpu->kvm->lock);
2240
 
+
2241
 
+       switch (er) {
2242
 
+       case EMULATE_DONE:
2243
 
+               return 1;
2244
 
+       case EMULATE_DO_MMIO:
2245
 
+               ++vcpu->stat.mmio_exits;
2246
 
+               return 0;
2247
 
+       case EMULATE_FAIL:
2248
 
+               kvm_report_emulation_failure(vcpu, "pagetable");
2249
 
+               return 1;
2250
 
+       default:
2251
 
+               BUG();
2252
 
+       }
2253
 
+out:
2254
 
+       mutex_unlock(&vcpu->kvm->lock);
2255
 
+       return r;
2256
 
+}
2257
 
+EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
2258
 
+
2259
 
 static void free_mmu_pages(struct kvm_vcpu *vcpu)
2260
 
 {
2261
 
-       struct kvm_mmu_page *page;
2262
 
+       struct kvm_mmu_page *sp;
2263
 
 
2264
 
-       while (!list_empty(&vcpu->kvm->active_mmu_pages)) {
2265
 
-               page = container_of(vcpu->kvm->active_mmu_pages.next,
2266
 
-                                   struct kvm_mmu_page, link);
2267
 
-               kvm_mmu_zap_page(vcpu->kvm, page);
2268
 
+       while (!list_empty(&vcpu->kvm->arch.active_mmu_pages)) {
2269
 
+               sp = container_of(vcpu->kvm->arch.active_mmu_pages.next,
2270
 
+                                 struct kvm_mmu_page, link);
2271
 
+               kvm_mmu_zap_page(vcpu->kvm, sp);
2272
 
        }
2273
 
-       free_page((unsigned long)vcpu->mmu.pae_root);
2274
 
+       free_page((unsigned long)vcpu->arch.mmu.pae_root);
2275
 
 }
2276
 
 
2277
 
 static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
2278
 
@@ -1224,8 +1500,12 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
2279
 
 
2280
 
        ASSERT(vcpu);
2281
 
 
2282
 
-       vcpu->kvm->n_free_mmu_pages = KVM_NUM_MMU_PAGES;
2283
 
-
2284
 
+       if (vcpu->kvm->arch.n_requested_mmu_pages)
2285
 
+               vcpu->kvm->arch.n_free_mmu_pages =
2286
 
+                                       vcpu->kvm->arch.n_requested_mmu_pages;
2287
 
+       else
2288
 
+               vcpu->kvm->arch.n_free_mmu_pages =
2289
 
+                                       vcpu->kvm->arch.n_alloc_mmu_pages;
2290
 
        /*
2291
 
         * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64.
2292
 
         * Therefore we need to allocate shadow page tables in the first
2293
 
@@ -1234,9 +1514,9 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
2294
 
        page = alloc_page(GFP_KERNEL | __GFP_DMA32);
2295
 
        if (!page)
2296
 
                goto error_1;
2297
 
-       vcpu->mmu.pae_root = page_address(page);
2298
 
+       vcpu->arch.mmu.pae_root = page_address(page);
2299
 
        for (i = 0; i < 4; ++i)
2300
 
-               vcpu->mmu.pae_root[i] = INVALID_PAGE;
2301
 
+               vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
2302
 
 
2303
 
        return 0;
2304
 
 
2305
 
@@ -1248,7 +1528,7 @@ error_1:
2306
 
 int kvm_mmu_create(struct kvm_vcpu *vcpu)
2307
 
 {
2308
 
        ASSERT(vcpu);
2309
 
-       ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));
2310
 
+       ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
2311
 
 
2312
 
        return alloc_mmu_pages(vcpu);
2313
 
 }
2314
 
@@ -1256,7 +1536,7 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
2315
 
 int kvm_mmu_setup(struct kvm_vcpu *vcpu)
2316
 
 {
2317
 
        ASSERT(vcpu);
2318
 
-       ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));
2319
 
+       ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
2320
 
 
2321
 
        return init_kvm_mmu(vcpu);
2322
 
 }
2323
 
@@ -1272,31 +1552,29 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
2324
 
 
2325
 
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
2326
 
 {
2327
 
-       struct kvm_mmu_page *page;
2328
 
+       struct kvm_mmu_page *sp;
2329
 
 
2330
 
-       list_for_each_entry(page, &kvm->active_mmu_pages, link) {
2331
 
+       list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) {
2332
 
                int i;
2333
 
                u64 *pt;
2334
 
 
2335
 
-               if (!test_bit(slot, &page->slot_bitmap))
2336
 
+               if (!test_bit(slot, &sp->slot_bitmap))
2337
 
                        continue;
2338
 
 
2339
 
-               pt = page->spt;
2340
 
+               pt = sp->spt;
2341
 
                for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
2342
 
                        /* avoid RMW */
2343
 
-                       if (pt[i] & PT_WRITABLE_MASK) {
2344
 
-                               rmap_remove(&pt[i]);
2345
 
+                       if (pt[i] & PT_WRITABLE_MASK)
2346
 
                                pt[i] &= ~PT_WRITABLE_MASK;
2347
 
-                       }
2348
 
        }
2349
 
 }
2350
 
 
2351
 
 void kvm_mmu_zap_all(struct kvm *kvm)
2352
 
 {
2353
 
-       struct kvm_mmu_page *page, *node;
2354
 
+       struct kvm_mmu_page *sp, *node;
2355
 
 
2356
 
-       list_for_each_entry_safe(page, node, &kvm->active_mmu_pages, link)
2357
 
-               kvm_mmu_zap_page(kvm, page);
2358
 
+       list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link)
2359
 
+               kvm_mmu_zap_page(kvm, sp);
2360
 
 
2361
 
        kvm_flush_remote_tlbs(kvm);
2362
 
 }
2363
 
@@ -1337,6 +1615,25 @@ nomem:
        return -ENOMEM;
 }
 
+/*
+ * Caculate mmu pages needed for kvm.
+ */
+unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
+{
+       int i;
+       unsigned int nr_mmu_pages;
+       unsigned int  nr_pages = 0;
+
+       for (i = 0; i < kvm->nmemslots; i++)
+               nr_pages += kvm->memslots[i].npages;
+
+       nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
+       nr_mmu_pages = max(nr_mmu_pages,
+                       (unsigned int) KVM_MIN_ALLOC_MMU_PAGES);
+
+       return nr_mmu_pages;
+}
+
 #ifdef AUDIT
 
 static const char *audit_msg;
@@ -1359,22 +1656,36 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
2390
 
        for (i = 0; i < PT64_ENT_PER_PAGE; ++i, va += va_delta) {
2391
 
                u64 ent = pt[i];
2392
 
 
2393
 
-               if (!(ent & PT_PRESENT_MASK))
2394
 
+               if (ent == shadow_trap_nonpresent_pte)
2395
 
                        continue;
2396
 
 
2397
 
                va = canonicalize(va);
2398
 
-               if (level > 1)
2399
 
+               if (level > 1) {
2400
 
+                       if (ent == shadow_notrap_nonpresent_pte)
2401
 
+                               printk(KERN_ERR "audit: (%s) nontrapping pte"
2402
 
+                                      " in nonleaf level: levels %d gva %lx"
2403
 
+                                      " level %d pte %llx\n", audit_msg,
2404
 
+                                      vcpu->arch.mmu.root_level, va, level, ent);
2405
 
+
2406
 
                        audit_mappings_page(vcpu, ent, va, level - 1);
2407
 
-               else {
2408
 
-                       gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, va);
2409
 
-                       hpa_t hpa = gpa_to_hpa(vcpu, gpa);
2410
 
+               } else {
2411
 
+                       gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va);
2412
 
+                       struct page *page = gpa_to_page(vcpu, gpa);
2413
 
+                       hpa_t hpa = page_to_phys(page);
2414
 
 
2415
 
-                       if ((ent & PT_PRESENT_MASK)
2416
 
+                       if (is_shadow_present_pte(ent)
2417
 
                            && (ent & PT64_BASE_ADDR_MASK) != hpa)
2418
 
-                               printk(KERN_ERR "audit error: (%s) levels %d"
2419
 
-                                      " gva %lx gpa %llx hpa %llx ent %llx\n",
2420
 
-                                      audit_msg, vcpu->mmu.root_level,
2421
 
-                                      va, gpa, hpa, ent);
2422
 
+                               printk(KERN_ERR "xx audit error: (%s) levels %d"
2423
 
+                                      " gva %lx gpa %llx hpa %llx ent %llx %d\n",
2424
 
+                                      audit_msg, vcpu->arch.mmu.root_level,
2425
 
+                                      va, gpa, hpa, ent,
2426
 
+                                      is_shadow_present_pte(ent));
2427
 
+                       else if (ent == shadow_notrap_nonpresent_pte
2428
 
+                                && !is_error_hpa(hpa))
2429
 
+                               printk(KERN_ERR "audit: (%s) notrap shadow,"
2430
 
+                                      " valid guest gva %lx\n", audit_msg, va);
2431
 
+                       kvm_release_page_clean(page);
2432
 
+
2433
 
                }
2434
 
        }
2435
 
 }
2436
 
@@ -1383,13 +1694,13 @@ static void audit_mappings(struct kvm_vcpu *vcpu)
2437
 
 {
2438
 
        unsigned i;
2439
 
 
2440
 
-       if (vcpu->mmu.root_level == 4)
2441
 
-               audit_mappings_page(vcpu, vcpu->mmu.root_hpa, 0, 4);
2442
 
+       if (vcpu->arch.mmu.root_level == 4)
2443
 
+               audit_mappings_page(vcpu, vcpu->arch.mmu.root_hpa, 0, 4);
2444
 
        else
2445
 
                for (i = 0; i < 4; ++i)
2446
 
-                       if (vcpu->mmu.pae_root[i] & PT_PRESENT_MASK)
2447
 
+                       if (vcpu->arch.mmu.pae_root[i] & PT_PRESENT_MASK)
2448
 
                                audit_mappings_page(vcpu,
2449
 
-                                                   vcpu->mmu.pae_root[i],
2450
 
+                                                   vcpu->arch.mmu.pae_root[i],
2451
 
                                                    i << 30,
2452
 
                                                    2);
2453
 
 }
2454
 
@@ -1404,15 +1715,15 @@ static int count_rmaps(struct kvm_vcpu *vcpu)
2455
 
                struct kvm_rmap_desc *d;
2456
 
 
2457
 
                for (j = 0; j < m->npages; ++j) {
2458
 
-                       struct page *page = m->phys_mem[j];
2459
 
+                       unsigned long *rmapp = &m->rmap[j];
2460
 
 
2461
 
-                       if (!page->private)
2462
 
+                       if (!*rmapp)
2463
 
                                continue;
2464
 
-                       if (!(page->private & 1)) {
2465
 
+                       if (!(*rmapp & 1)) {
2466
 
                                ++nmaps;
2467
 
                                continue;
2468
 
                        }
2469
 
-                       d = (struct kvm_rmap_desc *)(page->private & ~1ul);
2470
 
+                       d = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
2471
 
                        while (d) {
2472
 
                                for (k = 0; k < RMAP_EXT; ++k)
2473
 
                                        if (d->shadow_ptes[k])
2474
 
@@ -1429,13 +1740,13 @@ static int count_rmaps(struct kvm_vcpu *vcpu)
2475
 
 static int count_writable_mappings(struct kvm_vcpu *vcpu)
2476
 
 {
2477
 
        int nmaps = 0;
2478
 
-       struct kvm_mmu_page *page;
2479
 
+       struct kvm_mmu_page *sp;
2480
 
        int i;
2481
 
 
2482
 
-       list_for_each_entry(page, &vcpu->kvm->active_mmu_pages, link) {
2483
 
-               u64 *pt = page->spt;
2484
 
+       list_for_each_entry(sp, &vcpu->kvm->arch.active_mmu_pages, link) {
2485
 
+               u64 *pt = sp->spt;
2486
 
 
2487
 
-               if (page->role.level != PT_PAGE_TABLE_LEVEL)
2488
 
+               if (sp->role.level != PT_PAGE_TABLE_LEVEL)
2489
 
                        continue;
2490
 
 
2491
 
                for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
2492
 
@@ -1463,23 +1774,23 @@ static void audit_rmap(struct kvm_vcpu *vcpu)
2493
 
 
2494
 
 static void audit_write_protection(struct kvm_vcpu *vcpu)
2495
 
 {
2496
 
-       struct kvm_mmu_page *page;
2497
 
-
2498
 
-       list_for_each_entry(page, &vcpu->kvm->active_mmu_pages, link) {
2499
 
-               hfn_t hfn;
2500
 
-               struct page *pg;
2501
 
+       struct kvm_mmu_page *sp;
2502
 
+       struct kvm_memory_slot *slot;
2503
 
+       unsigned long *rmapp;
2504
 
+       gfn_t gfn;
2505
 
 
2506
 
-               if (page->role.metaphysical)
2507
 
+       list_for_each_entry(sp, &vcpu->kvm->arch.active_mmu_pages, link) {
2508
 
+               if (sp->role.metaphysical)
2509
 
                        continue;
2510
 
 
2511
 
-               hfn = gpa_to_hpa(vcpu, (gpa_t)page->gfn << PAGE_SHIFT)
2512
 
-                       >> PAGE_SHIFT;
2513
 
-               pg = pfn_to_page(hfn);
2514
 
-               if (pg->private)
2515
 
+               slot = gfn_to_memslot(vcpu->kvm, sp->gfn);
2516
 
+               gfn = unalias_gfn(vcpu->kvm, sp->gfn);
2517
 
+               rmapp = &slot->rmap[gfn - slot->base_gfn];
2518
 
+               if (*rmapp)
2519
 
                        printk(KERN_ERR "%s: (%s) shadow page has writable"
2520
 
                               " mappings: gfn %lx role %x\n",
2521
 
-                              __FUNCTION__, audit_msg, page->gfn,
2522
 
-                              page->role.word);
2523
 
+                              __FUNCTION__, audit_msg, sp->gfn,
2524
 
+                              sp->role.word);
2525
 
        }
2526
 
 }
2527
 
 
2528
 
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
new file mode 100644
index 0000000..1fce19e
--- /dev/null
+++ b/arch/x86/kvm/mmu.h
@@ -0,0 +1,44 @@
+#ifndef __KVM_X86_MMU_H
+#define __KVM_X86_MMU_H
+
+#include <linux/kvm_host.h>
+
+static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
+{
+       if (unlikely(vcpu->kvm->arch.n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))
+               __kvm_mmu_free_some_pages(vcpu);
+}
+
+static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
+{
+       if (likely(vcpu->arch.mmu.root_hpa != INVALID_PAGE))
+               return 0;
+
+       return kvm_mmu_load(vcpu);
+}
+
+static inline int is_long_mode(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_X86_64
+       return vcpu->arch.shadow_efer & EFER_LME;
+#else
+       return 0;
+#endif
+}
+
+static inline int is_pae(struct kvm_vcpu *vcpu)
+{
+       return vcpu->arch.cr4 & X86_CR4_PAE;
+}
+
+static inline int is_pse(struct kvm_vcpu *vcpu)
+{
+       return vcpu->arch.cr4 & X86_CR4_PSE;
+}
+
+static inline int is_paging(struct kvm_vcpu *vcpu)
+{
+       return vcpu->arch.cr0 & X86_CR0_PG;
+}
+
+#endif
diff --git a/drivers/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
2579
 
similarity index 50%
2580
 
rename from drivers/kvm/paging_tmpl.h
2581
 
rename to arch/x86/kvm/paging_tmpl.h
2582
 
index 6b094b4..56b88f7 100644
2583
 
--- a/drivers/kvm/paging_tmpl.h
2584
 
+++ b/arch/x86/kvm/paging_tmpl.h
2585
 
@@ -31,9 +31,12 @@
2586
 
        #define PT_INDEX(addr, level) PT64_INDEX(addr, level)
2587
 
        #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
2588
 
        #define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level)
2589
 
+       #define PT_LEVEL_BITS PT64_LEVEL_BITS
2590
 
        #ifdef CONFIG_X86_64
2591
 
        #define PT_MAX_FULL_LEVELS 4
2592
 
+       #define CMPXCHG cmpxchg
2593
 
        #else
2594
 
+       #define CMPXCHG cmpxchg64
2595
 
        #define PT_MAX_FULL_LEVELS 2
2596
 
        #endif
2597
 
 #elif PTTYPE == 32
2598
 
@@ -45,11 +48,16 @@
2599
 
        #define PT_INDEX(addr, level) PT32_INDEX(addr, level)
2600
 
        #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
2601
 
        #define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level)
2602
 
+       #define PT_LEVEL_BITS PT32_LEVEL_BITS
2603
 
        #define PT_MAX_FULL_LEVELS 2
2604
 
+       #define CMPXCHG cmpxchg
2605
 
 #else
2606
 
        #error Invalid PTTYPE value
2607
 
 #endif
2608
 
 
2609
 
+#define gpte_to_gfn FNAME(gpte_to_gfn)
2610
 
+#define gpte_to_gfn_pde FNAME(gpte_to_gfn_pde)
2611
 
+
2612
 
 /*
2613
 
  * The guest_walker structure emulates the behavior of the hardware page
2614
 
  * table walker.
2615
 
@@ -57,16 +65,56 @@
2616
 
 struct guest_walker {
2617
 
        int level;
2618
 
        gfn_t table_gfn[PT_MAX_FULL_LEVELS];
2619
 
-       pt_element_t *table;
2620
 
-       pt_element_t pte;
2621
 
-       pt_element_t *ptep;
2622
 
-       struct page *page;
2623
 
-       int index;
2624
 
-       pt_element_t inherited_ar;
2625
 
+       pt_element_t ptes[PT_MAX_FULL_LEVELS];
2626
 
+       gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
2627
 
+       unsigned pt_access;
2628
 
+       unsigned pte_access;
2629
 
        gfn_t gfn;
2630
 
        u32 error_code;
2631
 
 };
2632
 
 
2633
 
+static gfn_t gpte_to_gfn(pt_element_t gpte)
2634
 
+{
2635
 
+       return (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
2636
 
+}
2637
 
+
2638
 
+static gfn_t gpte_to_gfn_pde(pt_element_t gpte)
2639
 
+{
2640
 
+       return (gpte & PT_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT;
2641
 
+}
2642
 
+
2643
 
+static bool FNAME(cmpxchg_gpte)(struct kvm *kvm,
2644
 
+                        gfn_t table_gfn, unsigned index,
2645
 
+                        pt_element_t orig_pte, pt_element_t new_pte)
2646
 
+{
2647
 
+       pt_element_t ret;
2648
 
+       pt_element_t *table;
2649
 
+       struct page *page;
2650
 
+
2651
 
+       page = gfn_to_page(kvm, table_gfn);
2652
 
+       table = kmap_atomic(page, KM_USER0);
2653
 
+
2654
 
+       ret = CMPXCHG(&table[index], orig_pte, new_pte);
2655
 
+
2656
 
+       kunmap_atomic(table, KM_USER0);
2657
 
+
2658
 
+       kvm_release_page_dirty(page);
2659
 
+
2660
 
+       return (ret != orig_pte);
2661
 
+}
2662
 
+
2663
 
+static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte)
2664
 
+{
2665
 
+       unsigned access;
2666
 
+
2667
 
+       access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
2668
 
+#if PTTYPE == 64
2669
 
+       if (is_nx(vcpu))
2670
 
+               access &= ~(gpte >> PT64_NX_SHIFT);
2671
 
+#endif
2672
 
+       return access;
2673
 
+}
2674
 
+
2675
 
 /*
2676
 
  * Fetch a guest pte for a guest virtual address
2677
 
  */
2678
 
@@ -74,103 +122,104 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
2679
 
                            struct kvm_vcpu *vcpu, gva_t addr,
2680
 
                            int write_fault, int user_fault, int fetch_fault)
2681
 
 {
2682
 
-       hpa_t hpa;
2683
 
-       struct kvm_memory_slot *slot;
2684
 
-       pt_element_t *ptep;
2685
 
-       pt_element_t root;
2686
 
+       pt_element_t pte;
2687
 
        gfn_t table_gfn;
2688
 
+       unsigned index, pt_access, pte_access;
2689
 
+       gpa_t pte_gpa;
2690
 
 
2691
 
        pgprintk("%s: addr %lx\n", __FUNCTION__, addr);
2692
 
-       walker->level = vcpu->mmu.root_level;
2693
 
-       walker->table = NULL;
2694
 
-       walker->page = NULL;
2695
 
-       walker->ptep = NULL;
2696
 
-       root = vcpu->cr3;
2697
 
+walk:
2698
 
+       walker->level = vcpu->arch.mmu.root_level;
2699
 
+       pte = vcpu->arch.cr3;
2700
 
 #if PTTYPE == 64
2701
 
        if (!is_long_mode(vcpu)) {
2702
 
-               walker->ptep = &vcpu->pdptrs[(addr >> 30) & 3];
2703
 
-               root = *walker->ptep;
2704
 
-               walker->pte = root;
2705
 
-               if (!(root & PT_PRESENT_MASK))
2706
 
+               pte = vcpu->arch.pdptrs[(addr >> 30) & 3];
2707
 
+               if (!is_present_pte(pte))
2708
 
                        goto not_present;
2709
 
                --walker->level;
2710
 
        }
2711
 
 #endif
2712
 
-       table_gfn = (root & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
2713
 
-       walker->table_gfn[walker->level - 1] = table_gfn;
2714
 
-       pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
2715
 
-                walker->level - 1, table_gfn);
2716
 
-       slot = gfn_to_memslot(vcpu->kvm, table_gfn);
2717
 
-       hpa = safe_gpa_to_hpa(vcpu, root & PT64_BASE_ADDR_MASK);
2718
 
-       walker->page = pfn_to_page(hpa >> PAGE_SHIFT);
2719
 
-       walker->table = kmap_atomic(walker->page, KM_USER0);
2720
 
-
2721
 
        ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
2722
 
               (vcpu->cr3 & CR3_NONPAE_RESERVED_BITS) == 0);
2723
 
 
2724
 
-       walker->inherited_ar = PT_USER_MASK | PT_WRITABLE_MASK;
2725
 
+       pt_access = ACC_ALL;
2726
 
 
2727
 
        for (;;) {
2728
 
-               int index = PT_INDEX(addr, walker->level);
2729
 
-               hpa_t paddr;
2730
 
+               index = PT_INDEX(addr, walker->level);
2731
 
 
2732
 
-               ptep = &walker->table[index];
2733
 
-               walker->index = index;
2734
 
-               ASSERT(((unsigned long)walker->table & PAGE_MASK) ==
2735
 
-                      ((unsigned long)ptep & PAGE_MASK));
2736
 
+               table_gfn = gpte_to_gfn(pte);
2737
 
+               pte_gpa = gfn_to_gpa(table_gfn);
2738
 
+               pte_gpa += index * sizeof(pt_element_t);
2739
 
+               walker->table_gfn[walker->level - 1] = table_gfn;
2740
 
+               walker->pte_gpa[walker->level - 1] = pte_gpa;
2741
 
+               pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
2742
 
+                        walker->level - 1, table_gfn);
2743
 
+
2744
 
+               kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));
2745
 
 
2746
 
-               if (!is_present_pte(*ptep))
2747
 
+               if (!is_present_pte(pte))
2748
 
                        goto not_present;
2749
 
 
2750
 
-               if (write_fault && !is_writeble_pte(*ptep))
2751
 
+               if (write_fault && !is_writeble_pte(pte))
2752
 
                        if (user_fault || is_write_protection(vcpu))
2753
 
                                goto access_error;
2754
 
 
2755
 
-               if (user_fault && !(*ptep & PT_USER_MASK))
2756
 
+               if (user_fault && !(pte & PT_USER_MASK))
2757
 
                        goto access_error;
2758
 
 
2759
 
 #if PTTYPE == 64
2760
 
-               if (fetch_fault && is_nx(vcpu) && (*ptep & PT64_NX_MASK))
2761
 
+               if (fetch_fault && is_nx(vcpu) && (pte & PT64_NX_MASK))
2762
 
                        goto access_error;
2763
 
 #endif
2764
 
 
2765
 
-               if (!(*ptep & PT_ACCESSED_MASK)) {
2766
 
+               if (!(pte & PT_ACCESSED_MASK)) {
2767
 
                        mark_page_dirty(vcpu->kvm, table_gfn);
2768
 
-                       *ptep |= PT_ACCESSED_MASK;
2769
 
+                       if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn,
2770
 
+                           index, pte, pte|PT_ACCESSED_MASK))
2771
 
+                               goto walk;
2772
 
+                       pte |= PT_ACCESSED_MASK;
2773
 
                }
2774
 
 
2775
 
+               pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);
2776
 
+
2777
 
+               walker->ptes[walker->level - 1] = pte;
2778
 
+
2779
 
                if (walker->level == PT_PAGE_TABLE_LEVEL) {
2780
 
-                       walker->gfn = (*ptep & PT_BASE_ADDR_MASK)
2781
 
-                               >> PAGE_SHIFT;
2782
 
+                       walker->gfn = gpte_to_gfn(pte);
2783
 
                        break;
2784
 
                }
2785
 
 
2786
 
                if (walker->level == PT_DIRECTORY_LEVEL
2787
 
-                   && (*ptep & PT_PAGE_SIZE_MASK)
2788
 
+                   && (pte & PT_PAGE_SIZE_MASK)
2789
 
                    && (PTTYPE == 64 || is_pse(vcpu))) {
2790
 
-                       walker->gfn = (*ptep & PT_DIR_BASE_ADDR_MASK)
2791
 
-                               >> PAGE_SHIFT;
2792
 
+                       walker->gfn = gpte_to_gfn_pde(pte);
2793
 
                        walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL);
2794
 
+                       if (PTTYPE == 32 && is_cpuid_PSE36())
2795
 
+                               walker->gfn += pse36_gfn_delta(pte);
2796
 
                        break;
2797
 
                }
2798
 
 
2799
 
-               walker->inherited_ar &= walker->table[index];
2800
 
-               table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
2801
 
-               kunmap_atomic(walker->table, KM_USER0);
2802
 
-               paddr = safe_gpa_to_hpa(vcpu, table_gfn << PAGE_SHIFT);
2803
 
-               walker->page = pfn_to_page(paddr >> PAGE_SHIFT);
2804
 
-               walker->table = kmap_atomic(walker->page, KM_USER0);
2805
 
+               pt_access = pte_access;
2806
 
                --walker->level;
2807
 
-               walker->table_gfn[walker->level - 1 ] = table_gfn;
2808
 
-               pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
2809
 
-                        walker->level - 1, table_gfn);
2810
 
        }
2811
 
-       walker->pte = *ptep;
2812
 
-       if (walker->page)
2813
 
-               walker->ptep = NULL;
2814
 
-       if (walker->table)
2815
 
-               kunmap_atomic(walker->table, KM_USER0);
2816
 
-       pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep);
2817
 
+
2818
 
+       if (write_fault && !is_dirty_pte(pte)) {
2819
 
+               bool ret;
2820
 
+
2821
 
+               mark_page_dirty(vcpu->kvm, table_gfn);
2822
 
+               ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte,
2823
 
+                           pte|PT_DIRTY_MASK);
2824
 
+               if (ret)
2825
 
+                       goto walk;
2826
 
+               pte |= PT_DIRTY_MASK;
2827
 
+               kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte));
2828
 
+               walker->ptes[walker->level - 1] = pte;
2829
 
+       }
2830
 
+
2831
 
+       walker->pt_access = pt_access;
2832
 
+       walker->pte_access = pte_access;
2833
 
+       pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
2834
 
+                __FUNCTION__, (u64)pte, pt_access, pte_access);
2835
 
        return 1;
2836
 
 
2837
 
 not_present:
2838
 
@@ -187,153 +236,28 @@ err:
2839
 
                walker->error_code |= PFERR_USER_MASK;
2840
 
        if (fetch_fault)
2841
 
                walker->error_code |= PFERR_FETCH_MASK;
2842
 
-       if (walker->table)
2843
 
-               kunmap_atomic(walker->table, KM_USER0);
2844
 
        return 0;
2845
 
 }
2846
 
 
2847
 
-static void FNAME(mark_pagetable_dirty)(struct kvm *kvm,
2848
 
-                                       struct guest_walker *walker)
2849
 
-{
2850
 
-       mark_page_dirty(kvm, walker->table_gfn[walker->level - 1]);
2851
 
-}
2852
 
-
2853
 
-static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu,
2854
 
-                                 u64 *shadow_pte,
2855
 
-                                 gpa_t gaddr,
2856
 
-                                 pt_element_t gpte,
2857
 
-                                 u64 access_bits,
2858
 
-                                 int user_fault,
2859
 
-                                 int write_fault,
2860
 
-                                 int *ptwrite,
2861
 
-                                 struct guest_walker *walker,
2862
 
-                                 gfn_t gfn)
2863
 
-{
2864
 
-       hpa_t paddr;
2865
 
-       int dirty = gpte & PT_DIRTY_MASK;
2866
 
-       u64 spte = *shadow_pte;
2867
 
-       int was_rmapped = is_rmap_pte(spte);
2868
 
-
2869
 
-       pgprintk("%s: spte %llx gpte %llx access %llx write_fault %d"
2870
 
-                " user_fault %d gfn %lx\n",
2871
 
-                __FUNCTION__, spte, (u64)gpte, access_bits,
2872
 
-                write_fault, user_fault, gfn);
2873
 
-
2874
 
-       if (write_fault && !dirty) {
2875
 
-               pt_element_t *guest_ent, *tmp = NULL;
2876
 
-
2877
 
-               if (walker->ptep)
2878
 
-                       guest_ent = walker->ptep;
2879
 
-               else {
2880
 
-                       tmp = kmap_atomic(walker->page, KM_USER0);
2881
 
-                       guest_ent = &tmp[walker->index];
2882
 
-               }
2883
 
-
2884
 
-               *guest_ent |= PT_DIRTY_MASK;
2885
 
-               if (!walker->ptep)
2886
 
-                       kunmap_atomic(tmp, KM_USER0);
2887
 
-               dirty = 1;
2888
 
-               FNAME(mark_pagetable_dirty)(vcpu->kvm, walker);
2889
 
-       }
2890
 
-
2891
 
-       spte |= PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_DIRTY_MASK;
2892
 
-       spte |= gpte & PT64_NX_MASK;
2893
 
-       if (!dirty)
2894
 
-               access_bits &= ~PT_WRITABLE_MASK;
2895
 
-
2896
 
-       paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK);
2897
 
-
2898
 
-       spte |= PT_PRESENT_MASK;
2899
 
-       if (access_bits & PT_USER_MASK)
2900
 
-               spte |= PT_USER_MASK;
2901
 
-
2902
 
-       if (is_error_hpa(paddr)) {
2903
 
-               spte |= gaddr;
2904
 
-               spte |= PT_SHADOW_IO_MARK;
2905
 
-               spte &= ~PT_PRESENT_MASK;
2906
 
-               set_shadow_pte(shadow_pte, spte);
2907
 
-               return;
2908
 
-       }
2909
 
-
2910
 
-       spte |= paddr;
2911
 
-
2912
 
-       if ((access_bits & PT_WRITABLE_MASK)
2913
 
-           || (write_fault && !is_write_protection(vcpu) && !user_fault)) {
2914
 
-               struct kvm_mmu_page *shadow;
2915
 
-
2916
 
-               spte |= PT_WRITABLE_MASK;
2917
 
-               if (user_fault) {
2918
 
-                       mmu_unshadow(vcpu, gfn);
2919
 
-                       goto unshadowed;
2920
 
-               }
2921
 
-
2922
 
-               shadow = kvm_mmu_lookup_page(vcpu, gfn);
2923
 
-               if (shadow) {
2924
 
-                       pgprintk("%s: found shadow page for %lx, marking ro\n",
2925
 
-                                __FUNCTION__, gfn);
2926
 
-                       access_bits &= ~PT_WRITABLE_MASK;
2927
 
-                       if (is_writeble_pte(spte)) {
2928
 
-                               spte &= ~PT_WRITABLE_MASK;
2929
 
-                               kvm_x86_ops->tlb_flush(vcpu);
2930
 
-                       }
2931
 
-                       if (write_fault)
2932
 
-                               *ptwrite = 1;
2933
 
-               }
2934
 
-       }
2935
 
-
2936
 
-unshadowed:
2937
 
-
2938
 
-       if (access_bits & PT_WRITABLE_MASK)
2939
 
-               mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT);
2940
 
-
2941
 
-       set_shadow_pte(shadow_pte, spte);
2942
 
-       page_header_update_slot(vcpu->kvm, shadow_pte, gaddr);
2943
 
-       if (!was_rmapped)
2944
 
-               rmap_add(vcpu, shadow_pte);
2945
 
-}
2946
 
-
2947
 
-static void FNAME(set_pte)(struct kvm_vcpu *vcpu, pt_element_t gpte,
2948
 
-                          u64 *shadow_pte, u64 access_bits,
2949
 
-                          int user_fault, int write_fault, int *ptwrite,
2950
 
-                          struct guest_walker *walker, gfn_t gfn)
2951
 
-{
2952
 
-       access_bits &= gpte;
2953
 
-       FNAME(set_pte_common)(vcpu, shadow_pte, gpte & PT_BASE_ADDR_MASK,
2954
 
-                             gpte, access_bits, user_fault, write_fault,
2955
 
-                             ptwrite, walker, gfn);
2956
 
-}
2957
 
-
2958
 
 static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
2959
 
-                             u64 *spte, const void *pte, int bytes)
2960
 
+                             u64 *spte, const void *pte, int bytes,
2961
 
+                             int offset_in_pte)
2962
 
 {
2963
 
        pt_element_t gpte;
2964
 
+       unsigned pte_access;
2965
 
 
2966
 
-       if (bytes < sizeof(pt_element_t))
2967
 
-               return;
2968
 
        gpte = *(const pt_element_t *)pte;
2969
 
-       if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK))
2970
 
+       if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
2971
 
+               if (!offset_in_pte && !is_present_pte(gpte))
2972
 
+                       set_shadow_pte(spte, shadow_notrap_nonpresent_pte);
2973
 
+               return;
2974
 
+       }
2975
 
+       if (bytes < sizeof(pt_element_t))
2976
 
                return;
2977
 
        pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte);
2978
 
-       FNAME(set_pte)(vcpu, gpte, spte, PT_USER_MASK | PT_WRITABLE_MASK, 0,
2979
 
-                      0, NULL, NULL,
2980
 
-                      (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT);
2981
 
-}
2982
 
-
2983
 
-static void FNAME(set_pde)(struct kvm_vcpu *vcpu, pt_element_t gpde,
2984
 
-                          u64 *shadow_pte, u64 access_bits,
2985
 
-                          int user_fault, int write_fault, int *ptwrite,
2986
 
-                          struct guest_walker *walker, gfn_t gfn)
2987
 
-{
2988
 
-       gpa_t gaddr;
2989
 
-
2990
 
-       access_bits &= gpde;
2991
 
-       gaddr = (gpa_t)gfn << PAGE_SHIFT;
2992
 
-       if (PTTYPE == 32 && is_cpuid_PSE36())
2993
 
-               gaddr |= (gpde & PT32_DIR_PSE36_MASK) <<
2994
 
-                       (32 - PT32_DIR_PSE36_SHIFT);
2995
 
-       FNAME(set_pte_common)(vcpu, shadow_pte, gaddr,
2996
 
-                             gpde, access_bits, user_fault, write_fault,
2997
 
-                             ptwrite, walker, gfn);
2998
 
+       pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte);
2999
 
+       mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
3000
 
+                    gpte & PT_DIRTY_MASK, NULL, gpte_to_gfn(gpte));
3001
 
 }
3002
 
 
3003
 
 /*
3004
 
@@ -346,15 +270,15 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
3005
 
        hpa_t shadow_addr;
3006
 
        int level;
3007
 
        u64 *shadow_ent;
3008
 
-       u64 *prev_shadow_ent = NULL;
3009
 
+       unsigned access = walker->pt_access;
3010
 
 
3011
 
-       if (!is_present_pte(walker->pte))
3012
 
+       if (!is_present_pte(walker->ptes[walker->level - 1]))
3013
 
                return NULL;
3014
 
 
3015
 
-       shadow_addr = vcpu->mmu.root_hpa;
3016
 
-       level = vcpu->mmu.shadow_root_level;
3017
 
+       shadow_addr = vcpu->arch.mmu.root_hpa;
3018
 
+       level = vcpu->arch.mmu.shadow_root_level;
3019
 
        if (level == PT32E_ROOT_LEVEL) {
3020
 
-               shadow_addr = vcpu->mmu.pae_root[(addr >> 30) & 3];
3021
 
+               shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
3022
 
                shadow_addr &= PT64_BASE_ADDR_MASK;
3023
 
                --level;
3024
 
        }
3025
 
@@ -365,14 +289,13 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
3026
 
                u64 shadow_pte;
3027
 
                int metaphysical;
3028
 
                gfn_t table_gfn;
3029
 
-               unsigned hugepage_access = 0;
3030
 
+               bool new_page = 0;
3031
 
 
3032
 
                shadow_ent = ((u64 *)__va(shadow_addr)) + index;
3033
 
-               if (is_present_pte(*shadow_ent) || is_io_pte(*shadow_ent)) {
3034
 
+               if (is_shadow_present_pte(*shadow_ent)) {
3035
 
                        if (level == PT_PAGE_TABLE_LEVEL)
3036
 
                                break;
3037
 
                        shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK;
3038
 
-                       prev_shadow_ent = shadow_ent;
3039
 
                        continue;
3040
 
                }
3041
 
 
3042
 
@@ -382,37 +305,34 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
3043
 
                if (level - 1 == PT_PAGE_TABLE_LEVEL
3044
 
                    && walker->level == PT_DIRECTORY_LEVEL) {
3045
 
                        metaphysical = 1;
3046
 
-                       hugepage_access = walker->pte;
3047
 
-                       hugepage_access &= PT_USER_MASK | PT_WRITABLE_MASK;
3048
 
-                       if (walker->pte & PT64_NX_MASK)
3049
 
-                               hugepage_access |= (1 << 2);
3050
 
-                       hugepage_access >>= PT_WRITABLE_SHIFT;
3051
 
-                       table_gfn = (walker->pte & PT_BASE_ADDR_MASK)
3052
 
-                               >> PAGE_SHIFT;
3053
 
+                       if (!is_dirty_pte(walker->ptes[level - 1]))
3054
 
+                               access &= ~ACC_WRITE_MASK;
3055
 
+                       table_gfn = gpte_to_gfn(walker->ptes[level - 1]);
3056
 
                } else {
3057
 
                        metaphysical = 0;
3058
 
                        table_gfn = walker->table_gfn[level - 2];
3059
 
                }
3060
 
                shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
3061
 
-                                              metaphysical, hugepage_access,
3062
 
-                                              shadow_ent);
3063
 
+                                              metaphysical, access,
3064
 
+                                              shadow_ent, &new_page);
3065
 
+               if (new_page && !metaphysical) {
3066
 
+                       pt_element_t curr_pte;
3067
 
+                       kvm_read_guest(vcpu->kvm, walker->pte_gpa[level - 2],
3068
 
+                                      &curr_pte, sizeof(curr_pte));
3069
 
+                       if (curr_pte != walker->ptes[level - 2])
3070
 
+                               return NULL;
3071
 
+               }
3072
 
                shadow_addr = __pa(shadow_page->spt);
3073
 
                shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK
3074
 
                        | PT_WRITABLE_MASK | PT_USER_MASK;
3075
 
                *shadow_ent = shadow_pte;
3076
 
-               prev_shadow_ent = shadow_ent;
3077
 
        }
3078
 
 
3079
 
-       if (walker->level == PT_DIRECTORY_LEVEL) {
3080
 
-               FNAME(set_pde)(vcpu, walker->pte, shadow_ent,
3081
 
-                              walker->inherited_ar, user_fault, write_fault,
3082
 
-                              ptwrite, walker, walker->gfn);
3083
 
-       } else {
3084
 
-               ASSERT(walker->level == PT_PAGE_TABLE_LEVEL);
3085
 
-               FNAME(set_pte)(vcpu, walker->pte, shadow_ent,
3086
 
-                              walker->inherited_ar, user_fault, write_fault,
3087
 
-                              ptwrite, walker, walker->gfn);
3088
 
-       }
3089
 
+       mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access,
3090
 
+                    user_fault, write_fault,
3091
 
+                    walker->ptes[walker->level-1] & PT_DIRTY_MASK,
3092
 
+                    ptwrite, walker->gfn);
3093
 
+
3094
 
        return shadow_ent;
3095
 
 }
3096
 
 
3097
 
@@ -460,7 +380,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
3098
 
        if (!r) {
3099
 
                pgprintk("%s: guest page fault\n", __FUNCTION__);
3100
 
                inject_page_fault(vcpu, addr, walker.error_code);
3101
 
-               vcpu->last_pt_write_count = 0; /* reset fork detector */
3102
 
+               vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
3103
 
                return 0;
3104
 
        }
3105
 
 
3106
 
@@ -470,12 +390,12 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
3107
 
                 shadow_pte, *shadow_pte, write_pt);
3108
 
 
3109
 
        if (!write_pt)
3110
 
-               vcpu->last_pt_write_count = 0; /* reset fork detector */
3111
 
+               vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
3112
 
 
3113
 
        /*
3114
 
         * mmio: emulate if accessible, otherwise its a guest fault.
3115
 
         */
3116
 
-       if (is_io_pte(*shadow_pte))
3117
 
+       if (shadow_pte && is_io_pte(*shadow_pte))
3118
 
                return 1;
3119
 
 
3120
 
        ++vcpu->stat.pf_fixed;
3121
 
@@ -493,13 +413,39 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
3122
 
        r = FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0, 0);
3123
 
 
3124
 
        if (r) {
3125
 
-               gpa = (gpa_t)walker.gfn << PAGE_SHIFT;
3126
 
+               gpa = gfn_to_gpa(walker.gfn);
3127
 
                gpa |= vaddr & ~PAGE_MASK;
3128
 
        }
3129
 
 
3130
 
        return gpa;
3131
 
 }
3132
 
 
3133
 
+static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
3134
 
+                                struct kvm_mmu_page *sp)
3135
 
+{
3136
 
+       int i, offset = 0;
3137
 
+       pt_element_t *gpt;
3138
 
+       struct page *page;
3139
 
+
3140
 
+       if (sp->role.metaphysical
3141
 
+           || (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) {
3142
 
+               nonpaging_prefetch_page(vcpu, sp);
3143
 
+               return;
3144
 
+       }
3145
 
+
3146
 
+       if (PTTYPE == 32)
3147
 
+               offset = sp->role.quadrant << PT64_LEVEL_BITS;
3148
 
+       page = gfn_to_page(vcpu->kvm, sp->gfn);
3149
 
+       gpt = kmap_atomic(page, KM_USER0);
3150
 
+       for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
3151
 
+               if (is_present_pte(gpt[offset + i]))
3152
 
+                       sp->spt[i] = shadow_trap_nonpresent_pte;
3153
 
+               else
3154
 
+                       sp->spt[i] = shadow_notrap_nonpresent_pte;
3155
 
+       kunmap_atomic(gpt, KM_USER0);
3156
 
+       kvm_release_page_clean(page);
3157
 
+}
3158
 
+
3159
 
 #undef pt_element_t
3160
 
 #undef guest_walker
3161
 
 #undef FNAME
3162
 
@@ -508,4 +454,8 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
3163
 
 #undef SHADOW_PT_INDEX
3164
 
 #undef PT_LEVEL_MASK
3165
 
 #undef PT_DIR_BASE_ADDR_MASK
3166
 
+#undef PT_LEVEL_BITS
3167
 
 #undef PT_MAX_FULL_LEVELS
3168
 
+#undef gpte_to_gfn
3169
 
+#undef gpte_to_gfn_pde
3170
 
+#undef CMPXCHG
3171
 
diff --git a/arch/x86/kvm/segment_descriptor.h b/arch/x86/kvm/segment_descriptor.h
3172
 
new file mode 100644
3173
 
index 0000000..56fc4c8
3174
 
--- /dev/null
3175
 
+++ b/arch/x86/kvm/segment_descriptor.h
3176
 
@@ -0,0 +1,29 @@
3177
 
+#ifndef __SEGMENT_DESCRIPTOR_H
3178
 
+#define __SEGMENT_DESCRIPTOR_H
3179
 
+
3180
 
+struct segment_descriptor {
3181
 
+       u16 limit_low;
3182
 
+       u16 base_low;
3183
 
+       u8  base_mid;
3184
 
+       u8  type : 4;
3185
 
+       u8  system : 1;
3186
 
+       u8  dpl : 2;
3187
 
+       u8  present : 1;
3188
 
+       u8  limit_high : 4;
3189
 
+       u8  avl : 1;
3190
 
+       u8  long_mode : 1;
3191
 
+       u8  default_op : 1;
3192
 
+       u8  granularity : 1;
3193
 
+       u8  base_high;
3194
 
+} __attribute__((packed));
3195
 
+
3196
 
+#ifdef CONFIG_X86_64
3197
 
+/* LDT or TSS descriptor in the GDT. 16 bytes. */
3198
 
+struct segment_descriptor_64 {
3199
 
+       struct segment_descriptor s;
3200
 
+       u32 base_higher;
3201
 
+       u32 pad_zero;
3202
 
+};
3203
 
+
3204
 
+#endif
3205
 
+#endif
3206
 
diff --git a/drivers/kvm/svm.c b/arch/x86/kvm/svm.c
3207
 
similarity index 84%
3208
 
rename from drivers/kvm/svm.c
3209
 
rename to arch/x86/kvm/svm.c
3210
 
index 4e04e49..745b1ec 100644
3211
 
--- a/drivers/kvm/svm.c
3212
 
+++ b/arch/x86/kvm/svm.c
3213
 
@@ -13,10 +13,11 @@
3214
 
  * the COPYING file in the top-level directory.
3215
 
  *
3216
 
  */
3217
 
+#include <linux/kvm_host.h>
3218
 
 
3219
 
 #include "kvm_svm.h"
3220
 
-#include "x86_emulate.h"
3221
 
 #include "irq.h"
3222
 
+#include "mmu.h"
3223
 
 
3224
 
 #include <linux/module.h>
3225
 
 #include <linux/kernel.h>
3226
 
@@ -42,9 +43,6 @@ MODULE_LICENSE("GPL");
3227
 
 #define SEG_TYPE_LDT 2
3228
 
 #define SEG_TYPE_BUSY_TSS16 3
3229
 
 
3230
 
-#define KVM_EFER_LMA (1 << 10)
3231
 
-#define KVM_EFER_LME (1 << 8)
3232
 
-
3233
 
 #define SVM_FEATURE_NPT  (1 << 0)
3234
 
 #define SVM_FEATURE_LBRV (1 << 1)
3235
 
 #define SVM_DEATURE_SVML (1 << 2)
3236
 
@@ -102,20 +100,20 @@ static inline u32 svm_has(u32 feat)
3237
 
 
3238
 
 static inline u8 pop_irq(struct kvm_vcpu *vcpu)
3239
 
 {
3240
 
-       int word_index = __ffs(vcpu->irq_summary);
3241
 
-       int bit_index = __ffs(vcpu->irq_pending[word_index]);
3242
 
+       int word_index = __ffs(vcpu->arch.irq_summary);
3243
 
+       int bit_index = __ffs(vcpu->arch.irq_pending[word_index]);
3244
 
        int irq = word_index * BITS_PER_LONG + bit_index;
3245
 
 
3246
 
-       clear_bit(bit_index, &vcpu->irq_pending[word_index]);
3247
 
-       if (!vcpu->irq_pending[word_index])
3248
 
-               clear_bit(word_index, &vcpu->irq_summary);
3249
 
+       clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]);
3250
 
+       if (!vcpu->arch.irq_pending[word_index])
3251
 
+               clear_bit(word_index, &vcpu->arch.irq_summary);
3252
 
        return irq;
3253
 
 }
3254
 
 
3255
 
 static inline void push_irq(struct kvm_vcpu *vcpu, u8 irq)
3256
 
 {
3257
 
-       set_bit(irq, vcpu->irq_pending);
3258
 
-       set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary);
3259
 
+       set_bit(irq, vcpu->arch.irq_pending);
3260
 
+       set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
3261
 
 }
3262
 
 
3263
 
 static inline void clgi(void)
3264
 
@@ -184,35 +182,30 @@ static inline void flush_guest_tlb(struct kvm_vcpu *vcpu)
3265
 
 
3266
 
 static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
3267
 
 {
3268
 
-       if (!(efer & KVM_EFER_LMA))
3269
 
-               efer &= ~KVM_EFER_LME;
3270
 
+       if (!(efer & EFER_LMA))
3271
 
+               efer &= ~EFER_LME;
3272
 
 
3273
 
        to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK;
3274
 
-       vcpu->shadow_efer = efer;
3275
 
+       vcpu->arch.shadow_efer = efer;
3276
 
 }
3277
 
 
3278
 
-static void svm_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code)
3279
 
+static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
3280
 
+                               bool has_error_code, u32 error_code)
3281
 
 {
3282
 
        struct vcpu_svm *svm = to_svm(vcpu);
3283
 
 
3284
 
-       svm->vmcb->control.event_inj =          SVM_EVTINJ_VALID |
3285
 
-                                               SVM_EVTINJ_VALID_ERR |
3286
 
-                                               SVM_EVTINJ_TYPE_EXEPT |
3287
 
-                                               GP_VECTOR;
3288
 
+       svm->vmcb->control.event_inj = nr
3289
 
+               | SVM_EVTINJ_VALID
3290
 
+               | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
3291
 
+               | SVM_EVTINJ_TYPE_EXEPT;
3292
 
        svm->vmcb->control.event_inj_err = error_code;
3293
 
 }
3294
 
 
3295
 
-static void inject_ud(struct kvm_vcpu *vcpu)
3296
 
+static bool svm_exception_injected(struct kvm_vcpu *vcpu)
3297
 
 {
3298
 
-       to_svm(vcpu)->vmcb->control.event_inj = SVM_EVTINJ_VALID |
3299
 
-                                               SVM_EVTINJ_TYPE_EXEPT |
3300
 
-                                               UD_VECTOR;
3301
 
-}
3302
 
+       struct vcpu_svm *svm = to_svm(vcpu);
3303
 
 
3304
 
-static int is_page_fault(uint32_t info)
3305
 
-{
3306
 
-       info &= SVM_EVTINJ_VEC_MASK | SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
3307
 
-       return info == (PF_VECTOR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT);
3308
 
+       return !(svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID);
3309
 
 }
3310
 
 
3311
 
 static int is_external_interrupt(u32 info)
3312
 
@@ -229,17 +222,16 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
3313
 
                printk(KERN_DEBUG "%s: NOP\n", __FUNCTION__);
3314
 
                return;
3315
 
        }
3316
 
-       if (svm->next_rip - svm->vmcb->save.rip > MAX_INST_SIZE) {
3317
 
+       if (svm->next_rip - svm->vmcb->save.rip > MAX_INST_SIZE)
3318
 
                printk(KERN_ERR "%s: ip 0x%llx next 0x%llx\n",
3319
 
                       __FUNCTION__,
3320
 
                       svm->vmcb->save.rip,
3321
 
                       svm->next_rip);
3322
 
-       }
3323
 
 
3324
 
-       vcpu->rip = svm->vmcb->save.rip = svm->next_rip;
3325
 
+       vcpu->arch.rip = svm->vmcb->save.rip = svm->next_rip;
3326
 
        svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
3327
 
 
3328
 
-       vcpu->interrupt_window_open = 1;
3329
 
+       vcpu->arch.interrupt_window_open = 1;
3330
 
 }
3331
 
 
3332
 
 static int has_svm(void)
3333
 
@@ -312,7 +304,7 @@ static void svm_hardware_enable(void *garbage)
3334
 
        svm_data->next_asid = svm_data->max_asid + 1;
3335
 
        svm_features = cpuid_edx(SVM_CPUID_FUNC);
3336
 
 
3337
 
-       asm volatile ( "sgdt %0" : "=m"(gdt_descr) );
3338
 
+       asm volatile ("sgdt %0" : "=m"(gdt_descr));
3339
 
        gdt = (struct desc_struct *)gdt_descr.address;
3340
 
        svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
3341
 
 
3342
 
@@ -458,11 +450,13 @@ static void init_vmcb(struct vmcb *vmcb)
3343
 
 
3344
 
        control->intercept_cr_read =    INTERCEPT_CR0_MASK |
3345
 
                                        INTERCEPT_CR3_MASK |
3346
 
-                                       INTERCEPT_CR4_MASK;
3347
 
+                                       INTERCEPT_CR4_MASK |
3348
 
+                                       INTERCEPT_CR8_MASK;
3349
 
 
3350
 
        control->intercept_cr_write =   INTERCEPT_CR0_MASK |
3351
 
                                        INTERCEPT_CR3_MASK |
3352
 
-                                       INTERCEPT_CR4_MASK;
3353
 
+                                       INTERCEPT_CR4_MASK |
3354
 
+                                       INTERCEPT_CR8_MASK;
3355
 
 
3356
 
        control->intercept_dr_read =    INTERCEPT_DR0_MASK |
3357
 
                                        INTERCEPT_DR1_MASK |
3358
 
@@ -476,7 +470,8 @@ static void init_vmcb(struct vmcb *vmcb)
3359
 
                                        INTERCEPT_DR5_MASK |
3360
 
                                        INTERCEPT_DR7_MASK;
3361
 
 
3362
 
-       control->intercept_exceptions = 1 << PF_VECTOR;
3363
 
+       control->intercept_exceptions = (1 << PF_VECTOR) |
3364
 
+                                       (1 << UD_VECTOR);
3365
 
 
3366
 
 
3367
 
        control->intercept =    (1ULL << INTERCEPT_INTR) |
3368
 
@@ -543,8 +538,7 @@ static void init_vmcb(struct vmcb *vmcb)
3369
 
        init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
3370
 
 
3371
 
        save->efer = MSR_EFER_SVME_MASK;
3372
 
-
3373
 
-        save->dr6 = 0xffff0ff0;
3374
 
+       save->dr6 = 0xffff0ff0;
3375
 
        save->dr7 = 0x400;
3376
 
        save->rflags = 2;
3377
 
        save->rip = 0x0000fff0;
3378
 
@@ -558,7 +552,7 @@ static void init_vmcb(struct vmcb *vmcb)
3379
 
        /* rdx = ?? */
3380
 
 }
3381
 
 
3382
 
-static void svm_vcpu_reset(struct kvm_vcpu *vcpu)
3383
 
+static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
3384
 
 {
3385
 
        struct vcpu_svm *svm = to_svm(vcpu);
3386
 
 
3387
 
@@ -566,9 +560,11 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu)
3388
 
 
3389
 
        if (vcpu->vcpu_id != 0) {
3390
 
                svm->vmcb->save.rip = 0;
3391
 
-               svm->vmcb->save.cs.base = svm->vcpu.sipi_vector << 12;
3392
 
-               svm->vmcb->save.cs.selector = svm->vcpu.sipi_vector << 8;
3393
 
+               svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12;
3394
 
+               svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8;
3395
 
        }
3396
 
+
3397
 
+       return 0;
3398
 
 }
3399
 
 
3400
 
 static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
3401
 
@@ -587,12 +583,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
3402
 
        if (err)
3403
 
                goto free_svm;
3404
 
 
3405
 
-       if (irqchip_in_kernel(kvm)) {
3406
 
-               err = kvm_create_lapic(&svm->vcpu);
3407
 
-               if (err < 0)
3408
 
-                       goto free_svm;
3409
 
-       }
3410
 
-
3411
 
        page = alloc_page(GFP_KERNEL);
3412
 
        if (!page) {
3413
 
                err = -ENOMEM;
3414
 
@@ -608,9 +598,9 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
3415
 
 
3416
 
        fx_init(&svm->vcpu);
3417
 
        svm->vcpu.fpu_active = 1;
3418
 
-       svm->vcpu.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
3419
 
+       svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
3420
 
        if (svm->vcpu.vcpu_id == 0)
3421
 
-               svm->vcpu.apic_base |= MSR_IA32_APICBASE_BSP;
3422
 
+               svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
3423
 
 
3424
 
        return &svm->vcpu;
3425
 
 
3426
 
@@ -644,7 +634,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3427
 
                 * increasing TSC.
3428
 
                 */
3429
 
                rdtscll(tsc_this);
3430
 
-               delta = vcpu->host_tsc - tsc_this;
3431
 
+               delta = vcpu->arch.host_tsc - tsc_this;
3432
 
                svm->vmcb->control.tsc_offset += delta;
3433
 
                vcpu->cpu = cpu;
3434
 
                kvm_migrate_apic_timer(vcpu);
3435
 
@@ -659,11 +649,11 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
3436
 
        struct vcpu_svm *svm = to_svm(vcpu);
3437
 
        int i;
3438
 
 
3439
 
+       ++vcpu->stat.host_state_reload;
3440
 
        for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
3441
 
                wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
3442
 
 
3443
 
-       rdtscll(vcpu->host_tsc);
3444
 
-       kvm_put_guest_fpu(vcpu);
3445
 
+       rdtscll(vcpu->arch.host_tsc);
3446
 
 }
3447
 
 
3448
 
 static void svm_vcpu_decache(struct kvm_vcpu *vcpu)
3449
 
@@ -674,17 +664,17 @@ static void svm_cache_regs(struct kvm_vcpu *vcpu)
3450
 
 {
3451
 
        struct vcpu_svm *svm = to_svm(vcpu);
3452
 
 
3453
 
-       vcpu->regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
3454
 
-       vcpu->regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
3455
 
-       vcpu->rip = svm->vmcb->save.rip;
3456
 
+       vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
3457
 
+       vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
3458
 
+       vcpu->arch.rip = svm->vmcb->save.rip;
3459
 
 }
3460
 
 
3461
 
 static void svm_decache_regs(struct kvm_vcpu *vcpu)
3462
 
 {
3463
 
        struct vcpu_svm *svm = to_svm(vcpu);
3464
 
-       svm->vmcb->save.rax = vcpu->regs[VCPU_REGS_RAX];
3465
 
-       svm->vmcb->save.rsp = vcpu->regs[VCPU_REGS_RSP];
3466
 
-       svm->vmcb->save.rip = vcpu->rip;
3467
 
+       svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
3468
 
+       svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
3469
 
+       svm->vmcb->save.rip = vcpu->arch.rip;
3470
 
 }
3471
 
 
3472
 
 static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
3473
 
@@ -782,24 +772,24 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
3474
 
        struct vcpu_svm *svm = to_svm(vcpu);
3475
 
 
3476
 
 #ifdef CONFIG_X86_64
3477
 
-       if (vcpu->shadow_efer & KVM_EFER_LME) {
3478
 
+       if (vcpu->arch.shadow_efer & EFER_LME) {
3479
 
                if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
3480
 
-                       vcpu->shadow_efer |= KVM_EFER_LMA;
3481
 
-                       svm->vmcb->save.efer |= KVM_EFER_LMA | KVM_EFER_LME;
3482
 
+                       vcpu->arch.shadow_efer |= EFER_LMA;
3483
 
+                       svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
3484
 
                }
3485
 
 
3486
 
-               if (is_paging(vcpu) && !(cr0 & X86_CR0_PG) ) {
3487
 
-                       vcpu->shadow_efer &= ~KVM_EFER_LMA;
3488
 
-                       svm->vmcb->save.efer &= ~(KVM_EFER_LMA | KVM_EFER_LME);
3489
 
+               if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
3490
 
+                       vcpu->arch.shadow_efer &= ~EFER_LMA;
3491
 
+                       svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
3492
 
                }
3493
 
        }
3494
 
 #endif
3495
 
-       if ((vcpu->cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) {
3496
 
+       if ((vcpu->arch.cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) {
3497
 
                svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
3498
 
                vcpu->fpu_active = 1;
3499
 
        }
3500
 
 
3501
 
-       vcpu->cr0 = cr0;
3502
 
+       vcpu->arch.cr0 = cr0;
3503
 
        cr0 |= X86_CR0_PG | X86_CR0_WP;
3504
 
        cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
3505
 
        svm->vmcb->save.cr0 = cr0;
3506
 
@@ -807,7 +797,7 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
3507
 
 
3508
 
 static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
3509
 
 {
3510
 
-       vcpu->cr4 = cr4;
3511
 
+       vcpu->arch.cr4 = cr4;
3512
 
        to_svm(vcpu)->vmcb->save.cr4 = cr4 | X86_CR4_PAE;
3513
 
 }
3514
 
 
3515
 
@@ -912,7 +902,7 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
3516
 
                svm->db_regs[dr] = value;
3517
 
                return;
3518
 
        case 4 ... 5:
3519
 
-               if (vcpu->cr4 & X86_CR4_DE) {
3520
 
+               if (vcpu->arch.cr4 & X86_CR4_DE) {
3521
 
                        *exception = UD_VECTOR;
3522
 
                        return;
3523
 
                }
3524
 
@@ -938,51 +928,30 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
3525
 
        struct kvm *kvm = svm->vcpu.kvm;
3526
 
        u64 fault_address;
3527
 
        u32 error_code;
3528
 
-       enum emulation_result er;
3529
 
-       int r;
3530
 
 
3531
 
        if (!irqchip_in_kernel(kvm) &&
3532
 
                is_external_interrupt(exit_int_info))
3533
 
                push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK);
3534
 
 
3535
 
-       mutex_lock(&kvm->lock);
3536
 
-
3537
 
        fault_address  = svm->vmcb->control.exit_info_2;
3538
 
        error_code = svm->vmcb->control.exit_info_1;
3539
 
-       r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
3540
 
-       if (r < 0) {
3541
 
-               mutex_unlock(&kvm->lock);
3542
 
-               return r;
3543
 
-       }
3544
 
-       if (!r) {
3545
 
-               mutex_unlock(&kvm->lock);
3546
 
-               return 1;
3547
 
-       }
3548
 
-       er = emulate_instruction(&svm->vcpu, kvm_run, fault_address,
3549
 
-                                error_code);
3550
 
-       mutex_unlock(&kvm->lock);
3551
 
+       return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
3552
 
+}
3553
 
 
3554
 
-       switch (er) {
3555
 
-       case EMULATE_DONE:
3556
 
-               return 1;
3557
 
-       case EMULATE_DO_MMIO:
3558
 
-               ++svm->vcpu.stat.mmio_exits;
3559
 
-               return 0;
3560
 
-       case EMULATE_FAIL:
3561
 
-               kvm_report_emulation_failure(&svm->vcpu, "pagetable");
3562
 
-               break;
3563
 
-       default:
3564
 
-               BUG();
3565
 
-       }
3566
 
+static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
3567
 
+{
3568
 
+       int er;
3569
 
 
3570
 
-       kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
3571
 
-       return 0;
3572
 
+       er = emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0);
3573
 
+       if (er != EMULATE_DONE)
3574
 
+               kvm_queue_exception(&svm->vcpu, UD_VECTOR);
3575
 
+       return 1;
3576
 
 }
3577
 
 
3578
 
 static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
3579
 
 {
3580
 
        svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
3581
 
-       if (!(svm->vcpu.cr0 & X86_CR0_TS))
3582
 
+       if (!(svm->vcpu.arch.cr0 & X86_CR0_TS))
3583
 
                svm->vmcb->save.cr0 &= ~X86_CR0_TS;
3584
 
        svm->vcpu.fpu_active = 1;
3585
 
 
3586
 
@@ -1004,7 +973,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
3587
 
 
3588
 
 static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
3589
 
 {
3590
 
-       u32 io_info = svm->vmcb->control.exit_info_1; //address size bug?
3591
 
+       u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
3592
 
        int size, down, in, string, rep;
3593
 
        unsigned port;
3594
 
 
3595
 
@@ -1015,7 +984,8 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
3596
 
        string = (io_info & SVM_IOIO_STR_MASK) != 0;
3597
 
 
3598
 
        if (string) {
3599
 
-               if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0) == EMULATE_DO_MMIO)
3600
 
+               if (emulate_instruction(&svm->vcpu,
3601
 
+                                       kvm_run, 0, 0, 0) == EMULATE_DO_MMIO)
3602
 
                        return 0;
3603
 
                return 1;
3604
 
        }
3605
 
@@ -1045,13 +1015,14 @@ static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
3606
 
 {
3607
 
        svm->next_rip = svm->vmcb->save.rip + 3;
3608
 
        skip_emulated_instruction(&svm->vcpu);
3609
 
-       return kvm_hypercall(&svm->vcpu, kvm_run);
3610
 
+       kvm_emulate_hypercall(&svm->vcpu);
3611
 
+       return 1;
3612
 
 }
3613
 
 
3614
 
 static int invalid_op_interception(struct vcpu_svm *svm,
3615
 
                                   struct kvm_run *kvm_run)
3616
 
 {
3617
 
-       inject_ud(&svm->vcpu);
3618
 
+       kvm_queue_exception(&svm->vcpu, UD_VECTOR);
3619
 
        return 1;
3620
 
 }
3621
 
 
3622
 
@@ -1073,11 +1044,20 @@ static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
3623
 
 static int emulate_on_interception(struct vcpu_svm *svm,
3624
 
                                   struct kvm_run *kvm_run)
3625
 
 {
3626
 
-       if (emulate_instruction(&svm->vcpu, NULL, 0, 0) != EMULATE_DONE)
3627
 
+       if (emulate_instruction(&svm->vcpu, NULL, 0, 0, 0) != EMULATE_DONE)
3628
 
                pr_unimpl(&svm->vcpu, "%s: failed\n", __FUNCTION__);
3629
 
        return 1;
3630
 
 }
3631
 
 
3632
 
+static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
3633
 
+{
3634
 
+       emulate_instruction(&svm->vcpu, NULL, 0, 0, 0);
3635
 
+       if (irqchip_in_kernel(svm->vcpu.kvm))
3636
 
+               return 1;
3637
 
+       kvm_run->exit_reason = KVM_EXIT_SET_TPR;
3638
 
+       return 0;
3639
 
+}
3640
 
+
3641
 
 static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
3642
 
 {
3643
 
        struct vcpu_svm *svm = to_svm(vcpu);
3644
 
@@ -1124,14 +1104,14 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
3645
 
 
3646
 
 static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
3647
 
 {
3648
 
-       u32 ecx = svm->vcpu.regs[VCPU_REGS_RCX];
3649
 
+       u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
3650
 
        u64 data;
3651
 
 
3652
 
        if (svm_get_msr(&svm->vcpu, ecx, &data))
3653
 
-               svm_inject_gp(&svm->vcpu, 0);
3654
 
+               kvm_inject_gp(&svm->vcpu, 0);
3655
 
        else {
3656
 
                svm->vmcb->save.rax = data & 0xffffffff;
3657
 
-               svm->vcpu.regs[VCPU_REGS_RDX] = data >> 32;
3658
 
+               svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32;
3659
 
                svm->next_rip = svm->vmcb->save.rip + 2;
3660
 
                skip_emulated_instruction(&svm->vcpu);
3661
 
        }
3662
 
@@ -1176,7 +1156,20 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
3663
 
        case MSR_IA32_SYSENTER_ESP:
3664
 
                svm->vmcb->save.sysenter_esp = data;
3665
 
                break;
3666
 
+       case MSR_K7_EVNTSEL0:
3667
 
+       case MSR_K7_EVNTSEL1:
3668
 
+       case MSR_K7_EVNTSEL2:
3669
 
+       case MSR_K7_EVNTSEL3:
3670
 
+               /*
3671
 
+                * only support writing 0 to the performance counters for now
3672
 
+                * to make Windows happy. Should be replaced by a real
3673
 
+                * performance counter emulation later.
3674
 
+                */
3675
 
+               if (data != 0)
3676
 
+                       goto unhandled;
3677
 
+               break;
3678
 
        default:
3679
 
+       unhandled:
3680
 
                return kvm_set_msr_common(vcpu, ecx, data);
3681
 
        }
3682
 
        return 0;
3683
 
@@ -1184,12 +1177,12 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
3684
 
 
3685
 
 static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
3686
 
 {
3687
 
-       u32 ecx = svm->vcpu.regs[VCPU_REGS_RCX];
3688
 
+       u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
3689
 
        u64 data = (svm->vmcb->save.rax & -1u)
3690
 
-               | ((u64)(svm->vcpu.regs[VCPU_REGS_RDX] & -1u) << 32);
3691
 
+               | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
3692
 
        svm->next_rip = svm->vmcb->save.rip + 2;
3693
 
        if (svm_set_msr(&svm->vcpu, ecx, data))
3694
 
-               svm_inject_gp(&svm->vcpu, 0);
3695
 
+               kvm_inject_gp(&svm->vcpu, 0);
3696
 
        else
3697
 
                skip_emulated_instruction(&svm->vcpu);
3698
 
        return 1;
3699
 
@@ -1213,7 +1206,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm,
3700
 
         * possible
3701
 
         */
3702
 
        if (kvm_run->request_interrupt_window &&
3703
 
-           !svm->vcpu.irq_summary) {
3704
 
+           !svm->vcpu.arch.irq_summary) {
3705
 
                ++svm->vcpu.stat.irq_window_exits;
3706
 
                kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
3707
 
                return 0;
3708
 
@@ -1227,10 +1220,12 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
3709
 
        [SVM_EXIT_READ_CR0]                     = emulate_on_interception,
3710
 
        [SVM_EXIT_READ_CR3]                     = emulate_on_interception,
3711
 
        [SVM_EXIT_READ_CR4]                     = emulate_on_interception,
3712
 
+       [SVM_EXIT_READ_CR8]                     = emulate_on_interception,
3713
 
        /* for now: */
3714
 
        [SVM_EXIT_WRITE_CR0]                    = emulate_on_interception,
3715
 
        [SVM_EXIT_WRITE_CR3]                    = emulate_on_interception,
3716
 
        [SVM_EXIT_WRITE_CR4]                    = emulate_on_interception,
3717
 
+       [SVM_EXIT_WRITE_CR8]                    = cr8_write_interception,
3718
 
        [SVM_EXIT_READ_DR0]                     = emulate_on_interception,
3719
 
        [SVM_EXIT_READ_DR1]                     = emulate_on_interception,
3720
 
        [SVM_EXIT_READ_DR2]                     = emulate_on_interception,
3721
 
@@ -1241,6 +1236,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
3722
 
        [SVM_EXIT_WRITE_DR3]                    = emulate_on_interception,
3723
 
        [SVM_EXIT_WRITE_DR5]                    = emulate_on_interception,
3724
 
        [SVM_EXIT_WRITE_DR7]                    = emulate_on_interception,
3725
 
+       [SVM_EXIT_EXCP_BASE + UD_VECTOR]        = ud_interception,
3726
 
        [SVM_EXIT_EXCP_BASE + PF_VECTOR]        = pf_interception,
3727
 
        [SVM_EXIT_EXCP_BASE + NM_VECTOR]        = nm_interception,
3728
 
        [SVM_EXIT_INTR]                         = nop_on_interception,
3729
 
@@ -1293,7 +1289,7 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
3730
 
                       exit_code);
3731
 
 
3732
 
        if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
3733
 
-           || svm_exit_handlers[exit_code] == 0) {
3734
 
+           || !svm_exit_handlers[exit_code]) {
3735
 
                kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
3736
 
                kvm_run->hw.hardware_exit_reason = exit_code;
3737
 
                return 0;
3738
 
@@ -1307,7 +1303,7 @@ static void reload_tss(struct kvm_vcpu *vcpu)
3739
 
        int cpu = raw_smp_processor_id();
3740
 
 
3741
 
        struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu);
3742
 
-       svm_data->tss_desc->type = 9; //available 32/64-bit TSS
3743
 
+       svm_data->tss_desc->type = 9; /* available 32/64-bit TSS */
3744
 
        load_TR_desc();
3745
 
 }
3746
 
 
3747
 
@@ -1348,7 +1344,6 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
3748
 
        struct vmcb *vmcb = svm->vmcb;
3749
 
        int intr_vector = -1;
3750
 
 
3751
 
-       kvm_inject_pending_timer_irqs(vcpu);
3752
 
        if ((vmcb->control.exit_int_info & SVM_EVTINJ_VALID) &&
3753
 
            ((vmcb->control.exit_int_info & SVM_EVTINJ_TYPE_MASK) == 0)) {
3754
 
                intr_vector = vmcb->control.exit_int_info &
3755
 
@@ -1388,20 +1383,20 @@ static void kvm_reput_irq(struct vcpu_svm *svm)
3756
 
                push_irq(&svm->vcpu, control->int_vector);
3757
 
        }
3758
 
 
3759
 
-       svm->vcpu.interrupt_window_open =
3760
 
+       svm->vcpu.arch.interrupt_window_open =
3761
 
                !(control->int_state & SVM_INTERRUPT_SHADOW_MASK);
3762
 
 }
3763
 
 
3764
 
 static void svm_do_inject_vector(struct vcpu_svm *svm)
3765
 
 {
3766
 
        struct kvm_vcpu *vcpu = &svm->vcpu;
3767
 
-       int word_index = __ffs(vcpu->irq_summary);
3768
 
-       int bit_index = __ffs(vcpu->irq_pending[word_index]);
3769
 
+       int word_index = __ffs(vcpu->arch.irq_summary);
3770
 
+       int bit_index = __ffs(vcpu->arch.irq_pending[word_index]);
3771
 
        int irq = word_index * BITS_PER_LONG + bit_index;
3772
 
 
3773
 
-       clear_bit(bit_index, &vcpu->irq_pending[word_index]);
3774
 
-       if (!vcpu->irq_pending[word_index])
3775
 
-               clear_bit(word_index, &vcpu->irq_summary);
3776
 
+       clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]);
3777
 
+       if (!vcpu->arch.irq_pending[word_index])
3778
 
+               clear_bit(word_index, &vcpu->arch.irq_summary);
3779
 
        svm_inject_irq(svm, irq);
3780
 
 }
3781
 
 
3782
 
@@ -1411,11 +1406,11 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
3783
 
        struct vcpu_svm *svm = to_svm(vcpu);
3784
 
        struct vmcb_control_area *control = &svm->vmcb->control;
3785
 
 
3786
 
-       svm->vcpu.interrupt_window_open =
3787
 
+       svm->vcpu.arch.interrupt_window_open =
3788
 
                (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
3789
 
                 (svm->vmcb->save.rflags & X86_EFLAGS_IF));
3790
 
 
3791
 
-       if (svm->vcpu.interrupt_window_open && svm->vcpu.irq_summary)
3792
 
+       if (svm->vcpu.arch.interrupt_window_open && svm->vcpu.arch.irq_summary)
3793
 
                /*
3794
 
                 * If interrupts enabled, and not blocked by sti or mov ss. Good.
3795
 
                 */
3796
 
@@ -1424,13 +1419,18 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
3797
 
        /*
3798
 
         * Interrupts blocked.  Wait for unblock.
3799
 
         */
3800
 
-       if (!svm->vcpu.interrupt_window_open &&
3801
 
-           (svm->vcpu.irq_summary || kvm_run->request_interrupt_window)) {
3802
 
+       if (!svm->vcpu.arch.interrupt_window_open &&
3803
 
+           (svm->vcpu.arch.irq_summary || kvm_run->request_interrupt_window))
3804
 
                control->intercept |= 1ULL << INTERCEPT_VINTR;
3805
 
-       } else
3806
 
+        else
3807
 
                control->intercept &= ~(1ULL << INTERCEPT_VINTR);
3808
 
 }
3809
 
 
3810
 
+static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
3811
 
+{
3812
 
+       return 0;
3813
 
+}
3814
 
+
3815
 
 static void save_db_regs(unsigned long *db_regs)
3816
 
 {
3817
 
        asm volatile ("mov %%dr0, %0" : "=r"(db_regs[0]));
3818
 
@@ -1472,7 +1472,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        svm->host_cr2 = kvm_read_cr2();
        svm->host_dr6 = read_dr6();
        svm->host_dr7 = read_dr7();
-       svm->vmcb->save.cr2 = vcpu->cr2;
+       svm->vmcb->save.cr2 = vcpu->arch.cr2;
 
        if (svm->vmcb->save.dr7 & 0xff) {
                write_dr7(0);
@@ -1486,13 +1486,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
        asm volatile (
 #ifdef CONFIG_X86_64
-               "push %%rbx; push %%rcx; push %%rdx;"
-               "push %%rsi; push %%rdi; push %%rbp;"
-               "push %%r8;  push %%r9;  push %%r10; push %%r11;"
-               "push %%r12; push %%r13; push %%r14; push %%r15;"
+               "push %%rbp; \n\t"
 #else
-               "push %%ebx; push %%ecx; push %%edx;"
-               "push %%esi; push %%edi; push %%ebp;"
+               "push %%ebp; \n\t"
 #endif
 
 #ifdef CONFIG_X86_64
@@ -1554,10 +1550,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                "mov %%r14, %c[r14](%[svm]) \n\t"
                "mov %%r15, %c[r15](%[svm]) \n\t"
 
-               "pop  %%r15; pop  %%r14; pop  %%r13; pop  %%r12;"
-               "pop  %%r11; pop  %%r10; pop  %%r9;  pop  %%r8;"
-               "pop  %%rbp; pop  %%rdi; pop  %%rsi;"
-               "pop  %%rdx; pop  %%rcx; pop  %%rbx; \n\t"
+               "pop  %%rbp; \n\t"
 #else
                "mov %%ebx, %c[rbx](%[svm]) \n\t"
                "mov %%ecx, %c[rcx](%[svm]) \n\t"
@@ -1566,34 +1559,40 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                "mov %%edi, %c[rdi](%[svm]) \n\t"
                "mov %%ebp, %c[rbp](%[svm]) \n\t"
 
-               "pop  %%ebp; pop  %%edi; pop  %%esi;"
-               "pop  %%edx; pop  %%ecx; pop  %%ebx; \n\t"
+               "pop  %%ebp; \n\t"
 #endif
                :
                : [svm]"a"(svm),
                  [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
-                 [rbx]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RBX])),
-                 [rcx]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RCX])),
-                 [rdx]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RDX])),
-                 [rsi]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RSI])),
-                 [rdi]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RDI])),
-                 [rbp]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RBP]))
+                 [rbx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBX])),
+                 [rcx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RCX])),
+                 [rdx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDX])),
+                 [rsi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RSI])),
+                 [rdi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDI])),
+                 [rbp]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBP]))
+#ifdef CONFIG_X86_64
+                 , [r8]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R8])),
+                 [r9]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R9])),
+                 [r10]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R10])),
+                 [r11]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R11])),
+                 [r12]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R12])),
+                 [r13]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R13])),
+                 [r14]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R14])),
+                 [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15]))
+#endif
+               : "cc", "memory"
 #ifdef CONFIG_X86_64
-                 ,[r8 ]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R8])),
-                 [r9 ]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R9 ])),
-                 [r10]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R10])),
-                 [r11]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R11])),
-                 [r12]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R12])),
-                 [r13]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R13])),
-                 [r14]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R14])),
-                 [r15]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R15]))
+               , "rbx", "rcx", "rdx", "rsi", "rdi"
+               , "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
+#else
+               , "ebx", "ecx", "edx" , "esi", "edi"
 #endif
-               : "cc", "memory" );
+               );
 
        if ((svm->vmcb->save.dr7 & 0xff))
                load_db_regs(svm->host_db_regs);
 
-       vcpu->cr2 = svm->vmcb->save.cr2;
+       vcpu->arch.cr2 = svm->vmcb->save.cr2;
 
        write_dr6(svm->host_dr6);
        write_dr7(svm->host_dr7);
@@ -1627,34 +1626,6 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
        }
 }
 
-static void svm_inject_page_fault(struct kvm_vcpu *vcpu,
-                                 unsigned long  addr,
-                                 uint32_t err_code)
-{
-       struct vcpu_svm *svm = to_svm(vcpu);
-       uint32_t exit_int_info = svm->vmcb->control.exit_int_info;
-
-       ++vcpu->stat.pf_guest;
-
-       if (is_page_fault(exit_int_info)) {
-
-               svm->vmcb->control.event_inj_err = 0;
-               svm->vmcb->control.event_inj =  SVM_EVTINJ_VALID |
-                                               SVM_EVTINJ_VALID_ERR |
-                                               SVM_EVTINJ_TYPE_EXEPT |
-                                               DF_VECTOR;
-               return;
-       }
-       vcpu->cr2 = addr;
-       svm->vmcb->save.cr2 = addr;
-       svm->vmcb->control.event_inj =  SVM_EVTINJ_VALID |
-                                       SVM_EVTINJ_VALID_ERR |
-                                       SVM_EVTINJ_TYPE_EXEPT |
-                                       PF_VECTOR;
-       svm->vmcb->control.event_inj_err = err_code;
-}
-
-
 static int is_disabled(void)
 {
        u64 vm_cr;
@@ -1675,7 +1646,6 @@ svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
        hypercall[0] = 0x0f;
        hypercall[1] = 0x01;
        hypercall[2] = 0xd9;
-       hypercall[3] = 0xc3;
 }
 
 static void svm_check_processor_compat(void *rtn)
@@ -1725,9 +1695,6 @@ static struct kvm_x86_ops svm_x86_ops = {
        .set_rflags = svm_set_rflags,
 
        .tlb_flush = svm_flush_tlb,
-       .inject_page_fault = svm_inject_page_fault,
-
-       .inject_gp = svm_inject_gp,
 
        .run = svm_vcpu_run,
        .handle_exit = handle_exit,
@@ -1735,19 +1702,23 @@ static struct kvm_x86_ops svm_x86_ops = {
        .patch_hypercall = svm_patch_hypercall,
        .get_irq = svm_get_irq,
        .set_irq = svm_set_irq,
+       .queue_exception = svm_queue_exception,
+       .exception_injected = svm_exception_injected,
        .inject_pending_irq = svm_intr_assist,
        .inject_pending_vectors = do_interrupt_requests,
+
+       .set_tss_addr = svm_set_tss_addr,
 };
 
 static int __init svm_init(void)
 {
-       return kvm_init_x86(&svm_x86_ops, sizeof(struct vcpu_svm),
+       return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm),
                             THIS_MODULE);
 }
 
 static void __exit svm_exit(void)
 {
-       kvm_exit_x86();
+       kvm_exit();
 }
 
 module_init(svm_init)
diff --git a/drivers/kvm/svm.h b/arch/x86/kvm/svm.h
similarity index 98%
rename from drivers/kvm/svm.h
rename to arch/x86/kvm/svm.h
index 3b1b0f3..5fd5049 100644
--- a/drivers/kvm/svm.h
+++ b/arch/x86/kvm/svm.h
@@ -204,6 +204,7 @@ struct __attribute__ ((__packed__)) vmcb {
 #define INTERCEPT_CR0_MASK 1
 #define INTERCEPT_CR3_MASK (1 << 3)
 #define INTERCEPT_CR4_MASK (1 << 4)
+#define INTERCEPT_CR8_MASK (1 << 8)
 
 #define INTERCEPT_DR0_MASK 1
 #define INTERCEPT_DR1_MASK (1 << 1)
@@ -311,7 +312,7 @@ struct __attribute__ ((__packed__)) vmcb {
 
 #define SVM_EXIT_ERR           -1
 
-#define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1) // TS and MP
+#define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1) /* TS and MP */
 
 #define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda"
 #define SVM_VMRUN  ".byte 0x0f, 0x01, 0xd8"
diff --git a/drivers/kvm/vmx.c b/arch/x86/kvm/vmx.c
4018
 
similarity index 75%
4019
 
rename from drivers/kvm/vmx.c
4020
 
rename to arch/x86/kvm/vmx.c
4021
 
index bb56ae3..20c0f5e 100644
4022
 
--- a/drivers/kvm/vmx.c
4023
 
+++ b/arch/x86/kvm/vmx.c
4024
 
@@ -15,17 +15,18 @@
4025
 
  *
4026
 
  */
4027
 
 
4028
 
-#include "kvm.h"
4029
 
-#include "x86_emulate.h"
4030
 
 #include "irq.h"
4031
 
 #include "vmx.h"
4032
 
 #include "segment_descriptor.h"
4033
 
+#include "mmu.h"
4034
 
 
4035
 
+#include <linux/kvm_host.h>
4036
 
 #include <linux/module.h>
4037
 
 #include <linux/kernel.h>
4038
 
 #include <linux/mm.h>
4039
 
 #include <linux/highmem.h>
4040
 
 #include <linux/sched.h>
4041
 
+#include <linux/moduleparam.h>
4042
 
 
4043
 
 #include <asm/io.h>
4044
 
 #include <asm/desc.h>
4045
 
@@ -33,6 +34,9 @@
4046
 
 MODULE_AUTHOR("Qumranet");
4047
 
 MODULE_LICENSE("GPL");
4048
 
 
4049
 
+static int bypass_guest_pf = 1;
4050
 
+module_param(bypass_guest_pf, bool, 0);
4051
 
+
4052
 
 struct vmcs {
4053
 
        u32 revision_id;
4054
 
        u32 abort;
4055
 
@@ -43,6 +47,7 @@ struct vcpu_vmx {
4056
 
        struct kvm_vcpu       vcpu;
4057
 
        int                   launched;
4058
 
        u8                    fail;
4059
 
+       u32                   idt_vectoring_info;
4060
 
        struct kvm_msr_entry *guest_msrs;
4061
 
        struct kvm_msr_entry *host_msrs;
4062
 
        int                   nmsrs;
4063
 
@@ -57,8 +62,15 @@ struct vcpu_vmx {
4064
 
                u16           fs_sel, gs_sel, ldt_sel;
4065
 
                int           gs_ldt_reload_needed;
4066
 
                int           fs_reload_needed;
4067
 
-       }host_state;
4068
 
-
4069
 
+               int           guest_efer_loaded;
4070
 
+       } host_state;
4071
 
+       struct {
4072
 
+               struct {
4073
 
+                       bool pending;
4074
 
+                       u8 vector;
4075
 
+                       unsigned rip;
4076
 
+               } irq;
4077
 
+       } rmode;
4078
 
 };
4079
 
 
4080
 
 static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
4081
 
@@ -74,14 +86,13 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
4082
 
 static struct page *vmx_io_bitmap_a;
4083
 
 static struct page *vmx_io_bitmap_b;
4084
 
 
4085
 
-#define EFER_SAVE_RESTORE_BITS ((u64)EFER_SCE)
4086
 
-
4087
 
 static struct vmcs_config {
4088
 
        int size;
4089
 
        int order;
4090
 
        u32 revision_id;
4091
 
        u32 pin_based_exec_ctrl;
4092
 
        u32 cpu_based_exec_ctrl;
4093
 
+       u32 cpu_based_2nd_exec_ctrl;
4094
 
        u32 vmexit_ctrl;
4095
 
        u32 vmentry_ctrl;
4096
 
 } vmcs_config;
4097
 
@@ -138,18 +149,6 @@ static void save_msrs(struct kvm_msr_entry *e, int n)
4098
 
                rdmsrl(e[i].index, e[i].data);
4099
 
 }
4100
 
 
4101
 
-static inline u64 msr_efer_save_restore_bits(struct kvm_msr_entry msr)
4102
 
-{
4103
 
-       return (u64)msr.data & EFER_SAVE_RESTORE_BITS;
4104
 
-}
4105
 
-
4106
 
-static inline int msr_efer_need_save_restore(struct vcpu_vmx *vmx)
4107
 
-{
4108
 
-       int efer_offset = vmx->msr_offset_efer;
4109
 
-       return msr_efer_save_restore_bits(vmx->host_msrs[efer_offset]) !=
4110
 
-               msr_efer_save_restore_bits(vmx->guest_msrs[efer_offset]);
4111
 
-}
4112
 
-
4113
 
 static inline int is_page_fault(u32 intr_info)
4114
 
 {
4115
 
        return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
4116
 
@@ -164,6 +163,13 @@ static inline int is_no_device(u32 intr_info)
4117
 
                (INTR_TYPE_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
4118
 
 }
4119
 
 
4120
 
+static inline int is_invalid_opcode(u32 intr_info)
4121
 
+{
4122
 
+       return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
4123
 
+                            INTR_INFO_VALID_MASK)) ==
4124
 
+               (INTR_TYPE_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK);
4125
 
+}
4126
 
+
4127
 
 static inline int is_external_interrupt(u32 intr_info)
4128
 
 {
4129
 
        return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
4130
 
@@ -180,6 +186,24 @@ static inline int vm_need_tpr_shadow(struct kvm *kvm)
4131
 
        return ((cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)));
4132
 
 }
4133
 
 
4134
 
+static inline int cpu_has_secondary_exec_ctrls(void)
4135
 
+{
4136
 
+       return (vmcs_config.cpu_based_exec_ctrl &
4137
 
+               CPU_BASED_ACTIVATE_SECONDARY_CONTROLS);
4138
 
+}
4139
 
+
4140
 
+static inline int cpu_has_vmx_virtualize_apic_accesses(void)
4141
 
+{
4142
 
+       return (vmcs_config.cpu_based_2nd_exec_ctrl &
4143
 
+               SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
4144
 
+}
4145
 
+
4146
 
+static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
4147
 
+{
4148
 
+       return ((cpu_has_vmx_virtualize_apic_accesses()) &&
4149
 
+               (irqchip_in_kernel(kvm)));
4150
 
+}
4151
 
+
4152
 
 static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
4153
 
 {
4154
 
        int i;
4155
 
@@ -222,16 +246,14 @@ static void __vcpu_clear(void *arg)
4156
 
                vmcs_clear(vmx->vmcs);
4157
 
        if (per_cpu(current_vmcs, cpu) == vmx->vmcs)
4158
 
                per_cpu(current_vmcs, cpu) = NULL;
4159
 
-       rdtscll(vmx->vcpu.host_tsc);
4160
 
+       rdtscll(vmx->vcpu.arch.host_tsc);
4161
 
 }
4162
 
 
4163
 
 static void vcpu_clear(struct vcpu_vmx *vmx)
4164
 
 {
4165
 
-       if (vmx->vcpu.cpu != raw_smp_processor_id() && vmx->vcpu.cpu != -1)
4166
 
-               smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear,
4167
 
-                                        vmx, 0, 1);
4168
 
-       else
4169
 
-               __vcpu_clear(vmx);
4170
 
+       if (vmx->vcpu.cpu == -1)
4171
 
+               return;
4172
 
+       smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 0, 1);
4173
 
        vmx->launched = 0;
4174
 
 }
4175
 
 
4176
 
@@ -275,7 +297,7 @@ static void vmcs_writel(unsigned long field, unsigned long value)
4177
 
        u8 error;
4178
 
 
4179
 
        asm volatile (ASM_VMX_VMWRITE_RAX_RDX "; setna %0"
4180
 
-                      : "=q"(error) : "a"(value), "d"(field) : "cc" );
4181
 
+                      : "=q"(error) : "a"(value), "d"(field) : "cc");
4182
 
        if (unlikely(error))
4183
 
                vmwrite_error(field, value);
4184
 
 }
4185
 
@@ -315,12 +337,12 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
4186
 
 {
4187
 
        u32 eb;
4188
 
 
4189
 
-       eb = 1u << PF_VECTOR;
4190
 
+       eb = (1u << PF_VECTOR) | (1u << UD_VECTOR);
4191
 
        if (!vcpu->fpu_active)
4192
 
                eb |= 1u << NM_VECTOR;
4193
 
        if (vcpu->guest_debug.enabled)
4194
 
                eb |= 1u << 1;
4195
 
-       if (vcpu->rmode.active)
4196
 
+       if (vcpu->arch.rmode.active)
4197
 
                eb = ~0;
4198
 
        vmcs_write32(EXCEPTION_BITMAP, eb);
4199
 
 }
4200
 
@@ -344,16 +366,42 @@ static void reload_tss(void)
4201
 
 
4202
 
 static void load_transition_efer(struct vcpu_vmx *vmx)
4203
 
 {
4204
 
-       u64 trans_efer;
4205
 
        int efer_offset = vmx->msr_offset_efer;
4206
 
+       u64 host_efer = vmx->host_msrs[efer_offset].data;
4207
 
+       u64 guest_efer = vmx->guest_msrs[efer_offset].data;
4208
 
+       u64 ignore_bits;
4209
 
 
4210
 
-       trans_efer = vmx->host_msrs[efer_offset].data;
4211
 
-       trans_efer &= ~EFER_SAVE_RESTORE_BITS;
4212
 
-       trans_efer |= msr_efer_save_restore_bits(vmx->guest_msrs[efer_offset]);
4213
 
-       wrmsrl(MSR_EFER, trans_efer);
4214
 
+       if (efer_offset < 0)
4215
 
+               return;
4216
 
+       /*
4217
 
+        * NX is emulated; LMA and LME handled by hardware; SCE meaningless
4218
 
+        * outside long mode
4219
 
+        */
4220
 
+       ignore_bits = EFER_NX | EFER_SCE;
4221
 
+#ifdef CONFIG_X86_64
4222
 
+       ignore_bits |= EFER_LMA | EFER_LME;
4223
 
+       /* SCE is meaningful only in long mode on Intel */
4224
 
+       if (guest_efer & EFER_LMA)
4225
 
+               ignore_bits &= ~(u64)EFER_SCE;
4226
 
+#endif
4227
 
+       if ((guest_efer & ~ignore_bits) == (host_efer & ~ignore_bits))
4228
 
+               return;
4229
 
+
4230
 
+       vmx->host_state.guest_efer_loaded = 1;
4231
 
+       guest_efer &= ~ignore_bits;
4232
 
+       guest_efer |= host_efer & ignore_bits;
4233
 
+       wrmsrl(MSR_EFER, guest_efer);
4234
 
        vmx->vcpu.stat.efer_reload++;
4235
 
 }
4236
 
 
4237
 
+static void reload_host_efer(struct vcpu_vmx *vmx)
4238
 
+{
4239
 
+       if (vmx->host_state.guest_efer_loaded) {
4240
 
+               vmx->host_state.guest_efer_loaded = 0;
4241
 
+               load_msrs(vmx->host_msrs + vmx->msr_offset_efer, 1);
4242
 
+       }
4243
 
+}
4244
 
+
4245
 
 static void vmx_save_host_state(struct kvm_vcpu *vcpu)
4246
 
 {
4247
 
        struct vcpu_vmx *vmx = to_vmx(vcpu);
4248
 
@@ -393,14 +441,13 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
4249
 
 #endif
4250
 
 
4251
 
 #ifdef CONFIG_X86_64
4252
 
-       if (is_long_mode(&vmx->vcpu)) {
4253
 
+       if (is_long_mode(&vmx->vcpu))
4254
 
                save_msrs(vmx->host_msrs +
4255
 
                          vmx->msr_offset_kernel_gs_base, 1);
4256
 
-       }
4257
 
+
4258
 
 #endif
4259
 
        load_msrs(vmx->guest_msrs, vmx->save_nmsrs);
4260
 
-       if (msr_efer_need_save_restore(vmx))
4261
 
-               load_transition_efer(vmx);
4262
 
+       load_transition_efer(vmx);
4263
 
 }
4264
 
 
4265
 
 static void vmx_load_host_state(struct vcpu_vmx *vmx)
4266
 
@@ -410,6 +457,7 @@ static void vmx_load_host_state(struct vcpu_vmx *vmx)
4267
 
        if (!vmx->host_state.loaded)
4268
 
                return;
4269
 
 
4270
 
+       ++vmx->vcpu.stat.host_state_reload;
4271
 
        vmx->host_state.loaded = 0;
4272
 
        if (vmx->host_state.fs_reload_needed)
4273
 
                load_fs(vmx->host_state.fs_sel);
4274
 
@@ -429,8 +477,7 @@ static void vmx_load_host_state(struct vcpu_vmx *vmx)
4275
 
        reload_tss();
4276
 
        save_msrs(vmx->guest_msrs, vmx->save_nmsrs);
4277
 
        load_msrs(vmx->host_msrs, vmx->save_nmsrs);
4278
 
-       if (msr_efer_need_save_restore(vmx))
4279
 
-               load_msrs(vmx->host_msrs + vmx->msr_offset_efer, 1);
4280
 
+       reload_host_efer(vmx);
4281
 
 }
4282
 
 
4283
 
 /*
4284
 
@@ -480,7 +527,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
4285
 
                 * Make sure the time stamp counter is monotonous.
4286
 
                 */
4287
 
                rdtscll(tsc_this);
4288
 
-               delta = vcpu->host_tsc - tsc_this;
4289
 
+               delta = vcpu->arch.host_tsc - tsc_this;
4290
 
                vmcs_write64(TSC_OFFSET, vmcs_read64(TSC_OFFSET) + delta);
4291
 
        }
4292
 
 }
4293
 
@@ -488,7 +535,6 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
4294
 
 static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
4295
 
 {
4296
 
        vmx_load_host_state(to_vmx(vcpu));
4297
 
-       kvm_put_guest_fpu(vcpu);
4298
 
 }
4299
 
 
4300
 
 static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
4301
 
@@ -497,7 +543,7 @@ static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
4302
 
                return;
4303
 
        vcpu->fpu_active = 1;
4304
 
        vmcs_clear_bits(GUEST_CR0, X86_CR0_TS);
4305
 
-       if (vcpu->cr0 & X86_CR0_TS)
4306
 
+       if (vcpu->arch.cr0 & X86_CR0_TS)
4307
 
                vmcs_set_bits(GUEST_CR0, X86_CR0_TS);
4308
 
        update_exception_bitmap(vcpu);
4309
 
 }
4310
 
@@ -523,7 +569,7 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
4311
 
 
4312
 
 static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
4313
 
 {
4314
 
-       if (vcpu->rmode.active)
4315
 
+       if (vcpu->arch.rmode.active)
4316
 
                rflags |= IOPL_MASK | X86_EFLAGS_VM;
4317
 
        vmcs_writel(GUEST_RFLAGS, rflags);
4318
 
 }
4319
 
@@ -545,19 +591,25 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
4320
 
        if (interruptibility & 3)
4321
 
                vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
4322
 
                             interruptibility & ~3);
4323
 
-       vcpu->interrupt_window_open = 1;
4324
 
+       vcpu->arch.interrupt_window_open = 1;
4325
 
 }
4326
 
 
4327
 
-static void vmx_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code)
4328
 
+static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
4329
 
+                               bool has_error_code, u32 error_code)
4330
 
 {
4331
 
-       printk(KERN_DEBUG "inject_general_protection: rip 0x%lx\n",
4332
 
-              vmcs_readl(GUEST_RIP));
4333
 
-       vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
4334
 
        vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
4335
 
-                    GP_VECTOR |
4336
 
-                    INTR_TYPE_EXCEPTION |
4337
 
-                    INTR_INFO_DELIEVER_CODE_MASK |
4338
 
-                    INTR_INFO_VALID_MASK);
4339
 
+                    nr | INTR_TYPE_EXCEPTION
4340
 
+                    | (has_error_code ? INTR_INFO_DELIEVER_CODE_MASK : 0)
4341
 
+                    | INTR_INFO_VALID_MASK);
4342
 
+       if (has_error_code)
4343
 
+               vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
4344
 
+}
4345
 
+
4346
 
+static bool vmx_exception_injected(struct kvm_vcpu *vcpu)
4347
 
+{
4348
 
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
4349
 
+
4350
 
+       return !(vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK);
4351
 
 }
4352
 
 
4353
 
 /*
4354
 
@@ -608,7 +660,7 @@ static void setup_msrs(struct vcpu_vmx *vmx)
4355
 
                 * if efer.sce is enabled.
4356
 
                 */
4357
 
                index = __find_msr_index(vmx, MSR_K6_STAR);
4358
 
-               if ((index >= 0) && (vmx->vcpu.shadow_efer & EFER_SCE))
4359
 
+               if ((index >= 0) && (vmx->vcpu.arch.shadow_efer & EFER_SCE))
4360
 
                        move_msr_up(vmx, index, save_nmsrs++);
4361
 
        }
4362
 
 #endif
4363
 
@@ -712,8 +764,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
4364
 
 #ifdef CONFIG_X86_64
4365
 
        case MSR_EFER:
4366
 
                ret = kvm_set_msr_common(vcpu, msr_index, data);
4367
 
-               if (vmx->host_state.loaded)
4368
 
+               if (vmx->host_state.loaded) {
4369
 
+                       reload_host_efer(vmx);
4370
 
                        load_transition_efer(vmx);
4371
 
+               }
4372
 
                break;
4373
 
        case MSR_FS_BASE:
4374
 
                vmcs_writel(GUEST_FS_BASE, data);
4375
 
@@ -750,12 +804,12 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
4376
 
 
4377
 
 /*
4378
 
  * Sync the rsp and rip registers into the vcpu structure.  This allows
4379
 
- * registers to be accessed by indexing vcpu->regs.
4380
 
+ * registers to be accessed by indexing vcpu->arch.regs.
4381
 
  */
4382
 
 static void vcpu_load_rsp_rip(struct kvm_vcpu *vcpu)
4383
 
 {
4384
 
-       vcpu->regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP);
4385
 
-       vcpu->rip = vmcs_readl(GUEST_RIP);
4386
 
+       vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP);
4387
 
+       vcpu->arch.rip = vmcs_readl(GUEST_RIP);
4388
 
 }
4389
 
 
4390
 
 /*
4391
 
@@ -764,8 +818,8 @@ static void vcpu_load_rsp_rip(struct kvm_vcpu *vcpu)
4392
 
  */
4393
 
 static void vcpu_put_rsp_rip(struct kvm_vcpu *vcpu)
4394
 
 {
4395
 
-       vmcs_writel(GUEST_RSP, vcpu->regs[VCPU_REGS_RSP]);
4396
 
-       vmcs_writel(GUEST_RIP, vcpu->rip);
4397
 
+       vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
4398
 
+       vmcs_writel(GUEST_RIP, vcpu->arch.rip);
4399
 
 }
4400
 
 
4401
 
 static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
4402
 
@@ -808,14 +862,15 @@ static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
4403
 
 
4404
 
 static int vmx_get_irq(struct kvm_vcpu *vcpu)
4405
 
 {
4406
 
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
4407
 
        u32 idtv_info_field;
4408
 
 
4409
 
-       idtv_info_field = vmcs_read32(IDT_VECTORING_INFO_FIELD);
4410
 
+       idtv_info_field = vmx->idt_vectoring_info;
4411
 
        if (idtv_info_field & INTR_INFO_VALID_MASK) {
4412
 
                if (is_external_interrupt(idtv_info_field))
4413
 
                        return idtv_info_field & VECTORING_INFO_VECTOR_MASK;
4414
 
                else
4415
 
-                       printk("pending exception: not handled yet\n");
4416
 
+                       printk(KERN_DEBUG "pending exception: not handled yet\n");
4417
 
        }
4418
 
        return -1;
4419
 
 }
4420
 
@@ -863,7 +918,7 @@ static void hardware_disable(void *garbage)
4421
 
 }
4422
 
 
4423
 
 static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
4424
 
-                                     u32 msr, u32* result)
4425
 
+                                     u32 msr, u32 *result)
4426
 
 {
4427
 
        u32 vmx_msr_low, vmx_msr_high;
4428
 
        u32 ctl = ctl_min | ctl_opt;
4429
 
@@ -887,6 +942,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
4430
 
        u32 min, opt;
4431
 
        u32 _pin_based_exec_control = 0;
4432
 
        u32 _cpu_based_exec_control = 0;
4433
 
+       u32 _cpu_based_2nd_exec_control = 0;
4434
 
        u32 _vmexit_control = 0;
4435
 
        u32 _vmentry_control = 0;
4436
 
 
4437
 
@@ -904,11 +960,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
4438
 
              CPU_BASED_USE_IO_BITMAPS |
4439
 
              CPU_BASED_MOV_DR_EXITING |
4440
 
              CPU_BASED_USE_TSC_OFFSETING;
4441
 
-#ifdef CONFIG_X86_64
4442
 
-       opt = CPU_BASED_TPR_SHADOW;
4443
 
-#else
4444
 
-       opt = 0;
4445
 
-#endif
4446
 
+       opt = CPU_BASED_TPR_SHADOW |
4447
 
+             CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
4448
 
        if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
4449
 
                                &_cpu_based_exec_control) < 0)
4450
 
                return -EIO;
4451
 
@@ -917,6 +970,19 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
4452
 
                _cpu_based_exec_control &= ~CPU_BASED_CR8_LOAD_EXITING &
4453
 
                                           ~CPU_BASED_CR8_STORE_EXITING;
4454
 
 #endif
4455
 
+       if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
4456
 
+               min = 0;
4457
 
+               opt = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
4458
 
+                       SECONDARY_EXEC_WBINVD_EXITING;
4459
 
+               if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS2,
4460
 
+                                       &_cpu_based_2nd_exec_control) < 0)
4461
 
+                       return -EIO;
4462
 
+       }
4463
 
+#ifndef CONFIG_X86_64
4464
 
+       if (!(_cpu_based_2nd_exec_control &
4465
 
+                               SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
4466
 
+               _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
4467
 
+#endif
4468
 
 
4469
 
        min = 0;
4470
 
 #ifdef CONFIG_X86_64
4471
 
@@ -954,6 +1020,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
4472
 
 
4473
 
        vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control;
4474
 
        vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control;
4475
 
+       vmcs_conf->cpu_based_2nd_exec_ctrl = _cpu_based_2nd_exec_control;
4476
 
        vmcs_conf->vmexit_ctrl         = _vmexit_control;
4477
 
        vmcs_conf->vmentry_ctrl        = _vmentry_control;
4478
 
 
4479
 
@@ -1043,15 +1110,15 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
4480
 
 {
4481
 
        unsigned long flags;
4482
 
 
4483
 
-       vcpu->rmode.active = 0;
4484
 
+       vcpu->arch.rmode.active = 0;
4485
 
 
4486
 
-       vmcs_writel(GUEST_TR_BASE, vcpu->rmode.tr.base);
4487
 
-       vmcs_write32(GUEST_TR_LIMIT, vcpu->rmode.tr.limit);
4488
 
-       vmcs_write32(GUEST_TR_AR_BYTES, vcpu->rmode.tr.ar);
4489
 
+       vmcs_writel(GUEST_TR_BASE, vcpu->arch.rmode.tr.base);
4490
 
+       vmcs_write32(GUEST_TR_LIMIT, vcpu->arch.rmode.tr.limit);
4491
 
+       vmcs_write32(GUEST_TR_AR_BYTES, vcpu->arch.rmode.tr.ar);
4492
 
 
4493
 
        flags = vmcs_readl(GUEST_RFLAGS);
4494
 
        flags &= ~(IOPL_MASK | X86_EFLAGS_VM);
4495
 
-       flags |= (vcpu->rmode.save_iopl << IOPL_SHIFT);
4496
 
+       flags |= (vcpu->arch.rmode.save_iopl << IOPL_SHIFT);
4497
 
        vmcs_writel(GUEST_RFLAGS, flags);
4498
 
 
4499
 
        vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) |
4500
 
@@ -1059,10 +1126,10 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
4501
 
 
4502
 
        update_exception_bitmap(vcpu);
4503
 
 
4504
 
-       fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->rmode.es);
4505
 
-       fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->rmode.ds);
4506
 
-       fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->rmode.gs);
4507
 
-       fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->rmode.fs);
4508
 
+       fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->arch.rmode.es);
4509
 
+       fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->arch.rmode.ds);
4510
 
+       fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->arch.rmode.gs);
4511
 
+       fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);
4512
 
 
4513
 
        vmcs_write16(GUEST_SS_SELECTOR, 0);
4514
 
        vmcs_write32(GUEST_SS_AR_BYTES, 0x93);
4515
 
@@ -1072,10 +1139,14 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
4516
 
        vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
4517
 
 }
4518
 
 
4519
 
-static gva_t rmode_tss_base(struct kvm* kvm)
4520
 
+static gva_t rmode_tss_base(struct kvm *kvm)
4521
 
 {
4522
 
-       gfn_t base_gfn = kvm->memslots[0].base_gfn + kvm->memslots[0].npages - 3;
4523
 
-       return base_gfn << PAGE_SHIFT;
4524
 
+       if (!kvm->arch.tss_addr) {
4525
 
+               gfn_t base_gfn = kvm->memslots[0].base_gfn +
4526
 
+                                kvm->memslots[0].npages - 3;
4527
 
+               return base_gfn << PAGE_SHIFT;
4528
 
+       }
4529
 
+       return kvm->arch.tss_addr;
4530
 
 }
4531
 
 
4532
 
 static void fix_rmode_seg(int seg, struct kvm_save_segment *save)
4533
 
@@ -1086,7 +1157,8 @@ static void fix_rmode_seg(int seg, struct kvm_save_segment *save)
4534
 
        save->base = vmcs_readl(sf->base);
4535
 
        save->limit = vmcs_read32(sf->limit);
4536
 
        save->ar = vmcs_read32(sf->ar_bytes);
4537
 
-       vmcs_write16(sf->selector, vmcs_readl(sf->base) >> 4);
4538
 
+       vmcs_write16(sf->selector, save->base >> 4);
4539
 
+       vmcs_write32(sf->base, save->base & 0xfffff);
4540
 
        vmcs_write32(sf->limit, 0xffff);
4541
 
        vmcs_write32(sf->ar_bytes, 0xf3);
4542
 
 }
4543
 
@@ -1095,19 +1167,19 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
4544
 
 {
4545
 
        unsigned long flags;
4546
 
 
4547
 
-       vcpu->rmode.active = 1;
4548
 
+       vcpu->arch.rmode.active = 1;
4549
 
 
4550
 
-       vcpu->rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
4551
 
+       vcpu->arch.rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
4552
 
        vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));
4553
 
 
4554
 
-       vcpu->rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT);
4555
 
+       vcpu->arch.rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT);
4556
 
        vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
4557
 
 
4558
 
-       vcpu->rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES);
4559
 
+       vcpu->arch.rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES);
4560
 
        vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
4561
 
 
4562
 
        flags = vmcs_readl(GUEST_RFLAGS);
4563
 
-       vcpu->rmode.save_iopl = (flags & IOPL_MASK) >> IOPL_SHIFT;
4564
 
+       vcpu->arch.rmode.save_iopl = (flags & IOPL_MASK) >> IOPL_SHIFT;
4565
 
 
4566
 
        flags |= IOPL_MASK | X86_EFLAGS_VM;
4567
 
 
4568
 
@@ -1125,10 +1197,10 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
4569
 
                vmcs_writel(GUEST_CS_BASE, 0xf0000);
4570
 
        vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4);
4571
 
 
4572
 
-       fix_rmode_seg(VCPU_SREG_ES, &vcpu->rmode.es);
4573
 
-       fix_rmode_seg(VCPU_SREG_DS, &vcpu->rmode.ds);
4574
 
-       fix_rmode_seg(VCPU_SREG_GS, &vcpu->rmode.gs);
4575
 
-       fix_rmode_seg(VCPU_SREG_FS, &vcpu->rmode.fs);
4576
 
+       fix_rmode_seg(VCPU_SREG_ES, &vcpu->arch.rmode.es);
4577
 
+       fix_rmode_seg(VCPU_SREG_DS, &vcpu->arch.rmode.ds);
4578
 
+       fix_rmode_seg(VCPU_SREG_GS, &vcpu->arch.rmode.gs);
4579
 
+       fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);
4580
 
 
4581
 
        kvm_mmu_reset_context(vcpu);
4582
 
        init_rmode_tss(vcpu->kvm);
4583
 
@@ -1149,7 +1221,7 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
4584
 
                             | AR_TYPE_BUSY_64_TSS);
4585
 
        }
4586
 
 
4587
 
-       vcpu->shadow_efer |= EFER_LMA;
4588
 
+       vcpu->arch.shadow_efer |= EFER_LMA;
4589
 
 
4590
 
        find_msr_entry(to_vmx(vcpu), MSR_EFER)->data |= EFER_LMA | EFER_LME;
4591
 
        vmcs_write32(VM_ENTRY_CONTROLS,
4592
 
@@ -1159,7 +1231,7 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
4593
 
 
4594
 
 static void exit_lmode(struct kvm_vcpu *vcpu)
4595
 
 {
4596
 
-       vcpu->shadow_efer &= ~EFER_LMA;
4597
 
+       vcpu->arch.shadow_efer &= ~EFER_LMA;
4598
 
 
4599
 
        vmcs_write32(VM_ENTRY_CONTROLS,
4600
 
                     vmcs_read32(VM_ENTRY_CONTROLS)
4601
 
@@ -1170,22 +1242,22 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
4602
 
 
4603
 
 static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
4604
 
 {
4605
 
-       vcpu->cr4 &= KVM_GUEST_CR4_MASK;
4606
 
-       vcpu->cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK;
4607
 
+       vcpu->arch.cr4 &= KVM_GUEST_CR4_MASK;
4608
 
+       vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK;
4609
 
 }
4610
 
 
4611
 
 static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
4612
 
 {
4613
 
        vmx_fpu_deactivate(vcpu);
4614
 
 
4615
 
-       if (vcpu->rmode.active && (cr0 & X86_CR0_PE))
4616
 
+       if (vcpu->arch.rmode.active && (cr0 & X86_CR0_PE))
4617
 
                enter_pmode(vcpu);
4618
 
 
4619
 
-       if (!vcpu->rmode.active && !(cr0 & X86_CR0_PE))
4620
 
+       if (!vcpu->arch.rmode.active && !(cr0 & X86_CR0_PE))
4621
 
                enter_rmode(vcpu);
4622
 
 
4623
 
 #ifdef CONFIG_X86_64
4624
 
-       if (vcpu->shadow_efer & EFER_LME) {
4625
 
+       if (vcpu->arch.shadow_efer & EFER_LME) {
4626
 
                if (!is_paging(vcpu) && (cr0 & X86_CR0_PG))
4627
 
                        enter_lmode(vcpu);
4628
 
                if (is_paging(vcpu) && !(cr0 & X86_CR0_PG))
4629
 
@@ -1196,7 +1268,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
4630
 
        vmcs_writel(CR0_READ_SHADOW, cr0);
4631
 
        vmcs_writel(GUEST_CR0,
4632
 
                    (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON);
4633
 
-       vcpu->cr0 = cr0;
4634
 
+       vcpu->arch.cr0 = cr0;
4635
 
 
4636
 
        if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE))
4637
 
                vmx_fpu_activate(vcpu);
4638
 
@@ -1205,16 +1277,16 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
4639
 
 static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
4640
 
 {
4641
 
        vmcs_writel(GUEST_CR3, cr3);
4642
 
-       if (vcpu->cr0 & X86_CR0_PE)
4643
 
+       if (vcpu->arch.cr0 & X86_CR0_PE)
4644
 
                vmx_fpu_deactivate(vcpu);
4645
 
 }
4646
 
 
4647
 
 static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
4648
 
 {
4649
 
        vmcs_writel(CR4_READ_SHADOW, cr4);
4650
 
-       vmcs_writel(GUEST_CR4, cr4 | (vcpu->rmode.active ?
4651
 
+       vmcs_writel(GUEST_CR4, cr4 | (vcpu->arch.rmode.active ?
4652
 
                    KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON));
4653
 
-       vcpu->cr4 = cr4;
4654
 
+       vcpu->arch.cr4 = cr4;
4655
 
 }
4656
 
 
4657
 
 #ifdef CONFIG_X86_64
4658
 
@@ -1224,7 +1296,7 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
4659
 
        struct vcpu_vmx *vmx = to_vmx(vcpu);
4660
 
        struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
4661
 
 
4662
 
-       vcpu->shadow_efer = efer;
4663
 
+       vcpu->arch.shadow_efer = efer;
4664
 
        if (efer & EFER_LMA) {
4665
 
                vmcs_write32(VM_ENTRY_CONTROLS,
4666
 
                                     vmcs_read32(VM_ENTRY_CONTROLS) |
4667
 
@@ -1301,17 +1373,17 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
4668
 
        struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
4669
 
        u32 ar;
4670
 
 
4671
 
-       if (vcpu->rmode.active && seg == VCPU_SREG_TR) {
4672
 
-               vcpu->rmode.tr.selector = var->selector;
4673
 
-               vcpu->rmode.tr.base = var->base;
4674
 
-               vcpu->rmode.tr.limit = var->limit;
4675
 
-               vcpu->rmode.tr.ar = vmx_segment_access_rights(var);
4676
 
+       if (vcpu->arch.rmode.active && seg == VCPU_SREG_TR) {
4677
 
+               vcpu->arch.rmode.tr.selector = var->selector;
4678
 
+               vcpu->arch.rmode.tr.base = var->base;
4679
 
+               vcpu->arch.rmode.tr.limit = var->limit;
4680
 
+               vcpu->arch.rmode.tr.ar = vmx_segment_access_rights(var);
4681
 
                return;
4682
 
        }
4683
 
        vmcs_writel(sf->base, var->base);
4684
 
        vmcs_write32(sf->limit, var->limit);
4685
 
        vmcs_write16(sf->selector, var->selector);
4686
 
-       if (vcpu->rmode.active && var->s) {
4687
 
+       if (vcpu->arch.rmode.active && var->s) {
4688
 
                /*
4689
 
                 * Hack real-mode segments into vm86 compatibility.
4690
 
                 */
4691
 
@@ -1355,35 +1427,30 @@ static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
4692
 
        vmcs_writel(GUEST_GDTR_BASE, dt->base);
4693
 
 }
4694
 
 
4695
 
-static int init_rmode_tss(struct kvm* kvm)
4696
 
+static int init_rmode_tss(struct kvm *kvm)
4697
 
 {
4698
 
-       struct page *p1, *p2, *p3;
4699
 
        gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT;
4700
 
-       char *page;
4701
 
-
4702
 
-       p1 = gfn_to_page(kvm, fn++);
4703
 
-       p2 = gfn_to_page(kvm, fn++);
4704
 
-       p3 = gfn_to_page(kvm, fn);
4705
 
+       u16 data = 0;
4706
 
+       int r;
4707
 
 
4708
 
-       if (!p1 || !p2 || !p3) {
4709
 
-               kvm_printf(kvm,"%s: gfn_to_page failed\n", __FUNCTION__);
4710
 
+       r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
4711
 
+       if (r < 0)
4712
 
+               return 0;
4713
 
+       data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE;
4714
 
+       r = kvm_write_guest_page(kvm, fn++, &data, 0x66, sizeof(u16));
4715
 
+       if (r < 0)
4716
 
+               return 0;
4717
 
+       r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE);
4718
 
+       if (r < 0)
4719
 
+               return 0;
4720
 
+       r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
4721
 
+       if (r < 0)
4722
 
+               return 0;
4723
 
+       data = ~0;
4724
 
+       r = kvm_write_guest_page(kvm, fn, &data, RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1,
4725
 
+                       sizeof(u8));
4726
 
+       if (r < 0)
4727
 
                return 0;
4728
 
-       }
4729
 
-
4730
 
-       page = kmap_atomic(p1, KM_USER0);
4731
 
-       clear_page(page);
4732
 
-       *(u16*)(page + 0x66) = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE;
4733
 
-       kunmap_atomic(page, KM_USER0);
4734
 
-
4735
 
-       page = kmap_atomic(p2, KM_USER0);
4736
 
-       clear_page(page);
4737
 
-       kunmap_atomic(page, KM_USER0);
4738
 
-
4739
 
-       page = kmap_atomic(p3, KM_USER0);
4740
 
-       clear_page(page);
4741
 
-       *(page + RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1) = ~0;
4742
 
-       kunmap_atomic(page, KM_USER0);
4743
 
-
4744
 
        return 1;
4745
 
 }
4746
 
 
4747
 
@@ -1397,6 +1464,27 @@ static void seg_setup(int seg)
4748
 
        vmcs_write32(sf->ar_bytes, 0x93);
4749
 
 }
4750
 
 
4751
 
+static int alloc_apic_access_page(struct kvm *kvm)
4752
 
+{
4753
 
+       struct kvm_userspace_memory_region kvm_userspace_mem;
4754
 
+       int r = 0;
4755
 
+
4756
 
+       mutex_lock(&kvm->lock);
4757
 
+       if (kvm->arch.apic_access_page)
4758
 
+               goto out;
4759
 
+       kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
4760
 
+       kvm_userspace_mem.flags = 0;
4761
 
+       kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL;
4762
 
+       kvm_userspace_mem.memory_size = PAGE_SIZE;
4763
 
+       r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0);
4764
 
+       if (r)
4765
 
+               goto out;
4766
 
+       kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00);
4767
 
+out:
4768
 
+       mutex_unlock(&kvm->lock);
4769
 
+       return r;
4770
 
+}
4771
 
+
4772
 
 /*
4773
 
  * Sets up the vmcs for emulated real mode.
4774
 
  */
4775
 
@@ -1407,92 +1495,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
4776
 
        unsigned long a;
4777
 
        struct descriptor_table dt;
4778
 
        int i;
4779
 
-       int ret = 0;
4780
 
        unsigned long kvm_vmx_return;
4781
 
-       u64 msr;
4782
 
        u32 exec_control;
4783
 
 
4784
 
-       if (!init_rmode_tss(vmx->vcpu.kvm)) {
4785
 
-               ret = -ENOMEM;
4786
 
-               goto out;
4787
 
-       }
4788
 
-
4789
 
-       vmx->vcpu.rmode.active = 0;
4790
 
-
4791
 
-       vmx->vcpu.regs[VCPU_REGS_RDX] = get_rdx_init_val();
4792
 
-       set_cr8(&vmx->vcpu, 0);
4793
 
-       msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
4794
 
-       if (vmx->vcpu.vcpu_id == 0)
4795
 
-               msr |= MSR_IA32_APICBASE_BSP;
4796
 
-       kvm_set_apic_base(&vmx->vcpu, msr);
4797
 
-
4798
 
-       fx_init(&vmx->vcpu);
4799
 
-
4800
 
-       /*
4801
 
-        * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
4802
 
-        * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4.  Sigh.
4803
 
-        */
4804
 
-       if (vmx->vcpu.vcpu_id == 0) {
4805
 
-               vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
4806
 
-               vmcs_writel(GUEST_CS_BASE, 0x000f0000);
4807
 
-       } else {
4808
 
-               vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.sipi_vector << 8);
4809
 
-               vmcs_writel(GUEST_CS_BASE, vmx->vcpu.sipi_vector << 12);
4810
 
-       }
4811
 
-       vmcs_write32(GUEST_CS_LIMIT, 0xffff);
4812
 
-       vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
4813
 
-
4814
 
-       seg_setup(VCPU_SREG_DS);
4815
 
-       seg_setup(VCPU_SREG_ES);
4816
 
-       seg_setup(VCPU_SREG_FS);
4817
 
-       seg_setup(VCPU_SREG_GS);
4818
 
-       seg_setup(VCPU_SREG_SS);
4819
 
-
4820
 
-       vmcs_write16(GUEST_TR_SELECTOR, 0);
4821
 
-       vmcs_writel(GUEST_TR_BASE, 0);
4822
 
-       vmcs_write32(GUEST_TR_LIMIT, 0xffff);
4823
 
-       vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
4824
 
-
4825
 
-       vmcs_write16(GUEST_LDTR_SELECTOR, 0);
4826
 
-       vmcs_writel(GUEST_LDTR_BASE, 0);
4827
 
-       vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
4828
 
-       vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);
4829
 
-
4830
 
-       vmcs_write32(GUEST_SYSENTER_CS, 0);
4831
 
-       vmcs_writel(GUEST_SYSENTER_ESP, 0);
4832
 
-       vmcs_writel(GUEST_SYSENTER_EIP, 0);
4833
 
-
4834
 
-       vmcs_writel(GUEST_RFLAGS, 0x02);
4835
 
-       if (vmx->vcpu.vcpu_id == 0)
4836
 
-               vmcs_writel(GUEST_RIP, 0xfff0);
4837
 
-       else
4838
 
-               vmcs_writel(GUEST_RIP, 0);
4839
 
-       vmcs_writel(GUEST_RSP, 0);
4840
 
-
4841
 
-       //todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0
4842
 
-       vmcs_writel(GUEST_DR7, 0x400);
4843
 
-
4844
 
-       vmcs_writel(GUEST_GDTR_BASE, 0);
4845
 
-       vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
4846
 
-
4847
 
-       vmcs_writel(GUEST_IDTR_BASE, 0);
4848
 
-       vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
4849
 
-
4850
 
-       vmcs_write32(GUEST_ACTIVITY_STATE, 0);
4851
 
-       vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
4852
 
-       vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
4853
 
-
4854
 
        /* I/O */
4855
 
        vmcs_write64(IO_BITMAP_A, page_to_phys(vmx_io_bitmap_a));
4856
 
        vmcs_write64(IO_BITMAP_B, page_to_phys(vmx_io_bitmap_b));
4857
 
 
4858
 
-       guest_write_tsc(0);
4859
 
-
4860
 
        vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
4861
 
 
4862
 
-       /* Special registers */
4863
 
-       vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
4864
 
-
4865
 
        /* Control */
4866
 
        vmcs_write32(PIN_BASED_VM_EXEC_CONTROL,
4867
 
                vmcs_config.pin_based_exec_ctrl);
4868
 
@@ -1507,8 +1518,16 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
4869
 
        }
4870
 
        vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control);
4871
 
 
4872
 
-       vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
4873
 
-       vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0);
4874
 
+       if (cpu_has_secondary_exec_ctrls()) {
4875
 
+               exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
4876
 
+               if (!vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))
4877
 
+                       exec_control &=
4878
 
+                               ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
4879
 
+               vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
4880
 
+       }
4881
 
+
4882
 
+       vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, !!bypass_guest_pf);
4883
 
+       vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf);
4884
 
        vmcs_write32(CR3_TARGET_COUNT, 0);           /* 22.2.1 */
4885
 
 
4886
 
        vmcs_writel(HOST_CR0, read_cr0());  /* 22.2.3 */
4887
 
@@ -1536,7 +1555,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
4888
 
        get_idt(&dt);
4889
 
        vmcs_writel(HOST_IDTR_BASE, dt.base);   /* 22.2.4 */
4890
 
 
4891
 
-       asm ("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return));
4892
 
+       asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return));
4893
 
        vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */
4894
 
        vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
4895
 
        vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
4896
 
@@ -1567,97 +1586,145 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
4897
 
                ++vmx->nmsrs;
4898
 
        }
4899
 
 
4900
 
-       setup_msrs(vmx);
4901
 
-
4902
 
        vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
4903
 
 
4904
 
        /* 22.2.1, 20.8.1 */
4905
 
        vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl);
4906
 
 
4907
 
-       vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);  /* 22.2.1 */
4908
 
-
4909
 
-#ifdef CONFIG_X86_64
4910
 
-       vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
4911
 
-       if (vm_need_tpr_shadow(vmx->vcpu.kvm))
4912
 
-               vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
4913
 
-                            page_to_phys(vmx->vcpu.apic->regs_page));
4914
 
-       vmcs_write32(TPR_THRESHOLD, 0);
4915
 
-#endif
4916
 
-
4917
 
        vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
4918
 
        vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK);
4919
 
 
4920
 
-       vmx->vcpu.cr0 = 0x60000010;
4921
 
-       vmx_set_cr0(&vmx->vcpu, vmx->vcpu.cr0); // enter rmode
4922
 
-       vmx_set_cr4(&vmx->vcpu, 0);
4923
 
-#ifdef CONFIG_X86_64
4924
 
-       vmx_set_efer(&vmx->vcpu, 0);
4925
 
-#endif
4926
 
-       vmx_fpu_activate(&vmx->vcpu);
4927
 
-       update_exception_bitmap(&vmx->vcpu);
4928
 
+       if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))
4929
 
+               if (alloc_apic_access_page(vmx->vcpu.kvm) != 0)
4930
 
+                       return -ENOMEM;
4931
 
 
4932
 
        return 0;
4933
 
-
4934
 
-out:
4935
 
-       return ret;
4936
 
 }
4937
 
 
4938
 
-static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
4939
 
+static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
4940
 
 {
4941
 
        struct vcpu_vmx *vmx = to_vmx(vcpu);
4942
 
+       u64 msr;
4943
 
+       int ret;
4944
 
 
4945
 
-       vmx_vcpu_setup(vmx);
4946
 
-}
4947
 
-
4948
 
-static void inject_rmode_irq(struct kvm_vcpu *vcpu, int irq)
4949
 
-{
4950
 
-       u16 ent[2];
4951
 
-       u16 cs;
4952
 
-       u16 ip;
4953
 
-       unsigned long flags;
4954
 
-       unsigned long ss_base = vmcs_readl(GUEST_SS_BASE);
4955
 
-       u16 sp =  vmcs_readl(GUEST_RSP);
4956
 
-       u32 ss_limit = vmcs_read32(GUEST_SS_LIMIT);
4957
 
-
4958
 
-       if (sp > ss_limit || sp < 6 ) {
4959
 
-               vcpu_printf(vcpu, "%s: #SS, rsp 0x%lx ss 0x%lx limit 0x%x\n",
4960
 
-                           __FUNCTION__,
4961
 
-                           vmcs_readl(GUEST_RSP),
4962
 
-                           vmcs_readl(GUEST_SS_BASE),
4963
 
-                           vmcs_read32(GUEST_SS_LIMIT));
4964
 
-               return;
4965
 
+       if (!init_rmode_tss(vmx->vcpu.kvm)) {
4966
 
+               ret = -ENOMEM;
4967
 
+               goto out;
4968
 
        }
4969
 
 
4970
 
-       if (emulator_read_std(irq * sizeof(ent), &ent, sizeof(ent), vcpu) !=
4971
 
-                                                       X86EMUL_CONTINUE) {
4972
 
-               vcpu_printf(vcpu, "%s: read guest err\n", __FUNCTION__);
4973
 
-               return;
4974
 
+       vmx->vcpu.arch.rmode.active = 0;
4975
 
+
4976
 
+       vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
4977
 
+       set_cr8(&vmx->vcpu, 0);
4978
 
+       msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
4979
 
+       if (vmx->vcpu.vcpu_id == 0)
4980
 
+               msr |= MSR_IA32_APICBASE_BSP;
4981
 
+       kvm_set_apic_base(&vmx->vcpu, msr);
4982
 
+
4983
 
+       fx_init(&vmx->vcpu);
4984
 
+
4985
 
+       /*
4986
 
+        * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
4987
 
+        * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4.  Sigh.
4988
 
+        */
4989
 
+       if (vmx->vcpu.vcpu_id == 0) {
4990
 
+               vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
4991
 
+               vmcs_writel(GUEST_CS_BASE, 0x000f0000);
4992
 
+       } else {
4993
 
+               vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8);
4994
 
+               vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12);
4995
 
        }
4996
 
+       vmcs_write32(GUEST_CS_LIMIT, 0xffff);
4997
 
+       vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
4998
 
 
4999
 
-       flags =  vmcs_readl(GUEST_RFLAGS);
5000
 
-       cs =  vmcs_readl(GUEST_CS_BASE) >> 4;
5001
 
-       ip =  vmcs_readl(GUEST_RIP);
5002
 
+       seg_setup(VCPU_SREG_DS);
5003
 
+       seg_setup(VCPU_SREG_ES);
5004
 
+       seg_setup(VCPU_SREG_FS);
5005
 
+       seg_setup(VCPU_SREG_GS);
5006
 
+       seg_setup(VCPU_SREG_SS);
5007
 
 
5008
 
+       vmcs_write16(GUEST_TR_SELECTOR, 0);
5009
 
+       vmcs_writel(GUEST_TR_BASE, 0);
5010
 
+       vmcs_write32(GUEST_TR_LIMIT, 0xffff);
5011
 
+       vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
5012
 
 
5013
 
-       if (emulator_write_emulated(ss_base + sp - 2, &flags, 2, vcpu) != X86EMUL_CONTINUE ||
5014
 
-           emulator_write_emulated(ss_base + sp - 4, &cs, 2, vcpu) != X86EMUL_CONTINUE ||
5015
 
-           emulator_write_emulated(ss_base + sp - 6, &ip, 2, vcpu) != X86EMUL_CONTINUE) {
5016
 
-               vcpu_printf(vcpu, "%s: write guest err\n", __FUNCTION__);
5017
 
-               return;
5018
 
+       vmcs_write16(GUEST_LDTR_SELECTOR, 0);
5019
 
+       vmcs_writel(GUEST_LDTR_BASE, 0);
5020
 
+       vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
5021
 
+       vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);
5022
 
+
5023
 
+       vmcs_write32(GUEST_SYSENTER_CS, 0);
5024
 
+       vmcs_writel(GUEST_SYSENTER_ESP, 0);
5025
 
+       vmcs_writel(GUEST_SYSENTER_EIP, 0);
5026
 
+
5027
 
+       vmcs_writel(GUEST_RFLAGS, 0x02);
5028
 
+       if (vmx->vcpu.vcpu_id == 0)
5029
 
+               vmcs_writel(GUEST_RIP, 0xfff0);
5030
 
+       else
5031
 
+               vmcs_writel(GUEST_RIP, 0);
5032
 
+       vmcs_writel(GUEST_RSP, 0);
5033
 
+
5034
 
+       /* todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0 */
5035
 
+       vmcs_writel(GUEST_DR7, 0x400);
5036
 
+
5037
 
+       vmcs_writel(GUEST_GDTR_BASE, 0);
5038
 
+       vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
5039
 
+
5040
 
+       vmcs_writel(GUEST_IDTR_BASE, 0);
5041
 
+       vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
5042
 
+
5043
 
+       vmcs_write32(GUEST_ACTIVITY_STATE, 0);
5044
 
+       vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
5045
 
+       vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
5046
 
+
5047
 
+       guest_write_tsc(0);
5048
 
+
5049
 
+       /* Special registers */
5050
 
+       vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
5051
 
+
5052
 
+       setup_msrs(vmx);
5053
 
+
5054
 
+       vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);  /* 22.2.1 */
5055
 
+
5056
 
+       if (cpu_has_vmx_tpr_shadow()) {
5057
 
+               vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
5058
 
+               if (vm_need_tpr_shadow(vmx->vcpu.kvm))
5059
 
+                       vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
5060
 
+                               page_to_phys(vmx->vcpu.arch.apic->regs_page));
5061
 
+               vmcs_write32(TPR_THRESHOLD, 0);
5062
 
        }
5063
 
 
5064
 
-       vmcs_writel(GUEST_RFLAGS, flags &
5065
 
-                   ~( X86_EFLAGS_IF | X86_EFLAGS_AC | X86_EFLAGS_TF));
5066
 
-       vmcs_write16(GUEST_CS_SELECTOR, ent[1]) ;
5067
 
-       vmcs_writel(GUEST_CS_BASE, ent[1] << 4);
5068
 
-       vmcs_writel(GUEST_RIP, ent[0]);
5069
 
-       vmcs_writel(GUEST_RSP, (vmcs_readl(GUEST_RSP) & ~0xffff) | (sp - 6));
5070
 
+       if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))
5071
 
+               vmcs_write64(APIC_ACCESS_ADDR,
5072
 
+                            page_to_phys(vmx->vcpu.kvm->arch.apic_access_page));
5073
 
+
5074
 
+       vmx->vcpu.arch.cr0 = 0x60000010;
5075
 
+       vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */
5076
 
+       vmx_set_cr4(&vmx->vcpu, 0);
5077
 
+#ifdef CONFIG_X86_64
5078
 
+       vmx_set_efer(&vmx->vcpu, 0);
5079
 
+#endif
5080
 
+       vmx_fpu_activate(&vmx->vcpu);
5081
 
+       update_exception_bitmap(&vmx->vcpu);
5082
 
+
5083
 
+       return 0;
5084
 
+
5085
 
+out:
5086
 
+       return ret;
5087
 
 }
5088
 
 
5089
 
 static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
5090
 
 {
5091
 
-       if (vcpu->rmode.active) {
5092
 
-               inject_rmode_irq(vcpu, irq);
5093
 
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
5094
 
+
5095
 
+       if (vcpu->arch.rmode.active) {
5096
 
+               vmx->rmode.irq.pending = true;
5097
 
+               vmx->rmode.irq.vector = irq;
5098
 
+               vmx->rmode.irq.rip = vmcs_readl(GUEST_RIP);
5099
 
+               vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
5100
 
+                            irq | INTR_TYPE_SOFT_INTR | INTR_INFO_VALID_MASK);
5101
 
+               vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
5102
 
+               vmcs_writel(GUEST_RIP, vmx->rmode.irq.rip - 1);
5103
 
                return;
5104
 
        }
5105
 
        vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
5106
 
@@ -1666,13 +1733,13 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
5107
 
 
5108
 
 static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
5109
 
 {
5110
 
-       int word_index = __ffs(vcpu->irq_summary);
5111
 
-       int bit_index = __ffs(vcpu->irq_pending[word_index]);
5112
 
+       int word_index = __ffs(vcpu->arch.irq_summary);
5113
 
+       int bit_index = __ffs(vcpu->arch.irq_pending[word_index]);
5114
 
        int irq = word_index * BITS_PER_LONG + bit_index;
5115
 
 
5116
 
-       clear_bit(bit_index, &vcpu->irq_pending[word_index]);
5117
 
-       if (!vcpu->irq_pending[word_index])
5118
 
-               clear_bit(word_index, &vcpu->irq_summary);
5119
 
+       clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]);
5120
 
+       if (!vcpu->arch.irq_pending[word_index])
5121
 
+               clear_bit(word_index, &vcpu->arch.irq_summary);
5122
 
        vmx_inject_irq(vcpu, irq);
5123
 
 }
5124
 
 
5125
 
@@ -1682,12 +1749,12 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
5126
 
 {
5127
 
        u32 cpu_based_vm_exec_control;
5128
 
 
5129
 
-       vcpu->interrupt_window_open =
5130
 
+       vcpu->arch.interrupt_window_open =
5131
 
                ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
5132
 
                 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
5133
 
 
5134
 
-       if (vcpu->interrupt_window_open &&
5135
 
-           vcpu->irq_summary &&
5136
 
+       if (vcpu->arch.interrupt_window_open &&
5137
 
+           vcpu->arch.irq_summary &&
5138
 
            !(vmcs_read32(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK))
5139
 
                /*
5140
 
                 * If interrupts enabled, and not blocked by sti or mov ss. Good.
5141
 
@@ -1695,8 +1762,8 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
5142
 
                kvm_do_inject_irq(vcpu);
5143
 
 
5144
 
        cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
5145
 
-       if (!vcpu->interrupt_window_open &&
5146
 
-           (vcpu->irq_summary || kvm_run->request_interrupt_window))
5147
 
+       if (!vcpu->arch.interrupt_window_open &&
5148
 
+           (vcpu->arch.irq_summary || kvm_run->request_interrupt_window))
5149
 
                /*
5150
 
                 * Interrupts blocked.  Wait for unblock.
5151
 
                 */
5152
 
@@ -1706,6 +1773,23 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
5153
 
        vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
5154
 
 }
5155
 
 
5156
 
+static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
5157
 
+{
5158
 
+       int ret;
5159
 
+       struct kvm_userspace_memory_region tss_mem = {
5160
 
+               .slot = 8,
5161
 
+               .guest_phys_addr = addr,
5162
 
+               .memory_size = PAGE_SIZE * 3,
5163
 
+               .flags = 0,
5164
 
+       };
5165
 
+
5166
 
+       ret = kvm_set_memory_region(kvm, &tss_mem, 0);
5167
 
+       if (ret)
5168
 
+               return ret;
5169
 
+       kvm->arch.tss_addr = addr;
5170
 
+       return 0;
5171
 
+}
5172
 
+
5173
 
 static void kvm_guest_debug_pre(struct kvm_vcpu *vcpu)
5174
 
 {
5175
 
        struct kvm_guest_debug *dbg = &vcpu->guest_debug;
5176
 
@@ -1727,7 +1811,7 @@ static void kvm_guest_debug_pre(struct kvm_vcpu *vcpu)
5177
 
 static int handle_rmode_exception(struct kvm_vcpu *vcpu,
5178
 
                                  int vec, u32 err_code)
5179
 
 {
5180
 
-       if (!vcpu->rmode.active)
5181
 
+       if (!vcpu->arch.rmode.active)
5182
 
                return 0;
5183
 
 
5184
 
        /*
5185
 
@@ -1735,32 +1819,31 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
5186
 
         * Cause the #SS fault with 0 error code in VM86 mode.
5187
 
         */
5188
 
        if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0)
5189
 
-               if (emulate_instruction(vcpu, NULL, 0, 0) == EMULATE_DONE)
5190
 
+               if (emulate_instruction(vcpu, NULL, 0, 0, 0) == EMULATE_DONE)
5191
 
                        return 1;
5192
 
        return 0;
5193
 
 }
5194
 
 
5195
 
 static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
5196
 
 {
5197
 
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
5198
 
        u32 intr_info, error_code;
5199
 
        unsigned long cr2, rip;
5200
 
        u32 vect_info;
5201
 
        enum emulation_result er;
5202
 
-       int r;
5203
 
 
5204
 
-       vect_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
5205
 
+       vect_info = vmx->idt_vectoring_info;
5206
 
        intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
5207
 
 
5208
 
        if ((vect_info & VECTORING_INFO_VALID_MASK) &&
5209
 
-                                               !is_page_fault(intr_info)) {
5210
 
+                                               !is_page_fault(intr_info))
5211
 
                printk(KERN_ERR "%s: unexpected, vectoring info 0x%x "
5212
 
                       "intr info 0x%x\n", __FUNCTION__, vect_info, intr_info);
5213
 
-       }
5214
 
 
5215
 
        if (!irqchip_in_kernel(vcpu->kvm) && is_external_interrupt(vect_info)) {
5216
 
                int irq = vect_info & VECTORING_INFO_VECTOR_MASK;
5217
 
-               set_bit(irq, vcpu->irq_pending);
5218
 
-               set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary);
5219
 
+               set_bit(irq, vcpu->arch.irq_pending);
5220
 
+               set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
5221
 
        }
5222
 
 
5223
 
        if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */
5224
 
@@ -1771,52 +1854,34 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
5225
 
                return 1;
5226
 
        }
5227
 
 
5228
 
+       if (is_invalid_opcode(intr_info)) {
5229
 
+               er = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
5230
 
+               if (er != EMULATE_DONE)
5231
 
+                       kvm_queue_exception(vcpu, UD_VECTOR);
5232
 
+               return 1;
5233
 
+       }
5234
 
+
5235
 
        error_code = 0;
5236
 
        rip = vmcs_readl(GUEST_RIP);
5237
 
        if (intr_info & INTR_INFO_DELIEVER_CODE_MASK)
5238
 
                error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
5239
 
        if (is_page_fault(intr_info)) {
5240
 
                cr2 = vmcs_readl(EXIT_QUALIFICATION);
5241
 
-
5242
 
-               mutex_lock(&vcpu->kvm->lock);
5243
 
-               r = kvm_mmu_page_fault(vcpu, cr2, error_code);
5244
 
-               if (r < 0) {
5245
 
-                       mutex_unlock(&vcpu->kvm->lock);
5246
 
-                       return r;
5247
 
-               }
5248
 
-               if (!r) {
5249
 
-                       mutex_unlock(&vcpu->kvm->lock);
5250
 
-                       return 1;
5251
 
-               }
5252
 
-
5253
 
-               er = emulate_instruction(vcpu, kvm_run, cr2, error_code);
5254
 
-               mutex_unlock(&vcpu->kvm->lock);
5255
 
-
5256
 
-               switch (er) {
5257
 
-               case EMULATE_DONE:
5258
 
-                       return 1;
5259
 
-               case EMULATE_DO_MMIO:
5260
 
-                       ++vcpu->stat.mmio_exits;
5261
 
-                       return 0;
5262
 
-                case EMULATE_FAIL:
5263
 
-                       kvm_report_emulation_failure(vcpu, "pagetable");
5264
 
-                       break;
5265
 
-               default:
5266
 
-                       BUG();
5267
 
-               }
5268
 
+               return kvm_mmu_page_fault(vcpu, cr2, error_code);
5269
 
        }
5270
 
 
5271
 
-       if (vcpu->rmode.active &&
5272
 
+       if (vcpu->arch.rmode.active &&
5273
 
            handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK,
5274
 
                                                                error_code)) {
5275
 
-               if (vcpu->halt_request) {
5276
 
-                       vcpu->halt_request = 0;
5277
 
+               if (vcpu->arch.halt_request) {
5278
 
+                       vcpu->arch.halt_request = 0;
5279
 
                        return kvm_emulate_halt(vcpu);
5280
 
                }
5281
 
                return 1;
5282
 
        }
5283
 
 
5284
 
-       if ((intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK)) == (INTR_TYPE_EXCEPTION | 1)) {
5285
 
+       if ((intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK)) ==
5286
 
+           (INTR_TYPE_EXCEPTION | 1)) {
5287
 
                kvm_run->exit_reason = KVM_EXIT_DEBUG;
5288
 
                return 0;
5289
 
        }
5290
 
@@ -1850,7 +1915,8 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
5291
 
        string = (exit_qualification & 16) != 0;
5292
 
 
5293
 
        if (string) {
5294
 
-               if (emulate_instruction(vcpu, kvm_run, 0, 0) == EMULATE_DO_MMIO)
5295
 
+               if (emulate_instruction(vcpu,
5296
 
+                                       kvm_run, 0, 0, 0) == EMULATE_DO_MMIO)
5297
 
                        return 0;
5298
 
                return 1;
5299
 
        }
5300
 
@@ -1873,7 +1939,6 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
5301
 
        hypercall[0] = 0x0f;
5302
 
        hypercall[1] = 0x01;
5303
 
        hypercall[2] = 0xc1;
5304
 
-       hypercall[3] = 0xc3;
5305
 
 }
5306
 
 
5307
 
 static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
5308
 
@@ -1890,23 +1955,25 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
5309
 
                switch (cr) {
5310
 
                case 0:
5311
 
                        vcpu_load_rsp_rip(vcpu);
5312
 
-                       set_cr0(vcpu, vcpu->regs[reg]);
5313
 
+                       set_cr0(vcpu, vcpu->arch.regs[reg]);
5314
 
                        skip_emulated_instruction(vcpu);
5315
 
                        return 1;
5316
 
                case 3:
5317
 
                        vcpu_load_rsp_rip(vcpu);
5318
 
-                       set_cr3(vcpu, vcpu->regs[reg]);
5319
 
+                       set_cr3(vcpu, vcpu->arch.regs[reg]);
5320
 
                        skip_emulated_instruction(vcpu);
5321
 
                        return 1;
5322
 
                case 4:
5323
 
                        vcpu_load_rsp_rip(vcpu);
5324
 
-                       set_cr4(vcpu, vcpu->regs[reg]);
5325
 
+                       set_cr4(vcpu, vcpu->arch.regs[reg]);
5326
 
                        skip_emulated_instruction(vcpu);
5327
 
                        return 1;
5328
 
                case 8:
5329
 
                        vcpu_load_rsp_rip(vcpu);
5330
 
-                       set_cr8(vcpu, vcpu->regs[reg]);
5331
 
+                       set_cr8(vcpu, vcpu->arch.regs[reg]);
5332
 
                        skip_emulated_instruction(vcpu);
5333
 
+                       if (irqchip_in_kernel(vcpu->kvm))
5334
 
+                               return 1;
5335
 
                        kvm_run->exit_reason = KVM_EXIT_SET_TPR;
5336
 
                        return 0;
5337
 
                };
5338
 
@@ -1914,8 +1981,8 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
5339
 
        case 2: /* clts */
5340
 
                vcpu_load_rsp_rip(vcpu);
5341
 
                vmx_fpu_deactivate(vcpu);
5342
 
-               vcpu->cr0 &= ~X86_CR0_TS;
5343
 
-               vmcs_writel(CR0_READ_SHADOW, vcpu->cr0);
5344
 
+               vcpu->arch.cr0 &= ~X86_CR0_TS;
5345
 
+               vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
5346
 
                vmx_fpu_activate(vcpu);
5347
 
                skip_emulated_instruction(vcpu);
5348
 
                return 1;
5349
 
@@ -1923,13 +1990,13 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
5350
 
                switch (cr) {
5351
 
                case 3:
5352
 
                        vcpu_load_rsp_rip(vcpu);
5353
 
-                       vcpu->regs[reg] = vcpu->cr3;
5354
 
+                       vcpu->arch.regs[reg] = vcpu->arch.cr3;
5355
 
                        vcpu_put_rsp_rip(vcpu);
5356
 
                        skip_emulated_instruction(vcpu);
5357
 
                        return 1;
5358
 
                case 8:
5359
 
                        vcpu_load_rsp_rip(vcpu);
5360
 
-                       vcpu->regs[reg] = get_cr8(vcpu);
5361
 
+                       vcpu->arch.regs[reg] = get_cr8(vcpu);
5362
 
                        vcpu_put_rsp_rip(vcpu);
5363
 
                        skip_emulated_instruction(vcpu);
5364
 
                        return 1;
5365
 
@@ -1975,7 +2042,7 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
5366
 
                default:
5367
 
                        val = 0;
5368
 
                }
5369
 
-               vcpu->regs[reg] = val;
5370
 
+               vcpu->arch.regs[reg] = val;
5371
 
        } else {
5372
 
                /* mov to dr */
5373
 
        }
5374
 
@@ -1992,29 +2059,29 @@ static int handle_cpuid(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
5375
 
 
5376
 
 static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
5377
 
 {
5378
 
-       u32 ecx = vcpu->regs[VCPU_REGS_RCX];
5379
 
+       u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX];
5380
 
        u64 data;
5381
 
 
5382
 
        if (vmx_get_msr(vcpu, ecx, &data)) {
5383
 
-               vmx_inject_gp(vcpu, 0);
5384
 
+               kvm_inject_gp(vcpu, 0);
5385
 
                return 1;
5386
 
        }
5387
 
 
5388
 
        /* FIXME: handling of bits 32:63 of rax, rdx */
5389
 
-       vcpu->regs[VCPU_REGS_RAX] = data & -1u;
5390
 
-       vcpu->regs[VCPU_REGS_RDX] = (data >> 32) & -1u;
5391
 
+       vcpu->arch.regs[VCPU_REGS_RAX] = data & -1u;
5392
 
+       vcpu->arch.regs[VCPU_REGS_RDX] = (data >> 32) & -1u;
5393
 
        skip_emulated_instruction(vcpu);
5394
 
        return 1;
5395
 
 }
5396
 
 
5397
 
 static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
5398
 
 {
5399
 
-       u32 ecx = vcpu->regs[VCPU_REGS_RCX];
5400
 
-       u64 data = (vcpu->regs[VCPU_REGS_RAX] & -1u)
5401
 
-               | ((u64)(vcpu->regs[VCPU_REGS_RDX] & -1u) << 32);
5402
 
+       u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX];
5403
 
+       u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
5404
 
+               | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);
5405
 
 
5406
 
        if (vmx_set_msr(vcpu, ecx, data) != 0) {
5407
 
-               vmx_inject_gp(vcpu, 0);
5408
 
+               kvm_inject_gp(vcpu, 0);
5409
 
                return 1;
5410
 
        }
5411
 
 
5412
 
@@ -2042,7 +2109,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
5413
 
         * possible
5414
 
         */
5415
 
        if (kvm_run->request_interrupt_window &&
5416
 
-           !vcpu->irq_summary) {
5417
 
+           !vcpu->arch.irq_summary) {
5418
 
                kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
5419
 
                ++vcpu->stat.irq_window_exits;
5420
 
                return 0;
5421
 
@@ -2059,7 +2126,35 @@ static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
5422
 
 static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
5423
 
 {
5424
 
        skip_emulated_instruction(vcpu);
5425
 
-       return kvm_hypercall(vcpu, kvm_run);
5426
 
+       kvm_emulate_hypercall(vcpu);
5427
 
+       return 1;
5428
 
+}
5429
 
+
5430
 
+static int handle_wbinvd(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
5431
 
+{
5432
 
+       skip_emulated_instruction(vcpu);
5433
 
+       /* TODO: Add support for VT-d/pass-through device */
5434
 
+       return 1;
5435
 
+}
5436
 
+
5437
 
+static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
5438
 
+{
5439
 
+       u64 exit_qualification;
5440
 
+       enum emulation_result er;
5441
 
+       unsigned long offset;
5442
 
+
5443
 
+       exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
5444
 
+       offset = exit_qualification & 0xffful;
5445
 
+
5446
 
+       er = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
5447
 
+
5448
 
+       if (er !=  EMULATE_DONE) {
5449
 
+               printk(KERN_ERR
5450
 
+                      "Fail to handle apic access vmexit! Offset is 0x%lx\n",
5451
 
+                      offset);
5452
 
+               return -ENOTSUPP;
5453
 
+       }
5454
 
+       return 1;
5455
 
 }
5456
 
 
5457
 
 /*
5458
 
@@ -2081,7 +2176,9 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
5459
 
        [EXIT_REASON_PENDING_INTERRUPT]       = handle_interrupt_window,
5460
 
        [EXIT_REASON_HLT]                     = handle_halt,
5461
 
        [EXIT_REASON_VMCALL]                  = handle_vmcall,
5462
 
-       [EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold
5463
 
+       [EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold,
5464
 
+       [EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
5465
 
+       [EXIT_REASON_WBINVD]                  = handle_wbinvd,
5466
 
 };
5467
 
 
5468
 
 static const int kvm_vmx_max_exit_handlers =
5469
 
@@ -2093,9 +2190,9 @@ static const int kvm_vmx_max_exit_handlers =
5470
 
  */
5471
 
 static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
5472
 
 {
5473
 
-       u32 vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
5474
 
        u32 exit_reason = vmcs_read32(VM_EXIT_REASON);
5475
 
        struct vcpu_vmx *vmx = to_vmx(vcpu);
5476
 
+       u32 vectoring_info = vmx->idt_vectoring_info;
5477
 
 
5478
 
        if (unlikely(vmx->fail)) {
5479
 
                kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
5480
 
@@ -2104,8 +2201,8 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
5481
 
                return 0;
5482
 
        }
5483
 
 
5484
 
-       if ( (vectoring_info & VECTORING_INFO_VALID_MASK) &&
5485
 
-                               exit_reason != EXIT_REASON_EXCEPTION_NMI )
5486
 
+       if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
5487
 
+                               exit_reason != EXIT_REASON_EXCEPTION_NMI)
5488
 
                printk(KERN_WARNING "%s: unexpected, valid vectoring info and "
5489
 
                       "exit reason is 0x%x\n", __FUNCTION__, exit_reason);
5490
 
        if (exit_reason < kvm_vmx_max_exit_handlers
5491
 
@@ -2150,26 +2247,38 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
5492
 
 
5493
 
 static void vmx_intr_assist(struct kvm_vcpu *vcpu)
5494
 
 {
5495
 
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
5496
 
        u32 idtv_info_field, intr_info_field;
5497
 
        int has_ext_irq, interrupt_window_open;
5498
 
        int vector;
5499
 
 
5500
 
-       kvm_inject_pending_timer_irqs(vcpu);
5501
 
        update_tpr_threshold(vcpu);
5502
 
 
5503
 
        has_ext_irq = kvm_cpu_has_interrupt(vcpu);
5504
 
        intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);
5505
 
-       idtv_info_field = vmcs_read32(IDT_VECTORING_INFO_FIELD);
5506
 
+       idtv_info_field = vmx->idt_vectoring_info;
5507
 
        if (intr_info_field & INTR_INFO_VALID_MASK) {
5508
 
                if (idtv_info_field & INTR_INFO_VALID_MASK) {
5509
 
                        /* TODO: fault when IDT_Vectoring */
5510
 
-                       printk(KERN_ERR "Fault when IDT_Vectoring\n");
5511
 
+                       if (printk_ratelimit())
5512
 
+                               printk(KERN_ERR "Fault when IDT_Vectoring\n");
5513
 
                }
5514
 
                if (has_ext_irq)
5515
 
                        enable_irq_window(vcpu);
5516
 
                return;
5517
 
        }
5518
 
        if (unlikely(idtv_info_field & INTR_INFO_VALID_MASK)) {
5519
 
+               if ((idtv_info_field & VECTORING_INFO_TYPE_MASK)
5520
 
+                   == INTR_TYPE_EXT_INTR
5521
 
+                   && vcpu->arch.rmode.active) {
5522
 
+                       u8 vect = idtv_info_field & VECTORING_INFO_VECTOR_MASK;
5523
 
+
5524
 
+                       vmx_inject_irq(vcpu, vect);
5525
 
+                       if (unlikely(has_ext_irq))
5526
 
+                               enable_irq_window(vcpu);
5527
 
+                       return;
5528
 
+               }
5529
 
+
5530
 
                vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
5531
 
                vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
5532
 
                                vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
5533
 
@@ -2194,6 +2303,29 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
5534
 
                enable_irq_window(vcpu);
5535
 
 }
5536
 
 
5537
 
+/*
5538
 
+ * Failure to inject an interrupt should give us the information
5539
 
+ * in IDT_VECTORING_INFO_FIELD.  However, if the failure occurs
5540
 
+ * when fetching the interrupt redirection bitmap in the real-mode
5541
 
+ * tss, this doesn't happen.  So we do it ourselves.
5542
 
+ */
5543
 
+static void fixup_rmode_irq(struct vcpu_vmx *vmx)
5544
 
+{
5545
 
+       vmx->rmode.irq.pending = 0;
5546
 
+       if (vmcs_readl(GUEST_RIP) + 1 != vmx->rmode.irq.rip)
5547
 
+               return;
5548
 
+       vmcs_writel(GUEST_RIP, vmx->rmode.irq.rip);
5549
 
+       if (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) {
5550
 
+               vmx->idt_vectoring_info &= ~VECTORING_INFO_TYPE_MASK;
5551
 
+               vmx->idt_vectoring_info |= INTR_TYPE_EXT_INTR;
5552
 
+               return;
5553
 
+       }
5554
 
+       vmx->idt_vectoring_info =
5555
 
+               VECTORING_INFO_VALID_MASK
5556
 
+               | INTR_TYPE_EXT_INTR
5557
 
+               | vmx->rmode.irq.vector;
5558
 
+}
5559
 
+
5560
 
 static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
5561
 
 {
5562
 
        struct vcpu_vmx *vmx = to_vmx(vcpu);
5563
 
@@ -2204,50 +2336,47 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
5564
 
         */
5565
 
        vmcs_writel(HOST_CR0, read_cr0());
5566
 
 
5567
 
-       asm (
5568
 
+       asm(
5569
 
                /* Store host registers */
5570
 
 #ifdef CONFIG_X86_64
5571
 
-               "push %%rax; push %%rbx; push %%rdx;"
5572
 
-               "push %%rsi; push %%rdi; push %%rbp;"
5573
 
-               "push %%r8;  push %%r9;  push %%r10; push %%r11;"
5574
 
-               "push %%r12; push %%r13; push %%r14; push %%r15;"
5575
 
+               "push %%rdx; push %%rbp;"
5576
 
                "push %%rcx \n\t"
5577
 
-               ASM_VMX_VMWRITE_RSP_RDX "\n\t"
5578
 
 #else
5579
 
-               "pusha; push %%ecx \n\t"
5580
 
-               ASM_VMX_VMWRITE_RSP_RDX "\n\t"
5581
 
+               "push %%edx; push %%ebp;"
5582
 
+               "push %%ecx \n\t"
5583
 
 #endif
5584
 
+               ASM_VMX_VMWRITE_RSP_RDX "\n\t"
5585
 
                /* Check if vmlaunch of vmresume is needed */
5586
 
-               "cmp $0, %1 \n\t"
5587
 
+               "cmpl $0, %c[launched](%0) \n\t"
5588
 
                /* Load guest registers.  Don't clobber flags. */
5589
 
 #ifdef CONFIG_X86_64
5590
 
-               "mov %c[cr2](%3), %%rax \n\t"
5591
 
+               "mov %c[cr2](%0), %%rax \n\t"
5592
 
                "mov %%rax, %%cr2 \n\t"
5593
 
-               "mov %c[rax](%3), %%rax \n\t"
5594
 
-               "mov %c[rbx](%3), %%rbx \n\t"
5595
 
-               "mov %c[rdx](%3), %%rdx \n\t"
5596
 
-               "mov %c[rsi](%3), %%rsi \n\t"
5597
 
-               "mov %c[rdi](%3), %%rdi \n\t"
5598
 
-               "mov %c[rbp](%3), %%rbp \n\t"
5599
 
-               "mov %c[r8](%3),  %%r8  \n\t"
5600
 
-               "mov %c[r9](%3),  %%r9  \n\t"
5601
 
-               "mov %c[r10](%3), %%r10 \n\t"
5602
 
-               "mov %c[r11](%3), %%r11 \n\t"
5603
 
-               "mov %c[r12](%3), %%r12 \n\t"
5604
 
-               "mov %c[r13](%3), %%r13 \n\t"
5605
 
-               "mov %c[r14](%3), %%r14 \n\t"
5606
 
-               "mov %c[r15](%3), %%r15 \n\t"
5607
 
-               "mov %c[rcx](%3), %%rcx \n\t" /* kills %3 (rcx) */
5608
 
+               "mov %c[rax](%0), %%rax \n\t"
5609
 
+               "mov %c[rbx](%0), %%rbx \n\t"
5610
 
+               "mov %c[rdx](%0), %%rdx \n\t"
5611
 
+               "mov %c[rsi](%0), %%rsi \n\t"
5612
 
+               "mov %c[rdi](%0), %%rdi \n\t"
5613
 
+               "mov %c[rbp](%0), %%rbp \n\t"
5614
 
+               "mov %c[r8](%0),  %%r8  \n\t"
5615
 
+               "mov %c[r9](%0),  %%r9  \n\t"
5616
 
+               "mov %c[r10](%0), %%r10 \n\t"
5617
 
+               "mov %c[r11](%0), %%r11 \n\t"
5618
 
+               "mov %c[r12](%0), %%r12 \n\t"
5619
 
+               "mov %c[r13](%0), %%r13 \n\t"
5620
 
+               "mov %c[r14](%0), %%r14 \n\t"
5621
 
+               "mov %c[r15](%0), %%r15 \n\t"
5622
 
+               "mov %c[rcx](%0), %%rcx \n\t" /* kills %0 (rcx) */
5623
 
 #else
5624
 
-               "mov %c[cr2](%3), %%eax \n\t"
5625
 
+               "mov %c[cr2](%0), %%eax \n\t"
5626
 
                "mov %%eax,   %%cr2 \n\t"
5627
 
-               "mov %c[rax](%3), %%eax \n\t"
5628
 
-               "mov %c[rbx](%3), %%ebx \n\t"
5629
 
-               "mov %c[rdx](%3), %%edx \n\t"
5630
 
-               "mov %c[rsi](%3), %%esi \n\t"
5631
 
-               "mov %c[rdi](%3), %%edi \n\t"
5632
 
-               "mov %c[rbp](%3), %%ebp \n\t"
5633
 
-               "mov %c[rcx](%3), %%ecx \n\t" /* kills %3 (ecx) */
5634
 
+               "mov %c[rax](%0), %%eax \n\t"
5635
 
+               "mov %c[rbx](%0), %%ebx \n\t"
5636
 
+               "mov %c[rdx](%0), %%edx \n\t"
5637
 
+               "mov %c[rsi](%0), %%esi \n\t"
5638
 
+               "mov %c[rdi](%0), %%edi \n\t"
5639
 
+               "mov %c[rbp](%0), %%ebp \n\t"
5640
 
+               "mov %c[rcx](%0), %%ecx \n\t" /* kills %0 (ecx) */
5641
 
 #endif
5642
 
                /* Enter guest mode */
5643
 
                "jne .Llaunched \n\t"
5644
 
@@ -2257,72 +2386,79 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
5645
 
                ".Lkvm_vmx_return: "
5646
 
                /* Save guest registers, load host registers, keep flags */
5647
 
 #ifdef CONFIG_X86_64
5648
 
-               "xchg %3,     (%%rsp) \n\t"
5649
 
-               "mov %%rax, %c[rax](%3) \n\t"
5650
 
-               "mov %%rbx, %c[rbx](%3) \n\t"
5651
 
-               "pushq (%%rsp); popq %c[rcx](%3) \n\t"
5652
 
-               "mov %%rdx, %c[rdx](%3) \n\t"
5653
 
-               "mov %%rsi, %c[rsi](%3) \n\t"
5654
 
-               "mov %%rdi, %c[rdi](%3) \n\t"
5655
 
-               "mov %%rbp, %c[rbp](%3) \n\t"
5656
 
-               "mov %%r8,  %c[r8](%3) \n\t"
5657
 
-               "mov %%r9,  %c[r9](%3) \n\t"
5658
 
-               "mov %%r10, %c[r10](%3) \n\t"
5659
 
-               "mov %%r11, %c[r11](%3) \n\t"
5660
 
-               "mov %%r12, %c[r12](%3) \n\t"
5661
 
-               "mov %%r13, %c[r13](%3) \n\t"
5662
 
-               "mov %%r14, %c[r14](%3) \n\t"
5663
 
-               "mov %%r15, %c[r15](%3) \n\t"
5664
 
+               "xchg %0,     (%%rsp) \n\t"
5665
 
+               "mov %%rax, %c[rax](%0) \n\t"
5666
 
+               "mov %%rbx, %c[rbx](%0) \n\t"
5667
 
+               "pushq (%%rsp); popq %c[rcx](%0) \n\t"
5668
 
+               "mov %%rdx, %c[rdx](%0) \n\t"
5669
 
+               "mov %%rsi, %c[rsi](%0) \n\t"
5670
 
+               "mov %%rdi, %c[rdi](%0) \n\t"
5671
 
+               "mov %%rbp, %c[rbp](%0) \n\t"
5672
 
+               "mov %%r8,  %c[r8](%0) \n\t"
5673
 
+               "mov %%r9,  %c[r9](%0) \n\t"
5674
 
+               "mov %%r10, %c[r10](%0) \n\t"
5675
 
+               "mov %%r11, %c[r11](%0) \n\t"
5676
 
+               "mov %%r12, %c[r12](%0) \n\t"
5677
 
+               "mov %%r13, %c[r13](%0) \n\t"
5678
 
+               "mov %%r14, %c[r14](%0) \n\t"
5679
 
+               "mov %%r15, %c[r15](%0) \n\t"
5680
 
                "mov %%cr2, %%rax   \n\t"
5681
 
-               "mov %%rax, %c[cr2](%3) \n\t"
5682
 
-               "mov (%%rsp), %3 \n\t"
5683
 
+               "mov %%rax, %c[cr2](%0) \n\t"
5684
 
 
5685
 
-               "pop  %%rcx; pop  %%r15; pop  %%r14; pop  %%r13; pop  %%r12;"
5686
 
-               "pop  %%r11; pop  %%r10; pop  %%r9;  pop  %%r8;"
5687
 
-               "pop  %%rbp; pop  %%rdi; pop  %%rsi;"
5688
 
-               "pop  %%rdx; pop  %%rbx; pop  %%rax \n\t"
5689
 
+               "pop  %%rbp; pop  %%rbp; pop  %%rdx \n\t"
5690
 
 #else
5691
 
-               "xchg %3, (%%esp) \n\t"
5692
 
-               "mov %%eax, %c[rax](%3) \n\t"
5693
 
-               "mov %%ebx, %c[rbx](%3) \n\t"
5694
 
-               "pushl (%%esp); popl %c[rcx](%3) \n\t"
5695
 
-               "mov %%edx, %c[rdx](%3) \n\t"
5696
 
-               "mov %%esi, %c[rsi](%3) \n\t"
5697
 
-               "mov %%edi, %c[rdi](%3) \n\t"
5698
 
-               "mov %%ebp, %c[rbp](%3) \n\t"
5699
 
+               "xchg %0, (%%esp) \n\t"
5700
 
+               "mov %%eax, %c[rax](%0) \n\t"
5701
 
+               "mov %%ebx, %c[rbx](%0) \n\t"
5702
 
+               "pushl (%%esp); popl %c[rcx](%0) \n\t"
5703
 
+               "mov %%edx, %c[rdx](%0) \n\t"
5704
 
+               "mov %%esi, %c[rsi](%0) \n\t"
5705
 
+               "mov %%edi, %c[rdi](%0) \n\t"
5706
 
+               "mov %%ebp, %c[rbp](%0) \n\t"
5707
 
                "mov %%cr2, %%eax  \n\t"
5708
 
-               "mov %%eax, %c[cr2](%3) \n\t"
5709
 
-               "mov (%%esp), %3 \n\t"
5710
 
+               "mov %%eax, %c[cr2](%0) \n\t"
5711
 
 
5712
 
-               "pop %%ecx; popa \n\t"
5713
 
+               "pop %%ebp; pop %%ebp; pop %%edx \n\t"
5714
 
 #endif
5715
 
-               "setbe %0 \n\t"
5716
 
-             : "=q" (vmx->fail)
5717
 
-             : "r"(vmx->launched), "d"((unsigned long)HOST_RSP),
5718
 
-               "c"(vcpu),
5719
 
-               [rax]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RAX])),
5720
 
-               [rbx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBX])),
5721
 
-               [rcx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RCX])),
5722
 
-               [rdx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RDX])),
5723
 
-               [rsi]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RSI])),
5724
 
-               [rdi]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RDI])),
5725
 
-               [rbp]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBP])),
5726
 
+               "setbe %c[fail](%0) \n\t"
5727
 
+             : : "c"(vmx), "d"((unsigned long)HOST_RSP),
5728
 
+               [launched]"i"(offsetof(struct vcpu_vmx, launched)),
5729
 
+               [fail]"i"(offsetof(struct vcpu_vmx, fail)),
5730
 
+               [rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])),
5731
 
+               [rbx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBX])),
5732
 
+               [rcx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RCX])),
5733
 
+               [rdx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDX])),
5734
 
+               [rsi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RSI])),
5735
 
+               [rdi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDI])),
5736
 
+               [rbp]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBP])),
5737
 
 #ifdef CONFIG_X86_64
5738
 
-               [r8 ]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R8 ])),
5739
 
-               [r9 ]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R9 ])),
5740
 
-               [r10]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R10])),
5741
 
-               [r11]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R11])),
5742
 
-               [r12]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R12])),
5743
 
-               [r13]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R13])),
5744
 
-               [r14]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R14])),
5745
 
-               [r15]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R15])),
5746
 
+               [r8]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R8])),
5747
 
+               [r9]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R9])),
5748
 
+               [r10]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R10])),
5749
 
+               [r11]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R11])),
5750
 
+               [r12]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R12])),
5751
 
+               [r13]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R13])),
5752
 
+               [r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])),
5753
 
+               [r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])),
5754
 
 #endif
5755
 
-               [cr2]"i"(offsetof(struct kvm_vcpu, cr2))
5756
 
-             : "cc", "memory" );
5757
 
+               [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2))
5758
 
+             : "cc", "memory"
5759
 
+#ifdef CONFIG_X86_64
5760
 
+               , "rbx", "rdi", "rsi"
5761
 
+               , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
5762
 
+#else
5763
 
+               , "ebx", "edi", "rsi"
5764
 
+#endif
5765
 
+             );
5766
 
+
5767
 
+       vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
5768
 
+       if (vmx->rmode.irq.pending)
5769
 
+               fixup_rmode_irq(vmx);
5770
 
 
5771
 
-       vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0;
5772
 
+       vcpu->arch.interrupt_window_open =
5773
 
+               (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0;
5774
 
 
5775
 
-       asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
5776
 
+       asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
5777
 
        vmx->launched = 1;
5778
 
 
5779
 
        intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
5780
 
@@ -2332,36 +2468,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
5781
 
                asm("int $2");
5782
 
 }
5783
 
 
5784
 
-static void vmx_inject_page_fault(struct kvm_vcpu *vcpu,
5785
 
-                                 unsigned long addr,
5786
 
-                                 u32 err_code)
5787
 
-{
5788
 
-       u32 vect_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
5789
 
-
5790
 
-       ++vcpu->stat.pf_guest;
5791
 
-
5792
 
-       if (is_page_fault(vect_info)) {
5793
 
-               printk(KERN_DEBUG "inject_page_fault: "
5794
 
-                      "double fault 0x%lx @ 0x%lx\n",
5795
 
-                      addr, vmcs_readl(GUEST_RIP));
5796
 
-               vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, 0);
5797
 
-               vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
5798
 
-                            DF_VECTOR |
5799
 
-                            INTR_TYPE_EXCEPTION |
5800
 
-                            INTR_INFO_DELIEVER_CODE_MASK |
5801
 
-                            INTR_INFO_VALID_MASK);
5802
 
-               return;
5803
 
-       }
5804
 
-       vcpu->cr2 = addr;
5805
 
-       vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, err_code);
5806
 
-       vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
5807
 
-                    PF_VECTOR |
5808
 
-                    INTR_TYPE_EXCEPTION |
5809
 
-                    INTR_INFO_DELIEVER_CODE_MASK |
5810
 
-                    INTR_INFO_VALID_MASK);
5811
 
-
5812
 
-}
5813
 
-
5814
 
 static void vmx_free_vmcs(struct kvm_vcpu *vcpu)
5815
 
 {
5816
 
        struct vcpu_vmx *vmx = to_vmx(vcpu);
5817
 
@@ -2397,12 +2503,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
5818
 
        if (err)
5819
 
                goto free_vcpu;
5820
 
 
5821
 
-       if (irqchip_in_kernel(kvm)) {
5822
 
-               err = kvm_create_lapic(&vmx->vcpu);
5823
 
-               if (err < 0)
5824
 
-                       goto free_vcpu;
5825
 
-       }
5826
 
-
5827
 
        vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
5828
 
        if (!vmx->guest_msrs) {
5829
 
                err = -ENOMEM;
5830
 
@@ -2499,9 +2599,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
5831
 
        .set_rflags = vmx_set_rflags,
5832
 
 
5833
 
        .tlb_flush = vmx_flush_tlb,
5834
 
-       .inject_page_fault = vmx_inject_page_fault,
5835
 
-
5836
 
-       .inject_gp = vmx_inject_gp,
5837
 
 
5838
 
        .run = vmx_vcpu_run,
5839
 
        .handle_exit = kvm_handle_exit,
5840
 
@@ -2509,8 +2606,12 @@ static struct kvm_x86_ops vmx_x86_ops = {
5841
 
        .patch_hypercall = vmx_patch_hypercall,
5842
 
        .get_irq = vmx_get_irq,
5843
 
        .set_irq = vmx_inject_irq,
5844
 
+       .queue_exception = vmx_queue_exception,
5845
 
+       .exception_injected = vmx_exception_injected,
5846
 
        .inject_pending_irq = vmx_intr_assist,
5847
 
        .inject_pending_vectors = do_interrupt_requests,
5848
 
+
5849
 
+       .set_tss_addr = vmx_set_tss_addr,
5850
 
 };
5851
 
 
5852
 
 static int __init vmx_init(void)
5853
 
@@ -2541,10 +2642,13 @@ static int __init vmx_init(void)
5854
 
        memset(iova, 0xff, PAGE_SIZE);
5855
 
        kunmap(vmx_io_bitmap_b);
5856
 
 
5857
 
-       r = kvm_init_x86(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE);
5858
 
+       r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE);
5859
 
        if (r)
5860
 
                goto out1;
5861
 
 
5862
 
+       if (bypass_guest_pf)
5863
 
+               kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull);
5864
 
+
5865
 
        return 0;
5866
 
 
5867
 
 out1:
5868
 
@@ -2559,7 +2663,7 @@ static void __exit vmx_exit(void)
5869
 
        __free_page(vmx_io_bitmap_b);
5870
 
        __free_page(vmx_io_bitmap_a);
5871
 
 
5872
 
-       kvm_exit_x86();
5873
 
+       kvm_exit();
5874
 
 }
5875
 
 
5876
 
 module_init(vmx_init)
5877
 
diff --git a/drivers/kvm/vmx.h b/arch/x86/kvm/vmx.h
5878
 
similarity index 96%
5879
 
rename from drivers/kvm/vmx.h
5880
 
rename to arch/x86/kvm/vmx.h
5881
 
index fd4e146..d52ae8d 100644
5882
 
--- a/drivers/kvm/vmx.h
5883
 
+++ b/arch/x86/kvm/vmx.h
5884
 
@@ -25,6 +25,9 @@
5885
 
  *
5886
 
  */
5887
 
 
5888
 
+/*
5889
 
+ * Definitions of Primary Processor-Based VM-Execution Controls.
5890
 
+ */
5891
 
 #define CPU_BASED_VIRTUAL_INTR_PENDING          0x00000004
5892
 
 #define CPU_BASED_USE_TSC_OFFSETING             0x00000008
5893
 
 #define CPU_BASED_HLT_EXITING                   0x00000080
5894
 
@@ -42,6 +45,12 @@
5895
 
 #define CPU_BASED_MONITOR_EXITING               0x20000000
5896
 
 #define CPU_BASED_PAUSE_EXITING                 0x40000000
5897
 
 #define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS   0x80000000
5898
 
+/*
5899
 
+ * Definitions of Secondary Processor-Based VM-Execution Controls.
5900
 
+ */
5901
 
+#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
5902
 
+#define SECONDARY_EXEC_WBINVD_EXITING          0x00000040
5903
 
+
5904
 
 
5905
 
 #define PIN_BASED_EXT_INTR_MASK                 0x00000001
5906
 
 #define PIN_BASED_NMI_EXITING                   0x00000008
5907
 
@@ -54,8 +63,6 @@
5908
 
 #define VM_ENTRY_SMM                            0x00000400
5909
 
 #define VM_ENTRY_DEACT_DUAL_MONITOR             0x00000800
5910
 
 
5911
 
-#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
5912
 
-
5913
 
 /* VMCS Encodings */
5914
 
 enum vmcs_field {
5915
 
        GUEST_ES_SELECTOR               = 0x00000800,
5916
 
@@ -89,6 +96,8 @@ enum vmcs_field {
5917
 
        TSC_OFFSET_HIGH                 = 0x00002011,
5918
 
        VIRTUAL_APIC_PAGE_ADDR          = 0x00002012,
5919
 
        VIRTUAL_APIC_PAGE_ADDR_HIGH     = 0x00002013,
5920
 
+       APIC_ACCESS_ADDR                = 0x00002014,
5921
 
+       APIC_ACCESS_ADDR_HIGH           = 0x00002015,
5922
 
        VMCS_LINK_POINTER               = 0x00002800,
5923
 
        VMCS_LINK_POINTER_HIGH          = 0x00002801,
5924
 
        GUEST_IA32_DEBUGCTL             = 0x00002802,
5925
 
@@ -214,6 +223,8 @@ enum vmcs_field {
5926
 
 #define EXIT_REASON_MSR_WRITE           32
5927
 
 #define EXIT_REASON_MWAIT_INSTRUCTION   36
5928
 
 #define EXIT_REASON_TPR_BELOW_THRESHOLD 43
5929
 
+#define EXIT_REASON_APIC_ACCESS         44
5930
 
+#define EXIT_REASON_WBINVD             54
5931
 
 
5932
 
 /*
5933
 
  * Interruption-information format
5934
 
@@ -230,13 +241,14 @@ enum vmcs_field {
5935
 
 
5936
 
 #define INTR_TYPE_EXT_INTR              (0 << 8) /* external interrupt */
5937
 
 #define INTR_TYPE_EXCEPTION             (3 << 8) /* processor exception */
5938
 
+#define INTR_TYPE_SOFT_INTR             (4 << 8) /* software interrupt */
5939
 
 
5940
 
 /*
5941
 
  * Exit Qualifications for MOV for Control Register Access
5942
 
  */
5943
 
-#define CONTROL_REG_ACCESS_NUM          0x7     /* 2:0, number of control register */
5944
 
+#define CONTROL_REG_ACCESS_NUM          0x7     /* 2:0, number of control reg.*/
5945
 
 #define CONTROL_REG_ACCESS_TYPE         0x30    /* 5:4, access type */
5946
 
-#define CONTROL_REG_ACCESS_REG          0xf00   /* 10:8, general purpose register */
5947
 
+#define CONTROL_REG_ACCESS_REG          0xf00   /* 10:8, general purpose reg. */
5948
 
 #define LMSW_SOURCE_DATA_SHIFT 16
5949
 
 #define LMSW_SOURCE_DATA  (0xFFFF << LMSW_SOURCE_DATA_SHIFT) /* 16:31 lmsw source */
5950
 
 #define REG_EAX                         (0 << 8)
5951
 
@@ -259,11 +271,11 @@ enum vmcs_field {
5952
 
 /*
5953
 
  * Exit Qualifications for MOV for Debug Register Access
5954
 
  */
5955
 
-#define DEBUG_REG_ACCESS_NUM            0x7     /* 2:0, number of debug register */
5956
 
+#define DEBUG_REG_ACCESS_NUM            0x7     /* 2:0, number of debug reg. */
5957
 
 #define DEBUG_REG_ACCESS_TYPE           0x10    /* 4, direction of access */
5958
 
 #define TYPE_MOV_TO_DR                  (0 << 4)
5959
 
 #define TYPE_MOV_FROM_DR                (1 << 4)
5960
 
-#define DEBUG_REG_ACCESS_REG            0xf00   /* 11:8, general purpose register */
5961
 
+#define DEBUG_REG_ACCESS_REG            0xf00   /* 11:8, general purpose reg. */
5962
 
 
5963
 
 
5964
 
 /* segment AR */
5965
 
@@ -307,4 +319,6 @@ enum vmcs_field {
5966
 
 #define MSR_IA32_FEATURE_CONTROL_LOCKED         0x1
5967
 
 #define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED  0x4
5968
 
 
5969
 
+#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT       9
5970
 
+
5971
 
 #endif
5972
 
diff --git a/drivers/kvm/kvm_main.c b/arch/x86/kvm/x86.c
5973
 
similarity index 52%
5974
 
rename from drivers/kvm/kvm_main.c
5975
 
rename to arch/x86/kvm/x86.c
5976
 
index 47c10b8..4b26270 100644
5977
 
--- a/drivers/kvm/kvm_main.c
5978
 
+++ b/arch/x86/kvm/x86.c
5979
 
@@ -1,8 +1,7 @@
5980
 
 /*
5981
 
  * Kernel-based Virtual Machine driver for Linux
5982
 
  *
5983
 
- * This module enables machines with Intel VT-x extensions to run virtual
5984
 
- * machines without emulation or binary translation.
5985
 
+ * derived from drivers/kvm/kvm_main.c
5986
 
  *
5987
 
  * Copyright (C) 2006 Qumranet, Inc.
5988
 
  *
5989
 
@@ -15,80 +14,22 @@
5990
 
  *
5991
 
  */
5992
 
 
5993
 
-#include "kvm.h"
5994
 
-#include "x86_emulate.h"
5995
 
+#include <linux/kvm_host.h>
5996
 
 #include "segment_descriptor.h"
5997
 
 #include "irq.h"
5998
 
+#include "mmu.h"
5999
 
 
6000
 
 #include <linux/kvm.h>
6001
 
-#include <linux/module.h>
6002
 
-#include <linux/errno.h>
6003
 
-#include <linux/percpu.h>
6004
 
-#include <linux/gfp.h>
6005
 
-#include <linux/mm.h>
6006
 
-#include <linux/miscdevice.h>
6007
 
+#include <linux/fs.h>
6008
 
 #include <linux/vmalloc.h>
6009
 
-#include <linux/reboot.h>
6010
 
-#include <linux/debugfs.h>
6011
 
+#include <linux/module.h>
6012
 
+#include <linux/mman.h>
6013
 
 #include <linux/highmem.h>
6014
 
-#include <linux/file.h>
6015
 
-#include <linux/sysdev.h>
6016
 
-#include <linux/cpu.h>
6017
 
-#include <linux/sched.h>
6018
 
-#include <linux/cpumask.h>
6019
 
-#include <linux/smp.h>
6020
 
-#include <linux/anon_inodes.h>
6021
 
-#include <linux/profile.h>
6022
 
-
6023
 
-#include <asm/processor.h>
6024
 
-#include <asm/msr.h>
6025
 
-#include <asm/io.h>
6026
 
-#include <asm/uaccess.h>
6027
 
-#include <asm/desc.h>
6028
 
-
6029
 
-MODULE_AUTHOR("Qumranet");
6030
 
-MODULE_LICENSE("GPL");
6031
 
-
6032
 
-static DEFINE_SPINLOCK(kvm_lock);
6033
 
-static LIST_HEAD(vm_list);
6034
 
-
6035
 
-static cpumask_t cpus_hardware_enabled;
6036
 
-
6037
 
-struct kvm_x86_ops *kvm_x86_ops;
6038
 
-struct kmem_cache *kvm_vcpu_cache;
6039
 
-EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
6040
 
-
6041
 
-static __read_mostly struct preempt_ops kvm_preempt_ops;
6042
 
-
6043
 
-#define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x)
6044
 
-
6045
 
-static struct kvm_stats_debugfs_item {
6046
 
-       const char *name;
6047
 
-       int offset;
6048
 
-       struct dentry *dentry;
6049
 
-} debugfs_entries[] = {
6050
 
-       { "pf_fixed", STAT_OFFSET(pf_fixed) },
6051
 
-       { "pf_guest", STAT_OFFSET(pf_guest) },
6052
 
-       { "tlb_flush", STAT_OFFSET(tlb_flush) },
6053
 
-       { "invlpg", STAT_OFFSET(invlpg) },
6054
 
-       { "exits", STAT_OFFSET(exits) },
6055
 
-       { "io_exits", STAT_OFFSET(io_exits) },
6056
 
-       { "mmio_exits", STAT_OFFSET(mmio_exits) },
6057
 
-       { "signal_exits", STAT_OFFSET(signal_exits) },
6058
 
-       { "irq_window", STAT_OFFSET(irq_window_exits) },
6059
 
-       { "halt_exits", STAT_OFFSET(halt_exits) },
6060
 
-       { "halt_wakeup", STAT_OFFSET(halt_wakeup) },
6061
 
-       { "request_irq", STAT_OFFSET(request_irq_exits) },
6062
 
-       { "irq_exits", STAT_OFFSET(irq_exits) },
6063
 
-       { "light_exits", STAT_OFFSET(light_exits) },
6064
 
-       { "efer_reload", STAT_OFFSET(efer_reload) },
6065
 
-       { NULL }
6066
 
-};
6067
 
 
6068
 
-static struct dentry *debugfs_dir;
6069
 
+#include <asm/uaccess.h>
6070
 
+#include <asm/msr.h>
6071
 
 
6072
 
 #define MAX_IO_MSRS 256
6073
 
-
6074
 
 #define CR0_RESERVED_BITS                                              \
6075
 
        (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
6076
 
                          | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
6077
 
@@ -102,317 +43,150 @@ static struct dentry *debugfs_dir;
6078
 
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
6079
 
 #define EFER_RESERVED_BITS 0xfffffffffffff2fe
6080
 
 
6081
 
-#ifdef CONFIG_X86_64
6082
 
-// LDT or TSS descriptor in the GDT. 16 bytes.
6083
 
-struct segment_descriptor_64 {
6084
 
-       struct segment_descriptor s;
6085
 
-       u32 base_higher;
6086
 
-       u32 pad_zero;
6087
 
-};
6088
 
+#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
6089
 
+#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
6090
 
 
6091
 
-#endif
6092
 
+struct kvm_x86_ops *kvm_x86_ops;
6093
 
+
6094
 
+struct kvm_stats_debugfs_item debugfs_entries[] = {
6095
 
+       { "pf_fixed", VCPU_STAT(pf_fixed) },
6096
 
+       { "pf_guest", VCPU_STAT(pf_guest) },
6097
 
+       { "tlb_flush", VCPU_STAT(tlb_flush) },
6098
 
+       { "invlpg", VCPU_STAT(invlpg) },
6099
 
+       { "exits", VCPU_STAT(exits) },
6100
 
+       { "io_exits", VCPU_STAT(io_exits) },
6101
 
+       { "mmio_exits", VCPU_STAT(mmio_exits) },
6102
 
+       { "signal_exits", VCPU_STAT(signal_exits) },
6103
 
+       { "irq_window", VCPU_STAT(irq_window_exits) },
6104
 
+       { "halt_exits", VCPU_STAT(halt_exits) },
6105
 
+       { "halt_wakeup", VCPU_STAT(halt_wakeup) },
6106
 
+       { "request_irq", VCPU_STAT(request_irq_exits) },
6107
 
+       { "irq_exits", VCPU_STAT(irq_exits) },
6108
 
+       { "host_state_reload", VCPU_STAT(host_state_reload) },
6109
 
+       { "efer_reload", VCPU_STAT(efer_reload) },
6110
 
+       { "fpu_reload", VCPU_STAT(fpu_reload) },
6111
 
+       { "insn_emulation", VCPU_STAT(insn_emulation) },
6112
 
+       { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
6113
 
+       { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
6114
 
+       { "mmu_pte_write", VM_STAT(mmu_pte_write) },
6115
 
+       { "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
6116
 
+       { "mmu_pde_zapped", VM_STAT(mmu_pde_zapped) },
6117
 
+       { "mmu_flooded", VM_STAT(mmu_flooded) },
6118
 
+       { "mmu_recycled", VM_STAT(mmu_recycled) },
6119
 
+       { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
6120
 
+       { NULL }
6121
 
+};
6122
 
 
6123
 
-static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
6124
 
-                          unsigned long arg);
6125
 
 
6126
 
 unsigned long segment_base(u16 selector)
6127
 
 {
6128
 
        struct descriptor_table gdt;
6129
 
        struct segment_descriptor *d;
6130
 
        unsigned long table_base;
6131
 
-       typedef unsigned long ul;
6132
 
        unsigned long v;
6133
 
 
6134
 
        if (selector == 0)
6135
 
                return 0;
6136
 
 
6137
 
-       asm ("sgdt %0" : "=m"(gdt));
6138
 
+       asm("sgdt %0" : "=m"(gdt));
6139
 
        table_base = gdt.base;
6140
 
 
6141
 
        if (selector & 4) {           /* from ldt */
6142
 
                u16 ldt_selector;
6143
 
 
6144
 
-               asm ("sldt %0" : "=g"(ldt_selector));
6145
 
+               asm("sldt %0" : "=g"(ldt_selector));
6146
 
                table_base = segment_base(ldt_selector);
6147
 
        }
6148
 
        d = (struct segment_descriptor *)(table_base + (selector & ~7));
6149
 
-       v = d->base_low | ((ul)d->base_mid << 16) | ((ul)d->base_high << 24);
6150
 
+       v = d->base_low | ((unsigned long)d->base_mid << 16) |
6151
 
+               ((unsigned long)d->base_high << 24);
6152
 
 #ifdef CONFIG_X86_64
6153
 
-       if (d->system == 0
6154
 
-           && (d->type == 2 || d->type == 9 || d->type == 11))
6155
 
-               v |= ((ul)((struct segment_descriptor_64 *)d)->base_higher) << 32;
6156
 
+       if (d->system == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
6157
 
+               v |= ((unsigned long) \
6158
 
+                     ((struct segment_descriptor_64 *)d)->base_higher) << 32;
6159
 
 #endif
6160
 
        return v;
6161
 
 }
6162
 
 EXPORT_SYMBOL_GPL(segment_base);
6163
 
 
6164
 
-static inline int valid_vcpu(int n)
6165
 
-{
6166
 
-       return likely(n >= 0 && n < KVM_MAX_VCPUS);
6167
 
-}
6168
 
-
6169
 
-void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
6170
 
-{
6171
 
-       if (!vcpu->fpu_active || vcpu->guest_fpu_loaded)
6172
 
-               return;
6173
 
-
6174
 
-       vcpu->guest_fpu_loaded = 1;
6175
 
-       fx_save(&vcpu->host_fx_image);
6176
 
-       fx_restore(&vcpu->guest_fx_image);
6177
 
-}
6178
 
-EXPORT_SYMBOL_GPL(kvm_load_guest_fpu);
6179
 
-
6180
 
-void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
6181
 
-{
6182
 
-       if (!vcpu->guest_fpu_loaded)
6183
 
-               return;
6184
 
-
6185
 
-       vcpu->guest_fpu_loaded = 0;
6186
 
-       fx_save(&vcpu->guest_fx_image);
6187
 
-       fx_restore(&vcpu->host_fx_image);
6188
 
-}
6189
 
-EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);
6190
 
-
6191
 
-/*
6192
 
- * Switches to specified vcpu, until a matching vcpu_put()
6193
 
- */
6194
 
-static void vcpu_load(struct kvm_vcpu *vcpu)
6195
 
-{
6196
 
-       int cpu;
6197
 
-
6198
 
-       mutex_lock(&vcpu->mutex);
6199
 
-       cpu = get_cpu();
6200
 
-       preempt_notifier_register(&vcpu->preempt_notifier);
6201
 
-       kvm_x86_ops->vcpu_load(vcpu, cpu);
6202
 
-       put_cpu();
6203
 
-}
6204
 
-
6205
 
-static void vcpu_put(struct kvm_vcpu *vcpu)
6206
 
-{
6207
 
-       preempt_disable();
6208
 
-       kvm_x86_ops->vcpu_put(vcpu);
6209
 
-       preempt_notifier_unregister(&vcpu->preempt_notifier);
6210
 
-       preempt_enable();
6211
 
-       mutex_unlock(&vcpu->mutex);
6212
 
-}
6213
 
-
6214
 
-static void ack_flush(void *_completed)
6215
 
-{
6216
 
-}
6217
 
-
6218
 
-void kvm_flush_remote_tlbs(struct kvm *kvm)
6219
 
-{
6220
 
-       int i, cpu;
6221
 
-       cpumask_t cpus;
6222
 
-       struct kvm_vcpu *vcpu;
6223
 
-
6224
 
-       cpus_clear(cpus);
6225
 
-       for (i = 0; i < KVM_MAX_VCPUS; ++i) {
6226
 
-               vcpu = kvm->vcpus[i];
6227
 
-               if (!vcpu)
6228
 
-                       continue;
6229
 
-               if (test_and_set_bit(KVM_TLB_FLUSH, &vcpu->requests))
6230
 
-                       continue;
6231
 
-               cpu = vcpu->cpu;
6232
 
-               if (cpu != -1 && cpu != raw_smp_processor_id())
6233
 
-                       cpu_set(cpu, cpus);
6234
 
-       }
6235
 
-       smp_call_function_mask(cpus, ack_flush, NULL, 1);
6236
 
-}
6237
 
-
6238
 
-int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
6239
 
+u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
6240
 
 {
6241
 
-       struct page *page;
6242
 
-       int r;
6243
 
-
6244
 
-       mutex_init(&vcpu->mutex);
6245
 
-       vcpu->cpu = -1;
6246
 
-       vcpu->mmu.root_hpa = INVALID_PAGE;
6247
 
-       vcpu->kvm = kvm;
6248
 
-       vcpu->vcpu_id = id;
6249
 
-       if (!irqchip_in_kernel(kvm) || id == 0)
6250
 
-               vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
6251
 
+       if (irqchip_in_kernel(vcpu->kvm))
6252
 
+               return vcpu->arch.apic_base;
6253
 
        else
6254
 
-               vcpu->mp_state = VCPU_MP_STATE_UNINITIALIZED;
6255
 
-       init_waitqueue_head(&vcpu->wq);
6256
 
-
6257
 
-       page = alloc_page(GFP_KERNEL | __GFP_ZERO);
6258
 
-       if (!page) {
6259
 
-               r = -ENOMEM;
6260
 
-               goto fail;
6261
 
-       }
6262
 
-       vcpu->run = page_address(page);
6263
 
-
6264
 
-       page = alloc_page(GFP_KERNEL | __GFP_ZERO);
6265
 
-       if (!page) {
6266
 
-               r = -ENOMEM;
6267
 
-               goto fail_free_run;
6268
 
-       }
6269
 
-       vcpu->pio_data = page_address(page);
6270
 
-
6271
 
-       r = kvm_mmu_create(vcpu);
6272
 
-       if (r < 0)
6273
 
-               goto fail_free_pio_data;
6274
 
-
6275
 
-       return 0;
6276
 
-
6277
 
-fail_free_pio_data:
6278
 
-       free_page((unsigned long)vcpu->pio_data);
6279
 
-fail_free_run:
6280
 
-       free_page((unsigned long)vcpu->run);
6281
 
-fail:
6282
 
-       return -ENOMEM;
6283
 
-}
6284
 
-EXPORT_SYMBOL_GPL(kvm_vcpu_init);
6285
 
-
6286
 
-void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
6287
 
-{
6288
 
-       kvm_mmu_destroy(vcpu);
6289
 
-       if (vcpu->apic)
6290
 
-               hrtimer_cancel(&vcpu->apic->timer.dev);
6291
 
-       kvm_free_apic(vcpu->apic);
6292
 
-       free_page((unsigned long)vcpu->pio_data);
6293
 
-       free_page((unsigned long)vcpu->run);
6294
 
-}
6295
 
-EXPORT_SYMBOL_GPL(kvm_vcpu_uninit);
6296
 
-
6297
 
-static struct kvm *kvm_create_vm(void)
6298
 
-{
6299
 
-       struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
6300
 
-
6301
 
-       if (!kvm)
6302
 
-               return ERR_PTR(-ENOMEM);
6303
 
-
6304
 
-       kvm_io_bus_init(&kvm->pio_bus);
6305
 
-       mutex_init(&kvm->lock);
6306
 
-       INIT_LIST_HEAD(&kvm->active_mmu_pages);
6307
 
-       kvm_io_bus_init(&kvm->mmio_bus);
6308
 
-       spin_lock(&kvm_lock);
6309
 
-       list_add(&kvm->vm_list, &vm_list);
6310
 
-       spin_unlock(&kvm_lock);
6311
 
-       return kvm;
6312
 
-}
6313
 
-
6314
 
-/*
6315
 
- * Free any memory in @free but not in @dont.
6316
 
- */
6317
 
-static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
6318
 
-                                 struct kvm_memory_slot *dont)
6319
 
-{
6320
 
-       int i;
6321
 
-
6322
 
-       if (!dont || free->phys_mem != dont->phys_mem)
6323
 
-               if (free->phys_mem) {
6324
 
-                       for (i = 0; i < free->npages; ++i)
6325
 
-                               if (free->phys_mem[i])
6326
 
-                                       __free_page(free->phys_mem[i]);
6327
 
-                       vfree(free->phys_mem);
6328
 
-               }
6329
 
-
6330
 
-       if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
6331
 
-               vfree(free->dirty_bitmap);
6332
 
-
6333
 
-       free->phys_mem = NULL;
6334
 
-       free->npages = 0;
6335
 
-       free->dirty_bitmap = NULL;
6336
 
-}
6337
 
-
6338
 
-static void kvm_free_physmem(struct kvm *kvm)
6339
 
-{
6340
 
-       int i;
6341
 
-
6342
 
-       for (i = 0; i < kvm->nmemslots; ++i)
6343
 
-               kvm_free_physmem_slot(&kvm->memslots[i], NULL);
6344
 
+               return vcpu->arch.apic_base;
6345
 
 }
6346
 
+EXPORT_SYMBOL_GPL(kvm_get_apic_base);
6347
 
 
6348
 
-static void free_pio_guest_pages(struct kvm_vcpu *vcpu)
6349
 
+void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
6350
 
 {
6351
 
-       int i;
6352
 
-
6353
 
-       for (i = 0; i < ARRAY_SIZE(vcpu->pio.guest_pages); ++i)
6354
 
-               if (vcpu->pio.guest_pages[i]) {
6355
 
-                       __free_page(vcpu->pio.guest_pages[i]);
6356
 
-                       vcpu->pio.guest_pages[i] = NULL;
6357
 
-               }
6358
 
+       /* TODO: reserve bits check */
6359
 
+       if (irqchip_in_kernel(vcpu->kvm))
6360
 
+               kvm_lapic_set_base(vcpu, data);
6361
 
+       else
6362
 
+               vcpu->arch.apic_base = data;
6363
 
 }
6364
 
+EXPORT_SYMBOL_GPL(kvm_set_apic_base);
6365
 
 
6366
 
-static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
6367
 
+void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
6368
 
 {
6369
 
-       vcpu_load(vcpu);
6370
 
-       kvm_mmu_unload(vcpu);
6371
 
-       vcpu_put(vcpu);
6372
 
+       WARN_ON(vcpu->arch.exception.pending);
6373
 
+       vcpu->arch.exception.pending = true;
6374
 
+       vcpu->arch.exception.has_error_code = false;
6375
 
+       vcpu->arch.exception.nr = nr;
6376
 
 }
6377
 
+EXPORT_SYMBOL_GPL(kvm_queue_exception);
6378
 
 
6379
 
-static void kvm_free_vcpus(struct kvm *kvm)
6380
 
+void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
6381
 
+                          u32 error_code)
6382
 
 {
6383
 
-       unsigned int i;
6384
 
-
6385
 
-       /*
6386
 
-        * Unpin any mmu pages first.
6387
 
-        */
6388
 
-       for (i = 0; i < KVM_MAX_VCPUS; ++i)
6389
 
-               if (kvm->vcpus[i])
6390
 
-                       kvm_unload_vcpu_mmu(kvm->vcpus[i]);
6391
 
-       for (i = 0; i < KVM_MAX_VCPUS; ++i) {
6392
 
-               if (kvm->vcpus[i]) {
6393
 
-                       kvm_x86_ops->vcpu_free(kvm->vcpus[i]);
6394
 
-                       kvm->vcpus[i] = NULL;
6395
 
-               }
6396
 
+       ++vcpu->stat.pf_guest;
6397
 
+       if (vcpu->arch.exception.pending && vcpu->arch.exception.nr == PF_VECTOR) {
6398
 
+               printk(KERN_DEBUG "kvm: inject_page_fault:"
6399
 
+                      " double fault 0x%lx\n", addr);
6400
 
+               vcpu->arch.exception.nr = DF_VECTOR;
6401
 
+               vcpu->arch.exception.error_code = 0;
6402
 
+               return;
6403
 
        }
6404
 
-
6405
 
+       vcpu->arch.cr2 = addr;
6406
 
+       kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
6407
 
 }
6408
 
 
6409
 
-static void kvm_destroy_vm(struct kvm *kvm)
6410
 
+void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
6411
 
 {
6412
 
-       spin_lock(&kvm_lock);
6413
 
-       list_del(&kvm->vm_list);
6414
 
-       spin_unlock(&kvm_lock);
6415
 
-       kvm_io_bus_destroy(&kvm->pio_bus);
6416
 
-       kvm_io_bus_destroy(&kvm->mmio_bus);
6417
 
-       kfree(kvm->vpic);
6418
 
-       kfree(kvm->vioapic);
6419
 
-       kvm_free_vcpus(kvm);
6420
 
-       kvm_free_physmem(kvm);
6421
 
-       kfree(kvm);
6422
 
-}
6423
 
-
6424
 
-static int kvm_vm_release(struct inode *inode, struct file *filp)
6425
 
-{
6426
 
-       struct kvm *kvm = filp->private_data;
6427
 
-
6428
 
-       kvm_destroy_vm(kvm);
6429
 
-       return 0;
6430
 
+       WARN_ON(vcpu->arch.exception.pending);
6431
 
+       vcpu->arch.exception.pending = true;
6432
 
+       vcpu->arch.exception.has_error_code = true;
6433
 
+       vcpu->arch.exception.nr = nr;
6434
 
+       vcpu->arch.exception.error_code = error_code;
6435
 
 }
6436
 
+EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
6437
 
 
6438
 
-static void inject_gp(struct kvm_vcpu *vcpu)
6439
 
+static void __queue_exception(struct kvm_vcpu *vcpu)
6440
 
 {
6441
 
-       kvm_x86_ops->inject_gp(vcpu, 0);
6442
 
+       kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
6443
 
+                                    vcpu->arch.exception.has_error_code,
6444
 
+                                    vcpu->arch.exception.error_code);
6445
 
 }
6446
 
 
6447
 
 /*
6448
 
  * Load the pae pdptrs.  Return true if they are all valid.
6449
 
  */
6450
 
-static int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
6451
 
+int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
6452
 
 {
6453
 
        gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
6454
 
        unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
6455
 
        int i;
6456
 
-       u64 *pdpt;
6457
 
        int ret;
6458
 
-       struct page *page;
6459
 
-       u64 pdpte[ARRAY_SIZE(vcpu->pdptrs)];
6460
 
+       u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)];
6461
 
 
6462
 
        mutex_lock(&vcpu->kvm->lock);
6463
 
-       page = gfn_to_page(vcpu->kvm, pdpt_gfn);
6464
 
-       if (!page) {
6465
 
+       ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte,
6466
 
+                                 offset * sizeof(u64), sizeof(pdpte));
6467
 
+       if (ret < 0) {
6468
 
                ret = 0;
6469
 
                goto out;
6470
 
        }
6471
 
-
6472
 
-       pdpt = kmap_atomic(page, KM_USER0);
6473
 
-       memcpy(pdpte, pdpt+offset, sizeof(pdpte));
6474
 
-       kunmap_atomic(pdpt, KM_USER0);
6475
 
-
6476
 
        for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
6477
 
                if ((pdpte[i] & 1) && (pdpte[i] & 0xfffffff0000001e6ull)) {
6478
 
                        ret = 0;
6479
 
@@ -421,67 +195,87 @@ static int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
6480
 
        }
6481
 
        ret = 1;
6482
 
 
6483
 
-       memcpy(vcpu->pdptrs, pdpte, sizeof(vcpu->pdptrs));
6484
 
+       memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs));
6485
 
 out:
6486
 
        mutex_unlock(&vcpu->kvm->lock);
6487
 
 
6488
 
        return ret;
6489
 
 }
6490
 
 
6491
 
+static bool pdptrs_changed(struct kvm_vcpu *vcpu)
6492
 
+{
6493
 
+       u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)];
6494
 
+       bool changed = true;
6495
 
+       int r;
6496
 
+
6497
 
+       if (is_long_mode(vcpu) || !is_pae(vcpu))
6498
 
+               return false;
6499
 
+
6500
 
+       mutex_lock(&vcpu->kvm->lock);
6501
 
+       r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte));
6502
 
+       if (r < 0)
6503
 
+               goto out;
6504
 
+       changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0;
6505
 
+out:
6506
 
+       mutex_unlock(&vcpu->kvm->lock);
6507
 
+
6508
 
+       return changed;
6509
 
+}
6510
 
+
6511
 
 void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
6512
 
 {
6513
 
        if (cr0 & CR0_RESERVED_BITS) {
6514
 
                printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
6515
 
-                      cr0, vcpu->cr0);
6516
 
-               inject_gp(vcpu);
6517
 
+                      cr0, vcpu->arch.cr0);
6518
 
+               kvm_inject_gp(vcpu, 0);
6519
 
                return;
6520
 
        }
6521
 
 
6522
 
        if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
6523
 
                printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
6524
 
-               inject_gp(vcpu);
6525
 
+               kvm_inject_gp(vcpu, 0);
6526
 
                return;
6527
 
        }
6528
 
 
6529
 
        if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) {
6530
 
                printk(KERN_DEBUG "set_cr0: #GP, set PG flag "
6531
 
                       "and a clear PE flag\n");
6532
 
-               inject_gp(vcpu);
6533
 
+               kvm_inject_gp(vcpu, 0);
6534
 
                return;
6535
 
        }
6536
 
 
6537
 
        if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
6538
 
 #ifdef CONFIG_X86_64
6539
 
-               if ((vcpu->shadow_efer & EFER_LME)) {
6540
 
+               if ((vcpu->arch.shadow_efer & EFER_LME)) {
6541
 
                        int cs_db, cs_l;
6542
 
 
6543
 
                        if (!is_pae(vcpu)) {
6544
 
                                printk(KERN_DEBUG "set_cr0: #GP, start paging "
6545
 
                                       "in long mode while PAE is disabled\n");
6546
 
-                               inject_gp(vcpu);
6547
 
+                               kvm_inject_gp(vcpu, 0);
6548
 
                                return;
6549
 
                        }
6550
 
                        kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
6551
 
                        if (cs_l) {
6552
 
                                printk(KERN_DEBUG "set_cr0: #GP, start paging "
6553
 
                                       "in long mode while CS.L == 1\n");
6554
 
-                               inject_gp(vcpu);
6555
 
+                               kvm_inject_gp(vcpu, 0);
6556
 
                                return;
6557
 
 
6558
 
                        }
6559
 
                } else
6560
 
 #endif
6561
 
-               if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->cr3)) {
6562
 
+               if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
6563
 
                        printk(KERN_DEBUG "set_cr0: #GP, pdptrs "
6564
 
                               "reserved bits\n");
6565
 
-                       inject_gp(vcpu);
6566
 
+                       kvm_inject_gp(vcpu, 0);
6567
 
                        return;
6568
 
                }
6569
 
 
6570
 
        }
6571
 
 
6572
 
        kvm_x86_ops->set_cr0(vcpu, cr0);
6573
 
-       vcpu->cr0 = cr0;
6574
 
+       vcpu->arch.cr0 = cr0;
6575
 
 
6576
 
        mutex_lock(&vcpu->kvm->lock);
6577
 
        kvm_mmu_reset_context(vcpu);
6578
 
@@ -492,7 +286,7 @@ EXPORT_SYMBOL_GPL(set_cr0);
6579
 
 
6580
 
 void lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
6581
 
 {
6582
 
-       set_cr0(vcpu, (vcpu->cr0 & ~0x0ful) | (msw & 0x0f));
6583
 
+       set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f));
6584
 
 }
6585
 
 EXPORT_SYMBOL_GPL(lmsw);
6586
 
 
6587
 
@@ -500,7 +294,7 @@ void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
6588
 
 {
6589
 
        if (cr4 & CR4_RESERVED_BITS) {
6590
 
                printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
6591
 
-               inject_gp(vcpu);
6592
 
+               kvm_inject_gp(vcpu, 0);
6593
 
                return;
6594
 
        }
6595
 
 
6596
 
@@ -508,23 +302,23 @@ void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
6597
 
                if (!(cr4 & X86_CR4_PAE)) {
6598
 
                        printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while "
6599
 
                               "in long mode\n");
6600
 
-                       inject_gp(vcpu);
6601
 
+                       kvm_inject_gp(vcpu, 0);
6602
 
                        return;
6603
 
                }
6604
 
        } else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & X86_CR4_PAE)
6605
 
-                  && !load_pdptrs(vcpu, vcpu->cr3)) {
6606
 
+                  && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
6607
 
                printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
6608
 
-               inject_gp(vcpu);
6609
 
+               kvm_inject_gp(vcpu, 0);
6610
 
                return;
6611
 
        }
6612
 
 
6613
 
        if (cr4 & X86_CR4_VMXE) {
6614
 
                printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n");
6615
 
-               inject_gp(vcpu);
6616
 
+               kvm_inject_gp(vcpu, 0);
6617
 
                return;
6618
 
        }
6619
 
        kvm_x86_ops->set_cr4(vcpu, cr4);
6620
 
-       vcpu->cr4 = cr4;
6621
 
+       vcpu->arch.cr4 = cr4;
6622
 
        mutex_lock(&vcpu->kvm->lock);
6623
 
        kvm_mmu_reset_context(vcpu);
6624
 
        mutex_unlock(&vcpu->kvm->lock);
6625
 
@@ -533,10 +327,15 @@ EXPORT_SYMBOL_GPL(set_cr4);
6626
 
 
6627
 
 void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
6628
 
 {
6629
 
+       if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) {
6630
 
+               kvm_mmu_flush_tlb(vcpu);
6631
 
+               return;
6632
 
+       }
6633
 
+
6634
 
        if (is_long_mode(vcpu)) {
6635
 
                if (cr3 & CR3_L_MODE_RESERVED_BITS) {
6636
 
                        printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
6637
 
-                       inject_gp(vcpu);
6638
 
+                       kvm_inject_gp(vcpu, 0);
6639
 
                        return;
6640
 
                }
6641
 
        } else {
6642
 
@@ -544,23 +343,20 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
6643
 
                        if (cr3 & CR3_PAE_RESERVED_BITS) {
6644
 
                                printk(KERN_DEBUG
6645
 
                                       "set_cr3: #GP, reserved bits\n");
6646
 
-                               inject_gp(vcpu);
6647
 
+                               kvm_inject_gp(vcpu, 0);
6648
 
                                return;
6649
 
                        }
6650
 
                        if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) {
6651
 
                                printk(KERN_DEBUG "set_cr3: #GP, pdptrs "
6652
 
                                       "reserved bits\n");
6653
 
-                               inject_gp(vcpu);
6654
 
-                               return;
6655
 
-                       }
6656
 
-               } else {
6657
 
-                       if (cr3 & CR3_NONPAE_RESERVED_BITS) {
6658
 
-                               printk(KERN_DEBUG
6659
 
-                                      "set_cr3: #GP, reserved bits\n");
6660
 
-                               inject_gp(vcpu);
6661
 
+                               kvm_inject_gp(vcpu, 0);
6662
 
                                return;
6663
 
                        }
6664
 
                }
6665
 
+               /*
6666
 
+                * We don't check reserved bits in nonpae mode, because
6667
 
+                * this isn't enforced, and VMware depends on this.
6668
 
+                */
6669
 
        }
6670
 
 
6671
 
        mutex_lock(&vcpu->kvm->lock);
6672
 
@@ -574,10 +370,10 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
6673
 
         * to debug) behavior on the guest side.
6674
 
         */
6675
 
        if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT)))
6676
 
-               inject_gp(vcpu);
6677
 
+               kvm_inject_gp(vcpu, 0);
6678
 
        else {
6679
 
-               vcpu->cr3 = cr3;
6680
 
-               vcpu->mmu.new_cr3(vcpu);
6681
 
+               vcpu->arch.cr3 = cr3;
6682
 
+               vcpu->arch.mmu.new_cr3(vcpu);
6683
 
        }
6684
 
        mutex_unlock(&vcpu->kvm->lock);
6685
 
 }
6686
 
@@ -587,13 +383,13 @@ void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
6687
 
 {
6688
 
        if (cr8 & CR8_RESERVED_BITS) {
6689
 
                printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
6690
 
-               inject_gp(vcpu);
6691
 
+               kvm_inject_gp(vcpu, 0);
6692
 
                return;
6693
 
        }
6694
 
        if (irqchip_in_kernel(vcpu->kvm))
6695
 
                kvm_lapic_set_tpr(vcpu, cr8);
6696
 
        else
6697
 
-               vcpu->cr8 = cr8;
6698
 
+               vcpu->arch.cr8 = cr8;
6699
 
 }
6700
 
 EXPORT_SYMBOL_GPL(set_cr8);
6701
 
 
6702
 
@@ -602,210 +398,806 @@ unsigned long get_cr8(struct kvm_vcpu *vcpu)
6703
 
        if (irqchip_in_kernel(vcpu->kvm))
6704
 
                return kvm_lapic_get_cr8(vcpu);
6705
 
        else
6706
 
-               return vcpu->cr8;
6707
 
+               return vcpu->arch.cr8;
6708
 
 }
6709
 
 EXPORT_SYMBOL_GPL(get_cr8);
6710
 
 
6711
 
-u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
6712
 
+/*
6713
 
+ * List of msr numbers which we expose to userspace through KVM_GET_MSRS
6714
 
+ * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
6715
 
+ *
6716
 
+ * This list is modified at module load time to reflect the
6717
 
+ * capabilities of the host cpu.
6718
 
+ */
6719
 
+static u32 msrs_to_save[] = {
6720
 
+       MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
6721
 
+       MSR_K6_STAR,
6722
 
+#ifdef CONFIG_X86_64
6723
 
+       MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
6724
 
+#endif
6725
 
+       MSR_IA32_TIME_STAMP_COUNTER,
6726
 
+};
6727
 
+
6728
 
+static unsigned num_msrs_to_save;
6729
 
+
6730
 
+static u32 emulated_msrs[] = {
6731
 
+       MSR_IA32_MISC_ENABLE,
6732
 
+};
6733
 
+
6734
 
+#ifdef CONFIG_X86_64
6735
 
+
6736
 
+static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
6737
 
 {
6738
 
-       if (irqchip_in_kernel(vcpu->kvm))
6739
 
-               return vcpu->apic_base;
6740
 
-       else
6741
 
-               return vcpu->apic_base;
6742
 
+       if (efer & EFER_RESERVED_BITS) {
6743
 
+               printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n",
6744
 
+                      efer);
6745
 
+               kvm_inject_gp(vcpu, 0);
6746
 
+               return;
6747
 
+       }
6748
 
+
6749
 
+       if (is_paging(vcpu)
6750
 
+           && (vcpu->arch.shadow_efer & EFER_LME) != (efer & EFER_LME)) {
6751
 
+               printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n");
6752
 
+               kvm_inject_gp(vcpu, 0);
6753
 
+               return;
6754
 
+       }
6755
 
+
6756
 
+       kvm_x86_ops->set_efer(vcpu, efer);
6757
 
+
6758
 
+       efer &= ~EFER_LMA;
6759
 
+       efer |= vcpu->arch.shadow_efer & EFER_LMA;
6760
 
+
6761
 
+       vcpu->arch.shadow_efer = efer;
6762
 
 }
6763
 
-EXPORT_SYMBOL_GPL(kvm_get_apic_base);
6764
 
 
6765
 
-void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
6766
 
+#endif
6767
 
+
6768
 
+/*
6769
 
+ * Writes msr value into the appropriate "register".
6770
 
+ * Returns 0 on success, non-0 otherwise.
6771
 
+ * Assumes vcpu_load() was already called.
6772
 
+ */
6773
 
+int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
6774
 
 {
6775
 
-       /* TODO: reserve bits check */
6776
 
-       if (irqchip_in_kernel(vcpu->kvm))
6777
 
-               kvm_lapic_set_base(vcpu, data);
6778
 
-       else
6779
 
-               vcpu->apic_base = data;
6780
 
+       return kvm_x86_ops->set_msr(vcpu, msr_index, data);
6781
 
 }
6782
 
-EXPORT_SYMBOL_GPL(kvm_set_apic_base);
6783
 
 
6784
 
-void fx_init(struct kvm_vcpu *vcpu)
6785
 
+/*
6786
 
+ * Adapt set_msr() to msr_io()'s calling convention
6787
 
+ */
6788
 
+static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
6789
 
 {
6790
 
-       unsigned after_mxcsr_mask;
6791
 
+       return kvm_set_msr(vcpu, index, *data);
6792
 
+}
6793
 
 
6794
 
-       /* Initialize guest FPU by resetting ours and saving into guest's */
6795
 
-       preempt_disable();
6796
 
-       fx_save(&vcpu->host_fx_image);
6797
 
-       fpu_init();
6798
 
-       fx_save(&vcpu->guest_fx_image);
6799
 
-       fx_restore(&vcpu->host_fx_image);
6800
 
-       preempt_enable();
6801
 
 
6802
 
-       vcpu->cr0 |= X86_CR0_ET;
6803
 
-       after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space);
6804
 
-       vcpu->guest_fx_image.mxcsr = 0x1f80;
6805
 
-       memset((void *)&vcpu->guest_fx_image + after_mxcsr_mask,
6806
 
-              0, sizeof(struct i387_fxsave_struct) - after_mxcsr_mask);
6807
 
+int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
6808
 
+{
6809
 
+       switch (msr) {
6810
 
+#ifdef CONFIG_X86_64
6811
 
+       case MSR_EFER:
6812
 
+               set_efer(vcpu, data);
6813
 
+               break;
6814
 
+#endif
6815
 
+       case MSR_IA32_MC0_STATUS:
6816
 
+               pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n",
6817
 
+                      __FUNCTION__, data);
6818
 
+               break;
6819
 
+       case MSR_IA32_MCG_STATUS:
6820
 
+               pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n",
6821
 
+                       __FUNCTION__, data);
6822
 
+               break;
6823
 
+       case MSR_IA32_UCODE_REV:
6824
 
+       case MSR_IA32_UCODE_WRITE:
6825
 
+       case 0x200 ... 0x2ff: /* MTRRs */
6826
 
+               break;
6827
 
+       case MSR_IA32_APICBASE:
6828
 
+               kvm_set_apic_base(vcpu, data);
6829
 
+               break;
6830
 
+       case MSR_IA32_MISC_ENABLE:
6831
 
+               vcpu->arch.ia32_misc_enable_msr = data;
6832
 
+               break;
6833
 
+       default:
6834
 
+               pr_unimpl(vcpu, "unhandled wrmsr: 0x%x\n", msr);
6835
 
+               return 1;
6836
 
+       }
6837
 
+       return 0;
6838
 
 }
6839
 
-EXPORT_SYMBOL_GPL(fx_init);
6840
 
+EXPORT_SYMBOL_GPL(kvm_set_msr_common);
6841
 
+
6842
 
+
6843
 
+/*
6844
 
+ * Reads an msr value (of 'msr_index') into 'pdata'.
6845
 
+ * Returns 0 on success, non-0 otherwise.
6846
 
+ * Assumes vcpu_load() was already called.
6847
 
+ */
6848
 
+int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
6849
 
+{
6850
 
+       return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
6851
 
+}
6852
 
+
6853
 
+int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
6854
 
+{
6855
 
+       u64 data;
6856
 
+
6857
 
+       switch (msr) {
6858
 
+       case 0xc0010010: /* SYSCFG */
6859
 
+       case 0xc0010015: /* HWCR */
6860
 
+       case MSR_IA32_PLATFORM_ID:
6861
 
+       case MSR_IA32_P5_MC_ADDR:
6862
 
+       case MSR_IA32_P5_MC_TYPE:
6863
 
+       case MSR_IA32_MC0_CTL:
6864
 
+       case MSR_IA32_MCG_STATUS:
6865
 
+       case MSR_IA32_MCG_CAP:
6866
 
+       case MSR_IA32_MC0_MISC:
6867
 
+       case MSR_IA32_MC0_MISC+4:
6868
 
+       case MSR_IA32_MC0_MISC+8:
6869
 
+       case MSR_IA32_MC0_MISC+12:
6870
 
+       case MSR_IA32_MC0_MISC+16:
6871
 
+       case MSR_IA32_UCODE_REV:
6872
 
+       case MSR_IA32_PERF_STATUS:
6873
 
+       case MSR_IA32_EBL_CR_POWERON:
6874
 
+               /* MTRR registers */
6875
 
+       case 0xfe:
6876
 
+       case 0x200 ... 0x2ff:
6877
 
+               data = 0;
6878
 
+               break;
6879
 
+       case 0xcd: /* fsb frequency */
6880
 
+               data = 3;
6881
 
+               break;
6882
 
+       case MSR_IA32_APICBASE:
6883
 
+               data = kvm_get_apic_base(vcpu);
6884
 
+               break;
6885
 
+       case MSR_IA32_MISC_ENABLE:
6886
 
+               data = vcpu->arch.ia32_misc_enable_msr;
6887
 
+               break;
6888
 
+#ifdef CONFIG_X86_64
6889
 
+       case MSR_EFER:
6890
 
+               data = vcpu->arch.shadow_efer;
6891
 
+               break;
6892
 
+#endif
6893
 
+       default:
6894
 
+               pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
6895
 
+               return 1;
6896
 
+       }
6897
 
+       *pdata = data;
6898
 
+       return 0;
6899
 
+}
6900
 
+EXPORT_SYMBOL_GPL(kvm_get_msr_common);
6901
 
 
6902
 
 /*
6903
 
- * Allocate some memory and give it an address in the guest physical address
6904
 
- * space.
6905
 
+ * Read or write a bunch of msrs. All parameters are kernel addresses.
6906
 
  *
6907
 
- * Discontiguous memory is allowed, mostly for framebuffers.
6908
 
+ * @return number of msrs set successfully.
6909
 
  */
6910
 
-static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
6911
 
-                                         struct kvm_memory_region *mem)
6912
 
+static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
6913
 
+                   struct kvm_msr_entry *entries,
6914
 
+                   int (*do_msr)(struct kvm_vcpu *vcpu,
6915
 
+                                 unsigned index, u64 *data))
6916
 
 {
6917
 
-       int r;
6918
 
-       gfn_t base_gfn;
6919
 
-       unsigned long npages;
6920
 
-       unsigned long i;
6921
 
-       struct kvm_memory_slot *memslot;
6922
 
-       struct kvm_memory_slot old, new;
6923
 
+       int i;
6924
 
 
6925
 
-       r = -EINVAL;
6926
 
-       /* General sanity checks */
6927
 
-       if (mem->memory_size & (PAGE_SIZE - 1))
6928
 
-               goto out;
6929
 
-       if (mem->guest_phys_addr & (PAGE_SIZE - 1))
6930
 
+       vcpu_load(vcpu);
6931
 
+
6932
 
+       for (i = 0; i < msrs->nmsrs; ++i)
6933
 
+               if (do_msr(vcpu, entries[i].index, &entries[i].data))
6934
 
+                       break;
6935
 
+
6936
 
+       vcpu_put(vcpu);
6937
 
+
6938
 
+       return i;
6939
 
+}
6940
 
+
6941
 
+/*
6942
 
+ * Read or write a bunch of msrs. Parameters are user addresses.
6943
 
+ *
6944
 
+ * @return number of msrs set successfully.
6945
 
+ */
6946
 
+static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
6947
 
+                 int (*do_msr)(struct kvm_vcpu *vcpu,
6948
 
+                               unsigned index, u64 *data),
6949
 
+                 int writeback)
6950
 
+{
6951
 
+       struct kvm_msrs msrs;
6952
 
+       struct kvm_msr_entry *entries;
6953
 
+       int r, n;
6954
 
+       unsigned size;
6955
 
+
6956
 
+       r = -EFAULT;
6957
 
+       if (copy_from_user(&msrs, user_msrs, sizeof msrs))
6958
 
                goto out;
6959
 
-       if (mem->slot >= KVM_MEMORY_SLOTS)
6960
 
+
6961
 
+       r = -E2BIG;
6962
 
+       if (msrs.nmsrs >= MAX_IO_MSRS)
6963
 
                goto out;
6964
 
-       if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
6965
 
+
6966
 
+       r = -ENOMEM;
6967
 
+       size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
6968
 
+       entries = vmalloc(size);
6969
 
+       if (!entries)
6970
 
                goto out;
6971
 
 
6972
 
-       memslot = &kvm->memslots[mem->slot];
6973
 
-       base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
6974
 
-       npages = mem->memory_size >> PAGE_SHIFT;
6975
 
+       r = -EFAULT;
6976
 
+       if (copy_from_user(entries, user_msrs->entries, size))
6977
 
+               goto out_free;
6978
 
 
6979
 
-       if (!npages)
6980
 
-               mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;
6981
 
+       r = n = __msr_io(vcpu, &msrs, entries, do_msr);
6982
 
+       if (r < 0)
6983
 
+               goto out_free;
6984
 
 
6985
 
-       mutex_lock(&kvm->lock);
6986
 
+       r = -EFAULT;
6987
 
+       if (writeback && copy_to_user(user_msrs->entries, entries, size))
6988
 
+               goto out_free;
6989
 
 
6990
 
-       new = old = *memslot;
6991
 
+       r = n;
6992
 
 
6993
 
-       new.base_gfn = base_gfn;
6994
 
-       new.npages = npages;
6995
 
-       new.flags = mem->flags;
6996
 
+out_free:
6997
 
+       vfree(entries);
6998
 
+out:
6999
 
+       return r;
7000
 
+}
7001
 
 
7002
 
-       /* Disallow changing a memory slot's size. */
7003
 
-       r = -EINVAL;
7004
 
-       if (npages && old.npages && npages != old.npages)
7005
 
-               goto out_unlock;
7006
 
+/*
7007
 
+ * Make sure that a cpu that is being hot-unplugged does not have any vcpus
7008
 
+ * cached on it.
7009
 
+ */
7010
 
+void decache_vcpus_on_cpu(int cpu)
7011
 
+{
7012
 
+       struct kvm *vm;
7013
 
+       struct kvm_vcpu *vcpu;
7014
 
+       int i;
7015
 
+
7016
 
+       spin_lock(&kvm_lock);
7017
 
+       list_for_each_entry(vm, &vm_list, vm_list)
7018
 
+               for (i = 0; i < KVM_MAX_VCPUS; ++i) {
7019
 
+                       vcpu = vm->vcpus[i];
7020
 
+                       if (!vcpu)
7021
 
+                               continue;
7022
 
+                       /*
7023
 
+                        * If the vcpu is locked, then it is running on some
7024
 
+                        * other cpu and therefore it is not cached on the
7025
 
+                        * cpu in question.
7026
 
+                        *
7027
 
+                        * If it's not locked, check the last cpu it executed
7028
 
+                        * on.
7029
 
+                        */
7030
 
+                       if (mutex_trylock(&vcpu->mutex)) {
7031
 
+                               if (vcpu->cpu == cpu) {
7032
 
+                                       kvm_x86_ops->vcpu_decache(vcpu);
7033
 
+                                       vcpu->cpu = -1;
7034
 
+                               }
7035
 
+                               mutex_unlock(&vcpu->mutex);
7036
 
+                       }
7037
 
+               }
7038
 
+       spin_unlock(&kvm_lock);
7039
 
+}
7040
 
 
7041
 
-       /* Check for overlaps */
7042
 
-       r = -EEXIST;
7043
 
-       for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
7044
 
-               struct kvm_memory_slot *s = &kvm->memslots[i];
7045
 
+int kvm_dev_ioctl_check_extension(long ext)
7046
 
+{
7047
 
+       int r;
7048
 
 
7049
 
-               if (s == memslot)
7050
 
-                       continue;
7051
 
-               if (!((base_gfn + npages <= s->base_gfn) ||
7052
 
-                     (base_gfn >= s->base_gfn + s->npages)))
7053
 
-                       goto out_unlock;
7054
 
+       switch (ext) {
7055
 
+       case KVM_CAP_IRQCHIP:
7056
 
+       case KVM_CAP_HLT:
7057
 
+       case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
7058
 
+       case KVM_CAP_USER_MEMORY:
7059
 
+       case KVM_CAP_SET_TSS_ADDR:
7060
 
+       case KVM_CAP_EXT_CPUID:
7061
 
+               r = 1;
7062
 
+               break;
7063
 
+       default:
7064
 
+               r = 0;
7065
 
+               break;
7066
 
        }
7067
 
+       return r;
7068
 
 
7069
 
-       /* Deallocate if slot is being removed */
7070
 
-       if (!npages)
7071
 
-               new.phys_mem = NULL;
7072
 
+}
7073
 
 
7074
 
-       /* Free page dirty bitmap if unneeded */
7075
 
-       if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))
7076
 
-               new.dirty_bitmap = NULL;
7077
 
+long kvm_arch_dev_ioctl(struct file *filp,
7078
 
+                       unsigned int ioctl, unsigned long arg)
7079
 
+{
7080
 
+       void __user *argp = (void __user *)arg;
7081
 
+       long r;
7082
 
 
7083
 
-       r = -ENOMEM;
7084
 
+       switch (ioctl) {
7085
 
+       case KVM_GET_MSR_INDEX_LIST: {
7086
 
+               struct kvm_msr_list __user *user_msr_list = argp;
7087
 
+               struct kvm_msr_list msr_list;
7088
 
+               unsigned n;
7089
 
 
7090
 
-       /* Allocate if a slot is being created */
7091
 
-       if (npages && !new.phys_mem) {
7092
 
-               new.phys_mem = vmalloc(npages * sizeof(struct page *));
7093
 
+               r = -EFAULT;
7094
 
+               if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
7095
 
+                       goto out;
7096
 
+               n = msr_list.nmsrs;
7097
 
+               msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
7098
 
+               if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
7099
 
+                       goto out;
7100
 
+               r = -E2BIG;
7101
 
+               if (n < num_msrs_to_save)
7102
 
+                       goto out;
7103
 
+               r = -EFAULT;
7104
 
+               if (copy_to_user(user_msr_list->indices, &msrs_to_save,
7105
 
+                                num_msrs_to_save * sizeof(u32)))
7106
 
+                       goto out;
7107
 
+               if (copy_to_user(user_msr_list->indices
7108
 
+                                + num_msrs_to_save * sizeof(u32),
7109
 
+                                &emulated_msrs,
7110
 
+                                ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
7111
 
+                       goto out;
7112
 
+               r = 0;
7113
 
+               break;
7114
 
+       }
7115
 
+       default:
7116
 
+               r = -EINVAL;
7117
 
+       }
7118
 
+out:
7119
 
+       return r;
7120
 
+}
7121
 
 
7122
 
-               if (!new.phys_mem)
7123
 
-                       goto out_unlock;
7124
 
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
7125
 
+{
7126
 
+       kvm_x86_ops->vcpu_load(vcpu, cpu);
7127
 
+}
7128
 
 
7129
 
-               memset(new.phys_mem, 0, npages * sizeof(struct page *));
7130
 
-               for (i = 0; i < npages; ++i) {
7131
 
-                       new.phys_mem[i] = alloc_page(GFP_HIGHUSER
7132
 
-                                                    | __GFP_ZERO);
7133
 
-                       if (!new.phys_mem[i])
7134
 
-                               goto out_unlock;
7135
 
-                       set_page_private(new.phys_mem[i],0);
7136
 
-               }
7137
 
-       }
7138
 
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
7139
 
+{
7140
 
+       kvm_x86_ops->vcpu_put(vcpu);
7141
 
+       kvm_put_guest_fpu(vcpu);
7142
 
+}
7143
 
+
7144
 
+static int is_efer_nx(void)
7145
 
+{
7146
 
+       u64 efer;
7147
 
 
7148
 
-       /* Allocate page dirty bitmap if needed */
7149
 
-       if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
7150
 
-               unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8;
7151
 
+       rdmsrl(MSR_EFER, efer);
7152
 
+       return efer & EFER_NX;
7153
 
+}
7154
 
+
7155
 
+static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
7156
 
+{
7157
 
+       int i;
7158
 
+       struct kvm_cpuid_entry2 *e, *entry;
7159
 
 
7160
 
-               new.dirty_bitmap = vmalloc(dirty_bytes);
7161
 
-               if (!new.dirty_bitmap)
7162
 
-                       goto out_unlock;
7163
 
-               memset(new.dirty_bitmap, 0, dirty_bytes);
7164
 
+       entry = NULL;
7165
 
+       for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
7166
 
+               e = &vcpu->arch.cpuid_entries[i];
7167
 
+               if (e->function == 0x80000001) {
7168
 
+                       entry = e;
7169
 
+                       break;
7170
 
+               }
7171
 
+       }
7172
 
+       if (entry && (entry->edx & (1 << 20)) && !is_efer_nx()) {
7173
 
+               entry->edx &= ~(1 << 20);
7174
 
+               printk(KERN_INFO "kvm: guest NX capability removed\n");
7175
 
        }
7176
 
+}
7177
 
 
7178
 
-       if (mem->slot >= kvm->nmemslots)
7179
 
-               kvm->nmemslots = mem->slot + 1;
7180
 
+/* when an old userspace process fills a new kernel module */
7181
 
+static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
7182
 
+                                   struct kvm_cpuid *cpuid,
7183
 
+                                   struct kvm_cpuid_entry __user *entries)
7184
 
+{
7185
 
+       int r, i;
7186
 
+       struct kvm_cpuid_entry *cpuid_entries;
7187
 
 
7188
 
-       *memslot = new;
7189
 
+       r = -E2BIG;
7190
 
+       if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
7191
 
+               goto out;
7192
 
+       r = -ENOMEM;
7193
 
+       cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry) * cpuid->nent);
7194
 
+       if (!cpuid_entries)
7195
 
+               goto out;
7196
 
+       r = -EFAULT;
7197
 
+       if (copy_from_user(cpuid_entries, entries,
7198
 
+                          cpuid->nent * sizeof(struct kvm_cpuid_entry)))
7199
 
+               goto out_free;
7200
 
+       for (i = 0; i < cpuid->nent; i++) {
7201
 
+               vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function;
7202
 
+               vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax;
7203
 
+               vcpu->arch.cpuid_entries[i].ebx = cpuid_entries[i].ebx;
7204
 
+               vcpu->arch.cpuid_entries[i].ecx = cpuid_entries[i].ecx;
7205
 
+               vcpu->arch.cpuid_entries[i].edx = cpuid_entries[i].edx;
7206
 
+               vcpu->arch.cpuid_entries[i].index = 0;
7207
 
+               vcpu->arch.cpuid_entries[i].flags = 0;
7208
 
+               vcpu->arch.cpuid_entries[i].padding[0] = 0;
7209
 
+               vcpu->arch.cpuid_entries[i].padding[1] = 0;
7210
 
+               vcpu->arch.cpuid_entries[i].padding[2] = 0;
7211
 
+       }
7212
 
+       vcpu->arch.cpuid_nent = cpuid->nent;
7213
 
+       cpuid_fix_nx_cap(vcpu);
7214
 
+       r = 0;
7215
 
 
7216
 
-       kvm_mmu_slot_remove_write_access(kvm, mem->slot);
7217
 
-       kvm_flush_remote_tlbs(kvm);
7218
 
+out_free:
7219
 
+       vfree(cpuid_entries);
7220
 
+out:
7221
 
+       return r;
7222
 
+}
7223
 
 
7224
 
-       mutex_unlock(&kvm->lock);
7225
 
+static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
7226
 
+                                   struct kvm_cpuid2 *cpuid,
7227
 
+                                   struct kvm_cpuid_entry2 __user *entries)
7228
 
+{
7229
 
+       int r;
7230
 
 
7231
 
-       kvm_free_physmem_slot(&old, &new);
7232
 
+       r = -E2BIG;
7233
 
+       if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
7234
 
+               goto out;
7235
 
+       r = -EFAULT;
7236
 
+       if (copy_from_user(&vcpu->arch.cpuid_entries, entries,
7237
 
+                          cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
7238
 
+               goto out;
7239
 
+       vcpu->arch.cpuid_nent = cpuid->nent;
7240
 
        return 0;
7241
 
 
7242
 
-out_unlock:
7243
 
-       mutex_unlock(&kvm->lock);
7244
 
-       kvm_free_physmem_slot(&new, &old);
7245
 
 out:
7246
 
        return r;
7247
 
 }
7248
 
 
7249
 
-/*
7250
 
- * Get (and clear) the dirty memory log for a memory slot.
7251
 
- */
7252
 
-static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
7253
 
-                                     struct kvm_dirty_log *log)
7254
 
+static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
7255
 
+                                   struct kvm_cpuid2 *cpuid,
7256
 
+                                   struct kvm_cpuid_entry2 __user *entries)
7257
 
 {
7258
 
-       struct kvm_memory_slot *memslot;
7259
 
-       int r, i;
7260
 
-       int n;
7261
 
-       unsigned long any = 0;
7262
 
-
7263
 
-       mutex_lock(&kvm->lock);
7264
 
+       int r;
7265
 
 
7266
 
-       r = -EINVAL;
7267
 
-       if (log->slot >= KVM_MEMORY_SLOTS)
7268
 
+       r = -E2BIG;
7269
 
+       if (cpuid->nent < vcpu->arch.cpuid_nent)
7270
 
                goto out;
7271
 
-
7272
 
-       memslot = &kvm->memslots[log->slot];
7273
 
-       r = -ENOENT;
7274
 
-       if (!memslot->dirty_bitmap)
7275
 
+       r = -EFAULT;
7276
 
+       if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
7277
 
+                          vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
7278
 
                goto out;
7279
 
+       return 0;
7280
 
+
7281
 
+out:
7282
 
+       cpuid->nent = vcpu->arch.cpuid_nent;
7283
 
+       return r;
7284
 
+}
7285
 
+
7286
 
+static inline u32 bit(int bitno)
7287
 
+{
7288
 
+       return 1 << (bitno & 31);
7289
 
+}
7290
 
+
7291
 
+static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
7292
 
+                         u32 index)
7293
 
+{
7294
 
+       entry->function = function;
7295
 
+       entry->index = index;
7296
 
+       cpuid_count(entry->function, entry->index,
7297
 
+               &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
7298
 
+       entry->flags = 0;
7299
 
+}
7300
 
+
7301
 
+static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
7302
 
+                        u32 index, int *nent, int maxnent)
7303
 
+{
7304
 
+       const u32 kvm_supported_word0_x86_features = bit(X86_FEATURE_FPU) |
7305
 
+               bit(X86_FEATURE_VME) | bit(X86_FEATURE_DE) |
7306
 
+               bit(X86_FEATURE_PSE) | bit(X86_FEATURE_TSC) |
7307
 
+               bit(X86_FEATURE_MSR) | bit(X86_FEATURE_PAE) |
7308
 
+               bit(X86_FEATURE_CX8) | bit(X86_FEATURE_APIC) |
7309
 
+               bit(X86_FEATURE_SEP) | bit(X86_FEATURE_PGE) |
7310
 
+               bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PSE36) |
7311
 
+               bit(X86_FEATURE_CLFLSH) | bit(X86_FEATURE_MMX) |
7312
 
+               bit(X86_FEATURE_FXSR) | bit(X86_FEATURE_XMM) |
7313
 
+               bit(X86_FEATURE_XMM2) | bit(X86_FEATURE_SELFSNOOP);
7314
 
+       const u32 kvm_supported_word1_x86_features = bit(X86_FEATURE_FPU) |
7315
 
+               bit(X86_FEATURE_VME) | bit(X86_FEATURE_DE) |
7316
 
+               bit(X86_FEATURE_PSE) | bit(X86_FEATURE_TSC) |
7317
 
+               bit(X86_FEATURE_MSR) | bit(X86_FEATURE_PAE) |
7318
 
+               bit(X86_FEATURE_CX8) | bit(X86_FEATURE_APIC) |
7319
 
+               bit(X86_FEATURE_PGE) |
7320
 
+               bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PSE36) |
7321
 
+               bit(X86_FEATURE_MMX) | bit(X86_FEATURE_FXSR) |
7322
 
+               bit(X86_FEATURE_SYSCALL) |
7323
 
+               (bit(X86_FEATURE_NX) && is_efer_nx()) |
7324
 
+#ifdef CONFIG_X86_64
7325
 
+               bit(X86_FEATURE_LM) |
7326
 
+#endif
7327
 
+               bit(X86_FEATURE_MMXEXT) |
7328
 
+               bit(X86_FEATURE_3DNOWEXT) |
7329
 
+               bit(X86_FEATURE_3DNOW);
7330
 
+       const u32 kvm_supported_word3_x86_features =
7331
 
+               bit(X86_FEATURE_XMM3) | bit(X86_FEATURE_CX16);
7332
 
+       const u32 kvm_supported_word6_x86_features =
7333
 
+               bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY);
7334
 
+
7335
 
+       /* all func 2 cpuid_count() should be called on the same cpu */
7336
 
+       get_cpu();
7337
 
+       do_cpuid_1_ent(entry, function, index);
7338
 
+       ++*nent;
7339
 
+
7340
 
+       switch (function) {
7341
 
+       case 0:
7342
 
+               entry->eax = min(entry->eax, (u32)0xb);
7343
 
+               break;
7344
 
+       case 1:
7345
 
+               entry->edx &= kvm_supported_word0_x86_features;
7346
 
+               entry->ecx &= kvm_supported_word3_x86_features;
7347
 
+               break;
7348
 
+       /* function 2 entries are STATEFUL. That is, repeated cpuid commands
7349
 
+        * may return different values. This forces us to get_cpu() before
7350
 
+        * issuing the first command, and also to emulate this annoying behavior
7351
 
+        * in kvm_emulate_cpuid() using KVM_CPUID_FLAG_STATE_READ_NEXT */
7352
 
+       case 2: {
7353
 
+               int t, times = entry->eax & 0xff;
7354
 
+
7355
 
+               entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
7356
 
+               for (t = 1; t < times && *nent < maxnent; ++t) {
7357
 
+                       do_cpuid_1_ent(&entry[t], function, 0);
7358
 
+                       entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
7359
 
+                       ++*nent;
7360
 
+               }
7361
 
+               break;
7362
 
+       }
7363
 
+       /* function 4 and 0xb have additional index. */
7364
 
+       case 4: {
7365
 
+               int index, cache_type;
7366
 
+
7367
 
+               entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
7368
 
+               /* read more entries until cache_type is zero */
7369
 
+               for (index = 1; *nent < maxnent; ++index) {
7370
 
+                       cache_type = entry[index - 1].eax & 0x1f;
7371
 
+                       if (!cache_type)
7372
 
+                               break;
7373
 
+                       do_cpuid_1_ent(&entry[index], function, index);
7374
 
+                       entry[index].flags |=
7375
 
+                              KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
7376
 
+                       ++*nent;
7377
 
+               }
7378
 
+               break;
7379
 
+       }
7380
 
+       case 0xb: {
7381
 
+               int index, level_type;
7382
 
+
7383
 
+               entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
7384
 
+               /* read more entries until level_type is zero */
7385
 
+               for (index = 1; *nent < maxnent; ++index) {
7386
 
+                       level_type = entry[index - 1].ecx & 0xff;
7387
 
+                       if (!level_type)
7388
 
+                               break;
7389
 
+                       do_cpuid_1_ent(&entry[index], function, index);
7390
 
+                       entry[index].flags |=
7391
 
+                              KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
7392
 
+                       ++*nent;
7393
 
+               }
7394
 
+               break;
7395
 
+       }
7396
 
+       case 0x80000000:
7397
 
+               entry->eax = min(entry->eax, 0x8000001a);
7398
 
+               break;
7399
 
+       case 0x80000001:
7400
 
+               entry->edx &= kvm_supported_word1_x86_features;
7401
 
+               entry->ecx &= kvm_supported_word6_x86_features;
7402
 
+               break;
7403
 
+       }
7404
 
+       put_cpu();
7405
 
+}
7406
 
 
7407
 
-       n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
7408
 
+static int kvm_vm_ioctl_get_supported_cpuid(struct kvm *kvm,
7409
 
+                                   struct kvm_cpuid2 *cpuid,
7410
 
+                                   struct kvm_cpuid_entry2 __user *entries)
7411
 
+{
7412
 
+       struct kvm_cpuid_entry2 *cpuid_entries;
7413
 
+       int limit, nent = 0, r = -E2BIG;
7414
 
+       u32 func;
7415
 
 
7416
 
-       for (i = 0; !any && i < n/sizeof(long); ++i)
7417
 
-               any = memslot->dirty_bitmap[i];
7418
 
+       if (cpuid->nent < 1)
7419
 
+               goto out;
7420
 
+       r = -ENOMEM;
7421
 
+       cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
7422
 
+       if (!cpuid_entries)
7423
 
+               goto out;
7424
 
 
7425
 
+       do_cpuid_ent(&cpuid_entries[0], 0, 0, &nent, cpuid->nent);
7426
 
+       limit = cpuid_entries[0].eax;
7427
 
+       for (func = 1; func <= limit && nent < cpuid->nent; ++func)
7428
 
+               do_cpuid_ent(&cpuid_entries[nent], func, 0,
7429
 
+                               &nent, cpuid->nent);
7430
 
+       r = -E2BIG;
7431
 
+       if (nent >= cpuid->nent)
7432
 
+               goto out_free;
7433
 
+
7434
 
+       do_cpuid_ent(&cpuid_entries[nent], 0x80000000, 0, &nent, cpuid->nent);
7435
 
+       limit = cpuid_entries[nent - 1].eax;
7436
 
+       for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func)
7437
 
+               do_cpuid_ent(&cpuid_entries[nent], func, 0,
7438
 
+                              &nent, cpuid->nent);
7439
 
        r = -EFAULT;
7440
 
-       if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
7441
 
-               goto out;
7442
 
+       if (copy_to_user(entries, cpuid_entries,
7443
 
+                       nent * sizeof(struct kvm_cpuid_entry2)))
7444
 
+               goto out_free;
7445
 
+       cpuid->nent = nent;
7446
 
+       r = 0;
7447
 
 
7448
 
-       /* If nothing is dirty, don't bother messing with page tables. */
7449
 
-       if (any) {
7450
 
-               kvm_mmu_slot_remove_write_access(kvm, log->slot);
7451
 
-               kvm_flush_remote_tlbs(kvm);
7452
 
-               memset(memslot->dirty_bitmap, 0, n);
7453
 
+out_free:
7454
 
+       vfree(cpuid_entries);
7455
 
+out:
7456
 
+       return r;
7457
 
+}
7458
 
+
7459
 
+static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
7460
 
+                                   struct kvm_lapic_state *s)
7461
 
+{
7462
 
+       vcpu_load(vcpu);
7463
 
+       memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
7464
 
+       vcpu_put(vcpu);
7465
 
+
7466
 
+       return 0;
7467
 
+}
7468
 
+
7469
 
+static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
7470
 
+                                   struct kvm_lapic_state *s)
7471
 
+{
7472
 
+       vcpu_load(vcpu);
7473
 
+       memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
7474
 
+       kvm_apic_post_state_restore(vcpu);
7475
 
+       vcpu_put(vcpu);
7476
 
+
7477
 
+       return 0;
7478
 
+}
7479
 
+
7480
 
+static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
7481
 
+                                   struct kvm_interrupt *irq)
7482
 
+{
7483
 
+       if (irq->irq < 0 || irq->irq >= 256)
7484
 
+               return -EINVAL;
7485
 
+       if (irqchip_in_kernel(vcpu->kvm))
7486
 
+               return -ENXIO;
7487
 
+       vcpu_load(vcpu);
7488
 
+
7489
 
+       set_bit(irq->irq, vcpu->arch.irq_pending);
7490
 
+       set_bit(irq->irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
7491
 
+
7492
 
+       vcpu_put(vcpu);
7493
 
+
7494
 
+       return 0;
7495
 
+}
7496
 
+
7497
 
+long kvm_arch_vcpu_ioctl(struct file *filp,
7498
 
+                        unsigned int ioctl, unsigned long arg)
7499
 
+{
7500
 
+       struct kvm_vcpu *vcpu = filp->private_data;
7501
 
+       void __user *argp = (void __user *)arg;
7502
 
+       int r;
7503
 
+
7504
 
+       switch (ioctl) {
7505
 
+       case KVM_GET_LAPIC: {
7506
 
+               struct kvm_lapic_state lapic;
7507
 
+
7508
 
+               memset(&lapic, 0, sizeof lapic);
7509
 
+               r = kvm_vcpu_ioctl_get_lapic(vcpu, &lapic);
7510
 
+               if (r)
7511
 
+                       goto out;
7512
 
+               r = -EFAULT;
7513
 
+               if (copy_to_user(argp, &lapic, sizeof lapic))
7514
 
+                       goto out;
7515
 
+               r = 0;
7516
 
+               break;
7517
 
+       }
7518
 
+       case KVM_SET_LAPIC: {
7519
 
+               struct kvm_lapic_state lapic;
7520
 
+
7521
 
+               r = -EFAULT;
7522
 
+               if (copy_from_user(&lapic, argp, sizeof lapic))
7523
 
+                       goto out;
7524
 
+               r = kvm_vcpu_ioctl_set_lapic(vcpu, &lapic);
7525
 
+               if (r)
7526
 
+                       goto out;
7527
 
+               r = 0;
7528
 
+               break;
7529
 
        }
7530
 
+       case KVM_INTERRUPT: {
7531
 
+               struct kvm_interrupt irq;
7532
 
 
7533
 
-       r = 0;
7534
 
+               r = -EFAULT;
7535
 
+               if (copy_from_user(&irq, argp, sizeof irq))
7536
 
+                       goto out;
7537
 
+               r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
7538
 
+               if (r)
7539
 
+                       goto out;
7540
 
+               r = 0;
7541
 
+               break;
7542
 
+       }
7543
 
+       case KVM_SET_CPUID: {
7544
 
+               struct kvm_cpuid __user *cpuid_arg = argp;
7545
 
+               struct kvm_cpuid cpuid;
7546
 
+
7547
 
+               r = -EFAULT;
7548
 
+               if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
7549
 
+                       goto out;
7550
 
+               r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
7551
 
+               if (r)
7552
 
+                       goto out;
7553
 
+               break;
7554
 
+       }
7555
 
+       case KVM_SET_CPUID2: {
7556
 
+               struct kvm_cpuid2 __user *cpuid_arg = argp;
7557
 
+               struct kvm_cpuid2 cpuid;
7558
 
+
7559
 
+               r = -EFAULT;
7560
 
+               if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
7561
 
+                       goto out;
7562
 
+               r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
7563
 
+                               cpuid_arg->entries);
7564
 
+               if (r)
7565
 
+                       goto out;
7566
 
+               break;
7567
 
+       }
7568
 
+       case KVM_GET_CPUID2: {
7569
 
+               struct kvm_cpuid2 __user *cpuid_arg = argp;
7570
 
+               struct kvm_cpuid2 cpuid;
7571
 
 
7572
 
+               r = -EFAULT;
7573
 
+               if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
7574
 
+                       goto out;
7575
 
+               r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
7576
 
+                               cpuid_arg->entries);
7577
 
+               if (r)
7578
 
+                       goto out;
7579
 
+               r = -EFAULT;
7580
 
+               if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
7581
 
+                       goto out;
7582
 
+               r = 0;
7583
 
+               break;
7584
 
+       }
7585
 
+       case KVM_GET_MSRS:
7586
 
+               r = msr_io(vcpu, argp, kvm_get_msr, 1);
7587
 
+               break;
7588
 
+       case KVM_SET_MSRS:
7589
 
+               r = msr_io(vcpu, argp, do_set_msr, 0);
7590
 
+               break;
7591
 
+       default:
7592
 
+               r = -EINVAL;
7593
 
+       }
7594
 
 out:
7595
 
-       mutex_unlock(&kvm->lock);
7596
 
        return r;
7597
 
 }
7598
 
 
7599
 
+static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
7600
 
+{
7601
 
+       int ret;
7602
 
+
7603
 
+       if (addr > (unsigned int)(-3 * PAGE_SIZE))
7604
 
+               return -1;
7605
 
+       ret = kvm_x86_ops->set_tss_addr(kvm, addr);
7606
 
+       return ret;
7607
 
+}
7608
 
+
7609
 
+static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
7610
 
+                                         u32 kvm_nr_mmu_pages)
7611
 
+{
7612
 
+       if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
7613
 
+               return -EINVAL;
7614
 
+
7615
 
+       mutex_lock(&kvm->lock);
7616
 
+
7617
 
+       kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
7618
 
+       kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
7619
 
+
7620
 
+       mutex_unlock(&kvm->lock);
7621
 
+       return 0;
7622
 
+}
7623
 
+
7624
 
+static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
7625
 
+{
7626
 
+       return kvm->arch.n_alloc_mmu_pages;
7627
 
+}
7628
 
+
7629
 
+gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
7630
 
+{
7631
 
+       int i;
7632
 
+       struct kvm_mem_alias *alias;
7633
 
+
7634
 
+       for (i = 0; i < kvm->arch.naliases; ++i) {
7635
 
+               alias = &kvm->arch.aliases[i];
7636
 
+               if (gfn >= alias->base_gfn
7637
 
+                   && gfn < alias->base_gfn + alias->npages)
7638
 
+                       return alias->target_gfn + gfn - alias->base_gfn;
7639
 
+       }
7640
 
+       return gfn;
7641
 
+}
7642
 
+
7643
 
 /*
7644
 
  * Set a new alias region.  Aliases map a portion of physical memory into
7645
 
  * another portion.  This is useful for memory windows, for example the PC
7646
 
@@ -834,15 +1226,15 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
7647
 
 
7648
 
        mutex_lock(&kvm->lock);
7649
 
 
7650
 
-       p = &kvm->aliases[alias->slot];
7651
 
+       p = &kvm->arch.aliases[alias->slot];
7652
 
        p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
7653
 
        p->npages = alias->memory_size >> PAGE_SHIFT;
7654
 
        p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;
7655
 
 
7656
 
        for (n = KVM_ALIAS_SLOTS; n > 0; --n)
7657
 
-               if (kvm->aliases[n - 1].npages)
7658
 
+               if (kvm->arch.aliases[n - 1].npages)
7659
 
                        break;
7660
 
-       kvm->naliases = n;
7661
 
+       kvm->arch.naliases = n;
7662
 
 
7663
 
        kvm_mmu_zap_all(kvm);
7664
 
 
7665
 
@@ -861,17 +1253,17 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
7666
 
        r = 0;
7667
 
        switch (chip->chip_id) {
7668
 
        case KVM_IRQCHIP_PIC_MASTER:
7669
 
-               memcpy (&chip->chip.pic,
7670
 
+               memcpy(&chip->chip.pic,
7671
 
                        &pic_irqchip(kvm)->pics[0],
7672
 
                        sizeof(struct kvm_pic_state));
7673
 
                break;
7674
 
        case KVM_IRQCHIP_PIC_SLAVE:
7675
 
-               memcpy (&chip->chip.pic,
7676
 
+               memcpy(&chip->chip.pic,
7677
 
                        &pic_irqchip(kvm)->pics[1],
7678
 
                        sizeof(struct kvm_pic_state));
7679
 
                break;
7680
 
        case KVM_IRQCHIP_IOAPIC:
7681
 
-               memcpy (&chip->chip.ioapic,
7682
 
+               memcpy(&chip->chip.ioapic,
7683
 
                        ioapic_irqchip(kvm),
7684
 
                        sizeof(struct kvm_ioapic_state));
7685
 
                break;
7686
 
@@ -889,17 +1281,17 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
7687
 
        r = 0;
7688
 
        switch (chip->chip_id) {
7689
 
        case KVM_IRQCHIP_PIC_MASTER:
7690
 
-               memcpy (&pic_irqchip(kvm)->pics[0],
7691
 
+               memcpy(&pic_irqchip(kvm)->pics[0],
7692
 
                        &chip->chip.pic,
7693
 
                        sizeof(struct kvm_pic_state));
7694
 
                break;
7695
 
        case KVM_IRQCHIP_PIC_SLAVE:
7696
 
-               memcpy (&pic_irqchip(kvm)->pics[1],
+               memcpy(&pic_irqchip(kvm)->pics[1],
                        &chip->chip.pic,
                        sizeof(struct kvm_pic_state));
                break;
        case KVM_IRQCHIP_IOAPIC:
-               memcpy (ioapic_irqchip(kvm),
+               memcpy(ioapic_irqchip(kvm),
                        &chip->chip.ioapic,
                        sizeof(struct kvm_ioapic_state));
                break;
@@ -911,110 +1303,191 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
        return r;
 }
 
-static gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
+/*
+ * Get (and clear) the dirty memory log for a memory slot.
+ */
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
+                                     struct kvm_dirty_log *log)
 {
-       int i;
-       struct kvm_mem_alias *alias;
-
-       for (i = 0; i < kvm->naliases; ++i) {
-               alias = &kvm->aliases[i];
-               if (gfn >= alias->base_gfn
-                   && gfn < alias->base_gfn + alias->npages)
-                       return alias->target_gfn + gfn - alias->base_gfn;
-       }
-       return gfn;
-}
+       int r;
+       int n;
+       struct kvm_memory_slot *memslot;
+       int is_dirty = 0;
 
-static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
-{
-       int i;
+       mutex_lock(&kvm->lock);
 
-       for (i = 0; i < kvm->nmemslots; ++i) {
-               struct kvm_memory_slot *memslot = &kvm->memslots[i];
+       r = kvm_get_dirty_log(kvm, log, &is_dirty);
+       if (r)
+               goto out;
 
-               if (gfn >= memslot->base_gfn
-                   && gfn < memslot->base_gfn + memslot->npages)
-                       return memslot;
+       /* If nothing is dirty, don't bother messing with page tables. */
+       if (is_dirty) {
+               kvm_mmu_slot_remove_write_access(kvm, log->slot);
+               kvm_flush_remote_tlbs(kvm);
+               memslot = &kvm->memslots[log->slot];
+               n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
+               memset(memslot->dirty_bitmap, 0, n);
        }
-       return NULL;
-}
-
-struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
-{
-       gfn = unalias_gfn(kvm, gfn);
-       return __gfn_to_memslot(kvm, gfn);
-}
-
-struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
-{
-       struct kvm_memory_slot *slot;
-
-       gfn = unalias_gfn(kvm, gfn);
-       slot = __gfn_to_memslot(kvm, gfn);
-       if (!slot)
-               return NULL;
-       return slot->phys_mem[gfn - slot->base_gfn];
+       r = 0;
+out:
+       mutex_unlock(&kvm->lock);
+       return r;
 }
-EXPORT_SYMBOL_GPL(gfn_to_page);
 
-/* WARNING: Does not work on aliased pages. */
-void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
+long kvm_arch_vm_ioctl(struct file *filp,
+                      unsigned int ioctl, unsigned long arg)
 {
-       struct kvm_memory_slot *memslot;
+       struct kvm *kvm = filp->private_data;
+       void __user *argp = (void __user *)arg;
+       int r = -EINVAL;
 
-       memslot = __gfn_to_memslot(kvm, gfn);
-       if (memslot && memslot->dirty_bitmap) {
-               unsigned long rel_gfn = gfn - memslot->base_gfn;
+       switch (ioctl) {
+       case KVM_SET_TSS_ADDR:
+               r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
+               if (r < 0)
+                       goto out;
+               break;
+       case KVM_SET_MEMORY_REGION: {
+               struct kvm_memory_region kvm_mem;
+               struct kvm_userspace_memory_region kvm_userspace_mem;
 
-               /* avoid RMW */
-               if (!test_bit(rel_gfn, memslot->dirty_bitmap))
-                       set_bit(rel_gfn, memslot->dirty_bitmap);
+               r = -EFAULT;
+               if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
+                       goto out;
+               kvm_userspace_mem.slot = kvm_mem.slot;
+               kvm_userspace_mem.flags = kvm_mem.flags;
+               kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr;
+               kvm_userspace_mem.memory_size = kvm_mem.memory_size;
+               r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0);
+               if (r)
+                       goto out;
+               break;
        }
-}
+       case KVM_SET_NR_MMU_PAGES:
+               r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
+               if (r)
+                       goto out;
+               break;
+       case KVM_GET_NR_MMU_PAGES:
+               r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
+               break;
+       case KVM_SET_MEMORY_ALIAS: {
+               struct kvm_memory_alias alias;
 
-int emulator_read_std(unsigned long addr,
-                            void *val,
-                            unsigned int bytes,
-                            struct kvm_vcpu *vcpu)
-{
-       void *data = val;
+               r = -EFAULT;
+               if (copy_from_user(&alias, argp, sizeof alias))
+                       goto out;
+               r = kvm_vm_ioctl_set_memory_alias(kvm, &alias);
+               if (r)
+                       goto out;
+               break;
+       }
+       case KVM_CREATE_IRQCHIP:
+               r = -ENOMEM;
+               kvm->arch.vpic = kvm_create_pic(kvm);
+               if (kvm->arch.vpic) {
+                       r = kvm_ioapic_init(kvm);
+                       if (r) {
+                               kfree(kvm->arch.vpic);
+                               kvm->arch.vpic = NULL;
+                               goto out;
+                       }
+               } else
+                       goto out;
+               break;
+       case KVM_IRQ_LINE: {
+               struct kvm_irq_level irq_event;
 
-       while (bytes) {
-               gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
-               unsigned offset = addr & (PAGE_SIZE-1);
-               unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset);
-               unsigned long pfn;
-               struct page *page;
-               void *page_virt;
+               r = -EFAULT;
+               if (copy_from_user(&irq_event, argp, sizeof irq_event))
+                       goto out;
+               if (irqchip_in_kernel(kvm)) {
+                       mutex_lock(&kvm->lock);
+                       if (irq_event.irq < 16)
+                               kvm_pic_set_irq(pic_irqchip(kvm),
+                                       irq_event.irq,
+                                       irq_event.level);
+                       kvm_ioapic_set_irq(kvm->arch.vioapic,
+                                       irq_event.irq,
+                                       irq_event.level);
+                       mutex_unlock(&kvm->lock);
+                       r = 0;
+               }
+               break;
+       }
+       case KVM_GET_IRQCHIP: {
+               /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
+               struct kvm_irqchip chip;
 
-               if (gpa == UNMAPPED_GVA)
-                       return X86EMUL_PROPAGATE_FAULT;
-               pfn = gpa >> PAGE_SHIFT;
-               page = gfn_to_page(vcpu->kvm, pfn);
-               if (!page)
-                       return X86EMUL_UNHANDLEABLE;
-               page_virt = kmap_atomic(page, KM_USER0);
+               r = -EFAULT;
+               if (copy_from_user(&chip, argp, sizeof chip))
+                       goto out;
+               r = -ENXIO;
+               if (!irqchip_in_kernel(kvm))
+                       goto out;
+               r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
+               if (r)
+                       goto out;
+               r = -EFAULT;
+               if (copy_to_user(argp, &chip, sizeof chip))
+                       goto out;
+               r = 0;
+               break;
+       }
+       case KVM_SET_IRQCHIP: {
+               /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
+               struct kvm_irqchip chip;
 
-               memcpy(data, page_virt + offset, tocopy);
+               r = -EFAULT;
+               if (copy_from_user(&chip, argp, sizeof chip))
+                       goto out;
+               r = -ENXIO;
+               if (!irqchip_in_kernel(kvm))
+                       goto out;
+               r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
+               if (r)
+                       goto out;
+               r = 0;
+               break;
+       }
+       case KVM_GET_SUPPORTED_CPUID: {
+               struct kvm_cpuid2 __user *cpuid_arg = argp;
+               struct kvm_cpuid2 cpuid;
 
-               kunmap_atomic(page_virt, KM_USER0);
+               r = -EFAULT;
+               if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
+                       goto out;
+               r = kvm_vm_ioctl_get_supported_cpuid(kvm, &cpuid,
+                       cpuid_arg->entries);
+               if (r)
+                       goto out;
 
-               bytes -= tocopy;
-               data += tocopy;
-               addr += tocopy;
+               r = -EFAULT;
+               if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
+                       goto out;
+               r = 0;
+               break;
        }
-
-       return X86EMUL_CONTINUE;
+       default:
+               ;
+       }
+out:
+       return r;
 }
-EXPORT_SYMBOL_GPL(emulator_read_std);
 
-static int emulator_write_std(unsigned long addr,
-                             const void *val,
-                             unsigned int bytes,
-                             struct kvm_vcpu *vcpu)
+static void kvm_init_msr_list(void)
 {
-       pr_unimpl(vcpu, "emulator_write_std: addr %lx n %d\n", addr, bytes);
-       return X86EMUL_UNHANDLEABLE;
+       u32 dummy[2];
+       unsigned i, j;
+
+       for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
+               if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
+                       continue;
+               if (j < i)
+                       msrs_to_save[j] = msrs_to_save[i];
+               j++;
+       }
+       num_msrs_to_save = j;
 }
 
 /*
@@ -1025,14 +1498,15 @@ static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu,
 {
        struct kvm_io_device *dev;
 
-       if (vcpu->apic) {
-               dev = &vcpu->apic->dev;
+       if (vcpu->arch.apic) {
+               dev = &vcpu->arch.apic->dev;
                if (dev->in_range(dev, addr))
                        return dev;
        }
        return NULL;
 }
 
+
 static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
                                                gpa_t addr)
 {
@@ -1044,11 +1518,33 @@ static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
        return dev;
 }
 
-static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
-                                              gpa_t addr)
+int emulator_read_std(unsigned long addr,
+                            void *val,
+                            unsigned int bytes,
+                            struct kvm_vcpu *vcpu)
 {
-       return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr);
+       void *data = val;
+
+       while (bytes) {
+               gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+               unsigned offset = addr & (PAGE_SIZE-1);
+               unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset);
+               int ret;
+
+               if (gpa == UNMAPPED_GVA)
+                       return X86EMUL_PROPAGATE_FAULT;
+               ret = kvm_read_guest(vcpu->kvm, gpa, data, tocopy);
+               if (ret < 0)
+                       return X86EMUL_UNHANDLEABLE;
+
+               bytes -= tocopy;
+               data += tocopy;
+               addr += tocopy;
+       }
+
+       return X86EMUL_CONTINUE;
 }
+EXPORT_SYMBOL_GPL(emulator_read_std);
 
 static int emulator_read_emulated(unsigned long addr,
                                  void *val,
@@ -1062,14 +1558,21 @@ static int emulator_read_emulated(unsigned long addr,
                memcpy(val, vcpu->mmio_data, bytes);
                vcpu->mmio_read_completed = 0;
                return X86EMUL_CONTINUE;
-       } else if (emulator_read_std(addr, val, bytes, vcpu)
-                  == X86EMUL_CONTINUE)
-               return X86EMUL_CONTINUE;
+       }
+
+       gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
 
-       gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
+       /* For APIC access vmexit */
+       if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
+               goto mmio;
+
+       if (emulator_read_std(addr, val, bytes, vcpu)
+                       == X86EMUL_CONTINUE)
+               return X86EMUL_CONTINUE;
        if (gpa == UNMAPPED_GVA)
                return X86EMUL_PROPAGATE_FAULT;
 
+mmio:
        /*
         * Is this MMIO handled locally?
         */
@@ -1090,19 +1593,12 @@ static int emulator_read_emulated(unsigned long addr,
 static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
                               const void *val, int bytes)
 {
-       struct page *page;
-       void *virt;
+       int ret;
 
-       if (((gpa + bytes - 1) >> PAGE_SHIFT) != (gpa >> PAGE_SHIFT))
-               return 0;
-       page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
-       if (!page)
+       ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
+       if (ret < 0)
                return 0;
-       mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT);
-       virt = kmap_atomic(page, KM_USER0);
        kvm_mmu_pte_write(vcpu, gpa, val, bytes);
-       memcpy(virt + offset_in_page(gpa), val, bytes);
-       kunmap_atomic(virt, KM_USER0);
        return 1;
 }
 
@@ -1112,16 +1608,21 @@ static int emulator_write_emulated_onepage(unsigned long addr,
                                           struct kvm_vcpu *vcpu)
 {
        struct kvm_io_device *mmio_dev;
-       gpa_t                 gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
+       gpa_t                 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
 
        if (gpa == UNMAPPED_GVA) {
-               kvm_x86_ops->inject_page_fault(vcpu, addr, 2);
+               kvm_inject_page_fault(vcpu, addr, 2);
                return X86EMUL_PROPAGATE_FAULT;
        }
 
+       /* For APIC access vmexit */
+       if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
+               goto mmio;
+
        if (emulator_write_phys(vcpu, gpa, val, bytes))
                return X86EMUL_CONTINUE;
 
+mmio:
        /*
         * Is this MMIO handled locally?
         */
@@ -1173,6 +1674,31 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
                reported = 1;
                printk(KERN_WARNING "kvm: emulating exchange as write\n");
        }
+#ifndef CONFIG_X86_64
+       /* guests cmpxchg8b have to be emulated atomically */
+       if (bytes == 8) {
+               gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+               struct page *page;
+               char *addr;
+               u64 *val;
+
+               if (gpa == UNMAPPED_GVA ||
+                  (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
+                       goto emul_write;
+
+               if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
+                       goto emul_write;
+
+               val = (u64 *)new;
+               page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+               addr = kmap_atomic(page, KM_USER0);
+               set_64bit((u64 *)(addr + offset_in_page(gpa)), val);
+               kunmap_atomic(addr, KM_USER0);
+               kvm_release_page_dirty(page);
+       }
+emul_write:
+#endif
+
        return emulator_write_emulated(addr, new, bytes, vcpu);
 }
 
@@ -1188,11 +1714,11 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
 
 int emulate_clts(struct kvm_vcpu *vcpu)
 {
-       kvm_x86_ops->set_cr0(vcpu, vcpu->cr0 & ~X86_CR0_TS);
+       kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS);
        return X86EMUL_CONTINUE;
 }
 
-int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr, unsigned long *dest)
+int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
 {
        struct kvm_vcpu *vcpu = ctxt->vcpu;
 
@@ -1223,7 +1749,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
 {
        static int reported;
        u8 opcodes[4];
-       unsigned long rip = vcpu->rip;
+       unsigned long rip = vcpu->arch.rip;
        unsigned long rip_linear;
 
        rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
@@ -1241,7 +1767,6 @@ EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
 
 struct x86_emulate_ops emulate_ops = {
        .read_std            = emulator_read_std,
-       .write_std           = emulator_write_std,
        .read_emulated       = emulator_read_emulated,
        .write_emulated      = emulator_write_emulated,
        .cmpxchg_emulated    = emulator_cmpxchg_emulated,
@@ -1250,44 +1775,63 @@ struct x86_emulate_ops emulate_ops = {
 int emulate_instruction(struct kvm_vcpu *vcpu,
                        struct kvm_run *run,
                        unsigned long cr2,
-                       u16 error_code)
+                       u16 error_code,
+                       int no_decode)
 {
-       struct x86_emulate_ctxt emulate_ctxt;
        int r;
-       int cs_db, cs_l;
 
-       vcpu->mmio_fault_cr2 = cr2;
+       vcpu->arch.mmio_fault_cr2 = cr2;
        kvm_x86_ops->cache_regs(vcpu);
 
-       kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
-
-       emulate_ctxt.vcpu = vcpu;
-       emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
-       emulate_ctxt.cr2 = cr2;
-       emulate_ctxt.mode = (emulate_ctxt.eflags & X86_EFLAGS_VM)
-               ? X86EMUL_MODE_REAL : cs_l
-               ? X86EMUL_MODE_PROT64 : cs_db
-               ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
-
-       if (emulate_ctxt.mode == X86EMUL_MODE_PROT64) {
-               emulate_ctxt.cs_base = 0;
-               emulate_ctxt.ds_base = 0;
-               emulate_ctxt.es_base = 0;
-               emulate_ctxt.ss_base = 0;
-       } else {
-               emulate_ctxt.cs_base = get_segment_base(vcpu, VCPU_SREG_CS);
-               emulate_ctxt.ds_base = get_segment_base(vcpu, VCPU_SREG_DS);
-               emulate_ctxt.es_base = get_segment_base(vcpu, VCPU_SREG_ES);
-               emulate_ctxt.ss_base = get_segment_base(vcpu, VCPU_SREG_SS);
+       vcpu->mmio_is_write = 0;
+       vcpu->arch.pio.string = 0;
+
+       if (!no_decode) {
+               int cs_db, cs_l;
+               kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
+
+               vcpu->arch.emulate_ctxt.vcpu = vcpu;
+               vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
+               vcpu->arch.emulate_ctxt.mode =
+                       (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
+                       ? X86EMUL_MODE_REAL : cs_l
+                       ? X86EMUL_MODE_PROT64 : cs_db
+                       ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
+
+               if (vcpu->arch.emulate_ctxt.mode == X86EMUL_MODE_PROT64) {
+                       vcpu->arch.emulate_ctxt.cs_base = 0;
+                       vcpu->arch.emulate_ctxt.ds_base = 0;
+                       vcpu->arch.emulate_ctxt.es_base = 0;
+                       vcpu->arch.emulate_ctxt.ss_base = 0;
+               } else {
+                       vcpu->arch.emulate_ctxt.cs_base =
+                                       get_segment_base(vcpu, VCPU_SREG_CS);
+                       vcpu->arch.emulate_ctxt.ds_base =
+                                       get_segment_base(vcpu, VCPU_SREG_DS);
+                       vcpu->arch.emulate_ctxt.es_base =
+                                       get_segment_base(vcpu, VCPU_SREG_ES);
+                       vcpu->arch.emulate_ctxt.ss_base =
+                                       get_segment_base(vcpu, VCPU_SREG_SS);
+               }
+
+               vcpu->arch.emulate_ctxt.gs_base =
+                                       get_segment_base(vcpu, VCPU_SREG_GS);
+               vcpu->arch.emulate_ctxt.fs_base =
+                                       get_segment_base(vcpu, VCPU_SREG_FS);
+
+               r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
+               ++vcpu->stat.insn_emulation;
+               if (r)  {
+                       ++vcpu->stat.insn_emulation_fail;
+                       if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
+                               return EMULATE_DONE;
+                       return EMULATE_FAIL;
+               }
        }
 
-       emulate_ctxt.gs_base = get_segment_base(vcpu, VCPU_SREG_GS);
-       emulate_ctxt.fs_base = get_segment_base(vcpu, VCPU_SREG_FS);
+       r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
 
-       vcpu->mmio_is_write = 0;
-       vcpu->pio.string = 0;
-       r = x86_emulate_memop(&emulate_ctxt, &emulate_ops);
-       if (vcpu->pio.string)
+       if (vcpu->arch.pio.string)
                return EMULATE_DO_MMIO;
 
        if ((r || vcpu->mmio_is_write) && run) {
@@ -1309,7 +1853,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
        }
 
        kvm_x86_ops->decache_regs(vcpu);
-       kvm_x86_ops->set_rflags(vcpu, emulate_ctxt.eflags);
+       kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
 
        if (vcpu->mmio_is_write) {
                vcpu->mmio_needed = 0;
@@ -1320,439 +1864,45 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
 }
 EXPORT_SYMBOL_GPL(emulate_instruction);
 
-/*
- * The vCPU has executed a HLT instruction with in-kernel mode enabled.
- */
-static void kvm_vcpu_block(struct kvm_vcpu *vcpu)
-{
-       DECLARE_WAITQUEUE(wait, current);
-
-       add_wait_queue(&vcpu->wq, &wait);
-
-       /*
-        * We will block until either an interrupt or a signal wakes us up
-        */
-       while (!kvm_cpu_has_interrupt(vcpu)
-              && !signal_pending(current)
-              && vcpu->mp_state != VCPU_MP_STATE_RUNNABLE
-              && vcpu->mp_state != VCPU_MP_STATE_SIPI_RECEIVED) {
-               set_current_state(TASK_INTERRUPTIBLE);
-               vcpu_put(vcpu);
-               schedule();
-               vcpu_load(vcpu);
-       }
-
-       __set_current_state(TASK_RUNNING);
-       remove_wait_queue(&vcpu->wq, &wait);
-}
-
-int kvm_emulate_halt(struct kvm_vcpu *vcpu)
-{
-       ++vcpu->stat.halt_exits;
-       if (irqchip_in_kernel(vcpu->kvm)) {
-               vcpu->mp_state = VCPU_MP_STATE_HALTED;
-               kvm_vcpu_block(vcpu);
-               if (vcpu->mp_state != VCPU_MP_STATE_RUNNABLE)
-                       return -EINTR;
-               return 1;
-       } else {
-               vcpu->run->exit_reason = KVM_EXIT_HLT;
-               return 0;
-       }
-}
-EXPORT_SYMBOL_GPL(kvm_emulate_halt);
-
-int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
-       unsigned long nr, a0, a1, a2, a3, a4, a5, ret;
-
-       kvm_x86_ops->cache_regs(vcpu);
-       ret = -KVM_EINVAL;
-#ifdef CONFIG_X86_64
-       if (is_long_mode(vcpu)) {
-               nr = vcpu->regs[VCPU_REGS_RAX];
-               a0 = vcpu->regs[VCPU_REGS_RDI];
-               a1 = vcpu->regs[VCPU_REGS_RSI];
-               a2 = vcpu->regs[VCPU_REGS_RDX];
-               a3 = vcpu->regs[VCPU_REGS_RCX];
-               a4 = vcpu->regs[VCPU_REGS_R8];
-               a5 = vcpu->regs[VCPU_REGS_R9];
-       } else
-#endif
-       {
-               nr = vcpu->regs[VCPU_REGS_RBX] & -1u;
-               a0 = vcpu->regs[VCPU_REGS_RAX] & -1u;
-               a1 = vcpu->regs[VCPU_REGS_RCX] & -1u;
-               a2 = vcpu->regs[VCPU_REGS_RDX] & -1u;
-               a3 = vcpu->regs[VCPU_REGS_RSI] & -1u;
-               a4 = vcpu->regs[VCPU_REGS_RDI] & -1u;
-               a5 = vcpu->regs[VCPU_REGS_RBP] & -1u;
-       }
-       switch (nr) {
-       default:
-               run->hypercall.nr = nr;
-               run->hypercall.args[0] = a0;
-               run->hypercall.args[1] = a1;
-               run->hypercall.args[2] = a2;
-               run->hypercall.args[3] = a3;
-               run->hypercall.args[4] = a4;
-               run->hypercall.args[5] = a5;
-               run->hypercall.ret = ret;
-               run->hypercall.longmode = is_long_mode(vcpu);
-               kvm_x86_ops->decache_regs(vcpu);
-               return 0;
-       }
-       vcpu->regs[VCPU_REGS_RAX] = ret;
-       kvm_x86_ops->decache_regs(vcpu);
-       return 1;
-}
-EXPORT_SYMBOL_GPL(kvm_hypercall);
-
-static u64 mk_cr_64(u64 curr_cr, u32 new_val)
-{
-       return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
-}
-
-void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
-{
-       struct descriptor_table dt = { limit, base };
-
-       kvm_x86_ops->set_gdt(vcpu, &dt);
-}
-
-void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
-{
-       struct descriptor_table dt = { limit, base };
-
-       kvm_x86_ops->set_idt(vcpu, &dt);
-}
-
-void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
-                  unsigned long *rflags)
-{
-       lmsw(vcpu, msw);
-       *rflags = kvm_x86_ops->get_rflags(vcpu);
-}
-
-unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
-{
-       kvm_x86_ops->decache_cr4_guest_bits(vcpu);
-       switch (cr) {
-       case 0:
-               return vcpu->cr0;
-       case 2:
-               return vcpu->cr2;
-       case 3:
-               return vcpu->cr3;
-       case 4:
-               return vcpu->cr4;
-       default:
-               vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
-               return 0;
-       }
-}
-
-void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
-                    unsigned long *rflags)
-{
-       switch (cr) {
-       case 0:
-               set_cr0(vcpu, mk_cr_64(vcpu->cr0, val));
-               *rflags = kvm_x86_ops->get_rflags(vcpu);
-               break;
-       case 2:
-               vcpu->cr2 = val;
-               break;
-       case 3:
-               set_cr3(vcpu, val);
-               break;
-       case 4:
-               set_cr4(vcpu, mk_cr_64(vcpu->cr4, val));
-               break;
-       default:
-               vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
-       }
-}
-
-/*
- * Register the para guest with the host:
- */
-static int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa)
-{
-       struct kvm_vcpu_para_state *para_state;
-       hpa_t para_state_hpa, hypercall_hpa;
-       struct page *para_state_page;
-       unsigned char *hypercall;
-       gpa_t hypercall_gpa;
-
-       printk(KERN_DEBUG "kvm: guest trying to enter paravirtual mode\n");
-       printk(KERN_DEBUG ".... para_state_gpa: %08Lx\n", para_state_gpa);
-
-       /*
-        * Needs to be page aligned:
-        */
-       if (para_state_gpa != PAGE_ALIGN(para_state_gpa))
-               goto err_gp;
-
-       para_state_hpa = gpa_to_hpa(vcpu, para_state_gpa);
-       printk(KERN_DEBUG ".... para_state_hpa: %08Lx\n", para_state_hpa);
-       if (is_error_hpa(para_state_hpa))
-               goto err_gp;
-
-       mark_page_dirty(vcpu->kvm, para_state_gpa >> PAGE_SHIFT);
-       para_state_page = pfn_to_page(para_state_hpa >> PAGE_SHIFT);
-       para_state = kmap(para_state_page);
-
-       printk(KERN_DEBUG "....  guest version: %d\n", para_state->guest_version);
-       printk(KERN_DEBUG "....           size: %d\n", para_state->size);
-
-       para_state->host_version = KVM_PARA_API_VERSION;
-       /*
-        * We cannot support guests that try to register themselves
-        * with a newer API version than the host supports:
-        */
-       if (para_state->guest_version > KVM_PARA_API_VERSION) {
-               para_state->ret = -KVM_EINVAL;
-               goto err_kunmap_skip;
-       }
-
-       hypercall_gpa = para_state->hypercall_gpa;
-       hypercall_hpa = gpa_to_hpa(vcpu, hypercall_gpa);
-       printk(KERN_DEBUG ".... hypercall_hpa: %08Lx\n", hypercall_hpa);
-       if (is_error_hpa(hypercall_hpa)) {
-               para_state->ret = -KVM_EINVAL;
-               goto err_kunmap_skip;
-       }
-
-       printk(KERN_DEBUG "kvm: para guest successfully registered.\n");
-       vcpu->para_state_page = para_state_page;
-       vcpu->para_state_gpa = para_state_gpa;
-       vcpu->hypercall_gpa = hypercall_gpa;
-
-       mark_page_dirty(vcpu->kvm, hypercall_gpa >> PAGE_SHIFT);
-       hypercall = kmap_atomic(pfn_to_page(hypercall_hpa >> PAGE_SHIFT),
-                               KM_USER1) + (hypercall_hpa & ~PAGE_MASK);
-       kvm_x86_ops->patch_hypercall(vcpu, hypercall);
-       kunmap_atomic(hypercall, KM_USER1);
-
-       para_state->ret = 0;
-err_kunmap_skip:
-       kunmap(para_state_page);
-       return 0;
-err_gp:
-       return 1;
-}
-
-int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
-{
-       u64 data;
-
-       switch (msr) {
-       case 0xc0010010: /* SYSCFG */
-       case 0xc0010015: /* HWCR */
-       case MSR_IA32_PLATFORM_ID:
-       case MSR_IA32_P5_MC_ADDR:
-       case MSR_IA32_P5_MC_TYPE:
-       case MSR_IA32_MC0_CTL:
-       case MSR_IA32_MCG_STATUS:
-       case MSR_IA32_MCG_CAP:
-       case MSR_IA32_MC0_MISC:
-       case MSR_IA32_MC0_MISC+4:
-       case MSR_IA32_MC0_MISC+8:
-       case MSR_IA32_MC0_MISC+12:
-       case MSR_IA32_MC0_MISC+16:
-       case MSR_IA32_UCODE_REV:
-       case MSR_IA32_PERF_STATUS:
-       case MSR_IA32_EBL_CR_POWERON:
-               /* MTRR registers */
-       case 0xfe:
-       case 0x200 ... 0x2ff:
-               data = 0;
-               break;
-       case 0xcd: /* fsb frequency */
-               data = 3;
-               break;
-       case MSR_IA32_APICBASE:
-               data = kvm_get_apic_base(vcpu);
-               break;
-       case MSR_IA32_MISC_ENABLE:
-               data = vcpu->ia32_misc_enable_msr;
-               break;
-#ifdef CONFIG_X86_64
-       case MSR_EFER:
-               data = vcpu->shadow_efer;
-               break;
-#endif
-       default:
-               pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
-               return 1;
-       }
-       *pdata = data;
-       return 0;
-}
-EXPORT_SYMBOL_GPL(kvm_get_msr_common);
-
-/*
- * Reads an msr value (of 'msr_index') into 'pdata'.
- * Returns 0 on success, non-0 otherwise.
- * Assumes vcpu_load() was already called.
- */
-int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
-{
-       return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
-}
-
-#ifdef CONFIG_X86_64
-
-static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
-{
-       if (efer & EFER_RESERVED_BITS) {
-               printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n",
-                      efer);
-               inject_gp(vcpu);
-               return;
-       }
-
-       if (is_paging(vcpu)
-           && (vcpu->shadow_efer & EFER_LME) != (efer & EFER_LME)) {
-               printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n");
-               inject_gp(vcpu);
-               return;
-       }
-
-       kvm_x86_ops->set_efer(vcpu, efer);
-
-       efer &= ~EFER_LMA;
-       efer |= vcpu->shadow_efer & EFER_LMA;
-
-       vcpu->shadow_efer = efer;
-}
-
-#endif
-
-int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
-{
-       switch (msr) {
-#ifdef CONFIG_X86_64
-       case MSR_EFER:
-               set_efer(vcpu, data);
-               break;
-#endif
-       case MSR_IA32_MC0_STATUS:
-               pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n",
-                      __FUNCTION__, data);
-               break;
-       case MSR_IA32_MCG_STATUS:
-               pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n",
-                       __FUNCTION__, data);
-               break;
-       case MSR_IA32_UCODE_REV:
-       case MSR_IA32_UCODE_WRITE:
-       case 0x200 ... 0x2ff: /* MTRRs */
-               break;
-       case MSR_IA32_APICBASE:
-               kvm_set_apic_base(vcpu, data);
-               break;
-       case MSR_IA32_MISC_ENABLE:
-               vcpu->ia32_misc_enable_msr = data;
-               break;
-       /*
-        * This is the 'probe whether the host is KVM' logic:
-        */
-       case MSR_KVM_API_MAGIC:
-               return vcpu_register_para(vcpu, data);
-
-       default:
-               pr_unimpl(vcpu, "unhandled wrmsr: 0x%x\n", msr);
-               return 1;
-       }
-       return 0;
-}
-EXPORT_SYMBOL_GPL(kvm_set_msr_common);
-
-/*
- * Writes msr value into into the appropriate "register".
- * Returns 0 on success, non-0 otherwise.
- * Assumes vcpu_load() was already called.
- */
-int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
-{
-       return kvm_x86_ops->set_msr(vcpu, msr_index, data);
-}
-
-void kvm_resched(struct kvm_vcpu *vcpu)
-{
-       if (!need_resched())
-               return;
-       cond_resched();
-}
-EXPORT_SYMBOL_GPL(kvm_resched);
-
-void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
+static void free_pio_guest_pages(struct kvm_vcpu *vcpu)
 {
        int i;
-       u32 function;
-       struct kvm_cpuid_entry *e, *best;
 
-       kvm_x86_ops->cache_regs(vcpu);
-       function = vcpu->regs[VCPU_REGS_RAX];
-       vcpu->regs[VCPU_REGS_RAX] = 0;
-       vcpu->regs[VCPU_REGS_RBX] = 0;
-       vcpu->regs[VCPU_REGS_RCX] = 0;
-       vcpu->regs[VCPU_REGS_RDX] = 0;
-       best = NULL;
-       for (i = 0; i < vcpu->cpuid_nent; ++i) {
-               e = &vcpu->cpuid_entries[i];
-               if (e->function == function) {
-                       best = e;
-                       break;
+       for (i = 0; i < ARRAY_SIZE(vcpu->arch.pio.guest_pages); ++i)
+               if (vcpu->arch.pio.guest_pages[i]) {
+                       kvm_release_page_dirty(vcpu->arch.pio.guest_pages[i]);
+                       vcpu->arch.pio.guest_pages[i] = NULL;
                }
-               /*
-                * Both basic or both extended?
-                */
-               if (((e->function ^ function) & 0x80000000) == 0)
-                       if (!best || e->function > best->function)
-                               best = e;
-       }
-       if (best) {
-               vcpu->regs[VCPU_REGS_RAX] = best->eax;
-               vcpu->regs[VCPU_REGS_RBX] = best->ebx;
-               vcpu->regs[VCPU_REGS_RCX] = best->ecx;
-               vcpu->regs[VCPU_REGS_RDX] = best->edx;
-       }
-       kvm_x86_ops->decache_regs(vcpu);
-       kvm_x86_ops->skip_emulated_instruction(vcpu);
 }
-EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
 
 static int pio_copy_data(struct kvm_vcpu *vcpu)
 {
-       void *p = vcpu->pio_data;
+       void *p = vcpu->arch.pio_data;
        void *q;
        unsigned bytes;
-       int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1;
+       int nr_pages = vcpu->arch.pio.guest_pages[1] ? 2 : 1;
 
-       q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE,
+       q = vmap(vcpu->arch.pio.guest_pages, nr_pages, VM_READ|VM_WRITE,
                 PAGE_KERNEL);
        if (!q) {
                free_pio_guest_pages(vcpu);
                return -ENOMEM;
        }
-       q += vcpu->pio.guest_page_offset;
-       bytes = vcpu->pio.size * vcpu->pio.cur_count;
-       if (vcpu->pio.in)
+       q += vcpu->arch.pio.guest_page_offset;
+       bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count;
+       if (vcpu->arch.pio.in)
                memcpy(q, p, bytes);
        else
                memcpy(p, q, bytes);
-       q -= vcpu->pio.guest_page_offset;
+       q -= vcpu->arch.pio.guest_page_offset;
        vunmap(q);
        free_pio_guest_pages(vcpu);
        return 0;
 }
 
-static int complete_pio(struct kvm_vcpu *vcpu)
+int complete_pio(struct kvm_vcpu *vcpu)
 {
-       struct kvm_pio_request *io = &vcpu->pio;
+       struct kvm_pio_request *io = &vcpu->arch.pio;
        long delta;
        int r;
 
@@ -1760,7 +1910,7 @@ static int complete_pio(struct kvm_vcpu *vcpu)
 
        if (!io->string) {
                if (io->in)
-                       memcpy(&vcpu->regs[VCPU_REGS_RAX], vcpu->pio_data,
+                       memcpy(&vcpu->arch.regs[VCPU_REGS_RAX], vcpu->arch.pio_data,
                               io->size);
        } else {
                if (io->in) {
@@ -1778,15 +1928,15 @@ static int complete_pio(struct kvm_vcpu *vcpu)
                         * The size of the register should really depend on
                         * current address size.
                         */
-                       vcpu->regs[VCPU_REGS_RCX] -= delta;
+                       vcpu->arch.regs[VCPU_REGS_RCX] -= delta;
                }
                if (io->down)
                        delta = -delta;
                delta *= io->size;
                if (io->in)
-                       vcpu->regs[VCPU_REGS_RDI] += delta;
+                       vcpu->arch.regs[VCPU_REGS_RDI] += delta;
                else
-                       vcpu->regs[VCPU_REGS_RSI] += delta;
+                       vcpu->arch.regs[VCPU_REGS_RSI] += delta;
        }
 
        kvm_x86_ops->decache_regs(vcpu);
@@ -1804,13 +1954,13 @@ static void kernel_pio(struct kvm_io_device *pio_dev,
        /* TODO: String I/O for in kernel device */
 
        mutex_lock(&vcpu->kvm->lock);
-       if (vcpu->pio.in)
-               kvm_iodevice_read(pio_dev, vcpu->pio.port,
-                                 vcpu->pio.size,
+       if (vcpu->arch.pio.in)
+               kvm_iodevice_read(pio_dev, vcpu->arch.pio.port,
+                                 vcpu->arch.pio.size,
                                  pd);
        else
-               kvm_iodevice_write(pio_dev, vcpu->pio.port,
-                                  vcpu->pio.size,
+               kvm_iodevice_write(pio_dev, vcpu->arch.pio.port,
+                                  vcpu->arch.pio.size,
                                   pd);
        mutex_unlock(&vcpu->kvm->lock);
 }
@@ -1818,8 +1968,8 @@ static void kernel_pio(struct kvm_io_device *pio_dev,
 static void pio_string_write(struct kvm_io_device *pio_dev,
                             struct kvm_vcpu *vcpu)
 {
-       struct kvm_pio_request *io = &vcpu->pio;
-       void *pd = vcpu->pio_data;
+       struct kvm_pio_request *io = &vcpu->arch.pio;
+       void *pd = vcpu->arch.pio_data;
        int i;
 
        mutex_lock(&vcpu->kvm->lock);
@@ -1832,32 +1982,38 @@ static void pio_string_write(struct kvm_io_device *pio_dev,
        mutex_unlock(&vcpu->kvm->lock);
 }
 
-int kvm_emulate_pio (struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
+static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
+                                              gpa_t addr)
+{
+       return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr);
+}
+
+int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
                  int size, unsigned port)
 {
        struct kvm_io_device *pio_dev;
 
        vcpu->run->exit_reason = KVM_EXIT_IO;
        vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
-       vcpu->run->io.size = vcpu->pio.size = size;
+       vcpu->run->io.size = vcpu->arch.pio.size = size;
        vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
-       vcpu->run->io.count = vcpu->pio.count = vcpu->pio.cur_count = 1;
-       vcpu->run->io.port = vcpu->pio.port = port;
-       vcpu->pio.in = in;
-       vcpu->pio.string = 0;
-       vcpu->pio.down = 0;
-       vcpu->pio.guest_page_offset = 0;
-       vcpu->pio.rep = 0;
+       vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = 1;
+       vcpu->run->io.port = vcpu->arch.pio.port = port;
+       vcpu->arch.pio.in = in;
+       vcpu->arch.pio.string = 0;
+       vcpu->arch.pio.down = 0;
+       vcpu->arch.pio.guest_page_offset = 0;
+       vcpu->arch.pio.rep = 0;
 
        kvm_x86_ops->cache_regs(vcpu);
-       memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4);
+       memcpy(vcpu->arch.pio_data, &vcpu->arch.regs[VCPU_REGS_RAX], 4);
        kvm_x86_ops->decache_regs(vcpu);
 
        kvm_x86_ops->skip_emulated_instruction(vcpu);
 
        pio_dev = vcpu_find_pio_dev(vcpu, port);
        if (pio_dev) {
-               kernel_pio(pio_dev, vcpu, vcpu->pio_data);
+               kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
                complete_pio(vcpu);
                return 1;
        }
@@ -1877,15 +2033,15 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 
        vcpu->run->exit_reason = KVM_EXIT_IO;
        vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
-       vcpu->run->io.size = vcpu->pio.size = size;
+       vcpu->run->io.size = vcpu->arch.pio.size = size;
        vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
-       vcpu->run->io.count = vcpu->pio.count = vcpu->pio.cur_count = count;
-       vcpu->run->io.port = vcpu->pio.port = port;
-       vcpu->pio.in = in;
-       vcpu->pio.string = 1;
-       vcpu->pio.down = down;
-       vcpu->pio.guest_page_offset = offset_in_page(address);
-       vcpu->pio.rep = rep;
+       vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = count;
+       vcpu->run->io.port = vcpu->arch.pio.port = port;
+       vcpu->arch.pio.in = in;
+       vcpu->arch.pio.string = 1;
+       vcpu->arch.pio.down = down;
+       vcpu->arch.pio.guest_page_offset = offset_in_page(address);
+       vcpu->arch.pio.rep = rep;
 
        if (!count) {
                kvm_x86_ops->skip_emulated_instruction(vcpu);
@@ -1911,37 +2067,35 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
                 * String I/O in reverse.  Yuck.  Kill the guest, fix later.
                 */
                pr_unimpl(vcpu, "guest string pio down\n");
-               inject_gp(vcpu);
+               kvm_inject_gp(vcpu, 0);
                return 1;
        }
        vcpu->run->io.count = now;
-       vcpu->pio.cur_count = now;
+       vcpu->arch.pio.cur_count = now;
 
-       if (vcpu->pio.cur_count == vcpu->pio.count)
+       if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count)
                kvm_x86_ops->skip_emulated_instruction(vcpu);
 
        for (i = 0; i < nr_pages; ++i) {
                mutex_lock(&vcpu->kvm->lock);
                page = gva_to_page(vcpu, address + i * PAGE_SIZE);
-               if (page)
-                       get_page(page);
-               vcpu->pio.guest_pages[i] = page;
+               vcpu->arch.pio.guest_pages[i] = page;
                mutex_unlock(&vcpu->kvm->lock);
                if (!page) {
-                       inject_gp(vcpu);
+                       kvm_inject_gp(vcpu, 0);
                        free_pio_guest_pages(vcpu);
                        return 1;
                }
        }
 
        pio_dev = vcpu_find_pio_dev(vcpu, port);
-       if (!vcpu->pio.in) {
+       if (!vcpu->arch.pio.in) {
                /* string PIO write */
                ret = pio_copy_data(vcpu);
                if (ret >= 0 && pio_dev) {
                        pio_string_write(pio_dev, vcpu);
                        complete_pio(vcpu);
-                       if (vcpu->pio.count == 0)
+                       if (vcpu->arch.pio.count == 0)
                                ret = 1;
                }
        } else if (pio_dev)
@@ -1953,6 +2107,265 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_pio_string);
 
+int kvm_arch_init(void *opaque)
+{
+       int r;
+       struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
+
+       r = kvm_mmu_module_init();
+       if (r)
+               goto out_fail;
+
+       kvm_init_msr_list();
+
+       if (kvm_x86_ops) {
+               printk(KERN_ERR "kvm: already loaded the other module\n");
+               r = -EEXIST;
+               goto out;
+       }
+
+       if (!ops->cpu_has_kvm_support()) {
+               printk(KERN_ERR "kvm: no hardware support\n");
+               r = -EOPNOTSUPP;
+               goto out;
+       }
+       if (ops->disabled_by_bios()) {
+               printk(KERN_ERR "kvm: disabled by bios\n");
+               r = -EOPNOTSUPP;
+               goto out;
+       }
+
+       kvm_x86_ops = ops;
+       kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
+       return 0;
+
+out:
+       kvm_mmu_module_exit();
+out_fail:
+       return r;
+}
+
+void kvm_arch_exit(void)
+{
+       kvm_x86_ops = NULL;
+       kvm_mmu_module_exit();
+}
+
+int kvm_emulate_halt(struct kvm_vcpu *vcpu)
+{
+       ++vcpu->stat.halt_exits;
+       if (irqchip_in_kernel(vcpu->kvm)) {
+               vcpu->arch.mp_state = VCPU_MP_STATE_HALTED;
+               kvm_vcpu_block(vcpu);
+               if (vcpu->arch.mp_state != VCPU_MP_STATE_RUNNABLE)
+                       return -EINTR;
+               return 1;
+       } else {
+               vcpu->run->exit_reason = KVM_EXIT_HLT;
+               return 0;
+       }
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_halt);
+
+int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
+{
+       unsigned long nr, a0, a1, a2, a3, ret;
+
+       kvm_x86_ops->cache_regs(vcpu);
+
+       nr = vcpu->arch.regs[VCPU_REGS_RAX];
+       a0 = vcpu->arch.regs[VCPU_REGS_RBX];
+       a1 = vcpu->arch.regs[VCPU_REGS_RCX];
+       a2 = vcpu->arch.regs[VCPU_REGS_RDX];
+       a3 = vcpu->arch.regs[VCPU_REGS_RSI];
+
+       if (!is_long_mode(vcpu)) {
+               nr &= 0xFFFFFFFF;
+               a0 &= 0xFFFFFFFF;
+               a1 &= 0xFFFFFFFF;
+               a2 &= 0xFFFFFFFF;
+               a3 &= 0xFFFFFFFF;
+       }
+
+       switch (nr) {
+       default:
+               ret = -KVM_ENOSYS;
+               break;
+       }
+       vcpu->arch.regs[VCPU_REGS_RAX] = ret;
+       kvm_x86_ops->decache_regs(vcpu);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
+
+int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
+{
+       char instruction[3];
+       int ret = 0;
+
+       mutex_lock(&vcpu->kvm->lock);
+
+       /*
+        * Blow out the MMU to ensure that no other VCPU has an active mapping
+        * to ensure that the updated hypercall appears atomically across all
+        * VCPUs.
+        */
+       kvm_mmu_zap_all(vcpu->kvm);
+
+       kvm_x86_ops->cache_regs(vcpu);
+       kvm_x86_ops->patch_hypercall(vcpu, instruction);
+       if (emulator_write_emulated(vcpu->arch.rip, instruction, 3, vcpu)
+           != X86EMUL_CONTINUE)
+               ret = -EFAULT;
+
+       mutex_unlock(&vcpu->kvm->lock);
+
+       return ret;
+}
+
+static u64 mk_cr_64(u64 curr_cr, u32 new_val)
+{
+       return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
+}
+
+void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
+{
+       struct descriptor_table dt = { limit, base };
+
+       kvm_x86_ops->set_gdt(vcpu, &dt);
+}
+
+void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
+{
+       struct descriptor_table dt = { limit, base };
+
+       kvm_x86_ops->set_idt(vcpu, &dt);
+}
+
+void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
+                  unsigned long *rflags)
+{
+       lmsw(vcpu, msw);
+       *rflags = kvm_x86_ops->get_rflags(vcpu);
+}
+
+unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
+{
+       kvm_x86_ops->decache_cr4_guest_bits(vcpu);
+       switch (cr) {
+       case 0:
+               return vcpu->arch.cr0;
+       case 2:
+               return vcpu->arch.cr2;
+       case 3:
+               return vcpu->arch.cr3;
+       case 4:
+               return vcpu->arch.cr4;
+       case 8:
+               return get_cr8(vcpu);
+       default:
+               vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
+               return 0;
+       }
+}
+
+void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
+                    unsigned long *rflags)
+{
+       switch (cr) {
+       case 0:
+               set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
+               *rflags = kvm_x86_ops->get_rflags(vcpu);
+               break;
+       case 2:
+               vcpu->arch.cr2 = val;
+               break;
+       case 3:
+               set_cr3(vcpu, val);
+               break;
+       case 4:
+               set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val));
+               break;
+       case 8:
+               set_cr8(vcpu, val & 0xfUL);
+               break;
+       default:
+               vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
+       }
+}
+
+static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
+{
+       struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i];
+       int j, nent = vcpu->arch.cpuid_nent;
+
+       e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
+       /* when no next entry is found, the current entry[i] is reselected */
+       for (j = i + 1; j == i; j = (j + 1) % nent) {
+               struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
+               if (ej->function == e->function) {
+                       ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
+                       return j;
+               }
+       }
+       return 0; /* silence gcc, even though control never reaches here */
+}
+
+/* find an entry with matching function, matching index (if needed), and that
+ * should be read next (if it's stateful) */
+static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
+       u32 function, u32 index)
+{
+       if (e->function != function)
+               return 0;
+       if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index)
+               return 0;
+       if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) &&
+               !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
+               return 0;
+       return 1;
+}
+
+void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
+{
+       int i;
+       u32 function, index;
+       struct kvm_cpuid_entry2 *e, *best;
+
+       kvm_x86_ops->cache_regs(vcpu);
+       function = vcpu->arch.regs[VCPU_REGS_RAX];
+       index = vcpu->arch.regs[VCPU_REGS_RCX];
+       vcpu->arch.regs[VCPU_REGS_RAX] = 0;
+       vcpu->arch.regs[VCPU_REGS_RBX] = 0;
+       vcpu->arch.regs[VCPU_REGS_RCX] = 0;
+       vcpu->arch.regs[VCPU_REGS_RDX] = 0;
+       best = NULL;
+       for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
+               e = &vcpu->arch.cpuid_entries[i];
+               if (is_matching_cpuid_entry(e, function, index)) {
+                       if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
+                               move_to_next_stateful_cpuid_entry(vcpu, i);
+                       best = e;
+                       break;
+               }
+               /*
+                * Both basic or both extended?
+                */
+               if (((e->function ^ function) & 0x80000000) == 0)
+                       if (!best || e->function > best->function)
+                               best = e;
+       }
+       if (best) {
+               vcpu->arch.regs[VCPU_REGS_RAX] = best->eax;
+               vcpu->arch.regs[VCPU_REGS_RBX] = best->ebx;
+               vcpu->arch.regs[VCPU_REGS_RCX] = best->ecx;
+               vcpu->arch.regs[VCPU_REGS_RDX] = best->edx;
+       }
+       kvm_x86_ops->decache_regs(vcpu);
+       kvm_x86_ops->skip_emulated_instruction(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
+
 /*
  * Check if userspace requested an interrupt window, and that the
  * interrupt window is open.
@@ -1962,9 +2375,9 @@ EXPORT_SYMBOL_GPL(kvm_emulate_pio_string);
 static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
                                         struct kvm_run *kvm_run)
 {
-       return (!vcpu->irq_summary &&
+       return (!vcpu->arch.irq_summary &&
                kvm_run->request_interrupt_window &&
-               vcpu->interrupt_window_open &&
+               vcpu->arch.interrupt_window_open &&
                (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF));
 }
 
@@ -1978,20 +2391,22 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
                kvm_run->ready_for_interrupt_injection = 1;
        else
                kvm_run->ready_for_interrupt_injection =
-                                       (vcpu->interrupt_window_open &&
-                                        vcpu->irq_summary == 0);
+                                       (vcpu->arch.interrupt_window_open &&
+                                        vcpu->arch.irq_summary == 0);
 }
 
 static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
        int r;
 
-       if (unlikely(vcpu->mp_state == VCPU_MP_STATE_SIPI_RECEIVED)) {
-               printk("vcpu %d received sipi with vector # %x\n",
-                      vcpu->vcpu_id, vcpu->sipi_vector);
+       if (unlikely(vcpu->arch.mp_state == VCPU_MP_STATE_SIPI_RECEIVED)) {
+               pr_debug("vcpu %d received sipi with vector # %x\n",
+                      vcpu->vcpu_id, vcpu->arch.sipi_vector);
                kvm_lapic_reset(vcpu);
-               kvm_x86_ops->vcpu_reset(vcpu);
-               vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
+               r = kvm_x86_ops->vcpu_reset(vcpu);
+               if (r)
+                       return r;
+               vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE;
        }
 
 preempted:
@@ -2003,6 +2418,8 @@ again:
        if (unlikely(r))
                goto out;
 
+       kvm_inject_pending_timer_irqs(vcpu);
+
        preempt_disable();
 
        kvm_x86_ops->prepare_guest_switch(vcpu);
@@ -2019,16 +2436,18 @@ again:
9225
 
                goto out;
9226
 
        }
9227
 
 
9228
 
-       if (irqchip_in_kernel(vcpu->kvm))
9229
 
+       if (vcpu->arch.exception.pending)
9230
 
+               __queue_exception(vcpu);
9231
 
+       else if (irqchip_in_kernel(vcpu->kvm))
9232
 
                kvm_x86_ops->inject_pending_irq(vcpu);
9233
 
-       else if (!vcpu->mmio_read_completed)
9234
 
+       else
9235
 
                kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run);
9236
 
 
9237
 
        vcpu->guest_mode = 1;
9238
 
        kvm_guest_enter();
9239
 
 
9240
 
        if (vcpu->requests)
9241
 
-               if (test_and_clear_bit(KVM_TLB_FLUSH, &vcpu->requests))
9242
 
+               if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
9243
 
                        kvm_x86_ops->tlb_flush(vcpu);
9244
 
 
9245
 
        kvm_x86_ops->run(vcpu, kvm_run);
9246
 
@@ -2055,9 +2474,12 @@ again:
9247
 
         */
9248
 
        if (unlikely(prof_on == KVM_PROFILING)) {
9249
 
                kvm_x86_ops->cache_regs(vcpu);
9250
 
-               profile_hit(KVM_PROFILING, (void *)vcpu->rip);
9251
 
+               profile_hit(KVM_PROFILING, (void *)vcpu->arch.rip);
9252
 
        }
9253
 
 
9254
 
+       if (vcpu->arch.exception.pending && kvm_x86_ops->exception_injected(vcpu))
9255
 
+               vcpu->arch.exception.pending = false;
9256
 
+
9257
 
        r = kvm_x86_ops->handle_exit(kvm_run, vcpu);
9258
 
 
9259
 
        if (r > 0) {
9260
 
@@ -2067,10 +2489,8 @@ again:
9261
 
                        ++vcpu->stat.request_irq_exits;
9262
 
                        goto out;
9263
 
                }
9264
 
-               if (!need_resched()) {
9265
 
-                       ++vcpu->stat.light_exits;
9266
 
+               if (!need_resched())
9267
 
                        goto again;
9268
 
-               }
9269
 
        }
9270
 
 
9271
 
 out:
9272
 
@@ -2084,15 +2504,14 @@ out:
9273
 
        return r;
9274
 
 }
9275
 
 
9276
 
-
9277
 
-static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
9278
 
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
9279
 
 {
9280
 
        int r;
9281
 
        sigset_t sigsaved;
9282
 
 
9283
 
        vcpu_load(vcpu);
9284
 
 
9285
 
-       if (unlikely(vcpu->mp_state == VCPU_MP_STATE_UNINITIALIZED)) {
9286
 
+       if (unlikely(vcpu->arch.mp_state == VCPU_MP_STATE_UNINITIALIZED)) {
9287
 
                kvm_vcpu_block(vcpu);
9288
 
                vcpu_put(vcpu);
9289
 
                return -EAGAIN;
9290
 
@@ -2105,18 +2524,18 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
9291
 
        if (!irqchip_in_kernel(vcpu->kvm))
9292
 
                set_cr8(vcpu, kvm_run->cr8);
9293
 
 
9294
 
-       if (vcpu->pio.cur_count) {
9295
 
+       if (vcpu->arch.pio.cur_count) {
9296
 
                r = complete_pio(vcpu);
9297
 
                if (r)
9298
 
                        goto out;
9299
 
        }
9300
 
-
9301
 
+#if CONFIG_HAS_IOMEM
9302
 
        if (vcpu->mmio_needed) {
9303
 
                memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
9304
 
                vcpu->mmio_read_completed = 1;
9305
 
                vcpu->mmio_needed = 0;
9306
 
                r = emulate_instruction(vcpu, kvm_run,
9307
 
-                                       vcpu->mmio_fault_cr2, 0);
9308
 
+                                       vcpu->arch.mmio_fault_cr2, 0, 1);
9309
 
                if (r == EMULATE_DO_MMIO) {
9310
 
                        /*
9311
 
                         * Read-modify-write.  Back to userspace.
9312
 
@@ -2125,10 +2544,10 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
9313
 
                        goto out;
9314
 
                }
9315
 
        }
9316
 
-
9317
 
+#endif
9318
 
        if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) {
9319
 
                kvm_x86_ops->cache_regs(vcpu);
9320
 
-               vcpu->regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret;
9321
 
+               vcpu->arch.regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret;
9322
 
                kvm_x86_ops->decache_regs(vcpu);
9323
 
        }
9324
 
 
9325
 
@@ -2142,33 +2561,32 @@ out:
9326
 
        return r;
9327
 
 }
9328
 
 
9329
 
-static int kvm_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu,
9330
 
-                                  struct kvm_regs *regs)
9331
 
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
9332
 
 {
9333
 
        vcpu_load(vcpu);
9334
 
 
9335
 
        kvm_x86_ops->cache_regs(vcpu);
9336
 
 
9337
 
-       regs->rax = vcpu->regs[VCPU_REGS_RAX];
9338
 
-       regs->rbx = vcpu->regs[VCPU_REGS_RBX];
9339
 
-       regs->rcx = vcpu->regs[VCPU_REGS_RCX];
9340
 
-       regs->rdx = vcpu->regs[VCPU_REGS_RDX];
9341
 
-       regs->rsi = vcpu->regs[VCPU_REGS_RSI];
9342
 
-       regs->rdi = vcpu->regs[VCPU_REGS_RDI];
9343
 
-       regs->rsp = vcpu->regs[VCPU_REGS_RSP];
9344
 
-       regs->rbp = vcpu->regs[VCPU_REGS_RBP];
9345
 
+       regs->rax = vcpu->arch.regs[VCPU_REGS_RAX];
9346
 
+       regs->rbx = vcpu->arch.regs[VCPU_REGS_RBX];
9347
 
+       regs->rcx = vcpu->arch.regs[VCPU_REGS_RCX];
9348
 
+       regs->rdx = vcpu->arch.regs[VCPU_REGS_RDX];
9349
 
+       regs->rsi = vcpu->arch.regs[VCPU_REGS_RSI];
9350
 
+       regs->rdi = vcpu->arch.regs[VCPU_REGS_RDI];
9351
 
+       regs->rsp = vcpu->arch.regs[VCPU_REGS_RSP];
9352
 
+       regs->rbp = vcpu->arch.regs[VCPU_REGS_RBP];
9353
 
 #ifdef CONFIG_X86_64
9354
 
-       regs->r8 = vcpu->regs[VCPU_REGS_R8];
9355
 
-       regs->r9 = vcpu->regs[VCPU_REGS_R9];
9356
 
-       regs->r10 = vcpu->regs[VCPU_REGS_R10];
9357
 
-       regs->r11 = vcpu->regs[VCPU_REGS_R11];
9358
 
-       regs->r12 = vcpu->regs[VCPU_REGS_R12];
9359
 
-       regs->r13 = vcpu->regs[VCPU_REGS_R13];
9360
 
-       regs->r14 = vcpu->regs[VCPU_REGS_R14];
9361
 
-       regs->r15 = vcpu->regs[VCPU_REGS_R15];
9362
 
+       regs->r8 = vcpu->arch.regs[VCPU_REGS_R8];
9363
 
+       regs->r9 = vcpu->arch.regs[VCPU_REGS_R9];
9364
 
+       regs->r10 = vcpu->arch.regs[VCPU_REGS_R10];
9365
 
+       regs->r11 = vcpu->arch.regs[VCPU_REGS_R11];
9366
 
+       regs->r12 = vcpu->arch.regs[VCPU_REGS_R12];
9367
 
+       regs->r13 = vcpu->arch.regs[VCPU_REGS_R13];
9368
 
+       regs->r14 = vcpu->arch.regs[VCPU_REGS_R14];
9369
 
+       regs->r15 = vcpu->arch.regs[VCPU_REGS_R15];
9370
 
 #endif
9371
 
 
9372
 
-       regs->rip = vcpu->rip;
9373
 
+       regs->rip = vcpu->arch.rip;
9374
 
        regs->rflags = kvm_x86_ops->get_rflags(vcpu);
9375
 
 
9376
 
        /*
9377
 
@@ -2182,31 +2600,30 @@ static int kvm_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu,
9378
 
        return 0;
9379
 
 }
9380
 
 
9381
 
-static int kvm_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu,
9382
 
-                                  struct kvm_regs *regs)
9383
 
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
9384
 
 {
9385
 
        vcpu_load(vcpu);
9386
 
 
9387
 
-       vcpu->regs[VCPU_REGS_RAX] = regs->rax;
9388
 
-       vcpu->regs[VCPU_REGS_RBX] = regs->rbx;
9389
 
-       vcpu->regs[VCPU_REGS_RCX] = regs->rcx;
9390
 
-       vcpu->regs[VCPU_REGS_RDX] = regs->rdx;
9391
 
-       vcpu->regs[VCPU_REGS_RSI] = regs->rsi;
9392
 
-       vcpu->regs[VCPU_REGS_RDI] = regs->rdi;
9393
 
-       vcpu->regs[VCPU_REGS_RSP] = regs->rsp;
9394
 
-       vcpu->regs[VCPU_REGS_RBP] = regs->rbp;
9395
 
+       vcpu->arch.regs[VCPU_REGS_RAX] = regs->rax;
9396
 
+       vcpu->arch.regs[VCPU_REGS_RBX] = regs->rbx;
9397
 
+       vcpu->arch.regs[VCPU_REGS_RCX] = regs->rcx;
9398
 
+       vcpu->arch.regs[VCPU_REGS_RDX] = regs->rdx;
9399
 
+       vcpu->arch.regs[VCPU_REGS_RSI] = regs->rsi;
9400
 
+       vcpu->arch.regs[VCPU_REGS_RDI] = regs->rdi;
9401
 
+       vcpu->arch.regs[VCPU_REGS_RSP] = regs->rsp;
9402
 
+       vcpu->arch.regs[VCPU_REGS_RBP] = regs->rbp;
9403
 
 #ifdef CONFIG_X86_64
9404
 
-       vcpu->regs[VCPU_REGS_R8] = regs->r8;
9405
 
-       vcpu->regs[VCPU_REGS_R9] = regs->r9;
9406
 
-       vcpu->regs[VCPU_REGS_R10] = regs->r10;
9407
 
-       vcpu->regs[VCPU_REGS_R11] = regs->r11;
9408
 
-       vcpu->regs[VCPU_REGS_R12] = regs->r12;
9409
 
-       vcpu->regs[VCPU_REGS_R13] = regs->r13;
9410
 
-       vcpu->regs[VCPU_REGS_R14] = regs->r14;
9411
 
-       vcpu->regs[VCPU_REGS_R15] = regs->r15;
9412
 
+       vcpu->arch.regs[VCPU_REGS_R8] = regs->r8;
9413
 
+       vcpu->arch.regs[VCPU_REGS_R9] = regs->r9;
9414
 
+       vcpu->arch.regs[VCPU_REGS_R10] = regs->r10;
9415
 
+       vcpu->arch.regs[VCPU_REGS_R11] = regs->r11;
9416
 
+       vcpu->arch.regs[VCPU_REGS_R12] = regs->r12;
9417
 
+       vcpu->arch.regs[VCPU_REGS_R13] = regs->r13;
9418
 
+       vcpu->arch.regs[VCPU_REGS_R14] = regs->r14;
9419
 
+       vcpu->arch.regs[VCPU_REGS_R15] = regs->r15;
9420
 
 #endif
9421
 
 
9422
 
-       vcpu->rip = regs->rip;
9423
 
+       vcpu->arch.rip = regs->rip;
9424
 
        kvm_x86_ops->set_rflags(vcpu, regs->rflags);
9425
 
 
9426
 
        kvm_x86_ops->decache_regs(vcpu);
9427
 
@@ -2222,8 +2639,18 @@ static void get_segment(struct kvm_vcpu *vcpu,
9428
 
        return kvm_x86_ops->get_segment(vcpu, var, seg);
9429
 
 }
9430
 
 
9431
 
-static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
9432
 
-                                   struct kvm_sregs *sregs)
9433
 
+void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
9434
 
+{
9435
 
+       struct kvm_segment cs;
9436
 
+
9437
 
+       get_segment(vcpu, &cs, VCPU_SREG_CS);
9438
 
+       *db = cs.db;
9439
 
+       *l = cs.l;
9440
 
+}
9441
 
+EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
9442
 
+
9443
 
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
9444
 
+                                 struct kvm_sregs *sregs)
9445
 
 {
9446
 
        struct descriptor_table dt;
9447
 
        int pending_vec;
9448
 
@@ -2248,12 +2675,12 @@ static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
9449
 
        sregs->gdt.base = dt.base;
9450
 
 
9451
 
        kvm_x86_ops->decache_cr4_guest_bits(vcpu);
9452
 
-       sregs->cr0 = vcpu->cr0;
9453
 
-       sregs->cr2 = vcpu->cr2;
9454
 
-       sregs->cr3 = vcpu->cr3;
9455
 
-       sregs->cr4 = vcpu->cr4;
9456
 
+       sregs->cr0 = vcpu->arch.cr0;
9457
 
+       sregs->cr2 = vcpu->arch.cr2;
9458
 
+       sregs->cr3 = vcpu->arch.cr3;
9459
 
+       sregs->cr4 = vcpu->arch.cr4;
9460
 
        sregs->cr8 = get_cr8(vcpu);
9461
 
-       sregs->efer = vcpu->shadow_efer;
9462
 
+       sregs->efer = vcpu->arch.shadow_efer;
9463
 
        sregs->apic_base = kvm_get_apic_base(vcpu);
9464
 
 
9465
 
        if (irqchip_in_kernel(vcpu->kvm)) {
9466
 
@@ -2261,9 +2688,10 @@ static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
9467
 
                       sizeof sregs->interrupt_bitmap);
9468
 
                pending_vec = kvm_x86_ops->get_irq(vcpu);
9469
 
                if (pending_vec >= 0)
9470
 
-                       set_bit(pending_vec, (unsigned long *)sregs->interrupt_bitmap);
9471
 
+                       set_bit(pending_vec,
9472
 
+                               (unsigned long *)sregs->interrupt_bitmap);
9473
 
        } else
9474
 
-               memcpy(sregs->interrupt_bitmap, vcpu->irq_pending,
9475
 
+               memcpy(sregs->interrupt_bitmap, vcpu->arch.irq_pending,
9476
 
                       sizeof sregs->interrupt_bitmap);
9477
 
 
9478
 
        vcpu_put(vcpu);
9479
 
@@ -2277,8 +2705,8 @@ static void set_segment(struct kvm_vcpu *vcpu,
9480
 
        return kvm_x86_ops->set_segment(vcpu, var, seg);
9481
 
 }
9482
 
 
9483
 
-static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
9484
 
-                                   struct kvm_sregs *sregs)
9485
 
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
9486
 
+                                 struct kvm_sregs *sregs)
9487
 
 {
9488
 
        int mmu_reset_needed = 0;
9489
 
        int i, pending_vec, max_bits;
9490
 
@@ -2293,13 +2721,13 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
9491
 
        dt.base = sregs->gdt.base;
9492
 
        kvm_x86_ops->set_gdt(vcpu, &dt);
9493
 
 
9494
 
-       vcpu->cr2 = sregs->cr2;
9495
 
-       mmu_reset_needed |= vcpu->cr3 != sregs->cr3;
9496
 
-       vcpu->cr3 = sregs->cr3;
9497
 
+       vcpu->arch.cr2 = sregs->cr2;
9498
 
+       mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3;
9499
 
+       vcpu->arch.cr3 = sregs->cr3;
9500
 
 
9501
 
        set_cr8(vcpu, sregs->cr8);
9502
 
 
9503
 
-       mmu_reset_needed |= vcpu->shadow_efer != sregs->efer;
9504
 
+       mmu_reset_needed |= vcpu->arch.shadow_efer != sregs->efer;
9505
 
 #ifdef CONFIG_X86_64
9506
 
        kvm_x86_ops->set_efer(vcpu, sregs->efer);
9507
 
 #endif
9508
 
@@ -2307,25 +2735,25 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
9509
 
 
9510
 
        kvm_x86_ops->decache_cr4_guest_bits(vcpu);
9511
 
 
9512
 
-       mmu_reset_needed |= vcpu->cr0 != sregs->cr0;
9513
 
-       vcpu->cr0 = sregs->cr0;
9514
 
+       mmu_reset_needed |= vcpu->arch.cr0 != sregs->cr0;
9515
 
+       vcpu->arch.cr0 = sregs->cr0;
9516
 
        kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
9517
 
 
9518
 
-       mmu_reset_needed |= vcpu->cr4 != sregs->cr4;
9519
 
+       mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4;
9520
 
        kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
9521
 
        if (!is_long_mode(vcpu) && is_pae(vcpu))
9522
 
-               load_pdptrs(vcpu, vcpu->cr3);
9523
 
+               load_pdptrs(vcpu, vcpu->arch.cr3);
9524
 
 
9525
 
        if (mmu_reset_needed)
9526
 
                kvm_mmu_reset_context(vcpu);
9527
 
 
9528
 
        if (!irqchip_in_kernel(vcpu->kvm)) {
9529
 
-               memcpy(vcpu->irq_pending, sregs->interrupt_bitmap,
9530
 
-                      sizeof vcpu->irq_pending);
9531
 
-               vcpu->irq_summary = 0;
9532
 
-               for (i = 0; i < ARRAY_SIZE(vcpu->irq_pending); ++i)
9533
 
-                       if (vcpu->irq_pending[i])
9534
 
-                               __set_bit(i, &vcpu->irq_summary);
9535
 
+               memcpy(vcpu->arch.irq_pending, sregs->interrupt_bitmap,
9536
 
+                      sizeof vcpu->arch.irq_pending);
9537
 
+               vcpu->arch.irq_summary = 0;
9538
 
+               for (i = 0; i < ARRAY_SIZE(vcpu->arch.irq_pending); ++i)
9539
 
+                       if (vcpu->arch.irq_pending[i])
9540
 
+                               __set_bit(i, &vcpu->arch.irq_summary);
9541
 
        } else {
9542
 
                max_bits = (sizeof sregs->interrupt_bitmap) << 3;
9543
 
                pending_vec = find_first_bit(
9544
 
@@ -2334,7 +2762,8 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
9545
 
                /* Only pending external irq is handled here */
9546
 
                if (pending_vec < max_bits) {
9547
 
                        kvm_x86_ops->set_irq(vcpu, pending_vec);
9548
 
-                       printk("Set back pending irq %d\n", pending_vec);
9549
 
+                       pr_debug("Set back pending irq %d\n",
9550
 
+                                pending_vec);
9551
 
                }
9552
 
        }
9553
 
 
9554
 
@@ -2353,174 +2782,8 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
9555
 
        return 0;
9556
 
 }
9557
 
 
9558
 
-void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
9559
 
-{
9560
 
-       struct kvm_segment cs;
9561
 
-
9562
 
-       get_segment(vcpu, &cs, VCPU_SREG_CS);
9563
 
-       *db = cs.db;
9564
 
-       *l = cs.l;
9565
 
-}
9566
 
-EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
9567
 
-
9568
 
-/*
9569
 
- * List of msr numbers which we expose to userspace through KVM_GET_MSRS
9570
 
- * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
9571
 
- *
9572
 
- * This list is modified at module load time to reflect the
9573
 
- * capabilities of the host cpu.
9574
 
- */
9575
 
-static u32 msrs_to_save[] = {
9576
 
-       MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
9577
 
-       MSR_K6_STAR,
9578
 
-#ifdef CONFIG_X86_64
9579
 
-       MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
9580
 
-#endif
9581
 
-       MSR_IA32_TIME_STAMP_COUNTER,
9582
 
-};
9583
 
-
9584
 
-static unsigned num_msrs_to_save;
9585
 
-
9586
 
-static u32 emulated_msrs[] = {
9587
 
-       MSR_IA32_MISC_ENABLE,
9588
 
-};
9589
 
-
9590
 
-static __init void kvm_init_msr_list(void)
9591
 
-{
9592
 
-       u32 dummy[2];
9593
 
-       unsigned i, j;
9594
 
-
9595
 
-       for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
9596
 
-               if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
9597
 
-                       continue;
9598
 
-               if (j < i)
9599
 
-                       msrs_to_save[j] = msrs_to_save[i];
9600
 
-               j++;
9601
 
-       }
9602
 
-       num_msrs_to_save = j;
9603
 
-}
9604
 
-
9605
 
-/*
9606
 
- * Adapt set_msr() to msr_io()'s calling convention
9607
 
- */
9608
 
-static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
9609
 
-{
9610
 
-       return kvm_set_msr(vcpu, index, *data);
9611
 
-}
9612
 
-
9613
 
-/*
9614
 
- * Read or write a bunch of msrs. All parameters are kernel addresses.
9615
 
- *
9616
 
- * @return number of msrs set successfully.
9617
 
- */
9618
 
-static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
9619
 
-                   struct kvm_msr_entry *entries,
9620
 
-                   int (*do_msr)(struct kvm_vcpu *vcpu,
9621
 
-                                 unsigned index, u64 *data))
9622
 
-{
9623
 
-       int i;
9624
 
-
9625
 
-       vcpu_load(vcpu);
9626
 
-
9627
 
-       for (i = 0; i < msrs->nmsrs; ++i)
9628
 
-               if (do_msr(vcpu, entries[i].index, &entries[i].data))
9629
 
-                       break;
9630
 
-
9631
 
-       vcpu_put(vcpu);
9632
 
-
9633
 
-       return i;
9634
 
-}
9635
 
-
9636
 
-/*
9637
 
- * Read or write a bunch of msrs. Parameters are user addresses.
9638
 
- *
9639
 
- * @return number of msrs set successfully.
9640
 
- */
9641
 
-static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
9642
 
-                 int (*do_msr)(struct kvm_vcpu *vcpu,
9643
 
-                               unsigned index, u64 *data),
9644
 
-                 int writeback)
9645
 
-{
9646
 
-       struct kvm_msrs msrs;
9647
 
-       struct kvm_msr_entry *entries;
9648
 
-       int r, n;
9649
 
-       unsigned size;
9650
 
-
9651
 
-       r = -EFAULT;
9652
 
-       if (copy_from_user(&msrs, user_msrs, sizeof msrs))
9653
 
-               goto out;
9654
 
-
9655
 
-       r = -E2BIG;
9656
 
-       if (msrs.nmsrs >= MAX_IO_MSRS)
9657
 
-               goto out;
9658
 
-
9659
 
-       r = -ENOMEM;
9660
 
-       size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
9661
 
-       entries = vmalloc(size);
9662
 
-       if (!entries)
9663
 
-               goto out;
9664
 
-
9665
 
-       r = -EFAULT;
9666
 
-       if (copy_from_user(entries, user_msrs->entries, size))
9667
 
-               goto out_free;
9668
 
-
9669
 
-       r = n = __msr_io(vcpu, &msrs, entries, do_msr);
9670
 
-       if (r < 0)
9671
 
-               goto out_free;
9672
 
-
9673
 
-       r = -EFAULT;
9674
 
-       if (writeback && copy_to_user(user_msrs->entries, entries, size))
9675
 
-               goto out_free;
9676
 
-
9677
 
-       r = n;
9678
 
-
9679
 
-out_free:
9680
 
-       vfree(entries);
9681
 
-out:
9682
 
-       return r;
9683
 
-}
9684
 
-
9685
 
-/*
9686
 
- * Translate a guest virtual address to a guest physical address.
9687
 
- */
9688
 
-static int kvm_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
9689
 
-                                   struct kvm_translation *tr)
9690
 
-{
9691
 
-       unsigned long vaddr = tr->linear_address;
9692
 
-       gpa_t gpa;
9693
 
-
9694
 
-       vcpu_load(vcpu);
9695
 
-       mutex_lock(&vcpu->kvm->lock);
9696
 
-       gpa = vcpu->mmu.gva_to_gpa(vcpu, vaddr);
9697
 
-       tr->physical_address = gpa;
9698
 
-       tr->valid = gpa != UNMAPPED_GVA;
9699
 
-       tr->writeable = 1;
9700
 
-       tr->usermode = 0;
9701
 
-       mutex_unlock(&vcpu->kvm->lock);
9702
 
-       vcpu_put(vcpu);
9703
 
-
9704
 
-       return 0;
9705
 
-}
9706
 
-
9707
 
-static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
9708
 
-                                   struct kvm_interrupt *irq)
9709
 
-{
9710
 
-       if (irq->irq < 0 || irq->irq >= 256)
9711
 
-               return -EINVAL;
9712
 
-       if (irqchip_in_kernel(vcpu->kvm))
9713
 
-               return -ENXIO;
9714
 
-       vcpu_load(vcpu);
9715
 
-
9716
 
-       set_bit(irq->irq, vcpu->irq_pending);
9717
 
-       set_bit(irq->irq / BITS_PER_LONG, &vcpu->irq_summary);
9718
 
-
9719
 
-       vcpu_put(vcpu);
9720
 
-
9721
 
-       return 0;
9722
 
-}
9723
 
-
9724
 
-static int kvm_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
9725
 
-                                     struct kvm_debug_guest *dbg)
9726
 
+int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
9727
 
+                                   struct kvm_debug_guest *dbg)
9728
 
 {
9729
 
        int r;
9730
 
 
9731
 
@@ -2533,179 +2796,6 @@ static int kvm_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
9732
 
        return r;
9733
 
 }
9734
 
 
9735
 
-static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma,
9736
 
-                                   unsigned long address,
9737
 
-                                   int *type)
9738
 
-{
9739
 
-       struct kvm_vcpu *vcpu = vma->vm_file->private_data;
9740
 
-       unsigned long pgoff;
9741
 
-       struct page *page;
9742
 
-
9743
 
-       pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
9744
 
-       if (pgoff == 0)
9745
 
-               page = virt_to_page(vcpu->run);
9746
 
-       else if (pgoff == KVM_PIO_PAGE_OFFSET)
9747
 
-               page = virt_to_page(vcpu->pio_data);
9748
 
-       else
9749
 
-               return NOPAGE_SIGBUS;
9750
 
-       get_page(page);
9751
 
-       if (type != NULL)
9752
 
-               *type = VM_FAULT_MINOR;
9753
 
-
9754
 
-       return page;
9755
 
-}
9756
 
-
9757
 
-static struct vm_operations_struct kvm_vcpu_vm_ops = {
9758
 
-       .nopage = kvm_vcpu_nopage,
9759
 
-};
9760
 
-
9761
 
-static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
9762
 
-{
9763
 
-       vma->vm_ops = &kvm_vcpu_vm_ops;
9764
 
-       return 0;
9765
 
-}
9766
 
-
9767
 
-static int kvm_vcpu_release(struct inode *inode, struct file *filp)
9768
 
-{
9769
 
-       struct kvm_vcpu *vcpu = filp->private_data;
9770
 
-
9771
 
-       fput(vcpu->kvm->filp);
9772
 
-       return 0;
9773
 
-}
9774
 
-
9775
 
-static struct file_operations kvm_vcpu_fops = {
9776
 
-       .release        = kvm_vcpu_release,
9777
 
-       .unlocked_ioctl = kvm_vcpu_ioctl,
9778
 
-       .compat_ioctl   = kvm_vcpu_ioctl,
9779
 
-       .mmap           = kvm_vcpu_mmap,
9780
 
-};
9781
 
-
9782
 
-/*
9783
 
- * Allocates an inode for the vcpu.
9784
 
- */
9785
 
-static int create_vcpu_fd(struct kvm_vcpu *vcpu)
9786
 
-{
9787
 
-       int fd, r;
9788
 
-       struct inode *inode;
9789
 
-       struct file *file;
9790
 
-
9791
 
-       r = anon_inode_getfd(&fd, &inode, &file,
9792
 
-                            "kvm-vcpu", &kvm_vcpu_fops, vcpu);
9793
 
-       if (r)
9794
 
-               return r;
9795
 
-       atomic_inc(&vcpu->kvm->filp->f_count);
9796
 
-       return fd;
9797
 
-}
9798
 
-
9799
 
-/*
9800
 
- * Creates some virtual cpus.  Good luck creating more than one.
9801
 
- */
9802
 
-static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
9803
 
-{
9804
 
-       int r;
9805
 
-       struct kvm_vcpu *vcpu;
9806
 
-
9807
 
-       if (!valid_vcpu(n))
9808
 
-               return -EINVAL;
9809
 
-
9810
 
-       vcpu = kvm_x86_ops->vcpu_create(kvm, n);
9811
 
-       if (IS_ERR(vcpu))
9812
 
-               return PTR_ERR(vcpu);
9813
 
-
9814
 
-       preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
9815
 
-
9816
 
-       /* We do fxsave: this must be aligned. */
9817
 
-       BUG_ON((unsigned long)&vcpu->host_fx_image & 0xF);
9818
 
-
9819
 
-       vcpu_load(vcpu);
9820
 
-       r = kvm_mmu_setup(vcpu);
9821
 
-       vcpu_put(vcpu);
9822
 
-       if (r < 0)
9823
 
-               goto free_vcpu;
9824
 
-
9825
 
-       mutex_lock(&kvm->lock);
9826
 
-       if (kvm->vcpus[n]) {
9827
 
-               r = -EEXIST;
9828
 
-               mutex_unlock(&kvm->lock);
9829
 
-               goto mmu_unload;
9830
 
-       }
9831
 
-       kvm->vcpus[n] = vcpu;
9832
 
-       mutex_unlock(&kvm->lock);
9833
 
-
9834
 
-       /* Now it's all set up, let userspace reach it */
9835
 
-       r = create_vcpu_fd(vcpu);
9836
 
-       if (r < 0)
9837
 
-               goto unlink;
9838
 
-       return r;
9839
 
-
9840
 
-unlink:
9841
 
-       mutex_lock(&kvm->lock);
9842
 
-       kvm->vcpus[n] = NULL;
9843
 
-       mutex_unlock(&kvm->lock);
9844
 
-
9845
 
-mmu_unload:
9846
 
-       vcpu_load(vcpu);
9847
 
-       kvm_mmu_unload(vcpu);
9848
 
-       vcpu_put(vcpu);
9849
 
-
9850
 
-free_vcpu:
9851
 
-       kvm_x86_ops->vcpu_free(vcpu);
9852
 
-       return r;
9853
 
-}
9854
 
-
9855
 
-static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
9856
 
-{
9857
 
-       u64 efer;
9858
 
-       int i;
9859
 
-       struct kvm_cpuid_entry *e, *entry;
9860
 
-
9861
 
-       rdmsrl(MSR_EFER, efer);
9862
 
-       entry = NULL;
9863
 
-       for (i = 0; i < vcpu->cpuid_nent; ++i) {
9864
 
-               e = &vcpu->cpuid_entries[i];
9865
 
-               if (e->function == 0x80000001) {
9866
 
-                       entry = e;
9867
 
-                       break;
9868
 
-               }
9869
 
-       }
9870
 
-       if (entry && (entry->edx & (1 << 20)) && !(efer & EFER_NX)) {
9871
 
-               entry->edx &= ~(1 << 20);
9872
 
-               printk(KERN_INFO "kvm: guest NX capability removed\n");
9873
 
-       }
9874
 
-}
9875
 
-
9876
 
-static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
9877
 
-                                   struct kvm_cpuid *cpuid,
9878
 
-                                   struct kvm_cpuid_entry __user *entries)
9879
 
-{
9880
 
-       int r;
9881
 
-
9882
 
-       r = -E2BIG;
9883
 
-       if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
9884
 
-               goto out;
9885
 
-       r = -EFAULT;
9886
 
-       if (copy_from_user(&vcpu->cpuid_entries, entries,
9887
 
-                          cpuid->nent * sizeof(struct kvm_cpuid_entry)))
9888
 
-               goto out;
9889
 
-       vcpu->cpuid_nent = cpuid->nent;
9890
 
-       cpuid_fix_nx_cap(vcpu);
9891
 
-       return 0;
9892
 
-
9893
 
-out:
9894
 
-       return r;
9895
 
-}
9896
 
-
9897
 
-static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset)
9898
 
-{
9899
 
-       if (sigset) {
9900
 
-               sigdelsetmask(sigset, sigmask(SIGKILL)|sigmask(SIGSTOP));
9901
 
-               vcpu->sigset_active = 1;
9902
 
-               vcpu->sigset = *sigset;
9903
 
-       } else
9904
 
-               vcpu->sigset_active = 0;
9905
 
-       return 0;
9906
 
-}
9907
 
-
9908
 
 /*
9909
 
  * fxsave fpu state.  Taken from x86_64/processor.h.  To be killed when
9910
 
  * we have asm/x86/processor.h
9911
 
@@ -2727,9 +2817,31 @@ struct fxsave {
9912
 
 #endif
9913
 
 };
9914
 
 
9915
 
-static int kvm_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
9916
 
+/*
9917
 
+ * Translate a guest virtual address to a guest physical address.
9918
 
+ */
9919
 
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
9920
 
+                                   struct kvm_translation *tr)
9921
 
+{
9922
 
+       unsigned long vaddr = tr->linear_address;
9923
 
+       gpa_t gpa;
9924
 
+
9925
 
+       vcpu_load(vcpu);
9926
 
+       mutex_lock(&vcpu->kvm->lock);
9927
 
+       gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr);
9928
 
+       tr->physical_address = gpa;
9929
 
+       tr->valid = gpa != UNMAPPED_GVA;
9930
 
+       tr->writeable = 1;
9931
 
+       tr->usermode = 0;
9932
 
+       mutex_unlock(&vcpu->kvm->lock);
9933
 
+       vcpu_put(vcpu);
9934
 
+
9935
 
+       return 0;
9936
 
+}
9937
 
+
9938
 
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
9939
 
 {
9940
 
-       struct fxsave *fxsave = (struct fxsave *)&vcpu->guest_fx_image;
9941
 
+       struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image;
9942
 
 
9943
 
        vcpu_load(vcpu);
9944
 
 
9945
 
@@ -2747,9 +2859,9 @@ static int kvm_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
9946
 
        return 0;
9947
 
 }
9948
 
 
9949
 
-static int kvm_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
9950
 
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
9951
 
 {
9952
 
-       struct fxsave *fxsave = (struct fxsave *)&vcpu->guest_fx_image;
9953
 
+       struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image;
9954
 
 
9955
 
        vcpu_load(vcpu);
9956
 
 
9957
 
@@ -2767,862 +2879,288 @@ static int kvm_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
9958
 
        return 0;
9959
 
 }
9960
 
 
9961
 
-static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
9962
 
-                                   struct kvm_lapic_state *s)
9963
 
+void fx_init(struct kvm_vcpu *vcpu)
9964
 
 {
9965
 
-       vcpu_load(vcpu);
9966
 
-       memcpy(s->regs, vcpu->apic->regs, sizeof *s);
9967
 
-       vcpu_put(vcpu);
9968
 
-
9969
 
-       return 0;
9970
 
-}
9971
 
+       unsigned after_mxcsr_mask;
9972
 
 
9973
 
-static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
9974
 
-                                   struct kvm_lapic_state *s)
9975
 
-{
9976
 
-       vcpu_load(vcpu);
9977
 
-       memcpy(vcpu->apic->regs, s->regs, sizeof *s);
9978
 
-       kvm_apic_post_state_restore(vcpu);
9979
 
-       vcpu_put(vcpu);
9980
 
+       /* Initialize guest FPU by resetting ours and saving into guest's */
9981
 
+       preempt_disable();
9982
 
+       fx_save(&vcpu->arch.host_fx_image);
9983
 
+       fpu_init();
9984
 
+       fx_save(&vcpu->arch.guest_fx_image);
9985
 
+       fx_restore(&vcpu->arch.host_fx_image);
9986
 
+       preempt_enable();
9987
 
 
9988
 
-       return 0;
9989
 
+       vcpu->arch.cr0 |= X86_CR0_ET;
9990
 
+       after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space);
9991
 
+       vcpu->arch.guest_fx_image.mxcsr = 0x1f80;
9992
 
+       memset((void *)&vcpu->arch.guest_fx_image + after_mxcsr_mask,
9993
 
+              0, sizeof(struct i387_fxsave_struct) - after_mxcsr_mask);
9994
 
 }
9995
 
+EXPORT_SYMBOL_GPL(fx_init);
9996
 
 
9997
 
-static long kvm_vcpu_ioctl(struct file *filp,
9998
 
-                          unsigned int ioctl, unsigned long arg)
9999
 
+void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
10000
 
 {
10001
 
-       struct kvm_vcpu *vcpu = filp->private_data;
10002
 
-       void __user *argp = (void __user *)arg;
10003
 
-       int r = -EINVAL;
10004
 
-
10005
 
-       switch (ioctl) {
10006
 
-       case KVM_RUN:
10007
 
-               r = -EINVAL;
10008
 
-               if (arg)
10009
 
-                       goto out;
10010
 
-               r = kvm_vcpu_ioctl_run(vcpu, vcpu->run);
10011
 
-               break;
10012
 
-       case KVM_GET_REGS: {
10013
 
-               struct kvm_regs kvm_regs;
10014
 
-
10015
 
-               memset(&kvm_regs, 0, sizeof kvm_regs);
10016
 
-               r = kvm_vcpu_ioctl_get_regs(vcpu, &kvm_regs);
10017
 
-               if (r)
10018
 
-                       goto out;
10019
 
-               r = -EFAULT;
10020
 
-               if (copy_to_user(argp, &kvm_regs, sizeof kvm_regs))
10021
 
-                       goto out;
10022
 
-               r = 0;
10023
 
-               break;
10024
 
-       }
10025
 
-       case KVM_SET_REGS: {
10026
 
-               struct kvm_regs kvm_regs;
10027
 
-
10028
 
-               r = -EFAULT;
10029
 
-               if (copy_from_user(&kvm_regs, argp, sizeof kvm_regs))
10030
 
-                       goto out;
10031
 
-               r = kvm_vcpu_ioctl_set_regs(vcpu, &kvm_regs);
10032
 
-               if (r)
10033
 
-                       goto out;
10034
 
-               r = 0;
10035
 
-               break;
10036
 
-       }
10037
 
-       case KVM_GET_SREGS: {
10038
 
-               struct kvm_sregs kvm_sregs;
10039
 
-
10040
 
-               memset(&kvm_sregs, 0, sizeof kvm_sregs);
10041
 
-               r = kvm_vcpu_ioctl_get_sregs(vcpu, &kvm_sregs);
10042
 
-               if (r)
10043
 
-                       goto out;
10044
 
-               r = -EFAULT;
10045
 
-               if (copy_to_user(argp, &kvm_sregs, sizeof kvm_sregs))
10046
 
-                       goto out;
10047
 
-               r = 0;
10048
 
-               break;
10049
 
-       }
10050
 
-       case KVM_SET_SREGS: {
10051
 
-               struct kvm_sregs kvm_sregs;
10052
 
-
10053
 
-               r = -EFAULT;
10054
 
-               if (copy_from_user(&kvm_sregs, argp, sizeof kvm_sregs))
10055
 
-                       goto out;
10056
 
-               r = kvm_vcpu_ioctl_set_sregs(vcpu, &kvm_sregs);
10057
 
-               if (r)
10058
 
-                       goto out;
10059
 
-               r = 0;
10060
 
-               break;
10061
 
-       }
10062
 
-       case KVM_TRANSLATE: {
10063
 
-               struct kvm_translation tr;
10064
 
-
10065
 
-               r = -EFAULT;
10066
 
-               if (copy_from_user(&tr, argp, sizeof tr))
10067
 
-                       goto out;
10068
 
-               r = kvm_vcpu_ioctl_translate(vcpu, &tr);
10069
 
-               if (r)
10070
 
-                       goto out;
10071
 
-               r = -EFAULT;
10072
 
-               if (copy_to_user(argp, &tr, sizeof tr))
10073
 
-                       goto out;
10074
 
-               r = 0;
10075
 
-               break;
10076
 
-       }
10077
 
-       case KVM_INTERRUPT: {
10078
 
-               struct kvm_interrupt irq;
10079
 
-
10080
 
-               r = -EFAULT;
10081
 
-               if (copy_from_user(&irq, argp, sizeof irq))
10082
 
-                       goto out;
10083
 
-               r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
10084
 
-               if (r)
10085
 
-                       goto out;
10086
 
-               r = 0;
10087
 
-               break;
10088
 
-       }
10089
 
-       case KVM_DEBUG_GUEST: {
10090
 
-               struct kvm_debug_guest dbg;
10091
 
-
10092
 
-               r = -EFAULT;
10093
 
-               if (copy_from_user(&dbg, argp, sizeof dbg))
10094
 
-                       goto out;
10095
 
-               r = kvm_vcpu_ioctl_debug_guest(vcpu, &dbg);
10096
 
-               if (r)
10097
 
-                       goto out;
10098
 
-               r = 0;
10099
 
-               break;
10100
 
-       }
10101
 
-       case KVM_GET_MSRS:
10102
 
-               r = msr_io(vcpu, argp, kvm_get_msr, 1);
10103
 
-               break;
10104
 
-       case KVM_SET_MSRS:
10105
 
-               r = msr_io(vcpu, argp, do_set_msr, 0);
10106
 
-               break;
10107
 
-       case KVM_SET_CPUID: {
10108
 
-               struct kvm_cpuid __user *cpuid_arg = argp;
10109
 
-               struct kvm_cpuid cpuid;
10110
 
-
10111
 
-               r = -EFAULT;
10112
 
-               if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
10113
 
-                       goto out;
10114
 
-               r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
10115
 
-               if (r)
10116
 
-                       goto out;
10117
 
-               break;
10118
 
-       }
10119
 
-       case KVM_SET_SIGNAL_MASK: {
10120
 
-               struct kvm_signal_mask __user *sigmask_arg = argp;
10121
 
-               struct kvm_signal_mask kvm_sigmask;
10122
 
-               sigset_t sigset, *p;
10123
 
-
10124
 
-               p = NULL;
10125
 
-               if (argp) {
10126
 
-                       r = -EFAULT;
10127
 
-                       if (copy_from_user(&kvm_sigmask, argp,
10128
 
-                                          sizeof kvm_sigmask))
10129
 
-                               goto out;
10130
 
-                       r = -EINVAL;
10131
 
-                       if (kvm_sigmask.len != sizeof sigset)
10132
 
-                               goto out;
10133
 
-                       r = -EFAULT;
10134
 
-                       if (copy_from_user(&sigset, sigmask_arg->sigset,
10135
 
-                                          sizeof sigset))
10136
 
-                               goto out;
10137
 
-                       p = &sigset;
10138
 
-               }
10139
 
-               r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset);
10140
 
-               break;
10141
 
-       }
10142
 
-       case KVM_GET_FPU: {
10143
 
-               struct kvm_fpu fpu;
10144
 
-
10145
 
-               memset(&fpu, 0, sizeof fpu);
10146
 
-               r = kvm_vcpu_ioctl_get_fpu(vcpu, &fpu);
10147
 
-               if (r)
10148
 
-                       goto out;
10149
 
-               r = -EFAULT;
10150
 
-               if (copy_to_user(argp, &fpu, sizeof fpu))
10151
 
-                       goto out;
10152
 
-               r = 0;
10153
 
-               break;
10154
 
-       }
10155
 
-       case KVM_SET_FPU: {
10156
 
-               struct kvm_fpu fpu;
10157
 
-
10158
 
-               r = -EFAULT;
10159
 
-               if (copy_from_user(&fpu, argp, sizeof fpu))
10160
 
-                       goto out;
10161
 
-               r = kvm_vcpu_ioctl_set_fpu(vcpu, &fpu);
10162
 
-               if (r)
10163
 
-                       goto out;
10164
 
-               r = 0;
10165
 
-               break;
10166
 
-       }
10167
 
-       case KVM_GET_LAPIC: {
10168
 
-               struct kvm_lapic_state lapic;
10169
 
-
10170
 
-               memset(&lapic, 0, sizeof lapic);
10171
 
-               r = kvm_vcpu_ioctl_get_lapic(vcpu, &lapic);
10172
 
-               if (r)
10173
 
-                       goto out;
10174
 
-               r = -EFAULT;
10175
 
-               if (copy_to_user(argp, &lapic, sizeof lapic))
10176
 
-                       goto out;
10177
 
-               r = 0;
10178
 
-               break;
10179
 
-       }
10180
 
-       case KVM_SET_LAPIC: {
10181
 
-               struct kvm_lapic_state lapic;
10182
 
+       if (!vcpu->fpu_active || vcpu->guest_fpu_loaded)
10183
 
+               return;
10184
 
 
10185
 
-               r = -EFAULT;
10186
 
-               if (copy_from_user(&lapic, argp, sizeof lapic))
10187
 
-                       goto out;
10188
 
-               r = kvm_vcpu_ioctl_set_lapic(vcpu, &lapic);;
10189
 
-               if (r)
10190
 
-                       goto out;
10191
 
-               r = 0;
10192
 
-               break;
10193
 
-       }
10194
 
-       default:
10195
 
-               ;
10196
 
-       }
10197
 
-out:
10198
 
-       return r;
10199
 
+       vcpu->guest_fpu_loaded = 1;
10200
 
+       fx_save(&vcpu->arch.host_fx_image);
10201
 
+       fx_restore(&vcpu->arch.guest_fx_image);
10202
 
 }
10203
 
+EXPORT_SYMBOL_GPL(kvm_load_guest_fpu);
10204
 
 
10205
 
-static long kvm_vm_ioctl(struct file *filp,
10206
 
-                          unsigned int ioctl, unsigned long arg)
10207
 
+void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
10208
 
 {
10209
 
-       struct kvm *kvm = filp->private_data;
10210
 
-       void __user *argp = (void __user *)arg;
10211
 
-       int r = -EINVAL;
10212
 
-
10213
 
-       switch (ioctl) {
10214
 
-       case KVM_CREATE_VCPU:
10215
 
-               r = kvm_vm_ioctl_create_vcpu(kvm, arg);
10216
 
-               if (r < 0)
10217
 
-                       goto out;
10218
 
-               break;
10219
 
-       case KVM_SET_MEMORY_REGION: {
10220
 
-               struct kvm_memory_region kvm_mem;
10221
 
-
10222
 
-               r = -EFAULT;
10223
 
-               if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
10224
 
-                       goto out;
10225
 
-               r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_mem);
10226
 
-               if (r)
10227
 
-                       goto out;
10228
 
-               break;
10229
 
-       }
10230
 
-       case KVM_GET_DIRTY_LOG: {
10231
 
-               struct kvm_dirty_log log;
10232
 
-
10233
 
-               r = -EFAULT;
10234
 
-               if (copy_from_user(&log, argp, sizeof log))
10235
 
-                       goto out;
10236
 
-               r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
10237
 
-               if (r)
10238
 
-                       goto out;
10239
 
-               break;
10240
 
-       }
10241
 
-       case KVM_SET_MEMORY_ALIAS: {
10242
 
-               struct kvm_memory_alias alias;
10243
 
-
10244
 
-               r = -EFAULT;
10245
 
-               if (copy_from_user(&alias, argp, sizeof alias))
10246
 
-                       goto out;
10247
 
-               r = kvm_vm_ioctl_set_memory_alias(kvm, &alias);
10248
 
-               if (r)
10249
 
-                       goto out;
10250
 
-               break;
10251
 
-       }
10252
 
-       case KVM_CREATE_IRQCHIP:
10253
 
-               r = -ENOMEM;
10254
 
-               kvm->vpic = kvm_create_pic(kvm);
10255
 
-               if (kvm->vpic) {
10256
 
-                       r = kvm_ioapic_init(kvm);
10257
 
-                       if (r) {
10258
 
-                               kfree(kvm->vpic);
10259
 
-                               kvm->vpic = NULL;
10260
 
-                               goto out;
10261
 
-                       }
10262
 
-               }
10263
 
-               else
10264
 
-                       goto out;
10265
 
-               break;
10266
 
-       case KVM_IRQ_LINE: {
10267
 
-               struct kvm_irq_level irq_event;
10268
 
-
10269
 
-               r = -EFAULT;
10270
 
-               if (copy_from_user(&irq_event, argp, sizeof irq_event))
10271
 
-                       goto out;
10272
 
-               if (irqchip_in_kernel(kvm)) {
10273
 
-                       mutex_lock(&kvm->lock);
10274
 
-                       if (irq_event.irq < 16)
10275
 
-                               kvm_pic_set_irq(pic_irqchip(kvm),
10276
 
-                                       irq_event.irq,
10277
 
-                                       irq_event.level);
10278
 
-                       kvm_ioapic_set_irq(kvm->vioapic,
10279
 
-                                       irq_event.irq,
10280
 
-                                       irq_event.level);
10281
 
-                       mutex_unlock(&kvm->lock);
10282
 
-                       r = 0;
10283
 
-               }
10284
 
-               break;
10285
 
-       }
10286
 
-       case KVM_GET_IRQCHIP: {
10287
 
-               /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
10288
 
-               struct kvm_irqchip chip;
10289
 
-
10290
 
-               r = -EFAULT;
10291
 
-               if (copy_from_user(&chip, argp, sizeof chip))
10292
 
-                       goto out;
10293
 
-               r = -ENXIO;
10294
 
-               if (!irqchip_in_kernel(kvm))
10295
 
-                       goto out;
10296
 
-               r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
10297
 
-               if (r)
10298
 
-                       goto out;
10299
 
-               r = -EFAULT;
10300
 
-               if (copy_to_user(argp, &chip, sizeof chip))
10301
 
-                       goto out;
10302
 
-               r = 0;
10303
 
-               break;
10304
 
-       }
10305
 
-       case KVM_SET_IRQCHIP: {
10306
 
-               /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
10307
 
-               struct kvm_irqchip chip;
10308
 
+       if (!vcpu->guest_fpu_loaded)
10309
 
+               return;
10310
 
 
10311
 
-               r = -EFAULT;
10312
 
-               if (copy_from_user(&chip, argp, sizeof chip))
10313
 
-                       goto out;
10314
 
-               r = -ENXIO;
10315
 
-               if (!irqchip_in_kernel(kvm))
10316
 
-                       goto out;
10317
 
-               r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
10318
 
-               if (r)
10319
 
-                       goto out;
10320
 
-               r = 0;
10321
 
-               break;
10322
 
-       }
10323
 
-       default:
10324
 
-               ;
10325
 
-       }
10326
 
-out:
10327
 
-       return r;
10328
 
+       vcpu->guest_fpu_loaded = 0;
10329
 
+       fx_save(&vcpu->arch.guest_fx_image);
10330
 
+       fx_restore(&vcpu->arch.host_fx_image);
10331
 
+       ++vcpu->stat.fpu_reload;
10332
 
 }
10333
 
+EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);
10334
 
 
10335
 
-static struct page *kvm_vm_nopage(struct vm_area_struct *vma,
10336
 
-                                 unsigned long address,
10337
 
-                                 int *type)
10338
 
+void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
10339
 
 {
10340
 
-       struct kvm *kvm = vma->vm_file->private_data;
10341
 
-       unsigned long pgoff;
10342
 
-       struct page *page;
10343
 
-
10344
 
-       pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
10345
 
-       page = gfn_to_page(kvm, pgoff);
10346
 
-       if (!page)
10347
 
-               return NOPAGE_SIGBUS;
10348
 
-       get_page(page);
10349
 
-       if (type != NULL)
10350
 
-               *type = VM_FAULT_MINOR;
10351
 
-
10352
 
-       return page;
10353
 
+       kvm_x86_ops->vcpu_free(vcpu);
10354
 
 }
10355
 
 
10356
 
-static struct vm_operations_struct kvm_vm_vm_ops = {
10357
 
-       .nopage = kvm_vm_nopage,
10358
 
-};
10359
 
-
10360
 
-static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
10361
 
+struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
10362
 
+                                               unsigned int id)
10363
 
 {
10364
 
-       vma->vm_ops = &kvm_vm_vm_ops;
10365
 
-       return 0;
10366
 
+       return kvm_x86_ops->vcpu_create(kvm, id);
10367
 
 }
10368
 
 
10369
 
-static struct file_operations kvm_vm_fops = {
10370
 
-       .release        = kvm_vm_release,
10371
 
-       .unlocked_ioctl = kvm_vm_ioctl,
10372
 
-       .compat_ioctl   = kvm_vm_ioctl,
10373
 
-       .mmap           = kvm_vm_mmap,
10374
 
-};
10375
 
-
10376
 
-static int kvm_dev_ioctl_create_vm(void)
10377
 
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
10378
 
 {
10379
 
-       int fd, r;
10380
 
-       struct inode *inode;
10381
 
-       struct file *file;
10382
 
-       struct kvm *kvm;
10383
 
+       int r;
10384
 
 
10385
 
-       kvm = kvm_create_vm();
10386
 
-       if (IS_ERR(kvm))
10387
 
-               return PTR_ERR(kvm);
10388
 
-       r = anon_inode_getfd(&fd, &inode, &file, "kvm-vm", &kvm_vm_fops, kvm);
10389
 
-       if (r) {
10390
 
-               kvm_destroy_vm(kvm);
10391
 
-               return r;
10392
 
-       }
10393
 
+       /* We do fxsave: this must be aligned. */
10394
 
+       BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF);
10395
 
 
10396
 
-       kvm->filp = file;
10397
 
+       vcpu_load(vcpu);
10398
 
+       r = kvm_arch_vcpu_reset(vcpu);
10399
 
+       if (r == 0)
10400
 
+               r = kvm_mmu_setup(vcpu);
10401
 
+       vcpu_put(vcpu);
10402
 
+       if (r < 0)
10403
 
+               goto free_vcpu;
10404
 
 
10405
 
-       return fd;
10406
 
+       return 0;
10407
 
+free_vcpu:
10408
 
+       kvm_x86_ops->vcpu_free(vcpu);
10409
 
+       return r;
10410
 
 }
10411
 
 
10412
 
-static long kvm_dev_ioctl(struct file *filp,
10413
 
-                         unsigned int ioctl, unsigned long arg)
10414
 
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
10415
 
 {
10416
 
-       void __user *argp = (void __user *)arg;
10417
 
-       long r = -EINVAL;
10418
 
-
10419
 
-       switch (ioctl) {
10420
 
-       case KVM_GET_API_VERSION:
10421
 
-               r = -EINVAL;
10422
 
-               if (arg)
10423
 
-                       goto out;
10424
 
-               r = KVM_API_VERSION;
10425
 
-               break;
10426
 
-       case KVM_CREATE_VM:
10427
 
-               r = -EINVAL;
10428
 
-               if (arg)
10429
 
-                       goto out;
10430
 
-               r = kvm_dev_ioctl_create_vm();
10431
 
-               break;
10432
 
-       case KVM_GET_MSR_INDEX_LIST: {
10433
 
-               struct kvm_msr_list __user *user_msr_list = argp;
10434
 
-               struct kvm_msr_list msr_list;
10435
 
-               unsigned n;
10436
 
-
10437
 
-               r = -EFAULT;
10438
 
-               if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
10439
 
-                       goto out;
10440
 
-               n = msr_list.nmsrs;
10441
 
-               msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
10442
 
-               if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
10443
 
-                       goto out;
10444
 
-               r = -E2BIG;
10445
 
-               if (n < num_msrs_to_save)
10446
 
-                       goto out;
10447
 
-               r = -EFAULT;
10448
 
-               if (copy_to_user(user_msr_list->indices, &msrs_to_save,
10449
 
-                                num_msrs_to_save * sizeof(u32)))
10450
 
-                       goto out;
10451
 
-               if (copy_to_user(user_msr_list->indices
10452
 
-                                + num_msrs_to_save * sizeof(u32),
10453
 
-                                &emulated_msrs,
10454
 
-                                ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
10455
 
-                       goto out;
10456
 
-               r = 0;
10457
 
-               break;
10458
 
-       }
10459
 
-       case KVM_CHECK_EXTENSION: {
10460
 
-               int ext = (long)argp;
10461
 
+       vcpu_load(vcpu);
10462
 
+       kvm_mmu_unload(vcpu);
10463
 
+       vcpu_put(vcpu);
10464
 
 
10465
 
-               switch (ext) {
10466
 
-               case KVM_CAP_IRQCHIP:
10467
 
-               case KVM_CAP_HLT:
10468
 
-                       r = 1;
10469
 
-                       break;
10470
 
-               default:
10471
 
-                       r = 0;
10472
 
-                       break;
10473
 
-               }
10474
 
-               break;
10475
 
-       }
10476
 
-       case KVM_GET_VCPU_MMAP_SIZE:
10477
 
-               r = -EINVAL;
10478
 
-               if (arg)
10479
 
-                       goto out;
10480
 
-               r = 2 * PAGE_SIZE;
10481
 
-               break;
10482
 
-       default:
10483
 
-               ;
10484
 
-       }
10485
 
-out:
10486
 
-       return r;
10487
 
+       kvm_x86_ops->vcpu_free(vcpu);
10488
 
 }
10489
 
 
10490
 
-static struct file_operations kvm_chardev_ops = {
10491
 
-       .unlocked_ioctl = kvm_dev_ioctl,
10492
 
-       .compat_ioctl   = kvm_dev_ioctl,
10493
 
-};
10494
 
-
10495
 
-static struct miscdevice kvm_dev = {
10496
 
-       KVM_MINOR,
10497
 
-       "kvm",
10498
 
-       &kvm_chardev_ops,
10499
 
-};
10500
 
-
10501
 
-/*
10502
 
- * Make sure that a cpu that is being hot-unplugged does not have any vcpus
10503
 
- * cached on it.
10504
 
- */
10505
 
-static void decache_vcpus_on_cpu(int cpu)
10506
 
+int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
10507
 
 {
10508
 
-       struct kvm *vm;
10509
 
-       struct kvm_vcpu *vcpu;
10510
 
-       int i;
10511
 
-
10512
 
-       spin_lock(&kvm_lock);
10513
 
-       list_for_each_entry(vm, &vm_list, vm_list)
10514
 
-               for (i = 0; i < KVM_MAX_VCPUS; ++i) {
10515
 
-                       vcpu = vm->vcpus[i];
10516
 
-                       if (!vcpu)
10517
 
-                               continue;
10518
 
-                       /*
10519
 
-                        * If the vcpu is locked, then it is running on some
10520
 
-                        * other cpu and therefore it is not cached on the
10521
 
-                        * cpu in question.
10522
 
-                        *
10523
 
-                        * If it's not locked, check the last cpu it executed
10524
 
-                        * on.
10525
 
-                        */
10526
 
-                       if (mutex_trylock(&vcpu->mutex)) {
10527
 
-                               if (vcpu->cpu == cpu) {
10528
 
-                                       kvm_x86_ops->vcpu_decache(vcpu);
10529
 
-                                       vcpu->cpu = -1;
10530
 
-                               }
10531
 
-                               mutex_unlock(&vcpu->mutex);
10532
 
-                       }
10533
 
-               }
10534
 
-       spin_unlock(&kvm_lock);
10535
 
+       return kvm_x86_ops->vcpu_reset(vcpu);
10536
 
 }
10537
 
 
10538
 
-static void hardware_enable(void *junk)
10539
 
+void kvm_arch_hardware_enable(void *garbage)
10540
 
 {
10541
 
-       int cpu = raw_smp_processor_id();
10542
 
-
10543
 
-       if (cpu_isset(cpu, cpus_hardware_enabled))
10544
 
-               return;
10545
 
-       cpu_set(cpu, cpus_hardware_enabled);
10546
 
-       kvm_x86_ops->hardware_enable(NULL);
10547
 
+       kvm_x86_ops->hardware_enable(garbage);
10548
 
 }
10549
 
 
10550
 
-static void hardware_disable(void *junk)
10551
 
+void kvm_arch_hardware_disable(void *garbage)
10552
 
 {
10553
 
-       int cpu = raw_smp_processor_id();
10554
 
-
10555
 
-       if (!cpu_isset(cpu, cpus_hardware_enabled))
10556
 
-               return;
10557
 
-       cpu_clear(cpu, cpus_hardware_enabled);
10558
 
-       decache_vcpus_on_cpu(cpu);
10559
 
-       kvm_x86_ops->hardware_disable(NULL);
10560
 
+       kvm_x86_ops->hardware_disable(garbage);
10561
 
 }
10562
 
 
10563
 
-static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
10564
 
-                          void *v)
10565
 
+int kvm_arch_hardware_setup(void)
10566
 
 {
10567
 
-       int cpu = (long)v;
10568
 
-
10569
 
-       switch (val) {
10570
 
-       case CPU_DYING:
10571
 
-       case CPU_DYING_FROZEN:
10572
 
-               printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
10573
 
-                      cpu);
10574
 
-               hardware_disable(NULL);
10575
 
-               break;
10576
 
-       case CPU_UP_CANCELED:
10577
 
-       case CPU_UP_CANCELED_FROZEN:
10578
 
-               printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
10579
 
-                      cpu);
10580
 
-               smp_call_function_single(cpu, hardware_disable, NULL, 0, 1);
10581
 
-               break;
10582
 
-       case CPU_ONLINE:
10583
 
-       case CPU_ONLINE_FROZEN:
10584
 
-               printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
10585
 
-                      cpu);
10586
 
-               smp_call_function_single(cpu, hardware_enable, NULL, 0, 1);
10587
 
-               break;
10588
 
-       }
10589
 
-       return NOTIFY_OK;
10590
 
+       return kvm_x86_ops->hardware_setup();
10591
 
 }
10592
 
 
10593
 
-static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
10594
 
-                       void *v)
10595
 
+void kvm_arch_hardware_unsetup(void)
10596
 
 {
10597
 
-       if (val == SYS_RESTART) {
10598
 
-               /*
10599
 
-                * Some (well, at least mine) BIOSes hang on reboot if
10600
 
-                * in vmx root mode.
10601
 
-                */
10602
 
-               printk(KERN_INFO "kvm: exiting hardware virtualization\n");
10603
 
-               on_each_cpu(hardware_disable, NULL, 0, 1);
10604
 
-       }
10605
 
-       return NOTIFY_OK;
10606
 
+       kvm_x86_ops->hardware_unsetup();
10607
 
 }
10608
 
 
10609
 
-static struct notifier_block kvm_reboot_notifier = {
10610
 
-       .notifier_call = kvm_reboot,
10611
 
-       .priority = 0,
10612
 
-};
10613
 
-
10614
 
-void kvm_io_bus_init(struct kvm_io_bus *bus)
10615
 
+void kvm_arch_check_processor_compat(void *rtn)
10616
 
 {
10617
 
-       memset(bus, 0, sizeof(*bus));
10618
 
+       kvm_x86_ops->check_processor_compatibility(rtn);
10619
 
 }
10620
 
 
10621
 
-void kvm_io_bus_destroy(struct kvm_io_bus *bus)
10622
 
+int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
10623
 
 {
10624
 
-       int i;
10625
 
+       struct page *page;
10626
 
+       struct kvm *kvm;
10627
 
+       int r;
10628
 
 
10629
 
-       for (i = 0; i < bus->dev_count; i++) {
10630
 
-               struct kvm_io_device *pos = bus->devs[i];
10631
 
+       BUG_ON(vcpu->kvm == NULL);
10632
 
+       kvm = vcpu->kvm;
10633
 
 
10634
 
-               kvm_iodevice_destructor(pos);
10635
 
-       }
10636
 
-}
10637
 
+       vcpu->arch.mmu.root_hpa = INVALID_PAGE;
10638
 
+       if (!irqchip_in_kernel(kvm) || vcpu->vcpu_id == 0)
10639
 
+               vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE;
10640
 
+       else
10641
 
+               vcpu->arch.mp_state = VCPU_MP_STATE_UNINITIALIZED;
10642
 
 
10643
 
-struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr)
10644
 
-{
10645
 
-       int i;
10646
 
+       page = alloc_page(GFP_KERNEL | __GFP_ZERO);
10647
 
+       if (!page) {
10648
 
+               r = -ENOMEM;
10649
 
+               goto fail;
10650
 
+       }
10651
 
+       vcpu->arch.pio_data = page_address(page);
10652
 
 
10653
 
-       for (i = 0; i < bus->dev_count; i++) {
10654
 
-               struct kvm_io_device *pos = bus->devs[i];
10655
 
+       r = kvm_mmu_create(vcpu);
10656
 
+       if (r < 0)
10657
 
+               goto fail_free_pio_data;
10658
 
 
10659
 
-               if (pos->in_range(pos, addr))
10660
 
-                       return pos;
10661
 
+       if (irqchip_in_kernel(kvm)) {
10662
 
+               r = kvm_create_lapic(vcpu);
10663
 
+               if (r < 0)
10664
 
+                       goto fail_mmu_destroy;
10665
 
        }
10666
 
 
10667
 
-       return NULL;
10668
 
-}
10669
 
-
10670
 
-void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev)
10671
 
-{
10672
 
-       BUG_ON(bus->dev_count > (NR_IOBUS_DEVS-1));
10673
 
+       return 0;
10674
 
 
10675
 
-       bus->devs[bus->dev_count++] = dev;
10676
 
+fail_mmu_destroy:
10677
 
+       kvm_mmu_destroy(vcpu);
10678
 
+fail_free_pio_data:
10679
 
+       free_page((unsigned long)vcpu->arch.pio_data);
10680
 
+fail:
10681
 
+       return r;
10682
 
 }
10683
 
 
10684
 
-static struct notifier_block kvm_cpu_notifier = {
10685
 
-       .notifier_call = kvm_cpu_hotplug,
10686
 
-       .priority = 20, /* must be > scheduler priority */
10687
 
-};
10688
 
-
10689
 
-static u64 stat_get(void *_offset)
10690
 
+void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
10691
 
 {
10692
 
-       unsigned offset = (long)_offset;
10693
 
-       u64 total = 0;
10694
 
-       struct kvm *kvm;
10695
 
-       struct kvm_vcpu *vcpu;
10696
 
-       int i;
10697
 
-
10698
 
-       spin_lock(&kvm_lock);
10699
 
-       list_for_each_entry(kvm, &vm_list, vm_list)
10700
 
-               for (i = 0; i < KVM_MAX_VCPUS; ++i) {
10701
 
-                       vcpu = kvm->vcpus[i];
10702
 
-                       if (vcpu)
10703
 
-                               total += *(u32 *)((void *)vcpu + offset);
10704
 
-               }
10705
 
-       spin_unlock(&kvm_lock);
10706
 
-       return total;
10707
 
+       kvm_free_lapic(vcpu);
10708
 
+       kvm_mmu_destroy(vcpu);
10709
 
+       free_page((unsigned long)vcpu->arch.pio_data);
10710
 
 }
10711
 
 
10712
 
-DEFINE_SIMPLE_ATTRIBUTE(stat_fops, stat_get, NULL, "%llu\n");
10713
 
-
10714
 
-static __init void kvm_init_debug(void)
10715
 
+struct  kvm *kvm_arch_create_vm(void)
10716
 
 {
10717
 
-       struct kvm_stats_debugfs_item *p;
10718
 
-
10719
 
-       debugfs_dir = debugfs_create_dir("kvm", NULL);
10720
 
-       for (p = debugfs_entries; p->name; ++p)
10721
 
-               p->dentry = debugfs_create_file(p->name, 0444, debugfs_dir,
10722
 
-                                               (void *)(long)p->offset,
10723
 
-                                               &stat_fops);
10724
 
-}
10725
 
+       struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
10726
 
 
10727
 
-static void kvm_exit_debug(void)
10728
 
-{
10729
 
-       struct kvm_stats_debugfs_item *p;
10730
 
+       if (!kvm)
10731
 
+               return ERR_PTR(-ENOMEM);
10732
 
 
10733
 
-       for (p = debugfs_entries; p->name; ++p)
10734
 
-               debugfs_remove(p->dentry);
10735
 
-       debugfs_remove(debugfs_dir);
10736
 
-}
10737
 
+       INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
10738
 
 
10739
 
-static int kvm_suspend(struct sys_device *dev, pm_message_t state)
10740
 
-{
10741
 
-       hardware_disable(NULL);
10742
 
-       return 0;
10743
 
+       return kvm;
10744
 
 }
10745
 
 
10746
 
-static int kvm_resume(struct sys_device *dev)
10747
 
+static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
10748
 
 {
10749
 
-       hardware_enable(NULL);
10750
 
-       return 0;
10751
 
+       vcpu_load(vcpu);
10752
 
+       kvm_mmu_unload(vcpu);
10753
 
+       vcpu_put(vcpu);
10754
 
 }
10755
 
 
10756
 
-static struct sysdev_class kvm_sysdev_class = {
10757
 
-       set_kset_name("kvm"),
10758
 
-       .suspend = kvm_suspend,
10759
 
-       .resume = kvm_resume,
10760
 
-};
10761
 
-
10762
 
-static struct sys_device kvm_sysdev = {
10763
 
-       .id = 0,
10764
 
-       .cls = &kvm_sysdev_class,
10765
 
-};
10766
 
-
10767
 
-hpa_t bad_page_address;
10768
 
-
10769
 
-static inline
10770
 
-struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
10771
 
+static void kvm_free_vcpus(struct kvm *kvm)
10772
 
 {
10773
 
-       return container_of(pn, struct kvm_vcpu, preempt_notifier);
10774
 
-}
10775
 
+       unsigned int i;
10776
 
 
10777
 
-static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
10778
 
-{
10779
 
-       struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
10780
 
+       /*
10781
 
+        * Unpin any mmu pages first.
10782
 
+        */
10783
 
+       for (i = 0; i < KVM_MAX_VCPUS; ++i)
10784
 
+               if (kvm->vcpus[i])
10785
 
+                       kvm_unload_vcpu_mmu(kvm->vcpus[i]);
10786
 
+       for (i = 0; i < KVM_MAX_VCPUS; ++i) {
10787
 
+               if (kvm->vcpus[i]) {
10788
 
+                       kvm_arch_vcpu_free(kvm->vcpus[i]);
10789
 
+                       kvm->vcpus[i] = NULL;
10790
 
+               }
10791
 
+       }
10792
 
 
10793
 
-       kvm_x86_ops->vcpu_load(vcpu, cpu);
10794
 
 }
10795
 
 
10796
 
-static void kvm_sched_out(struct preempt_notifier *pn,
10797
 
-                         struct task_struct *next)
10798
 
+void kvm_arch_destroy_vm(struct kvm *kvm)
10799
 
 {
10800
 
-       struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
10801
 
-
10802
 
-       kvm_x86_ops->vcpu_put(vcpu);
10803
 
+       kfree(kvm->arch.vpic);
10804
 
+       kfree(kvm->arch.vioapic);
10805
 
+       kvm_free_vcpus(kvm);
10806
 
+       kvm_free_physmem(kvm);
10807
 
+       kfree(kvm);
10808
 
 }
10809
 
 
10810
 
-int kvm_init_x86(struct kvm_x86_ops *ops, unsigned int vcpu_size,
10811
 
-                 struct module *module)
10812
 
+int kvm_arch_set_memory_region(struct kvm *kvm,
10813
 
+                               struct kvm_userspace_memory_region *mem,
10814
 
+                               struct kvm_memory_slot old,
10815
 
+                               int user_alloc)
10816
 
 {
10817
 
-       int r;
10818
 
-       int cpu;
10819
 
-
10820
 
-       if (kvm_x86_ops) {
10821
 
-               printk(KERN_ERR "kvm: already loaded the other module\n");
10822
 
-               return -EEXIST;
10823
 
-       }
10824
 
-
10825
 
-       if (!ops->cpu_has_kvm_support()) {
10826
 
-               printk(KERN_ERR "kvm: no hardware support\n");
10827
 
-               return -EOPNOTSUPP;
10828
 
-       }
10829
 
-       if (ops->disabled_by_bios()) {
10830
 
-               printk(KERN_ERR "kvm: disabled by bios\n");
10831
 
-               return -EOPNOTSUPP;
10832
 
-       }
10833
 
-
10834
 
-       kvm_x86_ops = ops;
10835
 
-
10836
 
-       r = kvm_x86_ops->hardware_setup();
10837
 
-       if (r < 0)
10838
 
-               goto out;
10839
 
+       int npages = mem->memory_size >> PAGE_SHIFT;
10840
 
+       struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
10841
 
 
10842
 
-       for_each_online_cpu(cpu) {
10843
 
-               smp_call_function_single(cpu,
10844
 
-                               kvm_x86_ops->check_processor_compatibility,
10845
 
-                               &r, 0, 1);
10846
 
-               if (r < 0)
10847
 
-                       goto out_free_0;
10848
 
-       }
10849
 
-
10850
 
-       on_each_cpu(hardware_enable, NULL, 0, 1);
10851
 
-       r = register_cpu_notifier(&kvm_cpu_notifier);
10852
 
-       if (r)
10853
 
-               goto out_free_1;
10854
 
-       register_reboot_notifier(&kvm_reboot_notifier);
10855
 
-
10856
 
-       r = sysdev_class_register(&kvm_sysdev_class);
10857
 
-       if (r)
10858
 
-               goto out_free_2;
10859
 
-
10860
 
-       r = sysdev_register(&kvm_sysdev);
10861
 
-       if (r)
10862
 
-               goto out_free_3;
10863
 
-
10864
 
-       /* A kmem cache lets us meet the alignment requirements of fx_save. */
10865
 
-       kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size,
10866
 
-                                          __alignof__(struct kvm_vcpu), 0, 0);
10867
 
-       if (!kvm_vcpu_cache) {
10868
 
-               r = -ENOMEM;
10869
 
-               goto out_free_4;
10870
 
+       /*To keep backward compatibility with older userspace,
10871
 
+        *x86 needs to handle !user_alloc case.
10872
 
+        */
10873
 
+       if (!user_alloc) {
10874
 
+               if (npages && !old.rmap) {
10875
 
+                       down_write(&current->mm->mmap_sem);
10876
 
+                       memslot->userspace_addr = do_mmap(NULL, 0,
10877
 
+                                                    npages * PAGE_SIZE,
10878
 
+                                                    PROT_READ | PROT_WRITE,
10879
 
+                                                    MAP_SHARED | MAP_ANONYMOUS,
10880
 
+                                                    0);
10881
 
+                       up_write(&current->mm->mmap_sem);
10882
 
+
10883
 
+                       if (IS_ERR((void *)memslot->userspace_addr))
10884
 
+                               return PTR_ERR((void *)memslot->userspace_addr);
10885
 
+               } else {
10886
 
+                       if (!old.user_alloc && old.rmap) {
10887
 
+                               int ret;
10888
 
+
10889
 
+                               down_write(&current->mm->mmap_sem);
10890
 
+                               ret = do_munmap(current->mm, old.userspace_addr,
10891
 
+                                               old.npages * PAGE_SIZE);
10892
 
+                               up_write(&current->mm->mmap_sem);
10893
 
+                               if (ret < 0)
10894
 
+                                       printk(KERN_WARNING
10895
 
+                                      "kvm_vm_ioctl_set_memory_region: "
10896
 
+                                      "failed to munmap memory\n");
10897
 
+                       }
10898
 
+               }
10899
 
        }
10900
 
 
10901
 
-       kvm_chardev_ops.owner = module;
10902
 
-
10903
 
-       r = misc_register(&kvm_dev);
10904
 
-       if (r) {
10905
 
-               printk (KERN_ERR "kvm: misc device register failed\n");
10906
 
-               goto out_free;
10907
 
+       if (!kvm->arch.n_requested_mmu_pages) {
10908
 
+               unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
10909
 
+               kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
10910
 
        }
10911
 
 
10912
 
-       kvm_preempt_ops.sched_in = kvm_sched_in;
10913
 
-       kvm_preempt_ops.sched_out = kvm_sched_out;
10914
 
-
10915
 
-       return r;
10916
 
+       kvm_mmu_slot_remove_write_access(kvm, mem->slot);
10917
 
+       kvm_flush_remote_tlbs(kvm);
10918
 
 
10919
 
-out_free:
10920
 
-       kmem_cache_destroy(kvm_vcpu_cache);
10921
 
-out_free_4:
10922
 
-       sysdev_unregister(&kvm_sysdev);
10923
 
-out_free_3:
10924
 
-       sysdev_class_unregister(&kvm_sysdev_class);
10925
 
-out_free_2:
10926
 
-       unregister_reboot_notifier(&kvm_reboot_notifier);
10927
 
-       unregister_cpu_notifier(&kvm_cpu_notifier);
10928
 
-out_free_1:
10929
 
-       on_each_cpu(hardware_disable, NULL, 0, 1);
10930
 
-out_free_0:
10931
 
-       kvm_x86_ops->hardware_unsetup();
10932
 
-out:
10933
 
-       kvm_x86_ops = NULL;
10934
 
-       return r;
10935
 
+       return 0;
10936
 
 }
10937
 
 
10938
 
-void kvm_exit_x86(void)
10939
 
+int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
10940
 
 {
10941
 
-       misc_deregister(&kvm_dev);
10942
 
-       kmem_cache_destroy(kvm_vcpu_cache);
10943
 
-       sysdev_unregister(&kvm_sysdev);
10944
 
-       sysdev_class_unregister(&kvm_sysdev_class);
10945
 
-       unregister_reboot_notifier(&kvm_reboot_notifier);
10946
 
-       unregister_cpu_notifier(&kvm_cpu_notifier);
10947
 
-       on_each_cpu(hardware_disable, NULL, 0, 1);
10948
 
-       kvm_x86_ops->hardware_unsetup();
10949
 
-       kvm_x86_ops = NULL;
10950
 
+       return vcpu->arch.mp_state == VCPU_MP_STATE_RUNNABLE
10951
 
+              || vcpu->arch.mp_state == VCPU_MP_STATE_SIPI_RECEIVED;
10952
 
 }
10953
 
 
10954
 
-static __init int kvm_init(void)
10955
 
+static void vcpu_kick_intr(void *info)
10956
 
 {
10957
 
-       static struct page *bad_page;
10958
 
-       int r;
10959
 
-
10960
 
-       r = kvm_mmu_module_init();
10961
 
-       if (r)
10962
 
-               goto out4;
10963
 
-
10964
 
-       kvm_init_debug();
10965
 
-
10966
 
-       kvm_init_msr_list();
10967
 
-
10968
 
-       if ((bad_page = alloc_page(GFP_KERNEL)) == NULL) {
10969
 
-               r = -ENOMEM;
10970
 
-               goto out;
10971
 
-       }
10972
 
-
10973
 
-       bad_page_address = page_to_pfn(bad_page) << PAGE_SHIFT;
10974
 
-       memset(__va(bad_page_address), 0, PAGE_SIZE);
10975
 
-
10976
 
-       return 0;
10977
 
-
10978
 
-out:
10979
 
-       kvm_exit_debug();
10980
 
-       kvm_mmu_module_exit();
10981
 
-out4:
10982
 
-       return r;
10983
 
+#ifdef DEBUG
10984
 
+       struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
10985
 
+       printk(KERN_DEBUG "vcpu_kick_intr %p \n", vcpu);
10986
 
+#endif
10987
 
 }
10988
 
 
10989
 
-static __exit void kvm_exit(void)
10990
 
+void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
10991
 
 {
10992
 
-       kvm_exit_debug();
10993
 
-       __free_page(pfn_to_page(bad_page_address >> PAGE_SHIFT));
10994
 
-       kvm_mmu_module_exit();
10995
 
-}
10996
 
-
10997
 
-module_init(kvm_init)
10998
 
-module_exit(kvm_exit)
10999
 
+       int ipi_pcpu = vcpu->cpu;
11000
 
 
11001
 
-EXPORT_SYMBOL_GPL(kvm_init_x86);
11002
 
-EXPORT_SYMBOL_GPL(kvm_exit_x86);
11003
 
+       if (waitqueue_active(&vcpu->wq)) {
11004
 
+               wake_up_interruptible(&vcpu->wq);
11005
 
+               ++vcpu->stat.halt_wakeup;
11006
 
+       }
11007
 
+       if (vcpu->guest_mode)
11008
 
+               smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0);
11009
 
+}
11010
 
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
11011
 
new file mode 100644
11012
 
index 0000000..b2f6f0c
11013
 
--- /dev/null
11014
 
+++ b/arch/x86/kvm/x86_emulate.c
11015
 
@@ -0,0 +1,1924 @@
11016
 
+/******************************************************************************
11017
 
+ * x86_emulate.c
11018
 
+ *
11019
 
+ * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
11020
 
+ *
11021
 
+ * Copyright (c) 2005 Keir Fraser
11022
 
+ *
11023
 
+ * Linux coding style, mod r/m decoder, segment base fixes, real-mode
11024
 
+ * privileged instructions:
11025
 
+ *
11026
 
+ * Copyright (C) 2006 Qumranet
11027
 
+ *
11028
 
+ *   Avi Kivity <avi@qumranet.com>
11029
 
+ *   Yaniv Kamay <yaniv@qumranet.com>
11030
 
+ *
11031
 
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
11032
 
+ * the COPYING file in the top-level directory.
11033
 
+ *
11034
 
+ * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
11035
 
+ */
11036
 
+
11037
 
+#ifndef __KERNEL__
11038
 
+#include <stdio.h>
11039
 
+#include <stdint.h>
11040
 
+#include <public/xen.h>
11041
 
+#define DPRINTF(_f, _a ...) printf(_f , ## _a)
11042
 
+#else
11043
 
+#include <linux/kvm_host.h>
11044
 
+#define DPRINTF(x...) do {} while (0)
11045
 
+#endif
11046
 
+#include <linux/module.h>
11047
 
+#include <asm/kvm_x86_emulate.h>
11048
 
+
11049
 
+/*
11050
 
+ * Opcode effective-address decode tables.
11051
 
+ * Note that we only emulate instructions that have at least one memory
11052
 
+ * operand (excluding implicit stack references). We assume that stack
11053
 
+ * references and instruction fetches will never occur in special memory
11054
 
+ * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
11055
 
+ * not be handled.
11056
 
+ */
11057
 
+
11058
 
+/* Operand sizes: 8-bit operands or specified/overridden size. */
11059
 
+#define ByteOp      (1<<0)     /* 8-bit operands. */
11060
 
+/* Destination operand type. */
11061
 
+#define ImplicitOps (1<<1)     /* Implicit in opcode. No generic decode. */
11062
 
+#define DstReg      (2<<1)     /* Register operand. */
11063
 
+#define DstMem      (3<<1)     /* Memory operand. */
11064
 
+#define DstMask     (3<<1)
11065
 
+/* Source operand type. */
11066
 
+#define SrcNone     (0<<3)     /* No source operand. */
11067
 
+#define SrcImplicit (0<<3)     /* Source operand is implicit in the opcode. */
11068
 
+#define SrcReg      (1<<3)     /* Register operand. */
11069
 
+#define SrcMem      (2<<3)     /* Memory operand. */
11070
 
+#define SrcMem16    (3<<3)     /* Memory operand (16-bit). */
11071
 
+#define SrcMem32    (4<<3)     /* Memory operand (32-bit). */
11072
 
+#define SrcImm      (5<<3)     /* Immediate operand. */
11073
 
+#define SrcImmByte  (6<<3)     /* 8-bit sign-extended immediate operand. */
11074
 
+#define SrcMask     (7<<3)
11075
 
+/* Generic ModRM decode. */
11076
 
+#define ModRM       (1<<6)
11077
 
+/* Destination is only written; never read. */
11078
 
+#define Mov         (1<<7)
11079
 
+#define BitOp       (1<<8)
11080
 
+#define MemAbs      (1<<9)      /* Memory operand is absolute displacement */
11081
 
+#define String      (1<<10)     /* String instruction (rep capable) */
11082
 
+#define Stack       (1<<11)     /* Stack instruction (push/pop) */
11083
 
+
11084
 
+static u16 opcode_table[256] = {
11085
 
+       /* 0x00 - 0x07 */
11086
 
+       ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
11087
 
+       ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
11088
 
+       0, 0, 0, 0,
11089
 
+       /* 0x08 - 0x0F */
11090
 
+       ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
11091
 
+       ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
11092
 
+       0, 0, 0, 0,
11093
 
+       /* 0x10 - 0x17 */
11094
 
+       ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
11095
 
+       ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
11096
 
+       0, 0, 0, 0,
11097
 
+       /* 0x18 - 0x1F */
11098
 
+       ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
11099
 
+       ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
11100
 
+       0, 0, 0, 0,
11101
 
+       /* 0x20 - 0x27 */
11102
 
+       ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
11103
 
+       ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
11104
 
+       SrcImmByte, SrcImm, 0, 0,
11105
 
+       /* 0x28 - 0x2F */
11106
 
+       ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
11107
 
+       ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
11108
 
+       0, 0, 0, 0,
11109
 
+       /* 0x30 - 0x37 */
11110
 
+       ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
11111
 
+       ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
11112
 
+       0, 0, 0, 0,
11113
 
+       /* 0x38 - 0x3F */
11114
 
+       ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
11115
 
+       ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
11116
 
+       0, 0, 0, 0,
11117
 
+       /* 0x40 - 0x47 */
11118
 
+       DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
11119
 
+       /* 0x48 - 0x4F */
11120
 
+       DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
11121
 
+       /* 0x50 - 0x57 */
11122
 
+       SrcReg | Stack, SrcReg | Stack, SrcReg | Stack, SrcReg | Stack,
11123
 
+       SrcReg | Stack, SrcReg | Stack, SrcReg | Stack, SrcReg | Stack,
11124
 
+       /* 0x58 - 0x5F */
11125
 
+       DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
11126
 
+       DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
11127
 
+       /* 0x60 - 0x67 */
11128
 
+       0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ ,
11129
 
+       0, 0, 0, 0,
11130
 
+       /* 0x68 - 0x6F */
11131
 
+       0, 0, ImplicitOps | Mov | Stack, 0,
11132
 
+       SrcNone  | ByteOp  | ImplicitOps, SrcNone  | ImplicitOps, /* insb, insw/insd */
11133
 
+       SrcNone  | ByteOp  | ImplicitOps, SrcNone  | ImplicitOps, /* outsb, outsw/outsd */
11134
 
+       /* 0x70 - 0x77 */
11135
 
+       ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
11136
 
+       ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
11137
 
+       /* 0x78 - 0x7F */
11138
 
+       ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
11139
 
+       ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
11140
 
+       /* 0x80 - 0x87 */
11141
 
+       ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
11142
 
+       ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM,
11143
 
+       ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
11144
 
+       ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
11145
 
+       /* 0x88 - 0x8F */
11146
 
+       ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov,
11147
 
+       ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
11148
 
+       0, ModRM | DstReg, 0, DstMem | SrcNone | ModRM | Mov | Stack,
11149
 
+       /* 0x90 - 0x9F */
11150
 
+       0, 0, 0, 0, 0, 0, 0, 0,
11151
 
+       0, 0, 0, 0, ImplicitOps | Stack, ImplicitOps | Stack, 0, 0,
11152
 
+       /* 0xA0 - 0xA7 */
11153
 
+       ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs,
11154
 
+       ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs,
11155
 
+       ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String,
11156
 
+       ByteOp | ImplicitOps | String, ImplicitOps | String,
11157
 
+       /* 0xA8 - 0xAF */
11158
 
+       0, 0, ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String,
11159
 
+       ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String,
11160
 
+       ByteOp | ImplicitOps | String, ImplicitOps | String,
11161
 
+       /* 0xB0 - 0xBF */
11162
 
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
11163
 
+       /* 0xC0 - 0xC7 */
11164
 
+       ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM,
11165
 
+       0, ImplicitOps | Stack, 0, 0,
11166
 
+       ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
11167
 
+       /* 0xC8 - 0xCF */
11168
 
+       0, 0, 0, 0, 0, 0, 0, 0,
11169
 
+       /* 0xD0 - 0xD7 */
11170
 
+       ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
11171
 
+       ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
11172
 
+       0, 0, 0, 0,
11173
 
+       /* 0xD8 - 0xDF */
11174
 
+       0, 0, 0, 0, 0, 0, 0, 0,
11175
 
+       /* 0xE0 - 0xE7 */
11176
 
+       0, 0, 0, 0, 0, 0, 0, 0,
11177
 
+       /* 0xE8 - 0xEF */
11178
 
+       ImplicitOps | Stack, SrcImm|ImplicitOps, 0, SrcImmByte|ImplicitOps,
11179
 
+       0, 0, 0, 0,
11180
 
+       /* 0xF0 - 0xF7 */
11181
 
+       0, 0, 0, 0,
11182
 
+       ImplicitOps, ImplicitOps,
11183
 
+       ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
11184
 
+       /* 0xF8 - 0xFF */
11185
 
+       ImplicitOps, 0, ImplicitOps, ImplicitOps,
11186
 
+       0, 0, ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM
11187
 
+};
11188
 
+
11189
 
+static u16 twobyte_table[256] = {
11190
 
+       /* 0x00 - 0x0F */
11191
 
+       0, SrcMem | ModRM | DstReg, 0, 0, 0, 0, ImplicitOps, 0,
11192
 
+       ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0,
11193
 
+       /* 0x10 - 0x1F */
11194
 
+       0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
11195
 
+       /* 0x20 - 0x2F */
11196
 
+       ModRM | ImplicitOps, ModRM, ModRM | ImplicitOps, ModRM, 0, 0, 0, 0,
11197
 
+       0, 0, 0, 0, 0, 0, 0, 0,
11198
 
+       /* 0x30 - 0x3F */
11199
 
+       ImplicitOps, 0, ImplicitOps, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
11200
 
+       /* 0x40 - 0x47 */
11201
 
+       DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
11202
 
+       DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
11203
 
+       DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
11204
 
+       DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
11205
 
+       /* 0x48 - 0x4F */
11206
 
+       DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
11207
 
+       DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
11208
 
+       DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
11209
 
+       DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
11210
 
+       /* 0x50 - 0x5F */
11211
 
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
11212
 
+       /* 0x60 - 0x6F */
11213
 
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
11214
 
+       /* 0x70 - 0x7F */
11215
 
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
11216
 
+       /* 0x80 - 0x8F */
11217
 
+       ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
11218
 
+       ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
11219
 
+       ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
11220
 
+       ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
11221
 
+       /* 0x90 - 0x9F */
11222
 
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
11223
 
+       /* 0xA0 - 0xA7 */
11224
 
+       0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0,
11225
 
+       /* 0xA8 - 0xAF */
11226
 
+       0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0,
11227
 
+       /* 0xB0 - 0xB7 */
11228
 
+       ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0,
11229
 
+           DstMem | SrcReg | ModRM | BitOp,
11230
 
+       0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
11231
 
+           DstReg | SrcMem16 | ModRM | Mov,
11232
 
+       /* 0xB8 - 0xBF */
11233
 
+       0, 0, DstMem | SrcImmByte | ModRM, DstMem | SrcReg | ModRM | BitOp,
11234
 
+       0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
11235
 
+           DstReg | SrcMem16 | ModRM | Mov,
11236
 
+       /* 0xC0 - 0xCF */
11237
 
+       0, 0, 0, DstMem | SrcReg | ModRM | Mov, 0, 0, 0, ImplicitOps | ModRM,
11238
 
+       0, 0, 0, 0, 0, 0, 0, 0,
11239
 
+       /* 0xD0 - 0xDF */
11240
 
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
11241
 
+       /* 0xE0 - 0xEF */
11242
 
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
11243
 
+       /* 0xF0 - 0xFF */
11244
 
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
11245
 
+};
11246
 
+
11247
 
+/* EFLAGS bit definitions. */
11248
 
+#define EFLG_OF (1<<11)
11249
 
+#define EFLG_DF (1<<10)
11250
 
+#define EFLG_SF (1<<7)
11251
 
+#define EFLG_ZF (1<<6)
11252
 
+#define EFLG_AF (1<<4)
11253
 
+#define EFLG_PF (1<<2)
11254
 
+#define EFLG_CF (1<<0)
11255
 
+
11256
 
+/*
11257
 
+ * Instruction emulation:
11258
 
+ * Most instructions are emulated directly via a fragment of inline assembly
11259
 
+ * code. This allows us to save/restore EFLAGS and thus very easily pick up
11260
 
+ * any modified flags.
11261
 
+ */
11262
 
+
11263
 
+#if defined(CONFIG_X86_64)
11264
 
+#define _LO32 "k"              /* force 32-bit operand */
11265
 
+#define _STK  "%%rsp"          /* stack pointer */
11266
 
+#elif defined(__i386__)
11267
 
+#define _LO32 ""               /* force 32-bit operand */
11268
 
+#define _STK  "%%esp"          /* stack pointer */
11269
 
+#endif
11270
 
+
11271
 
+/*
11272
 
+ * These EFLAGS bits are restored from saved value during emulation, and
11273
 
+ * any changes are written back to the saved value after emulation.
11274
 
+ */
11275
 
+#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)
11276
 
+
11277
 
+/* Before executing instruction: restore necessary bits in EFLAGS. */
11278
 
+#define _PRE_EFLAGS(_sav, _msk, _tmp)                                  \
11279
 
+       /* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */ \
11280
 
+       "movl %"_sav",%"_LO32 _tmp"; "                                  \
11281
 
+       "push %"_tmp"; "                                                \
11282
 
+       "push %"_tmp"; "                                                \
11283
 
+       "movl %"_msk",%"_LO32 _tmp"; "                                  \
11284
 
+       "andl %"_LO32 _tmp",("_STK"); "                                 \
11285
 
+       "pushf; "                                                       \
11286
 
+       "notl %"_LO32 _tmp"; "                                          \
11287
 
+       "andl %"_LO32 _tmp",("_STK"); "                                 \
11288
 
+       "andl %"_LO32 _tmp","__stringify(BITS_PER_LONG/4)"("_STK"); "   \
11289
 
+       "pop  %"_tmp"; "                                                \
11290
 
+       "orl  %"_LO32 _tmp",("_STK"); "                                 \
11291
 
+       "popf; "                                                        \
11292
 
+       "pop  %"_sav"; "
11293
 
+
11294
 
+/* After executing instruction: write-back necessary bits in EFLAGS. */
11295
 
+#define _POST_EFLAGS(_sav, _msk, _tmp) \
11296
 
+       /* _sav |= EFLAGS & _msk; */            \
11297
 
+       "pushf; "                               \
11298
 
+       "pop  %"_tmp"; "                        \
11299
 
+       "andl %"_msk",%"_LO32 _tmp"; "          \
11300
 
+       "orl  %"_LO32 _tmp",%"_sav"; "
11301
 
+
11302
 
+/* Raw emulation: instruction has two explicit operands. */
11303
 
+#define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \
11304
 
+       do {                                                                \
11305
 
+               unsigned long _tmp;                                         \
11306
 
+                                                                           \
11307
 
+               switch ((_dst).bytes) {                                     \
11308
 
+               case 2:                                                     \
11309
 
+                       __asm__ __volatile__ (                              \
11310
 
+                               _PRE_EFLAGS("0", "4", "2")                  \
11311
 
+                               _op"w %"_wx"3,%1; "                         \
11312
 
+                               _POST_EFLAGS("0", "4", "2")                 \
11313
 
+                               : "=m" (_eflags), "=m" ((_dst).val),        \
11314
 
+                                 "=&r" (_tmp)                              \
11315
 
+                               : _wy ((_src).val), "i" (EFLAGS_MASK));     \
11316
 
+                       break;                                              \
11317
 
+               case 4:                                                     \
11318
 
+                       __asm__ __volatile__ (                              \
11319
 
+                               _PRE_EFLAGS("0", "4", "2")                  \
11320
 
+                               _op"l %"_lx"3,%1; "                         \
11321
 
+                               _POST_EFLAGS("0", "4", "2")                 \
11322
 
+                               : "=m" (_eflags), "=m" ((_dst).val),        \
11323
 
+                                 "=&r" (_tmp)                              \
11324
 
+                               : _ly ((_src).val), "i" (EFLAGS_MASK));     \
11325
 
+                       break;                                              \
11326
 
+               case 8:                                                     \
11327
 
+                       __emulate_2op_8byte(_op, _src, _dst,                \
11328
 
+                                           _eflags, _qx, _qy);             \
11329
 
+                       break;                                              \
11330
 
+               }                                                           \
11331
 
+       } while (0)
11332
 
+
11333
 
+#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
11334
 
+       do {                                                                 \
11335
 
+               unsigned long _tmp;                                          \
11336
 
+               switch ((_dst).bytes) {                                      \
11337
 
+               case 1:                                                      \
11338
 
+                       __asm__ __volatile__ (                               \
11339
 
+                               _PRE_EFLAGS("0", "4", "2")                   \
11340
 
+                               _op"b %"_bx"3,%1; "                          \
11341
 
+                               _POST_EFLAGS("0", "4", "2")                  \
11342
 
+                               : "=m" (_eflags), "=m" ((_dst).val),         \
11343
 
+                                 "=&r" (_tmp)                               \
11344
 
+                               : _by ((_src).val), "i" (EFLAGS_MASK));      \
11345
 
+                       break;                                               \
11346
 
+               default:                                                     \
11347
 
+                       __emulate_2op_nobyte(_op, _src, _dst, _eflags,       \
11348
 
+                                            _wx, _wy, _lx, _ly, _qx, _qy);  \
11349
 
+                       break;                                               \
11350
 
+               }                                                            \
11351
 
+       } while (0)
11352
 
+
11353
 
+/* Source operand is byte-sized and may be restricted to just %cl. */
11354
 
+#define emulate_2op_SrcB(_op, _src, _dst, _eflags)                      \
11355
 
+       __emulate_2op(_op, _src, _dst, _eflags,                         \
11356
 
+                     "b", "c", "b", "c", "b", "c", "b", "c")
11357
 
+
11358
 
+/* Source operand is byte, word, long or quad sized. */
11359
 
+#define emulate_2op_SrcV(_op, _src, _dst, _eflags)                      \
11360
 
+       __emulate_2op(_op, _src, _dst, _eflags,                         \
11361
 
+                     "b", "q", "w", "r", _LO32, "r", "", "r")
11362
 
+
11363
 
+/* Source operand is word, long or quad sized. */
11364
 
+#define emulate_2op_SrcV_nobyte(_op, _src, _dst, _eflags)               \
11365
 
+       __emulate_2op_nobyte(_op, _src, _dst, _eflags,                  \
11366
 
+                            "w", "r", _LO32, "r", "", "r")
11367
 
+
11368
 
+/* Instruction has only one explicit operand (no source operand). */
11369
 
+#define emulate_1op(_op, _dst, _eflags)                                    \
11370
 
+       do {                                                            \
11371
 
+               unsigned long _tmp;                                     \
11372
 
+                                                                       \
11373
 
+               switch ((_dst).bytes) {                                 \
11374
 
+               case 1:                                                 \
11375
 
+                       __asm__ __volatile__ (                          \
11376
 
+                               _PRE_EFLAGS("0", "3", "2")              \
11377
 
+                               _op"b %1; "                             \
11378
 
+                               _POST_EFLAGS("0", "3", "2")             \
11379
 
+                               : "=m" (_eflags), "=m" ((_dst).val),    \
11380
 
+                                 "=&r" (_tmp)                          \
11381
 
+                               : "i" (EFLAGS_MASK));                   \
11382
 
+                       break;                                          \
11383
 
+               case 2:                                                 \
11384
 
+                       __asm__ __volatile__ (                          \
11385
 
+                               _PRE_EFLAGS("0", "3", "2")              \
11386
 
+                               _op"w %1; "                             \
11387
 
+                               _POST_EFLAGS("0", "3", "2")             \
11388
 
+                               : "=m" (_eflags), "=m" ((_dst).val),    \
11389
 
+                                 "=&r" (_tmp)                          \
11390
 
+                               : "i" (EFLAGS_MASK));                   \
11391
 
+                       break;                                          \
11392
 
+               case 4:                                                 \
11393
 
+                       __asm__ __volatile__ (                          \
11394
 
+                               _PRE_EFLAGS("0", "3", "2")              \
11395
 
+                               _op"l %1; "                             \
11396
 
+                               _POST_EFLAGS("0", "3", "2")             \
11397
 
+                               : "=m" (_eflags), "=m" ((_dst).val),    \
11398
 
+                                 "=&r" (_tmp)                          \
11399
 
+                               : "i" (EFLAGS_MASK));                   \
11400
 
+                       break;                                          \
11401
 
+               case 8:                                                 \
11402
 
+                       __emulate_1op_8byte(_op, _dst, _eflags);        \
11403
 
+                       break;                                          \
11404
 
+               }                                                       \
11405
 
+       } while (0)
11406
 
+
11407
 
+/* Emulate an instruction with quadword operands (x86/64 only). */
11408
 
+#if defined(CONFIG_X86_64)
11409
 
+#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy)           \
11410
 
+       do {                                                              \
11411
 
+               __asm__ __volatile__ (                                    \
11412
 
+                       _PRE_EFLAGS("0", "4", "2")                        \
11413
 
+                       _op"q %"_qx"3,%1; "                               \
11414
 
+                       _POST_EFLAGS("0", "4", "2")                       \
11415
 
+                       : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
11416
 
+                       : _qy ((_src).val), "i" (EFLAGS_MASK));         \
11417
 
+       } while (0)
11418
 
+
11419
 
+#define __emulate_1op_8byte(_op, _dst, _eflags)                           \
11420
 
+       do {                                                              \
11421
 
+               __asm__ __volatile__ (                                    \
11422
 
+                       _PRE_EFLAGS("0", "3", "2")                        \
11423
 
+                       _op"q %1; "                                       \
11424
 
+                       _POST_EFLAGS("0", "3", "2")                       \
11425
 
+                       : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
11426
 
+                       : "i" (EFLAGS_MASK));                             \
11427
 
+       } while (0)
11428
 
+
11429
 
+#elif defined(__i386__)
11430
 
+#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy)
11431
 
+#define __emulate_1op_8byte(_op, _dst, _eflags)
11432
 
+#endif                         /* __i386__ */
11433
 
+
11434
 
+/* Fetch next part of the instruction being emulated. */
11435
 
+#define insn_fetch(_type, _size, _eip)                                  \
11436
 
+({     unsigned long _x;                                               \
11437
 
+       rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size));            \
11438
 
+       if (rc != 0)                                                    \
11439
 
+               goto done;                                              \
11440
 
+       (_eip) += (_size);                                              \
11441
 
+       (_type)_x;                                                      \
11442
 
+})
11443
 
+
11444
 
+/* Access/update address held in a register, based on addressing mode. */
11445
 
+#define address_mask(reg)                                              \
11446
 
+       ((c->ad_bytes == sizeof(unsigned long)) ?                       \
11447
 
+               (reg) : ((reg) & ((1UL << (c->ad_bytes << 3)) - 1)))
11448
 
+#define register_address(base, reg)                                     \
11449
 
+       ((base) + address_mask(reg))
11450
 
+#define register_address_increment(reg, inc)                            \
11451
 
+       do {                                                            \
11452
 
+               /* signed type ensures sign extension to long */        \
11453
 
+               int _inc = (inc);                                       \
11454
 
+               if (c->ad_bytes == sizeof(unsigned long))               \
11455
 
+                       (reg) += _inc;                                  \
11456
 
+               else                                                    \
11457
 
+                       (reg) = ((reg) &                                \
11458
 
+                                ~((1UL << (c->ad_bytes << 3)) - 1)) |  \
11459
 
+                               (((reg) + _inc) &                       \
11460
 
+                                ((1UL << (c->ad_bytes << 3)) - 1));    \
11461
 
+       } while (0)
11462
 
+
11463
 
+#define JMP_REL(rel)                                                   \
11464
 
+       do {                                                            \
11465
 
+               register_address_increment(c->eip, rel);                \
11466
 
+       } while (0)
11467
 
+
11468
 
+static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
11469
 
+                             struct x86_emulate_ops *ops,
11470
 
+                             unsigned long linear, u8 *dest)
11471
 
+{
11472
 
+       struct fetch_cache *fc = &ctxt->decode.fetch;
11473
 
+       int rc;
11474
 
+       int size;
11475
 
+
11476
 
+       if (linear < fc->start || linear >= fc->end) {
11477
 
+               size = min(15UL, PAGE_SIZE - offset_in_page(linear));
11478
 
+               rc = ops->read_std(linear, fc->data, size, ctxt->vcpu);
11479
 
+               if (rc)
11480
 
+                       return rc;
11481
 
+               fc->start = linear;
11482
 
+               fc->end = linear + size;
11483
 
+       }
11484
 
+       *dest = fc->data[linear - fc->start];
11485
 
+       return 0;
11486
 
+}
11487
 
+
11488
 
+static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
11489
 
+                        struct x86_emulate_ops *ops,
11490
 
+                        unsigned long eip, void *dest, unsigned size)
11491
 
+{
11492
 
+       int rc = 0;
11493
 
+
11494
 
+       eip += ctxt->cs_base;
11495
 
+       while (size--) {
11496
 
+               rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++);
11497
 
+               if (rc)
11498
 
+                       return rc;
11499
 
+       }
11500
 
+       return 0;
11501
 
+}
11502
 
+
11503
 
+/*
11504
 
+ * Given the 'reg' portion of a ModRM byte, and a register block, return a
11505
 
+ * pointer into the block that addresses the relevant register.
11506
 
+ * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
11507
 
+ */
11508
 
+static void *decode_register(u8 modrm_reg, unsigned long *regs,
11509
 
+                            int highbyte_regs)
11510
 
+{
11511
 
+       void *p;
11512
 
+
11513
 
+       p = &regs[modrm_reg];
11514
 
+       if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
11515
 
+               p = (unsigned char *)&regs[modrm_reg & 3] + 1;
11516
 
+       return p;
11517
 
+}
11518
 
+
11519
 
+static int read_descriptor(struct x86_emulate_ctxt *ctxt,
11520
 
+                          struct x86_emulate_ops *ops,
11521
 
+                          void *ptr,
11522
 
+                          u16 *size, unsigned long *address, int op_bytes)
11523
 
+{
11524
 
+       int rc;
11525
 
+
11526
 
+       if (op_bytes == 2)
11527
 
+               op_bytes = 3;
11528
 
+       *address = 0;
11529
 
+       rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2,
11530
 
+                          ctxt->vcpu);
11531
 
+       if (rc)
11532
 
+               return rc;
11533
 
+       rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes,
11534
 
+                          ctxt->vcpu);
11535
 
+       return rc;
11536
 
+}
11537
 
+
11538
 
+static int test_cc(unsigned int condition, unsigned int flags)
11539
 
+{
11540
 
+       int rc = 0;
11541
 
+
11542
 
+       switch ((condition & 15) >> 1) {
11543
 
+       case 0: /* o */
11544
 
+               rc |= (flags & EFLG_OF);
11545
 
+               break;
11546
 
+       case 1: /* b/c/nae */
11547
 
+               rc |= (flags & EFLG_CF);
11548
 
+               break;
11549
 
+       case 2: /* z/e */
11550
 
+               rc |= (flags & EFLG_ZF);
11551
 
+               break;
11552
 
+       case 3: /* be/na */
11553
 
+               rc |= (flags & (EFLG_CF|EFLG_ZF));
11554
 
+               break;
11555
 
+       case 4: /* s */
11556
 
+               rc |= (flags & EFLG_SF);
11557
 
+               break;
11558
 
+       case 5: /* p/pe */
11559
 
+               rc |= (flags & EFLG_PF);
11560
 
+               break;
11561
 
+       case 7: /* le/ng */
11562
 
+               rc |= (flags & EFLG_ZF);
11563
 
+               /* fall through */
11564
 
+       case 6: /* l/nge */
11565
 
+               rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF));
11566
 
+               break;
11567
 
+       }
11568
 
+
11569
 
+       /* Odd condition identifiers (lsb == 1) have inverted sense. */
11570
 
+       return (!!rc ^ (condition & 1));
11571
 
+}
11572
 
+
11573
 
+static void decode_register_operand(struct operand *op,
11574
 
+                                   struct decode_cache *c,
11575
 
+                                   int inhibit_bytereg)
11576
 
+{
11577
 
+       unsigned reg = c->modrm_reg;
11578
 
+       int highbyte_regs = c->rex_prefix == 0;
11579
 
+
11580
 
+       if (!(c->d & ModRM))
11581
 
+               reg = (c->b & 7) | ((c->rex_prefix & 1) << 3);
11582
 
+       op->type = OP_REG;
11583
 
+       if ((c->d & ByteOp) && !inhibit_bytereg) {
11584
 
+               op->ptr = decode_register(reg, c->regs, highbyte_regs);
11585
 
+               op->val = *(u8 *)op->ptr;
11586
 
+               op->bytes = 1;
11587
 
+       } else {
11588
 
+               op->ptr = decode_register(reg, c->regs, 0);
11589
 
+               op->bytes = c->op_bytes;
11590
 
+               switch (op->bytes) {
11591
 
+               case 2:
11592
 
+                       op->val = *(u16 *)op->ptr;
11593
 
+                       break;
11594
 
+               case 4:
11595
 
+                       op->val = *(u32 *)op->ptr;
11596
 
+                       break;
11597
 
+               case 8:
11598
 
+                       op->val = *(u64 *) op->ptr;
11599
 
+                       break;
11600
 
+               }
11601
 
+       }
11602
 
+       op->orig_val = op->val;
11603
 
+}
11604
 
+
11605
 
+static int decode_modrm(struct x86_emulate_ctxt *ctxt,
11606
 
+                       struct x86_emulate_ops *ops)
11607
 
+{
11608
 
+       struct decode_cache *c = &ctxt->decode;
11609
 
+       u8 sib;
11610
 
+       int index_reg = 0, base_reg = 0, scale, rip_relative = 0;
11611
 
+       int rc = 0;
11612
 
+
11613
 
+       if (c->rex_prefix) {
11614
 
+               c->modrm_reg = (c->rex_prefix & 4) << 1;        /* REX.R */
11615
 
+               index_reg = (c->rex_prefix & 2) << 2; /* REX.X */
11616
 
+               c->modrm_rm = base_reg = (c->rex_prefix & 1) << 3; /* REX.B */
11617
 
+       }
11618
 
+
11619
 
+       c->modrm = insn_fetch(u8, 1, c->eip);
11620
 
+       c->modrm_mod |= (c->modrm & 0xc0) >> 6;
11621
 
+       c->modrm_reg |= (c->modrm & 0x38) >> 3;
11622
 
+       c->modrm_rm |= (c->modrm & 0x07);
11623
 
+       c->modrm_ea = 0;
11624
 
+       c->use_modrm_ea = 1;
11625
 
+
11626
 
+       if (c->modrm_mod == 3) {
11627
 
+               c->modrm_val = *(unsigned long *)
11628
 
+                       decode_register(c->modrm_rm, c->regs, c->d & ByteOp);
11629
 
+               return rc;
11630
 
+       }
11631
 
+
11632
 
+       if (c->ad_bytes == 2) {
11633
 
+               unsigned bx = c->regs[VCPU_REGS_RBX];
11634
 
+               unsigned bp = c->regs[VCPU_REGS_RBP];
11635
 
+               unsigned si = c->regs[VCPU_REGS_RSI];
11636
 
+               unsigned di = c->regs[VCPU_REGS_RDI];
11637
 
+
11638
 
+               /* 16-bit ModR/M decode. */
11639
 
+               switch (c->modrm_mod) {
11640
 
+               case 0:
11641
 
+                       if (c->modrm_rm == 6)
11642
 
+                               c->modrm_ea += insn_fetch(u16, 2, c->eip);
11643
 
+                       break;
11644
 
+               case 1:
11645
 
+                       c->modrm_ea += insn_fetch(s8, 1, c->eip);
11646
 
+                       break;
11647
 
+               case 2:
11648
 
+                       c->modrm_ea += insn_fetch(u16, 2, c->eip);
11649
 
+                       break;
11650
 
+               }
11651
 
+               switch (c->modrm_rm) {
11652
 
+               case 0:
11653
 
+                       c->modrm_ea += bx + si;
11654
 
+                       break;
11655
 
+               case 1:
11656
 
+                       c->modrm_ea += bx + di;
11657
 
+                       break;
11658
 
+               case 2:
11659
 
+                       c->modrm_ea += bp + si;
11660
 
+                       break;
11661
 
+               case 3:
11662
 
+                       c->modrm_ea += bp + di;
11663
 
+                       break;
11664
 
+               case 4:
11665
 
+                       c->modrm_ea += si;
11666
 
+                       break;
11667
 
+               case 5:
11668
 
+                       c->modrm_ea += di;
11669
 
+                       break;
11670
 
+               case 6:
11671
 
+                       if (c->modrm_mod != 0)
11672
 
+                               c->modrm_ea += bp;
11673
 
+                       break;
11674
 
+               case 7:
11675
 
+                       c->modrm_ea += bx;
11676
 
+                       break;
11677
 
+               }
11678
 
+               if (c->modrm_rm == 2 || c->modrm_rm == 3 ||
11679
 
+                   (c->modrm_rm == 6 && c->modrm_mod != 0))
11680
 
+                       if (!c->override_base)
11681
 
+                               c->override_base = &ctxt->ss_base;
11682
 
+               c->modrm_ea = (u16)c->modrm_ea;
11683
 
+       } else {
11684
 
+               /* 32/64-bit ModR/M decode. */
11685
 
+               switch (c->modrm_rm) {
11686
 
+               case 4:
11687
 
+               case 12:
11688
 
+                       sib = insn_fetch(u8, 1, c->eip);
11689
 
+                       index_reg |= (sib >> 3) & 7;
11690
 
+                       base_reg |= sib & 7;
11691
 
+                       scale = sib >> 6;
11692
 
+
11693
 
+                       switch (base_reg) {
11694
 
+                       case 5:
11695
 
+                               if (c->modrm_mod != 0)
11696
 
+                                       c->modrm_ea += c->regs[base_reg];
11697
 
+                               else
11698
 
+                                       c->modrm_ea +=
11699
 
+                                               insn_fetch(s32, 4, c->eip);
11700
 
+                               break;
11701
 
+                       default:
11702
 
+                               c->modrm_ea += c->regs[base_reg];
11703
 
+                       }
11704
 
+                       switch (index_reg) {
11705
 
+                       case 4:
11706
 
+                               break;
11707
 
+                       default:
11708
 
+                               c->modrm_ea += c->regs[index_reg] << scale;
11709
 
+                       }
11710
 
+                       break;
11711
 
+               case 5:
11712
 
+                       if (c->modrm_mod != 0)
11713
 
+                               c->modrm_ea += c->regs[c->modrm_rm];
11714
 
+                       else if (ctxt->mode == X86EMUL_MODE_PROT64)
11715
 
+                               rip_relative = 1;
11716
 
+                       break;
11717
 
+               default:
11718
 
+                       c->modrm_ea += c->regs[c->modrm_rm];
11719
 
+                       break;
11720
 
+               }
11721
 
+               switch (c->modrm_mod) {
11722
 
+               case 0:
11723
 
+                       if (c->modrm_rm == 5)
11724
 
+                               c->modrm_ea += insn_fetch(s32, 4, c->eip);
11725
 
+                       break;
11726
 
+               case 1:
11727
 
+                       c->modrm_ea += insn_fetch(s8, 1, c->eip);
11728
 
+                       break;
11729
 
+               case 2:
11730
 
+                       c->modrm_ea += insn_fetch(s32, 4, c->eip);
11731
 
+                       break;
11732
 
+               }
11733
 
+       }
11734
 
+       if (rip_relative) {
11735
 
+               c->modrm_ea += c->eip;
11736
 
+               switch (c->d & SrcMask) {
11737
 
+               case SrcImmByte:
11738
 
+                       c->modrm_ea += 1;
11739
 
+                       break;
11740
 
+               case SrcImm:
11741
 
+                       if (c->d & ByteOp)
11742
 
+                               c->modrm_ea += 1;
11743
 
+                       else
11744
 
+                               if (c->op_bytes == 8)
11745
 
+                                       c->modrm_ea += 4;
11746
 
+                               else
11747
 
+                                       c->modrm_ea += c->op_bytes;
11748
 
+               }
11749
 
+       }
11750
 
+done:
11751
 
+       return rc;
11752
 
+}
11753
 
+
11754
 
+static int decode_abs(struct x86_emulate_ctxt *ctxt,
11755
 
+                     struct x86_emulate_ops *ops)
11756
 
+{
11757
 
+       struct decode_cache *c = &ctxt->decode;
11758
 
+       int rc = 0;
11759
 
+
11760
 
+       switch (c->ad_bytes) {
11761
 
+       case 2:
11762
 
+               c->modrm_ea = insn_fetch(u16, 2, c->eip);
11763
 
+               break;
11764
 
+       case 4:
11765
 
+               c->modrm_ea = insn_fetch(u32, 4, c->eip);
11766
 
+               break;
11767
 
+       case 8:
11768
 
+               c->modrm_ea = insn_fetch(u64, 8, c->eip);
11769
 
+               break;
11770
 
+       }
11771
 
+done:
11772
 
+       return rc;
11773
 
+}
11774
 
+
11775
 
+int
11776
 
+x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
11777
 
+{
11778
 
+       struct decode_cache *c = &ctxt->decode;
11779
 
+       int rc = 0;
11780
 
+       int mode = ctxt->mode;
11781
 
+       int def_op_bytes, def_ad_bytes;
11782
 
+
11783
 
+       /* Shadow copy of register state. Committed on successful emulation. */
11784
 
+
11785
 
+       memset(c, 0, sizeof(struct decode_cache));
11786
 
+       c->eip = ctxt->vcpu->arch.rip;
11787
 
+       memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
11788
 
+
11789
 
+       switch (mode) {
11790
 
+       case X86EMUL_MODE_REAL:
11791
 
+       case X86EMUL_MODE_PROT16:
11792
 
+               def_op_bytes = def_ad_bytes = 2;
11793
 
+               break;
11794
 
+       case X86EMUL_MODE_PROT32:
11795
 
+               def_op_bytes = def_ad_bytes = 4;
11796
 
+               break;
11797
 
+#ifdef CONFIG_X86_64
11798
 
+       case X86EMUL_MODE_PROT64:
11799
 
+               def_op_bytes = 4;
11800
 
+               def_ad_bytes = 8;
11801
 
+               break;
11802
 
+#endif
11803
 
+       default:
11804
 
+               return -1;
11805
 
+       }
11806
 
+
11807
 
+       c->op_bytes = def_op_bytes;
11808
 
+       c->ad_bytes = def_ad_bytes;
11809
 
+
11810
 
+       /* Legacy prefixes. */
11811
 
+       for (;;) {
11812
 
+               switch (c->b = insn_fetch(u8, 1, c->eip)) {
11813
 
+               case 0x66:      /* operand-size override */
11814
 
+                       /* switch between 2/4 bytes */
11815
 
+                       c->op_bytes = def_op_bytes ^ 6;
11816
 
+                       break;
11817
 
+               case 0x67:      /* address-size override */
11818
 
+                       if (mode == X86EMUL_MODE_PROT64)
11819
 
+                               /* switch between 4/8 bytes */
11820
 
+                               c->ad_bytes = def_ad_bytes ^ 12;
11821
 
+                       else
11822
 
+                               /* switch between 2/4 bytes */
11823
 
+                               c->ad_bytes = def_ad_bytes ^ 6;
11824
 
+                       break;
11825
 
+               case 0x2e:      /* CS override */
11826
 
+                       c->override_base = &ctxt->cs_base;
11827
 
+                       break;
11828
 
+               case 0x3e:      /* DS override */
11829
 
+                       c->override_base = &ctxt->ds_base;
11830
 
+                       break;
11831
 
+               case 0x26:      /* ES override */
11832
 
+                       c->override_base = &ctxt->es_base;
11833
 
+                       break;
11834
 
+               case 0x64:      /* FS override */
11835
 
+                       c->override_base = &ctxt->fs_base;
11836
 
+                       break;
11837
 
+               case 0x65:      /* GS override */
11838
 
+                       c->override_base = &ctxt->gs_base;
11839
 
+                       break;
11840
 
+               case 0x36:      /* SS override */
11841
 
+                       c->override_base = &ctxt->ss_base;
11842
 
+                       break;
11843
 
+               case 0x40 ... 0x4f: /* REX */
11844
 
+                       if (mode != X86EMUL_MODE_PROT64)
11845
 
+                               goto done_prefixes;
11846
 
+                       c->rex_prefix = c->b;
11847
 
+                       continue;
11848
 
+               case 0xf0:      /* LOCK */
11849
 
+                       c->lock_prefix = 1;
11850
 
+                       break;
11851
 
+               case 0xf2:      /* REPNE/REPNZ */
11852
 
+                       c->rep_prefix = REPNE_PREFIX;
11853
 
+                       break;
11854
 
+               case 0xf3:      /* REP/REPE/REPZ */
11855
 
+                       c->rep_prefix = REPE_PREFIX;
11856
 
+                       break;
11857
 
+               default:
11858
 
+                       goto done_prefixes;
11859
 
+               }
11860
 
+
11861
 
+               /* Any legacy prefix after a REX prefix nullifies its effect. */
11862
 
+
11863
 
+               c->rex_prefix = 0;
11864
 
+       }
11865
 
+
11866
 
+done_prefixes:
11867
 
+
11868
 
+       /* REX prefix. */
11869
 
+       if (c->rex_prefix)
11870
 
+               if (c->rex_prefix & 8)
11871
 
+                       c->op_bytes = 8;        /* REX.W */
11872
 
+
11873
 
+       /* Opcode byte(s). */
11874
 
+       c->d = opcode_table[c->b];
11875
 
+       if (c->d == 0) {
11876
 
+               /* Two-byte opcode? */
11877
 
+               if (c->b == 0x0f) {
11878
 
+                       c->twobyte = 1;
11879
 
+                       c->b = insn_fetch(u8, 1, c->eip);
11880
 
+                       c->d = twobyte_table[c->b];
11881
 
+               }
11882
 
+
11883
 
+               /* Unrecognised? */
11884
 
+               if (c->d == 0) {
11885
 
+                       DPRINTF("Cannot emulate %02x\n", c->b);
11886
 
+                       return -1;
11887
 
+               }
11888
 
+       }
11889
 
+
11890
 
+       if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack))
11891
 
+               c->op_bytes = 8;
11892
 
+
11893
 
+       /* ModRM and SIB bytes. */
11894
 
+       if (c->d & ModRM)
11895
 
+               rc = decode_modrm(ctxt, ops);
11896
 
+       else if (c->d & MemAbs)
11897
 
+               rc = decode_abs(ctxt, ops);
11898
 
+       if (rc)
11899
 
+               goto done;
11900
 
+
11901
 
+       if (!c->override_base)
11902
 
+               c->override_base = &ctxt->ds_base;
11903
 
+       if (mode == X86EMUL_MODE_PROT64 &&
11904
 
+           c->override_base != &ctxt->fs_base &&
11905
 
+           c->override_base != &ctxt->gs_base)
11906
 
+               c->override_base = NULL;
11907
 
+
11908
 
+       if (c->override_base)
11909
 
+               c->modrm_ea += *c->override_base;
11910
 
+
11911
 
+       if (c->ad_bytes != 8)
11912
 
+               c->modrm_ea = (u32)c->modrm_ea;
11913
 
+       /*
11914
 
+        * Decode and fetch the source operand: register, memory
11915
 
+        * or immediate.
11916
 
+        */
11917
 
+       switch (c->d & SrcMask) {
11918
 
+       case SrcNone:
11919
 
+               break;
11920
 
+       case SrcReg:
11921
 
+               decode_register_operand(&c->src, c, 0);
11922
 
+               break;
11923
 
+       case SrcMem16:
11924
 
+               c->src.bytes = 2;
11925
 
+               goto srcmem_common;
11926
 
+       case SrcMem32:
11927
 
+               c->src.bytes = 4;
11928
 
+               goto srcmem_common;
11929
 
+       case SrcMem:
11930
 
+               c->src.bytes = (c->d & ByteOp) ? 1 :
11931
 
+                                                          c->op_bytes;
11932
 
+               /* Don't fetch the address for invlpg: it could be unmapped. */
11933
 
+               if (c->twobyte && c->b == 0x01
11934
 
+                                   && c->modrm_reg == 7)
11935
 
+                       break;
11936
 
+             srcmem_common:
11937
 
+               /*
11938
 
+                * For instructions with a ModR/M byte, switch to register
11939
 
+                * access if Mod = 3.
11940
 
+                */
11941
 
+               if ((c->d & ModRM) && c->modrm_mod == 3) {
11942
 
+                       c->src.type = OP_REG;
11943
 
+                       break;
11944
 
+               }
11945
 
+               c->src.type = OP_MEM;
11946
 
+               break;
11947
 
+       case SrcImm:
11948
 
+               c->src.type = OP_IMM;
11949
 
+               c->src.ptr = (unsigned long *)c->eip;
11950
 
+               c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
11951
 
+               if (c->src.bytes == 8)
11952
 
+                       c->src.bytes = 4;
11953
 
+               /* NB. Immediates are sign-extended as necessary. */
11954
 
+               switch (c->src.bytes) {
11955
 
+               case 1:
11956
 
+                       c->src.val = insn_fetch(s8, 1, c->eip);
11957
 
+                       break;
11958
 
+               case 2:
11959
 
+                       c->src.val = insn_fetch(s16, 2, c->eip);
11960
 
+                       break;
11961
 
+               case 4:
11962
 
+                       c->src.val = insn_fetch(s32, 4, c->eip);
11963
 
+                       break;
11964
 
+               }
11965
 
+               break;
11966
 
+       case SrcImmByte:
11967
 
+               c->src.type = OP_IMM;
11968
 
+               c->src.ptr = (unsigned long *)c->eip;
11969
 
+               c->src.bytes = 1;
11970
 
+               c->src.val = insn_fetch(s8, 1, c->eip);
11971
 
+               break;
11972
 
+       }
11973
 
+
11974
 
+       /* Decode and fetch the destination operand: register or memory. */
11975
 
+       switch (c->d & DstMask) {
11976
 
+       case ImplicitOps:
11977
 
+               /* Special instructions do their own operand decoding. */
11978
 
+               return 0;
11979
 
+       case DstReg:
11980
 
+               decode_register_operand(&c->dst, c,
11981
 
+                        c->twobyte && (c->b == 0xb6 || c->b == 0xb7));
11982
 
+               break;
11983
 
+       case DstMem:
11984
 
+               /*
11985
 
+                * For instructions with a ModR/M byte, switch to register
11986
 
+                * access if Mod = 3.
11987
 
+                */
11988
 
+               if ((c->d & ModRM) && c->modrm_mod == 3)
11989
 
+                       c->dst.type = OP_REG;
11990
 
+               else
11991
 
+                       c->dst.type = OP_MEM;
11992
 
+               break;
11993
 
+       }
11994
 
+
11995
 
+done:
11996
 
+       return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
11997
 
+}
11998
 
+
11999
 
+static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
12000
 
+{
12001
 
+       struct decode_cache *c = &ctxt->decode;
12002
 
+
12003
 
+       c->dst.type  = OP_MEM;
12004
 
+       c->dst.bytes = c->op_bytes;
12005
 
+       c->dst.val = c->src.val;
12006
 
+       register_address_increment(c->regs[VCPU_REGS_RSP], -c->op_bytes);
12007
 
+       c->dst.ptr = (void *) register_address(ctxt->ss_base,
12008
 
+                                              c->regs[VCPU_REGS_RSP]);
12009
 
+}
12010
 
+
12011
 
+static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
12012
 
+                               struct x86_emulate_ops *ops)
12013
 
+{
12014
 
+       struct decode_cache *c = &ctxt->decode;
12015
 
+       int rc;
12016
 
+
12017
 
+       rc = ops->read_std(register_address(ctxt->ss_base,
12018
 
+                                           c->regs[VCPU_REGS_RSP]),
12019
 
+                          &c->dst.val, c->dst.bytes, ctxt->vcpu);
12020
 
+       if (rc != 0)
12021
 
+               return rc;
12022
 
+
12023
 
+       register_address_increment(c->regs[VCPU_REGS_RSP], c->dst.bytes);
12024
 
+
12025
 
+       return 0;
12026
 
+}
12027
 
+
12028
 
+static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt)
12029
 
+{
12030
 
+       struct decode_cache *c = &ctxt->decode;
12031
 
+       switch (c->modrm_reg) {
12032
 
+       case 0: /* rol */
12033
 
+               emulate_2op_SrcB("rol", c->src, c->dst, ctxt->eflags);
12034
 
+               break;
12035
 
+       case 1: /* ror */
12036
 
+               emulate_2op_SrcB("ror", c->src, c->dst, ctxt->eflags);
12037
 
+               break;
12038
 
+       case 2: /* rcl */
12039
 
+               emulate_2op_SrcB("rcl", c->src, c->dst, ctxt->eflags);
12040
 
+               break;
12041
 
+       case 3: /* rcr */
12042
 
+               emulate_2op_SrcB("rcr", c->src, c->dst, ctxt->eflags);
12043
 
+               break;
12044
 
+       case 4: /* sal/shl */
12045
 
+       case 6: /* sal/shl */
12046
 
+               emulate_2op_SrcB("sal", c->src, c->dst, ctxt->eflags);
12047
 
+               break;
12048
 
+       case 5: /* shr */
12049
 
+               emulate_2op_SrcB("shr", c->src, c->dst, ctxt->eflags);
12050
 
+               break;
12051
 
+       case 7: /* sar */
12052
 
+               emulate_2op_SrcB("sar", c->src, c->dst, ctxt->eflags);
12053
 
+               break;
12054
 
+       }
12055
 
+}
12056
 
+
12057
 
+static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
12058
 
+                              struct x86_emulate_ops *ops)
12059
 
+{
12060
 
+       struct decode_cache *c = &ctxt->decode;
12061
 
+       int rc = 0;
12062
 
+
12063
 
+       switch (c->modrm_reg) {
12064
 
+       case 0 ... 1:   /* test */
12065
 
+               /*
12066
 
+                * Special case in Grp3: test has an immediate
12067
 
+                * source operand.
12068
 
+                */
12069
 
+               c->src.type = OP_IMM;
12070
 
+               c->src.ptr = (unsigned long *)c->eip;
12071
 
+               c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
12072
 
+               if (c->src.bytes == 8)
12073
 
+                       c->src.bytes = 4;
12074
 
+               switch (c->src.bytes) {
12075
 
+               case 1:
12076
 
+                       c->src.val = insn_fetch(s8, 1, c->eip);
12077
 
+                       break;
12078
 
+               case 2:
12079
 
+                       c->src.val = insn_fetch(s16, 2, c->eip);
12080
 
+                       break;
12081
 
+               case 4:
12082
 
+                       c->src.val = insn_fetch(s32, 4, c->eip);
12083
 
+                       break;
12084
 
+               }
12085
 
+               emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
12086
 
+               break;
12087
 
+       case 2: /* not */
12088
 
+               c->dst.val = ~c->dst.val;
12089
 
+               break;
12090
 
+       case 3: /* neg */
12091
 
+               emulate_1op("neg", c->dst, ctxt->eflags);
12092
 
+               break;
12093
 
+       default:
12094
 
+               DPRINTF("Cannot emulate %02x\n", c->b);
12095
 
+               rc = X86EMUL_UNHANDLEABLE;
12096
 
+               break;
12097
 
+       }
12098
 
+done:
12099
 
+       return rc;
12100
 
+}
12101
 
+
12102
 
+static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
12103
 
+                              struct x86_emulate_ops *ops)
12104
 
+{
12105
 
+       struct decode_cache *c = &ctxt->decode;
12106
 
+       int rc;
12107
 
+
12108
 
+       switch (c->modrm_reg) {
12109
 
+       case 0: /* inc */
12110
 
+               emulate_1op("inc", c->dst, ctxt->eflags);
12111
 
+               break;
12112
 
+       case 1: /* dec */
12113
 
+               emulate_1op("dec", c->dst, ctxt->eflags);
12114
 
+               break;
12115
 
+       case 4: /* jmp abs */
12116
 
+               if (c->b == 0xff)
12117
 
+                       c->eip = c->dst.val;
12118
 
+               else {
12119
 
+                       DPRINTF("Cannot emulate %02x\n", c->b);
12120
 
+                       return X86EMUL_UNHANDLEABLE;
12121
 
+               }
12122
 
+               break;
12123
 
+       case 6: /* push */
12124
 
+
12125
 
+               /* 64-bit mode: PUSH always pushes a 64-bit operand. */
12126
 
+
12127
 
+               if (ctxt->mode == X86EMUL_MODE_PROT64) {
12128
 
+                       c->dst.bytes = 8;
12129
 
+                       rc = ops->read_std((unsigned long)c->dst.ptr,
12130
 
+                                          &c->dst.val, 8, ctxt->vcpu);
12131
 
+                       if (rc != 0)
12132
 
+                               return rc;
12133
 
+               }
12134
 
+               register_address_increment(c->regs[VCPU_REGS_RSP],
12135
 
+                                          -c->dst.bytes);
12136
 
+               rc = ops->write_emulated(register_address(ctxt->ss_base,
12137
 
+                                                 c->regs[VCPU_REGS_RSP]),
12138
 
+                                        &c->dst.val,
12139
 
+                                        c->dst.bytes, ctxt->vcpu);
12140
 
+               if (rc != 0)
12141
 
+                       return rc;
12142
 
+               c->dst.type = OP_NONE;
12143
 
+               break;
12144
 
+       default:
12145
 
+               DPRINTF("Cannot emulate %02x\n", c->b);
12146
 
+               return X86EMUL_UNHANDLEABLE;
12147
 
+       }
12148
 
+       return 0;
12149
 
+}
12150
 
+
12151
 
+static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
12152
 
+                              struct x86_emulate_ops *ops,
12153
 
+                              unsigned long memop)
12154
 
+{
12155
 
+       struct decode_cache *c = &ctxt->decode;
12156
 
+       u64 old, new;
12157
 
+       int rc;
12158
 
+
12159
 
+       rc = ops->read_emulated(memop, &old, 8, ctxt->vcpu);
12160
 
+       if (rc != 0)
12161
 
+               return rc;
12162
 
+
12163
 
+       if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) ||
12164
 
+           ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) {
12165
 
+
12166
 
+               c->regs[VCPU_REGS_RAX] = (u32) (old >> 0);
12167
 
+               c->regs[VCPU_REGS_RDX] = (u32) (old >> 32);
12168
 
+               ctxt->eflags &= ~EFLG_ZF;
12169
 
+
12170
 
+       } else {
12171
 
+               new = ((u64)c->regs[VCPU_REGS_RCX] << 32) |
12172
 
+                      (u32) c->regs[VCPU_REGS_RBX];
12173
 
+
12174
 
+               rc = ops->cmpxchg_emulated(memop, &old, &new, 8, ctxt->vcpu);
12175
 
+               if (rc != 0)
12176
 
+                       return rc;
12177
 
+               ctxt->eflags |= EFLG_ZF;
12178
 
+       }
12179
 
+       return 0;
12180
 
+}
12181
 
+
12182
 
+static inline int writeback(struct x86_emulate_ctxt *ctxt,
12183
 
+                           struct x86_emulate_ops *ops)
12184
 
+{
12185
 
+       int rc;
12186
 
+       struct decode_cache *c = &ctxt->decode;
12187
 
+
12188
 
+       switch (c->dst.type) {
12189
 
+       case OP_REG:
12190
 
+               /* The 4-byte case *is* correct:
12191
 
+                * in 64-bit mode we zero-extend.
12192
 
+                */
12193
 
+               switch (c->dst.bytes) {
12194
 
+               case 1:
12195
 
+                       *(u8 *)c->dst.ptr = (u8)c->dst.val;
12196
 
+                       break;
12197
 
+               case 2:
12198
 
+                       *(u16 *)c->dst.ptr = (u16)c->dst.val;
12199
 
+                       break;
12200
 
+               case 4:
12201
 
+                       *c->dst.ptr = (u32)c->dst.val;
12202
 
+                       break;  /* 64b: zero-ext */
12203
 
+               case 8:
12204
 
+                       *c->dst.ptr = c->dst.val;
12205
 
+                       break;
12206
 
+               }
12207
 
+               break;
12208
 
+       case OP_MEM:
12209
 
+               if (c->lock_prefix)
12210
 
+                       rc = ops->cmpxchg_emulated(
12211
 
+                                       (unsigned long)c->dst.ptr,
12212
 
+                                       &c->dst.orig_val,
12213
 
+                                       &c->dst.val,
12214
 
+                                       c->dst.bytes,
12215
 
+                                       ctxt->vcpu);
12216
 
+               else
12217
 
+                       rc = ops->write_emulated(
12218
 
+                                       (unsigned long)c->dst.ptr,
12219
 
+                                       &c->dst.val,
12220
 
+                                       c->dst.bytes,
12221
 
+                                       ctxt->vcpu);
12222
 
+               if (rc != 0)
12223
 
+                       return rc;
12224
 
+               break;
12225
 
+       case OP_NONE:
12226
 
+               /* no writeback */
12227
 
+               break;
12228
 
+       default:
12229
 
+               break;
12230
 
+       }
12231
 
+       return 0;
12232
 
+}
12233
 
+
12234
 
+int
12235
 
+x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
12236
 
+{
12237
 
+       unsigned long memop = 0;
12238
 
+       u64 msr_data;
12239
 
+       unsigned long saved_eip;
12240
 
+       struct decode_cache *c = &ctxt->decode;
12241
 
+       int rc = 0;
12242
 
+
12243
 
+       /* Shadow copy of register state. Committed on successful emulation.
12244
 
+        * NOTE: we can copy them from vcpu as x86_decode_insn() doesn't
12245
 
+        * modify them.
12246
 
+        */
12247
 
+
12248
 
+       memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
12249
 
+       saved_eip = c->eip;
12250
 
+
12251
 
+       if (((c->d & ModRM) && (c->modrm_mod != 3)) || (c->d & MemAbs))
12252
 
+               memop = c->modrm_ea;
12253
 
+
12254
 
+       if (c->rep_prefix && (c->d & String)) {
12255
 
+               /* All REP prefixes have the same first termination condition */
12256
 
+               if (c->regs[VCPU_REGS_RCX] == 0) {
12257
 
+                       ctxt->vcpu->arch.rip = c->eip;
12258
 
+                       goto done;
12259
 
+               }
12260
 
+               /* The second termination condition only applies to REPE
12261
 
+                * and REPNE. If the repeat string operation prefix is
12262
 
+                * REPE/REPZ or REPNE/REPNZ, then check the
12263
 
+                * corresponding termination condition according to:
12264
 
+                *      - if REPE/REPZ and ZF = 0 then done
12265
 
+                *      - if REPNE/REPNZ and ZF = 1 then done
12266
 
+                */
12267
 
+               if ((c->b == 0xa6) || (c->b == 0xa7) ||
12268
 
+                               (c->b == 0xae) || (c->b == 0xaf)) {
12269
 
+                       if ((c->rep_prefix == REPE_PREFIX) &&
12270
 
+                               ((ctxt->eflags & EFLG_ZF) == 0)) {
12271
 
+                                       ctxt->vcpu->arch.rip = c->eip;
12272
 
+                                       goto done;
12273
 
+                       }
12274
 
+                       if ((c->rep_prefix == REPNE_PREFIX) &&
12275
 
+                               ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) {
12276
 
+                               ctxt->vcpu->arch.rip = c->eip;
12277
 
+                               goto done;
12278
 
+                       }
12279
 
+               }
12280
 
+               c->regs[VCPU_REGS_RCX]--;
12281
 
+               c->eip = ctxt->vcpu->arch.rip;
12282
 
+       }
12283
 
+
12284
 
+       if (c->src.type == OP_MEM) {
12285
 
+               c->src.ptr = (unsigned long *)memop;
12286
 
+               c->src.val = 0;
12287
 
+               rc = ops->read_emulated((unsigned long)c->src.ptr,
12288
 
+                                       &c->src.val,
12289
 
+                                       c->src.bytes,
12290
 
+                                       ctxt->vcpu);
12291
 
+               if (rc != 0)
12292
 
+                       goto done;
12293
 
+               c->src.orig_val = c->src.val;
12294
 
+       }
12295
 
+
12296
 
+       if ((c->d & DstMask) == ImplicitOps)
12297
 
+               goto special_insn;
12298
 
+
12299
 
+
12300
 
+       if (c->dst.type == OP_MEM) {
12301
 
+               c->dst.ptr = (unsigned long *)memop;
12302
 
+               c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
12303
 
+               c->dst.val = 0;
12304
 
+               if (c->d & BitOp) {
12305
 
+                       unsigned long mask = ~(c->dst.bytes * 8 - 1);
12306
 
+
12307
 
+                       c->dst.ptr = (void *)c->dst.ptr +
12308
 
+                                                  (c->src.val & mask) / 8;
12309
 
+               }
12310
 
+               if (!(c->d & Mov) &&
12311
 
+                                  /* optimisation - avoid slow emulated read */
12312
 
+                   ((rc = ops->read_emulated((unsigned long)c->dst.ptr,
12313
 
+                                          &c->dst.val,
12314
 
+                                         c->dst.bytes, ctxt->vcpu)) != 0))
12315
 
+                       goto done;
12316
 
+       }
12317
 
+       c->dst.orig_val = c->dst.val;
12318
 
+
12319
 
+special_insn:
12320
 
+
12321
 
+       if (c->twobyte)
12322
 
+               goto twobyte_insn;
12323
 
+
12324
 
+       switch (c->b) {
12325
 
+       case 0x00 ... 0x05:
12326
 
+             add:              /* add */
12327
 
+               emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags);
12328
 
+               break;
12329
 
+       case 0x08 ... 0x0d:
12330
 
+             or:               /* or */
12331
 
+               emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags);
12332
 
+               break;
12333
 
+       case 0x10 ... 0x15:
12334
 
+             adc:              /* adc */
12335
 
+               emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags);
12336
 
+               break;
12337
 
+       case 0x18 ... 0x1d:
12338
 
+             sbb:              /* sbb */
12339
 
+               emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags);
12340
 
+               break;
12341
 
+       case 0x20 ... 0x23:
12342
 
+             and:              /* and */
12343
 
+               emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags);
12344
 
+               break;
12345
 
+       case 0x24:              /* and al imm8 */
12346
 
+               c->dst.type = OP_REG;
12347
 
+               c->dst.ptr = &c->regs[VCPU_REGS_RAX];
12348
 
+               c->dst.val = *(u8 *)c->dst.ptr;
12349
 
+               c->dst.bytes = 1;
12350
 
+               c->dst.orig_val = c->dst.val;
12351
 
+               goto and;
12352
 
+       case 0x25:              /* and ax imm16, or eax imm32 */
12353
 
+               c->dst.type = OP_REG;
12354
 
+               c->dst.bytes = c->op_bytes;
12355
 
+               c->dst.ptr = &c->regs[VCPU_REGS_RAX];
12356
 
+               if (c->op_bytes == 2)
12357
 
+                       c->dst.val = *(u16 *)c->dst.ptr;
12358
 
+               else
12359
 
+                       c->dst.val = *(u32 *)c->dst.ptr;
12360
 
+               c->dst.orig_val = c->dst.val;
12361
 
+               goto and;
12362
 
+       case 0x28 ... 0x2d:
12363
 
+             sub:              /* sub */
12364
 
+               emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags);
12365
 
+               break;
12366
 
+       case 0x30 ... 0x35:
12367
 
+             xor:              /* xor */
12368
 
+               emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags);
12369
 
+               break;
12370
 
+       case 0x38 ... 0x3d:
12371
 
+             cmp:              /* cmp */
12372
 
+               emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
12373
 
+               break;
12374
 
+       case 0x40 ... 0x47: /* inc r16/r32 */
12375
 
+               emulate_1op("inc", c->dst, ctxt->eflags);
12376
 
+               break;
12377
 
+       case 0x48 ... 0x4f: /* dec r16/r32 */
12378
 
+               emulate_1op("dec", c->dst, ctxt->eflags);
12379
 
+               break;
12380
 
+       case 0x50 ... 0x57:  /* push reg */
12381
 
+               c->dst.type  = OP_MEM;
12382
 
+               c->dst.bytes = c->op_bytes;
12383
 
+               c->dst.val = c->src.val;
12384
 
+               register_address_increment(c->regs[VCPU_REGS_RSP],
12385
 
+                                          -c->op_bytes);
12386
 
+               c->dst.ptr = (void *) register_address(
12387
 
+                       ctxt->ss_base, c->regs[VCPU_REGS_RSP]);
12388
 
+               break;
12389
 
+       case 0x58 ... 0x5f: /* pop reg */
12390
 
+       pop_instruction:
12391
 
+               if ((rc = ops->read_std(register_address(ctxt->ss_base,
12392
 
+                       c->regs[VCPU_REGS_RSP]), c->dst.ptr,
12393
 
+                       c->op_bytes, ctxt->vcpu)) != 0)
12394
 
+                       goto done;
12395
 
+
12396
 
+               register_address_increment(c->regs[VCPU_REGS_RSP],
12397
 
+                                          c->op_bytes);
12398
 
+               c->dst.type = OP_NONE;  /* Disable writeback. */
12399
 
+               break;
12400
 
+       case 0x63:              /* movsxd */
12401
 
+               if (ctxt->mode != X86EMUL_MODE_PROT64)
12402
 
+                       goto cannot_emulate;
12403
 
+               c->dst.val = (s32) c->src.val;
12404
 
+               break;
12405
 
+       case 0x6a: /* push imm8 */
12406
 
+               c->src.val = 0L;
12407
 
+               c->src.val = insn_fetch(s8, 1, c->eip);
12408
 
+               emulate_push(ctxt);
12409
 
+               break;
12410
 
+       case 0x6c:              /* insb */
12411
 
+       case 0x6d:              /* insw/insd */
12412
 
+                if (kvm_emulate_pio_string(ctxt->vcpu, NULL,
12413
 
+                               1,
12414
 
+                               (c->d & ByteOp) ? 1 : c->op_bytes,
12415
 
+                               c->rep_prefix ?
12416
 
+                               address_mask(c->regs[VCPU_REGS_RCX]) : 1,
12417
 
+                               (ctxt->eflags & EFLG_DF),
12418
 
+                               register_address(ctxt->es_base,
12419
 
+                                                c->regs[VCPU_REGS_RDI]),
12420
 
+                               c->rep_prefix,
12421
 
+                               c->regs[VCPU_REGS_RDX]) == 0) {
12422
 
+                       c->eip = saved_eip;
12423
 
+                       return -1;
12424
 
+               }
12425
 
+               return 0;
12426
 
+       case 0x6e:              /* outsb */
12427
 
+       case 0x6f:              /* outsw/outsd */
12428
 
+               if (kvm_emulate_pio_string(ctxt->vcpu, NULL,
12429
 
+                               0,
12430
 
+                               (c->d & ByteOp) ? 1 : c->op_bytes,
12431
 
+                               c->rep_prefix ?
12432
 
+                               address_mask(c->regs[VCPU_REGS_RCX]) : 1,
12433
 
+                               (ctxt->eflags & EFLG_DF),
12434
 
+                               register_address(c->override_base ?
12435
 
+                                                       *c->override_base :
12436
 
+                                                       ctxt->ds_base,
12437
 
+                                                c->regs[VCPU_REGS_RSI]),
12438
 
+                               c->rep_prefix,
12439
 
+                               c->regs[VCPU_REGS_RDX]) == 0) {
12440
 
+                       c->eip = saved_eip;
12441
 
+                       return -1;
12442
 
+               }
12443
 
+               return 0;
12444
 
+       case 0x70 ... 0x7f: /* jcc (short) */ {
12445
 
+               int rel = insn_fetch(s8, 1, c->eip);
12446
 
+
12447
 
+               if (test_cc(c->b, ctxt->eflags))
12448
 
+                       JMP_REL(rel);
12449
 
+               break;
12450
 
+       }
12451
 
+       case 0x80 ... 0x83:     /* Grp1 */
12452
 
+               switch (c->modrm_reg) {
12453
 
+               case 0:
12454
 
+                       goto add;
12455
 
+               case 1:
12456
 
+                       goto or;
12457
 
+               case 2:
12458
 
+                       goto adc;
12459
 
+               case 3:
12460
 
+                       goto sbb;
12461
 
+               case 4:
12462
 
+                       goto and;
12463
 
+               case 5:
12464
 
+                       goto sub;
12465
 
+               case 6:
12466
 
+                       goto xor;
12467
 
+               case 7:
12468
 
+                       goto cmp;
12469
 
+               }
12470
 
+               break;
12471
 
+       case 0x84 ... 0x85:
12472
 
+               emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
12473
 
+               break;
12474
 
+       case 0x86 ... 0x87:     /* xchg */
12475
 
+               /* Write back the register source. */
12476
 
+               switch (c->dst.bytes) {
12477
 
+               case 1:
12478
 
+                       *(u8 *) c->src.ptr = (u8) c->dst.val;
12479
 
+                       break;
12480
 
+               case 2:
12481
 
+                       *(u16 *) c->src.ptr = (u16) c->dst.val;
12482
 
+                       break;
12483
 
+               case 4:
12484
 
+                       *c->src.ptr = (u32) c->dst.val;
12485
 
+                       break;  /* 64b reg: zero-extend */
12486
 
+               case 8:
12487
 
+                       *c->src.ptr = c->dst.val;
12488
 
+                       break;
12489
 
+               }
12490
 
+               /*
12491
 
+                * Write back the memory destination with implicit LOCK
12492
 
+                * prefix.
12493
 
+                */
12494
 
+               c->dst.val = c->src.val;
12495
 
+               c->lock_prefix = 1;
12496
 
+               break;
12497
 
+       case 0x88 ... 0x8b:     /* mov */
12498
 
+               goto mov;
12499
 
+       case 0x8d: /* lea r16/r32, m */
12500
 
+               c->dst.val = c->modrm_val;
12501
 
+               break;
12502
 
+       case 0x8f:              /* pop (sole member of Grp1a) */
12503
 
+               rc = emulate_grp1a(ctxt, ops);
12504
 
+               if (rc != 0)
12505
 
+                       goto done;
12506
 
+               break;
12507
 
+       case 0x9c: /* pushf */
12508
 
+               c->src.val =  (unsigned long) ctxt->eflags;
12509
 
+               emulate_push(ctxt);
12510
 
+               break;
12511
 
+       case 0x9d: /* popf */
12512
 
+               c->dst.ptr = (unsigned long *) &ctxt->eflags;
12513
 
+               goto pop_instruction;
12514
 
+       case 0xa0 ... 0xa1:     /* mov */
12515
 
+               c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
12516
 
+               c->dst.val = c->src.val;
12517
 
+               break;
12518
 
+       case 0xa2 ... 0xa3:     /* mov */
12519
 
+               c->dst.val = (unsigned long)c->regs[VCPU_REGS_RAX];
12520
 
+               break;
12521
 
+       case 0xa4 ... 0xa5:     /* movs */
12522
 
+               c->dst.type = OP_MEM;
12523
 
+               c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
12524
 
+               c->dst.ptr = (unsigned long *)register_address(
12525
 
+                                                  ctxt->es_base,
12526
 
+                                                  c->regs[VCPU_REGS_RDI]);
12527
 
+               if ((rc = ops->read_emulated(register_address(
12528
 
+                     c->override_base ? *c->override_base :
12529
 
+                                       ctxt->ds_base,
12530
 
+                                       c->regs[VCPU_REGS_RSI]),
12531
 
+                                       &c->dst.val,
12532
 
+                                       c->dst.bytes, ctxt->vcpu)) != 0)
12533
 
+                       goto done;
12534
 
+
12535
 
+               register_address_increment(c->regs[VCPU_REGS_RSI],
12536
 
+                                      (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
12537
 
+                                                          : c->dst.bytes);
12538
 
+               register_address_increment(c->regs[VCPU_REGS_RDI],
12539
 
+                                      (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
12540
 
+                                                          : c->dst.bytes);
12541
 
+               break;
12542
 
+       case 0xa6 ... 0xa7:     /* cmps */
12543
 
+               c->src.type = OP_NONE; /* Disable writeback. */
12544
 
+               c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
12545
 
+               c->src.ptr = (unsigned long *)register_address(
12546
 
+                               c->override_base ? *c->override_base :
12547
 
+                                                  ctxt->ds_base,
12548
 
+                                                  c->regs[VCPU_REGS_RSI]);
12549
 
+               if ((rc = ops->read_emulated((unsigned long)c->src.ptr,
12550
 
+                                               &c->src.val,
12551
 
+                                               c->src.bytes,
12552
 
+                                               ctxt->vcpu)) != 0)
12553
 
+                       goto done;
12554
 
+
12555
 
+               c->dst.type = OP_NONE; /* Disable writeback. */
12556
 
+               c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
12557
 
+               c->dst.ptr = (unsigned long *)register_address(
12558
 
+                                                  ctxt->es_base,
12559
 
+                                                  c->regs[VCPU_REGS_RDI]);
12560
 
+               if ((rc = ops->read_emulated((unsigned long)c->dst.ptr,
12561
 
+                                               &c->dst.val,
12562
 
+                                               c->dst.bytes,
12563
 
+                                               ctxt->vcpu)) != 0)
12564
 
+                       goto done;
12565
 
+
12566
 
+               DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr);
12567
 
+
12568
 
+               emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
12569
 
+
12570
 
+               register_address_increment(c->regs[VCPU_REGS_RSI],
12571
 
+                                      (ctxt->eflags & EFLG_DF) ? -c->src.bytes
12572
 
+                                                                 : c->src.bytes);
12573
 
+               register_address_increment(c->regs[VCPU_REGS_RDI],
12574
 
+                                      (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
12575
 
+                                                                 : c->dst.bytes);
12576
 
+
12577
 
+               break;
12578
 
+       case 0xaa ... 0xab:     /* stos */
12579
 
+               c->dst.type = OP_MEM;
12580
 
+               c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
12581
 
+               c->dst.ptr = (unsigned long *)register_address(
12582
 
+                                                  ctxt->es_base,
12583
 
+                                                  c->regs[VCPU_REGS_RDI]);
12584
 
+               c->dst.val = c->regs[VCPU_REGS_RAX];
12585
 
+               register_address_increment(c->regs[VCPU_REGS_RDI],
12586
 
+                                      (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
12587
 
+                                                          : c->dst.bytes);
12588
 
+               break;
12589
 
+       case 0xac ... 0xad:     /* lods */
12590
 
+               c->dst.type = OP_REG;
12591
 
+               c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
12592
 
+               c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
12593
 
+               if ((rc = ops->read_emulated(register_address(
12594
 
+                               c->override_base ? *c->override_base :
12595
 
+                                                  ctxt->ds_base,
12596
 
+                                                c->regs[VCPU_REGS_RSI]),
12597
 
+                                                &c->dst.val,
12598
 
+                                                c->dst.bytes,
12599
 
+                                                ctxt->vcpu)) != 0)
12600
 
+                       goto done;
12601
 
+
12602
 
+               register_address_increment(c->regs[VCPU_REGS_RSI],
12603
 
+                                      (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
12604
 
+                                                          : c->dst.bytes);
12605
 
+               break;
12606
 
+       case 0xae ... 0xaf:     /* scas */
12607
 
+               DPRINTF("Urk! I don't handle SCAS.\n");
12608
 
+               goto cannot_emulate;
12609
 
+       case 0xc0 ... 0xc1:
12610
 
+               emulate_grp2(ctxt);
12611
 
+               break;
12612
 
+       case 0xc3: /* ret */
12613
 
+               c->dst.ptr = &c->eip;
12614
 
+               goto pop_instruction;
12615
 
+       case 0xc6 ... 0xc7:     /* mov (sole member of Grp11) */
12616
 
+       mov:
12617
 
+               c->dst.val = c->src.val;
12618
 
+               break;
12619
 
+       case 0xd0 ... 0xd1:     /* Grp2 */
12620
 
+               c->src.val = 1;
12621
 
+               emulate_grp2(ctxt);
12622
 
+               break;
12623
 
+       case 0xd2 ... 0xd3:     /* Grp2 */
12624
 
+               c->src.val = c->regs[VCPU_REGS_RCX];
12625
 
+               emulate_grp2(ctxt);
12626
 
+               break;
12627
 
+       case 0xe8: /* call (near) */ {
12628
 
+               long int rel;
12629
 
+               switch (c->op_bytes) {
12630
 
+               case 2:
12631
 
+                       rel = insn_fetch(s16, 2, c->eip);
12632
 
+                       break;
12633
 
+               case 4:
12634
 
+                       rel = insn_fetch(s32, 4, c->eip);
12635
 
+                       break;
12636
 
+               default:
12637
 
+                       DPRINTF("Call: Invalid op_bytes\n");
12638
 
+                       goto cannot_emulate;
12639
 
+               }
12640
 
+               c->src.val = (unsigned long) c->eip;
12641
 
+               JMP_REL(rel);
12642
 
+                * emulate_push() saves the value using c->op_bytes, so set
12644
 
+                * it to the size of eip here so that the whole value of
12645
 
+                * eip is saved
12645
 
+                * of eip will be saved
12646
 
+                */
12647
 
+               c->op_bytes = c->ad_bytes;
12648
 
+               emulate_push(ctxt);
12649
 
+               break;
12650
 
+       }
12651
 
+       case 0xe9: /* jmp rel */
12652
 
+       case 0xeb: /* jmp rel short */
12653
 
+               JMP_REL(c->src.val);
12654
 
+               c->dst.type = OP_NONE; /* Disable writeback. */
12655
 
+               break;
12656
 
+       case 0xf4:              /* hlt */
12657
 
+               ctxt->vcpu->arch.halt_request = 1;
12658
 
+               goto done;
12659
 
+       case 0xf5:      /* cmc */
12660
 
+               /* complement carry flag from eflags reg */
12661
 
+               ctxt->eflags ^= EFLG_CF;
12662
 
+               c->dst.type = OP_NONE;  /* Disable writeback. */
12663
 
+               break;
12664
 
+       case 0xf6 ... 0xf7:     /* Grp3 */
12665
 
+               rc = emulate_grp3(ctxt, ops);
12666
 
+               if (rc != 0)
12667
 
+                       goto done;
12668
 
+               break;
12669
 
+       case 0xf8: /* clc */
12670
 
+               ctxt->eflags &= ~EFLG_CF;
12671
 
+               c->dst.type = OP_NONE;  /* Disable writeback. */
12672
 
+               break;
12673
 
+       case 0xfa: /* cli */
12674
 
+               ctxt->eflags &= ~X86_EFLAGS_IF;
12675
 
+               c->dst.type = OP_NONE;  /* Disable writeback. */
12676
 
+               break;
12677
 
+       case 0xfb: /* sti */
12678
 
+               ctxt->eflags |= X86_EFLAGS_IF;
12679
 
+               c->dst.type = OP_NONE;  /* Disable writeback. */
12680
 
+               break;
12681
 
+       case 0xfe ... 0xff:     /* Grp4/Grp5 */
12682
 
+               rc = emulate_grp45(ctxt, ops);
12683
 
+               if (rc != 0)
12684
 
+                       goto done;
12685
 
+               break;
12686
 
+       }
12687
 
+
12688
 
+writeback:
12689
 
+       rc = writeback(ctxt, ops);
12690
 
+       if (rc != 0)
12691
 
+               goto done;
12692
 
+
12693
 
+       /* Commit shadow register state. */
12694
 
+       memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs);
12695
 
+       ctxt->vcpu->arch.rip = c->eip;
12696
 
+
12697
 
+done:
12698
 
+       if (rc == X86EMUL_UNHANDLEABLE) {
12699
 
+               c->eip = saved_eip;
12700
 
+               return -1;
12701
 
+       }
12702
 
+       return 0;
12703
 
+
12704
 
+twobyte_insn:
12705
 
+       switch (c->b) {
12706
 
+       case 0x01: /* lgdt, lidt, lmsw */
12707
 
+               switch (c->modrm_reg) {
12708
 
+                       u16 size;
12709
 
+                       unsigned long address;
12710
 
+
12711
 
+               case 0: /* vmcall */
12712
 
+                       if (c->modrm_mod != 3 || c->modrm_rm != 1)
12713
 
+                               goto cannot_emulate;
12714
 
+
12715
 
+                       rc = kvm_fix_hypercall(ctxt->vcpu);
12716
 
+                       if (rc)
12717
 
+                               goto done;
12718
 
+
12719
 
+                       kvm_emulate_hypercall(ctxt->vcpu);
12720
 
+                       break;
12721
 
+               case 2: /* lgdt */
12722
 
+                       rc = read_descriptor(ctxt, ops, c->src.ptr,
12723
 
+                                            &size, &address, c->op_bytes);
12724
 
+                       if (rc)
12725
 
+                               goto done;
12726
 
+                       realmode_lgdt(ctxt->vcpu, size, address);
12727
 
+                       break;
12728
 
+               case 3: /* lidt/vmmcall */
12729
 
+                       if (c->modrm_mod == 3 && c->modrm_rm == 1) {
12730
 
+                               rc = kvm_fix_hypercall(ctxt->vcpu);
12731
 
+                               if (rc)
12732
 
+                                       goto done;
12733
 
+                               kvm_emulate_hypercall(ctxt->vcpu);
12734
 
+                       } else {
12735
 
+                               rc = read_descriptor(ctxt, ops, c->src.ptr,
12736
 
+                                                    &size, &address,
12737
 
+                                                    c->op_bytes);
12738
 
+                               if (rc)
12739
 
+                                       goto done;
12740
 
+                               realmode_lidt(ctxt->vcpu, size, address);
12741
 
+                       }
12742
 
+                       break;
12743
 
+               case 4: /* smsw */
12744
 
+                       if (c->modrm_mod != 3)
12745
 
+                               goto cannot_emulate;
12746
 
+                       *(u16 *)&c->regs[c->modrm_rm]
12747
 
+                               = realmode_get_cr(ctxt->vcpu, 0);
12748
 
+                       break;
12749
 
+               case 6: /* lmsw */
12750
 
+                       if (c->modrm_mod != 3)
12751
 
+                               goto cannot_emulate;
12752
 
+                       realmode_lmsw(ctxt->vcpu, (u16)c->modrm_val,
12753
 
+                                                 &ctxt->eflags);
12754
 
+                       break;
12755
 
+               case 7: /* invlpg */
12756
 
+                       emulate_invlpg(ctxt->vcpu, memop);
12757
 
+                       break;
12758
 
+               default:
12759
 
+                       goto cannot_emulate;
12760
 
+               }
12761
 
+               /* Disable writeback. */
12762
 
+               c->dst.type = OP_NONE;
12763
 
+               break;
12764
 
+       case 0x06:
12765
 
+               emulate_clts(ctxt->vcpu);
12766
 
+               c->dst.type = OP_NONE;
12767
 
+               break;
12768
 
+       case 0x08:              /* invd */
12769
 
+       case 0x09:              /* wbinvd */
12770
 
+       case 0x0d:              /* GrpP (prefetch) */
12771
 
+       case 0x18:              /* Grp16 (prefetch/nop) */
12772
 
+               c->dst.type = OP_NONE;
12773
 
+               break;
12774
 
+       case 0x20: /* mov cr, reg */
12775
 
+               if (c->modrm_mod != 3)
12776
 
+                       goto cannot_emulate;
12777
 
+               c->regs[c->modrm_rm] =
12778
 
+                               realmode_get_cr(ctxt->vcpu, c->modrm_reg);
12779
 
+               c->dst.type = OP_NONE;  /* no writeback */
12780
 
+               break;
12781
 
+       case 0x21: /* mov from dr to reg */
12782
 
+               if (c->modrm_mod != 3)
12783
 
+                       goto cannot_emulate;
12784
 
+               rc = emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]);
12785
 
+               if (rc)
12786
 
+                       goto cannot_emulate;
12787
 
+               c->dst.type = OP_NONE;  /* no writeback */
12788
 
+               break;
12789
 
+       case 0x22: /* mov reg, cr */
12790
 
+               if (c->modrm_mod != 3)
12791
 
+                       goto cannot_emulate;
12792
 
+               realmode_set_cr(ctxt->vcpu,
12793
 
+                               c->modrm_reg, c->modrm_val, &ctxt->eflags);
12794
 
+               c->dst.type = OP_NONE;
12795
 
+               break;
12796
 
+       case 0x23: /* mov from reg to dr */
12797
 
+               if (c->modrm_mod != 3)
12798
 
+                       goto cannot_emulate;
12799
 
+               rc = emulator_set_dr(ctxt, c->modrm_reg,
12800
 
+                                    c->regs[c->modrm_rm]);
12801
 
+               if (rc)
12802
 
+                       goto cannot_emulate;
12803
 
+               c->dst.type = OP_NONE;  /* no writeback */
12804
 
+               break;
12805
 
+       case 0x30:
12806
 
+               /* wrmsr */
12807
 
+               msr_data = (u32)c->regs[VCPU_REGS_RAX]
12808
 
+                       | ((u64)c->regs[VCPU_REGS_RDX] << 32);
12809
 
+               rc = kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data);
12810
 
+               if (rc) {
12811
 
+                       kvm_inject_gp(ctxt->vcpu, 0);
12812
 
+                       c->eip = ctxt->vcpu->arch.rip;
12813
 
+               }
12814
 
+               rc = X86EMUL_CONTINUE;
12815
 
+               c->dst.type = OP_NONE;
12816
 
+               break;
12817
 
+       case 0x32:
12818
 
+               /* rdmsr */
12819
 
+               rc = kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data);
12820
 
+               if (rc) {
12821
 
+                       kvm_inject_gp(ctxt->vcpu, 0);
12822
 
+                       c->eip = ctxt->vcpu->arch.rip;
12823
 
+               } else {
12824
 
+                       c->regs[VCPU_REGS_RAX] = (u32)msr_data;
12825
 
+                       c->regs[VCPU_REGS_RDX] = msr_data >> 32;
12826
 
+               }
12827
 
+               rc = X86EMUL_CONTINUE;
12828
 
+               c->dst.type = OP_NONE;
12829
 
+               break;
12830
 
+       case 0x40 ... 0x4f:     /* cmov */
12831
 
+               c->dst.val = c->dst.orig_val = c->src.val;
12832
 
+               if (!test_cc(c->b, ctxt->eflags))
12833
 
+                       c->dst.type = OP_NONE; /* no writeback */
12834
 
+               break;
12835
 
+       case 0x80 ... 0x8f: /* jnz rel, etc */ {
12836
 
+               long int rel;
12837
 
+
12838
 
+               switch (c->op_bytes) {
12839
 
+               case 2:
12840
 
+                       rel = insn_fetch(s16, 2, c->eip);
12841
 
+                       break;
12842
 
+               case 4:
12843
 
+                       rel = insn_fetch(s32, 4, c->eip);
12844
 
+                       break;
12845
 
+               case 8:
12846
 
+                       rel = insn_fetch(s64, 8, c->eip);
12847
 
+                       break;
12848
 
+               default:
12849
 
+                       DPRINTF("jnz: Invalid op_bytes\n");
12850
 
+                       goto cannot_emulate;
12851
 
+               }
12852
 
+               if (test_cc(c->b, ctxt->eflags))
12853
 
+                       JMP_REL(rel);
12854
 
+               c->dst.type = OP_NONE;
12855
 
+               break;
12856
 
+       }
12857
 
+       case 0xa3:
12858
 
+             bt:               /* bt */
12859
 
+               c->dst.type = OP_NONE;
+               /* only subword offset */
+               c->src.val &= (c->dst.bytes << 3) - 1;
+               emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags);
+               break;
+       case 0xab:
+             bts:              /* bts */
+               /* only subword offset */
+               c->src.val &= (c->dst.bytes << 3) - 1;
+               emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags);
+               break;
+       case 0xb0 ... 0xb1:     /* cmpxchg */
+               /*
+                * Save real source value, then compare EAX against
+                * destination.
+                */
+               c->src.orig_val = c->src.val;
+               c->src.val = c->regs[VCPU_REGS_RAX];
+               emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
+               if (ctxt->eflags & EFLG_ZF) {
+                       /* Success: write back to memory. */
+                       c->dst.val = c->src.orig_val;
+               } else {
+                       /* Failure: write the value we saw to EAX. */
+                       c->dst.type = OP_REG;
+                       c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
+               }
+               break;
+       case 0xb3:
+             btr:              /* btr */
+               /* only subword offset */
+               c->src.val &= (c->dst.bytes << 3) - 1;
+               emulate_2op_SrcV_nobyte("btr", c->src, c->dst, ctxt->eflags);
+               break;
+       case 0xb6 ... 0xb7:     /* movzx */
+               c->dst.bytes = c->op_bytes;
+               c->dst.val = (c->d & ByteOp) ? (u8) c->src.val
+                                                      : (u16) c->src.val;
+               break;
+       case 0xba:              /* Grp8 */
+               switch (c->modrm_reg & 3) {
+               case 0:
+                       goto bt;
+               case 1:
+                       goto bts;
+               case 2:
+                       goto btr;
+               case 3:
+                       goto btc;
+               }
+               break;
+       case 0xbb:
+             btc:              /* btc */
+               /* only subword offset */
+               c->src.val &= (c->dst.bytes << 3) - 1;
+               emulate_2op_SrcV_nobyte("btc", c->src, c->dst, ctxt->eflags);
+               break;
+       case 0xbe ... 0xbf:     /* movsx */
+               c->dst.bytes = c->op_bytes;
+               c->dst.val = (c->d & ByteOp) ? (s8) c->src.val :
+                                                       (s16) c->src.val;
+               break;
+       case 0xc3:              /* movnti */
+               c->dst.bytes = c->op_bytes;
+               c->dst.val = (c->op_bytes == 4) ? (u32) c->src.val :
+                                                       (u64) c->src.val;
+               break;
+       case 0xc7:              /* Grp9 (cmpxchg8b) */
+               rc = emulate_grp9(ctxt, ops, memop);
+               if (rc != 0)
+                       goto done;
+               c->dst.type = OP_NONE;
+               break;
+       }
+       goto writeback;
+
+cannot_emulate:
+       DPRINTF("Cannot emulate %02x\n", c->b);
+       c->eip = saved_eip;
+       return -1;
+}
diff --git a/drivers/Kconfig b/drivers/Kconfig
index f4076d9..08d4ae2 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -90,8 +90,6 @@ source "drivers/dca/Kconfig"
 
 source "drivers/auxdisplay/Kconfig"
 
-source "drivers/kvm/Kconfig"
-
 source "drivers/uio/Kconfig"
 
 source "drivers/virtio/Kconfig"
diff --git a/drivers/Makefile b/drivers/Makefile
index 8cb37e3..513ae86 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -47,7 +47,6 @@ obj-$(CONFIG_SPI)             += spi/
 obj-$(CONFIG_PCCARD)           += pcmcia/
 obj-$(CONFIG_DIO)              += dio/
 obj-$(CONFIG_SBUS)             += sbus/
-obj-$(CONFIG_KVM)              += kvm/
 obj-$(CONFIG_ZORRO)            += zorro/
 obj-$(CONFIG_MAC)              += macintosh/
 obj-$(CONFIG_ATA_OVER_ETH)     += block/aoe/
diff --git a/drivers/kvm/irq.h b/drivers/kvm/irq.h
deleted file mode 100644
index 11fc014..0000000
--- a/drivers/kvm/irq.h
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * irq.h: in kernel interrupt controller related definitions
- * Copyright (c) 2007, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- * Authors:
- *   Yaozu (Eddie) Dong <Eddie.dong@intel.com>
- *
- */
-
-#ifndef __IRQ_H
-#define __IRQ_H
-
-#include "kvm.h"
-
-typedef void irq_request_func(void *opaque, int level);
-
-struct kvm_kpic_state {
-       u8 last_irr;    /* edge detection */
-       u8 irr;         /* interrupt request register */
-       u8 imr;         /* interrupt mask register */
-       u8 isr;         /* interrupt service register */
-       u8 priority_add;        /* highest irq priority */
-       u8 irq_base;
-       u8 read_reg_select;
-       u8 poll;
-       u8 special_mask;
-       u8 init_state;
-       u8 auto_eoi;
-       u8 rotate_on_auto_eoi;
-       u8 special_fully_nested_mode;
-       u8 init4;               /* true if 4 byte init */
-       u8 elcr;                /* PIIX edge/trigger selection */
-       u8 elcr_mask;
-       struct kvm_pic *pics_state;
-};
-
-struct kvm_pic {
-       struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
-       irq_request_func *irq_request;
-       void *irq_request_opaque;
-       int output;             /* intr from master PIC */
-       struct kvm_io_device dev;
-};
-
-struct kvm_pic *kvm_create_pic(struct kvm *kvm);
-void kvm_pic_set_irq(void *opaque, int irq, int level);
-int kvm_pic_read_irq(struct kvm_pic *s);
-int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
-int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
-void kvm_pic_update_irq(struct kvm_pic *s);
-
-#define IOAPIC_NUM_PINS  KVM_IOAPIC_NUM_PINS
-#define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */
-#define IOAPIC_EDGE_TRIG  0
-#define IOAPIC_LEVEL_TRIG 1
-
-#define IOAPIC_DEFAULT_BASE_ADDRESS  0xfec00000
-#define IOAPIC_MEM_LENGTH            0x100
-
-/* Direct registers. */
-#define IOAPIC_REG_SELECT  0x00
-#define IOAPIC_REG_WINDOW  0x10
-#define IOAPIC_REG_EOI     0x40        /* IA64 IOSAPIC only */
-
-/* Indirect registers. */
-#define IOAPIC_REG_APIC_ID 0x00        /* x86 IOAPIC only */
-#define IOAPIC_REG_VERSION 0x01
-#define IOAPIC_REG_ARB_ID  0x02        /* x86 IOAPIC only */
-
-struct kvm_ioapic {
-       u64 base_address;
-       u32 ioregsel;
-       u32 id;
-       u32 irr;
-       u32 pad;
-       union ioapic_redir_entry {
-               u64 bits;
-               struct {
-                       u8 vector;
-                       u8 delivery_mode:3;
-                       u8 dest_mode:1;
-                       u8 delivery_status:1;
-                       u8 polarity:1;
-                       u8 remote_irr:1;
-                       u8 trig_mode:1;
-                       u8 mask:1;
-                       u8 reserve:7;
-                       u8 reserved[4];
-                       u8 dest_id;
-               } fields;
-       } redirtbl[IOAPIC_NUM_PINS];
-       struct kvm_io_device dev;
-       struct kvm *kvm;
-};
-
-struct kvm_lapic {
-       unsigned long base_address;
-       struct kvm_io_device dev;
-       struct {
-               atomic_t pending;
-               s64 period;     /* unit: ns */
-               u32 divide_count;
-               ktime_t last_update;
-               struct hrtimer dev;
-       } timer;
-       struct kvm_vcpu *vcpu;
-       struct page *regs_page;
-       void *regs;
-};
-
-#ifdef DEBUG
-#define ASSERT(x)                                                      \
-do {                                                                   \
-       if (!(x)) {                                                     \
-               printk(KERN_EMERG "assertion failed %s: %d: %s\n",      \
-                      __FILE__, __LINE__, #x);                         \
-               BUG();                                                  \
-       }                                                               \
-} while (0)
-#else
-#define ASSERT(x) do { } while (0)
-#endif
-
-void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
-int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
-int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu);
-int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
-int kvm_create_lapic(struct kvm_vcpu *vcpu);
-void kvm_lapic_reset(struct kvm_vcpu *vcpu);
-void kvm_free_apic(struct kvm_lapic *apic);
-u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
-void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
-void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
-struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector,
-                                      unsigned long bitmap);
-u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
-void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
-int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
-void kvm_ioapic_update_eoi(struct kvm *kvm, int vector);
-int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
-int kvm_apic_set_irq(struct kvm_lapic *apic, u8 vec, u8 trig);
-void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
-int kvm_ioapic_init(struct kvm *kvm);
-void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
-int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
-int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
-void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
-void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
-void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
-void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
-void kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
-
-#endif
diff --git a/drivers/kvm/segment_descriptor.h b/drivers/kvm/segment_descriptor.h
deleted file mode 100644
index 71fdf45..0000000
--- a/drivers/kvm/segment_descriptor.h
+++ /dev/null
@@ -1,17 +0,0 @@
-struct segment_descriptor {
-       u16 limit_low;
-       u16 base_low;
-       u8  base_mid;
-       u8  type : 4;
-       u8  system : 1;
-       u8  dpl : 2;
-       u8  present : 1;
-       u8  limit_high : 4;
-       u8  avl : 1;
-       u8  long_mode : 1;
-       u8  default_op : 1;
-       u8  granularity : 1;
-       u8  base_high;
-} __attribute__((packed));
-
-
diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
deleted file mode 100644
index bd46de6..0000000
--- a/drivers/kvm/x86_emulate.c
+++ /dev/null
@@ -1,1662 +0,0 @@
-/******************************************************************************
- * x86_emulate.c
- *
- * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
- *
- * Copyright (c) 2005 Keir Fraser
- *
- * Linux coding style, mod r/m decoder, segment base fixes, real-mode
- * privileged instructions:
- *
- * Copyright (C) 2006 Qumranet
- *
- *   Avi Kivity <avi@qumranet.com>
- *   Yaniv Kamay <yaniv@qumranet.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2.  See
- * the COPYING file in the top-level directory.
- *
- * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
- */
-
-#ifndef __KERNEL__
-#include <stdio.h>
-#include <stdint.h>
-#include <public/xen.h>
-#define DPRINTF(_f, _a ...) printf( _f , ## _a )
-#else
-#include "kvm.h"
-#define DPRINTF(x...) do {} while (0)
-#endif
-#include "x86_emulate.h"
-#include <linux/module.h>
-
-/*
- * Opcode effective-address decode tables.
- * Note that we only emulate instructions that have at least one memory
- * operand (excluding implicit stack references). We assume that stack
- * references and instruction fetches will never occur in special memory
- * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
- * not be handled.
- */
-
-/* Operand sizes: 8-bit operands or specified/overridden size. */
-#define ByteOp      (1<<0)     /* 8-bit operands. */
-/* Destination operand type. */
-#define ImplicitOps (1<<1)     /* Implicit in opcode. No generic decode. */
-#define DstReg      (2<<1)     /* Register operand. */
-#define DstMem      (3<<1)     /* Memory operand. */
-#define DstMask     (3<<1)
-/* Source operand type. */
-#define SrcNone     (0<<3)     /* No source operand. */
-#define SrcImplicit (0<<3)     /* Source operand is implicit in the opcode. */
-#define SrcReg      (1<<3)     /* Register operand. */
-#define SrcMem      (2<<3)     /* Memory operand. */
-#define SrcMem16    (3<<3)     /* Memory operand (16-bit). */
-#define SrcMem32    (4<<3)     /* Memory operand (32-bit). */
-#define SrcImm      (5<<3)     /* Immediate operand. */
-#define SrcImmByte  (6<<3)     /* 8-bit sign-extended immediate operand. */
-#define SrcMask     (7<<3)
-/* Generic ModRM decode. */
-#define ModRM       (1<<6)
-/* Destination is only written; never read. */
-#define Mov         (1<<7)
-#define BitOp       (1<<8)
-
-static u8 opcode_table[256] = {
13231
 
-       /* 0x00 - 0x07 */
13232
 
-       ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
13233
 
-       ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
13234
 
-       0, 0, 0, 0,
13235
 
-       /* 0x08 - 0x0F */
13236
 
-       ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
13237
 
-       ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
13238
 
-       0, 0, 0, 0,
13239
 
-       /* 0x10 - 0x17 */
13240
 
-       ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
13241
 
-       ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
13242
 
-       0, 0, 0, 0,
13243
 
-       /* 0x18 - 0x1F */
13244
 
-       ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
13245
 
-       ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
13246
 
-       0, 0, 0, 0,
13247
 
-       /* 0x20 - 0x27 */
13248
 
-       ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
13249
 
-       ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
13250
 
-       SrcImmByte, SrcImm, 0, 0,
13251
 
-       /* 0x28 - 0x2F */
13252
 
-       ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
13253
 
-       ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
13254
 
-       0, 0, 0, 0,
13255
 
-       /* 0x30 - 0x37 */
13256
 
-       ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
13257
 
-       ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
13258
 
-       0, 0, 0, 0,
13259
 
-       /* 0x38 - 0x3F */
13260
 
-       ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
13261
 
-       ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
13262
 
-       0, 0, 0, 0,
13263
 
-       /* 0x40 - 0x4F */
13264
 
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13265
 
-       /* 0x50 - 0x57 */
13266
 
-       ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
13267
 
-       ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
13268
 
-       /* 0x58 - 0x5F */
13269
 
-       ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
13270
 
-       ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
13271
 
-       /* 0x60 - 0x67 */
13272
 
-       0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ ,
13273
 
-       0, 0, 0, 0,
13274
 
-       /* 0x68 - 0x6F */
13275
 
-       0, 0, ImplicitOps|Mov, 0,
13276
 
-       SrcNone  | ByteOp  | ImplicitOps, SrcNone  | ImplicitOps, /* insb, insw/insd */
13277
 
-       SrcNone  | ByteOp  | ImplicitOps, SrcNone  | ImplicitOps, /* outsb, outsw/outsd */
13278
 
-       /* 0x70 - 0x77 */
13279
 
-       ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
13280
 
-       ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
13281
 
-       /* 0x78 - 0x7F */
13282
 
-       ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
13283
 
-       ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
13284
 
-       /* 0x80 - 0x87 */
13285
 
-       ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
13286
 
-       ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM,
13287
 
-       ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
13288
 
-       ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
13289
 
-       /* 0x88 - 0x8F */
13290
 
-       ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov,
13291
 
-       ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
13292
 
-       0, ModRM | DstReg, 0, DstMem | SrcNone | ModRM | Mov,
13293
 
-       /* 0x90 - 0x9F */
13294
 
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps, ImplicitOps, 0, 0,
13295
 
-       /* 0xA0 - 0xA7 */
13296
 
-       ByteOp | DstReg | SrcMem | Mov, DstReg | SrcMem | Mov,
13297
 
-       ByteOp | DstMem | SrcReg | Mov, DstMem | SrcReg | Mov,
13298
 
-       ByteOp | ImplicitOps | Mov, ImplicitOps | Mov,
13299
 
-       ByteOp | ImplicitOps, ImplicitOps,
13300
 
-       /* 0xA8 - 0xAF */
13301
 
-       0, 0, ByteOp | ImplicitOps | Mov, ImplicitOps | Mov,
13302
 
-       ByteOp | ImplicitOps | Mov, ImplicitOps | Mov,
13303
 
-       ByteOp | ImplicitOps, ImplicitOps,
13304
 
-       /* 0xB0 - 0xBF */
13305
 
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13306
 
-       /* 0xC0 - 0xC7 */
13307
 
-       ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM,
13308
 
-       0, ImplicitOps, 0, 0,
13309
 
-       ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
13310
 
-       /* 0xC8 - 0xCF */
13311
 
-       0, 0, 0, 0, 0, 0, 0, 0,
13312
 
-       /* 0xD0 - 0xD7 */
13313
 
-       ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
13314
 
-       ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
13315
 
-       0, 0, 0, 0,
13316
 
-       /* 0xD8 - 0xDF */
13317
 
-       0, 0, 0, 0, 0, 0, 0, 0,
13318
 
-       /* 0xE0 - 0xE7 */
13319
 
-       0, 0, 0, 0, 0, 0, 0, 0,
13320
 
-       /* 0xE8 - 0xEF */
13321
 
-       ImplicitOps, SrcImm|ImplicitOps, 0, SrcImmByte|ImplicitOps, 0, 0, 0, 0,
13322
 
-       /* 0xF0 - 0xF7 */
13323
 
-       0, 0, 0, 0,
13324
 
-       ImplicitOps, 0,
13325
 
-       ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
13326
 
-       /* 0xF8 - 0xFF */
13327
 
-       0, 0, 0, 0,
13328
 
-       0, 0, ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM
13329
 
-};
13330
 
-
13331
 
-static u16 twobyte_table[256] = {
13332
 
-       /* 0x00 - 0x0F */
13333
 
-       0, SrcMem | ModRM | DstReg, 0, 0, 0, 0, ImplicitOps, 0,
13334
 
-       ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0,
13335
 
-       /* 0x10 - 0x1F */
13336
 
-       0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
13337
 
-       /* 0x20 - 0x2F */
13338
 
-       ModRM | ImplicitOps, ModRM, ModRM | ImplicitOps, ModRM, 0, 0, 0, 0,
13339
 
-       0, 0, 0, 0, 0, 0, 0, 0,
13340
 
-       /* 0x30 - 0x3F */
13341
 
-       ImplicitOps, 0, ImplicitOps, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13342
 
-       /* 0x40 - 0x47 */
13343
 
-       DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
13344
 
-       DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
13345
 
-       DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
13346
 
-       DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
13347
 
-       /* 0x48 - 0x4F */
13348
 
-       DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
13349
 
-       DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
13350
 
-       DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
13351
 
-       DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
13352
 
-       /* 0x50 - 0x5F */
13353
 
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13354
 
-       /* 0x60 - 0x6F */
13355
 
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13356
 
-       /* 0x70 - 0x7F */
13357
 
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13358
 
-       /* 0x80 - 0x8F */
13359
 
-       ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
13360
 
-       ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
13361
 
-       ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
13362
 
-       ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
13363
 
-       /* 0x90 - 0x9F */
13364
 
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13365
 
-       /* 0xA0 - 0xA7 */
13366
 
-       0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0,
13367
 
-       /* 0xA8 - 0xAF */
13368
 
-       0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0,
13369
 
-       /* 0xB0 - 0xB7 */
13370
 
-       ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0,
13371
 
-           DstMem | SrcReg | ModRM | BitOp,
13372
 
-       0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
13373
 
-           DstReg | SrcMem16 | ModRM | Mov,
13374
 
-       /* 0xB8 - 0xBF */
13375
 
-       0, 0, DstMem | SrcImmByte | ModRM, DstMem | SrcReg | ModRM | BitOp,
13376
 
-       0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
13377
 
-           DstReg | SrcMem16 | ModRM | Mov,
13378
 
-       /* 0xC0 - 0xCF */
13379
 
-       0, 0, 0, DstMem | SrcReg | ModRM | Mov, 0, 0, 0, ImplicitOps | ModRM,
13380
 
-       0, 0, 0, 0, 0, 0, 0, 0,
13381
 
-       /* 0xD0 - 0xDF */
13382
 
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13383
 
-       /* 0xE0 - 0xEF */
13384
 
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13385
 
-       /* 0xF0 - 0xFF */
13386
 
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
13387
 
-};
13388
 
-
13389
 
-/* Type, address-of, and value of an instruction's operand. */
13390
 
-struct operand {
13391
 
-       enum { OP_REG, OP_MEM, OP_IMM } type;
13392
 
-       unsigned int bytes;
13393
 
-       unsigned long val, orig_val, *ptr;
13394
 
-};
13395
 
-
13396
 
-/* EFLAGS bit definitions. */
13397
 
-#define EFLG_OF (1<<11)
13398
 
-#define EFLG_DF (1<<10)
13399
 
-#define EFLG_SF (1<<7)
13400
 
-#define EFLG_ZF (1<<6)
13401
 
-#define EFLG_AF (1<<4)
13402
 
-#define EFLG_PF (1<<2)
13403
 
-#define EFLG_CF (1<<0)
13404
 
-
13405
 
-/*
13406
 
- * Instruction emulation:
13407
 
- * Most instructions are emulated directly via a fragment of inline assembly
13408
 
- * code. This allows us to save/restore EFLAGS and thus very easily pick up
13409
 
- * any modified flags.
13410
 
- */
13411
 
-
13412
 
-#if defined(CONFIG_X86_64)
13413
 
-#define _LO32 "k"              /* force 32-bit operand */
13414
 
-#define _STK  "%%rsp"          /* stack pointer */
13415
 
-#elif defined(__i386__)
13416
 
-#define _LO32 ""               /* force 32-bit operand */
13417
 
-#define _STK  "%%esp"          /* stack pointer */
13418
 
-#endif
13419
 
-
13420
 
-/*
13421
 
- * These EFLAGS bits are restored from saved value during emulation, and
13422
 
- * any changes are written back to the saved value after emulation.
13423
 
- */
13424
 
-#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)
13425
 
-
13426
 
-/* Before executing instruction: restore necessary bits in EFLAGS. */
13427
 
-#define _PRE_EFLAGS(_sav, _msk, _tmp) \
13428
 
-       /* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); */        \
13429
 
-       "push %"_sav"; "                                        \
13430
 
-       "movl %"_msk",%"_LO32 _tmp"; "                          \
13431
 
-       "andl %"_LO32 _tmp",("_STK"); "                         \
13432
 
-       "pushf; "                                               \
13433
 
-       "notl %"_LO32 _tmp"; "                                  \
13434
 
-       "andl %"_LO32 _tmp",("_STK"); "                         \
13435
 
-       "pop  %"_tmp"; "                                        \
13436
 
-       "orl  %"_LO32 _tmp",("_STK"); "                         \
13437
 
-       "popf; "                                                \
13438
 
-       /* _sav &= ~msk; */                                     \
13439
 
-       "movl %"_msk",%"_LO32 _tmp"; "                          \
13440
 
-       "notl %"_LO32 _tmp"; "                                  \
13441
 
-       "andl %"_LO32 _tmp",%"_sav"; "
13442
 
-
13443
 
-/* After executing instruction: write-back necessary bits in EFLAGS. */
13444
 
-#define _POST_EFLAGS(_sav, _msk, _tmp) \
13445
 
-       /* _sav |= EFLAGS & _msk; */            \
13446
 
-       "pushf; "                               \
13447
 
-       "pop  %"_tmp"; "                        \
13448
 
-       "andl %"_msk",%"_LO32 _tmp"; "          \
13449
 
-       "orl  %"_LO32 _tmp",%"_sav"; "
13450
 
-
13451
 
-/* Raw emulation: instruction has two explicit operands. */
13452
 
-#define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \
13453
 
-       do {                                                                \
13454
 
-               unsigned long _tmp;                                         \
13455
 
-                                                                           \
13456
 
-               switch ((_dst).bytes) {                                     \
13457
 
-               case 2:                                                     \
13458
 
-                       __asm__ __volatile__ (                              \
13459
 
-                               _PRE_EFLAGS("0","4","2")                    \
13460
 
-                               _op"w %"_wx"3,%1; "                         \
13461
 
-                               _POST_EFLAGS("0","4","2")                   \
13462
 
-                               : "=m" (_eflags), "=m" ((_dst).val),        \
13463
 
-                                 "=&r" (_tmp)                              \
13464
 
-                               : _wy ((_src).val), "i" (EFLAGS_MASK) );    \
13465
 
-                       break;                                              \
13466
 
-               case 4:                                                     \
13467
 
-                       __asm__ __volatile__ (                              \
13468
 
-                               _PRE_EFLAGS("0","4","2")                    \
13469
 
-                               _op"l %"_lx"3,%1; "                         \
13470
 
-                               _POST_EFLAGS("0","4","2")                   \
13471
 
-                               : "=m" (_eflags), "=m" ((_dst).val),        \
13472
 
-                                 "=&r" (_tmp)                              \
13473
 
-                               : _ly ((_src).val), "i" (EFLAGS_MASK) );    \
13474
 
-                       break;                                              \
13475
 
-               case 8:                                                     \
13476
 
-                       __emulate_2op_8byte(_op, _src, _dst,                \
13477
 
-                                           _eflags, _qx, _qy);             \
13478
 
-                       break;                                              \
13479
 
-               }                                                           \
13480
 
-       } while (0)
13481
 
-
13482
 
-#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
13483
 
-       do {                                                                 \
13484
 
-               unsigned long _tmp;                                          \
13485
 
-               switch ( (_dst).bytes )                                      \
13486
 
-               {                                                            \
13487
 
-               case 1:                                                      \
13488
 
-                       __asm__ __volatile__ (                               \
13489
 
-                               _PRE_EFLAGS("0","4","2")                     \
13490
 
-                               _op"b %"_bx"3,%1; "                          \
13491
 
-                               _POST_EFLAGS("0","4","2")                    \
13492
 
-                               : "=m" (_eflags), "=m" ((_dst).val),         \
13493
 
-                                 "=&r" (_tmp)                               \
13494
 
-                               : _by ((_src).val), "i" (EFLAGS_MASK) );     \
13495
 
-                       break;                                               \
13496
 
-               default:                                                     \
13497
 
-                       __emulate_2op_nobyte(_op, _src, _dst, _eflags,       \
13498
 
-                                            _wx, _wy, _lx, _ly, _qx, _qy);  \
13499
 
-                       break;                                               \
13500
 
-               }                                                            \
13501
 
-       } while (0)
13502
 
-
13503
 
-/* Source operand is byte-sized and may be restricted to just %cl. */
13504
 
-#define emulate_2op_SrcB(_op, _src, _dst, _eflags)                      \
13505
 
-       __emulate_2op(_op, _src, _dst, _eflags,                         \
13506
 
-                     "b", "c", "b", "c", "b", "c", "b", "c")
13507
 
-
13508
 
-/* Source operand is byte, word, long or quad sized. */
13509
 
-#define emulate_2op_SrcV(_op, _src, _dst, _eflags)                      \
13510
 
-       __emulate_2op(_op, _src, _dst, _eflags,                         \
13511
 
-                     "b", "q", "w", "r", _LO32, "r", "", "r")
13512
 
-
13513
 
-/* Source operand is word, long or quad sized. */
13514
 
-#define emulate_2op_SrcV_nobyte(_op, _src, _dst, _eflags)               \
13515
 
-       __emulate_2op_nobyte(_op, _src, _dst, _eflags,                  \
13516
 
-                            "w", "r", _LO32, "r", "", "r")
13517
 
-
13518
 
-/* Instruction has only one explicit operand (no source operand). */
13519
 
-#define emulate_1op(_op, _dst, _eflags)                                    \
13520
 
-       do {                                                            \
13521
 
-               unsigned long _tmp;                                     \
13522
 
-                                                                       \
13523
 
-               switch ( (_dst).bytes )                                 \
13524
 
-               {                                                       \
13525
 
-               case 1:                                                 \
13526
 
-                       __asm__ __volatile__ (                          \
13527
 
-                               _PRE_EFLAGS("0","3","2")                \
13528
 
-                               _op"b %1; "                             \
13529
 
-                               _POST_EFLAGS("0","3","2")               \
13530
 
-                               : "=m" (_eflags), "=m" ((_dst).val),    \
13531
 
-                                 "=&r" (_tmp)                          \
13532
 
-                               : "i" (EFLAGS_MASK) );                  \
13533
 
-                       break;                                          \
13534
 
-               case 2:                                                 \
13535
 
-                       __asm__ __volatile__ (                          \
13536
 
-                               _PRE_EFLAGS("0","3","2")                \
13537
 
-                               _op"w %1; "                             \
13538
 
-                               _POST_EFLAGS("0","3","2")               \
13539
 
-                               : "=m" (_eflags), "=m" ((_dst).val),    \
13540
 
-                                 "=&r" (_tmp)                          \
13541
 
-                               : "i" (EFLAGS_MASK) );                  \
13542
 
-                       break;                                          \
13543
 
-               case 4:                                                 \
13544
 
-                       __asm__ __volatile__ (                          \
13545
 
-                               _PRE_EFLAGS("0","3","2")                \
13546
 
-                               _op"l %1; "                             \
13547
 
-                               _POST_EFLAGS("0","3","2")               \
13548
 
-                               : "=m" (_eflags), "=m" ((_dst).val),    \
13549
 
-                                 "=&r" (_tmp)                          \
13550
 
-                               : "i" (EFLAGS_MASK) );                  \
13551
 
-                       break;                                          \
13552
 
-               case 8:                                                 \
13553
 
-                       __emulate_1op_8byte(_op, _dst, _eflags);        \
13554
 
-                       break;                                          \
13555
 
-               }                                                       \
13556
 
-       } while (0)
13557
 
-
13558
 
-/* Emulate an instruction with quadword operands (x86/64 only). */
13559
 
-#if defined(CONFIG_X86_64)
13560
 
-#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy)           \
13561
 
-       do {                                                              \
13562
 
-               __asm__ __volatile__ (                                    \
13563
 
-                       _PRE_EFLAGS("0","4","2")                          \
13564
 
-                       _op"q %"_qx"3,%1; "                               \
13565
 
-                       _POST_EFLAGS("0","4","2")                         \
13566
 
-                       : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
13567
 
-                       : _qy ((_src).val), "i" (EFLAGS_MASK) );          \
13568
 
-       } while (0)
13569
 
-
13570
 
-#define __emulate_1op_8byte(_op, _dst, _eflags)                           \
13571
 
-       do {                                                              \
13572
 
-               __asm__ __volatile__ (                                    \
13573
 
-                       _PRE_EFLAGS("0","3","2")                          \
13574
 
-                       _op"q %1; "                                       \
13575
 
-                       _POST_EFLAGS("0","3","2")                         \
13576
 
-                       : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
13577
 
-                       : "i" (EFLAGS_MASK) );                            \
13578
 
-       } while (0)
13579
 
-
13580
 
-#elif defined(__i386__)
13581
 
-#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy)
13582
 
-#define __emulate_1op_8byte(_op, _dst, _eflags)
13583
 
-#endif                         /* __i386__ */
13584
 
-
13585
 
-/* Fetch next part of the instruction being emulated. */
13586
 
-#define insn_fetch(_type, _size, _eip)                                  \
13587
 
-({     unsigned long _x;                                               \
13588
 
-       rc = ops->read_std((unsigned long)(_eip) + ctxt->cs_base, &_x,  \
13589
 
-                                                  (_size), ctxt->vcpu); \
13590
 
-       if ( rc != 0 )                                                  \
13591
 
-               goto done;                                              \
13592
 
-       (_eip) += (_size);                                              \
13593
 
-       (_type)_x;                                                      \
13594
 
-})
13595
 
-
13596
 
-/* Access/update address held in a register, based on addressing mode. */
13597
 
-#define address_mask(reg)                                              \
13598
 
-       ((ad_bytes == sizeof(unsigned long)) ?                          \
13599
 
-               (reg) : ((reg) & ((1UL << (ad_bytes << 3)) - 1)))
13600
 
-#define register_address(base, reg)                                     \
13601
 
-       ((base) + address_mask(reg))
13602
 
-#define register_address_increment(reg, inc)                            \
13603
 
-       do {                                                            \
13604
 
-               /* signed type ensures sign extension to long */        \
13605
 
-               int _inc = (inc);                                       \
13606
 
-               if ( ad_bytes == sizeof(unsigned long) )                \
13607
 
-                       (reg) += _inc;                                  \
13608
 
-               else                                                    \
13609
 
-                       (reg) = ((reg) & ~((1UL << (ad_bytes << 3)) - 1)) | \
13610
 
-                          (((reg) + _inc) & ((1UL << (ad_bytes << 3)) - 1)); \
13611
 
-       } while (0)
13612
 
-
13613
 
-#define JMP_REL(rel)                                                   \
13614
 
-       do {                                                            \
13615
 
-               register_address_increment(_eip, rel);                  \
13616
 
-       } while (0)
13617
 
-
13618
 
-/*
13619
 
- * Given the 'reg' portion of a ModRM byte, and a register block, return a
13620
 
- * pointer into the block that addresses the relevant register.
13621
 
- * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
13622
 
- */
13623
 
-static void *decode_register(u8 modrm_reg, unsigned long *regs,
13624
 
-                            int highbyte_regs)
13625
 
-{
13626
 
-       void *p;
13627
 
-
13628
 
-       p = &regs[modrm_reg];
13629
 
-       if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
13630
 
-               p = (unsigned char *)&regs[modrm_reg & 3] + 1;
13631
 
-       return p;
13632
 
-}
13633
 
-
13634
 
-static int read_descriptor(struct x86_emulate_ctxt *ctxt,
13635
 
-                          struct x86_emulate_ops *ops,
13636
 
-                          void *ptr,
13637
 
-                          u16 *size, unsigned long *address, int op_bytes)
13638
 
-{
13639
 
-       int rc;
13640
 
-
13641
 
-       if (op_bytes == 2)
13642
 
-               op_bytes = 3;
13643
 
-       *address = 0;
13644
 
-       rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2,
13645
 
-                          ctxt->vcpu);
13646
 
-       if (rc)
13647
 
-               return rc;
13648
 
-       rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes,
13649
 
-                          ctxt->vcpu);
13650
 
-       return rc;
13651
 
-}
13652
 
-
13653
 
-static int test_cc(unsigned int condition, unsigned int flags)
13654
 
-{
13655
 
-       int rc = 0;
13656
 
-
13657
 
-       switch ((condition & 15) >> 1) {
13658
 
-       case 0: /* o */
13659
 
-               rc |= (flags & EFLG_OF);
13660
 
-               break;
13661
 
-       case 1: /* b/c/nae */
13662
 
-               rc |= (flags & EFLG_CF);
13663
 
-               break;
13664
 
-       case 2: /* z/e */
13665
 
-               rc |= (flags & EFLG_ZF);
13666
 
-               break;
13667
 
-       case 3: /* be/na */
13668
 
-               rc |= (flags & (EFLG_CF|EFLG_ZF));
13669
 
-               break;
13670
 
-       case 4: /* s */
13671
 
-               rc |= (flags & EFLG_SF);
13672
 
-               break;
13673
 
-       case 5: /* p/pe */
13674
 
-               rc |= (flags & EFLG_PF);
13675
 
-               break;
13676
 
-       case 7: /* le/ng */
13677
 
-               rc |= (flags & EFLG_ZF);
13678
 
-               /* fall through */
13679
 
-       case 6: /* l/nge */
13680
 
-               rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF));
13681
 
-               break;
13682
 
-       }
13683
 
-
13684
 
-       /* Odd condition identifiers (lsb == 1) have inverted sense. */
13685
 
-       return (!!rc ^ (condition & 1));
13686
 
-}
13687
 
-
13688
 
-int
13689
 
-x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
13690
 
-{
13691
 
-       unsigned d;
13692
 
-       u8 b, sib, twobyte = 0, rex_prefix = 0;
13693
 
-       u8 modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
13694
 
-       unsigned long *override_base = NULL;
13695
 
-       unsigned int op_bytes, ad_bytes, lock_prefix = 0, rep_prefix = 0, i;
13696
 
-       int rc = 0;
13697
 
-       struct operand src, dst;
13698
 
-       unsigned long cr2 = ctxt->cr2;
13699
 
-       int mode = ctxt->mode;
13700
 
-       unsigned long modrm_ea;
13701
 
-       int use_modrm_ea, index_reg = 0, base_reg = 0, scale, rip_relative = 0;
13702
 
-       int no_wb = 0;
13703
 
-       u64 msr_data;
13704
 
-
13705
 
-       /* Shadow copy of register state. Committed on successful emulation. */
13706
 
-       unsigned long _regs[NR_VCPU_REGS];
13707
 
-       unsigned long _eip = ctxt->vcpu->rip, _eflags = ctxt->eflags;
13708
 
-       unsigned long modrm_val = 0;
13709
 
-
13710
 
-       memcpy(_regs, ctxt->vcpu->regs, sizeof _regs);
13711
 
-
13712
 
-       switch (mode) {
13713
 
-       case X86EMUL_MODE_REAL:
13714
 
-       case X86EMUL_MODE_PROT16:
13715
 
-               op_bytes = ad_bytes = 2;
13716
 
-               break;
13717
 
-       case X86EMUL_MODE_PROT32:
13718
 
-               op_bytes = ad_bytes = 4;
13719
 
-               break;
13720
 
-#ifdef CONFIG_X86_64
13721
 
-       case X86EMUL_MODE_PROT64:
13722
 
-               op_bytes = 4;
13723
 
-               ad_bytes = 8;
13724
 
-               break;
13725
 
-#endif
13726
 
-       default:
13727
 
-               return -1;
13728
 
-       }
13729
 
-
13730
 
-       /* Legacy prefixes. */
13731
 
-       for (i = 0; i < 8; i++) {
13732
 
-               switch (b = insn_fetch(u8, 1, _eip)) {
13733
 
-               case 0x66:      /* operand-size override */
13734
 
-                       op_bytes ^= 6;  /* switch between 2/4 bytes */
13735
 
-                       break;
13736
 
-               case 0x67:      /* address-size override */
13737
 
-                       if (mode == X86EMUL_MODE_PROT64)
13738
 
-                               ad_bytes ^= 12; /* switch between 4/8 bytes */
13739
 
-                       else
13740
 
-                               ad_bytes ^= 6;  /* switch between 2/4 bytes */
13741
 
-                       break;
13742
 
-               case 0x2e:      /* CS override */
13743
 
-                       override_base = &ctxt->cs_base;
13744
 
-                       break;
13745
 
-               case 0x3e:      /* DS override */
13746
 
-                       override_base = &ctxt->ds_base;
13747
 
-                       break;
13748
 
-               case 0x26:      /* ES override */
13749
 
-                       override_base = &ctxt->es_base;
13750
 
-                       break;
13751
 
-               case 0x64:      /* FS override */
13752
 
-                       override_base = &ctxt->fs_base;
13753
 
-                       break;
13754
 
-               case 0x65:      /* GS override */
13755
 
-                       override_base = &ctxt->gs_base;
13756
 
-                       break;
13757
 
-               case 0x36:      /* SS override */
13758
 
-                       override_base = &ctxt->ss_base;
13759
 
-                       break;
13760
 
-               case 0xf0:      /* LOCK */
13761
 
-                       lock_prefix = 1;
13762
 
-                       break;
13763
 
-               case 0xf2:      /* REPNE/REPNZ */
13764
 
-               case 0xf3:      /* REP/REPE/REPZ */
13765
 
-                       rep_prefix = 1;
13766
 
-                       break;
13767
 
-               default:
13768
 
-                       goto done_prefixes;
13769
 
-               }
13770
 
-       }
13771
 
-
13772
 
-done_prefixes:
13773
 
-
13774
 
-       /* REX prefix. */
13775
 
-       if ((mode == X86EMUL_MODE_PROT64) && ((b & 0xf0) == 0x40)) {
13776
 
-               rex_prefix = b;
13777
 
-               if (b & 8)
13778
 
-                       op_bytes = 8;   /* REX.W */
13779
 
-               modrm_reg = (b & 4) << 1;       /* REX.R */
13780
 
-               index_reg = (b & 2) << 2; /* REX.X */
13781
 
-               modrm_rm = base_reg = (b & 1) << 3; /* REG.B */
13782
 
-               b = insn_fetch(u8, 1, _eip);
13783
 
-       }
13784
 
-
13785
 
-       /* Opcode byte(s). */
13786
 
-       d = opcode_table[b];
13787
 
-       if (d == 0) {
13788
 
-               /* Two-byte opcode? */
13789
 
-               if (b == 0x0f) {
13790
 
-                       twobyte = 1;
13791
 
-                       b = insn_fetch(u8, 1, _eip);
13792
 
-                       d = twobyte_table[b];
13793
 
-               }
13794
 
-
13795
 
-               /* Unrecognised? */
13796
 
-               if (d == 0)
13797
 
-                       goto cannot_emulate;
13798
 
-       }
13799
 
-
13800
 
-       /* ModRM and SIB bytes. */
13801
 
-       if (d & ModRM) {
13802
 
-               modrm = insn_fetch(u8, 1, _eip);
13803
 
-               modrm_mod |= (modrm & 0xc0) >> 6;
13804
 
-               modrm_reg |= (modrm & 0x38) >> 3;
13805
 
-               modrm_rm |= (modrm & 0x07);
13806
 
-               modrm_ea = 0;
13807
 
-               use_modrm_ea = 1;
13808
 
-
13809
 
-               if (modrm_mod == 3) {
13810
 
-                       modrm_val = *(unsigned long *)
13811
 
-                               decode_register(modrm_rm, _regs, d & ByteOp);
13812
 
-                       goto modrm_done;
13813
 
-               }
13814
 
-
13815
 
-               if (ad_bytes == 2) {
13816
 
-                       unsigned bx = _regs[VCPU_REGS_RBX];
13817
 
-                       unsigned bp = _regs[VCPU_REGS_RBP];
13818
 
-                       unsigned si = _regs[VCPU_REGS_RSI];
13819
 
-                       unsigned di = _regs[VCPU_REGS_RDI];
13820
 
-
13821
 
-                       /* 16-bit ModR/M decode. */
13822
 
-                       switch (modrm_mod) {
13823
 
-                       case 0:
13824
 
-                               if (modrm_rm == 6)
13825
 
-                                       modrm_ea += insn_fetch(u16, 2, _eip);
13826
 
-                               break;
13827
 
-                       case 1:
13828
 
-                               modrm_ea += insn_fetch(s8, 1, _eip);
13829
 
-                               break;
13830
 
-                       case 2:
13831
 
-                               modrm_ea += insn_fetch(u16, 2, _eip);
13832
 
-                               break;
13833
 
-                       }
13834
 
-                       switch (modrm_rm) {
13835
 
-                       case 0:
13836
 
-                               modrm_ea += bx + si;
13837
 
-                               break;
13838
 
-                       case 1:
13839
 
-                               modrm_ea += bx + di;
13840
 
-                               break;
13841
 
-                       case 2:
13842
 
-                               modrm_ea += bp + si;
13843
 
-                               break;
13844
 
-                       case 3:
13845
 
-                               modrm_ea += bp + di;
13846
 
-                               break;
13847
 
-                       case 4:
13848
 
-                               modrm_ea += si;
13849
 
-                               break;
13850
 
-                       case 5:
13851
 
-                               modrm_ea += di;
13852
 
-                               break;
13853
 
-                       case 6:
13854
 
-                               if (modrm_mod != 0)
13855
 
-                                       modrm_ea += bp;
13856
 
-                               break;
13857
 
-                       case 7:
13858
 
-                               modrm_ea += bx;
13859
 
-                               break;
13860
 
-                       }
13861
 
-                       if (modrm_rm == 2 || modrm_rm == 3 ||
13862
 
-                           (modrm_rm == 6 && modrm_mod != 0))
13863
 
-                               if (!override_base)
13864
 
-                                       override_base = &ctxt->ss_base;
13865
 
-                       modrm_ea = (u16)modrm_ea;
13866
 
-               } else {
13867
 
-                       /* 32/64-bit ModR/M decode. */
13868
 
-                       switch (modrm_rm) {
13869
 
-                       case 4:
13870
 
-                       case 12:
13871
 
-                               sib = insn_fetch(u8, 1, _eip);
13872
 
-                               index_reg |= (sib >> 3) & 7;
13873
 
-                               base_reg |= sib & 7;
13874
 
-                               scale = sib >> 6;
13875
 
-
13876
 
-                               switch (base_reg) {
13877
 
-                               case 5:
13878
 
-                                       if (modrm_mod != 0)
13879
 
-                                               modrm_ea += _regs[base_reg];
13880
 
-                                       else
13881
 
-                                               modrm_ea += insn_fetch(s32, 4, _eip);
13882
 
-                                       break;
13883
 
-                               default:
13884
 
-                                       modrm_ea += _regs[base_reg];
13885
 
-                               }
13886
 
-                               switch (index_reg) {
13887
 
-                               case 4:
13888
 
-                                       break;
13889
 
-                               default:
13890
 
-                                       modrm_ea += _regs[index_reg] << scale;
13891
 
-
13892
 
-                               }
13893
 
-                               break;
13894
 
-                       case 5:
13895
 
-                               if (modrm_mod != 0)
13896
 
-                                       modrm_ea += _regs[modrm_rm];
13897
 
-                               else if (mode == X86EMUL_MODE_PROT64)
13898
 
-                                       rip_relative = 1;
13899
 
-                               break;
13900
 
-                       default:
13901
 
-                               modrm_ea += _regs[modrm_rm];
13902
 
-                               break;
13903
 
-                       }
13904
 
-                       switch (modrm_mod) {
13905
 
-                       case 0:
13906
 
-                               if (modrm_rm == 5)
13907
 
-                                       modrm_ea += insn_fetch(s32, 4, _eip);
13908
 
-                               break;
13909
 
-                       case 1:
13910
 
-                               modrm_ea += insn_fetch(s8, 1, _eip);
13911
 
-                               break;
13912
 
-                       case 2:
13913
 
-                               modrm_ea += insn_fetch(s32, 4, _eip);
13914
 
-                               break;
13915
 
-                       }
13916
 
-               }
13917
 
-               if (!override_base)
13918
 
-                       override_base = &ctxt->ds_base;
13919
 
-               if (mode == X86EMUL_MODE_PROT64 &&
13920
 
-                   override_base != &ctxt->fs_base &&
13921
 
-                   override_base != &ctxt->gs_base)
13922
 
-                       override_base = NULL;
13923
 
-
13924
 
-               if (override_base)
13925
 
-                       modrm_ea += *override_base;
13926
 
-
13927
 
-               if (rip_relative) {
13928
 
-                       modrm_ea += _eip;
13929
 
-                       switch (d & SrcMask) {
13930
 
-                       case SrcImmByte:
13931
 
-                               modrm_ea += 1;
13932
 
-                               break;
13933
 
-                       case SrcImm:
13934
 
-                               if (d & ByteOp)
13935
 
-                                       modrm_ea += 1;
13936
 
-                               else
13937
 
-                                       if (op_bytes == 8)
13938
 
-                                               modrm_ea += 4;
13939
 
-                                       else
13940
 
-                                               modrm_ea += op_bytes;
13941
 
-                       }
13942
 
-               }
13943
 
-               if (ad_bytes != 8)
13944
 
-                       modrm_ea = (u32)modrm_ea;
13945
 
-               cr2 = modrm_ea;
13946
 
-       modrm_done:
13947
 
-               ;
13948
 
-       }
13949
 
-
13950
 
-       /*
13951
 
-        * Decode and fetch the source operand: register, memory
13952
 
-        * or immediate.
13953
 
-        */
13954
 
-       switch (d & SrcMask) {
13955
 
-       case SrcNone:
13956
 
-               break;
13957
 
-       case SrcReg:
13958
 
-               src.type = OP_REG;
13959
 
-               if (d & ByteOp) {
13960
 
-                       src.ptr = decode_register(modrm_reg, _regs,
13961
 
-                                                 (rex_prefix == 0));
13962
 
-                       src.val = src.orig_val = *(u8 *) src.ptr;
13963
 
-                       src.bytes = 1;
13964
 
-               } else {
13965
 
-                       src.ptr = decode_register(modrm_reg, _regs, 0);
13966
 
-                       switch ((src.bytes = op_bytes)) {
13967
 
-                       case 2:
13968
 
-                               src.val = src.orig_val = *(u16 *) src.ptr;
13969
 
-                               break;
13970
 
-                       case 4:
13971
 
-                               src.val = src.orig_val = *(u32 *) src.ptr;
13972
 
-                               break;
13973
 
-                       case 8:
13974
 
-                               src.val = src.orig_val = *(u64 *) src.ptr;
13975
 
-                               break;
13976
 
-                       }
13977
 
-               }
13978
 
-               break;
13979
 
-       case SrcMem16:
13980
 
-               src.bytes = 2;
13981
 
-               goto srcmem_common;
13982
 
-       case SrcMem32:
13983
 
-               src.bytes = 4;
13984
 
-               goto srcmem_common;
13985
 
-       case SrcMem:
13986
 
-               src.bytes = (d & ByteOp) ? 1 : op_bytes;
13987
 
-               /* Don't fetch the address for invlpg: it could be unmapped. */
13988
 
-               if (twobyte && b == 0x01 && modrm_reg == 7)
13989
 
-                       break;
13990
 
-             srcmem_common:
13991
 
-               /*
13992
 
-                * For instructions with a ModR/M byte, switch to register
13993
 
-                * access if Mod = 3.
13994
 
-                */
13995
 
-               if ((d & ModRM) && modrm_mod == 3) {
13996
 
-                       src.type = OP_REG;
13997
 
-                       break;
13998
 
-               }
13999
 
-               src.type = OP_MEM;
14000
 
-               src.ptr = (unsigned long *)cr2;
14001
 
-               src.val = 0;
14002
 
-               if ((rc = ops->read_emulated((unsigned long)src.ptr,
14003
 
-                                            &src.val, src.bytes, ctxt->vcpu)) != 0)
14004
 
-                       goto done;
14005
 
-               src.orig_val = src.val;
14006
 
-               break;
14007
 
-       case SrcImm:
14008
 
-               src.type = OP_IMM;
14009
 
-               src.ptr = (unsigned long *)_eip;
14010
 
-               src.bytes = (d & ByteOp) ? 1 : op_bytes;
14011
 
-               if (src.bytes == 8)
14012
 
-                       src.bytes = 4;
14013
 
-               /* NB. Immediates are sign-extended as necessary. */
14014
 
-               switch (src.bytes) {
14015
 
-               case 1:
14016
 
-                       src.val = insn_fetch(s8, 1, _eip);
14017
 
-                       break;
14018
 
-               case 2:
14019
 
-                       src.val = insn_fetch(s16, 2, _eip);
14020
 
-                       break;
14021
 
-               case 4:
14022
 
-                       src.val = insn_fetch(s32, 4, _eip);
14023
 
-                       break;
14024
 
-               }
14025
 
-               break;
14026
 
-       case SrcImmByte:
14027
 
-               src.type = OP_IMM;
14028
 
-               src.ptr = (unsigned long *)_eip;
14029
 
-               src.bytes = 1;
14030
 
-               src.val = insn_fetch(s8, 1, _eip);
14031
 
-               break;
14032
 
-       }
14033
 
-
14034
 
-       /* Decode and fetch the destination operand: register or memory. */
14035
 
-       switch (d & DstMask) {
14036
 
-       case ImplicitOps:
14037
 
-               /* Special instructions do their own operand decoding. */
14038
 
-               goto special_insn;
14039
 
-       case DstReg:
14040
 
-               dst.type = OP_REG;
14041
 
-               if ((d & ByteOp)
14042
 
-                   && !(twobyte && (b == 0xb6 || b == 0xb7))) {
14043
 
-                       dst.ptr = decode_register(modrm_reg, _regs,
14044
 
-                                                 (rex_prefix == 0));
14045
 
-                       dst.val = *(u8 *) dst.ptr;
14046
 
-                       dst.bytes = 1;
14047
 
-               } else {
14048
 
-                       dst.ptr = decode_register(modrm_reg, _regs, 0);
14049
 
-                       switch ((dst.bytes = op_bytes)) {
14050
 
-                       case 2:
14051
 
-                               dst.val = *(u16 *)dst.ptr;
14052
 
-                               break;
14053
 
-                       case 4:
14054
 
-                               dst.val = *(u32 *)dst.ptr;
14055
 
-                               break;
14056
 
-                       case 8:
14057
 
-                               dst.val = *(u64 *)dst.ptr;
14058
 
-                               break;
14059
 
-                       }
14060
 
-               }
14061
 
-               break;
14062
 
-       case DstMem:
14063
 
-               dst.type = OP_MEM;
14064
 
-               dst.ptr = (unsigned long *)cr2;
14065
 
-               dst.bytes = (d & ByteOp) ? 1 : op_bytes;
14066
 
-               dst.val = 0;
14067
 
-               /*
14068
 
-                * For instructions with a ModR/M byte, switch to register
14069
 
-                * access if Mod = 3.
14070
 
-                */
14071
 
-               if ((d & ModRM) && modrm_mod == 3) {
14072
 
-                       dst.type = OP_REG;
14073
 
-                       break;
14074
 
-               }
14075
 
-               if (d & BitOp) {
14076
 
-                       unsigned long mask = ~(dst.bytes * 8 - 1);
14077
 
-
14078
 
-                       dst.ptr = (void *)dst.ptr + (src.val & mask) / 8;
14079
 
-               }
14080
 
-               if (!(d & Mov) && /* optimisation - avoid slow emulated read */
14081
 
-                   ((rc = ops->read_emulated((unsigned long)dst.ptr,
14082
 
-                                             &dst.val, dst.bytes, ctxt->vcpu)) != 0))
14083
 
-                       goto done;
14084
 
-               break;
14085
 
-       }
14086
 
-       dst.orig_val = dst.val;
14087
 
-
14088
 
-       if (twobyte)
14089
 
-               goto twobyte_insn;
14090
 
-
14091
 
-       switch (b) {
14092
 
-       case 0x00 ... 0x05:
14093
 
-             add:              /* add */
14094
 
-               emulate_2op_SrcV("add", src, dst, _eflags);
14095
 
-               break;
14096
 
-       case 0x08 ... 0x0d:
14097
 
-             or:               /* or */
14098
 
-               emulate_2op_SrcV("or", src, dst, _eflags);
14099
 
-               break;
14100
 
-       case 0x10 ... 0x15:
14101
 
-             adc:              /* adc */
14102
 
-               emulate_2op_SrcV("adc", src, dst, _eflags);
14103
 
-               break;
14104
 
-       case 0x18 ... 0x1d:
14105
 
-             sbb:              /* sbb */
14106
 
-               emulate_2op_SrcV("sbb", src, dst, _eflags);
14107
 
-               break;
14108
 
-       case 0x20 ... 0x23:
14109
 
-             and:              /* and */
14110
 
-               emulate_2op_SrcV("and", src, dst, _eflags);
14111
 
-               break;
14112
 
-       case 0x24:              /* and al imm8 */
14113
 
-               dst.type = OP_REG;
14114
 
-               dst.ptr = &_regs[VCPU_REGS_RAX];
14115
 
-               dst.val = *(u8 *)dst.ptr;
14116
 
-               dst.bytes = 1;
14117
 
-               dst.orig_val = dst.val;
14118
 
-               goto and;
14119
 
-       case 0x25:              /* and ax imm16, or eax imm32 */
14120
 
-               dst.type = OP_REG;
14121
 
-               dst.bytes = op_bytes;
14122
 
-               dst.ptr = &_regs[VCPU_REGS_RAX];
14123
 
-               if (op_bytes == 2)
14124
 
-                       dst.val = *(u16 *)dst.ptr;
14125
 
-               else
14126
 
-                       dst.val = *(u32 *)dst.ptr;
14127
 
-               dst.orig_val = dst.val;
14128
 
-               goto and;
14129
 
-       case 0x28 ... 0x2d:
14130
 
-             sub:              /* sub */
14131
 
-               emulate_2op_SrcV("sub", src, dst, _eflags);
14132
 
-               break;
14133
 
-       case 0x30 ... 0x35:
14134
 
-             xor:              /* xor */
14135
 
-               emulate_2op_SrcV("xor", src, dst, _eflags);
14136
 
-               break;
14137
 
-       case 0x38 ... 0x3d:
14138
 
-             cmp:              /* cmp */
14139
 
-               emulate_2op_SrcV("cmp", src, dst, _eflags);
14140
 
-               break;
14141
 
-       case 0x63:              /* movsxd */
14142
 
-               if (mode != X86EMUL_MODE_PROT64)
14143
 
-                       goto cannot_emulate;
14144
 
-               dst.val = (s32) src.val;
14145
 
-               break;
14146
 
-       case 0x80 ... 0x83:     /* Grp1 */
14147
 
-               switch (modrm_reg) {
14148
 
-               case 0:
14149
 
-                       goto add;
14150
 
-               case 1:
14151
 
-                       goto or;
14152
 
-               case 2:
14153
 
-                       goto adc;
14154
 
-               case 3:
14155
 
-                       goto sbb;
14156
 
-               case 4:
14157
 
-                       goto and;
14158
 
-               case 5:
14159
 
-                       goto sub;
14160
 
-               case 6:
14161
 
-                       goto xor;
14162
 
-               case 7:
14163
 
-                       goto cmp;
14164
 
-               }
14165
 
-               break;
14166
 
-       case 0x84 ... 0x85:
14167
 
-             test:             /* test */
14168
 
-               emulate_2op_SrcV("test", src, dst, _eflags);
14169
 
-               break;
14170
 
-       case 0x86 ... 0x87:     /* xchg */
14171
 
-               /* Write back the register source. */
14172
 
-               switch (dst.bytes) {
14173
 
-               case 1:
14174
 
-                       *(u8 *) src.ptr = (u8) dst.val;
14175
 
-                       break;
14176
 
-               case 2:
14177
 
-                       *(u16 *) src.ptr = (u16) dst.val;
14178
 
-                       break;
14179
 
-               case 4:
14180
 
-                       *src.ptr = (u32) dst.val;
14181
 
-                       break;  /* 64b reg: zero-extend */
14182
 
-               case 8:
14183
 
-                       *src.ptr = dst.val;
14184
 
-                       break;
14185
 
-               }
14186
 
-               /*
14187
 
-                * Write back the memory destination with implicit LOCK
14188
 
-                * prefix.
14189
 
-                */
14190
 
-               dst.val = src.val;
14191
 
-               lock_prefix = 1;
14192
 
-               break;
14193
 
-       case 0x88 ... 0x8b:     /* mov */
14194
 
-               goto mov;
14195
 
-       case 0x8d: /* lea r16/r32, m */
14196
 
-               dst.val = modrm_val;
14197
 
-               break;
14198
 
-       case 0x8f:              /* pop (sole member of Grp1a) */
14199
 
-               /* 64-bit mode: POP always pops a 64-bit operand. */
14200
 
-               if (mode == X86EMUL_MODE_PROT64)
14201
 
-                       dst.bytes = 8;
14202
 
-               if ((rc = ops->read_std(register_address(ctxt->ss_base,
14203
 
-                                                        _regs[VCPU_REGS_RSP]),
14204
 
-                                       &dst.val, dst.bytes, ctxt->vcpu)) != 0)
14205
 
-                       goto done;
14206
 
-               register_address_increment(_regs[VCPU_REGS_RSP], dst.bytes);
14207
 
-               break;
14208
 
-       case 0xa0 ... 0xa1:     /* mov */
14209
 
-               dst.ptr = (unsigned long *)&_regs[VCPU_REGS_RAX];
14210
 
-               dst.val = src.val;
14211
 
-               _eip += ad_bytes;       /* skip src displacement */
14212
 
-               break;
14213
 
-       case 0xa2 ... 0xa3:     /* mov */
14214
 
-               dst.val = (unsigned long)_regs[VCPU_REGS_RAX];
14215
 
-               _eip += ad_bytes;       /* skip dst displacement */
14216
 
-               break;
14217
 
-       case 0xc0 ... 0xc1:
14218
 
-             grp2:             /* Grp2 */
14219
 
-               switch (modrm_reg) {
14220
 
-               case 0: /* rol */
14221
 
-                       emulate_2op_SrcB("rol", src, dst, _eflags);
14222
 
-                       break;
14223
 
-               case 1: /* ror */
14224
 
-                       emulate_2op_SrcB("ror", src, dst, _eflags);
14225
 
-                       break;
14226
 
-               case 2: /* rcl */
14227
 
-                       emulate_2op_SrcB("rcl", src, dst, _eflags);
14228
 
-                       break;
14229
 
-               case 3: /* rcr */
14230
 
-                       emulate_2op_SrcB("rcr", src, dst, _eflags);
14231
 
-                       break;
14232
 
-               case 4: /* sal/shl */
14233
 
-               case 6: /* sal/shl */
14234
 
-                       emulate_2op_SrcB("sal", src, dst, _eflags);
14235
 
-                       break;
14236
 
-               case 5: /* shr */
14237
 
-                       emulate_2op_SrcB("shr", src, dst, _eflags);
14238
 
-                       break;
14239
 
-               case 7: /* sar */
14240
 
-                       emulate_2op_SrcB("sar", src, dst, _eflags);
14241
 
-                       break;
14242
 
-               }
14243
 
-               break;
14244
 
-       case 0xc6 ... 0xc7:     /* mov (sole member of Grp11) */
14245
 
-       mov:
14246
 
-               dst.val = src.val;
14247
 
-               break;
14248
 
-       case 0xd0 ... 0xd1:     /* Grp2 */
14249
 
-               src.val = 1;
14250
 
-               goto grp2;
14251
 
-       case 0xd2 ... 0xd3:     /* Grp2 */
14252
 
-               src.val = _regs[VCPU_REGS_RCX];
14253
 
-               goto grp2;
14254
 
-       case 0xf6 ... 0xf7:     /* Grp3 */
14255
 
-               switch (modrm_reg) {
14256
 
-               case 0 ... 1:   /* test */
14257
 
-                       /*
14258
 
-                        * Special case in Grp3: test has an immediate
14259
 
-                        * source operand.
14260
 
-                        */
14261
 
-                       src.type = OP_IMM;
14262
 
-                       src.ptr = (unsigned long *)_eip;
14263
 
-                       src.bytes = (d & ByteOp) ? 1 : op_bytes;
14264
 
-                       if (src.bytes == 8)
14265
 
-                               src.bytes = 4;
14266
 
-                       switch (src.bytes) {
14267
 
-                       case 1:
14268
 
-                               src.val = insn_fetch(s8, 1, _eip);
14269
 
-                               break;
14270
 
-                       case 2:
14271
 
-                               src.val = insn_fetch(s16, 2, _eip);
14272
 
-                               break;
14273
 
-                       case 4:
14274
 
-                               src.val = insn_fetch(s32, 4, _eip);
14275
 
-                               break;
14276
 
-                       }
14277
 
-                       goto test;
14278
 
-               case 2: /* not */
14279
 
-                       dst.val = ~dst.val;
14280
 
-                       break;
14281
 
-               case 3: /* neg */
14282
 
-                       emulate_1op("neg", dst, _eflags);
14283
 
-                       break;
14284
 
-               default:
14285
 
-                       goto cannot_emulate;
14286
 
-               }
14287
 
-               break;
14288
 
-       case 0xfe ... 0xff:     /* Grp4/Grp5 */
14289
 
-               switch (modrm_reg) {
14290
 
-               case 0: /* inc */
14291
 
-                       emulate_1op("inc", dst, _eflags);
14292
 
-                       break;
14293
 
-               case 1: /* dec */
14294
 
-                       emulate_1op("dec", dst, _eflags);
14295
 
-                       break;
14296
 
-               case 4: /* jmp abs */
14297
 
-                       if (b == 0xff)
14298
 
-                               _eip = dst.val;
14299
 
-                       else
14300
 
-                               goto cannot_emulate;
14301
 
-                       break;
14302
 
-               case 6: /* push */
14303
 
-                       /* 64-bit mode: PUSH always pushes a 64-bit operand. */
14304
 
-                       if (mode == X86EMUL_MODE_PROT64) {
14305
 
-                               dst.bytes = 8;
14306
 
-                               if ((rc = ops->read_std((unsigned long)dst.ptr,
14307
 
-                                                       &dst.val, 8,
14308
 
-                                                       ctxt->vcpu)) != 0)
14309
 
-                                       goto done;
14310
 
-                       }
14311
 
-                       register_address_increment(_regs[VCPU_REGS_RSP],
14312
 
-                                                  -dst.bytes);
14313
 
-                       if ((rc = ops->write_emulated(
14314
 
-                                    register_address(ctxt->ss_base,
14315
 
-                                                     _regs[VCPU_REGS_RSP]),
14316
 
-                                    &dst.val, dst.bytes, ctxt->vcpu)) != 0)
14317
 
-                               goto done;
14318
 
-                       no_wb = 1;
14319
 
-                       break;
14320
 
-               default:
14321
 
-                       goto cannot_emulate;
14322
 
-               }
14323
 
-               break;
14324
 
-       }
14325
 
-
14326
 
-writeback:
14327
 
-       if (!no_wb) {
14328
 
-               switch (dst.type) {
14329
 
-               case OP_REG:
14330
 
-                       /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
14331
 
-                       switch (dst.bytes) {
14332
 
-                       case 1:
14333
 
-                               *(u8 *)dst.ptr = (u8)dst.val;
14334
 
-                               break;
14335
 
-                       case 2:
14336
 
-                               *(u16 *)dst.ptr = (u16)dst.val;
14337
 
-                               break;
14338
 
-                       case 4:
14339
 
-                               *dst.ptr = (u32)dst.val;
14340
 
-                               break;  /* 64b: zero-ext */
14341
 
-                       case 8:
14342
 
-                               *dst.ptr = dst.val;
14343
 
-                               break;
14344
 
-                       }
14345
 
-                       break;
14346
 
-               case OP_MEM:
14347
 
-                       if (lock_prefix)
14348
 
-                               rc = ops->cmpxchg_emulated((unsigned long)dst.
14349
 
-                                                          ptr, &dst.orig_val,
14350
 
-                                                          &dst.val, dst.bytes,
14351
 
-                                                          ctxt->vcpu);
14352
 
-                       else
14353
 
-                               rc = ops->write_emulated((unsigned long)dst.ptr,
14354
 
-                                                        &dst.val, dst.bytes,
14355
 
-                                                        ctxt->vcpu);
14356
 
-                       if (rc != 0)
14357
 
-                               goto done;
14358
 
-               default:
14359
 
-                       break;
14360
 
-               }
14361
 
-       }
14362
 
-
14363
 
-       /* Commit shadow register state. */
14364
 
-       memcpy(ctxt->vcpu->regs, _regs, sizeof _regs);
14365
 
-       ctxt->eflags = _eflags;
14366
 
-       ctxt->vcpu->rip = _eip;
14367
 
-
14368
 
-done:
14369
 
-       return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
14370
 
-
14371
 
-special_insn:
14372
 
-       if (twobyte)
14373
 
-               goto twobyte_special_insn;
14374
 
-       switch(b) {
14375
 
-       case 0x50 ... 0x57:  /* push reg */
14376
 
-               if (op_bytes == 2)
14377
 
-                       src.val = (u16) _regs[b & 0x7];
14378
 
-               else
14379
 
-                       src.val = (u32) _regs[b & 0x7];
14380
 
-               dst.type  = OP_MEM;
14381
 
-               dst.bytes = op_bytes;
14382
 
-               dst.val = src.val;
14383
 
-               register_address_increment(_regs[VCPU_REGS_RSP], -op_bytes);
14384
 
-               dst.ptr = (void *) register_address(
14385
 
-                       ctxt->ss_base, _regs[VCPU_REGS_RSP]);
14386
 
-               break;
14387
 
-       case 0x58 ... 0x5f: /* pop reg */
14388
 
-               dst.ptr = (unsigned long *)&_regs[b & 0x7];
14389
 
-       pop_instruction:
14390
 
-               if ((rc = ops->read_std(register_address(ctxt->ss_base,
14391
 
-                       _regs[VCPU_REGS_RSP]), dst.ptr, op_bytes, ctxt->vcpu))
14392
 
-                       != 0)
14393
 
-                       goto done;
14394
 
-
14395
 
-               register_address_increment(_regs[VCPU_REGS_RSP], op_bytes);
14396
 
-               no_wb = 1; /* Disable writeback. */
14397
 
-               break;
14398
 
-       case 0x6a: /* push imm8 */
14399
 
-               src.val = 0L;
14400
 
-               src.val = insn_fetch(s8, 1, _eip);
14401
 
-       push:
14402
 
-               dst.type  = OP_MEM;
14403
 
-               dst.bytes = op_bytes;
14404
 
-               dst.val = src.val;
14405
 
-               register_address_increment(_regs[VCPU_REGS_RSP], -op_bytes);
14406
 
-               dst.ptr = (void *) register_address(ctxt->ss_base,
14407
 
-                                                       _regs[VCPU_REGS_RSP]);
14408
 
-               break;
14409
 
-       case 0x6c:              /* insb */
14410
 
-       case 0x6d:              /* insw/insd */
14411
 
-                if (kvm_emulate_pio_string(ctxt->vcpu, NULL,
14412
 
-                               1,                                      /* in */
14413
 
-                               (d & ByteOp) ? 1 : op_bytes,            /* size */
14414
 
-                               rep_prefix ?
14415
 
-                               address_mask(_regs[VCPU_REGS_RCX]) : 1, /* count */
14416
 
-                               (_eflags & EFLG_DF),                    /* down */
14417
 
-                               register_address(ctxt->es_base,
14418
 
-                                                _regs[VCPU_REGS_RDI]), /* address */
14419
 
-                               rep_prefix,
14420
 
-                               _regs[VCPU_REGS_RDX]                    /* port */
14421
 
-                               ) == 0)
14422
 
-                       return -1;
14423
 
-               return 0;
14424
 
-       case 0x6e:              /* outsb */
14425
 
-       case 0x6f:              /* outsw/outsd */
14426
 
-               if (kvm_emulate_pio_string(ctxt->vcpu, NULL,
14427
 
-                               0,                                      /* in */
14428
 
-                               (d & ByteOp) ? 1 : op_bytes,            /* size */
14429
 
-                               rep_prefix ?
14430
 
-                               address_mask(_regs[VCPU_REGS_RCX]) : 1, /* count */
14431
 
-                               (_eflags & EFLG_DF),                    /* down */
14432
 
-                               register_address(override_base ?
14433
 
-                                                *override_base : ctxt->ds_base,
14434
 
-                                                _regs[VCPU_REGS_RSI]), /* address */
14435
 
-                               rep_prefix,
14436
 
-                               _regs[VCPU_REGS_RDX]                    /* port */
14437
 
-                               ) == 0)
14438
 
-                       return -1;
14439
 
-               return 0;
14440
 
-       case 0x70 ... 0x7f: /* jcc (short) */ {
14441
 
-               int rel = insn_fetch(s8, 1, _eip);
14442
 
-
14443
 
-               if (test_cc(b, _eflags))
14444
 
-               JMP_REL(rel);
14445
 
-               break;
14446
 
-       }
14447
 
-       case 0x9c: /* pushf */
14448
 
-               src.val =  (unsigned long) _eflags;
14449
 
-               goto push;
14450
 
-       case 0x9d: /* popf */
14451
 
-               dst.ptr = (unsigned long *) &_eflags;
14452
 
-               goto pop_instruction;
14453
 
-       case 0xc3: /* ret */
14454
 
-               dst.ptr = &_eip;
14455
 
-               goto pop_instruction;
14456
 
-       case 0xf4:              /* hlt */
14457
 
-               ctxt->vcpu->halt_request = 1;
14458
 
-               goto done;
14459
 
-       }
14460
 
-       if (rep_prefix) {
14461
 
-               if (_regs[VCPU_REGS_RCX] == 0) {
14462
 
-                       ctxt->vcpu->rip = _eip;
14463
 
-                       goto done;
14464
 
-               }
14465
 
-               _regs[VCPU_REGS_RCX]--;
14466
 
-               _eip = ctxt->vcpu->rip;
14467
 
-       }
14468
 
-       switch (b) {
14469
 
-       case 0xa4 ... 0xa5:     /* movs */
14470
 
-               dst.type = OP_MEM;
14471
 
-               dst.bytes = (d & ByteOp) ? 1 : op_bytes;
14472
 
-               dst.ptr = (unsigned long *)register_address(ctxt->es_base,
14473
 
-                                                       _regs[VCPU_REGS_RDI]);
14474
 
-               if ((rc = ops->read_emulated(register_address(
14475
 
-                     override_base ? *override_base : ctxt->ds_base,
14476
 
-                     _regs[VCPU_REGS_RSI]), &dst.val, dst.bytes, ctxt->vcpu)) != 0)
14477
 
-                       goto done;
14478
 
-               register_address_increment(_regs[VCPU_REGS_RSI],
14479
 
-                            (_eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
14480
 
-               register_address_increment(_regs[VCPU_REGS_RDI],
14481
 
-                            (_eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
14482
 
-               break;
14483
 
-       case 0xa6 ... 0xa7:     /* cmps */
14484
 
-               DPRINTF("Urk! I don't handle CMPS.\n");
14485
 
-               goto cannot_emulate;
14486
 
-       case 0xaa ... 0xab:     /* stos */
14487
 
-               dst.type = OP_MEM;
14488
 
-               dst.bytes = (d & ByteOp) ? 1 : op_bytes;
14489
 
-               dst.ptr = (unsigned long *)cr2;
14490
 
-               dst.val = _regs[VCPU_REGS_RAX];
14491
 
-               register_address_increment(_regs[VCPU_REGS_RDI],
14492
 
-                            (_eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
14493
 
-               break;
14494
 
-       case 0xac ... 0xad:     /* lods */
14495
 
-               dst.type = OP_REG;
14496
 
-               dst.bytes = (d & ByteOp) ? 1 : op_bytes;
14497
 
-               dst.ptr = (unsigned long *)&_regs[VCPU_REGS_RAX];
14498
 
-               if ((rc = ops->read_emulated(cr2, &dst.val, dst.bytes,
14499
 
-                                            ctxt->vcpu)) != 0)
14500
 
-                       goto done;
14501
 
-               register_address_increment(_regs[VCPU_REGS_RSI],
14502
 
-                          (_eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
14503
 
-               break;
14504
 
-       case 0xae ... 0xaf:     /* scas */
14505
 
-               DPRINTF("Urk! I don't handle SCAS.\n");
14506
 
-               goto cannot_emulate;
14507
 
-       case 0xe8: /* call (near) */ {
14508
 
-               long int rel;
14509
 
-               switch (op_bytes) {
14510
 
-               case 2:
14511
 
-                       rel = insn_fetch(s16, 2, _eip);
14512
 
-                       break;
14513
 
-               case 4:
14514
 
-                       rel = insn_fetch(s32, 4, _eip);
14515
 
-                       break;
14516
 
-               case 8:
14517
 
-                       rel = insn_fetch(s64, 8, _eip);
14518
 
-                       break;
14519
 
-               default:
14520
 
-                       DPRINTF("Call: Invalid op_bytes\n");
14521
 
-                       goto cannot_emulate;
14522
 
-               }
14523
 
-               src.val = (unsigned long) _eip;
14524
 
-               JMP_REL(rel);
14525
 
-               op_bytes = ad_bytes;
14526
 
-               goto push;
14527
 
-       }
14528
 
-       case 0xe9: /* jmp rel */
14529
 
-       case 0xeb: /* jmp rel short */
14530
 
-               JMP_REL(src.val);
14531
 
-               no_wb = 1; /* Disable writeback. */
14532
 
-               break;
14533
 
-
14534
 
-
14535
 
-       }
14536
 
-       goto writeback;
14537
 
-
14538
 
-twobyte_insn:
14539
 
-       switch (b) {
14540
 
-       case 0x01: /* lgdt, lidt, lmsw */
14541
 
-               /* Disable writeback. */
14542
 
-               no_wb = 1;
14543
 
-               switch (modrm_reg) {
14544
 
-                       u16 size;
14545
 
-                       unsigned long address;
14546
 
-
14547
 
-               case 2: /* lgdt */
14548
 
-                       rc = read_descriptor(ctxt, ops, src.ptr,
14549
 
-                                            &size, &address, op_bytes);
14550
 
-                       if (rc)
14551
 
-                               goto done;
14552
 
-                       realmode_lgdt(ctxt->vcpu, size, address);
14553
 
-                       break;
14554
 
-               case 3: /* lidt */
14555
 
-                       rc = read_descriptor(ctxt, ops, src.ptr,
14556
 
-                                            &size, &address, op_bytes);
14557
 
-                       if (rc)
14558
 
-                               goto done;
14559
 
-                       realmode_lidt(ctxt->vcpu, size, address);
14560
 
-                       break;
14561
 
-               case 4: /* smsw */
14562
 
-                       if (modrm_mod != 3)
14563
 
-                               goto cannot_emulate;
14564
 
-                       *(u16 *)&_regs[modrm_rm]
14565
 
-                               = realmode_get_cr(ctxt->vcpu, 0);
14566
 
-                       break;
14567
 
-               case 6: /* lmsw */
14568
 
-                       if (modrm_mod != 3)
14569
 
-                               goto cannot_emulate;
14570
 
-                       realmode_lmsw(ctxt->vcpu, (u16)modrm_val, &_eflags);
14571
 
-                       break;
14572
 
-               case 7: /* invlpg*/
14573
 
-                       emulate_invlpg(ctxt->vcpu, cr2);
14574
 
-                       break;
14575
 
-               default:
14576
 
-                       goto cannot_emulate;
14577
 
-               }
14578
 
-               break;
14579
 
-       case 0x21: /* mov from dr to reg */
14580
 
-               no_wb = 1;
14581
 
-               if (modrm_mod != 3)
14582
 
-                       goto cannot_emulate;
14583
 
-               rc = emulator_get_dr(ctxt, modrm_reg, &_regs[modrm_rm]);
14584
 
-               break;
14585
 
-       case 0x23: /* mov from reg to dr */
14586
 
-               no_wb = 1;
14587
 
-               if (modrm_mod != 3)
14588
 
-                       goto cannot_emulate;
14589
 
-               rc = emulator_set_dr(ctxt, modrm_reg, _regs[modrm_rm]);
14590
 
-               break;
14591
 
-       case 0x40 ... 0x4f:     /* cmov */
14592
 
-               dst.val = dst.orig_val = src.val;
14593
 
-               no_wb = 1;
14594
 
-               /*
14595
 
-                * First, assume we're decoding an even cmov opcode
14596
 
-                * (lsb == 0).
14597
 
-                */
14598
 
-               switch ((b & 15) >> 1) {
14599
 
-               case 0: /* cmovo */
14600
 
-                       no_wb = (_eflags & EFLG_OF) ? 0 : 1;
14601
 
-                       break;
14602
 
-               case 1: /* cmovb/cmovc/cmovnae */
14603
 
-                       no_wb = (_eflags & EFLG_CF) ? 0 : 1;
14604
 
-                       break;
14605
 
-               case 2: /* cmovz/cmove */
14606
 
-                       no_wb = (_eflags & EFLG_ZF) ? 0 : 1;
14607
 
-                       break;
14608
 
-               case 3: /* cmovbe/cmovna */
14609
 
-                       no_wb = (_eflags & (EFLG_CF | EFLG_ZF)) ? 0 : 1;
14610
 
-                       break;
14611
 
-               case 4: /* cmovs */
14612
 
-                       no_wb = (_eflags & EFLG_SF) ? 0 : 1;
14613
 
-                       break;
14614
 
-               case 5: /* cmovp/cmovpe */
14615
 
-                       no_wb = (_eflags & EFLG_PF) ? 0 : 1;
14616
 
-                       break;
14617
 
-               case 7: /* cmovle/cmovng */
14618
 
-                       no_wb = (_eflags & EFLG_ZF) ? 0 : 1;
14619
 
-                       /* fall through */
14620
 
-               case 6: /* cmovl/cmovnge */
14621
 
-                       no_wb &= (!(_eflags & EFLG_SF) !=
14622
 
-                             !(_eflags & EFLG_OF)) ? 0 : 1;
14623
 
-                       break;
14624
 
-               }
14625
 
-               /* Odd cmov opcodes (lsb == 1) have inverted sense. */
14626
 
-               no_wb ^= b & 1;
14627
 
-               break;
14628
 
-       case 0xa3:
14629
 
-             bt:               /* bt */
14630
 
-               src.val &= (dst.bytes << 3) - 1; /* only subword offset */
14631
 
-               emulate_2op_SrcV_nobyte("bt", src, dst, _eflags);
14632
 
-               break;
14633
 
-       case 0xab:
14634
 
-             bts:              /* bts */
14635
 
-               src.val &= (dst.bytes << 3) - 1; /* only subword offset */
14636
 
-               emulate_2op_SrcV_nobyte("bts", src, dst, _eflags);
14637
 
-               break;
14638
 
-       case 0xb0 ... 0xb1:     /* cmpxchg */
14639
 
-               /*
14640
 
-                * Save real source value, then compare EAX against
14641
 
-                * destination.
14642
 
-                */
14643
 
-               src.orig_val = src.val;
14644
 
-               src.val = _regs[VCPU_REGS_RAX];
14645
 
-               emulate_2op_SrcV("cmp", src, dst, _eflags);
14646
 
-               if (_eflags & EFLG_ZF) {
14647
 
-                       /* Success: write back to memory. */
14648
 
-                       dst.val = src.orig_val;
14649
 
-               } else {
14650
 
-                       /* Failure: write the value we saw to EAX. */
14651
 
-                       dst.type = OP_REG;
14652
 
-                       dst.ptr = (unsigned long *)&_regs[VCPU_REGS_RAX];
14653
 
-               }
14654
 
-               break;
14655
 
-       case 0xb3:
14656
 
-             btr:              /* btr */
14657
 
-               src.val &= (dst.bytes << 3) - 1; /* only subword offset */
14658
 
-               emulate_2op_SrcV_nobyte("btr", src, dst, _eflags);
14659
 
-               break;
14660
 
-       case 0xb6 ... 0xb7:     /* movzx */
14661
 
-               dst.bytes = op_bytes;
14662
 
-               dst.val = (d & ByteOp) ? (u8) src.val : (u16) src.val;
14663
 
-               break;
14664
 
-       case 0xba:              /* Grp8 */
14665
 
-               switch (modrm_reg & 3) {
14666
 
-               case 0:
14667
 
-                       goto bt;
14668
 
-               case 1:
14669
 
-                       goto bts;
14670
 
-               case 2:
14671
 
-                       goto btr;
14672
 
-               case 3:
14673
 
-                       goto btc;
14674
 
-               }
14675
 
-               break;
14676
 
-       case 0xbb:
14677
 
-             btc:              /* btc */
14678
 
-               src.val &= (dst.bytes << 3) - 1; /* only subword offset */
14679
 
-               emulate_2op_SrcV_nobyte("btc", src, dst, _eflags);
14680
 
-               break;
14681
 
-       case 0xbe ... 0xbf:     /* movsx */
14682
 
-               dst.bytes = op_bytes;
14683
 
-               dst.val = (d & ByteOp) ? (s8) src.val : (s16) src.val;
14684
 
-               break;
14685
 
-       case 0xc3:              /* movnti */
14686
 
-               dst.bytes = op_bytes;
14687
 
-               dst.val = (op_bytes == 4) ? (u32) src.val : (u64) src.val;
14688
 
-               break;
14689
 
-       }
14690
 
-       goto writeback;
14691
 
-
14692
 
-twobyte_special_insn:
14693
 
-       /* Disable writeback. */
14694
 
-       no_wb = 1;
14695
 
-       switch (b) {
14696
 
-       case 0x06:
14697
 
-               emulate_clts(ctxt->vcpu);
14698
 
-               break;
14699
 
-       case 0x08:              /* invd */
14700
 
-               break;
14701
 
-       case 0x09:              /* wbinvd */
14702
 
-               break;
14703
 
-       case 0x0d:              /* GrpP (prefetch) */
14704
 
-       case 0x18:              /* Grp16 (prefetch/nop) */
14705
 
-               break;
14706
 
-       case 0x20: /* mov cr, reg */
14707
 
-               if (modrm_mod != 3)
14708
 
-                       goto cannot_emulate;
14709
 
-               _regs[modrm_rm] = realmode_get_cr(ctxt->vcpu, modrm_reg);
14710
 
-               break;
14711
 
-       case 0x22: /* mov reg, cr */
14712
 
-               if (modrm_mod != 3)
14713
 
-                       goto cannot_emulate;
14714
 
-               realmode_set_cr(ctxt->vcpu, modrm_reg, modrm_val, &_eflags);
14715
 
-               break;
14716
 
-       case 0x30:
14717
 
-               /* wrmsr */
14718
 
-               msr_data = (u32)_regs[VCPU_REGS_RAX]
14719
 
-                       | ((u64)_regs[VCPU_REGS_RDX] << 32);
14720
 
-               rc = kvm_set_msr(ctxt->vcpu, _regs[VCPU_REGS_RCX], msr_data);
14721
 
-               if (rc) {
14722
 
-                       kvm_x86_ops->inject_gp(ctxt->vcpu, 0);
14723
 
-                       _eip = ctxt->vcpu->rip;
14724
 
-               }
14725
 
-               rc = X86EMUL_CONTINUE;
14726
 
-               break;
14727
 
-       case 0x32:
14728
 
-               /* rdmsr */
14729
 
-               rc = kvm_get_msr(ctxt->vcpu, _regs[VCPU_REGS_RCX], &msr_data);
14730
 
-               if (rc) {
14731
 
-                       kvm_x86_ops->inject_gp(ctxt->vcpu, 0);
14732
 
-                       _eip = ctxt->vcpu->rip;
14733
 
-               } else {
14734
 
-                       _regs[VCPU_REGS_RAX] = (u32)msr_data;
14735
 
-                       _regs[VCPU_REGS_RDX] = msr_data >> 32;
14736
 
-               }
14737
 
-               rc = X86EMUL_CONTINUE;
14738
 
-               break;
14739
 
-       case 0x80 ... 0x8f: /* jnz rel, etc*/ {
14740
 
-               long int rel;
14741
 
-
14742
 
-               switch (op_bytes) {
14743
 
-               case 2:
14744
 
-                       rel = insn_fetch(s16, 2, _eip);
14745
 
-                       break;
14746
 
-               case 4:
14747
 
-                       rel = insn_fetch(s32, 4, _eip);
14748
 
-                       break;
14749
 
-               case 8:
14750
 
-                       rel = insn_fetch(s64, 8, _eip);
14751
 
-                       break;
14752
 
-               default:
14753
 
-                       DPRINTF("jnz: Invalid op_bytes\n");
14754
 
-                       goto cannot_emulate;
14755
 
-               }
14756
 
-               if (test_cc(b, _eflags))
14757
 
-                       JMP_REL(rel);
14758
 
-               break;
14759
 
-       }
14760
 
-       case 0xc7:              /* Grp9 (cmpxchg8b) */
14761
 
-               {
14762
 
-                       u64 old, new;
14763
 
-                       if ((rc = ops->read_emulated(cr2, &old, 8, ctxt->vcpu))
14764
 
-                                                                       != 0)
14765
 
-                               goto done;
14766
 
-                       if (((u32) (old >> 0) != (u32) _regs[VCPU_REGS_RAX]) ||
14767
 
-                           ((u32) (old >> 32) != (u32) _regs[VCPU_REGS_RDX])) {
14768
 
-                               _regs[VCPU_REGS_RAX] = (u32) (old >> 0);
14769
 
-                               _regs[VCPU_REGS_RDX] = (u32) (old >> 32);
14770
 
-                               _eflags &= ~EFLG_ZF;
14771
 
-                       } else {
14772
 
-                               new = ((u64)_regs[VCPU_REGS_RCX] << 32)
14773
 
-                                       | (u32) _regs[VCPU_REGS_RBX];
14774
 
-                               if ((rc = ops->cmpxchg_emulated(cr2, &old,
14775
 
-                                                         &new, 8, ctxt->vcpu)) != 0)
14776
 
-                                       goto done;
14777
 
-                               _eflags |= EFLG_ZF;
14778
 
-                       }
14779
 
-                       break;
14780
 
-               }
14781
 
-       }
14782
 
-       goto writeback;
14783
 
-
14784
 
-cannot_emulate:
14785
 
-       DPRINTF("Cannot emulate %02x\n", b);
14786
 
-       return -1;
14787
 
-}
14788
 
-
14789
 
-#ifdef __XEN__
14790
 
-
14791
 
-#include <asm/mm.h>
14792
 
-#include <asm/uaccess.h>
14793
 
-
14794
 
-int
14795
 
-x86_emulate_read_std(unsigned long addr,
14796
 
-                    unsigned long *val,
14797
 
-                    unsigned int bytes, struct x86_emulate_ctxt *ctxt)
14798
 
-{
14799
 
-       unsigned int rc;
14800
 
-
14801
 
-       *val = 0;
14802
 
-
14803
 
-       if ((rc = copy_from_user((void *)val, (void *)addr, bytes)) != 0) {
14804
 
-               propagate_page_fault(addr + bytes - rc, 0);     /* read fault */
14805
 
-               return X86EMUL_PROPAGATE_FAULT;
14806
 
-       }
14807
 
-
14808
 
-       return X86EMUL_CONTINUE;
14809
 
-}
14810
 
-
14811
 
-int
14812
 
-x86_emulate_write_std(unsigned long addr,
14813
 
-                     unsigned long val,
14814
 
-                     unsigned int bytes, struct x86_emulate_ctxt *ctxt)
14815
 
-{
14816
 
-       unsigned int rc;
14817
 
-
14818
 
-       if ((rc = copy_to_user((void *)addr, (void *)&val, bytes)) != 0) {
14819
 
-               propagate_page_fault(addr + bytes - rc, PGERR_write_access);
14820
 
-               return X86EMUL_PROPAGATE_FAULT;
14821
 
-       }
14822
 
-
14823
 
-       return X86EMUL_CONTINUE;
14824
 
-}
14825
 
-
14826
 
-#endif
14827
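The wrmsr/rdmsr emulation above moves the 64-bit MSR value through the architectural EDX:EAX split: the low 32 bits travel in RAX/EAX, the high 32 bits in RDX/EDX. A minimal stand-alone sketch of that packing, using made-up helper names rather than anything from the patch:

/* Illustrative helpers mirroring the EDX:EAX split used by the
 * wrmsr/rdmsr emulation above; names are invented for the example. */
static inline unsigned long long msr_pack(unsigned int eax, unsigned int edx)
{
        return (unsigned long long)eax | ((unsigned long long)edx << 32);
}

static inline void msr_unpack(unsigned long long val,
                              unsigned int *eax, unsigned int *edx)
{
        *eax = (unsigned int)val;          /* low 32 bits  */
        *edx = (unsigned int)(val >> 32);  /* high 32 bits */
}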
 
diff --git a/include/asm-x86/Kbuild b/include/asm-x86/Kbuild
14828
 
index 12db5a1..da5eb69 100644
14829
 
--- a/include/asm-x86/Kbuild
14830
 
+++ b/include/asm-x86/Kbuild
14831
 
@@ -3,6 +3,7 @@ include include/asm-generic/Kbuild.asm
14832
 
 header-y += boot.h
14833
 
 header-y += bootparam.h
14834
 
 header-y += debugreg.h
14835
 
+header-y += kvm.h
14836
 
 header-y += ldt.h
14837
 
 header-y += msr-index.h
14838
 
 header-y += prctl.h
14839
 
diff --git a/include/asm-x86/kvm.h b/include/asm-x86/kvm.h
14840
 
new file mode 100644
14841
 
index 0000000..17afa81
14842
 
--- /dev/null
14843
 
+++ b/include/asm-x86/kvm.h
14844
 
@@ -0,0 +1,176 @@
14845
 
+#ifndef __LINUX_KVM_X86_H
14846
 
+#define __LINUX_KVM_X86_H
14847
 
+
14848
 
+/*
14849
 
+ * KVM x86 specific structures and definitions
14850
 
+ *
14851
 
+ */
14852
 
+
14853
 
+#include <asm/types.h>
14854
 
+#include <linux/ioctl.h>
14855
 
+
14856
 
+/* Architectural interrupt line count. */
14857
 
+#define KVM_NR_INTERRUPTS 256
14858
 
+
14859
 
+struct kvm_memory_alias {
14860
 
+       __u32 slot;  /* this has a different namespace than memory slots */
14861
 
+       __u32 flags;
14862
 
+       __u64 guest_phys_addr;
14863
 
+       __u64 memory_size;
14864
 
+       __u64 target_phys_addr;
14865
 
+};
14866
 
+
14867
 
+/* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */
14868
 
+struct kvm_pic_state {
14869
 
+       __u8 last_irr;  /* edge detection */
14870
 
+       __u8 irr;               /* interrupt request register */
14871
 
+       __u8 imr;               /* interrupt mask register */
14872
 
+       __u8 isr;               /* interrupt service register */
14873
 
+       __u8 priority_add;      /* highest irq priority */
14874
 
+       __u8 irq_base;
14875
 
+       __u8 read_reg_select;
14876
 
+       __u8 poll;
14877
 
+       __u8 special_mask;
14878
 
+       __u8 init_state;
14879
 
+       __u8 auto_eoi;
14880
 
+       __u8 rotate_on_auto_eoi;
14881
 
+       __u8 special_fully_nested_mode;
14882
 
+       __u8 init4;             /* true if 4 byte init */
14883
 
+       __u8 elcr;              /* PIIX edge/trigger selection */
14884
 
+       __u8 elcr_mask;
14885
 
+};
14886
 
+
14887
 
+#define KVM_IOAPIC_NUM_PINS  24
14888
 
+struct kvm_ioapic_state {
14889
 
+       __u64 base_address;
14890
 
+       __u32 ioregsel;
14891
 
+       __u32 id;
14892
 
+       __u32 irr;
14893
 
+       __u32 pad;
14894
 
+       union {
14895
 
+               __u64 bits;
14896
 
+               struct {
14897
 
+                       __u8 vector;
14898
 
+                       __u8 delivery_mode:3;
14899
 
+                       __u8 dest_mode:1;
14900
 
+                       __u8 delivery_status:1;
14901
 
+                       __u8 polarity:1;
14902
 
+                       __u8 remote_irr:1;
14903
 
+                       __u8 trig_mode:1;
14904
 
+                       __u8 mask:1;
14905
 
+                       __u8 reserve:7;
14906
 
+                       __u8 reserved[4];
14907
 
+                       __u8 dest_id;
14908
 
+               } fields;
14909
 
+       } redirtbl[KVM_IOAPIC_NUM_PINS];
14910
 
+};
14911
 
+
14912
 
+#define KVM_IRQCHIP_PIC_MASTER   0
14913
 
+#define KVM_IRQCHIP_PIC_SLAVE    1
14914
 
+#define KVM_IRQCHIP_IOAPIC       2
14915
 
+
14916
 
+/* for KVM_GET_REGS and KVM_SET_REGS */
14917
 
+struct kvm_regs {
14918
 
+       /* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
14919
 
+       __u64 rax, rbx, rcx, rdx;
14920
 
+       __u64 rsi, rdi, rsp, rbp;
14921
 
+       __u64 r8,  r9,  r10, r11;
14922
 
+       __u64 r12, r13, r14, r15;
14923
 
+       __u64 rip, rflags;
14924
 
+};
14925
 
+
14926
 
+/* for KVM_GET_LAPIC and KVM_SET_LAPIC */
14927
 
+#define KVM_APIC_REG_SIZE 0x400
14928
 
+struct kvm_lapic_state {
14929
 
+       char regs[KVM_APIC_REG_SIZE];
14930
 
+};
14931
 
+
14932
 
+struct kvm_segment {
14933
 
+       __u64 base;
14934
 
+       __u32 limit;
14935
 
+       __u16 selector;
14936
 
+       __u8  type;
14937
 
+       __u8  present, dpl, db, s, l, g, avl;
14938
 
+       __u8  unusable;
14939
 
+       __u8  padding;
14940
 
+};
14941
 
+
14942
 
+struct kvm_dtable {
14943
 
+       __u64 base;
14944
 
+       __u16 limit;
14945
 
+       __u16 padding[3];
14946
 
+};
14947
 
+
14948
 
+
14949
 
+/* for KVM_GET_SREGS and KVM_SET_SREGS */
14950
 
+struct kvm_sregs {
14951
 
+       /* out (KVM_GET_SREGS) / in (KVM_SET_SREGS) */
14952
 
+       struct kvm_segment cs, ds, es, fs, gs, ss;
14953
 
+       struct kvm_segment tr, ldt;
14954
 
+       struct kvm_dtable gdt, idt;
14955
 
+       __u64 cr0, cr2, cr3, cr4, cr8;
14956
 
+       __u64 efer;
14957
 
+       __u64 apic_base;
14958
 
+       __u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
14959
 
+};
14960
 
+
14961
 
+struct kvm_msr_entry {
14962
 
+       __u32 index;
14963
 
+       __u32 reserved;
14964
 
+       __u64 data;
14965
 
+};
14966
 
+
14967
 
+/* for KVM_GET_MSRS and KVM_SET_MSRS */
14968
 
+struct kvm_msrs {
14969
 
+       __u32 nmsrs; /* number of msrs in entries */
14970
 
+       __u32 pad;
14971
 
+
14972
 
+       struct kvm_msr_entry entries[0];
14973
 
+};
14974
 
+
14975
 
+/* for KVM_GET_MSR_INDEX_LIST */
14976
 
+struct kvm_msr_list {
14977
 
+       __u32 nmsrs; /* number of msrs in entries */
14978
 
+       __u32 indices[0];
14979
 
+};
14980
 
+
14981
 
+
14982
 
+struct kvm_cpuid_entry {
14983
 
+       __u32 function;
14984
 
+       __u32 eax;
14985
 
+       __u32 ebx;
14986
 
+       __u32 ecx;
14987
 
+       __u32 edx;
14988
 
+       __u32 padding;
14989
 
+};
14990
 
+
14991
 
+/* for KVM_SET_CPUID */
14992
 
+struct kvm_cpuid {
14993
 
+       __u32 nent;
14994
 
+       __u32 padding;
14995
 
+       struct kvm_cpuid_entry entries[0];
14996
 
+};
14997
 
+
14998
 
+struct kvm_cpuid_entry2 {
14999
 
+       __u32 function;
15000
 
+       __u32 index;
15001
 
+       __u32 flags;
15002
 
+       __u32 eax;
15003
 
+       __u32 ebx;
15004
 
+       __u32 ecx;
15005
 
+       __u32 edx;
15006
 
+       __u32 padding[3];
15007
 
+};
15008
 
+
15009
 
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1
15010
 
+#define KVM_CPUID_FLAG_STATEFUL_FUNC    2
15011
 
+#define KVM_CPUID_FLAG_STATE_READ_NEXT  4
15012
 
+
15013
 
+/* for KVM_SET_CPUID2 */
15014
 
+struct kvm_cpuid2 {
15015
 
+       __u32 nent;
15016
 
+       __u32 padding;
15017
 
+       struct kvm_cpuid_entry2 entries[0];
15018
 
+};
15019
 
+
15020
 
+#endif
15021
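The kvm_msrs, kvm_cpuid and kvm_cpuid2 structures above end in zero-length arrays, so a caller sizes the allocation for however many entries it intends to pass through the ioctl. A rough userspace sketch of that pattern, assuming the KVM_GET_MSRS vcpu ioctl from <linux/kvm.h> and with error handling trimmed:

#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Read n MSRs from a vcpu fd; indices[] are chosen by the caller. */
static struct kvm_msrs *read_msrs(int vcpu_fd, const __u32 *indices, int n)
{
        struct kvm_msrs *msrs;
        int i;

        /* header plus n trailing kvm_msr_entry slots */
        msrs = calloc(1, sizeof(*msrs) + n * sizeof(struct kvm_msr_entry));
        if (!msrs)
                return NULL;

        msrs->nmsrs = n;
        for (i = 0; i < n; i++)
                msrs->entries[i].index = indices[i];

        /* KVM_GET_MSRS fills entries[i].data for each requested index. */
        if (ioctl(vcpu_fd, KVM_GET_MSRS, msrs) < 0) {
                free(msrs);
                return NULL;
        }
        return msrs;
}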
 
diff --git a/drivers/kvm/kvm.h b/include/asm-x86/kvm_host.h
15022
 
similarity index 64%
15023
 
rename from drivers/kvm/kvm.h
15024
 
rename to include/asm-x86/kvm_host.h
15025
 
index 3b0bc4b..28940e1 100644
15026
 
--- a/drivers/kvm/kvm.h
15027
 
+++ b/include/asm-x86/kvm_host.h
15028
 
@@ -1,23 +1,24 @@
15029
 
-#ifndef __KVM_H
15030
 
-#define __KVM_H
15031
 
-
15032
 
-/*
15033
 
+#/*
15034
 
+ * Kernel-based Virtual Machine driver for Linux
15035
 
+ *
15036
 
+ * This header defines architecture specific interfaces, x86 version
15037
 
+ *
15038
 
  * This work is licensed under the terms of the GNU GPL, version 2.  See
15039
 
  * the COPYING file in the top-level directory.
15040
 
+ *
15041
 
  */
15042
 
 
15043
 
+#ifndef ASM_KVM_HOST_H
15044
 
+#define ASM_KVM_HOST_H
15045
 
+
15046
 
 #include <linux/types.h>
15047
 
-#include <linux/list.h>
15048
 
-#include <linux/mutex.h>
15049
 
-#include <linux/spinlock.h>
15050
 
-#include <linux/signal.h>
15051
 
-#include <linux/sched.h>
15052
 
 #include <linux/mm.h>
15053
 
-#include <linux/preempt.h>
15054
 
-#include <asm/signal.h>
15055
 
 
15056
 
 #include <linux/kvm.h>
15057
 
 #include <linux/kvm_para.h>
15058
 
+#include <linux/kvm_types.h>
15059
 
+
15060
 
+#include <asm/desc.h>
15061
 
 
15062
 
 #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1)
15063
 
 #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD))
15064
 
@@ -37,15 +38,8 @@
15065
 
 #define INVALID_PAGE (~(hpa_t)0)
15066
 
 #define UNMAPPED_GVA (~(gpa_t)0)
15067
 
 
15068
 
-#define KVM_MAX_VCPUS 4
15069
 
-#define KVM_ALIAS_SLOTS 4
15070
 
-#define KVM_MEMORY_SLOTS 8
15071
 
-#define KVM_NUM_MMU_PAGES 1024
15072
 
-#define KVM_MIN_FREE_MMU_PAGES 5
15073
 
-#define KVM_REFILL_PAGES 25
15074
 
-#define KVM_MAX_CPUID_ENTRIES 40
15075
 
-
15076
 
 #define DE_VECTOR 0
15077
 
+#define UD_VECTOR 6
15078
 
 #define NM_VECTOR 7
15079
 
 #define DF_VECTOR 8
15080
 
 #define TS_VECTOR 10
15081
 
@@ -59,31 +53,66 @@
15082
 
 
15083
 
 #define IOPL_SHIFT 12
15084
 
 
15085
 
-#define KVM_PIO_PAGE_OFFSET 1
15086
 
+#define KVM_ALIAS_SLOTS 4
15087
 
 
15088
 
-/*
15089
 
- * vcpu->requests bit members
15090
 
- */
15091
 
-#define KVM_TLB_FLUSH 0
15092
 
+#define KVM_PERMILLE_MMU_PAGES 20
15093
 
+#define KVM_MIN_ALLOC_MMU_PAGES 64
15094
 
+#define KVM_NUM_MMU_PAGES 1024
15095
 
+#define KVM_MIN_FREE_MMU_PAGES 5
15096
 
+#define KVM_REFILL_PAGES 25
15097
 
+#define KVM_MAX_CPUID_ENTRIES 40
15098
 
 
15099
 
-/*
15100
 
- * Address types:
15101
 
- *
15102
 
- *  gva - guest virtual address
15103
 
- *  gpa - guest physical address
15104
 
- *  gfn - guest frame number
15105
 
- *  hva - host virtual address
15106
 
- *  hpa - host physical address
15107
 
- *  hfn - host frame number
15108
 
- */
15109
 
+extern spinlock_t kvm_lock;
15110
 
+extern struct list_head vm_list;
15111
 
+
15112
 
+struct kvm_vcpu;
15113
 
+struct kvm;
15114
 
+
15115
 
+enum {
15116
 
+       VCPU_REGS_RAX = 0,
15117
 
+       VCPU_REGS_RCX = 1,
15118
 
+       VCPU_REGS_RDX = 2,
15119
 
+       VCPU_REGS_RBX = 3,
15120
 
+       VCPU_REGS_RSP = 4,
15121
 
+       VCPU_REGS_RBP = 5,
15122
 
+       VCPU_REGS_RSI = 6,
15123
 
+       VCPU_REGS_RDI = 7,
15124
 
+#ifdef CONFIG_X86_64
15125
 
+       VCPU_REGS_R8 = 8,
15126
 
+       VCPU_REGS_R9 = 9,
15127
 
+       VCPU_REGS_R10 = 10,
15128
 
+       VCPU_REGS_R11 = 11,
15129
 
+       VCPU_REGS_R12 = 12,
15130
 
+       VCPU_REGS_R13 = 13,
15131
 
+       VCPU_REGS_R14 = 14,
15132
 
+       VCPU_REGS_R15 = 15,
15133
 
+#endif
15134
 
+       NR_VCPU_REGS
15135
 
+};
15136
 
+
15137
 
+enum {
15138
 
+       VCPU_SREG_CS,
15139
 
+       VCPU_SREG_DS,
15140
 
+       VCPU_SREG_ES,
15141
 
+       VCPU_SREG_FS,
15142
 
+       VCPU_SREG_GS,
15143
 
+       VCPU_SREG_SS,
15144
 
+       VCPU_SREG_TR,
15145
 
+       VCPU_SREG_LDTR,
15146
 
+};
15147
 
 
15148
 
-typedef unsigned long  gva_t;
15149
 
-typedef u64            gpa_t;
15150
 
-typedef unsigned long  gfn_t;
15151
 
+#include <asm/kvm_x86_emulate.h>
15152
 
 
15153
 
-typedef unsigned long  hva_t;
15154
 
-typedef u64            hpa_t;
15155
 
-typedef unsigned long  hfn_t;
15156
 
+#define KVM_NR_MEM_OBJS 40
15157
 
+
15158
 
+/*
15159
 
+ * We don't want allocation failures within the mmu code, so we preallocate
15160
 
+ * enough memory for a single page fault in a cache.
15161
 
+ */
15162
 
+struct kvm_mmu_memory_cache {
15163
 
+       int nobjs;
15164
 
+       void *objects[KVM_NR_MEM_OBJS];
15165
 
+};
15166
 
 
15167
 
 #define NR_PTE_CHAIN_ENTRIES 5
15168
 
 
15169
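The kvm_mmu_memory_cache introduced above (40 preallocated objects per cache) exists so the mmu never has to allocate, and therefore never has to fail, in the middle of handling a guest page fault: the cache is topped up beforehand and objects are simply popped later. A generic sketch of that top-up/pop pattern, with hypothetical names and the kmem-cache details left out:

#define NR_MEM_OBJS 40          /* mirrors KVM_NR_MEM_OBJS above */

struct mem_cache {
        int   nobjs;
        void *objects[NR_MEM_OBJS];
};

/* Fill the cache up to 'min' objects while failure is still tolerable. */
static int cache_topup(struct mem_cache *c, int min, void *(*alloc_obj)(void))
{
        while (c->nobjs < min) {
                void *obj = alloc_obj();
                if (!obj)
                        return -1;      /* -ENOMEM in the kernel */
                c->objects[c->nobjs++] = obj;
        }
        return 0;
}

/* Later, in a context that must not fail, just pop a preallocated object. */
static void *cache_alloc(struct mem_cache *c)
{
        return c->objects[--c->nobjs];
}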
 
@@ -99,7 +128,7 @@ struct kvm_pte_chain {
15170
 
  *   bits 4:7 - page table level for this shadow (1-4)
15171
 
  *   bits 8:9 - page table quadrant for 2-level guests
15172
 
  *   bit   16 - "metaphysical" - gfn is not a real page (huge page/real mode)
15173
 
- *   bits 17:19 - "access" - the user, writable, and nx bits of a huge page pde
15174
 
+ *   bits 17:19 - common access permissions for all ptes in this shadow page
15175
 
  */
15176
 
 union kvm_mmu_page_role {
15177
 
        unsigned word;
15178
 
@@ -109,7 +138,7 @@ union kvm_mmu_page_role {
15179
 
                unsigned quadrant : 2;
15180
 
                unsigned pad_for_nice_hex_output : 6;
15181
 
                unsigned metaphysical : 1;
15182
 
-               unsigned hugepage_access : 3;
15183
 
+               unsigned access : 3;
15184
 
        };
15185
 
 };
15186
 
 
15187
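The role union above packs every attribute that distinguishes one shadow page from another into a single 32-bit word, so the mmu can hash and compare shadow pages with one integer comparison instead of checking fields one by one. A cut-down illustration of the idea, following the bit positions described in the comment rather than the full kernel layout:

union page_role {
        unsigned word;                      /* hashed/compared as one value */
        struct {
                unsigned pad0         : 4;  /* bits 0:3  (not shown in this hunk) */
                unsigned level        : 4;  /* bits 4:7  - shadow page table level */
                unsigned quadrant     : 2;  /* bits 8:9  - quadrant for 2-level guests */
                unsigned pad1         : 6;
                unsigned metaphysical : 1;  /* bit  16 */
                unsigned access       : 3;  /* bits 17:19 - common access permissions */
        };
};

/* Shadow pages are interchangeable iff every role bit matches. */
static int role_compatible(union page_role a, union page_role b)
{
        return a.word == b.word;
}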
 
@@ -125,6 +154,8 @@ struct kvm_mmu_page {
15188
 
        union kvm_mmu_page_role role;
15189
 
 
15190
 
        u64 *spt;
15191
 
+       /* hold the gfn of each spte inside spt */
15192
 
+       gfn_t *gfns;
15193
 
        unsigned long slot_bitmap; /* One bit set per slot which has memory
15194
 
                                    * in this shadow page.
15195
 
                                    */
15196
 
@@ -136,9 +167,6 @@ struct kvm_mmu_page {
15197
 
        };
15198
 
 };
15199
 
 
15200
 
-struct kvm_vcpu;
15201
 
-extern struct kmem_cache *kvm_vcpu_cache;
15202
 
-
15203
 
 /*
15204
 
  * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
15205
 
  * 32-bit).  The kvm_mmu structure abstracts the details of the current mmu
15206
 
@@ -149,6 +177,8 @@ struct kvm_mmu {
15207
 
        int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err);
15208
 
        void (*free)(struct kvm_vcpu *vcpu);
15209
 
        gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva);
15210
 
+       void (*prefetch_page)(struct kvm_vcpu *vcpu,
15211
 
+                             struct kvm_mmu_page *page);
15212
 
        hpa_t root_hpa;
15213
 
        int root_level;
15214
 
        int shadow_root_level;
15215
 
@@ -156,159 +186,9 @@ struct kvm_mmu {
15216
 
        u64 *pae_root;
15217
 
 };
15218
 
 
15219
 
-#define KVM_NR_MEM_OBJS 20
15220
 
-
15221
 
-struct kvm_mmu_memory_cache {
15222
 
-       int nobjs;
15223
 
-       void *objects[KVM_NR_MEM_OBJS];
15224
 
-};
15225
 
-
15226
 
-/*
15227
 
- * We don't want allocation failures within the mmu code, so we preallocate
15228
 
- * enough memory for a single page fault in a cache.
15229
 
- */
15230
 
-struct kvm_guest_debug {
15231
 
-       int enabled;
15232
 
-       unsigned long bp[4];
15233
 
-       int singlestep;
15234
 
-};
15235
 
-
15236
 
-enum {
15237
 
-       VCPU_REGS_RAX = 0,
15238
 
-       VCPU_REGS_RCX = 1,
15239
 
-       VCPU_REGS_RDX = 2,
15240
 
-       VCPU_REGS_RBX = 3,
15241
 
-       VCPU_REGS_RSP = 4,
15242
 
-       VCPU_REGS_RBP = 5,
15243
 
-       VCPU_REGS_RSI = 6,
15244
 
-       VCPU_REGS_RDI = 7,
15245
 
-#ifdef CONFIG_X86_64
15246
 
-       VCPU_REGS_R8 = 8,
15247
 
-       VCPU_REGS_R9 = 9,
15248
 
-       VCPU_REGS_R10 = 10,
15249
 
-       VCPU_REGS_R11 = 11,
15250
 
-       VCPU_REGS_R12 = 12,
15251
 
-       VCPU_REGS_R13 = 13,
15252
 
-       VCPU_REGS_R14 = 14,
15253
 
-       VCPU_REGS_R15 = 15,
15254
 
-#endif
15255
 
-       NR_VCPU_REGS
15256
 
-};
15257
 
-
15258
 
-enum {
15259
 
-       VCPU_SREG_CS,
15260
 
-       VCPU_SREG_DS,
15261
 
-       VCPU_SREG_ES,
15262
 
-       VCPU_SREG_FS,
15263
 
-       VCPU_SREG_GS,
15264
 
-       VCPU_SREG_SS,
15265
 
-       VCPU_SREG_TR,
15266
 
-       VCPU_SREG_LDTR,
15267
 
-};
15268
 
-
15269
 
-struct kvm_pio_request {
15270
 
-       unsigned long count;
15271
 
-       int cur_count;
15272
 
-       struct page *guest_pages[2];
15273
 
-       unsigned guest_page_offset;
15274
 
-       int in;
15275
 
-       int port;
15276
 
-       int size;
15277
 
-       int string;
15278
 
-       int down;
15279
 
-       int rep;
15280
 
-};
15281
 
-
15282
 
-struct kvm_stat {
15283
 
-       u32 pf_fixed;
15284
 
-       u32 pf_guest;
15285
 
-       u32 tlb_flush;
15286
 
-       u32 invlpg;
15287
 
-
15288
 
-       u32 exits;
15289
 
-       u32 io_exits;
15290
 
-       u32 mmio_exits;
15291
 
-       u32 signal_exits;
15292
 
-       u32 irq_window_exits;
15293
 
-       u32 halt_exits;
15294
 
-       u32 halt_wakeup;
15295
 
-       u32 request_irq_exits;
15296
 
-       u32 irq_exits;
15297
 
-       u32 light_exits;
15298
 
-       u32 efer_reload;
15299
 
-};
15300
 
-
15301
 
-struct kvm_io_device {
15302
 
-       void (*read)(struct kvm_io_device *this,
15303
 
-                    gpa_t addr,
15304
 
-                    int len,
15305
 
-                    void *val);
15306
 
-       void (*write)(struct kvm_io_device *this,
15307
 
-                     gpa_t addr,
15308
 
-                     int len,
15309
 
-                     const void *val);
15310
 
-       int (*in_range)(struct kvm_io_device *this, gpa_t addr);
15311
 
-       void (*destructor)(struct kvm_io_device *this);
15312
 
-
15313
 
-       void             *private;
15314
 
-};
15315
 
-
15316
 
-static inline void kvm_iodevice_read(struct kvm_io_device *dev,
15317
 
-                                    gpa_t addr,
15318
 
-                                    int len,
15319
 
-                                    void *val)
15320
 
-{
15321
 
-       dev->read(dev, addr, len, val);
15322
 
-}
15323
 
-
15324
 
-static inline void kvm_iodevice_write(struct kvm_io_device *dev,
15325
 
-                                     gpa_t addr,
15326
 
-                                     int len,
15327
 
-                                     const void *val)
15328
 
-{
15329
 
-       dev->write(dev, addr, len, val);
15330
 
-}
15331
 
-
15332
 
-static inline int kvm_iodevice_inrange(struct kvm_io_device *dev, gpa_t addr)
15333
 
-{
15334
 
-       return dev->in_range(dev, addr);
15335
 
-}
15336
 
-
15337
 
-static inline void kvm_iodevice_destructor(struct kvm_io_device *dev)
15338
 
-{
15339
 
-       if (dev->destructor)
15340
 
-               dev->destructor(dev);
15341
 
-}
15342
 
-
15343
 
-/*
15344
 
- * It would be nice to use something smarter than a linear search, TBD...
15345
 
- * Thankfully we dont expect many devices to register (famous last words :),
15346
 
- * so until then it will suffice.  At least its abstracted so we can change
15347
 
- * in one place.
15348
 
- */
15349
 
-struct kvm_io_bus {
15350
 
-       int                   dev_count;
15351
 
-#define NR_IOBUS_DEVS 6
15352
 
-       struct kvm_io_device *devs[NR_IOBUS_DEVS];
15353
 
-};
15354
 
-
15355
 
-void kvm_io_bus_init(struct kvm_io_bus *bus);
15356
 
-void kvm_io_bus_destroy(struct kvm_io_bus *bus);
15357
 
-struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr);
15358
 
-void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
15359
 
-                            struct kvm_io_device *dev);
15360
 
-
15361
 
-struct kvm_vcpu {
15362
 
-       struct kvm *kvm;
15363
 
-       struct preempt_notifier preempt_notifier;
15364
 
-       int vcpu_id;
15365
 
-       struct mutex mutex;
15366
 
-       int   cpu;
15367
 
+struct kvm_vcpu_arch {
15368
 
        u64 host_tsc;
15369
 
-       struct kvm_run *run;
15370
 
        int interrupt_window_open;
15371
 
-       int guest_mode;
15372
 
-       unsigned long requests;
15373
 
        unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
15374
 
        DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS);
15375
 
        unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */
15376
 
@@ -317,9 +197,6 @@ struct kvm_vcpu {
15377
 
        unsigned long cr0;
15378
 
        unsigned long cr2;
15379
 
        unsigned long cr3;
15380
 
-       gpa_t para_state_gpa;
15381
 
-       struct page *para_state_page;
15382
 
-       gpa_t hypercall_gpa;
15383
 
        unsigned long cr4;
15384
 
        unsigned long cr8;
15385
 
        u64 pdptrs[4]; /* pae */
15386
 
@@ -344,29 +221,21 @@ struct kvm_vcpu {
15387
 
 
15388
 
        gfn_t last_pt_write_gfn;
15389
 
        int   last_pt_write_count;
15390
 
-
15391
 
-       struct kvm_guest_debug guest_debug;
15392
 
+       u64  *last_pte_updated;
15393
 
 
15394
 
        struct i387_fxsave_struct host_fx_image;
15395
 
        struct i387_fxsave_struct guest_fx_image;
15396
 
-       int fpu_active;
15397
 
-       int guest_fpu_loaded;
15398
 
-
15399
 
-       int mmio_needed;
15400
 
-       int mmio_read_completed;
15401
 
-       int mmio_is_write;
15402
 
-       int mmio_size;
15403
 
-       unsigned char mmio_data[8];
15404
 
-       gpa_t mmio_phys_addr;
15405
 
+
15406
 
        gva_t mmio_fault_cr2;
15407
 
        struct kvm_pio_request pio;
15408
 
        void *pio_data;
15409
 
-       wait_queue_head_t wq;
15410
 
 
15411
 
-       int sigset_active;
15412
 
-       sigset_t sigset;
15413
 
-
15414
 
-       struct kvm_stat stat;
15415
 
+       struct kvm_queued_exception {
15416
 
+               bool pending;
15417
 
+               bool has_error_code;
15418
 
+               u8 nr;
15419
 
+               u32 error_code;
15420
 
+       } exception;
15421
 
 
15422
 
        struct {
15423
 
                int active;
15424
 
@@ -381,7 +250,10 @@ struct kvm_vcpu {
15425
 
        int halt_request; /* real mode on Intel only */
15426
 
 
15427
 
        int cpuid_nent;
15428
 
-       struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES];
15429
 
+       struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES];
15430
 
+       /* emulate context */
15431
 
+
15432
 
+       struct x86_emulate_ctxt emulate_ctxt;
15433
 
 };
15434
 
 
15435
 
 struct kvm_mem_alias {
15436
 
@@ -390,51 +262,57 @@ struct kvm_mem_alias {
15437
 
        gfn_t target_gfn;
15438
 
 };
15439
 
 
15440
 
-struct kvm_memory_slot {
15441
 
-       gfn_t base_gfn;
15442
 
-       unsigned long npages;
15443
 
-       unsigned long flags;
15444
 
-       struct page **phys_mem;
15445
 
-       unsigned long *dirty_bitmap;
15446
 
-};
15447
 
-
15448
 
-struct kvm {
15449
 
-       struct mutex lock; /* protects everything except vcpus */
15450
 
+struct kvm_arch {
15451
 
        int naliases;
15452
 
        struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
15453
 
-       int nmemslots;
15454
 
-       struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS];
15455
 
+
15456
 
+       unsigned int n_free_mmu_pages;
15457
 
+       unsigned int n_requested_mmu_pages;
15458
 
+       unsigned int n_alloc_mmu_pages;
15459
 
+       struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
15460
 
        /*
15461
 
         * Hash table of struct kvm_mmu_page.
15462
 
         */
15463
 
        struct list_head active_mmu_pages;
15464
 
-       int n_free_mmu_pages;
15465
 
-       struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
15466
 
-       struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
15467
 
-       unsigned long rmap_overflow;
15468
 
-       struct list_head vm_list;
15469
 
-       struct file *filp;
15470
 
-       struct kvm_io_bus mmio_bus;
15471
 
-       struct kvm_io_bus pio_bus;
15472
 
        struct kvm_pic *vpic;
15473
 
        struct kvm_ioapic *vioapic;
15474
 
+
15475
 
        int round_robin_prev_vcpu;
15476
 
+       unsigned int tss_addr;
15477
 
+       struct page *apic_access_page;
15478
 
 };
15479
 
 
15480
 
-static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
15481
 
-{
15482
 
-       return kvm->vpic;
15483
 
-}
15484
 
+struct kvm_vm_stat {
15485
 
+       u32 mmu_shadow_zapped;
15486
 
+       u32 mmu_pte_write;
15487
 
+       u32 mmu_pte_updated;
15488
 
+       u32 mmu_pde_zapped;
15489
 
+       u32 mmu_flooded;
15490
 
+       u32 mmu_recycled;
15491
 
+       u32 remote_tlb_flush;
15492
 
+};
15493
 
 
15494
 
-static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
15495
 
-{
15496
 
-       return kvm->vioapic;
15497
 
-}
15498
 
+struct kvm_vcpu_stat {
15499
 
+       u32 pf_fixed;
15500
 
+       u32 pf_guest;
15501
 
+       u32 tlb_flush;
15502
 
+       u32 invlpg;
15503
 
 
15504
 
-static inline int irqchip_in_kernel(struct kvm *kvm)
15505
 
-{
15506
 
-       return pic_irqchip(kvm) != 0;
15507
 
-}
15508
 
+       u32 exits;
15509
 
+       u32 io_exits;
15510
 
+       u32 mmio_exits;
15511
 
+       u32 signal_exits;
15512
 
+       u32 irq_window_exits;
15513
 
+       u32 halt_exits;
15514
 
+       u32 halt_wakeup;
15515
 
+       u32 request_irq_exits;
15516
 
+       u32 irq_exits;
15517
 
+       u32 host_state_reload;
15518
 
+       u32 efer_reload;
15519
 
+       u32 fpu_reload;
15520
 
+       u32 insn_emulation;
15521
 
+       u32 insn_emulation_fail;
15522
 
+};
15523
 
 
15524
 
 struct descriptor_table {
15525
 
        u16 limit;
15526
 
@@ -453,7 +331,7 @@ struct kvm_x86_ops {
15527
 
        /* Create, but do not attach this VCPU */
15528
 
        struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
15529
 
        void (*vcpu_free)(struct kvm_vcpu *vcpu);
15530
 
-       void (*vcpu_reset)(struct kvm_vcpu *vcpu);
15531
 
+       int (*vcpu_reset)(struct kvm_vcpu *vcpu);
15532
 
 
15533
 
        void (*prepare_guest_switch)(struct kvm_vcpu *vcpu);
15534
 
        void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
15535
 
@@ -489,10 +367,6 @@ struct kvm_x86_ops {
15536
 
        void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
15537
 
 
15538
 
        void (*tlb_flush)(struct kvm_vcpu *vcpu);
15539
 
-       void (*inject_page_fault)(struct kvm_vcpu *vcpu,
15540
 
-                                 unsigned long addr, u32 err_code);
15541
 
-
15542
 
-       void (*inject_gp)(struct kvm_vcpu *vcpu, unsigned err_code);
15543
 
 
15544
 
        void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run);
15545
 
        int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu);
15546
 
@@ -501,54 +375,31 @@ struct kvm_x86_ops {
15547
 
                                unsigned char *hypercall_addr);
15548
 
        int (*get_irq)(struct kvm_vcpu *vcpu);
15549
 
        void (*set_irq)(struct kvm_vcpu *vcpu, int vec);
15550
 
+       void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
15551
 
+                               bool has_error_code, u32 error_code);
15552
 
+       bool (*exception_injected)(struct kvm_vcpu *vcpu);
15553
 
        void (*inject_pending_irq)(struct kvm_vcpu *vcpu);
15554
 
        void (*inject_pending_vectors)(struct kvm_vcpu *vcpu,
15555
 
                                       struct kvm_run *run);
15556
 
+
15557
 
+       int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
15558
 
 };
15559
 
 
15560
 
 extern struct kvm_x86_ops *kvm_x86_ops;
15561
 
 
15562
 
-/* The guest did something we don't support. */
15563
 
-#define pr_unimpl(vcpu, fmt, ...)                                      \
15564
 
- do {                                                                  \
15565
 
-       if (printk_ratelimit())                                         \
15566
 
-               printk(KERN_ERR "kvm: %i: cpu%i " fmt,                  \
15567
 
-                      current->tgid, (vcpu)->vcpu_id , ## __VA_ARGS__); \
15568
 
- } while(0)
15569
 
-
15570
 
-#define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt)
15571
 
-#define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt)
15572
 
-
15573
 
-int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
15574
 
-void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
15575
 
-
15576
 
-int kvm_init_x86(struct kvm_x86_ops *ops, unsigned int vcpu_size,
15577
 
-                 struct module *module);
15578
 
-void kvm_exit_x86(void);
15579
 
-
15580
 
 int kvm_mmu_module_init(void);
15581
 
 void kvm_mmu_module_exit(void);
15582
 
 
15583
 
 void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
15584
 
 int kvm_mmu_create(struct kvm_vcpu *vcpu);
15585
 
 int kvm_mmu_setup(struct kvm_vcpu *vcpu);
15586
 
+void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);
15587
 
 
15588
 
 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
15589
 
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
15590
 
 void kvm_mmu_zap_all(struct kvm *kvm);
15591
 
-
15592
 
-hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa);
15593
 
-#define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
15594
 
-#define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB)
15595
 
-static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; }
15596
 
-hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva);
15597
 
-struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva);
15598
 
-
15599
 
-extern hpa_t bad_page_address;
15600
 
-
15601
 
-struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
15602
 
-struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
15603
 
-void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
15604
 
+unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
15605
 
+void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
15606
 
 
15607
 
 enum emulation_result {
15608
 
        EMULATE_DONE,       /* no further processing */
15609
 
@@ -557,7 +408,7 @@ enum emulation_result {
15610
 
 };
15611
 
 
15612
 
 int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run,
15613
 
-                       unsigned long cr2, u16 error_code);
15614
 
+                       unsigned long cr2, u16 error_code, int no_decode);
15615
 
 void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context);
15616
 
 void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
15617
 
 void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
15618
 
@@ -572,7 +423,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
15619
 
 
15620
 
 struct x86_emulate_ctxt;
15621
 
 
15622
 
-int kvm_emulate_pio (struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
15623
 
+int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
15624
 
                     int size, unsigned port);
15625
 
 int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
15626
 
                           int size, unsigned long count, int down,
15627
 
@@ -581,7 +432,7 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
15628
 
 int kvm_emulate_halt(struct kvm_vcpu *vcpu);
15629
 
 int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address);
15630
 
 int emulate_clts(struct kvm_vcpu *vcpu);
15631
 
-int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr,
15632
 
+int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
15633
 
                    unsigned long *dest);
15634
 
 int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
15635
 
                    unsigned long value);
15636
 
@@ -597,15 +448,15 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l);
15637
 
 int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
15638
 
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data);
15639
 
 
15640
 
-void fx_init(struct kvm_vcpu *vcpu);
15641
 
+void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
15642
 
+void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
15643
 
+void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2,
15644
 
+                          u32 error_code);
15645
 
 
15646
 
-void kvm_resched(struct kvm_vcpu *vcpu);
15647
 
-void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
15648
 
-void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
15649
 
-void kvm_flush_remote_tlbs(struct kvm *kvm);
15650
 
+void fx_init(struct kvm_vcpu *vcpu);
15651
 
 
15652
 
 int emulator_read_std(unsigned long addr,
15653
 
-                      void *val,
15654
 
+                     void *val,
15655
 
                      unsigned int bytes,
15656
 
                      struct kvm_vcpu *vcpu);
15657
 
 int emulator_write_emulated(unsigned long addr,
15658
 
@@ -615,6 +466,7 @@ int emulator_write_emulated(unsigned long addr,
15659
 
 
15660
 
 unsigned long segment_base(u16 selector);
15661
 
 
15662
 
+void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu);
15663
 
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
15664
 
                       const u8 *new, int bytes);
15665
 
 int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
15666
 
@@ -622,66 +474,14 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
15667
 
 int kvm_mmu_load(struct kvm_vcpu *vcpu);
15668
 
 void kvm_mmu_unload(struct kvm_vcpu *vcpu);
15669
 
 
15670
 
-int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run);
15671
 
+int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
15672
 
 
15673
 
-static inline void kvm_guest_enter(void)
15674
 
-{
15675
 
-       current->flags |= PF_VCPU;
15676
 
-}
15677
 
+int kvm_fix_hypercall(struct kvm_vcpu *vcpu);
15678
 
 
15679
 
-static inline void kvm_guest_exit(void)
15680
 
-{
15681
 
-       current->flags &= ~PF_VCPU;
15682
 
-}
15683
 
+int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code);
15684
 
 
15685
 
-static inline int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
15686
 
-                                    u32 error_code)
15687
 
-{
15688
 
-       return vcpu->mmu.page_fault(vcpu, gva, error_code);
15689
 
-}
15690
 
-
15691
 
-static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
15692
 
-{
15693
 
-       if (unlikely(vcpu->kvm->n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))
15694
 
-               __kvm_mmu_free_some_pages(vcpu);
15695
 
-}
15696
 
-
15697
 
-static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
15698
 
-{
15699
 
-       if (likely(vcpu->mmu.root_hpa != INVALID_PAGE))
15700
 
-               return 0;
15701
 
-
15702
 
-       return kvm_mmu_load(vcpu);
15703
 
-}
15704
 
-
15705
 
-static inline int is_long_mode(struct kvm_vcpu *vcpu)
15706
 
-{
15707
 
-#ifdef CONFIG_X86_64
15708
 
-       return vcpu->shadow_efer & EFER_LME;
15709
 
-#else
15710
 
-       return 0;
15711
 
-#endif
15712
 
-}
15713
 
-
15714
 
-static inline int is_pae(struct kvm_vcpu *vcpu)
15715
 
-{
15716
 
-       return vcpu->cr4 & X86_CR4_PAE;
15717
 
-}
15718
 
-
15719
 
-static inline int is_pse(struct kvm_vcpu *vcpu)
15720
 
-{
15721
 
-       return vcpu->cr4 & X86_CR4_PSE;
15722
 
-}
15723
 
-
15724
 
-static inline int is_paging(struct kvm_vcpu *vcpu)
15725
 
-{
15726
 
-       return vcpu->cr0 & X86_CR0_PG;
15727
 
-}
15728
 
-
15729
 
-static inline int memslot_id(struct kvm *kvm, struct kvm_memory_slot *slot)
15730
 
-{
15731
 
-       return slot - kvm->memslots;
15732
 
-}
15733
 
+int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
15734
 
+int complete_pio(struct kvm_vcpu *vcpu);
15735
 
 
15736
 
 static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
15737
 
 {
15738
 
@@ -693,55 +493,55 @@ static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
15739
 
 static inline u16 read_fs(void)
15740
 
 {
15741
 
        u16 seg;
15742
 
-       asm ("mov %%fs, %0" : "=g"(seg));
15743
 
+       asm("mov %%fs, %0" : "=g"(seg));
15744
 
        return seg;
15745
 
 }
15746
 
 
15747
 
 static inline u16 read_gs(void)
15748
 
 {
15749
 
        u16 seg;
15750
 
-       asm ("mov %%gs, %0" : "=g"(seg));
15751
 
+       asm("mov %%gs, %0" : "=g"(seg));
15752
 
        return seg;
15753
 
 }
15754
 
 
15755
 
 static inline u16 read_ldt(void)
15756
 
 {
15757
 
        u16 ldt;
15758
 
-       asm ("sldt %0" : "=g"(ldt));
15759
 
+       asm("sldt %0" : "=g"(ldt));
15760
 
        return ldt;
15761
 
 }
15762
 
 
15763
 
 static inline void load_fs(u16 sel)
15764
 
 {
15765
 
-       asm ("mov %0, %%fs" : : "rm"(sel));
15766
 
+       asm("mov %0, %%fs" : : "rm"(sel));
15767
 
 }
15768
 
 
15769
 
 static inline void load_gs(u16 sel)
15770
 
 {
15771
 
-       asm ("mov %0, %%gs" : : "rm"(sel));
15772
 
+       asm("mov %0, %%gs" : : "rm"(sel));
15773
 
 }
15774
 
 
15775
 
 #ifndef load_ldt
15776
 
 static inline void load_ldt(u16 sel)
15777
 
 {
15778
 
-       asm ("lldt %0" : : "rm"(sel));
15779
 
+       asm("lldt %0" : : "rm"(sel));
15780
 
 }
15781
 
 #endif
15782
 
 
15783
 
 static inline void get_idt(struct descriptor_table *table)
15784
 
 {
15785
 
-       asm ("sidt %0" : "=m"(*table));
15786
 
+       asm("sidt %0" : "=m"(*table));
15787
 
 }
15788
 
 
15789
 
 static inline void get_gdt(struct descriptor_table *table)
15790
 
 {
15791
 
-       asm ("sgdt %0" : "=m"(*table));
15792
 
+       asm("sgdt %0" : "=m"(*table));
15793
 
 }
15794
 
 
15795
 
 static inline unsigned long read_tr_base(void)
15796
 
 {
15797
 
        u16 tr;
15798
 
-       asm ("str %0" : "=g"(tr));
15799
 
+       asm("str %0" : "=g"(tr));
15800
 
        return segment_base(tr);
15801
 
 }
15802
 
 
15803
 
@@ -757,17 +557,17 @@ static inline unsigned long read_msr(unsigned long msr)
15804
 
 
15805
 
 static inline void fx_save(struct i387_fxsave_struct *image)
15806
 
 {
15807
 
-       asm ("fxsave (%0)":: "r" (image));
15808
 
+       asm("fxsave (%0)":: "r" (image));
15809
 
 }
15810
 
 
15811
 
 static inline void fx_restore(struct i387_fxsave_struct *image)
15812
 
 {
15813
 
-       asm ("fxrstor (%0)":: "r" (image));
15814
 
+       asm("fxrstor (%0)":: "r" (image));
15815
 
 }
15816
 
 
15817
 
 static inline void fpu_init(void)
15818
 
 {
15819
 
-       asm ("finit");
15820
 
+       asm("finit");
15821
 
 }
15822
 
 
15823
 
 static inline u32 get_rdx_init_val(void)
15824
 
@@ -775,6 +575,11 @@ static inline u32 get_rdx_init_val(void)
15825
 
        return 0x600; /* P6 family */
15826
 
 }
15827
 
 
15828
 
+static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
15829
 
+{
15830
 
+       kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
15831
 
+}
15832
 
+
15833
 
 #define ASM_VMX_VMCLEAR_RAX       ".byte 0x66, 0x0f, 0xc7, 0x30"
15834
 
 #define ASM_VMX_VMLAUNCH          ".byte 0x0f, 0x01, 0xc2"
15835
 
 #define ASM_VMX_VMRESUME          ".byte 0x0f, 0x01, 0xc3"
15836
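For illustration only (not part of the patch): the hunks above drop the per-vendor inject_gp/inject_page_fault callbacks in favour of a software exception queue (vcpu->arch.exception) plus a queue_exception hook in kvm_x86_ops. A minimal sketch of how callers and the VM-entry path might use it; the example_* names are hypothetical, everything else is declared in the hunks above.

/* Sketch only, not part of the patch; example_* names are made up. */
static int example_wrmsr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	if (kvm_set_msr(vcpu, msr, data) != 0) {
		kvm_inject_gp(vcpu, 0);	/* queue #GP with error code 0 */
		return 1;		/* resume guest; fault is delivered on next entry */
	}
	return 0;
}

static void example_inject_pending(struct kvm_vcpu *vcpu)
{
	if (vcpu->arch.exception.pending)	/* drain the queue via the vendor hook */
		kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
					     vcpu->arch.exception.has_error_code,
					     vcpu->arch.exception.error_code);
	else
		kvm_x86_ops->inject_pending_irq(vcpu);
}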
 
diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h
15837
 
new file mode 100644
15838
 
index 0000000..c6f3fd8
15839
 
--- /dev/null
15840
 
+++ b/include/asm-x86/kvm_para.h
15841
 
@@ -0,0 +1,105 @@
15842
 
+#ifndef __X86_KVM_PARA_H
15843
 
+#define __X86_KVM_PARA_H
15844
 
+
15845
 
+/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx.  It
15846
 
+ * should be used to determine that a VM is running under KVM.
15847
 
+ */
15848
 
+#define KVM_CPUID_SIGNATURE    0x40000000
15849
 
+
15850
 
+/* This CPUID returns a feature bitmap in eax.  Before enabling a particular
15851
 
+ * paravirtualization, the appropriate feature bit should be checked.
15852
 
+ */
15853
 
+#define KVM_CPUID_FEATURES     0x40000001
15854
 
+
15855
 
+#ifdef __KERNEL__
15856
 
+#include <asm/processor.h>
15857
 
+
15858
 
+/* This instruction is vmcall.  On non-VT architectures, it will generate a
15859
 
+ * trap that we will then rewrite to the appropriate instruction.
15860
 
+ */
15861
 
+#define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1"
15862
 
+
15863
 
+/* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall
15864
 
+ * instruction.  The hypervisor may replace it with something else but only the
15865
 
+ * instructions are guaranteed to be supported.
15866
 
+ *
15867
 
+ * Up to four arguments may be passed in rbx, rcx, rdx, and rsi respectively.
15868
 
+ * The hypercall number should be placed in rax and the return value will be
15869
 
+ * placed in rax.  No other registers will be clobbered unless explicitly
15870
 
+ * noted by the particular hypercall.
15871
 
+ */
15872
 
+
15873
 
+static inline long kvm_hypercall0(unsigned int nr)
15874
 
+{
15875
 
+       long ret;
15876
 
+       asm volatile(KVM_HYPERCALL
15877
 
+                    : "=a"(ret)
15878
 
+                    : "a"(nr));
15879
 
+       return ret;
15880
 
+}
15881
 
+
15882
 
+static inline long kvm_hypercall1(unsigned int nr, unsigned long p1)
15883
 
+{
15884
 
+       long ret;
15885
 
+       asm volatile(KVM_HYPERCALL
15886
 
+                    : "=a"(ret)
15887
 
+                    : "a"(nr), "b"(p1));
15888
 
+       return ret;
15889
 
+}
15890
 
+
15891
 
+static inline long kvm_hypercall2(unsigned int nr, unsigned long p1,
15892
 
+                                 unsigned long p2)
15893
 
+{
15894
 
+       long ret;
15895
 
+       asm volatile(KVM_HYPERCALL
15896
 
+                    : "=a"(ret)
15897
 
+                    : "a"(nr), "b"(p1), "c"(p2));
15898
 
+       return ret;
15899
 
+}
15900
 
+
15901
 
+static inline long kvm_hypercall3(unsigned int nr, unsigned long p1,
15902
 
+                                 unsigned long p2, unsigned long p3)
15903
 
+{
15904
 
+       long ret;
15905
 
+       asm volatile(KVM_HYPERCALL
15906
 
+                    : "=a"(ret)
15907
 
+                    : "a"(nr), "b"(p1), "c"(p2), "d"(p3));
15908
 
+       return ret;
15909
 
+}
15910
 
+
15911
 
+static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
15912
 
+                                 unsigned long p2, unsigned long p3,
15913
 
+                                 unsigned long p4)
15914
 
+{
15915
 
+       long ret;
15916
 
+       asm volatile(KVM_HYPERCALL
15917
 
+                    : "=a"(ret)
15918
 
+                    : "a"(nr), "b"(p1), "c"(p2), "d"(p3), "S"(p4));
15919
 
+       return ret;
15920
 
+}
15921
 
+
15922
 
+static inline int kvm_para_available(void)
15923
 
+{
15924
 
+       unsigned int eax, ebx, ecx, edx;
15925
 
+       char signature[13];
15926
 
+
15927
 
+       cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx);
15928
 
+       memcpy(signature + 0, &ebx, 4);
15929
 
+       memcpy(signature + 4, &ecx, 4);
15930
 
+       memcpy(signature + 8, &edx, 4);
15931
 
+       signature[12] = 0;
15932
 
+
15933
 
+       if (strcmp(signature, "KVMKVMKVM") == 0)
15934
 
+               return 1;
15935
 
+
15936
 
+       return 0;
15937
 
+}
15938
 
+
15939
 
+static inline unsigned int kvm_arch_para_features(void)
15940
 
+{
15941
 
+       return cpuid_eax(KVM_CPUID_FEATURES);
15942
 
+}
15943
 
+
15944
 
+#endif
15945
 
+
15946
 
+#endif
15947
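For illustration only (not part of the patch): how a guest-side driver might use the probing and hypercall helpers defined in the new header above. KVM_HC_EXAMPLE and KVM_FEATURE_EXAMPLE are made-up placeholders; the real helpers are the ones declared above.

/* Sketch only: guest code probing for KVM and issuing one hypercall. */
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/kvm_para.h>

#define KVM_HC_EXAMPLE		42	/* hypothetical hypercall number */
#define KVM_FEATURE_EXAMPLE	0	/* hypothetical feature bit */

static int __init example_guest_init(void)
{
	long ret;

	if (!kvm_para_available())	/* CPUID 0x40000000 must return "KVMKVMKVM" */
		return -ENODEV;

	if (!kvm_para_has_feature(KVM_FEATURE_EXAMPLE))	/* CPUID 0x40000001 bitmap */
		return -ENODEV;

	ret = kvm_hypercall1(KVM_HC_EXAMPLE, 0x1234);	/* nr in rax, arg in rbx */
	return ret == -KVM_ENOSYS ? -ENOSYS : 0;
}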
 
diff --git a/drivers/kvm/x86_emulate.h b/include/asm-x86/kvm_x86_emulate.h
15948
 
similarity index 83%
15949
 
rename from drivers/kvm/x86_emulate.h
15950
 
rename to include/asm-x86/kvm_x86_emulate.h
15951
 
index 92c73aa..7db91b9 100644
15952
 
--- a/drivers/kvm/x86_emulate.h
15953
 
+++ b/include/asm-x86/kvm_x86_emulate.h
15954
 
@@ -63,17 +63,6 @@ struct x86_emulate_ops {
15955
 
                        unsigned int bytes, struct kvm_vcpu *vcpu);
15956
 
 
15957
 
        /*
15958
 
-        * write_std: Write bytes of standard (non-emulated/special) memory.
15959
 
-        *            Used for stack operations, and others.
15960
 
-        *  @addr:  [IN ] Linear address to which to write.
15961
 
-        *  @val:   [IN ] Value to write to memory (low-order bytes used as
15962
 
-        *                required).
15963
 
-        *  @bytes: [IN ] Number of bytes to write to memory.
15964
 
-        */
15965
 
-       int (*write_std)(unsigned long addr, const void *val,
15966
 
-                        unsigned int bytes, struct kvm_vcpu *vcpu);
15967
 
-
15968
 
-       /*
15969
 
         * read_emulated: Read bytes from emulated/special memory area.
15970
 
         *  @addr:  [IN ] Linear address from which to read.
15971
 
         *  @val:   [OUT] Value read from memory, zero-extended to 'u_long'.
15972
 
@@ -112,13 +101,50 @@ struct x86_emulate_ops {
15973
 
 
15974
 
 };
15975
 
 
15976
 
+/* Type, address-of, and value of an instruction's operand. */
15977
 
+struct operand {
15978
 
+       enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type;
15979
 
+       unsigned int bytes;
15980
 
+       unsigned long val, orig_val, *ptr;
15981
 
+};
15982
 
+
15983
 
+struct fetch_cache {
15984
 
+       u8 data[15];
15985
 
+       unsigned long start;
15986
 
+       unsigned long end;
15987
 
+};
15988
 
+
15989
 
+struct decode_cache {
15990
 
+       u8 twobyte;
15991
 
+       u8 b;
15992
 
+       u8 lock_prefix;
15993
 
+       u8 rep_prefix;
15994
 
+       u8 op_bytes;
15995
 
+       u8 ad_bytes;
15996
 
+       u8 rex_prefix;
15997
 
+       struct operand src;
15998
 
+       struct operand dst;
15999
 
+       unsigned long *override_base;
16000
 
+       unsigned int d;
16001
 
+       unsigned long regs[NR_VCPU_REGS];
16002
 
+       unsigned long eip;
16003
 
+       /* modrm */
16004
 
+       u8 modrm;
16005
 
+       u8 modrm_mod;
16006
 
+       u8 modrm_reg;
16007
 
+       u8 modrm_rm;
16008
 
+       u8 use_modrm_ea;
16009
 
+       unsigned long modrm_ea;
16010
 
+       unsigned long modrm_val;
16011
 
+       struct fetch_cache fetch;
16012
 
+};
16013
 
+
16014
 
 struct x86_emulate_ctxt {
16015
 
        /* Register state before/after emulation. */
16016
 
        struct kvm_vcpu *vcpu;
16017
 
 
16018
 
        /* Linear faulting address (if emulating a page-faulting instruction). */
16019
 
        unsigned long eflags;
16020
 
-       unsigned long cr2;
16021
 
 
16022
 
        /* Emulated execution mode, represented by an X86EMUL_MODE value. */
16023
 
        int mode;
16024
 
@@ -129,8 +155,16 @@ struct x86_emulate_ctxt {
16025
 
        unsigned long ss_base;
16026
 
        unsigned long gs_base;
16027
 
        unsigned long fs_base;
16028
 
+
16029
 
+       /* decode cache */
16030
 
+
16031
 
+       struct decode_cache decode;
16032
 
 };
16033
 
 
16034
 
+/* Repeat String Operation Prefix */
16035
 
+#define REPE_PREFIX  1
16036
 
+#define REPNE_PREFIX    2
16037
 
+
16038
 
 /* Execution mode, passed to the emulator. */
16039
 
 #define X86EMUL_MODE_REAL     0        /* Real mode.             */
16040
 
 #define X86EMUL_MODE_PROT16   2        /* 16-bit protected mode. */
16041
 
@@ -144,12 +178,9 @@ struct x86_emulate_ctxt {
16042
 
 #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64
16043
 
 #endif
16044
 
 
16045
 
-/*
16046
 
- * x86_emulate_memop: Emulate an instruction that faulted attempting to
16047
 
- *                    read/write a 'special' memory area.
16048
 
- * Returns -1 on failure, 0 on success.
16049
 
- */
16050
 
-int x86_emulate_memop(struct x86_emulate_ctxt *ctxt,
16051
 
-                     struct x86_emulate_ops *ops);
16052
 
+int x86_decode_insn(struct x86_emulate_ctxt *ctxt,
16053
 
+                   struct x86_emulate_ops *ops);
16054
 
+int x86_emulate_insn(struct x86_emulate_ctxt *ctxt,
16055
 
+                    struct x86_emulate_ops *ops);
16056
 
 
16057
 
 #endif                         /* __X86_EMULATE_H__ */
16058
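For illustration only (not part of the patch): the header now splits emulation into a decode pass and an execute pass. A rough sketch of the expected calling sequence; the example_emulate wrapper and its error handling are simplified and hypothetical.

/* Sketch only: intended use of the split decode/execute interface above. */
static int example_emulate(struct x86_emulate_ctxt *ctxt,
			   struct x86_emulate_ops *ops)
{
	int rc;

	rc = x86_decode_insn(ctxt, ops);	/* fill ctxt->decode from the guest rip */
	if (rc)
		return rc;			/* instruction could not be decoded */

	return x86_emulate_insn(ctxt, ops);	/* execute the decoded instruction */
}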
 
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
16059
 
index 37bfa19..397197f 100644
16060
 
--- a/include/linux/Kbuild
16061
 
+++ b/include/linux/Kbuild
16062
 
@@ -98,7 +98,6 @@ header-y += iso_fs.h
16063
 
 header-y += ixjuser.h
16064
 
 header-y += jffs2.h
16065
 
 header-y += keyctl.h
16066
 
-header-y += kvm.h
16067
 
 header-y += limits.h
16068
 
 header-y += lock_dlm_plock.h
16069
 
 header-y += magic.h
16070
 
@@ -255,6 +254,7 @@ unifdef-y += kd.h
16071
 
 unifdef-y += kernelcapi.h
16072
 
 unifdef-y += kernel.h
16073
 
 unifdef-y += keyboard.h
16074
 
+unifdef-$(CONFIG_ARCH_SUPPORTS_KVM) += kvm.h
16075
 
 unifdef-y += llc.h
16076
 
 unifdef-y += loop.h
16077
 
 unifdef-y += lp.h
16078
 
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
16079
 
index 057a7f3..de9f28d 100644
16080
 
--- a/include/linux/kvm.h
16081
 
+++ b/include/linux/kvm.h
16082
 
@@ -9,12 +9,10 @@
16083
 
 
16084
 
 #include <asm/types.h>
16085
 
 #include <linux/ioctl.h>
16086
 
+#include <asm/kvm.h>
16087
 
 
16088
 
 #define KVM_API_VERSION 12
16089
 
 
16090
 
-/* Architectural interrupt line count. */
16091
 
-#define KVM_NR_INTERRUPTS 256
16092
 
-
16093
 
 /* for KVM_CREATE_MEMORY_REGION */
16094
 
 struct kvm_memory_region {
16095
 
        __u32 slot;
16096
 
@@ -23,17 +21,19 @@ struct kvm_memory_region {
16097
 
        __u64 memory_size; /* bytes */
16098
 
 };
16099
 
 
16100
 
-/* for kvm_memory_region::flags */
16101
 
-#define KVM_MEM_LOG_DIRTY_PAGES  1UL
16102
 
-
16103
 
-struct kvm_memory_alias {
16104
 
-       __u32 slot;  /* this has a different namespace than memory slots */
16105
 
+/* for KVM_SET_USER_MEMORY_REGION */
16106
 
+struct kvm_userspace_memory_region {
16107
 
+       __u32 slot;
16108
 
        __u32 flags;
16109
 
        __u64 guest_phys_addr;
16110
 
-       __u64 memory_size;
16111
 
-       __u64 target_phys_addr;
16112
 
+       __u64 memory_size; /* bytes */
16113
 
+       __u64 userspace_addr; /* start of the userspace allocated memory */
16114
 
 };
16115
 
 
16116
 
+/* for kvm_memory_region::flags */
16117
 
+#define KVM_MEM_LOG_DIRTY_PAGES  1UL
16118
 
+
16119
 
+
16120
 
 /* for KVM_IRQ_LINE */
16121
 
 struct kvm_irq_level {
16122
 
        /*
16123
 
@@ -45,62 +45,18 @@ struct kvm_irq_level {
16124
 
        __u32 level;
16125
 
 };
16126
 
 
16127
 
-/* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */
16128
 
-struct kvm_pic_state {
16129
 
-       __u8 last_irr;  /* edge detection */
16130
 
-       __u8 irr;               /* interrupt request register */
16131
 
-       __u8 imr;               /* interrupt mask register */
16132
 
-       __u8 isr;               /* interrupt service register */
16133
 
-       __u8 priority_add;      /* highest irq priority */
16134
 
-       __u8 irq_base;
16135
 
-       __u8 read_reg_select;
16136
 
-       __u8 poll;
16137
 
-       __u8 special_mask;
16138
 
-       __u8 init_state;
16139
 
-       __u8 auto_eoi;
16140
 
-       __u8 rotate_on_auto_eoi;
16141
 
-       __u8 special_fully_nested_mode;
16142
 
-       __u8 init4;             /* true if 4 byte init */
16143
 
-       __u8 elcr;              /* PIIX edge/trigger selection */
16144
 
-       __u8 elcr_mask;
16145
 
-};
16146
 
-
16147
 
-#define KVM_IOAPIC_NUM_PINS  24
16148
 
-struct kvm_ioapic_state {
16149
 
-       __u64 base_address;
16150
 
-       __u32 ioregsel;
16151
 
-       __u32 id;
16152
 
-       __u32 irr;
16153
 
-       __u32 pad;
16154
 
-       union {
16155
 
-               __u64 bits;
16156
 
-               struct {
16157
 
-                       __u8 vector;
16158
 
-                       __u8 delivery_mode:3;
16159
 
-                       __u8 dest_mode:1;
16160
 
-                       __u8 delivery_status:1;
16161
 
-                       __u8 polarity:1;
16162
 
-                       __u8 remote_irr:1;
16163
 
-                       __u8 trig_mode:1;
16164
 
-                       __u8 mask:1;
16165
 
-                       __u8 reserve:7;
16166
 
-                       __u8 reserved[4];
16167
 
-                       __u8 dest_id;
16168
 
-               } fields;
16169
 
-       } redirtbl[KVM_IOAPIC_NUM_PINS];
16170
 
-};
16171
 
-
16172
 
-#define KVM_IRQCHIP_PIC_MASTER   0
16173
 
-#define KVM_IRQCHIP_PIC_SLAVE    1
16174
 
-#define KVM_IRQCHIP_IOAPIC       2
16175
 
 
16176
 
 struct kvm_irqchip {
16177
 
        __u32 chip_id;
16178
 
        __u32 pad;
16179
 
         union {
16180
 
                char dummy[512];  /* reserving space */
16181
 
+#ifdef CONFIG_X86
16182
 
                struct kvm_pic_state pic;
16183
 
+#endif
16184
 
+#if defined(CONFIG_X86) || defined(CONFIG_IA64)
16185
 
                struct kvm_ioapic_state ioapic;
16186
 
+#endif
16187
 
        } chip;
16188
 
 };
16189
 
 
16190
 
@@ -179,15 +135,6 @@ struct kvm_run {
16191
 
        };
16192
 
 };
16193
 
 
16194
 
-/* for KVM_GET_REGS and KVM_SET_REGS */
16195
 
-struct kvm_regs {
16196
 
-       /* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
16197
 
-       __u64 rax, rbx, rcx, rdx;
16198
 
-       __u64 rsi, rdi, rsp, rbp;
16199
 
-       __u64 r8,  r9,  r10, r11;
16200
 
-       __u64 r12, r13, r14, r15;
16201
 
-       __u64 rip, rflags;
16202
 
-};
16203
 
 
16204
 
 /* for KVM_GET_FPU and KVM_SET_FPU */
16205
 
 struct kvm_fpu {
16206
 
@@ -204,59 +151,6 @@ struct kvm_fpu {
16207
 
        __u32 pad2;
16208
 
 };
16209
 
 
16210
 
-/* for KVM_GET_LAPIC and KVM_SET_LAPIC */
16211
 
-#define KVM_APIC_REG_SIZE 0x400
16212
 
-struct kvm_lapic_state {
16213
 
-       char regs[KVM_APIC_REG_SIZE];
16214
 
-};
16215
 
-
16216
 
-struct kvm_segment {
16217
 
-       __u64 base;
16218
 
-       __u32 limit;
16219
 
-       __u16 selector;
16220
 
-       __u8  type;
16221
 
-       __u8  present, dpl, db, s, l, g, avl;
16222
 
-       __u8  unusable;
16223
 
-       __u8  padding;
16224
 
-};
16225
 
-
16226
 
-struct kvm_dtable {
16227
 
-       __u64 base;
16228
 
-       __u16 limit;
16229
 
-       __u16 padding[3];
16230
 
-};
16231
 
-
16232
 
-/* for KVM_GET_SREGS and KVM_SET_SREGS */
16233
 
-struct kvm_sregs {
16234
 
-       /* out (KVM_GET_SREGS) / in (KVM_SET_SREGS) */
16235
 
-       struct kvm_segment cs, ds, es, fs, gs, ss;
16236
 
-       struct kvm_segment tr, ldt;
16237
 
-       struct kvm_dtable gdt, idt;
16238
 
-       __u64 cr0, cr2, cr3, cr4, cr8;
16239
 
-       __u64 efer;
16240
 
-       __u64 apic_base;
16241
 
-       __u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
16242
 
-};
16243
 
-
16244
 
-struct kvm_msr_entry {
16245
 
-       __u32 index;
16246
 
-       __u32 reserved;
16247
 
-       __u64 data;
16248
 
-};
16249
 
-
16250
 
-/* for KVM_GET_MSRS and KVM_SET_MSRS */
16251
 
-struct kvm_msrs {
16252
 
-       __u32 nmsrs; /* number of msrs in entries */
16253
 
-       __u32 pad;
16254
 
-
16255
 
-       struct kvm_msr_entry entries[0];
16256
 
-};
16257
 
-
16258
 
-/* for KVM_GET_MSR_INDEX_LIST */
16259
 
-struct kvm_msr_list {
16260
 
-       __u32 nmsrs; /* number of msrs in entries */
16261
 
-       __u32 indices[0];
16262
 
-};
16263
 
 
16264
 
 /* for KVM_TRANSLATE */
16265
 
 struct kvm_translation {
16266
 
@@ -302,22 +196,6 @@ struct kvm_dirty_log {
16267
 
        };
16268
 
 };
16269
 
 
16270
 
-struct kvm_cpuid_entry {
16271
 
-       __u32 function;
16272
 
-       __u32 eax;
16273
 
-       __u32 ebx;
16274
 
-       __u32 ecx;
16275
 
-       __u32 edx;
16276
 
-       __u32 padding;
16277
 
-};
16278
 
-
16279
 
-/* for KVM_SET_CPUID */
16280
 
-struct kvm_cpuid {
16281
 
-       __u32 nent;
16282
 
-       __u32 padding;
16283
 
-       struct kvm_cpuid_entry entries[0];
16284
 
-};
16285
 
-
16286
 
 /* for KVM_SET_SIGNAL_MASK */
16287
 
 struct kvm_signal_mask {
16288
 
        __u32 len;
16289
 
@@ -347,11 +225,20 @@ struct kvm_signal_mask {
16290
 
  */
16291
 
 #define KVM_CAP_IRQCHIP          0
16292
 
 #define KVM_CAP_HLT      1
16293
 
+#define KVM_CAP_MMU_SHADOW_CACHE_CONTROL 2
16294
 
+#define KVM_CAP_USER_MEMORY 3
16295
 
+#define KVM_CAP_SET_TSS_ADDR 4
16296
 
+#define KVM_CAP_EXT_CPUID 5
16297
 
 
16298
 
 /*
16299
 
  * ioctls for VM fds
16300
 
  */
16301
 
 #define KVM_SET_MEMORY_REGION     _IOW(KVMIO, 0x40, struct kvm_memory_region)
16302
 
+#define KVM_SET_NR_MMU_PAGES      _IO(KVMIO, 0x44)
16303
 
+#define KVM_GET_NR_MMU_PAGES      _IO(KVMIO, 0x45)
16304
 
+#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46,\
16305
 
+                                       struct kvm_userspace_memory_region)
16306
 
+#define KVM_SET_TSS_ADDR          _IO(KVMIO, 0x47)
16307
 
 /*
16308
 
  * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns
16309
 
  * a vcpu fd.
16310
 
@@ -359,6 +246,7 @@ struct kvm_signal_mask {
16311
 
 #define KVM_CREATE_VCPU           _IO(KVMIO,  0x41)
16312
 
 #define KVM_GET_DIRTY_LOG         _IOW(KVMIO, 0x42, struct kvm_dirty_log)
16313
 
 #define KVM_SET_MEMORY_ALIAS      _IOW(KVMIO, 0x43, struct kvm_memory_alias)
16314
 
+#define KVM_GET_SUPPORTED_CPUID   _IOWR(KVMIO, 0x48, struct kvm_cpuid2)
16315
 
 /* Device model IOC */
16316
 
 #define KVM_CREATE_IRQCHIP       _IO(KVMIO,  0x60)
16317
 
 #define KVM_IRQ_LINE             _IOW(KVMIO, 0x61, struct kvm_irq_level)
16318
 
@@ -384,5 +272,7 @@ struct kvm_signal_mask {
16319
 
 #define KVM_SET_FPU               _IOW(KVMIO,  0x8d, struct kvm_fpu)
16320
 
 #define KVM_GET_LAPIC             _IOR(KVMIO,  0x8e, struct kvm_lapic_state)
16321
 
 #define KVM_SET_LAPIC             _IOW(KVMIO,  0x8f, struct kvm_lapic_state)
16322
 
+#define KVM_SET_CPUID2            _IOW(KVMIO,  0x90, struct kvm_cpuid2)
16323
 
+#define KVM_GET_CPUID2            _IOWR(KVMIO, 0x91, struct kvm_cpuid2)
16324
 
 
16325
 
 #endif
16326
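For illustration only (not part of the patch): a userspace sketch of the new KVM_SET_USER_MEMORY_REGION ioctl added above, which lets the caller supply its own backing memory instead of kernel-allocated slots. Error handling is trimmed, and the VM fd is assumed to come from KVM_CREATE_VM on /dev/kvm.

/* Sketch only: registering 16 MB of guest RAM backed by an anonymous mmap. */
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/kvm.h>

static int example_add_ram(int vm_fd)
{
	struct kvm_userspace_memory_region region;
	void *ram;

	ram = mmap(NULL, 16 << 20, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (ram == MAP_FAILED)
		return -1;

	region.slot = 0;
	region.flags = 0;			/* or KVM_MEM_LOG_DIRTY_PAGES */
	region.guest_phys_addr = 0;
	region.memory_size = 16 << 20;
	region.userspace_addr = (unsigned long)ram;

	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}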
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
16327
 
new file mode 100644
16328
 
index 0000000..953b50a
16329
 
--- /dev/null
16330
 
+++ b/include/linux/kvm_host.h
16331
 
@@ -0,0 +1,290 @@
16332
 
+#ifndef __KVM_HOST_H
16333
 
+#define __KVM_HOST_H
16334
 
+
16335
 
+/*
16336
 
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
16337
 
+ * the COPYING file in the top-level directory.
16338
 
+ */
16339
 
+
16340
 
+#include <linux/types.h>
16341
 
+#include <linux/hardirq.h>
16342
 
+#include <linux/list.h>
16343
 
+#include <linux/mutex.h>
16344
 
+#include <linux/spinlock.h>
16345
 
+#include <linux/signal.h>
16346
 
+#include <linux/sched.h>
16347
 
+#include <linux/mm.h>
16348
 
+#include <linux/preempt.h>
16349
 
+#include <asm/signal.h>
16350
 
+
16351
 
+#include <linux/kvm.h>
16352
 
+#include <linux/kvm_para.h>
16353
 
+
16354
 
+#include <linux/kvm_types.h>
16355
 
+
16356
 
+#include <asm/kvm_host.h>
16357
 
+
16358
 
+#define KVM_MAX_VCPUS 4
16359
 
+#define KVM_MEMORY_SLOTS 8
16360
 
+/* memory slots that are not exposed to userspace */
16361
 
+#define KVM_PRIVATE_MEM_SLOTS 4
16362
 
+
16363
 
+#define KVM_PIO_PAGE_OFFSET 1
16364
 
+
16365
 
+/*
16366
 
+ * vcpu->requests bit members
16367
 
+ */
16368
 
+#define KVM_REQ_TLB_FLUSH          0
16369
 
+
16370
 
+
16371
 
+struct kvm_vcpu;
16372
 
+extern struct kmem_cache *kvm_vcpu_cache;
16373
 
+
16374
 
+struct kvm_guest_debug {
16375
 
+       int enabled;
16376
 
+       unsigned long bp[4];
16377
 
+       int singlestep;
16378
 
+};
16379
 
+
16380
 
+/*
16381
 
+ * It would be nice to use something smarter than a linear search, TBD...
16382
 
+ * Thankfully we don't expect many devices to register (famous last words :),
16383
 
+ * so until then it will suffice.  At least it's abstracted so we can change
16384
 
+ * in one place.
16385
 
+ */
16386
 
+struct kvm_io_bus {
16387
 
+       int                   dev_count;
16388
 
+#define NR_IOBUS_DEVS 6
16389
 
+       struct kvm_io_device *devs[NR_IOBUS_DEVS];
16390
 
+};
16391
 
+
16392
 
+void kvm_io_bus_init(struct kvm_io_bus *bus);
16393
 
+void kvm_io_bus_destroy(struct kvm_io_bus *bus);
16394
 
+struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr);
16395
 
+void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
16396
 
+                            struct kvm_io_device *dev);
16397
 
+
16398
 
+struct kvm_vcpu {
16399
 
+       struct kvm *kvm;
16400
 
+       struct preempt_notifier preempt_notifier;
16401
 
+       int vcpu_id;
16402
 
+       struct mutex mutex;
16403
 
+       int   cpu;
16404
 
+       struct kvm_run *run;
16405
 
+       int guest_mode;
16406
 
+       unsigned long requests;
16407
 
+       struct kvm_guest_debug guest_debug;
16408
 
+       int fpu_active;
16409
 
+       int guest_fpu_loaded;
16410
 
+       wait_queue_head_t wq;
16411
 
+       int sigset_active;
16412
 
+       sigset_t sigset;
16413
 
+       struct kvm_vcpu_stat stat;
16414
 
+
16415
 
+#ifdef CONFIG_HAS_IOMEM
16416
 
+       int mmio_needed;
16417
 
+       int mmio_read_completed;
16418
 
+       int mmio_is_write;
16419
 
+       int mmio_size;
16420
 
+       unsigned char mmio_data[8];
16421
 
+       gpa_t mmio_phys_addr;
16422
 
+#endif
16423
 
+
16424
 
+       struct kvm_vcpu_arch arch;
16425
 
+};
16426
 
+
16427
 
+struct kvm_memory_slot {
16428
 
+       gfn_t base_gfn;
16429
 
+       unsigned long npages;
16430
 
+       unsigned long flags;
16431
 
+       unsigned long *rmap;
16432
 
+       unsigned long *dirty_bitmap;
16433
 
+       unsigned long userspace_addr;
16434
 
+       int user_alloc;
16435
 
+};
16436
 
+
16437
 
+struct kvm {
16438
 
+       struct mutex lock; /* protects everything except vcpus */
16439
 
+       struct mm_struct *mm; /* userspace tied to this vm */
16440
 
+       int nmemslots;
16441
 
+       struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS +
16442
 
+                                       KVM_PRIVATE_MEM_SLOTS];
16443
 
+       struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
16444
 
+       struct list_head vm_list;
16445
 
+       struct file *filp;
16446
 
+       struct kvm_io_bus mmio_bus;
16447
 
+       struct kvm_io_bus pio_bus;
16448
 
+       struct kvm_vm_stat stat;
16449
 
+       struct kvm_arch arch;
16450
 
+};
16451
 
+
16452
 
+/* The guest did something we don't support. */
16453
 
+#define pr_unimpl(vcpu, fmt, ...)                                      \
16454
 
+ do {                                                                  \
16455
 
+       if (printk_ratelimit())                                         \
16456
 
+               printk(KERN_ERR "kvm: %i: cpu%i " fmt,                  \
16457
 
+                      current->tgid, (vcpu)->vcpu_id , ## __VA_ARGS__); \
16458
 
+ } while (0)
16459
 
+
16460
 
+#define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt)
16461
 
+#define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt)
16462
 
+
16463
 
+int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
16464
 
+void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
16465
 
+
16466
 
+void vcpu_load(struct kvm_vcpu *vcpu);
16467
 
+void vcpu_put(struct kvm_vcpu *vcpu);
16468
 
+
16469
 
+void decache_vcpus_on_cpu(int cpu);
16470
 
+
16471
 
+
16472
 
+int kvm_init(void *opaque, unsigned int vcpu_size,
16473
 
+                 struct module *module);
16474
 
+void kvm_exit(void);
16475
 
+
16476
 
+#define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
16477
 
+#define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB)
16478
 
+static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; }
16479
 
+struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva);
16480
 
+
16481
 
+extern struct page *bad_page;
16482
 
+
16483
 
+int is_error_page(struct page *page);
16484
 
+int kvm_is_error_hva(unsigned long addr);
16485
 
+int kvm_set_memory_region(struct kvm *kvm,
16486
 
+                         struct kvm_userspace_memory_region *mem,
16487
 
+                         int user_alloc);
16488
 
+int __kvm_set_memory_region(struct kvm *kvm,
16489
 
+                           struct kvm_userspace_memory_region *mem,
16490
 
+                           int user_alloc);
16491
 
+int kvm_arch_set_memory_region(struct kvm *kvm,
16492
 
+                               struct kvm_userspace_memory_region *mem,
16493
 
+                               struct kvm_memory_slot old,
16494
 
+                               int user_alloc);
16495
 
+gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
16496
 
+struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
16497
 
+void kvm_release_page_clean(struct page *page);
16498
 
+void kvm_release_page_dirty(struct page *page);
16499
 
+int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
16500
 
+                       int len);
16501
 
+int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len);
16502
 
+int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
16503
 
+                        int offset, int len);
16504
 
+int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
16505
 
+                   unsigned long len);
16506
 
+int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
16507
 
+int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
16508
 
+struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
16509
 
+int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
16510
 
+void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
16511
 
+
16512
 
+void kvm_vcpu_block(struct kvm_vcpu *vcpu);
16513
 
+void kvm_resched(struct kvm_vcpu *vcpu);
16514
 
+void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
16515
 
+void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
16516
 
+void kvm_flush_remote_tlbs(struct kvm *kvm);
16517
 
+
16518
 
+long kvm_arch_dev_ioctl(struct file *filp,
16519
 
+                       unsigned int ioctl, unsigned long arg);
16520
 
+long kvm_arch_vcpu_ioctl(struct file *filp,
16521
 
+                        unsigned int ioctl, unsigned long arg);
16522
 
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
16523
 
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
16524
 
+
16525
 
+int kvm_dev_ioctl_check_extension(long ext);
16526
 
+
16527
 
+int kvm_get_dirty_log(struct kvm *kvm,
16528
 
+                       struct kvm_dirty_log *log, int *is_dirty);
16529
 
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
16530
 
+                               struct kvm_dirty_log *log);
16531
 
+
16532
 
+int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
16533
 
+                                  struct
16534
 
+                                  kvm_userspace_memory_region *mem,
16535
 
+                                  int user_alloc);
16536
 
+long kvm_arch_vm_ioctl(struct file *filp,
16537
 
+                      unsigned int ioctl, unsigned long arg);
16538
 
+void kvm_arch_destroy_vm(struct kvm *kvm);
16539
 
+
16540
 
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
16541
 
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
16542
 
+
16543
 
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
16544
 
+                                   struct kvm_translation *tr);
16545
 
+
16546
 
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
16547
 
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
16548
 
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
16549
 
+                                 struct kvm_sregs *sregs);
16550
 
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
16551
 
+                                 struct kvm_sregs *sregs);
16552
 
+int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
16553
 
+                                   struct kvm_debug_guest *dbg);
16554
 
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
16555
 
+
16556
 
+int kvm_arch_init(void *opaque);
16557
 
+void kvm_arch_exit(void);
16558
 
+
16559
 
+int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu);
16560
 
+void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);
16561
 
+
16562
 
+void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu);
16563
 
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
16564
 
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
16565
 
+struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id);
16566
 
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
16567
 
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
16568
 
+
16569
 
+int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu);
16570
 
+void kvm_arch_hardware_enable(void *garbage);
16571
 
+void kvm_arch_hardware_disable(void *garbage);
16572
 
+int kvm_arch_hardware_setup(void);
16573
 
+void kvm_arch_hardware_unsetup(void);
16574
 
+void kvm_arch_check_processor_compat(void *rtn);
16575
 
+int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
16576
 
+
16577
 
+void kvm_free_physmem(struct kvm *kvm);
16578
 
+
16579
 
+struct kvm *kvm_arch_create_vm(void);
16580
 
+void kvm_arch_destroy_vm(struct kvm *kvm);
16581
 
+
16582
 
+int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
16583
 
+int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
16584
 
+void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
16585
 
+
16586
 
+static inline void kvm_guest_enter(void)
16587
 
+{
16588
 
+       account_system_vtime(current);
16589
 
+       current->flags |= PF_VCPU;
16590
 
+}
16591
 
+
16592
 
+static inline void kvm_guest_exit(void)
16593
 
+{
16594
 
+       account_system_vtime(current);
16595
 
+       current->flags &= ~PF_VCPU;
16596
 
+}
16597
 
+
16598
 
+static inline int memslot_id(struct kvm *kvm, struct kvm_memory_slot *slot)
16599
 
+{
16600
 
+       return slot - kvm->memslots;
16601
 
+}
16602
 
+
16603
 
+static inline gpa_t gfn_to_gpa(gfn_t gfn)
16604
 
+{
16605
 
+       return (gpa_t)gfn << PAGE_SHIFT;
16606
 
+}
16607
 
+
16608
 
+enum kvm_stat_kind {
16609
 
+       KVM_STAT_VM,
16610
 
+       KVM_STAT_VCPU,
16611
 
+};
16612
 
+
16613
 
+struct kvm_stats_debugfs_item {
16614
 
+       const char *name;
16615
 
+       int offset;
16616
 
+       enum kvm_stat_kind kind;
16617
 
+       struct dentry *dentry;
16618
 
+};
16619
 
+extern struct kvm_stats_debugfs_item debugfs_entries[];
16620
 
+
16621
 
+#endif
16622
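For illustration only (not part of the patch): how the kvm_stats_debugfs_item / kvm_stat_kind machinery declared above is expected to be populated by arch code. The VCPU_STAT helper macro is a hypothetical convenience; the field names come from the kvm_vcpu_stat structure introduced earlier in this patch.

/* Sketch only: a debugfs stats table as arch code might define it. */
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "exits",          VCPU_STAT(exits) },
	{ "halt_exits",     VCPU_STAT(halt_exits) },
	{ "mmio_exits",     VCPU_STAT(mmio_exits) },
	{ "insn_emulation", VCPU_STAT(insn_emulation) },
	{ NULL }
};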
 
diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h
16623
 
index 3b29256..e4db25f 100644
16624
 
--- a/include/linux/kvm_para.h
16625
 
+++ b/include/linux/kvm_para.h
16626
 
@@ -2,72 +2,28 @@
16627
 
 #define __LINUX_KVM_PARA_H
16628
 
 
16629
 
 /*
16630
 
- * Guest OS interface for KVM paravirtualization
16631
 
- *
16632
 
- * Note: this interface is totally experimental, and is certain to change
16633
 
- *       as we make progress.
16634
 
+ * This header file provides a method for making a hypercall to the host
16635
 
+ * Architectures should define:
16636
 
+ * - kvm_hypercall0, kvm_hypercall1...
16637
 
+ * - kvm_arch_para_features
16638
 
+ * - kvm_para_available
16639
 
  */
16640
 
 
16641
 
-/*
16642
 
- * Per-VCPU descriptor area shared between guest and host. Writable to
16643
 
- * both guest and host. Registered with the host by the guest when
16644
 
- * a guest acknowledges paravirtual mode.
16645
 
- *
16646
 
- * NOTE: all addresses are guest-physical addresses (gpa), to make it
16647
 
- * easier for the hypervisor to map between the various addresses.
16648
 
- */
16649
 
-struct kvm_vcpu_para_state {
16650
 
-       /*
16651
 
-        * API version information for compatibility. If there's any support
16652
 
-        * mismatch (too old host trying to execute too new guest) then
16653
 
-        * the host will deny entry into paravirtual mode. Any other
16654
 
-        * combination (new host + old guest and new host + new guest)
16655
 
-        * is supposed to work - new host versions will support all old
16656
 
-        * guest API versions.
16657
 
-        */
16658
 
-       u32 guest_version;
16659
 
-       u32 host_version;
16660
 
-       u32 size;
16661
 
-       u32 ret;
16662
 
-
16663
 
-       /*
16664
 
-        * The address of the vm exit instruction (VMCALL or VMMCALL),
16665
 
-        * which the host will patch according to the CPU model the
16666
 
-        * VM runs on:
16667
 
-        */
16668
 
-       u64 hypercall_gpa;
16669
 
-
16670
 
-} __attribute__ ((aligned(PAGE_SIZE)));
16671
 
-
16672
 
-#define KVM_PARA_API_VERSION 1
16673
 
+/* Return values for hypercalls */
16674
 
+#define KVM_ENOSYS             1000
16675
 
 
16676
 
+#ifdef __KERNEL__
16677
 
 /*
16678
 
- * This is used for an RDMSR's ECX parameter to probe for a KVM host.
16679
 
- * Hopefully no CPU vendor will use up this number. This is placed well
16680
 
- * out of way of the typical space occupied by CPU vendors' MSR indices,
16681
 
- * and we think (or at least hope) it wont be occupied in the future
16682
 
- * either.
16683
 
+ * hypercalls use an architecture-specific implementation
16684
 
  */
16685
 
-#define MSR_KVM_API_MAGIC 0x87655678
16686
 
-
16687
 
-#define KVM_EINVAL 1
16688
 
+#include <asm/kvm_para.h>
16689
 
 
16690
 
-/*
16691
 
- * Hypercall calling convention:
16692
 
- *
16693
 
- * Each hypercall may have 0-6 parameters.
16694
 
- *
16695
 
- * 64-bit hypercall index is in RAX, goes from 0 to __NR_hypercalls-1
16696
 
- *
16697
 
- * 64-bit parameters 1-6 are in the standard gcc x86_64 calling convention
16698
 
- * order: RDI, RSI, RDX, RCX, R8, R9.
16699
 
- *
16700
 
- * 32-bit index is EBX, parameters are: EAX, ECX, EDX, ESI, EDI, EBP.
16701
 
- * (the first 3 are according to the gcc regparm calling convention)
16702
 
- *
16703
 
- * No registers are clobbered by the hypercall, except that the
16704
 
- * return value is in RAX.
16705
 
- */
16706
 
-#define __NR_hypercalls                        0
16707
 
+static inline int kvm_para_has_feature(unsigned int feature)
16708
 
+{
16709
 
+       if (kvm_arch_para_features() & (1UL << feature))
16710
 
+               return 1;
16711
 
+       return 0;
16712
 
+}
16713
 
+#endif /* __KERNEL__ */
16714
 
+#endif /* __LINUX_KVM_PARA_H */
16715
 
 
16716
 
-#endif
16717
 
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
16718
 
new file mode 100644
16719
 
index 0000000..1c4e46d
16720
 
--- /dev/null
16721
 
+++ b/include/linux/kvm_types.h
16722
 
@@ -0,0 +1,54 @@
16723
 
+/*
16724
 
+ * This program is free software; you can redistribute it and/or modify
16725
 
+ * it under the terms of the GNU General Public License as published by
16726
 
+ * the Free Software Foundation; either version 2 of the License.
16727
 
+ *
16728
 
+ * This program is distributed in the hope that it will be useful,
16729
 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
16730
 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16731
 
+ * GNU General Public License for more details.
16732
 
+ *
16733
 
+ * You should have received a copy of the GNU General Public License
16734
 
+ * along with this program; if not, write to the Free Software
16735
 
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
16736
 
+ *
16737
 
+ */
16738
 
+
16739
 
+#ifndef __KVM_TYPES_H__
16740
 
+#define __KVM_TYPES_H__
16741
 
+
16742
 
+#include <asm/types.h>
16743
 
+
16744
 
+/*
16745
 
+ * Address types:
16746
 
+ *
16747
 
+ *  gva - guest virtual address
16748
 
+ *  gpa - guest physical address
16749
 
+ *  gfn - guest frame number
16750
 
+ *  hva - host virtual address
16751
 
+ *  hpa - host physical address
16752
 
+ *  hfn - host frame number
16753
 
+ */
16754
 
+
16755
 
+typedef unsigned long  gva_t;
16756
 
+typedef u64            gpa_t;
16757
 
+typedef unsigned long  gfn_t;
16758
 
+
16759
 
+typedef unsigned long  hva_t;
16760
 
+typedef u64            hpa_t;
16761
 
+typedef unsigned long  hfn_t;
16762
 
+
16763
 
+struct kvm_pio_request {
16764
 
+       unsigned long count;
16765
 
+       int cur_count;
16766
 
+       struct page *guest_pages[2];
16767
 
+       unsigned guest_page_offset;
16768
 
+       int in;
16769
 
+       int port;
16770
 
+       int size;
16771
 
+       int string;
16772
 
+       int down;
16773
 
+       int rep;
16774
 
+};
16775
 
+
16776
 
+#endif /* __KVM_TYPES_H__ */
16777
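For illustration only (not part of the patch): the address spaces named in the comment above are related by page shifts. A small sketch of the implied conversions; gfn_to_gpa() itself is defined in kvm_host.h earlier in this patch, and the example_* helpers here are hypothetical counterparts.

/* Sketch only: conversions between the guest address types defined above. */
static inline gfn_t example_gpa_to_gfn(gpa_t gpa)
{
	return (gfn_t)(gpa >> PAGE_SHIFT);	/* guest physical -> guest frame number */
}

static inline gpa_t example_gfn_to_gpa(gfn_t gfn)
{
	return (gpa_t)gfn << PAGE_SHIFT;	/* mirrors gfn_to_gpa() in kvm_host.h */
}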
 
diff --git a/kernel/fork.c b/kernel/fork.c
16778
 
index 8dd8ff2..591c8df 100644
16779
 
--- a/kernel/fork.c
16780
 
+++ b/kernel/fork.c
16781
 
@@ -392,6 +392,7 @@ void fastcall __mmdrop(struct mm_struct *mm)
16782
 
        destroy_context(mm);
16783
 
        free_mm(mm);
16784
 
 }
16785
 
+EXPORT_SYMBOL_GPL(__mmdrop);
16786
 
 
16787
 
 /*
16788
 
  * Decrement the use count and release all resources for an mm.
16789
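For illustration only (not part of the patch): the export above lets a module such as kvm.ko pin the userspace mm recorded in the new struct kvm mm field (added earlier in this patch) and release it with mmdrop(), which calls __mmdrop() when the count reaches zero. A hedged sketch of the pattern; the example_* functions are hypothetical.

/* Sketch only: why __mmdrop() needs to be exported to modules. */
static void example_vm_create(struct kvm *kvm)
{
	kvm->mm = current->mm;
	atomic_inc(&kvm->mm->mm_count);		/* pin the mm_struct itself */
}

static void example_vm_destroy(struct kvm *kvm)
{
	mmdrop(kvm->mm);	/* inline helper; invokes __mmdrop() on the last reference */
}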
 
diff --git a/drivers/kvm/ioapic.c b/virt/kvm/ioapic.c
similarity index 83%
rename from drivers/kvm/ioapic.c
rename to virt/kvm/ioapic.c
index c7992e6..317f8e2 100644
--- a/drivers/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -26,7 +26,7 @@
  *  Based on Xen 3.1 code.
  */
 
-#include "kvm.h"
+#include <linux/kvm_host.h>
 #include <linux/kvm.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
@@ -34,14 +34,17 @@
 #include <linux/hrtimer.h>
 #include <linux/io.h>
 #include <asm/processor.h>
-#include <asm/msr.h>
 #include <asm/page.h>
 #include <asm/current.h>
-#include <asm/apicdef.h>
-#include <asm/io_apic.h>
-#include "irq.h"
-/* #define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
+
+#include "ioapic.h"
+#include "lapic.h"
+
+#if 0
+#define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg)
+#else
 #define ioapic_debug(fmt, arg...)
+#endif
 static void ioapic_deliver(struct kvm_ioapic *vioapic, int irq);
 
 static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
@@ -113,7 +116,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
        default:
                index = (ioapic->ioregsel - 0x10) >> 1;
 
-               ioapic_debug("change redir index %x val %x", index, val);
+               ioapic_debug("change redir index %x val %x\n", index, val);
                if (index >= IOAPIC_NUM_PINS)
                        return;
                if (ioapic->ioregsel & 1) {
@@ -131,16 +134,16 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 }
 
 static void ioapic_inj_irq(struct kvm_ioapic *ioapic,
-                          struct kvm_lapic *target,
+                          struct kvm_vcpu *vcpu,
                           u8 vector, u8 trig_mode, u8 delivery_mode)
 {
-       ioapic_debug("irq %d trig %d deliv %d", vector, trig_mode,
+       ioapic_debug("irq %d trig %d deliv %d\n", vector, trig_mode,
                     delivery_mode);
 
-       ASSERT((delivery_mode == dest_Fixed) ||
-              (delivery_mode == dest_LowestPrio));
+       ASSERT((delivery_mode == IOAPIC_FIXED) ||
+              (delivery_mode == IOAPIC_LOWEST_PRIORITY));
 
-       kvm_apic_set_irq(target, vector, trig_mode);
+       kvm_apic_set_irq(vcpu, vector, trig_mode);
 }
 
 static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
@@ -151,12 +154,12 @@ static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
        struct kvm *kvm = ioapic->kvm;
        struct kvm_vcpu *vcpu;
 
-       ioapic_debug("dest %d dest_mode %d", dest, dest_mode);
+       ioapic_debug("dest %d dest_mode %d\n", dest, dest_mode);
 
        if (dest_mode == 0) {   /* Physical mode. */
                if (dest == 0xFF) {     /* Broadcast. */
                        for (i = 0; i < KVM_MAX_VCPUS; ++i)
-                               if (kvm->vcpus[i] && kvm->vcpus[i]->apic)
+                               if (kvm->vcpus[i] && kvm->vcpus[i]->arch.apic)
                                        mask |= 1 << i;
                        return mask;
                }
@@ -164,8 +167,8 @@ static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
                        vcpu = kvm->vcpus[i];
                        if (!vcpu)
                                continue;
-                       if (kvm_apic_match_physical_addr(vcpu->apic, dest)) {
-                               if (vcpu->apic)
+                       if (kvm_apic_match_physical_addr(vcpu->arch.apic, dest)) {
+                               if (vcpu->arch.apic)
                                        mask = 1 << i;
                                break;
                        }
@@ -175,11 +178,11 @@ static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
                        vcpu = kvm->vcpus[i];
                        if (!vcpu)
                                continue;
-                       if (vcpu->apic &&
-                           kvm_apic_match_logical_addr(vcpu->apic, dest))
+                       if (vcpu->arch.apic &&
+                           kvm_apic_match_logical_addr(vcpu->arch.apic, dest))
                                mask |= 1 << vcpu->vcpu_id;
                }
-       ioapic_debug("mask %x", mask);
+       ioapic_debug("mask %x\n", mask);
        return mask;
 }
 
@@ -191,41 +194,39 @@ static void ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
        u8 vector = ioapic->redirtbl[irq].fields.vector;
        u8 trig_mode = ioapic->redirtbl[irq].fields.trig_mode;
        u32 deliver_bitmask;
-       struct kvm_lapic *target;
        struct kvm_vcpu *vcpu;
        int vcpu_id;
 
        ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
-                    "vector=%x trig_mode=%x",
+                    "vector=%x trig_mode=%x\n",
                     dest, dest_mode, delivery_mode, vector, trig_mode);
 
        deliver_bitmask = ioapic_get_delivery_bitmask(ioapic, dest, dest_mode);
        if (!deliver_bitmask) {
-               ioapic_debug("no target on destination");
+               ioapic_debug("no target on destination\n");
                return;
        }
 
        switch (delivery_mode) {
-       case dest_LowestPrio:
-               target =
-                   kvm_apic_round_robin(ioapic->kvm, vector, deliver_bitmask);
-               if (target != NULL)
-                       ioapic_inj_irq(ioapic, target, vector,
+       case IOAPIC_LOWEST_PRIORITY:
+               vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector,
+                               deliver_bitmask);
+               if (vcpu != NULL)
+                       ioapic_inj_irq(ioapic, vcpu, vector,
                                       trig_mode, delivery_mode);
                else
-                       ioapic_debug("null round robin: "
-                                    "mask=%x vector=%x delivery_mode=%x",
-                                    deliver_bitmask, vector, dest_LowestPrio);
+                       ioapic_debug("null lowest prio vcpu: "
+                                    "mask=%x vector=%x delivery_mode=%x\n",
+                                    deliver_bitmask, vector, IOAPIC_LOWEST_PRIORITY);
                break;
-       case dest_Fixed:
+       case IOAPIC_FIXED:
                for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
                        if (!(deliver_bitmask & (1 << vcpu_id)))
                                continue;
                        deliver_bitmask &= ~(1 << vcpu_id);
                        vcpu = ioapic->kvm->vcpus[vcpu_id];
                        if (vcpu) {
-                               target = vcpu->apic;
-                               ioapic_inj_irq(ioapic, target, vector,
+                               ioapic_inj_irq(ioapic, vcpu, vector,
                                               trig_mode, delivery_mode);
                        }
                }
@@ -271,7 +272,7 @@ static int get_eoi_gsi(struct kvm_ioapic *ioapic, int vector)
 
 void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
 {
-       struct kvm_ioapic *ioapic = kvm->vioapic;
+       struct kvm_ioapic *ioapic = kvm->arch.vioapic;
        union ioapic_redir_entry *ent;
        int gsi;
 
@@ -304,7 +305,7 @@ static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
        struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
        u32 result;
 
-       ioapic_debug("addr %lx", (unsigned long)addr);
+       ioapic_debug("addr %lx\n", (unsigned long)addr);
        ASSERT(!(addr & 0xf));  /* check alignment */
 
        addr &= 0xff;
@@ -341,8 +342,8 @@ static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
        struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
        u32 data;
 
-       ioapic_debug("ioapic_mmio_write addr=%lx len=%d val=%p\n",
-                    addr, len, val);
+       ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n",
+                    (void*)addr, len, val);
        ASSERT(!(addr & 0xf));  /* check alignment */
        if (len == 4 || len == 8)
                data = *(u32 *) val;
@@ -360,24 +361,38 @@ static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
        case IOAPIC_REG_WINDOW:
                ioapic_write_indirect(ioapic, data);
                break;
+#ifdef CONFIG_IA64
+       case IOAPIC_REG_EOI:
+               kvm_ioapic_update_eoi(ioapic->kvm, data);
+               break;
+#endif
 
        default:
                break;
        }
 }
 
+void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
+{
+       int i;
+
+       for (i = 0; i < IOAPIC_NUM_PINS; i++)
+               ioapic->redirtbl[i].fields.mask = 1;
+       ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
+       ioapic->ioregsel = 0;
+       ioapic->irr = 0;
+       ioapic->id = 0;
+}
+
 int kvm_ioapic_init(struct kvm *kvm)
 {
        struct kvm_ioapic *ioapic;
-       int i;
 
        ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
        if (!ioapic)
                return -ENOMEM;
-       kvm->vioapic = ioapic;
-       for (i = 0; i < IOAPIC_NUM_PINS; i++)
-               ioapic->redirtbl[i].fields.mask = 1;
-       ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
+       kvm->arch.vioapic = ioapic;
+       kvm_ioapic_reset(ioapic);
        ioapic->dev.read = ioapic_mmio_read;
        ioapic->dev.write = ioapic_mmio_write;
        ioapic->dev.in_range = ioapic_in_range;
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
new file mode 100644
index 0000000..7f16675
--- /dev/null
+++ b/virt/kvm/ioapic.h
@@ -0,0 +1,95 @@
+#ifndef __KVM_IO_APIC_H
+#define __KVM_IO_APIC_H
+
+#include <linux/kvm_host.h>
+
+#include "iodev.h"
+
+struct kvm;
+struct kvm_vcpu;
+
+#define IOAPIC_NUM_PINS  KVM_IOAPIC_NUM_PINS
+#define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */
+#define IOAPIC_EDGE_TRIG  0
+#define IOAPIC_LEVEL_TRIG 1
+
+#define IOAPIC_DEFAULT_BASE_ADDRESS  0xfec00000
+#define IOAPIC_MEM_LENGTH            0x100
+
+/* Direct registers. */
+#define IOAPIC_REG_SELECT  0x00
+#define IOAPIC_REG_WINDOW  0x10
+#define IOAPIC_REG_EOI     0x40        /* IA64 IOSAPIC only */
+
+/* Indirect registers. */
+#define IOAPIC_REG_APIC_ID 0x00        /* x86 IOAPIC only */
+#define IOAPIC_REG_VERSION 0x01
+#define IOAPIC_REG_ARB_ID  0x02        /* x86 IOAPIC only */
+
+/*ioapic delivery mode*/
+#define        IOAPIC_FIXED                    0x0
+#define        IOAPIC_LOWEST_PRIORITY          0x1
+#define        IOAPIC_PMI                      0x2
+#define        IOAPIC_NMI                      0x4
+#define        IOAPIC_INIT                     0x5
+#define        IOAPIC_EXTINT                   0x7
+
+struct kvm_ioapic {
+       u64 base_address;
+       u32 ioregsel;
+       u32 id;
+       u32 irr;
+       u32 pad;
+       union ioapic_redir_entry {
+               u64 bits;
+               struct {
+                       u8 vector;
+                       u8 delivery_mode:3;
+                       u8 dest_mode:1;
+                       u8 delivery_status:1;
+                       u8 polarity:1;
+                       u8 remote_irr:1;
+                       u8 trig_mode:1;
+                       u8 mask:1;
+                       u8 reserve:7;
+                       u8 reserved[4];
+                       u8 dest_id;
+               } fields;
+       } redirtbl[IOAPIC_NUM_PINS];
+       struct kvm_io_device dev;
+       struct kvm *kvm;
+};
+
+#ifdef DEBUG
+#define ASSERT(x)                                                      \
+do {                                                                   \
+       if (!(x)) {                                                     \
+               printk(KERN_EMERG "assertion failed %s: %d: %s\n",      \
+                      __FILE__, __LINE__, #x);                         \
+               BUG();                                                  \
+       }                                                               \
+} while (0)
+#else
+#define ASSERT(x) do { } while (0)
+#endif
+
+static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
+{
+       return kvm->arch.vioapic;
+}
+
+#ifdef CONFIG_IA64
+static inline int irqchip_in_kernel(struct kvm *kvm)
+{
+       return 1;
+}
+#endif
+
+struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
+                                      unsigned long bitmap);
+void kvm_ioapic_update_eoi(struct kvm *kvm, int vector);
+int kvm_ioapic_init(struct kvm *kvm);
+void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
+void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
+
+#endif
diff --git a/virt/kvm/iodev.h b/virt/kvm/iodev.h
new file mode 100644
index 0000000..c14e642
--- /dev/null
+++ b/virt/kvm/iodev.h
@@ -0,0 +1,63 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __KVM_IODEV_H__
+#define __KVM_IODEV_H__
+
+#include <linux/kvm_types.h>
+
+struct kvm_io_device {
+       void (*read)(struct kvm_io_device *this,
+                    gpa_t addr,
+                    int len,
+                    void *val);
+       void (*write)(struct kvm_io_device *this,
+                     gpa_t addr,
+                     int len,
+                     const void *val);
+       int (*in_range)(struct kvm_io_device *this, gpa_t addr);
+       void (*destructor)(struct kvm_io_device *this);
+
+       void             *private;
+};
+
+static inline void kvm_iodevice_read(struct kvm_io_device *dev,
+                                    gpa_t addr,
+                                    int len,
+                                    void *val)
+{
+       dev->read(dev, addr, len, val);
+}
+
+static inline void kvm_iodevice_write(struct kvm_io_device *dev,
+                                     gpa_t addr,
+                                     int len,
+                                     const void *val)
+{
+       dev->write(dev, addr, len, val);
+}
+
+static inline int kvm_iodevice_inrange(struct kvm_io_device *dev, gpa_t addr)
+{
+       return dev->in_range(dev, addr);
+}
+
+static inline void kvm_iodevice_destructor(struct kvm_io_device *dev)
+{
+       if (dev->destructor)
+               dev->destructor(dev);
+}
+
+#endif /* __KVM_IODEV_H__ */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
new file mode 100644
index 0000000..845beb2
--- /dev/null
+++ b/virt/kvm/kvm_main.c
@@ -0,0 +1,1393 @@
+/*
+ * Kernel-based Virtual Machine driver for Linux
+ *
+ * This module enables machines with Intel VT-x extensions to run virtual
+ * machines without emulation or binary translation.
+ *
+ * Copyright (C) 2006 Qumranet, Inc.
+ *
+ * Authors:
+ *   Avi Kivity   <avi@qumranet.com>
+ *   Yaniv Kamay  <yaniv@qumranet.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "iodev.h"
+
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/percpu.h>
+#include <linux/gfp.h>
+#include <linux/mm.h>
+#include <linux/miscdevice.h>
+#include <linux/vmalloc.h>
+#include <linux/reboot.h>
+#include <linux/debugfs.h>
+#include <linux/highmem.h>
+#include <linux/file.h>
+#include <linux/sysdev.h>
+#include <linux/cpu.h>
+#include <linux/sched.h>
+#include <linux/cpumask.h>
+#include <linux/smp.h>
+#include <linux/anon_inodes.h>
+#include <linux/profile.h>
+#include <linux/kvm_para.h>
+#include <linux/pagemap.h>
+#include <linux/mman.h>
+
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+
+MODULE_AUTHOR("Qumranet");
+MODULE_LICENSE("GPL");
+
+DEFINE_SPINLOCK(kvm_lock);
17255
 
+LIST_HEAD(vm_list);
17256
 
+
17257
 
+static cpumask_t cpus_hardware_enabled;
17258
 
+
17259
 
+struct kmem_cache *kvm_vcpu_cache;
17260
 
+EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
17261
 
+
17262
 
+static __read_mostly struct preempt_ops kvm_preempt_ops;
17263
 
+
17264
 
+static struct dentry *debugfs_dir;
17265
 
+
17266
 
+static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
17267
 
+                          unsigned long arg);
17268
 
+
17269
 
+static inline int valid_vcpu(int n)
17270
 
+{
17271
 
+       return likely(n >= 0 && n < KVM_MAX_VCPUS);
17272
 
+}
17273
 
+
17274
 
+/*
17275
 
+ * Switches to specified vcpu, until a matching vcpu_put()
17276
 
+ */
17277
 
+void vcpu_load(struct kvm_vcpu *vcpu)
17278
 
+{
17279
 
+       int cpu;
17280
 
+
17281
 
+       mutex_lock(&vcpu->mutex);
17282
 
+       cpu = get_cpu();
17283
 
+       preempt_notifier_register(&vcpu->preempt_notifier);
17284
 
+       kvm_arch_vcpu_load(vcpu, cpu);
17285
 
+       put_cpu();
17286
 
+}
17287
 
+
17288
 
+void vcpu_put(struct kvm_vcpu *vcpu)
17289
 
+{
17290
 
+       preempt_disable();
17291
 
+       kvm_arch_vcpu_put(vcpu);
17292
 
+       preempt_notifier_unregister(&vcpu->preempt_notifier);
17293
 
+       preempt_enable();
17294
 
+       mutex_unlock(&vcpu->mutex);
17295
 
+}
17296
 
+
17297
 
+static void ack_flush(void *_completed)
17298
 
+{
17299
 
+}
17300
 
+
17301
 
+void kvm_flush_remote_tlbs(struct kvm *kvm)
17302
 
+{
17303
 
+       int i, cpu;
17304
 
+       cpumask_t cpus;
17305
 
+       struct kvm_vcpu *vcpu;
17306
 
+
17307
 
+       cpus_clear(cpus);
17308
 
+       for (i = 0; i < KVM_MAX_VCPUS; ++i) {
17309
 
+               vcpu = kvm->vcpus[i];
17310
 
+               if (!vcpu)
17311
 
+                       continue;
17312
 
+               if (test_and_set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
17313
 
+                       continue;
17314
 
+               cpu = vcpu->cpu;
17315
 
+               if (cpu != -1 && cpu != raw_smp_processor_id())
17316
 
+                       cpu_set(cpu, cpus);
17317
 
+       }
17318
 
+       if (cpus_empty(cpus))
17319
 
+               return;
17320
 
+       ++kvm->stat.remote_tlb_flush;
17321
 
+       smp_call_function_mask(cpus, ack_flush, NULL, 1);
17322
 
+}
17323
 
+
17324
 
+int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
17325
 
+{
17326
 
+       struct page *page;
17327
 
+       int r;
17328
 
+
17329
 
+       mutex_init(&vcpu->mutex);
17330
 
+       vcpu->cpu = -1;
17331
 
+       vcpu->kvm = kvm;
17332
 
+       vcpu->vcpu_id = id;
17333
 
+       init_waitqueue_head(&vcpu->wq);
17334
 
+
17335
 
+       page = alloc_page(GFP_KERNEL | __GFP_ZERO);
17336
 
+       if (!page) {
17337
 
+               r = -ENOMEM;
17338
 
+               goto fail;
17339
 
+       }
17340
 
+       vcpu->run = page_address(page);
17341
 
+
17342
 
+       r = kvm_arch_vcpu_init(vcpu);
17343
 
+       if (r < 0)
17344
 
+               goto fail_free_run;
17345
 
+       return 0;
17346
 
+
17347
 
+fail_free_run:
17348
 
+       free_page((unsigned long)vcpu->run);
17349
 
+fail:
17350
 
+       return r;
17351
 
+}
17352
 
+EXPORT_SYMBOL_GPL(kvm_vcpu_init);
17353
 
+
17354
 
+void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
17355
 
+{
17356
 
+       kvm_arch_vcpu_uninit(vcpu);
17357
 
+       free_page((unsigned long)vcpu->run);
17358
 
+}
17359
 
+EXPORT_SYMBOL_GPL(kvm_vcpu_uninit);
17360
 
+
17361
 
+static struct kvm *kvm_create_vm(void)
17362
 
+{
17363
 
+       struct kvm *kvm = kvm_arch_create_vm();
17364
 
+
17365
 
+       if (IS_ERR(kvm))
17366
 
+               goto out;
17367
 
+
17368
 
+       kvm->mm = current->mm;
17369
 
+       atomic_inc(&kvm->mm->mm_count);
17370
 
+       kvm_io_bus_init(&kvm->pio_bus);
17371
 
+       mutex_init(&kvm->lock);
17372
 
+       kvm_io_bus_init(&kvm->mmio_bus);
17373
 
+       spin_lock(&kvm_lock);
17374
 
+       list_add(&kvm->vm_list, &vm_list);
17375
 
+       spin_unlock(&kvm_lock);
17376
 
+out:
17377
 
+       return kvm;
17378
 
+}
17379
 
+
17380
 
+/*
17381
 
+ * Free any memory in @free but not in @dont.
17382
 
+ */
17383
 
+static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
17384
 
+                                 struct kvm_memory_slot *dont)
17385
 
+{
17386
 
+       if (!dont || free->rmap != dont->rmap)
17387
 
+               vfree(free->rmap);
17388
 
+
17389
 
+       if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
17390
 
+               vfree(free->dirty_bitmap);
17391
 
+
17392
 
+       free->npages = 0;
17393
 
+       free->dirty_bitmap = NULL;
17394
 
+       free->rmap = NULL;
17395
 
+}
17396
 
+
17397
 
+void kvm_free_physmem(struct kvm *kvm)
17398
 
+{
17399
 
+       int i;
17400
 
+
17401
 
+       for (i = 0; i < kvm->nmemslots; ++i)
17402
 
+               kvm_free_physmem_slot(&kvm->memslots[i], NULL);
17403
 
+}
17404
 
+
17405
 
+static void kvm_destroy_vm(struct kvm *kvm)
17406
 
+{
17407
 
+       struct mm_struct *mm = kvm->mm;
17408
 
+
17409
 
+       spin_lock(&kvm_lock);
17410
 
+       list_del(&kvm->vm_list);
17411
 
+       spin_unlock(&kvm_lock);
17412
 
+       kvm_io_bus_destroy(&kvm->pio_bus);
17413
 
+       kvm_io_bus_destroy(&kvm->mmio_bus);
17414
 
+       kvm_arch_destroy_vm(kvm);
17415
 
+       mmdrop(mm);
17416
 
+}
17417
 
+
17418
 
+static int kvm_vm_release(struct inode *inode, struct file *filp)
17419
 
+{
17420
 
+       struct kvm *kvm = filp->private_data;
17421
 
+
17422
 
+       kvm_destroy_vm(kvm);
17423
 
+       return 0;
17424
 
+}
17425
 
+
17426
 
+/*
17427
 
+ * Allocate some memory and give it an address in the guest physical address
17428
 
+ * space.
17429
 
+ *
17430
 
+ * Discontiguous memory is allowed, mostly for framebuffers.
17431
 
+ *
17432
 
+ * Must be called holding kvm->lock.
17433
 
+ */
17434
 
+int __kvm_set_memory_region(struct kvm *kvm,
17435
 
+                           struct kvm_userspace_memory_region *mem,
17436
 
+                           int user_alloc)
17437
 
+{
17438
 
+       int r;
17439
 
+       gfn_t base_gfn;
17440
 
+       unsigned long npages;
17441
 
+       unsigned long i;
17442
 
+       struct kvm_memory_slot *memslot;
17443
 
+       struct kvm_memory_slot old, new;
17444
 
+
17445
 
+       r = -EINVAL;
17446
 
+       /* General sanity checks */
17447
 
+       if (mem->memory_size & (PAGE_SIZE - 1))
17448
 
+               goto out;
17449
 
+       if (mem->guest_phys_addr & (PAGE_SIZE - 1))
17450
 
+               goto out;
17451
 
+       if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
17452
 
+               goto out;
17453
 
+       if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
17454
 
+               goto out;
17455
 
+
17456
 
+       memslot = &kvm->memslots[mem->slot];
17457
 
+       base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
17458
 
+       npages = mem->memory_size >> PAGE_SHIFT;
17459
 
+
17460
 
+       if (!npages)
17461
 
+               mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;
17462
 
+
17463
 
+       new = old = *memslot;
17464
 
+
17465
 
+       new.base_gfn = base_gfn;
17466
 
+       new.npages = npages;
17467
 
+       new.flags = mem->flags;
17468
 
+
17469
 
+       /* Disallow changing a memory slot's size. */
17470
 
+       r = -EINVAL;
17471
 
+       if (npages && old.npages && npages != old.npages)
17472
 
+               goto out_free;
17473
 
+
17474
 
+       /* Check for overlaps */
17475
 
+       r = -EEXIST;
17476
 
+       for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
17477
 
+               struct kvm_memory_slot *s = &kvm->memslots[i];
17478
 
+
17479
 
+               if (s == memslot)
17480
 
+                       continue;
17481
 
+               if (!((base_gfn + npages <= s->base_gfn) ||
17482
 
+                     (base_gfn >= s->base_gfn + s->npages)))
17483
 
+                       goto out_free;
17484
 
+       }
17485
 
+
17486
 
+       /* Free page dirty bitmap if unneeded */
17487
 
+       if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))
17488
 
+               new.dirty_bitmap = NULL;
17489
 
+
17490
 
+       r = -ENOMEM;
17491
 
+
17492
 
+       /* Allocate if a slot is being created */
17493
 
+       if (npages && !new.rmap) {
17494
 
+               new.rmap = vmalloc(npages * sizeof(struct page *));
17495
 
+
17496
 
+               if (!new.rmap)
17497
 
+                       goto out_free;
17498
 
+
17499
 
+               memset(new.rmap, 0, npages * sizeof(*new.rmap));
17500
 
+
17501
 
+               new.user_alloc = user_alloc;
17502
 
+               new.userspace_addr = mem->userspace_addr;
17503
 
+       }
17504
 
+
17505
 
+       /* Allocate page dirty bitmap if needed */
17506
 
+       if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
17507
 
+               unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8;
17508
 
+
17509
 
+               new.dirty_bitmap = vmalloc(dirty_bytes);
17510
 
+               if (!new.dirty_bitmap)
17511
 
+                       goto out_free;
17512
 
+               memset(new.dirty_bitmap, 0, dirty_bytes);
17513
 
+       }
17514
 
+
17515
 
+       if (mem->slot >= kvm->nmemslots)
17516
 
+               kvm->nmemslots = mem->slot + 1;
17517
 
+
17518
 
+       *memslot = new;
17519
 
+
17520
 
+       r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc);
17521
 
+       if (r) {
17522
 
+               *memslot = old;
17523
 
+               goto out_free;
17524
 
+       }
17525
 
+
17526
 
+       kvm_free_physmem_slot(&old, &new);
17527
 
+       return 0;
17528
 
+
17529
 
+out_free:
17530
 
+       kvm_free_physmem_slot(&new, &old);
17531
 
+out:
17532
 
+       return r;
17533
 
+
17534
 
+}
17535
 
+EXPORT_SYMBOL_GPL(__kvm_set_memory_region);
17536
 
+
17537
 
+int kvm_set_memory_region(struct kvm *kvm,
17538
 
+                         struct kvm_userspace_memory_region *mem,
17539
 
+                         int user_alloc)
17540
 
+{
17541
 
+       int r;
17542
 
+
17543
 
+       mutex_lock(&kvm->lock);
17544
 
+       r = __kvm_set_memory_region(kvm, mem, user_alloc);
17545
 
+       mutex_unlock(&kvm->lock);
17546
 
+       return r;
17547
 
+}
17548
 
+EXPORT_SYMBOL_GPL(kvm_set_memory_region);
17549
 
+
17550
 
+int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
17551
 
+                                  struct
17552
 
+                                  kvm_userspace_memory_region *mem,
17553
 
+                                  int user_alloc)
17554
 
+{
17555
 
+       if (mem->slot >= KVM_MEMORY_SLOTS)
17556
 
+               return -EINVAL;
17557
 
+       return kvm_set_memory_region(kvm, mem, user_alloc);
17558
 
+}
17559
 
+
17560
 
+int kvm_get_dirty_log(struct kvm *kvm,
17561
 
+                       struct kvm_dirty_log *log, int *is_dirty)
17562
 
+{
17563
 
+       struct kvm_memory_slot *memslot;
17564
 
+       int r, i;
17565
 
+       int n;
17566
 
+       unsigned long any = 0;
17567
 
+
17568
 
+       r = -EINVAL;
17569
 
+       if (log->slot >= KVM_MEMORY_SLOTS)
17570
 
+               goto out;
17571
 
+
17572
 
+       memslot = &kvm->memslots[log->slot];
17573
 
+       r = -ENOENT;
17574
 
+       if (!memslot->dirty_bitmap)
17575
 
+               goto out;
17576
 
+
17577
 
+       n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
17578
 
+
17579
 
+       for (i = 0; !any && i < n/sizeof(long); ++i)
17580
 
+               any = memslot->dirty_bitmap[i];
17581
 
+
17582
 
+       r = -EFAULT;
17583
 
+       if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
17584
 
+               goto out;
17585
 
+
17586
 
+       if (any)
17587
 
+               *is_dirty = 1;
17588
 
+
17589
 
+       r = 0;
17590
 
+out:
17591
 
+       return r;
17592
 
+}
17593
 
+
17594
 
+int is_error_page(struct page *page)
17595
 
+{
17596
 
+       return page == bad_page;
17597
 
+}
17598
 
+EXPORT_SYMBOL_GPL(is_error_page);
17599
 
+
17600
 
+static inline unsigned long bad_hva(void)
17601
 
+{
17602
 
+       return PAGE_OFFSET;
17603
 
+}
17604
 
+
17605
 
+int kvm_is_error_hva(unsigned long addr)
17606
 
+{
17607
 
+       return addr == bad_hva();
17608
 
+}
17609
 
+EXPORT_SYMBOL_GPL(kvm_is_error_hva);
17610
 
+
17611
 
+static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
17612
 
+{
17613
 
+       int i;
17614
 
+
17615
 
+       for (i = 0; i < kvm->nmemslots; ++i) {
17616
 
+               struct kvm_memory_slot *memslot = &kvm->memslots[i];
17617
 
+
17618
 
+               if (gfn >= memslot->base_gfn
17619
 
+                   && gfn < memslot->base_gfn + memslot->npages)
17620
 
+                       return memslot;
17621
 
+       }
17622
 
+       return NULL;
17623
 
+}
17624
 
+
17625
 
+struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
17626
 
+{
17627
 
+       gfn = unalias_gfn(kvm, gfn);
17628
 
+       return __gfn_to_memslot(kvm, gfn);
17629
 
+}
17630
 
+
17631
 
+int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
17632
 
+{
17633
 
+       int i;
17634
 
+
17635
 
+       gfn = unalias_gfn(kvm, gfn);
17636
 
+       for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
17637
 
+               struct kvm_memory_slot *memslot = &kvm->memslots[i];
17638
 
+
17639
 
+               if (gfn >= memslot->base_gfn
17640
 
+                   && gfn < memslot->base_gfn + memslot->npages)
17641
 
+                       return 1;
17642
 
+       }
17643
 
+       return 0;
17644
 
+}
17645
 
+EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
17646
 
+
17647
 
+static unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
17648
 
+{
17649
 
+       struct kvm_memory_slot *slot;
17650
 
+
17651
 
+       gfn = unalias_gfn(kvm, gfn);
17652
 
+       slot = __gfn_to_memslot(kvm, gfn);
17653
 
+       if (!slot)
17654
 
+               return bad_hva();
17655
 
+       return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
17656
 
+}
17657
 
+
17658
 
+/*
17659
 
+ * Requires current->mm->mmap_sem to be held
17660
 
+ */
17661
 
+static struct page *__gfn_to_page(struct kvm *kvm, gfn_t gfn)
17662
 
+{
17663
 
+       struct page *page[1];
17664
 
+       unsigned long addr;
17665
 
+       int npages;
17666
 
+
17667
 
+       might_sleep();
17668
 
+
17669
 
+       addr = gfn_to_hva(kvm, gfn);
17670
 
+       if (kvm_is_error_hva(addr)) {
17671
 
+               get_page(bad_page);
17672
 
+               return bad_page;
17673
 
+       }
17674
 
+
17675
 
+       npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page,
17676
 
+                               NULL);
17677
 
+
17678
 
+       if (npages != 1) {
17679
 
+               get_page(bad_page);
17680
 
+               return bad_page;
17681
 
+       }
17682
 
+
17683
 
+       return page[0];
17684
 
+}
17685
 
+
17686
 
+struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
17687
 
+{
17688
 
+       struct page *page;
17689
 
+
17690
 
+       down_read(&current->mm->mmap_sem);
17691
 
+       page = __gfn_to_page(kvm, gfn);
17692
 
+       up_read(&current->mm->mmap_sem);
17693
 
+
17694
 
+       return page;
17695
 
+}
17696
 
+
17697
 
+EXPORT_SYMBOL_GPL(gfn_to_page);
17698
 
+
17699
 
+void kvm_release_page_clean(struct page *page)
17700
 
+{
17701
 
+       put_page(page);
17702
 
+}
17703
 
+EXPORT_SYMBOL_GPL(kvm_release_page_clean);
17704
 
+
17705
 
+void kvm_release_page_dirty(struct page *page)
17706
 
+{
17707
 
+       if (!PageReserved(page))
17708
 
+               SetPageDirty(page);
17709
 
+       put_page(page);
17710
 
+}
17711
 
+EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
17712
 
+
17713
 
+static int next_segment(unsigned long len, int offset)
17714
 
+{
17715
 
+       if (len > PAGE_SIZE - offset)
17716
 
+               return PAGE_SIZE - offset;
17717
 
+       else
17718
 
+               return len;
17719
 
+}
17720
 
+
17721
 
+int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
17722
 
+                       int len)
17723
 
+{
17724
 
+       int r;
17725
 
+       unsigned long addr;
17726
 
+
17727
 
+       addr = gfn_to_hva(kvm, gfn);
17728
 
+       if (kvm_is_error_hva(addr))
17729
 
+               return -EFAULT;
17730
 
+       r = copy_from_user(data, (void __user *)addr + offset, len);
17731
 
+       if (r)
17732
 
+               return -EFAULT;
17733
 
+       return 0;
17734
 
+}
17735
 
+EXPORT_SYMBOL_GPL(kvm_read_guest_page);
17736
 
+
17737
 
+int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len)
17738
 
+{
17739
 
+       gfn_t gfn = gpa >> PAGE_SHIFT;
17740
 
+       int seg;
17741
 
+       int offset = offset_in_page(gpa);
17742
 
+       int ret;
17743
 
+
17744
 
+       while ((seg = next_segment(len, offset)) != 0) {
17745
 
+               ret = kvm_read_guest_page(kvm, gfn, data, offset, seg);
17746
 
+               if (ret < 0)
17747
 
+                       return ret;
17748
 
+               offset = 0;
17749
 
+               len -= seg;
17750
 
+               data += seg;
17751
 
+               ++gfn;
17752
 
+       }
17753
 
+       return 0;
17754
 
+}
17755
 
+EXPORT_SYMBOL_GPL(kvm_read_guest);
17756
 
+
17757
 
+int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
17758
 
+                        int offset, int len)
17759
 
+{
17760
 
+       int r;
17761
 
+       unsigned long addr;
17762
 
+
17763
 
+       addr = gfn_to_hva(kvm, gfn);
17764
 
+       if (kvm_is_error_hva(addr))
17765
 
+               return -EFAULT;
17766
 
+       r = copy_to_user((void __user *)addr + offset, data, len);
17767
 
+       if (r)
17768
 
+               return -EFAULT;
17769
 
+       mark_page_dirty(kvm, gfn);
17770
 
+       return 0;
17771
 
+}
17772
 
+EXPORT_SYMBOL_GPL(kvm_write_guest_page);
17773
 
+
17774
 
+int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
17775
 
+                   unsigned long len)
17776
 
+{
17777
 
+       gfn_t gfn = gpa >> PAGE_SHIFT;
17778
 
+       int seg;
17779
 
+       int offset = offset_in_page(gpa);
17780
 
+       int ret;
17781
 
+
17782
 
+       while ((seg = next_segment(len, offset)) != 0) {
17783
 
+               ret = kvm_write_guest_page(kvm, gfn, data, offset, seg);
17784
 
+               if (ret < 0)
17785
 
+                       return ret;
17786
 
+               offset = 0;
17787
 
+               len -= seg;
17788
 
+               data += seg;
17789
 
+               ++gfn;
17790
 
+       }
17791
 
+       return 0;
17792
 
+}
17793
 
+
17794
 
+int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
17795
 
+{
17796
 
+       return kvm_write_guest_page(kvm, gfn, empty_zero_page, offset, len);
17797
 
+}
17798
 
+EXPORT_SYMBOL_GPL(kvm_clear_guest_page);
17799
 
+
17800
 
+int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
17801
 
+{
17802
 
+       gfn_t gfn = gpa >> PAGE_SHIFT;
17803
 
+       int seg;
17804
 
+       int offset = offset_in_page(gpa);
17805
 
+       int ret;
17806
 
+
17807
 
+        while ((seg = next_segment(len, offset)) != 0) {
17808
 
+               ret = kvm_clear_guest_page(kvm, gfn, offset, seg);
17809
 
+               if (ret < 0)
17810
 
+                       return ret;
17811
 
+               offset = 0;
17812
 
+               len -= seg;
17813
 
+               ++gfn;
17814
 
+       }
17815
 
+       return 0;
17816
 
+}
17817
 
+EXPORT_SYMBOL_GPL(kvm_clear_guest);
17818
 
+
17819
 
+void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
17820
 
+{
17821
 
+       struct kvm_memory_slot *memslot;
17822
 
+
17823
 
+       gfn = unalias_gfn(kvm, gfn);
17824
 
+       memslot = __gfn_to_memslot(kvm, gfn);
17825
 
+       if (memslot && memslot->dirty_bitmap) {
17826
 
+               unsigned long rel_gfn = gfn - memslot->base_gfn;
17827
 
+
17828
 
+               /* avoid RMW */
17829
 
+               if (!test_bit(rel_gfn, memslot->dirty_bitmap))
17830
 
+                       set_bit(rel_gfn, memslot->dirty_bitmap);
17831
 
+       }
17832
 
+}
17833
 
+
17834
 
+/*
17835
 
+ * The vCPU has executed a HLT instruction with in-kernel mode enabled.
17836
 
+ */
17837
 
+void kvm_vcpu_block(struct kvm_vcpu *vcpu)
17838
 
+{
17839
 
+       DECLARE_WAITQUEUE(wait, current);
17840
 
+
17841
 
+       add_wait_queue(&vcpu->wq, &wait);
17842
 
+
17843
 
+       /*
17844
 
+        * We will block until either an interrupt or a signal wakes us up
17845
 
+        */
17846
 
+       while (!kvm_cpu_has_interrupt(vcpu)
17847
 
+              && !signal_pending(current)
17848
 
+              && !kvm_arch_vcpu_runnable(vcpu)) {
17849
 
+               set_current_state(TASK_INTERRUPTIBLE);
17850
 
+               vcpu_put(vcpu);
17851
 
+               schedule();
17852
 
+               vcpu_load(vcpu);
17853
 
+       }
17854
 
+
17855
 
+       __set_current_state(TASK_RUNNING);
17856
 
+       remove_wait_queue(&vcpu->wq, &wait);
17857
 
+}
17858
 
+
17859
 
+void kvm_resched(struct kvm_vcpu *vcpu)
17860
 
+{
17861
 
+       if (!need_resched())
17862
 
+               return;
17863
 
+       cond_resched();
17864
 
+}
17865
 
+EXPORT_SYMBOL_GPL(kvm_resched);
17866
 
+
17867
 
+static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
17868
 
+{
17869
 
+       struct kvm_vcpu *vcpu = vma->vm_file->private_data;
17870
 
+       struct page *page;
17871
 
+
17872
 
+       if (vmf->pgoff == 0)
17873
 
+               page = virt_to_page(vcpu->run);
17874
 
+       else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET)
17875
 
+               page = virt_to_page(vcpu->arch.pio_data);
17876
 
+       else
17877
 
+               return VM_FAULT_SIGBUS;
17878
 
+       get_page(page);
17879
 
+       vmf->page = page;
17880
 
+       return 0;
17881
 
+}
17882
 
+
17883
 
+static struct vm_operations_struct kvm_vcpu_vm_ops = {
17884
 
+       .fault = kvm_vcpu_fault,
17885
 
+};
17886
 
+
17887
 
+static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
17888
 
+{
17889
 
+       vma->vm_ops = &kvm_vcpu_vm_ops;
17890
 
+       return 0;
17891
 
+}
17892
 
+
17893
 
+static int kvm_vcpu_release(struct inode *inode, struct file *filp)
17894
 
+{
17895
 
+       struct kvm_vcpu *vcpu = filp->private_data;
17896
 
+
17897
 
+       fput(vcpu->kvm->filp);
17898
 
+       return 0;
17899
 
+}
17900
 
+
17901
 
+static struct file_operations kvm_vcpu_fops = {
17902
 
+       .release        = kvm_vcpu_release,
17903
 
+       .unlocked_ioctl = kvm_vcpu_ioctl,
17904
 
+       .compat_ioctl   = kvm_vcpu_ioctl,
17905
 
+       .mmap           = kvm_vcpu_mmap,
17906
 
+};
17907
 
+
17908
 
+/*
17909
 
+ * Allocates an inode for the vcpu.
17910
 
+ */
17911
 
+static int create_vcpu_fd(struct kvm_vcpu *vcpu)
17912
 
+{
17913
 
+       int fd, r;
17914
 
+       struct inode *inode;
17915
 
+       struct file *file;
17916
 
+
17917
 
+       r = anon_inode_getfd(&fd, &inode, &file,
17918
 
+                            "kvm-vcpu", &kvm_vcpu_fops, vcpu);
17919
 
+       if (r)
17920
 
+               return r;
17921
 
+       atomic_inc(&vcpu->kvm->filp->f_count);
17922
 
+       return fd;
17923
 
+}
17924
 
+
17925
 
+/*
17926
 
+ * Creates some virtual cpus.  Good luck creating more than one.
17927
 
+ */
17928
 
+static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
17929
 
+{
17930
 
+       int r;
17931
 
+       struct kvm_vcpu *vcpu;
17932
 
+
17933
 
+       if (!valid_vcpu(n))
17934
 
+               return -EINVAL;
17935
 
+
17936
 
+       vcpu = kvm_arch_vcpu_create(kvm, n);
17937
 
+       if (IS_ERR(vcpu))
17938
 
+               return PTR_ERR(vcpu);
17939
 
+
17940
 
+       preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
17941
 
+
17942
 
+       r = kvm_arch_vcpu_setup(vcpu);
17943
 
+       if (r)
17944
 
+               goto vcpu_destroy;
17945
 
+
17946
 
+       mutex_lock(&kvm->lock);
17947
 
+       if (kvm->vcpus[n]) {
17948
 
+               r = -EEXIST;
17949
 
+               mutex_unlock(&kvm->lock);
17950
 
+               goto vcpu_destroy;
17951
 
+       }
17952
 
+       kvm->vcpus[n] = vcpu;
17953
 
+       mutex_unlock(&kvm->lock);
17954
 
+
17955
 
+       /* Now it's all set up, let userspace reach it */
17956
 
+       r = create_vcpu_fd(vcpu);
17957
 
+       if (r < 0)
17958
 
+               goto unlink;
17959
 
+       return r;
17960
 
+
17961
 
+unlink:
17962
 
+       mutex_lock(&kvm->lock);
17963
 
+       kvm->vcpus[n] = NULL;
17964
 
+       mutex_unlock(&kvm->lock);
17965
 
+vcpu_destroy:
17966
 
+       kvm_arch_vcpu_destroy(vcpu);
17967
 
+       return r;
17968
 
+}
17969
 
+
17970
 
+static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset)
17971
 
+{
17972
 
+       if (sigset) {
17973
 
+               sigdelsetmask(sigset, sigmask(SIGKILL)|sigmask(SIGSTOP));
17974
 
+               vcpu->sigset_active = 1;
17975
 
+               vcpu->sigset = *sigset;
17976
 
+       } else
17977
 
+               vcpu->sigset_active = 0;
17978
 
+       return 0;
17979
 
+}
17980
 
+
17981
 
+static long kvm_vcpu_ioctl(struct file *filp,
17982
 
+                          unsigned int ioctl, unsigned long arg)
17983
 
+{
17984
 
+       struct kvm_vcpu *vcpu = filp->private_data;
17985
 
+       void __user *argp = (void __user *)arg;
17986
 
+       int r;
17987
 
+
17988
 
+       if (vcpu->kvm->mm != current->mm)
17989
 
+               return -EIO;
17990
 
+       switch (ioctl) {
17991
 
+       case KVM_RUN:
17992
 
+               r = -EINVAL;
17993
 
+               if (arg)
17994
 
+                       goto out;
17995
 
+               r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
17996
 
+               break;
17997
 
+       case KVM_GET_REGS: {
17998
 
+               struct kvm_regs kvm_regs;
17999
 
+
18000
 
+               memset(&kvm_regs, 0, sizeof kvm_regs);
18001
 
+               r = kvm_arch_vcpu_ioctl_get_regs(vcpu, &kvm_regs);
18002
 
+               if (r)
18003
 
+                       goto out;
18004
 
+               r = -EFAULT;
18005
 
+               if (copy_to_user(argp, &kvm_regs, sizeof kvm_regs))
18006
 
+                       goto out;
18007
 
+               r = 0;
18008
 
+               break;
18009
 
+       }
18010
 
+       case KVM_SET_REGS: {
18011
 
+               struct kvm_regs kvm_regs;
18012
 
+
18013
 
+               r = -EFAULT;
18014
 
+               if (copy_from_user(&kvm_regs, argp, sizeof kvm_regs))
18015
 
+                       goto out;
18016
 
+               r = kvm_arch_vcpu_ioctl_set_regs(vcpu, &kvm_regs);
18017
 
+               if (r)
18018
 
+                       goto out;
18019
 
+               r = 0;
18020
 
+               break;
18021
 
+       }
18022
 
+       case KVM_GET_SREGS: {
18023
 
+               struct kvm_sregs kvm_sregs;
18024
 
+
18025
 
+               memset(&kvm_sregs, 0, sizeof kvm_sregs);
18026
 
+               r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, &kvm_sregs);
18027
 
+               if (r)
18028
 
+                       goto out;
18029
 
+               r = -EFAULT;
18030
 
+               if (copy_to_user(argp, &kvm_sregs, sizeof kvm_sregs))
18031
 
+                       goto out;
18032
 
+               r = 0;
18033
 
+               break;
18034
 
+       }
18035
 
+       case KVM_SET_SREGS: {
18036
 
+               struct kvm_sregs kvm_sregs;
18037
 
+
18038
 
+               r = -EFAULT;
18039
 
+               if (copy_from_user(&kvm_sregs, argp, sizeof kvm_sregs))
18040
 
+                       goto out;
18041
 
+               r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, &kvm_sregs);
18042
 
+               if (r)
18043
 
+                       goto out;
18044
 
+               r = 0;
18045
 
+               break;
18046
 
+       }
18047
 
+       case KVM_TRANSLATE: {
18048
 
+               struct kvm_translation tr;
18049
 
+
18050
 
+               r = -EFAULT;
18051
 
+               if (copy_from_user(&tr, argp, sizeof tr))
18052
 
+                       goto out;
18053
 
+               r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr);
18054
 
+               if (r)
18055
 
+                       goto out;
18056
 
+               r = -EFAULT;
18057
 
+               if (copy_to_user(argp, &tr, sizeof tr))
18058
 
+                       goto out;
18059
 
+               r = 0;
18060
 
+               break;
18061
 
+       }
18062
 
+       case KVM_DEBUG_GUEST: {
18063
 
+               struct kvm_debug_guest dbg;
18064
 
+
18065
 
+               r = -EFAULT;
18066
 
+               if (copy_from_user(&dbg, argp, sizeof dbg))
18067
 
+                       goto out;
18068
 
+               r = kvm_arch_vcpu_ioctl_debug_guest(vcpu, &dbg);
18069
 
+               if (r)
18070
 
+                       goto out;
18071
 
+               r = 0;
18072
 
+               break;
18073
 
+       }
18074
 
+       case KVM_SET_SIGNAL_MASK: {
18075
 
+               struct kvm_signal_mask __user *sigmask_arg = argp;
18076
 
+               struct kvm_signal_mask kvm_sigmask;
18077
 
+               sigset_t sigset, *p;
18078
 
+
18079
 
+               p = NULL;
18080
 
+               if (argp) {
18081
 
+                       r = -EFAULT;
18082
 
+                       if (copy_from_user(&kvm_sigmask, argp,
18083
 
+                                          sizeof kvm_sigmask))
18084
 
+                               goto out;
18085
 
+                       r = -EINVAL;
18086
 
+                       if (kvm_sigmask.len != sizeof sigset)
18087
 
+                               goto out;
18088
 
+                       r = -EFAULT;
18089
 
+                       if (copy_from_user(&sigset, sigmask_arg->sigset,
18090
 
+                                          sizeof sigset))
18091
 
+                               goto out;
18092
 
+                       p = &sigset;
18093
 
+               }
18094
 
+               r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset);
18095
 
+               break;
18096
 
+       }
18097
 
+       case KVM_GET_FPU: {
18098
 
+               struct kvm_fpu fpu;
18099
 
+
18100
 
+               memset(&fpu, 0, sizeof fpu);
18101
 
+               r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, &fpu);
18102
 
+               if (r)
18103
 
+                       goto out;
18104
 
+               r = -EFAULT;
18105
 
+               if (copy_to_user(argp, &fpu, sizeof fpu))
18106
 
+                       goto out;
18107
 
+               r = 0;
18108
 
+               break;
18109
 
+       }
18110
 
+       case KVM_SET_FPU: {
18111
 
+               struct kvm_fpu fpu;
18112
 
+
18113
 
+               r = -EFAULT;
18114
 
+               if (copy_from_user(&fpu, argp, sizeof fpu))
18115
 
+                       goto out;
18116
 
+               r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, &fpu);
18117
 
+               if (r)
18118
 
+                       goto out;
18119
 
+               r = 0;
18120
 
+               break;
18121
 
+       }
18122
 
+       default:
18123
 
+               r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
18124
 
+       }
18125
 
+out:
18126
 
+       return r;
18127
 
+}
18128
 
+
18129
 
+static long kvm_vm_ioctl(struct file *filp,
18130
 
+                          unsigned int ioctl, unsigned long arg)
18131
 
+{
18132
 
+       struct kvm *kvm = filp->private_data;
18133
 
+       void __user *argp = (void __user *)arg;
18134
 
+       int r;
18135
 
+
18136
 
+       if (kvm->mm != current->mm)
18137
 
+               return -EIO;
18138
 
+       switch (ioctl) {
18139
 
+       case KVM_CREATE_VCPU:
18140
 
+               r = kvm_vm_ioctl_create_vcpu(kvm, arg);
18141
 
+               if (r < 0)
18142
 
+                       goto out;
18143
 
+               break;
18144
 
+       case KVM_SET_USER_MEMORY_REGION: {
18145
 
+               struct kvm_userspace_memory_region kvm_userspace_mem;
18146
 
+
18147
 
+               r = -EFAULT;
18148
 
+               if (copy_from_user(&kvm_userspace_mem, argp,
18149
 
+                                               sizeof kvm_userspace_mem))
18150
 
+                       goto out;
18151
 
+
18152
 
+               r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 1);
18153
 
+               if (r)
18154
 
+                       goto out;
18155
 
+               break;
18156
 
+       }
18157
 
+       case KVM_GET_DIRTY_LOG: {
18158
 
+               struct kvm_dirty_log log;
18159
 
+
18160
 
+               r = -EFAULT;
18161
 
+               if (copy_from_user(&log, argp, sizeof log))
18162
 
+                       goto out;
18163
 
+               r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
18164
 
+               if (r)
18165
 
+                       goto out;
18166
 
+               break;
18167
 
+       }
18168
 
+       default:
18169
 
+               r = kvm_arch_vm_ioctl(filp, ioctl, arg);
18170
 
+       }
18171
 
+out:
18172
 
+       return r;
18173
 
+}
18174
 
+
18175
 
+static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
18176
 
+{
18177
 
+       struct kvm *kvm = vma->vm_file->private_data;
18178
 
+       struct page *page;
18179
 
+
18180
 
+       if (!kvm_is_visible_gfn(kvm, vmf->pgoff))
18181
 
+               return VM_FAULT_SIGBUS;
18182
 
+       /* current->mm->mmap_sem is already held so call lockless version */
18183
 
+       page = __gfn_to_page(kvm, vmf->pgoff);
18184
 
+       if (is_error_page(page)) {
18185
 
+               kvm_release_page_clean(page);
18186
 
+               return VM_FAULT_SIGBUS;
18187
 
+       }
18188
 
+       vmf->page = page;
18189
 
+       return 0;
18190
 
+}
18191
 
+
18192
 
+static struct vm_operations_struct kvm_vm_vm_ops = {
18193
 
+       .fault = kvm_vm_fault,
18194
 
+};
18195
 
+
18196
 
+static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
18197
 
+{
18198
 
+       vma->vm_ops = &kvm_vm_vm_ops;
18199
 
+       return 0;
18200
 
+}
18201
 
+
18202
 
+static struct file_operations kvm_vm_fops = {
18203
 
+       .release        = kvm_vm_release,
18204
 
+       .unlocked_ioctl = kvm_vm_ioctl,
18205
 
+       .compat_ioctl   = kvm_vm_ioctl,
18206
 
+       .mmap           = kvm_vm_mmap,
18207
 
+};
18208
 
+
18209
 
+static int kvm_dev_ioctl_create_vm(void)
18210
 
+{
18211
 
+       int fd, r;
18212
 
+       struct inode *inode;
18213
 
+       struct file *file;
18214
 
+       struct kvm *kvm;
18215
 
+
18216
 
+       kvm = kvm_create_vm();
18217
 
+       if (IS_ERR(kvm))
18218
 
+               return PTR_ERR(kvm);
18219
 
+       r = anon_inode_getfd(&fd, &inode, &file, "kvm-vm", &kvm_vm_fops, kvm);
18220
 
+       if (r) {
18221
 
+               kvm_destroy_vm(kvm);
18222
 
+               return r;
18223
 
+       }
18224
 
+
18225
 
+       kvm->filp = file;
18226
 
+
18227
 
+       return fd;
+}
+
+static long kvm_dev_ioctl(struct file *filp,
+                         unsigned int ioctl, unsigned long arg)
+{
+       void __user *argp = (void __user *)arg;
+       long r = -EINVAL;
+
+       switch (ioctl) {
+       case KVM_GET_API_VERSION:
+               r = -EINVAL;
+               if (arg)
+                       goto out;
+               r = KVM_API_VERSION;
+               break;
+       case KVM_CREATE_VM:
+               r = -EINVAL;
+               if (arg)
+                       goto out;
+               r = kvm_dev_ioctl_create_vm();
+               break;
+       case KVM_CHECK_EXTENSION:
+               r = kvm_dev_ioctl_check_extension((long)argp);
+               break;
+       case KVM_GET_VCPU_MMAP_SIZE:
+               r = -EINVAL;
+               if (arg)
+                       goto out;
+               r = 2 * PAGE_SIZE;
+               break;
+       default:
+               return kvm_arch_dev_ioctl(filp, ioctl, arg);
+       }
+out:
+       return r;
+}
+
+static struct file_operations kvm_chardev_ops = {
+       .unlocked_ioctl = kvm_dev_ioctl,
+       .compat_ioctl   = kvm_dev_ioctl,
+};
+
+static struct miscdevice kvm_dev = {
+       KVM_MINOR,
+       "kvm",
+       &kvm_chardev_ops,
+};
+
+static void hardware_enable(void *junk)
+{
+       int cpu = raw_smp_processor_id();
+
+       if (cpu_isset(cpu, cpus_hardware_enabled))
+               return;
+       cpu_set(cpu, cpus_hardware_enabled);
+       kvm_arch_hardware_enable(NULL);
+}
+
+static void hardware_disable(void *junk)
+{
+       int cpu = raw_smp_processor_id();
+
+       if (!cpu_isset(cpu, cpus_hardware_enabled))
+               return;
+       cpu_clear(cpu, cpus_hardware_enabled);
+       decache_vcpus_on_cpu(cpu);
+       kvm_arch_hardware_disable(NULL);
+}
+
+static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
+                          void *v)
+{
+       int cpu = (long)v;
+
+       val &= ~CPU_TASKS_FROZEN;
+       switch (val) {
+       case CPU_DYING:
+               printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
+                      cpu);
+               hardware_disable(NULL);
+               break;
+       case CPU_UP_CANCELED:
+               printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
+                      cpu);
+               smp_call_function_single(cpu, hardware_disable, NULL, 0, 1);
+               break;
+       case CPU_ONLINE:
+               printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
+                      cpu);
+               smp_call_function_single(cpu, hardware_enable, NULL, 0, 1);
+               break;
+       }
+       return NOTIFY_OK;
+}
+
+static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
+                     void *v)
+{
+       if (val == SYS_RESTART) {
+               /*
+                * Some (well, at least mine) BIOSes hang on reboot if
+                * in vmx root mode.
+                */
+               printk(KERN_INFO "kvm: exiting hardware virtualization\n");
+               on_each_cpu(hardware_disable, NULL, 0, 1);
+       }
+       return NOTIFY_OK;
+}
+
+static struct notifier_block kvm_reboot_notifier = {
+       .notifier_call = kvm_reboot,
+       .priority = 0,
+};
+
+void kvm_io_bus_init(struct kvm_io_bus *bus)
+{
+       memset(bus, 0, sizeof(*bus));
+}
+
+void kvm_io_bus_destroy(struct kvm_io_bus *bus)
+{
+       int i;
+
+       for (i = 0; i < bus->dev_count; i++) {
+               struct kvm_io_device *pos = bus->devs[i];
+
+               kvm_iodevice_destructor(pos);
+       }
+}
+
+struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr)
+{
+       int i;
+
+       for (i = 0; i < bus->dev_count; i++) {
+               struct kvm_io_device *pos = bus->devs[i];
+
+               if (pos->in_range(pos, addr))
+                       return pos;
+       }
+
+       return NULL;
+}
+
+void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev)
+{
+       BUG_ON(bus->dev_count > (NR_IOBUS_DEVS-1));
+
+       bus->devs[bus->dev_count++] = dev;
+}
+
+static struct notifier_block kvm_cpu_notifier = {
+       .notifier_call = kvm_cpu_hotplug,
+       .priority = 20, /* must be > scheduler priority */
+};
+
+static u64 vm_stat_get(void *_offset)
+{
+       unsigned offset = (long)_offset;
+       u64 total = 0;
+       struct kvm *kvm;
+
+       spin_lock(&kvm_lock);
+       list_for_each_entry(kvm, &vm_list, vm_list)
+               total += *(u32 *)((void *)kvm + offset);
+       spin_unlock(&kvm_lock);
+       return total;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, NULL, "%llu\n");
+
+static u64 vcpu_stat_get(void *_offset)
+{
+       unsigned offset = (long)_offset;
+       u64 total = 0;
+       struct kvm *kvm;
+       struct kvm_vcpu *vcpu;
+       int i;
+
+       spin_lock(&kvm_lock);
+       list_for_each_entry(kvm, &vm_list, vm_list)
+               for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+                       vcpu = kvm->vcpus[i];
+                       if (vcpu)
+                               total += *(u32 *)((void *)vcpu + offset);
+               }
+       spin_unlock(&kvm_lock);
+       return total;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, NULL, "%llu\n");
+
+static struct file_operations *stat_fops[] = {
+       [KVM_STAT_VCPU] = &vcpu_stat_fops,
+       [KVM_STAT_VM]   = &vm_stat_fops,
+};
+
+static void kvm_init_debug(void)
+{
+       struct kvm_stats_debugfs_item *p;
+
+       debugfs_dir = debugfs_create_dir("kvm", NULL);
+       for (p = debugfs_entries; p->name; ++p)
+               p->dentry = debugfs_create_file(p->name, 0444, debugfs_dir,
+                                               (void *)(long)p->offset,
+                                               stat_fops[p->kind]);
+}
+
+static void kvm_exit_debug(void)
+{
+       struct kvm_stats_debugfs_item *p;
+
+       for (p = debugfs_entries; p->name; ++p)
+               debugfs_remove(p->dentry);
+       debugfs_remove(debugfs_dir);
+}
+
+static int kvm_suspend(struct sys_device *dev, pm_message_t state)
+{
+       hardware_disable(NULL);
+       return 0;
+}
+
+static int kvm_resume(struct sys_device *dev)
+{
+       hardware_enable(NULL);
+       return 0;
+}
+
+static struct sysdev_class kvm_sysdev_class = {
+       set_kset_name("kvm"),
+       .suspend = kvm_suspend,
+       .resume = kvm_resume,
+};
+
+static struct sys_device kvm_sysdev = {
+       .id = 0,
+       .cls = &kvm_sysdev_class,
+};
+
+struct page *bad_page;
+
+static inline
+struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
+{
+       return container_of(pn, struct kvm_vcpu, preempt_notifier);
+}
+
+static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
+{
+       struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
+
+       kvm_arch_vcpu_load(vcpu, cpu);
+}
+
+static void kvm_sched_out(struct preempt_notifier *pn,
+                         struct task_struct *next)
+{
+       struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
+
+       kvm_arch_vcpu_put(vcpu);
+}
+
+int kvm_init(void *opaque, unsigned int vcpu_size,
+                 struct module *module)
+{
+       int r;
+       int cpu;
+
+       kvm_init_debug();
+
+       r = kvm_arch_init(opaque);
+       if (r)
+               goto out_fail;
+
+       bad_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+
+       if (bad_page == NULL) {
+               r = -ENOMEM;
+               goto out;
+       }
+
+       r = kvm_arch_hardware_setup();
+       if (r < 0)
+               goto out_free_0;
+
+       for_each_online_cpu(cpu) {
+               smp_call_function_single(cpu,
+                               kvm_arch_check_processor_compat,
+                               &r, 0, 1);
+               if (r < 0)
+                       goto out_free_1;
+       }
+
+       on_each_cpu(hardware_enable, NULL, 0, 1);
+       r = register_cpu_notifier(&kvm_cpu_notifier);
+       if (r)
+               goto out_free_2;
+       register_reboot_notifier(&kvm_reboot_notifier);
+
+       r = sysdev_class_register(&kvm_sysdev_class);
+       if (r)
+               goto out_free_3;
+
+       r = sysdev_register(&kvm_sysdev);
+       if (r)
+               goto out_free_4;
+
+       /* A kmem cache lets us meet the alignment requirements of fx_save. */
+       kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size,
+                                          __alignof__(struct kvm_vcpu),
+                                          0, NULL);
+       if (!kvm_vcpu_cache) {
+               r = -ENOMEM;
+               goto out_free_5;
+       }
+
+       kvm_chardev_ops.owner = module;
+
+       r = misc_register(&kvm_dev);
+       if (r) {
+               printk(KERN_ERR "kvm: misc device register failed\n");
+               goto out_free;
+       }
+
+       kvm_preempt_ops.sched_in = kvm_sched_in;
+       kvm_preempt_ops.sched_out = kvm_sched_out;
+
+       return 0;
+
+out_free:
+       kmem_cache_destroy(kvm_vcpu_cache);
+out_free_5:
+       sysdev_unregister(&kvm_sysdev);
+out_free_4:
+       sysdev_class_unregister(&kvm_sysdev_class);
+out_free_3:
+       unregister_reboot_notifier(&kvm_reboot_notifier);
+       unregister_cpu_notifier(&kvm_cpu_notifier);
+out_free_2:
+       on_each_cpu(hardware_disable, NULL, 0, 1);
+out_free_1:
+       kvm_arch_hardware_unsetup();
+out_free_0:
+       __free_page(bad_page);
+out:
+       kvm_arch_exit();
+       kvm_exit_debug();
+out_fail:
+       return r;
+}
+EXPORT_SYMBOL_GPL(kvm_init);
+
+void kvm_exit(void)
+{
+       misc_deregister(&kvm_dev);
+       kmem_cache_destroy(kvm_vcpu_cache);
+       sysdev_unregister(&kvm_sysdev);
+       sysdev_class_unregister(&kvm_sysdev_class);
+       unregister_reboot_notifier(&kvm_reboot_notifier);
+       unregister_cpu_notifier(&kvm_cpu_notifier);
+       on_each_cpu(hardware_disable, NULL, 0, 1);
+       kvm_arch_hardware_unsetup();
+       kvm_arch_exit();
+       kvm_exit_debug();
+       __free_page(bad_page);
+}
+EXPORT_SYMBOL_GPL(kvm_exit);