~ubuntu-branches/ubuntu/precise/linux-lowlatency/precise

Viewing changes to arch/powerpc/kvm/book3s_hv.c

  • Committer: Package Import Robot
  • Author(s): Alessio Igor Bogani
  • Date: 2011-10-26 11:13:05 UTC
  • Revision ID: package-import@ubuntu.com-20111026111305-tz023xykf0i6eosh
Tags: upstream-3.2.0
Import upstream version 3.2.0

/*
 * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
 *
 * Authors:
 *    Paul Mackerras <paulus@au1.ibm.com>
 *    Alexander Graf <agraf@suse.de>
 *    Kevin Wolf <mail@kevin-wolf.de>
 *
 * Description: KVM functions specific to running on Book 3S
 * processors in hypervisor mode (specifically POWER7 and later).
 *
 * This file is derived from arch/powerpc/kvm/book3s.c,
 * by Alexander Graf <agraf@suse.de>.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 */

#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/preempt.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/fs.h>
#include <linux/anon_inodes.h>
#include <linux/cpumask.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>

#include <asm/reg.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu_context.h>
#include <asm/lppaca.h>
#include <asm/processor.h>
#include <asm/cputhreads.h>
#include <asm/page.h>
#include <asm/hvcall.h>
#include <linux/gfp.h>
#include <linux/sched.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>

/*
 * For now, limit memory to 64GB and require it to be large pages.
 * This value is chosen because it makes the ram_pginfo array be
 * 64kB in size, which is about as large as we want to be trying
 * to allocate with kmalloc.
 */
#define MAX_MEM_ORDER           36

#define LARGE_PAGE_ORDER        24      /* 16MB pages */

/* #define EXIT_DEBUG */
/* #define EXIT_DEBUG_SIMPLE */
/* #define EXIT_DEBUG_INT */

static void kvmppc_end_cede(struct kvm_vcpu *vcpu);

void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
        local_paca->kvm_hstate.kvm_vcpu = vcpu;
        local_paca->kvm_hstate.kvm_vcore = vcpu->arch.vcore;
}

void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
{
}

void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
{
        vcpu->arch.shregs.msr = msr;
        kvmppc_end_cede(vcpu);
}

void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
{
        vcpu->arch.pvr = pvr;
}

void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
{
        int r;

        pr_err("vcpu %p (%d):\n", vcpu, vcpu->vcpu_id);
        pr_err("pc  = %.16lx  msr = %.16llx  trap = %x\n",
               vcpu->arch.pc, vcpu->arch.shregs.msr, vcpu->arch.trap);
        for (r = 0; r < 16; ++r)
                pr_err("r%2d = %.16lx  r%d = %.16lx\n",
                       r, kvmppc_get_gpr(vcpu, r),
                       r+16, kvmppc_get_gpr(vcpu, r+16));
        pr_err("ctr = %.16lx  lr  = %.16lx\n",
               vcpu->arch.ctr, vcpu->arch.lr);
        pr_err("srr0 = %.16llx srr1 = %.16llx\n",
               vcpu->arch.shregs.srr0, vcpu->arch.shregs.srr1);
        pr_err("sprg0 = %.16llx sprg1 = %.16llx\n",
               vcpu->arch.shregs.sprg0, vcpu->arch.shregs.sprg1);
        pr_err("sprg2 = %.16llx sprg3 = %.16llx\n",
               vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3);
        pr_err("cr = %.8x  xer = %.16lx  dsisr = %.8x\n",
               vcpu->arch.cr, vcpu->arch.xer, vcpu->arch.shregs.dsisr);
        pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar);
        pr_err("fault dar = %.16lx dsisr = %.8x\n",
               vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
        pr_err("SLB (%d entries):\n", vcpu->arch.slb_max);
        for (r = 0; r < vcpu->arch.slb_max; ++r)
                pr_err("  ESID = %.16llx VSID = %.16llx\n",
                       vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv);
        pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n",
               vcpu->kvm->arch.lpcr, vcpu->kvm->arch.sdr1,
               vcpu->arch.last_inst);
}

struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
{
        int r;
        struct kvm_vcpu *v, *ret = NULL;

        mutex_lock(&kvm->lock);
        kvm_for_each_vcpu(r, v, kvm) {
                if (v->vcpu_id == id) {
                        ret = v;
                        break;
                }
        }
        mutex_unlock(&kvm->lock);
        return ret;
}

static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
{
        vpa->shared_proc = 1;
        vpa->yield_count = 1;
}

static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
                                       unsigned long flags,
                                       unsigned long vcpuid, unsigned long vpa)
{
        struct kvm *kvm = vcpu->kvm;
        unsigned long pg_index, ra, len;
        unsigned long pg_offset;
        void *va;
        struct kvm_vcpu *tvcpu;

        tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
        if (!tvcpu)
                return H_PARAMETER;

        flags >>= 63 - 18;
        flags &= 7;
        if (flags == 0 || flags == 4)
                return H_PARAMETER;
        if (flags < 4) {
                if (vpa & 0x7f)
                        return H_PARAMETER;
                /* registering new area; convert logical addr to real */
                pg_index = vpa >> kvm->arch.ram_porder;
                pg_offset = vpa & (kvm->arch.ram_psize - 1);
                if (pg_index >= kvm->arch.ram_npages)
                        return H_PARAMETER;
                if (kvm->arch.ram_pginfo[pg_index].pfn == 0)
                        return H_PARAMETER;
                ra = kvm->arch.ram_pginfo[pg_index].pfn << PAGE_SHIFT;
                ra |= pg_offset;
                va = __va(ra);
                if (flags <= 1)
                        len = *(unsigned short *)(va + 4);
                else
                        len = *(unsigned int *)(va + 4);
                if (pg_offset + len > kvm->arch.ram_psize)
                        return H_PARAMETER;
                switch (flags) {
                case 1:         /* register VPA */
                        if (len < 640)
                                return H_PARAMETER;
                        tvcpu->arch.vpa = va;
                        init_vpa(vcpu, va);
                        break;
                case 2:         /* register DTL */
                        if (len < 48)
                                return H_PARAMETER;
                        if (!tvcpu->arch.vpa)
                                return H_RESOURCE;
                        len -= len % 48;
                        tvcpu->arch.dtl = va;
                        tvcpu->arch.dtl_end = va + len;
                        break;
                case 3:         /* register SLB shadow buffer */
                        if (len < 8)
                                return H_PARAMETER;
                        if (!tvcpu->arch.vpa)
                                return H_RESOURCE;
                        len = (len - 16) / 16;
                        tvcpu->arch.slb_shadow = va;
                        break;
                }
        } else {
                switch (flags) {
                case 5:         /* unregister VPA */
                        if (tvcpu->arch.slb_shadow || tvcpu->arch.dtl)
                                return H_RESOURCE;
                        tvcpu->arch.vpa = NULL;
                        break;
                case 6:         /* unregister DTL */
                        tvcpu->arch.dtl = NULL;
                        break;
                case 7:         /* unregister SLB shadow buffer */
                        tvcpu->arch.slb_shadow = NULL;
                        break;
                }
        }
        return H_SUCCESS;
}

int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
{
        unsigned long req = kvmppc_get_gpr(vcpu, 3);
        unsigned long target, ret = H_SUCCESS;
        struct kvm_vcpu *tvcpu;

        switch (req) {
        case H_CEDE:
                break;
        case H_PROD:
                target = kvmppc_get_gpr(vcpu, 4);
                tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
                if (!tvcpu) {
                        ret = H_PARAMETER;
                        break;
                }
                tvcpu->arch.prodded = 1;
                smp_mb();
                if (vcpu->arch.ceded) {
                        if (waitqueue_active(&vcpu->wq)) {
                                wake_up_interruptible(&vcpu->wq);
                                vcpu->stat.halt_wakeup++;
                        }
                }
                break;
        case H_CONFER:
                break;
        case H_REGISTER_VPA:
                ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
                                        kvmppc_get_gpr(vcpu, 5),
                                        kvmppc_get_gpr(vcpu, 6));
                break;
        default:
                return RESUME_HOST;
        }
        kvmppc_set_gpr(vcpu, 3, ret);
        vcpu->arch.hcall_needed = 0;
        return RESUME_GUEST;
}

static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                              struct task_struct *tsk)
{
        int r = RESUME_HOST;

        vcpu->stat.sum_exits++;

        run->exit_reason = KVM_EXIT_UNKNOWN;
        run->ready_for_interrupt_injection = 1;
        switch (vcpu->arch.trap) {
        /* We're good on these - the host merely wanted to get our attention */
        case BOOK3S_INTERRUPT_HV_DECREMENTER:
                vcpu->stat.dec_exits++;
                r = RESUME_GUEST;
                break;
        case BOOK3S_INTERRUPT_EXTERNAL:
                vcpu->stat.ext_intr_exits++;
                r = RESUME_GUEST;
                break;
        case BOOK3S_INTERRUPT_PERFMON:
                r = RESUME_GUEST;
                break;
        case BOOK3S_INTERRUPT_PROGRAM:
        {
                ulong flags;
                /*
                 * Normally program interrupts are delivered directly
                 * to the guest by the hardware, but we can get here
                 * as a result of a hypervisor emulation interrupt
                 * (e40) getting turned into a 700 by BML RTAS.
                 */
                flags = vcpu->arch.shregs.msr & 0x1f0000ull;
                kvmppc_core_queue_program(vcpu, flags);
                r = RESUME_GUEST;
                break;
        }
        case BOOK3S_INTERRUPT_SYSCALL:
        {
                /* hcall - punt to userspace */
                int i;

                if (vcpu->arch.shregs.msr & MSR_PR) {
                        /* sc 1 from userspace - reflect to guest syscall */
                        kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_SYSCALL);
                        r = RESUME_GUEST;
                        break;
                }
                run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
                for (i = 0; i < 9; ++i)
                        run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
                run->exit_reason = KVM_EXIT_PAPR_HCALL;
                vcpu->arch.hcall_needed = 1;
                r = RESUME_HOST;
                break;
        }
        /*
         * We get these next two if the guest does a bad real-mode access,
         * as we have enabled VRMA (virtualized real mode area) mode in the
         * LPCR.  We just generate an appropriate DSI/ISI to the guest.
         */
        case BOOK3S_INTERRUPT_H_DATA_STORAGE:
                vcpu->arch.shregs.dsisr = vcpu->arch.fault_dsisr;
                vcpu->arch.shregs.dar = vcpu->arch.fault_dar;
                kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE, 0);
                r = RESUME_GUEST;
                break;
        case BOOK3S_INTERRUPT_H_INST_STORAGE:
                kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_INST_STORAGE,
                                        0x08000000);
                r = RESUME_GUEST;
                break;
        /*
         * This occurs if the guest executes an illegal instruction.
         * We just generate a program interrupt to the guest, since
         * we don't emulate any guest instructions at this stage.
         */
        case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
                kvmppc_core_queue_program(vcpu, 0x80000);
                r = RESUME_GUEST;
                break;
        default:
                kvmppc_dump_regs(vcpu);
                printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
                        vcpu->arch.trap, kvmppc_get_pc(vcpu),
                        vcpu->arch.shregs.msr);
                r = RESUME_HOST;
                BUG();
                break;
        }

        return r;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
                                  struct kvm_sregs *sregs)
{
        int i;

        memset(sregs, 0, sizeof(struct kvm_sregs));
        sregs->pvr = vcpu->arch.pvr;

        for (i = 0; i < vcpu->arch.slb_max; i++) {
                sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige;
                sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;
        }

        return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
                                  struct kvm_sregs *sregs)
{
        int i, j;

        kvmppc_set_pvr(vcpu, sregs->pvr);

        j = 0;
        for (i = 0; i < vcpu->arch.slb_nr; i++) {
                if (sregs->u.s.ppc64.slb[i].slbe & SLB_ESID_V) {
                        vcpu->arch.slb[j].orige = sregs->u.s.ppc64.slb[i].slbe;
                        vcpu->arch.slb[j].origv = sregs->u.s.ppc64.slb[i].slbv;
                        ++j;
                }
        }
        vcpu->arch.slb_max = j;

        return 0;
}

int kvmppc_core_check_processor_compat(void)
{
        if (cpu_has_feature(CPU_FTR_HVMODE))
                return 0;
        return -EIO;
}

struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
{
        struct kvm_vcpu *vcpu;
        int err = -EINVAL;
        int core;
        struct kvmppc_vcore *vcore;

        core = id / threads_per_core;
        if (core >= KVM_MAX_VCORES)
                goto out;

        err = -ENOMEM;
        vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
        if (!vcpu)
                goto out;

        err = kvm_vcpu_init(vcpu, kvm, id);
        if (err)
                goto free_vcpu;

        vcpu->arch.shared = &vcpu->arch.shregs;
        vcpu->arch.last_cpu = -1;
        vcpu->arch.mmcr[0] = MMCR0_FC;
        vcpu->arch.ctrl = CTRL_RUNLATCH;
        /* default to host PVR, since we can't spoof it */
        vcpu->arch.pvr = mfspr(SPRN_PVR);
        kvmppc_set_pvr(vcpu, vcpu->arch.pvr);

        kvmppc_mmu_book3s_hv_init(vcpu);

        /*
         * We consider the vcpu stopped until we see the first run ioctl for it.
         */
        vcpu->arch.state = KVMPPC_VCPU_STOPPED;

        init_waitqueue_head(&vcpu->arch.cpu_run);

        mutex_lock(&kvm->lock);
        vcore = kvm->arch.vcores[core];
        if (!vcore) {
                vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL);
                if (vcore) {
                        INIT_LIST_HEAD(&vcore->runnable_threads);
                        spin_lock_init(&vcore->lock);
                        init_waitqueue_head(&vcore->wq);
                }
                kvm->arch.vcores[core] = vcore;
        }
        mutex_unlock(&kvm->lock);

        if (!vcore)
                goto free_vcpu;

        spin_lock(&vcore->lock);
        ++vcore->num_threads;
        spin_unlock(&vcore->lock);
        vcpu->arch.vcore = vcore;

        vcpu->arch.cpu_type = KVM_CPU_3S_64;
        kvmppc_sanity_check(vcpu);

        return vcpu;

free_vcpu:
        kfree(vcpu);
out:
        return ERR_PTR(err);
}

void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
{
        kvm_vcpu_uninit(vcpu);
        kfree(vcpu);
}

static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
{
        unsigned long dec_nsec, now;

        now = get_tb();
        if (now > vcpu->arch.dec_expires) {
                /* decrementer has already gone negative */
                kvmppc_core_queue_dec(vcpu);
                kvmppc_core_deliver_interrupts(vcpu);
                return;
        }
        dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC
                   / tb_ticks_per_sec;
        hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
                      HRTIMER_MODE_REL);
        vcpu->arch.timer_running = 1;
}

static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
{
        vcpu->arch.ceded = 0;
        if (vcpu->arch.timer_running) {
                hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
                vcpu->arch.timer_running = 0;
        }
}

extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
extern void xics_wake_cpu(int cpu);

static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
                                   struct kvm_vcpu *vcpu)
{
        struct kvm_vcpu *v;

        if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
                return;
        vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
        --vc->n_runnable;
        ++vc->n_busy;
        /* decrement the physical thread id of each following vcpu */
        v = vcpu;
        list_for_each_entry_continue(v, &vc->runnable_threads, arch.run_list)
                --v->arch.ptid;
        list_del(&vcpu->arch.run_list);
}

static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
{
        int cpu;
        struct paca_struct *tpaca;
        struct kvmppc_vcore *vc = vcpu->arch.vcore;

        if (vcpu->arch.timer_running) {
                hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
                vcpu->arch.timer_running = 0;
        }
        cpu = vc->pcpu + vcpu->arch.ptid;
        tpaca = &paca[cpu];
        tpaca->kvm_hstate.kvm_vcpu = vcpu;
        tpaca->kvm_hstate.kvm_vcore = vc;
        tpaca->kvm_hstate.napping = 0;
        vcpu->cpu = vc->pcpu;
        smp_wmb();
#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
        if (vcpu->arch.ptid) {
                tpaca->cpu_start = 0x80;
                wmb();
                xics_wake_cpu(cpu);
                ++vc->n_woken;
        }
#endif
}

static void kvmppc_wait_for_nap(struct kvmppc_vcore *vc)
{
        int i;

        HMT_low();
        i = 0;
        while (vc->nap_count < vc->n_woken) {
                if (++i >= 1000000) {
                        pr_err("kvmppc_wait_for_nap timeout %d %d\n",
                               vc->nap_count, vc->n_woken);
                        break;
                }
                cpu_relax();
        }
        HMT_medium();
}

/*
 * Check that we are on thread 0 and that any other threads in
 * this core are off-line.
 */
static int on_primary_thread(void)
{
        int cpu = smp_processor_id();
        int thr = cpu_thread_in_core(cpu);

        if (thr)
                return 0;
        while (++thr < threads_per_core)
                if (cpu_online(cpu + thr))
                        return 0;
        return 1;
}

/*
 * Run a set of guest threads on a physical core.
 * Called with vc->lock held.
 */
static int kvmppc_run_core(struct kvmppc_vcore *vc)
{
        struct kvm_vcpu *vcpu, *vcpu0, *vnext;
        long ret;
        u64 now;
        int ptid;

        /* don't start if any threads have a signal pending */
        list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
                if (signal_pending(vcpu->arch.run_task))
                        return 0;

        /*
         * Make sure we are running on thread 0, and that
         * secondary threads are offline.
         * XXX we should also block attempts to bring any
         * secondary threads online.
         */
        if (threads_per_core > 1 && !on_primary_thread()) {
                list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
                        vcpu->arch.ret = -EBUSY;
                goto out;
        }

        /*
         * Assign physical thread IDs, first to non-ceded vcpus
         * and then to ceded ones.
         */
        ptid = 0;
        vcpu0 = NULL;
        list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
                if (!vcpu->arch.ceded) {
                        if (!ptid)
                                vcpu0 = vcpu;
                        vcpu->arch.ptid = ptid++;
                }
        }
        if (!vcpu0)
                return 0;               /* nothing to run */
        list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
                if (vcpu->arch.ceded)
                        vcpu->arch.ptid = ptid++;

        vc->n_woken = 0;
        vc->nap_count = 0;
        vc->entry_exit_count = 0;
        vc->vcore_state = VCORE_RUNNING;
        vc->in_guest = 0;
        vc->pcpu = smp_processor_id();
        vc->napping_threads = 0;
        list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
                kvmppc_start_thread(vcpu);

        preempt_disable();
        spin_unlock(&vc->lock);

        kvm_guest_enter();
        __kvmppc_vcore_entry(NULL, vcpu0);

        spin_lock(&vc->lock);
        /* disable sending of IPIs on virtual external irqs */
        list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
                vcpu->cpu = -1;
        /* wait for secondary threads to finish writing their state to memory */
        if (vc->nap_count < vc->n_woken)
                kvmppc_wait_for_nap(vc);
        /* prevent other vcpu threads from doing kvmppc_start_thread() now */
        vc->vcore_state = VCORE_EXITING;
        spin_unlock(&vc->lock);

        /* make sure updates to secondary vcpu structs are visible now */
        smp_mb();
        kvm_guest_exit();

        preempt_enable();
        kvm_resched(vcpu);

        now = get_tb();
        list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
                /* cancel pending dec exception if dec is positive */
                if (now < vcpu->arch.dec_expires &&
                    kvmppc_core_pending_dec(vcpu))
                        kvmppc_core_dequeue_dec(vcpu);

                ret = RESUME_GUEST;
                if (vcpu->arch.trap)
                        ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu,
                                                 vcpu->arch.run_task);

                vcpu->arch.ret = ret;
                vcpu->arch.trap = 0;

                if (vcpu->arch.ceded) {
                        if (ret != RESUME_GUEST)
                                kvmppc_end_cede(vcpu);
                        else
                                kvmppc_set_timer(vcpu);
                }
        }

        spin_lock(&vc->lock);
 out:
        vc->vcore_state = VCORE_INACTIVE;
        list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
                                 arch.run_list) {
                if (vcpu->arch.ret != RESUME_GUEST) {
                        kvmppc_remove_runnable(vc, vcpu);
                        wake_up(&vcpu->arch.cpu_run);
                }
        }

        return 1;
}

/*
 * Wait for some other vcpu thread to execute us, and
 * wake us up when we need to handle something in the host.
 */
static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state)
{
        DEFINE_WAIT(wait);

        prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
        if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
                schedule();
        finish_wait(&vcpu->arch.cpu_run, &wait);
}

/*
 * All the vcpus in this vcore are idle, so wait for a decrementer
 * or external interrupt to one of the vcpus.  vc->lock is held.
 */
static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
{
        DEFINE_WAIT(wait);
        struct kvm_vcpu *v;
        int all_idle = 1;

        prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
        vc->vcore_state = VCORE_SLEEPING;
        spin_unlock(&vc->lock);
        list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
                if (!v->arch.ceded || v->arch.pending_exceptions) {
                        all_idle = 0;
                        break;
                }
        }
        if (all_idle)
                schedule();
        finish_wait(&vc->wq, &wait);
        spin_lock(&vc->lock);
        vc->vcore_state = VCORE_INACTIVE;
}

static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
{
        int n_ceded;
        int prev_state;
        struct kvmppc_vcore *vc;
        struct kvm_vcpu *v, *vn;

        kvm_run->exit_reason = 0;
        vcpu->arch.ret = RESUME_GUEST;
        vcpu->arch.trap = 0;

        /*
         * Synchronize with other threads in this virtual core
         */
        vc = vcpu->arch.vcore;
        spin_lock(&vc->lock);
        vcpu->arch.ceded = 0;
        vcpu->arch.run_task = current;
        vcpu->arch.kvm_run = kvm_run;
        prev_state = vcpu->arch.state;
        vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
        list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
        ++vc->n_runnable;

        /*
         * This happens the first time this is called for a vcpu.
         * If the vcore is already running, we may be able to start
         * this thread straight away and have it join in.
         */
        if (prev_state == KVMPPC_VCPU_STOPPED) {
                if (vc->vcore_state == VCORE_RUNNING &&
                    VCORE_EXIT_COUNT(vc) == 0) {
                        vcpu->arch.ptid = vc->n_runnable - 1;
                        kvmppc_start_thread(vcpu);
                }

        } else if (prev_state == KVMPPC_VCPU_BUSY_IN_HOST)
                --vc->n_busy;

        while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
               !signal_pending(current)) {
                if (vc->n_busy || vc->vcore_state != VCORE_INACTIVE) {
                        spin_unlock(&vc->lock);
                        kvmppc_wait_for_exec(vcpu, TASK_INTERRUPTIBLE);
                        spin_lock(&vc->lock);
                        continue;
                }
                n_ceded = 0;
                list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
                        n_ceded += v->arch.ceded;
                if (n_ceded == vc->n_runnable)
                        kvmppc_vcore_blocked(vc);
                else
                        kvmppc_run_core(vc);

                list_for_each_entry_safe(v, vn, &vc->runnable_threads,
                                         arch.run_list) {
                        kvmppc_core_deliver_interrupts(v);
                        if (signal_pending(v->arch.run_task)) {
                                kvmppc_remove_runnable(vc, v);
                                v->stat.signal_exits++;
                                v->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
                                v->arch.ret = -EINTR;
                                wake_up(&v->arch.cpu_run);
                        }
                }
        }

        if (signal_pending(current)) {
                if (vc->vcore_state == VCORE_RUNNING ||
                    vc->vcore_state == VCORE_EXITING) {
                        spin_unlock(&vc->lock);
                        kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE);
                        spin_lock(&vc->lock);
                }
                if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
                        kvmppc_remove_runnable(vc, vcpu);
                        vcpu->stat.signal_exits++;
                        kvm_run->exit_reason = KVM_EXIT_INTR;
                        vcpu->arch.ret = -EINTR;
                }
        }

        spin_unlock(&vc->lock);
        return vcpu->arch.ret;
}

int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
        int r;

        if (!vcpu->arch.sane) {
                run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                return -EINVAL;
        }

        /* No need to go into the guest when all we'll do is come back out */
        if (signal_pending(current)) {
                run->exit_reason = KVM_EXIT_INTR;
                return -EINTR;
        }

        /* On PPC970, check that we have an RMA region */
        if (!vcpu->kvm->arch.rma && cpu_has_feature(CPU_FTR_ARCH_201))
                return -EPERM;

        flush_fp_to_thread(current);
        flush_altivec_to_thread(current);
        flush_vsx_to_thread(current);
        vcpu->arch.wqp = &vcpu->arch.vcore->wq;

        do {
                r = kvmppc_run_vcpu(run, vcpu);

                if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
                    !(vcpu->arch.shregs.msr & MSR_PR)) {
                        r = kvmppc_pseries_do_hcall(vcpu);
                        kvmppc_core_deliver_interrupts(vcpu);
                }
        } while (r == RESUME_GUEST);
        return r;
}

static long kvmppc_stt_npages(unsigned long window_size)
{
        return ALIGN((window_size >> SPAPR_TCE_SHIFT)
                     * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
}

static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt)
{
        struct kvm *kvm = stt->kvm;
        int i;

        mutex_lock(&kvm->lock);
        list_del(&stt->list);
        for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++)
                __free_page(stt->pages[i]);
        kfree(stt);
        mutex_unlock(&kvm->lock);

        kvm_put_kvm(kvm);
}

static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
        struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data;
        struct page *page;

        if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size))
                return VM_FAULT_SIGBUS;

        page = stt->pages[vmf->pgoff];
        get_page(page);
        vmf->page = page;
        return 0;
}

static const struct vm_operations_struct kvm_spapr_tce_vm_ops = {
        .fault = kvm_spapr_tce_fault,
};

static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
{
        vma->vm_ops = &kvm_spapr_tce_vm_ops;
        return 0;
}

static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
{
        struct kvmppc_spapr_tce_table *stt = filp->private_data;

        release_spapr_tce_table(stt);
        return 0;
}

static struct file_operations kvm_spapr_tce_fops = {
        .mmap           = kvm_spapr_tce_mmap,
        .release        = kvm_spapr_tce_release,
};

long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
                                   struct kvm_create_spapr_tce *args)
{
        struct kvmppc_spapr_tce_table *stt = NULL;
        long npages;
        int ret = -ENOMEM;
        int i;

        /* Check this LIOBN hasn't been previously allocated */
        list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
                if (stt->liobn == args->liobn)
                        return -EBUSY;
        }

        npages = kvmppc_stt_npages(args->window_size);

        stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
                      GFP_KERNEL);
        if (!stt)
                goto fail;

        stt->liobn = args->liobn;
        stt->window_size = args->window_size;
        stt->kvm = kvm;

        for (i = 0; i < npages; i++) {
                stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
                if (!stt->pages[i])
                        goto fail;
        }

        kvm_get_kvm(kvm);

        mutex_lock(&kvm->lock);
        list_add(&stt->list, &kvm->arch.spapr_tce_tables);

        mutex_unlock(&kvm->lock);

        return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
                                stt, O_RDWR);

fail:
        if (stt) {
                for (i = 0; i < npages; i++)
                        if (stt->pages[i])
                                __free_page(stt->pages[i]);

                kfree(stt);
        }
        return ret;
}

/* Work out RMLS (real mode limit selector) field value for a given RMA size.
   Assumes POWER7 or PPC970. */
static inline int lpcr_rmls(unsigned long rma_size)
{
        switch (rma_size) {
        case 32ul << 20:        /* 32 MB */
                if (cpu_has_feature(CPU_FTR_ARCH_206))
                        return 8;       /* only supported on POWER7 */
                return -1;
        case 64ul << 20:        /* 64 MB */
                return 3;
        case 128ul << 20:       /* 128 MB */
                return 7;
        case 256ul << 20:       /* 256 MB */
                return 4;
        case 1ul << 30:         /* 1 GB */
                return 2;
        case 16ul << 30:        /* 16 GB */
                return 1;
        case 256ul << 30:       /* 256 GB */
                return 0;
        default:
                return -1;
        }
}

static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
        struct kvmppc_rma_info *ri = vma->vm_file->private_data;
        struct page *page;

        if (vmf->pgoff >= ri->npages)
                return VM_FAULT_SIGBUS;

        page = pfn_to_page(ri->base_pfn + vmf->pgoff);
        get_page(page);
        vmf->page = page;
        return 0;
}

static const struct vm_operations_struct kvm_rma_vm_ops = {
        .fault = kvm_rma_fault,
};

static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
{
        vma->vm_flags |= VM_RESERVED;
        vma->vm_ops = &kvm_rma_vm_ops;
        return 0;
}

static int kvm_rma_release(struct inode *inode, struct file *filp)
{
        struct kvmppc_rma_info *ri = filp->private_data;

        kvm_release_rma(ri);
        return 0;
}

static struct file_operations kvm_rma_fops = {
        .mmap           = kvm_rma_mmap,
        .release        = kvm_rma_release,
};

long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
{
        struct kvmppc_rma_info *ri;
        long fd;

        ri = kvm_alloc_rma();
        if (!ri)
                return -ENOMEM;

        fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR);
        if (fd < 0)
                kvm_release_rma(ri);

        ret->rma_size = ri->npages << PAGE_SHIFT;
        return fd;
}

static struct page *hva_to_page(unsigned long addr)
{
        struct page *page[1];
        int npages;

        might_sleep();

        npages = get_user_pages_fast(addr, 1, 1, page);

        if (unlikely(npages != 1))
                return 0;

        return page[0];
}

int kvmppc_core_prepare_memory_region(struct kvm *kvm,
                                struct kvm_userspace_memory_region *mem)
{
        unsigned long psize, porder;
        unsigned long i, npages, totalpages;
        unsigned long pg_ix;
        struct kvmppc_pginfo *pginfo;
        unsigned long hva;
        struct kvmppc_rma_info *ri = NULL;
        struct page *page;

        /* For now, only allow 16MB pages */
        porder = LARGE_PAGE_ORDER;
        psize = 1ul << porder;
        if ((mem->memory_size & (psize - 1)) ||
            (mem->guest_phys_addr & (psize - 1))) {
                pr_err("bad memory_size=%llx @ %llx\n",
                       mem->memory_size, mem->guest_phys_addr);
                return -EINVAL;
        }

        npages = mem->memory_size >> porder;
        totalpages = (mem->guest_phys_addr + mem->memory_size) >> porder;

        /* More memory than we have space to track? */
        if (totalpages > (1ul << (MAX_MEM_ORDER - LARGE_PAGE_ORDER)))
                return -EINVAL;

        /* Do we already have an RMA registered? */
        if (mem->guest_phys_addr == 0 && kvm->arch.rma)
                return -EINVAL;

        if (totalpages > kvm->arch.ram_npages)
                kvm->arch.ram_npages = totalpages;

        /* Is this one of our preallocated RMAs? */
        if (mem->guest_phys_addr == 0) {
                struct vm_area_struct *vma;

                down_read(&current->mm->mmap_sem);
                vma = find_vma(current->mm, mem->userspace_addr);
                if (vma && vma->vm_file &&
                    vma->vm_file->f_op == &kvm_rma_fops &&
                    mem->userspace_addr == vma->vm_start)
                        ri = vma->vm_file->private_data;
                up_read(&current->mm->mmap_sem);
                if (!ri && cpu_has_feature(CPU_FTR_ARCH_201)) {
                        pr_err("CPU requires an RMO\n");
                        return -EINVAL;
                }
        }

        if (ri) {
                unsigned long rma_size;
                unsigned long lpcr;
                long rmls;

                rma_size = ri->npages << PAGE_SHIFT;
                if (rma_size > mem->memory_size)
                        rma_size = mem->memory_size;
                rmls = lpcr_rmls(rma_size);
                if (rmls < 0) {
                        pr_err("Can't use RMA of 0x%lx bytes\n", rma_size);
                        return -EINVAL;
                }
                atomic_inc(&ri->use_count);
                kvm->arch.rma = ri;
                kvm->arch.n_rma_pages = rma_size >> porder;

                /* Update LPCR and RMOR */
                lpcr = kvm->arch.lpcr;
                if (cpu_has_feature(CPU_FTR_ARCH_201)) {
                        /* PPC970; insert RMLS value (split field) in HID4 */
                        lpcr &= ~((1ul << HID4_RMLS0_SH) |
                                  (3ul << HID4_RMLS2_SH));
                        lpcr |= ((rmls >> 2) << HID4_RMLS0_SH) |
                                ((rmls & 3) << HID4_RMLS2_SH);
                        /* RMOR is also in HID4 */
                        lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff)
                                << HID4_RMOR_SH;
                } else {
                        /* POWER7 */
                        lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L);
                        lpcr |= rmls << LPCR_RMLS_SH;
                        kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT;
                }
                kvm->arch.lpcr = lpcr;
                pr_info("Using RMO at %lx size %lx (LPCR = %lx)\n",
                        ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
        }

        pg_ix = mem->guest_phys_addr >> porder;
        pginfo = kvm->arch.ram_pginfo + pg_ix;
        for (i = 0; i < npages; ++i, ++pg_ix) {
                if (ri && pg_ix < kvm->arch.n_rma_pages) {
                        pginfo[i].pfn = ri->base_pfn +
                                (pg_ix << (porder - PAGE_SHIFT));
                        continue;
                }
                hva = mem->userspace_addr + (i << porder);
                page = hva_to_page(hva);
                if (!page) {
                        pr_err("oops, no pfn for hva %lx\n", hva);
                        goto err;
                }
                /* Check it's a 16MB page */
                if (!PageHead(page) ||
                    compound_order(page) != (LARGE_PAGE_ORDER - PAGE_SHIFT)) {
                        pr_err("page at %lx isn't 16MB (o=%d)\n",
                               hva, compound_order(page));
                        goto err;
                }
                pginfo[i].pfn = page_to_pfn(page);
        }

        return 0;

 err:
        return -EINVAL;
}

void kvmppc_core_commit_memory_region(struct kvm *kvm,
                                struct kvm_userspace_memory_region *mem)
{
        if (mem->guest_phys_addr == 0 && mem->memory_size != 0 &&
            !kvm->arch.rma)
                kvmppc_map_vrma(kvm, mem);
}

int kvmppc_core_init_vm(struct kvm *kvm)
{
        long r;
        unsigned long npages = 1ul << (MAX_MEM_ORDER - LARGE_PAGE_ORDER);
        long err = -ENOMEM;
        unsigned long lpcr;

        /* Allocate hashed page table */
        r = kvmppc_alloc_hpt(kvm);
        if (r)
                return r;

        INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);

        kvm->arch.ram_pginfo = kzalloc(npages * sizeof(struct kvmppc_pginfo),
                                       GFP_KERNEL);
        if (!kvm->arch.ram_pginfo) {
                pr_err("kvmppc_core_init_vm: couldn't alloc %lu bytes\n",
                       npages * sizeof(struct kvmppc_pginfo));
                goto out_free;
        }

        kvm->arch.ram_npages = 0;
        kvm->arch.ram_psize = 1ul << LARGE_PAGE_ORDER;
        kvm->arch.ram_porder = LARGE_PAGE_ORDER;
        kvm->arch.rma = NULL;
        kvm->arch.n_rma_pages = 0;

        kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);

        if (cpu_has_feature(CPU_FTR_ARCH_201)) {
                /* PPC970; HID4 is effectively the LPCR */
                unsigned long lpid = kvm->arch.lpid;
                kvm->arch.host_lpid = 0;
                kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4);
                lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH));
                lpcr |= ((lpid >> 4) << HID4_LPID1_SH) |
                        ((lpid & 0xf) << HID4_LPID5_SH);
        } else {
                /* POWER7; init LPCR for virtual RMA mode */
                kvm->arch.host_lpid = mfspr(SPRN_LPID);
                kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
                lpcr &= LPCR_PECE | LPCR_LPES;
                lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
                        LPCR_VPM0 | LPCR_VRMA_L;
        }
        kvm->arch.lpcr = lpcr;

        return 0;

 out_free:
        kvmppc_free_hpt(kvm);
        return err;
}

void kvmppc_core_destroy_vm(struct kvm *kvm)
{
        struct kvmppc_pginfo *pginfo;
        unsigned long i;

        if (kvm->arch.ram_pginfo) {
                pginfo = kvm->arch.ram_pginfo;
                kvm->arch.ram_pginfo = NULL;
                for (i = kvm->arch.n_rma_pages; i < kvm->arch.ram_npages; ++i)
                        if (pginfo[i].pfn)
                                put_page(pfn_to_page(pginfo[i].pfn));
                kfree(pginfo);
        }
        if (kvm->arch.rma) {
                kvm_release_rma(kvm->arch.rma);
                kvm->arch.rma = NULL;
        }

        kvmppc_free_hpt(kvm);
        WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
}

/* These are stubs for now */
void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
{
}

/* We don't need to emulate any privileged instructions or dcbz */
int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
                           unsigned int inst, int *advance)
{
        return EMULATE_FAIL;
}

int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
{
        return EMULATE_FAIL;
}

int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
{
        return EMULATE_FAIL;
}

static int kvmppc_book3s_hv_init(void)
{
        int r;

        r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);

        if (r)
                return r;

        r = kvmppc_mmu_hv_init();

        return r;
}

static void kvmppc_book3s_hv_exit(void)
{
        kvm_exit();
}

module_init(kvmppc_book3s_hv_init);
module_exit(kvmppc_book3s_hv_exit);