/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */
18
#include <sys/types.h>
19
#include <sys/ioctl.h>
23
#include <linux/kvm.h>
25
#include "qemu-common.h"
26
#include "qemu/timer.h"
27
#include "sysemu/sysemu.h"
28
#include "sysemu/kvm.h"
31
#include "sysemu/cpus.h"
32
#include "sysemu/device_tree.h"
33
#include "mmu-hash64.h"
35
#include "hw/sysbus.h"
36
#include "hw/ppc/spapr.h"
37
#include "hw/ppc/spapr_vio.h"
38
#include "sysemu/watchdog.h"
43
/* Debug tracing: enable by defining DEBUG_KVM.  The source shows two
 * conflicting DPRINTF definitions back-to-back; upstream wraps them in
 * an #ifdef so only one is active. */
/* #define DEBUG_KVM */

#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif
50
#define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
52
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
56
static int cap_interrupt_unset = false;
57
static int cap_interrupt_level = false;
58
static int cap_segstate;
59
static int cap_booke_sregs;
60
static int cap_ppc_smt;
61
static int cap_ppc_rma;
62
static int cap_spapr_tce;
64
static int cap_one_reg;
66
static int cap_ppc_watchdog;
68
static int cap_htab_fd;
70
/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
79
static QEMUTimer *idle_timer;
81
static void kvm_kick_cpu(void *opaque)
83
PowerPCCPU *cpu = opaque;
85
qemu_cpu_kick(CPU(cpu));
88
static int kvm_ppc_register_host_cpu_type(void);
90
/* Probe all PPC-relevant KVM capabilities once at startup and cache the
 * results in the cap_* globals, then register the host CPU type.
 * Returns 0 (missing capabilities only produce warnings). */
int kvm_arch_init(KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /* Note: we don't set cap_papr here, because this capability is
     * only activated after this by kvmppc_set_papr() */
    cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    kvm_ppc_register_host_cpu_type();

    return 0;
}
117
/* Push the guest PVR into KVM's sregs so the kernel models the right CPU.
 * On BookE the host PVR is used as-is, so nothing needs syncing.
 * Returns 0 on success, a negative errno from the ioctls otherwise.
 * NOTE(review): error paths reconstructed from mangled source — verify
 * against upstream target-ppc/kvm.c. */
static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *cenv = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, so we potentially confuse users that they can run
           BookE guests on BookS. Let's hope nobody dares enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}
146
/* Set up a shared TLB array with KVM */
147
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
149
CPUPPCState *env = &cpu->env;
150
CPUState *cs = CPU(cpu);
151
struct kvm_book3e_206_tlb_params params = {};
152
struct kvm_config_tlb cfg = {};
153
struct kvm_enable_cap encap = {};
154
unsigned int entries = 0;
157
if (!kvm_enabled() ||
158
!kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
162
assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
164
for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
165
params.tlb_sizes[i] = booke206_tlb_size(env, i);
166
params.tlb_ways[i] = booke206_tlb_ways(env, i);
167
entries += params.tlb_sizes[i];
170
assert(entries == env->nb_tlb);
171
assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
173
env->tlb_dirty = true;
175
cfg.array = (uintptr_t)env->tlb.tlbm;
176
cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
177
cfg.params = (uintptr_t)¶ms;
178
cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
180
encap.cap = KVM_CAP_SW_TLB;
181
encap.args[0] = (uintptr_t)&cfg;
183
ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
185
fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
186
__func__, strerror(-ret));
190
env->kvm_sw_tlb = true;
195
#if defined(TARGET_PPC64)
196
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
197
struct kvm_ppc_smmu_info *info)
199
CPUPPCState *env = &cpu->env;
200
CPUState *cs = CPU(cpu);
202
memset(info, 0, sizeof(*info));
204
/* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
205
* need to "guess" what the supported page sizes are.
207
* For that to work we make a few assumptions:
209
* - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
210
* KVM which only supports 4K and 16M pages, but supports them
211
* regardless of the backing store characteritics. We also don't
212
* support 1T segments.
214
* This is safe as if HV KVM ever supports that capability or PR
215
* KVM grows supports for more page/segment sizes, those versions
216
* will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
217
* will not hit this fallback
219
* - Else we are running HV KVM. This means we only support page
220
* sizes that fit in the backing store. Additionally we only
221
* advertize 64K pages if the processor is ARCH 2.06 and we assume
222
* P7 encodings for the SLB and hash table. Here too, we assume
223
* support for any newer processor will mean a kernel that
224
* implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
227
if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
232
/* Standard 4k base page size segment */
233
info->sps[0].page_shift = 12;
234
info->sps[0].slb_enc = 0;
235
info->sps[0].enc[0].page_shift = 12;
236
info->sps[0].enc[0].pte_enc = 0;
238
/* Standard 16M large page size segment */
239
info->sps[1].page_shift = 24;
240
info->sps[1].slb_enc = SLB_VSID_L;
241
info->sps[1].enc[0].page_shift = 24;
242
info->sps[1].enc[0].pte_enc = 0;
246
/* HV KVM has backing store size restrictions */
247
info->flags = KVM_PPC_PAGE_SIZES_REAL;
249
if (env->mmu_model & POWERPC_MMU_1TSEG) {
250
info->flags |= KVM_PPC_1T_SEGMENTS;
253
if (env->mmu_model == POWERPC_MMU_2_06) {
259
/* Standard 4k base page size segment */
260
info->sps[i].page_shift = 12;
261
info->sps[i].slb_enc = 0;
262
info->sps[i].enc[0].page_shift = 12;
263
info->sps[i].enc[0].pte_enc = 0;
266
/* 64K on MMU 2.06 */
267
if (env->mmu_model == POWERPC_MMU_2_06) {
268
info->sps[i].page_shift = 16;
269
info->sps[i].slb_enc = 0x110;
270
info->sps[i].enc[0].page_shift = 16;
271
info->sps[i].enc[0].pte_enc = 1;
275
/* Standard 16M large page size segment */
276
info->sps[i].page_shift = 24;
277
info->sps[i].slb_enc = SLB_VSID_L;
278
info->sps[i].enc[0].page_shift = 24;
279
info->sps[i].enc[0].pte_enc = 0;
283
/* Fill *info with the server-MMU description: ask the kernel via
 * KVM_PPC_GET_SMMU_INFO when available, otherwise fall back to guessing
 * (kvm_get_fallback_smmu_info). */
static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
        ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
        if (ret == 0) {
            return;
        }
    }

    kvm_get_fallback_smmu_info(cpu, info);
}
298
static long getrampagesize(void)
304
/* guest RAM is backed by normal anonymous pages */
305
return getpagesize();
309
ret = statfs(mem_path, &fs);
310
} while (ret != 0 && errno == EINTR);
313
fprintf(stderr, "Couldn't statfs() memory path: %s\n",
318
#define HUGETLBFS_MAGIC 0x958458f6
320
if (fs.f_type != HUGETLBFS_MAGIC) {
321
/* Explicit mempath, but it's ordinary pages */
322
return getpagesize();
325
/* It's hugepage, return the huge page size */
329
static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
331
if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
335
return (1ul << shift) <= rampgsize;
338
/* Reconcile the CPU's advertised page/segment sizes with what KVM and the
 * RAM backing store actually support, rewriting env->sps in QEMU's format.
 * The kernel SMMU info is fetched once and cached in function statics.
 * NOTE(review): loop declarations and continue/break statements were
 * dropped by the source mangling and are reconstructed here — verify
 * against upstream target-ppc/kvm.c. */
static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
    static struct kvm_ppc_smmu_info smmu_info;
    static bool has_smmu_info;
    CPUPPCState *env = &cpu->env;
    long rampagesize;
    int iq, ik, jq, jk;

    /* We only handle page sizes for 64-bit server guests for now */
    if (!(env->mmu_model & POWERPC_MMU_64)) {
        return;
    }

    /* Collect MMU info from kernel if not already */
    if (!has_smmu_info) {
        kvm_get_smmu_info(cpu, &smmu_info);
        has_smmu_info = true;
    }

    rampagesize = getrampagesize();

    /* Convert to QEMU form */
    memset(&env->sps, 0, sizeof(env->sps));

    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];

        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                 ksps->page_shift)) {
            continue;
        }
        qsps->page_shift = ksps->page_shift;
        qsps->slb_enc = ksps->slb_enc;
        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                     ksps->enc[jk].page_shift)) {
                continue;
            }
            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
                break;
            }
        }
        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
            break;
        }
    }
    env->slb_nr = smmu_info.slb_size;
    if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
        env->mmu_model |= POWERPC_MMU_1TSEG;
    } else {
        env->mmu_model &= ~POWERPC_MMU_1TSEG;
    }
}
394
#else /* defined (TARGET_PPC64) */
396
static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
400
#endif /* !defined (TARGET_PPC64) */
402
/* KVM vCPU ids map 1:1 onto QEMU's cpu_index on PPC. */
unsigned long kvm_arch_vcpu_id(CPUState *cpu)
{
    return cpu->cpu_index;
}
407
/* Per-vCPU KVM setup: fix up page sizes, sync sregs, arm the wakeup timer
 * used by the level-irq workaround, and set up the shared TLB on BookE 2.06.
 * Returns 0 on success or a negative errno from the sub-steps. */
int kvm_arch_init_vcpu(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;
    int ret;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cpu);

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);
    if (ret) {
        return ret;
    }

    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);

    /* Some targets support access to KVM's guest TLB. */
    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        ret = kvm_booke206_tlb_init(cpu);
        break;
    default:
        break;
    }

    return ret;
}
436
/* Nothing to do on vCPU reset for PPC KVM. */
void kvm_arch_reset_vcpu(CPUState *cpu)
{
}
440
/* Flush QEMU's software TLB state back to KVM by marking every entry dirty
 * (all-ones bitmap) via KVM_DIRTY_TLB.  No-op unless the shared software
 * TLB was set up (env->kvm_sw_tlb). */
static void kvm_sw_tlb_put(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

    /* One bit per TLB entry, all set */
    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}
467
/* Read one SPR from KVM via KVM_GET_ONE_REG into env->spr[spr], handling
 * 32- and 64-bit register sizes.  Failures only warn (best effort). */
static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);  /* was mojibake "®" */
    if (ret != 0) {
        fprintf(stderr, "Warning: Unable to retrieve SPR %d from KVM: %s\n",
                spr, strerror(errno));
    } else {
        switch (id & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
            env->spr[spr] = val.u32;
            break;

        case KVM_REG_SIZE_U64:
            env->spr[spr] = val.u64;
            break;

        default:
            /* Don't handle this size yet */
            abort();
        }
    }
}
502
/* Write one SPR from env->spr[spr] into KVM via KVM_SET_ONE_REG, handling
 * 32- and 64-bit register sizes.  Failures only warn (best effort). */
static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    switch (id & KVM_REG_SIZE_MASK) {
    case KVM_REG_SIZE_U32:
        val.u32 = env->spr[spr];
        break;

    case KVM_REG_SIZE_U64:
        val.u64 = env->spr[spr];
        break;

    default:
        /* Don't handle this size yet */
        abort();
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);  /* was mojibake "®" */
    if (ret != 0) {
        fprintf(stderr, "Warning: Unable to set SPR %d to KVM: %s\n",
                spr, strerror(errno));
    }
}
537
static int kvm_put_fp(CPUState *cs)
539
PowerPCCPU *cpu = POWERPC_CPU(cs);
540
CPUPPCState *env = &cpu->env;
541
struct kvm_one_reg reg;
545
if (env->insns_flags & PPC_FLOAT) {
546
uint64_t fpscr = env->fpscr;
547
bool vsx = !!(env->insns_flags2 & PPC2_VSX);
549
reg.id = KVM_REG_PPC_FPSCR;
550
reg.addr = (uintptr_t)&fpscr;
551
ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®);
553
DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
557
for (i = 0; i < 32; i++) {
560
vsr[0] = float64_val(env->fpr[i]);
561
vsr[1] = env->vsr[i];
562
reg.addr = (uintptr_t) &vsr;
563
reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
565
ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®);
567
DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
574
if (env->insns_flags & PPC_ALTIVEC) {
575
reg.id = KVM_REG_PPC_VSCR;
576
reg.addr = (uintptr_t)&env->vscr;
577
ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®);
579
DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
583
for (i = 0; i < 32; i++) {
584
reg.id = KVM_REG_PPC_VR(i);
585
reg.addr = (uintptr_t)&env->avr[i];
586
ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®);
588
DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
597
static int kvm_get_fp(CPUState *cs)
599
PowerPCCPU *cpu = POWERPC_CPU(cs);
600
CPUPPCState *env = &cpu->env;
601
struct kvm_one_reg reg;
605
if (env->insns_flags & PPC_FLOAT) {
607
bool vsx = !!(env->insns_flags2 & PPC2_VSX);
609
reg.id = KVM_REG_PPC_FPSCR;
610
reg.addr = (uintptr_t)&fpscr;
611
ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®);
613
DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
619
for (i = 0; i < 32; i++) {
622
reg.addr = (uintptr_t) &vsr;
623
reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
625
ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®);
627
DPRINTF("Unable to get %s%d from KVM: %s\n",
628
vsx ? "VSR" : "FPR", i, strerror(errno));
631
env->fpr[i] = vsr[0];
633
env->vsr[i] = vsr[1];
639
if (env->insns_flags & PPC_ALTIVEC) {
640
reg.id = KVM_REG_PPC_VSCR;
641
reg.addr = (uintptr_t)&env->vscr;
642
ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®);
644
DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
648
for (i = 0; i < 32; i++) {
649
reg.id = KVM_REG_PPC_VR(i);
650
reg.addr = (uintptr_t)&env->avr[i];
651
ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®);
653
DPRINTF("Unable to get VR%d from KVM: %s\n",
663
#if defined(TARGET_PPC64)
664
/* Fetch the three VPA (Virtual Processor Area) registers — master VPA
 * address, SLB shadow, dispatch trace log — from KVM into env.  The
 * asserts rely on each {addr,size} pair being laid out contiguously so a
 * single 16-byte ONE_REG transfer covers both fields.
 * Returns 0 on success, the failing ioctl's negative result otherwise. */
static int kvm_get_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    reg.id = KVM_REG_PPC_VPA_ADDR;
    reg.addr = (uintptr_t)&env->vpa_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    return 0;
}
703
/* Push the VPA registers to KVM, honoring the ordering constraint that the
 * master VPA must be registered before SLB shadow / DTL, and deregistered
 * after them.
 * Returns 0 on success, the failing ioctl's negative result otherwise.
 * NOTE(review): conditional registration/deregistration order reconstructed
 * from mangled source — verify against upstream target-ppc/kvm.c. */
static int kvm_put_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    /* SLB shadow or DTL can't be registered unless a master VPA is
     * registered.  That means when restoring state, if a VPA *is*
     * registered, we need to set that up first.  If not, we need to
     * deregister the others before deregistering the master VPA */
    assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));

    if (env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
                strerror(errno));
        return ret;
    }

    if (!env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    return 0;
}
758
#endif /* TARGET_PPC64 */
760
int kvm_arch_put_registers(CPUState *cs, int level)
762
PowerPCCPU *cpu = POWERPC_CPU(cs);
763
CPUPPCState *env = &cpu->env;
764
struct kvm_regs regs;
768
ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, ®s);
775
regs.xer = cpu_read_xer(env);
779
regs.srr0 = env->spr[SPR_SRR0];
780
regs.srr1 = env->spr[SPR_SRR1];
782
regs.sprg0 = env->spr[SPR_SPRG0];
783
regs.sprg1 = env->spr[SPR_SPRG1];
784
regs.sprg2 = env->spr[SPR_SPRG2];
785
regs.sprg3 = env->spr[SPR_SPRG3];
786
regs.sprg4 = env->spr[SPR_SPRG4];
787
regs.sprg5 = env->spr[SPR_SPRG5];
788
regs.sprg6 = env->spr[SPR_SPRG6];
789
regs.sprg7 = env->spr[SPR_SPRG7];
791
regs.pid = env->spr[SPR_BOOKE_PID];
793
for (i = 0;i < 32; i++)
794
regs.gpr[i] = env->gpr[i];
797
for (i = 0; i < 8; i++) {
798
regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
801
ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, ®s);
807
if (env->tlb_dirty) {
809
env->tlb_dirty = false;
812
if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
813
struct kvm_sregs sregs;
815
sregs.pvr = env->spr[SPR_PVR];
817
sregs.u.s.sdr1 = env->spr[SPR_SDR1];
821
for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
822
sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
823
sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
828
for (i = 0; i < 16; i++) {
829
sregs.u.s.ppc32.sr[i] = env->sr[i];
833
for (i = 0; i < 8; i++) {
834
/* Beware. We have to swap upper and lower bits here */
835
sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
837
sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
841
ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
847
if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
848
kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
854
/* We deliberately ignore errors here, for kernels which have
855
* the ONE_REG calls, but don't support the specific
856
* registers, there's a reasonable chance things will still
857
* work, at least until we try to migrate. */
858
for (i = 0; i < 1024; i++) {
859
uint64_t id = env->spr_cb[i].one_reg_id;
862
kvm_put_one_spr(cs, id, i);
868
if (kvm_put_vpa(cs) < 0) {
869
DPRINTF("Warning: Unable to set VPA information to KVM\n");
872
#endif /* TARGET_PPC64 */
878
int kvm_arch_get_registers(CPUState *cs)
880
PowerPCCPU *cpu = POWERPC_CPU(cs);
881
CPUPPCState *env = &cpu->env;
882
struct kvm_regs regs;
883
struct kvm_sregs sregs;
887
ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, ®s);
892
for (i = 7; i >= 0; i--) {
893
env->crf[i] = cr & 15;
899
cpu_write_xer(env, regs.xer);
903
env->spr[SPR_SRR0] = regs.srr0;
904
env->spr[SPR_SRR1] = regs.srr1;
906
env->spr[SPR_SPRG0] = regs.sprg0;
907
env->spr[SPR_SPRG1] = regs.sprg1;
908
env->spr[SPR_SPRG2] = regs.sprg2;
909
env->spr[SPR_SPRG3] = regs.sprg3;
910
env->spr[SPR_SPRG4] = regs.sprg4;
911
env->spr[SPR_SPRG5] = regs.sprg5;
912
env->spr[SPR_SPRG6] = regs.sprg6;
913
env->spr[SPR_SPRG7] = regs.sprg7;
915
env->spr[SPR_BOOKE_PID] = regs.pid;
917
for (i = 0;i < 32; i++)
918
env->gpr[i] = regs.gpr[i];
922
if (cap_booke_sregs) {
923
ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
928
if (sregs.u.e.features & KVM_SREGS_E_BASE) {
929
env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
930
env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
931
env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
932
env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
933
env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
934
env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
935
env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
936
env->spr[SPR_DECR] = sregs.u.e.dec;
937
env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
938
env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
939
env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
942
if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
943
env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
944
env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
945
env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
946
env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
947
env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
950
if (sregs.u.e.features & KVM_SREGS_E_64) {
951
env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
954
if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
955
env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
958
if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
959
env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
960
env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
961
env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
962
env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
963
env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
964
env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
965
env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
966
env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
967
env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
968
env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
969
env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
970
env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
971
env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
972
env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
973
env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
974
env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
976
if (sregs.u.e.features & KVM_SREGS_E_SPE) {
977
env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
978
env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
979
env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
982
if (sregs.u.e.features & KVM_SREGS_E_PM) {
983
env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
986
if (sregs.u.e.features & KVM_SREGS_E_PC) {
987
env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
988
env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
992
if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
993
env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
994
env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
995
env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
996
env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
997
env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
998
env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
999
env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1000
env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1001
env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1002
env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1005
if (sregs.u.e.features & KVM_SREGS_EXP) {
1006
env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1009
if (sregs.u.e.features & KVM_SREGS_E_PD) {
1010
env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1011
env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1014
if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1015
env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1016
env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1017
env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1019
if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1020
env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1021
env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1027
ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1032
ppc_store_sdr1(env, sregs.u.s.sdr1);
1037
* The packed SLB array we get from KVM_GET_SREGS only contains
1038
* information about valid entries. So we flush our internal
1039
* copy to get rid of stale ones, then put all valid SLB entries
1042
memset(env->slb, 0, sizeof(env->slb));
1043
for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1044
target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1045
target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1047
* Only restore valid entries
1049
if (rb & SLB_ESID_V) {
1050
ppc_store_slb(env, rb, rs);
1056
for (i = 0; i < 16; i++) {
1057
env->sr[i] = sregs.u.s.ppc32.sr[i];
1061
for (i = 0; i < 8; i++) {
1062
env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1063
env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1064
env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1065
env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1070
kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1076
/* We deliberately ignore errors here, for kernels which have
1077
* the ONE_REG calls, but don't support the specific
1078
* registers, there's a reasonable chance things will still
1079
* work, at least until we try to migrate. */
1080
for (i = 0; i < 1024; i++) {
1081
uint64_t id = env->spr_cb[i].one_reg_id;
1084
kvm_get_one_spr(cs, id, i);
1090
if (kvm_get_vpa(cs) < 0) {
1091
DPRINTF("Warning: Unable to get VPA information from KVM\n");
1100
/* Raise or lower the external interrupt line in KVM.  Only the external
 * interrupt (PPC_INTERRUPT_EXT) is routed; anything else is ignored, as is
 * the call when the needed set/unset capabilities are missing.
 * Always returns 0. */
int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
{
    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;

    if (irq != PPC_INTERRUPT_EXT) {
        return 0;
    }

    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
        return 0;
    }

    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);

    return 0;
}
1117
/* Which input pin carries the external interrupt depends on the modelled
 * core family. */
#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
1125
/* Called before entering the guest: on kernels without level-irq support,
 * manually inject a pending external interrupt and re-arm the 20 ms wakeup
 * timer so a swallowed level interrupt gets re-delivered. */
void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int r;
    unsigned irq;

    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1<<PPC_INPUT_INT)))
    {
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        DPRINTF("injected interrupt %d\n", irq);
        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
        if (r < 0) {
            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
        }

        /* Always wake up soon in case the interrupt was level based */
        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                  (get_ticks_per_sec() / 50));
    }

    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyways, so we will get a chance to deliver the rest. */
}
1161
/* Nothing to do after returning from the guest on PPC. */
void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
{
}
1165
/* Report whether the vCPU is halted so the generic loop can skip running
 * it.  NOTE(review): body was dropped by the source mangling; reconstructed
 * per upstream target-ppc/kvm.c — verify. */
int kvm_arch_process_async_events(CPUState *cs)
{
    return cs->halted;
}
1170
/* Guest executed a halt: if no hard interrupt is pending and external
 * interrupts are enabled (MSR[EE]), park the vCPU with EXCP_HLT.
 * Always returns 0. */
static int kvmppc_handle_halt(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;

    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
        cs->halted = 1;
        env->exception_index = EXCP_HLT;
    }

    return 0;
}
1183
/* map dcr access to existing qemu dcr emulation */
1184
static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1186
if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1187
fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1192
/* Forward a guest DCR write to QEMU's DCR emulation; unhandled DCRs only
 * warn.  Always returns 0. */
static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
{
    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}
1200
int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1202
PowerPCCPU *cpu = POWERPC_CPU(cs);
1203
CPUPPCState *env = &cpu->env;
1206
switch (run->exit_reason) {
1208
if (run->dcr.is_write) {
1209
DPRINTF("handle dcr write\n");
1210
ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1212
DPRINTF("handle dcr read\n");
1213
ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1217
DPRINTF("handle halt\n");
1218
ret = kvmppc_handle_halt(cpu);
1220
#if defined(TARGET_PPC64)
1221
case KVM_EXIT_PAPR_HCALL:
1222
DPRINTF("handle PAPR hypercall\n");
1223
run->papr_hcall.ret = spapr_hypercall(cpu,
1225
run->papr_hcall.args);
1230
DPRINTF("handle epr\n");
1231
run->epr.epr = ldl_phys(env->mpic_iack);
1234
case KVM_EXIT_WATCHDOG:
1235
DPRINTF("handle watchdog expiry\n");
1236
watchdog_perform_action();
1241
fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1249
/* OR the given bits into the guest's TSR via the KVM_REG_PPC_OR_TSR
 * pseudo-register.  Returns the ioctl result. */
int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_OR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);  /* was mojibake "®" */
}
1261
/* Clear the given bits in the guest's TSR via the KVM_REG_PPC_CLEAR_TSR
 * pseudo-register.  Returns the ioctl result. */
int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_CLEAR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);  /* was mojibake "®" */
}
1274
/* Push env's BookE TCR value into KVM via KVM_REG_PPC_TCR.
 * Returns the ioctl result. */
int kvmppc_set_tcr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    uint32_t tcr = env->spr[SPR_BOOKE_TCR];

    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_TCR,
        .addr = (uintptr_t) &tcr,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);  /* was mojibake "®" */
}
1288
/* Enable the in-kernel BookE watchdog for this vCPU via
 * KVM_ENABLE_CAP(KVM_CAP_PPC_BOOKE_WATCHDOG).
 * Returns 0 on success, -1 when KVM or the capability is unavailable,
 * or the negative ioctl error. */
int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    struct kvm_enable_cap encap = {};
    int ret;

    if (!kvm_enabled()) {
        return -1;
    }

    if (!cap_ppc_watchdog) {
        printf("warning: KVM does not support watchdog");
        return -1;
    }

    encap.cap = KVM_CAP_PPC_BOOKE_WATCHDOG;
    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    return ret;
}
1314
static int read_cpuinfo(const char *field, char *value, int len)
1318
int field_len = strlen(field);
1321
f = fopen("/proc/cpuinfo", "r");
1327
if(!fgets(line, sizeof(line), f)) {
1330
if (!strncmp(line, field, field_len)) {
1331
pstrcpy(value, len, line);
1342
uint32_t kvmppc_get_tbfreq(void)
1346
uint32_t retval = get_ticks_per_sec();
1348
if (read_cpuinfo("timebase", line, sizeof(line))) {
1352
if (!(ns = strchr(line, ':'))) {
1362
/* Try to find a device tree node for a CPU with clock-frequency property */
1363
static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1365
struct dirent *dirp;
1368
if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1369
printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1374
while ((dirp = readdir(dp)) != NULL) {
1376
snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1378
f = fopen(buf, "r");
1380
snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1387
if (buf[0] == '\0') {
1388
printf("Unknown host!\n");
1395
/* Read a CPU node property from the host device tree that's a single
1396
* integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1397
* (can't find or open the property, or doesn't understand the
1399
static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1409
if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1413
strncat(buf, "/", sizeof(buf) - strlen(buf));
1414
strncat(buf, propname, sizeof(buf) - strlen(buf));
1416
f = fopen(buf, "rb");
1421
len = fread(&u, 1, sizeof(u), f);
1425
/* property is a 32-bit quantity */
1426
return be32_to_cpu(u.v32);
1428
return be64_to_cpu(u.v64);
1434
/* Host CPU clock frequency from the device tree (0 on failure). */
uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}
1439
/* Host "ibm,vmx" (AltiVec/VSX level) property from the device tree
 * (0 on failure). */
uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}
1444
/* Host "ibm,dfp" (decimal FP support) property from the device tree
 * (0 on failure). */
uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}
1449
/* Fetch paravirt info (hypercall instructions, idle flag) from KVM.
 * Returns 0 and fills *pvinfo on success, -1 when the capability or ioctl
 * is unavailable. */
static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
{
    PowerPCCPU *cpu = ppc_env_get_cpu(env);
    CPUState *cs = CPU(cpu);

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
        return 0;
    }

    return -1;
}
1462
/* Returns 1 if the host KVM advertises the EV_IDLE paravirt hcall,
 * 0 otherwise. */
int kvmppc_get_hasidle(CPUPPCState *env)
{
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo) &&
        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
        return 1;
    }

    return 0;
}
1474
/* Copy the KVM hypercall instruction sequence into buf.  If KVM does
 * not provide one, fill in a sequence that always fails (li r3,-1). */
int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
{
    uint32_t *hc = (uint32_t*)buf;
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
        memcpy(buf, pvinfo.hcall, buf_len);
        return 0;
    }

    /*
     * Fallback to always fail hypercalls:
     *
     *     li r3, -1
     *     nop
     *     nop
     *     nop
     */

    hc[0] = 0x3860ffff;
    hc[1] = 0x60000000;
    hc[2] = 0x60000000;
    hc[3] = 0x60000000;

    return 0;
}
/* Put the vcpu into PAPR (sPAPR guest) mode; aborts if the host KVM
 * lacks KVM_CAP_PPC_PAPR. */
void kvmppc_set_papr(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_enable_cap cap = {};
    int ret;

    cap.cap = KVM_CAP_PPC_PAPR;
    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);

    if (ret) {
        cpu_abort(env, "This KVM version does not support PAPR\n");
    }

    /* Update the capability flag so we sync the right information
     * with kvm */
    cap_papr = 1;
}
/* Enable or disable MPIC proxy (EPR) mode for the vcpu.  Aborting only
 * matters when the caller asked for proxy mode and KVM can't do it. */
void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_enable_cap cap = {};
    int ret;

    cap.cap = KVM_CAP_PPC_EPR;
    cap.args[0] = mpic_proxy;
    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);

    if (ret && mpic_proxy) {
        cpu_abort(env, "This KVM version does not support EPR\n");
    }
}
int kvmppc_smt_threads(void)
1538
return cap_ppc_smt ? cap_ppc_smt : 1;
1542
/* Allocate a contiguous Real Mode Area from KVM, map it, and install
 * it at guest physical address 0.  Returns the RMA size in bytes,
 * 0 when no contiguous RMA is required, or -1 on error. */
off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
{
    void *rma;
    off_t size;
    int fd;
    struct kvm_allocate_rma ret;
    MemoryRegion *rma_region;

    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     *                      not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {
        return 0;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
    if (fd < 0) {
        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
                strerror(errno));
        return -1;
    }

    /* Cap the RMA at 256MiB; bigger serves no purpose for the guest. */
    size = MIN(ret.rma_size, 256ul << 20);

    rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (rma == MAP_FAILED) {
        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
        return -1;
    }

    rma_region = g_new(MemoryRegion, 1);
    memory_region_init_ram_ptr(rma_region, NULL, name, size, rma);
    vmstate_register_ram_global(rma_region);
    memory_region_add_subregion(sysmem, 0, rma_region);

    return size;
}
uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1587
struct kvm_ppc_smmu_info info;
1588
long rampagesize, best_page_shift;
1591
if (cap_ppc_rma >= 2) {
1592
return current_size;
1595
/* Find the largest hardware supported page size that's less than
1596
* or equal to the (logical) backing page size of guest RAM */
1597
kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
1598
rampagesize = getrampagesize();
1599
best_page_shift = 0;
1601
for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
1602
struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
1604
if (!sps->page_shift) {
1608
if ((sps->page_shift > best_page_shift)
1609
&& ((1UL << sps->page_shift) <= rampagesize)) {
1610
best_page_shift = sps->page_shift;
1614
return MIN(current_size,
1615
1ULL << (best_page_shift + hash_shift - 7));
1619
void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1621
struct kvm_create_spapr_tce args = {
1623
.window_size = window_size,
1629
/* Must set fd to -1 so we don't try to munmap when called for
1630
* destroying the table, which the upper layers -will- do
1633
if (!cap_spapr_tce) {
1637
fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1639
fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1644
len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
1645
/* FIXME: round this up to page size */
1647
table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1648
if (table == MAP_FAILED) {
1649
fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1659
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1667
len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(uint64_t);
1668
if ((munmap(table, len) < 0) ||
1670
fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1672
/* Leak the table */
1678
int kvmppc_reset_htab(int shift_hint)
1680
uint32_t shift = shift_hint;
1682
if (!kvm_enabled()) {
1683
/* Full emulation, tell caller to allocate htab itself */
1686
if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1688
ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1689
if (ret == -ENOTTY) {
1690
/* At least some versions of PR KVM advertise the
1691
* capability, but don't implement the ioctl(). Oops.
1692
* Return 0 so that we allocate the htab in qemu, as is
1693
* correct for PR. */
1695
} else if (ret < 0) {
1701
/* We have a kernel that predates the htab reset calls. For PR
1702
* KVM, we need to allocate the htab ourselves, for an HV KVM of
1703
* this era, it has allocated a 16MB fixed size hash table
1704
* already. Kernels of this era have the GET_PVINFO capability
1705
* only on PR, so we use this hack to determine the right
1707
if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1708
/* PR - tell caller to allocate htab */
1711
/* HV - assume 16MB kernel allocated htab */
1716
static inline uint32_t mfpvr(void)
1725
/* Set (on) or clear (!on) the given flag bits in *word. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}
/* Instance init for the "host" CPU type; only valid when running on KVM. */
static void kvmppc_host_cpu_initfn(Object *obj)
{
    assert(kvm_enabled());
}
/* Class init for the "host" CPU type: patch the CPU class with
 * capabilities and cache sizes queried from the host device tree.
 * kvmppc_read_int_cpu_dt() returns (uint64_t)-1 on failure, hence the
 * != -1 guards before each override. */
static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
{
    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
    uint32_t vmx = kvmppc_get_vmx();
    uint32_t dfp = kvmppc_get_dfp();
    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");

    /* Now fix up the class with information we can query from the host */

    if (vmx != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
        alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
    }
    if (dfp != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
    }

    if (dcache_size != -1) {
        pcc->l1_dcache_size = dcache_size;
    }

    if (icache_size != -1) {
        pcc->l1_icache_size = icache_size;
    }
}
/* Re-number the vcpu index so that cores are spaced by the host SMT
 * width rather than the configured smp_threads. */
int kvmppc_fixup_cpu(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int smt;

    /* Adjust cpu index for SMT */
    smt = kvmppc_smt_threads();
    cs->cpu_index = (cs->cpu_index / smp_threads) * smt
        + (cs->cpu_index % smp_threads);

    return 0;
}
bool kvmppc_has_cap_epr(void)
1786
static int kvm_ppc_register_host_cpu_type(void)
1788
TypeInfo type_info = {
1789
.name = TYPE_HOST_POWERPC_CPU,
1790
.instance_init = kvmppc_host_cpu_initfn,
1791
.class_init = kvmppc_host_cpu_class_init,
1793
uint32_t host_pvr = mfpvr();
1794
PowerPCCPUClass *pvr_pcc;
1796
pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
1797
if (pvr_pcc == NULL) {
1800
type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1801
type_register(&type_info);
1805
int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
1807
struct kvm_rtas_token_args args = {
1811
if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
1815
strncpy(args.name, function, sizeof(args.name));
1817
return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
1820
int kvmppc_get_htab_fd(bool write)
1822
struct kvm_get_htab_fd s = {
1823
.flags = write ? KVM_GET_HTAB_WRITE : 0,
1828
fprintf(stderr, "KVM version doesn't support saving the hash table\n");
1832
return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
1835
/* Stream HTAB chunks from the KVM fd into the migration file, stopping
 * at EOF or after max_ns nanoseconds (max_ns < 0 means no time limit).
 * Returns 1 when the whole table was written, 0 when time ran out,
 * or a negative errno on read failure. */
int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
{
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    uint8_t buf[bufsize];
    ssize_t rc;

    do {
        rc = read(fd, buf, bufsize);
        if (rc < 0) {
            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
                    strerror(errno));
            return rc;
        } else if (rc) {
            /* Kernel already retuns data in BE format for the file */
            qemu_put_buffer(f, buf, rc);
        }
    } while ((rc != 0)
             && ((max_ns < 0)
                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));

    return (rc == 0) ? 1 : 0;
}
/* Read one HTAB chunk (header + n_valid PTEs) from the migration
 * stream and push it into KVM through the htab fd.  Returns 0 on
 * success, negative on error. */
int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
                           uint16_t n_valid, uint16_t n_invalid)
{
    struct kvm_get_htab_header *buf;
    size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
    ssize_t rc;

    buf = alloca(chunksize);
    /* This is KVM on ppc, so this is all big-endian */
    buf->index = index;
    buf->n_valid = n_valid;
    buf->n_invalid = n_invalid;

    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);

    rc = write(fd, buf, chunksize);
    if (rc < 0) {
        fprintf(stderr, "Error writing KVM hash table: %s\n",
                strerror(errno));
        return rc;
    }
    if (rc != chunksize) {
        /* We should never get a short write on a single chunk */
        fprintf(stderr, "Short write, restoring KVM hash table\n");
        return -1;
    }
    return 0;
}
/* On emulation failure, stop the guest rather than retrying.
 * NOTE(review): body missing from this chunk; reconstructed as the
 * stock stub — confirm against upstream. */
bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}
/* SIGBUS on a vcpu thread: not handled on PPC (no MCE injection here).
 * NOTE(review): body missing from this chunk; reconstructed as the
 * stock stub returning 1 (unhandled) — confirm against upstream. */
int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
{
    return 1;
}
/* Process-wide SIGBUS: likewise unhandled on PPC.
 * NOTE(review): body missing from this chunk; reconstructed as the
 * stock stub returning 1 (unhandled) — confirm against upstream. */
int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}
void kvm_arch_init_irq_routing(KVMState *s)