* Copyright (c) 2003 Fabrice Bellard
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu/cutils.h"
#include "exec/exec-all.h"
#include "hw/qdev-core.h"
#if !defined(CONFIG_USER_ONLY)
#include "hw/boards.h"
#include "hw/xen/xen.h"
#include "sysemu/kvm.h"
#include "sysemu/sysemu.h"
#include "qemu/timer.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#if defined(CONFIG_USER_ONLY)
#else /* !CONFIG_USER_ONLY */
#include "exec/memory.h"
#include "exec/ioport.h"
#include "sysemu/dma.h"
#include "exec/address-spaces.h"
#include "sysemu/xen-mapcache.h"
#include "exec/cpu-all.h"
#include "qemu/rcu_queue.h"
#include "qemu/main-loop.h"
#include "translate-all.h"
#include "sysemu/replay.h"
#include "exec/memory-internal.h"
#include "exec/ram_addr.h"
#include "migration/vmstate.h"
#include "qemu/range.h"
#include "qemu/mmap-alloc.h"
//#define DEBUG_SUBPAGE
#if !defined(CONFIG_USER_ONLY)
/* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
* are protected by the ramlist lock.
*/
RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
static MemoryRegion *system_memory;
static MemoryRegion *system_io;
AddressSpace address_space_io;
AddressSpace address_space_memory;
MemoryRegion io_mem_rom, io_mem_notdirty;
static MemoryRegion io_mem_unassigned;
/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
#define RAM_PREALLOC (1 << 0)
/* RAM is mmap-ed with MAP_SHARED */
#define RAM_SHARED (1 << 1)
/* Only a portion of RAM (used_length) is actually used, and migrated.
* This used_length size can change across reboots.
*/
#define RAM_RESIZEABLE (1 << 2)
#ifdef TARGET_PAGE_BITS_VARY
int target_page_bits;
bool target_page_bits_decided;
#endif
struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
/* current CPU in the current thread. It is only valid inside
cpu_exec() */
__thread CPUState *current_cpu;
/* 0 = Do not count executed instructions.
1 = Precise instruction counting.
2 = Adaptive rate instruction counting. */
bool set_preferred_target_page_bits(int bits)
/* The target page size is the lowest common denominator for all
* the CPUs in the system, so we can only make it smaller, never
* larger. And we can't make it smaller once we've committed to
* a particular size.
*/
#ifdef TARGET_PAGE_BITS_VARY
assert(bits >= TARGET_PAGE_BITS_MIN);
if (target_page_bits == 0 || target_page_bits > bits) {
if (target_page_bits_decided) {
target_page_bits = bits;
#if !defined(CONFIG_USER_ONLY)
static void finalize_target_page_bits(void)
#ifdef TARGET_PAGE_BITS_VARY
if (target_page_bits == 0) {
target_page_bits = TARGET_PAGE_BITS_MIN;
target_page_bits_decided = true;
typedef struct PhysPageEntry PhysPageEntry;
struct PhysPageEntry {
/* How many bits to skip to the next level (in units of L2_SIZE). 0 for a leaf. */
uint32_t skip : 6;
/* index into phys_sections (!skip) or phys_map_nodes (skip) */
uint32_t ptr : 26;
#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
/* Size of the L2 (and L3, etc) page tables. */
#define ADDR_SPACE_BITS 64
#define P_L2_BITS 9
#define P_L2_SIZE (1 << P_L2_BITS)
#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
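/* Worked example (assuming a typical 4 KiB target page, TARGET_PAGE_BITS = 12):
* P_L2_LEVELS = ((64 - 12 - 1) / 9) + 1 = (51 / 9) + 1 = 5 + 1 = 6,
* i.e. the physical page map is a six-level radix tree with
* P_L2_SIZE = 512 entries per node.
*/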
typedef PhysPageEntry Node[P_L2_SIZE];
typedef struct PhysPageMap {
unsigned sections_nb;
unsigned sections_nb_alloc;
unsigned nodes_nb_alloc;
MemoryRegionSection *sections;
struct AddressSpaceDispatch {
MemoryRegionSection *mru_section;
/* This is a multi-level map on the physical address space.
* The bottom level has pointers to MemoryRegionSections.
*/
PhysPageEntry phys_map;
#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
typedef struct subpage_t {
uint16_t sub_section[];
#define PHYS_SECTION_UNASSIGNED 0
#define PHYS_SECTION_NOTDIRTY 1
#define PHYS_SECTION_ROM 2
#define PHYS_SECTION_WATCH 3
static void io_mem_init(void);
static void memory_map_init(void);
static void tcg_commit(MemoryListener *listener);
static MemoryRegion io_mem_watch;
/**
* CPUAddressSpace: all the information a CPU needs about an AddressSpace
* @cpu: the CPU whose AddressSpace this is
* @as: the AddressSpace itself
* @memory_dispatch: its dispatch pointer (cached, RCU protected)
* @tcg_as_listener: listener for tracking changes to the AddressSpace
*/
struct CPUAddressSpace {
struct AddressSpaceDispatch *memory_dispatch;
MemoryListener tcg_as_listener;
#if !defined(CONFIG_USER_ONLY)
static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
static unsigned alloc_hint = 16;
if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, alloc_hint);
map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
alloc_hint = map->nodes_nb_alloc;
static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
ret = map->nodes_nb++;
assert(ret != PHYS_MAP_NODE_NIL);
assert(ret != map->nodes_nb_alloc);
e.skip = leaf ? 0 : 1;
e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
for (i = 0; i < P_L2_SIZE; ++i) {
memcpy(&p[i], &e, sizeof(e));
static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
hwaddr *index, hwaddr *nb, uint16_t leaf,
hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
lp->ptr = phys_map_node_alloc(map, level == 0);
p = map->nodes[lp->ptr];
lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
while (*nb && lp < &p[P_L2_SIZE]) {
if ((*index & (step - 1)) == 0 && *nb >= step) {
phys_page_set_level(map, lp, index, nb, leaf, level - 1);
static void phys_page_set(AddressSpaceDispatch *d,
hwaddr index, hwaddr nb,
/* Wildly overreserve - it doesn't matter much. */
phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
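/* Note (illustrative): phys_page_set() is the only writer of this radix
* tree; register_subpage() and register_multipage() below call it with a
* section index returned by phys_section_add(), e.g.
*     phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
*                   phys_section_add(&d->map, &subsection));
* so each populated leaf slot ends up holding a phys_sections[] index.
*/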
/* Compact a non-leaf page entry. Simply detect that the entry has a single child,
* and update our entry so we can skip it and go directly to the destination.
*/
static void phys_page_compact(PhysPageEntry *lp, Node *nodes)
unsigned valid_ptr = P_L2_SIZE;
if (lp->ptr == PHYS_MAP_NODE_NIL) {
for (i = 0; i < P_L2_SIZE; i++) {
if (p[i].ptr == PHYS_MAP_NODE_NIL) {
phys_page_compact(&p[i], nodes);
/* We can only compress if there's only one child. */
assert(valid_ptr < P_L2_SIZE);
/* Don't compress if it won't fit in the # of bits we have. */
if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
lp->ptr = p[valid_ptr].ptr;
if (!p[valid_ptr].skip) {
/* If our only child is a leaf, make this a leaf. */
/* By design, we should have made this node a leaf to begin with so we
* should never reach here.
* But since it's so simple to handle this, let's do it just in case we
* change this rule in the future.
*/
lp->skip += p[valid_ptr].skip;
static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
if (d->phys_map.skip) {
phys_page_compact(&d->phys_map, d->map.nodes);
349
static inline bool section_covers_addr(const MemoryRegionSection *section,
352
/* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
353
* the section must cover the entire address space.
355
return int128_gethi(section->size) ||
356
range_covers_byte(section->offset_within_address_space,
357
int128_getlo(section->size), addr);
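/* For example (sketch): a section with offset_within_address_space = 0x1000
* and size = 0x2000 covers exactly [0x1000, 0x3000), so addr = 0x2fff matches
* and addr = 0x3000 does not. A section whose 128-bit size has a non-zero
* high half necessarily spans all of [0, 2^64) and covers every address.
*/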
static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
Node *nodes, MemoryRegionSection *sections)
hwaddr index = addr >> TARGET_PAGE_BITS;
for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
if (lp.ptr == PHYS_MAP_NODE_NIL) {
return &sections[PHYS_SECTION_UNASSIGNED];
lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
if (section_covers_addr(&sections[lp.ptr], addr)) {
return &sections[lp.ptr];
return &sections[PHYS_SECTION_UNASSIGNED];
bool memory_region_is_unassigned(MemoryRegion *mr)
return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
&& mr != &io_mem_watch;
/* Called from RCU critical section */
static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
hwaddr addr,
bool resolve_subpage)
MemoryRegionSection *section = atomic_read(&d->mru_section);
if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
section_covers_addr(section, addr)) {
section = phys_page_find(d->phys_map, addr, d->map.nodes,
d->map.sections);
if (resolve_subpage && section->mr->subpage) {
subpage = container_of(section->mr, subpage_t, iomem);
section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
atomic_set(&d->mru_section, section);
/* Called from RCU critical section */
static MemoryRegionSection *
address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
hwaddr *plen, bool resolve_subpage)
MemoryRegionSection *section;
section = address_space_lookup_region(d, addr, resolve_subpage);
/* Compute offset within MemoryRegionSection */
addr -= section->offset_within_address_space;
/* Compute offset within MemoryRegion */
*xlat = addr + section->offset_within_region;
/* MMIO registers can be expected to perform full-width accesses based only
* on their address, without considering adjacent registers that could
* decode to completely different MemoryRegions. When such registers
* exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
* regions overlap wildly. For this reason we cannot clamp the accesses
* here.
*
* If the length is small (as is the case for address_space_ldl/stl),
* everything works fine. If the incoming length is large, however,
* the caller really has to do the clamping through memory_access_size.
*/
if (memory_region_is_ram(mr)) {
diff = int128_sub(section->size, int128_make64(addr));
*plen = int128_get64(int128_min(diff, int128_make64(*plen)));
/* Called from RCU critical section */
MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
hwaddr *xlat, hwaddr *plen,
MemoryRegionSection *section;
AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
section = address_space_translate_internal(d, addr, &addr, plen, true);
if (!mr->iommu_ops) {
iotlb = mr->iommu_ops->translate(mr, addr, is_write);
addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
| (addr & iotlb.addr_mask));
*plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
if (!(iotlb.perm & (1 << is_write))) {
mr = &io_mem_unassigned;
as = iotlb.target_as;
if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
*plen = MIN(page, *plen);
/* Called from RCU critical section */
MemoryRegionSection *
address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
hwaddr *xlat, hwaddr *plen)
MemoryRegionSection *section;
AddressSpaceDispatch *d = atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);
section = address_space_translate_internal(d, addr, xlat, plen, false);
assert(!section->mr->iommu_ops);
#if !defined(CONFIG_USER_ONLY)
static int cpu_common_post_load(void *opaque, int version_id)
CPUState *cpu = opaque;
/* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
version_id is increased. */
cpu->interrupt_request &= ~0x01;
static int cpu_common_pre_load(void *opaque)
CPUState *cpu = opaque;
cpu->exception_index = -1;
static bool cpu_common_exception_index_needed(void *opaque)
CPUState *cpu = opaque;
return tcg_enabled() && cpu->exception_index != -1;
static const VMStateDescription vmstate_cpu_common_exception_index = {
.name = "cpu_common/exception_index",
.minimum_version_id = 1,
.needed = cpu_common_exception_index_needed,
.fields = (VMStateField[]) {
VMSTATE_INT32(exception_index, CPUState),
VMSTATE_END_OF_LIST()
static bool cpu_common_crash_occurred_needed(void *opaque)
CPUState *cpu = opaque;
return cpu->crash_occurred;
static const VMStateDescription vmstate_cpu_common_crash_occurred = {
.name = "cpu_common/crash_occurred",
.minimum_version_id = 1,
.needed = cpu_common_crash_occurred_needed,
.fields = (VMStateField[]) {
VMSTATE_BOOL(crash_occurred, CPUState),
VMSTATE_END_OF_LIST()
const VMStateDescription vmstate_cpu_common = {
.name = "cpu_common",
.minimum_version_id = 1,
.pre_load = cpu_common_pre_load,
.post_load = cpu_common_post_load,
.fields = (VMStateField[]) {
VMSTATE_UINT32(halted, CPUState),
VMSTATE_UINT32(interrupt_request, CPUState),
VMSTATE_END_OF_LIST()
.subsections = (const VMStateDescription*[]) {
&vmstate_cpu_common_exception_index,
&vmstate_cpu_common_crash_occurred,
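/* Note (illustrative): each subsection above is migrated only when its
* .needed callback returns true, e.g. "cpu_common/exception_index" is sent
* only while TCG has a pending exception (see
* cpu_common_exception_index_needed() above). This keeps the common CPU
* migration stream compatible with older QEMUs in the usual case.
*/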
CPUState *qemu_get_cpu(int index)
if (cpu->cpu_index == index) {
#if !defined(CONFIG_USER_ONLY)
void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
CPUAddressSpace *newas;
/* Target code should have set num_ases before calling us */
assert(asidx < cpu->num_ases);
/* address space 0 gets the convenience alias */
/* KVM cannot currently support multiple address spaces. */
assert(asidx == 0 || !kvm_enabled());
if (!cpu->cpu_ases) {
cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
newas = &cpu->cpu_ases[asidx];
newas->tcg_as_listener.commit = tcg_commit;
memory_listener_register(&newas->tcg_as_listener, as);
AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
/* Return the AddressSpace corresponding to the specified index */
return cpu->cpu_ases[asidx].as;
void cpu_exec_unrealizefn(CPUState *cpu)
CPUClass *cc = CPU_GET_CLASS(cpu);
cpu_list_remove(cpu);
if (cc->vmsd != NULL) {
vmstate_unregister(NULL, cc->vmsd, cpu);
if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
void cpu_exec_initfn(CPUState *cpu)
#ifndef CONFIG_USER_ONLY
cpu->thread_id = qemu_get_thread_id();
/* This is a softmmu CPU object, so create a property for it
* so users can wire up its memory. (This can't go in qom/cpu.c
* because that file is compiled only once for both user-mode
* and system builds.) The default if no link is set up is to use
* the system address space.
*/
object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
(Object **)&cpu->memory,
qdev_prop_allow_set_link_before_realize,
OBJ_PROP_LINK_UNREF_ON_RELEASE,
&error_abort);
cpu->memory = system_memory;
object_ref(OBJECT(cpu->memory));
void cpu_exec_realizefn(CPUState *cpu, Error **errp)
CPUClass *cc ATTRIBUTE_UNUSED = CPU_GET_CLASS(cpu);
#ifndef CONFIG_USER_ONLY
if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
if (cc->vmsd != NULL) {
vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
/* Flush the whole TB as this will not have race conditions
* even if we don't have proper locking yet.
* Ideally we would just invalidate the TBs for the
* specific PC.
*/
#if defined(CONFIG_USER_ONLY)
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
int flags, CPUWatchpoint **watchpoint)
/* Add a watchpoint. */
int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
int flags, CPUWatchpoint **watchpoint)
/* forbid ranges which are empty or run off the end of the address space */
if (len == 0 || (addr + len - 1) < addr) {
error_report("tried to set invalid watchpoint at %"
VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
wp = g_malloc(sizeof(*wp));
/* keep all GDB-injected watchpoints in front */
if (flags & BP_GDB) {
QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
tlb_flush_page(cpu, addr);
/* Remove a specific watchpoint. */
int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
if (addr == wp->vaddr && len == wp->len
&& flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
cpu_watchpoint_remove_by_ref(cpu, wp);
/* Remove a specific watchpoint by reference. */
void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
tlb_flush_page(cpu, watchpoint->vaddr);
/* Remove all matching watchpoints. */
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
CPUWatchpoint *wp, *next;
QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
if (wp->flags & mask) {
cpu_watchpoint_remove_by_ref(cpu, wp);
/* Return true if this watchpoint address matches the specified
* access (i.e. the address range covered by the watchpoint overlaps
* partially or completely with the address range covered by the
* access).
*/
static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
/* We know the lengths are non-zero, but a little caution is
* required to avoid errors in the case where the range ends
* exactly at the top of the address space and so addr + len
* wraps round to zero.
*/
vaddr wpend = wp->vaddr + wp->len - 1;
vaddr addrend = addr + len - 1;
return !(addr > wpend || wp->vaddr > addrend);
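/* Worked example (sketch): a 4-byte watchpoint at wp->vaddr = 0x1000 has
* wpend = 0x1003; a 2-byte access at addr = 0x1002 has addrend = 0x1003.
* Neither addr > wpend nor wp->vaddr > addrend holds, so the ranges overlap
* and the watchpoint matches. Using inclusive end addresses is what keeps a
* range ending at the very top of the address space from wrapping to zero.
*/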
/* Add a breakpoint. */
int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
CPUBreakpoint **breakpoint)
bp = g_malloc(sizeof(*bp));
/* keep all GDB-injected breakpoints in front */
if (flags & BP_GDB) {
QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
breakpoint_invalidate(cpu, pc);
/* Remove a specific breakpoint. */
int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
if (bp->pc == pc && bp->flags == flags) {
cpu_breakpoint_remove_by_ref(cpu, bp);
/* Remove a specific breakpoint by reference. */
void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
breakpoint_invalidate(cpu, breakpoint->pc);
/* Remove all matching breakpoints. */
void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
CPUBreakpoint *bp, *next;
QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
if (bp->flags & mask) {
cpu_breakpoint_remove_by_ref(cpu, bp);
/* enable or disable single step mode. EXCP_DEBUG is returned by the
CPU loop after each instruction */
void cpu_single_step(CPUState *cpu, int enabled)
if (cpu->singlestep_enabled != enabled) {
cpu->singlestep_enabled = enabled;
kvm_update_guest_debug(cpu, 0);
/* must flush all the translated code to avoid inconsistencies */
/* XXX: only flush what is necessary */
void cpu_abort(CPUState *cpu, const char *fmt, ...)
fprintf(stderr, "qemu: fatal: ");
vfprintf(stderr, fmt, ap);
fprintf(stderr, "\n");
cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
if (qemu_log_separate()) {
qemu_log("qemu: fatal: ");
qemu_log_vprintf(fmt, ap2);
log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
#if defined(CONFIG_USER_ONLY)
struct sigaction act;
sigfillset(&act.sa_mask);
act.sa_handler = SIG_DFL;
sigaction(SIGABRT, &act, NULL);
#if !defined(CONFIG_USER_ONLY)
/* Called from RCU critical section */
static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
block = atomic_rcu_read(&ram_list.mru_block);
if (block && addr - block->offset < block->max_length) {
QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
if (addr - block->offset < block->max_length) {
fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
/* It is safe to write mru_block outside the iothread lock. This
* is what happens:
*
*     mru_block = xxx
*     rcu_read_unlock()
*                                        xxx removed from list
*                  rcu_read_lock()
*                  read mru_block
*                                        mru_block = NULL;
*                                        call_rcu(reclaim_ramblock, xxx);
*                  rcu_read_unlock()
*
* atomic_rcu_set is not needed here. The block was already published
* when it was placed into the list. Here we're just making an extra
* copy of the pointer.
*/
ram_list.mru_block = block;
static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
end = TARGET_PAGE_ALIGN(start + length);
start &= TARGET_PAGE_MASK;
block = qemu_get_ram_block(start);
assert(block == qemu_get_ram_block(end - 1));
start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
tlb_reset_dirty(cpu, start1, length);
/* Note: start and end must be within the same ram block. */
bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
DirtyMemoryBlocks *blocks;
unsigned long end, page;
end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
page = start >> TARGET_PAGE_BITS;
blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
while (page < end) {
unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
offset, num);
if (dirty && tcg_enabled()) {
tlb_reset_dirty_range_all(start, length);
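/* Worked example (hypothetical numbers): if DIRTY_MEMORY_BLOCK_SIZE were 8,
* page 19 would live in blocks->blocks[19 / 8] = blocks[2] at bit offset
* 19 % 8 = 3, and clearing pages [19, 30) would touch
* MIN(30 - 19, 8 - 3) = 5 bits of that block before moving to the next one.
* The real block size is much larger; the arithmetic is the same.
*/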
/* Called from RCU critical section */
hwaddr memory_region_section_get_iotlb(CPUState *cpu,
MemoryRegionSection *section,
hwaddr paddr, hwaddr xlat,
target_ulong *address)
if (memory_region_is_ram(section->mr)) {
iotlb = memory_region_get_ram_addr(section->mr) + xlat;
if (!section->readonly) {
iotlb |= PHYS_SECTION_NOTDIRTY;
iotlb |= PHYS_SECTION_ROM;
AddressSpaceDispatch *d;
d = atomic_rcu_read(&section->address_space->dispatch);
iotlb = section - d->map.sections;
/* Make accesses to pages with watchpoints go via the
watchpoint trap routines. */
QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
/* Avoid trapping reads of pages with a write breakpoint. */
if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
iotlb = PHYS_SECTION_WATCH + paddr;
*address |= TLB_MMIO;
#endif /* defined(CONFIG_USER_ONLY) */
#if !defined(CONFIG_USER_ONLY)
static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
qemu_anon_ram_alloc;
/*
* Set a custom physical guest memory allocator.
* Accelerators with unusual needs may need this. Hopefully, we can
* get rid of it eventually.
*/
void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
phys_mem_alloc = alloc;
static uint16_t phys_section_add(PhysPageMap *map,
MemoryRegionSection *section)
/* The physical section number is ORed with a page-aligned
* pointer to produce the iotlb entries. Thus it should
* never overflow into the page-aligned value.
*/
assert(map->sections_nb < TARGET_PAGE_SIZE);
if (map->sections_nb == map->sections_nb_alloc) {
map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
map->sections = g_renew(MemoryRegionSection, map->sections,
map->sections_nb_alloc);
map->sections[map->sections_nb] = *section;
memory_region_ref(section->mr);
return map->sections_nb++;
static void phys_section_destroy(MemoryRegion *mr)
bool have_sub_page = mr->subpage;
memory_region_unref(mr);
if (have_sub_page) {
subpage_t *subpage = container_of(mr, subpage_t, iomem);
object_unref(OBJECT(&subpage->iomem));
static void phys_sections_free(PhysPageMap *map)
while (map->sections_nb > 0) {
MemoryRegionSection *section = &map->sections[--map->sections_nb];
phys_section_destroy(section->mr);
g_free(map->sections);
static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
hwaddr base = section->offset_within_address_space
& TARGET_PAGE_MASK;
MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
d->map.nodes, d->map.sections);
MemoryRegionSection subsection = {
.offset_within_address_space = base,
.size = int128_make64(TARGET_PAGE_SIZE),
assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
if (!(existing->mr->subpage)) {
subpage = subpage_init(d->as, base);
subsection.address_space = d->as;
subsection.mr = &subpage->iomem;
phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
phys_section_add(&d->map, &subsection));
subpage = container_of(existing->mr, subpage_t, iomem);
start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
end = start + int128_get64(section->size) - 1;
subpage_register(subpage, start, end,
phys_section_add(&d->map, section));
static void register_multipage(AddressSpaceDispatch *d,
MemoryRegionSection *section)
hwaddr start_addr = section->offset_within_address_space;
uint16_t section_index = phys_section_add(&d->map, section);
uint64_t num_pages = int128_get64(int128_rshift(section->size,
TARGET_PAGE_BITS));
phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
AddressSpaceDispatch *d = as->next_dispatch;
MemoryRegionSection now = *section, remain = *section;
Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
- now.offset_within_address_space;
now.size = int128_min(int128_make64(left), now.size);
register_subpage(d, &now);
now.size = int128_zero();
while (int128_ne(remain.size, now.size)) {
remain.size = int128_sub(remain.size, now.size);
remain.offset_within_address_space += int128_get64(now.size);
remain.offset_within_region += int128_get64(now.size);
if (int128_lt(remain.size, page_size)) {
register_subpage(d, &now);
} else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
now.size = page_size;
register_subpage(d, &now);
now.size = int128_and(now.size, int128_neg(page_size));
register_multipage(d, &now);
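/* Illustrative walk-through (sketch): adding a section covering
* [0x0800, 0x3800) with 0x1000-byte pages splits into a subpage head
* [0x0800, 0x1000), a page-aligned middle [0x1000, 0x3000) handled by
* register_multipage(), and a subpage tail [0x3000, 0x3800). Unaligned
* head/tail fragments may share a guest page with other sections, which
* is exactly what the subpage machinery exists for.
*/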
void qemu_flush_coalesced_mmio_buffer(void)
kvm_flush_coalesced_mmio_buffer();
void qemu_mutex_lock_ramlist(void)
qemu_mutex_lock(&ram_list.mutex);
void qemu_mutex_unlock_ramlist(void)
qemu_mutex_unlock(&ram_list.mutex);
static int64_t get_file_size(int fd)
int64_t size = lseek(fd, 0, SEEK_END);
static void *file_ram_alloc(RAMBlock *block,
ram_addr_t memory,
const char *path,
Error **errp)
bool unlink_on_error = false;
char *sanitized_name;
void *area = MAP_FAILED;
if (kvm_enabled() && !kvm_has_sync_mmu()) {
error_setg(errp,
"host lacks kvm mmu notifiers, -mem-path unsupported");
fd = open(path, O_RDWR);
/* @path names an existing file, use it */
if (errno == ENOENT) {
/* @path names a file that doesn't exist, create it */
fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
unlink_on_error = true;
} else if (errno == EISDIR) {
/* @path names a directory, create a file there */
/* Make name safe to use with mkstemp by replacing '/' with '_'. */
sanitized_name = g_strdup(memory_region_name(block->mr));
for (c = sanitized_name; *c != '\0'; c++) {
filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
sanitized_name);
g_free(sanitized_name);
fd = mkstemp(filename);
if (errno != EEXIST && errno != EINTR) {
error_setg_errno(errp, errno,
"can't open backing store %s for guest RAM",
/*
* Try again on EINTR and EEXIST. The latter happens when
* something else creates the file between our two open().
*/
block->page_size = qemu_fd_getpagesize(fd);
block->mr->align = block->page_size;
#if defined(__s390x__)
if (kvm_enabled()) {
block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN);
file_size = get_file_size(fd);
if (memory < block->page_size) {
error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
"or larger than page size 0x%zx",
memory, block->page_size);
if (file_size > 0 && file_size < memory) {
error_setg(errp, "backing store %s size 0x%" PRIx64
" does not match 'size' option 0x" RAM_ADDR_FMT,
path, file_size, memory);
memory = ROUND_UP(memory, block->page_size);
/*
* ftruncate is not supported by hugetlbfs in older
* hosts, so don't bother bailing out on errors.
* If anything goes wrong with it under other filesystems,
* mmap will fail.
*
* Do not truncate the non-empty backend file to avoid corrupting
* the existing data in the file. Disabling shrinking is not
* enough. For example, the current vNVDIMM implementation stores
* the guest NVDIMM labels at the end of the backend file. If the
* backend file is later extended, QEMU will not be able to find
* those labels. Therefore, extending the non-empty backend file
* is disabled as well.
*/
if (!file_size && ftruncate(fd, memory)) {
perror("ftruncate");
area = qemu_ram_mmap(fd, memory, block->mr->align,
block->flags & RAM_SHARED);
if (area == MAP_FAILED) {
error_setg_errno(errp, errno,
"unable to map backing store for guest RAM");
os_mem_prealloc(fd, area, memory, errp);
if (errp && *errp) {
if (area != MAP_FAILED) {
qemu_ram_munmap(area, memory);
if (unlink_on_error) {
/* Called with the ramlist lock held. */
static ram_addr_t find_ram_offset(ram_addr_t size)
RAMBlock *block, *next_block;
ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
assert(size != 0); /* it would hand out same offset multiple times */
if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
ram_addr_t end, next = RAM_ADDR_MAX;
end = block->offset + block->max_length;
QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
if (next_block->offset >= end) {
next = MIN(next, next_block->offset);
if (next - end >= size && next - end < mingap) {
mingap = next - end;
if (offset == RAM_ADDR_MAX) {
fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
(uint64_t)size);
ram_addr_t last_ram_offset(void)
ram_addr_t last = 0;
QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
last = MAX(last, block->offset + block->max_length);
static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
/* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
if (!machine_dump_guest_core(current_machine)) {
ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
perror("qemu_madvise");
fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
"but dump_guest_core=off specified\n");
const char *qemu_ram_get_idstr(RAMBlock *rb)
/* Called with iothread lock held. */
void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
assert(!new_block->idstr[0]);
char *id = qdev_get_dev_path(dev);
snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
if (block != new_block &&
!strcmp(block->idstr, new_block->idstr)) {
fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
new_block->idstr);
/* Called with iothread lock held. */
void qemu_ram_unset_idstr(RAMBlock *block)
/* FIXME: arch_init.c assumes that this is not called throughout
* migration. Ignore the problem since hot-unplug during migration
* does not work anyway.
*/
memset(block->idstr, 0, sizeof(block->idstr));
size_t qemu_ram_pagesize(RAMBlock *rb)
return rb->page_size;
static int memory_try_enable_merging(void *addr, size_t len)
if (!machine_mem_merge(current_machine)) {
/* disabled by the user */
return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
/* Only legal before guest might have detected the memory size: e.g. on
* incoming migration, or right after reset.
*
* As the memory core doesn't know how memory is accessed, it is up to the
* resize callback to update device state and/or add assertions to detect
* misuse, if necessary.
*/
int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
newsize = HOST_PAGE_ALIGN(newsize);
if (block->used_length == newsize) {
if (!(block->flags & RAM_RESIZEABLE)) {
error_setg_errno(errp, EINVAL,
"Length mismatch: %s: 0x" RAM_ADDR_FMT
" in != 0x" RAM_ADDR_FMT, block->idstr,
newsize, block->used_length);
if (block->max_length < newsize) {
error_setg_errno(errp, EINVAL,
"Length too large: %s: 0x" RAM_ADDR_FMT
" > 0x" RAM_ADDR_FMT, block->idstr,
newsize, block->max_length);
cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
block->used_length = newsize;
cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
memory_region_set_size(block->mr, newsize);
if (block->resized) {
block->resized(block->idstr, newsize, block->host);
/* Called with ram_list.mutex held */
static void dirty_memory_extend(ram_addr_t old_ram_size,
ram_addr_t new_ram_size)
ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
DIRTY_MEMORY_BLOCK_SIZE);
ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
DIRTY_MEMORY_BLOCK_SIZE);
/* Only need to extend if block count increased */
if (new_num_blocks <= old_num_blocks) {
for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
DirtyMemoryBlocks *old_blocks;
DirtyMemoryBlocks *new_blocks;
old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
new_blocks = g_malloc(sizeof(*new_blocks) +
sizeof(new_blocks->blocks[0]) * new_num_blocks);
if (old_num_blocks) {
memcpy(new_blocks->blocks, old_blocks->blocks,
old_num_blocks * sizeof(old_blocks->blocks[0]));
for (j = old_num_blocks; j < new_num_blocks; j++) {
new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
g_free_rcu(old_blocks, rcu);
static void ram_block_add(RAMBlock *new_block, Error **errp)
RAMBlock *last_block = NULL;
ram_addr_t old_ram_size, new_ram_size;
old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
qemu_mutex_lock_ramlist();
new_block->offset = find_ram_offset(new_block->max_length);
if (!new_block->host) {
if (xen_enabled()) {
xen_ram_alloc(new_block->offset, new_block->max_length,
new_block->mr, &err);
error_propagate(errp, err);
qemu_mutex_unlock_ramlist();
new_block->host = phys_mem_alloc(new_block->max_length,
&new_block->mr->align);
if (!new_block->host) {
error_setg_errno(errp, errno,
"cannot set up guest memory '%s'",
memory_region_name(new_block->mr));
qemu_mutex_unlock_ramlist();
memory_try_enable_merging(new_block->host, new_block->max_length);
new_ram_size = MAX(old_ram_size,
(new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
if (new_ram_size > old_ram_size) {
migration_bitmap_extend(old_ram_size, new_ram_size);
dirty_memory_extend(old_ram_size, new_ram_size);
/* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
* QLIST (which has an RCU-friendly variant) does not have insertion at
* tail, so save the last element in last_block.
*/
QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
if (block->max_length < new_block->max_length) {
QLIST_INSERT_BEFORE_RCU(block, new_block, next);
} else if (last_block) {
QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
} else { /* list is empty */
QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
ram_list.mru_block = NULL;
/* Write list before version */
qemu_mutex_unlock_ramlist();
cpu_physical_memory_set_dirty_range(new_block->offset,
new_block->used_length,
if (new_block->host) {
qemu_ram_setup_dump(new_block->host, new_block->max_length);
qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
/* MADV_DONTFORK is also needed by KVM in absence of synchronous MMU */
qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
bool share, const char *mem_path,
RAMBlock *new_block;
Error *local_err = NULL;
if (xen_enabled()) {
error_setg(errp, "-mem-path not supported with Xen");
if (phys_mem_alloc != qemu_anon_ram_alloc) {
/*
* file_ram_alloc() needs to allocate just like
* phys_mem_alloc, but we haven't bothered to provide
* a hook there.
*/
error_setg(errp,
"-mem-path not supported with this accelerator");
size = HOST_PAGE_ALIGN(size);
new_block = g_malloc0(sizeof(*new_block));
new_block->used_length = size;
new_block->max_length = size;
new_block->flags = share ? RAM_SHARED : 0;
new_block->host = file_ram_alloc(new_block, size,
mem_path, errp);
if (!new_block->host) {
ram_block_add(new_block, &local_err);
error_propagate(errp, local_err);
RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
void (*resized)(const char*,
void *host, bool resizeable,
MemoryRegion *mr, Error **errp)
RAMBlock *new_block;
Error *local_err = NULL;
size = HOST_PAGE_ALIGN(size);
max_size = HOST_PAGE_ALIGN(max_size);
new_block = g_malloc0(sizeof(*new_block));
new_block->resized = resized;
new_block->used_length = size;
new_block->max_length = max_size;
assert(max_size >= size);
new_block->page_size = getpagesize();
new_block->host = host;
new_block->flags |= RAM_PREALLOC;
new_block->flags |= RAM_RESIZEABLE;
ram_block_add(new_block, &local_err);
error_propagate(errp, local_err);
RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
MemoryRegion *mr, Error **errp)
return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
void (*resized)(const char*,
MemoryRegion *mr, Error **errp)
return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
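/* Illustrative usage (a sketch; "my_resized" and the sizes are hypothetical,
* not taken from this file):
*
*     static void my_resized(const char *id, uint64_t newsize, void *host)
*     {
*         // device updates its own bookkeeping here
*     }
*     ...
*     rb = qemu_ram_alloc_resizeable(16 * 1024 * 1024,  // initial used size
*                                    64 * 1024 * 1024,  // max_length, fixed
*                                    my_resized, mr, &err);
*
* max_length is allocated up front; qemu_ram_resize() later moves used_length
* within [0, max_length] and invokes the resized callback.
*/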
static void reclaim_ramblock(RAMBlock *block)
if (block->flags & RAM_PREALLOC) {
} else if (xen_enabled()) {
xen_invalidate_map_cache_entry(block->host);
} else if (block->fd >= 0) {
qemu_ram_munmap(block->host, block->max_length);
qemu_anon_ram_free(block->host, block->max_length);
void qemu_ram_free(RAMBlock *block)
qemu_mutex_lock_ramlist();
QLIST_REMOVE_RCU(block, next);
ram_list.mru_block = NULL;
/* Write list before version */
call_rcu(block, reclaim_ramblock, rcu);
qemu_mutex_unlock_ramlist();
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
offset = addr - block->offset;
if (offset < block->max_length) {
vaddr = ramblock_ptr(block, offset);
if (block->flags & RAM_PREALLOC) {
} else if (xen_enabled()) {
if (block->fd >= 0) {
flags |= (block->flags & RAM_SHARED ?
MAP_SHARED : MAP_PRIVATE);
area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
flags, block->fd, offset);
/*
* Remap needs to match alloc. Accelerators that
* set phys_mem_alloc never remap. If they did,
* we'd need a remap hook here.
*/
assert(phys_mem_alloc == qemu_anon_ram_alloc);
flags |= MAP_PRIVATE | MAP_ANONYMOUS;
area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
flags, -1, 0);
if (area != vaddr) {
fprintf(stderr, "Could not remap addr: "
RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
length, addr);
memory_try_enable_merging(vaddr, length);
qemu_ram_setup_dump(vaddr, length);
#endif /* !_WIN32 */
/* Return a host pointer to ram allocated with qemu_ram_alloc.
* This should not be used for general purpose DMA. Use address_space_map
* or address_space_rw instead. For local memory (e.g. video ram) that the
* device owns, use memory_region_get_ram_ptr.
*
* Called within RCU critical section.
*/
void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
RAMBlock *block = ram_block;
if (block == NULL) {
block = qemu_get_ram_block(addr);
addr -= block->offset;
if (xen_enabled() && block->host == NULL) {
/* We need to check if the requested address is in the RAM
* because we don't want to map the entire memory in QEMU.
* In that case just map until the end of the page.
*/
if (block->offset == 0) {
return xen_map_cache(addr, 0, 0);
block->host = xen_map_cache(block->offset, block->max_length, 1);
return ramblock_ptr(block, addr);
/* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr
* but takes a size argument.
*
* Called within RCU critical section.
*/
static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
RAMBlock *block = ram_block;
if (block == NULL) {
block = qemu_get_ram_block(addr);
addr -= block->offset;
*size = MIN(*size, block->max_length - addr);
if (xen_enabled() && block->host == NULL) {
/* We need to check if the requested address is in the RAM
* because we don't want to map the entire memory in QEMU.
* In that case just map the requested area.
*/
if (block->offset == 0) {
return xen_map_cache(addr, *size, 1);
block->host = xen_map_cache(block->offset, block->max_length, 1);
return ramblock_ptr(block, addr);
/*
* Translates a host ptr back to a RAMBlock, a ram_addr and an offset
* in that RAMBlock.
*
* ptr: Host pointer to look up
* round_offset: If true round the result offset down to a page boundary
* *ram_addr: set to result ram_addr
* *offset: set to result offset within the RAMBlock
*
* Returns: RAMBlock (or NULL if not found)
*
* By the time this function returns, the returned pointer is not protected
* by RCU anymore. If the caller is not within an RCU critical section and
* does not hold the iothread lock, it must have other means of protecting the
* pointer, such as a reference to the region that includes the incoming
* ram_addr.
*/
RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
uint8_t *host = ptr;
if (xen_enabled()) {
ram_addr_t ram_addr;
ram_addr = xen_ram_addr_from_mapcache(ptr);
block = qemu_get_ram_block(ram_addr);
*offset = ram_addr - block->offset;
block = atomic_rcu_read(&ram_list.mru_block);
if (block && block->host && host - block->host < block->max_length) {
QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
/* This case appears when the block is not mapped. */
if (block->host == NULL) {
if (host - block->host < block->max_length) {
*offset = (host - block->host);
*offset &= TARGET_PAGE_MASK;
/*
* Finds the named RAMBlock
*
* name: The name of RAMBlock to find
*
* Returns: RAMBlock (or NULL if not found)
*/
RAMBlock *qemu_ram_block_by_name(const char *name)
QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
if (!strcmp(name, block->idstr)) {
/* Some of the softmmu routines need to translate from a host pointer
(typically a TLB entry) back to a ram offset. */
ram_addr_t qemu_ram_addr_from_host(void *ptr)
block = qemu_ram_block_from_host(ptr, false, &offset);
if (!block) {
return RAM_ADDR_INVALID;
return block->offset + offset;
/* Called within RCU critical section. */
static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
uint64_t val, unsigned size)
bool locked = false;
if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
tb_invalidate_phys_page_fast(ram_addr, size);
stb_p(qemu_map_ram_ptr(NULL, ram_addr), val);
stw_p(qemu_map_ram_ptr(NULL, ram_addr), val);
stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
/* Set both VGA and migration bits for simplicity and to remove
* the notdirty callback faster.
*/
cpu_physical_memory_set_dirty_range(ram_addr, size,
DIRTY_CLIENTS_NOCODE);
/* we remove the notdirty callback only if the code has been
flushed */
if (!cpu_physical_memory_is_clean(ram_addr)) {
tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
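/* In short (sketch): the notdirty slot lets the first write to a clean page
* trap here so translated blocks for that page can be invalidated and the
* dirty bits set; once the page is dirty the TLB entry is switched back to a
* direct RAM mapping, so subsequent writes take the fast path again.
*/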
static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
unsigned size, bool is_write)
static const MemoryRegionOps notdirty_mem_ops = {
.write = notdirty_mem_write,
.valid.accepts = notdirty_mem_accepts,
.endianness = DEVICE_NATIVE_ENDIAN,
/* Generate a debug exception if a watchpoint has been hit. */
static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
CPUState *cpu = current_cpu;
CPUClass *cc = CPU_GET_CLASS(cpu);
CPUArchState *env = cpu->env_ptr;
target_ulong pc, cs_base;
if (cpu->watchpoint_hit) {
/* We re-entered the check after replacing the TB. Now raise
* the debug interrupt so that it will trigger after the
* current instruction. */
cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
if (cpu_watchpoint_address_matches(wp, vaddr, len)
&& (wp->flags & flags)) {
if (flags == BP_MEM_READ) {
wp->flags |= BP_WATCHPOINT_HIT_READ;
wp->flags |= BP_WATCHPOINT_HIT_WRITE;
wp->hitaddr = vaddr;
wp->hitattrs = attrs;
if (!cpu->watchpoint_hit) {
if (wp->flags & BP_CPU &&
!cc->debug_check_watchpoint(cpu, wp)) {
wp->flags &= ~BP_WATCHPOINT_HIT;
cpu->watchpoint_hit = wp;
/* The tb_lock will be reset when cpu_loop_exit or
* cpu_loop_exit_noexc longjmp back into the cpu_exec
* main loop.
*/
tb_check_watchpoint(cpu);
if (wp->flags & BP_STOP_BEFORE_ACCESS) {
cpu->exception_index = EXCP_DEBUG;
cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
cpu_loop_exit_noexc(cpu);
wp->flags &= ~BP_WATCHPOINT_HIT;
/* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
so these check for a hit then pass through to the normal out-of-line
access routines. */
static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
unsigned size, MemTxAttrs attrs)
int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
AddressSpace *as = current_cpu->cpu_ases[asidx].as;
check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
data = address_space_ldub(as, addr, attrs, &res);
data = address_space_lduw(as, addr, attrs, &res);
data = address_space_ldl(as, addr, attrs, &res);
static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
uint64_t val, unsigned size,
int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
AddressSpace *as = current_cpu->cpu_ases[asidx].as;
check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
address_space_stb(as, addr, val, attrs, &res);
address_space_stw(as, addr, val, attrs, &res);
address_space_stl(as, addr, val, attrs, &res);
static const MemoryRegionOps watch_mem_ops = {
.read_with_attrs = watch_mem_read,
.write_with_attrs = watch_mem_write,
.endianness = DEVICE_NATIVE_ENDIAN,
static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
unsigned len, MemTxAttrs attrs)
subpage_t *subpage = opaque;
#if defined(DEBUG_SUBPAGE)
printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
subpage, len, addr);
res = address_space_read(subpage->as, addr + subpage->base,
attrs, buf, len);
*data = ldub_p(buf);
*data = lduw_p(buf);
static MemTxResult subpage_write(void *opaque, hwaddr addr,
uint64_t value, unsigned len, MemTxAttrs attrs)
subpage_t *subpage = opaque;
#if defined(DEBUG_SUBPAGE)
printf("%s: subpage %p len %u addr " TARGET_FMT_plx
" value %"PRIx64"\n",
__func__, subpage, len, addr, value);
return address_space_write(subpage->as, addr + subpage->base,
attrs, buf, len);
static bool subpage_accepts(void *opaque, hwaddr addr,
unsigned len, bool is_write)
subpage_t *subpage = opaque;
#if defined(DEBUG_SUBPAGE)
printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
__func__, subpage, is_write ? 'w' : 'r', len, addr);
return address_space_access_valid(subpage->as, addr + subpage->base,
len, is_write);
static const MemoryRegionOps subpage_ops = {
.read_with_attrs = subpage_read,
.write_with_attrs = subpage_write,
.impl.min_access_size = 1,
.impl.max_access_size = 8,
.valid.min_access_size = 1,
.valid.max_access_size = 8,
.valid.accepts = subpage_accepts,
.endianness = DEVICE_NATIVE_ENDIAN,
static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
idx = SUBPAGE_IDX(start);
eidx = SUBPAGE_IDX(end);
#if defined(DEBUG_SUBPAGE)
printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
__func__, mmio, start, end, idx, eidx, section);
for (; idx <= eidx; idx++) {
mmio->sub_section[idx] = section;
static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
mmio = g_malloc0(sizeof(subpage_t) + TARGET_PAGE_SIZE * sizeof(uint16_t));
memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
NULL, TARGET_PAGE_SIZE);
mmio->iomem.subpage = true;
#if defined(DEBUG_SUBPAGE)
printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
mmio, base, TARGET_PAGE_SIZE);
subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
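/* Note (illustrative): a freshly created subpage has all TARGET_PAGE_SIZE
* sub_section[] slots pointing at PHYS_SECTION_UNASSIGNED; each later
* subpage_register() call from register_subpage() overwrites just the byte
* range that a real section occupies, so a single guest page can dispatch
* to several different MemoryRegions.
*/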
static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
MemoryRegionSection section = {
.address_space = as,
.offset_within_address_space = 0,
.offset_within_region = 0,
.size = int128_2_64(),
return phys_section_add(map, &section);
MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
int asidx = cpu_asidx_from_attrs(cpu, attrs);
CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
MemoryRegionSection *sections = d->map.sections;
return sections[index & ~TARGET_PAGE_MASK].mr;
static void io_mem_init(void)
memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
NULL, UINT64_MAX);
memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
NULL, UINT64_MAX);
memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
NULL, UINT64_MAX);
static void mem_begin(MemoryListener *listener)
AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
n = dummy_section(&d->map, as, &io_mem_unassigned);
assert(n == PHYS_SECTION_UNASSIGNED);
n = dummy_section(&d->map, as, &io_mem_notdirty);
assert(n == PHYS_SECTION_NOTDIRTY);
n = dummy_section(&d->map, as, &io_mem_rom);
assert(n == PHYS_SECTION_ROM);
n = dummy_section(&d->map, as, &io_mem_watch);
assert(n == PHYS_SECTION_WATCH);
d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
as->next_dispatch = d;
static void address_space_dispatch_free(AddressSpaceDispatch *d)
phys_sections_free(&d->map);
static void mem_commit(MemoryListener *listener)
AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
AddressSpaceDispatch *cur = as->dispatch;
AddressSpaceDispatch *next = as->next_dispatch;
phys_page_compact_all(next, next->map.nodes_nb);
atomic_rcu_set(&as->dispatch, next);
call_rcu(cur, address_space_dispatch_free, rcu);
static void tcg_commit(MemoryListener *listener)
CPUAddressSpace *cpuas;
AddressSpaceDispatch *d;
/* since each CPU stores ram addresses in its TLB cache, we must
reset the modified entries */
cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
cpu_reloading_memory_map();
/* The CPU and TLB are protected by the iothread lock.
* We reload the dispatch pointer now because cpu_reloading_memory_map()
* may have split the RCU critical section.
*/
d = atomic_rcu_read(&cpuas->as->dispatch);
atomic_rcu_set(&cpuas->memory_dispatch, d);
tlb_flush(cpuas->cpu, 1);
void address_space_init_dispatch(AddressSpace *as)
as->dispatch = NULL;
as->dispatch_listener = (MemoryListener) {
.begin = mem_begin,
.commit = mem_commit,
.region_add = mem_add,
.region_nop = mem_add,
memory_listener_register(&as->dispatch_listener, as);
void address_space_unregister(AddressSpace *as)
memory_listener_unregister(&as->dispatch_listener);
void address_space_destroy_dispatch(AddressSpace *as)
AddressSpaceDispatch *d = as->dispatch;
atomic_rcu_set(&as->dispatch, NULL);
call_rcu(d, address_space_dispatch_free, rcu);
static void memory_map_init(void)
system_memory = g_malloc(sizeof(*system_memory));
memory_region_init(system_memory, NULL, "system", UINT64_MAX);
address_space_init(&address_space_memory, system_memory, "memory");
system_io = g_malloc(sizeof(*system_io));
memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
65536);
address_space_init(&address_space_io, system_io, "I/O");
MemoryRegion *get_system_memory(void)
return system_memory;
MemoryRegion *get_system_io(void)
#endif /* !defined(CONFIG_USER_ONLY) */
/* physical memory access (slow version, mainly for debug) */
#if defined(CONFIG_USER_ONLY)
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
uint8_t *buf, int len, int is_write)
page = addr & TARGET_PAGE_MASK;
l = (page + TARGET_PAGE_SIZE) - addr;
flags = page_get_flags(page);
if (!(flags & PAGE_VALID))
if (!(flags & PAGE_WRITE))
/* XXX: this code should not depend on lock_user */
if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
unlock_user(p, addr, l);
if (!(flags & PAGE_READ))
/* XXX: this code should not depend on lock_user */
if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
unlock_user(p, addr, 0);
static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
addr += memory_region_get_ram_addr(mr);
/* No early return if dirty_log_mask is or becomes 0, because
* cpu_physical_memory_set_dirty_range will still call
* xen_modified_memory.
*/
if (dirty_log_mask) {
cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
tb_invalidate_phys_range(addr, addr + length);
dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
unsigned access_size_max = mr->ops->valid.max_access_size;
/* Regions are assumed to support 1-4 byte accesses unless
otherwise specified. */
if (access_size_max == 0) {
access_size_max = 4;
/* Bound the maximum access by the alignment of the address. */
if (!mr->ops->impl.unaligned) {
unsigned align_size_max = addr & -addr;
if (align_size_max != 0 && align_size_max < access_size_max) {
access_size_max = align_size_max;
/* Don't attempt accesses larger than the maximum. */
if (l > access_size_max) {
l = access_size_max;
2545
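/*
 * Worked example (illustrative, not part of the original source): for an
 * access at addr = 0x1006, the expression addr & -addr isolates the lowest
 * set bit of the address, giving 2, so even a region that allows 4-byte
 * accesses is split into at most 2-byte chunks at that address.
 * pow2floor() then rounds the requested length down to a power of two,
 * e.g. l = 3 becomes l = 2.
 */
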
static bool prepare_mmio_access(MemoryRegion *mr)
{
    bool unlocked = !qemu_mutex_iothread_locked();
    bool release_lock = false;

    if (unlocked && mr->global_locking) {
        qemu_mutex_lock_iothread();
        unlocked = false;
        release_lock = true;
    }
    if (mr->flush_coalesced_mmio) {
        if (unlocked) {
            qemu_mutex_lock_iothread();
        }
        qemu_flush_coalesced_mmio_buffer();
        if (unlocked) {
            qemu_mutex_unlock_iothread();
        }
    }

    return release_lock;
}

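/*
 * Typical caller pattern (illustrative sketch, mirroring the dispatch loops
 * below): the return value records whether this function took the iothread
 * lock, so the caller can drop it again once the MMIO access is done.
 *
 *     bool release_lock = false;
 *     ...
 *     release_lock |= prepare_mmio_access(mr);
 *     result |= memory_region_dispatch_write(mr, addr1, val, 4, attrs);
 *     if (release_lock) {
 *         qemu_mutex_unlock_iothread();
 *     }
 */
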
/* Called within RCU critical section.  */
static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
                                                MemTxAttrs attrs,
                                                const uint8_t *buf,
                                                int len, hwaddr addr1,
                                                hwaddr l, MemoryRegion *mr)
{
    uint8_t *ptr;
    uint64_t val;
    MemTxResult result = MEMTX_OK;
    bool release_lock = false;

    for (;;) {
        if (!memory_access_is_direct(mr, true)) {
            release_lock |= prepare_mmio_access(mr);
            l = memory_access_size(mr, l, addr1);
            /* XXX: could force current_cpu to NULL to avoid
               potential bugs */
            switch (l) {
            case 8:
                /* 64 bit write access */
                val = ldq_p(buf);
                result |= memory_region_dispatch_write(mr, addr1, val, 8,
                                                       attrs);
                break;
            case 4:
                /* 32 bit write access */
                val = ldl_p(buf);
                result |= memory_region_dispatch_write(mr, addr1, val, 4,
                                                       attrs);
                break;
            case 2:
                /* 16 bit write access */
                val = lduw_p(buf);
                result |= memory_region_dispatch_write(mr, addr1, val, 2,
                                                       attrs);
                break;
            case 1:
                /* 8 bit write access */
                val = ldub_p(buf);
                result |= memory_region_dispatch_write(mr, addr1, val, 1,
                                                       attrs);
                break;
            default:
                abort();
            }
        } else {
            /* RAM case */
            ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
            memcpy(ptr, buf, l);
            invalidate_and_set_dirty(mr, addr1, l);
        }

        if (release_lock) {
            qemu_mutex_unlock_iothread();
            release_lock = false;
        }

        len -= l;
        buf += l;
        addr += l;

        if (!len) {
            break;
        }

        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, true);
    }

    return result;
}

MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
                                const uint8_t *buf, int len)
{
    hwaddr l;
    hwaddr addr1;
    MemoryRegion *mr;
    MemTxResult result = MEMTX_OK;

    if (len > 0) {
        rcu_read_lock();
        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, true);
        result = address_space_write_continue(as, addr, attrs, buf, len,
                                              addr1, l, mr);
        rcu_read_unlock();
    }

    return result;
}

/* Called within RCU critical section.  */
MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
                                        MemTxAttrs attrs, uint8_t *buf,
                                        int len, hwaddr addr1, hwaddr l,
                                        MemoryRegion *mr)
{
    uint8_t *ptr;
    uint64_t val;
    MemTxResult result = MEMTX_OK;
    bool release_lock = false;

    for (;;) {
        if (!memory_access_is_direct(mr, false)) {
            /* I/O case */
            release_lock |= prepare_mmio_access(mr);
            l = memory_access_size(mr, l, addr1);
            switch (l) {
            case 8:
                /* 64 bit read access */
                result |= memory_region_dispatch_read(mr, addr1, &val, 8,
                                                      attrs);
                stq_p(buf, val);
                break;
            case 4:
                /* 32 bit read access */
                result |= memory_region_dispatch_read(mr, addr1, &val, 4,
                                                      attrs);
                stl_p(buf, val);
                break;
            case 2:
                /* 16 bit read access */
                result |= memory_region_dispatch_read(mr, addr1, &val, 2,
                                                      attrs);
                stw_p(buf, val);
                break;
            case 1:
                /* 8 bit read access */
                result |= memory_region_dispatch_read(mr, addr1, &val, 1,
                                                      attrs);
                stb_p(buf, val);
                break;
            default:
                abort();
            }
        } else {
            /* RAM case */
            ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
            memcpy(buf, ptr, l);
        }

        if (release_lock) {
            qemu_mutex_unlock_iothread();
            release_lock = false;
        }

        len -= l;
        buf += l;
        addr += l;

        if (!len) {
            break;
        }

        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, false);
    }

    return result;
}

MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
                                    MemTxAttrs attrs, uint8_t *buf, int len)
{
    hwaddr l;
    hwaddr addr1;
    MemoryRegion *mr;
    MemTxResult result = MEMTX_OK;

    if (len > 0) {
        rcu_read_lock();
        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, false);
        result = address_space_read_continue(as, addr, attrs, buf, len,
                                             addr1, l, mr);
        rcu_read_unlock();
    }

    return result;
}

MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
                             uint8_t *buf, int len, bool is_write)
{
    if (is_write) {
        return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
    } else {
        return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
    }
}

void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
                            int len, int is_write)
{
    address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
                     buf, len, is_write);
}

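/*
 * Illustrative use (not part of the original source): copying a guest
 * physical buffer to and from a host array with the legacy helper;
 * "guest_paddr" is a placeholder address.
 *
 *     uint8_t data[64];
 *     cpu_physical_memory_rw(guest_paddr, data, sizeof(data), 0);  // read
 *     cpu_physical_memory_rw(guest_paddr, data, sizeof(data), 1);  // write
 */
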
enum write_rom_type {
    WRITE_DATA,
    FLUSH_CACHE,
};

static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
    hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
{
    hwaddr l;
    uint8_t *ptr;
    hwaddr addr1;
    MemoryRegion *mr;

    rcu_read_lock();
    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, true);

        if (!(memory_region_is_ram(mr) ||
              memory_region_is_romd(mr))) {
            l = memory_access_size(mr, l, addr1);
        } else {
            /* ROM/RAM case */
            ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
            switch (type) {
            case WRITE_DATA:
                memcpy(ptr, buf, l);
                invalidate_and_set_dirty(mr, addr1, l);
                break;
            case FLUSH_CACHE:
                flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
                break;
            }
        }
        len -= l;
        buf += l;
        addr += l;
    }
    rcu_read_unlock();
}

/* used for ROM loading : can write in RAM and ROM */
void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
                                   const uint8_t *buf, int len)
{
    cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
}

void cpu_flush_icache_range(hwaddr start, int len)
{
    /*
     * This function should do the same thing as an icache flush that was
     * triggered from within the guest. For TCG we are always cache coherent,
     * so there is no need to flush anything. For KVM / Xen we need to flush
     * the host's instruction cache at least.
     */
    if (tcg_enabled()) {
        return;
    }

    cpu_physical_memory_write_rom_internal(&address_space_memory,
                                           start, NULL, len, FLUSH_CACHE);
}

typedef struct {
    MemoryRegion *mr;
    void *buffer;
    hwaddr addr;
    hwaddr len;
    bool in_use;
} BounceBuffer;

static BounceBuffer bounce;

typedef struct MapClient {
    QEMUBH *bh;
    QLIST_ENTRY(MapClient) link;
} MapClient;

QemuMutex map_client_list_lock;
static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);

static void cpu_unregister_map_client_do(MapClient *client)
{
    QLIST_REMOVE(client, link);
    g_free(client);
}

static void cpu_notify_map_clients_locked(void)
{
    MapClient *client;

    while (!QLIST_EMPTY(&map_client_list)) {
        client = QLIST_FIRST(&map_client_list);
        qemu_bh_schedule(client->bh);
        cpu_unregister_map_client_do(client);
    }
}

void cpu_register_map_client(QEMUBH *bh)
{
    MapClient *client = g_malloc(sizeof(*client));

    qemu_mutex_lock(&map_client_list_lock);
    client->bh = bh;
    QLIST_INSERT_HEAD(&map_client_list, client, link);
    if (!atomic_read(&bounce.in_use)) {
        cpu_notify_map_clients_locked();
    }
    qemu_mutex_unlock(&map_client_list_lock);
}

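/*
 * Illustrative sketch (not part of the original source): a device model
 * whose address_space_map() call fails because the bounce buffer is busy
 * can register a bottom half to be scheduled once a retry may succeed;
 * "retry_dma_bh" and "s" are hypothetical names.
 *
 *     static void retry_dma_bh(void *opaque)
 *     {
 *         MyDeviceState *s = opaque;   // hypothetical device state
 *         // ... retry address_space_map() here ...
 *     }
 *
 *     QEMUBH *bh = qemu_bh_new(retry_dma_bh, s);
 *     cpu_register_map_client(bh);
 */
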
void cpu_exec_init_all(void)
{
    qemu_mutex_init(&ram_list.mutex);
    /* The data structures we set up here depend on knowing the page size,
     * so no more changes can be made after this point.
     * In an ideal world, nothing we did before we had finished the
     * machine setup would care about the target page size, and we could
     * do this much later, rather than requiring board models to state
     * up front what their requirements are.
     */
    finalize_target_page_bits();
    io_mem_init();
    memory_map_init();
    qemu_mutex_init(&map_client_list_lock);
}

void cpu_unregister_map_client(QEMUBH *bh)
{
    MapClient *client;

    qemu_mutex_lock(&map_client_list_lock);
    QLIST_FOREACH(client, &map_client_list, link) {
        if (client->bh == bh) {
            cpu_unregister_map_client_do(client);
            break;
        }
    }
    qemu_mutex_unlock(&map_client_list_lock);
}

static void cpu_notify_map_clients(void)
{
    qemu_mutex_lock(&map_client_list_lock);
    cpu_notify_map_clients_locked();
    qemu_mutex_unlock(&map_client_list_lock);
}

bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
{
    MemoryRegion *mr;
    hwaddr l, xlat;

    rcu_read_lock();
    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (!memory_access_is_direct(mr, is_write)) {
            l = memory_access_size(mr, l, addr);
            if (!memory_region_access_valid(mr, xlat, l, is_write)) {
                rcu_read_unlock();
                return false;
            }
        }

        len -= l;
        addr += l;
    }
    rcu_read_unlock();
    return true;
}

/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 */
void *address_space_map(AddressSpace *as,
                        hwaddr addr,
                        hwaddr *plen,
                        bool is_write)
{
    hwaddr len = *plen;
    hwaddr done = 0;
    hwaddr l, xlat, base;
    MemoryRegion *mr, *this_mr;
    void *ptr;

    if (len == 0) {
        return NULL;
    }

    l = len;
    rcu_read_lock();
    mr = address_space_translate(as, addr, &xlat, &l, is_write);

    if (!memory_access_is_direct(mr, is_write)) {
        if (atomic_xchg(&bounce.in_use, true)) {
            rcu_read_unlock();
            return NULL;
        }
        /* Avoid unbounded allocations */
        l = MIN(l, TARGET_PAGE_SIZE);
        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
        bounce.addr = addr;
        bounce.len = l;

        memory_region_ref(mr);
        bounce.mr = mr;
        if (!is_write) {
            address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
                               bounce.buffer, l);
        }

        rcu_read_unlock();
        *plen = l;
        return bounce.buffer;
    }

    base = xlat;

    for (;;) {
        len -= l;
        addr += l;
        done += l;
        if (len == 0) {
            break;
        }

        l = len;
        this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (this_mr != mr || xlat != base + done) {
            break;
        }
    }

    memory_region_ref(mr);
    *plen = done;
    ptr = qemu_ram_ptr_length(mr->ram_block, base, plen);
    rcu_read_unlock();

    return ptr;
}

/* Unmaps a memory region previously mapped by address_space_map().
 * Will also mark the memory as dirty if is_write == 1. access_len gives
 * the amount of memory that was actually read or written by the caller.
 */
void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                         int is_write, hwaddr access_len)
{
    if (buffer != bounce.buffer) {
        MemoryRegion *mr;
        ram_addr_t addr1;

        mr = memory_region_from_host(buffer, &addr1);
        assert(mr != NULL);
        if (is_write) {
            invalidate_and_set_dirty(mr, addr1, access_len);
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
        }
        memory_region_unref(mr);
        return;
    }
    if (is_write) {
        address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
                            bounce.buffer, access_len);
    }
    qemu_vfree(bounce.buffer);
    bounce.buffer = NULL;
    memory_region_unref(bounce.mr);
    atomic_mb_set(&bounce.in_use, false);
    cpu_notify_map_clients();
}

void *cpu_physical_memory_map(hwaddr addr,
                              hwaddr *plen,
                              int is_write)
{
    return address_space_map(&address_space_memory, addr, plen, is_write);
}

void cpu_physical_memory_unmap(void *buffer, hwaddr len,
                               int is_write, hwaddr access_len)
{
    return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
}

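/*
 * Illustrative DMA pattern (not part of the original source): map, access
 * the host pointer directly, then unmap with the number of bytes actually
 * written so dirty tracking stays accurate. Note the mapped length may come
 * back smaller than requested.
 *
 *     hwaddr plen = size;
 *     void *host = cpu_physical_memory_map(guest_paddr, &plen, 1);
 *     if (host) {
 *         memset(host, 0, plen);
 *         cpu_physical_memory_unmap(host, plen, 1, plen);
 *     }
 */
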
/* warning: addr must be aligned */
static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
                                                  MemTxAttrs attrs,
                                                  MemTxResult *result,
                                                  enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l, false);
    if (l < 4 || !memory_access_is_direct(mr, false)) {
        release_lock |= prepare_mmio_access(mr);

        /* I/O case */
        r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldl_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldl_be_p(ptr);
            break;
        default:
            val = ldl_p(ptr);
            break;
        }
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
    return val;
}

uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_NATIVE_ENDIAN);
}

uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_LITTLE_ENDIAN);
}

uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_BIG_ENDIAN);
}

uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

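/*
 * Illustrative use (not part of the original source): reading a 32-bit
 * little-endian value from guest physical memory regardless of the target's
 * native byte order; "dev_mmio_base" is a placeholder address.
 *
 *     uint32_t reg = ldl_le_phys(&address_space_memory, dev_mmio_base + 0x10);
 */
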
/* warning: addr must be aligned */
static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
                                                  MemTxAttrs attrs,
                                                  MemTxResult *result,
                                                  enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 8;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 false);
    if (l < 8 || !memory_access_is_direct(mr, false)) {
        release_lock |= prepare_mmio_access(mr);

        /* I/O case */
        r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap64(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap64(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldq_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldq_be_p(ptr);
            break;
        default:
            val = ldq_p(ptr);
            break;
        }
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
    return val;
}

uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_NATIVE_ENDIAN);
}

uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_LITTLE_ENDIAN);
}

uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_BIG_ENDIAN);
}

uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
                            MemTxAttrs attrs, MemTxResult *result)
{
    uint8_t val;
    MemTxResult r;

    r = address_space_rw(as, addr, attrs, &val, 1, 0);
    if (result) {
        *result = r;
    }
    return val;
}

uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* warning: addr must be aligned */
static inline uint32_t address_space_lduw_internal(AddressSpace *as,
                                                   hwaddr addr,
                                                   MemTxAttrs attrs,
                                                   MemTxResult *result,
                                                   enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 false);
    if (l < 2 || !memory_access_is_direct(mr, false)) {
        release_lock |= prepare_mmio_access(mr);

        /* I/O case */
        r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = lduw_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = lduw_be_p(ptr);
            break;
        default:
            val = lduw_p(ptr);
            break;
        }
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
    return val;
}

uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
                            MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_NATIVE_ENDIAN);
}

uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
                               MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_LITTLE_ENDIAN);
}

uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
                               MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_BIG_ENDIAN);
}

uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* warning: addr must be aligned. The ram page is not masked as dirty
   and the code inside is not invalidated. It is useful if the dirty
   bits are used to track modified PTEs */
void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
                                MemTxAttrs attrs, MemTxResult *result)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;
    MemTxResult r;
    uint8_t dirty_log_mask;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
        release_lock |= prepare_mmio_access(mr);

        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
    } else {
        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
        stl_p(ptr, val);

        dirty_log_mask = memory_region_get_dirty_log_mask(mr);
        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
        cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
                                            4, dirty_log_mask);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
}

void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* warning: addr must be aligned */
static inline void address_space_stl_internal(AddressSpace *as,
                                              hwaddr addr, uint32_t val,
                                              MemTxAttrs attrs,
                                              MemTxResult *result,
                                              enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
        release_lock |= prepare_mmio_access(mr);

#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
    } else {
        /* RAM case */
        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(mr, addr1, 4);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
}

void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_NATIVE_ENDIAN);
}

void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_LITTLE_ENDIAN);
}

void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_BIG_ENDIAN);
}

void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    uint8_t v = val;
    MemTxResult r;

    r = address_space_rw(as, addr, attrs, &v, 1, 1);
    if (result) {
        *result = r;
    }
}

void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* warning: addr must be aligned */
static inline void address_space_stw_internal(AddressSpace *as,
                                              hwaddr addr, uint32_t val,
                                              MemTxAttrs attrs,
                                              MemTxResult *result,
                                              enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l, true);
    if (l < 2 || !memory_access_is_direct(mr, true)) {
        release_lock |= prepare_mmio_access(mr);

#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
    } else {
        /* RAM case */
        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(mr, addr1, 2);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
}

void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_NATIVE_ENDIAN);
}

void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_LITTLE_ENDIAN);
}

void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_BIG_ENDIAN);
}

void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;

    val = tswap64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;

    val = cpu_to_le64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;

    val = cpu_to_be64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    hwaddr phys_addr;
    target_ulong page;

    while (len > 0) {
        int asidx;
        MemTxAttrs attrs;

        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
        asidx = cpu_asidx_from_attrs(cpu, attrs);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1)
            return -1;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write) {
            cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
                                          phys_addr, buf, l);
        } else {
            address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
                             MEMTXATTRS_UNSPECIFIED,
                             buf, l, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}

/*
 * Allows code that needs to deal with migration bitmaps etc to still be built
 * target independent.
 */
size_t qemu_target_page_bits(void)
{
    return TARGET_PAGE_BITS;
}

/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}

#ifndef CONFIG_USER_ONLY
bool cpu_physical_memory_is_io(hwaddr phys_addr)
{
    MemoryRegion*mr;
    hwaddr l = 1;
    bool res;

    rcu_read_lock();
    mr = address_space_translate(&address_space_memory,
                                 phys_addr, &phys_addr, &l, false);

    res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
    rcu_read_unlock();
    return res;
}

int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;
    int ret = 0;

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        ret = func(block->idstr, block->host, block->offset,
                   block->used_length, opaque);
        if (ret) {
            break;
        }
    }
    rcu_read_unlock();
    return ret;
}

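/*
 * Illustrative sketch (not part of the original source): a callback matching
 * the RAMBlockIterFunc contract that sums the used length of every RAM
 * block; returning non-zero from the callback stops the iteration early.
 * "count_ram" and "total" are hypothetical names.
 *
 *     static int count_ram(const char *idstr, void *host_addr,
 *                          ram_addr_t offset, ram_addr_t length, void *opaque)
 *     {
 *         *(uint64_t *)opaque += length;
 *         return 0;
 *     }
 *
 *     uint64_t total = 0;
 *     qemu_ram_foreach_block(count_ram, &total);
 */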