/******************************************************************************
 * x86 specific paging support
 * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
 * Copyright (c) 2007 XenSource Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <asm/paging.h>
#include <asm/shadow.h>
#include <asm/p2m.h>
#include <asm/hap.h>
#include <asm/guest_access.h>
#include <xen/numa.h>
#include <xsm/xsm.h>
#define hap_enabled(d) (is_hvm_domain(d) && (d)->arch.hvm_domain.hap_enabled)

#define PAGING_PRINTK(_f, _a...)                                     \
    debugtrace_printk("pg: %s(): " _f, __func__, ##_a)
#define PAGING_ERROR(_f, _a...)                                      \
    printk("pg error: %s(): " _f, __func__, ##_a)
#define PAGING_DEBUG(flag, _f, _a...)                                \
    do {                                                             \
        if ( PAGING_DEBUG_ ## flag )                                 \
            debugtrace_printk("pgdebug: %s(): " _f, __func__, ##_a); \
    } while ( 0 )
/************************************************/
/*              LOG DIRTY SUPPORT               */
/************************************************/

/* Override macros from asm/page.h to make them work with mfn_t */
#undef mfn_to_page
#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
#undef mfn_valid
#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
#undef page_to_mfn
#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
/* The log-dirty lock.  This protects the log-dirty bitmap from
 * concurrent accesses (and teardowns, etc).
 *
 * Locking discipline: always acquire shadow or HAP lock before this one.
 *
 * Because mark_dirty is called from a lot of places, the log-dirty lock
 * may be acquired with the shadow or HAP locks already held.  When the
 * log-dirty code makes callbacks into HAP or shadow code to reset
 * various traps that will trigger the mark_dirty calls, it must *not*
 * have the log-dirty lock held, or it risks deadlock.  Because the only
 * purpose of those calls is to make sure that *guest* actions will
 * cause mark_dirty to be called (hypervisor actions explicitly call it
 * anyway), it is safe to release the log-dirty lock before the callback
 * as long as the domain is paused for the entire operation. */
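/* Concretely (an illustrative restatement of the discipline above):
 * shadow code calls paging_mark_dirty() with the shadow lock held, which
 * gives the order shadow-lock -> log-dirty-lock.  If a log-dirty callback
 * into shadow code ran with the log-dirty lock still held, its
 * shadow-lock acquisition would invert that order, and the two paths
 * could deadlock against each other. */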
#define log_dirty_lock_init(_d)                                   \
    do {                                                          \
        spin_lock_init(&(_d)->arch.paging.log_dirty.lock);        \
        (_d)->arch.paging.log_dirty.locker = -1;                  \
        (_d)->arch.paging.log_dirty.locker_function = "nobody";   \
    } while (0)

#define log_dirty_lock(_d)                                                   \
    do {                                                                     \
        if (unlikely((_d)->arch.paging.log_dirty.locker==current->processor))\
        {                                                                    \
            printk("Error: paging log dirty lock held by %s\n",              \
                   (_d)->arch.paging.log_dirty.locker_function);             \
            BUG();                                                           \
        }                                                                    \
        spin_lock(&(_d)->arch.paging.log_dirty.lock);                        \
        ASSERT((_d)->arch.paging.log_dirty.locker == -1);                    \
        (_d)->arch.paging.log_dirty.locker = current->processor;             \
        (_d)->arch.paging.log_dirty.locker_function = __func__;              \
    } while (0)

#define log_dirty_unlock(_d)                                              \
    do {                                                                  \
        ASSERT((_d)->arch.paging.log_dirty.locker == current->processor); \
        (_d)->arch.paging.log_dirty.locker = -1;                          \
        (_d)->arch.paging.log_dirty.locker_function = "nobody";           \
        spin_unlock(&(_d)->arch.paging.log_dirty.lock);                   \
    } while (0)
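
/* The log-dirty bitmap is kept as a sparse four-level trie rooted at
 * log_dirty.top: an L4 node of LOGDIRTY_NODE_ENTRIES mfns, each naming an
 * L3 node, and so on down to L2 nodes whose entries name leaf pages.  Each
 * leaf page is a plain bitmap with one bit per pfn.  Levels are allocated
 * on demand, the first time a pfn in their range is marked dirty (a hedged
 * summary inferred from the allocation and walk code below). */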
static mfn_t paging_new_log_dirty_page(struct domain *d, void **mapping_p)
{
    struct page_info *page;

    page = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
    if ( unlikely(page == NULL) )
    {
        d->arch.paging.log_dirty.failed_allocs++;
        return _mfn(INVALID_MFN);
    }

    d->arch.paging.log_dirty.allocs++;
    *mapping_p = __map_domain_page(page);

    return page_to_mfn(page);
}
static mfn_t paging_new_log_dirty_leaf(
    struct domain *d, unsigned long **leaf_p)
{
    mfn_t mfn = paging_new_log_dirty_page(d, (void **)leaf_p);
    if ( mfn_valid(mfn) )
        /* A fresh leaf must start with no bits set. */
        clear_page(*leaf_p);
    return mfn;
}
static mfn_t paging_new_log_dirty_node(struct domain *d, mfn_t **node_p)
{
    int i;
    mfn_t mfn = paging_new_log_dirty_page(d, (void **)node_p);
    if ( mfn_valid(mfn) )
        for ( i = 0; i < LOGDIRTY_NODE_ENTRIES; i++ )
            (*node_p)[i] = _mfn(INVALID_MFN);
    return mfn;
}
int paging_alloc_log_dirty_bitmap(struct domain *d)
{
    mfn_t *mapping;

    if ( mfn_valid(d->arch.paging.log_dirty.top) )
        return 0;

    d->arch.paging.log_dirty.top = paging_new_log_dirty_node(d, &mapping);
    if ( unlikely(!mfn_valid(d->arch.paging.log_dirty.top)) )
    {
        /* Clear error indicator since we're reporting this one */
        d->arch.paging.log_dirty.failed_allocs = 0;
        return -ENOMEM;
    }
    unmap_domain_page(mapping);

    return 0;
}
static void paging_free_log_dirty_page(struct domain *d, mfn_t mfn)
{
    d->arch.paging.log_dirty.allocs--;
    free_domheap_page(mfn_to_page(mfn));
}
void paging_free_log_dirty_bitmap(struct domain *d)
{
    mfn_t *l4, *l3, *l2;
    int i4, i3, i2;

    if ( !mfn_valid(d->arch.paging.log_dirty.top) )
        return;

    l4 = map_domain_page(mfn_x(d->arch.paging.log_dirty.top));

    for ( i4 = 0; i4 < LOGDIRTY_NODE_ENTRIES; i4++ )
    {
        if ( !mfn_valid(l4[i4]) )
            continue;

        l3 = map_domain_page(mfn_x(l4[i4]));

        for ( i3 = 0; i3 < LOGDIRTY_NODE_ENTRIES; i3++ )
        {
            if ( !mfn_valid(l3[i3]) )
                continue;

            l2 = map_domain_page(mfn_x(l3[i3]));

            for ( i2 = 0; i2 < LOGDIRTY_NODE_ENTRIES; i2++ )
                if ( mfn_valid(l2[i2]) )
                    paging_free_log_dirty_page(d, l2[i2]);

            unmap_domain_page(l2);
            paging_free_log_dirty_page(d, l3[i3]);
        }

        unmap_domain_page(l3);
        paging_free_log_dirty_page(d, l4[i4]);
    }

    unmap_domain_page(l4);
    paging_free_log_dirty_page(d, d->arch.paging.log_dirty.top);
    d->arch.paging.log_dirty.top = _mfn(INVALID_MFN);

    ASSERT(d->arch.paging.log_dirty.allocs == 0);
    d->arch.paging.log_dirty.failed_allocs = 0;
}
int paging_log_dirty_enable(struct domain *d)
{
    int ret;

    domain_pause(d);
    log_dirty_lock(d);

    if ( paging_mode_log_dirty(d) )
    {
        ret = -EINVAL;
        goto out;
    }

    ret = paging_alloc_log_dirty_bitmap(d);
    if ( ret != 0 )
    {
        paging_free_log_dirty_bitmap(d);
        goto out;
    }

    log_dirty_unlock(d);

    /* Safe because the domain is paused. */
    ret = d->arch.paging.log_dirty.enable_log_dirty(d);

    /* Possibility of leaving the bitmap allocated here but it'll be
     * tidied on domain teardown. */
    domain_unpause(d);
    return ret;

 out:
    log_dirty_unlock(d);
    domain_unpause(d);
    return ret;
}
int paging_log_dirty_disable(struct domain *d)
{
    int ret;

    domain_pause(d);
    /* Safe because the domain is paused. */
    ret = d->arch.paging.log_dirty.disable_log_dirty(d);
    log_dirty_lock(d);
    if ( !paging_mode_log_dirty(d) )
        paging_free_log_dirty_bitmap(d);
    log_dirty_unlock(d);
    domain_unpause(d);

    return ret;
}
/* Mark a page as dirty */
void paging_mark_dirty(struct domain *d, unsigned long guest_mfn)
{
    unsigned long pfn;
    mfn_t gmfn;
    int changed;
    mfn_t mfn, *l4, *l3, *l2;
    unsigned long *l1;
    int i1, i2, i3, i4;

    gmfn = _mfn(guest_mfn);

    if ( !paging_mode_log_dirty(d) || !mfn_valid(gmfn) ||
         page_get_owner(mfn_to_page(gmfn)) != d )
        return;

    log_dirty_lock(d);

    ASSERT(mfn_valid(d->arch.paging.log_dirty.top));

    /* We /really/ mean PFN here, even for non-translated guests. */
    pfn = get_gpfn_from_mfn(mfn_x(gmfn));
    /* Shared MFNs should NEVER be marked dirty */
    BUG_ON(SHARED_M2P(pfn));

    /*
     * Values with the MSB set denote MFNs that aren't really part of the
     * domain's pseudo-physical memory map (e.g., the shared info frame).
     * Nothing to do here...
     */
    if ( unlikely(!VALID_M2P(pfn)) )
        goto out;

    i1 = L1_LOGDIRTY_IDX(pfn);
    i2 = L2_LOGDIRTY_IDX(pfn);
    i3 = L3_LOGDIRTY_IDX(pfn);
    i4 = L4_LOGDIRTY_IDX(pfn);
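
    /* The four indices above pick one entry at each level of the trie
     * (the L?_LOGDIRTY_IDX macros are defined outside this file, in
     * asm/paging.h).  As a worked example of the arithmetic: a 4K leaf
     * page holds PAGE_SIZE * 8 = 32768 bits, so i1 is the pfn's bit
     * position within its 32K-pfn window, while i2/i3/i4 select which
     * window -- which is why paging_log_dirty_range() below cares about
     * begin_pfn being "32K aligned". */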
    l4 = map_domain_page(mfn_x(d->arch.paging.log_dirty.top));
    mfn = l4[i4];
    if ( !mfn_valid(mfn) )
        mfn = l4[i4] = paging_new_log_dirty_node(d, &l3);
    else
        l3 = map_domain_page(mfn_x(mfn));
    unmap_domain_page(l4);
    if ( unlikely(!mfn_valid(mfn)) )
        goto out;

    mfn = l3[i3];
    if ( !mfn_valid(mfn) )
        mfn = l3[i3] = paging_new_log_dirty_node(d, &l2);
    else
        l2 = map_domain_page(mfn_x(mfn));
    unmap_domain_page(l3);
    if ( unlikely(!mfn_valid(mfn)) )
        goto out;

    mfn = l2[i2];
    if ( !mfn_valid(mfn) )
        mfn = l2[i2] = paging_new_log_dirty_leaf(d, &l1);
    else
        l1 = map_domain_page(mfn_x(mfn));
    unmap_domain_page(l2);
    if ( unlikely(!mfn_valid(mfn)) )
        goto out;

    changed = !__test_and_set_bit(i1, l1);
    unmap_domain_page(l1);
    if ( changed )
    {
        PAGING_DEBUG(LOGDIRTY,
                     "marked mfn %" PRI_mfn " (pfn=%lx), dom %d\n",
                     mfn_x(gmfn), pfn, d->domain_id);
        d->arch.paging.log_dirty.dirty_count++;
    }

 out:
    log_dirty_unlock(d);
}
/* Read a domain's log-dirty bitmap and stats.  If the operation is a CLEAN,
 * clear the bitmap and stats as well. */
int paging_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc)
{
    int rv = 0, clean = 0, peek = 1;
    unsigned long pages = 0;
    mfn_t *l4, *l3, *l2;
    unsigned long *l1;
    int i4, i3, i2;

    domain_pause(d);
    log_dirty_lock(d);

    clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN);

    PAGING_DEBUG(LOGDIRTY, "log-dirty %s: dom %u faults=%u dirty=%u\n",
                 (clean) ? "clean" : "peek",
                 d->domain_id,
                 d->arch.paging.log_dirty.fault_count,
                 d->arch.paging.log_dirty.dirty_count);

    sc->stats.fault_count = d->arch.paging.log_dirty.fault_count;
    sc->stats.dirty_count = d->arch.paging.log_dirty.dirty_count;

    if ( clean )
    {
        d->arch.paging.log_dirty.fault_count = 0;
        d->arch.paging.log_dirty.dirty_count = 0;
    }

    if ( guest_handle_is_null(sc->dirty_bitmap) )
        /* caller may have wanted just to clean the state or access stats. */
        peek = 0;

    if ( (peek || clean) && !mfn_valid(d->arch.paging.log_dirty.top) )
    {
        rv = -EINVAL; /* perhaps should be ENOMEM? */
        goto out;
    }

    if ( unlikely(d->arch.paging.log_dirty.failed_allocs) ) {
        printk("%s: %d failed page allocs while logging dirty pages\n",
               __FUNCTION__, d->arch.paging.log_dirty.failed_allocs);
        rv = -ENOMEM;
        goto out;
    }

    l4 = (mfn_valid(d->arch.paging.log_dirty.top) ?
          map_domain_page(mfn_x(d->arch.paging.log_dirty.top)) : NULL);

    for ( i4 = 0;
          (pages < sc->pages) && (i4 < LOGDIRTY_NODE_ENTRIES);
          i4++ )
    {
        l3 = (l4 && mfn_valid(l4[i4])) ? map_domain_page(mfn_x(l4[i4])) : NULL;
        for ( i3 = 0;
              (pages < sc->pages) && (i3 < LOGDIRTY_NODE_ENTRIES);
              i3++ )
        {
            l2 = ((l3 && mfn_valid(l3[i3])) ?
                  map_domain_page(mfn_x(l3[i3])) : NULL);
            for ( i2 = 0;
                  (pages < sc->pages) && (i2 < LOGDIRTY_NODE_ENTRIES);
                  i2++ )
            {
                static unsigned long zeroes[PAGE_SIZE/BYTES_PER_LONG];
                unsigned int bytes = PAGE_SIZE;
                l1 = ((l2 && mfn_valid(l2[i2])) ?
                      map_domain_page(mfn_x(l2[i2])) : zeroes);
                if ( unlikely(((sc->pages - pages + 7) >> 3) < bytes) )
                    bytes = (unsigned int)((sc->pages - pages + 7) >> 3);
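                /* Units: sc->pages and "pages" count pfns, i.e. bits of
                 * bitmap; ((x + 7) >> 3) converts the bits still wanted
                 * into whole bytes, rounding up -- e.g. 20 remaining bits
                 * copy 3 bytes rather than 2. */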
                if ( likely(peek) )
                {
                    if ( copy_to_guest_offset(sc->dirty_bitmap, pages >> 3,
                                              (uint8_t *)l1, bytes) != 0 )
                    {
                        rv = -EFAULT;
                        goto out;
                    }
                }

                if ( clean && l1 != zeroes )
                    clear_page(l1);
                pages += bytes << 3;
                if ( l1 != zeroes )
                    unmap_domain_page(l1);
            }
            if ( l2 )
                unmap_domain_page(l2);
        }
        if ( l3 )
            unmap_domain_page(l3);
    }
    if ( l4 )
        unmap_domain_page(l4);

    if ( pages < sc->pages )
        sc->pages = pages;

    log_dirty_unlock(d);

    if ( clean )
    {
        /* We need to further call clean_dirty_bitmap() functions of specific
         * paging modes (shadow or hap).  Safe because the domain is paused. */
        d->arch.paging.log_dirty.clean_dirty_bitmap(d);
    }
    domain_unpause(d);
    return rv;

 out:
    log_dirty_unlock(d);
    domain_unpause(d);
    return rv;
}
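
/* A usage note (hedged): these two ops back the toolstack's
 * xc_shadow_control() during live migration -- PEEK reads the bitmap
 * without disturbing it, while CLEAN atomically reads and resets it so
 * that the next migration round only sees pages dirtied since this one. */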
int paging_log_dirty_range(struct domain *d,
                           unsigned long begin_pfn,
                           unsigned long nr,
                           XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
{
    int rv = 0;
    unsigned long pages = 0;
    mfn_t *l4, *l3, *l2;
    unsigned long *l1;
    int b1, b2, b3, b4;
    int i2, i3, i4;

    d->arch.paging.log_dirty.clean_dirty_bitmap(d);
    log_dirty_lock(d);

    PAGING_DEBUG(LOGDIRTY, "log-dirty-range: dom %u faults=%u dirty=%u\n",
                 d->domain_id,
                 d->arch.paging.log_dirty.fault_count,
                 d->arch.paging.log_dirty.dirty_count);

    if ( !mfn_valid(d->arch.paging.log_dirty.top) )
    {
        rv = -EINVAL; /* perhaps should be ENOMEM? */
        goto out;
    }

    if ( unlikely(d->arch.paging.log_dirty.failed_allocs) ) {
        printk("%s: %d failed page allocs while logging dirty pages\n",
               __FUNCTION__, d->arch.paging.log_dirty.failed_allocs);
        rv = -ENOMEM;
        goto out;
    }

    /* Nothing was dirtied at all: report an all-clear bitmap. */
    if ( !d->arch.paging.log_dirty.fault_count &&
         !d->arch.paging.log_dirty.dirty_count ) {
        int size = (nr + BITS_PER_LONG - 1) / BITS_PER_LONG;
        unsigned long zeroes[size];
        memset(zeroes, 0x00, size * BYTES_PER_LONG);
        if ( copy_to_guest_offset(dirty_bitmap, 0, (uint8_t *) zeroes,
                                  size * BYTES_PER_LONG) != 0 )
            rv = -EFAULT;
        goto out;
    }
    d->arch.paging.log_dirty.fault_count = 0;
    d->arch.paging.log_dirty.dirty_count = 0;

    b1 = L1_LOGDIRTY_IDX(begin_pfn);
    b2 = L2_LOGDIRTY_IDX(begin_pfn);
    b3 = L3_LOGDIRTY_IDX(begin_pfn);
    b4 = L4_LOGDIRTY_IDX(begin_pfn);
    l4 = map_domain_page(mfn_x(d->arch.paging.log_dirty.top));

    for ( i4 = b4;
          (pages < nr) && (i4 < LOGDIRTY_NODE_ENTRIES);
          i4++ )
    {
        l3 = mfn_valid(l4[i4]) ? map_domain_page(mfn_x(l4[i4])) : NULL;
        for ( i3 = b3;
              (pages < nr) && (i3 < LOGDIRTY_NODE_ENTRIES);
              i3++ )
        {
            l2 = ((l3 && mfn_valid(l3[i3])) ?
                  map_domain_page(mfn_x(l3[i3])) : NULL);
            for ( i2 = b2;
                  (pages < nr) && (i2 < LOGDIRTY_NODE_ENTRIES);
                  i2++ )
            {
                static unsigned long zeroes[PAGE_SIZE/BYTES_PER_LONG];
                unsigned int bytes = PAGE_SIZE;
                uint8_t *s;
                l1 = ((l2 && mfn_valid(l2[i2])) ?
                      map_domain_page(mfn_x(l2[i2])) : zeroes);

                s = ((uint8_t*)l1) + (b1 >> 3);

                if ( likely(((nr - pages + 7) >> 3) < bytes) )
                    bytes = (unsigned int)((nr - pages + 7) >> 3);

                /* begin_pfn is not 32K aligned, hence we have to bit
                 * shift the bitmap */
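                /* The caller's bitmap is indexed from begin_pfn, so bit 0
                 * of the output must correspond to bit b1 of the leaf.
                 * When b1 is not byte-aligned (b1 & 7 != 0), each output
                 * word below is stitched together from two adjacent input
                 * words, shifted right by (b1 & 7) bits. */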
                if ( b1 & 0x7 )
                {
                    int i, j;
                    int bits = b1 & 0x7;
                    uint32_t *l = (uint32_t*) s;
                    int bitmask = (1 << bits) - 1;
                    int size = (bytes + BYTES_PER_LONG - 1) / BYTES_PER_LONG;
                    unsigned long bitmap[size];
                    static unsigned long printed = 0;

                    if ( printed != begin_pfn )
                    {
                        dprintk(XENLOG_DEBUG, "%s: begin_pfn %lx is not 32K aligned!\n",
                                __FUNCTION__, begin_pfn);
                        printed = begin_pfn;
                    }

                    for ( i = 0; i < size - 1; i++, l++ ) {
                        bitmap[i] = ((*l) >> bits) |
                            (((*((uint8_t*)(l + 1))) & bitmask) << (sizeof(*l) * 8 - bits));
                    }
                    /* Assemble the final, partial word byte by byte. */
                    s = (uint8_t*) l;
                    size = BYTES_PER_LONG - ((b1 >> 3) & 0x3);
                    bitmap[i] = 0;
                    for ( j = 0; j < size; j++, s++ )
                        bitmap[i] |= (*s) << (j * 8);
                    bitmap[i] = (bitmap[i] >> bits) | (bitmask << (size * 8 - bits));
                    if ( copy_to_guest_offset(dirty_bitmap, (pages >> 3),
                                              (uint8_t*) bitmap, bytes) != 0 )
                    {
                        rv = -EFAULT;
                        goto out;
                    }
                }
                else
                {
                    if ( copy_to_guest_offset(dirty_bitmap, pages >> 3,
                                              s, bytes) != 0 )
                    {
                        rv = -EFAULT;
                        goto out;
                    }
                }

                pages += bytes << 3;
                if ( l1 != zeroes )
                    unmap_domain_page(l1);
            }
            if ( l2 )
                unmap_domain_page(l2);
        }
        if ( l3 )
            unmap_domain_page(l3);
    }
    unmap_domain_page(l4);

 out:
    log_dirty_unlock(d);
    return rv;
}
/* Note that this function takes three function pointers. Callers must supply
 * these functions for the log-dirty code to call. This function usually is
 * invoked when paging is enabled. Check shadow_enable() and hap_enable() for
 * reference.
 *
 * These function pointers must not be followed with the log-dirty lock held.
 */
void paging_log_dirty_init(struct domain *d,
                           int  (*enable_log_dirty)(struct domain *d),
                           int  (*disable_log_dirty)(struct domain *d),
                           void (*clean_dirty_bitmap)(struct domain *d))
{
    /* We initialize the log dirty lock first */
    log_dirty_lock_init(d);

    d->arch.paging.log_dirty.enable_log_dirty = enable_log_dirty;
    d->arch.paging.log_dirty.disable_log_dirty = disable_log_dirty;
    d->arch.paging.log_dirty.clean_dirty_bitmap = clean_dirty_bitmap;
    d->arch.paging.log_dirty.top = _mfn(INVALID_MFN);
}
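
/* For reference, a hedged sketch of how a paging mode registers its
 * callbacks (the shadow code does effectively this from its domain-init
 * path):
 *
 *     paging_log_dirty_init(d, shadow_enable_log_dirty,
 *                           shadow_disable_log_dirty,
 *                           shadow_clean_dirty_bitmap);
 */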
/* This function frees log dirty bitmap resources. */
static void paging_log_dirty_teardown(struct domain *d)
{
    log_dirty_lock(d);
    paging_free_log_dirty_bitmap(d);
    log_dirty_unlock(d);
}
/************************************************/
/*           CODE FOR PAGING SUPPORT            */
/************************************************/
/* Domain paging struct initialization. */
int paging_domain_init(struct domain *d, unsigned int domcr_flags)
{
    int rc;

    if ( (rc = p2m_init(d)) != 0 )
        return rc;

    /* The order of the *_init calls below is important, as the later
     * ones may rewrite some common fields.  Shadow pagetables are the
     * default... */
    shadow_domain_init(d, domcr_flags);

    /* ... but we will use hardware assistance if it's available. */
    if ( hap_enabled(d) )
        hap_domain_init(d);

    return 0;
}
/* vcpu paging struct initialization goes here */
void paging_vcpu_init(struct vcpu *v)
{
    if ( hap_enabled(v->domain) )
        hap_vcpu_init(v);
    else
        shadow_vcpu_init(v);
}
int paging_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
                  XEN_GUEST_HANDLE(void) u_domctl)
{
    int rc;

    if ( unlikely(d == current->domain) )
    {
        gdprintk(XENLOG_INFO, "Tried to do a paging op on itself.\n");
        return -EINVAL;
    }

    if ( unlikely(d->is_dying) )
    {
        gdprintk(XENLOG_INFO, "Ignoring paging op on dying domain %u\n",
                 d->domain_id);
        return 0;
    }

    if ( unlikely(d->vcpu == NULL) || unlikely(d->vcpu[0] == NULL) )
    {
        PAGING_ERROR("Paging op on a domain (%u) with no vcpus\n",
                     d->domain_id);
        return -EINVAL;
    }

    rc = xsm_shadow_control(d, sc->op);
    if ( rc )
        return rc;

    /* Code to handle log-dirty. Note that some log-dirty operations
     * piggy-back on shadow operations. For example, when
     * XEN_DOMCTL_SHADOW_OP_OFF is called, it first checks whether log-dirty
     * mode is enabled. If it is, we disable log-dirty and continue with the
     * shadow code. For this reason, we need to further dispatch the domctl
     * to the next-level paging code (shadow or hap).
     */
    switch ( sc->op )
    {
    case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY:
        if ( hap_enabled(d) )
            hap_logdirty_init(d);
        return paging_log_dirty_enable(d);

    case XEN_DOMCTL_SHADOW_OP_ENABLE:
        if ( sc->mode & XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY )
        {
            if ( hap_enabled(d) )
                hap_logdirty_init(d);
            return paging_log_dirty_enable(d);
        }
        break;

    case XEN_DOMCTL_SHADOW_OP_OFF:
        if ( paging_mode_log_dirty(d) )
            if ( (rc = paging_log_dirty_disable(d)) != 0 )
                return rc;
        break;

    case XEN_DOMCTL_SHADOW_OP_CLEAN:
    case XEN_DOMCTL_SHADOW_OP_PEEK:
        return paging_log_dirty_op(d, sc);
    }

    /* Here, dispatch domctl to the appropriate paging code */
    if ( hap_enabled(d) )
        return hap_domctl(d, sc, u_domctl);
    else
        return shadow_domctl(d, sc, u_domctl);
}
/* Call when destroying a domain */
void paging_teardown(struct domain *d)
{
    if ( hap_enabled(d) )
        hap_teardown(d);
    else
        shadow_teardown(d);

    /* clean up log dirty resources. */
    paging_log_dirty_teardown(d);

    /* Move populate-on-demand cache back to domain_list for destruction */
    p2m_pod_empty_cache(d);
}
/* Call once all of the references to the domain have gone away */
void paging_final_teardown(struct domain *d)
{
    if ( hap_enabled(d) )
        hap_final_teardown(d);
    else
        shadow_final_teardown(d);

    p2m_final_teardown(d);
}
/* Enable an arbitrary paging-assistance mode.  Call once at domain
 * creation. */
int paging_enable(struct domain *d, u32 mode)
{
    if ( hap_enabled(d) )
        return hap_enable(d, mode | PG_HAP_enable);
    else
        return shadow_enable(d, mode | PG_SH_enable);
}
/* Print paging-assistance info to the console */
void paging_dump_domain_info(struct domain *d)
{
    if ( paging_mode_enabled(d) )
    {
        printk("    paging assistance: ");
        if ( paging_mode_shadow(d) )
            printk("shadow ");
        if ( paging_mode_hap(d) )
            printk("hap ");
        if ( paging_mode_refcounts(d) )
            printk("refcounts ");
        if ( paging_mode_log_dirty(d) )
            printk("log_dirty ");
        if ( paging_mode_translate(d) )
            printk("translate ");
        if ( paging_mode_external(d) )
            printk("external ");
        printk("\n");
    }
}
void paging_dump_vcpu_info(struct vcpu *v)
{
    if ( paging_mode_enabled(v->domain) )
    {
        printk("    paging assistance: ");
        if ( paging_mode_shadow(v->domain) )
        {
            if ( v->arch.paging.mode )
                printk("shadowed %u-on-%u\n",
                       v->arch.paging.mode->guest_levels,
                       v->arch.paging.mode->shadow.shadow_levels);
            else
                printk("not shadowed\n");
        }
        else if ( paging_mode_hap(v->domain) && v->arch.paging.mode )
            printk("hap, %u levels\n",
                   v->arch.paging.mode->guest_levels);
        else
            printk("none\n");
    }
}
/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */