/******************************************************************************
 * physical-to-machine mappings for automatically-translated domains.
 *
 * Parts of this code are Copyright (c) 2009 by Citrix Systems, Inc. (Patrick Colp)
 * Parts of this code are Copyright (c) 2007 by Advanced Micro Devices.
 * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <asm/domain.h>
#include <asm/paging.h>
#include <asm/hvm/vmx/vmx.h> /* ept_p2m_init() */
#include <xen/iommu.h>
#include <asm/mem_event.h>
#include <public/mem_event.h>
#include <asm/mem_sharing.h>
#include <xen/event.h>

/* Debugging and auditing of the P2M code? */
#define P2M_AUDIT     0
#define P2M_DEBUGGING 0

#define P2M_PRINTK(_f, _a...)                                \
    debugtrace_printk("p2m: %s(): " _f, __func__, ##_a)
#define P2M_ERROR(_f, _a...)                                 \
    printk("pg error: %s(): " _f, __func__, ##_a)
#if P2M_DEBUGGING
#define P2M_DEBUG(_f, _a...)                                 \
    debugtrace_printk("p2mdebug: %s(): " _f, __func__, ##_a)
#else
#define P2M_DEBUG(_f, _a...) do { (void)(_f); } while(0)
#endif

/* Override macros from asm/page.h to make them work with mfn_t */
#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))

/* PTE flags for the various types of p2m entry */
#define P2M_BASE_FLAGS \
        (_PAGE_PRESENT | _PAGE_USER | _PAGE_DIRTY | _PAGE_ACCESSED)

#define SUPERPAGE_PAGES (1UL << 9)
#define superpage_aligned(_x) (((_x)&(SUPERPAGE_PAGES-1))==0)
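/* SUPERPAGE_PAGES is 1UL << 9 == 512, the number of 4KB frames in one 2MB
 * superpage; superpage_aligned() thus tests whether a frame number (gfn or
 * mfn) is 2MB-aligned. */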
static unsigned long p2m_type_to_flags(p2m_type_t t)
{
    unsigned long flags;
#ifdef __x86_64__
    flags = (unsigned long)(t & 0x3fff) << 9;
#else
    flags = (t & 0x7UL) << 9;
#endif
#ifndef HAVE_GRANT_MAP_P2M
    BUG_ON(p2m_is_grant(t));
#endif
    switch ( t )
    {
    case p2m_grant_map_rw:
        return flags | P2M_BASE_FLAGS | _PAGE_RW;
    case p2m_ram_logdirty:
        return flags | P2M_BASE_FLAGS;
    case p2m_grant_map_ro:
        return flags | P2M_BASE_FLAGS;
        return flags | P2M_BASE_FLAGS;
        return flags | P2M_BASE_FLAGS | _PAGE_RW | _PAGE_PCD;
    case p2m_populate_on_demand:

#if P2M_AUDIT
static void audit_p2m(struct domain *d);
#else
# define audit_p2m(_d) do { (void)(_d); } while(0)
#endif /* P2M_AUDIT */

// Find the next level's P2M entry, checking for out-of-range gfn's...
// Returns NULL on error.
static l1_pgentry_t *
p2m_find_entry(void *table, unsigned long *gfn_remainder,
               unsigned long gfn, u32 shift, u32 max)
    index = *gfn_remainder >> shift;
        P2M_DEBUG("gfn=0x%lx out of range "
                  "(gfn_remainder=0x%lx shift=%d index=0x%x max=0x%x)\n",
                  gfn, *gfn_remainder, shift, index, max);
    *gfn_remainder &= (1 << shift) - 1;
    return (l1_pgentry_t *)table + index;
// Walk one level of the P2M table, allocating a new table if required.
// Returns 0 on error.
p2m_next_level(struct domain *d, mfn_t *table_mfn, void **table,
               unsigned long *gfn_remainder, unsigned long gfn, u32 shift,
               u32 max, unsigned long type)
    l1_pgentry_t *l1_entry;
    l1_pgentry_t *p2m_entry;
    l1_pgentry_t new_entry;

    ASSERT(d->arch.p2m->alloc_page);

    if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn,

    /* PoD: Not present doesn't imply empty. */
    if ( !l1e_get_flags(*p2m_entry) )
        struct page_info *pg = d->arch.p2m->alloc_page(d);
        page_list_add_tail(pg, &d->arch.p2m->pages);
        pg->u.inuse.type_info = type | 1 | PGT_validated;

        new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)),
                                 __PAGE_HYPERVISOR|_PAGE_USER);
        case PGT_l3_page_table:
            paging_write_p2m_entry(d, gfn,
                                   p2m_entry, *table_mfn, new_entry, 4);
        case PGT_l2_page_table:
#if CONFIG_PAGING_LEVELS == 3
            /* for PAE mode, PDPE only has PCD/PWT/P bits available */
            new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)), _PAGE_PRESENT);
            paging_write_p2m_entry(d, gfn,
                                   p2m_entry, *table_mfn, new_entry, 3);
        case PGT_l1_page_table:
            paging_write_p2m_entry(d, gfn,
                                   p2m_entry, *table_mfn, new_entry, 2);

    ASSERT(l1e_get_flags(*p2m_entry) & (_PAGE_PRESENT|_PAGE_PSE));

    /* split single large page into 4KB page in P2M table */
    if ( type == PGT_l1_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
        unsigned long flags, pfn;
        struct page_info *pg = d->arch.p2m->alloc_page(d);
        page_list_add_tail(pg, &d->arch.p2m->pages);
        pg->u.inuse.type_info = PGT_l1_page_table | 1 | PGT_validated;

        /* New splintered mappings inherit the flags of the old superpage,
         * with a little reorganisation for the _PAGE_PSE_PAT bit. */
        flags = l1e_get_flags(*p2m_entry);
        pfn = l1e_get_pfn(*p2m_entry);
        if ( pfn & 1 )           /* ==> _PAGE_PSE_PAT was set */
            pfn -= 1;            /* Clear it; _PAGE_PSE becomes _PAGE_PAT */
        flags &= ~_PAGE_PSE;     /* Clear _PAGE_PSE (== _PAGE_PAT) */

        l1_entry = __map_domain_page(pg);
        for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
            new_entry = l1e_from_pfn(pfn + i, flags);
            paging_write_p2m_entry(d, gfn,
                                   l1_entry+i, *table_mfn, new_entry, 1);
        unmap_domain_page(l1_entry);

        new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)),
                                 __PAGE_HYPERVISOR|_PAGE_USER);
        paging_write_p2m_entry(d, gfn,
                               p2m_entry, *table_mfn, new_entry, 2);

    *table_mfn = _mfn(l1e_get_pfn(*p2m_entry));
    next = map_domain_page(mfn_x(*table_mfn));
    unmap_domain_page(*table);

/*
 * Populate-on-demand functionality
 */

int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
                  unsigned int page_order, p2m_type_t p2mt);
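/* Overview of the populate-on-demand (PoD) machinery below: the domain keeps
 * a cache of pre-scrubbed pages (p2md->pod.super for 2MB runs, pod.single for
 * 4KB pages, sized by pod.count), while pod.entry_count tracks how many p2m
 * entries are still marked p2m_populate_on_demand.  On first access,
 * p2m_pod_demand_populate() backs such an entry with a page from the cache;
 * when the cache runs dry, the emergency sweeps further down reclaim all-zero
 * guest pages back into the cache. */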
p2m_pod_cache_add(struct domain *d,
                  struct page_info *page,
    struct p2m_domain *p2md = d->arch.p2m;

    mfn = page_to_mfn(page);

    /* Check to make sure this is a contiguous region */
    if ( mfn_x(mfn) & ((1 << order) - 1) )
        printk("%s: mfn %lx not aligned order %lu! (mask %lx)\n",
               __func__, mfn_x(mfn), order, ((1UL << order) - 1));

    for ( i = 0; i < 1 << order ; i++ ) {
        p = mfn_to_page(_mfn(mfn_x(mfn) + i));
        od = page_get_owner(p);
            printk("%s: mfn %lx expected owner d%d, got owner d%d!\n",
                   __func__, mfn_x(mfn), d->domain_id,
                   od ? od->domain_id : -1);

    ASSERT(p2m_locked_by_me(p2md));

    /* Pages from domain_alloc and returned by the balloon driver aren't
     * guaranteed to be zero; but by reclaiming zero pages, we implicitly
     * promise to provide zero pages. So we scrub pages before using. */
    for ( i = 0; i < (1 << order); i++ )
        char *b = map_domain_page(mfn_x(page_to_mfn(page)) + i);
        unmap_domain_page(b);

    spin_lock(&d->page_alloc_lock);

    /* First, take all pages off the domain list */
    for ( i = 0; i < 1 << order ; i++ )
        page_list_del(p, &d->page_list);

    /* Then add the first one to the appropriate populate-on-demand list */
        page_list_add_tail(page, &p2md->pod.super); /* lock: page_alloc */
        p2md->pod.count += 1 << order;
        page_list_add_tail(page, &p2md->pod.single); /* lock: page_alloc */
        p2md->pod.count += 1;

    /* Ensure that the PoD cache has never been emptied.
     * This may cause "zombie domains" since the page will never be freed. */
    BUG_ON( d->arch.relmem != RELMEM_not_started );

    spin_unlock(&d->page_alloc_lock);
/* Get a page of size order from the populate-on-demand cache.  Will break
 * down 2-meg pages into singleton pages automatically.  Returns null if
 * a superpage is requested and no superpages are available.  Must be called
 * with the d->page_alloc_lock held. */
static struct page_info * p2m_pod_cache_get(struct domain *d,
    struct p2m_domain *p2md = d->arch.p2m;
    struct page_info *p = NULL;

    if ( order == 9 && page_list_empty(&p2md->pod.super) )
    else if ( order == 0 && page_list_empty(&p2md->pod.single) )

        BUG_ON( page_list_empty(&p2md->pod.super) );

        /* Break up a superpage to make single pages.  NB count doesn't
         * need to be adjusted. */
        p = page_list_remove_head(&p2md->pod.super);
        mfn = mfn_x(page_to_mfn(p));

        for ( i = 0; i < SUPERPAGE_PAGES; i++ )
            q = mfn_to_page(_mfn(mfn+i));
            page_list_add_tail(q, &p2md->pod.single);

        BUG_ON( page_list_empty(&p2md->pod.super) );
        p = page_list_remove_head(&p2md->pod.super);
        p2md->pod.count -= 1 << order; /* Lock: page_alloc */

        BUG_ON( page_list_empty(&p2md->pod.single) );
        p = page_list_remove_head(&p2md->pod.single);
        p2md->pod.count -= 1;

    /* Put the pages back on the domain page_list */
    for ( i = 0 ; i < (1 << order) ; i++ )
        BUG_ON(page_get_owner(p + i) != d);
        page_list_add_tail(p + i, &d->page_list);
/* Set the size of the cache, allocating or freeing as necessary. */
p2m_pod_set_cache_target(struct domain *d, unsigned long pod_target)
    struct p2m_domain *p2md = d->arch.p2m;

    /* Increasing the target */
    while ( pod_target > p2md->pod.count )
        struct page_info * page;

        if ( (pod_target - p2md->pod.count) >= SUPERPAGE_PAGES )

        page = alloc_domheap_pages(d, order, 0);
        if ( unlikely(page == NULL) )
            /* If we can't allocate a superpage, try singleton pages */
            printk("%s: Unable to allocate domheap page for pod cache. target %lu cachesize %d\n",
                   __func__, pod_target, p2md->pod.count);

        p2m_pod_cache_add(d, page, order);

    /* Decreasing the target */
    /* We hold the p2m lock here, so we don't need to worry about
     * cache disappearing under our feet. */
    while ( pod_target < p2md->pod.count )
        struct page_info * page;

        /* Grab the lock before checking that pod.super is empty, or the last
         * entries may disappear before we grab the lock. */
        spin_lock(&d->page_alloc_lock);

        if ( (p2md->pod.count - pod_target) > SUPERPAGE_PAGES
             && !page_list_empty(&p2md->pod.super) )

        page = p2m_pod_cache_get(d, order);

        ASSERT(page != NULL);

        spin_unlock(&d->page_alloc_lock);

        for ( i = 0 ; i < (1 << order) ; i++ )
            /* Copied from common/memory.c:guest_remove_page() */
            if ( unlikely(!get_page(page+i, d)) )
                gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id);

            if ( test_and_clear_bit(_PGT_pinned, &(page+i)->u.inuse.type_info) )
                put_page_and_type(page+i);

            if ( test_and_clear_bit(_PGC_allocated, &(page+i)->count_info) )
 * The "right behavior" here requires some careful thought.  First, some
 *  + B: number of pages the balloon driver has ballooned down to.
 *  + P: Number of populated pages.
 *
 * The following equations should hold:
 *  0 <= P <= T <= B <= M
 *  d->arch.p2m->pod.entry_count == B - P
 *  d->tot_pages == P + d->arch.p2m->pod.count
 *
 * Now we have the following potential cases to cover:
 *     B <T': Set the PoD cache size equal to the number of outstanding PoD
 *   entries.  The balloon driver will deflate the balloon to give back
 *   the remainder of the ram to the guest OS.
 *  T <T'<B : Increase PoD cache size.
 *  T'<T<=B : Here we have a choice.  We can decrease the size of the cache,
 *   get the memory right away.  However, that means every time we
 *   reduce the memory target we risk the guest attempting to populate the
 *   memory before the balloon driver has reached its new target.  Safer to
 *   never reduce the cache size here, but only when the balloon driver frees
 *
 * If there are many zero pages, we could reach the target also by doing
 * zero sweeps and marking the ranges PoD; but the balloon driver will have
 * to free this memory eventually anyway, so we don't actually gain that much
 *
 * NB that the equation (B<T') may require adjustment to the cache
 * size as PoD pages are freed as well; i.e., freeing a PoD-backed
 * entry when pod.entry_count == pod.count requires us to reduce both
 * pod.entry_count and pod.count.
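/* Worked example of the identities above: with M = 1024, B = 512 and P = 128,
 * pod.entry_count = B - P = 384 entries remain outstanding; if pod.count is
 * also 384, then d->tot_pages = P + pod.count = 512 = B, i.e. the cache
 * exactly covers the outstanding PoD entries at the balloon target. */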
p2m_pod_set_mem_target(struct domain *d, unsigned long target)
    struct p2m_domain *p2md = d->arch.p2m;
    unsigned long populated;

    /* P == B: Nothing to do. */
    if ( p2md->pod.entry_count == 0 )

    /* Don't do anything if the domain is being torn down */

    /* T' < B: Don't reduce the cache size; let the balloon driver
     * take care of it. */
    if ( target < d->tot_pages )

    populated = d->tot_pages - p2md->pod.count;

    pod_target = target - populated;

    /* B < T': Set the cache size equal to # of outstanding entries,
     * let the balloon driver fill in the rest. */
    if ( pod_target > p2md->pod.entry_count )
        pod_target = p2md->pod.entry_count;

    ASSERT( pod_target >= p2md->pod.count );

    ret = p2m_pod_set_cache_target(d, pod_target);

p2m_pod_empty_cache(struct domain *d)
    struct p2m_domain *p2md = d->arch.p2m;
    struct page_info *page;

    /* After this barrier no new PoD activities can happen. */
    BUG_ON(!d->is_dying);
    spin_barrier(&p2md->lock);

    spin_lock(&d->page_alloc_lock);

    while ( (page = page_list_remove_head(&p2md->pod.super)) )
        for ( i = 0 ; i < SUPERPAGE_PAGES ; i++ )
            BUG_ON(page_get_owner(page + i) != d);
            page_list_add_tail(page + i, &d->page_list);

        p2md->pod.count -= SUPERPAGE_PAGES;

    while ( (page = page_list_remove_head(&p2md->pod.single)) )
        BUG_ON(page_get_owner(page) != d);
        page_list_add_tail(page, &d->page_list);

        p2md->pod.count -= 1;

    BUG_ON(p2md->pod.count != 0);

    spin_unlock(&d->page_alloc_lock);
/* This function is needed for two reasons:
 * + To properly handle clearing of PoD entries
 * + To "steal back" memory being freed for the PoD cache, rather than
 *
 * Once both of these functions have been completed, we can return and
 * allow decrease_reservation() to handle everything else.
p2m_pod_decrease_reservation(struct domain *d,
    struct p2m_domain *p2md = d->arch.p2m;
    int steal_for_cache = 0;
    int pod = 0, nonpod = 0, ram = 0;

    /* If we don't have any outstanding PoD entries, let things take their
    if ( p2md->pod.entry_count == 0 )

    /* Figure out if we need to steal some freed memory for our cache */
    steal_for_cache = ( p2md->pod.entry_count > p2md->pod.count );

    if ( unlikely(d->is_dying) )

    /* See what's in here. */
    /* FIXME: Add contiguous; query for PSE entries? */
    for ( i = 0; i < (1 << order); i++ )
        gfn_to_mfn_query(d, gpfn + i, &t);

        if ( t == p2m_populate_on_demand )

    /* No populate-on-demand?  Don't need to steal anything?  Then we're done!*/
    if ( !pod && !steal_for_cache )

    /* All PoD: Mark the whole region invalid and tell caller
        set_p2m_entry(d, gpfn, _mfn(INVALID_MFN), order, p2m_invalid);
        p2md->pod.entry_count -= (1 << order); /* Lock: p2m */
        BUG_ON(p2md->pod.entry_count < 0);

        goto out_entry_check;

    /* FIXME: Steal contig 2-meg regions for cache */

    /* Process as long as:
     * + There are PoD entries to handle, or
     * + There is ram left, and we want to steal it
          i < (1 << order) && (pod > 0 || (steal_for_cache && ram > 0));
        mfn = gfn_to_mfn_query(d, gpfn + i, &t);
        if ( t == p2m_populate_on_demand )
            set_p2m_entry(d, gpfn + i, _mfn(INVALID_MFN), 0, p2m_invalid);
            p2md->pod.entry_count--; /* Lock: p2m */
            BUG_ON(p2md->pod.entry_count < 0);
        else if ( steal_for_cache && p2m_is_ram(t) )
            struct page_info *page;

            ASSERT(mfn_valid(mfn));

            page = mfn_to_page(mfn);

            set_p2m_entry(d, gpfn + i, _mfn(INVALID_MFN), 0, p2m_invalid);
            set_gpfn_from_mfn(mfn_x(mfn), INVALID_M2P_ENTRY);

            p2m_pod_cache_add(d, page, 0);

            steal_for_cache = ( p2md->pod.entry_count > p2md->pod.count );

    /* If there are no more non-PoD entries, tell decrease_reservation() that
     * there's nothing left to do. */

    /* If we've reduced our "liabilities" beyond our "assets", free some */
    if ( p2md->pod.entry_count < p2md->pod.count )
        p2m_pod_set_cache_target(d, p2md->pod.entry_count);
p2m_pod_dump_data(struct domain *d)
    struct p2m_domain *p2md = d->arch.p2m;

    printk("    PoD entries=%d cachesize=%d\n",
           p2md->pod.entry_count, p2md->pod.count);

/* Search for all-zero superpages to be reclaimed as superpages for the
 * PoD cache. Must be called w/ p2m lock held, page_alloc lock not held. */
p2m_pod_zero_check_superpage(struct domain *d, unsigned long gfn)
    mfn_t mfn, mfn0 = _mfn(INVALID_MFN);
    p2m_type_t type, type0 = 0;
    unsigned long * map = NULL;
    int ret = 0, reset = 0;

    if ( !superpage_aligned(gfn) )

    /* Allow an extra refcount for one shadow pt mapping in shadowed domains */
    if ( paging_mode_shadow(d) )

    /* Look up the mfns, checking to make sure they're the same mfn
     * and aligned, and mapping them. */
    for ( i = 0; i < SUPERPAGE_PAGES; i++ )
        mfn = gfn_to_mfn_query(d, gfn + i, &type);

        /* Conditions that must be met for superpage-superpage:
         * + All gfns are ram types
         * + All gfns have the same type
         * + All of the mfns are allocated to a domain
         * + None of the mfns are used as pagetables, or allocated via xenheap
         * + The first mfn is 2-meg aligned
         * + All the other mfns are in sequence
         * Adding for good measure:
         * + None of the mfns are likely to be mapped elsewhere (refcount
         *   2 or less for shadow, 1 for hap)
        if ( !p2m_is_ram(type)
             || ( (mfn_to_page(mfn)->count_info & PGC_allocated) == 0 )
             || ( (mfn_to_page(mfn)->count_info & (PGC_page_table|PGC_xen_heap)) != 0 )
             || ( (mfn_to_page(mfn)->count_info & PGC_xen_heap ) != 0 )
             || ( (mfn_to_page(mfn)->count_info & PGC_count_mask) > max_ref )
             || !( ( i == 0 && superpage_aligned(mfn_x(mfn0)) )
                   || ( i != 0 && mfn_x(mfn) == (mfn_x(mfn0) + i) ) ) )

    /* Now, do a quick check to see if it may be zero before unmapping. */
    for ( i = 0; i < SUPERPAGE_PAGES; i++ )
        /* Quick zero-check */
        map = map_domain_page(mfn_x(mfn0) + i);

        for ( j = 0; j < 16; j++ )

        unmap_domain_page(map);

    /* Try to remove the page, restoring old mapping if it fails. */
    set_p2m_entry(d, gfn,
                  _mfn(POPULATE_ON_DEMAND_MFN), 9,
                  p2m_populate_on_demand);

    /* Make sure none of the MFNs are used elsewhere... for example, mapped
     * via the grant table interface, or by qemu.  Allow one refcount for
     * being allocated to the domain. */
    for ( i = 0; i < SUPERPAGE_PAGES; i++ )
        mfn = _mfn(mfn_x(mfn0) + i);
        if ( (mfn_to_page(mfn)->count_info & PGC_count_mask) > 1 )

    /* Finally, do a full zero-check */
    for ( i = 0; i < SUPERPAGE_PAGES; i++ )
        map = map_domain_page(mfn_x(mfn0) + i);

        for ( j = 0; j < PAGE_SIZE/sizeof(*map); j++ )

        unmap_domain_page(map);

        __trace_var(TRC_MEM_POD_ZERO_RECLAIM, 0, sizeof(t), (unsigned char *)&t);

    /* Finally!  We've passed all the checks, and can add the mfn superpage
     * back on the PoD cache, and account for the new p2m PoD entries */
    p2m_pod_cache_add(d, mfn_to_page(mfn0), 9);
    d->arch.p2m->pod.entry_count += SUPERPAGE_PAGES;

        set_p2m_entry(d, gfn, mfn0, 9, type0);
p2m_pod_zero_check(struct domain *d, unsigned long *gfns, int count)
    p2m_type_t types[count];
    unsigned long * map[count];

    /* Allow an extra refcount for one shadow pt mapping in shadowed domains */
    if ( paging_mode_shadow(d) )

    /* First, get the gfn list, translate to mfns, and map the pages. */
    for ( i = 0; i < count; i++ )
        mfns[i] = gfn_to_mfn_query(d, gfns[i], types + i);
        /* If this is ram, and not a pagetable or from the xen heap, and
         * probably not mapped elsewhere, map it; otherwise, skip. */
        if ( p2m_is_ram(types[i])
             && ( (mfn_to_page(mfns[i])->count_info & PGC_allocated) != 0 )
             && ( (mfn_to_page(mfns[i])->count_info & (PGC_page_table|PGC_xen_heap)) == 0 )
             && ( (mfn_to_page(mfns[i])->count_info & PGC_count_mask) <= max_ref ) )
            map[i] = map_domain_page(mfn_x(mfns[i]));

    /* Then, go through and check for zeroed pages, removing write permission
     * for those with zeroes. */
    for ( i = 0; i < count; i++ )
        /* Quick zero-check */
        for ( j = 0; j < 16; j++ )
            if ( *(map[i] + j) != 0 )

        unmap_domain_page(map[i]);

        /* Try to remove the page, restoring old mapping if it fails. */
        set_p2m_entry(d, gfns[i],
                      _mfn(POPULATE_ON_DEMAND_MFN), 0,
                      p2m_populate_on_demand);

        /* See if the page was successfully unmapped.  (Allow one refcount
         * for being allocated to a domain.) */
        if ( (mfn_to_page(mfns[i])->count_info & PGC_count_mask) > 1 )
            unmap_domain_page(map[i]);

            set_p2m_entry(d, gfns[i], mfns[i], 0, types[i]);

    /* Now check each page for real */
    for ( i = 0; i < count; i++ )
        for ( j = 0; j < PAGE_SIZE/sizeof(*map[i]); j++ )
            if ( *(map[i] + j) != 0 )

        unmap_domain_page(map[i]);

        /* See comment in p2m_pod_zero_check_superpage() re gnttab
        if ( j < PAGE_SIZE/sizeof(*map[i]) )
            set_p2m_entry(d, gfns[i], mfns[i], 0, types[i]);

            t.mfn = mfn_x(mfns[i]);

            __trace_var(TRC_MEM_POD_ZERO_RECLAIM, 0, sizeof(t), (unsigned char *)&t);

            /* Add to cache, and account for the new p2m PoD entry */
            p2m_pod_cache_add(d, mfn_to_page(mfns[i]), 0);
            d->arch.p2m->pod.entry_count++;
#define POD_SWEEP_LIMIT 1024

p2m_pod_emergency_sweep_super(struct domain *d)
    struct p2m_domain *p2md = d->arch.p2m;
    unsigned long i, start, limit;

    if ( p2md->pod.reclaim_super == 0 )
        p2md->pod.reclaim_super = (p2md->pod.max_guest>>9)<<9;
        p2md->pod.reclaim_super -= SUPERPAGE_PAGES;

    start = p2md->pod.reclaim_super;
    limit = (start > POD_SWEEP_LIMIT) ? (start - POD_SWEEP_LIMIT) : 0;

    for ( i = p2md->pod.reclaim_super ; i > 0 ; i -= SUPERPAGE_PAGES )
        p2m_pod_zero_check_superpage(d, i);
        /* Stop if we're past our limit and we have found *something*.
         *
         * NB that this is a zero-sum game; we're increasing our cache size
         * by increasing our 'debt'.  Since we hold the p2m lock,
         * (entry_count - count) must remain the same. */
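        /* (Reclaiming a zeroed page moves it back into the cache and
         * re-marks its gfn as PoD: the zero-check paths above increment
         * pod.count and pod.entry_count by the same amount, so the
         * difference is indeed preserved.) */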
        if ( !page_list_empty(&p2md->pod.super) && i < limit )

    p2md->pod.reclaim_super = i ? i - SUPERPAGE_PAGES : 0;

#define POD_SWEEP_STRIDE 16

p2m_pod_emergency_sweep(struct domain *d)
    struct p2m_domain *p2md = d->arch.p2m;
    unsigned long gfns[POD_SWEEP_STRIDE];
    unsigned long i, j = 0, start, limit;

    if ( p2md->pod.reclaim_single == 0 )
        p2md->pod.reclaim_single = p2md->pod.max_guest;

    start = p2md->pod.reclaim_single;
    limit = (start > POD_SWEEP_LIMIT) ? (start - POD_SWEEP_LIMIT) : 0;

    /* FIXME: Figure out how to avoid superpages */
    for ( i = p2md->pod.reclaim_single ; i > 0 ; i-- )
        gfn_to_mfn_query(d, i, &t );
        if ( p2m_is_ram(t) )
            BUG_ON(j > POD_SWEEP_STRIDE);

            if ( j == POD_SWEEP_STRIDE )
                p2m_pod_zero_check(d, gfns, j);

        /* Stop if we're past our limit and we have found *something*.
         *
         * NB that this is a zero-sum game; we're increasing our cache size
         * by re-increasing our 'debt'.  Since we hold the p2m lock,
         * (entry_count - count) must remain the same. */
        if ( p2md->pod.count > 0 && i < limit )

        p2m_pod_zero_check(d, gfns, j);

    p2md->pod.reclaim_single = i ? i - 1 : i;
p2m_pod_demand_populate(struct domain *d, unsigned long gfn,
    struct page_info *p = NULL; /* Compiler warnings */
    unsigned long gfn_aligned;
    struct p2m_domain *p2md = d->arch.p2m;

    ASSERT(p2m_locked_by_me(d->arch.p2m));

    /* This check is done with the p2m lock held.  This will make sure that
     * even if d->is_dying changes under our feet, p2m_pod_empty_cache()
     * won't start until we're done. */
    if ( unlikely(d->is_dying) )

    /* If we're low, start a sweep */
    if ( order == 9 && page_list_empty(&p2md->pod.super) )
        p2m_pod_emergency_sweep_super(d);

    if ( page_list_empty(&p2md->pod.single) &&
           || (order == 9 && page_list_empty(&p2md->pod.super) ) ) )
        p2m_pod_emergency_sweep(d);

    /* Keep track of the highest gfn demand-populated by a guest fault */
    if ( q == p2m_guest && gfn > p2md->pod.max_guest )
        p2md->pod.max_guest = gfn;

    spin_lock(&d->page_alloc_lock);

    if ( p2md->pod.count == 0 )

    /* Get a page f/ the cache.  A NULL return value indicates that the
     * 2-meg range should be marked singleton PoD, and retried */
    if ( (p = p2m_pod_cache_get(d, order)) == NULL )
        goto remap_and_retry;

    mfn = page_to_mfn(p);

    BUG_ON((mfn_x(mfn) & ((1 << order)-1)) != 0);

    spin_unlock(&d->page_alloc_lock);

    gfn_aligned = (gfn >> order) << order;

    set_p2m_entry(d, gfn_aligned, mfn, order, p2m_ram_rw);

    for ( i = 0 ; i < (1UL << order) ; i++ )
        set_gpfn_from_mfn(mfn_x(mfn) + i, gfn_aligned + i);

    p2md->pod.entry_count -= (1 << order); /* Lock: p2m */
    BUG_ON(p2md->pod.entry_count < 0);

        __trace_var(TRC_MEM_POD_POPULATE, 0, sizeof(t), (unsigned char *)&t);

    spin_unlock(&d->page_alloc_lock);
    printk("%s: Out of populate-on-demand memory! tot_pages %" PRIu32 " pod_entries %" PRIi32 "\n",
           __func__, d->tot_pages, p2md->pod.entry_count);

    spin_unlock(&d->page_alloc_lock);

    /* Remap this 2-meg region in singleton chunks */
    gfn_aligned = (gfn >> order) << order;
    for ( i = 0; i < (1 << order); i++ )
        set_p2m_entry(d, gfn_aligned + i, _mfn(POPULATE_ON_DEMAND_MFN), 0,
                      p2m_populate_on_demand);

        __trace_var(TRC_MEM_POD_SUPERPAGE_SPLINTER, 0, sizeof(t), (unsigned char *)&t);
/* Non-ept "lock-and-check" wrapper */
static int p2m_pod_check_and_populate(struct domain *d, unsigned long gfn,
                                      l1_pgentry_t *p2m_entry, int order,
    /* Only take the lock if we don't already have it.  Otherwise it
     * wouldn't be safe to do p2m lookups with the p2m lock held */
    int do_locking = !p2m_locked_by_me(d->arch.p2m);

        p2m_lock(d->arch.p2m);

    /* Check to make sure this is still PoD */
    if ( p2m_flags_to_type(l1e_get_flags(*p2m_entry)) != p2m_populate_on_demand )
            p2m_unlock(d->arch.p2m);

    r = p2m_pod_demand_populate(d, gfn, order, q);

        p2m_unlock(d->arch.p2m);
// Returns 0 on error (out of memory)
p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
              unsigned int page_order, p2m_type_t p2mt)
    // XXX -- this might be able to be faster iff current->domain == d
    mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table);
    void *table = map_domain_page(mfn_x(table_mfn));
    unsigned long i, gfn_remainder = gfn;
    l1_pgentry_t *p2m_entry;
    l1_pgentry_t entry_content;
    l2_pgentry_t l2e_content;

        t.order = page_order;

        __trace_var(TRC_MEM_SET_P2M_ENTRY, 0, sizeof(t), (unsigned char *)&t);

#if CONFIG_PAGING_LEVELS >= 4
    if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
                         L4_PAGETABLE_SHIFT - PAGE_SHIFT,
                         L4_PAGETABLE_ENTRIES, PGT_l3_page_table) )

    /*
     * When using PAE Xen, we only allow 33 bits of pseudo-physical
     * address in translated guests (i.e. 8 GBytes).  This restriction
     * comes from wanting to map the P2M table into the 16MB RO_MPT hole
     * in Xen's address space for translated PV guests.
     * When using AMD's NPT on PAE Xen, we are restricted to 4GB.
     */
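    /* Hence the level-3 limit passed below: on PAE (CONFIG_PAGING_LEVELS == 3)
     * the p2m gets 8 l3 entries (8 x 1GB = 8GB of pseudo-physical space) in
     * the non-HAP case, but only 4 entries (4GB) when hap_enabled. */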
    if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
                         L3_PAGETABLE_SHIFT - PAGE_SHIFT,
                         ((CONFIG_PAGING_LEVELS == 3)
                          ? (d->arch.hvm_domain.hap_enabled ? 4 : 8)
                          : L3_PAGETABLE_ENTRIES),
                         PGT_l2_page_table) )

    if ( page_order == 0 )
        if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
                             L2_PAGETABLE_SHIFT - PAGE_SHIFT,
                             L2_PAGETABLE_ENTRIES, PGT_l1_page_table) )

        p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
                                   0, L1_PAGETABLE_ENTRIES);

        if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) )
            entry_content = l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt));
            entry_content = l1e_empty();

        paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 1);

        p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
                                   L2_PAGETABLE_SHIFT - PAGE_SHIFT,
                                   L2_PAGETABLE_ENTRIES);

        /* FIXME: Deal with 4k replaced by 2meg pages */
        if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) &&
             !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
            P2M_ERROR("configure P2M table 4KB L2 entry with large page\n");

        if ( mfn_valid(mfn) || p2m_is_magic(p2mt) )
            l2e_content = l2e_from_pfn(mfn_x(mfn),
                                       p2m_type_to_flags(p2mt) | _PAGE_PSE);
            l2e_content = l2e_empty();

        entry_content.l1 = l2e_content.l2;
        paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 2);

    /* Track the highest gfn for which we have ever had a valid mapping */
         && (gfn + (1UL << page_order) - 1 > d->arch.p2m->max_mapped_pfn) )
        d->arch.p2m->max_mapped_pfn = gfn + (1UL << page_order) - 1;

    if ( iommu_enabled && need_iommu(d) )
        if ( p2mt == p2m_ram_rw )
            for ( i = 0; i < (1UL << page_order); i++ )
                iommu_map_page(d, gfn+i, mfn_x(mfn)+i );
            for ( int i = 0; i < (1UL << page_order); i++ )
                iommu_unmap_page(d, gfn+i);

    unmap_domain_page(table);
p2m_gfn_to_mfn(struct domain *d, unsigned long gfn, p2m_type_t *t,
    paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;

    ASSERT(paging_mode_translate(d));

    /* XXX This is for compatibility with the old model, where anything not
     * XXX marked as RAM was considered to be emulated MMIO space.
     * XXX Once we start explicitly registering MMIO regions in the p2m
     * XXX we will return p2m_invalid for unmapped gfns */

    mfn = pagetable_get_mfn(d->arch.phys_table);

    if ( gfn > d->arch.p2m->max_mapped_pfn )
        /* This pfn is higher than the highest the p2m map currently holds */
        return _mfn(INVALID_MFN);

#if CONFIG_PAGING_LEVELS >= 4
        l4_pgentry_t *l4e = map_domain_page(mfn_x(mfn));
        l4e += l4_table_offset(addr);
        if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 )
            unmap_domain_page(l4e);
            return _mfn(INVALID_MFN);
        mfn = _mfn(l4e_get_pfn(*l4e));
        unmap_domain_page(l4e);

        l3_pgentry_t *l3e = map_domain_page(mfn_x(mfn));
#if CONFIG_PAGING_LEVELS == 3
        /* On PAE hosts the p2m has eight l3 entries, not four (see
         * shadow_set_p2m_entry()) so we can't use l3_table_offset.
         * Instead, just count the number of l3es from zero.  It's safe
         * to do this because we already checked that the gfn is within
         * the bounds of the p2m. */
        l3e += (addr >> L3_PAGETABLE_SHIFT);
#else
        l3e += l3_table_offset(addr);
#endif
        if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
            unmap_domain_page(l3e);
            return _mfn(INVALID_MFN);
        mfn = _mfn(l3e_get_pfn(*l3e));
        unmap_domain_page(l3e);

    l2e = map_domain_page(mfn_x(mfn));
    l2e += l2_table_offset(addr);

    if ( (l2e_get_flags(*l2e) & _PAGE_PRESENT) == 0 )
        /* PoD: Try to populate a 2-meg chunk */
        if ( p2m_flags_to_type(l2e_get_flags(*l2e)) == p2m_populate_on_demand )
            if ( q != p2m_query ) {
                if ( !p2m_pod_check_and_populate(d, gfn,
                                                 (l1_pgentry_t *)l2e, 9, q) )
                *t = p2m_populate_on_demand;

        unmap_domain_page(l2e);
        return _mfn(INVALID_MFN);
    else if ( (l2e_get_flags(*l2e) & _PAGE_PSE) )
        mfn = _mfn(l2e_get_pfn(*l2e) + l1_table_offset(addr));
        *t = p2m_flags_to_type(l2e_get_flags(*l2e));
        unmap_domain_page(l2e);

        ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
        return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);

    mfn = _mfn(l2e_get_pfn(*l2e));
    unmap_domain_page(l2e);

    l1e = map_domain_page(mfn_x(mfn));
    l1e += l1_table_offset(addr);

    if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 )
        /* PoD: Try to populate */
        if ( p2m_flags_to_type(l1e_get_flags(*l1e)) == p2m_populate_on_demand )
            if ( q != p2m_query ) {
                if ( !p2m_pod_check_and_populate(d, gfn,
                                                 (l1_pgentry_t *)l1e, 0, q) )
                *t = p2m_populate_on_demand;

        unmap_domain_page(l1e);
        return _mfn(INVALID_MFN);

    mfn = _mfn(l1e_get_pfn(*l1e));
    *t = p2m_flags_to_type(l1e_get_flags(*l1e));
    unmap_domain_page(l1e);

    ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
    return (p2m_is_valid(*t) || p2m_is_grant(*t)) ? mfn : _mfn(INVALID_MFN);
/* Read the current domain's p2m table (through the linear mapping). */
static mfn_t p2m_gfn_to_mfn_current(unsigned long gfn, p2m_type_t *t,
    mfn_t mfn = _mfn(INVALID_MFN);
    p2m_type_t p2mt = p2m_mmio_dm;
    paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
    /* XXX This is for compatibility with the old model, where anything not
     * XXX marked as RAM was considered to be emulated MMIO space.
     * XXX Once we start explicitly registering MMIO regions in the p2m
     * XXX we will return p2m_invalid for unmapped gfns */

    if ( gfn <= current->domain->arch.p2m->max_mapped_pfn )
        l1_pgentry_t l1e = l1e_empty(), *p2m_entry;
        l2_pgentry_t l2e = l2e_empty();

        ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START)
               / sizeof(l1_pgentry_t));

        p2m_entry = &__linear_l1_table[l1_linear_offset(RO_MPT_VIRT_START)
                                       + l2_linear_offset(addr)];

        ret = __copy_from_user(&l2e,
             || !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
            if ( (l2e_get_flags(l2e) & _PAGE_PSE)
                 && ( p2m_flags_to_type(l2e_get_flags(l2e))
                      == p2m_populate_on_demand ) )
                /* The read has succeeded, so we know that the mapping
                 * exists at this point. */
                if ( q != p2m_query )
                    if ( !p2m_pod_check_and_populate(current->domain, gfn,
                        /* Allocate failed. */
                        printk("%s: Allocate failed!\n", __func__);

                    p2mt = p2m_populate_on_demand;

        if ( l2e_get_flags(l2e) & _PAGE_PSE )
            p2mt = p2m_flags_to_type(l2e_get_flags(l2e));
            ASSERT(l2e_get_pfn(l2e) != INVALID_MFN || !p2m_is_ram(p2mt));

            if ( p2m_is_valid(p2mt) )
                mfn = _mfn(l2e_get_pfn(l2e) + l1_table_offset(addr));

        /*
         * Read and process L1
         */

        /* Need to __copy_from_user because the p2m is sparse and this
         * part might not exist */
        p2m_entry = &phys_to_machine_mapping[gfn];

        ret = __copy_from_user(&l1e,

        p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
        ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));

        if ( p2m_flags_to_type(l1e_get_flags(l1e))
             == p2m_populate_on_demand )
            /* The read has succeeded, so we know that the mapping
             * exists at this point. */
            if ( q != p2m_query )
                if ( !p2m_pod_check_and_populate(current->domain, gfn,
                                                 (l1_pgentry_t *)p2m_entry, 0, q) )
                    /* Allocate failed. */

                    p2mt = p2m_populate_on_demand;

        if ( p2m_is_valid(p2mt) || p2m_is_grant(p2mt) )
            mfn = _mfn(l1e_get_pfn(l1e));
/* Init the datastructures for later use by the p2m code */
int p2m_init(struct domain *d)
    struct p2m_domain *p2m;

    p2m = xmalloc(struct p2m_domain);

    memset(p2m, 0, sizeof(*p2m));

    INIT_PAGE_LIST_HEAD(&p2m->pages);
    INIT_PAGE_LIST_HEAD(&p2m->pod.super);
    INIT_PAGE_LIST_HEAD(&p2m->pod.single);

    p2m->set_entry = p2m_set_entry;
    p2m->get_entry = p2m_gfn_to_mfn;
    p2m->get_entry_current = p2m_gfn_to_mfn_current;
    p2m->change_entry_type_global = p2m_change_type_global;

    if ( is_hvm_domain(d) && d->arch.hvm_domain.hap_enabled &&
         (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) )
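        /* (Assumption, not visible in this fragment: the truncated branch
         * above presumably switches these handlers to the EPT-specific
         * implementations -- cf. the ept_p2m_init() reference among the
         * includes.) */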
void p2m_change_entry_type_global(struct domain *d,
                                  p2m_type_t ot, p2m_type_t nt)
    struct p2m_domain *p2m = d->arch.p2m;

    p2m->change_entry_type_global(d, ot, nt);

int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
                  unsigned int page_order, p2m_type_t p2mt)
    unsigned long todo = 1ul << page_order;

        if ( is_hvm_domain(d) && d->arch.hvm_domain.hap_enabled )
            order = (((gfn | mfn_x(mfn) | todo) & (SUPERPAGE_PAGES - 1)) == 0) ?

        if ( !d->arch.p2m->set_entry(d, gfn, mfn, order, p2mt) )

        gfn += 1ul << order;
        if ( mfn_x(mfn) != INVALID_MFN )
            mfn = _mfn(mfn_x(mfn) + (1ul << order));
        todo -= 1ul << order;
// Allocate a new p2m table for a domain.
// The structure of the p2m table is that of a pagetable for xen (i.e. it is
// controlled by CONFIG_PAGING_LEVELS).
// The alloc_page and free_page functions will be used to get memory to
// build the p2m, and to release it again at the end of day.
// Returns 0 for success or -errno.
int p2m_alloc_table(struct domain *d,
                    struct page_info * (*alloc_page)(struct domain *d),
                    void (*free_page)(struct domain *d, struct page_info *pg))
    mfn_t mfn = _mfn(INVALID_MFN);
    struct page_info *page, *p2m_top;
    unsigned int page_count = 0;
    unsigned long gfn = -1UL;
    struct p2m_domain *p2m = d->arch.p2m;

    if ( pagetable_get_pfn(d->arch.phys_table) != 0 )
        P2M_ERROR("p2m already allocated for this domain\n");

    P2M_PRINTK("allocating p2m table\n");

    p2m->alloc_page = alloc_page;
    p2m->free_page = free_page;

    p2m_top = p2m->alloc_page(d);
    if ( p2m_top == NULL )

    page_list_add_tail(p2m_top, &p2m->pages);

    p2m_top->count_info = 1;
    p2m_top->u.inuse.type_info =
#if CONFIG_PAGING_LEVELS == 4
        | 1 | PGT_validated;

    d->arch.phys_table = pagetable_from_mfn(page_to_mfn(p2m_top));

    P2M_PRINTK("populating p2m table\n");

    /* Initialise physmap tables for slot zero. Other code assumes this. */
    if ( !set_p2m_entry(d, 0, _mfn(INVALID_MFN), 0,

    /* Copy all existing mappings from the page list and m2p */
    page_list_for_each(page, &d->page_list)
        mfn = page_to_mfn(page);
        gfn = get_gpfn_from_mfn(mfn_x(mfn));
        /* Pages should not be shared that early */
        ASSERT(gfn != SHARED_M2P_ENTRY);
            (gfn != 0x5555555555555555L)
            (gfn != 0x55555555L)
             && gfn != INVALID_M2P_ENTRY
            && !set_p2m_entry(d, gfn, mfn, 0, p2m_ram_rw) )

    P2M_PRINTK("p2m table initialised (%u pages)\n", page_count);

    P2M_PRINTK("failed to initialize p2m table, gfn=%05lx, mfn=%"
               PRI_mfn "\n", gfn, mfn_x(mfn));
void p2m_teardown(struct domain *d)
/* Return all the p2m pages to Xen.
 * We know we don't have any extra mappings to these pages */
    struct page_info *pg;
    struct p2m_domain *p2m = d->arch.p2m;

    for ( gfn = 0; gfn < p2m->max_mapped_pfn; gfn++ )
        mfn = p2m->get_entry(d, gfn, &t, p2m_query);
        if ( mfn_valid(mfn) && (t == p2m_ram_shared) )
            BUG_ON(mem_sharing_unshare_page(d, gfn, MEM_SHARING_DESTROY_GFN));

    d->arch.phys_table = pagetable_null();

    while ( (pg = page_list_remove_head(&p2m->pages)) )
        p2m->free_page(d, pg);

void p2m_final_teardown(struct domain *d)
#if P2M_AUDIT
static void audit_p2m(struct domain *d)
    struct page_info *page;
    unsigned long mfn, gfn, m2pfn, lp2mfn = 0;
    int entry_count = 0;
    unsigned long orphans_d = 0, orphans_i = 0, mpbad = 0, pmbad = 0;

    if ( !paging_mode_translate(d) )

    //P2M_PRINTK("p2m audit starts\n");

    test_linear = ( (d == current->domain)
                    && !pagetable_is_null(current->arch.monitor_table) );

    spin_lock(&d->page_alloc_lock);

    /* Audit part one: walk the domain's page allocation list, checking
     * the m2p entries. */
    page_list_for_each ( page, &d->page_list )
        mfn = mfn_x(page_to_mfn(page));

        // P2M_PRINTK("auditing guest page, mfn=%#lx\n", mfn);

        od = page_get_owner(page);

            P2M_PRINTK("wrong owner %#lx -> %p(%u) != %p(%u)\n",
                       mfn, od, (od?od->domain_id:-1), d, d->domain_id);

        gfn = get_gpfn_from_mfn(mfn);
        if ( gfn == INVALID_M2P_ENTRY )
            //P2M_PRINTK("orphaned guest page: mfn=%#lx has invalid gfn\n",

        if ( gfn == 0x55555555 )
            //P2M_PRINTK("orphaned guest page: mfn=%#lx has debug gfn\n",

        if ( gfn == SHARED_P2M_ENTRY )
            P2M_PRINTK("shared mfn (%lx) on domain page list!\n",

        p2mfn = gfn_to_mfn_type_foreign(d, gfn, &type, p2m_query);
        if ( mfn_x(p2mfn) != mfn )
            P2M_PRINTK("map mismatch mfn %#lx -> gfn %#lx -> mfn %#lx"
                       mfn, gfn, mfn_x(p2mfn),
                       ? get_gpfn_from_mfn(mfn_x(p2mfn))
            /* This m2p entry is stale: the domain has another frame in
             * this physical slot.  No great disaster, but for neatness,
             * blow away the m2p entry. */
            set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);

        if ( test_linear && (gfn <= d->arch.p2m->max_mapped_pfn) )
            lp2mfn = mfn_x(gfn_to_mfn_query(d, gfn, &type));
            if ( lp2mfn != mfn_x(p2mfn) )
                P2M_PRINTK("linear mismatch gfn %#lx -> mfn %#lx "
                           "(!= mfn %#lx)\n", gfn, lp2mfn, mfn_x(p2mfn));

        // P2M_PRINTK("OK: mfn=%#lx, gfn=%#lx, p2mfn=%#lx, lp2mfn=%#lx\n",
        //                mfn, gfn, p2mfn, lp2mfn);

    spin_unlock(&d->page_alloc_lock);

    /* Audit part two: walk the domain's p2m table, checking the entries. */
    if ( pagetable_get_pfn(d->arch.phys_table) != 0 )
#if CONFIG_PAGING_LEVELS == 4
        l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
#else /* CONFIG_PAGING_LEVELS == 3 */
        l3e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));

#if CONFIG_PAGING_LEVELS >= 4
        for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ )
            if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) )
                gfn += 1 << (L4_PAGETABLE_SHIFT - PAGE_SHIFT);
            l3e = map_domain_page(mfn_x(_mfn(l4e_get_pfn(l4e[i4]))));
                  i3 < ((CONFIG_PAGING_LEVELS==4) ? L3_PAGETABLE_ENTRIES : 8);
                if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) )
                    gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
                l2e = map_domain_page(mfn_x(_mfn(l3e_get_pfn(l3e[i3]))));
                for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
                    if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
                        if ( (l2e_get_flags(l2e[i2]) & _PAGE_PSE)
                             && ( p2m_flags_to_type(l2e_get_flags(l2e[i2]))
                                  == p2m_populate_on_demand ) )
                            entry_count += SUPERPAGE_PAGES;
                        gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);

                    /* check for super page */
                    if ( l2e_get_flags(l2e[i2]) & _PAGE_PSE )
                        mfn = l2e_get_pfn(l2e[i2]);
                        ASSERT(mfn_valid(_mfn(mfn)));
                        for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++)
                            m2pfn = get_gpfn_from_mfn(mfn+i1);
                            /* Allow shared M2Ps */
                            if ( (m2pfn != (gfn + i1)) &&
                                 (m2pfn != SHARED_M2P_ENTRY) )
                                P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
                                           " -> gfn %#lx\n", gfn+i1, mfn+i1,
                        gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);

                    l1e = map_domain_page(mfn_x(_mfn(l2e_get_pfn(l2e[i2]))));

                    for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
                        type = p2m_flags_to_type(l1e_get_flags(l1e[i1]));
                        if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) )
                            if ( type == p2m_populate_on_demand )
                        mfn = l1e_get_pfn(l1e[i1]);
                        ASSERT(mfn_valid(_mfn(mfn)));
                        m2pfn = get_gpfn_from_mfn(mfn);
                        if ( m2pfn != gfn &&
                             type != p2m_mmio_direct &&
                             !p2m_is_grant(type) &&
                             !p2m_is_shared(type) )
                            printk("mismatch: gfn %#lx -> mfn %#lx"
                                   " -> gfn %#lx\n", gfn, mfn, m2pfn);
                            P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
                                       " -> gfn %#lx\n", gfn, mfn, m2pfn);
                    unmap_domain_page(l1e);
                unmap_domain_page(l2e);
#if CONFIG_PAGING_LEVELS >= 4
            unmap_domain_page(l3e);

#if CONFIG_PAGING_LEVELS == 4
        unmap_domain_page(l4e);
#else /* CONFIG_PAGING_LEVELS == 3 */
        unmap_domain_page(l3e);

    if ( entry_count != d->arch.p2m->pod.entry_count )
        printk("%s: refcounted entry count %d, audit count %d!\n",
               d->arch.p2m->pod.entry_count,

    //P2M_PRINTK("p2m audit complete\n");
    //if ( orphans_i | orphans_d | mpbad | pmbad )
    //    P2M_PRINTK("p2m audit found %lu orphans (%lu inval %lu debug)\n",
    //                   orphans_i + orphans_d, orphans_i, orphans_d,
    if ( mpbad | pmbad )
        P2M_PRINTK("p2m audit found %lu odd p2m, %lu bad m2p entries\n",
#endif /* P2M_AUDIT */
p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn,
                unsigned int page_order)
    if ( !paging_mode_translate(d) )
        if ( need_iommu(d) )
            for ( i = 0; i < (1 << page_order); i++ )
                iommu_unmap_page(d, mfn + i);

    P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn);

    for ( i = 0; i < (1UL << page_order); i++ )
        mfn_return = d->arch.p2m->get_entry(d, gfn + i, &t, p2m_query);
        if ( !p2m_is_grant(t) )
            set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY);
        ASSERT( !p2m_is_valid(t) || mfn + i == mfn_x(mfn_return) );

    set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order, p2m_invalid);

guest_physmap_remove_page(struct domain *d, unsigned long gfn,
                          unsigned long mfn, unsigned int page_order)
    p2m_lock(d->arch.p2m);
    p2m_remove_page(d, gfn, mfn, page_order);
    p2m_unlock(d->arch.p2m);
#if CONFIG_PAGING_LEVELS == 3
static int gfn_check_limit(
    struct domain *d, unsigned long gfn, unsigned int order)
    /*
     * 32bit AMD nested paging does not support over 4GB guest due to
     * hardware translation limit. This limitation is checked by comparing
     * gfn with 0xfffffUL.
     */
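    /* 0x100000 4KB frames x 4KB = 4GB, so 0xfffffUL is the last frame below
     * 4GB; the (gfn + (1ul << order)) <= 0x100000UL test below is exactly
     * this boundary check. */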
    if ( !paging_mode_hap(d) || ((gfn + (1ul << order)) <= 0x100000UL) ||
         (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) )

    if ( !test_and_set_bool(d->arch.hvm_domain.svm.npt_4gb_warning) )
        dprintk(XENLOG_WARNING, "Dom%d failed to populate memory beyond"
                " 4GB: specify 'hap=0' domain config option.\n",

#define gfn_check_limit(d, g, o) 0

guest_physmap_mark_populate_on_demand(struct domain *d, unsigned long gfn,
    struct p2m_domain *p2md = d->arch.p2m;

    BUG_ON(!paging_mode_translate(d));

    rc = gfn_check_limit(d, gfn, order);

    P2M_DEBUG("mark pod gfn=%#lx\n", gfn);

    /* Make sure all gpfns are unused */
    for ( i = 0; i < (1UL << order); i++ )
        omfn = gfn_to_mfn_query(d, gfn + i, &ot);
        if ( p2m_is_ram(ot) )
            printk("%s: gfn_to_mfn returned type %d!\n",
        else if ( ot == p2m_populate_on_demand )
            /* Count how many PoD entries we'll be replacing if successful */

    /* Now, actually do the two-way mapping */
    if ( !set_p2m_entry(d, gfn, _mfn(POPULATE_ON_DEMAND_MFN), order,
                        p2m_populate_on_demand) )

    p2md->pod.entry_count += 1 << order; /* Lock: p2m */
    p2md->pod.entry_count -= pod_count;
    BUG_ON(p2md->pod.entry_count < 0);
guest_physmap_add_entry(struct domain *d, unsigned long gfn,
                        unsigned long mfn, unsigned int page_order,
    unsigned long i, ogfn;

    if ( !paging_mode_translate(d) )
        if ( need_iommu(d) && t == p2m_ram_rw )
            for ( i = 0; i < (1 << page_order); i++ )
                if ( (rc = iommu_map_page(d, mfn + i, mfn + i)) != 0 )
                        iommu_unmap_page(d, mfn + i);

    rc = gfn_check_limit(d, gfn, page_order);

    p2m_lock(d->arch.p2m);

    P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn);

    /* First, remove m->p mappings for existing p->m mappings */
    for ( i = 0; i < (1UL << page_order); i++ )
        omfn = gfn_to_mfn_query(d, gfn + i, &ot);
        if ( p2m_is_grant(ot) )
            /* Really shouldn't be unmapping grant maps this way */
            p2m_unlock(d->arch.p2m);
        else if ( p2m_is_ram(ot) )
            ASSERT(mfn_valid(omfn));
            set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
        else if ( ot == p2m_populate_on_demand )
            /* Count how many PoD entries we'll be replacing if successful */

    /* Then, look for m->p mappings for this range and deal with them */
    for ( i = 0; i < (1UL << page_order); i++ )
        if ( page_get_owner(mfn_to_page(_mfn(mfn + i))) != d )
        ogfn = mfn_to_gfn(d, _mfn(mfn+i));
            (ogfn != 0x5555555555555555L)
            (ogfn != 0x55555555L)
             && (ogfn != INVALID_M2P_ENTRY)
            && (ogfn != gfn + i) )
            /* This machine frame is already mapped at another physical
            P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
                      mfn + i, ogfn, gfn + i);
            omfn = gfn_to_mfn_query(d, ogfn, &ot);
            /* If we get here, we know the local domain owns the page,
               so it can't have been grant mapped in. */
            BUG_ON( p2m_is_grant(ot) );
            if ( p2m_is_ram(ot) )
                ASSERT(mfn_valid(omfn));
                P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n",
                          ogfn , mfn_x(omfn));
                if ( mfn_x(omfn) == (mfn + i) )
                    p2m_remove_page(d, ogfn, mfn + i, 0);

    /* Now, actually do the two-way mapping */
    if ( mfn_valid(_mfn(mfn)) )
        if ( !set_p2m_entry(d, gfn, _mfn(mfn), page_order, t) )
        if ( !p2m_is_grant(t) )
            for ( i = 0; i < (1UL << page_order); i++ )
                set_gpfn_from_mfn(mfn+i, gfn+i);
        gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n",
        if ( !set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order,
            d->arch.p2m->pod.entry_count -= pod_count; /* Lock: p2m */
            BUG_ON(d->arch.p2m->pod.entry_count < 0);

    p2m_unlock(d->arch.p2m);
/* Walk the whole p2m table, changing any entries of the old type
 * to the new type.  This is used in hardware-assisted paging to
 * quickly enable or disable log-dirty tracking */
void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt)
    unsigned long mfn, gfn, flags;
    l1_pgentry_t l1e_content;
    unsigned long i1, i2, i3;
#if CONFIG_PAGING_LEVELS == 4
#endif /* CONFIG_PAGING_LEVELS == 4 */

    BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));

    if ( !paging_mode_translate(d) )

    if ( pagetable_get_pfn(d->arch.phys_table) == 0 )

    ASSERT(p2m_locked_by_me(d->arch.p2m));

#if CONFIG_PAGING_LEVELS == 4
    l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
#else /* CONFIG_PAGING_LEVELS == 3 */
    l3e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));

#if CONFIG_PAGING_LEVELS >= 4
    for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ )
        if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) )
        l3e = map_domain_page(l4e_get_pfn(l4e[i4]));
              i3 < ((CONFIG_PAGING_LEVELS==4) ? L3_PAGETABLE_ENTRIES : 8);
            if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) )
            l2mfn = _mfn(l3e_get_pfn(l3e[i3]));
            l2e = map_domain_page(l3e_get_pfn(l3e[i3]));
            for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
                if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )

                if ( (l2e_get_flags(l2e[i2]) & _PAGE_PSE) )
                    flags = l2e_get_flags(l2e[i2]);
                    if ( p2m_flags_to_type(flags) != ot )
                    mfn = l2e_get_pfn(l2e[i2]);
                    /* Do not use get_gpfn_from_mfn because it may return
#if CONFIG_PAGING_LEVELS >= 4
                           + (i4 * L3_PAGETABLE_ENTRIES)
                          * L2_PAGETABLE_ENTRIES) * L1_PAGETABLE_ENTRIES;
                    flags = p2m_type_to_flags(nt);
                    l1e_content = l1e_from_pfn(mfn, flags | _PAGE_PSE);
                    paging_write_p2m_entry(d, gfn, (l1_pgentry_t *)&l2e[i2],
                                           l2mfn, l1e_content, 2);

                l1mfn = _mfn(l2e_get_pfn(l2e[i2]));
                l1e = map_domain_page(mfn_x(l1mfn));

                for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
                    flags = l1e_get_flags(l1e[i1]);
                    if ( p2m_flags_to_type(flags) != ot )
                    mfn = l1e_get_pfn(l1e[i1]);
                    gfn = i1 + (i2 + (i3
#if CONFIG_PAGING_LEVELS >= 4
                                      + (i4 * L3_PAGETABLE_ENTRIES)
                                * L2_PAGETABLE_ENTRIES) * L1_PAGETABLE_ENTRIES;
                    /* create a new 1le entry with the new type */
                    flags = p2m_type_to_flags(nt);
                    l1e_content = l1e_from_pfn(mfn, flags);
                    paging_write_p2m_entry(d, gfn, &l1e[i1],
                                           l1mfn, l1e_content, 1);
                unmap_domain_page(l1e);
            unmap_domain_page(l2e);
#if CONFIG_PAGING_LEVELS >= 4
        unmap_domain_page(l3e);

#if CONFIG_PAGING_LEVELS == 4
    unmap_domain_page(l4e);
#else /* CONFIG_PAGING_LEVELS == 3 */
    unmap_domain_page(l3e);
/* Modify the p2m type of a single gfn from ot to nt, returning the
 * entry's previous type */
p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn,
                           p2m_type_t ot, p2m_type_t nt)
    BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));

    p2m_lock(d->arch.p2m);

    mfn = gfn_to_mfn(d, gfn, &pt);
        set_p2m_entry(d, gfn, mfn, 0, nt);

    p2m_unlock(d->arch.p2m);

set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
    if ( !paging_mode_translate(d) )

    omfn = gfn_to_mfn_query(d, gfn, &ot);
    if ( p2m_is_grant(ot) )
    else if ( p2m_is_ram(ot) )
        ASSERT(mfn_valid(omfn));
        set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);

    P2M_DEBUG("set mmio %lx %lx\n", gfn, mfn_x(mfn));
    p2m_lock(d->arch.p2m);
    rc = set_p2m_entry(d, gfn, mfn, 0, p2m_mmio_direct);
    p2m_unlock(d->arch.p2m);
        gdprintk(XENLOG_ERR,
            "set_mmio_p2m_entry: set_p2m_entry failed! mfn=%08lx\n",
            gmfn_to_mfn(d, gfn));

clear_mmio_p2m_entry(struct domain *d, unsigned long gfn)
    if ( !paging_mode_translate(d) )

    mfn = gmfn_to_mfn(d, gfn);
    if ( INVALID_MFN == mfn )
        gdprintk(XENLOG_ERR,
            "clear_mmio_p2m_entry: gfn_to_mfn failed! gfn=%08lx\n", gfn);
    p2m_lock(d->arch.p2m);
    rc = set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0, 0);
    p2m_unlock(d->arch.p2m);

set_shared_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
    if ( !paging_mode_translate(d) )

    omfn = gfn_to_mfn_query(d, gfn, &ot);
    /* At the moment we only allow p2m change if gfn has already been made
    ASSERT(p2m_is_shared(ot));
    ASSERT(mfn_valid(omfn));
    /* XXX: M2P translations have to be handled properly for shared pages */
    set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);

    P2M_DEBUG("set shared %lx %lx\n", gfn, mfn_x(mfn));
    rc = set_p2m_entry(d, gfn, mfn, 0, p2m_ram_shared);
        gdprintk(XENLOG_ERR,
            "set_shared_p2m_entry: set_p2m_entry failed! mfn=%08lx\n",
            gmfn_to_mfn(d, gfn));
int p2m_mem_paging_nominate(struct domain *d, unsigned long gfn)
    struct page_info *page;

    mfn = gfn_to_mfn(d, gfn, &p2mt);

    /* Check if mfn is valid */
    if ( !mfn_valid(mfn) )

    /* Check p2m type */
    if ( !p2m_is_pageable(p2mt) )

    /* Check for io memory page */
    if ( is_iomem_page(mfn_x(mfn)) )

    /* Check page count and type */
    page = mfn_to_page(mfn);
    if ( (page->count_info & (PGC_count_mask | PGC_allocated)) !=
         (1 | PGC_allocated) )

    if ( (page->u.inuse.type_info & PGT_type_mask) != PGT_none )

    p2m_lock(d->arch.p2m);
    set_p2m_entry(d, gfn, mfn, 0, p2m_ram_paging_out);
    p2m_unlock(d->arch.p2m);

int p2m_mem_paging_evict(struct domain *d, unsigned long gfn)
    struct page_info *page;

    mfn = gfn_to_mfn(d, gfn, &p2mt);
    if ( unlikely(!mfn_valid(mfn)) )

    if ( (p2mt == p2m_ram_paged) || (p2mt == p2m_ram_paging_in) ||
         (p2mt == p2m_ram_paging_in_start) )

    /* Get the page so it doesn't get modified under Xen's feet */
    page = mfn_to_page(mfn);
    if ( unlikely(!get_page(page, d)) )

    /* Decrement guest domain's ref count of the page */
    if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )

    /* Remove mapping from p2m table */
    p2m_lock(d->arch.p2m);
    set_p2m_entry(d, gfn, _mfn(PAGING_MFN), 0, p2m_ram_paged);
    p2m_unlock(d->arch.p2m);

    /* Put the page back so it gets freed */

void p2m_mem_paging_populate(struct domain *d, unsigned long gfn)
    struct vcpu *v = current;
    mem_event_request_t req;

    memset(&req, 0, sizeof(req));

    /* Check that there's space on the ring for this request */
    if ( mem_event_check_ring(d) )

    /* Fix p2m mapping */
    /* XXX: It seems inefficient to have this here, as it's only needed
     *      in one case (ept guest accessing paging out page) */
    gfn_to_mfn(d, gfn, &p2mt);
    if ( p2mt != p2m_ram_paging_out )
        p2m_lock(d->arch.p2m);
        set_p2m_entry(d, gfn, _mfn(PAGING_MFN), 0, p2m_ram_paging_in_start);
        p2m_unlock(d->arch.p2m);

    if ( v->domain->domain_id == d->domain_id )
        vcpu_pause_nosync(v);
        req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;

    /* Send request to pager */
    req.vcpu_id = v->vcpu_id;

    mem_event_put_request(d, &req);

int p2m_mem_paging_prep(struct domain *d, unsigned long gfn)
    struct page_info *page;

    /* Get a free page */
    page = alloc_domheap_page(d, 0);
    if ( unlikely(page == NULL) )

    /* Fix p2m mapping */
    p2m_lock(d->arch.p2m);
    set_p2m_entry(d, gfn, page_to_mfn(page), 0, p2m_ram_paging_in);
    p2m_unlock(d->arch.p2m);

void p2m_mem_paging_resume(struct domain *d)
    mem_event_response_t rsp;

    /* Pull the response off the ring */
    mem_event_get_response(d, &rsp);

    mfn = gfn_to_mfn(d, rsp.gfn, &p2mt);
    p2m_lock(d->arch.p2m);
    set_p2m_entry(d, rsp.gfn, mfn, 0, p2m_ram_rw);
    p2m_unlock(d->arch.p2m);

    /* Unpause domain */
    if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
        vcpu_unpause(d->vcpu[rsp.vcpu_id]);

    /* Unpause any domains that were paused because the ring was full */
    mem_event_unpause_vcpus(d);

 * c-set-style: "BSD"
 * indent-tabs-mode: nil