/******************************************************************************
 * arch/x86/mm/p2m-pod.c
 *
 * Populate-on-demand p2m entries.
 *
 * Copyright (c) 2009-2011 Citrix Systems, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <asm/domain.h>
#include <asm/page.h>
#include <asm/paging.h>
#include <asm/p2m.h>
#include <asm/hvm/vmx/vmx.h> /* ept_p2m_init() */
#include <xen/iommu.h>
#include <asm/mem_event.h>
#include <public/mem_event.h>
#include <asm/mem_sharing.h>
#include <xen/event.h>
#include <asm/hvm/nestedhvm.h>
#include <asm/hvm/svm/amd-iommu-proto.h>

#include "mm-locks.h"
/* Override macros from asm/page.h to make them work with mfn_t */
#undef mfn_to_page
#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
#undef mfn_valid
#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
#undef page_to_mfn
#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))

#define superpage_aligned(_x)  (((_x)&(SUPERPAGE_PAGES-1))==0)
/* Enforce lock ordering when grabbing the "external" page_alloc lock */
static inline void lock_page_alloc(struct p2m_domain *p2m)
{
    page_alloc_mm_pre_lock();
    spin_lock(&(p2m->domain->page_alloc_lock));
    page_alloc_mm_post_lock(p2m->domain->arch.page_alloc_unlock_level);
}

static inline void unlock_page_alloc(struct p2m_domain *p2m)
{
    page_alloc_mm_unlock(p2m->domain->arch.page_alloc_unlock_level);
    spin_unlock(&(p2m->domain->page_alloc_lock));
}
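
/*
 * The wrappers above are only ever used with the PoD lock already held;
 * a typical caller therefore nests the locks roughly as follows (a sketch,
 * assuming the pod_lock()/pod_unlock() helpers from "mm-locks.h"):
 *
 *     pod_lock(p2m);
 *     ...
 *     lock_page_alloc(p2m);
 *     ...                       // manipulate d->page_list
 *     unlock_page_alloc(p2m);
 *     ...
 *     pod_unlock(p2m);
 */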

/*
 * Populate-on-demand functionality
 */

static int
p2m_pod_cache_add(struct p2m_domain *p2m,
                  struct page_info *page,
                  unsigned int order)
{
    int i;
    struct page_info *p;
    struct domain *d = p2m->domain;

#ifndef NDEBUG
    mfn_t mfn;

    mfn = page_to_mfn(page);

    /* Check to make sure this is a contiguous region */
    if ( mfn_x(mfn) & ((1 << order) - 1) )
    {
        printk("%s: mfn %lx not aligned order %u! (mask %lx)\n",
               __func__, mfn_x(mfn), order, ((1UL << order) - 1));
        return -1;
    }

    for ( i = 0; i < (1 << order); i++ )
    {
        struct domain *od;

        p = mfn_to_page(_mfn(mfn_x(mfn) + i));
        od = page_get_owner(p);
        if ( od != d )
        {
            printk("%s: mfn %lx expected owner d%d, got owner d%d!\n",
                   __func__, mfn_x(mfn), d->domain_id,
                   od ? od->domain_id : -1);
            return -1;
        }
    }
#endif

    ASSERT(pod_locked_by_me(p2m));

    /*
     * Pages from domain_alloc and returned by the balloon driver aren't
     * guaranteed to be zero; but by reclaiming zero pages, we implicitly
     * promise to provide zero pages. So we scrub pages before using.
     */
    for ( i = 0; i < (1 << order); i++ )
    {
        char *b = map_domain_page(mfn_x(page_to_mfn(page)) + i);

        clear_page(b);
        unmap_domain_page(b);
    }

    /* First, take all pages off the domain list */
    lock_page_alloc(p2m);
    for ( i = 0; i < (1 << order); i++ )
    {
        p = page + i;
        page_list_del(p, &d->page_list);
    }
    unlock_page_alloc(p2m);

    /* Then add the first one to the appropriate populate-on-demand list */
    if ( order == PAGE_ORDER_2M )
        page_list_add_tail(page, &p2m->pod.super);  /* lock: page_alloc */
    else
        page_list_add_tail(page, &p2m->pod.single); /* lock: page_alloc */
    p2m->pod.count += 1 << order;

    return 0;
}

/* Get a page of size order from the populate-on-demand cache.  Will break
 * down 2-meg pages into singleton pages automatically.  Returns NULL if
 * a superpage is requested and no superpages are available. */
static struct page_info * p2m_pod_cache_get(struct p2m_domain *p2m,
                                            unsigned int order)
{
    struct page_info *p = NULL;
    int i;

    ASSERT(pod_locked_by_me(p2m));

    if ( order == PAGE_ORDER_2M && page_list_empty(&p2m->pod.super) )
        return NULL;
    else if ( order == PAGE_ORDER_4K && page_list_empty(&p2m->pod.single) )
    {
        unsigned long mfn;
        struct page_info *q;

        BUG_ON( page_list_empty(&p2m->pod.super) );

        /* Break up a superpage to make single pages. NB count doesn't
         * need to be adjusted. */
        p = page_list_remove_head(&p2m->pod.super);
        mfn = mfn_x(page_to_mfn(p));

        for ( i = 0; i < SUPERPAGE_PAGES; i++ )
        {
            q = mfn_to_page(_mfn(mfn + i));
            page_list_add_tail(q, &p2m->pod.single);
        }
    }

    if ( order == PAGE_ORDER_2M )
    {
        BUG_ON( page_list_empty(&p2m->pod.super) );
        p = page_list_remove_head(&p2m->pod.super);
    }
    else
    {
        BUG_ON( page_list_empty(&p2m->pod.single) );
        p = page_list_remove_head(&p2m->pod.single);
    }
    p2m->pod.count -= 1 << order;

    /* Put the pages back on the domain page_list */
    lock_page_alloc(p2m);
    for ( i = 0; i < (1 << order); i++ )
    {
        BUG_ON(page_get_owner(p + i) != p2m->domain);
        page_list_add_tail(p + i, &p2m->domain->page_list);
    }
    unlock_page_alloc(p2m);

    return p;
}

/* Set the size of the cache, allocating or freeing as necessary. */
static int
p2m_pod_set_cache_target(struct p2m_domain *p2m, unsigned long pod_target, int preemptible)
{
    struct domain *d = p2m->domain;
    int ret = 0;

    ASSERT(pod_locked_by_me(p2m));

    /* Increasing the target */
    while ( pod_target > p2m->pod.count )
    {
        struct page_info * page;
        int order;

        if ( (pod_target - p2m->pod.count) >= SUPERPAGE_PAGES )
            order = PAGE_ORDER_2M;
        else
            order = PAGE_ORDER_4K;
    retry:
        page = alloc_domheap_pages(d, order, PAGE_ORDER_4K);
        if ( unlikely(page == NULL) )
        {
            if ( order == PAGE_ORDER_2M )
            {
                /* If we can't allocate a superpage, try singleton pages */
                order = PAGE_ORDER_4K;
                goto retry;
            }

            printk("%s: Unable to allocate page for PoD cache (target=%lu cache=%ld)\n",
                   __func__, pod_target, p2m->pod.count);
            ret = -ENOMEM;
            goto out;
        }

        p2m_pod_cache_add(p2m, page, order);

        if ( hypercall_preempt_check() && preemptible )
        {
            ret = -EAGAIN;
            goto out;
        }
    }

    /* Decreasing the target */
    /* We hold the pod lock here, so we don't need to worry about
     * cache disappearing under our feet. */
    while ( pod_target < p2m->pod.count )
    {
        struct page_info * page;
        int order, i;

        if ( (p2m->pod.count - pod_target) > SUPERPAGE_PAGES
             && !page_list_empty(&p2m->pod.super) )
            order = PAGE_ORDER_2M;
        else
            order = PAGE_ORDER_4K;

        page = p2m_pod_cache_get(p2m, order);

        ASSERT(page != NULL);

        /* Then free them */
        for ( i = 0; i < (1 << order); i++ )
        {
            /* Copied from common/memory.c:guest_remove_page() */
            if ( unlikely(!get_page(page + i, d)) )
            {
                gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id);
                ret = -EINVAL;
                goto out;
            }

            if ( test_and_clear_bit(_PGT_pinned, &(page + i)->u.inuse.type_info) )
                put_page_and_type(page + i);

            if ( test_and_clear_bit(_PGC_allocated, &(page + i)->count_info) )
                put_page(page + i);

            put_page(page + i);

            if ( hypercall_preempt_check() && preemptible )
            {
                ret = -EAGAIN;
                goto out;
            }
        }
    }

out:
    return ret;
}

/*
 * The "right behavior" here requires some careful thought.  First, some
 * definitions:
 * + M: static_max
 * + B: number of pages the balloon driver has ballooned down to.
 * + P: Number of populated pages.
 * + T: Old target
 * + T': New target
 *
 * The following equations should hold:
 *  0 <= P <= T <= B <= M
 *  d->arch.p2m->pod.entry_count == B - P
 *  d->tot_pages == P + d->arch.p2m->pod.count
 *
 * Now we have the following potential cases to cover:
 *     B <T': Set the PoD cache size equal to the number of outstanding PoD
 *   entries.  The balloon driver will deflate the balloon to give back
 *   the remainder of the ram to the guest OS.
 *  T <T'<B : Increase PoD cache size.
 *  T'<T<=B : Here we have a choice.  We can decrease the size of the cache,
 *   get the memory right away.  However, that means every time we
 *   reduce the memory target we risk the guest attempting to populate the
 *   memory before the balloon driver has reached its new target.  Safer to
 *   never reduce the cache size here, but only when the balloon driver frees
 *   PoD ranges.
 *
 * If there are many zero pages, we could reach the target also by doing
 * zero sweeps and marking the ranges PoD; but the balloon driver will have
 * to free this memory eventually anyway, so we don't actually gain that
 * much by doing so.
 *
 * NB that the equation (B<T') may require adjustment to the cache
 * size as PoD pages are freed as well; i.e., freeing a PoD-backed
 * entry when pod.entry_count == pod.count requires us to reduce both
 * pod.entry_count and pod.count.
 */
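/*
 * Worked example (illustrative numbers only): with M = 1024MiB and the
 * balloon driver currently at B = 768MiB, of which P = 512MiB are
 * populated, pod.entry_count corresponds to B - P = 256MiB of outstanding
 * PoD entries and d->tot_pages to P plus the cache.  A new target
 * T' = 896MiB falls in the "B < T'" case: pod_target would be
 * T' - P = 384MiB, so it is clipped to entry_count (256MiB) and the
 * balloon driver deflates to supply the rest.  A new target between T and
 * B simply grows the cache, and a lower target is left to the balloon
 * driver to handle.
 */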
int
p2m_pod_set_mem_target(struct domain *d, unsigned long target)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    int ret = 0;
    unsigned long populated, pod_target;

    pod_lock(p2m);

    /* P == B: Nothing to do (unless the guest is being created). */
    populated = d->tot_pages - p2m->pod.count;
    if ( populated > 0 && p2m->pod.entry_count == 0 )
        goto out;

    /* Don't do anything if the domain is being torn down */
    if ( d->is_dying )
        goto out;

    /* T' < B: Don't reduce the cache size; let the balloon driver
     * take care of it. */
    if ( target < d->tot_pages )
        goto out;

    pod_target = target - populated;

    /* B < T': Set the cache size equal to # of outstanding entries,
     * let the balloon driver fill in the rest. */
    if ( populated > 0 && pod_target > p2m->pod.entry_count )
        pod_target = p2m->pod.entry_count;

    ASSERT( pod_target >= p2m->pod.count );

    ret = p2m_pod_set_cache_target(p2m, pod_target, 1/*preemptible*/);

out:
    pod_unlock(p2m);

    return ret;
}

void
p2m_pod_empty_cache(struct domain *d)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    struct page_info *page;

    /* After this barrier no new PoD activities can happen. */
    BUG_ON(!d->is_dying);
    spin_barrier(&p2m->pod.lock.lock);

    lock_page_alloc(p2m);

    while ( (page = page_list_remove_head(&p2m->pod.super)) )
    {
        int i;

        for ( i = 0; i < SUPERPAGE_PAGES; i++ )
        {
            BUG_ON(page_get_owner(page + i) != d);
            page_list_add_tail(page + i, &d->page_list);
        }

        p2m->pod.count -= SUPERPAGE_PAGES;
    }

    while ( (page = page_list_remove_head(&p2m->pod.single)) )
    {
        BUG_ON(page_get_owner(page) != d);
        page_list_add_tail(page, &d->page_list);

        p2m->pod.count -= 1;
    }

    BUG_ON(p2m->pod.count != 0);

    unlock_page_alloc(p2m);
}

int
p2m_pod_offline_or_broken_hit(struct page_info *p)
{
    struct domain *d;
    struct p2m_domain *p2m;
    struct page_info *q, *tmp;
    unsigned long mfn, bmfn;

    if ( !(d = page_get_owner(p)) || !(p2m = p2m_get_hostp2m(d)) )
        return 0;

    pod_lock(p2m);
    bmfn = mfn_x(page_to_mfn(p));
    page_list_for_each_safe(q, tmp, &p2m->pod.super)
    {
        mfn = mfn_x(page_to_mfn(q));
        if ( (bmfn >= mfn) && ((bmfn - mfn) < SUPERPAGE_PAGES) )
        {
            unsigned long i;

            page_list_del(q, &p2m->pod.super);
            for ( i = 0; i < SUPERPAGE_PAGES; i++ )
            {
                q = mfn_to_page(_mfn(mfn + i));
                page_list_add_tail(q, &p2m->pod.single);
            }
            page_list_del(p, &p2m->pod.single);
            p2m->pod.count--;
            goto pod_hit;
        }
    }

    page_list_for_each_safe(q, tmp, &p2m->pod.single)
    {
        mfn = mfn_x(page_to_mfn(q));
        if ( mfn == bmfn )
        {
            page_list_del(p, &p2m->pod.single);
            p2m->pod.count--;
            goto pod_hit;
        }
    }

    pod_unlock(p2m);
    return 0;

pod_hit:
    lock_page_alloc(p2m);
    page_list_add_tail(p, &d->arch.relmem_list);
    unlock_page_alloc(p2m);
    pod_unlock(p2m);
    return 1;
}

void
p2m_pod_offline_or_broken_replace(struct page_info *p)
{
    struct domain *d;
    struct p2m_domain *p2m;

    if ( !(d = page_get_owner(p)) || !(p2m = p2m_get_hostp2m(d)) )
        return;

    free_domheap_page(p);

    p = alloc_domheap_page(d, PAGE_ORDER_4K);
    if ( unlikely(!p) )
        return;

    pod_lock(p2m);
    p2m_pod_cache_add(p2m, p, PAGE_ORDER_4K);
    pod_unlock(p2m);
}

static int
p2m_pod_zero_check_superpage(struct p2m_domain *p2m, unsigned long gfn);


/* This function is needed for two reasons:
 * + To properly handle clearing of PoD entries
 * + To "steal back" memory being freed for the PoD cache, rather than
 *   actually freeing it
 *
 * Once both of these have been handled, we can return and
 * allow decrease_reservation() to handle everything else.
 */
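/* (For context: for translated guests, common/memory.c:decrease_reservation()
 * gives this function first refusal on each extent of a
 * XENMEM_decrease_reservation request before doing its generic per-page
 * frees; a non-zero return tells it the extent has been fully handled.) */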
int
p2m_pod_decrease_reservation(struct domain *d,
                             xen_pfn_t gpfn,
                             unsigned int order)
{
    int ret = 0;
    int i;
    struct p2m_domain *p2m = p2m_get_hostp2m(d);

    int steal_for_cache;
    int pod, nonpod, ram;

    gfn_lock(p2m, gpfn, order);
    pod_lock(p2m);

    /* If we don't have any outstanding PoD entries, let things take their
     * course. */
    if ( p2m->pod.entry_count == 0 )
        goto out_unlock;

    if ( unlikely(d->is_dying) )
        goto out_unlock;

recount:
    pod = nonpod = ram = 0;

    /* Figure out if we need to steal some freed memory for our cache */
    steal_for_cache = ( p2m->pod.entry_count > p2m->pod.count );

    /* FIXME: Add contiguous; query for PSE entries? */
    for ( i = 0; i < (1 << order); i++ )
    {
        p2m_access_t a;
        p2m_type_t t;

        (void)p2m->get_entry(p2m, gpfn + i, &t, &a, 0, NULL);

        if ( t == p2m_populate_on_demand )
            pod++;
        else
        {
            nonpod++;
            if ( p2m_is_ram(t) )
                ram++;
        }
    }

    /* No populate-on-demand?  Don't need to steal anything?  Then we're done! */
    if ( !pod && !steal_for_cache )
        goto out_unlock;

    if ( !nonpod )
    {
        /* All PoD: Mark the whole region invalid and tell caller
         * we're done. */
        set_p2m_entry(p2m, gpfn, _mfn(INVALID_MFN), order, p2m_invalid, p2m->default_access);
        p2m->pod.entry_count -= (1 << order);
        BUG_ON(p2m->pod.entry_count < 0);
        ret = 1;
        goto out_entry_check;
    }

    /* Try to grab entire superpages if possible.  Since the common case is for drivers
     * to pass back singleton pages, see if we can take the whole page back and mark the
     * rest PoD. */
    if ( steal_for_cache
         && p2m_pod_zero_check_superpage(p2m, gpfn & ~(SUPERPAGE_PAGES-1)) )
    {
        /* Since order may be arbitrary, we may have taken more or less
         * than we were actually asked to; so just re-count from scratch */
        goto recount;
    }

    /* Process as long as:
     * + There are PoD entries to handle, or
     * + There is ram left, and we want to steal it
     */
    for ( i = 0;
          i < (1 << order) && (pod > 0 || (steal_for_cache && ram > 0));
          i++ )
    {
        mfn_t mfn;
        p2m_type_t t;
        p2m_access_t a;

        mfn = p2m->get_entry(p2m, gpfn + i, &t, &a, 0, NULL);
        if ( t == p2m_populate_on_demand )
        {
            set_p2m_entry(p2m, gpfn + i, _mfn(INVALID_MFN), 0, p2m_invalid, p2m->default_access);
            p2m->pod.entry_count--;
            BUG_ON(p2m->pod.entry_count < 0);
            pod--;
        }
        else if ( steal_for_cache && p2m_is_ram(t) )
        {
            struct page_info *page;

            ASSERT(mfn_valid(mfn));

            page = mfn_to_page(mfn);

            set_p2m_entry(p2m, gpfn + i, _mfn(INVALID_MFN), 0, p2m_invalid, p2m->default_access);
            set_gpfn_from_mfn(mfn_x(mfn), INVALID_M2P_ENTRY);

            p2m_pod_cache_add(p2m, page, 0);

            steal_for_cache = ( p2m->pod.entry_count > p2m->pod.count );

            nonpod--;
            ram--;
        }
    }

    /* If there are no more non-PoD entries, tell decrease_reservation() that
     * there's nothing left to do. */
    if ( nonpod == 0 )
        ret = 1;

out_entry_check:
    /* If we've reduced our "liabilities" beyond our "assets", free some */
    if ( p2m->pod.entry_count < p2m->pod.count )
        p2m_pod_set_cache_target(p2m, p2m->pod.entry_count, 0/*can't preempt*/);

out_unlock:
    pod_unlock(p2m);
    gfn_unlock(p2m, gpfn, order);
    return ret;
}

void p2m_pod_dump_data(struct domain *d)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);

    printk(" PoD entries=%ld cachesize=%ld\n",
           p2m->pod.entry_count, p2m->pod.count);
}

/* Search for all-zero superpages to be reclaimed as superpages for the
 * PoD cache.  Must be called w/ pod lock held, must lock the superpage
 * in the p2m. */
static int
p2m_pod_zero_check_superpage(struct p2m_domain *p2m, unsigned long gfn)
{
    mfn_t mfn, mfn0 = _mfn(INVALID_MFN);
    p2m_type_t type, type0 = 0;
    unsigned long * map = NULL;
    int ret = 0, reset = 0;
    int i, j;
    int max_ref = 1;
    struct domain *d = p2m->domain;

    ASSERT(pod_locked_by_me(p2m));

    if ( !superpage_aligned(gfn) )
        goto out;

    /* Allow an extra refcount for one shadow pt mapping in shadowed domains */
    if ( paging_mode_shadow(d) )
        max_ref++;

    /* NOTE: this is why we don't enforce deadlock constraints between p2m
     * and pod locks */
    gfn_lock(p2m, gfn, SUPERPAGE_ORDER);

    /* Look up the mfns, checking to make sure they're the same mfn
     * and aligned, and mapping them. */
    for ( i = 0; i < SUPERPAGE_PAGES; i++ )
    {
        p2m_access_t a;

        mfn = p2m->get_entry(p2m, gfn + i, &type, &a, 0, NULL);

        if ( i == 0 )
        {
            mfn0 = mfn;
            type0 = type;
        }

        /* Conditions that must be met for superpage-superpage:
         * + All gfns are ram types
         * + All gfns have the same type
         * + All of the mfns are allocated to a domain
         * + None of the mfns are used as pagetables, or allocated via xenheap
         * + The first mfn is 2-meg aligned
         * + All the other mfns are in sequence
         * Adding for good measure:
         * + None of the mfns are likely to be mapped elsewhere (refcount
         *   2 or less for shadow, 1 for hap)
         */
        if ( !p2m_is_ram(type)
             || type != type0
             || ( (mfn_to_page(mfn)->count_info & PGC_allocated) == 0 )
             || ( (mfn_to_page(mfn)->count_info & (PGC_page_table|PGC_xen_heap)) != 0 )
             || ( (mfn_to_page(mfn)->count_info & PGC_xen_heap ) != 0 )
             || ( (mfn_to_page(mfn)->count_info & PGC_count_mask) > max_ref )
             || !( ( i == 0 && superpage_aligned(mfn_x(mfn0)) )
                   || ( i != 0 && mfn_x(mfn) == (mfn_x(mfn0) + i) ) ) )
            goto out;
    }

    /* Now, do a quick check to see if it may be zero before unmapping. */
    for ( i = 0; i < SUPERPAGE_PAGES; i++ )
    {
        /* Quick zero-check */
        map = map_domain_page(mfn_x(mfn0) + i);

        for ( j = 0; j < 16; j++ )
            if ( *(map + j) != 0 )
                break;

        unmap_domain_page(map);

        if ( j < 16 )
            goto out;
    }

    /* Try to remove the page, restoring old mapping if it fails. */
    set_p2m_entry(p2m, gfn, _mfn(0), PAGE_ORDER_2M,
                  p2m_populate_on_demand, p2m->default_access);

    /* Make sure none of the MFNs are used elsewhere... for example, mapped
     * via the grant table interface, or by qemu.  Allow one refcount for
     * being allocated to the domain. */
    for ( i = 0; i < SUPERPAGE_PAGES; i++ )
    {
        mfn = _mfn(mfn_x(mfn0) + i);
        if ( (mfn_to_page(mfn)->count_info & PGC_count_mask) > 1 )
        {
            reset = 1;
            goto out_reset;
        }
    }

    /* Finally, do a full zero-check */
    for ( i = 0; i < SUPERPAGE_PAGES; i++ )
    {
        map = map_domain_page(mfn_x(mfn0) + i);

        for ( j = 0; j < PAGE_SIZE/sizeof(*map); j++ )
            if ( *(map + j) != 0 )
            {
                reset = 1;
                break;
            }

        unmap_domain_page(map);

        if ( reset )
            goto out_reset;
    }

    if ( tb_init_done )
    {
        struct {
            u64 gfn, mfn;
            int d:16,order:16;
        } t;

        t.gfn = gfn;
        t.mfn = mfn_x(mfn);
        t.d = d->domain_id;
        t.order = 9;

        __trace_var(TRC_MEM_POD_ZERO_RECLAIM, 0, sizeof(t), &t);
    }

    /* Finally!  We've passed all the checks, and can add the mfn superpage
     * back on the PoD cache, and account for the new p2m PoD entries */
    p2m_pod_cache_add(p2m, mfn_to_page(mfn0), PAGE_ORDER_2M);
    p2m->pod.entry_count += SUPERPAGE_PAGES;

    ret = SUPERPAGE_PAGES;

out_reset:
    if ( reset )
        set_p2m_entry(p2m, gfn, mfn0, 9, type0, p2m->default_access);

out:
    gfn_unlock(p2m, gfn, SUPERPAGE_ORDER);
    return ret;
}

static void
p2m_pod_zero_check(struct p2m_domain *p2m, unsigned long *gfns, int count)
{
    mfn_t mfns[count];
    p2m_type_t types[count];
    unsigned long * map[count];
    struct domain *d = p2m->domain;

    int i, j;
    int max_ref = 1;

    /* Allow an extra refcount for one shadow pt mapping in shadowed domains */
    if ( paging_mode_shadow(d) )
        max_ref++;

    /* First, get the gfn list, translate to mfns, and map the pages. */
    for ( i = 0; i < count; i++ )
    {
        p2m_access_t a;

        mfns[i] = p2m->get_entry(p2m, gfns[i], types + i, &a, 0, NULL);
        /* If this is ram, and not a pagetable or from the xen heap, and probably not mapped
           elsewhere, map it; otherwise, skip. */
        if ( p2m_is_ram(types[i])
             && ( (mfn_to_page(mfns[i])->count_info & PGC_allocated) != 0 )
             && ( (mfn_to_page(mfns[i])->count_info & (PGC_page_table|PGC_xen_heap)) == 0 )
             && ( (mfn_to_page(mfns[i])->count_info & PGC_count_mask) <= max_ref ) )
            map[i] = map_domain_page(mfn_x(mfns[i]));
        else
            map[i] = NULL;
    }

    /* Then, go through and check for zeroed pages, removing write permission
     * for those with zeroes. */
    for ( i = 0; i < count; i++ )
    {
        if ( !map[i] )
            continue;

        /* Quick zero-check */
        for ( j = 0; j < 16; j++ )
            if ( *(map[i] + j) != 0 )
                break;

        if ( j < 16 )
        {
            unmap_domain_page(map[i]);
            map[i] = NULL;
            continue;
        }

        /* Try to remove the page, restoring old mapping if it fails. */
        set_p2m_entry(p2m, gfns[i], _mfn(0), PAGE_ORDER_4K,
                      p2m_populate_on_demand, p2m->default_access);

        /* See if the page was successfully unmapped.  (Allow one refcount
         * for being allocated to a domain.) */
        if ( (mfn_to_page(mfns[i])->count_info & PGC_count_mask) > 1 )
        {
            unmap_domain_page(map[i]);
            map[i] = NULL;

            set_p2m_entry(p2m, gfns[i], mfns[i], PAGE_ORDER_4K,
                          types[i], p2m->default_access);

            continue;
        }
    }

    /* Now check each page for real */
    for ( i = 0; i < count; i++ )
    {
        if ( !map[i] )
            continue;

        for ( j = 0; j < PAGE_SIZE/sizeof(*map[i]); j++ )
            if ( *(map[i] + j) != 0 )
                break;

        unmap_domain_page(map[i]);

        /* See comment in p2m_pod_zero_check_superpage() re gnttab
         * check timing. */
        if ( j < PAGE_SIZE/sizeof(*map[i]) )
        {
            set_p2m_entry(p2m, gfns[i], mfns[i], PAGE_ORDER_4K,
                          types[i], p2m->default_access);
        }
        else
        {
            if ( tb_init_done )
            {
                struct {
                    u64 gfn, mfn;
                    int d:16,order:16;
                } t;

                t.gfn = gfns[i];
                t.mfn = mfn_x(mfns[i]);
                t.d = d->domain_id;
                t.order = 0;

                __trace_var(TRC_MEM_POD_ZERO_RECLAIM, 0, sizeof(t), &t);
            }

            /* Add to cache, and account for the new p2m PoD entry */
            p2m_pod_cache_add(p2m, mfn_to_page(mfns[i]), PAGE_ORDER_4K);
            p2m->pod.entry_count++;
        }
    }
}

#define POD_SWEEP_LIMIT 1024

/* When populating a new superpage, look at recently populated superpages
 * hoping that they've been zeroed.  This will snap up zeroed pages as soon as
 * the guest OS is done with them. */
static void
p2m_pod_check_last_super(struct p2m_domain *p2m, unsigned long gfn_aligned)
{
    unsigned long check_gfn;

    ASSERT(p2m->pod.last_populated_index < POD_HISTORY_MAX);

    check_gfn = p2m->pod.last_populated[p2m->pod.last_populated_index];

    p2m->pod.last_populated[p2m->pod.last_populated_index] = gfn_aligned;

    p2m->pod.last_populated_index =
        ( p2m->pod.last_populated_index + 1 ) % POD_HISTORY_MAX;

    p2m_pod_zero_check_superpage(p2m, check_gfn);
}


#define POD_SWEEP_STRIDE  16
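/* The sweep below scans downwards from pod.reclaim_single, zero-checking
 * candidate gfns in batches of POD_SWEEP_STRIDE and giving up after
 * POD_SWEEP_LIMIT gfns once something has been reclaimed; the stopping
 * point is remembered so the next emergency sweep resumes further down
 * rather than rescanning the same range. */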
static void
p2m_pod_emergency_sweep(struct p2m_domain *p2m)
{
    unsigned long gfns[POD_SWEEP_STRIDE];
    unsigned long i, j = 0, start, limit;
    p2m_type_t t;

    if ( p2m->pod.reclaim_single == 0 )
        p2m->pod.reclaim_single = p2m->pod.max_guest;

    start = p2m->pod.reclaim_single;
    limit = (start > POD_SWEEP_LIMIT) ? (start - POD_SWEEP_LIMIT) : 0;

    /* FIXME: Figure out how to avoid superpages */
    /* NOTE: Promote to globally locking the p2m. This will get complicated
     * in a fine-grained scenario. If we lock each gfn individually we must be
     * careful about spinlock recursion limits and POD_SWEEP_STRIDE. */
    p2m_lock(p2m);
    for ( i = p2m->pod.reclaim_single; i > 0; i-- )
    {
        p2m_access_t a;

        (void)p2m->get_entry(p2m, i, &t, &a, 0, NULL);
        if ( p2m_is_ram(t) )
        {
            gfns[j] = i;
            j++;
            BUG_ON(j > POD_SWEEP_STRIDE);
            if ( j == POD_SWEEP_STRIDE )
            {
                p2m_pod_zero_check(p2m, gfns, j);
                j = 0;
            }
        }
        /* Stop if we're past our limit and we have found *something*.
         *
         * NB that this is a zero-sum game; we're increasing our cache size
         * by re-increasing our 'debt'.  Since we hold the pod lock,
         * (entry_count - count) must remain the same. */
        if ( p2m->pod.count > 0 && i < limit )
            break;
    }

    if ( j )
        p2m_pod_zero_check(p2m, gfns, j);

    p2m_unlock(p2m);
    p2m->pod.reclaim_single = i ? i - 1 : i;
}

int
p2m_pod_demand_populate(struct p2m_domain *p2m, unsigned long gfn,
                        unsigned int order,
                        p2m_query_t q)
{
    struct domain *d = p2m->domain;
    struct page_info *p = NULL; /* Compiler warnings */
    unsigned long gfn_aligned;
    mfn_t mfn;
    unsigned long i;

    ASSERT(gfn_locked_by_me(p2m, gfn));
    pod_lock(p2m);

    /* This check is done with the pod lock held.  This will make sure that
     * even if d->is_dying changes under our feet, p2m_pod_empty_cache()
     * won't start until we're done. */
    if ( unlikely(d->is_dying) )
        goto out_fail;

    /* Because PoD does not keep a cache list for 1GB pages, it has to remap
     * the 1GB region in 2MB chunks and retry. */
    if ( order == PAGE_ORDER_1G )
    {
        pod_unlock(p2m);
        gfn_aligned = (gfn >> order) << order;
        /* Note that we are supposed to call set_p2m_entry() 512 times to
         * split 1GB into 512 2MB pages here, but we only do it once because
         * set_p2m_entry() will automatically shatter the 1GB page into
         * 512 2MB pages; the remaining 511 calls are unnecessary.
         *
         * NOTE: In a fine-grained p2m locking scenario this operation
         * may need to promote its locking from gfn->1g superpage.
         */
        set_p2m_entry(p2m, gfn_aligned, _mfn(0), PAGE_ORDER_2M,
                      p2m_populate_on_demand, p2m->default_access);
        return 0;
    }

    /* Only sweep if we're actually out of memory.  Doing anything else
     * wastes time and fragments superpages in the p2m. */
    if ( p2m->pod.count == 0 )
        p2m_pod_emergency_sweep(p2m);

    /* If the sweep failed, give up. */
    if ( p2m->pod.count == 0 )
        goto out_of_memory;

    /* Keep track of the highest gfn demand-populated by a guest fault */
    if ( gfn > p2m->pod.max_guest )
        p2m->pod.max_guest = gfn;

    /* Get a page from the cache.  A NULL return value indicates that the
     * 2-meg range should be marked singleton PoD, and retried. */
    if ( (p = p2m_pod_cache_get(p2m, order)) == NULL )
        goto remap_and_retry;

    mfn = page_to_mfn(p);

    BUG_ON((mfn_x(mfn) & ((1 << order) - 1)) != 0);

    gfn_aligned = (gfn >> order) << order;

    set_p2m_entry(p2m, gfn_aligned, mfn, order, p2m_ram_rw, p2m->default_access);

    for ( i = 0; i < (1UL << order); i++ )
    {
        set_gpfn_from_mfn(mfn_x(mfn) + i, gfn_aligned + i);
        paging_mark_dirty(d, mfn_x(mfn) + i);
    }

    p2m->pod.entry_count -= (1 << order);
    BUG_ON(p2m->pod.entry_count < 0);

    pod_unlock(p2m);

    if ( tb_init_done )
    {
        struct {
            u64 gfn, mfn;
            int d:16,order:16;
        } t;

        t.gfn = gfn;
        t.mfn = mfn_x(mfn);
        t.d = d->domain_id;
        t.order = order;

        __trace_var(TRC_MEM_POD_POPULATE, 0, sizeof(t), &t);
    }

    /* Check the last guest demand-populate */
    if ( p2m->pod.entry_count > p2m->pod.count
         && (order == PAGE_ORDER_2M)
         && (q & P2M_ALLOC) )
        p2m_pod_check_last_super(p2m, gfn_aligned);

    return 0;

out_of_memory:
    pod_unlock(p2m);

    printk("%s: Dom%d out of PoD memory! (tot=%"PRIu32" ents=%ld dom%d)\n",
           __func__, d->domain_id, d->tot_pages, p2m->pod.entry_count,
           current->domain->domain_id);
    domain_crash(d);
    return -1;

out_fail:
    pod_unlock(p2m);
    return -1;

remap_and_retry:
    BUG_ON(order != PAGE_ORDER_2M);
    pod_unlock(p2m);

    /* Remap this 2-meg region in singleton chunks */
    /* NOTE: In a p2m fine-grained lock scenario this might
     * need promoting the gfn lock from gfn->2M superpage */
    gfn_aligned = (gfn >> order) << order;
    for ( i = 0; i < (1 << order); i++ )
        set_p2m_entry(p2m, gfn_aligned + i, _mfn(0), PAGE_ORDER_4K,
                      p2m_populate_on_demand, p2m->default_access);
    if ( tb_init_done )
    {
        struct {
            u64 gfn;
            int d:16;
        } t;

        t.gfn = gfn;
        t.d = d->domain_id;

        __trace_var(TRC_MEM_POD_SUPERPAGE_SPLINTER, 0, sizeof(t), &t);
    }

    return 0;
}
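
/* (For context: the function below is the entry point used at domain build
 * time.  populate_physmap() in common/memory.c calls it, instead of
 * allocating real pages, when a XENMEM_populate_physmap request carries the
 * MEMF_populate_on_demand flag, which the domain builder uses to set up the
 * PoD portion of a new guest's physmap.) */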
int
guest_physmap_mark_populate_on_demand(struct domain *d, unsigned long gfn,
                                      unsigned int order)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    unsigned long i, pod_count = 0;
    p2m_type_t ot;
    mfn_t omfn;
    int rc = 0;

    if ( !paging_mode_translate(d) )
        return -EINVAL;

    rc = p2m_gfn_check_limit(d, gfn, order);
    if ( rc != 0 )
        return rc;

    gfn_lock(p2m, gfn, order);

    P2M_DEBUG("mark pod gfn=%#lx\n", gfn);

    /* Make sure all gpfns are unused */
    for ( i = 0; i < (1UL << order); i++ )
    {
        p2m_access_t a;

        omfn = p2m->get_entry(p2m, gfn + i, &ot, &a, 0, NULL);
        if ( p2m_is_ram(ot) )
        {
            printk("%s: gfn_to_mfn returned type %d!\n",
                   __func__, ot);
            rc = -EBUSY;
            goto out;
        }
        else if ( ot == p2m_populate_on_demand )
        {
            /* Count how many PoD entries we'll be replacing if successful */
            pod_count++;
        }
    }

    /* Now, actually do the two-way mapping */
    if ( !set_p2m_entry(p2m, gfn, _mfn(0), order,
                        p2m_populate_on_demand, p2m->default_access) )
        rc = -EINVAL;
    else
    {
        pod_lock(p2m);
        p2m->pod.entry_count += 1 << order;
        p2m->pod.entry_count -= pod_count;
        BUG_ON(p2m->pod.entry_count < 0);
        pod_unlock(p2m);
    }

out:
    gfn_unlock(p2m, gfn, order);

    return rc;
}