1
/******************************************************************************
4
* Helper functions to offline/online one page
6
* Copyright (c) 2003, K A Fraser.
7
* Copyright (c) 2009, Intel Corporation.
18
#include "xc_private.h"
20
#include "xg_private.h"
21
#include "xg_save_restore.h"
23
/*
 * NOTE(review): this file is a truncated extract -- the interleaved bare
 * numbers are line numbers from the original source and many lines
 * (braces, members, returns) are missing throughout this view.
 *
 * Snapshot of a domain's memory layout, filled in by init_mem_info().
 */
struct domain_mem_info{
25
unsigned int pt_level;   /* guest pagetable levels (2, 3 or 4) */
26
unsigned int guest_width;   /* guest pointer width in bytes (4 or 8) */
29
unsigned long p2m_size;   /* number of entries in the guest p2m table */
34
/* One saved PTE location: which pagetable frame and which slot in it. */
struct pte_backup_entry
40
/* Initial capacity of the pte_backup entry array (doubled on demand). */
#define DEFAULT_BACKUP_COUNT 1024
43
struct pte_backup_entry *entries;   /* dynamically grown backing array */
48
/* File-scope domain info context used by the P2M/M2P helper macros. */
static struct domain_info_context _dinfo;
49
static struct domain_info_context *dinfo = &_dinfo;
51
int xc_mark_page_online(int xc, unsigned long start,
52
unsigned long end, uint32_t *status)
57
if ( !status || (end < start) )
60
if (lock_pages(status, sizeof(uint32_t)*(end - start + 1)))
62
ERROR("Could not lock memory for xc_mark_page_online\n");
66
sysctl.cmd = XEN_SYSCTL_page_offline_op;
67
sysctl.u.page_offline.start = start;
68
sysctl.u.page_offline.cmd = sysctl_page_online;
69
sysctl.u.page_offline.end = end;
70
set_xen_guest_handle(sysctl.u.page_offline.status, status);
71
ret = xc_sysctl(xc, &sysctl);
73
unlock_pages(status, sizeof(uint32_t)*(end - start + 1));
78
int xc_mark_page_offline(int xc, unsigned long start,
79
unsigned long end, uint32_t *status)
84
if ( !status || (end < start) )
87
if (lock_pages(status, sizeof(uint32_t)*(end - start + 1)))
89
ERROR("Could not lock memory for xc_mark_page_offline");
93
sysctl.cmd = XEN_SYSCTL_page_offline_op;
94
sysctl.u.page_offline.start = start;
95
sysctl.u.page_offline.cmd = sysctl_page_offline;
96
sysctl.u.page_offline.end = end;
97
set_xen_guest_handle(sysctl.u.page_offline.status, status);
98
ret = xc_sysctl(xc, &sysctl);
100
unlock_pages(status, sizeof(uint32_t)*(end - start + 1));
105
int xc_query_page_offline_status(int xc, unsigned long start,
106
unsigned long end, uint32_t *status)
111
if ( !status || (end < start) )
114
if (lock_pages(status, sizeof(uint32_t)*(end - start + 1)))
116
ERROR("Could not lock memory for xc_query_page_offline_status\n");
120
sysctl.cmd = XEN_SYSCTL_page_offline_op;
121
sysctl.u.page_offline.start = start;
122
sysctl.u.page_offline.cmd = sysctl_query_page_offline;
123
sysctl.u.page_offline.end = end;
124
set_xen_guest_handle(sysctl.u.page_offline.status, status);
125
ret = xc_sysctl(xc, &sysctl);
127
unlock_pages(status, sizeof(uint32_t)*(end - start + 1));
133
* There should be no updates to the grant table while the domain is paused
135
static int xc_is_page_granted_v1(int xc_handle, xen_pfn_t gpfn,
136
grant_entry_v1_t *gnttab, int gnt_num)
143
for (i = 0; i < gnt_num; i++)
144
if ( ((gnttab[i].flags & GTF_type_mask) != GTF_invalid) &&
145
(gnttab[i].frame == gpfn) )
148
return (i != gnt_num);
151
static int xc_is_page_granted_v2(int xc_handle, xen_pfn_t gpfn,
152
grant_entry_v2_t *gnttab, int gnt_num)
159
for (i = 0; i < gnt_num; i++)
160
if ( ((gnttab[i].hdr.flags & GTF_type_mask) != GTF_invalid) &&
161
(gnttab[i].full_page.frame == gpfn) )
164
return (i != gnt_num);
167
/*
 * Read the MFN for @pfn out of a raw guest p2m table whose entry width
 * depends on the guest pointer size (@gwidth in bytes: 8 or 4).
 * A 32-bit entry of 0xffffffff (invalid) is widened to a full-width -1
 * so that callers can compare against INVALID_P2M_ENTRY uniformly.
 */
static xen_pfn_t pfn_to_mfn(xen_pfn_t pfn, xen_pfn_t *p2m, int gwidth)
{
    return ((xen_pfn_t) ((gwidth==8)?
                         (((uint64_t *)p2m)[(pfn)]):
                         ((((uint32_t *)p2m)[(pfn)]) == 0xffffffffU ?
                          (-1UL) :
                          (((uint32_t *)p2m)[(pfn)]))));
}
176
static int get_pt_level(int xc_handle, uint32_t domid,
177
unsigned int *pt_level,
178
unsigned int *gwidth)
181
xen_capabilities_info_t xen_caps = "";
183
if (xc_version(xc_handle, XENVER_capabilities, &xen_caps) != 0)
186
memset(&domctl, 0, sizeof(domctl));
187
domctl.domain = domid;
188
domctl.cmd = XEN_DOMCTL_get_address_size;
190
if ( do_domctl(xc_handle, &domctl) != 0 )
193
*gwidth = domctl.u.address_size.size / 8;
195
if (strstr(xen_caps, "xen-3.0-x86_64"))
196
/* Depends on whether it's a compat 32-on-64 guest */
197
*pt_level = ( (*gwidth == 8) ? 4 : 3 );
198
else if (strstr(xen_caps, "xen-3.0-x86_32p"))
200
else if (strstr(xen_caps, "xen-3.0-x86_32"))
208
static int close_mem_info(int xc_handle, struct domain_mem_info *minfo)
211
free(minfo->pfn_type);
212
munmap(minfo->m2p_table, M2P_SIZE(minfo->max_mfn));
213
munmap(minfo->p2m_table, P2M_FLL_ENTRIES * PAGE_SIZE);
214
minfo->p2m_table = minfo->m2p_table = NULL;
219
/*
 * Gather the memory layout of @domid into @minfo: pagetable level and
 * guest width, a writable mapping of the guest p2m, the host m2p, and a
 * pfn_type[] array that is seeded with each pfn's MFN and then filled
 * with type bits by xc_get_pfn_type_batch().
 *
 * NOTE(review): truncated extract -- the final parameter, locals (rc, i),
 * braces and the error-path gotos are missing from this view; the
 * trailing munmap/free lines appear to be the error-unwind path.
 */
static int init_mem_info(int xc_handle, int domid,
220
struct domain_mem_info *minfo,
223
uint64_aligned_t shared_info_frame;
224
shared_info_any_t *live_shinfo = NULL;
227
/* Only be initialized once */
228
if (minfo->pfn_type || minfo->m2p_table || minfo->p2m_table)
231
if ( get_pt_level(xc_handle, domid, &minfo->pt_level,
232
&minfo->guest_width) )
234
ERROR("Unable to get PT level info.");
237
dinfo->guest_width = minfo->guest_width;
239
shared_info_frame = info->shared_info_frame;
241
/* Map the shared-info page to reach the guest's p2m roots. */
live_shinfo = xc_map_foreign_range(xc_handle, domid,
242
PAGE_SIZE, PROT_READ, shared_info_frame);
245
ERROR("Couldn't map live_shinfo");
249
if ( (rc = xc_core_arch_map_p2m_writable(xc_handle, minfo->guest_width,
250
info, live_shinfo, &minfo->p2m_table, &minfo->p2m_size)) )
252
ERROR("Couldn't map p2m table %x\n", rc);
255
munmap(live_shinfo, PAGE_SIZE);
258
dinfo->p2m_size = minfo->p2m_size;
260
minfo->max_mfn = xc_memory_op(xc_handle, XENMEM_maximum_ram_page, NULL);
261
if ( !(minfo->m2p_table =
262
xc_map_m2p(xc_handle, minfo->max_mfn, PROT_READ, NULL)) )
264
ERROR("Failed to map live M2P table");
269
/* Seed pfn_type[] with each pfn's MFN before querying type bits. */
minfo->pfn_type = calloc(sizeof(*minfo->pfn_type), minfo->p2m_size);
270
if (!minfo->pfn_type)
272
ERROR("Failed to malloc pfn_type\n");
276
for (i = 0; i < minfo->p2m_size; i++)
277
minfo->pfn_type[i] = pfn_to_mfn(i, minfo->p2m_table,
280
/* The array is read/written by the hypervisor during the batch call. */
if ( lock_pages(minfo->pfn_type, minfo->p2m_size * sizeof(*minfo->pfn_type)) )
282
ERROR("Unable to lock pfn_type array");
286
/* Query page types in batches of 1024 frames. */
for (i = 0; i < minfo->p2m_size ; i+=1024)
288
int count = ((dinfo->p2m_size - i ) > 1024 ) ? 1024: (dinfo->p2m_size - i);
289
if ( ( rc = xc_get_pfn_type_batch(xc_handle, domid, count,
290
minfo->pfn_type + i)) )
292
ERROR("Failed to get pfn_type %x\n", rc);
299
unlock_pages(minfo->pfn_type, minfo->p2m_size * sizeof(*minfo->pfn_type));
303
/* Error-unwind path (labels lost in this extract): release everything. */
free(minfo->pfn_type);
304
minfo->pfn_type = NULL;
307
munmap(live_shinfo, PAGE_SIZE);
308
munmap(minfo->m2p_table, M2P_SIZE(minfo->max_mfn));
309
munmap(minfo->p2m_table, P2M_FLL_ENTRIES * PAGE_SIZE);
310
minfo->p2m_table = minfo->m2p_table = NULL;
315
/*
 * Record that PTE slot @offset of pagetable frame @table_mfn was
 * modified, growing the backup array (doubling) when full.
 * Returns 0 on success, -EINVAL on bad argument, -1 on allocation
 * failure (the existing entries remain valid).
 *
 * NOTE(review): truncated extract -- braces, guards and returns restored.
 * Fix over the original fragment: the realloc result is kept in a
 * temporary, so a failed grow no longer leaks (and no longer loses) the
 * existing entries array.
 */
static int backup_ptes(xen_pfn_t table_mfn, int offset,
                       struct pte_backup *backup)
{
    if (!backup)
        return -EINVAL;

    if (backup->max == backup->cur)
    {
        struct pte_backup_entry *tmp = realloc(backup->entries,
                        backup->max * 2 * sizeof(struct pte_backup_entry));

        if (tmp == NULL)
            return -1;

        backup->entries = tmp;
        backup->max *= 2;
    }

    backup->entries[backup->cur].table_mfn = table_mfn;
    backup->entries[backup->cur++].offset = offset;

    return 0;
}
339
/*
 * Callback used by change_pte() to transform one PTE.
 * Return convention:
 *   0 when no MMU update is required,
 */
* 1 when MMU update is required
341
* < 0 on error
343
typedef int (*pte_func)(uint64_t pte, uint64_t *new_pte,
344
unsigned long table_mfn, int table_offset,
345
struct pte_backup *backup,
346
unsigned long no_use);   /* per-callback scratch argument (mfn / new_mfn) */
348
static int __clear_pte(uint64_t pte, uint64_t *new_pte,
349
unsigned long table_mfn, int table_offset,
350
struct pte_backup *backup,
353
/* If no new_pte pointer, same as no changes needed */
354
if (!new_pte || !backup)
357
if ( !(pte & _PAGE_PRESENT))
360
/* XXX Check for PSE bit here */
362
if ( ((pte >> PAGE_SHIFT_X86) & MFN_MASK_X86) == mfn)
364
*new_pte = pte & ~_PAGE_PRESENT;
365
if (!backup_ptes(table_mfn, table_offset, backup))
372
static int __update_pte(uint64_t pte, uint64_t *new_pte,
373
unsigned long table_mfn, int table_offset,
374
struct pte_backup *backup,
375
unsigned long new_mfn)
382
for (index = 0; index < backup->cur; index ++)
383
if ( (backup->entries[index].table_mfn == table_mfn) &&
384
(backup->entries[index].offset == table_offset) )
387
if (index != backup->cur)
389
if (pte & _PAGE_PRESENT)
390
ERROR("Page present while in backup ptes\n");
391
pte &= ~MFN_MASK_X86;
392
pte |= (new_mfn << PAGE_SHIFT_X86) | _PAGE_PRESENT;
400
/*
 * Walk every guest frame; for each frame that is a pagetable, map it and
 * apply @func to each PTE, queueing an MMU update for every PTE that
 * @func rewrites, then flush the queued updates.
 *
 * NOTE(review): truncated extract -- the trailing parameters (mmu, func,
 * data), locals (i, j, rc, pte_num), braces and the error-unwind path
 * are missing from this view.
 */
static int change_pte(int xc_handle, int domid,
401
struct domain_mem_info *minfo,
402
struct pte_backup *backup,
409
void *content = NULL;
411
/* PTEs are 4 bytes with 2-level tables, 8 bytes otherwise. */
pte_num = PAGE_SIZE / ((minfo->pt_level == 2) ? 4 : 8);
413
for (i = 0; i < minfo->p2m_size; i++)
415
xen_pfn_t table_mfn = pfn_to_mfn(i, minfo->p2m_table,
417
uint64_t pte, new_pte;
420
/* Skip p2m holes and broken/missing pages. */
if ( (table_mfn == INVALID_P2M_ENTRY) ||
421
((minfo->pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK) ==
422
XEN_DOMCTL_PFINFO_XTAB) )
425
/* Only pagetable pages contain PTEs worth rewriting. */
if ( minfo->pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
427
content = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
428
PROT_READ, table_mfn);
432
for (j = 0; j < pte_num; j++)
434
if ( minfo->pt_level == 2 )
435
pte = ((const uint32_t*)content)[j];
437
pte = ((const uint64_t*)content)[j];
439
/* rc > 0 means @func produced a new PTE that must be written. */
rc = func(pte, &new_pte, table_mfn, j, backup, data);
444
/* Target the exact machine address of PTE slot j in this table. */
if ( xc_add_mmu_update(xc_handle, mmu,
445
table_mfn << PAGE_SHIFT |
446
j * ( (minfo->pt_level == 2) ?
447
sizeof(uint32_t): sizeof(uint64_t)) |
448
MMU_PT_UPDATE_PRESERVE_AD,
462
munmap(content, PAGE_SIZE);
466
if ( xc_flush_mmu_updates(xc_handle, mmu) )
471
/* XXX Shall we take action if we have failed to swap? */
473
munmap(content, PAGE_SIZE);
478
static int update_pte(int xc_handle, int domid,
479
struct domain_mem_info *minfo,
480
struct pte_backup *backup,
482
unsigned long new_mfn)
484
return change_pte(xc_handle, domid, minfo, backup, mmu,
485
__update_pte, new_mfn);
488
static int clear_pte(int xc_handle, int domid,
489
struct domain_mem_info *minfo,
490
struct pte_backup *backup,
494
return change_pte(xc_handle, domid, minfo, backup, mmu,
498
/*
 * Exchange the single machine frame @mfn belonging to @domid for a fresh
 * frame via XENMEM_exchange. On success *new_mfn receives the
 * replacement frame. Returns the memory-op result (0 on success).
 *
 * NOTE(review): truncated extract -- the initializer body, locals and
 * return were restored to the obvious one-in/one-out, order-0 exchange.
 */
static int exchange_page(int xc_handle, xen_pfn_t mfn,
                         xen_pfn_t *new_mfn, int domid)
{
    int rc;
    xen_pfn_t out_mfn;

    struct xen_memory_exchange exchange = {
        .in = {
            .nr_extents   = 1,
            .extent_order = 0,
            .domid        = domid
        },
        .out = {
            .nr_extents   = 1,
            .extent_order = 0,
            .domid        = domid
        }
    };
    set_xen_guest_handle(exchange.in.extent_start, &mfn);
    set_xen_guest_handle(exchange.out.extent_start, &out_mfn);

    rc = xc_memory_op(xc_handle, XENMEM_exchange, &exchange);

    if (!rc)
        *new_mfn = out_mfn;

    return rc;
}
528
* Check if a page can be exchanged successfully
531
static int is_page_exchangable(int xc_handle, int domid, xen_pfn_t mfn,
537
/* domain checking */
538
if ( !domid || (domid > DOMID_FIRST_RESERVED) )
540
DPRINTF("Dom0's page can't be LM");
545
DPRINTF("Currently we can only live change PV guest's page\n");
549
/* Check if pages are offline pending or not */
550
rc = xc_query_page_offline_status(xc_handle, mfn, mfn, &status);
552
if ( rc || !(status & PG_OFFLINE_STATUS_OFFLINE_PENDING) )
554
ERROR("Page %lx is not offline pending %x\n",
562
/* The domain should be suspended when called here */
563
/*
 * Live-exchange one offline-pending machine page @mfn of suspended PV
 * domain @domid for a fresh frame: unpin it if pinned, invalidate every
 * PTE referencing it, exchange via XENMEM_exchange, copy the saved
 * contents into the new frame, redirect the saved PTEs, update the p2m,
 * and re-pin at the original pagetable level.
 *
 * NOTE(review): truncated extract -- locals (info, status, rc, new_p),
 * braces, gotos and most error/cleanup lines are missing from this view;
 * the comments below describe only what the visible lines show.
 */
int xc_exchange_page(int xc_handle, int domid, xen_pfn_t mfn)
566
struct domain_mem_info minfo;
567
struct xc_mmu *mmu = NULL;
568
struct pte_backup old_ptes = {NULL, 0, 0};
569
grant_entry_v1_t *gnttab_v1 = NULL;
570
grant_entry_v2_t *gnttab_v2 = NULL;
571
struct mmuext_op mops;
572
int gnt_num, unpined = 0;
573
void *old_p, *backup = NULL;
576
xen_pfn_t new_mfn, gpfn;
578
if ( xc_domain_getinfo(xc_handle, domid, 1, &info) != 1 )
580
ERROR("Could not get domain info");
584
/* The exchange is only safe while the guest cannot touch its PTEs. */
if (!info.shutdown || info.shutdown_reason != SHUTDOWN_suspend)
586
ERROR("Can't exchange page unless domain is suspended\n");
590
if (!is_page_exchangable(xc_handle, domid, mfn, &info))
592
ERROR("Could not exchange page\n");
596
/* Get domain's memory information */
597
memset(&minfo, 0, sizeof(minfo));
598
init_mem_info(xc_handle, domid, &minfo, &info);
599
gpfn = minfo.m2p_table[mfn];
601
/* Don't exchange CR3 for PAE guest in PAE host environment */
602
if (minfo.guest_width > sizeof(long))
604
if ( (minfo.pfn_type[gpfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) ==
605
XEN_DOMCTL_PFINFO_L3TAB )
609
/* Refuse to move a page another domain currently holds a grant on. */
gnttab_v2 = xc_gnttab_map_table_v2(xc_handle, domid, &gnt_num);
612
gnttab_v1 = xc_gnttab_map_table_v1(xc_handle, domid, &gnt_num);
615
ERROR("Failed to map grant table\n");
621
? xc_is_page_granted_v1(xc_handle, mfn, gnttab_v1, gnt_num)
622
: xc_is_page_granted_v2(xc_handle, mfn, gnttab_v2, gnt_num))
624
ERROR("Page %lx is granted now\n", mfn);
628
/* allocate required data structure */
629
backup = malloc(PAGE_SIZE);
632
ERROR("Failed to allocate backup pages pointer\n");
636
old_ptes.max = DEFAULT_BACKUP_COUNT;
637
old_ptes.entries = malloc(sizeof(struct pte_backup_entry) *
638
DEFAULT_BACKUP_COUNT);
640
if (!old_ptes.entries)
642
ERROR("Faield to allocate backup\n");
647
/* Unpin the page if it is pinned */
648
if (minfo.pfn_type[gpfn] & XEN_DOMCTL_PFINFO_LPINTAB)
650
mops.cmd = MMUEXT_UNPIN_TABLE;
653
if ( xc_mmuext_op(xc_handle, &mops, 1, domid) < 0 )
655
ERROR("Failed to unpin page %lx", mfn);
662
/* backup the content */
663
old_p = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
667
ERROR("Failed to map foreign page %lx\n", mfn);
671
memcpy(backup, old_p, PAGE_SIZE);
672
munmap(old_p, PAGE_SIZE);
674
mmu = xc_alloc_mmu_updates(xc_handle, domid);
677
ERROR("%s: failed at %d\n", __FUNCTION__, __LINE__);
681
/* Firstly update all pte to be invalid to remove the reference */
682
rc = clear_pte(xc_handle, domid, &minfo, &old_ptes, mmu, mfn);
686
ERROR("clear pte failed\n");
690
rc = exchange_page(xc_handle, mfn, &new_mfn, domid);
694
ERROR("Exchange the page failed\n");
695
/* Exchange failure means something still references the page:
 * restore the saved PTEs to the original mfn. */
696
rc = update_pte(xc_handle, domid, &minfo, &old_ptes, mmu, mfn);
702
/* Exchange succeeded: point the saved PTEs at the new frame. */
rc = update_pte(xc_handle, domid, &minfo, &old_ptes, mmu, new_mfn);
706
ERROR("update pte failed guest may be broken now\n");
707
/* No recover action now for swap fail */
712
/* Check if pages are offlined already */
713
rc = xc_query_page_offline_status(xc_handle, mfn, mfn,
718
ERROR("Fail to query offline status\n");
719
}else if ( !(status & PG_OFFLINE_STATUS_OFFLINED) )
721
ERROR("page is still online or pending\n");
727
IPRINTF("Now page is offlined %lx\n", mfn);
728
/* Update the p2m table */
729
minfo.p2m_table[gpfn] = new_mfn;
731
/* Copy the saved contents into the replacement frame. */
new_p = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
732
PROT_READ|PROT_WRITE, new_mfn);
733
memcpy(new_p, backup, PAGE_SIZE);
734
munmap(new_p, PAGE_SIZE);
735
mops.arg1.mfn = new_mfn;
741
/* Re-pin the new frame at the level it had before.
 * NOTE(review): these checks index pfn_type[] by mfn while the unpin
 * above used gpfn -- looks inconsistent; verify against upstream. */
if (unpined && (minfo.pfn_type[mfn] & XEN_DOMCTL_PFINFO_LPINTAB))
743
switch ( minfo.pfn_type[mfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
745
case XEN_DOMCTL_PFINFO_L1TAB:
746
mops.cmd = MMUEXT_PIN_L1_TABLE;
749
case XEN_DOMCTL_PFINFO_L2TAB:
750
mops.cmd = MMUEXT_PIN_L2_TABLE;
753
case XEN_DOMCTL_PFINFO_L3TAB:
754
mops.cmd = MMUEXT_PIN_L3_TABLE;
757
case XEN_DOMCTL_PFINFO_L4TAB:
758
mops.cmd = MMUEXT_PIN_L4_TABLE;
762
ERROR("Unpined for non pate table page\n");
766
if ( xc_mmuext_op(xc_handle, &mops, 1, domid) < 0 )
768
ERROR("failed to pin the mfn again\n");
776
if (old_ptes.entries)
777
free(old_ptes.entries);
783
/* NOTE(review): munmap length here divides entry count by entries-per-
 * page, which yields a page count, not a byte length -- verify. */
munmap(gnttab_v1, gnt_num / (PAGE_SIZE/sizeof(grant_entry_v1_t)));
785
munmap(gnttab_v2, gnt_num / (PAGE_SIZE/sizeof(grant_entry_v2_t)));
787
close_mem_info(xc_handle, &minfo);