/******************************************************************************
 * Domain management operations. For use by node control stack.
 *
 * Copyright (c) 2002-2006, K A Fraser
 */

#include <xen/config.h>
#include <xen/types.h>
#include <xen/sched.h>
#include <xen/sched-if.h>
#include <xen/domain.h>
#include <xen/event.h>
#include <xen/domain_page.h>
#include <xen/trace.h>
#include <xen/console.h>
#include <xen/iocap.h>
#include <xen/rcupdate.h>
#include <xen/guest_access.h>
#include <xen/bitmap.h>
#include <xen/paging.h>
#include <xen/hypercall.h>
#include <asm/current.h>
#include <public/domctl.h>
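
/*
 * domctl_lock serialises all domctl hypercalls; vcpu_alloc_lock additionally
 * guards VCPU allocation against concurrent updates of data (e.g. CPU
 * microcode) used during VCPU initialisation, and is non-static so such
 * updaters can take it too.
 */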
static DEFINE_SPINLOCK(domctl_lock);
DEFINE_SPINLOCK(vcpu_alloc_lock);
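
/*
 * Copy a hypervisor cpumask_t out to a guest-supplied xenctl_cpumap byte
 * bitmap, zero-filling any guest bytes beyond the last Xen-known CPU.
 */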
int cpumask_to_xenctl_cpumap(
    struct xenctl_cpumap *xenctl_cpumap, const cpumask_t *cpumask)
{
    unsigned int guest_bytes, copy_bytes, i;
    uint8_t zero = 0;
    int err = 0;
    uint8_t *bytemap = xmalloc_array(uint8_t, (nr_cpu_ids + 7) / 8);

    if ( !bytemap )
        return -ENOMEM;

    guest_bytes = (xenctl_cpumap->nr_cpus + 7) / 8;
    copy_bytes  = min_t(unsigned int, guest_bytes, (nr_cpu_ids + 7) / 8);

    bitmap_long_to_byte(bytemap, cpumask_bits(cpumask), nr_cpu_ids);

    if ( copy_bytes != 0 )
        if ( copy_to_guest(xenctl_cpumap->bitmap, bytemap, copy_bytes) )
            err = -EFAULT;

    for ( i = copy_bytes; !err && i < guest_bytes; i++ )
        if ( copy_to_guest_offset(xenctl_cpumap->bitmap, i, &zero, 1) )
            err = -EFAULT;

    xfree(bytemap);

    return err;
}
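
/*
 * Inverse conversion: build a cpumask_var_t from a guest xenctl_cpumap,
 * masking off trailing bits beyond the guest's advertised nr_cpus.
 */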
int xenctl_cpumap_to_cpumask(
    cpumask_var_t *cpumask, const struct xenctl_cpumap *xenctl_cpumap)
{
    unsigned int guest_bytes, copy_bytes;
    int err = 0;
    uint8_t *bytemap = xzalloc_array(uint8_t, (nr_cpu_ids + 7) / 8);

    if ( !bytemap )
        return -ENOMEM;

    guest_bytes = (xenctl_cpumap->nr_cpus + 7) / 8;
    copy_bytes  = min_t(unsigned int, guest_bytes, (nr_cpu_ids + 7) / 8);

    if ( copy_bytes != 0 )
    {
        if ( copy_from_guest(bytemap, xenctl_cpumap->bitmap, copy_bytes) )
            err = -EFAULT;
        if ( (xenctl_cpumap->nr_cpus & 7) && (guest_bytes == copy_bytes) )
            bytemap[guest_bytes-1] &= ~(0xff << (xenctl_cpumap->nr_cpus & 7));
    }

    if ( err )
        /* nothing */;
    else if ( alloc_cpumask_var(cpumask) )
        bitmap_byte_to_long(cpumask_bits(*cpumask), bytemap, nr_cpu_ids);
    else
        err = -ENOMEM;

    xfree(bytemap);

    return err;
}
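
/* A domid is free if it is below DOMID_FIRST_RESERVED and not in use. */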
static inline int is_free_domid(domid_t dom)
{
    struct domain *d;

    if ( dom >= DOMID_FIRST_RESERVED )
        return 0;

    if ( (d = rcu_lock_domain_by_id(dom)) == NULL )
        return 1;

    rcu_unlock_domain(d);
    return 0;
}
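
/*
 * Fill in a xen_domctl_getdomaininfo: per-VCPU state is aggregated into
 * domain-wide flags, and CPU time is accumulated across all VCPUs.
 */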
void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info)
{
    struct vcpu *v;
    u64 cpu_time = 0;
    int flags = XEN_DOMINF_blocked;
    struct vcpu_runstate_info runstate;

    info->domain = d->domain_id;
    info->nr_online_vcpus = 0;

    /*
     * - domain is marked as blocked only if all its vcpus are blocked
     * - domain is marked as running if any of its vcpus is running
     */
    for_each_vcpu ( d, v )
    {
        vcpu_runstate_get(v, &runstate);
        cpu_time += runstate.time[RUNSTATE_running];
        info->max_vcpu_id = v->vcpu_id;
        if ( !test_bit(_VPF_down, &v->pause_flags) )
        {
            if ( !(v->pause_flags & VPF_blocked) )
                flags &= ~XEN_DOMINF_blocked;
            if ( v->is_running )
                flags |= XEN_DOMINF_running;
            info->nr_online_vcpus++;
        }
    }

    info->cpu_time = cpu_time;

    info->flags = (info->nr_online_vcpus ? flags : 0) |
        ((d->is_dying == DOMDYING_dead) ? XEN_DOMINF_dying : 0) |
        (d->is_shut_down ? XEN_DOMINF_shutdown : 0) |
        (d->is_paused_by_controller ? XEN_DOMINF_paused : 0) |
        (d->debugger_attached ? XEN_DOMINF_debugged : 0) |
        d->shutdown_code << XEN_DOMINF_shutdownshift;

    if ( is_hvm_domain(d) )
        info->flags |= XEN_DOMINF_hvm_guest;

    xsm_security_domaininfo(d, info);

    info->tot_pages = d->tot_pages;
    info->max_pages = d->max_pages;
    info->shr_pages = atomic_read(&d->shr_pages);
    info->paged_pages = atomic_read(&d->paged_pages);
    info->shared_info_frame = mfn_to_gmfn(d, __pa(d->shared_info)>>PAGE_SHIFT);
    BUG_ON(SHARED_M2P(info->shared_info_frame));

    info->cpupool = d->cpupool ? d->cpupool->cpupool_id : CPUPOOLID_NONE;

    memcpy(info->handle, d->handle, sizeof(xen_domain_handle_t));
}
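
/*
 * Choose a pCPU for a new domain's VCPU0: prefer the least-populated online
 * CPU, avoiding primary hyperthreads where possible.
 */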
static unsigned int default_vcpu0_location(cpumask_t *online)
{
    struct domain *d;
    struct vcpu   *v;
    unsigned int   i, cpu, nr_cpus, *cnt;
    cpumask_t      cpu_exclude_map;

    /* Do an initial CPU placement. Pick the least-populated CPU. */
    nr_cpus = cpumask_last(&cpu_online_map) + 1;
    cnt = xzalloc_array(unsigned int, nr_cpus);
    if ( cnt )
    {
        rcu_read_lock(&domlist_read_lock);
        for_each_domain ( d )
            for_each_vcpu ( d, v )
                if ( !test_bit(_VPF_down, &v->pause_flags)
                     && ((cpu = v->processor) < nr_cpus) )
                    cnt[cpu]++;
        rcu_read_unlock(&domlist_read_lock);
    }

    /*
     * If we're on a HT system, we only auto-allocate to a non-primary HT. We
     * favour high numbered CPUs in the event of a tie.
     */
    cpumask_copy(&cpu_exclude_map, per_cpu(cpu_sibling_mask, 0));
    cpu = cpumask_first(&cpu_exclude_map);
    if ( cpumask_weight(&cpu_exclude_map) > 1 )
        cpu = cpumask_next(cpu, &cpu_exclude_map);
    ASSERT(cpu < nr_cpu_ids);
    for_each_cpu(i, online)
    {
        if ( cpumask_test_cpu(i, &cpu_exclude_map) )
            continue;
        if ( (i == cpumask_first(per_cpu(cpu_sibling_mask, i))) &&
             (cpumask_weight(per_cpu(cpu_sibling_mask, i)) > 1) )
            continue;
        cpumask_or(&cpu_exclude_map, &cpu_exclude_map,
                   per_cpu(cpu_sibling_mask, i));
        if ( !cnt || cnt[i] <= cnt[cpu] )
            cpu = i;
    }

    xfree(cnt);

    return cpu;
}
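
/*
 * Lock ordering: hypercall_deadlock_mutex first, domctl_lock second. A
 * failed acquire lets the caller back off and retry through a hypercall
 * continuation rather than spinning.
 */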
bool_t domctl_lock_acquire(void)
{
    /*
     * Caller may try to pause its own VCPUs. We must prevent deadlock
     * against other non-domctl routines which try to do the same.
     */
    if ( !spin_trylock(&current->domain->hypercall_deadlock_mutex) )
        return 0;

    /*
     * Trylock here is paranoia if we have multiple privileged domains. Then
     * we could have one domain trying to pause another which is spinning
     * on domctl_lock -- results in deadlock.
     */
    if ( spin_trylock(&domctl_lock) )
        return 1;

    spin_unlock(&current->domain->hypercall_deadlock_mutex);
    return 0;
}

void domctl_lock_release(void)
{
    spin_unlock(&domctl_lock);
    spin_unlock(&current->domain->hypercall_deadlock_mutex);
}
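
/*
 * Top-level domctl dispatcher. Most subcommands require a privileged caller
 * (IS_PRIV, or IS_PRIV_FOR the target domain for the pass-through cases),
 * and all work runs under domctl_lock.
 */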
long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
{
    long ret = 0;
    struct xen_domctl curop, *op = &curop;

    if ( copy_from_guest(op, u_domctl, 1) )
        return -EFAULT;

    if ( op->interface_version != XEN_DOMCTL_INTERFACE_VERSION )
        return -EACCES;

    switch ( op->cmd )
    {
    case XEN_DOMCTL_ioport_mapping:
    case XEN_DOMCTL_memory_mapping:
    case XEN_DOMCTL_bind_pt_irq:
    case XEN_DOMCTL_unbind_pt_irq: {
        struct domain *d;
        bool_t is_priv = IS_PRIV(current->domain);
        if ( !is_priv && ((d = rcu_lock_domain_by_id(op->domain)) != NULL) )
        {
            is_priv = IS_PRIV_FOR(current->domain, d);
            rcu_unlock_domain(d);
        }
        if ( !is_priv )
            return -EPERM;
        break;
    }
#ifdef XSM_ENABLE
    case XEN_DOMCTL_getdomaininfo:
        break;
#endif
    default:
        if ( !IS_PRIV(current->domain) )
            return -EPERM;
        break;
    }

    if ( !domctl_lock_acquire() )
        return hypercall_create_continuation(
            __HYPERVISOR_domctl, "h", u_domctl);

    switch ( op->cmd )
    {
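
    /*
     * setvcpucontext: a NULL guest context handle requests a VCPU reset;
     * -EAGAIN from the reset path is converted into a hypercall
     * continuation.
     */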
    case XEN_DOMCTL_setvcpucontext:
    {
        struct domain *d = rcu_lock_domain_by_id(op->domain);
        vcpu_guest_context_u c = { .nat = NULL };
        unsigned int vcpu = op->u.vcpucontext.vcpu;
        struct vcpu *v;

        ret = -ESRCH;
        if ( d == NULL )
            break;

        ret = xsm_setvcpucontext(d);
        if ( ret )
            goto setvcpucontext_out;

        ret = -EINVAL;
        if ( (d == current->domain) || /* no domain_pause() */
             (vcpu >= d->max_vcpus) || ((v = d->vcpu[vcpu]) == NULL) )
            goto setvcpucontext_out;

        if ( guest_handle_is_null(op->u.vcpucontext.ctxt) )
        {
            ret = vcpu_reset(v);
            if ( ret == -EAGAIN )
                ret = hypercall_create_continuation(
                    __HYPERVISOR_domctl, "h", u_domctl);
            goto setvcpucontext_out;
        }

#ifdef CONFIG_COMPAT
        BUILD_BUG_ON(sizeof(struct vcpu_guest_context)
                     < sizeof(struct compat_vcpu_guest_context));
#endif
        ret = -ENOMEM;
        if ( (c.nat = alloc_vcpu_guest_context()) == NULL )
            goto setvcpucontext_out;

#ifdef CONFIG_COMPAT
        if ( !is_pv_32on64_vcpu(v) )
            ret = copy_from_guest(c.nat, op->u.vcpucontext.ctxt, 1);
        else
            ret = copy_from_guest(c.cmp,
                                  guest_handle_cast(op->u.vcpucontext.ctxt,
                                                    void), 1);
#else
        ret = copy_from_guest(c.nat, op->u.vcpucontext.ctxt, 1);
#endif
        ret = ret ? -EFAULT : 0;

        if ( ret == 0 )
        {
            domain_pause(d);
            ret = arch_set_info_guest(v, c);
            domain_unpause(d);
        }

    setvcpucontext_out:
        free_vcpu_guest_context(c.nat);
        rcu_unlock_domain(d);
    }
    break;

    case XEN_DOMCTL_pausedomain:
    {
        struct domain *d = rcu_lock_domain_by_id(op->domain);

        ret = -ESRCH;
        if ( d == NULL )
            break;

        ret = xsm_pausedomain(d);
        if ( ret )
            goto pausedomain_out;

        ret = -EINVAL;
        if ( d != current->domain )
        {
            domain_pause_by_systemcontroller(d);
            ret = 0;
        }

    pausedomain_out:
        rcu_unlock_domain(d);
    }
    break;

    case XEN_DOMCTL_unpausedomain:
    {
        struct domain *d = rcu_lock_domain_by_id(op->domain);

        ret = -ESRCH;
        if ( d == NULL )
            break;

        ret = xsm_unpausedomain(d);
        if ( ret )
        {
            rcu_unlock_domain(d);
            break;
        }

        domain_unpause_by_systemcontroller(d);
        rcu_unlock_domain(d);
        ret = 0;
    }
    break;

    case XEN_DOMCTL_resumedomain:
    {
        struct domain *d = rcu_lock_domain_by_id(op->domain);

        ret = -ESRCH;
        if ( d == NULL )
            break;

        ret = xsm_resumedomain(d);
        if ( ret )
        {
            rcu_unlock_domain(d);
            break;
        }

        domain_resume(d);
        ret = 0;
        rcu_unlock_domain(d);
    }
    break;
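
    /*
     * createdomain: an explicit domid below DOMID_FIRST_RESERVED is
     * honoured if free; otherwise a rotating "rover" search picks the next
     * free domid.
     */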
    case XEN_DOMCTL_createdomain:
    {
        struct domain *d;
        domid_t        dom;
        static domid_t rover = 0;
        unsigned int domcr_flags;

        ret = -EINVAL;
        if ( supervisor_mode_kernel ||
             (op->u.createdomain.flags &
              ~(XEN_DOMCTL_CDF_hvm_guest | XEN_DOMCTL_CDF_hap |
                XEN_DOMCTL_CDF_s3_integrity | XEN_DOMCTL_CDF_oos_off)) )
            break;

        dom = op->domain;
        if ( (dom > 0) && (dom < DOMID_FIRST_RESERVED) )
        {
            if ( !is_free_domid(dom) )
                break;
        }
        else
        {
            for ( dom = rover + 1; dom != rover; dom++ )
            {
                if ( dom == DOMID_FIRST_RESERVED )
                    dom = 0;
                if ( is_free_domid(dom) )
                    break;
            }
            rover = dom;
        }

        domcr_flags = 0;
        if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_hvm_guest )
            domcr_flags |= DOMCRF_hvm;
        if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_hap )
            domcr_flags |= DOMCRF_hap;
        if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_s3_integrity )
            domcr_flags |= DOMCRF_s3_integrity;
        if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_oos_off )
            domcr_flags |= DOMCRF_oos_off;

        d = domain_create(dom, domcr_flags, op->u.createdomain.ssidref);

        memcpy(d->handle, op->u.createdomain.handle,
               sizeof(xen_domain_handle_t));

        op->domain = d->domain_id;
        if ( copy_to_guest(u_domctl, op, 1) )
            ret = -EFAULT;
    }
    break;
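
    /*
     * max_vcpus: growing the VCPU array must not race updates of data used
     * during VCPU initialisation, hence vcpu_alloc_lock, taken via a
     * preemptible trylock loop so a long wait becomes a hypercall
     * continuation rather than a stall.
     */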
    case XEN_DOMCTL_max_vcpus:
    {
        struct domain *d;
        unsigned int i, max = op->u.max_vcpus.max, cpu;
        cpumask_t *online;

        ret = -ESRCH;
        if ( (d = rcu_lock_domain_by_id(op->domain)) == NULL )
            break;

        ret = -EINVAL;
        if ( (d == current->domain) || /* no domain_pause() */
             (max > MAX_VIRT_CPUS) ||
             (is_hvm_domain(d) && (max > MAX_HVM_VCPUS)) )
        {
            rcu_unlock_domain(d);
            break;
        }

        ret = xsm_max_vcpus(d);
        if ( ret )
        {
            rcu_unlock_domain(d);
            break;
        }

        /* Until Xenoprof can dynamically grow its vcpu-s array... */
        if ( d->xenoprof )
        {
            rcu_unlock_domain(d);
            ret = -EAGAIN;
            break;
        }

        /* Needed, for example, to ensure writable p.t. state is synced. */
        domain_pause(d);

        /*
         * Certain operations (e.g. CPU microcode updates) modify data which is
         * used during VCPU allocation/initialization
         */
        while ( !spin_trylock(&vcpu_alloc_lock) )
        {
            if ( hypercall_preempt_check() )
            {
                ret = hypercall_create_continuation(
                    __HYPERVISOR_domctl, "h", u_domctl);
                goto maxvcpu_out_novcpulock;
            }
        }

        /* We cannot reduce maximum VCPUs. */
        ret = -EINVAL;
        if ( (max < d->max_vcpus) && (d->vcpu[max] != NULL) )
            goto maxvcpu_out;

        /*
         * For now don't allow increasing the vcpu count from a non-zero
         * value: This code and all readers of d->vcpu would otherwise need
         * to be converted to use RCU, but at present there's no tools side
         * code path that would issue such a request.
         */
        if ( (d->max_vcpus > 0) && (max > d->max_vcpus) )
            goto maxvcpu_out;

        ret = -ENOMEM;
        online = cpupool_online_cpumask(d->cpupool);
        if ( max > d->max_vcpus )
        {
            struct vcpu **vcpus;

            BUG_ON(d->vcpu != NULL);
            BUG_ON(d->max_vcpus != 0);

            if ( (vcpus = xzalloc_array(struct vcpu *, max)) == NULL )
                goto maxvcpu_out;

            /* Install vcpu array /then/ update max_vcpus. */
            d->vcpu = vcpus;
            smp_wmb();
            d->max_vcpus = max;
        }

        for ( i = 0; i < max; i++ )
        {
            if ( d->vcpu[i] != NULL )
                continue;

            cpu = (i == 0) ?
                default_vcpu0_location(online) :
                cpumask_cycle(d->vcpu[i-1]->processor, online);

            if ( alloc_vcpu(d, i, cpu) == NULL )
                goto maxvcpu_out;
        }

        ret = 0;

    maxvcpu_out:
        spin_unlock(&vcpu_alloc_lock);

    maxvcpu_out_novcpulock:
        domain_unpause(d);
        rcu_unlock_domain(d);
    }
    break;

    case XEN_DOMCTL_destroydomain:
    {
        struct domain *d = rcu_lock_domain_by_id(op->domain);

        ret = -ESRCH;
        if ( d != NULL )
        {
            ret = xsm_destroydomain(d) ? : domain_kill(d);
            rcu_unlock_domain(d);
        }
    }
    break;
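
    /*
     * set/getvcpuaffinity share target validation and differ only in the
     * direction of the cpumap conversion.
     */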
    case XEN_DOMCTL_setvcpuaffinity:
    case XEN_DOMCTL_getvcpuaffinity:
    {
        domid_t dom = op->domain;
        struct domain *d = rcu_lock_domain_by_id(dom);
        struct vcpu *v;

        ret = -ESRCH;
        if ( d == NULL )
            break;

        ret = xsm_vcpuaffinity(op->cmd, d);
        if ( ret )
            goto vcpuaffinity_out;

        ret = -EINVAL;
        if ( op->u.vcpuaffinity.vcpu >= d->max_vcpus )
            goto vcpuaffinity_out;

        ret = -ESRCH;
        if ( (v = d->vcpu[op->u.vcpuaffinity.vcpu]) == NULL )
            goto vcpuaffinity_out;

        if ( op->cmd == XEN_DOMCTL_setvcpuaffinity )
        {
            cpumask_var_t new_affinity;

            ret = xenctl_cpumap_to_cpumask(
                &new_affinity, &op->u.vcpuaffinity.cpumap);
            if ( !ret )
            {
                ret = vcpu_set_affinity(v, new_affinity);
                free_cpumask_var(new_affinity);
            }
        }
        else
        {
            ret = cpumask_to_xenctl_cpumap(
                &op->u.vcpuaffinity.cpumap, v->cpu_affinity);
        }

    vcpuaffinity_out:
        rcu_unlock_domain(d);
    }
    break;

    case XEN_DOMCTL_scheduler_op:
    {
        struct domain *d;

        ret = -ESRCH;
        if ( (d = rcu_lock_domain_by_id(op->domain)) == NULL )
            break;

        ret = xsm_scheduler(d);
        if ( ret )
            goto scheduler_op_out;

        ret = sched_adjust(d, &op->u.scheduler_op);
        if ( copy_to_guest(u_domctl, op, 1) )
            ret = -EFAULT;

    scheduler_op_out:
        rcu_unlock_domain(d);
    }
    break;
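
    /*
     * getdomaininfo returns info for the first existing domain with an id
     * at or above op->domain, which lets the tools enumerate domains.
     */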
    case XEN_DOMCTL_getdomaininfo:
    {
        struct domain *d;
        domid_t dom = op->domain;

        rcu_read_lock(&domlist_read_lock);

        for_each_domain ( d )
            if ( d->domain_id >= dom )
                break;

        ret = -ESRCH;
        if ( d == NULL )
            goto getdomaininfo_out;

        ret = xsm_getdomaininfo(d);
        if ( ret )
            goto getdomaininfo_out;

        getdomaininfo(d, &op->u.getdomaininfo);

        op->domain = op->u.getdomaininfo.domain;
        if ( copy_to_guest(u_domctl, op, 1) )
            ret = -EFAULT;

    getdomaininfo_out:
        rcu_read_unlock(&domlist_read_lock);
    }
    break;
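
    /*
     * getvcpucontext: only an initialised VCPU has a context to report;
     * compat (32-on-64) guests receive it translated into their layout.
     */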
    case XEN_DOMCTL_getvcpucontext:
    {
        vcpu_guest_context_u c = { .nat = NULL };
        struct domain *d;
        struct vcpu *v;

        ret = -ESRCH;
        if ( (d = rcu_lock_domain_by_id(op->domain)) == NULL )
            break;

        ret = xsm_getvcpucontext(d);
        if ( ret )
            goto getvcpucontext_out;

        ret = -EINVAL;
        if ( op->u.vcpucontext.vcpu >= d->max_vcpus )
            goto getvcpucontext_out;

        ret = -ESRCH;
        if ( (v = d->vcpu[op->u.vcpucontext.vcpu]) == NULL )
            goto getvcpucontext_out;

        ret = -ENODATA;
        if ( !v->is_initialised )
            goto getvcpucontext_out;

#ifdef CONFIG_COMPAT
        BUILD_BUG_ON(sizeof(struct vcpu_guest_context)
                     < sizeof(struct compat_vcpu_guest_context));
#endif
        ret = -ENOMEM;
        if ( (c.nat = xmalloc(struct vcpu_guest_context)) == NULL )
            goto getvcpucontext_out;

        if ( v != current )
            vcpu_pause(v);

        arch_get_info_guest(v, c);
        ret = 0;

        if ( v != current )
            vcpu_unpause(v);

#ifdef CONFIG_COMPAT
        if ( !is_pv_32on64_vcpu(v) )
            ret = copy_to_guest(op->u.vcpucontext.ctxt, c.nat, 1);
        else
            ret = copy_to_guest(guest_handle_cast(op->u.vcpucontext.ctxt,
                                                  void), c.cmp, 1);
#else
        ret = copy_to_guest(op->u.vcpucontext.ctxt, c.nat, 1);
#endif

        if ( copy_to_guest(u_domctl, op, 1) || ret )
            ret = -EFAULT;

    getvcpucontext_out:
        xfree(c.nat);
        rcu_unlock_domain(d);
    }
    break;

    case XEN_DOMCTL_getvcpuinfo:
    {
        struct domain *d;
        struct vcpu *v;
        struct vcpu_runstate_info runstate;

        ret = -ESRCH;
        if ( (d = rcu_lock_domain_by_id(op->domain)) == NULL )
            break;

        ret = xsm_getvcpuinfo(d);
        if ( ret )
            goto getvcpuinfo_out;

        ret = -EINVAL;
        if ( op->u.getvcpuinfo.vcpu >= d->max_vcpus )
            goto getvcpuinfo_out;

        ret = -ESRCH;
        if ( (v = d->vcpu[op->u.getvcpuinfo.vcpu]) == NULL )
            goto getvcpuinfo_out;

        vcpu_runstate_get(v, &runstate);

        op->u.getvcpuinfo.online   = !test_bit(_VPF_down, &v->pause_flags);
        op->u.getvcpuinfo.blocked  = test_bit(_VPF_blocked, &v->pause_flags);
        op->u.getvcpuinfo.running  = v->is_running;
        op->u.getvcpuinfo.cpu_time = runstate.time[RUNSTATE_running];
        op->u.getvcpuinfo.cpu      = v->processor;
        ret = 0;

        if ( copy_to_guest(u_domctl, op, 1) )
            ret = -EFAULT;

    getvcpuinfo_out:
        rcu_unlock_domain(d);
    }
    break;
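
    /*
     * max_mem only moves the ceiling: allocations already above the new
     * maximum are left in place, but further allocation is blocked until
     * the domain drops back below it.
     */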
    case XEN_DOMCTL_max_mem:
    {
        struct domain *d;
        unsigned long new_max;

        ret = -ESRCH;
        d = rcu_lock_domain_by_id(op->domain);
        if ( d == NULL )
            break;

        ret = xsm_setdomainmaxmem(d);
        if ( ret )
            goto max_mem_out;

        new_max = op->u.max_mem.max_memkb >> (PAGE_SHIFT-10);

        spin_lock(&d->page_alloc_lock);
        /*
         * NB. We removed a check that new_max >= current tot_pages; this means
         * that the domain will now be allowed to "ratchet" down to new_max. In
         * the meantime, while tot > max, all new allocations are disallowed.
         */
        d->max_pages = new_max;
        ret = 0;
        spin_unlock(&d->page_alloc_lock);

    max_mem_out:
        rcu_unlock_domain(d);
    }
    break;

    case XEN_DOMCTL_setdomainhandle:
    {
        struct domain *d;

        ret = -ESRCH;
        d = rcu_lock_domain_by_id(op->domain);
        if ( d == NULL )
            break;

        ret = xsm_setdomainhandle(d);
        if ( ret )
        {
            rcu_unlock_domain(d);
            break;
        }

        ret = 0;
        memcpy(d->handle, op->u.setdomainhandle.handle,
               sizeof(xen_domain_handle_t));
        rcu_unlock_domain(d);
    }
    break;

    case XEN_DOMCTL_setdebugging:
    {
        struct domain *d;

        ret = -ESRCH;
        d = rcu_lock_domain_by_id(op->domain);
        if ( d == NULL )
            break;

        ret = -EINVAL;
        if ( d == current->domain ) /* no domain_pause() */
        {
            rcu_unlock_domain(d);
            break;
        }

        ret = xsm_setdebugging(d);
        if ( ret )
        {
            rcu_unlock_domain(d);
            break;
        }

        ret = 0;
        domain_pause(d);
        d->debugger_attached = !!op->u.setdebugging.enable;
        domain_unpause(d); /* causes guest to latch new status */
        rcu_unlock_domain(d);
    }
    break;

    case XEN_DOMCTL_irq_permission:
    {
        struct domain *d;
        unsigned int pirq = op->u.irq_permission.pirq;
        int allow = op->u.irq_permission.allow_access;

        ret = -ESRCH;
        d = rcu_lock_domain_by_id(op->domain);
        if ( d == NULL )
            break;

        if ( pirq >= d->nr_pirqs )
            ret = -EINVAL;
        else if ( xsm_irq_permission(d, pirq, allow) )
            ret = -EPERM;
        else if ( allow )
            ret = pirq_permit_access(d, pirq);
        else
            ret = pirq_deny_access(d, pirq);

        rcu_unlock_domain(d);
    }
    break;

    case XEN_DOMCTL_iomem_permission:
    {
        struct domain *d;
        unsigned long mfn = op->u.iomem_permission.first_mfn;
        unsigned long nr_mfns = op->u.iomem_permission.nr_mfns;
        int allow = op->u.iomem_permission.allow_access;

        ret = -EINVAL;
        if ( (mfn + nr_mfns - 1) < mfn ) /* wrap? */
            break;

        ret = -ESRCH;
        d = rcu_lock_domain_by_id(op->domain);
        if ( d == NULL )
            break;

        if ( xsm_iomem_permission(d, mfn, mfn + nr_mfns - 1, allow) )
            ret = -EPERM;
        else if ( allow )
            ret = iomem_permit_access(d, mfn, mfn + nr_mfns - 1);
        else
            ret = iomem_deny_access(d, mfn, mfn + nr_mfns - 1);

        rcu_unlock_domain(d);
    }
    break;

    case XEN_DOMCTL_settimeoffset:
    {
        struct domain *d;

        ret = -ESRCH;
        d = rcu_lock_domain_by_id(op->domain);
        if ( d == NULL )
            break;

        ret = xsm_domain_settime(d);
        if ( ret )
        {
            rcu_unlock_domain(d);
            break;
        }

        domain_set_time_offset(d, op->u.settimeoffset.time_offset_seconds);
        rcu_unlock_domain(d);
        ret = 0;
    }
    break;
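
    /*
     * set_target makes one domain (e.g. a stub device model) privileged
     * for another: @d gains IS_PRIV_FOR rights over @e, so the pairing is
     * validated and a reference on @e is held for @d's lifetime.
     */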
    case XEN_DOMCTL_set_target:
    {
        struct domain *d, *e;

        ret = -ESRCH;
        d = rcu_lock_domain_by_id(op->domain);
        if ( d == NULL )
            break;

        ret = -ESRCH;
        e = get_domain_by_id(op->u.set_target.target);
        if ( e == NULL )
            goto set_target_out;

        ret = -EINVAL;
        if ( (d == e) || (d->target != NULL) )
        {
            put_domain(e);
            goto set_target_out;
        }

        ret = xsm_set_target(d, e);
        if ( ret )
        {
            put_domain(e);
            goto set_target_out;
        }

        /* Hold reference on @e until we destroy @d. */
        d->target = e;

        ret = 0;

    set_target_out:
        rcu_unlock_domain(d);
    }
    break;

    case XEN_DOMCTL_subscribe:
    {
        struct domain *d;

        ret = -ESRCH;
        d = rcu_lock_domain_by_id(op->domain);
        if ( d != NULL )
        {
            ret = xsm_domctl(d, op->cmd);
            if ( !ret )
                d->suspend_evtchn = op->u.subscribe.port;
            rcu_unlock_domain(d);
        }
    }
    break;

    case XEN_DOMCTL_disable_migrate:
    {
        struct domain *d;

        ret = -ESRCH;
        if ( (d = rcu_lock_domain_by_id(op->domain)) != NULL )
        {
            ret = xsm_domctl(d, op->cmd);
            if ( !ret )
                d->disable_migrate = op->u.disable_migrate.disable;
            rcu_unlock_domain(d);
        }
    }
    break;

    case XEN_DOMCTL_set_virq_handler:
    {
        struct domain *d;
        uint32_t virq = op->u.set_virq_handler.virq;

        ret = -ESRCH;
        d = rcu_lock_domain_by_id(op->domain);
        if ( d == NULL )
            break;

        ret = xsm_set_virq_handler(d, virq);
        if ( !ret )
            ret = set_global_virq_handler(d, virq);

        rcu_unlock_domain(d);
    }
    break;

    default:
        ret = arch_do_domctl(op, u_domctl);
        break;
    }

    domctl_lock_release();

    return ret;
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */