2
* Copyright (C) 2009 Citrix Ltd.
3
* Author Vincent Hanquez <vincent.hanquez@eu.citrix.com>
4
* Author Stefano Stabellini <stefano.stabellini@eu.citrix.com>
6
* This program is free software; you can redistribute it and/or modify
7
* it under the terms of the GNU Lesser General Public License as published
8
* by the Free Software Foundation; version 2.1 only. with the special
9
* exception on linking described in file LICENSE.
11
* This program is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
* GNU Lesser General Public License for more details.
17
#include "libxl_osdeps.h" /* must come before any other headers */
19
#include "libxl_internal.h"
21
#define PCI_BDF "%04x:%02x:%02x.%01x"
22
#define PCI_BDF_SHORT "%02x:%02x.%01x"
23
#define PCI_BDF_VDEVFN "%04x:%02x:%02x.%01x@%02x"
24
#define PCI_OPTIONS "msitranslate=%d,power_mgmt=%d"
25
#define PCI_BDF_XSPATH "%04x-%02x-%02x-%01x"
27
/*
 * Pack the address of a PCI device into a single unsigned integer.
 *
 * Layout of the result:
 *   bits 16 and up : pcidev->domain
 *   bits  8..15    : pcidev->bus  (masked to 8 bits)
 *   bits  3..7     : pcidev->dev  (masked to 5 bits)
 *   bits  0..2     : pcidev->func (masked to 3 bits)
 *
 * The packed value is what this file hands to xc_assign_device() and
 * xc_deassign_device().
 */
static unsigned int pcidev_encode_bdf(libxl_device_pci *pcidev)
{
    unsigned int value;

    value = pcidev->domain << 16;
    value |= (pcidev->bus & 0xff) << 8;
    value |= (pcidev->dev & 0x1f) << 3;
    value |= (pcidev->func & 0x7);

    return value;
}
39
/*
 * Store a PCI address and virtual devfn into *pcidev.
 *
 * Only the domain, bus, dev, func and vdevfn members are written; every
 * other member of *pcidev is left as the caller set it (callers in this
 * file zero the structure themselves where they need to).
 *
 * Always returns 0.
 */
static int pcidev_struct_fill(libxl_device_pci *pcidev, unsigned int domain,
                              unsigned int bus, unsigned int dev,
                              unsigned int func, unsigned int vdevfn)
{
    pcidev->domain = domain;
    pcidev->bus = bus;
    pcidev->dev = dev;
    pcidev->func = func;
    pcidev->vdevfn = vdevfn;
    return 0;
}
51
static void libxl_create_pci_backend_device(libxl__gc *gc, flexarray_t *back, int num, libxl_device_pci *pcidev)
53
flexarray_append(back, libxl__sprintf(gc, "key-%d", num));
54
flexarray_append(back, libxl__sprintf(gc, PCI_BDF, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func));
55
flexarray_append(back, libxl__sprintf(gc, "dev-%d", num));
56
flexarray_append(back, libxl__sprintf(gc, PCI_BDF, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func));
58
flexarray_append_pair(back, libxl__sprintf(gc, "vdevfn-%d", num), libxl__sprintf(gc, "%x", pcidev->vdevfn));
59
flexarray_append(back, libxl__sprintf(gc, "opts-%d", num));
60
flexarray_append(back,
61
libxl__sprintf(gc, "msitranslate=%d,power_mgmt=%d,permissive=%d",
62
pcidev->msitranslate, pcidev->power_mgmt,
64
flexarray_append_pair(back, libxl__sprintf(gc, "state-%d", num), libxl__sprintf(gc, "%d", 1));
67
int libxl__create_pci_backend(libxl__gc *gc, uint32_t domid,
68
libxl_device_pci *pcidev, int num)
70
libxl_ctx *ctx = libxl__gc_owner(gc);
71
flexarray_t *front = NULL;
72
flexarray_t *back = NULL;
74
int ret = ERROR_NOMEM, i;
76
front = flexarray_make(16, 1);
79
back = flexarray_make(16, 1);
85
LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "Creating pci backend");
88
device.backend_devid = 0;
89
device.backend_domid = 0;
90
device.backend_kind = LIBXL__DEVICE_KIND_PCI;
93
device.kind = LIBXL__DEVICE_KIND_PCI;
95
flexarray_append_pair(back, "frontend-id", libxl__sprintf(gc, "%d", domid));
96
flexarray_append_pair(back, "online", "1");
97
flexarray_append_pair(back, "state", libxl__sprintf(gc, "%d", 1));
98
flexarray_append_pair(back, "domain", libxl__domid_to_name(gc, domid));
100
for (i = 0; i < num; i++, pcidev++)
101
libxl_create_pci_backend_device(gc, back, i, pcidev);
103
flexarray_append_pair(back, "num_devs", libxl__sprintf(gc, "%d", num));
104
flexarray_append_pair(front, "backend-id", libxl__sprintf(gc, "%d", 0));
105
flexarray_append_pair(front, "state", libxl__sprintf(gc, "%d", 1));
107
libxl__device_generic_add(gc, XBT_NULL, &device,
108
libxl__xs_kvs_of_flexarray(gc, back, back->count),
109
libxl__xs_kvs_of_flexarray(gc, front, front->count));
113
flexarray_free(back);
115
flexarray_free(front);
119
static int libxl__device_pci_add_xenstore(libxl__gc *gc, uint32_t domid, libxl_device_pci *pcidev, int starting)
121
libxl_ctx *ctx = libxl__gc_owner(gc);
123
char *num_devs, *be_path;
127
be_path = libxl__sprintf(gc, "%s/backend/pci/%d/0", libxl__xs_get_dompath(gc, 0), domid);
128
num_devs = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/num_devs", be_path));
130
return libxl__create_pci_backend(gc, domid, pcidev, 1);
132
libxl_domain_type domtype = libxl__domain_type(gc, domid);
133
if (domtype == LIBXL_DOMAIN_TYPE_INVALID)
136
if (!starting && domtype == LIBXL_DOMAIN_TYPE_PV) {
137
if (libxl__wait_for_backend(gc, be_path, "4") < 0)
141
back = flexarray_make(16, 1);
145
LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "Adding new pci device to xenstore");
146
num = atoi(num_devs);
147
libxl_create_pci_backend_device(gc, back, num, pcidev);
148
flexarray_append_pair(back, "num_devs", libxl__sprintf(gc, "%d", num + 1));
150
flexarray_append_pair(back, "state", libxl__sprintf(gc, "%d", 7));
153
t = xs_transaction_start(ctx->xsh);
154
libxl__xs_writev(gc, t, be_path,
155
libxl__xs_kvs_of_flexarray(gc, back, back->count));
156
if (!xs_transaction_end(ctx->xsh, t, 0))
158
goto retry_transaction;
160
flexarray_free(back);
164
static int libxl__device_pci_remove_xenstore(libxl__gc *gc, uint32_t domid, libxl_device_pci *pcidev)
166
libxl_ctx *ctx = libxl__gc_owner(gc);
167
char *be_path, *num_devs_path, *num_devs, *xsdev, *tmp, *tmppath;
170
unsigned int domain = 0, bus = 0, dev = 0, func = 0;
172
be_path = libxl__sprintf(gc, "%s/backend/pci/%d/0", libxl__xs_get_dompath(gc, 0), domid);
173
num_devs_path = libxl__sprintf(gc, "%s/num_devs", be_path);
174
num_devs = libxl__xs_read(gc, XBT_NULL, num_devs_path);
177
num = atoi(num_devs);
179
libxl_domain_type domtype = libxl__domain_type(gc, domid);
180
if (domtype == LIBXL_DOMAIN_TYPE_INVALID)
183
if (domtype == LIBXL_DOMAIN_TYPE_PV) {
184
if (libxl__wait_for_backend(gc, be_path, "4") < 0) {
185
LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "pci backend at %s is not ready", be_path);
190
for (i = 0; i < num; i++) {
191
xsdev = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/dev-%d", be_path, i));
192
sscanf(xsdev, PCI_BDF, &domain, &bus, &dev, &func);
193
if (domain == pcidev->domain && bus == pcidev->bus &&
194
pcidev->dev == dev && pcidev->func == func) {
199
LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "Couldn't find the device on xenstore");
204
t = xs_transaction_start(ctx->xsh);
205
xs_write(ctx->xsh, t, libxl__sprintf(gc, "%s/state-%d", be_path, i), "5", strlen("5"));
206
xs_write(ctx->xsh, t, libxl__sprintf(gc, "%s/state", be_path), "7", strlen("7"));
207
if (!xs_transaction_end(ctx->xsh, t, 0))
209
goto retry_transaction;
211
if (domtype == LIBXL_DOMAIN_TYPE_PV) {
212
if (libxl__wait_for_backend(gc, be_path, "4") < 0) {
213
LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "pci backend at %s is not ready", be_path);
219
t = xs_transaction_start(ctx->xsh);
220
xs_rm(ctx->xsh, t, libxl__sprintf(gc, "%s/state-%d", be_path, i));
221
xs_rm(ctx->xsh, t, libxl__sprintf(gc, "%s/key-%d", be_path, i));
222
xs_rm(ctx->xsh, t, libxl__sprintf(gc, "%s/dev-%d", be_path, i));
223
xs_rm(ctx->xsh, t, libxl__sprintf(gc, "%s/vdev-%d", be_path, i));
224
xs_rm(ctx->xsh, t, libxl__sprintf(gc, "%s/opts-%d", be_path, i));
225
xs_rm(ctx->xsh, t, libxl__sprintf(gc, "%s/vdevfn-%d", be_path, i));
226
libxl__xs_write(gc, t, num_devs_path, "%d", num - 1);
227
for (j = i + 1; j < num; j++) {
228
tmppath = libxl__sprintf(gc, "%s/state-%d", be_path, j);
229
tmp = libxl__xs_read(gc, t, tmppath);
230
xs_write(ctx->xsh, t, libxl__sprintf(gc, "%s/state-%d", be_path, j - 1), tmp, strlen(tmp));
231
xs_rm(ctx->xsh, t, tmppath);
232
tmppath = libxl__sprintf(gc, "%s/dev-%d", be_path, j);
233
tmp = libxl__xs_read(gc, t, tmppath);
234
xs_write(ctx->xsh, t, libxl__sprintf(gc, "%s/dev-%d", be_path, j - 1), tmp, strlen(tmp));
235
xs_rm(ctx->xsh, t, tmppath);
236
tmppath = libxl__sprintf(gc, "%s/key-%d", be_path, j);
237
tmp = libxl__xs_read(gc, t, tmppath);
238
xs_write(ctx->xsh, t, libxl__sprintf(gc, "%s/key-%d", be_path, j - 1), tmp, strlen(tmp));
239
xs_rm(ctx->xsh, t, tmppath);
240
tmppath = libxl__sprintf(gc, "%s/vdev-%d", be_path, j);
241
tmp = libxl__xs_read(gc, t, tmppath);
243
xs_write(ctx->xsh, t, libxl__sprintf(gc, "%s/vdev-%d", be_path, j - 1), tmp, strlen(tmp));
244
xs_rm(ctx->xsh, t, tmppath);
246
tmppath = libxl__sprintf(gc, "%s/opts-%d", be_path, j);
247
tmp = libxl__xs_read(gc, t, tmppath);
249
xs_write(ctx->xsh, t, libxl__sprintf(gc, "%s/opts-%d", be_path, j - 1), tmp, strlen(tmp));
250
xs_rm(ctx->xsh, t, tmppath);
252
tmppath = libxl__sprintf(gc, "%s/vdevfn-%d", be_path, j);
253
tmp = libxl__xs_read(gc, t, tmppath);
255
xs_write(ctx->xsh, t, libxl__sprintf(gc, "%s/vdevfn-%d", be_path, j - 1), tmp, strlen(tmp));
256
xs_rm(ctx->xsh, t, tmppath);
259
if (!xs_transaction_end(ctx->xsh, t, 0))
261
goto retry_transaction2;
265
if (libxl__parse_backend_path(gc, be_path, &dev) != 0)
269
dev.kind = LIBXL__DEVICE_KIND_PCI;
272
libxl__device_destroy(gc, &dev);
279
/*
 * Collect the PCI devices that xenstore lists as passed through to any
 * domain.
 *
 * For each entry under /local/domain, the dom0 PCI backend node
 * .../backend/pci/<entry>/0/num_devs is read, then each dev-<n> node below
 * it is parsed as a PCI_BDF string and stored at (*list)[*num] via
 * pcidev_struct_fill() with a vdevfn of 0.
 *
 * NOTE(review): this view of the function is missing lines (declarations,
 * error paths, the update of *num and the return statements), so the exact
 * return contract is not documented here - confirm against the full file.
 */
static int get_all_assigned_devices(libxl__gc *gc, libxl_device_pci **list, int *num)
282
unsigned int nd = 0, i;
287
/* One directory entry per domain known to xenstore. */
domlist = libxl__xs_directory(gc, XBT_NULL, "/local/domain", &nd);
288
for(i = 0; i < nd; i++) {
289
char *path, *num_devs;
291
path = libxl__sprintf(gc, "/local/domain/0/backend/pci/%s/0/num_devs", domlist[i]);
292
num_devs = libxl__xs_read(gc, XBT_NULL, path);
294
int ndev = atoi(num_devs), j;
297
for(j = 0; j < ndev; j++) {
298
devpath = libxl__sprintf(gc, "/local/domain/0/backend/pci/%s/0/dev-%u",
300
bdf = libxl__xs_read(gc, XBT_NULL, devpath);
302
unsigned dom, bus, dev, func;
303
/* Entries that do not parse as domain:bus:dev.func are not added. */
if ( sscanf(bdf, PCI_BDF, &dom, &bus, &dev, &func) != 4 )
306
/*
 * NOTE(review): the realloc() result is written straight back into *list.
 * If realloc() fails it returns NULL and the previous array, which is
 * still allocated, is no longer reachable through *list. Growing via a
 * temporary pointer would avoid losing it.
 */
*list = realloc(*list, sizeof(libxl_device_pci) * ((*num) + 1));
309
pcidev_struct_fill(*list + *num, dom, bus, dev, func, 0);
315
/* Hand the array to the gc (presumably so it is released with it - confirm). */
libxl__ptr_add(gc, *list);
320
/*
 * Linear search of assigned[0 .. num_assigned-1] for a device whose
 * domain, bus, dev and func all equal the given values.
 *
 * Returns 1 if such an entry exists, 0 otherwise (including when
 * num_assigned is 0 or negative, in which case assigned is not read).
 */
static int is_pcidev_in_array(libxl_device_pci *assigned, int num_assigned,
                              int dom, int bus, int dev, int func)
{
    int i;

    for (i = 0; i < num_assigned; i++) {
        if ( assigned[i].domain != dom )
            continue;
        if ( assigned[i].bus != bus )
            continue;
        if ( assigned[i].dev != dev )
            continue;
        if ( assigned[i].func != func )
            continue;
        return 1;
    }

    return 0;
}
340
/* Write the standard BDF into the sysfs path given by sysfs_path. */
341
static int sysfs_write_bdf(libxl__gc *gc, const char * sysfs_path,
342
libxl_device_pci *pcidev)
344
libxl_ctx *ctx = libxl__gc_owner(gc);
348
fd = open(sysfs_path, O_WRONLY);
350
LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "Couldn't open %s",
355
buf = libxl__sprintf(gc, PCI_BDF, pcidev->domain, pcidev->bus,
356
pcidev->dev, pcidev->func);
357
rc = write(fd, buf, strlen(buf));
358
/* Annoying to have two if's, but we need the errno */
360
LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR,
361
"write to %s returned %d", sysfs_path, rc);
370
libxl_device_pci *libxl_device_pci_assignable_list(libxl_ctx *ctx, int *num)
373
libxl_device_pci *pcidevs = NULL, *new, *assigned;
376
int rc, num_assigned;
380
rc = get_all_assigned_devices(gc, &assigned, &num_assigned);
384
dir = opendir(SYSFS_PCIBACK_DRIVER);
386
if ( errno == ENOENT ) {
387
LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "Looks like pciback driver not loaded");
389
LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "Couldn't open %s", SYSFS_PCIBACK_DRIVER);
394
while( (de = readdir(dir)) ) {
395
unsigned dom, bus, dev, func;
396
if ( sscanf(de->d_name, PCI_BDF, &dom, &bus, &dev, &func) != 4 )
399
if ( is_pcidev_in_array(assigned, num_assigned, dom, bus, dev, func) )
402
new = realloc(pcidevs, ((*num) + 1) * sizeof(*new));
407
new = pcidevs + *num;
409
memset(new, 0, sizeof(*new));
410
pcidev_struct_fill(new, dom, bus, dev, func, 0);
421
/* Unbind device from its current driver, if any. If driver_path is non-NULL,
422
* store the path to the original driver in it. */
423
static int sysfs_dev_unbind(libxl__gc *gc, libxl_device_pci *pcidev,
426
libxl_ctx *ctx = libxl__gc_owner(gc);
427
char * spath, *dp = NULL;
430
spath = libxl__sprintf(gc, SYSFS_PCI_DEV"/"PCI_BDF"/driver",
435
if ( !lstat(spath, &st) ) {
436
/* Find the canonical path to the driver. */
437
dp = libxl__zalloc(gc, PATH_MAX);
438
dp = realpath(spath, dp);
440
LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "realpath() failed");
444
LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "Driver re-plug path: %s",
447
/* Unbind from the old driver */
448
spath = libxl__sprintf(gc, "%s/unbind", dp);
449
if ( sysfs_write_bdf(gc, spath, pcidev) < 0 ) {
450
LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "Couldn't unbind device");
462
* A brief comment about slots. I don't know what slots are for; however,
463
* I have by experimentation determined:
464
* - Before a device can be bound to pciback, its BDF must first be listed
466
* - The way to get the BDF listed there is to write BDF to
468
* - Writing the same BDF to pciback/new_slot is not idempotent; it results
469
* in two entries of the BDF in pciback/slots
470
* It's not clear whether having two entries in pciback/slots is a problem
471
* or not. Just to be safe, this code does the conservative thing, and
472
* first checks to see if there is a slot, adding one only if one does not
476
/* Scan through /sys/.../pciback/slots looking for pcidev's BDF */
477
static int pciback_dev_has_slot(libxl__gc *gc, libxl_device_pci *pcidev)
479
libxl_ctx *ctx = libxl__gc_owner(gc);
482
unsigned dom, bus, dev, func;
484
f = fopen(SYSFS_PCIBACK_DRIVER"/slots", "r");
487
LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "Couldn't open %s",
488
SYSFS_PCIBACK_DRIVER"/slots");
492
while(fscanf(f, "%x:%x:%x.%d\n", &dom, &bus, &dev, &func)==4) {
493
if(dom == pcidev->domain
494
&& bus == pcidev->bus
495
&& dev == pcidev->dev
496
&& func == pcidev->func) {
506
static int pciback_dev_is_assigned(libxl__gc *gc, libxl_device_pci *pcidev)
508
libxl_ctx *ctx = libxl__gc_owner(gc);
513
spath = libxl__sprintf(gc, SYSFS_PCIBACK_DRIVER"/"PCI_BDF,
514
pcidev->domain, pcidev->bus,
515
pcidev->dev, pcidev->func);
516
rc = lstat(spath, &st);
520
if ( rc < 0 && errno == ENOENT )
522
LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "Accessing %s", spath);
526
static int pciback_dev_assign(libxl__gc *gc, libxl_device_pci *pcidev)
528
libxl_ctx *ctx = libxl__gc_owner(gc);
531
if ( (rc=pciback_dev_has_slot(gc, pcidev)) < 0 ) {
532
LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR,
533
"Error checking for pciback slot");
535
} else if (rc == 0) {
536
if ( sysfs_write_bdf(gc, SYSFS_PCIBACK_DRIVER"/new_slot",
538
LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR,
539
"Couldn't bind device to pciback!");
544
if ( sysfs_write_bdf(gc, SYSFS_PCIBACK_DRIVER"/bind", pcidev) < 0 ) {
545
LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR,
546
"Couldn't bind device to pciback!");
552
static int pciback_dev_unassign(libxl__gc *gc, libxl_device_pci *pcidev)
554
libxl_ctx *ctx = libxl__gc_owner(gc);
556
/* Remove from pciback */
557
if ( sysfs_dev_unbind(gc, pcidev, NULL) < 0 ) {
558
LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "Couldn't unbind device!");
562
/* Remove slot if necessary */
563
if ( pciback_dev_has_slot(gc, pcidev) > 0 ) {
564
if ( sysfs_write_bdf(gc, SYSFS_PCIBACK_DRIVER"/remove_slot",
566
LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR,
567
"Couldn't remove pciback slot");
574
#define PCIBACK_INFO_PATH "/libxl/pciback"
576
static void pci_assignable_driver_path_write(libxl__gc *gc,
577
libxl_device_pci *pcidev,
580
libxl_ctx *ctx = libxl__gc_owner(gc);
583
path = libxl__sprintf(gc, PCIBACK_INFO_PATH"/"PCI_BDF_XSPATH"/driver_path",
588
if ( libxl__xs_write(gc, XBT_NULL, path, "%s", driver_path) < 0 ) {
589
LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_WARNING,
590
"Write of %s to node %s failed.",
595
static char * pci_assignable_driver_path_read(libxl__gc *gc,
596
libxl_device_pci *pcidev)
598
return libxl__xs_read(gc, XBT_NULL,
600
PCIBACK_INFO_PATH "/" PCI_BDF_XSPATH "/driver_path",
607
static void pci_assignable_driver_path_remove(libxl__gc *gc,
608
libxl_device_pci *pcidev)
610
libxl_ctx *ctx = libxl__gc_owner(gc);
612
/* Remove the xenstore entry */
613
xs_rm(ctx->xsh, XBT_NULL,
614
libxl__sprintf(gc, PCIBACK_INFO_PATH "/" PCI_BDF_XSPATH,
621
static int libxl__device_pci_assignable_add(libxl__gc *gc,
622
libxl_device_pci *pcidev,
625
libxl_ctx *ctx = libxl__gc_owner(gc);
626
unsigned dom, bus, dev, func;
627
char *spath, *driver_path = NULL;
630
/* Local copy for convenience */
631
dom = pcidev->domain;
636
/* See if the device exists */
637
spath = libxl__sprintf(gc, SYSFS_PCI_DEV"/"PCI_BDF, dom, bus, dev, func);
638
if ( lstat(spath, &st) ) {
639
LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "Couldn't lstat %s", spath);
643
/* Check to see if it's already assigned to pciback */
644
if ( pciback_dev_is_assigned(gc, pcidev) ) {
645
LIBXL__LOG(ctx, LIBXL__LOG_WARNING, PCI_BDF" already assigned to pciback",
646
dom, bus, dev, func);
650
/* Check to see if there's already a driver that we need to unbind from */
651
if ( sysfs_dev_unbind(gc, pcidev, &driver_path ) ) {
652
LIBXL__LOG(ctx, LIBXL__LOG_ERROR,
653
"Couldn't unbind "PCI_BDF" from driver",
654
dom, bus, dev, func);
658
/* Store driver_path for rebinding to dom0 */
661
pci_assignable_driver_path_write(gc, pcidev, driver_path);
663
LIBXL__LOG(ctx, LIBXL__LOG_WARNING,
664
PCI_BDF" not bound to a driver, will not be rebound.",
665
dom, bus, dev, func);
669
if ( pciback_dev_assign(gc, pcidev) ) {
670
LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "Couldn't bind device to pciback!");
677
static int libxl__device_pci_assignable_remove(libxl__gc *gc,
678
libxl_device_pci *pcidev,
681
libxl_ctx *ctx = libxl__gc_owner(gc);
685
/* Unbind from pciback */
686
if ( (rc=pciback_dev_is_assigned(gc, pcidev)) < 0 ) {
687
LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "Checking if pciback was assigned");
690
pciback_dev_unassign(gc, pcidev);
692
LIBXL__LOG(ctx, LIBXL__LOG_WARNING,
693
"Not bound to pciback");
696
/* Rebind if necessary */
697
driver_path = pci_assignable_driver_path_read(gc, pcidev);
701
LIBXL__LOG(ctx, LIBXL__LOG_INFO, "Rebinding to driver at %s",
704
if ( sysfs_write_bdf(gc,
705
libxl__sprintf(gc, "%s/bind", driver_path),
707
LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR,
708
"Couldn't bind device to %s", driver_path);
713
pci_assignable_driver_path_remove(gc, pcidev);
716
LIBXL__LOG(ctx, LIBXL__LOG_WARNING,
717
"Couldn't find path for original driver; not rebinding");
724
int libxl_device_pci_assignable_add(libxl_ctx *ctx, libxl_device_pci *pcidev,
730
rc = libxl__device_pci_assignable_add(gc, pcidev, rebind);
737
int libxl_device_pci_assignable_remove(libxl_ctx *ctx, libxl_device_pci *pcidev,
743
rc = libxl__device_pci_assignable_remove(gc, pcidev, rebind);
750
* This function checks that all functions of a device are bound to pciback
751
* driver. It also initialises a bit-mask of which function numbers are present
754
static int pci_multifunction_check(libxl__gc *gc, libxl_device_pci *pcidev, unsigned int *func_mask)
756
libxl_ctx *ctx = libxl__gc_owner(gc);
762
dir = opendir(SYSFS_PCI_DEV);
764
LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "Couldn't open %s", SYSFS_PCI_DEV);
768
while( (de = readdir(dir)) ) {
769
unsigned dom, bus, dev, func;
773
if ( sscanf(de->d_name, PCI_BDF, &dom, &bus, &dev, &func) != 4 )
775
if ( pcidev->domain != dom )
777
if ( pcidev->bus != bus )
779
if ( pcidev->dev != dev )
782
path = libxl__sprintf(gc, "%s/" PCI_BDF, SYSFS_PCIBACK_DRIVER, dom, bus, dev, func);
783
if ( lstat(path, &st) ) {
784
if ( errno == ENOENT )
785
LIBXL__LOG(ctx, LIBXL__LOG_ERROR, PCI_BDF " is not assigned to pciback driver",
786
dom, bus, dev, func);
788
LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "Couldn't lstat %s", path);
792
(*func_mask) |= (1 << func);
799
static int pci_ins_check(libxl__gc *gc, uint32_t domid, const char *state, void *priv)
801
char *orig_state = priv;
803
if ( !strcmp(state, "pci-insert-failed") )
805
if ( !strcmp(state, "pci-inserted") )
807
if ( !strcmp(state, orig_state) )
813
static int qemu_pci_add_xenstore(libxl__gc *gc, uint32_t domid,
814
libxl_device_pci *pcidev)
816
libxl_ctx *ctx = libxl__gc_owner(gc);
819
char *state, *vdevfn;
821
path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/state", domid);
822
state = libxl__xs_read(gc, XBT_NULL, path);
823
path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/parameter",
825
if (pcidev->vdevfn) {
826
libxl__xs_write(gc, XBT_NULL, path, PCI_BDF_VDEVFN","PCI_OPTIONS,
827
pcidev->domain, pcidev->bus, pcidev->dev,
828
pcidev->func, pcidev->vdevfn, pcidev->msitranslate,
831
libxl__xs_write(gc, XBT_NULL, path, PCI_BDF","PCI_OPTIONS,
832
pcidev->domain, pcidev->bus, pcidev->dev,
833
pcidev->func, pcidev->msitranslate, pcidev->power_mgmt);
836
libxl__qemu_traditional_cmd(gc, domid, "pci-ins");
837
rc = libxl__wait_for_device_model(gc, domid, NULL, NULL,
838
pci_ins_check, state);
839
path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/parameter",
841
vdevfn = libxl__xs_read(gc, XBT_NULL, path);
842
path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/state",
845
LIBXL__LOG(ctx, LIBXL__LOG_ERROR,
846
"qemu refused to add device: %s", vdevfn);
847
else if ( sscanf(vdevfn, "0x%x", &pcidev->vdevfn) != 1 ) {
848
LIBXL__LOG(ctx, LIBXL__LOG_ERROR,
849
"wrong format for the vdevfn: '%s'", vdevfn);
852
xs_write(ctx->xsh, XBT_NULL, path, state, strlen(state));
857
static int do_pci_add(libxl__gc *gc, uint32_t domid, libxl_device_pci *pcidev, int starting)
859
libxl_ctx *ctx = libxl__gc_owner(gc);
862
switch (libxl__domain_type(gc, domid)) {
863
case LIBXL_DOMAIN_TYPE_HVM:
865
if (libxl__wait_for_device_model(gc, domid, "running",
866
NULL, NULL, NULL) < 0) {
869
switch (libxl__device_model_version_running(gc, domid)) {
870
case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL:
871
rc = qemu_pci_add_xenstore(gc, domid, pcidev);
873
case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
874
rc = libxl__qmp_pci_add(gc, domid, pcidev);
882
case LIBXL_DOMAIN_TYPE_PV:
884
char *sysfs_path = libxl__sprintf(gc, SYSFS_PCI_DEV"/"PCI_BDF"/resource", pcidev->domain,
885
pcidev->bus, pcidev->dev, pcidev->func);
886
FILE *f = fopen(sysfs_path, "r");
887
unsigned long long start = 0, end = 0, flags = 0, size = 0;
892
LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "Couldn't open %s", sysfs_path);
895
for (i = 0; i < PROC_PCI_NUM_RESOURCES; i++) {
896
if (fscanf(f, "0x%llx 0x%llx 0x%llx\n", &start, &end, &flags) != 3)
898
size = end - start + 1;
900
if (flags & PCI_BAR_IO) {
901
rc = xc_domain_ioport_permission(ctx->xch, domid, start, size, 1);
903
LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, rc, "Error: xc_domain_ioport_permission error 0x%llx/0x%llx", start, size);
908
rc = xc_domain_iomem_permission(ctx->xch, domid, start>>XC_PAGE_SHIFT,
909
(size+(XC_PAGE_SIZE-1))>>XC_PAGE_SHIFT, 1);
911
LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, rc, "Error: xc_domain_iomem_permission error 0x%llx/0x%llx", start, size);
919
sysfs_path = libxl__sprintf(gc, SYSFS_PCI_DEV"/"PCI_BDF"/irq", pcidev->domain,
920
pcidev->bus, pcidev->dev, pcidev->func);
921
f = fopen(sysfs_path, "r");
923
LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "Couldn't open %s", sysfs_path);
926
if ((fscanf(f, "%u", &irq) == 1) && irq) {
927
rc = xc_physdev_map_pirq(ctx->xch, domid, irq, &irq);
929
LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, rc, "Error: xc_physdev_map_pirq irq=%d", irq);
933
rc = xc_domain_irq_permission(ctx->xch, domid, irq, 1);
935
LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, rc, "Error: xc_domain_irq_permission irq=%d", irq);
942
/* Don't restrict writes to the PCI config space from this VM */
943
if (pcidev->permissive) {
944
if ( sysfs_write_bdf(gc, SYSFS_PCIBACK_DRIVER"/permissive",
946
LIBXL__LOG(ctx, LIBXL__LOG_ERROR,
947
"Setting permissive for device");
953
case LIBXL_DOMAIN_TYPE_INVALID:
957
if (!libxl_is_stubdom(ctx, domid, NULL)) {
958
rc = xc_assign_device(ctx->xch, domid, pcidev_encode_bdf(pcidev));
959
if (rc < 0 && (hvm || errno != ENOSYS)) {
960
LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, rc, "xc_assign_device failed");
966
rc = libxl__device_pci_add_xenstore(gc, domid, pcidev, starting);
972
static int libxl__device_pci_reset(libxl__gc *gc, unsigned int domain, unsigned int bus,
973
unsigned int dev, unsigned int func)
975
libxl_ctx *ctx = libxl__gc_owner(gc);
979
reset = libxl__sprintf(gc, "%s/pciback/do_flr", SYSFS_PCI_DEV);
980
fd = open(reset, O_WRONLY);
982
char *buf = libxl__sprintf(gc, PCI_BDF, domain, bus, dev, func);
983
rc = write(fd, buf, strlen(buf));
985
LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "write to %s returned %d", reset, rc);
987
return rc < 0 ? rc : 0;
990
LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "Failed to access pciback path %s", reset);
991
reset = libxl__sprintf(gc, "%s/"PCI_BDF"/reset", SYSFS_PCI_DEV, domain, bus, dev, func);
992
fd = open(reset, O_WRONLY);
994
rc = write(fd, "1", 1);
996
LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "write to %s returned %d", reset, rc);
998
return rc < 0 ? rc : 0;
1000
if (errno == ENOENT) {
1001
LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "The kernel doesn't support reset from sysfs for PCI device "PCI_BDF, domain, bus, dev, func);
1003
LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "Failed to access reset path %s", reset);
1008
int libxl__device_pci_setdefault(libxl__gc *gc, libxl_device_pci *pci)
1013
int libxl_device_pci_add(libxl_ctx *ctx, uint32_t domid,
1014
libxl_device_pci *pcidev,
1015
const libxl_asyncop_how *ao_how)
1017
AO_CREATE(ctx, domid, ao_how);
1019
rc = libxl__device_pci_add(gc, domid, pcidev, 0);
1020
libxl__ao_complete(egc, ao, rc);
1021
return AO_INPROGRESS;
1024
static int libxl_pcidev_assignable(libxl_ctx *ctx, libxl_device_pci *pcidev)
1026
libxl_device_pci *pcidevs;
1029
pcidevs = libxl_device_pci_assignable_list(ctx, &num);
1030
for (i = 0; i < num; i++) {
1031
if (pcidevs[i].domain == pcidev->domain &&
1032
pcidevs[i].bus == pcidev->bus &&
1033
pcidevs[i].dev == pcidev->dev &&
1034
pcidevs[i].func == pcidev->func)
1042
int libxl__device_pci_add(libxl__gc *gc, uint32_t domid, libxl_device_pci *pcidev, int starting)
1044
libxl_ctx *ctx = libxl__gc_owner(gc);
1045
unsigned int orig_vdev, pfunc_mask;
1046
libxl_device_pci *assigned;
1047
int num_assigned, i, rc;
1050
rc = libxl__device_pci_setdefault(gc, pcidev);
1053
if (!libxl_pcidev_assignable(ctx, pcidev)) {
1054
LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "PCI device %x:%x:%x.%x is not assignable",
1055
pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
1060
rc = get_all_assigned_devices(gc, &assigned, &num_assigned);
1062
LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "cannot determine if device is assigned, refusing to continue");
1065
if ( is_pcidev_in_array(assigned, num_assigned, pcidev->domain,
1066
pcidev->bus, pcidev->dev, pcidev->func) ) {
1067
LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "PCI device already attached to a domain");
1072
libxl__device_pci_reset(gc, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
1074
stubdomid = libxl_get_stubdom_id(ctx, domid);
1075
if (stubdomid != 0) {
1076
libxl_device_pci pcidev_s = *pcidev;
1077
/* stubdomain is always running by now, even at create time */
1078
rc = do_pci_add(gc, stubdomid, &pcidev_s, 0);
1083
orig_vdev = pcidev->vdevfn & ~7U;
1085
if ( pcidev->vfunc_mask == LIBXL_PCI_FUNC_ALL ) {
1086
if ( !(pcidev->vdevfn >> 3) ) {
1087
LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "Must specify a v-slot for multi-function devices");
1091
if ( pci_multifunction_check(gc, pcidev, &pfunc_mask) ) {
1095
pcidev->vfunc_mask &= pfunc_mask;
1096
/* so now vfunc_mask == pfunc_mask */
1098
pfunc_mask = (1 << pcidev->func);
1101
for(rc = 0, i = 7; i >= 0; --i) {
1102
if ( (1 << i) & pfunc_mask ) {
1103
if ( pcidev->vfunc_mask == pfunc_mask ) {
1105
pcidev->vdevfn = orig_vdev | i;
1107
/* if not passing through multiple devices in a block make
1108
* sure that virtual function number 0 is always used otherwise
1109
* guest won't see the device
1111
pcidev->vdevfn = orig_vdev;
1113
if ( do_pci_add(gc, domid, pcidev, starting) )
1122
static int qemu_pci_remove_xenstore(libxl__gc *gc, uint32_t domid,
1123
libxl_device_pci *pcidev, int force)
1125
libxl_ctx *ctx = libxl__gc_owner(gc);
1129
path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/state", domid);
1130
state = libxl__xs_read(gc, XBT_NULL, path);
1131
path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/parameter", domid);
1132
libxl__xs_write(gc, XBT_NULL, path, PCI_BDF, pcidev->domain,
1133
pcidev->bus, pcidev->dev, pcidev->func);
1135
/* Remove all functions at once atomically by only signalling
1136
* device-model for function 0 */
1137
if ( !force && (pcidev->vdevfn & 0x7) == 0 ) {
1138
libxl__qemu_traditional_cmd(gc, domid, "pci-rem");
1139
if (libxl__wait_for_device_model(gc, domid, "pci-removed",
1140
NULL, NULL, NULL) < 0) {
1141
LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "Device Model didn't respond in time");
1142
/* This depends on guest operating system acknowledging the
1143
* SCI, if it doesn't respond in time then we may wish to
1144
* force the removal.
1149
path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/state", domid);
1150
xs_write(ctx->xsh, XBT_NULL, path, state, strlen(state));
1155
static int libxl__device_pci_remove_common(libxl__gc *gc, uint32_t domid,
1156
libxl_device_pci *pcidev, int force);
1158
/*
 * Detach a single PCI function from domain domid.
 *
 * Visible steps, in order:
 *  - look the device up in libxl_device_pci_list() for this domain and log
 *    an error if it is not there;
 *  - HVM: wait for the device model to reach "running", then ask it to
 *    drop the device (qemu_pci_remove_xenstore() or libxl__qmp_pci_del()
 *    depending on the running device model version);
 *  - PV: read the device's sysfs "resource" and "irq" files and revoke the
 *    I/O port, I/O memory and IRQ permissions they describe;
 *  - reset the device when the function bits of vdevfn are 0;
 *  - unless domid is a stub domain, call xc_deassign_device();
 *  - if the domain has a stub domain, repeat the removal for it on a copy
 *    of *pcidev;
 *  - remove the device from the xenstore backend.
 *
 * NOTE(review): this view of the function is missing lines (braces,
 * break/goto statements, several declarations and the return), so the
 * error-path behaviour and return value are not documented here.
 */
static int do_pci_remove(libxl__gc *gc, uint32_t domid,
1159
libxl_device_pci *pcidev, int force)
1161
libxl_ctx *ctx = libxl__gc_owner(gc);
1162
libxl_device_pci *assigned;
1163
int hvm = 0, rc, num;
1166
assigned = libxl_device_pci_list(ctx, domid, &num);
1167
if ( assigned == NULL )
1171
if ( !is_pcidev_in_array(assigned, num, pcidev->domain,
1172
pcidev->bus, pcidev->dev, pcidev->func) ) {
1173
LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "PCI device not attached to this domain");
1178
switch (libxl__domain_type(gc, domid)) {
1179
case LIBXL_DOMAIN_TYPE_HVM:
1181
if (libxl__wait_for_device_model(gc, domid, "running",
1182
NULL, NULL, NULL) < 0)
1185
switch (libxl__device_model_version_running(gc, domid)) {
1186
case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL:
1187
rc = qemu_pci_remove_xenstore(gc, domid, pcidev, force);
1189
case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
1190
rc = libxl__qmp_pci_del(gc, domid, pcidev);
1201
case LIBXL_DOMAIN_TYPE_PV:
1203
char *sysfs_path = libxl__sprintf(gc, SYSFS_PCI_DEV"/"PCI_BDF"/resource", pcidev->domain,
1204
pcidev->bus, pcidev->dev, pcidev->func);
1205
FILE *f = fopen(sysfs_path, "r");
1206
/*
 * NOTE(review): start/end/flags/size are unsigned int and are parsed with
 * "%x" below, whereas do_pci_add() parses the same sysfs resource file into
 * unsigned long long with "%llx". A resource range that does not fit in
 * unsigned int cannot be represented here, so the permissions revoked may
 * not match the ones granted on add. Consider using the same types and
 * conversions as do_pci_add().
 */
unsigned int start = 0, end = 0, flags = 0, size = 0;
1211
LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "Couldn't open %s", sysfs_path);
1214
for (i = 0; i < PROC_PCI_NUM_RESOURCES; i++) {
1215
/* Each line of the resource file is parsed as "start end flags" in hex. */
if (fscanf(f, "0x%x 0x%x 0x%x\n", &start, &end, &flags) != 3)
1217
size = end - start + 1;
1219
if (flags & PCI_BAR_IO) {
1220
/* Final argument 0 = revoke (do_pci_add() passes 1 to grant). */
rc = xc_domain_ioport_permission(ctx->xch, domid, start, size, 0);
1222
LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, rc, "xc_domain_ioport_permission error 0x%x/0x%x", start, size);
1224
/* Memory ranges are passed as a page frame number and a page count rounded up. */
rc = xc_domain_iomem_permission(ctx->xch, domid, start>>XC_PAGE_SHIFT,
1225
(size+(XC_PAGE_SIZE-1))>>XC_PAGE_SHIFT, 0);
1227
LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, rc, "xc_domain_iomem_permission error 0x%x/0x%x", start, size);
1233
sysfs_path = libxl__sprintf(gc, SYSFS_PCI_DEV"/"PCI_BDF"/irq", pcidev->domain,
1234
pcidev->bus, pcidev->dev, pcidev->func);
1235
f = fopen(sysfs_path, "r");
1237
LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "Couldn't open %s", sysfs_path);
1240
/* An irq value of 0 is treated as "no IRQ to unmap". */
if ((fscanf(f, "%u", &irq) == 1) && irq) {
1241
rc = xc_physdev_unmap_pirq(ctx->xch, domid, irq);
1243
LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, rc, "xc_physdev_unmap_pirq irq=%d", irq);
1245
rc = xc_domain_irq_permission(ctx->xch, domid, irq, 0);
1247
LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, rc, "xc_domain_irq_permission irq=%d", irq);
1257
/* don't do multiple resets while some functions are still passed through */
1258
if ( (pcidev->vdevfn & 0x7) == 0 ) {
1259
libxl__device_pci_reset(gc, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
1262
if (!libxl_is_stubdom(ctx, domid, NULL)) {
1263
rc = xc_deassign_device(ctx->xch, domid, pcidev_encode_bdf(pcidev));
1264
/* For non-HVM domains an ENOSYS failure from xc_deassign_device() is not logged. */
if (rc < 0 && (hvm || errno != ENOSYS))
1265
LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, rc, "xc_deassign_device failed");
1268
stubdomid = libxl_get_stubdom_id(ctx, domid);
1269
if (stubdomid != 0) {
1270
/* Work on a copy so the stub-domain pass cannot modify the caller's *pcidev. */
libxl_device_pci pcidev_s = *pcidev;
1271
libxl__device_pci_remove_common(gc, stubdomid, &pcidev_s, force);
1274
libxl__device_pci_remove_xenstore(gc, domid, pcidev);
1283
static int libxl__device_pci_remove_common(libxl__gc *gc, uint32_t domid,
1284
libxl_device_pci *pcidev, int force)
1286
unsigned int orig_vdev, pfunc_mask;
1289
orig_vdev = pcidev->vdevfn & ~7U;
1291
if ( pcidev->vfunc_mask == LIBXL_PCI_FUNC_ALL ) {
1292
if ( pci_multifunction_check(gc, pcidev, &pfunc_mask) ) {
1296
pcidev->vfunc_mask &= pfunc_mask;
1298
pfunc_mask = (1 << pcidev->func);
1301
for(rc = 0, i = 7; i >= 0; --i) {
1302
if ( (1 << i) & pfunc_mask ) {
1303
if ( pcidev->vfunc_mask == pfunc_mask ) {
1305
pcidev->vdevfn = orig_vdev | i;
1307
pcidev->vdevfn = orig_vdev;
1309
if ( do_pci_remove(gc, domid, pcidev, force) )
1318
int libxl_device_pci_remove(libxl_ctx *ctx, uint32_t domid,
1319
libxl_device_pci *pcidev,
1320
const libxl_asyncop_how *ao_how)
1323
AO_CREATE(ctx, domid, ao_how);
1326
rc = libxl__device_pci_remove_common(gc, domid, pcidev, 0);
1328
libxl__ao_complete(egc, ao, rc);
1329
return AO_INPROGRESS;
1332
int libxl_device_pci_destroy(libxl_ctx *ctx, uint32_t domid,
1333
libxl_device_pci *pcidev,
1334
const libxl_asyncop_how *ao_how)
1336
AO_CREATE(ctx, domid, ao_how);
1339
rc = libxl__device_pci_remove_common(gc, domid, pcidev, 1);
1341
libxl__ao_complete(egc, ao, rc);
1342
return AO_INPROGRESS;
1345
static void libxl__device_pci_from_xs_be(libxl__gc *gc,
1346
const char *be_path,
1347
libxl_device_pci *pci,
1351
unsigned int domain = 0, bus = 0, dev = 0, func = 0, vdevfn = 0;
1353
s = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/dev-%d", be_path, nr));
1354
sscanf(s, PCI_BDF, &domain, &bus, &dev, &func);
1356
s = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/vdevfn-%d", be_path, nr));
1358
vdevfn = strtol(s, (char **) NULL, 16);
1360
pcidev_struct_fill(pci, domain, bus, dev, func, vdevfn);
1362
s = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/opts-%d", be_path, nr));
1365
char *p = strtok_r(s, ",=", &saveptr);
1369
if (!strcmp(p, "msitranslate")) {
1370
p = strtok_r(NULL, ",=", &saveptr);
1371
pci->msitranslate = atoi(p);
1372
} else if (!strcmp(p, "power_mgmt")) {
1373
p = strtok_r(NULL, ",=", &saveptr);
1374
pci->power_mgmt = atoi(p);
1375
} else if (!strcmp(p, "permissive")) {
1376
p = strtok_r(NULL, ",=", &saveptr);
1377
pci->permissive = atoi(p);
1379
} while ((p = strtok_r(NULL, ",=", &saveptr)) != NULL);
1383
libxl_device_pci *libxl_device_pci_list(libxl_ctx *ctx, uint32_t domid, int *num)
1386
char *be_path, *num_devs;
1388
libxl_device_pci *pcidevs = NULL;
1392
be_path = libxl__sprintf(gc, "%s/backend/pci/%d/0", libxl__xs_get_dompath(gc, 0), domid);
1393
num_devs = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/num_devs", be_path));
1398
pcidevs = calloc(n, sizeof(libxl_device_pci));
1400
for (i = 0; i < n; i++)
1401
libxl__device_pci_from_xs_be(gc, be_path, pcidevs + i, i);
1409
int libxl__device_pci_destroy_all(libxl__gc *gc, uint32_t domid)
1411
libxl_ctx *ctx = libxl__gc_owner(gc);
1412
libxl_device_pci *pcidevs;
1415
pcidevs = libxl_device_pci_list(ctx, domid, &num);
1416
if ( pcidevs == NULL )
1419
for (i = 0; i < num; i++) {
1420
/* Force remove on shutdown since, on HVM, qemu will not always
1421
* respond to SCI interrupt because the guest kernel has shut down the
1422
* devices by the time we even get here!
1424
if (libxl__device_pci_remove_common(gc, domid, pcidevs + i, 1) < 0)
1436
* indent-tabs-mode: nil