 * This module includes support for MSI-X in pci devices.
 *
 * Author: Michael S. Tsirkin <mst@redhat.com>
 *
 * Copyright (c) 2009, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com)
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 */

#include "hw.h"
#include "msix.h"
#include "pci.h"
#include "range.h"
#include "kvm.h"
/* Size of the MSI-X capability structure in config space (bytes). */
#define MSIX_CAP_LENGTH 12

/* MSI enable bit and maskall bit are in byte 1 in FLAGS register */
#define MSIX_CONTROL_OFFSET (PCI_MSIX_FLAGS + 1)
#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
#define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)

/* How much space does an MSIX table need. */
/* The spec requires giving the table structure
 * a 4K aligned region all by itself. */
#define MSIX_PAGE_SIZE 0x1000

/* Reserve second half of the page for pending bits */
#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)

#define MSIX_MAX_ENTRIES 32
36
/* Flag for interrupt controller to declare MSI-X support */
39
/* KVM specific MSIX helpers */
40
static void kvm_msix_free(PCIDevice *dev)
42
int vector, changed = 0;
44
for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
45
if (dev->msix_entry_used[vector]) {
46
kvm_msi_message_del(&dev->msix_irq_entries[vector]);
51
kvm_commit_irq_routes();
55
static void kvm_msix_message_from_vector(PCIDevice *dev, unsigned vector,
58
uint8_t *table_entry = dev->msix_table_page + vector * PCI_MSIX_ENTRY_SIZE;
60
kmm->addr_lo = pci_get_long(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR);
61
kmm->addr_hi = pci_get_long(table_entry + PCI_MSIX_ENTRY_UPPER_ADDR);
62
kmm->data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA);
65
/*
 * Push an updated MSI-X message for @vector into the kernel routing table.
 * Called after a guest write to the vector's table entry.
 */
static void kvm_msix_update(PCIDevice *dev, int vector,
                            int was_masked, int is_masked)
{
    KVMMsiMessage new_entry, *entry;
    int mask_cleared = was_masked && !is_masked;
    int r;

    /* It is only legal to change an entry when it is masked. Therefore, it is
     * enough to update the routing in kernel when mask is being cleared. */
    if (!mask_cleared) {
        return;
    }
    if (!dev->msix_entry_used[vector]) {
        return;
    }

    entry = dev->msix_irq_entries + vector;
    kvm_msix_message_from_vector(dev, vector, &new_entry);
    r = kvm_msi_message_update(entry, &new_entry);
    if (r < 0) {
        fprintf(stderr, "%s: kvm_update_msix failed: %s\n", __func__,
                strerror(-r));
        exit(1);
    }
    /* r > 0 means the message actually changed; remember it and commit. */
    if (r > 0) {
        *entry = new_entry;
        r = kvm_commit_irq_routes();
        if (r) {
            fprintf(stderr, "%s: kvm_commit_irq_routes failed: %s\n", __func__,
                    strerror(-r));
            exit(1);
        }
    }
}
100
/*
 * Register the MSI message for @vector with the in-kernel irqchip and
 * commit the routing table. Returns 0 on success, negative errno on failure.
 */
static int kvm_msix_vector_add(PCIDevice *dev, unsigned vector)
{
    KVMMsiMessage *kmm = dev->msix_irq_entries + vector;
    int r;

    kvm_msix_message_from_vector(dev, vector, kmm);
    r = kvm_msi_message_add(kmm);
    if (r < 0) {
        fprintf(stderr, "%s: kvm_add_msix failed: %s\n", __func__, strerror(-r));
        return r;
    }

    r = kvm_commit_irq_routes();
    if (r < 0) {
        fprintf(stderr, "%s: kvm_commit_irq_routes failed: %s\n", __func__, strerror(-r));
        return r;
    }
    return 0;
}
120
/* Remove @vector's route from the in-kernel irqchip and commit. */
static void kvm_msix_vector_del(PCIDevice *dev, unsigned vector)
{
    kvm_msi_message_del(&dev->msix_irq_entries[vector]);
    kvm_commit_irq_routes();
}
126
/* Add MSI-X capability to the config space for the device. */
127
/* Given a bar and its size, add MSI-X table on top of it
128
* and fill MSI-X capability in the config space.
129
* Original bar size must be a power of 2 or 0.
130
* New bar size is returned. */
131
static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries,
132
unsigned bar_nr, unsigned bar_size)
137
pdev->msix_bar_size = bar_size;
139
config_offset = pci_find_capability(pdev, PCI_CAP_ID_MSIX);
141
if (!config_offset) {
144
if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1)
146
if (bar_size > 0x80000000)
149
/* Add space for MSI-X structures */
151
new_size = MSIX_PAGE_SIZE;
152
} else if (bar_size < MSIX_PAGE_SIZE) {
153
bar_size = MSIX_PAGE_SIZE;
154
new_size = MSIX_PAGE_SIZE * 2;
156
new_size = bar_size * 2;
159
pdev->msix_bar_size = new_size;
160
config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX,
162
if (config_offset < 0)
163
return config_offset;
164
config = pdev->config + config_offset;
166
pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
167
/* Table on top of BAR */
168
pci_set_long(config + PCI_MSIX_TABLE, bar_size | bar_nr);
169
/* Pending bits on top of that */
170
pci_set_long(config + PCI_MSIX_PBA, (bar_size + MSIX_PAGE_PENDING) |
173
pdev->msix_cap = config_offset;
174
/* Make flags bit writable. */
175
pdev->wmask[config_offset + MSIX_CONTROL_OFFSET] |= MSIX_ENABLE_MASK |
180
/* Dword read from the MSI-X table/PBA page. */
static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr)
{
    PCIDevice *dev = opaque;
    /* Clamp to the page and force dword alignment. */
    unsigned int offset = addr & (MSIX_PAGE_SIZE - 1) & ~0x3;
    void *page = dev->msix_table_page;

    return pci_get_long(page + offset);
}
189
/* Byte/word reads of the MSI-X table are not supported; warn and return 0. */
static uint32_t msix_mmio_read_unallowed(void *opaque, target_phys_addr_t addr)
{
    fprintf(stderr, "MSI-X: only dword read is allowed!\n");
    return 0;
}
195
/* Bit mask selecting @vector's pending bit within its PBA byte. */
static uint8_t msix_pending_mask(int vector)
{
    return 1 << (vector % 8);
}
200
/* Address of the PBA byte holding @vector's pending bit. */
static uint8_t *msix_pending_byte(PCIDevice *dev, int vector)
{
    return dev->msix_table_page + MSIX_PAGE_PENDING + vector / 8;
}
205
/* Non-zero iff @vector's pending bit is set in the PBA. */
static int msix_is_pending(PCIDevice *dev, int vector)
{
    return *msix_pending_byte(dev, vector) & msix_pending_mask(vector);
}
210
/* Set @vector's pending bit in the PBA. */
static void msix_set_pending(PCIDevice *dev, int vector)
{
    *msix_pending_byte(dev, vector) |= msix_pending_mask(vector);
}
215
/* Clear @vector's pending bit in the PBA. */
static void msix_clr_pending(PCIDevice *dev, int vector)
{
    *msix_pending_byte(dev, vector) &= ~msix_pending_mask(vector);
}
220
/* Non-zero iff the function-wide MASKALL bit is set in config space. */
static int msix_function_masked(PCIDevice *dev)
{
    return dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & MSIX_MASKALL_MASK;
}
225
static int msix_is_masked(PCIDevice *dev, int vector)
228
vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
229
return msix_function_masked(dev) ||
230
dev->msix_table_page[offset] & PCI_MSIX_ENTRY_CTRL_MASKBIT;
233
/* Fire a notification that was deferred while @vector was masked. */
static void msix_handle_mask_update(PCIDevice *dev, int vector)
{
    if (!msix_is_masked(dev, vector) && msix_is_pending(dev, vector)) {
        msix_clr_pending(dev, vector);
        msix_notify(dev, vector);
    }
}
241
/* Handle MSI-X capability config write. */
242
void msix_write_config(PCIDevice *dev, uint32_t addr,
243
uint32_t val, int len)
245
unsigned enable_pos = dev->msix_cap + MSIX_CONTROL_OFFSET;
248
if (!range_covers_byte(addr, len, enable_pos)) {
252
if (!msix_enabled(dev)) {
256
pci_device_deassert_intx(dev);
258
if (msix_function_masked(dev)) {
262
for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
263
msix_handle_mask_update(dev, vector);
267
/* Dword write into the MSI-X table page; propagates mask-state changes. */
static void msix_mmio_writel(void *opaque, target_phys_addr_t addr,
                             uint32_t val)
{
    PCIDevice *dev = opaque;
    unsigned int offset = addr & (MSIX_PAGE_SIZE - 1) & ~0x3;
    int vector = offset / PCI_MSIX_ENTRY_SIZE;
    int was_masked = msix_is_masked(dev, vector);

    pci_set_long(dev->msix_table_page + offset, val);
    if (kvm_enabled() && kvm_irqchip_in_kernel()) {
        kvm_msix_update(dev, vector, was_masked, msix_is_masked(dev, vector));
    }
    if (was_masked != msix_is_masked(dev, vector) && dev->msix_mask_notifier) {
        int r = dev->msix_mask_notifier(dev, vector,
                                        msix_is_masked(dev, vector));
        assert(r >= 0);
    }
    msix_handle_mask_update(dev, vector);
}
286
/* Byte/word writes to the MSI-X table are not supported; warn and drop. */
static void msix_mmio_write_unallowed(void *opaque, target_phys_addr_t addr,
                                      uint32_t val)
{
    fprintf(stderr, "MSI-X: only dword write is allowed!\n");
}
292
static CPUWriteMemoryFunc * const msix_mmio_write[] = {
293
msix_mmio_write_unallowed, msix_mmio_write_unallowed, msix_mmio_writel
296
static CPUReadMemoryFunc * const msix_mmio_read[] = {
297
msix_mmio_read_unallowed, msix_mmio_read_unallowed, msix_mmio_readl
300
/* Should be called from device's map method. */
301
void msix_mmio_map(PCIDevice *d, int region_num,
302
pcibus_t addr, pcibus_t size, int type)
304
uint8_t *config = d->config + d->msix_cap;
305
uint32_t table = pci_get_long(config + PCI_MSIX_TABLE);
306
uint32_t offset = table & ~(MSIX_PAGE_SIZE - 1);
307
/* TODO: for assigned devices, we'll want to make it possible to map
308
* pending bits separately in case they are in a separate bar. */
309
int table_bir = table & PCI_MSIX_FLAGS_BIRMASK;
311
if (table_bir != region_num)
315
cpu_register_physical_memory(addr + offset,
316
MIN(size - offset, MSIX_PAGE_SIZE),
320
static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
323
for (vector = 0; vector < nentries; ++vector) {
325
vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
326
int was_masked = msix_is_masked(dev, vector);
327
dev->msix_table_page[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
328
if (was_masked != msix_is_masked(dev, vector) &&
329
dev->msix_mask_notifier) {
330
r = dev->msix_mask_notifier(dev, vector,
331
msix_is_masked(dev, vector));
337
/* Initialize the MSI-X structures. Note: if MSI-X is supported, BAR size is
338
* modified, it should be retrieved with msix_bar_size. */
339
int msix_init(struct PCIDevice *dev, unsigned short nentries,
340
unsigned bar_nr, unsigned bar_size)
343
/* Nothing to do if MSI is not supported by interrupt controller */
344
if (!msix_supported ||
345
(kvm_enabled() && kvm_irqchip_in_kernel() && !kvm_has_gsi_routing())) {
349
if (nentries > MSIX_MAX_ENTRIES)
352
dev->msix_mask_notifier = NULL;
353
dev->msix_entry_used = qemu_mallocz(MSIX_MAX_ENTRIES *
354
sizeof *dev->msix_entry_used);
356
dev->msix_table_page = qemu_mallocz(MSIX_PAGE_SIZE);
357
msix_mask_all(dev, nentries);
359
dev->msix_mmio_index = cpu_register_io_memory(msix_mmio_read,
360
msix_mmio_write, dev,
361
DEVICE_NATIVE_ENDIAN);
362
if (dev->msix_mmio_index == -1) {
367
dev->msix_entries_nr = nentries;
368
ret = msix_add_config(dev, nentries, bar_nr, bar_size);
372
if (kvm_enabled() && kvm_irqchip_in_kernel()) {
373
dev->msix_irq_entries = qemu_malloc(nentries *
374
sizeof *dev->msix_irq_entries);
377
dev->cap_present |= QEMU_PCI_CAP_MSIX;
381
dev->msix_entries_nr = 0;
382
cpu_unregister_io_memory(dev->msix_mmio_index);
384
qemu_free(dev->msix_table_page);
385
dev->msix_table_page = NULL;
386
qemu_free(dev->msix_entry_used);
387
dev->msix_entry_used = NULL;
391
/* Release all in-kernel routes (if any) and mark every vector unused. */
static void msix_free_irq_entries(PCIDevice *dev)
{
    int vector;

    if (kvm_enabled() && kvm_irqchip_in_kernel()) {
        kvm_msix_free(dev);
    }

    for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
        dev->msix_entry_used[vector] = 0;
        msix_clr_pending(dev, vector);
    }
}
405
/* Clean up resources for the device. */
406
int msix_uninit(PCIDevice *dev)
408
if (!(dev->cap_present & QEMU_PCI_CAP_MSIX))
410
pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
412
msix_free_irq_entries(dev);
413
dev->msix_entries_nr = 0;
414
cpu_unregister_io_memory(dev->msix_mmio_index);
415
qemu_free(dev->msix_table_page);
416
dev->msix_table_page = NULL;
417
qemu_free(dev->msix_entry_used);
418
dev->msix_entry_used = NULL;
419
qemu_free(dev->msix_irq_entries);
420
dev->msix_irq_entries = NULL;
421
dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
425
/* Save MSI-X table and pending-bit state to the migration stream. */
void msix_save(PCIDevice *dev, QEMUFile *f)
{
    unsigned n = dev->msix_entries_nr;

    if (!msix_supported) {
        return;
    }

    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) {
        return;
    }
    qemu_put_buffer(f, dev->msix_table_page, n * PCI_MSIX_ENTRY_SIZE);
    /* One pending bit per vector, packed 8 to a byte. */
    qemu_put_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8);
}
440
/* Should be called after restoring the config space. */
441
void msix_load(PCIDevice *dev, QEMUFile *f)
443
unsigned n = dev->msix_entries_nr;
448
if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) {
452
msix_free_irq_entries(dev);
453
qemu_get_buffer(f, dev->msix_table_page, n * PCI_MSIX_ENTRY_SIZE);
454
qemu_get_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8);
457
/* Does device support MSI-X? */
458
int msix_present(PCIDevice *dev)
460
return dev->cap_present & QEMU_PCI_CAP_MSIX;
463
/* Is MSI-X enabled? */
464
int msix_enabled(PCIDevice *dev)
466
return (dev->cap_present & QEMU_PCI_CAP_MSIX) &&
467
(dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &
471
/* Size of bar where MSI-X table resides, or 0 if MSI-X not supported. */
472
uint32_t msix_bar_size(PCIDevice *dev)
474
return (dev->cap_present & QEMU_PCI_CAP_MSIX) ?
475
dev->msix_bar_size : 0;
478
/* Send an MSI-X message */
479
void msix_notify(PCIDevice *dev, unsigned vector)
481
uint8_t *table_entry = dev->msix_table_page + vector * PCI_MSIX_ENTRY_SIZE;
485
if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector])
487
if (msix_is_masked(dev, vector)) {
488
msix_set_pending(dev, vector);
492
if (kvm_enabled() && kvm_irqchip_in_kernel()) {
493
kvm_set_irq(dev->msix_irq_entries[vector].gsi, 1, NULL);
497
address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR);
498
data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA);
499
stl_le_phys(address, data);
502
/* Reset MSI-X state: drop routes, clear writable config bits and the
 * table page, then re-mask all vectors (the spec reset state). */
void msix_reset(PCIDevice *dev)
{
    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX))
        return;
    msix_free_irq_entries(dev);
    dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &=
        ~dev->wmask[dev->msix_cap + MSIX_CONTROL_OFFSET];
    memset(dev->msix_table_page, 0, MSIX_PAGE_SIZE);
    msix_mask_all(dev, dev->msix_entries_nr);
}
513
/* PCI spec suggests that devices make it possible for software to configure
514
* less vectors than supported by the device, but does not specify a standard
515
* mechanism for devices to do so.
517
* We support this by asking devices to declare vectors software is going to
518
* actually use, and checking this on the notification path. Devices that
519
* don't want to follow the spec suggestion can declare all vectors as used. */
521
/* Mark vector as used. */
522
int msix_vector_use(PCIDevice *dev, unsigned vector)
525
if (vector >= dev->msix_entries_nr)
527
if (kvm_enabled() && kvm_irqchip_in_kernel() &&
528
!dev->msix_entry_used[vector]) {
529
ret = kvm_msix_vector_add(dev, vector);
534
++dev->msix_entry_used[vector];
538
/* Mark vector as unused. */
539
void msix_vector_unuse(PCIDevice *dev, unsigned vector)
541
if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) {
544
if (--dev->msix_entry_used[vector]) {
547
if (kvm_enabled() && kvm_irqchip_in_kernel()) {
548
kvm_msix_vector_del(dev, vector);
550
msix_clr_pending(dev, vector);
553
/* Mark every vector unused and drop any in-kernel routes. */
void msix_unuse_all_vectors(PCIDevice *dev)
{
    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX))
        return;
    msix_free_irq_entries(dev);
}
560
/* Invoke the notifier if vector entry is used and unmasked. */
561
static int msix_notify_if_unmasked(PCIDevice *dev, unsigned vector, int masked)
563
assert(dev->msix_mask_notifier);
564
if (!dev->msix_entry_used[vector] || msix_is_masked(dev, vector)) {
567
return dev->msix_mask_notifier(dev, vector, masked);
570
static int msix_set_mask_notifier_for_vector(PCIDevice *dev, unsigned vector)
{
    /* Notifier has been set. Invoke it on unmasked vectors. */
    return msix_notify_if_unmasked(dev, vector, 0);
}
576
static int msix_unset_mask_notifier_for_vector(PCIDevice *dev, unsigned vector)
{
    /* Notifier will be unset. Invoke it to mask unmasked entries. */
    return msix_notify_if_unmasked(dev, vector, 1);
}
582
/* Install a mask notifier and replay current state to it; on failure,
 * roll back the vectors already notified. */
int msix_set_mask_notifier(PCIDevice *dev, msix_mask_notifier_func f)
{
    int r, n;

    assert(!dev->msix_mask_notifier);
    dev->msix_mask_notifier = f;
    for (n = 0; n < dev->msix_entries_nr; ++n) {
        r = msix_set_mask_notifier_for_vector(dev, n);
        if (r < 0) {
            goto undo;
        }
    }
    return 0;

undo:
    /* Unwind vectors already notified before the failure. */
    while (--n >= 0) {
        msix_unset_mask_notifier_for_vector(dev, n);
    }
    dev->msix_mask_notifier = NULL;
    return r;
}
603
/* Remove the mask notifier, telling it to mask all used unmasked vectors;
 * on failure, re-notify the vectors already unset. */
int msix_unset_mask_notifier(PCIDevice *dev)
{
    int r, n;

    assert(dev->msix_mask_notifier);
    for (n = 0; n < dev->msix_entries_nr; ++n) {
        r = msix_unset_mask_notifier_for_vector(dev, n);
        if (r < 0) {
            goto undo;
        }
    }
    dev->msix_mask_notifier = NULL;
    return 0;

undo:
    /* Re-arm the vectors that were already unset before the failure. */
    while (--n >= 0) {
        msix_set_mask_notifier_for_vector(dev, n);
    }
    return r;
}