/*
 * Copyright (C) 2007 Advanced Micro Devices, Inc.
 * Author: Leo Duran <leo.duran@amd.com>
 * Author: Wei Wang <wei.wang2@amd.com> - adapted to xen
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <xen/sched.h>
#include <xen/pci.h>
#include <xen/pci_regs.h>
#include <xen/paging.h>
#include <xen/softirq.h>
#include <asm/hvm/iommu.h>
#include <asm/amd-iommu.h>
#include <asm/hvm/svm/amd-iommu-proto.h>
#include "../ats.h"

struct amd_iommu *find_iommu_for_device(int seg, int bdf)
{
    struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(seg);

    BUG_ON ( bdf >= ivrs_bdf_entries );
    return ivrs_mappings ? ivrs_mappings[bdf].iommu : NULL;
}

/*
 * Some devices will use alias id and original device id to index interrupt
 * table and I/O page table respectively. Such devices will have
 * both alias entry and select entry in IVRS structure.
 *
 * Return original device id, if device has valid interrupt remapping
 * table setup for both select entry and alias entry.
 */
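/*
 * Illustrative example (the IDs are made up, not taken from a real IVRS
 * table): a device at bdf 0x0820 behind a bridge may emit DMA with its
 * own id (select entry) but interrupts with the bridge's id 0x0800
 * (alias entry, recorded as dte_requestor_id). When both entries carry
 * an interrupt remapping table, I/O page table lookups should use the
 * original id 0x0820 rather than the alias.
 */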
int get_dma_requestor_id(u16 seg, u16 bdf)
{
    struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(seg);
    int req_id;

    BUG_ON ( bdf >= ivrs_bdf_entries );
    req_id = ivrs_mappings[bdf].dte_requestor_id;
    if ( (ivrs_mappings[bdf].intremap_table != NULL) &&
         (ivrs_mappings[req_id].intremap_table != NULL) )
        req_id = bdf;

    return req_id;
}

static int is_translation_valid(u32 *entry)
{
    return (get_field_from_reg_u32(entry[0],
                                   IOMMU_DEV_TABLE_VALID_MASK,
                                   IOMMU_DEV_TABLE_VALID_SHIFT) &&
            get_field_from_reg_u32(entry[0],
                                   IOMMU_DEV_TABLE_TRANSLATION_VALID_MASK,
                                   IOMMU_DEV_TABLE_TRANSLATION_VALID_SHIFT));
}

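/*
 * A DTE takes part in DMA translation only when both its V (valid) and
 * TV (translation valid) bits are set; disable_translation() below
 * clears both, leaving the entry's other fields (e.g. the interrupt
 * remapping setup) untouched.
 */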
static void disable_translation(u32 *dte)
{
    u32 entry;

    entry = dte[0];
    set_field_in_reg_u32(IOMMU_CONTROL_DISABLED, entry,
                         IOMMU_DEV_TABLE_TRANSLATION_VALID_MASK,
                         IOMMU_DEV_TABLE_TRANSLATION_VALID_SHIFT, &entry);
    set_field_in_reg_u32(IOMMU_CONTROL_DISABLED, entry,
                         IOMMU_DEV_TABLE_VALID_MASK,
                         IOMMU_DEV_TABLE_VALID_SHIFT, &entry);
    dte[0] = entry;
}

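/*
 * Point the device table entry (DTE) for bdf at the domain's I/O page
 * tables and flush it, enabling ATS on the device when both the device
 * and the IOMMU support it. Callers must hold pcidevs_lock.
 */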
static void amd_iommu_setup_domain_device(
    struct domain *domain, struct amd_iommu *iommu, int bdf)
{
    void *dte;
    unsigned long flags;
    int req_id, valid = 1;
    int dte_i = 0;
    u8 bus = PCI_BUS(bdf);
    u8 devfn = PCI_DEVFN2(bdf);

    struct hvm_iommu *hd = domain_hvm_iommu(domain);

    BUG_ON( !hd->root_table || !hd->paging_mode || !iommu->dev_table.buffer );

    if ( iommu_passthrough && (domain->domain_id == 0) )
        valid = 0;

    if ( ats_enabled )
        dte_i = 1;

    /* get device-table entry */
    req_id = get_dma_requestor_id(iommu->seg, bdf);
    dte = iommu->dev_table.buffer + (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);

    spin_lock_irqsave(&iommu->lock, flags);

    if ( !is_translation_valid((u32 *)dte) )
    {
        /* bind DTE to domain page-tables */
        amd_iommu_set_root_page_table(
            (u32 *)dte, page_to_maddr(hd->root_table), hd->domain_id,
            hd->paging_mode, valid);

        if ( pci_ats_device(iommu->seg, bus, devfn) &&
             iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
            iommu_dte_set_iotlb((u32 *)dte, dte_i);

        amd_iommu_flush_device(iommu, req_id);

        AMD_IOMMU_DEBUG("Setup I/O page table: device id = 0x%04x, "
                        "root table = 0x%"PRIx64", "
                        "domain = %d, paging mode = %d\n", req_id,
                        page_to_maddr(hd->root_table),
                        hd->domain_id, hd->paging_mode);
    }

    spin_unlock_irqrestore(&iommu->lock, flags);

    ASSERT(spin_is_locked(&pcidevs_lock));

    if ( pci_ats_device(iommu->seg, bus, devfn) &&
         !pci_ats_enabled(iommu->seg, bus, devfn) )
    {
        struct pci_dev *pdev;

        enable_ats_device(iommu->seg, bus, devfn);

        ASSERT(spin_is_locked(&pcidevs_lock));
        pdev = pci_get_pdev(iommu->seg, bus, devfn);

        ASSERT( pdev != NULL );
        amd_iommu_flush_iotlb(pdev, INV_IOMMU_ALL_PAGES_ADDRESS, 0);
    }
}

static void __init amd_iommu_setup_dom0_device(struct pci_dev *pdev)
{
    int bdf = PCI_BDF2(pdev->bus, pdev->devfn);
    struct amd_iommu *iommu = find_iommu_for_device(pdev->seg, bdf);

    if ( likely(iommu != NULL) )
        amd_iommu_setup_domain_device(pdev->domain, iommu, bdf);
    else
        AMD_IOMMU_DEBUG("No iommu for device %04x:%02x:%02x.%u\n",
                        pdev->seg, pdev->bus,
                        PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
}

int __init amd_iov_detect(void)
{
    INIT_LIST_HEAD(&amd_iommu_head);

    if ( (amd_iommu_detect_acpi() != 0) || (iommu_found() == 0) )
    {
        printk("AMD-Vi: IOMMU not found!\n");
        return -ENODEV;
    }

    if ( amd_iommu_init() != 0 )
    {
        printk("AMD-Vi: initialization failed\n");
        return -ENODEV;
    }

    /*
     * AMD IOMMUs don't distinguish between vectors destined for
     * different cpus when doing interrupt remapping. This means
     * that interrupts going through the same intremap table
     * can't share the same vector.
     *
     * If irq_vector_map isn't specified, choose a sensible default:
     * - If we're using per-device intremap tables, per-device
     *   vector non-sharing maps
     * - If we're using a global intremap table, a global vector
     *   non-sharing map
     */
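    /*
     * Concretely (an illustrative scenario, not a trace from real
     * hardware): with a global intremap table, if one device has
     * vector 0x30 remapped for an interrupt targeting CPU0, another
     * device's interrupt targeting CPU1 cannot also use vector 0x30,
     * because the table is indexed by vector alone and the second
     * entry would clobber the first.
     */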
    if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_DEFAULT )
    {
        if ( amd_iommu_perdev_intremap )
        {
            printk("AMD-Vi: Enabling per-device vector maps\n");
            opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_PERDEV;
        }
        else
        {
            printk("AMD-Vi: Enabling global vector map\n");
            opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_GLOBAL;
        }
    }
    else
    {
        printk("AMD-Vi: Not overriding irq_vector_map setting\n");
    }
    return scan_pci_devices();
}

static int allocate_domain_resources(struct hvm_iommu *hd)
{
    /* allocate root table */
    spin_lock(&hd->mapping_lock);
    if ( !hd->root_table )
    {
        hd->root_table = alloc_amd_iommu_pgtable();
        if ( !hd->root_table )
        {
            spin_unlock(&hd->mapping_lock);
            return -ENOMEM;
        }
    }
    spin_unlock(&hd->mapping_lock);
    return 0;
}

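/*
 * Worked example for the level computation below: each page table level
 * resolves PTE_PER_TABLE_SIZE (512) entries, i.e. 9 address bits. A
 * domain with 2^21 pages (8GB with 4K pages) therefore needs
 * ceil(21 / 9) = 3 levels: 2^21 -> 2^12 -> 2^3 -> done.
 */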
static int get_paging_mode(unsigned long entries)
{
    int level = 1;

    BUG_ON( !entries );

    while ( entries > PTE_PER_TABLE_SIZE )
    {
        entries = PTE_PER_TABLE_ALIGN(entries) >> PTE_PER_TABLE_SHIFT;
        if ( ++level > 6 )
            return -ENOMEM;
    }

    return level;
}

static int amd_iommu_domain_init(struct domain *d)
{
    struct hvm_iommu *hd = domain_hvm_iommu(d);

    /* allocate page directory */
    if ( allocate_domain_resources(hd) != 0 )
    {
        if ( hd->root_table )
            free_domheap_page(hd->root_table);
        return -ENOMEM;
    }

    /* For pv guests and dom0, stick with get_paging_mode(max_page).
     * For HVM guests, start with a 2 level page table. */
    hd->paging_mode = is_hvm_domain(d) ?
        IOMMU_PAGING_MODE_LEVEL_2 :
        get_paging_mode(max_page);

    hd->domain_id = d->domain_id;

    guest_iommu_init(d);

    return 0;
}

static void __init amd_iommu_dom0_init(struct domain *d)
{
    unsigned long i;

    if ( !iommu_passthrough && !need_iommu(d) )
    {
        /* Set up 1:1 page table for dom0 */
        for ( i = 0; i < max_pdx; i++ )
        {
            unsigned long pfn = pdx_to_pfn(i);

            /*
             * XXX Should we really map all non-RAM (above 4G)? Minimally
             * a pfn_valid() check would seem desirable here.
             */
            if ( mfn_valid(pfn) )
                amd_iommu_map_page(d, pfn, pfn,
                                   IOMMUF_readable|IOMMUF_writable);
        }
    }

    setup_dom0_pci_devices(d, amd_iommu_setup_dom0_device);
}

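/*
 * Undo amd_iommu_setup_domain_device(): mark the DTE for bdf as no
 * longer valid for translation, flush it, and turn ATS back off if it
 * was enabled. Callers must hold pcidevs_lock.
 */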
void amd_iommu_disable_domain_device(struct domain *domain,
                                     struct amd_iommu *iommu, int bdf)
{
    void *dte;
    unsigned long flags;
    int req_id;
    u8 bus = PCI_BUS(bdf);
    u8 devfn = PCI_DEVFN2(bdf);

    BUG_ON ( iommu->dev_table.buffer == NULL );
    req_id = get_dma_requestor_id(iommu->seg, bdf);
    dte = iommu->dev_table.buffer + (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);

    spin_lock_irqsave(&iommu->lock, flags);
    if ( is_translation_valid((u32 *)dte) )
    {
        disable_translation((u32 *)dte);

        if ( pci_ats_device(iommu->seg, bus, devfn) &&
             iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
            iommu_dte_set_iotlb((u32 *)dte, 0);

        amd_iommu_flush_device(iommu, req_id);

        AMD_IOMMU_DEBUG("Disable: device id = 0x%04x, "
                        "domain = %d, paging mode = %d\n",
                        req_id, domain_hvm_iommu(domain)->domain_id,
                        domain_hvm_iommu(domain)->paging_mode);
    }
    spin_unlock_irqrestore(&iommu->lock, flags);

    ASSERT(spin_is_locked(&pcidevs_lock));

    if ( pci_ats_device(iommu->seg, bus, devfn) &&
         pci_ats_enabled(iommu->seg, bus, devfn) )
        disable_ats_device(iommu->seg, bus, devfn);
}

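/*
 * Move a device from @source to @target: tear down its DTE for
 * @source, move the struct pci_dev across, make sure @target has a
 * root table (it may have been freed on the last pci-detach), and
 * rebuild the DTE against @target's page tables.
 */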
static int reassign_device( struct domain *source, struct domain *target,
                            u16 seg, u8 bus, u8 devfn)
{
    struct pci_dev *pdev;
    struct amd_iommu *iommu;
    int bdf;
    struct hvm_iommu *t = domain_hvm_iommu(target);

    ASSERT(spin_is_locked(&pcidevs_lock));
    pdev = pci_get_pdev_by_domain(source, seg, bus, devfn);
    if ( !pdev )
        return -ENODEV;

    bdf = PCI_BDF2(bus, devfn);
    iommu = find_iommu_for_device(seg, bdf);
    if ( !iommu )
    {
        AMD_IOMMU_DEBUG("Failed to find IOMMU."
                        " %04x:%02x:%02x.%u cannot be assigned to dom%d\n",
                        seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                        target->domain_id);
        return -ENODEV;
    }

    amd_iommu_disable_domain_device(source, iommu, bdf);

    list_move(&pdev->domain_list, &target->arch.pdev_list);
    pdev->domain = target;

    /* IO page tables might be destroyed after pci-detach of the last device.
     * In this case, we have to re-allocate the root table for the next
     * pci-attach. */
    if ( t->root_table == NULL )
        allocate_domain_resources(t);

    amd_iommu_setup_domain_device(target, iommu, bdf);
    AMD_IOMMU_DEBUG("Re-assign %04x:%02x:%02x.%u from dom%d to dom%d\n",
                    seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                    source->domain_id, target->domain_id);

    return 0;
}

static int amd_iommu_assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
{
    struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(seg);
    int bdf = (bus << 8) | devfn;
    int req_id = get_dma_requestor_id(seg, bdf);

    if ( ivrs_mappings[req_id].unity_map_enable )
    {
        amd_iommu_reserve_domain_unity_map(
            d,
            ivrs_mappings[req_id].addr_range_start,
            ivrs_mappings[req_id].addr_range_length,
            ivrs_mappings[req_id].write_permission,
            ivrs_mappings[req_id].read_permission);
    }

    return reassign_device(dom0, d, seg, bus, devfn);
}

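/*
 * Recursively free an I/O page table. Intermediate levels (level > 1)
 * are walked first, freeing everything a present entry points at one
 * level down; the page itself is returned to the allocator last.
 */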
static void deallocate_next_page_table(struct page_info* pg, int level)
{
    void *table_vaddr, *pde;
    u64 next_table_maddr;
    int index, next_level, present;
    u32 *entry;

    table_vaddr = __map_domain_page(pg);

    if ( level > 1 )
    {
        for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ )
        {
            pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE);
            next_table_maddr = amd_iommu_get_next_table_from_pte(pde);
            entry = (u32 *)pde;

            next_level = get_field_from_reg_u32(entry[0],
                                                IOMMU_PDE_NEXT_LEVEL_MASK,
                                                IOMMU_PDE_NEXT_LEVEL_SHIFT);

            present = get_field_from_reg_u32(entry[0],
                                             IOMMU_PDE_PRESENT_MASK,
                                             IOMMU_PDE_PRESENT_SHIFT);

            if ( (next_table_maddr != 0) && (next_level != 0)
                 && present )
            {
                deallocate_next_page_table(
                    maddr_to_page(next_table_maddr), level - 1);
            }
        }
    }

    unmap_domain_page(table_vaddr);
    free_amd_iommu_pgtable(pg);
}

static void deallocate_iommu_page_tables(struct domain *d)
{
    struct hvm_iommu *hd = domain_hvm_iommu(d);

    if ( iommu_use_hap_pt(d) )
        return;

    spin_lock(&hd->mapping_lock);
    if ( hd->root_table )
    {
        deallocate_next_page_table(hd->root_table, hd->paging_mode);
        hd->root_table = NULL;
    }
    spin_unlock(&hd->mapping_lock);
}

static void amd_iommu_domain_destroy(struct domain *d)
{
    guest_iommu_destroy(d);
    deallocate_iommu_page_tables(d);
    amd_iommu_flush_all_pages(d);
}

static int amd_iommu_return_device(
    struct domain *s, struct domain *t, u16 seg, u8 bus, u8 devfn)
{
    return reassign_device(s, t, seg, bus, devfn);
}

static int amd_iommu_add_device(struct pci_dev *pdev)
{
    struct amd_iommu *iommu;
    u16 bdf;

    if ( !pdev->domain )
        return -EINVAL;

    bdf = PCI_BDF2(pdev->bus, pdev->devfn);
    iommu = find_iommu_for_device(pdev->seg, bdf);
    if ( !iommu )
    {
        AMD_IOMMU_DEBUG("Failed to find IOMMU."
                        " %04x:%02x:%02x.%u cannot be assigned to dom%d\n",
                        pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
                        PCI_FUNC(pdev->devfn), pdev->domain->domain_id);
        return -ENODEV;
    }

    amd_iommu_setup_domain_device(pdev->domain, iommu, bdf);
    return 0;
}

static int amd_iommu_remove_device(struct pci_dev *pdev)
{
    struct amd_iommu *iommu;
    u16 bdf;

    if ( !pdev->domain )
        return -EINVAL;

    bdf = PCI_BDF2(pdev->bus, pdev->devfn);
    iommu = find_iommu_for_device(pdev->seg, bdf);
    if ( !iommu )
    {
        AMD_IOMMU_DEBUG("Failed to find IOMMU."
                        " %04x:%02x:%02x.%u cannot be removed from dom%d\n",
                        pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
                        PCI_FUNC(pdev->devfn), pdev->domain->domain_id);
        return -ENODEV;
    }

    amd_iommu_disable_domain_device(pdev->domain, iommu, bdf);
    return 0;
}

static int amd_iommu_group_id(u16 seg, u8 bus, u8 devfn)
{
    int rt;
    int bdf = (bus << 8) | devfn;

    rt = ( bdf < ivrs_bdf_entries ) ?
        get_dma_requestor_id(seg, bdf) :
        bdf;
    return rt;
}

#include <asm/io_apic.h>

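/*
 * Dump one level of a domain's I/O page table, recursing into present
 * non-leaf entries. Leaf output is one "gfn: ... mfn: ..." line per
 * mapping, indented by recursion depth for readability.
 */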
static void amd_dump_p2m_table_level(struct page_info* pg, int level,
                                     paddr_t gpa, int indent)
{
    paddr_t address;
    void *table_vaddr, *pde;
    paddr_t next_table_maddr;
    int index, next_level, present;
    u32 *entry;

    if ( level < 1 )
        return;

    table_vaddr = __map_domain_page(pg);
    if ( table_vaddr == NULL )
    {
        printk("Failed to map IOMMU domain page %"PRIpaddr"\n",
               page_to_maddr(pg));
        return;
    }

    for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ )
    {
        if ( !(index % 2) )
            process_pending_softirqs();

        pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE);
        next_table_maddr = amd_iommu_get_next_table_from_pte(pde);
        entry = (u32 *)pde;

        present = get_field_from_reg_u32(entry[0],
                                         IOMMU_PDE_PRESENT_MASK,
                                         IOMMU_PDE_PRESENT_SHIFT);

        if ( !present )
            continue;

        next_level = get_field_from_reg_u32(entry[0],
                                            IOMMU_PDE_NEXT_LEVEL_MASK,
                                            IOMMU_PDE_NEXT_LEVEL_SHIFT);

        if ( next_level && (next_level != (level - 1)) )
        {
            printk("IOMMU p2m table error. next_level = %d, expected %d\n",
                   next_level, level - 1);
            continue;
        }

        address = gpa + amd_offset_level_address(index, level);
        if ( next_level >= 1 )
            amd_dump_p2m_table_level(
                maddr_to_page(next_table_maddr), next_level,
                address, indent + 1);
        else
            printk("%*sgfn: %08lx  mfn: %08lx\n",
                   indent, "",
                   (unsigned long)PFN_DOWN(address),
                   (unsigned long)PFN_DOWN(next_table_maddr));
    }

    unmap_domain_page(table_vaddr);
}

static void amd_dump_p2m_table(struct domain *d)
{
    struct hvm_iommu *hd = domain_hvm_iommu(d);

    if ( !hd->root_table )
        return;

    printk("p2m table has %d levels\n", hd->paging_mode);
    amd_dump_p2m_table_level(hd->root_table, hd->paging_mode, 0, 0);
}

const struct iommu_ops amd_iommu_ops = {
    .init = amd_iommu_domain_init,
    .dom0_init = amd_iommu_dom0_init,
    .add_device = amd_iommu_add_device,
    .remove_device = amd_iommu_remove_device,
    .assign_device = amd_iommu_assign_device,
    .teardown = amd_iommu_domain_destroy,
    .map_page = amd_iommu_map_page,
    .unmap_page = amd_iommu_unmap_page,
    .reassign_device = amd_iommu_return_device,
    .get_device_group_id = amd_iommu_group_id,
    .update_ire_from_apic = amd_iommu_ioapic_update_ire,
    .update_ire_from_msi = amd_iommu_msi_msg_update_ire,
    .read_apic_from_ire = __io_apic_read,
    .read_msi_from_ire = amd_iommu_read_msi_from_ire,
    .suspend = amd_iommu_suspend,
    .resume = amd_iommu_resume,
    .share_p2m = amd_iommu_share_p2m,
    .crash_shutdown = amd_iommu_suspend,
    .dump_p2m_table = amd_dump_p2m_table,
};