/**************************************************************************
 * Copyright (c) 2007, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 *
 **************************************************************************/
#include <drm/drmP.h>
#include "psb_drv.h"
#include "psb_reg.h"

/*
 * Code for the SGX MMU:
 */

/*
 * clflush on one processor only:
 * clflush should apparently flush the cache line on all processors in an
 * SMP system.
 */

/*
 * kmap atomic:
 * The usage of the slots must be completely encapsulated within a spinlock, and
 * no other functions that may be using the locks for other purposes may be
 * called from within the locked region.
 * Since the slots are per processor, this will guarantee that we are the only
 * user.
 */

/*
 * TODO: Inserting ptes from an interrupt handler:
 * This may be desirable for some SGX functionality where the GPU can fault in
 * needed pages. For that, we need to make an atomic insert_pages function that
 * may fail.
 * If it fails, the caller needs to insert the page using a workqueue function,
 * but on average it should be fast.
 */

struct psb_mmu_driver {
	/* protects driver- and pd structures. Always take in read mode
	 * before taking the page table spinlock.
	 */
	struct rw_semaphore sem;

	/* protects page tables, directory tables and pt tables. */
	spinlock_t lock;

	atomic_t needs_tlbflush;

	uint8_t __iomem *register_map;
	struct psb_mmu_pd *default_pd;
	/*uint32_t bif_ctrl;*/
	int has_clflush;
	int clflush_add;
	unsigned long clflush_mask;

	struct drm_psb_private *dev_priv;
};

struct psb_mmu_pd;

struct psb_mmu_pt {
	struct psb_mmu_pd *pd;
	uint32_t index;
	uint32_t count;
	struct page *p;
	uint32_t *v;
};

struct psb_mmu_pd {
	struct psb_mmu_driver *driver;
	int hw_context;
	struct psb_mmu_pt **tables;
	struct page *p;
	struct page *dummy_pt;
	struct page *dummy_page;
	uint32_t pd_mask;
	uint32_t invalid_pde;
	uint32_t invalid_pte;
};

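/*
 * Address translation is two-level: the page directory page (pd->p) holds
 * 1024 32-bit PDEs, each pointing to a page table page of 1024 32-bit PTEs.
 * Each PTE covers one CPU page, so with 4 KiB pages one page table maps
 * 4 MiB of device-virtual address space. The two helpers below split a
 * virtual offset into its page-directory and page-table indices.
 */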
static inline uint32_t psb_mmu_pt_index(uint32_t offset)
{
	return (offset >> PSB_PTE_SHIFT) & 0x3FF;
}

static inline uint32_t psb_mmu_pd_index(uint32_t offset)
{
	return offset >> PSB_PDE_SHIFT;
}

static inline void psb_clflush(void *addr)
{
	__asm__ __volatile__("clflush (%0)\n" : : "r"(addr) : "memory");
}

static inline void psb_mmu_clflush(struct psb_mmu_driver *driver,
				   void *addr)
{
	if (!driver->has_clflush)
		return;

	mb();
	psb_clflush(addr);
	mb();
}

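/*
 * psb_page_clflush() walks one kmapped page in steps derived from the
 * CLFLUSH line size probed in psb_mmu_driver_init(), so that every cache
 * line backing the page is flushed.
 */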
static void psb_page_clflush(struct psb_mmu_driver *driver, struct page* page)
{
	uint32_t clflush_add = driver->clflush_add >> PAGE_SHIFT;
	uint32_t clflush_count = PAGE_SIZE / clflush_add;
	int i;
	uint8_t *clf;

	clf = kmap_atomic(page, KM_USER0);
	mb();
	for (i = 0; i < clflush_count; ++i) {
		psb_clflush(clf);
		clf += clflush_add;
	}
	mb();
	kunmap_atomic(clf, KM_USER0);
}

static void psb_pages_clflush(struct psb_mmu_driver *driver,
			      struct page *page[], unsigned long num_pages)
{
	int i;

	if (!driver->has_clflush)
		return;

	for (i = 0; i < num_pages; i++)
		psb_page_clflush(driver, *page++);
}

static void psb_mmu_flush_pd_locked(struct psb_mmu_driver *driver,
				    int force)
{
	atomic_set(&driver->needs_tlbflush, 0);
}

static void psb_mmu_flush_pd(struct psb_mmu_driver *driver, int force)
{
	down_write(&driver->sem);
	psb_mmu_flush_pd_locked(driver, force);
	up_write(&driver->sem);
}

void psb_mmu_flush(struct psb_mmu_driver *driver, int rc_prot)
{
	down_write(&driver->sem);
	up_write(&driver->sem);
}

void psb_mmu_set_pd_context(struct psb_mmu_pd *pd, int hw_context)
{
	/*ttm_tt_cache_flush(&pd->p, 1);*/
	psb_pages_clflush(pd->driver, &pd->p, 1);
	down_write(&pd->driver->sem);
	psb_mmu_flush_pd_locked(pd->driver, 1);
	pd->hw_context = hw_context;
	up_write(&pd->driver->sem);
}

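/*
 * psb_pd_addr_end() rounds addr up to the next page-directory boundary and
 * clamps the result to end. The insert/remove loops below use it to bound
 * how far they walk within a single page table before mapping the next one.
 */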
static inline unsigned long psb_pd_addr_end(unsigned long addr,
					    unsigned long end)
{
	addr = (addr + PSB_PDE_MASK + 1) & ~PSB_PDE_MASK;
	return (addr < end) ? addr : end;
}

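/*
 * psb_mmu_mask_pte() builds a PTE from a pfn and the PSB_MMU_*_MEMORY type
 * flags. For example, psb_mmu_mask_pte(pfn, PSB_MMU_CACHED_MEMORY) yields
 * (pfn << PAGE_SHIFT) | PSB_PTE_VALID | PSB_PTE_CACHED.
 */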
static inline uint32_t psb_mmu_mask_pte(uint32_t pfn, int type)
{
	uint32_t mask = PSB_PTE_VALID;

	if (type & PSB_MMU_CACHED_MEMORY)
		mask |= PSB_PTE_CACHED;
	if (type & PSB_MMU_RO_MEMORY)
		mask |= PSB_PTE_RO;
	if (type & PSB_MMU_WO_MEMORY)
		mask |= PSB_PTE_WO;

	return (pfn << PAGE_SHIFT) | mask;
}

struct psb_mmu_pd *psb_mmu_alloc_pd(struct psb_mmu_driver *driver,
				    int trap_pagefaults, int invalid_type)
{
	struct psb_mmu_pd *pd = kmalloc(sizeof(*pd), GFP_KERNEL);
	uint32_t *v;
	int i;

	if (!pd)
		return NULL;

	pd->p = alloc_page(GFP_DMA32);
	if (!pd->p)
		goto out_err1;
	pd->dummy_pt = alloc_page(GFP_DMA32);
	if (!pd->dummy_pt)
		goto out_err2;
	pd->dummy_page = alloc_page(GFP_DMA32);
	if (!pd->dummy_page)
		goto out_err3;

	if (!trap_pagefaults) {
		pd->invalid_pde =
		    psb_mmu_mask_pte(page_to_pfn(pd->dummy_pt),
				     invalid_type);
		pd->invalid_pte =
		    psb_mmu_mask_pte(page_to_pfn(pd->dummy_page),
				     invalid_type);
	} else {
		pd->invalid_pde = 0;
		pd->invalid_pte = 0;
	}

	v = kmap(pd->dummy_pt);
	for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i)
		v[i] = pd->invalid_pte;

	kunmap(pd->dummy_pt);

	v = kmap(pd->p);
	for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i)
		v[i] = pd->invalid_pde;

	kunmap(pd->p);

	clear_page(kmap(pd->dummy_page));
	kunmap(pd->dummy_page);

	pd->tables = vmalloc_user(sizeof(struct psb_mmu_pt *) * 1024);
	if (!pd->tables)
		goto out_err4;

	pd->hw_context = -1;
	pd->pd_mask = PSB_PTE_VALID;
	pd->driver = driver;

	return pd;

out_err4:
	__free_page(pd->dummy_page);
out_err3:
	__free_page(pd->dummy_pt);
out_err2:
	__free_page(pd->p);
out_err1:
	kfree(pd);
	return NULL;
}

void psb_mmu_free_pt(struct psb_mmu_pt *pt)
{
	__free_page(pt->p);
	kfree(pt);
}

void psb_mmu_free_pagedir(struct psb_mmu_pd *pd)
{
	struct psb_mmu_driver *driver = pd->driver;
	struct psb_mmu_pt *pt;
	int i;

	down_write(&driver->sem);
	if (pd->hw_context != -1)
		psb_mmu_flush_pd_locked(driver, 1);

	/* Should take the spinlock here, but we don't need to do that
	   since we have the semaphore in write mode. */

	for (i = 0; i < 1024; ++i) {
		pt = pd->tables[i];
		if (pt)
			psb_mmu_free_pt(pt);
	}

	vfree(pd->tables);
	__free_page(pd->dummy_page);
	__free_page(pd->dummy_pt);
	__free_page(pd->p);
	kfree(pd);
	up_write(&driver->sem);
}

static struct psb_mmu_pt *psb_mmu_alloc_pt(struct psb_mmu_pd *pd)
{
	struct psb_mmu_pt *pt = kmalloc(sizeof(*pt), GFP_KERNEL);
	void *v;
	uint32_t clflush_add = pd->driver->clflush_add >> PAGE_SHIFT;
	uint32_t clflush_count = PAGE_SIZE / clflush_add;
	spinlock_t *lock = &pd->driver->lock;
	uint8_t *clf;
	uint32_t *ptes;
	int i;

	if (!pt)
		return NULL;

	pt->p = alloc_page(GFP_DMA32);
	if (!pt->p) {
		kfree(pt);
		return NULL;
	}

	spin_lock(lock);

	v = kmap_atomic(pt->p, KM_USER0);
	clf = (uint8_t *) v;
	ptes = (uint32_t *) v;
	for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i)
		*ptes++ = pd->invalid_pte;

	if (pd->driver->has_clflush && pd->hw_context != -1) {
		mb();
		for (i = 0; i < clflush_count; ++i) {
			psb_clflush(clf);
			clf += clflush_add;
		}
		mb();
	}

	kunmap_atomic(v, KM_USER0);
	spin_unlock(lock);

	pt->count = 0;
	pt->pd = pd;
	pt->index = 0;

	return pt;
}

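/*
 * Each page table keeps a count of PTEs currently in use (bumped by the
 * insert loops, dropped by the remove loops). psb_mmu_pt_alloc_map_lock()
 * returns the table covering addr with the page-table spinlock held and its
 * page kmapped at pt->v, allocating the table and hooking up its PDE on
 * demand; psb_mmu_pt_unmap_unlock() undoes the mapping and, once the count
 * has reached zero, tears the table down and restores the invalid PDE.
 */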
struct psb_mmu_pt *psb_mmu_pt_alloc_map_lock(struct psb_mmu_pd *pd,
					     unsigned long addr)
{
	uint32_t index = psb_mmu_pd_index(addr);
	struct psb_mmu_pt *pt;
	uint32_t *v;
	spinlock_t *lock = &pd->driver->lock;

	spin_lock(lock);
	pt = pd->tables[index];
	while (!pt) {
		spin_unlock(lock);
		pt = psb_mmu_alloc_pt(pd);
		if (!pt)
			return NULL;
		spin_lock(lock);

		if (pd->tables[index]) {
			spin_unlock(lock);
			psb_mmu_free_pt(pt);
			spin_lock(lock);
			pt = pd->tables[index];
			continue;
		}

		v = kmap_atomic(pd->p, KM_USER0);
		pd->tables[index] = pt;
		v[index] = (page_to_pfn(pt->p) << 12) | pd->pd_mask;
		pt->index = index;
		kunmap_atomic((void *) v, KM_USER0);

		if (pd->hw_context != -1) {
			psb_mmu_clflush(pd->driver, (void *) &v[index]);
			atomic_set(&pd->driver->needs_tlbflush, 1);
		}
	}
	pt->v = kmap_atomic(pt->p, KM_USER0);
	return pt;
}

static struct psb_mmu_pt *psb_mmu_pt_map_lock(struct psb_mmu_pd *pd,
					      unsigned long addr)
{
	uint32_t index = psb_mmu_pd_index(addr);
	struct psb_mmu_pt *pt;
	spinlock_t *lock = &pd->driver->lock;

	spin_lock(lock);
	pt = pd->tables[index];
	if (!pt) {
		spin_unlock(lock);
		return NULL;
	}
	pt->v = kmap_atomic(pt->p, KM_USER0);
	return pt;
}

static void psb_mmu_pt_unmap_unlock(struct psb_mmu_pt *pt)
{
	struct psb_mmu_pd *pd = pt->pd;
	uint32_t *v;

	kunmap_atomic(pt->v, KM_USER0);
	if (pt->count == 0) {
		v = kmap_atomic(pd->p, KM_USER0);
		v[pt->index] = pd->invalid_pde;
		pd->tables[pt->index] = NULL;

		if (pd->hw_context != -1) {
			psb_mmu_clflush(pd->driver,
					(void *) &v[pt->index]);
			atomic_set(&pd->driver->needs_tlbflush, 1);
		}
		kunmap_atomic(v, KM_USER0);
		spin_unlock(&pd->driver->lock);
		psb_mmu_free_pt(pt);
		return;
	}
	spin_unlock(&pd->driver->lock);
}

static inline void psb_mmu_set_pte(struct psb_mmu_pt *pt,
				   unsigned long addr, uint32_t pte)
{
	pt->v[psb_mmu_pt_index(addr)] = pte;
}

static inline void psb_mmu_invalidate_pte(struct psb_mmu_pt *pt,
					  unsigned long addr)
{
	pt->v[psb_mmu_pt_index(addr)] = pt->pd->invalid_pte;
}

void psb_mmu_mirror_gtt(struct psb_mmu_pd *pd,
			uint32_t mmu_offset, uint32_t gtt_start,
			uint32_t gtt_pages)
{
	uint32_t *v;
	uint32_t start = psb_mmu_pd_index(mmu_offset);
	struct psb_mmu_driver *driver = pd->driver;
	int num_pages = gtt_pages;

	down_read(&driver->sem);
	spin_lock(&driver->lock);

	v = kmap_atomic(pd->p, KM_USER0);
	v += start;

	while (gtt_pages--) {
		*v++ = gtt_start | pd->pd_mask;
		gtt_start += PAGE_SIZE;
	}

	/*ttm_tt_cache_flush(&pd->p, num_pages);*/
	psb_pages_clflush(pd->driver, &pd->p, num_pages);
	kunmap_atomic(v, KM_USER0);
	spin_unlock(&driver->lock);

	if (pd->hw_context != -1)
		atomic_set(&pd->driver->needs_tlbflush, 1);

	up_read(&pd->driver->sem);
	psb_mmu_flush_pd(pd->driver, 0);
}

struct psb_mmu_pd *psb_mmu_get_default_pd(struct psb_mmu_driver *driver)
{
	struct psb_mmu_pd *pd;

	/* down_read(&driver->sem); */
	pd = driver->default_pd;
	/* up_read(&driver->sem); */

	return pd;
}

/* Returns the physical address of the PD shared by sgx/msvdx */
uint32_t psb_get_default_pd_addr(struct psb_mmu_driver *driver)
{
	struct psb_mmu_pd *pd;

	pd = psb_mmu_get_default_pd(driver);
	return page_to_pfn(pd->p) << PAGE_SHIFT;
}

void psb_mmu_driver_takedown(struct psb_mmu_driver *driver)
{
	psb_mmu_free_pagedir(driver->default_pd);
	kfree(driver);
}

struct psb_mmu_driver *psb_mmu_driver_init(uint8_t __iomem * registers,
					   int trap_pagefaults,
					   int invalid_type,
					   struct drm_psb_private *dev_priv)
{
	struct psb_mmu_driver *driver;

	driver = kmalloc(sizeof(*driver), GFP_KERNEL);
	if (!driver)
		return NULL;

	driver->dev_priv = dev_priv;

	driver->default_pd = psb_mmu_alloc_pd(driver, trap_pagefaults,
					      invalid_type);
	if (!driver->default_pd)
		goto out_err1;

	spin_lock_init(&driver->lock);
	init_rwsem(&driver->sem);
	down_write(&driver->sem);
	driver->register_map = registers;
	atomic_set(&driver->needs_tlbflush, 1);

	driver->has_clflush = 0;

	if (boot_cpu_has(X86_FEATURE_CLFLSH)) {
		uint32_t tfms, misc, cap0, cap4, clflush_size;

		/*
		 * clflush size is determined at kernel setup for x86_64
		 * but not for i386. We have to do it here.
		 */

		cpuid(0x00000001, &tfms, &misc, &cap0, &cap4);
		clflush_size = ((misc >> 8) & 0xff) * 8;
		driver->has_clflush = 1;
		driver->clflush_add =
		    PAGE_SIZE * clflush_size / sizeof(uint32_t);
		driver->clflush_mask = driver->clflush_add - 1;
		driver->clflush_mask = ~driver->clflush_mask;
	}

	up_write(&driver->sem);
	return driver;

out_err1:
	kfree(driver);
	return NULL;
}

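/*
 * psb_mmu_flush_ptes() clflushes the PTEs that back a just-modified range so
 * the hardware sees them. The range may be tiled: it is walked as 'rows' of
 * desired_tile_stride pages each, with consecutive rows hw_tile_stride pages
 * apart; a hw_tile_stride of 0 treats the whole range as one linear row.
 * Within a row, addresses advance by clflush_add, which corresponds to one
 * cache line worth of PTEs, so each affected PTE cache line is flushed once.
 */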
static void psb_mmu_flush_ptes(struct psb_mmu_pd *pd,
			       unsigned long address, uint32_t num_pages,
			       uint32_t desired_tile_stride,
			       uint32_t hw_tile_stride)
{
	struct psb_mmu_pt *pt;
	uint32_t rows = 1;
	uint32_t i;
	unsigned long addr;
	unsigned long end;
	unsigned long next;
	unsigned long add;
	unsigned long row_add;
	unsigned long clflush_add = pd->driver->clflush_add;
	unsigned long clflush_mask = pd->driver->clflush_mask;

	if (!pd->driver->has_clflush) {
		/*ttm_tt_cache_flush(&pd->p, num_pages);*/
		psb_pages_clflush(pd->driver, &pd->p, num_pages);
		return;
	}

	if (hw_tile_stride)
		rows = num_pages / desired_tile_stride;
	else
		desired_tile_stride = num_pages;

	add = desired_tile_stride << PAGE_SHIFT;
	row_add = hw_tile_stride << PAGE_SHIFT;

	for (i = 0; i < rows; ++i) {
		addr = address;
		end = addr + add;

		do {
			next = psb_pd_addr_end(addr, end);
			pt = psb_mmu_pt_map_lock(pd, addr);
			if (!pt)
				continue;
			do {
				psb_clflush(&pt->v
					    [psb_mmu_pt_index(addr)]);
			} while (addr += clflush_add,
				 (addr & clflush_mask) < next);

			psb_mmu_pt_unmap_unlock(pt);
		} while (addr = next, next != end);

		address += row_add;
	}
}

void psb_mmu_remove_pfn_sequence(struct psb_mmu_pd *pd,
				 unsigned long address, uint32_t num_pages)
{
	struct psb_mmu_pt *pt;
	unsigned long addr;
	unsigned long end;
	unsigned long next;
	unsigned long f_address = address;

	down_read(&pd->driver->sem);

	addr = address;
	end = addr + (num_pages << PAGE_SHIFT);

	do {
		next = psb_pd_addr_end(addr, end);
		pt = psb_mmu_pt_alloc_map_lock(pd, addr);
		if (!pt)
			goto out;
		do {
			psb_mmu_invalidate_pte(pt, addr);
			--pt->count;
		} while (addr += PAGE_SIZE, addr < next);
		psb_mmu_pt_unmap_unlock(pt);

	} while (addr = next, next != end);

out:
	if (pd->hw_context != -1)
		psb_mmu_flush_ptes(pd, f_address, num_pages, 1, 1);

	up_read(&pd->driver->sem);

	if (pd->hw_context != -1)
		psb_mmu_flush(pd->driver, 0);
}

void psb_mmu_remove_pages(struct psb_mmu_pd *pd, unsigned long address,
			  uint32_t num_pages, uint32_t desired_tile_stride,
			  uint32_t hw_tile_stride)
{
	struct psb_mmu_pt *pt;
	uint32_t rows = 1;
	uint32_t i;
	unsigned long addr;
	unsigned long end;
	unsigned long next;
	unsigned long add;
	unsigned long row_add;
	unsigned long f_address = address;

	if (hw_tile_stride)
		rows = num_pages / desired_tile_stride;
	else
		desired_tile_stride = num_pages;

	add = desired_tile_stride << PAGE_SHIFT;
	row_add = hw_tile_stride << PAGE_SHIFT;

	/* down_read(&pd->driver->sem); */

	/* Make sure we only need to flush this processor's cache */

	for (i = 0; i < rows; ++i) {
		addr = address;
		end = addr + add;

		do {
			next = psb_pd_addr_end(addr, end);
			pt = psb_mmu_pt_map_lock(pd, addr);
			if (!pt)
				continue;
			do {
				psb_mmu_invalidate_pte(pt, addr);
				--pt->count;
			} while (addr += PAGE_SIZE, addr < next);
			psb_mmu_pt_unmap_unlock(pt);

		} while (addr = next, next != end);

		address += row_add;
	}

	if (pd->hw_context != -1)
		psb_mmu_flush_ptes(pd, f_address, num_pages,
				   desired_tile_stride, hw_tile_stride);

	/* up_read(&pd->driver->sem); */

	if (pd->hw_context != -1)
		psb_mmu_flush(pd->driver, 0);
}

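/*
 * psb_mmu_insert_pfn_sequence() maps num_pages consecutive pfns starting at
 * start_pfn into the device-virtual range starting at address. A
 * hypothetical call such as
 * psb_mmu_insert_pfn_sequence(pd, pfn, dev_addr, 16, PSB_MMU_CACHED_MEMORY)
 * would map 16 contiguous system pages with the cached attribute; the pfn,
 * dev_addr and count here are illustrative only.
 */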
int psb_mmu_insert_pfn_sequence(struct psb_mmu_pd *pd, uint32_t start_pfn,
				unsigned long address, uint32_t num_pages,
				int type)
{
	struct psb_mmu_pt *pt;
	uint32_t pte;
	unsigned long addr;
	unsigned long end;
	unsigned long next;
	unsigned long f_address = address;
	int ret = 0;

	down_read(&pd->driver->sem);

	addr = address;
	end = addr + (num_pages << PAGE_SHIFT);

	do {
		next = psb_pd_addr_end(addr, end);
		pt = psb_mmu_pt_alloc_map_lock(pd, addr);
		if (!pt) {
			ret = -ENOMEM;
			goto out;
		}
		do {
			pte = psb_mmu_mask_pte(start_pfn++, type);
			psb_mmu_set_pte(pt, addr, pte);
			pt->count++;
		} while (addr += PAGE_SIZE, addr < next);
		psb_mmu_pt_unmap_unlock(pt);

	} while (addr = next, next != end);

out:
	if (pd->hw_context != -1)
		psb_mmu_flush_ptes(pd, f_address, num_pages, 1, 1);

	up_read(&pd->driver->sem);

	if (pd->hw_context != -1)
		psb_mmu_flush(pd->driver, 1);

	return ret;
}

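/*
 * psb_mmu_insert_pages() is the struct page counterpart: it maps an array of
 * (not necessarily contiguous) pages, optionally in a tiled layout where
 * desired_tile_stride is the number of pages per row and hw_tile_stride is
 * the row pitch in pages (hw_tile_stride == 0 means one linear row).
 */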
int psb_mmu_insert_pages(struct psb_mmu_pd *pd, struct page **pages,
			 unsigned long address, uint32_t num_pages,
			 uint32_t desired_tile_stride,
			 uint32_t hw_tile_stride, int type)
{
	struct psb_mmu_pt *pt;
	uint32_t rows = 1;
	uint32_t i;
	uint32_t pte;
	unsigned long addr;
	unsigned long end;
	unsigned long next;
	unsigned long add;
	unsigned long row_add;
	unsigned long f_address = address;
	int ret = 0;

	if (hw_tile_stride) {
		if (num_pages % desired_tile_stride != 0)
			return -EINVAL;
		rows = num_pages / desired_tile_stride;
	} else {
		desired_tile_stride = num_pages;
	}

	add = desired_tile_stride << PAGE_SHIFT;
	row_add = hw_tile_stride << PAGE_SHIFT;

	down_read(&pd->driver->sem);

	for (i = 0; i < rows; ++i) {
		addr = address;
		end = addr + add;

		do {
			next = psb_pd_addr_end(addr, end);
			pt = psb_mmu_pt_alloc_map_lock(pd, addr);
			if (!pt) {
				ret = -ENOMEM;
				goto out;
			}
			do {
				pte =
				    psb_mmu_mask_pte(page_to_pfn(*pages++),
						     type);
				psb_mmu_set_pte(pt, addr, pte);
				pt->count++;
			} while (addr += PAGE_SIZE, addr < next);
			psb_mmu_pt_unmap_unlock(pt);

		} while (addr = next, next != end);

		address += row_add;
	}

out:
	if (pd->hw_context != -1)
		psb_mmu_flush_ptes(pd, f_address, num_pages,
				   desired_tile_stride, hw_tile_stride);

	up_read(&pd->driver->sem);

	if (pd->hw_context != -1)
		psb_mmu_flush(pd->driver, 1);

	return ret;
}

int psb_mmu_virtual_to_pfn(struct psb_mmu_pd *pd, uint32_t virtual,
			   unsigned long *pfn)
{
	int ret;
	struct psb_mmu_pt *pt;
	uint32_t tmp;
	spinlock_t *lock = &pd->driver->lock;

	down_read(&pd->driver->sem);
	pt = psb_mmu_pt_map_lock(pd, virtual);
	if (!pt) {
		uint32_t *v;

		spin_lock(lock);
		v = kmap_atomic(pd->p, KM_USER0);
		tmp = v[psb_mmu_pd_index(virtual)];
		kunmap_atomic(v, KM_USER0);
		spin_unlock(lock);

		if (tmp != pd->invalid_pde || !(tmp & PSB_PTE_VALID) ||
		    !(pd->invalid_pte & PSB_PTE_VALID)) {
			ret = -EINVAL;
			goto out;
		}
		ret = 0;
		*pfn = pd->invalid_pte >> PAGE_SHIFT;
		goto out;
	}
	tmp = pt->v[psb_mmu_pt_index(virtual)];
	if (!(tmp & PSB_PTE_VALID)) {
		ret = -EINVAL;
	} else {
		ret = 0;
		*pfn = tmp >> PAGE_SHIFT;
	}
	psb_mmu_pt_unmap_unlock(pt);
out:
	up_read(&pd->driver->sem);
	return ret;
}