/*
 * SN Platform GRU Driver
 *
 * DRIVER TABLE MANAGER + GRU CONTEXT LOAD/UNLOAD
 *
 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/sched.h>
#include <linux/device.h>
#include <linux/list.h>
#include <asm/uv/uv_hub.h>
#include "gru.h"
#include "grutables.h"
#include "gruhandles.h"
/* Hit the asid limit. Start over */
static int gru_wrap_asid(struct gru_state *gru)
{
	gru_dbg(grudev, "gid %d\n", gru->gs_gid);
	STAT(asid_wrap);
	gru->gs_asid_gen++;
	return MIN_ASID;
}
/* Find the next chunk of unused asids */
static int gru_reset_asid_limit(struct gru_state *gru, int asid)
{
	int i, gid, inuse_asid, limit;

	gru_dbg(grudev, "gid %d, asid 0x%x\n", gru->gs_gid, asid);
	STAT(asid_next);
	limit = MAX_ASID;
	if (asid >= limit)
		asid = gru_wrap_asid(gru);
	gru_flush_all_tlb(gru);
	gid = gru->gs_gid;
again:
	for (i = 0; i < GRU_NUM_CCH; i++) {
		if (!gru->gs_gts[i] || is_kernel_context(gru->gs_gts[i]))
			continue;
		inuse_asid = gru->gs_gts[i]->ts_gms->ms_asids[gid].mt_asid;
		gru_dbg(grudev, "gid %d, gts %p, gms %p, inuse 0x%x, cxt %d\n",
			gru->gs_gid, gru->gs_gts[i], gru->gs_gts[i]->ts_gms,
			inuse_asid, i);
		if (inuse_asid == asid) {
			asid += ASID_INC;
			if (asid >= limit) {
				/*
				 * empty range: reset the range limit and
				 * start over
				 */
				limit = MAX_ASID;
				if (asid >= MAX_ASID)
					asid = gru_wrap_asid(gru);
				goto again;
			}
		}

		if ((inuse_asid > asid) && (inuse_asid < limit))
			limit = inuse_asid;
	}
	gru->gs_asid_limit = limit;
	gru->gs_asid = asid;
	gru_dbg(grudev, "gid %d, new asid 0x%x, new_limit 0x%x\n", gru->gs_gid,
		asid, limit);
	return asid;
}

/* Assign a new ASID to a thread context. */
static int gru_assign_asid(struct gru_state *gru)
{
	int asid;

	gru->gs_asid += ASID_INC;
	asid = gru->gs_asid;
	if (asid >= gru->gs_asid_limit)
		asid = gru_reset_asid_limit(gru, asid);

	gru_dbg(grudev, "gid %d, asid 0x%x\n", gru->gs_gid, asid);
	return asid;
}
/*
 * TLB management requires tracking all GRU chiplets that have loaded a GSEG
 * context.
 */
static int gru_load_mm_tracker(struct gru_state *gru,
					struct gru_thread_state *gts)
{
	struct gru_mm_struct *gms = gts->ts_gms;
	struct gru_mm_tracker *asids = &gms->ms_asids[gru->gs_gid];
	unsigned short ctxbitmap = (1 << gts->ts_ctxnum);
	int asid;

	spin_lock(&gms->ms_asid_lock);
	asid = asids->mt_asid;

	spin_lock(&gru->gs_asid_lock);
	if (asid == 0 || (asids->mt_ctxbitmap == 0 && asids->mt_asid_gen !=
			  gru->gs_asid_gen)) {
		asid = gru_assign_asid(gru);
		asids->mt_asid = asid;
		asids->mt_asid_gen = gru->gs_asid_gen;
	}
	spin_unlock(&gru->gs_asid_lock);

	BUG_ON(asids->mt_ctxbitmap & ctxbitmap);
	asids->mt_ctxbitmap |= ctxbitmap;
	__set_bit(gru->gs_gid, gms->ms_asidmap);
	spin_unlock(&gms->ms_asid_lock);

	gru_dbg(grudev,
		"gid %d, gts %p, gms %p, ctxnum %d, asid 0x%x, asidmap 0x%lx\n",
		gru->gs_gid, gts, gms, gts->ts_ctxnum, asid,
		gms->ms_asidmap[0]);
	return asid;
}

static void gru_unload_mm_tracker(struct gru_state *gru,
					struct gru_thread_state *gts)
{
	struct gru_mm_struct *gms = gts->ts_gms;
	struct gru_mm_tracker *asids;
	unsigned short ctxbitmap;

	asids = &gms->ms_asids[gru->gs_gid];
	ctxbitmap = (1 << gts->ts_ctxnum);
	spin_lock(&gms->ms_asid_lock);
	spin_lock(&gru->gs_asid_lock);
	BUG_ON((asids->mt_ctxbitmap & ctxbitmap) != ctxbitmap);
	asids->mt_ctxbitmap ^= ctxbitmap;
	gru_dbg(grudev, "gid %d, gts %p, gms %p, ctxnum %d, asidmap 0x%lx\n",
		gru->gs_gid, gts, gms, gts->ts_ctxnum, gms->ms_asidmap[0]);
	spin_unlock(&gru->gs_asid_lock);
	spin_unlock(&gms->ms_asid_lock);
}
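/*
 * Lock ordering note: both trackers above take gms->ms_asid_lock before
 * gru->gs_asid_lock, keeping the GSEG-wide tracker state and the
 * per-chiplet ASID allocator consistent while a context bit is set or
 * cleared.
 */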
/*
 * Allocate a thread state structure.
 */
struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
		int cbr_au_count, int dsr_au_count, int options, int tsid)
{
	struct gru_thread_state *gts;
	int bytes;

	bytes = DSR_BYTES(dsr_au_count) + CBR_BYTES(cbr_au_count);
	bytes += sizeof(struct gru_thread_state);
	gts = kmalloc(bytes, GFP_KERNEL);
	if (!gts)
		return NULL;

	memset(gts, 0, sizeof(struct gru_thread_state)); /* zero out header */
	atomic_set(&gts->ts_refcnt, 1);
	mutex_init(&gts->ts_ctxlock);
	gts->ts_cbr_au_count = cbr_au_count;
	gts->ts_dsr_au_count = dsr_au_count;
	gts->ts_user_options = options;
	gts->ts_tsid = tsid;
	gts->ts_ctxnum = NULLCTX;
	gts->ts_tlb_int_select = -1;
	gts->ts_cch_req_slice = -1;
	gts->ts_sizeavail = GRU_SIZEAVAIL(PAGE_SHIFT);
	if (vma) {
		gts->ts_mm = current->mm;
		gts->ts_vma = vma;
		gts->ts_gms = gru_register_mmu_notifier();
		if (!gts->ts_gms)
			goto err;
	}

	gru_dbg(grudev, "alloc gts %p\n", gts);
	return gts;

err:
	gts_drop(gts);
	return NULL;
}
/*
 * Allocate a new thread state for a GSEG. Note that races may allow
 * another thread to race to create a gts.
 */
struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma,
					int tsid)
{
	struct gru_vma_data *vdata = vma->vm_private_data;
	struct gru_thread_state *gts, *ngts;

	gts = gru_alloc_gts(vma, vdata->vd_cbr_au_count, vdata->vd_dsr_au_count,
			    vdata->vd_user_options, tsid);
	if (!gts)
		return NULL;

	/* check for a race with another thread creating the same gts */
	spin_lock(&vdata->vd_lock);
	ngts = gru_find_current_gts_nolock(vdata, tsid);
	if (ngts) {
		gts_drop(gts);
		gts = ngts;
	} else {
		list_add(&gts->ts_next, &vdata->vd_head);
	}
	spin_unlock(&vdata->vd_lock);

	return gts;
}
/*
 * Free the GRU context assigned to the thread state.
 */
static void gru_free_gru_context(struct gru_thread_state *gts)
{
	struct gru_state *gru;

	gru = gts->ts_gru;
	gru_dbg(grudev, "gts %p, gid %d\n", gts, gru->gs_gid);

	spin_lock(&gru->gs_lock);
	gru->gs_gts[gts->ts_ctxnum] = NULL;
	free_gru_resources(gru, gts);
	__clear_bit(gts->ts_ctxnum, &gru->gs_context_map);
	gts->ts_ctxnum = NULLCTX;
	gts->ts_gru = NULL;
	spin_unlock(&gru->gs_lock);

	gts_drop(gts);
}
static void prefetch_data(void *p, int num, int stride)
{
	while (num-- > 0) {
		prefetch(p);
		p += stride;
	}
}

static int gru_copy_handle(void *d, void *s)
{
	memcpy(d, s, GRU_HANDLE_BYTES);
	return GRU_HANDLE_BYTES;
}

/*
 * Prefetching cachelines helps hardware performance.
 * (Strictly a performance enhancement. Not functionally required.)
 */
static void gru_prefetch_context(void *gseg, void *cb, void *cbe,
				unsigned long cbrmap, unsigned long length)
{
	int i, scr;

	prefetch_data(gseg + GRU_DS_BASE, length / GRU_CACHE_LINE_BYTES,
		      GRU_CACHE_LINE_BYTES);

	for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
		prefetch_data(cb, 1, GRU_CACHE_LINE_BYTES);
		prefetch_data(cbe + i * GRU_HANDLE_STRIDE, 1,
			      GRU_CACHE_LINE_BYTES);
		cb += GRU_HANDLE_STRIDE;
	}
}
static void gru_load_context_data(void *save, void *grubase, int ctxnum,
				  unsigned long cbrmap, unsigned long dsrmap,
				  int data_valid)
{
	void *gseg, *cb, *cbe;
	unsigned long length;
	int i, scr;

	gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
	cb = gseg + GRU_CB_BASE;
	cbe = grubase + GRU_CBE_BASE;
	length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
	gru_prefetch_context(gseg, cb, cbe, cbrmap, length);

	for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
		if (data_valid) {
			save += gru_copy_handle(cb, save);
			save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE,
						save);
		} else {
			memset(cb, 0, GRU_CACHE_LINE_BYTES);
			memset(cbe + i * GRU_HANDLE_STRIDE, 0,
				GRU_CACHE_LINE_BYTES);
		}
		cb += GRU_HANDLE_STRIDE;
	}

	if (data_valid)
		memcpy(gseg + GRU_DS_BASE, save, length);
	else
		memset(gseg + GRU_DS_BASE, 0, length);
}
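/*
 * Note: data_valid distinguishes a restore from a first load. A context
 * that has never been saved (ts_data_valid == 0) gets its CBs, CBEs and
 * data segment zeroed instead of being filled from the uninitialized
 * save area in memory.
 */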
static void gru_unload_context_data(void *save, void *grubase, int ctxnum,
				    unsigned long cbrmap, unsigned long dsrmap)
{
	void *gseg, *cb, *cbe;
	unsigned long length;
	int i, scr;

	gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
	cb = gseg + GRU_CB_BASE;
	cbe = grubase + GRU_CBE_BASE;
	length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
	gru_prefetch_context(gseg, cb, cbe, cbrmap, length);

	for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
		save += gru_copy_handle(save, cb);
		save += gru_copy_handle(save, cbe + i * GRU_HANDLE_STRIDE);
		cb += GRU_HANDLE_STRIDE;
	}
	memcpy(save, gseg + GRU_DS_BASE, length);
}
void gru_unload_context(struct gru_thread_state *gts, int savestate)
{
	struct gru_state *gru = gts->ts_gru;
	struct gru_context_configuration_handle *cch;
	int ctxnum = gts->ts_ctxnum;

	if (!is_kernel_context(gts))
		zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE);
	cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);

	gru_dbg(grudev, "gts %p\n", gts);
	lock_cch_handle(cch);
	if (cch_interrupt_sync(cch))
		BUG();

	if (!is_kernel_context(gts))
		gru_unload_mm_tracker(gru, gts);
	if (savestate) {
		gru_unload_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr,
					ctxnum, gts->ts_cbr_map,
					gts->ts_dsr_map);
		gts->ts_data_valid = 1;
	}

	if (cch_deallocate(cch))
		BUG();
	unlock_cch_handle(cch);

	gru_free_gru_context(gts);
	STAT(unload_context);
}
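/*
 * Note: callers pass savestate == 0 when the context state is disposable,
 * e.g. a stolen kernel context that is simply reinitialized on next use,
 * which skips the copy-out of CB/DS state to memory.
 */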
/*
 * Load a GRU context by copying it from the thread data structure in memory
 * to the GRU.
 */
void gru_load_context(struct gru_thread_state *gts)
{
	struct gru_state *gru = gts->ts_gru;
	struct gru_context_configuration_handle *cch;
	int i, err, asid, ctxnum = gts->ts_ctxnum;

	gru_dbg(grudev, "gts %p\n", gts);
	cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);

	lock_cch_handle(cch);
	cch->tfm_fault_bit_enable =
	    (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
	     || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
	cch->tlb_int_enable = (gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
	if (cch->tlb_int_enable) {
		gts->ts_tlb_int_select = gru_cpu_fault_map_id();
		cch->tlb_int_select = gts->ts_tlb_int_select;
	}
	if (gts->ts_cch_req_slice >= 0) {
		cch->req_slice_set_enable = 1;
		cch->req_slice = gts->ts_cch_req_slice;
	} else {
		cch->req_slice_set_enable = 0;
	}
	cch->tfm_done_bit_enable = 0;
	cch->dsr_allocation_map = gts->ts_dsr_map;
	cch->cbr_allocation_map = gts->ts_cbr_map;

	if (is_kernel_context(gts)) {
		cch->unmap_enable = 1;
		cch->tfm_done_bit_enable = 1;
		cch->cb_int_enable = 1;
	} else {
		cch->unmap_enable = 0;
		cch->tfm_done_bit_enable = 0;
		cch->cb_int_enable = 0;
		asid = gru_load_mm_tracker(gru, gts);
		for (i = 0; i < 8; i++) {
			cch->asid[i] = asid + i;
			cch->sizeavail[i] = gts->ts_sizeavail;
		}
	}
	err = cch_allocate(cch);
	if (err) {
		gru_dbg(grudev,
			"err %d: cch %p, gts %p, cbr 0x%lx, dsr 0x%lx\n",
			err, cch, gts, gts->ts_cbr_map, gts->ts_dsr_map);
		BUG();
	}

	gru_load_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, ctxnum,
			gts->ts_cbr_map, gts->ts_dsr_map, gts->ts_data_valid);

	if (cch_start(cch))
		BUG();
	unlock_cch_handle(cch);
}
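/*
 * Note: a CCH carries eight asid/sizeavail slots. A user context fills
 * them with consecutive ASIDs (asid + i) and the same page-size-available
 * mask, while a kernel context runs with unmap_enable set and leaves the
 * ASID slots zero.
 */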
/*
 * Update fields in an active CCH:
 *	- retarget interrupts on local blade
 *	- update sizeavail mask
 *	- force a delayed context unload by clearing the CCH asids. This
 *	  forces TLB misses for new GRU instructions. The context is unloaded
 *	  when the next TLB miss occurs.
 */
int gru_update_cch(struct gru_thread_state *gts, int force_unload)
{
	struct gru_context_configuration_handle *cch;
	struct gru_state *gru = gts->ts_gru;
	int i, ctxnum = gts->ts_ctxnum, ret = 0;

	cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);

	lock_cch_handle(cch);
	if (cch->state == CCHSTATE_ACTIVE) {
		if (gru->gs_gts[gts->ts_ctxnum] != gts)
			goto exit;
		if (cch_interrupt(cch))
			BUG();
		if (!force_unload) {
			for (i = 0; i < 8; i++)
				cch->sizeavail[i] = gts->ts_sizeavail;
			gts->ts_tlb_int_select = gru_cpu_fault_map_id();
			cch->tlb_int_select = gru_cpu_fault_map_id();
			cch->tfm_fault_bit_enable =
			  (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
			    || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
		} else {
			for (i = 0; i < 8; i++)
				cch->asid[i] = 0;
			cch->tfm_fault_bit_enable = 0;
			cch->tlb_int_enable = 0;
		}
		if (cch_start(cch))
			BUG();
		ret = 1;
	}
exit:
	unlock_cch_handle(cch);
	return ret;
}
/*
 * Update CCH tlb interrupt select. Required when all the following is true:
 *	- task's GRU context is loaded into a GRU
 *	- task is using interrupt notification for TLB faults
 *	- task has migrated to a different cpu on the same blade where
 *	  it was previously running.
 */
static int gru_retarget_intr(struct gru_thread_state *gts)
{
	if (gts->ts_tlb_int_select < 0
	    || gts->ts_user_options == GRU_OPT_MISS_FMM_POLL)
		return 0;

	gru_dbg(grudev, "retarget from %d to %d\n", gts->ts_tlb_int_select,
		gru_cpu_fault_map_id());
	return gru_update_cch(gts, 0);
}
#define next_ctxnum(n)	((n) < GRU_NUM_CCH - 2 ? (n) + 1 : 0)
#define next_gru(b, g)	(((g) < &(b)->bs_grus[GRU_CHIPLETS_PER_BLADE - 1]) ? \
				 ((g)+1) : &(b)->bs_grus[0])

static int is_gts_stealable(struct gru_thread_state *gts,
		struct gru_blade_state *bs)
{
	if (is_kernel_context(gts))
		return down_write_trylock(&bs->bs_kgts_sema);
	else
		return mutex_trylock(&gts->ts_ctxlock);
}

static void gts_stolen(struct gru_thread_state *gts,
		struct gru_blade_state *bs)
{
	if (is_kernel_context(gts)) {
		up_write(&bs->bs_kgts_sema);
		STAT(steal_kernel_context);
	} else {
		mutex_unlock(&gts->ts_ctxlock);
		STAT(steal_user_context);
	}
}
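/*
 * Note: stealing grabs locks out of order, so both helpers above use
 * trylock variants: bs_kgts_sema serializes theft of the blade's kernel
 * context, ts_ctxlock protects a user context. On trylock failure the
 * stealer simply moves on to another candidate.
 */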
void gru_steal_context(struct gru_thread_state *gts, int blade_id)
{
	struct gru_blade_state *blade;
	struct gru_state *gru, *gru0;
	struct gru_thread_state *ngts = NULL;
	int ctxnum, ctxnum0, flag = 0, cbr, dsr;

	cbr = gts->ts_cbr_au_count;
	dsr = gts->ts_dsr_au_count;

	blade = gru_base[blade_id];
	spin_lock(&blade->bs_lock);

	ctxnum = next_ctxnum(blade->bs_lru_ctxnum);
	gru = blade->bs_lru_gru;
	if (ctxnum == 0)
		gru = next_gru(blade, gru);
	ctxnum0 = ctxnum;
	gru0 = gru;
	while (1) {
		if (check_gru_resources(gru, cbr, dsr, GRU_NUM_CCH))
			break;
		spin_lock(&gru->gs_lock);
		for (; ctxnum < GRU_NUM_CCH; ctxnum++) {
			if (flag && gru == gru0 && ctxnum == ctxnum0)
				break;
			ngts = gru->gs_gts[ctxnum];
			/*
			 * We are grabbing locks out of order, so trylock is
			 * needed. GTSs are usually not locked, so the odds of
			 * success are high. If trylock fails, try to steal a
			 * different GSEG.
			 */
			if (ngts && is_gts_stealable(ngts, blade))
				break;
			ngts = NULL;
			flag = 1;
		}
		spin_unlock(&gru->gs_lock);
		if (ngts || (flag && gru == gru0 && ctxnum == ctxnum0))
			break;
		ctxnum = 0;
		gru = next_gru(blade, gru);
	}
	blade->bs_lru_gru = gru;
	blade->bs_lru_ctxnum = ctxnum;
	spin_unlock(&blade->bs_lock);

	if (ngts) {
		gts->ustats.context_stolen++;
		ngts->ts_steal_jiffies = jiffies;
		gru_unload_context(ngts, is_kernel_context(ngts) ? 0 : 1);
		gts_stolen(ngts, blade);
	} else {
		STAT(steal_context_failed);
	}
	gru_dbg(grudev,
		"stole gid %d, ctxnum %d from gts %p. Need cb %d, ds %d;"
		" avail cb %ld, ds %ld\n",
		gru->gs_gid, ctxnum, ngts, cbr, dsr, hweight64(gru->gs_cbr_map),
		hweight64(gru->gs_dsr_map));
}
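/*
 * Note: bs_lru_gru/bs_lru_ctxnum form a round-robin rotor so successive
 * steals spread across the blade's chiplets and contexts instead of
 * repeatedly victimizing the same GSEG.
 */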
/*
 * Scan the GRUs on the local blade & assign a GRU context.
 */
struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts,
						int blade)
{
	struct gru_state *gru, *grux;
	int i, max_active_contexts;

again:
	gru = NULL;
	max_active_contexts = GRU_NUM_CCH;
	for_each_gru_on_blade(grux, blade, i) {
		if (check_gru_resources(grux, gts->ts_cbr_au_count,
					gts->ts_dsr_au_count,
					max_active_contexts)) {
			gru = grux;
			max_active_contexts = grux->gs_active_contexts;
			if (max_active_contexts == 0)
				break;
		}
	}

	if (gru) {
		spin_lock(&gru->gs_lock);
		if (!check_gru_resources(gru, gts->ts_cbr_au_count,
					 gts->ts_dsr_au_count, GRU_NUM_CCH)) {
			spin_unlock(&gru->gs_lock);
			goto again;
		}
		reserve_gru_resources(gru, gts);
		gts->ts_gru = gru;
		gts->ts_ctxnum =
		    find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH);
		atomic_inc(&gts->ts_refcnt);
		gru->gs_gts[gts->ts_ctxnum] = gts;
		__set_bit(gts->ts_ctxnum, &gru->gs_context_map);
		spin_unlock(&gru->gs_lock);

		STAT(assign_context);
		gru_dbg(grudev,
			"gseg %p, gts %p, gid %d, ctx %d, cbr %d, dsr %d\n",
			gseg_virtual_address(gts->ts_gru, gts->ts_ctxnum), gts,
			gts->ts_gru->gs_gid, gts->ts_ctxnum,
			gts->ts_cbr_au_count, gts->ts_dsr_au_count);
	} else {
		STAT(assign_context_failed);
	}

	return gru;
}
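/*
 * Note: the unlocked scan above is a best-fit pass that prefers the
 * chiplet with the fewest active contexts; resources are rechecked under
 * gs_lock before committing, with a retry if another thread won the race.
 */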
/*
 * gru_nopage
 *
 * Map the user's GRU segment
 *
 *	Note: gru segments are always mmapped on GRU_GSEG_PAGESIZE boundaries.
 */
int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct gru_thread_state *gts;
	unsigned long paddr, vaddr;
	int blade_id;

	vaddr = (unsigned long)vmf->virtual_address;
	gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n",
		vma, vaddr, GSEG_BASE(vaddr));
	STAT(nopfn);

	/* The following check ensures vaddr is a valid address in the VMA */
	gts = gru_find_thread_state(vma, TSID(vaddr, vma));
	if (!gts)
		return VM_FAULT_SIGBUS;

again:
	mutex_lock(&gts->ts_ctxlock);
	preempt_disable();
	blade_id = uv_numa_blade_id();

	if (gts->ts_gru) {
		if (gts->ts_gru->gs_blade_id != blade_id) {
			STAT(migrated_nopfn_unload);
			gru_unload_context(gts, 1);
		} else {
			if (gru_retarget_intr(gts))
				STAT(migrated_nopfn_retarget);
		}
	}

	if (!gts->ts_gru) {
		STAT(load_user_context);
		if (!gru_assign_gru_context(gts, blade_id)) {
			preempt_enable();
			mutex_unlock(&gts->ts_ctxlock);
			set_current_state(TASK_INTERRUPTIBLE);
			schedule_timeout(GRU_ASSIGN_DELAY);  /* true hack ZZZ */
			blade_id = uv_numa_blade_id();
			if (gts->ts_steal_jiffies + GRU_STEAL_DELAY < jiffies)
				gru_steal_context(gts, blade_id);
			goto again;
		}
		gru_load_context(gts);
		paddr = gseg_physical_address(gts->ts_gru, gts->ts_ctxnum);
		remap_pfn_range(vma, vaddr & ~(GRU_GSEG_PAGESIZE - 1),
				paddr >> PAGE_SHIFT, GRU_GSEG_PAGESIZE,
				vma->vm_page_prot);
	}

	preempt_enable();
	mutex_unlock(&gts->ts_ctxlock);

	return VM_FAULT_NOPAGE;
}
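/*
 * Note on the retry path above: if no chiplet on the blade has room, the
 * faulting task sleeps briefly (GRU_ASSIGN_DELAY) and, once the victim
 * grace period (GRU_STEAL_DELAY) has expired, steals a loaded context
 * before retrying the fault.
 */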