~ubuntu-branches/ubuntu/quantal/linux-linaro-mx51/quantal

Viewing changes to arch/x86/platform/uv/tlb_uv.c

  • Committer: Package Import Robot
  • Author(s): John Rigby
  • Date: 2011-09-26 10:44:23 UTC
  • Revision ID: package-import@ubuntu.com-20110926104423-3o58a3c1bj7x00rs
Tags: 3.0.0-1007.9
[ John Rigby ]

Enable crypto modules and remove crypto-modules from exclude-module files (LP: #826021)

1
1
/*
2
2
 *      SGI UltraViolet TLB flush routines.
3
3
 *
4
 
 *      (c) 2008-2010 Cliff Wickman <cpw@sgi.com>, SGI.
 
4
 *      (c) 2008-2011 Cliff Wickman <cpw@sgi.com>, SGI.
5
5
 *
6
6
 *      This code is released under the GNU General Public License version 2 or
7
7
 *      later.
35
35
                5242880,
36
36
                167772160
37
37
};
 
38
 
38
39
static int timeout_us;
39
40
static int nobau;
40
41
static int baudisabled;
42
43
static cycles_t congested_cycles;
43
44
 
44
45
/* tunables: */
45
 
static int max_bau_concurrent = MAX_BAU_CONCURRENT;
46
 
static int max_bau_concurrent_constant = MAX_BAU_CONCURRENT;
47
 
static int plugged_delay = PLUGGED_DELAY;
48
 
static int plugsb4reset = PLUGSB4RESET;
49
 
static int timeoutsb4reset = TIMEOUTSB4RESET;
50
 
static int ipi_reset_limit = IPI_RESET_LIMIT;
51
 
static int complete_threshold = COMPLETE_THRESHOLD;
52
 
static int congested_response_us = CONGESTED_RESPONSE_US;
53
 
static int congested_reps = CONGESTED_REPS;
54
 
static int congested_period = CONGESTED_PERIOD;
 
46
static int max_concurr          = MAX_BAU_CONCURRENT;
 
47
static int max_concurr_const    = MAX_BAU_CONCURRENT;
 
48
static int plugged_delay        = PLUGGED_DELAY;
 
49
static int plugsb4reset         = PLUGSB4RESET;
 
50
static int timeoutsb4reset      = TIMEOUTSB4RESET;
 
51
static int ipi_reset_limit      = IPI_RESET_LIMIT;
 
52
static int complete_threshold   = COMPLETE_THRESHOLD;
 
53
static int congested_respns_us  = CONGESTED_RESPONSE_US;
 
54
static int congested_reps       = CONGESTED_REPS;
 
55
static int congested_period     = CONGESTED_PERIOD;
 
56
 
 
57
static struct tunables tunables[] = {
 
58
        {&max_concurr, MAX_BAU_CONCURRENT}, /* must be [0] */
 
59
        {&plugged_delay, PLUGGED_DELAY},
 
60
        {&plugsb4reset, PLUGSB4RESET},
 
61
        {&timeoutsb4reset, TIMEOUTSB4RESET},
 
62
        {&ipi_reset_limit, IPI_RESET_LIMIT},
 
63
        {&complete_threshold, COMPLETE_THRESHOLD},
 
64
        {&congested_respns_us, CONGESTED_RESPONSE_US},
 
65
        {&congested_reps, CONGESTED_REPS},
 
66
        {&congested_period, CONGESTED_PERIOD}
 
67
};
 
68
 
55
69
static struct dentry *tunables_dir;
56
70
static struct dentry *tunables_file;
57
71
 
58
 
static int __init setup_nobau(char *arg)
 
72
/* these correspond to the statistics printed by ptc_seq_show() */
 
73
static char *stat_description[] = {
 
74
        "sent:     number of shootdown messages sent",
 
75
        "stime:    time spent sending messages",
 
76
        "numuvhubs: number of hubs targeted with shootdown",
 
77
        "numuvhubs16: number times 16 or more hubs targeted",
 
78
        "numuvhubs8: number times 8 or more hubs targeted",
 
79
        "numuvhubs4: number times 4 or more hubs targeted",
 
80
        "numuvhubs2: number times 2 or more hubs targeted",
 
81
        "numuvhubs1: number times 1 hub targeted",
 
82
        "numcpus:  number of cpus targeted with shootdown",
 
83
        "dto:      number of destination timeouts",
 
84
        "retries:  destination timeout retries sent",
 
85
        "rok:   :  destination timeouts successfully retried",
 
86
        "resetp:   ipi-style resource resets for plugs",
 
87
        "resett:   ipi-style resource resets for timeouts",
 
88
        "giveup:   fall-backs to ipi-style shootdowns",
 
89
        "sto:      number of source timeouts",
 
90
        "bz:       number of stay-busy's",
 
91
        "throt:    number times spun in throttle",
 
92
        "swack:   image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE",
 
93
        "recv:     shootdown messages received",
 
94
        "rtime:    time spent processing messages",
 
95
        "all:      shootdown all-tlb messages",
 
96
        "one:      shootdown one-tlb messages",
 
97
        "mult:     interrupts that found multiple messages",
 
98
        "none:     interrupts that found no messages",
 
99
        "retry:    number of retry messages processed",
 
100
        "canc:     number messages canceled by retries",
 
101
        "nocan:    number retries that found nothing to cancel",
 
102
        "reset:    number of ipi-style reset requests processed",
 
103
        "rcan:     number messages canceled by reset requests",
 
104
        "disable:  number times use of the BAU was disabled",
 
105
        "enable:   number times use of the BAU was re-enabled"
 
106
};
 
107
 
 
108
static int __init
 
109
setup_nobau(char *arg)
59
110
{
60
111
        nobau = 1;
61
112
        return 0;
63
114
early_param("nobau", setup_nobau);
64
115
 
65
116
/* base pnode in this partition */
66
 
static int uv_partition_base_pnode __read_mostly;
 
117
static int uv_base_pnode __read_mostly;
67
118
/* position of pnode (which is nasid>>1): */
68
119
static int uv_nshift __read_mostly;
69
120
static unsigned long uv_mmask __read_mostly;
109
160
 * clear of the Timeout bit (as well) will free the resource. No reply will
110
161
 * be sent (the hardware will only do one reply per message).
111
162
 */
112
 
static inline void uv_reply_to_message(struct msg_desc *mdp,
113
 
                                       struct bau_control *bcp)
 
163
static void reply_to_message(struct msg_desc *mdp, struct bau_control *bcp)
114
164
{
115
165
        unsigned long dw;
116
 
        struct bau_payload_queue_entry *msg;
 
166
        struct bau_pq_entry *msg;
117
167
 
118
168
        msg = mdp->msg;
119
169
        if (!msg->canceled) {
120
 
                dw = (msg->sw_ack_vector << UV_SW_ACK_NPENDING) |
121
 
                                                msg->sw_ack_vector;
122
 
                uv_write_local_mmr(
123
 
                                UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, dw);
 
170
                dw = (msg->swack_vec << UV_SW_ACK_NPENDING) | msg->swack_vec;
 
171
                write_mmr_sw_ack(dw);
124
172
        }
125
173
        msg->replied_to = 1;
126
 
        msg->sw_ack_vector = 0;
 
174
        msg->swack_vec = 0;
127
175
}
128
176
 
129
177
/*
130
178
 * Process the receipt of a RETRY message
131
179
 */
132
 
static inline void uv_bau_process_retry_msg(struct msg_desc *mdp,
133
 
                                            struct bau_control *bcp)
 
180
static void bau_process_retry_msg(struct msg_desc *mdp,
 
181
                                        struct bau_control *bcp)
134
182
{
135
183
        int i;
136
184
        int cancel_count = 0;
137
 
        int slot2;
138
185
        unsigned long msg_res;
139
186
        unsigned long mmr = 0;
140
 
        struct bau_payload_queue_entry *msg;
141
 
        struct bau_payload_queue_entry *msg2;
142
 
        struct ptc_stats *stat;
 
187
        struct bau_pq_entry *msg = mdp->msg;
 
188
        struct bau_pq_entry *msg2;
 
189
        struct ptc_stats *stat = bcp->statp;
143
190
 
144
 
        msg = mdp->msg;
145
 
        stat = bcp->statp;
146
191
        stat->d_retries++;
147
192
        /*
148
193
         * cancel any message from msg+1 to the retry itself
149
194
         */
150
195
        for (msg2 = msg+1, i = 0; i < DEST_Q_SIZE; msg2++, i++) {
151
 
                if (msg2 > mdp->va_queue_last)
152
 
                        msg2 = mdp->va_queue_first;
 
196
                if (msg2 > mdp->queue_last)
 
197
                        msg2 = mdp->queue_first;
153
198
                if (msg2 == msg)
154
199
                        break;
155
200
 
156
 
                /* same conditions for cancellation as uv_do_reset */
 
201
                /* same conditions for cancellation as do_reset */
157
202
                if ((msg2->replied_to == 0) && (msg2->canceled == 0) &&
158
 
                    (msg2->sw_ack_vector) && ((msg2->sw_ack_vector &
159
 
                        msg->sw_ack_vector) == 0) &&
 
203
                    (msg2->swack_vec) && ((msg2->swack_vec &
 
204
                        msg->swack_vec) == 0) &&
160
205
                    (msg2->sending_cpu == msg->sending_cpu) &&
161
206
                    (msg2->msg_type != MSG_NOOP)) {
162
 
                        slot2 = msg2 - mdp->va_queue_first;
163
 
                        mmr = uv_read_local_mmr
164
 
                                (UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE);
165
 
                        msg_res = msg2->sw_ack_vector;
 
207
                        mmr = read_mmr_sw_ack();
 
208
                        msg_res = msg2->swack_vec;
166
209
                        /*
167
210
                         * This is a message retry; clear the resources held
168
211
                         * by the previous message only if they timed out.
170
213
                         * situation to report.
171
214
                         */
172
215
                        if (mmr & (msg_res << UV_SW_ACK_NPENDING)) {
 
216
                                unsigned long mr;
173
217
                                /*
174
218
                                 * is the resource timed out?
175
219
                                 * make everyone ignore the cancelled message.
177
221
                                msg2->canceled = 1;
178
222
                                stat->d_canceled++;
179
223
                                cancel_count++;
180
 
                                uv_write_local_mmr(
181
 
                                    UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS,
182
 
                                        (msg_res << UV_SW_ACK_NPENDING) |
183
 
                                         msg_res);
 
224
                                mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res;
 
225
                                write_mmr_sw_ack(mr);
184
226
                        }
185
227
                }
186
228
        }
192
234
 * Do all the things a cpu should do for a TLB shootdown message.
193
235
 * Other cpu's may come here at the same time for this message.
194
236
 */
195
 
static void uv_bau_process_message(struct msg_desc *mdp,
196
 
                                   struct bau_control *bcp)
 
237
static void bau_process_message(struct msg_desc *mdp,
 
238
                                        struct bau_control *bcp)
197
239
{
198
 
        int msg_ack_count;
199
240
        short socket_ack_count = 0;
200
 
        struct ptc_stats *stat;
201
 
        struct bau_payload_queue_entry *msg;
 
241
        short *sp;
 
242
        struct atomic_short *asp;
 
243
        struct ptc_stats *stat = bcp->statp;
 
244
        struct bau_pq_entry *msg = mdp->msg;
202
245
        struct bau_control *smaster = bcp->socket_master;
203
246
 
204
247
        /*
205
248
         * This must be a normal message, or retry of a normal message
206
249
         */
207
 
        msg = mdp->msg;
208
 
        stat = bcp->statp;
209
250
        if (msg->address == TLB_FLUSH_ALL) {
210
251
                local_flush_tlb();
211
252
                stat->d_alltlb++;
222
263
         * cpu number.
223
264
         */
224
265
        if (msg->msg_type == MSG_RETRY && bcp == bcp->uvhub_master)
225
 
                uv_bau_process_retry_msg(mdp, bcp);
 
266
                bau_process_retry_msg(mdp, bcp);
226
267
 
227
268
        /*
228
 
         * This is a sw_ack message, so we have to reply to it.
 
269
         * This is a swack message, so we have to reply to it.
229
270
         * Count each responding cpu on the socket. This avoids
230
271
         * pinging the count's cache line back and forth between
231
272
         * the sockets.
232
273
         */
233
 
        socket_ack_count = atomic_add_short_return(1, (struct atomic_short *)
234
 
                        &smaster->socket_acknowledge_count[mdp->msg_slot]);
 
274
        sp = &smaster->socket_acknowledge_count[mdp->msg_slot];
 
275
        asp = (struct atomic_short *)sp;
 
276
        socket_ack_count = atom_asr(1, asp);
235
277
        if (socket_ack_count == bcp->cpus_in_socket) {
 
278
                int msg_ack_count;
236
279
                /*
237
280
                 * Both sockets dump their completed count total into
238
281
                 * the message's count.
239
282
                 */
240
283
                smaster->socket_acknowledge_count[mdp->msg_slot] = 0;
241
 
                msg_ack_count = atomic_add_short_return(socket_ack_count,
242
 
                                (struct atomic_short *)&msg->acknowledge_count);
 
284
                asp = (struct atomic_short *)&msg->acknowledge_count;
 
285
                msg_ack_count = atom_asr(socket_ack_count, asp);
243
286
 
244
287
                if (msg_ack_count == bcp->cpus_in_uvhub) {
245
288
                        /*
246
289
                         * All cpus in uvhub saw it; reply
247
290
                         */
248
 
                        uv_reply_to_message(mdp, bcp);
 
291
                        reply_to_message(mdp, bcp);
249
292
                }
250
293
        }
251
294
 
268
311
 * Last resort when we get a large number of destination timeouts is
269
312
 * to clear resources held by a given cpu.
270
313
 * Do this with IPI so that all messages in the BAU message queue
271
 
 * can be identified by their nonzero sw_ack_vector field.
 
314
 * can be identified by their nonzero swack_vec field.
272
315
 *
273
316
 * This is entered for a single cpu on the uvhub.
274
317
 * The sender want's this uvhub to free a specific message's
275
 
 * sw_ack resources.
 
318
 * swack resources.
276
319
 */
277
 
static void
278
 
uv_do_reset(void *ptr)
 
320
static void do_reset(void *ptr)
279
321
{
280
322
        int i;
281
 
        int slot;
282
 
        int count = 0;
283
 
        unsigned long mmr;
284
 
        unsigned long msg_res;
285
 
        struct bau_control *bcp;
286
 
        struct reset_args *rap;
287
 
        struct bau_payload_queue_entry *msg;
288
 
        struct ptc_stats *stat;
 
323
        struct bau_control *bcp = &per_cpu(bau_control, smp_processor_id());
 
324
        struct reset_args *rap = (struct reset_args *)ptr;
 
325
        struct bau_pq_entry *msg;
 
326
        struct ptc_stats *stat = bcp->statp;
289
327
 
290
 
        bcp = &per_cpu(bau_control, smp_processor_id());
291
 
        rap = (struct reset_args *)ptr;
292
 
        stat = bcp->statp;
293
328
        stat->d_resets++;
294
 
 
295
329
        /*
296
330
         * We're looking for the given sender, and
297
 
         * will free its sw_ack resource.
 
331
         * will free its swack resource.
298
332
         * If all cpu's finally responded after the timeout, its
299
333
         * message 'replied_to' was set.
300
334
         */
301
 
        for (msg = bcp->va_queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) {
302
 
                /* uv_do_reset: same conditions for cancellation as
303
 
                   uv_bau_process_retry_msg() */
 
335
        for (msg = bcp->queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) {
 
336
                unsigned long msg_res;
 
337
                /* do_reset: same conditions for cancellation as
 
338
                   bau_process_retry_msg() */
304
339
                if ((msg->replied_to == 0) &&
305
340
                    (msg->canceled == 0) &&
306
341
                    (msg->sending_cpu == rap->sender) &&
307
 
                    (msg->sw_ack_vector) &&
 
342
                    (msg->swack_vec) &&
308
343
                    (msg->msg_type != MSG_NOOP)) {
 
344
                        unsigned long mmr;
 
345
                        unsigned long mr;
309
346
                        /*
310
347
                         * make everyone else ignore this message
311
348
                         */
312
349
                        msg->canceled = 1;
313
 
                        slot = msg - bcp->va_queue_first;
314
 
                        count++;
315
350
                        /*
316
351
                         * only reset the resource if it is still pending
317
352
                         */
318
 
                        mmr = uv_read_local_mmr
319
 
                                        (UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE);
320
 
                        msg_res = msg->sw_ack_vector;
 
353
                        mmr = read_mmr_sw_ack();
 
354
                        msg_res = msg->swack_vec;
 
355
                        mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res;
321
356
                        if (mmr & msg_res) {
322
357
                                stat->d_rcanceled++;
323
 
                                uv_write_local_mmr(
324
 
                                    UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS,
325
 
                                        (msg_res << UV_SW_ACK_NPENDING) |
326
 
                                         msg_res);
 
358
                                write_mmr_sw_ack(mr);
327
359
                        }
328
360
                }
329
361
        }
334
366
 * Use IPI to get all target uvhubs to release resources held by
335
367
 * a given sending cpu number.
336
368
 */
337
 
static void uv_reset_with_ipi(struct bau_target_uvhubmask *distribution,
338
 
                              int sender)
 
369
static void reset_with_ipi(struct bau_targ_hubmask *distribution, int sender)
339
370
{
340
371
        int uvhub;
341
 
        int cpu;
 
372
        int maskbits;
342
373
        cpumask_t mask;
343
374
        struct reset_args reset_args;
344
375
 
345
376
        reset_args.sender = sender;
346
 
 
347
377
        cpus_clear(mask);
348
378
        /* find a single cpu for each uvhub in this distribution mask */
349
 
        for (uvhub = 0;
350
 
                    uvhub < sizeof(struct bau_target_uvhubmask) * BITSPERBYTE;
351
 
                    uvhub++) {
 
379
        maskbits = sizeof(struct bau_targ_hubmask) * BITSPERBYTE;
 
380
        for (uvhub = 0; uvhub < maskbits; uvhub++) {
 
381
                int cpu;
352
382
                if (!bau_uvhub_isset(uvhub, distribution))
353
383
                        continue;
354
384
                /* find a cpu for this uvhub */
355
385
                cpu = uvhub_to_first_cpu(uvhub);
356
386
                cpu_set(cpu, mask);
357
387
        }
358
 
        /* IPI all cpus; Preemption is already disabled */
359
 
        smp_call_function_many(&mask, uv_do_reset, (void *)&reset_args, 1);
 
388
 
 
389
        /* IPI all cpus; preemption is already disabled */
 
390
        smp_call_function_many(&mask, do_reset, (void *)&reset_args, 1);
360
391
        return;
361
392
}
362
393
 
363
 
static inline unsigned long
364
 
cycles_2_us(unsigned long long cyc)
 
394
static inline unsigned long cycles_2_us(unsigned long long cyc)
365
395
{
366
396
        unsigned long long ns;
367
397
        unsigned long us;
368
 
        ns =  (cyc * per_cpu(cyc2ns, smp_processor_id()))
369
 
                                                >> CYC2NS_SCALE_FACTOR;
 
398
        int cpu = smp_processor_id();
 
399
 
 
400
        ns =  (cyc * per_cpu(cyc2ns, cpu)) >> CYC2NS_SCALE_FACTOR;
370
401
        us = ns / 1000;
371
402
        return us;
372
403
}
376
407
 * leaves uvhub_quiesce set so that no new broadcasts are started by
377
408
 * bau_flush_send_and_wait()
378
409
 */
379
 
static inline void
380
 
quiesce_local_uvhub(struct bau_control *hmaster)
 
410
static inline void quiesce_local_uvhub(struct bau_control *hmaster)
381
411
{
382
 
        atomic_add_short_return(1, (struct atomic_short *)
383
 
                 &hmaster->uvhub_quiesce);
 
412
        atom_asr(1, (struct atomic_short *)&hmaster->uvhub_quiesce);
384
413
}
385
414
 
386
415
/*
387
416
 * mark this quiet-requestor as done
388
417
 */
389
 
static inline void
390
 
end_uvhub_quiesce(struct bau_control *hmaster)
391
 
{
392
 
        atomic_add_short_return(-1, (struct atomic_short *)
393
 
                &hmaster->uvhub_quiesce);
 
418
static inline void end_uvhub_quiesce(struct bau_control *hmaster)
 
419
{
 
420
        atom_asr(-1, (struct atomic_short *)&hmaster->uvhub_quiesce);
 
421
}
 
422
 
 
423
static unsigned long uv1_read_status(unsigned long mmr_offset, int right_shift)
 
424
{
 
425
        unsigned long descriptor_status;
 
426
 
 
427
        descriptor_status = uv_read_local_mmr(mmr_offset);
 
428
        descriptor_status >>= right_shift;
 
429
        descriptor_status &= UV_ACT_STATUS_MASK;
 
430
        return descriptor_status;
394
431
}
395
432
 
396
433
/*
397
434
 * Wait for completion of a broadcast software ack message
398
435
 * return COMPLETE, RETRY(PLUGGED or TIMEOUT) or GIVEUP
399
436
 */
400
 
static int uv_wait_completion(struct bau_desc *bau_desc,
401
 
        unsigned long mmr_offset, int right_shift, int this_cpu,
402
 
        struct bau_control *bcp, struct bau_control *smaster, long try)
 
437
static int uv1_wait_completion(struct bau_desc *bau_desc,
 
438
                                unsigned long mmr_offset, int right_shift,
 
439
                                struct bau_control *bcp, long try)
403
440
{
404
441
        unsigned long descriptor_status;
405
 
        cycles_t ttime;
 
442
        cycles_t ttm;
406
443
        struct ptc_stats *stat = bcp->statp;
407
 
        struct bau_control *hmaster;
408
 
 
409
 
        hmaster = bcp->uvhub_master;
410
 
 
 
444
 
 
445
        descriptor_status = uv1_read_status(mmr_offset, right_shift);
411
446
        /* spin on the status MMR, waiting for it to go idle */
412
 
        while ((descriptor_status = (((unsigned long)
413
 
                uv_read_local_mmr(mmr_offset) >>
414
 
                        right_shift) & UV_ACT_STATUS_MASK)) !=
415
 
                        DESC_STATUS_IDLE) {
 
447
        while ((descriptor_status != DS_IDLE)) {
416
448
                /*
417
 
                 * Our software ack messages may be blocked because there are
418
 
                 * no swack resources available.  As long as none of them
419
 
                 * has timed out hardware will NACK our message and its
420
 
                 * state will stay IDLE.
 
449
                 * Our software ack messages may be blocked because
 
450
                 * there are no swack resources available.  As long
 
451
                 * as none of them has timed out hardware will NACK
 
452
                 * our message and its state will stay IDLE.
421
453
                 */
422
 
                if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) {
 
454
                if (descriptor_status == DS_SOURCE_TIMEOUT) {
423
455
                        stat->s_stimeout++;
424
456
                        return FLUSH_GIVEUP;
425
 
                } else if (descriptor_status ==
426
 
                                        DESC_STATUS_DESTINATION_TIMEOUT) {
 
457
                } else if (descriptor_status == DS_DESTINATION_TIMEOUT) {
427
458
                        stat->s_dtimeout++;
428
 
                        ttime = get_cycles();
 
459
                        ttm = get_cycles();
429
460
 
430
461
                        /*
431
462
                         * Our retries may be blocked by all destination
433
464
                         * pending.  In that case hardware returns the
434
465
                         * ERROR that looks like a destination timeout.
435
466
                         */
436
 
                        if (cycles_2_us(ttime - bcp->send_message) <
437
 
                                                        timeout_us) {
 
467
                        if (cycles_2_us(ttm - bcp->send_message) < timeout_us) {
438
468
                                bcp->conseccompletes = 0;
439
469
                                return FLUSH_RETRY_PLUGGED;
440
470
                        }
447
477
                         */
448
478
                        cpu_relax();
449
479
                }
450
 
        }
451
 
        bcp->conseccompletes++;
452
 
        return FLUSH_COMPLETE;
453
 
}
454
 
 
455
 
static inline cycles_t
456
 
sec_2_cycles(unsigned long sec)
 
480
                descriptor_status = uv1_read_status(mmr_offset, right_shift);
 
481
        }
 
482
        bcp->conseccompletes++;
 
483
        return FLUSH_COMPLETE;
 
484
}
 
485
 
 
486
/*
 
487
 * UV2 has an extra bit of status in the ACTIVATION_STATUS_2 register.
 
488
 */
 
489
static unsigned long uv2_read_status(unsigned long offset, int rshft, int cpu)
 
490
{
 
491
        unsigned long descriptor_status;
 
492
        unsigned long descriptor_status2;
 
493
 
 
494
        descriptor_status = ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK);
 
495
        descriptor_status2 = (read_mmr_uv2_status() >> cpu) & 0x1UL;
 
496
        descriptor_status = (descriptor_status << 1) | descriptor_status2;
 
497
        return descriptor_status;
 
498
}
 
499
 
 
500
static int uv2_wait_completion(struct bau_desc *bau_desc,
 
501
                                unsigned long mmr_offset, int right_shift,
 
502
                                struct bau_control *bcp, long try)
 
503
{
 
504
        unsigned long descriptor_stat;
 
505
        cycles_t ttm;
 
506
        int cpu = bcp->uvhub_cpu;
 
507
        struct ptc_stats *stat = bcp->statp;
 
508
 
 
509
        descriptor_stat = uv2_read_status(mmr_offset, right_shift, cpu);
 
510
 
 
511
        /* spin on the status MMR, waiting for it to go idle */
 
512
        while (descriptor_stat != UV2H_DESC_IDLE) {
 
513
                /*
 
514
                 * Our software ack messages may be blocked because
 
515
                 * there are no swack resources available.  As long
 
516
                 * as none of them has timed out hardware will NACK
 
517
                 * our message and its state will stay IDLE.
 
518
                 */
 
519
                if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT) ||
 
520
                    (descriptor_stat == UV2H_DESC_DEST_STRONG_NACK) ||
 
521
                    (descriptor_stat == UV2H_DESC_DEST_PUT_ERR)) {
 
522
                        stat->s_stimeout++;
 
523
                        return FLUSH_GIVEUP;
 
524
                } else if (descriptor_stat == UV2H_DESC_DEST_TIMEOUT) {
 
525
                        stat->s_dtimeout++;
 
526
                        ttm = get_cycles();
 
527
                        /*
 
528
                         * Our retries may be blocked by all destination
 
529
                         * swack resources being consumed, and a timeout
 
530
                         * pending.  In that case hardware returns the
 
531
                         * ERROR that looks like a destination timeout.
 
532
                         */
 
533
                        if (cycles_2_us(ttm - bcp->send_message) < timeout_us) {
 
534
                                bcp->conseccompletes = 0;
 
535
                                return FLUSH_RETRY_PLUGGED;
 
536
                        }
 
537
                        bcp->conseccompletes = 0;
 
538
                        return FLUSH_RETRY_TIMEOUT;
 
539
                } else {
 
540
                        /*
 
541
                         * descriptor_stat is still BUSY
 
542
                         */
 
543
                        cpu_relax();
 
544
                }
 
545
                descriptor_stat = uv2_read_status(mmr_offset, right_shift, cpu);
 
546
        }
 
547
        bcp->conseccompletes++;
 
548
        return FLUSH_COMPLETE;
 
549
}
 
550
 
 
551
/*
 
552
 * There are 2 status registers; each and array[32] of 2 bits. Set up for
 
553
 * which register to read and position in that register based on cpu in
 
554
 * current hub.
 
555
 */
 
556
static int wait_completion(struct bau_desc *bau_desc,
 
557
                                struct bau_control *bcp, long try)
 
558
{
 
559
        int right_shift;
 
560
        unsigned long mmr_offset;
 
561
        int cpu = bcp->uvhub_cpu;
 
562
 
 
563
        if (cpu < UV_CPUS_PER_AS) {
 
564
                mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
 
565
                right_shift = cpu * UV_ACT_STATUS_SIZE;
 
566
        } else {
 
567
                mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
 
568
                right_shift = ((cpu - UV_CPUS_PER_AS) * UV_ACT_STATUS_SIZE);
 
569
        }
 
570
 
 
571
        if (is_uv1_hub())
 
572
                return uv1_wait_completion(bau_desc, mmr_offset, right_shift,
 
573
                                                                bcp, try);
 
574
        else
 
575
                return uv2_wait_completion(bau_desc, mmr_offset, right_shift,
 
576
                                                                bcp, try);
 
577
}
 
578
 
 
579
static inline cycles_t sec_2_cycles(unsigned long sec)
457
580
{
458
581
        unsigned long ns;
459
582
        cycles_t cyc;
464
587
}
465
588
 
466
589
/*
467
 
 * conditionally add 1 to *v, unless *v is >= u
468
 
 * return 0 if we cannot add 1 to *v because it is >= u
469
 
 * return 1 if we can add 1 to *v because it is < u
470
 
 * the add is atomic
471
 
 *
472
 
 * This is close to atomic_add_unless(), but this allows the 'u' value
473
 
 * to be lowered below the current 'v'.  atomic_add_unless can only stop
474
 
 * on equal.
475
 
 */
476
 
static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u)
477
 
{
478
 
        spin_lock(lock);
479
 
        if (atomic_read(v) >= u) {
480
 
                spin_unlock(lock);
481
 
                return 0;
482
 
        }
483
 
        atomic_inc(v);
484
 
        spin_unlock(lock);
485
 
        return 1;
486
 
}
487
 
 
488
 
/*
489
 
 * Our retries are blocked by all destination swack resources being
 
590
 * Our retries are blocked by all destination sw ack resources being
490
591
 * in use, and a timeout is pending. In that case hardware immediately
491
592
 * returns the ERROR that looks like a destination timeout.
492
593
 */
493
 
static void
494
 
destination_plugged(struct bau_desc *bau_desc, struct bau_control *bcp,
 
594
static void destination_plugged(struct bau_desc *bau_desc,
 
595
                        struct bau_control *bcp,
495
596
                        struct bau_control *hmaster, struct ptc_stats *stat)
496
597
{
497
598
        udelay(bcp->plugged_delay);
498
599
        bcp->plugged_tries++;
 
600
 
499
601
        if (bcp->plugged_tries >= bcp->plugsb4reset) {
500
602
                bcp->plugged_tries = 0;
 
603
 
501
604
                quiesce_local_uvhub(hmaster);
 
605
 
502
606
                spin_lock(&hmaster->queue_lock);
503
 
                uv_reset_with_ipi(&bau_desc->distribution, bcp->cpu);
 
607
                reset_with_ipi(&bau_desc->distribution, bcp->cpu);
504
608
                spin_unlock(&hmaster->queue_lock);
 
609
 
505
610
                end_uvhub_quiesce(hmaster);
 
611
 
506
612
                bcp->ipi_attempts++;
507
613
                stat->s_resets_plug++;
508
614
        }
509
615
}
510
616
 
511
 
static void
512
 
destination_timeout(struct bau_desc *bau_desc, struct bau_control *bcp,
513
 
                        struct bau_control *hmaster, struct ptc_stats *stat)
 
617
static void destination_timeout(struct bau_desc *bau_desc,
 
618
                        struct bau_control *bcp, struct bau_control *hmaster,
 
619
                        struct ptc_stats *stat)
514
620
{
515
 
        hmaster->max_bau_concurrent = 1;
 
621
        hmaster->max_concurr = 1;
516
622
        bcp->timeout_tries++;
517
623
        if (bcp->timeout_tries >= bcp->timeoutsb4reset) {
518
624
                bcp->timeout_tries = 0;
 
625
 
519
626
                quiesce_local_uvhub(hmaster);
 
627
 
520
628
                spin_lock(&hmaster->queue_lock);
521
 
                uv_reset_with_ipi(&bau_desc->distribution, bcp->cpu);
 
629
                reset_with_ipi(&bau_desc->distribution, bcp->cpu);
522
630
                spin_unlock(&hmaster->queue_lock);
 
631
 
523
632
                end_uvhub_quiesce(hmaster);
 
633
 
524
634
                bcp->ipi_attempts++;
525
635
                stat->s_resets_timeout++;
526
636
        }
530
640
 * Completions are taking a very long time due to a congested numalink
531
641
 * network.
532
642
 */
533
 
static void
534
 
disable_for_congestion(struct bau_control *bcp, struct ptc_stats *stat)
 
643
static void disable_for_congestion(struct bau_control *bcp,
 
644
                                        struct ptc_stats *stat)
535
645
{
536
 
        int tcpu;
537
 
        struct bau_control *tbcp;
538
 
 
539
646
        /* let only one cpu do this disabling */
540
647
        spin_lock(&disable_lock);
 
648
 
541
649
        if (!baudisabled && bcp->period_requests &&
542
650
            ((bcp->period_time / bcp->period_requests) > congested_cycles)) {
 
651
                int tcpu;
 
652
                struct bau_control *tbcp;
543
653
                /* it becomes this cpu's job to turn on the use of the
544
654
                   BAU again */
545
655
                baudisabled = 1;
546
656
                bcp->set_bau_off = 1;
547
 
                bcp->set_bau_on_time = get_cycles() +
548
 
                        sec_2_cycles(bcp->congested_period);
 
657
                bcp->set_bau_on_time = get_cycles();
 
658
                bcp->set_bau_on_time += sec_2_cycles(bcp->cong_period);
549
659
                stat->s_bau_disabled++;
550
660
                for_each_present_cpu(tcpu) {
551
661
                        tbcp = &per_cpu(bau_control, tcpu);
552
 
                                tbcp->baudisabled = 1;
 
662
                        tbcp->baudisabled = 1;
553
663
                }
554
664
        }
 
665
 
555
666
        spin_unlock(&disable_lock);
556
667
}
557
668
 
558
 
/**
559
 
 * uv_flush_send_and_wait
560
 
 *
 
669
static void count_max_concurr(int stat, struct bau_control *bcp,
 
670
                                struct bau_control *hmaster)
 
671
{
 
672
        bcp->plugged_tries = 0;
 
673
        bcp->timeout_tries = 0;
 
674
        if (stat != FLUSH_COMPLETE)
 
675
                return;
 
676
        if (bcp->conseccompletes <= bcp->complete_threshold)
 
677
                return;
 
678
        if (hmaster->max_concurr >= hmaster->max_concurr_const)
 
679
                return;
 
680
        hmaster->max_concurr++;
 
681
}
 
682
 
 
683
static void record_send_stats(cycles_t time1, cycles_t time2,
 
684
                struct bau_control *bcp, struct ptc_stats *stat,
 
685
                int completion_status, int try)
 
686
{
 
687
        cycles_t elapsed;
 
688
 
 
689
        if (time2 > time1) {
 
690
                elapsed = time2 - time1;
 
691
                stat->s_time += elapsed;
 
692
 
 
693
                if ((completion_status == FLUSH_COMPLETE) && (try == 1)) {
 
694
                        bcp->period_requests++;
 
695
                        bcp->period_time += elapsed;
 
696
                        if ((elapsed > congested_cycles) &&
 
697
                            (bcp->period_requests > bcp->cong_reps))
 
698
                                disable_for_congestion(bcp, stat);
 
699
                }
 
700
        } else
 
701
                stat->s_requestor--;
 
702
 
 
703
        if (completion_status == FLUSH_COMPLETE && try > 1)
 
704
                stat->s_retriesok++;
 
705
        else if (completion_status == FLUSH_GIVEUP)
 
706
                stat->s_giveup++;
 
707
}
 
708
 
 
709
/*
 
710
 * Because of a uv1 hardware bug only a limited number of concurrent
 
711
 * requests can be made.
 
712
 */
 
713
static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat)
 
714
{
 
715
        spinlock_t *lock = &hmaster->uvhub_lock;
 
716
        atomic_t *v;
 
717
 
 
718
        v = &hmaster->active_descriptor_count;
 
719
        if (!atomic_inc_unless_ge(lock, v, hmaster->max_concurr)) {
 
720
                stat->s_throttles++;
 
721
                do {
 
722
                        cpu_relax();
 
723
                } while (!atomic_inc_unless_ge(lock, v, hmaster->max_concurr));
 
724
        }
 
725
}
 
726
 
 
727
/*
 
728
 * Handle the completion status of a message send.
 
729
 */
 
730
static void handle_cmplt(int completion_status, struct bau_desc *bau_desc,
 
731
                        struct bau_control *bcp, struct bau_control *hmaster,
 
732
                        struct ptc_stats *stat)
 
733
{
 
734
        if (completion_status == FLUSH_RETRY_PLUGGED)
 
735
                destination_plugged(bau_desc, bcp, hmaster, stat);
 
736
        else if (completion_status == FLUSH_RETRY_TIMEOUT)
 
737
                destination_timeout(bau_desc, bcp, hmaster, stat);
 
738
}
 
739
 
 
740
/*
561
741
 * Send a broadcast and wait for it to complete.
562
742
 *
563
743
 * The flush_mask contains the cpus the broadcast is to be sent to including
568
748
 * returned to the kernel.
569
749
 */
570
750
int uv_flush_send_and_wait(struct bau_desc *bau_desc,
571
 
                           struct cpumask *flush_mask, struct bau_control *bcp)
 
751
                        struct cpumask *flush_mask, struct bau_control *bcp)
572
752
{
573
 
        int right_shift;
574
 
        int completion_status = 0;
575
753
        int seq_number = 0;
 
754
        int completion_stat = 0;
576
755
        long try = 0;
577
 
        int cpu = bcp->uvhub_cpu;
578
 
        int this_cpu = bcp->cpu;
579
 
        unsigned long mmr_offset;
580
756
        unsigned long index;
581
757
        cycles_t time1;
582
758
        cycles_t time2;
583
 
        cycles_t elapsed;
584
759
        struct ptc_stats *stat = bcp->statp;
585
 
        struct bau_control *smaster = bcp->socket_master;
586
760
        struct bau_control *hmaster = bcp->uvhub_master;
587
761
 
588
 
        if (!atomic_inc_unless_ge(&hmaster->uvhub_lock,
589
 
                        &hmaster->active_descriptor_count,
590
 
                        hmaster->max_bau_concurrent)) {
591
 
                stat->s_throttles++;
592
 
                do {
593
 
                        cpu_relax();
594
 
                } while (!atomic_inc_unless_ge(&hmaster->uvhub_lock,
595
 
                        &hmaster->active_descriptor_count,
596
 
                        hmaster->max_bau_concurrent));
597
 
        }
 
762
        if (is_uv1_hub())
 
763
                uv1_throttle(hmaster, stat);
 
764
 
598
765
        while (hmaster->uvhub_quiesce)
599
766
                cpu_relax();
600
767
 
601
 
        if (cpu < UV_CPUS_PER_ACT_STATUS) {
602
 
                mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
603
 
                right_shift = cpu * UV_ACT_STATUS_SIZE;
604
 
        } else {
605
 
                mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
606
 
                right_shift =
607
 
                    ((cpu - UV_CPUS_PER_ACT_STATUS) * UV_ACT_STATUS_SIZE);
608
 
        }
609
768
        time1 = get_cycles();
610
769
        do {
611
770
                if (try == 0) {
615
774
                        bau_desc->header.msg_type = MSG_RETRY;
616
775
                        stat->s_retry_messages++;
617
776
                }
 
777
 
618
778
                bau_desc->header.sequence = seq_number;
619
 
                index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) |
620
 
                        bcp->uvhub_cpu;
 
779
                index = (1UL << AS_PUSH_SHIFT) | bcp->uvhub_cpu;
621
780
                bcp->send_message = get_cycles();
622
 
                uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index);
 
781
 
 
782
                write_mmr_activation(index);
 
783
 
623
784
                try++;
624
 
                completion_status = uv_wait_completion(bau_desc, mmr_offset,
625
 
                        right_shift, this_cpu, bcp, smaster, try);
626
 
 
627
 
                if (completion_status == FLUSH_RETRY_PLUGGED) {
628
 
                        destination_plugged(bau_desc, bcp, hmaster, stat);
629
 
                } else if (completion_status == FLUSH_RETRY_TIMEOUT) {
630
 
                        destination_timeout(bau_desc, bcp, hmaster, stat);
631
 
                }
 
785
                completion_stat = wait_completion(bau_desc, bcp, try);
 
786
 
 
787
                handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat);
 
788
 
632
789
                if (bcp->ipi_attempts >= bcp->ipi_reset_limit) {
633
790
                        bcp->ipi_attempts = 0;
634
 
                        completion_status = FLUSH_GIVEUP;
 
791
                        completion_stat = FLUSH_GIVEUP;
635
792
                        break;
636
793
                }
637
794
                cpu_relax();
638
 
        } while ((completion_status == FLUSH_RETRY_PLUGGED) ||
639
 
                 (completion_status == FLUSH_RETRY_TIMEOUT));
 
795
        } while ((completion_stat == FLUSH_RETRY_PLUGGED) ||
 
796
                 (completion_stat == FLUSH_RETRY_TIMEOUT));
 
797
 
640
798
        time2 = get_cycles();
641
 
        bcp->plugged_tries = 0;
642
 
        bcp->timeout_tries = 0;
643
 
        if ((completion_status == FLUSH_COMPLETE) &&
644
 
            (bcp->conseccompletes > bcp->complete_threshold) &&
645
 
            (hmaster->max_bau_concurrent <
646
 
                                        hmaster->max_bau_concurrent_constant))
647
 
                        hmaster->max_bau_concurrent++;
 
799
 
 
800
        count_max_concurr(completion_stat, bcp, hmaster);
 
801
 
648
802
        while (hmaster->uvhub_quiesce)
649
803
                cpu_relax();
 
804
 
650
805
        atomic_dec(&hmaster->active_descriptor_count);
651
 
        if (time2 > time1) {
652
 
                elapsed = time2 - time1;
653
 
                stat->s_time += elapsed;
654
 
                if ((completion_status == FLUSH_COMPLETE) && (try == 1)) {
655
 
                        bcp->period_requests++;
656
 
                        bcp->period_time += elapsed;
657
 
                        if ((elapsed > congested_cycles) &&
658
 
                            (bcp->period_requests > bcp->congested_reps)) {
659
 
                                disable_for_congestion(bcp, stat);
 
806
 
 
807
        record_send_stats(time1, time2, bcp, stat, completion_stat, try);
 
808
 
 
809
        if (completion_stat == FLUSH_GIVEUP)
 
810
                return 1;
 
811
        return 0;
 
812
}
 
813
 
 
814
/*
 
815
 * The BAU is disabled. When the disabled time period has expired, the cpu
 
816
 * that disabled it must re-enable it.
 
817
 * Return 0 if it is re-enabled for all cpus.
 
818
 */
 
819
static int check_enable(struct bau_control *bcp, struct ptc_stats *stat)
 
820
{
 
821
        int tcpu;
 
822
        struct bau_control *tbcp;
 
823
 
 
824
        if (bcp->set_bau_off) {
 
825
                if (get_cycles() >= bcp->set_bau_on_time) {
 
826
                        stat->s_bau_reenabled++;
 
827
                        baudisabled = 0;
 
828
                        for_each_present_cpu(tcpu) {
 
829
                                tbcp = &per_cpu(bau_control, tcpu);
 
830
                                tbcp->baudisabled = 0;
 
831
                                tbcp->period_requests = 0;
 
832
                                tbcp->period_time = 0;
660
833
                        }
 
834
                        return 0;
661
835
                }
 
836
        }
 
837
        return -1;
 
838
}
 
839
 
 
840
static void record_send_statistics(struct ptc_stats *stat, int locals, int hubs,
 
841
                                int remotes, struct bau_desc *bau_desc)
 
842
{
 
843
        stat->s_requestor++;
 
844
        stat->s_ntargcpu += remotes + locals;
 
845
        stat->s_ntargremotes += remotes;
 
846
        stat->s_ntarglocals += locals;
 
847
 
 
848
        /* uvhub statistics */
 
849
        hubs = bau_uvhub_weight(&bau_desc->distribution);
 
850
        if (locals) {
 
851
                stat->s_ntarglocaluvhub++;
 
852
                stat->s_ntargremoteuvhub += (hubs - 1);
662
853
        } else
663
 
                stat->s_requestor--;
664
 
        if (completion_status == FLUSH_COMPLETE && try > 1)
665
 
                stat->s_retriesok++;
666
 
        else if (completion_status == FLUSH_GIVEUP) {
667
 
                stat->s_giveup++;
 
854
                stat->s_ntargremoteuvhub += hubs;
 
855
 
 
856
        stat->s_ntarguvhub += hubs;
 
857
 
 
858
        if (hubs >= 16)
 
859
                stat->s_ntarguvhub16++;
 
860
        else if (hubs >= 8)
 
861
                stat->s_ntarguvhub8++;
 
862
        else if (hubs >= 4)
 
863
                stat->s_ntarguvhub4++;
 
864
        else if (hubs >= 2)
 
865
                stat->s_ntarguvhub2++;
 
866
        else
 
867
                stat->s_ntarguvhub1++;
 
868
}
 
869
 
 
870
/*
 
871
 * Translate a cpu mask to the uvhub distribution mask in the BAU
 
872
 * activation descriptor.
 
873
 */
 
874
static int set_distrib_bits(struct cpumask *flush_mask, struct bau_control *bcp,
 
875
                        struct bau_desc *bau_desc, int *localsp, int *remotesp)
 
876
{
 
877
        int cpu;
 
878
        int pnode;
 
879
        int cnt = 0;
 
880
        struct hub_and_pnode *hpp;
 
881
 
 
882
        for_each_cpu(cpu, flush_mask) {
 
883
                /*
 
884
                 * The distribution vector is a bit map of pnodes, relative
 
885
                 * to the partition base pnode (and the partition base nasid
 
886
                 * in the header).
 
887
                 * Translate cpu to pnode and hub using a local memory array.
 
888
                 */
 
889
                hpp = &bcp->socket_master->thp[cpu];
 
890
                pnode = hpp->pnode - bcp->partition_base_pnode;
 
891
                bau_uvhub_set(pnode, &bau_desc->distribution);
 
892
                cnt++;
 
893
                if (hpp->uvhub == bcp->uvhub)
 
894
                        (*localsp)++;
 
895
                else
 
896
                        (*remotesp)++;
 
897
        }
 
898
        if (!cnt)
668
899
                return 1;
669
 
        }
670
900
        return 0;
671
901
}
672
902
 
673
 
/**
674
 
 * uv_flush_tlb_others - globally purge translation cache of a virtual
675
 
 * address or all TLB's
 
903
/*
 
904
 * globally purge translation cache of a virtual address or all TLB's
676
905
 * @cpumask: mask of all cpu's in which the address is to be removed
677
906
 * @mm: mm_struct containing virtual address range
678
907
 * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu)
696
925
 * done.  The returned pointer is valid till preemption is re-enabled.
697
926
 */
698
927
const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
699
 
                                          struct mm_struct *mm,
700
 
                                          unsigned long va, unsigned int cpu)
 
928
                                struct mm_struct *mm, unsigned long va,
 
929
                                unsigned int cpu)
701
930
{
702
931
        int locals = 0;
703
932
        int remotes = 0;
704
933
        int hubs = 0;
705
 
        int tcpu;
706
 
        int tpnode;
707
934
        struct bau_desc *bau_desc;
708
935
        struct cpumask *flush_mask;
709
936
        struct ptc_stats *stat;
710
937
        struct bau_control *bcp;
711
 
        struct bau_control *tbcp;
712
 
        struct hub_and_pnode *hpp;
713
938
 
714
939
        /* kernel was booted 'nobau' */
715
940
        if (nobau)
720
945
 
721
946
        /* bau was disabled due to slow response */
722
947
        if (bcp->baudisabled) {
723
 
                /* the cpu that disabled it must re-enable it */
724
 
                if (bcp->set_bau_off) {
725
 
                        if (get_cycles() >= bcp->set_bau_on_time) {
726
 
                                stat->s_bau_reenabled++;
727
 
                                baudisabled = 0;
728
 
                                for_each_present_cpu(tcpu) {
729
 
                                        tbcp = &per_cpu(bau_control, tcpu);
730
 
                                        tbcp->baudisabled = 0;
731
 
                                        tbcp->period_requests = 0;
732
 
                                        tbcp->period_time = 0;
733
 
                                }
734
 
                        }
735
 
                }
736
 
                return cpumask;
 
948
                if (check_enable(bcp, stat))
 
949
                        return cpumask;
737
950
        }
738
951
 
739
952
        /*
744
957
        flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu);
745
958
        /* don't actually do a shootdown of the local cpu */
746
959
        cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
 
960
 
747
961
        if (cpu_isset(cpu, *cpumask))
748
962
                stat->s_ntargself++;
749
963
 
750
964
        bau_desc = bcp->descriptor_base;
751
 
        bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu;
 
965
        bau_desc += ITEMS_PER_DESC * bcp->uvhub_cpu;
752
966
        bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
753
 
 
754
 
        for_each_cpu(tcpu, flush_mask) {
755
 
                /*
756
 
                 * The distribution vector is a bit map of pnodes, relative
757
 
                 * to the partition base pnode (and the partition base nasid
758
 
                 * in the header).
759
 
                 * Translate cpu to pnode and hub using an array stored
760
 
                 * in local memory.
761
 
                 */
762
 
                hpp = &bcp->socket_master->target_hub_and_pnode[tcpu];
763
 
                tpnode = hpp->pnode - bcp->partition_base_pnode;
764
 
                bau_uvhub_set(tpnode, &bau_desc->distribution);
765
 
                if (hpp->uvhub == bcp->uvhub)
766
 
                        locals++;
767
 
                else
768
 
                        remotes++;
769
 
        }
770
 
        if ((locals + remotes) == 0)
 
967
        if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes))
771
968
                return NULL;
772
 
        stat->s_requestor++;
773
 
        stat->s_ntargcpu += remotes + locals;
774
 
        stat->s_ntargremotes += remotes;
775
 
        stat->s_ntarglocals += locals;
776
 
        remotes = bau_uvhub_weight(&bau_desc->distribution);
777
969
 
778
 
        /* uvhub statistics */
779
 
        hubs = bau_uvhub_weight(&bau_desc->distribution);
780
 
        if (locals) {
781
 
                stat->s_ntarglocaluvhub++;
782
 
                stat->s_ntargremoteuvhub += (hubs - 1);
783
 
        } else
784
 
                stat->s_ntargremoteuvhub += hubs;
785
 
        stat->s_ntarguvhub += hubs;
786
 
        if (hubs >= 16)
787
 
                stat->s_ntarguvhub16++;
788
 
        else if (hubs >= 8)
789
 
                stat->s_ntarguvhub8++;
790
 
        else if (hubs >= 4)
791
 
                stat->s_ntarguvhub4++;
792
 
        else if (hubs >= 2)
793
 
                stat->s_ntarguvhub2++;
794
 
        else
795
 
                stat->s_ntarguvhub1++;
 
970
        record_send_statistics(stat, locals, hubs, remotes, bau_desc);
796
971
 
797
972
        bau_desc->payload.address = va;
798
973
        bau_desc->payload.sending_cpu = cpu;
799
 
 
800
974
        /*
801
975
         * uv_flush_send_and_wait returns 0 if all cpu's were messaged,
802
976
         * or 1 if it gave up and the original cpumask should be returned.
825
999
{
826
1000
        int count = 0;
827
1001
        cycles_t time_start;
828
 
        struct bau_payload_queue_entry *msg;
 
1002
        struct bau_pq_entry *msg;
829
1003
        struct bau_control *bcp;
830
1004
        struct ptc_stats *stat;
831
1005
        struct msg_desc msgdesc;
832
1006
 
833
1007
        time_start = get_cycles();
 
1008
 
834
1009
        bcp = &per_cpu(bau_control, smp_processor_id());
835
1010
        stat = bcp->statp;
836
 
        msgdesc.va_queue_first = bcp->va_queue_first;
837
 
        msgdesc.va_queue_last = bcp->va_queue_last;
 
1011
 
 
1012
        msgdesc.queue_first = bcp->queue_first;
 
1013
        msgdesc.queue_last = bcp->queue_last;
 
1014
 
838
1015
        msg = bcp->bau_msg_head;
839
 
        while (msg->sw_ack_vector) {
 
1016
        while (msg->swack_vec) {
840
1017
                count++;
841
 
                msgdesc.msg_slot = msg - msgdesc.va_queue_first;
842
 
                msgdesc.sw_ack_slot = ffs(msg->sw_ack_vector) - 1;
 
1018
 
 
1019
                msgdesc.msg_slot = msg - msgdesc.queue_first;
 
1020
                msgdesc.swack_slot = ffs(msg->swack_vec) - 1;
843
1021
                msgdesc.msg = msg;
844
 
                uv_bau_process_message(&msgdesc, bcp);
 
1022
                bau_process_message(&msgdesc, bcp);
 
1023
 
845
1024
                msg++;
846
 
                if (msg > msgdesc.va_queue_last)
847
 
                        msg = msgdesc.va_queue_first;
 
1025
                if (msg > msgdesc.queue_last)
 
1026
                        msg = msgdesc.queue_first;
848
1027
                bcp->bau_msg_head = msg;
849
1028
        }
850
1029
        stat->d_time += (get_cycles() - time_start);
852
1031
                stat->d_nomsg++;
853
1032
        else if (count > 1)
854
1033
                stat->d_multmsg++;
 
1034
 
855
1035
        ack_APIC_irq();
856
1036
}
857
1037
 
858
1038
/*
859
 
 * uv_enable_timeouts
860
 
 *
861
 
 * Each target uvhub (i.e. a uvhub that has no cpu's) needs to have
 
1039
 * Each target uvhub (i.e. a uvhub that has cpu's) needs to have
862
1040
 * shootdown message timeouts enabled.  The timeout does not cause
863
1041
 * an interrupt, but causes an error message to be returned to
864
1042
 * the sender.
865
1043
 */
866
 
static void __init uv_enable_timeouts(void)
 
1044
static void __init enable_timeouts(void)
867
1045
{
868
1046
        int uvhub;
869
1047
        int nuvhubs;
877
1055
                        continue;
878
1056
 
879
1057
                pnode = uv_blade_to_pnode(uvhub);
880
 
                mmr_image =
881
 
                    uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL);
 
1058
                mmr_image = read_mmr_misc_control(pnode);
882
1059
                /*
883
1060
                 * Set the timeout period and then lock it in, in three
884
1061
                 * steps; captures and locks in the period.
885
1062
                 *
886
1063
                 * To program the period, the SOFT_ACK_MODE must be off.
887
1064
                 */
888
 
                mmr_image &= ~((unsigned long)1 <<
889
 
                    UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT);
890
 
                uv_write_global_mmr64
891
 
                    (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
 
1065
                mmr_image &= ~(1L << SOFTACK_MSHIFT);
 
1066
                write_mmr_misc_control(pnode, mmr_image);
892
1067
                /*
893
1068
                 * Set the 4-bit period.
894
1069
                 */
895
 
                mmr_image &= ~((unsigned long)0xf <<
896
 
                     UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT);
897
 
                mmr_image |= (UV_INTD_SOFT_ACK_TIMEOUT_PERIOD <<
898
 
                     UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT);
899
 
                uv_write_global_mmr64
900
 
                    (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
 
1070
                mmr_image &= ~((unsigned long)0xf << SOFTACK_PSHIFT);
 
1071
                mmr_image |= (SOFTACK_TIMEOUT_PERIOD << SOFTACK_PSHIFT);
 
1072
                write_mmr_misc_control(pnode, mmr_image);
901
1073
                /*
 
1074
                 * UV1:
902
1075
                 * Subsequent reversals of the timebase bit (3) cause an
903
1076
                 * immediate timeout of one or all INTD resources as
904
1077
                 * indicated in bits 2:0 (7 causes all of them to timeout).
905
1078
                 */
906
 
                mmr_image |= ((unsigned long)1 <<
907
 
                    UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT);
908
 
                uv_write_global_mmr64
909
 
                    (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
 
1079
                mmr_image |= (1L << SOFTACK_MSHIFT);
 
1080
                if (is_uv2_hub()) {
 
1081
                        mmr_image |= (1L << UV2_LEG_SHFT);
 
1082
                        mmr_image |= (1L << UV2_EXT_SHFT);
 
1083
                }
 
1084
                write_mmr_misc_control(pnode, mmr_image);
910
1085
        }
911
1086
}
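The three steps above amount to a read-modify-write of the hub's MISC_CONTROL register: clear the soft-ack mode bit so the period field can be written, program the 4-bit period, then set the mode bit again, which captures and locks the period in (UV2 hubs additionally set the legacy and extended enable bits). Below is a stand-alone sketch of the same sequence on a local copy of the register value; the bit positions and the period are illustrative placeholders, not the real UVH_LB_BAU_MISC_CONTROL layout.

#include <stdio.h>

/* illustrative placeholder bit positions -- not the real MMR layout */
#define SOFTACK_MSHIFT          30              /* soft-ack mode enable bit */
#define SOFTACK_PSHIFT          12              /* 4-bit timeout period field */
#define SOFTACK_TIMEOUT_PERIOD  0xaUL           /* example period code */

int main(void)
{
        unsigned long mmr = 0x12345678UL;       /* pretend this came from read_mmr_misc_control() */

        /* step 1: turn the soft-ack mode off so the period can be programmed */
        mmr &= ~(1UL << SOFTACK_MSHIFT);
        printf("mode off:   %#lx\n", mmr);

        /* step 2: clear and set the 4-bit period field */
        mmr &= ~(0xfUL << SOFTACK_PSHIFT);
        mmr |= SOFTACK_TIMEOUT_PERIOD << SOFTACK_PSHIFT;
        printf("period set: %#lx\n", mmr);

        /* step 3: turn the mode back on, which captures and locks the period */
        mmr |= 1UL << SOFTACK_MSHIFT;
        printf("locked in:  %#lx\n", mmr);
        return 0;
}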
912
1087
 
913
 
static void *uv_ptc_seq_start(struct seq_file *file, loff_t *offset)
 
1088
static void *ptc_seq_start(struct seq_file *file, loff_t *offset)
914
1089
{
915
1090
        if (*offset < num_possible_cpus())
916
1091
                return offset;
917
1092
        return NULL;
918
1093
}
919
1094
 
920
 
static void *uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset)
 
1095
static void *ptc_seq_next(struct seq_file *file, void *data, loff_t *offset)
921
1096
{
922
1097
        (*offset)++;
923
1098
        if (*offset < num_possible_cpus())
925
1100
        return NULL;
926
1101
}
927
1102
 
928
 
static void uv_ptc_seq_stop(struct seq_file *file, void *data)
 
1103
static void ptc_seq_stop(struct seq_file *file, void *data)
929
1104
{
930
1105
}
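ptc_seq_start(), ptc_seq_next() and ptc_seq_stop() form the seq_file iterator over cpu numbers: start() validates the initial offset, next() advances it until num_possible_cpus() is reached, and stop() has nothing to release. A rough user-space analogue of the loop the seq_file core drives (start, then show/next until NULL, then stop) is sketched below; it only illustrates the calling convention and is not how seq_read() is actually implemented.

#include <stdio.h>

#define NUM_CPUS 4      /* stands in for num_possible_cpus() */

static void *demo_start(long *off) { return (*off < NUM_CPUS) ? (void *)off : NULL; }
static void *demo_next(long *off)  { (*off)++; return (*off < NUM_CPUS) ? (void *)off : NULL; }
static void  demo_show(long *off)  { printf("stats for cpu %ld\n", *off); }

int main(void)
{
        long off = 0;
        void *p = demo_start(&off);     /* the seq_file core drives this same pattern */

        while (p) {
                demo_show(&off);
                p = demo_next(&off);
        }
        /* stop() would run here; ptc_seq_stop() is empty */
        return 0;
}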
931
1106
 
932
 
static inline unsigned long long
933
 
microsec_2_cycles(unsigned long microsec)
 
1107
static inline unsigned long long usec_2_cycles(unsigned long microsec)
934
1108
{
935
1109
        unsigned long ns;
936
1110
        unsigned long long cyc;
941
1115
}
942
1116
 
943
1117
/*
944
 
 * Display the statistics thru /proc.
 
1118
 * Display the statistics thru /proc/sgi_uv/ptc_statistics
945
1119
 * 'data' points to the cpu number
 
1120
 * Note: see the descriptions in stat_description[].
946
1121
 */
947
 
static int uv_ptc_seq_show(struct seq_file *file, void *data)
 
1122
static int ptc_seq_show(struct seq_file *file, void *data)
948
1123
{
949
1124
        struct ptc_stats *stat;
950
1125
        int cpu;
951
1126
 
952
1127
        cpu = *(loff_t *)data;
953
 
 
954
1128
        if (!cpu) {
955
1129
                seq_printf(file,
956
1130
                        "# cpu sent stime self locals remotes ncpus localhub ");
957
1131
                seq_printf(file,
958
1132
                        "remotehub numuvhubs numuvhubs16 numuvhubs8 ");
959
1133
                seq_printf(file,
960
 
                        "numuvhubs4 numuvhubs2 numuvhubs1 dto ");
961
 
                seq_printf(file,
962
 
                        "retries rok resetp resett giveup sto bz throt ");
963
 
                seq_printf(file,
964
 
                        "sw_ack recv rtime all ");
965
 
                seq_printf(file,
966
 
                        "one mult none retry canc nocan reset rcan ");
 
1134
                        "numuvhubs4 numuvhubs2 numuvhubs1 dto retries rok ");
 
1135
                seq_printf(file,
 
1136
                        "resetp resett giveup sto bz throt swack recv rtime ");
 
1137
                seq_printf(file,
 
1138
                        "all one mult none retry canc nocan reset rcan ");
967
1139
                seq_printf(file,
968
1140
                        "disable enable\n");
969
1141
        }
990
1162
                /* destination side statistics */
991
1163
                seq_printf(file,
992
1164
                           "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
993
 
                           uv_read_global_mmr64(uv_cpu_to_pnode(cpu),
994
 
                                        UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE),
 
1165
                           read_gmmr_sw_ack(uv_cpu_to_pnode(cpu)),
995
1166
                           stat->d_requestee, cycles_2_us(stat->d_time),
996
1167
                           stat->d_alltlb, stat->d_onetlb, stat->d_multmsg,
997
1168
                           stat->d_nomsg, stat->d_retries, stat->d_canceled,
1000
1171
                seq_printf(file, "%ld %ld\n",
1001
1172
                        stat->s_bau_disabled, stat->s_bau_reenabled);
1002
1173
        }
1003
 
 
1004
1174
        return 0;
1005
1175
}
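The per-cpu statistics formatted by ptc_seq_show() are read through /proc/sgi_uv/ptc_statistics, the path named in the comment above: one header line followed by one line per cpu. A minimal reader, assuming a UV system where the file exists:

#include <stdio.h>

int main(void)
{
        char line[1024];
        FILE *f = fopen("/proc/sgi_uv/ptc_statistics", "r");

        if (!f) {
                perror("ptc_statistics");       /* not a UV system, or BAU disabled */
                return 1;
        }
        while (fgets(line, sizeof(line), f))    /* header line, then one line per cpu */
                fputs(line, stdout);
        fclose(f);
        return 0;
}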
1006
1176
 
1008
1178
 * Display the tunables thru debugfs
1009
1179
 */
1010
1180
static ssize_t tunables_read(struct file *file, char __user *userbuf,
1011
 
                                                size_t count, loff_t *ppos)
 
1181
                                size_t count, loff_t *ppos)
1012
1182
{
1013
1183
        char *buf;
1014
1184
        int ret;
1015
1185
 
1016
1186
        buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d\n",
1017
 
                "max_bau_concurrent plugged_delay plugsb4reset",
 
1187
                "max_concur plugged_delay plugsb4reset",
1018
1188
                "timeoutsb4reset ipi_reset_limit complete_threshold",
1019
1189
                "congested_response_us congested_reps congested_period",
1020
 
                max_bau_concurrent, plugged_delay, plugsb4reset,
 
1190
                max_concurr, plugged_delay, plugsb4reset,
1021
1191
                timeoutsb4reset, ipi_reset_limit, complete_threshold,
1022
 
                congested_response_us, congested_reps, congested_period);
 
1192
                congested_respns_us, congested_reps, congested_period);
1023
1193
 
1024
1194
        if (!buf)
1025
1195
                return -ENOMEM;
1030
1200
}
1031
1201
 
1032
1202
/*
1033
 
 * -1: resetf the statistics
 
1203
 * handle a write to /proc/sgi_uv/ptc_statistics
 
1204
 * -1: reset the statistics
1034
1205
 *  0: display meaning of the statistics
1035
1206
 */
1036
 
static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user,
1037
 
                                 size_t count, loff_t *data)
 
1207
static ssize_t ptc_proc_write(struct file *file, const char __user *user,
 
1208
                                size_t count, loff_t *data)
1038
1209
{
1039
1210
        int cpu;
 
1211
        int i;
 
1212
        int elements;
1040
1213
        long input_arg;
1041
1214
        char optstr[64];
1042
1215
        struct ptc_stats *stat;
1046
1219
        if (copy_from_user(optstr, user, count))
1047
1220
                return -EFAULT;
1048
1221
        optstr[count - 1] = '\0';
 
1222
 
1049
1223
        if (strict_strtol(optstr, 10, &input_arg) < 0) {
1050
1224
                printk(KERN_DEBUG "%s is invalid\n", optstr);
1051
1225
                return -EINVAL;
1052
1226
        }
1053
1227
 
1054
1228
        if (input_arg == 0) {
 
1229
                elements = sizeof(stat_description)/sizeof(*stat_description);
1055
1230
                printk(KERN_DEBUG "# cpu:      cpu number\n");
1056
1231
                printk(KERN_DEBUG "Sender statistics:\n");
1057
 
                printk(KERN_DEBUG
1058
 
                "sent:     number of shootdown messages sent\n");
1059
 
                printk(KERN_DEBUG
1060
 
                "stime:    time spent sending messages\n");
1061
 
                printk(KERN_DEBUG
1062
 
                "numuvhubs: number of hubs targeted with shootdown\n");
1063
 
                printk(KERN_DEBUG
1064
 
                "numuvhubs16: number times 16 or more hubs targeted\n");
1065
 
                printk(KERN_DEBUG
1066
 
                "numuvhubs8: number times 8 or more hubs targeted\n");
1067
 
                printk(KERN_DEBUG
1068
 
                "numuvhubs4: number times 4 or more hubs targeted\n");
1069
 
                printk(KERN_DEBUG
1070
 
                "numuvhubs2: number times 2 or more hubs targeted\n");
1071
 
                printk(KERN_DEBUG
1072
 
                "numuvhubs1: number times 1 hub targeted\n");
1073
 
                printk(KERN_DEBUG
1074
 
                "numcpus:  number of cpus targeted with shootdown\n");
1075
 
                printk(KERN_DEBUG
1076
 
                "dto:      number of destination timeouts\n");
1077
 
                printk(KERN_DEBUG
1078
 
                "retries:  destination timeout retries sent\n");
1079
 
                printk(KERN_DEBUG
1080
 
                "rok:   :  destination timeouts successfully retried\n");
1081
 
                printk(KERN_DEBUG
1082
 
                "resetp:   ipi-style resource resets for plugs\n");
1083
 
                printk(KERN_DEBUG
1084
 
                "resett:   ipi-style resource resets for timeouts\n");
1085
 
                printk(KERN_DEBUG
1086
 
                "giveup:   fall-backs to ipi-style shootdowns\n");
1087
 
                printk(KERN_DEBUG
1088
 
                "sto:      number of source timeouts\n");
1089
 
                printk(KERN_DEBUG
1090
 
                "bz:       number of stay-busy's\n");
1091
 
                printk(KERN_DEBUG
1092
 
                "throt:    number times spun in throttle\n");
1093
 
                printk(KERN_DEBUG "Destination side statistics:\n");
1094
 
                printk(KERN_DEBUG
1095
 
                "sw_ack:   image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE\n");
1096
 
                printk(KERN_DEBUG
1097
 
                "recv:     shootdown messages received\n");
1098
 
                printk(KERN_DEBUG
1099
 
                "rtime:    time spent processing messages\n");
1100
 
                printk(KERN_DEBUG
1101
 
                "all:      shootdown all-tlb messages\n");
1102
 
                printk(KERN_DEBUG
1103
 
                "one:      shootdown one-tlb messages\n");
1104
 
                printk(KERN_DEBUG
1105
 
                "mult:     interrupts that found multiple messages\n");
1106
 
                printk(KERN_DEBUG
1107
 
                "none:     interrupts that found no messages\n");
1108
 
                printk(KERN_DEBUG
1109
 
                "retry:    number of retry messages processed\n");
1110
 
                printk(KERN_DEBUG
1111
 
                "canc:     number messages canceled by retries\n");
1112
 
                printk(KERN_DEBUG
1113
 
                "nocan:    number retries that found nothing to cancel\n");
1114
 
                printk(KERN_DEBUG
1115
 
                "reset:    number of ipi-style reset requests processed\n");
1116
 
                printk(KERN_DEBUG
1117
 
                "rcan:     number messages canceled by reset requests\n");
1118
 
                printk(KERN_DEBUG
1119
 
                "disable:  number times use of the BAU was disabled\n");
1120
 
                printk(KERN_DEBUG
1121
 
                "enable:   number times use of the BAU was re-enabled\n");
 
1232
                for (i = 0; i < elements; i++)
 
1233
                        printk(KERN_DEBUG "%s\n", stat_description[i]);
1122
1234
        } else if (input_arg == -1) {
1123
1235
                for_each_present_cpu(cpu) {
1124
1236
                        stat = &per_cpu(ptcstats, cpu);
1145
1257
}
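Per the comment above, writing -1 to /proc/sgi_uv/ptc_statistics clears the per-cpu counters and writing 0 prints the stat_description[] strings to the kernel log. A small sketch of the reset write, assuming root on a UV system:

#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/proc/sgi_uv/ptc_statistics", "w");

        if (!f) {
                perror("ptc_statistics");
                return 1;
        }
        fputs("-1\n", f);       /* -1 resets the statistics; 0 would log the descriptions */
        fclose(f);
        return 0;
}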
1146
1258
 
1147
1259
/*
1148
 
 * set the tunables
1149
 
 * 0 values reset them to defaults
 
1260
 * Parse the values written to /sys/kernel/debug/sgi_uv/bau_tunables.
 
1261
 * Zero values reset them to defaults.
1150
1262
 */
1151
 
static ssize_t tunables_write(struct file *file, const char __user *user,
1152
 
                                 size_t count, loff_t *data)
 
1263
static int parse_tunables_write(struct bau_control *bcp, char *instr,
 
1264
                                int count)
1153
1265
{
1154
 
        int cpu;
1155
 
        int cnt = 0;
1156
 
        int val;
1157
1266
        char *p;
1158
1267
        char *q;
1159
 
        char instr[64];
1160
 
        struct bau_control *bcp;
1161
 
 
1162
 
        if (count == 0 || count > sizeof(instr)-1)
1163
 
                return -EINVAL;
1164
 
        if (copy_from_user(instr, user, count))
1165
 
                return -EFAULT;
1166
 
 
1167
 
        instr[count] = '\0';
1168
 
        /* count the fields */
 
1268
        int cnt = 0;
 
1269
        int val;
 
1270
        int e = sizeof(tunables) / sizeof(*tunables);
 
1271
 
1169
1272
        p = instr + strspn(instr, WHITESPACE);
1170
1273
        q = p;
1171
1274
        for (; *p; p = q + strspn(q, WHITESPACE)) {
1174
1277
                if (q == p)
1175
1278
                        break;
1176
1279
        }
1177
 
        if (cnt != 9) {
1178
 
                printk(KERN_INFO "bau tunable error: should be 9 numbers\n");
 
1280
        if (cnt != e) {
 
1281
                printk(KERN_INFO "bau tunable error: should be %d values\n", e);
1179
1282
                return -EINVAL;
1180
1283
        }
1181
1284
 
1187
1290
                switch (cnt) {
1188
1291
                case 0:
1189
1292
                        if (val == 0) {
1190
 
                                max_bau_concurrent = MAX_BAU_CONCURRENT;
1191
 
                                max_bau_concurrent_constant =
1192
 
                                                        MAX_BAU_CONCURRENT;
 
1293
                                max_concurr = MAX_BAU_CONCURRENT;
 
1294
                                max_concurr_const = MAX_BAU_CONCURRENT;
1193
1295
                                continue;
1194
1296
                        }
1195
 
                        bcp = &per_cpu(bau_control, smp_processor_id());
1196
1297
                        if (val < 1 || val > bcp->cpus_in_uvhub) {
1197
1298
                                printk(KERN_DEBUG
1198
1299
                                "Error: BAU max concurrent %d is invalid\n",
1199
1300
                                val);
1200
1301
                                return -EINVAL;
1201
1302
                        }
1202
 
                        max_bau_concurrent = val;
1203
 
                        max_bau_concurrent_constant = val;
1204
 
                        continue;
1205
 
                case 1:
1206
 
                        if (val == 0)
1207
 
                                plugged_delay = PLUGGED_DELAY;
1208
 
                        else
1209
 
                                plugged_delay = val;
1210
 
                        continue;
1211
 
                case 2:
1212
 
                        if (val == 0)
1213
 
                                plugsb4reset = PLUGSB4RESET;
1214
 
                        else
1215
 
                                plugsb4reset = val;
1216
 
                        continue;
1217
 
                case 3:
1218
 
                        if (val == 0)
1219
 
                                timeoutsb4reset = TIMEOUTSB4RESET;
1220
 
                        else
1221
 
                                timeoutsb4reset = val;
1222
 
                        continue;
1223
 
                case 4:
1224
 
                        if (val == 0)
1225
 
                                ipi_reset_limit = IPI_RESET_LIMIT;
1226
 
                        else
1227
 
                                ipi_reset_limit = val;
1228
 
                        continue;
1229
 
                case 5:
1230
 
                        if (val == 0)
1231
 
                                complete_threshold = COMPLETE_THRESHOLD;
1232
 
                        else
1233
 
                                complete_threshold = val;
1234
 
                        continue;
1235
 
                case 6:
1236
 
                        if (val == 0)
1237
 
                                congested_response_us = CONGESTED_RESPONSE_US;
1238
 
                        else
1239
 
                                congested_response_us = val;
1240
 
                        continue;
1241
 
                case 7:
1242
 
                        if (val == 0)
1243
 
                                congested_reps = CONGESTED_REPS;
1244
 
                        else
1245
 
                                congested_reps = val;
1246
 
                        continue;
1247
 
                case 8:
1248
 
                        if (val == 0)
1249
 
                                congested_period = CONGESTED_PERIOD;
1250
 
                        else
1251
 
                                congested_period = val;
 
1303
                        max_concurr = val;
 
1304
                        max_concurr_const = val;
 
1305
                        continue;
 
1306
                default:
 
1307
                        if (val == 0)
 
1308
                                *tunables[cnt].tunp = tunables[cnt].deflt;
 
1309
                        else
 
1310
                                *tunables[cnt].tunp = val;
1252
1311
                        continue;
1253
1312
                }
1254
1313
                if (q == p)
1255
1314
                        break;
1256
1315
        }
 
1316
        return 0;
 
1317
}
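Except for max_concurr (index 0, which still needs the explicit 1..cpus_in_uvhub range check), parse_tunables_write() now handles every field through the tunables[] table with one rule: zero restores the compiled-in default, anything else becomes the new value. A compact stand-alone sketch of that table-driven pattern, using invented tunable names:

#include <stdio.h>

/* invented example tunables -- same shape as the kernel's tunables[] table */
static int plugged_delay, ipi_reset_limit;

static struct {
        int *tunp;      /* pointer to the live tunable */
        int deflt;      /* compiled-in default */
} tunables[] = {
        { &plugged_delay,   10 },
        { &ipi_reset_limit,  1 },
};

static void set_tunable(int idx, int val)
{
        if (val == 0)
                *tunables[idx].tunp = tunables[idx].deflt;      /* 0 means "back to default" */
        else
                *tunables[idx].tunp = val;
}

int main(void)
{
        set_tunable(0, 25);     /* explicit value */
        set_tunable(1, 0);      /* reset to default */
        printf("plugged_delay=%d ipi_reset_limit=%d\n", plugged_delay, ipi_reset_limit);
        return 0;
}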
 
1318
 
 
1319
/*
 
1320
 * Handle a write to debugfs. (/sys/kernel/debug/sgi_uv/bau_tunables)
 
1321
 */
 
1322
static ssize_t tunables_write(struct file *file, const char __user *user,
 
1323
                                size_t count, loff_t *data)
 
1324
{
 
1325
        int cpu;
 
1326
        int ret;
 
1327
        char instr[100];
 
1328
        struct bau_control *bcp;
 
1329
 
 
1330
        if (count == 0 || count > sizeof(instr)-1)
 
1331
                return -EINVAL;
 
1332
        if (copy_from_user(instr, user, count))
 
1333
                return -EFAULT;
 
1334
 
 
1335
        instr[count] = '\0';
 
1336
 
 
1337
        bcp = &per_cpu(bau_control, smp_processor_id());
 
1338
 
 
1339
        ret = parse_tunables_write(bcp, instr, count);
 
1340
        if (ret)
 
1341
                return ret;
 
1342
 
1257
1343
        for_each_present_cpu(cpu) {
1258
1344
                bcp = &per_cpu(bau_control, cpu);
1259
 
                bcp->max_bau_concurrent = max_bau_concurrent;
1260
 
                bcp->max_bau_concurrent_constant = max_bau_concurrent;
1261
 
                bcp->plugged_delay = plugged_delay;
1262
 
                bcp->plugsb4reset = plugsb4reset;
1263
 
                bcp->timeoutsb4reset = timeoutsb4reset;
1264
 
                bcp->ipi_reset_limit = ipi_reset_limit;
1265
 
                bcp->complete_threshold = complete_threshold;
1266
 
                bcp->congested_response_us = congested_response_us;
1267
 
                bcp->congested_reps = congested_reps;
1268
 
                bcp->congested_period = congested_period;
 
1345
                bcp->max_concurr =              max_concurr;
 
1346
                bcp->max_concurr_const =        max_concurr;
 
1347
                bcp->plugged_delay =            plugged_delay;
 
1348
                bcp->plugsb4reset =             plugsb4reset;
 
1349
                bcp->timeoutsb4reset =          timeoutsb4reset;
 
1350
                bcp->ipi_reset_limit =          ipi_reset_limit;
 
1351
                bcp->complete_threshold =       complete_threshold;
 
1352
                bcp->cong_response_us =         congested_respns_us;
 
1353
                bcp->cong_reps =                congested_reps;
 
1354
                bcp->cong_period =              congested_period;
1269
1355
        }
1270
1356
        return count;
1271
1357
}
1272
1358
 
1273
1359
static const struct seq_operations uv_ptc_seq_ops = {
1274
 
        .start          = uv_ptc_seq_start,
1275
 
        .next           = uv_ptc_seq_next,
1276
 
        .stop           = uv_ptc_seq_stop,
1277
 
        .show           = uv_ptc_seq_show
 
1360
        .start          = ptc_seq_start,
 
1361
        .next           = ptc_seq_next,
 
1362
        .stop           = ptc_seq_stop,
 
1363
        .show           = ptc_seq_show
1278
1364
};
1279
1365
 
1280
 
static int uv_ptc_proc_open(struct inode *inode, struct file *file)
 
1366
static int ptc_proc_open(struct inode *inode, struct file *file)
1281
1367
{
1282
1368
        return seq_open(file, &uv_ptc_seq_ops);
1283
1369
}
1288
1374
}
1289
1375
 
1290
1376
static const struct file_operations proc_uv_ptc_operations = {
1291
 
        .open           = uv_ptc_proc_open,
 
1377
        .open           = ptc_proc_open,
1292
1378
        .read           = seq_read,
1293
 
        .write          = uv_ptc_proc_write,
 
1379
        .write          = ptc_proc_write,
1294
1380
        .llseek         = seq_lseek,
1295
1381
        .release        = seq_release,
1296
1382
};
1324
1410
                return -EINVAL;
1325
1411
        }
1326
1412
        tunables_file = debugfs_create_file(UV_BAU_TUNABLES_FILE, 0600,
1327
 
                        tunables_dir, NULL, &tunables_fops);
 
1413
                                        tunables_dir, NULL, &tunables_fops);
1328
1414
        if (!tunables_file) {
1329
1415
                printk(KERN_ERR "unable to create debugfs file %s\n",
1330
1416
                       UV_BAU_TUNABLES_FILE);
1336
1422
/*
1337
1423
 * Initialize the sending side's sending buffers.
1338
1424
 */
1339
 
static void
1340
 
uv_activation_descriptor_init(int node, int pnode, int base_pnode)
 
1425
static void activation_descriptor_init(int node, int pnode, int base_pnode)
1341
1426
{
1342
1427
        int i;
1343
1428
        int cpu;
1344
1429
        unsigned long pa;
1345
1430
        unsigned long m;
1346
1431
        unsigned long n;
 
1432
        size_t dsize;
1347
1433
        struct bau_desc *bau_desc;
1348
1434
        struct bau_desc *bd2;
1349
1435
        struct bau_control *bcp;
1350
1436
 
1351
1437
        /*
1352
 
         * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR)
1353
 
         * per cpu; and one per cpu on the uvhub (UV_ADP_SIZE)
 
1438
         * each bau_desc is 64 bytes; there are 8 (ITEMS_PER_DESC)
 
1439
         * per cpu; and one per cpu on the uvhub (ADP_SZ)
1354
1440
         */
1355
 
        bau_desc = kmalloc_node(sizeof(struct bau_desc) * UV_ADP_SIZE
1356
 
                                * UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node);
 
1441
        dsize = sizeof(struct bau_desc) * ADP_SZ * ITEMS_PER_DESC;
 
1442
        bau_desc = kmalloc_node(dsize, GFP_KERNEL, node);
1357
1443
        BUG_ON(!bau_desc);
1358
1444
 
1359
1445
        pa = uv_gpa(bau_desc); /* need the real nasid*/
1361
1447
        m = pa & uv_mmask;
1362
1448
 
1363
1449
        /* the 14-bit pnode */
1364
 
        uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE,
1365
 
                              (n << UV_DESC_BASE_PNODE_SHIFT | m));
 
1450
        write_mmr_descriptor_base(pnode, (n << UV_DESC_PSHIFT | m));
1366
1451
        /*
1367
 
         * Initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each
 
1452
         * Initializing all 8 (ITEMS_PER_DESC) descriptors for each
1368
1453
         * cpu even though we only use the first one; one descriptor can
1369
1454
         * describe a broadcast to 256 uv hubs.
1370
1455
         */
1371
 
        for (i = 0, bd2 = bau_desc; i < (UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR);
1372
 
                i++, bd2++) {
 
1456
        for (i = 0, bd2 = bau_desc; i < (ADP_SZ * ITEMS_PER_DESC); i++, bd2++) {
1373
1457
                memset(bd2, 0, sizeof(struct bau_desc));
1374
 
                bd2->header.sw_ack_flag = 1;
 
1458
                bd2->header.swack_flag =        1;
1375
1459
                /*
1376
1460
                 * The base_dest_nasid set in the message header is the nasid
1377
1461
                 * of the first uvhub in the partition. The bit map will
1378
1462
                 * indicate destination pnode numbers relative to that base.
1379
1463
                 * They may not be consecutive if nasid striding is being used.
1380
1464
                 */
1381
 
                bd2->header.base_dest_nasid = UV_PNODE_TO_NASID(base_pnode);
1382
 
                bd2->header.dest_subnodeid = UV_LB_SUBNODEID;
1383
 
                bd2->header.command = UV_NET_ENDPOINT_INTD;
1384
 
                bd2->header.int_both = 1;
 
1465
                bd2->header.base_dest_nasid =   UV_PNODE_TO_NASID(base_pnode);
 
1466
                bd2->header.dest_subnodeid =    UV_LB_SUBNODEID;
 
1467
                bd2->header.command =           UV_NET_ENDPOINT_INTD;
 
1468
                bd2->header.int_both =          1;
1385
1469
                /*
1386
1470
                 * all others need to be set to zero:
1387
1471
                 *   fairness chaining multilevel count replied_to
1401
1485
 * - node is first node (kernel memory notion) on the uvhub
1402
1486
 * - pnode is the uvhub's physical identifier
1403
1487
 */
1404
 
static void
1405
 
uv_payload_queue_init(int node, int pnode)
 
1488
static void pq_init(int node, int pnode)
1406
1489
{
1407
 
        int pn;
1408
1490
        int cpu;
 
1491
        size_t plsize;
1409
1492
        char *cp;
1410
 
        unsigned long pa;
1411
 
        struct bau_payload_queue_entry *pqp;
1412
 
        struct bau_payload_queue_entry *pqp_malloc;
 
1493
        void *vp;
 
1494
        unsigned long pn;
 
1495
        unsigned long first;
 
1496
        unsigned long pn_first;
 
1497
        unsigned long last;
 
1498
        struct bau_pq_entry *pqp;
1413
1499
        struct bau_control *bcp;
1414
1500
 
1415
 
        pqp = kmalloc_node((DEST_Q_SIZE + 1)
1416
 
                           * sizeof(struct bau_payload_queue_entry),
1417
 
                           GFP_KERNEL, node);
 
1501
        plsize = (DEST_Q_SIZE + 1) * sizeof(struct bau_pq_entry);
 
1502
        vp = kmalloc_node(plsize, GFP_KERNEL, node);
 
1503
        pqp = (struct bau_pq_entry *)vp;
1418
1504
        BUG_ON(!pqp);
1419
 
        pqp_malloc = pqp;
1420
1505
 
1421
1506
        cp = (char *)pqp + 31;
1422
 
        pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5);
 
1507
        pqp = (struct bau_pq_entry *)(((unsigned long)cp >> 5) << 5);
1423
1508
 
1424
1509
        for_each_present_cpu(cpu) {
1425
1510
                if (pnode != uv_cpu_to_pnode(cpu))
1426
1511
                        continue;
1427
1512
                /* for every cpu on this pnode: */
1428
1513
                bcp = &per_cpu(bau_control, cpu);
1429
 
                bcp->va_queue_first = pqp;
1430
 
                bcp->bau_msg_head = pqp;
1431
 
                bcp->va_queue_last = pqp + (DEST_Q_SIZE - 1);
 
1514
                bcp->queue_first        = pqp;
 
1515
                bcp->bau_msg_head       = pqp;
 
1516
                bcp->queue_last         = pqp + (DEST_Q_SIZE - 1);
1432
1517
        }
1433
1518
        /*
1434
1519
         * need the pnode of where the memory was really allocated
1435
1520
         */
1436
 
        pa = uv_gpa(pqp);
1437
 
        pn = pa >> uv_nshift;
1438
 
        uv_write_global_mmr64(pnode,
1439
 
                              UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST,
1440
 
                              ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) |
1441
 
                              uv_physnodeaddr(pqp));
1442
 
        uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL,
1443
 
                              uv_physnodeaddr(pqp));
1444
 
        uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST,
1445
 
                              (unsigned long)
1446
 
                              uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1)));
 
1521
        pn = uv_gpa(pqp) >> uv_nshift;
 
1522
        first = uv_physnodeaddr(pqp);
 
1523
        pn_first = ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | first;
 
1524
        last = uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1));
 
1525
        write_mmr_payload_first(pnode, pn_first);
 
1526
        write_mmr_payload_tail(pnode, first);
 
1527
        write_mmr_payload_last(pnode, last);
 
1528
 
1447
1529
        /* in effect, all msg_type's are set to MSG_NOOP */
1448
 
        memset(pqp, 0, sizeof(struct bau_payload_queue_entry) * DEST_Q_SIZE);
 
1530
        memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE);
1449
1531
}
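Two idioms in pq_init() are worth spelling out: the payload queue is pushed up to a 32-byte boundary by adding 31 and clearing the low five bits, and the value written for the queue's first entry packs the allocation's pnode above its node-local physical address. A self-contained sketch of both, with an invented shift standing in for UV_PAYLOADQ_PNODE_SHIFT and made-up example values (assumes a 64-bit build):

#include <stdio.h>
#include <stdlib.h>

#define PNODE_SHIFT 49          /* invented stand-in for UV_PAYLOADQ_PNODE_SHIFT */

int main(void)
{
        void *raw = malloc(1024 + 31);
        if (!raw)
                return 1;

        unsigned long cp = (unsigned long)raw + 31;
        unsigned long aligned = (cp >> 5) << 5;         /* round up to a 32-byte boundary */

        unsigned long pnode = 3;                        /* invented example values */
        unsigned long first = aligned & ((1UL << PNODE_SHIFT) - 1);
        unsigned long pn_first = (pnode << PNODE_SHIFT) | first;

        printf("raw=%#lx aligned=%#lx pn_first=%#lx\n",
               (unsigned long)raw, aligned, pn_first);
        free(raw);
        return 0;
}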
1450
1532
 
1451
1533
/*
1452
1534
 * Initialization of each UV hub's structures
1453
1535
 */
1454
 
static void __init uv_init_uvhub(int uvhub, int vector, int base_pnode)
 
1536
static void __init init_uvhub(int uvhub, int vector, int base_pnode)
1455
1537
{
1456
1538
        int node;
1457
1539
        int pnode;
1459
1541
 
1460
1542
        node = uvhub_to_first_node(uvhub);
1461
1543
        pnode = uv_blade_to_pnode(uvhub);
1462
 
        uv_activation_descriptor_init(node, pnode, base_pnode);
1463
 
        uv_payload_queue_init(node, pnode);
 
1544
 
 
1545
        activation_descriptor_init(node, pnode, base_pnode);
 
1546
 
 
1547
        pq_init(node, pnode);
1464
1548
        /*
1465
1549
         * The below initialization can't be in firmware because the
1466
1550
         * messaging IRQ will be determined by the OS.
1467
1551
         */
1468
1552
        apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits;
1469
 
        uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
1470
 
                                      ((apicid << 32) | vector));
 
1553
        write_mmr_data_config(pnode, ((apicid << 32) | vector));
1471
1554
}
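init_uvhub() finishes by programming the messaging interrupt: the hub's first APIC id goes into the upper 32 bits of the data-config MMR and the vector into the low bits. A tiny sketch of packing and unpacking that value; the APIC id and vector here are made up (assumes 64-bit unsigned long, as on x86_64):

#include <stdio.h>

int main(void)
{
        unsigned long apicid = 0x40;            /* made-up hub-first APIC id */
        unsigned long vector = 0xea;            /* made-up BAU message vector */
        unsigned long mmr = (apicid << 32) | vector;

        printf("mmr=%#lx apicid=%#lx vector=%#lx\n",
               mmr, mmr >> 32, mmr & 0xffffffffUL);
        return 0;
}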
1472
1555
 
1473
1556
/*
1474
1557
 * We will set BAU_MISC_CONTROL with a timeout period.
1475
1558
 * But the BIOS has set UVH_AGING_PRESCALE_SEL and UVH_TRANSACTION_TIMEOUT.
1476
 
 * So the destination timeout period has be be calculated from them.
 
1559
 * So the destination timeout period has to be calculated from them.
1477
1560
 */
1478
 
static int
1479
 
calculate_destination_timeout(void)
 
1561
static int calculate_destination_timeout(void)
1480
1562
{
1481
1563
        unsigned long mmr_image;
1482
1564
        int mult1;
1486
1568
        int ret;
1487
1569
        unsigned long ts_ns;
1488
1570
 
1489
 
        mult1 = UV_INTD_SOFT_ACK_TIMEOUT_PERIOD & BAU_MISC_CONTROL_MULT_MASK;
1490
 
        mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL);
1491
 
        index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK;
1492
 
        mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT);
1493
 
        mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK;
1494
 
        base = timeout_base_ns[index];
1495
 
        ts_ns = base * mult1 * mult2;
1496
 
        ret = ts_ns / 1000;
 
1571
        if (is_uv1_hub()) {
 
1572
                mult1 = SOFTACK_TIMEOUT_PERIOD & BAU_MISC_CONTROL_MULT_MASK;
 
1573
                mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL);
 
1574
                index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK;
 
1575
                mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT);
 
1576
                mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK;
 
1577
                base = timeout_base_ns[index];
 
1578
                ts_ns = base * mult1 * mult2;
 
1579
                ret = ts_ns / 1000;
 
1580
        } else {
 
1581
                /* 4 bits  0/1 for 10/80us, 3 bits of multiplier */
 
1582
                mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL);
 
1583
                mmr_image = (mmr_image & UV_SA_MASK) >> UV_SA_SHFT;
 
1584
                if (mmr_image & (1L << UV2_ACK_UNITS_SHFT))
 
1585
                        mult1 = 80;
 
1586
                else
 
1587
                        mult1 = 10;
 
1588
                base = mmr_image & UV2_ACK_MASK;
 
1589
                ret = mult1 * base;
 
1590
        }
1497
1591
        return ret;
1498
1592
}
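calculate_destination_timeout() now has two paths: on UV1 the period is base_ns * mult1 * mult2 nanoseconds converted to microseconds, with base_ns taken from timeout_base_ns[] via a field of UVH_AGING_PRESCALE_SEL and mult2 from UVH_TRANSACTION_TIMEOUT; on UV2 it is simply a 10 us or 80 us unit times a multiplier read from UVH_AGING_PRESCALE_SEL. A worked example with invented register field values:

#include <stdio.h>

int main(void)
{
        /* UV1-style: base_ns (from timeout_base_ns[index]) * mult1 * mult2, then ns -> us */
        unsigned long base_ns = 1280;           /* invented table entry */
        unsigned long mult1 = 10, mult2 = 16;   /* invented field values */
        printf("UV1 timeout: %lu us\n", base_ns * mult1 * mult2 / 1000);

        /* UV2-style: a 10us or 80us unit times a small multiplier */
        unsigned long unit_us = 80, units = 12; /* invented field values */
        printf("UV2 timeout: %lu us\n", unit_us * units);
        return 0;
}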
1499
1593
 
 
1594
static void __init init_per_cpu_tunables(void)
 
1595
{
 
1596
        int cpu;
 
1597
        struct bau_control *bcp;
 
1598
 
 
1599
        for_each_present_cpu(cpu) {
 
1600
                bcp = &per_cpu(bau_control, cpu);
 
1601
                bcp->baudisabled                = 0;
 
1602
                bcp->statp                      = &per_cpu(ptcstats, cpu);
 
1603
                /* time interval to catch a hardware stay-busy bug */
 
1604
                bcp->timeout_interval           = usec_2_cycles(2*timeout_us);
 
1605
                bcp->max_concurr                = max_concurr;
 
1606
                bcp->max_concurr_const          = max_concurr;
 
1607
                bcp->plugged_delay              = plugged_delay;
 
1608
                bcp->plugsb4reset               = plugsb4reset;
 
1609
                bcp->timeoutsb4reset            = timeoutsb4reset;
 
1610
                bcp->ipi_reset_limit            = ipi_reset_limit;
 
1611
                bcp->complete_threshold         = complete_threshold;
 
1612
                bcp->cong_response_us           = congested_respns_us;
 
1613
                bcp->cong_reps                  = congested_reps;
 
1614
                bcp->cong_period                = congested_period;
 
1615
        }
 
1616
}
 
1617
 
1500
1618
/*
1501
 
 * initialize the bau_control structure for each cpu
 
1619
 * Scan all cpus to collect blade and socket summaries.
1502
1620
 */
1503
 
static int __init uv_init_per_cpu(int nuvhubs, int base_part_pnode)
 
1621
static int __init get_cpu_topology(int base_pnode,
 
1622
                                        struct uvhub_desc *uvhub_descs,
 
1623
                                        unsigned char *uvhub_mask)
1504
1624
{
1505
 
        int i;
1506
1625
        int cpu;
1507
 
        int tcpu;
1508
1626
        int pnode;
1509
1627
        int uvhub;
1510
 
        int have_hmaster;
1511
 
        short socket = 0;
1512
 
        unsigned short socket_mask;
1513
 
        unsigned char *uvhub_mask;
 
1628
        int socket;
1514
1629
        struct bau_control *bcp;
1515
1630
        struct uvhub_desc *bdp;
1516
1631
        struct socket_desc *sdp;
1517
 
        struct bau_control *hmaster = NULL;
1518
 
        struct bau_control *smaster = NULL;
1519
 
        struct socket_desc {
1520
 
                short num_cpus;
1521
 
                short cpu_number[MAX_CPUS_PER_SOCKET];
1522
 
        };
1523
 
        struct uvhub_desc {
1524
 
                unsigned short socket_mask;
1525
 
                short num_cpus;
1526
 
                short uvhub;
1527
 
                short pnode;
1528
 
                struct socket_desc socket[2];
1529
 
        };
1530
 
        struct uvhub_desc *uvhub_descs;
1531
 
 
1532
 
        timeout_us = calculate_destination_timeout();
1533
 
 
1534
 
        uvhub_descs = kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL);
1535
 
        memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc));
1536
 
        uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL);
 
1632
 
1537
1633
        for_each_present_cpu(cpu) {
1538
1634
                bcp = &per_cpu(bau_control, cpu);
 
1635
 
1539
1636
                memset(bcp, 0, sizeof(struct bau_control));
 
1637
 
1540
1638
                pnode = uv_cpu_hub_info(cpu)->pnode;
1541
 
                if ((pnode - base_part_pnode) >= UV_DISTRIBUTION_SIZE) {
 
1639
                if ((pnode - base_pnode) >= UV_DISTRIBUTION_SIZE) {
1542
1640
                        printk(KERN_EMERG
1543
1641
                                "cpu %d pnode %d-%d beyond %d; BAU disabled\n",
1544
 
                                cpu, pnode, base_part_pnode,
1545
 
                                UV_DISTRIBUTION_SIZE);
 
1642
                                cpu, pnode, base_pnode, UV_DISTRIBUTION_SIZE);
1546
1643
                        return 1;
1547
1644
                }
 
1645
 
1548
1646
                bcp->osnode = cpu_to_node(cpu);
1549
 
                bcp->partition_base_pnode = uv_partition_base_pnode;
 
1647
                bcp->partition_base_pnode = base_pnode;
 
1648
 
1550
1649
                uvhub = uv_cpu_hub_info(cpu)->numa_blade_id;
1551
1650
                *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8));
1552
1651
                bdp = &uvhub_descs[uvhub];
 
1652
 
1553
1653
                bdp->num_cpus++;
1554
1654
                bdp->uvhub = uvhub;
1555
1655
                bdp->pnode = pnode;
 
1656
 
1556
1657
                /* kludge: 'assuming' one node per socket, and assuming that
1557
1658
                   disabling a socket just leaves a gap in node numbers */
1558
1659
                socket = bcp->osnode & 1;
1561
1662
                sdp->cpu_number[sdp->num_cpus] = cpu;
1562
1663
                sdp->num_cpus++;
1563
1664
                if (sdp->num_cpus > MAX_CPUS_PER_SOCKET) {
1564
 
                        printk(KERN_EMERG "%d cpus per socket invalid\n", sdp->num_cpus);
1565
 
                        return 1;
1566
 
                }
1567
 
        }
 
1665
                        printk(KERN_EMERG "%d cpus per socket invalid\n",
 
1666
                                sdp->num_cpus);
 
1667
                        return 1;
 
1668
                }
 
1669
        }
 
1670
        return 0;
 
1671
}
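get_cpu_topology() records which blades actually have cpus in uvhub_mask, a byte array of (nuvhubs+7)/8 entries where hub N owns bit N%8 of byte N/8; summarize_uvhub_sockets() tests the same bit before touching a hub. The set/test pattern in isolation, with made-up hub numbers:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        int nuvhubs = 20;                               /* example blade count */
        unsigned char *mask = calloc((nuvhubs + 7) / 8, 1);
        int hubs_with_cpus[] = { 0, 3, 17 };            /* example populated hubs */

        if (!mask)
                return 1;
        for (int i = 0; i < 3; i++) {
                int uvhub = hubs_with_cpus[i];
                mask[uvhub / 8] |= 1 << (uvhub % 8);    /* same expression as the kernel code */
        }
        for (int uvhub = 0; uvhub < nuvhubs; uvhub++)
                if (mask[uvhub / 8] & (1 << (uvhub % 8)))
                        printf("uvhub %d has cpus\n", uvhub);
        free(mask);
        return 0;
}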
 
1672
 
 
1673
/*
 
1674
 * Each socket is to get a local array of pnodes/hubs.
 
1675
 */
 
1676
static void make_per_cpu_thp(struct bau_control *smaster)
 
1677
{
 
1678
        int cpu;
 
1679
        size_t hpsz = sizeof(struct hub_and_pnode) * num_possible_cpus();
 
1680
 
 
1681
        smaster->thp = kmalloc_node(hpsz, GFP_KERNEL, smaster->osnode);
 
1682
        memset(smaster->thp, 0, hpsz);
 
1683
        for_each_present_cpu(cpu) {
 
1684
                smaster->thp[cpu].pnode = uv_cpu_hub_info(cpu)->pnode;
 
1685
                smaster->thp[cpu].uvhub = uv_cpu_hub_info(cpu)->numa_blade_id;
 
1686
        }
 
1687
}
 
1688
 
 
1689
/*
 
1690
 * Initialize all the per_cpu information for the cpu's on a given socket,
 
1691
 * given what has been gathered into the socket_desc struct.
 
1692
 * And reports the chosen hub and socket masters back to the caller.
 
1693
 */
 
1694
static int scan_sock(struct socket_desc *sdp, struct uvhub_desc *bdp,
 
1695
                        struct bau_control **smasterp,
 
1696
                        struct bau_control **hmasterp)
 
1697
{
 
1698
        int i;
 
1699
        int cpu;
 
1700
        struct bau_control *bcp;
 
1701
 
 
1702
        for (i = 0; i < sdp->num_cpus; i++) {
 
1703
                cpu = sdp->cpu_number[i];
 
1704
                bcp = &per_cpu(bau_control, cpu);
 
1705
                bcp->cpu = cpu;
 
1706
                if (i == 0) {
 
1707
                        *smasterp = bcp;
 
1708
                        if (!(*hmasterp))
 
1709
                                *hmasterp = bcp;
 
1710
                }
 
1711
                bcp->cpus_in_uvhub = bdp->num_cpus;
 
1712
                bcp->cpus_in_socket = sdp->num_cpus;
 
1713
                bcp->socket_master = *smasterp;
 
1714
                bcp->uvhub = bdp->uvhub;
 
1715
                bcp->uvhub_master = *hmasterp;
 
1716
                bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id;
 
1717
                if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {
 
1718
                        printk(KERN_EMERG "%d cpus per uvhub invalid\n",
 
1719
                                bcp->uvhub_cpu);
 
1720
                        return 1;
 
1721
                }
 
1722
        }
 
1723
        return 0;
 
1724
}
 
1725
 
 
1726
/*
 
1727
 * Summarize the blade and socket topology into the per_cpu structures.
 
1728
 */
 
1729
static int __init summarize_uvhub_sockets(int nuvhubs,
 
1730
                        struct uvhub_desc *uvhub_descs,
 
1731
                        unsigned char *uvhub_mask)
 
1732
{
 
1733
        int socket;
 
1734
        int uvhub;
 
1735
        unsigned short socket_mask;
 
1736
 
1568
1737
        for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
 
1738
                struct uvhub_desc *bdp;
 
1739
                struct bau_control *smaster = NULL;
 
1740
                struct bau_control *hmaster = NULL;
 
1741
 
1569
1742
                if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8))))
1570
1743
                        continue;
1571
 
                have_hmaster = 0;
 
1744
 
1572
1745
                bdp = &uvhub_descs[uvhub];
1573
1746
                socket_mask = bdp->socket_mask;
1574
1747
                socket = 0;
1575
1748
                while (socket_mask) {
1576
 
                        if (!(socket_mask & 1))
1577
 
                                goto nextsocket;
1578
 
                        sdp = &bdp->socket[socket];
1579
 
                        for (i = 0; i < sdp->num_cpus; i++) {
1580
 
                                cpu = sdp->cpu_number[i];
1581
 
                                bcp = &per_cpu(bau_control, cpu);
1582
 
                                bcp->cpu = cpu;
1583
 
                                if (i == 0) {
1584
 
                                        smaster = bcp;
1585
 
                                        if (!have_hmaster) {
1586
 
                                                have_hmaster++;
1587
 
                                                hmaster = bcp;
1588
 
                                        }
1589
 
                                }
1590
 
                                bcp->cpus_in_uvhub = bdp->num_cpus;
1591
 
                                bcp->cpus_in_socket = sdp->num_cpus;
1592
 
                                bcp->socket_master = smaster;
1593
 
                                bcp->uvhub = bdp->uvhub;
1594
 
                                bcp->uvhub_master = hmaster;
1595
 
                                bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->
1596
 
                                                blade_processor_id;
1597
 
                                if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {
1598
 
                                        printk(KERN_EMERG
1599
 
                                                "%d cpus per uvhub invalid\n",
1600
 
                                                bcp->uvhub_cpu);
 
1749
                        struct socket_desc *sdp;
 
1750
                        if ((socket_mask & 1)) {
 
1751
                                sdp = &bdp->socket[socket];
 
1752
                                if (scan_sock(sdp, bdp, &smaster, &hmaster))
1601
1753
                                        return 1;
1602
 
                                }
1603
1754
                        }
1604
 
nextsocket:
1605
1755
                        socket++;
1606
1756
                        socket_mask = (socket_mask >> 1);
1607
 
                        /* each socket gets a local array of pnodes/hubs */
1608
 
                        bcp = smaster;
1609
 
                        bcp->target_hub_and_pnode = kmalloc_node(
1610
 
                                sizeof(struct hub_and_pnode) *
1611
 
                                num_possible_cpus(), GFP_KERNEL, bcp->osnode);
1612
 
                        memset(bcp->target_hub_and_pnode, 0,
1613
 
                                sizeof(struct hub_and_pnode) *
1614
 
                                num_possible_cpus());
1615
 
                        for_each_present_cpu(tcpu) {
1616
 
                                bcp->target_hub_and_pnode[tcpu].pnode =
1617
 
                                        uv_cpu_hub_info(tcpu)->pnode;
1618
 
                                bcp->target_hub_and_pnode[tcpu].uvhub =
1619
 
                                        uv_cpu_hub_info(tcpu)->numa_blade_id;
1620
 
                        }
 
1757
                        make_per_cpu_thp(smaster);
1621
1758
                }
1622
1759
        }
 
1760
        return 0;
 
1761
}
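summarize_uvhub_sockets() walks each hub's socket_mask by testing bit 0 and shifting right, calling scan_sock() for every populated socket; with at most two sockets per hub the mask goes to zero quickly. The same walk in isolation:

#include <stdio.h>

int main(void)
{
        unsigned short socket_mask = 0x3;       /* example: both sockets of a hub populated */
        int socket = 0;

        while (socket_mask) {
                if (socket_mask & 1)
                        printf("scan socket %d\n", socket);     /* scan_sock() runs here */
                socket++;
                socket_mask >>= 1;
        }
        return 0;
}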
 
1762
 
 
1763
/*
 
1764
 * initialize the bau_control structure for each cpu
 
1765
 */
 
1766
static int __init init_per_cpu(int nuvhubs, int base_part_pnode)
 
1767
{
 
1768
        unsigned char *uvhub_mask;
 
1769
        void *vp;
 
1770
        struct uvhub_desc *uvhub_descs;
 
1771
 
 
1772
        timeout_us = calculate_destination_timeout();
 
1773
 
 
1774
        vp = kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL);
 
1775
        uvhub_descs = (struct uvhub_desc *)vp;
 
1776
        memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc));
 
1777
        uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL);
 
1778
 
 
1779
        if (get_cpu_topology(base_part_pnode, uvhub_descs, uvhub_mask))
 
1780
                return 1;
 
1781
 
 
1782
        if (summarize_uvhub_sockets(nuvhubs, uvhub_descs, uvhub_mask))
 
1783
                return 1;
 
1784
 
1623
1785
        kfree(uvhub_descs);
1624
1786
        kfree(uvhub_mask);
1625
 
        for_each_present_cpu(cpu) {
1626
 
                bcp = &per_cpu(bau_control, cpu);
1627
 
                bcp->baudisabled = 0;
1628
 
                bcp->statp = &per_cpu(ptcstats, cpu);
1629
 
                /* time interval to catch a hardware stay-busy bug */
1630
 
                bcp->timeout_interval = microsec_2_cycles(2*timeout_us);
1631
 
                bcp->max_bau_concurrent = max_bau_concurrent;
1632
 
                bcp->max_bau_concurrent_constant = max_bau_concurrent;
1633
 
                bcp->plugged_delay = plugged_delay;
1634
 
                bcp->plugsb4reset = plugsb4reset;
1635
 
                bcp->timeoutsb4reset = timeoutsb4reset;
1636
 
                bcp->ipi_reset_limit = ipi_reset_limit;
1637
 
                bcp->complete_threshold = complete_threshold;
1638
 
                bcp->congested_response_us = congested_response_us;
1639
 
                bcp->congested_reps = congested_reps;
1640
 
                bcp->congested_period = congested_period;
1641
 
        }
 
1787
        init_per_cpu_tunables();
1642
1788
        return 0;
1643
1789
}
1644
1790
 
1651
1797
        int pnode;
1652
1798
        int nuvhubs;
1653
1799
        int cur_cpu;
 
1800
        int cpus;
1654
1801
        int vector;
1655
 
        unsigned long mmr;
 
1802
        cpumask_var_t *mask;
1656
1803
 
1657
1804
        if (!is_uv_system())
1658
1805
                return 0;
1660
1807
        if (nobau)
1661
1808
                return 0;
1662
1809
 
1663
 
        for_each_possible_cpu(cur_cpu)
1664
 
                zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu),
1665
 
                                       GFP_KERNEL, cpu_to_node(cur_cpu));
 
1810
        for_each_possible_cpu(cur_cpu) {
 
1811
                mask = &per_cpu(uv_flush_tlb_mask, cur_cpu);
 
1812
                zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu));
 
1813
        }
1666
1814
 
1667
1815
        uv_nshift = uv_hub_info->m_val;
1668
1816
        uv_mmask = (1UL << uv_hub_info->m_val) - 1;
1669
1817
        nuvhubs = uv_num_possible_blades();
1670
1818
        spin_lock_init(&disable_lock);
1671
 
        congested_cycles = microsec_2_cycles(congested_response_us);
 
1819
        congested_cycles = usec_2_cycles(congested_respns_us);
1672
1820
 
1673
 
        uv_partition_base_pnode = 0x7fffffff;
 
1821
        uv_base_pnode = 0x7fffffff;
1674
1822
        for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
1675
 
                if (uv_blade_nr_possible_cpus(uvhub) &&
1676
 
                        (uv_blade_to_pnode(uvhub) < uv_partition_base_pnode))
1677
 
                        uv_partition_base_pnode = uv_blade_to_pnode(uvhub);
 
1823
                cpus = uv_blade_nr_possible_cpus(uvhub);
 
1824
                if (cpus && (uv_blade_to_pnode(uvhub) < uv_base_pnode))
 
1825
                        uv_base_pnode = uv_blade_to_pnode(uvhub);
1678
1826
        }
1679
1827
 
1680
 
        if (uv_init_per_cpu(nuvhubs, uv_partition_base_pnode)) {
 
1828
        if (init_per_cpu(nuvhubs, uv_base_pnode)) {
1681
1829
                nobau = 1;
1682
1830
                return 0;
1683
1831
        }
1685
1833
        vector = UV_BAU_MESSAGE;
1686
1834
        for_each_possible_blade(uvhub)
1687
1835
                if (uv_blade_nr_possible_cpus(uvhub))
1688
 
                        uv_init_uvhub(uvhub, vector, uv_partition_base_pnode);
 
1836
                        init_uvhub(uvhub, vector, uv_base_pnode);
1689
1837
 
1690
 
        uv_enable_timeouts();
 
1838
        enable_timeouts();
1691
1839
        alloc_intr_gate(vector, uv_bau_message_intr1);
1692
1840
 
1693
1841
        for_each_possible_blade(uvhub) {
1694
1842
                if (uv_blade_nr_possible_cpus(uvhub)) {
 
1843
                        unsigned long val;
 
1844
                        unsigned long mmr;
1695
1845
                        pnode = uv_blade_to_pnode(uvhub);
1696
1846
                        /* INIT the bau */
1697
 
                        uv_write_global_mmr64(pnode,
1698
 
                                        UVH_LB_BAU_SB_ACTIVATION_CONTROL,
1699
 
                                        ((unsigned long)1 << 63));
 
1847
                        val = 1L << 63;
 
1848
                        write_gmmr_activation(pnode, val);
1700
1849
                        mmr = 1; /* should be 1 to broadcast to both sockets */
1701
 
                        uv_write_global_mmr64(pnode, UVH_BAU_DATA_BROADCAST,
1702
 
                                                mmr);
 
1850
                        write_mmr_data_broadcast(pnode, mmr);
1703
1851
                }
1704
1852
        }
1705
1853