~ubuntu-branches/ubuntu/saucy/linux-n900/saucy

« back to all changes in this revision

Viewing changes to drivers/infiniband/hw/qib/qib_verbs.c

  • Committer: Bazaar Package Importer
  • Author(s): Mathieu Poirier
  • Date: 2011-02-18 09:43:31 UTC
  • Revision ID: james.westby@ubuntu.com-20110218094331-eyubsja4f9k0yhmq
Tags: 2.6.35-1.1
Initial release.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*
 
2
 * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
 
3
 * All rights reserved.
 
4
 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 
5
 *
 
6
 * This software is available to you under a choice of one of two
 
7
 * licenses.  You may choose to be licensed under the terms of the GNU
 
8
 * General Public License (GPL) Version 2, available from the file
 
9
 * COPYING in the main directory of this source tree, or the
 
10
 * OpenIB.org BSD license below:
 
11
 *
 
12
 *     Redistribution and use in source and binary forms, with or
 
13
 *     without modification, are permitted provided that the following
 
14
 *     conditions are met:
 
15
 *
 
16
 *      - Redistributions of source code must retain the above
 
17
 *        copyright notice, this list of conditions and the following
 
18
 *        disclaimer.
 
19
 *
 
20
 *      - Redistributions in binary form must reproduce the above
 
21
 *        copyright notice, this list of conditions and the following
 
22
 *        disclaimer in the documentation and/or other materials
 
23
 *        provided with the distribution.
 
24
 *
 
25
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 
26
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 
27
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 
28
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 
29
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 
30
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 
31
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 
32
 * SOFTWARE.
 
33
 */
 
34
 
 
35
#include <rdma/ib_mad.h>
 
36
#include <rdma/ib_user_verbs.h>
 
37
#include <linux/io.h>
 
38
#include <linux/utsname.h>
 
39
#include <linux/rculist.h>
 
40
#include <linux/mm.h>
 
41
 
 
42
#include "qib.h"
 
43
#include "qib_common.h"
 
44
 
 
45
static unsigned int ib_qib_qp_table_size = 251;
 
46
module_param_named(qp_table_size, ib_qib_qp_table_size, uint, S_IRUGO);
 
47
MODULE_PARM_DESC(qp_table_size, "QP table size");
 
48
 
 
49
unsigned int ib_qib_lkey_table_size = 16;
 
50
module_param_named(lkey_table_size, ib_qib_lkey_table_size, uint,
 
51
                   S_IRUGO);
 
52
MODULE_PARM_DESC(lkey_table_size,
 
53
                 "LKEY table size in bits (2^n, 1 <= n <= 23)");
 
54
 
 
55
static unsigned int ib_qib_max_pds = 0xFFFF;
 
56
module_param_named(max_pds, ib_qib_max_pds, uint, S_IRUGO);
 
57
MODULE_PARM_DESC(max_pds,
 
58
                 "Maximum number of protection domains to support");
 
59
 
 
60
static unsigned int ib_qib_max_ahs = 0xFFFF;
 
61
module_param_named(max_ahs, ib_qib_max_ahs, uint, S_IRUGO);
 
62
MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");
 
63
 
 
64
unsigned int ib_qib_max_cqes = 0x2FFFF;
 
65
module_param_named(max_cqes, ib_qib_max_cqes, uint, S_IRUGO);
 
66
MODULE_PARM_DESC(max_cqes,
 
67
                 "Maximum number of completion queue entries to support");
 
68
 
 
69
unsigned int ib_qib_max_cqs = 0x1FFFF;
 
70
module_param_named(max_cqs, ib_qib_max_cqs, uint, S_IRUGO);
 
71
MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");
 
72
 
 
73
unsigned int ib_qib_max_qp_wrs = 0x3FFF;
 
74
module_param_named(max_qp_wrs, ib_qib_max_qp_wrs, uint, S_IRUGO);
 
75
MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
 
76
 
 
77
unsigned int ib_qib_max_qps = 16384;
 
78
module_param_named(max_qps, ib_qib_max_qps, uint, S_IRUGO);
 
79
MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
 
80
 
 
81
unsigned int ib_qib_max_sges = 0x60;
 
82
module_param_named(max_sges, ib_qib_max_sges, uint, S_IRUGO);
 
83
MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");
 
84
 
 
85
unsigned int ib_qib_max_mcast_grps = 16384;
 
86
module_param_named(max_mcast_grps, ib_qib_max_mcast_grps, uint, S_IRUGO);
 
87
MODULE_PARM_DESC(max_mcast_grps,
 
88
                 "Maximum number of multicast groups to support");
 
89
 
 
90
unsigned int ib_qib_max_mcast_qp_attached = 16;
 
91
module_param_named(max_mcast_qp_attached, ib_qib_max_mcast_qp_attached,
 
92
                   uint, S_IRUGO);
 
93
MODULE_PARM_DESC(max_mcast_qp_attached,
 
94
                 "Maximum number of attached QPs to support");
 
95
 
 
96
unsigned int ib_qib_max_srqs = 1024;
 
97
module_param_named(max_srqs, ib_qib_max_srqs, uint, S_IRUGO);
 
98
MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");
 
99
 
 
100
unsigned int ib_qib_max_srq_sges = 128;
 
101
module_param_named(max_srq_sges, ib_qib_max_srq_sges, uint, S_IRUGO);
 
102
MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");
 
103
 
 
104
unsigned int ib_qib_max_srq_wrs = 0x1FFFF;
 
105
module_param_named(max_srq_wrs, ib_qib_max_srq_wrs, uint, S_IRUGO);
 
106
MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support");
 
107
 
 
108
static unsigned int ib_qib_disable_sma;
 
109
module_param_named(disable_sma, ib_qib_disable_sma, uint, S_IWUSR | S_IRUGO);
 
110
MODULE_PARM_DESC(disable_sma, "Disable the SMA");
 
111
 
 
112
/*
 
113
 * Note that it is OK to post send work requests in the SQE and ERR
 
114
 * states; qib_do_send() will process them and generate error
 
115
 * completions as per IB 1.2 C10-96.
 
116
 */
 
117
const int ib_qib_state_ops[IB_QPS_ERR + 1] = {
 
118
        [IB_QPS_RESET] = 0,
 
119
        [IB_QPS_INIT] = QIB_POST_RECV_OK,
 
120
        [IB_QPS_RTR] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK,
 
121
        [IB_QPS_RTS] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK |
 
122
            QIB_POST_SEND_OK | QIB_PROCESS_SEND_OK |
 
123
            QIB_PROCESS_NEXT_SEND_OK,
 
124
        [IB_QPS_SQD] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK |
 
125
            QIB_POST_SEND_OK | QIB_PROCESS_SEND_OK,
 
126
        [IB_QPS_SQE] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK |
 
127
            QIB_POST_SEND_OK | QIB_FLUSH_SEND,
 
128
        [IB_QPS_ERR] = QIB_POST_RECV_OK | QIB_FLUSH_RECV |
 
129
            QIB_POST_SEND_OK | QIB_FLUSH_SEND,
 
130
};
 
131
 
 
132
struct qib_ucontext {
 
133
        struct ib_ucontext ibucontext;
 
134
};
 
135
 
 
136
static inline struct qib_ucontext *to_iucontext(struct ib_ucontext
 
137
                                                  *ibucontext)
 
138
{
 
139
        return container_of(ibucontext, struct qib_ucontext, ibucontext);
 
140
}
 
141
 
 
142
/*
 
143
 * Translate ib_wr_opcode into ib_wc_opcode.
 
144
 */
 
145
const enum ib_wc_opcode ib_qib_wc_opcode[] = {
 
146
        [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
 
147
        [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
 
148
        [IB_WR_SEND] = IB_WC_SEND,
 
149
        [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
 
150
        [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
 
151
        [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
 
152
        [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
 
153
};
 
154
 
 
155
/*
 
156
 * System image GUID, held as a big-endian 64-bit value (__be64).
 * Declared without "static", so it has external linkage.
 
157
 */
 
158
__be64 ib_qib_sys_image_guid;
 
159
 
 
160
/**
 
161
 * qib_copy_sge - copy data to SGE memory
 
162
 * @ss: the SGE state
 
163
 * @data: the data to copy
 
164
 * @length: the length of the data
 
165
 */
 
166
void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length, int release)
 
167
{
 
168
        struct qib_sge *sge = &ss->sge;
 
169
 
 
170
        while (length) {
 
171
                u32 len = sge->length;
 
172
 
 
173
                if (len > length)
 
174
                        len = length;
 
175
                if (len > sge->sge_length)
 
176
                        len = sge->sge_length;
 
177
                BUG_ON(len == 0);
 
178
                memcpy(sge->vaddr, data, len);
 
179
                sge->vaddr += len;
 
180
                sge->length -= len;
 
181
                sge->sge_length -= len;
 
182
                if (sge->sge_length == 0) {
 
183
                        if (release)
 
184
                                atomic_dec(&sge->mr->refcount);
 
185
                        if (--ss->num_sge)
 
186
                                *sge = *ss->sg_list++;
 
187
                } else if (sge->length == 0 && sge->mr->lkey) {
 
188
                        if (++sge->n >= QIB_SEGSZ) {
 
189
                                if (++sge->m >= sge->mr->mapsz)
 
190
                                        break;
 
191
                                sge->n = 0;
 
192
                        }
 
193
                        sge->vaddr =
 
194
                                sge->mr->map[sge->m]->segs[sge->n].vaddr;
 
195
                        sge->length =
 
196
                                sge->mr->map[sge->m]->segs[sge->n].length;
 
197
                }
 
198
                data += len;
 
199
                length -= len;
 
200
        }
 
201
}
 
202
 
 
203
/**
 
204
 * qib_skip_sge - skip over SGE memory - XXX almost dup of prev func
 
205
 * @ss: the SGE state
 
206
 * @length: the number of bytes to skip
 
207
 */
 
208
void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release)
 
209
{
 
210
        struct qib_sge *sge = &ss->sge;
 
211
 
 
212
        while (length) {
 
213
                u32 len = sge->length;
 
214
 
 
215
                if (len > length)
 
216
                        len = length;
 
217
                if (len > sge->sge_length)
 
218
                        len = sge->sge_length;
 
219
                BUG_ON(len == 0);
 
220
                sge->vaddr += len;
 
221
                sge->length -= len;
 
222
                sge->sge_length -= len;
 
223
                if (sge->sge_length == 0) {
 
224
                        if (release)
 
225
                                atomic_dec(&sge->mr->refcount);
 
226
                        if (--ss->num_sge)
 
227
                                *sge = *ss->sg_list++;
 
228
                } else if (sge->length == 0 && sge->mr->lkey) {
 
229
                        if (++sge->n >= QIB_SEGSZ) {
 
230
                                if (++sge->m >= sge->mr->mapsz)
 
231
                                        break;
 
232
                                sge->n = 0;
 
233
                        }
 
234
                        sge->vaddr =
 
235
                                sge->mr->map[sge->m]->segs[sge->n].vaddr;
 
236
                        sge->length =
 
237
                                sge->mr->map[sge->m]->segs[sge->n].length;
 
238
                }
 
239
                length -= len;
 
240
        }
 
241
}
 
242
 
 
243
/*
 
244
 * Count the number of DMA descriptors needed to send length bytes of data.
 
245
 * Don't modify the qib_sge_state to get the count.
 
246
 * Return zero if any of the segments is not aligned.
 
247
 */
 
248
static u32 qib_count_sge(struct qib_sge_state *ss, u32 length)
 
249
{
 
250
        struct qib_sge *sg_list = ss->sg_list;
 
251
        struct qib_sge sge = ss->sge;
 
252
        u8 num_sge = ss->num_sge;
 
253
        u32 ndesc = 1;  /* count the header */
 
254
 
 
255
        while (length) {
 
256
                u32 len = sge.length;
 
257
 
 
258
                if (len > length)
 
259
                        len = length;
 
260
                if (len > sge.sge_length)
 
261
                        len = sge.sge_length;
 
262
                BUG_ON(len == 0);
 
263
                if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
 
264
                    (len != length && (len & (sizeof(u32) - 1)))) {
 
265
                        ndesc = 0;
 
266
                        break;
 
267
                }
 
268
                ndesc++;
 
269
                sge.vaddr += len;
 
270
                sge.length -= len;
 
271
                sge.sge_length -= len;
 
272
                if (sge.sge_length == 0) {
 
273
                        if (--num_sge)
 
274
                                sge = *sg_list++;
 
275
                } else if (sge.length == 0 && sge.mr->lkey) {
 
276
                        if (++sge.n >= QIB_SEGSZ) {
 
277
                                if (++sge.m >= sge.mr->mapsz)
 
278
                                        break;
 
279
                                sge.n = 0;
 
280
                        }
 
281
                        sge.vaddr =
 
282
                                sge.mr->map[sge.m]->segs[sge.n].vaddr;
 
283
                        sge.length =
 
284
                                sge.mr->map[sge.m]->segs[sge.n].length;
 
285
                }
 
286
                length -= len;
 
287
        }
 
288
        return ndesc;
 
289
}
 
290
 
 
291
/*
 
292
 * Copy from the SGEs to the data buffer.
 
293
 */
 
294
static void qib_copy_from_sge(void *data, struct qib_sge_state *ss, u32 length)
 
295
{
 
296
        struct qib_sge *sge = &ss->sge;
 
297
 
 
298
        while (length) {
 
299
                u32 len = sge->length;
 
300
 
 
301
                if (len > length)
 
302
                        len = length;
 
303
                if (len > sge->sge_length)
 
304
                        len = sge->sge_length;
 
305
                BUG_ON(len == 0);
 
306
                memcpy(data, sge->vaddr, len);
 
307
                sge->vaddr += len;
 
308
                sge->length -= len;
 
309
                sge->sge_length -= len;
 
310
                if (sge->sge_length == 0) {
 
311
                        if (--ss->num_sge)
 
312
                                *sge = *ss->sg_list++;
 
313
                } else if (sge->length == 0 && sge->mr->lkey) {
 
314
                        if (++sge->n >= QIB_SEGSZ) {
 
315
                                if (++sge->m >= sge->mr->mapsz)
 
316
                                        break;
 
317
                                sge->n = 0;
 
318
                        }
 
319
                        sge->vaddr =
 
320
                                sge->mr->map[sge->m]->segs[sge->n].vaddr;
 
321
                        sge->length =
 
322
                                sge->mr->map[sge->m]->segs[sge->n].length;
 
323
                }
 
324
                data += len;
 
325
                length -= len;
 
326
        }
 
327
}
 
328
 
 
329
/**
 
330
 * qib_post_one_send - post one RC, UC, or UD send work request
 
331
 * @qp: the QP to post on
 
332
 * @wr: the work request to send
 
333
 */
 
334
static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr)
 
335
{
 
336
        struct qib_swqe *wqe;
 
337
        u32 next;
 
338
        int i;
 
339
        int j;
 
340
        int acc;
 
341
        int ret;
 
342
        unsigned long flags;
 
343
        struct qib_lkey_table *rkt;
 
344
        struct qib_pd *pd;
 
345
 
 
346
        spin_lock_irqsave(&qp->s_lock, flags);
 
347
 
 
348
        /* Check that state is OK to post send. */
 
349
        if (unlikely(!(ib_qib_state_ops[qp->state] & QIB_POST_SEND_OK)))
 
350
                goto bail_inval;
 
351
 
 
352
        /* IB spec says that num_sge == 0 is OK. */
 
353
        if (wr->num_sge > qp->s_max_sge)
 
354
                goto bail_inval;
 
355
 
 
356
        /*
 
357
         * Don't allow RDMA reads or atomic operations on UC or
 
358
         * undefined operations.
 
359
         * Make sure buffer is large enough to hold the result for atomics.
 
360
         */
 
361
        if (wr->opcode == IB_WR_FAST_REG_MR) {
 
362
                if (qib_fast_reg_mr(qp, wr))
 
363
                        goto bail_inval;
 
364
        } else if (qp->ibqp.qp_type == IB_QPT_UC) {
 
365
                if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
 
366
                        goto bail_inval;
 
367
        } else if (qp->ibqp.qp_type != IB_QPT_RC) {
 
368
                /* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */
 
369
                if (wr->opcode != IB_WR_SEND &&
 
370
                    wr->opcode != IB_WR_SEND_WITH_IMM)
 
371
                        goto bail_inval;
 
372
                /* Check UD destination address PD */
 
373
                if (qp->ibqp.pd != wr->wr.ud.ah->pd)
 
374
                        goto bail_inval;
 
375
        } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
 
376
                goto bail_inval;
 
377
        else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
 
378
                   (wr->num_sge == 0 ||
 
379
                    wr->sg_list[0].length < sizeof(u64) ||
 
380
                    wr->sg_list[0].addr & (sizeof(u64) - 1)))
 
381
                goto bail_inval;
 
382
        else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
 
383
                goto bail_inval;
 
384
 
 
385
        next = qp->s_head + 1;
 
386
        if (next >= qp->s_size)
 
387
                next = 0;
 
388
        if (next == qp->s_last) {
 
389
                ret = -ENOMEM;
 
390
                goto bail;
 
391
        }
 
392
 
 
393
        rkt = &to_idev(qp->ibqp.device)->lk_table;
 
394
        pd = to_ipd(qp->ibqp.pd);
 
395
        wqe = get_swqe_ptr(qp, qp->s_head);
 
396
        wqe->wr = *wr;
 
397
        wqe->length = 0;
 
398
        j = 0;
 
399
        if (wr->num_sge) {
 
400
                acc = wr->opcode >= IB_WR_RDMA_READ ?
 
401
                        IB_ACCESS_LOCAL_WRITE : 0;
 
402
                for (i = 0; i < wr->num_sge; i++) {
 
403
                        u32 length = wr->sg_list[i].length;
 
404
                        int ok;
 
405
 
 
406
                        if (length == 0)
 
407
                                continue;
 
408
                        ok = qib_lkey_ok(rkt, pd, &wqe->sg_list[j],
 
409
                                         &wr->sg_list[i], acc);
 
410
                        if (!ok)
 
411
                                goto bail_inval_free;
 
412
                        wqe->length += length;
 
413
                        j++;
 
414
                }
 
415
                wqe->wr.num_sge = j;
 
416
        }
 
417
        if (qp->ibqp.qp_type == IB_QPT_UC ||
 
418
            qp->ibqp.qp_type == IB_QPT_RC) {
 
419
                if (wqe->length > 0x80000000U)
 
420
                        goto bail_inval_free;
 
421
        } else if (wqe->length > (dd_from_ibdev(qp->ibqp.device)->pport +
 
422
                                  qp->port_num - 1)->ibmtu)
 
423
                goto bail_inval_free;
 
424
        else
 
425
                atomic_inc(&to_iah(wr->wr.ud.ah)->refcount);
 
426
        wqe->ssn = qp->s_ssn++;
 
427
        qp->s_head = next;
 
428
 
 
429
        ret = 0;
 
430
        goto bail;
 
431
 
 
432
bail_inval_free:
 
433
        while (j) {
 
434
                struct qib_sge *sge = &wqe->sg_list[--j];
 
435
 
 
436
                atomic_dec(&sge->mr->refcount);
 
437
        }
 
438
bail_inval:
 
439
        ret = -EINVAL;
 
440
bail:
 
441
        spin_unlock_irqrestore(&qp->s_lock, flags);
 
442
        return ret;
 
443
}
 
444
 
 
445
/**
 
446
 * qib_post_send - post a send on a QP
 
447
 * @ibqp: the QP to post the send on
 
448
 * @wr: the list of work requests to post
 
449
 * @bad_wr: the first bad WR is put here
 
450
 *
 
451
 * This may be called from interrupt context.
 
452
 */
 
453
static int qib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
454
                         struct ib_send_wr **bad_wr)
 
455
{
 
456
        struct qib_qp *qp = to_iqp(ibqp);
 
457
        int err = 0;
 
458
 
 
459
        for (; wr; wr = wr->next) {
 
460
                err = qib_post_one_send(qp, wr);
 
461
                if (err) {
 
462
                        *bad_wr = wr;
 
463
                        goto bail;
 
464
                }
 
465
        }
 
466
 
 
467
        /* Try to do the send work in the caller's context. */
 
468
        qib_do_send(&qp->s_work);
 
469
 
 
470
bail:
 
471
        return err;
 
472
}
 
473
 
 
474
/**
 
475
 * qib_post_receive - post a receive on a QP
 
476
 * @ibqp: the QP to post the receive on
 
477
 * @wr: the WR to post
 
478
 * @bad_wr: the first bad WR is put here
 
479
 *
 
480
 * This may be called from interrupt context.
 
481
 */
 
482
static int qib_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 
483
                            struct ib_recv_wr **bad_wr)
 
484
{
 
485
        struct qib_qp *qp = to_iqp(ibqp);
 
486
        struct qib_rwq *wq = qp->r_rq.wq;
 
487
        unsigned long flags;
 
488
        int ret;
 
489
 
 
490
        /* Check that state is OK to post receive. */
 
491
        if (!(ib_qib_state_ops[qp->state] & QIB_POST_RECV_OK) || !wq) {
 
492
                *bad_wr = wr;
 
493
                ret = -EINVAL;
 
494
                goto bail;
 
495
        }
 
496
 
 
497
        for (; wr; wr = wr->next) {
 
498
                struct qib_rwqe *wqe;
 
499
                u32 next;
 
500
                int i;
 
501
 
 
502
                if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
 
503
                        *bad_wr = wr;
 
504
                        ret = -EINVAL;
 
505
                        goto bail;
 
506
                }
 
507
 
 
508
                spin_lock_irqsave(&qp->r_rq.lock, flags);
 
509
                next = wq->head + 1;
 
510
                if (next >= qp->r_rq.size)
 
511
                        next = 0;
 
512
                if (next == wq->tail) {
 
513
                        spin_unlock_irqrestore(&qp->r_rq.lock, flags);
 
514
                        *bad_wr = wr;
 
515
                        ret = -ENOMEM;
 
516
                        goto bail;
 
517
                }
 
518
 
 
519
                wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
 
520
                wqe->wr_id = wr->wr_id;
 
521
                wqe->num_sge = wr->num_sge;
 
522
                for (i = 0; i < wr->num_sge; i++)
 
523
                        wqe->sg_list[i] = wr->sg_list[i];
 
524
                /* Make sure queue entry is written before the head index. */
 
525
                smp_wmb();
 
526
                wq->head = next;
 
527
                spin_unlock_irqrestore(&qp->r_rq.lock, flags);
 
528
        }
 
529
        ret = 0;
 
530
 
 
531
bail:
 
532
        return ret;
 
533
}
 
534
 
 
535
/**
 
536
 * qib_qp_rcv - processing an incoming packet on a QP
 
537
 * @rcd: the context pointer
 
538
 * @hdr: the packet header
 
539
 * @has_grh: true if the packet has a GRH
 
540
 * @data: the packet data
 
541
 * @tlen: the packet length
 
542
 * @qp: the QP the packet came on
 
543
 *
 
544
 * This is called from qib_ib_rcv() to process an incoming packet
 
545
 * for the given QP.
 
546
 * Called at interrupt level.
 
547
 */
 
548
static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
 
549
                       int has_grh, void *data, u32 tlen, struct qib_qp *qp)
 
550
{
 
551
        struct qib_ibport *ibp = &rcd->ppd->ibport_data;
 
552
 
 
553
        /* Check for valid receive state. */
 
554
        if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) {
 
555
                ibp->n_pkt_drops++;
 
556
                return;
 
557
        }
 
558
 
 
559
        switch (qp->ibqp.qp_type) {
 
560
        case IB_QPT_SMI:
 
561
        case IB_QPT_GSI:
 
562
                if (ib_qib_disable_sma)
 
563
                        break;
 
564
                /* FALLTHROUGH */
 
565
        case IB_QPT_UD:
 
566
                qib_ud_rcv(ibp, hdr, has_grh, data, tlen, qp);
 
567
                break;
 
568
 
 
569
        case IB_QPT_RC:
 
570
                qib_rc_rcv(rcd, hdr, has_grh, data, tlen, qp);
 
571
                break;
 
572
 
 
573
        case IB_QPT_UC:
 
574
                qib_uc_rcv(ibp, hdr, has_grh, data, tlen, qp);
 
575
                break;
 
576
 
 
577
        default:
 
578
                break;
 
579
        }
 
580
}
 
581
 
 
582
/**
 
583
 * qib_ib_rcv - process an incoming packet
 
584
 * @rcd: the context pointer
 
585
 * @rhdr: the header of the packet
 
586
 * @data: the packet payload
 
587
 * @tlen: the packet length
 
588
 *
 
589
 * This is called from qib_kreceive() to process an incoming packet at
 
590
 * interrupt level. Tlen is the length of the header + data + CRC in bytes.
 
591
 */
 
592
void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
 
593
{
 
594
        struct qib_pportdata *ppd = rcd->ppd;
 
595
        struct qib_ibport *ibp = &ppd->ibport_data;
 
596
        struct qib_ib_header *hdr = rhdr;
 
597
        struct qib_other_headers *ohdr;
 
598
        struct qib_qp *qp;
 
599
        u32 qp_num;
 
600
        int lnh;
 
601
        u8 opcode;
 
602
        u16 lid;
 
603
 
 
604
        /* 24 == LRH+BTH+CRC */
 
605
        if (unlikely(tlen < 24))
 
606
                goto drop;
 
607
 
 
608
        /* Check for a valid destination LID (see ch. 7.11.1). */
 
609
        lid = be16_to_cpu(hdr->lrh[1]);
 
610
        if (lid < QIB_MULTICAST_LID_BASE) {
 
611
                lid &= ~((1 << ppd->lmc) - 1);
 
612
                if (unlikely(lid != ppd->lid))
 
613
                        goto drop;
 
614
        }
 
615
 
 
616
        /* Check for GRH */
 
617
        lnh = be16_to_cpu(hdr->lrh[0]) & 3;
 
618
        if (lnh == QIB_LRH_BTH)
 
619
                ohdr = &hdr->u.oth;
 
620
        else if (lnh == QIB_LRH_GRH) {
 
621
                u32 vtf;
 
622
 
 
623
                ohdr = &hdr->u.l.oth;
 
624
                if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
 
625
                        goto drop;
 
626
                vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
 
627
                if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
 
628
                        goto drop;
 
629
        } else
 
630
                goto drop;
 
631
 
 
632
        opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
 
633
        ibp->opstats[opcode & 0x7f].n_bytes += tlen;
 
634
        ibp->opstats[opcode & 0x7f].n_packets++;
 
635
 
 
636
        /* Get the destination QP number. */
 
637
        qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK;
 
638
        if (qp_num == QIB_MULTICAST_QPN) {
 
639
                struct qib_mcast *mcast;
 
640
                struct qib_mcast_qp *p;
 
641
 
 
642
                if (lnh != QIB_LRH_GRH)
 
643
                        goto drop;
 
644
                mcast = qib_mcast_find(ibp, &hdr->u.l.grh.dgid);
 
645
                if (mcast == NULL)
 
646
                        goto drop;
 
647
                ibp->n_multicast_rcv++;
 
648
                list_for_each_entry_rcu(p, &mcast->qp_list, list)
 
649
                        qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp);
 
650
                /*
 
651
                 * Notify qib_multicast_detach() if it is waiting for us
 
652
                 * to finish.
 
653
                 */
 
654
                if (atomic_dec_return(&mcast->refcount) <= 1)
 
655
                        wake_up(&mcast->wait);
 
656
        } else {
 
657
                qp = qib_lookup_qpn(ibp, qp_num);
 
658
                if (!qp)
 
659
                        goto drop;
 
660
                ibp->n_unicast_rcv++;
 
661
                qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp);
 
662
                /*
 
663
                 * Notify qib_destroy_qp() if it is waiting
 
664
                 * for us to finish.
 
665
                 */
 
666
                if (atomic_dec_and_test(&qp->refcount))
 
667
                        wake_up(&qp->wait);
 
668
        }
 
669
        return;
 
670
 
 
671
drop:
 
672
        ibp->n_pkt_drops++;
 
673
}
 
674
 
 
675
/*
 
676
 * This is called from a timer to check for QPs
 
677
 * which need kernel memory in order to send a packet.
 
678
 */
 
679
static void mem_timer(unsigned long data)
 
680
{
 
681
        struct qib_ibdev *dev = (struct qib_ibdev *) data;
 
682
        struct list_head *list = &dev->memwait;
 
683
        struct qib_qp *qp = NULL;
 
684
        unsigned long flags;
 
685
 
 
686
        spin_lock_irqsave(&dev->pending_lock, flags);
 
687
        if (!list_empty(list)) {
 
688
                qp = list_entry(list->next, struct qib_qp, iowait);
 
689
                list_del_init(&qp->iowait);
 
690
                atomic_inc(&qp->refcount);
 
691
                if (!list_empty(list))
 
692
                        mod_timer(&dev->mem_timer, jiffies + 1);
 
693
        }
 
694
        spin_unlock_irqrestore(&dev->pending_lock, flags);
 
695
 
 
696
        if (qp) {
 
697
                spin_lock_irqsave(&qp->s_lock, flags);
 
698
                if (qp->s_flags & QIB_S_WAIT_KMEM) {
 
699
                        qp->s_flags &= ~QIB_S_WAIT_KMEM;
 
700
                        qib_schedule_send(qp);
 
701
                }
 
702
                spin_unlock_irqrestore(&qp->s_lock, flags);
 
703
                if (atomic_dec_and_test(&qp->refcount))
 
704
                        wake_up(&qp->wait);
 
705
        }
 
706
}
 
707
 
 
708
/*
 * Advance the SGE state by length bytes.  When the current SGE is used
 * up the next one from sg_list is loaded (if any remain); when only the
 * current segment is used up, the SGE moves on to the next segment of
 * its memory region.
 */
static void update_sge(struct qib_sge_state *ss, u32 length)
{
	struct qib_sge *cur = &ss->sge;

	cur->vaddr += length;
	cur->length -= length;
	cur->sge_length -= length;

	if (!cur->sge_length) {
		if (--ss->num_sge)
			*cur = *ss->sg_list++;
		return;
	}
	if (cur->length || !cur->mr->lkey)
		return;

	/* Segment exhausted: step to the next segment of the MR. */
	if (++cur->n >= QIB_SEGSZ) {
		if (++cur->m >= cur->mr->mapsz)
			return;
		cur->n = 0;
	}
	cur->vaddr = cur->mr->map[cur->m]->segs[cur->n].vaddr;
	cur->length = cur->mr->map[cur->m]->segs[cur->n].length;
}
 
728
 
 
729
#ifdef __LITTLE_ENDIAN
 
730
static inline u32 get_upper_bits(u32 data, u32 shift)
 
731
{
 
732
        return data >> shift;
 
733
}
 
734
 
 
735
static inline u32 set_upper_bits(u32 data, u32 shift)
 
736
{
 
737
        return data << shift;
 
738
}
 
739
 
 
740
static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
 
741
{
 
742
        data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
 
743
        data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
 
744
        return data;
 
745
}
 
746
#else
 
747
static inline u32 get_upper_bits(u32 data, u32 shift)
 
748
{
 
749
        return data << shift;
 
750
}
 
751
 
 
752
static inline u32 set_upper_bits(u32 data, u32 shift)
 
753
{
 
754
        return data >> shift;
 
755
}
 
756
 
 
757
static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
 
758
{
 
759
        data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
 
760
        data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
 
761
        return data;
 
762
}
 
763
#endif
 
764
 
 
765
/*
 * Copy @length bytes described by @ss into the PIO buffer at @piobuf one
 * 32-bit word at a time, handling source addresses and chunk lengths that
 * are not multiples of four bytes.
 *
 * Bytes that do not yet fill a dword are accumulated in 'pend' (with
 * 'pend_bytes' counting them) and merged with data from the next chunk.
 * The final dword of the packet is held back in 'trigger' and written only
 * after the SGE state has been advanced; when @flush_wc is non-zero that
 * write is bracketed by qib_flush_wc() calls.
 */
static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss,
		    u32 length, unsigned flush_wc)
{
	u32 pend_bytes = 0;
	u32 pend = 0;
	u32 trigger;

	for (;;) {
		u32 chunk = ss->sge.length;
		u32 misalign;

		/* Limit this pass to what is left of the packet and the SGE. */
		if (chunk > length)
			chunk = length;
		if (chunk > ss->sge.sge_length)
			chunk = ss->sge.sge_length;
		BUG_ON(chunk == 0);

		misalign = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
		if (misalign) {
			/* Unaligned source: read the enclosing aligned dword. */
			u32 *src = (u32 *)((unsigned long)ss->sge.vaddr &
					   ~(sizeof(u32) - 1));
			u32 word = get_upper_bits(*src,
						  misalign * BITS_PER_BYTE);
			u32 usable = sizeof(u32) - misalign;

			if (chunk > usable)
				chunk = usable;
			if (chunk + pend_bytes >= sizeof(u32)) {
				/* Enough bytes to complete the pending dword. */
				pend |= set_upper_bits(word, pend_bytes *
						       BITS_PER_BYTE);
				chunk = sizeof(u32) - pend_bytes;
				if (chunk == length) {
					trigger = pend;
					break;
				}
				__raw_writel(pend, piobuf);
				piobuf++;
				pend_bytes = 0;
				pend = 0;
			} else {
				/* Clear unused upper bytes */
				pend |= clear_upper_bytes(word, chunk,
							  pend_bytes);
				if (chunk == length) {
					trigger = pend;
					break;
				}
				pend_bytes += chunk;
			}
		} else if (pend_bytes) {
			/* Source is aligned but the output is mid-dword. */
			u32 *src = (u32 *) ss->sge.vaddr;
			int shift = pend_bytes * BITS_PER_BYTE;
			int ushift = 32 - shift;
			u32 remain;

			for (remain = chunk; remain >= sizeof(u32);
			     remain -= sizeof(u32)) {
				u32 word = *src;

				pend |= set_upper_bits(word, shift);
				__raw_writel(pend, piobuf);
				pend = get_upper_bits(word, ushift);
				piobuf++;
				src++;
			}
			/* 'pend_bytes' bytes are still pending at this point. */
			if (remain) {
				u32 word = *src;

				if (remain + pend_bytes >= sizeof(u32)) {
					pend |= set_upper_bits(word, shift);
					chunk -= remain + pend_bytes -
						 sizeof(u32);
					if (chunk == length) {
						trigger = pend;
						break;
					}
					__raw_writel(pend, piobuf);
					piobuf++;
					pend_bytes = 0;
					pend = 0;
				} else {
					/* Clear unused upper bytes */
					pend |= clear_upper_bytes(word, remain,
								  pend_bytes);
					if (chunk == length) {
						trigger = pend;
						break;
					}
					pend_bytes += remain;
				}
			} else if (chunk == length) {
				trigger = pend;
				break;
			}
		} else if (chunk == length) {
			/*
			 * Aligned, nothing pending, and this chunk ends the
			 * packet: round up to whole dwords and hold back the
			 * last one.
			 */
			u32 ndw = (chunk + 3) >> 2;

			qib_pio_copy(piobuf, ss->sge.vaddr, ndw - 1);
			piobuf += ndw - 1;
			trigger = ((u32 *) ss->sge.vaddr)[ndw - 1];
			break;
		} else {
			/* Aligned bulk copy; stash any trailing partial dword. */
			u32 ndw = chunk >> 2;

			qib_pio_copy(piobuf, ss->sge.vaddr, ndw);
			piobuf += ndw;

			pend_bytes = chunk & (sizeof(u32) - 1);
			if (pend_bytes) {
				u32 word = ((u32 *) ss->sge.vaddr)[ndw];

				/* Clear unused upper bytes */
				pend = clear_upper_bytes(word, pend_bytes, 0);
			}
		}
		update_sge(ss, chunk);
		length -= chunk;
	}

	/* Advance the SGE state before the final word is written. */
	update_sge(ss, length);

	/* Everything before the trigger word must be flushed first. */
	if (flush_wc)
		qib_flush_wc();
	__raw_writel(trigger, piobuf);
	/* Make sure the trigger word itself is written out. */
	if (flush_wc)
		qib_flush_wc();
}
 
900
 
 
901
static struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev,
 
902
                                         struct qib_qp *qp, int *retp)
 
903
{
 
904
        struct qib_verbs_txreq *tx;
 
905
        unsigned long flags;
 
906
 
 
907
        spin_lock_irqsave(&qp->s_lock, flags);
 
908
        spin_lock(&dev->pending_lock);
 
909
 
 
910
        if (!list_empty(&dev->txreq_free)) {
 
911
                struct list_head *l = dev->txreq_free.next;
 
912
 
 
913
                list_del(l);
 
914
                tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
 
915
                *retp = 0;
 
916
        } else {
 
917
                if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK &&
 
918
                    list_empty(&qp->iowait)) {
 
919
                        dev->n_txwait++;
 
920
                        qp->s_flags |= QIB_S_WAIT_TX;
 
921
                        list_add_tail(&qp->iowait, &dev->txwait);
 
922
                }
 
923
                tx = NULL;
 
924
                qp->s_flags &= ~QIB_S_BUSY;
 
925
                *retp = -EBUSY;
 
926
        }
 
927
 
 
928
        spin_unlock(&dev->pending_lock);
 
929
        spin_unlock_irqrestore(&qp->s_lock, flags);
 
930
 
 
931
        return tx;
 
932
}
 
933
 
 
934
void qib_put_txreq(struct qib_verbs_txreq *tx)
 
935
{
 
936
        struct qib_ibdev *dev;
 
937
        struct qib_qp *qp;
 
938
        unsigned long flags;
 
939
 
 
940
        qp = tx->qp;
 
941
        dev = to_idev(qp->ibqp.device);
 
942
 
 
943
        if (atomic_dec_and_test(&qp->refcount))
 
944
                wake_up(&qp->wait);
 
945
        if (tx->mr) {
 
946
                atomic_dec(&tx->mr->refcount);
 
947
                tx->mr = NULL;
 
948
        }
 
949
        if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) {
 
950
                tx->txreq.flags &= ~QIB_SDMA_TXREQ_F_FREEBUF;
 
951
                dma_unmap_single(&dd_from_dev(dev)->pcidev->dev,
 
952
                                 tx->txreq.addr, tx->hdr_dwords << 2,
 
953
                                 DMA_TO_DEVICE);
 
954
                kfree(tx->align_buf);
 
955
        }
 
956
 
 
957
        spin_lock_irqsave(&dev->pending_lock, flags);
 
958
 
 
959
        /* Put struct back on free list */
 
960
        list_add(&tx->txreq.list, &dev->txreq_free);
 
961
 
 
962
        if (!list_empty(&dev->txwait)) {
 
963
                /* Wake up first QP wanting a free struct */
 
964
                qp = list_entry(dev->txwait.next, struct qib_qp, iowait);
 
965
                list_del_init(&qp->iowait);
 
966
                atomic_inc(&qp->refcount);
 
967
                spin_unlock_irqrestore(&dev->pending_lock, flags);
 
968
 
 
969
                spin_lock_irqsave(&qp->s_lock, flags);
 
970
                if (qp->s_flags & QIB_S_WAIT_TX) {
 
971
                        qp->s_flags &= ~QIB_S_WAIT_TX;
 
972
                        qib_schedule_send(qp);
 
973
                }
 
974
                spin_unlock_irqrestore(&qp->s_lock, flags);
 
975
 
 
976
                if (atomic_dec_and_test(&qp->refcount))
 
977
                        wake_up(&qp->wait);
 
978
        } else
 
979
                spin_unlock_irqrestore(&dev->pending_lock, flags);
 
980
}
 
981
 
 
982
/*
 
983
 * This is called when there are send DMA descriptors that might be
 
984
 * available.
 
985
 *
 
986
 * This is called with ppd->sdma_lock held.
 
987
 */
 
988
void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail)
 
989
{
 
990
        struct qib_qp *qp, *nqp;
 
991
        struct qib_qp *qps[20];
 
992
        struct qib_ibdev *dev;
 
993
        unsigned i, n;
 
994
 
 
995
        n = 0;
 
996
        dev = &ppd->dd->verbs_dev;
 
997
        spin_lock(&dev->pending_lock);
 
998
 
 
999
        /* Search wait list for first QP wanting DMA descriptors. */
 
1000
        list_for_each_entry_safe(qp, nqp, &dev->dmawait, iowait) {
 
1001
                if (qp->port_num != ppd->port)
 
1002
                        continue;
 
1003
                if (n == ARRAY_SIZE(qps))
 
1004
                        break;
 
1005
                if (qp->s_tx->txreq.sg_count > avail)
 
1006
                        break;
 
1007
                avail -= qp->s_tx->txreq.sg_count;
 
1008
                list_del_init(&qp->iowait);
 
1009
                atomic_inc(&qp->refcount);
 
1010
                qps[n++] = qp;
 
1011
        }
 
1012
 
 
1013
        spin_unlock(&dev->pending_lock);
 
1014
 
 
1015
        for (i = 0; i < n; i++) {
 
1016
                qp = qps[i];
 
1017
                spin_lock(&qp->s_lock);
 
1018
                if (qp->s_flags & QIB_S_WAIT_DMA_DESC) {
 
1019
                        qp->s_flags &= ~QIB_S_WAIT_DMA_DESC;
 
1020
                        qib_schedule_send(qp);
 
1021
                }
 
1022
                spin_unlock(&qp->s_lock);
 
1023
                if (atomic_dec_and_test(&qp->refcount))
 
1024
                        wake_up(&qp->wait);
 
1025
        }
 
1026
}
 
1027
 
 
1028
/*
 
1029
 * This is called with ppd->sdma_lock held.
 
1030
 */
 
1031
static void sdma_complete(struct qib_sdma_txreq *cookie, int status)
 
1032
{
 
1033
        struct qib_verbs_txreq *tx =
 
1034
                container_of(cookie, struct qib_verbs_txreq, txreq);
 
1035
        struct qib_qp *qp = tx->qp;
 
1036
 
 
1037
        spin_lock(&qp->s_lock);
 
1038
        if (tx->wqe)
 
1039
                qib_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
 
1040
        else if (qp->ibqp.qp_type == IB_QPT_RC) {
 
1041
                struct qib_ib_header *hdr;
 
1042
 
 
1043
                if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF)
 
1044
                        hdr = &tx->align_buf->hdr;
 
1045
                else {
 
1046
                        struct qib_ibdev *dev = to_idev(qp->ibqp.device);
 
1047
 
 
1048
                        hdr = &dev->pio_hdrs[tx->hdr_inx].hdr;
 
1049
                }
 
1050
                qib_rc_send_complete(qp, hdr);
 
1051
        }
 
1052
        if (atomic_dec_and_test(&qp->s_dma_busy)) {
 
1053
                if (qp->state == IB_QPS_RESET)
 
1054
                        wake_up(&qp->wait_dma);
 
1055
                else if (qp->s_flags & QIB_S_WAIT_DMA) {
 
1056
                        qp->s_flags &= ~QIB_S_WAIT_DMA;
 
1057
                        qib_schedule_send(qp);
 
1058
                }
 
1059
        }
 
1060
        spin_unlock(&qp->s_lock);
 
1061
 
 
1062
        qib_put_txreq(tx);
 
1063
}
 
1064
 
 
1065
static int wait_kmem(struct qib_ibdev *dev, struct qib_qp *qp)
 
1066
{
 
1067
        unsigned long flags;
 
1068
        int ret = 0;
 
1069
 
 
1070
        spin_lock_irqsave(&qp->s_lock, flags);
 
1071
        if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
 
1072
                spin_lock(&dev->pending_lock);
 
1073
                if (list_empty(&qp->iowait)) {
 
1074
                        if (list_empty(&dev->memwait))
 
1075
                                mod_timer(&dev->mem_timer, jiffies + 1);
 
1076
                        qp->s_flags |= QIB_S_WAIT_KMEM;
 
1077
                        list_add_tail(&qp->iowait, &dev->memwait);
 
1078
                }
 
1079
                spin_unlock(&dev->pending_lock);
 
1080
                qp->s_flags &= ~QIB_S_BUSY;
 
1081
                ret = -EBUSY;
 
1082
        }
 
1083
        spin_unlock_irqrestore(&qp->s_lock, flags);
 
1084
 
 
1085
        return ret;
 
1086
}
 
1087
 
 
1088
/*
 * Send a packet using send DMA.
 *
 * If the QP has a previously built request in qp->s_tx, that request is
 * resubmitted.  Otherwise a tx request is obtained and filled in.  When the
 * payload needs fewer descriptors than the queue holds (or there is no
 * payload), the header is placed in the request's pio_hdrs slot and the
 * payload is sent from @ss.  Otherwise header and payload are copied into a
 * freshly allocated buffer that is mapped and sent as a single descriptor;
 * ibp->n_unaligned is incremented on that path, including when the
 * allocation or mapping fails.
 *
 * Returns the result of qib_sdma_verbs_send(), the error reported by
 * get_txreq(), or the result of wait_kmem() after an allocation or mapping
 * failure.
 */
static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr,
			      u32 hdrwords, struct qib_sge_state *ss, u32 len,
			      u32 plen, u32 dwords)
{
	struct qib_ibdev *dev = to_idev(qp->ibqp.device);
	struct qib_devdata *dd = dd_from_dev(dev);
	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
	struct qib_verbs_txreq *tx;
	struct qib_pio_header *phdr;
	u32 control;
	u32 ndesc;
	int ret;

	tx = qp->s_tx;
	if (tx) {
		/* resend previously constructed packet */
		qp->s_tx = NULL;
		return qib_sdma_verbs_send(ppd, tx->ss, tx->dwords, tx);
	}

	tx = get_txreq(dev, qp, &ret);
	if (!tx)
		return ret;

	control = dd->f_setpbc_control(ppd, plen, qp->s_srate,
				       be16_to_cpu(hdr->lrh[0]) >> 12);

	/* The request holds a QP reference until qib_put_txreq(). */
	tx->qp = qp;
	atomic_inc(&qp->refcount);
	tx->wqe = qp->s_wqe;

	/* Hand the QP's RDMA memory region reference over to the request. */
	tx->mr = qp->s_rdma_mr;
	if (qp->s_rdma_mr)
		qp->s_rdma_mr = NULL;

	tx->txreq.callback = sdma_complete;
	if (dd->flags & QIB_HAS_SDMA_TIMEOUT)
		tx->txreq.flags = QIB_SDMA_TXREQ_F_HEADTOHOST;
	else
		tx->txreq.flags = QIB_SDMA_TXREQ_F_INTREQ;
	if (plen + 1 > dd->piosize2kmax_dwords)
		tx->txreq.flags |= QIB_SDMA_TXREQ_F_USELARGEBUF;

	/*
	 * Don't try to DMA from the SGEs if it takes more descriptors than
	 * the queue holds; ndesc == 0 selects the copy path below.
	 */
	ndesc = 1;
	if (len) {
		ndesc = qib_count_sge(ss, len);
		if (ndesc >= ppd->sdma_descq_cnt)
			ndesc = 0;
	}

	if (ndesc) {
		phdr = &dev->pio_hdrs[tx->hdr_inx];
		phdr->pbc[0] = cpu_to_le32(plen);
		phdr->pbc[1] = cpu_to_le32(control);
		memcpy(&phdr->hdr, hdr, hdrwords << 2);
		tx->txreq.flags |= QIB_SDMA_TXREQ_F_FREEDESC;
		tx->txreq.sg_count = ndesc;
		tx->txreq.addr = dev->pio_hdrs_phys +
			tx->hdr_inx * sizeof(struct qib_pio_header);
		tx->hdr_dwords = hdrwords + 2; /* add PBC length */
		return qib_sdma_verbs_send(ppd, ss, dwords, tx);
	}

	/* Allocate a buffer and copy the header and payload to it. */
	tx->hdr_dwords = plen + 1;
	phdr = kmalloc(tx->hdr_dwords << 2, GFP_ATOMIC);
	if (!phdr)
		goto err_tx;
	phdr->pbc[0] = cpu_to_le32(plen);
	phdr->pbc[1] = cpu_to_le32(control);
	memcpy(&phdr->hdr, hdr, hdrwords << 2);
	qib_copy_from_sge((u32 *) &phdr->hdr + hdrwords, ss, len);

	tx->txreq.addr = dma_map_single(&dd->pcidev->dev, phdr,
					tx->hdr_dwords << 2, DMA_TO_DEVICE);
	if (dma_mapping_error(&dd->pcidev->dev, tx->txreq.addr))
		goto map_err;

	/* FREEBUF makes qib_put_txreq() unmap and free the buffer. */
	tx->align_buf = phdr;
	tx->txreq.flags |= QIB_SDMA_TXREQ_F_FREEBUF;
	tx->txreq.sg_count = 1;
	ret = qib_sdma_verbs_send(ppd, NULL, 0, tx);
	goto unaligned;

map_err:
	kfree(phdr);
err_tx:
	qib_put_txreq(tx);
	ret = wait_kmem(dev, qp);
unaligned:
	ibp->n_unaligned++;
	return ret;
}
 
1184
 
 
1185
/*
 
1186
 * If we are now in the error state, return zero to flush the
 
1187
 * send work request.
 
1188
 */
 
1189
static int no_bufs_available(struct qib_qp *qp)
 
1190
{
 
1191
        struct qib_ibdev *dev = to_idev(qp->ibqp.device);
 
1192
        struct qib_devdata *dd;
 
1193
        unsigned long flags;
 
1194
        int ret = 0;
 
1195
 
 
1196
        /*
 
1197
         * Note that as soon as want_buffer() is called and
 
1198
         * possibly before it returns, qib_ib_piobufavail()
 
1199
         * could be called. Therefore, put QP on the I/O wait list before
 
1200
         * enabling the PIO avail interrupt.
 
1201
         */
 
1202
        spin_lock_irqsave(&qp->s_lock, flags);
 
1203
        if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
 
1204
                spin_lock(&dev->pending_lock);
 
1205
                if (list_empty(&qp->iowait)) {
 
1206
                        dev->n_piowait++;
 
1207
                        qp->s_flags |= QIB_S_WAIT_PIO;
 
1208
                        list_add_tail(&qp->iowait, &dev->piowait);
 
1209
                        dd = dd_from_dev(dev);
 
1210
                        dd->f_wantpiobuf_intr(dd, 1);
 
1211
                }
 
1212
                spin_unlock(&dev->pending_lock);
 
1213
                qp->s_flags &= ~QIB_S_BUSY;
 
1214
                ret = -EBUSY;
 
1215
        }
 
1216
        spin_unlock_irqrestore(&qp->s_lock, flags);
 
1217
        return ret;
 
1218
}
 
1219
 
 
1220
/*
 * Send a packet by copying it into a PIO send buffer.
 *
 * Obtains a send buffer (falling back to no_bufs_available() when none is
 * free), writes the PBC, the header and then the payload, taking care that
 * the last dword is written last and, when QIB_PIO_FLUSH_WC is set in
 * dd->flags, is bracketed by write-combining flushes.  Afterwards the QP's
 * RDMA memory region reference is dropped and the work request or RC send
 * is completed.
 *
 * Returns 0 once the packet has been written, or the result of
 * no_bufs_available() when no buffer could be obtained.
 */
static int qib_verbs_send_pio(struct qib_qp *qp, struct qib_ib_header *ibhdr,
			      u32 hdrwords, struct qib_sge_state *ss, u32 len,
			      u32 plen, u32 dwords)
{
	struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
	struct qib_pportdata *ppd = dd->pport + qp->port_num - 1;
	u32 *hdr = (u32 *) ibhdr;
	u32 __iomem *pio_base;
	u32 __iomem *piobuf;
	unsigned long flags;
	unsigned flush_wc;
	u32 control;
	u32 pbufn;
	u64 pbc;

	control = dd->f_setpbc_control(ppd, plen, qp->s_srate,
		be16_to_cpu(ibhdr->lrh[0]) >> 12);
	pbc = ((u64) control << 32) | plen;
	piobuf = dd->f_getsendbuf(ppd, pbc, &pbufn);
	if (unlikely(piobuf == NULL))
		return no_bufs_available(qp);

	/*
	 * Write the pbc.
	 * We have to flush after the PBC for correctness on some cpus
	 * or WC buffer can be written out of order.
	 */
	writeq(pbc, piobuf);
	pio_base = piobuf;
	piobuf += 2;

	flush_wc = dd->flags & QIB_PIO_FLUSH_WC;

	if (len == 0) {
		/*
		 * If there is just the header portion, must flush before
		 * writing last word of header for correctness, and after
		 * the last header word (trigger word).
		 */
		if (flush_wc) {
			qib_flush_wc();
			qib_pio_copy(piobuf, hdr, hdrwords - 1);
			qib_flush_wc();
			__raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
			qib_flush_wc();
		} else {
			qib_pio_copy(piobuf, hdr, hdrwords);
		}
	} else {
		if (flush_wc)
			qib_flush_wc();
		qib_pio_copy(piobuf, hdr, hdrwords);
		piobuf += hdrwords;

		/* The common case is aligned and contained in one segment. */
		if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
			   !((unsigned long)ss->sge.vaddr &
			     (sizeof(u32) - 1)))) {
			u32 *payload = (u32 *) ss->sge.vaddr;

			/* Update address before sending packet. */
			update_sge(ss, len);
			if (flush_wc) {
				qib_pio_copy(piobuf, payload, dwords - 1);
				/* must flush early everything before trigger word */
				qib_flush_wc();
				__raw_writel(payload[dwords - 1],
					     piobuf + dwords - 1);
				/* be sure trigger word is written */
				qib_flush_wc();
			} else {
				qib_pio_copy(piobuf, payload, dwords);
			}
		} else {
			copy_io(piobuf, ss, len, flush_wc);
		}
	}

	if (dd->flags & QIB_USE_SPCL_TRIG) {
		/*
		 * Extra write at a fixed dword offset from the buffer start;
		 * the offset depends on whether pbufn is at or beyond
		 * dd->piobcnt2k.
		 */
		u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023;

		qib_flush_wc();
		__raw_writel(0xaebecede, pio_base + spcl_off);
	}
	qib_sendbuf_done(dd, pbufn);

	if (qp->s_rdma_mr) {
		atomic_dec(&qp->s_rdma_mr->refcount);
		qp->s_rdma_mr = NULL;
	}

	if (qp->s_wqe) {
		spin_lock_irqsave(&qp->s_lock, flags);
		qib_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
		spin_unlock_irqrestore(&qp->s_lock, flags);
	} else if (qp->ibqp.qp_type == IB_QPT_RC) {
		spin_lock_irqsave(&qp->s_lock, flags);
		qib_rc_send_complete(qp, ibhdr);
		spin_unlock_irqrestore(&qp->s_lock, flags);
	}

	return 0;
}
 
1315
 
 
1316
/**
 
1317
 * qib_verbs_send - send a packet
 
1318
 * @qp: the QP to send on
 
1319
 * @hdr: the packet header
 
1320
 * @hdrwords: the number of 32-bit words in the header
 
1321
 * @ss: the SGE to send
 
1322
 * @len: the length of the packet in bytes
 
1323
 *
 
1324
 * Return zero if packet is sent or queued OK.
 
1325
 * Return non-zero and clear qp->s_flags QIB_S_BUSY otherwise.
 
1326
 */
 
1327
int qib_verbs_send(struct qib_qp *qp, struct qib_ib_header *hdr,
 
1328
                   u32 hdrwords, struct qib_sge_state *ss, u32 len)
 
1329
{
 
1330
        struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
 
1331
        u32 plen;
 
1332
        int ret;
 
1333
        u32 dwords = (len + 3) >> 2;
 
1334
 
 
1335
        /*
 
1336
         * Calculate the send buffer trigger address.
 
1337
         * The +1 counts for the pbc control dword following the pbc length.
 
1338
         */
 
1339
        plen = hdrwords + dwords + 1;
 
1340
 
 
1341
        /*
 
1342
         * VL15 packets (IB_QPT_SMI) will always use PIO, so we
 
1343
         * can defer SDMA restart until link goes ACTIVE without
 
1344
         * worrying about just how we got there.
 
1345
         */
 
1346
        if (qp->ibqp.qp_type == IB_QPT_SMI ||
 
1347
            !(dd->flags & QIB_HAS_SEND_DMA))
 
1348
                ret = qib_verbs_send_pio(qp, hdr, hdrwords, ss, len,
 
1349
                                         plen, dwords);
 
1350
        else
 
1351
                ret = qib_verbs_send_dma(qp, hdr, hdrwords, ss, len,
 
1352
                                         plen, dwords);
 
1353
 
 
1354
        return ret;
 
1355
}
 
1356
 
 
1357
/*
 * Read the port's word/packet send and receive counters and the send-stall
 * counter into the caller's variables.
 *
 * Returns 0 on success.  Returns -EINVAL, leaving the outputs untouched,
 * when QIB_PRESENT is not set in the device flags.
 */
int qib_snapshot_counters(struct qib_pportdata *ppd, u64 *swords,
			  u64 *rwords, u64 *spkts, u64 *rpkts,
			  u64 *xmit_wait)
{
	struct qib_devdata *dd = ppd->dd;

	/* no hardware, freeze, etc. */
	if (!(dd->flags & QIB_PRESENT))
		return -EINVAL;

	*swords = dd->f_portcntr(ppd, QIBPORTCNTR_WORDSEND);
	*rwords = dd->f_portcntr(ppd, QIBPORTCNTR_WORDRCV);
	*spkts = dd->f_portcntr(ppd, QIBPORTCNTR_PKTSEND);
	*rpkts = dd->f_portcntr(ppd, QIBPORTCNTR_PKTRCV);
	*xmit_wait = dd->f_portcntr(ppd, QIBPORTCNTR_SENDSTALL);

	return 0;
}
 
1380
 
 
1381
/**
 
1382
 * qib_get_counters - get various chip counters
 
1383
 * @dd: the qlogic_ib device
 
1384
 * @cntrs: counters are placed here
 
1385
 *
 
1386
 * Return the counters needed by recv_pma_get_portcounters().
 
1387
 */
 
1388
int qib_get_counters(struct qib_pportdata *ppd,
 
1389
                     struct qib_verbs_counters *cntrs)
 
1390
{
 
1391
        int ret;
 
1392
 
 
1393
        if (!(ppd->dd->flags & QIB_PRESENT)) {
 
1394
                /* no hardware, freeze, etc. */
 
1395
                ret = -EINVAL;
 
1396
                goto bail;
 
1397
        }
 
1398
        cntrs->symbol_error_counter =
 
1399
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBSYMBOLERR);
 
1400
        cntrs->link_error_recovery_counter =
 
1401
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBLINKERRRECOV);
 
1402
        /*
 
1403
         * The link downed counter counts when the other side downs the
 
1404
         * connection.  We add in the number of times we downed the link
 
1405
         * due to local link integrity errors to compensate.
 
1406
         */
 
1407
        cntrs->link_downed_counter =
 
1408
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBLINKDOWN);
 
1409
        cntrs->port_rcv_errors =
 
1410
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXDROPPKT) +
 
1411
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RCVOVFL) +
 
1412
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERR_RLEN) +
 
1413
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_INVALIDRLEN) +
 
1414
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRLINK) +
 
1415
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRICRC) +
 
1416
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRVCRC) +
 
1417
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRLPCRC) +
 
1418
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_BADFORMAT);
 
1419
        cntrs->port_rcv_errors +=
 
1420
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXLOCALPHYERR);
 
1421
        cntrs->port_rcv_errors +=
 
1422
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXVLERR);
 
1423
        cntrs->port_rcv_remphys_errors =
 
1424
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RCVEBP);
 
1425
        cntrs->port_xmit_discards =
 
1426
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_UNSUPVL);
 
1427
        cntrs->port_xmit_data = ppd->dd->f_portcntr(ppd,
 
1428
                        QIBPORTCNTR_WORDSEND);
 
1429
        cntrs->port_rcv_data = ppd->dd->f_portcntr(ppd,
 
1430
                        QIBPORTCNTR_WORDRCV);
 
1431
        cntrs->port_xmit_packets = ppd->dd->f_portcntr(ppd,
 
1432
                        QIBPORTCNTR_PKTSEND);
 
1433
        cntrs->port_rcv_packets = ppd->dd->f_portcntr(ppd,
 
1434
                        QIBPORTCNTR_PKTRCV);
 
1435
        cntrs->local_link_integrity_errors =
 
1436
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_LLI);
 
1437
        cntrs->excessive_buffer_overrun_errors =
 
1438
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_EXCESSBUFOVFL);
 
1439
        cntrs->vl15_dropped =
 
1440
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_VL15PKTDROP);
 
1441
 
 
1442
        ret = 0;
 
1443
 
 
1444
bail:
 
1445
        return ret;
 
1446
}
 
1447
 
 
1448
/**
 
1449
 * qib_ib_piobufavail - callback when a PIO buffer is available
 
1450
 * @dd: the device pointer
 
1451
 *
 
1452
 * This is called from qib_intr() at interrupt level when a PIO buffer is
 
1453
 * available after qib_verbs_send() returned an error that no buffers were
 
1454
 * available. Disable the interrupt if there are no more QPs waiting.
 
1455
 */
 
1456
void qib_ib_piobufavail(struct qib_devdata *dd)
 
1457
{
 
1458
        struct qib_ibdev *dev = &dd->verbs_dev;
 
1459
        struct list_head *list;
 
1460
        struct qib_qp *qps[5];
 
1461
        struct qib_qp *qp;
 
1462
        unsigned long flags;
 
1463
        unsigned i, n;
 
1464
 
 
1465
        list = &dev->piowait;
 
1466
        n = 0;
 
1467
 
 
1468
        /*
 
1469
         * Note: checking that the piowait list is empty and clearing
 
1470
         * the buffer available interrupt needs to be atomic or we
 
1471
         * could end up with QPs on the wait list with the interrupt
 
1472
         * disabled.
 
1473
         */
 
1474
        spin_lock_irqsave(&dev->pending_lock, flags);
 
1475
        while (!list_empty(list)) {
 
1476
                if (n == ARRAY_SIZE(qps))
 
1477
                        goto full;
 
1478
                qp = list_entry(list->next, struct qib_qp, iowait);
 
1479
                list_del_init(&qp->iowait);
 
1480
                atomic_inc(&qp->refcount);
 
1481
                qps[n++] = qp;
 
1482
        }
 
1483
        dd->f_wantpiobuf_intr(dd, 0);
 
1484
full:
 
1485
        spin_unlock_irqrestore(&dev->pending_lock, flags);
 
1486
 
 
1487
        for (i = 0; i < n; i++) {
 
1488
                qp = qps[i];
 
1489
 
 
1490
                spin_lock_irqsave(&qp->s_lock, flags);
 
1491
                if (qp->s_flags & QIB_S_WAIT_PIO) {
 
1492
                        qp->s_flags &= ~QIB_S_WAIT_PIO;
 
1493
                        qib_schedule_send(qp);
 
1494
                }
 
1495
                spin_unlock_irqrestore(&qp->s_lock, flags);
 
1496
 
 
1497
                /* Notify qib_destroy_qp() if it is waiting. */
 
1498
                if (atomic_dec_and_test(&qp->refcount))
 
1499
                        wake_up(&qp->wait);
 
1500
        }
 
1501
}
 
1502
 
 
1503
static int qib_query_device(struct ib_device *ibdev,
 
1504
                            struct ib_device_attr *props)
 
1505
{
 
1506
        struct qib_devdata *dd = dd_from_ibdev(ibdev);
 
1507
        struct qib_ibdev *dev = to_idev(ibdev);
 
1508
 
 
1509
        memset(props, 0, sizeof(*props));
 
1510
 
 
1511
        props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
 
1512
                IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
 
1513
                IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
 
1514
                IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
 
1515
        props->page_size_cap = PAGE_SIZE;
 
1516
        props->vendor_id =
 
1517
                QIB_SRC_OUI_1 << 16 | QIB_SRC_OUI_2 << 8 | QIB_SRC_OUI_3;
 
1518
        props->vendor_part_id = dd->deviceid;
 
1519
        props->hw_ver = dd->minrev;
 
1520
        props->sys_image_guid = ib_qib_sys_image_guid;
 
1521
        props->max_mr_size = ~0ULL;
 
1522
        props->max_qp = ib_qib_max_qps;
 
1523
        props->max_qp_wr = ib_qib_max_qp_wrs;
 
1524
        props->max_sge = ib_qib_max_sges;
 
1525
        props->max_cq = ib_qib_max_cqs;
 
1526
        props->max_ah = ib_qib_max_ahs;
 
1527
        props->max_cqe = ib_qib_max_cqes;
 
1528
        props->max_mr = dev->lk_table.max;
 
1529
        props->max_fmr = dev->lk_table.max;
 
1530
        props->max_map_per_fmr = 32767;
 
1531
        props->max_pd = ib_qib_max_pds;
 
1532
        props->max_qp_rd_atom = QIB_MAX_RDMA_ATOMIC;
 
1533
        props->max_qp_init_rd_atom = 255;
 
1534
        /* props->max_res_rd_atom */
 
1535
        props->max_srq = ib_qib_max_srqs;
 
1536
        props->max_srq_wr = ib_qib_max_srq_wrs;
 
1537
        props->max_srq_sge = ib_qib_max_srq_sges;
 
1538
        /* props->local_ca_ack_delay */
 
1539
        props->atomic_cap = IB_ATOMIC_GLOB;
 
1540
        props->max_pkeys = qib_get_npkeys(dd);
 
1541
        props->max_mcast_grp = ib_qib_max_mcast_grps;
 
1542
        props->max_mcast_qp_attach = ib_qib_max_mcast_qp_attached;
 
1543
        props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
 
1544
                props->max_mcast_grp;
 
1545
 
 
1546
        return 0;
 
1547
}
 
1548
 
 
1549
/*
 * Fill in the attributes of one port for the InfiniBand core.
 * The structure is zeroed first and then populated from the per-port
 * driver state.  Always returns 0.
 */
static int qib_query_port(struct ib_device *ibdev, u8 port,
                          struct ib_port_attr *props)
{
        struct qib_devdata *dd = dd_from_ibdev(ibdev);
        struct qib_ibport *ibp = to_iport(ibdev, port);
        struct qib_pportdata *ppd = ppd_from_ibp(ibp);
        u16 port_lid = ppd->lid;

        memset(props, 0, sizeof(*props));

        /* A zero LID is reported as the permissive LID. */
        if (port_lid)
                props->lid = port_lid;
        else
                props->lid = be16_to_cpu(IB_LID_PERMISSIVE);
        props->lmc = ppd->lmc;
        props->sm_lid = ibp->sm_lid;
        props->sm_sl = ibp->sm_sl;
        props->state = dd->f_iblink_state(ppd->lastibcstat);
        props->phys_state = dd->f_ibphys_portstate(ppd->lastibcstat);
        props->port_cap_flags = ibp->port_cap_flags;
        props->gid_tbl_len = QIB_GUIDS_PER_PORT;
        props->max_msg_sz = 0x80000000;
        props->pkey_tbl_len = qib_get_npkeys(dd);
        props->bad_pkey_cntr = ibp->pkey_violations;
        props->qkey_viol_cntr = ibp->qkey_violations;
        props->active_width = ppd->link_width_active;
        /* See rate_show() */
        props->active_speed = ppd->link_speed_active;
        props->max_vl_num = qib_num_vls(ppd->vls_supported);
        props->init_type_reply = 0;

        /* Use the qib_ibmtu setting when it is non-zero, else 4096. */
        if (qib_ibmtu)
                props->max_mtu = qib_ibmtu;
        else
                props->max_mtu = IB_MTU_4096;

        /* Translate the byte MTU into the IB enum; unknown sizes map to 2048. */
        switch (ppd->ibmtu) {
        case 4096:
                props->active_mtu = IB_MTU_4096;
                break;
        case 1024:
                props->active_mtu = IB_MTU_1024;
                break;
        case 512:
                props->active_mtu = IB_MTU_512;
                break;
        case 256:
                props->active_mtu = IB_MTU_256;
                break;
        case 2048:
        default:
                props->active_mtu = IB_MTU_2048;
                break;
        }
        props->subnet_timeout = ibp->subnet_timeout;

        return 0;
}
 
1602
 
 
1603
static int qib_modify_device(struct ib_device *device,
 
1604
                             int device_modify_mask,
 
1605
                             struct ib_device_modify *device_modify)
 
1606
{
 
1607
        struct qib_devdata *dd = dd_from_ibdev(device);
 
1608
        unsigned i;
 
1609
        int ret;
 
1610
 
 
1611
        if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
 
1612
                                   IB_DEVICE_MODIFY_NODE_DESC)) {
 
1613
                ret = -EOPNOTSUPP;
 
1614
                goto bail;
 
1615
        }
 
1616
 
 
1617
        if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) {
 
1618
                memcpy(device->node_desc, device_modify->node_desc, 64);
 
1619
                for (i = 0; i < dd->num_pports; i++) {
 
1620
                        struct qib_ibport *ibp = &dd->pport[i].ibport_data;
 
1621
 
 
1622
                        qib_node_desc_chg(ibp);
 
1623
                }
 
1624
        }
 
1625
 
 
1626
        if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) {
 
1627
                ib_qib_sys_image_guid =
 
1628
                        cpu_to_be64(device_modify->sys_image_guid);
 
1629
                for (i = 0; i < dd->num_pports; i++) {
 
1630
                        struct qib_ibport *ibp = &dd->pport[i].ibport_data;
 
1631
 
 
1632
                        qib_sys_guid_chg(ibp);
 
1633
                }
 
1634
        }
 
1635
 
 
1636
        ret = 0;
 
1637
 
 
1638
bail:
 
1639
        return ret;
 
1640
}
 
1641
 
 
1642
/*
 * Update the capability mask of a port and carry out the actions
 * selected in port_modify_mask (link shutdown, Q_Key violation counter
 * reset).  Always returns 0.
 */
static int qib_modify_port(struct ib_device *ibdev, u8 port,
                           int port_modify_mask, struct ib_port_modify *props)
{
        struct qib_ibport *ibp = to_iport(ibdev, port);
        struct qib_pportdata *ppd = ppd_from_ibp(ibp);

        /* Set the requested bits first, then clear; clearing wins. */
        ibp->port_cap_flags = (ibp->port_cap_flags |
                               props->set_port_cap_mask) &
                              ~props->clr_port_cap_mask;

        /* Report a capability mask change whenever either mask is non-zero. */
        if (props->set_port_cap_mask || props->clr_port_cap_mask)
                qib_cap_mask_chg(ibp);

        if (port_modify_mask & IB_PORT_SHUTDOWN)
                qib_set_linkstate(ppd, QIB_IB_LINKDOWN);

        if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
                ibp->qkey_violations = 0;

        return 0;
}
 
1658
 
 
1659
/*
 * Return the GID at @index in the GID table of @port.
 *
 * Index 0 is built from the port GUID; indices 1 through
 * QIB_GUIDS_PER_PORT - 1 come from ibp->guids[index - 1].
 * Returns 0 on success, or -EINVAL when the port number or the index is
 * out of range.  @index is signed, so negative values are rejected
 * explicitly: otherwise they would satisfy "index < QIB_GUIDS_PER_PORT"
 * and read before the start of ibp->guids[].
 */
static int qib_query_gid(struct ib_device *ibdev, u8 port,
                         int index, union ib_gid *gid)
{
        struct qib_devdata *dd = dd_from_ibdev(ibdev);
        int ret = 0;

        if (!port || port > dd->num_pports)
                ret = -EINVAL;
        else {
                struct qib_ibport *ibp = to_iport(ibdev, port);
                struct qib_pportdata *ppd = ppd_from_ibp(ibp);

                gid->global.subnet_prefix = ibp->gid_prefix;
                if (index == 0)
                        gid->global.interface_id = ppd->guid;
                else if (index > 0 && index < QIB_GUIDS_PER_PORT)
                        gid->global.interface_id = ibp->guids[index - 1];
                else
                        ret = -EINVAL;
        }

        return ret;
}
 
1682
 
 
1683
static struct ib_pd *qib_alloc_pd(struct ib_device *ibdev,
 
1684
                                  struct ib_ucontext *context,
 
1685
                                  struct ib_udata *udata)
 
1686
{
 
1687
        struct qib_ibdev *dev = to_idev(ibdev);
 
1688
        struct qib_pd *pd;
 
1689
        struct ib_pd *ret;
 
1690
 
 
1691
        /*
 
1692
         * This is actually totally arbitrary.  Some correctness tests
 
1693
         * assume there's a maximum number of PDs that can be allocated.
 
1694
         * We don't actually have this limit, but we fail the test if
 
1695
         * we allow allocations of more than we report for this value.
 
1696
         */
 
1697
 
 
1698
        pd = kmalloc(sizeof *pd, GFP_KERNEL);
 
1699
        if (!pd) {
 
1700
                ret = ERR_PTR(-ENOMEM);
 
1701
                goto bail;
 
1702
        }
 
1703
 
 
1704
        spin_lock(&dev->n_pds_lock);
 
1705
        if (dev->n_pds_allocated == ib_qib_max_pds) {
 
1706
                spin_unlock(&dev->n_pds_lock);
 
1707
                kfree(pd);
 
1708
                ret = ERR_PTR(-ENOMEM);
 
1709
                goto bail;
 
1710
        }
 
1711
 
 
1712
        dev->n_pds_allocated++;
 
1713
        spin_unlock(&dev->n_pds_lock);
 
1714
 
 
1715
        /* ib_alloc_pd() will initialize pd->ibpd. */
 
1716
        pd->user = udata != NULL;
 
1717
 
 
1718
        ret = &pd->ibpd;
 
1719
 
 
1720
bail:
 
1721
        return ret;
 
1722
}
 
1723
 
 
1724
static int qib_dealloc_pd(struct ib_pd *ibpd)
 
1725
{
 
1726
        struct qib_pd *pd = to_ipd(ibpd);
 
1727
        struct qib_ibdev *dev = to_idev(ibpd->device);
 
1728
 
 
1729
        spin_lock(&dev->n_pds_lock);
 
1730
        dev->n_pds_allocated--;
 
1731
        spin_unlock(&dev->n_pds_lock);
 
1732
 
 
1733
        kfree(pd);
 
1734
 
 
1735
        return 0;
 
1736
}
 
1737
 
 
1738
int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
 
1739
{
 
1740
        /* A multicast address requires a GRH (see ch. 8.4.1). */
 
1741
        if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE &&
 
1742
            ah_attr->dlid != QIB_PERMISSIVE_LID &&
 
1743
            !(ah_attr->ah_flags & IB_AH_GRH))
 
1744
                goto bail;
 
1745
        if ((ah_attr->ah_flags & IB_AH_GRH) &&
 
1746
            ah_attr->grh.sgid_index >= QIB_GUIDS_PER_PORT)
 
1747
                goto bail;
 
1748
        if (ah_attr->dlid == 0)
 
1749
                goto bail;
 
1750
        if (ah_attr->port_num < 1 ||
 
1751
            ah_attr->port_num > ibdev->phys_port_cnt)
 
1752
                goto bail;
 
1753
        if (ah_attr->static_rate != IB_RATE_PORT_CURRENT &&
 
1754
            ib_rate_to_mult(ah_attr->static_rate) < 0)
 
1755
                goto bail;
 
1756
        if (ah_attr->sl > 15)
 
1757
                goto bail;
 
1758
        return 0;
 
1759
bail:
 
1760
        return -EINVAL;
 
1761
}
 
1762
 
 
1763
/**
 
1764
 * qib_create_ah - create an address handle
 
1765
 * @pd: the protection domain
 
1766
 * @ah_attr: the attributes of the AH
 
1767
 *
 
1768
 * This may be called from interrupt context.
 
1769
 */
 
1770
static struct ib_ah *qib_create_ah(struct ib_pd *pd,
 
1771
                                   struct ib_ah_attr *ah_attr)
 
1772
{
 
1773
        struct qib_ah *ah;
 
1774
        struct ib_ah *ret;
 
1775
        struct qib_ibdev *dev = to_idev(pd->device);
 
1776
        unsigned long flags;
 
1777
 
 
1778
        if (qib_check_ah(pd->device, ah_attr)) {
 
1779
                ret = ERR_PTR(-EINVAL);
 
1780
                goto bail;
 
1781
        }
 
1782
 
 
1783
        ah = kmalloc(sizeof *ah, GFP_ATOMIC);
 
1784
        if (!ah) {
 
1785
                ret = ERR_PTR(-ENOMEM);
 
1786
                goto bail;
 
1787
        }
 
1788
 
 
1789
        spin_lock_irqsave(&dev->n_ahs_lock, flags);
 
1790
        if (dev->n_ahs_allocated == ib_qib_max_ahs) {
 
1791
                spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
 
1792
                kfree(ah);
 
1793
                ret = ERR_PTR(-ENOMEM);
 
1794
                goto bail;
 
1795
        }
 
1796
 
 
1797
        dev->n_ahs_allocated++;
 
1798
        spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
 
1799
 
 
1800
        /* ib_create_ah() will initialize ah->ibah. */
 
1801
        ah->attr = *ah_attr;
 
1802
        atomic_set(&ah->refcount, 0);
 
1803
 
 
1804
        ret = &ah->ibah;
 
1805
 
 
1806
bail:
 
1807
        return ret;
 
1808
}
 
1809
 
 
1810
/**
 
1811
 * qib_destroy_ah - destroy an address handle
 
1812
 * @ibah: the AH to destroy
 
1813
 *
 
1814
 * This may be called from interrupt context.
 
1815
 */
 
1816
static int qib_destroy_ah(struct ib_ah *ibah)
 
1817
{
 
1818
        struct qib_ibdev *dev = to_idev(ibah->device);
 
1819
        struct qib_ah *ah = to_iah(ibah);
 
1820
        unsigned long flags;
 
1821
 
 
1822
        if (atomic_read(&ah->refcount) != 0)
 
1823
                return -EBUSY;
 
1824
 
 
1825
        spin_lock_irqsave(&dev->n_ahs_lock, flags);
 
1826
        dev->n_ahs_allocated--;
 
1827
        spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
 
1828
 
 
1829
        kfree(ah);
 
1830
 
 
1831
        return 0;
 
1832
}
 
1833
 
 
1834
static int qib_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
 
1835
{
 
1836
        struct qib_ah *ah = to_iah(ibah);
 
1837
 
 
1838
        if (qib_check_ah(ibah->device, ah_attr))
 
1839
                return -EINVAL;
 
1840
 
 
1841
        ah->attr = *ah_attr;
 
1842
 
 
1843
        return 0;
 
1844
}
 
1845
 
 
1846
static int qib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
 
1847
{
 
1848
        struct qib_ah *ah = to_iah(ibah);
 
1849
 
 
1850
        *ah_attr = ah->attr;
 
1851
 
 
1852
        return 0;
 
1853
}
 
1854
 
 
1855
/**
 
1856
 * qib_get_npkeys - return the size of the PKEY table for context 0
 
1857
 * @dd: the qlogic_ib device
 
1858
 */
 
1859
unsigned qib_get_npkeys(struct qib_devdata *dd)
 
1860
{
 
1861
        return ARRAY_SIZE(dd->rcd[0]->pkeys);
 
1862
}
 
1863
 
 
1864
/*
 
1865
 * Return the indexed PKEY from the port PKEY table.
 
1866
 * No need to validate rcd[ctxt]; the port is setup if we are here.
 
1867
 */
 
1868
unsigned qib_get_pkey(struct qib_ibport *ibp, unsigned index)
 
1869
{
 
1870
        struct qib_pportdata *ppd = ppd_from_ibp(ibp);
 
1871
        struct qib_devdata *dd = ppd->dd;
 
1872
        unsigned ctxt = ppd->hw_pidx;
 
1873
        unsigned ret;
 
1874
 
 
1875
        /* dd->rcd null if mini_init or some init failures */
 
1876
        if (!dd->rcd || index >= ARRAY_SIZE(dd->rcd[ctxt]->pkeys))
 
1877
                ret = 0;
 
1878
        else
 
1879
                ret = dd->rcd[ctxt]->pkeys[index];
 
1880
 
 
1881
        return ret;
 
1882
}
 
1883
 
 
1884
/*
 * Look up one entry of a port's PKEY table for the IB core.
 * Returns -EINVAL when @index is beyond the table size, else stores the
 * key in @pkey and returns 0.
 */
static int qib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
                          u16 *pkey)
{
        struct qib_devdata *dd = dd_from_ibdev(ibdev);

        if (index >= qib_get_npkeys(dd))
                return -EINVAL;

        *pkey = qib_get_pkey(to_iport(ibdev, port), index);
        return 0;
}
 
1901
 
 
1902
/**
 
1903
 * qib_alloc_ucontext - allocate a ucontest
 
1904
 * @ibdev: the infiniband device
 
1905
 * @udata: not used by the QLogic_IB driver
 
1906
 */
 
1907
 
 
1908
static struct ib_ucontext *qib_alloc_ucontext(struct ib_device *ibdev,
 
1909
                                              struct ib_udata *udata)
 
1910
{
 
1911
        struct qib_ucontext *context;
 
1912
        struct ib_ucontext *ret;
 
1913
 
 
1914
        context = kmalloc(sizeof *context, GFP_KERNEL);
 
1915
        if (!context) {
 
1916
                ret = ERR_PTR(-ENOMEM);
 
1917
                goto bail;
 
1918
        }
 
1919
 
 
1920
        ret = &context->ibucontext;
 
1921
 
 
1922
bail:
 
1923
        return ret;
 
1924
}
 
1925
 
 
1926
/*
 * Free the driver structure that embeds @context.  Always returns 0.
 */
static int qib_dealloc_ucontext(struct ib_ucontext *context)
{
        struct qib_ucontext *uctx = to_iucontext(context);

        kfree(uctx);
        return 0;
}
 
1931
 
 
1932
static void init_ibport(struct qib_pportdata *ppd)
 
1933
{
 
1934
        struct qib_verbs_counters cntrs;
 
1935
        struct qib_ibport *ibp = &ppd->ibport_data;
 
1936
 
 
1937
        spin_lock_init(&ibp->lock);
 
1938
        /* Set the prefix to the default value (see ch. 4.1.1) */
 
1939
        ibp->gid_prefix = IB_DEFAULT_GID_PREFIX;
 
1940
        ibp->sm_lid = be16_to_cpu(IB_LID_PERMISSIVE);
 
1941
        ibp->port_cap_flags = IB_PORT_SYS_IMAGE_GUID_SUP |
 
1942
                IB_PORT_CLIENT_REG_SUP | IB_PORT_SL_MAP_SUP |
 
1943
                IB_PORT_TRAP_SUP | IB_PORT_AUTO_MIGR_SUP |
 
1944
                IB_PORT_DR_NOTICE_SUP | IB_PORT_CAP_MASK_NOTICE_SUP |
 
1945
                IB_PORT_OTHER_LOCAL_CHANGES_SUP;
 
1946
        if (ppd->dd->flags & QIB_HAS_LINK_LATENCY)
 
1947
                ibp->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
 
1948
        ibp->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
 
1949
        ibp->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
 
1950
        ibp->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
 
1951
        ibp->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
 
1952
        ibp->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
 
1953
 
 
1954
        /* Snapshot current HW counters to "clear" them. */
 
1955
        qib_get_counters(ppd, &cntrs);
 
1956
        ibp->z_symbol_error_counter = cntrs.symbol_error_counter;
 
1957
        ibp->z_link_error_recovery_counter =
 
1958
                cntrs.link_error_recovery_counter;
 
1959
        ibp->z_link_downed_counter = cntrs.link_downed_counter;
 
1960
        ibp->z_port_rcv_errors = cntrs.port_rcv_errors;
 
1961
        ibp->z_port_rcv_remphys_errors = cntrs.port_rcv_remphys_errors;
 
1962
        ibp->z_port_xmit_discards = cntrs.port_xmit_discards;
 
1963
        ibp->z_port_xmit_data = cntrs.port_xmit_data;
 
1964
        ibp->z_port_rcv_data = cntrs.port_rcv_data;
 
1965
        ibp->z_port_xmit_packets = cntrs.port_xmit_packets;
 
1966
        ibp->z_port_rcv_packets = cntrs.port_rcv_packets;
 
1967
        ibp->z_local_link_integrity_errors =
 
1968
                cntrs.local_link_integrity_errors;
 
1969
        ibp->z_excessive_buffer_overrun_errors =
 
1970
                cntrs.excessive_buffer_overrun_errors;
 
1971
        ibp->z_vl15_dropped = cntrs.vl15_dropped;
 
1972
}
 
1973
 
 
1974
/**
 
1975
 * qib_register_ib_device - register our device with the infiniband core
 
1976
 * @dd: the device data structure
 
1977
 * Return the allocated qib_ibdev pointer or NULL on error.
 
1978
 */
 
1979
int qib_register_ib_device(struct qib_devdata *dd)
 
1980
{
 
1981
        struct qib_ibdev *dev = &dd->verbs_dev;
 
1982
        struct ib_device *ibdev = &dev->ibdev;
 
1983
        struct qib_pportdata *ppd = dd->pport;
 
1984
        unsigned i, lk_tab_size;
 
1985
        int ret;
 
1986
 
 
1987
        dev->qp_table_size = ib_qib_qp_table_size;
 
1988
        dev->qp_table = kzalloc(dev->qp_table_size * sizeof *dev->qp_table,
 
1989
                                GFP_KERNEL);
 
1990
        if (!dev->qp_table) {
 
1991
                ret = -ENOMEM;
 
1992
                goto err_qpt;
 
1993
        }
 
1994
 
 
1995
        for (i = 0; i < dd->num_pports; i++)
 
1996
                init_ibport(ppd + i);
 
1997
 
 
1998
        /* Only need to initialize non-zero fields. */
 
1999
        spin_lock_init(&dev->qpt_lock);
 
2000
        spin_lock_init(&dev->n_pds_lock);
 
2001
        spin_lock_init(&dev->n_ahs_lock);
 
2002
        spin_lock_init(&dev->n_cqs_lock);
 
2003
        spin_lock_init(&dev->n_qps_lock);
 
2004
        spin_lock_init(&dev->n_srqs_lock);
 
2005
        spin_lock_init(&dev->n_mcast_grps_lock);
 
2006
        init_timer(&dev->mem_timer);
 
2007
        dev->mem_timer.function = mem_timer;
 
2008
        dev->mem_timer.data = (unsigned long) dev;
 
2009
 
 
2010
        qib_init_qpn_table(dd, &dev->qpn_table);
 
2011
 
 
2012
        /*
 
2013
         * The top ib_qib_lkey_table_size bits are used to index the
 
2014
         * table.  The lower 8 bits can be owned by the user (copied from
 
2015
         * the LKEY).  The remaining bits act as a generation number or tag.
 
2016
         */
 
2017
        spin_lock_init(&dev->lk_table.lock);
 
2018
        dev->lk_table.max = 1 << ib_qib_lkey_table_size;
 
2019
        lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table);
 
2020
        dev->lk_table.table = (struct qib_mregion **)
 
2021
                __get_free_pages(GFP_KERNEL, get_order(lk_tab_size));
 
2022
        if (dev->lk_table.table == NULL) {
 
2023
                ret = -ENOMEM;
 
2024
                goto err_lk;
 
2025
        }
 
2026
        memset(dev->lk_table.table, 0, lk_tab_size);
 
2027
        INIT_LIST_HEAD(&dev->pending_mmaps);
 
2028
        spin_lock_init(&dev->pending_lock);
 
2029
        dev->mmap_offset = PAGE_SIZE;
 
2030
        spin_lock_init(&dev->mmap_offset_lock);
 
2031
        INIT_LIST_HEAD(&dev->piowait);
 
2032
        INIT_LIST_HEAD(&dev->dmawait);
 
2033
        INIT_LIST_HEAD(&dev->txwait);
 
2034
        INIT_LIST_HEAD(&dev->memwait);
 
2035
        INIT_LIST_HEAD(&dev->txreq_free);
 
2036
 
 
2037
        if (ppd->sdma_descq_cnt) {
 
2038
                dev->pio_hdrs = dma_alloc_coherent(&dd->pcidev->dev,
 
2039
                                                ppd->sdma_descq_cnt *
 
2040
                                                sizeof(struct qib_pio_header),
 
2041
                                                &dev->pio_hdrs_phys,
 
2042
                                                GFP_KERNEL);
 
2043
                if (!dev->pio_hdrs) {
 
2044
                        ret = -ENOMEM;
 
2045
                        goto err_hdrs;
 
2046
                }
 
2047
        }
 
2048
 
 
2049
        for (i = 0; i < ppd->sdma_descq_cnt; i++) {
 
2050
                struct qib_verbs_txreq *tx;
 
2051
 
 
2052
                tx = kzalloc(sizeof *tx, GFP_KERNEL);
 
2053
                if (!tx) {
 
2054
                        ret = -ENOMEM;
 
2055
                        goto err_tx;
 
2056
                }
 
2057
                tx->hdr_inx = i;
 
2058
                list_add(&tx->txreq.list, &dev->txreq_free);
 
2059
        }
 
2060
 
 
2061
        /*
 
2062
         * The system image GUID is supposed to be the same for all
 
2063
         * IB HCAs in a single system but since there can be other
 
2064
         * device types in the system, we can't be sure this is unique.
 
2065
         */
 
2066
        if (!ib_qib_sys_image_guid)
 
2067
                ib_qib_sys_image_guid = ppd->guid;
 
2068
 
 
2069
        strlcpy(ibdev->name, "qib%d", IB_DEVICE_NAME_MAX);
 
2070
        ibdev->owner = THIS_MODULE;
 
2071
        ibdev->node_guid = ppd->guid;
 
2072
        ibdev->uverbs_abi_ver = QIB_UVERBS_ABI_VERSION;
 
2073
        ibdev->uverbs_cmd_mask =
 
2074
                (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
 
2075
                (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
 
2076
                (1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
 
2077
                (1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
 
2078
                (1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
 
2079
                (1ull << IB_USER_VERBS_CMD_CREATE_AH)           |
 
2080
                (1ull << IB_USER_VERBS_CMD_MODIFY_AH)           |
 
2081
                (1ull << IB_USER_VERBS_CMD_QUERY_AH)            |
 
2082
                (1ull << IB_USER_VERBS_CMD_DESTROY_AH)          |
 
2083
                (1ull << IB_USER_VERBS_CMD_REG_MR)              |
 
2084
                (1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
 
2085
                (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
 
2086
                (1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
 
2087
                (1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
 
2088
                (1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
 
2089
                (1ull << IB_USER_VERBS_CMD_POLL_CQ)             |
 
2090
                (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)       |
 
2091
                (1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
 
2092
                (1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
 
2093
                (1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
 
2094
                (1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
 
2095
                (1ull << IB_USER_VERBS_CMD_POST_SEND)           |
 
2096
                (1ull << IB_USER_VERBS_CMD_POST_RECV)           |
 
2097
                (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
 
2098
                (1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
 
2099
                (1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
 
2100
                (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
 
2101
                (1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
 
2102
                (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
 
2103
                (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
 
2104
        ibdev->node_type = RDMA_NODE_IB_CA;
 
2105
        ibdev->phys_port_cnt = dd->num_pports;
 
2106
        ibdev->num_comp_vectors = 1;
 
2107
        ibdev->dma_device = &dd->pcidev->dev;
 
2108
        ibdev->query_device = qib_query_device;
 
2109
        ibdev->modify_device = qib_modify_device;
 
2110
        ibdev->query_port = qib_query_port;
 
2111
        ibdev->modify_port = qib_modify_port;
 
2112
        ibdev->query_pkey = qib_query_pkey;
 
2113
        ibdev->query_gid = qib_query_gid;
 
2114
        ibdev->alloc_ucontext = qib_alloc_ucontext;
 
2115
        ibdev->dealloc_ucontext = qib_dealloc_ucontext;
 
2116
        ibdev->alloc_pd = qib_alloc_pd;
 
2117
        ibdev->dealloc_pd = qib_dealloc_pd;
 
2118
        ibdev->create_ah = qib_create_ah;
 
2119
        ibdev->destroy_ah = qib_destroy_ah;
 
2120
        ibdev->modify_ah = qib_modify_ah;
 
2121
        ibdev->query_ah = qib_query_ah;
 
2122
        ibdev->create_srq = qib_create_srq;
 
2123
        ibdev->modify_srq = qib_modify_srq;
 
2124
        ibdev->query_srq = qib_query_srq;
 
2125
        ibdev->destroy_srq = qib_destroy_srq;
 
2126
        ibdev->create_qp = qib_create_qp;
 
2127
        ibdev->modify_qp = qib_modify_qp;
 
2128
        ibdev->query_qp = qib_query_qp;
 
2129
        ibdev->destroy_qp = qib_destroy_qp;
 
2130
        ibdev->post_send = qib_post_send;
 
2131
        ibdev->post_recv = qib_post_receive;
 
2132
        ibdev->post_srq_recv = qib_post_srq_receive;
 
2133
        ibdev->create_cq = qib_create_cq;
 
2134
        ibdev->destroy_cq = qib_destroy_cq;
 
2135
        ibdev->resize_cq = qib_resize_cq;
 
2136
        ibdev->poll_cq = qib_poll_cq;
 
2137
        ibdev->req_notify_cq = qib_req_notify_cq;
 
2138
        ibdev->get_dma_mr = qib_get_dma_mr;
 
2139
        ibdev->reg_phys_mr = qib_reg_phys_mr;
 
2140
        ibdev->reg_user_mr = qib_reg_user_mr;
 
2141
        ibdev->dereg_mr = qib_dereg_mr;
 
2142
        ibdev->alloc_fast_reg_mr = qib_alloc_fast_reg_mr;
 
2143
        ibdev->alloc_fast_reg_page_list = qib_alloc_fast_reg_page_list;
 
2144
        ibdev->free_fast_reg_page_list = qib_free_fast_reg_page_list;
 
2145
        ibdev->alloc_fmr = qib_alloc_fmr;
 
2146
        ibdev->map_phys_fmr = qib_map_phys_fmr;
 
2147
        ibdev->unmap_fmr = qib_unmap_fmr;
 
2148
        ibdev->dealloc_fmr = qib_dealloc_fmr;
 
2149
        ibdev->attach_mcast = qib_multicast_attach;
 
2150
        ibdev->detach_mcast = qib_multicast_detach;
 
2151
        ibdev->process_mad = qib_process_mad;
 
2152
        ibdev->mmap = qib_mmap;
 
2153
        ibdev->dma_ops = &qib_dma_mapping_ops;
 
2154
 
 
2155
        snprintf(ibdev->node_desc, sizeof(ibdev->node_desc),
 
2156
                 QIB_IDSTR " %s", init_utsname()->nodename);
 
2157
 
 
2158
        ret = ib_register_device(ibdev, qib_create_port_files);
 
2159
        if (ret)
 
2160
                goto err_reg;
 
2161
 
 
2162
        ret = qib_create_agents(dev);
 
2163
        if (ret)
 
2164
                goto err_agents;
 
2165
 
 
2166
        if (qib_verbs_register_sysfs(dd))
 
2167
                goto err_class;
 
2168
 
 
2169
        goto bail;
 
2170
 
 
2171
err_class:
 
2172
        qib_free_agents(dev);
 
2173
err_agents:
 
2174
        ib_unregister_device(ibdev);
 
2175
err_reg:
 
2176
err_tx:
 
2177
        while (!list_empty(&dev->txreq_free)) {
 
2178
                struct list_head *l = dev->txreq_free.next;
 
2179
                struct qib_verbs_txreq *tx;
 
2180
 
 
2181
                list_del(l);
 
2182
                tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
 
2183
                kfree(tx);
 
2184
        }
 
2185
        if (ppd->sdma_descq_cnt)
 
2186
                dma_free_coherent(&dd->pcidev->dev,
 
2187
                                  ppd->sdma_descq_cnt *
 
2188
                                        sizeof(struct qib_pio_header),
 
2189
                                  dev->pio_hdrs, dev->pio_hdrs_phys);
 
2190
err_hdrs:
 
2191
        free_pages((unsigned long) dev->lk_table.table, get_order(lk_tab_size));
 
2192
err_lk:
 
2193
        kfree(dev->qp_table);
 
2194
err_qpt:
 
2195
        qib_dev_err(dd, "cannot register verbs: %d!\n", -ret);
 
2196
bail:
 
2197
        return ret;
 
2198
}
 
2199
 
 
2200
void qib_unregister_ib_device(struct qib_devdata *dd)
 
2201
{
 
2202
        struct qib_ibdev *dev = &dd->verbs_dev;
 
2203
        struct ib_device *ibdev = &dev->ibdev;
 
2204
        u32 qps_inuse;
 
2205
        unsigned lk_tab_size;
 
2206
 
 
2207
        qib_verbs_unregister_sysfs(dd);
 
2208
 
 
2209
        qib_free_agents(dev);
 
2210
 
 
2211
        ib_unregister_device(ibdev);
 
2212
 
 
2213
        if (!list_empty(&dev->piowait))
 
2214
                qib_dev_err(dd, "piowait list not empty!\n");
 
2215
        if (!list_empty(&dev->dmawait))
 
2216
                qib_dev_err(dd, "dmawait list not empty!\n");
 
2217
        if (!list_empty(&dev->txwait))
 
2218
                qib_dev_err(dd, "txwait list not empty!\n");
 
2219
        if (!list_empty(&dev->memwait))
 
2220
                qib_dev_err(dd, "memwait list not empty!\n");
 
2221
        if (dev->dma_mr)
 
2222
                qib_dev_err(dd, "DMA MR not NULL!\n");
 
2223
 
 
2224
        qps_inuse = qib_free_all_qps(dd);
 
2225
        if (qps_inuse)
 
2226
                qib_dev_err(dd, "QP memory leak! %u still in use\n",
 
2227
                            qps_inuse);
 
2228
 
 
2229
        del_timer_sync(&dev->mem_timer);
 
2230
        qib_free_qpn_table(&dev->qpn_table);
 
2231
        while (!list_empty(&dev->txreq_free)) {
 
2232
                struct list_head *l = dev->txreq_free.next;
 
2233
                struct qib_verbs_txreq *tx;
 
2234
 
 
2235
                list_del(l);
 
2236
                tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
 
2237
                kfree(tx);
 
2238
        }
 
2239
        if (dd->pport->sdma_descq_cnt)
 
2240
                dma_free_coherent(&dd->pcidev->dev,
 
2241
                                  dd->pport->sdma_descq_cnt *
 
2242
                                        sizeof(struct qib_pio_header),
 
2243
                                  dev->pio_hdrs, dev->pio_hdrs_phys);
 
2244
        lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table);
 
2245
        free_pages((unsigned long) dev->lk_table.table,
 
2246
                   get_order(lk_tab_size));
 
2247
        kfree(dev->qp_table);
 
2248
}