@@ -59 +59 @@
                    "v" DRV_VERSION " (" DRV_RELDATE ")");
 MODULE_LICENSE("Dual BSD/GPL");
-static int srp_sg_tablesize = SRP_DEF_SG_TABLESIZE;
-static int srp_max_iu_len;
-module_param(srp_sg_tablesize, int, 0444);
-MODULE_PARM_DESC(srp_sg_tablesize,
-                 "Max number of gather/scatter entries per I/O (default is 12, max 255)");
+static unsigned int srp_sg_tablesize;
+static unsigned int cmd_sg_entries;
+static unsigned int indirect_sg_entries;
+static bool allow_ext_sg;
 static int topspin_workarounds = 1;
+module_param(srp_sg_tablesize, uint, 0444);
+MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");
+module_param(cmd_sg_entries, uint, 0444);
+MODULE_PARM_DESC(cmd_sg_entries,
+                 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");
+module_param(indirect_sg_entries, uint, 0444);
+MODULE_PARM_DESC(indirect_sg_entries,
+                 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SCSI_MAX_SG_CHAIN_SEGMENTS) ")");
+module_param(allow_ext_sg, bool, 0444);
+MODULE_PARM_DESC(allow_ext_sg,
+                 "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");
 module_param(topspin_workarounds, int, 0444);
 MODULE_PARM_DESC(topspin_workarounds,
                  "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
-static int mellanox_workarounds = 1;
-module_param(mellanox_workarounds, int, 0444);
-MODULE_PARM_DESC(mellanox_workarounds,
-                 "Enable workarounds for Mellanox SRP target bugs if != 0");
 static void srp_add_one(struct ib_device *device);
 static void srp_remove_one(struct ib_device *device);
 static void srp_recv_completion(struct ib_cq *cq, void *target_ptr);
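The old srp_sg_tablesize knob survives only as a deprecated alias for cmd_sg_entries. This hunk does not show srp_init_module, so the exact hand-over is not part of the excerpt; a minimal sketch of how the alias can be folded in at init time, assuming only the PFX and SRP_DEF_SG_TABLESIZE macros the driver already uses:

        /* Sketch only, not taken from this diff: honor the deprecated
         * parameter when the new one was left unset. */
        if (srp_sg_tablesize) {
                printk(KERN_WARNING PFX "srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
                if (!cmd_sg_entries)
                        cmd_sg_entries = srp_sg_tablesize;
        }
        if (!cmd_sg_entries)
                cmd_sg_entries = SRP_DEF_SG_TABLESIZE;  /* default of 12 */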
@@ -648 +667 @@
-static int srp_map_fmr(struct srp_target_port *target, struct scatterlist *scat,
-                       int sg_cnt, struct srp_request *req,
-                       struct srp_direct_buf *buf)
+static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
+                         unsigned int dma_len, u32 rkey)
+        struct srp_direct_buf *desc = state->desc;
+        desc->va = cpu_to_be64(dma_addr);
+        desc->key = cpu_to_be32(rkey);
+        desc->len = cpu_to_be32(dma_len);
+        state->total_len += dma_len;
+static int srp_map_finish_fmr(struct srp_map_state *state,
+                              struct srp_target_port *target)
+        struct srp_device *dev = target->srp_host->srp_dev;
+        struct ib_pool_fmr *fmr;
+        if (state->npages == 1) {
+                srp_map_desc(state, state->base_dma_addr, state->fmr_len,
+                state->npages = state->fmr_len = 0;
+        fmr = ib_fmr_pool_map_phys(dev->fmr_pool, state->pages,
+                                   state->npages, io_addr);
+        *state->next_fmr++ = fmr;
+        srp_map_desc(state, 0, state->fmr_len, fmr->fmr->rkey);
+        state->npages = state->fmr_len = 0;
+static void srp_map_update_start(struct srp_map_state *state,
+                                 struct scatterlist *sg, int sg_index,
+        state->unmapped_sg = sg;
+        state->unmapped_index = sg_index;
+        state->unmapped_addr = dma_addr;
+static int srp_map_sg_entry(struct srp_map_state *state,
+                            struct srp_target_port *target,
+                            struct scatterlist *sg, int sg_index,
         struct srp_device *dev = target->srp_host->srp_dev;
         struct ib_device *ibdev = dev->dev;
-        struct scatterlist *sg;
-        if (srp_target_is_mellanox(target) &&
-            (ib_sg_dma_address(ibdev, &scat[0]) & ~dev->fmr_page_mask))
-        scsi_for_each_sg(req->scmnd, sg, sg_cnt, i) {
-                unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
-                if (ib_sg_dma_address(ibdev, sg) & ~dev->fmr_page_mask) {
-                if ((ib_sg_dma_address(ibdev, sg) + dma_len) &
-                    ~dev->fmr_page_mask) {
-        page_cnt += len >> dev->fmr_page_shift;
-        if (page_cnt > SRP_FMR_SIZE)
-        dma_pages = kmalloc(sizeof (u64) * page_cnt, GFP_ATOMIC);
-        scsi_for_each_sg(req->scmnd, sg, sg_cnt, i) {
-                unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
-                for (j = 0; j < dma_len; j += dev->fmr_page_size)
-                        dma_pages[page_cnt++] =
-                                (ib_sg_dma_address(ibdev, sg) &
-                                 dev->fmr_page_mask) + j;
-        req->fmr = ib_fmr_pool_map_phys(dev->fmr_pool,
-                                        dma_pages, page_cnt, io_addr);
-        if (IS_ERR(req->fmr)) {
-                ret = PTR_ERR(req->fmr);
-        buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, &scat[0]) &
-                              ~dev->fmr_page_mask);
-        buf->key = cpu_to_be32(req->fmr->fmr->rkey);
-        buf->len = cpu_to_be32(len);
+        dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
+        unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
+        if (use_fmr == SRP_MAP_NO_FMR) {
+                /* Once we're in direct map mode for a request, we don't
+                 * go back to FMR mode, so no need to update anything
+                 * other than the descriptor.
+                srp_map_desc(state, dma_addr, dma_len, target->rkey);
+        /* If we start at an offset into the FMR page, don't merge into
+         * the current FMR. Finish it out, and use the kernel's MR for this
+         * sg entry. This is to avoid potential bugs on some SRP targets
+         * that were never quite defined, but went away when the initiator
+         * avoided using FMR on such page fragments.
+        if (dma_addr & ~dev->fmr_page_mask || dma_len > dev->fmr_max_size) {
+                ret = srp_map_finish_fmr(state, target);
+                srp_map_desc(state, dma_addr, dma_len, target->rkey);
+                srp_map_update_start(state, NULL, 0, 0);
+        /* If this is the first sg to go into the FMR, save our position.
+         * We need to know the first unmapped entry, its index, and the
+         * first unmapped address within that entry to be able to restart
+         * mapping after an error.
+        if (!state->unmapped_sg)
+                srp_map_update_start(state, sg, sg_index, dma_addr);
+                if (state->npages == SRP_FMR_SIZE) {
+                        ret = srp_map_finish_fmr(state, target);
+                        srp_map_update_start(state, sg, sg_index, dma_addr);
+                len = min_t(unsigned int, dma_len, dev->fmr_page_size);
+                        state->base_dma_addr = dma_addr;
+                state->pages[state->npages++] = dma_addr;
+                state->fmr_len += len;
+        /* If the last entry of the FMR wasn't a full page, then we need to
+         * close it out and start a new one -- we can only merge at page
+        if (len != dev->fmr_page_size) {
+                ret = srp_map_finish_fmr(state, target);
+                srp_map_update_start(state, NULL, 0, 0);
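The new mapping path collects page-aligned chunks of each DMA-mapped sg entry into state->pages[] and closes out an FMR whenever SRP_FMR_SIZE pages have accumulated, so contiguous entries can be merged under a single rkey. A stand-alone sketch of the chunking arithmetic (the helper name is mine; the driver does this inline in srp_map_sg_entry):

        /* Sketch: how many fmr_page_size chunks a page-aligned
         * (dma_addr, dma_len) span contributes, mirroring the loop above.
         * E.g. an 18 KiB aligned entry with 4 KiB FMR pages yields 5
         * chunks, the last one covering 2 KiB. */
        static unsigned int srp_count_fmr_chunks(unsigned int dma_len,
                                                 unsigned int fmr_page_size)
        {
                unsigned int npages = 0;

                while (dma_len) {
                        unsigned int len = min_t(unsigned int, dma_len,
                                                 fmr_page_size);

                        npages++;               /* one slot in state->pages[] */
                        dma_len -= len;
                }
                return npages;
        }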
@@ -729 +800 @@
 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
                         struct srp_request *req)
-        struct scatterlist *scat;
+        struct scatterlist *scat, *sg;
         struct srp_cmd *cmd = req->cmd->buf;
-        int len, nents, count;
-        u8 fmt = SRP_DATA_DESC_DIRECT;
+        int i, len, nents, count, use_fmr;
         struct srp_device *dev;
         struct ib_device *ibdev;
+        struct srp_map_state state;
+        struct srp_indirect_buf *indirect_hdr;
         if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
                 return sizeof (struct srp_cmd);
                 buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
                 buf->key = cpu_to_be32(target->rkey);
                 buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
-        } else if (srp_map_fmr(target, scat, count, req,
-                               (void *) cmd->add_data)) {
-                 * FMR mapping failed, and the scatterlist has more
-                 * than one entry. Generate an indirect memory
+        /* We have more than one scatter/gather entry, so build our indirect
+         * descriptor table, trying to merge as many entries with FMR as we
+        indirect_hdr = (void *) cmd->add_data;
+        ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
+                                   target->indirect_size, DMA_TO_DEVICE);
+        memset(&state, 0, sizeof(state));
+        state.desc = req->indirect_desc;
+        state.pages = req->map_page;
+        state.next_fmr = req->fmr_list;
+        use_fmr = dev->fmr_pool ? SRP_MAP_ALLOW_FMR : SRP_MAP_NO_FMR;
+        for_each_sg(scat, sg, count, i) {
+                if (srp_map_sg_entry(&state, target, sg, i, use_fmr)) {
+                        /* FMR mapping failed, so backtrack to the first
+                         * unmapped entry and continue on without using FMR.
+                        unsigned int dma_len;
+                        sg = state.unmapped_sg;
+                        i = state.unmapped_index;
+                        dma_addr = ib_sg_dma_address(ibdev, sg);
+                        dma_len = ib_sg_dma_len(ibdev, sg);
+                        dma_len -= (state.unmapped_addr - dma_addr);
+                        dma_addr = state.unmapped_addr;
+                        use_fmr = SRP_MAP_NO_FMR;
+                        srp_map_desc(&state, dma_addr, dma_len, target->rkey);
+        if (use_fmr == SRP_MAP_ALLOW_FMR && srp_map_finish_fmr(&state, target))
+        /* We've mapped the request, now pull as much of the indirect
+         * descriptor table as we can into the command buffer. If this
+         * target is not using an external indirect table, we are
+         * guaranteed to fit into the command, as the SCSI layer won't
+         * give us more S/G entries than we allow.
+        req->nfmr = state.nfmr;
+        if (state.ndesc == 1) {
+                /* FMR mapping was able to collapse this to one entry,
+                 * so use a direct descriptor.
-                struct srp_indirect_buf *buf = (void *) cmd->add_data;
-                struct scatterlist *sg;
-                fmt = SRP_DATA_DESC_INDIRECT;
-                len = sizeof (struct srp_cmd) +
-                        sizeof (struct srp_indirect_buf) +
-                        count * sizeof (struct srp_direct_buf);
-                scsi_for_each_sg(scmnd, sg, count, i) {
-                        unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
-                        buf->desc_list[i].va =
-                                cpu_to_be64(ib_sg_dma_address(ibdev, sg));
-                        buf->desc_list[i].key =
-                                cpu_to_be32(target->rkey);
-                        buf->desc_list[i].len = cpu_to_be32(dma_len);
-                if (scmnd->sc_data_direction == DMA_TO_DEVICE)
-                        cmd->data_out_desc_cnt = count;
-                        cmd->data_in_desc_cnt = count;
-                        cpu_to_be64(req->cmd->dma + sizeof *cmd + sizeof *buf);
-                buf->table_desc.key =
-                        cpu_to_be32(target->rkey);
-                buf->table_desc.len =
-                        cpu_to_be32(count * sizeof (struct srp_direct_buf));
-                buf->len = cpu_to_be32(datalen);
+                struct srp_direct_buf *buf = (void *) cmd->add_data;
+                *buf = req->indirect_desc[0];
+        if (unlikely(target->cmd_sg_cnt < state.ndesc &&
+                     !target->allow_ext_sg)) {
+                shost_printk(KERN_ERR, target->scsi_host,
+                             "Could not fit S/G list into SRP_CMD\n");
+        count = min(state.ndesc, target->cmd_sg_cnt);
+        table_len = state.ndesc * sizeof (struct srp_direct_buf);
+        fmt = SRP_DATA_DESC_INDIRECT;
+        len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
+        len += count * sizeof (struct srp_direct_buf);
+        memcpy(indirect_hdr->desc_list, req->indirect_desc,
+               count * sizeof (struct srp_direct_buf));
+        indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
+        indirect_hdr->table_desc.key = cpu_to_be32(target->rkey);
+        indirect_hdr->table_desc.len = cpu_to_be32(table_len);
+        indirect_hdr->len = cpu_to_be32(state.total_len);
+        if (scmnd->sc_data_direction == DMA_TO_DEVICE)
+                cmd->data_out_desc_cnt = count;
+                cmd->data_in_desc_cnt = count;
+        ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
         if (scmnd->sc_data_direction == DMA_TO_DEVICE)
                 cmd->buf_fmt = fmt << 4;
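For reference, the descriptors being filled in above are the standard SRP wire structures from include/scsi/srp.h (abridged); every field is big-endian on the wire, which is why the code uses cpu_to_be32()/cpu_to_be64() throughout:

        struct srp_direct_buf {                 /* one remotely accessible region */
                __be64  va;                     /* bus address of the buffer */
                __be32  key;                    /* rkey the target uses for RDMA */
                __be32  len;                    /* length in bytes */
        };

        struct srp_indirect_buf {               /* header for an indirect table */
                struct srp_direct_buf   table_desc;     /* where the table itself lives */
                __be32                  len;            /* total data length */
                struct srp_direct_buf   desc_list[];    /* entries carried in the IU */
        };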
@@ -1228 +1354 @@
                 return -ENOMEM;
+static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
+                               struct srp_login_rsp *lrsp,
+                               struct srp_target_port *target)
+        struct ib_qp_attr *qp_attr = NULL;
+        if (lrsp->opcode == SRP_LOGIN_RSP) {
+                target->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
+                target->req_lim = be32_to_cpu(lrsp->req_lim_delta);
+                 * Reserve credits for task management so we don't
+                 * bounce requests back to the SCSI mid-layer.
+                target->scsi_host->can_queue
+                        = min(target->req_lim - SRP_TSK_MGMT_SQ_SIZE,
+                              target->scsi_host->can_queue);
+                shost_printk(KERN_WARNING, target->scsi_host,
+                             PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
+        if (!target->rx_ring[0]) {
+                ret = srp_alloc_iu_bufs(target);
+        qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
+        qp_attr->qp_state = IB_QPS_RTR;
+        ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
+        ret = ib_modify_qp(target->qp, qp_attr, attr_mask);
+        for (i = 0; i < SRP_RQ_SIZE; i++) {
+                struct srp_iu *iu = target->rx_ring[i];
+                ret = srp_post_recv(target, iu);
+        qp_attr->qp_state = IB_QPS_RTS;
+        ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
+        ret = ib_modify_qp(target->qp, qp_attr, attr_mask);
+        ret = ib_send_cm_rtu(cm_id, NULL, 0);
+        target->status = ret;
 static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
                                struct ib_cm_event *event,
                                struct srp_target_port *target)
@@ -1328 +1522 @@
         case IB_CM_REP_RECEIVED:
-                opcode = *(u8 *) event->private_data;
-                if (opcode == SRP_LOGIN_RSP) {
-                        struct srp_login_rsp *rsp = event->private_data;
-                        target->max_ti_iu_len = be32_to_cpu(rsp->max_ti_iu_len);
-                        target->req_lim = be32_to_cpu(rsp->req_lim_delta);
-                         * Reserve credits for task management so we don't
-                         * bounce requests back to the SCSI mid-layer.
-                        target->scsi_host->can_queue
-                                = min(target->req_lim - SRP_TSK_MGMT_SQ_SIZE,
-                                      target->scsi_host->can_queue);
-                        shost_printk(KERN_WARNING, target->scsi_host,
-                                     PFX "Unhandled RSP opcode %#x\n", opcode);
-                        target->status = -ECONNRESET;
-                if (!target->rx_ring[0]) {
-                        target->status = srp_alloc_iu_bufs(target);
-                qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
-                        target->status = -ENOMEM;
-                qp_attr->qp_state = IB_QPS_RTR;
-                target->status = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
-                target->status = ib_modify_qp(target->qp, qp_attr, attr_mask);
-                for (i = 0; i < SRP_RQ_SIZE; i++) {
-                        struct srp_iu *iu = target->rx_ring[i];
-                        target->status = srp_post_recv(target, iu);
-                qp_attr->qp_state = IB_QPS_RTS;
-                target->status = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
-                target->status = ib_modify_qp(target->qp, qp_attr, attr_mask);
-                target->status = ib_send_cm_rtu(cm_id, NULL, 0);
+                srp_cm_rep_handler(cm_id, event->private_data, target);
         case IB_CM_REJ_RECEIVED:
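One detail worth spelling out in the new srp_cm_rep_handler: req_lim_delta in the login response is the target's initial credit grant, and SRP_TSK_MGMT_SQ_SIZE slots are withheld so a task-management IU (abort, LUN reset) can still be sent when the command queue is otherwise full. A small sketch with assumed numbers (the helper name is mine, and the reserved-slot count of one is an assumption, not taken from this diff):

        /* Sketch: queue depth advertised to the SCSI midlayer, derived from
         * the target's credit grant while holding back task-management room.
         * Example: req_lim = 64 and one reserved slot -> at most 63 commands
         * in flight, further capped by the local command-ring size. */
        static int srp_compute_can_queue(int req_lim, int local_ring_size)
        {
                return min(req_lim - SRP_TSK_MGMT_SQ_SIZE, local_ring_size);
        }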
@@ -1937 +2117 @@
                 container_of(dev, struct srp_host, dev);
         struct Scsi_Host *target_host;
         struct srp_target_port *target;
+        struct ib_device *ibdev = host->srp_dev->dev;
+        dma_addr_t dma_addr;
         target_host = scsi_host_alloc(&srp_template,
                                       sizeof (struct srp_target_port));
         if (!target_host)
                 return -ENOMEM;
-        target_host->transportt = ib_srp_transport_template;
+        target_host->transportt = ib_srp_transport_template;
         target_host->max_lun = SRP_MAX_LUN;
         target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
         target = host_to_target(target_host);
-        target->io_class = SRP_REV16A_IB_IO_CLASS;
-        target->scsi_host = target_host;
-        target->srp_host = host;
-        target->lkey = host->srp_dev->mr->lkey;
-        target->rkey = host->srp_dev->mr->rkey;
+        target->io_class = SRP_REV16A_IB_IO_CLASS;
+        target->scsi_host = target_host;
+        target->srp_host = host;
+        target->lkey = host->srp_dev->mr->lkey;
+        target->rkey = host->srp_dev->mr->rkey;
+        target->cmd_sg_cnt = cmd_sg_entries;
+        target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries;
+        target->allow_ext_sg = allow_ext_sg;
+        ret = srp_parse_options(buf, target);
+        if (!host->srp_dev->fmr_pool && !target->allow_ext_sg &&
+            target->cmd_sg_cnt < target->sg_tablesize) {
+                printk(KERN_WARNING PFX "No FMR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
+                target->sg_tablesize = target->cmd_sg_cnt;
+        target_host->sg_tablesize = target->sg_tablesize;
+        target->indirect_size = target->sg_tablesize *
+                                sizeof (struct srp_direct_buf);
+        target->max_iu_len = sizeof (struct srp_cmd) +
+                             sizeof (struct srp_indirect_buf) +
+                             target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
         spin_lock_init(&target->lock);
         INIT_LIST_HEAD(&target->free_tx);
         INIT_LIST_HEAD(&target->free_reqs);
         for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) {
-                target->req_ring[i].index = i;
-                list_add_tail(&target->req_ring[i].list, &target->free_reqs);
+                struct srp_request *req = &target->req_ring[i];
+                req->fmr_list = kmalloc(target->cmd_sg_cnt * sizeof (void *),
+                req->map_page = kmalloc(SRP_FMR_SIZE * sizeof (void *),
+                req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
+                if (!req->fmr_list || !req->map_page || !req->indirect_desc)
+                dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
+                                             target->indirect_size,
+                if (ib_dma_mapping_error(ibdev, dma_addr))
+                req->indirect_dma_addr = dma_addr;
+                list_add_tail(&req->list, &target->free_reqs);
-        ret = srp_parse_options(buf, target);
-        ib_query_gid(host->srp_dev->dev, host->port, 0, &target->path.sgid);
+        ib_query_gid(ibdev, host->port, 0, &target->path.sgid);
         shost_printk(KERN_DEBUG, target->scsi_host, PFX
                      "new target: id_ext %016llx ioc_guid %016llx pkey %04x "
@@ -2122 +2340 @@
         if (IS_ERR(srp_dev->mr))
-        memset(&fmr_param, 0, sizeof fmr_param);
-        fmr_param.pool_size = SRP_FMR_POOL_SIZE;
-        fmr_param.dirty_watermark = SRP_FMR_DIRTY_SIZE;
-        fmr_param.cache = 1;
-        fmr_param.max_pages_per_fmr = SRP_FMR_SIZE;
-        fmr_param.page_shift = srp_dev->fmr_page_shift;
-        fmr_param.access = (IB_ACCESS_LOCAL_WRITE |
-                            IB_ACCESS_REMOTE_WRITE |
-                            IB_ACCESS_REMOTE_READ);
-        srp_dev->fmr_pool = ib_create_fmr_pool(srp_dev->pd, &fmr_param);
+        for (max_pages_per_fmr = SRP_FMR_SIZE;
+                        max_pages_per_fmr >= SRP_FMR_MIN_SIZE;
+                        max_pages_per_fmr /= 2, srp_dev->fmr_max_size /= 2) {
+                memset(&fmr_param, 0, sizeof fmr_param);
+                fmr_param.pool_size = SRP_FMR_POOL_SIZE;
+                fmr_param.dirty_watermark = SRP_FMR_DIRTY_SIZE;
+                fmr_param.cache = 1;
+                fmr_param.max_pages_per_fmr = max_pages_per_fmr;
+                fmr_param.page_shift = fmr_page_shift;
+                fmr_param.access = (IB_ACCESS_LOCAL_WRITE |
+                                    IB_ACCESS_REMOTE_WRITE |
+                                    IB_ACCESS_REMOTE_READ);
+                srp_dev->fmr_pool = ib_create_fmr_pool(srp_dev->pd, &fmr_param);
+                if (!IS_ERR(srp_dev->fmr_pool))
         if (IS_ERR(srp_dev->fmr_pool))
                 srp_dev->fmr_pool = NULL;
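Finally, the FMR pool is now created with a fallback loop rather than a single attempt: if ib_create_fmr_pool() fails at SRP_FMR_SIZE pages per FMR, both the page count and the advertised per-FMR byte limit are halved and the attempt is repeated, down to SRP_FMR_MIN_SIZE; a NULL pool simply disables FMR merging. The invariant the loop keeps is easiest to see with example numbers (values assumed, not from the patch):

        /* Example: with 4 KiB FMR pages,
         *   max_pages_per_fmr = 256  ->  fmr_max_size = 256 * 4096 = 1 MiB
         *   max_pages_per_fmr = 128  ->  fmr_max_size = 128 * 4096 = 512 KiB
         * srp_map_sg_entry compares sg-entry lengths against fmr_max_size
         * (see the dma_len > dev->fmr_max_size test earlier), so halving the
         * two together keeps the mapper from building an FMR that the
         * smaller pool could no longer back. */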