~ubuntu-branches/ubuntu/trusty/opensm/trusty-proposed

« back to all changes in this revision

Viewing changes to opensm/osm_congestion_control.c

  • Committer: Package Import Robot
  • Author(s): Roland Dreier
  • Date: 2012-10-15 09:52:18 UTC
  • mfrom: (1.1.1)
  • Revision ID: package-import@ubuntu.com-20121015095218-9n0v7cba8rt1nlp8
Tags: 3.3.15-0.1
* Non-maintainer upload.
* Acknowledge NMU.
* New upstream release.
* Bump Standards-Version to 3.9.4 (no changes).
* Switch to dpkg-source 3.0 (quilt) format.
* Don't email root from logrotate just because opensm isn't running.
* libopensm4 -> libopensm5 due to soname bump. 

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*
 
2
 * Copyright (c) 2006-2009 Voltaire, Inc. All rights reserved.
 
3
 * Copyright (c) 2009 HNR Consulting. All rights reserved.
 
4
 * Copyright (c) 2012 Lawrence Livermore National Lab.  All rights reserved.
 
5
 *
 
6
 * This software is available to you under a choice of one of two
 
7
 * licenses.  You may choose to be licensed under the terms of the GNU
 
8
 * General Public License (GPL) Version 2, available from the file
 
9
 * COPYING in the main directory of this source tree, or the
 
10
 * OpenIB.org BSD license below:
 
11
 *
 
12
 *     Redistribution and use in source and binary forms, with or
 
13
 *     without modification, are permitted provided that the following
 
14
 *     conditions are met:
 
15
 *
 
16
 *      - Redistributions of source code must retain the above
 
17
 *        copyright notice, this list of conditions and the following
 
18
 *        disclaimer.
 
19
 *
 
20
 *      - Redistributions in binary form must reproduce the above
 
21
 *        copyright notice, this list of conditions and the following
 
22
 *        disclaimer in the documentation and/or other materials
 
23
 *        provided with the distribution.
 
24
 *
 
25
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 
26
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 
27
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 
28
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 
29
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 
30
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 
31
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 
32
 * SOFTWARE.
 
33
 *
 
34
 */
 
35
 
 
36
/*
 
37
 * Abstract:
 
38
 *    OSM Congestion Control configuration implementation
 
39
 *
 
40
 * Author:
 
41
 *    Albert Chu, LLNL
 
42
 */
 
43
 
 
44
#if HAVE_CONFIG_H
 
45
#  include <config.h>
 
46
#endif                          /* HAVE_CONFIG_H */
 
47
 
 
48
#include <stdlib.h>
 
49
#include <string.h>
 
50
 
 
51
#include <iba/ib_types.h>
 
52
#include <complib/cl_debug.h>
 
53
#include <opensm/osm_subnet.h>
 
54
#include <opensm/osm_opensm.h>
 
55
#include <opensm/osm_log.h>
 
56
#include <opensm/osm_subnet.h>
 
57
#include <opensm/osm_congestion_control.h>
 
58
 
 
59
#define CONGESTION_CONTROL_INITIAL_TID_VALUE 0x7A93
 
60
 
 
61
static void cc_mad_post(osm_congestion_control_t *p_cc,
 
62
                        osm_madw_t *p_madw,
 
63
                        osm_node_t *p_node,
 
64
                        osm_physp_t *p_physp,
 
65
                        ib_net16_t attr_id,
 
66
                        ib_net32_t attr_mod)
 
67
{
 
68
        osm_subn_opt_t *p_opt = &p_cc->subn->opt;
 
69
        ib_cc_mad_t *p_cc_mad;
 
70
        uint8_t port;
 
71
 
 
72
        OSM_LOG_ENTER(p_cc->log);
 
73
 
 
74
        port = osm_physp_get_port_num(p_physp);
 
75
 
 
76
        p_cc_mad = osm_madw_get_cc_mad_ptr(p_madw);
 
77
 
 
78
        p_cc_mad->header.base_ver = 1;
 
79
        p_cc_mad->header.mgmt_class = IB_MCLASS_CC;
 
80
        p_cc_mad->header.class_ver = 2;
 
81
        p_cc_mad->header.method = IB_MAD_METHOD_SET;
 
82
        p_cc_mad->header.status = 0;
 
83
        p_cc_mad->header.class_spec = 0;
 
84
        p_cc_mad->header.trans_id =
 
85
                cl_hton64((uint64_t) cl_atomic_inc(&p_cc->trans_id) &
 
86
                          (uint64_t) (0xFFFFFFFF));
 
87
        if (p_cc_mad->header.trans_id == 0)
 
88
                p_cc_mad->header.trans_id =
 
89
                        cl_hton64((uint64_t) cl_atomic_inc(&p_cc->trans_id) &
 
90
                                  (uint64_t) (0xFFFFFFFF));
 
91
        p_cc_mad->header.attr_id = attr_id;
 
92
        p_cc_mad->header.resv = 0;
 
93
        p_cc_mad->header.attr_mod = attr_mod;
 
94
 
 
95
        p_cc_mad->cc_key = p_opt->cc_key;
 
96
 
 
97
        memset(p_cc_mad->log_data, '\0', IB_CC_LOG_DATA_SIZE);
 
98
 
 
99
        p_madw->mad_addr.dest_lid = osm_node_get_base_lid(p_node, port);
 
100
        p_madw->mad_addr.addr_type.gsi.remote_qp = IB_QP1;
 
101
        p_madw->mad_addr.addr_type.gsi.remote_qkey =
 
102
                cl_hton32(IB_QP1_WELL_KNOWN_Q_KEY);
 
103
        p_madw->resp_expected = TRUE;
 
104
        p_madw->fail_msg = CL_DISP_MSGID_NONE;
 
105
 
 
106
        p_madw->context.cc_context.node_guid = osm_node_get_node_guid(p_node);
 
107
        p_madw->context.cc_context.port_guid = osm_physp_get_port_guid(p_physp);
 
108
        p_madw->context.cc_context.port = port;
 
109
        p_madw->context.cc_context.mad_method = IB_MAD_METHOD_SET;
 
110
        p_madw->context.cc_context.attr_mod = attr_mod;
 
111
 
 
112
        cl_spinlock_acquire(&p_cc->mad_queue_lock);
 
113
        cl_atomic_inc(&p_cc->outstanding_mads);
 
114
        cl_qlist_insert_tail(&p_cc->mad_queue, &p_madw->list_item);
 
115
        cl_spinlock_release(&p_cc->mad_queue_lock);
 
116
 
 
117
        cl_event_signal(&p_cc->cc_poller_wakeup);
 
118
 
 
119
        OSM_LOG_EXIT(p_cc->log);
 
120
}
 
121
 
 
122
/*
 * Build the cached MAD payloads (switch congestion setting, CA
 * congestion setting, and the congestion control table blocks) from the
 * configured subnet options.  Called once per sweep before the
 * per-port sends in osm_congestion_control_setup().
 */
static void cc_setup_mad_data(osm_sm_t * p_sm)
{
	osm_congestion_control_t *p_cc = &p_sm->p_subn->p_osm->cc;
	osm_subn_opt_t *p_opt = &p_sm->p_subn->opt;
	uint16_t ccti_limit;
	int mad;

	/* Switch Congestion Setting */
	p_cc->sw_cong_setting.control_map = p_opt->cc_sw_cong_setting_control_map;

	memcpy(p_cc->sw_cong_setting.victim_mask,
	       p_opt->cc_sw_cong_setting_victim_mask,
	       IB_CC_PORT_MASK_DATA_SIZE);

	memcpy(p_cc->sw_cong_setting.credit_mask,
	       p_opt->cc_sw_cong_setting_credit_mask,
	       IB_CC_PORT_MASK_DATA_SIZE);

	/* threshold is 4 bits wide, held in the upper nibble of the byte */
	p_cc->sw_cong_setting.threshold_resv =
		(p_opt->cc_sw_cong_setting_threshold << 4);

	p_cc->sw_cong_setting.packet_size = p_opt->cc_sw_cong_setting_packet_size;

	/* cs threshold is 4 bits wide, held in the upper nibble of the short */
	p_cc->sw_cong_setting.cs_threshold_resv =
		cl_hton16(p_opt->cc_sw_cong_setting_credit_starvation_threshold << 12);

	/* shift (2 bits) in the top of the short, multiplier below it */
	p_cc->sw_cong_setting.cs_return_delay =
		cl_hton16(p_opt->cc_sw_cong_setting_credit_starvation_return_delay.shift << 14
			  | p_opt->cc_sw_cong_setting_credit_starvation_return_delay.multiplier);

	p_cc->sw_cong_setting.marking_rate = p_opt->cc_sw_cong_setting_marking_rate;

	/* CA Congestion Setting */
	p_cc->ca_cong_setting.port_control = p_opt->cc_ca_cong_setting_port_control;
	p_cc->ca_cong_setting.control_map = p_opt->cc_ca_cong_setting_control_map;

	for (mad = 0; mad < IB_CA_CONG_ENTRY_DATA_SIZE; mad++) {
		ib_ca_cong_entry_t *p_entry =
			&p_cc->ca_cong_setting.entry_list[mad];

		p_entry->ccti_timer = p_opt->cc_ca_cong_entries[mad].ccti_timer;
		p_entry->ccti_increase =
			p_opt->cc_ca_cong_entries[mad].ccti_increase;
		p_entry->trigger_threshold =
			p_opt->cc_ca_cong_entries[mad].trigger_threshold;
		p_entry->ccti_min = p_opt->cc_ca_cong_entries[mad].ccti_min;
		p_entry->resv0 = 0;
		p_entry->resv1 = 0;
	}

	/* Congestion Control Table */

	/* even with no entries, send at least 1 MAD to set ccti_limit = 0 */
	if (!p_opt->cc_cct.entries_len) {
		p_cc->cc_tbl_mads = 1;
		ccti_limit = 0;
	} else {
		p_cc->cc_tbl_mads =
			((p_opt->cc_cct.entries_len - 1) /
			 IB_CC_TBL_ENTRY_LIST_MAX) + 1;
		ccti_limit = p_opt->cc_cct.entries_len - 1;
	}

	CL_ASSERT(p_cc->cc_tbl_mads <= OSM_CCT_ENTRY_MAD_BLOCKS);

	for (mad = 0; mad < p_cc->cc_tbl_mads; mad++) {
		int slot;

		p_cc->cc_tbl[mad].ccti_limit = cl_hton16(ccti_limit);
		p_cc->cc_tbl[mad].resv = 0;

		memset(p_cc->cc_tbl[mad].entry_list, '\0',
		       sizeof(p_cc->cc_tbl[mad].entry_list));

		/* with an empty table only the first (zeroed) block is built */
		if (!ccti_limit)
			break;

		for (slot = 0; slot < IB_CC_TBL_ENTRY_LIST_MAX; slot++) {
			/* flat index into the configured entry array;
			 * NOTE(review): the tail block reads entries past
			 * entries_len (still inside the option array, which
			 * was zero-filled) — confirm that is intended */
			int src = (mad * IB_CC_TBL_ENTRY_LIST_MAX) + slot;

			p_cc->cc_tbl[mad].entry_list[slot].shift_multiplier =
				cl_hton16(p_opt->cc_cct.entries[src].shift << 14
					  | p_opt->cc_cct.entries[src].multiplier);
		}
	}
}
 
213
 
 
214
static ib_api_status_t cc_send_sw_cong_setting(osm_sm_t * p_sm,
 
215
                                               osm_node_t *p_node)
 
216
{
 
217
        osm_congestion_control_t *p_cc = &p_sm->p_subn->p_osm->cc;
 
218
        unsigned force_update;
 
219
        osm_physp_t *p_physp;
 
220
        osm_madw_t *p_madw = NULL;
 
221
        ib_cc_mad_t *p_cc_mad = NULL;
 
222
        ib_sw_cong_setting_t *p_sw_cong_setting = NULL;
 
223
 
 
224
        OSM_LOG_ENTER(p_sm->p_log);
 
225
 
 
226
        p_physp = osm_node_get_physp_ptr(p_node, 0);
 
227
 
 
228
        force_update = p_physp->need_update || p_sm->p_subn->need_update;
 
229
 
 
230
        if (!force_update
 
231
            && !memcmp(&p_cc->sw_cong_setting,
 
232
                       &p_physp->cc.sw.sw_cong_setting,
 
233
                       sizeof(p_cc->sw_cong_setting)))
 
234
                return IB_SUCCESS;
 
235
 
 
236
        p_madw = osm_mad_pool_get(p_cc->mad_pool, p_cc->bind_handle,
 
237
                                  MAD_BLOCK_SIZE, NULL);
 
238
        if (p_madw == NULL) {
 
239
                OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR C101: "
 
240
                        "failed to allocate mad\n");
 
241
                return IB_INSUFFICIENT_MEMORY;
 
242
        }
 
243
 
 
244
        p_cc_mad = osm_madw_get_cc_mad_ptr(p_madw);
 
245
 
 
246
        p_sw_cong_setting = ib_cc_mad_get_mgt_data_ptr(p_cc_mad);
 
247
 
 
248
        memcpy(p_sw_cong_setting,
 
249
               &p_cc->sw_cong_setting,
 
250
               sizeof(p_cc->sw_cong_setting));
 
251
 
 
252
        cc_mad_post(p_cc, p_madw, p_node, p_physp,
 
253
                    IB_MAD_ATTR_SW_CONG_SETTING, 0);
 
254
 
 
255
        OSM_LOG_EXIT(p_sm->p_log);
 
256
 
 
257
        return IB_SUCCESS;
 
258
}
 
259
 
 
260
static ib_api_status_t cc_send_ca_cong_setting(osm_sm_t * p_sm,
 
261
                                               osm_node_t *p_node,
 
262
                                               osm_physp_t *p_physp)
 
263
{
 
264
        osm_congestion_control_t *p_cc = &p_sm->p_subn->p_osm->cc;
 
265
        unsigned force_update;
 
266
        osm_madw_t *p_madw = NULL;
 
267
        ib_cc_mad_t *p_cc_mad = NULL;
 
268
        ib_ca_cong_setting_t *p_ca_cong_setting = NULL;
 
269
 
 
270
        OSM_LOG_ENTER(p_sm->p_log);
 
271
 
 
272
        force_update = p_physp->need_update || p_sm->p_subn->need_update;
 
273
 
 
274
        if (!force_update
 
275
            && !memcmp(&p_cc->ca_cong_setting,
 
276
                       &p_physp->cc.ca.ca_cong_setting,
 
277
                       sizeof(p_cc->ca_cong_setting)))
 
278
                return IB_SUCCESS;
 
279
 
 
280
        p_madw = osm_mad_pool_get(p_cc->mad_pool, p_cc->bind_handle,
 
281
                                  MAD_BLOCK_SIZE, NULL);
 
282
        if (p_madw == NULL) {
 
283
                OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR C102: "
 
284
                        "failed to allocate mad\n");
 
285
                return IB_INSUFFICIENT_MEMORY;
 
286
        }
 
287
 
 
288
        p_cc_mad = osm_madw_get_cc_mad_ptr(p_madw);
 
289
 
 
290
        p_ca_cong_setting = ib_cc_mad_get_mgt_data_ptr(p_cc_mad);
 
291
 
 
292
        memcpy(p_ca_cong_setting,
 
293
               &p_cc->ca_cong_setting,
 
294
               sizeof(p_cc->ca_cong_setting));
 
295
 
 
296
        cc_mad_post(p_cc, p_madw, p_node, p_physp,
 
297
                    IB_MAD_ATTR_CA_CONG_SETTING, 0);
 
298
 
 
299
        OSM_LOG_EXIT(p_sm->p_log);
 
300
 
 
301
        return IB_SUCCESS;
 
302
}
 
303
 
 
304
static ib_api_status_t cc_send_cct(osm_sm_t * p_sm,
 
305
                                   osm_node_t *p_node,
 
306
                                   osm_physp_t *p_physp)
 
307
{
 
308
        osm_congestion_control_t *p_cc = &p_sm->p_subn->p_osm->cc;
 
309
        unsigned force_update;
 
310
        osm_madw_t *p_madw = NULL;
 
311
        ib_cc_mad_t *p_cc_mad = NULL;
 
312
        ib_cc_tbl_t *p_cc_tbl = NULL;
 
313
        unsigned int index = 0;
 
314
 
 
315
        OSM_LOG_ENTER(p_sm->p_log);
 
316
 
 
317
        force_update = p_physp->need_update || p_sm->p_subn->need_update;
 
318
 
 
319
        for (index = 0; index < p_cc->cc_tbl_mads; index++) {
 
320
                if (!force_update
 
321
                    && !memcmp(&p_cc->cc_tbl[index],
 
322
                               &p_physp->cc.ca.cc_tbl[index],
 
323
                               sizeof(p_cc->cc_tbl[index])))
 
324
                        continue;
 
325
 
 
326
                p_madw = osm_mad_pool_get(p_cc->mad_pool, p_cc->bind_handle,
 
327
                                          MAD_BLOCK_SIZE, NULL);
 
328
                if (p_madw == NULL) {
 
329
                        OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR C103: "
 
330
                                "failed to allocate mad\n");
 
331
                        return IB_INSUFFICIENT_MEMORY;
 
332
                }
 
333
 
 
334
                p_cc_mad = osm_madw_get_cc_mad_ptr(p_madw);
 
335
 
 
336
                p_cc_tbl = (ib_cc_tbl_t *)ib_cc_mad_get_mgt_data_ptr(p_cc_mad);
 
337
 
 
338
                memcpy(p_cc_tbl,
 
339
                       &p_cc->cc_tbl[index],
 
340
                       sizeof(p_cc->cc_tbl[index]));
 
341
 
 
342
                cc_mad_post(p_cc, p_madw, p_node, p_physp,
 
343
                            IB_MAD_ATTR_CC_TBL, cl_hton32(index));
 
344
        }
 
345
 
 
346
        OSM_LOG_EXIT(p_sm->p_log);
 
347
 
 
348
        return IB_SUCCESS;
 
349
}
 
350
 
 
351
int osm_congestion_control_setup(struct osm_opensm *p_osm)
 
352
{
 
353
        cl_qmap_t *p_tbl;
 
354
        cl_map_item_t *p_next;
 
355
        int ret = 0;
 
356
 
 
357
        if (!p_osm->subn.opt.congestion_control)
 
358
                return 0;
 
359
 
 
360
        OSM_LOG_ENTER(&p_osm->log);
 
361
 
 
362
        /*
 
363
         * Do nothing unless the most recent routing attempt was successful.
 
364
         */
 
365
        if (!p_osm->sm.p_subn->p_osm->routing_engine_used)
 
366
                return 0;
 
367
 
 
368
        cc_setup_mad_data(&p_osm->sm);
 
369
 
 
370
        cl_plock_acquire(&p_osm->lock);
 
371
 
 
372
        p_tbl = &p_osm->subn.port_guid_tbl;
 
373
        p_next = cl_qmap_head(p_tbl);
 
374
        while (p_next != cl_qmap_end(p_tbl)) {
 
375
                osm_port_t *p_port = (osm_port_t *) p_next;
 
376
                osm_node_t *p_node = p_port->p_node;
 
377
                ib_api_status_t status;
 
378
 
 
379
                p_next = cl_qmap_next(p_next);
 
380
 
 
381
                if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH) {
 
382
                        status = cc_send_sw_cong_setting(&p_osm->sm, p_node);
 
383
                        if (status != IB_SUCCESS)
 
384
                                ret = -1;
 
385
                } else if (osm_node_get_type(p_node) == IB_NODE_TYPE_CA) {
 
386
                        status = cc_send_ca_cong_setting(&p_osm->sm,
 
387
                                                         p_node,
 
388
                                                         p_port->p_physp);
 
389
                        if (status != IB_SUCCESS)
 
390
                                ret = -1;
 
391
 
 
392
                        status = cc_send_cct(&p_osm->sm,
 
393
                                             p_node,
 
394
                                             p_port->p_physp);
 
395
                        if (status != IB_SUCCESS)
 
396
                                ret = -1;
 
397
                }
 
398
        }
 
399
 
 
400
        cl_plock_release(&p_osm->lock);
 
401
 
 
402
        OSM_LOG_EXIT(&p_osm->log);
 
403
 
 
404
        return ret;
 
405
}
 
406
 
 
407
int osm_congestion_control_wait_pending_transactions(struct osm_opensm *p_osm)
 
408
{
 
409
        osm_congestion_control_t *cc = &p_osm->sm.p_subn->p_osm->cc;
 
410
 
 
411
        if (!p_osm->subn.opt.congestion_control)
 
412
                return 0;
 
413
 
 
414
        while (1) {
 
415
                unsigned count = cc->outstanding_mads;
 
416
                if (!count || osm_exit_flag)
 
417
                        break;
 
418
                cl_event_wait_on(&cc->outstanding_mads_done_event,
 
419
                                 EVENT_NO_TIMEOUT,
 
420
                                 TRUE);
 
421
        }
 
422
 
 
423
        return osm_exit_flag;
 
424
}
 
425
 
 
426
/*
 * Account for one completed (answered or failed) CC MAD.  Signals the
 * waiter in osm_congestion_control_wait_pending_transactions() when the
 * last one finishes, and always lets the throttled poller push the next
 * MAD onto the wire.
 */
static inline void decrement_outstanding_mads(osm_congestion_control_t *p_cc)
{
	if (cl_atomic_dec(&p_cc->outstanding_mads) == 0)
		cl_event_signal(&p_cc->outstanding_mads_done_event);

	cl_atomic_dec(&p_cc->outstanding_mads_on_wire);
	cl_event_signal(&p_cc->sig_mads_on_wire_continue);
}
 
437
 
 
438
 
 
439
static void cc_rcv_mad(void *context, void *data)
 
440
{
 
441
        osm_congestion_control_t *p_cc = context;
 
442
        osm_opensm_t *p_osm = p_cc->osm;
 
443
        osm_madw_t *p_madw = data;
 
444
        ib_cc_mad_t *p_cc_mad;
 
445
        osm_madw_context_t *p_mad_context = &p_madw->context;
 
446
        ib_mad_t *p_mad = osm_madw_get_mad_ptr(p_madw);
 
447
        uint64_t node_guid = p_mad_context->cc_context.node_guid;
 
448
        uint64_t port_guid = p_mad_context->cc_context.port_guid;
 
449
        uint8_t port = p_mad_context->cc_context.port;
 
450
        osm_port_t *p_port;
 
451
 
 
452
        OSM_LOG_ENTER(p_cc->log);
 
453
 
 
454
        OSM_LOG(p_cc->log, OSM_LOG_VERBOSE,
 
455
                "Processing received MAD status 0x%x context 0x%"
 
456
                PRIx64 "port %u\n", p_mad->status, node_guid, port);
 
457
 
 
458
        p_cc_mad = osm_madw_get_cc_mad_ptr(p_madw);
 
459
 
 
460
        cl_plock_acquire(&p_osm->lock);
 
461
 
 
462
        p_port = osm_get_port_by_guid(p_cc->subn, port_guid);
 
463
        if (!p_port) {
 
464
                OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C109: "
 
465
                        "Port guid not in table 0x%" PRIx64 "\n",
 
466
                           port_guid);
 
467
                cl_plock_release(&p_osm->lock);
 
468
                goto Exit;
 
469
        }
 
470
 
 
471
        if (p_cc_mad->header.attr_id == IB_MAD_ATTR_SW_CONG_SETTING) {
 
472
                ib_sw_cong_setting_t *p_sw_cong_setting;
 
473
 
 
474
                p_sw_cong_setting = ib_cc_mad_get_mgt_data_ptr(p_cc_mad);
 
475
                p_port->p_physp->cc.sw.sw_cong_setting = *p_sw_cong_setting;
 
476
        }
 
477
        else if (p_cc_mad->header.attr_id == IB_MAD_ATTR_CA_CONG_SETTING) {
 
478
                ib_ca_cong_setting_t *p_ca_cong_setting;
 
479
 
 
480
                p_ca_cong_setting = ib_cc_mad_get_mgt_data_ptr(p_cc_mad);
 
481
                p_port->p_physp->cc.ca.ca_cong_setting = *p_ca_cong_setting;
 
482
        }
 
483
        else if (p_cc_mad->header.attr_id == IB_MAD_ATTR_CC_TBL) {
 
484
                ib_net32_t attr_mod = p_mad_context->cc_context.attr_mod;
 
485
                uint32_t index = cl_ntoh32(attr_mod);
 
486
                ib_cc_tbl_t *p_cc_tbl;
 
487
 
 
488
                p_cc_tbl = ib_cc_mad_get_mgt_data_ptr(p_cc_mad);
 
489
                p_port->p_physp->cc.ca.cc_tbl[index] = *p_cc_tbl;
 
490
        }
 
491
        else
 
492
                OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C10A: "
 
493
                        "Unexpected MAD attribute received: %u\n",
 
494
                           p_cc_mad->header.attr_id);
 
495
 
 
496
        cl_plock_release(&p_osm->lock);
 
497
 
 
498
Exit:
 
499
        decrement_outstanding_mads(p_cc);
 
500
        osm_mad_pool_put(p_cc->mad_pool, p_madw);
 
501
        OSM_LOG_EXIT(p_cc->log);
 
502
}
 
503
 
 
504
static void cc_poller_send(osm_congestion_control_t *p_cc,
 
505
                           osm_madw_t *p_madw)
 
506
{
 
507
        osm_subn_opt_t *p_opt = &p_cc->subn->opt;
 
508
        ib_api_status_t status;
 
509
 
 
510
        status = osm_vendor_send(p_cc->bind_handle, p_madw, TRUE);
 
511
        if (status == IB_SUCCESS) {
 
512
                cl_atomic_inc(&p_cc->outstanding_mads_on_wire);
 
513
                if (p_cc->outstanding_mads_on_wire >
 
514
                    p_opt->cc_max_outstanding_mads)
 
515
                        cl_event_wait_on(&p_cc->sig_mads_on_wire_continue,
 
516
                                         EVENT_NO_TIMEOUT,
 
517
                                         TRUE);
 
518
        }
 
519
        else {
 
520
                osm_madw_context_t *mad_context = &p_madw->context;
 
521
 
 
522
                OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C104: "
 
523
                        "send failed to node 0x%" PRIx64 "port %u\n",
 
524
                        mad_context->cc_context.node_guid,
 
525
                        mad_context->cc_context.port);
 
526
        }
 
527
}
 
528
 
 
529
static void cc_poller(void *p_ptr)
 
530
{
 
531
        osm_congestion_control_t *p_cc = p_ptr;
 
532
        osm_madw_t *p_madw;
 
533
 
 
534
        OSM_LOG_ENTER(p_cc->log);
 
535
 
 
536
        if (p_cc->thread_state == OSM_THREAD_STATE_NONE)
 
537
                p_cc->thread_state = OSM_THREAD_STATE_RUN;
 
538
 
 
539
        while (p_cc->thread_state == OSM_THREAD_STATE_RUN) {
 
540
                cl_spinlock_acquire(&p_cc->mad_queue_lock);
 
541
 
 
542
                p_madw = (osm_madw_t *) cl_qlist_remove_head(&p_cc->mad_queue);
 
543
 
 
544
                cl_spinlock_release(&p_cc->mad_queue_lock);
 
545
 
 
546
                if (p_madw != (osm_madw_t *) cl_qlist_end(&p_cc->mad_queue))
 
547
                        cc_poller_send(p_cc, p_madw);
 
548
                else
 
549
                        cl_event_wait_on(&p_cc->cc_poller_wakeup,
 
550
                                         EVENT_NO_TIMEOUT, TRUE);
 
551
        }
 
552
 
 
553
        OSM_LOG_EXIT(p_cc->log);
 
554
}
 
555
 
 
556
ib_api_status_t osm_congestion_control_init(osm_congestion_control_t * p_cc,
 
557
                                            struct osm_opensm *p_osm,
 
558
                                            const osm_subn_opt_t * p_opt)
 
559
{
 
560
        ib_api_status_t status = IB_SUCCESS;
 
561
 
 
562
        OSM_LOG_ENTER(&p_osm->log);
 
563
 
 
564
        memset(p_cc, 0, sizeof(*p_cc));
 
565
 
 
566
        p_cc->osm = p_osm;
 
567
        p_cc->subn = &p_osm->subn;
 
568
        p_cc->sm = &p_osm->sm;
 
569
        p_cc->log = &p_osm->log;
 
570
        p_cc->mad_pool = &p_osm->mad_pool;
 
571
        p_cc->trans_id = CONGESTION_CONTROL_INITIAL_TID_VALUE;
 
572
        p_cc->vendor = p_osm->p_vendor;
 
573
 
 
574
        p_cc->cc_disp_h = cl_disp_register(&p_osm->disp, OSM_MSG_MAD_CC,
 
575
                                           cc_rcv_mad, p_cc);
 
576
        if (p_cc->cc_disp_h == CL_DISP_INVALID_HANDLE)
 
577
                goto Exit;
 
578
 
 
579
        cl_qlist_init(&p_cc->mad_queue);
 
580
 
 
581
        status = cl_spinlock_init(&p_cc->mad_queue_lock);
 
582
        if (status != IB_SUCCESS)
 
583
                goto Exit;
 
584
 
 
585
        cl_event_construct(&p_cc->cc_poller_wakeup);
 
586
        status = cl_event_init(&p_cc->cc_poller_wakeup, FALSE);
 
587
        if (status != IB_SUCCESS)
 
588
                goto Exit;
 
589
 
 
590
        cl_event_construct(&p_cc->outstanding_mads_done_event);
 
591
        status = cl_event_init(&p_cc->outstanding_mads_done_event, FALSE);
 
592
        if (status != IB_SUCCESS)
 
593
                goto Exit;
 
594
 
 
595
        cl_event_construct(&p_cc->sig_mads_on_wire_continue);
 
596
        status = cl_event_init(&p_cc->sig_mads_on_wire_continue, FALSE);
 
597
        if (status != IB_SUCCESS)
 
598
                goto Exit;
 
599
 
 
600
        p_cc->thread_state = OSM_THREAD_STATE_NONE;
 
601
 
 
602
        status = cl_thread_init(&p_cc->cc_poller, cc_poller, p_cc,
 
603
                                "cc poller");
 
604
        if (status != IB_SUCCESS)
 
605
                goto Exit;
 
606
 
 
607
        status = IB_SUCCESS;
 
608
Exit:
 
609
        OSM_LOG_EXIT(p_cc->log);
 
610
        return status;
 
611
}
 
612
 
 
613
/*
 * Vendor receive callback: propagate the request's cc_context onto the
 * response, return the request wrapper to the pool, and post the
 * response to the dispatcher for cc_rcv_mad().
 *
 * The outstanding counters are decremented by the dispatcher handler,
 * not here.  NOTE(review): if cl_disp_post() fails, the response is
 * freed but no decrement happens on this path — verify the counters
 * cannot get stuck in that case.
 */
static void cc_mad_recv_callback(osm_madw_t * p_madw, void *bind_context,
                                 osm_madw_t * p_req_madw)
{
	osm_congestion_control_t *p_cc = bind_context;

	OSM_LOG_ENTER(p_cc->log);

	osm_madw_copy_context(p_madw, p_req_madw);
	osm_mad_pool_put(p_cc->mad_pool, p_req_madw);

	/* Do not decrement outstanding mads here, do it in the dispatcher */

	if (cl_disp_post(p_cc->cc_disp_h, OSM_MSG_MAD_CC,
			 p_madw, NULL, NULL) != CL_SUCCESS) {
		OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C105: "
			"Congestion Control Dispatcher post failed\n");
		osm_mad_pool_put(p_cc->mad_pool, p_madw);
	}

	OSM_LOG_EXIT(p_cc->log);
}
 
634
 
 
635
static void cc_mad_send_err_callback(void *bind_context,
 
636
                                     osm_madw_t * p_madw)
 
637
{
 
638
        osm_congestion_control_t *p_cc = bind_context;
 
639
        osm_madw_context_t *p_madw_context = &p_madw->context;
 
640
        uint64_t node_guid = p_madw_context->cc_context.node_guid;
 
641
        uint8_t port = p_madw_context->cc_context.port;
 
642
 
 
643
        OSM_LOG_ENTER(p_cc->log);
 
644
 
 
645
        OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C106: MAD Error (%s): "
 
646
                "attr id = %u LID %u GUID 0x%016" PRIx64 " port %u "
 
647
                "TID 0x%" PRIx64 "\n",
 
648
                ib_get_err_str(p_madw->status),
 
649
                p_madw->p_mad->attr_id,
 
650
                cl_ntoh16(p_madw->mad_addr.dest_lid),
 
651
                node_guid,
 
652
                port,
 
653
                cl_ntoh64(p_madw->p_mad->trans_id));
 
654
 
 
655
        p_cc->subn->subnet_initialization_error = TRUE;
 
656
 
 
657
        osm_mad_pool_put(p_cc->mad_pool, p_madw);
 
658
 
 
659
        decrement_outstanding_mads(p_cc);
 
660
 
 
661
        OSM_LOG_EXIT(p_cc->log);
 
662
}
 
663
 
 
664
ib_api_status_t osm_congestion_control_bind(osm_congestion_control_t * p_cc,
 
665
                                            ib_net64_t port_guid)
 
666
{
 
667
        osm_bind_info_t bind_info;
 
668
        ib_api_status_t status = IB_SUCCESS;
 
669
 
 
670
        OSM_LOG_ENTER(p_cc->log);
 
671
 
 
672
        bind_info.port_guid = p_cc->port_guid = port_guid;
 
673
        bind_info.mad_class = IB_MCLASS_CC;
 
674
        bind_info.class_version = 2;
 
675
        bind_info.is_responder = FALSE;
 
676
        bind_info.is_report_processor = FALSE;
 
677
        bind_info.is_trap_processor = FALSE;
 
678
        bind_info.recv_q_size = OSM_SM_DEFAULT_QP1_RCV_SIZE;
 
679
        bind_info.send_q_size = OSM_SM_DEFAULT_QP1_SEND_SIZE;
 
680
        bind_info.timeout = p_cc->subn->opt.transaction_timeout;
 
681
        bind_info.retries = p_cc->subn->opt.transaction_retries;
 
682
 
 
683
        OSM_LOG(p_cc->log, OSM_LOG_VERBOSE,
 
684
                "Binding to port GUID 0x%" PRIx64 "\n", cl_ntoh64(port_guid));
 
685
 
 
686
        p_cc->bind_handle = osm_vendor_bind(p_cc->vendor, &bind_info,
 
687
                                            p_cc->mad_pool,
 
688
                                            cc_mad_recv_callback,
 
689
                                            cc_mad_send_err_callback, p_cc);
 
690
 
 
691
        if (p_cc->bind_handle == OSM_BIND_INVALID_HANDLE) {
 
692
                status = IB_ERROR;
 
693
                OSM_LOG(p_cc->log, OSM_LOG_ERROR,
 
694
                        "ERR C107: Vendor specific bind failed (%s)\n",
 
695
                        ib_get_err_str(status));
 
696
                goto Exit;
 
697
        }
 
698
 
 
699
Exit:
 
700
        OSM_LOG_EXIT(p_cc->log);
 
701
        return status;
 
702
}
 
703
 
 
704
/*
 * Detach congestion control from the dispatcher.  Logs an error and
 * does nothing else if osm_congestion_control_bind() never succeeded.
 */
void osm_congestion_control_shutdown(osm_congestion_control_t * p_cc)
{
	OSM_LOG_ENTER(p_cc->log);

	if (p_cc->bind_handle == OSM_BIND_INVALID_HANDLE) {
		OSM_LOG(p_cc->log, OSM_LOG_ERROR,
			"ERR C108: No previous bind\n");
		goto Exit;
	}

	cl_disp_unregister(p_cc->cc_disp_h);

Exit:
	OSM_LOG_EXIT(p_cc->log);
}
 
716
 
 
717
/*
 * Tear down the CC poller thread and release the queue lock and
 * events.  Any MADs still queued (never sent) are returned to the pool
 * before the lock is destroyed.
 */
void osm_congestion_control_destroy(osm_congestion_control_t * p_cc)
{
	osm_madw_t *p_madw;

	OSM_LOG_ENTER(p_cc->log);

	/* ask the poller to exit and kick it past either wait point */
	p_cc->thread_state = OSM_THREAD_STATE_EXIT;
	cl_event_signal(&p_cc->sig_mads_on_wire_continue);
	cl_event_signal(&p_cc->cc_poller_wakeup);

	cl_thread_destroy(&p_cc->cc_poller);

	cl_spinlock_acquire(&p_cc->mad_queue_lock);
	while (!cl_is_qlist_empty(&p_cc->mad_queue)) {
		p_madw = (osm_madw_t *) cl_qlist_remove_head(&p_cc->mad_queue);
		osm_mad_pool_put(p_cc->mad_pool, p_madw);
	}
	cl_spinlock_release(&p_cc->mad_queue_lock);

	cl_spinlock_destroy(&p_cc->mad_queue_lock);

	cl_event_destroy(&p_cc->cc_poller_wakeup);
	cl_event_destroy(&p_cc->outstanding_mads_done_event);
	cl_event_destroy(&p_cc->sig_mads_on_wire_continue);

	OSM_LOG_EXIT(p_cc->log);
}