 struct inet_peer_base {
         struct inet_peer __rcu *root;
...
 static struct inet_peer_base v4_peers = {
         .root           = peer_avl_empty_rcu,
-        .lock           = __SPIN_LOCK_UNLOCKED(v4_peers.lock),
+        .lock           = __SEQLOCK_UNLOCKED(v4_peers.lock),
...
 static struct inet_peer_base v6_peers = {
         .root           = peer_avl_empty_rcu,
-        .lock           = __SPIN_LOCK_UNLOCKED(v6_peers.lock),
+        .lock           = __SEQLOCK_UNLOCKED(v6_peers.lock),
...
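These hunks are against net/ipv4/inetpeer.c. The point of replacing the per-family base spinlock with a seqlock is that a reader can walk the AVL tree with no lock at all and afterwards ask whether a writer rebalanced the tree underneath it, falling back to the locked path only in that rare case. A minimal user-space analogue of the read-side pattern, using C11 atomics (all names here are illustrative, not kernel API, and the memory ordering is simplified):

#include <stdatomic.h>
#include <stdbool.h>

/* Toy seqcount: a writer bumps it to odd before touching the data and
 * back to even afterwards; a reader retries if it saw an odd value or
 * if the value changed across its read.  Real seqlocks also serialize
 * writers with a spinlock and use stronger fences than shown here. */
static atomic_uint seqcount;
static int shared_data;                 /* stands in for the AVL tree */

static unsigned read_begin(void)
{
        unsigned s;

        while ((s = atomic_load_explicit(&seqcount,
                                         memory_order_acquire)) & 1)
                ;                       /* writer in progress: wait it out */
        return s;
}

static bool read_retry(unsigned start)
{
        atomic_thread_fence(memory_order_acquire);
        return atomic_load_explicit(&seqcount,
                                    memory_order_relaxed) != start;
}

static void write_begin(void)
{
        atomic_fetch_add_explicit(&seqcount, 1, memory_order_acq_rel);
}

static void write_end(void)
{
        atomic_fetch_add_explicit(&seqcount, 1, memory_order_release);
}

static int read_shared(void)
{
        unsigned s;
        int v;

        do {
                s = read_begin();
                v = shared_data;        /* the lockless tree walk goes here */
        } while (read_retry(s));        /* inet_getpeer() falls back to the
                                         * locked slow path instead of spinning */
        return v;
}

int main(void)
{
        write_begin();
        shared_data = 42;
        write_end();
        return read_shared() == 42 ? 0 : 1;
}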
...
 /* Called with or without local BH being disabled. */
 static void unlink_from_unused(struct inet_peer *p)
 {
-        spin_lock_bh(&unused_peers.lock);
-        list_del_init(&p->unused);
-        spin_unlock_bh(&unused_peers.lock);
+        if (!list_empty(&p->unused)) {
+                spin_lock_bh(&unused_peers.lock);
+                list_del_init(&p->unused);
+                spin_unlock_bh(&unused_peers.lock);
+        }
 }
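The unlink_from_unused() change avoids taking the global unused_peers lock when the peer is not on the unused list at all: list_del_init() leaves a removed node pointing at itself, so a racy list_empty() test can at worst cause one superfluous lock round trip, never a missed unlink. A self-contained sketch of those list semantics (a minimal re-implementation of the <linux/list.h> helpers involved):

#include <assert.h>
#include <stddef.h>

/* Tiny circular doubly-linked list mirroring <linux/list.h> semantics. */
struct list_head { struct list_head *next, *prev; };

static void INIT_LIST_HEAD(struct list_head *h) { h->next = h->prev = h; }

static int list_empty(const struct list_head *h) { return h->next == h; }

static void list_add(struct list_head *n, struct list_head *h)
{
        n->next = h->next;
        n->prev = h;
        h->next->prev = n;
        h->next = n;
}

/* list_del_init(): unlink and re-point the node at itself, so a later
 * list_empty(&node) check reports "not on any list". */
static void list_del_init(struct list_head *n)
{
        n->prev->next = n->next;
        n->next->prev = n->prev;
        INIT_LIST_HEAD(n);
}

int main(void)
{
        struct list_head head, node;

        INIT_LIST_HEAD(&head);
        INIT_LIST_HEAD(&node);
        assert(list_empty(&node));      /* safe to skip the lock */
        list_add(&node, &head);
        assert(!list_empty(&node));     /* must lock and unlink */
        list_del_init(&node);
        assert(list_empty(&node));      /* idempotent from here on */
        return 0;
}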
...
 static int addr_compare(const struct inetpeer_addr *a,
                         const struct inetpeer_addr *b)
 {
         int i, n = (a->family == AF_INET ? 1 : 4);

         for (i = 0; i < n; i++) {
-                if (a->a6[i] == b->a6[i])
+                if (a->addr.a6[i] == b->addr.a6[i])
                         continue;
-                if (a->a6[i] < b->a6[i])
+                if (a->addr.a6[i] < b->addr.a6[i])
                         return -1;
                 return 1;
         }
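For reference, addr_compare() orders addresses lexicographically as arrays of 32-bit words, one word for IPv4 and four for IPv6 (the rename to addr.a6 reflects a new union layout inside struct inetpeer_addr). The same logic standalone (hypothetical words_compare() helper; the kernel compares __be32 values, so the order is by network byte order):

#include <stdint.h>
#include <stdio.h>

/* Compare two addresses stored as n 32-bit words, lexicographically,
 * the way addr_compare() does (n = 1 for IPv4, 4 for IPv6). */
static int words_compare(const uint32_t *a, const uint32_t *b, int n)
{
        int i;

        for (i = 0; i < n; i++) {
                if (a[i] == b[i])
                        continue;
                return a[i] < b[i] ? -1 : 1;
        }
        return 0;
}

int main(void)
{
        uint32_t x[4] = { 0x20010db8, 0, 0, 1 };
        uint32_t y[4] = { 0x20010db8, 0, 0, 2 };

        printf("%d\n", words_compare(x, y, 4));  /* -1: x sorts first */
        return 0;
}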
...
         stackptr = _stack;                                              \
         *stackptr++ = &_base->root;                                     \
-        for (u = rcu_dereference_protected(_base->root,                 \
-                        lockdep_is_held(&_base->lock));                 \
+        for (u = rcu_deref_locked(_base->root, _base);                  \
              u != peer_avl_empty; ) {                                   \
                 int cmp = addr_compare(_daddr, &u->daddr);              \
...
                         v = &u->avl_right;                              \
                 *stackptr++ = v;                                        \
-                u = rcu_dereference_protected(*v,                       \
-                        lockdep_is_held(&_base->lock));                 \
+                u = rcu_deref_locked(*v, _base);                        \
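Both the lookup() macro above and the helpers below repeated the rcu_dereference_protected(..., lockdep_is_held(...)) incantation; the patch folds it into rcu_deref_locked(). Its definition is not part of this excerpt, but from the call sites it is presumably something like this (note the ->lock.lock: with a seqlock_t, the lock lockdep tracks is the embedded spinlock):

#define rcu_deref_locked(X, BASE)                                \
        rcu_dereference_protected(X, lockdep_is_held(&(BASE)->lock.lock))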
...
+static bool atomic_add_unless_return(atomic_t *ptr, int a, int u, int *newv)
+{
+        int cur, old = atomic_read(ptr);
...
+                cur = atomic_cmpxchg(ptr, old, *newv);
...
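Only three lines of the new atomic_add_unless_return() helper survive in this excerpt. Its call site in lookup_rcu() below (add 1 unless the counter is -1, reporting the resulting value through *newv) pins the elided body down as a standard cmpxchg retry loop; a plausible reconstruction, not verbatim source:

static bool atomic_add_unless_return(atomic_t *ptr, int a, int u, int *newv)
{
        int cur, old = atomic_read(ptr);

        while (old != u) {
                *newv = old + a;
                cur = atomic_cmpxchg(ptr, old, *newv);
                if (cur == old)
                        return true;    /* *newv holds the value we installed */
                old = cur;              /* lost a race: retry with fresh value */
        }
        return false;                   /* counter was u (-1 here): refuse */
}

Unlike atomic_add_unless(), the caller also learns the post-increment value, which inet_getpeer() later uses (newrefcnt == 1) to decide whether the peer still needs unlinking from the unused list.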
...
 /*
- * Called with rcu_read_lock_bh()
+ * Called with rcu_read_lock()
  * Because we hold no lock against a writer, it's quite possible we fall
  * into an endless loop.
  * But every pointer we follow is guaranteed to be valid thanks to RCU.
  * We exit from this function if the number of links exceeds PEER_MAXDEPTH
  */
-static struct inet_peer *lookup_rcu_bh(const struct inetpeer_addr *daddr,
-                                       struct inet_peer_base *base)
+static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr,
+                                    struct inet_peer_base *base,
+                                    int *newrefcnt)
 {
-        struct inet_peer *u = rcu_dereference_bh(base->root);
+        struct inet_peer *u = rcu_dereference(base->root);
...
         while (u != peer_avl_empty) {
...
                          * distinction between an unused entry (refcnt=0) and
...
-                        if (unlikely(!atomic_add_unless(&u->refcnt, 1, -1)))
+                        if (!atomic_add_unless_return(&u->refcnt, 1, -1, newrefcnt))
...
-                        u = rcu_dereference_bh(u->avl_left);
+                        u = rcu_dereference(u->avl_left);
...
-                        u = rcu_dereference_bh(u->avl_right);
+                        u = rcu_dereference(u->avl_right);
                 if (unlikely(++count == PEER_MAXDEPTH))
...
         struct inet_peer __rcu **v;                                     \
         *stackptr++ = &start->avl_left;                                 \
         v = &start->avl_left;                                           \
-        for (u = rcu_dereference_protected(*v,                          \
-                        lockdep_is_held(&base->lock));                  \
+        for (u = rcu_deref_locked(*v, base);                            \
              u->avl_right != peer_avl_empty_rcu; ) {                    \
                 v = &u->avl_right;                                      \
                 *stackptr++ = v;                                        \
-                u = rcu_dereference_protected(*v,                       \
-                        lockdep_is_held(&base->lock));                  \
+                u = rcu_deref_locked(*v, base);                         \
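This hunk is the body of the lookup_rightempty() macro: starting from a doomed node, it walks the right spine of the left subtree, pushing each link on the stack, and stops at the in-order predecessor, the node that can replace the deleted one without breaking key order. A generic, runnable version of that walk (simplified types, no path recording):

#include <stdio.h>
#include <stddef.h>

struct tnode { struct tnode *left, *right; int key; };

/* Generic form of the lookup_rightempty() walk: the rightmost node of
 * t's left subtree is t's in-order predecessor, so it can replace t on
 * deletion without breaking key order. */
static struct tnode *rightmost_of_left(struct tnode *t)
{
        struct tnode *u = t->left;

        while (u && u->right)
                u = u->right;
        return u;
}

int main(void)
{
        struct tnode n1 = { NULL, NULL, 1 };
        struct tnode n3 = { &n1, NULL, 3 };     /* left subtree: 1 <- 3 */
        struct tnode n5 = { &n3, NULL, 5 };

        printf("predecessor of 5: %d\n", rightmost_of_left(&n5)->key); /* 3 */
        return 0;
}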
...
         while (stackend > stack) {
                 nodep = *--stackend;
-                node = rcu_dereference_protected(*nodep,
-                        lockdep_is_held(&base->lock));
-                l = rcu_dereference_protected(node->avl_left,
-                        lockdep_is_held(&base->lock));
-                r = rcu_dereference_protected(node->avl_right,
-                        lockdep_is_held(&base->lock));
+                node = rcu_deref_locked(*nodep, base);
+                l = rcu_deref_locked(node->avl_left, base);
+                r = rcu_deref_locked(node->avl_right, base);
                 lh = node_height(l);
                 rh = node_height(r);
                 if (lh > rh + 1) { /* l: RH+2 */
                         struct inet_peer *ll, *lr, *lrl, *lrr;
                         int lrh;
-                        ll = rcu_dereference_protected(l->avl_left,
-                                lockdep_is_held(&base->lock));
-                        lr = rcu_dereference_protected(l->avl_right,
-                                lockdep_is_held(&base->lock));
+                        ll = rcu_deref_locked(l->avl_left, base);
+                        lr = rcu_deref_locked(l->avl_right, base);
                         lrh = node_height(lr);
                         if (lrh <= node_height(ll)) {   /* ll: RH+1 */
                                 RCU_INIT_POINTER(node->avl_left, lr);   /* lr: RH or RH+1 */
...
                                 l->avl_height = node->avl_height + 1;
                                 RCU_INIT_POINTER(*nodep, l);
                         } else { /* ll: RH, lr: RH+1 */
-                                lrl = rcu_dereference_protected(lr->avl_left,
-                                        lockdep_is_held(&base->lock));  /* lrl: RH or RH-1 */
-                                lrr = rcu_dereference_protected(lr->avl_right,
-                                        lockdep_is_held(&base->lock));  /* lrr: RH or RH-1 */
+                                lrl = rcu_deref_locked(lr->avl_left, base);   /* lrl: RH or RH-1 */
+                                lrr = rcu_deref_locked(lr->avl_right, base);  /* lrr: RH or RH-1 */
                                 RCU_INIT_POINTER(node->avl_left, lrr);  /* lrr: RH or RH-1 */
                                 RCU_INIT_POINTER(node->avl_right, r);   /* r: RH */
                                 node->avl_height = rh + 1; /* node: RH+1 */
...
                 } else if (rh > lh + 1) { /* r: LH+2 */
                         struct inet_peer *rr, *rl, *rlr, *rll;
                         int rlh;
-                        rr = rcu_dereference_protected(r->avl_right,
-                                lockdep_is_held(&base->lock));
-                        rl = rcu_dereference_protected(r->avl_left,
-                                lockdep_is_held(&base->lock));
+                        rr = rcu_deref_locked(r->avl_right, base);
+                        rl = rcu_deref_locked(r->avl_left, base);
                         rlh = node_height(rl);
                         if (rlh <= node_height(rr)) {   /* rr: LH+1 */
                                 RCU_INIT_POINTER(node->avl_right, rl);  /* rl: LH or LH+1 */
...
                                 r->avl_height = node->avl_height + 1;
                                 RCU_INIT_POINTER(*nodep, r);
                         } else { /* rr: LH, rl: LH+1 */
-                                rlr = rcu_dereference_protected(rl->avl_right,
-                                        lockdep_is_held(&base->lock));  /* rlr: LH or LH-1 */
-                                rll = rcu_dereference_protected(rl->avl_left,
-                                        lockdep_is_held(&base->lock));  /* rll: LH or LH-1 */
+                                rlr = rcu_deref_locked(rl->avl_right, base);  /* rlr: LH or LH-1 */
+                                rll = rcu_deref_locked(rl->avl_left, base);   /* rll: LH or LH-1 */
                                 RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */
                                 RCU_INIT_POINTER(node->avl_left, l);    /* l: LH */
                                 node->avl_height = lh + 1; /* node: LH+1 */
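The rebalancing loop itself is untouched except for the dereference helper; the RH/LH comments annotate subtree heights across the AVL rotations. As a reference for reading them, here is the textbook single right rotation corresponding to the "l: RH+2, ll: RH+1" case, in plain C (generic node type, not the kernel's exact bookkeeping):

#include <stdio.h>
#include <stddef.h>

struct node {
        struct node *left, *right;
        int height;     /* empty subtree = height 0 */
};

static int height(struct node *n) { return n ? n->height : 0; }

/* Single right rotation: the too-tall left child l becomes the new
 * subtree root and the old root becomes its right child.  Keys keep
 * their order because l's right subtree (lr) stays between l and node. */
static struct node *rotate_right(struct node *node)
{
        struct node *l = node->left;

        node->left = l->right;          /* lr: heights RH or RH+1 */
        node->height = 1 + (height(node->left) > height(node->right) ?
                            height(node->left) : height(node->right));
        l->right = node;
        l->height = 1 + (height(l->left) > height(l->right) ?
                         height(l->left) : height(l->right));
        return l;                       /* caller stores this through *nodep */
}

int main(void)
{
        struct node a = { NULL, NULL, 1 };
        struct node b = { &a, NULL, 2 };
        struct node c = { &b, NULL, 3 };        /* left-heavy chain 3-2-1 */
        struct node *root = rotate_right(&c);

        printf("new root height %d\n", root->height);  /* b becomes root: 2 */
        return 0;
}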
...
 /* May be called with local BH enabled. */
-static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base)
+static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base,
+                             struct inet_peer __rcu **stack[PEER_MAXDEPTH])
 {
...
-        spin_lock_bh(&base->lock);
+        write_seqlock_bh(&base->lock);
         /* Check the reference counter.  It was artificially incremented by 1
          * in the cleanup() function to prevent a sudden disappearance.  If we can
          * atomically (because of lockless readers) take this last reference,
...
          * We use refcnt=-1 to alert lockless readers this entry is deleted.
          */
         if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) {
-                struct inet_peer __rcu **stack[PEER_MAXDEPTH];
                 struct inet_peer __rcu ***stackptr, ***delp;
                 if (lookup(&p->daddr, stack, base) != p)
...
                         /* look for a node to insert instead of p */
                         struct inet_peer *t;
                         t = lookup_rightempty(p, base);
-                        BUG_ON(rcu_dereference_protected(*stackptr[-1],
-                                lockdep_is_held(&base->lock)) != t);
+                        BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t);
                         **--stackptr = t->avl_left;
                         /* t is removed, t->daddr > x->daddr for any
                          * x in p->avl_left subtree.
...
-        spin_unlock_bh(&base->lock);
+        write_sequnlock_bh(&base->lock);
...
-        call_rcu_bh(&p->rcu, inetpeer_free_rcu);
+        call_rcu(&p->rcu, inetpeer_free_rcu);
...
         /* The node is used again.  Decrease the reference counter
          * back.  The loop "cleanup -> unlink_from_unused
...
/* May be called with local BH enabled. */
438
static int cleanup_once(unsigned long ttl)
438
static int cleanup_once(unsigned long ttl, struct inet_peer __rcu **stack[PEER_MAXDEPTH])
440
440
struct inet_peer *p = NULL;
...
 struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create)
 {
         struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
         struct inet_peer_base *base = family_to_base(daddr->family);
         struct inet_peer *p;
+        unsigned int sequence;
+        int invalidated, newrefcnt = 0;

         /* Look up the address quickly, lockless.
          * Because of a concurrent writer, we might not find an existing entry.
          */
-        rcu_read_lock_bh();
-        p = lookup_rcu_bh(daddr, base);
-        rcu_read_unlock_bh();
+        rcu_read_lock();
+        sequence = read_seqbegin(&base->lock);
+        p = lookup_rcu(daddr, base, &newrefcnt);
+        invalidated = read_seqretry(&base->lock, sequence);
+        rcu_read_unlock();

         if (p) {
-                /* The existing node has been found.
+found:          /* The existing node has been found.
                  * Remove the entry from the unused list if it was there.
                  */
-                unlink_from_unused(p);
+                if (newrefcnt == 1)
+                        unlink_from_unused(p);
                 return p;
         }

+        /* If no writer did a change during our lookup, we can return early. */
+        if (!create && !invalidated)
+                return NULL;
+
         /* retry an exact lookup, taking the lock first.
          * At least, nodes should be hot in our cache.
          */
-        spin_lock_bh(&base->lock);
+        write_seqlock_bh(&base->lock);
         p = lookup(daddr, stack, base);
         if (p != peer_avl_empty) {
-                atomic_inc(&p->refcnt);
-                spin_unlock_bh(&base->lock);
-                /* Remove the entry from the unused list if it was there. */
-                unlink_from_unused(p);
+                newrefcnt = atomic_inc_return(&p->refcnt);
+                write_sequnlock_bh(&base->lock);
+                goto found;     /* The existing node has been found */
...
         p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL;
...
                 p->daddr = *daddr;
                 atomic_set(&p->refcnt, 1);
                 atomic_set(&p->rid, 0);
-                atomic_set(&p->ip_id_count, secure_ip_id(daddr->a4));
+                atomic_set(&p->ip_id_count, secure_ip_id(daddr->addr.a4));
                 p->tcp_ts_stamp = 0;
+                p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
...
+                memset(&p->redirect_learned, 0, sizeof(p->redirect_learned));
                 INIT_LIST_HEAD(&p->unused);
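Taken together, the new inet_getpeer() fast path reads: look the address up under RCU only, trust a hit unconditionally, trust a miss whenever the seqlock proves no writer ran concurrently, and otherwise redo the lookup (and possibly insert) under the write lock. A schematic, self-contained rendering of that policy (every name is a stand-in; the tree and the seqcount are stubbed out):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct peer { int key; };

static unsigned seqcount;                               /* toy seqlock counter */
static unsigned read_begin(void) { return seqcount; }
static bool read_retry(unsigned s) { return seqcount != s; }

static struct peer *lookup_lockless(int key)            /* RCU walk stand-in */
{
        (void)key;
        return NULL;                                    /* pretend: not found */
}

static struct peer *lookup_or_insert_locked(int key, bool create)
{
        static struct peer slot;

        if (!create)
                return NULL;
        slot.key = key;
        return &slot;
}

static struct peer *getpeer(int key, bool create)
{
        unsigned seq = read_begin();
        struct peer *p = lookup_lockless(key);
        bool invalidated = read_retry(seq);

        if (p)
                return p;       /* a lockless hit is always trustworthy */
        if (!create && !invalidated)
                return NULL;    /* the miss is provably real: no writer ran */
        /* A writer may have rotated the entry out of our search path,
         * or we want to insert: redo the lookup under the lock. */
        return lookup_or_insert_locked(key, create);
}

int main(void)
{
        printf("create=0: %p\n", (void *)getpeer(1, false));
        printf("create=1: %p\n", (void *)getpeer(1, true));
        return 0;
}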
...
         local_bh_enable();
 }
 EXPORT_SYMBOL_GPL(inet_putpeer);
...
+/*
+ * Check transmit rate limitation for a given message.
+ * The rate information is held in the inet_peer entries now.
+ * This function is generic and could be used for other purposes
+ * too. It uses a token bucket filter as suggested by Alexey Kuznetsov.
+ *
+ * Note that the same inet_peer fields are modified by functions in
+ * route.c too, but these work for packet destinations while xrlim_allow
+ * works for icmp destinations. This means the rate limiting information
+ * for one "ip object" is shared - and these ICMPs are twice limited:
+ * by source and by destination.
+ *
+ * RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate
+ *           SHOULD allow setting of rate limits
+ *
+ * Shared between ICMPv4 and ICMPv6.
+ */
+#define XRLIM_BURST_FACTOR      6
+bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout)
+{
+        unsigned long now, token;
+        bool rc = false;
+
+        if (!peer)
+                return true;
+
+        token = peer->rate_tokens;
+        now = jiffies;
+        token += now - peer->rate_last;
+        peer->rate_last = now;
+        if (token > XRLIM_BURST_FACTOR * timeout)
+                token = XRLIM_BURST_FACTOR * timeout;
+        if (token >= timeout) {
+                token -= timeout;
+                rc = true;
+        }
+        peer->rate_tokens = token;
+        return rc;
+}
+EXPORT_SYMBOL(inet_peer_xrlim_allow);
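inet_peer_xrlim_allow() is a token bucket: one token accrues per jiffy elapsed since rate_last, the balance is capped at XRLIM_BURST_FACTOR * timeout, and each allowed message costs timeout tokens. The same arithmetic as a runnable user-space program (hypothetical rate_state/xrlim_allow() names; the kernel reads jiffies itself and treats a NULL peer as "allow"):

#include <stdbool.h>
#include <stdio.h>

#define XRLIM_BURST_FACTOR 6

struct rate_state {
        unsigned long rate_tokens;      /* accumulated credit, in ticks */
        unsigned long rate_last;        /* time of the last check */
};

/* Token bucket as in inet_peer_xrlim_allow(): earn one token per tick,
 * cap the burst, spend `timeout` tokens per allowed message. */
static bool xrlim_allow(struct rate_state *peer, unsigned long now,
                        unsigned long timeout)
{
        unsigned long token = peer->rate_tokens;
        bool rc = false;

        token += now - peer->rate_last;
        peer->rate_last = now;
        if (token > XRLIM_BURST_FACTOR * timeout)
                token = XRLIM_BURST_FACTOR * timeout;
        if (token >= timeout) {
                token -= timeout;
                rc = true;
        }
        peer->rate_tokens = token;
        return rc;
}

int main(void)
{
        struct rate_state s = { 0, 0 };
        int i;

        /* At t=100 with timeout=10, the burst cap (6 * 10 ticks of credit)
         * admits exactly six messages, then drops the rest. */
        for (i = 0; i < 8; i++)
                printf("msg %d: %s\n", i,
                       xrlim_allow(&s, 100, 10) ? "allowed" : "dropped");
        return 0;
}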