 		net->ipv4.sysctl_rt_cache_rebuild_count;

-static inline bool compare_hash_inputs(const struct flowi *fl1,
-				       const struct flowi *fl2)
+static inline bool compare_hash_inputs(const struct rtable *rt1,
+				       const struct rtable *rt2)
-	return ((((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) |
-		 ((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) |
-		 (fl1->iif ^ fl2->iif)) == 0);
+	return ((((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) |
+		 ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
+		 (rt1->rt_iif ^ rt2->rt_iif)) == 0);
-static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
+static inline int compare_keys(struct rtable *rt1, struct rtable *rt2)
-	return (((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) |
-		((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) |
-		(fl1->mark ^ fl2->mark) |
-		(*(u16 *)&fl1->fl4_tos ^ *(u16 *)&fl2->fl4_tos) |
-		(fl1->oif ^ fl2->oif) |
-		(fl1->iif ^ fl2->iif)) == 0;
+	return (((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) |
+		((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
+		(rt1->rt_mark ^ rt2->rt_mark) |
+		(rt1->rt_key_tos ^ rt2->rt_key_tos) |
+		(rt1->rt_oif ^ rt2->rt_oif) |
+		(rt1->rt_iif ^ rt2->rt_iif)) == 0;
 static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
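Both comparison helpers above rely on the same branch-free idiom: XOR each pair of fields, OR the results together, and test against zero once, so the whole comparison compiles to straight-line code with a single branch instead of one branch per field. A minimal userspace sketch (hypothetical struct and field names, not kernel code):

#include <stdint.h>
#include <stdio.h>

struct key {
	uint32_t dst;
	uint32_t src;
	uint32_t mark;
	int oif;
	int iif;
};

static int keys_equal(const struct key *a, const struct key *b)
{
	/* One accumulated OR of XORs, one final comparison. */
	return ((a->dst ^ b->dst) |
		(a->src ^ b->src) |
		(a->mark ^ b->mark) |
		(uint32_t)(a->oif ^ b->oif) |
		(uint32_t)(a->iif ^ b->iif)) == 0;
}

int main(void)
{
	struct key k1 = { 0x0a000001, 0x0a000002, 0, 2, 0 };
	struct key k2 = k1;

	printf("equal: %d\n", keys_equal(&k1, &k2));	/* 1 */
	k2.mark = 7;
	printf("equal: %d\n", keys_equal(&k1, &k2));	/* 0 */
	return 0;
}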
 	const struct rtable *aux = head;

 	while (aux != rth) {
-		if (compare_hash_inputs(&aux->fl, &rth->fl))
+		if (compare_hash_inputs(aux, rth))
 		aux = rcu_dereference_protected(aux->dst.rt_next, 1);
 static void rt_check_expire(void)
 	static unsigned int rover;
 	unsigned int i = rover, goal;
 	struct rtable __rcu **rthp;
 	unsigned long samples = 0;
 	unsigned long sum = 0, sum2 = 0;

 	delta = jiffies - expires_ljiffies;
 	expires_ljiffies = jiffies;
 	mult = ((u64)delta) << rt_hash_log;
 	if (ip_rt_gc_timeout > 1)
 		do_div(mult, ip_rt_gc_timeout);
 	goal = (unsigned int)mult;
 	if (goal > rt_hash_mask)
 		goal = rt_hash_mask + 1;
 	for (; goal > 0; goal--) {
 		unsigned long tmo = ip_rt_gc_timeout;
 		unsigned long length;

 		i = (i + 1) & rt_hash_mask;
 		rthp = &rt_hash_table[i].chain;
 		if (rcu_dereference_raw(*rthp) == NULL)
 		spin_lock_bh(rt_hash_lock_addr(i));
 		while ((rth = rcu_dereference_protected(*rthp,
 				lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) {
 			prefetch(rth->dst.rt_next);
 			if (rt_is_expired(rth)) {
 				*rthp = rth->dst.rt_next;
 			if (rth->dst.expires) {
 				/* Entry is expired even if it is in use */
 				if (time_before_eq(jiffies, rth->dst.expires)) {
 					rthp = &rth->dst.rt_next;
 					/*
 					 * We only count entries on a chain with
 					 * equal hash inputs once, so that entries
 					 * for different QOS levels and other
 					 * non-hash-input attributes don't unfairly
 					 * skew the length computation.
 					 */
 					length += has_noalias(rt_hash_table[i].chain, rth);
 			} else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout))
 			/* Cleanup aged-off entries. */
 			*rthp = rth->dst.rt_next;
 		spin_unlock_bh(rt_hash_lock_addr(i));
 		sum2 += length*length;
 		unsigned long avg = sum / samples;
 		unsigned long sd = int_sqrt(sum2 / samples - avg*avg);
 		rt_chain_length_max = max_t(unsigned long,
 					    (avg + 4*sd) >> FRACT_BITS);
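The tail of rt_check_expire() derives a cap on acceptable chain length from the mean plus four standard deviations, all in integer fixed point: chain lengths are accumulated with FRACT_BITS fractional bits (has_noalias() returns ONE, i.e. 1 << FRACT_BITS), and the cap is shifted back down at the end. A userspace sketch of the same arithmetic, assuming the kernel's FRACT_BITS value of 3:

#include <stdio.h>

#define FRACT_BITS 3
#define ONE (1UL << FRACT_BITS)

/* Simple integer square root, standing in for the kernel's int_sqrt(). */
static unsigned long int_sqrt(unsigned long x)
{
	unsigned long r = 0;

	while ((r + 1) * (r + 1) <= x)
		r++;
	return r;
}

int main(void)
{
	/* Chain lengths observed for a few hash buckets, in fixed point. */
	unsigned long lengths[] = { 2 * ONE, 3 * ONE, 1 * ONE, 5 * ONE };
	unsigned long sum = 0, sum2 = 0, samples = 0;
	unsigned long avg, sd;
	size_t i;

	for (i = 0; i < sizeof(lengths) / sizeof(lengths[0]); i++) {
		sum += lengths[i];
		sum2 += lengths[i] * lengths[i];
		samples++;
	}
	avg = sum / samples;
	sd = int_sqrt(sum2 / samples - avg * avg);
	/* mean + 4 standard deviations, converted back to whole entries */
	printf("max chain length = %lu\n", (avg + 4 * sd) >> FRACT_BITS);
	return 0;
}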
 * rt_worker_func() is run in process context.
 * We call rt_check_expire() to scan part of the hash table.
 static void rt_worker_func(struct work_struct *work)
 	schedule_delayed_work(&expires_work, ip_rt_gc_interval);
- * Pertubation of rt_genid by a small quantity [1..256]
+ * Perturbation of rt_genid by a small quantity [1..256]
  * Using 8 bits of shuffling ensures we can call rt_cache_invalidate()
  * many times (2^24) without giving recent rt_genid.
  * Jenkins hash is strong enough that little changes of rt_genid are OK.
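The comment above describes how a cache flush is implemented without touching the table: bump a generation id by a random amount in [1..256], and treat any entry carrying a stale id as expired at the next lookup. A userspace sketch of the scheme (rand() stands in for the kernel's get_random_bytes()):

#include <stdio.h>
#include <stdlib.h>

static unsigned int rt_genid;

static void rt_cache_invalidate(void)
{
	unsigned char shuffle = (unsigned char)rand();

	rt_genid += shuffle + 1U;	/* perturb by [1..256] */
}

struct entry {
	unsigned int genid;
	/* ... cached route data ... */
};

static int rt_is_expired(const struct entry *e)
{
	return e->genid != rt_genid;
}

int main(void)
{
	struct entry e = { .genid = rt_genid };

	printf("expired before flush: %d\n", rt_is_expired(&e));	/* 0 */
	rt_cache_invalidate();
	printf("expired after flush:  %d\n", rt_is_expired(&e));	/* 1 */
	return 0;
}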
 		goto reject_redirect;

-	for (i = 0; i < 2; i++) {
-		for (k = 0; k < 2; k++) {
-			unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
-			rthp = &rt_hash_table[hash].chain;
-			while ((rth = rcu_dereference(*rthp)) != NULL) {
-				if (rth->fl.fl4_dst != daddr ||
-				    rth->fl.fl4_src != skeys[i] ||
-				    rth->fl.oif != ikeys[k] ||
-				    rt_is_input_route(rth) ||
-				    rt_is_expired(rth) ||
-				    !net_eq(dev_net(rth->dst.dev), net)) {
-					rthp = &rth->dst.rt_next;
-				if (rth->rt_dst != daddr ||
-				    rth->rt_src != saddr ||
-				    rth->rt_gateway != old_gw ||
-				    rth->dst.dev != dev)
-				dst_hold(&rth->dst);
-				rt = dst_alloc(&ipv4_dst_ops);
-				/* Copy all the information. */
-				atomic_set(&rt->dst.__refcnt, 1);
-				rt->dst.child = NULL;
-				dev_hold(rt->dst.dev);
-				rt->dst.obsolete = -1;
-				rt->dst.lastuse = jiffies;
-				rt->dst.path = &rt->dst;
-				rt->dst.neighbour = NULL;
-				rt->dst.xfrm = NULL;
-				rt->rt_genid = rt_genid(net);
-				rt->rt_flags |= RTCF_REDIRECTED;
-				/* Gateway is different ... */
-				rt->rt_gateway = new_gw;
-				/* Redirect received -> path was valid */
-				dst_confirm(&rth->dst);
-				atomic_inc(&rt->peer->refcnt);
-				if (arp_bind_neighbour(&rt->dst) ||
-				    !(rt->dst.neighbour->nud_state &
-					if (rt->dst.neighbour)
-						neigh_event_send(rt->dst.neighbour, NULL);
-				netevent.old = &rth->dst;
-				netevent.new = &rt->dst;
-				call_netevent_notifiers(NETEVENT_REDIRECT,
-				if (!rt_intern_hash(hash, rt, &rt, NULL, rt->fl.oif))
+	peer = inet_getpeer_v4(daddr, 1);
+		peer->redirect_learned.a4 = new_gw;
+		atomic_inc(&__rt_peer_genid);
 	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
 	rcu_read_unlock();
+	rt_bind_peer(rt, rt->rt_dst, 1);
+		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);

 	/* No redirected packets during ip_rt_redirect_silence;
 	 * reset the algorithm.
-	if (time_after(jiffies, rt->dst.rate_last + ip_rt_redirect_silence))
-		rt->dst.rate_tokens = 0;
+	if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
+		peer->rate_tokens = 0;

 	/* Too many ignored redirects; do not send anything
 	 * set dst.rate_last to the last seen redirected packet.
-	if (rt->dst.rate_tokens >= ip_rt_redirect_number) {
-		rt->dst.rate_last = jiffies;
+	if (peer->rate_tokens >= ip_rt_redirect_number) {
+		peer->rate_last = jiffies;

 	/* Check for load limit; set rate_last to the latest sent
-	if (rt->dst.rate_tokens == 0 ||
+	if (peer->rate_tokens == 0 ||
 	    time_after(jiffies,
-		       (rt->dst.rate_last +
-			(ip_rt_redirect_load << rt->dst.rate_tokens)))) {
+			(ip_rt_redirect_load << peer->rate_tokens)))) {
 		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
-		rt->dst.rate_last = jiffies;
-		++rt->dst.rate_tokens;
+		peer->rate_last = jiffies;
+		++peer->rate_tokens;
 #ifdef CONFIG_IP_ROUTE_VERBOSE
 		if (log_martians &&
-		    rt->dst.rate_tokens == ip_rt_redirect_number &&
+		    peer->rate_tokens == ip_rt_redirect_number &&
 		    net_ratelimit())
 			printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n",
-			       &rt->rt_src, rt->rt_iif,
+			       &ip_hdr(skb)->saddr, rt->rt_iif,
 			       &rt->rt_dst, &rt->rt_gateway);
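Either way the state lives, the rate limiting is the same: the gap required between redirects doubles with every redirect already sent (load << tokens), sending stops entirely after ip_rt_redirect_number redirects, and a long enough quiet period resets everything. A userspace sketch of that policy, using plain counters instead of jiffies and assuming the kernel's default redirect count of 9:

#include <stdio.h>

#define REDIRECT_NUMBER	9	/* mirrors the ip_rt_redirect_number default */
#define REDIRECT_LOAD	1	/* base delay between redirects */
#define REDIRECT_SILENCE (REDIRECT_LOAD << (REDIRECT_NUMBER + 1))

struct peer {
	unsigned long rate_last;	/* time of last redirect sent */
	unsigned int rate_tokens;	/* redirects sent so far */
};

static int send_redirect(struct peer *p, unsigned long now)
{
	/* Quiet long enough: reset the algorithm. */
	if (now > p->rate_last + REDIRECT_SILENCE)
		p->rate_tokens = 0;

	/* Too many ignored redirects: give up until the silence elapses. */
	if (p->rate_tokens >= REDIRECT_NUMBER) {
		p->rate_last = now;
		return 0;
	}

	/* Exponential backoff between successive redirects. */
	if (p->rate_tokens == 0 ||
	    now > p->rate_last + (REDIRECT_LOAD << p->rate_tokens)) {
		p->rate_last = now;
		++p->rate_tokens;
		return 1;
	}
	return 0;
}

int main(void)
{
	struct peer p = { 0, 0 };
	unsigned long t;

	for (t = 1; t <= 40; t++)
		if (send_redirect(&p, t))
			printf("redirect sent at t=%lu (tokens=%u)\n",
			       t, p.rate_tokens);
	return 0;
}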
-unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
+unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph,
 				 unsigned short new_mtu,
 				 struct net_device *dev)
 	unsigned short old_mtu = ntohs(iph->tot_len);
-	int ikeys[2] = { dev->ifindex, 0 };
-	__be32 skeys[2] = { iph->saddr, 0, };
-	__be32 daddr = iph->daddr;
 	unsigned short est_mtu = 0;

-	for (k = 0; k < 2; k++) {
-		for (i = 0; i < 2; i++) {
-			unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
-			for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
-			     rth = rcu_dereference(rth->dst.rt_next)) {
-				unsigned short mtu = new_mtu;
-				if (rth->fl.fl4_dst != daddr ||
-				    rth->fl.fl4_src != skeys[i] ||
-				    rth->rt_dst != daddr ||
-				    rth->rt_src != iph->saddr ||
-				    rth->fl.oif != ikeys[k] ||
-				    rt_is_input_route(rth) ||
-				    dst_metric_locked(&rth->dst, RTAX_MTU) ||
-				    !net_eq(dev_net(rth->dst.dev), net) ||
-				if (new_mtu < 68 || new_mtu >= old_mtu) {
-					/* BSD 4.2 compatibility hack :-( */
-					    old_mtu >= dst_mtu(&rth->dst) &&
-					    old_mtu >= 68 + (iph->ihl << 2))
-						old_mtu -= iph->ihl << 2;
-					mtu = guess_mtu(old_mtu);
-				if (mtu <= dst_mtu(&rth->dst)) {
-					if (mtu < dst_mtu(&rth->dst)) {
-						dst_confirm(&rth->dst);
-						if (mtu < ip_rt_min_pmtu) {
-							u32 lock = dst_metric(&rth->dst,
-							mtu = ip_rt_min_pmtu;
-							lock |= (1 << RTAX_MTU);
-							dst_metric_set(&rth->dst, RTAX_LOCK,
-						dst_metric_set(&rth->dst, RTAX_MTU, mtu);
-						dst_set_expires(&rth->dst,
+	struct inet_peer *peer;

+	peer = inet_getpeer_v4(iph->daddr, 1);
+		unsigned short mtu = new_mtu;

+		if (new_mtu < 68 || new_mtu >= old_mtu) {
+			/* BSD 4.2 derived systems incorrectly adjust
+			 * tot_len by the IP header length, and report
+			 * a zero MTU in the ICMP message.
+			    old_mtu >= 68 + (iph->ihl << 2))
+				old_mtu -= iph->ihl << 2;
+			mtu = guess_mtu(old_mtu);
+		if (mtu < ip_rt_min_pmtu)
+			mtu = ip_rt_min_pmtu;
+		if (!peer->pmtu_expires || mtu < peer->pmtu_learned) {
+			unsigned long pmtu_expires;

+			pmtu_expires = jiffies + ip_rt_mtu_expires;
+			peer->pmtu_learned = mtu;
+			peer->pmtu_expires = pmtu_expires;

+			atomic_inc(&__rt_peer_genid);
 	return est_mtu ? : new_mtu;
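When the ICMP Fragmentation Needed message carries no usable next-hop MTU, guess_mtu() falls back to the RFC 1191 "plateau" strategy: pick the largest common link MTU strictly below the failing packet's size. A sketch of that table walk; the exact values mirror the kernel's mtu_plateau[] as best I recall and should be treated as an assumption of this sketch:

#include <stdio.h>

static const unsigned short mtu_plateau[] = {
	32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128, 68
};

static unsigned short guess_mtu(unsigned short old_mtu)
{
	size_t i;

	/* Largest plateau strictly smaller than the failing packet. */
	for (i = 0; i < sizeof(mtu_plateau) / sizeof(mtu_plateau[0]); i++)
		if (old_mtu > mtu_plateau[i])
			return mtu_plateau[i];
	return 68;	/* minimum IPv4 MTU */
}

int main(void)
{
	printf("guess_mtu(1500) = %u\n", guess_mtu(1500));	/* 1492 */
	printf("guess_mtu(576)  = %u\n", guess_mtu(576));	/* 296 */
	return 0;
}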
+static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer)
+	unsigned long expires = ACCESS_ONCE(peer->pmtu_expires);
+	if (time_before(jiffies, expires)) {
+		u32 orig_dst_mtu = dst_mtu(dst);
+		if (peer->pmtu_learned < orig_dst_mtu) {
+			if (!peer->pmtu_orig)
+				peer->pmtu_orig = dst_metric_raw(dst, RTAX_MTU);
+			dst_metric_set(dst, RTAX_MTU, peer->pmtu_learned);
+	} else if (cmpxchg(&peer->pmtu_expires, expires, 0) == expires)
+		dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig);
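The cmpxchg() on the expiry timestamp is what makes the restore a one-shot operation: several CPUs may notice the expiry at once, but only the one whose compare-and-swap moves pmtu_expires from its old value to 0 puts the original MTU back. A userspace sketch of the pattern, with GCC's __sync_val_compare_and_swap() standing in for the kernel's cmpxchg():

#include <stdio.h>

static unsigned long pmtu_expires = 100;	/* pretend this has expired */
static unsigned int cur_mtu = 576;		/* clamped, learned PMTU */
static unsigned int pmtu_orig = 1500;		/* value to restore */

static void expire_pmtu(void)
{
	unsigned long expires = pmtu_expires;

	/* Only one caller wins the transition expires -> 0. */
	if (expires &&
	    __sync_val_compare_and_swap(&pmtu_expires, expires, 0) == expires) {
		cur_mtu = pmtu_orig;
		printf("restored MTU to %u\n", cur_mtu);
	} else {
		printf("someone else already restored it\n");
	}
}

int main(void)
{
	expire_pmtu();	/* restores */
	expire_pmtu();	/* no-op */
	return 0;
}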
 static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
-	if (dst_mtu(dst) > mtu && mtu >= 68 &&
-	    !(dst_metric_locked(dst, RTAX_MTU))) {
-		if (mtu < ip_rt_min_pmtu) {
-			u32 lock = dst_metric(dst, RTAX_LOCK);
+	struct rtable *rt = (struct rtable *) dst;
+	struct inet_peer *peer;
+	rt_bind_peer(rt, rt->rt_dst, 1);
+	unsigned long pmtu_expires = ACCESS_ONCE(peer->pmtu_expires);
+	if (mtu < ip_rt_min_pmtu)
 		mtu = ip_rt_min_pmtu;
-		dst_metric_set(dst, RTAX_LOCK, lock | (1 << RTAX_MTU));
+	if (!pmtu_expires || mtu < peer->pmtu_learned) {
+		pmtu_expires = jiffies + ip_rt_mtu_expires;
+		peer->pmtu_learned = mtu;
+		peer->pmtu_expires = pmtu_expires;
+		atomic_inc(&__rt_peer_genid);
+		rt->rt_peer_genid = rt_peer_genid();
-	dst_metric_set(dst, RTAX_MTU, mtu);
-	dst_set_expires(dst, ip_rt_mtu_expires);
-	call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
+	check_peer_pmtu(dst, peer);
+static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
+	struct rtable *rt = (struct rtable *) dst;
+	__be32 orig_gw = rt->rt_gateway;
+	dst_confirm(&rt->dst);
+	neigh_release(rt->dst.neighbour);
+	rt->dst.neighbour = NULL;
+	rt->rt_gateway = peer->redirect_learned.a4;
+	if (arp_bind_neighbour(&rt->dst) ||
+	    !(rt->dst.neighbour->nud_state & NUD_VALID)) {
+		if (rt->dst.neighbour)
+			neigh_event_send(rt->dst.neighbour, NULL);
+		rt->rt_gateway = orig_gw;
+	rt->rt_flags |= RTCF_REDIRECTED;
+	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE,
 static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
-	if (rt_is_expired((struct rtable *)dst))
+	struct rtable *rt = (struct rtable *) dst;
+	if (rt_is_expired(rt))
+	if (rt->rt_peer_genid != rt_peer_genid()) {
+		struct inet_peer *peer;
+		rt_bind_peer(rt, rt->rt_dst, 0);
+			check_peer_pmtu(dst, peer);
+			if (peer->redirect_learned.a4 &&
+			    peer->redirect_learned.a4 != rt->rt_gateway) {
+				if (check_peer_redir(dst, peer))
+		rt->rt_peer_genid = rt_peer_genid();
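The shape of ipv4_dst_check() is worth pulling out: a cached route is usable only while its recorded generation ids match the current ones, so a global flush or a per-peer PMTU/redirect update invalidates entries lazily, at the next lookup. A userspace sketch of that check (simplified names, not the kernel's struct layout):

#include <stdio.h>

static unsigned int rt_genid;		/* bumped on cache invalidation */
static unsigned int rt_peer_genid;	/* bumped on PMTU/redirect updates */

struct rtable {
	unsigned int genid;
	unsigned int peer_genid;
};

static struct rtable *dst_check(struct rtable *rt)
{
	if (rt->genid != rt_genid)
		return NULL;			/* whole cache was flushed */
	if (rt->peer_genid != rt_peer_genid) {
		/* re-check learned PMTU / redirect here, then revalidate */
		rt->peer_genid = rt_peer_genid;
	}
	return rt;
}

int main(void)
{
	struct rtable rt = { rt_genid, rt_peer_genid };

	printf("valid: %s\n", dst_check(&rt) ? "yes" : "no");
	rt_genid++;				/* simulate a flush */
	printf("valid: %s\n", dst_check(&rt) ? "yes" : "no");
	return 0;
}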
-static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
+static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
+			    struct fib_info *fi)
+	struct inet_peer *peer;
+	/* If a peer entry exists for this destination, we must hook
+	 * it up in order to get at cached metrics.
+	 */
+	if (fl4 && (fl4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS))
+	rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create);
+		rt->rt_peer_genid = rt_peer_genid();
+		if (inet_metrics_new(peer))
+			memcpy(peer->metrics, fi->fib_metrics,
+			       sizeof(u32) * RTAX_MAX);
+		dst_init_metrics(&rt->dst, peer->metrics, false);
+		check_peer_pmtu(&rt->dst, peer);
+		if (peer->redirect_learned.a4 &&
+		    peer->redirect_learned.a4 != rt->rt_gateway) {
+			rt->rt_gateway = peer->redirect_learned.a4;
+			rt->rt_flags |= RTCF_REDIRECTED;
+		if (fi->fib_metrics != (u32 *) dst_default_metrics) {
+			atomic_inc(&fi->fib_clntref);
+		dst_init_metrics(&rt->dst, fi->fib_metrics, true);

+static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4,
+			   const struct fib_result *res,
+			   struct fib_info *fi, u16 type, u32 itag)
 	struct dst_entry *dst = &rt->dst;
-	struct fib_info *fi = res->fi;
 	if (FIB_RES_GW(*res) &&
 	    FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
 		rt->rt_gateway = FIB_RES_GW(*res);
-	dst_import_metrics(dst, fi->fib_metrics);
-#ifdef CONFIG_NET_CLS_ROUTE
+	rt_init_metrics(rt, fl4, fi);
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	dst->tclassid = FIB_RES_NH(*res).nh_tclassid;
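rt_init_metrics() is essentially copy-on-write for route metrics: routes normally share the FIB entry's metrics read-only, and only when per-destination state is wanted (FLOWI_FLAG_PRECOW_METRICS) are the metrics copied once into the inet_peer, after which the route points at that writable copy. A minimal sketch of the idea, with hypothetical names:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define RTAX_MAX 16

struct peer {
	uint32_t metrics[RTAX_MAX];
	int metrics_initialized;
};

struct route {
	const uint32_t *metrics;	/* shared (RO) or peer copy (RW) */
	int metrics_writable;
};

static const uint32_t fib_metrics[RTAX_MAX] = { [2] = 1500 };	/* e.g. MTU */

static void init_metrics(struct route *rt, struct peer *peer)
{
	if (peer) {
		/* Copy the template once, then hand out the writable copy. */
		if (!peer->metrics_initialized) {
			memcpy(peer->metrics, fib_metrics, sizeof(fib_metrics));
			peer->metrics_initialized = 1;
		}
		rt->metrics = peer->metrics;
		rt->metrics_writable = 1;
	} else {
		rt->metrics = fib_metrics;	/* shared, must not be written */
		rt->metrics_writable = 0;
	}
}

int main(void)
{
	struct peer p = { { 0 }, 0 };
	struct route shared, precow;

	init_metrics(&shared, NULL);
	init_metrics(&precow, &p);
	printf("shared writable=%d, precow writable=%d, mtu=%u\n",
	       shared.metrics_writable, precow.metrics_writable,
	       precow.metrics[2]);
	return 0;
}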
 	spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
-	err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst,
+	err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst,
-	rth = dst_alloc(&ipv4_dst_ops);
+	rth = rt_dst_alloc(init_net.loopback_dev,
+			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
+#ifdef CONFIG_IP_ROUTE_CLASSID
+	rth->dst.tclassid = itag;
 	rth->dst.output = ip_rt_bug;
-	rth->dst.obsolete = -1;
-	atomic_set(&rth->dst.__refcnt, 1);
-	rth->dst.flags= DST_HOST;
-	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
-		rth->dst.flags |= DST_NOPOLICY;
-	rth->fl.fl4_dst = daddr;
-	rth->rt_dst = daddr;
-	rth->fl.fl4_tos = tos;
-	rth->fl.mark = skb->mark;
-	rth->fl.fl4_src = saddr;
-	rth->rt_src = saddr;
-#ifdef CONFIG_NET_CLS_ROUTE
-	rth->dst.tclassid = itag;
-	rth->fl.iif = dev->ifindex;
-	rth->dst.dev = init_net.loopback_dev;
-	dev_hold(rth->dst.dev);
-	rth->rt_gateway = daddr;
-	rth->rt_spec_dst= spec_dst;
+	rth->rt_key_dst = daddr;
+	rth->rt_key_src = saddr;
 	rth->rt_genid = rt_genid(dev_net(dev));
 	rth->rt_flags = RTCF_MULTICAST;
 	rth->rt_type = RTN_MULTICAST;
+	rth->rt_key_tos = tos;
+	rth->rt_dst = daddr;
+	rth->rt_src = saddr;
+	rth->rt_route_iif = dev->ifindex;
+	rth->rt_iif = dev->ifindex;
+	rth->rt_mark = skb->mark;
+	rth->rt_gateway = daddr;
+	rth->rt_spec_dst= spec_dst;
+	rth->rt_peer_genid = 0;
 	rth->dst.input= ip_local_deliver;
 	rth->rt_flags |= RTCF_LOCAL;
-	rth = dst_alloc(&ipv4_dst_ops);
+	rth = rt_dst_alloc(out_dev->dev,
+			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
+			   IN_DEV_CONF_GET(out_dev, NOXFRM));
 	err = -ENOBUFS;
-	atomic_set(&rth->dst.__refcnt, 1);
-	rth->dst.flags= DST_HOST;
-	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
-		rth->dst.flags |= DST_NOPOLICY;
-	if (IN_DEV_CONF_GET(out_dev, NOXFRM))
-		rth->dst.flags |= DST_NOXFRM;
-	rth->fl.fl4_dst = daddr;
+	rth->rt_key_dst = daddr;
+	rth->rt_key_src = saddr;
+	rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
+	rth->rt_flags = flags;
+	rth->rt_type = res->type;
+	rth->rt_key_tos = tos;
 	rth->rt_dst = daddr;
-	rth->fl.fl4_tos = tos;
-	rth->fl.mark = skb->mark;
-	rth->fl.fl4_src = saddr;
 	rth->rt_src = saddr;
+	rth->rt_route_iif = in_dev->dev->ifindex;
+	rth->rt_iif = in_dev->dev->ifindex;
+	rth->rt_mark = skb->mark;
 	rth->rt_gateway = daddr;
-	rth->fl.iif = in_dev->dev->ifindex;
-	rth->dst.dev = (out_dev)->dev;
-	dev_hold(rth->dst.dev);
 	rth->rt_spec_dst= spec_dst;
+	rth->rt_peer_genid = 0;
-	rth->dst.obsolete = -1;
 	rth->dst.input = ip_forward;
 	rth->dst.output = ip_output;
-	rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
-	rt_set_nexthop(rth, res, itag);
-	rth->rt_flags = flags;
+	rt_set_nexthop(rth, NULL, res, res->fi, res->type, itag);
 	RT_CACHE_STAT_INC(in_brd);
-	rth = dst_alloc(&ipv4_dst_ops);
+	rth = rt_dst_alloc(net->loopback_dev,
+			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
+	rth->dst.input= ip_local_deliver;
 	rth->dst.output= ip_rt_bug;
-	rth->dst.obsolete = -1;
+#ifdef CONFIG_IP_ROUTE_CLASSID
+	rth->dst.tclassid = itag;
+	rth->rt_key_dst = daddr;
+	rth->rt_key_src = saddr;
 	rth->rt_genid = rt_genid(net);
-	atomic_set(&rth->dst.__refcnt, 1);
-	rth->dst.flags= DST_HOST;
-	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
-		rth->dst.flags |= DST_NOPOLICY;
-	rth->fl.fl4_dst = daddr;
-	rth->rt_dst = daddr;
-	rth->fl.fl4_tos = tos;
-	rth->fl.mark = skb->mark;
-	rth->fl.fl4_src = saddr;
-	rth->rt_src = saddr;
-#ifdef CONFIG_NET_CLS_ROUTE
-	rth->dst.tclassid = itag;
-	rth->fl.iif = dev->ifindex;
-	rth->dst.dev = net->loopback_dev;
-	dev_hold(rth->dst.dev);
-	rth->rt_gateway = daddr;
-	rth->rt_spec_dst= spec_dst;
-	rth->dst.input= ip_local_deliver;
 	rth->rt_flags = flags|RTCF_LOCAL;
+	rth->rt_type = res.type;
+	rth->rt_key_tos = tos;
+	rth->rt_dst = daddr;
+	rth->rt_src = saddr;
+#ifdef CONFIG_IP_ROUTE_CLASSID
+	rth->dst.tclassid = itag;
+	rth->rt_route_iif = dev->ifindex;
+	rth->rt_iif = dev->ifindex;
+	rth->rt_mark = skb->mark;
+	rth->rt_gateway = daddr;
+	rth->rt_spec_dst= spec_dst;
+	rth->rt_peer_genid = 0;
 	if (res.type == RTN_UNREACHABLE) {
 		rth->dst.input= ip_error;
 		rth->dst.error= -err;
 		rth->rt_flags &= ~RTCF_LOCAL;
-	rth->rt_type = res.type;
-	hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net));
-	err = rt_intern_hash(hash, rth, NULL, skb, fl.iif);
+	hash = rt_hash(daddr, saddr, fl4.flowi4_iif, rt_genid(net));
+	rth = rt_intern_hash(hash, rth, skb, fl4.flowi4_iif);
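Both the input and output paths pick a bucket with rt_hash(), which mixes destination, source and interface index together with the current rt_genid, so a genid bump implicitly moves every stale entry out of reach. A userspace sketch of the idea; the mix function below is a simplified stand-in for the kernel's jhash_3words(), not its actual arithmetic:

#include <stdint.h>
#include <stdio.h>

#define RT_HASH_BITS 8
static const uint32_t rt_hash_mask = (1U << RT_HASH_BITS) - 1;

static uint32_t mix3(uint32_t a, uint32_t b, uint32_t c)
{
	/* Simplified avalanche, standing in for jhash_3words(). */
	a *= 0x9e3779b1; b *= 0x85ebca6b; c *= 0xc2b2ae35;
	a ^= (b >> 13) ^ (c << 7);
	a *= 0x27d4eb2f;
	return a ^ (a >> 16);
}

static unsigned int rt_hash(uint32_t daddr, uint32_t saddr, int idx,
			    int genid)
{
	return mix3(daddr, saddr, (uint32_t)idx ^ (uint32_t)genid) &
	       rt_hash_mask;
}

int main(void)
{
	uint32_t daddr = 0x0a000001, saddr = 0x0a000002;

	printf("bucket (genid 0): %u\n", rt_hash(daddr, saddr, 2, 0));
	printf("bucket (genid 1): %u\n", rt_hash(daddr, saddr, 2, 1));
	return 0;
}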
 EXPORT_SYMBOL(ip_route_input_common);

 /* called with rcu_read_lock() */
-static int __mkroute_output(struct rtable **result,
-			    struct fib_result *res,
-			    const struct flowi *fl,
-			    const struct flowi *oldflp,
-			    struct net_device *dev_out,
+static struct rtable *__mkroute_output(const struct fib_result *res,
+				       const struct flowi4 *fl4,
+				       __be32 orig_daddr, __be32 orig_saddr,
+				       int orig_oif, struct net_device *dev_out,
+	struct fib_info *fi = res->fi;
+	u32 tos = RT_FL_TOS(fl4);
+	struct in_device *in_dev;
+	u16 type = res->type;
 	struct rtable *rth;
-	struct in_device *in_dev;
-	u32 tos = RT_FL_TOS(oldflp);

-	if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags & IFF_LOOPBACK))
-	if (ipv4_is_lbcast(fl->fl4_dst))
-		res->type = RTN_BROADCAST;
-	else if (ipv4_is_multicast(fl->fl4_dst))
-		res->type = RTN_MULTICAST;
-	else if (ipv4_is_zeronet(fl->fl4_dst))
+	if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
+		return ERR_PTR(-EINVAL);
+	if (ipv4_is_lbcast(fl4->daddr))
+		type = RTN_BROADCAST;
+	else if (ipv4_is_multicast(fl4->daddr))
+		type = RTN_MULTICAST;
+	else if (ipv4_is_zeronet(fl4->daddr))
+		return ERR_PTR(-EINVAL);
 	if (dev_out->flags & IFF_LOOPBACK)
 		flags |= RTCF_LOCAL;
 	in_dev = __in_dev_get_rcu(dev_out);
+		return ERR_PTR(-EINVAL);
-	if (res->type == RTN_BROADCAST) {
+	if (type == RTN_BROADCAST) {
 		flags |= RTCF_BROADCAST | RTCF_LOCAL;
-	} else if (res->type == RTN_MULTICAST) {
+	} else if (type == RTN_MULTICAST) {
 		flags |= RTCF_MULTICAST | RTCF_LOCAL;
-		if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src,
+		if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
 			flags &= ~RTCF_LOCAL;
 		/* If multicast route do not exist use
 		 * default one, but do not gateway in this case.
 		 * Yes, it is hack.
-		if (res->fi && res->prefixlen < 4)
+		if (fi && res->prefixlen < 4)

-	rth = dst_alloc(&ipv4_dst_ops);
+	rth = rt_dst_alloc(dev_out,
+			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
+			   IN_DEV_CONF_GET(in_dev, NOXFRM));
-	atomic_set(&rth->dst.__refcnt, 1);
-	rth->dst.flags= DST_HOST;
-	if (IN_DEV_CONF_GET(in_dev, NOXFRM))
-		rth->dst.flags |= DST_NOXFRM;
-	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
-		rth->dst.flags |= DST_NOPOLICY;
-	rth->fl.fl4_dst = oldflp->fl4_dst;
-	rth->fl.fl4_tos = tos;
-	rth->fl.fl4_src = oldflp->fl4_src;
-	rth->fl.oif = oldflp->oif;
-	rth->fl.mark = oldflp->mark;
-	rth->rt_dst = fl->fl4_dst;
-	rth->rt_src = fl->fl4_src;
-	rth->rt_iif = oldflp->oif ? : dev_out->ifindex;
-	/* get references to the devices that are to be hold by the routing
-	rth->dst.dev = dev_out;
-	rth->rt_gateway = fl->fl4_dst;
-	rth->rt_spec_dst= fl->fl4_src;
-	rth->dst.output=ip_output;
-	rth->dst.obsolete = -1;
+		return ERR_PTR(-ENOBUFS);
+	rth->dst.output = ip_output;
+	rth->rt_key_dst = orig_daddr;
+	rth->rt_key_src = orig_saddr;
 	rth->rt_genid = rt_genid(dev_net(dev_out));
+	rth->rt_flags = flags;
+	rth->rt_type = type;
+	rth->rt_key_tos = tos;
+	rth->rt_dst = fl4->daddr;
+	rth->rt_src = fl4->saddr;
+	rth->rt_route_iif = 0;
+	rth->rt_iif = orig_oif ? : dev_out->ifindex;
+	rth->rt_oif = orig_oif;
+	rth->rt_mark = fl4->flowi4_mark;
+	rth->rt_gateway = fl4->daddr;
+	rth->rt_spec_dst= fl4->saddr;
+	rth->rt_peer_genid = 0;

 	RT_CACHE_STAT_INC(out_slow_tot);

 	if (flags & RTCF_LOCAL) {
 		rth->dst.input = ip_local_deliver;
-		rth->rt_spec_dst = fl->fl4_dst;
+		rth->rt_spec_dst = fl4->daddr;
 	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
-		rth->rt_spec_dst = fl->fl4_src;
+		rth->rt_spec_dst = fl4->saddr;
 		if (flags & RTCF_LOCAL &&
 		    !(dev_out->flags & IFF_LOOPBACK)) {
 			rth->dst.output = ip_mc_output;
 			RT_CACHE_STAT_INC(out_slow_mc);
 #ifdef CONFIG_IP_MROUTE
-		if (res->type == RTN_MULTICAST) {
+		if (type == RTN_MULTICAST) {
 			if (IN_DEV_MFORWARD(in_dev) &&
-			    !ipv4_is_local_multicast(oldflp->fl4_dst)) {
+			    !ipv4_is_local_multicast(fl4->daddr)) {
 				rth->dst.input = ip_mr_input;
 				rth->dst.output = ip_mc_output;
 * called with rcu_read_lock();
-static int ip_route_output_slow(struct net *net, struct rtable **rp,
-				const struct flowi *oldflp)
+static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4)
-	u32 tos = RT_FL_TOS(oldflp);
-	struct flowi fl = { .fl4_dst = oldflp->fl4_dst,
-			    .fl4_src = oldflp->fl4_src,
-			    .fl4_tos = tos & IPTOS_RT_MASK,
-			    .fl4_scope = ((tos & RTO_ONLINK) ?
-					  RT_SCOPE_LINK : RT_SCOPE_UNIVERSE),
-			    .mark = oldflp->mark,
-			    .iif = net->loopback_dev->ifindex,
-			    .oif = oldflp->oif };
-	struct fib_result res;
-	unsigned int flags = 0;
 	struct net_device *dev_out = NULL;
+	u32 tos = RT_FL_TOS(fl4);
+	unsigned int flags = 0;
+	struct fib_result res;
 #ifdef CONFIG_IP_MULTIPLE_TABLES
-	if (oldflp->fl4_src) {
-		if (ipv4_is_multicast(oldflp->fl4_src) ||
-		    ipv4_is_lbcast(oldflp->fl4_src) ||
-		    ipv4_is_zeronet(oldflp->fl4_src))
+	orig_daddr = fl4->daddr;
+	orig_saddr = fl4->saddr;
+	orig_oif = fl4->flowi4_oif;
+	fl4->flowi4_iif = net->loopback_dev->ifindex;
+	fl4->flowi4_tos = tos & IPTOS_RT_MASK;
+	fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
+			     RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
+	rth = ERR_PTR(-EINVAL);
+	if (ipv4_is_multicast(fl4->saddr) ||
+	    ipv4_is_lbcast(fl4->saddr) ||
+	    ipv4_is_zeronet(fl4->saddr))
 	/* I removed check for oif == dev_out->oif here.
 	   Luckily, this hack is good workaround.
-		fl.oif = dev_out->ifindex;
+		fl4->flowi4_oif = dev_out->ifindex;
 		goto make_route;
-	if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) {
+	if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
 		/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
-		if (!__ip_dev_find(net, oldflp->fl4_src, false))
+		if (!__ip_dev_find(net, fl4->saddr, false))
-		dev_out = dev_get_by_index_rcu(net, oldflp->oif);
+	if (fl4->flowi4_oif) {
+		dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
+		rth = ERR_PTR(-ENODEV);
 		if (dev_out == NULL)
 		/* RACE: Check return value of inet_select_addr instead. */
 		if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
+			rth = ERR_PTR(-ENETUNREACH);
-		if (ipv4_is_local_multicast(oldflp->fl4_dst) ||
-		    ipv4_is_lbcast(oldflp->fl4_dst)) {
-			fl.fl4_src = inet_select_addr(dev_out, 0,
+		if (ipv4_is_local_multicast(fl4->daddr) ||
+		    ipv4_is_lbcast(fl4->daddr)) {
+			fl4->saddr = inet_select_addr(dev_out, 0,
						      RT_SCOPE_LINK);
			goto make_route;
-		if (ipv4_is_multicast(oldflp->fl4_dst))
-			fl.fl4_src = inet_select_addr(dev_out, 0,
-		else if (!oldflp->fl4_dst)
-			fl.fl4_src = inet_select_addr(dev_out, 0,
+		if (ipv4_is_multicast(fl4->daddr))
+			fl4->saddr = inet_select_addr(dev_out, 0,
+		else if (!fl4->daddr)
+			fl4->saddr = inet_select_addr(dev_out, 0,
						      RT_SCOPE_HOST);
-		fl.fl4_dst = fl.fl4_src;
-			fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK);
+		fl4->daddr = fl4->saddr;
+			fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
 		dev_out = net->loopback_dev;
-		fl.oif = net->loopback_dev->ifindex;
+		fl4->flowi4_oif = net->loopback_dev->ifindex;
 		res.type = RTN_LOCAL;
 		flags |= RTCF_LOCAL;
 		goto make_route;
-	if (fib_lookup(net, &fl, &res)) {
+	if (fib_lookup(net, fl4, &res)) {
+		if (fl4->flowi4_oif) {
 			/* Apparently, routing tables are wrong. Assume,
			   that the destination is on link.
			   likely IPv6, but we do not.
-			if (fl.fl4_src == 0)
-				fl.fl4_src = inet_select_addr(dev_out, 0,
+			if (fl4->saddr == 0)
+				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_LINK);
			res.type = RTN_UNICAST;
			goto make_route;
+		rth = ERR_PTR(-ENETUNREACH);
 	if (res.type == RTN_LOCAL) {
 		if (res.fi->fib_prefsrc)
-			fl.fl4_src = res.fi->fib_prefsrc;
+			fl4->saddr = res.fi->fib_prefsrc;
-			fl.fl4_src = fl.fl4_dst;
+			fl4->saddr = fl4->daddr;
 		dev_out = net->loopback_dev;
-		fl.oif = dev_out->ifindex;
+		fl4->flowi4_oif = dev_out->ifindex;
 		flags |= RTCF_LOCAL;
 		goto make_route;
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-	if (res.fi->fib_nhs > 1 && fl.oif == 0)
-		fib_select_multipath(&fl, &res);
+	if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0)
+		fib_select_multipath(&res);
-	if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif)
-		fib_select_default(net, &fl, &res);
+	if (!res.prefixlen &&
+	    res.table->tb_num_default > 1 &&
+	    res.type == RTN_UNICAST && !fl4->flowi4_oif)
+		fib_select_default(&res);
-		fl.fl4_src = FIB_RES_PREFSRC(res);
+		fl4->saddr = FIB_RES_PREFSRC(net, res);
 	dev_out = FIB_RES_DEV(res);
-	fl.oif = dev_out->ifindex;
+	fl4->flowi4_oif = dev_out->ifindex;
-	err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags);
+	rth = __mkroute_output(&res, fl4, orig_daddr, orig_saddr, orig_oif,
+	hash = rt_hash(orig_daddr, orig_saddr, orig_oif,
+		       rt_genid(dev_net(dev_out)));
+	rth = rt_intern_hash(hash, rth, NULL, orig_oif);
-int __ip_route_output_key(struct net *net, struct rtable **rp,
-			  const struct flowi *flp)
+struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4)
 	unsigned int hash;

 	if (!rt_caching(net))
 		goto slow_output;

-	hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net));
+	hash = rt_hash(flp4->daddr, flp4->saddr, flp4->flowi4_oif, rt_genid(net));

 	rcu_read_lock_bh();
 	for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth;
 	     rth = rcu_dereference_bh(rth->dst.rt_next)) {
-		if (rth->fl.fl4_dst == flp->fl4_dst &&
-		    rth->fl.fl4_src == flp->fl4_src &&
+		if (rth->rt_key_dst == flp4->daddr &&
+		    rth->rt_key_src == flp4->saddr &&
 		    rt_is_output_route(rth) &&
-		    rth->fl.oif == flp->oif &&
-		    rth->fl.mark == flp->mark &&
-		    !((rth->fl.fl4_tos ^ flp->fl4_tos) &
+		    rth->rt_oif == flp4->flowi4_oif &&
+		    rth->rt_mark == flp4->flowi4_mark &&
+		    !((rth->rt_key_tos ^ flp4->flowi4_tos) &
 		      (IPTOS_RT_MASK | RTO_ONLINK)) &&
 		    net_eq(dev_net(rth->dst.dev), net) &&
 		    !rt_is_expired(rth)) {
 			dst_use(&rth->dst, jiffies);
 			RT_CACHE_STAT_INC(out_hit);
 			rcu_read_unlock_bh();
+			flp4->saddr = rth->rt_src;
+			flp4->daddr = rth->rt_dst;
 		RT_CACHE_STAT_INC(out_hlist_search);
 	rcu_read_unlock_bh();

-	res = ip_route_output_slow(net, rp, flp);
+	return ip_route_output_slow(net, flp4);
 EXPORT_SYMBOL_GPL(__ip_route_output_key);
2728
dev_hold(new->dev);
2730
rt->rt_key_dst = ort->rt_key_dst;
2731
rt->rt_key_src = ort->rt_key_src;
2732
rt->rt_key_tos = ort->rt_key_tos;
2733
rt->rt_route_iif = ort->rt_route_iif;
2734
rt->rt_iif = ort->rt_iif;
2735
rt->rt_oif = ort->rt_oif;
2736
rt->rt_mark = ort->rt_mark;
2751
2738
rt->rt_genid = rt_genid(net);
2752
2739
rt->rt_flags = ort->rt_flags;
2753
2740
rt->rt_type = ort->rt_type;
2754
2741
rt->rt_dst = ort->rt_dst;
2755
2742
rt->rt_src = ort->rt_src;
2756
rt->rt_iif = ort->rt_iif;
2757
2743
rt->rt_gateway = ort->rt_gateway;
2758
2744
rt->rt_spec_dst = ort->rt_spec_dst;
2759
2745
rt->peer = ort->peer;
2761
2747
atomic_inc(&rt->peer->refcnt);
2750
atomic_inc(&rt->fi->fib_clntref);
2766
dst_release(&(*rp)->dst);
2768
return rt ? 0 : -ENOMEM;
2755
dst_release(dst_orig);
2757
return rt ? &rt->dst : ERR_PTR(-ENOMEM);
2771
int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp,
2772
struct sock *sk, int flags)
2760
struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
2776
if ((err = __ip_route_output_key(net, rp, flp)) != 0)
2781
flp->fl4_src = (*rp)->rt_src;
2783
flp->fl4_dst = (*rp)->rt_dst;
2784
err = __xfrm_lookup(net, (struct dst_entry **)rp, flp, sk,
2785
flags ? XFRM_LOOKUP_WAIT : 0);
2786
if (err == -EREMOTE)
2787
err = ipv4_dst_blackhole(net, rp, flp);
2763
struct rtable *rt = __ip_route_output_key(net, flp4);
2768
if (flp4->flowi4_proto)
2769
rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
2770
flowi4_to_flowi(flp4),
2794
2775
EXPORT_SYMBOL_GPL(ip_route_output_flow);
2796
int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp)
2798
return ip_route_output_flow(net, rp, flp, NULL, 0);
2800
EXPORT_SYMBOL(ip_route_output_key);
2802
2777
static int rt_fill_info(struct net *net,
2803
2778
struct sk_buff *skb, u32 pid, u32 seq, int event,
2804
2779
int nowait, unsigned int flags)