~ubuntu-branches/ubuntu/precise/linux-ti-omap4/precise

« back to all changes in this revision

Viewing changes to net/ipv4/route.c

  • Committer: Bazaar Package Importer
  • Author(s): Paolo Pisati
  • Date: 2011-06-29 15:23:51 UTC
  • mfrom: (26.1.1 natty-proposed)
  • Revision ID: james.westby@ubuntu.com-20110629152351-xs96tm303d95rpbk
Tags: 3.0.0-1200.2
* Rebased against 3.0.0-6.7
* BSP from TI based on 3.0.0

Show diffs side-by-side

added added

removed removed

Lines of Context:
109
109
#include <linux/sysctl.h>
110
110
#endif
111
111
 
112
 
#define RT_FL_TOS(oldflp) \
113
 
    ((u32)(oldflp->fl4_tos & (IPTOS_RT_MASK | RTO_ONLINK)))
 
112
#define RT_FL_TOS(oldflp4) \
 
113
    ((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)))
114
114
 
115
115
#define IP_MAX_MTU      0xFFF0
116
116
 
131
131
static int ip_rt_min_advmss __read_mostly       = 256;
132
132
static int rt_chain_length_max __read_mostly    = 20;
133
133
 
134
 
static struct delayed_work expires_work;
135
 
static unsigned long expires_ljiffies;
136
 
 
137
134
/*
138
135
 *      Interface to generic destination cache.
139
136
 */
152
149
{
153
150
}
154
151
 
 
152
static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
 
153
{
 
154
        struct rtable *rt = (struct rtable *) dst;
 
155
        struct inet_peer *peer;
 
156
        u32 *p = NULL;
 
157
 
 
158
        if (!rt->peer)
 
159
                rt_bind_peer(rt, rt->rt_dst, 1);
 
160
 
 
161
        peer = rt->peer;
 
162
        if (peer) {
 
163
                u32 *old_p = __DST_METRICS_PTR(old);
 
164
                unsigned long prev, new;
 
165
 
 
166
                p = peer->metrics;
 
167
                if (inet_metrics_new(peer))
 
168
                        memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
 
169
 
 
170
                new = (unsigned long) p;
 
171
                prev = cmpxchg(&dst->_metrics, old, new);
 
172
 
 
173
                if (prev != old) {
 
174
                        p = __DST_METRICS_PTR(prev);
 
175
                        if (prev & DST_METRICS_READ_ONLY)
 
176
                                p = NULL;
 
177
                } else {
 
178
                        if (rt->fi) {
 
179
                                fib_info_put(rt->fi);
 
180
                                rt->fi = NULL;
 
181
                        }
 
182
                }
 
183
        }
 
184
        return p;
 
185
}
 
186
 
155
187
static struct dst_ops ipv4_dst_ops = {
156
188
        .family =               AF_INET,
157
189
        .protocol =             cpu_to_be16(ETH_P_IP),
159
191
        .check =                ipv4_dst_check,
160
192
        .default_advmss =       ipv4_default_advmss,
161
193
        .default_mtu =          ipv4_default_mtu,
 
194
        .cow_metrics =          ipv4_cow_metrics,
162
195
        .destroy =              ipv4_dst_destroy,
163
196
        .ifdown =               ipv4_dst_ifdown,
164
197
        .negative_advice =      ipv4_negative_advice,
391
424
                        dst_metric(&r->dst, RTAX_WINDOW),
392
425
                        (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) +
393
426
                              dst_metric(&r->dst, RTAX_RTTVAR)),
394
 
                        r->fl.fl4_tos,
 
427
                        r->rt_key_tos,
395
428
                        r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1,
396
429
                        r->dst.hh ? (r->dst.hh->hh_output ==
397
430
                                       dev_queue_xmit) : 0,
514
547
        .release = seq_release,
515
548
};
516
549
 
517
 
#ifdef CONFIG_NET_CLS_ROUTE
 
550
#ifdef CONFIG_IP_ROUTE_CLASSID
518
551
static int rt_acct_proc_show(struct seq_file *m, void *v)
519
552
{
520
553
        struct ip_rt_acct *dst, *src;
567
600
        if (!pde)
568
601
                goto err2;
569
602
 
570
 
#ifdef CONFIG_NET_CLS_ROUTE
 
603
#ifdef CONFIG_IP_ROUTE_CLASSID
571
604
        pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
572
605
        if (!pde)
573
606
                goto err3;
574
607
#endif
575
608
        return 0;
576
609
 
577
 
#ifdef CONFIG_NET_CLS_ROUTE
 
610
#ifdef CONFIG_IP_ROUTE_CLASSID
578
611
err3:
579
612
        remove_proc_entry("rt_cache", net->proc_net_stat);
580
613
#endif
588
621
{
589
622
        remove_proc_entry("rt_cache", net->proc_net_stat);
590
623
        remove_proc_entry("rt_cache", net->proc_net);
591
 
#ifdef CONFIG_NET_CLS_ROUTE
 
624
#ifdef CONFIG_IP_ROUTE_CLASSID
592
625
        remove_proc_entry("rt_acct", net->proc_net);
593
626
#endif
594
627
}
632
665
static inline int rt_valuable(struct rtable *rth)
633
666
{
634
667
        return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
635
 
                rth->dst.expires;
 
668
                (rth->peer && rth->peer->pmtu_expires);
636
669
}
637
670
 
638
671
static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2)
643
676
        if (atomic_read(&rth->dst.__refcnt))
644
677
                goto out;
645
678
 
646
 
        ret = 1;
647
 
        if (rth->dst.expires &&
648
 
            time_after_eq(jiffies, rth->dst.expires))
649
 
                goto out;
650
 
 
651
679
        age = jiffies - rth->dst.lastuse;
652
 
        ret = 0;
653
680
        if ((age <= tmo1 && !rt_fast_clean(rth)) ||
654
681
            (age <= tmo2 && rt_valuable(rth)))
655
682
                goto out;
684
711
                net->ipv4.sysctl_rt_cache_rebuild_count;
685
712
}
686
713
 
687
 
static inline bool compare_hash_inputs(const struct flowi *fl1,
688
 
                                        const struct flowi *fl2)
 
714
static inline bool compare_hash_inputs(const struct rtable *rt1,
 
715
                                       const struct rtable *rt2)
689
716
{
690
 
        return ((((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) |
691
 
                ((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) |
692
 
                (fl1->iif ^ fl2->iif)) == 0);
 
717
        return ((((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) |
 
718
                ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
 
719
                (rt1->rt_iif ^ rt2->rt_iif)) == 0);
693
720
}
694
721
 
695
 
static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
 
722
static inline int compare_keys(struct rtable *rt1, struct rtable *rt2)
696
723
{
697
 
        return (((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) |
698
 
                ((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) |
699
 
                (fl1->mark ^ fl2->mark) |
700
 
                (*(u16 *)&fl1->fl4_tos ^ *(u16 *)&fl2->fl4_tos) |
701
 
                (fl1->oif ^ fl2->oif) |
702
 
                (fl1->iif ^ fl2->iif)) == 0;
 
724
        return (((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) |
 
725
                ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
 
726
                (rt1->rt_mark ^ rt2->rt_mark) |
 
727
                (rt1->rt_key_tos ^ rt2->rt_key_tos) |
 
728
                (rt1->rt_oif ^ rt2->rt_oif) |
 
729
                (rt1->rt_iif ^ rt2->rt_iif)) == 0;
703
730
}
704
731
 
705
732
static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
786
813
        const struct rtable *aux = head;
787
814
 
788
815
        while (aux != rth) {
789
 
                if (compare_hash_inputs(&aux->fl, &rth->fl))
 
816
                if (compare_hash_inputs(aux, rth))
790
817
                        return 0;
791
818
                aux = rcu_dereference_protected(aux->dst.rt_next, 1);
792
819
        }
793
820
        return ONE;
794
821
}
795
822
 
796
 
static void rt_check_expire(void)
797
 
{
798
 
        static unsigned int rover;
799
 
        unsigned int i = rover, goal;
800
 
        struct rtable *rth;
801
 
        struct rtable __rcu **rthp;
802
 
        unsigned long samples = 0;
803
 
        unsigned long sum = 0, sum2 = 0;
804
 
        unsigned long delta;
805
 
        u64 mult;
806
 
 
807
 
        delta = jiffies - expires_ljiffies;
808
 
        expires_ljiffies = jiffies;
809
 
        mult = ((u64)delta) << rt_hash_log;
810
 
        if (ip_rt_gc_timeout > 1)
811
 
                do_div(mult, ip_rt_gc_timeout);
812
 
        goal = (unsigned int)mult;
813
 
        if (goal > rt_hash_mask)
814
 
                goal = rt_hash_mask + 1;
815
 
        for (; goal > 0; goal--) {
816
 
                unsigned long tmo = ip_rt_gc_timeout;
817
 
                unsigned long length;
818
 
 
819
 
                i = (i + 1) & rt_hash_mask;
820
 
                rthp = &rt_hash_table[i].chain;
821
 
 
822
 
                if (need_resched())
823
 
                        cond_resched();
824
 
 
825
 
                samples++;
826
 
 
827
 
                if (rcu_dereference_raw(*rthp) == NULL)
828
 
                        continue;
829
 
                length = 0;
830
 
                spin_lock_bh(rt_hash_lock_addr(i));
831
 
                while ((rth = rcu_dereference_protected(*rthp,
832
 
                                        lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) {
833
 
                        prefetch(rth->dst.rt_next);
834
 
                        if (rt_is_expired(rth)) {
835
 
                                *rthp = rth->dst.rt_next;
836
 
                                rt_free(rth);
837
 
                                continue;
838
 
                        }
839
 
                        if (rth->dst.expires) {
840
 
                                /* Entry is expired even if it is in use */
841
 
                                if (time_before_eq(jiffies, rth->dst.expires)) {
842
 
nofree:
843
 
                                        tmo >>= 1;
844
 
                                        rthp = &rth->dst.rt_next;
845
 
                                        /*
846
 
                                         * We only count entries on
847
 
                                         * a chain with equal hash inputs once
848
 
                                         * so that entries for different QOS
849
 
                                         * levels, and other non-hash input
850
 
                                         * attributes don't unfairly skew
851
 
                                         * the length computation
852
 
                                         */
853
 
                                        length += has_noalias(rt_hash_table[i].chain, rth);
854
 
                                        continue;
855
 
                                }
856
 
                        } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout))
857
 
                                goto nofree;
858
 
 
859
 
                        /* Cleanup aged off entries. */
860
 
                        *rthp = rth->dst.rt_next;
861
 
                        rt_free(rth);
862
 
                }
863
 
                spin_unlock_bh(rt_hash_lock_addr(i));
864
 
                sum += length;
865
 
                sum2 += length*length;
866
 
        }
867
 
        if (samples) {
868
 
                unsigned long avg = sum / samples;
869
 
                unsigned long sd = int_sqrt(sum2 / samples - avg*avg);
870
 
                rt_chain_length_max = max_t(unsigned long,
871
 
                                        ip_rt_gc_elasticity,
872
 
                                        (avg + 4*sd) >> FRACT_BITS);
873
 
        }
874
 
        rover = i;
875
 
}
876
 
 
877
 
/*
878
 
 * rt_worker_func() is run in process context.
879
 
 * we call rt_check_expire() to scan part of the hash table
880
 
 */
881
 
static void rt_worker_func(struct work_struct *work)
882
 
{
883
 
        rt_check_expire();
884
 
        schedule_delayed_work(&expires_work, ip_rt_gc_interval);
885
 
}
886
 
 
887
 
/*
888
 
 * Pertubation of rt_genid by a small quantity [1..256]
 
823
/*
 
824
 * Perturbation of rt_genid by a small quantity [1..256]
889
825
 * Using 8 bits of shuffling ensures we can call rt_cache_invalidate()
890
826
 * many times (2^24) without giving recent rt_genid.
891
827
 * Jenkins hash is strong enough that little changes of rt_genid are OK.
1032
968
                        break;
1033
969
 
1034
970
                expire >>= 1;
1035
 
#if RT_CACHE_DEBUG >= 2
1036
 
                printk(KERN_DEBUG "expire>> %u %d %d %d\n", expire,
1037
 
                                dst_entries_get_fast(&ipv4_dst_ops), goal, i);
1038
 
#endif
1039
971
 
1040
972
                if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size)
1041
973
                        goto out;
1056
988
            dst_entries_get_fast(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh ||
1057
989
            dst_entries_get_slow(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh)
1058
990
                expire = ip_rt_gc_timeout;
1059
 
#if RT_CACHE_DEBUG >= 2
1060
 
        printk(KERN_DEBUG "expire++ %u %d %d %d\n", expire,
1061
 
                        dst_entries_get_fast(&ipv4_dst_ops), goal, rover);
1062
 
#endif
1063
991
out:    return 0;
1064
992
}
1065
993
 
1078
1006
        return length >> FRACT_BITS;
1079
1007
}
1080
1008
 
1081
 
static int rt_intern_hash(unsigned hash, struct rtable *rt,
1082
 
                          struct rtable **rp, struct sk_buff *skb, int ifindex)
 
1009
static struct rtable *rt_intern_hash(unsigned hash, struct rtable *rt,
 
1010
                                     struct sk_buff *skb, int ifindex)
1083
1011
{
1084
1012
        struct rtable   *rth, *cand;
1085
1013
        struct rtable __rcu **rthp, **candp;
1120
1048
                                        printk(KERN_WARNING
1121
1049
                                            "Neighbour table failure & not caching routes.\n");
1122
1050
                                ip_rt_put(rt);
1123
 
                                return err;
 
1051
                                return ERR_PTR(err);
1124
1052
                        }
1125
1053
                }
1126
1054
 
1137
1065
                        rt_free(rth);
1138
1066
                        continue;
1139
1067
                }
1140
 
                if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) {
 
1068
                if (compare_keys(rth, rt) && compare_netns(rth, rt)) {
1141
1069
                        /* Put it first */
1142
1070
                        *rthp = rth->dst.rt_next;
1143
1071
                        /*
1157
1085
                        spin_unlock_bh(rt_hash_lock_addr(hash));
1158
1086
 
1159
1087
                        rt_drop(rt);
1160
 
                        if (rp)
1161
 
                                *rp = rth;
1162
 
                        else
 
1088
                        if (skb)
1163
1089
                                skb_dst_set(skb, &rth->dst);
1164
 
                        return 0;
 
1090
                        return rth;
1165
1091
                }
1166
1092
 
1167
1093
                if (!atomic_read(&rth->dst.__refcnt)) {
1202
1128
                        rt_emergency_hash_rebuild(net);
1203
1129
                        spin_unlock_bh(rt_hash_lock_addr(hash));
1204
1130
 
1205
 
                        hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
 
1131
                        hash = rt_hash(rt->rt_key_dst, rt->rt_key_src,
1206
1132
                                        ifindex, rt_genid(net));
1207
1133
                        goto restart;
1208
1134
                }
1218
1144
 
1219
1145
                        if (err != -ENOBUFS) {
1220
1146
                                rt_drop(rt);
1221
 
                                return err;
 
1147
                                return ERR_PTR(err);
1222
1148
                        }
1223
1149
 
1224
1150
                        /* Neighbour tables are full and nothing
1239
1165
                        if (net_ratelimit())
1240
1166
                                printk(KERN_WARNING "ipv4: Neighbour table overflow.\n");
1241
1167
                        rt_drop(rt);
1242
 
                        return -ENOBUFS;
 
1168
                        return ERR_PTR(-ENOBUFS);
1243
1169
                }
1244
1170
        }
1245
1171
 
1246
1172
        rt->dst.rt_next = rt_hash_table[hash].chain;
1247
1173
 
1248
 
#if RT_CACHE_DEBUG >= 2
1249
 
        if (rt->dst.rt_next) {
1250
 
                struct rtable *trt;
1251
 
                printk(KERN_DEBUG "rt_cache @%02x: %pI4",
1252
 
                       hash, &rt->rt_dst);
1253
 
                for (trt = rt->dst.rt_next; trt; trt = trt->dst.rt_next)
1254
 
                        printk(" . %pI4", &trt->rt_dst);
1255
 
                printk("\n");
1256
 
        }
1257
 
#endif
1258
1174
        /*
1259
1175
         * Since lookup is lockfree, we must make sure
1260
 
         * previous writes to rt are comitted to memory
 
1176
         * previous writes to rt are committed to memory
1261
1177
         * before making rt visible to other CPUS.
1262
1178
         */
1263
1179
        rcu_assign_pointer(rt_hash_table[hash].chain, rt);
1265
1181
        spin_unlock_bh(rt_hash_lock_addr(hash));
1266
1182
 
1267
1183
skip_hashing:
1268
 
        if (rp)
1269
 
                *rp = rt;
1270
 
        else
 
1184
        if (skb)
1271
1185
                skb_dst_set(skb, &rt->dst);
1272
 
        return 0;
1273
 
}
1274
 
 
1275
 
void rt_bind_peer(struct rtable *rt, int create)
 
1186
        return rt;
 
1187
}
 
1188
 
 
1189
static atomic_t __rt_peer_genid = ATOMIC_INIT(0);
 
1190
 
 
1191
static u32 rt_peer_genid(void)
 
1192
{
 
1193
        return atomic_read(&__rt_peer_genid);
 
1194
}
 
1195
 
 
1196
void rt_bind_peer(struct rtable *rt, __be32 daddr, int create)
1276
1197
{
1277
1198
        struct inet_peer *peer;
1278
1199
 
1279
 
        peer = inet_getpeer_v4(rt->rt_dst, create);
 
1200
        peer = inet_getpeer_v4(daddr, create);
1280
1201
 
1281
1202
        if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL)
1282
1203
                inet_putpeer(peer);
 
1204
        else
 
1205
                rt->rt_peer_genid = rt_peer_genid();
1283
1206
}
1284
1207
 
1285
1208
/*
1308
1231
 
1309
1232
        if (rt) {
1310
1233
                if (rt->peer == NULL)
1311
 
                        rt_bind_peer(rt, 1);
 
1234
                        rt_bind_peer(rt, rt->rt_dst, 1);
1312
1235
 
1313
1236
                /* If peer is attached to destination, it is never detached,
1314
1237
                   so we need not grab a lock to dereference it.
1349
1272
void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1350
1273
                    __be32 saddr, struct net_device *dev)
1351
1274
{
1352
 
        int i, k;
1353
1275
        struct in_device *in_dev = __in_dev_get_rcu(dev);
1354
 
        struct rtable *rth;
1355
 
        struct rtable __rcu **rthp;
1356
 
        __be32  skeys[2] = { saddr, 0 };
1357
 
        int  ikeys[2] = { dev->ifindex, 0 };
1358
 
        struct netevent_redirect netevent;
 
1276
        struct inet_peer *peer;
1359
1277
        struct net *net;
1360
1278
 
1361
1279
        if (!in_dev)
1367
1285
            ipv4_is_zeronet(new_gw))
1368
1286
                goto reject_redirect;
1369
1287
 
1370
 
        if (!rt_caching(net))
1371
 
                goto reject_redirect;
1372
 
 
1373
1288
        if (!IN_DEV_SHARED_MEDIA(in_dev)) {
1374
1289
                if (!inet_addr_onlink(in_dev, new_gw, old_gw))
1375
1290
                        goto reject_redirect;
1380
1295
                        goto reject_redirect;
1381
1296
        }
1382
1297
 
1383
 
        for (i = 0; i < 2; i++) {
1384
 
                for (k = 0; k < 2; k++) {
1385
 
                        unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
1386
 
                                                rt_genid(net));
1387
 
 
1388
 
                        rthp = &rt_hash_table[hash].chain;
1389
 
 
1390
 
                        while ((rth = rcu_dereference(*rthp)) != NULL) {
1391
 
                                struct rtable *rt;
1392
 
 
1393
 
                                if (rth->fl.fl4_dst != daddr ||
1394
 
                                    rth->fl.fl4_src != skeys[i] ||
1395
 
                                    rth->fl.oif != ikeys[k] ||
1396
 
                                    rt_is_input_route(rth) ||
1397
 
                                    rt_is_expired(rth) ||
1398
 
                                    !net_eq(dev_net(rth->dst.dev), net)) {
1399
 
                                        rthp = &rth->dst.rt_next;
1400
 
                                        continue;
1401
 
                                }
1402
 
 
1403
 
                                if (rth->rt_dst != daddr ||
1404
 
                                    rth->rt_src != saddr ||
1405
 
                                    rth->dst.error ||
1406
 
                                    rth->rt_gateway != old_gw ||
1407
 
                                    rth->dst.dev != dev)
1408
 
                                        break;
1409
 
 
1410
 
                                dst_hold(&rth->dst);
1411
 
 
1412
 
                                rt = dst_alloc(&ipv4_dst_ops);
1413
 
                                if (rt == NULL) {
1414
 
                                        ip_rt_put(rth);
1415
 
                                        return;
1416
 
                                }
1417
 
 
1418
 
                                /* Copy all the information. */
1419
 
                                *rt = *rth;
1420
 
                                rt->dst.__use           = 1;
1421
 
                                atomic_set(&rt->dst.__refcnt, 1);
1422
 
                                rt->dst.child           = NULL;
1423
 
                                if (rt->dst.dev)
1424
 
                                        dev_hold(rt->dst.dev);
1425
 
                                rt->dst.obsolete        = -1;
1426
 
                                rt->dst.lastuse = jiffies;
1427
 
                                rt->dst.path            = &rt->dst;
1428
 
                                rt->dst.neighbour       = NULL;
1429
 
                                rt->dst.hh              = NULL;
1430
 
#ifdef CONFIG_XFRM
1431
 
                                rt->dst.xfrm            = NULL;
1432
 
#endif
1433
 
                                rt->rt_genid            = rt_genid(net);
1434
 
                                rt->rt_flags            |= RTCF_REDIRECTED;
1435
 
 
1436
 
                                /* Gateway is different ... */
1437
 
                                rt->rt_gateway          = new_gw;
1438
 
 
1439
 
                                /* Redirect received -> path was valid */
1440
 
                                dst_confirm(&rth->dst);
1441
 
 
1442
 
                                if (rt->peer)
1443
 
                                        atomic_inc(&rt->peer->refcnt);
1444
 
 
1445
 
                                if (arp_bind_neighbour(&rt->dst) ||
1446
 
                                    !(rt->dst.neighbour->nud_state &
1447
 
                                            NUD_VALID)) {
1448
 
                                        if (rt->dst.neighbour)
1449
 
                                                neigh_event_send(rt->dst.neighbour, NULL);
1450
 
                                        ip_rt_put(rth);
1451
 
                                        rt_drop(rt);
1452
 
                                        goto do_next;
1453
 
                                }
1454
 
 
1455
 
                                netevent.old = &rth->dst;
1456
 
                                netevent.new = &rt->dst;
1457
 
                                call_netevent_notifiers(NETEVENT_REDIRECT,
1458
 
                                                        &netevent);
1459
 
 
1460
 
                                rt_del(hash, rth);
1461
 
                                if (!rt_intern_hash(hash, rt, &rt, NULL, rt->fl.oif))
1462
 
                                        ip_rt_put(rt);
1463
 
                                goto do_next;
1464
 
                        }
1465
 
                do_next:
1466
 
                        ;
1467
 
                }
 
1298
        peer = inet_getpeer_v4(daddr, 1);
 
1299
        if (peer) {
 
1300
                peer->redirect_learned.a4 = new_gw;
 
1301
 
 
1302
                inet_putpeer(peer);
 
1303
 
 
1304
                atomic_inc(&__rt_peer_genid);
1468
1305
        }
1469
1306
        return;
1470
1307
 
1479
1316
        ;
1480
1317
}
1481
1318
 
 
1319
static bool peer_pmtu_expired(struct inet_peer *peer)
 
1320
{
 
1321
        unsigned long orig = ACCESS_ONCE(peer->pmtu_expires);
 
1322
 
 
1323
        return orig &&
 
1324
               time_after_eq(jiffies, orig) &&
 
1325
               cmpxchg(&peer->pmtu_expires, orig, 0) == orig;
 
1326
}
 
1327
 
 
1328
static bool peer_pmtu_cleaned(struct inet_peer *peer)
 
1329
{
 
1330
        unsigned long orig = ACCESS_ONCE(peer->pmtu_expires);
 
1331
 
 
1332
        return orig &&
 
1333
               cmpxchg(&peer->pmtu_expires, orig, 0) == orig;
 
1334
}
 
1335
 
1482
1336
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
1483
1337
{
1484
1338
        struct rtable *rt = (struct rtable *)dst;
1488
1342
                if (dst->obsolete > 0) {
1489
1343
                        ip_rt_put(rt);
1490
1344
                        ret = NULL;
1491
 
                } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
1492
 
                           (rt->dst.expires &&
1493
 
                            time_after_eq(jiffies, rt->dst.expires))) {
1494
 
                        unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
1495
 
                                                rt->fl.oif,
 
1345
                } else if (rt->rt_flags & RTCF_REDIRECTED) {
 
1346
                        unsigned hash = rt_hash(rt->rt_key_dst, rt->rt_key_src,
 
1347
                                                rt->rt_oif,
1496
1348
                                                rt_genid(dev_net(dst->dev)));
1497
 
#if RT_CACHE_DEBUG >= 1
1498
 
                        printk(KERN_DEBUG "ipv4_negative_advice: redirect to %pI4/%02x dropped\n",
1499
 
                                &rt->rt_dst, rt->fl.fl4_tos);
1500
 
#endif
1501
1349
                        rt_del(hash, rt);
1502
1350
                        ret = NULL;
 
1351
                } else if (rt->peer && peer_pmtu_expired(rt->peer)) {
 
1352
                        dst_metric_set(dst, RTAX_MTU, rt->peer->pmtu_orig);
1503
1353
                }
1504
1354
        }
1505
1355
        return ret;
1525
1375
{
1526
1376
        struct rtable *rt = skb_rtable(skb);
1527
1377
        struct in_device *in_dev;
 
1378
        struct inet_peer *peer;
1528
1379
        int log_martians;
1529
1380
 
1530
1381
        rcu_read_lock();
1536
1387
        log_martians = IN_DEV_LOG_MARTIANS(in_dev);
1537
1388
        rcu_read_unlock();
1538
1389
 
 
1390
        if (!rt->peer)
 
1391
                rt_bind_peer(rt, rt->rt_dst, 1);
 
1392
        peer = rt->peer;
 
1393
        if (!peer) {
 
1394
                icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
 
1395
                return;
 
1396
        }
 
1397
 
1539
1398
        /* No redirected packets during ip_rt_redirect_silence;
1540
1399
         * reset the algorithm.
1541
1400
         */
1542
 
        if (time_after(jiffies, rt->dst.rate_last + ip_rt_redirect_silence))
1543
 
                rt->dst.rate_tokens = 0;
 
1401
        if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
 
1402
                peer->rate_tokens = 0;
1544
1403
 
1545
1404
        /* Too many ignored redirects; do not send anything
1546
1405
         * set rate_last to the last seen redirected packet.
1547
1406
         */
1548
 
        if (rt->dst.rate_tokens >= ip_rt_redirect_number) {
1549
 
                rt->dst.rate_last = jiffies;
 
1407
        if (peer->rate_tokens >= ip_rt_redirect_number) {
 
1408
                peer->rate_last = jiffies;
1550
1409
                return;
1551
1410
        }
1552
1411
 
1553
1412
        /* Check for load limit; set rate_last to the latest sent
1554
1413
         * redirect.
1555
1414
         */
1556
 
        if (rt->dst.rate_tokens == 0 ||
 
1415
        if (peer->rate_tokens == 0 ||
1557
1416
            time_after(jiffies,
1558
 
                       (rt->dst.rate_last +
1559
 
                        (ip_rt_redirect_load << rt->dst.rate_tokens)))) {
 
1417
                       (peer->rate_last +
 
1418
                        (ip_rt_redirect_load << peer->rate_tokens)))) {
1560
1419
                icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
1561
 
                rt->dst.rate_last = jiffies;
1562
 
                ++rt->dst.rate_tokens;
 
1420
                peer->rate_last = jiffies;
 
1421
                ++peer->rate_tokens;
1563
1422
#ifdef CONFIG_IP_ROUTE_VERBOSE
1564
1423
                if (log_martians &&
1565
 
                    rt->dst.rate_tokens == ip_rt_redirect_number &&
 
1424
                    peer->rate_tokens == ip_rt_redirect_number &&
1566
1425
                    net_ratelimit())
1567
1426
                        printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n",
1568
 
                                &rt->rt_src, rt->rt_iif,
 
1427
                               &ip_hdr(skb)->saddr, rt->rt_iif,
1569
1428
                                &rt->rt_dst, &rt->rt_gateway);
1570
1429
#endif
1571
1430
        }
1574
1433
static int ip_error(struct sk_buff *skb)
1575
1434
{
1576
1435
        struct rtable *rt = skb_rtable(skb);
 
1436
        struct inet_peer *peer;
1577
1437
        unsigned long now;
 
1438
        bool send;
1578
1439
        int code;
1579
1440
 
1580
1441
        switch (rt->dst.error) {
1594
1455
                        break;
1595
1456
        }
1596
1457
 
1597
 
        now = jiffies;
1598
 
        rt->dst.rate_tokens += now - rt->dst.rate_last;
1599
 
        if (rt->dst.rate_tokens > ip_rt_error_burst)
1600
 
                rt->dst.rate_tokens = ip_rt_error_burst;
1601
 
        rt->dst.rate_last = now;
1602
 
        if (rt->dst.rate_tokens >= ip_rt_error_cost) {
1603
 
                rt->dst.rate_tokens -= ip_rt_error_cost;
 
1458
        if (!rt->peer)
 
1459
                rt_bind_peer(rt, rt->rt_dst, 1);
 
1460
        peer = rt->peer;
 
1461
 
 
1462
        send = true;
 
1463
        if (peer) {
 
1464
                now = jiffies;
 
1465
                peer->rate_tokens += now - peer->rate_last;
 
1466
                if (peer->rate_tokens > ip_rt_error_burst)
 
1467
                        peer->rate_tokens = ip_rt_error_burst;
 
1468
                peer->rate_last = now;
 
1469
                if (peer->rate_tokens >= ip_rt_error_cost)
 
1470
                        peer->rate_tokens -= ip_rt_error_cost;
 
1471
                else
 
1472
                        send = false;
 
1473
        }
 
1474
        if (send)
1604
1475
                icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
1605
 
        }
1606
1476
 
1607
1477
out:    kfree_skb(skb);
1608
1478
        return 0;
1626
1496
        return 68;
1627
1497
}
1628
1498
 
1629
 
unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 
1499
unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph,
1630
1500
                                 unsigned short new_mtu,
1631
1501
                                 struct net_device *dev)
1632
1502
{
1633
 
        int i, k;
1634
1503
        unsigned short old_mtu = ntohs(iph->tot_len);
1635
 
        struct rtable *rth;
1636
 
        int  ikeys[2] = { dev->ifindex, 0 };
1637
 
        __be32  skeys[2] = { iph->saddr, 0, };
1638
 
        __be32  daddr = iph->daddr;
1639
1504
        unsigned short est_mtu = 0;
1640
 
 
1641
 
        for (k = 0; k < 2; k++) {
1642
 
                for (i = 0; i < 2; i++) {
1643
 
                        unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
1644
 
                                                rt_genid(net));
1645
 
 
1646
 
                        rcu_read_lock();
1647
 
                        for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
1648
 
                             rth = rcu_dereference(rth->dst.rt_next)) {
1649
 
                                unsigned short mtu = new_mtu;
1650
 
 
1651
 
                                if (rth->fl.fl4_dst != daddr ||
1652
 
                                    rth->fl.fl4_src != skeys[i] ||
1653
 
                                    rth->rt_dst != daddr ||
1654
 
                                    rth->rt_src != iph->saddr ||
1655
 
                                    rth->fl.oif != ikeys[k] ||
1656
 
                                    rt_is_input_route(rth) ||
1657
 
                                    dst_metric_locked(&rth->dst, RTAX_MTU) ||
1658
 
                                    !net_eq(dev_net(rth->dst.dev), net) ||
1659
 
                                    rt_is_expired(rth))
1660
 
                                        continue;
1661
 
 
1662
 
                                if (new_mtu < 68 || new_mtu >= old_mtu) {
1663
 
 
1664
 
                                        /* BSD 4.2 compatibility hack :-( */
1665
 
                                        if (mtu == 0 &&
1666
 
                                            old_mtu >= dst_mtu(&rth->dst) &&
1667
 
                                            old_mtu >= 68 + (iph->ihl << 2))
1668
 
                                                old_mtu -= iph->ihl << 2;
1669
 
 
1670
 
                                        mtu = guess_mtu(old_mtu);
1671
 
                                }
1672
 
                                if (mtu <= dst_mtu(&rth->dst)) {
1673
 
                                        if (mtu < dst_mtu(&rth->dst)) {
1674
 
                                                dst_confirm(&rth->dst);
1675
 
                                                if (mtu < ip_rt_min_pmtu) {
1676
 
                                                        u32 lock = dst_metric(&rth->dst,
1677
 
                                                                              RTAX_LOCK);
1678
 
                                                        mtu = ip_rt_min_pmtu;
1679
 
                                                        lock |= (1 << RTAX_MTU);
1680
 
                                                        dst_metric_set(&rth->dst, RTAX_LOCK,
1681
 
                                                                       lock);
1682
 
                                                }
1683
 
                                                dst_metric_set(&rth->dst, RTAX_MTU, mtu);
1684
 
                                                dst_set_expires(&rth->dst,
1685
 
                                                        ip_rt_mtu_expires);
1686
 
                                        }
1687
 
                                        est_mtu = mtu;
1688
 
                                }
1689
 
                        }
1690
 
                        rcu_read_unlock();
1691
 
                }
 
1505
        struct inet_peer *peer;
 
1506
 
 
1507
        peer = inet_getpeer_v4(iph->daddr, 1);
 
1508
        if (peer) {
 
1509
                unsigned short mtu = new_mtu;
 
1510
 
 
1511
                if (new_mtu < 68 || new_mtu >= old_mtu) {
 
1512
                        /* BSD 4.2 derived systems incorrectly adjust
 
1513
                         * tot_len by the IP header length, and report
 
1514
                         * a zero MTU in the ICMP message.
 
1515
                         */
 
1516
                        if (mtu == 0 &&
 
1517
                            old_mtu >= 68 + (iph->ihl << 2))
 
1518
                                old_mtu -= iph->ihl << 2;
 
1519
                        mtu = guess_mtu(old_mtu);
 
1520
                }
 
1521
 
 
1522
                if (mtu < ip_rt_min_pmtu)
 
1523
                        mtu = ip_rt_min_pmtu;
 
1524
                if (!peer->pmtu_expires || mtu < peer->pmtu_learned) {
 
1525
                        unsigned long pmtu_expires;
 
1526
 
 
1527
                        pmtu_expires = jiffies + ip_rt_mtu_expires;
 
1528
                        if (!pmtu_expires)
 
1529
                                pmtu_expires = 1UL;
 
1530
 
 
1531
                        est_mtu = mtu;
 
1532
                        peer->pmtu_learned = mtu;
 
1533
                        peer->pmtu_expires = pmtu_expires;
 
1534
                }
 
1535
 
 
1536
                inet_putpeer(peer);
 
1537
 
 
1538
                atomic_inc(&__rt_peer_genid);
1692
1539
        }
1693
1540
        return est_mtu ? : new_mtu;
1694
1541
}
1695
1542
 
 
1543
static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer)
 
1544
{
 
1545
        unsigned long expires = ACCESS_ONCE(peer->pmtu_expires);
 
1546
 
 
1547
        if (!expires)
 
1548
                return;
 
1549
        if (time_before(jiffies, expires)) {
 
1550
                u32 orig_dst_mtu = dst_mtu(dst);
 
1551
                if (peer->pmtu_learned < orig_dst_mtu) {
 
1552
                        if (!peer->pmtu_orig)
 
1553
                                peer->pmtu_orig = dst_metric_raw(dst, RTAX_MTU);
 
1554
                        dst_metric_set(dst, RTAX_MTU, peer->pmtu_learned);
 
1555
                }
 
1556
        } else if (cmpxchg(&peer->pmtu_expires, expires, 0) == expires)
 
1557
                dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig);
 
1558
}
 
1559
 
1696
1560
static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1697
1561
{
1698
 
        if (dst_mtu(dst) > mtu && mtu >= 68 &&
1699
 
            !(dst_metric_locked(dst, RTAX_MTU))) {
1700
 
                if (mtu < ip_rt_min_pmtu) {
1701
 
                        u32 lock = dst_metric(dst, RTAX_LOCK);
 
1562
        struct rtable *rt = (struct rtable *) dst;
 
1563
        struct inet_peer *peer;
 
1564
 
 
1565
        dst_confirm(dst);
 
1566
 
 
1567
        if (!rt->peer)
 
1568
                rt_bind_peer(rt, rt->rt_dst, 1);
 
1569
        peer = rt->peer;
 
1570
        if (peer) {
 
1571
                unsigned long pmtu_expires = ACCESS_ONCE(peer->pmtu_expires);
 
1572
 
 
1573
                if (mtu < ip_rt_min_pmtu)
1702
1574
                        mtu = ip_rt_min_pmtu;
1703
 
                        dst_metric_set(dst, RTAX_LOCK, lock | (1 << RTAX_MTU));
 
1575
                if (!pmtu_expires || mtu < peer->pmtu_learned) {
 
1576
 
 
1577
                        pmtu_expires = jiffies + ip_rt_mtu_expires;
 
1578
                        if (!pmtu_expires)
 
1579
                                pmtu_expires = 1UL;
 
1580
 
 
1581
                        peer->pmtu_learned = mtu;
 
1582
                        peer->pmtu_expires = pmtu_expires;
 
1583
 
 
1584
                        atomic_inc(&__rt_peer_genid);
 
1585
                        rt->rt_peer_genid = rt_peer_genid();
1704
1586
                }
1705
 
                dst_metric_set(dst, RTAX_MTU, mtu);
1706
 
                dst_set_expires(dst, ip_rt_mtu_expires);
1707
 
                call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
1708
 
        }
 
1587
                check_peer_pmtu(dst, peer);
 
1588
        }
 
1589
}
 
1590
 
 
1591
static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
 
1592
{
 
1593
        struct rtable *rt = (struct rtable *) dst;
 
1594
        __be32 orig_gw = rt->rt_gateway;
 
1595
 
 
1596
        dst_confirm(&rt->dst);
 
1597
 
 
1598
        neigh_release(rt->dst.neighbour);
 
1599
        rt->dst.neighbour = NULL;
 
1600
 
 
1601
        rt->rt_gateway = peer->redirect_learned.a4;
 
1602
        if (arp_bind_neighbour(&rt->dst) ||
 
1603
            !(rt->dst.neighbour->nud_state & NUD_VALID)) {
 
1604
                if (rt->dst.neighbour)
 
1605
                        neigh_event_send(rt->dst.neighbour, NULL);
 
1606
                rt->rt_gateway = orig_gw;
 
1607
                return -EAGAIN;
 
1608
        } else {
 
1609
                rt->rt_flags |= RTCF_REDIRECTED;
 
1610
                call_netevent_notifiers(NETEVENT_NEIGH_UPDATE,
 
1611
                                        rt->dst.neighbour);
 
1612
        }
 
1613
        return 0;
1709
1614
}
1710
1615
 
1711
1616
static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1712
1617
{
1713
 
        if (rt_is_expired((struct rtable *)dst))
 
1618
        struct rtable *rt = (struct rtable *) dst;
 
1619
 
 
1620
        if (rt_is_expired(rt))
1714
1621
                return NULL;
 
1622
        if (rt->rt_peer_genid != rt_peer_genid()) {
 
1623
                struct inet_peer *peer;
 
1624
 
 
1625
                if (!rt->peer)
 
1626
                        rt_bind_peer(rt, rt->rt_dst, 0);
 
1627
 
 
1628
                peer = rt->peer;
 
1629
                if (peer) {
 
1630
                        check_peer_pmtu(dst, peer);
 
1631
 
 
1632
                        if (peer->redirect_learned.a4 &&
 
1633
                            peer->redirect_learned.a4 != rt->rt_gateway) {
 
1634
                                if (check_peer_redir(dst, peer))
 
1635
                                        return NULL;
 
1636
                        }
 
1637
                }
 
1638
 
 
1639
                rt->rt_peer_genid = rt_peer_genid();
 
1640
        }
1715
1641
        return dst;
1716
1642
}
1717
1643
 
1720
1646
        struct rtable *rt = (struct rtable *) dst;
1721
1647
        struct inet_peer *peer = rt->peer;
1722
1648
 
 
1649
        if (rt->fi) {
 
1650
                fib_info_put(rt->fi);
 
1651
                rt->fi = NULL;
 
1652
        }
1723
1653
        if (peer) {
1724
1654
                rt->peer = NULL;
1725
1655
                inet_putpeer(peer);
1734
1664
        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
1735
1665
 
1736
1666
        rt = skb_rtable(skb);
1737
 
        if (rt)
1738
 
                dst_set_expires(&rt->dst, 0);
 
1667
        if (rt && rt->peer && peer_pmtu_cleaned(rt->peer))
 
1668
                dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig);
1739
1669
}
1740
1670
 
1741
1671
static int ip_rt_bug(struct sk_buff *skb)
1744
1674
                &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
1745
1675
                skb->dev ? skb->dev->name : "?");
1746
1676
        kfree_skb(skb);
 
1677
        WARN_ON(1);
1747
1678
        return 0;
1748
1679
}
1749
1680
 
1756
1687
   in IP options!
1757
1688
 */
1758
1689
 
1759
 
void ip_rt_get_source(u8 *addr, struct rtable *rt)
 
1690
void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
1760
1691
{
1761
1692
        __be32 src;
1762
 
        struct fib_result res;
1763
1693
 
1764
1694
        if (rt_is_output_route(rt))
1765
 
                src = rt->rt_src;
 
1695
                src = ip_hdr(skb)->saddr;
1766
1696
        else {
 
1697
                struct fib_result res;
 
1698
                struct flowi4 fl4;
 
1699
                struct iphdr *iph;
 
1700
 
 
1701
                iph = ip_hdr(skb);
 
1702
 
 
1703
                memset(&fl4, 0, sizeof(fl4));
 
1704
                fl4.daddr = iph->daddr;
 
1705
                fl4.saddr = iph->saddr;
 
1706
                fl4.flowi4_tos = iph->tos;
 
1707
                fl4.flowi4_oif = rt->dst.dev->ifindex;
 
1708
                fl4.flowi4_iif = skb->dev->ifindex;
 
1709
                fl4.flowi4_mark = skb->mark;
 
1710
 
1767
1711
                rcu_read_lock();
1768
 
                if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0)
1769
 
                        src = FIB_RES_PREFSRC(res);
 
1712
                if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0)
 
1713
                        src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
1770
1714
                else
1771
1715
                        src = inet_select_addr(rt->dst.dev, rt->rt_gateway,
1772
1716
                                        RT_SCOPE_UNIVERSE);
1775
1719
        memcpy(addr, &src, 4);
1776
1720
}
1777
1721
 
1778
 
#ifdef CONFIG_NET_CLS_ROUTE
 
1722
#ifdef CONFIG_IP_ROUTE_CLASSID
1779
1723
static void set_class_tag(struct rtable *rt, u32 tag)
1780
1724
{
1781
1725
        if (!(rt->dst.tclassid & 0xFFFF))
1815
1759
        return mtu;
1816
1760
}
1817
1761
 
1818
 
static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
 
1762
static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
 
1763
                            struct fib_info *fi)
 
1764
{
 
1765
        struct inet_peer *peer;
 
1766
        int create = 0;
 
1767
 
 
1768
        /* If a peer entry exists for this destination, we must hook
 
1769
         * it up in order to get at cached metrics.
 
1770
         */
 
1771
        if (fl4 && (fl4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS))
 
1772
                create = 1;
 
1773
 
 
1774
        rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create);
 
1775
        if (peer) {
 
1776
                rt->rt_peer_genid = rt_peer_genid();
 
1777
                if (inet_metrics_new(peer))
 
1778
                        memcpy(peer->metrics, fi->fib_metrics,
 
1779
                               sizeof(u32) * RTAX_MAX);
 
1780
                dst_init_metrics(&rt->dst, peer->metrics, false);
 
1781
 
 
1782
                check_peer_pmtu(&rt->dst, peer);
 
1783
                if (peer->redirect_learned.a4 &&
 
1784
                    peer->redirect_learned.a4 != rt->rt_gateway) {
 
1785
                        rt->rt_gateway = peer->redirect_learned.a4;
 
1786
                        rt->rt_flags |= RTCF_REDIRECTED;
 
1787
                }
 
1788
        } else {
 
1789
                if (fi->fib_metrics != (u32 *) dst_default_metrics) {
 
1790
                        rt->fi = fi;
 
1791
                        atomic_inc(&fi->fib_clntref);
 
1792
                }
 
1793
                dst_init_metrics(&rt->dst, fi->fib_metrics, true);
 
1794
        }
 
1795
}
 
1796
 
 
1797
static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4,
 
1798
                           const struct fib_result *res,
 
1799
                           struct fib_info *fi, u16 type, u32 itag)
1819
1800
{
1820
1801
        struct dst_entry *dst = &rt->dst;
1821
 
        struct fib_info *fi = res->fi;
1822
1802
 
1823
1803
        if (fi) {
1824
1804
                if (FIB_RES_GW(*res) &&
1825
1805
                    FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
1826
1806
                        rt->rt_gateway = FIB_RES_GW(*res);
1827
 
                dst_import_metrics(dst, fi->fib_metrics);
1828
 
#ifdef CONFIG_NET_CLS_ROUTE
 
1807
                rt_init_metrics(rt, fl4, fi);
 
1808
#ifdef CONFIG_IP_ROUTE_CLASSID
1829
1809
                dst->tclassid = FIB_RES_NH(*res).nh_tclassid;
1830
1810
#endif
1831
1811
        }
1835
1815
        if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40)
1836
1816
                dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40);
1837
1817
 
1838
 
#ifdef CONFIG_NET_CLS_ROUTE
 
1818
#ifdef CONFIG_IP_ROUTE_CLASSID
1839
1819
#ifdef CONFIG_IP_MULTIPLE_TABLES
1840
1820
        set_class_tag(rt, fib_rules_tclass(res));
1841
1821
#endif
1842
1822
        set_class_tag(rt, itag);
1843
1823
#endif
1844
 
        rt->rt_type = res->type;
 
1824
}
 
1825
 
 
1826
static struct rtable *rt_dst_alloc(struct net_device *dev,
 
1827
                                   bool nopolicy, bool noxfrm)
 
1828
{
 
1829
        return dst_alloc(&ipv4_dst_ops, dev, 1, -1,
 
1830
                         DST_HOST |
 
1831
                         (nopolicy ? DST_NOPOLICY : 0) |
 
1832
                         (noxfrm ? DST_NOXFRM : 0));
1845
1833
}
1846
1834
 
1847
1835
/* called in rcu_read_lock() section */
1869
1857
                        goto e_inval;
1870
1858
                spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
1871
1859
        } else {
1872
 
                err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst,
1873
 
                                          &itag, 0);
 
1860
                err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst,
 
1861
                                          &itag);
1874
1862
                if (err < 0)
1875
1863
                        goto e_err;
1876
1864
        }
1877
 
        rth = dst_alloc(&ipv4_dst_ops);
 
1865
        rth = rt_dst_alloc(init_net.loopback_dev,
 
1866
                           IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
1878
1867
        if (!rth)
1879
1868
                goto e_nobufs;
1880
1869
 
 
1870
#ifdef CONFIG_IP_ROUTE_CLASSID
 
1871
        rth->dst.tclassid = itag;
 
1872
#endif
1881
1873
        rth->dst.output = ip_rt_bug;
1882
 
        rth->dst.obsolete = -1;
1883
1874
 
1884
 
        atomic_set(&rth->dst.__refcnt, 1);
1885
 
        rth->dst.flags= DST_HOST;
1886
 
        if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
1887
 
                rth->dst.flags |= DST_NOPOLICY;
1888
 
        rth->fl.fl4_dst = daddr;
1889
 
        rth->rt_dst     = daddr;
1890
 
        rth->fl.fl4_tos = tos;
1891
 
        rth->fl.mark    = skb->mark;
1892
 
        rth->fl.fl4_src = saddr;
1893
 
        rth->rt_src     = saddr;
1894
 
#ifdef CONFIG_NET_CLS_ROUTE
1895
 
        rth->dst.tclassid = itag;
1896
 
#endif
1897
 
        rth->rt_iif     =
1898
 
        rth->fl.iif     = dev->ifindex;
1899
 
        rth->dst.dev    = init_net.loopback_dev;
1900
 
        dev_hold(rth->dst.dev);
1901
 
        rth->fl.oif     = 0;
1902
 
        rth->rt_gateway = daddr;
1903
 
        rth->rt_spec_dst= spec_dst;
 
1875
        rth->rt_key_dst = daddr;
 
1876
        rth->rt_key_src = saddr;
1904
1877
        rth->rt_genid   = rt_genid(dev_net(dev));
1905
1878
        rth->rt_flags   = RTCF_MULTICAST;
1906
1879
        rth->rt_type    = RTN_MULTICAST;
 
1880
        rth->rt_key_tos = tos;
 
1881
        rth->rt_dst     = daddr;
 
1882
        rth->rt_src     = saddr;
 
1883
        rth->rt_route_iif = dev->ifindex;
 
1884
        rth->rt_iif     = dev->ifindex;
 
1885
        rth->rt_oif     = 0;
 
1886
        rth->rt_mark    = skb->mark;
 
1887
        rth->rt_gateway = daddr;
 
1888
        rth->rt_spec_dst= spec_dst;
 
1889
        rth->rt_peer_genid = 0;
 
1890
        rth->peer = NULL;
 
1891
        rth->fi = NULL;
1907
1892
        if (our) {
1908
1893
                rth->dst.input= ip_local_deliver;
1909
1894
                rth->rt_flags |= RTCF_LOCAL;
1916
1901
        RT_CACHE_STAT_INC(in_slow_mc);
1917
1902
 
1918
1903
        hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev)));
1919
 
        return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex);
 
1904
        rth = rt_intern_hash(hash, rth, skb, dev->ifindex);
 
1905
        return IS_ERR(rth) ? PTR_ERR(rth) : 0;
1920
1906
 
1921
1907
e_nobufs:
1922
1908
        return -ENOBUFS;
1959
1945
 
1960
1946
/* called in rcu_read_lock() section */
1961
1947
static int __mkroute_input(struct sk_buff *skb,
1962
 
                           struct fib_result *res,
 
1948
                           const struct fib_result *res,
1963
1949
                           struct in_device *in_dev,
1964
1950
                           __be32 daddr, __be32 saddr, u32 tos,
1965
1951
                           struct rtable **result)
1981
1967
        }
1982
1968
 
1983
1969
 
1984
 
        err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(*res),
1985
 
                                  in_dev->dev, &spec_dst, &itag, skb->mark);
 
1970
        err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
 
1971
                                  in_dev->dev, &spec_dst, &itag);
1986
1972
        if (err < 0) {
1987
1973
                ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
1988
1974
                                         saddr);
2013
1999
                }
2014
2000
        }
2015
2001
 
2016
 
 
2017
 
        rth = dst_alloc(&ipv4_dst_ops);
 
2002
        rth = rt_dst_alloc(out_dev->dev,
 
2003
                           IN_DEV_CONF_GET(in_dev, NOPOLICY),
 
2004
                           IN_DEV_CONF_GET(out_dev, NOXFRM));
2018
2005
        if (!rth) {
2019
2006
                err = -ENOBUFS;
2020
2007
                goto cleanup;
2021
2008
        }
2022
2009
 
2023
 
        atomic_set(&rth->dst.__refcnt, 1);
2024
 
        rth->dst.flags= DST_HOST;
2025
 
        if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
2026
 
                rth->dst.flags |= DST_NOPOLICY;
2027
 
        if (IN_DEV_CONF_GET(out_dev, NOXFRM))
2028
 
                rth->dst.flags |= DST_NOXFRM;
2029
 
        rth->fl.fl4_dst = daddr;
 
2010
        rth->rt_key_dst = daddr;
 
2011
        rth->rt_key_src = saddr;
 
2012
        rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
 
2013
        rth->rt_flags = flags;
 
2014
        rth->rt_type = res->type;
 
2015
        rth->rt_key_tos = tos;
2030
2016
        rth->rt_dst     = daddr;
2031
 
        rth->fl.fl4_tos = tos;
2032
 
        rth->fl.mark    = skb->mark;
2033
 
        rth->fl.fl4_src = saddr;
2034
2017
        rth->rt_src     = saddr;
 
2018
        rth->rt_route_iif = in_dev->dev->ifindex;
 
2019
        rth->rt_iif     = in_dev->dev->ifindex;
 
2020
        rth->rt_oif     = 0;
 
2021
        rth->rt_mark    = skb->mark;
2035
2022
        rth->rt_gateway = daddr;
2036
 
        rth->rt_iif     =
2037
 
                rth->fl.iif     = in_dev->dev->ifindex;
2038
 
        rth->dst.dev    = (out_dev)->dev;
2039
 
        dev_hold(rth->dst.dev);
2040
 
        rth->fl.oif     = 0;
2041
2023
        rth->rt_spec_dst= spec_dst;
 
2024
        rth->rt_peer_genid = 0;
 
2025
        rth->peer = NULL;
 
2026
        rth->fi = NULL;
2042
2027
 
2043
 
        rth->dst.obsolete = -1;
2044
2028
        rth->dst.input = ip_forward;
2045
2029
        rth->dst.output = ip_output;
2046
 
        rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
2047
 
 
2048
 
        rt_set_nexthop(rth, res, itag);
2049
 
 
2050
 
        rth->rt_flags = flags;
 
2030
 
 
2031
        rt_set_nexthop(rth, NULL, res, res->fi, res->type, itag);
2051
2032
 
2052
2033
        *result = rth;
2053
2034
        err = 0;
2057
2038
 
2058
2039
static int ip_mkroute_input(struct sk_buff *skb,
2059
2040
                            struct fib_result *res,
2060
 
                            const struct flowi *fl,
 
2041
                            const struct flowi4 *fl4,
2061
2042
                            struct in_device *in_dev,
2062
2043
                            __be32 daddr, __be32 saddr, u32 tos)
2063
2044
{
2066
2047
        unsigned hash;
2067
2048
 
2068
2049
#ifdef CONFIG_IP_ROUTE_MULTIPATH
2069
 
        if (res->fi && res->fi->fib_nhs > 1 && fl->oif == 0)
2070
 
                fib_select_multipath(fl, res);
 
2050
        if (res->fi && res->fi->fib_nhs > 1)
 
2051
                fib_select_multipath(res);
2071
2052
#endif
2072
2053
 
2073
2054
        /* create a routing cache entry */
2076
2057
                return err;
2077
2058
 
2078
2059
        /* put it into the cache */
2079
 
        hash = rt_hash(daddr, saddr, fl->iif,
 
2060
        hash = rt_hash(daddr, saddr, fl4->flowi4_iif,
2080
2061
                       rt_genid(dev_net(rth->dst.dev)));
2081
 
        return rt_intern_hash(hash, rth, NULL, skb, fl->iif);
 
2062
        rth = rt_intern_hash(hash, rth, skb, fl4->flowi4_iif);
 
2063
        if (IS_ERR(rth))
 
2064
                return PTR_ERR(rth);
 
2065
        return 0;
2082
2066
}
2083
2067
 
2084
2068
/*
2097
2081
{
2098
2082
        struct fib_result res;
2099
2083
        struct in_device *in_dev = __in_dev_get_rcu(dev);
2100
 
        struct flowi fl = { .fl4_dst    = daddr,
2101
 
                            .fl4_src    = saddr,
2102
 
                            .fl4_tos    = tos,
2103
 
                            .fl4_scope  = RT_SCOPE_UNIVERSE,
2104
 
                            .mark = skb->mark,
2105
 
                            .iif = dev->ifindex };
 
2084
        struct flowi4   fl4;
2106
2085
        unsigned        flags = 0;
2107
2086
        u32             itag = 0;
2108
2087
        struct rtable * rth;
2139
2118
        /*
2140
2119
         *      Now we are ready to route packet.
2141
2120
         */
2142
 
        err = fib_lookup(net, &fl, &res);
 
2121
        fl4.flowi4_oif = 0;
 
2122
        fl4.flowi4_iif = dev->ifindex;
 
2123
        fl4.flowi4_mark = skb->mark;
 
2124
        fl4.flowi4_tos = tos;
 
2125
        fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
 
2126
        fl4.daddr = daddr;
 
2127
        fl4.saddr = saddr;
 
2128
        err = fib_lookup(net, &fl4, &res);
2143
2129
        if (err != 0) {
2144
2130
                if (!IN_DEV_FORWARD(in_dev))
2145
2131
                        goto e_hostunreach;
2152
2138
                goto brd_input;
2153
2139
 
2154
2140
        if (res.type == RTN_LOCAL) {
2155
 
                err = fib_validate_source(saddr, daddr, tos,
 
2141
                err = fib_validate_source(skb, saddr, daddr, tos,
2156
2142
                                          net->loopback_dev->ifindex,
2157
 
                                          dev, &spec_dst, &itag, skb->mark);
 
2143
                                          dev, &spec_dst, &itag);
2158
2144
                if (err < 0)
2159
2145
                        goto martian_source_keep_err;
2160
2146
                if (err)
2168
2154
        if (res.type != RTN_UNICAST)
2169
2155
                goto martian_destination;
2170
2156
 
2171
 
        err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
 
2157
        err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos);
2172
2158
out:    return err;
2173
2159
 
2174
2160
brd_input:
2178
2164
        if (ipv4_is_zeronet(saddr))
2179
2165
                spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
2180
2166
        else {
2181
 
                err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst,
2182
 
                                          &itag, skb->mark);
 
2167
                err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst,
 
2168
                                          &itag);
2183
2169
                if (err < 0)
2184
2170
                        goto martian_source_keep_err;
2185
2171
                if (err)
2190
2176
        RT_CACHE_STAT_INC(in_brd);
2191
2177
 
2192
2178
local_input:
2193
 
        rth = dst_alloc(&ipv4_dst_ops);
 
2179
        rth = rt_dst_alloc(net->loopback_dev,
 
2180
                           IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
2194
2181
        if (!rth)
2195
2182
                goto e_nobufs;
2196
2183
 
 
2184
        rth->dst.input= ip_local_deliver;
2197
2185
        rth->dst.output= ip_rt_bug;
2198
 
        rth->dst.obsolete = -1;
 
2186
#ifdef CONFIG_IP_ROUTE_CLASSID
 
2187
        rth->dst.tclassid = itag;
 
2188
#endif
 
2189
 
 
2190
        rth->rt_key_dst = daddr;
 
2191
        rth->rt_key_src = saddr;
2199
2192
        rth->rt_genid = rt_genid(net);
2200
 
 
2201
 
        atomic_set(&rth->dst.__refcnt, 1);
2202
 
        rth->dst.flags= DST_HOST;
2203
 
        if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
2204
 
                rth->dst.flags |= DST_NOPOLICY;
2205
 
        rth->fl.fl4_dst = daddr;
2206
 
        rth->rt_dst     = daddr;
2207
 
        rth->fl.fl4_tos = tos;
2208
 
        rth->fl.mark    = skb->mark;
2209
 
        rth->fl.fl4_src = saddr;
2210
 
        rth->rt_src     = saddr;
2211
 
#ifdef CONFIG_NET_CLS_ROUTE
2212
 
        rth->dst.tclassid = itag;
2213
 
#endif
2214
 
        rth->rt_iif     =
2215
 
        rth->fl.iif     = dev->ifindex;
2216
 
        rth->dst.dev    = net->loopback_dev;
2217
 
        dev_hold(rth->dst.dev);
2218
 
        rth->rt_gateway = daddr;
2219
 
        rth->rt_spec_dst= spec_dst;
2220
 
        rth->dst.input= ip_local_deliver;
2221
2193
        rth->rt_flags   = flags|RTCF_LOCAL;
 
2194
        rth->rt_type    = res.type;
 
2195
        rth->rt_key_tos = tos;
 
2196
        rth->rt_dst     = daddr;
 
2197
        rth->rt_src     = saddr;
 
2198
#ifdef CONFIG_IP_ROUTE_CLASSID
 
2199
        rth->dst.tclassid = itag;
 
2200
#endif
 
2201
        rth->rt_route_iif = dev->ifindex;
 
2202
        rth->rt_iif     = dev->ifindex;
 
2203
        rth->rt_oif     = 0;
 
2204
        rth->rt_mark    = skb->mark;
 
2205
        rth->rt_gateway = daddr;
 
2206
        rth->rt_spec_dst= spec_dst;
 
2207
        rth->rt_peer_genid = 0;
 
2208
        rth->peer = NULL;
 
2209
        rth->fi = NULL;
2222
2210
        if (res.type == RTN_UNREACHABLE) {
2223
2211
                rth->dst.input= ip_error;
2224
2212
                rth->dst.error= -err;
2225
2213
                rth->rt_flags   &= ~RTCF_LOCAL;
2226
2214
        }
2227
 
        rth->rt_type    = res.type;
2228
 
        hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net));
2229
 
        err = rt_intern_hash(hash, rth, NULL, skb, fl.iif);
 
2215
        hash = rt_hash(daddr, saddr, fl4.flowi4_iif, rt_genid(net));
 
2216
        rth = rt_intern_hash(hash, rth, skb, fl4.flowi4_iif);
 
2217
        err = 0;
 
2218
        if (IS_ERR(rth))
 
2219
                err = PTR_ERR(rth);
2230
2220
        goto out;
2231
2221
 
2232
2222
no_route:
2288
2278
 
2289
2279
        for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
2290
2280
             rth = rcu_dereference(rth->dst.rt_next)) {
2291
 
                if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) |
2292
 
                     ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) |
2293
 
                     (rth->fl.iif ^ iif) |
2294
 
                     rth->fl.oif |
2295
 
                     (rth->fl.fl4_tos ^ tos)) == 0 &&
2296
 
                    rth->fl.mark == skb->mark &&
 
2281
                if ((((__force u32)rth->rt_key_dst ^ (__force u32)daddr) |
 
2282
                     ((__force u32)rth->rt_key_src ^ (__force u32)saddr) |
 
2283
                     (rth->rt_iif ^ iif) |
 
2284
                     rth->rt_oif |
 
2285
                     (rth->rt_key_tos ^ tos)) == 0 &&
 
2286
                    rth->rt_mark == skb->mark &&
2297
2287
                    net_eq(dev_net(rth->dst.dev), net) &&
2298
2288
                    !rt_is_expired(rth)) {
2299
2289
                        if (noref) {
2326
2316
                struct in_device *in_dev = __in_dev_get_rcu(dev);
2327
2317
 
2328
2318
                if (in_dev) {
2329
 
                        int our = ip_check_mc(in_dev, daddr, saddr,
2330
 
                                              ip_hdr(skb)->protocol);
 
2319
                        int our = ip_check_mc_rcu(in_dev, daddr, saddr,
 
2320
                                                  ip_hdr(skb)->protocol);
2331
2321
                        if (our
2332
2322
#ifdef CONFIG_IP_MROUTE
2333
2323
                                ||
2351
2341
EXPORT_SYMBOL(ip_route_input_common);
2352
2342
 
2353
2343
/* called with rcu_read_lock() */
2354
 
static int __mkroute_output(struct rtable **result,
2355
 
                            struct fib_result *res,
2356
 
                            const struct flowi *fl,
2357
 
                            const struct flowi *oldflp,
2358
 
                            struct net_device *dev_out,
2359
 
                            unsigned flags)
 
2344
static struct rtable *__mkroute_output(const struct fib_result *res,
 
2345
                                       const struct flowi4 *fl4,
 
2346
                                       __be32 orig_daddr, __be32 orig_saddr,
 
2347
                                       int orig_oif, struct net_device *dev_out,
 
2348
                                       unsigned int flags)
2360
2349
{
 
2350
        struct fib_info *fi = res->fi;
 
2351
        u32 tos = RT_FL_TOS(fl4);
 
2352
        struct in_device *in_dev;
 
2353
        u16 type = res->type;
2361
2354
        struct rtable *rth;
2362
 
        struct in_device *in_dev;
2363
 
        u32 tos = RT_FL_TOS(oldflp);
2364
 
 
2365
 
        if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags & IFF_LOOPBACK))
2366
 
                return -EINVAL;
2367
 
 
2368
 
        if (ipv4_is_lbcast(fl->fl4_dst))
2369
 
                res->type = RTN_BROADCAST;
2370
 
        else if (ipv4_is_multicast(fl->fl4_dst))
2371
 
                res->type = RTN_MULTICAST;
2372
 
        else if (ipv4_is_zeronet(fl->fl4_dst))
2373
 
                return -EINVAL;
 
2355
 
 
2356
        if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
 
2357
                return ERR_PTR(-EINVAL);
 
2358
 
 
2359
        if (ipv4_is_lbcast(fl4->daddr))
 
2360
                type = RTN_BROADCAST;
 
2361
        else if (ipv4_is_multicast(fl4->daddr))
 
2362
                type = RTN_MULTICAST;
 
2363
        else if (ipv4_is_zeronet(fl4->daddr))
 
2364
                return ERR_PTR(-EINVAL);
2374
2365
 
2375
2366
        if (dev_out->flags & IFF_LOOPBACK)
2376
2367
                flags |= RTCF_LOCAL;
2377
2368
 
2378
2369
        in_dev = __in_dev_get_rcu(dev_out);
2379
2370
        if (!in_dev)
2380
 
                return -EINVAL;
 
2371
                return ERR_PTR(-EINVAL);
2381
2372
 
2382
 
        if (res->type == RTN_BROADCAST) {
 
2373
        if (type == RTN_BROADCAST) {
2383
2374
                flags |= RTCF_BROADCAST | RTCF_LOCAL;
2384
 
                res->fi = NULL;
2385
 
        } else if (res->type == RTN_MULTICAST) {
 
2375
                fi = NULL;
 
2376
        } else if (type == RTN_MULTICAST) {
2386
2377
                flags |= RTCF_MULTICAST | RTCF_LOCAL;
2387
 
                if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src,
2388
 
                                 oldflp->proto))
 
2378
                if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
 
2379
                                     fl4->flowi4_proto))
2389
2380
                        flags &= ~RTCF_LOCAL;
2390
2381
                /* If multicast route do not exist use
2391
2382
                 * default one, but do not gateway in this case.
2392
2383
                 * Yes, it is hack.
2393
2384
                 */
2394
 
                if (res->fi && res->prefixlen < 4)
2395
 
                        res->fi = NULL;
 
2385
                if (fi && res->prefixlen < 4)
 
2386
                        fi = NULL;
2396
2387
        }
2397
2388
 
2398
 
 
2399
 
        rth = dst_alloc(&ipv4_dst_ops);
 
2389
        rth = rt_dst_alloc(dev_out,
 
2390
                           IN_DEV_CONF_GET(in_dev, NOPOLICY),
 
2391
                           IN_DEV_CONF_GET(in_dev, NOXFRM));
2400
2392
        if (!rth)
2401
 
                return -ENOBUFS;
2402
 
 
2403
 
        atomic_set(&rth->dst.__refcnt, 1);
2404
 
        rth->dst.flags= DST_HOST;
2405
 
        if (IN_DEV_CONF_GET(in_dev, NOXFRM))
2406
 
                rth->dst.flags |= DST_NOXFRM;
2407
 
        if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
2408
 
                rth->dst.flags |= DST_NOPOLICY;
2409
 
 
2410
 
        rth->fl.fl4_dst = oldflp->fl4_dst;
2411
 
        rth->fl.fl4_tos = tos;
2412
 
        rth->fl.fl4_src = oldflp->fl4_src;
2413
 
        rth->fl.oif     = oldflp->oif;
2414
 
        rth->fl.mark    = oldflp->mark;
2415
 
        rth->rt_dst     = fl->fl4_dst;
2416
 
        rth->rt_src     = fl->fl4_src;
2417
 
        rth->rt_iif     = oldflp->oif ? : dev_out->ifindex;
2418
 
        /* get references to the devices that are to be hold by the routing
2419
 
           cache entry */
2420
 
        rth->dst.dev    = dev_out;
2421
 
        dev_hold(dev_out);
2422
 
        rth->rt_gateway = fl->fl4_dst;
2423
 
        rth->rt_spec_dst= fl->fl4_src;
2424
 
 
2425
 
        rth->dst.output=ip_output;
2426
 
        rth->dst.obsolete = -1;
 
2393
                return ERR_PTR(-ENOBUFS);
 
2394
 
 
2395
        rth->dst.output = ip_output;
 
2396
 
 
2397
        rth->rt_key_dst = orig_daddr;
 
2398
        rth->rt_key_src = orig_saddr;
2427
2399
        rth->rt_genid = rt_genid(dev_net(dev_out));
 
2400
        rth->rt_flags   = flags;
 
2401
        rth->rt_type    = type;
 
2402
        rth->rt_key_tos = tos;
 
2403
        rth->rt_dst     = fl4->daddr;
 
2404
        rth->rt_src     = fl4->saddr;
 
2405
        rth->rt_route_iif = 0;
 
2406
        rth->rt_iif     = orig_oif ? : dev_out->ifindex;
 
2407
        rth->rt_oif     = orig_oif;
 
2408
        rth->rt_mark    = fl4->flowi4_mark;
 
2409
        rth->rt_gateway = fl4->daddr;
 
2410
        rth->rt_spec_dst= fl4->saddr;
 
2411
        rth->rt_peer_genid = 0;
 
2412
        rth->peer = NULL;
 
2413
        rth->fi = NULL;
2428
2414
 
2429
2415
        RT_CACHE_STAT_INC(out_slow_tot);
2430
2416
 
2431
2417
        if (flags & RTCF_LOCAL) {
2432
2418
                rth->dst.input = ip_local_deliver;
2433
 
                rth->rt_spec_dst = fl->fl4_dst;
 
2419
                rth->rt_spec_dst = fl4->daddr;
2434
2420
        }
2435
2421
        if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
2436
 
                rth->rt_spec_dst = fl->fl4_src;
 
2422
                rth->rt_spec_dst = fl4->saddr;
2437
2423
                if (flags & RTCF_LOCAL &&
2438
2424
                    !(dev_out->flags & IFF_LOOPBACK)) {
2439
2425
                        rth->dst.output = ip_mc_output;
2440
2426
                        RT_CACHE_STAT_INC(out_slow_mc);
2441
2427
                }
2442
2428
#ifdef CONFIG_IP_MROUTE
2443
 
                if (res->type == RTN_MULTICAST) {
 
2429
                if (type == RTN_MULTICAST) {
2444
2430
                        if (IN_DEV_MFORWARD(in_dev) &&
2445
 
                            !ipv4_is_local_multicast(oldflp->fl4_dst)) {
 
2431
                            !ipv4_is_local_multicast(fl4->daddr)) {
2446
2432
                                rth->dst.input = ip_mr_input;
2447
2433
                                rth->dst.output = ip_mc_output;
2448
2434
                        }
2450
2436
#endif
2451
2437
        }
2452
2438
 
2453
 
        rt_set_nexthop(rth, res, 0);
2454
 
 
2455
 
        rth->rt_flags = flags;
2456
 
        *result = rth;
2457
 
        return 0;
2458
 
}
2459
 
 
2460
 
/* called with rcu_read_lock() */
2461
 
static int ip_mkroute_output(struct rtable **rp,
2462
 
                             struct fib_result *res,
2463
 
                             const struct flowi *fl,
2464
 
                             const struct flowi *oldflp,
2465
 
                             struct net_device *dev_out,
2466
 
                             unsigned flags)
2467
 
{
2468
 
        struct rtable *rth = NULL;
2469
 
        int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags);
2470
 
        unsigned hash;
2471
 
        if (err == 0) {
2472
 
                hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif,
2473
 
                               rt_genid(dev_net(dev_out)));
2474
 
                err = rt_intern_hash(hash, rth, rp, NULL, oldflp->oif);
2475
 
        }
2476
 
 
2477
 
        return err;
 
2439
        rt_set_nexthop(rth, fl4, res, fi, type, 0);
 
2440
 
 
2441
        return rth;
2478
2442
}
2479
2443
 
2480
2444
/*
2482
2446
 * called with rcu_read_lock();
2483
2447
 */
2484
2448
 
2485
 
static int ip_route_output_slow(struct net *net, struct rtable **rp,
2486
 
                                const struct flowi *oldflp)
 
2449
static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4)
2487
2450
{
2488
 
        u32 tos = RT_FL_TOS(oldflp);
2489
 
        struct flowi fl = { .fl4_dst = oldflp->fl4_dst,
2490
 
                            .fl4_src = oldflp->fl4_src,
2491
 
                            .fl4_tos = tos & IPTOS_RT_MASK,
2492
 
                            .fl4_scope = ((tos & RTO_ONLINK) ?
2493
 
                                          RT_SCOPE_LINK : RT_SCOPE_UNIVERSE),
2494
 
                            .mark = oldflp->mark,
2495
 
                            .iif = net->loopback_dev->ifindex,
2496
 
                            .oif = oldflp->oif };
2497
 
        struct fib_result res;
2498
 
        unsigned int flags = 0;
2499
2451
        struct net_device *dev_out = NULL;
2500
 
        int err;
2501
 
 
 
2452
        u32 tos = RT_FL_TOS(fl4);
 
2453
        unsigned int flags = 0;
 
2454
        struct fib_result res;
 
2455
        struct rtable *rth;
 
2456
        __be32 orig_daddr;
 
2457
        __be32 orig_saddr;
 
2458
        int orig_oif;
2502
2459
 
2503
2460
        res.fi          = NULL;
2504
2461
#ifdef CONFIG_IP_MULTIPLE_TABLES
2505
2462
        res.r           = NULL;
2506
2463
#endif
2507
2464
 
2508
 
        if (oldflp->fl4_src) {
2509
 
                err = -EINVAL;
2510
 
                if (ipv4_is_multicast(oldflp->fl4_src) ||
2511
 
                    ipv4_is_lbcast(oldflp->fl4_src) ||
2512
 
                    ipv4_is_zeronet(oldflp->fl4_src))
 
2465
        orig_daddr = fl4->daddr;
 
2466
        orig_saddr = fl4->saddr;
 
2467
        orig_oif = fl4->flowi4_oif;
 
2468
 
 
2469
        fl4->flowi4_iif = net->loopback_dev->ifindex;
 
2470
        fl4->flowi4_tos = tos & IPTOS_RT_MASK;
 
2471
        fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
 
2472
                         RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
 
2473
 
 
2474
        rcu_read_lock();
 
2475
        if (fl4->saddr) {
 
2476
                rth = ERR_PTR(-EINVAL);
 
2477
                if (ipv4_is_multicast(fl4->saddr) ||
 
2478
                    ipv4_is_lbcast(fl4->saddr) ||
 
2479
                    ipv4_is_zeronet(fl4->saddr))
2513
2480
                        goto out;
2514
2481
 
2515
2482
                /* I removed check for oif == dev_out->oif here.
2520
2487
                      of another iface. --ANK
2521
2488
                 */
2522
2489
 
2523
 
                if (oldflp->oif == 0 &&
2524
 
                    (ipv4_is_multicast(oldflp->fl4_dst) ||
2525
 
                     ipv4_is_lbcast(oldflp->fl4_dst))) {
 
2490
                if (fl4->flowi4_oif == 0 &&
 
2491
                    (ipv4_is_multicast(fl4->daddr) ||
 
2492
                     ipv4_is_lbcast(fl4->daddr))) {
2526
2493
                        /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
2527
 
                        dev_out = __ip_dev_find(net, oldflp->fl4_src, false);
 
2494
                        dev_out = __ip_dev_find(net, fl4->saddr, false);
2528
2495
                        if (dev_out == NULL)
2529
2496
                                goto out;
2530
2497
 
2543
2510
                           Luckily, this hack is good workaround.
2544
2511
                         */
2545
2512
 
2546
 
                        fl.oif = dev_out->ifindex;
 
2513
                        fl4->flowi4_oif = dev_out->ifindex;
2547
2514
                        goto make_route;
2548
2515
                }
2549
2516
 
2550
 
                if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) {
 
2517
                if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
2551
2518
                        /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
2552
 
                        if (!__ip_dev_find(net, oldflp->fl4_src, false))
 
2519
                        if (!__ip_dev_find(net, fl4->saddr, false))
2553
2520
                                goto out;
2554
2521
                }
2555
2522
        }
2556
2523
 
2557
2524
 
2558
 
        if (oldflp->oif) {
2559
 
                dev_out = dev_get_by_index_rcu(net, oldflp->oif);
2560
 
                err = -ENODEV;
 
2525
        if (fl4->flowi4_oif) {
 
2526
                dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
 
2527
                rth = ERR_PTR(-ENODEV);
2561
2528
                if (dev_out == NULL)
2562
2529
                        goto out;
2563
2530
 
2564
2531
                /* RACE: Check return value of inet_select_addr instead. */
2565
2532
                if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
2566
 
                        err = -ENETUNREACH;
 
2533
                        rth = ERR_PTR(-ENETUNREACH);
2567
2534
                        goto out;
2568
2535
                }
2569
 
                if (ipv4_is_local_multicast(oldflp->fl4_dst) ||
2570
 
                    ipv4_is_lbcast(oldflp->fl4_dst)) {
2571
 
                        if (!fl.fl4_src)
2572
 
                                fl.fl4_src = inet_select_addr(dev_out, 0,
 
2536
                if (ipv4_is_local_multicast(fl4->daddr) ||
 
2537
                    ipv4_is_lbcast(fl4->daddr)) {
 
2538
                        if (!fl4->saddr)
 
2539
                                fl4->saddr = inet_select_addr(dev_out, 0,
2573
2540
                                                              RT_SCOPE_LINK);
2574
2541
                        goto make_route;
2575
2542
                }
2576
 
                if (!fl.fl4_src) {
2577
 
                        if (ipv4_is_multicast(oldflp->fl4_dst))
2578
 
                                fl.fl4_src = inet_select_addr(dev_out, 0,
2579
 
                                                              fl.fl4_scope);
2580
 
                        else if (!oldflp->fl4_dst)
2581
 
                                fl.fl4_src = inet_select_addr(dev_out, 0,
 
2543
                if (!fl4->saddr) {
 
2544
                        if (ipv4_is_multicast(fl4->daddr))
 
2545
                                fl4->saddr = inet_select_addr(dev_out, 0,
 
2546
                                                              fl4->flowi4_scope);
 
2547
                        else if (!fl4->daddr)
 
2548
                                fl4->saddr = inet_select_addr(dev_out, 0,
2582
2549
                                                              RT_SCOPE_HOST);
2583
2550
                }
2584
2551
        }
2585
2552
 
2586
 
        if (!fl.fl4_dst) {
2587
 
                fl.fl4_dst = fl.fl4_src;
2588
 
                if (!fl.fl4_dst)
2589
 
                        fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK);
 
2553
        if (!fl4->daddr) {
 
2554
                fl4->daddr = fl4->saddr;
 
2555
                if (!fl4->daddr)
 
2556
                        fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
2590
2557
                dev_out = net->loopback_dev;
2591
 
                fl.oif = net->loopback_dev->ifindex;
 
2558
                fl4->flowi4_oif = net->loopback_dev->ifindex;
2592
2559
                res.type = RTN_LOCAL;
2593
2560
                flags |= RTCF_LOCAL;
2594
2561
                goto make_route;
2595
2562
        }
2596
2563
 
2597
 
        if (fib_lookup(net, &fl, &res)) {
 
2564
        if (fib_lookup(net, fl4, &res)) {
2598
2565
                res.fi = NULL;
2599
 
                if (oldflp->oif) {
 
2566
                if (fl4->flowi4_oif) {
2600
2567
                        /* Apparently, routing tables are wrong. Assume,
2601
2568
                           that the destination is on link.
2602
2569
 
2615
2582
                           likely IPv6, but we do not.
2616
2583
                         */
2617
2584
 
2618
 
                        if (fl.fl4_src == 0)
2619
 
                                fl.fl4_src = inet_select_addr(dev_out, 0,
 
2585
                        if (fl4->saddr == 0)
 
2586
                                fl4->saddr = inet_select_addr(dev_out, 0,
2620
2587
                                                              RT_SCOPE_LINK);
2621
2588
                        res.type = RTN_UNICAST;
2622
2589
                        goto make_route;
2623
2590
                }
2624
 
                err = -ENETUNREACH;
 
2591
                rth = ERR_PTR(-ENETUNREACH);
2625
2592
                goto out;
2626
2593
        }
2627
2594
 
2628
2595
        if (res.type == RTN_LOCAL) {
2629
 
                if (!fl.fl4_src) {
 
2596
                if (!fl4->saddr) {
2630
2597
                        if (res.fi->fib_prefsrc)
2631
 
                                fl.fl4_src = res.fi->fib_prefsrc;
 
2598
                                fl4->saddr = res.fi->fib_prefsrc;
2632
2599
                        else
2633
 
                                fl.fl4_src = fl.fl4_dst;
 
2600
                                fl4->saddr = fl4->daddr;
2634
2601
                }
2635
2602
                dev_out = net->loopback_dev;
2636
 
                fl.oif = dev_out->ifindex;
 
2603
                fl4->flowi4_oif = dev_out->ifindex;
2637
2604
                res.fi = NULL;
2638
2605
                flags |= RTCF_LOCAL;
2639
2606
                goto make_route;
2640
2607
        }
2641
2608
 
2642
2609
#ifdef CONFIG_IP_ROUTE_MULTIPATH
2643
 
        if (res.fi->fib_nhs > 1 && fl.oif == 0)
2644
 
                fib_select_multipath(&fl, &res);
 
2610
        if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0)
 
2611
                fib_select_multipath(&res);
2645
2612
        else
2646
2613
#endif
2647
 
        if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif)
2648
 
                fib_select_default(net, &fl, &res);
 
2614
        if (!res.prefixlen &&
 
2615
            res.table->tb_num_default > 1 &&
 
2616
            res.type == RTN_UNICAST && !fl4->flowi4_oif)
 
2617
                fib_select_default(&res);
2649
2618
 
2650
 
        if (!fl.fl4_src)
2651
 
                fl.fl4_src = FIB_RES_PREFSRC(res);
 
2619
        if (!fl4->saddr)
 
2620
                fl4->saddr = FIB_RES_PREFSRC(net, res);
2652
2621
 
2653
2622
        dev_out = FIB_RES_DEV(res);
2654
 
        fl.oif = dev_out->ifindex;
 
2623
        fl4->flowi4_oif = dev_out->ifindex;
2655
2624
 
2656
2625
 
2657
2626
make_route:
2658
 
        err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags);
2659
 
 
2660
 
out:    return err;
 
2627
        rth = __mkroute_output(&res, fl4, orig_daddr, orig_saddr, orig_oif,
 
2628
                               dev_out, flags);
 
2629
        if (!IS_ERR(rth)) {
 
2630
                unsigned int hash;
 
2631
 
 
2632
                hash = rt_hash(orig_daddr, orig_saddr, orig_oif,
 
2633
                               rt_genid(dev_net(dev_out)));
 
2634
                rth = rt_intern_hash(hash, rth, NULL, orig_oif);
 
2635
        }
 
2636
 
 
2637
out:
 
2638
        rcu_read_unlock();
 
2639
        return rth;
2661
2640
}
2662
2641
 
2663
 
int __ip_route_output_key(struct net *net, struct rtable **rp,
2664
 
                          const struct flowi *flp)
 
2642
struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4)
2665
2643
{
 
2644
        struct rtable *rth;
2666
2645
        unsigned int hash;
2667
 
        int res;
2668
 
        struct rtable *rth;
2669
2646
 
2670
2647
        if (!rt_caching(net))
2671
2648
                goto slow_output;
2672
2649
 
2673
 
        hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net));
 
2650
        hash = rt_hash(flp4->daddr, flp4->saddr, flp4->flowi4_oif, rt_genid(net));
2674
2651
 
2675
2652
        rcu_read_lock_bh();
2676
2653
        for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth;
2677
2654
                rth = rcu_dereference_bh(rth->dst.rt_next)) {
2678
 
                if (rth->fl.fl4_dst == flp->fl4_dst &&
2679
 
                    rth->fl.fl4_src == flp->fl4_src &&
 
2655
                if (rth->rt_key_dst == flp4->daddr &&
 
2656
                    rth->rt_key_src == flp4->saddr &&
2680
2657
                    rt_is_output_route(rth) &&
2681
 
                    rth->fl.oif == flp->oif &&
2682
 
                    rth->fl.mark == flp->mark &&
2683
 
                    !((rth->fl.fl4_tos ^ flp->fl4_tos) &
 
2658
                    rth->rt_oif == flp4->flowi4_oif &&
 
2659
                    rth->rt_mark == flp4->flowi4_mark &&
 
2660
                    !((rth->rt_key_tos ^ flp4->flowi4_tos) &
2684
2661
                            (IPTOS_RT_MASK | RTO_ONLINK)) &&
2685
2662
                    net_eq(dev_net(rth->dst.dev), net) &&
2686
2663
                    !rt_is_expired(rth)) {
2687
2664
                        dst_use(&rth->dst, jiffies);
2688
2665
                        RT_CACHE_STAT_INC(out_hit);
2689
2666
                        rcu_read_unlock_bh();
2690
 
                        *rp = rth;
2691
 
                        return 0;
 
2667
                        if (!flp4->saddr)
 
2668
                                flp4->saddr = rth->rt_src;
 
2669
                        if (!flp4->daddr)
 
2670
                                flp4->daddr = rth->rt_dst;
 
2671
                        return rth;
2692
2672
                }
2693
2673
                RT_CACHE_STAT_INC(out_hlist_search);
2694
2674
        }
2695
2675
        rcu_read_unlock_bh();
2696
2676
 
2697
2677
slow_output:
2698
 
        rcu_read_lock();
2699
 
        res = ip_route_output_slow(net, rp, flp);
2700
 
        rcu_read_unlock();
2701
 
        return res;
 
2678
        return ip_route_output_slow(net, flp4);
2702
2679
}
2703
2680
EXPORT_SYMBOL_GPL(__ip_route_output_key);
2704
2681
 
2716
2693
{
2717
2694
}
2718
2695
 
 
2696
static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
 
2697
                                          unsigned long old)
 
2698
{
 
2699
        return NULL;
 
2700
}
 
2701
 
2719
2702
static struct dst_ops ipv4_dst_blackhole_ops = {
2720
2703
        .family                 =       AF_INET,
2721
2704
        .protocol               =       cpu_to_be16(ETH_P_IP),
2724
2707
        .default_mtu            =       ipv4_blackhole_default_mtu,
2725
2708
        .default_advmss         =       ipv4_default_advmss,
2726
2709
        .update_pmtu            =       ipv4_rt_blackhole_update_pmtu,
 
2710
        .cow_metrics            =       ipv4_rt_blackhole_cow_metrics,
2727
2711
};
2728
2712
 
2729
 
 
2730
 
static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi *flp)
 
2713
struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
2731
2714
{
2732
 
        struct rtable *ort = *rp;
2733
 
        struct rtable *rt = (struct rtable *)
2734
 
                dst_alloc(&ipv4_dst_blackhole_ops);
 
2715
        struct rtable *rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, 0, 0);
 
2716
        struct rtable *ort = (struct rtable *) dst_orig;
2735
2717
 
2736
2718
        if (rt) {
2737
2719
                struct dst_entry *new = &rt->dst;
2738
2720
 
2739
 
                atomic_set(&new->__refcnt, 1);
2740
2721
                new->__use = 1;
2741
2722
                new->input = dst_discard;
2742
2723
                new->output = dst_discard;
2746
2727
                if (new->dev)
2747
2728
                        dev_hold(new->dev);
2748
2729
 
2749
 
                rt->fl = ort->fl;
 
2730
                rt->rt_key_dst = ort->rt_key_dst;
 
2731
                rt->rt_key_src = ort->rt_key_src;
 
2732
                rt->rt_key_tos = ort->rt_key_tos;
 
2733
                rt->rt_route_iif = ort->rt_route_iif;
 
2734
                rt->rt_iif = ort->rt_iif;
 
2735
                rt->rt_oif = ort->rt_oif;
 
2736
                rt->rt_mark = ort->rt_mark;
2750
2737
 
2751
2738
                rt->rt_genid = rt_genid(net);
2752
2739
                rt->rt_flags = ort->rt_flags;
2753
2740
                rt->rt_type = ort->rt_type;
2754
2741
                rt->rt_dst = ort->rt_dst;
2755
2742
                rt->rt_src = ort->rt_src;
2756
 
                rt->rt_iif = ort->rt_iif;
2757
2743
                rt->rt_gateway = ort->rt_gateway;
2758
2744
                rt->rt_spec_dst = ort->rt_spec_dst;
2759
2745
                rt->peer = ort->peer;
2760
2746
                if (rt->peer)
2761
2747
                        atomic_inc(&rt->peer->refcnt);
 
2748
                rt->fi = ort->fi;
 
2749
                if (rt->fi)
 
2750
                        atomic_inc(&rt->fi->fib_clntref);
2762
2751
 
2763
2752
                dst_free(new);
2764
2753
        }
2765
2754
 
2766
 
        dst_release(&(*rp)->dst);
2767
 
        *rp = rt;
2768
 
        return rt ? 0 : -ENOMEM;
 
2755
        dst_release(dst_orig);
 
2756
 
 
2757
        return rt ? &rt->dst : ERR_PTR(-ENOMEM);
2769
2758
}
2770
2759
 
2771
 
int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp,
2772
 
                         struct sock *sk, int flags)
 
2760
struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
 
2761
                                    struct sock *sk)
2773
2762
{
2774
 
        int err;
2775
 
 
2776
 
        if ((err = __ip_route_output_key(net, rp, flp)) != 0)
2777
 
                return err;
2778
 
 
2779
 
        if (flp->proto) {
2780
 
                if (!flp->fl4_src)
2781
 
                        flp->fl4_src = (*rp)->rt_src;
2782
 
                if (!flp->fl4_dst)
2783
 
                        flp->fl4_dst = (*rp)->rt_dst;
2784
 
                err = __xfrm_lookup(net, (struct dst_entry **)rp, flp, sk,
2785
 
                                    flags ? XFRM_LOOKUP_WAIT : 0);
2786
 
                if (err == -EREMOTE)
2787
 
                        err = ipv4_dst_blackhole(net, rp, flp);
2788
 
 
2789
 
                return err;
2790
 
        }
2791
 
 
2792
 
        return 0;
 
2763
        struct rtable *rt = __ip_route_output_key(net, flp4);
 
2764
 
 
2765
        if (IS_ERR(rt))
 
2766
                return rt;
 
2767
 
 
2768
        if (flp4->flowi4_proto)
 
2769
                rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
 
2770
                                                   flowi4_to_flowi(flp4),
 
2771
                                                   sk, 0);
 
2772
 
 
2773
        return rt;
2793
2774
}
2794
2775
EXPORT_SYMBOL_GPL(ip_route_output_flow);
2795
2776
 
2796
 
int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp)
2797
 
{
2798
 
        return ip_route_output_flow(net, rp, flp, NULL, 0);
2799
 
}
2800
 
EXPORT_SYMBOL(ip_route_output_key);
2801
 
 
2802
2777
static int rt_fill_info(struct net *net,
2803
2778
                        struct sk_buff *skb, u32 pid, u32 seq, int event,
2804
2779
                        int nowait, unsigned int flags)
2806
2781
        struct rtable *rt = skb_rtable(skb);
2807
2782
        struct rtmsg *r;
2808
2783
        struct nlmsghdr *nlh;
2809
 
        long expires;
 
2784
        long expires = 0;
 
2785
        const struct inet_peer *peer = rt->peer;
2810
2786
        u32 id = 0, ts = 0, tsage = 0, error;
2811
2787
 
2812
2788
        nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags);
2817
2793
        r->rtm_family    = AF_INET;
2818
2794
        r->rtm_dst_len  = 32;
2819
2795
        r->rtm_src_len  = 0;
2820
 
        r->rtm_tos      = rt->fl.fl4_tos;
 
2796
        r->rtm_tos      = rt->rt_key_tos;
2821
2797
        r->rtm_table    = RT_TABLE_MAIN;
2822
2798
        NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN);
2823
2799
        r->rtm_type     = rt->rt_type;
2829
2805
 
2830
2806
        NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst);
2831
2807
 
2832
 
        if (rt->fl.fl4_src) {
 
2808
        if (rt->rt_key_src) {
2833
2809
                r->rtm_src_len = 32;
2834
 
                NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src);
 
2810
                NLA_PUT_BE32(skb, RTA_SRC, rt->rt_key_src);
2835
2811
        }
2836
2812
        if (rt->dst.dev)
2837
2813
                NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
2838
 
#ifdef CONFIG_NET_CLS_ROUTE
 
2814
#ifdef CONFIG_IP_ROUTE_CLASSID
2839
2815
        if (rt->dst.tclassid)
2840
2816
                NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid);
2841
2817
#endif
2842
2818
        if (rt_is_input_route(rt))
2843
2819
                NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst);
2844
 
        else if (rt->rt_src != rt->fl.fl4_src)
 
2820
        else if (rt->rt_src != rt->rt_key_src)
2845
2821
                NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src);
2846
2822
 
2847
2823
        if (rt->rt_dst != rt->rt_gateway)
2850
2826
        if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2851
2827
                goto nla_put_failure;
2852
2828
 
2853
 
        if (rt->fl.mark)
2854
 
                NLA_PUT_BE32(skb, RTA_MARK, rt->fl.mark);
 
2829
        if (rt->rt_mark)
 
2830
                NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark);
2855
2831
 
2856
2832
        error = rt->dst.error;
2857
 
        expires = rt->dst.expires ? rt->dst.expires - jiffies : 0;
2858
 
        if (rt->peer) {
 
2833
        if (peer) {
2859
2834
                inet_peer_refcheck(rt->peer);
2860
 
                id = atomic_read(&rt->peer->ip_id_count) & 0xffff;
2861
 
                if (rt->peer->tcp_ts_stamp) {
2862
 
                        ts = rt->peer->tcp_ts;
2863
 
                        tsage = get_seconds() - rt->peer->tcp_ts_stamp;
 
2835
                id = atomic_read(&peer->ip_id_count) & 0xffff;
 
2836
                if (peer->tcp_ts_stamp) {
 
2837
                        ts = peer->tcp_ts;
 
2838
                        tsage = get_seconds() - peer->tcp_ts_stamp;
2864
2839
                }
 
2840
                expires = ACCESS_ONCE(peer->pmtu_expires);
 
2841
                if (expires)
 
2842
                        expires -= jiffies;
2865
2843
        }
2866
2844
 
2867
2845
        if (rt_is_input_route(rt)) {
2870
2848
 
2871
2849
                if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
2872
2850
                    IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
2873
 
                        int err = ipmr_get_route(net, skb, r, nowait);
 
2851
                        int err = ipmr_get_route(net, skb,
 
2852
                                                 rt->rt_src, rt->rt_dst,
 
2853
                                                 r, nowait);
2874
2854
                        if (err <= 0) {
2875
2855
                                if (!nowait) {
2876
2856
                                        if (err == 0)
2884
2864
                        }
2885
2865
                } else
2886
2866
#endif
2887
 
                        NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif);
 
2867
                        NLA_PUT_U32(skb, RTA_IIF, rt->rt_iif);
2888
2868
        }
2889
2869
 
2890
2870
        if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage,
2958
2938
                if (err == 0 && rt->dst.error)
2959
2939
                        err = -rt->dst.error;
2960
2940
        } else {
2961
 
                struct flowi fl = {
2962
 
                        .fl4_dst = dst,
2963
 
                        .fl4_src = src,
2964
 
                        .fl4_tos = rtm->rtm_tos,
2965
 
                        .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
2966
 
                        .mark = mark,
 
2941
                struct flowi4 fl4 = {
 
2942
                        .daddr = dst,
 
2943
                        .saddr = src,
 
2944
                        .flowi4_tos = rtm->rtm_tos,
 
2945
                        .flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
 
2946
                        .flowi4_mark = mark,
2967
2947
                };
2968
 
                err = ip_route_output_key(net, &rt, &fl);
 
2948
                rt = ip_route_output_key(net, &fl4);
 
2949
 
 
2950
                err = 0;
 
2951
                if (IS_ERR(rt))
 
2952
                        err = PTR_ERR(rt);
2969
2953
        }
2970
2954
 
2971
2955
        if (err)
3248
3232
{
3249
3233
        get_random_bytes(&net->ipv4.rt_genid,
3250
3234
                         sizeof(net->ipv4.rt_genid));
 
3235
        get_random_bytes(&net->ipv4.dev_addr_genid,
 
3236
                         sizeof(net->ipv4.dev_addr_genid));
3251
3237
        return 0;
3252
3238
}
3253
3239
 
3256
3242
};
3257
3243
 
3258
3244
 
3259
 
#ifdef CONFIG_NET_CLS_ROUTE
 
3245
#ifdef CONFIG_IP_ROUTE_CLASSID
3260
3246
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
3261
 
#endif /* CONFIG_NET_CLS_ROUTE */
 
3247
#endif /* CONFIG_IP_ROUTE_CLASSID */
3262
3248
 
3263
3249
static __initdata unsigned long rhash_entries;
3264
3250
static int __init set_rhash_entries(char *str)
3274
3260
{
3275
3261
        int rc = 0;
3276
3262
 
3277
 
#ifdef CONFIG_NET_CLS_ROUTE
 
3263
#ifdef CONFIG_IP_ROUTE_CLASSID
3278
3264
        ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
3279
3265
        if (!ip_rt_acct)
3280
3266
                panic("IP: failed to allocate ip_rt_acct\n");
3311
3297
        devinet_init();
3312
3298
        ip_fib_init();
3313
3299
 
3314
 
        /* All the timers, started at system startup tend
3315
 
           to synchronize. Perturb it a bit.
3316
 
         */
3317
 
        INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func);
3318
 
        expires_ljiffies = jiffies;
3319
 
        schedule_delayed_work(&expires_work,
3320
 
                net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
3321
 
 
3322
3300
        if (ip_rt_proc_init())
3323
3301
                printk(KERN_ERR "Unable to create route proc files\n");
3324
3302
#ifdef CONFIG_XFRM