1
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
2
index 494b888..851e01f 100644
3
--- a/drivers/net/Kconfig
4
+++ b/drivers/net/Kconfig
5
@@ -203,6 +203,125 @@ config RIONET_RX_SIZE
10
+ tristate "IMQ (intermediate queueing device) support"
11
+ depends on NETDEVICES && NETFILTER
13
+	  The IMQ device(s) is used as a placeholder for QoS queueing
14
+ disciplines. Every packet entering/leaving the IP stack can be
15
+ directed through the IMQ device where it's enqueued/dequeued to the
16
+ attached qdisc. This allows you to treat network devices as classes
17
+ and distribute bandwidth among them. Iptables is used to specify
18
+ through which IMQ device, if any, packets travel.
20
+ More information at: http://www.linuximq.net/
22
+ To compile this driver as a module, choose M here: the module
23
+ will be called imq. If unsure, say N.
26
+ prompt "IMQ behavior (PRE/POSTROUTING)"
28
+ default IMQ_BEHAVIOR_AB
30
+ This setting defines how IMQ behaves in respect to its
31
+ hooking in PREROUTING and POSTROUTING.
33
+ IMQ can work in any of the following ways:
35
+ PREROUTING | POSTROUTING
36
+ -----------------|-------------------
37
+ #1 After NAT | After NAT
38
+ #2 After NAT | Before NAT
39
+ #3 Before NAT | After NAT
40
+ #4 Before NAT | Before NAT
42
+ The default behavior is to hook before NAT on PREROUTING
43
+ and after NAT on POSTROUTING (#3).
45
+	  These settings are especially useful when trying to use IMQ
46
+ to shape NATed clients.
48
+ More information can be found at: www.linuximq.net
50
+ If not sure leave the default settings alone.
52
+config IMQ_BEHAVIOR_AA
55
+ This setting defines how IMQ behaves in respect to its
56
+ hooking in PREROUTING and POSTROUTING.
58
+ Choosing this option will make IMQ hook like this:
60
+ PREROUTING: After NAT
61
+ POSTROUTING: After NAT
63
+ More information can be found at: www.linuximq.net
65
+ If not sure leave the default settings alone.
67
+config IMQ_BEHAVIOR_AB
70
+ This setting defines how IMQ behaves in respect to its
71
+ hooking in PREROUTING and POSTROUTING.
73
+ Choosing this option will make IMQ hook like this:
75
+ PREROUTING: After NAT
76
+ POSTROUTING: Before NAT
78
+ More information can be found at: www.linuximq.net
80
+ If not sure leave the default settings alone.
82
+config IMQ_BEHAVIOR_BA
85
+ This setting defines how IMQ behaves in respect to its
86
+ hooking in PREROUTING and POSTROUTING.
88
+ Choosing this option will make IMQ hook like this:
90
+ PREROUTING: Before NAT
91
+ POSTROUTING: After NAT
93
+ More information can be found at: www.linuximq.net
95
+ If not sure leave the default settings alone.
97
+config IMQ_BEHAVIOR_BB
100
+ This setting defines how IMQ behaves in respect to its
101
+ hooking in PREROUTING and POSTROUTING.
103
+ Choosing this option will make IMQ hook like this:
105
+ PREROUTING: Before NAT
106
+ POSTROUTING: Before NAT
108
+ More information can be found at: www.linuximq.net
110
+ If not sure leave the default settings alone.
115
+ int "Number of IMQ devices"
120
+ This setting defines how many IMQ devices will be created.
122
+ The default value is 16.
124
+ More information can be found at: www.linuximq.net
126
+ If not sure leave the default settings alone.
129
tristate "Universal TUN/TAP device driver support"
131
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
132
index 3fef8a8..12dafc0 100644
133
--- a/drivers/net/Makefile
134
+++ b/drivers/net/Makefile
135
@@ -9,6 +9,7 @@ obj-$(CONFIG_BONDING) += bonding/
136
obj-$(CONFIG_DUMMY) += dummy.o
137
obj-$(CONFIG_EQUALIZER) += eql.o
138
obj-$(CONFIG_IFB) += ifb.o
139
+obj-$(CONFIG_IMQ) += imq.o
140
obj-$(CONFIG_MACVLAN) += macvlan.o
141
obj-$(CONFIG_MACVTAP) += macvtap.o
142
obj-$(CONFIG_MII) += mii.o
143
diff --git a/drivers/net/imq.c b/drivers/net/imq.c
145
index 0000000..2140535
147
+++ b/drivers/net/imq.c
150
+ * Pseudo-driver for the intermediate queue device.
152
+ * This program is free software; you can redistribute it and/or
153
+ * modify it under the terms of the GNU General Public License
154
+ * as published by the Free Software Foundation; either version
155
+ * 2 of the License, or (at your option) any later version.
157
+ * Authors: Patrick McHardy, <kaber@trash.net>
159
+ * The first version was written by Martin Devera, <devik@cdi.cz>
161
+ * Credits: Jan Rafaj <imq2t@cedric.vabo.cz>
162
+ * - Update patch to 2.4.21
163
+ * Sebastian Strollo <sstrollo@nortelnetworks.com>
164
+ * - Fix "Dead-loop on netdevice imq"-issue
165
+ * Marcel Sebek <sebek64@post.cz>
166
+ * - Update to 2.6.2-rc1
168
+ * After some time of inactivity there is a group taking care
169
+ * of IMQ again: http://www.linuximq.net
172
+ * 2004/06/30 - New version of IMQ patch to kernels <=2.6.7
173
+ * including the following changes:
175
+ * - Correction of ipv6 support "+"s issue (Hasso Tepper)
176
+ * - Correction of imq_init_devs() issue that resulted in
177
+ * kernel OOPS unloading IMQ as module (Norbert Buchmuller)
178
+ * - Addition of functionality to choose number of IMQ devices
179
+ * during kernel config (Andre Correa)
180
+ * - Addition of functionality to choose how IMQ hooks on
181
+ * PRE and POSTROUTING (after or before NAT) (Andre Correa)
182
+ * - Cosmetic corrections (Norbert Buchmuller) (Andre Correa)
185
+ * 2005/12/16 - IMQ versions between 2.6.7 and 2.6.13 were
186
+ * released with almost no problems. 2.6.14-x was released
187
+ * with some important changes: nfcache was removed; After
188
+ * some weeks of trouble we figured out that some IMQ fields
189
+ * in skb were missing in skbuff.c - skb_clone and copy_skb_header.
190
+ * These functions are correctly patched by this new patch version.
192
+ * Thanks for all who helped to figure out all the problems with
193
+ * 2.6.14.x: Patrick McHardy, Rune Kock, VeNoMouS, Max CtRiX,
194
+ * Kevin Shanahan, Richard Lucassen, Valery Dachev (hopefully
195
+ * I didn't forget anybody). I apologize again for my lack of time.
198
+ * 2008/06/17 - 2.6.25 - Changed imq.c to use qdisc_run() instead
199
+ * of qdisc_restart() and moved qdisc_run() to tasklet to avoid
200
+ * recursive locking. New initialization routines to fix 'rmmod' not
201
+ * working anymore. Used code from ifb.c. (Jussi Kivilinna)
203
+ * 2008/08/06 - 2.6.26 - (JK)
204
+ * - Replaced tasklet with 'netif_schedule()'.
205
+ * - Cleaned up and added comments for imq_nf_queue().
208
+ *             - Add skb_save_cb/skb_restore_cb helper functions for backing up
209
+ * control buffer. This is needed because qdisc-layer on kernels
210
+ * 2.6.27 and newer overwrite control buffer. (Jussi Kivilinna)
211
+ * - Add better locking for IMQ device. Hopefully this will solve
212
+ * SMP issues. (Jussi Kivilinna)
215
+ * - Port to 2.6.29 + fix rmmod not working
217
+ * 2009/04/20 - (Jussi Kivilinna)
218
+ * - Use netdevice feature flags to avoid extra packet handling
219
+ * by core networking layer and possibly increase performance.
221
+ * 2009/09/26 - (Jussi Kivilinna)
222
+ * - Add imq_nf_reinject_lockless to fix deadlock with
223
+ * imq_nf_queue/imq_nf_reinject.
225
+ * 2009/12/08 - (Jussi Kivilinna)
227
+ * - Add check for skb->nf_queue_entry==NULL in imq_dev_xmit()
228
+ * - Also add better error checking for skb->nf_queue_entry usage
230
+ * 2010/02/25 - (Jussi Kivilinna)
233
+ * 2010/08/15 - (Jussi Kivilinna)
235
+ * - Simplify hook registration by using nf_register_hooks.
236
+ * - nf_reinject doesn't need spinlock around it, therefore remove
237
+ * imq_nf_reinject function. Other nf_reinject users protect
238
+ * their own data with spinlock. With IMQ however all data is
239
+ * needed is stored per skbuff, so no locking is needed.
240
+ * - Changed IMQ to use 'separate' NF_IMQ_QUEUE instead of
241
+ *              NF_QUEUE, this allows working coexistence of IMQ and other
243
+ * - Make IMQ multi-queue. Number of IMQ device queues can be
244
+ * increased with 'numqueues' module parameters. Default number
245
+ * of queues is 1, in other words by default IMQ works as
246
+ * single-queue device. Multi-queue selection is based on
247
+ * IFB multi-queue patch by Changli Gao <xiaosuo@gmail.com>.
249
+ * 2011/03/18 - (Jussi Kivilinna)
252
+ * 2011/07/12 - (syoder89@gmail.com)
253
+ * - Crash fix that happens when the receiving interface has more
254
+ * than one queue (add missing skb_set_queue_mapping in
255
+ * imq_select_queue).
257
+ * 2011/07/26 - (Jussi Kivilinna)
258
+ * - Add queue mapping checks for packets exiting IMQ.
261
+ * 2011/08/16 - (Jussi Kivilinna)
262
+ * - Clear IFF_TX_SKB_SHARING flag that was added for linux 3.0.2
264
+ * 2011/11/03 - Germano Michel <germanomichel@gmail.com>
265
+ * - Fix IMQ for net namespaces
267
+ * 2011/11/04 - Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
269
+ * - Clean-up, move 'get imq device pointer by imqX name' to
270
+ * separate function from imq_nf_queue().
272
+ * 2012/01/05 - Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
275
+ * 2012/03/19 - Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
278
+ * 2012/12/12 - Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
280
+ * - Fix checkpatch.pl warnings
282
+ * 2013/09/10 - Jussi Kivilinna <jussi.kivilinna@iki.fi>
283
+ * - Fixed GSO handling for 3.10, see imq_nf_queue() for comments.
284
+ * - Don't copy skb->cb_next when copying or cloning skbuffs.
286
+ * 2013/09/16 - Jussi Kivilinna <jussi.kivilinna@iki.fi>
289
+ * 2013/11/12 - Jussi Kivilinna <jussi.kivilinna@iki.fi>
292
+ * 2014/02/07 - Jussi Kivilinna <jussi.kivilinna@iki.fi>
295
+ * Also, many thanks to pablo Sebastian Greco for making the initial
296
+ * patch and to those who helped the testing.
298
+ * More info at: http://www.linuximq.net/ (Andre Correa)
301
+#include <linux/module.h>
302
+#include <linux/kernel.h>
303
+#include <linux/moduleparam.h>
304
+#include <linux/list.h>
305
+#include <linux/skbuff.h>
306
+#include <linux/netdevice.h>
307
+#include <linux/etherdevice.h>
308
+#include <linux/rtnetlink.h>
309
+#include <linux/if_arp.h>
310
+#include <linux/netfilter.h>
311
+#include <linux/netfilter_ipv4.h>
312
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
313
+ #include <linux/netfilter_ipv6.h>
315
+#include <linux/imq.h>
316
+#include <net/pkt_sched.h>
317
+#include <net/netfilter/nf_queue.h>
318
+#include <net/sock.h>
319
+#include <linux/ip.h>
320
+#include <linux/ipv6.h>
321
+#include <linux/if_vlan.h>
322
+#include <linux/if_pppox.h>
324
+#include <net/ipv6.h>
326
+static int imq_nf_queue(struct nf_queue_entry *entry, unsigned queue_num);
328
+static nf_hookfn imq_nf_hook;
330
+static struct nf_hook_ops imq_ops[] = {
332
+ /* imq_ingress_ipv4 */
333
+ .hook = imq_nf_hook,
334
+ .owner = THIS_MODULE,
336
+ .hooknum = NF_INET_PRE_ROUTING,
337
+#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
338
+ .priority = NF_IP_PRI_MANGLE + 1,
340
+ .priority = NF_IP_PRI_NAT_DST + 1,
344
+ /* imq_egress_ipv4 */
345
+ .hook = imq_nf_hook,
346
+ .owner = THIS_MODULE,
348
+ .hooknum = NF_INET_POST_ROUTING,
349
+#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA)
350
+ .priority = NF_IP_PRI_LAST,
352
+ .priority = NF_IP_PRI_NAT_SRC - 1,
355
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
357
+ /* imq_ingress_ipv6 */
358
+ .hook = imq_nf_hook,
359
+ .owner = THIS_MODULE,
361
+ .hooknum = NF_INET_PRE_ROUTING,
362
+#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
363
+ .priority = NF_IP6_PRI_MANGLE + 1,
365
+ .priority = NF_IP6_PRI_NAT_DST + 1,
369
+ /* imq_egress_ipv6 */
370
+ .hook = imq_nf_hook,
371
+ .owner = THIS_MODULE,
373
+ .hooknum = NF_INET_POST_ROUTING,
374
+#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA)
375
+ .priority = NF_IP6_PRI_LAST,
377
+ .priority = NF_IP6_PRI_NAT_SRC - 1,
383
+#if defined(CONFIG_IMQ_NUM_DEVS)
384
+static int numdevs = CONFIG_IMQ_NUM_DEVS;
386
+static int numdevs = IMQ_MAX_DEVS;
389
+static struct net_device *imq_devs_cache[IMQ_MAX_DEVS];
391
+#define IMQ_MAX_QUEUES 32
392
+static int numqueues = 1;
393
+static u32 imq_hashrnd;
395
+static inline __be16 pppoe_proto(const struct sk_buff *skb)
397
+ return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
398
+ sizeof(struct pppoe_hdr)));
401
+static u16 imq_hash(struct net_device *dev, struct sk_buff *skb)
403
+ unsigned int pull_len;
404
+ u16 protocol = skb->protocol;
416
+ switch (protocol) {
417
+ case htons(ETH_P_8021Q): {
418
+ if (unlikely(skb_pull(skb, VLAN_HLEN) == NULL))
421
+ pull_len += VLAN_HLEN;
422
+ skb->network_header += VLAN_HLEN;
424
+ protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
428
+ case htons(ETH_P_PPP_SES): {
429
+ if (unlikely(skb_pull(skb, PPPOE_SES_HLEN) == NULL))
432
+ pull_len += PPPOE_SES_HLEN;
433
+ skb->network_header += PPPOE_SES_HLEN;
435
+ protocol = pppoe_proto(skb);
439
+ case htons(ETH_P_IP): {
440
+ const struct iphdr *iph = ip_hdr(skb);
442
+ if (unlikely(!pskb_may_pull(skb, sizeof(struct iphdr))))
445
+ addr1 = iph->daddr;
446
+ addr2 = iph->saddr;
448
+ ip_proto = !(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) ?
450
+ ihl = ip_hdrlen(skb);
454
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
455
+ case htons(ETH_P_IPV6): {
456
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
459
+ if (unlikely(!pskb_may_pull(skb, sizeof(struct ipv6hdr))))
462
+ addr1 = iph->daddr.s6_addr32[3];
463
+ addr2 = iph->saddr.s6_addr32[3];
464
+ ihl = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &ip_proto,
466
+ if (unlikely(ihl < 0))
474
+ if (pull_len != 0) {
475
+ skb_push(skb, pull_len);
476
+ skb->network_header -= pull_len;
479
+ return (u16)(ntohs(protocol) % dev->real_num_tx_queues);
483
+ swap(addr1, addr2);
485
+ switch (ip_proto) {
492
+ case IPPROTO_UDPLITE: {
493
+ if (likely(skb_copy_bits(skb, ihl, &ports.in32, 4) >= 0)) {
494
+ if (ports.in16[0] > ports.in16[1])
495
+ swap(ports.in16[0], ports.in16[1]);
505
+ if (pull_len != 0) {
506
+ skb_push(skb, pull_len);
507
+ skb->network_header -= pull_len;
510
+ hash = jhash_3words(addr1, addr2, ports.in32, imq_hashrnd ^ ip_proto);
512
+ return (u16)(((u64)hash * dev->real_num_tx_queues) >> 32);
515
+static inline bool sk_tx_queue_recorded(struct sock *sk)
517
+ return (sk_tx_queue_get(sk) >= 0);
520
+static struct netdev_queue *imq_select_queue(struct net_device *dev,
521
+ struct sk_buff *skb)
523
+ u16 queue_index = 0;
526
+ if (likely(dev->real_num_tx_queues == 1))
529
+ /* IMQ can be receiving ingress or engress packets. */
531
+ /* Check first for if rx_queue is set */
532
+ if (skb_rx_queue_recorded(skb)) {
533
+ queue_index = skb_get_rx_queue(skb);
537
+ /* Check if socket has tx_queue set */
538
+ if (sk_tx_queue_recorded(skb->sk)) {
539
+ queue_index = sk_tx_queue_get(skb->sk);
543
+ /* Try use socket hash */
544
+ if (skb->sk && skb->sk->sk_hash) {
545
+ hash = skb->sk->sk_hash;
547
+ (u16)(((u64)hash * dev->real_num_tx_queues) >> 32);
551
+ /* Generate hash from packet data */
552
+ queue_index = imq_hash(dev, skb);
555
+ if (unlikely(queue_index >= dev->real_num_tx_queues))
556
+ queue_index = (u16)((u32)queue_index % dev->real_num_tx_queues);
558
+ skb_set_queue_mapping(skb, queue_index);
559
+ return netdev_get_tx_queue(dev, queue_index);
562
+static struct net_device_stats *imq_get_stats(struct net_device *dev)
564
+ return &dev->stats;
567
+/* called for packets kfree'd in qdiscs at places other than enqueue */
568
+static void imq_skb_destructor(struct sk_buff *skb)
570
+ struct nf_queue_entry *entry = skb->nf_queue_entry;
572
+ skb->nf_queue_entry = NULL;
575
+ nf_queue_entry_release_refs(entry);
579
+ skb_restore_cb(skb); /* kfree backup */
582
+static void imq_done_check_queue_mapping(struct sk_buff *skb,
583
+ struct net_device *dev)
585
+ unsigned int queue_index;
587
+ /* Don't let queue_mapping be left too large after exiting IMQ */
588
+ if (likely(skb->dev != dev && skb->dev != NULL)) {
589
+ queue_index = skb_get_queue_mapping(skb);
590
+ if (unlikely(queue_index >= skb->dev->real_num_tx_queues)) {
591
+ queue_index = (u16)((u32)queue_index %
592
+ skb->dev->real_num_tx_queues);
593
+ skb_set_queue_mapping(skb, queue_index);
596
+ /* skb->dev was IMQ device itself or NULL, be on safe side and
597
+ * just clear queue mapping.
599
+ skb_set_queue_mapping(skb, 0);
603
+static netdev_tx_t imq_dev_xmit(struct sk_buff *skb, struct net_device *dev)
605
+ struct nf_queue_entry *entry = skb->nf_queue_entry;
607
+ skb->nf_queue_entry = NULL;
608
+ dev->trans_start = jiffies;
610
+ dev->stats.tx_bytes += skb->len;
611
+ dev->stats.tx_packets++;
613
+ if (unlikely(entry == NULL)) {
614
+ /* We don't know what is going on here.. packet is queued for
615
+ * imq device, but (probably) not by us.
617
+		 * If this packet was not sent here by imq_nf_queue(), then
618
+ * skb_save_cb() was not used and skb_free() should not show:
619
+ * WARNING: IMQ: kfree_skb: skb->cb_next:..
621
+ * WARNING: IMQ: kfree_skb: skb->nf_queue_entry...
623
+ * However if this message is shown, then IMQ is somehow broken
624
+ * and you should report this to linuximq.net.
627
+ /* imq_dev_xmit is black hole that eats all packets, report that
628
+ * we eat this packet happily and increase dropped counters.
631
+ dev->stats.tx_dropped++;
632
+ dev_kfree_skb(skb);
634
+ return NETDEV_TX_OK;
637
+ skb_restore_cb(skb); /* restore skb->cb */
639
+ skb->imq_flags = 0;
640
+ skb->destructor = NULL;
642
+ imq_done_check_queue_mapping(skb, dev);
644
+ nf_reinject(entry, NF_ACCEPT);
646
+ return NETDEV_TX_OK;
649
+static struct net_device *get_imq_device_by_index(int index)
651
+ struct net_device *dev = NULL;
655
+ /* get device by name and cache result */
656
+ snprintf(buf, sizeof(buf), "imq%d", index);
658
+ /* Search device from all namespaces. */
659
+ for_each_net(net) {
660
+ dev = dev_get_by_name(net, buf);
665
+ if (WARN_ON_ONCE(dev == NULL)) {
666
+ /* IMQ device not found. Exotic config? */
667
+ return ERR_PTR(-ENODEV);
670
+ imq_devs_cache[index] = dev;
676
+static struct nf_queue_entry *nf_queue_entry_dup(struct nf_queue_entry *e)
678
+ struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC);
680
+ if (nf_queue_entry_get_refs(entry))
687
+#ifdef CONFIG_BRIDGE_NETFILTER
688
+/* When called from bridge netfilter, skb->data must point to MAC header
689
+ * before calling skb_gso_segment(). Else, original MAC header is lost
690
+ * and segmented skbs will be sent to wrong destination.
692
+static void nf_bridge_adjust_skb_data(struct sk_buff *skb)
694
+ if (skb->nf_bridge)
695
+ __skb_push(skb, skb->network_header - skb->mac_header);
698
+static void nf_bridge_adjust_segmented_data(struct sk_buff *skb)
700
+ if (skb->nf_bridge)
701
+ __skb_pull(skb, skb->network_header - skb->mac_header);
704
+#define nf_bridge_adjust_skb_data(s) do {} while (0)
705
+#define nf_bridge_adjust_segmented_data(s) do {} while (0)
708
+static void free_entry(struct nf_queue_entry *entry)
710
+ nf_queue_entry_release_refs(entry);
714
+static int __imq_nf_queue(struct nf_queue_entry *entry, struct net_device *dev);
716
+static int __imq_nf_queue_gso(struct nf_queue_entry *entry,
717
+ struct net_device *dev, struct sk_buff *skb)
720
+ struct nf_queue_entry *entry_seg;
722
+ nf_bridge_adjust_segmented_data(skb);
724
+ if (skb->next == NULL) { /* last packet, no need to copy entry */
725
+ struct sk_buff *gso_skb = entry->skb;
727
+ ret = __imq_nf_queue(entry, dev);
729
+ entry->skb = gso_skb;
735
+ entry_seg = nf_queue_entry_dup(entry);
737
+ entry_seg->skb = skb;
738
+ ret = __imq_nf_queue(entry_seg, dev);
740
+ free_entry(entry_seg);
745
+static int imq_nf_queue(struct nf_queue_entry *entry, unsigned queue_num)
747
+ struct sk_buff *skb, *segs;
748
+ struct net_device *dev;
749
+ unsigned int queued;
750
+ int index, retval, err;
752
+ index = entry->skb->imq_flags & IMQ_F_IFMASK;
753
+ if (unlikely(index > numdevs - 1)) {
754
+ if (net_ratelimit())
755
+ pr_warn("IMQ: invalid device specified, highest is %u\n",
761
+ /* check for imq device by index from cache */
762
+ dev = imq_devs_cache[index];
763
+ if (unlikely(!dev)) {
764
+ dev = get_imq_device_by_index(index);
766
+ retval = PTR_ERR(dev);
771
+ if (unlikely(!(dev->flags & IFF_UP))) {
772
+ entry->skb->imq_flags = 0;
773
+ retval = -ECANCELED;
777
+ if (!skb_is_gso(entry->skb))
778
+ return __imq_nf_queue(entry, dev);
780
+ /* Since 3.10.x, GSO handling moved here as result of upstream commit
781
+ * a5fedd43d5f6c94c71053a66e4c3d2e35f1731a2 (netfilter: move
782
+ * skb_gso_segment into nfnetlink_queue module).
784
+ * Following code replicates the gso handling from
785
+ * 'net/netfilter/nfnetlink_queue_core.c':nfqnl_enqueue_packet().
790
+ switch (entry->pf) {
792
+ skb->protocol = htons(ETH_P_IP);
795
+ skb->protocol = htons(ETH_P_IPV6);
799
+ nf_bridge_adjust_skb_data(skb);
800
+ segs = skb_gso_segment(skb, 0);
801
+ /* Does not use PTR_ERR to limit the number of error codes that can be
802
+ * returned by nf_queue. For instance, callers rely on -ECANCELED to
803
+ * mean 'ignore this hook'.
811
+ struct sk_buff *nskb = segs->next;
812
+ if (nskb && nskb->next)
813
+ nskb->cb_next = NULL;
815
+ err = __imq_nf_queue_gso(entry, dev, segs);
824
+ if (err) /* some segments are already queued */
831
+ nf_bridge_adjust_segmented_data(skb);
837
+static int __imq_nf_queue(struct nf_queue_entry *entry, struct net_device *dev)
839
+ struct sk_buff *skb_orig, *skb, *skb_shared;
841
+ struct netdev_queue *txq;
842
+ spinlock_t *root_lock;
844
+ int retval = -EINVAL;
845
+ unsigned int orig_queue_index;
847
+ dev->last_rx = jiffies;
852
+ /* skb has owner? => make clone */
853
+ if (unlikely(skb->destructor)) {
855
+ skb = skb_clone(skb, GFP_ATOMIC);
856
+ if (unlikely(!skb)) {
860
+ skb->cb_next = NULL;
864
+ skb->nf_queue_entry = entry;
866
+ dev->stats.rx_bytes += skb->len;
867
+ dev->stats.rx_packets++;
870
+ /* skb->dev == NULL causes problems, try the find cause. */
871
+ if (net_ratelimit()) {
872
+ dev_warn(&dev->dev,
873
+ "received packet with skb->dev == NULL\n");
880
+ /* Disables softirqs for lock below */
881
+ rcu_read_lock_bh();
883
+ /* Multi-queue selection */
884
+ orig_queue_index = skb_get_queue_mapping(skb);
885
+ txq = imq_select_queue(dev, skb);
887
+ q = rcu_dereference(txq->qdisc);
888
+ if (unlikely(!q->enqueue))
889
+ goto packet_not_eaten_by_imq_dev;
891
+ root_lock = qdisc_lock(q);
892
+ spin_lock(root_lock);
894
+ users = atomic_read(&skb->users);
896
+ skb_shared = skb_get(skb); /* increase reference count by one */
898
+ /* backup skb->cb, as qdisc layer will overwrite it */
899
+ skb_save_cb(skb_shared);
900
+ qdisc_enqueue_root(skb_shared, q); /* might kfree_skb */
902
+ if (likely(atomic_read(&skb_shared->users) == users + 1)) {
903
+ kfree_skb(skb_shared); /* decrease reference count by one */
905
+ skb->destructor = &imq_skb_destructor;
908
+ if (unlikely(skb_orig))
909
+ kfree_skb(skb_orig); /* free original */
911
+ spin_unlock(root_lock);
912
+ rcu_read_unlock_bh();
914
+ /* schedule qdisc dequeue */
915
+ __netif_schedule(q);
920
+ skb_restore_cb(skb_shared); /* restore skb->cb */
921
+ skb->nf_queue_entry = NULL;
923
+ * qdisc dropped packet and decreased skb reference count of
924
+ * skb, so we don't really want to and try refree as that would
925
+ * actually destroy the skb.
927
+ spin_unlock(root_lock);
928
+ goto packet_not_eaten_by_imq_dev;
931
+packet_not_eaten_by_imq_dev:
932
+ skb_set_queue_mapping(skb, orig_queue_index);
933
+ rcu_read_unlock_bh();
935
+ /* cloned? restore original */
936
+ if (unlikely(skb_orig)) {
938
+ entry->skb = skb_orig;
945
+static unsigned int imq_nf_hook(const struct nf_hook_ops *hook_ops,
946
+ struct sk_buff *pskb,
947
+ const struct net_device *indev,
948
+ const struct net_device *outdev,
949
+ int (*okfn)(struct sk_buff *))
951
+ return (pskb->imq_flags & IMQ_F_ENQUEUE) ? NF_IMQ_QUEUE : NF_ACCEPT;
954
+static int imq_close(struct net_device *dev)
956
+ netif_stop_queue(dev);
960
+static int imq_open(struct net_device *dev)
962
+ netif_start_queue(dev);
966
+static const struct net_device_ops imq_netdev_ops = {
967
+ .ndo_open = imq_open,
968
+ .ndo_stop = imq_close,
969
+ .ndo_start_xmit = imq_dev_xmit,
970
+ .ndo_get_stats = imq_get_stats,
973
+static void imq_setup(struct net_device *dev)
975
+ dev->netdev_ops = &imq_netdev_ops;
976
+ dev->type = ARPHRD_VOID;
977
+ dev->mtu = 16000; /* too small? */
978
+ dev->tx_queue_len = 11000; /* too big? */
979
+ dev->flags = IFF_NOARP;
980
+ dev->features = NETIF_F_SG | NETIF_F_FRAGLIST |
981
+ NETIF_F_GSO | NETIF_F_HW_CSUM |
983
+ dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE |
984
+ IFF_TX_SKB_SHARING);
987
+static int imq_validate(struct nlattr *tb[], struct nlattr *data[])
991
+ if (tb[IFLA_ADDRESS]) {
992
+ if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
996
+ if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
997
+ ret = -EADDRNOTAVAIL;
1003
+ pr_warn("IMQ: imq_validate failed (%d)\n", ret);
1007
+static struct rtnl_link_ops imq_link_ops __read_mostly = {
1010
+ .setup = imq_setup,
1011
+ .validate = imq_validate,
1014
+static const struct nf_queue_handler imq_nfqh = {
1015
+ .outfn = imq_nf_queue,
1018
+static int __init imq_init_hooks(void)
1022
+ nf_register_queue_imq_handler(&imq_nfqh);
1024
+ ret = nf_register_hooks(imq_ops, ARRAY_SIZE(imq_ops));
1026
+ nf_unregister_queue_imq_handler();
1031
+static int __init imq_init_one(int index)
1033
+ struct net_device *dev;
1036
+ dev = alloc_netdev_mq(0, "imq%d", imq_setup, numqueues);
1040
+ ret = dev_alloc_name(dev, dev->name);
1044
+ dev->rtnl_link_ops = &imq_link_ops;
1045
+ ret = register_netdevice(dev);
1055
+static int __init imq_init_devs(void)
1059
+ if (numdevs < 1 || numdevs > IMQ_MAX_DEVS) {
1060
+ pr_err("IMQ: numdevs has to be betweed 1 and %u\n",
1065
+ if (numqueues < 1 || numqueues > IMQ_MAX_QUEUES) {
1066
+ pr_err("IMQ: numqueues has to be betweed 1 and %u\n",
1071
+ get_random_bytes(&imq_hashrnd, sizeof(imq_hashrnd));
1074
+ err = __rtnl_link_register(&imq_link_ops);
1076
+ for (i = 0; i < numdevs && !err; i++)
1077
+ err = imq_init_one(i);
1080
+ __rtnl_link_unregister(&imq_link_ops);
1081
+ memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
1088
+static int __init imq_init_module(void)
1092
+#if defined(CONFIG_IMQ_NUM_DEVS)
1093
+ BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS > 16);
1094
+ BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS < 2);
1095
+ BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS - 1 > IMQ_F_IFMASK);
1098
+ err = imq_init_devs();
1100
+ pr_err("IMQ: Error trying imq_init_devs(net)\n");
1104
+ err = imq_init_hooks();
1106
+ pr_err(KERN_ERR "IMQ: Error trying imq_init_hooks()\n");
1107
+ rtnl_link_unregister(&imq_link_ops);
1108
+ memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
1112
+ pr_info("IMQ driver loaded successfully. (numdevs = %d, numqueues = %d)\n",
1113
+ numdevs, numqueues);
1115
+#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
1116
+ pr_info("\tHooking IMQ before NAT on PREROUTING.\n");
1118
+ pr_info("\tHooking IMQ after NAT on PREROUTING.\n");
1120
+#if defined(CONFIG_IMQ_BEHAVIOR_AB) || defined(CONFIG_IMQ_BEHAVIOR_BB)
1121
+ pr_info("\tHooking IMQ before NAT on POSTROUTING.\n");
1123
+ pr_info("\tHooking IMQ after NAT on POSTROUTING.\n");
1129
+static void __exit imq_unhook(void)
1131
+ nf_unregister_hooks(imq_ops, ARRAY_SIZE(imq_ops));
1132
+ nf_unregister_queue_imq_handler();
1135
+static void __exit imq_cleanup_devs(void)
1137
+ rtnl_link_unregister(&imq_link_ops);
1138
+ memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
1141
+static void __exit imq_exit_module(void)
1144
+ imq_cleanup_devs();
1145
+ pr_info("IMQ driver unloaded successfully.\n");
1148
+module_init(imq_init_module);
1149
+module_exit(imq_exit_module);
1151
+module_param(numdevs, int, 0);
1152
+module_param(numqueues, int, 0);
1153
+MODULE_PARM_DESC(numdevs, "number of IMQ devices (how many imq* devices will be created)");
1154
+MODULE_PARM_DESC(numqueues, "number of queues per IMQ device");
1155
+MODULE_AUTHOR("http://www.linuximq.net");
1156
+MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. See http://www.linuximq.net/ for more information.");
1157
+MODULE_LICENSE("GPL");
1158
+MODULE_ALIAS_RTNL_LINK("imq");
1160
diff --git a/include/linux/imq.h b/include/linux/imq.h
1161
new file mode 100644
1162
index 0000000..1babb09
1164
+++ b/include/linux/imq.h
1169
+/* IFMASK (16 device indexes, 0 to 15) and flag(s) fit in 5 bits */
1170
+#define IMQ_F_BITS 5
1172
+#define IMQ_F_IFMASK 0x0f
1173
+#define IMQ_F_ENQUEUE 0x10
1175
+#define IMQ_MAX_DEVS (IMQ_F_IFMASK + 1)
1177
+#endif /* _IMQ_H */
1179
diff --git a/include/linux/netfilter/xt_IMQ.h b/include/linux/netfilter/xt_IMQ.h
1180
new file mode 100644
1181
index 0000000..9b07230
1183
+++ b/include/linux/netfilter/xt_IMQ.h
1188
+struct xt_imq_info {
1189
+ unsigned int todev; /* target imq device */
1192
+#endif /* _XT_IMQ_H */
1194
diff --git a/include/linux/netfilter_ipv4/ipt_IMQ.h b/include/linux/netfilter_ipv4/ipt_IMQ.h
1195
new file mode 100644
1196
index 0000000..7af320f
1198
+++ b/include/linux/netfilter_ipv4/ipt_IMQ.h
1203
+/* Backwards compatibility for old userspace */
1204
+#include <linux/netfilter/xt_IMQ.h>
1206
+#define ipt_imq_info xt_imq_info
1208
+#endif /* _IPT_IMQ_H */
1210
diff --git a/include/linux/netfilter_ipv6/ip6t_IMQ.h b/include/linux/netfilter_ipv6/ip6t_IMQ.h
1211
new file mode 100644
1212
index 0000000..198ac01
1214
+++ b/include/linux/netfilter_ipv6/ip6t_IMQ.h
1216
+#ifndef _IP6T_IMQ_H
1217
+#define _IP6T_IMQ_H
1219
+/* Backwards compatibility for old userspace */
1220
+#include <linux/netfilter/xt_IMQ.h>
1222
+#define ip6t_imq_info xt_imq_info
1224
+#endif /* _IP6T_IMQ_H */
1226
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
1227
index ad8f859..8473090 100644
1228
--- a/include/linux/skbuff.h
1229
+++ b/include/linux/skbuff.h
1231
#include <linux/dma-mapping.h>
1232
#include <linux/netdev_features.h>
1233
#include <net/flow_keys.h>
1234
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1235
+#include <linux/imq.h>
1238
/* A. Checksumming of received packets by device.
1240
@@ -441,6 +444,9 @@ struct sk_buff {
1241
* first. This is owned by whoever has the skb queued ATM.
1243
char cb[48] __aligned(8);
1244
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1248
unsigned long _skb_refdst;
1250
@@ -476,6 +482,9 @@ struct sk_buff {
1251
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
1252
struct nf_conntrack *nfct;
1254
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1255
+ struct nf_queue_entry *nf_queue_entry;
1257
#ifdef CONFIG_BRIDGE_NETFILTER
1258
struct nf_bridge_info *nf_bridge;
1260
@@ -513,6 +522,9 @@ struct sk_buff {
1262
__u8 encapsulation:1;
1263
/* 6/8 bit hole (depending on ndisc_nodetype presence) */
1264
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1265
+ __u8 imq_flags:IMQ_F_BITS;
1267
kmemcheck_bitfield_end(flags2);
1269
#if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL
1270
@@ -653,6 +665,12 @@ void kfree_skb_list(struct sk_buff *segs);
1271
void skb_tx_error(struct sk_buff *skb);
1272
void consume_skb(struct sk_buff *skb);
1273
void __kfree_skb(struct sk_buff *skb);
1275
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1276
+int skb_save_cb(struct sk_buff *skb);
1277
+int skb_restore_cb(struct sk_buff *skb);
1280
extern struct kmem_cache *skbuff_head_cache;
1282
void kfree_skb_partial(struct sk_buff *skb, bool head_stolen);
1283
@@ -2739,6 +2757,10 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src)
1284
nf_conntrack_get(src->nfct);
1285
dst->nfctinfo = src->nfctinfo;
1287
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1288
+ dst->imq_flags = src->imq_flags;
1289
+ dst->nf_queue_entry = src->nf_queue_entry;
1291
#ifdef CONFIG_BRIDGE_NETFILTER
1292
dst->nf_bridge = src->nf_bridge;
1293
nf_bridge_get(src->nf_bridge);
1294
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
1295
index 84a53d7..6ffb593 100644
1296
--- a/include/net/netfilter/nf_queue.h
1297
+++ b/include/net/netfilter/nf_queue.h
1298
@@ -33,6 +33,12 @@ struct nf_queue_handler {
1299
void nf_register_queue_handler(const struct nf_queue_handler *qh);
1300
void nf_unregister_queue_handler(void);
1301
void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
1302
+void nf_queue_entry_release_refs(struct nf_queue_entry *entry);
1304
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1305
+void nf_register_queue_imq_handler(const struct nf_queue_handler *qh);
1306
+void nf_unregister_queue_imq_handler(void);
1309
bool nf_queue_entry_get_refs(struct nf_queue_entry *entry);
1310
void nf_queue_entry_release_refs(struct nf_queue_entry *entry);
1311
diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h
1312
index ef1b1f8..079e5ff 100644
1313
--- a/include/uapi/linux/netfilter.h
1314
+++ b/include/uapi/linux/netfilter.h
1319
-#define NF_MAX_VERDICT NF_STOP
1320
+#define NF_IMQ_QUEUE 6
1321
+#define NF_MAX_VERDICT NF_IMQ_QUEUE
1323
/* we overload the higher bits for encoding auxiliary data such as the queue
1324
* number or errno values. Not nice, but better than additional function
1325
diff --git a/net/core/dev.c b/net/core/dev.c
1326
index 3ed11a5..fd62030 100644
1327
--- a/net/core/dev.c
1328
+++ b/net/core/dev.c
1330
#include <linux/hashtable.h>
1331
#include <linux/vmalloc.h>
1332
#include <linux/if_macvlan.h>
1333
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1334
+#include <linux/imq.h>
1337
#include "net-sysfs.h"
1339
@@ -2611,7 +2614,12 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1343
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1344
+ if (!list_empty(&ptype_all) &&
1345
+ !(skb->imq_flags & IMQ_F_ENQUEUE))
1347
if (!list_empty(&ptype_all))
1349
dev_queue_xmit_nit(skb, dev);
1352
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
1353
index baf6fc4..7d30d78 100644
1354
--- a/net/core/skbuff.c
1355
+++ b/net/core/skbuff.c
1358
struct kmem_cache *skbuff_head_cache __read_mostly;
1359
static struct kmem_cache *skbuff_fclone_cache __read_mostly;
1360
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1361
+static struct kmem_cache *skbuff_cb_store_cache __read_mostly;
1364
+static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
1365
+ struct pipe_buffer *buf)
1367
+ put_page(buf->page);
1370
+static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
1371
+ struct pipe_buffer *buf)
1373
+ get_page(buf->page);
1376
+static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
1377
+ struct pipe_buffer *buf)
1382
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1383
+/* Control buffer save/restore for IMQ devices */
1384
+struct skb_cb_table {
1385
+ char cb[48] __aligned(8);
1390
+static DEFINE_SPINLOCK(skb_cb_store_lock);
1392
+int skb_save_cb(struct sk_buff *skb)
1394
+ struct skb_cb_table *next;
1396
+ next = kmem_cache_alloc(skbuff_cb_store_cache, GFP_ATOMIC);
1400
+ BUILD_BUG_ON(sizeof(skb->cb) != sizeof(next->cb));
1402
+ memcpy(next->cb, skb->cb, sizeof(skb->cb));
1403
+ next->cb_next = skb->cb_next;
1405
+ atomic_set(&next->refcnt, 1);
1407
+ skb->cb_next = next;
1410
+EXPORT_SYMBOL(skb_save_cb);
1412
+int skb_restore_cb(struct sk_buff *skb)
1414
+ struct skb_cb_table *next;
1416
+ if (!skb->cb_next)
1419
+ next = skb->cb_next;
1421
+ BUILD_BUG_ON(sizeof(skb->cb) != sizeof(next->cb));
1423
+ memcpy(skb->cb, next->cb, sizeof(skb->cb));
1424
+ skb->cb_next = next->cb_next;
1426
+ spin_lock(&skb_cb_store_lock);
1428
+ if (atomic_dec_and_test(&next->refcnt))
1429
+ kmem_cache_free(skbuff_cb_store_cache, next);
1431
+ spin_unlock(&skb_cb_store_lock);
1435
+EXPORT_SYMBOL(skb_restore_cb);
1437
+static void skb_copy_stored_cb(struct sk_buff *new, const struct sk_buff *__old)
1439
+ struct skb_cb_table *next;
1440
+ struct sk_buff *old;
1442
+ if (!__old->cb_next) {
1443
+ new->cb_next = NULL;
1447
+ spin_lock(&skb_cb_store_lock);
1449
+ old = (struct sk_buff *)__old;
1451
+ next = old->cb_next;
1452
+ atomic_inc(&next->refcnt);
1453
+ new->cb_next = next;
1455
+ spin_unlock(&skb_cb_store_lock);
1459
+/* Pipe buffer operations for a socket. */
1460
+static const struct pipe_buf_operations sock_pipe_buf_ops = {
1462
+ .map = generic_pipe_buf_map,
1463
+ .unmap = generic_pipe_buf_unmap,
1464
+ .confirm = generic_pipe_buf_confirm,
1465
+ .release = sock_pipe_buf_release,
1466
+ .steal = sock_pipe_buf_steal,
1467
+ .get = sock_pipe_buf_get,
1471
* skb_panic - private function for out-of-line support
1472
@@ -563,6 +672,28 @@ static void skb_release_head_state(struct sk_buff *skb)
1474
skb->destructor(skb);
1476
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1478
+ * This should not happen. When it does, avoid memleak by restoring
1479
+ * the chain of cb-backups.
1481
+ while (skb->cb_next != NULL) {
1482
+ if (net_ratelimit())
1483
+ pr_warn("IMQ: kfree_skb: skb->cb_next: %08x\n",
1484
+ (unsigned int)skb->cb_next);
1486
+ skb_restore_cb(skb);
1489
+ * This should not happen either, nf_queue_entry is nullified in
1490
+ * imq_dev_xmit(). If we have non-NULL nf_queue_entry then we are
1491
+ * leaking entry pointers, maybe memory. We don't know if this is
1492
+ * pointer to already freed memory, or should this be freed.
1493
+ * If this happens we need to add refcounting, etc for nf_queue_entry.
1495
+ if (skb->nf_queue_entry && net_ratelimit())
1496
+ pr_warn("%s\n", "IMQ: kfree_skb: skb->nf_queue_entry != NULL");
1498
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
1499
nf_conntrack_put(skb->nfct);
1501
@@ -694,6 +825,10 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
1502
new->sp = secpath_get(old->sp);
1504
memcpy(new->cb, old->cb, sizeof(old->cb));
1505
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1506
+ new->cb_next = NULL;
1507
+ /*skb_copy_stored_cb(new, old);*/
1509
new->csum = old->csum;
1510
new->local_df = old->local_df;
1511
new->pkt_type = old->pkt_type;
1512
@@ -3233,6 +3368,13 @@ void __init skb_init(void)
1514
SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1516
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1517
+ skbuff_cb_store_cache = kmem_cache_create("skbuff_cb_store_cache",
1518
+ sizeof(struct skb_cb_table),
1520
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1526
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
1527
index 12f7ef0..deb1c9d 100644
1528
--- a/net/ipv6/ip6_output.c
1529
+++ b/net/ipv6/ip6_output.c
1530
@@ -64,9 +64,6 @@ static int ip6_finish_output2(struct sk_buff *skb)
1531
struct in6_addr *nexthop;
1534
- skb->protocol = htons(ETH_P_IPV6);
1537
if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
1538
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1540
@@ -143,6 +140,13 @@ int ip6_output(struct sk_buff *skb)
1545
+ * IMQ-patch: moved setting skb->dev and skb->protocol from
1546
+ * ip6_finish_output2 to fix crashing at netif_skb_features().
1548
+ skb->protocol = htons(ETH_P_IPV6);
1551
return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
1553
!(IP6CB(skb)->flags & IP6SKB_REROUTED));
1554
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
1555
index e9410d1..ba801d5 100644
1556
--- a/net/netfilter/Kconfig
1557
+++ b/net/netfilter/Kconfig
1558
@@ -751,6 +751,18 @@ config NETFILTER_XT_TARGET_LOG
1560
To compile it as a module, choose M here. If unsure, say N.
1562
+config NETFILTER_XT_TARGET_IMQ
1563
+ tristate '"IMQ" target support'
1564
+ depends on NETFILTER_XTABLES
1565
+ depends on IP_NF_MANGLE || IP6_NF_MANGLE
1567
+ default m if NETFILTER_ADVANCED=n
1569
+ This option adds a `IMQ' target which is used to specify if and
1570
+ to which imq device packets should get enqueued/dequeued.
1572
+ To compile it as a module, choose M here. If unsure, say N.
1574
config NETFILTER_XT_TARGET_MARK
1575
tristate '"MARK" target support'
1576
depends on NETFILTER_ADVANCED
1577
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
1578
index bffdad7..050e613 100644
1579
--- a/net/netfilter/Makefile
1580
+++ b/net/netfilter/Makefile
1581
@@ -103,6 +103,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
1582
obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
1583
obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
1584
obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o
1585
+obj-$(CONFIG_NETFILTER_XT_TARGET_IMQ) += xt_IMQ.o
1586
obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
1587
obj-$(CONFIG_NETFILTER_XT_TARGET_LOG) += xt_LOG.o
1588
obj-$(CONFIG_NETFILTER_XT_TARGET_NETMAP) += xt_NETMAP.o
1589
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
1590
index 1fbab0c..4493417 100644
1591
--- a/net/netfilter/core.c
1592
+++ b/net/netfilter/core.c
1593
@@ -191,9 +191,11 @@ next_hook:
1594
ret = NF_DROP_GETERR(verdict);
1597
- } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
1598
+ } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE ||
1599
+ (verdict & NF_VERDICT_MASK) == NF_IMQ_QUEUE) {
1600
int err = nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
1601
- verdict >> NF_VERDICT_QBITS);
1602
+ verdict >> NF_VERDICT_QBITS,
1603
+ verdict & NF_VERDICT_MASK);
1605
if (err == -ECANCELED)
1607
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
1608
index 61a3c92..5388a0e 100644
1609
--- a/net/netfilter/nf_internals.h
1610
+++ b/net/netfilter/nf_internals.h
1611
@@ -23,7 +23,7 @@ unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb,
1612
int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem, u_int8_t pf,
1613
unsigned int hook, struct net_device *indev,
1614
struct net_device *outdev, int (*okfn)(struct sk_buff *),
1615
- unsigned int queuenum);
1616
+ unsigned int queuenum, unsigned int queuetype);
1617
int __init netfilter_queue_init(void);
1620
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
1621
index 5d24b1f..28317dc 100644
1622
--- a/net/netfilter/nf_queue.c
1623
+++ b/net/netfilter/nf_queue.c
1626
static const struct nf_queue_handler __rcu *queue_handler __read_mostly;
1628
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1629
+static const struct nf_queue_handler __rcu *queue_imq_handler __read_mostly;
1631
+void nf_register_queue_imq_handler(const struct nf_queue_handler *qh)
1633
+ rcu_assign_pointer(queue_imq_handler, qh);
1635
+EXPORT_SYMBOL_GPL(nf_register_queue_imq_handler);
1637
+void nf_unregister_queue_imq_handler(void)
1639
+ RCU_INIT_POINTER(queue_imq_handler, NULL);
1640
+ synchronize_rcu();
1642
+EXPORT_SYMBOL_GPL(nf_unregister_queue_imq_handler);
1645
/* return EBUSY when somebody else is registered, return EEXIST if the
1646
* same handler is registered, return 0 in case of success. */
1647
void nf_register_queue_handler(const struct nf_queue_handler *qh)
1648
@@ -105,7 +122,8 @@ int nf_queue(struct sk_buff *skb,
1649
struct net_device *indev,
1650
struct net_device *outdev,
1651
int (*okfn)(struct sk_buff *),
1652
- unsigned int queuenum)
1653
+ unsigned int queuenum,
1654
+ unsigned int queuetype)
1656
int status = -ENOENT;
1657
struct nf_queue_entry *entry = NULL;
1658
@@ -115,7 +133,17 @@ int nf_queue(struct sk_buff *skb,
1659
/* QUEUE == DROP if no one is waiting, to be safe. */
1662
- qh = rcu_dereference(queue_handler);
1663
+ if (queuetype == NF_IMQ_QUEUE) {
1664
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1665
+ qh = rcu_dereference(queue_imq_handler);
1671
+ qh = rcu_dereference(queue_handler);
1677
@@ -205,9 +233,11 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
1681
+ case NF_IMQ_QUEUE:
1682
err = nf_queue(skb, elem, entry->pf, entry->hook,
1683
entry->indev, entry->outdev, entry->okfn,
1684
- verdict >> NF_VERDICT_QBITS);
1685
+ verdict >> NF_VERDICT_QBITS,
1686
+ verdict & NF_VERDICT_MASK);
1688
if (err == -ECANCELED)
1690
diff --git a/net/netfilter/xt_IMQ.c b/net/netfilter/xt_IMQ.c
1691
new file mode 100644
1692
index 0000000..1c3cd66
1694
+++ b/net/netfilter/xt_IMQ.c
1697
+ * This target marks packets to be enqueued to an imq device
1699
+#include <linux/module.h>
1700
+#include <linux/skbuff.h>
1701
+#include <linux/netfilter/x_tables.h>
1702
+#include <linux/netfilter/xt_IMQ.h>
1703
+#include <linux/imq.h>
1705
+static unsigned int imq_target(struct sk_buff *pskb,
1706
+ const struct xt_action_param *par)
1708
+ const struct xt_imq_info *mr = par->targinfo;
1710
+ pskb->imq_flags = (mr->todev & IMQ_F_IFMASK) | IMQ_F_ENQUEUE;
1712
+ return XT_CONTINUE;
1715
+static int imq_checkentry(const struct xt_tgchk_param *par)
1717
+ struct xt_imq_info *mr = par->targinfo;
1719
+ if (mr->todev > IMQ_MAX_DEVS - 1) {
1720
+ pr_warn("IMQ: invalid device specified, highest is %u\n",
1721
+ IMQ_MAX_DEVS - 1);
1728
+static struct xt_target xt_imq_reg[] __read_mostly = {
1731
+ .family = AF_INET,
1732
+ .checkentry = imq_checkentry,
1733
+ .target = imq_target,
1734
+ .targetsize = sizeof(struct xt_imq_info),
1735
+ .table = "mangle",
1740
+ .family = AF_INET6,
1741
+ .checkentry = imq_checkentry,
1742
+ .target = imq_target,
1743
+ .targetsize = sizeof(struct xt_imq_info),
1744
+ .table = "mangle",
1749
+static int __init imq_init(void)
1751
+ return xt_register_targets(xt_imq_reg, ARRAY_SIZE(xt_imq_reg));
1754
+static void __exit imq_fini(void)
1756
+ xt_unregister_targets(xt_imq_reg, ARRAY_SIZE(xt_imq_reg));
1759
+module_init(imq_init);
1760
+module_exit(imq_fini);
1762
+MODULE_AUTHOR("http://www.linuximq.net");
1763
+MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. See http://www.linuximq.net/ for more information.");
1764
+MODULE_LICENSE("GPL");
1765
+MODULE_ALIAS("ipt_IMQ");
1766
+MODULE_ALIAS("ip6t_IMQ");