2
* ip_vs_nfct.c: Netfilter connection tracking support for IPVS
4
* Portions Copyright (C) 2001-2002
5
* Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland.
7
* Portions Copyright (C) 2003-2010
11
* This code is free software; you can redistribute it and/or modify
12
* it under the terms of the GNU General Public License as published by
13
* the Free Software Foundation; either version 2 of the License, or
14
* (at your option) any later version.
16
* This program is distributed in the hope that it will be useful,
17
* but WITHOUT ANY WARRANTY; without even the implied warranty of
18
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
* GNU General Public License for more details.
21
* You should have received a copy of the GNU General Public License
22
* along with this program; if not, write to the Free Software
23
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27
* Ben North <ben@redfrontdoor.org>
28
* Julian Anastasov <ja@ssi.bg> Reorganize and sync with latest kernels
29
* Hannes Eder <heder@google.com> Extend NFCT support for FTP, ipvs match
34
* - provide conntrack confirmation for new and related connections, by
35
* this way we can see their proper conntrack state in all hooks
36
* - support for all forwarding methods, not only NAT
37
* - FTP support (NAT), ability to support other NAT apps with expectations
38
* - to correctly create expectations for related NAT connections the proper
39
* NF conntrack support must be already installed, eg. ip_vs_ftp requires
40
* nf_conntrack_ftp ... iptables_nat for the same ports (but no iptables
41
* NAT rules are needed)
42
* - alter reply for NAT when forwarding packet in original direction:
43
* conntrack from client in NEW or RELATED (Passive FTP DATA) state or
44
* when RELATED conntrack is created from real server (Active FTP DATA)
45
* - if iptables_nat is not loaded the Passive FTP will not work (the
46
* PASV response cannot be NAT-ed) but Active FTP should work
50
#define KMSG_COMPONENT "IPVS"
51
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
53
#include <linux/module.h>
54
#include <linux/types.h>
55
#include <linux/kernel.h>
56
#include <linux/errno.h>
57
#include <linux/compiler.h>
58
#include <linux/vmalloc.h>
59
#include <linux/skbuff.h>
61
#include <linux/netfilter.h>
62
#include <linux/netfilter_ipv4.h>
63
#include <net/ip_vs.h>
64
#include <net/netfilter/nf_conntrack_core.h>
65
#include <net/netfilter/nf_conntrack_expect.h>
66
#include <net/netfilter/nf_conntrack_helper.h>
67
#include <net/netfilter/nf_conntrack_zones.h>
70
/*
 * Debug-print helpers: each FMT_* format string is meant to be paired with
 * the ARG_* argument list of the same name inside an IP_VS_DBG() call.
 *
 * FMT_TUPLE/ARG_TUPLE print the source and destination address of a
 * conntrack tuple together with their port fields passed through ntohs().
 * FMT_CONN/ARG_CONN print the client, virtual and destination address:port
 * pairs of an IPVS connection followed by its protocol and state.
 *
 * NOTE(review): FMT_TUPLE contains five conversions but only four arguments
 * are visible in ARG_TUPLE, whose last visible line ends in a continuation
 * backslash - the final argument line appears to be missing from this
 * listing; confirm against the original file.
 */
#define FMT_TUPLE "%pI4:%u->%pI4:%u/%u"
71
#define ARG_TUPLE(T) &(T)->src.u3.ip, ntohs((T)->src.u.all), \
72
&(T)->dst.u3.ip, ntohs((T)->dst.u.all), \
75
#define FMT_CONN "%pI4:%u->%pI4:%u->%pI4:%u/%u:%u"
76
#define ARG_CONN(C) &((C)->caddr.ip), ntohs((C)->cport), \
77
&((C)->vaddr.ip), ntohs((C)->vport), \
78
&((C)->daddr.ip), ntohs((C)->dport), \
79
(C)->protocol, (C)->state
82
/*
 * ip_vs_update_conntrack - rewrite the reply tuple of the conntrack on @skb.
 *
 * Takes a copy of the conntrack's IP_CT_DIR_REPLY tuple, overwrites address
 * and port fields with values from @cp (daddr/dport on the source side,
 * vaddr/vport on the destination side; ports are left untouched for ICMP and
 * ICMPv6) and hands the result to nf_conntrack_alter_reply().
 *
 * NOTE(review): this listing has lost lines (the return type, braces, the
 * end of the first condition and the statements the guard conditions
 * control), and @outin is not referenced by any visible line - presumably it
 * selects between the source-side and the destination-side rewrite; confirm
 * against the original file.
 */
ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin)
84
enum ip_conntrack_info ctinfo;
85
/* Plain initialization; the former "ct = ct = ..." self-assignment was a typo. */
struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
86
struct nf_conntrack_tuple new_tuple;
88
if (ct == NULL || nf_ct_is_confirmed(ct) || nf_ct_is_untracked(ct) ||
92
/* Never alter conntrack for non-NAT conns */
93
if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
96
/* Alter reply only in original direction */
97
if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
101
* The connection is not yet in the hashtable, so we update it.
102
* CIP->VIP will remain the same, so leave the tuple in
103
* IP_CT_DIR_ORIGINAL untouched. When the reply comes back from the
104
* real-server we will see RIP->DIP.
106
new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
108
* This will also take care of UDP and other protocols.
111
new_tuple.src.u3 = cp->daddr;
112
if (new_tuple.dst.protonum != IPPROTO_ICMP &&
113
new_tuple.dst.protonum != IPPROTO_ICMPV6)
114
new_tuple.src.u.tcp.port = cp->dport;
116
new_tuple.dst.u3 = cp->vaddr;
117
if (new_tuple.dst.protonum != IPPROTO_ICMP &&
118
new_tuple.dst.protonum != IPPROTO_ICMPV6)
119
new_tuple.dst.u.tcp.port = cp->vport;
121
IP_VS_DBG(7, "%s: Updating conntrack ct=%p, status=0x%lX, "
122
"ctinfo=%d, old reply=" FMT_TUPLE
123
", new reply=" FMT_TUPLE ", cp=" FMT_CONN "\n",
124
__func__, ct, ct->status, ctinfo,
125
ARG_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple),
126
ARG_TUPLE(&new_tuple), ARG_CONN(cp));
127
nf_conntrack_alter_reply(ct, &new_tuple);
130
/*
 * Thin wrapper: passes @skb to nf_conntrack_confirm() and returns that
 * call's result unchanged.
 */
int ip_vs_confirm_conntrack(struct sk_buff *skb)
132
return nf_conntrack_confirm(skb);
136
* Called from init_conntrack() as expectfn handler.
138
/*
 * Expectation callback for conntrack @ct created from expectation @exp
 * (ip_vs_nfct_expect_related() stores this function in exp->expectfn).
 *
 * Fills IPVS lookup parameters from the IP_CT_DIR_ORIGINAL tuple of @ct,
 * calls ip_vs_conn_out_get() and ip_vs_conn_in_get() with them, prepares a
 * modified copy of the reply tuple from the connection that was found
 * (destination := vaddr/vport after the "out" lookup, source := daddr/dport
 * after the "in" lookup) and applies it with nf_conntrack_alter_reply() only
 * when the connection's forwarding method is IP_VS_CONN_F_MASQ.
 *
 * NOTE(review): lines are missing from this listing (braces, the statements
 * controlled by the conditions, the tails of several IP_VS_DBG() calls), so
 * how control moves between the two lookups, the "unknown expect" message
 * and the final alter step cannot be read off the visible text; confirm
 * against the original file.
 */
static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
139
struct nf_conntrack_expect *exp)
141
struct nf_conntrack_tuple *orig, new_reply;
142
struct ip_vs_conn *cp;
143
struct ip_vs_conn_param p;
144
struct net *net = nf_ct_net(ct);
146
/* Guard on the expectation's L3 family; the statement it controls is not visible here. */
if (exp->tuple.src.l3num != PF_INET)
150
* We assume that no NF locks are held before this callback.
151
* ip_vs_conn_out_get and ip_vs_conn_in_get should match their
152
* expectations even if they use wildcard values, now we provide the
153
* actual values from the newly created original conntrack direction.
154
* The conntrack is confirmed when packet reaches IPVS hooks.
158
/* Lookup key: addresses, ports and protocol of the original direction. */
orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
159
ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum,
160
&orig->src.u3, orig->src.u.tcp.port,
161
&orig->dst.u3, orig->dst.u.tcp.port, &p);
162
/* First lookup with these parameters. */
cp = ip_vs_conn_out_get(&p);
164
/* Change reply CLIENT->RS to CLIENT->VS */
165
new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
166
IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", "
167
FMT_TUPLE ", found inout cp=" FMT_CONN "\n",
168
__func__, ct, ct->status,
169
ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
171
new_reply.dst.u3 = cp->vaddr;
172
new_reply.dst.u.tcp.port = cp->vport;
173
IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
174
", inout cp=" FMT_CONN "\n",
176
ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
182
/* Second lookup, same parameters. */
cp = ip_vs_conn_in_get(&p);
184
/* Change reply VS->CLIENT to RS->CLIENT */
185
new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
186
IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", "
187
FMT_TUPLE ", found outin cp=" FMT_CONN "\n",
188
__func__, ct, ct->status,
189
ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
191
new_reply.src.u3 = cp->daddr;
192
new_reply.src.u.tcp.port = cp->dport;
193
IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", "
194
FMT_TUPLE ", outin cp=" FMT_CONN "\n",
196
ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
201
IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE
202
" - unknown expect\n",
203
__func__, ct, ct->status, ARG_TUPLE(orig));
207
/* Never alter conntrack for non-NAT conns */
208
if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ)
209
nf_conntrack_alter_reply(ct, &new_reply);
215
* Create NF conntrack expectation with wildcard (optional) source port.
216
* Then the default callback function will alter the reply and will confirm
217
* the conntrack entry when the first packet comes.
218
* Use port 0 to expect connection from any port.
220
/*
 * Register a conntrack expectation on @ct for a connection related to @cp.
 *
 * @proto is passed through to nf_ct_expect_init() as the protocol.
 * @port is the optional source port: when it is zero, NULL is passed
 * instead of a pointer to it.
 * @from_rs selects the expected endpoints: non-zero uses cp->daddr and
 * cp->caddr with port cp->cport, zero uses cp->caddr and cp->vaddr with
 * port cp->vport (first address is the source, second the destination, per
 * the argument order of nf_ct_expect_init()).
 *
 * The expectation gets ip_vs_nfct_expect_callback() as its expectfn, is
 * submitted with nf_ct_expect_related() and the local reference is then
 * released with nf_ct_expect_put().
 *
 * NOTE(review): @skb is not referenced by any visible line, and the
 * statements controlled by the first condition as well as anything between
 * nf_ct_expect_alloc() and nf_ct_expect_init() are missing from this
 * listing; confirm against the original file.
 */
void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct,
221
struct ip_vs_conn *cp, u_int8_t proto,
222
const __be16 port, int from_rs)
224
struct nf_conntrack_expect *exp;
226
if (ct == NULL || nf_ct_is_untracked(ct))
229
exp = nf_ct_expect_alloc(ct);
233
nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
234
from_rs ? &cp->daddr : &cp->caddr,
235
from_rs ? &cp->caddr : &cp->vaddr,
236
proto, port ? &port : NULL,
237
from_rs ? &cp->cport : &cp->vport);
239
/* Route the new conntrack through the IPVS callback when the expectation matches. */
exp->expectfn = ip_vs_nfct_expect_callback;
241
IP_VS_DBG(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n",
242
__func__, ct, ARG_TUPLE(&exp->tuple));
243
nf_ct_expect_related(exp);
244
nf_ct_expect_put(exp);
246
EXPORT_SYMBOL(ip_vs_nfct_expect_related);
249
* Our connection was terminated, try to drop the conntrack immediately
251
/*
 * Look up the conntrack entry that corresponds to @cp and try to expire it
 * right away.
 *
 * Builds an IP_CT_DIR_ORIGINAL tuple from @cp (caddr:cport -> vaddr:vport,
 * protocol, address family), looks it up with nf_conntrack_find_get() in
 * NF_CT_DEFAULT_ZONE and, when del_timer() on the entry's timeout reports
 * that a pending timer was removed, invokes the timer's handler directly
 * with the timer's data.  Each outcome is reported through IP_VS_DBG().
 *
 * NOTE(review): this listing is missing lines - the declaration of ct, the
 * last argument of nf_conntrack_find_get(), the test of its result, the
 * else branches/closing braces and any release of the reference the lookup
 * takes are not visible; confirm against the original file.
 */
void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
253
struct nf_conntrack_tuple_hash *h;
255
struct nf_conntrack_tuple tuple;
260
/* Compound literal zeroes every field that is not named here. */
tuple = (struct nf_conntrack_tuple) {
261
.dst = { .protonum = cp->protocol, .dir = IP_CT_DIR_ORIGINAL } };
262
tuple.src.u3 = cp->caddr;
263
tuple.src.u.all = cp->cport;
264
tuple.src.l3num = cp->af;
265
tuple.dst.u3 = cp->vaddr;
266
tuple.dst.u.all = cp->vport;
268
IP_VS_DBG(7, "%s: dropping conntrack with tuple=" FMT_TUPLE
269
" for conn " FMT_CONN "\n",
270
__func__, ARG_TUPLE(&tuple), ARG_CONN(cp));
272
h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE,
275
ct = nf_ct_tuplehash_to_ctrack(h);
276
/* Show what happens instead of calling nf_ct_kill() */
277
if (del_timer(&ct->timeout)) {
278
IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple="
280
__func__, ct, ARG_TUPLE(&tuple));
281
/* Run the timeout handler by hand now that the timer itself is gone. */
if (ct->timeout.function)
282
ct->timeout.function(ct->timeout.data);
284
IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple="
286
__func__, ct, ARG_TUPLE(&tuple));
290
IP_VS_DBG(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n",
291
__func__, ARG_TUPLE(&tuple));