Skip to content

Commit 5182d6d

Browse files
Florian Westphal authored and gregkh committed
netfilter: nf_nat_masquerade: make async masq_inet6_event handling generic
[ Upstream commit 30db406 ] masq_inet6_event is called asynchronously from system work queue, because the inet6 notifier is atomic and nf_iterate_cleanup can sleep. The ipv4 and device notifiers call nf_iterate_cleanup directly. This is legal, but these notifiers are called with RTNL mutex held. A large conntrack table with many devices coming and going will have severe impact on the system usability, with 'ip a' blocking for several seconds. This change places the defer code into a helper and makes it more generic so ipv4 and ifdown notifiers can be converted to defer the cleanup walk as well in a follow patch. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: Sasha Levin <sashal@kernel.org>
1 parent bcb647c commit 5182d6d

1 file changed

Lines changed: 75 additions & 47 deletions

File tree

net/netfilter/nf_nat_masquerade.c

Lines changed: 75 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,19 @@
99

1010
#include <net/netfilter/nf_nat_masquerade.h>
1111

12+
struct masq_dev_work {
13+
struct work_struct work;
14+
struct net *net;
15+
union nf_inet_addr addr;
16+
int ifindex;
17+
int (*iter)(struct nf_conn *i, void *data);
18+
};
19+
20+
#define MAX_MASQ_WORKER_COUNT 16
21+
1222
static DEFINE_MUTEX(masq_mutex);
1323
static unsigned int masq_refcnt __read_mostly;
24+
static atomic_t masq_worker_count __read_mostly;
1425

1526
unsigned int
1627
nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
@@ -63,6 +74,63 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
6374
}
6475
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);
6576

77+
static void iterate_cleanup_work(struct work_struct *work)
78+
{
79+
struct masq_dev_work *w;
80+
81+
w = container_of(work, struct masq_dev_work, work);
82+
83+
nf_ct_iterate_cleanup_net(w->net, w->iter, (void *)w, 0, 0);
84+
85+
put_net(w->net);
86+
kfree(w);
87+
atomic_dec(&masq_worker_count);
88+
module_put(THIS_MODULE);
89+
}
90+
91+
/* Iterate conntrack table in the background and remove conntrack entries
92+
* that use the device/address being removed.
93+
*
94+
* In case too many work items have been queued already or memory allocation
95+
* fails iteration is skipped, conntrack entries will time out eventually.
96+
*/
97+
static void nf_nat_masq_schedule(struct net *net, union nf_inet_addr *addr,
98+
int ifindex,
99+
int (*iter)(struct nf_conn *i, void *data),
100+
gfp_t gfp_flags)
101+
{
102+
struct masq_dev_work *w;
103+
104+
if (atomic_read(&masq_worker_count) > MAX_MASQ_WORKER_COUNT)
105+
return;
106+
107+
net = maybe_get_net(net);
108+
if (!net)
109+
return;
110+
111+
if (!try_module_get(THIS_MODULE))
112+
goto err_module;
113+
114+
w = kzalloc(sizeof(*w), gfp_flags);
115+
if (w) {
116+
/* We can overshoot MAX_MASQ_WORKER_COUNT, no big deal */
117+
atomic_inc(&masq_worker_count);
118+
119+
INIT_WORK(&w->work, iterate_cleanup_work);
120+
w->ifindex = ifindex;
121+
w->net = net;
122+
w->iter = iter;
123+
if (addr)
124+
w->addr = *addr;
125+
schedule_work(&w->work);
126+
return;
127+
}
128+
129+
module_put(THIS_MODULE);
130+
err_module:
131+
put_net(net);
132+
}
133+
66134
static int device_cmp(struct nf_conn *i, void *ifindex)
67135
{
68136
const struct nf_conn_nat *nat = nfct_nat(i);
@@ -136,8 +204,6 @@ static struct notifier_block masq_inet_notifier = {
136204
};
137205

138206
#if IS_ENABLED(CONFIG_IPV6)
139-
static atomic_t v6_worker_count __read_mostly;
140-
141207
static int
142208
nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
143209
const struct in6_addr *daddr, unsigned int srcprefs,
@@ -187,13 +253,6 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
187253
}
188254
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6);
189255

190-
struct masq_dev_work {
191-
struct work_struct work;
192-
struct net *net;
193-
struct in6_addr addr;
194-
int ifindex;
195-
};
196-
197256
static int inet6_cmp(struct nf_conn *ct, void *work)
198257
{
199258
struct masq_dev_work *w = (struct masq_dev_work *)work;
@@ -204,21 +263,7 @@ static int inet6_cmp(struct nf_conn *ct, void *work)
204263

205264
tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
206265

207-
return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6);
208-
}
209-
210-
static void iterate_cleanup_work(struct work_struct *work)
211-
{
212-
struct masq_dev_work *w;
213-
214-
w = container_of(work, struct masq_dev_work, work);
215-
216-
nf_ct_iterate_cleanup_net(w->net, inet6_cmp, (void *)w, 0, 0);
217-
218-
put_net(w->net);
219-
kfree(w);
220-
atomic_dec(&v6_worker_count);
221-
module_put(THIS_MODULE);
266+
return nf_inet_addr_cmp(&w->addr, &tuple->dst.u3);
222267
}
223268

224269
/* atomic notifier; can't call nf_ct_iterate_cleanup_net (it can sleep).
@@ -233,36 +278,19 @@ static int masq_inet6_event(struct notifier_block *this,
233278
{
234279
struct inet6_ifaddr *ifa = ptr;
235280
const struct net_device *dev;
236-
struct masq_dev_work *w;
237-
struct net *net;
281+
union nf_inet_addr addr;
238282

239-
if (event != NETDEV_DOWN || atomic_read(&v6_worker_count) >= 16)
283+
if (event != NETDEV_DOWN)
240284
return NOTIFY_DONE;
241285

242286
dev = ifa->idev->dev;
243-
net = maybe_get_net(dev_net(dev));
244-
if (!net)
245-
return NOTIFY_DONE;
246287

247-
if (!try_module_get(THIS_MODULE))
248-
goto err_module;
288+
memset(&addr, 0, sizeof(addr));
249289

250-
w = kmalloc(sizeof(*w), GFP_ATOMIC);
251-
if (w) {
252-
atomic_inc(&v6_worker_count);
290+
addr.in6 = ifa->addr;
253291

254-
INIT_WORK(&w->work, iterate_cleanup_work);
255-
w->ifindex = dev->ifindex;
256-
w->net = net;
257-
w->addr = ifa->addr;
258-
schedule_work(&w->work);
259-
260-
return NOTIFY_DONE;
261-
}
262-
263-
module_put(THIS_MODULE);
264-
err_module:
265-
put_net(net);
292+
nf_nat_masq_schedule(dev_net(dev), &addr, dev->ifindex, inet6_cmp,
293+
GFP_ATOMIC);
266294
return NOTIFY_DONE;
267295
}
268296

0 commit comments

Comments
 (0)