From 1397af5bfd7d32b0cf2adb70a78c9a9e8f11d912 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 11 Apr 2022 13:01:18 +0200 Subject: netfilter: conntrack: remove the percpu dying list Its no longer needed. Entries that need event redelivery are placed on the new pernet dying list. The advantage is that there is no need to take additional spinlock on conntrack removal unless event redelivery failed or the conntrack entry was never added to the table in the first place (confirmed bit not set). The IPS_CONFIRMED bit now needs to be set as soon as the entry has been unlinked from the unconfirmed list, else the destroy function may attempt to unlink it a second time. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netns/conntrack.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net/netns') diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 0294f3d473af..e985a3010b89 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -96,7 +96,6 @@ struct nf_ip_net { struct ct_pcpu { spinlock_t lock; struct hlist_nulls_head unconfirmed; - struct hlist_nulls_head dying; }; struct netns_ct { -- cgit v1.3.1 From 8a75a2c1741073f0c2d7bee146648e717527e048 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 11 Apr 2022 13:01:24 +0200 Subject: netfilter: conntrack: remove unconfirmed list It has no function anymore and can be removed. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 1 - include/net/netns/conntrack.h | 6 ---- net/netfilter/nf_conntrack_core.c | 57 ++---------------------------------- net/netfilter/nf_conntrack_netlink.c | 44 +--------------------------- 4 files changed, 3 insertions(+), 105 deletions(-) (limited to 'include/net/netns') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index f60212244b13..3ce9a5b42fe5 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -101,7 +101,6 @@ struct nf_conn { /* Have we seen traffic both ways yet? (bitset) */ unsigned long status; - u16 cpu; possible_net_t ct_net; #if IS_ENABLED(CONFIG_NF_NAT) diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index e985a3010b89..a71cfd4e2f21 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -93,11 +93,6 @@ struct nf_ip_net { #endif }; -struct ct_pcpu { - spinlock_t lock; - struct hlist_nulls_head unconfirmed; -}; - struct netns_ct { #ifdef CONFIG_NF_CONNTRACK_EVENTS bool ecache_dwork_pending; @@ -109,7 +104,6 @@ struct netns_ct { u8 sysctl_tstamp; u8 sysctl_checksum; - struct ct_pcpu __percpu *pcpu_lists; struct ip_conntrack_stat __percpu *stat; struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb; struct nf_ip_net nf_ct_proto; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 7b4b3f5db959..de1547a2830e 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -525,35 +525,6 @@ clean_from_lists(struct nf_conn *ct) nf_ct_remove_expectations(ct); } -/* must be called with local_bh_disable */ -static void nf_ct_add_to_unconfirmed_list(struct nf_conn *ct) -{ - struct ct_pcpu *pcpu; - - /* add this conntrack to the (per cpu) unconfirmed list */ - ct->cpu = smp_processor_id(); - pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu); - - spin_lock(&pcpu->lock); - hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, - &pcpu->unconfirmed); - spin_unlock(&pcpu->lock); -} - -/* must be called with local_bh_disable */ -static void nf_ct_del_from_unconfirmed_list(struct nf_conn *ct) -{ - struct ct_pcpu *pcpu; - - /* We overload first tuple to link into unconfirmed list.*/ - pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu); - - spin_lock(&pcpu->lock); - BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode)); - hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); - spin_unlock(&pcpu->lock); -} - #define NFCT_ALIGN(len) (((len) + NFCT_INFOMASK) & ~NFCT_INFOMASK) /* Released via nf_ct_destroy() */ @@ -625,7 +596,6 @@ void nf_ct_destroy(struct nf_conntrack *nfct) if (unlikely(nf_ct_protonum(ct) == IPPROTO_GRE)) destroy_gre_conntrack(ct); - local_bh_disable(); /* Expectations will have been removed in clean_from_lists, * except TFTP can create an expectation on the first packet, * before connection is in the list, so we need to clean here, @@ -633,11 +603,6 @@ void nf_ct_destroy(struct nf_conntrack *nfct) */ nf_ct_remove_expectations(ct); - if (unlikely(!nf_ct_is_confirmed(ct))) - nf_ct_del_from_unconfirmed_list(ct); - - local_bh_enable(); - if (ct->master) nf_ct_put(ct->master); @@ -1248,7 +1213,6 @@ __nf_conntrack_confirm(struct sk_buff *skb) * user context, else we insert an already 'dead' hash, blocking * further use of that particular connection -JM. */ - nf_ct_del_from_unconfirmed_list(ct); ct->status |= IPS_CONFIRMED; if (unlikely(nf_ct_is_dying(ct))) { @@ -1803,9 +1767,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, if (!exp) __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC); - /* Now it is inserted into the unconfirmed list, set refcount to 1. */ + /* Now it is going to be associated with an sk_buff, set refcount to 1. */ refcount_set(&ct->ct_general.use, 1); - nf_ct_add_to_unconfirmed_list(ct); local_bh_enable(); @@ -2594,7 +2557,6 @@ i_see_dead_people: nf_conntrack_ecache_pernet_fini(net); nf_conntrack_expect_pernet_fini(net); free_percpu(net->ct.stat); - free_percpu(net->ct.pcpu_lists); } } @@ -2805,26 +2767,14 @@ int nf_conntrack_init_net(struct net *net) { struct nf_conntrack_net *cnet = nf_ct_pernet(net); int ret = -ENOMEM; - int cpu; BUILD_BUG_ON(IP_CT_UNTRACKED == IP_CT_NUMBER); BUILD_BUG_ON_NOT_POWER_OF_2(CONNTRACK_LOCKS); atomic_set(&cnet->count, 0); - net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu); - if (!net->ct.pcpu_lists) - goto err_stat; - - for_each_possible_cpu(cpu) { - struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); - - spin_lock_init(&pcpu->lock); - INIT_HLIST_NULLS_HEAD(&pcpu->unconfirmed, UNCONFIRMED_NULLS_VAL); - } - net->ct.stat = alloc_percpu(struct ip_conntrack_stat); if (!net->ct.stat) - goto err_pcpu_lists; + return ret; ret = nf_conntrack_expect_pernet_init(net); if (ret < 0) @@ -2840,8 +2790,5 @@ int nf_conntrack_init_net(struct net *net) err_expect: free_percpu(net->ct.stat); -err_pcpu_lists: - free_percpu(net->ct.pcpu_lists); -err_stat: return ret; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 2e9c8183e4a2..eafe640b3387 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1752,49 +1752,7 @@ static int ctnetlink_dump_one_entry(struct sk_buff *skb, static int ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb) { - struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx; - struct nf_conn *ct, *last; - struct nf_conntrack_tuple_hash *h; - struct hlist_nulls_node *n; - struct net *net = sock_net(skb->sk); - int res, cpu; - - if (ctx->done) - return 0; - - last = ctx->last; - - for (cpu = ctx->cpu; cpu < nr_cpu_ids; cpu++) { - struct ct_pcpu *pcpu; - - if (!cpu_possible(cpu)) - continue; - - pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); - spin_lock_bh(&pcpu->lock); -restart: - hlist_nulls_for_each_entry(h, n, &pcpu->unconfirmed, hnnode) { - ct = nf_ct_tuplehash_to_ctrack(h); - - res = ctnetlink_dump_one_entry(skb, cb, ct, false); - if (res < 0) { - ctx->cpu = cpu; - spin_unlock_bh(&pcpu->lock); - goto out; - } - } - if (ctx->last) { - ctx->last = NULL; - goto restart; - } - spin_unlock_bh(&pcpu->lock); - } - ctx->done = true; -out: - if (last) - nf_ct_put(last); - - return skb->len; + return 0; } static int -- cgit v1.3.1 From 2794cdb0b97bfe62d25c996c8afe4832207e78bc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 25 Apr 2022 15:15:41 +0200 Subject: netfilter: nfnetlink: allow to detect if ctnetlink listeners exist At this time, every new conntrack gets the 'event cache extension' enabled for it. This is because the 'net.netfilter.nf_conntrack_events' sysctl defaults to 1. Changing the default to 0 means that commands that rely on the event notification extension, e.g. 'conntrack -E' or conntrackd, stop working. We COULD detect if there is a listener by means of 'nfnetlink_has_listeners()' and only add the extension if this is true. The downside is a dependency from conntrack module to nfnetlink module. This adds a different way: inc/dec a counter whenever a ctnetlink group is being (un)subscribed and toggle a flag in struct net. Next patches will take advantage of this and will only add the event extension if the flag is set. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netns/conntrack.h | 1 + net/netfilter/nfnetlink.c | 40 +++++++++++++++++++++++++++++++++++++--- 2 files changed, 38 insertions(+), 3 deletions(-) (limited to 'include/net/netns') diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index a71cfd4e2f21..0677cd3de034 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -95,6 +95,7 @@ struct nf_ip_net { struct netns_ct { #ifdef CONFIG_NF_CONNTRACK_EVENTS + bool ctnetlink_has_listener; bool ecache_dwork_pending; #endif u8 sysctl_log_invalid; /* Log invalid packets */ diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 7e2c8dd01408..ad3bbe34ca88 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -45,6 +45,7 @@ MODULE_DESCRIPTION("Netfilter messages via netlink socket"); static unsigned int nfnetlink_pernet_id __read_mostly; struct nfnl_net { + unsigned int ctnetlink_listeners; struct sock *nfnl; }; @@ -654,7 +655,6 @@ static void nfnetlink_rcv(struct sk_buff *skb) netlink_rcv_skb(skb, nfnetlink_rcv_msg); } -#ifdef CONFIG_MODULES static int nfnetlink_bind(struct net *net, int group) { const struct nfnetlink_subsystem *ss; @@ -670,9 +670,44 @@ static int nfnetlink_bind(struct net *net, int group) rcu_read_unlock(); if (!ss) request_module_nowait("nfnetlink-subsys-%d", type); + +#ifdef CONFIG_NF_CONNTRACK_EVENTS + if (type == NFNL_SUBSYS_CTNETLINK) { + struct nfnl_net *nfnlnet = nfnl_pernet(net); + + nfnl_lock(NFNL_SUBSYS_CTNETLINK); + + if (WARN_ON_ONCE(nfnlnet->ctnetlink_listeners == UINT_MAX)) { + nfnl_unlock(NFNL_SUBSYS_CTNETLINK); + return -EOVERFLOW; + } + + nfnlnet->ctnetlink_listeners++; + if (nfnlnet->ctnetlink_listeners == 1) + WRITE_ONCE(net->ct.ctnetlink_has_listener, true); + nfnl_unlock(NFNL_SUBSYS_CTNETLINK); + } +#endif return 0; } + +static void nfnetlink_unbind(struct net *net, int group) +{ +#ifdef CONFIG_NF_CONNTRACK_EVENTS + int type = nfnl_group2type[group]; + + if (type == NFNL_SUBSYS_CTNETLINK) { + struct nfnl_net *nfnlnet = nfnl_pernet(net); + + nfnl_lock(NFNL_SUBSYS_CTNETLINK); + WARN_ON_ONCE(nfnlnet->ctnetlink_listeners == 0); + nfnlnet->ctnetlink_listeners--; + if (nfnlnet->ctnetlink_listeners == 0) + WRITE_ONCE(net->ct.ctnetlink_has_listener, false); + nfnl_unlock(NFNL_SUBSYS_CTNETLINK); + } #endif +} static int __net_init nfnetlink_net_init(struct net *net) { @@ -680,9 +715,8 @@ static int __net_init nfnetlink_net_init(struct net *net) struct netlink_kernel_cfg cfg = { .groups = NFNLGRP_MAX, .input = nfnetlink_rcv, -#ifdef CONFIG_MODULES .bind = nfnetlink_bind, -#endif + .unbind = nfnetlink_unbind, }; nfnlnet->nfnl = netlink_kernel_create(net, NETLINK_NETFILTER, &cfg); -- cgit v1.3.1