summary refs log tree commit diff
path: root/net/netfilter
diff options
context:
space:
mode:
Diffstat (limited to 'net/netfilter')
-rw-r--r-- net/netfilter/core.c 6
-rw-r--r-- net/netfilter/ipset/ip_set_core.c 10
-rw-r--r-- net/netfilter/ipvs/ip_vs_core.c 4
-rw-r--r-- net/netfilter/ipvs/ip_vs_ctl.c 12
-rw-r--r-- net/netfilter/ipvs/ip_vs_lblc.c 10
-rw-r--r-- net/netfilter/ipvs/ip_vs_lblcr.c 10
-rw-r--r-- net/netfilter/ipvs/ip_vs_sync.c 4
-rw-r--r-- net/netfilter/nf_bpf_link.c 125
-rw-r--r-- net/netfilter/nf_conntrack_bpf.c 1
-rw-r--r-- net/netfilter/nf_conntrack_core.c 22
-rw-r--r-- net/netfilter/nf_conntrack_expect.c 4
-rw-r--r-- net/netfilter/nf_conntrack_helper.c 4
-rw-r--r-- net/netfilter/nf_conntrack_netlink.c 8
-rw-r--r-- net/netfilter/nf_conntrack_proto_dccp.c 2
-rw-r--r-- net/netfilter/nf_conntrack_proto_gre.c 10
-rw-r--r-- net/netfilter/nf_conntrack_proto_sctp.c 6
-rw-r--r-- net/netfilter/nf_conntrack_standalone.c 4
-rw-r--r-- net/netfilter/nf_flow_table_offload.c 22
-rw-r--r-- net/netfilter/nf_log.c 7
-rw-r--r-- net/netfilter/nf_nat_core.c 6
-rw-r--r-- net/netfilter/nf_tables_api.c 557
-rw-r--r-- net/netfilter/nf_tables_offload.c 13
-rw-r--r-- net/netfilter/nfnetlink_log.c 6
-rw-r--r-- net/netfilter/nft_byteorder.c 14
-rw-r--r-- net/netfilter/nft_cmp.c 2
-rw-r--r-- net/netfilter/nft_ct.c 4
-rw-r--r-- net/netfilter/nft_dynset.c 3
-rw-r--r-- net/netfilter/nft_fib.c 15
-rw-r--r-- net/netfilter/nft_flow_offload.c 6
-rw-r--r-- net/netfilter/nft_immediate.c 35
-rw-r--r-- net/netfilter/nft_lookup.c 6
-rw-r--r-- net/netfilter/nft_masq.c 8
-rw-r--r-- net/netfilter/nft_meta.c 6
-rw-r--r-- net/netfilter/nft_nat.c 8
-rw-r--r-- net/netfilter/nft_objref.c 8
-rw-r--r-- net/netfilter/nft_osf.c 6
-rw-r--r-- net/netfilter/nft_redir.c 8
-rw-r--r-- net/netfilter/nft_set_hash.c 88
-rw-r--r-- net/netfilter/nft_set_pipapo.c 101
-rw-r--r-- net/netfilter/nft_set_rbtree.c 167
-rw-r--r-- net/netfilter/nft_socket.c 2
-rw-r--r-- net/netfilter/x_tables.c 5
-rw-r--r-- net/netfilter/xt_repldata.h 2
-rw-r--r-- net/netfilter/xt_socket.c 4
44 files changed, 930 insertions, 421 deletions
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 5f76ae86a656..ef4e76e5aef9 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -680,6 +680,12 @@ EXPORT_SYMBOL_GPL(nfnl_ct_hook);
const struct nf_ct_hook __rcu *nf_ct_hook __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_hook);
+const struct nf_defrag_hook __rcu *nf_defrag_v4_hook __read_mostly;
+EXPORT_SYMBOL_GPL(nf_defrag_v4_hook);
+
+const struct nf_defrag_hook __rcu *nf_defrag_v6_hook __read_mostly;
+EXPORT_SYMBOL_GPL(nf_defrag_v6_hook);
+
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
u8 nf_ctnetlink_has_listener;
EXPORT_SYMBOL_GPL(nf_ctnetlink_has_listener);
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 0b68e2e2824e..e564b5174261 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -872,7 +872,7 @@ ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name)
BUG_ON(!set);
read_lock_bh(&ip_set_ref_lock);
- strncpy(name, set->name, IPSET_MAXNAMELEN);
+ strscpy_pad(name, set->name, IPSET_MAXNAMELEN);
read_unlock_bh(&ip_set_ref_lock);
}
EXPORT_SYMBOL_GPL(ip_set_name_byindex);
@@ -1326,7 +1326,7 @@ static int ip_set_rename(struct sk_buff *skb, const struct nfnl_info *info,
goto out;
}
}
- strncpy(set->name, name2, IPSET_MAXNAMELEN);
+ strscpy_pad(set->name, name2, IPSET_MAXNAMELEN);
out:
write_unlock_bh(&ip_set_ref_lock);
@@ -1380,9 +1380,9 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info,
return -EBUSY;
}
- strncpy(from_name, from->name, IPSET_MAXNAMELEN);
- strncpy(from->name, to->name, IPSET_MAXNAMELEN);
- strncpy(to->name, from_name, IPSET_MAXNAMELEN);
+ strscpy_pad(from_name, from->name, IPSET_MAXNAMELEN);
+ strscpy_pad(from->name, to->name, IPSET_MAXNAMELEN);
+ strscpy_pad(to->name, from_name, IPSET_MAXNAMELEN);
swap(from->ref, to->ref);
ip_set(inst, from_id) = to;
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index cb83ca506c5c..3230506ae3ff 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1346,7 +1346,7 @@ ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *stat
if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT &&
af == AF_INET)) {
- if (sk->sk_family == PF_INET && inet_sk(sk)->nodefrag)
+ if (sk->sk_family == PF_INET && inet_test_bit(NODEFRAG, sk))
return NF_ACCEPT;
}
@@ -1946,7 +1946,7 @@ ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state
if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT &&
af == AF_INET)) {
- if (sk->sk_family == PF_INET && inet_sk(sk)->nodefrag)
+ if (sk->sk_family == PF_INET && inet_test_bit(NODEFRAG, sk))
return NF_ACCEPT;
}
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 62606fb44d02..143a341bbc0a 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1876,6 +1876,7 @@ static int
proc_do_sync_threshold(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
+ struct netns_ipvs *ipvs = table->extra2;
int *valp = table->data;
int val[2];
int rc;
@@ -1885,6 +1886,7 @@ proc_do_sync_threshold(struct ctl_table *table, int write,
.mode = table->mode,
};
+ mutex_lock(&ipvs->sync_mutex);
memcpy(val, valp, sizeof(val));
rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
if (write) {
@@ -1894,6 +1896,7 @@ proc_do_sync_threshold(struct ctl_table *table, int write,
else
memcpy(valp, val, sizeof(val));
}
+ mutex_unlock(&ipvs->sync_mutex);
return rc;
}
@@ -4266,6 +4269,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
struct net *net = ipvs->net;
struct ctl_table *tbl;
int idx, ret;
+ size_t ctl_table_size = ARRAY_SIZE(vs_vars);
atomic_set(&ipvs->dropentry, 0);
spin_lock_init(&ipvs->dropentry_lock);
@@ -4282,8 +4286,10 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
return -ENOMEM;
/* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns)
+ if (net->user_ns != &init_user_ns) {
tbl[0].procname = NULL;
+ ctl_table_size = 0;
+ }
} else
tbl = vs_vars;
/* Initialize sysctl defaults */
@@ -4321,6 +4327,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
tbl[idx].data = &ipvs->sysctl_sync_threshold;
+ tbl[idx].extra2 = ipvs;
tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
@@ -4353,7 +4360,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
#endif
ret = -ENOMEM;
- ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
+ ipvs->sysctl_hdr = register_net_sysctl_sz(net, "net/ipv4/vs", tbl,
+ ctl_table_size);
if (!ipvs->sysctl_hdr)
goto err;
ipvs->sysctl_tbl = tbl;
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 1b87214d385e..cf78ba4ce5ff 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -550,6 +550,7 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler = {
static int __net_init __ip_vs_lblc_init(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
+ size_t vars_table_size = ARRAY_SIZE(vs_vars_table);
if (!ipvs)
return -ENOENT;
@@ -562,16 +563,19 @@ static int __net_init __ip_vs_lblc_init(struct net *net)
return -ENOMEM;
/* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns)
+ if (net->user_ns != &init_user_ns) {
ipvs->lblc_ctl_table[0].procname = NULL;
+ vars_table_size = 0;
+ }
} else
ipvs->lblc_ctl_table = vs_vars_table;
ipvs->sysctl_lblc_expiration = DEFAULT_EXPIRATION;
ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration;
- ipvs->lblc_ctl_header =
- register_net_sysctl(net, "net/ipv4/vs", ipvs->lblc_ctl_table);
+ ipvs->lblc_ctl_header = register_net_sysctl_sz(net, "net/ipv4/vs",
+ ipvs->lblc_ctl_table,
+ vars_table_size);
if (!ipvs->lblc_ctl_header) {
if (!net_eq(net, &init_net))
kfree(ipvs->lblc_ctl_table);
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index ad8f5fea6d3a..9eddf118b40e 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -736,6 +736,7 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
static int __net_init __ip_vs_lblcr_init(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
+ size_t vars_table_size = ARRAY_SIZE(vs_vars_table);
if (!ipvs)
return -ENOENT;
@@ -748,15 +749,18 @@ static int __net_init __ip_vs_lblcr_init(struct net *net)
return -ENOMEM;
/* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns)
+ if (net->user_ns != &init_user_ns) {
ipvs->lblcr_ctl_table[0].procname = NULL;
+ vars_table_size = 0;
+ }
} else
ipvs->lblcr_ctl_table = vs_vars_table;
ipvs->sysctl_lblcr_expiration = DEFAULT_EXPIRATION;
ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration;
- ipvs->lblcr_ctl_header =
- register_net_sysctl(net, "net/ipv4/vs", ipvs->lblcr_ctl_table);
+ ipvs->lblcr_ctl_header = register_net_sysctl_sz(net, "net/ipv4/vs",
+ ipvs->lblcr_ctl_table,
+ vars_table_size);
if (!ipvs->lblcr_ctl_header) {
if (!net_eq(net, &init_net))
kfree(ipvs->lblcr_ctl_table);
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 264f2f87a437..da5af28ff57b 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1297,11 +1297,9 @@ static void set_sock_size(struct sock *sk, int mode, int val)
*/
static void set_mcast_loop(struct sock *sk, u_char loop)
{
- struct inet_sock *inet = inet_sk(sk);
-
/* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */
lock_sock(sk);
- inet->mc_loop = loop ? 1 : 0;
+ inet_assign_bit(MC_LOOP, sk, loop);
#ifdef CONFIG_IP_VS_IPV6
if (sk->sk_family == AF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
index c36da56d756f..e502ec00b2fe 100644
--- a/net/netfilter/nf_bpf_link.c
+++ b/net/netfilter/nf_bpf_link.c
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <linux/filter.h>
+#include <linux/kmod.h>
+#include <linux/module.h>
#include <linux/netfilter.h>
#include <net/netfilter/nf_bpf_link.h>
@@ -23,8 +25,90 @@ struct bpf_nf_link {
struct nf_hook_ops hook_ops;
struct net *net;
u32 dead;
+ const struct nf_defrag_hook *defrag_hook;
};
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
+static const struct nf_defrag_hook *
+get_proto_defrag_hook(struct bpf_nf_link *link,
+ const struct nf_defrag_hook __rcu *global_hook,
+ const char *mod)
+{
+ const struct nf_defrag_hook *hook;
+ int err;
+
+ /* RCU protects us from races against module unloading */
+ rcu_read_lock();
+ hook = rcu_dereference(global_hook);
+ if (!hook) {
+ rcu_read_unlock();
+ err = request_module(mod);
+ if (err)
+ return ERR_PTR(err < 0 ? err : -EINVAL);
+
+ rcu_read_lock();
+ hook = rcu_dereference(global_hook);
+ }
+
+ if (hook && try_module_get(hook->owner)) {
+ /* Once we have a refcnt on the module, we no longer need RCU */
+ hook = rcu_pointer_handoff(hook);
+ } else {
+ WARN_ONCE(!hook, "%s has bad registration", mod);
+ hook = ERR_PTR(-ENOENT);
+ }
+ rcu_read_unlock();
+
+ if (!IS_ERR(hook)) {
+ err = hook->enable(link->net);
+ if (err) {
+ module_put(hook->owner);
+ hook = ERR_PTR(err);
+ }
+ }
+
+ return hook;
+}
+#endif
+
+static int bpf_nf_enable_defrag(struct bpf_nf_link *link)
+{
+ const struct nf_defrag_hook __maybe_unused *hook;
+
+ switch (link->hook_ops.pf) {
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
+ case NFPROTO_IPV4:
+ hook = get_proto_defrag_hook(link, nf_defrag_v4_hook, "nf_defrag_ipv4");
+ if (IS_ERR(hook))
+ return PTR_ERR(hook);
+
+ link->defrag_hook = hook;
+ return 0;
+#endif
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
+ case NFPROTO_IPV6:
+ hook = get_proto_defrag_hook(link, nf_defrag_v6_hook, "nf_defrag_ipv6");
+ if (IS_ERR(hook))
+ return PTR_ERR(hook);
+
+ link->defrag_hook = hook;
+ return 0;
+#endif
+ default:
+ return -EAFNOSUPPORT;
+ }
+}
+
+static void bpf_nf_disable_defrag(struct bpf_nf_link *link)
+{
+ const struct nf_defrag_hook *hook = link->defrag_hook;
+
+ if (!hook)
+ return;
+ hook->disable(link->net);
+ module_put(hook->owner);
+}
+
static void bpf_nf_link_release(struct bpf_link *link)
{
struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
@@ -32,11 +116,11 @@ static void bpf_nf_link_release(struct bpf_link *link)
if (nf_link->dead)
return;
- /* prevent hook-not-found warning splat from netfilter core when
- * .detach was already called
- */
- if (!cmpxchg(&nf_link->dead, 0, 1))
+ /* do not double release in case .detach was already called */
+ if (!cmpxchg(&nf_link->dead, 0, 1)) {
nf_unregister_net_hook(nf_link->net, &nf_link->hook_ops);
+ bpf_nf_disable_defrag(nf_link);
+ }
}
static void bpf_nf_link_dealloc(struct bpf_link *link)
@@ -92,6 +176,8 @@ static const struct bpf_link_ops bpf_nf_link_lops = {
static int bpf_nf_check_pf_and_hooks(const union bpf_attr *attr)
{
+ int prio;
+
switch (attr->link_create.netfilter.pf) {
case NFPROTO_IPV4:
case NFPROTO_IPV6:
@@ -102,19 +188,18 @@ static int bpf_nf_check_pf_and_hooks(const union bpf_attr *attr)
return -EAFNOSUPPORT;
}
- if (attr->link_create.netfilter.flags)
+ if (attr->link_create.netfilter.flags & ~BPF_F_NETFILTER_IP_DEFRAG)
return -EOPNOTSUPP;
- /* make sure conntrack confirm is always last.
- *
- * In the future, if userspace can e.g. request defrag, then
- * "defrag_requested && prio before NF_IP_PRI_CONNTRACK_DEFRAG"
- * should fail.
- */
- switch (attr->link_create.netfilter.priority) {
- case NF_IP_PRI_FIRST: return -ERANGE; /* sabotage_in and other warts */
- case NF_IP_PRI_LAST: return -ERANGE; /* e.g. conntrack confirm */
- }
+ /* make sure conntrack confirm is always last */
+ prio = attr->link_create.netfilter.priority;
+ if (prio == NF_IP_PRI_FIRST)
+ return -ERANGE; /* sabotage_in and other warts */
+ else if (prio == NF_IP_PRI_LAST)
+ return -ERANGE; /* e.g. conntrack confirm */
+ else if ((attr->link_create.netfilter.flags & BPF_F_NETFILTER_IP_DEFRAG) &&
+ prio <= NF_IP_PRI_CONNTRACK_DEFRAG)
+ return -ERANGE; /* cannot use defrag if prog runs before nf_defrag */
return 0;
}
@@ -149,6 +234,7 @@ int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
link->net = net;
link->dead = false;
+ link->defrag_hook = NULL;
err = bpf_link_prime(&link->link, &link_primer);
if (err) {
@@ -156,8 +242,17 @@ int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
return err;
}
+ if (attr->link_create.netfilter.flags & BPF_F_NETFILTER_IP_DEFRAG) {
+ err = bpf_nf_enable_defrag(link);
+ if (err) {
+ bpf_link_cleanup(&link_primer);
+ return err;
+ }
+ }
+
err = nf_register_net_hook(net, &link->hook_ops);
if (err) {
+ bpf_nf_disable_defrag(link);
bpf_link_cleanup(&link_primer);
return err;
}
diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
index 0d36d7285e3f..c7a6114091ae 100644
--- a/net/netfilter/nf_conntrack_bpf.c
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -14,6 +14,7 @@
#include <linux/types.h>
#include <linux/btf_ids.h>
#include <linux/net_namespace.h>
+#include <net/xdp.h>
#include <net/netfilter/nf_conntrack_bpf.h>
#include <net/netfilter/nf_conntrack_core.h>
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index d119f1d4c2fc..9f6f2e643575 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -211,24 +211,18 @@ static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
unsigned int zoneid,
const struct net *net)
{
- u64 a, b, c, d;
+ siphash_key_t key;
get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd));
- /* The direction must be ignored, handle usable tuplehash members manually */
- a = (u64)tuple->src.u3.all[0] << 32 | tuple->src.u3.all[3];
- b = (u64)tuple->dst.u3.all[0] << 32 | tuple->dst.u3.all[3];
+ key = nf_conntrack_hash_rnd;
- c = (__force u64)tuple->src.u.all << 32 | (__force u64)tuple->dst.u.all << 16;
- c |= tuple->dst.protonum;
+ key.key[0] ^= zoneid;
+ key.key[1] ^= net_hash_mix(net);
- d = (u64)zoneid << 32 | net_hash_mix(net);
-
- /* IPv4: u3.all[1,2,3] == 0 */
- c ^= (u64)tuple->src.u3.all[1] << 32 | tuple->src.u3.all[2];
- d += (u64)tuple->dst.u3.all[1] << 32 | tuple->dst.u3.all[2];
-
- return (u32)siphash_4u64(a, b, c, d, &nf_conntrack_hash_rnd);
+ return siphash((void *)tuple,
+ offsetofend(struct nf_conntrack_tuple, dst.__nfct_hash_offsetend),
+ &key);
}
static u32 scale_hash(u32 hash)
@@ -1762,7 +1756,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
cnet = nf_ct_pernet(net);
if (cnet->expect_count) {
spin_lock_bh(&nf_conntrack_expect_lock);
- exp = nf_ct_find_expectation(net, zone, tuple);
+ exp = nf_ct_find_expectation(net, zone, tuple, !tmpl || nf_ct_is_confirmed(tmpl));
if (exp) {
/* Welcome, Mr. Bond. We've been expecting you... */
__set_bit(IPS_EXPECTED_BIT, &ct->status);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 96948e98ec53..81ca348915c9 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -171,7 +171,7 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
struct nf_conntrack_expect *
nf_ct_find_expectation(struct net *net,
const struct nf_conntrack_zone *zone,
- const struct nf_conntrack_tuple *tuple)
+ const struct nf_conntrack_tuple *tuple, bool unlink)
{
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
struct nf_conntrack_expect *i, *exp = NULL;
@@ -211,7 +211,7 @@ nf_ct_find_expectation(struct net *net,
!refcount_inc_not_zero(&exp->master->ct_general.use)))
return NULL;
- if (exp->flags & NF_CT_EXPECT_PERMANENT) {
+ if (exp->flags & NF_CT_EXPECT_PERMANENT || !unlink) {
refcount_inc(&exp->use);
return exp;
} else if (del_timer(&exp->timeout)) {
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 0c4db2f2ac43..f22691f83853 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -360,6 +360,9 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES);
BUG_ON(strlen(me->name) > NF_CT_HELPER_NAME_LEN - 1);
+ if (!nf_ct_helper_hash)
+ return -ENOENT;
+
if (me->expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT)
return -EINVAL;
@@ -515,4 +518,5 @@ int nf_conntrack_helper_init(void)
void nf_conntrack_helper_fini(void)
{
kvfree(nf_ct_helper_hash);
+ nf_ct_helper_hash = NULL;
}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 69c8c8c7e9b8..334db22199c1 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1321,15 +1321,11 @@ static int ctnetlink_parse_tuple_ip(struct nlattr *attr,
struct nlattr *tb[CTA_IP_MAX+1];
int ret = 0;
- ret = nla_parse_nested_deprecated(tb, CTA_IP_MAX, attr, NULL, NULL);
+ ret = nla_parse_nested_deprecated(tb, CTA_IP_MAX, attr,
+ cta_ip_nla_policy, NULL);
if (ret < 0)
return ret;
- ret = nla_validate_nested_deprecated(attr, CTA_IP_MAX,
- cta_ip_nla_policy, NULL);
- if (ret)
- return ret;
-
switch (tuple->src.l3num) {
case NFPROTO_IPV4:
ret = ipv4_nlattr_to_tuple(tb, tuple, flags);
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index d4fd626d2b8c..e2db1f4ec2df 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -69,6 +69,7 @@
#define DCCP_MSL (2 * 60 * HZ)
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
static const char * const dccp_state_names[] = {
[CT_DCCP_NONE] = "NONE",
[CT_DCCP_REQUEST] = "REQUEST",
@@ -81,6 +82,7 @@ static const char * const dccp_state_names[] = {
[CT_DCCP_IGNORE] = "IGNORE",
[CT_DCCP_INVALID] = "INVALID",
};
+#endif
#define sNO CT_DCCP_NONE
#define sRQ CT_DCCP_REQUEST
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index ad6f0ca40cd2..af369e686fc5 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -205,6 +205,8 @@ int nf_conntrack_gre_packet(struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
const struct nf_hook_state *state)
{
+ unsigned long status;
+
if (!nf_ct_is_confirmed(ct)) {
unsigned int *timeouts = nf_ct_timeout_lookup(ct);
@@ -217,11 +219,17 @@ int nf_conntrack_gre_packet(struct nf_conn *ct,
ct->proto.gre.timeout = timeouts[GRE_CT_UNREPLIED];
}
+ status = READ_ONCE(ct->status);
/* If we've seen traffic both ways, this is a GRE connection.
* Extend timeout. */
- if (ct->status & IPS_SEEN_REPLY) {
+ if (status & IPS_SEEN_REPLY) {
nf_ct_refresh_acct(ct, ctinfo, skb,
ct->proto.gre.stream_timeout);
+
+ /* never set ASSURED for IPS_NAT_CLASH, they time out soon */
+ if (unlikely((status & IPS_NAT_CLASH)))
+ return NF_ACCEPT;
+
/* Also, more likely to be important, and not a probe. */
if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
nf_conntrack_event_cache(IPCT_ASSURED, ct);
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 91eacc9b0b98..b6bcc8f2f46b 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -49,8 +49,8 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = {
[SCTP_CONNTRACK_COOKIE_WAIT] = 3 SECS,
[SCTP_CONNTRACK_COOKIE_ECHOED] = 3 SECS,
[SCTP_CONNTRACK_ESTABLISHED] = 210 SECS,
- [SCTP_CONNTRACK_SHUTDOWN_SENT] = 300 SECS / 1000,
- [SCTP_CONNTRACK_SHUTDOWN_RECD] = 300 SECS / 1000,
+ [SCTP_CONNTRACK_SHUTDOWN_SENT] = 3 SECS,
+ [SCTP_CONNTRACK_SHUTDOWN_RECD] = 3 SECS,
[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS,
[SCTP_CONNTRACK_HEARTBEAT_SENT] = 30 SECS,
};
@@ -105,7 +105,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
{
/* ORIGINAL */
/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */
-/* init */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCW},
+/* init */ {sCL, sCL, sCW, sCE, sES, sCL, sCL, sSA, sCW},
/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},
/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL},
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 169e16fc2bce..0ee98ce5b816 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -1106,7 +1106,9 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
table[NF_SYSCTL_CT_BUCKETS].mode = 0444;
}
- cnet->sysctl_header = register_net_sysctl(net, "net/netfilter", table);
+ cnet->sysctl_header = register_net_sysctl_sz(net, "net/netfilter",
+ table,
+ ARRAY_SIZE(nf_ct_sysctl_table));
if (!cnet->sysctl_header)
goto out_unregister_netfilter;
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 1c26f03fc661..a010b25076ca 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -34,7 +34,7 @@ static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
{
struct nf_flow_key *mask = &match->mask;
struct nf_flow_key *key = &match->key;
- unsigned int enc_keys;
+ unsigned long long enc_keys;
if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))
return;
@@ -43,8 +43,8 @@ static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id);
mask->enc_key_id.keyid = 0xffffffff;
- enc_keys = BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
- BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL);
+ enc_keys = BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) |
+ BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL);
if (ip_tunnel_info_af(tun_info) == AF_INET) {
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
@@ -55,7 +55,7 @@ static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
mask->enc_ipv4.src = 0xffffffff;
if (key->enc_ipv4.dst)
mask->enc_ipv4.dst = 0xffffffff;
- enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
+ enc_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
} else {
memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst,
@@ -70,7 +70,7 @@ static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
sizeof(struct in6_addr)))
memset(&mask->enc_ipv6.dst, 0xff,
sizeof(struct in6_addr));
- enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
+ enc_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
}
@@ -163,14 +163,14 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
return -EOPNOTSUPP;
}
mask->control.addr_type = 0xffff;
- match->dissector.used_keys |= BIT(key->control.addr_type);
+ match->dissector.used_keys |= BIT_ULL(key->control.addr_type);
mask->basic.n_proto = 0xffff;
switch (tuple->l4proto) {
case IPPROTO_TCP:
key->tcp.flags = 0;
mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
- match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
+ match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_TCP);
break;
case IPPROTO_UDP:
case IPPROTO_GRE:
@@ -182,9 +182,9 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
key->basic.ip_proto = tuple->l4proto;
mask->basic.ip_proto = 0xff;
- match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
- BIT(FLOW_DISSECTOR_KEY_CONTROL) |
- BIT(FLOW_DISSECTOR_KEY_BASIC);
+ match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_META) |
+ BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) |
+ BIT_ULL(FLOW_DISSECTOR_KEY_BASIC);
switch (tuple->l4proto) {
case IPPROTO_TCP:
@@ -194,7 +194,7 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
key->tp.dst = tuple->dst_port;
mask->tp.dst = 0xffff;
- match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_PORTS);
+ match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_PORTS);
break;
}
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 8a29290149bd..8cc52d2bd31b 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -487,9 +487,10 @@ static int netfilter_log_sysctl_init(struct net *net)
for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++)
table[i].extra2 = net;
- net->nf.nf_log_dir_header = register_net_sysctl(net,
- "net/netfilter/nf_log",
- table);
+ net->nf.nf_log_dir_header = register_net_sysctl_sz(net,
+ "net/netfilter/nf_log",
+ table,
+ ARRAY_SIZE(nf_log_sysctl_table));
if (!net->nf.nf_log_dir_header)
goto err_reg;
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index fadbd4ed3dc0..c4e0516a8dfa 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -327,7 +327,7 @@ static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
/* If we source map this tuple so reply looks like reply_tuple, will
* that meet the constraints of range.
*/
-static int in_range(const struct nf_conntrack_tuple *tuple,
+static int nf_in_range(const struct nf_conntrack_tuple *tuple,
const struct nf_nat_range2 *range)
{
/* If we are supposed to map IPs, then we must be in the
@@ -376,7 +376,7 @@ find_appropriate_src(struct net *net,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
result->dst = tuple->dst;
- if (in_range(result, range))
+ if (nf_in_range(result, range))
return 1;
}
}
@@ -607,7 +607,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
if (maniptype == NF_NAT_MANIP_SRC &&
!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
/* try the original tuple first */
- if (in_range(orig_tuple, range)) {
+ if (nf_in_range(orig_tuple, range)) {
if (!nf_nat_used_tuple(orig_tuple, ct)) {
*tuple = *orig_tuple;
return;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 9573a8fcad79..41b826dff6f5 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -31,7 +31,9 @@ static LIST_HEAD(nf_tables_expressions);
static LIST_HEAD(nf_tables_objects);
static LIST_HEAD(nf_tables_flowtables);
static LIST_HEAD(nf_tables_destroy_list);
+static LIST_HEAD(nf_tables_gc_list);
static DEFINE_SPINLOCK(nf_tables_destroy_list_lock);
+static DEFINE_SPINLOCK(nf_tables_gc_list_lock);
enum {
NFT_VALIDATE_SKIP = 0,
@@ -120,6 +122,9 @@ static void nft_validate_state_update(struct nft_table *table, u8 new_validate_s
static void nf_tables_trans_destroy_work(struct work_struct *w);
static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work);
+static void nft_trans_gc_work(struct work_struct *work);
+static DECLARE_WORK(trans_gc_work, nft_trans_gc_work);
+
static void nft_ctx_init(struct nft_ctx *ctx,
struct net *net,
const struct sk_buff *skb,
@@ -253,8 +258,10 @@ int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain)
if (chain->bound)
return -EBUSY;
+ if (!nft_use_inc(&chain->use))
+ return -EMFILE;
+
chain->bound = true;
- chain->use++;
nft_chain_trans_bind(ctx, chain);
return 0;
@@ -437,7 +444,7 @@ static int nft_delchain(struct nft_ctx *ctx)
if (IS_ERR(trans))
return PTR_ERR(trans);
- ctx->table->use--;
+ nft_use_dec(&ctx->table->use);
nft_deactivate_next(ctx->net, ctx->chain);
return 0;
@@ -476,7 +483,7 @@ nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule)
/* You cannot delete the same rule twice */
if (nft_is_active_next(ctx->net, rule)) {
nft_deactivate_next(ctx->net, rule);
- ctx->chain->use--;
+ nft_use_dec(&ctx->chain->use);
return 0;
}
return -ENOENT;
@@ -580,10 +587,6 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
return __nft_trans_set_add(ctx, msg_type, set, NULL);
}
-static void nft_setelem_data_deactivate(const struct net *net,
- const struct nft_set *set,
- struct nft_set_elem *elem);
-
static int nft_mapelem_deactivate(const struct nft_ctx *ctx,
struct nft_set *set,
const struct nft_set_iter *iter,
@@ -644,7 +647,7 @@ static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
nft_map_deactivate(ctx, set);
nft_deactivate_next(ctx->net, set);
- ctx->table->use--;
+ nft_use_dec(&ctx->table->use);
return err;
}
@@ -676,7 +679,7 @@ static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj)
return err;
nft_deactivate_next(ctx->net, obj);
- ctx->table->use--;
+ nft_use_dec(&ctx->table->use);
return err;
}
@@ -711,7 +714,7 @@ static int nft_delflowtable(struct nft_ctx *ctx,
return err;
nft_deactivate_next(ctx->net, flowtable);
- ctx->table->use--;
+ nft_use_dec(&ctx->table->use);
return err;
}
@@ -1370,7 +1373,7 @@ static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info,
if (table == NULL)
goto err_kzalloc;
- table->validate_state = NFT_VALIDATE_SKIP;
+ table->validate_state = nft_net->validate_state;
table->name = nla_strdup(attr, GFP_KERNEL_ACCOUNT);
if (table->name == NULL)
goto err_strdup;
@@ -2396,9 +2399,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
struct nft_chain *chain;
int err;
- if (table->use == UINT_MAX)
- return -EOVERFLOW;
-
if (nla[NFTA_CHAIN_HOOK]) {
struct nft_stats __percpu *stats = NULL;
struct nft_chain_hook hook = {};
@@ -2494,6 +2494,11 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
if (err < 0)
goto err_destroy_chain;
+ if (!nft_use_inc(&table->use)) {
+ err = -EMFILE;
+ goto err_use;
+ }
+
trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
@@ -2510,10 +2515,11 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
goto err_unregister_hook;
}
- table->use++;
-
return 0;
+
err_unregister_hook:
+ nft_use_dec_restore(&table->use);
+err_use:
nf_tables_unregister_hook(net, table, chain);
err_destroy_chain:
nf_tables_chain_destroy(ctx);
@@ -2694,7 +2700,7 @@ err_hooks:
static struct nft_chain *nft_chain_lookup_byid(const struct net *net,
const struct nft_table *table,
- const struct nlattr *nla)
+ const struct nlattr *nla, u8 genmask)
{
struct nftables_pernet *nft_net = nft_pernet(net);
u32 id = ntohl(nla_get_be32(nla));
@@ -2705,7 +2711,8 @@ static struct nft_chain *nft_chain_lookup_byid(const struct net *net,
if (trans->msg_type == NFT_MSG_NEWCHAIN &&
chain->table == table &&
- id == nft_trans_chain_id(trans))
+ id == nft_trans_chain_id(trans) &&
+ nft_active_genmask(chain, genmask))
return chain;
}
return ERR_PTR(-ENOENT);
@@ -3668,6 +3675,9 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
return -EMLINK;
list_for_each_entry(rule, &chain->rules, list) {
+ if (fatal_signal_pending(current))
+ return -EINTR;
+
if (!nft_is_active_next(ctx->net, rule))
continue;
@@ -3679,8 +3689,6 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
if (err < 0)
return err;
}
-
- cond_resched();
}
return 0;
@@ -3704,6 +3712,8 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
err = nft_chain_validate(&ctx, chain);
if (err < 0)
return err;
+
+ cond_resched();
}
return 0;
@@ -3805,11 +3815,10 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
return PTR_ERR(chain);
}
- if (nft_chain_is_bound(chain))
- return -EOPNOTSUPP;
} else if (nla[NFTA_RULE_CHAIN_ID]) {
- chain = nft_chain_lookup_byid(net, table, nla[NFTA_RULE_CHAIN_ID]);
+ chain = nft_chain_lookup_byid(net, table, nla[NFTA_RULE_CHAIN_ID],
+ genmask);
if (IS_ERR(chain)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN_ID]);
return PTR_ERR(chain);
@@ -3818,6 +3827,9 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
return -EINVAL;
}
+ if (nft_chain_is_bound(chain))
+ return -EOPNOTSUPP;
+
if (nla[NFTA_RULE_HANDLE]) {
handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE]));
rule = __nft_rule_lookup(chain, handle);
@@ -3840,9 +3852,6 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
return -EINVAL;
handle = nf_tables_alloc_handle(table);
- if (chain->use == UINT_MAX)
- return -EOVERFLOW;
-
if (nla[NFTA_RULE_POSITION]) {
pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
old_rule = __nft_rule_lookup(chain, pos_handle);
@@ -3936,6 +3945,11 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
}
}
+ if (!nft_use_inc(&chain->use)) {
+ err = -EMFILE;
+ goto err_release_rule;
+ }
+
if (info->nlh->nlmsg_flags & NLM_F_REPLACE) {
err = nft_delrule(&ctx, old_rule);
if (err < 0)
@@ -3967,7 +3981,6 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
}
}
kvfree(expr_info);
- chain->use++;
if (flow)
nft_trans_flow_rule(trans) = flow;
@@ -3978,6 +3991,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
return 0;
err_destroy_flow_rule:
+ nft_use_dec_restore(&chain->use);
if (flow)
nft_flow_rule_destroy(flow);
err_release_rule:
@@ -4078,6 +4092,8 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info,
list_for_each_entry(chain, &table->chains, list) {
if (!nft_is_active_next(net, chain))
continue;
+ if (nft_chain_is_bound(chain))
+ continue;
ctx.chain = chain;
err = nft_delrule_by_chain(&ctx);
@@ -5014,9 +5030,15 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
alloc_size = sizeof(*set) + size + udlen;
if (alloc_size < size || alloc_size > INT_MAX)
return -ENOMEM;
+
+ if (!nft_use_inc(&table->use))
+ return -EMFILE;
+
set = kvzalloc(alloc_size, GFP_KERNEL_ACCOUNT);
- if (!set)
- return -ENOMEM;
+ if (!set) {
+ err = -ENOMEM;
+ goto err_alloc;
+ }
name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL_ACCOUNT);
if (!name) {
@@ -5037,6 +5059,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
INIT_LIST_HEAD(&set->bindings);
INIT_LIST_HEAD(&set->catchall_list);
+ refcount_set(&set->refs, 1);
set->table = table;
write_pnet(&set->net, net);
set->ops = ops;
@@ -5074,7 +5097,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
goto err_set_expr_alloc;
list_add_tail_rcu(&set->list, &table->sets);
- table->use++;
+
return 0;
err_set_expr_alloc:
@@ -5086,6 +5109,9 @@ err_set_init:
kfree(set->name);
err_set_name:
kvfree(set);
+err_alloc:
+ nft_use_dec_restore(&table->use);
+
return err;
}
@@ -5101,6 +5127,14 @@ static void nft_set_catchall_destroy(const struct nft_ctx *ctx,
}
}
+/* Drop one reference on @set. When refcount_dec_and_test() reports that this
+ * was the final reference, free the set name and then the set itself.
+ */
+static void nft_set_put(struct nft_set *set)
+{
+	if (!refcount_dec_and_test(&set->refs))
+		return;
+
+	kfree(set->name);
+	kvfree(set);
+}
+
static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
{
int i;
@@ -5113,8 +5147,7 @@ static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
set->ops->destroy(ctx, set);
nft_set_catchall_destroy(ctx, set);
- kfree(set->name);
- kvfree(set);
+ nft_set_put(set);
}
static int nf_tables_delset(struct sk_buff *skb, const struct nfnl_info *info,
@@ -5224,9 +5257,6 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_binding *i;
struct nft_set_iter iter;
- if (set->use == UINT_MAX)
- return -EOVERFLOW;
-
if (!list_empty(&set->bindings) && nft_set_is_anonymous(set))
return -EBUSY;
@@ -5254,10 +5284,12 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
return iter.err;
}
bind:
+ if (!nft_use_inc(&set->use))
+ return -EMFILE;
+
binding->chain = ctx->chain;
list_add_tail_rcu(&binding->list, &set->bindings);
nft_set_trans_bind(ctx, set);
- set->use++;
return 0;
}
@@ -5331,7 +5363,7 @@ void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set)
nft_clear(ctx->net, set);
}
- set->use++;
+ nft_use_inc_restore(&set->use);
}
EXPORT_SYMBOL_GPL(nf_tables_activate_set);
@@ -5347,7 +5379,7 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
else
list_del_rcu(&binding->list);
- set->use--;
+ nft_use_dec(&set->use);
break;
case NFT_TRANS_PREPARE:
if (nft_set_is_anonymous(set)) {
@@ -5356,7 +5388,7 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
nft_deactivate_next(ctx->net, set);
}
- set->use--;
+ nft_use_dec(&set->use);
return;
case NFT_TRANS_ABORT:
case NFT_TRANS_RELEASE:
@@ -5364,7 +5396,7 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
nft_map_deactivate(ctx, set);
- set->use--;
+ nft_use_dec(&set->use);
fallthrough;
default:
nf_tables_unbind_set(ctx, set, binding,
@@ -5582,8 +5614,12 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
const struct nft_set_iter *iter,
struct nft_set_elem *elem)
{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
struct nft_set_dump_args *args;
+ if (nft_set_elem_expired(ext))
+ return 0;
+
args = container_of(iter, struct nft_set_dump_args, iter);
return nf_tables_fill_setelem(args->skb, set, elem, args->reset);
}
@@ -6155,7 +6191,7 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
nft_set_elem_expr_destroy(&ctx, nft_set_ext_expr(ext));
if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
- (*nft_set_ext_obj(ext))->use--;
+ nft_use_dec(&(*nft_set_ext_obj(ext))->use);
kfree(elem);
}
EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
@@ -6254,7 +6290,8 @@ struct nft_set_ext *nft_set_catchall_lookup(const struct net *net,
list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
ext = nft_set_elem_ext(set, catchall->elem);
if (nft_set_elem_active(ext, genmask) &&
- !nft_set_elem_expired(ext))
+ !nft_set_elem_expired(ext) &&
+ !nft_set_elem_is_dead(ext))
return ext;
}
@@ -6262,29 +6299,6 @@ struct nft_set_ext *nft_set_catchall_lookup(const struct net *net,
}
EXPORT_SYMBOL_GPL(nft_set_catchall_lookup);
-void *nft_set_catchall_gc(const struct nft_set *set)
-{
- struct nft_set_elem_catchall *catchall, *next;
- struct nft_set_ext *ext;
- void *elem = NULL;
-
- list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
- ext = nft_set_elem_ext(set, catchall->elem);
-
- if (!nft_set_elem_expired(ext) ||
- nft_set_elem_mark_busy(ext))
- continue;
-
- elem = catchall->elem;
- list_del_rcu(&catchall->list);
- kfree_rcu(catchall, rcu);
- break;
- }
-
- return elem;
-}
-EXPORT_SYMBOL_GPL(nft_set_catchall_gc);
-
static int nft_setelem_catchall_insert(const struct net *net,
struct nft_set *set,
const struct nft_set_elem *elem,
@@ -6346,7 +6360,6 @@ static void nft_setelem_activate(struct net *net, struct nft_set *set,
if (nft_setelem_is_catchall(set, elem)) {
nft_set_elem_change_active(net, set, ext);
- nft_set_elem_clear_busy(ext);
} else {
set->ops->activate(net, set, elem);
}
@@ -6361,8 +6374,7 @@ static int nft_setelem_catchall_deactivate(const struct net *net,
list_for_each_entry(catchall, &set->catchall_list, list) {
ext = nft_set_elem_ext(set, catchall->elem);
- if (!nft_is_active(net, ext) ||
- nft_set_elem_mark_busy(ext))
+ if (!nft_is_active(net, ext))
continue;
kfree(elem->priv);
@@ -6657,8 +6669,16 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
set->objtype, genmask);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
+ obj = NULL;
+ goto err_parse_key_end;
+ }
+
+ if (!nft_use_inc(&obj->use)) {
+ err = -EMFILE;
+ obj = NULL;
goto err_parse_key_end;
}
+
err = nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF);
if (err < 0)
goto err_parse_key_end;
@@ -6727,10 +6747,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
if (flags)
*nft_set_ext_flags(ext) = flags;
- if (obj) {
+ if (obj)
*nft_set_ext_obj(ext) = obj;
- obj->use++;
- }
+
if (ulen > 0) {
if (nft_set_ext_check(&tmpl, NFT_SET_EXT_USERDATA, ulen) < 0) {
err = -EINVAL;
@@ -6750,7 +6769,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
goto err_elem_free;
}
- ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK;
+ ext->genmask = nft_genmask_cur(ctx->net);
err = nft_setelem_insert(ctx->net, set, &elem, &ext2, flags);
if (err) {
@@ -6798,12 +6817,13 @@ err_element_clash:
kfree(trans);
err_elem_free:
nf_tables_set_elem_destroy(ctx, set, elem.priv);
- if (obj)
- obj->use--;
err_parse_data:
if (nla[NFTA_SET_ELEM_DATA] != NULL)
nft_data_release(&elem.data.val, desc.type);
err_parse_key_end:
+ if (obj)
+ nft_use_dec_restore(&obj->use);
+
nft_data_release(&elem.key_end.val, NFT_DATA_VALUE);
err_parse_key:
nft_data_release(&elem.key.val, NFT_DATA_VALUE);
@@ -6883,7 +6903,7 @@ void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
case NFT_JUMP:
case NFT_GOTO:
chain = data->verdict.chain;
- chain->use++;
+ nft_use_inc_restore(&chain->use);
break;
}
}
@@ -6898,19 +6918,19 @@ static void nft_setelem_data_activate(const struct net *net,
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
nft_data_hold(nft_set_ext_data(ext), set->dtype);
if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
- (*nft_set_ext_obj(ext))->use++;
+ nft_use_inc_restore(&(*nft_set_ext_obj(ext))->use);
}
-static void nft_setelem_data_deactivate(const struct net *net,
- const struct nft_set *set,
- struct nft_set_elem *elem)
+void nft_setelem_data_deactivate(const struct net *net,
+ const struct nft_set *set,
+ struct nft_set_elem *elem)
{
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
nft_data_release(nft_set_ext_data(ext), set->dtype);
if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
- (*nft_set_ext_obj(ext))->use--;
+ nft_use_dec(&(*nft_set_ext_obj(ext))->use);
}
static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
@@ -7067,14 +7087,14 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx,
list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
ext = nft_set_elem_ext(set, catchall->elem);
- if (!nft_set_elem_active(ext, genmask) ||
- nft_set_elem_mark_busy(ext))
+ if (!nft_set_elem_active(ext, genmask))
continue;
elem.priv = catchall->elem;
ret = __nft_set_catchall_flush(ctx, set, &elem);
if (ret < 0)
break;
+ nft_set_elem_change_active(ctx->net, set, ext);
}
return ret;
@@ -7142,29 +7162,6 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
return err;
}
-void nft_set_gc_batch_release(struct rcu_head *rcu)
-{
- struct nft_set_gc_batch *gcb;
- unsigned int i;
-
- gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu);
- for (i = 0; i < gcb->head.cnt; i++)
- nft_set_elem_destroy(gcb->head.set, gcb->elems[i], true);
- kfree(gcb);
-}
-
-struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
- gfp_t gfp)
-{
- struct nft_set_gc_batch *gcb;
-
- gcb = kzalloc(sizeof(*gcb), gfp);
- if (gcb == NULL)
- return gcb;
- gcb->head.set = set;
- return gcb;
-}
-
/*
* Stateful objects
*/
@@ -7453,9 +7450,14 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+ if (!nft_use_inc(&table->use))
+ return -EMFILE;
+
type = nft_obj_type_get(net, objtype);
- if (IS_ERR(type))
- return PTR_ERR(type);
+ if (IS_ERR(type)) {
+ err = PTR_ERR(type);
+ goto err_type;
+ }
obj = nft_obj_init(&ctx, type, nla[NFTA_OBJ_DATA]);
if (IS_ERR(obj)) {
@@ -7489,7 +7491,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
goto err_obj_ht;
list_add_tail_rcu(&obj->list, &table->objects);
- table->use++;
+
return 0;
err_obj_ht:
/* queued in transaction log */
@@ -7505,6 +7507,9 @@ err_strdup:
kfree(obj);
err_init:
module_put(type->owner);
+err_type:
+ nft_use_dec_restore(&table->use);
+
return err;
}
@@ -7906,7 +7911,7 @@ void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx,
case NFT_TRANS_PREPARE:
case NFT_TRANS_ABORT:
case NFT_TRANS_RELEASE:
- flowtable->use--;
+ nft_use_dec(&flowtable->use);
fallthrough;
default:
return;
@@ -8260,9 +8265,14 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+ if (!nft_use_inc(&table->use))
+ return -EMFILE;
+
flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL_ACCOUNT);
- if (!flowtable)
- return -ENOMEM;
+ if (!flowtable) {
+ err = -ENOMEM;
+ goto flowtable_alloc;
+ }
flowtable->table = table;
flowtable->handle = nf_tables_alloc_handle(table);
@@ -8317,7 +8327,6 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
goto err5;
list_add_tail_rcu(&flowtable->list, &table->flowtables);
- table->use++;
return 0;
err5:
@@ -8334,6 +8343,9 @@ err2:
kfree(flowtable->name);
err1:
kfree(flowtable);
+flowtable_alloc:
+ nft_use_dec_restore(&table->use);
+
return err;
}
@@ -9042,9 +9054,8 @@ static int nf_tables_validate(struct net *net)
return -EAGAIN;
nft_validate_state_update(table, NFT_VALIDATE_SKIP);
+ break;
}
-
- break;
}
return 0;
@@ -9371,6 +9382,212 @@ void nft_chain_del(struct nft_chain *chain)
list_del_rcu(&chain->list);
}
+/* Walk the elements recorded in a GC batch; for each one, deactivate its
+ * data and then remove it from the batch's set.
+ */
+static void nft_trans_gc_setelem_remove(struct nft_ctx *ctx,
+					struct nft_trans_gc *trans)
+{
+	unsigned int idx;
+
+	for (idx = 0; idx < trans->count; idx++) {
+		struct nft_set_elem victim = {
+			.priv	= trans->priv[idx],
+		};
+
+		nft_setelem_data_deactivate(ctx->net, trans->set, &victim);
+		nft_setelem_remove(ctx->net, trans->set, &victim);
+	}
+}
+
+/* Release a GC batch container: drop the set reference and the network
+ * namespace reference stored in @trans, then free @trans itself. The element
+ * pointers recorded in the batch are not touched here.
+ */
+void nft_trans_gc_destroy(struct nft_trans_gc *trans)
+{
+	nft_set_put(trans->set);
+	put_net(trans->net);
+	kfree(trans);
+}
+
+/* RCU callback for a GC batch: destroy every element recorded in the batch,
+ * decrementing the set's element counter for each element that is not a
+ * catchall, and finally release the batch container.
+ */
+static void nft_trans_gc_trans_free(struct rcu_head *rcu)
+{
+	struct nft_trans_gc *gc = container_of(rcu, struct nft_trans_gc, rcu);
+	struct nft_ctx ctx = {
+		.net	= read_pnet(&gc->set->net),
+	};
+	struct nft_set_elem elem = {};
+	unsigned int idx;
+
+	for (idx = 0; idx < gc->count; idx++) {
+		elem.priv = gc->priv[idx];
+
+		/* Only non-catchall elements are counted in set->nelems. */
+		if (!nft_setelem_is_catchall(gc->set, &elem))
+			atomic_dec(&gc->set->nelems);
+
+		nf_tables_set_elem_destroy(&ctx, gc->set, elem.priv);
+	}
+
+	nft_trans_gc_destroy(gc);
+}
+
+/* Apply one queued GC batch while holding the per-netns commit mutex.
+ *
+ * Returns true if the batch's elements were removed from the set, false if
+ * the batch was skipped and nothing was removed.
+ */
+static bool nft_trans_gc_work_done(struct nft_trans_gc *trans)
+{
+	struct nftables_pernet *nft_net = nft_pernet(trans->net);
+	bool applied = false;
+
+	mutex_lock(&nft_net->commit_mutex);
+
+	/* Only act when the GC sequence still matches the one recorded in the
+	 * batch and the set has not been marked dead. Otherwise the batch
+	 * lost a race with a control plane transaction and may refer to stale
+	 * objects, so it is skipped.
+	 */
+	if (READ_ONCE(nft_net->gc_seq) == trans->seq && !trans->set->dead) {
+		struct nft_ctx ctx = {
+			.net	= trans->net,
+			.table	= trans->set->table,
+		};
+
+		nft_trans_gc_setelem_remove(&ctx, trans);
+		applied = true;
+	}
+
+	mutex_unlock(&nft_net->commit_mutex);
+
+	return applied;
+}
+
+/* Work handler: take over everything on the global GC list, then process
+ * each batch. Batches that were applied are freed through call_rcu();
+ * batches that were skipped are released immediately.
+ */
+static void nft_trans_gc_work(struct work_struct *work)
+{
+	struct nft_trans_gc *gc, *tmp;
+	LIST_HEAD(pending);
+
+	/* Detach the whole list under the lock, process it without the lock. */
+	spin_lock(&nf_tables_gc_list_lock);
+	list_splice_init(&nf_tables_gc_list, &pending);
+	spin_unlock(&nf_tables_gc_list_lock);
+
+	list_for_each_entry_safe(gc, tmp, &pending, list) {
+		list_del(&gc->list);
+
+		if (nft_trans_gc_work_done(gc))
+			call_rcu(&gc->rcu, nft_trans_gc_trans_free);
+		else
+			nft_trans_gc_destroy(gc);
+	}
+}
+
+/* Allocate an empty GC batch for @set, tagged with @gc_seq.
+ *
+ * On success the batch holds a reference on the set's network namespace and
+ * one on the set itself. Returns NULL if the allocation fails or if
+ * maybe_get_net() does not hand back a namespace.
+ */
+struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set,
+					unsigned int gc_seq, gfp_t gfp)
+{
+	struct nft_trans_gc *gc;
+
+	gc = kzalloc(sizeof(*gc), gfp);
+	if (gc == NULL)
+		return NULL;
+
+	gc->net = maybe_get_net(read_pnet(&set->net));
+	if (gc->net == NULL)
+		goto err_free;
+
+	refcount_inc(&set->refs);
+	gc->set = set;
+	gc->seq = gc_seq;
+
+	return gc;
+
+err_free:
+	kfree(gc);
+	return NULL;
+}
+
+/* Record @priv in the next free slot of the batch and bump the element
+ * count. No capacity check is performed in this function.
+ */
+void nft_trans_gc_elem_add(struct nft_trans_gc *trans, void *priv)
+{
+	unsigned int slot = trans->count;
+
+	trans->priv[slot] = priv;
+	trans->count = slot + 1;
+}
+
+/* Hand a GC batch to the worker: append it to the global GC list under
+ * nf_tables_gc_list_lock, then schedule the GC work item.
+ */
+static void nft_trans_gc_queue_work(struct nft_trans_gc *trans)
+{
+	spin_lock(&nf_tables_gc_list_lock);
+	list_add_tail(&trans->list, &nf_tables_gc_list);
+	spin_unlock(&nf_tables_gc_list_lock);
+
+	/* Scheduled only after the batch is visible on the list. */
+	schedule_work(&trans_gc_work);
+}
+
+/* Number of element slots still free in the batch, computed as the batch
+ * capacity (NFT_TRANS_GC_BATCHCOUNT) minus the current element count.
+ * Callers in this file treat a zero result as "batch is full".
+ */
+static int nft_trans_gc_space(struct nft_trans_gc *trans)
+{
+	return NFT_TRANS_GC_BATCHCOUNT - trans->count;
+}
+
+/* Return a GC batch that has room for at least one more element.
+ *
+ * If @gc still has space it is returned unchanged. Otherwise @gc is queued
+ * for the GC worker and a new batch for the same set is allocated with
+ * @gc_seq and @gfp. Returns NULL if that allocation fails; in that case @gc
+ * has already been handed over to the worker and must not be used again.
+ */
+struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
+					      unsigned int gc_seq, gfp_t gfp)
+{
+	struct nft_set *set;
+
+	if (nft_trans_gc_space(gc))
+		return gc;
+
+	/* Once queued, the worker owns @gc and may release it at any time
+	 * (a skipped batch is freed right away), so fetch the set pointer
+	 * before handing the batch over instead of reading gc->set afterwards.
+	 */
+	set = gc->set;
+	nft_trans_gc_queue_work(gc);
+
+	return nft_trans_gc_alloc(set, gc_seq, gfp);
+}
+
+/* Finish an asynchronous GC run: queue the batch for the worker if it holds
+ * any elements, otherwise release the empty batch straight away.
+ */
+void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans)
+{
+	if (trans->count != 0)
+		nft_trans_gc_queue_work(trans);
+	else
+		nft_trans_gc_destroy(trans);
+}
+
+/* Synchronous counterpart of nft_trans_gc_queue_async(), for callers that
+ * hold the commit mutex.
+ *
+ * If @gc still has space it is returned unchanged. Otherwise @gc is passed
+ * to call_rcu() for freeing and a new batch for the same set is allocated
+ * with sequence 0 and @gfp. Returns NULL if that allocation fails, or if the
+ * lockdep check for the commit mutex fails; in the latter case @gc is
+ * neither queued nor freed here.
+ */
+struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp)
+{
+	struct nft_set *set;
+
+	if (WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net)))
+		return NULL;
+
+	if (nft_trans_gc_space(gc))
+		return gc;
+
+	/* After call_rcu() the batch may be freed as soon as a grace period
+	 * has elapsed, so fetch the set pointer first instead of reading
+	 * gc->set afterwards.
+	 */
+	set = gc->set;
+	call_rcu(&gc->rcu, nft_trans_gc_trans_free);
+
+	return nft_trans_gc_alloc(set, 0, gfp);
+}
+
+/* Finish a synchronous GC run: free the batch via call_rcu() if it holds any
+ * elements, otherwise release the empty batch straight away. Warns once if
+ * the lockdep check for the commit mutex fails.
+ */
+void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans)
+{
+	WARN_ON_ONCE(!lockdep_commit_lock_is_held(trans->net));
+
+	if (trans->count != 0)
+		call_rcu(&trans->rcu, nft_trans_gc_trans_free);
+	else
+		nft_trans_gc_destroy(trans);
+}
+
+/* Collect expired catchall elements of the batch's set into the GC batch.
+ *
+ * Each expired catchall element is marked dead (unless it already is) and
+ * added to the batch; a full batch is queued and replaced along the way.
+ * Returns the batch to keep using, or NULL if a replacement batch could not
+ * be allocated.
+ */
+struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
+					   unsigned int gc_seq)
+{
+	const struct nft_set *set = gc->set;
+	struct nft_set_elem_catchall *entry;
+	struct nft_set_ext *ext;
+
+	list_for_each_entry_rcu(entry, &set->catchall_list, list) {
+		ext = nft_set_elem_ext(set, entry->elem);
+
+		if (!nft_set_elem_expired(ext))
+			continue;
+
+		/* Flag the element dead only if that has not happened yet. */
+		if (!nft_set_elem_is_dead(ext))
+			nft_set_elem_dead(ext);
+
+		gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+		if (!gc)
+			return NULL;
+
+		nft_trans_gc_elem_add(gc, entry->elem);
+	}
+
+	return gc;
+}
+
static void nf_tables_module_autoload_cleanup(struct net *net)
{
struct nftables_pernet *nft_net = nft_pernet(net);
@@ -9529,15 +9746,31 @@ static void nft_set_commit_update(struct list_head *set_update_list)
}
}
+/* Increment the per-netns GC sequence counter and return the new value, which
+ * the callers in this file later pass to nft_gc_seq_end().
+ */
+static unsigned int nft_gc_seq_begin(struct nftables_pernet *nft_net)
+{
+	unsigned int busy_seq = READ_ONCE(nft_net->gc_seq) + 1;
+
+	/* The bumped counter is odd: that is the busy mark. */
+	WRITE_ONCE(nft_net->gc_seq, busy_seq);
+
+	return busy_seq;
+}
+
+/* Store @gc_seq + 1 as the per-netns GC sequence counter. @gc_seq is the
+ * value previously returned by nft_gc_seq_begin().
+ */
+static void nft_gc_seq_end(struct nftables_pernet *nft_net, unsigned int gc_seq)
+{
+	WRITE_ONCE(nft_net->gc_seq, gc_seq + 1);
+}
+
static int nf_tables_commit(struct net *net, struct sk_buff *skb)
{
struct nftables_pernet *nft_net = nft_pernet(net);
struct nft_trans *trans, *next;
+ unsigned int base_seq, gc_seq;
LIST_HEAD(set_update_list);
struct nft_trans_elem *te;
struct nft_chain *chain;
struct nft_table *table;
- unsigned int base_seq;
LIST_HEAD(adl);
int err;
@@ -9568,8 +9801,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
}
/* 0. Validate ruleset, otherwise roll back for error reporting. */
- if (nf_tables_validate(net) < 0)
+ if (nf_tables_validate(net) < 0) {
+ nft_net->validate_state = NFT_VALIDATE_DO;
return -EAGAIN;
+ }
err = nft_flow_rule_offload_commit(net);
if (err < 0)
@@ -9614,6 +9849,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
WRITE_ONCE(nft_net->base_seq, base_seq);
+ gc_seq = nft_gc_seq_begin(nft_net);
+
/* step 3. Start new generation, rules_gen_X now in use. */
net->nft.gencursor = nft_gencursor_next(net);
@@ -9713,7 +9950,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
*/
if (nft_set_is_anonymous(nft_trans_set(trans)) &&
!list_empty(&nft_trans_set(trans)->bindings))
- trans->ctx.table->use--;
+ nft_use_dec(&trans->ctx.table->use);
}
nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
NFT_MSG_NEWSET, GFP_KERNEL);
@@ -9721,6 +9958,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
break;
case NFT_MSG_DELSET:
case NFT_MSG_DESTROYSET:
+ nft_trans_set(trans)->dead = 1;
list_del_rcu(&nft_trans_set(trans)->list);
nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
trans->msg_type, GFP_KERNEL);
@@ -9823,6 +10061,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_commit_notify(net, NETLINK_CB(skb).portid);
nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
nf_tables_commit_audit_log(&adl, nft_net->base_seq);
+
+ nft_gc_seq_end(nft_net, gc_seq);
+ nft_net->validate_state = NFT_VALIDATE_SKIP;
nf_tables_commit_release(net);
return 0;
@@ -9943,7 +10184,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
}
- trans->ctx.table->use--;
+ nft_use_dec_restore(&trans->ctx.table->use);
nft_chain_del(trans->ctx.chain);
nf_tables_unregister_hook(trans->ctx.net,
trans->ctx.table,
@@ -9956,7 +10197,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
list_splice(&nft_trans_chain_hooks(trans),
&nft_trans_basechain(trans)->hook_list);
} else {
- trans->ctx.table->use++;
+ nft_use_inc_restore(&trans->ctx.table->use);
nft_clear(trans->ctx.net, trans->ctx.chain);
}
nft_trans_destroy(trans);
@@ -9966,7 +10207,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
}
- trans->ctx.chain->use--;
+ nft_use_dec_restore(&trans->ctx.chain->use);
list_del_rcu(&nft_trans_rule(trans)->list);
nft_rule_expr_deactivate(&trans->ctx,
nft_trans_rule(trans),
@@ -9976,7 +10217,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
break;
case NFT_MSG_DELRULE:
case NFT_MSG_DESTROYRULE:
- trans->ctx.chain->use++;
+ nft_use_inc_restore(&trans->ctx.chain->use);
nft_clear(trans->ctx.net, nft_trans_rule(trans));
nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans));
if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
@@ -9989,7 +10230,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
}
- trans->ctx.table->use--;
+ nft_use_dec_restore(&trans->ctx.table->use);
if (nft_trans_set_bound(trans)) {
nft_trans_destroy(trans);
break;
@@ -9998,7 +10239,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
break;
case NFT_MSG_DELSET:
case NFT_MSG_DESTROYSET:
- trans->ctx.table->use++;
+ nft_use_inc_restore(&trans->ctx.table->use);
nft_clear(trans->ctx.net, nft_trans_set(trans));
if (nft_trans_set(trans)->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
nft_map_activate(&trans->ctx, nft_trans_set(trans));
@@ -10042,13 +10283,13 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_obj_destroy(&trans->ctx, nft_trans_obj_newobj(trans));
nft_trans_destroy(trans);
} else {
- trans->ctx.table->use--;
+ nft_use_dec_restore(&trans->ctx.table->use);
nft_obj_del(nft_trans_obj(trans));
}
break;
case NFT_MSG_DELOBJ:
case NFT_MSG_DESTROYOBJ:
- trans->ctx.table->use++;
+ nft_use_inc_restore(&trans->ctx.table->use);
nft_clear(trans->ctx.net, nft_trans_obj(trans));
nft_trans_destroy(trans);
break;
@@ -10057,7 +10298,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_unregister_flowtable_net_hooks(net,
&nft_trans_flowtable_hooks(trans));
} else {
- trans->ctx.table->use--;
+ nft_use_dec_restore(&trans->ctx.table->use);
list_del_rcu(&nft_trans_flowtable(trans)->list);
nft_unregister_flowtable_net_hooks(net,
&nft_trans_flowtable(trans)->hook_list);
@@ -10069,7 +10310,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
list_splice(&nft_trans_flowtable_hooks(trans),
&nft_trans_flowtable(trans)->hook_list);
} else {
- trans->ctx.table->use++;
+ nft_use_inc_restore(&trans->ctx.table->use);
nft_clear(trans->ctx.net, nft_trans_flowtable(trans));
}
nft_trans_destroy(trans);
@@ -10099,8 +10340,12 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb,
enum nfnl_abort_action action)
{
struct nftables_pernet *nft_net = nft_pernet(net);
- int ret = __nf_tables_abort(net, action);
+ unsigned int gc_seq;
+ int ret;
+ gc_seq = nft_gc_seq_begin(nft_net);
+ ret = __nf_tables_abort(net, action);
+ nft_gc_seq_end(nft_net, gc_seq);
mutex_unlock(&nft_net->commit_mutex);
return ret;
@@ -10243,6 +10488,9 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
if (ctx->chain == chain)
return -ELOOP;
+ if (fatal_signal_pending(current))
+ return -EINTR;
+
list_for_each_entry(rule, &chain->rules, list) {
nft_rule_for_each_expr(expr, last, rule) {
struct nft_immediate_expr *priv;
@@ -10477,6 +10725,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
if (!tb[NFTA_VERDICT_CODE])
return -EINVAL;
+
+ /* zero padding hole for memcmp */
+ memset(data, 0, sizeof(*data));
data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
switch (data->verdict.code) {
@@ -10502,7 +10753,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
genmask);
} else if (tb[NFTA_VERDICT_CHAIN_ID]) {
chain = nft_chain_lookup_byid(ctx->net, ctx->table,
- tb[NFTA_VERDICT_CHAIN_ID]);
+ tb[NFTA_VERDICT_CHAIN_ID],
+ genmask);
if (IS_ERR(chain))
return PTR_ERR(chain);
} else {
@@ -10518,8 +10770,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
if (desc->flags & NFT_DATA_DESC_SETELEM &&
chain->flags & NFT_CHAIN_BINDING)
return -EINVAL;
+ if (!nft_use_inc(&chain->use))
+ return -EMFILE;
- chain->use++;
data->verdict.chain = chain;
break;
}
@@ -10537,7 +10790,7 @@ static void nft_verdict_uninit(const struct nft_data *data)
case NFT_JUMP:
case NFT_GOTO:
chain = data->verdict.chain;
- chain->use--;
+ nft_use_dec(&chain->use);
break;
}
}
@@ -10706,11 +10959,11 @@ int __nft_release_basechain(struct nft_ctx *ctx)
nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain);
list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) {
list_del(&rule->list);
- ctx->chain->use--;
+ nft_use_dec(&ctx->chain->use);
nf_tables_rule_release(ctx, rule);
}
nft_chain_del(ctx->chain);
- ctx->table->use--;
+ nft_use_dec(&ctx->table->use);
nf_tables_chain_destroy(ctx);
return 0;
@@ -10757,21 +11010,24 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
ctx.family = table->family;
ctx.table = table;
list_for_each_entry(chain, &table->chains, list) {
+ if (nft_chain_is_bound(chain))
+ continue;
+
ctx.chain = chain;
list_for_each_entry_safe(rule, nr, &chain->rules, list) {
list_del(&rule->list);
- chain->use--;
+ nft_use_dec(&chain->use);
nf_tables_rule_release(&ctx, rule);
}
}
list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) {
list_del(&flowtable->list);
- table->use--;
+ nft_use_dec(&table->use);
nf_tables_flowtable_destroy(flowtable);
}
list_for_each_entry_safe(set, ns, &table->sets, list) {
list_del(&set->list);
- table->use--;
+ nft_use_dec(&table->use);
if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
nft_map_deactivate(&ctx, set);
@@ -10779,13 +11035,13 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
}
list_for_each_entry_safe(obj, ne, &table->objects, list) {
nft_obj_del(obj);
- table->use--;
+ nft_use_dec(&table->use);
nft_obj_destroy(&ctx, obj);
}
list_for_each_entry_safe(chain, nc, &table->chains, list) {
ctx.chain = chain;
nft_chain_del(chain);
- table->use--;
+ nft_use_dec(&table->use);
nf_tables_chain_destroy(&ctx);
}
nf_tables_table_destroy(&ctx);
@@ -10815,6 +11071,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
struct net *net = n->net;
unsigned int deleted;
bool restart = false;
+ unsigned int gc_seq;
if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER)
return NOTIFY_DONE;
@@ -10822,8 +11079,11 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
nft_net = nft_pernet(net);
deleted = 0;
mutex_lock(&nft_net->commit_mutex);
+
+ gc_seq = nft_gc_seq_begin(nft_net);
+
if (!list_empty(&nf_tables_destroy_list))
- rcu_barrier();
+ nf_tables_trans_destroy_flush_work();
again:
list_for_each_entry(table, &nft_net->tables, list) {
if (nft_table_has_owner(table) &&
@@ -10844,6 +11104,8 @@ again:
if (restart)
goto again;
}
+ nft_gc_seq_end(nft_net, gc_seq);
+
mutex_unlock(&nft_net->commit_mutex);
return NOTIFY_DONE;
@@ -10864,6 +11126,8 @@ static int __net_init nf_tables_init_net(struct net *net)
INIT_LIST_HEAD(&nft_net->notify_list);
mutex_init(&nft_net->commit_mutex);
nft_net->base_seq = 1;
+ nft_net->gc_seq = 0;
+ nft_net->validate_state = NFT_VALIDATE_SKIP;
return 0;
}
@@ -10880,22 +11144,36 @@ static void __net_exit nf_tables_pre_exit_net(struct net *net)
static void __net_exit nf_tables_exit_net(struct net *net)
{
struct nftables_pernet *nft_net = nft_pernet(net);
+ unsigned int gc_seq;
mutex_lock(&nft_net->commit_mutex);
+
+ gc_seq = nft_gc_seq_begin(nft_net);
+
if (!list_empty(&nft_net->commit_list) ||
!list_empty(&nft_net->module_list))
__nf_tables_abort(net, NFNL_ABORT_NONE);
+
__nft_release_tables(net);
+
+ nft_gc_seq_end(nft_net, gc_seq);
+
mutex_unlock(&nft_net->commit_mutex);
WARN_ON_ONCE(!list_empty(&nft_net->tables));
WARN_ON_ONCE(!list_empty(&nft_net->module_list));
WARN_ON_ONCE(!list_empty(&nft_net->notify_list));
}
+/* pernet .exit_batch hook: flush the GC work item. @net_exit_list is not
+ * used.
+ */
+static void nf_tables_exit_batch(struct list_head *net_exit_list)
+{
+	flush_work(&trans_gc_work);
+}
+
static struct pernet_operations nf_tables_net_ops = {
.init = nf_tables_init_net,
.pre_exit = nf_tables_pre_exit_net,
.exit = nf_tables_exit_net,
+ .exit_batch = nf_tables_exit_batch,
.id = &nf_tables_net_id,
.size = sizeof(struct nftables_pernet),
};
@@ -10967,6 +11245,7 @@ static void __exit nf_tables_module_exit(void)
nft_chain_filter_fini();
nft_chain_route_fini();
unregister_pernet_subsys(&nf_tables_net_ops);
+ cancel_work_sync(&trans_gc_work);
cancel_work_sync(&trans_destroy_work);
rcu_barrier();
rhltable_destroy(&nft_objname_ht);
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 910ef881c3b8..12ab78fa5d84 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -35,12 +35,12 @@ void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow,
struct nft_flow_key *mask = &match->mask;
struct nft_flow_key *key = &match->key;
- if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL))
+ if (match->dissector.used_keys & BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL))
return;
key->control.addr_type = addr_type;
mask->control.addr_type = 0xffff;
- match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_CONTROL);
+ match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL);
match->dissector.offset[FLOW_DISSECTOR_KEY_CONTROL] =
offsetof(struct nft_flow_key, control);
}
@@ -59,7 +59,7 @@ static void nft_flow_rule_transfer_vlan(struct nft_offload_ctx *ctx,
.mask = match->mask.basic.n_proto,
};
- if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_VLAN) &&
+ if (match->dissector.used_keys & BIT_ULL(FLOW_DISSECTOR_KEY_VLAN) &&
(match->key.vlan.vlan_tpid == htons(ETH_P_8021Q) ||
match->key.vlan.vlan_tpid == htons(ETH_P_8021AD))) {
match->key.basic.n_proto = match->key.cvlan.vlan_tpid;
@@ -70,8 +70,9 @@ static void nft_flow_rule_transfer_vlan(struct nft_offload_ctx *ctx,
match->mask.vlan.vlan_tpid = ethertype.mask;
match->dissector.offset[FLOW_DISSECTOR_KEY_CVLAN] =
offsetof(struct nft_flow_key, cvlan);
- match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_CVLAN);
- } else if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_BASIC) &&
+ match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_CVLAN);
+ } else if (match->dissector.used_keys &
+ BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) &&
(match->key.basic.n_proto == htons(ETH_P_8021Q) ||
match->key.basic.n_proto == htons(ETH_P_8021AD))) {
match->key.basic.n_proto = match->key.vlan.vlan_tpid;
@@ -80,7 +81,7 @@ static void nft_flow_rule_transfer_vlan(struct nft_offload_ctx *ctx,
match->mask.vlan.vlan_tpid = ethertype.mask;
match->dissector.offset[FLOW_DISSECTOR_KEY_VLAN] =
offsetof(struct nft_flow_key, vlan);
- match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_VLAN);
+ match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_VLAN);
}
}
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index e57eb168ee13..53c9e76473ba 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -470,7 +470,6 @@ __build_packet_message(struct nfnl_log_net *log,
sk_buff_data_t old_tail = inst->skb->tail;
struct sock *sk;
const unsigned char *hwhdrp;
- ktime_t tstamp;
nlh = nfnl_msg_put(inst->skb, 0, 0,
nfnl_msg_type(NFNL_SUBSYS_ULOG, NFULNL_MSG_PACKET),
@@ -599,10 +598,9 @@ __build_packet_message(struct nfnl_log_net *log,
goto nla_put_failure;
}
- tstamp = skb_tstamp_cond(skb, false);
- if (hooknum <= NF_INET_FORWARD && tstamp) {
+ if (hooknum <= NF_INET_FORWARD) {
+ struct timespec64 kts = ktime_to_timespec64(skb_tstamp_cond(skb, true));
struct nfulnl_msg_packet_timestamp ts;
- struct timespec64 kts = ktime_to_timespec64(tstamp);
ts.sec = cpu_to_be64(kts.tv_sec);
ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC);
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index 9a85e797ed58..e596d1a842f7 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -30,11 +30,11 @@ void nft_byteorder_eval(const struct nft_expr *expr,
const struct nft_byteorder *priv = nft_expr_priv(expr);
u32 *src = &regs->data[priv->sreg];
u32 *dst = &regs->data[priv->dreg];
- union { u32 u32; u16 u16; } *s, *d;
+ u16 *s16, *d16;
unsigned int i;
- s = (void *)src;
- d = (void *)dst;
+ s16 = (void *)src;
+ d16 = (void *)dst;
switch (priv->size) {
case 8: {
@@ -62,11 +62,11 @@ void nft_byteorder_eval(const struct nft_expr *expr,
switch (priv->op) {
case NFT_BYTEORDER_NTOH:
for (i = 0; i < priv->len / 4; i++)
- d[i].u32 = ntohl((__force __be32)s[i].u32);
+ dst[i] = ntohl((__force __be32)src[i]);
break;
case NFT_BYTEORDER_HTON:
for (i = 0; i < priv->len / 4; i++)
- d[i].u32 = (__force __u32)htonl(s[i].u32);
+ dst[i] = (__force __u32)htonl(src[i]);
break;
}
break;
@@ -74,11 +74,11 @@ void nft_byteorder_eval(const struct nft_expr *expr,
switch (priv->op) {
case NFT_BYTEORDER_NTOH:
for (i = 0; i < priv->len / 2; i++)
- d[i].u16 = ntohs((__force __be16)s[i].u16);
+ d16[i] = ntohs((__force __be16)s16[i]);
break;
case NFT_BYTEORDER_HTON:
for (i = 0; i < priv->len / 2; i++)
- d[i].u16 = (__force __u16)htons(s[i].u16);
+ d16[i] = (__force __u16)htons(s16[i]);
break;
}
break;
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index 6eb21a4f5698..cd4652259095 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -162,7 +162,7 @@ static int __nft_cmp_offload(struct nft_offload_ctx *ctx,
memcpy(key + reg->offset, data, reg->len);
memcpy(mask + reg->offset, datamask, reg->len);
- flow->match.dissector.used_keys |= BIT(reg->key);
+ flow->match.dissector.used_keys |= BIT_ULL(reg->key);
flow->match.dissector.offset[reg->key] = reg->base_offset;
if (reg->key == FLOW_DISSECTOR_KEY_META &&
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 38958e067aa8..86bb9d7797d9 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -108,7 +108,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
helper = rcu_dereference(help->helper);
if (helper == NULL)
goto err;
- strncpy((char *)dest, helper->name, NF_CT_HELPER_NAME_LEN);
+ strscpy_pad((char *)dest, helper->name, NF_CT_HELPER_NAME_LEN);
return;
#ifdef CONFIG_NF_CONNTRACK_LABELS
case NFT_CT_LABELS: {
@@ -262,6 +262,7 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr,
regs->verdict.code = NF_DROP;
return;
}
+ __set_bit(IPS_CONFIRMED_BIT, &ct->status);
}
nf_ct_set(skb, ct, IP_CT_NEW);
@@ -368,6 +369,7 @@ static bool nft_ct_tmpl_alloc_pcpu(void)
return false;
}
+ __set_bit(IPS_CONFIRMED_BIT, &tmp->status);
per_cpu(nft_ct_pcpu_template, cpu) = tmp;
}
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 4fb34d76dbea..5c5cc01c73c5 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -191,6 +191,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
if (IS_ERR(set))
return PTR_ERR(set);
+ if (set->flags & NFT_SET_OBJECT)
+ return -EOPNOTSUPP;
+
if (set->ops->update == NULL)
return -EOPNOTSUPP;
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
index 6e049fd48760..04b51f285332 100644
--- a/net/netfilter/nft_fib.c
+++ b/net/netfilter/nft_fib.c
@@ -14,17 +14,18 @@
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nft_fib.h>
+#define NFTA_FIB_F_ALL (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR | \
+ NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF | \
+ NFTA_FIB_F_PRESENT)
+
const struct nla_policy nft_fib_policy[NFTA_FIB_MAX + 1] = {
[NFTA_FIB_DREG] = { .type = NLA_U32 },
[NFTA_FIB_RESULT] = { .type = NLA_U32 },
- [NFTA_FIB_FLAGS] = { .type = NLA_U32 },
+ [NFTA_FIB_FLAGS] =
+ NLA_POLICY_MASK(NLA_BE32, NFTA_FIB_F_ALL),
};
EXPORT_SYMBOL(nft_fib_policy);
-#define NFTA_FIB_F_ALL (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR | \
- NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF | \
- NFTA_FIB_F_PRESENT)
-
int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
const struct nft_data **data)
{
@@ -77,7 +78,7 @@ int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
priv->flags = ntohl(nla_get_be32(tb[NFTA_FIB_FLAGS]));
- if (priv->flags == 0 || (priv->flags & ~NFTA_FIB_F_ALL))
+ if (priv->flags == 0)
return -EINVAL;
if ((priv->flags & (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) ==
@@ -150,7 +151,7 @@ void nft_fib_store_result(void *reg, const struct nft_fib *priv,
if (priv->flags & NFTA_FIB_F_PRESENT)
*dreg = !!dev;
else
- strncpy(reg, dev ? dev->name : "", IFNAMSIZ);
+ strscpy_pad(reg, dev ? dev->name : "", IFNAMSIZ);
break;
default:
WARN_ON_ONCE(1);
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 5ef9146e74ad..ab3362c483b4 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -408,8 +408,10 @@ static int nft_flow_offload_init(const struct nft_ctx *ctx,
if (IS_ERR(flowtable))
return PTR_ERR(flowtable);
+ if (!nft_use_inc(&flowtable->use))
+ return -EMFILE;
+
priv->flowtable = flowtable;
- flowtable->use++;
return nf_ct_netns_get(ctx->net, ctx->family);
}
@@ -428,7 +430,7 @@ static void nft_flow_offload_activate(const struct nft_ctx *ctx,
{
struct nft_flow_offload *priv = nft_expr_priv(expr);
- priv->flowtable->use++;
+ nft_use_inc_restore(&priv->flowtable->use);
}
static void nft_flow_offload_destroy(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index 3d76ebfe8939..fccb3cf7749c 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -125,15 +125,27 @@ static void nft_immediate_activate(const struct nft_ctx *ctx,
return nft_data_hold(&priv->data, nft_dreg_to_type(priv->dreg));
}
+static void nft_immediate_chain_deactivate(const struct nft_ctx *ctx,
+ struct nft_chain *chain,
+ enum nft_trans_phase phase)
+{
+ struct nft_ctx chain_ctx;
+ struct nft_rule *rule;
+
+ chain_ctx = *ctx;
+ chain_ctx.chain = chain;
+
+ list_for_each_entry(rule, &chain->rules, list)
+ nft_rule_expr_deactivate(&chain_ctx, rule, phase);
+}
+
static void nft_immediate_deactivate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
enum nft_trans_phase phase)
{
const struct nft_immediate_expr *priv = nft_expr_priv(expr);
const struct nft_data *data = &priv->data;
- struct nft_ctx chain_ctx;
struct nft_chain *chain;
- struct nft_rule *rule;
if (priv->dreg == NFT_REG_VERDICT) {
switch (data->verdict.code) {
@@ -143,23 +155,20 @@ static void nft_immediate_deactivate(const struct nft_ctx *ctx,
if (!nft_chain_binding(chain))
break;
- chain_ctx = *ctx;
- chain_ctx.chain = chain;
-
- list_for_each_entry(rule, &chain->rules, list)
- nft_rule_expr_deactivate(&chain_ctx, rule, phase);
-
switch (phase) {
case NFT_TRANS_PREPARE_ERROR:
nf_tables_unbind_chain(ctx, chain);
- fallthrough;
+ nft_deactivate_next(ctx->net, chain);
+ break;
case NFT_TRANS_PREPARE:
+ nft_immediate_chain_deactivate(ctx, chain, phase);
nft_deactivate_next(ctx->net, chain);
break;
default:
+ nft_immediate_chain_deactivate(ctx, chain, phase);
nft_chain_del(chain);
chain->bound = false;
- chain->table->use--;
+ nft_use_dec(&chain->table->use);
break;
}
break;
@@ -198,7 +207,7 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx,
* let the transaction records release this chain and its rules.
*/
if (chain->bound) {
- chain->use--;
+ nft_use_dec(&chain->use);
break;
}
@@ -206,9 +215,9 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx,
chain_ctx = *ctx;
chain_ctx.chain = chain;
- chain->use--;
+ nft_use_dec(&chain->use);
list_for_each_entry_safe(rule, n, &chain->rules, list) {
- chain->use--;
+ nft_use_dec(&chain->use);
list_del(&rule->list);
nf_tables_rule_destroy(&chain_ctx, rule);
}
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 29ac48cdd6db..870e5b113d13 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -90,7 +90,8 @@ static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = {
[NFTA_LOOKUP_SET_ID] = { .type = NLA_U32 },
[NFTA_LOOKUP_SREG] = { .type = NLA_U32 },
[NFTA_LOOKUP_DREG] = { .type = NLA_U32 },
- [NFTA_LOOKUP_FLAGS] = { .type = NLA_U32 },
+ [NFTA_LOOKUP_FLAGS] =
+ NLA_POLICY_MASK(NLA_BE32, NFT_LOOKUP_F_INV),
};
static int nft_lookup_init(const struct nft_ctx *ctx,
@@ -120,9 +121,6 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
if (tb[NFTA_LOOKUP_FLAGS]) {
flags = ntohl(nla_get_be32(tb[NFTA_LOOKUP_FLAGS]));
- if (flags & ~NFT_LOOKUP_F_INV)
- return -EINVAL;
-
if (flags & NFT_LOOKUP_F_INV)
priv->invert = true;
}
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index b115d77fbbc7..8a14aaca93bb 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -20,7 +20,8 @@ struct nft_masq {
};
static const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = {
- [NFTA_MASQ_FLAGS] = { .type = NLA_U32 },
+ [NFTA_MASQ_FLAGS] =
+ NLA_POLICY_MASK(NLA_BE32, NF_NAT_RANGE_MASK),
[NFTA_MASQ_REG_PROTO_MIN] = { .type = NLA_U32 },
[NFTA_MASQ_REG_PROTO_MAX] = { .type = NLA_U32 },
};
@@ -47,11 +48,8 @@ static int nft_masq_init(const struct nft_ctx *ctx,
struct nft_masq *priv = nft_expr_priv(expr);
int err;
- if (tb[NFTA_MASQ_FLAGS]) {
+ if (tb[NFTA_MASQ_FLAGS])
priv->flags = ntohl(nla_get_be32(tb[NFTA_MASQ_FLAGS]));
- if (priv->flags & ~NF_NAT_RANGE_MASK)
- return -EINVAL;
- }
if (tb[NFTA_MASQ_REG_PROTO_MIN]) {
err = nft_parse_register_load(tb[NFTA_MASQ_REG_PROTO_MIN],
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 8fdc7318c03c..f7da7c43333b 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -185,12 +185,12 @@ static noinline bool nft_meta_get_eval_kind(enum nft_meta_keys key,
case NFT_META_IIFKIND:
if (!in || !in->rtnl_link_ops)
return false;
- strncpy((char *)dest, in->rtnl_link_ops->kind, IFNAMSIZ);
+ strscpy_pad((char *)dest, in->rtnl_link_ops->kind, IFNAMSIZ);
break;
case NFT_META_OIFKIND:
if (!out || !out->rtnl_link_ops)
return false;
- strncpy((char *)dest, out->rtnl_link_ops->kind, IFNAMSIZ);
+ strscpy_pad((char *)dest, out->rtnl_link_ops->kind, IFNAMSIZ);
break;
default:
return false;
@@ -206,7 +206,7 @@ static void nft_meta_store_ifindex(u32 *dest, const struct net_device *dev)
static void nft_meta_store_ifname(u32 *dest, const struct net_device *dev)
{
- strncpy((char *)dest, dev ? dev->name : "", IFNAMSIZ);
+ strscpy_pad((char *)dest, dev ? dev->name : "", IFNAMSIZ);
}
static bool nft_meta_store_iftype(u32 *dest, const struct net_device *dev)
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 5c29915ab028..583885ce7232 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -132,7 +132,8 @@ static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = {
[NFTA_NAT_REG_ADDR_MAX] = { .type = NLA_U32 },
[NFTA_NAT_REG_PROTO_MIN] = { .type = NLA_U32 },
[NFTA_NAT_REG_PROTO_MAX] = { .type = NLA_U32 },
- [NFTA_NAT_FLAGS] = { .type = NLA_U32 },
+ [NFTA_NAT_FLAGS] =
+ NLA_POLICY_MASK(NLA_BE32, NF_NAT_RANGE_MASK),
};
static int nft_nat_validate(const struct nft_ctx *ctx,
@@ -246,11 +247,8 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
priv->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
}
- if (tb[NFTA_NAT_FLAGS]) {
+ if (tb[NFTA_NAT_FLAGS])
priv->flags |= ntohl(nla_get_be32(tb[NFTA_NAT_FLAGS]));
- if (priv->flags & ~NF_NAT_RANGE_MASK)
- return -EOPNOTSUPP;
- }
return nf_ct_netns_get(ctx->net, family);
}
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index a48dd5b5d45b..509011b1ef59 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -41,8 +41,10 @@ static int nft_objref_init(const struct nft_ctx *ctx,
if (IS_ERR(obj))
return -ENOENT;
+ if (!nft_use_inc(&obj->use))
+ return -EMFILE;
+
nft_objref_priv(expr) = obj;
- obj->use++;
return 0;
}
@@ -72,7 +74,7 @@ static void nft_objref_deactivate(const struct nft_ctx *ctx,
if (phase == NFT_TRANS_COMMIT)
return;
- obj->use--;
+ nft_use_dec(&obj->use);
}
static void nft_objref_activate(const struct nft_ctx *ctx,
@@ -80,7 +82,7 @@ static void nft_objref_activate(const struct nft_ctx *ctx,
{
struct nft_object *obj = nft_objref_priv(expr);
- obj->use++;
+ nft_use_inc_restore(&obj->use);
}
static const struct nft_expr_ops nft_objref_ops = {
diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
index 70820c66b591..7f61506e5b44 100644
--- a/net/netfilter/nft_osf.c
+++ b/net/netfilter/nft_osf.c
@@ -23,7 +23,7 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs,
struct nft_osf *priv = nft_expr_priv(expr);
u32 *dest = &regs->data[priv->dreg];
struct sk_buff *skb = pkt->skb;
- char os_match[NFT_OSF_MAXGENRELEN + 1];
+ char os_match[NFT_OSF_MAXGENRELEN];
const struct tcphdr *tcp;
struct nf_osf_data data;
struct tcphdr _tcph;
@@ -45,7 +45,7 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs,
}
if (!nf_osf_find(skb, nf_osf_fingers, priv->ttl, &data)) {
- strncpy((char *)dest, "unknown", NFT_OSF_MAXGENRELEN);
+ strscpy_pad((char *)dest, "unknown", NFT_OSF_MAXGENRELEN);
} else {
if (priv->flags & NFT_OSF_F_VERSION)
snprintf(os_match, NFT_OSF_MAXGENRELEN, "%s:%s",
@@ -53,7 +53,7 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs,
else
strscpy(os_match, data.genre, NFT_OSF_MAXGENRELEN);
- strncpy((char *)dest, os_match, NFT_OSF_MAXGENRELEN);
+ strscpy_pad((char *)dest, os_match, NFT_OSF_MAXGENRELEN);
}
}
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index a70196ffcb1e..a58bd8d291ff 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -22,7 +22,8 @@ struct nft_redir {
static const struct nla_policy nft_redir_policy[NFTA_REDIR_MAX + 1] = {
[NFTA_REDIR_REG_PROTO_MIN] = { .type = NLA_U32 },
[NFTA_REDIR_REG_PROTO_MAX] = { .type = NLA_U32 },
- [NFTA_REDIR_FLAGS] = { .type = NLA_U32 },
+ [NFTA_REDIR_FLAGS] =
+ NLA_POLICY_MASK(NLA_BE32, NF_NAT_RANGE_MASK),
};
static int nft_redir_validate(const struct nft_ctx *ctx,
@@ -68,11 +69,8 @@ static int nft_redir_init(const struct nft_ctx *ctx,
priv->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
}
- if (tb[NFTA_REDIR_FLAGS]) {
+ if (tb[NFTA_REDIR_FLAGS])
priv->flags = ntohl(nla_get_be32(tb[NFTA_REDIR_FLAGS]));
- if (priv->flags & ~NF_NAT_RANGE_MASK)
- return -EINVAL;
- }
return nf_ct_netns_get(ctx->net, ctx->family);
}
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 0b73cb0e752f..524763659f25 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -59,6 +59,8 @@ static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg,
if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
return 1;
+ if (nft_set_elem_is_dead(&he->ext))
+ return 1;
if (nft_set_elem_expired(&he->ext))
return 1;
if (!nft_set_elem_active(&he->ext, x->genmask))
@@ -188,7 +190,6 @@ static void nft_rhash_activate(const struct net *net, const struct nft_set *set,
struct nft_rhash_elem *he = elem->priv;
nft_set_elem_change_active(net, set, &he->ext);
- nft_set_elem_clear_busy(&he->ext);
}
static bool nft_rhash_flush(const struct net *net,
@@ -196,12 +197,9 @@ static bool nft_rhash_flush(const struct net *net,
{
struct nft_rhash_elem *he = priv;
- if (!nft_set_elem_mark_busy(&he->ext) ||
- !nft_is_active(net, &he->ext)) {
- nft_set_elem_change_active(net, set, &he->ext);
- return true;
- }
- return false;
+ nft_set_elem_change_active(net, set, &he->ext);
+
+ return true;
}
static void *nft_rhash_deactivate(const struct net *net,
@@ -218,9 +216,8 @@ static void *nft_rhash_deactivate(const struct net *net,
rcu_read_lock();
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
- if (he != NULL &&
- !nft_rhash_flush(net, set, he))
- he = NULL;
+ if (he)
+ nft_set_elem_change_active(net, set, &he->ext);
rcu_read_unlock();
@@ -252,7 +249,9 @@ static bool nft_rhash_delete(const struct nft_set *set,
if (he == NULL)
return false;
- return rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params) == 0;
+ nft_set_elem_dead(&he->ext);
+
+ return true;
}
static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
@@ -278,8 +277,6 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
if (iter->count < iter->skip)
goto cont;
- if (nft_set_elem_expired(&he->ext))
- goto cont;
if (!nft_set_elem_active(&he->ext, iter->genmask))
goto cont;
@@ -314,25 +311,51 @@ static bool nft_rhash_expr_needs_gc_run(const struct nft_set *set,
static void nft_rhash_gc(struct work_struct *work)
{
+ struct nftables_pernet *nft_net;
struct nft_set *set;
struct nft_rhash_elem *he;
struct nft_rhash *priv;
- struct nft_set_gc_batch *gcb = NULL;
struct rhashtable_iter hti;
+ struct nft_trans_gc *gc;
+ struct net *net;
+ u32 gc_seq;
priv = container_of(work, struct nft_rhash, gc_work.work);
set = nft_set_container_of(priv);
+ net = read_pnet(&set->net);
+ nft_net = nft_pernet(net);
+ gc_seq = READ_ONCE(nft_net->gc_seq);
+
+ if (nft_set_gc_is_pending(set))
+ goto done;
+
+ gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
+ if (!gc)
+ goto done;
rhashtable_walk_enter(&priv->ht, &hti);
rhashtable_walk_start(&hti);
while ((he = rhashtable_walk_next(&hti))) {
if (IS_ERR(he)) {
- if (PTR_ERR(he) != -EAGAIN)
- break;
+ if (PTR_ERR(he) != -EAGAIN) {
+ nft_trans_gc_destroy(gc);
+ gc = NULL;
+ goto try_later;
+ }
continue;
}
+ /* Ruleset has been updated, try later. */
+ if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
+ nft_trans_gc_destroy(gc);
+ gc = NULL;
+ goto try_later;
+ }
+
+ if (nft_set_elem_is_dead(&he->ext))
+ goto dead_elem;
+
if (nft_set_ext_exists(&he->ext, NFT_SET_EXT_EXPRESSIONS) &&
nft_rhash_expr_needs_gc_run(set, &he->ext))
goto needs_gc_run;
@@ -340,26 +363,26 @@ static void nft_rhash_gc(struct work_struct *work)
if (!nft_set_elem_expired(&he->ext))
continue;
needs_gc_run:
- if (nft_set_elem_mark_busy(&he->ext))
- continue;
+ nft_set_elem_dead(&he->ext);
+dead_elem:
+ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+ if (!gc)
+ goto try_later;
- gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
- if (gcb == NULL)
- break;
- rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
- atomic_dec(&set->nelems);
- nft_set_gc_batch_add(gcb, he);
+ nft_trans_gc_elem_add(gc, he);
}
+
+ gc = nft_trans_gc_catchall(gc, gc_seq);
+
+try_later:
+ /* catchall list iteration requires rcu read side lock. */
rhashtable_walk_stop(&hti);
rhashtable_walk_exit(&hti);
- he = nft_set_catchall_gc(set);
- if (he) {
- gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
- if (gcb)
- nft_set_gc_batch_add(gcb, he);
- }
- nft_set_gc_batch_complete(gcb);
+ if (gc)
+ nft_trans_gc_queue_async_done(gc);
+
+done:
queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
nft_set_gc_interval(set));
}
@@ -394,7 +417,7 @@ static int nft_rhash_init(const struct nft_set *set,
return err;
INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rhash_gc);
- if (set->flags & NFT_SET_TIMEOUT)
+ if (set->flags & (NFT_SET_TIMEOUT | NFT_SET_EVAL))
nft_rhash_gc_init(set);
return 0;
@@ -422,7 +445,6 @@ static void nft_rhash_destroy(const struct nft_ctx *ctx,
};
cancel_delayed_work_sync(&priv->gc_work);
- rcu_barrier();
rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy,
(void *)&rhash_ctx);
}
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index db526cb7a485..6af9c9ed4b5c 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -566,8 +566,9 @@ next_match:
goto out;
if (last) {
- if (nft_set_elem_expired(&f->mt[b].e->ext) ||
- (genmask &&
+ if (nft_set_elem_expired(&f->mt[b].e->ext))
+ goto next_match;
+ if ((genmask &&
!nft_set_elem_active(&f->mt[b].e->ext, genmask)))
goto next_match;
@@ -602,7 +603,7 @@ static void *nft_pipapo_get(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem, unsigned int flags)
{
return pipapo_get(net, set, (const u8 *)elem->key.val.data,
- nft_genmask_cur(net));
+ nft_genmask_cur(net));
}
/**
@@ -901,12 +902,14 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f)
static int pipapo_insert(struct nft_pipapo_field *f, const uint8_t *k,
int mask_bits)
{
- int rule = f->rules++, group, ret, bit_offset = 0;
+ int rule = f->rules, group, ret, bit_offset = 0;
- ret = pipapo_resize(f, f->rules - 1, f->rules);
+ ret = pipapo_resize(f, f->rules, f->rules + 1);
if (ret)
return ret;
+ f->rules++;
+
for (group = 0; group < f->groups; group++) {
int i, v;
u8 mask;
@@ -1051,7 +1054,9 @@ static int pipapo_expand(struct nft_pipapo_field *f,
step++;
if (step >= len) {
if (!masks) {
- pipapo_insert(f, base, 0);
+ err = pipapo_insert(f, base, 0);
+ if (err < 0)
+ return err;
masks = 1;
}
goto out;
@@ -1234,6 +1239,9 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
else
ret = pipapo_expand(f, start, end, f->groups * f->bb);
+ if (ret < 0)
+ return ret;
+
if (f->bsize > bsize_max)
bsize_max = f->bsize;
@@ -1528,16 +1536,34 @@ static void pipapo_drop(struct nft_pipapo_match *m,
}
}
+static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set,
+ struct nft_pipapo_elem *e)
+
+{
+ struct nft_set_elem elem = {
+ .priv = e,
+ };
+
+ nft_setelem_data_deactivate(net, set, &elem);
+}
+
/**
* pipapo_gc() - Drop expired entries from set, destroy start and end elements
- * @set: nftables API set representation
+ * @_set: nftables API set representation
* @m: Matching data
*/
-static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
+static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
{
+ struct nft_set *set = (struct nft_set *) _set;
struct nft_pipapo *priv = nft_set_priv(set);
+ struct net *net = read_pnet(&set->net);
int rules_f0, first_rule = 0;
struct nft_pipapo_elem *e;
+ struct nft_trans_gc *gc;
+
+ gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
+ if (!gc)
+ return;
while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
@@ -1561,13 +1587,20 @@ static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
f--;
i--;
e = f->mt[rulemap[i].to].e;
- if (nft_set_elem_expired(&e->ext) &&
- !nft_set_elem_mark_busy(&e->ext)) {
+
+ /* synchronous gc never fails, there is no need to set on
+ * NFT_SET_ELEM_DEAD_BIT.
+ */
+ if (nft_set_elem_expired(&e->ext)) {
priv->dirty = true;
- pipapo_drop(m, rulemap);
- rcu_barrier();
- nft_set_elem_destroy(set, e, true);
+ gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+ if (!gc)
+ break;
+
+ nft_pipapo_gc_deactivate(net, set, e);
+ pipapo_drop(m, rulemap);
+ nft_trans_gc_elem_add(gc, e);
/* And check again current first rule, which is now the
* first we haven't checked.
@@ -1577,11 +1610,11 @@ static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
}
}
- e = nft_set_catchall_gc(set);
- if (e)
- nft_set_elem_destroy(set, e, true);
-
- priv->last_gc = jiffies;
+ gc = nft_trans_gc_catchall(gc, 0);
+ if (gc) {
+ nft_trans_gc_queue_sync_done(gc);
+ priv->last_gc = jiffies;
+ }
}
/**
@@ -1664,6 +1697,17 @@ static void nft_pipapo_commit(const struct nft_set *set)
priv->clone = new_clone;
}
+static bool nft_pipapo_transaction_mutex_held(const struct nft_set *set)
+{
+#ifdef CONFIG_PROVE_LOCKING
+ const struct net *net = read_pnet(&set->net);
+
+ return lockdep_is_held(&nft_pernet(net)->commit_mutex);
+#else
+ return true;
+#endif
+}
+
static void nft_pipapo_abort(const struct nft_set *set)
{
struct nft_pipapo *priv = nft_set_priv(set);
@@ -1672,7 +1716,7 @@ static void nft_pipapo_abort(const struct nft_set *set)
if (!priv->dirty)
return;
- m = rcu_dereference(priv->match);
+ m = rcu_dereference_protected(priv->match, nft_pipapo_transaction_mutex_held(set));
new_clone = pipapo_clone(m);
if (IS_ERR(new_clone))
@@ -1699,14 +1743,9 @@ static void nft_pipapo_activate(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem)
{
- struct nft_pipapo_elem *e;
-
- e = pipapo_get(net, set, (const u8 *)elem->key.val.data, 0);
- if (IS_ERR(e))
- return;
+ struct nft_pipapo_elem *e = elem->priv;
nft_set_elem_change_active(net, set, &e->ext);
- nft_set_elem_clear_busy(&e->ext);
}
/**
@@ -1918,10 +1957,6 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
data = (const u8 *)nft_set_ext_key(&e->ext);
- e = pipapo_get(net, set, data, 0);
- if (IS_ERR(e))
- return;
-
while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
const u8 *match_start, *match_end;
@@ -1929,7 +1964,11 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
int i, start, rules_fx;
match_start = data;
- match_end = (const u8 *)nft_set_ext_key_end(&e->ext)->data;
+
+ if (nft_set_ext_exists(&e->ext, NFT_SET_EXT_KEY_END))
+ match_end = (const u8 *)nft_set_ext_key_end(&e->ext)->data;
+ else
+ match_end = data;
start = first_rule;
rules_fx = rules_f0;
@@ -2001,8 +2040,6 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
goto cont;
e = f->mt[r].e;
- if (nft_set_elem_expired(&e->ext))
- goto cont;
elem.priv = e;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 5c05c9b990fb..c6435e709231 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -46,6 +46,12 @@ static int nft_rbtree_cmp(const struct nft_set *set,
set->klen);
}
+static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe)
+{
+ return nft_set_elem_expired(&rbe->ext) ||
+ nft_set_elem_is_dead(&rbe->ext);
+}
+
static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext,
unsigned int seq)
@@ -80,7 +86,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
continue;
}
- if (nft_set_elem_expired(&rbe->ext))
+ if (nft_rbtree_elem_expired(rbe))
return false;
if (nft_rbtree_interval_end(rbe)) {
@@ -98,7 +104,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
nft_set_elem_active(&interval->ext, genmask) &&
- !nft_set_elem_expired(&interval->ext) &&
+ !nft_rbtree_elem_expired(interval) &&
nft_rbtree_interval_start(interval)) {
*ext = &interval->ext;
return true;
@@ -215,38 +221,70 @@ static void *nft_rbtree_get(const struct net *net, const struct nft_set *set,
return rbe;
}
+static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set,
+ struct nft_rbtree *priv,
+ struct nft_rbtree_elem *rbe)
+{
+ struct nft_set_elem elem = {
+ .priv = rbe,
+ };
+
+ nft_setelem_data_deactivate(net, set, &elem);
+ rb_erase(&rbe->node, &priv->root);
+}
+
static int nft_rbtree_gc_elem(const struct nft_set *__set,
struct nft_rbtree *priv,
- struct nft_rbtree_elem *rbe)
+ struct nft_rbtree_elem *rbe,
+ u8 genmask)
{
struct nft_set *set = (struct nft_set *)__set;
struct rb_node *prev = rb_prev(&rbe->node);
- struct nft_rbtree_elem *rbe_prev = NULL;
- struct nft_set_gc_batch *gcb;
+ struct net *net = read_pnet(&set->net);
+ struct nft_rbtree_elem *rbe_prev;
+ struct nft_trans_gc *gc;
- gcb = nft_set_gc_batch_check(set, NULL, GFP_ATOMIC);
- if (!gcb)
+ gc = nft_trans_gc_alloc(set, 0, GFP_ATOMIC);
+ if (!gc)
return -ENOMEM;
- /* search for expired end interval coming before this element. */
+ /* search for end interval coming before this element.
+ * end intervals don't carry a timeout extension, they
+ * are coupled with the interval start element.
+ */
while (prev) {
rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
- if (nft_rbtree_interval_end(rbe_prev))
+ if (nft_rbtree_interval_end(rbe_prev) &&
+ nft_set_elem_active(&rbe_prev->ext, genmask))
break;
prev = rb_prev(prev);
}
- if (rbe_prev) {
- rb_erase(&rbe_prev->node, &priv->root);
- atomic_dec(&set->nelems);
+ if (prev) {
+ rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
+ nft_rbtree_gc_remove(net, set, priv, rbe_prev);
+
+ /* There is always room in this trans gc for this element,
+ * memory allocation never actually happens, hence, the warning
+ * splat in such case. No need to set NFT_SET_ELEM_DEAD_BIT,
+ * this is synchronous gc which never fails.
+ */
+ gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+ if (WARN_ON_ONCE(!gc))
+ return -ENOMEM;
+
+ nft_trans_gc_elem_add(gc, rbe_prev);
}
- rb_erase(&rbe->node, &priv->root);
- atomic_dec(&set->nelems);
+ nft_rbtree_gc_remove(net, set, priv, rbe);
+ gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+ if (WARN_ON_ONCE(!gc))
+ return -ENOMEM;
- nft_set_gc_batch_add(gcb, rbe);
- nft_set_gc_batch_complete(gcb);
+ nft_trans_gc_elem_add(gc, rbe);
+
+ nft_trans_gc_queue_sync_done(gc);
return 0;
}
@@ -321,7 +359,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
/* perform garbage collection to avoid bogus overlap reports. */
if (nft_set_elem_expired(&rbe->ext)) {
- err = nft_rbtree_gc_elem(set, priv, rbe);
+ err = nft_rbtree_gc_elem(set, priv, rbe, genmask);
if (err < 0)
return err;
@@ -474,7 +512,6 @@ static void nft_rbtree_activate(const struct net *net,
struct nft_rbtree_elem *rbe = elem->priv;
nft_set_elem_change_active(net, set, &rbe->ext);
- nft_set_elem_clear_busy(&rbe->ext);
}
static bool nft_rbtree_flush(const struct net *net,
@@ -482,12 +519,9 @@ static bool nft_rbtree_flush(const struct net *net,
{
struct nft_rbtree_elem *rbe = priv;
- if (!nft_set_elem_mark_busy(&rbe->ext) ||
- !nft_is_active(net, &rbe->ext)) {
- nft_set_elem_change_active(net, set, &rbe->ext);
- return true;
- }
- return false;
+ nft_set_elem_change_active(net, set, &rbe->ext);
+
+ return true;
}
static void *nft_rbtree_deactivate(const struct net *net,
@@ -544,8 +578,6 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
if (iter->count < iter->skip)
goto cont;
- if (nft_set_elem_expired(&rbe->ext))
- goto cont;
if (!nft_set_elem_active(&rbe->ext, iter->genmask))
goto cont;
@@ -564,26 +596,43 @@ cont:
static void nft_rbtree_gc(struct work_struct *work)
{
- struct nft_rbtree_elem *rbe, *rbe_end = NULL, *rbe_prev = NULL;
- struct nft_set_gc_batch *gcb = NULL;
+ struct nft_rbtree_elem *rbe, *rbe_end = NULL;
+ struct nftables_pernet *nft_net;
struct nft_rbtree *priv;
+ struct nft_trans_gc *gc;
struct rb_node *node;
struct nft_set *set;
+ unsigned int gc_seq;
struct net *net;
- u8 genmask;
priv = container_of(work, struct nft_rbtree, gc_work.work);
set = nft_set_container_of(priv);
net = read_pnet(&set->net);
- genmask = nft_genmask_cur(net);
+ nft_net = nft_pernet(net);
+ gc_seq = READ_ONCE(nft_net->gc_seq);
+
+ if (nft_set_gc_is_pending(set))
+ goto done;
+
+ gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
+ if (!gc)
+ goto done;
write_lock_bh(&priv->lock);
write_seqcount_begin(&priv->count);
for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
+
+ /* Ruleset has been updated, try later. */
+ if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
+ nft_trans_gc_destroy(gc);
+ gc = NULL;
+ goto try_later;
+ }
+
rbe = rb_entry(node, struct nft_rbtree_elem, node);
- if (!nft_set_elem_active(&rbe->ext, genmask))
- continue;
+ if (nft_set_elem_is_dead(&rbe->ext))
+ goto dead_elem;
/* elements are reversed in the rbtree for historical reasons,
* from highest to lowest value, that is why end element is
@@ -596,46 +645,36 @@ static void nft_rbtree_gc(struct work_struct *work)
if (!nft_set_elem_expired(&rbe->ext))
continue;
- if (nft_set_elem_mark_busy(&rbe->ext)) {
- rbe_end = NULL;
+ nft_set_elem_dead(&rbe->ext);
+
+ if (!rbe_end)
continue;
- }
- if (rbe_prev) {
- rb_erase(&rbe_prev->node, &priv->root);
- rbe_prev = NULL;
- }
- gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
- if (!gcb)
- break;
+ nft_set_elem_dead(&rbe_end->ext);
- atomic_dec(&set->nelems);
- nft_set_gc_batch_add(gcb, rbe);
- rbe_prev = rbe;
+ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+ if (!gc)
+ goto try_later;
- if (rbe_end) {
- atomic_dec(&set->nelems);
- nft_set_gc_batch_add(gcb, rbe_end);
- rb_erase(&rbe_end->node, &priv->root);
- rbe_end = NULL;
- }
- node = rb_next(node);
- if (!node)
- break;
+ nft_trans_gc_elem_add(gc, rbe_end);
+ rbe_end = NULL;
+dead_elem:
+ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+ if (!gc)
+ goto try_later;
+
+ nft_trans_gc_elem_add(gc, rbe);
}
- if (rbe_prev)
- rb_erase(&rbe_prev->node, &priv->root);
+
+ gc = nft_trans_gc_catchall(gc, gc_seq);
+
+try_later:
write_seqcount_end(&priv->count);
write_unlock_bh(&priv->lock);
- rbe = nft_set_catchall_gc(set);
- if (rbe) {
- gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
- if (gcb)
- nft_set_gc_batch_add(gcb, rbe);
- }
- nft_set_gc_batch_complete(gcb);
-
+ if (gc)
+ nft_trans_gc_queue_async_done(gc);
+done:
queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
nft_set_gc_interval(set));
}
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index 84def74698b7..9ed85be79452 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -107,7 +107,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
break;
case NFT_SOCKET_MARK:
if (sk_fullsock(sk)) {
- *dest = sk->sk_mark;
+ *dest = READ_ONCE(sk->sk_mark);
} else {
regs->verdict.code = NFT_BREAK;
return;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 470282cf3fae..21624d68314f 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -768,7 +768,7 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
m->u.user.match_size = msize;
strscpy(name, match->name, sizeof(name));
module_put(match->me);
- strncpy(m->u.user.name, name, sizeof(m->u.user.name));
+ strscpy_pad(m->u.user.name, name, sizeof(m->u.user.name));
*size += off;
*dstptr += msize;
@@ -1148,7 +1148,7 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
t->u.user.target_size = tsize;
strscpy(name, target->name, sizeof(name));
module_put(target->me);
- strncpy(t->u.user.name, name, sizeof(t->u.user.name));
+ strscpy_pad(t->u.user.name, name, sizeof(t->u.user.name));
*size += off;
*dstptr += tsize;
@@ -2014,4 +2014,3 @@ static void __exit xt_fini(void)
module_init(xt_init);
module_exit(xt_fini);
-
diff --git a/net/netfilter/xt_repldata.h b/net/netfilter/xt_repldata.h
index 68ccbe50bb1e..5d1fb7018dba 100644
--- a/net/netfilter/xt_repldata.h
+++ b/net/netfilter/xt_repldata.h
@@ -29,7 +29,7 @@
if (tbl == NULL) \
return NULL; \
term = (struct type##_error *)&(((char *)tbl)[term_offset]); \
- strncpy(tbl->repl.name, info->name, sizeof(tbl->repl.name)); \
+ strscpy_pad(tbl->repl.name, info->name, sizeof(tbl->repl.name)); \
*term = (struct type##_error)typ2##_ERROR_INIT; \
tbl->repl.valid_hooks = hook_mask; \
tbl->repl.num_entries = nhooks + 1; \
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 7013f55f05d1..76e01f292aaf 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -77,7 +77,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
transparent && sk_fullsock(sk))
- pskb->mark = sk->sk_mark;
+ pskb->mark = READ_ONCE(sk->sk_mark);
if (sk != skb->sk)
sock_gen_put(sk);
@@ -138,7 +138,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
transparent && sk_fullsock(sk))
- pskb->mark = sk->sk_mark;
+ pskb->mark = READ_ONCE(sk->sk_mark);
if (sk != skb->sk)
sock_gen_put(sk);