summaryrefslogtreecommitdiff
path: root/drivers/net/wireguard/allowedips.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2021-06-04 18:25:39 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2021-06-04 18:25:39 -0700
commit: 9d32fa5d74b148b1cba262c0c24b9a27a910909b (patch)
tree: f45f2db51b738797cb99fd1f3ae38fd74305a566 /drivers/net/wireguard/allowedips.c
parent: 2cb26c15a247a2b2bc9de653773cf21d969bf570 (diff)
parent: 3822d0670c9d4342794d73e0d0e615322b40438e (diff)
Merge tag 'net-5.13-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Jakub Kicinski:
"Networking fixes, including fixes from bpf, wireless, netfilter and wireguard trees.

The bpf vs lockdown+audit fix is the most notable.

Things haven't slowed down just yet, both in terms of regressions in current release and largish fixes for older code, but we usually see a slowdown only after -rc5.

Current release - regressions:
 - virtio-net: fix page faults and crashes when XDP is enabled
 - mlx5e: fix HW timestamping with CQE compression, and make sure they are only allowed to coexist with capable devices
 - stmmac:
    - fix kernel panic due to NULL pointer dereference of mdio_bus_data
    - fix double clk unprepare when no PHY device is connected

Current release - new code bugs:
 - mt76: a few fixes for the recent MT7921 devices and runtime power management

Previous releases - regressions:
 - ice:
    - track AF_XDP ZC enabled queues in bitmap to fix copy mode Tx
    - fix allowing VF to request more/less queues via virtchnl
    - correct supported and advertised autoneg by using PHY capabilities
    - allow all LLDP packets from PF to Tx
 - kbuild: quote OBJCOPY var to avoid a pahole call break the build

Previous releases - always broken:
 - bpf, lockdown, audit: fix buggy SELinux lockdown permission checks
 - mt76: address the recent FragAttack vulnerabilities not covered by generic fixes
 - ipv6: fix KASAN: slab-out-of-bounds Read in fib6_nh_flush_exceptions
 - Bluetooth:
    - fix the erroneous flush_work() order, to avoid double free
    - use correct lock to prevent UAF of hdev object
 - nfc: fix NULL ptr dereference in llcp_sock_getname() after failed connect
 - ieee802154: multiple fixes to error checking and return values
 - igb: fix XDP with PTP enabled
 - intel: add correct exception tracing for XDP
 - tls: fix use-after-free when TLS offload device goes down and back up
 - ipvs: ignore IP_VS_SVC_F_HASHED flag when adding service
 - netfilter: nft_ct: skip expectations for confirmed conntrack
 - mptcp: fix falling back to TCP in presence of out of order packets early in connection lifetime
 - wireguard: switch from O(n) to a O(1) algorithm for maintaining peers, fixing stalls and a large memory leak in the process

Misc:
 - devlink: correct VIRTUAL port to not have phys_port attributes
 - Bluetooth: fix VIRTIO_ID_BT assigned number
 - net: return the correct errno code ENOBUF -> ENOMEM
 - wireguard:
    - peer: allocate in kmem_cache saving 25% on peer memory
    - do not use -O3"

* tag 'net-5.13-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (91 commits)
  cxgb4: avoid link re-train during TC-MQPRIO configuration
  sch_htb: fix refcount leak in htb_parent_to_leaf_offload
  wireguard: allowedips: free empty intermediate nodes when removing single node
  wireguard: allowedips: allocate nodes in kmem_cache
  wireguard: allowedips: remove nodes in O(1)
  wireguard: allowedips: initialize list head in selftest
  wireguard: peer: allocate in kmem_cache
  wireguard: use synchronize_net rather than synchronize_rcu
  wireguard: do not use -O3
  wireguard: selftests: make sure rp_filter is disabled on vethc
  wireguard: selftests: remove old conntrack kconfig value
  virtchnl: Add missing padding to virtchnl_proto_hdrs
  ice: Allow all LLDP packets from PF to Tx
  ice: report supported and advertised autoneg using PHY capabilities
  ice: handle the VF VSI rebuild failure
  ice: Fix VFR issues for AVF drivers that expect ATQLEN cleared
  ice: Fix allowing VF to request more/less queues via virtchnl
  virtio-net: fix for skb_over_panic inside big mode
  ipv6: Fix KASAN: slab-out-of-bounds Read in fib6_nh_flush_exceptions
  fib: Return the correct errno code
  ...
Diffstat (limited to 'drivers/net/wireguard/allowedips.c')
-rw-r--r-- drivers/net/wireguard/allowedips.c 189
1 files changed, 99 insertions, 90 deletions
diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c
index 3725e9cd85f4..b7197e80f226 100644
--- a/drivers/net/wireguard/allowedips.c
+++ b/drivers/net/wireguard/allowedips.c
@@ -6,6 +6,8 @@
#include "allowedips.h"
#include "peer.h"
+static struct kmem_cache *node_cache;
+
static void swap_endian(u8 *dst, const u8 *src, u8 bits)
{
if (bits == 32) {
@@ -28,8 +30,11 @@ static void copy_and_assign_cidr(struct allowedips_node *node, const u8 *src,
node->bitlen = bits;
memcpy(node->bits, src, bits / 8U);
}
-#define CHOOSE_NODE(parent, key) \
- parent->bit[(key[parent->bit_at_a] >> parent->bit_at_b) & 1]
+
+static inline u8 choose(struct allowedips_node *node, const u8 *key) /* Returns 0 or 1: the index into node->bit[] that a walk for @key follows at @node. */
+{
+ return (key[node->bit_at_a] >> node->bit_at_b) & 1; /* Take byte bit_at_a of the key, shift it right by bit_at_b and keep only the lowest bit. */
+}
static void push_rcu(struct allowedips_node **stack,
struct allowedips_node __rcu *p, unsigned int *len)
@@ -40,6 +45,11 @@ static void push_rcu(struct allowedips_node **stack,
}
}
+static void node_free_rcu(struct rcu_head *rcu) /* Callback handed to call_rcu(): returns a single node to node_cache. */
+{
+ kmem_cache_free(node_cache, container_of(rcu, struct allowedips_node, rcu)); /* @rcu is the rcu member embedded in the node; container_of() recovers the enclosing allowedips_node. */
+}
+
static void root_free_rcu(struct rcu_head *rcu)
{
struct allowedips_node *node, *stack[128] = {
@@ -49,7 +59,7 @@ static void root_free_rcu(struct rcu_head *rcu)
while (len > 0 && (node = stack[--len])) {
push_rcu(stack, node->bit[0], &len);
push_rcu(stack, node->bit[1], &len);
- kfree(node);
+ kmem_cache_free(node_cache, node);
}
}
@@ -66,60 +76,6 @@ static void root_remove_peer_lists(struct allowedips_node *root)
}
}
-static void walk_remove_by_peer(struct allowedips_node __rcu **top,
- struct wg_peer *peer, struct mutex *lock)
-{
-#define REF(p) rcu_access_pointer(p)
-#define DEREF(p) rcu_dereference_protected(*(p), lockdep_is_held(lock))
-#define PUSH(p) ({ \
- WARN_ON(IS_ENABLED(DEBUG) && len >= 128); \
- stack[len++] = p; \
- })
-
- struct allowedips_node __rcu **stack[128], **nptr;
- struct allowedips_node *node, *prev;
- unsigned int len;
-
- if (unlikely(!peer || !REF(*top)))
- return;
-
- for (prev = NULL, len = 0, PUSH(top); len > 0; prev = node) {
- nptr = stack[len - 1];
- node = DEREF(nptr);
- if (!node) {
- --len;
- continue;
- }
- if (!prev || REF(prev->bit[0]) == node ||
- REF(prev->bit[1]) == node) {
- if (REF(node->bit[0]))
- PUSH(&node->bit[0]);
- else if (REF(node->bit[1]))
- PUSH(&node->bit[1]);
- } else if (REF(node->bit[0]) == prev) {
- if (REF(node->bit[1]))
- PUSH(&node->bit[1]);
- } else {
- if (rcu_dereference_protected(node->peer,
- lockdep_is_held(lock)) == peer) {
- RCU_INIT_POINTER(node->peer, NULL);
- list_del_init(&node->peer_list);
- if (!node->bit[0] || !node->bit[1]) {
- rcu_assign_pointer(*nptr, DEREF(
- &node->bit[!REF(node->bit[0])]));
- kfree_rcu(node, rcu);
- node = DEREF(nptr);
- }
- }
- --len;
- }
- }
-
-#undef REF
-#undef DEREF
-#undef PUSH
-}
-
static unsigned int fls128(u64 a, u64 b)
{
return a ? fls64(a) + 64U : fls64(b);
@@ -159,7 +115,7 @@ static struct allowedips_node *find_node(struct allowedips_node *trie, u8 bits,
found = node;
if (node->cidr == bits)
break;
- node = rcu_dereference_bh(CHOOSE_NODE(node, key));
+ node = rcu_dereference_bh(node->bit[choose(node, key)]);
}
return found;
}
@@ -191,8 +147,7 @@ static bool node_placement(struct allowedips_node __rcu *trie, const u8 *key,
u8 cidr, u8 bits, struct allowedips_node **rnode,
struct mutex *lock)
{
- struct allowedips_node *node = rcu_dereference_protected(trie,
- lockdep_is_held(lock));
+ struct allowedips_node *node = rcu_dereference_protected(trie, lockdep_is_held(lock));
struct allowedips_node *parent = NULL;
bool exact = false;
@@ -202,13 +157,24 @@ static bool node_placement(struct allowedips_node __rcu *trie, const u8 *key,
exact = true;
break;
}
- node = rcu_dereference_protected(CHOOSE_NODE(parent, key),
- lockdep_is_held(lock));
+ node = rcu_dereference_protected(parent->bit[choose(parent, key)], lockdep_is_held(lock));
}
*rnode = parent;
return exact;
}
+static inline void connect_node(struct allowedips_node **parent, u8 bit, struct allowedips_node *node) /* Stores @node into the slot *@parent and remembers that slot inside @node. NOTE(review): callers pass __rcu-annotated slots (e.g. add()'s trie argument) while @parent itself is not annotated __rcu; sparse may warn, confirm. */
+{
+ node->parent_bit_packed = (unsigned long)parent | bit; /* Slot address with @bit OR'ed into the low bits. Callers in this file pass 0 or 1 for a bit[] slot and 2 for the trie root; readers mask with ~3UL, so the slot is presumably at least 4-byte aligned. */
+ rcu_assign_pointer(*parent, node); /* Publish only after the back-reference above has been written. */
+}
+
+static inline void choose_and_connect_node(struct allowedips_node *parent, struct allowedips_node *node) /* Attaches @node beneath @parent, in the bit[] slot selected by @node's own address bits. */
+{
+ u8 bit = choose(parent, node->bits); /* 0 or 1, evaluated at @parent's bit position using @node->bits as the key. */
+ connect_node(&parent->bit[bit], bit, node);
+}
+
static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
u8 cidr, struct wg_peer *peer, struct mutex *lock)
{
@@ -218,13 +184,13 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
return -EINVAL;
if (!rcu_access_pointer(*trie)) {
- node = kzalloc(sizeof(*node), GFP_KERNEL);
+ node = kmem_cache_zalloc(node_cache, GFP_KERNEL);
if (unlikely(!node))
return -ENOMEM;
RCU_INIT_POINTER(node->peer, peer);
list_add_tail(&node->peer_list, &peer->allowedips_list);
copy_and_assign_cidr(node, key, cidr, bits);
- rcu_assign_pointer(*trie, node);
+ connect_node(trie, 2, node);
return 0;
}
if (node_placement(*trie, key, cidr, bits, &node, lock)) {
@@ -233,7 +199,7 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
return 0;
}
- newnode = kzalloc(sizeof(*newnode), GFP_KERNEL);
+ newnode = kmem_cache_zalloc(node_cache, GFP_KERNEL);
if (unlikely(!newnode))
return -ENOMEM;
RCU_INIT_POINTER(newnode->peer, peer);
@@ -243,10 +209,10 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
if (!node) {
down = rcu_dereference_protected(*trie, lockdep_is_held(lock));
} else {
- down = rcu_dereference_protected(CHOOSE_NODE(node, key),
- lockdep_is_held(lock));
+ const u8 bit = choose(node, key);
+ down = rcu_dereference_protected(node->bit[bit], lockdep_is_held(lock));
if (!down) {
- rcu_assign_pointer(CHOOSE_NODE(node, key), newnode);
+ connect_node(&node->bit[bit], bit, newnode);
return 0;
}
}
@@ -254,30 +220,29 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
parent = node;
if (newnode->cidr == cidr) {
- rcu_assign_pointer(CHOOSE_NODE(newnode, down->bits), down);
+ choose_and_connect_node(newnode, down);
if (!parent)
- rcu_assign_pointer(*trie, newnode);
+ connect_node(trie, 2, newnode);
else
- rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits),
- newnode);
- } else {
- node = kzalloc(sizeof(*node), GFP_KERNEL);
- if (unlikely(!node)) {
- list_del(&newnode->peer_list);
- kfree(newnode);
- return -ENOMEM;
- }
- INIT_LIST_HEAD(&node->peer_list);
- copy_and_assign_cidr(node, newnode->bits, cidr, bits);
+ choose_and_connect_node(parent, newnode);
+ return 0;
+ }
- rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down);
- rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode);
- if (!parent)
- rcu_assign_pointer(*trie, node);
- else
- rcu_assign_pointer(CHOOSE_NODE(parent, node->bits),
- node);
+ node = kmem_cache_zalloc(node_cache, GFP_KERNEL);
+ if (unlikely(!node)) {
+ list_del(&newnode->peer_list);
+ kmem_cache_free(node_cache, newnode);
+ return -ENOMEM;
}
+ INIT_LIST_HEAD(&node->peer_list);
+ copy_and_assign_cidr(node, newnode->bits, cidr, bits);
+
+ choose_and_connect_node(node, down);
+ choose_and_connect_node(node, newnode);
+ if (!parent)
+ connect_node(trie, 2, node);
+ else
+ choose_and_connect_node(parent, node);
return 0;
}
@@ -335,9 +300,41 @@ int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip,
void wg_allowedips_remove_by_peer(struct allowedips *table,
struct wg_peer *peer, struct mutex *lock) /* Detaches every node on @peer's allowedips_list from the peer; nodes left with fewer than two children are also unlinked from the trie and freed through call_rcu(). @lock is only used for the lockdep_is_held() checks below. */
{
+ struct allowedips_node *node, *child, **parent_bit, *parent, *tmp;
+ bool free_parent;
+
+ if (list_empty(&peer->allowedips_list)) /* The peer owns no nodes: return before touching table->seq. */
+ return;
++table->seq; /* Unchanged context line of the diff: a pre-increment of table->seq, not an added line. */
-walk_remove_by_peer(&table->root4, peer, lock); /* Removed by this patch together with walk_remove_by_peer() itself. */
-walk_remove_by_peer(&table->root6, peer, lock);
+ list_for_each_entry_safe(node, tmp, &peer->allowedips_list, peer_list) { /* _safe variant: the current entry is unlinked from the list inside the loop body. */
+ list_del_init(&node->peer_list);
+ RCU_INIT_POINTER(node->peer, NULL);
+ if (node->bit[0] && node->bit[1]) /* Two children: the node stays in the trie, now without a peer. */
+ continue;
+ child = rcu_dereference_protected(node->bit[!rcu_access_pointer(node->bit[0])], /* Index is 1 when bit[0] is NULL, else 0: yields the only child, or NULL if there is none. */
+ lockdep_is_held(lock));
+ if (child)
+ child->parent_bit_packed = node->parent_bit_packed; /* The child takes over the slot record of the node it replaces. */
+ parent_bit = (struct allowedips_node **)(node->parent_bit_packed & ~3UL); /* Clear the two low tag bits to recover the address of the slot that points at this node. */
+ *parent_bit = child; /* Splice the node out: its slot now points at the child (or NULL). */
+ parent = (void *)parent_bit -
+ offsetof(struct allowedips_node, bit[node->parent_bit_packed & 1]); /* Node containing that slot, assuming the slot is a bit[] member; only dereferenced below after the tag is checked to be 0 or 1. */
+ free_parent = !rcu_access_pointer(node->bit[0]) && /* Also free the parent when the removed node had no children, ... */
+ !rcu_access_pointer(node->bit[1]) &&
+ (node->parent_bit_packed & 3) <= 1 && /* ... it hung off a bit[] slot (add() uses tag 2 for the trie root), ... */
+ !rcu_access_pointer(parent->peer); /* ... and the parent carries no peer of its own. */
+ if (free_parent)
+ child = rcu_dereference_protected( /* Reuse child for the parent's other branch. */
+ parent->bit[!(node->parent_bit_packed & 1)],
+ lockdep_is_held(lock));
+ call_rcu(&node->rcu, node_free_rcu); /* The node is freed by node_free_rcu, invoked via call_rcu(). */
+ if (!free_parent)
+ continue;
+ if (child)
+ child->parent_bit_packed = parent->parent_bit_packed;
+ *(struct allowedips_node **)(parent->parent_bit_packed & ~3UL) = child; /* Replace the parent by its remaining child in the parent's own slot. */
+ call_rcu(&parent->rcu, node_free_rcu);
+ }
}
int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr)
@@ -374,4 +371,16 @@ struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table,
return NULL;
}
+int __init wg_allowedips_slab_init(void) /* Creates node_cache, the slab cache the nodes in this file are allocated from. Returns 0 on success or -ENOMEM if the cache could not be created. */
+{
+ node_cache = KMEM_CACHE(allowedips_node, 0); /* Cache for struct allowedips_node objects, created with flags 0. */
+ return node_cache ? 0 : -ENOMEM;
+}
+
+void wg_allowedips_slab_uninit(void) /* Tears down node_cache. */
+{
+ rcu_barrier(); /* Runs before the destroy: presumably lets callbacks already queued with call_rcu() (node_free_rcu here) finish while node_cache still exists; confirm against the RCU documentation. */
+ kmem_cache_destroy(node_cache);
+}
+
#include "selftest/allowedips.c"