Diffstat (limited to 'net')
-rw-r--r--  net/6lowpan/core.c | 1
-rw-r--r--  net/batman-adv/multicast.c | 2
-rw-r--r--  net/bpf/bpf_dummy_struct_ops.c | 6
-rw-r--r--  net/bpf/test_run.c | 18
-rw-r--r--  net/caif/caif_dev.c | 2
-rw-r--r--  net/can/gw.c | 9
-rw-r--r--  net/core/dev.c | 33
-rw-r--r--  net/core/drop_monitor.c | 41
-rw-r--r--  net/core/filter.c | 17
-rw-r--r--  net/core/gro.c | 16
-rw-r--r--  net/core/link_watch.c | 6
-rw-r--r--  net/core/net_namespace.c | 17
-rw-r--r--  net/hsr/hsr_debugfs.c | 40
-rw-r--r--  net/hsr/hsr_device.c | 10
-rw-r--r--  net/hsr/hsr_forward.c | 7
-rw-r--r--  net/hsr/hsr_framereg.c | 200
-rw-r--r--  net/hsr/hsr_framereg.h | 8
-rw-r--r--  net/hsr/hsr_main.h | 9
-rw-r--r--  net/hsr/hsr_netlink.c | 4
-rw-r--r--  net/ieee802154/6lowpan/core.c | 1
-rw-r--r--  net/ipv4/bpf_tcp_ca.c | 6
-rw-r--r--  net/ipv4/fib_frontend.c | 30
-rw-r--r--  net/ipv4/fib_lookup.h | 3
-rw-r--r--  net/ipv4/fib_rules.c | 18
-rw-r--r--  net/ipv4/fib_semantics.c | 14
-rw-r--r--  net/ipv4/fib_trie.c | 58
-rw-r--r--  net/ipv4/inet_hashtables.c | 53
-rw-r--r--  net/ipv4/ip_input.c | 31
-rw-r--r--  net/ipv4/ipmr.c | 20
-rw-r--r--  net/ipv4/nexthop.c | 12
-rw-r--r--  net/ipv4/route.c | 3
-rw-r--r--  net/ipv4/udp.c | 22
-rw-r--r--  net/ipv6/addrconf.c | 145
-rw-r--r--  net/ipv6/fib6_rules.c | 30
-rw-r--r--  net/ipv6/inet6_hashtables.c | 5
-rw-r--r--  net/ipv6/ioam6_iptunnel.c | 59
-rw-r--r--  net/ipv6/ip6_input.c | 2
-rw-r--r--  net/ipv6/ip6_tunnel.c | 5
-rw-r--r--  net/ipv6/ip6mr.c | 35
-rw-r--r--  net/mctp/af_mctp.c | 189
-rw-r--r--  net/mctp/device.c | 1
-rw-r--r--  net/mctp/route.c | 124
-rw-r--r--  net/mctp/test/route-test.c | 157
-rw-r--r--  net/mptcp/pm_netlink.c | 13
-rw-r--r--  net/netfilter/core.c | 3
-rw-r--r--  net/openvswitch/datapath.c | 18
-rw-r--r--  net/openvswitch/datapath.h | 2
-rw-r--r--  net/openvswitch/flow.c | 3
-rw-r--r--  net/sched/cls_api.c | 45
-rw-r--r--  net/tls/tls_sw.c | 3
-rw-r--r--  net/xdp/xsk.c | 13
-rw-r--r--  net/xdp/xsk_buff_pool.c | 7
-rw-r--r--  net/xdp/xsk_queue.h | 19
53 files changed, 1115 insertions, 480 deletions
diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c
index a068757eabaf..7b3341cef926 100644
--- a/net/6lowpan/core.c
+++ b/net/6lowpan/core.c
@@ -5,6 +5,7 @@
* (C) 2015 Pengutronix, Alexander Aring <aar@pengutronix.de>
*/
+#include <linux/if_arp.h>
#include <linux/module.h>
#include <net/6lowpan.h>
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index f4004cf0ff6f..9f311fddfaf9 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -134,7 +134,7 @@ static u8 batadv_mcast_mla_rtr_flags_softif_get_ipv6(struct net_device *dev)
{
struct inet6_dev *in6_dev = __in6_dev_get(dev);
- if (in6_dev && in6_dev->cnf.mc_forwarding)
+ if (in6_dev && atomic_read(&in6_dev->cnf.mc_forwarding))
return BATADV_NO_FLAGS;
else
return BATADV_MCAST_WANT_NO_RTR6;
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c
index fbc896323bec..d0e54e30658a 100644
--- a/net/bpf/bpf_dummy_struct_ops.c
+++ b/net/bpf/bpf_dummy_struct_ops.c
@@ -145,7 +145,8 @@ static int bpf_dummy_ops_btf_struct_access(struct bpf_verifier_log *log,
const struct btf *btf,
const struct btf_type *t, int off,
int size, enum bpf_access_type atype,
- u32 *next_btf_id)
+ u32 *next_btf_id,
+ enum bpf_type_flag *flag)
{
const struct btf_type *state;
s32 type_id;
@@ -162,7 +163,8 @@ static int bpf_dummy_ops_btf_struct_access(struct bpf_verifier_log *log,
return -EACCES;
}
- err = btf_struct_access(log, btf, t, off, size, atype, next_btf_id);
+ err = btf_struct_access(log, btf, t, off, size, atype, next_btf_id,
+ flag);
if (err < 0)
return err;
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 65b52b4bd6e1..f08034500813 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -154,7 +154,8 @@ static int bpf_test_finish(const union bpf_attr *kattr,
goto out;
if (sinfo) {
- int i, offset = len, data_len;
+ int i, offset = len;
+ u32 data_len;
for (i = 0; i < sinfo->nr_frags; i++) {
skb_frag_t *frag = &sinfo->frags[i];
@@ -164,7 +165,7 @@ static int bpf_test_finish(const union bpf_attr *kattr,
break;
}
- data_len = min_t(int, copy_size - offset,
+ data_len = min_t(u32, copy_size - offset,
skb_frag_size(frag));
if (copy_to_user(data_out + offset,
@@ -960,7 +961,12 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
while (size < kattr->test.data_size_in) {
struct page *page;
skb_frag_t *frag;
- int data_len;
+ u32 data_len;
+
+ if (sinfo->nr_frags == MAX_SKB_FRAGS) {
+ ret = -ENOMEM;
+ goto out;
+ }
page = alloc_page(GFP_KERNEL);
if (!page) {
@@ -971,7 +977,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
frag = &sinfo->frags[sinfo->nr_frags++];
__skb_frag_set_page(frag, page);
- data_len = min_t(int, kattr->test.data_size_in - size,
+ data_len = min_t(u32, kattr->test.data_size_in - size,
PAGE_SIZE);
skb_frag_size_set(frag, data_len);
@@ -1141,7 +1147,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
if (!range_is_zero(user_ctx, offsetofend(typeof(*user_ctx), local_port), sizeof(*user_ctx)))
goto out;
- if (user_ctx->local_port > U16_MAX || user_ctx->remote_port > U16_MAX) {
+ if (user_ctx->local_port > U16_MAX) {
ret = -ERANGE;
goto out;
}
@@ -1149,7 +1155,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
ctx.family = (u16)user_ctx->family;
ctx.protocol = (u16)user_ctx->protocol;
ctx.dport = (u16)user_ctx->local_port;
- ctx.sport = (__force __be16)user_ctx->remote_port;
+ ctx.sport = user_ctx->remote_port;
switch (ctx.family) {
case AF_INET:
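
A note on the min_t() conversions above: a minimal userspace sketch (not kernel code) of why the type matters. min_t below is a simplified copy of the kernel macro; the kattr sizes are user-controlled, so a signed cast can go negative.

#include <stdio.h>
#include <stdint.h>

#define min_t(type, x, y) ((type)(x) < (type)(y) ? (type)(x) : (type)(y))

int main(void)
{
	uint32_t copy_size = 0x90000000u;	/* user-supplied, > INT_MAX */
	uint32_t frag_size = 4096;

	/* As int, 0x90000000 wraps negative and "wins" the comparison. */
	printf("min_t(int, ...) = %d\n", min_t(int, copy_size, frag_size));
	/* As u32, the comparison is unsigned and picks 4096 as expected. */
	printf("min_t(u32, ...) = %u\n", min_t(uint32_t, copy_size, frag_size));
	return 0;
}
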
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 440139706130..52dd0b6835bc 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -268,7 +268,7 @@ static int receive(struct sk_buff *skb, struct net_device *dev,
err = caifd->layer.up->receive(caifd->layer.up, pkt);
- /* For -EILSEQ the packet is not freed so so it now */
+ /* For -EILSEQ the packet is not freed so free it now */
if (err == -EILSEQ)
cfpkt_destroy(pkt);
diff --git a/net/can/gw.c b/net/can/gw.c
index d8861e862f15..24221352e059 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -1239,16 +1239,19 @@ static int __net_init cangw_pernet_init(struct net *net)
return 0;
}
-static void __net_exit cangw_pernet_exit(struct net *net)
+static void __net_exit cangw_pernet_exit_batch(struct list_head *net_list)
{
+ struct net *net;
+
rtnl_lock();
- cgw_remove_all_jobs(net);
+ list_for_each_entry(net, net_list, exit_list)
+ cgw_remove_all_jobs(net);
rtnl_unlock();
}
static struct pernet_operations cangw_pernet_ops = {
.init = cangw_pernet_init,
- .exit = cangw_pernet_exit,
+ .exit_batch = cangw_pernet_exit_batch,
};
static __init int cgw_module_init(void)
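
The cangw conversion above follows the generic pernet exit_batch pattern: take rtnl_lock() once for the whole batch of dying namespaces instead of once per namespace. A minimal sketch of that pattern, with my_cleanup_net() as a hypothetical per-namespace teardown helper:

/* Sketch of the pernet ->exit_batch pattern: one rtnl_lock() round
 * trip covers every netns on the exit_list.
 */
static void __net_exit my_pernet_exit_batch(struct list_head *net_list)
{
	struct net *net;

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list)
		my_cleanup_net(net);	/* hypothetical per-netns teardown */
	rtnl_unlock();
}

static struct pernet_operations my_pernet_ops = {
	.exit_batch = my_pernet_exit_batch,
};
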
diff --git a/net/core/dev.c b/net/core/dev.c
index f79744d99413..f5ef51601081 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -9683,8 +9683,10 @@ int register_netdevice(struct net_device *dev)
linkwatch_init_dev(dev);
dev_init_scheduler(dev);
- dev_hold(dev);
+
+ dev_hold_track(dev, &dev->dev_registered_tracker, GFP_KERNEL);
list_netdevice(dev);
+
add_device_randomness(dev->dev_addr, dev->addr_len);
/* If the device has permanent device address, driver should
@@ -10172,7 +10174,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->pcpu_refcnt = alloc_percpu(int);
if (!dev->pcpu_refcnt)
goto free_dev;
- dev_hold(dev);
+ __dev_hold(dev);
#else
refcount_set(&dev->dev_refcnt, 1);
#endif
@@ -10449,7 +10451,7 @@ void unregister_netdevice_many(struct list_head *head)
synchronize_net();
list_for_each_entry(dev, head, unreg_list) {
- dev_put(dev);
+ dev_put_track(dev, &dev->dev_registered_tracker);
net_set_todo(dev);
}
@@ -10732,8 +10734,7 @@ static int __net_init netdev_init(struct net *net)
BUILD_BUG_ON(GRO_HASH_BUCKETS >
8 * sizeof_field(struct napi_struct, gro_bitmask));
- if (net != &init_net)
- INIT_LIST_HEAD(&net->dev_base_head);
+ INIT_LIST_HEAD(&net->dev_base_head);
net->dev_name_head = netdev_create_hash();
if (net->dev_name_head == NULL)
@@ -10849,14 +10850,14 @@ static struct pernet_operations __net_initdata netdev_net_ops = {
.exit = netdev_exit,
};
-static void __net_exit default_device_exit(struct net *net)
+static void __net_exit default_device_exit_net(struct net *net)
{
struct net_device *dev, *aux;
/*
* Push all migratable network devices back to the
* initial network namespace
*/
- rtnl_lock();
+ ASSERT_RTNL();
for_each_netdev_safe(net, dev, aux) {
int err;
char fb_name[IFNAMSIZ];
@@ -10880,22 +10881,22 @@ static void __net_exit default_device_exit(struct net *net)
BUG();
}
}
- rtnl_unlock();
}
static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
{
- /* Return with the rtnl_lock held when there are no network
+ /* Return (with the rtnl_lock held) when there are no network
* devices unregistering in any network namespace in net_list.
*/
- struct net *net;
- bool unregistering;
DEFINE_WAIT_FUNC(wait, woken_wake_function);
+ bool unregistering;
+ struct net *net;
+ ASSERT_RTNL();
add_wait_queue(&netdev_unregistering_wq, &wait);
for (;;) {
unregistering = false;
- rtnl_lock();
+
list_for_each_entry(net, net_list, exit_list) {
if (net->dev_unreg_count > 0) {
unregistering = true;
@@ -10907,6 +10908,7 @@ static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
__rtnl_unlock();
wait_woken(&wait, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
+ rtnl_lock();
}
remove_wait_queue(&netdev_unregistering_wq, &wait);
}
@@ -10922,6 +10924,11 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
struct net *net;
LIST_HEAD(dev_kill_list);
+ rtnl_lock();
+ list_for_each_entry(net, net_list, exit_list) {
+ default_device_exit_net(net);
+ cond_resched();
+ }
/* To prevent network device cleanup code from dereferencing
* loopback devices or network devices that have been freed
* wait here for all pending unregistrations to complete,
@@ -10934,6 +10941,7 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
* default_device_exit_batch.
*/
rtnl_lock_unregistering(net_list);
+
list_for_each_entry(net, net_list, exit_list) {
for_each_netdev_reverse(net, dev) {
if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink)
@@ -10947,7 +10955,6 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
}
static struct pernet_operations __net_initdata default_device_ops = {
- .exit = default_device_exit,
.exit_batch = default_device_exit_batch,
};
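
The dev_hold_track()/dev_put_track() pairing above attaches a netdevice_tracker to each reference so CONFIG_NET_DEV_REFCNT_TRACKER can attribute a leaked reference to its holder. A minimal sketch of the pairing; my_obj and its helpers are illustrative, not kernel code:

struct my_obj {
	struct net_device *dev;
	netdevice_tracker tracker;	/* one tracker per held reference */
};

static void my_obj_attach(struct my_obj *o, struct net_device *dev)
{
	o->dev = dev;
	dev_hold_track(dev, &o->tracker, GFP_KERNEL);
}

static void my_obj_detach(struct my_obj *o)
{
	dev_put_track(o->dev, &o->tracker);	/* pairs with the hold above */
	o->dev = NULL;
}
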
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 7b288a121a41..4641126b8a20 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -48,6 +48,19 @@
static int trace_state = TRACE_OFF;
static bool monitor_hw;
+#undef EM
+#undef EMe
+
+#define EM(a, b) [a] = #b,
+#define EMe(a, b) [a] = #b
+
+/* drop_reasons is used to translate 'enum skb_drop_reason' to string,
+ * which is reported to user space.
+ */
+static const char * const drop_reasons[] = {
+ TRACE_SKB_DROP_REASON
+};
+
/* net_dm_mutex
*
* An overall lock guarding every operation coming from userspace.
@@ -126,6 +139,7 @@ struct net_dm_skb_cb {
struct devlink_trap_metadata *hw_metadata;
void *pc;
};
+ enum skb_drop_reason reason;
};
#define NET_DM_SKB_CB(__skb) ((struct net_dm_skb_cb *)&((__skb)->cb[0]))
@@ -498,6 +512,7 @@ static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
{
ktime_t tstamp = ktime_get_real();
struct per_cpu_dm_data *data;
+ struct net_dm_skb_cb *cb;
struct sk_buff *nskb;
unsigned long flags;
@@ -508,7 +523,11 @@ static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
if (!nskb)
return;
- NET_DM_SKB_CB(nskb)->pc = location;
+ if ((unsigned int)reason >= SKB_DROP_REASON_MAX)
+ reason = SKB_DROP_REASON_NOT_SPECIFIED;
+ cb = NET_DM_SKB_CB(nskb);
+ cb->reason = reason;
+ cb->pc = location;
/* Override the timestamp because we care about the time when the
* packet was dropped.
*/
@@ -553,7 +572,8 @@ static size_t net_dm_in_port_size(void)
#define NET_DM_MAX_SYMBOL_LEN 40
-static size_t net_dm_packet_report_size(size_t payload_len)
+static size_t net_dm_packet_report_size(size_t payload_len,
+ enum skb_drop_reason reason)
{
size_t size;
@@ -574,6 +594,8 @@ static size_t net_dm_packet_report_size(size_t payload_len)
nla_total_size(sizeof(u32)) +
/* NET_DM_ATTR_PROTO */
nla_total_size(sizeof(u16)) +
+ /* NET_DM_ATTR_REASON */
+ nla_total_size(strlen(drop_reasons[reason]) + 1) +
/* NET_DM_ATTR_PAYLOAD */
nla_total_size(payload_len);
}
@@ -606,7 +628,7 @@ nla_put_failure:
static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb,
size_t payload_len)
{
- u64 pc = (u64)(uintptr_t) NET_DM_SKB_CB(skb)->pc;
+ struct net_dm_skb_cb *cb = NET_DM_SKB_CB(skb);
char buf[NET_DM_MAX_SYMBOL_LEN];
struct nlattr *attr;
void *hdr;
@@ -620,10 +642,15 @@ static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb,
if (nla_put_u16(msg, NET_DM_ATTR_ORIGIN, NET_DM_ORIGIN_SW))
goto nla_put_failure;
- if (nla_put_u64_64bit(msg, NET_DM_ATTR_PC, pc, NET_DM_ATTR_PAD))
+ if (nla_put_u64_64bit(msg, NET_DM_ATTR_PC, (u64)(uintptr_t)cb->pc,
+ NET_DM_ATTR_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_string(msg, NET_DM_ATTR_REASON,
+ drop_reasons[cb->reason]))
goto nla_put_failure;
- snprintf(buf, sizeof(buf), "%pS", NET_DM_SKB_CB(skb)->pc);
+ snprintf(buf, sizeof(buf), "%pS", cb->pc);
if (nla_put_string(msg, NET_DM_ATTR_SYMBOL, buf))
goto nla_put_failure;
@@ -679,7 +706,9 @@ static void net_dm_packet_report(struct sk_buff *skb)
if (net_dm_trunc_len)
payload_len = min_t(size_t, net_dm_trunc_len, payload_len);
- msg = nlmsg_new(net_dm_packet_report_size(payload_len), GFP_KERNEL);
+ msg = nlmsg_new(net_dm_packet_report_size(payload_len,
+ NET_DM_SKB_CB(skb)->reason),
+ GFP_KERNEL);
if (!msg)
goto out;
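
The EM()/EMe() macros above are the usual X-macro trick: one list (TRACE_SKB_DROP_REASON in the kernel) expands once into enum values and once into the matching string table, so the two can never drift apart. A runnable userspace sketch with MY_REASONS standing in for the kernel list:

#include <stdio.h>

#define MY_REASONS			\
	EM(R_NOT_SPECIFIED, NOT_SPECIFIED)	\
	EMe(R_NO_SOCKET, NO_SOCKET)

/* First expansion: enum values. */
#define EM(a, b) a,
#define EMe(a, b) a
enum my_reason { MY_REASONS };
#undef EM
#undef EMe

/* Second expansion: the name table, indexed by the same enum. */
#define EM(a, b) [a] = #b,
#define EMe(a, b) [a] = #b
static const char * const my_reason_str[] = { MY_REASONS };
#undef EM
#undef EMe

int main(void)
{
	printf("%s\n", my_reason_str[R_NO_SOCKET]);	/* prints NO_SOCKET */
	return 0;
}
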
diff --git a/net/core/filter.c b/net/core/filter.c
index 9615ae1ab530..818244068c2d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2603,7 +2603,7 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start,
* account for the headroom.
*/
bytes_sg_total = start - offset + bytes;
- if (!test_bit(i, &msg->sg.copy) && bytes_sg_total <= len)
+ if (!test_bit(i, msg->sg.copy) && bytes_sg_total <= len)
goto out;
/* At this point we need to linearize multiple scatterlist
@@ -2809,7 +2809,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
/* Place newly allocated data buffer */
sk_mem_charge(msg->sk, len);
msg->sg.size += len;
- __clear_bit(new, &msg->sg.copy);
+ __clear_bit(new, msg->sg.copy);
sg_set_page(&msg->sg.data[new], page, len + copy, 0);
if (rsge.length) {
get_page(sg_page(&rsge));
@@ -8275,6 +8275,7 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
const int size_default = sizeof(__u32);
+ int field_size;
if (off < 0 || off >= sizeof(struct bpf_sock))
return false;
@@ -8286,7 +8287,6 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
case offsetof(struct bpf_sock, family):
case offsetof(struct bpf_sock, type):
case offsetof(struct bpf_sock, protocol):
- case offsetof(struct bpf_sock, dst_port):
case offsetof(struct bpf_sock, src_port):
case offsetof(struct bpf_sock, rx_queue_mapping):
case bpf_ctx_range(struct bpf_sock, src_ip4):
@@ -8295,6 +8295,14 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
bpf_ctx_record_field_size(info, size_default);
return bpf_ctx_narrow_access_ok(off, size, size_default);
+ case bpf_ctx_range(struct bpf_sock, dst_port):
+ field_size = size == size_default ?
+ size_default : sizeof_field(struct bpf_sock, dst_port);
+ bpf_ctx_record_field_size(info, field_size);
+ return bpf_ctx_narrow_access_ok(off, size, field_size);
+ case offsetofend(struct bpf_sock, dst_port) ...
+ offsetof(struct bpf_sock, dst_ip4) - 1:
+ return false;
}
return size == size_default;
@@ -10845,7 +10853,8 @@ static bool sk_lookup_is_valid_access(int off, int size,
case bpf_ctx_range(struct bpf_sk_lookup, local_ip4):
case bpf_ctx_range_till(struct bpf_sk_lookup, remote_ip6[0], remote_ip6[3]):
case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]):
- case bpf_ctx_range(struct bpf_sk_lookup, remote_port):
+ case offsetof(struct bpf_sk_lookup, remote_port) ...
+ offsetof(struct bpf_sk_lookup, local_ip4) - 1:
case bpf_ctx_range(struct bpf_sk_lookup, local_port):
case bpf_ctx_range(struct bpf_sk_lookup, ingress_ifindex):
bpf_ctx_record_field_size(info, sizeof(__u32));
diff --git a/net/core/gro.c b/net/core/gro.c
index a11b286d1495..ee5e7e889d8b 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -459,29 +459,22 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
skb_set_network_header(skb, skb_gro_offset(skb));
skb_reset_mac_len(skb);
- NAPI_GRO_CB(skb)->same_flow = 0;
+ BUILD_BUG_ON(sizeof_field(struct napi_gro_cb, zeroed) != sizeof(u32));
+ BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct napi_gro_cb, zeroed),
+ sizeof(u32))); /* Avoid slow unaligned acc */
+ *(u32 *)&NAPI_GRO_CB(skb)->zeroed = 0;
NAPI_GRO_CB(skb)->flush = skb_is_gso(skb) || skb_has_frag_list(skb);
- NAPI_GRO_CB(skb)->free = 0;
- NAPI_GRO_CB(skb)->encap_mark = 0;
- NAPI_GRO_CB(skb)->recursion_counter = 0;
- NAPI_GRO_CB(skb)->is_fou = 0;
NAPI_GRO_CB(skb)->is_atomic = 1;
- NAPI_GRO_CB(skb)->gro_remcsum_start = 0;
/* Setup for GRO checksum validation */
switch (skb->ip_summed) {
case CHECKSUM_COMPLETE:
NAPI_GRO_CB(skb)->csum = skb->csum;
NAPI_GRO_CB(skb)->csum_valid = 1;
- NAPI_GRO_CB(skb)->csum_cnt = 0;
break;
case CHECKSUM_UNNECESSARY:
NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1;
- NAPI_GRO_CB(skb)->csum_valid = 0;
break;
- default:
- NAPI_GRO_CB(skb)->csum_cnt = 0;
- NAPI_GRO_CB(skb)->csum_valid = 0;
}
pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive,
@@ -634,7 +627,6 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
skb->encapsulation = 0;
skb_shinfo(skb)->gso_type = 0;
- skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
if (unlikely(skb->slow_gro)) {
skb_orphan(skb);
skb_ext_reset(skb);
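
The single *(u32 *)&NAPI_GRO_CB(skb)->zeroed = 0 store above replaces several per-field clears; the BUILD_BUG_ON()s pin the grouped fields to exactly one aligned 32-bit word. A userspace sketch of the idea (field names illustrative, not the real napi_gro_cb layout):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct my_cb {
	uint16_t flush;
	uint16_t pad;
	struct {			/* fields grouped for bulk clearing */
		uint8_t same_flow;
		uint8_t encap_mark;
		uint8_t csum_cnt;
		uint8_t free;
	} zeroed;
};

int main(void)
{
	struct my_cb cb;

	/* Compile-time guards mirroring the BUILD_BUG_ON()s above. */
	static_assert(sizeof(cb.zeroed) == sizeof(uint32_t), "size");
	static_assert(offsetof(struct my_cb, zeroed) % sizeof(uint32_t) == 0,
		      "alignment");

	*(uint32_t *)&cb.zeroed = 0;	/* one store clears all four fields */
	return 0;
}
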
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index b0f5344d1185..95098d1a49bd 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -166,10 +166,10 @@ static void linkwatch_do_dev(struct net_device *dev)
netdev_state_change(dev);
}
- /* Note: our callers are responsible for
- * calling netdev_tracker_free().
+ /* Note: our callers are responsible for calling netdev_tracker_free().
+ * This is the reason we use __dev_put() instead of dev_put().
*/
- dev_put(dev);
+ __dev_put(dev);
}
static void __linkwatch_run_queue(int urgent_only)
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 8711350085d6..0ec2f5906a27 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -44,13 +44,7 @@ EXPORT_SYMBOL_GPL(net_rwsem);
static struct key_tag init_net_key_domain = { .usage = REFCOUNT_INIT(1) };
#endif
-struct net init_net = {
- .ns.count = REFCOUNT_INIT(1),
- .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
-#ifdef CONFIG_KEYS
- .key_domain = &init_net_key_domain,
-#endif
-};
+struct net init_net;
EXPORT_SYMBOL(init_net);
static bool init_net_initialized;
@@ -1087,7 +1081,7 @@ out:
rtnl_set_sk_err(net, RTNLGRP_NSID, err);
}
-static int __init net_ns_init(void)
+void __init net_ns_init(void)
{
struct net_generic *ng;
@@ -1108,6 +1102,9 @@ static int __init net_ns_init(void)
rcu_assign_pointer(init_net.gen, ng);
+#ifdef CONFIG_KEYS
+ init_net.key_domain = &init_net_key_domain;
+#endif
down_write(&pernet_ops_rwsem);
if (setup_net(&init_net, &init_user_ns))
panic("Could not setup the initial network namespace");
@@ -1122,12 +1119,8 @@ static int __init net_ns_init(void)
RTNL_FLAG_DOIT_UNLOCKED);
rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
RTNL_FLAG_DOIT_UNLOCKED);
-
- return 0;
}
-pure_initcall(net_ns_init);
-
static void free_exit_list(struct pernet_operations *ops, struct list_head *net_exit_list)
{
ops_pre_exit_list(ops, net_exit_list);
diff --git a/net/hsr/hsr_debugfs.c b/net/hsr/hsr_debugfs.c
index 99f3af1a9d4d..fe6094e9a2db 100644
--- a/net/hsr/hsr_debugfs.c
+++ b/net/hsr/hsr_debugfs.c
@@ -17,6 +17,7 @@
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/debugfs.h>
+#include <linux/jhash.h>
#include "hsr_main.h"
#include "hsr_framereg.h"
@@ -28,6 +29,7 @@ hsr_node_table_show(struct seq_file *sfp, void *data)
{
struct hsr_priv *priv = (struct hsr_priv *)sfp->private;
struct hsr_node *node;
+ int i;
seq_printf(sfp, "Node Table entries for (%s) device\n",
(priv->prot_version == PRP_V1 ? "PRP" : "HSR"));
@@ -39,22 +41,28 @@ hsr_node_table_show(struct seq_file *sfp, void *data)
seq_puts(sfp, "DAN-H\n");
rcu_read_lock();
- list_for_each_entry_rcu(node, &priv->node_db, mac_list) {
- /* skip self node */
- if (hsr_addr_is_self(priv, node->macaddress_A))
- continue;
- seq_printf(sfp, "%pM ", &node->macaddress_A[0]);
- seq_printf(sfp, "%pM ", &node->macaddress_B[0]);
- seq_printf(sfp, "%10lx, ", node->time_in[HSR_PT_SLAVE_A]);
- seq_printf(sfp, "%10lx, ", node->time_in[HSR_PT_SLAVE_B]);
- seq_printf(sfp, "%14x, ", node->addr_B_port);
-
- if (priv->prot_version == PRP_V1)
- seq_printf(sfp, "%5x, %5x, %5x\n",
- node->san_a, node->san_b,
- (node->san_a == 0 && node->san_b == 0));
- else
- seq_printf(sfp, "%5x\n", 1);
+
+ for (i = 0 ; i < priv->hash_buckets; i++) {
+ hlist_for_each_entry_rcu(node, &priv->node_db[i], mac_list) {
+ /* skip self node */
+ if (hsr_addr_is_self(priv, node->macaddress_A))
+ continue;
+ seq_printf(sfp, "%pM ", &node->macaddress_A[0]);
+ seq_printf(sfp, "%pM ", &node->macaddress_B[0]);
+ seq_printf(sfp, "%10lx, ",
+ node->time_in[HSR_PT_SLAVE_A]);
+ seq_printf(sfp, "%10lx, ",
+ node->time_in[HSR_PT_SLAVE_B]);
+ seq_printf(sfp, "%14x, ", node->addr_B_port);
+
+ if (priv->prot_version == PRP_V1)
+ seq_printf(sfp, "%5x, %5x, %5x\n",
+ node->san_a, node->san_b,
+ (node->san_a == 0 &&
+ node->san_b == 0));
+ else
+ seq_printf(sfp, "%5x\n", 1);
+ }
}
rcu_read_unlock();
return 0;
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index e57fdad9ef94..7f250216433d 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -485,12 +485,16 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
{
bool unregister = false;
struct hsr_priv *hsr;
- int res;
+ int res, i;
hsr = netdev_priv(hsr_dev);
INIT_LIST_HEAD(&hsr->ports);
- INIT_LIST_HEAD(&hsr->node_db);
- INIT_LIST_HEAD(&hsr->self_node_db);
+ INIT_HLIST_HEAD(&hsr->self_node_db);
+ hsr->hash_buckets = HSR_HSIZE;
+ get_random_bytes(&hsr->hash_seed, sizeof(hsr->hash_seed));
+ for (i = 0; i < hsr->hash_buckets; i++)
+ INIT_HLIST_HEAD(&hsr->node_db[i]);
+
spin_lock_init(&hsr->list_lock);
eth_hw_addr_set(hsr_dev, slave[0]->dev_addr);
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index e59cbb4f0cd1..5bf357734b11 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -570,20 +570,23 @@ static int fill_frame_info(struct hsr_frame_info *frame,
struct ethhdr *ethhdr;
__be16 proto;
int ret;
+ u32 hash;
/* Check if skb contains ethhdr */
if (skb->mac_len < sizeof(struct ethhdr))
return -EINVAL;
memset(frame, 0, sizeof(*frame));
+
+ ethhdr = (struct ethhdr *)skb_mac_header(skb);
+ hash = hsr_mac_hash(port->hsr, ethhdr->h_source);
frame->is_supervision = is_supervision_frame(port->hsr, skb);
- frame->node_src = hsr_get_node(port, &hsr->node_db, skb,
+ frame->node_src = hsr_get_node(port, &hsr->node_db[hash], skb,
frame->is_supervision,
port->type);
if (!frame->node_src)
return -1; /* Unknown node and !is_supervision, or no mem */
- ethhdr = (struct ethhdr *)skb_mac_header(skb);
frame->is_vlan = false;
proto = ethhdr->h_proto;
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 0775f0f95dbf..b3c6ffa1894d 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -15,11 +15,28 @@
#include <linux/etherdevice.h>
#include <linux/slab.h>
#include <linux/rculist.h>
+#include <linux/jhash.h>
#include "hsr_main.h"
#include "hsr_framereg.h"
#include "hsr_netlink.h"
-/* TODO: use hash lists for mac addresses (linux/jhash.h)? */
+u32 hsr_mac_hash(struct hsr_priv *hsr, const unsigned char *addr)
+{
+ u32 hash = jhash(addr, ETH_ALEN, hsr->hash_seed);
+
+ return reciprocal_scale(hash, hsr->hash_buckets);
+}
+
+struct hsr_node *hsr_node_get_first(struct hlist_head *head)
+{
+ struct hlist_node *first;
+
+ first = rcu_dereference(hlist_first_rcu(head));
+ if (first)
+ return hlist_entry(first, struct hsr_node, mac_list);
+
+ return NULL;
+}
/* seq_nr_after(a, b) - return true if a is after (higher in sequence than) b,
* false otherwise.
@@ -42,8 +59,7 @@ bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr)
{
struct hsr_node *node;
- node = list_first_or_null_rcu(&hsr->self_node_db, struct hsr_node,
- mac_list);
+ node = hsr_node_get_first(&hsr->self_node_db);
if (!node) {
WARN_ONCE(1, "HSR: No self node\n");
return false;
@@ -59,12 +75,12 @@ bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr)
/* Search for mac entry. Caller must hold rcu read lock.
*/
-static struct hsr_node *find_node_by_addr_A(struct list_head *node_db,
+static struct hsr_node *find_node_by_addr_A(struct hlist_head *node_db,
const unsigned char addr[ETH_ALEN])
{
struct hsr_node *node;
- list_for_each_entry_rcu(node, node_db, mac_list) {
+ hlist_for_each_entry_rcu(node, node_db, mac_list) {
if (ether_addr_equal(node->macaddress_A, addr))
return node;
}
@@ -79,7 +95,7 @@ int hsr_create_self_node(struct hsr_priv *hsr,
const unsigned char addr_a[ETH_ALEN],
const unsigned char addr_b[ETH_ALEN])
{
- struct list_head *self_node_db = &hsr->self_node_db;
+ struct hlist_head *self_node_db = &hsr->self_node_db;
struct hsr_node *node, *oldnode;
node = kmalloc(sizeof(*node), GFP_KERNEL);
@@ -90,14 +106,13 @@ int hsr_create_self_node(struct hsr_priv *hsr,
ether_addr_copy(node->macaddress_B, addr_b);
spin_lock_bh(&hsr->list_lock);
- oldnode = list_first_or_null_rcu(self_node_db,
- struct hsr_node, mac_list);
+ oldnode = hsr_node_get_first(self_node_db);
if (oldnode) {
- list_replace_rcu(&oldnode->mac_list, &node->mac_list);
+ hlist_replace_rcu(&oldnode->mac_list, &node->mac_list);
spin_unlock_bh(&hsr->list_lock);
kfree_rcu(oldnode, rcu_head);
} else {
- list_add_tail_rcu(&node->mac_list, self_node_db);
+ hlist_add_tail_rcu(&node->mac_list, self_node_db);
spin_unlock_bh(&hsr->list_lock);
}
@@ -106,25 +121,25 @@ int hsr_create_self_node(struct hsr_priv *hsr,
void hsr_del_self_node(struct hsr_priv *hsr)
{
- struct list_head *self_node_db = &hsr->self_node_db;
+ struct hlist_head *self_node_db = &hsr->self_node_db;
struct hsr_node *node;
spin_lock_bh(&hsr->list_lock);
- node = list_first_or_null_rcu(self_node_db, struct hsr_node, mac_list);
+ node = hsr_node_get_first(self_node_db);
if (node) {
- list_del_rcu(&node->mac_list);
+ hlist_del_rcu(&node->mac_list);
kfree_rcu(node, rcu_head);
}
spin_unlock_bh(&hsr->list_lock);
}
-void hsr_del_nodes(struct list_head *node_db)
+void hsr_del_nodes(struct hlist_head *node_db)
{
struct hsr_node *node;
- struct hsr_node *tmp;
+ struct hlist_node *tmp;
- list_for_each_entry_safe(node, tmp, node_db, mac_list)
- kfree(node);
+ hlist_for_each_entry_safe(node, tmp, node_db, mac_list)
+ kfree_rcu(node, rcu_head);
}
void prp_handle_san_frame(bool san, enum hsr_port_type port,
@@ -145,7 +160,7 @@ void prp_handle_san_frame(bool san, enum hsr_port_type port,
* originating from the newly added node.
*/
static struct hsr_node *hsr_add_node(struct hsr_priv *hsr,
- struct list_head *node_db,
+ struct hlist_head *node_db,
unsigned char addr[],
u16 seq_out, bool san,
enum hsr_port_type rx_port)
@@ -175,14 +190,14 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr,
hsr->proto_ops->handle_san_frame(san, rx_port, new_node);
spin_lock_bh(&hsr->list_lock);
- list_for_each_entry_rcu(node, node_db, mac_list,
- lockdep_is_held(&hsr->list_lock)) {
+ hlist_for_each_entry_rcu(node, node_db, mac_list,
+ lockdep_is_held(&hsr->list_lock)) {
if (ether_addr_equal(node->macaddress_A, addr))
goto out;
if (ether_addr_equal(node->macaddress_B, addr))
goto out;
}
- list_add_tail_rcu(&new_node->mac_list, node_db);
+ hlist_add_tail_rcu(&new_node->mac_list, node_db);
spin_unlock_bh(&hsr->list_lock);
return new_node;
out:
@@ -202,7 +217,7 @@ void prp_update_san_info(struct hsr_node *node, bool is_sup)
/* Get the hsr_node from which 'skb' was sent.
*/
-struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db,
+struct hsr_node *hsr_get_node(struct hsr_port *port, struct hlist_head *node_db,
struct sk_buff *skb, bool is_sup,
enum hsr_port_type rx_port)
{
@@ -218,7 +233,7 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db,
ethhdr = (struct ethhdr *)skb_mac_header(skb);
- list_for_each_entry_rcu(node, node_db, mac_list) {
+ hlist_for_each_entry_rcu(node, node_db, mac_list) {
if (ether_addr_equal(node->macaddress_A, ethhdr->h_source)) {
if (hsr->proto_ops->update_san_info)
hsr->proto_ops->update_san_info(node, is_sup);
@@ -268,11 +283,12 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame)
struct hsr_sup_tlv *hsr_sup_tlv;
struct hsr_node *node_real;
struct sk_buff *skb = NULL;
- struct list_head *node_db;
+ struct hlist_head *node_db;
struct ethhdr *ethhdr;
int i;
unsigned int pull_size = 0;
unsigned int total_pull_size = 0;
+ u32 hash;
/* Here either frame->skb_hsr or frame->skb_prp should be
* valid as supervision frame always will have protocol
@@ -310,11 +326,13 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame)
hsr_sp = (struct hsr_sup_payload *)skb->data;
/* Merge node_curr (registered on macaddress_B) into node_real */
- node_db = &port_rcv->hsr->node_db;
- node_real = find_node_by_addr_A(node_db, hsr_sp->macaddress_A);
+ node_db = port_rcv->hsr->node_db;
+ hash = hsr_mac_hash(hsr, hsr_sp->macaddress_A);
+ node_real = find_node_by_addr_A(&node_db[hash], hsr_sp->macaddress_A);
if (!node_real)
/* No frame received from AddrA of this node yet */
- node_real = hsr_add_node(hsr, node_db, hsr_sp->macaddress_A,
+ node_real = hsr_add_node(hsr, &node_db[hash],
+ hsr_sp->macaddress_A,
HSR_SEQNR_START - 1, true,
port_rcv->type);
if (!node_real)
@@ -348,7 +366,8 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame)
hsr_sp = (struct hsr_sup_payload *)skb->data;
/* Check if redbox mac and node mac are equal. */
- if (!ether_addr_equal(node_real->macaddress_A, hsr_sp->macaddress_A)) {
+ if (!ether_addr_equal(node_real->macaddress_A,
+ hsr_sp->macaddress_A)) {
/* This is a redbox supervision frame for a VDAN! */
goto done;
}
@@ -368,7 +387,7 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame)
node_real->addr_B_port = port_rcv->type;
spin_lock_bh(&hsr->list_lock);
- list_del_rcu(&node_curr->mac_list);
+ hlist_del_rcu(&node_curr->mac_list);
spin_unlock_bh(&hsr->list_lock);
kfree_rcu(node_curr, rcu_head);
@@ -406,6 +425,7 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb,
struct hsr_port *port)
{
struct hsr_node *node_dst;
+ u32 hash;
if (!skb_mac_header_was_set(skb)) {
WARN_ONCE(1, "%s: Mac header not set\n", __func__);
@@ -415,7 +435,8 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb,
if (!is_unicast_ether_addr(eth_hdr(skb)->h_dest))
return;
- node_dst = find_node_by_addr_A(&port->hsr->node_db,
+ hash = hsr_mac_hash(port->hsr, eth_hdr(skb)->h_dest);
+ node_dst = find_node_by_addr_A(&port->hsr->node_db[hash],
eth_hdr(skb)->h_dest);
if (!node_dst) {
if (net_ratelimit())
@@ -491,59 +512,73 @@ static struct hsr_port *get_late_port(struct hsr_priv *hsr,
void hsr_prune_nodes(struct timer_list *t)
{
struct hsr_priv *hsr = from_timer(hsr, t, prune_timer);
+ struct hlist_node *tmp;
struct hsr_node *node;
- struct hsr_node *tmp;
struct hsr_port *port;
unsigned long timestamp;
unsigned long time_a, time_b;
+ int i;
spin_lock_bh(&hsr->list_lock);
- list_for_each_entry_safe(node, tmp, &hsr->node_db, mac_list) {
- /* Don't prune own node. Neither time_in[HSR_PT_SLAVE_A]
- * nor time_in[HSR_PT_SLAVE_B], will ever be updated for
- * the master port. Thus the master node will be repeatedly
- * pruned leading to packet loss.
- */
- if (hsr_addr_is_self(hsr, node->macaddress_A))
- continue;
-
- /* Shorthand */
- time_a = node->time_in[HSR_PT_SLAVE_A];
- time_b = node->time_in[HSR_PT_SLAVE_B];
-
- /* Check for timestamps old enough to risk wrap-around */
- if (time_after(jiffies, time_a + MAX_JIFFY_OFFSET / 2))
- node->time_in_stale[HSR_PT_SLAVE_A] = true;
- if (time_after(jiffies, time_b + MAX_JIFFY_OFFSET / 2))
- node->time_in_stale[HSR_PT_SLAVE_B] = true;
-
- /* Get age of newest frame from node.
- * At least one time_in is OK here; nodes get pruned long
- * before both time_ins can get stale
- */
- timestamp = time_a;
- if (node->time_in_stale[HSR_PT_SLAVE_A] ||
- (!node->time_in_stale[HSR_PT_SLAVE_B] &&
- time_after(time_b, time_a)))
- timestamp = time_b;
-
- /* Warn of ring error only as long as we get frames at all */
- if (time_is_after_jiffies(timestamp +
- msecs_to_jiffies(1.5 * MAX_SLAVE_DIFF))) {
- rcu_read_lock();
- port = get_late_port(hsr, node);
- if (port)
- hsr_nl_ringerror(hsr, node->macaddress_A, port);
- rcu_read_unlock();
- }
- /* Prune old entries */
- if (time_is_before_jiffies(timestamp +
- msecs_to_jiffies(HSR_NODE_FORGET_TIME))) {
- hsr_nl_nodedown(hsr, node->macaddress_A);
- list_del_rcu(&node->mac_list);
- /* Note that we need to free this entry later: */
- kfree_rcu(node, rcu_head);
+ for (i = 0; i < hsr->hash_buckets; i++) {
+ hlist_for_each_entry_safe(node, tmp, &hsr->node_db[i],
+ mac_list) {
+ /* Don't prune own node.
+ * Neither time_in[HSR_PT_SLAVE_A]
+ * nor time_in[HSR_PT_SLAVE_B], will ever be updated
+ * for the master port. Thus the master node will be
+ * repeatedly pruned leading to packet loss.
+ */
+ if (hsr_addr_is_self(hsr, node->macaddress_A))
+ continue;
+
+ /* Shorthand */
+ time_a = node->time_in[HSR_PT_SLAVE_A];
+ time_b = node->time_in[HSR_PT_SLAVE_B];
+
+ /* Check for timestamps old enough to
+ * risk wrap-around
+ */
+ if (time_after(jiffies, time_a + MAX_JIFFY_OFFSET / 2))
+ node->time_in_stale[HSR_PT_SLAVE_A] = true;
+ if (time_after(jiffies, time_b + MAX_JIFFY_OFFSET / 2))
+ node->time_in_stale[HSR_PT_SLAVE_B] = true;
+
+ /* Get age of newest frame from node.
+ * At least one time_in is OK here; nodes get pruned
+ * long before both time_ins can get stale
+ */
+ timestamp = time_a;
+ if (node->time_in_stale[HSR_PT_SLAVE_A] ||
+ (!node->time_in_stale[HSR_PT_SLAVE_B] &&
+ time_after(time_b, time_a)))
+ timestamp = time_b;
+
+ /* Warn of ring error only as long as we get
+ * frames at all
+ */
+ if (time_is_after_jiffies(timestamp +
+ msecs_to_jiffies(1.5 * MAX_SLAVE_DIFF))) {
+ rcu_read_lock();
+ port = get_late_port(hsr, node);
+ if (port)
+ hsr_nl_ringerror(hsr,
+ node->macaddress_A,
+ port);
+ rcu_read_unlock();
+ }
+
+ /* Prune old entries */
+ if (time_is_before_jiffies(timestamp +
+ msecs_to_jiffies(HSR_NODE_FORGET_TIME))) {
+ hsr_nl_nodedown(hsr, node->macaddress_A);
+ hlist_del_rcu(&node->mac_list);
+ /* Note that we need to free this
+ * entry later:
+ */
+ kfree_rcu(node, rcu_head);
+ }
}
}
spin_unlock_bh(&hsr->list_lock);
@@ -557,17 +592,19 @@ void *hsr_get_next_node(struct hsr_priv *hsr, void *_pos,
unsigned char addr[ETH_ALEN])
{
struct hsr_node *node;
+ u32 hash;
+
+ hash = hsr_mac_hash(hsr, addr);
if (!_pos) {
- node = list_first_or_null_rcu(&hsr->node_db,
- struct hsr_node, mac_list);
+ node = hsr_node_get_first(&hsr->node_db[hash]);
if (node)
ether_addr_copy(addr, node->macaddress_A);
return node;
}
node = _pos;
- list_for_each_entry_continue_rcu(node, &hsr->node_db, mac_list) {
+ hlist_for_each_entry_continue_rcu(node, mac_list) {
ether_addr_copy(addr, node->macaddress_A);
return node;
}
@@ -587,8 +624,11 @@ int hsr_get_node_data(struct hsr_priv *hsr,
struct hsr_node *node;
struct hsr_port *port;
unsigned long tdiff;
+ u32 hash;
+
+ hash = hsr_mac_hash(hsr, addr);
- node = find_node_by_addr_A(&hsr->node_db, addr);
+ node = find_node_by_addr_A(&hsr->node_db[hash], addr);
if (!node)
return -ENOENT;
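
hsr_mac_hash() above combines jhash() with reciprocal_scale() to pick a bucket without a modulo. A userspace sketch: reciprocal_scale() is reproduced from its kernel definition (a multiply-shift that maps a 32-bit hash onto [0, buckets)), while fake_jhash() is only a placeholder for the real <linux/jhash.h> function:

#include <stdint.h>
#include <stdio.h>

static uint32_t reciprocal_scale(uint32_t val, uint32_t ep_ro)
{
	return (uint32_t)(((uint64_t)val * ep_ro) >> 32);
}

static uint32_t fake_jhash(const uint8_t *key, int len, uint32_t seed)
{
	uint32_t h = seed;		/* placeholder mix, not real jhash */

	while (len--)
		h = h * 31 + *key++;
	return h;
}

int main(void)
{
	const uint8_t mac[6] = { 0x02, 0x11, 0x22, 0x33, 0x44, 0x55 };
	uint32_t seed = 0xdeadbeef;	/* cf. hsr->hash_seed */
	uint32_t buckets = 256;		/* cf. HSR_HSIZE */

	printf("bucket=%u\n",
	       reciprocal_scale(fake_jhash(mac, 6, seed), buckets));
	return 0;
}
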
diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
index bdbb8c822ba1..d7cce6b161e3 100644
--- a/net/hsr/hsr_framereg.h
+++ b/net/hsr/hsr_framereg.h
@@ -28,9 +28,11 @@ struct hsr_frame_info {
bool is_from_san;
};
+u32 hsr_mac_hash(struct hsr_priv *hsr, const unsigned char *addr);
+struct hsr_node *hsr_node_get_first(struct hlist_head *head);
void hsr_del_self_node(struct hsr_priv *hsr);
-void hsr_del_nodes(struct list_head *node_db);
-struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db,
+void hsr_del_nodes(struct hlist_head *node_db);
+struct hsr_node *hsr_get_node(struct hsr_port *port, struct hlist_head *node_db,
struct sk_buff *skb, bool is_sup,
enum hsr_port_type rx_port);
void hsr_handle_sup_frame(struct hsr_frame_info *frame);
@@ -68,7 +70,7 @@ void prp_handle_san_frame(bool san, enum hsr_port_type port,
void prp_update_san_info(struct hsr_node *node, bool is_sup);
struct hsr_node {
- struct list_head mac_list;
+ struct hlist_node mac_list;
unsigned char macaddress_A[ETH_ALEN];
unsigned char macaddress_B[ETH_ALEN];
/* Local slave through which AddrB frames are received from this node */
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index ff9ec7634218..ca556bda3467 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -63,6 +63,9 @@ struct hsr_tag {
#define HSR_V1_SUP_LSDUSIZE 52
+#define HSR_HSIZE_SHIFT 8
+#define HSR_HSIZE BIT(HSR_HSIZE_SHIFT)
+
/* The helper functions below assumes that 'path' occupies the 4 most
* significant bits of the 16-bit field shared by 'path' and 'LSDU_size' (or
* equivalently, the 4 most significant bits of HSR tag byte 14).
@@ -201,8 +204,8 @@ struct hsr_proto_ops {
struct hsr_priv {
struct rcu_head rcu_head;
struct list_head ports;
- struct list_head node_db; /* Known HSR nodes */
- struct list_head self_node_db; /* MACs of slaves */
+ struct hlist_head node_db[HSR_HSIZE]; /* Known HSR nodes */
+ struct hlist_head self_node_db; /* MACs of slaves */
struct timer_list announce_timer; /* Supervision frame dispatch */
struct timer_list prune_timer;
int announce_count;
@@ -212,6 +215,8 @@ struct hsr_priv {
spinlock_t seqnr_lock; /* locking for sequence_nr */
spinlock_t list_lock; /* locking for node list */
struct hsr_proto_ops *proto_ops;
+ u32 hash_buckets;
+ u32 hash_seed;
#define PRP_LAN_ID 0x5 /* 0x1010 for A and 0x1011 for B. Bit 0 is set
* based on SLAVE_A or SLAVE_B
*/
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index f3c8f91dbe2c..1405c037cf7a 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -105,6 +105,7 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev,
static void hsr_dellink(struct net_device *dev, struct list_head *head)
{
struct hsr_priv *hsr = netdev_priv(dev);
+ int i;
del_timer_sync(&hsr->prune_timer);
del_timer_sync(&hsr->announce_timer);
@@ -113,7 +114,8 @@ static void hsr_dellink(struct net_device *dev, struct list_head *head)
hsr_del_ports(hsr);
hsr_del_self_node(hsr);
- hsr_del_nodes(&hsr->node_db);
+ for (i = 0; i < hsr->hash_buckets; i++)
+ hsr_del_nodes(&hsr->node_db[i]);
unregister_netdevice_queue(dev, head);
}
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 2cf62718a282..2c087b7f17c5 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -47,6 +47,7 @@
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/ieee802154.h>
+#include <linux/if_arp.h>
#include <net/ipv6.h>
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index b60c9fd7147e..f79ab942f03b 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -96,12 +96,14 @@ static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log,
const struct btf *btf,
const struct btf_type *t, int off,
int size, enum bpf_access_type atype,
- u32 *next_btf_id)
+ u32 *next_btf_id,
+ enum bpf_type_flag *flag)
{
size_t end;
if (atype == BPF_READ)
- return btf_struct_access(log, btf, t, off, size, atype, next_btf_id);
+ return btf_struct_access(log, btf, t, off, size, atype, next_btf_id,
+ flag);
if (t != tcp_sock_type) {
bpf_log(log, "only read is supported\n");
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 4d61ddd8a0ec..54811728d906 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -32,6 +32,7 @@
#include <linux/list.h>
#include <linux/slab.h>
+#include <net/inet_dscp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
@@ -735,8 +736,16 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
memset(cfg, 0, sizeof(*cfg));
rtm = nlmsg_data(nlh);
+
+ if (!inet_validate_dscp(rtm->rtm_tos)) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid dsfield (tos): ECN bits must be 0");
+ err = -EINVAL;
+ goto errout;
+ }
+ cfg->fc_dscp = inet_dsfield_to_dscp(rtm->rtm_tos);
+
cfg->fc_dst_len = rtm->rtm_dst_len;
- cfg->fc_tos = rtm->rtm_tos;
cfg->fc_table = rtm->rtm_table;
cfg->fc_protocol = rtm->rtm_protocol;
cfg->fc_scope = rtm->rtm_scope;
@@ -1547,7 +1556,7 @@ static void ip_fib_net_exit(struct net *net)
{
int i;
- rtnl_lock();
+ ASSERT_RTNL();
#ifdef CONFIG_IP_MULTIPLE_TABLES
RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
@@ -1572,7 +1581,7 @@ static void ip_fib_net_exit(struct net *net)
#ifdef CONFIG_IP_MULTIPLE_TABLES
fib4_rules_exit(net);
#endif
- rtnl_unlock();
+
kfree(net->ipv4.fib_table_hash);
fib4_notifier_exit(net);
}
@@ -1599,7 +1608,9 @@ out:
out_proc:
nl_fib_lookup_exit(net);
out_nlfl:
+ rtnl_lock();
ip_fib_net_exit(net);
+ rtnl_unlock();
goto out;
}
@@ -1607,12 +1618,23 @@ static void __net_exit fib_net_exit(struct net *net)
{
fib_proc_exit(net);
nl_fib_lookup_exit(net);
- ip_fib_net_exit(net);
+}
+
+static void __net_exit fib_net_exit_batch(struct list_head *net_list)
+{
+ struct net *net;
+
+ rtnl_lock();
+ list_for_each_entry(net, net_list, exit_list)
+ ip_fib_net_exit(net);
+
+ rtnl_unlock();
}
static struct pernet_operations fib_net_ops = {
.init = fib_net_init,
.exit = fib_net_exit,
+ .exit_batch = fib_net_exit_batch,
};
void __init ip_fib_init(void)
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index e184bcb19943..a63014b54809 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -4,13 +4,14 @@
#include <linux/types.h>
#include <linux/list.h>
+#include <net/inet_dscp.h>
#include <net/ip_fib.h>
#include <net/nexthop.h>
struct fib_alias {
struct hlist_node fa_list;
struct fib_info *fa_info;
- u8 fa_tos;
+ dscp_t fa_dscp;
u8 fa_type;
u8 fa_state;
u8 fa_slen;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index e0b6c8b6de57..117c48571cf0 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -23,6 +23,7 @@
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/export.h>
+#include <net/inet_dscp.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/tcp.h>
@@ -35,7 +36,7 @@ struct fib4_rule {
struct fib_rule common;
u8 dst_len;
u8 src_len;
- u8 tos;
+ dscp_t dscp;
__be32 src;
__be32 srcmask;
__be32 dst;
@@ -49,7 +50,7 @@ static bool fib4_rule_matchall(const struct fib_rule *rule)
{
struct fib4_rule *r = container_of(rule, struct fib4_rule, common);
- if (r->dst_len || r->src_len || r->tos)
+ if (r->dst_len || r->src_len || r->dscp)
return false;
return fib_rule_matchall(rule);
}
@@ -185,7 +186,7 @@ INDIRECT_CALLABLE_SCOPE int fib4_rule_match(struct fib_rule *rule,
((daddr ^ r->dst) & r->dstmask))
return 0;
- if (r->tos && (r->tos != fl4->flowi4_tos))
+ if (r->dscp && r->dscp != inet_dsfield_to_dscp(fl4->flowi4_tos))
return 0;
if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto))
@@ -225,10 +226,12 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
int err = -EINVAL;
struct fib4_rule *rule4 = (struct fib4_rule *) rule;
- if (frh->tos & ~IPTOS_TOS_MASK) {
- NL_SET_ERR_MSG(extack, "Invalid tos");
+ if (!inet_validate_dscp(frh->tos)) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid dsfield (tos): ECN bits must be 0");
goto errout;
}
+ rule4->dscp = inet_dsfield_to_dscp(frh->tos);
/* split local/main if they are not already split */
err = fib_unmerge(net);
@@ -270,7 +273,6 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
rule4->srcmask = inet_make_mask(rule4->src_len);
rule4->dst_len = frh->dst_len;
rule4->dstmask = inet_make_mask(rule4->dst_len);
- rule4->tos = frh->tos;
net->ipv4.fib_has_custom_rules = true;
@@ -313,7 +315,7 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
if (frh->dst_len && (rule4->dst_len != frh->dst_len))
return 0;
- if (frh->tos && (rule4->tos != frh->tos))
+ if (frh->tos && inet_dscp_to_dsfield(rule4->dscp) != frh->tos)
return 0;
#ifdef CONFIG_IP_ROUTE_CLASSID
@@ -337,7 +339,7 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
frh->dst_len = rule4->dst_len;
frh->src_len = rule4->src_len;
- frh->tos = rule4->tos;
+ frh->tos = inet_dscp_to_dsfield(rule4->dscp);
if ((rule4->dst_len &&
nla_put_in_addr(skb, FRA_DST, rule4->dst)) ||
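
The dscp_t conversions above rely on the <net/inet_dscp.h> helpers: a TOS/dsfield byte is only a valid DSCP when its two low (ECN) bits are clear. A userspace sketch mirroring that check:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define INET_DSCP_MASK 0xfc	/* upper 6 bits of the TOS/dsfield byte */

static bool inet_validate_dscp(uint8_t dsfield)
{
	return !(dsfield & ~INET_DSCP_MASK);	/* ECN bits must be 0 */
}

int main(void)
{
	printf("0x10 valid: %d\n", inet_validate_dscp(0x10));	/* 1 */
	printf("0x11 valid: %d\n", inet_validate_dscp(0x11));	/* 0: ECN set */
	return 0;
}
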
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 4c5399450682..c9c4f2f66b38 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -32,6 +32,7 @@
#include <linux/hash.h>
#include <net/arp.h>
+#include <net/inet_dscp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
@@ -523,7 +524,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
fri.tb_id = tb_id;
fri.dst = key;
fri.dst_len = dst_len;
- fri.tos = fa->fa_tos;
+ fri.tos = inet_dscp_to_dsfield(fa->fa_dscp);
fri.type = fa->fa_type;
fri.offload = fa->offload;
fri.trap = fa->trap;
@@ -2039,7 +2040,7 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
int order = -1, last_idx = -1;
struct fib_alias *fa, *fa1 = NULL;
u32 last_prio = res->fi->fib_priority;
- u8 last_tos = 0;
+ dscp_t last_dscp = 0;
hlist_for_each_entry_rcu(fa, fa_head, fa_list) {
struct fib_info *next_fi = fa->fa_info;
@@ -2047,19 +2048,20 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
if (fa->fa_slen != slen)
continue;
- if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
+ if (fa->fa_dscp &&
+ fa->fa_dscp != inet_dsfield_to_dscp(flp->flowi4_tos))
continue;
if (fa->tb_id != tb->tb_id)
continue;
if (next_fi->fib_priority > last_prio &&
- fa->fa_tos == last_tos) {
- if (last_tos)
+ fa->fa_dscp == last_dscp) {
+ if (last_dscp)
continue;
break;
}
if (next_fi->fib_flags & RTNH_F_DEAD)
continue;
- last_tos = fa->fa_tos;
+ last_dscp = fa->fa_dscp;
last_prio = next_fi->fib_priority;
if (next_fi->fib_scope != res->scope ||
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 8060524f4256..c05cd105e95e 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -61,6 +61,7 @@
#include <linux/vmalloc.h>
#include <linux/notifier.h>
#include <net/net_namespace.h>
+#include <net/inet_dscp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
@@ -81,7 +82,7 @@ static int call_fib_entry_notifier(struct notifier_block *nb,
.dst = dst,
.dst_len = dst_len,
.fi = fa->fa_info,
- .tos = fa->fa_tos,
+ .tos = inet_dscp_to_dsfield(fa->fa_dscp),
.type = fa->fa_type,
.tb_id = fa->tb_id,
};
@@ -98,7 +99,7 @@ static int call_fib_entry_notifiers(struct net *net,
.dst = dst,
.dst_len = dst_len,
.fi = fa->fa_info,
- .tos = fa->fa_tos,
+ .tos = inet_dscp_to_dsfield(fa->fa_dscp),
.type = fa->fa_type,
.tb_id = fa->tb_id,
};
@@ -973,13 +974,13 @@ static struct key_vector *fib_find_node(struct trie *t,
return n;
}
-/* Return the first fib alias matching TOS with
+/* Return the first fib alias matching DSCP with
* priority less than or equal to PRIO.
* If 'find_first' is set, return the first matching
- * fib alias, regardless of TOS and priority.
+ * fib alias, regardless of DSCP and priority.
*/
static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen,
- u8 tos, u32 prio, u32 tb_id,
+ dscp_t dscp, u32 prio, u32 tb_id,
bool find_first)
{
struct fib_alias *fa;
@@ -988,6 +989,10 @@ static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen,
return NULL;
hlist_for_each_entry(fa, fah, fa_list) {
+ /* Avoid Sparse warning when using dscp_t in inequalities */
+ u8 __fa_dscp = inet_dscp_to_dsfield(fa->fa_dscp);
+ u8 __dscp = inet_dscp_to_dsfield(dscp);
+
if (fa->fa_slen < slen)
continue;
if (fa->fa_slen != slen)
@@ -998,9 +1003,9 @@ static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen,
break;
if (find_first)
return fa;
- if (fa->fa_tos > tos)
+ if (__fa_dscp > __dscp)
continue;
- if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos)
+ if (fa->fa_info->fib_priority >= prio || __fa_dscp < __dscp)
return fa;
}
@@ -1027,8 +1032,8 @@ fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri)
hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
if (fa->fa_slen == slen && fa->tb_id == fri->tb_id &&
- fa->fa_tos == fri->tos && fa->fa_info == fri->fi &&
- fa->fa_type == fri->type)
+ fa->fa_dscp == inet_dsfield_to_dscp(fri->tos) &&
+ fa->fa_info == fri->fi && fa->fa_type == fri->type)
return fa;
}
@@ -1210,7 +1215,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
struct fib_info *fi;
u8 plen = cfg->fc_dst_len;
u8 slen = KEYLENGTH - plen;
- u8 tos = cfg->fc_tos;
+ dscp_t dscp;
u32 key;
int err;
@@ -1227,12 +1232,13 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
goto err;
}
+ dscp = cfg->fc_dscp;
l = fib_find_node(t, &tp, key);
- fa = l ? fib_find_alias(&l->leaf, slen, tos, fi->fib_priority,
+ fa = l ? fib_find_alias(&l->leaf, slen, dscp, fi->fib_priority,
tb->tb_id, false) : NULL;
/* Now fa, if non-NULL, points to the first fib alias
- * with the same keys [prefix,tos,priority], if such key already
+ * with the same keys [prefix,dscp,priority], if such key already
* exists or to the node before which we will insert new one.
*
* If fa is NULL, we will need to allocate a new one and
@@ -1240,7 +1246,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
* of the new alias.
*/
- if (fa && fa->fa_tos == tos &&
+ if (fa && fa->fa_dscp == dscp &&
fa->fa_info->fib_priority == fi->fib_priority) {
struct fib_alias *fa_first, *fa_match;
@@ -1260,7 +1266,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
hlist_for_each_entry_from(fa, fa_list) {
if ((fa->fa_slen != slen) ||
(fa->tb_id != tb->tb_id) ||
- (fa->fa_tos != tos))
+ (fa->fa_dscp != dscp))
break;
if (fa->fa_info->fib_priority != fi->fib_priority)
break;
@@ -1288,7 +1294,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
goto out;
fi_drop = fa->fa_info;
- new_fa->fa_tos = fa->fa_tos;
+ new_fa->fa_dscp = fa->fa_dscp;
new_fa->fa_info = fi;
new_fa->fa_type = cfg->fc_type;
state = fa->fa_state;
@@ -1351,7 +1357,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
goto out;
new_fa->fa_info = fi;
- new_fa->fa_tos = tos;
+ new_fa->fa_dscp = dscp;
new_fa->fa_type = cfg->fc_type;
new_fa->fa_state = 0;
new_fa->fa_slen = slen;
@@ -1567,7 +1573,8 @@ found:
if (index >= (1ul << fa->fa_slen))
continue;
}
- if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
+ if (fa->fa_dscp &&
+ inet_dscp_to_dsfield(fa->fa_dscp) != flp->flowi4_tos)
continue;
if (fi->fib_dead)
continue;
@@ -1703,7 +1710,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
struct key_vector *l, *tp;
u8 plen = cfg->fc_dst_len;
u8 slen = KEYLENGTH - plen;
- u8 tos = cfg->fc_tos;
+ dscp_t dscp;
u32 key;
key = ntohl(cfg->fc_dst);
@@ -1715,11 +1722,13 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
if (!l)
return -ESRCH;
- fa = fib_find_alias(&l->leaf, slen, tos, 0, tb->tb_id, false);
+ dscp = cfg->fc_dscp;
+ fa = fib_find_alias(&l->leaf, slen, dscp, 0, tb->tb_id, false);
if (!fa)
return -ESRCH;
- pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
+ pr_debug("Deleting %08x/%d dsfield=0x%02x t=%p\n", key, plen,
+ inet_dscp_to_dsfield(dscp), t);
fa_to_delete = NULL;
hlist_for_each_entry_from(fa, fa_list) {
@@ -1727,7 +1736,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
if ((fa->fa_slen != slen) ||
(fa->tb_id != tb->tb_id) ||
- (fa->fa_tos != tos))
+ (fa->fa_dscp != dscp))
break;
if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
@@ -2295,7 +2304,7 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
fri.tb_id = tb->tb_id;
fri.dst = xkey;
fri.dst_len = KEYLENGTH - fa->fa_slen;
- fri.tos = fa->fa_tos;
+ fri.tos = inet_dscp_to_dsfield(fa->fa_dscp);
fri.type = fa->fa_type;
fri.offload = fa->offload;
fri.trap = fa->trap;
@@ -2807,8 +2816,9 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
fa->fa_info->fib_scope),
rtn_type(buf2, sizeof(buf2),
fa->fa_type));
- if (fa->fa_tos)
- seq_printf(seq, " tos=%d", fa->fa_tos);
+ if (fa->fa_dscp)
+ seq_printf(seq, " tos=%d",
+ inet_dscp_to_dsfield(fa->fa_dscp));
seq_putc(seq, '\n');
}
}
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 30ab717ff1b8..17440840a791 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -637,7 +637,9 @@ int __inet_hash(struct sock *sk, struct sock *osk)
int err = 0;
if (sk->sk_state != TCP_LISTEN) {
+ local_bh_disable();
inet_ehash_nolisten(sk, osk, NULL);
+ local_bh_enable();
return 0;
}
WARN_ON(!sk_unhashed(sk));
@@ -669,45 +671,54 @@ int inet_hash(struct sock *sk)
{
int err = 0;
- if (sk->sk_state != TCP_CLOSE) {
- local_bh_disable();
+ if (sk->sk_state != TCP_CLOSE)
err = __inet_hash(sk, NULL);
- local_bh_enable();
- }
return err;
}
EXPORT_SYMBOL_GPL(inet_hash);
-void inet_unhash(struct sock *sk)
+static void __inet_unhash(struct sock *sk, struct inet_listen_hashbucket *ilb)
{
- struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
- struct inet_listen_hashbucket *ilb = NULL;
- spinlock_t *lock;
-
if (sk_unhashed(sk))
return;
- if (sk->sk_state == TCP_LISTEN) {
- ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
- lock = &ilb->lock;
- } else {
- lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
- }
- spin_lock_bh(lock);
- if (sk_unhashed(sk))
- goto unlock;
-
if (rcu_access_pointer(sk->sk_reuseport_cb))
reuseport_stop_listen_sock(sk);
if (ilb) {
+ struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
+
inet_unhash2(hashinfo, sk);
ilb->count--;
}
__sk_nulls_del_node_init_rcu(sk);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
-unlock:
- spin_unlock_bh(lock);
+}
+
+void inet_unhash(struct sock *sk)
+{
+ struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
+
+ if (sk_unhashed(sk))
+ return;
+
+ if (sk->sk_state == TCP_LISTEN) {
+ struct inet_listen_hashbucket *ilb;
+
+ ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
+ /* Don't disable bottom halves while acquiring the lock to
+ * avoid circular locking dependency on PREEMPT_RT.
+ */
+ spin_lock(&ilb->lock);
+ __inet_unhash(sk, ilb);
+ spin_unlock(&ilb->lock);
+ } else {
+ spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
+
+ spin_lock_bh(lock);
+ __inet_unhash(sk, NULL);
+ spin_unlock_bh(lock);
+ }
}
EXPORT_SYMBOL_GPL(inet_unhash);
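
The split above exists because listener buckets can now be taken with a plain spin_lock(), avoiding a circular locking dependency on PREEMPT_RT, while established-hash buckets keep spin_lock_bh() since they are also touched from softirq context. A schematic sketch; my_unhash() is illustrative only:

static void my_unhash(struct sock *sk, spinlock_t *listener_lock,
		      spinlock_t *ehash_lock)
{
	if (sk->sk_state == TCP_LISTEN) {
		spin_lock(listener_lock);	/* no BH disable needed */
		/* ... __inet_unhash()-style teardown ... */
		spin_unlock(listener_lock);
	} else {
		spin_lock_bh(ehash_lock);	/* softirq-safe */
		/* ... */
		spin_unlock_bh(ehash_lock);
	}
}
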
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 3a025c011971..d94f9f7e60c3 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -196,7 +196,8 @@ resubmit:
if (ipprot) {
if (!ipprot->no_policy) {
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
- kfree_skb(skb);
+ kfree_skb_reason(skb,
+ SKB_DROP_REASON_XFRM_POLICY);
return;
}
nf_reset_ct(skb);
@@ -215,7 +216,7 @@ resubmit:
icmp_send(skb, ICMP_DEST_UNREACH,
ICMP_PROT_UNREACH, 0);
}
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_IP_NOPROTO);
} else {
__IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
consume_skb(skb);
@@ -318,8 +319,10 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
{
const struct iphdr *iph = ip_hdr(skb);
int (*edemux)(struct sk_buff *skb);
+ int err, drop_reason;
struct rtable *rt;
- int err;
+
+ drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (ip_can_use_hint(skb, iph, hint)) {
err = ip_route_use_hint(skb, iph->daddr, iph->saddr, iph->tos,
@@ -396,19 +399,23 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
* so-called "hole-196" attack) so do it for both.
*/
if (in_dev &&
- IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST))
+ IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST)) {
+ drop_reason = SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST;
goto drop;
+ }
}
return NET_RX_SUCCESS;
drop:
- kfree_skb(skb);
+ kfree_skb_reason(skb, drop_reason);
return NET_RX_DROP;
drop_error:
- if (err == -EXDEV)
+ if (err == -EXDEV) {
+ drop_reason = SKB_DROP_REASON_IP_RPFILTER;
__NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
+ }
goto drop;
}
@@ -436,13 +443,16 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
{
const struct iphdr *iph;
+ int drop_reason;
u32 len;
/* When the interface is in promisc. mode, drop all the crap
* that it receives, do not try to analyse it.
*/
- if (skb->pkt_type == PACKET_OTHERHOST)
+ if (skb->pkt_type == PACKET_OTHERHOST) {
+ drop_reason = SKB_DROP_REASON_OTHERHOST;
goto drop;
+ }
__IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len);
@@ -452,6 +462,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
goto out;
}
+ drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto inhdr_error;
@@ -488,6 +499,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
len = ntohs(iph->tot_len);
if (skb->len < len) {
+ drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
__IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS);
goto drop;
} else if (len < (iph->ihl*4))
@@ -516,11 +528,14 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
return skb;
csum_error:
+ drop_reason = SKB_DROP_REASON_IP_CSUM;
__IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS);
inhdr_error:
+ if (drop_reason == SKB_DROP_REASON_NOT_SPECIFIED)
+ drop_reason = SKB_DROP_REASON_IP_INHDR;
__IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
drop:
- kfree_skb(skb);
+ kfree_skb_reason(skb, drop_reason);
out:
return NULL;
}
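The ip_input.c changes above thread a drop_reason value through to a single kfree_skb_reason() call instead of scattering bare kfree_skb() calls, so tooling such as drop monitor can attribute each loss. A hedged standalone sketch of the idiom; the enum values here are illustrative, not the kernel's SKB_DROP_REASON_* set:

#include <stdio.h>

enum drop_reason {
    DROP_NOT_SPECIFIED,    /* default until a failure site refines it */
    DROP_OTHERHOST,
    DROP_PKT_TOO_SMALL,
    DROP_IP_CSUM,
};

static const char *reason_str[] = {
    "not specified", "otherhost", "pkt too small", "bad csum",
};

/* validate a "packet"; every failure funnels to one drop label */
static int rcv(int len, int csum_ok, int for_us)
{
    enum drop_reason reason = DROP_NOT_SPECIFIED;

    if (!for_us) {
        reason = DROP_OTHERHOST;
        goto drop;
    }
    if (len < 20) {
        reason = DROP_PKT_TOO_SMALL;
        goto drop;
    }
    if (!csum_ok) {
        reason = DROP_IP_CSUM;
        goto drop;
    }
    return 0;
drop:
    printf("dropped: %s\n", reason_str[reason]);    /* single exit point */
    return -1;
}

int main(void)
{
    rcv(10, 1, 1);
    rcv(40, 0, 1);
    return 0;
}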
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 07274619b9ea..4a55a620e526 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -266,13 +266,12 @@ static void __net_exit ipmr_rules_exit(struct net *net)
{
struct mr_table *mrt, *next;
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
list_del(&mrt->list);
ipmr_free_table(mrt);
}
fib_rules_unregister(net->ipv4.mr_rules_ops);
- rtnl_unlock();
}
static int ipmr_rules_dump(struct net *net, struct notifier_block *nb,
@@ -328,10 +327,9 @@ static int __net_init ipmr_rules_init(struct net *net)
static void __net_exit ipmr_rules_exit(struct net *net)
{
- rtnl_lock();
+ ASSERT_RTNL();
ipmr_free_table(net->ipv4.mrt);
net->ipv4.mrt = NULL;
- rtnl_unlock();
}
static int ipmr_rules_dump(struct net *net, struct notifier_block *nb,
@@ -3075,7 +3073,9 @@ static int __net_init ipmr_net_init(struct net *net)
proc_cache_fail:
remove_proc_entry("ip_mr_vif", net->proc_net);
proc_vif_fail:
+ rtnl_lock();
ipmr_rules_exit(net);
+ rtnl_unlock();
#endif
ipmr_rules_fail:
ipmr_notifier_exit(net);
@@ -3090,12 +3090,22 @@ static void __net_exit ipmr_net_exit(struct net *net)
remove_proc_entry("ip_mr_vif", net->proc_net);
#endif
ipmr_notifier_exit(net);
- ipmr_rules_exit(net);
+}
+
+static void __net_exit ipmr_net_exit_batch(struct list_head *net_list)
+{
+ struct net *net;
+
+ rtnl_lock();
+ list_for_each_entry(net, net_list, exit_list)
+ ipmr_rules_exit(net);
+ rtnl_unlock();
}
static struct pernet_operations ipmr_net_ops = {
.init = ipmr_net_init,
.exit = ipmr_net_exit,
+ .exit_batch = ipmr_net_exit_batch,
};
int __init ip_mr_init(void)
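Converting ipmr's teardown to .exit_batch (the same pattern appears below for nexthops, fib6 rules and ip6mr) means rtnl_lock is taken once per batch of dying namespaces instead of once per namespace, while the per-netns helpers now just ASSERT_RTNL(). A rough userspace analogy of the batching, with a pthread mutex standing in for RTNL and the types purely illustrative:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

struct ns { int id; struct ns *next; };

static void ns_cleanup(struct ns *ns)    /* caller holds big_lock */
{
    printf("cleaning ns %d\n", ns->id);
}

/* batched exit: one lock/unlock for the whole list of namespaces */
static void exit_batch(struct ns *list)
{
    pthread_mutex_lock(&big_lock);
    for (struct ns *ns = list; ns; ns = ns->next)
        ns_cleanup(ns);
    pthread_mutex_unlock(&big_lock);
}

int main(void)
{
    struct ns c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };

    exit_batch(&a);
    return 0;
}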
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index eeafeccebb8d..e459a391e607 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -3733,12 +3733,16 @@ out:
}
EXPORT_SYMBOL(nexthop_res_grp_activity_update);
-static void __net_exit nexthop_net_exit(struct net *net)
+static void __net_exit nexthop_net_exit_batch(struct list_head *net_list)
{
+ struct net *net;
+
rtnl_lock();
- flush_all_nexthops(net);
+ list_for_each_entry(net, net_list, exit_list) {
+ flush_all_nexthops(net);
+ kfree(net->nexthop.devhash);
+ }
rtnl_unlock();
- kfree(net->nexthop.devhash);
}
static int __net_init nexthop_net_init(struct net *net)
@@ -3756,7 +3760,7 @@ static int __net_init nexthop_net_init(struct net *net)
static struct pernet_operations nexthop_net_ops = {
.init = nexthop_net_init,
- .exit = nexthop_net_exit,
+ .exit_batch = nexthop_net_exit_batch,
};
static int __init nexthop_init(void)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 8b35075088e1..634766e6c7cc 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -84,6 +84,7 @@
#include <linux/jhash.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
+#include <net/inet_dscp.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/route.h>
@@ -3391,7 +3392,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (fa->fa_slen == slen &&
fa->tb_id == fri.tb_id &&
- fa->fa_tos == fri.tos &&
+ fa->fa_dscp == inet_dsfield_to_dscp(fri.tos) &&
fa->fa_info == res.fi &&
fa->fa_type == fri.type) {
fri.offload = fa->offload;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 090360939401..6b4d8361560f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2093,16 +2093,20 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
rc = __udp_enqueue_schedule_skb(sk, skb);
if (rc < 0) {
int is_udplite = IS_UDPLITE(sk);
+ int drop_reason;
/* Note that an ENOMEM error is charged twice */
- if (rc == -ENOMEM)
+ if (rc == -ENOMEM) {
UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS,
is_udplite);
- else
+ drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF;
+ } else {
UDP_INC_STATS(sock_net(sk), UDP_MIB_MEMERRORS,
is_udplite);
+ drop_reason = SKB_DROP_REASON_PROTO_MEM;
+ }
UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
- kfree_skb(skb);
+ kfree_skb_reason(skb, drop_reason);
trace_udp_fail_queue_rcv_skb(rc, sk);
return -1;
}
@@ -2120,14 +2124,17 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
*/
static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
{
+ int drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct udp_sock *up = udp_sk(sk);
int is_udplite = IS_UDPLITE(sk);
/*
* Charge it to the socket, dropping if the queue is full.
*/
- if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
+ if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
+ drop_reason = SKB_DROP_REASON_XFRM_POLICY;
goto drop;
+ }
nf_reset_ct(skb);
if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) {
@@ -2204,8 +2211,10 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
udp_lib_checksum_complete(skb))
goto csum_error;
- if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr)))
+ if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr))) {
+ drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
goto drop;
+ }
udp_csum_pull_header(skb);
@@ -2213,11 +2222,12 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
return __udp_queue_rcv_skb(sk, skb);
csum_error:
+ drop_reason = SKB_DROP_REASON_UDP_CSUM;
__UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
drop:
__UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
atomic_inc(&sk->sk_drops);
- kfree_skb(skb);
+ kfree_skb_reason(skb, drop_reason);
return -1;
}
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f927c199a93c..4f402bc38f05 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -146,18 +146,11 @@ static int ipv6_generate_stable_address(struct in6_addr *addr,
#define IN6_ADDR_HSIZE_SHIFT 8
#define IN6_ADDR_HSIZE (1 << IN6_ADDR_HSIZE_SHIFT)
-/*
- * Configured unicast address hash table
- */
-static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE];
-static DEFINE_SPINLOCK(addrconf_hash_lock);
-static void addrconf_verify(void);
-static void addrconf_verify_rtnl(void);
-static void addrconf_verify_work(struct work_struct *);
+static void addrconf_verify(struct net *net);
+static void addrconf_verify_rtnl(struct net *net);
static struct workqueue_struct *addrconf_wq;
-static DECLARE_DELAYED_WORK(addr_chk_work, addrconf_verify_work);
static void addrconf_join_anycast(struct inet6_ifaddr *ifp);
static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);
@@ -554,7 +547,7 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
#ifdef CONFIG_IPV6_MROUTE
if ((all || type == NETCONFA_MC_FORWARDING) &&
nla_put_s32(skb, NETCONFA_MC_FORWARDING,
- devconf->mc_forwarding) < 0)
+ atomic_read(&devconf->mc_forwarding)) < 0)
goto nla_put_failure;
#endif
if ((all || type == NETCONFA_PROXY_NEIGH) &&
@@ -1011,9 +1004,7 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
{
struct inet6_ifaddr *ifp;
- hlist_for_each_entry(ifp, &inet6_addr_lst[hash], addr_lst) {
- if (!net_eq(dev_net(ifp->idev->dev), net))
- continue;
+ hlist_for_each_entry(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) {
if (ipv6_addr_equal(&ifp->addr, addr)) {
if (!dev || ifp->idev->dev == dev)
return true;
@@ -1024,20 +1015,21 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa)
{
- unsigned int hash = inet6_addr_hash(dev_net(dev), &ifa->addr);
+ struct net *net = dev_net(dev);
+ unsigned int hash = inet6_addr_hash(net, &ifa->addr);
int err = 0;
- spin_lock(&addrconf_hash_lock);
+ spin_lock(&net->ipv6.addrconf_hash_lock);
/* Ignore adding duplicate addresses on an interface */
- if (ipv6_chk_same_addr(dev_net(dev), &ifa->addr, dev, hash)) {
+ if (ipv6_chk_same_addr(net, &ifa->addr, dev, hash)) {
netdev_dbg(dev, "ipv6_add_addr: already assigned\n");
err = -EEXIST;
} else {
- hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
+ hlist_add_head_rcu(&ifa->addr_lst, &net->ipv6.inet6_addr_lst[hash]);
}
- spin_unlock(&addrconf_hash_lock);
+ spin_unlock(&net->ipv6.addrconf_hash_lock);
return err;
}
@@ -1261,9 +1253,10 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires,
static void ipv6_del_addr(struct inet6_ifaddr *ifp)
{
- int state;
enum cleanup_prefix_rt_t action = CLEANUP_PREFIX_RT_NOP;
+ struct net *net = dev_net(ifp->idev->dev);
unsigned long expires;
+ int state;
ASSERT_RTNL();
@@ -1275,9 +1268,9 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
if (state == INET6_IFADDR_STATE_DEAD)
goto out;
- spin_lock_bh(&addrconf_hash_lock);
+ spin_lock_bh(&net->ipv6.addrconf_hash_lock);
hlist_del_init_rcu(&ifp->addr_lst);
- spin_unlock_bh(&addrconf_hash_lock);
+ spin_unlock_bh(&net->ipv6.addrconf_hash_lock);
write_lock_bh(&ifp->idev->lock);
@@ -1920,10 +1913,8 @@ __ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
if (skip_dev_check)
dev = NULL;
- hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
+ hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) {
ndev = ifp->idev->dev;
- if (!net_eq(dev_net(ndev), net))
- continue;
if (l3mdev_master_dev_rcu(ndev) != l3mdev)
continue;
@@ -2027,9 +2018,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
struct inet6_ifaddr *ifp, *result = NULL;
rcu_read_lock();
- hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
- if (!net_eq(dev_net(ifp->idev->dev), net))
- continue;
+ hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) {
if (ipv6_addr_equal(&ifp->addr, addr)) {
if (!dev || ifp->idev->dev == dev ||
!(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) {
@@ -2096,7 +2085,7 @@ static int addrconf_dad_end(struct inet6_ifaddr *ifp)
void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp)
{
struct inet6_dev *idev = ifp->idev;
- struct net *net = dev_net(ifp->idev->dev);
+ struct net *net = dev_net(idev->dev);
if (addrconf_dad_end(ifp)) {
in6_ifa_put(ifp);
@@ -2675,7 +2664,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
create, now);
in6_ifa_put(ifp);
- addrconf_verify();
+ addrconf_verify(net);
}
return 0;
@@ -2987,7 +2976,7 @@ static int inet6_addr_add(struct net *net, int ifindex,
manage_tempaddrs(idev, ifp, cfg->valid_lft,
cfg->preferred_lft, true, jiffies);
in6_ifa_put(ifp);
- addrconf_verify_rtnl();
+ addrconf_verify_rtnl(net);
return 0;
} else if (cfg->ifa_flags & IFA_F_MCAUTOJOIN) {
ipv6_mc_config(net->ipv6.mc_autojoin_sk, false,
@@ -3027,7 +3016,7 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags,
manage_tempaddrs(idev, ifp, 0, 0, false,
jiffies);
ipv6_del_addr(ifp);
- addrconf_verify_rtnl();
+ addrconf_verify_rtnl(net);
if (ipv6_addr_is_multicast(pfx)) {
ipv6_mc_config(net->ipv6.mc_autojoin_sk,
false, pfx, dev->ifindex);
@@ -3772,9 +3761,9 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister)
/* Step 2: clear hash table */
for (i = 0; i < IN6_ADDR_HSIZE; i++) {
- struct hlist_head *h = &inet6_addr_lst[i];
+ struct hlist_head *h = &net->ipv6.inet6_addr_lst[i];
- spin_lock_bh(&addrconf_hash_lock);
+ spin_lock_bh(&net->ipv6.addrconf_hash_lock);
restart:
hlist_for_each_entry_rcu(ifa, h, addr_lst) {
if (ifa->idev == idev) {
@@ -3790,7 +3779,7 @@ restart:
}
}
}
- spin_unlock_bh(&addrconf_hash_lock);
+ spin_unlock_bh(&net->ipv6.addrconf_hash_lock);
}
write_lock_bh(&idev->lock);
@@ -4246,7 +4235,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
* before this temporary address becomes deprecated.
*/
if (ifp->flags & IFA_F_TEMPORARY)
- addrconf_verify_rtnl();
+ addrconf_verify_rtnl(dev_net(dev));
}
static void addrconf_dad_run(struct inet6_dev *idev, bool restart)
@@ -4288,10 +4277,8 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos)
}
for (; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
- hlist_for_each_entry_rcu(ifa, &inet6_addr_lst[state->bucket],
+ hlist_for_each_entry_rcu(ifa, &net->ipv6.inet6_addr_lst[state->bucket],
addr_lst) {
- if (!net_eq(dev_net(ifa->idev->dev), net))
- continue;
/* sync with offset */
if (p < state->offset) {
p++;
@@ -4314,8 +4301,6 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
struct net *net = seq_file_net(seq);
hlist_for_each_entry_continue_rcu(ifa, addr_lst) {
- if (!net_eq(dev_net(ifa->idev->dev), net))
- continue;
state->offset++;
return ifa;
}
@@ -4323,9 +4308,7 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
state->offset = 0;
while (++state->bucket < IN6_ADDR_HSIZE) {
hlist_for_each_entry_rcu(ifa,
- &inet6_addr_lst[state->bucket], addr_lst) {
- if (!net_eq(dev_net(ifa->idev->dev), net))
- continue;
+ &net->ipv6.inet6_addr_lst[state->bucket], addr_lst) {
return ifa;
}
}
@@ -4413,9 +4396,7 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr)
int ret = 0;
rcu_read_lock();
- hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
- if (!net_eq(dev_net(ifp->idev->dev), net))
- continue;
+ hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) {
if (ipv6_addr_equal(&ifp->addr, addr) &&
(ifp->flags & IFA_F_HOMEADDRESS)) {
ret = 1;
@@ -4453,9 +4434,7 @@ int ipv6_chk_rpl_srh_loop(struct net *net, const struct in6_addr *segs,
hash = inet6_addr_hash(net, addr);
hash_found = false;
- hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
- if (!net_eq(dev_net(ifp->idev->dev), net))
- continue;
+ hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) {
if (ipv6_addr_equal(&ifp->addr, addr)) {
hash_found = true;
@@ -4484,7 +4463,7 @@ int ipv6_chk_rpl_srh_loop(struct net *net, const struct in6_addr *segs,
* Periodic address status verification
*/
-static void addrconf_verify_rtnl(void)
+static void addrconf_verify_rtnl(struct net *net)
{
unsigned long now, next, next_sec, next_sched;
struct inet6_ifaddr *ifp;
@@ -4496,11 +4475,11 @@ static void addrconf_verify_rtnl(void)
now = jiffies;
next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
- cancel_delayed_work(&addr_chk_work);
+ cancel_delayed_work(&net->ipv6.addr_chk_work);
for (i = 0; i < IN6_ADDR_HSIZE; i++) {
restart:
- hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[i], addr_lst) {
+ hlist_for_each_entry_rcu_bh(ifp, &net->ipv6.inet6_addr_lst[i], addr_lst) {
unsigned long age;
/* When setting preferred_lft to a value not zero or
@@ -4599,20 +4578,23 @@ restart:
pr_debug("now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
now, next, next_sec, next_sched);
- mod_delayed_work(addrconf_wq, &addr_chk_work, next_sched - now);
+ mod_delayed_work(addrconf_wq, &net->ipv6.addr_chk_work, next_sched - now);
rcu_read_unlock_bh();
}
static void addrconf_verify_work(struct work_struct *w)
{
+ struct net *net = container_of(to_delayed_work(w), struct net,
+ ipv6.addr_chk_work);
+
rtnl_lock();
- addrconf_verify_rtnl();
+ addrconf_verify_rtnl(net);
rtnl_unlock();
}
-static void addrconf_verify(void)
+static void addrconf_verify(struct net *net)
{
- mod_delayed_work(addrconf_wq, &addr_chk_work, 0);
+ mod_delayed_work(addrconf_wq, &net->ipv6.addr_chk_work, 0);
}
static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local,
@@ -4708,7 +4690,8 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
return 0;
}
-static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
+static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp,
+ struct ifa6_config *cfg)
{
u32 flags;
clock_t expires;
@@ -4822,7 +4805,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
jiffies);
}
- addrconf_verify_rtnl();
+ addrconf_verify_rtnl(net);
return 0;
}
@@ -4909,7 +4892,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
!(nlh->nlmsg_flags & NLM_F_REPLACE))
err = -EEXIST;
else
- err = inet6_addr_modify(ifa, &cfg);
+ err = inet6_addr_modify(net, ifa, &cfg);
in6_ifa_put(ifa);
@@ -5533,7 +5516,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_USE_OPTIMISTIC] = cnf->use_optimistic;
#endif
#ifdef CONFIG_IPV6_MROUTE
- array[DEVCONF_MC_FORWARDING] = cnf->mc_forwarding;
+ array[DEVCONF_MC_FORWARDING] = atomic_read(&cnf->mc_forwarding);
#endif
array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6;
array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad;
@@ -5794,7 +5777,7 @@ update_lft:
write_unlock_bh(&idev->lock);
inet6_ifinfo_notify(RTM_NEWLINK, idev);
- addrconf_verify_rtnl();
+ addrconf_verify_rtnl(dev_net(dev));
return 0;
}
@@ -7111,6 +7094,14 @@ static int __net_init addrconf_init_net(struct net *net)
int err = -ENOMEM;
struct ipv6_devconf *all, *dflt;
+ spin_lock_init(&net->ipv6.addrconf_hash_lock);
+ INIT_DEFERRABLE_WORK(&net->ipv6.addr_chk_work, addrconf_verify_work);
+ net->ipv6.inet6_addr_lst = kcalloc(IN6_ADDR_HSIZE,
+ sizeof(struct hlist_head),
+ GFP_KERNEL);
+ if (!net->ipv6.inet6_addr_lst)
+ goto err_alloc_addr;
+
all = kmemdup(&ipv6_devconf, sizeof(ipv6_devconf), GFP_KERNEL);
if (!all)
goto err_alloc_all;
@@ -7172,11 +7163,15 @@ err_reg_all:
err_alloc_dflt:
kfree(all);
err_alloc_all:
+ kfree(net->ipv6.inet6_addr_lst);
+err_alloc_addr:
return err;
}
static void __net_exit addrconf_exit_net(struct net *net)
{
+ int i;
+
#ifdef CONFIG_SYSCTL
__addrconf_sysctl_unregister(net, net->ipv6.devconf_dflt,
NETCONFA_IFINDEX_DEFAULT);
@@ -7184,7 +7179,19 @@ static void __net_exit addrconf_exit_net(struct net *net)
NETCONFA_IFINDEX_ALL);
#endif
kfree(net->ipv6.devconf_dflt);
+ net->ipv6.devconf_dflt = NULL;
kfree(net->ipv6.devconf_all);
+ net->ipv6.devconf_all = NULL;
+
+ cancel_delayed_work(&net->ipv6.addr_chk_work);
+	/*
+	 * The hash table should be empty now; warn on leftovers, then free.
+	 */
+ for (i = 0; i < IN6_ADDR_HSIZE; i++)
+ WARN_ON_ONCE(!hlist_empty(&net->ipv6.inet6_addr_lst[i]));
+
+ kfree(net->ipv6.inet6_addr_lst);
+ net->ipv6.inet6_addr_lst = NULL;
}
static struct pernet_operations addrconf_ops = {
@@ -7207,7 +7214,7 @@ static struct rtnl_af_ops inet6_ops __read_mostly = {
int __init addrconf_init(void)
{
struct inet6_dev *idev;
- int i, err;
+ int err;
err = ipv6_addr_label_init();
if (err < 0) {
@@ -7254,12 +7261,9 @@ int __init addrconf_init(void)
ip6_route_init_special_entries();
- for (i = 0; i < IN6_ADDR_HSIZE; i++)
- INIT_HLIST_HEAD(&inet6_addr_lst[i]);
-
register_netdevice_notifier(&ipv6_dev_notf);
- addrconf_verify();
+ addrconf_verify(&init_net);
rtnl_af_register(&inet6_ops);
@@ -7317,7 +7321,6 @@ out:
void addrconf_cleanup(void)
{
struct net_device *dev;
- int i;
unregister_netdevice_notifier(&ipv6_dev_notf);
unregister_pernet_subsys(&addrconf_ops);
@@ -7335,14 +7338,6 @@ void addrconf_cleanup(void)
}
addrconf_ifdown(init_net.loopback_dev, true);
- /*
- * Check hash table.
- */
- spin_lock_bh(&addrconf_hash_lock);
- for (i = 0; i < IN6_ADDR_HSIZE; i++)
- WARN_ON(!hlist_empty(&inet6_addr_lst[i]));
- spin_unlock_bh(&addrconf_hash_lock);
- cancel_delayed_work(&addr_chk_work);
rtnl_unlock();
destroy_workqueue(addrconf_wq);
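The addrconf changes replace the global inet6_addr_lst[] and addrconf_hash_lock with per-namespace state allocated in addrconf_init_net(), which is why every lookup loop can drop its net_eq() filter. The general shape, sketched in userspace terms (struct layout and names are illustrative):

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>

#define HSIZE 16

struct entry { int key; struct entry *next; };

/* each "namespace" owns its own buckets and lock */
struct netns {
    struct entry **buckets;
    pthread_mutex_t hash_lock;
};

static int netns_init(struct netns *ns)
{
    ns->buckets = calloc(HSIZE, sizeof(*ns->buckets));
    if (!ns->buckets)
        return -1;
    pthread_mutex_init(&ns->hash_lock, NULL);
    return 0;
}

static void netns_add(struct netns *ns, struct entry *e)
{
    unsigned int hash = (unsigned int)e->key % HSIZE;

    pthread_mutex_lock(&ns->hash_lock);
    e->next = ns->buckets[hash];    /* no cross-namespace filtering needed */
    ns->buckets[hash] = e;
    pthread_mutex_unlock(&ns->hash_lock);
}

int main(void)
{
    struct netns ns;
    struct entry e = { 42, NULL };

    if (netns_init(&ns))
        return 1;
    netns_add(&ns, &e);
    printf("bucket %d populated\n", 42 % HSIZE);
    free(ns.buckets);
    return 0;
}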
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index ec029c86ae06..7c2003833010 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -16,6 +16,7 @@
#include <linux/indirect_call_wrapper.h>
#include <net/fib_rules.h>
+#include <net/inet_dscp.h>
#include <net/ipv6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
@@ -25,14 +26,14 @@ struct fib6_rule {
struct fib_rule common;
struct rt6key src;
struct rt6key dst;
- u8 tclass;
+ dscp_t dscp;
};
static bool fib6_rule_matchall(const struct fib_rule *rule)
{
struct fib6_rule *r = container_of(rule, struct fib6_rule, common);
- if (r->dst.plen || r->src.plen || r->tclass)
+ if (r->dst.plen || r->src.plen || r->dscp)
return false;
return fib_rule_matchall(rule);
}
@@ -323,7 +324,7 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule,
return 0;
}
- if (r->tclass && r->tclass != ip6_tclass(fl6->flowlabel))
+ if (r->dscp && r->dscp != ip6_dscp(fl6->flowlabel))
return 0;
if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto))
@@ -349,6 +350,13 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct net *net = sock_net(skb->sk);
struct fib6_rule *rule6 = (struct fib6_rule *) rule;
+ if (!inet_validate_dscp(frh->tos)) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid dsfield (tos): ECN bits must be 0");
+ goto errout;
+ }
+ rule6->dscp = inet_dsfield_to_dscp(frh->tos);
+
if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) {
if (rule->table == RT6_TABLE_UNSPEC) {
NL_SET_ERR_MSG(extack, "Invalid table");
@@ -369,7 +377,6 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
rule6->src.plen = frh->src_len;
rule6->dst.plen = frh->dst_len;
- rule6->tclass = frh->tos;
if (fib_rule_requires_fldissect(rule))
net->ipv6.fib6_rules_require_fldissect++;
@@ -402,7 +409,7 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
if (frh->dst_len && (rule6->dst.plen != frh->dst_len))
return 0;
- if (frh->tos && (rule6->tclass != frh->tos))
+ if (frh->tos && inet_dscp_to_dsfield(rule6->dscp) != frh->tos)
return 0;
if (frh->src_len &&
@@ -423,7 +430,7 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
frh->dst_len = rule6->dst.plen;
frh->src_len = rule6->src.plen;
- frh->tos = rule6->tclass;
+ frh->tos = inet_dscp_to_dsfield(rule6->dscp);
if ((rule6->dst.plen &&
nla_put_in6_addr(skb, FRA_DST, &rule6->dst.addr)) ||
@@ -486,16 +493,21 @@ out_fib6_rules_ops:
goto out;
}
-static void __net_exit fib6_rules_net_exit(struct net *net)
+static void __net_exit fib6_rules_net_exit_batch(struct list_head *net_list)
{
+ struct net *net;
+
rtnl_lock();
- fib_rules_unregister(net->ipv6.fib6_rules_ops);
+ list_for_each_entry(net, net_list, exit_list) {
+ fib_rules_unregister(net->ipv6.fib6_rules_ops);
+ cond_resched();
+ }
rtnl_unlock();
}
static struct pernet_operations fib6_rules_net_ops = {
.init = fib6_rules_net_init,
- .exit = fib6_rules_net_exit,
+ .exit_batch = fib6_rules_net_exit_batch,
};
int __init fib6_rules_init(void)
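fib6_rules now stores a dscp_t rather than a raw traffic class and rejects rules whose tos carries ECN bits. The dsfield/DSCP split is plain bit masking; a sketch of what the helpers appear to do, using the conventional 0xfc/0x03 masks (treat this as an illustration, not the kernel's inet_dscp.h):

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define DSCP_MASK 0xfc    /* upper six bits of the dsfield */
#define ECN_MASK  0x03    /* lower two bits */

static bool validate_dscp(uint8_t dsfield)
{
    return (dsfield & ECN_MASK) == 0;    /* ECN bits must be clear */
}

static uint8_t dsfield_to_dscp(uint8_t dsfield)
{
    return dsfield & DSCP_MASK;
}

int main(void)
{
    uint8_t tos = 0xb8;    /* EF (DSCP 46) with ECN bits clear */

    printf("valid=%d dscp=0x%02x\n", validate_dscp(tos),
           dsfield_to_dscp(tos));
    printf("valid=%d (ECN bits set)\n", validate_dscp(0xb9));
    return 0;
}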
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 4514444e96c8..4740afecf7c6 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -333,11 +333,8 @@ int inet6_hash(struct sock *sk)
{
int err = 0;
- if (sk->sk_state != TCP_CLOSE) {
- local_bh_disable();
+ if (sk->sk_state != TCP_CLOSE)
err = __inet_hash(sk, NULL);
- local_bh_enable();
- }
return err;
}
diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
index f90a87389fcc..f6f5b83dd954 100644
--- a/net/ipv6/ioam6_iptunnel.c
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -32,13 +32,25 @@ struct ioam6_lwt_encap {
struct ioam6_trace_hdr traceh;
} __packed;
+struct ioam6_lwt_freq {
+ u32 k;
+ u32 n;
+};
+
struct ioam6_lwt {
struct dst_cache cache;
+ struct ioam6_lwt_freq freq;
+ atomic_t pkt_cnt;
u8 mode;
struct in6_addr tundst;
struct ioam6_lwt_encap tuninfo;
};
+static struct netlink_range_validation freq_range = {
+ .min = IOAM6_IPTUNNEL_FREQ_MIN,
+ .max = IOAM6_IPTUNNEL_FREQ_MAX,
+};
+
static struct ioam6_lwt *ioam6_lwt_state(struct lwtunnel_state *lwt)
{
return (struct ioam6_lwt *)lwt->data;
@@ -55,6 +67,8 @@ static struct ioam6_trace_hdr *ioam6_lwt_trace(struct lwtunnel_state *lwt)
}
static const struct nla_policy ioam6_iptunnel_policy[IOAM6_IPTUNNEL_MAX + 1] = {
+ [IOAM6_IPTUNNEL_FREQ_K] = NLA_POLICY_FULL_RANGE(NLA_U32, &freq_range),
+ [IOAM6_IPTUNNEL_FREQ_N] = NLA_POLICY_FULL_RANGE(NLA_U32, &freq_range),
[IOAM6_IPTUNNEL_MODE] = NLA_POLICY_RANGE(NLA_U8,
IOAM6_IPTUNNEL_MODE_MIN,
IOAM6_IPTUNNEL_MODE_MAX),
@@ -96,6 +110,7 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
struct lwtunnel_state *lwt;
struct ioam6_lwt *ilwt;
int len_aligned, err;
+ u32 freq_k, freq_n;
u8 mode;
if (family != AF_INET6)
@@ -106,6 +121,23 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
if (err < 0)
return err;
+ if ((!tb[IOAM6_IPTUNNEL_FREQ_K] && tb[IOAM6_IPTUNNEL_FREQ_N]) ||
+ (tb[IOAM6_IPTUNNEL_FREQ_K] && !tb[IOAM6_IPTUNNEL_FREQ_N])) {
+ NL_SET_ERR_MSG(extack, "freq: missing parameter");
+ return -EINVAL;
+ } else if (!tb[IOAM6_IPTUNNEL_FREQ_K] && !tb[IOAM6_IPTUNNEL_FREQ_N]) {
+ freq_k = IOAM6_IPTUNNEL_FREQ_MIN;
+ freq_n = IOAM6_IPTUNNEL_FREQ_MIN;
+ } else {
+ freq_k = nla_get_u32(tb[IOAM6_IPTUNNEL_FREQ_K]);
+ freq_n = nla_get_u32(tb[IOAM6_IPTUNNEL_FREQ_N]);
+
+ if (freq_k > freq_n) {
+ NL_SET_ERR_MSG(extack, "freq: k > n is forbidden");
+ return -EINVAL;
+ }
+ }
+
if (!tb[IOAM6_IPTUNNEL_MODE])
mode = IOAM6_IPTUNNEL_MODE_INLINE;
else
@@ -140,6 +172,10 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
return err;
}
+ atomic_set(&ilwt->pkt_cnt, 0);
+ ilwt->freq.k = freq_k;
+ ilwt->freq.n = freq_n;
+
ilwt->mode = mode;
if (tb[IOAM6_IPTUNNEL_DST])
ilwt->tundst = nla_get_in6_addr(tb[IOAM6_IPTUNNEL_DST]);
@@ -263,11 +299,18 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
struct in6_addr orig_daddr;
struct ioam6_lwt *ilwt;
int err = -EINVAL;
+ u32 pkt_cnt;
if (skb->protocol != htons(ETH_P_IPV6))
goto drop;
ilwt = ioam6_lwt_state(dst->lwtstate);
+
+ /* Check for insertion frequency (i.e., "k over n" insertions) */
+ pkt_cnt = atomic_fetch_inc(&ilwt->pkt_cnt);
+ if (pkt_cnt % ilwt->freq.n >= ilwt->freq.k)
+ goto out;
+
orig_daddr = ipv6_hdr(skb)->daddr;
switch (ilwt->mode) {
@@ -358,6 +401,14 @@ static int ioam6_fill_encap_info(struct sk_buff *skb,
struct ioam6_lwt *ilwt = ioam6_lwt_state(lwtstate);
int err;
+ err = nla_put_u32(skb, IOAM6_IPTUNNEL_FREQ_K, ilwt->freq.k);
+ if (err)
+ goto ret;
+
+ err = nla_put_u32(skb, IOAM6_IPTUNNEL_FREQ_N, ilwt->freq.n);
+ if (err)
+ goto ret;
+
err = nla_put_u8(skb, IOAM6_IPTUNNEL_MODE, ilwt->mode);
if (err)
goto ret;
@@ -379,7 +430,9 @@ static int ioam6_encap_nlsize(struct lwtunnel_state *lwtstate)
struct ioam6_lwt *ilwt = ioam6_lwt_state(lwtstate);
int nlsize;
- nlsize = nla_total_size(sizeof(ilwt->mode)) +
+ nlsize = nla_total_size(sizeof(ilwt->freq.k)) +
+ nla_total_size(sizeof(ilwt->freq.n)) +
+ nla_total_size(sizeof(ilwt->mode)) +
nla_total_size(sizeof(ilwt->tuninfo.traceh));
if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE)
@@ -395,7 +448,9 @@ static int ioam6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
struct ioam6_lwt *ilwt_a = ioam6_lwt_state(a);
struct ioam6_lwt *ilwt_b = ioam6_lwt_state(b);
- return (ilwt_a->mode != ilwt_b->mode ||
+ return (ilwt_a->freq.k != ilwt_b->freq.k ||
+ ilwt_a->freq.n != ilwt_b->freq.n ||
+ ilwt_a->mode != ilwt_b->mode ||
(ilwt_a->mode != IOAM6_IPTUNNEL_MODE_INLINE &&
!ipv6_addr_equal(&ilwt_a->tundst, &ilwt_b->tundst)) ||
trace_a->namespace_id != trace_b->namespace_id);
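With the new freq attributes, ioam6_output() stamps IOAM data into only the first k of every n packets, driven by a free-running atomic counter taken modulo n. The selection logic in isolation, as a runnable sketch:

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint pkt_cnt;

/* insert on the first k of every n packets (k <= n) */
static int should_insert(unsigned int k, unsigned int n)
{
    unsigned int c = atomic_fetch_add(&pkt_cnt, 1);

    return (c % n) < k;
}

int main(void)
{
    unsigned int k = 2, n = 5;

    for (int i = 0; i < 10; i++)
        printf("pkt %d: %s\n", i,
               should_insert(k, n) ? "insert" : "skip");
    return 0;
}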
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 80256717868e..d4b1e2c5aa76 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -508,7 +508,7 @@ int ip6_mc_input(struct sk_buff *skb)
/*
* IPv6 multicast router mode is now supported ;)
*/
- if (dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding &&
+ if (atomic_read(&dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding) &&
!(ipv6_addr_type(&hdr->daddr) &
(IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL)) &&
likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) {
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index b47ffc81f9e5..53f632a560ec 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1122,7 +1122,10 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
neigh_release(neigh);
} else if (skb->protocol == htons(ETH_P_IP)) {
- struct rtable *rt = skb_rtable(skb);
+ const struct rtable *rt = skb_rtable(skb);
+
+ if (!rt)
+ goto tx_err_link_failure;
if (rt->rt_gw_family == AF_INET6)
memcpy(&fl6->daddr, &rt->rt_gw6, sizeof(fl6->daddr));
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 7cf73e60e619..881fe6b50307 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -253,13 +253,12 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
{
struct mr_table *mrt, *next;
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
list_del(&mrt->list);
ip6mr_free_table(mrt);
}
fib_rules_unregister(net->ipv6.mr6_rules_ops);
- rtnl_unlock();
}
static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
@@ -316,10 +315,9 @@ static int __net_init ip6mr_rules_init(struct net *net)
static void __net_exit ip6mr_rules_exit(struct net *net)
{
- rtnl_lock();
+ ASSERT_RTNL();
ip6mr_free_table(net->ipv6.mrt6);
net->ipv6.mrt6 = NULL;
- rtnl_unlock();
}
static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
@@ -732,7 +730,7 @@ static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
in6_dev = __in6_dev_get(dev);
if (in6_dev) {
- in6_dev->cnf.mc_forwarding--;
+ atomic_dec(&in6_dev->cnf.mc_forwarding);
inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
NETCONFA_MC_FORWARDING,
dev->ifindex, &in6_dev->cnf);
@@ -900,7 +898,7 @@ static int mif6_add(struct net *net, struct mr_table *mrt,
in6_dev = __in6_dev_get(dev);
if (in6_dev) {
- in6_dev->cnf.mc_forwarding++;
+ atomic_inc(&in6_dev->cnf.mc_forwarding);
inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
NETCONFA_MC_FORWARDING,
dev->ifindex, &in6_dev->cnf);
@@ -1323,7 +1321,9 @@ static int __net_init ip6mr_net_init(struct net *net)
proc_cache_fail:
remove_proc_entry("ip6_mr_vif", net->proc_net);
proc_vif_fail:
+ rtnl_lock();
ip6mr_rules_exit(net);
+ rtnl_unlock();
#endif
ip6mr_rules_fail:
ip6mr_notifier_exit(net);
@@ -1336,13 +1336,23 @@ static void __net_exit ip6mr_net_exit(struct net *net)
remove_proc_entry("ip6_mr_cache", net->proc_net);
remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
- ip6mr_rules_exit(net);
ip6mr_notifier_exit(net);
}
+static void __net_exit ip6mr_net_exit_batch(struct list_head *net_list)
+{
+ struct net *net;
+
+ rtnl_lock();
+ list_for_each_entry(net, net_list, exit_list)
+ ip6mr_rules_exit(net);
+ rtnl_unlock();
+}
+
static struct pernet_operations ip6mr_net_ops = {
.init = ip6mr_net_init,
.exit = ip6mr_net_exit,
+ .exit_batch = ip6mr_net_exit_batch,
};
int __init ip6_mr_init(void)
@@ -1551,7 +1561,7 @@ static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
} else {
rcu_assign_pointer(mrt->mroute_sk, sk);
sock_set_flag(sk, SOCK_RCU_FREE);
- net->ipv6.devconf_all->mc_forwarding++;
+ atomic_inc(&net->ipv6.devconf_all->mc_forwarding);
}
write_unlock_bh(&mrt_lock);
@@ -1567,14 +1577,19 @@ static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
int ip6mr_sk_done(struct sock *sk)
{
- int err = -EACCES;
struct net *net = sock_net(sk);
+ struct ipv6_devconf *devconf;
struct mr_table *mrt;
+ int err = -EACCES;
if (sk->sk_type != SOCK_RAW ||
inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
return err;
+ devconf = net->ipv6.devconf_all;
+ if (!devconf || !atomic_read(&devconf->mc_forwarding))
+ return err;
+
rtnl_lock();
ip6mr_for_each_table(mrt, net) {
if (sk == rtnl_dereference(mrt->mroute_sk)) {
@@ -1584,7 +1599,7 @@ int ip6mr_sk_done(struct sock *sk)
* so the RCU grace period before sk freeing
* is guaranteed by sk_destruct()
*/
- net->ipv6.devconf_all->mc_forwarding--;
+ atomic_dec(&devconf->mc_forwarding);
write_unlock_bh(&mrt_lock);
inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
NETCONFA_MC_FORWARDING,
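mc_forwarding becomes an atomic_t because it is updated under locks on the control path but read locklessly on the packet path; plain int accesses there are data races. A C11 userspace equivalent of the pattern:

#include <stdatomic.h>
#include <stdio.h>

static atomic_int mc_forwarding;

/* control path: toggled when a multicast-routing socket opens/closes */
static void mroute_open(void)  { atomic_fetch_add(&mc_forwarding, 1); }
static void mroute_close(void) { atomic_fetch_sub(&mc_forwarding, 1); }

/* hot path: lockless read decides whether to attempt forwarding */
static int maybe_forward(void)
{
    return atomic_load(&mc_forwarding) > 0;
}

int main(void)
{
    printf("forwarding? %d\n", maybe_forward());
    mroute_open();
    printf("forwarding? %d\n", maybe_forward());
    mroute_close();
    return 0;
}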
diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
index c921de63b494..f0702d920d8d 100644
--- a/net/mctp/af_mctp.c
+++ b/net/mctp/af_mctp.c
@@ -6,6 +6,7 @@
* Copyright (c) 2021 Google
*/
+#include <linux/compat.h>
#include <linux/if_arp.h>
#include <linux/net.h>
#include <linux/mctp.h>
@@ -21,6 +22,8 @@
/* socket implementation */
+static void mctp_sk_expire_keys(struct timer_list *timer);
+
static int mctp_release(struct socket *sock)
{
struct sock *sk = sock->sk;
@@ -99,13 +102,20 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
struct sk_buff *skb;
if (addr) {
+ const u8 tagbits = MCTP_TAG_MASK | MCTP_TAG_OWNER |
+ MCTP_TAG_PREALLOC;
+
if (addrlen < sizeof(struct sockaddr_mctp))
return -EINVAL;
if (addr->smctp_family != AF_MCTP)
return -EINVAL;
if (!mctp_sockaddr_is_ok(addr))
return -EINVAL;
- if (addr->smctp_tag & ~(MCTP_TAG_MASK | MCTP_TAG_OWNER))
+ if (addr->smctp_tag & ~tagbits)
+ return -EINVAL;
+ /* can't preallocate a non-owned tag */
+ if (addr->smctp_tag & MCTP_TAG_PREALLOC &&
+ !(addr->smctp_tag & MCTP_TAG_OWNER))
return -EINVAL;
} else {
@@ -248,6 +258,32 @@ out_free:
return rc;
}
+/* We're done with the key; invalidate, stop reassembly, and remove from lists.
+ */
+static void __mctp_key_remove(struct mctp_sk_key *key, struct net *net,
+ unsigned long flags, unsigned long reason)
+__releases(&key->lock)
+__must_hold(&net->mctp.keys_lock)
+{
+ struct sk_buff *skb;
+
+ trace_mctp_key_release(key, reason);
+ skb = key->reasm_head;
+ key->reasm_head = NULL;
+ key->reasm_dead = true;
+ key->valid = false;
+ mctp_dev_release_key(key->dev, key);
+ spin_unlock_irqrestore(&key->lock, flags);
+
+ hlist_del(&key->hlist);
+ hlist_del(&key->sklist);
+
+ /* unref for the lists */
+ mctp_key_unref(key);
+
+ kfree_skb(skb);
+}
+
static int mctp_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
@@ -293,6 +329,115 @@ static int mctp_getsockopt(struct socket *sock, int level, int optname,
return -EINVAL;
}
+static int mctp_ioctl_alloctag(struct mctp_sock *msk, unsigned long arg)
+{
+ struct net *net = sock_net(&msk->sk);
+ struct mctp_sk_key *key = NULL;
+ struct mctp_ioc_tag_ctl ctl;
+ unsigned long flags;
+ u8 tag;
+
+ if (copy_from_user(&ctl, (void __user *)arg, sizeof(ctl)))
+ return -EFAULT;
+
+ if (ctl.tag)
+ return -EINVAL;
+
+ if (ctl.flags)
+ return -EINVAL;
+
+ key = mctp_alloc_local_tag(msk, ctl.peer_addr, MCTP_ADDR_ANY,
+ true, &tag);
+ if (IS_ERR(key))
+ return PTR_ERR(key);
+
+ ctl.tag = tag | MCTP_TAG_OWNER | MCTP_TAG_PREALLOC;
+ if (copy_to_user((void __user *)arg, &ctl, sizeof(ctl))) {
+ spin_lock_irqsave(&key->lock, flags);
+ __mctp_key_remove(key, net, flags, MCTP_TRACE_KEY_DROPPED);
+ mctp_key_unref(key);
+ return -EFAULT;
+ }
+
+ mctp_key_unref(key);
+ return 0;
+}
+
+static int mctp_ioctl_droptag(struct mctp_sock *msk, unsigned long arg)
+{
+ struct net *net = sock_net(&msk->sk);
+ struct mctp_ioc_tag_ctl ctl;
+ unsigned long flags, fl2;
+ struct mctp_sk_key *key;
+ struct hlist_node *tmp;
+ int rc;
+ u8 tag;
+
+ if (copy_from_user(&ctl, (void __user *)arg, sizeof(ctl)))
+ return -EFAULT;
+
+ if (ctl.flags)
+ return -EINVAL;
+
+ /* Must be a local tag, TO set, preallocated */
+ if ((ctl.tag & ~MCTP_TAG_MASK) != (MCTP_TAG_OWNER | MCTP_TAG_PREALLOC))
+ return -EINVAL;
+
+ tag = ctl.tag & MCTP_TAG_MASK;
+ rc = -EINVAL;
+
+ spin_lock_irqsave(&net->mctp.keys_lock, flags);
+ hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) {
+ /* we do an irqsave here, even though we know the irq state,
+ * so we have the flags to pass to __mctp_key_remove
+ */
+ spin_lock_irqsave(&key->lock, fl2);
+ if (key->manual_alloc &&
+ ctl.peer_addr == key->peer_addr &&
+ tag == key->tag) {
+ __mctp_key_remove(key, net, fl2,
+ MCTP_TRACE_KEY_DROPPED);
+ rc = 0;
+ } else {
+ spin_unlock_irqrestore(&key->lock, fl2);
+ }
+ }
+ spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
+
+ return rc;
+}
+
+static int mctp_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+ struct mctp_sock *msk = container_of(sock->sk, struct mctp_sock, sk);
+
+ switch (cmd) {
+ case SIOCMCTPALLOCTAG:
+ return mctp_ioctl_alloctag(msk, arg);
+ case SIOCMCTPDROPTAG:
+ return mctp_ioctl_droptag(msk, arg);
+ }
+
+ return -EINVAL;
+}
+
+#ifdef CONFIG_COMPAT
+static int mctp_compat_ioctl(struct socket *sock, unsigned int cmd,
+ unsigned long arg)
+{
+ void __user *argp = compat_ptr(arg);
+
+ switch (cmd) {
+ /* These have compatible ptr layouts */
+ case SIOCMCTPALLOCTAG:
+ case SIOCMCTPDROPTAG:
+ return mctp_ioctl(sock, cmd, (unsigned long)argp);
+ }
+
+ return -ENOIOCTLCMD;
+}
+#endif
+
static const struct proto_ops mctp_dgram_ops = {
.family = PF_MCTP,
.release = mctp_release,
@@ -302,7 +447,7 @@ static const struct proto_ops mctp_dgram_ops = {
.accept = sock_no_accept,
.getname = sock_no_getname,
.poll = datagram_poll,
- .ioctl = sock_no_ioctl,
+ .ioctl = mctp_ioctl,
.gettstamp = sock_gettstamp,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
@@ -312,6 +457,9 @@ static const struct proto_ops mctp_dgram_ops = {
.recvmsg = mctp_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = mctp_compat_ioctl,
+#endif
};
static void mctp_sk_expire_keys(struct timer_list *timer)
@@ -319,7 +467,7 @@ static void mctp_sk_expire_keys(struct timer_list *timer)
struct mctp_sock *msk = container_of(timer, struct mctp_sock,
key_expiry);
struct net *net = sock_net(&msk->sk);
- unsigned long next_expiry, flags;
+ unsigned long next_expiry, flags, fl2;
struct mctp_sk_key *key;
struct hlist_node *tmp;
bool next_expiry_valid = false;
@@ -327,15 +475,16 @@ static void mctp_sk_expire_keys(struct timer_list *timer)
spin_lock_irqsave(&net->mctp.keys_lock, flags);
hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) {
- spin_lock(&key->lock);
+		/* Manually-allocated keys don't expire; manual_alloc is
+		 * immutable, so no locking is required to test it.
+		 */
+ if (key->manual_alloc)
+ continue;
+ spin_lock_irqsave(&key->lock, fl2);
if (!time_after_eq(key->expiry, jiffies)) {
- trace_mctp_key_release(key, MCTP_TRACE_KEY_TIMEOUT);
- key->valid = false;
- hlist_del_rcu(&key->hlist);
- hlist_del_rcu(&key->sklist);
- spin_unlock(&key->lock);
- mctp_key_unref(key);
+ __mctp_key_remove(key, net, fl2,
+ MCTP_TRACE_KEY_TIMEOUT);
continue;
}
@@ -346,7 +495,7 @@ static void mctp_sk_expire_keys(struct timer_list *timer)
next_expiry = key->expiry;
next_expiry_valid = true;
}
- spin_unlock(&key->lock);
+ spin_unlock_irqrestore(&key->lock, fl2);
}
spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
@@ -387,9 +536,9 @@ static void mctp_sk_unhash(struct sock *sk)
{
struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
struct net *net = sock_net(sk);
+ unsigned long flags, fl2;
struct mctp_sk_key *key;
struct hlist_node *tmp;
- unsigned long flags;
/* remove from any type-based binds */
mutex_lock(&net->mctp.bind_lock);
@@ -399,20 +548,8 @@ static void mctp_sk_unhash(struct sock *sk)
/* remove tag allocations */
spin_lock_irqsave(&net->mctp.keys_lock, flags);
hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) {
- hlist_del(&key->sklist);
- hlist_del(&key->hlist);
-
- trace_mctp_key_release(key, MCTP_TRACE_KEY_CLOSED);
-
- spin_lock(&key->lock);
- kfree_skb(key->reasm_head);
- key->reasm_head = NULL;
- key->reasm_dead = true;
- key->valid = false;
- spin_unlock(&key->lock);
-
- /* key is no longer on the lookup lists, unref */
- mctp_key_unref(key);
+ spin_lock_irqsave(&key->lock, fl2);
+ __mctp_key_remove(key, net, fl2, MCTP_TRACE_KEY_CLOSED);
}
spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
}
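The new SIOCMCTPALLOCTAG/SIOCMCTPDROPTAG ioctls let a process hold an MCTP tag across several request/response exchanges rather than allocating one per sendmsg(). A hedged usage sketch, assuming uapi headers recent enough to define AF_MCTP, struct mctp_ioc_tag_ctl and the tag ioctls; the peer EID is arbitrary:

#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <unistd.h>
#include <linux/mctp.h>

int main(void)
{
    struct mctp_ioc_tag_ctl ctl = { .peer_addr = 8 };    /* tag/flags zero */
    int sd = socket(AF_MCTP, SOCK_DGRAM, 0);

    if (sd < 0 || ioctl(sd, SIOCMCTPALLOCTAG, &ctl) < 0) {
        perror("alloc tag");
        return 1;
    }
    /* ctl.tag now carries MCTP_TAG_OWNER | MCTP_TAG_PREALLOC; pass it as
     * smctp_tag on each sendmsg() that should reuse the tag.
     */
    printf("allocated tag 0x%02x for peer %d\n", ctl.tag, ctl.peer_addr);

    if (ioctl(sd, SIOCMCTPDROPTAG, &ctl) < 0)
        perror("drop tag");
    close(sd);
    return 0;
}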
diff --git a/net/mctp/device.c b/net/mctp/device.c
index ef2755f82f87..02ddc0f1bd3e 100644
--- a/net/mctp/device.c
+++ b/net/mctp/device.c
@@ -6,6 +6,7 @@
* Copyright (c) 2021 Google
*/
+#include <linux/if_arp.h>
#include <linux/if_link.h>
#include <linux/mctp.h>
#include <linux/netdevice.h>
diff --git a/net/mctp/route.c b/net/mctp/route.c
index 8d9f4ff3e285..17e3482aa770 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -64,8 +64,7 @@ static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
if (msk->bind_type != type)
continue;
- if (msk->bind_addr != MCTP_ADDR_ANY &&
- msk->bind_addr != mh->dest)
+ if (!mctp_address_matches(msk->bind_addr, mh->dest))
continue;
return msk;
@@ -77,7 +76,7 @@ static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local,
mctp_eid_t peer, u8 tag)
{
- if (key->local_addr != local)
+ if (!mctp_address_matches(key->local_addr, local))
return false;
if (key->peer_addr != peer)
@@ -204,29 +203,38 @@ static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
return rc;
}
-/* We're done with the key; unset valid and remove from lists. There may still
- * be outstanding refs on the key though...
+/* Helper for mctp_route_input().
+ * We're done with the key; unlock and unref it. In the usual case of
+ * automatic expiry we also remove the key from the lookup lists. When
+ * the key was manually allocated we release the lock and local ref and
+ * reset reassembly state, but leave the key on the lists for reuse.
*/
-static void __mctp_key_unlock_drop(struct mctp_sk_key *key, struct net *net,
- unsigned long flags)
- __releases(&key->lock)
+static void __mctp_key_done_in(struct mctp_sk_key *key, struct net *net,
+ unsigned long flags, unsigned long reason)
+__releases(&key->lock)
{
struct sk_buff *skb;
+ trace_mctp_key_release(key, reason);
skb = key->reasm_head;
key->reasm_head = NULL;
- key->reasm_dead = true;
- key->valid = false;
- mctp_dev_release_key(key->dev, key);
+
+ if (!key->manual_alloc) {
+ key->reasm_dead = true;
+ key->valid = false;
+ mctp_dev_release_key(key->dev, key);
+ }
spin_unlock_irqrestore(&key->lock, flags);
- spin_lock_irqsave(&net->mctp.keys_lock, flags);
- hlist_del(&key->hlist);
- hlist_del(&key->sklist);
- spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
+ if (!key->manual_alloc) {
+ spin_lock_irqsave(&net->mctp.keys_lock, flags);
+ hlist_del(&key->hlist);
+ hlist_del(&key->sklist);
+ spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
- /* one unref for the lists */
- mctp_key_unref(key);
+ /* unref for the lists */
+ mctp_key_unref(key);
+ }
/* and one for the local reference */
mctp_key_unref(key);
@@ -380,9 +388,8 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
/* we've hit a pending reassembly; not much we
* can do but drop it
*/
- trace_mctp_key_release(key,
- MCTP_TRACE_KEY_REPLIED);
- __mctp_key_unlock_drop(key, net, f);
+ __mctp_key_done_in(key, net, f,
+ MCTP_TRACE_KEY_REPLIED);
key = NULL;
}
rc = 0;
@@ -424,9 +431,8 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
} else {
if (key->reasm_head || key->reasm_dead) {
/* duplicate start? drop everything */
- trace_mctp_key_release(key,
- MCTP_TRACE_KEY_INVALIDATED);
- __mctp_key_unlock_drop(key, net, f);
+ __mctp_key_done_in(key, net, f,
+ MCTP_TRACE_KEY_INVALIDATED);
rc = -EEXIST;
key = NULL;
} else {
@@ -449,10 +455,10 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
* the reassembly/response key
*/
if (!rc && flags & MCTP_HDR_FLAG_EOM) {
+ msk = container_of(key->sk, struct mctp_sock, sk);
sock_queue_rcv_skb(key->sk, key->reasm_head);
key->reasm_head = NULL;
- trace_mctp_key_release(key, MCTP_TRACE_KEY_REPLIED);
- __mctp_key_unlock_drop(key, net, f);
+ __mctp_key_done_in(key, net, f, MCTP_TRACE_KEY_REPLIED);
key = NULL;
}
@@ -580,9 +586,9 @@ static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key,
/* Allocate a locally-owned tag value for (saddr, daddr), and reserve
* it for the socket msk
*/
-static struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
- mctp_eid_t saddr,
- mctp_eid_t daddr, u8 *tagp)
+struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
+ mctp_eid_t daddr, mctp_eid_t saddr,
+ bool manual, u8 *tagp)
{
struct net *net = sock_net(&msk->sk);
struct netns_mctp *mns = &net->mctp;
@@ -616,9 +622,8 @@ static struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
if (tmp->tag & MCTP_HDR_FLAG_TO)
continue;
- if (!((tmp->peer_addr == daddr ||
- tmp->peer_addr == MCTP_ADDR_ANY) &&
- tmp->local_addr == saddr))
+ if (!(mctp_address_matches(tmp->peer_addr, daddr) &&
+ mctp_address_matches(tmp->local_addr, saddr)))
continue;
spin_lock(&tmp->lock);
@@ -638,6 +643,7 @@ static struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
mctp_reserve_tag(net, key, msk);
trace_mctp_key_acquire(key);
+ key->manual_alloc = manual;
*tagp = key->tag;
}
@@ -651,6 +657,50 @@ static struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
return key;
}
+static struct mctp_sk_key *mctp_lookup_prealloc_tag(struct mctp_sock *msk,
+ mctp_eid_t daddr,
+ u8 req_tag, u8 *tagp)
+{
+ struct net *net = sock_net(&msk->sk);
+ struct netns_mctp *mns = &net->mctp;
+ struct mctp_sk_key *key, *tmp;
+ unsigned long flags;
+
+ req_tag &= ~(MCTP_TAG_PREALLOC | MCTP_TAG_OWNER);
+ key = NULL;
+
+ spin_lock_irqsave(&mns->keys_lock, flags);
+
+ hlist_for_each_entry(tmp, &mns->keys, hlist) {
+ if (tmp->tag != req_tag)
+ continue;
+
+ if (!mctp_address_matches(tmp->peer_addr, daddr))
+ continue;
+
+ if (!tmp->manual_alloc)
+ continue;
+
+ spin_lock(&tmp->lock);
+ if (tmp->valid) {
+ key = tmp;
+ refcount_inc(&key->refs);
+ spin_unlock(&tmp->lock);
+ break;
+ }
+ spin_unlock(&tmp->lock);
+ }
+ spin_unlock_irqrestore(&mns->keys_lock, flags);
+
+ if (!key)
+ return ERR_PTR(-ENOENT);
+
+ if (tagp)
+ *tagp = key->tag;
+
+ return key;
+}
+
/* routing lookups */
static bool mctp_rt_match_eid(struct mctp_route *rt,
unsigned int net, mctp_eid_t eid)
@@ -845,8 +895,14 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
if (rc)
goto out_release;
- if (req_tag & MCTP_HDR_FLAG_TO) {
- key = mctp_alloc_local_tag(msk, saddr, daddr, &tag);
+ if (req_tag & MCTP_TAG_OWNER) {
+ if (req_tag & MCTP_TAG_PREALLOC)
+ key = mctp_lookup_prealloc_tag(msk, daddr,
+ req_tag, &tag);
+ else
+ key = mctp_alloc_local_tag(msk, daddr, saddr,
+ false, &tag);
+
if (IS_ERR(key)) {
rc = PTR_ERR(key);
goto out_release;
@@ -857,7 +913,7 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
tag |= MCTP_HDR_FLAG_TO;
} else {
key = NULL;
- tag = req_tag;
+ tag = req_tag & MCTP_TAG_MASK;
}
skb->protocol = htons(ETH_P_MCTP);
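Several lookups above are rewritten in terms of mctp_address_matches(), which, judging from its call sites, treats MCTP_ADDR_ANY as a wildcard. A standalone sketch of that presumed behavior (the typedef and wildcard value mirror the MCTP uapi):

#include <stdbool.h>
#include <stdio.h>

typedef unsigned char mctp_eid_t;
#define MCTP_ADDR_ANY 0xff    /* wildcard EID, per the MCTP uapi */

static bool mctp_address_matches(mctp_eid_t match, mctp_eid_t eid)
{
    return match == eid || match == MCTP_ADDR_ANY;
}

int main(void)
{
    printf("%d %d %d\n",
           mctp_address_matches(9, 9),                /* exact match */
           mctp_address_matches(MCTP_ADDR_ANY, 9),    /* wildcard */
           mctp_address_matches(9, 10));              /* mismatch */
    return 0;
}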
diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c
index 750f9f9b4daf..61205cf40074 100644
--- a/net/mctp/test/route-test.c
+++ b/net/mctp/test/route-test.c
@@ -369,14 +369,15 @@ static void mctp_test_route_input_sk(struct kunit *test)
#define FL_S (MCTP_HDR_FLAG_SOM)
#define FL_E (MCTP_HDR_FLAG_EOM)
-#define FL_T (MCTP_HDR_FLAG_TO)
+#define FL_TO (MCTP_HDR_FLAG_TO)
+#define FL_T(t) ((t) & MCTP_HDR_TAG_MASK)
static const struct mctp_route_input_sk_test mctp_route_input_sk_tests[] = {
- { .hdr = RX_HDR(1, 10, 8, FL_S | FL_E | FL_T), .type = 0, .deliver = true },
- { .hdr = RX_HDR(1, 10, 8, FL_S | FL_E | FL_T), .type = 1, .deliver = false },
+ { .hdr = RX_HDR(1, 10, 8, FL_S | FL_E | FL_TO), .type = 0, .deliver = true },
+ { .hdr = RX_HDR(1, 10, 8, FL_S | FL_E | FL_TO), .type = 1, .deliver = false },
{ .hdr = RX_HDR(1, 10, 8, FL_S | FL_E), .type = 0, .deliver = false },
- { .hdr = RX_HDR(1, 10, 8, FL_E | FL_T), .type = 0, .deliver = false },
- { .hdr = RX_HDR(1, 10, 8, FL_T), .type = 0, .deliver = false },
+ { .hdr = RX_HDR(1, 10, 8, FL_E | FL_TO), .type = 0, .deliver = false },
+ { .hdr = RX_HDR(1, 10, 8, FL_TO), .type = 0, .deliver = false },
{ .hdr = RX_HDR(1, 10, 8, 0), .type = 0, .deliver = false },
};
@@ -436,7 +437,7 @@ static void mctp_test_route_input_sk_reasm(struct kunit *test)
__mctp_route_test_fini(test, dev, rt, sock);
}
-#define RX_FRAG(f, s) RX_HDR(1, 10, 8, FL_T | (f) | ((s) << MCTP_HDR_SEQ_SHIFT))
+#define RX_FRAG(f, s) RX_HDR(1, 10, 8, FL_TO | (f) | ((s) << MCTP_HDR_SEQ_SHIFT))
static const struct mctp_route_input_sk_reasm_test mctp_route_input_sk_reasm_tests[] = {
{
@@ -522,12 +523,156 @@ static void mctp_route_input_sk_reasm_to_desc(
KUNIT_ARRAY_PARAM(mctp_route_input_sk_reasm, mctp_route_input_sk_reasm_tests,
mctp_route_input_sk_reasm_to_desc);
+struct mctp_route_input_sk_keys_test {
+ const char *name;
+ mctp_eid_t key_peer_addr;
+ mctp_eid_t key_local_addr;
+ u8 key_tag;
+ struct mctp_hdr hdr;
+ bool deliver;
+};
+
+/* test packet rx in the presence of various key configurations */
+static void mctp_test_route_input_sk_keys(struct kunit *test)
+{
+ const struct mctp_route_input_sk_keys_test *params;
+ struct mctp_test_route *rt;
+ struct sk_buff *skb, *skb2;
+ struct mctp_test_dev *dev;
+ struct mctp_sk_key *key;
+ struct netns_mctp *mns;
+ struct mctp_sock *msk;
+ struct socket *sock;
+ unsigned long flags;
+ int rc;
+ u8 c;
+
+ params = test->param_value;
+
+ dev = mctp_test_create_dev();
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
+
+ rt = mctp_test_create_route(&init_net, dev->mdev, 8, 68);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt);
+
+ rc = sock_create_kern(&init_net, AF_MCTP, SOCK_DGRAM, 0, &sock);
+ KUNIT_ASSERT_EQ(test, rc, 0);
+
+ msk = container_of(sock->sk, struct mctp_sock, sk);
+ mns = &sock_net(sock->sk)->mctp;
+
+ /* set the incoming tag according to test params */
+ key = mctp_key_alloc(msk, params->key_local_addr, params->key_peer_addr,
+ params->key_tag, GFP_KERNEL);
+
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, key);
+
+ spin_lock_irqsave(&mns->keys_lock, flags);
+ mctp_reserve_tag(&init_net, key, msk);
+ spin_unlock_irqrestore(&mns->keys_lock, flags);
+
+ /* create packet and route */
+ c = 0;
+ skb = mctp_test_create_skb_data(&params->hdr, &c);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb);
+
+ skb->dev = dev->ndev;
+ __mctp_cb(skb);
+
+ rc = mctp_route_input(&rt->rt, skb);
+
+ /* (potentially) receive message */
+ skb2 = skb_recv_datagram(sock->sk, 0, 1, &rc);
+
+ if (params->deliver)
+ KUNIT_EXPECT_NOT_ERR_OR_NULL(test, skb2);
+ else
+ KUNIT_EXPECT_PTR_EQ(test, skb2, NULL);
+
+ if (skb2)
+ skb_free_datagram(sock->sk, skb2);
+
+ mctp_key_unref(key);
+ __mctp_route_test_fini(test, dev, rt, sock);
+}
+
+static const struct mctp_route_input_sk_keys_test mctp_route_input_sk_keys_tests[] = {
+ {
+ .name = "direct match",
+ .key_peer_addr = 9,
+ .key_local_addr = 8,
+ .key_tag = 1,
+ .hdr = RX_HDR(1, 9, 8, FL_S | FL_E | FL_T(1)),
+ .deliver = true,
+ },
+ {
+ .name = "flipped src/dest",
+ .key_peer_addr = 8,
+ .key_local_addr = 9,
+ .key_tag = 1,
+ .hdr = RX_HDR(1, 9, 8, FL_S | FL_E | FL_T(1)),
+ .deliver = false,
+ },
+ {
+ .name = "peer addr mismatch",
+ .key_peer_addr = 9,
+ .key_local_addr = 8,
+ .key_tag = 1,
+ .hdr = RX_HDR(1, 10, 8, FL_S | FL_E | FL_T(1)),
+ .deliver = false,
+ },
+ {
+ .name = "tag value mismatch",
+ .key_peer_addr = 9,
+ .key_local_addr = 8,
+ .key_tag = 1,
+ .hdr = RX_HDR(1, 9, 8, FL_S | FL_E | FL_T(2)),
+ .deliver = false,
+ },
+ {
+ .name = "TO mismatch",
+ .key_peer_addr = 9,
+ .key_local_addr = 8,
+ .key_tag = 1,
+ .hdr = RX_HDR(1, 9, 8, FL_S | FL_E | FL_T(1) | FL_TO),
+ .deliver = false,
+ },
+ {
+ .name = "broadcast response",
+ .key_peer_addr = MCTP_ADDR_ANY,
+ .key_local_addr = 8,
+ .key_tag = 1,
+ .hdr = RX_HDR(1, 11, 8, FL_S | FL_E | FL_T(1)),
+ .deliver = true,
+ },
+ {
+ .name = "any local match",
+ .key_peer_addr = 12,
+ .key_local_addr = MCTP_ADDR_ANY,
+ .key_tag = 1,
+ .hdr = RX_HDR(1, 12, 8, FL_S | FL_E | FL_T(1)),
+ .deliver = true,
+ },
+};
+
+static void mctp_route_input_sk_keys_to_desc(
+ const struct mctp_route_input_sk_keys_test *t,
+ char *desc)
+{
+ sprintf(desc, "%s", t->name);
+}
+
+KUNIT_ARRAY_PARAM(mctp_route_input_sk_keys, mctp_route_input_sk_keys_tests,
+ mctp_route_input_sk_keys_to_desc);
+
static struct kunit_case mctp_test_cases[] = {
KUNIT_CASE_PARAM(mctp_test_fragment, mctp_frag_gen_params),
KUNIT_CASE_PARAM(mctp_test_rx_input, mctp_rx_input_gen_params),
KUNIT_CASE_PARAM(mctp_test_route_input_sk, mctp_route_input_sk_gen_params),
KUNIT_CASE_PARAM(mctp_test_route_input_sk_reasm,
mctp_route_input_sk_reasm_gen_params),
+ KUNIT_CASE_PARAM(mctp_test_route_input_sk_keys,
+ mctp_route_input_sk_keys_gen_params),
{}
};
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index d47795748ad7..5464c2d268bd 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -1174,14 +1174,8 @@ skip_family:
if (tb[MPTCP_PM_ADDR_ATTR_FLAGS])
entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]);
- if (tb[MPTCP_PM_ADDR_ATTR_PORT]) {
- if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) {
- NL_SET_ERR_MSG_ATTR(info->extack, attr,
- "flags must have signal when using port");
- return -EINVAL;
- }
+ if (tb[MPTCP_PM_ADDR_ATTR_PORT])
entry->addr.port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT]));
- }
return 0;
}
@@ -1227,6 +1221,11 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info)
if (ret < 0)
return ret;
+ if (addr.addr.port && !(addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) {
+ GENL_SET_ERR_MSG(info, "flags must have signal when using port");
+ return -EINVAL;
+ }
+
entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry) {
GENL_SET_ERR_MSG(info, "can't allocate addr");
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 354cb472f386..d1c9dfbb11fa 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -621,7 +621,8 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
case NF_ACCEPT:
break;
case NF_DROP:
- kfree_skb(skb);
+ kfree_skb_reason(skb,
+ SKB_DROP_REASON_NETFILTER_DROP);
ret = NF_DROP_GETERR(verdict);
if (ret == 0)
ret = -EPERM;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 67ad08320886..7e8a39a35627 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -37,6 +37,7 @@
#include <net/genetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <net/pkt_cls.h>
#include "datapath.h"
#include "flow.h"
@@ -1601,8 +1602,6 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb,
dp->user_features = 0;
}
-DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
-
static int ovs_dp_set_upcall_portids(struct datapath *dp,
const struct nlattr *ids)
{
@@ -1657,7 +1656,7 @@ u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id)
static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
{
- u32 user_features = 0;
+ u32 user_features = 0, old_features = dp->user_features;
int err;
if (a[OVS_DP_ATTR_USER_FEATURES]) {
@@ -1696,10 +1695,12 @@ static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
return err;
}
- if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
- static_branch_enable(&tc_recirc_sharing_support);
- else
- static_branch_disable(&tc_recirc_sharing_support);
+ if ((dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
+ !(old_features & OVS_DP_F_TC_RECIRC_SHARING))
+ tc_skb_ext_tc_enable();
+ else if (!(dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
+ (old_features & OVS_DP_F_TC_RECIRC_SHARING))
+ tc_skb_ext_tc_disable();
return 0;
}
@@ -1839,6 +1840,9 @@ static void __dp_destroy(struct datapath *dp)
struct flow_table *table = &dp->table;
int i;
+ if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
+ tc_skb_ext_tc_disable();
+
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
struct vport *vport;
struct hlist_node *n;
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index fcfe6cb46441..0cd29971a907 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -253,8 +253,6 @@ static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
extern struct notifier_block ovs_dp_device_notifier;
extern struct genl_family dp_vport_genl_family;
-DECLARE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
-
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key);
void ovs_dp_detach_port(struct vport *);
int ovs_dp_upcall(struct datapath *, struct sk_buff *,
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 02096f2ec678..f6cd24fd530c 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -34,6 +34,7 @@
#include <net/mpls.h>
#include <net/ndisc.h>
#include <net/nsh.h>
+#include <net/pkt_cls.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include "conntrack.h"
@@ -895,7 +896,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
key->mac_proto = res;
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
- if (static_branch_unlikely(&tc_recirc_sharing_support)) {
+ if (tc_skb_ext_tc_enabled()) {
tc_ext = skb_ext_find(skb, TC_SKB_EXT);
key->recirc_id = tc_ext ? tc_ext->chain : 0;
OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 5f0f346b576f..ff1e6b474fef 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -49,6 +49,23 @@ static LIST_HEAD(tcf_proto_base);
/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK(cls_mod_lock);
+#ifdef CONFIG_NET_CLS_ACT
+DEFINE_STATIC_KEY_FALSE(tc_skb_ext_tc);
+EXPORT_SYMBOL(tc_skb_ext_tc);
+
+void tc_skb_ext_tc_enable(void)
+{
+ static_branch_inc(&tc_skb_ext_tc);
+}
+EXPORT_SYMBOL(tc_skb_ext_tc_enable);
+
+void tc_skb_ext_tc_disable(void)
+{
+ static_branch_dec(&tc_skb_ext_tc);
+}
+EXPORT_SYMBOL(tc_skb_ext_tc_disable);
+#endif
+
static u32 destroy_obj_hashfn(const struct tcf_proto *tp)
{
return jhash_3words(tp->chain->index, tp->prio,
@@ -1615,19 +1632,21 @@ int tcf_classify(struct sk_buff *skb,
ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode,
&last_executed_chain);
- /* If we missed on some chain */
- if (ret == TC_ACT_UNSPEC && last_executed_chain) {
- struct tc_skb_cb *cb = tc_skb_cb(skb);
-
- ext = tc_skb_ext_alloc(skb);
- if (WARN_ON_ONCE(!ext))
- return TC_ACT_SHOT;
- ext->chain = last_executed_chain;
- ext->mru = cb->mru;
- ext->post_ct = cb->post_ct;
- ext->post_ct_snat = cb->post_ct_snat;
- ext->post_ct_dnat = cb->post_ct_dnat;
- ext->zone = cb->zone;
+ if (tc_skb_ext_tc_enabled()) {
+ /* If we missed on some chain */
+ if (ret == TC_ACT_UNSPEC && last_executed_chain) {
+ struct tc_skb_cb *cb = tc_skb_cb(skb);
+
+ ext = tc_skb_ext_alloc(skb);
+ if (WARN_ON_ONCE(!ext))
+ return TC_ACT_SHOT;
+ ext->chain = last_executed_chain;
+ ext->mru = cb->mru;
+ ext->post_ct = cb->post_ct;
+ ext->post_ct_snat = cb->post_ct_snat;
+ ext->post_ct_dnat = cb->post_ct_dnat;
+ ext->zone = cb->zone;
+ }
}
return ret;
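
Note: the key and its helpers now live in the TC core. static_branch_inc()/static_branch_dec() are counting operations, so N enablers keep the branch on until the last one disables it, and EXPORT_SYMBOL() is required because openvswitch can be built as a module. The payoff is in tcf_classify(): the TC_SKB_EXT extension, previously allocated for every packet that missed on a chain, is now allocated only when a consumer has enabled the key, sparing the skb-extension allocation on the common path where nobody reads it.
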
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index efc84845bb6b..0024a692f0f8 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1433,7 +1433,8 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
if (*zc && (out_iov || out_sg)) {
if (out_iov)
- n_sgout = iov_iter_npages(out_iov, INT_MAX) + 1;
+ n_sgout = 1 +
+ iov_iter_npages_cap(out_iov, INT_MAX, data_len);
else
n_sgout = sg_nents(out_sg);
n_sgin = skb_nsg(skb, rxm->offset + prot->prepend_size,
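
Note: out_iov describes the user's receive buffer, which can be far larger than the TLS record being decrypted; counting its full page span made n_sgout, and the scatterlist sized from it, grow with the buffer rather than with the data. iov_iter_npages_cap() -- a capped variant of iov_iter_npages() whose definition is not part of this hunk -- bounds the count by data_len, so the allocation tracks the size of the decrypted record instead.
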
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 28ef3f4465ae..2abd64e4d589 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -343,9 +343,9 @@ out:
}
EXPORT_SYMBOL(xsk_tx_peek_desc);
-static u32 xsk_tx_peek_release_fallback(struct xsk_buff_pool *pool, struct xdp_desc *descs,
- u32 max_entries)
+static u32 xsk_tx_peek_release_fallback(struct xsk_buff_pool *pool, u32 max_entries)
{
+ struct xdp_desc *descs = pool->tx_descs;
u32 nb_pkts = 0;
while (nb_pkts < max_entries && xsk_tx_peek_desc(pool, &descs[nb_pkts]))
@@ -355,8 +355,7 @@ static u32 xsk_tx_peek_release_fallback(struct xsk_buff_pool *pool, struct xdp_d
return nb_pkts;
}
-u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *descs,
- u32 max_entries)
+u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max_entries)
{
struct xdp_sock *xs;
u32 nb_pkts;
@@ -365,7 +364,7 @@ u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *
if (!list_is_singular(&pool->xsk_tx_list)) {
/* Fallback to the non-batched version */
rcu_read_unlock();
- return xsk_tx_peek_release_fallback(pool, descs, max_entries);
+ return xsk_tx_peek_release_fallback(pool, max_entries);
}
xs = list_first_or_null_rcu(&pool->xsk_tx_list, struct xdp_sock, tx_list);
@@ -374,7 +373,7 @@ u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *
goto out;
}
- nb_pkts = xskq_cons_peek_desc_batch(xs->tx, descs, pool, max_entries);
+ nb_pkts = xskq_cons_peek_desc_batch(xs->tx, pool, max_entries);
if (!nb_pkts) {
xs->tx->queue_empty_descs++;
goto out;
@@ -386,7 +385,7 @@ u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *
* packets. This avoids having to implement any buffering in
* the Tx path.
*/
- nb_pkts = xskq_prod_reserve_addr_batch(pool->cq, descs, nb_pkts);
+ nb_pkts = xskq_prod_reserve_addr_batch(pool->cq, pool->tx_descs, nb_pkts);
if (!nb_pkts)
goto out;
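
Note: the batched Tx path no longer takes a caller-supplied descriptor array; descriptors are staged in pool->tx_descs, which the pool itself owns (allocated in xsk_buff_pool.c below). Drivers call xsk_tx_peek_release_desc_batch() with just a budget and read the results out of the pool. A hedged sketch of a driver-side Tx loop against the new signature; my_xsk_xmit and my_xmit_one are hypothetical driver code, while xsk_tx_peek_release_desc_batch() and xsk_buff_raw_get_dma() are the real driver-facing helpers from <net/xdp_sock_drv.h>:

	#include <net/xdp_sock_drv.h>

	/* Hypothetical driver hook: posts one frame to the HW Tx ring. */
	void my_xmit_one(dma_addr_t dma, u32 len);

	static void my_xsk_xmit(struct xsk_buff_pool *pool, u32 budget)
	{
		u32 i, nb;

		/* New signature: no descriptor array argument; the core
		 * stages the peeked descriptors in pool->tx_descs. */
		nb = xsk_tx_peek_release_desc_batch(pool, budget);
		for (i = 0; i < nb; i++) {
			struct xdp_desc *desc = &pool->tx_descs[i];

			/* Map the umem offset to a DMA address and hand
			 * the frame to the hardware. */
			my_xmit_one(xsk_buff_raw_get_dma(pool, desc->addr),
				    desc->len);
		}
	}
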
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index fd39bb660ebc..b34fca6ada86 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -37,6 +37,7 @@ void xp_destroy(struct xsk_buff_pool *pool)
if (!pool)
return;
+ kvfree(pool->tx_descs);
kvfree(pool->heads);
kvfree(pool);
}
@@ -58,6 +59,12 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
if (!pool->heads)
goto out;
+ if (xs->tx) {
+ pool->tx_descs = kcalloc(xs->tx->nentries, sizeof(*pool->tx_descs), GFP_KERNEL);
+ if (!pool->tx_descs)
+ goto out;
+ }
+
pool->chunk_mask = ~((u64)umem->chunk_size - 1);
pool->addrs_cnt = umem->size;
pool->heads_cnt = umem->chunks;
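
Note: pool->tx_descs is sized to the Tx ring (xs->tx->nentries entries) and allocated only when the socket actually has a Tx ring, so Rx-only sockets pay nothing; xp_destroy() frees it alongside the heads array. The kcalloc()/kvfree() pairing is fine, since kvfree() handles kmalloc'd memory.
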
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index e9aa2c236356..801cda5d1938 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -205,11 +205,11 @@ static inline bool xskq_cons_read_desc(struct xsk_queue *q,
return false;
}
-static inline u32 xskq_cons_read_desc_batch(struct xsk_queue *q,
- struct xdp_desc *descs,
- struct xsk_buff_pool *pool, u32 max)
+static inline u32 xskq_cons_read_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool,
+ u32 max)
{
u32 cached_cons = q->cached_cons, nb_entries = 0;
+ struct xdp_desc *descs = pool->tx_descs;
while (cached_cons != q->cached_prod && nb_entries < max) {
struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
@@ -282,12 +282,12 @@ static inline bool xskq_cons_peek_desc(struct xsk_queue *q,
return xskq_cons_read_desc(q, desc, pool);
}
-static inline u32 xskq_cons_peek_desc_batch(struct xsk_queue *q, struct xdp_desc *descs,
- struct xsk_buff_pool *pool, u32 max)
+static inline u32 xskq_cons_peek_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool,
+ u32 max)
{
u32 entries = xskq_cons_nb_entries(q, max);
- return xskq_cons_read_desc_batch(q, descs, pool, entries);
+ return xskq_cons_read_desc_batch(q, pool, entries);
}
/* To improve performance in the xskq_cons_release functions, only update local state here.
@@ -304,13 +304,6 @@ static inline void xskq_cons_release_n(struct xsk_queue *q, u32 cnt)
q->cached_cons += cnt;
}
-static inline bool xskq_cons_is_full(struct xsk_queue *q)
-{
- /* No barriers needed since data is not accessed */
- return READ_ONCE(q->ring->producer) - READ_ONCE(q->ring->consumer) ==
- q->nentries;
-}
-
static inline u32 xskq_cons_present_entries(struct xsk_queue *q)
{
/* No barriers needed since data is not accessed */