From 6e116280b41b0cbfd90dfe9fa66e07ff348d50d5 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Mon, 25 Jul 2022 10:51:30 +0200 Subject: net: netfilter: Remove ifdefs for code shared by BPF and ctnetlink The current ifdefry for code shared by the BPF and ctnetlink side looks ugly. As per Pablo's request, simplify this by unconditionally compiling in the code. This can be revisited when the shared code between the two grows further. Suggested-by: Pablo Neira Ayuso Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20220725085130.11553-1-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/net/netfilter/nf_conntrack_core.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 3cd3a6e631aa..b2b9de70d9f4 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -86,10 +86,6 @@ extern spinlock_t nf_conntrack_expect_lock; /* ctnetlink code shared by both ctnetlink and nf_conntrack_bpf */ -#if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \ - (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES) || \ - IS_ENABLED(CONFIG_NF_CT_NETLINK)) - static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout) { if (timeout > INT_MAX) @@ -101,6 +97,4 @@ int __nf_ct_change_timeout(struct nf_conn *ct, u64 cta_timeout); void __nf_ct_change_status(struct nf_conn *ct, unsigned long on, unsigned long off); int nf_ct_change_status_common(struct nf_conn *ct, unsigned int status); -#endif - #endif /* _NF_CONNTRACK_CORE_H */ -- cgit v1.2.3-70-g09d2 From 52327d2e3996f2cac7df6de02623fd8d80fb4c85 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Mon, 15 Aug 2022 11:04:17 +0800 Subject: net: sched: remove the unused return value of unregister_qdisc Return value of unregister_qdisc is unused, remove it. Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20220815030417.271894-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski --- include/net/pkt_sched.h | 2 +- net/sched/sch_api.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 3372a1f67cf4..29f65632ebc5 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -100,7 +100,7 @@ struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops, struct netlink_ext_ack *extack); int register_qdisc(struct Qdisc_ops *qops); -int unregister_qdisc(struct Qdisc_ops *qops); +void unregister_qdisc(struct Qdisc_ops *qops); void qdisc_get_default(char *id, size_t len); int qdisc_set_default(const char *id); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index bf87b50837a8..8b4d575a3bbd 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -171,7 +171,7 @@ out_einval: } EXPORT_SYMBOL(register_qdisc); -int unregister_qdisc(struct Qdisc_ops *qops) +void unregister_qdisc(struct Qdisc_ops *qops) { struct Qdisc_ops *q, **qp; int err = -ENOENT; @@ -186,7 +186,8 @@ int unregister_qdisc(struct Qdisc_ops *qops) err = 0; } write_unlock(&qdisc_mod_lock); - return err; + + WARN(err, "unregister qdisc(%s) failed\n", qops->id); } EXPORT_SYMBOL(unregister_qdisc); -- cgit v1.2.3-70-g09d2 From 24426654ed3ae83d1127511891fb782c54f49203 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 16 Aug 2022 23:17:17 -0700 Subject: bpf: net: Avoid sk_setsockopt() taking sk lock when called from bpf Most of the code in bpf_setsockopt(SOL_SOCKET) are duplicated from the sk_setsockopt(). The number of supported optnames are increasing ever and so as the duplicated code. One issue in reusing sk_setsockopt() is that the bpf prog has already acquired the sk lock. This patch adds a has_current_bpf_ctx() to tell if the sk_setsockopt() is called from a bpf prog. The bpf prog calling bpf_setsockopt() is either running in_task() or in_serving_softirq(). Both cases have the current->bpf_ctx initialized. Thus, the has_current_bpf_ctx() only needs to test !!current->bpf_ctx. This patch also adds sockopt_{lock,release}_sock() helpers for sk_setsockopt() to use. These helpers will test has_current_bpf_ctx() before acquiring/releasing the lock. They are in EXPORT_SYMBOL for the ipv6 module to use in a latter patch. Note on the change in sock_setbindtodevice(). sockopt_lock_sock() is done in sock_setbindtodevice() instead of doing the lock_sock in sock_bindtoindex(..., lock_sk = true). Reviewed-by: Stanislav Fomichev Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20220817061717.4175589-1-kafai@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 13 +++++++++++++ include/net/sock.h | 3 +++ net/core/sock.c | 30 +++++++++++++++++++++++++++--- 3 files changed, 43 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a627a02cf8ab..39bd36359c1e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1966,6 +1966,15 @@ static inline bool unprivileged_ebpf_enabled(void) return !sysctl_unprivileged_bpf_disabled; } +/* Not all bpf prog type has the bpf_ctx. + * For the bpf prog type that has initialized the bpf_ctx, + * this function can be used to decide if a kernel function + * is called by a bpf program. + */ +static inline bool has_current_bpf_ctx(void) +{ + return !!current->bpf_ctx; +} #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -2175,6 +2184,10 @@ static inline bool unprivileged_ebpf_enabled(void) return false; } +static inline bool has_current_bpf_ctx(void) +{ + return false; +} #endif /* CONFIG_BPF_SYSCALL */ void __bpf_free_used_btfs(struct bpf_prog_aux *aux, diff --git a/include/net/sock.h b/include/net/sock.h index 05a1bbdf5805..352b9458fdc6 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1749,6 +1749,9 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow) } } +void sockopt_lock_sock(struct sock *sk); +void sockopt_release_sock(struct sock *sk); + /* Used by processes to "lock" a socket state, so that * interrupts and bottom half handlers won't change it * from under us. It essentially blocks any incoming diff --git a/net/core/sock.c b/net/core/sock.c index 20269c37ab3b..d3683228376f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -703,7 +703,9 @@ static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen) goto out; } - return sock_bindtoindex(sk, index, true); + sockopt_lock_sock(sk); + ret = sock_bindtoindex_locked(sk, index); + sockopt_release_sock(sk); out: #endif @@ -1036,6 +1038,28 @@ static int sock_reserve_memory(struct sock *sk, int bytes) return 0; } +void sockopt_lock_sock(struct sock *sk) +{ + /* When current->bpf_ctx is set, the setsockopt is called from + * a bpf prog. bpf has ensured the sk lock has been + * acquired before calling setsockopt(). + */ + if (has_current_bpf_ctx()) + return; + + lock_sock(sk); +} +EXPORT_SYMBOL(sockopt_lock_sock); + +void sockopt_release_sock(struct sock *sk) +{ + if (has_current_bpf_ctx()) + return; + + release_sock(sk); +} +EXPORT_SYMBOL(sockopt_release_sock); + /* * This is meant for all protocols to use and covers goings on * at the socket level. Everything here is generic. @@ -1067,7 +1091,7 @@ static int sk_setsockopt(struct sock *sk, int level, int optname, valbool = val ? 1 : 0; - lock_sock(sk); + sockopt_lock_sock(sk); switch (optname) { case SO_DEBUG: @@ -1496,7 +1520,7 @@ set_sndbuf: ret = -ENOPROTOOPT; break; } - release_sock(sk); + sockopt_release_sock(sk); return ret; } -- cgit v1.2.3-70-g09d2 From e42c7beee71d0d84a6193357e3525d0cf2a3e168 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 16 Aug 2022 23:17:23 -0700 Subject: bpf: net: Consider has_current_bpf_ctx() when testing capable() in sk_setsockopt() When bpf program calling bpf_setsockopt(SOL_SOCKET), it could be run in softirq and doesn't make sense to do the capable check. There was a similar situation in bpf_setsockopt(TCP_CONGESTION). In commit 8d650cdedaab ("tcp: fix tcp_set_congestion_control() use from bpf hook"), tcp_set_congestion_control(..., cap_net_admin) was added to skip the cap check for bpf prog. This patch adds sockopt_ns_capable() and sockopt_capable() for the sk_setsockopt() to use. They will consider the has_current_bpf_ctx() before doing the ns_capable() and capable() test. They are in EXPORT_SYMBOL for the ipv6 module to use in a latter patch. Suggested-by: Stanislav Fomichev Reviewed-by: Stanislav Fomichev Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20220817061723.4175820-1-kafai@fb.com Signed-off-by: Alexei Starovoitov --- include/net/sock.h | 2 ++ net/core/sock.c | 38 +++++++++++++++++++++++++------------- 2 files changed, 27 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 352b9458fdc6..13089d88a2e2 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1751,6 +1751,8 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow) void sockopt_lock_sock(struct sock *sk); void sockopt_release_sock(struct sock *sk); +bool sockopt_ns_capable(struct user_namespace *ns, int cap); +bool sockopt_capable(int cap); /* Used by processes to "lock" a socket state, so that * interrupts and bottom half handlers won't change it diff --git a/net/core/sock.c b/net/core/sock.c index d3683228376f..7ea46e4700fd 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1060,6 +1060,18 @@ void sockopt_release_sock(struct sock *sk) } EXPORT_SYMBOL(sockopt_release_sock); +bool sockopt_ns_capable(struct user_namespace *ns, int cap) +{ + return has_current_bpf_ctx() || ns_capable(ns, cap); +} +EXPORT_SYMBOL(sockopt_ns_capable); + +bool sockopt_capable(int cap) +{ + return has_current_bpf_ctx() || capable(cap); +} +EXPORT_SYMBOL(sockopt_capable); + /* * This is meant for all protocols to use and covers goings on * at the socket level. Everything here is generic. @@ -1095,7 +1107,7 @@ static int sk_setsockopt(struct sock *sk, int level, int optname, switch (optname) { case SO_DEBUG: - if (val && !capable(CAP_NET_ADMIN)) + if (val && !sockopt_capable(CAP_NET_ADMIN)) ret = -EACCES; else sock_valbool_flag(sk, SOCK_DBG, valbool); @@ -1139,7 +1151,7 @@ set_sndbuf: break; case SO_SNDBUFFORCE: - if (!capable(CAP_NET_ADMIN)) { + if (!sockopt_capable(CAP_NET_ADMIN)) { ret = -EPERM; break; } @@ -1161,7 +1173,7 @@ set_sndbuf: break; case SO_RCVBUFFORCE: - if (!capable(CAP_NET_ADMIN)) { + if (!sockopt_capable(CAP_NET_ADMIN)) { ret = -EPERM; break; } @@ -1188,8 +1200,8 @@ set_sndbuf: case SO_PRIORITY: if ((val >= 0 && val <= 6) || - ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) || - ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) || + sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) sk->sk_priority = val; else ret = -EPERM; @@ -1334,8 +1346,8 @@ set_sndbuf: clear_bit(SOCK_PASSSEC, &sock->flags); break; case SO_MARK: - if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) && - !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { + if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) && + !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { ret = -EPERM; break; } @@ -1343,8 +1355,8 @@ set_sndbuf: __sock_set_mark(sk, val); break; case SO_RCVMARK: - if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) && - !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { + if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) && + !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { ret = -EPERM; break; } @@ -1378,7 +1390,7 @@ set_sndbuf: #ifdef CONFIG_NET_RX_BUSY_POLL case SO_BUSY_POLL: /* allow unprivileged users to decrease the value */ - if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN)) + if ((val > sk->sk_ll_usec) && !sockopt_capable(CAP_NET_ADMIN)) ret = -EPERM; else { if (val < 0) @@ -1388,13 +1400,13 @@ set_sndbuf: } break; case SO_PREFER_BUSY_POLL: - if (valbool && !capable(CAP_NET_ADMIN)) + if (valbool && !sockopt_capable(CAP_NET_ADMIN)) ret = -EPERM; else WRITE_ONCE(sk->sk_prefer_busy_poll, valbool); break; case SO_BUSY_POLL_BUDGET: - if (val > READ_ONCE(sk->sk_busy_poll_budget) && !capable(CAP_NET_ADMIN)) { + if (val > READ_ONCE(sk->sk_busy_poll_budget) && !sockopt_capable(CAP_NET_ADMIN)) { ret = -EPERM; } else { if (val < 0 || val > U16_MAX) @@ -1465,7 +1477,7 @@ set_sndbuf: * scheduler has enough safe guards. */ if (sk_txtime.clockid != CLOCK_MONOTONIC && - !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { + !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { ret = -EPERM; break; } -- cgit v1.2.3-70-g09d2 From 29003875bd5bab262a29d1c6e76a2124bd07e4c2 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 16 Aug 2022 23:18:04 -0700 Subject: bpf: Change bpf_setsockopt(SOL_SOCKET) to reuse sk_setsockopt() After the prep work in the previous patches, this patch removes most of the dup code from bpf_setsockopt(SOL_SOCKET) and reuses them from sk_setsockopt(). The sock ptr test is added to the SO_RCVLOWAT because the sk->sk_socket could be NULL in some of the bpf hooks. The existing optname white-list is refactored into a new function sol_socket_setsockopt(). Reviewed-by: Stanislav Fomichev Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20220817061804.4178920-1-kafai@fb.com Signed-off-by: Alexei Starovoitov --- include/net/sock.h | 2 + net/core/filter.c | 124 +++++++++++++---------------------------------------- net/core/sock.c | 6 +-- 3 files changed, 34 insertions(+), 98 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 13089d88a2e2..ee44b424d952 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1828,6 +1828,8 @@ void sock_pfree(struct sk_buff *skb); #define sock_edemux sock_efree #endif +int sk_setsockopt(struct sock *sk, int level, int optname, + sockptr_t optval, unsigned int optlen); int sock_setsockopt(struct socket *sock, int level, int op, sockptr_t optval, unsigned int optlen); diff --git a/net/core/filter.c b/net/core/filter.c index a663d7b96bad..6f5bcc8df487 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5013,109 +5013,43 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = { .arg1_type = ARG_PTR_TO_CTX, }; +static int sol_socket_setsockopt(struct sock *sk, int optname, + char *optval, int optlen) +{ + switch (optname) { + case SO_SNDBUF: + case SO_RCVBUF: + case SO_KEEPALIVE: + case SO_PRIORITY: + case SO_REUSEPORT: + case SO_RCVLOWAT: + case SO_MARK: + case SO_MAX_PACING_RATE: + case SO_BINDTOIFINDEX: + case SO_TXREHASH: + if (optlen != sizeof(int)) + return -EINVAL; + break; + case SO_BINDTODEVICE: + break; + default: + return -EINVAL; + } + + return sk_setsockopt(sk, SOL_SOCKET, optname, + KERNEL_SOCKPTR(optval), optlen); +} + static int __bpf_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen) { - char devname[IFNAMSIZ]; - int val, valbool; - struct net *net; - int ifindex; - int ret = 0; + int val, ret = 0; if (!sk_fullsock(sk)) return -EINVAL; if (level == SOL_SOCKET) { - if (optlen != sizeof(int) && optname != SO_BINDTODEVICE) - return -EINVAL; - val = *((int *)optval); - valbool = val ? 1 : 0; - - /* Only some socketops are supported */ - switch (optname) { - case SO_RCVBUF: - val = min_t(u32, val, sysctl_rmem_max); - val = min_t(int, val, INT_MAX / 2); - sk->sk_userlocks |= SOCK_RCVBUF_LOCK; - WRITE_ONCE(sk->sk_rcvbuf, - max_t(int, val * 2, SOCK_MIN_RCVBUF)); - break; - case SO_SNDBUF: - val = min_t(u32, val, sysctl_wmem_max); - val = min_t(int, val, INT_MAX / 2); - sk->sk_userlocks |= SOCK_SNDBUF_LOCK; - WRITE_ONCE(sk->sk_sndbuf, - max_t(int, val * 2, SOCK_MIN_SNDBUF)); - break; - case SO_MAX_PACING_RATE: /* 32bit version */ - if (val != ~0U) - cmpxchg(&sk->sk_pacing_status, - SK_PACING_NONE, - SK_PACING_NEEDED); - sk->sk_max_pacing_rate = (val == ~0U) ? - ~0UL : (unsigned int)val; - sk->sk_pacing_rate = min(sk->sk_pacing_rate, - sk->sk_max_pacing_rate); - break; - case SO_PRIORITY: - sk->sk_priority = val; - break; - case SO_RCVLOWAT: - if (val < 0) - val = INT_MAX; - if (sk->sk_socket && sk->sk_socket->ops->set_rcvlowat) - ret = sk->sk_socket->ops->set_rcvlowat(sk, val); - else - WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); - break; - case SO_MARK: - if (sk->sk_mark != val) { - sk->sk_mark = val; - sk_dst_reset(sk); - } - break; - case SO_BINDTODEVICE: - optlen = min_t(long, optlen, IFNAMSIZ - 1); - strncpy(devname, optval, optlen); - devname[optlen] = 0; - - ifindex = 0; - if (devname[0] != '\0') { - struct net_device *dev; - - ret = -ENODEV; - - net = sock_net(sk); - dev = dev_get_by_name(net, devname); - if (!dev) - break; - ifindex = dev->ifindex; - dev_put(dev); - } - fallthrough; - case SO_BINDTOIFINDEX: - if (optname == SO_BINDTOIFINDEX) - ifindex = val; - ret = sock_bindtoindex(sk, ifindex, false); - break; - case SO_KEEPALIVE: - if (sk->sk_prot->keepalive) - sk->sk_prot->keepalive(sk, valbool); - sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool); - break; - case SO_REUSEPORT: - sk->sk_reuseport = valbool; - break; - case SO_TXREHASH: - if (val < -1 || val > 1) { - ret = -EINVAL; - break; - } - sk->sk_txrehash = (u8)val; - break; - default: - ret = -EINVAL; - } + return sol_socket_setsockopt(sk, optname, optval, optlen); } else if (IS_ENABLED(CONFIG_INET) && level == SOL_IP) { if (optlen != sizeof(int) || sk->sk_family != AF_INET) return -EINVAL; diff --git a/net/core/sock.c b/net/core/sock.c index 7ea46e4700fd..2a6f84702eb9 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1077,8 +1077,8 @@ EXPORT_SYMBOL(sockopt_capable); * at the socket level. Everything here is generic. */ -static int sk_setsockopt(struct sock *sk, int level, int optname, - sockptr_t optval, unsigned int optlen) +int sk_setsockopt(struct sock *sk, int level, int optname, + sockptr_t optval, unsigned int optlen) { struct so_timestamping timestamping; struct socket *sock = sk->sk_socket; @@ -1264,7 +1264,7 @@ set_sndbuf: case SO_RCVLOWAT: if (val < 0) val = INT_MAX; - if (sock->ops->set_rcvlowat) + if (sock && sock->ops->set_rcvlowat) ret = sock->ops->set_rcvlowat(sk, val); else WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); -- cgit v1.2.3-70-g09d2 From 0c751f7071ef98d334ed06ca3f8f4cc1f7458cf5 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 16 Aug 2022 23:18:19 -0700 Subject: bpf: Change bpf_setsockopt(SOL_TCP) to reuse do_tcp_setsockopt() After the prep work in the previous patches, this patch removes all the dup code from bpf_setsockopt(SOL_TCP) and reuses the do_tcp_setsockopt(). The existing optname white-list is refactored into a new function sol_tcp_setsockopt(). The sol_tcp_setsockopt() also calls the bpf_sol_tcp_setsockopt() to handle the TCP_BPF_XXX specific optnames. bpf_setsockopt(TCP_SAVE_SYN) now also allows a value 2 to save the eth header also and it comes for free from do_tcp_setsockopt(). Reviewed-by: Stanislav Fomichev Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20220817061819.4180146-1-kafai@fb.com Signed-off-by: Alexei Starovoitov --- include/net/tcp.h | 2 ++ net/core/filter.c | 97 +++++++++++++++++-------------------------------------- net/ipv4/tcp.c | 4 +-- 3 files changed, 34 insertions(+), 69 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index d10962b9f0d0..c03a50c72f40 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -405,6 +405,8 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); bool tcp_bpf_bypass_getsockopt(int level, int optname); +int do_tcp_setsockopt(struct sock *sk, int level, int optname, + sockptr_t optval, unsigned int optlen); int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); void tcp_set_keepalive(struct sock *sk, int val); diff --git a/net/core/filter.c b/net/core/filter.c index bb135d456a53..66877605bb78 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5086,6 +5086,34 @@ static int bpf_sol_tcp_setsockopt(struct sock *sk, int optname, return 0; } +static int sol_tcp_setsockopt(struct sock *sk, int optname, + char *optval, int optlen) +{ + if (sk->sk_prot->setsockopt != tcp_setsockopt) + return -EINVAL; + + switch (optname) { + case TCP_KEEPIDLE: + case TCP_KEEPINTVL: + case TCP_KEEPCNT: + case TCP_SYNCNT: + case TCP_WINDOW_CLAMP: + case TCP_USER_TIMEOUT: + case TCP_NOTSENT_LOWAT: + case TCP_SAVE_SYN: + if (optlen != sizeof(int)) + return -EINVAL; + break; + case TCP_CONGESTION: + break; + default: + return bpf_sol_tcp_setsockopt(sk, optname, optval, optlen); + } + + return do_tcp_setsockopt(sk, SOL_TCP, optname, + KERNEL_SOCKPTR(optval), optlen); +} + static int __bpf_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen) { @@ -5138,73 +5166,8 @@ static int __bpf_setsockopt(struct sock *sk, int level, int optname, default: ret = -EINVAL; } - } else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP && - sk->sk_prot->setsockopt == tcp_setsockopt) { - if (optname >= TCP_BPF_IW) - return bpf_sol_tcp_setsockopt(sk, optname, - optval, optlen); - - if (optname == TCP_CONGESTION) { - char name[TCP_CA_NAME_MAX]; - - strncpy(name, optval, min_t(long, optlen, - TCP_CA_NAME_MAX-1)); - name[TCP_CA_NAME_MAX-1] = 0; - ret = tcp_set_congestion_control(sk, name, false, true); - } else { - struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_sock *tp = tcp_sk(sk); - - if (optlen != sizeof(int)) - return -EINVAL; - - val = *((int *)optval); - /* Only some options are supported */ - switch (optname) { - case TCP_SAVE_SYN: - if (val < 0 || val > 1) - ret = -EINVAL; - else - tp->save_syn = val; - break; - case TCP_KEEPIDLE: - ret = tcp_sock_set_keepidle_locked(sk, val); - break; - case TCP_KEEPINTVL: - if (val < 1 || val > MAX_TCP_KEEPINTVL) - ret = -EINVAL; - else - tp->keepalive_intvl = val * HZ; - break; - case TCP_KEEPCNT: - if (val < 1 || val > MAX_TCP_KEEPCNT) - ret = -EINVAL; - else - tp->keepalive_probes = val; - break; - case TCP_SYNCNT: - if (val < 1 || val > MAX_TCP_SYNCNT) - ret = -EINVAL; - else - icsk->icsk_syn_retries = val; - break; - case TCP_USER_TIMEOUT: - if (val < 0) - ret = -EINVAL; - else - icsk->icsk_user_timeout = val; - break; - case TCP_NOTSENT_LOWAT: - tp->notsent_lowat = val; - sk->sk_write_space(sk); - break; - case TCP_WINDOW_CLAMP: - ret = tcp_set_window_clamp(sk, val); - break; - default: - ret = -EINVAL; - } - } + } else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP) { + return sol_tcp_setsockopt(sk, optname, optval, optlen); } else { ret = -EINVAL; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index cfed84b1883f..a6986f201f92 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3479,8 +3479,8 @@ int tcp_set_window_clamp(struct sock *sk, int val) /* * Socket option code for TCP. */ -static int do_tcp_setsockopt(struct sock *sk, int level, int optname, - sockptr_t optval, unsigned int optlen) +int do_tcp_setsockopt(struct sock *sk, int level, int optname, + sockptr_t optval, unsigned int optlen) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); -- cgit v1.2.3-70-g09d2 From ee7f1e1302f5cb29168f70827c12855f1d8c9845 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 16 Aug 2022 23:18:26 -0700 Subject: bpf: Change bpf_setsockopt(SOL_IP) to reuse do_ip_setsockopt() After the prep work in the previous patches, this patch removes the dup code from bpf_setsockopt(SOL_IP) and reuses the implementation in do_ip_setsockopt(). The existing optname white-list is refactored into a new function sol_ip_setsockopt(). NOTE, the current bpf_setsockopt(IP_TOS) is quite different from the the do_ip_setsockopt(IP_TOS). For example, it does not take the INET_ECN_MASK into the account for tcp and also does not adjust sk->sk_priority. It looks like the current bpf_setsockopt(IP_TOS) was referencing the IPV6_TCLASS implementation instead of IP_TOS. This patch tries to rectify that by using the do_ip_setsockopt(IP_TOS). While this is a behavior change, the do_ip_setsockopt(IP_TOS) behavior is arguably what the user is expecting. At least, the INET_ECN_MASK bits should be masked out for tcp. Reviewed-by: Stanislav Fomichev Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20220817061826.4180990-1-kafai@fb.com Signed-off-by: Alexei Starovoitov --- include/net/ip.h | 2 ++ net/core/filter.c | 40 ++++++++++++++++++++-------------------- net/ipv4/ip_sockglue.c | 4 ++-- 3 files changed, 24 insertions(+), 22 deletions(-) (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index 1c979fd1904c..34fa5b0f0a0e 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -743,6 +743,8 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, bool allow_ipv6); DECLARE_STATIC_KEY_FALSE(ip4_min_ttl); +int do_ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, + unsigned int optlen); int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval, diff --git a/net/core/filter.c b/net/core/filter.c index 66877605bb78..4d1b42b8f4a8 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5114,6 +5114,25 @@ static int sol_tcp_setsockopt(struct sock *sk, int optname, KERNEL_SOCKPTR(optval), optlen); } +static int sol_ip_setsockopt(struct sock *sk, int optname, + char *optval, int optlen) +{ + if (sk->sk_family != AF_INET) + return -EINVAL; + + switch (optname) { + case IP_TOS: + if (optlen != sizeof(int)) + return -EINVAL; + break; + default: + return -EINVAL; + } + + return do_ip_setsockopt(sk, SOL_IP, optname, + KERNEL_SOCKPTR(optval), optlen); +} + static int __bpf_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen) { @@ -5125,26 +5144,7 @@ static int __bpf_setsockopt(struct sock *sk, int level, int optname, if (level == SOL_SOCKET) { return sol_socket_setsockopt(sk, optname, optval, optlen); } else if (IS_ENABLED(CONFIG_INET) && level == SOL_IP) { - if (optlen != sizeof(int) || sk->sk_family != AF_INET) - return -EINVAL; - - val = *((int *)optval); - /* Only some options are supported */ - switch (optname) { - case IP_TOS: - if (val < -1 || val > 0xff) { - ret = -EINVAL; - } else { - struct inet_sock *inet = inet_sk(sk); - - if (val == -1) - val = 0; - inet->tos = val; - } - break; - default: - ret = -EINVAL; - } + return sol_ip_setsockopt(sk, optname, optval, optlen); } else if (IS_ENABLED(CONFIG_IPV6) && level == SOL_IPV6) { if (optlen != sizeof(int) || sk->sk_family != AF_INET6) return -EINVAL; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index a3c496580e6b..751fa69cb557 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -888,8 +888,8 @@ static int compat_ip_mcast_join_leave(struct sock *sk, int optname, DEFINE_STATIC_KEY_FALSE(ip4_min_ttl); -static int do_ip_setsockopt(struct sock *sk, int level, int optname, - sockptr_t optval, unsigned int optlen) +int do_ip_setsockopt(struct sock *sk, int level, int optname, + sockptr_t optval, unsigned int optlen) { struct inet_sock *inet = inet_sk(sk); struct net *net = sock_net(sk); -- cgit v1.2.3-70-g09d2 From 75b64b68ee3f9fe90ad8f21e5c5c92de58abf725 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 16 Aug 2022 23:18:34 -0700 Subject: bpf: Change bpf_setsockopt(SOL_IPV6) to reuse do_ipv6_setsockopt() After the prep work in the previous patches, this patch removes the dup code from bpf_setsockopt(SOL_IPV6) and reuses the implementation in do_ipv6_setsockopt(). ipv6 could be compiled as a module. Like how other code solved it with stubs in ipv6_stubs.h, this patch adds the do_ipv6_setsockopt to the ipv6_bpf_stub. The current bpf_setsockopt(IPV6_TCLASS) does not take the INET_ECN_MASK into the account for tcp. The do_ipv6_setsockopt(IPV6_TCLASS) will handle it correctly. The existing optname white-list is refactored into a new function sol_ipv6_setsockopt(). After this last SOL_IPV6 dup code removal, the __bpf_setsockopt() is simplified enough that the extra "{ }" around the if statement can be removed. Reviewed-by: Stanislav Fomichev Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20220817061834.4181198-1-kafai@fb.com Signed-off-by: Alexei Starovoitov --- include/net/ipv6.h | 2 ++ include/net/ipv6_stubs.h | 2 ++ net/core/filter.c | 56 ++++++++++++++++++++++-------------------------- net/ipv6/af_inet6.c | 1 + net/ipv6/ipv6_sockglue.c | 4 ++-- 5 files changed, 33 insertions(+), 32 deletions(-) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index de9dcc5652c4..c110d9032083 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1156,6 +1156,8 @@ struct in6_addr *fl6_update_dst(struct flowi6 *fl6, */ DECLARE_STATIC_KEY_FALSE(ip6_min_hopcount); +int do_ipv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, + unsigned int optlen); int ipv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int ipv6_getsockopt(struct sock *sk, int level, int optname, diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index 45e0339be6fa..8692698b01cf 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -81,6 +81,8 @@ struct ipv6_bpf_stub { const struct in6_addr *daddr, __be16 dport, int dif, int sdif, struct udp_table *tbl, struct sk_buff *skb); + int (*ipv6_setsockopt)(struct sock *sk, int level, int optname, + sockptr_t optval, unsigned int optlen); }; extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; diff --git a/net/core/filter.c b/net/core/filter.c index 4d1b42b8f4a8..23282b8cf61e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5133,45 +5133,41 @@ static int sol_ip_setsockopt(struct sock *sk, int optname, KERNEL_SOCKPTR(optval), optlen); } +static int sol_ipv6_setsockopt(struct sock *sk, int optname, + char *optval, int optlen) +{ + if (sk->sk_family != AF_INET6) + return -EINVAL; + + switch (optname) { + case IPV6_TCLASS: + if (optlen != sizeof(int)) + return -EINVAL; + break; + default: + return -EINVAL; + } + + return ipv6_bpf_stub->ipv6_setsockopt(sk, SOL_IPV6, optname, + KERNEL_SOCKPTR(optval), optlen); +} + static int __bpf_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen) { - int val, ret = 0; - if (!sk_fullsock(sk)) return -EINVAL; - if (level == SOL_SOCKET) { + if (level == SOL_SOCKET) return sol_socket_setsockopt(sk, optname, optval, optlen); - } else if (IS_ENABLED(CONFIG_INET) && level == SOL_IP) { + else if (IS_ENABLED(CONFIG_INET) && level == SOL_IP) return sol_ip_setsockopt(sk, optname, optval, optlen); - } else if (IS_ENABLED(CONFIG_IPV6) && level == SOL_IPV6) { - if (optlen != sizeof(int) || sk->sk_family != AF_INET6) - return -EINVAL; - - val = *((int *)optval); - /* Only some options are supported */ - switch (optname) { - case IPV6_TCLASS: - if (val < -1 || val > 0xff) { - ret = -EINVAL; - } else { - struct ipv6_pinfo *np = inet6_sk(sk); - - if (val == -1) - val = 0; - np->tclass = val; - } - break; - default: - ret = -EINVAL; - } - } else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP) { + else if (IS_ENABLED(CONFIG_IPV6) && level == SOL_IPV6) + return sol_ipv6_setsockopt(sk, optname, optval, optlen); + else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP) return sol_tcp_setsockopt(sk, optname, optval, optlen); - } else { - ret = -EINVAL; - } - return ret; + + return -EINVAL; } static int _bpf_setsockopt(struct sock *sk, int level, int optname, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 2ce0c44d0081..cadc97852787 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -1057,6 +1057,7 @@ static const struct ipv6_stub ipv6_stub_impl = { static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = { .inet6_bind = __inet6_bind, .udp6_lib_lookup = __udp6_lib_lookup, + .ipv6_setsockopt = do_ipv6_setsockopt, }; static int __init inet6_init(void) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index d23de48ff612..a4535bdbd310 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -391,8 +391,8 @@ sticky_done: return err; } -static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, - sockptr_t optval, unsigned int optlen) +int do_ipv6_setsockopt(struct sock *sk, int level, int optname, + sockptr_t optval, unsigned int optlen) { struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); -- cgit v1.2.3-70-g09d2 From 5d81757835859760a38a54a87eff856d4e578836 Mon Sep 17 00:00:00 2001 From: Emeel Hakim Date: Thu, 18 Aug 2022 18:32:30 +0300 Subject: net: macsec: Expose MACSEC_SALT_LEN definition to user space Expose MACSEC_SALT_LEN definition to user space to be used in various user space applications such as iproute. Iproute will use this as part of adding macsec extended packet number support. Reviewed-by: Raed Salem Reviewed-by: Sabrina Dubroca Signed-off-by: Emeel Hakim Link: https://lore.kernel.org/r/20220818153229.4721-1-ehakim@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/macsec.h | 1 - include/uapi/linux/if_macsec.h | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/macsec.h b/include/net/macsec.h index d6fa6b97f6ef..73780aa73644 100644 --- a/include/net/macsec.h +++ b/include/net/macsec.h @@ -14,7 +14,6 @@ #define MACSEC_DEFAULT_PN_LEN 4 #define MACSEC_XPN_PN_LEN 8 -#define MACSEC_SALT_LEN 12 #define MACSEC_NUM_AN 4 /* 2 bits for the association number */ typedef u64 __bitwise sci_t; diff --git a/include/uapi/linux/if_macsec.h b/include/uapi/linux/if_macsec.h index 3af2aa069a36..d5b6d1f37353 100644 --- a/include/uapi/linux/if_macsec.h +++ b/include/uapi/linux/if_macsec.h @@ -22,6 +22,8 @@ #define MACSEC_KEYID_LEN 16 +#define MACSEC_SALT_LEN 12 + /* cipher IDs as per IEEE802.1AE-2018 (Table 14-1) */ #define MACSEC_CIPHER_ID_GCM_AES_128 0x0080C20001000001ULL #define MACSEC_CIPHER_ID_GCM_AES_256 0x0080C20001000002ULL -- cgit v1.2.3-70-g09d2 From 1202cdd665315c525b5237e96e0bedc76d7e754f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 17 Aug 2022 17:43:21 -0700 Subject: Remove DECnet support from kernel DECnet is an obsolete network protocol that receives more attention from kernel janitors than users. It belongs in computer protocol history museum not in Linux kernel. It has been "Orphaned" in kernel since 2010. The iproute2 support for DECnet was dropped in 5.0 release. The documentation link on Sourceforge says it is abandoned there as well. Leave the UAPI alone to keep userspace programs compiling. This means that there is still an empty neighbour table for AF_DECNET. The table of /proc/sys/net entries was updated to match current directories and reformatted to be alphabetical. Signed-off-by: Stephen Hemminger Acked-by: David Ahern Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- Documentation/admin-guide/kernel-parameters.txt | 4 - Documentation/admin-guide/sysctl/net.rst | 15 +- Documentation/networking/decnet.rst | 243 -- Documentation/networking/index.rst | 1 - Documentation/userspace-api/ioctl/ioctl-number.rst | 1 - MAINTAINERS | 7 - arch/mips/configs/decstation_64_defconfig | 2 - arch/mips/configs/decstation_defconfig | 2 - arch/mips/configs/decstation_r4k_defconfig | 2 - arch/mips/configs/gpr_defconfig | 2 - arch/mips/configs/mtx1_defconfig | 2 - arch/mips/configs/rm200_defconfig | 2 - arch/powerpc/configs/ppc6xx_defconfig | 2 - include/linux/netdevice.h | 4 - include/linux/netfilter.h | 5 - include/linux/netfilter_defs.h | 8 - include/net/dn.h | 231 -- include/net/dn_dev.h | 200 -- include/net/dn_fib.h | 169 -- include/net/dn_neigh.h | 32 - include/net/dn_nsp.h | 201 -- include/net/dn_route.h | 118 - include/net/netns/netfilter.h | 3 - include/uapi/linux/dn.h | 149 -- include/uapi/linux/netfilter_decnet.h | 72 - include/uapi/linux/netlink.h | 2 +- net/Kconfig | 2 - net/Makefile | 1 - net/core/dev.c | 4 +- net/core/neighbour.c | 3 - net/decnet/Kconfig | 43 - net/decnet/Makefile | 10 - net/decnet/README | 8 - net/decnet/af_decnet.c | 2404 -------------------- net/decnet/dn_dev.c | 1433 ------------ net/decnet/dn_fib.c | 798 ------- net/decnet/dn_neigh.c | 607 ----- net/decnet/dn_nsp_in.c | 907 -------- net/decnet/dn_nsp_out.c | 696 ------ net/decnet/dn_route.c | 1922 ---------------- net/decnet/dn_rules.c | 253 -- net/decnet/dn_table.c | 929 -------- net/decnet/dn_timer.c | 104 - net/decnet/netfilter/Kconfig | 17 - net/decnet/netfilter/Makefile | 6 - net/decnet/netfilter/dn_rtmsg.c | 158 -- net/decnet/sysctl_net_decnet.c | 362 --- net/netfilter/core.c | 10 - net/netfilter/nfnetlink_hook.c | 7 - 49 files changed, 10 insertions(+), 12153 deletions(-) delete mode 100644 Documentation/networking/decnet.rst delete mode 100644 include/net/dn.h delete mode 100644 include/net/dn_dev.h delete mode 100644 include/net/dn_fib.h delete mode 100644 include/net/dn_neigh.h delete mode 100644 include/net/dn_nsp.h delete mode 100644 include/net/dn_route.h delete mode 100644 include/uapi/linux/dn.h delete mode 100644 include/uapi/linux/netfilter_decnet.h delete mode 100644 net/decnet/Kconfig delete mode 100644 net/decnet/Makefile delete mode 100644 net/decnet/README delete mode 100644 net/decnet/af_decnet.c delete mode 100644 net/decnet/dn_dev.c delete mode 100644 net/decnet/dn_fib.c delete mode 100644 net/decnet/dn_neigh.c delete mode 100644 net/decnet/dn_nsp_in.c delete mode 100644 net/decnet/dn_nsp_out.c delete mode 100644 net/decnet/dn_route.c delete mode 100644 net/decnet/dn_rules.c delete mode 100644 net/decnet/dn_table.c delete mode 100644 net/decnet/dn_timer.c delete mode 100644 net/decnet/netfilter/Kconfig delete mode 100644 net/decnet/netfilter/Makefile delete mode 100644 net/decnet/netfilter/dn_rtmsg.c delete mode 100644 net/decnet/sysctl_net_decnet.c (limited to 'include/net') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index d7f30902fda0..adfda56b2691 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -966,10 +966,6 @@ debugpat [X86] Enable PAT debugging - decnet.addr= [HW,NET] - Format: [,] - See also Documentation/networking/decnet.rst. - default_hugepagesz= [HW] The size of the default HugeTLB page. This is the size represented by the legacy /proc/ hugepages diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst index 805f2281e000..82879a9d5683 100644 --- a/Documentation/admin-guide/sysctl/net.rst +++ b/Documentation/admin-guide/sysctl/net.rst @@ -34,13 +34,14 @@ Table : Subdirectories in /proc/sys/net ========= =================== = ========== ================== Directory Content Directory Content ========= =================== = ========== ================== - core General parameter appletalk Appletalk protocol - unix Unix domain sockets netrom NET/ROM - 802 E802 protocol ax25 AX25 - ethernet Ethernet protocol rose X.25 PLP layer - ipv4 IP version 4 x25 X.25 protocol - bridge Bridging decnet DEC net - ipv6 IP version 6 tipc TIPC + 802 E802 protocol mptcp Multipath TCP + appletalk Appletalk protocol netfilter Network Filter + ax25 AX25 netrom NET/ROM + bridge Bridging rose X.25 PLP layer + core General parameter tipc TIPC + ethernet Ethernet protocol unix Unix domain sockets + ipv4 IP version 4 x25 X.25 protocol + ipv6 IP version 6 ========= =================== = ========== ================== 1. /proc/sys/net/core - Network core options diff --git a/Documentation/networking/decnet.rst b/Documentation/networking/decnet.rst deleted file mode 100644 index b8bc11ff8370..000000000000 --- a/Documentation/networking/decnet.rst +++ /dev/null @@ -1,243 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 - -========================================= -Linux DECnet Networking Layer Information -========================================= - -1. Other documentation.... -========================== - - - Project Home Pages - - http://www.chygwyn.com/ - Kernel info - - http://linux-decnet.sourceforge.net/ - Userland tools - - http://www.sourceforge.net/projects/linux-decnet/ - Status page - -2. Configuring the kernel -========================= - -Be sure to turn on the following options: - - - CONFIG_DECNET (obviously) - - CONFIG_PROC_FS (to see what's going on) - - CONFIG_SYSCTL (for easy configuration) - -if you want to try out router support (not properly debugged yet) -you'll need the following options as well... - - - CONFIG_DECNET_ROUTER (to be able to add/delete routes) - - CONFIG_NETFILTER (will be required for the DECnet routing daemon) - -Don't turn on SIOCGIFCONF support for DECnet unless you are really sure -that you need it, in general you won't and it can cause ifconfig to -malfunction. - -Run time configuration has changed slightly from the 2.4 system. If you -want to configure an endnode, then the simplified procedure is as follows: - - - Set the MAC address on your ethernet card before starting _any_ other - network protocols. - -As soon as your network card is brought into the UP state, DECnet should -start working. If you need something more complicated or are unsure how -to set the MAC address, see the next section. Also all configurations which -worked with 2.4 will work under 2.5 with no change. - -3. Command line options -======================= - -You can set a DECnet address on the kernel command line for compatibility -with the 2.4 configuration procedure, but in general it's not needed any more. -If you do st a DECnet address on the command line, it has only one purpose -which is that its added to the addresses on the loopback device. - -With 2.4 kernels, DECnet would only recognise addresses as local if they -were added to the loopback device. In 2.5, any local interface address -can be used to loop back to the local machine. Of course this does not -prevent you adding further addresses to the loopback device if you -want to. - -N.B. Since the address list of an interface determines the addresses for -which "hello" messages are sent, if you don't set an address on the loopback -interface then you won't see any entries in /proc/net/neigh for the local -host until such time as you start a connection. This doesn't affect the -operation of the local communications in any other way though. - -The kernel command line takes options looking like the following:: - - decnet.addr=1,2 - -the two numbers are the node address 1,2 = 1.2 For 2.2.xx kernels -and early 2.3.xx kernels, you must use a comma when specifying the -DECnet address like this. For more recent 2.3.xx kernels, you may -use almost any character except space, although a `.` would be the most -obvious choice :-) - -There used to be a third number specifying the node type. This option -has gone away in favour of a per interface node type. This is now set -using /proc/sys/net/decnet/conf//forwarding. This file can be -set with a single digit, 0=EndNode, 1=L1 Router and 2=L2 Router. - -There are also equivalent options for modules. The node address can -also be set through the /proc/sys/net/decnet/ files, as can other system -parameters. - -Currently the only supported devices are ethernet and ip_gre. The -ethernet address of your ethernet card has to be set according to the DECnet -address of the node in order for it to be autoconfigured (and then appear in -/proc/net/decnet_dev). There is a utility available at the above -FTP sites called dn2ethaddr which can compute the correct ethernet -address to use. The address can be set by ifconfig either before or -at the time the device is brought up. If you are using RedHat you can -add the line:: - - MACADDR=AA:00:04:00:03:04 - -or something similar, to /etc/sysconfig/network-scripts/ifcfg-eth0 or -wherever your network card's configuration lives. Setting the MAC address -of your ethernet card to an address starting with "hi-ord" will cause a -DECnet address which matches to be added to the interface (which you can -verify with iproute2). - -The default device for routing can be set through the /proc filesystem -by setting /proc/sys/net/decnet/default_device to the -device you want DECnet to route packets out of when no specific route -is available. Usually this will be eth0, for example:: - - echo -n "eth0" >/proc/sys/net/decnet/default_device - -If you don't set the default device, then it will default to the first -ethernet card which has been autoconfigured as described above. You can -confirm that by looking in the default_device file of course. - -There is a list of what the other files under /proc/sys/net/decnet/ do -on the kernel patch web site (shown above). - -4. Run time kernel configuration -================================ - - -This is either done through the sysctl/proc interface (see the kernel web -pages for details on what the various options do) or through the iproute2 -package in the same way as IPv4/6 configuration is performed. - -Documentation for iproute2 is included with the package, although there is -as yet no specific section on DECnet, most of the features apply to both -IP and DECnet, albeit with DECnet addresses instead of IP addresses and -a reduced functionality. - -If you want to configure a DECnet router you'll need the iproute2 package -since its the _only_ way to add and delete routes currently. Eventually -there will be a routing daemon to send and receive routing messages for -each interface and update the kernel routing tables accordingly. The -routing daemon will use netfilter to listen to routing packets, and -rtnetlink to update the kernels routing tables. - -The DECnet raw socket layer has been removed since it was there purely -for use by the routing daemon which will now use netfilter (a much cleaner -and more generic solution) instead. - -5. How can I tell if its working? -================================= - -Here is a quick guide of what to look for in order to know if your DECnet -kernel subsystem is working. - - - Is the node address set (see /proc/sys/net/decnet/node_address) - - Is the node of the correct type - (see /proc/sys/net/decnet/conf//forwarding) - - Is the Ethernet MAC address of each Ethernet card set to match - the DECnet address. If in doubt use the dn2ethaddr utility available - at the ftp archive. - - If the previous two steps are satisfied, and the Ethernet card is up, - you should find that it is listed in /proc/net/decnet_dev and also - that it appears as a directory in /proc/sys/net/decnet/conf/. The - loopback device (lo) should also appear and is required to communicate - within a node. - - If you have any DECnet routers on your network, they should appear - in /proc/net/decnet_neigh, otherwise this file will only contain the - entry for the node itself (if it doesn't check to see if lo is up). - - If you want to send to any node which is not listed in the - /proc/net/decnet_neigh file, you'll need to set the default device - to point to an Ethernet card with connection to a router. This is - again done with the /proc/sys/net/decnet/default_device file. - - Try starting a simple server and client, like the dnping/dnmirror - over the loopback interface. With luck they should communicate. - For this step and those after, you'll need the DECnet library - which can be obtained from the above ftp sites as well as the - actual utilities themselves. - - If this seems to work, then try talking to a node on your local - network, and see if you can obtain the same results. - - At this point you are on your own... :-) - -6. How to send a bug report -=========================== - -If you've found a bug and want to report it, then there are several things -you can do to help me work out exactly what it is that is wrong. Useful -information (_most_ of which _is_ _essential_) includes: - - - What kernel version are you running ? - - What version of the patch are you running ? - - How far though the above set of tests can you get ? - - What is in the /proc/decnet* files and /proc/sys/net/decnet/* files ? - - Which services are you running ? - - Which client caused the problem ? - - How much data was being transferred ? - - Was the network congested ? - - How can the problem be reproduced ? - - Can you use tcpdump to get a trace ? (N.B. Most (all?) versions of - tcpdump don't understand how to dump DECnet properly, so including - the hex listing of the packet contents is _essential_, usually the -x flag. - You may also need to increase the length grabbed with the -s flag. The - -e flag also provides very useful information (ethernet MAC addresses)) - -7. MAC FAQ -========== - -A quick FAQ on ethernet MAC addresses to explain how Linux and DECnet -interact and how to get the best performance from your hardware. - -Ethernet cards are designed to normally only pass received network frames -to a host computer when they are addressed to it, or to the broadcast address. - -Linux has an interface which allows the setting of extra addresses for -an ethernet card to listen to. If the ethernet card supports it, the -filtering operation will be done in hardware, if not the extra unwanted packets -received will be discarded by the host computer. In the latter case, -significant processor time and bus bandwidth can be used up on a busy -network (see the NAPI documentation for a longer explanation of these -effects). - -DECnet makes use of this interface to allow running DECnet on an ethernet -card which has already been configured using TCP/IP (presumably using the -built in MAC address of the card, as usual) and/or to allow multiple DECnet -addresses on each physical interface. If you do this, be aware that if your -ethernet card doesn't support perfect hashing in its MAC address filter -then your computer will be doing more work than required. Some cards -will simply set themselves into promiscuous mode in order to receive -packets from the DECnet specified addresses. So if you have one of these -cards its better to set the MAC address of the card as described above -to gain the best efficiency. Better still is to use a card which supports -NAPI as well. - - -8. Mailing list -=============== - -If you are keen to get involved in development, or want to ask questions -about configuration, or even just report bugs, then there is a mailing -list that you can join, details are at: - -http://sourceforge.net/mail/?group_id=4993 - -9. Legal Info -============= - -The Linux DECnet project team have placed their code under the GPL. The -software is provided "as is" and without warranty express or implied. -DECnet is a trademark of Compaq. This software is not a product of -Compaq. We acknowledge the help of people at Compaq in providing extra -documentation above and beyond what was previously publicly available. - -Steve Whitehouse - diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index 03b215bddde8..bacadd09e570 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -47,7 +47,6 @@ Contents: cdc_mbim dccp dctcp - decnet dns_resolver driver eql diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst index 3b985b19f39d..5f81e2a24a5c 100644 --- a/Documentation/userspace-api/ioctl/ioctl-number.rst +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst @@ -308,7 +308,6 @@ Code Seq# Include File Comments 0x89 00-06 arch/x86/include/asm/sockios.h 0x89 0B-DF linux/sockios.h 0x89 E0-EF linux/sockios.h SIOCPROTOPRIVATE range -0x89 E0-EF linux/dn.h PROTOPRIVATE range 0x89 F0-FF linux/sockios.h SIOCDEVPRIVATE range 0x8B all linux/wireless.h 0x8C 00-3F WiNRADiO driver diff --git a/MAINTAINERS b/MAINTAINERS index f512b430c7cb..61a34f63633c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5719,13 +5719,6 @@ F: include/linux/tfrc.h F: include/uapi/linux/dccp.h F: net/dccp/ -DECnet NETWORK LAYER -L: linux-decnet-user@lists.sourceforge.net -S: Orphan -W: http://linux-decnet.sourceforge.net -F: Documentation/networking/decnet.rst -F: net/decnet/ - DECSTATION PLATFORM SUPPORT M: "Maciej W. Rozycki" L: linux-mips@vger.kernel.org diff --git a/arch/mips/configs/decstation_64_defconfig b/arch/mips/configs/decstation_64_defconfig index 0021427a1bbe..4044f2829759 100644 --- a/arch/mips/configs/decstation_64_defconfig +++ b/arch/mips/configs/decstation_64_defconfig @@ -53,8 +53,6 @@ CONFIG_IPV6_SUBTREES=y CONFIG_NETWORK_SECMARK=y CONFIG_IP_SCTP=m CONFIG_VLAN_8021Q=m -CONFIG_DECNET=m -CONFIG_DECNET_ROUTER=y # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set # CONFIG_FW_LOADER is not set diff --git a/arch/mips/configs/decstation_defconfig b/arch/mips/configs/decstation_defconfig index 7a97a0818ce4..157fc57520a7 100644 --- a/arch/mips/configs/decstation_defconfig +++ b/arch/mips/configs/decstation_defconfig @@ -49,8 +49,6 @@ CONFIG_IPV6_SUBTREES=y CONFIG_NETWORK_SECMARK=y CONFIG_IP_SCTP=m CONFIG_VLAN_8021Q=m -CONFIG_DECNET=m -CONFIG_DECNET_ROUTER=y # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set # CONFIG_FW_LOADER is not set diff --git a/arch/mips/configs/decstation_r4k_defconfig b/arch/mips/configs/decstation_r4k_defconfig index a0643363526d..f73c26ebfc83 100644 --- a/arch/mips/configs/decstation_r4k_defconfig +++ b/arch/mips/configs/decstation_r4k_defconfig @@ -48,8 +48,6 @@ CONFIG_IPV6_SUBTREES=y CONFIG_NETWORK_SECMARK=y CONFIG_IP_SCTP=m CONFIG_VLAN_8021Q=m -CONFIG_DECNET=m -CONFIG_DECNET_ROUTER=y # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set # CONFIG_FW_LOADER is not set diff --git a/arch/mips/configs/gpr_defconfig b/arch/mips/configs/gpr_defconfig index d82f4ebf687f..ce8a444957c1 100644 --- a/arch/mips/configs/gpr_defconfig +++ b/arch/mips/configs/gpr_defconfig @@ -69,7 +69,6 @@ CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_DECNET_NF_GRABULATOR=m CONFIG_BRIDGE_NF_EBTABLES=m CONFIG_BRIDGE_EBT_BROUTE=m CONFIG_BRIDGE_EBT_T_FILTER=m @@ -99,7 +98,6 @@ CONFIG_ATM_MPOA=m CONFIG_ATM_BR2684=m CONFIG_BRIDGE=m CONFIG_VLAN_8021Q=m -CONFIG_DECNET=m CONFIG_LLC2=m CONFIG_ATALK=m CONFIG_DEV_APPLETALK=m diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig index 4194e79b435c..1339c062a042 100644 --- a/arch/mips/configs/mtx1_defconfig +++ b/arch/mips/configs/mtx1_defconfig @@ -116,7 +116,6 @@ CONFIG_IP6_NF_FILTER=m CONFIG_IP6_NF_TARGET_REJECT=m CONFIG_IP6_NF_MANGLE=m CONFIG_IP6_NF_RAW=m -CONFIG_DECNET_NF_GRABULATOR=m CONFIG_BRIDGE_NF_EBTABLES=m CONFIG_BRIDGE_EBT_BROUTE=m CONFIG_BRIDGE_EBT_T_FILTER=m @@ -146,7 +145,6 @@ CONFIG_ATM_MPOA=m CONFIG_ATM_BR2684=m CONFIG_BRIDGE=m CONFIG_VLAN_8021Q=m -CONFIG_DECNET=m CONFIG_LLC2=m CONFIG_ATALK=m CONFIG_DEV_APPLETALK=m diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig index 7d6f235e8ccb..04c681bd716e 100644 --- a/arch/mips/configs/rm200_defconfig +++ b/arch/mips/configs/rm200_defconfig @@ -116,7 +116,6 @@ CONFIG_IP6_NF_FILTER=m CONFIG_IP6_NF_TARGET_REJECT=m CONFIG_IP6_NF_MANGLE=m CONFIG_IP6_NF_RAW=m -CONFIG_DECNET_NF_GRABULATOR=m CONFIG_BRIDGE_NF_EBTABLES=m CONFIG_BRIDGE_EBT_BROUTE=m CONFIG_BRIDGE_EBT_T_FILTER=m @@ -137,7 +136,6 @@ CONFIG_BRIDGE_EBT_REDIRECT=m CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m CONFIG_BRIDGE=m -CONFIG_DECNET=m CONFIG_NET_SCHED=y CONFIG_NET_SCH_CBQ=m CONFIG_NET_SCH_HTB=m diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index 91967824272e..a24f484bfbd2 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -243,8 +243,6 @@ CONFIG_ATM_LANE=m CONFIG_ATM_BR2684=m CONFIG_BRIDGE=m CONFIG_VLAN_8021Q=m -CONFIG_DECNET=m -CONFIG_DECNET_ROUTER=y CONFIG_ATALK=m CONFIG_DEV_APPLETALK=m CONFIG_IPDDP=m diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1a3cb93c3dcc..64e8662632f8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1837,7 +1837,6 @@ enum netdev_ml_priv_type { * @tipc_ptr: TIPC specific data * @atalk_ptr: AppleTalk link * @ip_ptr: IPv4 specific data - * @dn_ptr: DECnet specific data * @ip6_ptr: IPv6 specific data * @ax25_ptr: AX.25 specific data * @ieee80211_ptr: IEEE 802.11 specific data, assign before registering @@ -2133,9 +2132,6 @@ struct net_device { #if IS_ENABLED(CONFIG_ATALK) void *atalk_ptr; #endif -#if IS_ENABLED(CONFIG_DECNET) - struct dn_dev __rcu *dn_ptr; -#endif #if IS_ENABLED(CONFIG_AX25) void *ax25_ptr; #endif diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index c2c6f332fb90..d8817d381c14 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -243,11 +243,6 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, hook_head = rcu_dereference(net->nf.hooks_bridge[hook]); #endif break; -#if IS_ENABLED(CONFIG_DECNET) - case NFPROTO_DECNET: - hook_head = rcu_dereference(net->nf.hooks_decnet[hook]); - break; -#endif default: WARN_ON_ONCE(1); break; diff --git a/include/linux/netfilter_defs.h b/include/linux/netfilter_defs.h index 8dddfb151f00..a5f7bef1b3a4 100644 --- a/include/linux/netfilter_defs.h +++ b/include/linux/netfilter_defs.h @@ -7,14 +7,6 @@ /* in/out/forward only */ #define NF_ARP_NUMHOOKS 3 -/* max hook is NF_DN_ROUTE (6), also see uapi/linux/netfilter_decnet.h */ -#define NF_DN_NUMHOOKS 7 - -#if IS_ENABLED(CONFIG_DECNET) -/* Largest hook number + 1, see uapi/linux/netfilter_decnet.h */ -#define NF_MAX_HOOKS NF_DN_NUMHOOKS -#else #define NF_MAX_HOOKS NF_INET_NUMHOOKS -#endif #endif diff --git a/include/net/dn.h b/include/net/dn.h deleted file mode 100644 index ba9655b0098a..000000000000 --- a/include/net/dn.h +++ /dev/null @@ -1,231 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NET_DN_H -#define _NET_DN_H - -#include -#include -#include -#include -#include - -struct dn_scp /* Session Control Port */ -{ - unsigned char state; -#define DN_O 1 /* Open */ -#define DN_CR 2 /* Connect Receive */ -#define DN_DR 3 /* Disconnect Reject */ -#define DN_DRC 4 /* Discon. Rej. Complete*/ -#define DN_CC 5 /* Connect Confirm */ -#define DN_CI 6 /* Connect Initiate */ -#define DN_NR 7 /* No resources */ -#define DN_NC 8 /* No communication */ -#define DN_CD 9 /* Connect Delivery */ -#define DN_RJ 10 /* Rejected */ -#define DN_RUN 11 /* Running */ -#define DN_DI 12 /* Disconnect Initiate */ -#define DN_DIC 13 /* Disconnect Complete */ -#define DN_DN 14 /* Disconnect Notificat */ -#define DN_CL 15 /* Closed */ -#define DN_CN 16 /* Closed Notification */ - - __le16 addrloc; - __le16 addrrem; - __u16 numdat; - __u16 numoth; - __u16 numoth_rcv; - __u16 numdat_rcv; - __u16 ackxmt_dat; - __u16 ackxmt_oth; - __u16 ackrcv_dat; - __u16 ackrcv_oth; - __u8 flowrem_sw; - __u8 flowloc_sw; -#define DN_SEND 2 -#define DN_DONTSEND 1 -#define DN_NOCHANGE 0 - __u16 flowrem_dat; - __u16 flowrem_oth; - __u16 flowloc_dat; - __u16 flowloc_oth; - __u8 services_rem; - __u8 services_loc; - __u8 info_rem; - __u8 info_loc; - - __u16 segsize_rem; - __u16 segsize_loc; - - __u8 nonagle; - __u8 multi_ireq; - __u8 accept_mode; - unsigned long seg_total; /* Running total of current segment */ - - struct optdata_dn conndata_in; - struct optdata_dn conndata_out; - struct optdata_dn discdata_in; - struct optdata_dn discdata_out; - struct accessdata_dn accessdata; - - struct sockaddr_dn addr; /* Local address */ - struct sockaddr_dn peer; /* Remote address */ - - /* - * In this case the RTT estimation is not specified in the - * docs, nor is any back off algorithm. Here we follow well - * known tcp algorithms with a few small variations. - * - * snd_window: Max number of packets we send before we wait for - * an ack to come back. This will become part of a - * more complicated scheme when we support flow - * control. - * - * nsp_srtt: Round-Trip-Time (x8) in jiffies. This is a rolling - * average. - * nsp_rttvar: Round-Trip-Time-Varience (x4) in jiffies. This is the - * varience of the smoothed average (but calculated in - * a simpler way than for normal statistical varience - * calculations). - * - * nsp_rxtshift: Backoff counter. Value is zero normally, each time - * a packet is lost is increases by one until an ack - * is received. Its used to index an array of backoff - * multipliers. - */ -#define NSP_MIN_WINDOW 1 -#define NSP_MAX_WINDOW (0x07fe) - unsigned long max_window; - unsigned long snd_window; -#define NSP_INITIAL_SRTT (HZ) - unsigned long nsp_srtt; -#define NSP_INITIAL_RTTVAR (HZ*3) - unsigned long nsp_rttvar; -#define NSP_MAXRXTSHIFT 12 - unsigned long nsp_rxtshift; - - /* - * Output queues, one for data, one for otherdata/linkservice - */ - struct sk_buff_head data_xmit_queue; - struct sk_buff_head other_xmit_queue; - - /* - * Input queue for other data - */ - struct sk_buff_head other_receive_queue; - int other_report; - - /* - * Stuff to do with the slow timer - */ - unsigned long stamp; /* time of last transmit */ - unsigned long persist; - int (*persist_fxn)(struct sock *sk); - unsigned long keepalive; - void (*keepalive_fxn)(struct sock *sk); - -}; - -static inline struct dn_scp *DN_SK(struct sock *sk) -{ - return (struct dn_scp *)(sk + 1); -} - -/* - * src,dst : Source and Destination DECnet addresses - * hops : Number of hops through the network - * dst_port, src_port : NSP port numbers - * services, info : Useful data extracted from conninit messages - * rt_flags : Routing flags byte - * nsp_flags : NSP layer flags byte - * segsize : Size of segment - * segnum : Number, for data, otherdata and linkservice - * xmit_count : Number of times we've transmitted this skb - * stamp : Time stamp of most recent transmission, used in RTT calculations - * iif: Input interface number - * - * As a general policy, this structure keeps all addresses in network - * byte order, and all else in host byte order. Thus dst, src, dst_port - * and src_port are in network order. All else is in host order. - * - */ -#define DN_SKB_CB(skb) ((struct dn_skb_cb *)(skb)->cb) -struct dn_skb_cb { - __le16 dst; - __le16 src; - __u16 hops; - __le16 dst_port; - __le16 src_port; - __u8 services; - __u8 info; - __u8 rt_flags; - __u8 nsp_flags; - __u16 segsize; - __u16 segnum; - __u16 xmit_count; - unsigned long stamp; - int iif; -}; - -static inline __le16 dn_eth2dn(const unsigned char *ethaddr) -{ - return get_unaligned((__le16 *)(ethaddr + 4)); -} - -static inline __le16 dn_saddr2dn(struct sockaddr_dn *saddr) -{ - return *(__le16 *)saddr->sdn_nodeaddr; -} - -static inline void dn_dn2eth(unsigned char *ethaddr, __le16 addr) -{ - __u16 a = le16_to_cpu(addr); - ethaddr[0] = 0xAA; - ethaddr[1] = 0x00; - ethaddr[2] = 0x04; - ethaddr[3] = 0x00; - ethaddr[4] = (__u8)(a & 0xff); - ethaddr[5] = (__u8)(a >> 8); -} - -static inline void dn_sk_ports_copy(struct flowidn *fld, struct dn_scp *scp) -{ - fld->fld_sport = scp->addrloc; - fld->fld_dport = scp->addrrem; -} - -unsigned int dn_mss_from_pmtu(struct net_device *dev, int mtu); -void dn_register_sysctl(void); -void dn_unregister_sysctl(void); - -#define DN_MENUVER_ACC 0x01 -#define DN_MENUVER_USR 0x02 -#define DN_MENUVER_PRX 0x04 -#define DN_MENUVER_UIC 0x08 - -struct sock *dn_sklist_find_listener(struct sockaddr_dn *addr); -struct sock *dn_find_by_skb(struct sk_buff *skb); -#define DN_ASCBUF_LEN 9 -char *dn_addr2asc(__u16, char *); -int dn_destroy_timer(struct sock *sk); - -int dn_sockaddr2username(struct sockaddr_dn *addr, unsigned char *buf, - unsigned char type); -int dn_username2sockaddr(unsigned char *data, int len, struct sockaddr_dn *addr, - unsigned char *type); - -void dn_start_slow_timer(struct sock *sk); -void dn_stop_slow_timer(struct sock *sk); - -extern __le16 decnet_address; -extern int decnet_debug_level; -extern int decnet_time_wait; -extern int decnet_dn_count; -extern int decnet_di_count; -extern int decnet_dr_count; -extern int decnet_no_fc_max_cwnd; - -extern long sysctl_decnet_mem[3]; -extern int sysctl_decnet_wmem[3]; -extern int sysctl_decnet_rmem[3]; - -#endif /* _NET_DN_H */ diff --git a/include/net/dn_dev.h b/include/net/dn_dev.h deleted file mode 100644 index bec303ea8367..000000000000 --- a/include/net/dn_dev.h +++ /dev/null @@ -1,200 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NET_DN_DEV_H -#define _NET_DN_DEV_H - -#include - -struct dn_dev; - -struct dn_ifaddr { - struct dn_ifaddr __rcu *ifa_next; - struct dn_dev *ifa_dev; - __le16 ifa_local; - __le16 ifa_address; - __u32 ifa_flags; - __u8 ifa_scope; - char ifa_label[IFNAMSIZ]; - struct rcu_head rcu; -}; - -#define DN_DEV_S_RU 0 /* Run - working normally */ -#define DN_DEV_S_CR 1 /* Circuit Rejected */ -#define DN_DEV_S_DS 2 /* Data Link Start */ -#define DN_DEV_S_RI 3 /* Routing Layer Initialize */ -#define DN_DEV_S_RV 4 /* Routing Layer Verify */ -#define DN_DEV_S_RC 5 /* Routing Layer Complete */ -#define DN_DEV_S_OF 6 /* Off */ -#define DN_DEV_S_HA 7 /* Halt */ - - -/* - * The dn_dev_parms structure contains the set of parameters - * for each device (hence inclusion in the dn_dev structure) - * and an array is used to store the default types of supported - * device (in dn_dev.c). - * - * The type field matches the ARPHRD_ constants and is used in - * searching the list for supported devices when new devices - * come up. - * - * The mode field is used to find out if a device is broadcast, - * multipoint, or pointopoint. Please note that DECnet thinks - * different ways about devices to the rest of the kernel - * so the normal IFF_xxx flags are invalid here. For devices - * which can be any combination of the previously mentioned - * attributes, you can set this on a per device basis by - * installing an up() routine. - * - * The device state field, defines the initial state in which the - * device will come up. In the dn_dev structure, it is the actual - * state. - * - * Things have changed here. I've killed timer1 since it's a user space - * issue for a user space routing deamon to sort out. The kernel does - * not need to be bothered with it. - * - * Timers: - * t2 - Rate limit timer, min time between routing and hello messages - * t3 - Hello timer, send hello messages when it expires - * - * Callbacks: - * up() - Called to initialize device, return value can veto use of - * device with DECnet. - * down() - Called to turn device off when it goes down - * timer3() - Called once for each ifaddr when timer 3 goes off - * - * sysctl - Hook for sysctl things - * - */ -struct dn_dev_parms { - int type; /* ARPHRD_xxx */ - int mode; /* Broadcast, Unicast, Mulitpoint */ -#define DN_DEV_BCAST 1 -#define DN_DEV_UCAST 2 -#define DN_DEV_MPOINT 4 - int state; /* Initial state */ - int forwarding; /* 0=EndNode, 1=L1Router, 2=L2Router */ - unsigned long t2; /* Default value of t2 */ - unsigned long t3; /* Default value of t3 */ - int priority; /* Priority to be a router */ - char *name; /* Name for sysctl */ - int (*up)(struct net_device *); - void (*down)(struct net_device *); - void (*timer3)(struct net_device *, struct dn_ifaddr *ifa); - void *sysctl; -}; - - -struct dn_dev { - struct dn_ifaddr __rcu *ifa_list; - struct net_device *dev; - struct dn_dev_parms parms; - char use_long; - struct timer_list timer; - unsigned long t3; - struct neigh_parms *neigh_parms; - __u8 addr[ETH_ALEN]; - struct neighbour *router; /* Default router on circuit */ - struct neighbour *peer; /* Peer on pointopoint links */ - unsigned long uptime; /* Time device went up in jiffies */ -}; - -struct dn_short_packet { - __u8 msgflg; - __le16 dstnode; - __le16 srcnode; - __u8 forward; -} __packed; - -struct dn_long_packet { - __u8 msgflg; - __u8 d_area; - __u8 d_subarea; - __u8 d_id[6]; - __u8 s_area; - __u8 s_subarea; - __u8 s_id[6]; - __u8 nl2; - __u8 visit_ct; - __u8 s_class; - __u8 pt; -} __packed; - -/*------------------------- DRP - Routing messages ---------------------*/ - -struct endnode_hello_message { - __u8 msgflg; - __u8 tiver[3]; - __u8 id[6]; - __u8 iinfo; - __le16 blksize; - __u8 area; - __u8 seed[8]; - __u8 neighbor[6]; - __le16 timer; - __u8 mpd; - __u8 datalen; - __u8 data[2]; -} __packed; - -struct rtnode_hello_message { - __u8 msgflg; - __u8 tiver[3]; - __u8 id[6]; - __u8 iinfo; - __le16 blksize; - __u8 priority; - __u8 area; - __le16 timer; - __u8 mpd; -} __packed; - - -void dn_dev_init(void); -void dn_dev_cleanup(void); - -int dn_dev_ioctl(unsigned int cmd, void __user *arg); - -void dn_dev_devices_off(void); -void dn_dev_devices_on(void); - -void dn_dev_init_pkt(struct sk_buff *skb); -void dn_dev_veri_pkt(struct sk_buff *skb); -void dn_dev_hello(struct sk_buff *skb); - -void dn_dev_up(struct net_device *); -void dn_dev_down(struct net_device *); - -int dn_dev_set_default(struct net_device *dev, int force); -struct net_device *dn_dev_get_default(void); -int dn_dev_bind_default(__le16 *addr); - -int register_dnaddr_notifier(struct notifier_block *nb); -int unregister_dnaddr_notifier(struct notifier_block *nb); - -static inline int dn_dev_islocal(struct net_device *dev, __le16 addr) -{ - struct dn_dev *dn_db; - struct dn_ifaddr *ifa; - int res = 0; - - rcu_read_lock(); - dn_db = rcu_dereference(dev->dn_ptr); - if (dn_db == NULL) { - printk(KERN_DEBUG "dn_dev_islocal: Called for non DECnet device\n"); - goto out; - } - - for (ifa = rcu_dereference(dn_db->ifa_list); - ifa != NULL; - ifa = rcu_dereference(ifa->ifa_next)) - if ((addr ^ ifa->ifa_local) == 0) { - res = 1; - break; - } -out: - rcu_read_unlock(); - return res; -} - -#endif /* _NET_DN_DEV_H */ diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h deleted file mode 100644 index 1929a3cd5ebe..000000000000 --- a/include/net/dn_fib.h +++ /dev/null @@ -1,169 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NET_DN_FIB_H -#define _NET_DN_FIB_H - -#include -#include -#include -#include - -extern const struct nla_policy rtm_dn_policy[]; - -struct dn_fib_res { - struct fib_rule *r; - struct dn_fib_info *fi; - unsigned char prefixlen; - unsigned char nh_sel; - unsigned char type; - unsigned char scope; -}; - -struct dn_fib_nh { - struct net_device *nh_dev; - unsigned int nh_flags; - unsigned char nh_scope; - int nh_weight; - int nh_power; - int nh_oif; - __le16 nh_gw; -}; - -struct dn_fib_info { - struct dn_fib_info *fib_next; - struct dn_fib_info *fib_prev; - refcount_t fib_treeref; - refcount_t fib_clntref; - int fib_dead; - unsigned int fib_flags; - int fib_protocol; - __le16 fib_prefsrc; - __u32 fib_priority; - __u32 fib_metrics[RTAX_MAX]; - int fib_nhs; - int fib_power; - struct dn_fib_nh fib_nh[0]; -#define dn_fib_dev fib_nh[0].nh_dev -}; - - -#define DN_FIB_RES_RESET(res) ((res).nh_sel = 0) -#define DN_FIB_RES_NH(res) ((res).fi->fib_nh[(res).nh_sel]) - -#define DN_FIB_RES_PREFSRC(res) ((res).fi->fib_prefsrc ? : __dn_fib_res_prefsrc(&res)) -#define DN_FIB_RES_GW(res) (DN_FIB_RES_NH(res).nh_gw) -#define DN_FIB_RES_DEV(res) (DN_FIB_RES_NH(res).nh_dev) -#define DN_FIB_RES_OIF(res) (DN_FIB_RES_NH(res).nh_oif) - -typedef struct { - __le16 datum; -} dn_fib_key_t; - -typedef struct { - __le16 datum; -} dn_fib_hash_t; - -typedef struct { - __u16 datum; -} dn_fib_idx_t; - -struct dn_fib_node { - struct dn_fib_node *fn_next; - struct dn_fib_info *fn_info; -#define DN_FIB_INFO(f) ((f)->fn_info) - dn_fib_key_t fn_key; - u8 fn_type; - u8 fn_scope; - u8 fn_state; -}; - - -struct dn_fib_table { - struct hlist_node hlist; - u32 n; - - int (*insert)(struct dn_fib_table *t, struct rtmsg *r, - struct nlattr *attrs[], struct nlmsghdr *n, - struct netlink_skb_parms *req); - int (*delete)(struct dn_fib_table *t, struct rtmsg *r, - struct nlattr *attrs[], struct nlmsghdr *n, - struct netlink_skb_parms *req); - int (*lookup)(struct dn_fib_table *t, const struct flowidn *fld, - struct dn_fib_res *res); - int (*flush)(struct dn_fib_table *t); - int (*dump)(struct dn_fib_table *t, struct sk_buff *skb, struct netlink_callback *cb); - - unsigned char data[]; -}; - -#ifdef CONFIG_DECNET_ROUTER -/* - * dn_fib.c - */ -void dn_fib_init(void); -void dn_fib_cleanup(void); - -int dn_fib_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); -struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, - struct nlattr *attrs[], - const struct nlmsghdr *nlh, int *errp); -int dn_fib_semantic_match(int type, struct dn_fib_info *fi, - const struct flowidn *fld, struct dn_fib_res *res); -void dn_fib_release_info(struct dn_fib_info *fi); -void dn_fib_flush(void); -void dn_fib_select_multipath(const struct flowidn *fld, struct dn_fib_res *res); - -/* - * dn_tables.c - */ -struct dn_fib_table *dn_fib_get_table(u32 n, int creat); -struct dn_fib_table *dn_fib_empty_table(void); -void dn_fib_table_init(void); -void dn_fib_table_cleanup(void); - -/* - * dn_rules.c - */ -void dn_fib_rules_init(void); -void dn_fib_rules_cleanup(void); -unsigned int dnet_addr_type(__le16 addr); -int dn_fib_lookup(struct flowidn *fld, struct dn_fib_res *res); - -int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb); - -void dn_fib_free_info(struct dn_fib_info *fi); - -static inline void dn_fib_info_put(struct dn_fib_info *fi) -{ - if (refcount_dec_and_test(&fi->fib_clntref)) - dn_fib_free_info(fi); -} - -static inline void dn_fib_res_put(struct dn_fib_res *res) -{ - if (res->fi) - dn_fib_info_put(res->fi); - if (res->r) - fib_rule_put(res->r); -} - -#else /* Endnode */ - -#define dn_fib_init() do { } while(0) -#define dn_fib_cleanup() do { } while(0) - -#define dn_fib_lookup(fl, res) (-ESRCH) -#define dn_fib_info_put(fi) do { } while(0) -#define dn_fib_select_multipath(fl, res) do { } while(0) -#define dn_fib_rules_policy(saddr,res,flags) (0) -#define dn_fib_res_put(res) do { } while(0) - -#endif /* CONFIG_DECNET_ROUTER */ - -static inline __le16 dnet_make_mask(int n) -{ - if (n) - return cpu_to_le16(~((1 << (16 - n)) - 1)); - return cpu_to_le16(0); -} - -#endif /* _NET_DN_FIB_H */ diff --git a/include/net/dn_neigh.h b/include/net/dn_neigh.h deleted file mode 100644 index 1f7df98bfc33..000000000000 --- a/include/net/dn_neigh.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NET_DN_NEIGH_H -#define _NET_DN_NEIGH_H - -#include - -/* - * The position of the first two fields of - * this structure are critical - SJW - */ -struct dn_neigh { - struct neighbour n; - __le16 addr; - unsigned long flags; -#define DN_NDFLAG_R1 0x0001 /* Router L1 */ -#define DN_NDFLAG_R2 0x0002 /* Router L2 */ -#define DN_NDFLAG_P3 0x0004 /* Phase III Node */ - unsigned long blksize; - __u8 priority; -}; - -void dn_neigh_init(void); -void dn_neigh_cleanup(void); -int dn_neigh_router_hello(struct net *net, struct sock *sk, struct sk_buff *skb); -int dn_neigh_endnode_hello(struct net *net, struct sock *sk, struct sk_buff *skb); -void dn_neigh_pointopoint_hello(struct sk_buff *skb); -int dn_neigh_elist(struct net_device *dev, unsigned char *ptr, int n); -int dn_to_neigh_output(struct net *net, struct sock *sk, struct sk_buff *skb); - -extern struct neigh_table dn_neigh_table; - -#endif /* _NET_DN_NEIGH_H */ diff --git a/include/net/dn_nsp.h b/include/net/dn_nsp.h deleted file mode 100644 index a4a18fee0b7c..000000000000 --- a/include/net/dn_nsp.h +++ /dev/null @@ -1,201 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _NET_DN_NSP_H -#define _NET_DN_NSP_H -/****************************************************************************** - (c) 1995-1998 E.M. Serrat emserrat@geocities.com - -*******************************************************************************/ -/* dn_nsp.c functions prototyping */ -#include -#include -#include - -struct sk_buff; -struct sk_buff_head; - -void dn_nsp_send_data_ack(struct sock *sk); -void dn_nsp_send_oth_ack(struct sock *sk); -void dn_send_conn_ack(struct sock *sk); -void dn_send_conn_conf(struct sock *sk, gfp_t gfp); -void dn_nsp_send_disc(struct sock *sk, unsigned char type, - unsigned short reason, gfp_t gfp); -void dn_nsp_return_disc(struct sk_buff *skb, unsigned char type, - unsigned short reason); -void dn_nsp_send_link(struct sock *sk, unsigned char lsflags, char fcval); -void dn_nsp_send_conninit(struct sock *sk, unsigned char flags); - -void dn_nsp_output(struct sock *sk); -int dn_nsp_check_xmit_queue(struct sock *sk, struct sk_buff *skb, - struct sk_buff_head *q, unsigned short acknum); -void dn_nsp_queue_xmit(struct sock *sk, struct sk_buff *skb, gfp_t gfp, - int oob); -unsigned long dn_nsp_persist(struct sock *sk); -int dn_nsp_xmit_timeout(struct sock *sk); - -int dn_nsp_rx(struct sk_buff *); -int dn_nsp_backlog_rcv(struct sock *sk, struct sk_buff *skb); - -struct sk_buff *dn_alloc_skb(struct sock *sk, int size, gfp_t pri); -struct sk_buff *dn_alloc_send_skb(struct sock *sk, size_t *size, int noblock, - long timeo, int *err); - -#define NSP_REASON_OK 0 /* No error */ -#define NSP_REASON_NR 1 /* No resources */ -#define NSP_REASON_UN 2 /* Unrecognised node name */ -#define NSP_REASON_SD 3 /* Node shutting down */ -#define NSP_REASON_ID 4 /* Invalid destination end user */ -#define NSP_REASON_ER 5 /* End user lacks resources */ -#define NSP_REASON_OB 6 /* Object too busy */ -#define NSP_REASON_US 7 /* Unspecified error */ -#define NSP_REASON_TP 8 /* Third-Party abort */ -#define NSP_REASON_EA 9 /* End user has aborted the link */ -#define NSP_REASON_IF 10 /* Invalid node name format */ -#define NSP_REASON_LS 11 /* Local node shutdown */ -#define NSP_REASON_LL 32 /* Node lacks logical-link resources */ -#define NSP_REASON_LE 33 /* End user lacks logical-link resources */ -#define NSP_REASON_UR 34 /* Unacceptable RQSTRID or PASSWORD field */ -#define NSP_REASON_UA 36 /* Unacceptable ACCOUNT field */ -#define NSP_REASON_TM 38 /* End user timed out logical link */ -#define NSP_REASON_NU 39 /* Node unreachable */ -#define NSP_REASON_NL 41 /* No-link message */ -#define NSP_REASON_DC 42 /* Disconnect confirm */ -#define NSP_REASON_IO 43 /* Image data field overflow */ - -#define NSP_DISCINIT 0x38 -#define NSP_DISCCONF 0x48 - -/*------------------------- NSP - messages ------------------------------*/ -/* Data Messages */ -/*---------------*/ - -/* Data Messages (data segment/interrupt/link service) */ - -struct nsp_data_seg_msg { - __u8 msgflg; - __le16 dstaddr; - __le16 srcaddr; -} __packed; - -struct nsp_data_opt_msg { - __le16 acknum; - __le16 segnum; - __le16 lsflgs; -} __packed; - -struct nsp_data_opt_msg1 { - __le16 acknum; - __le16 segnum; -} __packed; - - -/* Acknowledgment Message (data/other data) */ -struct nsp_data_ack_msg { - __u8 msgflg; - __le16 dstaddr; - __le16 srcaddr; - __le16 acknum; -} __packed; - -/* Connect Acknowledgment Message */ -struct nsp_conn_ack_msg { - __u8 msgflg; - __le16 dstaddr; -} __packed; - - -/* Connect Initiate/Retransmit Initiate/Connect Confirm */ -struct nsp_conn_init_msg { - __u8 msgflg; -#define NSP_CI 0x18 /* Connect Initiate */ -#define NSP_RCI 0x68 /* Retrans. Conn Init */ - __le16 dstaddr; - __le16 srcaddr; - __u8 services; -#define NSP_FC_NONE 0x00 /* Flow Control None */ -#define NSP_FC_SRC 0x04 /* Seg Req. Count */ -#define NSP_FC_SCMC 0x08 /* Sess. Control Mess */ -#define NSP_FC_MASK 0x0c /* FC type mask */ - __u8 info; - __le16 segsize; -} __packed; - -/* Disconnect Initiate/Disconnect Confirm */ -struct nsp_disconn_init_msg { - __u8 msgflg; - __le16 dstaddr; - __le16 srcaddr; - __le16 reason; -} __packed; - - - -struct srcobj_fmt { - __u8 format; - __u8 task; - __le16 grpcode; - __le16 usrcode; - __u8 dlen; -} __packed; - -/* - * A collection of functions for manipulating the sequence - * numbers used in NSP. Similar in operation to the functions - * of the same name in TCP. - */ -static __inline__ int dn_before(__u16 seq1, __u16 seq2) -{ - seq1 &= 0x0fff; - seq2 &= 0x0fff; - - return (int)((seq1 - seq2) & 0x0fff) > 2048; -} - - -static __inline__ int dn_after(__u16 seq1, __u16 seq2) -{ - seq1 &= 0x0fff; - seq2 &= 0x0fff; - - return (int)((seq2 - seq1) & 0x0fff) > 2048; -} - -static __inline__ int dn_equal(__u16 seq1, __u16 seq2) -{ - return ((seq1 ^ seq2) & 0x0fff) == 0; -} - -static __inline__ int dn_before_or_equal(__u16 seq1, __u16 seq2) -{ - return (dn_before(seq1, seq2) || dn_equal(seq1, seq2)); -} - -static __inline__ void seq_add(__u16 *seq, __u16 off) -{ - (*seq) += off; - (*seq) &= 0x0fff; -} - -static __inline__ int seq_next(__u16 seq1, __u16 seq2) -{ - return dn_equal(seq1 + 1, seq2); -} - -/* - * Can we delay the ack ? - */ -static __inline__ int sendack(__u16 seq) -{ - return (int)((seq & 0x1000) ? 0 : 1); -} - -/* - * Is socket congested ? - */ -static __inline__ int dn_congested(struct sock *sk) -{ - return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1); -} - -#define DN_MAX_NSP_DATA_HEADER (11) - -#endif /* _NET_DN_NSP_H */ diff --git a/include/net/dn_route.h b/include/net/dn_route.h deleted file mode 100644 index 88c0300236cc..000000000000 --- a/include/net/dn_route.h +++ /dev/null @@ -1,118 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _NET_DN_ROUTE_H -#define _NET_DN_ROUTE_H - -/****************************************************************************** - (c) 1995-1998 E.M. Serrat emserrat@geocities.com - -*******************************************************************************/ - -#include -#include - -struct sk_buff *dn_alloc_skb(struct sock *sk, int size, gfp_t pri); -int dn_route_output_sock(struct dst_entry __rcu **pprt, struct flowidn *, - struct sock *sk, int flags); -int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb); -void dn_rt_cache_flush(int delay); -int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev); - -/* Masks for flags field */ -#define DN_RT_F_PID 0x07 /* Mask for packet type */ -#define DN_RT_F_PF 0x80 /* Padding Follows */ -#define DN_RT_F_VER 0x40 /* Version =0 discard packet if ==1 */ -#define DN_RT_F_IE 0x20 /* Intra Ethernet, Reserved in short pkt */ -#define DN_RT_F_RTS 0x10 /* Packet is being returned to sender */ -#define DN_RT_F_RQR 0x08 /* Return packet to sender upon non-delivery */ - -/* Mask for types of routing packets */ -#define DN_RT_PKT_MSK 0x06 -/* Types of routing packets */ -#define DN_RT_PKT_SHORT 0x02 /* Short routing packet */ -#define DN_RT_PKT_LONG 0x06 /* Long routing packet */ - -/* Mask for control/routing selection */ -#define DN_RT_PKT_CNTL 0x01 /* Set to 1 if a control packet */ -/* Types of control packets */ -#define DN_RT_CNTL_MSK 0x0f /* Mask for control packets */ -#define DN_RT_PKT_INIT 0x01 /* Initialisation packet */ -#define DN_RT_PKT_VERI 0x03 /* Verification Message */ -#define DN_RT_PKT_HELO 0x05 /* Hello and Test Message */ -#define DN_RT_PKT_L1RT 0x07 /* Level 1 Routing Message */ -#define DN_RT_PKT_L2RT 0x09 /* Level 2 Routing Message */ -#define DN_RT_PKT_ERTH 0x0b /* Ethernet Router Hello */ -#define DN_RT_PKT_EEDH 0x0d /* Ethernet EndNode Hello */ - -/* Values for info field in hello message */ -#define DN_RT_INFO_TYPE 0x03 /* Type mask */ -#define DN_RT_INFO_L1RT 0x02 /* L1 Router */ -#define DN_RT_INFO_L2RT 0x01 /* L2 Router */ -#define DN_RT_INFO_ENDN 0x03 /* EndNode */ -#define DN_RT_INFO_VERI 0x04 /* Verification Reqd. */ -#define DN_RT_INFO_RJCT 0x08 /* Reject Flag, Reserved */ -#define DN_RT_INFO_VFLD 0x10 /* Verification Failed, Reserved */ -#define DN_RT_INFO_NOML 0x20 /* No Multicast traffic accepted */ -#define DN_RT_INFO_BLKR 0x40 /* Blocking Requested */ - -/* - * The fl structure is what we used to look up the route. - * The rt_saddr & rt_daddr entries are the same as key.saddr & key.daddr - * except for local input routes, where the rt_saddr = fl.fld_dst and - * rt_daddr = fl.fld_src to allow the route to be used for returning - * packets to the originating host. - */ -struct dn_route { - struct dst_entry dst; - struct dn_route __rcu *dn_next; - - struct neighbour *n; - - struct flowidn fld; - - __le16 rt_saddr; - __le16 rt_daddr; - __le16 rt_gateway; - __le16 rt_local_src; /* Source used for forwarding packets */ - __le16 rt_src_map; - __le16 rt_dst_map; - - unsigned int rt_flags; - unsigned int rt_type; -}; - -static inline bool dn_is_input_route(struct dn_route *rt) -{ - return rt->fld.flowidn_iif != 0; -} - -static inline bool dn_is_output_route(struct dn_route *rt) -{ - return rt->fld.flowidn_iif == 0; -} - -void dn_route_init(void); -void dn_route_cleanup(void); - -#include -#include - -static inline void dn_rt_send(struct sk_buff *skb) -{ - dev_queue_xmit(skb); -} - -static inline void dn_rt_finish_output(struct sk_buff *skb, char *dst, char *src) -{ - struct net_device *dev = skb->dev; - - if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK)) - dst = NULL; - - if (dev_hard_header(skb, dev, ETH_P_DNA_RT, dst, src, skb->len) >= 0) - dn_rt_send(skb); - else - kfree_skb(skb); -} - -#endif /* _NET_DN_ROUTE_H */ diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index b593f95e9991..02bbdc577f8e 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -24,9 +24,6 @@ struct netns_nf { #ifdef CONFIG_NETFILTER_FAMILY_BRIDGE struct nf_hook_entries __rcu *hooks_bridge[NF_INET_NUMHOOKS]; #endif -#if IS_ENABLED(CONFIG_DECNET) - struct nf_hook_entries __rcu *hooks_decnet[NF_DN_NUMHOOKS]; -#endif #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) unsigned int defrag_ipv4_users; #endif diff --git a/include/uapi/linux/dn.h b/include/uapi/linux/dn.h deleted file mode 100644 index 36ca71bd8bbe..000000000000 --- a/include/uapi/linux/dn.h +++ /dev/null @@ -1,149 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _LINUX_DN_H -#define _LINUX_DN_H - -#include -#include -#include - -/* - - DECnet Data Structures and Constants - -*/ - -/* - * DNPROTO_NSP can't be the same as SOL_SOCKET, - * so increment each by one (compared to ULTRIX) - */ -#define DNPROTO_NSP 2 /* NSP protocol number */ -#define DNPROTO_ROU 3 /* Routing protocol number */ -#define DNPROTO_NML 4 /* Net mgt protocol number */ -#define DNPROTO_EVL 5 /* Evl protocol number (usr) */ -#define DNPROTO_EVR 6 /* Evl protocol number (evl) */ -#define DNPROTO_NSPT 7 /* NSP trace protocol number */ - - -#define DN_ADDL 2 -#define DN_MAXADDL 2 /* ULTRIX headers have 20 here, but pathworks has 2 */ -#define DN_MAXOPTL 16 -#define DN_MAXOBJL 16 -#define DN_MAXACCL 40 -#define DN_MAXALIASL 128 -#define DN_MAXNODEL 256 -#define DNBUFSIZE 65023 - -/* - * SET/GET Socket options - must match the DSO_ numbers below - */ -#define SO_CONDATA 1 -#define SO_CONACCESS 2 -#define SO_PROXYUSR 3 -#define SO_LINKINFO 7 - -#define DSO_CONDATA 1 /* Set/Get connect data */ -#define DSO_DISDATA 10 /* Set/Get disconnect data */ -#define DSO_CONACCESS 2 /* Set/Get connect access data */ -#define DSO_ACCEPTMODE 4 /* Set/Get accept mode */ -#define DSO_CONACCEPT 5 /* Accept deferred connection */ -#define DSO_CONREJECT 6 /* Reject deferred connection */ -#define DSO_LINKINFO 7 /* Set/Get link information */ -#define DSO_STREAM 8 /* Set socket type to stream */ -#define DSO_SEQPACKET 9 /* Set socket type to sequenced packet */ -#define DSO_MAXWINDOW 11 /* Maximum window size allowed */ -#define DSO_NODELAY 12 /* Turn off nagle */ -#define DSO_CORK 13 /* Wait for more data! */ -#define DSO_SERVICES 14 /* NSP Services field */ -#define DSO_INFO 15 /* NSP Info field */ -#define DSO_MAX 15 /* Maximum option number */ - - -/* LINK States */ -#define LL_INACTIVE 0 -#define LL_CONNECTING 1 -#define LL_RUNNING 2 -#define LL_DISCONNECTING 3 - -#define ACC_IMMED 0 -#define ACC_DEFER 1 - -#define SDF_WILD 1 /* Wild card object */ -#define SDF_PROXY 2 /* Addr eligible for proxy */ -#define SDF_UICPROXY 4 /* Use uic-based proxy */ - -/* Structures */ - - -struct dn_naddr { - __le16 a_len; - __u8 a_addr[DN_MAXADDL]; /* Two bytes little endian */ -}; - -struct sockaddr_dn { - __u16 sdn_family; - __u8 sdn_flags; - __u8 sdn_objnum; - __le16 sdn_objnamel; - __u8 sdn_objname[DN_MAXOBJL]; - struct dn_naddr sdn_add; -}; -#define sdn_nodeaddrl sdn_add.a_len /* Node address length */ -#define sdn_nodeaddr sdn_add.a_addr /* Node address */ - - - -/* - * DECnet set/get DSO_CONDATA, DSO_DISDATA (optional data) structure - */ -struct optdata_dn { - __le16 opt_status; /* Extended status return */ -#define opt_sts opt_status - __le16 opt_optl; /* Length of user data */ - __u8 opt_data[16]; /* User data */ -}; - -struct accessdata_dn { - __u8 acc_accl; - __u8 acc_acc[DN_MAXACCL]; - __u8 acc_passl; - __u8 acc_pass[DN_MAXACCL]; - __u8 acc_userl; - __u8 acc_user[DN_MAXACCL]; -}; - -/* - * DECnet logical link information structure - */ -struct linkinfo_dn { - __u16 idn_segsize; /* Segment size for link */ - __u8 idn_linkstate; /* Logical link state */ -}; - -/* - * Ethernet address format (for DECnet) - */ -union etheraddress { - __u8 dne_addr[ETH_ALEN]; /* Full ethernet address */ - struct { - __u8 dne_hiord[4]; /* DECnet HIORD prefix */ - __u8 dne_nodeaddr[2]; /* DECnet node address */ - } dne_remote; -}; - - -/* - * DECnet physical socket address format - */ -struct dn_addr { - __le16 dna_family; /* AF_DECnet */ - union etheraddress dna_netaddr; /* DECnet ethernet address */ -}; - -#define DECNET_IOCTL_BASE 0x89 /* PROTOPRIVATE range */ - -#define SIOCSNETADDR _IOW(DECNET_IOCTL_BASE, 0xe0, struct dn_naddr) -#define SIOCGNETADDR _IOR(DECNET_IOCTL_BASE, 0xe1, struct dn_naddr) -#define OSIOCSNETADDR _IOW(DECNET_IOCTL_BASE, 0xe0, int) -#define OSIOCGNETADDR _IOR(DECNET_IOCTL_BASE, 0xe1, int) - -#endif /* _LINUX_DN_H */ diff --git a/include/uapi/linux/netfilter_decnet.h b/include/uapi/linux/netfilter_decnet.h deleted file mode 100644 index 3c77f54560f2..000000000000 --- a/include/uapi/linux/netfilter_decnet.h +++ /dev/null @@ -1,72 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef __LINUX_DECNET_NETFILTER_H -#define __LINUX_DECNET_NETFILTER_H - -/* DECnet-specific defines for netfilter. - * This file (C) Steve Whitehouse 1999 derived from the - * ipv4 netfilter header file which is - * (C)1998 Rusty Russell -- This code is GPL. - */ - -#include - -/* only for userspace compatibility */ -#ifndef __KERNEL__ - -#include /* for INT_MIN, INT_MAX */ - -/* kernel define is in netfilter_defs.h */ -#define NF_DN_NUMHOOKS 7 -#endif /* ! __KERNEL__ */ - -/* DECnet Hooks */ -/* After promisc drops, checksum checks. */ -#define NF_DN_PRE_ROUTING 0 -/* If the packet is destined for this box. */ -#define NF_DN_LOCAL_IN 1 -/* If the packet is destined for another interface. */ -#define NF_DN_FORWARD 2 -/* Packets coming from a local process. */ -#define NF_DN_LOCAL_OUT 3 -/* Packets about to hit the wire. */ -#define NF_DN_POST_ROUTING 4 -/* Input Hello Packets */ -#define NF_DN_HELLO 5 -/* Input Routing Packets */ -#define NF_DN_ROUTE 6 - -enum nf_dn_hook_priorities { - NF_DN_PRI_FIRST = INT_MIN, - NF_DN_PRI_CONNTRACK = -200, - NF_DN_PRI_MANGLE = -150, - NF_DN_PRI_NAT_DST = -100, - NF_DN_PRI_FILTER = 0, - NF_DN_PRI_NAT_SRC = 100, - NF_DN_PRI_DNRTMSG = 200, - NF_DN_PRI_LAST = INT_MAX, -}; - -struct nf_dn_rtmsg { - int nfdn_ifindex; -}; - -#define NFDN_RTMSG(r) ((unsigned char *)(r) + NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg))) - -#ifndef __KERNEL__ -/* backwards compatibility for userspace */ -#define DNRMG_L1_GROUP 0x01 -#define DNRMG_L2_GROUP 0x02 -#endif - -enum { - DNRNG_NLGRP_NONE, -#define DNRNG_NLGRP_NONE DNRNG_NLGRP_NONE - DNRNG_NLGRP_L1, -#define DNRNG_NLGRP_L1 DNRNG_NLGRP_L1 - DNRNG_NLGRP_L2, -#define DNRNG_NLGRP_L2 DNRNG_NLGRP_L2 - __DNRNG_NLGRP_MAX -}; -#define DNRNG_NLGRP_MAX (__DNRNG_NLGRP_MAX - 1) - -#endif /*__LINUX_DECNET_NETFILTER_H*/ diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index 855dffb4c1c3..1e543cf0568c 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -20,7 +20,7 @@ #define NETLINK_CONNECTOR 11 #define NETLINK_NETFILTER 12 /* netfilter subsystem */ #define NETLINK_IP6_FW 13 -#define NETLINK_DNRTMSG 14 /* DECnet routing messages */ +#define NETLINK_DNRTMSG 14 /* DECnet routing messages (obsolete) */ #define NETLINK_KOBJECT_UEVENT 15 /* Kernel messages to userspace */ #define NETLINK_GENERIC 16 /* leave room for NETLINK_DM (DM Events) */ diff --git a/net/Kconfig b/net/Kconfig index 6b78f695caa6..48c33c222199 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -204,7 +204,6 @@ config BRIDGE_NETFILTER source "net/netfilter/Kconfig" source "net/ipv4/netfilter/Kconfig" source "net/ipv6/netfilter/Kconfig" -source "net/decnet/netfilter/Kconfig" source "net/bridge/netfilter/Kconfig" endif @@ -221,7 +220,6 @@ source "net/802/Kconfig" source "net/bridge/Kconfig" source "net/dsa/Kconfig" source "net/8021q/Kconfig" -source "net/decnet/Kconfig" source "net/llc/Kconfig" source "drivers/net/appletalk/Kconfig" source "net/x25/Kconfig" diff --git a/net/Makefile b/net/Makefile index fbfeb8a0bb37..6a62e5b27378 100644 --- a/net/Makefile +++ b/net/Makefile @@ -38,7 +38,6 @@ obj-$(CONFIG_AF_KCM) += kcm/ obj-$(CONFIG_STREAM_PARSER) += strparser/ obj-$(CONFIG_ATM) += atm/ obj-$(CONFIG_L2TP) += l2tp/ -obj-$(CONFIG_DECNET) += decnet/ obj-$(CONFIG_PHONET) += phonet/ ifneq ($(CONFIG_VLAN_8021Q),) obj-y += 8021q/ diff --git a/net/core/dev.c b/net/core/dev.c index 716df64fcfa5..6847022b9d66 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10370,9 +10370,7 @@ void netdev_run_todo(void) BUG_ON(!list_empty(&dev->ptype_specific)); WARN_ON(rcu_access_pointer(dev->ip_ptr)); WARN_ON(rcu_access_pointer(dev->ip6_ptr)); -#if IS_ENABLED(CONFIG_DECNET) - WARN_ON(dev->dn_ptr); -#endif + if (dev->priv_destructor) dev->priv_destructor(dev); if (dev->needs_free_netdev) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 5b669eb80270..833d2214f36f 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1847,9 +1847,6 @@ static struct neigh_table *neigh_find_table(int family) case AF_INET6: tbl = neigh_tables[NEIGH_ND_TABLE]; break; - case AF_DECnet: - tbl = neigh_tables[NEIGH_DN_TABLE]; - break; } return tbl; diff --git a/net/decnet/Kconfig b/net/decnet/Kconfig deleted file mode 100644 index 24336bdb1054..000000000000 --- a/net/decnet/Kconfig +++ /dev/null @@ -1,43 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# DECnet configuration -# -config DECNET - tristate "DECnet Support" - help - The DECnet networking protocol was used in many products made by - Digital (now Compaq). It provides reliable stream and sequenced - packet communications over which run a variety of services similar - to those which run over TCP/IP. - - To find some tools to use with the kernel layer support, please - look at Patrick Caulfield's web site: - . - - More detailed documentation is available in - . - - Be sure to say Y to "/proc file system support" and "Sysctl support" - below when using DECnet, since you will need sysctl support to aid - in configuration at run time. - - The DECnet code is also available as a module ( = code which can be - inserted in and removed from the running kernel whenever you want). - The module is called decnet. - -config DECNET_ROUTER - bool "DECnet: router support" - depends on DECNET - select FIB_RULES - help - Add support for turning your DECnet Endnode into a level 1 or 2 - router. This is an experimental, but functional option. If you - do say Y here, then make sure that you also say Y to "Kernel/User - network link driver", "Routing messages" and "Network packet - filtering". The first two are required to allow configuration via - rtnetlink (you will need Alexey Kuznetsov's iproute2 package - from ). The "Network packet - filtering" option will be required for the forthcoming routing daemon - to work. - - See for more information. diff --git a/net/decnet/Makefile b/net/decnet/Makefile deleted file mode 100644 index 07b38e441b2d..000000000000 --- a/net/decnet/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 - -obj-$(CONFIG_DECNET) += decnet.o - -decnet-y := af_decnet.o dn_nsp_in.o dn_nsp_out.o \ - dn_route.o dn_dev.o dn_neigh.o dn_timer.o -decnet-$(CONFIG_DECNET_ROUTER) += dn_fib.o dn_rules.o dn_table.o -decnet-y += sysctl_net_decnet.o - -obj-$(CONFIG_NETFILTER) += netfilter/ diff --git a/net/decnet/README b/net/decnet/README deleted file mode 100644 index 60e7ec88c81f..000000000000 --- a/net/decnet/README +++ /dev/null @@ -1,8 +0,0 @@ - Linux DECnet Project - ====================== - -The documentation for this kernel subsystem is available in the -Documentation/networking subdirectory of this distribution and also -on line at http://www.chygwyn.com/DECnet/ - -Steve Whitehouse diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c deleted file mode 100644 index 6582dfdfb932..000000000000 --- a/net/decnet/af_decnet.c +++ /dev/null @@ -1,2404 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later - -/* - * DECnet An implementation of the DECnet protocol suite for the LINUX - * operating system. DECnet is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * DECnet Socket Layer Interface - * - * Authors: Eduardo Marcelo Serrat - * Patrick Caulfield - * - * Changes: - * Steve Whitehouse: Copied from Eduardo Serrat and Patrick Caulfield's - * version of the code. Original copyright preserved - * below. - * Steve Whitehouse: Some bug fixes, cleaning up some code to make it - * compatible with my routing layer. - * Steve Whitehouse: Merging changes from Eduardo Serrat and Patrick - * Caulfield. - * Steve Whitehouse: Further bug fixes, checking module code still works - * with new routing layer. - * Steve Whitehouse: Additional set/get_sockopt() calls. - * Steve Whitehouse: Fixed TIOCINQ ioctl to be same as Eduardo's new - * code. - * Steve Whitehouse: recvmsg() changed to try and behave in a POSIX like - * way. Didn't manage it entirely, but its better. - * Steve Whitehouse: ditto for sendmsg(). - * Steve Whitehouse: A selection of bug fixes to various things. - * Steve Whitehouse: Added TIOCOUTQ ioctl. - * Steve Whitehouse: Fixes to username2sockaddr & sockaddr2username. - * Steve Whitehouse: Fixes to connect() error returns. - * Patrick Caulfield: Fixes to delayed acceptance logic. - * David S. Miller: New socket locking - * Steve Whitehouse: Socket list hashing/locking - * Arnaldo C. Melo: use capable, not suser - * Steve Whitehouse: Removed unused code. Fix to use sk->allocation - * when required. - * Patrick Caulfield: /proc/net/decnet now has object name/number - * Steve Whitehouse: Fixed local port allocation, hashed sk list - * Matthew Wilcox: Fixes for dn_ioctl() - * Steve Whitehouse: New connect/accept logic to allow timeouts and - * prepare for sendpage etc. - */ - - -/****************************************************************************** - (c) 1995-1998 E.M. Serrat emserrat@geocities.com - - -HISTORY: - -Version Kernel Date Author/Comments -------- ------ ---- --------------- -Version 0.0.1 2.0.30 01-dic-97 Eduardo Marcelo Serrat - (emserrat@geocities.com) - - First Development of DECnet Socket La- - yer for Linux. Only supports outgoing - connections. - -Version 0.0.2 2.1.105 20-jun-98 Patrick J. Caulfield - (patrick@pandh.demon.co.uk) - - Port to new kernel development version. - -Version 0.0.3 2.1.106 25-jun-98 Eduardo Marcelo Serrat - (emserrat@geocities.com) - _ - Added support for incoming connections - so we can start developing server apps - on Linux. - - - Module Support -Version 0.0.4 2.1.109 21-jul-98 Eduardo Marcelo Serrat - (emserrat@geocities.com) - _ - Added support for X11R6.4. Now we can - use DECnet transport for X on Linux!!! - - -Version 0.0.5 2.1.110 01-aug-98 Eduardo Marcelo Serrat - (emserrat@geocities.com) - Removed bugs on flow control - Removed bugs on incoming accessdata - order - - -Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat - dn_recvmsg fixes - - Patrick J. Caulfield - dn_bind fixes -*******************************************************************************/ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct dn_sock { - struct sock sk; - struct dn_scp scp; -}; - -static void dn_keepalive(struct sock *sk); - -#define DN_SK_HASH_SHIFT 8 -#define DN_SK_HASH_SIZE (1 << DN_SK_HASH_SHIFT) -#define DN_SK_HASH_MASK (DN_SK_HASH_SIZE - 1) - - -static const struct proto_ops dn_proto_ops; -static DEFINE_RWLOCK(dn_hash_lock); -static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE]; -static struct hlist_head dn_wild_sk; -static atomic_long_t decnet_memory_allocated; -static DEFINE_PER_CPU(int, decnet_memory_per_cpu_fw_alloc); - -static int __dn_setsockopt(struct socket *sock, int level, int optname, - sockptr_t optval, unsigned int optlen, int flags); -static int __dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen, int flags); - -static struct hlist_head *dn_find_list(struct sock *sk) -{ - struct dn_scp *scp = DN_SK(sk); - - if (scp->addr.sdn_flags & SDF_WILD) - return hlist_empty(&dn_wild_sk) ? &dn_wild_sk : NULL; - - return &dn_sk_hash[le16_to_cpu(scp->addrloc) & DN_SK_HASH_MASK]; -} - -/* - * Valid ports are those greater than zero and not already in use. - */ -static int check_port(__le16 port) -{ - struct sock *sk; - - if (port == 0) - return -1; - - sk_for_each(sk, &dn_sk_hash[le16_to_cpu(port) & DN_SK_HASH_MASK]) { - struct dn_scp *scp = DN_SK(sk); - if (scp->addrloc == port) - return -1; - } - return 0; -} - -static unsigned short port_alloc(struct sock *sk) -{ - struct dn_scp *scp = DN_SK(sk); - static unsigned short port = 0x2000; - unsigned short i_port = port; - - while(check_port(cpu_to_le16(++port)) != 0) { - if (port == i_port) - return 0; - } - - scp->addrloc = cpu_to_le16(port); - - return 1; -} - -/* - * Since this is only ever called from user - * level, we don't need a write_lock() version - * of this. - */ -static int dn_hash_sock(struct sock *sk) -{ - struct dn_scp *scp = DN_SK(sk); - struct hlist_head *list; - int rv = -EUSERS; - - BUG_ON(sk_hashed(sk)); - - write_lock_bh(&dn_hash_lock); - - if (!scp->addrloc && !port_alloc(sk)) - goto out; - - rv = -EADDRINUSE; - if ((list = dn_find_list(sk)) == NULL) - goto out; - - sk_add_node(sk, list); - rv = 0; -out: - write_unlock_bh(&dn_hash_lock); - return rv; -} - -static void dn_unhash_sock(struct sock *sk) -{ - write_lock(&dn_hash_lock); - sk_del_node_init(sk); - write_unlock(&dn_hash_lock); -} - -static void dn_unhash_sock_bh(struct sock *sk) -{ - write_lock_bh(&dn_hash_lock); - sk_del_node_init(sk); - write_unlock_bh(&dn_hash_lock); -} - -static struct hlist_head *listen_hash(struct sockaddr_dn *addr) -{ - int i; - unsigned int hash = addr->sdn_objnum; - - if (hash == 0) { - hash = addr->sdn_objnamel; - for(i = 0; i < le16_to_cpu(addr->sdn_objnamel); i++) { - hash ^= addr->sdn_objname[i]; - hash ^= (hash << 3); - } - } - - return &dn_sk_hash[hash & DN_SK_HASH_MASK]; -} - -/* - * Called to transform a socket from bound (i.e. with a local address) - * into a listening socket (doesn't need a local port number) and rehashes - * based upon the object name/number. - */ -static void dn_rehash_sock(struct sock *sk) -{ - struct hlist_head *list; - struct dn_scp *scp = DN_SK(sk); - - if (scp->addr.sdn_flags & SDF_WILD) - return; - - write_lock_bh(&dn_hash_lock); - sk_del_node_init(sk); - DN_SK(sk)->addrloc = 0; - list = listen_hash(&DN_SK(sk)->addr); - sk_add_node(sk, list); - write_unlock_bh(&dn_hash_lock); -} - -int dn_sockaddr2username(struct sockaddr_dn *sdn, unsigned char *buf, unsigned char type) -{ - int len = 2; - - *buf++ = type; - - switch (type) { - case 0: - *buf++ = sdn->sdn_objnum; - break; - case 1: - *buf++ = 0; - *buf++ = le16_to_cpu(sdn->sdn_objnamel); - memcpy(buf, sdn->sdn_objname, le16_to_cpu(sdn->sdn_objnamel)); - len = 3 + le16_to_cpu(sdn->sdn_objnamel); - break; - case 2: - memset(buf, 0, 5); - buf += 5; - *buf++ = le16_to_cpu(sdn->sdn_objnamel); - memcpy(buf, sdn->sdn_objname, le16_to_cpu(sdn->sdn_objnamel)); - len = 7 + le16_to_cpu(sdn->sdn_objnamel); - break; - } - - return len; -} - -/* - * On reception of usernames, we handle types 1 and 0 for destination - * addresses only. Types 2 and 4 are used for source addresses, but the - * UIC, GIC are ignored and they are both treated the same way. Type 3 - * is never used as I've no idea what its purpose might be or what its - * format is. - */ -int dn_username2sockaddr(unsigned char *data, int len, struct sockaddr_dn *sdn, unsigned char *fmt) -{ - unsigned char type; - int size = len; - int namel = 12; - - sdn->sdn_objnum = 0; - sdn->sdn_objnamel = cpu_to_le16(0); - memset(sdn->sdn_objname, 0, DN_MAXOBJL); - - if (len < 2) - return -1; - - len -= 2; - *fmt = *data++; - type = *data++; - - switch (*fmt) { - case 0: - sdn->sdn_objnum = type; - return 2; - case 1: - namel = 16; - break; - case 2: - len -= 4; - data += 4; - break; - case 4: - len -= 8; - data += 8; - break; - default: - return -1; - } - - len -= 1; - - if (len < 0) - return -1; - - sdn->sdn_objnamel = cpu_to_le16(*data++); - len -= le16_to_cpu(sdn->sdn_objnamel); - - if ((len < 0) || (le16_to_cpu(sdn->sdn_objnamel) > namel)) - return -1; - - memcpy(sdn->sdn_objname, data, le16_to_cpu(sdn->sdn_objnamel)); - - return size - len; -} - -struct sock *dn_sklist_find_listener(struct sockaddr_dn *addr) -{ - struct hlist_head *list = listen_hash(addr); - struct sock *sk; - - read_lock(&dn_hash_lock); - sk_for_each(sk, list) { - struct dn_scp *scp = DN_SK(sk); - if (sk->sk_state != TCP_LISTEN) - continue; - if (scp->addr.sdn_objnum) { - if (scp->addr.sdn_objnum != addr->sdn_objnum) - continue; - } else { - if (addr->sdn_objnum) - continue; - if (scp->addr.sdn_objnamel != addr->sdn_objnamel) - continue; - if (memcmp(scp->addr.sdn_objname, addr->sdn_objname, le16_to_cpu(addr->sdn_objnamel)) != 0) - continue; - } - sock_hold(sk); - read_unlock(&dn_hash_lock); - return sk; - } - - sk = sk_head(&dn_wild_sk); - if (sk) { - if (sk->sk_state == TCP_LISTEN) - sock_hold(sk); - else - sk = NULL; - } - - read_unlock(&dn_hash_lock); - return sk; -} - -struct sock *dn_find_by_skb(struct sk_buff *skb) -{ - struct dn_skb_cb *cb = DN_SKB_CB(skb); - struct sock *sk; - struct dn_scp *scp; - - read_lock(&dn_hash_lock); - sk_for_each(sk, &dn_sk_hash[le16_to_cpu(cb->dst_port) & DN_SK_HASH_MASK]) { - scp = DN_SK(sk); - if (cb->src != dn_saddr2dn(&scp->peer)) - continue; - if (cb->dst_port != scp->addrloc) - continue; - if (scp->addrrem && (cb->src_port != scp->addrrem)) - continue; - sock_hold(sk); - goto found; - } - sk = NULL; -found: - read_unlock(&dn_hash_lock); - return sk; -} - - - -static void dn_destruct(struct sock *sk) -{ - struct dn_scp *scp = DN_SK(sk); - - skb_queue_purge(&scp->data_xmit_queue); - skb_queue_purge(&scp->other_xmit_queue); - skb_queue_purge(&scp->other_receive_queue); - - dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1)); -} - -static unsigned long dn_memory_pressure; - -static void dn_enter_memory_pressure(struct sock *sk) -{ - if (!dn_memory_pressure) { - dn_memory_pressure = 1; - } -} - -static struct proto dn_proto = { - .name = "NSP", - .owner = THIS_MODULE, - .enter_memory_pressure = dn_enter_memory_pressure, - .memory_pressure = &dn_memory_pressure, - - .memory_allocated = &decnet_memory_allocated, - .per_cpu_fw_alloc = &decnet_memory_per_cpu_fw_alloc, - - .sysctl_mem = sysctl_decnet_mem, - .sysctl_wmem = sysctl_decnet_wmem, - .sysctl_rmem = sysctl_decnet_rmem, - .max_header = DN_MAX_NSP_DATA_HEADER + 64, - .obj_size = sizeof(struct dn_sock), -}; - -static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gfp, int kern) -{ - struct dn_scp *scp; - struct sock *sk = sk_alloc(net, PF_DECnet, gfp, &dn_proto, kern); - - if (!sk) - goto out; - - if (sock) - sock->ops = &dn_proto_ops; - sock_init_data(sock, sk); - - sk->sk_backlog_rcv = dn_nsp_backlog_rcv; - sk->sk_destruct = dn_destruct; - sk->sk_no_check_tx = 1; - sk->sk_family = PF_DECnet; - sk->sk_protocol = 0; - sk->sk_allocation = gfp; - sk->sk_sndbuf = READ_ONCE(sysctl_decnet_wmem[1]); - sk->sk_rcvbuf = READ_ONCE(sysctl_decnet_rmem[1]); - - /* Initialization of DECnet Session Control Port */ - scp = DN_SK(sk); - scp->state = DN_O; /* Open */ - scp->numdat = 1; /* Next data seg to tx */ - scp->numoth = 1; /* Next oth data to tx */ - scp->ackxmt_dat = 0; /* Last data seg ack'ed */ - scp->ackxmt_oth = 0; /* Last oth data ack'ed */ - scp->ackrcv_dat = 0; /* Highest data ack recv*/ - scp->ackrcv_oth = 0; /* Last oth data ack rec*/ - scp->flowrem_sw = DN_SEND; - scp->flowloc_sw = DN_SEND; - scp->flowrem_dat = 0; - scp->flowrem_oth = 1; - scp->flowloc_dat = 0; - scp->flowloc_oth = 1; - scp->services_rem = 0; - scp->services_loc = 1 | NSP_FC_NONE; - scp->info_rem = 0; - scp->info_loc = 0x03; /* NSP version 4.1 */ - scp->segsize_rem = 230 - DN_MAX_NSP_DATA_HEADER; /* Default: Updated by remote segsize */ - scp->nonagle = 0; - scp->multi_ireq = 1; - scp->accept_mode = ACC_IMMED; - scp->addr.sdn_family = AF_DECnet; - scp->peer.sdn_family = AF_DECnet; - scp->accessdata.acc_accl = 5; - memcpy(scp->accessdata.acc_acc, "LINUX", 5); - - scp->max_window = NSP_MAX_WINDOW; - scp->snd_window = NSP_MIN_WINDOW; - scp->nsp_srtt = NSP_INITIAL_SRTT; - scp->nsp_rttvar = NSP_INITIAL_RTTVAR; - scp->nsp_rxtshift = 0; - - skb_queue_head_init(&scp->data_xmit_queue); - skb_queue_head_init(&scp->other_xmit_queue); - skb_queue_head_init(&scp->other_receive_queue); - - scp->persist = 0; - scp->persist_fxn = NULL; - scp->keepalive = 10 * HZ; - scp->keepalive_fxn = dn_keepalive; - - dn_start_slow_timer(sk); -out: - return sk; -} - -/* - * Keepalive timer. - * FIXME: Should respond to SO_KEEPALIVE etc. - */ -static void dn_keepalive(struct sock *sk) -{ - struct dn_scp *scp = DN_SK(sk); - - /* - * By checking the other_data transmit queue is empty - * we are double checking that we are not sending too - * many of these keepalive frames. - */ - if (skb_queue_empty(&scp->other_xmit_queue)) - dn_nsp_send_link(sk, DN_NOCHANGE, 0); -} - - -/* - * Timer for shutdown/destroyed sockets. - * When socket is dead & no packets have been sent for a - * certain amount of time, they are removed by this - * routine. Also takes care of sending out DI & DC - * frames at correct times. - */ -int dn_destroy_timer(struct sock *sk) -{ - struct dn_scp *scp = DN_SK(sk); - - scp->persist = dn_nsp_persist(sk); - - switch (scp->state) { - case DN_DI: - dn_nsp_send_disc(sk, NSP_DISCINIT, 0, GFP_ATOMIC); - if (scp->nsp_rxtshift >= decnet_di_count) - scp->state = DN_CN; - return 0; - - case DN_DR: - dn_nsp_send_disc(sk, NSP_DISCINIT, 0, GFP_ATOMIC); - if (scp->nsp_rxtshift >= decnet_dr_count) - scp->state = DN_DRC; - return 0; - - case DN_DN: - if (scp->nsp_rxtshift < decnet_dn_count) { - /* printk(KERN_DEBUG "dn_destroy_timer: DN\n"); */ - dn_nsp_send_disc(sk, NSP_DISCCONF, NSP_REASON_DC, - GFP_ATOMIC); - return 0; - } - } - - scp->persist = (HZ * decnet_time_wait); - - if (sk->sk_socket) - return 0; - - if (time_after_eq(jiffies, scp->stamp + HZ * decnet_time_wait)) { - dn_unhash_sock(sk); - sock_put(sk); - return 1; - } - - return 0; -} - -static void dn_destroy_sock(struct sock *sk) -{ - struct dn_scp *scp = DN_SK(sk); - - scp->nsp_rxtshift = 0; /* reset back off */ - - if (sk->sk_socket) { - if (sk->sk_socket->state != SS_UNCONNECTED) - sk->sk_socket->state = SS_DISCONNECTING; - } - - sk->sk_state = TCP_CLOSE; - - switch (scp->state) { - case DN_DN: - dn_nsp_send_disc(sk, NSP_DISCCONF, NSP_REASON_DC, - sk->sk_allocation); - scp->persist_fxn = dn_destroy_timer; - scp->persist = dn_nsp_persist(sk); - break; - case DN_CR: - scp->state = DN_DR; - goto disc_reject; - case DN_RUN: - scp->state = DN_DI; - fallthrough; - case DN_DI: - case DN_DR: -disc_reject: - dn_nsp_send_disc(sk, NSP_DISCINIT, 0, sk->sk_allocation); - fallthrough; - case DN_NC: - case DN_NR: - case DN_RJ: - case DN_DIC: - case DN_CN: - case DN_DRC: - case DN_CI: - case DN_CD: - scp->persist_fxn = dn_destroy_timer; - scp->persist = dn_nsp_persist(sk); - break; - default: - printk(KERN_DEBUG "DECnet: dn_destroy_sock passed socket in invalid state\n"); - fallthrough; - case DN_O: - dn_stop_slow_timer(sk); - - dn_unhash_sock_bh(sk); - sock_put(sk); - - break; - } -} - -char *dn_addr2asc(__u16 addr, char *buf) -{ - unsigned short node, area; - - node = addr & 0x03ff; - area = addr >> 10; - sprintf(buf, "%hd.%hd", area, node); - - return buf; -} - - - -static int dn_create(struct net *net, struct socket *sock, int protocol, - int kern) -{ - struct sock *sk; - - if (protocol < 0 || protocol > U8_MAX) - return -EINVAL; - - if (!net_eq(net, &init_net)) - return -EAFNOSUPPORT; - - switch (sock->type) { - case SOCK_SEQPACKET: - if (protocol != DNPROTO_NSP) - return -EPROTONOSUPPORT; - break; - case SOCK_STREAM: - break; - default: - return -ESOCKTNOSUPPORT; - } - - - if ((sk = dn_alloc_sock(net, sock, GFP_KERNEL, kern)) == NULL) - return -ENOBUFS; - - sk->sk_protocol = protocol; - - return 0; -} - - -static int -dn_release(struct socket *sock) -{ - struct sock *sk = sock->sk; - - if (sk) { - sock_orphan(sk); - sock_hold(sk); - lock_sock(sk); - dn_destroy_sock(sk); - release_sock(sk); - sock_put(sk); - } - - return 0; -} - -static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) -{ - struct sock *sk = sock->sk; - struct dn_scp *scp = DN_SK(sk); - struct sockaddr_dn *saddr = (struct sockaddr_dn *)uaddr; - struct net_device *dev, *ldev; - int rv; - - if (addr_len != sizeof(struct sockaddr_dn)) - return -EINVAL; - - if (saddr->sdn_family != AF_DECnet) - return -EINVAL; - - if (le16_to_cpu(saddr->sdn_nodeaddrl) && (le16_to_cpu(saddr->sdn_nodeaddrl) != 2)) - return -EINVAL; - - if (le16_to_cpu(saddr->sdn_objnamel) > DN_MAXOBJL) - return -EINVAL; - - if (saddr->sdn_flags & ~SDF_WILD) - return -EINVAL; - - if (!capable(CAP_NET_BIND_SERVICE) && (saddr->sdn_objnum || - (saddr->sdn_flags & SDF_WILD))) - return -EACCES; - - if (!(saddr->sdn_flags & SDF_WILD)) { - if (le16_to_cpu(saddr->sdn_nodeaddrl)) { - rcu_read_lock(); - ldev = NULL; - for_each_netdev_rcu(&init_net, dev) { - if (!dev->dn_ptr) - continue; - if (dn_dev_islocal(dev, dn_saddr2dn(saddr))) { - ldev = dev; - break; - } - } - rcu_read_unlock(); - if (ldev == NULL) - return -EADDRNOTAVAIL; - } - } - - rv = -EINVAL; - lock_sock(sk); - if (sock_flag(sk, SOCK_ZAPPED)) { - memcpy(&scp->addr, saddr, addr_len); - sock_reset_flag(sk, SOCK_ZAPPED); - - rv = dn_hash_sock(sk); - if (rv) - sock_set_flag(sk, SOCK_ZAPPED); - } - release_sock(sk); - - return rv; -} - - -static int dn_auto_bind(struct socket *sock) -{ - struct sock *sk = sock->sk; - struct dn_scp *scp = DN_SK(sk); - int rv; - - sock_reset_flag(sk, SOCK_ZAPPED); - - scp->addr.sdn_flags = 0; - scp->addr.sdn_objnum = 0; - - /* - * This stuff is to keep compatibility with Eduardo's - * patch. I hope I can dispense with it shortly... - */ - if ((scp->accessdata.acc_accl != 0) && - (scp->accessdata.acc_accl <= 12)) { - - scp->addr.sdn_objnamel = cpu_to_le16(scp->accessdata.acc_accl); - memcpy(scp->addr.sdn_objname, scp->accessdata.acc_acc, le16_to_cpu(scp->addr.sdn_objnamel)); - - scp->accessdata.acc_accl = 0; - memset(scp->accessdata.acc_acc, 0, 40); - } - /* End of compatibility stuff */ - - scp->addr.sdn_add.a_len = cpu_to_le16(2); - rv = dn_dev_bind_default((__le16 *)scp->addr.sdn_add.a_addr); - if (rv == 0) { - rv = dn_hash_sock(sk); - if (rv) - sock_set_flag(sk, SOCK_ZAPPED); - } - - return rv; -} - -static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation) -{ - struct dn_scp *scp = DN_SK(sk); - DEFINE_WAIT_FUNC(wait, woken_wake_function); - int err; - - if (scp->state != DN_CR) - return -EINVAL; - - scp->state = DN_CC; - scp->segsize_loc = dst_metric_advmss(__sk_dst_get(sk)); - dn_send_conn_conf(sk, allocation); - - add_wait_queue(sk_sleep(sk), &wait); - for(;;) { - release_sock(sk); - if (scp->state == DN_CC) - *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo); - lock_sock(sk); - err = 0; - if (scp->state == DN_RUN) - break; - err = sock_error(sk); - if (err) - break; - err = sock_intr_errno(*timeo); - if (signal_pending(current)) - break; - err = -EAGAIN; - if (!*timeo) - break; - } - remove_wait_queue(sk_sleep(sk), &wait); - if (err == 0) { - sk->sk_socket->state = SS_CONNECTED; - } else if (scp->state != DN_CC) { - sk->sk_socket->state = SS_UNCONNECTED; - } - return err; -} - -static int dn_wait_run(struct sock *sk, long *timeo) -{ - struct dn_scp *scp = DN_SK(sk); - DEFINE_WAIT_FUNC(wait, woken_wake_function); - int err = 0; - - if (scp->state == DN_RUN) - goto out; - - if (!*timeo) - return -EALREADY; - - add_wait_queue(sk_sleep(sk), &wait); - for(;;) { - release_sock(sk); - if (scp->state == DN_CI || scp->state == DN_CC) - *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo); - lock_sock(sk); - err = 0; - if (scp->state == DN_RUN) - break; - err = sock_error(sk); - if (err) - break; - err = sock_intr_errno(*timeo); - if (signal_pending(current)) - break; - err = -ETIMEDOUT; - if (!*timeo) - break; - } - remove_wait_queue(sk_sleep(sk), &wait); -out: - if (err == 0) { - sk->sk_socket->state = SS_CONNECTED; - } else if (scp->state != DN_CI && scp->state != DN_CC) { - sk->sk_socket->state = SS_UNCONNECTED; - } - return err; -} - -static int __dn_connect(struct sock *sk, struct sockaddr_dn *addr, int addrlen, long *timeo, int flags) -{ - struct socket *sock = sk->sk_socket; - struct dn_scp *scp = DN_SK(sk); - int err = -EISCONN; - struct flowidn fld; - struct dst_entry *dst; - - if (sock->state == SS_CONNECTED) - goto out; - - if (sock->state == SS_CONNECTING) { - err = 0; - if (scp->state == DN_RUN) { - sock->state = SS_CONNECTED; - goto out; - } - err = -ECONNREFUSED; - if (scp->state != DN_CI && scp->state != DN_CC) { - sock->state = SS_UNCONNECTED; - goto out; - } - return dn_wait_run(sk, timeo); - } - - err = -EINVAL; - if (scp->state != DN_O) - goto out; - - if (addr == NULL || addrlen != sizeof(struct sockaddr_dn)) - goto out; - if (addr->sdn_family != AF_DECnet) - goto out; - if (addr->sdn_flags & SDF_WILD) - goto out; - - if (sock_flag(sk, SOCK_ZAPPED)) { - err = dn_auto_bind(sk->sk_socket); - if (err) - goto out; - } - - memcpy(&scp->peer, addr, sizeof(struct sockaddr_dn)); - - err = -EHOSTUNREACH; - memset(&fld, 0, sizeof(fld)); - fld.flowidn_oif = sk->sk_bound_dev_if; - fld.daddr = dn_saddr2dn(&scp->peer); - fld.saddr = dn_saddr2dn(&scp->addr); - dn_sk_ports_copy(&fld, scp); - fld.flowidn_proto = DNPROTO_NSP; - if (dn_route_output_sock(&sk->sk_dst_cache, &fld, sk, flags) < 0) - goto out; - dst = __sk_dst_get(sk); - sk->sk_route_caps = dst->dev->features; - sock->state = SS_CONNECTING; - scp->state = DN_CI; - scp->segsize_loc = dst_metric_advmss(dst); - - dn_nsp_send_conninit(sk, NSP_CI); - err = -EINPROGRESS; - if (*timeo) { - err = dn_wait_run(sk, timeo); - } -out: - return err; -} - -static int dn_connect(struct socket *sock, struct sockaddr *uaddr, int addrlen, int flags) -{ - struct sockaddr_dn *addr = (struct sockaddr_dn *)uaddr; - struct sock *sk = sock->sk; - int err; - long timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); - - lock_sock(sk); - err = __dn_connect(sk, addr, addrlen, &timeo, 0); - release_sock(sk); - - return err; -} - -static inline int dn_check_state(struct sock *sk, struct sockaddr_dn *addr, int addrlen, long *timeo, int flags) -{ - struct dn_scp *scp = DN_SK(sk); - - switch (scp->state) { - case DN_RUN: - return 0; - case DN_CR: - return dn_confirm_accept(sk, timeo, sk->sk_allocation); - case DN_CI: - case DN_CC: - return dn_wait_run(sk, timeo); - case DN_O: - return __dn_connect(sk, addr, addrlen, timeo, flags); - } - - return -EINVAL; -} - - -static void dn_access_copy(struct sk_buff *skb, struct accessdata_dn *acc) -{ - unsigned char *ptr = skb->data; - - acc->acc_userl = *ptr++; - memcpy(&acc->acc_user, ptr, acc->acc_userl); - ptr += acc->acc_userl; - - acc->acc_passl = *ptr++; - memcpy(&acc->acc_pass, ptr, acc->acc_passl); - ptr += acc->acc_passl; - - acc->acc_accl = *ptr++; - memcpy(&acc->acc_acc, ptr, acc->acc_accl); - - skb_pull(skb, acc->acc_accl + acc->acc_passl + acc->acc_userl + 3); - -} - -static void dn_user_copy(struct sk_buff *skb, struct optdata_dn *opt) -{ - unsigned char *ptr = skb->data; - u16 len = *ptr++; /* yes, it's 8bit on the wire */ - - BUG_ON(len > 16); /* we've checked the contents earlier */ - opt->opt_optl = cpu_to_le16(len); - opt->opt_status = 0; - memcpy(opt->opt_data, ptr, len); - skb_pull(skb, len + 1); -} - -static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo) -{ - DEFINE_WAIT_FUNC(wait, woken_wake_function); - struct sk_buff *skb = NULL; - int err = 0; - - add_wait_queue(sk_sleep(sk), &wait); - for(;;) { - release_sock(sk); - skb = skb_dequeue(&sk->sk_receive_queue); - if (skb == NULL) { - *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo); - skb = skb_dequeue(&sk->sk_receive_queue); - } - lock_sock(sk); - if (skb != NULL) - break; - err = -EINVAL; - if (sk->sk_state != TCP_LISTEN) - break; - err = sock_intr_errno(*timeo); - if (signal_pending(current)) - break; - err = -EAGAIN; - if (!*timeo) - break; - } - remove_wait_queue(sk_sleep(sk), &wait); - - return skb == NULL ? ERR_PTR(err) : skb; -} - -static int dn_accept(struct socket *sock, struct socket *newsock, int flags, - bool kern) -{ - struct sock *sk = sock->sk, *newsk; - struct sk_buff *skb = NULL; - struct dn_skb_cb *cb; - unsigned char menuver; - int err = 0; - unsigned char type; - long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); - struct dst_entry *dst; - - lock_sock(sk); - - if (sk->sk_state != TCP_LISTEN || DN_SK(sk)->state != DN_O) { - release_sock(sk); - return -EINVAL; - } - - skb = skb_dequeue(&sk->sk_receive_queue); - if (skb == NULL) { - skb = dn_wait_for_connect(sk, &timeo); - if (IS_ERR(skb)) { - release_sock(sk); - return PTR_ERR(skb); - } - } - - cb = DN_SKB_CB(skb); - sk_acceptq_removed(sk); - newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, kern); - if (newsk == NULL) { - release_sock(sk); - kfree_skb(skb); - return -ENOBUFS; - } - release_sock(sk); - - dst = skb_dst(skb); - sk_dst_set(newsk, dst); - skb_dst_set(skb, NULL); - - DN_SK(newsk)->state = DN_CR; - DN_SK(newsk)->addrrem = cb->src_port; - DN_SK(newsk)->services_rem = cb->services; - DN_SK(newsk)->info_rem = cb->info; - DN_SK(newsk)->segsize_rem = cb->segsize; - DN_SK(newsk)->accept_mode = DN_SK(sk)->accept_mode; - - if (DN_SK(newsk)->segsize_rem < 230) - DN_SK(newsk)->segsize_rem = 230; - - if ((DN_SK(newsk)->services_rem & NSP_FC_MASK) == NSP_FC_NONE) - DN_SK(newsk)->max_window = decnet_no_fc_max_cwnd; - - newsk->sk_state = TCP_LISTEN; - memcpy(&(DN_SK(newsk)->addr), &(DN_SK(sk)->addr), sizeof(struct sockaddr_dn)); - - /* - * If we are listening on a wild socket, we don't want - * the newly created socket on the wrong hash queue. - */ - DN_SK(newsk)->addr.sdn_flags &= ~SDF_WILD; - - skb_pull(skb, dn_username2sockaddr(skb->data, skb->len, &(DN_SK(newsk)->addr), &type)); - skb_pull(skb, dn_username2sockaddr(skb->data, skb->len, &(DN_SK(newsk)->peer), &type)); - *(__le16 *)(DN_SK(newsk)->peer.sdn_add.a_addr) = cb->src; - *(__le16 *)(DN_SK(newsk)->addr.sdn_add.a_addr) = cb->dst; - - menuver = *skb->data; - skb_pull(skb, 1); - - if (menuver & DN_MENUVER_ACC) - dn_access_copy(skb, &(DN_SK(newsk)->accessdata)); - - if (menuver & DN_MENUVER_USR) - dn_user_copy(skb, &(DN_SK(newsk)->conndata_in)); - - if (menuver & DN_MENUVER_PRX) - DN_SK(newsk)->peer.sdn_flags |= SDF_PROXY; - - if (menuver & DN_MENUVER_UIC) - DN_SK(newsk)->peer.sdn_flags |= SDF_UICPROXY; - - kfree_skb(skb); - - memcpy(&(DN_SK(newsk)->conndata_out), &(DN_SK(sk)->conndata_out), - sizeof(struct optdata_dn)); - memcpy(&(DN_SK(newsk)->discdata_out), &(DN_SK(sk)->discdata_out), - sizeof(struct optdata_dn)); - - lock_sock(newsk); - err = dn_hash_sock(newsk); - if (err == 0) { - sock_reset_flag(newsk, SOCK_ZAPPED); - dn_send_conn_ack(newsk); - - /* - * Here we use sk->sk_allocation since although the conn conf is - * for the newsk, the context is the old socket. - */ - if (DN_SK(newsk)->accept_mode == ACC_IMMED) - err = dn_confirm_accept(newsk, &timeo, - sk->sk_allocation); - } - release_sock(newsk); - return err; -} - - -static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int peer) -{ - struct sockaddr_dn *sa = (struct sockaddr_dn *)uaddr; - struct sock *sk = sock->sk; - struct dn_scp *scp = DN_SK(sk); - - lock_sock(sk); - - if (peer) { - if ((sock->state != SS_CONNECTED && - sock->state != SS_CONNECTING) && - scp->accept_mode == ACC_IMMED) { - release_sock(sk); - return -ENOTCONN; - } - - memcpy(sa, &scp->peer, sizeof(struct sockaddr_dn)); - } else { - memcpy(sa, &scp->addr, sizeof(struct sockaddr_dn)); - } - - release_sock(sk); - - return sizeof(struct sockaddr_dn); -} - - -static __poll_t dn_poll(struct file *file, struct socket *sock, poll_table *wait) -{ - struct sock *sk = sock->sk; - struct dn_scp *scp = DN_SK(sk); - __poll_t mask = datagram_poll(file, sock, wait); - - if (!skb_queue_empty_lockless(&scp->other_receive_queue)) - mask |= EPOLLRDBAND; - - return mask; -} - -static int dn_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) -{ - struct sock *sk = sock->sk; - struct dn_scp *scp = DN_SK(sk); - int err = -EOPNOTSUPP; - long amount = 0; - struct sk_buff *skb; - int val; - - switch(cmd) - { - case SIOCGIFADDR: - case SIOCSIFADDR: - return dn_dev_ioctl(cmd, (void __user *)arg); - - case SIOCATMARK: - lock_sock(sk); - val = !skb_queue_empty(&scp->other_receive_queue); - if (scp->state != DN_RUN) - val = -ENOTCONN; - release_sock(sk); - return val; - - case TIOCOUTQ: - amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); - if (amount < 0) - amount = 0; - err = put_user(amount, (int __user *)arg); - break; - - case TIOCINQ: - lock_sock(sk); - skb = skb_peek(&scp->other_receive_queue); - if (skb) { - amount = skb->len; - } else { - skb_queue_walk(&sk->sk_receive_queue, skb) - amount += skb->len; - } - release_sock(sk); - err = put_user(amount, (int __user *)arg); - break; - - default: - err = -ENOIOCTLCMD; - break; - } - - return err; -} - -static int dn_listen(struct socket *sock, int backlog) -{ - struct sock *sk = sock->sk; - int err = -EINVAL; - - lock_sock(sk); - - if (sock_flag(sk, SOCK_ZAPPED)) - goto out; - - if ((DN_SK(sk)->state != DN_O) || (sk->sk_state == TCP_LISTEN)) - goto out; - - sk->sk_max_ack_backlog = backlog; - sk->sk_ack_backlog = 0; - sk->sk_state = TCP_LISTEN; - err = 0; - dn_rehash_sock(sk); - -out: - release_sock(sk); - - return err; -} - - -static int dn_shutdown(struct socket *sock, int how) -{ - struct sock *sk = sock->sk; - struct dn_scp *scp = DN_SK(sk); - int err = -ENOTCONN; - - lock_sock(sk); - - if (sock->state == SS_UNCONNECTED) - goto out; - - err = 0; - if (sock->state == SS_DISCONNECTING) - goto out; - - err = -EINVAL; - if (scp->state == DN_O) - goto out; - - if (how != SHUT_RDWR) - goto out; - - sk->sk_shutdown = SHUTDOWN_MASK; - dn_destroy_sock(sk); - err = 0; - -out: - release_sock(sk); - - return err; -} - -static int dn_setsockopt(struct socket *sock, int level, int optname, - sockptr_t optval, unsigned int optlen) -{ - struct sock *sk = sock->sk; - int err; - - lock_sock(sk); - err = __dn_setsockopt(sock, level, optname, optval, optlen, 0); - release_sock(sk); -#ifdef CONFIG_NETFILTER - /* we need to exclude all possible ENOPROTOOPTs except default case */ - if (err == -ENOPROTOOPT && optname != DSO_LINKINFO && - optname != DSO_STREAM && optname != DSO_SEQPACKET) - err = nf_setsockopt(sk, PF_DECnet, optname, optval, optlen); -#endif - - return err; -} - -static int __dn_setsockopt(struct socket *sock, int level, int optname, - sockptr_t optval, unsigned int optlen, int flags) -{ - struct sock *sk = sock->sk; - struct dn_scp *scp = DN_SK(sk); - long timeo; - union { - struct optdata_dn opt; - struct accessdata_dn acc; - int mode; - unsigned long win; - int val; - unsigned char services; - unsigned char info; - } u; - int err; - - if (optlen && sockptr_is_null(optval)) - return -EINVAL; - - if (optlen > sizeof(u)) - return -EINVAL; - - if (copy_from_sockptr(&u, optval, optlen)) - return -EFAULT; - - switch (optname) { - case DSO_CONDATA: - if (sock->state == SS_CONNECTED) - return -EISCONN; - if ((scp->state != DN_O) && (scp->state != DN_CR)) - return -EINVAL; - - if (optlen != sizeof(struct optdata_dn)) - return -EINVAL; - - if (le16_to_cpu(u.opt.opt_optl) > 16) - return -EINVAL; - - memcpy(&scp->conndata_out, &u.opt, optlen); - break; - - case DSO_DISDATA: - if (sock->state != SS_CONNECTED && - scp->accept_mode == ACC_IMMED) - return -ENOTCONN; - - if (optlen != sizeof(struct optdata_dn)) - return -EINVAL; - - if (le16_to_cpu(u.opt.opt_optl) > 16) - return -EINVAL; - - memcpy(&scp->discdata_out, &u.opt, optlen); - break; - - case DSO_CONACCESS: - if (sock->state == SS_CONNECTED) - return -EISCONN; - if (scp->state != DN_O) - return -EINVAL; - - if (optlen != sizeof(struct accessdata_dn)) - return -EINVAL; - - if ((u.acc.acc_accl > DN_MAXACCL) || - (u.acc.acc_passl > DN_MAXACCL) || - (u.acc.acc_userl > DN_MAXACCL)) - return -EINVAL; - - memcpy(&scp->accessdata, &u.acc, optlen); - break; - - case DSO_ACCEPTMODE: - if (sock->state == SS_CONNECTED) - return -EISCONN; - if (scp->state != DN_O) - return -EINVAL; - - if (optlen != sizeof(int)) - return -EINVAL; - - if ((u.mode != ACC_IMMED) && (u.mode != ACC_DEFER)) - return -EINVAL; - - scp->accept_mode = (unsigned char)u.mode; - break; - - case DSO_CONACCEPT: - if (scp->state != DN_CR) - return -EINVAL; - timeo = sock_rcvtimeo(sk, 0); - err = dn_confirm_accept(sk, &timeo, sk->sk_allocation); - return err; - - case DSO_CONREJECT: - if (scp->state != DN_CR) - return -EINVAL; - - scp->state = DN_DR; - sk->sk_shutdown = SHUTDOWN_MASK; - dn_nsp_send_disc(sk, 0x38, 0, sk->sk_allocation); - break; - - case DSO_MAXWINDOW: - if (optlen != sizeof(unsigned long)) - return -EINVAL; - if (u.win > NSP_MAX_WINDOW) - u.win = NSP_MAX_WINDOW; - if (u.win == 0) - return -EINVAL; - scp->max_window = u.win; - if (scp->snd_window > u.win) - scp->snd_window = u.win; - break; - - case DSO_NODELAY: - if (optlen != sizeof(int)) - return -EINVAL; - if (scp->nonagle == TCP_NAGLE_CORK) - return -EINVAL; - scp->nonagle = (u.val == 0) ? 0 : TCP_NAGLE_OFF; - /* if (scp->nonagle == 1) { Push pending frames } */ - break; - - case DSO_CORK: - if (optlen != sizeof(int)) - return -EINVAL; - if (scp->nonagle == TCP_NAGLE_OFF) - return -EINVAL; - scp->nonagle = (u.val == 0) ? 0 : TCP_NAGLE_CORK; - /* if (scp->nonagle == 0) { Push pending frames } */ - break; - - case DSO_SERVICES: - if (optlen != sizeof(unsigned char)) - return -EINVAL; - if ((u.services & ~NSP_FC_MASK) != 0x01) - return -EINVAL; - if ((u.services & NSP_FC_MASK) == NSP_FC_MASK) - return -EINVAL; - scp->services_loc = u.services; - break; - - case DSO_INFO: - if (optlen != sizeof(unsigned char)) - return -EINVAL; - if (u.info & 0xfc) - return -EINVAL; - scp->info_loc = u.info; - break; - - case DSO_LINKINFO: - case DSO_STREAM: - case DSO_SEQPACKET: - default: - return -ENOPROTOOPT; - } - - return 0; -} - -static int dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) -{ - struct sock *sk = sock->sk; - int err; - - lock_sock(sk); - err = __dn_getsockopt(sock, level, optname, optval, optlen, 0); - release_sock(sk); -#ifdef CONFIG_NETFILTER - if (err == -ENOPROTOOPT && optname != DSO_STREAM && - optname != DSO_SEQPACKET && optname != DSO_CONACCEPT && - optname != DSO_CONREJECT) { - int len; - - if (get_user(len, optlen)) - return -EFAULT; - - err = nf_getsockopt(sk, PF_DECnet, optname, optval, &len); - if (err >= 0) - err = put_user(len, optlen); - } -#endif - - return err; -} - -static int __dn_getsockopt(struct socket *sock, int level,int optname, char __user *optval,int __user *optlen, int flags) -{ - struct sock *sk = sock->sk; - struct dn_scp *scp = DN_SK(sk); - struct linkinfo_dn link; - unsigned int r_len; - void *r_data = NULL; - unsigned int val; - - if(get_user(r_len , optlen)) - return -EFAULT; - - switch (optname) { - case DSO_CONDATA: - if (r_len > sizeof(struct optdata_dn)) - r_len = sizeof(struct optdata_dn); - r_data = &scp->conndata_in; - break; - - case DSO_DISDATA: - if (r_len > sizeof(struct optdata_dn)) - r_len = sizeof(struct optdata_dn); - r_data = &scp->discdata_in; - break; - - case DSO_CONACCESS: - if (r_len > sizeof(struct accessdata_dn)) - r_len = sizeof(struct accessdata_dn); - r_data = &scp->accessdata; - break; - - case DSO_ACCEPTMODE: - if (r_len > sizeof(unsigned char)) - r_len = sizeof(unsigned char); - r_data = &scp->accept_mode; - break; - - case DSO_LINKINFO: - if (r_len > sizeof(struct linkinfo_dn)) - r_len = sizeof(struct linkinfo_dn); - - memset(&link, 0, sizeof(link)); - - switch (sock->state) { - case SS_CONNECTING: - link.idn_linkstate = LL_CONNECTING; - break; - case SS_DISCONNECTING: - link.idn_linkstate = LL_DISCONNECTING; - break; - case SS_CONNECTED: - link.idn_linkstate = LL_RUNNING; - break; - default: - link.idn_linkstate = LL_INACTIVE; - } - - link.idn_segsize = scp->segsize_rem; - r_data = &link; - break; - - case DSO_MAXWINDOW: - if (r_len > sizeof(unsigned long)) - r_len = sizeof(unsigned long); - r_data = &scp->max_window; - break; - - case DSO_NODELAY: - if (r_len > sizeof(int)) - r_len = sizeof(int); - val = (scp->nonagle == TCP_NAGLE_OFF); - r_data = &val; - break; - - case DSO_CORK: - if (r_len > sizeof(int)) - r_len = sizeof(int); - val = (scp->nonagle == TCP_NAGLE_CORK); - r_data = &val; - break; - - case DSO_SERVICES: - if (r_len > sizeof(unsigned char)) - r_len = sizeof(unsigned char); - r_data = &scp->services_rem; - break; - - case DSO_INFO: - if (r_len > sizeof(unsigned char)) - r_len = sizeof(unsigned char); - r_data = &scp->info_rem; - break; - - case DSO_STREAM: - case DSO_SEQPACKET: - case DSO_CONACCEPT: - case DSO_CONREJECT: - default: - return -ENOPROTOOPT; - } - - if (r_data) { - if (copy_to_user(optval, r_data, r_len)) - return -EFAULT; - if (put_user(r_len, optlen)) - return -EFAULT; - } - - return 0; -} - - -static int dn_data_ready(struct sock *sk, struct sk_buff_head *q, int flags, int target) -{ - struct sk_buff *skb; - int len = 0; - - if (flags & MSG_OOB) - return !skb_queue_empty(q) ? 1 : 0; - - skb_queue_walk(q, skb) { - struct dn_skb_cb *cb = DN_SKB_CB(skb); - len += skb->len; - - if (cb->nsp_flags & 0x40) { - /* SOCK_SEQPACKET reads to EOM */ - if (sk->sk_type == SOCK_SEQPACKET) - return 1; - /* so does SOCK_STREAM unless WAITALL is specified */ - if (!(flags & MSG_WAITALL)) - return 1; - } - - /* minimum data length for read exceeded */ - if (len >= target) - return 1; - } - - return 0; -} - - -static int dn_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, - int flags) -{ - struct sock *sk = sock->sk; - struct dn_scp *scp = DN_SK(sk); - struct sk_buff_head *queue = &sk->sk_receive_queue; - size_t target = size > 1 ? 1 : 0; - size_t copied = 0; - int rv = 0; - struct sk_buff *skb, *n; - struct dn_skb_cb *cb = NULL; - unsigned char eor = 0; - long timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); - - lock_sock(sk); - - if (sock_flag(sk, SOCK_ZAPPED)) { - rv = -EADDRNOTAVAIL; - goto out; - } - - if (sk->sk_shutdown & RCV_SHUTDOWN) { - rv = 0; - goto out; - } - - rv = dn_check_state(sk, NULL, 0, &timeo, flags); - if (rv) - goto out; - - if (flags & ~(MSG_CMSG_COMPAT|MSG_PEEK|MSG_OOB|MSG_WAITALL|MSG_DONTWAIT|MSG_NOSIGNAL)) { - rv = -EOPNOTSUPP; - goto out; - } - - if (flags & MSG_OOB) - queue = &scp->other_receive_queue; - - if (flags & MSG_WAITALL) - target = size; - - - /* - * See if there is data ready to read, sleep if there isn't - */ - for(;;) { - DEFINE_WAIT_FUNC(wait, woken_wake_function); - - if (sk->sk_err) - goto out; - - if (!skb_queue_empty(&scp->other_receive_queue)) { - if (!(flags & MSG_OOB)) { - msg->msg_flags |= MSG_OOB; - if (!scp->other_report) { - scp->other_report = 1; - goto out; - } - } - } - - if (scp->state != DN_RUN) - goto out; - - if (signal_pending(current)) { - rv = sock_intr_errno(timeo); - goto out; - } - - if (dn_data_ready(sk, queue, flags, target)) - break; - - if (flags & MSG_DONTWAIT) { - rv = -EWOULDBLOCK; - goto out; - } - - add_wait_queue(sk_sleep(sk), &wait); - sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); - sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target), &wait); - sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); - remove_wait_queue(sk_sleep(sk), &wait); - } - - skb_queue_walk_safe(queue, skb, n) { - unsigned int chunk = skb->len; - cb = DN_SKB_CB(skb); - - if ((chunk + copied) > size) - chunk = size - copied; - - if (memcpy_to_msg(msg, skb->data, chunk)) { - rv = -EFAULT; - break; - } - copied += chunk; - - if (!(flags & MSG_PEEK)) - skb_pull(skb, chunk); - - eor = cb->nsp_flags & 0x40; - - if (skb->len == 0) { - skb_unlink(skb, queue); - kfree_skb(skb); - /* - * N.B. Don't refer to skb or cb after this point - * in loop. - */ - if ((scp->flowloc_sw == DN_DONTSEND) && !dn_congested(sk)) { - scp->flowloc_sw = DN_SEND; - dn_nsp_send_link(sk, DN_SEND, 0); - } - } - - if (eor) { - if (sk->sk_type == SOCK_SEQPACKET) - break; - if (!(flags & MSG_WAITALL)) - break; - } - - if (flags & MSG_OOB) - break; - - if (copied >= target) - break; - } - - rv = copied; - - - if (eor && (sk->sk_type == SOCK_SEQPACKET)) - msg->msg_flags |= MSG_EOR; - -out: - if (rv == 0) - rv = (flags & MSG_PEEK) ? -sk->sk_err : sock_error(sk); - - if ((rv >= 0) && msg->msg_name) { - __sockaddr_check_size(sizeof(struct sockaddr_dn)); - memcpy(msg->msg_name, &scp->peer, sizeof(struct sockaddr_dn)); - msg->msg_namelen = sizeof(struct sockaddr_dn); - } - - release_sock(sk); - - return rv; -} - - -static inline int dn_queue_too_long(struct dn_scp *scp, struct sk_buff_head *queue, int flags) -{ - unsigned char fctype = scp->services_rem & NSP_FC_MASK; - if (skb_queue_len(queue) >= scp->snd_window) - return 1; - if (fctype != NSP_FC_NONE) { - if (flags & MSG_OOB) { - if (scp->flowrem_oth == 0) - return 1; - } else { - if (scp->flowrem_dat == 0) - return 1; - } - } - return 0; -} - -/* - * The DECnet spec requires that the "routing layer" accepts packets which - * are at least 230 bytes in size. This excludes any headers which the NSP - * layer might add, so we always assume that we'll be using the maximal - * length header on data packets. The variation in length is due to the - * inclusion (or not) of the two 16 bit acknowledgement fields so it doesn't - * make much practical difference. - */ -unsigned int dn_mss_from_pmtu(struct net_device *dev, int mtu) -{ - unsigned int mss = 230 - DN_MAX_NSP_DATA_HEADER; - if (dev) { - struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); - mtu -= LL_RESERVED_SPACE(dev); - if (dn_db->use_long) - mtu -= 21; - else - mtu -= 6; - mtu -= DN_MAX_NSP_DATA_HEADER; - } else { - /* - * 21 = long header, 16 = guess at MAC header length - */ - mtu -= (21 + DN_MAX_NSP_DATA_HEADER + 16); - } - if (mtu > mss) - mss = mtu; - return mss; -} - -static inline unsigned int dn_current_mss(struct sock *sk, int flags) -{ - struct dst_entry *dst = __sk_dst_get(sk); - struct dn_scp *scp = DN_SK(sk); - int mss_now = min_t(int, scp->segsize_loc, scp->segsize_rem); - - /* Other data messages are limited to 16 bytes per packet */ - if (flags & MSG_OOB) - return 16; - - /* This works out the maximum size of segment we can send out */ - if (dst) { - u32 mtu = dst_mtu(dst); - mss_now = min_t(int, dn_mss_from_pmtu(dst->dev, mtu), mss_now); - } - - return mss_now; -} - -/* - * N.B. We get the timeout wrong here, but then we always did get it - * wrong before and this is another step along the road to correcting - * it. It ought to get updated each time we pass through the routine, - * but in practise it probably doesn't matter too much for now. - */ -static inline struct sk_buff *dn_alloc_send_pskb(struct sock *sk, - unsigned long datalen, int noblock, - int *errcode) -{ - struct sk_buff *skb = sock_alloc_send_skb(sk, datalen, - noblock, errcode); - if (skb) { - skb->protocol = htons(ETH_P_DNA_RT); - skb->pkt_type = PACKET_OUTGOING; - } - return skb; -} - -static int dn_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) -{ - struct sock *sk = sock->sk; - struct dn_scp *scp = DN_SK(sk); - size_t mss; - struct sk_buff_head *queue = &scp->data_xmit_queue; - int flags = msg->msg_flags; - int err = 0; - size_t sent = 0; - int addr_len = msg->msg_namelen; - DECLARE_SOCKADDR(struct sockaddr_dn *, addr, msg->msg_name); - struct sk_buff *skb = NULL; - struct dn_skb_cb *cb; - size_t len; - unsigned char fctype; - long timeo; - - if (flags & ~(MSG_TRYHARD|MSG_OOB|MSG_DONTWAIT|MSG_EOR|MSG_NOSIGNAL|MSG_MORE|MSG_CMSG_COMPAT)) - return -EOPNOTSUPP; - - if (addr_len && (addr_len != sizeof(struct sockaddr_dn))) - return -EINVAL; - - lock_sock(sk); - timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); - /* - * The only difference between stream sockets and sequenced packet - * sockets is that the stream sockets always behave as if MSG_EOR - * has been set. - */ - if (sock->type == SOCK_STREAM) { - if (flags & MSG_EOR) { - err = -EINVAL; - goto out; - } - flags |= MSG_EOR; - } - - - err = dn_check_state(sk, addr, addr_len, &timeo, flags); - if (err) - goto out_err; - - if (sk->sk_shutdown & SEND_SHUTDOWN) { - err = -EPIPE; - if (!(flags & MSG_NOSIGNAL)) - send_sig(SIGPIPE, current, 0); - goto out_err; - } - - if ((flags & MSG_TRYHARD) && sk->sk_dst_cache) - dst_negative_advice(sk); - - mss = scp->segsize_rem; - fctype = scp->services_rem & NSP_FC_MASK; - - mss = dn_current_mss(sk, flags); - - if (flags & MSG_OOB) { - queue = &scp->other_xmit_queue; - if (size > mss) { - err = -EMSGSIZE; - goto out; - } - } - - scp->persist_fxn = dn_nsp_xmit_timeout; - - while(sent < size) { - err = sock_error(sk); - if (err) - goto out; - - if (signal_pending(current)) { - err = sock_intr_errno(timeo); - goto out; - } - - /* - * Calculate size that we wish to send. - */ - len = size - sent; - - if (len > mss) - len = mss; - - /* - * Wait for queue size to go down below the window - * size. - */ - if (dn_queue_too_long(scp, queue, flags)) { - DEFINE_WAIT_FUNC(wait, woken_wake_function); - - if (flags & MSG_DONTWAIT) { - err = -EWOULDBLOCK; - goto out; - } - - add_wait_queue(sk_sleep(sk), &wait); - sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); - sk_wait_event(sk, &timeo, - !dn_queue_too_long(scp, queue, flags), &wait); - sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); - remove_wait_queue(sk_sleep(sk), &wait); - continue; - } - - /* - * Get a suitably sized skb. - * 64 is a bit of a hack really, but its larger than any - * link-layer headers and has served us well as a good - * guess as to their real length. - */ - skb = dn_alloc_send_pskb(sk, len + 64 + DN_MAX_NSP_DATA_HEADER, - flags & MSG_DONTWAIT, &err); - - if (err) - break; - - if (!skb) - continue; - - cb = DN_SKB_CB(skb); - - skb_reserve(skb, 64 + DN_MAX_NSP_DATA_HEADER); - - if (memcpy_from_msg(skb_put(skb, len), msg, len)) { - err = -EFAULT; - goto out; - } - - if (flags & MSG_OOB) { - cb->nsp_flags = 0x30; - if (fctype != NSP_FC_NONE) - scp->flowrem_oth--; - } else { - cb->nsp_flags = 0x00; - if (scp->seg_total == 0) - cb->nsp_flags |= 0x20; - - scp->seg_total += len; - - if (((sent + len) == size) && (flags & MSG_EOR)) { - cb->nsp_flags |= 0x40; - scp->seg_total = 0; - if (fctype == NSP_FC_SCMC) - scp->flowrem_dat--; - } - if (fctype == NSP_FC_SRC) - scp->flowrem_dat--; - } - - sent += len; - dn_nsp_queue_xmit(sk, skb, sk->sk_allocation, flags & MSG_OOB); - skb = NULL; - - scp->persist = dn_nsp_persist(sk); - - } -out: - - kfree_skb(skb); - - release_sock(sk); - - return sent ? sent : err; - -out_err: - err = sk_stream_error(sk, flags, err); - release_sock(sk); - return err; -} - -static int dn_device_event(struct notifier_block *this, unsigned long event, - void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; - - switch (event) { - case NETDEV_UP: - dn_dev_up(dev); - break; - case NETDEV_DOWN: - dn_dev_down(dev); - break; - default: - break; - } - - return NOTIFY_DONE; -} - -static struct notifier_block dn_dev_notifier = { - .notifier_call = dn_device_event, -}; - -static struct packet_type dn_dix_packet_type __read_mostly = { - .type = cpu_to_be16(ETH_P_DNA_RT), - .func = dn_route_rcv, -}; - -#ifdef CONFIG_PROC_FS -struct dn_iter_state { - int bucket; -}; - -static struct sock *dn_socket_get_first(struct seq_file *seq) -{ - struct dn_iter_state *state = seq->private; - struct sock *n = NULL; - - for(state->bucket = 0; - state->bucket < DN_SK_HASH_SIZE; - ++state->bucket) { - n = sk_head(&dn_sk_hash[state->bucket]); - if (n) - break; - } - - return n; -} - -static struct sock *dn_socket_get_next(struct seq_file *seq, - struct sock *n) -{ - struct dn_iter_state *state = seq->private; - - n = sk_next(n); - while (!n) { - if (++state->bucket >= DN_SK_HASH_SIZE) - break; - n = sk_head(&dn_sk_hash[state->bucket]); - } - return n; -} - -static struct sock *socket_get_idx(struct seq_file *seq, loff_t *pos) -{ - struct sock *sk = dn_socket_get_first(seq); - - if (sk) { - while(*pos && (sk = dn_socket_get_next(seq, sk))) - --*pos; - } - return *pos ? NULL : sk; -} - -static void *dn_socket_get_idx(struct seq_file *seq, loff_t pos) -{ - void *rc; - read_lock_bh(&dn_hash_lock); - rc = socket_get_idx(seq, &pos); - if (!rc) { - read_unlock_bh(&dn_hash_lock); - } - return rc; -} - -static void *dn_socket_seq_start(struct seq_file *seq, loff_t *pos) -{ - return *pos ? dn_socket_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; -} - -static void *dn_socket_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - void *rc; - - if (v == SEQ_START_TOKEN) { - rc = dn_socket_get_idx(seq, 0); - goto out; - } - - rc = dn_socket_get_next(seq, v); - if (rc) - goto out; - read_unlock_bh(&dn_hash_lock); -out: - ++*pos; - return rc; -} - -static void dn_socket_seq_stop(struct seq_file *seq, void *v) -{ - if (v && v != SEQ_START_TOKEN) - read_unlock_bh(&dn_hash_lock); -} - -#define IS_NOT_PRINTABLE(x) ((x) < 32 || (x) > 126) - -static void dn_printable_object(struct sockaddr_dn *dn, unsigned char *buf) -{ - int i; - - switch (le16_to_cpu(dn->sdn_objnamel)) { - case 0: - sprintf(buf, "%d", dn->sdn_objnum); - break; - default: - for (i = 0; i < le16_to_cpu(dn->sdn_objnamel); i++) { - buf[i] = dn->sdn_objname[i]; - if (IS_NOT_PRINTABLE(buf[i])) - buf[i] = '.'; - } - buf[i] = 0; - } -} - -static char *dn_state2asc(unsigned char state) -{ - switch (state) { - case DN_O: - return "OPEN"; - case DN_CR: - return " CR"; - case DN_DR: - return " DR"; - case DN_DRC: - return " DRC"; - case DN_CC: - return " CC"; - case DN_CI: - return " CI"; - case DN_NR: - return " NR"; - case DN_NC: - return " NC"; - case DN_CD: - return " CD"; - case DN_RJ: - return " RJ"; - case DN_RUN: - return " RUN"; - case DN_DI: - return " DI"; - case DN_DIC: - return " DIC"; - case DN_DN: - return " DN"; - case DN_CL: - return " CL"; - case DN_CN: - return " CN"; - } - - return "????"; -} - -static inline void dn_socket_format_entry(struct seq_file *seq, struct sock *sk) -{ - struct dn_scp *scp = DN_SK(sk); - char buf1[DN_ASCBUF_LEN]; - char buf2[DN_ASCBUF_LEN]; - char local_object[DN_MAXOBJL+3]; - char remote_object[DN_MAXOBJL+3]; - - dn_printable_object(&scp->addr, local_object); - dn_printable_object(&scp->peer, remote_object); - - seq_printf(seq, - "%6s/%04X %04d:%04d %04d:%04d %01d %-16s " - "%6s/%04X %04d:%04d %04d:%04d %01d %-16s %4s %s\n", - dn_addr2asc(le16_to_cpu(dn_saddr2dn(&scp->addr)), buf1), - scp->addrloc, - scp->numdat, - scp->numoth, - scp->ackxmt_dat, - scp->ackxmt_oth, - scp->flowloc_sw, - local_object, - dn_addr2asc(le16_to_cpu(dn_saddr2dn(&scp->peer)), buf2), - scp->addrrem, - scp->numdat_rcv, - scp->numoth_rcv, - scp->ackrcv_dat, - scp->ackrcv_oth, - scp->flowrem_sw, - remote_object, - dn_state2asc(scp->state), - ((scp->accept_mode == ACC_IMMED) ? "IMMED" : "DEFER")); -} - -static int dn_socket_seq_show(struct seq_file *seq, void *v) -{ - if (v == SEQ_START_TOKEN) { - seq_puts(seq, "Local Remote\n"); - } else { - dn_socket_format_entry(seq, v); - } - return 0; -} - -static const struct seq_operations dn_socket_seq_ops = { - .start = dn_socket_seq_start, - .next = dn_socket_seq_next, - .stop = dn_socket_seq_stop, - .show = dn_socket_seq_show, -}; -#endif - -static const struct net_proto_family dn_family_ops = { - .family = AF_DECnet, - .create = dn_create, - .owner = THIS_MODULE, -}; - -static const struct proto_ops dn_proto_ops = { - .family = AF_DECnet, - .owner = THIS_MODULE, - .release = dn_release, - .bind = dn_bind, - .connect = dn_connect, - .socketpair = sock_no_socketpair, - .accept = dn_accept, - .getname = dn_getname, - .poll = dn_poll, - .ioctl = dn_ioctl, - .listen = dn_listen, - .shutdown = dn_shutdown, - .setsockopt = dn_setsockopt, - .getsockopt = dn_getsockopt, - .sendmsg = dn_sendmsg, - .recvmsg = dn_recvmsg, - .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, -}; - -MODULE_DESCRIPTION("The Linux DECnet Network Protocol"); -MODULE_AUTHOR("Linux DECnet Project Team"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_NETPROTO(PF_DECnet); - -static const char banner[] __initconst = KERN_INFO -"NET4: DECnet for Linux: V.2.5.68s (C) 1995-2003 Linux DECnet Project Team\n"; - -static int __init decnet_init(void) -{ - int rc; - - printk(banner); - - rc = proto_register(&dn_proto, 1); - if (rc != 0) - goto out; - - dn_neigh_init(); - dn_dev_init(); - dn_route_init(); - dn_fib_init(); - - sock_register(&dn_family_ops); - dev_add_pack(&dn_dix_packet_type); - register_netdevice_notifier(&dn_dev_notifier); - - proc_create_seq_private("decnet", 0444, init_net.proc_net, - &dn_socket_seq_ops, sizeof(struct dn_iter_state), - NULL); - dn_register_sysctl(); -out: - return rc; - -} -module_init(decnet_init); - -/* - * Prevent DECnet module unloading until its fixed properly. - * Requires an audit of the code to check for memory leaks and - * initialisation problems etc. - */ -#if 0 -static void __exit decnet_exit(void) -{ - sock_unregister(AF_DECnet); - rtnl_unregister_all(PF_DECnet); - dev_remove_pack(&dn_dix_packet_type); - - dn_unregister_sysctl(); - - unregister_netdevice_notifier(&dn_dev_notifier); - - dn_route_cleanup(); - dn_dev_cleanup(); - dn_neigh_cleanup(); - dn_fib_cleanup(); - - remove_proc_entry("decnet", init_net.proc_net); - - proto_unregister(&dn_proto); - - rcu_barrier(); /* Wait for completion of call_rcu()'s */ -} -module_exit(decnet_exit); -#endif diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c deleted file mode 100644 index a09ba642b5e7..000000000000 --- a/net/decnet/dn_dev.c +++ /dev/null @@ -1,1433 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * DECnet An implementation of the DECnet protocol suite for the LINUX - * operating system. DECnet is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * DECnet Device Layer - * - * Authors: Steve Whitehouse - * Eduardo Marcelo Serrat - * - * Changes: - * Steve Whitehouse : Devices now see incoming frames so they - * can mark on who it came from. - * Steve Whitehouse : Fixed bug in creating neighbours. Each neighbour - * can now have a device specific setup func. - * Steve Whitehouse : Added /proc/sys/net/decnet/conf// - * Steve Whitehouse : Fixed bug which sometimes killed timer - * Steve Whitehouse : Multiple ifaddr support - * Steve Whitehouse : SIOCGIFCONF is now a compile time option - * Steve Whitehouse : /proc/sys/net/decnet/conf//forwarding - * Steve Whitehouse : Removed timer1 - it's a user space issue now - * Patrick Caulfield : Fixed router hello message format - * Steve Whitehouse : Got rid of constant sizes for blksize for - * devices. All mtu based now. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define DN_IFREQ_SIZE (offsetof(struct ifreq, ifr_ifru) + sizeof(struct sockaddr_dn)) - -static char dn_rt_all_end_mcast[ETH_ALEN] = {0xAB,0x00,0x00,0x04,0x00,0x00}; -static char dn_rt_all_rt_mcast[ETH_ALEN] = {0xAB,0x00,0x00,0x03,0x00,0x00}; -static char dn_hiord[ETH_ALEN] = {0xAA,0x00,0x04,0x00,0x00,0x00}; -static unsigned char dn_eco_version[3] = {0x02,0x00,0x00}; - -extern struct neigh_table dn_neigh_table; - -/* - * decnet_address is kept in network order. - */ -__le16 decnet_address = 0; - -static DEFINE_SPINLOCK(dndev_lock); -static struct net_device *decnet_default_device; -static BLOCKING_NOTIFIER_HEAD(dnaddr_chain); - -static struct dn_dev *dn_dev_create(struct net_device *dev, int *err); -static void dn_dev_delete(struct net_device *dev); -static void dn_ifaddr_notify(int event, struct dn_ifaddr *ifa); - -static int dn_eth_up(struct net_device *); -static void dn_eth_down(struct net_device *); -static void dn_send_brd_hello(struct net_device *dev, struct dn_ifaddr *ifa); -static void dn_send_ptp_hello(struct net_device *dev, struct dn_ifaddr *ifa); - -static struct dn_dev_parms dn_dev_list[] = { -{ - .type = ARPHRD_ETHER, /* Ethernet */ - .mode = DN_DEV_BCAST, - .state = DN_DEV_S_RU, - .t2 = 1, - .t3 = 10, - .name = "ethernet", - .up = dn_eth_up, - .down = dn_eth_down, - .timer3 = dn_send_brd_hello, -}, -{ - .type = ARPHRD_IPGRE, /* DECnet tunneled over GRE in IP */ - .mode = DN_DEV_BCAST, - .state = DN_DEV_S_RU, - .t2 = 1, - .t3 = 10, - .name = "ipgre", - .timer3 = dn_send_brd_hello, -}, -#if 0 -{ - .type = ARPHRD_X25, /* Bog standard X.25 */ - .mode = DN_DEV_UCAST, - .state = DN_DEV_S_DS, - .t2 = 1, - .t3 = 120, - .name = "x25", - .timer3 = dn_send_ptp_hello, -}, -#endif -#if 0 -{ - .type = ARPHRD_PPP, /* DECnet over PPP */ - .mode = DN_DEV_BCAST, - .state = DN_DEV_S_RU, - .t2 = 1, - .t3 = 10, - .name = "ppp", - .timer3 = dn_send_brd_hello, -}, -#endif -{ - .type = ARPHRD_DDCMP, /* DECnet over DDCMP */ - .mode = DN_DEV_UCAST, - .state = DN_DEV_S_DS, - .t2 = 1, - .t3 = 120, - .name = "ddcmp", - .timer3 = dn_send_ptp_hello, -}, -{ - .type = ARPHRD_LOOPBACK, /* Loopback interface - always last */ - .mode = DN_DEV_BCAST, - .state = DN_DEV_S_RU, - .t2 = 1, - .t3 = 10, - .name = "loopback", - .timer3 = dn_send_brd_hello, -} -}; - -#define DN_DEV_LIST_SIZE ARRAY_SIZE(dn_dev_list) - -#define DN_DEV_PARMS_OFFSET(x) offsetof(struct dn_dev_parms, x) - -#ifdef CONFIG_SYSCTL - -static int min_t2[] = { 1 }; -static int max_t2[] = { 60 }; /* No max specified, but this seems sensible */ -static int min_t3[] = { 1 }; -static int max_t3[] = { 8191 }; /* Must fit in 16 bits when multiplied by BCT3MULT or T3MULT */ - -static int min_priority[1]; -static int max_priority[] = { 127 }; /* From DECnet spec */ - -static int dn_forwarding_proc(struct ctl_table *, int, void *, size_t *, - loff_t *); -static struct dn_dev_sysctl_table { - struct ctl_table_header *sysctl_header; - struct ctl_table dn_dev_vars[5]; -} dn_dev_sysctl = { - NULL, - { - { - .procname = "forwarding", - .data = (void *)DN_DEV_PARMS_OFFSET(forwarding), - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = dn_forwarding_proc, - }, - { - .procname = "priority", - .data = (void *)DN_DEV_PARMS_OFFSET(priority), - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_priority, - .extra2 = &max_priority - }, - { - .procname = "t2", - .data = (void *)DN_DEV_PARMS_OFFSET(t2), - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_t2, - .extra2 = &max_t2 - }, - { - .procname = "t3", - .data = (void *)DN_DEV_PARMS_OFFSET(t3), - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_t3, - .extra2 = &max_t3 - }, - { } - }, -}; - -static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms *parms) -{ - struct dn_dev_sysctl_table *t; - int i; - - char path[sizeof("net/decnet/conf/") + IFNAMSIZ]; - - t = kmemdup(&dn_dev_sysctl, sizeof(*t), GFP_KERNEL); - if (t == NULL) - return; - - for(i = 0; i < ARRAY_SIZE(t->dn_dev_vars) - 1; i++) { - long offset = (long)t->dn_dev_vars[i].data; - t->dn_dev_vars[i].data = ((char *)parms) + offset; - } - - snprintf(path, sizeof(path), "net/decnet/conf/%s", - dev? dev->name : parms->name); - - t->dn_dev_vars[0].extra1 = (void *)dev; - - t->sysctl_header = register_net_sysctl(&init_net, path, t->dn_dev_vars); - if (t->sysctl_header == NULL) - kfree(t); - else - parms->sysctl = t; -} - -static void dn_dev_sysctl_unregister(struct dn_dev_parms *parms) -{ - if (parms->sysctl) { - struct dn_dev_sysctl_table *t = parms->sysctl; - parms->sysctl = NULL; - unregister_net_sysctl_table(t->sysctl_header); - kfree(t); - } -} - -static int dn_forwarding_proc(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ -#ifdef CONFIG_DECNET_ROUTER - struct net_device *dev = table->extra1; - struct dn_dev *dn_db; - int err; - int tmp, old; - - if (table->extra1 == NULL) - return -EINVAL; - - dn_db = rcu_dereference_raw(dev->dn_ptr); - old = dn_db->parms.forwarding; - - err = proc_dointvec(table, write, buffer, lenp, ppos); - - if ((err >= 0) && write) { - if (dn_db->parms.forwarding < 0) - dn_db->parms.forwarding = 0; - if (dn_db->parms.forwarding > 2) - dn_db->parms.forwarding = 2; - /* - * What an ugly hack this is... its works, just. It - * would be nice if sysctl/proc were just that little - * bit more flexible so I don't have to write a special - * routine, or suffer hacks like this - SJW - */ - tmp = dn_db->parms.forwarding; - dn_db->parms.forwarding = old; - if (dn_db->parms.down) - dn_db->parms.down(dev); - dn_db->parms.forwarding = tmp; - if (dn_db->parms.up) - dn_db->parms.up(dev); - } - - return err; -#else - return -EINVAL; -#endif -} - -#else /* CONFIG_SYSCTL */ -static void dn_dev_sysctl_unregister(struct dn_dev_parms *parms) -{ -} -static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms *parms) -{ -} - -#endif /* CONFIG_SYSCTL */ - -static inline __u16 mtu2blksize(struct net_device *dev) -{ - u32 blksize = dev->mtu; - if (blksize > 0xffff) - blksize = 0xffff; - - if (dev->type == ARPHRD_ETHER || - dev->type == ARPHRD_PPP || - dev->type == ARPHRD_IPGRE || - dev->type == ARPHRD_LOOPBACK) - blksize -= 2; - - return (__u16)blksize; -} - -static struct dn_ifaddr *dn_dev_alloc_ifa(void) -{ - struct dn_ifaddr *ifa; - - ifa = kzalloc(sizeof(*ifa), GFP_KERNEL); - - return ifa; -} - -static void dn_dev_free_ifa(struct dn_ifaddr *ifa) -{ - kfree_rcu(ifa, rcu); -} - -static void dn_dev_del_ifa(struct dn_dev *dn_db, struct dn_ifaddr __rcu **ifap, int destroy) -{ - struct dn_ifaddr *ifa1 = rtnl_dereference(*ifap); - unsigned char mac_addr[6]; - struct net_device *dev = dn_db->dev; - - ASSERT_RTNL(); - - *ifap = ifa1->ifa_next; - - if (dn_db->dev->type == ARPHRD_ETHER) { - if (ifa1->ifa_local != dn_eth2dn(dev->dev_addr)) { - dn_dn2eth(mac_addr, ifa1->ifa_local); - dev_mc_del(dev, mac_addr); - } - } - - dn_ifaddr_notify(RTM_DELADDR, ifa1); - blocking_notifier_call_chain(&dnaddr_chain, NETDEV_DOWN, ifa1); - if (destroy) { - dn_dev_free_ifa(ifa1); - - if (dn_db->ifa_list == NULL) - dn_dev_delete(dn_db->dev); - } -} - -static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa) -{ - struct net_device *dev = dn_db->dev; - struct dn_ifaddr *ifa1; - unsigned char mac_addr[6]; - - ASSERT_RTNL(); - - /* Check for duplicates */ - for (ifa1 = rtnl_dereference(dn_db->ifa_list); - ifa1 != NULL; - ifa1 = rtnl_dereference(ifa1->ifa_next)) { - if (ifa1->ifa_local == ifa->ifa_local) - return -EEXIST; - } - - if (dev->type == ARPHRD_ETHER) { - if (ifa->ifa_local != dn_eth2dn(dev->dev_addr)) { - dn_dn2eth(mac_addr, ifa->ifa_local); - dev_mc_add(dev, mac_addr); - } - } - - ifa->ifa_next = dn_db->ifa_list; - rcu_assign_pointer(dn_db->ifa_list, ifa); - - dn_ifaddr_notify(RTM_NEWADDR, ifa); - blocking_notifier_call_chain(&dnaddr_chain, NETDEV_UP, ifa); - - return 0; -} - -static int dn_dev_set_ifa(struct net_device *dev, struct dn_ifaddr *ifa) -{ - struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr); - int rv; - - if (dn_db == NULL) { - int err; - dn_db = dn_dev_create(dev, &err); - if (dn_db == NULL) - return err; - } - - ifa->ifa_dev = dn_db; - - if (dev->flags & IFF_LOOPBACK) - ifa->ifa_scope = RT_SCOPE_HOST; - - rv = dn_dev_insert_ifa(dn_db, ifa); - if (rv) - dn_dev_free_ifa(ifa); - return rv; -} - - -int dn_dev_ioctl(unsigned int cmd, void __user *arg) -{ - char buffer[DN_IFREQ_SIZE]; - struct ifreq *ifr = (struct ifreq *)buffer; - struct sockaddr_dn *sdn = (struct sockaddr_dn *)&ifr->ifr_addr; - struct dn_dev *dn_db; - struct net_device *dev; - struct dn_ifaddr *ifa = NULL; - struct dn_ifaddr __rcu **ifap = NULL; - int ret = 0; - - if (copy_from_user(ifr, arg, DN_IFREQ_SIZE)) - return -EFAULT; - ifr->ifr_name[IFNAMSIZ-1] = 0; - - dev_load(&init_net, ifr->ifr_name); - - switch (cmd) { - case SIOCGIFADDR: - break; - case SIOCSIFADDR: - if (!capable(CAP_NET_ADMIN)) - return -EACCES; - if (sdn->sdn_family != AF_DECnet) - return -EINVAL; - break; - default: - return -EINVAL; - } - - rtnl_lock(); - - if ((dev = __dev_get_by_name(&init_net, ifr->ifr_name)) == NULL) { - ret = -ENODEV; - goto done; - } - - if ((dn_db = rtnl_dereference(dev->dn_ptr)) != NULL) { - for (ifap = &dn_db->ifa_list; - (ifa = rtnl_dereference(*ifap)) != NULL; - ifap = &ifa->ifa_next) - if (strcmp(ifr->ifr_name, ifa->ifa_label) == 0) - break; - } - - if (ifa == NULL && cmd != SIOCSIFADDR) { - ret = -EADDRNOTAVAIL; - goto done; - } - - switch (cmd) { - case SIOCGIFADDR: - *((__le16 *)sdn->sdn_nodeaddr) = ifa->ifa_local; - if (copy_to_user(arg, ifr, DN_IFREQ_SIZE)) - ret = -EFAULT; - break; - - case SIOCSIFADDR: - if (!ifa) { - if ((ifa = dn_dev_alloc_ifa()) == NULL) { - ret = -ENOBUFS; - break; - } - memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); - } else { - if (ifa->ifa_local == dn_saddr2dn(sdn)) - break; - dn_dev_del_ifa(dn_db, ifap, 0); - } - - ifa->ifa_local = ifa->ifa_address = dn_saddr2dn(sdn); - - ret = dn_dev_set_ifa(dev, ifa); - } -done: - rtnl_unlock(); - - return ret; -} - -struct net_device *dn_dev_get_default(void) -{ - struct net_device *dev; - - spin_lock(&dndev_lock); - dev = decnet_default_device; - if (dev) { - if (dev->dn_ptr) - dev_hold(dev); - else - dev = NULL; - } - spin_unlock(&dndev_lock); - - return dev; -} - -int dn_dev_set_default(struct net_device *dev, int force) -{ - struct net_device *old = NULL; - int rv = -EBUSY; - if (!dev->dn_ptr) - return -ENODEV; - - spin_lock(&dndev_lock); - if (force || decnet_default_device == NULL) { - old = decnet_default_device; - decnet_default_device = dev; - rv = 0; - } - spin_unlock(&dndev_lock); - - dev_put(old); - return rv; -} - -static void dn_dev_check_default(struct net_device *dev) -{ - spin_lock(&dndev_lock); - if (dev == decnet_default_device) { - decnet_default_device = NULL; - } else { - dev = NULL; - } - spin_unlock(&dndev_lock); - - dev_put(dev); -} - -/* - * Called with RTNL - */ -static struct dn_dev *dn_dev_by_index(int ifindex) -{ - struct net_device *dev; - struct dn_dev *dn_dev = NULL; - - dev = __dev_get_by_index(&init_net, ifindex); - if (dev) - dn_dev = rtnl_dereference(dev->dn_ptr); - - return dn_dev; -} - -static const struct nla_policy dn_ifa_policy[IFA_MAX+1] = { - [IFA_ADDRESS] = { .type = NLA_U16 }, - [IFA_LOCAL] = { .type = NLA_U16 }, - [IFA_LABEL] = { .type = NLA_STRING, - .len = IFNAMSIZ - 1 }, - [IFA_FLAGS] = { .type = NLA_U32 }, -}; - -static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) -{ - struct net *net = sock_net(skb->sk); - struct nlattr *tb[IFA_MAX+1]; - struct dn_dev *dn_db; - struct ifaddrmsg *ifm; - struct dn_ifaddr *ifa; - struct dn_ifaddr __rcu **ifap; - int err = -EINVAL; - - if (!netlink_capable(skb, CAP_NET_ADMIN)) - return -EPERM; - - if (!net_eq(net, &init_net)) - goto errout; - - err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, - dn_ifa_policy, extack); - if (err < 0) - goto errout; - - err = -ENODEV; - ifm = nlmsg_data(nlh); - if ((dn_db = dn_dev_by_index(ifm->ifa_index)) == NULL) - goto errout; - - err = -EADDRNOTAVAIL; - for (ifap = &dn_db->ifa_list; - (ifa = rtnl_dereference(*ifap)) != NULL; - ifap = &ifa->ifa_next) { - if (tb[IFA_LOCAL] && - nla_memcmp(tb[IFA_LOCAL], &ifa->ifa_local, 2)) - continue; - - if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label)) - continue; - - dn_dev_del_ifa(dn_db, ifap, 1); - return 0; - } - -errout: - return err; -} - -static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) -{ - struct net *net = sock_net(skb->sk); - struct nlattr *tb[IFA_MAX+1]; - struct net_device *dev; - struct dn_dev *dn_db; - struct ifaddrmsg *ifm; - struct dn_ifaddr *ifa; - int err; - - if (!netlink_capable(skb, CAP_NET_ADMIN)) - return -EPERM; - - if (!net_eq(net, &init_net)) - return -EINVAL; - - err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, - dn_ifa_policy, extack); - if (err < 0) - return err; - - if (tb[IFA_LOCAL] == NULL) - return -EINVAL; - - ifm = nlmsg_data(nlh); - if ((dev = __dev_get_by_index(&init_net, ifm->ifa_index)) == NULL) - return -ENODEV; - - if ((dn_db = rtnl_dereference(dev->dn_ptr)) == NULL) { - dn_db = dn_dev_create(dev, &err); - if (!dn_db) - return err; - } - - if ((ifa = dn_dev_alloc_ifa()) == NULL) - return -ENOBUFS; - - if (tb[IFA_ADDRESS] == NULL) - tb[IFA_ADDRESS] = tb[IFA_LOCAL]; - - ifa->ifa_local = nla_get_le16(tb[IFA_LOCAL]); - ifa->ifa_address = nla_get_le16(tb[IFA_ADDRESS]); - ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : - ifm->ifa_flags; - ifa->ifa_scope = ifm->ifa_scope; - ifa->ifa_dev = dn_db; - - if (tb[IFA_LABEL]) - nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ); - else - memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); - - err = dn_dev_insert_ifa(dn_db, ifa); - if (err) - dn_dev_free_ifa(ifa); - - return err; -} - -static inline size_t dn_ifaddr_nlmsg_size(void) -{ - return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) - + nla_total_size(IFNAMSIZ) /* IFA_LABEL */ - + nla_total_size(2) /* IFA_ADDRESS */ - + nla_total_size(2) /* IFA_LOCAL */ - + nla_total_size(4); /* IFA_FLAGS */ -} - -static int dn_nl_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa, - u32 portid, u32 seq, int event, unsigned int flags) -{ - struct ifaddrmsg *ifm; - struct nlmsghdr *nlh; - u32 ifa_flags = ifa->ifa_flags | IFA_F_PERMANENT; - - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags); - if (nlh == NULL) - return -EMSGSIZE; - - ifm = nlmsg_data(nlh); - ifm->ifa_family = AF_DECnet; - ifm->ifa_prefixlen = 16; - ifm->ifa_flags = ifa_flags; - ifm->ifa_scope = ifa->ifa_scope; - ifm->ifa_index = ifa->ifa_dev->dev->ifindex; - - if ((ifa->ifa_address && - nla_put_le16(skb, IFA_ADDRESS, ifa->ifa_address)) || - (ifa->ifa_local && - nla_put_le16(skb, IFA_LOCAL, ifa->ifa_local)) || - (ifa->ifa_label[0] && - nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) || - nla_put_u32(skb, IFA_FLAGS, ifa_flags)) - goto nla_put_failure; - nlmsg_end(skb, nlh); - return 0; - -nla_put_failure: - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; -} - -static void dn_ifaddr_notify(int event, struct dn_ifaddr *ifa) -{ - struct sk_buff *skb; - int err = -ENOBUFS; - - skb = alloc_skb(dn_ifaddr_nlmsg_size(), GFP_KERNEL); - if (skb == NULL) - goto errout; - - err = dn_nl_fill_ifaddr(skb, ifa, 0, 0, event, 0); - if (err < 0) { - /* -EMSGSIZE implies BUG in dn_ifaddr_nlmsg_size() */ - WARN_ON(err == -EMSGSIZE); - kfree_skb(skb); - goto errout; - } - rtnl_notify(skb, &init_net, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL); - return; -errout: - if (err < 0) - rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_IFADDR, err); -} - -static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct net *net = sock_net(skb->sk); - int idx, dn_idx = 0, skip_ndevs, skip_naddr; - struct net_device *dev; - struct dn_dev *dn_db; - struct dn_ifaddr *ifa; - - if (!net_eq(net, &init_net)) - return 0; - - skip_ndevs = cb->args[0]; - skip_naddr = cb->args[1]; - - idx = 0; - rcu_read_lock(); - for_each_netdev_rcu(&init_net, dev) { - if (idx < skip_ndevs) - goto cont; - else if (idx > skip_ndevs) { - /* Only skip over addresses for first dev dumped - * in this iteration (idx == skip_ndevs) */ - skip_naddr = 0; - } - - if ((dn_db = rcu_dereference(dev->dn_ptr)) == NULL) - goto cont; - - for (ifa = rcu_dereference(dn_db->ifa_list), dn_idx = 0; ifa; - ifa = rcu_dereference(ifa->ifa_next), dn_idx++) { - if (dn_idx < skip_naddr) - continue; - - if (dn_nl_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, RTM_NEWADDR, - NLM_F_MULTI) < 0) - goto done; - } -cont: - idx++; - } -done: - rcu_read_unlock(); - cb->args[0] = idx; - cb->args[1] = dn_idx; - - return skb->len; -} - -static int dn_dev_get_first(struct net_device *dev, __le16 *addr) -{ - struct dn_dev *dn_db; - struct dn_ifaddr *ifa; - int rv = -ENODEV; - - rcu_read_lock(); - dn_db = rcu_dereference(dev->dn_ptr); - if (dn_db == NULL) - goto out; - - ifa = rcu_dereference(dn_db->ifa_list); - if (ifa != NULL) { - *addr = ifa->ifa_local; - rv = 0; - } -out: - rcu_read_unlock(); - return rv; -} - -/* - * Find a default address to bind to. - * - * This is one of those areas where the initial VMS concepts don't really - * map onto the Linux concepts, and since we introduced multiple addresses - * per interface we have to cope with slightly odd ways of finding out what - * "our address" really is. Mostly it's not a problem; for this we just guess - * a sensible default. Eventually the routing code will take care of all the - * nasties for us I hope. - */ -int dn_dev_bind_default(__le16 *addr) -{ - struct net_device *dev; - int rv; - dev = dn_dev_get_default(); -last_chance: - if (dev) { - rv = dn_dev_get_first(dev, addr); - dev_put(dev); - if (rv == 0 || dev == init_net.loopback_dev) - return rv; - } - dev = init_net.loopback_dev; - dev_hold(dev); - goto last_chance; -} - -static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa) -{ - struct endnode_hello_message *msg; - struct sk_buff *skb = NULL; - __le16 *pktlen; - struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); - - if ((skb = dn_alloc_skb(NULL, sizeof(*msg), GFP_ATOMIC)) == NULL) - return; - - skb->dev = dev; - - msg = skb_put(skb, sizeof(*msg)); - - msg->msgflg = 0x0D; - memcpy(msg->tiver, dn_eco_version, 3); - dn_dn2eth(msg->id, ifa->ifa_local); - msg->iinfo = DN_RT_INFO_ENDN; - msg->blksize = cpu_to_le16(mtu2blksize(dev)); - msg->area = 0x00; - memset(msg->seed, 0, 8); - memcpy(msg->neighbor, dn_hiord, ETH_ALEN); - - if (dn_db->router) { - struct dn_neigh *dn = container_of(dn_db->router, struct dn_neigh, n); - dn_dn2eth(msg->neighbor, dn->addr); - } - - msg->timer = cpu_to_le16((unsigned short)dn_db->parms.t3); - msg->mpd = 0x00; - msg->datalen = 0x02; - memset(msg->data, 0xAA, 2); - - pktlen = skb_push(skb, 2); - *pktlen = cpu_to_le16(skb->len - 2); - - skb_reset_network_header(skb); - - dn_rt_finish_output(skb, dn_rt_all_rt_mcast, msg->id); -} - - -#define DRDELAY (5 * HZ) - -static int dn_am_i_a_router(struct dn_neigh *dn, struct dn_dev *dn_db, struct dn_ifaddr *ifa) -{ - /* First check time since device went up */ - if (time_before(jiffies, dn_db->uptime + DRDELAY)) - return 0; - - /* If there is no router, then yes... */ - if (!dn_db->router) - return 1; - - /* otherwise only if we have a higher priority or.. */ - if (dn->priority < dn_db->parms.priority) - return 1; - - /* if we have equal priority and a higher node number */ - if (dn->priority != dn_db->parms.priority) - return 0; - - if (le16_to_cpu(dn->addr) < le16_to_cpu(ifa->ifa_local)) - return 1; - - return 0; -} - -static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa) -{ - int n; - struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); - struct dn_neigh *dn = container_of(dn_db->router, struct dn_neigh, n); - struct sk_buff *skb; - size_t size; - unsigned char *ptr; - unsigned char *i1, *i2; - __le16 *pktlen; - char *src; - - if (mtu2blksize(dev) < (26 + 7)) - return; - - n = mtu2blksize(dev) - 26; - n /= 7; - - if (n > 32) - n = 32; - - size = 2 + 26 + 7 * n; - - if ((skb = dn_alloc_skb(NULL, size, GFP_ATOMIC)) == NULL) - return; - - skb->dev = dev; - ptr = skb_put(skb, size); - - *ptr++ = DN_RT_PKT_CNTL | DN_RT_PKT_ERTH; - *ptr++ = 2; /* ECO */ - *ptr++ = 0; - *ptr++ = 0; - dn_dn2eth(ptr, ifa->ifa_local); - src = ptr; - ptr += ETH_ALEN; - *ptr++ = dn_db->parms.forwarding == 1 ? - DN_RT_INFO_L1RT : DN_RT_INFO_L2RT; - *((__le16 *)ptr) = cpu_to_le16(mtu2blksize(dev)); - ptr += 2; - *ptr++ = dn_db->parms.priority; /* Priority */ - *ptr++ = 0; /* Area: Reserved */ - *((__le16 *)ptr) = cpu_to_le16((unsigned short)dn_db->parms.t3); - ptr += 2; - *ptr++ = 0; /* MPD: Reserved */ - i1 = ptr++; - memset(ptr, 0, 7); /* Name: Reserved */ - ptr += 7; - i2 = ptr++; - - n = dn_neigh_elist(dev, ptr, n); - - *i2 = 7 * n; - *i1 = 8 + *i2; - - skb_trim(skb, (27 + *i2)); - - pktlen = skb_push(skb, 2); - *pktlen = cpu_to_le16(skb->len - 2); - - skb_reset_network_header(skb); - - if (dn_am_i_a_router(dn, dn_db, ifa)) { - struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC); - if (skb2) { - dn_rt_finish_output(skb2, dn_rt_all_end_mcast, src); - } - } - - dn_rt_finish_output(skb, dn_rt_all_rt_mcast, src); -} - -static void dn_send_brd_hello(struct net_device *dev, struct dn_ifaddr *ifa) -{ - struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); - - if (dn_db->parms.forwarding == 0) - dn_send_endnode_hello(dev, ifa); - else - dn_send_router_hello(dev, ifa); -} - -static void dn_send_ptp_hello(struct net_device *dev, struct dn_ifaddr *ifa) -{ - int tdlen = 16; - int size = dev->hard_header_len + 2 + 4 + tdlen; - struct sk_buff *skb = dn_alloc_skb(NULL, size, GFP_ATOMIC); - int i; - unsigned char *ptr; - char src[ETH_ALEN]; - - if (skb == NULL) - return ; - - skb->dev = dev; - skb_push(skb, dev->hard_header_len); - ptr = skb_put(skb, 2 + 4 + tdlen); - - *ptr++ = DN_RT_PKT_HELO; - *((__le16 *)ptr) = ifa->ifa_local; - ptr += 2; - *ptr++ = tdlen; - - for(i = 0; i < tdlen; i++) - *ptr++ = 0252; - - dn_dn2eth(src, ifa->ifa_local); - dn_rt_finish_output(skb, dn_rt_all_rt_mcast, src); -} - -static int dn_eth_up(struct net_device *dev) -{ - struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); - - if (dn_db->parms.forwarding == 0) - dev_mc_add(dev, dn_rt_all_end_mcast); - else - dev_mc_add(dev, dn_rt_all_rt_mcast); - - dn_db->use_long = 1; - - return 0; -} - -static void dn_eth_down(struct net_device *dev) -{ - struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); - - if (dn_db->parms.forwarding == 0) - dev_mc_del(dev, dn_rt_all_end_mcast); - else - dev_mc_del(dev, dn_rt_all_rt_mcast); -} - -static void dn_dev_set_timer(struct net_device *dev); - -static void dn_dev_timer_func(struct timer_list *t) -{ - struct dn_dev *dn_db = from_timer(dn_db, t, timer); - struct net_device *dev; - struct dn_ifaddr *ifa; - - rcu_read_lock(); - dev = dn_db->dev; - if (dn_db->t3 <= dn_db->parms.t2) { - if (dn_db->parms.timer3) { - for (ifa = rcu_dereference(dn_db->ifa_list); - ifa; - ifa = rcu_dereference(ifa->ifa_next)) { - if (!(ifa->ifa_flags & IFA_F_SECONDARY)) - dn_db->parms.timer3(dev, ifa); - } - } - dn_db->t3 = dn_db->parms.t3; - } else { - dn_db->t3 -= dn_db->parms.t2; - } - rcu_read_unlock(); - dn_dev_set_timer(dev); -} - -static void dn_dev_set_timer(struct net_device *dev) -{ - struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); - - if (dn_db->parms.t2 > dn_db->parms.t3) - dn_db->parms.t2 = dn_db->parms.t3; - - dn_db->timer.expires = jiffies + (dn_db->parms.t2 * HZ); - - add_timer(&dn_db->timer); -} - -static struct dn_dev *dn_dev_create(struct net_device *dev, int *err) -{ - int i; - struct dn_dev_parms *p = dn_dev_list; - struct dn_dev *dn_db; - - for(i = 0; i < DN_DEV_LIST_SIZE; i++, p++) { - if (p->type == dev->type) - break; - } - - *err = -ENODEV; - if (i == DN_DEV_LIST_SIZE) - return NULL; - - *err = -ENOBUFS; - if ((dn_db = kzalloc(sizeof(struct dn_dev), GFP_ATOMIC)) == NULL) - return NULL; - - memcpy(&dn_db->parms, p, sizeof(struct dn_dev_parms)); - - rcu_assign_pointer(dev->dn_ptr, dn_db); - dn_db->dev = dev; - timer_setup(&dn_db->timer, dn_dev_timer_func, 0); - - dn_db->uptime = jiffies; - - dn_db->neigh_parms = neigh_parms_alloc(dev, &dn_neigh_table); - if (!dn_db->neigh_parms) { - RCU_INIT_POINTER(dev->dn_ptr, NULL); - kfree(dn_db); - return NULL; - } - - if (dn_db->parms.up) { - if (dn_db->parms.up(dev) < 0) { - neigh_parms_release(&dn_neigh_table, dn_db->neigh_parms); - dev->dn_ptr = NULL; - kfree(dn_db); - return NULL; - } - } - - dn_dev_sysctl_register(dev, &dn_db->parms); - - dn_dev_set_timer(dev); - - *err = 0; - return dn_db; -} - - -/* - * This processes a device up event. We only start up - * the loopback device & ethernet devices with correct - * MAC addresses automatically. Others must be started - * specifically. - * - * FIXME: How should we configure the loopback address ? If we could dispense - * with using decnet_address here and for autobind, it will be one less thing - * for users to worry about setting up. - */ - -void dn_dev_up(struct net_device *dev) -{ - struct dn_ifaddr *ifa; - __le16 addr = decnet_address; - int maybe_default = 0; - struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr); - - if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK)) - return; - - /* - * Need to ensure that loopback device has a dn_db attached to it - * to allow creation of neighbours against it, even though it might - * not have a local address of its own. Might as well do the same for - * all autoconfigured interfaces. - */ - if (dn_db == NULL) { - int err; - dn_db = dn_dev_create(dev, &err); - if (dn_db == NULL) - return; - } - - if (dev->type == ARPHRD_ETHER) { - if (memcmp(dev->dev_addr, dn_hiord, 4) != 0) - return; - addr = dn_eth2dn(dev->dev_addr); - maybe_default = 1; - } - - if (addr == 0) - return; - - if ((ifa = dn_dev_alloc_ifa()) == NULL) - return; - - ifa->ifa_local = ifa->ifa_address = addr; - ifa->ifa_flags = 0; - ifa->ifa_scope = RT_SCOPE_UNIVERSE; - strcpy(ifa->ifa_label, dev->name); - - dn_dev_set_ifa(dev, ifa); - - /* - * Automagically set the default device to the first automatically - * configured ethernet card in the system. - */ - if (maybe_default) { - dev_hold(dev); - if (dn_dev_set_default(dev, 0)) - dev_put(dev); - } -} - -static void dn_dev_delete(struct net_device *dev) -{ - struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr); - - if (dn_db == NULL) - return; - - del_timer_sync(&dn_db->timer); - dn_dev_sysctl_unregister(&dn_db->parms); - dn_dev_check_default(dev); - neigh_ifdown(&dn_neigh_table, dev); - - if (dn_db->parms.down) - dn_db->parms.down(dev); - - dev->dn_ptr = NULL; - - neigh_parms_release(&dn_neigh_table, dn_db->neigh_parms); - neigh_ifdown(&dn_neigh_table, dev); - - if (dn_db->router) - neigh_release(dn_db->router); - if (dn_db->peer) - neigh_release(dn_db->peer); - - kfree(dn_db); -} - -void dn_dev_down(struct net_device *dev) -{ - struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr); - struct dn_ifaddr *ifa; - - if (dn_db == NULL) - return; - - while ((ifa = rtnl_dereference(dn_db->ifa_list)) != NULL) { - dn_dev_del_ifa(dn_db, &dn_db->ifa_list, 0); - dn_dev_free_ifa(ifa); - } - - dn_dev_delete(dev); -} - -void dn_dev_init_pkt(struct sk_buff *skb) -{ -} - -void dn_dev_veri_pkt(struct sk_buff *skb) -{ -} - -void dn_dev_hello(struct sk_buff *skb) -{ -} - -void dn_dev_devices_off(void) -{ - struct net_device *dev; - - rtnl_lock(); - for_each_netdev(&init_net, dev) - dn_dev_down(dev); - rtnl_unlock(); - -} - -void dn_dev_devices_on(void) -{ - struct net_device *dev; - - rtnl_lock(); - for_each_netdev(&init_net, dev) { - if (dev->flags & IFF_UP) - dn_dev_up(dev); - } - rtnl_unlock(); -} - -int register_dnaddr_notifier(struct notifier_block *nb) -{ - return blocking_notifier_chain_register(&dnaddr_chain, nb); -} - -int unregister_dnaddr_notifier(struct notifier_block *nb) -{ - return blocking_notifier_chain_unregister(&dnaddr_chain, nb); -} - -#ifdef CONFIG_PROC_FS -static inline int is_dn_dev(struct net_device *dev) -{ - return dev->dn_ptr != NULL; -} - -static void *dn_dev_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) -{ - int i; - struct net_device *dev; - - rcu_read_lock(); - - if (*pos == 0) - return SEQ_START_TOKEN; - - i = 1; - for_each_netdev_rcu(&init_net, dev) { - if (!is_dn_dev(dev)) - continue; - - if (i++ == *pos) - return dev; - } - - return NULL; -} - -static void *dn_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct net_device *dev; - - ++*pos; - - dev = v; - if (v == SEQ_START_TOKEN) - dev = net_device_entry(&init_net.dev_base_head); - - for_each_netdev_continue_rcu(&init_net, dev) { - if (!is_dn_dev(dev)) - continue; - - return dev; - } - - return NULL; -} - -static void dn_dev_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) -{ - rcu_read_unlock(); -} - -static char *dn_type2asc(char type) -{ - switch (type) { - case DN_DEV_BCAST: - return "B"; - case DN_DEV_UCAST: - return "U"; - case DN_DEV_MPOINT: - return "M"; - } - - return "?"; -} - -static int dn_dev_seq_show(struct seq_file *seq, void *v) -{ - if (v == SEQ_START_TOKEN) - seq_puts(seq, "Name Flags T1 Timer1 T3 Timer3 BlkSize Pri State DevType Router Peer\n"); - else { - struct net_device *dev = v; - char peer_buf[DN_ASCBUF_LEN]; - char router_buf[DN_ASCBUF_LEN]; - struct dn_dev *dn_db = rcu_dereference(dev->dn_ptr); - - seq_printf(seq, "%-8s %1s %04u %04u %04lu %04lu" - " %04hu %03d %02x %-10s %-7s %-7s\n", - dev->name, - dn_type2asc(dn_db->parms.mode), - 0, 0, - dn_db->t3, dn_db->parms.t3, - mtu2blksize(dev), - dn_db->parms.priority, - dn_db->parms.state, dn_db->parms.name, - dn_db->router ? dn_addr2asc(le16_to_cpu(*(__le16 *)dn_db->router->primary_key), router_buf) : "", - dn_db->peer ? dn_addr2asc(le16_to_cpu(*(__le16 *)dn_db->peer->primary_key), peer_buf) : ""); - } - return 0; -} - -static const struct seq_operations dn_dev_seq_ops = { - .start = dn_dev_seq_start, - .next = dn_dev_seq_next, - .stop = dn_dev_seq_stop, - .show = dn_dev_seq_show, -}; -#endif /* CONFIG_PROC_FS */ - -static int addr[2]; -module_param_array(addr, int, NULL, 0444); -MODULE_PARM_DESC(addr, "The DECnet address of this machine: area,node"); - -void __init dn_dev_init(void) -{ - if (addr[0] > 63 || addr[0] < 0) { - printk(KERN_ERR "DECnet: Area must be between 0 and 63"); - return; - } - - if (addr[1] > 1023 || addr[1] < 0) { - printk(KERN_ERR "DECnet: Node must be between 0 and 1023"); - return; - } - - decnet_address = cpu_to_le16((addr[0] << 10) | addr[1]); - - dn_dev_devices_on(); - - rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_NEWADDR, - dn_nl_newaddr, NULL, 0); - rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_DELADDR, - dn_nl_deladdr, NULL, 0); - rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETADDR, - NULL, dn_nl_dump_ifaddr, 0); - - proc_create_seq("decnet_dev", 0444, init_net.proc_net, &dn_dev_seq_ops); - -#ifdef CONFIG_SYSCTL - { - int i; - for(i = 0; i < DN_DEV_LIST_SIZE; i++) - dn_dev_sysctl_register(NULL, &dn_dev_list[i]); - } -#endif /* CONFIG_SYSCTL */ -} - -void __exit dn_dev_cleanup(void) -{ -#ifdef CONFIG_SYSCTL - { - int i; - for(i = 0; i < DN_DEV_LIST_SIZE; i++) - dn_dev_sysctl_unregister(&dn_dev_list[i]); - } -#endif /* CONFIG_SYSCTL */ - - remove_proc_entry("decnet_dev", init_net.proc_net); - - dn_dev_devices_off(); -} diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c deleted file mode 100644 index 269c029ad74f..000000000000 --- a/net/decnet/dn_fib.c +++ /dev/null @@ -1,798 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * DECnet An implementation of the DECnet protocol suite for the LINUX - * operating system. DECnet is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * DECnet Routing Forwarding Information Base (Glue/Info List) - * - * Author: Steve Whitehouse - * - * - * Changes: - * Alexey Kuznetsov : SMP locking changes - * Steve Whitehouse : Rewrote it... Well to be more correct, I - * copied most of it from the ipv4 fib code. - * Steve Whitehouse : Updated it in style and fixed a few bugs - * which were fixed in the ipv4 code since - * this code was copied from it. - * - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define RT_MIN_TABLE 1 - -#define for_fib_info() { struct dn_fib_info *fi;\ - for(fi = dn_fib_info_list; fi; fi = fi->fib_next) -#define endfor_fib_info() } - -#define for_nexthops(fi) { int nhsel; const struct dn_fib_nh *nh;\ - for(nhsel = 0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++) - -#define change_nexthops(fi) { int nhsel; struct dn_fib_nh *nh;\ - for(nhsel = 0, nh = (struct dn_fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++) - -#define endfor_nexthops(fi) } - -static DEFINE_SPINLOCK(dn_fib_multipath_lock); -static struct dn_fib_info *dn_fib_info_list; -static DEFINE_SPINLOCK(dn_fib_info_lock); - -static struct -{ - int error; - u8 scope; -} dn_fib_props[RTN_MAX+1] = { - [RTN_UNSPEC] = { .error = 0, .scope = RT_SCOPE_NOWHERE }, - [RTN_UNICAST] = { .error = 0, .scope = RT_SCOPE_UNIVERSE }, - [RTN_LOCAL] = { .error = 0, .scope = RT_SCOPE_HOST }, - [RTN_BROADCAST] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE }, - [RTN_ANYCAST] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE }, - [RTN_MULTICAST] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE }, - [RTN_BLACKHOLE] = { .error = -EINVAL, .scope = RT_SCOPE_UNIVERSE }, - [RTN_UNREACHABLE] = { .error = -EHOSTUNREACH, .scope = RT_SCOPE_UNIVERSE }, - [RTN_PROHIBIT] = { .error = -EACCES, .scope = RT_SCOPE_UNIVERSE }, - [RTN_THROW] = { .error = -EAGAIN, .scope = RT_SCOPE_UNIVERSE }, - [RTN_NAT] = { .error = 0, .scope = RT_SCOPE_NOWHERE }, - [RTN_XRESOLVE] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE }, -}; - -static int dn_fib_sync_down(__le16 local, struct net_device *dev, int force); -static int dn_fib_sync_up(struct net_device *dev); - -void dn_fib_free_info(struct dn_fib_info *fi) -{ - if (fi->fib_dead == 0) { - printk(KERN_DEBUG "DECnet: BUG! Attempt to free alive dn_fib_info\n"); - return; - } - - change_nexthops(fi) { - dev_put(nh->nh_dev); - nh->nh_dev = NULL; - } endfor_nexthops(fi); - kfree(fi); -} - -void dn_fib_release_info(struct dn_fib_info *fi) -{ - spin_lock(&dn_fib_info_lock); - if (fi && refcount_dec_and_test(&fi->fib_treeref)) { - if (fi->fib_next) - fi->fib_next->fib_prev = fi->fib_prev; - if (fi->fib_prev) - fi->fib_prev->fib_next = fi->fib_next; - if (fi == dn_fib_info_list) - dn_fib_info_list = fi->fib_next; - fi->fib_dead = 1; - dn_fib_info_put(fi); - } - spin_unlock(&dn_fib_info_lock); -} - -static inline int dn_fib_nh_comp(const struct dn_fib_info *fi, const struct dn_fib_info *ofi) -{ - const struct dn_fib_nh *onh = ofi->fib_nh; - - for_nexthops(fi) { - if (nh->nh_oif != onh->nh_oif || - nh->nh_gw != onh->nh_gw || - nh->nh_scope != onh->nh_scope || - nh->nh_weight != onh->nh_weight || - ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD)) - return -1; - onh++; - } endfor_nexthops(fi); - return 0; -} - -static inline struct dn_fib_info *dn_fib_find_info(const struct dn_fib_info *nfi) -{ - for_fib_info() { - if (fi->fib_nhs != nfi->fib_nhs) - continue; - if (nfi->fib_protocol == fi->fib_protocol && - nfi->fib_prefsrc == fi->fib_prefsrc && - nfi->fib_priority == fi->fib_priority && - memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(fi->fib_metrics)) == 0 && - ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 && - (nfi->fib_nhs == 0 || dn_fib_nh_comp(fi, nfi) == 0)) - return fi; - } endfor_fib_info(); - return NULL; -} - -static int dn_fib_count_nhs(const struct nlattr *attr) -{ - struct rtnexthop *nhp = nla_data(attr); - int nhs = 0, nhlen = nla_len(attr); - - while (rtnh_ok(nhp, nhlen)) { - nhs++; - nhp = rtnh_next(nhp, &nhlen); - } - - /* leftover implies invalid nexthop configuration, discard it */ - return nhlen > 0 ? 0 : nhs; -} - -static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct nlattr *attr, - const struct rtmsg *r) -{ - struct rtnexthop *nhp = nla_data(attr); - int nhlen = nla_len(attr); - - change_nexthops(fi) { - int attrlen; - - if (!rtnh_ok(nhp, nhlen)) - return -EINVAL; - - nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags; - nh->nh_oif = nhp->rtnh_ifindex; - nh->nh_weight = nhp->rtnh_hops + 1; - - attrlen = rtnh_attrlen(nhp); - if (attrlen > 0) { - struct nlattr *gw_attr; - - gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY); - nh->nh_gw = gw_attr ? nla_get_le16(gw_attr) : 0; - } - - nhp = rtnh_next(nhp, &nhlen); - } endfor_nexthops(fi); - - return 0; -} - - -static int dn_fib_check_nh(const struct rtmsg *r, struct dn_fib_info *fi, struct dn_fib_nh *nh) -{ - int err; - - if (nh->nh_gw) { - struct flowidn fld; - struct dn_fib_res res; - - if (nh->nh_flags&RTNH_F_ONLINK) { - struct net_device *dev; - - if (r->rtm_scope >= RT_SCOPE_LINK) - return -EINVAL; - if (dnet_addr_type(nh->nh_gw) != RTN_UNICAST) - return -EINVAL; - if ((dev = __dev_get_by_index(&init_net, nh->nh_oif)) == NULL) - return -ENODEV; - if (!(dev->flags&IFF_UP)) - return -ENETDOWN; - nh->nh_dev = dev; - dev_hold(dev); - nh->nh_scope = RT_SCOPE_LINK; - return 0; - } - - memset(&fld, 0, sizeof(fld)); - fld.daddr = nh->nh_gw; - fld.flowidn_oif = nh->nh_oif; - fld.flowidn_scope = r->rtm_scope + 1; - - if (fld.flowidn_scope < RT_SCOPE_LINK) - fld.flowidn_scope = RT_SCOPE_LINK; - - if ((err = dn_fib_lookup(&fld, &res)) != 0) - return err; - - err = -EINVAL; - if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) - goto out; - nh->nh_scope = res.scope; - nh->nh_oif = DN_FIB_RES_OIF(res); - nh->nh_dev = DN_FIB_RES_DEV(res); - if (nh->nh_dev == NULL) - goto out; - dev_hold(nh->nh_dev); - err = -ENETDOWN; - if (!(nh->nh_dev->flags & IFF_UP)) - goto out; - err = 0; -out: - dn_fib_res_put(&res); - return err; - } else { - struct net_device *dev; - - if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK)) - return -EINVAL; - - dev = __dev_get_by_index(&init_net, nh->nh_oif); - if (dev == NULL || dev->dn_ptr == NULL) - return -ENODEV; - if (!(dev->flags&IFF_UP)) - return -ENETDOWN; - nh->nh_dev = dev; - dev_hold(nh->nh_dev); - nh->nh_scope = RT_SCOPE_HOST; - } - - return 0; -} - - -struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct nlattr *attrs[], - const struct nlmsghdr *nlh, int *errp) -{ - int err; - struct dn_fib_info *fi = NULL; - struct dn_fib_info *ofi; - int nhs = 1; - - if (r->rtm_type > RTN_MAX) - goto err_inval; - - if (dn_fib_props[r->rtm_type].scope > r->rtm_scope) - goto err_inval; - - if (attrs[RTA_MULTIPATH] && - (nhs = dn_fib_count_nhs(attrs[RTA_MULTIPATH])) == 0) - goto err_inval; - - fi = kzalloc(struct_size(fi, fib_nh, nhs), GFP_KERNEL); - err = -ENOBUFS; - if (fi == NULL) - goto failure; - - fi->fib_protocol = r->rtm_protocol; - fi->fib_nhs = nhs; - fi->fib_flags = r->rtm_flags; - - if (attrs[RTA_PRIORITY]) - fi->fib_priority = nla_get_u32(attrs[RTA_PRIORITY]); - - if (attrs[RTA_METRICS]) { - struct nlattr *attr; - int rem; - - nla_for_each_nested(attr, attrs[RTA_METRICS], rem) { - int type = nla_type(attr); - - if (type) { - if (type > RTAX_MAX || type == RTAX_CC_ALGO || - nla_len(attr) < 4) - goto err_inval; - - fi->fib_metrics[type-1] = nla_get_u32(attr); - } - } - } - - if (attrs[RTA_PREFSRC]) - fi->fib_prefsrc = nla_get_le16(attrs[RTA_PREFSRC]); - - if (attrs[RTA_MULTIPATH]) { - if ((err = dn_fib_get_nhs(fi, attrs[RTA_MULTIPATH], r)) != 0) - goto failure; - - if (attrs[RTA_OIF] && - fi->fib_nh->nh_oif != nla_get_u32(attrs[RTA_OIF])) - goto err_inval; - - if (attrs[RTA_GATEWAY] && - fi->fib_nh->nh_gw != nla_get_le16(attrs[RTA_GATEWAY])) - goto err_inval; - } else { - struct dn_fib_nh *nh = fi->fib_nh; - - if (attrs[RTA_OIF]) - nh->nh_oif = nla_get_u32(attrs[RTA_OIF]); - - if (attrs[RTA_GATEWAY]) - nh->nh_gw = nla_get_le16(attrs[RTA_GATEWAY]); - - nh->nh_flags = r->rtm_flags; - nh->nh_weight = 1; - } - - if (r->rtm_type == RTN_NAT) { - if (!attrs[RTA_GATEWAY] || nhs != 1 || attrs[RTA_OIF]) - goto err_inval; - - fi->fib_nh->nh_gw = nla_get_le16(attrs[RTA_GATEWAY]); - goto link_it; - } - - if (dn_fib_props[r->rtm_type].error) { - if (attrs[RTA_GATEWAY] || attrs[RTA_OIF] || attrs[RTA_MULTIPATH]) - goto err_inval; - - goto link_it; - } - - if (r->rtm_scope > RT_SCOPE_HOST) - goto err_inval; - - if (r->rtm_scope == RT_SCOPE_HOST) { - struct dn_fib_nh *nh = fi->fib_nh; - - /* Local address is added */ - if (nhs != 1 || nh->nh_gw) - goto err_inval; - nh->nh_scope = RT_SCOPE_NOWHERE; - nh->nh_dev = dev_get_by_index(&init_net, fi->fib_nh->nh_oif); - err = -ENODEV; - if (nh->nh_dev == NULL) - goto failure; - } else { - change_nexthops(fi) { - if ((err = dn_fib_check_nh(r, fi, nh)) != 0) - goto failure; - } endfor_nexthops(fi) - } - - if (fi->fib_prefsrc) { - if (r->rtm_type != RTN_LOCAL || !attrs[RTA_DST] || - fi->fib_prefsrc != nla_get_le16(attrs[RTA_DST])) - if (dnet_addr_type(fi->fib_prefsrc) != RTN_LOCAL) - goto err_inval; - } - -link_it: - if ((ofi = dn_fib_find_info(fi)) != NULL) { - fi->fib_dead = 1; - dn_fib_free_info(fi); - refcount_inc(&ofi->fib_treeref); - return ofi; - } - - refcount_set(&fi->fib_treeref, 1); - refcount_set(&fi->fib_clntref, 1); - spin_lock(&dn_fib_info_lock); - fi->fib_next = dn_fib_info_list; - fi->fib_prev = NULL; - if (dn_fib_info_list) - dn_fib_info_list->fib_prev = fi; - dn_fib_info_list = fi; - spin_unlock(&dn_fib_info_lock); - return fi; - -err_inval: - err = -EINVAL; - -failure: - *errp = err; - if (fi) { - fi->fib_dead = 1; - dn_fib_free_info(fi); - } - - return NULL; -} - -int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowidn *fld, struct dn_fib_res *res) -{ - int err = dn_fib_props[type].error; - - if (err == 0) { - if (fi->fib_flags & RTNH_F_DEAD) - return 1; - - res->fi = fi; - - switch (type) { - case RTN_NAT: - DN_FIB_RES_RESET(*res); - refcount_inc(&fi->fib_clntref); - return 0; - case RTN_UNICAST: - case RTN_LOCAL: - for_nexthops(fi) { - if (nh->nh_flags & RTNH_F_DEAD) - continue; - if (!fld->flowidn_oif || - fld->flowidn_oif == nh->nh_oif) - break; - } - if (nhsel < fi->fib_nhs) { - res->nh_sel = nhsel; - refcount_inc(&fi->fib_clntref); - return 0; - } - endfor_nexthops(fi); - res->fi = NULL; - return 1; - default: - net_err_ratelimited("DECnet: impossible routing event : dn_fib_semantic_match type=%d\n", - type); - res->fi = NULL; - return -EINVAL; - } - } - return err; -} - -void dn_fib_select_multipath(const struct flowidn *fld, struct dn_fib_res *res) -{ - struct dn_fib_info *fi = res->fi; - int w; - - spin_lock_bh(&dn_fib_multipath_lock); - if (fi->fib_power <= 0) { - int power = 0; - change_nexthops(fi) { - if (!(nh->nh_flags&RTNH_F_DEAD)) { - power += nh->nh_weight; - nh->nh_power = nh->nh_weight; - } - } endfor_nexthops(fi); - fi->fib_power = power; - if (power < 0) { - spin_unlock_bh(&dn_fib_multipath_lock); - res->nh_sel = 0; - return; - } - } - - w = jiffies % fi->fib_power; - - change_nexthops(fi) { - if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) { - if ((w -= nh->nh_power) <= 0) { - nh->nh_power--; - fi->fib_power--; - res->nh_sel = nhsel; - spin_unlock_bh(&dn_fib_multipath_lock); - return; - } - } - } endfor_nexthops(fi); - res->nh_sel = 0; - spin_unlock_bh(&dn_fib_multipath_lock); -} - -static inline u32 rtm_get_table(struct nlattr *attrs[], u8 table) -{ - if (attrs[RTA_TABLE]) - table = nla_get_u32(attrs[RTA_TABLE]); - - return table; -} - -static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) -{ - struct net *net = sock_net(skb->sk); - struct dn_fib_table *tb; - struct rtmsg *r = nlmsg_data(nlh); - struct nlattr *attrs[RTA_MAX+1]; - int err; - - if (!netlink_capable(skb, CAP_NET_ADMIN)) - return -EPERM; - - if (!net_eq(net, &init_net)) - return -EINVAL; - - err = nlmsg_parse_deprecated(nlh, sizeof(*r), attrs, RTA_MAX, - rtm_dn_policy, extack); - if (err < 0) - return err; - - tb = dn_fib_get_table(rtm_get_table(attrs, r->rtm_table), 0); - if (!tb) - return -ESRCH; - - return tb->delete(tb, r, attrs, nlh, &NETLINK_CB(skb)); -} - -static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) -{ - struct net *net = sock_net(skb->sk); - struct dn_fib_table *tb; - struct rtmsg *r = nlmsg_data(nlh); - struct nlattr *attrs[RTA_MAX+1]; - int err; - - if (!netlink_capable(skb, CAP_NET_ADMIN)) - return -EPERM; - - if (!net_eq(net, &init_net)) - return -EINVAL; - - err = nlmsg_parse_deprecated(nlh, sizeof(*r), attrs, RTA_MAX, - rtm_dn_policy, extack); - if (err < 0) - return err; - - tb = dn_fib_get_table(rtm_get_table(attrs, r->rtm_table), 1); - if (!tb) - return -ENOBUFS; - - return tb->insert(tb, r, attrs, nlh, &NETLINK_CB(skb)); -} - -static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifaddr *ifa) -{ - struct dn_fib_table *tb; - struct { - struct nlmsghdr nlh; - struct rtmsg rtm; - } req; - struct { - struct nlattr hdr; - __le16 dst; - } dst_attr = { - .dst = dst, - }; - struct { - struct nlattr hdr; - __le16 prefsrc; - } prefsrc_attr = { - .prefsrc = ifa->ifa_local, - }; - struct { - struct nlattr hdr; - u32 oif; - } oif_attr = { - .oif = ifa->ifa_dev->dev->ifindex, - }; - struct nlattr *attrs[RTA_MAX+1] = { - [RTA_DST] = (struct nlattr *) &dst_attr, - [RTA_PREFSRC] = (struct nlattr * ) &prefsrc_attr, - [RTA_OIF] = (struct nlattr *) &oif_attr, - }; - - memset(&req.rtm, 0, sizeof(req.rtm)); - - if (type == RTN_UNICAST) - tb = dn_fib_get_table(RT_MIN_TABLE, 1); - else - tb = dn_fib_get_table(RT_TABLE_LOCAL, 1); - - if (tb == NULL) - return; - - req.nlh.nlmsg_len = sizeof(req); - req.nlh.nlmsg_type = cmd; - req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND; - req.nlh.nlmsg_pid = 0; - req.nlh.nlmsg_seq = 0; - - req.rtm.rtm_dst_len = dst_len; - req.rtm.rtm_table = tb->n; - req.rtm.rtm_protocol = RTPROT_KERNEL; - req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST); - req.rtm.rtm_type = type; - - if (cmd == RTM_NEWROUTE) - tb->insert(tb, &req.rtm, attrs, &req.nlh, NULL); - else - tb->delete(tb, &req.rtm, attrs, &req.nlh, NULL); -} - -static void dn_fib_add_ifaddr(struct dn_ifaddr *ifa) -{ - - fib_magic(RTM_NEWROUTE, RTN_LOCAL, ifa->ifa_local, 16, ifa); - -#if 0 - if (!(dev->flags&IFF_UP)) - return; - /* In the future, we will want to add default routes here */ - -#endif -} - -static void dn_fib_del_ifaddr(struct dn_ifaddr *ifa) -{ - int found_it = 0; - struct net_device *dev; - struct dn_dev *dn_db; - struct dn_ifaddr *ifa2; - - ASSERT_RTNL(); - - /* Scan device list */ - rcu_read_lock(); - for_each_netdev_rcu(&init_net, dev) { - dn_db = rcu_dereference(dev->dn_ptr); - if (dn_db == NULL) - continue; - for (ifa2 = rcu_dereference(dn_db->ifa_list); - ifa2 != NULL; - ifa2 = rcu_dereference(ifa2->ifa_next)) { - if (ifa2->ifa_local == ifa->ifa_local) { - found_it = 1; - break; - } - } - } - rcu_read_unlock(); - - if (found_it == 0) { - fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 16, ifa); - - if (dnet_addr_type(ifa->ifa_local) != RTN_LOCAL) { - if (dn_fib_sync_down(ifa->ifa_local, NULL, 0)) - dn_fib_flush(); - } - } -} - -static void dn_fib_disable_addr(struct net_device *dev, int force) -{ - if (dn_fib_sync_down(0, dev, force)) - dn_fib_flush(); - dn_rt_cache_flush(0); - neigh_ifdown(&dn_neigh_table, dev); -} - -static int dn_fib_dnaddr_event(struct notifier_block *this, unsigned long event, void *ptr) -{ - struct dn_ifaddr *ifa = (struct dn_ifaddr *)ptr; - - switch (event) { - case NETDEV_UP: - dn_fib_add_ifaddr(ifa); - dn_fib_sync_up(ifa->ifa_dev->dev); - dn_rt_cache_flush(-1); - break; - case NETDEV_DOWN: - dn_fib_del_ifaddr(ifa); - if (ifa->ifa_dev && ifa->ifa_dev->ifa_list == NULL) { - dn_fib_disable_addr(ifa->ifa_dev->dev, 1); - } else { - dn_rt_cache_flush(-1); - } - break; - } - return NOTIFY_DONE; -} - -static int dn_fib_sync_down(__le16 local, struct net_device *dev, int force) -{ - int ret = 0; - int scope = RT_SCOPE_NOWHERE; - - if (force) - scope = -1; - - for_fib_info() { - /* - * This makes no sense for DECnet.... we will almost - * certainly have more than one local address the same - * over all our interfaces. It needs thinking about - * some more. - */ - if (local && fi->fib_prefsrc == local) { - fi->fib_flags |= RTNH_F_DEAD; - ret++; - } else if (dev && fi->fib_nhs) { - int dead = 0; - - change_nexthops(fi) { - if (nh->nh_flags&RTNH_F_DEAD) - dead++; - else if (nh->nh_dev == dev && - nh->nh_scope != scope) { - spin_lock_bh(&dn_fib_multipath_lock); - nh->nh_flags |= RTNH_F_DEAD; - fi->fib_power -= nh->nh_power; - nh->nh_power = 0; - spin_unlock_bh(&dn_fib_multipath_lock); - dead++; - } - } endfor_nexthops(fi) - if (dead == fi->fib_nhs) { - fi->fib_flags |= RTNH_F_DEAD; - ret++; - } - } - } endfor_fib_info(); - return ret; -} - - -static int dn_fib_sync_up(struct net_device *dev) -{ - int ret = 0; - - if (!(dev->flags&IFF_UP)) - return 0; - - for_fib_info() { - int alive = 0; - - change_nexthops(fi) { - if (!(nh->nh_flags&RTNH_F_DEAD)) { - alive++; - continue; - } - if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP)) - continue; - if (nh->nh_dev != dev || dev->dn_ptr == NULL) - continue; - alive++; - spin_lock_bh(&dn_fib_multipath_lock); - nh->nh_power = 0; - nh->nh_flags &= ~RTNH_F_DEAD; - spin_unlock_bh(&dn_fib_multipath_lock); - } endfor_nexthops(fi); - - if (alive > 0) { - fi->fib_flags &= ~RTNH_F_DEAD; - ret++; - } - } endfor_fib_info(); - return ret; -} - -static struct notifier_block dn_fib_dnaddr_notifier = { - .notifier_call = dn_fib_dnaddr_event, -}; - -void __exit dn_fib_cleanup(void) -{ - dn_fib_table_cleanup(); - dn_fib_rules_cleanup(); - - unregister_dnaddr_notifier(&dn_fib_dnaddr_notifier); -} - - -void __init dn_fib_init(void) -{ - dn_fib_table_init(); - dn_fib_rules_init(); - - register_dnaddr_notifier(&dn_fib_dnaddr_notifier); - - rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_NEWROUTE, - dn_fib_rtm_newroute, NULL, 0); - rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_DELROUTE, - dn_fib_rtm_delroute, NULL, 0); -} diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c deleted file mode 100644 index 7c569bcc0aca..000000000000 --- a/net/decnet/dn_neigh.c +++ /dev/null @@ -1,607 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * DECnet An implementation of the DECnet protocol suite for the LINUX - * operating system. DECnet is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * DECnet Neighbour Functions (Adjacency Database and - * On-Ethernet Cache) - * - * Author: Steve Whitehouse - * - * - * Changes: - * Steve Whitehouse : Fixed router listing routine - * Steve Whitehouse : Added error_report functions - * Steve Whitehouse : Added default router detection - * Steve Whitehouse : Hop counts in outgoing messages - * Steve Whitehouse : Fixed src/dst in outgoing messages so - * forwarding now stands a good chance of - * working. - * Steve Whitehouse : Fixed neighbour states (for now anyway). - * Steve Whitehouse : Made error_report functions dummies. This - * is not the right place to return skbs. - * Steve Whitehouse : Convert to seq_file - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static int dn_neigh_construct(struct neighbour *); -static void dn_neigh_error_report(struct neighbour *, struct sk_buff *); -static int dn_neigh_output(struct neighbour *neigh, struct sk_buff *skb); - -/* - * Operations for adding the link layer header. - */ -static const struct neigh_ops dn_neigh_ops = { - .family = AF_DECnet, - .error_report = dn_neigh_error_report, - .output = dn_neigh_output, - .connected_output = dn_neigh_output, -}; - -static u32 dn_neigh_hash(const void *pkey, - const struct net_device *dev, - __u32 *hash_rnd) -{ - return jhash_2words(*(__u16 *)pkey, 0, hash_rnd[0]); -} - -static bool dn_key_eq(const struct neighbour *neigh, const void *pkey) -{ - return neigh_key_eq16(neigh, pkey); -} - -struct neigh_table dn_neigh_table = { - .family = PF_DECnet, - .entry_size = NEIGH_ENTRY_SIZE(sizeof(struct dn_neigh)), - .key_len = sizeof(__le16), - .protocol = cpu_to_be16(ETH_P_DNA_RT), - .hash = dn_neigh_hash, - .key_eq = dn_key_eq, - .constructor = dn_neigh_construct, - .id = "dn_neigh_cache", - .parms ={ - .tbl = &dn_neigh_table, - .reachable_time = 30 * HZ, - .data = { - [NEIGH_VAR_MCAST_PROBES] = 0, - [NEIGH_VAR_UCAST_PROBES] = 0, - [NEIGH_VAR_APP_PROBES] = 0, - [NEIGH_VAR_RETRANS_TIME] = 1 * HZ, - [NEIGH_VAR_BASE_REACHABLE_TIME] = 30 * HZ, - [NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ, - [NEIGH_VAR_INTERVAL_PROBE_TIME_MS] = 5 * HZ, - [NEIGH_VAR_GC_STALETIME] = 60 * HZ, - [NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX, - [NEIGH_VAR_PROXY_QLEN] = 0, - [NEIGH_VAR_ANYCAST_DELAY] = 0, - [NEIGH_VAR_PROXY_DELAY] = 0, - [NEIGH_VAR_LOCKTIME] = 1 * HZ, - }, - }, - .gc_interval = 30 * HZ, - .gc_thresh1 = 128, - .gc_thresh2 = 512, - .gc_thresh3 = 1024, -}; - -static int dn_neigh_construct(struct neighbour *neigh) -{ - struct net_device *dev = neigh->dev; - struct dn_neigh *dn = container_of(neigh, struct dn_neigh, n); - struct dn_dev *dn_db; - struct neigh_parms *parms; - - rcu_read_lock(); - dn_db = rcu_dereference(dev->dn_ptr); - if (dn_db == NULL) { - rcu_read_unlock(); - return -EINVAL; - } - - parms = dn_db->neigh_parms; - if (!parms) { - rcu_read_unlock(); - return -EINVAL; - } - - __neigh_parms_put(neigh->parms); - neigh->parms = neigh_parms_clone(parms); - rcu_read_unlock(); - - neigh->ops = &dn_neigh_ops; - neigh->nud_state = NUD_NOARP; - neigh->output = neigh->ops->connected_output; - - if ((dev->type == ARPHRD_IPGRE) || (dev->flags & IFF_POINTOPOINT)) - memcpy(neigh->ha, dev->broadcast, dev->addr_len); - else if ((dev->type == ARPHRD_ETHER) || (dev->type == ARPHRD_LOOPBACK)) - dn_dn2eth(neigh->ha, dn->addr); - else { - net_dbg_ratelimited("Trying to create neigh for hw %d\n", - dev->type); - return -EINVAL; - } - - /* - * Make an estimate of the remote block size by assuming that its - * two less then the device mtu, which it true for ethernet (and - * other things which support long format headers) since there is - * an extra length field (of 16 bits) which isn't part of the - * ethernet headers and which the DECnet specs won't admit is part - * of the DECnet routing headers either. - * - * If we over estimate here its no big deal, the NSP negotiations - * will prevent us from sending packets which are too large for the - * remote node to handle. In any case this figure is normally updated - * by a hello message in most cases. - */ - dn->blksize = dev->mtu - 2; - - return 0; -} - -static void dn_neigh_error_report(struct neighbour *neigh, struct sk_buff *skb) -{ - printk(KERN_DEBUG "dn_neigh_error_report: called\n"); - kfree_skb(skb); -} - -static int dn_neigh_output(struct neighbour *neigh, struct sk_buff *skb) -{ - struct dst_entry *dst = skb_dst(skb); - struct dn_route *rt = (struct dn_route *)dst; - struct net_device *dev = neigh->dev; - char mac_addr[ETH_ALEN]; - unsigned int seq; - int err; - - dn_dn2eth(mac_addr, rt->rt_local_src); - do { - seq = read_seqbegin(&neigh->ha_lock); - err = dev_hard_header(skb, dev, ntohs(skb->protocol), - neigh->ha, mac_addr, skb->len); - } while (read_seqretry(&neigh->ha_lock, seq)); - - if (err >= 0) - err = dev_queue_xmit(skb); - else { - kfree_skb(skb); - err = -EINVAL; - } - return err; -} - -static int dn_neigh_output_packet(struct net *net, struct sock *sk, struct sk_buff *skb) -{ - struct dst_entry *dst = skb_dst(skb); - struct dn_route *rt = (struct dn_route *)dst; - struct neighbour *neigh = rt->n; - - return neigh->output(neigh, skb); -} - -/* - * For talking to broadcast devices: Ethernet & PPP - */ -static int dn_long_output(struct neighbour *neigh, struct sock *sk, - struct sk_buff *skb) -{ - struct net_device *dev = neigh->dev; - int headroom = dev->hard_header_len + sizeof(struct dn_long_packet) + 3; - unsigned char *data; - struct dn_long_packet *lp; - struct dn_skb_cb *cb = DN_SKB_CB(skb); - - - if (skb_headroom(skb) < headroom) { - struct sk_buff *skb2 = skb_realloc_headroom(skb, headroom); - if (skb2 == NULL) { - net_crit_ratelimited("dn_long_output: no memory\n"); - kfree_skb(skb); - return -ENOBUFS; - } - consume_skb(skb); - skb = skb2; - net_info_ratelimited("dn_long_output: Increasing headroom\n"); - } - - data = skb_push(skb, sizeof(struct dn_long_packet) + 3); - lp = (struct dn_long_packet *)(data+3); - - *((__le16 *)data) = cpu_to_le16(skb->len - 2); - *(data + 2) = 1 | DN_RT_F_PF; /* Padding */ - - lp->msgflg = DN_RT_PKT_LONG|(cb->rt_flags&(DN_RT_F_IE|DN_RT_F_RQR|DN_RT_F_RTS)); - lp->d_area = lp->d_subarea = 0; - dn_dn2eth(lp->d_id, cb->dst); - lp->s_area = lp->s_subarea = 0; - dn_dn2eth(lp->s_id, cb->src); - lp->nl2 = 0; - lp->visit_ct = cb->hops & 0x3f; - lp->s_class = 0; - lp->pt = 0; - - skb_reset_network_header(skb); - - return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, - &init_net, sk, skb, NULL, neigh->dev, - dn_neigh_output_packet); -} - -/* - * For talking to pointopoint and multidrop devices: DDCMP and X.25 - */ -static int dn_short_output(struct neighbour *neigh, struct sock *sk, - struct sk_buff *skb) -{ - struct net_device *dev = neigh->dev; - int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2; - struct dn_short_packet *sp; - unsigned char *data; - struct dn_skb_cb *cb = DN_SKB_CB(skb); - - - if (skb_headroom(skb) < headroom) { - struct sk_buff *skb2 = skb_realloc_headroom(skb, headroom); - if (skb2 == NULL) { - net_crit_ratelimited("dn_short_output: no memory\n"); - kfree_skb(skb); - return -ENOBUFS; - } - consume_skb(skb); - skb = skb2; - net_info_ratelimited("dn_short_output: Increasing headroom\n"); - } - - data = skb_push(skb, sizeof(struct dn_short_packet) + 2); - *((__le16 *)data) = cpu_to_le16(skb->len - 2); - sp = (struct dn_short_packet *)(data+2); - - sp->msgflg = DN_RT_PKT_SHORT|(cb->rt_flags&(DN_RT_F_RQR|DN_RT_F_RTS)); - sp->dstnode = cb->dst; - sp->srcnode = cb->src; - sp->forward = cb->hops & 0x3f; - - skb_reset_network_header(skb); - - return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, - &init_net, sk, skb, NULL, neigh->dev, - dn_neigh_output_packet); -} - -/* - * For talking to DECnet phase III nodes - * Phase 3 output is the same as short output, execpt that - * it clears the area bits before transmission. - */ -static int dn_phase3_output(struct neighbour *neigh, struct sock *sk, - struct sk_buff *skb) -{ - struct net_device *dev = neigh->dev; - int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2; - struct dn_short_packet *sp; - unsigned char *data; - struct dn_skb_cb *cb = DN_SKB_CB(skb); - - if (skb_headroom(skb) < headroom) { - struct sk_buff *skb2 = skb_realloc_headroom(skb, headroom); - if (skb2 == NULL) { - net_crit_ratelimited("dn_phase3_output: no memory\n"); - kfree_skb(skb); - return -ENOBUFS; - } - consume_skb(skb); - skb = skb2; - net_info_ratelimited("dn_phase3_output: Increasing headroom\n"); - } - - data = skb_push(skb, sizeof(struct dn_short_packet) + 2); - *((__le16 *)data) = cpu_to_le16(skb->len - 2); - sp = (struct dn_short_packet *)(data + 2); - - sp->msgflg = DN_RT_PKT_SHORT|(cb->rt_flags&(DN_RT_F_RQR|DN_RT_F_RTS)); - sp->dstnode = cb->dst & cpu_to_le16(0x03ff); - sp->srcnode = cb->src & cpu_to_le16(0x03ff); - sp->forward = cb->hops & 0x3f; - - skb_reset_network_header(skb); - - return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, - &init_net, sk, skb, NULL, neigh->dev, - dn_neigh_output_packet); -} - -int dn_to_neigh_output(struct net *net, struct sock *sk, struct sk_buff *skb) -{ - struct dst_entry *dst = skb_dst(skb); - struct dn_route *rt = (struct dn_route *) dst; - struct neighbour *neigh = rt->n; - struct dn_neigh *dn = container_of(neigh, struct dn_neigh, n); - struct dn_dev *dn_db; - bool use_long; - - rcu_read_lock(); - dn_db = rcu_dereference(neigh->dev->dn_ptr); - if (dn_db == NULL) { - rcu_read_unlock(); - return -EINVAL; - } - use_long = dn_db->use_long; - rcu_read_unlock(); - - if (dn->flags & DN_NDFLAG_P3) - return dn_phase3_output(neigh, sk, skb); - if (use_long) - return dn_long_output(neigh, sk, skb); - else - return dn_short_output(neigh, sk, skb); -} - -/* - * Unfortunately, the neighbour code uses the device in its hash - * function, so we don't get any advantage from it. This function - * basically does a neigh_lookup(), but without comparing the device - * field. This is required for the On-Ethernet cache - */ - -/* - * Pointopoint link receives a hello message - */ -void dn_neigh_pointopoint_hello(struct sk_buff *skb) -{ - kfree_skb(skb); -} - -/* - * Ethernet router hello message received - */ -int dn_neigh_router_hello(struct net *net, struct sock *sk, struct sk_buff *skb) -{ - struct rtnode_hello_message *msg = (struct rtnode_hello_message *)skb->data; - - struct neighbour *neigh; - struct dn_neigh *dn; - struct dn_dev *dn_db; - __le16 src; - - src = dn_eth2dn(msg->id); - - neigh = __neigh_lookup(&dn_neigh_table, &src, skb->dev, 1); - - dn = container_of(neigh, struct dn_neigh, n); - - if (neigh) { - write_lock(&neigh->lock); - - neigh->used = jiffies; - dn_db = rcu_dereference(neigh->dev->dn_ptr); - - if (!(neigh->nud_state & NUD_PERMANENT)) { - neigh->updated = jiffies; - - if (neigh->dev->type == ARPHRD_ETHER) - memcpy(neigh->ha, ð_hdr(skb)->h_source, ETH_ALEN); - - dn->blksize = le16_to_cpu(msg->blksize); - dn->priority = msg->priority; - - dn->flags &= ~DN_NDFLAG_P3; - - switch (msg->iinfo & DN_RT_INFO_TYPE) { - case DN_RT_INFO_L1RT: - dn->flags &=~DN_NDFLAG_R2; - dn->flags |= DN_NDFLAG_R1; - break; - case DN_RT_INFO_L2RT: - dn->flags |= DN_NDFLAG_R2; - } - } - - /* Only use routers in our area */ - if ((le16_to_cpu(src)>>10) == (le16_to_cpu((decnet_address))>>10)) { - if (!dn_db->router) { - dn_db->router = neigh_clone(neigh); - } else { - if (msg->priority > container_of(dn_db->router, - struct dn_neigh, n)->priority) - neigh_release(xchg(&dn_db->router, neigh_clone(neigh))); - } - } - write_unlock(&neigh->lock); - neigh_release(neigh); - } - - kfree_skb(skb); - return 0; -} - -/* - * Endnode hello message received - */ -int dn_neigh_endnode_hello(struct net *net, struct sock *sk, struct sk_buff *skb) -{ - struct endnode_hello_message *msg = (struct endnode_hello_message *)skb->data; - struct neighbour *neigh; - struct dn_neigh *dn; - __le16 src; - - src = dn_eth2dn(msg->id); - - neigh = __neigh_lookup(&dn_neigh_table, &src, skb->dev, 1); - - dn = container_of(neigh, struct dn_neigh, n); - - if (neigh) { - write_lock(&neigh->lock); - - neigh->used = jiffies; - - if (!(neigh->nud_state & NUD_PERMANENT)) { - neigh->updated = jiffies; - - if (neigh->dev->type == ARPHRD_ETHER) - memcpy(neigh->ha, ð_hdr(skb)->h_source, ETH_ALEN); - dn->flags &= ~(DN_NDFLAG_R1 | DN_NDFLAG_R2); - dn->blksize = le16_to_cpu(msg->blksize); - dn->priority = 0; - } - - write_unlock(&neigh->lock); - neigh_release(neigh); - } - - kfree_skb(skb); - return 0; -} - -static char *dn_find_slot(char *base, int max, int priority) -{ - int i; - unsigned char *min = NULL; - - base += 6; /* skip first id */ - - for(i = 0; i < max; i++) { - if (!min || (*base < *min)) - min = base; - base += 7; /* find next priority */ - } - - if (!min) - return NULL; - - return (*min < priority) ? (min - 6) : NULL; -} - -struct elist_cb_state { - struct net_device *dev; - unsigned char *ptr; - unsigned char *rs; - int t, n; -}; - -static void neigh_elist_cb(struct neighbour *neigh, void *_info) -{ - struct elist_cb_state *s = _info; - struct dn_neigh *dn; - - if (neigh->dev != s->dev) - return; - - dn = container_of(neigh, struct dn_neigh, n); - if (!(dn->flags & (DN_NDFLAG_R1|DN_NDFLAG_R2))) - return; - - if (s->t == s->n) - s->rs = dn_find_slot(s->ptr, s->n, dn->priority); - else - s->t++; - if (s->rs == NULL) - return; - - dn_dn2eth(s->rs, dn->addr); - s->rs += 6; - *(s->rs) = neigh->nud_state & NUD_CONNECTED ? 0x80 : 0x0; - *(s->rs) |= dn->priority; - s->rs++; -} - -int dn_neigh_elist(struct net_device *dev, unsigned char *ptr, int n) -{ - struct elist_cb_state state; - - state.dev = dev; - state.t = 0; - state.n = n; - state.ptr = ptr; - state.rs = ptr; - - neigh_for_each(&dn_neigh_table, neigh_elist_cb, &state); - - return state.t; -} - - -#ifdef CONFIG_PROC_FS - -static inline void dn_neigh_format_entry(struct seq_file *seq, - struct neighbour *n) -{ - struct dn_neigh *dn = container_of(n, struct dn_neigh, n); - char buf[DN_ASCBUF_LEN]; - - read_lock(&n->lock); - seq_printf(seq, "%-7s %s%s%s %02x %02d %07ld %-8s\n", - dn_addr2asc(le16_to_cpu(dn->addr), buf), - (dn->flags&DN_NDFLAG_R1) ? "1" : "-", - (dn->flags&DN_NDFLAG_R2) ? "2" : "-", - (dn->flags&DN_NDFLAG_P3) ? "3" : "-", - dn->n.nud_state, - refcount_read(&dn->n.refcnt), - dn->blksize, - (dn->n.dev) ? dn->n.dev->name : "?"); - read_unlock(&n->lock); -} - -static int dn_neigh_seq_show(struct seq_file *seq, void *v) -{ - if (v == SEQ_START_TOKEN) { - seq_puts(seq, "Addr Flags State Use Blksize Dev\n"); - } else { - dn_neigh_format_entry(seq, v); - } - - return 0; -} - -static void *dn_neigh_seq_start(struct seq_file *seq, loff_t *pos) -{ - return neigh_seq_start(seq, pos, &dn_neigh_table, - NEIGH_SEQ_NEIGH_ONLY); -} - -static const struct seq_operations dn_neigh_seq_ops = { - .start = dn_neigh_seq_start, - .next = neigh_seq_next, - .stop = neigh_seq_stop, - .show = dn_neigh_seq_show, -}; -#endif - -void __init dn_neigh_init(void) -{ - neigh_table_init(NEIGH_DN_TABLE, &dn_neigh_table); - proc_create_net("decnet_neigh", 0444, init_net.proc_net, - &dn_neigh_seq_ops, sizeof(struct neigh_seq_state)); -} - -void __exit dn_neigh_cleanup(void) -{ - remove_proc_entry("decnet_neigh", init_net.proc_net); - neigh_table_clear(NEIGH_DN_TABLE, &dn_neigh_table); -} diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c deleted file mode 100644 index c59be5b04479..000000000000 --- a/net/decnet/dn_nsp_in.c +++ /dev/null @@ -1,907 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * DECnet An implementation of the DECnet protocol suite for the LINUX - * operating system. DECnet is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * DECnet Network Services Protocol (Input) - * - * Author: Eduardo Marcelo Serrat - * - * Changes: - * - * Steve Whitehouse: Split into dn_nsp_in.c and dn_nsp_out.c from - * original dn_nsp.c. - * Steve Whitehouse: Updated to work with my new routing architecture. - * Steve Whitehouse: Add changes from Eduardo Serrat's patches. - * Steve Whitehouse: Put all ack handling code in a common routine. - * Steve Whitehouse: Put other common bits into dn_nsp_rx() - * Steve Whitehouse: More checks on skb->len to catch bogus packets - * Fixed various race conditions and possible nasties. - * Steve Whitehouse: Now handles returned conninit frames. - * David S. Miller: New socket locking - * Steve Whitehouse: Fixed lockup when socket filtering was enabled. - * Paul Koning: Fix to push CC sockets into RUN when acks are - * received. - * Steve Whitehouse: - * Patrick Caulfield: Checking conninits for correctness & sending of error - * responses. - * Steve Whitehouse: Added backlog congestion level return codes. - * Patrick Caulfield: - * Steve Whitehouse: Added flow control support (outbound) - * Steve Whitehouse: Prepare for nonlinear skbs - */ - -/****************************************************************************** - (c) 1995-1998 E.M. Serrat emserrat@geocities.com - -*******************************************************************************/ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -extern int decnet_log_martians; - -static void dn_log_martian(struct sk_buff *skb, const char *msg) -{ - if (decnet_log_martians) { - char *devname = skb->dev ? skb->dev->name : "???"; - struct dn_skb_cb *cb = DN_SKB_CB(skb); - net_info_ratelimited("DECnet: Martian packet (%s) dev=%s src=0x%04hx dst=0x%04hx srcport=0x%04hx dstport=0x%04hx\n", - msg, devname, - le16_to_cpu(cb->src), - le16_to_cpu(cb->dst), - le16_to_cpu(cb->src_port), - le16_to_cpu(cb->dst_port)); - } -} - -/* - * For this function we've flipped the cross-subchannel bit - * if the message is an otherdata or linkservice message. Thus - * we can use it to work out what to update. - */ -static void dn_ack(struct sock *sk, struct sk_buff *skb, unsigned short ack) -{ - struct dn_scp *scp = DN_SK(sk); - unsigned short type = ((ack >> 12) & 0x0003); - int wakeup = 0; - - switch (type) { - case 0: /* ACK - Data */ - if (dn_after(ack, scp->ackrcv_dat)) { - scp->ackrcv_dat = ack & 0x0fff; - wakeup |= dn_nsp_check_xmit_queue(sk, skb, - &scp->data_xmit_queue, - ack); - } - break; - case 1: /* NAK - Data */ - break; - case 2: /* ACK - OtherData */ - if (dn_after(ack, scp->ackrcv_oth)) { - scp->ackrcv_oth = ack & 0x0fff; - wakeup |= dn_nsp_check_xmit_queue(sk, skb, - &scp->other_xmit_queue, - ack); - } - break; - case 3: /* NAK - OtherData */ - break; - } - - if (wakeup && !sock_flag(sk, SOCK_DEAD)) - sk->sk_state_change(sk); -} - -/* - * This function is a universal ack processor. - */ -static int dn_process_ack(struct sock *sk, struct sk_buff *skb, int oth) -{ - __le16 *ptr = (__le16 *)skb->data; - int len = 0; - unsigned short ack; - - if (skb->len < 2) - return len; - - if ((ack = le16_to_cpu(*ptr)) & 0x8000) { - skb_pull(skb, 2); - ptr++; - len += 2; - if ((ack & 0x4000) == 0) { - if (oth) - ack ^= 0x2000; - dn_ack(sk, skb, ack); - } - } - - if (skb->len < 2) - return len; - - if ((ack = le16_to_cpu(*ptr)) & 0x8000) { - skb_pull(skb, 2); - len += 2; - if ((ack & 0x4000) == 0) { - if (oth) - ack ^= 0x2000; - dn_ack(sk, skb, ack); - } - } - - return len; -} - - -/** - * dn_check_idf - Check an image data field format is correct. - * @pptr: Pointer to pointer to image data - * @len: Pointer to length of image data - * @max: The maximum allowed length of the data in the image data field - * @follow_on: Check that this many bytes exist beyond the end of the image data - * - * Returns: 0 if ok, -1 on error - */ -static inline int dn_check_idf(unsigned char **pptr, int *len, unsigned char max, unsigned char follow_on) -{ - unsigned char *ptr = *pptr; - unsigned char flen = *ptr++; - - (*len)--; - if (flen > max) - return -1; - if ((flen + follow_on) > *len) - return -1; - - *len -= flen; - *pptr = ptr + flen; - return 0; -} - -/* - * Table of reason codes to pass back to node which sent us a badly - * formed message, plus text messages for the log. A zero entry in - * the reason field means "don't reply" otherwise a disc init is sent with - * the specified reason code. - */ -static struct { - unsigned short reason; - const char *text; -} ci_err_table[] = { - { 0, "CI: Truncated message" }, - { NSP_REASON_ID, "CI: Destination username error" }, - { NSP_REASON_ID, "CI: Destination username type" }, - { NSP_REASON_US, "CI: Source username error" }, - { 0, "CI: Truncated at menuver" }, - { 0, "CI: Truncated before access or user data" }, - { NSP_REASON_IO, "CI: Access data format error" }, - { NSP_REASON_IO, "CI: User data format error" } -}; - -/* - * This function uses a slightly different lookup method - * to find its sockets, since it searches on object name/number - * rather than port numbers. Various tests are done to ensure that - * the incoming data is in the correct format before it is queued to - * a socket. - */ -static struct sock *dn_find_listener(struct sk_buff *skb, unsigned short *reason) -{ - struct dn_skb_cb *cb = DN_SKB_CB(skb); - struct nsp_conn_init_msg *msg = (struct nsp_conn_init_msg *)skb->data; - struct sockaddr_dn dstaddr; - struct sockaddr_dn srcaddr; - unsigned char type = 0; - int dstlen; - int srclen; - unsigned char *ptr; - int len; - int err = 0; - unsigned char menuver; - - memset(&dstaddr, 0, sizeof(struct sockaddr_dn)); - memset(&srcaddr, 0, sizeof(struct sockaddr_dn)); - - /* - * 1. Decode & remove message header - */ - cb->src_port = msg->srcaddr; - cb->dst_port = msg->dstaddr; - cb->services = msg->services; - cb->info = msg->info; - cb->segsize = le16_to_cpu(msg->segsize); - - if (!pskb_may_pull(skb, sizeof(*msg))) - goto err_out; - - skb_pull(skb, sizeof(*msg)); - - len = skb->len; - ptr = skb->data; - - /* - * 2. Check destination end username format - */ - dstlen = dn_username2sockaddr(ptr, len, &dstaddr, &type); - err++; - if (dstlen < 0) - goto err_out; - - err++; - if (type > 1) - goto err_out; - - len -= dstlen; - ptr += dstlen; - - /* - * 3. Check source end username format - */ - srclen = dn_username2sockaddr(ptr, len, &srcaddr, &type); - err++; - if (srclen < 0) - goto err_out; - - len -= srclen; - ptr += srclen; - err++; - if (len < 1) - goto err_out; - - menuver = *ptr; - ptr++; - len--; - - /* - * 4. Check that optional data actually exists if menuver says it does - */ - err++; - if ((menuver & (DN_MENUVER_ACC | DN_MENUVER_USR)) && (len < 1)) - goto err_out; - - /* - * 5. Check optional access data format - */ - err++; - if (menuver & DN_MENUVER_ACC) { - if (dn_check_idf(&ptr, &len, 39, 1)) - goto err_out; - if (dn_check_idf(&ptr, &len, 39, 1)) - goto err_out; - if (dn_check_idf(&ptr, &len, 39, (menuver & DN_MENUVER_USR) ? 1 : 0)) - goto err_out; - } - - /* - * 6. Check optional user data format - */ - err++; - if (menuver & DN_MENUVER_USR) { - if (dn_check_idf(&ptr, &len, 16, 0)) - goto err_out; - } - - /* - * 7. Look up socket based on destination end username - */ - return dn_sklist_find_listener(&dstaddr); -err_out: - dn_log_martian(skb, ci_err_table[err].text); - *reason = ci_err_table[err].reason; - return NULL; -} - - -static void dn_nsp_conn_init(struct sock *sk, struct sk_buff *skb) -{ - if (sk_acceptq_is_full(sk)) { - kfree_skb(skb); - return; - } - - sk_acceptq_added(sk); - skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_state_change(sk); -} - -static void dn_nsp_conn_conf(struct sock *sk, struct sk_buff *skb) -{ - struct dn_skb_cb *cb = DN_SKB_CB(skb); - struct dn_scp *scp = DN_SK(sk); - unsigned char *ptr; - - if (skb->len < 4) - goto out; - - ptr = skb->data; - cb->services = *ptr++; - cb->info = *ptr++; - cb->segsize = le16_to_cpu(*(__le16 *)ptr); - - if ((scp->state == DN_CI) || (scp->state == DN_CD)) { - scp->persist = 0; - scp->addrrem = cb->src_port; - sk->sk_state = TCP_ESTABLISHED; - scp->state = DN_RUN; - scp->services_rem = cb->services; - scp->info_rem = cb->info; - scp->segsize_rem = cb->segsize; - - if ((scp->services_rem & NSP_FC_MASK) == NSP_FC_NONE) - scp->max_window = decnet_no_fc_max_cwnd; - - if (skb->len > 0) { - u16 dlen = *skb->data; - if ((dlen <= 16) && (dlen <= skb->len)) { - scp->conndata_in.opt_optl = cpu_to_le16(dlen); - skb_copy_from_linear_data_offset(skb, 1, - scp->conndata_in.opt_data, dlen); - } - } - dn_nsp_send_link(sk, DN_NOCHANGE, 0); - if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_state_change(sk); - } - -out: - kfree_skb(skb); -} - -static void dn_nsp_conn_ack(struct sock *sk, struct sk_buff *skb) -{ - struct dn_scp *scp = DN_SK(sk); - - if (scp->state == DN_CI) { - scp->state = DN_CD; - scp->persist = 0; - } - - kfree_skb(skb); -} - -static void dn_nsp_disc_init(struct sock *sk, struct sk_buff *skb) -{ - struct dn_scp *scp = DN_SK(sk); - struct dn_skb_cb *cb = DN_SKB_CB(skb); - unsigned short reason; - - if (skb->len < 2) - goto out; - - reason = le16_to_cpu(*(__le16 *)skb->data); - skb_pull(skb, 2); - - scp->discdata_in.opt_status = cpu_to_le16(reason); - scp->discdata_in.opt_optl = 0; - memset(scp->discdata_in.opt_data, 0, 16); - - if (skb->len > 0) { - u16 dlen = *skb->data; - if ((dlen <= 16) && (dlen <= skb->len)) { - scp->discdata_in.opt_optl = cpu_to_le16(dlen); - skb_copy_from_linear_data_offset(skb, 1, scp->discdata_in.opt_data, dlen); - } - } - - scp->addrrem = cb->src_port; - sk->sk_state = TCP_CLOSE; - - switch (scp->state) { - case DN_CI: - case DN_CD: - scp->state = DN_RJ; - sk->sk_err = ECONNREFUSED; - break; - case DN_RUN: - sk->sk_shutdown |= SHUTDOWN_MASK; - scp->state = DN_DN; - break; - case DN_DI: - scp->state = DN_DIC; - break; - } - - if (!sock_flag(sk, SOCK_DEAD)) { - if (sk->sk_socket->state != SS_UNCONNECTED) - sk->sk_socket->state = SS_DISCONNECTING; - sk->sk_state_change(sk); - } - - /* - * It appears that its possible for remote machines to send disc - * init messages with no port identifier if we are in the CI and - * possibly also the CD state. Obviously we shouldn't reply with - * a message if we don't know what the end point is. - */ - if (scp->addrrem) { - dn_nsp_send_disc(sk, NSP_DISCCONF, NSP_REASON_DC, GFP_ATOMIC); - } - scp->persist_fxn = dn_destroy_timer; - scp->persist = dn_nsp_persist(sk); - -out: - kfree_skb(skb); -} - -/* - * disc_conf messages are also called no_resources or no_link - * messages depending upon the "reason" field. - */ -static void dn_nsp_disc_conf(struct sock *sk, struct sk_buff *skb) -{ - struct dn_scp *scp = DN_SK(sk); - unsigned short reason; - - if (skb->len != 2) - goto out; - - reason = le16_to_cpu(*(__le16 *)skb->data); - - sk->sk_state = TCP_CLOSE; - - switch (scp->state) { - case DN_CI: - scp->state = DN_NR; - break; - case DN_DR: - if (reason == NSP_REASON_DC) - scp->state = DN_DRC; - if (reason == NSP_REASON_NL) - scp->state = DN_CN; - break; - case DN_DI: - scp->state = DN_DIC; - break; - case DN_RUN: - sk->sk_shutdown |= SHUTDOWN_MASK; - fallthrough; - case DN_CC: - scp->state = DN_CN; - } - - if (!sock_flag(sk, SOCK_DEAD)) { - if (sk->sk_socket->state != SS_UNCONNECTED) - sk->sk_socket->state = SS_DISCONNECTING; - sk->sk_state_change(sk); - } - - scp->persist_fxn = dn_destroy_timer; - scp->persist = dn_nsp_persist(sk); - -out: - kfree_skb(skb); -} - -static void dn_nsp_linkservice(struct sock *sk, struct sk_buff *skb) -{ - struct dn_scp *scp = DN_SK(sk); - unsigned short segnum; - unsigned char lsflags; - signed char fcval; - int wake_up = 0; - char *ptr = skb->data; - unsigned char fctype = scp->services_rem & NSP_FC_MASK; - - if (skb->len != 4) - goto out; - - segnum = le16_to_cpu(*(__le16 *)ptr); - ptr += 2; - lsflags = *(unsigned char *)ptr++; - fcval = *ptr; - - /* - * Here we ignore erroneous packets which should really - * should cause a connection abort. It is not critical - * for now though. - */ - if (lsflags & 0xf8) - goto out; - - if (seq_next(scp->numoth_rcv, segnum)) { - seq_add(&scp->numoth_rcv, 1); - switch(lsflags & 0x04) { /* FCVAL INT */ - case 0x00: /* Normal Request */ - switch(lsflags & 0x03) { /* FCVAL MOD */ - case 0x00: /* Request count */ - if (fcval < 0) { - unsigned char p_fcval = -fcval; - if ((scp->flowrem_dat > p_fcval) && - (fctype == NSP_FC_SCMC)) { - scp->flowrem_dat -= p_fcval; - } - } else if (fcval > 0) { - scp->flowrem_dat += fcval; - wake_up = 1; - } - break; - case 0x01: /* Stop outgoing data */ - scp->flowrem_sw = DN_DONTSEND; - break; - case 0x02: /* Ok to start again */ - scp->flowrem_sw = DN_SEND; - dn_nsp_output(sk); - wake_up = 1; - } - break; - case 0x04: /* Interrupt Request */ - if (fcval > 0) { - scp->flowrem_oth += fcval; - wake_up = 1; - } - break; - } - if (wake_up && !sock_flag(sk, SOCK_DEAD)) - sk->sk_state_change(sk); - } - - dn_nsp_send_oth_ack(sk); - -out: - kfree_skb(skb); -} - -/* - * Copy of sock_queue_rcv_skb (from sock.h) without - * bh_lock_sock() (its already held when this is called) which - * also allows data and other data to be queued to a socket. - */ -static __inline__ int dn_queue_skb(struct sock *sk, struct sk_buff *skb, int sig, struct sk_buff_head *queue) -{ - int err; - - /* Cast skb->rcvbuf to unsigned... It's pointless, but reduces - number of warnings when compiling with -W --ANK - */ - if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= - (unsigned int)sk->sk_rcvbuf) { - err = -ENOMEM; - goto out; - } - - err = sk_filter(sk, skb); - if (err) - goto out; - - skb_set_owner_r(skb, sk); - skb_queue_tail(queue, skb); - - if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk); -out: - return err; -} - -static void dn_nsp_otherdata(struct sock *sk, struct sk_buff *skb) -{ - struct dn_scp *scp = DN_SK(sk); - unsigned short segnum; - struct dn_skb_cb *cb = DN_SKB_CB(skb); - int queued = 0; - - if (skb->len < 2) - goto out; - - cb->segnum = segnum = le16_to_cpu(*(__le16 *)skb->data); - skb_pull(skb, 2); - - if (seq_next(scp->numoth_rcv, segnum)) { - - if (dn_queue_skb(sk, skb, SIGURG, &scp->other_receive_queue) == 0) { - seq_add(&scp->numoth_rcv, 1); - scp->other_report = 0; - queued = 1; - } - } - - dn_nsp_send_oth_ack(sk); -out: - if (!queued) - kfree_skb(skb); -} - -static void dn_nsp_data(struct sock *sk, struct sk_buff *skb) -{ - int queued = 0; - unsigned short segnum; - struct dn_skb_cb *cb = DN_SKB_CB(skb); - struct dn_scp *scp = DN_SK(sk); - - if (skb->len < 2) - goto out; - - cb->segnum = segnum = le16_to_cpu(*(__le16 *)skb->data); - skb_pull(skb, 2); - - if (seq_next(scp->numdat_rcv, segnum)) { - if (dn_queue_skb(sk, skb, SIGIO, &sk->sk_receive_queue) == 0) { - seq_add(&scp->numdat_rcv, 1); - queued = 1; - } - - if ((scp->flowloc_sw == DN_SEND) && dn_congested(sk)) { - scp->flowloc_sw = DN_DONTSEND; - dn_nsp_send_link(sk, DN_DONTSEND, 0); - } - } - - dn_nsp_send_data_ack(sk); -out: - if (!queued) - kfree_skb(skb); -} - -/* - * If one of our conninit messages is returned, this function - * deals with it. It puts the socket into the NO_COMMUNICATION - * state. - */ -static void dn_returned_conn_init(struct sock *sk, struct sk_buff *skb) -{ - struct dn_scp *scp = DN_SK(sk); - - if (scp->state == DN_CI) { - scp->state = DN_NC; - sk->sk_state = TCP_CLOSE; - if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_state_change(sk); - } - - kfree_skb(skb); -} - -static int dn_nsp_no_socket(struct sk_buff *skb, unsigned short reason) -{ - struct dn_skb_cb *cb = DN_SKB_CB(skb); - int ret = NET_RX_DROP; - - /* Must not reply to returned packets */ - if (cb->rt_flags & DN_RT_F_RTS) - goto out; - - if ((reason != NSP_REASON_OK) && ((cb->nsp_flags & 0x0c) == 0x08)) { - switch (cb->nsp_flags & 0x70) { - case 0x10: - case 0x60: /* (Retransmitted) Connect Init */ - dn_nsp_return_disc(skb, NSP_DISCINIT, reason); - ret = NET_RX_SUCCESS; - break; - case 0x20: /* Connect Confirm */ - dn_nsp_return_disc(skb, NSP_DISCCONF, reason); - ret = NET_RX_SUCCESS; - break; - } - } - -out: - kfree_skb(skb); - return ret; -} - -static int dn_nsp_rx_packet(struct net *net, struct sock *sk2, - struct sk_buff *skb) -{ - struct dn_skb_cb *cb = DN_SKB_CB(skb); - struct sock *sk = NULL; - unsigned char *ptr = (unsigned char *)skb->data; - unsigned short reason = NSP_REASON_NL; - - if (!pskb_may_pull(skb, 2)) - goto free_out; - - skb_reset_transport_header(skb); - cb->nsp_flags = *ptr++; - - if (decnet_debug_level & 2) - printk(KERN_DEBUG "dn_nsp_rx: Message type 0x%02x\n", (int)cb->nsp_flags); - - if (cb->nsp_flags & 0x83) - goto free_out; - - /* - * Filter out conninits and useless packet types - */ - if ((cb->nsp_flags & 0x0c) == 0x08) { - switch (cb->nsp_flags & 0x70) { - case 0x00: /* NOP */ - case 0x70: /* Reserved */ - case 0x50: /* Reserved, Phase II node init */ - goto free_out; - case 0x10: - case 0x60: - if (unlikely(cb->rt_flags & DN_RT_F_RTS)) - goto free_out; - sk = dn_find_listener(skb, &reason); - goto got_it; - } - } - - if (!pskb_may_pull(skb, 3)) - goto free_out; - - /* - * Grab the destination address. - */ - cb->dst_port = *(__le16 *)ptr; - cb->src_port = 0; - ptr += 2; - - /* - * If not a connack, grab the source address too. - */ - if (pskb_may_pull(skb, 5)) { - cb->src_port = *(__le16 *)ptr; - ptr += 2; - skb_pull(skb, 5); - } - - /* - * Returned packets... - * Swap src & dst and look up in the normal way. - */ - if (unlikely(cb->rt_flags & DN_RT_F_RTS)) { - swap(cb->dst_port, cb->src_port); - swap(cb->dst, cb->src); - } - - /* - * Find the socket to which this skb is destined. - */ - sk = dn_find_by_skb(skb); -got_it: - if (sk != NULL) { - struct dn_scp *scp = DN_SK(sk); - - /* Reset backoff */ - scp->nsp_rxtshift = 0; - - /* - * We linearize everything except data segments here. - */ - if (cb->nsp_flags & ~0x60) { - if (unlikely(skb_linearize(skb))) - goto free_out; - } - - return sk_receive_skb(sk, skb, 0); - } - - return dn_nsp_no_socket(skb, reason); - -free_out: - kfree_skb(skb); - return NET_RX_DROP; -} - -int dn_nsp_rx(struct sk_buff *skb) -{ - return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_IN, - &init_net, NULL, skb, skb->dev, NULL, - dn_nsp_rx_packet); -} - -/* - * This is the main receive routine for sockets. It is called - * from the above when the socket is not busy, and also from - * sock_release() when there is a backlog queued up. - */ -int dn_nsp_backlog_rcv(struct sock *sk, struct sk_buff *skb) -{ - struct dn_scp *scp = DN_SK(sk); - struct dn_skb_cb *cb = DN_SKB_CB(skb); - - if (cb->rt_flags & DN_RT_F_RTS) { - if (cb->nsp_flags == 0x18 || cb->nsp_flags == 0x68) - dn_returned_conn_init(sk, skb); - else - kfree_skb(skb); - return NET_RX_SUCCESS; - } - - /* - * Control packet. - */ - if ((cb->nsp_flags & 0x0c) == 0x08) { - switch (cb->nsp_flags & 0x70) { - case 0x10: - case 0x60: - dn_nsp_conn_init(sk, skb); - break; - case 0x20: - dn_nsp_conn_conf(sk, skb); - break; - case 0x30: - dn_nsp_disc_init(sk, skb); - break; - case 0x40: - dn_nsp_disc_conf(sk, skb); - break; - } - - } else if (cb->nsp_flags == 0x24) { - /* - * Special for connacks, 'cos they don't have - * ack data or ack otherdata info. - */ - dn_nsp_conn_ack(sk, skb); - } else { - int other = 1; - - /* both data and ack frames can kick a CC socket into RUN */ - if ((scp->state == DN_CC) && !sock_flag(sk, SOCK_DEAD)) { - scp->state = DN_RUN; - sk->sk_state = TCP_ESTABLISHED; - sk->sk_state_change(sk); - } - - if ((cb->nsp_flags & 0x1c) == 0) - other = 0; - if (cb->nsp_flags == 0x04) - other = 0; - - /* - * Read out ack data here, this applies equally - * to data, other data, link service and both - * ack data and ack otherdata. - */ - dn_process_ack(sk, skb, other); - - /* - * If we've some sort of data here then call a - * suitable routine for dealing with it, otherwise - * the packet is an ack and can be discarded. - */ - if ((cb->nsp_flags & 0x0c) == 0) { - - if (scp->state != DN_RUN) - goto free_out; - - switch (cb->nsp_flags) { - case 0x10: /* LS */ - dn_nsp_linkservice(sk, skb); - break; - case 0x30: /* OD */ - dn_nsp_otherdata(sk, skb); - break; - default: - dn_nsp_data(sk, skb); - } - - } else { /* Ack, chuck it out here */ -free_out: - kfree_skb(skb); - } - } - - return NET_RX_SUCCESS; -} diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c deleted file mode 100644 index b05639bdfc8f..000000000000 --- a/net/decnet/dn_nsp_out.c +++ /dev/null @@ -1,696 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * DECnet An implementation of the DECnet protocol suite for the LINUX - * operating system. DECnet is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * DECnet Network Services Protocol (Output) - * - * Author: Eduardo Marcelo Serrat - * - * Changes: - * - * Steve Whitehouse: Split into dn_nsp_in.c and dn_nsp_out.c from - * original dn_nsp.c. - * Steve Whitehouse: Updated to work with my new routing architecture. - * Steve Whitehouse: Added changes from Eduardo Serrat's patches. - * Steve Whitehouse: Now conninits have the "return" bit set. - * Steve Whitehouse: Fixes to check alloc'd skbs are non NULL! - * Moved output state machine into one function - * Steve Whitehouse: New output state machine - * Paul Koning: Connect Confirm message fix. - * Eduardo Serrat: Fix to stop dn_nsp_do_disc() sending malformed packets. - * Steve Whitehouse: dn_nsp_output() and friends needed a spring clean - * Steve Whitehouse: Moved dn_nsp_send() in here from route.h - */ - -/****************************************************************************** - (c) 1995-1998 E.M. Serrat emserrat@geocities.com - -*******************************************************************************/ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -static int nsp_backoff[NSP_MAXRXTSHIFT + 1] = { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 }; - -static void dn_nsp_send(struct sk_buff *skb) -{ - struct sock *sk = skb->sk; - struct dn_scp *scp = DN_SK(sk); - struct dst_entry *dst; - struct flowidn fld; - - skb_reset_transport_header(skb); - scp->stamp = jiffies; - - dst = sk_dst_check(sk, 0); - if (dst) { -try_again: - skb_dst_set(skb, dst); - dst_output(&init_net, skb->sk, skb); - return; - } - - memset(&fld, 0, sizeof(fld)); - fld.flowidn_oif = sk->sk_bound_dev_if; - fld.saddr = dn_saddr2dn(&scp->addr); - fld.daddr = dn_saddr2dn(&scp->peer); - dn_sk_ports_copy(&fld, scp); - fld.flowidn_proto = DNPROTO_NSP; - if (dn_route_output_sock(&sk->sk_dst_cache, &fld, sk, 0) == 0) { - dst = sk_dst_get(sk); - sk->sk_route_caps = dst->dev->features; - goto try_again; - } - - sk->sk_err = EHOSTUNREACH; - if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_state_change(sk); -} - - -/* - * If sk == NULL, then we assume that we are supposed to be making - * a routing layer skb. If sk != NULL, then we are supposed to be - * creating an skb for the NSP layer. - * - * The eventual aim is for each socket to have a cached header size - * for its outgoing packets, and to set hdr from this when sk != NULL. - */ -struct sk_buff *dn_alloc_skb(struct sock *sk, int size, gfp_t pri) -{ - struct sk_buff *skb; - int hdr = 64; - - if ((skb = alloc_skb(size + hdr, pri)) == NULL) - return NULL; - - skb->protocol = htons(ETH_P_DNA_RT); - skb->pkt_type = PACKET_OUTGOING; - - if (sk) - skb_set_owner_w(skb, sk); - - skb_reserve(skb, hdr); - - return skb; -} - -/* - * Calculate persist timer based upon the smoothed round - * trip time and the variance. Backoff according to the - * nsp_backoff[] array. - */ -unsigned long dn_nsp_persist(struct sock *sk) -{ - struct dn_scp *scp = DN_SK(sk); - - unsigned long t = ((scp->nsp_srtt >> 2) + scp->nsp_rttvar) >> 1; - - t *= nsp_backoff[scp->nsp_rxtshift]; - - if (t < HZ) t = HZ; - if (t > (600*HZ)) t = (600*HZ); - - if (scp->nsp_rxtshift < NSP_MAXRXTSHIFT) - scp->nsp_rxtshift++; - - /* printk(KERN_DEBUG "rxtshift %lu, t=%lu\n", scp->nsp_rxtshift, t); */ - - return t; -} - -/* - * This is called each time we get an estimate for the rtt - * on the link. - */ -static void dn_nsp_rtt(struct sock *sk, long rtt) -{ - struct dn_scp *scp = DN_SK(sk); - long srtt = (long)scp->nsp_srtt; - long rttvar = (long)scp->nsp_rttvar; - long delta; - - /* - * If the jiffies clock flips over in the middle of timestamp - * gathering this value might turn out negative, so we make sure - * that is it always positive here. - */ - if (rtt < 0) - rtt = -rtt; - /* - * Add new rtt to smoothed average - */ - delta = ((rtt << 3) - srtt); - srtt += (delta >> 3); - if (srtt >= 1) - scp->nsp_srtt = (unsigned long)srtt; - else - scp->nsp_srtt = 1; - - /* - * Add new rtt variance to smoothed varience - */ - delta >>= 1; - rttvar += ((((delta>0)?(delta):(-delta)) - rttvar) >> 2); - if (rttvar >= 1) - scp->nsp_rttvar = (unsigned long)rttvar; - else - scp->nsp_rttvar = 1; - - /* printk(KERN_DEBUG "srtt=%lu rttvar=%lu\n", scp->nsp_srtt, scp->nsp_rttvar); */ -} - -/** - * dn_nsp_clone_and_send - Send a data packet by cloning it - * @skb: The packet to clone and transmit - * @gfp: memory allocation flag - * - * Clone a queued data or other data packet and transmit it. - * - * Returns: The number of times the packet has been sent previously - */ -static inline unsigned int dn_nsp_clone_and_send(struct sk_buff *skb, - gfp_t gfp) -{ - struct dn_skb_cb *cb = DN_SKB_CB(skb); - struct sk_buff *skb2; - int ret = 0; - - if ((skb2 = skb_clone(skb, gfp)) != NULL) { - ret = cb->xmit_count; - cb->xmit_count++; - cb->stamp = jiffies; - skb2->sk = skb->sk; - dn_nsp_send(skb2); - } - - return ret; -} - -/** - * dn_nsp_output - Try and send something from socket queues - * @sk: The socket whose queues are to be investigated - * - * Try and send the packet on the end of the data and other data queues. - * Other data gets priority over data, and if we retransmit a packet we - * reduce the window by dividing it in two. - * - */ -void dn_nsp_output(struct sock *sk) -{ - struct dn_scp *scp = DN_SK(sk); - struct sk_buff *skb; - unsigned int reduce_win = 0; - - /* - * First we check for otherdata/linkservice messages - */ - if ((skb = skb_peek(&scp->other_xmit_queue)) != NULL) - reduce_win = dn_nsp_clone_and_send(skb, GFP_ATOMIC); - - /* - * If we may not send any data, we don't. - * If we are still trying to get some other data down the - * channel, we don't try and send any data. - */ - if (reduce_win || (scp->flowrem_sw != DN_SEND)) - goto recalc_window; - - if ((skb = skb_peek(&scp->data_xmit_queue)) != NULL) - reduce_win = dn_nsp_clone_and_send(skb, GFP_ATOMIC); - - /* - * If we've sent any frame more than once, we cut the - * send window size in half. There is always a minimum - * window size of one available. - */ -recalc_window: - if (reduce_win) { - scp->snd_window >>= 1; - if (scp->snd_window < NSP_MIN_WINDOW) - scp->snd_window = NSP_MIN_WINDOW; - } -} - -int dn_nsp_xmit_timeout(struct sock *sk) -{ - struct dn_scp *scp = DN_SK(sk); - - dn_nsp_output(sk); - - if (!skb_queue_empty(&scp->data_xmit_queue) || - !skb_queue_empty(&scp->other_xmit_queue)) - scp->persist = dn_nsp_persist(sk); - - return 0; -} - -static inline __le16 *dn_mk_common_header(struct dn_scp *scp, struct sk_buff *skb, unsigned char msgflag, int len) -{ - unsigned char *ptr = skb_push(skb, len); - - BUG_ON(len < 5); - - *ptr++ = msgflag; - *((__le16 *)ptr) = scp->addrrem; - ptr += 2; - *((__le16 *)ptr) = scp->addrloc; - ptr += 2; - return (__le16 __force *)ptr; -} - -static __le16 *dn_mk_ack_header(struct sock *sk, struct sk_buff *skb, unsigned char msgflag, int hlen, int other) -{ - struct dn_scp *scp = DN_SK(sk); - unsigned short acknum = scp->numdat_rcv & 0x0FFF; - unsigned short ackcrs = scp->numoth_rcv & 0x0FFF; - __le16 *ptr; - - BUG_ON(hlen < 9); - - scp->ackxmt_dat = acknum; - scp->ackxmt_oth = ackcrs; - acknum |= 0x8000; - ackcrs |= 0x8000; - - /* If this is an "other data/ack" message, swap acknum and ackcrs */ - if (other) - swap(acknum, ackcrs); - - /* Set "cross subchannel" bit in ackcrs */ - ackcrs |= 0x2000; - - ptr = dn_mk_common_header(scp, skb, msgflag, hlen); - - *ptr++ = cpu_to_le16(acknum); - *ptr++ = cpu_to_le16(ackcrs); - - return ptr; -} - -static __le16 *dn_nsp_mk_data_header(struct sock *sk, struct sk_buff *skb, int oth) -{ - struct dn_scp *scp = DN_SK(sk); - struct dn_skb_cb *cb = DN_SKB_CB(skb); - __le16 *ptr = dn_mk_ack_header(sk, skb, cb->nsp_flags, 11, oth); - - if (unlikely(oth)) { - cb->segnum = scp->numoth; - seq_add(&scp->numoth, 1); - } else { - cb->segnum = scp->numdat; - seq_add(&scp->numdat, 1); - } - *(ptr++) = cpu_to_le16(cb->segnum); - - return ptr; -} - -void dn_nsp_queue_xmit(struct sock *sk, struct sk_buff *skb, - gfp_t gfp, int oth) -{ - struct dn_scp *scp = DN_SK(sk); - struct dn_skb_cb *cb = DN_SKB_CB(skb); - unsigned long t = ((scp->nsp_srtt >> 2) + scp->nsp_rttvar) >> 1; - - cb->xmit_count = 0; - dn_nsp_mk_data_header(sk, skb, oth); - - /* - * Slow start: If we have been idle for more than - * one RTT, then reset window to min size. - */ - if (time_is_before_jiffies(scp->stamp + t)) - scp->snd_window = NSP_MIN_WINDOW; - - if (oth) - skb_queue_tail(&scp->other_xmit_queue, skb); - else - skb_queue_tail(&scp->data_xmit_queue, skb); - - if (scp->flowrem_sw != DN_SEND) - return; - - dn_nsp_clone_and_send(skb, gfp); -} - - -int dn_nsp_check_xmit_queue(struct sock *sk, struct sk_buff *skb, struct sk_buff_head *q, unsigned short acknum) -{ - struct dn_skb_cb *cb = DN_SKB_CB(skb); - struct dn_scp *scp = DN_SK(sk); - struct sk_buff *skb2, *n, *ack = NULL; - int wakeup = 0; - int try_retrans = 0; - unsigned long reftime = cb->stamp; - unsigned long pkttime; - unsigned short xmit_count; - unsigned short segnum; - - skb_queue_walk_safe(q, skb2, n) { - struct dn_skb_cb *cb2 = DN_SKB_CB(skb2); - - if (dn_before_or_equal(cb2->segnum, acknum)) - ack = skb2; - - /* printk(KERN_DEBUG "ack: %s %04x %04x\n", ack ? "ACK" : "SKIP", (int)cb2->segnum, (int)acknum); */ - - if (ack == NULL) - continue; - - /* printk(KERN_DEBUG "check_xmit_queue: %04x, %d\n", acknum, cb2->xmit_count); */ - - /* Does _last_ packet acked have xmit_count > 1 */ - try_retrans = 0; - /* Remember to wake up the sending process */ - wakeup = 1; - /* Keep various statistics */ - pkttime = cb2->stamp; - xmit_count = cb2->xmit_count; - segnum = cb2->segnum; - /* Remove and drop ack'ed packet */ - skb_unlink(ack, q); - kfree_skb(ack); - ack = NULL; - - /* - * We don't expect to see acknowledgements for packets we - * haven't sent yet. - */ - WARN_ON(xmit_count == 0); - - /* - * If the packet has only been sent once, we can use it - * to calculate the RTT and also open the window a little - * further. - */ - if (xmit_count == 1) { - if (dn_equal(segnum, acknum)) - dn_nsp_rtt(sk, (long)(pkttime - reftime)); - - if (scp->snd_window < scp->max_window) - scp->snd_window++; - } - - /* - * Packet has been sent more than once. If this is the last - * packet to be acknowledged then we want to send the next - * packet in the send queue again (assumes the remote host does - * go-back-N error control). - */ - if (xmit_count > 1) - try_retrans = 1; - } - - if (try_retrans) - dn_nsp_output(sk); - - return wakeup; -} - -void dn_nsp_send_data_ack(struct sock *sk) -{ - struct sk_buff *skb = NULL; - - if ((skb = dn_alloc_skb(sk, 9, GFP_ATOMIC)) == NULL) - return; - - skb_reserve(skb, 9); - dn_mk_ack_header(sk, skb, 0x04, 9, 0); - dn_nsp_send(skb); -} - -void dn_nsp_send_oth_ack(struct sock *sk) -{ - struct sk_buff *skb = NULL; - - if ((skb = dn_alloc_skb(sk, 9, GFP_ATOMIC)) == NULL) - return; - - skb_reserve(skb, 9); - dn_mk_ack_header(sk, skb, 0x14, 9, 1); - dn_nsp_send(skb); -} - - -void dn_send_conn_ack (struct sock *sk) -{ - struct dn_scp *scp = DN_SK(sk); - struct sk_buff *skb = NULL; - struct nsp_conn_ack_msg *msg; - - if ((skb = dn_alloc_skb(sk, 3, sk->sk_allocation)) == NULL) - return; - - msg = skb_put(skb, 3); - msg->msgflg = 0x24; - msg->dstaddr = scp->addrrem; - - dn_nsp_send(skb); -} - -static int dn_nsp_retrans_conn_conf(struct sock *sk) -{ - struct dn_scp *scp = DN_SK(sk); - - if (scp->state == DN_CC) - dn_send_conn_conf(sk, GFP_ATOMIC); - - return 0; -} - -void dn_send_conn_conf(struct sock *sk, gfp_t gfp) -{ - struct dn_scp *scp = DN_SK(sk); - struct sk_buff *skb = NULL; - struct nsp_conn_init_msg *msg; - __u8 len = (__u8)le16_to_cpu(scp->conndata_out.opt_optl); - - if ((skb = dn_alloc_skb(sk, 50 + len, gfp)) == NULL) - return; - - msg = skb_put(skb, sizeof(*msg)); - msg->msgflg = 0x28; - msg->dstaddr = scp->addrrem; - msg->srcaddr = scp->addrloc; - msg->services = scp->services_loc; - msg->info = scp->info_loc; - msg->segsize = cpu_to_le16(scp->segsize_loc); - - skb_put_u8(skb, len); - - if (len > 0) - skb_put_data(skb, scp->conndata_out.opt_data, len); - - - dn_nsp_send(skb); - - scp->persist = dn_nsp_persist(sk); - scp->persist_fxn = dn_nsp_retrans_conn_conf; -} - - -static __inline__ void dn_nsp_do_disc(struct sock *sk, unsigned char msgflg, - unsigned short reason, gfp_t gfp, - struct dst_entry *dst, - int ddl, unsigned char *dd, __le16 rem, __le16 loc) -{ - struct sk_buff *skb = NULL; - int size = 7 + ddl + ((msgflg == NSP_DISCINIT) ? 1 : 0); - unsigned char *msg; - - if ((dst == NULL) || (rem == 0)) { - net_dbg_ratelimited("DECnet: dn_nsp_do_disc: BUG! Please report this to SteveW@ACM.org rem=%u dst=%p\n", - le16_to_cpu(rem), dst); - return; - } - - if ((skb = dn_alloc_skb(sk, size, gfp)) == NULL) - return; - - msg = skb_put(skb, size); - *msg++ = msgflg; - *(__le16 *)msg = rem; - msg += 2; - *(__le16 *)msg = loc; - msg += 2; - *(__le16 *)msg = cpu_to_le16(reason); - msg += 2; - if (msgflg == NSP_DISCINIT) - *msg++ = ddl; - - if (ddl) { - memcpy(msg, dd, ddl); - } - - /* - * This doesn't go via the dn_nsp_send() function since we need - * to be able to send disc packets out which have no socket - * associations. - */ - skb_dst_set(skb, dst_clone(dst)); - dst_output(&init_net, skb->sk, skb); -} - - -void dn_nsp_send_disc(struct sock *sk, unsigned char msgflg, - unsigned short reason, gfp_t gfp) -{ - struct dn_scp *scp = DN_SK(sk); - int ddl = 0; - - if (msgflg == NSP_DISCINIT) - ddl = le16_to_cpu(scp->discdata_out.opt_optl); - - if (reason == 0) - reason = le16_to_cpu(scp->discdata_out.opt_status); - - dn_nsp_do_disc(sk, msgflg, reason, gfp, __sk_dst_get(sk), ddl, - scp->discdata_out.opt_data, scp->addrrem, scp->addrloc); -} - - -void dn_nsp_return_disc(struct sk_buff *skb, unsigned char msgflg, - unsigned short reason) -{ - struct dn_skb_cb *cb = DN_SKB_CB(skb); - int ddl = 0; - gfp_t gfp = GFP_ATOMIC; - - dn_nsp_do_disc(NULL, msgflg, reason, gfp, skb_dst(skb), ddl, - NULL, cb->src_port, cb->dst_port); -} - - -void dn_nsp_send_link(struct sock *sk, unsigned char lsflags, char fcval) -{ - struct dn_scp *scp = DN_SK(sk); - struct sk_buff *skb; - unsigned char *ptr; - gfp_t gfp = GFP_ATOMIC; - - if ((skb = dn_alloc_skb(sk, DN_MAX_NSP_DATA_HEADER + 2, gfp)) == NULL) - return; - - skb_reserve(skb, DN_MAX_NSP_DATA_HEADER); - ptr = skb_put(skb, 2); - DN_SKB_CB(skb)->nsp_flags = 0x10; - *ptr++ = lsflags; - *ptr = fcval; - - dn_nsp_queue_xmit(sk, skb, gfp, 1); - - scp->persist = dn_nsp_persist(sk); - scp->persist_fxn = dn_nsp_xmit_timeout; -} - -static int dn_nsp_retrans_conninit(struct sock *sk) -{ - struct dn_scp *scp = DN_SK(sk); - - if (scp->state == DN_CI) - dn_nsp_send_conninit(sk, NSP_RCI); - - return 0; -} - -void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg) -{ - struct dn_scp *scp = DN_SK(sk); - struct nsp_conn_init_msg *msg; - unsigned char aux; - unsigned char menuver; - struct dn_skb_cb *cb; - unsigned char type = 1; - gfp_t allocation = (msgflg == NSP_CI) ? sk->sk_allocation : GFP_ATOMIC; - struct sk_buff *skb = dn_alloc_skb(sk, 200, allocation); - - if (!skb) - return; - - cb = DN_SKB_CB(skb); - msg = skb_put(skb, sizeof(*msg)); - - msg->msgflg = msgflg; - msg->dstaddr = 0x0000; /* Remote Node will assign it*/ - - msg->srcaddr = scp->addrloc; - msg->services = scp->services_loc; /* Requested flow control */ - msg->info = scp->info_loc; /* Version Number */ - msg->segsize = cpu_to_le16(scp->segsize_loc); /* Max segment size */ - - if (scp->peer.sdn_objnum) - type = 0; - - skb_put(skb, dn_sockaddr2username(&scp->peer, - skb_tail_pointer(skb), type)); - skb_put(skb, dn_sockaddr2username(&scp->addr, - skb_tail_pointer(skb), 2)); - - menuver = DN_MENUVER_ACC | DN_MENUVER_USR; - if (scp->peer.sdn_flags & SDF_PROXY) - menuver |= DN_MENUVER_PRX; - if (scp->peer.sdn_flags & SDF_UICPROXY) - menuver |= DN_MENUVER_UIC; - - skb_put_u8(skb, menuver); /* Menu Version */ - - aux = scp->accessdata.acc_userl; - skb_put_u8(skb, aux); - if (aux > 0) - skb_put_data(skb, scp->accessdata.acc_user, aux); - - aux = scp->accessdata.acc_passl; - skb_put_u8(skb, aux); - if (aux > 0) - skb_put_data(skb, scp->accessdata.acc_pass, aux); - - aux = scp->accessdata.acc_accl; - skb_put_u8(skb, aux); - if (aux > 0) - skb_put_data(skb, scp->accessdata.acc_acc, aux); - - aux = (__u8)le16_to_cpu(scp->conndata_out.opt_optl); - skb_put_u8(skb, aux); - if (aux > 0) - skb_put_data(skb, scp->conndata_out.opt_data, aux); - - scp->persist = dn_nsp_persist(sk); - scp->persist_fxn = dn_nsp_retrans_conninit; - - cb->rt_flags = DN_RT_F_RQR; - - dn_nsp_send(skb); -} diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c deleted file mode 100644 index ac2ee1689111..000000000000 --- a/net/decnet/dn_route.c +++ /dev/null @@ -1,1922 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * DECnet An implementation of the DECnet protocol suite for the LINUX - * operating system. DECnet is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * DECnet Routing Functions (Endnode and Router) - * - * Authors: Steve Whitehouse - * Eduardo Marcelo Serrat - * - * Changes: - * Steve Whitehouse : Fixes to allow "intra-ethernet" and - * "return-to-sender" bits on outgoing - * packets. - * Steve Whitehouse : Timeouts for cached routes. - * Steve Whitehouse : Use dst cache for input routes too. - * Steve Whitehouse : Fixed error values in dn_send_skb. - * Steve Whitehouse : Rework routing functions to better fit - * DECnet routing design - * Alexey Kuznetsov : New SMP locking - * Steve Whitehouse : More SMP locking changes & dn_cache_dump() - * Steve Whitehouse : Prerouting NF hook, now really is prerouting. - * Fixed possible skb leak in rtnetlink funcs. - * Steve Whitehouse : Dave Miller's dynamic hash table sizing and - * Alexey Kuznetsov's finer grained locking - * from ipv4/route.c. - * Steve Whitehouse : Routing is now starting to look like a - * sensible set of code now, mainly due to - * my copying the IPv4 routing code. The - * hooks here are modified and will continue - * to evolve for a while. - * Steve Whitehouse : Real SMP at last :-) Also new netfilter - * stuff. Look out raw sockets your days - * are numbered! - * Steve Whitehouse : Added return-to-sender functions. Added - * backlog congestion level return codes. - * Steve Whitehouse : Fixed bug where routes were set up with - * no ref count on net devices. - * Steve Whitehouse : RCU for the route cache - * Steve Whitehouse : Preparations for the flow cache - * Steve Whitehouse : Prepare for nonlinear skbs - */ - -/****************************************************************************** - (c) 1995-1998 E.M. Serrat emserrat@geocities.com - -*******************************************************************************/ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct dn_rt_hash_bucket { - struct dn_route __rcu *chain; - spinlock_t lock; -}; - -extern struct neigh_table dn_neigh_table; - - -static unsigned char dn_hiord_addr[6] = {0xAA, 0x00, 0x04, 0x00, 0x00, 0x00}; - -static const int dn_rt_min_delay = 2 * HZ; -static const int dn_rt_max_delay = 10 * HZ; -static const int dn_rt_mtu_expires = 10 * 60 * HZ; - -static unsigned long dn_rt_deadline; - -static int dn_dst_gc(struct dst_ops *ops); -static struct dst_entry *dn_dst_check(struct dst_entry *, __u32); -static unsigned int dn_dst_default_advmss(const struct dst_entry *dst); -static unsigned int dn_dst_mtu(const struct dst_entry *dst); -static void dn_dst_destroy(struct dst_entry *); -static void dn_dst_ifdown(struct dst_entry *, struct net_device *dev, int how); -static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); -static void dn_dst_link_failure(struct sk_buff *); -static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb , u32 mtu, - bool confirm_neigh); -static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb); -static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, - struct sk_buff *skb, - const void *daddr); -static int dn_route_input(struct sk_buff *); -static void dn_run_flush(struct timer_list *unused); - -static struct dn_rt_hash_bucket *dn_rt_hash_table; -static unsigned int dn_rt_hash_mask; - -static struct timer_list dn_route_timer; -static DEFINE_TIMER(dn_rt_flush_timer, dn_run_flush); -int decnet_dst_gc_interval = 2; - -static struct dst_ops dn_dst_ops = { - .family = PF_DECnet, - .gc_thresh = 128, - .gc = dn_dst_gc, - .check = dn_dst_check, - .default_advmss = dn_dst_default_advmss, - .mtu = dn_dst_mtu, - .cow_metrics = dst_cow_metrics_generic, - .destroy = dn_dst_destroy, - .ifdown = dn_dst_ifdown, - .negative_advice = dn_dst_negative_advice, - .link_failure = dn_dst_link_failure, - .update_pmtu = dn_dst_update_pmtu, - .redirect = dn_dst_redirect, - .neigh_lookup = dn_dst_neigh_lookup, -}; - -static void dn_dst_destroy(struct dst_entry *dst) -{ - struct dn_route *rt = (struct dn_route *) dst; - - if (rt->n) - neigh_release(rt->n); - dst_destroy_metrics_generic(dst); -} - -static void dn_dst_ifdown(struct dst_entry *dst, struct net_device *dev, int how) -{ - if (how) { - struct dn_route *rt = (struct dn_route *) dst; - struct neighbour *n = rt->n; - - if (n && n->dev == dev) { - n->dev = blackhole_netdev; - dev_hold(n->dev); - dev_put(dev); - } - } -} - -static __inline__ unsigned int dn_hash(__le16 src, __le16 dst) -{ - __u16 tmp = (__u16 __force)(src ^ dst); - tmp ^= (tmp >> 3); - tmp ^= (tmp >> 5); - tmp ^= (tmp >> 10); - return dn_rt_hash_mask & (unsigned int)tmp; -} - -static void dn_dst_check_expire(struct timer_list *unused) -{ - int i; - struct dn_route *rt; - struct dn_route __rcu **rtp; - unsigned long now = jiffies; - unsigned long expire = 120 * HZ; - - for (i = 0; i <= dn_rt_hash_mask; i++) { - rtp = &dn_rt_hash_table[i].chain; - - spin_lock(&dn_rt_hash_table[i].lock); - while ((rt = rcu_dereference_protected(*rtp, - lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) { - if (atomic_read(&rt->dst.__refcnt) > 1 || - (now - rt->dst.lastuse) < expire) { - rtp = &rt->dn_next; - continue; - } - *rtp = rt->dn_next; - rt->dn_next = NULL; - dst_dev_put(&rt->dst); - dst_release(&rt->dst); - } - spin_unlock(&dn_rt_hash_table[i].lock); - - if (jiffies != now) - break; - } - - mod_timer(&dn_route_timer, now + decnet_dst_gc_interval * HZ); -} - -static int dn_dst_gc(struct dst_ops *ops) -{ - struct dn_route *rt; - struct dn_route __rcu **rtp; - int i; - unsigned long now = jiffies; - unsigned long expire = 10 * HZ; - - for (i = 0; i <= dn_rt_hash_mask; i++) { - - spin_lock_bh(&dn_rt_hash_table[i].lock); - rtp = &dn_rt_hash_table[i].chain; - - while ((rt = rcu_dereference_protected(*rtp, - lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) { - if (atomic_read(&rt->dst.__refcnt) > 1 || - (now - rt->dst.lastuse) < expire) { - rtp = &rt->dn_next; - continue; - } - *rtp = rt->dn_next; - rt->dn_next = NULL; - dst_dev_put(&rt->dst); - dst_release(&rt->dst); - break; - } - spin_unlock_bh(&dn_rt_hash_table[i].lock); - } - - return 0; -} - -/* - * The decnet standards don't impose a particular minimum mtu, what they - * do insist on is that the routing layer accepts a datagram of at least - * 230 bytes long. Here we have to subtract the routing header length from - * 230 to get the minimum acceptable mtu. If there is no neighbour, then we - * assume the worst and use a long header size. - * - * We update both the mtu and the advertised mss (i.e. the segment size we - * advertise to the other end). - */ -static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu, - bool confirm_neigh) -{ - struct dn_route *rt = (struct dn_route *) dst; - struct neighbour *n = rt->n; - u32 min_mtu = 230; - struct dn_dev *dn; - - dn = n ? rcu_dereference_raw(n->dev->dn_ptr) : NULL; - - if (dn && dn->use_long == 0) - min_mtu -= 6; - else - min_mtu -= 21; - - if (dst_metric(dst, RTAX_MTU) > mtu && mtu >= min_mtu) { - if (!(dst_metric_locked(dst, RTAX_MTU))) { - dst_metric_set(dst, RTAX_MTU, mtu); - dst_set_expires(dst, dn_rt_mtu_expires); - } - if (!(dst_metric_locked(dst, RTAX_ADVMSS))) { - u32 mss = mtu - DN_MAX_NSP_DATA_HEADER; - u32 existing_mss = dst_metric_raw(dst, RTAX_ADVMSS); - if (!existing_mss || existing_mss > mss) - dst_metric_set(dst, RTAX_ADVMSS, mss); - } - } -} - -static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb) -{ -} - -/* - * When a route has been marked obsolete. (e.g. routing cache flush) - */ -static struct dst_entry *dn_dst_check(struct dst_entry *dst, __u32 cookie) -{ - return NULL; -} - -static struct dst_entry *dn_dst_negative_advice(struct dst_entry *dst) -{ - dst_release(dst); - return NULL; -} - -static void dn_dst_link_failure(struct sk_buff *skb) -{ -} - -static inline int compare_keys(struct flowidn *fl1, struct flowidn *fl2) -{ - return ((fl1->daddr ^ fl2->daddr) | - (fl1->saddr ^ fl2->saddr) | - (fl1->flowidn_mark ^ fl2->flowidn_mark) | - (fl1->flowidn_scope ^ fl2->flowidn_scope) | - (fl1->flowidn_oif ^ fl2->flowidn_oif) | - (fl1->flowidn_iif ^ fl2->flowidn_iif)) == 0; -} - -static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_route **rp) -{ - struct dn_route *rth; - struct dn_route __rcu **rthp; - unsigned long now = jiffies; - - rthp = &dn_rt_hash_table[hash].chain; - - spin_lock_bh(&dn_rt_hash_table[hash].lock); - while ((rth = rcu_dereference_protected(*rthp, - lockdep_is_held(&dn_rt_hash_table[hash].lock))) != NULL) { - if (compare_keys(&rth->fld, &rt->fld)) { - /* Put it first */ - *rthp = rth->dn_next; - rcu_assign_pointer(rth->dn_next, - dn_rt_hash_table[hash].chain); - rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth); - - dst_hold_and_use(&rth->dst, now); - spin_unlock_bh(&dn_rt_hash_table[hash].lock); - - dst_release_immediate(&rt->dst); - *rp = rth; - return 0; - } - rthp = &rth->dn_next; - } - - rcu_assign_pointer(rt->dn_next, dn_rt_hash_table[hash].chain); - rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt); - - dst_hold_and_use(&rt->dst, now); - spin_unlock_bh(&dn_rt_hash_table[hash].lock); - *rp = rt; - return 0; -} - -static void dn_run_flush(struct timer_list *unused) -{ - int i; - struct dn_route *rt, *next; - - for (i = 0; i < dn_rt_hash_mask; i++) { - spin_lock_bh(&dn_rt_hash_table[i].lock); - - rt = xchg((struct dn_route **)&dn_rt_hash_table[i].chain, NULL); - if (!rt) - goto nothing_to_declare; - - for (; rt; rt = next) { - next = rcu_dereference_raw(rt->dn_next); - RCU_INIT_POINTER(rt->dn_next, NULL); - dst_dev_put(&rt->dst); - dst_release(&rt->dst); - } - -nothing_to_declare: - spin_unlock_bh(&dn_rt_hash_table[i].lock); - } -} - -static DEFINE_SPINLOCK(dn_rt_flush_lock); - -void dn_rt_cache_flush(int delay) -{ - unsigned long now = jiffies; - int user_mode = !in_interrupt(); - - if (delay < 0) - delay = dn_rt_min_delay; - - spin_lock_bh(&dn_rt_flush_lock); - - if (del_timer(&dn_rt_flush_timer) && delay > 0 && dn_rt_deadline) { - long tmo = (long)(dn_rt_deadline - now); - - if (user_mode && tmo < dn_rt_max_delay - dn_rt_min_delay) - tmo = 0; - - if (delay > tmo) - delay = tmo; - } - - if (delay <= 0) { - spin_unlock_bh(&dn_rt_flush_lock); - dn_run_flush(NULL); - return; - } - - if (dn_rt_deadline == 0) - dn_rt_deadline = now + dn_rt_max_delay; - - dn_rt_flush_timer.expires = now + delay; - add_timer(&dn_rt_flush_timer); - spin_unlock_bh(&dn_rt_flush_lock); -} - -/** - * dn_return_short - Return a short packet to its sender - * @skb: The packet to return - * - */ -static int dn_return_short(struct sk_buff *skb) -{ - struct dn_skb_cb *cb; - unsigned char *ptr; - __le16 *src; - __le16 *dst; - - /* Add back headers */ - skb_push(skb, skb->data - skb_network_header(skb)); - - skb = skb_unshare(skb, GFP_ATOMIC); - if (!skb) - return NET_RX_DROP; - - cb = DN_SKB_CB(skb); - /* Skip packet length and point to flags */ - ptr = skb->data + 2; - *ptr++ = (cb->rt_flags & ~DN_RT_F_RQR) | DN_RT_F_RTS; - - dst = (__le16 *)ptr; - ptr += 2; - src = (__le16 *)ptr; - ptr += 2; - *ptr = 0; /* Zero hop count */ - - swap(*src, *dst); - - skb->pkt_type = PACKET_OUTGOING; - dn_rt_finish_output(skb, NULL, NULL); - return NET_RX_SUCCESS; -} - -/** - * dn_return_long - Return a long packet to its sender - * @skb: The long format packet to return - * - */ -static int dn_return_long(struct sk_buff *skb) -{ - struct dn_skb_cb *cb; - unsigned char *ptr; - unsigned char *src_addr, *dst_addr; - unsigned char tmp[ETH_ALEN]; - - /* Add back all headers */ - skb_push(skb, skb->data - skb_network_header(skb)); - - skb = skb_unshare(skb, GFP_ATOMIC); - if (!skb) - return NET_RX_DROP; - - cb = DN_SKB_CB(skb); - /* Ignore packet length and point to flags */ - ptr = skb->data + 2; - - /* Skip padding */ - if (*ptr & DN_RT_F_PF) { - char padlen = (*ptr & ~DN_RT_F_PF); - ptr += padlen; - } - - *ptr++ = (cb->rt_flags & ~DN_RT_F_RQR) | DN_RT_F_RTS; - ptr += 2; - dst_addr = ptr; - ptr += 8; - src_addr = ptr; - ptr += 6; - *ptr = 0; /* Zero hop count */ - - /* Swap source and destination */ - memcpy(tmp, src_addr, ETH_ALEN); - memcpy(src_addr, dst_addr, ETH_ALEN); - memcpy(dst_addr, tmp, ETH_ALEN); - - skb->pkt_type = PACKET_OUTGOING; - dn_rt_finish_output(skb, dst_addr, src_addr); - return NET_RX_SUCCESS; -} - -/** - * dn_route_rx_packet - Try and find a route for an incoming packet - * @net: The applicable net namespace - * @sk: Socket packet transmitted on - * @skb: The packet to find a route for - * - * Returns: result of input function if route is found, error code otherwise - */ -static int dn_route_rx_packet(struct net *net, struct sock *sk, struct sk_buff *skb) -{ - struct dn_skb_cb *cb; - int err; - - err = dn_route_input(skb); - if (err == 0) - return dst_input(skb); - - cb = DN_SKB_CB(skb); - if (decnet_debug_level & 4) { - char *devname = skb->dev ? skb->dev->name : "???"; - - printk(KERN_DEBUG - "DECnet: dn_route_rx_packet: rt_flags=0x%02x dev=%s len=%d src=0x%04hx dst=0x%04hx err=%d type=%d\n", - (int)cb->rt_flags, devname, skb->len, - le16_to_cpu(cb->src), le16_to_cpu(cb->dst), - err, skb->pkt_type); - } - - if ((skb->pkt_type == PACKET_HOST) && (cb->rt_flags & DN_RT_F_RQR)) { - switch (cb->rt_flags & DN_RT_PKT_MSK) { - case DN_RT_PKT_SHORT: - return dn_return_short(skb); - case DN_RT_PKT_LONG: - return dn_return_long(skb); - } - } - - kfree_skb(skb); - return NET_RX_DROP; -} - -static int dn_route_rx_long(struct sk_buff *skb) -{ - struct dn_skb_cb *cb = DN_SKB_CB(skb); - unsigned char *ptr = skb->data; - - if (!pskb_may_pull(skb, 21)) /* 20 for long header, 1 for shortest nsp */ - goto drop_it; - - skb_pull(skb, 20); - skb_reset_transport_header(skb); - - /* Destination info */ - ptr += 2; - cb->dst = dn_eth2dn(ptr); - if (memcmp(ptr, dn_hiord_addr, 4) != 0) - goto drop_it; - ptr += 6; - - - /* Source info */ - ptr += 2; - cb->src = dn_eth2dn(ptr); - if (memcmp(ptr, dn_hiord_addr, 4) != 0) - goto drop_it; - ptr += 6; - /* Other junk */ - ptr++; - cb->hops = *ptr++; /* Visit Count */ - - return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, - &init_net, NULL, skb, skb->dev, NULL, - dn_route_rx_packet); - -drop_it: - kfree_skb(skb); - return NET_RX_DROP; -} - - - -static int dn_route_rx_short(struct sk_buff *skb) -{ - struct dn_skb_cb *cb = DN_SKB_CB(skb); - unsigned char *ptr = skb->data; - - if (!pskb_may_pull(skb, 6)) /* 5 for short header + 1 for shortest nsp */ - goto drop_it; - - skb_pull(skb, 5); - skb_reset_transport_header(skb); - - cb->dst = *(__le16 *)ptr; - ptr += 2; - cb->src = *(__le16 *)ptr; - ptr += 2; - cb->hops = *ptr & 0x3f; - - return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, - &init_net, NULL, skb, skb->dev, NULL, - dn_route_rx_packet); - -drop_it: - kfree_skb(skb); - return NET_RX_DROP; -} - -static int dn_route_discard(struct net *net, struct sock *sk, struct sk_buff *skb) -{ - /* - * I know we drop the packet here, but that's considered success in - * this case - */ - kfree_skb(skb); - return NET_RX_SUCCESS; -} - -static int dn_route_ptp_hello(struct net *net, struct sock *sk, struct sk_buff *skb) -{ - dn_dev_hello(skb); - dn_neigh_pointopoint_hello(skb); - return NET_RX_SUCCESS; -} - -int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) -{ - struct dn_skb_cb *cb; - unsigned char flags = 0; - __u16 len = le16_to_cpu(*(__le16 *)skb->data); - struct dn_dev *dn = rcu_dereference(dev->dn_ptr); - unsigned char padlen = 0; - - if (!net_eq(dev_net(dev), &init_net)) - goto dump_it; - - if (dn == NULL) - goto dump_it; - - skb = skb_share_check(skb, GFP_ATOMIC); - if (!skb) - goto out; - - if (!pskb_may_pull(skb, 3)) - goto dump_it; - - skb_pull(skb, 2); - - if (len > skb->len) - goto dump_it; - - skb_trim(skb, len); - - flags = *skb->data; - - cb = DN_SKB_CB(skb); - cb->stamp = jiffies; - cb->iif = dev->ifindex; - - /* - * If we have padding, remove it. - */ - if (flags & DN_RT_F_PF) { - padlen = flags & ~DN_RT_F_PF; - if (!pskb_may_pull(skb, padlen + 1)) - goto dump_it; - skb_pull(skb, padlen); - flags = *skb->data; - } - - skb_reset_network_header(skb); - - /* - * Weed out future version DECnet - */ - if (flags & DN_RT_F_VER) - goto dump_it; - - cb->rt_flags = flags; - - if (decnet_debug_level & 1) - printk(KERN_DEBUG - "dn_route_rcv: got 0x%02x from %s [%d %d %d]\n", - (int)flags, dev->name, len, skb->len, - padlen); - - if (flags & DN_RT_PKT_CNTL) { - if (unlikely(skb_linearize(skb))) - goto dump_it; - - switch (flags & DN_RT_CNTL_MSK) { - case DN_RT_PKT_INIT: - dn_dev_init_pkt(skb); - break; - case DN_RT_PKT_VERI: - dn_dev_veri_pkt(skb); - break; - } - - if (dn->parms.state != DN_DEV_S_RU) - goto dump_it; - - switch (flags & DN_RT_CNTL_MSK) { - case DN_RT_PKT_HELO: - return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, - &init_net, NULL, skb, skb->dev, NULL, - dn_route_ptp_hello); - - case DN_RT_PKT_L1RT: - case DN_RT_PKT_L2RT: - return NF_HOOK(NFPROTO_DECNET, NF_DN_ROUTE, - &init_net, NULL, skb, skb->dev, NULL, - dn_route_discard); - case DN_RT_PKT_ERTH: - return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, - &init_net, NULL, skb, skb->dev, NULL, - dn_neigh_router_hello); - - case DN_RT_PKT_EEDH: - return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, - &init_net, NULL, skb, skb->dev, NULL, - dn_neigh_endnode_hello); - } - } else { - if (dn->parms.state != DN_DEV_S_RU) - goto dump_it; - - skb_pull(skb, 1); /* Pull flags */ - - switch (flags & DN_RT_PKT_MSK) { - case DN_RT_PKT_LONG: - return dn_route_rx_long(skb); - case DN_RT_PKT_SHORT: - return dn_route_rx_short(skb); - } - } - -dump_it: - kfree_skb(skb); -out: - return NET_RX_DROP; -} - -static int dn_output(struct net *net, struct sock *sk, struct sk_buff *skb) -{ - struct dst_entry *dst = skb_dst(skb); - struct dn_route *rt = (struct dn_route *)dst; - struct net_device *dev = dst->dev; - struct dn_skb_cb *cb = DN_SKB_CB(skb); - - int err = -EINVAL; - - if (rt->n == NULL) - goto error; - - skb->dev = dev; - - cb->src = rt->rt_saddr; - cb->dst = rt->rt_daddr; - - /* - * Always set the Intra-Ethernet bit on all outgoing packets - * originated on this node. Only valid flag from upper layers - * is return-to-sender-requested. Set hop count to 0 too. - */ - cb->rt_flags &= ~DN_RT_F_RQR; - cb->rt_flags |= DN_RT_F_IE; - cb->hops = 0; - - return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_OUT, - &init_net, sk, skb, NULL, dev, - dn_to_neigh_output); - -error: - net_dbg_ratelimited("dn_output: This should not happen\n"); - - kfree_skb(skb); - - return err; -} - -static int dn_forward(struct sk_buff *skb) -{ - struct dn_skb_cb *cb = DN_SKB_CB(skb); - struct dst_entry *dst = skb_dst(skb); - struct dn_dev *dn_db = rcu_dereference(dst->dev->dn_ptr); - struct dn_route *rt; - int header_len; - struct net_device *dev = skb->dev; - - if (skb->pkt_type != PACKET_HOST) - goto drop; - - /* Ensure that we have enough space for headers */ - rt = (struct dn_route *)skb_dst(skb); - header_len = dn_db->use_long ? 21 : 6; - if (skb_cow(skb, LL_RESERVED_SPACE(rt->dst.dev)+header_len)) - goto drop; - - /* - * Hop count exceeded. - */ - if (++cb->hops > 30) - goto drop; - - skb->dev = rt->dst.dev; - - /* - * If packet goes out same interface it came in on, then set - * the Intra-Ethernet bit. This has no effect for short - * packets, so we don't need to test for them here. - */ - cb->rt_flags &= ~DN_RT_F_IE; - if (rt->rt_flags & RTCF_DOREDIRECT) - cb->rt_flags |= DN_RT_F_IE; - - return NF_HOOK(NFPROTO_DECNET, NF_DN_FORWARD, - &init_net, NULL, skb, dev, skb->dev, - dn_to_neigh_output); - -drop: - kfree_skb(skb); - return NET_RX_DROP; -} - -/* - * Used to catch bugs. This should never normally get - * called. - */ -static int dn_rt_bug_out(struct net *net, struct sock *sk, struct sk_buff *skb) -{ - struct dn_skb_cb *cb = DN_SKB_CB(skb); - - net_dbg_ratelimited("dn_rt_bug: skb from:%04x to:%04x\n", - le16_to_cpu(cb->src), le16_to_cpu(cb->dst)); - - kfree_skb(skb); - - return NET_RX_DROP; -} - -static int dn_rt_bug(struct sk_buff *skb) -{ - struct dn_skb_cb *cb = DN_SKB_CB(skb); - - net_dbg_ratelimited("dn_rt_bug: skb from:%04x to:%04x\n", - le16_to_cpu(cb->src), le16_to_cpu(cb->dst)); - - kfree_skb(skb); - - return NET_RX_DROP; -} - -static unsigned int dn_dst_default_advmss(const struct dst_entry *dst) -{ - return dn_mss_from_pmtu(dst->dev, dst_mtu(dst)); -} - -static unsigned int dn_dst_mtu(const struct dst_entry *dst) -{ - unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); - - return mtu ? : dst->dev->mtu; -} - -static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, - struct sk_buff *skb, - const void *daddr) -{ - return __neigh_lookup_errno(&dn_neigh_table, daddr, dst->dev); -} - -static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) -{ - struct dn_fib_info *fi = res->fi; - struct net_device *dev = rt->dst.dev; - unsigned int mss_metric; - struct neighbour *n; - - if (fi) { - if (DN_FIB_RES_GW(*res) && - DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) - rt->rt_gateway = DN_FIB_RES_GW(*res); - dst_init_metrics(&rt->dst, fi->fib_metrics, true); - } - rt->rt_type = res->type; - - if (dev != NULL && rt->n == NULL) { - n = __neigh_lookup_errno(&dn_neigh_table, &rt->rt_gateway, dev); - if (IS_ERR(n)) - return PTR_ERR(n); - rt->n = n; - } - - if (dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu) - dst_metric_set(&rt->dst, RTAX_MTU, rt->dst.dev->mtu); - mss_metric = dst_metric_raw(&rt->dst, RTAX_ADVMSS); - if (mss_metric) { - unsigned int mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst)); - if (mss_metric > mss) - dst_metric_set(&rt->dst, RTAX_ADVMSS, mss); - } - return 0; -} - -static inline int dn_match_addr(__le16 addr1, __le16 addr2) -{ - __u16 tmp = le16_to_cpu(addr1) ^ le16_to_cpu(addr2); - int match = 16; - while (tmp) { - tmp >>= 1; - match--; - } - return match; -} - -static __le16 dnet_select_source(const struct net_device *dev, __le16 daddr, int scope) -{ - __le16 saddr = 0; - struct dn_dev *dn_db; - struct dn_ifaddr *ifa; - int best_match = 0; - int ret; - - rcu_read_lock(); - dn_db = rcu_dereference(dev->dn_ptr); - for (ifa = rcu_dereference(dn_db->ifa_list); - ifa != NULL; - ifa = rcu_dereference(ifa->ifa_next)) { - if (ifa->ifa_scope > scope) - continue; - if (!daddr) { - saddr = ifa->ifa_local; - break; - } - ret = dn_match_addr(daddr, ifa->ifa_local); - if (ret > best_match) - saddr = ifa->ifa_local; - if (best_match == 0) - saddr = ifa->ifa_local; - } - rcu_read_unlock(); - - return saddr; -} - -static inline __le16 __dn_fib_res_prefsrc(struct dn_fib_res *res) -{ - return dnet_select_source(DN_FIB_RES_DEV(*res), DN_FIB_RES_GW(*res), res->scope); -} - -static inline __le16 dn_fib_rules_map_destination(__le16 daddr, struct dn_fib_res *res) -{ - __le16 mask = dnet_make_mask(res->prefixlen); - return (daddr&~mask)|res->fi->fib_nh->nh_gw; -} - -static int dn_route_output_slow(struct dst_entry **pprt, const struct flowidn *oldflp, int try_hard) -{ - struct flowidn fld = { - .daddr = oldflp->daddr, - .saddr = oldflp->saddr, - .flowidn_scope = RT_SCOPE_UNIVERSE, - .flowidn_mark = oldflp->flowidn_mark, - .flowidn_iif = LOOPBACK_IFINDEX, - .flowidn_oif = oldflp->flowidn_oif, - }; - struct dn_route *rt = NULL; - struct net_device *dev_out = NULL, *dev; - struct neighbour *neigh = NULL; - unsigned int hash; - unsigned int flags = 0; - struct dn_fib_res res = { .fi = NULL, .type = RTN_UNICAST }; - int err; - int free_res = 0; - __le16 gateway = 0; - - if (decnet_debug_level & 16) - printk(KERN_DEBUG - "dn_route_output_slow: dst=%04x src=%04x mark=%d" - " iif=%d oif=%d\n", le16_to_cpu(oldflp->daddr), - le16_to_cpu(oldflp->saddr), - oldflp->flowidn_mark, LOOPBACK_IFINDEX, - oldflp->flowidn_oif); - - /* If we have an output interface, verify its a DECnet device */ - if (oldflp->flowidn_oif) { - dev_out = dev_get_by_index(&init_net, oldflp->flowidn_oif); - err = -ENODEV; - if (dev_out && dev_out->dn_ptr == NULL) { - dev_put(dev_out); - dev_out = NULL; - } - if (dev_out == NULL) - goto out; - } - - /* If we have a source address, verify that its a local address */ - if (oldflp->saddr) { - err = -EADDRNOTAVAIL; - - if (dev_out) { - if (dn_dev_islocal(dev_out, oldflp->saddr)) - goto source_ok; - dev_put(dev_out); - goto out; - } - rcu_read_lock(); - for_each_netdev_rcu(&init_net, dev) { - if (!dev->dn_ptr) - continue; - if (!dn_dev_islocal(dev, oldflp->saddr)) - continue; - if ((dev->flags & IFF_LOOPBACK) && - oldflp->daddr && - !dn_dev_islocal(dev, oldflp->daddr)) - continue; - - dev_out = dev; - break; - } - rcu_read_unlock(); - if (dev_out == NULL) - goto out; - dev_hold(dev_out); -source_ok: - ; - } - - /* No destination? Assume its local */ - if (!fld.daddr) { - fld.daddr = fld.saddr; - - dev_put(dev_out); - err = -EINVAL; - dev_out = init_net.loopback_dev; - if (!dev_out->dn_ptr) - goto out; - err = -EADDRNOTAVAIL; - dev_hold(dev_out); - if (!fld.daddr) { - fld.daddr = - fld.saddr = dnet_select_source(dev_out, 0, - RT_SCOPE_HOST); - if (!fld.daddr) - goto done; - } - fld.flowidn_oif = LOOPBACK_IFINDEX; - res.type = RTN_LOCAL; - goto make_route; - } - - if (decnet_debug_level & 16) - printk(KERN_DEBUG - "dn_route_output_slow: initial checks complete." - " dst=%04x src=%04x oif=%d try_hard=%d\n", - le16_to_cpu(fld.daddr), le16_to_cpu(fld.saddr), - fld.flowidn_oif, try_hard); - - /* - * N.B. If the kernel is compiled without router support then - * dn_fib_lookup() will evaluate to non-zero so this if () block - * will always be executed. - */ - err = -ESRCH; - if (try_hard || (err = dn_fib_lookup(&fld, &res)) != 0) { - struct dn_dev *dn_db; - if (err != -ESRCH) - goto out; - /* - * Here the fallback is basically the standard algorithm for - * routing in endnodes which is described in the DECnet routing - * docs - * - * If we are not trying hard, look in neighbour cache. - * The result is tested to ensure that if a specific output - * device/source address was requested, then we honour that - * here - */ - if (!try_hard) { - neigh = neigh_lookup_nodev(&dn_neigh_table, &init_net, &fld.daddr); - if (neigh) { - if ((oldflp->flowidn_oif && - (neigh->dev->ifindex != oldflp->flowidn_oif)) || - (oldflp->saddr && - (!dn_dev_islocal(neigh->dev, - oldflp->saddr)))) { - neigh_release(neigh); - neigh = NULL; - } else { - dev_put(dev_out); - if (dn_dev_islocal(neigh->dev, fld.daddr)) { - dev_out = init_net.loopback_dev; - res.type = RTN_LOCAL; - } else { - dev_out = neigh->dev; - } - dev_hold(dev_out); - goto select_source; - } - } - } - - /* Not there? Perhaps its a local address */ - if (dev_out == NULL) - dev_out = dn_dev_get_default(); - err = -ENODEV; - if (dev_out == NULL) - goto out; - dn_db = rcu_dereference_raw(dev_out->dn_ptr); - if (!dn_db) - goto e_inval; - /* Possible improvement - check all devices for local addr */ - if (dn_dev_islocal(dev_out, fld.daddr)) { - dev_put(dev_out); - dev_out = init_net.loopback_dev; - dev_hold(dev_out); - res.type = RTN_LOCAL; - goto select_source; - } - /* Not local either.... try sending it to the default router */ - neigh = neigh_clone(dn_db->router); - BUG_ON(neigh && neigh->dev != dev_out); - - /* Ok then, we assume its directly connected and move on */ -select_source: - if (neigh) - gateway = container_of(neigh, struct dn_neigh, n)->addr; - if (gateway == 0) - gateway = fld.daddr; - if (fld.saddr == 0) { - fld.saddr = dnet_select_source(dev_out, gateway, - res.type == RTN_LOCAL ? - RT_SCOPE_HOST : - RT_SCOPE_LINK); - if (fld.saddr == 0 && res.type != RTN_LOCAL) - goto e_addr; - } - fld.flowidn_oif = dev_out->ifindex; - goto make_route; - } - free_res = 1; - - if (res.type == RTN_NAT) - goto e_inval; - - if (res.type == RTN_LOCAL) { - if (!fld.saddr) - fld.saddr = fld.daddr; - dev_put(dev_out); - dev_out = init_net.loopback_dev; - dev_hold(dev_out); - if (!dev_out->dn_ptr) - goto e_inval; - fld.flowidn_oif = dev_out->ifindex; - if (res.fi) - dn_fib_info_put(res.fi); - res.fi = NULL; - goto make_route; - } - - if (res.fi->fib_nhs > 1 && fld.flowidn_oif == 0) - dn_fib_select_multipath(&fld, &res); - - /* - * We could add some logic to deal with default routes here and - * get rid of some of the special casing above. - */ - - if (!fld.saddr) - fld.saddr = DN_FIB_RES_PREFSRC(res); - - dev_put(dev_out); - dev_out = DN_FIB_RES_DEV(res); - dev_hold(dev_out); - fld.flowidn_oif = dev_out->ifindex; - gateway = DN_FIB_RES_GW(res); - -make_route: - if (dev_out->flags & IFF_LOOPBACK) - flags |= RTCF_LOCAL; - - rt = dst_alloc(&dn_dst_ops, dev_out, 0, DST_OBSOLETE_NONE, 0); - if (rt == NULL) - goto e_nobufs; - - rt->dn_next = NULL; - memset(&rt->fld, 0, sizeof(rt->fld)); - rt->fld.saddr = oldflp->saddr; - rt->fld.daddr = oldflp->daddr; - rt->fld.flowidn_oif = oldflp->flowidn_oif; - rt->fld.flowidn_iif = 0; - rt->fld.flowidn_mark = oldflp->flowidn_mark; - - rt->rt_saddr = fld.saddr; - rt->rt_daddr = fld.daddr; - rt->rt_gateway = gateway ? gateway : fld.daddr; - rt->rt_local_src = fld.saddr; - - rt->rt_dst_map = fld.daddr; - rt->rt_src_map = fld.saddr; - - rt->n = neigh; - neigh = NULL; - - rt->dst.lastuse = jiffies; - rt->dst.output = dn_output; - rt->dst.input = dn_rt_bug; - rt->rt_flags = flags; - if (flags & RTCF_LOCAL) - rt->dst.input = dn_nsp_rx; - - err = dn_rt_set_next_hop(rt, &res); - if (err) - goto e_neighbour; - - hash = dn_hash(rt->fld.saddr, rt->fld.daddr); - /* dn_insert_route() increments dst->__refcnt */ - dn_insert_route(rt, hash, (struct dn_route **)pprt); - -done: - if (neigh) - neigh_release(neigh); - if (free_res) - dn_fib_res_put(&res); - dev_put(dev_out); -out: - return err; - -e_addr: - err = -EADDRNOTAVAIL; - goto done; -e_inval: - err = -EINVAL; - goto done; -e_nobufs: - err = -ENOBUFS; - goto done; -e_neighbour: - dst_release_immediate(&rt->dst); - goto e_nobufs; -} - - -/* - * N.B. The flags may be moved into the flowi at some future stage. - */ -static int __dn_route_output_key(struct dst_entry **pprt, const struct flowidn *flp, int flags) -{ - unsigned int hash = dn_hash(flp->saddr, flp->daddr); - struct dn_route *rt = NULL; - - if (!(flags & MSG_TRYHARD)) { - rcu_read_lock_bh(); - for (rt = rcu_dereference_bh(dn_rt_hash_table[hash].chain); rt; - rt = rcu_dereference_bh(rt->dn_next)) { - if ((flp->daddr == rt->fld.daddr) && - (flp->saddr == rt->fld.saddr) && - (flp->flowidn_mark == rt->fld.flowidn_mark) && - dn_is_output_route(rt) && - (rt->fld.flowidn_oif == flp->flowidn_oif)) { - dst_hold_and_use(&rt->dst, jiffies); - rcu_read_unlock_bh(); - *pprt = &rt->dst; - return 0; - } - } - rcu_read_unlock_bh(); - } - - return dn_route_output_slow(pprt, flp, flags); -} - -static int dn_route_output_key(struct dst_entry **pprt, struct flowidn *flp, int flags) -{ - int err; - - err = __dn_route_output_key(pprt, flp, flags); - if (err == 0 && flp->flowidn_proto) { - *pprt = xfrm_lookup(&init_net, *pprt, - flowidn_to_flowi(flp), NULL, 0); - if (IS_ERR(*pprt)) { - err = PTR_ERR(*pprt); - *pprt = NULL; - } - } - return err; -} - -int dn_route_output_sock(struct dst_entry __rcu **pprt, struct flowidn *fl, struct sock *sk, int flags) -{ - int err; - - err = __dn_route_output_key(pprt, fl, flags & MSG_TRYHARD); - if (err == 0 && fl->flowidn_proto) { - *pprt = xfrm_lookup(&init_net, *pprt, - flowidn_to_flowi(fl), sk, 0); - if (IS_ERR(*pprt)) { - err = PTR_ERR(*pprt); - *pprt = NULL; - } - } - return err; -} - -static int dn_route_input_slow(struct sk_buff *skb) -{ - struct dn_route *rt = NULL; - struct dn_skb_cb *cb = DN_SKB_CB(skb); - struct net_device *in_dev = skb->dev; - struct net_device *out_dev = NULL; - struct dn_dev *dn_db; - struct neighbour *neigh = NULL; - unsigned int hash; - int flags = 0; - __le16 gateway = 0; - __le16 local_src = 0; - struct flowidn fld = { - .daddr = cb->dst, - .saddr = cb->src, - .flowidn_scope = RT_SCOPE_UNIVERSE, - .flowidn_mark = skb->mark, - .flowidn_iif = skb->dev->ifindex, - }; - struct dn_fib_res res = { .fi = NULL, .type = RTN_UNREACHABLE }; - int err = -EINVAL; - int free_res = 0; - - dev_hold(in_dev); - - dn_db = rcu_dereference(in_dev->dn_ptr); - if (!dn_db) - goto out; - - /* Zero source addresses are not allowed */ - if (fld.saddr == 0) - goto out; - - /* - * In this case we've just received a packet from a source - * outside ourselves pretending to come from us. We don't - * allow it any further to prevent routing loops, spoofing and - * other nasties. Loopback packets already have the dst attached - * so this only affects packets which have originated elsewhere. - */ - err = -ENOTUNIQ; - if (dn_dev_islocal(in_dev, cb->src)) - goto out; - - err = dn_fib_lookup(&fld, &res); - if (err) { - if (err != -ESRCH) - goto out; - /* - * Is the destination us ? - */ - if (!dn_dev_islocal(in_dev, cb->dst)) - goto e_inval; - - res.type = RTN_LOCAL; - } else { - __le16 src_map = fld.saddr; - free_res = 1; - - out_dev = DN_FIB_RES_DEV(res); - if (out_dev == NULL) { - net_crit_ratelimited("Bug in dn_route_input_slow() No output device\n"); - goto e_inval; - } - dev_hold(out_dev); - - if (res.r) - src_map = fld.saddr; /* no NAT support for now */ - - gateway = DN_FIB_RES_GW(res); - if (res.type == RTN_NAT) { - fld.daddr = dn_fib_rules_map_destination(fld.daddr, &res); - dn_fib_res_put(&res); - free_res = 0; - if (dn_fib_lookup(&fld, &res)) - goto e_inval; - free_res = 1; - if (res.type != RTN_UNICAST) - goto e_inval; - flags |= RTCF_DNAT; - gateway = fld.daddr; - } - fld.saddr = src_map; - } - - switch (res.type) { - case RTN_UNICAST: - /* - * Forwarding check here, we only check for forwarding - * being turned off, if you want to only forward intra - * area, its up to you to set the routing tables up - * correctly. - */ - if (dn_db->parms.forwarding == 0) - goto e_inval; - - if (res.fi->fib_nhs > 1 && fld.flowidn_oif == 0) - dn_fib_select_multipath(&fld, &res); - - /* - * Check for out_dev == in_dev. We use the RTCF_DOREDIRECT - * flag as a hint to set the intra-ethernet bit when - * forwarding. If we've got NAT in operation, we don't do - * this optimisation. - */ - if (out_dev == in_dev && !(flags & RTCF_NAT)) - flags |= RTCF_DOREDIRECT; - - local_src = DN_FIB_RES_PREFSRC(res); - break; - case RTN_BLACKHOLE: - case RTN_UNREACHABLE: - break; - case RTN_LOCAL: - flags |= RTCF_LOCAL; - fld.saddr = cb->dst; - fld.daddr = cb->src; - - /* Routing tables gave us a gateway */ - if (gateway) - goto make_route; - - /* Packet was intra-ethernet, so we know its on-link */ - if (cb->rt_flags & DN_RT_F_IE) { - gateway = cb->src; - goto make_route; - } - - /* Use the default router if there is one */ - neigh = neigh_clone(dn_db->router); - if (neigh) { - gateway = container_of(neigh, struct dn_neigh, n)->addr; - goto make_route; - } - - /* Close eyes and pray */ - gateway = cb->src; - goto make_route; - default: - goto e_inval; - } - -make_route: - rt = dst_alloc(&dn_dst_ops, out_dev, 1, DST_OBSOLETE_NONE, 0); - if (rt == NULL) - goto e_nobufs; - - rt->dn_next = NULL; - memset(&rt->fld, 0, sizeof(rt->fld)); - rt->rt_saddr = fld.saddr; - rt->rt_daddr = fld.daddr; - rt->rt_gateway = fld.daddr; - if (gateway) - rt->rt_gateway = gateway; - rt->rt_local_src = local_src ? local_src : rt->rt_saddr; - - rt->rt_dst_map = fld.daddr; - rt->rt_src_map = fld.saddr; - - rt->fld.saddr = cb->src; - rt->fld.daddr = cb->dst; - rt->fld.flowidn_oif = 0; - rt->fld.flowidn_iif = in_dev->ifindex; - rt->fld.flowidn_mark = fld.flowidn_mark; - - rt->n = neigh; - rt->dst.lastuse = jiffies; - rt->dst.output = dn_rt_bug_out; - switch (res.type) { - case RTN_UNICAST: - rt->dst.input = dn_forward; - break; - case RTN_LOCAL: - rt->dst.output = dn_output; - rt->dst.input = dn_nsp_rx; - rt->dst.dev = in_dev; - flags |= RTCF_LOCAL; - break; - default: - case RTN_UNREACHABLE: - case RTN_BLACKHOLE: - rt->dst.input = dst_discard; - } - rt->rt_flags = flags; - - err = dn_rt_set_next_hop(rt, &res); - if (err) - goto e_neighbour; - - hash = dn_hash(rt->fld.saddr, rt->fld.daddr); - /* dn_insert_route() increments dst->__refcnt */ - dn_insert_route(rt, hash, &rt); - skb_dst_set(skb, &rt->dst); - -done: - if (neigh) - neigh_release(neigh); - if (free_res) - dn_fib_res_put(&res); - dev_put(in_dev); - dev_put(out_dev); -out: - return err; - -e_inval: - err = -EINVAL; - goto done; - -e_nobufs: - err = -ENOBUFS; - goto done; - -e_neighbour: - dst_release_immediate(&rt->dst); - goto done; -} - -static int dn_route_input(struct sk_buff *skb) -{ - struct dn_route *rt; - struct dn_skb_cb *cb = DN_SKB_CB(skb); - unsigned int hash = dn_hash(cb->src, cb->dst); - - if (skb_dst(skb)) - return 0; - - rcu_read_lock(); - for (rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL; - rt = rcu_dereference(rt->dn_next)) { - if ((rt->fld.saddr == cb->src) && - (rt->fld.daddr == cb->dst) && - (rt->fld.flowidn_oif == 0) && - (rt->fld.flowidn_mark == skb->mark) && - (rt->fld.flowidn_iif == cb->iif)) { - dst_hold_and_use(&rt->dst, jiffies); - rcu_read_unlock(); - skb_dst_set(skb, (struct dst_entry *)rt); - return 0; - } - } - rcu_read_unlock(); - - return dn_route_input_slow(skb); -} - -static int dn_rt_fill_info(struct sk_buff *skb, u32 portid, u32 seq, - int event, int nowait, unsigned int flags) -{ - struct dn_route *rt = (struct dn_route *)skb_dst(skb); - struct rtmsg *r; - struct nlmsghdr *nlh; - long expires; - - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags); - if (!nlh) - return -EMSGSIZE; - - r = nlmsg_data(nlh); - r->rtm_family = AF_DECnet; - r->rtm_dst_len = 16; - r->rtm_src_len = 0; - r->rtm_tos = 0; - r->rtm_table = RT_TABLE_MAIN; - r->rtm_type = rt->rt_type; - r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; - r->rtm_scope = RT_SCOPE_UNIVERSE; - r->rtm_protocol = RTPROT_UNSPEC; - - if (rt->rt_flags & RTCF_NOTIFY) - r->rtm_flags |= RTM_F_NOTIFY; - - if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN) < 0 || - nla_put_le16(skb, RTA_DST, rt->rt_daddr) < 0) - goto errout; - - if (rt->fld.saddr) { - r->rtm_src_len = 16; - if (nla_put_le16(skb, RTA_SRC, rt->fld.saddr) < 0) - goto errout; - } - if (rt->dst.dev && - nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex) < 0) - goto errout; - - /* - * Note to self - change this if input routes reverse direction when - * they deal only with inputs and not with replies like they do - * currently. - */ - if (nla_put_le16(skb, RTA_PREFSRC, rt->rt_local_src) < 0) - goto errout; - - if (rt->rt_daddr != rt->rt_gateway && - nla_put_le16(skb, RTA_GATEWAY, rt->rt_gateway) < 0) - goto errout; - - if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) - goto errout; - - expires = rt->dst.expires ? rt->dst.expires - jiffies : 0; - if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, - rt->dst.error) < 0) - goto errout; - - if (dn_is_input_route(rt) && - nla_put_u32(skb, RTA_IIF, rt->fld.flowidn_iif) < 0) - goto errout; - - nlmsg_end(skb, nlh); - return 0; - -errout: - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; -} - -const struct nla_policy rtm_dn_policy[RTA_MAX + 1] = { - [RTA_DST] = { .type = NLA_U16 }, - [RTA_SRC] = { .type = NLA_U16 }, - [RTA_IIF] = { .type = NLA_U32 }, - [RTA_OIF] = { .type = NLA_U32 }, - [RTA_GATEWAY] = { .type = NLA_U16 }, - [RTA_PRIORITY] = { .type = NLA_U32 }, - [RTA_PREFSRC] = { .type = NLA_U16 }, - [RTA_METRICS] = { .type = NLA_NESTED }, - [RTA_MULTIPATH] = { .type = NLA_NESTED }, - [RTA_TABLE] = { .type = NLA_U32 }, - [RTA_MARK] = { .type = NLA_U32 }, -}; - -/* - * This is called by both endnodes and routers now. - */ -static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) -{ - struct net *net = sock_net(in_skb->sk); - struct rtmsg *rtm = nlmsg_data(nlh); - struct dn_route *rt = NULL; - struct dn_skb_cb *cb; - int err; - struct sk_buff *skb; - struct flowidn fld; - struct nlattr *tb[RTA_MAX+1]; - - if (!net_eq(net, &init_net)) - return -EINVAL; - - err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, - rtm_dn_policy, extack); - if (err < 0) - return err; - - memset(&fld, 0, sizeof(fld)); - fld.flowidn_proto = DNPROTO_NSP; - - skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (skb == NULL) - return -ENOBUFS; - skb_reset_mac_header(skb); - cb = DN_SKB_CB(skb); - - if (tb[RTA_SRC]) - fld.saddr = nla_get_le16(tb[RTA_SRC]); - - if (tb[RTA_DST]) - fld.daddr = nla_get_le16(tb[RTA_DST]); - - if (tb[RTA_IIF]) - fld.flowidn_iif = nla_get_u32(tb[RTA_IIF]); - - if (fld.flowidn_iif) { - struct net_device *dev; - dev = __dev_get_by_index(&init_net, fld.flowidn_iif); - if (!dev || !dev->dn_ptr) { - kfree_skb(skb); - return -ENODEV; - } - skb->protocol = htons(ETH_P_DNA_RT); - skb->dev = dev; - cb->src = fld.saddr; - cb->dst = fld.daddr; - local_bh_disable(); - err = dn_route_input(skb); - local_bh_enable(); - memset(cb, 0, sizeof(struct dn_skb_cb)); - rt = (struct dn_route *)skb_dst(skb); - if (!err && -rt->dst.error) - err = rt->dst.error; - } else { - if (tb[RTA_OIF]) - fld.flowidn_oif = nla_get_u32(tb[RTA_OIF]); - - err = dn_route_output_key((struct dst_entry **)&rt, &fld, 0); - } - - skb->dev = NULL; - if (err) - goto out_free; - skb_dst_set(skb, &rt->dst); - if (rtm->rtm_flags & RTM_F_NOTIFY) - rt->rt_flags |= RTCF_NOTIFY; - - err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); - if (err < 0) { - err = -EMSGSIZE; - goto out_free; - } - - return rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).portid); - -out_free: - kfree_skb(skb); - return err; -} - -/* - * For routers, this is called from dn_fib_dump, but for endnodes its - * called directly from the rtnetlink dispatch table. - */ -int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct net *net = sock_net(skb->sk); - struct dn_route *rt; - int h, s_h; - int idx, s_idx; - struct rtmsg *rtm; - - if (!net_eq(net, &init_net)) - return 0; - - if (nlmsg_len(cb->nlh) < sizeof(struct rtmsg)) - return -EINVAL; - - rtm = nlmsg_data(cb->nlh); - if (!(rtm->rtm_flags & RTM_F_CLONED)) - return 0; - - s_h = cb->args[0]; - s_idx = idx = cb->args[1]; - for (h = 0; h <= dn_rt_hash_mask; h++) { - if (h < s_h) - continue; - if (h > s_h) - s_idx = 0; - rcu_read_lock_bh(); - for (rt = rcu_dereference_bh(dn_rt_hash_table[h].chain), idx = 0; - rt; - rt = rcu_dereference_bh(rt->dn_next), idx++) { - if (idx < s_idx) - continue; - skb_dst_set(skb, dst_clone(&rt->dst)); - if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, RTM_NEWROUTE, - 1, NLM_F_MULTI) < 0) { - skb_dst_drop(skb); - rcu_read_unlock_bh(); - goto done; - } - skb_dst_drop(skb); - } - rcu_read_unlock_bh(); - } - -done: - cb->args[0] = h; - cb->args[1] = idx; - return skb->len; -} - -#ifdef CONFIG_PROC_FS -struct dn_rt_cache_iter_state { - int bucket; -}; - -static struct dn_route *dn_rt_cache_get_first(struct seq_file *seq) -{ - struct dn_route *rt = NULL; - struct dn_rt_cache_iter_state *s = seq->private; - - for (s->bucket = dn_rt_hash_mask; s->bucket >= 0; --s->bucket) { - rcu_read_lock_bh(); - rt = rcu_dereference_bh(dn_rt_hash_table[s->bucket].chain); - if (rt) - break; - rcu_read_unlock_bh(); - } - return rt; -} - -static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_route *rt) -{ - struct dn_rt_cache_iter_state *s = seq->private; - - rt = rcu_dereference_bh(rt->dn_next); - while (!rt) { - rcu_read_unlock_bh(); - if (--s->bucket < 0) - break; - rcu_read_lock_bh(); - rt = rcu_dereference_bh(dn_rt_hash_table[s->bucket].chain); - } - return rt; -} - -static void *dn_rt_cache_seq_start(struct seq_file *seq, loff_t *pos) -{ - struct dn_route *rt = dn_rt_cache_get_first(seq); - - if (rt) { - while (*pos && (rt = dn_rt_cache_get_next(seq, rt))) - --*pos; - } - return *pos ? NULL : rt; -} - -static void *dn_rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct dn_route *rt = dn_rt_cache_get_next(seq, v); - ++*pos; - return rt; -} - -static void dn_rt_cache_seq_stop(struct seq_file *seq, void *v) -{ - if (v) - rcu_read_unlock_bh(); -} - -static int dn_rt_cache_seq_show(struct seq_file *seq, void *v) -{ - struct dn_route *rt = v; - char buf1[DN_ASCBUF_LEN], buf2[DN_ASCBUF_LEN]; - - seq_printf(seq, "%-8s %-7s %-7s %04d %04d %04d\n", - rt->dst.dev ? rt->dst.dev->name : "*", - dn_addr2asc(le16_to_cpu(rt->rt_daddr), buf1), - dn_addr2asc(le16_to_cpu(rt->rt_saddr), buf2), - atomic_read(&rt->dst.__refcnt), - rt->dst.__use, 0); - return 0; -} - -static const struct seq_operations dn_rt_cache_seq_ops = { - .start = dn_rt_cache_seq_start, - .next = dn_rt_cache_seq_next, - .stop = dn_rt_cache_seq_stop, - .show = dn_rt_cache_seq_show, -}; -#endif /* CONFIG_PROC_FS */ - -void __init dn_route_init(void) -{ - int i, goal, order; - - dn_dst_ops.kmem_cachep = - kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); - dst_entries_init(&dn_dst_ops); - timer_setup(&dn_route_timer, dn_dst_check_expire, 0); - dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ; - add_timer(&dn_route_timer); - - goal = totalram_pages() >> (26 - PAGE_SHIFT); - - for (order = 0; (1UL << order) < goal; order++) - /* NOTHING */; - - /* - * Only want 1024 entries max, since the table is very, very unlikely - * to be larger than that. - */ - while (order && ((((1UL << order) * PAGE_SIZE) / - sizeof(struct dn_rt_hash_bucket)) >= 2048)) - order--; - - do { - dn_rt_hash_mask = (1UL << order) * PAGE_SIZE / - sizeof(struct dn_rt_hash_bucket); - while (dn_rt_hash_mask & (dn_rt_hash_mask - 1)) - dn_rt_hash_mask--; - dn_rt_hash_table = (struct dn_rt_hash_bucket *) - __get_free_pages(GFP_ATOMIC, order); - } while (dn_rt_hash_table == NULL && --order > 0); - - if (!dn_rt_hash_table) - panic("Failed to allocate DECnet route cache hash table\n"); - - printk(KERN_INFO - "DECnet: Routing cache hash table of %u buckets, %ldKbytes\n", - dn_rt_hash_mask, - (long)(dn_rt_hash_mask*sizeof(struct dn_rt_hash_bucket))/1024); - - dn_rt_hash_mask--; - for (i = 0; i <= dn_rt_hash_mask; i++) { - spin_lock_init(&dn_rt_hash_table[i].lock); - dn_rt_hash_table[i].chain = NULL; - } - - dn_dst_ops.gc_thresh = (dn_rt_hash_mask + 1); - - proc_create_seq_private("decnet_cache", 0444, init_net.proc_net, - &dn_rt_cache_seq_ops, - sizeof(struct dn_rt_cache_iter_state), NULL); - -#ifdef CONFIG_DECNET_ROUTER - rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETROUTE, - dn_cache_getroute, dn_fib_dump, 0); -#else - rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETROUTE, - dn_cache_getroute, dn_cache_dump, 0); -#endif -} - -void __exit dn_route_cleanup(void) -{ - del_timer(&dn_route_timer); - dn_run_flush(NULL); - - remove_proc_entry("decnet_cache", init_net.proc_net); - dst_entries_destroy(&dn_dst_ops); -} diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c deleted file mode 100644 index ee73057529cf..000000000000 --- a/net/decnet/dn_rules.c +++ /dev/null @@ -1,253 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -/* - * DECnet An implementation of the DECnet protocol suite for the LINUX - * operating system. DECnet is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * DECnet Routing Forwarding Information Base (Rules) - * - * Author: Steve Whitehouse - * Mostly copied from Alexey Kuznetsov's ipv4/fib_rules.c - * - * - * Changes: - * Steve Whitehouse - * Updated for Thomas Graf's generic rules - * - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static struct fib_rules_ops *dn_fib_rules_ops; - -struct dn_fib_rule -{ - struct fib_rule common; - unsigned char dst_len; - unsigned char src_len; - __le16 src; - __le16 srcmask; - __le16 dst; - __le16 dstmask; - __le16 srcmap; - u8 flags; -}; - - -int dn_fib_lookup(struct flowidn *flp, struct dn_fib_res *res) -{ - struct fib_lookup_arg arg = { - .result = res, - }; - int err; - - err = fib_rules_lookup(dn_fib_rules_ops, - flowidn_to_flowi(flp), 0, &arg); - res->r = arg.rule; - - return err; -} - -static int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp, - int flags, struct fib_lookup_arg *arg) -{ - struct flowidn *fld = &flp->u.dn; - int err = -EAGAIN; - struct dn_fib_table *tbl; - - switch(rule->action) { - case FR_ACT_TO_TBL: - break; - - case FR_ACT_UNREACHABLE: - err = -ENETUNREACH; - goto errout; - - case FR_ACT_PROHIBIT: - err = -EACCES; - goto errout; - - case FR_ACT_BLACKHOLE: - default: - err = -EINVAL; - goto errout; - } - - tbl = dn_fib_get_table(rule->table, 0); - if (tbl == NULL) - goto errout; - - err = tbl->lookup(tbl, fld, (struct dn_fib_res *)arg->result); - if (err > 0) - err = -EAGAIN; -errout: - return err; -} - -static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) -{ - struct dn_fib_rule *r = (struct dn_fib_rule *)rule; - struct flowidn *fld = &fl->u.dn; - __le16 daddr = fld->daddr; - __le16 saddr = fld->saddr; - - if (((saddr ^ r->src) & r->srcmask) || - ((daddr ^ r->dst) & r->dstmask)) - return 0; - - return 1; -} - -static int dn_fib_rule_configure(struct fib_rule *rule, struct sk_buff *skb, - struct fib_rule_hdr *frh, - struct nlattr **tb, - struct netlink_ext_ack *extack) -{ - int err = -EINVAL; - struct dn_fib_rule *r = (struct dn_fib_rule *)rule; - - if (frh->tos) { - NL_SET_ERR_MSG(extack, "Invalid tos value"); - goto errout; - } - - if (rule->table == RT_TABLE_UNSPEC) { - if (rule->action == FR_ACT_TO_TBL) { - struct dn_fib_table *table; - - table = dn_fib_empty_table(); - if (table == NULL) { - err = -ENOBUFS; - goto errout; - } - - rule->table = table->n; - } - } - - if (frh->src_len) - r->src = nla_get_le16(tb[FRA_SRC]); - - if (frh->dst_len) - r->dst = nla_get_le16(tb[FRA_DST]); - - r->src_len = frh->src_len; - r->srcmask = dnet_make_mask(r->src_len); - r->dst_len = frh->dst_len; - r->dstmask = dnet_make_mask(r->dst_len); - err = 0; -errout: - return err; -} - -static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, - struct nlattr **tb) -{ - struct dn_fib_rule *r = (struct dn_fib_rule *)rule; - - if (frh->src_len && (r->src_len != frh->src_len)) - return 0; - - if (frh->dst_len && (r->dst_len != frh->dst_len)) - return 0; - - if (frh->src_len && (r->src != nla_get_le16(tb[FRA_SRC]))) - return 0; - - if (frh->dst_len && (r->dst != nla_get_le16(tb[FRA_DST]))) - return 0; - - return 1; -} - -unsigned int dnet_addr_type(__le16 addr) -{ - struct flowidn fld = { .daddr = addr }; - struct dn_fib_res res; - unsigned int ret = RTN_UNICAST; - struct dn_fib_table *tb = dn_fib_get_table(RT_TABLE_LOCAL, 0); - - res.r = NULL; - - if (tb) { - if (!tb->lookup(tb, &fld, &res)) { - ret = res.type; - dn_fib_res_put(&res); - } - } - return ret; -} - -static int dn_fib_rule_fill(struct fib_rule *rule, struct sk_buff *skb, - struct fib_rule_hdr *frh) -{ - struct dn_fib_rule *r = (struct dn_fib_rule *)rule; - - frh->dst_len = r->dst_len; - frh->src_len = r->src_len; - frh->tos = 0; - - if ((r->dst_len && - nla_put_le16(skb, FRA_DST, r->dst)) || - (r->src_len && - nla_put_le16(skb, FRA_SRC, r->src))) - goto nla_put_failure; - return 0; - -nla_put_failure: - return -ENOBUFS; -} - -static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops) -{ - dn_rt_cache_flush(-1); -} - -static const struct fib_rules_ops __net_initconst dn_fib_rules_ops_template = { - .family = AF_DECnet, - .rule_size = sizeof(struct dn_fib_rule), - .addr_size = sizeof(u16), - .action = dn_fib_rule_action, - .match = dn_fib_rule_match, - .configure = dn_fib_rule_configure, - .compare = dn_fib_rule_compare, - .fill = dn_fib_rule_fill, - .flush_cache = dn_fib_rule_flush_cache, - .nlgroup = RTNLGRP_DECnet_RULE, - .owner = THIS_MODULE, - .fro_net = &init_net, -}; - -void __init dn_fib_rules_init(void) -{ - dn_fib_rules_ops = - fib_rules_register(&dn_fib_rules_ops_template, &init_net); - BUG_ON(IS_ERR(dn_fib_rules_ops)); - BUG_ON(fib_default_rule_add(dn_fib_rules_ops, 0x7fff, - RT_TABLE_MAIN, 0)); -} - -void __exit dn_fib_rules_cleanup(void) -{ - rtnl_lock(); - fib_rules_unregister(dn_fib_rules_ops); - rtnl_unlock(); - rcu_barrier(); -} diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c deleted file mode 100644 index 4086f9c746af..000000000000 --- a/net/decnet/dn_table.c +++ /dev/null @@ -1,929 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * DECnet An implementation of the DECnet protocol suite for the LINUX - * operating system. DECnet is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * DECnet Routing Forwarding Information Base (Routing Tables) - * - * Author: Steve Whitehouse - * Mostly copied from the IPv4 routing code - * - * - * Changes: - * - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* RTF_xxx */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct dn_zone -{ - struct dn_zone *dz_next; - struct dn_fib_node **dz_hash; - int dz_nent; - int dz_divisor; - u32 dz_hashmask; -#define DZ_HASHMASK(dz) ((dz)->dz_hashmask) - int dz_order; - __le16 dz_mask; -#define DZ_MASK(dz) ((dz)->dz_mask) -}; - -struct dn_hash -{ - struct dn_zone *dh_zones[17]; - struct dn_zone *dh_zone_list; -}; - -#define dz_key_0(key) ((key).datum = 0) - -#define for_nexthops(fi) { int nhsel; const struct dn_fib_nh *nh;\ - for(nhsel = 0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++) - -#define endfor_nexthops(fi) } - -#define DN_MAX_DIVISOR 1024 -#define DN_S_ZOMBIE 1 -#define DN_S_ACCESSED 2 - -#define DN_FIB_SCAN(f, fp) \ -for( ; ((f) = *(fp)) != NULL; (fp) = &(f)->fn_next) - -#define DN_FIB_SCAN_KEY(f, fp, key) \ -for( ; ((f) = *(fp)) != NULL && dn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_next) - -#define RT_TABLE_MIN 1 -#define DN_FIB_TABLE_HASHSZ 256 -static struct hlist_head dn_fib_table_hash[DN_FIB_TABLE_HASHSZ]; -static DEFINE_RWLOCK(dn_fib_tables_lock); - -static struct kmem_cache *dn_hash_kmem __read_mostly; -static int dn_fib_hash_zombies; - -static inline dn_fib_idx_t dn_hash(dn_fib_key_t key, struct dn_zone *dz) -{ - u16 h = le16_to_cpu(key.datum)>>(16 - dz->dz_order); - h ^= (h >> 10); - h ^= (h >> 6); - h &= DZ_HASHMASK(dz); - return *(dn_fib_idx_t *)&h; -} - -static inline dn_fib_key_t dz_key(__le16 dst, struct dn_zone *dz) -{ - dn_fib_key_t k; - k.datum = dst & DZ_MASK(dz); - return k; -} - -static inline struct dn_fib_node **dn_chain_p(dn_fib_key_t key, struct dn_zone *dz) -{ - return &dz->dz_hash[dn_hash(key, dz).datum]; -} - -static inline struct dn_fib_node *dz_chain(dn_fib_key_t key, struct dn_zone *dz) -{ - return dz->dz_hash[dn_hash(key, dz).datum]; -} - -static inline int dn_key_eq(dn_fib_key_t a, dn_fib_key_t b) -{ - return a.datum == b.datum; -} - -static inline int dn_key_leq(dn_fib_key_t a, dn_fib_key_t b) -{ - return a.datum <= b.datum; -} - -static inline void dn_rebuild_zone(struct dn_zone *dz, - struct dn_fib_node **old_ht, - int old_divisor) -{ - struct dn_fib_node *f, **fp, *next; - int i; - - for(i = 0; i < old_divisor; i++) { - for(f = old_ht[i]; f; f = next) { - next = f->fn_next; - for(fp = dn_chain_p(f->fn_key, dz); - *fp && dn_key_leq((*fp)->fn_key, f->fn_key); - fp = &(*fp)->fn_next) - /* NOTHING */; - f->fn_next = *fp; - *fp = f; - } - } -} - -static void dn_rehash_zone(struct dn_zone *dz) -{ - struct dn_fib_node **ht, **old_ht; - int old_divisor, new_divisor; - u32 new_hashmask; - - old_divisor = dz->dz_divisor; - - switch (old_divisor) { - case 16: - new_divisor = 256; - new_hashmask = 0xFF; - break; - default: - printk(KERN_DEBUG "DECnet: dn_rehash_zone: BUG! %d\n", - old_divisor); - fallthrough; - case 256: - new_divisor = 1024; - new_hashmask = 0x3FF; - break; - } - - ht = kcalloc(new_divisor, sizeof(struct dn_fib_node*), GFP_KERNEL); - if (ht == NULL) - return; - - write_lock_bh(&dn_fib_tables_lock); - old_ht = dz->dz_hash; - dz->dz_hash = ht; - dz->dz_hashmask = new_hashmask; - dz->dz_divisor = new_divisor; - dn_rebuild_zone(dz, old_ht, old_divisor); - write_unlock_bh(&dn_fib_tables_lock); - kfree(old_ht); -} - -static void dn_free_node(struct dn_fib_node *f) -{ - dn_fib_release_info(DN_FIB_INFO(f)); - kmem_cache_free(dn_hash_kmem, f); -} - - -static struct dn_zone *dn_new_zone(struct dn_hash *table, int z) -{ - int i; - struct dn_zone *dz = kzalloc(sizeof(struct dn_zone), GFP_KERNEL); - if (!dz) - return NULL; - - if (z) { - dz->dz_divisor = 16; - dz->dz_hashmask = 0x0F; - } else { - dz->dz_divisor = 1; - dz->dz_hashmask = 0; - } - - dz->dz_hash = kcalloc(dz->dz_divisor, sizeof(struct dn_fib_node *), GFP_KERNEL); - if (!dz->dz_hash) { - kfree(dz); - return NULL; - } - - dz->dz_order = z; - dz->dz_mask = dnet_make_mask(z); - - for(i = z + 1; i <= 16; i++) - if (table->dh_zones[i]) - break; - - write_lock_bh(&dn_fib_tables_lock); - if (i>16) { - dz->dz_next = table->dh_zone_list; - table->dh_zone_list = dz; - } else { - dz->dz_next = table->dh_zones[i]->dz_next; - table->dh_zones[i]->dz_next = dz; - } - table->dh_zones[z] = dz; - write_unlock_bh(&dn_fib_tables_lock); - return dz; -} - - -static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct nlattr *attrs[], struct dn_fib_info *fi) -{ - struct rtnexthop *nhp; - int nhlen; - - if (attrs[RTA_PRIORITY] && - nla_get_u32(attrs[RTA_PRIORITY]) != fi->fib_priority) - return 1; - - if (attrs[RTA_OIF] || attrs[RTA_GATEWAY]) { - if ((!attrs[RTA_OIF] || nla_get_u32(attrs[RTA_OIF]) == fi->fib_nh->nh_oif) && - (!attrs[RTA_GATEWAY] || nla_get_le16(attrs[RTA_GATEWAY]) != fi->fib_nh->nh_gw)) - return 0; - return 1; - } - - if (!attrs[RTA_MULTIPATH]) - return 0; - - nhp = nla_data(attrs[RTA_MULTIPATH]); - nhlen = nla_len(attrs[RTA_MULTIPATH]); - - for_nexthops(fi) { - int attrlen = nhlen - sizeof(struct rtnexthop); - __le16 gw; - - if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) - return -EINVAL; - if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif) - return 1; - if (attrlen) { - struct nlattr *gw_attr; - - gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY); - gw = gw_attr ? nla_get_le16(gw_attr) : 0; - - if (gw && gw != nh->nh_gw) - return 1; - } - nhp = RTNH_NEXT(nhp); - } endfor_nexthops(fi); - - return 0; -} - -static inline size_t dn_fib_nlmsg_size(struct dn_fib_info *fi) -{ - size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg)) - + nla_total_size(4) /* RTA_TABLE */ - + nla_total_size(2) /* RTA_DST */ - + nla_total_size(4) /* RTA_PRIORITY */ - + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */ - - /* space for nested metrics */ - payload += nla_total_size((RTAX_MAX * nla_total_size(4))); - - if (fi->fib_nhs) { - /* Also handles the special case fib_nhs == 1 */ - - /* each nexthop is packed in an attribute */ - size_t nhsize = nla_total_size(sizeof(struct rtnexthop)); - - /* may contain a gateway attribute */ - nhsize += nla_total_size(4); - - /* all nexthops are packed in a nested attribute */ - payload += nla_total_size(fi->fib_nhs * nhsize); - } - - return payload; -} - -static int dn_fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, - u32 tb_id, u8 type, u8 scope, void *dst, int dst_len, - struct dn_fib_info *fi, unsigned int flags) -{ - struct rtmsg *rtm; - struct nlmsghdr *nlh; - - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags); - if (!nlh) - return -EMSGSIZE; - - rtm = nlmsg_data(nlh); - rtm->rtm_family = AF_DECnet; - rtm->rtm_dst_len = dst_len; - rtm->rtm_src_len = 0; - rtm->rtm_tos = 0; - rtm->rtm_table = tb_id; - rtm->rtm_flags = fi->fib_flags; - rtm->rtm_scope = scope; - rtm->rtm_type = type; - rtm->rtm_protocol = fi->fib_protocol; - - if (nla_put_u32(skb, RTA_TABLE, tb_id) < 0) - goto errout; - - if (rtm->rtm_dst_len && - nla_put(skb, RTA_DST, 2, dst) < 0) - goto errout; - - if (fi->fib_priority && - nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority) < 0) - goto errout; - - if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) - goto errout; - - if (fi->fib_nhs == 1) { - if (fi->fib_nh->nh_gw && - nla_put_le16(skb, RTA_GATEWAY, fi->fib_nh->nh_gw) < 0) - goto errout; - - if (fi->fib_nh->nh_oif && - nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif) < 0) - goto errout; - } - - if (fi->fib_nhs > 1) { - struct rtnexthop *nhp; - struct nlattr *mp_head; - - mp_head = nla_nest_start_noflag(skb, RTA_MULTIPATH); - if (!mp_head) - goto errout; - - for_nexthops(fi) { - if (!(nhp = nla_reserve_nohdr(skb, sizeof(*nhp)))) - goto errout; - - nhp->rtnh_flags = nh->nh_flags & 0xFF; - nhp->rtnh_hops = nh->nh_weight - 1; - nhp->rtnh_ifindex = nh->nh_oif; - - if (nh->nh_gw && - nla_put_le16(skb, RTA_GATEWAY, nh->nh_gw) < 0) - goto errout; - - nhp->rtnh_len = skb_tail_pointer(skb) - (unsigned char *)nhp; - } endfor_nexthops(fi); - - nla_nest_end(skb, mp_head); - } - - nlmsg_end(skb, nlh); - return 0; - -errout: - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; -} - - -static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id, - struct nlmsghdr *nlh, struct netlink_skb_parms *req) -{ - struct sk_buff *skb; - u32 portid = req ? req->portid : 0; - int err = -ENOBUFS; - - skb = nlmsg_new(dn_fib_nlmsg_size(DN_FIB_INFO(f)), GFP_KERNEL); - if (skb == NULL) - goto errout; - - err = dn_fib_dump_info(skb, portid, nlh->nlmsg_seq, event, tb_id, - f->fn_type, f->fn_scope, &f->fn_key, z, - DN_FIB_INFO(f), 0); - if (err < 0) { - /* -EMSGSIZE implies BUG in dn_fib_nlmsg_size() */ - WARN_ON(err == -EMSGSIZE); - kfree_skb(skb); - goto errout; - } - rtnl_notify(skb, &init_net, portid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); - return; -errout: - if (err < 0) - rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_ROUTE, err); -} - -static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb, - struct netlink_callback *cb, - struct dn_fib_table *tb, - struct dn_zone *dz, - struct dn_fib_node *f) -{ - int i, s_i; - - s_i = cb->args[4]; - for(i = 0; f; i++, f = f->fn_next) { - if (i < s_i) - continue; - if (f->fn_state & DN_S_ZOMBIE) - continue; - if (dn_fib_dump_info(skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - RTM_NEWROUTE, - tb->n, - (f->fn_state & DN_S_ZOMBIE) ? 0 : f->fn_type, - f->fn_scope, &f->fn_key, dz->dz_order, - f->fn_info, NLM_F_MULTI) < 0) { - cb->args[4] = i; - return -1; - } - } - cb->args[4] = i; - return skb->len; -} - -static __inline__ int dn_hash_dump_zone(struct sk_buff *skb, - struct netlink_callback *cb, - struct dn_fib_table *tb, - struct dn_zone *dz) -{ - int h, s_h; - - s_h = cb->args[3]; - for(h = 0; h < dz->dz_divisor; h++) { - if (h < s_h) - continue; - if (h > s_h) - memset(&cb->args[4], 0, sizeof(cb->args) - 4*sizeof(cb->args[0])); - if (dz->dz_hash == NULL || dz->dz_hash[h] == NULL) - continue; - if (dn_hash_dump_bucket(skb, cb, tb, dz, dz->dz_hash[h]) < 0) { - cb->args[3] = h; - return -1; - } - } - cb->args[3] = h; - return skb->len; -} - -static int dn_fib_table_dump(struct dn_fib_table *tb, struct sk_buff *skb, - struct netlink_callback *cb) -{ - int m, s_m; - struct dn_zone *dz; - struct dn_hash *table = (struct dn_hash *)tb->data; - - s_m = cb->args[2]; - read_lock(&dn_fib_tables_lock); - for(dz = table->dh_zone_list, m = 0; dz; dz = dz->dz_next, m++) { - if (m < s_m) - continue; - if (m > s_m) - memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(cb->args[0])); - - if (dn_hash_dump_zone(skb, cb, tb, dz) < 0) { - cb->args[2] = m; - read_unlock(&dn_fib_tables_lock); - return -1; - } - } - read_unlock(&dn_fib_tables_lock); - cb->args[2] = m; - - return skb->len; -} - -int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct net *net = sock_net(skb->sk); - unsigned int h, s_h; - unsigned int e = 0, s_e; - struct dn_fib_table *tb; - int dumped = 0; - - if (!net_eq(net, &init_net)) - return 0; - - if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) && - ((struct rtmsg *)nlmsg_data(cb->nlh))->rtm_flags&RTM_F_CLONED) - return dn_cache_dump(skb, cb); - - s_h = cb->args[0]; - s_e = cb->args[1]; - - for (h = s_h; h < DN_FIB_TABLE_HASHSZ; h++, s_h = 0) { - e = 0; - hlist_for_each_entry(tb, &dn_fib_table_hash[h], hlist) { - if (e < s_e) - goto next; - if (dumped) - memset(&cb->args[2], 0, sizeof(cb->args) - - 2 * sizeof(cb->args[0])); - if (tb->dump(tb, skb, cb) < 0) - goto out; - dumped = 1; -next: - e++; - } - } -out: - cb->args[1] = e; - cb->args[0] = h; - - return skb->len; -} - -static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct nlattr *attrs[], - struct nlmsghdr *n, struct netlink_skb_parms *req) -{ - struct dn_hash *table = (struct dn_hash *)tb->data; - struct dn_fib_node *new_f, *f, **fp, **del_fp; - struct dn_zone *dz; - struct dn_fib_info *fi; - int z = r->rtm_dst_len; - int type = r->rtm_type; - dn_fib_key_t key; - int err; - - if (z > 16) - return -EINVAL; - - dz = table->dh_zones[z]; - if (!dz && !(dz = dn_new_zone(table, z))) - return -ENOBUFS; - - dz_key_0(key); - if (attrs[RTA_DST]) { - __le16 dst = nla_get_le16(attrs[RTA_DST]); - if (dst & ~DZ_MASK(dz)) - return -EINVAL; - key = dz_key(dst, dz); - } - - if ((fi = dn_fib_create_info(r, attrs, n, &err)) == NULL) - return err; - - if (dz->dz_nent > (dz->dz_divisor << 2) && - dz->dz_divisor > DN_MAX_DIVISOR && - (z==16 || (1< dz->dz_divisor)) - dn_rehash_zone(dz); - - fp = dn_chain_p(key, dz); - - DN_FIB_SCAN(f, fp) { - if (dn_key_leq(key, f->fn_key)) - break; - } - - del_fp = NULL; - - if (f && (f->fn_state & DN_S_ZOMBIE) && - dn_key_eq(f->fn_key, key)) { - del_fp = fp; - fp = &f->fn_next; - f = *fp; - goto create; - } - - DN_FIB_SCAN_KEY(f, fp, key) { - if (fi->fib_priority <= DN_FIB_INFO(f)->fib_priority) - break; - } - - if (f && dn_key_eq(f->fn_key, key) && - fi->fib_priority == DN_FIB_INFO(f)->fib_priority) { - struct dn_fib_node **ins_fp; - - err = -EEXIST; - if (n->nlmsg_flags & NLM_F_EXCL) - goto out; - - if (n->nlmsg_flags & NLM_F_REPLACE) { - del_fp = fp; - fp = &f->fn_next; - f = *fp; - goto replace; - } - - ins_fp = fp; - err = -EEXIST; - - DN_FIB_SCAN_KEY(f, fp, key) { - if (fi->fib_priority != DN_FIB_INFO(f)->fib_priority) - break; - if (f->fn_type == type && - f->fn_scope == r->rtm_scope && - DN_FIB_INFO(f) == fi) - goto out; - } - - if (!(n->nlmsg_flags & NLM_F_APPEND)) { - fp = ins_fp; - f = *fp; - } - } - -create: - err = -ENOENT; - if (!(n->nlmsg_flags & NLM_F_CREATE)) - goto out; - -replace: - err = -ENOBUFS; - new_f = kmem_cache_zalloc(dn_hash_kmem, GFP_KERNEL); - if (new_f == NULL) - goto out; - - new_f->fn_key = key; - new_f->fn_type = type; - new_f->fn_scope = r->rtm_scope; - DN_FIB_INFO(new_f) = fi; - - new_f->fn_next = f; - write_lock_bh(&dn_fib_tables_lock); - *fp = new_f; - write_unlock_bh(&dn_fib_tables_lock); - dz->dz_nent++; - - if (del_fp) { - f = *del_fp; - write_lock_bh(&dn_fib_tables_lock); - *del_fp = f->fn_next; - write_unlock_bh(&dn_fib_tables_lock); - - if (!(f->fn_state & DN_S_ZOMBIE)) - dn_rtmsg_fib(RTM_DELROUTE, f, z, tb->n, n, req); - if (f->fn_state & DN_S_ACCESSED) - dn_rt_cache_flush(-1); - dn_free_node(f); - dz->dz_nent--; - } else { - dn_rt_cache_flush(-1); - } - - dn_rtmsg_fib(RTM_NEWROUTE, new_f, z, tb->n, n, req); - - return 0; -out: - dn_fib_release_info(fi); - return err; -} - - -static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct nlattr *attrs[], - struct nlmsghdr *n, struct netlink_skb_parms *req) -{ - struct dn_hash *table = (struct dn_hash*)tb->data; - struct dn_fib_node **fp, **del_fp, *f; - int z = r->rtm_dst_len; - struct dn_zone *dz; - dn_fib_key_t key; - int matched; - - - if (z > 16) - return -EINVAL; - - if ((dz = table->dh_zones[z]) == NULL) - return -ESRCH; - - dz_key_0(key); - if (attrs[RTA_DST]) { - __le16 dst = nla_get_le16(attrs[RTA_DST]); - if (dst & ~DZ_MASK(dz)) - return -EINVAL; - key = dz_key(dst, dz); - } - - fp = dn_chain_p(key, dz); - - DN_FIB_SCAN(f, fp) { - if (dn_key_eq(f->fn_key, key)) - break; - if (dn_key_leq(key, f->fn_key)) - return -ESRCH; - } - - matched = 0; - del_fp = NULL; - DN_FIB_SCAN_KEY(f, fp, key) { - struct dn_fib_info *fi = DN_FIB_INFO(f); - - if (f->fn_state & DN_S_ZOMBIE) - return -ESRCH; - - matched++; - - if (del_fp == NULL && - (!r->rtm_type || f->fn_type == r->rtm_type) && - (r->rtm_scope == RT_SCOPE_NOWHERE || f->fn_scope == r->rtm_scope) && - (!r->rtm_protocol || - fi->fib_protocol == r->rtm_protocol) && - dn_fib_nh_match(r, n, attrs, fi) == 0) - del_fp = fp; - } - - if (del_fp) { - f = *del_fp; - dn_rtmsg_fib(RTM_DELROUTE, f, z, tb->n, n, req); - - if (matched != 1) { - write_lock_bh(&dn_fib_tables_lock); - *del_fp = f->fn_next; - write_unlock_bh(&dn_fib_tables_lock); - - if (f->fn_state & DN_S_ACCESSED) - dn_rt_cache_flush(-1); - dn_free_node(f); - dz->dz_nent--; - } else { - f->fn_state |= DN_S_ZOMBIE; - if (f->fn_state & DN_S_ACCESSED) { - f->fn_state &= ~DN_S_ACCESSED; - dn_rt_cache_flush(-1); - } - if (++dn_fib_hash_zombies > 128) - dn_fib_flush(); - } - - return 0; - } - - return -ESRCH; -} - -static inline int dn_flush_list(struct dn_fib_node **fp, int z, struct dn_hash *table) -{ - int found = 0; - struct dn_fib_node *f; - - while((f = *fp) != NULL) { - struct dn_fib_info *fi = DN_FIB_INFO(f); - - if (fi && ((f->fn_state & DN_S_ZOMBIE) || (fi->fib_flags & RTNH_F_DEAD))) { - write_lock_bh(&dn_fib_tables_lock); - *fp = f->fn_next; - write_unlock_bh(&dn_fib_tables_lock); - - dn_free_node(f); - found++; - continue; - } - fp = &f->fn_next; - } - - return found; -} - -static int dn_fib_table_flush(struct dn_fib_table *tb) -{ - struct dn_hash *table = (struct dn_hash *)tb->data; - struct dn_zone *dz; - int found = 0; - - dn_fib_hash_zombies = 0; - for(dz = table->dh_zone_list; dz; dz = dz->dz_next) { - int i; - int tmp = 0; - for(i = dz->dz_divisor-1; i >= 0; i--) - tmp += dn_flush_list(&dz->dz_hash[i], dz->dz_order, table); - dz->dz_nent -= tmp; - found += tmp; - } - - return found; -} - -static int dn_fib_table_lookup(struct dn_fib_table *tb, const struct flowidn *flp, struct dn_fib_res *res) -{ - int err; - struct dn_zone *dz; - struct dn_hash *t = (struct dn_hash *)tb->data; - - read_lock(&dn_fib_tables_lock); - for(dz = t->dh_zone_list; dz; dz = dz->dz_next) { - struct dn_fib_node *f; - dn_fib_key_t k = dz_key(flp->daddr, dz); - - for(f = dz_chain(k, dz); f; f = f->fn_next) { - if (!dn_key_eq(k, f->fn_key)) { - if (dn_key_leq(k, f->fn_key)) - break; - else - continue; - } - - f->fn_state |= DN_S_ACCESSED; - - if (f->fn_state&DN_S_ZOMBIE) - continue; - - if (f->fn_scope < flp->flowidn_scope) - continue; - - err = dn_fib_semantic_match(f->fn_type, DN_FIB_INFO(f), flp, res); - - if (err == 0) { - res->type = f->fn_type; - res->scope = f->fn_scope; - res->prefixlen = dz->dz_order; - goto out; - } - if (err < 0) - goto out; - } - } - err = 1; -out: - read_unlock(&dn_fib_tables_lock); - return err; -} - - -struct dn_fib_table *dn_fib_get_table(u32 n, int create) -{ - struct dn_fib_table *t; - unsigned int h; - - if (n < RT_TABLE_MIN) - return NULL; - - if (n > RT_TABLE_MAX) - return NULL; - - h = n & (DN_FIB_TABLE_HASHSZ - 1); - rcu_read_lock(); - hlist_for_each_entry_rcu(t, &dn_fib_table_hash[h], hlist) { - if (t->n == n) { - rcu_read_unlock(); - return t; - } - } - rcu_read_unlock(); - - if (!create) - return NULL; - - if (in_interrupt()) { - net_dbg_ratelimited("DECnet: BUG! Attempt to create routing table from interrupt\n"); - return NULL; - } - - t = kzalloc(sizeof(struct dn_fib_table) + sizeof(struct dn_hash), - GFP_KERNEL); - if (t == NULL) - return NULL; - - t->n = n; - t->insert = dn_fib_table_insert; - t->delete = dn_fib_table_delete; - t->lookup = dn_fib_table_lookup; - t->flush = dn_fib_table_flush; - t->dump = dn_fib_table_dump; - hlist_add_head_rcu(&t->hlist, &dn_fib_table_hash[h]); - - return t; -} - -struct dn_fib_table *dn_fib_empty_table(void) -{ - u32 id; - - for(id = RT_TABLE_MIN; id <= RT_TABLE_MAX; id++) - if (dn_fib_get_table(id, 0) == NULL) - return dn_fib_get_table(id, 1); - return NULL; -} - -void dn_fib_flush(void) -{ - int flushed = 0; - struct dn_fib_table *tb; - unsigned int h; - - for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) { - hlist_for_each_entry(tb, &dn_fib_table_hash[h], hlist) - flushed += tb->flush(tb); - } - - if (flushed) - dn_rt_cache_flush(-1); -} - -void __init dn_fib_table_init(void) -{ - dn_hash_kmem = kmem_cache_create("dn_fib_info_cache", - sizeof(struct dn_fib_info), - 0, SLAB_HWCACHE_ALIGN, - NULL); -} - -void __exit dn_fib_table_cleanup(void) -{ - struct dn_fib_table *t; - struct hlist_node *next; - unsigned int h; - - write_lock(&dn_fib_tables_lock); - for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) { - hlist_for_each_entry_safe(t, next, &dn_fib_table_hash[h], - hlist) { - hlist_del(&t->hlist); - kfree(t); - } - } - write_unlock(&dn_fib_tables_lock); -} diff --git a/net/decnet/dn_timer.c b/net/decnet/dn_timer.c deleted file mode 100644 index aa4155875ca8..000000000000 --- a/net/decnet/dn_timer.c +++ /dev/null @@ -1,104 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * DECnet An implementation of the DECnet protocol suite for the LINUX - * operating system. DECnet is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * DECnet Socket Timer Functions - * - * Author: Steve Whitehouse - * - * - * Changes: - * Steve Whitehouse : Made keepalive timer part of the same - * timer idea. - * Steve Whitehouse : Added checks for sk->sock_readers - * David S. Miller : New socket locking - * Steve Whitehouse : Timer grabs socket ref. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Slow timer is for everything else (n * 500mS) - */ - -#define SLOW_INTERVAL (HZ/2) - -static void dn_slow_timer(struct timer_list *t); - -void dn_start_slow_timer(struct sock *sk) -{ - timer_setup(&sk->sk_timer, dn_slow_timer, 0); - sk_reset_timer(sk, &sk->sk_timer, jiffies + SLOW_INTERVAL); -} - -void dn_stop_slow_timer(struct sock *sk) -{ - sk_stop_timer(sk, &sk->sk_timer); -} - -static void dn_slow_timer(struct timer_list *t) -{ - struct sock *sk = from_timer(sk, t, sk_timer); - struct dn_scp *scp = DN_SK(sk); - - bh_lock_sock(sk); - - if (sock_owned_by_user(sk)) { - sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 10); - goto out; - } - - /* - * The persist timer is the standard slow timer used for retransmits - * in both connection establishment and disconnection as well as - * in the RUN state. The different states are catered for by changing - * the function pointer in the socket. Setting the timer to a value - * of zero turns it off. We allow the persist_fxn to turn the - * timer off in a permant way by returning non-zero, so that - * timer based routines may remove sockets. This is why we have a - * sock_hold()/sock_put() around the timer to prevent the socket - * going away in the middle. - */ - if (scp->persist && scp->persist_fxn) { - if (scp->persist <= SLOW_INTERVAL) { - scp->persist = 0; - - if (scp->persist_fxn(sk)) - goto out; - } else { - scp->persist -= SLOW_INTERVAL; - } - } - - /* - * Check for keepalive timeout. After the other timer 'cos if - * the previous timer caused a retransmit, we don't need to - * do this. scp->stamp is the last time that we sent a packet. - * The keepalive function sends a link service packet to the - * other end. If it remains unacknowledged, the standard - * socket timers will eventually shut the socket down. Each - * time we do this, scp->stamp will be updated, thus - * we won't try and send another until scp->keepalive has passed - * since the last successful transmission. - */ - if (scp->keepalive && scp->keepalive_fxn && (scp->state == DN_RUN)) { - if (time_after_eq(jiffies, scp->stamp + scp->keepalive)) - scp->keepalive_fxn(sk); - } - - sk_reset_timer(sk, &sk->sk_timer, jiffies + SLOW_INTERVAL); -out: - bh_unlock_sock(sk); - sock_put(sk); -} diff --git a/net/decnet/netfilter/Kconfig b/net/decnet/netfilter/Kconfig deleted file mode 100644 index 14ec4ef95fab..000000000000 --- a/net/decnet/netfilter/Kconfig +++ /dev/null @@ -1,17 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# DECnet netfilter configuration -# - -menu "DECnet: Netfilter Configuration" - depends on DECNET && NETFILTER - depends on NETFILTER_ADVANCED - -config DECNET_NF_GRABULATOR - tristate "Routing message grabulator (for userland routing daemon)" - help - Enable this module if you want to use the userland DECnet routing - daemon. You will also need to enable routing support for DECnet - unless you just want to monitor routing messages from other nodes. - -endmenu diff --git a/net/decnet/netfilter/Makefile b/net/decnet/netfilter/Makefile deleted file mode 100644 index 429c84289d0f..000000000000 --- a/net/decnet/netfilter/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# Makefile for DECnet netfilter modules -# - -obj-$(CONFIG_DECNET_NF_GRABULATOR) += dn_rtmsg.o diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c deleted file mode 100644 index 26a9193df783..000000000000 --- a/net/decnet/netfilter/dn_rtmsg.c +++ /dev/null @@ -1,158 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * DECnet An implementation of the DECnet protocol suite for the LINUX - * operating system. DECnet is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * DECnet Routing Message Grabulator - * - * (C) 2000 ChyGwyn Limited - https://www.chygwyn.com/ - * - * Author: Steven Whitehouse - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -static struct sock *dnrmg = NULL; - - -static struct sk_buff *dnrmg_build_message(struct sk_buff *rt_skb, int *errp) -{ - struct sk_buff *skb = NULL; - size_t size; - sk_buff_data_t old_tail; - struct nlmsghdr *nlh; - unsigned char *ptr; - struct nf_dn_rtmsg *rtm; - - size = NLMSG_ALIGN(rt_skb->len) + - NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg)); - skb = nlmsg_new(size, GFP_ATOMIC); - if (!skb) { - *errp = -ENOMEM; - return NULL; - } - old_tail = skb->tail; - nlh = nlmsg_put(skb, 0, 0, 0, size, 0); - if (!nlh) { - kfree_skb(skb); - *errp = -ENOMEM; - return NULL; - } - rtm = (struct nf_dn_rtmsg *)nlmsg_data(nlh); - rtm->nfdn_ifindex = rt_skb->dev->ifindex; - ptr = NFDN_RTMSG(rtm); - skb_copy_from_linear_data(rt_skb, ptr, rt_skb->len); - nlh->nlmsg_len = skb->tail - old_tail; - return skb; -} - -static void dnrmg_send_peer(struct sk_buff *skb) -{ - struct sk_buff *skb2; - int status = 0; - int group = 0; - unsigned char flags = *skb->data; - - switch (flags & DN_RT_CNTL_MSK) { - case DN_RT_PKT_L1RT: - group = DNRNG_NLGRP_L1; - break; - case DN_RT_PKT_L2RT: - group = DNRNG_NLGRP_L2; - break; - default: - return; - } - - skb2 = dnrmg_build_message(skb, &status); - if (skb2 == NULL) - return; - NETLINK_CB(skb2).dst_group = group; - netlink_broadcast(dnrmg, skb2, 0, group, GFP_ATOMIC); -} - - -static unsigned int dnrmg_hook(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - dnrmg_send_peer(skb); - return NF_ACCEPT; -} - - -#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err), NULL); return; } while (0) - -static inline void dnrmg_receive_user_skb(struct sk_buff *skb) -{ - struct nlmsghdr *nlh = nlmsg_hdr(skb); - - if (skb->len < sizeof(*nlh) || - nlh->nlmsg_len < sizeof(*nlh) || - skb->len < nlh->nlmsg_len) - return; - - if (!netlink_capable(skb, CAP_NET_ADMIN)) - RCV_SKB_FAIL(-EPERM); - - /* Eventually we might send routing messages too */ - - RCV_SKB_FAIL(-EINVAL); -} - -static const struct nf_hook_ops dnrmg_ops = { - .hook = dnrmg_hook, - .pf = NFPROTO_DECNET, - .hooknum = NF_DN_ROUTE, - .priority = NF_DN_PRI_DNRTMSG, -}; - -static int __init dn_rtmsg_init(void) -{ - int rv = 0; - struct netlink_kernel_cfg cfg = { - .groups = DNRNG_NLGRP_MAX, - .input = dnrmg_receive_user_skb, - }; - - dnrmg = netlink_kernel_create(&init_net, NETLINK_DNRTMSG, &cfg); - if (dnrmg == NULL) { - printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket"); - return -ENOMEM; - } - - rv = nf_register_net_hook(&init_net, &dnrmg_ops); - if (rv) { - netlink_kernel_release(dnrmg); - } - - return rv; -} - -static void __exit dn_rtmsg_fini(void) -{ - nf_unregister_net_hook(&init_net, &dnrmg_ops); - netlink_kernel_release(dnrmg); -} - - -MODULE_DESCRIPTION("DECnet Routing Message Grabulator"); -MODULE_AUTHOR("Steven Whitehouse "); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_DNRTMSG); - -module_init(dn_rtmsg_init); -module_exit(dn_rtmsg_fini); diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c deleted file mode 100644 index 67b5ab2657b7..000000000000 --- a/net/decnet/sysctl_net_decnet.c +++ /dev/null @@ -1,362 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * DECnet An implementation of the DECnet protocol suite for the LINUX - * operating system. DECnet is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * DECnet sysctl support functions - * - * Author: Steve Whitehouse - * - * - * Changes: - * Steve Whitehouse - C99 changes and default device handling - * Steve Whitehouse - Memory buffer settings, like the tcp ones - * - */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include - - -int decnet_debug_level; -int decnet_time_wait = 30; -int decnet_dn_count = 1; -int decnet_di_count = 3; -int decnet_dr_count = 3; -int decnet_log_martians = 1; -int decnet_no_fc_max_cwnd = NSP_MIN_WINDOW; - -/* Reasonable defaults, I hope, based on tcp's defaults */ -long sysctl_decnet_mem[3] = { 768 << 3, 1024 << 3, 1536 << 3 }; -int sysctl_decnet_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 }; -int sysctl_decnet_rmem[3] = { 4 * 1024, 87380, 87380 * 2 }; - -#ifdef CONFIG_SYSCTL -extern int decnet_dst_gc_interval; -static int min_decnet_time_wait[] = { 5 }; -static int max_decnet_time_wait[] = { 600 }; -static int min_state_count[] = { 1 }; -static int max_state_count[] = { NSP_MAXRXTSHIFT }; -static int min_decnet_dst_gc_interval[] = { 1 }; -static int max_decnet_dst_gc_interval[] = { 60 }; -static int min_decnet_no_fc_max_cwnd[] = { NSP_MIN_WINDOW }; -static int max_decnet_no_fc_max_cwnd[] = { NSP_MAX_WINDOW }; -static char node_name[7] = "???"; - -static struct ctl_table_header *dn_table_header = NULL; - -/* - * ctype.h :-) - */ -#define ISNUM(x) (((x) >= '0') && ((x) <= '9')) -#define ISLOWER(x) (((x) >= 'a') && ((x) <= 'z')) -#define ISUPPER(x) (((x) >= 'A') && ((x) <= 'Z')) -#define ISALPHA(x) (ISLOWER(x) || ISUPPER(x)) -#define INVALID_END_CHAR(x) (ISNUM(x) || ISALPHA(x)) - -static void strip_it(char *str) -{ - for(;;) { - switch (*str) { - case ' ': - case '\n': - case '\r': - case ':': - *str = 0; - fallthrough; - case 0: - return; - } - str++; - } -} - -/* - * Simple routine to parse an ascii DECnet address - * into a network order address. - */ -static int parse_addr(__le16 *addr, char *str) -{ - __u16 area, node; - - while(*str && !ISNUM(*str)) str++; - - if (*str == 0) - return -1; - - area = (*str++ - '0'); - if (ISNUM(*str)) { - area *= 10; - area += (*str++ - '0'); - } - - if (*str++ != '.') - return -1; - - if (!ISNUM(*str)) - return -1; - - node = *str++ - '0'; - if (ISNUM(*str)) { - node *= 10; - node += (*str++ - '0'); - } - if (ISNUM(*str)) { - node *= 10; - node += (*str++ - '0'); - } - if (ISNUM(*str)) { - node *= 10; - node += (*str++ - '0'); - } - - if ((node > 1023) || (area > 63)) - return -1; - - if (INVALID_END_CHAR(*str)) - return -1; - - *addr = cpu_to_le16((area << 10) | node); - - return 0; -} - -static int dn_node_address_handler(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - char addr[DN_ASCBUF_LEN]; - size_t len; - __le16 dnaddr; - - if (!*lenp || (*ppos && !write)) { - *lenp = 0; - return 0; - } - - if (write) { - len = (*lenp < DN_ASCBUF_LEN) ? *lenp : (DN_ASCBUF_LEN-1); - memcpy(addr, buffer, len); - addr[len] = 0; - strip_it(addr); - - if (parse_addr(&dnaddr, addr)) - return -EINVAL; - - dn_dev_devices_off(); - - decnet_address = dnaddr; - - dn_dev_devices_on(); - - *ppos += len; - - return 0; - } - - dn_addr2asc(le16_to_cpu(decnet_address), addr); - len = strlen(addr); - addr[len++] = '\n'; - - if (len > *lenp) - len = *lenp; - memcpy(buffer, addr, len); - *lenp = len; - *ppos += len; - - return 0; -} - -static int dn_def_dev_handler(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - size_t len; - struct net_device *dev; - char devname[17]; - - if (!*lenp || (*ppos && !write)) { - *lenp = 0; - return 0; - } - - if (write) { - if (*lenp > 16) - return -E2BIG; - - memcpy(devname, buffer, *lenp); - devname[*lenp] = 0; - strip_it(devname); - - dev = dev_get_by_name(&init_net, devname); - if (dev == NULL) - return -ENODEV; - - if (dev->dn_ptr == NULL) { - dev_put(dev); - return -ENODEV; - } - - if (dn_dev_set_default(dev, 1)) { - dev_put(dev); - return -ENODEV; - } - *ppos += *lenp; - - return 0; - } - - dev = dn_dev_get_default(); - if (dev == NULL) { - *lenp = 0; - return 0; - } - - strcpy(devname, dev->name); - dev_put(dev); - len = strlen(devname); - devname[len++] = '\n'; - - if (len > *lenp) len = *lenp; - - memcpy(buffer, devname, len); - *lenp = len; - *ppos += len; - - return 0; -} - -static struct ctl_table dn_table[] = { - { - .procname = "node_address", - .maxlen = 7, - .mode = 0644, - .proc_handler = dn_node_address_handler, - }, - { - .procname = "node_name", - .data = node_name, - .maxlen = 7, - .mode = 0644, - .proc_handler = proc_dostring, - }, - { - .procname = "default_device", - .maxlen = 16, - .mode = 0644, - .proc_handler = dn_def_dev_handler, - }, - { - .procname = "time_wait", - .data = &decnet_time_wait, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_decnet_time_wait, - .extra2 = &max_decnet_time_wait - }, - { - .procname = "dn_count", - .data = &decnet_dn_count, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_state_count, - .extra2 = &max_state_count - }, - { - .procname = "di_count", - .data = &decnet_di_count, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_state_count, - .extra2 = &max_state_count - }, - { - .procname = "dr_count", - .data = &decnet_dr_count, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_state_count, - .extra2 = &max_state_count - }, - { - .procname = "dst_gc_interval", - .data = &decnet_dst_gc_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_decnet_dst_gc_interval, - .extra2 = &max_decnet_dst_gc_interval - }, - { - .procname = "no_fc_max_cwnd", - .data = &decnet_no_fc_max_cwnd, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_decnet_no_fc_max_cwnd, - .extra2 = &max_decnet_no_fc_max_cwnd - }, - { - .procname = "decnet_mem", - .data = &sysctl_decnet_mem, - .maxlen = sizeof(sysctl_decnet_mem), - .mode = 0644, - .proc_handler = proc_doulongvec_minmax - }, - { - .procname = "decnet_rmem", - .data = &sysctl_decnet_rmem, - .maxlen = sizeof(sysctl_decnet_rmem), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "decnet_wmem", - .data = &sysctl_decnet_wmem, - .maxlen = sizeof(sysctl_decnet_wmem), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "debug", - .data = &decnet_debug_level, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { } -}; - -void dn_register_sysctl(void) -{ - dn_table_header = register_net_sysctl(&init_net, "net/decnet", dn_table); -} - -void dn_unregister_sysctl(void) -{ - unregister_net_sysctl_table(dn_table_header); -} - -#else /* CONFIG_SYSCTL */ -void dn_unregister_sysctl(void) -{ -} -void dn_register_sysctl(void) -{ -} - -#endif diff --git a/net/netfilter/core.c b/net/netfilter/core.c index dcf752b55a52..5a6705a0e4ec 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -300,12 +300,6 @@ nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum, if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv6) <= hooknum)) return NULL; return net->nf.hooks_ipv6 + hooknum; -#if IS_ENABLED(CONFIG_DECNET) - case NFPROTO_DECNET: - if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_decnet) <= hooknum)) - return NULL; - return net->nf.hooks_decnet + hooknum; -#endif default: WARN_ON_ONCE(1); return NULL; @@ -750,10 +744,6 @@ static int __net_init netfilter_net_init(struct net *net) #ifdef CONFIG_NETFILTER_FAMILY_BRIDGE __netfilter_net_init(net->nf.hooks_bridge, ARRAY_SIZE(net->nf.hooks_bridge)); #endif -#if IS_ENABLED(CONFIG_DECNET) - __netfilter_net_init(net->nf.hooks_decnet, ARRAY_SIZE(net->nf.hooks_decnet)); -#endif - #ifdef CONFIG_PROC_FS net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter", net->proc_net); diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c index 71e29adac48b..8120aadf6a0f 100644 --- a/net/netfilter/nfnetlink_hook.c +++ b/net/netfilter/nfnetlink_hook.c @@ -215,13 +215,6 @@ nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *de hook_head = rcu_dereference(net->nf.hooks_bridge[hook]); #endif break; -#if IS_ENABLED(CONFIG_DECNET) - case NFPROTO_DECNET: - if (hook >= ARRAY_SIZE(net->nf.hooks_decnet)) - return ERR_PTR(-EINVAL); - hook_head = rcu_dereference(net->nf.hooks_decnet[hook]); - break; -#endif #if defined(CONFIG_NETFILTER_INGRESS) || defined(CONFIG_NETFILTER_EGRESS) case NFPROTO_NETDEV: if (hook >= NF_NETDEV_NUMHOOKS) -- cgit v1.2.3-70-g09d2 From e38f22c860edb7804b4722ac2332f7c51b9c6b72 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Fri, 19 Aug 2022 05:25:19 +0000 Subject: vsock: SO_RCVLOWAT transport set callback This adds transport specific callback for SO_RCVLOWAT, because in some transports it may be difficult to know current available number of bytes ready to read. Thus, when SO_RCVLOWAT is set, transport may reject it. Signed-off-by: Arseniy Krasnov Signed-off-by: Paolo Abeni --- include/net/af_vsock.h | 1 + net/vmw_vsock/af_vsock.c | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) (limited to 'include/net') diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index 1c53c4c4d88f..d609a088cb27 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -135,6 +135,7 @@ struct vsock_transport { u64 (*stream_rcvhiwat)(struct vsock_sock *); bool (*stream_is_active)(struct vsock_sock *); bool (*stream_allow)(u32 cid, u32 port); + int (*set_rcvlowat)(struct vsock_sock *vsk, int val); /* SEQ_PACKET. */ ssize_t (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg, diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index b4ee163154a6..07c8f74a821f 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -2137,6 +2137,25 @@ out: return err; } +static int vsock_set_rcvlowat(struct sock *sk, int val) +{ + const struct vsock_transport *transport; + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + + if (val > vsk->buffer_size) + return -EINVAL; + + transport = vsk->transport; + + if (transport && transport->set_rcvlowat) + return transport->set_rcvlowat(vsk, val); + + WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); + return 0; +} + static const struct proto_ops vsock_stream_ops = { .family = PF_VSOCK, .owner = THIS_MODULE, @@ -2156,6 +2175,7 @@ static const struct proto_ops vsock_stream_ops = { .recvmsg = vsock_connectible_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, + .set_rcvlowat = vsock_set_rcvlowat, }; static const struct proto_ops vsock_seqpacket_ops = { -- cgit v1.2.3-70-g09d2 From f2fdcf67aceb1a7d5e0661cb7ca95cda68d3014a Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Fri, 19 Aug 2022 05:36:52 +0000 Subject: vsock: add API call for data ready This adds 'vsock_data_ready()' which must be called by transport to kick sleeping data readers. It checks for SO_RCVLOWAT value before waking user, thus preventing spurious wake ups. Based on 'tcp_data_ready()' logic. Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Signed-off-by: Paolo Abeni --- include/net/af_vsock.h | 1 + net/vmw_vsock/af_vsock.c | 10 ++++++++++ 2 files changed, 11 insertions(+) (limited to 'include/net') diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index d609a088cb27..568a87c5e0d0 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -78,6 +78,7 @@ struct vsock_sock { s64 vsock_stream_has_data(struct vsock_sock *vsk); s64 vsock_stream_has_space(struct vsock_sock *vsk); struct sock *vsock_create_connected(struct sock *parent); +void vsock_data_ready(struct sock *sk); /**** TRANSPORT ****/ diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 15171ba76cc3..ee418701cdee 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -882,6 +882,16 @@ s64 vsock_stream_has_space(struct vsock_sock *vsk) } EXPORT_SYMBOL_GPL(vsock_stream_has_space); +void vsock_data_ready(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (vsock_stream_has_data(vsk) >= sk->sk_rcvlowat || + sock_flag(sk, SOCK_DONE)) + sk->sk_data_ready(sk); +} +EXPORT_SYMBOL_GPL(vsock_data_ready); + static int vsock_release(struct socket *sock) { __vsock_release(sock->sk, 0); -- cgit v1.2.3-70-g09d2 From 5dc760d1208200daaf49a2ff56a0e7dfa2d80bb2 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 19 Aug 2022 20:48:18 +0300 Subject: net: dsa: use dsa_tree_for_each_cpu_port in dsa_tree_{setup,teardown}_master More logic will be added to dsa_tree_setup_master() and dsa_tree_teardown_master() in upcoming changes. Reduce the indentation by one level in these functions by introducing and using a dedicated iterator for CPU ports of a tree. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Paolo Abeni --- include/net/dsa.h | 4 ++++ net/dsa/dsa2.c | 46 +++++++++++++++++++++------------------------- 2 files changed, 25 insertions(+), 25 deletions(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index b902b31bebce..f2ce12860546 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -559,6 +559,10 @@ static inline bool dsa_is_user_port(struct dsa_switch *ds, int p) list_for_each_entry((_dp), &(_dst)->ports, list) \ if (dsa_port_is_user((_dp))) +#define dsa_tree_for_each_cpu_port(_dp, _dst) \ + list_for_each_entry((_dp), &(_dst)->ports, list) \ + if (dsa_port_is_cpu((_dp))) + #define dsa_switch_for_each_port(_dp, _ds) \ list_for_each_entry((_dp), &(_ds)->dst->ports, list) \ if ((_dp)->ds == (_ds)) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index c7bdf7106d23..ed56c7a554b8 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -1060,26 +1060,24 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst) static int dsa_tree_setup_master(struct dsa_switch_tree *dst) { - struct dsa_port *dp; + struct dsa_port *cpu_dp; int err = 0; rtnl_lock(); - list_for_each_entry(dp, &dst->ports, list) { - if (dsa_port_is_cpu(dp)) { - struct net_device *master = dp->master; - bool admin_up = (master->flags & IFF_UP) && - !qdisc_tx_is_noop(master); + dsa_tree_for_each_cpu_port(cpu_dp, dst) { + struct net_device *master = cpu_dp->master; + bool admin_up = (master->flags & IFF_UP) && + !qdisc_tx_is_noop(master); - err = dsa_master_setup(master, dp); - if (err) - break; + err = dsa_master_setup(master, cpu_dp); + if (err) + break; - /* Replay master state event */ - dsa_tree_master_admin_state_change(dst, master, admin_up); - dsa_tree_master_oper_state_change(dst, master, - netif_oper_up(master)); - } + /* Replay master state event */ + dsa_tree_master_admin_state_change(dst, master, admin_up); + dsa_tree_master_oper_state_change(dst, master, + netif_oper_up(master)); } rtnl_unlock(); @@ -1089,22 +1087,20 @@ static int dsa_tree_setup_master(struct dsa_switch_tree *dst) static void dsa_tree_teardown_master(struct dsa_switch_tree *dst) { - struct dsa_port *dp; + struct dsa_port *cpu_dp; rtnl_lock(); - list_for_each_entry(dp, &dst->ports, list) { - if (dsa_port_is_cpu(dp)) { - struct net_device *master = dp->master; + dsa_tree_for_each_cpu_port(cpu_dp, dst) { + struct net_device *master = cpu_dp->master; - /* Synthesizing an "admin down" state is sufficient for - * the switches to get a notification if the master is - * currently up and running. - */ - dsa_tree_master_admin_state_change(dst, master, false); + /* Synthesizing an "admin down" state is sufficient for + * the switches to get a notification if the master is + * currently up and running. + */ + dsa_tree_master_admin_state_change(dst, master, false); - dsa_master_teardown(master); - } + dsa_master_teardown(master); } rtnl_unlock(); -- cgit v1.2.3-70-g09d2 From 0bf73255d3a3cf3b0416e95f2c9f7c53095c2e1a Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 24 Aug 2022 09:36:21 +0800 Subject: netlink: fix some kernel-doc comments Modify the comment of input parameter of nlmsg_ and nla_ function. Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20220824013621.365103-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski --- include/net/netlink.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netlink.h b/include/net/netlink.h index 7a2a9d3144ba..e658d18afa67 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -741,6 +741,7 @@ static inline int __nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen, * @hdrlen: length of family specific header * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected + * @policy: validation policy * @extack: extended ACK report struct * * See nla_parse() @@ -760,6 +761,7 @@ static inline int nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen, * @hdrlen: length of family specific header * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected + * @policy: validation policy * @extack: extended ACK report struct * * See nla_parse_deprecated() @@ -779,6 +781,7 @@ static inline int nlmsg_parse_deprecated(const struct nlmsghdr *nlh, int hdrlen, * @hdrlen: length of family specific header * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected + * @policy: validation policy * @extack: extended ACK report struct * * See nla_parse_deprecated_strict() @@ -814,7 +817,6 @@ static inline struct nlattr *nlmsg_find_attr(const struct nlmsghdr *nlh, * @len: length of attribute stream * @maxtype: maximum attribute type to be expected * @policy: validation policy - * @validate: validation strictness * @extack: extended ACK report struct * * Validates all attributes in the specified attribute stream against the -- cgit v1.2.3-70-g09d2 From 28044fc1d4953b07acec0da4d2fc4784c57ea6fb Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Mon, 22 Aug 2022 11:10:21 -0700 Subject: net: Add a bhash2 table hashed by port and address The current bind hashtable (bhash) is hashed by port only. In the socket bind path, we have to check for bind conflicts by traversing the specified port's inet_bind_bucket while holding the hashbucket's spinlock (see inet_csk_get_port() and inet_csk_bind_conflict()). In instances where there are tons of sockets hashed to the same port at different addresses, the bind conflict check is time-intensive and can cause softirq cpu lockups, as well as stops new tcp connections since __inet_inherit_port() also contests for the spinlock. This patch adds a second bind table, bhash2, that hashes by port and sk->sk_rcv_saddr (ipv4) and sk->sk_v6_rcv_saddr (ipv6). Searching the bhash2 table leads to significantly faster conflict resolution and less time holding the hashbucket spinlock. Please note a few things: * There can be the case where the a socket's address changes after it has been bound. There are two cases where this happens: 1) The case where there is a bind() call on INADDR_ANY (ipv4) or IPV6_ADDR_ANY (ipv6) and then a connect() call. The kernel will assign the socket an address when it handles the connect() 2) In inet_sk_reselect_saddr(), which is called when rebuilding the sk header and a few pre-conditions are met (eg rerouting fails). In these two cases, we need to update the bhash2 table by removing the entry for the old address, and add a new entry reflecting the updated address. * The bhash2 table must have its own lock, even though concurrent accesses on the same port are protected by the bhash lock. Bhash2 must have its own lock to protect against cases where sockets on different ports hash to different bhash hashbuckets but to the same bhash2 hashbucket. This brings up a few stipulations: 1) When acquiring both the bhash and the bhash2 lock, the bhash2 lock will always be acquired after the bhash lock and released before the bhash lock is released. 2) There are no nested bhash2 hashbucket locks. A bhash2 lock is always acquired+released before another bhash2 lock is acquired+released. * The bhash table cannot be superseded by the bhash2 table because for bind requests on INADDR_ANY (ipv4) or IPV6_ADDR_ANY (ipv6), every socket bound to that port must be checked for a potential conflict. The bhash table is the only source of port->socket associations. Signed-off-by: Joanne Koong Signed-off-by: Jakub Kicinski --- include/net/inet_connection_sock.h | 3 + include/net/inet_hashtables.h | 80 ++++++++++- include/net/sock.h | 14 ++ net/dccp/ipv4.c | 25 +++- net/dccp/ipv6.c | 18 +++ net/dccp/proto.c | 34 ++++- net/ipv4/af_inet.c | 26 +++- net/ipv4/inet_connection_sock.c | 275 ++++++++++++++++++++++++++++--------- net/ipv4/inet_hashtables.c | 268 ++++++++++++++++++++++++++++++++++-- net/ipv4/tcp.c | 11 +- net/ipv4/tcp_ipv4.c | 23 +++- net/ipv6/tcp_ipv6.c | 17 +++ 12 files changed, 700 insertions(+), 94 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index ee88f0f1350f..c2b15f7e5516 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -25,6 +25,7 @@ #undef INET_CSK_CLEAR_TIMERS struct inet_bind_bucket; +struct inet_bind2_bucket; struct tcp_congestion_ops; /* @@ -57,6 +58,7 @@ struct inet_connection_sock_af_ops { * * @icsk_accept_queue: FIFO of established children * @icsk_bind_hash: Bind node + * @icsk_bind2_hash: Bind node in the bhash2 table * @icsk_timeout: Timeout * @icsk_retransmit_timer: Resend (no ack) * @icsk_rto: Retransmit timeout @@ -83,6 +85,7 @@ struct inet_connection_sock { struct inet_sock icsk_inet; struct request_sock_queue icsk_accept_queue; struct inet_bind_bucket *icsk_bind_hash; + struct inet_bind2_bucket *icsk_bind2_hash; unsigned long icsk_timeout; struct timer_list icsk_retransmit_timer; struct timer_list icsk_delack_timer; diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index e9cf2157ed8a..44a419b9e3d5 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -90,7 +91,28 @@ struct inet_bind_bucket { struct hlist_head owners; }; -static inline struct net *ib_net(struct inet_bind_bucket *ib) +struct inet_bind2_bucket { + possible_net_t ib_net; + int l3mdev; + unsigned short port; + union { +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr v6_rcv_saddr; +#endif + __be32 rcv_saddr; + }; + /* Node in the bhash2 inet_bind_hashbucket chain */ + struct hlist_node node; + /* List of sockets hashed to this bucket */ + struct hlist_head owners; +}; + +static inline struct net *ib_net(const struct inet_bind_bucket *ib) +{ + return read_pnet(&ib->ib_net); +} + +static inline struct net *ib2_net(const struct inet_bind2_bucket *ib) { return read_pnet(&ib->ib_net); } @@ -133,7 +155,14 @@ struct inet_hashinfo { * TCP hash as well as the others for fast bind/connect. */ struct kmem_cache *bind_bucket_cachep; + /* This bind table is hashed by local port */ struct inet_bind_hashbucket *bhash; + struct kmem_cache *bind2_bucket_cachep; + /* This bind table is hashed by local port and sk->sk_rcv_saddr (ipv4) + * or sk->sk_v6_rcv_saddr (ipv6). This 2nd bind table is used + * primarily for expediting bind conflict resolution. + */ + struct inet_bind_hashbucket *bhash2; unsigned int bhash_size; /* The 2nd listener table hashed by local port and address */ @@ -182,14 +211,61 @@ inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net, void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb); +bool inet_bind_bucket_match(const struct inet_bind_bucket *tb, + const struct net *net, unsigned short port, + int l3mdev); + +struct inet_bind2_bucket * +inet_bind2_bucket_create(struct kmem_cache *cachep, struct net *net, + struct inet_bind_hashbucket *head, + unsigned short port, int l3mdev, + const struct sock *sk); + +void inet_bind2_bucket_destroy(struct kmem_cache *cachep, + struct inet_bind2_bucket *tb); + +struct inet_bind2_bucket * +inet_bind2_bucket_find(const struct inet_bind_hashbucket *head, + const struct net *net, + unsigned short port, int l3mdev, + const struct sock *sk); + +bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, + const struct net *net, unsigned short port, + int l3mdev, const struct sock *sk); + static inline u32 inet_bhashfn(const struct net *net, const __u16 lport, const u32 bhash_size) { return (lport + net_hash_mix(net)) & (bhash_size - 1); } +static inline struct inet_bind_hashbucket * +inet_bhashfn_portaddr(const struct inet_hashinfo *hinfo, const struct sock *sk, + const struct net *net, unsigned short port) +{ + u32 hash; + +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) + hash = ipv6_portaddr_hash(net, &sk->sk_v6_rcv_saddr, port); + else +#endif + hash = ipv4_portaddr_hash(net, sk->sk_rcv_saddr, port); + return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)]; +} + +struct inet_bind_hashbucket * +inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, int port); + +/* This should be called whenever a socket's sk_rcv_saddr (ipv4) or + * sk_v6_rcv_saddr (ipv6) changes after it has been binded. The socket's + * rcv_saddr field should already have been updated when this is called. + */ +int inet_bhash2_update_saddr(struct inet_bind_hashbucket *prev_saddr, struct sock *sk); + void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, - const unsigned short snum); + struct inet_bind2_bucket *tb2, unsigned short port); /* Caller must disable local BH processing. */ int __inet_inherit_port(const struct sock *sk, struct sock *child); diff --git a/include/net/sock.h b/include/net/sock.h index d08cfe190a78..ca469980e006 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -348,6 +348,7 @@ struct sk_filter; * @sk_txtime_report_errors: set report errors mode for SO_TXTIME * @sk_txtime_unused: unused txtime flags * @ns_tracker: tracker for netns reference + * @sk_bind2_node: bind node in the bhash2 table */ struct sock { /* @@ -537,6 +538,7 @@ struct sock { #endif struct rcu_head sk_rcu; netns_tracker ns_tracker; + struct hlist_node sk_bind2_node; }; enum sk_pacing { @@ -870,6 +872,16 @@ static inline void sk_add_bind_node(struct sock *sk, hlist_add_head(&sk->sk_bind_node, list); } +static inline void __sk_del_bind2_node(struct sock *sk) +{ + __hlist_del(&sk->sk_bind2_node); +} + +static inline void sk_add_bind2_node(struct sock *sk, struct hlist_head *list) +{ + hlist_add_head(&sk->sk_bind2_node, list); +} + #define sk_for_each(__sk, list) \ hlist_for_each_entry(__sk, list, sk_node) #define sk_for_each_rcu(__sk, list) \ @@ -887,6 +899,8 @@ static inline void sk_add_bind_node(struct sock *sk, hlist_for_each_entry_safe(__sk, tmp, list, sk_node) #define sk_for_each_bound(__sk, list) \ hlist_for_each_entry(__sk, list, sk_bind_node) +#define sk_for_each_bound_bhash2(__sk, list) \ + hlist_for_each_entry(__sk, list, sk_bind2_node) /** * sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index da6e3b20cd75..6a6e121dc00c 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -45,10 +45,11 @@ static unsigned int dccp_v4_pernet_id __read_mostly; int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; + struct inet_bind_hashbucket *prev_addr_hashbucket = NULL; + __be32 daddr, nexthop, prev_sk_rcv_saddr; struct inet_sock *inet = inet_sk(sk); struct dccp_sock *dp = dccp_sk(sk); __be16 orig_sport, orig_dport; - __be32 daddr, nexthop; struct flowi4 *fl4; struct rtable *rt; int err; @@ -89,9 +90,29 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (inet_opt == NULL || !inet_opt->opt.srr) daddr = fl4->daddr; - if (inet->inet_saddr == 0) + if (inet->inet_saddr == 0) { + if (inet_csk(sk)->icsk_bind2_hash) { + prev_addr_hashbucket = + inet_bhashfn_portaddr(&dccp_hashinfo, sk, + sock_net(sk), + inet->inet_num); + prev_sk_rcv_saddr = sk->sk_rcv_saddr; + } inet->inet_saddr = fl4->saddr; + } + sk_rcv_saddr_set(sk, inet->inet_saddr); + + if (prev_addr_hashbucket) { + err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk); + if (err) { + inet->inet_saddr = 0; + sk_rcv_saddr_set(sk, prev_sk_rcv_saddr); + ip_rt_put(rt); + return err; + } + } + inet->inet_dport = usin->sin_port; sk_daddr_set(sk, daddr); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index fd44638ec16b..e57b43006074 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -934,8 +934,26 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, } if (saddr == NULL) { + struct inet_bind_hashbucket *prev_addr_hashbucket = NULL; + struct in6_addr prev_v6_rcv_saddr; + + if (icsk->icsk_bind2_hash) { + prev_addr_hashbucket = inet_bhashfn_portaddr(&dccp_hashinfo, + sk, sock_net(sk), + inet->inet_num); + prev_v6_rcv_saddr = sk->sk_v6_rcv_saddr; + } + saddr = &fl6.saddr; sk->sk_v6_rcv_saddr = *saddr; + + if (prev_addr_hashbucket) { + err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk); + if (err) { + sk->sk_v6_rcv_saddr = prev_v6_rcv_saddr; + goto failure; + } + } } /* set the source address */ diff --git a/net/dccp/proto.c b/net/dccp/proto.c index e13641c65f88..7cd4a6cc99fc 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1120,6 +1120,12 @@ static int __init dccp_init(void) SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL); if (!dccp_hashinfo.bind_bucket_cachep) goto out_free_hashinfo2; + dccp_hashinfo.bind2_bucket_cachep = + kmem_cache_create("dccp_bind2_bucket", + sizeof(struct inet_bind2_bucket), 0, + SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL); + if (!dccp_hashinfo.bind2_bucket_cachep) + goto out_free_bind_bucket_cachep; /* * Size and allocate the main established and bind bucket @@ -1150,7 +1156,7 @@ static int __init dccp_init(void) if (!dccp_hashinfo.ehash) { DCCP_CRIT("Failed to allocate DCCP established hash table"); - goto out_free_bind_bucket_cachep; + goto out_free_bind2_bucket_cachep; } for (i = 0; i <= dccp_hashinfo.ehash_mask; i++) @@ -1176,14 +1182,24 @@ static int __init dccp_init(void) goto out_free_dccp_locks; } + dccp_hashinfo.bhash2 = (struct inet_bind_hashbucket *) + __get_free_pages(GFP_ATOMIC | __GFP_NOWARN, bhash_order); + + if (!dccp_hashinfo.bhash2) { + DCCP_CRIT("Failed to allocate DCCP bind2 hash table"); + goto out_free_dccp_bhash; + } + for (i = 0; i < dccp_hashinfo.bhash_size; i++) { spin_lock_init(&dccp_hashinfo.bhash[i].lock); INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain); + spin_lock_init(&dccp_hashinfo.bhash2[i].lock); + INIT_HLIST_HEAD(&dccp_hashinfo.bhash2[i].chain); } rc = dccp_mib_init(); if (rc) - goto out_free_dccp_bhash; + goto out_free_dccp_bhash2; rc = dccp_ackvec_init(); if (rc) @@ -1207,30 +1223,38 @@ out_ackvec_exit: dccp_ackvec_exit(); out_free_dccp_mib: dccp_mib_exit(); +out_free_dccp_bhash2: + free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order); out_free_dccp_bhash: free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order); out_free_dccp_locks: inet_ehash_locks_free(&dccp_hashinfo); out_free_dccp_ehash: free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order); +out_free_bind2_bucket_cachep: + kmem_cache_destroy(dccp_hashinfo.bind2_bucket_cachep); out_free_bind_bucket_cachep: kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); out_free_hashinfo2: inet_hashinfo2_free_mod(&dccp_hashinfo); out_fail: dccp_hashinfo.bhash = NULL; + dccp_hashinfo.bhash2 = NULL; dccp_hashinfo.ehash = NULL; dccp_hashinfo.bind_bucket_cachep = NULL; + dccp_hashinfo.bind2_bucket_cachep = NULL; return rc; } static void __exit dccp_fini(void) { + int bhash_order = get_order(dccp_hashinfo.bhash_size * + sizeof(struct inet_bind_hashbucket)); + ccid_cleanup_builtins(); dccp_mib_exit(); - free_pages((unsigned long)dccp_hashinfo.bhash, - get_order(dccp_hashinfo.bhash_size * - sizeof(struct inet_bind_hashbucket))); + free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order); + free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order); free_pages((unsigned long)dccp_hashinfo.ehash, get_order((dccp_hashinfo.ehash_mask + 1) * sizeof(struct inet_ehash_bucket))); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 3ca0cc467886..2e6a3cb06815 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1219,6 +1219,7 @@ EXPORT_SYMBOL(inet_unregister_protosw); static int inet_sk_reselect_saddr(struct sock *sk) { + struct inet_bind_hashbucket *prev_addr_hashbucket; struct inet_sock *inet = inet_sk(sk); __be32 old_saddr = inet->inet_saddr; __be32 daddr = inet->inet_daddr; @@ -1226,6 +1227,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) struct rtable *rt; __be32 new_saddr; struct ip_options_rcu *inet_opt; + int err; inet_opt = rcu_dereference_protected(inet->inet_opt, lockdep_sock_is_held(sk)); @@ -1240,20 +1242,34 @@ static int inet_sk_reselect_saddr(struct sock *sk) if (IS_ERR(rt)) return PTR_ERR(rt); - sk_setup_caps(sk, &rt->dst); - new_saddr = fl4->saddr; - if (new_saddr == old_saddr) + if (new_saddr == old_saddr) { + sk_setup_caps(sk, &rt->dst); return 0; + } + + prev_addr_hashbucket = + inet_bhashfn_portaddr(sk->sk_prot->h.hashinfo, sk, + sock_net(sk), inet->inet_num); + + inet->inet_saddr = inet->inet_rcv_saddr = new_saddr; + + err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk); + if (err) { + inet->inet_saddr = old_saddr; + inet->inet_rcv_saddr = old_saddr; + ip_rt_put(rt); + return err; + } + + sk_setup_caps(sk, &rt->dst); if (READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) > 1) { pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n", __func__, &old_saddr, &new_saddr); } - inet->inet_saddr = inet->inet_rcv_saddr = new_saddr; - /* * XXX The only one ugly spot where we need to * XXX really change the sockets identity after diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index eb31c7158b39..f0038043b661 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -130,14 +130,75 @@ void inet_get_local_port_range(struct net *net, int *low, int *high) } EXPORT_SYMBOL(inet_get_local_port_range); +static bool inet_use_bhash2_on_bind(const struct sock *sk) +{ +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) { + int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr); + + return addr_type != IPV6_ADDR_ANY && + addr_type != IPV6_ADDR_MAPPED; + } +#endif + return sk->sk_rcv_saddr != htonl(INADDR_ANY); +} + +static bool inet_bind_conflict(const struct sock *sk, struct sock *sk2, + kuid_t sk_uid, bool relax, + bool reuseport_cb_ok, bool reuseport_ok) +{ + int bound_dev_if2; + + if (sk == sk2) + return false; + + bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if); + + if (!sk->sk_bound_dev_if || !bound_dev_if2 || + sk->sk_bound_dev_if == bound_dev_if2) { + if (sk->sk_reuse && sk2->sk_reuse && + sk2->sk_state != TCP_LISTEN) { + if (!relax || (!reuseport_ok && sk->sk_reuseport && + sk2->sk_reuseport && reuseport_cb_ok && + (sk2->sk_state == TCP_TIME_WAIT || + uid_eq(sk_uid, sock_i_uid(sk2))))) + return true; + } else if (!reuseport_ok || !sk->sk_reuseport || + !sk2->sk_reuseport || !reuseport_cb_ok || + (sk2->sk_state != TCP_TIME_WAIT && + !uid_eq(sk_uid, sock_i_uid(sk2)))) { + return true; + } + } + return false; +} + +static bool inet_bhash2_conflict(const struct sock *sk, + const struct inet_bind2_bucket *tb2, + kuid_t sk_uid, + bool relax, bool reuseport_cb_ok, + bool reuseport_ok) +{ + struct sock *sk2; + + sk_for_each_bound_bhash2(sk2, &tb2->owners) { + if (sk->sk_family == AF_INET && ipv6_only_sock(sk2)) + continue; + + if (inet_bind_conflict(sk, sk2, sk_uid, relax, + reuseport_cb_ok, reuseport_ok)) + return true; + } + return false; +} + +/* This should be called only when the tb and tb2 hashbuckets' locks are held */ static int inet_csk_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb, + const struct inet_bind2_bucket *tb2, /* may be null */ bool relax, bool reuseport_ok) { - struct sock *sk2; bool reuseport_cb_ok; - bool reuse = sk->sk_reuse; - bool reuseport = !!sk->sk_reuseport; struct sock_reuseport *reuseport_cb; kuid_t uid = sock_i_uid((struct sock *)sk); @@ -150,55 +211,87 @@ static int inet_csk_bind_conflict(const struct sock *sk, /* * Unlike other sk lookup places we do not check * for sk_net here, since _all_ the socks listed - * in tb->owners list belong to the same net - the - * one this bucket belongs to. + * in tb->owners and tb2->owners list belong + * to the same net - the one this bucket belongs to. */ - sk_for_each_bound(sk2, &tb->owners) { - int bound_dev_if2; + if (!inet_use_bhash2_on_bind(sk)) { + struct sock *sk2; - if (sk == sk2) - continue; - bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if); - if ((!sk->sk_bound_dev_if || - !bound_dev_if2 || - sk->sk_bound_dev_if == bound_dev_if2)) { - if (reuse && sk2->sk_reuse && - sk2->sk_state != TCP_LISTEN) { - if ((!relax || - (!reuseport_ok && - reuseport && sk2->sk_reuseport && - reuseport_cb_ok && - (sk2->sk_state == TCP_TIME_WAIT || - uid_eq(uid, sock_i_uid(sk2))))) && - inet_rcv_saddr_equal(sk, sk2, true)) - break; - } else if (!reuseport_ok || - !reuseport || !sk2->sk_reuseport || - !reuseport_cb_ok || - (sk2->sk_state != TCP_TIME_WAIT && - !uid_eq(uid, sock_i_uid(sk2)))) { - if (inet_rcv_saddr_equal(sk, sk2, true)) - break; - } - } + sk_for_each_bound(sk2, &tb->owners) + if (inet_bind_conflict(sk, sk2, uid, relax, + reuseport_cb_ok, reuseport_ok) && + inet_rcv_saddr_equal(sk, sk2, true)) + return true; + + return false; + } + + /* Conflicts with an existing IPV6_ADDR_ANY (if ipv6) or INADDR_ANY (if + * ipv4) should have been checked already. We need to do these two + * checks separately because their spinlocks have to be acquired/released + * independently of each other, to prevent possible deadlocks + */ + return tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, + reuseport_ok); +} + +/* Determine if there is a bind conflict with an existing IPV6_ADDR_ANY (if ipv6) or + * INADDR_ANY (if ipv4) socket. + * + * Caller must hold bhash hashbucket lock with local bh disabled, to protect + * against concurrent binds on the port for addr any + */ +static bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l3mdev, + bool relax, bool reuseport_ok) +{ + kuid_t uid = sock_i_uid((struct sock *)sk); + const struct net *net = sock_net(sk); + struct sock_reuseport *reuseport_cb; + struct inet_bind_hashbucket *head2; + struct inet_bind2_bucket *tb2; + bool reuseport_cb_ok; + + rcu_read_lock(); + reuseport_cb = rcu_dereference(sk->sk_reuseport_cb); + /* paired with WRITE_ONCE() in __reuseport_(add|detach)_closed_sock */ + reuseport_cb_ok = !reuseport_cb || READ_ONCE(reuseport_cb->num_closed_socks); + rcu_read_unlock(); + + head2 = inet_bhash2_addr_any_hashbucket(sk, net, port); + + spin_lock(&head2->lock); + + inet_bind_bucket_for_each(tb2, &head2->chain) + if (inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk)) + break; + + if (tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, + reuseport_ok)) { + spin_unlock(&head2->lock); + return true; } - return sk2 != NULL; + + spin_unlock(&head2->lock); + return false; } /* * Find an open port number for the socket. Returns with the - * inet_bind_hashbucket lock held. + * inet_bind_hashbucket locks held if successful. */ static struct inet_bind_hashbucket * -inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int *port_ret) +inet_csk_find_open_port(const struct sock *sk, struct inet_bind_bucket **tb_ret, + struct inet_bind2_bucket **tb2_ret, + struct inet_bind_hashbucket **head2_ret, int *port_ret) { struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo; int port = 0; - struct inet_bind_hashbucket *head; + struct inet_bind_hashbucket *head, *head2; struct net *net = sock_net(sk); bool relax = false; int i, low, high, attempt_half; + struct inet_bind2_bucket *tb2; struct inet_bind_bucket *tb; u32 remaining, offset; int l3mdev; @@ -239,11 +332,20 @@ other_parity_scan: head = &hinfo->bhash[inet_bhashfn(net, port, hinfo->bhash_size)]; spin_lock_bh(&head->lock); + if (inet_use_bhash2_on_bind(sk)) { + if (inet_bhash2_addr_any_conflict(sk, port, l3mdev, relax, false)) + goto next_port; + } + + head2 = inet_bhashfn_portaddr(hinfo, sk, net, port); + spin_lock(&head2->lock); + tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk); inet_bind_bucket_for_each(tb, &head->chain) - if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev && - tb->port == port) { - if (!inet_csk_bind_conflict(sk, tb, relax, false)) + if (inet_bind_bucket_match(tb, net, port, l3mdev)) { + if (!inet_csk_bind_conflict(sk, tb, tb2, + relax, false)) goto success; + spin_unlock(&head2->lock); goto next_port; } tb = NULL; @@ -272,6 +374,8 @@ next_port: success: *port_ret = port; *tb_ret = tb; + *tb2_ret = tb2; + *head2_ret = head2; return head; } @@ -368,53 +472,95 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN; struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo; int ret = 1, port = snum; - struct inet_bind_hashbucket *head; struct net *net = sock_net(sk); + bool found_port = false, check_bind_conflict = true; + bool bhash_created = false, bhash2_created = false; + struct inet_bind_hashbucket *head, *head2; + struct inet_bind2_bucket *tb2 = NULL; struct inet_bind_bucket *tb = NULL; + bool head2_lock_acquired = false; int l3mdev; l3mdev = inet_sk_bound_l3mdev(sk); if (!port) { - head = inet_csk_find_open_port(sk, &tb, &port); + head = inet_csk_find_open_port(sk, &tb, &tb2, &head2, &port); if (!head) return ret; + + head2_lock_acquired = true; + + if (tb && tb2) + goto success; + found_port = true; + } else { + head = &hinfo->bhash[inet_bhashfn(net, port, + hinfo->bhash_size)]; + spin_lock_bh(&head->lock); + inet_bind_bucket_for_each(tb, &head->chain) + if (inet_bind_bucket_match(tb, net, port, l3mdev)) + break; + } + + if (!tb) { + tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, net, + head, port, l3mdev); if (!tb) - goto tb_not_found; - goto success; + goto fail_unlock; + bhash_created = true; } - head = &hinfo->bhash[inet_bhashfn(net, port, - hinfo->bhash_size)]; - spin_lock_bh(&head->lock); - inet_bind_bucket_for_each(tb, &head->chain) - if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev && - tb->port == port) - goto tb_found; -tb_not_found: - tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, - net, head, port, l3mdev); - if (!tb) - goto fail_unlock; -tb_found: - if (!hlist_empty(&tb->owners)) { - if (sk->sk_reuse == SK_FORCE_REUSE) - goto success; - if ((tb->fastreuse > 0 && reuse) || - sk_reuseport_match(tb, sk)) - goto success; - if (inet_csk_bind_conflict(sk, tb, true, true)) + if (!found_port) { + if (!hlist_empty(&tb->owners)) { + if (sk->sk_reuse == SK_FORCE_REUSE || + (tb->fastreuse > 0 && reuse) || + sk_reuseport_match(tb, sk)) + check_bind_conflict = false; + } + + if (check_bind_conflict && inet_use_bhash2_on_bind(sk)) { + if (inet_bhash2_addr_any_conflict(sk, port, l3mdev, true, true)) + goto fail_unlock; + } + + head2 = inet_bhashfn_portaddr(hinfo, sk, net, port); + spin_lock(&head2->lock); + head2_lock_acquired = true; + tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk); + } + + if (!tb2) { + tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep, + net, head2, port, l3mdev, sk); + if (!tb2) goto fail_unlock; + bhash2_created = true; } + + if (!found_port && check_bind_conflict) { + if (inet_csk_bind_conflict(sk, tb, tb2, true, true)) + goto fail_unlock; + } + success: inet_csk_update_fastreuse(tb, sk); if (!inet_csk(sk)->icsk_bind_hash) - inet_bind_hash(sk, tb, port); + inet_bind_hash(sk, tb, tb2, port); WARN_ON(inet_csk(sk)->icsk_bind_hash != tb); + WARN_ON(inet_csk(sk)->icsk_bind2_hash != tb2); ret = 0; fail_unlock: + if (ret) { + if (bhash_created) + inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb); + if (bhash2_created) + inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, + tb2); + } + if (head2_lock_acquired) + spin_unlock(&head2->lock); spin_unlock_bh(&head->lock); return ret; } @@ -962,6 +1108,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, inet_sk_set_state(newsk, TCP_SYN_RECV); newicsk->icsk_bind_hash = NULL; + newicsk->icsk_bind2_hash = NULL; inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port; inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index b9d995b5ce24..60d77e234a68 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -92,12 +92,75 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket } } +bool inet_bind_bucket_match(const struct inet_bind_bucket *tb, const struct net *net, + unsigned short port, int l3mdev) +{ + return net_eq(ib_net(tb), net) && tb->port == port && + tb->l3mdev == l3mdev; +} + +static void inet_bind2_bucket_init(struct inet_bind2_bucket *tb, + struct net *net, + struct inet_bind_hashbucket *head, + unsigned short port, int l3mdev, + const struct sock *sk) +{ + write_pnet(&tb->ib_net, net); + tb->l3mdev = l3mdev; + tb->port = port; +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) + tb->v6_rcv_saddr = sk->sk_v6_rcv_saddr; + else +#endif + tb->rcv_saddr = sk->sk_rcv_saddr; + INIT_HLIST_HEAD(&tb->owners); + hlist_add_head(&tb->node, &head->chain); +} + +struct inet_bind2_bucket *inet_bind2_bucket_create(struct kmem_cache *cachep, + struct net *net, + struct inet_bind_hashbucket *head, + unsigned short port, + int l3mdev, + const struct sock *sk) +{ + struct inet_bind2_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); + + if (tb) + inet_bind2_bucket_init(tb, net, head, port, l3mdev, sk); + + return tb; +} + +/* Caller must hold hashbucket lock for this tb with local BH disabled */ +void inet_bind2_bucket_destroy(struct kmem_cache *cachep, struct inet_bind2_bucket *tb) +{ + if (hlist_empty(&tb->owners)) { + __hlist_del(&tb->node); + kmem_cache_free(cachep, tb); + } +} + +static bool inet_bind2_bucket_addr_match(const struct inet_bind2_bucket *tb2, + const struct sock *sk) +{ +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) + return ipv6_addr_equal(&tb2->v6_rcv_saddr, + &sk->sk_v6_rcv_saddr); +#endif + return tb2->rcv_saddr == sk->sk_rcv_saddr; +} + void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, - const unsigned short snum) + struct inet_bind2_bucket *tb2, unsigned short port) { - inet_sk(sk)->inet_num = snum; + inet_sk(sk)->inet_num = port; sk_add_bind_node(sk, &tb->owners); inet_csk(sk)->icsk_bind_hash = tb; + sk_add_bind2_node(sk, &tb2->owners); + inet_csk(sk)->icsk_bind2_hash = tb2; } /* @@ -109,6 +172,9 @@ static void __inet_put_port(struct sock *sk) const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->inet_num, hashinfo->bhash_size); struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; + struct inet_bind_hashbucket *head2 = + inet_bhashfn_portaddr(hashinfo, sk, sock_net(sk), + inet_sk(sk)->inet_num); struct inet_bind_bucket *tb; spin_lock(&head->lock); @@ -117,6 +183,17 @@ static void __inet_put_port(struct sock *sk) inet_csk(sk)->icsk_bind_hash = NULL; inet_sk(sk)->inet_num = 0; inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); + + spin_lock(&head2->lock); + if (inet_csk(sk)->icsk_bind2_hash) { + struct inet_bind2_bucket *tb2 = inet_csk(sk)->icsk_bind2_hash; + + __sk_del_bind2_node(sk); + inet_csk(sk)->icsk_bind2_hash = NULL; + inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2); + } + spin_unlock(&head2->lock); + spin_unlock(&head->lock); } @@ -135,12 +212,21 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child) const int bhash = inet_bhashfn(sock_net(sk), port, table->bhash_size); struct inet_bind_hashbucket *head = &table->bhash[bhash]; + struct inet_bind_hashbucket *head2 = + inet_bhashfn_portaddr(table, child, sock_net(sk), port); + bool created_inet_bind_bucket = false; + bool update_fastreuse = false; + struct net *net = sock_net(sk); + struct inet_bind2_bucket *tb2; struct inet_bind_bucket *tb; int l3mdev; spin_lock(&head->lock); + spin_lock(&head2->lock); tb = inet_csk(sk)->icsk_bind_hash; - if (unlikely(!tb)) { + tb2 = inet_csk(sk)->icsk_bind2_hash; + if (unlikely(!tb || !tb2)) { + spin_unlock(&head2->lock); spin_unlock(&head->lock); return -ENOENT; } @@ -153,25 +239,49 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child) * as that of the child socket. We have to look up or * create a new bind bucket for the child here. */ inet_bind_bucket_for_each(tb, &head->chain) { - if (net_eq(ib_net(tb), sock_net(sk)) && - tb->l3mdev == l3mdev && tb->port == port) + if (inet_bind_bucket_match(tb, net, port, l3mdev)) break; } if (!tb) { tb = inet_bind_bucket_create(table->bind_bucket_cachep, - sock_net(sk), head, port, - l3mdev); + net, head, port, l3mdev); if (!tb) { + spin_unlock(&head2->lock); spin_unlock(&head->lock); return -ENOMEM; } + created_inet_bind_bucket = true; + } + update_fastreuse = true; + + goto bhash2_find; + } else if (!inet_bind2_bucket_addr_match(tb2, child)) { + l3mdev = inet_sk_bound_l3mdev(sk); + +bhash2_find: + tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, child); + if (!tb2) { + tb2 = inet_bind2_bucket_create(table->bind2_bucket_cachep, + net, head2, port, + l3mdev, child); + if (!tb2) + goto error; } - inet_csk_update_fastreuse(tb, child); } - inet_bind_hash(child, tb, port); + if (update_fastreuse) + inet_csk_update_fastreuse(tb, child); + inet_bind_hash(child, tb, tb2, port); + spin_unlock(&head2->lock); spin_unlock(&head->lock); return 0; + +error: + if (created_inet_bind_bucket) + inet_bind_bucket_destroy(table->bind_bucket_cachep, tb); + spin_unlock(&head2->lock); + spin_unlock(&head->lock); + return -ENOMEM; } EXPORT_SYMBOL_GPL(__inet_inherit_port); @@ -675,6 +785,112 @@ void inet_unhash(struct sock *sk) } EXPORT_SYMBOL_GPL(inet_unhash); +static bool inet_bind2_bucket_match(const struct inet_bind2_bucket *tb, + const struct net *net, unsigned short port, + int l3mdev, const struct sock *sk) +{ +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) + return net_eq(ib2_net(tb), net) && tb->port == port && + tb->l3mdev == l3mdev && + ipv6_addr_equal(&tb->v6_rcv_saddr, &sk->sk_v6_rcv_saddr); + else +#endif + return net_eq(ib2_net(tb), net) && tb->port == port && + tb->l3mdev == l3mdev && tb->rcv_saddr == sk->sk_rcv_saddr; +} + +bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, const struct net *net, + unsigned short port, int l3mdev, const struct sock *sk) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr addr_any = {}; + + if (sk->sk_family == AF_INET6) + return net_eq(ib2_net(tb), net) && tb->port == port && + tb->l3mdev == l3mdev && + ipv6_addr_equal(&tb->v6_rcv_saddr, &addr_any); + else +#endif + return net_eq(ib2_net(tb), net) && tb->port == port && + tb->l3mdev == l3mdev && tb->rcv_saddr == 0; +} + +/* The socket's bhash2 hashbucket spinlock must be held when this is called */ +struct inet_bind2_bucket * +inet_bind2_bucket_find(const struct inet_bind_hashbucket *head, const struct net *net, + unsigned short port, int l3mdev, const struct sock *sk) +{ + struct inet_bind2_bucket *bhash2 = NULL; + + inet_bind_bucket_for_each(bhash2, &head->chain) + if (inet_bind2_bucket_match(bhash2, net, port, l3mdev, sk)) + break; + + return bhash2; +} + +struct inet_bind_hashbucket * +inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, int port) +{ + struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo; + u32 hash; +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr addr_any = {}; + + if (sk->sk_family == AF_INET6) + hash = ipv6_portaddr_hash(net, &addr_any, port); + else +#endif + hash = ipv4_portaddr_hash(net, 0, port); + + return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)]; +} + +int inet_bhash2_update_saddr(struct inet_bind_hashbucket *prev_saddr, struct sock *sk) +{ + struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo; + struct inet_bind2_bucket *tb2, *new_tb2; + int l3mdev = inet_sk_bound_l3mdev(sk); + struct inet_bind_hashbucket *head2; + int port = inet_sk(sk)->inet_num; + struct net *net = sock_net(sk); + + /* Allocate a bind2 bucket ahead of time to avoid permanently putting + * the bhash2 table in an inconsistent state if a new tb2 bucket + * allocation fails. + */ + new_tb2 = kmem_cache_alloc(hinfo->bind2_bucket_cachep, GFP_ATOMIC); + if (!new_tb2) + return -ENOMEM; + + head2 = inet_bhashfn_portaddr(hinfo, sk, net, port); + + if (prev_saddr) { + spin_lock_bh(&prev_saddr->lock); + __sk_del_bind2_node(sk); + inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, + inet_csk(sk)->icsk_bind2_hash); + spin_unlock_bh(&prev_saddr->lock); + } + + spin_lock_bh(&head2->lock); + tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk); + if (!tb2) { + tb2 = new_tb2; + inet_bind2_bucket_init(tb2, net, head2, port, l3mdev, sk); + } + sk_add_bind2_node(sk, &tb2->owners); + inet_csk(sk)->icsk_bind2_hash = tb2; + spin_unlock_bh(&head2->lock); + + if (tb2 != new_tb2) + kmem_cache_free(hinfo->bind2_bucket_cachep, new_tb2); + + return 0; +} +EXPORT_SYMBOL_GPL(inet_bhash2_update_saddr); + /* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm * Note that we use 32bit integers (vs RFC 'short integers') * because 2^16 is not a multiple of num_ephemeral and this @@ -694,11 +910,13 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *, __u16, struct inet_timewait_sock **)) { struct inet_hashinfo *hinfo = death_row->hashinfo; + struct inet_bind_hashbucket *head, *head2; struct inet_timewait_sock *tw = NULL; - struct inet_bind_hashbucket *head; int port = inet_sk(sk)->inet_num; struct net *net = sock_net(sk); + struct inet_bind2_bucket *tb2; struct inet_bind_bucket *tb; + bool tb_created = false; u32 remaining, offset; int ret, i, low, high; int l3mdev; @@ -755,8 +973,7 @@ other_parity_scan: * the established check is already unique enough. */ inet_bind_bucket_for_each(tb, &head->chain) { - if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev && - tb->port == port) { + if (inet_bind_bucket_match(tb, net, port, l3mdev)) { if (tb->fastreuse >= 0 || tb->fastreuseport >= 0) goto next_port; @@ -774,6 +991,7 @@ other_parity_scan: spin_unlock_bh(&head->lock); return -ENOMEM; } + tb_created = true; tb->fastreuse = -1; tb->fastreuseport = -1; goto ok; @@ -789,6 +1007,20 @@ next_port: return -EADDRNOTAVAIL; ok: + /* Find the corresponding tb2 bucket since we need to + * add the socket to the bhash2 table as well + */ + head2 = inet_bhashfn_portaddr(hinfo, sk, net, port); + spin_lock(&head2->lock); + + tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk); + if (!tb2) { + tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep, net, + head2, port, l3mdev, sk); + if (!tb2) + goto error; + } + /* Here we want to add a little bit of randomness to the next source * port that will be chosen. We use a max() with a random here so that * on low contention the randomness is maximal and on high contention @@ -798,7 +1030,10 @@ ok: WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2); /* Head lock still held and bh's disabled */ - inet_bind_hash(sk, tb, port); + inet_bind_hash(sk, tb, tb2, port); + + spin_unlock(&head2->lock); + if (sk_unhashed(sk)) { inet_sk(sk)->inet_sport = htons(port); inet_ehash_nolisten(sk, (struct sock *)tw, NULL); @@ -810,6 +1045,13 @@ ok: inet_twsk_deschedule_put(tw); local_bh_enable(); return 0; + +error: + spin_unlock(&head2->lock); + if (tb_created) + inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb); + spin_unlock_bh(&head->lock); + return -ENOMEM; } /* diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ba62f8e8bd15..baf6adb723ad 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4742,6 +4742,12 @@ void __init tcp_init(void) SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT, NULL); + tcp_hashinfo.bind2_bucket_cachep = + kmem_cache_create("tcp_bind2_bucket", + sizeof(struct inet_bind2_bucket), 0, + SLAB_HWCACHE_ALIGN | SLAB_PANIC | + SLAB_ACCOUNT, + NULL); /* Size and allocate the main established and bind bucket * hash tables. @@ -4765,7 +4771,7 @@ void __init tcp_init(void) panic("TCP: failed to alloc ehash_locks"); tcp_hashinfo.bhash = alloc_large_system_hash("TCP bind", - sizeof(struct inet_bind_hashbucket), + 2 * sizeof(struct inet_bind_hashbucket), tcp_hashinfo.ehash_mask + 1, 17, /* one slot per 128 KB of memory */ 0, @@ -4774,9 +4780,12 @@ void __init tcp_init(void) 0, 64 * 1024); tcp_hashinfo.bhash_size = 1U << tcp_hashinfo.bhash_size; + tcp_hashinfo.bhash2 = tcp_hashinfo.bhash + tcp_hashinfo.bhash_size; for (i = 0; i < tcp_hashinfo.bhash_size; i++) { spin_lock_init(&tcp_hashinfo.bhash[i].lock); INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain); + spin_lock_init(&tcp_hashinfo.bhash2[i].lock); + INIT_HLIST_HEAD(&tcp_hashinfo.bhash2[i].chain); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0c83780dc9bf..cc2ad67f75be 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -199,11 +199,12 @@ static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr, /* This will initiate an outgoing connection. */ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { + struct inet_bind_hashbucket *prev_addr_hashbucket = NULL; struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; + __be32 daddr, nexthop, prev_sk_rcv_saddr; struct inet_sock *inet = inet_sk(sk); struct tcp_sock *tp = tcp_sk(sk); __be16 orig_sport, orig_dport; - __be32 daddr, nexthop; struct flowi4 *fl4; struct rtable *rt; int err; @@ -246,10 +247,28 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (!inet_opt || !inet_opt->opt.srr) daddr = fl4->daddr; - if (!inet->inet_saddr) + if (!inet->inet_saddr) { + if (inet_csk(sk)->icsk_bind2_hash) { + prev_addr_hashbucket = inet_bhashfn_portaddr(&tcp_hashinfo, + sk, sock_net(sk), + inet->inet_num); + prev_sk_rcv_saddr = sk->sk_rcv_saddr; + } inet->inet_saddr = fl4->saddr; + } + sk_rcv_saddr_set(sk, inet->inet_saddr); + if (prev_addr_hashbucket) { + err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk); + if (err) { + inet->inet_saddr = 0; + sk_rcv_saddr_set(sk, prev_sk_rcv_saddr); + ip_rt_put(rt); + return err; + } + } + if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) { /* Reset inherited state */ tp->rx_opt.ts_recent = 0; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e54eee80ce5f..ff5c4fc135fc 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -287,8 +287,25 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, } if (!saddr) { + struct inet_bind_hashbucket *prev_addr_hashbucket = NULL; + struct in6_addr prev_v6_rcv_saddr; + + if (icsk->icsk_bind2_hash) { + prev_addr_hashbucket = inet_bhashfn_portaddr(&tcp_hashinfo, + sk, sock_net(sk), + inet->inet_num); + prev_v6_rcv_saddr = sk->sk_v6_rcv_saddr; + } saddr = &fl6.saddr; sk->sk_v6_rcv_saddr = *saddr; + + if (prev_addr_hashbucket) { + err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk); + if (err) { + sk->sk_v6_rcv_saddr = prev_v6_rcv_saddr; + goto failure; + } + } } /* set the source address */ -- cgit v1.2.3-70-g09d2 From 35ffb66547295c72650978f9c28e670e014d0957 Mon Sep 17 00:00:00 2001 From: Richard Gobert Date: Tue, 23 Aug 2022 09:10:49 +0200 Subject: net: gro: skb_gro_header helper function Introduce a simple helper function to replace a common pattern. When accessing the GRO header, we fetch the pointer from frag0, then test its validity and fetch it from the skb when necessary. This leads to the pattern skb_gro_header_fast -> skb_gro_header_hard -> skb_gro_header_slow recurring many times throughout GRO code. This patch replaces these patterns with a single inlined function call, improving code readability. Signed-off-by: Richard Gobert Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20220823071034.GA56142@debian Signed-off-by: Paolo Abeni --- drivers/net/geneve.c | 9 +++------ drivers/net/vxlan/vxlan_core.c | 9 +++------ include/net/gro.h | 33 ++++++++++++++++++--------------- net/8021q/vlan_core.c | 9 +++------ net/ethernet/eth.c | 9 +++------ net/ipv4/af_inet.c | 9 +++------ net/ipv4/fou.c | 9 +++------ net/ipv4/gre_offload.c | 9 +++------ net/ipv4/tcp_offload.c | 9 +++------ net/ipv6/ip6_offload.c | 9 +++------ 10 files changed, 45 insertions(+), 69 deletions(-) (limited to 'include/net') diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 7962c37b3f14..01aa94776ce3 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -503,12 +503,9 @@ static struct sk_buff *geneve_gro_receive(struct sock *sk, off_gnv = skb_gro_offset(skb); hlen = off_gnv + sizeof(*gh); - gh = skb_gro_header_fast(skb, off_gnv); - if (skb_gro_header_hard(skb, hlen)) { - gh = skb_gro_header_slow(skb, hlen, off_gnv); - if (unlikely(!gh)) - goto out; - } + gh = skb_gro_header(skb, hlen, off_gnv); + if (unlikely(!gh)) + goto out; if (gh->ver != GENEVE_VER || gh->oam) goto out; diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index c3285242f74f..1a47d04f5d1a 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -713,12 +713,9 @@ static struct sk_buff *vxlan_gro_receive(struct sock *sk, off_vx = skb_gro_offset(skb); hlen = off_vx + sizeof(*vh); - vh = skb_gro_header_fast(skb, off_vx); - if (skb_gro_header_hard(skb, hlen)) { - vh = skb_gro_header_slow(skb, hlen, off_vx); - if (unlikely(!vh)) - goto out; - } + vh = skb_gro_header(skb, hlen, off_vx); + if (unlikely(!vh)) + goto out; skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr)); diff --git a/include/net/gro.h b/include/net/gro.h index 867656b0739c..5bf15c212434 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -160,6 +160,17 @@ static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, return skb->data + offset; } +static inline void *skb_gro_header(struct sk_buff *skb, + unsigned int hlen, unsigned int offset) +{ + void *ptr; + + ptr = skb_gro_header_fast(skb, offset); + if (skb_gro_header_hard(skb, hlen)) + ptr = skb_gro_header_slow(skb, hlen, offset); + return ptr; +} + static inline void *skb_gro_network_header(struct sk_buff *skb) { return (NAPI_GRO_CB(skb)->frag0 ?: skb->data) + @@ -301,12 +312,9 @@ static inline void *skb_gro_remcsum_process(struct sk_buff *skb, void *ptr, return ptr; } - ptr = skb_gro_header_fast(skb, off); - if (skb_gro_header_hard(skb, off + plen)) { - ptr = skb_gro_header_slow(skb, off + plen, off); - if (!ptr) - return NULL; - } + ptr = skb_gro_header(skb, off + plen, off); + if (!ptr) + return NULL; delta = remcsum_adjust(ptr + hdrlen, NAPI_GRO_CB(skb)->csum, start, offset); @@ -329,12 +337,9 @@ static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb, if (!grc->delta) return; - ptr = skb_gro_header_fast(skb, grc->offset); - if (skb_gro_header_hard(skb, grc->offset + sizeof(u16))) { - ptr = skb_gro_header_slow(skb, plen, grc->offset); - if (!ptr) - return; - } + ptr = skb_gro_header(skb, plen, grc->offset); + if (!ptr) + return; remcsum_unadjust((__sum16 *)ptr, grc->delta); } @@ -405,9 +410,7 @@ static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb) off = skb_gro_offset(skb); hlen = off + sizeof(*uh); - uh = skb_gro_header_fast(skb, off); - if (skb_gro_header_hard(skb, hlen)) - uh = skb_gro_header_slow(skb, hlen, off); + uh = skb_gro_header(skb, hlen, off); return uh; } diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 5aa8144101dc..0beb44f2fe1f 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -467,12 +467,9 @@ static struct sk_buff *vlan_gro_receive(struct list_head *head, off_vlan = skb_gro_offset(skb); hlen = off_vlan + sizeof(*vhdr); - vhdr = skb_gro_header_fast(skb, off_vlan); - if (skb_gro_header_hard(skb, hlen)) { - vhdr = skb_gro_header_slow(skb, hlen, off_vlan); - if (unlikely(!vhdr)) - goto out; - } + vhdr = skb_gro_header(skb, hlen, off_vlan); + if (unlikely(!vhdr)) + goto out; type = vhdr->h_vlan_encapsulated_proto; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 62b89d6f54fd..e02daa74e833 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -414,12 +414,9 @@ struct sk_buff *eth_gro_receive(struct list_head *head, struct sk_buff *skb) off_eth = skb_gro_offset(skb); hlen = off_eth + sizeof(*eh); - eh = skb_gro_header_fast(skb, off_eth); - if (skb_gro_header_hard(skb, hlen)) { - eh = skb_gro_header_slow(skb, hlen, off_eth); - if (unlikely(!eh)) - goto out; - } + eh = skb_gro_header(skb, hlen, off_eth); + if (unlikely(!eh)) + goto out; flush = 0; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 2e6a3cb06815..d3ab1ae32ef5 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1464,12 +1464,9 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb) off = skb_gro_offset(skb); hlen = off + sizeof(*iph); - iph = skb_gro_header_fast(skb, off); - if (skb_gro_header_hard(skb, hlen)) { - iph = skb_gro_header_slow(skb, hlen, off); - if (unlikely(!iph)) - goto out; - } + iph = skb_gro_header(skb, hlen, off); + if (unlikely(!iph)) + goto out; proto = iph->protocol; diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 025a33c1b04d..cb5bfb77944c 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -323,12 +323,9 @@ static struct sk_buff *gue_gro_receive(struct sock *sk, off = skb_gro_offset(skb); len = off + sizeof(*guehdr); - guehdr = skb_gro_header_fast(skb, off); - if (skb_gro_header_hard(skb, len)) { - guehdr = skb_gro_header_slow(skb, len, off); - if (unlikely(!guehdr)) - goto out; - } + guehdr = skb_gro_header(skb, len, off); + if (unlikely(!guehdr)) + goto out; switch (guehdr->version) { case 0: diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 07073fa35205..2b9cb5398335 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -137,12 +137,9 @@ static struct sk_buff *gre_gro_receive(struct list_head *head, off = skb_gro_offset(skb); hlen = off + sizeof(*greh); - greh = skb_gro_header_fast(skb, off); - if (skb_gro_header_hard(skb, hlen)) { - greh = skb_gro_header_slow(skb, hlen, off); - if (unlikely(!greh)) - goto out; - } + greh = skb_gro_header(skb, hlen, off); + if (unlikely(!greh)) + goto out; /* Only support version 0 and K (key), C (csum) flags. Note that * although the support for the S (seq#) flag can be added easily diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 30abde86db45..a844a0d38482 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -195,12 +195,9 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb) off = skb_gro_offset(skb); hlen = off + sizeof(*th); - th = skb_gro_header_fast(skb, off); - if (skb_gro_header_hard(skb, hlen)) { - th = skb_gro_header_slow(skb, hlen, off); - if (unlikely(!th)) - goto out; - } + th = skb_gro_header(skb, hlen, off); + if (unlikely(!th)) + goto out; thlen = th->doff * 4; if (thlen < sizeof(*th)) diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index d12dba2dd535..d37a8c97e6de 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -219,12 +219,9 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head, off = skb_gro_offset(skb); hlen = off + sizeof(*iph); - iph = skb_gro_header_fast(skb, off); - if (skb_gro_header_hard(skb, hlen)) { - iph = skb_gro_header_slow(skb, hlen, off); - if (unlikely(!iph)) - goto out; - } + iph = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!iph)) + goto out; skb_set_network_header(skb, off); skb_gro_pull(skb, sizeof(*iph)); -- cgit v1.2.3-70-g09d2 From 9d2bb84d54a40361c7008b33a60dc24f78724746 Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Tue, 2 Aug 2022 15:22:42 +0300 Subject: wifi: cfg80211: add link id to txq params The Tx queue parameters are per link, so add the link ID from nl80211 parameters to the API. While at it, lock the wdev when calling into the driver so it (and we) can check the link ID appropriately. Signed-off-by: Shaul Triebitz Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ net/wireless/nl80211.c | 17 +++++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 908d58393484..1d393e09a632 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2316,6 +2316,7 @@ struct ocb_setup { * @cwmax: Maximum contention window [a value of the form 2^n-1 in the range * 1..32767] * @aifs: Arbitration interframe space [0..255] + * @link_id: link_id or -1 for non-MLD */ struct ieee80211_txq_params { enum nl80211_ac ac; @@ -2323,6 +2324,7 @@ struct ieee80211_txq_params { u16 cwmin; u16 cwmax; u8 aifs; + int link_id; }; /** diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 2705e3ee8fc4..e2169c364ae1 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3476,8 +3476,21 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) if (result) goto out; - result = rdev_set_txq_params(rdev, netdev, - &txq_params); + txq_params.link_id = + nl80211_link_id_or_invalid(info->attrs); + + wdev_lock(netdev->ieee80211_ptr); + if (txq_params.link_id >= 0 && + !(netdev->ieee80211_ptr->valid_links & + BIT(txq_params.link_id))) + result = -ENOLINK; + else if (txq_params.link_id >= 0 && + !netdev->ieee80211_ptr->valid_links) + result = -EINVAL; + else + result = rdev_set_txq_params(rdev, netdev, + &txq_params); + wdev_unlock(netdev->ieee80211_ptr); if (result) goto out; } -- cgit v1.2.3-70-g09d2 From e7a7b84e33178db4a839c5e1773247be17597c1f Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Sat, 30 Jul 2022 10:56:43 +0530 Subject: wifi: cfg80211: Add link_id parameter to various key operations for MLO Add support for various key operations on MLD by adding new parameter link_id. Pass the link_id received from userspace to driver for add_key, get_key, del_key, set_default_key, set_default_mgmt_key and set_default_beacon_key to support configuring keys specific to each MLO link. Userspace must not specify link ID for MLO pairwise key since it is common for all the MLO links. Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/20220730052643.1959111-4-quic_vjakkam@quicinc.com Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 8 +- drivers/net/wireless/ath/wil6210/cfg80211.c | 10 +- .../broadcom/brcm80211/brcmfmac/cfg80211.c | 22 ++-- drivers/net/wireless/marvell/libertas/cfg.c | 9 +- drivers/net/wireless/marvell/mwifiex/cfg80211.c | 10 +- drivers/net/wireless/microchip/wilc1000/cfg80211.c | 17 +-- drivers/net/wireless/quantenna/qtnfmac/cfg80211.c | 12 +- drivers/net/wireless/rndis_wlan.c | 20 ++-- drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c | 13 ++- drivers/staging/wlan-ng/cfg80211.c | 12 +- include/net/cfg80211.h | 37 ++++-- include/uapi/linux/nl80211.h | 14 ++- net/mac80211/cfg.c | 17 +-- net/wireless/ibss.c | 2 +- net/wireless/nl80211.c | 126 ++++++++++++++++----- net/wireless/rdev-ops.h | 58 +++++----- net/wireless/sme.c | 2 +- net/wireless/trace.h | 86 ++++++++------ net/wireless/util.c | 4 +- net/wireless/wext-compat.c | 11 +- 20 files changed, 312 insertions(+), 178 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index e11c7e9accc0..a20e0aeae284 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -1124,7 +1124,7 @@ void ath6kl_cfg80211_ch_switch_notify(struct ath6kl_vif *vif, int freq, } static int ath6kl_cfg80211_add_key(struct wiphy *wiphy, struct net_device *ndev, - u8 key_index, bool pairwise, + int link_id, u8 key_index, bool pairwise, const u8 *mac_addr, struct key_params *params) { @@ -1249,7 +1249,7 @@ static int ath6kl_cfg80211_add_key(struct wiphy *wiphy, struct net_device *ndev, } static int ath6kl_cfg80211_del_key(struct wiphy *wiphy, struct net_device *ndev, - u8 key_index, bool pairwise, + int link_id, u8 key_index, bool pairwise, const u8 *mac_addr) { struct ath6kl *ar = ath6kl_priv(ndev); @@ -1279,7 +1279,7 @@ static int ath6kl_cfg80211_del_key(struct wiphy *wiphy, struct net_device *ndev, } static int ath6kl_cfg80211_get_key(struct wiphy *wiphy, struct net_device *ndev, - u8 key_index, bool pairwise, + int link_id, u8 key_index, bool pairwise, const u8 *mac_addr, void *cookie, void (*callback) (void *cookie, struct key_params *)) @@ -1314,7 +1314,7 @@ static int ath6kl_cfg80211_get_key(struct wiphy *wiphy, struct net_device *ndev, } static int ath6kl_cfg80211_set_default_key(struct wiphy *wiphy, - struct net_device *ndev, + struct net_device *ndev, int link_id, u8 key_index, bool unicast, bool multicast) { diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c index f93bdffa4d1d..40f9a7ef8980 100644 --- a/drivers/net/wireless/ath/wil6210/cfg80211.c +++ b/drivers/net/wireless/ath/wil6210/cfg80211.c @@ -1620,7 +1620,7 @@ static void wil_del_rx_key(u8 key_index, enum wmi_key_usage key_usage, } static int wil_cfg80211_add_key(struct wiphy *wiphy, - struct net_device *ndev, + struct net_device *ndev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr, struct key_params *params) @@ -1696,7 +1696,7 @@ static int wil_cfg80211_add_key(struct wiphy *wiphy, } static int wil_cfg80211_del_key(struct wiphy *wiphy, - struct net_device *ndev, + struct net_device *ndev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr) { @@ -1723,7 +1723,7 @@ static int wil_cfg80211_del_key(struct wiphy *wiphy, /* Need to be present or wiphy_new() will WARN */ static int wil_cfg80211_set_default_key(struct wiphy *wiphy, - struct net_device *ndev, + struct net_device *ndev, int link_id, u8 key_index, bool unicast, bool multicast) { @@ -2072,8 +2072,8 @@ void wil_cfg80211_ap_recovery(struct wil6210_priv *wil) key_params.key = vif->gtk; key_params.key_len = vif->gtk_len; key_params.seq_len = IEEE80211_GCMP_PN_LEN; - rc = wil_cfg80211_add_key(wiphy, ndev, vif->gtk_index, false, - NULL, &key_params); + rc = wil_cfg80211_add_key(wiphy, ndev, -1, vif->gtk_index, + false, NULL, &key_params); if (rc) wil_err(wil, "vif %d recovery add key failed (%d)\n", i, rc); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 9d044af7991b..7c72ea26a7d7 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -2361,7 +2361,8 @@ done: static s32 brcmf_cfg80211_config_default_key(struct wiphy *wiphy, struct net_device *ndev, - u8 key_idx, bool unicast, bool multicast) + int link_id, u8 key_idx, bool unicast, + bool multicast) { struct brcmf_if *ifp = netdev_priv(ndev); struct brcmf_pub *drvr = ifp->drvr; @@ -2395,7 +2396,8 @@ done: static s32 brcmf_cfg80211_del_key(struct wiphy *wiphy, struct net_device *ndev, - u8 key_idx, bool pairwise, const u8 *mac_addr) + int link_id, u8 key_idx, bool pairwise, + const u8 *mac_addr) { struct brcmf_if *ifp = netdev_priv(ndev); struct brcmf_wsec_key *key; @@ -2432,8 +2434,8 @@ brcmf_cfg80211_del_key(struct wiphy *wiphy, struct net_device *ndev, static s32 brcmf_cfg80211_add_key(struct wiphy *wiphy, struct net_device *ndev, - u8 key_idx, bool pairwise, const u8 *mac_addr, - struct key_params *params) + int link_id, u8 key_idx, bool pairwise, + const u8 *mac_addr, struct key_params *params) { struct brcmf_cfg80211_info *cfg = wiphy_to_cfg(wiphy); struct brcmf_if *ifp = netdev_priv(ndev); @@ -2457,8 +2459,8 @@ brcmf_cfg80211_add_key(struct wiphy *wiphy, struct net_device *ndev, } if (params->key_len == 0) - return brcmf_cfg80211_del_key(wiphy, ndev, key_idx, pairwise, - mac_addr); + return brcmf_cfg80211_del_key(wiphy, ndev, -1, key_idx, + pairwise, mac_addr); if (params->key_len > sizeof(key->data)) { bphy_err(drvr, "Too long key length (%u)\n", params->key_len); @@ -2553,8 +2555,9 @@ done: } static s32 -brcmf_cfg80211_get_key(struct wiphy *wiphy, struct net_device *ndev, u8 key_idx, - bool pairwise, const u8 *mac_addr, void *cookie, +brcmf_cfg80211_get_key(struct wiphy *wiphy, struct net_device *ndev, + int link_id, u8 key_idx, bool pairwise, + const u8 *mac_addr, void *cookie, void (*callback)(void *cookie, struct key_params *params)) { @@ -2610,7 +2613,8 @@ done: static s32 brcmf_cfg80211_config_default_mgmt_key(struct wiphy *wiphy, - struct net_device *ndev, u8 key_idx) + struct net_device *ndev, int link_id, + u8 key_idx) { struct brcmf_if *ifp = netdev_priv(ndev); diff --git a/drivers/net/wireless/marvell/libertas/cfg.c b/drivers/net/wireless/marvell/libertas/cfg.c index b0b3f59dabc6..5e3ae00153b8 100644 --- a/drivers/net/wireless/marvell/libertas/cfg.c +++ b/drivers/net/wireless/marvell/libertas/cfg.c @@ -1435,7 +1435,7 @@ static int lbs_cfg_disconnect(struct wiphy *wiphy, struct net_device *dev, } static int lbs_cfg_set_default_key(struct wiphy *wiphy, - struct net_device *netdev, + struct net_device *netdev, int link_id, u8 key_index, bool unicast, bool multicast) { @@ -1455,8 +1455,8 @@ static int lbs_cfg_set_default_key(struct wiphy *wiphy, static int lbs_cfg_add_key(struct wiphy *wiphy, struct net_device *netdev, - u8 idx, bool pairwise, const u8 *mac_addr, - struct key_params *params) + int link_id, u8 idx, bool pairwise, + const u8 *mac_addr, struct key_params *params) { struct lbs_private *priv = wiphy_priv(wiphy); u16 key_info; @@ -1516,7 +1516,8 @@ static int lbs_cfg_add_key(struct wiphy *wiphy, struct net_device *netdev, static int lbs_cfg_del_key(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, bool pairwise, const u8 *mac_addr) + int link_id, u8 key_index, bool pairwise, + const u8 *mac_addr) { lbs_deb_assoc("del_key: key_idx %d, mac_addr %pM\n", diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index d68c40e0e122..4f3bfdbe0d9d 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -154,7 +154,8 @@ static void *mwifiex_cfg80211_get_adapter(struct wiphy *wiphy) */ static int mwifiex_cfg80211_del_key(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, bool pairwise, const u8 *mac_addr) + int link_id, u8 key_index, bool pairwise, + const u8 *mac_addr) { struct mwifiex_private *priv = mwifiex_netdev_get_priv(netdev); static const u8 bc_mac[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; @@ -443,7 +444,7 @@ mwifiex_cfg80211_set_power_mgmt(struct wiphy *wiphy, */ static int mwifiex_cfg80211_set_default_key(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, bool unicast, + int link_id, u8 key_index, bool unicast, bool multicast) { struct mwifiex_private *priv = mwifiex_netdev_get_priv(netdev); @@ -468,8 +469,8 @@ mwifiex_cfg80211_set_default_key(struct wiphy *wiphy, struct net_device *netdev, */ static int mwifiex_cfg80211_add_key(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, bool pairwise, const u8 *mac_addr, - struct key_params *params) + int link_id, u8 key_index, bool pairwise, + const u8 *mac_addr, struct key_params *params) { struct mwifiex_private *priv = mwifiex_netdev_get_priv(netdev); struct mwifiex_wep_key *wep_key; @@ -506,6 +507,7 @@ mwifiex_cfg80211_add_key(struct wiphy *wiphy, struct net_device *netdev, static int mwifiex_cfg80211_set_default_mgmt_key(struct wiphy *wiphy, struct net_device *netdev, + int link_id, u8 key_index) { struct mwifiex_private *priv = mwifiex_netdev_get_priv(netdev); diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index 3ac373d29d93..f810a56a7ff0 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -540,8 +540,9 @@ static int wilc_wfi_cfg_copy_wpa_info(struct wilc_wfi_key *key_info, return 0; } -static int add_key(struct wiphy *wiphy, struct net_device *netdev, u8 key_index, - bool pairwise, const u8 *mac_addr, struct key_params *params) +static int add_key(struct wiphy *wiphy, struct net_device *netdev, int link_id, + u8 key_index, bool pairwise, const u8 *mac_addr, + struct key_params *params) { int ret = 0, keylen = params->key_len; @@ -644,7 +645,7 @@ static int add_key(struct wiphy *wiphy, struct net_device *netdev, u8 key_index, return ret; } -static int del_key(struct wiphy *wiphy, struct net_device *netdev, +static int del_key(struct wiphy *wiphy, struct net_device *netdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr) @@ -685,8 +686,9 @@ static int del_key(struct wiphy *wiphy, struct net_device *netdev, return 0; } -static int get_key(struct wiphy *wiphy, struct net_device *netdev, u8 key_index, - bool pairwise, const u8 *mac_addr, void *cookie, +static int get_key(struct wiphy *wiphy, struct net_device *netdev, int link_id, + u8 key_index, bool pairwise, const u8 *mac_addr, + void *cookie, void (*callback)(void *cookie, struct key_params *)) { struct wilc_vif *vif = netdev_priv(netdev); @@ -723,13 +725,14 @@ static int get_key(struct wiphy *wiphy, struct net_device *netdev, u8 key_index, /* wiphy_new_nm() will WARNON if not present */ static int set_default_key(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, bool unicast, bool multicast) + int link_id, u8 key_index, bool unicast, + bool multicast) { return 0; } static int set_default_mgmt_key(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index) + int link_id, u8 key_index) { struct wilc_vif *vif = netdev_priv(netdev); diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index 83df5977400e..4d796f5a3221 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -532,8 +532,8 @@ qtnf_dump_station(struct wiphy *wiphy, struct net_device *dev, } static int qtnf_add_key(struct wiphy *wiphy, struct net_device *dev, - u8 key_index, bool pairwise, const u8 *mac_addr, - struct key_params *params) + int link_id, u8 key_index, bool pairwise, + const u8 *mac_addr, struct key_params *params) { struct qtnf_vif *vif = qtnf_netdev_get_priv(dev); int ret; @@ -548,7 +548,8 @@ static int qtnf_add_key(struct wiphy *wiphy, struct net_device *dev, } static int qtnf_del_key(struct wiphy *wiphy, struct net_device *dev, - u8 key_index, bool pairwise, const u8 *mac_addr) + int link_id, u8 key_index, bool pairwise, + const u8 *mac_addr) { struct qtnf_vif *vif = qtnf_netdev_get_priv(dev); int ret; @@ -569,7 +570,8 @@ static int qtnf_del_key(struct wiphy *wiphy, struct net_device *dev, } static int qtnf_set_default_key(struct wiphy *wiphy, struct net_device *dev, - u8 key_index, bool unicast, bool multicast) + int link_id, u8 key_index, bool unicast, + bool multicast) { struct qtnf_vif *vif = qtnf_netdev_get_priv(dev); int ret; @@ -585,7 +587,7 @@ static int qtnf_set_default_key(struct wiphy *wiphy, struct net_device *dev, static int qtnf_set_default_mgmt_key(struct wiphy *wiphy, struct net_device *dev, - u8 key_index) + int link_id, u8 key_index) { struct qtnf_vif *vif = qtnf_netdev_get_priv(dev); int ret; diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c index 05524291d60c..325933217b41 100644 --- a/drivers/net/wireless/rndis_wlan.c +++ b/drivers/net/wireless/rndis_wlan.c @@ -489,14 +489,16 @@ static int rndis_join_ibss(struct wiphy *wiphy, struct net_device *dev, static int rndis_leave_ibss(struct wiphy *wiphy, struct net_device *dev); static int rndis_add_key(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, bool pairwise, const u8 *mac_addr, - struct key_params *params); + int link_id, u8 key_index, bool pairwise, + const u8 *mac_addr, struct key_params *params); static int rndis_del_key(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, bool pairwise, const u8 *mac_addr); + int link_id, u8 key_index, bool pairwise, + const u8 *mac_addr); static int rndis_set_default_key(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, bool unicast, bool multicast); + int link_id, u8 key_index, bool unicast, + bool multicast); static int rndis_get_station(struct wiphy *wiphy, struct net_device *dev, const u8 *mac, struct station_info *sinfo); @@ -2377,8 +2379,8 @@ static int rndis_leave_ibss(struct wiphy *wiphy, struct net_device *dev) } static int rndis_add_key(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, bool pairwise, const u8 *mac_addr, - struct key_params *params) + int link_id, u8 key_index, bool pairwise, + const u8 *mac_addr, struct key_params *params) { struct rndis_wlan_private *priv = wiphy_priv(wiphy); struct usbnet *usbdev = priv->usbdev; @@ -2413,7 +2415,8 @@ static int rndis_add_key(struct wiphy *wiphy, struct net_device *netdev, } static int rndis_del_key(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, bool pairwise, const u8 *mac_addr) + int link_id, u8 key_index, bool pairwise, + const u8 *mac_addr) { struct rndis_wlan_private *priv = wiphy_priv(wiphy); struct usbnet *usbdev = priv->usbdev; @@ -2424,7 +2427,8 @@ static int rndis_del_key(struct wiphy *wiphy, struct net_device *netdev, } static int rndis_set_default_key(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, bool unicast, bool multicast) + int link_id, u8 key_index, bool unicast, + bool multicast) { struct rndis_wlan_private *priv = wiphy_priv(wiphy); struct usbnet *usbdev = priv->usbdev; diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c index cf35125b7891..1632a2654cc5 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c @@ -901,8 +901,8 @@ exit: } static int cfg80211_rtw_add_key(struct wiphy *wiphy, struct net_device *ndev, - u8 key_index, bool pairwise, const u8 *mac_addr, - struct key_params *params) + int link_id, u8 key_index, bool pairwise, + const u8 *mac_addr, struct key_params *params) { char *alg_name; u32 param_len; @@ -993,8 +993,8 @@ addkey_end: } static int cfg80211_rtw_get_key(struct wiphy *wiphy, struct net_device *ndev, - u8 key_index, bool pairwise, const u8 *mac_addr, - void *cookie, + int link_id, u8 key_index, bool pairwise, + const u8 *mac_addr, void *cookie, void (*callback)(void *cookie, struct key_params*)) { @@ -1002,7 +1002,8 @@ static int cfg80211_rtw_get_key(struct wiphy *wiphy, struct net_device *ndev, } static int cfg80211_rtw_del_key(struct wiphy *wiphy, struct net_device *ndev, - u8 key_index, bool pairwise, const u8 *mac_addr) + int link_id, u8 key_index, bool pairwise, + const u8 *mac_addr) { struct adapter *padapter = rtw_netdev_priv(ndev); struct security_priv *psecuritypriv = &padapter->securitypriv; @@ -1017,7 +1018,7 @@ static int cfg80211_rtw_del_key(struct wiphy *wiphy, struct net_device *ndev, } static int cfg80211_rtw_set_default_key(struct wiphy *wiphy, - struct net_device *ndev, u8 key_index + struct net_device *ndev, int link_id, u8 key_index , bool unicast, bool multicast ) { diff --git a/drivers/staging/wlan-ng/cfg80211.c b/drivers/staging/wlan-ng/cfg80211.c index b7b56d8406d1..471bb310176f 100644 --- a/drivers/staging/wlan-ng/cfg80211.c +++ b/drivers/staging/wlan-ng/cfg80211.c @@ -143,8 +143,8 @@ exit: } static int prism2_add_key(struct wiphy *wiphy, struct net_device *dev, - u8 key_index, bool pairwise, const u8 *mac_addr, - struct key_params *params) + int link_id, u8 key_index, bool pairwise, + const u8 *mac_addr, struct key_params *params) { struct wlandevice *wlandev = dev->ml_priv; u32 did; @@ -172,7 +172,7 @@ static int prism2_add_key(struct wiphy *wiphy, struct net_device *dev, } static int prism2_get_key(struct wiphy *wiphy, struct net_device *dev, - u8 key_index, bool pairwise, + int link_id, u8 key_index, bool pairwise, const u8 *mac_addr, void *cookie, void (*callback)(void *cookie, struct key_params*)) { @@ -202,7 +202,8 @@ static int prism2_get_key(struct wiphy *wiphy, struct net_device *dev, } static int prism2_del_key(struct wiphy *wiphy, struct net_device *dev, - u8 key_index, bool pairwise, const u8 *mac_addr) + int link_id, u8 key_index, bool pairwise, + const u8 *mac_addr) { struct wlandevice *wlandev = dev->ml_priv; u32 did; @@ -227,7 +228,8 @@ static int prism2_del_key(struct wiphy *wiphy, struct net_device *dev, } static int prism2_set_default_key(struct wiphy *wiphy, struct net_device *dev, - u8 key_index, bool unicast, bool multicast) + int link_id, u8 key_index, bool unicast, + bool multicast) { struct wlandevice *wlandev = dev->ml_priv; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 1d393e09a632..ffbc65a9b00b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3931,22 +3931,33 @@ struct mgmt_frame_regs { * @del_intf_link: Remove an MLO link from the given interface. * * @add_key: add a key with the given parameters. @mac_addr will be %NULL - * when adding a group key. + * when adding a group key. @link_id will be -1 for non-MLO connection. + * For MLO connection, @link_id will be >= 0 for group key and -1 for + * pairwise key, @mac_addr will be peer's MLD address for MLO pairwise key. * * @get_key: get information about the key with the given parameters. * @mac_addr will be %NULL when requesting information for a group * key. All pointers given to the @callback function need not be valid * after it returns. This function should return an error if it is * not possible to retrieve the key, -ENOENT if it doesn't exist. + * @link_id will be -1 for non-MLO connection. For MLO connection, + * @link_id will be >= 0 for group key and -1 for pairwise key, @mac_addr + * will be peer's MLD address for MLO pairwise key. * * @del_key: remove a key given the @mac_addr (%NULL for a group key) - * and @key_index, return -ENOENT if the key doesn't exist. + * and @key_index, return -ENOENT if the key doesn't exist. @link_id will + * be -1 for non-MLO connection. For MLO connection, @link_id will be >= 0 + * for group key and -1 for pairwise key, @mac_addr will be peer's MLD + * address for MLO pairwise key. * - * @set_default_key: set the default key on an interface + * @set_default_key: set the default key on an interface. @link_id will be >= 0 + * for MLO connection and -1 for non-MLO connection. * - * @set_default_mgmt_key: set the default management frame key on an interface + * @set_default_mgmt_key: set the default management frame key on an interface. + * @link_id will be >= 0 for MLO connection and -1 for non-MLO connection. * - * @set_default_beacon_key: set the default Beacon frame key on an interface + * @set_default_beacon_key: set the default Beacon frame key on an interface. + * @link_id will be >= 0 for MLO connection and -1 for non-MLO connection. * * @set_rekey_data: give the data necessary for GTK rekeying to the driver * @@ -4295,22 +4306,24 @@ struct cfg80211_ops { unsigned int link_id); int (*add_key)(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, bool pairwise, const u8 *mac_addr, - struct key_params *params); + int link_id, u8 key_index, bool pairwise, + const u8 *mac_addr, struct key_params *params); int (*get_key)(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, bool pairwise, const u8 *mac_addr, - void *cookie, + int link_id, u8 key_index, bool pairwise, + const u8 *mac_addr, void *cookie, void (*callback)(void *cookie, struct key_params*)); int (*del_key)(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, bool pairwise, const u8 *mac_addr); + int link_id, u8 key_index, bool pairwise, + const u8 *mac_addr); int (*set_default_key)(struct wiphy *wiphy, - struct net_device *netdev, + struct net_device *netdev, int link_id, u8 key_index, bool unicast, bool multicast); int (*set_default_mgmt_key)(struct wiphy *wiphy, - struct net_device *netdev, + struct net_device *netdev, int link_id, u8 key_index); int (*set_default_beacon_key)(struct wiphy *wiphy, struct net_device *netdev, + int link_id, u8 key_index); int (*start_ap)(struct wiphy *wiphy, struct net_device *dev, diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index ffb7c573e299..573db20403dc 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -377,14 +377,22 @@ * the non-transmitting interfaces are deleted as well. * * @NL80211_CMD_GET_KEY: Get sequence counter information for a key specified - * by %NL80211_ATTR_KEY_IDX and/or %NL80211_ATTR_MAC. + * by %NL80211_ATTR_KEY_IDX and/or %NL80211_ATTR_MAC. %NL80211_ATTR_MAC + * represents peer's MLD address for MLO pairwise key. For MLO group key, + * the link is identified by %NL80211_ATTR_MLO_LINK_ID. * @NL80211_CMD_SET_KEY: Set key attributes %NL80211_ATTR_KEY_DEFAULT, * %NL80211_ATTR_KEY_DEFAULT_MGMT, or %NL80211_ATTR_KEY_THRESHOLD. + * For MLO connection, the link to set default key is identified by + * %NL80211_ATTR_MLO_LINK_ID. * @NL80211_CMD_NEW_KEY: add a key with given %NL80211_ATTR_KEY_DATA, * %NL80211_ATTR_KEY_IDX, %NL80211_ATTR_MAC, %NL80211_ATTR_KEY_CIPHER, - * and %NL80211_ATTR_KEY_SEQ attributes. + * and %NL80211_ATTR_KEY_SEQ attributes. %NL80211_ATTR_MAC represents + * peer's MLD address for MLO pairwise key. The link to add MLO + * group key is identified by %NL80211_ATTR_MLO_LINK_ID. * @NL80211_CMD_DEL_KEY: delete a key identified by %NL80211_ATTR_KEY_IDX - * or %NL80211_ATTR_MAC. + * or %NL80211_ATTR_MAC. %NL80211_ATTR_MAC represents peer's MLD address + * for MLO pairwise key. The link to delete group key is identified by + * %NL80211_ATTR_MLO_LINK_ID. * * @NL80211_CMD_GET_BEACON: (not used) * @NL80211_CMD_SET_BEACON: change the beacon on an access point interface diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index d97e13b5c3a8..c4c5e2d44eb8 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -452,8 +452,8 @@ static int ieee80211_set_tx(struct ieee80211_sub_if_data *sdata, } static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, - u8 key_idx, bool pairwise, const u8 *mac_addr, - struct key_params *params) + int link_id, u8 key_idx, bool pairwise, + const u8 *mac_addr, struct key_params *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; @@ -596,7 +596,8 @@ ieee80211_lookup_key(struct ieee80211_sub_if_data *sdata, } static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, - u8 key_idx, bool pairwise, const u8 *mac_addr) + int link_id, u8 key_idx, bool pairwise, + const u8 *mac_addr) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; @@ -623,8 +624,8 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, - u8 key_idx, bool pairwise, const u8 *mac_addr, - void *cookie, + int link_id, u8 key_idx, bool pairwise, + const u8 *mac_addr, void *cookie, void (*callback)(void *cookie, struct key_params *params)) { @@ -729,7 +730,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, static int ieee80211_config_default_key(struct wiphy *wiphy, struct net_device *dev, - u8 key_idx, bool uni, + int link_id, u8 key_idx, bool uni, bool multi) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -741,7 +742,7 @@ static int ieee80211_config_default_key(struct wiphy *wiphy, static int ieee80211_config_default_mgmt_key(struct wiphy *wiphy, struct net_device *dev, - u8 key_idx) + int link_id, u8 key_idx) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -752,7 +753,7 @@ static int ieee80211_config_default_mgmt_key(struct wiphy *wiphy, static int ieee80211_config_default_beacon_key(struct wiphy *wiphy, struct net_device *dev, - u8 key_idx) + int link_id, u8 key_idx) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 4935f94d1acc..edd062f104f4 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -171,7 +171,7 @@ static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext) */ if (rdev->ops->del_key) for (i = 0; i < 6; i++) - rdev_del_key(rdev, dev, i, false, NULL); + rdev_del_key(rdev, dev, -1, i, false, NULL); if (wdev->u.ibss.current_bss) { cfg80211_unhold_bss(wdev->u.ibss.current_bss); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e2169c364ae1..72242681ab86 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1545,7 +1545,6 @@ static int nl80211_key_allowed(struct wireless_dev *wdev) return -ENOLINK; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: - /* for MLO, require driver validation of the link ID */ if (wdev->connected) return 0; return -ENOLINK; @@ -4333,6 +4332,38 @@ static int nl80211_set_noack_map(struct sk_buff *skb, struct genl_info *info) return rdev_set_noack_map(rdev, dev, noack_map); } +static int nl80211_validate_key_link_id(struct genl_info *info, + struct wireless_dev *wdev, + int link_id, bool pairwise) +{ + if (pairwise) { + if (link_id != -1) { + GENL_SET_ERR_MSG(info, + "link ID not allowed for pairwise key"); + return -EINVAL; + } + + return 0; + } + + if (wdev->valid_links) { + if (link_id == -1) { + GENL_SET_ERR_MSG(info, + "link ID must for MLO group key"); + return -EINVAL; + } + if (!(wdev->valid_links & BIT(link_id))) { + GENL_SET_ERR_MSG(info, "invalid link ID for MLO group key"); + return -EINVAL; + } + } else if (link_id != -1) { + GENL_SET_ERR_MSG(info, "link ID not allowed for non-MLO group key"); + return -EINVAL; + } + + return 0; +} + struct get_key_cookie { struct sk_buff *msg; int error; @@ -4394,13 +4425,15 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) void *hdr; struct sk_buff *msg; bool bigtk_support = false; + int link_id = nl80211_link_id_or_invalid(info->attrs); + struct wireless_dev *wdev = dev->ieee80211_ptr; if (wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_BEACON_PROTECTION)) bigtk_support = true; - if ((dev->ieee80211_ptr->iftype == NL80211_IFTYPE_STATION || - dev->ieee80211_ptr->iftype == NL80211_IFTYPE_P2P_CLIENT) && + if ((wdev->iftype == NL80211_IFTYPE_STATION || + wdev->iftype == NL80211_IFTYPE_P2P_CLIENT) && wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT)) bigtk_support = true; @@ -4452,8 +4485,12 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr)) goto nla_put_failure; - err = rdev_get_key(rdev, dev, key_idx, pairwise, mac_addr, &cookie, - get_key_callback); + err = nl80211_validate_key_link_id(info, wdev, link_id, pairwise); + if (err) + goto free_msg; + + err = rdev_get_key(rdev, dev, link_id, key_idx, pairwise, mac_addr, + &cookie, get_key_callback); if (err) goto free_msg; @@ -4477,6 +4514,8 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) struct key_parse key; int err; struct net_device *dev = info->user_ptr[1]; + int link_id = nl80211_link_id_or_invalid(info->attrs); + struct wireless_dev *wdev = dev->ieee80211_ptr; err = nl80211_parse_key(info, &key); if (err) @@ -4492,7 +4531,7 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) !(key.p.mode == NL80211_KEY_SET_TX)) return -EINVAL; - wdev_lock(dev->ieee80211_ptr); + wdev_lock(wdev); if (key.def) { if (!rdev->ops->set_default_key) { @@ -4500,18 +4539,22 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) goto out; } - err = nl80211_key_allowed(dev->ieee80211_ptr); + err = nl80211_key_allowed(wdev); + if (err) + goto out; + + err = nl80211_validate_key_link_id(info, wdev, link_id, false); if (err) goto out; - err = rdev_set_default_key(rdev, dev, key.idx, - key.def_uni, key.def_multi); + err = rdev_set_default_key(rdev, dev, link_id, key.idx, + key.def_uni, key.def_multi); if (err) goto out; #ifdef CONFIG_CFG80211_WEXT - dev->ieee80211_ptr->wext.default_key = key.idx; + wdev->wext.default_key = key.idx; #endif } else if (key.defmgmt) { if (key.def_uni || !key.def_multi) { @@ -4524,16 +4567,20 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) goto out; } - err = nl80211_key_allowed(dev->ieee80211_ptr); + err = nl80211_key_allowed(wdev); + if (err) + goto out; + + err = nl80211_validate_key_link_id(info, wdev, link_id, false); if (err) goto out; - err = rdev_set_default_mgmt_key(rdev, dev, key.idx); + err = rdev_set_default_mgmt_key(rdev, dev, link_id, key.idx); if (err) goto out; #ifdef CONFIG_CFG80211_WEXT - dev->ieee80211_ptr->wext.default_mgmt_key = key.idx; + wdev->wext.default_mgmt_key = key.idx; #endif } else if (key.defbeacon) { if (key.def_uni || !key.def_multi) { @@ -4546,11 +4593,15 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) goto out; } - err = nl80211_key_allowed(dev->ieee80211_ptr); + err = nl80211_key_allowed(wdev); + if (err) + goto out; + + err = nl80211_validate_key_link_id(info, wdev, link_id, false); if (err) goto out; - err = rdev_set_default_beacon_key(rdev, dev, key.idx); + err = rdev_set_default_beacon_key(rdev, dev, link_id, key.idx); if (err) goto out; } else if (key.p.mode == NL80211_KEY_SET_TX && @@ -4566,14 +4617,18 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) goto out; } - err = rdev_add_key(rdev, dev, key.idx, + err = nl80211_validate_key_link_id(info, wdev, link_id, true); + if (err) + goto out; + + err = rdev_add_key(rdev, dev, link_id, key.idx, NL80211_KEYTYPE_PAIRWISE, mac_addr, &key.p); } else { err = -EINVAL; } out: - wdev_unlock(dev->ieee80211_ptr); + wdev_unlock(wdev); return err; } @@ -4585,6 +4640,8 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) struct net_device *dev = info->user_ptr[1]; struct key_parse key; const u8 *mac_addr = NULL; + int link_id = nl80211_link_id_or_invalid(info->attrs); + struct wireless_dev *wdev = dev->ieee80211_ptr; err = nl80211_parse_key(info, &key); if (err) @@ -4626,18 +4683,23 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } - wdev_lock(dev->ieee80211_ptr); - err = nl80211_key_allowed(dev->ieee80211_ptr); + wdev_lock(wdev); + err = nl80211_key_allowed(wdev); if (err) GENL_SET_ERR_MSG(info, "key not allowed"); + + if (!err) + err = nl80211_validate_key_link_id(info, wdev, link_id, + key.type == NL80211_KEYTYPE_PAIRWISE); + if (!err) { - err = rdev_add_key(rdev, dev, key.idx, + err = rdev_add_key(rdev, dev, link_id, key.idx, key.type == NL80211_KEYTYPE_PAIRWISE, mac_addr, &key.p); if (err) GENL_SET_ERR_MSG(info, "key addition failed"); } - wdev_unlock(dev->ieee80211_ptr); + wdev_unlock(wdev); return err; } @@ -4649,6 +4711,8 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) struct net_device *dev = info->user_ptr[1]; u8 *mac_addr = NULL; struct key_parse key; + int link_id = nl80211_link_id_or_invalid(info->attrs); + struct wireless_dev *wdev = dev->ieee80211_ptr; err = nl80211_parse_key(info, &key); if (err) @@ -4676,27 +4740,31 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->del_key) return -EOPNOTSUPP; - wdev_lock(dev->ieee80211_ptr); - err = nl80211_key_allowed(dev->ieee80211_ptr); + wdev_lock(wdev); + err = nl80211_key_allowed(wdev); if (key.type == NL80211_KEYTYPE_GROUP && mac_addr && !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)) err = -ENOENT; if (!err) - err = rdev_del_key(rdev, dev, key.idx, + err = nl80211_validate_key_link_id(info, wdev, link_id, + key.type == NL80211_KEYTYPE_PAIRWISE); + + if (!err) + err = rdev_del_key(rdev, dev, link_id, key.idx, key.type == NL80211_KEYTYPE_PAIRWISE, mac_addr); #ifdef CONFIG_CFG80211_WEXT if (!err) { - if (key.idx == dev->ieee80211_ptr->wext.default_key) - dev->ieee80211_ptr->wext.default_key = -1; - else if (key.idx == dev->ieee80211_ptr->wext.default_mgmt_key) - dev->ieee80211_ptr->wext.default_mgmt_key = -1; + if (key.idx == wdev->wext.default_key) + wdev->wext.default_key = -1; + else if (key.idx == wdev->wext.default_mgmt_key) + wdev->wext.default_mgmt_key = -1; } #endif - wdev_unlock(dev->ieee80211_ptr); + wdev_unlock(wdev); return err; } diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 40915a82da73..13b209a8db28 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -77,65 +77,69 @@ rdev_change_virtual_intf(struct cfg80211_registered_device *rdev, } static inline int rdev_add_key(struct cfg80211_registered_device *rdev, - struct net_device *netdev, u8 key_index, - bool pairwise, const u8 *mac_addr, + struct net_device *netdev, int link_id, + u8 key_index, bool pairwise, const u8 *mac_addr, struct key_params *params) { int ret; - trace_rdev_add_key(&rdev->wiphy, netdev, key_index, pairwise, + trace_rdev_add_key(&rdev->wiphy, netdev, link_id, key_index, pairwise, mac_addr, params->mode); - ret = rdev->ops->add_key(&rdev->wiphy, netdev, key_index, pairwise, - mac_addr, params); + ret = rdev->ops->add_key(&rdev->wiphy, netdev, link_id, key_index, + pairwise, mac_addr, params); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_get_key(struct cfg80211_registered_device *rdev, struct net_device *netdev, - u8 key_index, bool pairwise, const u8 *mac_addr, void *cookie, + int link_id, u8 key_index, bool pairwise, const u8 *mac_addr, + void *cookie, void (*callback)(void *cookie, struct key_params*)) { int ret; - trace_rdev_get_key(&rdev->wiphy, netdev, key_index, pairwise, mac_addr); - ret = rdev->ops->get_key(&rdev->wiphy, netdev, key_index, pairwise, - mac_addr, cookie, callback); + trace_rdev_get_key(&rdev->wiphy, netdev, link_id, key_index, pairwise, + mac_addr); + ret = rdev->ops->get_key(&rdev->wiphy, netdev, link_id, key_index, + pairwise, mac_addr, cookie, callback); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_del_key(struct cfg80211_registered_device *rdev, - struct net_device *netdev, u8 key_index, - bool pairwise, const u8 *mac_addr) + struct net_device *netdev, int link_id, + u8 key_index, bool pairwise, const u8 *mac_addr) { int ret; - trace_rdev_del_key(&rdev->wiphy, netdev, key_index, pairwise, mac_addr); - ret = rdev->ops->del_key(&rdev->wiphy, netdev, key_index, pairwise, - mac_addr); + trace_rdev_del_key(&rdev->wiphy, netdev, link_id, key_index, pairwise, + mac_addr); + ret = rdev->ops->del_key(&rdev->wiphy, netdev, link_id, key_index, + pairwise, mac_addr); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_set_default_key(struct cfg80211_registered_device *rdev, - struct net_device *netdev, u8 key_index, bool unicast, - bool multicast) + struct net_device *netdev, int link_id, u8 key_index, + bool unicast, bool multicast) { int ret; - trace_rdev_set_default_key(&rdev->wiphy, netdev, key_index, + trace_rdev_set_default_key(&rdev->wiphy, netdev, link_id, key_index, unicast, multicast); - ret = rdev->ops->set_default_key(&rdev->wiphy, netdev, key_index, - unicast, multicast); + ret = rdev->ops->set_default_key(&rdev->wiphy, netdev, link_id, + key_index, unicast, multicast); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_set_default_mgmt_key(struct cfg80211_registered_device *rdev, - struct net_device *netdev, u8 key_index) + struct net_device *netdev, int link_id, u8 key_index) { int ret; - trace_rdev_set_default_mgmt_key(&rdev->wiphy, netdev, key_index); - ret = rdev->ops->set_default_mgmt_key(&rdev->wiphy, netdev, + trace_rdev_set_default_mgmt_key(&rdev->wiphy, netdev, link_id, + key_index); + ret = rdev->ops->set_default_mgmt_key(&rdev->wiphy, netdev, link_id, key_index); trace_rdev_return_int(&rdev->wiphy, ret); return ret; @@ -143,13 +147,15 @@ rdev_set_default_mgmt_key(struct cfg80211_registered_device *rdev, static inline int rdev_set_default_beacon_key(struct cfg80211_registered_device *rdev, - struct net_device *netdev, u8 key_index) + struct net_device *netdev, int link_id, + u8 key_index) { int ret; - trace_rdev_set_default_beacon_key(&rdev->wiphy, netdev, key_index); - ret = rdev->ops->set_default_beacon_key(&rdev->wiphy, netdev, - key_index); + trace_rdev_set_default_beacon_key(&rdev->wiphy, netdev, link_id, + key_index); + ret = rdev->ops->set_default_beacon_key(&rdev->wiphy, netdev, link_id, + key_index); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 34d27a3070f0..0a5c95631f78 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -1326,7 +1326,7 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT)) max_key_idx = 7; for (i = 0; i <= max_key_idx; i++) - rdev_del_key(rdev, dev, i, false, NULL); + rdev_del_key(rdev, dev, -1, i, false, NULL); } rdev_set_qos_map(rdev, dev, NULL); diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 10b2fd9bacb5..001c00d9c5e7 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -434,13 +434,14 @@ TRACE_EVENT(rdev_change_virtual_intf, ); DECLARE_EVENT_CLASS(key_handle, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 key_index, - bool pairwise, const u8 *mac_addr), - TP_ARGS(wiphy, netdev, key_index, pairwise, mac_addr), + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id, + u8 key_index, bool pairwise, const u8 *mac_addr), + TP_ARGS(wiphy, netdev, link_id, key_index, pairwise, mac_addr), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(mac_addr) + __field(int, link_id) __field(u8, key_index) __field(bool, pairwise) ), @@ -448,34 +449,38 @@ DECLARE_EVENT_CLASS(key_handle, WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(mac_addr, mac_addr); + __entry->link_id = link_id; __entry->key_index = key_index; __entry->pairwise = pairwise; ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", key_index: %u, pairwise: %s, mac addr: " MAC_PR_FMT, - WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->key_index, - BOOL_TO_STR(__entry->pairwise), MAC_PR_ARG(mac_addr)) + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, " + "key_index: %u, pairwise: %s, mac addr: " MAC_PR_FMT, + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id, + __entry->key_index, BOOL_TO_STR(__entry->pairwise), + MAC_PR_ARG(mac_addr)) ); DEFINE_EVENT(key_handle, rdev_get_key, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 key_index, - bool pairwise, const u8 *mac_addr), - TP_ARGS(wiphy, netdev, key_index, pairwise, mac_addr) + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id, + u8 key_index, bool pairwise, const u8 *mac_addr), + TP_ARGS(wiphy, netdev, link_id, key_index, pairwise, mac_addr) ); DEFINE_EVENT(key_handle, rdev_del_key, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 key_index, - bool pairwise, const u8 *mac_addr), - TP_ARGS(wiphy, netdev, key_index, pairwise, mac_addr) + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id, + u8 key_index, bool pairwise, const u8 *mac_addr), + TP_ARGS(wiphy, netdev, link_id, key_index, pairwise, mac_addr) ); TRACE_EVENT(rdev_add_key, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 key_index, - bool pairwise, const u8 *mac_addr, u8 mode), - TP_ARGS(wiphy, netdev, key_index, pairwise, mac_addr, mode), + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id, + u8 key_index, bool pairwise, const u8 *mac_addr, u8 mode), + TP_ARGS(wiphy, netdev, link_id, key_index, pairwise, mac_addr, mode), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(mac_addr) + __field(int, link_id) __field(u8, key_index) __field(bool, pairwise) __field(u8, mode) @@ -484,24 +489,27 @@ TRACE_EVENT(rdev_add_key, WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(mac_addr, mac_addr); + __entry->link_id = link_id; __entry->key_index = key_index; __entry->pairwise = pairwise; __entry->mode = mode; ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", key_index: %u, " - "mode: %u, pairwise: %s, mac addr: " MAC_PR_FMT, - WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->key_index, - __entry->mode, BOOL_TO_STR(__entry->pairwise), - MAC_PR_ARG(mac_addr)) + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, " + "key_index: %u, mode: %u, pairwise: %s, " + "mac addr: " MAC_PR_FMT, + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id, + __entry->key_index, __entry->mode, + BOOL_TO_STR(__entry->pairwise), MAC_PR_ARG(mac_addr)) ); TRACE_EVENT(rdev_set_default_key, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 key_index, - bool unicast, bool multicast), - TP_ARGS(wiphy, netdev, key_index, unicast, multicast), + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id, + u8 key_index, bool unicast, bool multicast), + TP_ARGS(wiphy, netdev, link_id, key_index, unicast, multicast), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY + __field(int, link_id) __field(u8, key_index) __field(bool, unicast) __field(bool, multicast) @@ -509,48 +517,58 @@ TRACE_EVENT(rdev_set_default_key, TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; + __entry->link_id = link_id; __entry->key_index = key_index; __entry->unicast = unicast; __entry->multicast = multicast; ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", key index: %u, unicast: %s, multicast: %s", - WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->key_index, - BOOL_TO_STR(__entry->unicast), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, " + "key index: %u, unicast: %s, multicast: %s", + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id, + __entry->key_index, BOOL_TO_STR(__entry->unicast), BOOL_TO_STR(__entry->multicast)) ); TRACE_EVENT(rdev_set_default_mgmt_key, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 key_index), - TP_ARGS(wiphy, netdev, key_index), + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id, + u8 key_index), + TP_ARGS(wiphy, netdev, link_id, key_index), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY + __field(int, link_id) __field(u8, key_index) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; + __entry->link_id = link_id; __entry->key_index = key_index; ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", key index: %u", - WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->key_index) + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, " + "key index: %u", WIPHY_PR_ARG, NETDEV_PR_ARG, + __entry->link_id, __entry->key_index) ); TRACE_EVENT(rdev_set_default_beacon_key, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 key_index), - TP_ARGS(wiphy, netdev, key_index), + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id, + u8 key_index), + TP_ARGS(wiphy, netdev, link_id, key_index), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY + __field(int, link_id) __field(u8, key_index) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; + __entry->link_id = link_id; __entry->key_index = key_index; ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", key index: %u", - WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->key_index) + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, " + "key index: %u", WIPHY_PR_ARG, NETDEV_PR_ARG, + __entry->link_id, __entry->key_index) ); TRACE_EVENT(rdev_start_ap, diff --git a/net/wireless/util.c b/net/wireless/util.c index 2c127951764a..0b28d00ba8f5 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -935,13 +935,13 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev) for (i = 0; i < CFG80211_MAX_WEP_KEYS; i++) { if (!wdev->connect_keys->params[i].cipher) continue; - if (rdev_add_key(rdev, dev, i, false, NULL, + if (rdev_add_key(rdev, dev, -1, i, false, NULL, &wdev->connect_keys->params[i])) { netdev_err(dev, "failed to set key %d\n", i); continue; } if (wdev->connect_keys->def == i && - rdev_set_default_key(rdev, dev, i, true, true)) { + rdev_set_default_key(rdev, dev, -1, i, true, true)) { netdev_err(dev, "failed to set defkey %d\n", i); continue; } diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 129d3bb91dfb..ddf340bfa07a 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -470,7 +470,7 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)) err = -ENOENT; else - err = rdev_del_key(rdev, dev, idx, pairwise, + err = rdev_del_key(rdev, dev, -1, idx, pairwise, addr); } wdev->wext.connect.privacy = false; @@ -509,7 +509,7 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, if (wdev->connected || (wdev->iftype == NL80211_IFTYPE_ADHOC && wdev->u.ibss.current_bss)) - err = rdev_add_key(rdev, dev, idx, pairwise, addr, params); + err = rdev_add_key(rdev, dev, -1, idx, pairwise, addr, params); else if (params->cipher != WLAN_CIPHER_SUITE_WEP40 && params->cipher != WLAN_CIPHER_SUITE_WEP104) return -EINVAL; @@ -546,7 +546,8 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, __cfg80211_leave_ibss(rdev, wdev->netdev, true); rejoin = true; } - err = rdev_set_default_key(rdev, dev, idx, true, true); + err = rdev_set_default_key(rdev, dev, -1, idx, true, + true); } if (!err) { wdev->wext.default_key = idx; @@ -561,7 +562,7 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, if (wdev->connected || (wdev->iftype == NL80211_IFTYPE_ADHOC && wdev->u.ibss.current_bss)) - err = rdev_set_default_mgmt_key(rdev, dev, idx); + err = rdev_set_default_mgmt_key(rdev, dev, -1, idx); if (!err) wdev->wext.default_mgmt_key = idx; return err; @@ -632,7 +633,7 @@ static int cfg80211_wext_siwencode(struct net_device *dev, if (wdev->connected || (wdev->iftype == NL80211_IFTYPE_ADHOC && wdev->u.ibss.current_bss)) - err = rdev_set_default_key(rdev, dev, idx, true, + err = rdev_set_default_key(rdev, dev, -1, idx, true, true); if (!err) wdev->wext.default_key = idx; -- cgit v1.2.3-70-g09d2 From ccdde7c74ffd7e8bdd3cf685bbfa41231c8e3131 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 17 Aug 2022 11:17:01 +0200 Subject: wifi: mac80211: properly implement MLO key handling Implement key installation and lookup (on TX and RX) for MLO, so we can use multiple GTKs/IGTKs/BIGTKs. Co-authored-by: Ilan Peer Signed-off-by: Ilan Peer Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 + net/mac80211/cfg.c | 75 ++++++++++++++---- net/mac80211/ieee80211_i.h | 1 + net/mac80211/iface.c | 9 +++ net/mac80211/key.c | 190 +++++++++++++++++++++++++++++++-------------- net/mac80211/key.h | 13 +++- net/mac80211/rx.c | 44 ++++++----- net/mac80211/tx.c | 56 ++++++++++--- 8 files changed, 286 insertions(+), 104 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index f198af600b5e..7951400e509b 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1975,6 +1975,7 @@ enum ieee80211_key_flags { * - Temporal Authenticator Rx MIC Key (64 bits) * @icv_len: The ICV length for this key type * @iv_len: The IV length for this key type + * @link_id: the link ID for MLO, or -1 for non-MLO or pairwise keys */ struct ieee80211_key_conf { atomic64_t tx_pn; @@ -1984,6 +1985,7 @@ struct ieee80211_key_conf { u8 hw_key_idx; s8 keyidx; u16 flags; + s8 link_id; u8 keylen; u8 key[]; }; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index c4c5e2d44eb8..854becd00468 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -24,12 +24,18 @@ #include "wme.h" static struct ieee80211_link_data * -ieee80211_link_or_deflink(struct ieee80211_sub_if_data *sdata, int link_id) +ieee80211_link_or_deflink(struct ieee80211_sub_if_data *sdata, int link_id, + bool require_valid) { struct ieee80211_link_data *link; if (link_id < 0) { - if (sdata->vif.valid_links) + /* + * For keys, if sdata is not an MLD, we might not use + * the return value at all (if it's not a pairwise key), + * so in that case (require_valid==false) don't error. + */ + if (require_valid && sdata->vif.valid_links) return ERR_PTR(-EINVAL); return &sdata->deflink; @@ -456,6 +462,8 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, const u8 *mac_addr, struct key_params *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_link_data *link = + ieee80211_link_or_deflink(sdata, link_id, false); struct ieee80211_local *local = sdata->local; struct sta_info *sta = NULL; struct ieee80211_key *key; @@ -464,6 +472,9 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, if (!ieee80211_sdata_running(sdata)) return -ENETDOWN; + if (IS_ERR(link)) + return PTR_ERR(link); + if (pairwise && params->mode == NL80211_KEY_SET_TX) return ieee80211_set_tx(sdata, mac_addr, key_idx); @@ -472,6 +483,8 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, case WLAN_CIPHER_SUITE_WEP40: case WLAN_CIPHER_SUITE_TKIP: case WLAN_CIPHER_SUITE_WEP104: + if (link_id >= 0) + return -EINVAL; if (WARN_ON_ONCE(fips_enabled)) return -EINVAL; break; @@ -484,6 +497,8 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, if (IS_ERR(key)) return PTR_ERR(key); + key->conf.link_id = link_id; + if (pairwise) key->conf.flags |= IEEE80211_KEY_FLAG_PAIRWISE; @@ -545,7 +560,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, break; } - err = ieee80211_key_link(key, sdata, sta); + err = ieee80211_key_link(key, link, sta); out_unlock: mutex_unlock(&local->sta_mtx); @@ -554,18 +569,37 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, } static struct ieee80211_key * -ieee80211_lookup_key(struct ieee80211_sub_if_data *sdata, +ieee80211_lookup_key(struct ieee80211_sub_if_data *sdata, int link_id, u8 key_idx, bool pairwise, const u8 *mac_addr) { struct ieee80211_local *local = sdata->local; + struct ieee80211_link_data *link = &sdata->deflink; struct ieee80211_key *key; - struct sta_info *sta; + + if (link_id >= 0) { + link = rcu_dereference_check(sdata->link[link_id], + lockdep_is_held(&sdata->wdev.mtx)); + if (!link) + return NULL; + } if (mac_addr) { + struct sta_info *sta; + struct link_sta_info *link_sta; + sta = sta_info_get_bss(sdata, mac_addr); if (!sta) return NULL; + if (link_id >= 0) { + link_sta = rcu_dereference_check(sta->link[link_id], + lockdep_is_held(&local->sta_mtx)); + if (!link_sta) + return NULL; + } else { + link_sta = &sta->deflink; + } + if (pairwise && key_idx < NUM_DEFAULT_KEYS) return rcu_dereference_check_key_mtx(local, sta->ptk[key_idx]); @@ -575,7 +609,7 @@ ieee80211_lookup_key(struct ieee80211_sub_if_data *sdata, NUM_DEFAULT_MGMT_KEYS + NUM_DEFAULT_BEACON_KEYS) return rcu_dereference_check_key_mtx(local, - sta->deflink.gtk[key_idx]); + link_sta->gtk[key_idx]); return NULL; } @@ -584,7 +618,7 @@ ieee80211_lookup_key(struct ieee80211_sub_if_data *sdata, return rcu_dereference_check_key_mtx(local, sdata->keys[key_idx]); - key = rcu_dereference_check_key_mtx(local, sdata->deflink.gtk[key_idx]); + key = rcu_dereference_check_key_mtx(local, link->gtk[key_idx]); if (key) return key; @@ -607,7 +641,7 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, mutex_lock(&local->sta_mtx); mutex_lock(&local->key_mtx); - key = ieee80211_lookup_key(sdata, key_idx, pairwise, mac_addr); + key = ieee80211_lookup_key(sdata, link_id, key_idx, pairwise, mac_addr); if (!key) { ret = -ENOENT; goto out_unlock; @@ -643,7 +677,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, rcu_read_lock(); - key = ieee80211_lookup_key(sdata, key_idx, pairwise, mac_addr); + key = ieee80211_lookup_key(sdata, link_id, key_idx, pairwise, mac_addr); if (!key) goto out; @@ -734,8 +768,13 @@ static int ieee80211_config_default_key(struct wiphy *wiphy, bool multi) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_link_data *link = + ieee80211_link_or_deflink(sdata, link_id, false); - ieee80211_set_default_key(sdata, key_idx, uni, multi); + if (IS_ERR(link)) + return PTR_ERR(link); + + ieee80211_set_default_key(link, key_idx, uni, multi); return 0; } @@ -745,8 +784,13 @@ static int ieee80211_config_default_mgmt_key(struct wiphy *wiphy, int link_id, u8 key_idx) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_link_data *link = + ieee80211_link_or_deflink(sdata, link_id, true); - ieee80211_set_default_mgmt_key(sdata, key_idx); + if (IS_ERR(link)) + return PTR_ERR(link); + + ieee80211_set_default_mgmt_key(link, key_idx); return 0; } @@ -756,8 +800,13 @@ static int ieee80211_config_default_beacon_key(struct wiphy *wiphy, int link_id, u8 key_idx) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_link_data *link = + ieee80211_link_or_deflink(sdata, link_id, true); + + if (IS_ERR(link)) + return PTR_ERR(link); - ieee80211_set_default_beacon_key(sdata, key_idx); + ieee80211_set_default_beacon_key(link, key_idx); return 0; } @@ -2588,7 +2637,7 @@ static int ieee80211_set_txq_params(struct wiphy *wiphy, struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link = - ieee80211_link_or_deflink(sdata, params->link_id); + ieee80211_link_or_deflink(sdata, params->link_id, true); struct ieee80211_tx_queue_params p; if (!local->ops->conf_tx) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index e192e1ec0261..6313c49e5682 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -213,6 +213,7 @@ struct ieee80211_rx_data { struct ieee80211_sub_if_data *sdata; struct ieee80211_link_data *link; struct sta_info *sta; + struct link_sta_info *link_sta; struct ieee80211_key *key; unsigned int flags; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 3c30e1219861..b6e581fc9a40 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -434,10 +434,19 @@ struct link_container { static void ieee80211_free_links(struct ieee80211_sub_if_data *sdata, struct link_container **links) { + LIST_HEAD(keys); unsigned int link_id; + for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { + if (!links[link_id]) + continue; + ieee80211_remove_link_keys(&links[link_id]->data, &keys); + } + synchronize_rcu(); + ieee80211_free_key_list(sdata->local, &keys); + for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { if (!links[link_id]) continue; diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 6befb578ed9e..86aac87e0211 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -344,9 +344,10 @@ static void ieee80211_pairwise_rekey(struct ieee80211_key *old, } } -static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, +static void __ieee80211_set_default_key(struct ieee80211_link_data *link, int idx, bool uni, bool multi) { + struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_key *key = NULL; assert_key_lock(sdata->local); @@ -354,7 +355,7 @@ static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, if (idx >= 0 && idx < NUM_DEFAULT_KEYS) { key = key_mtx_dereference(sdata->local, sdata->keys[idx]); if (!key) - key = key_mtx_dereference(sdata->local, sdata->deflink.gtk[idx]); + key = key_mtx_dereference(sdata->local, link->gtk[idx]); } if (uni) { @@ -365,47 +366,48 @@ static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, } if (multi) - rcu_assign_pointer(sdata->deflink.default_multicast_key, key); + rcu_assign_pointer(link->default_multicast_key, key); ieee80211_debugfs_key_update_default(sdata); } -void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx, +void ieee80211_set_default_key(struct ieee80211_link_data *link, int idx, bool uni, bool multi) { - mutex_lock(&sdata->local->key_mtx); - __ieee80211_set_default_key(sdata, idx, uni, multi); - mutex_unlock(&sdata->local->key_mtx); + mutex_lock(&link->sdata->local->key_mtx); + __ieee80211_set_default_key(link, idx, uni, multi); + mutex_unlock(&link->sdata->local->key_mtx); } static void -__ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, int idx) +__ieee80211_set_default_mgmt_key(struct ieee80211_link_data *link, int idx) { + struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_key *key = NULL; assert_key_lock(sdata->local); if (idx >= NUM_DEFAULT_KEYS && idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) - key = key_mtx_dereference(sdata->local, - sdata->deflink.gtk[idx]); + key = key_mtx_dereference(sdata->local, link->gtk[idx]); - rcu_assign_pointer(sdata->deflink.default_mgmt_key, key); + rcu_assign_pointer(link->default_mgmt_key, key); ieee80211_debugfs_key_update_default(sdata); } -void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, +void ieee80211_set_default_mgmt_key(struct ieee80211_link_data *link, int idx) { - mutex_lock(&sdata->local->key_mtx); - __ieee80211_set_default_mgmt_key(sdata, idx); - mutex_unlock(&sdata->local->key_mtx); + mutex_lock(&link->sdata->local->key_mtx); + __ieee80211_set_default_mgmt_key(link, idx); + mutex_unlock(&link->sdata->local->key_mtx); } static void -__ieee80211_set_default_beacon_key(struct ieee80211_sub_if_data *sdata, int idx) +__ieee80211_set_default_beacon_key(struct ieee80211_link_data *link, int idx) { + struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_key *key = NULL; assert_key_lock(sdata->local); @@ -413,28 +415,30 @@ __ieee80211_set_default_beacon_key(struct ieee80211_sub_if_data *sdata, int idx) if (idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS && idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS + NUM_DEFAULT_BEACON_KEYS) - key = key_mtx_dereference(sdata->local, - sdata->deflink.gtk[idx]); + key = key_mtx_dereference(sdata->local, link->gtk[idx]); - rcu_assign_pointer(sdata->deflink.default_beacon_key, key); + rcu_assign_pointer(link->default_beacon_key, key); ieee80211_debugfs_key_update_default(sdata); } -void ieee80211_set_default_beacon_key(struct ieee80211_sub_if_data *sdata, +void ieee80211_set_default_beacon_key(struct ieee80211_link_data *link, int idx) { - mutex_lock(&sdata->local->key_mtx); - __ieee80211_set_default_beacon_key(sdata, idx); - mutex_unlock(&sdata->local->key_mtx); + mutex_lock(&link->sdata->local->key_mtx); + __ieee80211_set_default_beacon_key(link, idx); + mutex_unlock(&link->sdata->local->key_mtx); } static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, - struct sta_info *sta, - bool pairwise, - struct ieee80211_key *old, - struct ieee80211_key *new) + struct ieee80211_link_data *link, + struct sta_info *sta, + bool pairwise, + struct ieee80211_key *old, + struct ieee80211_key *new) { + struct link_sta_info *link_sta = sta ? &sta->deflink : NULL; + int link_id; int idx; int ret = 0; bool defunikey, defmultikey, defmgmtkey, defbeaconkey; @@ -446,13 +450,36 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, if (new) { idx = new->conf.keyidx; - list_add_tail_rcu(&new->list, &sdata->key_list); is_wep = new->conf.cipher == WLAN_CIPHER_SUITE_WEP40 || new->conf.cipher == WLAN_CIPHER_SUITE_WEP104; + link_id = new->conf.link_id; } else { idx = old->conf.keyidx; is_wep = old->conf.cipher == WLAN_CIPHER_SUITE_WEP40 || old->conf.cipher == WLAN_CIPHER_SUITE_WEP104; + link_id = old->conf.link_id; + } + + if (WARN(old && old->conf.link_id != link_id, + "old link ID %d doesn't match new link ID %d\n", + old->conf.link_id, link_id)) + return -EINVAL; + + if (link_id >= 0) { + if (!link) { + link = sdata_dereference(sdata->link[link_id], sdata); + if (!link) + return -ENOLINK; + } + + if (sta) { + link_sta = rcu_dereference_protected(sta->link[link_id], + lockdep_is_held(&sta->local->sta_mtx)); + if (!link_sta) + return -ENOLINK; + } + } else { + link = &sdata->deflink; } if ((is_wep || pairwise) && idx >= NUM_DEFAULT_KEYS) @@ -482,6 +509,9 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, if (ret) return ret; + if (new) + list_add_tail_rcu(&new->list, &sdata->key_list); + if (sta) { if (pairwise) { rcu_assign_pointer(sta->ptk[idx], new); @@ -489,7 +519,7 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, !(new->conf.flags & IEEE80211_KEY_FLAG_NO_AUTO_TX)) _ieee80211_set_tx_key(new, true); } else { - rcu_assign_pointer(sta->deflink.gtk[idx], new); + rcu_assign_pointer(link_sta->gtk[idx], new); } /* Only needed for transition from no key -> key. * Still triggers unnecessary when using Extended Key ID @@ -503,39 +533,39 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, sdata->default_unicast_key); defmultikey = old && old == key_mtx_dereference(sdata->local, - sdata->deflink.default_multicast_key); + link->default_multicast_key); defmgmtkey = old && old == key_mtx_dereference(sdata->local, - sdata->deflink.default_mgmt_key); + link->default_mgmt_key); defbeaconkey = old && old == key_mtx_dereference(sdata->local, - sdata->deflink.default_beacon_key); + link->default_beacon_key); if (defunikey && !new) - __ieee80211_set_default_key(sdata, -1, true, false); + __ieee80211_set_default_key(link, -1, true, false); if (defmultikey && !new) - __ieee80211_set_default_key(sdata, -1, false, true); + __ieee80211_set_default_key(link, -1, false, true); if (defmgmtkey && !new) - __ieee80211_set_default_mgmt_key(sdata, -1); + __ieee80211_set_default_mgmt_key(link, -1); if (defbeaconkey && !new) - __ieee80211_set_default_beacon_key(sdata, -1); + __ieee80211_set_default_beacon_key(link, -1); if (is_wep || pairwise) rcu_assign_pointer(sdata->keys[idx], new); else - rcu_assign_pointer(sdata->deflink.gtk[idx], new); + rcu_assign_pointer(link->gtk[idx], new); if (defunikey && new) - __ieee80211_set_default_key(sdata, new->conf.keyidx, + __ieee80211_set_default_key(link, new->conf.keyidx, true, false); if (defmultikey && new) - __ieee80211_set_default_key(sdata, new->conf.keyidx, + __ieee80211_set_default_key(link, new->conf.keyidx, false, true); if (defmgmtkey && new) - __ieee80211_set_default_mgmt_key(sdata, + __ieee80211_set_default_mgmt_key(link, new->conf.keyidx); if (defbeaconkey && new) - __ieee80211_set_default_beacon_key(sdata, + __ieee80211_set_default_beacon_key(link, new->conf.keyidx); } @@ -569,6 +599,7 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, key->conf.flags = 0; key->flags = 0; + key->conf.link_id = -1; key->conf.cipher = cipher; key->conf.keyidx = idx; key->conf.keylen = key_len; @@ -797,9 +828,10 @@ static bool ieee80211_key_identical(struct ieee80211_sub_if_data *sdata, } int ieee80211_key_link(struct ieee80211_key *key, - struct ieee80211_sub_if_data *sdata, + struct ieee80211_link_data *link, struct sta_info *sta) { + struct ieee80211_sub_if_data *sdata = link->sdata; static atomic_t key_color = ATOMIC_INIT(0); struct ieee80211_key *old_key = NULL; int idx = key->conf.keyidx; @@ -827,15 +859,24 @@ int ieee80211_key_link(struct ieee80211_key *key, (old_key && old_key->conf.cipher != key->conf.cipher)) goto out; } else if (sta) { - old_key = key_mtx_dereference(sdata->local, - sta->deflink.gtk[idx]); + struct link_sta_info *link_sta = &sta->deflink; + int link_id = key->conf.link_id; + + if (link_id >= 0) { + link_sta = rcu_dereference_protected(sta->link[link_id], + lockdep_is_held(&sta->local->sta_mtx)); + if (!link_sta) + return -ENOLINK; + } + + old_key = key_mtx_dereference(sdata->local, link_sta->gtk[idx]); } else { if (idx < NUM_DEFAULT_KEYS) old_key = key_mtx_dereference(sdata->local, sdata->keys[idx]); if (!old_key) old_key = key_mtx_dereference(sdata->local, - sdata->deflink.gtk[idx]); + link->gtk[idx]); } /* Non-pairwise keys must also not switch the cipher on rekey */ @@ -866,7 +907,7 @@ int ieee80211_key_link(struct ieee80211_key *key, increment_tailroom_need_count(sdata); - ret = ieee80211_key_replace(sdata, sta, pairwise, old_key, key); + ret = ieee80211_key_replace(sdata, link, sta, pairwise, old_key, key); if (!ret) { ieee80211_debugfs_key_add(key); @@ -890,9 +931,9 @@ void ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom) * Replace key with nothingness if it was ever used. */ if (key->sdata) - ieee80211_key_replace(key->sdata, key->sta, - key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, - key, NULL); + ieee80211_key_replace(key->sdata, NULL, key->sta, + key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, + key, NULL); ieee80211_key_destroy(key, delay_tailroom); } @@ -1019,15 +1060,45 @@ static void ieee80211_free_keys_iface(struct ieee80211_sub_if_data *sdata, ieee80211_debugfs_key_remove_beacon_default(sdata); list_for_each_entry_safe(key, tmp, &sdata->key_list, list) { - ieee80211_key_replace(key->sdata, key->sta, - key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, - key, NULL); + ieee80211_key_replace(key->sdata, NULL, key->sta, + key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, + key, NULL); list_add_tail(&key->list, keys); } ieee80211_debugfs_key_update_default(sdata); } +void ieee80211_remove_link_keys(struct ieee80211_link_data *link, + struct list_head *keys) +{ + struct ieee80211_sub_if_data *sdata = link->sdata; + struct ieee80211_local *local = sdata->local; + struct ieee80211_key *key, *tmp; + + mutex_lock(&local->key_mtx); + list_for_each_entry_safe(key, tmp, &sdata->key_list, list) { + if (key->conf.link_id != link->link_id) + continue; + ieee80211_key_replace(key->sdata, link, key->sta, + key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, + key, NULL); + list_add_tail(&key->list, keys); + } + mutex_unlock(&local->key_mtx); +} + +void ieee80211_free_key_list(struct ieee80211_local *local, + struct list_head *keys) +{ + struct ieee80211_key *key, *tmp; + + mutex_lock(&local->key_mtx); + list_for_each_entry_safe(key, tmp, keys, list) + __ieee80211_key_destroy(key, false); + mutex_unlock(&local->key_mtx); +} + void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata, bool force_synchronize) { @@ -1087,9 +1158,9 @@ void ieee80211_free_sta_keys(struct ieee80211_local *local, key = key_mtx_dereference(local, sta->deflink.gtk[i]); if (!key) continue; - ieee80211_key_replace(key->sdata, key->sta, - key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, - key, NULL); + ieee80211_key_replace(key->sdata, NULL, key->sta, + key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, + key, NULL); __ieee80211_key_destroy(key, key->sdata->vif.type == NL80211_IFTYPE_STATION); } @@ -1098,9 +1169,9 @@ void ieee80211_free_sta_keys(struct ieee80211_local *local, key = key_mtx_dereference(local, sta->ptk[i]); if (!key) continue; - ieee80211_key_replace(key->sdata, key->sta, - key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, - key, NULL); + ieee80211_key_replace(key->sdata, NULL, key->sta, + key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, + key, NULL); __ieee80211_key_destroy(key, key->sdata->vif.type == NL80211_IFTYPE_STATION); } @@ -1307,7 +1378,8 @@ ieee80211_gtk_rekey_add(struct ieee80211_vif *vif, if (sdata->u.mgd.mfp != IEEE80211_MFP_DISABLED) key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT; - err = ieee80211_key_link(key, sdata, NULL); + /* FIXME: this function needs to get a link ID */ + err = ieee80211_key_link(key, &sdata->deflink, NULL); if (err) return ERR_PTR(err); diff --git a/net/mac80211/key.h b/net/mac80211/key.h index e994dcea1ce3..518af24aab56 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -22,6 +22,7 @@ struct ieee80211_local; struct ieee80211_sub_if_data; +struct ieee80211_link_data; struct sta_info; /** @@ -144,17 +145,21 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, * to make it used, free old key. On failure, also free the new key. */ int ieee80211_key_link(struct ieee80211_key *key, - struct ieee80211_sub_if_data *sdata, + struct ieee80211_link_data *link, struct sta_info *sta); int ieee80211_set_tx_key(struct ieee80211_key *key); void ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom); void ieee80211_key_free_unused(struct ieee80211_key *key); -void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx, +void ieee80211_set_default_key(struct ieee80211_link_data *link, int idx, bool uni, bool multi); -void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, +void ieee80211_set_default_mgmt_key(struct ieee80211_link_data *link, int idx); -void ieee80211_set_default_beacon_key(struct ieee80211_sub_if_data *sdata, +void ieee80211_set_default_beacon_key(struct ieee80211_link_data *link, int idx); +void ieee80211_remove_link_keys(struct ieee80211_link_data *link, + struct list_head *keys); +void ieee80211_free_key_list(struct ieee80211_local *local, + struct list_head *keys); void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata, bool force_synchronize); void ieee80211_free_sta_keys(struct ieee80211_local *local, diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 57df21e2170a..aad617931fad 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1854,7 +1854,6 @@ static struct ieee80211_key * ieee80211_rx_get_bigtk(struct ieee80211_rx_data *rx, int idx) { struct ieee80211_key *key = NULL; - struct ieee80211_sub_if_data *sdata = rx->sdata; int idx2; /* Make sure key gets set if either BIGTK key index is set so that @@ -1873,14 +1872,14 @@ ieee80211_rx_get_bigtk(struct ieee80211_rx_data *rx, int idx) idx2 = idx - 1; } - if (rx->sta) - key = rcu_dereference(rx->sta->deflink.gtk[idx]); + if (rx->link_sta) + key = rcu_dereference(rx->link_sta->gtk[idx]); if (!key) - key = rcu_dereference(sdata->deflink.gtk[idx]); - if (!key && rx->sta) - key = rcu_dereference(rx->sta->deflink.gtk[idx2]); + key = rcu_dereference(rx->link->gtk[idx]); + if (!key && rx->link_sta) + key = rcu_dereference(rx->link_sta->gtk[idx2]); if (!key) - key = rcu_dereference(sdata->deflink.gtk[idx2]); + key = rcu_dereference(rx->link->gtk[idx2]); return key; } @@ -1986,15 +1985,15 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) if (mmie_keyidx < NUM_DEFAULT_KEYS || mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) return RX_DROP_MONITOR; /* unexpected BIP keyidx */ - if (rx->sta) { + if (rx->link_sta) { if (ieee80211_is_group_privacy_action(skb) && test_sta_flag(rx->sta, WLAN_STA_MFP)) return RX_DROP_MONITOR; - rx->key = rcu_dereference(rx->sta->deflink.gtk[mmie_keyidx]); + rx->key = rcu_dereference(rx->link_sta->gtk[mmie_keyidx]); } if (!rx->key) - rx->key = rcu_dereference(rx->sdata->deflink.gtk[mmie_keyidx]); + rx->key = rcu_dereference(rx->link->gtk[mmie_keyidx]); } else if (!ieee80211_has_protected(fc)) { /* * The frame was not protected, so skip decryption. However, we @@ -2003,25 +2002,24 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) * have been expected. */ struct ieee80211_key *key = NULL; - struct ieee80211_sub_if_data *sdata = rx->sdata; int i; if (ieee80211_is_beacon(fc)) { key = ieee80211_rx_get_bigtk(rx, -1); } else if (ieee80211_is_mgmt(fc) && is_multicast_ether_addr(hdr->addr1)) { - key = rcu_dereference(rx->sdata->deflink.default_mgmt_key); + key = rcu_dereference(rx->link->default_mgmt_key); } else { - if (rx->sta) { + if (rx->link_sta) { for (i = 0; i < NUM_DEFAULT_KEYS; i++) { - key = rcu_dereference(rx->sta->deflink.gtk[i]); + key = rcu_dereference(rx->link_sta->gtk[i]); if (key) break; } } if (!key) { for (i = 0; i < NUM_DEFAULT_KEYS; i++) { - key = rcu_dereference(sdata->deflink.gtk[i]); + key = rcu_dereference(rx->link->gtk[i]); if (key) break; } @@ -2050,13 +2048,13 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) return RX_DROP_UNUSABLE; /* check per-station GTK first, if multicast packet */ - if (is_multicast_ether_addr(hdr->addr1) && rx->sta) - rx->key = rcu_dereference(rx->sta->deflink.gtk[keyidx]); + if (is_multicast_ether_addr(hdr->addr1) && rx->link_sta) + rx->key = rcu_dereference(rx->link_sta->gtk[keyidx]); /* if not found, try default key */ if (!rx->key) { if (is_multicast_ether_addr(hdr->addr1)) - rx->key = rcu_dereference(rx->sdata->deflink.gtk[keyidx]); + rx->key = rcu_dereference(rx->link->gtk[keyidx]); if (!rx->key) rx->key = rcu_dereference(rx->sdata->keys[keyidx]); @@ -4769,7 +4767,17 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx, if (!link) return true; rx->link = link; + + if (rx->sta) { + rx->link_sta = + rcu_dereference(rx->sta->link[rx->link_id]); + if (!rx->link_sta) + return true; + } } else { + if (rx->sta) + rx->link_sta = &rx->sta->deflink; + rx->link = &sdata->deflink; } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 45df9932d0ba..8683f24aaec5 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -576,6 +576,51 @@ ieee80211_tx_h_check_control_port_protocol(struct ieee80211_tx_data *tx) return TX_CONTINUE; } +static struct ieee80211_key * +ieee80211_select_link_key(struct ieee80211_tx_data *tx) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); + enum { + USE_NONE, + USE_MGMT_KEY, + USE_MCAST_KEY, + } which_key = USE_NONE; + struct ieee80211_link_data *link; + unsigned int link_id; + + if (ieee80211_is_group_privacy_action(tx->skb)) + which_key = USE_MCAST_KEY; + else if (ieee80211_is_mgmt(hdr->frame_control) && + is_multicast_ether_addr(hdr->addr1) && + ieee80211_is_robust_mgmt_frame(tx->skb)) + which_key = USE_MGMT_KEY; + else if (is_multicast_ether_addr(hdr->addr1)) + which_key = USE_MCAST_KEY; + else + return NULL; + + link_id = u32_get_bits(info->control.flags, IEEE80211_TX_CTRL_MLO_LINK); + if (link_id == IEEE80211_LINK_UNSPECIFIED) { + link = &tx->sdata->deflink; + } else { + link = rcu_dereference(tx->sdata->link[link_id]); + if (!link) + return NULL; + } + + switch (which_key) { + case USE_NONE: + break; + case USE_MGMT_KEY: + return rcu_dereference(link->default_mgmt_key); + case USE_MCAST_KEY: + return rcu_dereference(link->default_multicast_key); + } + + return NULL; +} + static ieee80211_tx_result debug_noinline ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) { @@ -591,16 +636,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) if (tx->sta && (key = rcu_dereference(tx->sta->ptk[tx->sta->ptk_idx]))) tx->key = key; - else if (ieee80211_is_group_privacy_action(tx->skb) && - (key = rcu_dereference(tx->sdata->deflink.default_multicast_key))) - tx->key = key; - else if (ieee80211_is_mgmt(hdr->frame_control) && - is_multicast_ether_addr(hdr->addr1) && - ieee80211_is_robust_mgmt_frame(tx->skb) && - (key = rcu_dereference(tx->sdata->deflink.default_mgmt_key))) - tx->key = key; - else if (is_multicast_ether_addr(hdr->addr1) && - (key = rcu_dereference(tx->sdata->deflink.default_multicast_key))) + else if ((key = ieee80211_select_link_key(tx))) tx->key = key; else if (!is_multicast_ether_addr(hdr->addr1) && (key = rcu_dereference(tx->sdata->default_unicast_key))) -- cgit v1.2.3-70-g09d2 From ea9d807b56428d65cf43030cbd7ae5a580077147 Mon Sep 17 00:00:00 2001 From: Vasanthakumar Thiagarajan Date: Wed, 17 Aug 2022 16:12:12 +0530 Subject: wifi: mac80211: add link information in ieee80211_rx_status In MLO, when the address translation from link to MLD is done in fw/hw, it is necessary to be able to have some information on the link on which the frame has been received. Extend the rx API to include link_id and a valid flag in ieee80211_rx_status. Also make chanes to mac80211 rx APIs to make use of the reported link_id after sanity checks. Signed-off-by: Vasanthakumar Thiagarajan Link: https://lore.kernel.org/r/20220817104213.2531-2-quic_vthiagar@quicinc.com Signed-off-by: Johannes Berg --- include/net/mac80211.h | 5 +++ net/mac80211/rx.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 99 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 7951400e509b..b38927e33d10 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1480,6 +1480,10 @@ enum mac80211_rx_encoding { * each A-MPDU but the same for each subframe within one A-MPDU * @ampdu_delimiter_crc: A-MPDU delimiter CRC * @zero_length_psdu_type: radiotap type of the 0-length PSDU + * @link_valid: if the link which is identified by @link_id is valid. This flag + * is set only when connection is MLO. + * @link_id: id of the link used to receive the packet. This is used along with + * @link_valid. */ struct ieee80211_rx_status { u64 mactime; @@ -1504,6 +1508,7 @@ struct ieee80211_rx_status { s8 chain_signal[IEEE80211_MAX_CHAINS]; u8 ampdu_delimiter_crc; u8 zero_length_psdu_type; + u8 link_valid:1, link_id:4; }; static inline u32 diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index aad617931fad..3af0c4200e0b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4506,6 +4506,15 @@ void ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata) mutex_unlock(&local->sta_mtx); } +static bool +ieee80211_rx_is_valid_sta_link_id(struct ieee80211_sta *sta, u8 link_id) +{ + if (!sta->mlo) + return false; + + return !!(sta->valid_links & BIT(link_id)); +} + static void ieee80211_rx_8023(struct ieee80211_rx_data *rx, struct ieee80211_fast_rx *fast_rx, int orig_len) @@ -4835,6 +4844,7 @@ static void __ieee80211_rx_handle_8023(struct ieee80211_hw *hw, struct list_head *list) { struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_fast_rx *fast_rx; struct ieee80211_rx_data rx; @@ -4855,7 +4865,31 @@ static void __ieee80211_rx_handle_8023(struct ieee80211_hw *hw, rx.sta = container_of(pubsta, struct sta_info, sta); rx.sdata = rx.sta->sdata; - rx.link = &rx.sdata->deflink; + + if (status->link_valid && + !ieee80211_rx_is_valid_sta_link_id(pubsta, status->link_id)) + goto drop; + + /* + * TODO: Should the frame be dropped if the right link_id is not + * available? Or may be it is fine in the current form to proceed with + * the frame processing because with frame being in 802.3 format, + * link_id is used only for stats purpose and updating the stats on + * the deflink is fine? + */ + if (status->link_valid) + rx.link_id = status->link_id; + + if (rx.link_id >= 0) { + struct ieee80211_link_data *link; + + link = rcu_dereference(rx.sdata->link[rx.link_id]); + if (!link) + goto drop; + rx.link = link; + } else { + rx.link = &rx.sdata->deflink; + } fast_rx = rcu_dereference(rx.sta->fast_rx); if (!fast_rx) @@ -4885,7 +4919,19 @@ static bool ieee80211_rx_for_interface(struct ieee80211_rx_data *rx, rx->sta = link_sta->sta; rx->link_id = link_sta->link_id; } else { + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); + rx->sta = sta_info_get_bss(rx->sdata, hdr->addr2); + if (rx->sta) { + if (status->link_valid && + !ieee80211_rx_is_valid_sta_link_id(&rx->sta->sta, + status->link_id)) + return false; + + rx->link_id = status->link_valid ? status->link_id : -1; + } else { + rx->link_id = -1; + } } return ieee80211_prepare_and_rx_handle(rx, skb, consume); @@ -4901,6 +4947,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, struct list_head *list) { struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_sub_if_data *sdata; struct ieee80211_hdr *hdr; __le16 fc; @@ -4945,10 +4992,39 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, if (ieee80211_is_data(fc)) { struct sta_info *sta, *prev_sta; + u8 link_id = status->link_id; if (pubsta) { rx.sta = container_of(pubsta, struct sta_info, sta); rx.sdata = rx.sta->sdata; + + if (status->link_valid && + !ieee80211_rx_is_valid_sta_link_id(pubsta, link_id)) + goto out; + + if (status->link_valid) + rx.link_id = status->link_id; + + /* + * In MLO connection, fetch the link_id using addr2 + * when the driver does not pass link_id in status. + * When the address translation is already performed by + * driver/hw, the valid link_id must be passed in + * status. + */ + + if (!status->link_valid && pubsta->mlo) { + struct ieee80211_hdr *hdr = (void *)skb->data; + struct link_sta_info *link_sta; + + link_sta = link_sta_info_get_bss(rx.sdata, + hdr->addr2); + if (!link_sta) + goto out; + + rx.link_id = link_sta->link_id; + } + if (ieee80211_prepare_and_rx_handle(&rx, skb, true)) return; goto out; @@ -4962,6 +5038,13 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, continue; } + if ((status->link_valid && + !ieee80211_rx_is_valid_sta_link_id(&prev_sta->sta, + link_id)) || + (!status->link_valid && prev_sta->sta.mlo)) + continue; + + rx.link_id = status->link_valid ? link_id : -1; rx.sta = prev_sta; rx.sdata = prev_sta->sdata; ieee80211_prepare_and_rx_handle(&rx, skb, false); @@ -4970,6 +5053,13 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, } if (prev_sta) { + if ((status->link_valid && + !ieee80211_rx_is_valid_sta_link_id(&prev_sta->sta, + link_id)) || + (!status->link_valid && prev_sta->sta.mlo)) + goto out; + + rx.link_id = status->link_valid ? link_id : -1; rx.sta = prev_sta; rx.sdata = prev_sta->sdata; @@ -5112,6 +5202,9 @@ void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, } } + if (WARN_ON_ONCE(status->link_id >= IEEE80211_LINK_UNSPECIFIED)) + goto drop; + status->rx_flags = 0; kcov_remote_start_common(skb_get_kcov_handle(skb)); -- cgit v1.2.3-70-g09d2 From c73993b865bf0b2ea1cbb3e9616f18a6508fc49c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 19 Aug 2022 13:12:38 +0200 Subject: wifi: mac80211: maintain link_id in link_sta To helper drivers if they e.g. have a lookup of the link_sta pointer, add the link ID to the link_sta structure. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 ++ net/mac80211/sta_info.c | 1 + 2 files changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b38927e33d10..ffd0ebbff294 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2135,6 +2135,7 @@ struct ieee80211_sta_txpwr { * @addr: MAC address of the Link STA. For non-MLO STA this is same as the addr * in ieee80211_sta. For MLO Link STA this addr can be same or different * from addr in ieee80211_sta (representing MLD STA addr) + * @link_id: the link ID for this link STA (0 for deflink) * @supp_rates: Bitmap of supported rates * @ht_cap: HT capabilities of this STA; restricted to our own capabilities * @vht_cap: VHT capabilities of this STA; restricted to our own capabilities @@ -2151,6 +2152,7 @@ struct ieee80211_sta_txpwr { */ struct ieee80211_link_sta { u8 addr[ETH_ALEN]; + u8 link_id; u32 supp_rates[NUM_NL80211_BANDS]; struct ieee80211_sta_ht_cap ht_cap; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 05dc56811cb4..6649c2483e49 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -472,6 +472,7 @@ static void sta_info_add_link(struct sta_info *sta, link_info->sta = sta; link_info->link_id = link_id; link_info->pub = link_sta; + link_sta->link_id = link_id; rcu_assign_pointer(sta->link[link_id], link_info); rcu_assign_pointer(sta->sta.link[link_id], link_sta); } -- cgit v1.2.3-70-g09d2 From b8c9024e0ed03c5feb29bd1086a1eb799f3fff44 Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Fri, 22 Jul 2022 18:41:42 +0530 Subject: wifi: cfg80211: Add link_id to cfg80211_ch_switch_started_notify() Add link_id parameter to cfg80211_ch_switch_started_notify() to allow driver to indicate on which link channel switch started on MLD. Send the data to userspace so it knows as well. Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/20220722131143.3438042-1-quic_vjakkam@quicinc.com Link: https://lore.kernel.org/r/20220722131143.3438042-2-quic_vjakkam@quicinc.com [squash two patches] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 +++- net/mac80211/cfg.c | 2 +- net/mac80211/mlme.c | 2 +- net/wireless/nl80211.c | 18 ++++++++++++++---- net/wireless/trace.h | 11 +++++++---- 5 files changed, 26 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ffbc65a9b00b..e09ff87146c1 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -8281,6 +8281,7 @@ void cfg80211_ch_switch_notify(struct net_device *dev, * cfg80211_ch_switch_started_notify - notify channel switch start * @dev: the device on which the channel switch started * @chandef: the future channel definition + * @link_id: the link ID for MLO, must be 0 for non-MLO * @count: the number of TBTTs until the channel switch happens * @quiet: whether or not immediate quiet was requested by the AP * @@ -8290,7 +8291,8 @@ void cfg80211_ch_switch_notify(struct net_device *dev, */ void cfg80211_ch_switch_started_notify(struct net_device *dev, struct cfg80211_chan_def *chandef, - u8 count, bool quiet); + unsigned int link_id, u8 count, + bool quiet); /** * ieee80211_operating_class_to_band - convert operating class to band diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 960d5c4a5940..b5522edbe05d 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3815,7 +3815,7 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, IEEE80211_QUEUE_STOP_REASON_CSA); cfg80211_ch_switch_started_notify(sdata->dev, - &sdata->deflink.csa_chandef, + &sdata->deflink.csa_chandef, 0, params->count, params->block_tx); if (changed) { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 2dcb7f5c8397..82cf4ad8bf42 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1911,7 +1911,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, IEEE80211_QUEUE_STOP_REASON_CSA); mutex_unlock(&local->mtx); - cfg80211_ch_switch_started_notify(sdata->dev, &csa_ie.chandef, + cfg80211_ch_switch_started_notify(sdata->dev, &csa_ie.chandef, 0, csa_ie.count, csa_ie.mode); if (local->ops->channel_switch) { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 2baa2f2bc1ed..dad88d231d56 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -18932,11 +18932,13 @@ EXPORT_SYMBOL(cfg80211_pmksa_candidate_notify); static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev, struct net_device *netdev, + unsigned int link_id, struct cfg80211_chan_def *chandef, gfp_t gfp, enum nl80211_commands notif, u8 count, bool quiet) { + struct wireless_dev *wdev = netdev->ieee80211_ptr; struct sk_buff *msg; void *hdr; @@ -18953,6 +18955,10 @@ static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev, if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex)) goto nla_put_failure; + if (wdev->valid_links && + nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, link_id)) + goto nla_put_failure; + if (nl80211_send_chandef(msg, chandef)) goto nla_put_failure; @@ -19012,22 +19018,26 @@ void cfg80211_ch_switch_notify(struct net_device *dev, cfg80211_sched_dfs_chan_update(rdev); - nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL, + nl80211_ch_switch_notify(rdev, dev, link_id, chandef, GFP_KERNEL, NL80211_CMD_CH_SWITCH_NOTIFY, 0, false); } EXPORT_SYMBOL(cfg80211_ch_switch_notify); void cfg80211_ch_switch_started_notify(struct net_device *dev, struct cfg80211_chan_def *chandef, - u8 count, bool quiet) + unsigned int link_id, u8 count, + bool quiet) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); - trace_cfg80211_ch_switch_started_notify(dev, chandef); + ASSERT_WDEV_LOCK(wdev); + WARN_INVALID_LINK_ID(wdev, link_id); + + trace_cfg80211_ch_switch_started_notify(dev, chandef, link_id); - nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL, + nl80211_ch_switch_notify(rdev, dev, link_id, chandef, GFP_KERNEL, NL80211_CMD_CH_SWITCH_STARTED_NOTIFY, count, quiet); } diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 001c00d9c5e7..a405c3edbc47 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -3263,18 +3263,21 @@ TRACE_EVENT(cfg80211_ch_switch_notify, TRACE_EVENT(cfg80211_ch_switch_started_notify, TP_PROTO(struct net_device *netdev, - struct cfg80211_chan_def *chandef), - TP_ARGS(netdev, chandef), + struct cfg80211_chan_def *chandef, + unsigned int link_id), + TP_ARGS(netdev, chandef, link_id), TP_STRUCT__entry( NETDEV_ENTRY CHAN_DEF_ENTRY + __field(unsigned int, link_id) ), TP_fast_assign( NETDEV_ASSIGN; CHAN_DEF_ASSIGN(chandef); + __entry->link_id = link_id; ), - TP_printk(NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT, - NETDEV_PR_ARG, CHAN_DEF_PR_ARG) + TP_printk(NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT ", link:%d", + NETDEV_PR_ARG, CHAN_DEF_PR_ARG, __entry->link_id) ); TRACE_EVENT(cfg80211_radar_event, -- cgit v1.2.3-70-g09d2 From c19d893fbf3f2f8fa864ae39652c7fee939edde2 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 24 Aug 2022 08:52:31 +0800 Subject: net: sched: delete duplicate cleanup of backlog and qlen qdisc_reset() is clearing qdisc->q.qlen and qdisc->qstats.backlog _after_ calling qdisc->ops->reset. There is no need to clear them again in the specific reset function. Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20220824005231.345727-1-shaozhengchao@huawei.com Signed-off-by: Paolo Abeni --- include/net/sch_generic.h | 1 - net/sched/sch_atm.c | 1 - net/sched/sch_cbq.c | 1 - net/sched/sch_choke.c | 2 -- net/sched/sch_drr.c | 2 -- net/sched/sch_dsmark.c | 2 -- net/sched/sch_etf.c | 3 --- net/sched/sch_ets.c | 2 -- net/sched/sch_fq_codel.c | 2 -- net/sched/sch_fq_pie.c | 3 --- net/sched/sch_hfsc.c | 2 -- net/sched/sch_htb.c | 2 -- net/sched/sch_multiq.c | 1 - net/sched/sch_prio.c | 2 -- net/sched/sch_qfq.c | 2 -- net/sched/sch_red.c | 2 -- net/sched/sch_sfb.c | 2 -- net/sched/sch_skbprio.c | 3 --- net/sched/sch_taprio.c | 2 -- net/sched/sch_tbf.c | 2 -- net/sched/sch_teql.c | 1 - 21 files changed, 40 deletions(-) (limited to 'include/net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index ec693fe7c553..f2958fb5ae08 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -1137,7 +1137,6 @@ static inline void __qdisc_reset_queue(struct qdisc_skb_head *qh) static inline void qdisc_reset_queue(struct Qdisc *sch) { __qdisc_reset_queue(&sch->q); - sch->qstats.backlog = 0; } static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new, diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 4c8e994cf0a5..816fd0d7ba38 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -577,7 +577,6 @@ static void atm_tc_reset(struct Qdisc *sch) pr_debug("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p); list_for_each_entry(flow, &p->flows, list) qdisc_reset(flow->q); - sch->q.qlen = 0; } static void atm_tc_destroy(struct Qdisc *sch) diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 91a0dc463c48..ba99ce05cd52 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -975,7 +975,6 @@ cbq_reset(struct Qdisc *sch) cl->cpriority = cl->priority; } } - sch->q.qlen = 0; } diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 2adbd945bf15..25d2daaa8122 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -315,8 +315,6 @@ static void choke_reset(struct Qdisc *sch) rtnl_qdisc_drop(skb, sch); } - sch->q.qlen = 0; - sch->qstats.backlog = 0; if (q->tab) memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *)); q->head = q->tail = 0; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 18e4f7a0b291..4e5b1cf11b85 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -441,8 +441,6 @@ static void drr_reset_qdisc(struct Qdisc *sch) qdisc_reset(cl->qdisc); } } - sch->qstats.backlog = 0; - sch->q.qlen = 0; } static void drr_destroy_qdisc(struct Qdisc *sch) diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 4c100d105269..7da6dc38a382 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -409,8 +409,6 @@ static void dsmark_reset(struct Qdisc *sch) pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); if (p->q) qdisc_reset(p->q); - sch->qstats.backlog = 0; - sch->q.qlen = 0; } static void dsmark_destroy(struct Qdisc *sch) diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c index c48f91075b5c..d96103b0e2bf 100644 --- a/net/sched/sch_etf.c +++ b/net/sched/sch_etf.c @@ -445,9 +445,6 @@ static void etf_reset(struct Qdisc *sch) timesortedlist_clear(sch); __qdisc_reset_queue(&sch->q); - sch->qstats.backlog = 0; - sch->q.qlen = 0; - q->last = 0; } diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index d73393493553..8de4365886e8 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -727,8 +727,6 @@ static void ets_qdisc_reset(struct Qdisc *sch) } for (band = 0; band < q->nbands; band++) qdisc_reset(q->classes[band].qdisc); - sch->qstats.backlog = 0; - sch->q.qlen = 0; } static void ets_qdisc_destroy(struct Qdisc *sch) diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 839e1235db05..23a042adb74d 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -347,8 +347,6 @@ static void fq_codel_reset(struct Qdisc *sch) codel_vars_init(&flow->cvars); } memset(q->backlogs, 0, q->flows_cnt * sizeof(u32)); - sch->q.qlen = 0; - sch->qstats.backlog = 0; q->memory_usage = 0; } diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index d6aba6edd16e..35c35465226b 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -521,9 +521,6 @@ static void fq_pie_reset(struct Qdisc *sch) INIT_LIST_HEAD(&flow->flowchain); pie_vars_init(&flow->vars); } - - sch->q.qlen = 0; - sch->qstats.backlog = 0; } static void fq_pie_destroy(struct Qdisc *sch) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index d3979a6000e7..03efc40e42fc 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1484,8 +1484,6 @@ hfsc_reset_qdisc(struct Qdisc *sch) } q->eligible = RB_ROOT; qdisc_watchdog_cancel(&q->watchdog); - sch->qstats.backlog = 0; - sch->q.qlen = 0; } static void diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 23a9d6242429..cb5872d22ecf 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1008,8 +1008,6 @@ static void htb_reset(struct Qdisc *sch) } qdisc_watchdog_cancel(&q->watchdog); __qdisc_reset_queue(&q->direct_queue); - sch->q.qlen = 0; - sch->qstats.backlog = 0; memset(q->hlevel, 0, sizeof(q->hlevel)); memset(q->row_mask, 0, sizeof(q->row_mask)); } diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index cd8ab90c4765..f28050c7f12d 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -152,7 +152,6 @@ multiq_reset(struct Qdisc *sch) for (band = 0; band < q->bands; band++) qdisc_reset(q->queues[band]); - sch->q.qlen = 0; q->curband = 0; } diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 3b8d7197c06b..c03a11dd990f 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -135,8 +135,6 @@ prio_reset(struct Qdisc *sch) for (prio = 0; prio < q->bands; prio++) qdisc_reset(q->queues[prio]); - sch->qstats.backlog = 0; - sch->q.qlen = 0; } static int prio_offload(struct Qdisc *sch, struct tc_prio_qopt *qopt) diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index d4ce58c90f9f..13246a9dc5c1 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1458,8 +1458,6 @@ static void qfq_reset_qdisc(struct Qdisc *sch) qdisc_reset(cl->qdisc); } } - sch->qstats.backlog = 0; - sch->q.qlen = 0; } static void qfq_destroy_qdisc(struct Qdisc *sch) diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 40adf1f07a82..f1e013e3f04a 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -176,8 +176,6 @@ static void red_reset(struct Qdisc *sch) struct red_sched_data *q = qdisc_priv(sch); qdisc_reset(q->qdisc); - sch->qstats.backlog = 0; - sch->q.qlen = 0; red_restart(&q->vars); } diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 3d061a13d7ed..31717fa45a4f 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -453,8 +453,6 @@ static void sfb_reset(struct Qdisc *sch) struct sfb_sched_data *q = qdisc_priv(sch); qdisc_reset(q->qdisc); - sch->qstats.backlog = 0; - sch->q.qlen = 0; q->slot = 0; q->double_buffering = false; sfb_zero_all_buckets(q); diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c index 7a5e4c454715..df72fb83d9c7 100644 --- a/net/sched/sch_skbprio.c +++ b/net/sched/sch_skbprio.c @@ -213,9 +213,6 @@ static void skbprio_reset(struct Qdisc *sch) struct skbprio_sched_data *q = qdisc_priv(sch); int prio; - sch->qstats.backlog = 0; - sch->q.qlen = 0; - for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++) __skb_queue_purge(&q->qdiscs[prio]); diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 0b941dd63d26..db88a692ef81 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1636,8 +1636,6 @@ static void taprio_reset(struct Qdisc *sch) if (q->qdiscs[i]) qdisc_reset(q->qdiscs[i]); } - sch->qstats.backlog = 0; - sch->q.qlen = 0; } static void taprio_destroy(struct Qdisc *sch) diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 72102277449e..d0288e223542 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -330,8 +330,6 @@ static void tbf_reset(struct Qdisc *sch) struct tbf_sched_data *q = qdisc_priv(sch); qdisc_reset(q->qdisc); - sch->qstats.backlog = 0; - sch->q.qlen = 0; q->t_c = ktime_get_ns(); q->tokens = q->buffer; q->ptokens = q->mtu; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 1f447b77ce84..16f9238aa51d 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -124,7 +124,6 @@ teql_reset(struct Qdisc *sch) struct teql_sched_data *dat = qdisc_priv(sch); skb_queue_purge(&dat->q); - sch->q.qlen = 0; } static void -- cgit v1.2.3-70-g09d2 From bb67012331f7f07ff325877fbbb430bc515d371e Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 24 Aug 2022 14:20:09 +0200 Subject: net: devlink: extend info_get() version put to indicate a flash component Whenever the driver is called by his info_get() op, it may put multiple version names and values to the netlink message. Extend by additional helper devlink_info_version_running/stored_put_ext() that allows to specify a version type that indicates when particular version name represents a flash component. This is going to be used in follow-up patch calling info_get() during flash update command checking if version with this the version type exists. Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 16 ++++++++++++++++ net/core/devlink.c | 34 ++++++++++++++++++++++++++++++---- 2 files changed, 46 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/devlink.h b/include/net/devlink.h index 119ed1ffb988..f50a002e5023 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1714,15 +1714,31 @@ int devlink_info_driver_name_put(struct devlink_info_req *req, const char *name); int devlink_info_board_serial_number_put(struct devlink_info_req *req, const char *bsn); + +enum devlink_info_version_type { + DEVLINK_INFO_VERSION_TYPE_NONE, + DEVLINK_INFO_VERSION_TYPE_COMPONENT, /* May be used as flash update + * component by name. + */ +}; + int devlink_info_version_fixed_put(struct devlink_info_req *req, const char *version_name, const char *version_value); int devlink_info_version_stored_put(struct devlink_info_req *req, const char *version_name, const char *version_value); +int devlink_info_version_stored_put_ext(struct devlink_info_req *req, + const char *version_name, + const char *version_value, + enum devlink_info_version_type version_type); int devlink_info_version_running_put(struct devlink_info_req *req, const char *version_name, const char *version_value); +int devlink_info_version_running_put_ext(struct devlink_info_req *req, + const char *version_name, + const char *version_value, + enum devlink_info_version_type version_type); int devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg); int devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg); diff --git a/net/core/devlink.c b/net/core/devlink.c index b50bcc18b8d9..43c75b5ac039 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -6579,7 +6579,8 @@ EXPORT_SYMBOL_GPL(devlink_info_board_serial_number_put); static int devlink_info_version_put(struct devlink_info_req *req, int attr, const char *version_name, - const char *version_value) + const char *version_value, + enum devlink_info_version_type version_type) { struct nlattr *nest; int err; @@ -6612,7 +6613,8 @@ int devlink_info_version_fixed_put(struct devlink_info_req *req, const char *version_value) { return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_FIXED, - version_name, version_value); + version_name, version_value, + DEVLINK_INFO_VERSION_TYPE_NONE); } EXPORT_SYMBOL_GPL(devlink_info_version_fixed_put); @@ -6621,19 +6623,43 @@ int devlink_info_version_stored_put(struct devlink_info_req *req, const char *version_value) { return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_STORED, - version_name, version_value); + version_name, version_value, + DEVLINK_INFO_VERSION_TYPE_NONE); } EXPORT_SYMBOL_GPL(devlink_info_version_stored_put); +int devlink_info_version_stored_put_ext(struct devlink_info_req *req, + const char *version_name, + const char *version_value, + enum devlink_info_version_type version_type) +{ + return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_STORED, + version_name, version_value, + version_type); +} +EXPORT_SYMBOL_GPL(devlink_info_version_stored_put_ext); + int devlink_info_version_running_put(struct devlink_info_req *req, const char *version_name, const char *version_value) { return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_RUNNING, - version_name, version_value); + version_name, version_value, + DEVLINK_INFO_VERSION_TYPE_NONE); } EXPORT_SYMBOL_GPL(devlink_info_version_running_put); +int devlink_info_version_running_put_ext(struct devlink_info_req *req, + const char *version_name, + const char *version_value, + enum devlink_info_version_type version_type) +{ + return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_RUNNING, + version_name, version_value, + version_type); +} +EXPORT_SYMBOL_GPL(devlink_info_version_running_put_ext); + static int devlink_nl_info_fill(struct sk_buff *msg, struct devlink *devlink, enum devlink_command cmd, u32 portid, -- cgit v1.2.3-70-g09d2 From f94b606325c194adadaee2265b4db990802c4850 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 24 Aug 2022 14:20:11 +0200 Subject: net: devlink: limit flash component name to match version returned by info_get() Limit the acceptance of component name passed to cmd_flash_update() to match one of the versions returned by info_get(), marked by version type. This makes things clearer and enforces 1:1 mapping between exposed version and accepted flash component. Check VERSION_TYPE_COMPONENT version type during cmd_flash_update() execution by calling info_get() with different "req" context. That causes info_get() to lookup the component name instead of filling-up the netlink message. Remove "UPDATE_COMPONENT" flag which becomes used. Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/dev.c | 3 +- include/net/devlink.h | 3 +- net/core/devlink.c | 105 +++++++++++++++++++++++++++++++++++++------- 3 files changed, 90 insertions(+), 21 deletions(-) (limited to 'include/net') diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index d6938faf6c8b..efea94c27880 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -1322,8 +1322,7 @@ nsim_dev_devlink_trap_drop_counter_get(struct devlink *devlink, static const struct devlink_ops nsim_dev_devlink_ops = { .eswitch_mode_set = nsim_devlink_eswitch_mode_set, .eswitch_mode_get = nsim_devlink_eswitch_mode_get, - .supported_flash_update_params = DEVLINK_SUPPORT_FLASH_UPDATE_COMPONENT | - DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK, + .supported_flash_update_params = DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK, .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT), .reload_down = nsim_dev_reload_down, .reload_up = nsim_dev_reload_up, diff --git a/include/net/devlink.h b/include/net/devlink.h index f50a002e5023..1f7026011856 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -624,8 +624,7 @@ struct devlink_flash_update_params { u32 overwrite_mask; }; -#define DEVLINK_SUPPORT_FLASH_UPDATE_COMPONENT BIT(0) -#define DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK BIT(1) +#define DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK BIT(0) struct devlink_region; struct devlink_info_req; diff --git a/net/core/devlink.c b/net/core/devlink.c index 43c75b5ac039..0f7078db1280 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -4742,10 +4742,76 @@ void devlink_flash_update_timeout_notify(struct devlink *devlink, } EXPORT_SYMBOL_GPL(devlink_flash_update_timeout_notify); +struct devlink_info_req { + struct sk_buff *msg; + void (*version_cb)(const char *version_name, + enum devlink_info_version_type version_type, + void *version_cb_priv); + void *version_cb_priv; +}; + +struct devlink_flash_component_lookup_ctx { + const char *lookup_name; + bool lookup_name_found; +}; + +static void +devlink_flash_component_lookup_cb(const char *version_name, + enum devlink_info_version_type version_type, + void *version_cb_priv) +{ + struct devlink_flash_component_lookup_ctx *lookup_ctx = version_cb_priv; + + if (version_type != DEVLINK_INFO_VERSION_TYPE_COMPONENT || + lookup_ctx->lookup_name_found) + return; + + lookup_ctx->lookup_name_found = + !strcmp(lookup_ctx->lookup_name, version_name); +} + +static int devlink_flash_component_get(struct devlink *devlink, + struct nlattr *nla_component, + const char **p_component, + struct netlink_ext_ack *extack) +{ + struct devlink_flash_component_lookup_ctx lookup_ctx = {}; + struct devlink_info_req req = {}; + const char *component; + int ret; + + if (!nla_component) + return 0; + + component = nla_data(nla_component); + + if (!devlink->ops->info_get) { + NL_SET_ERR_MSG_ATTR(extack, nla_component, + "component update is not supported by this device"); + return -EOPNOTSUPP; + } + + lookup_ctx.lookup_name = component; + req.version_cb = devlink_flash_component_lookup_cb; + req.version_cb_priv = &lookup_ctx; + + ret = devlink->ops->info_get(devlink, &req, NULL); + if (ret) + return ret; + + if (!lookup_ctx.lookup_name_found) { + NL_SET_ERR_MSG_ATTR(extack, nla_component, + "selected component is not supported by this device"); + return -EINVAL; + } + *p_component = component; + return 0; +} + static int devlink_nl_cmd_flash_update(struct sk_buff *skb, struct genl_info *info) { - struct nlattr *nla_component, *nla_overwrite_mask, *nla_file_name; + struct nlattr *nla_overwrite_mask, *nla_file_name; struct devlink_flash_update_params params = {}; struct devlink *devlink = info->user_ptr[0]; const char *file_name; @@ -4758,17 +4824,13 @@ static int devlink_nl_cmd_flash_update(struct sk_buff *skb, if (!info->attrs[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME]) return -EINVAL; - supported_params = devlink->ops->supported_flash_update_params; + ret = devlink_flash_component_get(devlink, + info->attrs[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT], + ¶ms.component, info->extack); + if (ret) + return ret; - nla_component = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT]; - if (nla_component) { - if (!(supported_params & DEVLINK_SUPPORT_FLASH_UPDATE_COMPONENT)) { - NL_SET_ERR_MSG_ATTR(info->extack, nla_component, - "component update is not supported by this device"); - return -EOPNOTSUPP; - } - params.component = nla_data(nla_component); - } + supported_params = devlink->ops->supported_flash_update_params; nla_overwrite_mask = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK]; if (nla_overwrite_mask) { @@ -6553,18 +6615,18 @@ out_unlock: return err; } -struct devlink_info_req { - struct sk_buff *msg; -}; - int devlink_info_driver_name_put(struct devlink_info_req *req, const char *name) { + if (!req->msg) + return 0; return nla_put_string(req->msg, DEVLINK_ATTR_INFO_DRIVER_NAME, name); } EXPORT_SYMBOL_GPL(devlink_info_driver_name_put); int devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn) { + if (!req->msg) + return 0; return nla_put_string(req->msg, DEVLINK_ATTR_INFO_SERIAL_NUMBER, sn); } EXPORT_SYMBOL_GPL(devlink_info_serial_number_put); @@ -6572,6 +6634,8 @@ EXPORT_SYMBOL_GPL(devlink_info_serial_number_put); int devlink_info_board_serial_number_put(struct devlink_info_req *req, const char *bsn) { + if (!req->msg) + return 0; return nla_put_string(req->msg, DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER, bsn); } @@ -6585,6 +6649,13 @@ static int devlink_info_version_put(struct devlink_info_req *req, int attr, struct nlattr *nest; int err; + if (req->version_cb) + req->version_cb(version_name, version_type, + req->version_cb_priv); + + if (!req->msg) + return 0; + nest = nla_nest_start_noflag(req->msg, attr); if (!nest) return -EMSGSIZE; @@ -6665,7 +6736,7 @@ devlink_nl_info_fill(struct sk_buff *msg, struct devlink *devlink, enum devlink_command cmd, u32 portid, u32 seq, int flags, struct netlink_ext_ack *extack) { - struct devlink_info_req req; + struct devlink_info_req req = {}; void *hdr; int err; @@ -12332,8 +12403,8 @@ EXPORT_SYMBOL_GPL(devl_trap_policers_unregister); static void __devlink_compat_running_version(struct devlink *devlink, char *buf, size_t len) { + struct devlink_info_req req = {}; const struct nlattr *nlattr; - struct devlink_info_req req; struct sk_buff *msg; int rem, err; -- cgit v1.2.3-70-g09d2 From c249ea9b4309cf3250c5bbb42a05d38d0ed9071c Mon Sep 17 00:00:00 2001 From: Brian Gix Date: Fri, 5 Aug 2022 16:42:32 -0700 Subject: Bluetooth: Move Adv Instance timer to hci_sync The Advertising Instance expiration timer adv_instance_expire was handled with the deprecated hci_request mechanism, rather than it's replacement: hci_sync. Signed-off-by: Brian Gix Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sync.h | 3 +- net/bluetooth/hci_request.c | 111 --------------------------------- net/bluetooth/hci_request.h | 4 -- net/bluetooth/hci_sync.c | 129 ++++++++++++++++++++++++++++++++++++--- net/bluetooth/mgmt.c | 5 +- 5 files changed, 125 insertions(+), 127 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 3843f5060c73..aea950440b9d 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -72,7 +72,8 @@ int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len, int hci_remove_advertising_sync(struct hci_dev *hdev, struct sock *sk, u8 instance, bool force); int hci_disable_advertising_sync(struct hci_dev *hdev); - +int hci_clear_adv_instance_sync(struct hci_dev *hdev, struct sock *sk, + u8 instance, bool force); int hci_update_passive_scan_sync(struct hci_dev *hdev); int hci_update_passive_scan(struct hci_dev *hdev); int hci_read_rssi_sync(struct hci_dev *hdev, __le16 handle); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index d14e50951aec..be32fb0f5557 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -811,14 +811,6 @@ void hci_req_add_le_passive_scan(struct hci_request *req) addr_resolv); } -static void cancel_adv_timeout(struct hci_dev *hdev) -{ - if (hdev->adv_instance_timeout) { - hdev->adv_instance_timeout = 0; - cancel_delayed_work(&hdev->adv_instance_expire); - } -} - static bool adv_cur_instance_is_scannable(struct hci_dev *hdev) { return hci_adv_instance_is_scannable(hdev, hdev->cur_adv_instance); @@ -1140,37 +1132,6 @@ void hci_req_reenable_advertising(struct hci_dev *hdev) hci_req_run(&req, adv_enable_complete); } -static void adv_timeout_expire(struct work_struct *work) -{ - struct hci_dev *hdev = container_of(work, struct hci_dev, - adv_instance_expire.work); - - struct hci_request req; - u8 instance; - - bt_dev_dbg(hdev, ""); - - hci_dev_lock(hdev); - - hdev->adv_instance_timeout = 0; - - instance = hdev->cur_adv_instance; - if (instance == 0x00) - goto unlock; - - hci_req_init(&req, hdev); - - hci_req_clear_adv_instance(hdev, NULL, &req, instance, false); - - if (list_empty(&hdev->adv_instances)) - __hci_req_disable_advertising(&req); - - hci_req_run(&req, NULL); - -unlock: - hci_dev_unlock(hdev); -} - static int hci_req_add_le_interleaved_scan(struct hci_request *req, unsigned long opt) { @@ -1637,72 +1598,6 @@ int __hci_req_schedule_adv_instance(struct hci_request *req, u8 instance, return 0; } -/* For a single instance: - * - force == true: The instance will be removed even when its remaining - * lifetime is not zero. - * - force == false: the instance will be deactivated but kept stored unless - * the remaining lifetime is zero. - * - * For instance == 0x00: - * - force == true: All instances will be removed regardless of their timeout - * setting. - * - force == false: Only instances that have a timeout will be removed. - */ -void hci_req_clear_adv_instance(struct hci_dev *hdev, struct sock *sk, - struct hci_request *req, u8 instance, - bool force) -{ - struct adv_info *adv_instance, *n, *next_instance = NULL; - int err; - u8 rem_inst; - - /* Cancel any timeout concerning the removed instance(s). */ - if (!instance || hdev->cur_adv_instance == instance) - cancel_adv_timeout(hdev); - - /* Get the next instance to advertise BEFORE we remove - * the current one. This can be the same instance again - * if there is only one instance. - */ - if (instance && hdev->cur_adv_instance == instance) - next_instance = hci_get_next_instance(hdev, instance); - - if (instance == 0x00) { - list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances, - list) { - if (!(force || adv_instance->timeout)) - continue; - - rem_inst = adv_instance->instance; - err = hci_remove_adv_instance(hdev, rem_inst); - if (!err) - mgmt_advertising_removed(sk, hdev, rem_inst); - } - } else { - adv_instance = hci_find_adv_instance(hdev, instance); - - if (force || (adv_instance && adv_instance->timeout && - !adv_instance->remaining_time)) { - /* Don't advertise a removed instance. */ - if (next_instance && - next_instance->instance == instance) - next_instance = NULL; - - err = hci_remove_adv_instance(hdev, instance); - if (!err) - mgmt_advertising_removed(sk, hdev, instance); - } - } - - if (!req || !hdev_is_powered(hdev) || - hci_dev_test_flag(hdev, HCI_ADVERTISING)) - return; - - if (next_instance && !ext_adv_capable(hdev)) - __hci_req_schedule_adv_instance(req, next_instance->instance, - false); -} - int hci_update_random_address(struct hci_request *req, bool require_privacy, bool use_rpa, u8 *own_addr_type) { @@ -1977,7 +1872,6 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) void hci_request_setup(struct hci_dev *hdev) { - INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire); INIT_DELAYED_WORK(&hdev->interleave_scan, interleave_scan_work); } @@ -1985,10 +1879,5 @@ void hci_request_cancel_all(struct hci_dev *hdev) { __hci_cmd_sync_cancel(hdev, ENODEV); - if (hdev->adv_instance_timeout) { - cancel_delayed_work_sync(&hdev->adv_instance_expire); - hdev->adv_instance_timeout = 0; - } - cancel_interleave_scan(hdev); } diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 41e0b84f2042..3d1b3d97ccdd 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -90,10 +90,6 @@ void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance); int __hci_req_schedule_adv_instance(struct hci_request *req, u8 instance, bool force); -void hci_req_clear_adv_instance(struct hci_dev *hdev, struct sock *sk, - struct hci_request *req, u8 instance, - bool force); - int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance); int __hci_req_setup_per_adv_instance(struct hci_request *req, u8 instance, u16 min_interval, u16 max_interval); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 75e7c0a01ab1..bc993dd2383d 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -465,6 +465,121 @@ unlock: hci_dev_unlock(hdev); } +static void cancel_adv_timeout(struct hci_dev *hdev) +{ + if (hdev->adv_instance_timeout) { + hdev->adv_instance_timeout = 0; + cancel_delayed_work(&hdev->adv_instance_expire); + } +} + +/* For a single instance: + * - force == true: The instance will be removed even when its remaining + * lifetime is not zero. + * - force == false: the instance will be deactivated but kept stored unless + * the remaining lifetime is zero. + * + * For instance == 0x00: + * - force == true: All instances will be removed regardless of their timeout + * setting. + * - force == false: Only instances that have a timeout will be removed. + */ +int hci_clear_adv_instance_sync(struct hci_dev *hdev, struct sock *sk, + u8 instance, bool force) +{ + struct adv_info *adv_instance, *n, *next_instance = NULL; + int err; + u8 rem_inst; + + /* Cancel any timeout concerning the removed instance(s). */ + if (!instance || hdev->cur_adv_instance == instance) + cancel_adv_timeout(hdev); + + /* Get the next instance to advertise BEFORE we remove + * the current one. This can be the same instance again + * if there is only one instance. + */ + if (instance && hdev->cur_adv_instance == instance) + next_instance = hci_get_next_instance(hdev, instance); + + if (instance == 0x00) { + list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances, + list) { + if (!(force || adv_instance->timeout)) + continue; + + rem_inst = adv_instance->instance; + err = hci_remove_adv_instance(hdev, rem_inst); + if (!err) + mgmt_advertising_removed(sk, hdev, rem_inst); + } + } else { + adv_instance = hci_find_adv_instance(hdev, instance); + + if (force || (adv_instance && adv_instance->timeout && + !adv_instance->remaining_time)) { + /* Don't advertise a removed instance. */ + if (next_instance && + next_instance->instance == instance) + next_instance = NULL; + + err = hci_remove_adv_instance(hdev, instance); + if (!err) + mgmt_advertising_removed(sk, hdev, instance); + } + } + + if (!hdev_is_powered(hdev) || hci_dev_test_flag(hdev, HCI_ADVERTISING)) + return 0; + + if (next_instance && !ext_adv_capable(hdev)) + return hci_schedule_adv_instance_sync(hdev, + next_instance->instance, + false); + + return 0; +} + +static int adv_timeout_expire_sync(struct hci_dev *hdev, void *data) +{ + u8 instance = *(u8 *)data; + + kfree(data); + + hci_clear_adv_instance_sync(hdev, NULL, instance, false); + + if (list_empty(&hdev->adv_instances)) + return hci_disable_advertising_sync(hdev); + + return 0; +} + +static void adv_timeout_expire(struct work_struct *work) +{ + u8 *inst_ptr; + struct hci_dev *hdev = container_of(work, struct hci_dev, + adv_instance_expire.work); + + bt_dev_dbg(hdev, ""); + + hci_dev_lock(hdev); + + hdev->adv_instance_timeout = 0; + + if (hdev->cur_adv_instance == 0x00) + goto unlock; + + inst_ptr = kmalloc(1, GFP_KERNEL); + if (!inst_ptr) + goto unlock; + + *inst_ptr = hdev->cur_adv_instance; + hci_cmd_sync_queue(hdev, adv_timeout_expire_sync, inst_ptr, NULL); + +unlock: + hci_dev_unlock(hdev); +} + void hci_cmd_sync_init(struct hci_dev *hdev) { INIT_WORK(&hdev->cmd_sync_work, hci_cmd_sync_work); @@ -474,6 +589,7 @@ void hci_cmd_sync_init(struct hci_dev *hdev) INIT_WORK(&hdev->cmd_sync_cancel_work, hci_cmd_sync_cancel_work); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable); INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart); + INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire); } void hci_cmd_sync_clear(struct hci_dev *hdev) @@ -1479,14 +1595,6 @@ int hci_le_terminate_big_sync(struct hci_dev *hdev, u8 handle, u8 reason) sizeof(cp), &cp, HCI_CMD_TIMEOUT); } -static void cancel_adv_timeout(struct hci_dev *hdev) -{ - if (hdev->adv_instance_timeout) { - hdev->adv_instance_timeout = 0; - cancel_delayed_work(&hdev->adv_instance_expire); - } -} - static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance) { struct { @@ -4566,6 +4674,11 @@ int hci_dev_close_sync(struct hci_dev *hdev) hci_request_cancel_all(hdev); + if (hdev->adv_instance_timeout) { + cancel_delayed_work_sync(&hdev->adv_instance_expire); + hdev->adv_instance_timeout = 0; + } + if (!hci_dev_test_flag(hdev, HCI_UNREGISTER) && !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && test_bit(HCI_UP, &hdev->flags)) { diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 6e31023b84f5..df20e15a05da 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2058,6 +2058,8 @@ static int set_le_sync(struct hci_dev *hdev, void *data) int err; if (!val) { + hci_clear_adv_instance_sync(hdev, NULL, 0x00, true); + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) hci_disable_advertising_sync(hdev); @@ -2131,9 +2133,6 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) val = !!cp->val; enabled = lmp_host_le_capable(hdev); - if (!val) - hci_req_clear_adv_instance(hdev, NULL, NULL, 0x00, true); - if (!hdev_is_powered(hdev) || val == enabled) { bool changed = false; -- cgit v1.2.3-70-g09d2 From 3fe318ee72c54506534f51b4b4dfb19e0e0df2db Mon Sep 17 00:00:00 2001 From: Brian Gix Date: Fri, 5 Aug 2022 16:42:34 -0700 Subject: Bluetooth: move hci_get_random_address() to hci_sync This function has no dependencies on the deprecated hci_request mechanism, so has been moved unchanged to hci_sync.c Signed-off-by: Brian Gix Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sync.h | 5 + net/bluetooth/hci_request.c | 255 ++++++++++++++------------------------- net/bluetooth/hci_request.h | 9 -- net/bluetooth/hci_sync.c | 73 +++++++++++ 4 files changed, 170 insertions(+), 172 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index aea950440b9d..b6b975c2ed3e 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -16,6 +16,7 @@ struct hci_cmd_sync_work_entry { hci_cmd_sync_work_destroy_t destroy; }; +struct adv_info; /* Function with sync suffix shall not be called with hdev->lock held as they * wait the command to complete and in the meantime an event could be received * which could attempt to acquire hdev->lock causing a deadlock. @@ -51,6 +52,10 @@ int hci_update_class_sync(struct hci_dev *hdev); int hci_update_name_sync(struct hci_dev *hdev); int hci_write_ssp_mode_sync(struct hci_dev *hdev, u8 mode); +int hci_get_random_address(struct hci_dev *hdev, bool require_privacy, + bool use_rpa, struct adv_info *adv_instance, + u8 *own_addr_type, bdaddr_t *rand_addr); + int hci_update_random_address_sync(struct hci_dev *hdev, bool require_privacy, bool rpa, u8 *own_addr_type); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 685dc0f983b7..b9875224ac7b 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -650,6 +650,96 @@ static inline bool hci_is_le_conn_scanning(struct hci_dev *hdev) return false; } +static void set_random_addr(struct hci_request *req, bdaddr_t *rpa); +static int hci_update_random_address(struct hci_request *req, + bool require_privacy, bool use_rpa, + u8 *own_addr_type) +{ + struct hci_dev *hdev = req->hdev; + int err; + + /* If privacy is enabled use a resolvable private address. If + * current RPA has expired or there is something else than + * the current RPA in use, then generate a new one. + */ + if (use_rpa) { + /* If Controller supports LL Privacy use own address type is + * 0x03 + */ + if (use_ll_privacy(hdev)) + *own_addr_type = ADDR_LE_DEV_RANDOM_RESOLVED; + else + *own_addr_type = ADDR_LE_DEV_RANDOM; + + if (rpa_valid(hdev)) + return 0; + + err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa); + if (err < 0) { + bt_dev_err(hdev, "failed to generate new RPA"); + return err; + } + + set_random_addr(req, &hdev->rpa); + + return 0; + } + + /* In case of required privacy without resolvable private address, + * use an non-resolvable private address. This is useful for active + * scanning and non-connectable advertising. + */ + if (require_privacy) { + bdaddr_t nrpa; + + while (true) { + /* The non-resolvable private address is generated + * from random six bytes with the two most significant + * bits cleared. + */ + get_random_bytes(&nrpa, 6); + nrpa.b[5] &= 0x3f; + + /* The non-resolvable private address shall not be + * equal to the public address. + */ + if (bacmp(&hdev->bdaddr, &nrpa)) + break; + } + + *own_addr_type = ADDR_LE_DEV_RANDOM; + set_random_addr(req, &nrpa); + return 0; + } + + /* If forcing static address is in use or there is no public + * address use the static address as random address (but skip + * the HCI command if the current random address is already the + * static one. + * + * In case BR/EDR has been disabled on a dual-mode controller + * and a static address has been configured, then use that + * address instead of the public BR/EDR address. + */ + if (hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) || + !bacmp(&hdev->bdaddr, BDADDR_ANY) || + (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) && + bacmp(&hdev->static_addr, BDADDR_ANY))) { + *own_addr_type = ADDR_LE_DEV_RANDOM; + if (bacmp(&hdev->static_addr, &hdev->random_addr)) + hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, + &hdev->static_addr); + return 0; + } + + /* Neither privacy nor static address is being used so use a + * public address. + */ + *own_addr_type = ADDR_LE_DEV_PUBLIC; + + return 0; +} + /* Ensure to call hci_req_add_le_scan_disable() first to disable the * controller based address resolution to be able to reconfigure * resolving list. @@ -859,79 +949,6 @@ static void interleave_scan_work(struct work_struct *work) &hdev->interleave_scan, timeout); } -int hci_get_random_address(struct hci_dev *hdev, bool require_privacy, - bool use_rpa, struct adv_info *adv_instance, - u8 *own_addr_type, bdaddr_t *rand_addr) -{ - int err; - - bacpy(rand_addr, BDADDR_ANY); - - /* If privacy is enabled use a resolvable private address. If - * current RPA has expired then generate a new one. - */ - if (use_rpa) { - /* If Controller supports LL Privacy use own address type is - * 0x03 - */ - if (use_ll_privacy(hdev)) - *own_addr_type = ADDR_LE_DEV_RANDOM_RESOLVED; - else - *own_addr_type = ADDR_LE_DEV_RANDOM; - - if (adv_instance) { - if (adv_rpa_valid(adv_instance)) - return 0; - } else { - if (rpa_valid(hdev)) - return 0; - } - - err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa); - if (err < 0) { - bt_dev_err(hdev, "failed to generate new RPA"); - return err; - } - - bacpy(rand_addr, &hdev->rpa); - - return 0; - } - - /* In case of required privacy without resolvable private address, - * use an non-resolvable private address. This is useful for - * non-connectable advertising. - */ - if (require_privacy) { - bdaddr_t nrpa; - - while (true) { - /* The non-resolvable private address is generated - * from random six bytes with the two most significant - * bits cleared. - */ - get_random_bytes(&nrpa, 6); - nrpa.b[5] &= 0x3f; - - /* The non-resolvable private address shall not be - * equal to the public address. - */ - if (bacmp(&hdev->bdaddr, &nrpa)) - break; - } - - *own_addr_type = ADDR_LE_DEV_RANDOM; - bacpy(rand_addr, &nrpa); - - return 0; - } - - /* No privacy so use a public address. */ - *own_addr_type = ADDR_LE_DEV_PUBLIC; - - return 0; -} - static void set_random_addr(struct hci_request *req, bdaddr_t *rpa) { struct hci_dev *hdev = req->hdev; @@ -956,96 +973,8 @@ static void set_random_addr(struct hci_request *req, bdaddr_t *rpa) hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, rpa); } -int hci_update_random_address(struct hci_request *req, bool require_privacy, - bool use_rpa, u8 *own_addr_type) -{ - struct hci_dev *hdev = req->hdev; - int err; - - /* If privacy is enabled use a resolvable private address. If - * current RPA has expired or there is something else than - * the current RPA in use, then generate a new one. - */ - if (use_rpa) { - /* If Controller supports LL Privacy use own address type is - * 0x03 - */ - if (use_ll_privacy(hdev)) - *own_addr_type = ADDR_LE_DEV_RANDOM_RESOLVED; - else - *own_addr_type = ADDR_LE_DEV_RANDOM; - - if (rpa_valid(hdev)) - return 0; - - err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa); - if (err < 0) { - bt_dev_err(hdev, "failed to generate new RPA"); - return err; - } - - set_random_addr(req, &hdev->rpa); - - return 0; - } - - /* In case of required privacy without resolvable private address, - * use an non-resolvable private address. This is useful for active - * scanning and non-connectable advertising. - */ - if (require_privacy) { - bdaddr_t nrpa; - - while (true) { - /* The non-resolvable private address is generated - * from random six bytes with the two most significant - * bits cleared. - */ - get_random_bytes(&nrpa, 6); - nrpa.b[5] &= 0x3f; - - /* The non-resolvable private address shall not be - * equal to the public address. - */ - if (bacmp(&hdev->bdaddr, &nrpa)) - break; - } - - *own_addr_type = ADDR_LE_DEV_RANDOM; - set_random_addr(req, &nrpa); - return 0; - } - - /* If forcing static address is in use or there is no public - * address use the static address as random address (but skip - * the HCI command if the current random address is already the - * static one. - * - * In case BR/EDR has been disabled on a dual-mode controller - * and a static address has been configured, then use that - * address instead of the public BR/EDR address. - */ - if (hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) || - !bacmp(&hdev->bdaddr, BDADDR_ANY) || - (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) && - bacmp(&hdev->static_addr, BDADDR_ANY))) { - *own_addr_type = ADDR_LE_DEV_RANDOM; - if (bacmp(&hdev->static_addr, &hdev->random_addr)) - hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, - &hdev->static_addr); - return 0; - } - - /* Neither privacy nor static address is being used so use a - * public address. - */ - *own_addr_type = ADDR_LE_DEV_PUBLIC; - - return 0; -} - -void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, - u8 reason) +static void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, + u8 reason) { switch (conn->state) { case BT_CONNECTED: diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 55205cc9f281..c7185ad77c6e 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -74,16 +74,7 @@ void hci_req_add_le_passive_scan(struct hci_request *req); void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next); int hci_req_update_adv_data(struct hci_dev *hdev, u8 instance); -int hci_get_random_address(struct hci_dev *hdev, bool require_privacy, - bool use_rpa, struct adv_info *adv_instance, - u8 *own_addr_type, bdaddr_t *rand_addr); - -int hci_update_random_address(struct hci_request *req, bool require_privacy, - bool use_rpa, u8 *own_addr_type); int hci_abort_conn(struct hci_conn *conn, u8 reason); -void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, - u8 reason); - void hci_request_setup(struct hci_dev *hdev); void hci_request_cancel_all(struct hci_dev *hdev); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index bc993dd2383d..46eb535ff466 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -5992,3 +5992,76 @@ int hci_le_pa_terminate_sync(struct hci_dev *hdev, u16 handle) return __hci_cmd_sync_status(hdev, HCI_OP_LE_PA_TERM_SYNC, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } + +int hci_get_random_address(struct hci_dev *hdev, bool require_privacy, + bool use_rpa, struct adv_info *adv_instance, + u8 *own_addr_type, bdaddr_t *rand_addr) +{ + int err; + + bacpy(rand_addr, BDADDR_ANY); + + /* If privacy is enabled use a resolvable private address. If + * current RPA has expired then generate a new one. + */ + if (use_rpa) { + /* If Controller supports LL Privacy use own address type is + * 0x03 + */ + if (use_ll_privacy(hdev)) + *own_addr_type = ADDR_LE_DEV_RANDOM_RESOLVED; + else + *own_addr_type = ADDR_LE_DEV_RANDOM; + + if (adv_instance) { + if (adv_rpa_valid(adv_instance)) + return 0; + } else { + if (rpa_valid(hdev)) + return 0; + } + + err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa); + if (err < 0) { + bt_dev_err(hdev, "failed to generate new RPA"); + return err; + } + + bacpy(rand_addr, &hdev->rpa); + + return 0; + } + + /* In case of required privacy without resolvable private address, + * use an non-resolvable private address. This is useful for + * non-connectable advertising. + */ + if (require_privacy) { + bdaddr_t nrpa; + + while (true) { + /* The non-resolvable private address is generated + * from random six bytes with the two most significant + * bits cleared. + */ + get_random_bytes(&nrpa, 6); + nrpa.b[5] &= 0x3f; + + /* The non-resolvable private address shall not be + * equal to the public address. + */ + if (bacmp(&hdev->bdaddr, &nrpa)) + break; + } + + *own_addr_type = ADDR_LE_DEV_RANDOM; + bacpy(rand_addr, &nrpa); + + return 0; + } + + /* No privacy so use a public address. */ + *own_addr_type = ADDR_LE_DEV_PUBLIC; + + return 0; +} -- cgit v1.2.3-70-g09d2 From 651cd3d65b0f76a2198fcf3a80ce5d53dd267717 Mon Sep 17 00:00:00 2001 From: Brian Gix Date: Fri, 5 Aug 2022 16:42:35 -0700 Subject: Bluetooth: convert hci_update_adv_data to hci_sync hci_update_adv_data() is called from hci_event and hci_core due to events from the controller. The prior function used the deprecated hci_request method, and the new one uses hci_sync.c Signed-off-by: Brian Gix Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sync.h | 1 + net/bluetooth/hci_core.c | 2 +- net/bluetooth/hci_event.c | 2 +- net/bluetooth/hci_request.c | 64 ---------------------------------------- net/bluetooth/hci_request.h | 2 -- net/bluetooth/hci_sync.c | 20 +++++++++++++ 6 files changed, 23 insertions(+), 68 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index b6b975c2ed3e..17f5a4c32f36 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -61,6 +61,7 @@ int hci_update_random_address_sync(struct hci_dev *hdev, bool require_privacy, int hci_update_scan_rsp_data_sync(struct hci_dev *hdev, u8 instance); int hci_update_adv_data_sync(struct hci_dev *hdev, u8 instance); +int hci_update_adv_data(struct hci_dev *hdev, u8 instance); int hci_schedule_adv_instance_sync(struct hci_dev *hdev, u8 instance, bool force); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index b3a5a3cc9372..9d2c33f6b065 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -714,7 +714,7 @@ static void hci_update_passive_scan_state(struct hci_dev *hdev, u8 scan) hci_dev_set_flag(hdev, HCI_BREDR_ENABLED); if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) - hci_req_update_adv_data(hdev, hdev->cur_adv_instance); + hci_update_adv_data(hdev, hdev->cur_adv_instance); mgmt_new_settings(hdev); } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 485c814cf44a..395c6479456f 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2152,7 +2152,7 @@ static u8 hci_cc_set_ext_adv_param(struct hci_dev *hdev, void *data, adv_instance->tx_power = rp->tx_power; } /* Update adv data as tx power is known now */ - hci_req_update_adv_data(hdev, cp->handle); + hci_update_adv_data(hdev, cp->handle); hci_dev_unlock(hdev); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index b9875224ac7b..2e19a271d7a1 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -829,70 +829,6 @@ void hci_req_add_le_passive_scan(struct hci_request *req) addr_resolv); } -static void __hci_req_update_adv_data(struct hci_request *req, u8 instance) -{ - struct hci_dev *hdev = req->hdev; - u8 len; - - if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) - return; - - if (ext_adv_capable(hdev)) { - struct { - struct hci_cp_le_set_ext_adv_data cp; - u8 data[HCI_MAX_EXT_AD_LENGTH]; - } pdu; - - memset(&pdu, 0, sizeof(pdu)); - - len = eir_create_adv_data(hdev, instance, pdu.data); - - /* There's nothing to do if the data hasn't changed */ - if (hdev->adv_data_len == len && - memcmp(pdu.data, hdev->adv_data, len) == 0) - return; - - memcpy(hdev->adv_data, pdu.data, len); - hdev->adv_data_len = len; - - pdu.cp.length = len; - pdu.cp.handle = instance; - pdu.cp.operation = LE_SET_ADV_DATA_OP_COMPLETE; - pdu.cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG; - - hci_req_add(req, HCI_OP_LE_SET_EXT_ADV_DATA, - sizeof(pdu.cp) + len, &pdu.cp); - } else { - struct hci_cp_le_set_adv_data cp; - - memset(&cp, 0, sizeof(cp)); - - len = eir_create_adv_data(hdev, instance, cp.data); - - /* There's nothing to do if the data hasn't changed */ - if (hdev->adv_data_len == len && - memcmp(cp.data, hdev->adv_data, len) == 0) - return; - - memcpy(hdev->adv_data, cp.data, sizeof(cp.data)); - hdev->adv_data_len = len; - - cp.length = len; - - hci_req_add(req, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp); - } -} - -int hci_req_update_adv_data(struct hci_dev *hdev, u8 instance) -{ - struct hci_request req; - - hci_req_init(&req, hdev); - __hci_req_update_adv_data(&req, instance); - - return hci_req_run(&req, NULL); -} - static int hci_req_add_le_interleaved_scan(struct hci_request *req, unsigned long opt) { diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index c7185ad77c6e..7e1de871fca4 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -73,8 +73,6 @@ void hci_req_add_le_passive_scan(struct hci_request *req); void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next); -int hci_req_update_adv_data(struct hci_dev *hdev, u8 instance); - int hci_abort_conn(struct hci_conn *conn, u8 reason); void hci_request_setup(struct hci_dev *hdev); void hci_request_cancel_all(struct hci_dev *hdev); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 46eb535ff466..6de2ad730995 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -6065,3 +6065,23 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy, return 0; } + +static int _update_adv_data_sync(struct hci_dev *hdev, void *data) +{ + u8 instance = *(u8 *)data; + + kfree(data); + + return hci_update_adv_data_sync(hdev, instance); +} + +int hci_update_adv_data(struct hci_dev *hdev, u8 instance) +{ + u8 *inst_ptr = kmalloc(1, GFP_KERNEL); + + if (!inst_ptr) + return -ENOMEM; + + *inst_ptr = instance; + return hci_cmd_sync_queue(hdev, _update_adv_data_sync, inst_ptr, NULL); +} -- cgit v1.2.3-70-g09d2 From 44387d1736c40a74085be354e2b5f37ca0689608 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 24 Aug 2022 17:10:03 +0800 Subject: net: sched: remove unnecessary init of qdisc skb head The memory allocated by using kzallloc_node and kcalloc has been cleared. Therefore, the structure members of the new qdisc are 0. So there's no need to explicitly assign a value of 0. Signed-off-by: Zhengchao Shao Signed-off-by: David S. Miller --- include/net/sch_generic.h | 7 ------- net/sched/sch_generic.c | 1 - net/sched/sch_htb.c | 2 -- 3 files changed, 10 deletions(-) (limited to 'include/net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index f2958fb5ae08..7dc83400bde4 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -940,13 +940,6 @@ static inline void qdisc_purge_queue(struct Qdisc *sch) qdisc_tree_reduce_backlog(sch, qlen, backlog); } -static inline void qdisc_skb_head_init(struct qdisc_skb_head *qh) -{ - qh->head = NULL; - qh->tail = NULL; - qh->qlen = 0; -} - static inline void __qdisc_enqueue_tail(struct sk_buff *skb, struct qdisc_skb_head *qh) { diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 99b697ad2b98..8d25b41d22ec 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -941,7 +941,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, goto errout; __skb_queue_head_init(&sch->gso_skb); __skb_queue_head_init(&sch->skb_bad_txq); - qdisc_skb_head_init(&sch->q); gnet_stats_basic_sync_init(&sch->bstats); spin_lock_init(&sch->q.lock); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index cb5872d22ecf..dbbb276aecb3 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1104,8 +1104,6 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt, if (err < 0) goto err_free_direct_qdiscs; - qdisc_skb_head_init(&q->direct_queue); - if (tb[TCA_HTB_DIRECT_QLEN]) q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]); else -- cgit v1.2.3-70-g09d2 From 5182a5d48c3d1992b2db8748f96914e07eee0956 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Fri, 26 Aug 2022 14:46:58 +0300 Subject: net: allow storing xfrm interface metadata in metadata_dst XFRM interfaces provide the association of various XFRM transformations to a netdevice using an 'if_id' identifier common to both the XFRM data structures (polcies, states) and the interface. The if_id is configured by the controlling entity (usually the IKE daemon) and can be used by the administrator to define logical relations between different connections. For example, different connections can share the if_id identifier so that they pass through the same interface, . However, currently it is not possible for connections using a different if_id to use the same interface while retaining the logical separation between them, without using additional criteria such as skb marks or different traffic selectors. When having a large number of connections, it is useful to have a the logical separation offered by the if_id identifier but use a single network interface. Similar to the way collect_md mode is used in IP tunnels. This patch attempts to enable different configuration mechanisms - such as ebpf programs, LWT encapsulations, and TC - to attach metadata to skbs which would carry the if_id. This way a single xfrm interface in collect_md mode can demux traffic based on this configuration on tx and provide this metadata on rx. The XFRM metadata is somewhat similar to ip tunnel metadata in that it has an "id", and shares similar configuration entities (bpf, tc, ...), however, it does not necessarily represent an IP tunnel or use other ip tunnel information, and also has an optional "link" property which can be used for affecting underlying routing decisions. Additional xfrm related criteria may also be added in the future. Therefore, a new metadata type is introduced, to be used in subsequent patches in the xfrm interface and configuration entities. Reviewed-by: Nikolay Aleksandrov Reviewed-by: Nicolas Dichtel Signed-off-by: Eyal Birger Signed-off-by: Steffen Klassert --- include/net/dst_metadata.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/net') diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index adab27ba1ecb..e4b059908cc7 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -9,6 +9,7 @@ enum metadata_type { METADATA_IP_TUNNEL, METADATA_HW_PORT_MUX, + METADATA_XFRM, }; struct hw_port_info { @@ -16,12 +17,18 @@ struct hw_port_info { u32 port_id; }; +struct xfrm_md_info { + u32 if_id; + int link; +}; + struct metadata_dst { struct dst_entry dst; enum metadata_type type; union { struct ip_tunnel_info tun_info; struct hw_port_info port_info; + struct xfrm_md_info xfrm_info; } u; }; @@ -53,6 +60,16 @@ skb_tunnel_info(const struct sk_buff *skb) return NULL; } +static inline struct xfrm_md_info *skb_xfrm_md_info(const struct sk_buff *skb) +{ + struct metadata_dst *md_dst = skb_metadata_dst(skb); + + if (md_dst && md_dst->type == METADATA_XFRM) + return &md_dst->u.xfrm_info; + + return NULL; +} + static inline bool skb_valid_dst(const struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -82,6 +99,9 @@ static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a, return memcmp(&a->u.tun_info, &b->u.tun_info, sizeof(a->u.tun_info) + a->u.tun_info.options_len); + case METADATA_XFRM: + return memcmp(&a->u.xfrm_info, &b->u.xfrm_info, + sizeof(a->u.xfrm_info)); default: return 1; } -- cgit v1.2.3-70-g09d2 From abc340b38ba25cd6c7aa2c0bd9150d30738c82d0 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Fri, 26 Aug 2022 14:46:59 +0300 Subject: xfrm: interface: support collect metadata mode This commit adds support for 'collect_md' mode on xfrm interfaces. Each net can have one collect_md device, created by providing the IFLA_XFRM_COLLECT_METADATA flag at creation. This device cannot be altered and has no if_id or link device attributes. On transmit to this device, the if_id is fetched from the attached dst metadata on the skb. If exists, the link property is also fetched from the metadata. The dst metadata type used is METADATA_XFRM which holds these properties. On the receive side, xfrmi_rcv_cb() populates a dst metadata for each packet received and attaches it to the skb. The if_id used in this case is fetched from the xfrm state, and the link is fetched from the incoming device. This information can later be used by upper layers such as tc, ebpf, and ip rules. Because the skb is scrubed in xfrmi_rcv_cb(), the attachment of the dst metadata is postponed until after scrubing. Similarly, xfrm_input() is adapted to avoid dropping metadata dsts by only dropping 'valid' (skb_valid_dst(skb) == true) dsts. Policy matching on packets arriving from collect_md xfrmi devices is done by using the xfrm state existing in the skb's sec_path. The xfrm_if_cb.decode_cb() interface implemented by xfrmi_decode_session() is changed to keep the details of the if_id extraction tucked away in xfrm_interface.c. Reviewed-by: Nicolas Dichtel Reviewed-by: Nikolay Aleksandrov Signed-off-by: Eyal Birger Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 11 +++- include/uapi/linux/if_link.h | 1 + net/xfrm/xfrm_input.c | 7 ++- net/xfrm/xfrm_interface.c | 121 ++++++++++++++++++++++++++++++++++++------- net/xfrm/xfrm_policy.c | 10 ++-- 5 files changed, 121 insertions(+), 29 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 6e8fa98f786f..28b988577ed2 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -312,9 +312,15 @@ struct km_event { struct net *net; }; +struct xfrm_if_decode_session_result { + struct net *net; + u32 if_id; +}; + struct xfrm_if_cb { - struct xfrm_if *(*decode_session)(struct sk_buff *skb, - unsigned short family); + bool (*decode_session)(struct sk_buff *skb, + unsigned short family, + struct xfrm_if_decode_session_result *res); }; void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb); @@ -985,6 +991,7 @@ void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev); struct xfrm_if_parms { int link; /* ifindex of underlying L2 interface */ u32 if_id; /* interface identifyer */ + bool collect_md; }; struct xfrm_if { diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index e36d9d2c65a7..d96f13a42589 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -694,6 +694,7 @@ enum { IFLA_XFRM_UNSPEC, IFLA_XFRM_LINK, IFLA_XFRM_IF_ID, + IFLA_XFRM_COLLECT_METADATA, __IFLA_XFRM_MAX }; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 144238a50f3d..25e822fb5771 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "xfrm_inout.h" @@ -720,7 +721,8 @@ resume: sp = skb_sec_path(skb); if (sp) sp->olen = 0; - skb_dst_drop(skb); + if (skb_valid_dst(skb)) + skb_dst_drop(skb); gro_cells_receive(&gro_cells, skb); return 0; } else { @@ -738,7 +740,8 @@ resume: sp = skb_sec_path(skb); if (sp) sp->olen = 0; - skb_dst_drop(skb); + if (skb_valid_dst(skb)) + skb_dst_drop(skb); gro_cells_receive(&gro_cells, skb); return err; } diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 5113fa0fbcee..e9a355047468 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -56,6 +57,7 @@ static const struct net_device_ops xfrmi_netdev_ops; struct xfrmi_net { /* lists for storing interfaces in use */ struct xfrm_if __rcu *xfrmi[XFRMI_HASH_SIZE]; + struct xfrm_if __rcu *collect_md_xfrmi; }; #define for_each_xfrmi_rcu(start, xi) \ @@ -77,17 +79,23 @@ static struct xfrm_if *xfrmi_lookup(struct net *net, struct xfrm_state *x) return xi; } + xi = rcu_dereference(xfrmn->collect_md_xfrmi); + if (xi && (xi->dev->flags & IFF_UP)) + return xi; + return NULL; } -static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb, - unsigned short family) +static bool xfrmi_decode_session(struct sk_buff *skb, + unsigned short family, + struct xfrm_if_decode_session_result *res) { struct net_device *dev; + struct xfrm_if *xi; int ifindex = 0; if (!secpath_exists(skb) || !skb->dev) - return NULL; + return false; switch (family) { case AF_INET6: @@ -107,11 +115,18 @@ static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb, } if (!dev || !(dev->flags & IFF_UP)) - return NULL; + return false; if (dev->netdev_ops != &xfrmi_netdev_ops) - return NULL; + return false; - return netdev_priv(dev); + xi = netdev_priv(dev); + res->net = xi->net; + + if (xi->p.collect_md) + res->if_id = xfrm_input_state(skb)->if_id; + else + res->if_id = xi->p.if_id; + return true; } static void xfrmi_link(struct xfrmi_net *xfrmn, struct xfrm_if *xi) @@ -157,7 +172,10 @@ static int xfrmi_create(struct net_device *dev) if (err < 0) goto out; - xfrmi_link(xfrmn, xi); + if (xi->p.collect_md) + rcu_assign_pointer(xfrmn->collect_md_xfrmi, xi); + else + xfrmi_link(xfrmn, xi); return 0; @@ -185,7 +203,10 @@ static void xfrmi_dev_uninit(struct net_device *dev) struct xfrm_if *xi = netdev_priv(dev); struct xfrmi_net *xfrmn = net_generic(xi->net, xfrmi_net_id); - xfrmi_unlink(xfrmn, xi); + if (xi->p.collect_md) + RCU_INIT_POINTER(xfrmn->collect_md_xfrmi, NULL); + else + xfrmi_unlink(xfrmn, xi); } static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet) @@ -214,6 +235,7 @@ static int xfrmi_rcv_cb(struct sk_buff *skb, int err) struct xfrm_state *x; struct xfrm_if *xi; bool xnet; + int link; if (err && !secpath_exists(skb)) return 0; @@ -224,6 +246,7 @@ static int xfrmi_rcv_cb(struct sk_buff *skb, int err) if (!xi) return 1; + link = skb->dev->ifindex; dev = xi->dev; skb->dev = dev; @@ -254,6 +277,17 @@ static int xfrmi_rcv_cb(struct sk_buff *skb, int err) } xfrmi_scrub_packet(skb, xnet); + if (xi->p.collect_md) { + struct metadata_dst *md_dst; + + md_dst = metadata_dst_alloc(0, METADATA_XFRM, GFP_ATOMIC); + if (!md_dst) + return -ENOMEM; + + md_dst->u.xfrm_info.if_id = x->if_id; + md_dst->u.xfrm_info.link = link; + skb_dst_set(skb, (struct dst_entry *)md_dst); + } dev_sw_netstats_rx_add(dev, skb->len); return 0; @@ -269,10 +303,23 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) struct net_device *tdev; struct xfrm_state *x; int err = -1; + u32 if_id; int mtu; + if (xi->p.collect_md) { + struct xfrm_md_info *md_info = skb_xfrm_md_info(skb); + + if (unlikely(!md_info)) + return -EINVAL; + + if_id = md_info->if_id; + fl->flowi_oif = md_info->link; + } else { + if_id = xi->p.if_id; + } + dst_hold(dst); - dst = xfrm_lookup_with_ifid(xi->net, dst, fl, NULL, 0, xi->p.if_id); + dst = xfrm_lookup_with_ifid(xi->net, dst, fl, NULL, 0, if_id); if (IS_ERR(dst)) { err = PTR_ERR(dst); dst = NULL; @@ -283,7 +330,7 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) if (!x) goto tx_err_link_failure; - if (x->if_id != xi->p.if_id) + if (x->if_id != if_id) goto tx_err_link_failure; tdev = dst->dev; @@ -633,6 +680,9 @@ static void xfrmi_netlink_parms(struct nlattr *data[], if (data[IFLA_XFRM_IF_ID]) parms->if_id = nla_get_u32(data[IFLA_XFRM_IF_ID]); + + if (data[IFLA_XFRM_COLLECT_METADATA]) + parms->collect_md = true; } static int xfrmi_newlink(struct net *src_net, struct net_device *dev, @@ -645,14 +695,27 @@ static int xfrmi_newlink(struct net *src_net, struct net_device *dev, int err; xfrmi_netlink_parms(data, &p); - if (!p.if_id) { - NL_SET_ERR_MSG(extack, "if_id must be non zero"); - return -EINVAL; - } + if (p.collect_md) { + struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id); - xi = xfrmi_locate(net, &p); - if (xi) - return -EEXIST; + if (p.link || p.if_id) { + NL_SET_ERR_MSG(extack, "link and if_id must be zero"); + return -EINVAL; + } + + if (rtnl_dereference(xfrmn->collect_md_xfrmi)) + return -EEXIST; + + } else { + if (!p.if_id) { + NL_SET_ERR_MSG(extack, "if_id must be non zero"); + return -EINVAL; + } + + xi = xfrmi_locate(net, &p); + if (xi) + return -EEXIST; + } xi = netdev_priv(dev); xi->p = p; @@ -682,12 +745,22 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[], return -EINVAL; } + if (p.collect_md) { + NL_SET_ERR_MSG(extack, "collect_md can't be changed"); + return -EINVAL; + } + xi = xfrmi_locate(net, &p); if (!xi) { xi = netdev_priv(dev); } else { if (xi->dev != dev) return -EEXIST; + if (xi->p.collect_md) { + NL_SET_ERR_MSG(extack, + "device can't be changed to collect_md"); + return -EINVAL; + } } return xfrmi_update(xi, &p); @@ -700,6 +773,8 @@ static size_t xfrmi_get_size(const struct net_device *dev) nla_total_size(4) + /* IFLA_XFRM_IF_ID */ nla_total_size(4) + + /* IFLA_XFRM_COLLECT_METADATA */ + nla_total_size(0) + 0; } @@ -709,7 +784,8 @@ static int xfrmi_fill_info(struct sk_buff *skb, const struct net_device *dev) struct xfrm_if_parms *parm = &xi->p; if (nla_put_u32(skb, IFLA_XFRM_LINK, parm->link) || - nla_put_u32(skb, IFLA_XFRM_IF_ID, parm->if_id)) + nla_put_u32(skb, IFLA_XFRM_IF_ID, parm->if_id) || + (xi->p.collect_md && nla_put_flag(skb, IFLA_XFRM_COLLECT_METADATA))) goto nla_put_failure; return 0; @@ -725,8 +801,10 @@ static struct net *xfrmi_get_link_net(const struct net_device *dev) } static const struct nla_policy xfrmi_policy[IFLA_XFRM_MAX + 1] = { - [IFLA_XFRM_LINK] = { .type = NLA_U32 }, - [IFLA_XFRM_IF_ID] = { .type = NLA_U32 }, + [IFLA_XFRM_UNSPEC] = { .strict_start_type = IFLA_XFRM_COLLECT_METADATA }, + [IFLA_XFRM_LINK] = { .type = NLA_U32 }, + [IFLA_XFRM_IF_ID] = { .type = NLA_U32 }, + [IFLA_XFRM_COLLECT_METADATA] = { .type = NLA_FLAG }, }; static struct rtnl_link_ops xfrmi_link_ops __read_mostly = { @@ -762,6 +840,9 @@ static void __net_exit xfrmi_exit_batch_net(struct list_head *net_exit_list) xip = &xi->next) unregister_netdevice_queue(xi->dev, &list); } + xi = rtnl_dereference(xfrmn->collect_md_xfrmi); + if (xi) + unregister_netdevice_queue(xi->dev, &list); } unregister_netdevice_many(&list); rtnl_unlock(); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 6264680b1f08..3c65059a508a 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3515,17 +3515,17 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, int xerr_idx = -1; const struct xfrm_if_cb *ifcb; struct sec_path *sp; - struct xfrm_if *xi; u32 if_id = 0; rcu_read_lock(); ifcb = xfrm_if_get_cb(); if (ifcb) { - xi = ifcb->decode_session(skb, family); - if (xi) { - if_id = xi->p.if_id; - net = xi->net; + struct xfrm_if_decode_session_result r; + + if (ifcb->decode_session(skb, family, &r)) { + if_id = r.if_id; + net = r.net; } } rcu_read_unlock(); -- cgit v1.2.3-70-g09d2 From 2c2493b9da9166478fe072e3054f8a5741dadb02 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Fri, 26 Aug 2022 14:47:00 +0300 Subject: xfrm: lwtunnel: add lwtunnel support for xfrm interfaces in collect_md mode Allow specifying the xfrm interface if_id and link as part of a route metadata using the lwtunnel infrastructure. This allows for example using a single xfrm interface in collect_md mode as the target of multiple routes each specifying a different if_id. With the appropriate changes to iproute2, considering an xfrm device ipsec1 in collect_md mode one can for example add a route specifying an if_id like so: ip route add dev ipsec1 encap xfrm if_id 1 In which case traffic routed to the device via this route would use if_id in the xfrm interface policy lookup. Or in the context of vrf, one can also specify the "link" property: ip route add dev ipsec1 encap xfrm if_id 1 link_dev eth15 Note: LWT_XFRM_LINK uses NLA_U32 similar to IFLA_XFRM_LINK even though internally "link" is signed. This is consistent with other _LINK attributes in other devices as well as in bpf and should not have an effect as device indexes can't be negative. Reviewed-by: Nicolas Dichtel Reviewed-by: Nikolay Aleksandrov Signed-off-by: Eyal Birger Signed-off-by: Steffen Klassert --- include/net/dst_metadata.h | 11 ++++++ include/uapi/linux/lwtunnel.h | 10 +++++ net/core/lwtunnel.c | 1 + net/xfrm/xfrm_interface.c | 85 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 107 insertions(+) (limited to 'include/net') diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index e4b059908cc7..57f75960fa28 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -60,13 +60,24 @@ skb_tunnel_info(const struct sk_buff *skb) return NULL; } +static inline struct xfrm_md_info *lwt_xfrm_info(struct lwtunnel_state *lwt) +{ + return (struct xfrm_md_info *)lwt->data; +} + static inline struct xfrm_md_info *skb_xfrm_md_info(const struct sk_buff *skb) { struct metadata_dst *md_dst = skb_metadata_dst(skb); + struct dst_entry *dst; if (md_dst && md_dst->type == METADATA_XFRM) return &md_dst->u.xfrm_info; + dst = skb_dst(skb); + if (dst && dst->lwtstate && + dst->lwtstate->type == LWTUNNEL_ENCAP_XFRM) + return lwt_xfrm_info(dst->lwtstate); + return NULL; } diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h index 2e206919125c..229655ef792f 100644 --- a/include/uapi/linux/lwtunnel.h +++ b/include/uapi/linux/lwtunnel.h @@ -15,6 +15,7 @@ enum lwtunnel_encap_types { LWTUNNEL_ENCAP_SEG6_LOCAL, LWTUNNEL_ENCAP_RPL, LWTUNNEL_ENCAP_IOAM6, + LWTUNNEL_ENCAP_XFRM, __LWTUNNEL_ENCAP_MAX, }; @@ -111,4 +112,13 @@ enum { #define LWT_BPF_MAX_HEADROOM 256 +enum { + LWT_XFRM_UNSPEC, + LWT_XFRM_IF_ID, + LWT_XFRM_LINK, + __LWT_XFRM_MAX, +}; + +#define LWT_XFRM_MAX (__LWT_XFRM_MAX - 1) + #endif /* _UAPI_LWTUNNEL_H_ */ diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 9ccd64e8a666..6fac2f0ef074 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -50,6 +50,7 @@ static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) return "IOAM6"; case LWTUNNEL_ENCAP_IP6: case LWTUNNEL_ENCAP_IP: + case LWTUNNEL_ENCAP_XFRM: case LWTUNNEL_ENCAP_NONE: case __LWTUNNEL_ENCAP_MAX: /* should not have got here */ diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index e9a355047468..5a67b120c4db 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -60,6 +60,88 @@ struct xfrmi_net { struct xfrm_if __rcu *collect_md_xfrmi; }; +static const struct nla_policy xfrm_lwt_policy[LWT_XFRM_MAX + 1] = { + [LWT_XFRM_IF_ID] = NLA_POLICY_MIN(NLA_U32, 1), + [LWT_XFRM_LINK] = NLA_POLICY_MIN(NLA_U32, 1), +}; + +static void xfrmi_destroy_state(struct lwtunnel_state *lwt) +{ +} + +static int xfrmi_build_state(struct net *net, struct nlattr *nla, + unsigned int family, const void *cfg, + struct lwtunnel_state **ts, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[LWT_XFRM_MAX + 1]; + struct lwtunnel_state *new_state; + struct xfrm_md_info *info; + int ret; + + ret = nla_parse_nested(tb, LWT_XFRM_MAX, nla, xfrm_lwt_policy, extack); + if (ret < 0) + return ret; + + if (!tb[LWT_XFRM_IF_ID]) { + NL_SET_ERR_MSG(extack, "if_id must be set"); + return -EINVAL; + } + + new_state = lwtunnel_state_alloc(sizeof(*info)); + if (!new_state) { + NL_SET_ERR_MSG(extack, "failed to create encap info"); + return -ENOMEM; + } + + new_state->type = LWTUNNEL_ENCAP_XFRM; + + info = lwt_xfrm_info(new_state); + + info->if_id = nla_get_u32(tb[LWT_XFRM_IF_ID]); + + if (tb[LWT_XFRM_LINK]) + info->link = nla_get_u32(tb[LWT_XFRM_LINK]); + + *ts = new_state; + return 0; +} + +static int xfrmi_fill_encap_info(struct sk_buff *skb, + struct lwtunnel_state *lwt) +{ + struct xfrm_md_info *info = lwt_xfrm_info(lwt); + + if (nla_put_u32(skb, LWT_XFRM_IF_ID, info->if_id) || + (info->link && nla_put_u32(skb, LWT_XFRM_LINK, info->link))) + return -EMSGSIZE; + + return 0; +} + +static int xfrmi_encap_nlsize(struct lwtunnel_state *lwtstate) +{ + return nla_total_size(sizeof(u32)) + /* LWT_XFRM_IF_ID */ + nla_total_size(sizeof(u32)); /* LWT_XFRM_LINK */ +} + +static int xfrmi_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) +{ + struct xfrm_md_info *a_info = lwt_xfrm_info(a); + struct xfrm_md_info *b_info = lwt_xfrm_info(b); + + return memcmp(a_info, b_info, sizeof(*a_info)); +} + +static const struct lwtunnel_encap_ops xfrmi_encap_ops = { + .build_state = xfrmi_build_state, + .destroy_state = xfrmi_destroy_state, + .fill_encap = xfrmi_fill_encap_info, + .get_encap_size = xfrmi_encap_nlsize, + .cmp_encap = xfrmi_encap_cmp, + .owner = THIS_MODULE, +}; + #define for_each_xfrmi_rcu(start, xi) \ for (xi = rcu_dereference(start); xi; xi = rcu_dereference(xi->next)) @@ -1080,6 +1162,8 @@ static int __init xfrmi_init(void) if (err < 0) goto rtnl_link_failed; + lwtunnel_encap_add_ops(&xfrmi_encap_ops, LWTUNNEL_ENCAP_XFRM); + xfrm_if_register_cb(&xfrm_if_cb); return err; @@ -1098,6 +1182,7 @@ pernet_dev_failed: static void __exit xfrmi_fini(void) { xfrm_if_unregister_cb(); + lwtunnel_encap_del_ops(&xfrmi_encap_ops, LWTUNNEL_ENCAP_XFRM); rtnl_link_unregister(&xfrmi_link_ops); xfrmi4_fini(); xfrmi6_fini(); -- cgit v1.2.3-70-g09d2 From 9c5d03d362519f36cd551aec596388f895c93d2d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 24 Aug 2022 17:18:30 -0700 Subject: genetlink: start to validate reserved header bytes We had historically not checked that genlmsghdr.reserved is 0 on input which prevents us from using those precious bytes in the future. One use case would be to extend the cmd field, which is currently just 8 bits wide and 256 is not a lot of commands for some core families. To make sure that new families do the right thing by default put the onus of opting out of validation on existing families. Signed-off-by: Jakub Kicinski Acked-by: Paul Moore (NetLabel) Signed-off-by: David S. Miller --- drivers/block/nbd.c | 1 + drivers/net/gtp.c | 1 + drivers/net/ieee802154/mac802154_hwsim.c | 1 + drivers/net/macsec.c | 1 + drivers/net/team/team.c | 1 + drivers/net/wireguard/netlink.c | 1 + drivers/net/wireless/mac80211_hwsim.c | 1 + drivers/target/target_core_user.c | 1 + drivers/thermal/thermal_netlink.c | 1 + drivers/vdpa/vdpa.c | 1 + fs/cifs/netlink.c | 1 + fs/dlm/netlink.c | 1 + fs/ksmbd/transport_ipc.c | 1 + include/linux/genl_magic_func.h | 1 + include/net/genetlink.h | 3 +++ kernel/taskstats.c | 1 + net/batman-adv/netlink.c | 1 + net/core/devlink.c | 1 + net/core/drop_monitor.c | 1 + net/ethtool/netlink.c | 1 + net/hsr/hsr_netlink.c | 1 + net/ieee802154/netlink.c | 1 + net/ieee802154/nl802154.c | 1 + net/ipv4/fou.c | 1 + net/ipv4/tcp_metrics.c | 1 + net/ipv6/ila/ila_main.c | 1 + net/ipv6/ioam6.c | 1 + net/ipv6/seg6.c | 1 + net/l2tp/l2tp_netlink.c | 1 + net/mptcp/pm_netlink.c | 1 + net/ncsi/ncsi-netlink.c | 1 + net/netfilter/ipvs/ip_vs_ctl.c | 1 + net/netlabel/netlabel_calipso.c | 1 + net/netlabel/netlabel_cipso_v4.c | 1 + net/netlabel/netlabel_mgmt.c | 1 + net/netlabel/netlabel_unlabeled.c | 1 + net/netlink/genetlink.c | 4 ++++ net/nfc/netlink.c | 1 + net/openvswitch/conntrack.c | 1 + net/openvswitch/datapath.c | 3 +++ net/openvswitch/meter.c | 1 + net/psample/psample.c | 1 + net/smc/smc_netlink.c | 3 ++- net/smc/smc_pnet.c | 3 ++- net/tipc/netlink.c | 1 + net/tipc/netlink_compat.c | 1 + net/wireless/nl80211.c | 1 + 47 files changed, 56 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 2a709daefbc4..6cec9ce23fd3 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -2322,6 +2322,7 @@ static struct genl_family nbd_genl_family __ro_after_init = { .module = THIS_MODULE, .small_ops = nbd_connect_genl_ops, .n_small_ops = ARRAY_SIZE(nbd_connect_genl_ops), + .resv_start_op = NBD_CMD_STATUS + 1, .maxattr = NBD_ATTR_MAX, .policy = nbd_attr_policy, .mcgrps = nbd_mcast_grps, diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index a208e2b1a9af..15c7dc82107f 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1859,6 +1859,7 @@ static struct genl_family gtp_genl_family __ro_after_init = { .module = THIS_MODULE, .small_ops = gtp_genl_ops, .n_small_ops = ARRAY_SIZE(gtp_genl_ops), + .resv_start_op = GTP_CMD_ECHOREQ + 1, .mcgrps = gtp_genl_mcgrps, .n_mcgrps = ARRAY_SIZE(gtp_genl_mcgrps), }; diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c index 38c217bd7c82..2f0544dd7c2a 100644 --- a/drivers/net/ieee802154/mac802154_hwsim.c +++ b/drivers/net/ieee802154/mac802154_hwsim.c @@ -630,6 +630,7 @@ static struct genl_family hwsim_genl_family __ro_after_init = { .module = THIS_MODULE, .small_ops = hwsim_nl_ops, .n_small_ops = ARRAY_SIZE(hwsim_nl_ops), + .resv_start_op = MAC802154_HWSIM_CMD_NEW_EDGE + 1, .mcgrps = hwsim_mcgrps, .n_mcgrps = ARRAY_SIZE(hwsim_mcgrps), }; diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index c6d271e5687e..adf448a8162b 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3404,6 +3404,7 @@ static struct genl_family macsec_fam __ro_after_init = { .module = THIS_MODULE, .small_ops = macsec_genl_ops, .n_small_ops = ARRAY_SIZE(macsec_genl_ops), + .resv_start_op = MACSEC_CMD_UPD_OFFLOAD + 1, }; static netdev_tx_t macsec_start_xmit(struct sk_buff *skb, diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index aac133a1e27a..b1e1239dfade 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2840,6 +2840,7 @@ static struct genl_family team_nl_family __ro_after_init = { .module = THIS_MODULE, .small_ops = team_nl_ops, .n_small_ops = ARRAY_SIZE(team_nl_ops), + .resv_start_op = TEAM_CMD_PORT_LIST_GET + 1, .mcgrps = team_nl_mcgrps, .n_mcgrps = ARRAY_SIZE(team_nl_mcgrps), }; diff --git a/drivers/net/wireguard/netlink.c b/drivers/net/wireguard/netlink.c index d0f3b6d7f408..0c0644e762e5 100644 --- a/drivers/net/wireguard/netlink.c +++ b/drivers/net/wireguard/netlink.c @@ -621,6 +621,7 @@ static const struct genl_ops genl_ops[] = { static struct genl_family genl_family __ro_after_init = { .ops = genl_ops, .n_ops = ARRAY_SIZE(genl_ops), + .resv_start_op = WG_CMD_SET_DEVICE + 1, .name = WG_GENL_NAME, .version = WG_GENL_VERSION, .maxattr = WGDEVICE_A_MAX, diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 4fb8f68e5c3b..d9054104725e 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -5288,6 +5288,7 @@ static struct genl_family hwsim_genl_family __ro_after_init = { .module = THIS_MODULE, .small_ops = hwsim_ops, .n_small_ops = ARRAY_SIZE(hwsim_ops), + .resv_start_op = HWSIM_CMD_DEL_MAC_ADDR + 1, .mcgrps = hwsim_mcgrps, .n_mcgrps = ARRAY_SIZE(hwsim_mcgrps), }; diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 3deaeecb712e..2940559c3086 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -486,6 +486,7 @@ static struct genl_family tcmu_genl_family __ro_after_init = { .netnsok = true, .small_ops = tcmu_genl_ops, .n_small_ops = ARRAY_SIZE(tcmu_genl_ops), + .resv_start_op = TCMU_CMD_SET_FEATURES + 1, }; #define tcmu_cmd_set_dbi_cur(cmd, index) ((cmd)->dbi_cur = (index)) diff --git a/drivers/thermal/thermal_netlink.c b/drivers/thermal/thermal_netlink.c index 050d243a5fa1..e2d78a996b5f 100644 --- a/drivers/thermal/thermal_netlink.c +++ b/drivers/thermal/thermal_netlink.c @@ -693,6 +693,7 @@ static struct genl_family thermal_gnl_family __ro_after_init = { .policy = thermal_genl_policy, .small_ops = thermal_genl_ops, .n_small_ops = ARRAY_SIZE(thermal_genl_ops), + .resv_start_op = THERMAL_GENL_CMD_CDEV_GET + 1, .mcgrps = thermal_genl_mcgrps, .n_mcgrps = ARRAY_SIZE(thermal_genl_mcgrps), }; diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index c06c02704461..7badf5777597 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -1183,6 +1183,7 @@ static struct genl_family vdpa_nl_family __ro_after_init = { .module = THIS_MODULE, .ops = vdpa_nl_ops, .n_ops = ARRAY_SIZE(vdpa_nl_ops), + .resv_start_op = VDPA_CMD_DEV_VSTATS_GET + 1, }; static int vdpa_init(void) diff --git a/fs/cifs/netlink.c b/fs/cifs/netlink.c index 291cb606f149..147d9409252c 100644 --- a/fs/cifs/netlink.c +++ b/fs/cifs/netlink.c @@ -51,6 +51,7 @@ struct genl_family cifs_genl_family = { .policy = cifs_genl_policy, .ops = cifs_genl_ops, .n_ops = ARRAY_SIZE(cifs_genl_ops), + .resv_start_op = CIFS_GENL_CMD_SWN_NOTIFY + 1, .mcgrps = cifs_genl_mcgrps, .n_mcgrps = ARRAY_SIZE(cifs_genl_mcgrps), }; diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c index 67f68d48d60c..4de4b8651c6c 100644 --- a/fs/dlm/netlink.c +++ b/fs/dlm/netlink.c @@ -75,6 +75,7 @@ static struct genl_family family __ro_after_init = { .version = DLM_GENL_VERSION, .small_ops = dlm_nl_ops, .n_small_ops = ARRAY_SIZE(dlm_nl_ops), + .resv_start_op = DLM_CMD_HELLO + 1, .module = THIS_MODULE, }; diff --git a/fs/ksmbd/transport_ipc.c b/fs/ksmbd/transport_ipc.c index 7cb0eeb07c80..c9aca21637d5 100644 --- a/fs/ksmbd/transport_ipc.c +++ b/fs/ksmbd/transport_ipc.c @@ -197,6 +197,7 @@ static struct genl_family ksmbd_genl_family = { .module = THIS_MODULE, .ops = ksmbd_genl_ops, .n_ops = ARRAY_SIZE(ksmbd_genl_ops), + .resv_start_op = KSMBD_EVENT_SPNEGO_AUTHEN_RESPONSE + 1, }; static void ksmbd_nl_init_fixup(void) diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 939b1a8f571b..4a4b387181ad 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -294,6 +294,7 @@ static struct genl_family ZZZ_genl_family __ro_after_init = { .ops = ZZZ_genl_ops, .n_ops = ARRAY_SIZE(ZZZ_genl_ops), .mcgrps = ZZZ_genl_mcgrps, + .resv_start_op = 42, /* drbd is currently the only user */ .n_mcgrps = ARRAY_SIZE(ZZZ_genl_mcgrps), .module = THIS_MODULE, }; diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 56a50e1c51b9..a4827b5e1e07 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -39,6 +39,8 @@ struct genl_info; * undo operations done by pre_doit, for example release locks * @mcgrps: multicast groups used by this family * @n_mcgrps: number of multicast groups + * @resv_start_op: first operation for which reserved fields of the header + * can be validated, new families should leave this field at zero * @mcgrp_offset: starting number of multicast group IDs in this family * (private) * @ops: the operations supported by this family @@ -58,6 +60,7 @@ struct genl_family { u8 n_ops; u8 n_small_ops; u8 n_mcgrps; + u8 resv_start_op; const struct nla_policy *policy; int (*pre_doit)(const struct genl_ops *ops, struct sk_buff *skb, diff --git a/kernel/taskstats.c b/kernel/taskstats.c index f7e246336218..8ce3fa0c19e2 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -688,6 +688,7 @@ static struct genl_family family __ro_after_init = { .module = THIS_MODULE, .ops = taskstats_ops, .n_ops = ARRAY_SIZE(taskstats_ops), + .resv_start_op = CGROUPSTATS_CMD_GET + 1, .netnsok = true, }; diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 00875e1d8c44..a5e4a4e976cf 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -1493,6 +1493,7 @@ struct genl_family batadv_netlink_family __ro_after_init = { .module = THIS_MODULE, .small_ops = batadv_netlink_ops, .n_small_ops = ARRAY_SIZE(batadv_netlink_ops), + .resv_start_op = BATADV_CMD_SET_VLAN + 1, .mcgrps = batadv_netlink_mcgrps, .n_mcgrps = ARRAY_SIZE(batadv_netlink_mcgrps), }; diff --git a/net/core/devlink.c b/net/core/devlink.c index 2afbeb6eca67..3396fdf802b6 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -9610,6 +9610,7 @@ static struct genl_family devlink_nl_family __ro_after_init = { .module = THIS_MODULE, .small_ops = devlink_nl_ops, .n_small_ops = ARRAY_SIZE(devlink_nl_ops), + .resv_start_op = DEVLINK_CMD_SELFTESTS_RUN + 1, .mcgrps = devlink_nl_mcgrps, .n_mcgrps = ARRAY_SIZE(devlink_nl_mcgrps), }; diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 876664fc605e..f084a4a6b7ab 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -1645,6 +1645,7 @@ static struct genl_family net_drop_monitor_family __ro_after_init = { .module = THIS_MODULE, .small_ops = dropmon_ops, .n_small_ops = ARRAY_SIZE(dropmon_ops), + .resv_start_op = NET_DM_CMD_STATS_GET + 1, .mcgrps = dropmon_mcgrps, .n_mcgrps = ARRAY_SIZE(dropmon_mcgrps), }; diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index e26079e11835..d5e77f1cbbfa 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -1033,6 +1033,7 @@ static struct genl_family ethtool_genl_family __ro_after_init = { .parallel_ops = true, .ops = ethtool_genl_ops, .n_ops = ARRAY_SIZE(ethtool_genl_ops), + .resv_start_op = ETHTOOL_MSG_MODULE_GET + 1, .mcgrps = ethtool_nl_mcgrps, .n_mcgrps = ARRAY_SIZE(ethtool_nl_mcgrps), }; diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index 1405c037cf7a..7174a9092900 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -522,6 +522,7 @@ static struct genl_family hsr_genl_family __ro_after_init = { .module = THIS_MODULE, .small_ops = hsr_ops, .n_small_ops = ARRAY_SIZE(hsr_ops), + .resv_start_op = HSR_C_SET_NODE_LIST + 1, .mcgrps = hsr_mcgrps, .n_mcgrps = ARRAY_SIZE(hsr_mcgrps), }; diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index b07abc38b4b3..7d2de4ee6992 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -132,6 +132,7 @@ struct genl_family nl802154_family __ro_after_init = { .module = THIS_MODULE, .small_ops = ieee802154_ops, .n_small_ops = ARRAY_SIZE(ieee802154_ops), + .resv_start_op = IEEE802154_LLSEC_DEL_SECLEVEL + 1, .mcgrps = ieee802154_mcgrps, .n_mcgrps = ARRAY_SIZE(ieee802154_mcgrps), }; diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index e0b072aecf0f..38c4f3cb010e 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -2500,6 +2500,7 @@ static struct genl_family nl802154_fam __ro_after_init = { .module = THIS_MODULE, .ops = nl802154_ops, .n_ops = ARRAY_SIZE(nl802154_ops), + .resv_start_op = NL802154_CMD_DEL_SEC_LEVEL + 1, .mcgrps = nl802154_mcgrps, .n_mcgrps = ARRAY_SIZE(nl802154_mcgrps), }; diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index cb5bfb77944c..0c3c6d0cee29 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -928,6 +928,7 @@ static struct genl_family fou_nl_family __ro_after_init = { .module = THIS_MODULE, .small_ops = fou_nl_ops, .n_small_ops = ARRAY_SIZE(fou_nl_ops), + .resv_start_op = FOU_CMD_GET + 1, }; size_t fou_encap_hlen(struct ip_tunnel_encap *e) diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index d58e672be31c..82f4575f9cd9 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -969,6 +969,7 @@ static struct genl_family tcp_metrics_nl_family __ro_after_init = { .module = THIS_MODULE, .small_ops = tcp_metrics_nl_ops, .n_small_ops = ARRAY_SIZE(tcp_metrics_nl_ops), + .resv_start_op = TCP_METRICS_CMD_DEL + 1, }; static unsigned int tcpmhash_entries; diff --git a/net/ipv6/ila/ila_main.c b/net/ipv6/ila/ila_main.c index 36c58aa257e8..3faf62530d6a 100644 --- a/net/ipv6/ila/ila_main.c +++ b/net/ipv6/ila/ila_main.c @@ -55,6 +55,7 @@ struct genl_family ila_nl_family __ro_after_init = { .module = THIS_MODULE, .ops = ila_nl_ops, .n_ops = ARRAY_SIZE(ila_nl_ops), + .resv_start_op = ILA_CMD_FLUSH + 1, }; static __net_init int ila_init_net(struct net *net) diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c index 1098131ed90c..571f0e4d9cf3 100644 --- a/net/ipv6/ioam6.c +++ b/net/ipv6/ioam6.c @@ -619,6 +619,7 @@ static struct genl_family ioam6_genl_family __ro_after_init = { .parallel_ops = true, .ops = ioam6_genl_ops, .n_ops = ARRAY_SIZE(ioam6_genl_ops), + .resv_start_op = IOAM6_CMD_NS_SET_SCHEMA + 1, .module = THIS_MODULE, }; diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 73aaabf0e966..5421cc7c935f 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -499,6 +499,7 @@ static struct genl_family seg6_genl_family __ro_after_init = { .parallel_ops = true, .ops = seg6_genl_ops, .n_ops = ARRAY_SIZE(seg6_genl_ops), + .resv_start_op = SEG6_CMD_GET_TUNSRC + 1, .module = THIS_MODULE, }; diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 96eb91be9238..a901fd14fe3b 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -989,6 +989,7 @@ static struct genl_family l2tp_nl_family __ro_after_init = { .module = THIS_MODULE, .small_ops = l2tp_nl_ops, .n_small_ops = ARRAY_SIZE(l2tp_nl_ops), + .resv_start_op = L2TP_CMD_SESSION_GET + 1, .mcgrps = l2tp_multicast_group, .n_mcgrps = ARRAY_SIZE(l2tp_multicast_group), }; diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 291b5da42fdb..a3e4ee7af0ee 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -2280,6 +2280,7 @@ static struct genl_family mptcp_genl_family __ro_after_init = { .module = THIS_MODULE, .small_ops = mptcp_pm_ops, .n_small_ops = ARRAY_SIZE(mptcp_pm_ops), + .resv_start_op = MPTCP_PM_CMD_SUBFLOW_DESTROY + 1, .mcgrps = mptcp_pm_mcgrps, .n_mcgrps = ARRAY_SIZE(mptcp_pm_mcgrps), }; diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c index c189b4c8a182..d27f4eccce6d 100644 --- a/net/ncsi/ncsi-netlink.c +++ b/net/ncsi/ncsi-netlink.c @@ -768,6 +768,7 @@ static struct genl_family ncsi_genl_family __ro_after_init = { .module = THIS_MODULE, .small_ops = ncsi_ops, .n_small_ops = ARRAY_SIZE(ncsi_ops), + .resv_start_op = NCSI_CMD_SET_CHANNEL_MASK + 1, }; static int __init ncsi_init_netlink(void) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index efab2b06d373..818b0b058b10 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -4005,6 +4005,7 @@ static struct genl_family ip_vs_genl_family __ro_after_init = { .module = THIS_MODULE, .small_ops = ip_vs_genl_ops, .n_small_ops = ARRAY_SIZE(ip_vs_genl_ops), + .resv_start_op = IPVS_CMD_FLUSH + 1, }; static int __init ip_vs_genl_register(void) diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c index 91a19c3ea1a3..f1d5b8465217 100644 --- a/net/netlabel/netlabel_calipso.c +++ b/net/netlabel/netlabel_calipso.c @@ -344,6 +344,7 @@ static struct genl_family netlbl_calipso_gnl_family __ro_after_init = { .module = THIS_MODULE, .small_ops = netlbl_calipso_ops, .n_small_ops = ARRAY_SIZE(netlbl_calipso_ops), + .resv_start_op = NLBL_CALIPSO_C_LISTALL + 1, }; /* NetLabel Generic NETLINK Protocol Functions diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 894e6b8f1a86..fa08ee75ac06 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -767,6 +767,7 @@ static struct genl_family netlbl_cipsov4_gnl_family __ro_after_init = { .module = THIS_MODULE, .small_ops = netlbl_cipsov4_ops, .n_small_ops = ARRAY_SIZE(netlbl_cipsov4_ops), + .resv_start_op = NLBL_CIPSOV4_C_LISTALL + 1, }; /* diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 032b7d7b32c7..689eaa2afbec 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -826,6 +826,7 @@ static struct genl_family netlbl_mgmt_gnl_family __ro_after_init = { .module = THIS_MODULE, .small_ops = netlbl_mgmt_genl_ops, .n_small_ops = ARRAY_SIZE(netlbl_mgmt_genl_ops), + .resv_start_op = NLBL_MGMT_C_VERSION + 1, }; /* diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 0555dffd80e0..9996883bf2b7 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1374,6 +1374,7 @@ static struct genl_family netlbl_unlabel_gnl_family __ro_after_init = { .module = THIS_MODULE, .small_ops = netlbl_unlabel_genl_ops, .n_small_ops = ARRAY_SIZE(netlbl_unlabel_genl_ops), + .resv_start_op = NLBL_UNLABEL_C_STATICLISTDEF + 1, }; /* diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 76aed0571e3a..7c136de117eb 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -757,6 +757,9 @@ static int genl_family_rcv_msg(const struct genl_family *family, if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) return -EINVAL; + if (hdr->cmd >= family->resv_start_op && hdr->reserved) + return -EINVAL; + if (genl_get_cmd(hdr->cmd, family, &op)) return -EOPNOTSUPP; @@ -1348,6 +1351,7 @@ static struct genl_family genl_ctrl __ro_after_init = { .module = THIS_MODULE, .ops = genl_ctrl_ops, .n_ops = ARRAY_SIZE(genl_ctrl_ops), + .resv_start_op = CTRL_CMD_GETPOLICY + 1, .mcgrps = genl_ctrl_groups, .n_mcgrps = ARRAY_SIZE(genl_ctrl_groups), .id = GENL_ID_CTRL, diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 7c62417ccfd7..9d91087b9399 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1783,6 +1783,7 @@ static struct genl_family nfc_genl_family __ro_after_init = { .module = THIS_MODULE, .ops = nfc_genl_ops, .n_ops = ARRAY_SIZE(nfc_genl_ops), + .resv_start_op = NFC_CMD_DEACTIVATE_TARGET + 1, .mcgrps = nfc_genl_mcgrps, .n_mcgrps = ARRAY_SIZE(nfc_genl_mcgrps), }; diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 4e70df91d0f2..48e8f5c29b67 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -2283,6 +2283,7 @@ struct genl_family dp_ct_limit_genl_family __ro_after_init = { .parallel_ops = true, .small_ops = ct_limit_genl_ops, .n_small_ops = ARRAY_SIZE(ct_limit_genl_ops), + .resv_start_op = OVS_CT_LIMIT_CMD_GET + 1, .mcgrps = &ovs_ct_limit_multicast_group, .n_mcgrps = 1, .module = THIS_MODULE, diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index e4667700336b..8f49e2359a1b 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -692,6 +692,7 @@ static struct genl_family dp_packet_genl_family __ro_after_init = { .parallel_ops = true, .small_ops = dp_packet_genl_ops, .n_small_ops = ARRAY_SIZE(dp_packet_genl_ops), + .resv_start_op = OVS_PACKET_CMD_EXECUTE + 1, .module = THIS_MODULE, }; @@ -1509,6 +1510,7 @@ static struct genl_family dp_flow_genl_family __ro_after_init = { .parallel_ops = true, .small_ops = dp_flow_genl_ops, .n_small_ops = ARRAY_SIZE(dp_flow_genl_ops), + .resv_start_op = OVS_FLOW_CMD_SET + 1, .mcgrps = &ovs_dp_flow_multicast_group, .n_mcgrps = 1, .module = THIS_MODULE, @@ -2051,6 +2053,7 @@ static struct genl_family dp_datapath_genl_family __ro_after_init = { .parallel_ops = true, .small_ops = dp_datapath_genl_ops, .n_small_ops = ARRAY_SIZE(dp_datapath_genl_ops), + .resv_start_op = OVS_DP_CMD_SET + 1, .mcgrps = &ovs_dp_datapath_multicast_group, .n_mcgrps = 1, .module = THIS_MODULE, diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c index 04a060ac7fdf..51111a9009bd 100644 --- a/net/openvswitch/meter.c +++ b/net/openvswitch/meter.c @@ -720,6 +720,7 @@ struct genl_family dp_meter_genl_family __ro_after_init = { .parallel_ops = true, .small_ops = dp_meter_genl_ops, .n_small_ops = ARRAY_SIZE(dp_meter_genl_ops), + .resv_start_op = OVS_METER_CMD_GET + 1, .mcgrps = &ovs_meter_multicast_group, .n_mcgrps = 1, .module = THIS_MODULE, diff --git a/net/psample/psample.c b/net/psample/psample.c index 118d5d2a81a0..81a794e36f53 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -115,6 +115,7 @@ static struct genl_family psample_nl_family __ro_after_init = { .mcgrps = psample_nl_mcgrps, .small_ops = psample_nl_ops, .n_small_ops = ARRAY_SIZE(psample_nl_ops), + .resv_start_op = PSAMPLE_CMD_GET_GROUP + 1, .n_mcgrps = ARRAY_SIZE(psample_nl_mcgrps), }; diff --git a/net/smc/smc_netlink.c b/net/smc/smc_netlink.c index c5a62f6f52ba..621c46c70073 100644 --- a/net/smc/smc_netlink.c +++ b/net/smc/smc_netlink.c @@ -142,7 +142,8 @@ struct genl_family smc_gen_nl_family __ro_after_init = { .netnsok = true, .module = THIS_MODULE, .ops = smc_gen_nl_ops, - .n_ops = ARRAY_SIZE(smc_gen_nl_ops) + .n_ops = ARRAY_SIZE(smc_gen_nl_ops), + .resv_start_op = SMC_NETLINK_DISABLE_HS_LIMITATION + 1, }; int __init smc_nl_init(void) diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 4c3bf6db7038..25fb2fd186e2 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -715,7 +715,8 @@ static struct genl_family smc_pnet_nl_family __ro_after_init = { .netnsok = true, .module = THIS_MODULE, .ops = smc_pnet_ops, - .n_ops = ARRAY_SIZE(smc_pnet_ops) + .n_ops = ARRAY_SIZE(smc_pnet_ops), + .resv_start_op = SMC_PNETID_FLUSH + 1, }; bool smc_pnet_is_ndev_pnetid(struct net *net, u8 *pnetid) diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index c447cb5f879e..e8fd257c0e68 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -294,6 +294,7 @@ struct genl_family tipc_genl_family __ro_after_init = { .module = THIS_MODULE, .ops = tipc_genl_v2_ops, .n_ops = ARRAY_SIZE(tipc_genl_v2_ops), + .resv_start_op = TIPC_NL_ADDR_LEGACY_GET + 1, }; int __init tipc_netlink_start(void) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 0749df80454d..fc68733673ba 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -1357,6 +1357,7 @@ static struct genl_family tipc_genl_compat_family __ro_after_init = { .module = THIS_MODULE, .small_ops = tipc_genl_compat_ops, .n_small_ops = ARRAY_SIZE(tipc_genl_compat_ops), + .resv_start_op = TIPC_GENL_CMD + 1, }; int __init tipc_netlink_compat_start(void) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index dad88d231d56..e0087176796c 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -17237,6 +17237,7 @@ static struct genl_family nl80211_fam __ro_after_init = { .n_ops = ARRAY_SIZE(nl80211_ops), .small_ops = nl80211_small_ops, .n_small_ops = ARRAY_SIZE(nl80211_small_ops), + .resv_start_op = NL80211_CMD_REMOVE_LINK_STA + 1, .mcgrps = nl80211_mcgrps, .n_mcgrps = ARRAY_SIZE(nl80211_mcgrps), .parallel_ops = true, -- cgit v1.2.3-70-g09d2 From 45dca15759643806660e9285e6af8a1ba3c76c82 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 25 Aug 2022 20:09:32 -0700 Subject: netlink: add helpers for extack attr presence checking Being able to check attribute presence and set extack if not on one line is handy, add helpers. Reviewed-by: Johannes Berg Signed-off-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- include/linux/netlink.h | 11 +++++++++++ include/net/genetlink.h | 7 +++++++ 2 files changed, 18 insertions(+) (limited to 'include/net') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 1619221c415c..d51e041d2242 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -139,6 +139,17 @@ struct netlink_ext_ack { } \ } while (0) +#define NL_REQ_ATTR_CHECK(extack, nest, tb, type) ({ \ + struct nlattr **__tb = (tb); \ + u32 __attr = (type); \ + int __retval; \ + \ + __retval = !__tb[__attr]; \ + if (__retval) \ + NL_SET_ERR_ATTR_MISS((extack), (nest), __attr); \ + __retval; \ +}) + static inline void nl_set_extack_cookie_u64(struct netlink_ext_ack *extack, u64 cookie) { diff --git a/include/net/genetlink.h b/include/net/genetlink.h index a4827b5e1e07..8f780170e2f8 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -110,6 +110,13 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net) #define GENL_SET_ERR_MSG(info, msg) NL_SET_ERR_MSG((info)->extack, msg) +/* Report that a root attribute is missing */ +#define GENL_REQ_ATTR_CHECK(info, attr) ({ \ + struct genl_info *__info = (info); \ + \ + NL_REQ_ATTR_CHECK(__info->extack, NULL, __info->attrs, (attr)); \ +}) + enum genl_validate_flags { GENL_DONT_VALIDATE_STRICT = BIT(0), GENL_DONT_VALIDATE_DUMP = BIT(1), -- cgit v1.2.3-70-g09d2 From 146ecbac1d327e7ed2153cfb3ef880166dc2b312 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 26 Aug 2022 10:27:30 +0200 Subject: net: devlink: stub port params cmds for they are unused internally Follow-up the removal of unused internal api of port params made by commit 42ded61aa75e ("devlink: Delete not used port parameters APIs") and stub the commands and add extack message to tell the user what is going on. If later on port params are needed, could be easily re-introduced, but until then it is a dead code. Signed-off-by: Jiri Pirko Reviewed-by: Jakub Kicinski Link: https://lore.kernel.org/r/20220826082730.1399735-1-jiri@resnulli.us Signed-off-by: Paolo Abeni --- include/net/devlink.h | 1 - net/core/devlink.c | 78 ++++----------------------------------------------- 2 files changed, 5 insertions(+), 74 deletions(-) (limited to 'include/net') diff --git a/include/net/devlink.h b/include/net/devlink.h index 1f7026011856..264aa98e6da6 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -118,7 +118,6 @@ struct devlink_rate { struct devlink_port { struct list_head list; - struct list_head param_list; struct list_head region_list; struct devlink *devlink; unsigned int index; diff --git a/net/core/devlink.c b/net/core/devlink.c index 95fd20ef5862..97a81b0a8ab4 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -5635,89 +5635,22 @@ static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb, static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb) { - struct devlink_param_item *param_item; - struct devlink_port *devlink_port; - struct devlink *devlink; - int start = cb->args[0]; - unsigned long index; - int idx = 0; - int err = 0; - - devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { - devl_lock(devlink); - list_for_each_entry(devlink_port, &devlink->port_list, list) { - list_for_each_entry(param_item, - &devlink_port->param_list, list) { - if (idx < start) { - idx++; - continue; - } - err = devlink_nl_param_fill(msg, - devlink_port->devlink, - devlink_port->index, param_item, - DEVLINK_CMD_PORT_PARAM_GET, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - NLM_F_MULTI); - if (err == -EOPNOTSUPP) { - err = 0; - } else if (err) { - devl_unlock(devlink); - devlink_put(devlink); - goto out; - } - idx++; - } - } - devl_unlock(devlink); - devlink_put(devlink); - } -out: - if (err != -EMSGSIZE) - return err; - - cb->args[0] = idx; + NL_SET_ERR_MSG_MOD(cb->extack, "Port params are not supported"); return msg->len; } static int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb, struct genl_info *info) { - struct devlink_port *devlink_port = info->user_ptr[1]; - struct devlink_param_item *param_item; - struct sk_buff *msg; - int err; - - param_item = devlink_param_get_from_info(&devlink_port->param_list, - info); - if (!param_item) - return -EINVAL; - - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return -ENOMEM; - - err = devlink_nl_param_fill(msg, devlink_port->devlink, - devlink_port->index, param_item, - DEVLINK_CMD_PORT_PARAM_GET, - info->snd_portid, info->snd_seq, 0); - if (err) { - nlmsg_free(msg); - return err; - } - - return genlmsg_reply(msg, info); + NL_SET_ERR_MSG_MOD(info->extack, "Port params are not supported"); + return -EINVAL; } static int devlink_nl_cmd_port_param_set_doit(struct sk_buff *skb, struct genl_info *info) { - struct devlink_port *devlink_port = info->user_ptr[1]; - - return __devlink_nl_cmd_param_set_doit(devlink_port->devlink, - devlink_port->index, - &devlink_port->param_list, info, - DEVLINK_CMD_PORT_PARAM_NEW); + NL_SET_ERR_MSG_MOD(info->extack, "Port params are not supported"); + return -EINVAL; } static int devlink_nl_region_snapshot_id_put(struct sk_buff *msg, @@ -9943,7 +9876,6 @@ int devl_port_register(struct devlink *devlink, INIT_LIST_HEAD(&devlink_port->reporter_list); mutex_init(&devlink_port->reporters_lock); list_add_tail(&devlink_port->list, &devlink->port_list); - INIT_LIST_HEAD(&devlink_port->param_list); INIT_LIST_HEAD(&devlink_port->region_list); INIT_DELAYED_WORK(&devlink_port->type_warn_dw, &devlink_port_type_warn); -- cgit v1.2.3-70-g09d2 From 1a942de092c0b96216864fedcb4d8822ce3fc12e Mon Sep 17 00:00:00 2001 From: Brian Gix Date: Tue, 16 Aug 2022 09:41:20 -0700 Subject: Bluetooth: Move hci_abort_conn to hci_conn.c hci_abort_conn() is a wrapper around a number of DISCONNECT and CREATE_CONN_CANCEL commands that was being invoked from hci_request request queues, which are now deprecated. There are two versions: hci_abort_conn() which can be invoked from the hci_event thread, and hci_abort_conn_sync() which can be invoked within a hci_sync cmd chain. Signed-off-by: Brian Gix Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_conn.c | 76 ++++++++++++++++++++++++++++++++ net/bluetooth/hci_request.c | 93 ---------------------------------------- net/bluetooth/hci_request.h | 1 - net/bluetooth/mgmt.c | 15 ++++++- 5 files changed, 91 insertions(+), 95 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index e7862903187d..932153e68864 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -2075,6 +2075,7 @@ int mgmt_phy_configuration_changed(struct hci_dev *hdev, struct sock *skip); void mgmt_adv_monitor_device_lost(struct hci_dev *hdev, u16 handle, bdaddr_t *bdaddr, u8 addr_type); +int hci_abort_conn(struct hci_conn *conn, u8 reason); u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, u16 to_multiplier); void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __le64 rand, diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 337e74d0f8b1..7a59c4487050 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -2760,3 +2760,79 @@ u32 hci_conn_get_phy(struct hci_conn *conn) return phys; } + +int hci_abort_conn(struct hci_conn *conn, u8 reason) +{ + int r = 0; + + switch (conn->state) { + case BT_CONNECTED: + case BT_CONFIG: + if (conn->type == AMP_LINK) { + struct hci_cp_disconn_phy_link cp; + + cp.phy_handle = HCI_PHY_HANDLE(conn->handle); + cp.reason = reason; + r = hci_send_cmd(conn->hdev, HCI_OP_DISCONN_PHY_LINK, + sizeof(cp), &cp); + } else { + struct hci_cp_disconnect dc; + + dc.handle = cpu_to_le16(conn->handle); + dc.reason = reason; + r = hci_send_cmd(conn->hdev, HCI_OP_DISCONNECT, + sizeof(dc), &dc); + } + + conn->state = BT_DISCONN; + + break; + case BT_CONNECT: + if (conn->type == LE_LINK) { + if (test_bit(HCI_CONN_SCANNING, &conn->flags)) + break; + r = hci_send_cmd(conn->hdev, + HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL); + } else if (conn->type == ACL_LINK) { + if (conn->hdev->hci_ver < BLUETOOTH_VER_1_2) + break; + r = hci_send_cmd(conn->hdev, + HCI_OP_CREATE_CONN_CANCEL, + 6, &conn->dst); + } + break; + case BT_CONNECT2: + if (conn->type == ACL_LINK) { + struct hci_cp_reject_conn_req rej; + + bacpy(&rej.bdaddr, &conn->dst); + rej.reason = reason; + + r = hci_send_cmd(conn->hdev, + HCI_OP_REJECT_CONN_REQ, + sizeof(rej), &rej); + } else if (conn->type == SCO_LINK || conn->type == ESCO_LINK) { + struct hci_cp_reject_sync_conn_req rej; + + bacpy(&rej.bdaddr, &conn->dst); + + /* SCO rejection has its own limited set of + * allowed error values (0x0D-0x0F) which isn't + * compatible with most values passed to this + * function. To be safe hard-code one of the + * values that's suitable for SCO. + */ + rej.reason = HCI_ERROR_REJ_LIMITED_RESOURCES; + + r = hci_send_cmd(conn->hdev, + HCI_OP_REJECT_SYNC_CONN_REQ, + sizeof(rej), &rej); + } + break; + default: + conn->state = BT_CLOSED; + break; + } + + return r; +} diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 2e19a271d7a1..5a0296a4352e 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -909,99 +909,6 @@ static void set_random_addr(struct hci_request *req, bdaddr_t *rpa) hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, rpa); } -static void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, - u8 reason) -{ - switch (conn->state) { - case BT_CONNECTED: - case BT_CONFIG: - if (conn->type == AMP_LINK) { - struct hci_cp_disconn_phy_link cp; - - cp.phy_handle = HCI_PHY_HANDLE(conn->handle); - cp.reason = reason; - hci_req_add(req, HCI_OP_DISCONN_PHY_LINK, sizeof(cp), - &cp); - } else { - struct hci_cp_disconnect dc; - - dc.handle = cpu_to_le16(conn->handle); - dc.reason = reason; - hci_req_add(req, HCI_OP_DISCONNECT, sizeof(dc), &dc); - } - - conn->state = BT_DISCONN; - - break; - case BT_CONNECT: - if (conn->type == LE_LINK) { - if (test_bit(HCI_CONN_SCANNING, &conn->flags)) - break; - hci_req_add(req, HCI_OP_LE_CREATE_CONN_CANCEL, - 0, NULL); - } else if (conn->type == ACL_LINK) { - if (req->hdev->hci_ver < BLUETOOTH_VER_1_2) - break; - hci_req_add(req, HCI_OP_CREATE_CONN_CANCEL, - 6, &conn->dst); - } - break; - case BT_CONNECT2: - if (conn->type == ACL_LINK) { - struct hci_cp_reject_conn_req rej; - - bacpy(&rej.bdaddr, &conn->dst); - rej.reason = reason; - - hci_req_add(req, HCI_OP_REJECT_CONN_REQ, - sizeof(rej), &rej); - } else if (conn->type == SCO_LINK || conn->type == ESCO_LINK) { - struct hci_cp_reject_sync_conn_req rej; - - bacpy(&rej.bdaddr, &conn->dst); - - /* SCO rejection has its own limited set of - * allowed error values (0x0D-0x0F) which isn't - * compatible with most values passed to this - * function. To be safe hard-code one of the - * values that's suitable for SCO. - */ - rej.reason = HCI_ERROR_REJ_LIMITED_RESOURCES; - - hci_req_add(req, HCI_OP_REJECT_SYNC_CONN_REQ, - sizeof(rej), &rej); - } - break; - default: - conn->state = BT_CLOSED; - break; - } -} - -static void abort_conn_complete(struct hci_dev *hdev, u8 status, u16 opcode) -{ - if (status) - bt_dev_dbg(hdev, "Failed to abort connection: status 0x%2.2x", status); -} - -int hci_abort_conn(struct hci_conn *conn, u8 reason) -{ - struct hci_request req; - int err; - - hci_req_init(&req, conn->hdev); - - __hci_abort_conn(&req, conn, reason); - - err = hci_req_run(&req, abort_conn_complete); - if (err && err != -ENODATA) { - bt_dev_err(conn->hdev, "failed to run HCI request: err %d", err); - return err; - } - - return 0; -} - void hci_request_setup(struct hci_dev *hdev) { INIT_DELAYED_WORK(&hdev->interleave_scan, interleave_scan_work); diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 7e1de871fca4..b9c5a9823837 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -73,6 +73,5 @@ void hci_req_add_le_passive_scan(struct hci_request *req); void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next); -int hci_abort_conn(struct hci_conn *conn, u8 reason); void hci_request_setup(struct hci_dev *hdev); void hci_request_cancel_all(struct hci_dev *hdev); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 77e7aa63c0c0..8d70f4a709d4 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3185,6 +3185,18 @@ unlock: return err; } +static int abort_conn_sync(struct hci_dev *hdev, void *data) +{ + struct hci_conn *conn; + u16 handle = PTR_ERR(data); + + conn = hci_conn_hash_lookup_handle(hdev, handle); + if (!conn) + return 0; + + return hci_abort_conn_sync(hdev, conn, HCI_ERROR_REMOTE_USER_TERM); +} + static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { @@ -3235,7 +3247,8 @@ static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data, le_addr_type(addr->type)); if (conn->conn_reason == CONN_REASON_PAIR_DEVICE) - hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM); + hci_cmd_sync_queue(hdev, abort_conn_sync, ERR_PTR(conn->handle), + NULL); unlock: hci_dev_unlock(hdev); -- cgit v1.2.3-70-g09d2 From 4516c873e3b55856012ddd6db9d4366ce3c60c5d Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Tue, 30 Aug 2022 17:22:55 +0800 Subject: net: sched: gred/red: remove unused variables in struct red_stats The variable "other" in the struct red_stats is not used. Remove it. Signed-off-by: Zhengchao Shao Signed-off-by: Jakub Kicinski --- include/net/red.h | 1 - net/sched/sch_gred.c | 3 --- net/sched/sch_red.c | 1 - 3 files changed, 5 deletions(-) (limited to 'include/net') diff --git a/include/net/red.h b/include/net/red.h index be11dbd26492..454ac2b65d8c 100644 --- a/include/net/red.h +++ b/include/net/red.h @@ -122,7 +122,6 @@ struct red_stats { u32 forced_drop; /* Forced drops, qavg > max_thresh */ u32 forced_mark; /* Forced marks, qavg > max_thresh */ u32 pdrop; /* Drops due to queue limits */ - u32 other; /* Drops due to drop() calls */ }; struct red_parms { diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 1073c76d05c4..e7af53f607bb 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -829,7 +829,6 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) opt.Wlog = q->parms.Wlog; opt.Plog = q->parms.Plog; opt.Scell_log = q->parms.Scell_log; - opt.other = q->stats.other; opt.early = q->stats.prob_drop; opt.forced = q->stats.forced_drop; opt.pdrop = q->stats.pdrop; @@ -895,8 +894,6 @@ append_opt: goto nla_put_failure; if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PDROP, q->stats.pdrop)) goto nla_put_failure; - if (nla_put_u32(skb, TCA_GRED_VQ_STAT_OTHER, q->stats.other)) - goto nla_put_failure; nla_nest_end(skb, vq); } diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index f1e013e3f04a..f7ac40c0335e 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -461,7 +461,6 @@ static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d) } st.early = q->stats.prob_drop + q->stats.forced_drop; st.pdrop = q->stats.pdrop; - st.other = q->stats.other; st.marked = q->stats.prob_mark + q->stats.forced_mark; return gnet_stats_copy_app(d, &st, sizeof(st)); -- cgit v1.2.3-70-g09d2 From 8254393663f9b8cb8b84cdce1abb118833c22a54 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 1 Sep 2022 20:06:20 -0700 Subject: net: ieee802154: Fix compilation error when CONFIG_IEEE802154_NL802154_EXPERIMENTAL is disabled When CONFIG_IEEE802154_NL802154_EXPERIMENTAL is disabled, NL802154_CMD_DEL_SEC_LEVEL is undefined and results in a compilation error: net/ieee802154/nl802154.c:2503:19: error: 'NL802154_CMD_DEL_SEC_LEVEL' undeclared here (not in a function); did you mean 'NL802154_CMD_SET_CCA_ED_LEVEL'? 2503 | .resv_start_op = NL802154_CMD_DEL_SEC_LEVEL + 1, | ^~~~~~~~~~~~~~~~~~~~~~~~~~ | NL802154_CMD_SET_CCA_ED_LEVEL Unhide the experimental commands, having them defined in an enum makes no difference. Fixes: 9c5d03d36251 ("genetlink: start to validate reserved header bytes") Signed-off-by: Gal Pressman Acked-by: Stefan Schmidt Tested-by: Sudip Mukherjee Link: https://lore.kernel.org/r/20220902030620.2737091-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/net/nl802154.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/nl802154.h b/include/net/nl802154.h index 145acb8f2509..f5850b569c52 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -58,9 +58,6 @@ enum nl802154_commands { NL802154_CMD_SET_WPAN_PHY_NETNS, - /* add new commands above here */ - -#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL NL802154_CMD_SET_SEC_PARAMS, NL802154_CMD_GET_SEC_KEY, /* can dump */ NL802154_CMD_NEW_SEC_KEY, @@ -74,7 +71,8 @@ enum nl802154_commands { NL802154_CMD_GET_SEC_LEVEL, /* can dump */ NL802154_CMD_NEW_SEC_LEVEL, NL802154_CMD_DEL_SEC_LEVEL, -#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */ + + /* add new commands above here */ /* used to define NL802154_CMD_MAX below */ __NL802154_CMD_AFTER_LAST, -- cgit v1.2.3-70-g09d2 From 6dadbe4bac68309eb46ab0f30e8ff47a789df49a Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 1 Sep 2022 17:28:53 -0700 Subject: bpf: net: Change do_ipv6_getsockopt() to take the sockptr_t argument Similar to the earlier patch that changes sk_getsockopt() to take the sockptr_t argument . This patch also changes do_ipv6_getsockopt() to take the sockptr_t argument such that a latter patch can make bpf_getsockopt(SOL_IPV6) to reuse do_ipv6_getsockopt(). Note on the change in ip6_mc_msfget(). This function is to return an array of sockaddr_storage in optval. This function is shared between ipv6_get_msfilter() and compat_ipv6_get_msfilter(). However, the sockaddr_storage is stored at different offset of the optval because of the difference between group_filter and compat_group_filter. Thus, a new 'ss_offset' argument is added to ip6_mc_msfget(). Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20220902002853.2892532-1-kafai@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/mroute6.h | 4 +-- include/net/ipv6.h | 2 +- net/ipv6/ip6mr.c | 10 +++---- net/ipv6/ipv6_sockglue.c | 69 ++++++++++++++++++++++++++---------------------- net/ipv6/mcast.c | 8 +++--- 5 files changed, 50 insertions(+), 43 deletions(-) (limited to 'include/net') diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index bc351a85ce9b..8f2b307fb124 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -27,7 +27,7 @@ struct sock; #ifdef CONFIG_IPV6_MROUTE extern int ip6_mroute_setsockopt(struct sock *, int, sockptr_t, unsigned int); -extern int ip6_mroute_getsockopt(struct sock *, int, char __user *, int __user *); +extern int ip6_mroute_getsockopt(struct sock *, int, sockptr_t, sockptr_t); extern int ip6_mr_input(struct sk_buff *skb); extern int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg); extern int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); @@ -42,7 +42,7 @@ static inline int ip6_mroute_setsockopt(struct sock *sock, int optname, static inline int ip6_mroute_getsockopt(struct sock *sock, - int optname, char __user *optval, int __user *optlen) + int optname, sockptr_t optval, sockptr_t optlen) { return -ENOPROTOOPT; } diff --git a/include/net/ipv6.h b/include/net/ipv6.h index c110d9032083..a4f24573ed7a 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1209,7 +1209,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, struct sockaddr_storage *list); int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, - struct sockaddr_storage __user *p); + sockptr_t optval, size_t ss_offset); #ifdef CONFIG_PROC_FS int ac6_proc_init(struct net *net); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index a9ba41648e36..516e83b52f26 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1827,8 +1827,8 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval, * Getsock opt support for the multicast routing system. */ -int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, - int __user *optlen) +int ip6_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval, + sockptr_t optlen) { int olr; int val; @@ -1859,16 +1859,16 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, return -ENOPROTOOPT; } - if (get_user(olr, optlen)) + if (copy_from_sockptr(&olr, optlen, sizeof(int))) return -EFAULT; olr = min_t(int, olr, sizeof(int)); if (olr < 0) return -EINVAL; - if (put_user(olr, optlen)) + if (copy_to_sockptr(optlen, &olr, sizeof(int))) return -EFAULT; - if (copy_to_user(optval, &val, olr)) + if (copy_to_sockptr(optval, &val, olr)) return -EFAULT; return 0; } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 4ab284a4adf8..4d9fadef2d3e 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -1030,7 +1030,7 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, EXPORT_SYMBOL(ipv6_setsockopt); static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt, - int optname, char __user *optval, int len) + int optname, sockptr_t optval, int len) { struct ipv6_opt_hdr *hdr; @@ -1058,45 +1058,44 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt, return 0; len = min_t(unsigned int, len, ipv6_optlen(hdr)); - if (copy_to_user(optval, hdr, len)) + if (copy_to_sockptr(optval, hdr, len)) return -EFAULT; return len; } -static int ipv6_get_msfilter(struct sock *sk, void __user *optval, - int __user *optlen, int len) +static int ipv6_get_msfilter(struct sock *sk, sockptr_t optval, + sockptr_t optlen, int len) { const int size0 = offsetof(struct group_filter, gf_slist_flex); - struct group_filter __user *p = optval; struct group_filter gsf; int num; int err; if (len < size0) return -EINVAL; - if (copy_from_user(&gsf, p, size0)) + if (copy_from_sockptr(&gsf, optval, size0)) return -EFAULT; if (gsf.gf_group.ss_family != AF_INET6) return -EADDRNOTAVAIL; num = gsf.gf_numsrc; lock_sock(sk); - err = ip6_mc_msfget(sk, &gsf, p->gf_slist_flex); + err = ip6_mc_msfget(sk, &gsf, optval, size0); if (!err) { if (num > gsf.gf_numsrc) num = gsf.gf_numsrc; - if (put_user(GROUP_FILTER_SIZE(num), optlen) || - copy_to_user(p, &gsf, size0)) + len = GROUP_FILTER_SIZE(num); + if (copy_to_sockptr(optlen, &len, sizeof(int)) || + copy_to_sockptr(optval, &gsf, size0)) err = -EFAULT; } release_sock(sk); return err; } -static int compat_ipv6_get_msfilter(struct sock *sk, void __user *optval, - int __user *optlen, int len) +static int compat_ipv6_get_msfilter(struct sock *sk, sockptr_t optval, + sockptr_t optlen, int len) { const int size0 = offsetof(struct compat_group_filter, gf_slist_flex); - struct compat_group_filter __user *p = optval; struct compat_group_filter gf32; struct group_filter gf; int err; @@ -1105,7 +1104,7 @@ static int compat_ipv6_get_msfilter(struct sock *sk, void __user *optval, if (len < size0) return -EINVAL; - if (copy_from_user(&gf32, p, size0)) + if (copy_from_sockptr(&gf32, optval, size0)) return -EFAULT; gf.gf_interface = gf32.gf_interface; gf.gf_fmode = gf32.gf_fmode; @@ -1116,22 +1115,24 @@ static int compat_ipv6_get_msfilter(struct sock *sk, void __user *optval, return -EADDRNOTAVAIL; lock_sock(sk); - err = ip6_mc_msfget(sk, &gf, p->gf_slist_flex); + err = ip6_mc_msfget(sk, &gf, optval, size0); release_sock(sk); if (err) return err; if (num > gf.gf_numsrc) num = gf.gf_numsrc; len = GROUP_FILTER_SIZE(num) - (sizeof(gf)-sizeof(gf32)); - if (put_user(len, optlen) || - put_user(gf.gf_fmode, &p->gf_fmode) || - put_user(gf.gf_numsrc, &p->gf_numsrc)) + if (copy_to_sockptr(optlen, &len, sizeof(int)) || + copy_to_sockptr_offset(optval, offsetof(struct compat_group_filter, gf_fmode), + &gf.gf_fmode, sizeof(gf32.gf_fmode)) || + copy_to_sockptr_offset(optval, offsetof(struct compat_group_filter, gf_numsrc), + &gf.gf_numsrc, sizeof(gf32.gf_numsrc))) return -EFAULT; return 0; } static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) + sockptr_t optval, sockptr_t optlen) { struct ipv6_pinfo *np = inet6_sk(sk); int len; @@ -1140,7 +1141,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, if (ip6_mroute_opt(optname)) return ip6_mroute_getsockopt(sk, optname, optval, optlen); - if (get_user(len, optlen)) + if (copy_from_sockptr(&len, optlen, sizeof(int))) return -EFAULT; switch (optname) { case IPV6_ADDRFORM: @@ -1164,10 +1165,15 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, if (sk->sk_type != SOCK_STREAM) return -ENOPROTOOPT; - msg.msg_control_user = optval; + if (optval.is_kernel) { + msg.msg_control_is_user = false; + msg.msg_control = optval.kernel; + } else { + msg.msg_control_is_user = true; + msg.msg_control_user = optval.user; + } msg.msg_controllen = len; msg.msg_flags = 0; - msg.msg_control_is_user = true; lock_sock(sk); skb = np->pktoptions; @@ -1210,7 +1216,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, } } len -= msg.msg_controllen; - return put_user(len, optlen); + return copy_to_sockptr(optlen, &len, sizeof(int)); } case IPV6_MTU: { @@ -1270,7 +1276,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, /* check if ipv6_getsockopt_sticky() returns err code */ if (len < 0) return len; - return put_user(len, optlen); + return copy_to_sockptr(optlen, &len, sizeof(int)); } case IPV6_RECVHOPOPTS: @@ -1324,9 +1330,9 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, if (!mtuinfo.ip6m_mtu) return -ENOTCONN; - if (put_user(len, optlen)) + if (copy_to_sockptr(optlen, &len, sizeof(int))) return -EFAULT; - if (copy_to_user(optval, &mtuinfo, len)) + if (copy_to_sockptr(optval, &mtuinfo, len)) return -EFAULT; return 0; @@ -1403,7 +1409,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, if (len < sizeof(freq)) return -EINVAL; - if (copy_from_user(&freq, optval, sizeof(freq))) + if (copy_from_sockptr(&freq, optval, sizeof(freq))) return -EFAULT; if (freq.flr_action != IPV6_FL_A_GET) @@ -1418,9 +1424,9 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, if (val < 0) return val; - if (put_user(len, optlen)) + if (copy_to_sockptr(optlen, &len, sizeof(int))) return -EFAULT; - if (copy_to_user(optval, &freq, len)) + if (copy_to_sockptr(optval, &freq, len)) return -EFAULT; return 0; @@ -1472,9 +1478,9 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, return -ENOPROTOOPT; } len = min_t(unsigned int, sizeof(int), len); - if (put_user(len, optlen)) + if (copy_to_sockptr(optlen, &len, sizeof(int))) return -EFAULT; - if (copy_to_user(optval, &val, len)) + if (copy_to_sockptr(optval, &val, len)) return -EFAULT; return 0; } @@ -1490,7 +1496,8 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname, if (level != SOL_IPV6) return -ENOPROTOOPT; - err = do_ipv6_getsockopt(sk, level, optname, optval, optlen); + err = do_ipv6_getsockopt(sk, level, optname, + USER_SOCKPTR(optval), USER_SOCKPTR(optlen)); #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) { diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 87c699d57b36..0566ab03ddbe 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -580,7 +580,7 @@ done: } int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, - struct sockaddr_storage __user *p) + sockptr_t optval, size_t ss_offset) { struct ipv6_pinfo *inet6 = inet6_sk(sk); const struct in6_addr *group; @@ -612,8 +612,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc; gsf->gf_numsrc = count; - - for (i = 0; i < copycount; i++, p++) { + for (i = 0; i < copycount; i++) { struct sockaddr_in6 *psin6; struct sockaddr_storage ss; @@ -621,8 +620,9 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, memset(&ss, 0, sizeof(ss)); psin6->sin6_family = AF_INET6; psin6->sin6_addr = psl->sl_addr[i]; - if (copy_to_user(p, &ss, sizeof(ss))) + if (copy_to_sockptr_offset(optval, ss_offset, &ss, sizeof(ss))) return -EFAULT; + ss_offset += sizeof(ss); } return 0; } -- cgit v1.2.3-70-g09d2 From 65ddc82d3b96be5555a36de4e2b4547433a00532 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 1 Sep 2022 17:29:12 -0700 Subject: bpf: Change bpf_getsockopt(SOL_SOCKET) to reuse sk_getsockopt() This patch changes bpf_getsockopt(SOL_SOCKET) to reuse sk_getsockopt(). It removes all duplicated code from bpf_getsockopt(SOL_SOCKET). Before this patch, there were some optnames available to bpf_setsockopt(SOL_SOCKET) but missing in bpf_getsockopt(SOL_SOCKET). It surprises users from time to time. For example, SO_REUSEADDR, SO_KEEPALIVE, SO_RCVLOWAT, and SO_MAX_PACING_RATE. This patch automatically closes this gap without duplicating more code. The only exception is SO_BINDTODEVICE because it needs to acquire a blocking lock. Thus, SO_BINDTODEVICE is not supported. Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20220902002912.2894040-1-kafai@fb.com Signed-off-by: Alexei Starovoitov --- include/net/sock.h | 2 ++ net/core/filter.c | 57 ++++++++++++++++++++++-------------------------------- net/core/sock.c | 4 ++-- 3 files changed, 27 insertions(+), 36 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index ee44b424d952..ea7965524133 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1833,6 +1833,8 @@ int sk_setsockopt(struct sock *sk, int level, int optname, int sock_setsockopt(struct socket *sock, int level, int op, sockptr_t optval, unsigned int optlen); +int sk_getsockopt(struct sock *sk, int level, int optname, + sockptr_t optval, sockptr_t optlen); int sock_getsockopt(struct socket *sock, int level, int op, char __user *optval, int __user *optlen); int sock_gettstamp(struct socket *sock, void __user *userstamp, diff --git a/net/core/filter.c b/net/core/filter.c index 55795815cc44..9b26653a7e1f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5017,8 +5017,9 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = { .arg1_type = ARG_PTR_TO_CTX, }; -static int sol_socket_setsockopt(struct sock *sk, int optname, - char *optval, int optlen) +static int sol_socket_sockopt(struct sock *sk, int optname, + char *optval, int *optlen, + bool getopt) { switch (optname) { case SO_REUSEADDR: @@ -5032,7 +5033,7 @@ static int sol_socket_setsockopt(struct sock *sk, int optname, case SO_MAX_PACING_RATE: case SO_BINDTOIFINDEX: case SO_TXREHASH: - if (optlen != sizeof(int)) + if (*optlen != sizeof(int)) return -EINVAL; break; case SO_BINDTODEVICE: @@ -5041,8 +5042,16 @@ static int sol_socket_setsockopt(struct sock *sk, int optname, return -EINVAL; } + if (getopt) { + if (optname == SO_BINDTODEVICE) + return -EINVAL; + return sk_getsockopt(sk, SOL_SOCKET, optname, + KERNEL_SOCKPTR(optval), + KERNEL_SOCKPTR(optlen)); + } + return sk_setsockopt(sk, SOL_SOCKET, optname, - KERNEL_SOCKPTR(optval), optlen); + KERNEL_SOCKPTR(optval), *optlen); } static int bpf_sol_tcp_setsockopt(struct sock *sk, int optname, @@ -5168,7 +5177,7 @@ static int __bpf_setsockopt(struct sock *sk, int level, int optname, return -EINVAL; if (level == SOL_SOCKET) - return sol_socket_setsockopt(sk, optname, optval, optlen); + return sol_socket_sockopt(sk, optname, optval, &optlen, false); else if (IS_ENABLED(CONFIG_INET) && level == SOL_IP) return sol_ip_setsockopt(sk, optname, optval, optlen); else if (IS_ENABLED(CONFIG_IPV6) && level == SOL_IPV6) @@ -5190,38 +5199,13 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, static int __bpf_getsockopt(struct sock *sk, int level, int optname, char *optval, int optlen) { + int err = 0, saved_optlen = optlen; + if (!sk_fullsock(sk)) goto err_clear; if (level == SOL_SOCKET) { - if (optlen != sizeof(int)) - goto err_clear; - - switch (optname) { - case SO_RCVBUF: - *((int *)optval) = sk->sk_rcvbuf; - break; - case SO_SNDBUF: - *((int *)optval) = sk->sk_sndbuf; - break; - case SO_MARK: - *((int *)optval) = sk->sk_mark; - break; - case SO_PRIORITY: - *((int *)optval) = sk->sk_priority; - break; - case SO_BINDTOIFINDEX: - *((int *)optval) = sk->sk_bound_dev_if; - break; - case SO_REUSEPORT: - *((int *)optval) = sk->sk_reuseport; - break; - case SO_TXREHASH: - *((int *)optval) = sk->sk_txrehash; - break; - default: - goto err_clear; - } + err = sol_socket_sockopt(sk, optname, optval, &optlen, true); } else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) { struct inet_connection_sock *icsk; @@ -5278,7 +5262,12 @@ static int __bpf_getsockopt(struct sock *sk, int level, int optname, } else { goto err_clear; } - return 0; + + if (err) + optlen = 0; + if (optlen < saved_optlen) + memset(optval + optlen, 0, saved_optlen - optlen); + return err; err_clear: memset(optval, 0, optlen); return -EINVAL; diff --git a/net/core/sock.c b/net/core/sock.c index 7fa30fd4b37f..68e4662eb2eb 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1583,8 +1583,8 @@ static int groups_to_user(sockptr_t dst, const struct group_info *src) return 0; } -static int sk_getsockopt(struct sock *sk, int level, int optname, - sockptr_t optval, sockptr_t optlen) +int sk_getsockopt(struct sock *sk, int level, int optname, + sockptr_t optval, sockptr_t optlen) { struct socket *sock = sk->sk_socket; -- cgit v1.2.3-70-g09d2 From 273b7f0fb44847c41814a59901977be284daa447 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 1 Sep 2022 17:29:18 -0700 Subject: bpf: Change bpf_getsockopt(SOL_TCP) to reuse do_tcp_getsockopt() This patch changes bpf_getsockopt(SOL_TCP) to reuse do_tcp_getsockopt(). It removes the duplicated code from bpf_getsockopt(SOL_TCP). Before this patch, there were some optnames available to bpf_setsockopt(SOL_TCP) but missing in bpf_getsockopt(SOL_TCP). For example, TCP_NODELAY, TCP_MAXSEG, TCP_KEEPIDLE, TCP_KEEPINTVL, and a few more. It surprises users from time to time. This patch automatically closes this gap without duplicating more code. bpf_getsockopt(TCP_SAVED_SYN) does not free the saved_syn, so it stays in sol_tcp_sockopt(). For string name value like TCP_CONGESTION, bpf expects it is always null terminated, so sol_tcp_sockopt() decrements optlen by one before calling do_tcp_getsockopt() and the 'if (optlen < saved_optlen) memset(..,0,..);' in __bpf_getsockopt() will always do a null termination. Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20220902002918.2894511-1-kafai@fb.com Signed-off-by: Alexei Starovoitov --- include/net/tcp.h | 2 ++ net/core/filter.c | 74 ++++++++++++++++++++++++++++++++----------------------- net/ipv4/tcp.c | 4 +-- 3 files changed, 47 insertions(+), 33 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index c03a50c72f40..735e957f7f4b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -402,6 +402,8 @@ void tcp_init_sock(struct sock *sk); void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb); __poll_t tcp_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); +int do_tcp_getsockopt(struct sock *sk, int level, + int optname, sockptr_t optval, sockptr_t optlen); int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); bool tcp_bpf_bypass_getsockopt(int level, int optname); diff --git a/net/core/filter.c b/net/core/filter.c index 9b26653a7e1f..beadd5b83e6c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5100,8 +5100,9 @@ static int bpf_sol_tcp_setsockopt(struct sock *sk, int optname, return 0; } -static int sol_tcp_setsockopt(struct sock *sk, int optname, - char *optval, int optlen) +static int sol_tcp_sockopt(struct sock *sk, int optname, + char *optval, int *optlen, + bool getopt) { if (sk->sk_prot->setsockopt != tcp_setsockopt) return -EINVAL; @@ -5118,17 +5119,51 @@ static int sol_tcp_setsockopt(struct sock *sk, int optname, case TCP_USER_TIMEOUT: case TCP_NOTSENT_LOWAT: case TCP_SAVE_SYN: - if (optlen != sizeof(int)) + if (*optlen != sizeof(int)) return -EINVAL; break; case TCP_CONGESTION: + if (*optlen < 2) + return -EINVAL; + break; + case TCP_SAVED_SYN: + if (*optlen < 1) + return -EINVAL; break; default: - return bpf_sol_tcp_setsockopt(sk, optname, optval, optlen); + if (getopt) + return -EINVAL; + return bpf_sol_tcp_setsockopt(sk, optname, optval, *optlen); + } + + if (getopt) { + if (optname == TCP_SAVED_SYN) { + struct tcp_sock *tp = tcp_sk(sk); + + if (!tp->saved_syn || + *optlen > tcp_saved_syn_len(tp->saved_syn)) + return -EINVAL; + memcpy(optval, tp->saved_syn->data, *optlen); + /* It cannot free tp->saved_syn here because it + * does not know if the user space still needs it. + */ + return 0; + } + + if (optname == TCP_CONGESTION) { + if (!inet_csk(sk)->icsk_ca_ops) + return -EINVAL; + /* BPF expects NULL-terminated tcp-cc string */ + optval[--(*optlen)] = '\0'; + } + + return do_tcp_getsockopt(sk, SOL_TCP, optname, + KERNEL_SOCKPTR(optval), + KERNEL_SOCKPTR(optlen)); } return do_tcp_setsockopt(sk, SOL_TCP, optname, - KERNEL_SOCKPTR(optval), optlen); + KERNEL_SOCKPTR(optval), *optlen); } static int sol_ip_setsockopt(struct sock *sk, int optname, @@ -5183,7 +5218,7 @@ static int __bpf_setsockopt(struct sock *sk, int level, int optname, else if (IS_ENABLED(CONFIG_IPV6) && level == SOL_IPV6) return sol_ipv6_setsockopt(sk, optname, optval, optlen); else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP) - return sol_tcp_setsockopt(sk, optname, optval, optlen); + return sol_tcp_sockopt(sk, optname, optval, &optlen, false); return -EINVAL; } @@ -5206,31 +5241,8 @@ static int __bpf_getsockopt(struct sock *sk, int level, int optname, if (level == SOL_SOCKET) { err = sol_socket_sockopt(sk, optname, optval, &optlen, true); - } else if (IS_ENABLED(CONFIG_INET) && - level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) { - struct inet_connection_sock *icsk; - struct tcp_sock *tp; - - switch (optname) { - case TCP_CONGESTION: - icsk = inet_csk(sk); - - if (!icsk->icsk_ca_ops || optlen <= 1) - goto err_clear; - strncpy(optval, icsk->icsk_ca_ops->name, optlen); - optval[optlen - 1] = 0; - break; - case TCP_SAVED_SYN: - tp = tcp_sk(sk); - - if (optlen <= 0 || !tp->saved_syn || - optlen > tcp_saved_syn_len(tp->saved_syn)) - goto err_clear; - memcpy(optval, tp->saved_syn->data, optlen); - break; - default: - goto err_clear; - } + } else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP) { + err = sol_tcp_sockopt(sk, optname, optval, &optlen, true); } else if (IS_ENABLED(CONFIG_INET) && level == SOL_IP) { struct inet_sock *inet = inet_sk(sk); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 45c737ee95a1..a822cc627e2a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4043,8 +4043,8 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, return stats; } -static int do_tcp_getsockopt(struct sock *sk, int level, - int optname, sockptr_t optval, sockptr_t optlen) +int do_tcp_getsockopt(struct sock *sk, int level, + int optname, sockptr_t optval, sockptr_t optlen) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); -- cgit v1.2.3-70-g09d2 From fd969f25fe24be515278d28cbf86dde39be8a495 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 1 Sep 2022 17:29:25 -0700 Subject: bpf: Change bpf_getsockopt(SOL_IP) to reuse do_ip_getsockopt() This patch changes bpf_getsockopt(SOL_IP) to reuse do_ip_getsockopt() and remove the duplicated code. Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20220902002925.2895416-1-kafai@fb.com Signed-off-by: Alexei Starovoitov --- include/net/ip.h | 2 ++ net/core/filter.c | 30 ++++++++++++------------------ net/ipv4/ip_sockglue.c | 4 ++-- 3 files changed, 16 insertions(+), 20 deletions(-) (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index 34fa5b0f0a0e..038097c2a152 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -747,6 +747,8 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); +int do_ip_getsockopt(struct sock *sk, int level, int optname, + sockptr_t optval, sockptr_t optlen); int ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); int ip_ra_control(struct sock *sk, unsigned char on, diff --git a/net/core/filter.c b/net/core/filter.c index beadd5b83e6c..33275460fe66 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5166,23 +5166,29 @@ static int sol_tcp_sockopt(struct sock *sk, int optname, KERNEL_SOCKPTR(optval), *optlen); } -static int sol_ip_setsockopt(struct sock *sk, int optname, - char *optval, int optlen) +static int sol_ip_sockopt(struct sock *sk, int optname, + char *optval, int *optlen, + bool getopt) { if (sk->sk_family != AF_INET) return -EINVAL; switch (optname) { case IP_TOS: - if (optlen != sizeof(int)) + if (*optlen != sizeof(int)) return -EINVAL; break; default: return -EINVAL; } + if (getopt) + return do_ip_getsockopt(sk, SOL_IP, optname, + KERNEL_SOCKPTR(optval), + KERNEL_SOCKPTR(optlen)); + return do_ip_setsockopt(sk, SOL_IP, optname, - KERNEL_SOCKPTR(optval), optlen); + KERNEL_SOCKPTR(optval), *optlen); } static int sol_ipv6_setsockopt(struct sock *sk, int optname, @@ -5214,7 +5220,7 @@ static int __bpf_setsockopt(struct sock *sk, int level, int optname, if (level == SOL_SOCKET) return sol_socket_sockopt(sk, optname, optval, &optlen, false); else if (IS_ENABLED(CONFIG_INET) && level == SOL_IP) - return sol_ip_setsockopt(sk, optname, optval, optlen); + return sol_ip_sockopt(sk, optname, optval, &optlen, false); else if (IS_ENABLED(CONFIG_IPV6) && level == SOL_IPV6) return sol_ipv6_setsockopt(sk, optname, optval, optlen); else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP) @@ -5244,19 +5250,7 @@ static int __bpf_getsockopt(struct sock *sk, int level, int optname, } else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP) { err = sol_tcp_sockopt(sk, optname, optval, &optlen, true); } else if (IS_ENABLED(CONFIG_INET) && level == SOL_IP) { - struct inet_sock *inet = inet_sk(sk); - - if (optlen != sizeof(int) || sk->sk_family != AF_INET) - goto err_clear; - - /* Only some options are supported */ - switch (optname) { - case IP_TOS: - *((int *)optval) = (int)inet->tos; - break; - default: - goto err_clear; - } + err = sol_ip_sockopt(sk, optname, optval, &optlen, true); } else if (IS_ENABLED(CONFIG_IPV6) && level == SOL_IPV6) { struct ipv6_pinfo *np = inet6_sk(sk); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 5d134a75cad0..47830f3fea1b 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1524,8 +1524,8 @@ static int compat_ip_get_mcast_msfilter(struct sock *sk, sockptr_t optval, return 0; } -static int do_ip_getsockopt(struct sock *sk, int level, int optname, - sockptr_t optval, sockptr_t optlen) +int do_ip_getsockopt(struct sock *sk, int level, int optname, + sockptr_t optval, sockptr_t optlen) { struct inet_sock *inet = inet_sk(sk); bool needs_rtnl = getsockopt_needs_rtnl(optname); -- cgit v1.2.3-70-g09d2 From 38566ec06f52250c4abaa7447aae676e0b881c46 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 1 Sep 2022 17:29:31 -0700 Subject: bpf: Change bpf_getsockopt(SOL_IPV6) to reuse do_ipv6_getsockopt() This patch changes bpf_getsockopt(SOL_IPV6) to reuse do_ipv6_getsockopt(). It removes the duplicated code from bpf_getsockopt(SOL_IPV6). This also makes bpf_getsockopt(SOL_IPV6) supporting the same set of optnames as in bpf_setsockopt(SOL_IPV6). In particular, this adds IPV6_AUTOFLOWLABEL support to bpf_getsockopt(SOL_IPV6). ipv6 could be compiled as a module. Like how other code solved it with stubs in ipv6_stubs.h, this patch adds the do_ipv6_getsockopt to the ipv6_bpf_stub. Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20220902002931.2896218-1-kafai@fb.com Signed-off-by: Alexei Starovoitov --- include/net/ipv6.h | 2 ++ include/net/ipv6_stubs.h | 2 ++ net/core/filter.c | 55 +++++++++++++++++++++--------------------------- net/ipv6/af_inet6.c | 1 + net/ipv6/ipv6_sockglue.c | 4 ++-- 5 files changed, 31 insertions(+), 33 deletions(-) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index a4f24573ed7a..d664ba5812d8 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1160,6 +1160,8 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval unsigned int optlen); int ipv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); +int do_ipv6_getsockopt(struct sock *sk, int level, int optname, + sockptr_t optval, sockptr_t optlen); int ipv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index 8692698b01cf..c48186bf4737 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -83,6 +83,8 @@ struct ipv6_bpf_stub { struct sk_buff *skb); int (*ipv6_setsockopt)(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); + int (*ipv6_getsockopt)(struct sock *sk, int level, int optname, + sockptr_t optval, sockptr_t optlen); }; extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; diff --git a/net/core/filter.c b/net/core/filter.c index 33275460fe66..ee768bb5b5ab 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5191,8 +5191,9 @@ static int sol_ip_sockopt(struct sock *sk, int optname, KERNEL_SOCKPTR(optval), *optlen); } -static int sol_ipv6_setsockopt(struct sock *sk, int optname, - char *optval, int optlen) +static int sol_ipv6_sockopt(struct sock *sk, int optname, + char *optval, int *optlen, + bool getopt) { if (sk->sk_family != AF_INET6) return -EINVAL; @@ -5200,15 +5201,20 @@ static int sol_ipv6_setsockopt(struct sock *sk, int optname, switch (optname) { case IPV6_TCLASS: case IPV6_AUTOFLOWLABEL: - if (optlen != sizeof(int)) + if (*optlen != sizeof(int)) return -EINVAL; break; default: return -EINVAL; } + if (getopt) + return ipv6_bpf_stub->ipv6_getsockopt(sk, SOL_IPV6, optname, + KERNEL_SOCKPTR(optval), + KERNEL_SOCKPTR(optlen)); + return ipv6_bpf_stub->ipv6_setsockopt(sk, SOL_IPV6, optname, - KERNEL_SOCKPTR(optval), optlen); + KERNEL_SOCKPTR(optval), *optlen); } static int __bpf_setsockopt(struct sock *sk, int level, int optname, @@ -5222,7 +5228,7 @@ static int __bpf_setsockopt(struct sock *sk, int level, int optname, else if (IS_ENABLED(CONFIG_INET) && level == SOL_IP) return sol_ip_sockopt(sk, optname, optval, &optlen, false); else if (IS_ENABLED(CONFIG_IPV6) && level == SOL_IPV6) - return sol_ipv6_setsockopt(sk, optname, optval, optlen); + return sol_ipv6_sockopt(sk, optname, optval, &optlen, false); else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP) return sol_tcp_sockopt(sk, optname, optval, &optlen, false); @@ -5240,43 +5246,30 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, static int __bpf_getsockopt(struct sock *sk, int level, int optname, char *optval, int optlen) { - int err = 0, saved_optlen = optlen; + int err, saved_optlen = optlen; - if (!sk_fullsock(sk)) - goto err_clear; + if (!sk_fullsock(sk)) { + err = -EINVAL; + goto done; + } - if (level == SOL_SOCKET) { + if (level == SOL_SOCKET) err = sol_socket_sockopt(sk, optname, optval, &optlen, true); - } else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP) { + else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP) err = sol_tcp_sockopt(sk, optname, optval, &optlen, true); - } else if (IS_ENABLED(CONFIG_INET) && level == SOL_IP) { + else if (IS_ENABLED(CONFIG_INET) && level == SOL_IP) err = sol_ip_sockopt(sk, optname, optval, &optlen, true); - } else if (IS_ENABLED(CONFIG_IPV6) && level == SOL_IPV6) { - struct ipv6_pinfo *np = inet6_sk(sk); - - if (optlen != sizeof(int) || sk->sk_family != AF_INET6) - goto err_clear; - - /* Only some options are supported */ - switch (optname) { - case IPV6_TCLASS: - *((int *)optval) = (int)np->tclass; - break; - default: - goto err_clear; - } - } else { - goto err_clear; - } + else if (IS_ENABLED(CONFIG_IPV6) && level == SOL_IPV6) + err = sol_ipv6_sockopt(sk, optname, optval, &optlen, true); + else + err = -EINVAL; +done: if (err) optlen = 0; if (optlen < saved_optlen) memset(optval + optlen, 0, saved_optlen - optlen); return err; -err_clear: - memset(optval, 0, optlen); - return -EINVAL; } static int _bpf_getsockopt(struct sock *sk, int level, int optname, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index cadc97852787..19732b5dce23 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -1058,6 +1058,7 @@ static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = { .inet6_bind = __inet6_bind, .udp6_lib_lookup = __udp6_lib_lookup, .ipv6_setsockopt = do_ipv6_setsockopt, + .ipv6_getsockopt = do_ipv6_getsockopt, }; static int __init inet6_init(void) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index d9887e3a6077..1193f5a5247d 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -1131,8 +1131,8 @@ static int compat_ipv6_get_msfilter(struct sock *sk, sockptr_t optval, return 0; } -static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, - sockptr_t optval, sockptr_t optlen) +int do_ipv6_getsockopt(struct sock *sk, int level, int optname, + sockptr_t optval, sockptr_t optlen) { struct ipv6_pinfo *np = inet6_sk(sk); int len; -- cgit v1.2.3-70-g09d2 From 261ce8879578f42bc1ff3385ff1be8e31d6fb160 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Fri, 2 Sep 2022 16:12:41 +0200 Subject: wifi: mac80211: make smps_mode per-link The SMPS power save mode needs to be per-link rather than being shared for all links. As such, move it into struct ieee80211_link_sta. Signed-off-by: Benjamin Berg Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath10k/mac.c | 6 +++--- drivers/net/wireless/ath/ath11k/mac.c | 6 +++--- drivers/net/wireless/ath/ath9k/xmit.c | 2 +- drivers/net/wireless/intel/iwlegacy/4965-rs.c | 2 +- drivers/net/wireless/intel/iwlegacy/common.c | 8 ++++---- drivers/net/wireless/intel/iwlwifi/dvm/rs.c | 6 +++--- drivers/net/wireless/intel/iwlwifi/dvm/sta.c | 10 +++++----- drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c | 6 +++--- drivers/net/wireless/intel/iwlwifi/mvm/rs.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 2 +- drivers/net/wireless/mediatek/mt76/mt7603/main.c | 2 +- drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c | 2 +- drivers/net/wireless/mediatek/mt76/mt76x02_mac.c | 2 +- drivers/net/wireless/mediatek/mt76/mt7915/mcu.c | 4 ++-- drivers/net/wireless/ralink/rt2x00/rt2x00queue.c | 2 +- include/net/mac80211.h | 4 ++-- net/mac80211/he.c | 4 ++-- net/mac80211/ht.c | 7 ++++--- net/mac80211/rc80211_minstrel_ht.c | 6 +++--- net/mac80211/rx.c | 4 ++-- net/mac80211/sta_info.c | 3 ++- 21 files changed, 46 insertions(+), 44 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 23381a9db6ae..e086db920a82 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -8520,7 +8520,7 @@ static void ath10k_sta_rc_update(struct ieee80211_hw *hw, "mac sta rc update for %pM changed %08x bw %d nss %d smps %d\n", sta->addr, changed, sta->deflink.bandwidth, sta->deflink.rx_nss, - sta->smps_mode); + sta->deflink.smps_mode); if (changed & IEEE80211_RC_BW_CHANGED) { bw = WMI_PEER_CHWIDTH_20MHZ; @@ -8554,7 +8554,7 @@ static void ath10k_sta_rc_update(struct ieee80211_hw *hw, if (changed & IEEE80211_RC_SMPS_CHANGED) { smps = WMI_PEER_SMPS_PS_NONE; - switch (sta->smps_mode) { + switch (sta->deflink.smps_mode) { case IEEE80211_SMPS_AUTOMATIC: case IEEE80211_SMPS_OFF: smps = WMI_PEER_SMPS_PS_NONE; @@ -8567,7 +8567,7 @@ static void ath10k_sta_rc_update(struct ieee80211_hw *hw, break; case IEEE80211_SMPS_NUM_MODES: ath10k_warn(ar, "Invalid smps %d in sta rc update for %pM\n", - sta->smps_mode, sta->addr); + sta->deflink.smps_mode, sta->addr); smps = WMI_PEER_SMPS_PS_NONE; break; } diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 7e91e347c9ff..f5bf53d6af7e 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -4701,7 +4701,7 @@ static void ath11k_mac_op_sta_rc_update(struct ieee80211_hw *hw, "mac sta rc update for %pM changed %08x bw %d nss %d smps %d\n", sta->addr, changed, sta->deflink.bandwidth, sta->deflink.rx_nss, - sta->smps_mode); + sta->deflink.smps_mode); spin_lock_bh(&ar->data_lock); @@ -4737,7 +4737,7 @@ static void ath11k_mac_op_sta_rc_update(struct ieee80211_hw *hw, if (changed & IEEE80211_RC_SMPS_CHANGED) { smps = WMI_PEER_SMPS_PS_NONE; - switch (sta->smps_mode) { + switch (sta->deflink.smps_mode) { case IEEE80211_SMPS_AUTOMATIC: case IEEE80211_SMPS_OFF: smps = WMI_PEER_SMPS_PS_NONE; @@ -4750,7 +4750,7 @@ static void ath11k_mac_op_sta_rc_update(struct ieee80211_hw *hw, break; default: ath11k_warn(ar->ab, "Invalid smps %d in sta rc update for %pM\n", - sta->smps_mode, sta->addr); + sta->deflink.smps_mode, sta->addr); smps = WMI_PEER_SMPS_PS_NONE; break; } diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index ba16a7f3e23d..ba271a10d4ab 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -2160,7 +2160,7 @@ static void setup_frame_info(struct ieee80211_hw *hw, fi->keyix = an->ps_key; else fi->keyix = ATH9K_TXKEYIX_INVALID; - fi->dyn_smps = sta && sta->smps_mode == IEEE80211_SMPS_DYNAMIC; + fi->dyn_smps = sta && sta->deflink.smps_mode == IEEE80211_SMPS_DYNAMIC; fi->keytype = keytype; fi->framelen = framelen; fi->tx_power = txpower; diff --git a/drivers/net/wireless/intel/iwlegacy/4965-rs.c b/drivers/net/wireless/intel/iwlegacy/4965-rs.c index d8a5dbf89a02..718efb1aa1b0 100644 --- a/drivers/net/wireless/intel/iwlegacy/4965-rs.c +++ b/drivers/net/wireless/intel/iwlegacy/4965-rs.c @@ -1167,7 +1167,7 @@ il4965_rs_switch_to_mimo2(struct il_priv *il, struct il_lq_sta *lq_sta, if (!conf_is_ht(conf) || !sta->deflink.ht_cap.ht_supported) return -1; - if (sta->smps_mode == IEEE80211_SMPS_STATIC) + if (sta->deflink.smps_mode == IEEE80211_SMPS_STATIC) return -1; /* Need both Tx chains/antennas to support MIMO */ diff --git a/drivers/net/wireless/intel/iwlegacy/common.c b/drivers/net/wireless/intel/iwlegacy/common.c index 04d27a26260b..341c17fe2af4 100644 --- a/drivers/net/wireless/intel/iwlegacy/common.c +++ b/drivers/net/wireless/intel/iwlegacy/common.c @@ -1870,15 +1870,15 @@ il_set_ht_add_station(struct il_priv *il, u8 idx, struct ieee80211_sta *sta) goto done; D_ASSOC("spatial multiplexing power save mode: %s\n", - (sta->smps_mode == IEEE80211_SMPS_STATIC) ? "static" : - (sta->smps_mode == IEEE80211_SMPS_DYNAMIC) ? "dynamic" : + (sta->deflink.smps_mode == IEEE80211_SMPS_STATIC) ? "static" : + (sta->deflink.smps_mode == IEEE80211_SMPS_DYNAMIC) ? "dynamic" : "disabled"); sta_flags = il->stations[idx].sta.station_flags; sta_flags &= ~(STA_FLG_RTS_MIMO_PROT_MSK | STA_FLG_MIMO_DIS_MSK); - switch (sta->smps_mode) { + switch (sta->deflink.smps_mode) { case IEEE80211_SMPS_STATIC: sta_flags |= STA_FLG_MIMO_DIS_MSK; break; @@ -1888,7 +1888,7 @@ il_set_ht_add_station(struct il_priv *il, u8 idx, struct ieee80211_sta *sta) case IEEE80211_SMPS_OFF: break; default: - IL_WARN("Invalid MIMO PS mode %d\n", sta->smps_mode); + IL_WARN("Invalid MIMO PS mode %d\n", sta->deflink.smps_mode); break; } diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/rs.c b/drivers/net/wireless/intel/iwlwifi/dvm/rs.c index baffa1cbe8fc..687c906a9d72 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/rs.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/rs.c @@ -2,7 +2,7 @@ /****************************************************************************** * * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved. - * Copyright (C) 2019 - 2020 Intel Corporation + * Copyright (C) 2019 - 2020, 2022 Intel Corporation *****************************************************************************/ #include #include @@ -1242,7 +1242,7 @@ static int rs_switch_to_mimo2(struct iwl_priv *priv, if (!conf_is_ht(conf) || !sta->deflink.ht_cap.ht_supported) return -1; - if (sta->smps_mode == IEEE80211_SMPS_STATIC) + if (sta->deflink.smps_mode == IEEE80211_SMPS_STATIC) return -1; /* Need both Tx chains/antennas to support MIMO */ @@ -1297,7 +1297,7 @@ static int rs_switch_to_mimo3(struct iwl_priv *priv, if (!conf_is_ht(conf) || !sta->deflink.ht_cap.ht_supported) return -1; - if (sta->smps_mode == IEEE80211_SMPS_STATIC) + if (sta->deflink.smps_mode == IEEE80211_SMPS_STATIC) return -1; /* Need both Tx chains/antennas to support MIMO */ diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/sta.c b/drivers/net/wireless/intel/iwlwifi/dvm/sta.c index 476068c0abb7..cef43cf80620 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/sta.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /****************************************************************************** * - * Copyright(c) 2003 - 2014 Intel Corporation. All rights reserved. + * Copyright(c) 2003 - 2014, 2022 Intel Corporation. All rights reserved. * * Portions of this file are derived from the ipw3945 project, as well * as portions of the ieee80211 subsystem header files. @@ -161,12 +161,12 @@ static void iwl_sta_calc_ht_flags(struct iwl_priv *priv, IWL_DEBUG_INFO(priv, "STA %pM SM PS mode: %s\n", sta->addr, - (sta->smps_mode == IEEE80211_SMPS_STATIC) ? + (sta->deflink.smps_mode == IEEE80211_SMPS_STATIC) ? "static" : - (sta->smps_mode == IEEE80211_SMPS_DYNAMIC) ? + (sta->deflink.smps_mode == IEEE80211_SMPS_DYNAMIC) ? "dynamic" : "disabled"); - switch (sta->smps_mode) { + switch (sta->deflink.smps_mode) { case IEEE80211_SMPS_STATIC: *flags |= STA_FLG_MIMO_DIS_MSK; break; @@ -176,7 +176,7 @@ static void iwl_sta_calc_ht_flags(struct iwl_priv *priv, case IEEE80211_SMPS_OFF: break; default: - IWL_WARN(priv, "Invalid MIMO PS mode %d\n", sta->smps_mode); + IWL_WARN(priv, "Invalid MIMO PS mode %d\n", sta->deflink.smps_mode); break; } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c index d8c3d7ff4f44..752d44d96163 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c @@ -143,7 +143,7 @@ rs_fw_vht_set_enabled_rates(const struct ieee80211_sta *sta, }; /* the station support only a single receive chain */ - if (sta->smps_mode == IEEE80211_SMPS_STATIC) + if (sta->deflink.smps_mode == IEEE80211_SMPS_STATIC) max_nss = 1; for (i = 0; i < max_nss && i < IWL_TLC_NSS_MAX; i++) { @@ -205,7 +205,7 @@ rs_fw_he_set_enabled_rates(const struct ieee80211_sta *sta, u8 nss = sta->deflink.rx_nss; /* the station support only a single receive chain */ - if (sta->smps_mode == IEEE80211_SMPS_STATIC) + if (sta->deflink.smps_mode == IEEE80211_SMPS_STATIC) nss = 1; for (i = 0; i < nss && i < IWL_TLC_NSS_MAX; i++) { @@ -270,7 +270,7 @@ static void rs_fw_set_supp_rates(struct ieee80211_sta *sta, cpu_to_le16(ht_cap->mcs.rx_mask[0]); /* the station support only a single receive chain */ - if (sta->smps_mode == IEEE80211_SMPS_STATIC) + if (sta->deflink.smps_mode == IEEE80211_SMPS_STATIC) cmd->ht_rates[IWL_TLC_NSS_2][IWL_TLC_MCS_PER_BW_80] = 0; else diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c index a79043f30775..814a5e8f3666 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c @@ -138,7 +138,7 @@ static bool rs_mimo_allow(struct iwl_mvm *mvm, struct ieee80211_sta *sta, if (!sta->deflink.ht_cap.ht_supported) return false; - if (sta->smps_mode == IEEE80211_SMPS_STATIC) + if (sta->deflink.smps_mode == IEEE80211_SMPS_STATIC) return false; if (num_of_ant(iwl_mvm_get_valid_tx_ant(mvm)) < 2) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index ff0d3b3df140..cc92706b3d16 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -116,7 +116,7 @@ int iwl_mvm_sta_send_to_fw(struct iwl_mvm *mvm, struct ieee80211_sta *sta, break; } - switch (sta->smps_mode) { + switch (sta->deflink.smps_mode) { case IEEE80211_SMPS_AUTOMATIC: case IEEE80211_SMPS_NUM_MODES: WARN_ON(1); diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/main.c b/drivers/net/wireless/mediatek/mt76/mt7603/main.c index 051715ed90dd..ca50feb0b3a9 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/main.c @@ -658,7 +658,7 @@ mt7603_sta_rate_tbl_update(struct ieee80211_hw *hw, struct ieee80211_vif *vif, mt7603_wtbl_set_rates(dev, msta, NULL, msta->rates); msta->rate_probe = false; mt7603_wtbl_set_smps(dev, msta, - sta->smps_mode == IEEE80211_SMPS_DYNAMIC); + sta->deflink.smps_mode == IEEE80211_SMPS_DYNAMIC); spin_unlock_bh(&dev->mt76.lock); } diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c index 9b17bd97ec09..5c4ca93bcf91 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c @@ -896,7 +896,7 @@ void mt76_connac_mcu_wtbl_smps_tlv(struct sk_buff *skb, tlv = mt76_connac_mcu_add_nested_tlv(skb, WTBL_SMPS, sizeof(*smps), wtbl_tlv, sta_wtbl); smps = (struct wtbl_smps *)tlv; - smps->smps = (sta->smps_mode == IEEE80211_SMPS_DYNAMIC); + smps->smps = (sta->deflink.smps_mode == IEEE80211_SMPS_DYNAMIC); } EXPORT_SYMBOL_GPL(mt76_connac_mcu_wtbl_smps_tlv); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c index de30cf5e2d2f..93d96739f802 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c @@ -404,7 +404,7 @@ void mt76x02_mac_write_txwi(struct mt76x02_dev *dev, struct mt76x02_txwi *txwi, txwi->rate |= cpu_to_le16(MT_RXWI_RATE_LDPC); if ((info->flags & IEEE80211_TX_CTL_STBC) && nss == 1) txwi->rate |= cpu_to_le16(MT_RXWI_RATE_STBC); - if (nss > 1 && sta && sta->smps_mode == IEEE80211_SMPS_DYNAMIC) + if (nss > 1 && sta && sta->deflink.smps_mode == IEEE80211_SMPS_DYNAMIC) txwi_flags |= MT_TXWI_FLAGS_MMPS; if (!(info->flags & IEEE80211_TX_CTL_NO_ACK)) txwi->ack_ctl |= MT_TXWI_ACK_CTL_REQ; diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c index f83067961945..1081b893f319 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c @@ -1304,7 +1304,7 @@ int mt7915_mcu_set_fixed_rate_ctrl(struct mt7915_dev *dev, ra->phy = *phy; break; case RATE_PARAM_MMPS_UPDATE: - ra->mmps_mode = mt7915_mcu_get_mmps_mode(sta->smps_mode); + ra->mmps_mode = mt7915_mcu_get_mmps_mode(sta->deflink.smps_mode); break; default: break; @@ -1459,7 +1459,7 @@ mt7915_mcu_sta_rate_ctrl_tlv(struct sk_buff *skb, struct mt7915_dev *dev, ra->channel = chandef->chan->hw_value; ra->bw = sta->deflink.bandwidth; ra->phy.bw = sta->deflink.bandwidth; - ra->mmps_mode = mt7915_mcu_get_mmps_mode(sta->smps_mode); + ra->mmps_mode = mt7915_mcu_get_mmps_mode(sta->deflink.smps_mode); if (supp_rate) { supp_rate &= mask->control[band].legacy; diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c b/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c index 4d06038afd83..98df0aef8168 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c @@ -318,7 +318,7 @@ static void rt2x00queue_create_tx_descriptor_ht(struct rt2x00_dev *rt2x00dev, * when using more then one tx stream (>MCS7). */ if (sta && txdesc->u.ht.mcs > 7 && - sta->smps_mode == IEEE80211_SMPS_DYNAMIC) + sta->deflink.smps_mode == IEEE80211_SMPS_DYNAMIC) __set_bit(ENTRY_TXD_HT_MIMO_PS, &txdesc->flags); } else { txdesc->u.ht.mcs = rt2x00_get_rate_mcs(hwrate->mcs); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index ffd0ebbff294..d4e1d73d88cc 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2136,6 +2136,7 @@ struct ieee80211_sta_txpwr { * in ieee80211_sta. For MLO Link STA this addr can be same or different * from addr in ieee80211_sta (representing MLD STA addr) * @link_id: the link ID for this link STA (0 for deflink) + * @smps_mode: current SMPS mode (off, static or dynamic) * @supp_rates: Bitmap of supported rates * @ht_cap: HT capabilities of this STA; restricted to our own capabilities * @vht_cap: VHT capabilities of this STA; restricted to our own capabilities @@ -2153,6 +2154,7 @@ struct ieee80211_sta_txpwr { struct ieee80211_link_sta { u8 addr[ETH_ALEN]; u8 link_id; + enum ieee80211_smps_mode smps_mode; u32 supp_rates[NUM_NL80211_BANDS]; struct ieee80211_sta_ht_cap ht_cap; @@ -2191,7 +2193,6 @@ struct ieee80211_link_sta { * if wme is supported. The bits order is like in * IEEE80211_WMM_IE_STA_QOSINFO_AC_*. * @max_sp: max Service Period. Only valid if wme is supported. - * @smps_mode: current SMPS mode (off, static or dynamic) * @rates: rate control selection table * @tdls: indicates whether the STA is a TDLS peer * @tdls_initiator: indicates the STA is an initiator of the TDLS link. Only @@ -2226,7 +2227,6 @@ struct ieee80211_sta { bool wme; u8 uapsd_queues; u8 max_sp; - enum ieee80211_smps_mode smps_mode; struct ieee80211_sta_rates __rcu *rates; bool tdls; bool tdls_initiator; diff --git a/net/mac80211/he.c b/net/mac80211/he.c index d9228fd3f77a..e73899fd4bbb 100644 --- a/net/mac80211/he.c +++ b/net/mac80211/he.c @@ -31,9 +31,9 @@ ieee80211_update_from_he_6ghz_capa(const struct ieee80211_he_6ghz_capa *he_6ghz_ break; } - sta->sta.smps_mode = smps_mode; + link_sta->pub->smps_mode = smps_mode; } else { - sta->sta.smps_mode = IEEE80211_SMPS_OFF; + link_sta->pub->smps_mode = IEEE80211_SMPS_OFF; } switch (le16_get_bits(he_6ghz_capa->capa, diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 8c24817cd497..12a233ba5031 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -299,12 +299,13 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, break; } - if (smps_mode != sta->sta.smps_mode) + if (smps_mode != link_sta->pub->smps_mode) changed = true; - sta->sta.smps_mode = smps_mode; + link_sta->pub->smps_mode = smps_mode; } else { - sta->sta.smps_mode = IEEE80211_SMPS_OFF; + link_sta->pub->smps_mode = IEEE80211_SMPS_OFF; } + return changed; } diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 5f27e6746762..8c41a545a8b7 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2010-2013 Felix Fietkau - * Copyright (C) 2019-2021 Intel Corporation + * Copyright (C) 2019-2022 Intel Corporation */ #include #include @@ -1478,7 +1478,7 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, * - for fallback rates, to increase chances of getting through */ if (offset > 0 || - (mi->sta->smps_mode == IEEE80211_SMPS_DYNAMIC && + (mi->sta->deflink.smps_mode == IEEE80211_SMPS_DYNAMIC && group->streams > 1)) { ratetbl->rate[offset].count = ratetbl->rate[offset].count_rts; flags |= IEEE80211_TX_RC_USE_RTS_CTS; @@ -1779,7 +1779,7 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, nss = minstrel_mcs_groups[i].streams; /* Mark MCS > 7 as unsupported if STA is in static SMPS mode */ - if (sta->smps_mode == IEEE80211_SMPS_STATIC && nss > 1) + if (sta->deflink.smps_mode == IEEE80211_SMPS_STATIC && nss > 1) continue; /* HT rate */ diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 9df232bdc6ab..a57811372027 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3405,9 +3405,9 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) } /* if no change do nothing */ - if (rx->sta->sta.smps_mode == smps_mode) + if (rx->link_sta->pub->smps_mode == smps_mode) goto handled; - rx->sta->sta.smps_mode = smps_mode; + rx->link_sta->pub->smps_mode = smps_mode; sta_opmode.smps_mode = ieee80211_smps_mode_to_smps_mode(smps_mode); sta_opmode.changed = STA_OPMODE_SMPS_MODE_CHANGED; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index fe8702d92892..ac88a894e5f9 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -475,6 +475,8 @@ static void sta_info_add_link(struct sta_info *sta, link_sta->link_id = link_id; rcu_assign_pointer(sta->link[link_id], link_info); rcu_assign_pointer(sta->sta.link[link_id], link_sta); + + link_sta->smps_mode = IEEE80211_SMPS_OFF; } static struct sta_info * @@ -628,7 +630,6 @@ __sta_info_alloc(struct ieee80211_sub_if_data *sdata, } } - sta->sta.smps_mode = IEEE80211_SMPS_OFF; sta->sta.max_rc_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_BA; sta->cparams.ce_threshold = CODEL_DISABLED_THRESHOLD; -- cgit v1.2.3-70-g09d2 From efe9c2bfd1a82894e455514a68dc794556fbd463 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 2 Sep 2022 16:12:42 +0200 Subject: wifi: mac80211: isolate driver from inactive links In order to let the driver select active links and properly make multi-link connections, as a first step isolate the driver from inactive links, and set the active links to be only the association link for client-side interfaces. For AP side nothing changes since APs always have to have all their links active. To simplify things, update the for_each_sta_active_link() API to include the appropriate vif pointer. This also implies not allocating a chanctx for an inactive link, which requires a few more changes. Since we now no longer try to program multiple links to the driver, remove the check in the MLME code. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 30 ++++---- net/mac80211/chan.c | 6 ++ net/mac80211/driver-ops.c | 172 ++++++++++++++++++++++++++++++++++++++++++++++ net/mac80211/driver-ops.h | 165 +++++++------------------------------------- net/mac80211/key.c | 8 +++ net/mac80211/link.c | 66 ++++++++++++++---- net/mac80211/mlme.c | 25 ++----- net/mac80211/util.c | 2 +- 8 files changed, 286 insertions(+), 188 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d4e1d73d88cc..20a2f25a38fa 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1799,6 +1799,9 @@ struct ieee80211_vif_cfg { * @link_conf: in case of MLD, the per-link BSS configuration, * indexed by link ID * @valid_links: bitmap of valid links, or 0 for non-MLO. + * @active_links: The bitmap of active links, or 0 for non-MLO. + * The driver shouldn't change this directly, but use the + * API calls meant for that purpose. * @addr: address of this interface * @p2p: indicates whether this AP or STA interface is a p2p * interface, i.e. a GO or p2p-sta respectively @@ -1834,7 +1837,7 @@ struct ieee80211_vif { struct ieee80211_vif_cfg cfg; struct ieee80211_bss_conf bss_conf; struct ieee80211_bss_conf __rcu *link_conf[IEEE80211_MLD_MAX_NUM_LINKS]; - u16 valid_links; + u16 valid_links, active_links; u8 addr[ETH_ALEN] __aligned(2); bool p2p; @@ -1861,12 +1864,11 @@ struct ieee80211_vif { u8 drv_priv[] __aligned(sizeof(void *)); }; -/* FIXME: for now loop over all the available links; later will be changed - * to loop only over the active links. - */ -#define for_each_vif_active_link(vif, link, link_id) \ - for (link_id = 0; link_id < ARRAY_SIZE((vif)->link_conf); link_id++) \ - if ((link = rcu_dereference((vif)->link_conf[link_id]))) +#define for_each_vif_active_link(vif, link, link_id) \ + for (link_id = 0; link_id < ARRAY_SIZE((vif)->link_conf); link_id++) \ + if ((!(vif)->active_links || \ + (vif)->active_links & BIT(link_id)) && \ + (link = rcu_dereference((vif)->link_conf[link_id]))) static inline bool ieee80211_vif_is_mesh(struct ieee80211_vif *vif) { @@ -2264,13 +2266,13 @@ struct ieee80211_sta { u8 drv_priv[] __aligned(sizeof(void *)); }; -/* FIXME: need to loop only over links which are active and check the actual - * lock - */ -#define for_each_sta_active_link(sta, link_sta, link_id) \ - for (link_id = 0; link_id < ARRAY_SIZE((sta)->link); link_id++) \ - if (((link_sta) = rcu_dereference_protected((sta)->link[link_id],\ - 1))) \ +/* FIXME: check the locking correctly */ +#define for_each_sta_active_link(vif, sta, link_sta, link_id) \ + for (link_id = 0; link_id < ARRAY_SIZE((sta)->link); link_id++) \ + if ((!(vif)->active_links || \ + (vif)->active_links & BIT(link_id)) && \ + ((link_sta) = rcu_dereference_protected((sta)->link[link_id],\ + 1))) /** * enum sta_notify_cmd - sta notify command diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index f247daa41563..e72cf0749d49 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -1799,6 +1799,12 @@ int ieee80211_link_use_channel(struct ieee80211_link_data *link, lockdep_assert_held(&local->mtx); + if (sdata->vif.active_links && + !(sdata->vif.active_links & BIT(link->link_id))) { + ieee80211_link_update_chandef(link, chandef); + return 0; + } + mutex_lock(&local->chanctx_mtx); ret = cfg80211_chandef_dfs_required(local->hw.wiphy, diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index 9b61dc7889c2..5392ffa18270 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -192,6 +192,10 @@ int drv_conf_tx(struct ieee80211_local *local, if (!check_sdata_in_driver(sdata)) return -EIO; + if (sdata->vif.active_links && + !(sdata->vif.active_links & BIT(link->link_id))) + return 0; + if (params->cw_min == 0 || params->cw_min > params->cw_max) { /* * If we can't configure hardware anyway, don't warn. We may @@ -272,6 +276,60 @@ void drv_reset_tsf(struct ieee80211_local *local, trace_drv_return_void(local); } +int drv_assign_vif_chanctx(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_bss_conf *link_conf, + struct ieee80211_chanctx *ctx) +{ + int ret = 0; + + drv_verify_link_exists(sdata, link_conf); + if (!check_sdata_in_driver(sdata)) + return -EIO; + + if (sdata->vif.active_links && + !(sdata->vif.active_links & BIT(link_conf->link_id))) + return 0; + + trace_drv_assign_vif_chanctx(local, sdata, link_conf, ctx); + if (local->ops->assign_vif_chanctx) { + WARN_ON_ONCE(!ctx->driver_present); + ret = local->ops->assign_vif_chanctx(&local->hw, + &sdata->vif, + link_conf, + &ctx->conf); + } + trace_drv_return_int(local, ret); + + return ret; +} + +void drv_unassign_vif_chanctx(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_bss_conf *link_conf, + struct ieee80211_chanctx *ctx) +{ + might_sleep(); + + drv_verify_link_exists(sdata, link_conf); + if (!check_sdata_in_driver(sdata)) + return; + + if (sdata->vif.active_links && + !(sdata->vif.active_links & BIT(link_conf->link_id))) + return; + + trace_drv_unassign_vif_chanctx(local, sdata, link_conf, ctx); + if (local->ops->unassign_vif_chanctx) { + WARN_ON_ONCE(!ctx->driver_present); + local->ops->unassign_vif_chanctx(&local->hw, + &sdata->vif, + link_conf, + &ctx->conf); + } + trace_drv_return_void(local); +} + int drv_switch_vif_chanctx(struct ieee80211_local *local, struct ieee80211_vif_chanctx_switch *vifs, int n_vifs, enum ieee80211_chanctx_switch_mode mode) @@ -346,3 +404,117 @@ int drv_ampdu_action(struct ieee80211_local *local, return ret; } + +void drv_link_info_changed(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_bss_conf *info, + int link_id, u64 changed) +{ + might_sleep(); + + if (WARN_ON_ONCE(changed & (BSS_CHANGED_BEACON | + BSS_CHANGED_BEACON_ENABLED) && + sdata->vif.type != NL80211_IFTYPE_AP && + sdata->vif.type != NL80211_IFTYPE_ADHOC && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT && + sdata->vif.type != NL80211_IFTYPE_OCB)) + return; + + if (WARN_ON_ONCE(sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE || + sdata->vif.type == NL80211_IFTYPE_NAN || + (sdata->vif.type == NL80211_IFTYPE_MONITOR && + !sdata->vif.bss_conf.mu_mimo_owner && + !(changed & BSS_CHANGED_TXPOWER)))) + return; + + if (!check_sdata_in_driver(sdata)) + return; + + if (sdata->vif.active_links && + !(sdata->vif.active_links & BIT(link_id))) + return; + + trace_drv_link_info_changed(local, sdata, info, changed); + if (local->ops->link_info_changed) + local->ops->link_info_changed(&local->hw, &sdata->vif, + info, changed); + else if (local->ops->bss_info_changed) + local->ops->bss_info_changed(&local->hw, &sdata->vif, + info, changed); + trace_drv_return_void(local); +} + +int drv_set_key(struct ieee80211_local *local, + enum set_key_cmd cmd, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta, + struct ieee80211_key_conf *key) +{ + int ret; + + might_sleep(); + + sdata = get_bss_sdata(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; + + if (WARN_ON(key->link_id >= 0 && sdata->vif.active_links && + !(sdata->vif.active_links & BIT(key->link_id)))) + return -ENOLINK; + + trace_drv_set_key(local, cmd, sdata, sta, key); + ret = local->ops->set_key(&local->hw, cmd, &sdata->vif, sta, key); + trace_drv_return_int(local, ret); + return ret; +} + +int drv_change_vif_links(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + u16 old_links, u16 new_links, + struct ieee80211_bss_conf *old[IEEE80211_MLD_MAX_NUM_LINKS]) +{ + int ret = -EOPNOTSUPP; + + might_sleep(); + + if (!check_sdata_in_driver(sdata)) + return -EIO; + + if (old_links == new_links) + return 0; + + trace_drv_change_vif_links(local, sdata, old_links, new_links); + if (local->ops->change_vif_links) + ret = local->ops->change_vif_links(&local->hw, &sdata->vif, + old_links, new_links, old); + trace_drv_return_int(local, ret); + + return ret; +} + +int drv_change_sta_links(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta, + u16 old_links, u16 new_links) +{ + int ret = -EOPNOTSUPP; + + might_sleep(); + + if (!check_sdata_in_driver(sdata)) + return -EIO; + + old_links &= sdata->vif.active_links; + new_links &= sdata->vif.active_links; + + if (old_links == new_links) + return 0; + + trace_drv_change_sta_links(local, sdata, sta, old_links, new_links); + if (local->ops->change_sta_links) + ret = local->ops->change_sta_links(&local->hw, &sdata->vif, sta, + old_links, new_links); + trace_drv_return_int(local, ret); + + return ret; +} diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 482f5c97a72b..81e40b0a3b16 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -165,40 +165,10 @@ static inline void drv_vif_cfg_changed(struct ieee80211_local *local, trace_drv_return_void(local); } -static inline void drv_link_info_changed(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - struct ieee80211_bss_conf *info, - int link_id, u64 changed) -{ - might_sleep(); - - if (WARN_ON_ONCE(changed & (BSS_CHANGED_BEACON | - BSS_CHANGED_BEACON_ENABLED) && - sdata->vif.type != NL80211_IFTYPE_AP && - sdata->vif.type != NL80211_IFTYPE_ADHOC && - sdata->vif.type != NL80211_IFTYPE_MESH_POINT && - sdata->vif.type != NL80211_IFTYPE_OCB)) - return; - - if (WARN_ON_ONCE(sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE || - sdata->vif.type == NL80211_IFTYPE_NAN || - (sdata->vif.type == NL80211_IFTYPE_MONITOR && - !sdata->vif.bss_conf.mu_mimo_owner && - !(changed & BSS_CHANGED_TXPOWER)))) - return; - - if (!check_sdata_in_driver(sdata)) - return; - - trace_drv_link_info_changed(local, sdata, info, changed); - if (local->ops->link_info_changed) - local->ops->link_info_changed(&local->hw, &sdata->vif, - info, changed); - else if (local->ops->bss_info_changed) - local->ops->bss_info_changed(&local->hw, &sdata->vif, - info, changed); - trace_drv_return_void(local); -} +void drv_link_info_changed(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_bss_conf *info, + int link_id, u64 changed); static inline u64 drv_prepare_multicast(struct ieee80211_local *local, struct netdev_hw_addr_list *mc_list) @@ -256,25 +226,11 @@ static inline int drv_set_tim(struct ieee80211_local *local, return ret; } -static inline int drv_set_key(struct ieee80211_local *local, - enum set_key_cmd cmd, - struct ieee80211_sub_if_data *sdata, - struct ieee80211_sta *sta, - struct ieee80211_key_conf *key) -{ - int ret; - - might_sleep(); - - sdata = get_bss_sdata(sdata); - if (!check_sdata_in_driver(sdata)) - return -EIO; - - trace_drv_set_key(local, cmd, sdata, sta, key); - ret = local->ops->set_key(&local->hw, cmd, &sdata->vif, sta, key); - trace_drv_return_int(local, ret); - return ret; -} +int drv_set_key(struct ieee80211_local *local, + enum set_key_cmd cmd, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta, + struct ieee80211_key_conf *key); static inline void drv_update_tkip_key(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, @@ -945,52 +901,14 @@ static inline void drv_verify_link_exists(struct ieee80211_sub_if_data *sdata, sdata_assert_lock(sdata); } -static inline int drv_assign_vif_chanctx(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - struct ieee80211_bss_conf *link_conf, - struct ieee80211_chanctx *ctx) -{ - int ret = 0; - - drv_verify_link_exists(sdata, link_conf); - if (!check_sdata_in_driver(sdata)) - return -EIO; - - trace_drv_assign_vif_chanctx(local, sdata, link_conf, ctx); - if (local->ops->assign_vif_chanctx) { - WARN_ON_ONCE(!ctx->driver_present); - ret = local->ops->assign_vif_chanctx(&local->hw, - &sdata->vif, - link_conf, - &ctx->conf); - } - trace_drv_return_int(local, ret); - - return ret; -} - -static inline void drv_unassign_vif_chanctx(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - struct ieee80211_bss_conf *link_conf, - struct ieee80211_chanctx *ctx) -{ - might_sleep(); - - drv_verify_link_exists(sdata, link_conf); - if (!check_sdata_in_driver(sdata)) - return; - - trace_drv_unassign_vif_chanctx(local, sdata, link_conf, ctx); - if (local->ops->unassign_vif_chanctx) { - WARN_ON_ONCE(!ctx->driver_present); - local->ops->unassign_vif_chanctx(&local->hw, - &sdata->vif, - link_conf, - &ctx->conf); - } - trace_drv_return_void(local); -} - +int drv_assign_vif_chanctx(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_bss_conf *link_conf, + struct ieee80211_chanctx *ctx); +void drv_unassign_vif_chanctx(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_bss_conf *link_conf, + struct ieee80211_chanctx *ctx); int drv_switch_vif_chanctx(struct ieee80211_local *local, struct ieee80211_vif_chanctx_switch *vifs, int n_vifs, enum ieee80211_chanctx_switch_mode mode); @@ -1552,46 +1470,13 @@ static inline int drv_net_fill_forward_path(struct ieee80211_local *local, return ret; } -static inline int drv_change_vif_links(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - u16 old_links, u16 new_links, - struct ieee80211_bss_conf *old[IEEE80211_MLD_MAX_NUM_LINKS]) -{ - int ret = -EOPNOTSUPP; - - might_sleep(); - - if (!check_sdata_in_driver(sdata)) - return -EIO; - - trace_drv_change_vif_links(local, sdata, old_links, new_links); - if (local->ops->change_vif_links) - ret = local->ops->change_vif_links(&local->hw, &sdata->vif, - old_links, new_links, old); - trace_drv_return_int(local, ret); - - return ret; -} - -static inline int drv_change_sta_links(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - struct ieee80211_sta *sta, - u16 old_links, u16 new_links) -{ - int ret = -EOPNOTSUPP; - - might_sleep(); - - if (!check_sdata_in_driver(sdata)) - return -EIO; - - trace_drv_change_sta_links(local, sdata, sta, old_links, new_links); - if (local->ops->change_sta_links) - ret = local->ops->change_sta_links(&local->hw, &sdata->vif, sta, - old_links, new_links); - trace_drv_return_int(local, ret); - - return ret; -} +int drv_change_vif_links(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + u16 old_links, u16 new_links, + struct ieee80211_bss_conf *old[IEEE80211_MLD_MAX_NUM_LINKS]); +int drv_change_sta_links(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta, + u16 old_links, u16 new_links); #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/key.c b/net/mac80211/key.c index d89ec93b243b..f6f0f65fb255 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -177,6 +177,10 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) } } + if (key->conf.link_id >= 0 && sdata->vif.active_links && + !(sdata->vif.active_links & BIT(key->conf.link_id))) + return 0; + ret = drv_set_key(key->local, SET_KEY, sdata, sta ? &sta->sta : NULL, &key->conf); @@ -246,6 +250,10 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) sta = key->sta; sdata = key->sdata; + if (key->conf.link_id >= 0 && sdata->vif.active_links && + !(sdata->vif.active_links & BIT(key->conf.link_id))) + return; + if (!(key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC | IEEE80211_KEY_FLAG_PUT_MIC_SPACE | IEEE80211_KEY_FLAG_RESERVE_TAILROOM))) diff --git a/net/mac80211/link.c b/net/mac80211/link.c index 096f313c2a6e..8df348a5edce 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -73,28 +73,37 @@ struct link_container { struct ieee80211_bss_conf conf; }; -static void ieee80211_free_links(struct ieee80211_sub_if_data *sdata, - struct link_container **links) +static void ieee80211_tear_down_links(struct ieee80211_sub_if_data *sdata, + struct link_container **links, u16 mask) { + struct ieee80211_link_data *link; LIST_HEAD(keys); unsigned int link_id; for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { - if (!links[link_id]) + if (!(mask & BIT(link_id))) + continue; + link = &links[link_id]->data; + if (link_id == 0 && !link) + link = &sdata->deflink; + if (WARN_ON(!link)) continue; - ieee80211_remove_link_keys(&links[link_id]->data, &keys); + ieee80211_remove_link_keys(link, &keys); + ieee80211_link_stop(link); } synchronize_rcu(); ieee80211_free_key_list(sdata->local, &keys); +} - for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { - if (!links[link_id]) - continue; - ieee80211_link_stop(&links[link_id]->data); +static void ieee80211_free_links(struct ieee80211_sub_if_data *sdata, + struct link_container **links) +{ + unsigned int link_id; + + for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) kfree(links[link_id]); - } } static int ieee80211_check_dup_link_addrs(struct ieee80211_sub_if_data *sdata) @@ -123,11 +132,38 @@ static int ieee80211_check_dup_link_addrs(struct ieee80211_sub_if_data *sdata) return 0; } +static void ieee80211_set_vif_links_bitmaps(struct ieee80211_sub_if_data *sdata, + u16 links) +{ + sdata->vif.valid_links = links; + + if (!links) { + sdata->vif.active_links = 0; + return; + } + + switch (sdata->vif.type) { + case NL80211_IFTYPE_AP: + /* in an AP all links are always active */ + sdata->vif.active_links = links; + break; + case NL80211_IFTYPE_STATION: + if (sdata->vif.active_links) + break; + WARN_ON(hweight16(links) > 1); + sdata->vif.active_links = links; + break; + default: + WARN_ON(1); + } +} + static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata, struct link_container **to_free, u16 new_links) { u16 old_links = sdata->vif.valid_links; + u16 old_active = sdata->vif.active_links; unsigned long add = new_links & ~old_links; unsigned long rem = old_links & ~new_links; unsigned int link_id; @@ -195,13 +231,17 @@ static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata, ieee80211_link_init(sdata, -1, &sdata->deflink, &sdata->vif.bss_conf); - sdata->vif.valid_links = new_links; - ret = ieee80211_check_dup_link_addrs(sdata); if (!ret) { + /* for keys we will not be able to undo this */ + ieee80211_tear_down_links(sdata, to_free, rem); + + ieee80211_set_vif_links_bitmaps(sdata, new_links); + /* tell the driver */ ret = drv_change_vif_links(sdata->local, sdata, - old_links, new_links, + old_links & old_active, + new_links & sdata->vif.active_links, old); } @@ -209,7 +249,7 @@ static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata, /* restore config */ memcpy(sdata->link, old_data, sizeof(old_data)); memcpy(sdata->vif.link_conf, old, sizeof(old)); - sdata->vif.valid_links = old_links; + ieee80211_set_vif_links_bitmaps(sdata, old_links); /* and free (only) the newly allocated links */ memset(to_free, 0, sizeof(links)); goto free; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 699e409ef45a..609584493ce0 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4056,11 +4056,11 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, goto out; } - sband = ieee80211_get_link_sband(link); - if (!sband) { + if (WARN_ON(!link->conf->chandef.chan)) { ret = false; goto out; } + sband = local->hw.wiphy->bands[link->conf->chandef.chan->band]; if (!(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE) && (!elems->he_cap || !elems->he_operation)) { @@ -4884,8 +4884,10 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, err = ieee80211_prep_channel(sdata, link, assoc_data->link[link_id].bss, &link->u.mgd.conn_flags); - if (err) + if (err) { + link_info(link, "prep_channel failed\n"); goto out_err; + } } err = ieee80211_mgd_setup_link_sta(link, sta, link_sta, @@ -6889,23 +6891,6 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, for (i = 0; i < IEEE80211_MLD_MAX_NUM_LINKS; i++) size += req->links[i].elems_len; - if (req->ap_mld_addr) { - for (i = 0; i < IEEE80211_MLD_MAX_NUM_LINKS; i++) { - if (!req->links[i].bss) - continue; - if (i == assoc_link_id) - continue; - /* - * For now, support only a single link in MLO, we - * don't have the necessary parsing of the multi- - * link element in the association response, etc. - */ - sdata_info(sdata, - "refusing MLO association with >1 links\n"); - return -EINVAL; - } - } - /* FIXME: no support for 4-addr MLO yet */ if (sdata->u.mgd.use_4addr && req->link_id >= 0) return -EOPNOTSUPP; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 3359ab332d7d..0ea5d50091dc 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2900,7 +2900,7 @@ void ieee80211_recalc_min_chandef(struct ieee80211_sub_if_data *sdata, */ rcu_read_unlock(); - if (WARN_ON_ONCE(!chanctx_conf)) + if (!chanctx_conf) goto unlock; chanctx = container_of(chanctx_conf, struct ieee80211_chanctx, -- cgit v1.2.3-70-g09d2 From ffa9598ecb93630a45564b95ae17362b819b38de Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 2 Sep 2022 16:12:43 +0200 Subject: wifi: mac80211: add ieee80211_find_sta_by_link_addrs API Add a new API function ieee80211_find_sta_by_link_addrs() that looks up the STA and link ID based on interface and station link addresses. We're going to use it for mac80211-hwsim to track on the AP side which links are active. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 16 ++++++++++++++++ net/mac80211/sta_info.c | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 20a2f25a38fa..954cc029a9f9 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -5986,6 +5986,22 @@ struct ieee80211_sta *ieee80211_find_sta_by_ifaddr(struct ieee80211_hw *hw, const u8 *addr, const u8 *localaddr); +/** + * ieee80211_find_sta_by_link_addrs - find STA by link addresses + * @hw: pointer as obtained from ieee80211_alloc_hw() + * @addr: remote station's link address + * @localaddr: local link address, use %NULL for any (but avoid that) + * @link_id: pointer to obtain the link ID if the STA is found, + * may be %NULL if the link ID is not needed + * + * Obtain the STA by link address, must use RCU protection. + */ +struct ieee80211_sta * +ieee80211_find_sta_by_link_addrs(struct ieee80211_hw *hw, + const u8 *addr, + const u8 *localaddr, + unsigned int *link_id); + /** * ieee80211_sta_block_awake - block station from waking up * @hw: the hardware diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index ac88a894e5f9..bbf582a5702d 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -274,6 +274,43 @@ link_sta_info_get_bss(struct ieee80211_sub_if_data *sdata, const u8 *addr) return NULL; } +struct ieee80211_sta * +ieee80211_find_sta_by_link_addrs(struct ieee80211_hw *hw, + const u8 *addr, + const u8 *localaddr, + unsigned int *link_id) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct link_sta_info *link_sta; + struct rhlist_head *tmp; + + for_each_link_sta_info(local, addr, link_sta, tmp) { + struct sta_info *sta = link_sta->sta; + struct ieee80211_link_data *link; + u8 _link_id = link_sta->link_id; + + if (!localaddr) { + if (link_id) + *link_id = _link_id; + return &sta->sta; + } + + link = rcu_dereference(sta->sdata->link[_link_id]); + if (!link) + continue; + + if (memcmp(link->conf->addr, localaddr, ETH_ALEN)) + continue; + + if (link_id) + *link_id = _link_id; + return &sta->sta; + } + + return NULL; +} +EXPORT_SYMBOL_GPL(ieee80211_find_sta_by_link_addrs); + struct sta_info *sta_info_get_by_addrs(struct ieee80211_local *local, const u8 *sta_addr, const u8 *vif_addr) { -- cgit v1.2.3-70-g09d2 From 0ab26380d98655f0aab720704debfa2ac522cefd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 2 Sep 2022 16:12:47 +0200 Subject: wifi: mac80211: extend ieee80211_nullfunc_get() for MLO Add a link_id parameter to ieee80211_nullfunc_get() to be able to obtain a correctly addressed frame. Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath9k/channel.c | 2 +- drivers/net/wireless/realtek/rtw88/fw.c | 4 +-- drivers/net/wireless/st/cw1200/sta.c | 4 +-- drivers/net/wireless/ti/wl1251/main.c | 2 +- drivers/net/wireless/ti/wlcore/cmd.c | 4 +-- include/net/mac80211.h | 5 +++- net/mac80211/mlme.c | 5 ++-- net/mac80211/tx.c | 43 +++++++++++++++++++++----------- 8 files changed, 43 insertions(+), 26 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/ath/ath9k/channel.c b/drivers/net/wireless/ath/ath9k/channel.c index 6cf087522157..571062f2e82a 100644 --- a/drivers/net/wireless/ath/ath9k/channel.c +++ b/drivers/net/wireless/ath/ath9k/channel.c @@ -1113,7 +1113,7 @@ ath_chanctx_send_vif_ps_frame(struct ath_softc *sc, struct ath_vif *avp, if (!avp->assoc) return false; - skb = ieee80211_nullfunc_get(sc->hw, vif, false); + skb = ieee80211_nullfunc_get(sc->hw, vif, -1, false); if (!skb) return false; diff --git a/drivers/net/wireless/realtek/rtw88/fw.c b/drivers/net/wireless/realtek/rtw88/fw.c index babba68a7132..7f6fdebae203 100644 --- a/drivers/net/wireless/realtek/rtw88/fw.c +++ b/drivers/net/wireless/realtek/rtw88/fw.c @@ -1082,10 +1082,10 @@ static struct sk_buff *rtw_get_rsvd_page_skb(struct ieee80211_hw *hw, skb_new = ieee80211_proberesp_get(hw, vif); break; case RSVD_NULL: - skb_new = ieee80211_nullfunc_get(hw, vif, false); + skb_new = ieee80211_nullfunc_get(hw, vif, -1, false); break; case RSVD_QOS_NULL: - skb_new = ieee80211_nullfunc_get(hw, vif, true); + skb_new = ieee80211_nullfunc_get(hw, vif, -1, true); break; case RSVD_LPS_PG_DPK: skb_new = rtw_lps_pg_dpk_get(hw); diff --git a/drivers/net/wireless/st/cw1200/sta.c b/drivers/net/wireless/st/cw1200/sta.c index 26d3614519b1..8ef1d06b9bbd 100644 --- a/drivers/net/wireless/st/cw1200/sta.c +++ b/drivers/net/wireless/st/cw1200/sta.c @@ -195,7 +195,7 @@ void __cw1200_cqm_bssloss_sm(struct cw1200_common *priv, priv->bss_loss_state++; - skb = ieee80211_nullfunc_get(priv->hw, priv->vif, false); + skb = ieee80211_nullfunc_get(priv->hw, priv->vif, -1, false); WARN_ON(!skb); if (skb) cw1200_tx(priv->hw, NULL, skb); @@ -2263,7 +2263,7 @@ static int cw1200_upload_null(struct cw1200_common *priv) .rate = 0xFF, }; - frame.skb = ieee80211_nullfunc_get(priv->hw, priv->vif, false); + frame.skb = ieee80211_nullfunc_get(priv->hw, priv->vif,-1, false); if (!frame.skb) return -ENOMEM; diff --git a/drivers/net/wireless/ti/wl1251/main.c b/drivers/net/wireless/ti/wl1251/main.c index 9144ef5538a8..289371689a8d 100644 --- a/drivers/net/wireless/ti/wl1251/main.c +++ b/drivers/net/wireless/ti/wl1251/main.c @@ -546,7 +546,7 @@ static int wl1251_build_null_data(struct wl1251 *wl) size = sizeof(struct wl12xx_null_data_template); ptr = NULL; } else { - skb = ieee80211_nullfunc_get(wl->hw, wl->vif, false); + skb = ieee80211_nullfunc_get(wl->hw, wl->vif, -1, false); if (!skb) goto out; size = skb->len; diff --git a/drivers/net/wireless/ti/wlcore/cmd.c b/drivers/net/wireless/ti/wlcore/cmd.c index 138edd28b0de..a939fd89a7f5 100644 --- a/drivers/net/wireless/ti/wlcore/cmd.c +++ b/drivers/net/wireless/ti/wlcore/cmd.c @@ -1065,7 +1065,7 @@ int wl12xx_cmd_build_null_data(struct wl1271 *wl, struct wl12xx_vif *wlvif) } else { skb = ieee80211_nullfunc_get(wl->hw, wl12xx_wlvif_to_vif(wlvif), - false); + -1, false); if (!skb) goto out; size = skb->len; @@ -1092,7 +1092,7 @@ int wl12xx_cmd_build_klv_null_data(struct wl1271 *wl, struct sk_buff *skb = NULL; int ret = -ENOMEM; - skb = ieee80211_nullfunc_get(wl->hw, vif, false); + skb = ieee80211_nullfunc_get(wl->hw, vif,-1, false); if (!skb) goto out; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 954cc029a9f9..bfa6a1625c5c 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -5298,6 +5298,9 @@ struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw, * ieee80211_nullfunc_get - retrieve a nullfunc template * @hw: pointer obtained from ieee80211_alloc_hw(). * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @link_id: If the vif is an MLD, get a frame with the link addresses + * for the given link ID. For a link_id < 0 you get a frame with + * MLD addresses, however useful that might be. * @qos_ok: QoS NDP is acceptable to the caller, this should be set * if at all possible * @@ -5315,7 +5318,7 @@ struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw, */ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - bool qos_ok); + int link_id, bool qos_ok); /** * ieee80211_probereq_get - retrieve a Probe Request template diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 609584493ce0..8e8607bf27dc 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1546,8 +1546,9 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, struct ieee80211_hdr_3addr *nullfunc; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif, - !ieee80211_hw_check(&local->hw, DOESNT_SUPPORT_QOS_NDP)); + skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif, -1, + !ieee80211_hw_check(&local->hw, + DOESNT_SUPPORT_QOS_NDP)); if (!skb) return; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 1be8c9d83d6a..a09673117565 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5469,33 +5469,39 @@ EXPORT_SYMBOL(ieee80211_pspoll_get); struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - bool qos_ok) + int link_id, bool qos_ok) { + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_local *local = sdata->local; + struct ieee80211_link_data *link = NULL; struct ieee80211_hdr_3addr *nullfunc; - struct ieee80211_sub_if_data *sdata; - struct ieee80211_local *local; struct sk_buff *skb; bool qos = false; if (WARN_ON(vif->type != NL80211_IFTYPE_STATION)) return NULL; - sdata = vif_to_sdata(vif); - local = sdata->local; + skb = dev_alloc_skb(local->hw.extra_tx_headroom + + sizeof(*nullfunc) + 2); + if (!skb) + return NULL; + rcu_read_lock(); if (qos_ok) { struct sta_info *sta; - rcu_read_lock(); - sta = sta_info_get(sdata, sdata->deflink.u.mgd.bssid); + sta = sta_info_get(sdata, vif->cfg.ap_addr); qos = sta && sta->sta.wme; - rcu_read_unlock(); } - skb = dev_alloc_skb(local->hw.extra_tx_headroom + - sizeof(*nullfunc) + 2); - if (!skb) - return NULL; + if (link_id >= 0) { + link = rcu_dereference(sdata->link[link_id]); + if (WARN_ON_ONCE(!link)) { + rcu_read_unlock(); + kfree_skb(skb); + return NULL; + } + } skb_reserve(skb, local->hw.extra_tx_headroom); @@ -5516,9 +5522,16 @@ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, skb_put_data(skb, &qoshdr, sizeof(qoshdr)); } - memcpy(nullfunc->addr1, sdata->deflink.u.mgd.bssid, ETH_ALEN); - memcpy(nullfunc->addr2, vif->addr, ETH_ALEN); - memcpy(nullfunc->addr3, sdata->deflink.u.mgd.bssid, ETH_ALEN); + if (link) { + memcpy(nullfunc->addr1, link->conf->bssid, ETH_ALEN); + memcpy(nullfunc->addr2, link->conf->addr, ETH_ALEN); + memcpy(nullfunc->addr3, link->conf->bssid, ETH_ALEN); + } else { + memcpy(nullfunc->addr1, vif->cfg.ap_addr, ETH_ALEN); + memcpy(nullfunc->addr2, vif->addr, ETH_ALEN); + memcpy(nullfunc->addr3, vif->cfg.ap_addr, ETH_ALEN); + } + rcu_read_unlock(); return skb; } -- cgit v1.2.3-70-g09d2 From 65fd846cb3f94ae63134fbd0f32564cf82539eaa Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 2 Sep 2022 16:12:52 +0200 Subject: wifi: mac80211: add vif/sta link RCU dereference macros Add macros (and an exported function) to allow checking some link RCU protected accesses that are happening in callbacks from mac80211 and are thus under the correct lock. Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 3 +-- include/net/mac80211.h | 31 ++++++++++++++++++++++++++++--- net/mac80211/sta_info.c | 10 ++++++++++ 3 files changed, 39 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 7bbc455f5884..0780a1cc63a4 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3069,8 +3069,7 @@ static int mac80211_hwsim_change_vif_links(struct ieee80211_hw *hw, for_each_set_bit(i, &add, IEEE80211_MLD_MAX_NUM_LINKS) { struct ieee80211_bss_conf *link_conf; - /* FIXME: figure out how to get the locking here */ - link_conf = rcu_dereference_protected(vif->link_conf[i], 1); + link_conf = link_conf_dereference_protected(vif, i); if (WARN_ON(!link_conf)) continue; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index bfa6a1625c5c..d9e7f62cc972 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -1901,6 +1902,19 @@ struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev); */ struct wireless_dev *ieee80211_vif_to_wdev(struct ieee80211_vif *vif); +/** + * lockdep_vif_mutex_held - for lockdep checks on link poiners + * @vif: the interface to check + */ +static inline bool lockdep_vif_mutex_held(struct ieee80211_vif *vif) +{ + return lockdep_is_held(&ieee80211_vif_to_wdev(vif)->mtx); +} + +#define link_conf_dereference_protected(vif, link_id) \ + rcu_dereference_protected((vif)->link_conf[link_id], \ + lockdep_vif_mutex_held(vif)) + /** * enum ieee80211_key_flags - key flags * @@ -2266,13 +2280,24 @@ struct ieee80211_sta { u8 drv_priv[] __aligned(sizeof(void *)); }; -/* FIXME: check the locking correctly */ +#ifdef CONFIG_LOCKDEP +bool lockdep_sta_mutex_held(struct ieee80211_sta *pubsta); +#else +static inline bool lockdep_sta_mutex_held(struct ieee80211_sta *pubsta) +{ + return true; +} +#endif + +#define link_sta_dereference_protected(sta, link_id) \ + rcu_dereference_protected((sta)->link[link_id], \ + lockdep_sta_mutex_held(sta)) + #define for_each_sta_active_link(vif, sta, link_sta, link_id) \ for (link_id = 0; link_id < ARRAY_SIZE((sta)->link); link_id++) \ if ((!(vif)->active_links || \ (vif)->active_links & BIT(link_id)) && \ - ((link_sta) = rcu_dereference_protected((sta)->link[link_id],\ - 1))) + ((link_sta) = link_sta_dereference_protected(sta, link_id))) /** * enum sta_notify_cmd - sta notify command diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index bbf582a5702d..4875bd8af67c 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -2871,3 +2871,13 @@ void ieee80211_sta_set_max_amsdu_subframes(struct sta_info *sta, if (val) sta->sta.max_amsdu_subframes = 4 << val; } + +#ifdef CONFIG_LOCKDEP +bool lockdep_sta_mutex_held(struct ieee80211_sta *pubsta) +{ + struct sta_info *sta = container_of(pubsta, struct sta_info, sta); + + return lockdep_is_held(&sta->local->sta_mtx); +} +EXPORT_SYMBOL(lockdep_sta_mutex_held); +#endif -- cgit v1.2.3-70-g09d2 From 4c51541ddb78cef2da9c4c30006c0d9d06ea9a77 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Fri, 2 Sep 2022 16:12:54 +0200 Subject: wifi: mac80211: keep A-MSDU data in sta and per-link The A-MSDU data needs to be stored per-link and aggregated into a single value for the station. Add a new struct ieee_80211_sta_aggregates in order to store this data and a new function ieee80211_sta_recalc_aggregates to update the current data for the STA. Note that in the non MLO case the pointer in ieee80211_sta will directly reference the data in deflink.agg, which means that recalculation may be skipped in that case. Signed-off-by: Benjamin Berg Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c | 14 +++-- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c | 12 ++-- drivers/net/wireless/intel/iwlwifi/mvm/rs.c | 15 ++--- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 2 +- .../net/wireless/mediatek/mt76/mt76_connac_mcu.c | 4 +- drivers/net/wireless/mediatek/mt76/mt7915/mcu.c | 4 +- drivers/net/wireless/realtek/rtw88/fw.c | 2 +- drivers/net/wireless/realtek/rtw89/debug.c | 2 +- drivers/net/wireless/realtek/rtw89/phy.c | 4 +- include/net/mac80211.h | 68 +++++++++++++++------- net/mac80211/he.c | 8 ++- net/mac80211/ht.c | 6 +- net/mac80211/rc80211_minstrel_ht.c | 3 +- net/mac80211/sta_info.c | 50 +++++++++++++++- net/mac80211/sta_info.h | 3 + net/mac80211/tx.c | 10 ++-- net/mac80211/vht.c | 8 ++- 18 files changed, 152 insertions(+), 65 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c index c0bd697b080a..1e8123140973 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c @@ -430,14 +430,16 @@ static ssize_t iwl_dbgfs_amsdu_len_write(struct ieee80211_sta *sta, return -EBUSY; if (amsdu_len) { - mvmsta->orig_amsdu_len = sta->max_amsdu_len; - sta->max_amsdu_len = amsdu_len; - for (i = 0; i < ARRAY_SIZE(sta->max_tid_amsdu_len); i++) - sta->max_tid_amsdu_len[i] = amsdu_len; + mvmsta->orig_amsdu_len = sta->cur->max_amsdu_len; + sta->deflink.agg.max_amsdu_len = amsdu_len; + sta->deflink.agg.max_amsdu_len = amsdu_len; + for (i = 0; i < ARRAY_SIZE(sta->deflink.agg.max_tid_amsdu_len); i++) + sta->deflink.agg.max_tid_amsdu_len[i] = amsdu_len; } else { - sta->max_amsdu_len = mvmsta->orig_amsdu_len; + sta->deflink.agg.max_amsdu_len = mvmsta->orig_amsdu_len; mvmsta->orig_amsdu_len = 0; } + return count; } @@ -451,7 +453,7 @@ static ssize_t iwl_dbgfs_amsdu_len_read(struct file *file, char buf[32]; int pos; - pos = scnprintf(buf, sizeof(buf), "current %d ", sta->max_amsdu_len); + pos = scnprintf(buf, sizeof(buf), "current %d ", sta->cur->max_amsdu_len); pos += scnprintf(buf + pos, sizeof(buf) - pos, "stored %d\n", mvmsta->orig_amsdu_len); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 5eb28f8ee87e..df0793882f1d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -3193,7 +3193,7 @@ static int iwl_mvm_mac_sta_state(struct ieee80211_hw *hw, NL80211_TDLS_SETUP); } - sta->max_rc_amsdu_len = 1; + sta->deflink.agg.max_rc_amsdu_len = 1; } else if (old_state == IEEE80211_STA_NONE && new_state == IEEE80211_STA_AUTH) { /* diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c index 752d44d96163..2e9081cb6627 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c @@ -340,9 +340,9 @@ void iwl_mvm_tlc_update_notif(struct iwl_mvm *mvm, u16 size = le32_to_cpu(notif->amsdu_size); int i; - if (sta->max_amsdu_len < size) { + if (sta->deflink.agg.max_amsdu_len < size) { /* - * In debug sta->max_amsdu_len < size + * In debug sta->deflink.agg.max_amsdu_len < size * so also check with orig_amsdu_len which holds the * original data before debugfs changed the value */ @@ -352,18 +352,18 @@ void iwl_mvm_tlc_update_notif(struct iwl_mvm *mvm, mvmsta->amsdu_enabled = le32_to_cpu(notif->amsdu_enabled); mvmsta->max_amsdu_len = size; - sta->max_rc_amsdu_len = mvmsta->max_amsdu_len; + sta->deflink.agg.max_rc_amsdu_len = mvmsta->max_amsdu_len; for (i = 0; i < IWL_MAX_TID_COUNT; i++) { if (mvmsta->amsdu_enabled & BIT(i)) - sta->max_tid_amsdu_len[i] = + sta->deflink.agg.max_tid_amsdu_len[i] = iwl_mvm_max_amsdu_size(mvm, sta, i); else /* * Not so elegant, but this will effectively * prevent AMSDU on this TID */ - sta->max_tid_amsdu_len[i] = 1; + sta->deflink.agg.max_tid_amsdu_len[i] = 1; } IWL_DEBUG_RATE(mvm, @@ -450,7 +450,7 @@ void rs_fw_rate_init(struct iwl_mvm *mvm, struct ieee80211_sta *sta, * since TLC offload works with one mode we can assume * that only vht/ht is used and also set it as station max amsdu */ - sta->max_amsdu_len = max_amsdu_len; + sta->deflink.agg.max_amsdu_len = max_amsdu_len; cmd_ver = iwl_fw_lookup_cmd_ver(mvm->fw, WIDE_ID(DATA_PATH_GROUP, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c index 814a5e8f3666..0b50b816684a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c @@ -1491,7 +1491,7 @@ static void rs_set_amsdu_len(struct iwl_mvm *mvm, struct ieee80211_sta *sta, struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); int i; - sta->max_amsdu_len = rs_fw_get_max_amsdu_len(sta); + sta->deflink.agg.max_amsdu_len = rs_fw_get_max_amsdu_len(sta); /* * In case TLC offload is not active amsdu_enabled is either 0xFFFF @@ -1506,22 +1506,23 @@ static void rs_set_amsdu_len(struct iwl_mvm *mvm, struct ieee80211_sta *sta, if (mvmsta->vif->bss_conf.he_support && !iwlwifi_mod_params.disable_11ax) - mvmsta->max_amsdu_len = sta->max_amsdu_len; + mvmsta->max_amsdu_len = sta->deflink.agg.max_amsdu_len; else - mvmsta->max_amsdu_len = min_t(int, sta->max_amsdu_len, 8500); + mvmsta->max_amsdu_len = + min_t(int, sta->deflink.agg.max_amsdu_len, 8500); - sta->max_rc_amsdu_len = mvmsta->max_amsdu_len; + sta->deflink.agg.max_rc_amsdu_len = mvmsta->max_amsdu_len; for (i = 0; i < IWL_MAX_TID_COUNT; i++) { if (mvmsta->amsdu_enabled) - sta->max_tid_amsdu_len[i] = + sta->deflink.agg.max_tid_amsdu_len[i] = iwl_mvm_max_amsdu_size(mvm, sta, i); else /* * Not so elegant, but this will effectively * prevent AMSDU on this TID */ - sta->max_tid_amsdu_len[i] = 1; + sta->deflink.agg.max_tid_amsdu_len[i] = 1; } } @@ -2933,7 +2934,7 @@ static void rs_drv_rate_init(struct iwl_mvm *mvm, struct ieee80211_sta *sta, lq_sta->lq.sta_id = mvmsta->sta_id; mvmsta->amsdu_enabled = 0; - mvmsta->max_amsdu_len = sta->max_amsdu_len; + mvmsta->max_amsdu_len = sta->cur->max_amsdu_len; for (j = 0; j < LQ_SIZE; j++) rs_rate_scale_clear_tbl_windows(mvm, &lq_sta->lq_info[j]); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index f9e08b339e0c..86d20e13bf47 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -926,7 +926,7 @@ static int iwl_mvm_tx_tso(struct iwl_mvm *mvm, struct sk_buff *skb, * Take the min of ieee80211 station and mvm station */ max_amsdu_len = - min_t(unsigned int, sta->max_amsdu_len, + min_t(unsigned int, sta->cur->max_amsdu_len, iwl_mvm_max_amsdu_size(mvm, sta, tid)); /* diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c index 5c4ca93bcf91..a54af6bc3251 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c @@ -594,14 +594,14 @@ mt76_connac_mcu_sta_amsdu_tlv(struct sk_buff *skb, struct ieee80211_sta *sta, vif->type != NL80211_IFTYPE_STATION) return; - if (!sta->max_amsdu_len) + if (!sta->deflink.agg.max_amsdu_len) return; tlv = mt76_connac_mcu_add_tlv(skb, STA_REC_HW_AMSDU, sizeof(*amsdu)); amsdu = (struct sta_rec_amsdu *)tlv; amsdu->max_amsdu_num = 8; amsdu->amsdu_en = true; - amsdu->max_mpdu_size = sta->max_amsdu_len >= + amsdu->max_mpdu_size = sta->deflink.agg.max_amsdu_len >= IEEE80211_MAX_MPDU_LEN_VHT_7991; wcid->amsdu = true; diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c index 1081b893f319..d1dc6efba457 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c @@ -925,7 +925,7 @@ mt7915_mcu_sta_amsdu_tlv(struct mt7915_dev *dev, struct sk_buff *skb, vif->type != NL80211_IFTYPE_AP) return; - if (!sta->max_amsdu_len) + if (!sta->deflink.agg.max_amsdu_len) return; tlv = mt76_connac_mcu_add_tlv(skb, STA_REC_HW_AMSDU, sizeof(*amsdu)); @@ -934,7 +934,7 @@ mt7915_mcu_sta_amsdu_tlv(struct mt7915_dev *dev, struct sk_buff *skb, amsdu->amsdu_en = true; msta->wcid.amsdu = true; - switch (sta->max_amsdu_len) { + switch (sta->deflink.agg.max_amsdu_len) { case IEEE80211_MAX_MPDU_LEN_VHT_11454: if (!is_mt7915(&dev->mt76)) { amsdu->max_mpdu_size = diff --git a/drivers/net/wireless/realtek/rtw88/fw.c b/drivers/net/wireless/realtek/rtw88/fw.c index 7f6fdebae203..0b5f903c0f36 100644 --- a/drivers/net/wireless/realtek/rtw88/fw.c +++ b/drivers/net/wireless/realtek/rtw88/fw.c @@ -118,7 +118,7 @@ legacy: si->ra_report.desc_rate = rate; si->ra_report.bit_rate = bit_rate; - sta->max_rc_amsdu_len = get_max_amsdu_len(bit_rate); + sta->deflink.agg.max_rc_amsdu_len = get_max_amsdu_len(bit_rate); } static void rtw_fw_ra_report_handle(struct rtw_dev *rtwdev, u8 *payload, diff --git a/drivers/net/wireless/realtek/rtw89/debug.c b/drivers/net/wireless/realtek/rtw89/debug.c index 738cfcd6fd32..9117b6168e32 100644 --- a/drivers/net/wireless/realtek/rtw89/debug.c +++ b/drivers/net/wireless/realtek/rtw89/debug.c @@ -2309,7 +2309,7 @@ static void rtw89_sta_info_get_iter(void *data, struct ieee80211_sta *sta) seq_printf(m, "%s", rtwsta->ra_report.might_fallback_legacy ? " FB_G" : ""); seq_printf(m, "\t(hw_rate=0x%x)", rtwsta->ra_report.hw_rate); seq_printf(m, "\t==> agg_wait=%d (%d)\n", rtwsta->max_agg_wait, - sta->max_rc_amsdu_len); + sta->deflink.agg.max_rc_amsdu_len); seq_printf(m, "RX rate [%d]: ", rtwsta->mac_id); diff --git a/drivers/net/wireless/realtek/rtw89/phy.c b/drivers/net/wireless/realtek/rtw89/phy.c index 4dfeedeb0d90..a2ebef0051b8 100644 --- a/drivers/net/wireless/realtek/rtw89/phy.c +++ b/drivers/net/wireless/realtek/rtw89/phy.c @@ -2031,8 +2031,8 @@ static void rtw89_phy_c2h_ra_rpt_iter(void *data, struct ieee80211_sta *sta) ra_report->hw_rate = FIELD_PREP(RTW89_HW_RATE_MASK_MOD, mode) | FIELD_PREP(RTW89_HW_RATE_MASK_VAL, rate); ra_report->might_fallback_legacy = mcs <= 2; - sta->max_rc_amsdu_len = get_max_amsdu_len(rtwdev, ra_report); - rtwsta->max_agg_wait = sta->max_rc_amsdu_len / 1500 - 1; + sta->deflink.agg.max_rc_amsdu_len = get_max_amsdu_len(rtwdev, ra_report); + rtwsta->max_agg_wait = sta->deflink.agg.max_rc_amsdu_len / 1500 - 1; } static void diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d9e7f62cc972..873e81a45a97 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2143,6 +2143,34 @@ struct ieee80211_sta_txpwr { enum nl80211_tx_power_setting type; }; +/** + * struct ieee80211_sta_aggregates - info that is aggregated from active links + * + * Used for any per-link data that needs to be aggregated and updated in the + * main &struct ieee80211_sta when updated or the active links change. + * + * @max_amsdu_len: indicates the maximal length of an A-MSDU in bytes. + * This field is always valid for packets with a VHT preamble. + * For packets with a HT preamble, additional limits apply: + * + * * If the skb is transmitted as part of a BA agreement, the + * A-MSDU maximal size is min(max_amsdu_len, 4065) bytes. + * * If the skb is not part of a BA agreement, the A-MSDU maximal + * size is min(max_amsdu_len, 7935) bytes. + * + * Both additional HT limits must be enforced by the low level + * driver. This is defined by the spec (IEEE 802.11-2012 section + * 8.3.2.2 NOTE 2). + * @max_rc_amsdu_len: Maximum A-MSDU size in bytes recommended by rate control. + * @max_tid_amsdu_len: Maximum A-MSDU size in bytes for this TID + */ +struct ieee80211_sta_aggregates { + u16 max_amsdu_len; + + u16 max_rc_amsdu_len; + u16 max_tid_amsdu_len[IEEE80211_NUM_TIDS]; +}; + /** * struct ieee80211_link_sta - station Link specific info * All link specific info for a STA link for a non MLD STA(single) @@ -2179,6 +2207,8 @@ struct ieee80211_link_sta { struct ieee80211_he_6ghz_capa he_6ghz_capa; struct ieee80211_sta_eht_cap eht_cap; + struct ieee80211_sta_aggregates agg; + u8 rx_nss; enum ieee80211_sta_rx_bandwidth bandwidth; struct ieee80211_sta_txpwr txpwr; @@ -2218,9 +2248,10 @@ struct ieee80211_link_sta { * @max_amsdu_subframes: indicates the maximal number of MSDUs in a single * A-MSDU. Taken from the Extended Capabilities element. 0 means * unlimited. + * @cur: currently valid data as aggregated from the active links + * For non MLO STA it will point to the deflink data. For MLO STA + * ieee80211_sta_recalc_aggregates() must be called to update it. * @support_p2p_ps: indicates whether the STA supports P2P PS mechanism or not. - * @max_rc_amsdu_len: Maximum A-MSDU size in bytes recommended by rate control. - * @max_tid_amsdu_len: Maximum A-MSDU size in bytes for this TID * @txq: per-TID data TX queues (if driver uses the TXQ abstraction); note that * the last entry (%IEEE80211_NUM_TIDS) is used for non-data frames * @deflink: This holds the default link STA information, for non MLO STA all link @@ -2250,25 +2281,9 @@ struct ieee80211_sta { bool mlo; u8 max_amsdu_subframes; - /** - * @max_amsdu_len: - * indicates the maximal length of an A-MSDU in bytes. - * This field is always valid for packets with a VHT preamble. - * For packets with a HT preamble, additional limits apply: - * - * * If the skb is transmitted as part of a BA agreement, the - * A-MSDU maximal size is min(max_amsdu_len, 4065) bytes. - * * If the skb is not part of a BA agreement, the A-MSDU maximal - * size is min(max_amsdu_len, 7935) bytes. - * - * Both additional HT limits must be enforced by the low level - * driver. This is defined by the spec (IEEE 802.11-2012 section - * 8.3.2.2 NOTE 2). - */ - u16 max_amsdu_len; + struct ieee80211_sta_aggregates *cur; + bool support_p2p_ps; - u16 max_rc_amsdu_len; - u16 max_tid_amsdu_len[IEEE80211_NUM_TIDS]; struct ieee80211_txq *txq[IEEE80211_NUM_TIDS + 1]; @@ -6105,6 +6120,19 @@ void ieee80211_sta_eosp(struct ieee80211_sta *pubsta); */ void ieee80211_send_eosp_nullfunc(struct ieee80211_sta *pubsta, int tid); +/** + * ieee80211_sta_recalc_aggregates - recalculate aggregate data after a change + * @pubsta: the station + * + * Call this function after changing a per-link aggregate data as referenced in + * &struct ieee80211_sta_aggregates by accessing the agg field of + * &struct ieee80211_link_sta. + * + * With non MLO the data in deflink will be referenced directly. In that case + * there is no need to call this function. + */ +void ieee80211_sta_recalc_aggregates(struct ieee80211_sta *pubsta); + /** * ieee80211_sta_register_airtime - register airtime usage for a sta/tid * diff --git a/net/mac80211/he.c b/net/mac80211/he.c index e73899fd4bbb..729f261520c7 100644 --- a/net/mac80211/he.c +++ b/net/mac80211/he.c @@ -39,17 +39,19 @@ ieee80211_update_from_he_6ghz_capa(const struct ieee80211_he_6ghz_capa *he_6ghz_ switch (le16_get_bits(he_6ghz_capa->capa, IEEE80211_HE_6GHZ_CAP_MAX_MPDU_LEN)) { case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454: - sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_11454; + link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_11454; break; case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991: - sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_7991; + link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_7991; break; case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895: default: - sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_3895; + link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_3895; break; } + ieee80211_sta_recalc_aggregates(&sta->sta); + link_sta->pub->he_6ghz_capa = *he_6ghz_capa; } diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 12a233ba5031..83bc41346ae7 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -241,9 +241,11 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, ht_cap.mcs.rx_highest = ht_cap_ie->mcs.rx_highest; if (ht_cap.cap & IEEE80211_HT_CAP_MAX_AMSDU) - sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_7935; + link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_7935; else - sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_3839; + link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_3839; + + ieee80211_sta_recalc_aggregates(&sta->sta); apply: changed = memcmp(&link_sta->pub->ht_cap, &ht_cap, sizeof(ht_cap)); diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 8c41a545a8b7..24c3c055db6d 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -1568,7 +1568,8 @@ minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) if (i < IEEE80211_TX_RATE_TABLE_SIZE) rates->rate[i].idx = -1; - mi->sta->max_rc_amsdu_len = minstrel_ht_get_max_amsdu_len(mi); + mi->sta->deflink.agg.max_rc_amsdu_len = minstrel_ht_get_max_amsdu_len(mi); + ieee80211_sta_recalc_aggregates(mi->sta); rate_control_set_rates(mp->hw, mi->sta, rates); } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 4875bd8af67c..cebfd148bb40 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -376,6 +376,8 @@ static void sta_remove_link(struct sta_info *sta, unsigned int link_id, sta_info_free_link(&alloc->info); kfree_rcu(alloc, rcu_head); } + + ieee80211_sta_recalc_aggregates(&sta->sta); } /** @@ -514,6 +516,7 @@ static void sta_info_add_link(struct sta_info *sta, rcu_assign_pointer(sta->sta.link[link_id], link_sta); link_sta->smps_mode = IEEE80211_SMPS_OFF; + link_sta->agg.max_rc_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_BA; } static struct sta_info * @@ -544,6 +547,8 @@ __sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta_info_add_link(sta, 0, &sta->deflink, &sta->sta.deflink); } + sta->sta.cur = &sta->sta.deflink.agg; + spin_lock_init(&sta->lock); spin_lock_init(&sta->ps_lock); INIT_WORK(&sta->drv_deliver_wk, sta_deliver_ps_frames); @@ -667,8 +672,6 @@ __sta_info_alloc(struct ieee80211_sub_if_data *sdata, } } - sta->sta.max_rc_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_BA; - sta->cparams.ce_threshold = CODEL_DISABLED_THRESHOLD; sta->cparams.target = MS2TIME(20); sta->cparams.interval = MS2TIME(100); @@ -2124,6 +2127,44 @@ void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid, } EXPORT_SYMBOL(ieee80211_sta_register_airtime); +void ieee80211_sta_recalc_aggregates(struct ieee80211_sta *pubsta) +{ + struct sta_info *sta = container_of(pubsta, struct sta_info, sta); + struct ieee80211_link_sta *link_sta; + int link_id, i; + bool first = true; + + if (!pubsta->valid_links || !pubsta->mlo) { + pubsta->cur = &pubsta->deflink.agg; + return; + } + + rcu_read_lock(); + for_each_sta_active_link(&sta->sdata->vif, pubsta, link_sta, link_id) { + if (first) { + sta->cur = pubsta->deflink.agg; + first = false; + continue; + } + + sta->cur.max_amsdu_len = + min(sta->cur.max_amsdu_len, + link_sta->agg.max_amsdu_len); + sta->cur.max_rc_amsdu_len = + min(sta->cur.max_rc_amsdu_len, + link_sta->agg.max_rc_amsdu_len); + + for (i = 0; i < ARRAY_SIZE(sta->cur.max_tid_amsdu_len); i++) + sta->cur.max_tid_amsdu_len[i] = + min(sta->cur.max_tid_amsdu_len[i], + link_sta->agg.max_tid_amsdu_len[i]); + } + rcu_read_unlock(); + + pubsta->cur = &sta->cur; +} +EXPORT_SYMBOL(ieee80211_sta_recalc_aggregates); + void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local, struct sta_info *sta, u8 ac, u16 tx_airtime, bool tx_completed) @@ -2819,6 +2860,11 @@ int ieee80211_sta_activate_link(struct sta_info *sta, unsigned int link_id) if (!test_sta_flag(sta, WLAN_STA_INSERTED)) goto hash; + /* Ensure the values are updated for the driver, + * redone by sta_remove_link on failure. + */ + ieee80211_sta_recalc_aggregates(&sta->sta); + ret = drv_change_sta_links(sdata->local, sdata, &sta->sta, old_links, new_links); if (ret) { diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 2eb3a9452e07..2517ea714dc4 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -622,6 +622,8 @@ struct link_sta_info { * @tdls_chandef: a TDLS peer can have a wider chandef that is compatible to * the BSS one. * @frags: fragment cache + * @cur: storage for aggregation data + * &struct ieee80211_sta points either here or to deflink.agg. * @deflink: This is the default link STA information, for non MLO STA all link * specific STA information is accessed through @deflink or through * link[0] which points to address of @deflink. For MLO Link STA @@ -705,6 +707,7 @@ struct sta_info { struct ieee80211_fragment_cache frags; + struct ieee80211_sta_aggregates cur; struct link_sta_info deflink; struct link_sta_info __rcu *link[IEEE80211_MLD_MAX_NUM_LINKS]; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index a09673117565..24c0a1706b92 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3387,7 +3387,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, int subframe_len = skb->len - ETH_ALEN; u8 max_subframes = sta->sta.max_amsdu_subframes; int max_frags = local->hw.max_tx_fragments; - int max_amsdu_len = sta->sta.max_amsdu_len; + int max_amsdu_len = sta->sta.cur->max_amsdu_len; int orig_truesize; u32 flow_idx; __be16 len; @@ -3413,13 +3413,13 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, if (test_bit(IEEE80211_TXQ_NO_AMSDU, &txqi->flags)) return false; - if (sta->sta.max_rc_amsdu_len) + if (sta->sta.cur->max_rc_amsdu_len) max_amsdu_len = min_t(int, max_amsdu_len, - sta->sta.max_rc_amsdu_len); + sta->sta.cur->max_rc_amsdu_len); - if (sta->sta.max_tid_amsdu_len[tid]) + if (sta->sta.cur->max_tid_amsdu_len[tid]) max_amsdu_len = min_t(int, max_amsdu_len, - sta->sta.max_tid_amsdu_len[tid]); + sta->sta.cur->max_tid_amsdu_len[tid]); flow_idx = fq_flow_idx(fq, skb); diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index b2b09d421e8b..803de5881485 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -323,16 +323,18 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, */ switch (vht_cap->cap & IEEE80211_VHT_CAP_MAX_MPDU_MASK) { case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454: - link_sta->sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_11454; + link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_11454; break; case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991: - link_sta->sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_7991; + link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_7991; break; case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895: default: - link_sta->sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_3895; + link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_3895; break; } + + ieee80211_sta_recalc_aggregates(&link_sta->sta->sta); } /* FIXME: move this to some better location - parses HE/EHT now */ -- cgit v1.2.3-70-g09d2 From 3d901102922723eedce6ef10ebd03315a7abb8a5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 2 Sep 2022 16:12:56 +0200 Subject: wifi: mac80211: implement link switching Implement an API function and debugfs file to switch active links. Also provide an async version of the API so drivers can call it in arbitrary contexts, e.g. while in the authorized callback. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 41 ++++++++++ net/mac80211/debugfs_netdev.c | 26 +++++++ net/mac80211/ieee80211_i.h | 4 + net/mac80211/iface.c | 12 +++ net/mac80211/key.c | 34 +++++++++ net/mac80211/key.h | 3 + net/mac80211/link.c | 171 ++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 291 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 873e81a45a97..ac2bad57933f 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -7184,4 +7184,45 @@ static inline bool ieee80211_is_tx_data(struct sk_buff *skb) ieee80211_is_data(hdr->frame_control); } +/** + * ieee80211_set_active_links - set active links in client mode + * @vif: interface to set active links on + * @active_links: the new active links bitmap + * + * This changes the active links on an interface. The interface + * must be in client mode (in AP mode, all links are always active), + * and @active_links must be a subset of the vif's valid_links. + * + * If a link is switched off and another is switched on at the same + * time (e.g. active_links going from 0x1 to 0x10) then you will get + * a sequence of calls like + * - change_vif_links(0x11) + * - unassign_vif_chanctx(link_id=0) + * - change_sta_links(0x11) for each affected STA (the AP) + * (TDLS connections on now inactive links should be torn down) + * - remove group keys on the old link (link_id 0) + * - add new group keys (GTK/IGTK/BIGTK) on the new link (link_id 4) + * - change_sta_links(0x10) for each affected STA (the AP) + * - assign_vif_chanctx(link_id=4) + * - change_vif_links(0x10) + * + * Note: This function acquires some mac80211 locks and must not + * be called with any driver locks held that could cause a + * lock dependency inversion. Best call it without locks. + */ +int ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links); + +/** + * ieee80211_set_active_links_async - asynchronously set active links + * @vif: interface to set active links on + * @active_links: the new active links bitmap + * + * See ieee80211_set_active_links() for more information, the only + * difference here is that the link change is triggered async and + * can be called in any context, but the link switch will only be + * completed after it returns. + */ +void ieee80211_set_active_links_async(struct ieee80211_vif *vif, + u16 active_links); + #endif /* MAC80211_H */ diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 1e5b041a5cea..5b014786fd2d 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -570,6 +570,30 @@ static ssize_t ieee80211_if_parse_tsf( } IEEE80211_IF_FILE_RW(tsf); +static ssize_t ieee80211_if_fmt_valid_links(const struct ieee80211_sub_if_data *sdata, + char *buf, int buflen) +{ + return snprintf(buf, buflen, "0x%x\n", sdata->vif.valid_links); +} +IEEE80211_IF_FILE_R(valid_links); + +static ssize_t ieee80211_if_fmt_active_links(const struct ieee80211_sub_if_data *sdata, + char *buf, int buflen) +{ + return snprintf(buf, buflen, "0x%x\n", sdata->vif.active_links); +} + +static ssize_t ieee80211_if_parse_active_links(struct ieee80211_sub_if_data *sdata, + const char *buf, int buflen) +{ + u16 active_links; + + if (kstrtou16(buf, 0, &active_links)) + return -EINVAL; + + return ieee80211_set_active_links(&sdata->vif, active_links) ?: buflen; +} +IEEE80211_IF_FILE_RW(active_links); #ifdef CONFIG_MAC80211_MESH IEEE80211_IF_FILE(estab_plinks, u.mesh.estab_plinks, ATOMIC); @@ -670,6 +694,8 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD_MODE(uapsd_queues, 0600); DEBUGFS_ADD_MODE(uapsd_max_sp_len, 0600); DEBUGFS_ADD_MODE(tdls_wider_bw, 0600); + DEBUGFS_ADD_MODE(valid_links, 0200); + DEBUGFS_ADD_MODE(active_links, 0600); } static void add_ap_files(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 977aea4467e0..4e1d4c339f2d 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1081,6 +1081,10 @@ struct ieee80211_sub_if_data { struct ieee80211_link_data deflink; struct ieee80211_link_data __rcu *link[IEEE80211_MLD_MAX_NUM_LINKS]; + /* for ieee80211_set_active_links_async() */ + struct work_struct activate_links_work; + u16 desired_active_links; + #ifdef CONFIG_MAC80211_DEBUGFS struct { struct dentry *subdir_stations; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index f99685e2d633..572254366a0f 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -754,6 +754,8 @@ static int ieee80211_stop(struct net_device *dev) ieee80211_stop_mbssid(sdata); } + cancel_work_sync(&sdata->activate_links_work); + wiphy_lock(sdata->local->hw.wiphy); ieee80211_do_stop(sdata, true); wiphy_unlock(sdata->local->hw.wiphy); @@ -1724,6 +1726,15 @@ static void ieee80211_recalc_smps_work(struct work_struct *work) ieee80211_recalc_smps(sdata, &sdata->deflink); } +static void ieee80211_activate_links_work(struct work_struct *work) +{ + struct ieee80211_sub_if_data *sdata = + container_of(work, struct ieee80211_sub_if_data, + activate_links_work); + + ieee80211_set_active_links(&sdata->vif, sdata->desired_active_links); +} + /* * Helper function to initialise an interface to a specific type. */ @@ -1761,6 +1772,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, skb_queue_head_init(&sdata->status_queue); INIT_WORK(&sdata->work, ieee80211_iface_work); INIT_WORK(&sdata->recalc_smps, ieee80211_recalc_smps_work); + INIT_WORK(&sdata->activate_links_work, ieee80211_activate_links_work); switch (type) { case NL80211_IFTYPE_P2P_GO: diff --git a/net/mac80211/key.c b/net/mac80211/key.c index f6f0f65fb255..e8f6c1e5eabf 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -1445,3 +1445,37 @@ void ieee80211_key_replay(struct ieee80211_key_conf *keyconf) } } EXPORT_SYMBOL_GPL(ieee80211_key_replay); + +int ieee80211_key_switch_links(struct ieee80211_sub_if_data *sdata, + unsigned long del_links_mask, + unsigned long add_links_mask) +{ + struct ieee80211_key *key; + int ret; + + list_for_each_entry(key, &sdata->key_list, list) { + if (key->conf.link_id < 0 || + !(del_links_mask & BIT(key->conf.link_id))) + continue; + + /* shouldn't happen for per-link keys */ + WARN_ON(key->sta); + + ieee80211_key_disable_hw_accel(key); + } + + list_for_each_entry(key, &sdata->key_list, list) { + if (key->conf.link_id < 0 || + !(add_links_mask & BIT(key->conf.link_id))) + continue; + + /* shouldn't happen for per-link keys */ + WARN_ON(key->sta); + + ret = ieee80211_key_enable_hw_accel(key); + if (ret) + return ret; + } + + return 0; +} diff --git a/net/mac80211/key.h b/net/mac80211/key.h index 518af24aab56..f3df97df4b72 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -165,6 +165,9 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata, void ieee80211_free_sta_keys(struct ieee80211_local *local, struct sta_info *sta); void ieee80211_reenable_keys(struct ieee80211_sub_if_data *sdata); +int ieee80211_key_switch_links(struct ieee80211_sub_if_data *sdata, + unsigned long del_links_mask, + unsigned long add_links_mask); #define key_mtx_dereference(local, ref) \ rcu_dereference_protected(ref, lockdep_is_held(&((local)->key_mtx))) diff --git a/net/mac80211/link.c b/net/mac80211/link.c index 8df348a5edce..e309708abae8 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -9,6 +9,7 @@ #include #include "ieee80211_i.h" #include "driver-ops.h" +#include "key.h" void ieee80211_link_setup(struct ieee80211_link_data *link) { @@ -300,3 +301,173 @@ void ieee80211_vif_clear_links(struct ieee80211_sub_if_data *sdata) ieee80211_free_links(sdata, links); } + +static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata, + u16 active_links) +{ + struct ieee80211_bss_conf *link_confs[IEEE80211_MLD_MAX_NUM_LINKS]; + struct ieee80211_local *local = sdata->local; + u16 old_active = sdata->vif.active_links; + unsigned long rem = old_active & ~active_links; + unsigned long add = active_links & ~old_active; + struct sta_info *sta; + unsigned int link_id; + int ret, i; + + if (!ieee80211_sdata_running(sdata)) + return -ENETDOWN; + + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return -EINVAL; + + /* cannot activate links that don't exist */ + if (active_links & ~sdata->vif.valid_links) + return -EINVAL; + + /* nothing to do */ + if (old_active == active_links) + return 0; + + for (i = 0; i < IEEE80211_MLD_MAX_NUM_LINKS; i++) + link_confs[i] = sdata_dereference(sdata->vif.link_conf[i], + sdata); + + if (add) { + sdata->vif.active_links |= active_links; + ret = drv_change_vif_links(local, sdata, + old_active, + sdata->vif.active_links, + link_confs); + if (ret) { + sdata->vif.active_links = old_active; + return ret; + } + } + + for_each_set_bit(link_id, &rem, IEEE80211_MLD_MAX_NUM_LINKS) { + struct ieee80211_link_data *link; + + link = sdata_dereference(sdata->link[link_id], sdata); + + /* FIXME: kill TDLS connections on the link */ + + ieee80211_link_release_channel(link); + } + + list_for_each_entry(sta, &local->sta_list, list) { + if (sdata != sta->sdata) + continue; + ret = drv_change_sta_links(local, sdata, &sta->sta, + old_active, + old_active | active_links); + WARN_ON_ONCE(ret); + } + + ret = ieee80211_key_switch_links(sdata, rem, add); + WARN_ON_ONCE(ret); + + list_for_each_entry(sta, &local->sta_list, list) { + if (sdata != sta->sdata) + continue; + ret = drv_change_sta_links(local, sdata, &sta->sta, + old_active | active_links, + active_links); + WARN_ON_ONCE(ret); + } + + for_each_set_bit(link_id, &add, IEEE80211_MLD_MAX_NUM_LINKS) { + struct ieee80211_link_data *link; + + link = sdata_dereference(sdata->link[link_id], sdata); + + ret = ieee80211_link_use_channel(link, &link->conf->chandef, + IEEE80211_CHANCTX_SHARED); + WARN_ON_ONCE(ret); + + ieee80211_link_info_change_notify(sdata, link, + BSS_CHANGED_ERP_CTS_PROT | + BSS_CHANGED_ERP_PREAMBLE | + BSS_CHANGED_ERP_SLOT | + BSS_CHANGED_HT | + BSS_CHANGED_BASIC_RATES | + BSS_CHANGED_BSSID | + BSS_CHANGED_CQM | + BSS_CHANGED_QOS | + BSS_CHANGED_TXPOWER | + BSS_CHANGED_BANDWIDTH | + BSS_CHANGED_TWT | + BSS_CHANGED_HE_OBSS_PD | + BSS_CHANGED_HE_BSS_COLOR); + ieee80211_mgd_set_link_qos_params(link); + } + + old_active = sdata->vif.active_links; + sdata->vif.active_links = active_links; + + if (rem) { + ret = drv_change_vif_links(local, sdata, old_active, + active_links, link_confs); + WARN_ON_ONCE(ret); + } + + return 0; +} + +int ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_local *local = sdata->local; + u16 old_active; + int ret; + + sdata_lock(sdata); + mutex_lock(&local->sta_mtx); + mutex_lock(&local->mtx); + mutex_lock(&local->key_mtx); + old_active = sdata->vif.active_links; + if (old_active & active_links) { + /* + * if there's at least one link that stays active across + * the change then switch to it (to those) first, and + * then enable the additional links + */ + ret = _ieee80211_set_active_links(sdata, + old_active & active_links); + if (!ret) + ret = _ieee80211_set_active_links(sdata, active_links); + } else { + /* otherwise switch directly */ + ret = _ieee80211_set_active_links(sdata, active_links); + } + mutex_unlock(&local->key_mtx); + mutex_unlock(&local->mtx); + mutex_unlock(&local->sta_mtx); + sdata_unlock(sdata); + + return ret; +} +EXPORT_SYMBOL_GPL(ieee80211_set_active_links); + +void ieee80211_set_active_links_async(struct ieee80211_vif *vif, + u16 active_links) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + + if (!ieee80211_sdata_running(sdata)) + return; + + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return; + + /* cannot activate links that don't exist */ + if (active_links & ~sdata->vif.valid_links) + return; + + /* nothing to do */ + if (sdata->vif.active_links == active_links) + return; + + sdata->desired_active_links = active_links; + schedule_work(&sdata->activate_links_work); +} +EXPORT_SYMBOL_GPL(ieee80211_set_active_links_async); -- cgit v1.2.3-70-g09d2 From b338d91703fae6f6afd67f3f75caa3b8f36ddef3 Mon Sep 17 00:00:00 2001 From: Brian Gix Date: Thu, 1 Sep 2022 12:19:13 -0700 Subject: Bluetooth: Implement support for Mesh The patch adds state bits, storage and HCI command chains for sending and receiving Bluetooth Mesh advertising packets, and delivery to requesting user space processes. It specifically creates 4 new MGMT commands and 2 new MGMT events: MGMT_OP_SET_MESH_RECEIVER - Sets passive scan parameters and a list of AD Types which will trigger Mesh Packet Received events MGMT_OP_MESH_READ_FEATURES - Returns information on how many outbound Mesh packets can be simultaneously queued, and what the currently queued handles are. MGMT_OP_MESH_SEND - Command to queue a specific outbound Mesh packet, with the number of times it should be sent, and the BD Addr to use. Discrete advertisments are added to the ADV Instance list. MGMT_OP_MESH_SEND_CANCEL - Command to cancel a prior outbound message request. MGMT_EV_MESH_DEVICE_FOUND - Event to deliver entire received Mesh Advertisement packet, along with timing information. MGMT_EV_MESH_PACKET_CMPLT - Event to indicate that an outbound packet is no longer queued for delivery. Signed-off-by: Brian Gix Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/bluetooth.h | 1 + include/net/bluetooth/hci.h | 3 + include/net/bluetooth/hci_core.h | 16 +- include/net/bluetooth/mgmt.h | 52 +++++ net/bluetooth/hci_core.c | 13 +- net/bluetooth/hci_event.c | 61 +++-- net/bluetooth/hci_sock.c | 1 + net/bluetooth/hci_sync.c | 87 ++++++- net/bluetooth/mgmt.c | 480 +++++++++++++++++++++++++++++++++++++- net/bluetooth/mgmt_util.c | 74 ++++++ net/bluetooth/mgmt_util.h | 18 ++ 11 files changed, 760 insertions(+), 46 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index e72f3b247b5e..bcc5a4cd2c17 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -627,6 +627,7 @@ static inline bool iso_enabled(void) int mgmt_init(void); void mgmt_exit(void); +void mgmt_cleanup(struct sock *sk); void bt_sock_reclassify_lock(struct sock *sk, int proto); diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index cf29511b25a8..b3ade687531f 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -354,6 +354,9 @@ enum { HCI_LE_SIMULTANEOUS_ROLES, HCI_CMD_DRAIN_WORKQUEUE, + HCI_MESH, + HCI_MESH_SENDING, + __HCI_NUM_FLAGS, }; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 932153e68864..c54bc71254af 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -238,6 +238,7 @@ struct adv_info { bool enabled; bool pending; bool periodic; + __u8 mesh; __u8 instance; __u32 flags; __u16 timeout; @@ -372,6 +373,8 @@ struct hci_dev { __u8 le_resolv_list_size; __u8 le_num_of_adv_sets; __u8 le_states[8]; + __u8 mesh_ad_types[16]; + __u8 mesh_send_ref; __u8 commands[64]; __u8 hci_ver; __u16 hci_rev; @@ -511,6 +514,7 @@ struct hci_dev { struct list_head cmd_sync_work_list; struct mutex cmd_sync_work_lock; struct work_struct cmd_sync_cancel_work; + struct work_struct reenable_adv_work; __u16 discov_timeout; struct delayed_work discov_off; @@ -561,6 +565,7 @@ struct hci_dev { struct hci_conn_hash conn_hash; + struct list_head mesh_pending; struct list_head mgmt_pending; struct list_head reject_list; struct list_head accept_list; @@ -614,6 +619,8 @@ struct hci_dev { struct delayed_work rpa_expired; bdaddr_t rpa; + struct delayed_work mesh_send_done; + enum { INTERLEAVE_SCAN_NONE, INTERLEAVE_SCAN_NO_FILTER, @@ -1576,7 +1583,8 @@ struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags, u16 adv_data_len, u8 *adv_data, u16 scan_rsp_len, u8 *scan_rsp_data, u16 timeout, u16 duration, s8 tx_power, - u32 min_interval, u32 max_interval); + u32 min_interval, u32 max_interval, + u8 mesh_handle); struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance, u32 flags, u8 data_len, u8 *data, u32 min_interval, u32 max_interval); @@ -1997,6 +2005,9 @@ void hci_mgmt_chan_unregister(struct hci_mgmt_chan *c); #define DISCOV_LE_FAST_ADV_INT_MAX 0x00F0 /* 150 msec */ #define DISCOV_LE_PER_ADV_INT_MIN 0x00A0 /* 200 msec */ #define DISCOV_LE_PER_ADV_INT_MAX 0x00A0 /* 200 msec */ +#define DISCOV_LE_ADV_MESH_MIN 0x00A0 /* 100 msec */ +#define DISCOV_LE_ADV_MESH_MAX 0x00A0 /* 100 msec */ +#define INTERVAL_TO_MS(x) (((x) * 10) / 0x10) #define NAME_RESOLVE_DURATION msecs_to_jiffies(10240) /* 10.24 sec */ @@ -2048,7 +2059,8 @@ void mgmt_start_discovery_complete(struct hci_dev *hdev, u8 status); void mgmt_stop_discovery_complete(struct hci_dev *hdev, u8 status); void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 *dev_class, s8 rssi, u32 flags, - u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len); + u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len, + u64 instant); void mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, s8 rssi, u8 *name, u8 name_len); void mgmt_discovering(struct hci_dev *hdev, u8 discovering); diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 7c1ad0f6fcec..743f6f59dff8 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -837,6 +837,42 @@ struct mgmt_cp_add_adv_patterns_monitor_rssi { struct mgmt_adv_pattern patterns[]; } __packed; #define MGMT_ADD_ADV_PATTERNS_MONITOR_RSSI_SIZE 8 +#define MGMT_OP_SET_MESH_RECEIVER 0x0057 +struct mgmt_cp_set_mesh { + __u8 enable; + __le16 window; + __le16 period; + __u8 num_ad_types; + __u8 ad_types[]; +} __packed; +#define MGMT_SET_MESH_RECEIVER_SIZE 6 + +#define MGMT_OP_MESH_READ_FEATURES 0x0058 +#define MGMT_MESH_READ_FEATURES_SIZE 0 +#define MESH_HANDLES_MAX 3 +struct mgmt_rp_mesh_read_features { + __le16 index; + __u8 max_handles; + __u8 used_handles; + __u8 handles[MESH_HANDLES_MAX]; +} __packed; + +#define MGMT_OP_MESH_SEND 0x0059 +struct mgmt_cp_mesh_send { + struct mgmt_addr_info addr; + __le64 instant; + __le16 delay; + __u8 cnt; + __u8 adv_data_len; + __u8 adv_data[]; +} __packed; +#define MGMT_MESH_SEND_SIZE 19 + +#define MGMT_OP_MESH_SEND_CANCEL 0x005A +struct mgmt_cp_mesh_send_cancel { + __u8 handle; +} __packed; +#define MGMT_MESH_SEND_CANCEL_SIZE 1 #define MGMT_EV_CMD_COMPLETE 0x0001 struct mgmt_ev_cmd_complete { @@ -1120,3 +1156,19 @@ struct mgmt_ev_adv_monitor_device_lost { __le16 monitor_handle; struct mgmt_addr_info addr; } __packed; + +#define MGMT_EV_MESH_DEVICE_FOUND 0x0031 +struct mgmt_ev_mesh_device_found { + struct mgmt_addr_info addr; + __s8 rssi; + __le64 instant; + __le32 flags; + __le16 eir_len; + __u8 eir[]; +} __packed; + + +#define MGMT_EV_MESH_PACKET_CMPLT 0x0032 +struct mgmt_ev_mesh_pkt_cmplt { + __u8 handle; +} __packed; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 9d2c33f6b065..3803e54f23c0 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1706,7 +1706,8 @@ struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags, u16 adv_data_len, u8 *adv_data, u16 scan_rsp_len, u8 *scan_rsp_data, u16 timeout, u16 duration, s8 tx_power, - u32 min_interval, u32 max_interval) + u32 min_interval, u32 max_interval, + u8 mesh_handle) { struct adv_info *adv; @@ -1717,7 +1718,7 @@ struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance, memset(adv->per_adv_data, 0, sizeof(adv->per_adv_data)); } else { if (hdev->adv_instance_cnt >= hdev->le_num_of_adv_sets || - instance < 1 || instance > hdev->le_num_of_adv_sets) + instance < 1 || instance > hdev->le_num_of_adv_sets + 1) return ERR_PTR(-EOVERFLOW); adv = kzalloc(sizeof(*adv), GFP_KERNEL); @@ -1734,6 +1735,11 @@ struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance, adv->min_interval = min_interval; adv->max_interval = max_interval; adv->tx_power = tx_power; + /* Defining a mesh_handle changes the timing units to ms, + * rather than seconds, and ties the instance to the requested + * mesh_tx queue. + */ + adv->mesh = mesh_handle; hci_set_adv_instance_data(hdev, instance, adv_data_len, adv_data, scan_rsp_len, scan_rsp_data); @@ -1762,7 +1768,7 @@ struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance, adv = hci_add_adv_instance(hdev, instance, flags, 0, NULL, 0, NULL, 0, 0, HCI_ADV_TX_POWER_NO_PREFERENCE, - min_interval, max_interval); + min_interval, max_interval, 0); if (IS_ERR(adv)) return adv; @@ -2486,6 +2492,7 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv) mutex_init(&hdev->lock); mutex_init(&hdev->req_lock); + INIT_LIST_HEAD(&hdev->mesh_pending); INIT_LIST_HEAD(&hdev->mgmt_pending); INIT_LIST_HEAD(&hdev->reject_list); INIT_LIST_HEAD(&hdev->accept_list); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 0ed944aaed94..5acb6fa6d676 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1756,6 +1756,8 @@ static void le_set_scan_enable_complete(struct hci_dev *hdev, u8 enable) hci_dev_set_flag(hdev, HCI_LE_SCAN); if (hdev->le_scan_type == LE_SCAN_ACTIVE) clear_pending_adv_report(hdev); + if (hci_dev_test_flag(hdev, HCI_MESH)) + hci_discovery_set_state(hdev, DISCOVERY_FINDING); break; case LE_SCAN_DISABLE: @@ -1770,7 +1772,7 @@ static void le_set_scan_enable_complete(struct hci_dev *hdev, u8 enable) d->last_adv_addr_type, NULL, d->last_adv_rssi, d->last_adv_flags, d->last_adv_data, - d->last_adv_data_len, NULL, 0); + d->last_adv_data_len, NULL, 0, 0); } /* Cancel this timer so that we don't try to disable scanning @@ -1786,6 +1788,9 @@ static void le_set_scan_enable_complete(struct hci_dev *hdev, u8 enable) */ if (hci_dev_test_and_clear_flag(hdev, HCI_LE_SCAN_INTERRUPTED)) hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + else if (!hci_dev_test_flag(hdev, HCI_LE_ADV) && + hdev->discovery.state == DISCOVERY_FINDING) + queue_work(hdev->workqueue, &hdev->reenable_adv_work); break; @@ -3112,7 +3117,7 @@ static void hci_inquiry_result_evt(struct hci_dev *hdev, void *edata, mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00, info->dev_class, HCI_RSSI_INVALID, - flags, NULL, 0, NULL, 0); + flags, NULL, 0, NULL, 0, 0); } hci_dev_unlock(hdev); @@ -4827,7 +4832,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, void *edata, mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00, info->dev_class, info->rssi, - flags, NULL, 0, NULL, 0); + flags, NULL, 0, NULL, 0, 0); } } else if (skb->len == array_size(ev->num, sizeof(struct inquiry_info_rssi))) { @@ -4858,7 +4863,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, void *edata, mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00, info->dev_class, info->rssi, - flags, NULL, 0, NULL, 0); + flags, NULL, 0, NULL, 0, 0); } } else { bt_dev_err(hdev, "Malformed HCI Event: 0x%2.2x", @@ -5114,7 +5119,7 @@ static void hci_extended_inquiry_result_evt(struct hci_dev *hdev, void *edata, mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00, info->dev_class, info->rssi, - flags, info->data, eir_len, NULL, 0); + flags, info->data, eir_len, NULL, 0, 0); } hci_dev_unlock(hdev); @@ -6170,7 +6175,7 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev, static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, u8 bdaddr_type, bdaddr_t *direct_addr, u8 direct_addr_type, s8 rssi, u8 *data, u8 len, - bool ext_adv) + bool ext_adv, bool ctl_time, u64 instant) { struct discovery_state *d = &hdev->discovery; struct smp_irk *irk; @@ -6218,7 +6223,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, * important to see if the address is matching the local * controller address. */ - if (direct_addr) { + if (!hci_dev_test_flag(hdev, HCI_MESH) && direct_addr) { direct_addr_type = ev_bdaddr_type(hdev, direct_addr_type, &bdaddr_resolved); @@ -6266,6 +6271,18 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, conn->le_adv_data_len = len; } + if (type == LE_ADV_NONCONN_IND || type == LE_ADV_SCAN_IND) + flags = MGMT_DEV_FOUND_NOT_CONNECTABLE; + else + flags = 0; + + /* All scan results should be sent up for Mesh systems */ + if (hci_dev_test_flag(hdev, HCI_MESH)) { + mgmt_device_found(hdev, bdaddr, LE_LINK, bdaddr_type, NULL, + rssi, flags, data, len, NULL, 0, instant); + return; + } + /* Passive scanning shouldn't trigger any device found events, * except for devices marked as CONN_REPORT for which we do send * device found events, or advertisement monitoring requested. @@ -6279,12 +6296,8 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, idr_is_empty(&hdev->adv_monitors_idr)) return; - if (type == LE_ADV_NONCONN_IND || type == LE_ADV_SCAN_IND) - flags = MGMT_DEV_FOUND_NOT_CONNECTABLE; - else - flags = 0; mgmt_device_found(hdev, bdaddr, LE_LINK, bdaddr_type, NULL, - rssi, flags, data, len, NULL, 0); + rssi, flags, data, len, NULL, 0, 0); return; } @@ -6303,11 +6316,8 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, * and just sends a scan response event, then it is marked as * not connectable as well. */ - if (type == LE_ADV_NONCONN_IND || type == LE_ADV_SCAN_IND || - type == LE_ADV_SCAN_RSP) + if (type == LE_ADV_SCAN_RSP) flags = MGMT_DEV_FOUND_NOT_CONNECTABLE; - else - flags = 0; /* If there's nothing pending either store the data from this * event or send an immediate device found event if the data @@ -6324,7 +6334,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, } mgmt_device_found(hdev, bdaddr, LE_LINK, bdaddr_type, NULL, - rssi, flags, data, len, NULL, 0); + rssi, flags, data, len, NULL, 0, 0); return; } @@ -6343,7 +6353,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, d->last_adv_addr_type, NULL, d->last_adv_rssi, d->last_adv_flags, d->last_adv_data, - d->last_adv_data_len, NULL, 0); + d->last_adv_data_len, NULL, 0, 0); /* If the new report will trigger a SCAN_REQ store it for * later merging. @@ -6360,7 +6370,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, */ clear_pending_adv_report(hdev); mgmt_device_found(hdev, bdaddr, LE_LINK, bdaddr_type, NULL, - rssi, flags, data, len, NULL, 0); + rssi, flags, data, len, NULL, 0, 0); return; } @@ -6370,7 +6380,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, */ mgmt_device_found(hdev, &d->last_adv_addr, LE_LINK, d->last_adv_addr_type, NULL, rssi, d->last_adv_flags, - d->last_adv_data, d->last_adv_data_len, data, len); + d->last_adv_data, d->last_adv_data_len, data, len, 0); clear_pending_adv_report(hdev); } @@ -6378,6 +6388,7 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb) { struct hci_ev_le_advertising_report *ev = data; + u64 instant = jiffies; if (!ev->num) return; @@ -6402,7 +6413,8 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, void *data, rssi = info->data[info->length]; process_adv_report(hdev, info->type, &info->bdaddr, info->bdaddr_type, NULL, 0, rssi, - info->data, info->length, false); + info->data, info->length, false, + false, instant); } else { bt_dev_err(hdev, "Dropping invalid advertising data"); } @@ -6459,6 +6471,7 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb) { struct hci_ev_le_ext_adv_report *ev = data; + u64 instant = jiffies; if (!ev->num) return; @@ -6485,7 +6498,8 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, void *data, process_adv_report(hdev, legacy_evt_type, &info->bdaddr, info->bdaddr_type, NULL, 0, info->rssi, info->data, info->length, - !(evt_type & LE_EXT_ADV_LEGACY_PDU)); + !(evt_type & LE_EXT_ADV_LEGACY_PDU), + false, instant); } } @@ -6708,6 +6722,7 @@ static void hci_le_direct_adv_report_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb) { struct hci_ev_le_direct_adv_report *ev = data; + u64 instant = jiffies; int i; if (!hci_le_ev_skb_pull(hdev, skb, HCI_EV_LE_DIRECT_ADV_REPORT, @@ -6725,7 +6740,7 @@ static void hci_le_direct_adv_report_evt(struct hci_dev *hdev, void *data, process_adv_report(hdev, info->type, &info->bdaddr, info->bdaddr_type, &info->direct_addr, info->direct_addr_type, info->rssi, NULL, 0, - false); + false, false, instant); } hci_dev_unlock(hdev); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 0d015d4a8e41..b2a33a05c93e 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -2065,6 +2065,7 @@ static int hci_sock_getsockopt(struct socket *sock, int level, int optname, static void hci_sock_destruct(struct sock *sk) { + mgmt_cleanup(sk); skb_queue_purge(&sk->sk_receive_queue); skb_queue_purge(&sk->sk_write_queue); } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index e08c0503027d..fa433896ddc7 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -246,7 +246,7 @@ int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen, skb = __hci_cmd_sync_sk(hdev, opcode, plen, param, event, timeout, sk); if (IS_ERR(skb)) { bt_dev_err(hdev, "Opcode 0x%4x failed: %ld", opcode, - PTR_ERR(skb)); + PTR_ERR(skb)); return PTR_ERR(skb); } @@ -465,6 +465,48 @@ unlock: hci_dev_unlock(hdev); } +static int reenable_adv_sync(struct hci_dev *hdev, void *data) +{ + bt_dev_dbg(hdev, ""); + + if (!hci_dev_test_flag(hdev, HCI_ADVERTISING) && + list_empty(&hdev->adv_instances)) + return 0; + + if (hdev->cur_adv_instance) { + return hci_schedule_adv_instance_sync(hdev, + hdev->cur_adv_instance, + true); + } else { + if (ext_adv_capable(hdev)) { + hci_start_ext_adv_sync(hdev, 0x00); + } else { + hci_update_adv_data_sync(hdev, 0x00); + hci_update_scan_rsp_data_sync(hdev, 0x00); + hci_enable_advertising_sync(hdev); + } + } + + return 0; +} + +static void reenable_adv(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + reenable_adv_work); + int status; + + bt_dev_dbg(hdev, ""); + + hci_dev_lock(hdev); + + status = hci_cmd_sync_queue(hdev, reenable_adv_sync, NULL, NULL); + if (status) + bt_dev_err(hdev, "failed to reenable ADV: %d", status); + + hci_dev_unlock(hdev); +} + static void cancel_adv_timeout(struct hci_dev *hdev) { if (hdev->adv_instance_timeout) { @@ -587,6 +629,7 @@ void hci_cmd_sync_init(struct hci_dev *hdev) mutex_init(&hdev->cmd_sync_work_lock); INIT_WORK(&hdev->cmd_sync_cancel_work, hci_cmd_sync_cancel_work); + INIT_WORK(&hdev->reenable_adv_work, reenable_adv); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable); INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart); INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire); @@ -597,6 +640,7 @@ void hci_cmd_sync_clear(struct hci_dev *hdev) struct hci_cmd_sync_work_entry *entry, *tmp; cancel_work_sync(&hdev->cmd_sync_work); + cancel_work_sync(&hdev->reenable_adv_work); list_for_each_entry_safe(entry, tmp, &hdev->cmd_sync_work_list, list) { if (entry->destroy) @@ -1746,10 +1790,13 @@ static int hci_clear_adv_sets_sync(struct hci_dev *hdev, struct sock *sk) static int hci_clear_adv_sync(struct hci_dev *hdev, struct sock *sk, bool force) { struct adv_info *adv, *n; + int err = 0; if (ext_adv_capable(hdev)) /* Remove all existing sets */ - return hci_clear_adv_sets_sync(hdev, sk); + err = hci_clear_adv_sets_sync(hdev, sk); + if (ext_adv_capable(hdev)) + return err; /* This is safe as long as there is no command send while the lock is * held. @@ -1777,11 +1824,13 @@ static int hci_clear_adv_sync(struct hci_dev *hdev, struct sock *sk, bool force) static int hci_remove_adv_sync(struct hci_dev *hdev, u8 instance, struct sock *sk) { - int err; + int err = 0; /* If we use extended advertising, instance has to be removed first. */ if (ext_adv_capable(hdev)) - return hci_remove_ext_adv_instance_sync(hdev, instance, sk); + err = hci_remove_ext_adv_instance_sync(hdev, instance, sk); + if (ext_adv_capable(hdev)) + return err; /* This is safe as long as there is no command send while the lock is * held. @@ -1880,13 +1929,16 @@ int hci_read_tx_power_sync(struct hci_dev *hdev, __le16 handle, u8 type) int hci_disable_advertising_sync(struct hci_dev *hdev) { u8 enable = 0x00; + int err = 0; /* If controller is not advertising we are done. */ if (!hci_dev_test_flag(hdev, HCI_LE_ADV)) return 0; if (ext_adv_capable(hdev)) - return hci_disable_ext_adv_instance_sync(hdev, 0x00); + err = hci_disable_ext_adv_instance_sync(hdev, 0x00); + if (ext_adv_capable(hdev)) + return err; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable, HCI_CMD_TIMEOUT); @@ -1899,7 +1951,11 @@ static int hci_le_set_ext_scan_enable_sync(struct hci_dev *hdev, u8 val, memset(&cp, 0, sizeof(cp)); cp.enable = val; - cp.filter_dup = filter_dup; + + if (hci_dev_test_flag(hdev, HCI_MESH)) + cp.filter_dup = LE_SCAN_FILTER_DUP_DISABLE; + else + cp.filter_dup = filter_dup; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_SCAN_ENABLE, sizeof(cp), &cp, HCI_CMD_TIMEOUT); @@ -1915,7 +1971,11 @@ static int hci_le_set_scan_enable_sync(struct hci_dev *hdev, u8 val, memset(&cp, 0, sizeof(cp)); cp.enable = val; - cp.filter_dup = filter_dup; + + if (val && hci_dev_test_flag(hdev, HCI_MESH)) + cp.filter_dup = LE_SCAN_FILTER_DUP_DISABLE; + else + cp.filter_dup = filter_dup; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp, HCI_CMD_TIMEOUT); @@ -2554,6 +2614,7 @@ static int hci_passive_scan_sync(struct hci_dev *hdev) u8 own_addr_type; u8 filter_policy; u16 window, interval; + u8 filter_dups = LE_SCAN_FILTER_DUP_ENABLE; int err; if (hdev->scanning_paused) { @@ -2616,11 +2677,16 @@ static int hci_passive_scan_sync(struct hci_dev *hdev) interval = hdev->le_scan_interval; } + /* Disable all filtering for Mesh */ + if (hci_dev_test_flag(hdev, HCI_MESH)) { + filter_policy = 0; + filter_dups = LE_SCAN_FILTER_DUP_DISABLE; + } + bt_dev_dbg(hdev, "LE passive scan with acceptlist = %d", filter_policy); return hci_start_scan_sync(hdev, LE_SCAN_PASSIVE, interval, window, - own_addr_type, filter_policy, - LE_SCAN_FILTER_DUP_ENABLE); + own_addr_type, filter_policy, filter_dups); } /* This function controls the passive scanning based on hdev->pend_le_conns @@ -2670,7 +2736,8 @@ int hci_update_passive_scan_sync(struct hci_dev *hdev) bt_dev_dbg(hdev, "ADV monitoring is %s", hci_is_adv_monitoring(hdev) ? "on" : "off"); - if (list_empty(&hdev->pend_le_conns) && + if (!hci_dev_test_flag(hdev, HCI_MESH) && + list_empty(&hdev->pend_le_conns) && list_empty(&hdev->pend_le_reports) && !hci_is_adv_monitoring(hdev) && !hci_dev_test_flag(hdev, HCI_PA_SYNC)) { diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 8d70f4a709d4..e1c404ac8ce6 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -129,6 +129,10 @@ static const u16 mgmt_commands[] = { MGMT_OP_ADD_EXT_ADV_PARAMS, MGMT_OP_ADD_EXT_ADV_DATA, MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, + MGMT_OP_SET_MESH_RECEIVER, + MGMT_OP_MESH_READ_FEATURES, + MGMT_OP_MESH_SEND, + MGMT_OP_MESH_SEND_CANCEL, }; static const u16 mgmt_events[] = { @@ -1048,6 +1052,63 @@ static void discov_off(struct work_struct *work) hci_dev_unlock(hdev); } +static int send_settings_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev); + +static void mesh_send_complete(struct hci_dev *hdev, + struct mgmt_mesh_tx *mesh_tx, bool silent) +{ + u8 handle = mesh_tx->handle; + + if (!silent) + mgmt_event(MGMT_EV_MESH_PACKET_CMPLT, hdev, &handle, + sizeof(handle), NULL); + + mgmt_mesh_remove(mesh_tx); +} + +static int mesh_send_done_sync(struct hci_dev *hdev, void *data) +{ + struct mgmt_mesh_tx *mesh_tx; + + hci_dev_clear_flag(hdev, HCI_MESH_SENDING); + hci_disable_advertising_sync(hdev); + mesh_tx = mgmt_mesh_next(hdev, NULL); + + if (mesh_tx) + mesh_send_complete(hdev, mesh_tx, false); + + return 0; +} + +static int mesh_send_sync(struct hci_dev *hdev, void *data); +static void mesh_send_start_complete(struct hci_dev *hdev, void *data, int err); +static void mesh_next(struct hci_dev *hdev, void *data, int err) +{ + struct mgmt_mesh_tx *mesh_tx = mgmt_mesh_next(hdev, NULL); + + if (!mesh_tx) + return; + + err = hci_cmd_sync_queue(hdev, mesh_send_sync, mesh_tx, + mesh_send_start_complete); + + if (err < 0) + mesh_send_complete(hdev, mesh_tx, false); + else + hci_dev_set_flag(hdev, HCI_MESH_SENDING); +} + +static void mesh_send_done(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + mesh_send_done.work); + + if (!hci_dev_test_flag(hdev, HCI_MESH_SENDING)) + return; + + hci_cmd_sync_queue(hdev, mesh_send_done_sync, NULL, mesh_next); +} + static void mgmt_init_hdev(struct sock *sk, struct hci_dev *hdev) { if (hci_dev_test_and_set_flag(hdev, HCI_MGMT)) @@ -1058,6 +1119,7 @@ static void mgmt_init_hdev(struct sock *sk, struct hci_dev *hdev) INIT_DELAYED_WORK(&hdev->discov_off, discov_off); INIT_DELAYED_WORK(&hdev->service_cache, service_cache_off); INIT_DELAYED_WORK(&hdev->rpa_expired, rpa_expired); + INIT_DELAYED_WORK(&hdev->mesh_send_done, mesh_send_done); /* Non-mgmt controlled devices get this bit set * implicitly so that pairing works for them, however @@ -2094,6 +2156,306 @@ static int set_le_sync(struct hci_dev *hdev, void *data) return err; } +static void set_mesh_complete(struct hci_dev *hdev, void *data, int err) +{ + struct mgmt_pending_cmd *cmd = data; + u8 status = mgmt_status(err); + struct sock *sk = cmd->sk; + + if (status) { + mgmt_pending_foreach(MGMT_OP_SET_MESH_RECEIVER, hdev, + cmd_status_rsp, &status); + return; + } + + mgmt_pending_remove(cmd); + mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, 0, NULL, 0); +} + +static int set_mesh_sync(struct hci_dev *hdev, void *data) +{ + struct mgmt_pending_cmd *cmd = data; + struct mgmt_cp_set_mesh *cp = cmd->param; + size_t len = cmd->param_len; + + memset(hdev->mesh_ad_types, 0, sizeof(hdev->mesh_ad_types)); + + if (cp->enable) + hci_dev_set_flag(hdev, HCI_MESH); + else + hci_dev_clear_flag(hdev, HCI_MESH); + + len -= sizeof(*cp); + + /* If filters don't fit, forward all adv pkts */ + if (len <= sizeof(hdev->mesh_ad_types)) + memcpy(hdev->mesh_ad_types, cp->ad_types, len); + + hci_update_passive_scan_sync(hdev); + return 0; +} + +static int set_mesh(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) +{ + struct mgmt_cp_set_mesh *cp = data; + struct mgmt_pending_cmd *cmd; + int err = 0; + + bt_dev_dbg(hdev, "sock %p", sk); + + if (!lmp_le_capable(hdev)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, + MGMT_STATUS_NOT_SUPPORTED); + + if (cp->enable != 0x00 && cp->enable != 0x01) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, + MGMT_STATUS_INVALID_PARAMS); + + hci_dev_lock(hdev); + + cmd = mgmt_pending_add(sk, MGMT_OP_SET_MESH_RECEIVER, hdev, data, len); + if (!cmd) + err = -ENOMEM; + else + err = hci_cmd_sync_queue(hdev, set_mesh_sync, cmd, + set_mesh_complete); + + if (err < 0) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, + MGMT_STATUS_FAILED); + + if (cmd) + mgmt_pending_remove(cmd); + } + + hci_dev_unlock(hdev); + return err; +} + +static void mesh_send_start_complete(struct hci_dev *hdev, void *data, int err) +{ + struct mgmt_mesh_tx *mesh_tx = data; + struct mgmt_cp_mesh_send *send = (void *)mesh_tx->param; + unsigned long mesh_send_interval; + u8 mgmt_err = mgmt_status(err); + + /* Report any errors here, but don't report completion */ + + if (mgmt_err) { + hci_dev_clear_flag(hdev, HCI_MESH_SENDING); + /* Send Complete Error Code for handle */ + mesh_send_complete(hdev, mesh_tx, false); + return; + } + + mesh_send_interval = msecs_to_jiffies((send->cnt) * 25); + queue_delayed_work(hdev->req_workqueue, &hdev->mesh_send_done, + mesh_send_interval); +} + +static int mesh_send_sync(struct hci_dev *hdev, void *data) +{ + struct mgmt_mesh_tx *mesh_tx = data; + struct mgmt_cp_mesh_send *send = (void *)mesh_tx->param; + struct adv_info *adv, *next_instance; + u8 instance = hdev->le_num_of_adv_sets + 1; + u16 timeout, duration; + int err = 0; + + if (hdev->le_num_of_adv_sets <= hdev->adv_instance_cnt) + return MGMT_STATUS_BUSY; + + timeout = 1000; + duration = send->cnt * INTERVAL_TO_MS(hdev->le_adv_max_interval); + adv = hci_add_adv_instance(hdev, instance, 0, + send->adv_data_len, send->adv_data, + 0, NULL, + timeout, duration, + HCI_ADV_TX_POWER_NO_PREFERENCE, + hdev->le_adv_min_interval, + hdev->le_adv_max_interval, + mesh_tx->handle); + + if (!IS_ERR(adv)) + mesh_tx->instance = instance; + else + err = PTR_ERR(adv); + + if (hdev->cur_adv_instance == instance) { + /* If the currently advertised instance is being changed then + * cancel the current advertising and schedule the next + * instance. If there is only one instance then the overridden + * advertising data will be visible right away. + */ + cancel_adv_timeout(hdev); + + next_instance = hci_get_next_instance(hdev, instance); + if (next_instance) + instance = next_instance->instance; + else + instance = 0; + } else if (hdev->adv_instance_timeout) { + /* Immediately advertise the new instance if no other, or + * let it go naturally from queue if ADV is already happening + */ + instance = 0; + } + + if (instance) + return hci_schedule_adv_instance_sync(hdev, instance, true); + + return err; +} + +static void send_count(struct mgmt_mesh_tx *mesh_tx, void *data) +{ + struct mgmt_rp_mesh_read_features *rp = data; + + if (rp->used_handles >= rp->max_handles) + return; + + rp->handles[rp->used_handles++] = mesh_tx->handle; +} + +static int mesh_features(struct sock *sk, struct hci_dev *hdev, + void *data, u16 len) +{ + struct mgmt_rp_mesh_read_features rp; + + if (!lmp_le_capable(hdev)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_READ_FEATURES, + MGMT_STATUS_NOT_SUPPORTED); + + memset(&rp, 0, sizeof(rp)); + rp.index = cpu_to_le16(hdev->id); + if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) + rp.max_handles = MESH_HANDLES_MAX; + + hci_dev_lock(hdev); + + if (rp.max_handles) + mgmt_mesh_foreach(hdev, send_count, &rp, sk); + + mgmt_cmd_complete(sk, hdev->id, MGMT_OP_MESH_READ_FEATURES, 0, &rp, + rp.used_handles + sizeof(rp) - MESH_HANDLES_MAX); + + hci_dev_unlock(hdev); + return 0; +} + +static int send_cancel(struct hci_dev *hdev, void *data) +{ + struct mgmt_pending_cmd *cmd = data; + struct mgmt_cp_mesh_send_cancel *cancel = (void *)cmd->param; + struct mgmt_mesh_tx *mesh_tx; + + if (!cancel->handle) { + do { + mesh_tx = mgmt_mesh_next(hdev, cmd->sk); + + if (mesh_tx) + mesh_send_complete(hdev, mesh_tx, false); + } while (mesh_tx); + } else { + mesh_tx = mgmt_mesh_find(hdev, cancel->handle); + + if (mesh_tx && mesh_tx->sk == cmd->sk) + mesh_send_complete(hdev, mesh_tx, false); + } + + mgmt_cmd_complete(cmd->sk, hdev->id, MGMT_OP_MESH_SEND_CANCEL, + 0, NULL, 0); + mgmt_pending_free(cmd); + + return 0; +} + +static int mesh_send_cancel(struct sock *sk, struct hci_dev *hdev, + void *data, u16 len) +{ + struct mgmt_pending_cmd *cmd; + int err; + + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND_CANCEL, + MGMT_STATUS_REJECTED); + + hci_dev_lock(hdev); + cmd = mgmt_pending_new(sk, MGMT_OP_MESH_SEND_CANCEL, hdev, data, len); + if (!cmd) + err = -ENOMEM; + else + err = hci_cmd_sync_queue(hdev, send_cancel, cmd, NULL); + + if (err < 0) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND_CANCEL, + MGMT_STATUS_FAILED); + + if (cmd) + mgmt_pending_free(cmd); + } + + hci_dev_unlock(hdev); + return err; +} + +static int mesh_send(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) +{ + struct mgmt_mesh_tx *mesh_tx; + struct mgmt_cp_mesh_send *send = data; + struct mgmt_rp_mesh_read_features rp; + bool sending; + int err = 0; + + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED) || + len <= MGMT_MESH_SEND_SIZE || + len > (MGMT_MESH_SEND_SIZE + 31)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND, + MGMT_STATUS_REJECTED); + + hci_dev_lock(hdev); + + memset(&rp, 0, sizeof(rp)); + rp.max_handles = MESH_HANDLES_MAX; + + mgmt_mesh_foreach(hdev, send_count, &rp, sk); + + if (rp.max_handles <= rp.used_handles) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND, + MGMT_STATUS_BUSY); + goto done; + } + + sending = hci_dev_test_flag(hdev, HCI_MESH_SENDING); + mesh_tx = mgmt_mesh_add(sk, hdev, send, len); + + if (!mesh_tx) + err = -ENOMEM; + else if (!sending) + err = hci_cmd_sync_queue(hdev, mesh_send_sync, mesh_tx, + mesh_send_start_complete); + + if (err < 0) { + bt_dev_err(hdev, "Send Mesh Failed %d", err); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND, + MGMT_STATUS_FAILED); + + if (mesh_tx) { + if (sending) + mgmt_mesh_remove(mesh_tx); + } + } else { + hci_dev_set_flag(hdev, HCI_MESH_SENDING); + + mgmt_cmd_complete(sk, hdev->id, MGMT_OP_MESH_SEND, 0, + &mesh_tx->handle, 1); + } + +done: + hci_dev_unlock(hdev); + return err; +} + static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; @@ -5993,6 +6355,7 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data, if (!hdev_is_powered(hdev) || (val == hci_dev_test_flag(hdev, HCI_ADVERTISING) && (cp->val == 0x02) == hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE)) || + hci_dev_test_flag(hdev, HCI_MESH) || hci_conn_num(hdev, LE_LINK) > 0 || (hci_dev_test_flag(hdev, HCI_LE_SCAN) && hdev->le_scan_type == LE_SCAN_ACTIVE)) { @@ -7921,8 +8284,7 @@ static u32 get_supported_adv_flags(struct hci_dev *hdev) /* In extended adv TX_POWER returned from Set Adv Param * will be always valid. */ - if ((hdev->adv_tx_power != HCI_TX_POWER_INVALID) || - ext_adv_capable(hdev)) + if (hdev->adv_tx_power != HCI_TX_POWER_INVALID || ext_adv_capable(hdev)) flags |= MGMT_ADV_FLAG_TX_POWER; if (ext_adv_capable(hdev)) { @@ -7975,8 +8337,14 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev, instance = rp->instance; list_for_each_entry(adv_instance, &hdev->adv_instances, list) { - *instance = adv_instance->instance; - instance++; + /* Only instances 1-le_num_of_adv_sets are externally visible */ + if (adv_instance->instance <= hdev->adv_instance_cnt) { + *instance = adv_instance->instance; + instance++; + } else { + rp->num_instances--; + rp_len--; + } } hci_dev_unlock(hdev); @@ -8238,7 +8606,7 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, timeout, duration, HCI_ADV_TX_POWER_NO_PREFERENCE, hdev->le_adv_min_interval, - hdev->le_adv_max_interval); + hdev->le_adv_max_interval, 0); if (IS_ERR(adv)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, MGMT_STATUS_FAILED); @@ -8442,7 +8810,7 @@ static int add_ext_adv_params(struct sock *sk, struct hci_dev *hdev, /* Create advertising instance with no advertising or response data */ adv = hci_add_adv_instance(hdev, cp->instance, flags, 0, NULL, 0, NULL, timeout, duration, tx_power, min_interval, - max_interval); + max_interval, 0); if (IS_ERR(adv)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_PARAMS, @@ -8888,8 +9256,13 @@ static const struct hci_mgmt_handler mgmt_handlers[] = { { add_ext_adv_data, MGMT_ADD_EXT_ADV_DATA_SIZE, HCI_MGMT_VAR_LEN }, { add_adv_patterns_monitor_rssi, - MGMT_ADD_ADV_PATTERNS_MONITOR_RSSI_SIZE, + MGMT_ADD_ADV_PATTERNS_MONITOR_RSSI_SIZE }, + { set_mesh, MGMT_SET_MESH_RECEIVER_SIZE, HCI_MGMT_VAR_LEN }, + { mesh_features, MGMT_MESH_READ_FEATURES_SIZE }, + { mesh_send, MGMT_MESH_SEND_SIZE, + HCI_MGMT_VAR_LEN }, + { mesh_send_cancel, MGMT_MESH_SEND_CANCEL_SIZE }, }; void mgmt_index_added(struct hci_dev *hdev) @@ -9829,14 +10202,86 @@ static void mgmt_adv_monitor_device_found(struct hci_dev *hdev, kfree_skb(skb); } +static void mesh_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, + u8 addr_type, s8 rssi, u32 flags, u8 *eir, + u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len, + u64 instant) +{ + struct sk_buff *skb; + struct mgmt_ev_mesh_device_found *ev; + int i, j; + + if (!hdev->mesh_ad_types[0]) + goto accepted; + + /* Scan for requested AD types */ + if (eir_len > 0) { + for (i = 0; i + 1 < eir_len; i += eir[i] + 1) { + for (j = 0; j < sizeof(hdev->mesh_ad_types); j++) { + if (!hdev->mesh_ad_types[j]) + break; + + if (hdev->mesh_ad_types[j] == eir[i + 1]) + goto accepted; + } + } + } + + if (scan_rsp_len > 0) { + for (i = 0; i + 1 < scan_rsp_len; i += scan_rsp[i] + 1) { + for (j = 0; j < sizeof(hdev->mesh_ad_types); j++) { + if (!hdev->mesh_ad_types[j]) + break; + + if (hdev->mesh_ad_types[j] == scan_rsp[i + 1]) + goto accepted; + } + } + } + + return; + +accepted: + skb = mgmt_alloc_skb(hdev, MGMT_EV_MESH_DEVICE_FOUND, + sizeof(*ev) + eir_len + scan_rsp_len); + if (!skb) + return; + + ev = skb_put(skb, sizeof(*ev)); + + bacpy(&ev->addr.bdaddr, bdaddr); + ev->addr.type = link_to_bdaddr(LE_LINK, addr_type); + ev->rssi = rssi; + ev->flags = cpu_to_le32(flags); + ev->instant = cpu_to_le64(instant); + + if (eir_len > 0) + /* Copy EIR or advertising data into event */ + skb_put_data(skb, eir, eir_len); + + if (scan_rsp_len > 0) + /* Append scan response data to event */ + skb_put_data(skb, scan_rsp, scan_rsp_len); + + ev->eir_len = cpu_to_le16(eir_len + scan_rsp_len); + + mgmt_event_skb(skb, NULL); +} + void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 *dev_class, s8 rssi, u32 flags, - u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len) + u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len, + u64 instant) { struct sk_buff *skb; struct mgmt_ev_device_found *ev; bool report_device = hci_discovery_active(hdev); + if (hci_dev_test_flag(hdev, HCI_MESH) && link_type == LE_LINK) + mesh_device_found(hdev, bdaddr, addr_type, rssi, flags, + eir, eir_len, scan_rsp, scan_rsp_len, + instant); + /* Don't send events for a non-kernel initiated discovery. With * LE one exception is if we have pend_le_reports > 0 in which * case we're doing passive scanning and want these events. @@ -9995,3 +10440,22 @@ void mgmt_exit(void) { hci_mgmt_chan_unregister(&chan); } + +void mgmt_cleanup(struct sock *sk) +{ + struct mgmt_mesh_tx *mesh_tx; + struct hci_dev *hdev; + + read_lock(&hci_dev_list_lock); + + list_for_each_entry(hdev, &hci_dev_list, list) { + do { + mesh_tx = mgmt_mesh_next(hdev, sk); + + if (mesh_tx) + mesh_send_complete(hdev, mesh_tx, true); + } while (mesh_tx); + } + + read_unlock(&hci_dev_list_lock); +} diff --git a/net/bluetooth/mgmt_util.c b/net/bluetooth/mgmt_util.c index b69cfed62088..0115f783bde8 100644 --- a/net/bluetooth/mgmt_util.c +++ b/net/bluetooth/mgmt_util.c @@ -314,3 +314,77 @@ void mgmt_pending_remove(struct mgmt_pending_cmd *cmd) list_del(&cmd->list); mgmt_pending_free(cmd); } + +void mgmt_mesh_foreach(struct hci_dev *hdev, + void (*cb)(struct mgmt_mesh_tx *mesh_tx, void *data), + void *data, struct sock *sk) +{ + struct mgmt_mesh_tx *mesh_tx, *tmp; + + list_for_each_entry_safe(mesh_tx, tmp, &hdev->mgmt_pending, list) { + if (!sk || mesh_tx->sk == sk) + cb(mesh_tx, data); + } +} + +struct mgmt_mesh_tx *mgmt_mesh_next(struct hci_dev *hdev, struct sock *sk) +{ + struct mgmt_mesh_tx *mesh_tx; + + if (list_empty(&hdev->mesh_pending)) + return NULL; + + list_for_each_entry(mesh_tx, &hdev->mesh_pending, list) { + if (!sk || mesh_tx->sk == sk) + return mesh_tx; + } + + return NULL; +} + +struct mgmt_mesh_tx *mgmt_mesh_find(struct hci_dev *hdev, u8 handle) +{ + struct mgmt_mesh_tx *mesh_tx; + + if (list_empty(&hdev->mesh_pending)) + return NULL; + + list_for_each_entry(mesh_tx, &hdev->mesh_pending, list) { + if (mesh_tx->handle == handle) + return mesh_tx; + } + + return NULL; +} + +struct mgmt_mesh_tx *mgmt_mesh_add(struct sock *sk, struct hci_dev *hdev, + void *data, u16 len) +{ + struct mgmt_mesh_tx *mesh_tx; + + mesh_tx = kzalloc(sizeof(*mesh_tx), GFP_KERNEL); + if (!mesh_tx) + return NULL; + + hdev->mesh_send_ref++; + if (!hdev->mesh_send_ref) + hdev->mesh_send_ref++; + + mesh_tx->handle = hdev->mesh_send_ref; + mesh_tx->index = hdev->id; + memcpy(mesh_tx->param, data, len); + mesh_tx->param_len = len; + mesh_tx->sk = sk; + sock_hold(sk); + + list_add_tail(&mesh_tx->list, &hdev->mesh_pending); + + return mesh_tx; +} + +void mgmt_mesh_remove(struct mgmt_mesh_tx *mesh_tx) +{ + list_del(&mesh_tx->list); + sock_put(mesh_tx->sk); + kfree(mesh_tx); +} diff --git a/net/bluetooth/mgmt_util.h b/net/bluetooth/mgmt_util.h index 98e40395a383..6a8b7e84293d 100644 --- a/net/bluetooth/mgmt_util.h +++ b/net/bluetooth/mgmt_util.h @@ -20,6 +20,16 @@ SOFTWARE IS DISCLAIMED. */ +struct mgmt_mesh_tx { + struct list_head list; + int index; + size_t param_len; + struct sock *sk; + u8 handle; + u8 instance; + u8 param[sizeof(struct mgmt_cp_mesh_send) + 29]; +}; + struct mgmt_pending_cmd { struct list_head list; u16 opcode; @@ -59,3 +69,11 @@ struct mgmt_pending_cmd *mgmt_pending_new(struct sock *sk, u16 opcode, void *data, u16 len); void mgmt_pending_free(struct mgmt_pending_cmd *cmd); void mgmt_pending_remove(struct mgmt_pending_cmd *cmd); +void mgmt_mesh_foreach(struct hci_dev *hdev, + void (*cb)(struct mgmt_mesh_tx *mesh_tx, void *data), + void *data, struct sock *sk); +struct mgmt_mesh_tx *mgmt_mesh_find(struct hci_dev *hdev, u8 handle); +struct mgmt_mesh_tx *mgmt_mesh_next(struct hci_dev *hdev, struct sock *sk); +struct mgmt_mesh_tx *mgmt_mesh_add(struct sock *sk, struct hci_dev *hdev, + void *data, u16 len); +void mgmt_mesh_remove(struct mgmt_mesh_tx *mesh_tx); -- cgit v1.2.3-70-g09d2 From af6bcc1921ff0b644d2d750c0e3a88623b7211f5 Mon Sep 17 00:00:00 2001 From: Brian Gix Date: Thu, 1 Sep 2022 12:19:14 -0700 Subject: Bluetooth: Add experimental wrapper for MGMT based mesh This introduces a "Mesh UUID" and an Experimental Feature bit to the hdev mask, and depending all underlying Mesh functionality on it. Signed-off-by: Brian Gix Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 1 + net/bluetooth/mgmt.c | 112 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 105 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index b3ade687531f..e004ba04a9ae 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -354,6 +354,7 @@ enum { HCI_LE_SIMULTANEOUS_ROLES, HCI_CMD_DRAIN_WORKQUEUE, + HCI_MESH_EXPERIMENTAL, HCI_MESH, HCI_MESH_SENDING, diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index e1c404ac8ce6..4c421ebac669 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2203,7 +2203,8 @@ static int set_mesh(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) bt_dev_dbg(hdev, "sock %p", sk); - if (!lmp_le_capable(hdev)) + if (!lmp_le_capable(hdev) || + !hci_dev_test_flag(hdev, HCI_MESH_EXPERIMENTAL)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, MGMT_STATUS_NOT_SUPPORTED); @@ -2322,7 +2323,8 @@ static int mesh_features(struct sock *sk, struct hci_dev *hdev, { struct mgmt_rp_mesh_read_features rp; - if (!lmp_le_capable(hdev)) + if (!lmp_le_capable(hdev) || + !hci_dev_test_flag(hdev, HCI_MESH_EXPERIMENTAL)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_READ_FEATURES, MGMT_STATUS_NOT_SUPPORTED); @@ -2376,6 +2378,11 @@ static int mesh_send_cancel(struct sock *sk, struct hci_dev *hdev, struct mgmt_pending_cmd *cmd; int err; + if (!lmp_le_capable(hdev) || + !hci_dev_test_flag(hdev, HCI_MESH_EXPERIMENTAL)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND_CANCEL, + MGMT_STATUS_NOT_SUPPORTED); + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND_CANCEL, MGMT_STATUS_REJECTED); @@ -2407,6 +2414,10 @@ static int mesh_send(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) bool sending; int err = 0; + if (!lmp_le_capable(hdev) || + !hci_dev_test_flag(hdev, HCI_MESH_EXPERIMENTAL)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND, + MGMT_STATUS_NOT_SUPPORTED); if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED) || len <= MGMT_MESH_SEND_SIZE || len > (MGMT_MESH_SEND_SIZE + 31)) @@ -4365,17 +4376,30 @@ static const u8 iso_socket_uuid[16] = { 0x6a, 0x49, 0xe0, 0x05, 0x88, 0xf1, 0xba, 0x6f, }; +/* 2ce463d7-7a03-4d8d-bf05-5f24e8f36e76 */ +static const u8 mgmt_mesh_uuid[16] = { + 0x76, 0x6e, 0xf3, 0xe8, 0x24, 0x5f, 0x05, 0xbf, + 0x8d, 0x4d, 0x03, 0x7a, 0xd7, 0x63, 0xe4, 0x2c, +}; + static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { - char buf[122]; /* Enough space for 6 features: 2 + 20 * 6 */ - struct mgmt_rp_read_exp_features_info *rp = (void *)buf; + struct mgmt_rp_read_exp_features_info *rp; + size_t len; u16 idx = 0; u32 flags; + int status; bt_dev_dbg(hdev, "sock %p", sk); - memset(&buf, 0, sizeof(buf)); + /* Enough space for 7 features */ + len = sizeof(*rp) + (sizeof(rp->features[0]) * 7); + rp = kmalloc(len, GFP_KERNEL); + if (!rp) + return -ENOMEM; + + memset(rp, 0, len); #ifdef CONFIG_BT_FEATURE_DEBUG if (!hdev) { @@ -4439,6 +4463,17 @@ static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev, idx++; } + if (hdev && lmp_le_capable(hdev)) { + if (hci_dev_test_flag(hdev, HCI_MESH_EXPERIMENTAL)) + flags = BIT(0); + else + flags = 0; + + memcpy(rp->features[idx].uuid, mgmt_mesh_uuid, 16); + rp->features[idx].flags = cpu_to_le32(flags); + idx++; + } + rp->feature_count = cpu_to_le16(idx); /* After reading the experimental features information, enable @@ -4446,9 +4481,12 @@ static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev, */ hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS); - return mgmt_cmd_complete(sk, hdev ? hdev->id : MGMT_INDEX_NONE, - MGMT_OP_READ_EXP_FEATURES_INFO, - 0, rp, sizeof(*rp) + (20 * idx)); + status = mgmt_cmd_complete(sk, hdev ? hdev->id : MGMT_INDEX_NONE, + MGMT_OP_READ_EXP_FEATURES_INFO, + 0, rp, sizeof(*rp) + (20 * idx)); + + kfree(rp); + return status; } static int exp_ll_privacy_feature_changed(bool enabled, struct hci_dev *hdev, @@ -4576,6 +4614,63 @@ static int set_debug_func(struct sock *sk, struct hci_dev *hdev, } #endif +static int set_mgmt_mesh_func(struct sock *sk, struct hci_dev *hdev, + struct mgmt_cp_set_exp_feature *cp, u16 data_len) +{ + struct mgmt_rp_set_exp_feature rp; + bool val, changed; + int err; + + /* Command requires to use the controller index */ + if (!hdev) + return mgmt_cmd_status(sk, MGMT_INDEX_NONE, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_INVALID_INDEX); + + /* Changes can only be made when controller is powered down */ + if (hdev_is_powered(hdev)) + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_REJECTED); + + /* Parameters are limited to a single octet */ + if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1) + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_INVALID_PARAMS); + + /* Only boolean on/off is supported */ + if (cp->param[0] != 0x00 && cp->param[0] != 0x01) + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_INVALID_PARAMS); + + val = !!cp->param[0]; + + if (val) { + changed = !hci_dev_test_and_set_flag(hdev, + HCI_MESH_EXPERIMENTAL); + } else { + hci_dev_clear_flag(hdev, HCI_MESH); + changed = hci_dev_test_and_clear_flag(hdev, + HCI_MESH_EXPERIMENTAL); + } + + memcpy(rp.uuid, mgmt_mesh_uuid, 16); + rp.flags = cpu_to_le32(val ? BIT(0) : 0); + + hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS); + + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_SET_EXP_FEATURE, 0, + &rp, sizeof(rp)); + + if (changed) + exp_feature_changed(hdev, mgmt_mesh_uuid, val, sk); + + return err; +} + static int set_rpa_resolution_func(struct sock *sk, struct hci_dev *hdev, struct mgmt_cp_set_exp_feature *cp, u16 data_len) @@ -4891,6 +4986,7 @@ static const struct mgmt_exp_feature { #ifdef CONFIG_BT_FEATURE_DEBUG EXP_FEAT(debug_uuid, set_debug_func), #endif + EXP_FEAT(mgmt_mesh_uuid, set_mgmt_mesh_func), EXP_FEAT(rpa_resolution_uuid, set_rpa_resolution_func), EXP_FEAT(quality_report_uuid, set_quality_report_func), EXP_FEAT(offload_codecs_uuid, set_offload_codec_func), -- cgit v1.2.3-70-g09d2 From 08724ef69907214ce622344fe4945412e38368f0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 5 Sep 2022 12:09:36 +0200 Subject: netlink: introduce NLA_POLICY_MAX_BE netlink allows to specify allowed ranges for integer types. Unfortunately, nfnetlink passes integers in big endian, so the existing NLA_POLICY_MAX() cannot be used. At the moment, nfnetlink users, such as nf_tables, need to resort to programmatic checking via helpers such as nft_parse_u32_check(). This is both cumbersome and error prone. This adds NLA_POLICY_MAX_BE which adds range check support for BE16, BE32 and BE64 integers. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/netlink.h | 9 +++++++++ lib/nlattr.c | 31 +++++++++++++++++++++++++++---- 2 files changed, 36 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/netlink.h b/include/net/netlink.h index e658d18afa67..4418b1981e31 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -325,6 +325,7 @@ struct nla_policy { struct netlink_range_validation_signed *range_signed; struct { s16 min, max; + u8 network_byte_order:1; }; int (*validate)(const struct nlattr *attr, struct netlink_ext_ack *extack); @@ -418,6 +419,14 @@ struct nla_policy { .type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp), \ .validation_type = NLA_VALIDATE_MAX, \ .max = _max, \ + .network_byte_order = 0, \ +} + +#define NLA_POLICY_MAX_BE(tp, _max) { \ + .type = NLA_ENSURE_UINT_TYPE(tp), \ + .validation_type = NLA_VALIDATE_MAX, \ + .max = _max, \ + .network_byte_order = 1, \ } #define NLA_POLICY_MASK(tp, _mask) { \ diff --git a/lib/nlattr.c b/lib/nlattr.c index 86029ad5ead4..40f22b177d69 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -159,6 +159,31 @@ void nla_get_range_unsigned(const struct nla_policy *pt, } } +static u64 nla_get_attr_bo(const struct nla_policy *pt, + const struct nlattr *nla) +{ + switch (pt->type) { + case NLA_U16: + if (pt->network_byte_order) + return ntohs(nla_get_be16(nla)); + + return nla_get_u16(nla); + case NLA_U32: + if (pt->network_byte_order) + return ntohl(nla_get_be32(nla)); + + return nla_get_u32(nla); + case NLA_U64: + if (pt->network_byte_order) + return be64_to_cpu(nla_get_be64(nla)); + + return nla_get_u64(nla); + } + + WARN_ON_ONCE(1); + return 0; +} + static int nla_validate_range_unsigned(const struct nla_policy *pt, const struct nlattr *nla, struct netlink_ext_ack *extack, @@ -172,12 +197,10 @@ static int nla_validate_range_unsigned(const struct nla_policy *pt, value = nla_get_u8(nla); break; case NLA_U16: - value = nla_get_u16(nla); - break; case NLA_U32: - value = nla_get_u32(nla); - break; case NLA_U64: + value = nla_get_attr_bo(pt, nla); + break; case NLA_MSECS: value = nla_get_u64(nla); break; -- cgit v1.2.3-70-g09d2 From 0a28bfd4971fd570d1f3e4653b21415becefc92c Mon Sep 17 00:00:00 2001 From: Lior Nahmanson Date: Mon, 5 Sep 2022 22:21:13 -0700 Subject: net/macsec: Add MACsec skb_metadata_dst Tx Data path support In the current MACsec offload implementation, MACsec interfaces shares the same MAC address by default. Therefore, HW can't distinguish from which MACsec interface the traffic originated from. MACsec stack will use skb_metadata_dst to store the SCI value, which is unique per Macsec interface, skb_metadat_dst will be used by the offloading device driver to associate the SKB with the corresponding offloaded interface (SCI). Signed-off-by: Lior Nahmanson Reviewed-by: Raed Salem Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/macsec.c | 15 +++++++++++++++ include/net/dst_metadata.h | 10 ++++++++++ include/net/macsec.h | 4 ++++ 3 files changed, 29 insertions(+) (limited to 'include/net') diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index adf448a8162b..c190dc019717 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -3416,6 +3417,11 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb, int ret, len; if (macsec_is_offloaded(netdev_priv(dev))) { + struct metadata_dst *md_dst = secy->tx_sc.md_dst; + + skb_dst_drop(skb); + dst_hold(&md_dst->dst); + skb_dst_set(skb, &md_dst->dst); skb->dev = macsec->real_dev; return dev_queue_xmit(skb); } @@ -3743,6 +3749,7 @@ static void macsec_free_netdev(struct net_device *dev) { struct macsec_dev *macsec = macsec_priv(dev); + metadata_dst_free(macsec->secy.tx_sc.md_dst); free_percpu(macsec->stats); free_percpu(macsec->secy.tx_sc.stats); @@ -4015,6 +4022,13 @@ static int macsec_add_dev(struct net_device *dev, sci_t sci, u8 icv_len) return -ENOMEM; } + secy->tx_sc.md_dst = metadata_dst_alloc(0, METADATA_MACSEC, GFP_KERNEL); + if (!secy->tx_sc.md_dst) { + free_percpu(secy->tx_sc.stats); + free_percpu(macsec->stats); + return -ENOMEM; + } + if (sci == MACSEC_UNDEF_SCI) sci = dev_to_sci(dev, MACSEC_PORT_ES); @@ -4028,6 +4042,7 @@ static int macsec_add_dev(struct net_device *dev, sci_t sci, u8 icv_len) secy->xpn = DEFAULT_XPN; secy->sci = sci; + secy->tx_sc.md_dst->u.macsec_info.sci = sci; secy->tx_sc.active = true; secy->tx_sc.encoding_sa = DEFAULT_ENCODING_SA; secy->tx_sc.encrypt = DEFAULT_ENCRYPT; diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index adab27ba1ecb..22a6924bf6da 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -4,11 +4,13 @@ #include #include +#include #include enum metadata_type { METADATA_IP_TUNNEL, METADATA_HW_PORT_MUX, + METADATA_MACSEC, }; struct hw_port_info { @@ -16,12 +18,17 @@ struct hw_port_info { u32 port_id; }; +struct macsec_info { + sci_t sci; +}; + struct metadata_dst { struct dst_entry dst; enum metadata_type type; union { struct ip_tunnel_info tun_info; struct hw_port_info port_info; + struct macsec_info macsec_info; } u; }; @@ -82,6 +89,9 @@ static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a, return memcmp(&a->u.tun_info, &b->u.tun_info, sizeof(a->u.tun_info) + a->u.tun_info.options_len); + case METADATA_MACSEC: + return memcmp(&a->u.macsec_info, &b->u.macsec_info, + sizeof(a->u.macsec_info)); default: return 1; } diff --git a/include/net/macsec.h b/include/net/macsec.h index 73780aa73644..8494953fb0de 100644 --- a/include/net/macsec.h +++ b/include/net/macsec.h @@ -19,6 +19,8 @@ typedef u64 __bitwise sci_t; typedef u32 __bitwise ssci_t; +struct metadata_dst; + typedef union salt { struct { u32 ssci; @@ -182,6 +184,7 @@ struct macsec_tx_sa { * @scb: single copy broadcast flag * @sa: array of secure associations * @stats: stats for this TXSC + * @md_dst: MACsec offload metadata dst */ struct macsec_tx_sc { bool active; @@ -192,6 +195,7 @@ struct macsec_tx_sc { bool scb; struct macsec_tx_sa __rcu *sa[MACSEC_NUM_AN]; struct pcpu_tx_sc_stats __percpu *stats; + struct metadata_dst *md_dst; }; /** -- cgit v1.2.3-70-g09d2 From b1671253c6015841f6cabd39730fa42fb6d3d407 Mon Sep 17 00:00:00 2001 From: Lior Nahmanson Date: Mon, 5 Sep 2022 22:21:15 -0700 Subject: net/macsec: Move some code for sharing with various drivers that implements offload Move some MACsec infrastructure like defines and functions, in order to avoid code duplication for future drivers which implements MACsec offload. Signed-off-by: Lior Nahmanson Reviewed-by: Raed Salem Reviewed-by: Jiri Pirko Reviewed-by: Ben Ben-Ishay Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/macsec.c | 33 ++++++--------------------------- include/net/macsec.h | 21 +++++++++++++++++++++ 2 files changed, 27 insertions(+), 27 deletions(-) (limited to 'include/net') diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index e781b3e22aac..830fed3914b6 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -25,8 +25,6 @@ #include -#define MACSEC_SCI_LEN 8 - /* SecTAG length = macsec_eth_header without the optional SCI */ #define MACSEC_TAG_LEN 6 @@ -47,20 +45,10 @@ struct macsec_eth_header { u8 secure_channel_id[8]; /* optional */ } __packed; -#define MACSEC_TCI_VERSION 0x80 -#define MACSEC_TCI_ES 0x40 /* end station */ -#define MACSEC_TCI_SC 0x20 /* SCI present */ -#define MACSEC_TCI_SCB 0x10 /* epon */ -#define MACSEC_TCI_E 0x08 /* encryption */ -#define MACSEC_TCI_C 0x04 /* changed text */ -#define MACSEC_AN_MASK 0x03 /* association number */ -#define MACSEC_TCI_CONFID (MACSEC_TCI_E | MACSEC_TCI_C) - /* minimum secure data length deemed "not short", see IEEE 802.1AE-2006 9.7 */ #define MIN_NON_SHORT_LEN 48 #define GCM_AES_IV_LEN 12 -#define DEFAULT_ICV_LEN 16 #define for_each_rxsc(secy, sc) \ for (sc = rcu_dereference_bh(secy->rx_sc); \ @@ -244,7 +232,6 @@ static struct macsec_cb *macsec_skb_cb(struct sk_buff *skb) return (struct macsec_cb *)skb->cb; } -#define MACSEC_PORT_ES (htons(0x0001)) #define MACSEC_PORT_SCB (0x0000) #define MACSEC_UNDEF_SCI ((__force sci_t)0xffffffffffffffffULL) #define MACSEC_UNDEF_SSCI ((__force ssci_t)0xffffffff) @@ -259,14 +246,6 @@ static struct macsec_cb *macsec_skb_cb(struct sk_buff *skb) #define DEFAULT_ENCODING_SA 0 #define MACSEC_XPN_MAX_REPLAY_WINDOW (((1 << 30) - 1)) -static bool send_sci(const struct macsec_secy *secy) -{ - const struct macsec_tx_sc *tx_sc = &secy->tx_sc; - - return tx_sc->send_sci || - (secy->n_rx_sc > 1 && !tx_sc->end_station && !tx_sc->scb); -} - static sci_t make_sci(const u8 *addr, __be16 port) { sci_t sci; @@ -331,7 +310,7 @@ static void macsec_fill_sectag(struct macsec_eth_header *h, /* with GCM, C/E clear for !encrypt, both set for encrypt */ if (tx_sc->encrypt) h->tci_an |= MACSEC_TCI_CONFID; - else if (secy->icv_len != DEFAULT_ICV_LEN) + else if (secy->icv_len != MACSEC_DEFAULT_ICV_LEN) h->tci_an |= MACSEC_TCI_C; h->tci_an |= tx_sc->encoding_sa; @@ -655,7 +634,7 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb, unprotected_len = skb->len; eth = eth_hdr(skb); - sci_present = send_sci(secy); + sci_present = macsec_send_sci(secy); hh = skb_push(skb, macsec_extra_len(sci_present)); memmove(hh, eth, 2 * ETH_ALEN); @@ -1303,7 +1282,7 @@ nosci: /* 10.6.1 if the SC is not found */ cbit = !!(hdr->tci_an & MACSEC_TCI_C); if (!cbit) - macsec_finalize_skb(skb, DEFAULT_ICV_LEN, + macsec_finalize_skb(skb, MACSEC_DEFAULT_ICV_LEN, macsec_extra_len(macsec_skb_cb(skb)->has_sci)); list_for_each_entry_rcu(macsec, &rxd->secys, secys) { @@ -4067,7 +4046,7 @@ static int macsec_newlink(struct net *net, struct net_device *dev, { struct macsec_dev *macsec = macsec_priv(dev); rx_handler_func_t *rx_handler; - u8 icv_len = DEFAULT_ICV_LEN; + u8 icv_len = MACSEC_DEFAULT_ICV_LEN; struct net_device *real_dev; int err, mtu; sci_t sci; @@ -4191,7 +4170,7 @@ static int macsec_validate_attr(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { u64 csid = MACSEC_DEFAULT_CIPHER_ID; - u8 icv_len = DEFAULT_ICV_LEN; + u8 icv_len = MACSEC_DEFAULT_ICV_LEN; int flag; bool es, scb, sci; @@ -4203,7 +4182,7 @@ static int macsec_validate_attr(struct nlattr *tb[], struct nlattr *data[], if (data[IFLA_MACSEC_ICV_LEN]) { icv_len = nla_get_u8(data[IFLA_MACSEC_ICV_LEN]); - if (icv_len != DEFAULT_ICV_LEN) { + if (icv_len != MACSEC_DEFAULT_ICV_LEN) { char dummy_key[DEFAULT_SAK_LEN] = { 0 }; struct crypto_aead *dummy_tfm; diff --git a/include/net/macsec.h b/include/net/macsec.h index 8494953fb0de..871599b11707 100644 --- a/include/net/macsec.h +++ b/include/net/macsec.h @@ -16,6 +16,20 @@ #define MACSEC_NUM_AN 4 /* 2 bits for the association number */ +#define MACSEC_SCI_LEN 8 +#define MACSEC_PORT_ES (htons(0x0001)) + +#define MACSEC_TCI_VERSION 0x80 +#define MACSEC_TCI_ES 0x40 /* end station */ +#define MACSEC_TCI_SC 0x20 /* SCI present */ +#define MACSEC_TCI_SCB 0x10 /* epon */ +#define MACSEC_TCI_E 0x08 /* encryption */ +#define MACSEC_TCI_C 0x04 /* changed text */ +#define MACSEC_AN_MASK 0x03 /* association number */ +#define MACSEC_TCI_CONFID (MACSEC_TCI_E | MACSEC_TCI_C) + +#define MACSEC_DEFAULT_ICV_LEN 16 + typedef u64 __bitwise sci_t; typedef u32 __bitwise ssci_t; @@ -292,5 +306,12 @@ struct macsec_ops { }; void macsec_pn_wrapped(struct macsec_secy *secy, struct macsec_tx_sa *tx_sa); +static inline bool macsec_send_sci(const struct macsec_secy *secy) +{ + const struct macsec_tx_sc *tx_sc = &secy->tx_sc; + + return tx_sc->send_sci || + (secy->n_rx_sc > 1 && !tx_sc->end_station && !tx_sc->scb); +} #endif /* _NET_MACSEC_H_ */ -- cgit v1.2.3-70-g09d2 From c92c27171040554cfda7a3fc925e9dbcb5b4a698 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 6 Sep 2022 17:20:35 +0200 Subject: netfilter: nat: move repetitive nat port reserve loop to a helper Almost all nat helpers reserve an expecation port the same way: Try the port inidcated by the peer, then move to next port if that port is already in use. We can squash this into a helper. Suggested-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- include/net/netfilter/nf_nat_helper.h | 1 + net/ipv4/netfilter/nf_nat_h323.c | 60 +++-------------------------------- net/netfilter/nf_nat_amanda.c | 14 +------- net/netfilter/nf_nat_ftp.c | 17 ++-------- net/netfilter/nf_nat_helper.c | 19 +++++++++++ net/netfilter/nf_nat_irc.c | 16 ++-------- net/netfilter/nf_nat_sip.c | 14 +------- 7 files changed, 30 insertions(+), 111 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_nat_helper.h b/include/net/netfilter/nf_nat_helper.h index efae84646353..44c421b9be85 100644 --- a/include/net/netfilter/nf_nat_helper.h +++ b/include/net/netfilter/nf_nat_helper.h @@ -38,4 +38,5 @@ bool nf_nat_mangle_udp_packet(struct sk_buff *skb, struct nf_conn *ct, * to port ct->master->saved_proto. */ void nf_nat_follow_master(struct nf_conn *ct, struct nf_conntrack_expect *this); +u16 nf_nat_exp_find_port(struct nf_conntrack_expect *exp, u16 port); #endif diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index a334f0dcc2d0..faee20af4856 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -291,20 +291,7 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct, exp->expectfn = nf_nat_follow_master; exp->dir = !dir; - /* Try to get same port: if not, try to change it. */ - for (; nated_port != 0; nated_port++) { - int ret; - - exp->tuple.dst.u.tcp.port = htons(nated_port); - ret = nf_ct_expect_related(exp, 0); - if (ret == 0) - break; - else if (ret != -EBUSY) { - nated_port = 0; - break; - } - } - + nated_port = nf_nat_exp_find_port(exp, nated_port); if (nated_port == 0) { /* No port available */ net_notice_ratelimited("nf_nat_h323: out of TCP ports\n"); return 0; @@ -347,20 +334,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, if (info->sig_port[dir] == port) nated_port = ntohs(info->sig_port[!dir]); - /* Try to get same port: if not, try to change it. */ - for (; nated_port != 0; nated_port++) { - int ret; - - exp->tuple.dst.u.tcp.port = htons(nated_port); - ret = nf_ct_expect_related(exp, 0); - if (ret == 0) - break; - else if (ret != -EBUSY) { - nated_port = 0; - break; - } - } - + nated_port = nf_nat_exp_find_port(exp, nated_port); if (nated_port == 0) { /* No port available */ net_notice_ratelimited("nf_nat_q931: out of TCP ports\n"); return 0; @@ -439,20 +413,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, if (info->sig_port[dir] == port) nated_port = ntohs(info->sig_port[!dir]); - /* Try to get same port: if not, try to change it. */ - for (; nated_port != 0; nated_port++) { - int ret; - - exp->tuple.dst.u.tcp.port = htons(nated_port); - ret = nf_ct_expect_related(exp, 0); - if (ret == 0) - break; - else if (ret != -EBUSY) { - nated_port = 0; - break; - } - } - + nated_port = nf_nat_exp_find_port(exp, nated_port); if (nated_port == 0) { /* No port available */ net_notice_ratelimited("nf_nat_ras: out of TCP ports\n"); return 0; @@ -532,20 +493,7 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, exp->expectfn = ip_nat_callforwarding_expect; exp->dir = !dir; - /* Try to get same port: if not, try to change it. */ - for (nated_port = ntohs(port); nated_port != 0; nated_port++) { - int ret; - - exp->tuple.dst.u.tcp.port = htons(nated_port); - ret = nf_ct_expect_related(exp, 0); - if (ret == 0) - break; - else if (ret != -EBUSY) { - nated_port = 0; - break; - } - } - + nated_port = nf_nat_exp_find_port(exp, ntohs(port)); if (nated_port == 0) { /* No port available */ net_notice_ratelimited("nf_nat_q931: out of TCP ports\n"); return 0; diff --git a/net/netfilter/nf_nat_amanda.c b/net/netfilter/nf_nat_amanda.c index 3bc7e0854efe..98deef6cde69 100644 --- a/net/netfilter/nf_nat_amanda.c +++ b/net/netfilter/nf_nat_amanda.c @@ -44,19 +44,7 @@ static unsigned int help(struct sk_buff *skb, exp->expectfn = nf_nat_follow_master; /* Try to get same port: if not, try to change it. */ - for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { - int res; - - exp->tuple.dst.u.tcp.port = htons(port); - res = nf_ct_expect_related(exp, 0); - if (res == 0) - break; - else if (res != -EBUSY) { - port = 0; - break; - } - } - + port = nf_nat_exp_find_port(exp, ntohs(exp->saved_proto.tcp.port)); if (port == 0) { nf_ct_helper_log(skb, exp->master, "all ports in use"); return NF_DROP; diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c index aace6768a64e..c92a436d9c48 100644 --- a/net/netfilter/nf_nat_ftp.c +++ b/net/netfilter/nf_nat_ftp.c @@ -86,22 +86,9 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb, * this one. */ exp->expectfn = nf_nat_follow_master; - /* Try to get same port: if not, try to change it. */ - for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { - int ret; - - exp->tuple.dst.u.tcp.port = htons(port); - ret = nf_ct_expect_related(exp, 0); - if (ret == 0) - break; - else if (ret != -EBUSY) { - port = 0; - break; - } - } - + port = nf_nat_exp_find_port(exp, ntohs(exp->saved_proto.tcp.port)); if (port == 0) { - nf_ct_helper_log(skb, ct, "all ports in use"); + nf_ct_helper_log(skb, exp->master, "all ports in use"); return NF_DROP; } diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c index a263505455fc..067d6d6f6b7d 100644 --- a/net/netfilter/nf_nat_helper.c +++ b/net/netfilter/nf_nat_helper.c @@ -198,3 +198,22 @@ void nf_nat_follow_master(struct nf_conn *ct, nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); } EXPORT_SYMBOL(nf_nat_follow_master); + +u16 nf_nat_exp_find_port(struct nf_conntrack_expect *exp, u16 port) +{ + /* Try to get same port: if not, try to change it. */ + for (; port != 0; port++) { + int res; + + exp->tuple.dst.u.tcp.port = htons(port); + res = nf_ct_expect_related(exp, 0); + if (res == 0) + return port; + + if (res != -EBUSY) + break; + } + + return 0; +} +EXPORT_SYMBOL_GPL(nf_nat_exp_find_port); diff --git a/net/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c index c691ab8d234c..19c4fcc60c50 100644 --- a/net/netfilter/nf_nat_irc.c +++ b/net/netfilter/nf_nat_irc.c @@ -48,20 +48,8 @@ static unsigned int help(struct sk_buff *skb, exp->dir = IP_CT_DIR_REPLY; exp->expectfn = nf_nat_follow_master; - /* Try to get same port: if not, try to change it. */ - for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { - int ret; - - exp->tuple.dst.u.tcp.port = htons(port); - ret = nf_ct_expect_related(exp, 0); - if (ret == 0) - break; - else if (ret != -EBUSY) { - port = 0; - break; - } - } - + port = nf_nat_exp_find_port(exp, + ntohs(exp->saved_proto.tcp.port)); if (port == 0) { nf_ct_helper_log(skb, ct, "all ports in use"); return NF_DROP; diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c index f0a735e86851..cf4aeb299bde 100644 --- a/net/netfilter/nf_nat_sip.c +++ b/net/netfilter/nf_nat_sip.c @@ -410,19 +410,7 @@ static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff, exp->dir = !dir; exp->expectfn = nf_nat_sip_expected; - for (; port != 0; port++) { - int ret; - - exp->tuple.dst.u.udp.port = htons(port); - ret = nf_ct_expect_related(exp, NF_CT_EXP_F_SKIP_MASTER); - if (ret == 0) - break; - else if (ret != -EBUSY) { - port = 0; - break; - } - } - + port = nf_nat_exp_find_port(exp, port); if (port == 0) { nf_ct_helper_log(skb, ct, "all ports in use for SIP"); return NF_DROP; -- cgit v1.2.3-70-g09d2 From 2a40f883781d6cbbf547ed13b0cec2f9808d839d Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 8 Sep 2022 13:57:50 -0700 Subject: Bluetooth: Fix HCIGETDEVINFO regression Recent changes breaks HCIGETDEVINFO since it changes the size of hci_dev_info. Fixes: 26afbd826ee3 ("Bluetooth: Add initial implementation of CIS connections") Reported-by: Marek Szyprowski Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sock.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_sock.h b/include/net/bluetooth/hci_sock.h index 0520e21ab698..9949870f7d78 100644 --- a/include/net/bluetooth/hci_sock.h +++ b/include/net/bluetooth/hci_sock.h @@ -124,8 +124,6 @@ struct hci_dev_info { __u16 acl_pkts; __u16 sco_mtu; __u16 sco_pkts; - __u16 iso_mtu; - __u16 iso_pkts; struct hci_dev_stats stat; }; -- cgit v1.2.3-70-g09d2 From acd0a7ab6334f35c3720120d53f79eb8e9b3ac2e Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Thu, 8 Sep 2022 12:14:33 +0800 Subject: net: sched: act: move global static variable net_id to tc_action_ops Each tc action module has a corresponding net_id, so put net_id directly into the structure tc_action_ops. Signed-off-by: Zhengchao Shao Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/act_api.h | 1 + net/sched/act_bpf.c | 13 ++++++------- net/sched/act_connmark.c | 13 ++++++------- net/sched/act_csum.c | 13 ++++++------- net/sched/act_ct.c | 17 ++++++++--------- net/sched/act_ctinfo.c | 13 ++++++------- net/sched/act_gact.c | 13 ++++++------- net/sched/act_gate.c | 13 ++++++------- net/sched/act_ife.c | 13 ++++++------- net/sched/act_ipt.c | 31 ++++++++++++++----------------- net/sched/act_mirred.c | 13 ++++++------- net/sched/act_mpls.c | 13 ++++++------- net/sched/act_nat.c | 13 ++++++------- net/sched/act_pedit.c | 13 ++++++------- net/sched/act_police.c | 13 ++++++------- net/sched/act_sample.c | 13 ++++++------- net/sched/act_simple.c | 13 ++++++------- net/sched/act_skbedit.c | 13 ++++++------- net/sched/act_skbmod.c | 13 ++++++------- net/sched/act_tunnel_key.c | 13 ++++++------- net/sched/act_vlan.c | 13 ++++++------- 21 files changed, 131 insertions(+), 152 deletions(-) (limited to 'include/net') diff --git a/include/net/act_api.h b/include/net/act_api.h index 9cf6870b526e..61f2ceb3939e 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -111,6 +111,7 @@ struct tc_action_ops { struct list_head head; char kind[IFNAMSIZ]; enum tca_id id; /* identifier should match kind */ + unsigned int net_id; size_t size; struct module *owner; int (*act)(struct sk_buff *, const struct tc_action *, diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index fea2d78b9ddc..dd839efe9649 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -29,7 +29,6 @@ struct tcf_bpf_cfg { bool is_ebpf; }; -static unsigned int bpf_net_id; static struct tc_action_ops act_bpf_ops; static int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act, @@ -280,7 +279,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, bpf_net_id); + struct tc_action_net *tn = net_generic(net, act_bpf_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -395,14 +394,14 @@ static int tcf_bpf_walker(struct net *net, struct sk_buff *skb, const struct tc_action_ops *ops, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, bpf_net_id); + struct tc_action_net *tn = net_generic(net, act_bpf_ops.net_id); return tcf_generic_walker(tn, skb, cb, type, ops, extack); } static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index) { - struct tc_action_net *tn = net_generic(net, bpf_net_id); + struct tc_action_net *tn = net_generic(net, act_bpf_ops.net_id); return tcf_idr_search(tn, a, index); } @@ -422,20 +421,20 @@ static struct tc_action_ops act_bpf_ops __read_mostly = { static __net_init int bpf_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, bpf_net_id); + struct tc_action_net *tn = net_generic(net, act_bpf_ops.net_id); return tc_action_net_init(net, tn, &act_bpf_ops); } static void __net_exit bpf_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, bpf_net_id); + tc_action_net_exit(net_list, act_bpf_ops.net_id); } static struct pernet_operations bpf_net_ops = { .init = bpf_init_net, .exit_batch = bpf_exit_net, - .id = &bpf_net_id, + .id = &act_bpf_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 09e2aafc8943..1ea9ad187560 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -25,7 +25,6 @@ #include #include -static unsigned int connmark_net_id; static struct tc_action_ops act_connmark_ops; static int tcf_connmark_act(struct sk_buff *skb, const struct tc_action *a, @@ -99,7 +98,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, connmark_net_id); + struct tc_action_net *tn = net_generic(net, act_connmark_ops.net_id); struct nlattr *tb[TCA_CONNMARK_MAX + 1]; bool bind = flags & TCA_ACT_FLAGS_BIND; struct tcf_chain *goto_ch = NULL; @@ -205,14 +204,14 @@ static int tcf_connmark_walker(struct net *net, struct sk_buff *skb, const struct tc_action_ops *ops, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, connmark_net_id); + struct tc_action_net *tn = net_generic(net, act_connmark_ops.net_id); return tcf_generic_walker(tn, skb, cb, type, ops, extack); } static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index) { - struct tc_action_net *tn = net_generic(net, connmark_net_id); + struct tc_action_net *tn = net_generic(net, act_connmark_ops.net_id); return tcf_idr_search(tn, a, index); } @@ -231,20 +230,20 @@ static struct tc_action_ops act_connmark_ops = { static __net_init int connmark_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, connmark_net_id); + struct tc_action_net *tn = net_generic(net, act_connmark_ops.net_id); return tc_action_net_init(net, tn, &act_connmark_ops); } static void __net_exit connmark_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, connmark_net_id); + tc_action_net_exit(net_list, act_connmark_ops.net_id); } static struct pernet_operations connmark_net_ops = { .init = connmark_init_net, .exit_batch = connmark_exit_net, - .id = &connmark_net_id, + .id = &act_connmark_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 22847ee009ef..400f80cca40f 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -37,7 +37,6 @@ static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = { [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), }, }; -static unsigned int csum_net_id; static struct tc_action_ops act_csum_ops; static int tcf_csum_init(struct net *net, struct nlattr *nla, @@ -45,7 +44,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, csum_net_id); + struct tc_action_net *tn = net_generic(net, act_csum_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct tcf_csum_params *params_new; struct nlattr *tb[TCA_CSUM_MAX + 1]; @@ -678,14 +677,14 @@ static int tcf_csum_walker(struct net *net, struct sk_buff *skb, const struct tc_action_ops *ops, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, csum_net_id); + struct tc_action_net *tn = net_generic(net, act_csum_ops.net_id); return tcf_generic_walker(tn, skb, cb, type, ops, extack); } static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index) { - struct tc_action_net *tn = net_generic(net, csum_net_id); + struct tc_action_net *tn = net_generic(net, act_csum_ops.net_id); return tcf_idr_search(tn, a, index); } @@ -731,20 +730,20 @@ static struct tc_action_ops act_csum_ops = { static __net_init int csum_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, csum_net_id); + struct tc_action_net *tn = net_generic(net, act_csum_ops.net_id); return tc_action_net_init(net, tn, &act_csum_ops); } static void __net_exit csum_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, csum_net_id); + tc_action_net_exit(net_list, act_csum_ops.net_id); } static struct pernet_operations csum_net_ops = { .init = csum_init_net, .exit_batch = csum_exit_net, - .id = &csum_net_id, + .id = &act_csum_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index d55afb8d14be..38b2f583106c 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -649,7 +649,6 @@ static void tcf_ct_flow_tables_uninit(void) } static struct tc_action_ops act_ct_ops; -static unsigned int ct_net_id; struct tc_ct_action_net { struct tc_action_net tn; /* Must be first */ @@ -1255,7 +1254,7 @@ static int tcf_ct_fill_params(struct net *net, struct nlattr **tb, struct netlink_ext_ack *extack) { - struct tc_ct_action_net *tn = net_generic(net, ct_net_id); + struct tc_ct_action_net *tn = net_generic(net, act_ct_ops.net_id); struct nf_conntrack_zone zone; struct nf_conn *tmpl; int err; @@ -1330,7 +1329,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, ct_net_id); + struct tc_action_net *tn = net_generic(net, act_ct_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct tcf_ct_params *params = NULL; struct nlattr *tb[TCA_CT_MAX + 1]; @@ -1563,14 +1562,14 @@ static int tcf_ct_walker(struct net *net, struct sk_buff *skb, const struct tc_action_ops *ops, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, ct_net_id); + struct tc_action_net *tn = net_generic(net, act_ct_ops.net_id); return tcf_generic_walker(tn, skb, cb, type, ops, extack); } static int tcf_ct_search(struct net *net, struct tc_action **a, u32 index) { - struct tc_action_net *tn = net_generic(net, ct_net_id); + struct tc_action_net *tn = net_generic(net, act_ct_ops.net_id); return tcf_idr_search(tn, a, index); } @@ -1623,7 +1622,7 @@ static struct tc_action_ops act_ct_ops = { static __net_init int ct_init_net(struct net *net) { unsigned int n_bits = sizeof_field(struct tcf_ct_params, labels) * 8; - struct tc_ct_action_net *tn = net_generic(net, ct_net_id); + struct tc_ct_action_net *tn = net_generic(net, act_ct_ops.net_id); if (nf_connlabels_get(net, n_bits - 1)) { tn->labels = false; @@ -1641,20 +1640,20 @@ static void __net_exit ct_exit_net(struct list_head *net_list) rtnl_lock(); list_for_each_entry(net, net_list, exit_list) { - struct tc_ct_action_net *tn = net_generic(net, ct_net_id); + struct tc_ct_action_net *tn = net_generic(net, act_ct_ops.net_id); if (tn->labels) nf_connlabels_put(net); } rtnl_unlock(); - tc_action_net_exit(net_list, ct_net_id); + tc_action_net_exit(net_list, act_ct_ops.net_id); } static struct pernet_operations ct_net_ops = { .init = ct_init_net, .exit_batch = ct_exit_net, - .id = &ct_net_id, + .id = &act_ct_ops.net_id, .size = sizeof(struct tc_ct_action_net), }; diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c index 0281e45987a4..626f338c694d 100644 --- a/net/sched/act_ctinfo.c +++ b/net/sched/act_ctinfo.c @@ -25,7 +25,6 @@ #include static struct tc_action_ops act_ctinfo_ops; -static unsigned int ctinfo_net_id; static void tcf_ctinfo_dscp_set(struct nf_conn *ct, struct tcf_ctinfo *ca, struct tcf_ctinfo_params *cp, @@ -157,7 +156,7 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, ctinfo_net_id); + struct tc_action_net *tn = net_generic(net, act_ctinfo_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; u32 dscpmask = 0, dscpstatemask, index; struct nlattr *tb[TCA_CTINFO_MAX + 1]; @@ -347,14 +346,14 @@ static int tcf_ctinfo_walker(struct net *net, struct sk_buff *skb, const struct tc_action_ops *ops, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, ctinfo_net_id); + struct tc_action_net *tn = net_generic(net, act_ctinfo_ops.net_id); return tcf_generic_walker(tn, skb, cb, type, ops, extack); } static int tcf_ctinfo_search(struct net *net, struct tc_action **a, u32 index) { - struct tc_action_net *tn = net_generic(net, ctinfo_net_id); + struct tc_action_net *tn = net_generic(net, act_ctinfo_ops.net_id); return tcf_idr_search(tn, a, index); } @@ -384,20 +383,20 @@ static struct tc_action_ops act_ctinfo_ops = { static __net_init int ctinfo_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, ctinfo_net_id); + struct tc_action_net *tn = net_generic(net, act_ctinfo_ops.net_id); return tc_action_net_init(net, tn, &act_ctinfo_ops); } static void __net_exit ctinfo_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, ctinfo_net_id); + tc_action_net_exit(net_list, act_ctinfo_ops.net_id); } static struct pernet_operations ctinfo_net_ops = { .init = ctinfo_init_net, .exit_batch = ctinfo_exit_net, - .id = &ctinfo_net_id, + .id = &act_ctinfo_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index ac29d1065232..ede896a7ee6b 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -19,7 +19,6 @@ #include #include -static unsigned int gact_net_id; static struct tc_action_ops act_gact_ops; #ifdef CONFIG_GACT_PROB @@ -55,7 +54,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, gact_net_id); + struct tc_action_net *tn = net_generic(net, act_gact_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_GACT_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -227,14 +226,14 @@ static int tcf_gact_walker(struct net *net, struct sk_buff *skb, const struct tc_action_ops *ops, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, gact_net_id); + struct tc_action_net *tn = net_generic(net, act_gact_ops.net_id); return tcf_generic_walker(tn, skb, cb, type, ops, extack); } static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index) { - struct tc_action_net *tn = net_generic(net, gact_net_id); + struct tc_action_net *tn = net_generic(net, act_gact_ops.net_id); return tcf_idr_search(tn, a, index); } @@ -317,20 +316,20 @@ static struct tc_action_ops act_gact_ops = { static __net_init int gact_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, gact_net_id); + struct tc_action_net *tn = net_generic(net, act_gact_ops.net_id); return tc_action_net_init(net, tn, &act_gact_ops); } static void __net_exit gact_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, gact_net_id); + tc_action_net_exit(net_list, act_gact_ops.net_id); } static struct pernet_operations gact_net_ops = { .init = gact_init_net, .exit_batch = gact_exit_net, - .id = &gact_net_id, + .id = &act_gact_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index fd5155274733..1b528550eb22 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -15,7 +15,6 @@ #include #include -static unsigned int gate_net_id; static struct tc_action_ops act_gate_ops; static ktime_t gate_get_time(struct tcf_gate *gact) @@ -298,7 +297,7 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, gate_net_id); + struct tc_action_net *tn = net_generic(net, act_gate_ops.net_id); enum tk_offsets tk_offset = TK_OFFS_TAI; bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_GATE_MAX + 1]; @@ -570,7 +569,7 @@ static int tcf_gate_walker(struct net *net, struct sk_buff *skb, const struct tc_action_ops *ops, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, gate_net_id); + struct tc_action_net *tn = net_generic(net, act_gate_ops.net_id); return tcf_generic_walker(tn, skb, cb, type, ops, extack); } @@ -587,7 +586,7 @@ static void tcf_gate_stats_update(struct tc_action *a, u64 bytes, u64 packets, static int tcf_gate_search(struct net *net, struct tc_action **a, u32 index) { - struct tc_action_net *tn = net_generic(net, gate_net_id); + struct tc_action_net *tn = net_generic(net, act_gate_ops.net_id); return tcf_idr_search(tn, a, index); } @@ -664,20 +663,20 @@ static struct tc_action_ops act_gate_ops = { static __net_init int gate_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, gate_net_id); + struct tc_action_net *tn = net_generic(net, act_gate_ops.net_id); return tc_action_net_init(net, tn, &act_gate_ops); } static void __net_exit gate_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, gate_net_id); + tc_action_net_exit(net_list, act_gate_ops.net_id); } static struct pernet_operations gate_net_ops = { .init = gate_init_net, .exit_batch = gate_exit_net, - .id = &gate_net_id, + .id = &act_gate_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 41ba55e60b1b..ef8355012ac0 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -30,7 +30,6 @@ #include #include -static unsigned int ife_net_id; static int max_metacnt = IFE_META_MAX + 1; static struct tc_action_ops act_ife_ops; @@ -482,7 +481,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, ife_net_id); + struct tc_action_net *tn = net_generic(net, act_ife_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_IFE_MAX + 1]; struct nlattr *tb2[IFE_META_MAX + 1]; @@ -883,14 +882,14 @@ static int tcf_ife_walker(struct net *net, struct sk_buff *skb, const struct tc_action_ops *ops, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, ife_net_id); + struct tc_action_net *tn = net_generic(net, act_ife_ops.net_id); return tcf_generic_walker(tn, skb, cb, type, ops, extack); } static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index) { - struct tc_action_net *tn = net_generic(net, ife_net_id); + struct tc_action_net *tn = net_generic(net, act_ife_ops.net_id); return tcf_idr_search(tn, a, index); } @@ -910,20 +909,20 @@ static struct tc_action_ops act_ife_ops = { static __net_init int ife_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, ife_net_id); + struct tc_action_net *tn = net_generic(net, act_ife_ops.net_id); return tc_action_net_init(net, tn, &act_ife_ops); } static void __net_exit ife_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, ife_net_id); + tc_action_net_exit(net_list, act_ife_ops.net_id); } static struct pernet_operations ife_net_ops = { .init = ife_init_net, .exit_batch = ife_exit_net, - .id = &ife_net_id, + .id = &act_ife_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 2f3d507c24a1..45bd55096ea8 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -24,10 +24,7 @@ #include -static unsigned int ipt_net_id; static struct tc_action_ops act_ipt_ops; - -static unsigned int xt_net_id; static struct tc_action_ops act_xt_ops; static int ipt_init_target(struct net *net, struct xt_entry_target *t, @@ -206,8 +203,8 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, - tp, flags); + return __tcf_ipt_init(net, act_ipt_ops.net_id, nla, est, + a, &act_ipt_ops, tp, flags); } static int tcf_xt_init(struct net *net, struct nlattr *nla, @@ -215,8 +212,8 @@ static int tcf_xt_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, - tp, flags); + return __tcf_ipt_init(net, act_xt_ops.net_id, nla, est, + a, &act_xt_ops, tp, flags); } static int tcf_ipt_act(struct sk_buff *skb, const struct tc_action *a, @@ -321,14 +318,14 @@ static int tcf_ipt_walker(struct net *net, struct sk_buff *skb, const struct tc_action_ops *ops, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, ipt_net_id); + struct tc_action_net *tn = net_generic(net, act_ipt_ops.net_id); return tcf_generic_walker(tn, skb, cb, type, ops, extack); } static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index) { - struct tc_action_net *tn = net_generic(net, ipt_net_id); + struct tc_action_net *tn = net_generic(net, act_ipt_ops.net_id); return tcf_idr_search(tn, a, index); } @@ -348,20 +345,20 @@ static struct tc_action_ops act_ipt_ops = { static __net_init int ipt_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, ipt_net_id); + struct tc_action_net *tn = net_generic(net, act_ipt_ops.net_id); return tc_action_net_init(net, tn, &act_ipt_ops); } static void __net_exit ipt_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, ipt_net_id); + tc_action_net_exit(net_list, act_ipt_ops.net_id); } static struct pernet_operations ipt_net_ops = { .init = ipt_init_net, .exit_batch = ipt_exit_net, - .id = &ipt_net_id, + .id = &act_ipt_ops.net_id, .size = sizeof(struct tc_action_net), }; @@ -370,14 +367,14 @@ static int tcf_xt_walker(struct net *net, struct sk_buff *skb, const struct tc_action_ops *ops, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, xt_net_id); + struct tc_action_net *tn = net_generic(net, act_xt_ops.net_id); return tcf_generic_walker(tn, skb, cb, type, ops, extack); } static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index) { - struct tc_action_net *tn = net_generic(net, xt_net_id); + struct tc_action_net *tn = net_generic(net, act_xt_ops.net_id); return tcf_idr_search(tn, a, index); } @@ -397,20 +394,20 @@ static struct tc_action_ops act_xt_ops = { static __net_init int xt_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, xt_net_id); + struct tc_action_net *tn = net_generic(net, act_xt_ops.net_id); return tc_action_net_init(net, tn, &act_xt_ops); } static void __net_exit xt_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, xt_net_id); + tc_action_net_exit(net_list, act_xt_ops.net_id); } static struct pernet_operations xt_net_ops = { .init = xt_init_net, .exit_batch = xt_exit_net, - .id = &xt_net_id, + .id = &act_xt_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index a1d70cf86843..56877dd19375 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -86,7 +86,6 @@ static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = { [TCA_MIRRED_PARMS] = { .len = sizeof(struct tc_mirred) }, }; -static unsigned int mirred_net_id; static struct tc_action_ops act_mirred_ops; static int tcf_mirred_init(struct net *net, struct nlattr *nla, @@ -94,7 +93,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, mirred_net_id); + struct tc_action_net *tn = net_generic(net, act_mirred_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_MIRRED_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -378,14 +377,14 @@ static int tcf_mirred_walker(struct net *net, struct sk_buff *skb, const struct tc_action_ops *ops, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, mirred_net_id); + struct tc_action_net *tn = net_generic(net, act_mirred_ops.net_id); return tcf_generic_walker(tn, skb, cb, type, ops, extack); } static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index) { - struct tc_action_net *tn = net_generic(net, mirred_net_id); + struct tc_action_net *tn = net_generic(net, act_mirred_ops.net_id); return tcf_idr_search(tn, a, index); } @@ -520,20 +519,20 @@ static struct tc_action_ops act_mirred_ops = { static __net_init int mirred_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, mirred_net_id); + struct tc_action_net *tn = net_generic(net, act_mirred_ops.net_id); return tc_action_net_init(net, tn, &act_mirred_ops); } static void __net_exit mirred_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, mirred_net_id); + tc_action_net_exit(net_list, act_mirred_ops.net_id); } static struct pernet_operations mirred_net_ops = { .init = mirred_init_net, .exit_batch = mirred_exit_net, - .id = &mirred_net_id, + .id = &act_mirred_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index adabeccb63e1..b754d2eae477 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -15,7 +15,6 @@ #include #include -static unsigned int mpls_net_id; static struct tc_action_ops act_mpls_ops; #define ACT_MPLS_TTL_DEFAULT 255 @@ -155,7 +154,7 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, mpls_net_id); + struct tc_action_net *tn = net_generic(net, act_mpls_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_MPLS_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -372,14 +371,14 @@ static int tcf_mpls_walker(struct net *net, struct sk_buff *skb, const struct tc_action_ops *ops, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, mpls_net_id); + struct tc_action_net *tn = net_generic(net, act_mpls_ops.net_id); return tcf_generic_walker(tn, skb, cb, type, ops, extack); } static int tcf_mpls_search(struct net *net, struct tc_action **a, u32 index) { - struct tc_action_net *tn = net_generic(net, mpls_net_id); + struct tc_action_net *tn = net_generic(net, act_mpls_ops.net_id); return tcf_idr_search(tn, a, index); } @@ -459,20 +458,20 @@ static struct tc_action_ops act_mpls_ops = { static __net_init int mpls_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, mpls_net_id); + struct tc_action_net *tn = net_generic(net, act_mpls_ops.net_id); return tc_action_net_init(net, tn, &act_mpls_ops); } static void __net_exit mpls_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, mpls_net_id); + tc_action_net_exit(net_list, act_mpls_ops.net_id); } static struct pernet_operations mpls_net_ops = { .init = mpls_init_net, .exit_batch = mpls_exit_net, - .id = &mpls_net_id, + .id = &act_mpls_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 2a39b3729e84..f5810387ce9a 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -26,7 +26,6 @@ #include -static unsigned int nat_net_id; static struct tc_action_ops act_nat_ops; static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = { @@ -37,7 +36,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, nat_net_id); + struct tc_action_net *tn = net_generic(net, act_nat_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_NAT_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -294,14 +293,14 @@ static int tcf_nat_walker(struct net *net, struct sk_buff *skb, const struct tc_action_ops *ops, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, nat_net_id); + struct tc_action_net *tn = net_generic(net, act_nat_ops.net_id); return tcf_generic_walker(tn, skb, cb, type, ops, extack); } static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index) { - struct tc_action_net *tn = net_generic(net, nat_net_id); + struct tc_action_net *tn = net_generic(net, act_nat_ops.net_id); return tcf_idr_search(tn, a, index); } @@ -320,20 +319,20 @@ static struct tc_action_ops act_nat_ops = { static __net_init int nat_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, nat_net_id); + struct tc_action_net *tn = net_generic(net, act_nat_ops.net_id); return tc_action_net_init(net, tn, &act_nat_ops); } static void __net_exit nat_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, nat_net_id); + tc_action_net_exit(net_list, act_nat_ops.net_id); } static struct pernet_operations nat_net_ops = { .init = nat_init_net, .exit_batch = nat_exit_net, - .id = &nat_net_id, + .id = &act_nat_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 823ee643371c..0951cd1e277e 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -21,7 +21,6 @@ #include #include -static unsigned int pedit_net_id; static struct tc_action_ops act_pedit_ops; static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = { @@ -139,7 +138,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, pedit_net_id); + struct tc_action_net *tn = net_generic(net, act_pedit_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_PEDIT_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -497,14 +496,14 @@ static int tcf_pedit_walker(struct net *net, struct sk_buff *skb, const struct tc_action_ops *ops, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, pedit_net_id); + struct tc_action_net *tn = net_generic(net, act_pedit_ops.net_id); return tcf_generic_walker(tn, skb, cb, type, ops, extack); } static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index) { - struct tc_action_net *tn = net_generic(net, pedit_net_id); + struct tc_action_net *tn = net_generic(net, act_pedit_ops.net_id); return tcf_idr_search(tn, a, index); } @@ -561,20 +560,20 @@ static struct tc_action_ops act_pedit_ops = { static __net_init int pedit_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, pedit_net_id); + struct tc_action_net *tn = net_generic(net, act_pedit_ops.net_id); return tc_action_net_init(net, tn, &act_pedit_ops); } static void __net_exit pedit_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, pedit_net_id); + tc_action_net_exit(net_list, act_pedit_ops.net_id); } static struct pernet_operations pedit_net_ops = { .init = pedit_init_net, .exit_batch = pedit_exit_net, - .id = &pedit_net_id, + .id = &act_pedit_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index b759628a47c2..b5df33c6de52 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -22,7 +22,6 @@ /* Each policer is serialized by its individual spinlock */ -static unsigned int police_net_id; static struct tc_action_ops act_police_ops; static int tcf_police_walker(struct net *net, struct sk_buff *skb, @@ -30,7 +29,7 @@ static int tcf_police_walker(struct net *net, struct sk_buff *skb, const struct tc_action_ops *ops, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, police_net_id); + struct tc_action_net *tn = net_generic(net, act_police_ops.net_id); return tcf_generic_walker(tn, skb, cb, type, ops, extack); } @@ -58,7 +57,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, struct tc_police *parm; struct tcf_police *police; struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; - struct tc_action_net *tn = net_generic(net, police_net_id); + struct tc_action_net *tn = net_generic(net, act_police_ops.net_id); struct tcf_police_params *new; bool exists = false; u32 index; @@ -414,7 +413,7 @@ nla_put_failure: static int tcf_police_search(struct net *net, struct tc_action **a, u32 index) { - struct tc_action_net *tn = net_generic(net, police_net_id); + struct tc_action_net *tn = net_generic(net, act_police_ops.net_id); return tcf_idr_search(tn, a, index); } @@ -522,20 +521,20 @@ static struct tc_action_ops act_police_ops = { static __net_init int police_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, police_net_id); + struct tc_action_net *tn = net_generic(net, act_police_ops.net_id); return tc_action_net_init(net, tn, &act_police_ops); } static void __net_exit police_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, police_net_id); + tc_action_net_exit(net_list, act_police_ops.net_id); } static struct pernet_operations police_net_ops = { .init = police_init_net, .exit_batch = police_exit_net, - .id = &police_net_id, + .id = &act_police_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 2f7f5e44d28c..c25a193f9ef4 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -23,7 +23,6 @@ #include -static unsigned int sample_net_id; static struct tc_action_ops act_sample_ops; static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = { @@ -38,7 +37,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, sample_net_id); + struct tc_action_net *tn = net_generic(net, act_sample_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_SAMPLE_MAX + 1]; struct psample_group *psample_group; @@ -246,14 +245,14 @@ static int tcf_sample_walker(struct net *net, struct sk_buff *skb, const struct tc_action_ops *ops, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, sample_net_id); + struct tc_action_net *tn = net_generic(net, act_sample_ops.net_id); return tcf_generic_walker(tn, skb, cb, type, ops, extack); } static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index) { - struct tc_action_net *tn = net_generic(net, sample_net_id); + struct tc_action_net *tn = net_generic(net, act_sample_ops.net_id); return tcf_idr_search(tn, a, index); } @@ -330,20 +329,20 @@ static struct tc_action_ops act_sample_ops = { static __net_init int sample_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, sample_net_id); + struct tc_action_net *tn = net_generic(net, act_sample_ops.net_id); return tc_action_net_init(net, tn, &act_sample_ops); } static void __net_exit sample_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, sample_net_id); + tc_action_net_exit(net_list, act_sample_ops.net_id); } static struct pernet_operations sample_net_ops = { .init = sample_init_net, .exit_batch = sample_exit_net, - .id = &sample_net_id, + .id = &act_sample_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 8c1d60bde93e..06efa08afff7 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -18,7 +18,6 @@ #include #include -static unsigned int simp_net_id; static struct tc_action_ops act_simp_ops; #define SIMP_MAX_DATA 32 @@ -89,7 +88,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, simp_net_id); + struct tc_action_net *tn = net_generic(net, act_simp_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_DEF_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -203,14 +202,14 @@ static int tcf_simp_walker(struct net *net, struct sk_buff *skb, const struct tc_action_ops *ops, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, simp_net_id); + struct tc_action_net *tn = net_generic(net, act_simp_ops.net_id); return tcf_generic_walker(tn, skb, cb, type, ops, extack); } static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index) { - struct tc_action_net *tn = net_generic(net, simp_net_id); + struct tc_action_net *tn = net_generic(net, act_simp_ops.net_id); return tcf_idr_search(tn, a, index); } @@ -230,20 +229,20 @@ static struct tc_action_ops act_simp_ops = { static __net_init int simp_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, simp_net_id); + struct tc_action_net *tn = net_generic(net, act_simp_ops.net_id); return tc_action_net_init(net, tn, &act_simp_ops); } static void __net_exit simp_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, simp_net_id); + tc_action_net_exit(net_list, act_simp_ops.net_id); } static struct pernet_operations simp_net_ops = { .init = simp_init_net, .exit_batch = simp_exit_net, - .id = &simp_net_id, + .id = &act_simp_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index e3bd11dfe1ca..72729ed7219a 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -20,7 +20,6 @@ #include #include -static unsigned int skbedit_net_id; static struct tc_action_ops act_skbedit_ops; static u16 tcf_skbedit_hash(struct tcf_skbedit_params *params, @@ -118,7 +117,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 act_flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, skbedit_net_id); + struct tc_action_net *tn = net_generic(net, act_skbedit_ops.net_id); bool bind = act_flags & TCA_ACT_FLAGS_BIND; struct tcf_skbedit_params *params_new; struct nlattr *tb[TCA_SKBEDIT_MAX + 1]; @@ -352,14 +351,14 @@ static int tcf_skbedit_walker(struct net *net, struct sk_buff *skb, const struct tc_action_ops *ops, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, skbedit_net_id); + struct tc_action_net *tn = net_generic(net, act_skbedit_ops.net_id); return tcf_generic_walker(tn, skb, cb, type, ops, extack); } static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index) { - struct tc_action_net *tn = net_generic(net, skbedit_net_id); + struct tc_action_net *tn = net_generic(net, act_skbedit_ops.net_id); return tcf_idr_search(tn, a, index); } @@ -437,20 +436,20 @@ static struct tc_action_ops act_skbedit_ops = { static __net_init int skbedit_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, skbedit_net_id); + struct tc_action_net *tn = net_generic(net, act_skbedit_ops.net_id); return tc_action_net_init(net, tn, &act_skbedit_ops); } static void __net_exit skbedit_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, skbedit_net_id); + tc_action_net_exit(net_list, act_skbedit_ops.net_id); } static struct pernet_operations skbedit_net_ops = { .init = skbedit_init_net, .exit_batch = skbedit_exit_net, - .id = &skbedit_net_id, + .id = &act_skbedit_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 2083612d8780..999adceb686a 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -19,7 +19,6 @@ #include #include -static unsigned int skbmod_net_id; static struct tc_action_ops act_skbmod_ops; static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a, @@ -103,7 +102,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, skbmod_net_id); + struct tc_action_net *tn = net_generic(net, act_skbmod_ops.net_id); bool ovr = flags & TCA_ACT_FLAGS_REPLACE; bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_SKBMOD_MAX + 1]; @@ -281,14 +280,14 @@ static int tcf_skbmod_walker(struct net *net, struct sk_buff *skb, const struct tc_action_ops *ops, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, skbmod_net_id); + struct tc_action_net *tn = net_generic(net, act_skbmod_ops.net_id); return tcf_generic_walker(tn, skb, cb, type, ops, extack); } static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index) { - struct tc_action_net *tn = net_generic(net, skbmod_net_id); + struct tc_action_net *tn = net_generic(net, act_skbmod_ops.net_id); return tcf_idr_search(tn, a, index); } @@ -308,20 +307,20 @@ static struct tc_action_ops act_skbmod_ops = { static __net_init int skbmod_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, skbmod_net_id); + struct tc_action_net *tn = net_generic(net, act_skbmod_ops.net_id); return tc_action_net_init(net, tn, &act_skbmod_ops); } static void __net_exit skbmod_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, skbmod_net_id); + tc_action_net_exit(net_list, act_skbmod_ops.net_id); } static struct pernet_operations skbmod_net_ops = { .init = skbmod_init_net, .exit_batch = skbmod_exit_net, - .id = &skbmod_net_id, + .id = &act_skbmod_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 856dc23cef8c..2db0c929fa09 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -20,7 +20,6 @@ #include #include -static unsigned int tunnel_key_net_id; static struct tc_action_ops act_tunnel_key_ops; static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a, @@ -358,7 +357,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 act_flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + struct tc_action_net *tn = net_generic(net, act_tunnel_key_ops.net_id); bool bind = act_flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1]; struct tcf_tunnel_key_params *params_new; @@ -775,14 +774,14 @@ static int tunnel_key_walker(struct net *net, struct sk_buff *skb, const struct tc_action_ops *ops, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + struct tc_action_net *tn = net_generic(net, act_tunnel_key_ops.net_id); return tcf_generic_walker(tn, skb, cb, type, ops, extack); } static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index) { - struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + struct tc_action_net *tn = net_generic(net, act_tunnel_key_ops.net_id); return tcf_idr_search(tn, a, index); } @@ -858,20 +857,20 @@ static struct tc_action_ops act_tunnel_key_ops = { static __net_init int tunnel_key_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + struct tc_action_net *tn = net_generic(net, act_tunnel_key_ops.net_id); return tc_action_net_init(net, tn, &act_tunnel_key_ops); } static void __net_exit tunnel_key_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, tunnel_key_net_id); + tc_action_net_exit(net_list, act_tunnel_key_ops.net_id); } static struct pernet_operations tunnel_key_net_ops = { .init = tunnel_key_init_net, .exit_batch = tunnel_key_exit_net, - .id = &tunnel_key_net_id, + .id = &act_tunnel_key_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 68b5e772386a..a1a0c2c6a5cc 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -16,7 +16,6 @@ #include #include -static unsigned int vlan_net_id; static struct tc_action_ops act_vlan_ops; static int tcf_vlan_act(struct sk_buff *skb, const struct tc_action *a, @@ -117,7 +116,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, vlan_net_id); + struct tc_action_net *tn = net_generic(net, act_vlan_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_VLAN_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -338,7 +337,7 @@ static int tcf_vlan_walker(struct net *net, struct sk_buff *skb, const struct tc_action_ops *ops, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, vlan_net_id); + struct tc_action_net *tn = net_generic(net, act_vlan_ops.net_id); return tcf_generic_walker(tn, skb, cb, type, ops, extack); } @@ -355,7 +354,7 @@ static void tcf_vlan_stats_update(struct tc_action *a, u64 bytes, u64 packets, static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index) { - struct tc_action_net *tn = net_generic(net, vlan_net_id); + struct tc_action_net *tn = net_generic(net, act_vlan_ops.net_id); return tcf_idr_search(tn, a, index); } @@ -448,20 +447,20 @@ static struct tc_action_ops act_vlan_ops = { static __net_init int vlan_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, vlan_net_id); + struct tc_action_net *tn = net_generic(net, act_vlan_ops.net_id); return tc_action_net_init(net, tn, &act_vlan_ops); } static void __net_exit vlan_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, vlan_net_id); + tc_action_net_exit(net_list, act_vlan_ops.net_id); } static struct pernet_operations vlan_net_ops = { .init = vlan_init_net, .exit_batch = vlan_exit_net, - .id = &vlan_net_id, + .id = &act_vlan_ops.net_id, .size = sizeof(struct tc_action_net), }; -- cgit v1.2.3-70-g09d2 From 864b656f82ccd433d3e38149c3673d295ad64bf6 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Wed, 7 Sep 2022 10:40:40 -0600 Subject: bpf: Add support for writing to nf_conn:mark Support direct writes to nf_conn:mark from TC and XDP prog types. This is useful when applications want to store per-connection metadata. This is also particularly useful for applications that run both bpf and iptables/nftables because the latter can trivially access this metadata. One example use case would be if a bpf prog is responsible for advanced packet classification and iptables/nftables is later used for routing due to pre-existing/legacy code. Signed-off-by: Daniel Xu Link: https://lore.kernel.org/r/ebca06dea366e3e7e861c12f375a548cc4c61108.1662568410.git.dxu@dxuuu.xyz Signed-off-by: Alexei Starovoitov --- include/net/netfilter/nf_conntrack_bpf.h | 23 +++++++++++ net/core/filter.c | 54 ++++++++++++++++++++++++++ net/netfilter/nf_conntrack_bpf.c | 66 +++++++++++++++++++++++++++++++- net/netfilter/nf_conntrack_core.c | 1 + 4 files changed, 143 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_bpf.h b/include/net/netfilter/nf_conntrack_bpf.h index a473b56842c5..a61a93d1c6dc 100644 --- a/include/net/netfilter/nf_conntrack_bpf.h +++ b/include/net/netfilter/nf_conntrack_bpf.h @@ -3,13 +3,22 @@ #ifndef _NF_CONNTRACK_BPF_H #define _NF_CONNTRACK_BPF_H +#include #include #include +#include #if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \ (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) extern int register_nf_conntrack_bpf(void); +extern void cleanup_nf_conntrack_bpf(void); + +extern struct mutex nf_conn_btf_access_lock; +extern int (*nfct_bsa)(struct bpf_verifier_log *log, const struct btf *btf, + const struct btf_type *t, int off, int size, + enum bpf_access_type atype, u32 *next_btf_id, + enum bpf_type_flag *flag); #else @@ -18,6 +27,20 @@ static inline int register_nf_conntrack_bpf(void) return 0; } +static inline void cleanup_nf_conntrack_bpf(void) +{ +} + +static inline int nf_conntrack_btf_struct_access(struct bpf_verifier_log *log, + const struct btf *btf, + const struct btf_type *t, int off, + int size, enum bpf_access_type atype, + u32 *next_btf_id, + enum bpf_type_flag *flag) +{ + return -EACCES; +} + #endif #endif /* _NF_CONNTRACK_BPF_H */ diff --git a/net/core/filter.c b/net/core/filter.c index e872f45399b0..4b2be211bcbe 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -18,6 +18,7 @@ */ #include +#include #include #include #include @@ -8604,6 +8605,36 @@ static bool tc_cls_act_is_valid_access(int off, int size, return bpf_skb_is_valid_access(off, size, type, prog, info); } +DEFINE_MUTEX(nf_conn_btf_access_lock); +EXPORT_SYMBOL_GPL(nf_conn_btf_access_lock); + +int (*nfct_bsa)(struct bpf_verifier_log *log, const struct btf *btf, + const struct btf_type *t, int off, int size, + enum bpf_access_type atype, u32 *next_btf_id, + enum bpf_type_flag *flag); +EXPORT_SYMBOL_GPL(nfct_bsa); + +static int tc_cls_act_btf_struct_access(struct bpf_verifier_log *log, + const struct btf *btf, + const struct btf_type *t, int off, + int size, enum bpf_access_type atype, + u32 *next_btf_id, + enum bpf_type_flag *flag) +{ + int ret = -EACCES; + + if (atype == BPF_READ) + return btf_struct_access(log, btf, t, off, size, atype, next_btf_id, + flag); + + mutex_lock(&nf_conn_btf_access_lock); + if (nfct_bsa) + ret = nfct_bsa(log, btf, t, off, size, atype, next_btf_id, flag); + mutex_unlock(&nf_conn_btf_access_lock); + + return ret; +} + static bool __is_valid_xdp_access(int off, int size) { if (off < 0 || off >= sizeof(struct xdp_md)) @@ -8663,6 +8694,27 @@ void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog, } EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); +static int xdp_btf_struct_access(struct bpf_verifier_log *log, + const struct btf *btf, + const struct btf_type *t, int off, + int size, enum bpf_access_type atype, + u32 *next_btf_id, + enum bpf_type_flag *flag) +{ + int ret = -EACCES; + + if (atype == BPF_READ) + return btf_struct_access(log, btf, t, off, size, atype, next_btf_id, + flag); + + mutex_lock(&nf_conn_btf_access_lock); + if (nfct_bsa) + ret = nfct_bsa(log, btf, t, off, size, atype, next_btf_id, flag); + mutex_unlock(&nf_conn_btf_access_lock); + + return ret; +} + static bool sock_addr_is_valid_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, @@ -10557,6 +10609,7 @@ const struct bpf_verifier_ops tc_cls_act_verifier_ops = { .convert_ctx_access = tc_cls_act_convert_ctx_access, .gen_prologue = tc_cls_act_prologue, .gen_ld_abs = bpf_gen_ld_abs, + .btf_struct_access = tc_cls_act_btf_struct_access, }; const struct bpf_prog_ops tc_cls_act_prog_ops = { @@ -10568,6 +10621,7 @@ const struct bpf_verifier_ops xdp_verifier_ops = { .is_valid_access = xdp_is_valid_access, .convert_ctx_access = xdp_convert_ctx_access, .gen_prologue = bpf_noop_prologue, + .btf_struct_access = xdp_btf_struct_access, }; const struct bpf_prog_ops xdp_prog_ops = { diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c index 1cd87b28c9b0..77eb8e959f61 100644 --- a/net/netfilter/nf_conntrack_bpf.c +++ b/net/netfilter/nf_conntrack_bpf.c @@ -6,8 +6,10 @@ * are exposed through to BPF programs is explicitly unstable. */ +#include #include #include +#include #include #include #include @@ -184,6 +186,54 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net, return ct; } +BTF_ID_LIST(btf_nf_conn_ids) +BTF_ID(struct, nf_conn) +BTF_ID(struct, nf_conn___init) + +/* Check writes into `struct nf_conn` */ +static int _nf_conntrack_btf_struct_access(struct bpf_verifier_log *log, + const struct btf *btf, + const struct btf_type *t, int off, + int size, enum bpf_access_type atype, + u32 *next_btf_id, + enum bpf_type_flag *flag) +{ + const struct btf_type *ncit; + const struct btf_type *nct; + size_t end; + + ncit = btf_type_by_id(btf, btf_nf_conn_ids[1]); + nct = btf_type_by_id(btf, btf_nf_conn_ids[0]); + + if (t != nct && t != ncit) { + bpf_log(log, "only read is supported\n"); + return -EACCES; + } + + /* `struct nf_conn` and `struct nf_conn___init` have the same layout + * so we are safe to simply merge offset checks here + */ + switch (off) { +#if defined(CONFIG_NF_CONNTRACK_MARK) + case offsetof(struct nf_conn, mark): + end = offsetofend(struct nf_conn, mark); + break; +#endif + default: + bpf_log(log, "no write support to nf_conn at off %d\n", off); + return -EACCES; + } + + if (off + size > end) { + bpf_log(log, + "write access at off %d with size %d beyond the member of nf_conn ended at %zu\n", + off, size, end); + return -EACCES; + } + + return 0; +} + __diag_push(); __diag_ignore_all("-Wmissing-prototypes", "Global functions as their definitions will be in nf_conntrack BTF"); @@ -449,5 +499,19 @@ int register_nf_conntrack_bpf(void) int ret; ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_kfunc_set); - return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_kfunc_set); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_kfunc_set); + if (!ret) { + mutex_lock(&nf_conn_btf_access_lock); + nfct_bsa = _nf_conntrack_btf_struct_access; + mutex_unlock(&nf_conn_btf_access_lock); + } + + return ret; +} + +void cleanup_nf_conntrack_bpf(void) +{ + mutex_lock(&nf_conn_btf_access_lock); + nfct_bsa = NULL; + mutex_unlock(&nf_conn_btf_access_lock); } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index da65c6e8eeeb..0195f60fc43b 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2512,6 +2512,7 @@ static int kill_all(struct nf_conn *i, void *data) void nf_conntrack_cleanup_start(void) { + cleanup_nf_conntrack_bpf(); conntrack_gc_work.exiting = true; } -- cgit v1.2.3-70-g09d2 From dda2fa08a13c688bed320ef2e4ba541abb4d6c17 Mon Sep 17 00:00:00 2001 From: Wojciech Drewek Date: Thu, 8 Sep 2022 10:16:41 -0700 Subject: flow_dissector: Add L2TPv3 dissectors Allow to dissect L2TPv3 specific field which is: - session ID (32 bits) L2TPv3 might be transported over IP or over UDP, this implementation is only about L2TPv3 over IP. IP protocol carries L2TPv3 when ip_proto is IPPROTO_L2TP (115). Acked-by: Guillaume Nault Signed-off-by: Wojciech Drewek Signed-off-by: Tony Nguyen Signed-off-by: Paolo Abeni --- include/net/flow_dissector.h | 9 +++++++++ net/core/flow_dissector.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) (limited to 'include/net') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 6c74812d64b2..5ccf52ef8809 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -289,6 +289,14 @@ struct flow_dissector_key_pppoe { __be16 type; }; +/** + * struct flow_dissector_key_l2tpv3: + * @session_id: identifier for a l2tp session + */ +struct flow_dissector_key_l2tpv3 { + __be32 session_id; +}; + enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ @@ -320,6 +328,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_HASH, /* struct flow_dissector_key_hash */ FLOW_DISSECTOR_KEY_NUM_OF_VLANS, /* struct flow_dissector_key_num_of_vlans */ FLOW_DISSECTOR_KEY_PPPOE, /* struct flow_dissector_key_pppoe */ + FLOW_DISSECTOR_KEY_L2TPV3, /* struct flow_dissector_key_l2tpv3 */ FLOW_DISSECTOR_KEY_MAX, }; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 990429c69ccd..2a1f513a2dc8 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -204,6 +204,30 @@ static void __skb_flow_dissect_icmp(const struct sk_buff *skb, skb_flow_get_icmp_tci(skb, key_icmp, data, thoff, hlen); } +static void __skb_flow_dissect_l2tpv3(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container, const void *data, + int nhoff, int hlen) +{ + struct flow_dissector_key_l2tpv3 *key_l2tpv3; + struct { + __be32 session_id; + } *hdr, _hdr; + + if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_L2TPV3)) + return; + + hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); + if (!hdr) + return; + + key_l2tpv3 = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_L2TPV3, + target_container); + + key_l2tpv3->session_id = hdr->session_id; +} + void skb_flow_dissect_meta(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container) @@ -1501,6 +1525,10 @@ ip_proto_again: __skb_flow_dissect_icmp(skb, flow_dissector, target_container, data, nhoff, hlen); break; + case IPPROTO_L2TP: + __skb_flow_dissect_l2tpv3(skb, flow_dissector, target_container, + data, nhoff, hlen); + break; default: break; -- cgit v1.2.3-70-g09d2 From 2c1befaced504a125d1ab7479684a9208879d350 Mon Sep 17 00:00:00 2001 From: Wojciech Drewek Date: Thu, 8 Sep 2022 10:16:43 -0700 Subject: flow_offload: Introduce flow_match_l2tpv3 Allow to offload L2TPv3 filters by adding flow_rule_match_l2tpv3. Drivers can extract L2TPv3 specific fields from now on. Signed-off-by: Wojciech Drewek Signed-off-by: Tony Nguyen Signed-off-by: Paolo Abeni --- include/net/flow_offload.h | 6 ++++++ net/core/flow_offload.c | 7 +++++++ 2 files changed, 13 insertions(+) (limited to 'include/net') diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 2a9a9e42e7fd..e343f9f8363e 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -80,6 +80,10 @@ struct flow_match_pppoe { struct flow_dissector_key_pppoe *key, *mask; }; +struct flow_match_l2tpv3 { + struct flow_dissector_key_l2tpv3 *key, *mask; +}; + struct flow_rule; void flow_rule_match_meta(const struct flow_rule *rule, @@ -128,6 +132,8 @@ void flow_rule_match_ct(const struct flow_rule *rule, struct flow_match_ct *out); void flow_rule_match_pppoe(const struct flow_rule *rule, struct flow_match_pppoe *out); +void flow_rule_match_l2tpv3(const struct flow_rule *rule, + struct flow_match_l2tpv3 *out); enum flow_action_id { FLOW_ACTION_ACCEPT = 0, diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index 8cfb63528d18..abe423fd5736 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -237,6 +237,13 @@ void flow_rule_match_pppoe(const struct flow_rule *rule, } EXPORT_SYMBOL(flow_rule_match_pppoe); +void flow_rule_match_l2tpv3(const struct flow_rule *rule, + struct flow_match_l2tpv3 *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_L2TPV3, out); +} +EXPORT_SYMBOL(flow_rule_match_l2tpv3); + struct flow_block_cb *flow_block_cb_alloc(flow_setup_cb_t *cb, void *cb_ident, void *cb_priv, void (*release)(void *cb_priv)) -- cgit v1.2.3-70-g09d2 From 8f6a19c0316deb48cdfdc5335de9a6d7db5b7b62 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 11 Sep 2022 04:06:58 +0300 Subject: net: dsa: introduce dsa_port_get_master() There is a desire to support for DSA masters in a LAG. That configuration is intended to work by simply enslaving the master to a bonding/team device. But the physical DSA master (the LAG slave) still has a dev->dsa_ptr, and that cpu_dp still corresponds to the physical CPU port. However, we would like to be able to retrieve the LAG that's the upper of the physical DSA master. In preparation for that, introduce a helper called dsa_port_get_master() that replaces all occurrences of the dp->cpu_dp->master pattern. The distinction between LAG and non-LAG will be made later within the helper itself. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Paolo Abeni --- drivers/net/dsa/bcm_sf2.c | 4 ++-- drivers/net/dsa/bcm_sf2_cfp.c | 4 ++-- drivers/net/dsa/lan9303-core.c | 4 ++-- drivers/net/ethernet/mediatek/mtk_ppe_offload.c | 2 +- include/net/dsa.h | 5 +++++ net/dsa/dsa2.c | 8 +++---- net/dsa/dsa_priv.h | 2 +- net/dsa/port.c | 28 ++++++++++++------------- net/dsa/slave.c | 11 +++++----- net/dsa/tag_8021q.c | 4 ++-- 10 files changed, 38 insertions(+), 34 deletions(-) (limited to 'include/net') diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 572f7450b527..6507663f35e5 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -983,7 +983,7 @@ static int bcm_sf2_sw_resume(struct dsa_switch *ds) static void bcm_sf2_sw_get_wol(struct dsa_switch *ds, int port, struct ethtool_wolinfo *wol) { - struct net_device *p = dsa_to_port(ds, port)->cpu_dp->master; + struct net_device *p = dsa_port_to_master(dsa_to_port(ds, port)); struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); struct ethtool_wolinfo pwol = { }; @@ -1007,7 +1007,7 @@ static void bcm_sf2_sw_get_wol(struct dsa_switch *ds, int port, static int bcm_sf2_sw_set_wol(struct dsa_switch *ds, int port, struct ethtool_wolinfo *wol) { - struct net_device *p = dsa_to_port(ds, port)->cpu_dp->master; + struct net_device *p = dsa_port_to_master(dsa_to_port(ds, port)); struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); s8 cpu_port = dsa_to_port(ds, port)->cpu_dp->index; struct ethtool_wolinfo pwol = { }; diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c index 22bc295bebdb..c4010b7bf089 100644 --- a/drivers/net/dsa/bcm_sf2_cfp.c +++ b/drivers/net/dsa/bcm_sf2_cfp.c @@ -1102,7 +1102,7 @@ static int bcm_sf2_cfp_rule_get_all(struct bcm_sf2_priv *priv, int bcm_sf2_get_rxnfc(struct dsa_switch *ds, int port, struct ethtool_rxnfc *nfc, u32 *rule_locs) { - struct net_device *p = dsa_to_port(ds, port)->cpu_dp->master; + struct net_device *p = dsa_port_to_master(dsa_to_port(ds, port)); struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); int ret = 0; @@ -1145,7 +1145,7 @@ int bcm_sf2_get_rxnfc(struct dsa_switch *ds, int port, int bcm_sf2_set_rxnfc(struct dsa_switch *ds, int port, struct ethtool_rxnfc *nfc) { - struct net_device *p = dsa_to_port(ds, port)->cpu_dp->master; + struct net_device *p = dsa_port_to_master(dsa_to_port(ds, port)); struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); int ret = 0; diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index 9e04541c3144..438e46af03e9 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -1092,7 +1092,7 @@ static int lan9303_port_enable(struct dsa_switch *ds, int port, if (!dsa_port_is_user(dp)) return 0; - vlan_vid_add(dp->cpu_dp->master, htons(ETH_P_8021Q), port); + vlan_vid_add(dsa_port_to_master(dp), htons(ETH_P_8021Q), port); return lan9303_enable_processing_port(chip, port); } @@ -1105,7 +1105,7 @@ static void lan9303_port_disable(struct dsa_switch *ds, int port) if (!dsa_port_is_user(dp)) return; - vlan_vid_del(dp->cpu_dp->master, htons(ETH_P_8021Q), port); + vlan_vid_del(dsa_port_to_master(dp), htons(ETH_P_8021Q), port); lan9303_disable_processing_port(chip, port); lan9303_phy_write(ds, chip->phy_addr_base + port, MII_BMCR, BMCR_PDOWN); diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c index 25dc3c3aa31d..5a1fc4bcd7a5 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c @@ -173,7 +173,7 @@ mtk_flow_get_dsa_port(struct net_device **dev) if (dp->cpu_dp->tag_ops->proto != DSA_TAG_PROTO_MTK) return -ENODEV; - *dev = dp->cpu_dp->master; + *dev = dsa_port_to_master(dp); return dp->index; #else diff --git a/include/net/dsa.h b/include/net/dsa.h index f2ce12860546..23eac1bda843 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -718,6 +718,11 @@ static inline bool dsa_port_offloads_lag(struct dsa_port *dp, return dsa_port_lag_dev_get(dp) == lag->dev; } +static inline struct net_device *dsa_port_to_master(const struct dsa_port *dp) +{ + return dp->cpu_dp->master; +} + static inline struct net_device *dsa_port_to_bridge_port(const struct dsa_port *dp) { diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index ed56c7a554b8..f1f96e2e56aa 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -1263,11 +1263,11 @@ int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst, * attempts to change the tagging protocol. If we ever lift the IFF_UP * restriction, there needs to be another mutex which serializes this. */ - list_for_each_entry(dp, &dst->ports, list) { - if (dsa_port_is_cpu(dp) && (dp->master->flags & IFF_UP)) + dsa_tree_for_each_user_port(dp, dst) { + if (dsa_port_to_master(dp)->flags & IFF_UP) goto out_unlock; - if (dsa_port_is_user(dp) && (dp->slave->flags & IFF_UP)) + if (dp->slave->flags & IFF_UP) goto out_unlock; } @@ -1797,7 +1797,7 @@ void dsa_switch_shutdown(struct dsa_switch *ds) rtnl_lock(); dsa_switch_for_each_user_port(dp, ds) { - master = dp->cpu_dp->master; + master = dsa_port_to_master(dp); slave_dev = dp->slave; netdev_upper_dev_unlink(master, slave_dev); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 614fbba8fe39..c48c5c8ba790 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -322,7 +322,7 @@ dsa_slave_to_master(const struct net_device *dev) { struct dsa_port *dp = dsa_slave_to_port(dev); - return dp->cpu_dp->master; + return dsa_port_to_master(dp); } /* If under a bridge with vlan_filtering=0, make sure to send pvid-tagged diff --git a/net/dsa/port.c b/net/dsa/port.c index 7afc35db0c29..4183e60db4f9 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -1026,7 +1026,7 @@ int dsa_port_standalone_host_fdb_add(struct dsa_port *dp, int dsa_port_bridge_host_fdb_add(struct dsa_port *dp, const unsigned char *addr, u16 vid) { - struct dsa_port *cpu_dp = dp->cpu_dp; + struct net_device *master = dsa_port_to_master(dp); struct dsa_db db = { .type = DSA_DB_BRIDGE, .bridge = *dp->bridge, @@ -1037,8 +1037,8 @@ int dsa_port_bridge_host_fdb_add(struct dsa_port *dp, * requires rtnl_lock(), since we can't guarantee that is held here, * and we can't take it either. */ - if (cpu_dp->master->priv_flags & IFF_UNICAST_FLT) { - err = dev_uc_add(cpu_dp->master, addr); + if (master->priv_flags & IFF_UNICAST_FLT) { + err = dev_uc_add(master, addr); if (err) return err; } @@ -1077,15 +1077,15 @@ int dsa_port_standalone_host_fdb_del(struct dsa_port *dp, int dsa_port_bridge_host_fdb_del(struct dsa_port *dp, const unsigned char *addr, u16 vid) { - struct dsa_port *cpu_dp = dp->cpu_dp; + struct net_device *master = dsa_port_to_master(dp); struct dsa_db db = { .type = DSA_DB_BRIDGE, .bridge = *dp->bridge, }; int err; - if (cpu_dp->master->priv_flags & IFF_UNICAST_FLT) { - err = dev_uc_del(cpu_dp->master, addr); + if (master->priv_flags & IFF_UNICAST_FLT) { + err = dev_uc_del(master, addr); if (err) return err; } @@ -1208,14 +1208,14 @@ int dsa_port_standalone_host_mdb_add(const struct dsa_port *dp, int dsa_port_bridge_host_mdb_add(const struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb) { - struct dsa_port *cpu_dp = dp->cpu_dp; + struct net_device *master = dsa_port_to_master(dp); struct dsa_db db = { .type = DSA_DB_BRIDGE, .bridge = *dp->bridge, }; int err; - err = dev_mc_add(cpu_dp->master, mdb->addr); + err = dev_mc_add(master, mdb->addr); if (err) return err; @@ -1252,14 +1252,14 @@ int dsa_port_standalone_host_mdb_del(const struct dsa_port *dp, int dsa_port_bridge_host_mdb_del(const struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb) { - struct dsa_port *cpu_dp = dp->cpu_dp; + struct net_device *master = dsa_port_to_master(dp); struct dsa_db db = { .type = DSA_DB_BRIDGE, .bridge = *dp->bridge, }; int err; - err = dev_mc_del(cpu_dp->master, mdb->addr); + err = dev_mc_del(master, mdb->addr); if (err) return err; @@ -1294,19 +1294,19 @@ int dsa_port_host_vlan_add(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan, struct netlink_ext_ack *extack) { + struct net_device *master = dsa_port_to_master(dp); struct dsa_notifier_vlan_info info = { .dp = dp, .vlan = vlan, .extack = extack, }; - struct dsa_port *cpu_dp = dp->cpu_dp; int err; err = dsa_port_notify(dp, DSA_NOTIFIER_HOST_VLAN_ADD, &info); if (err && err != -EOPNOTSUPP) return err; - vlan_vid_add(cpu_dp->master, htons(ETH_P_8021Q), vlan->vid); + vlan_vid_add(master, htons(ETH_P_8021Q), vlan->vid); return err; } @@ -1314,18 +1314,18 @@ int dsa_port_host_vlan_add(struct dsa_port *dp, int dsa_port_host_vlan_del(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan) { + struct net_device *master = dsa_port_to_master(dp); struct dsa_notifier_vlan_info info = { .dp = dp, .vlan = vlan, }; - struct dsa_port *cpu_dp = dp->cpu_dp; int err; err = dsa_port_notify(dp, DSA_NOTIFIER_HOST_VLAN_DEL, &info); if (err && err != -EOPNOTSUPP) return err; - vlan_vid_del(cpu_dp->master, htons(ETH_P_8021Q), vlan->vid); + vlan_vid_del(master, htons(ETH_P_8021Q), vlan->vid); return err; } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 345106b1ed78..55094b94a5ae 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1503,8 +1503,7 @@ static int dsa_slave_setup_tc_block(struct net_device *dev, static int dsa_slave_setup_ft_block(struct dsa_switch *ds, int port, void *type_data) { - struct dsa_port *cpu_dp = dsa_to_port(ds, port)->cpu_dp; - struct net_device *master = cpu_dp->master; + struct net_device *master = dsa_port_to_master(dsa_to_port(ds, port)); if (!master->netdev_ops->ndo_setup_tc) return -EOPNOTSUPP; @@ -2147,13 +2146,14 @@ static int dsa_slave_fill_forward_path(struct net_device_path_ctx *ctx, struct net_device_path *path) { struct dsa_port *dp = dsa_slave_to_port(ctx->dev); + struct net_device *master = dsa_port_to_master(dp); struct dsa_port *cpu_dp = dp->cpu_dp; path->dev = ctx->dev; path->type = DEV_PATH_DSA; path->dsa.proto = cpu_dp->tag_ops->proto; path->dsa.port = dp->index; - ctx->dev = cpu_dp->master; + ctx->dev = master; return 0; } @@ -2271,9 +2271,9 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev) void dsa_slave_setup_tagger(struct net_device *slave) { struct dsa_port *dp = dsa_slave_to_port(slave); + struct net_device *master = dsa_port_to_master(dp); struct dsa_slave_priv *p = netdev_priv(slave); const struct dsa_port *cpu_dp = dp->cpu_dp; - struct net_device *master = cpu_dp->master; const struct dsa_switch *ds = dp->ds; slave->needed_headroom = cpu_dp->tag_ops->needed_headroom; @@ -2330,8 +2330,7 @@ int dsa_slave_resume(struct net_device *slave_dev) int dsa_slave_create(struct dsa_port *port) { - const struct dsa_port *cpu_dp = port->cpu_dp; - struct net_device *master = cpu_dp->master; + struct net_device *master = dsa_port_to_master(port); struct dsa_switch *ds = port->ds; const char *name = port->name; struct net_device *slave_dev; diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index b5f80bc45ceb..34e5ec5d3e23 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -330,7 +330,7 @@ static int dsa_tag_8021q_port_setup(struct dsa_switch *ds, int port) if (!dsa_port_is_user(dp)) return 0; - master = dp->cpu_dp->master; + master = dsa_port_to_master(dp); err = dsa_port_tag_8021q_vlan_add(dp, vid, false); if (err) { @@ -359,7 +359,7 @@ static void dsa_tag_8021q_port_teardown(struct dsa_switch *ds, int port) if (!dsa_port_is_user(dp)) return; - master = dp->cpu_dp->master; + master = dsa_port_to_master(dp); dsa_port_tag_8021q_vlan_del(dp, vid, false); -- cgit v1.2.3-70-g09d2 From 95f510d0b792f308d3d748242fe960c35bdc2c62 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 11 Sep 2022 04:06:59 +0300 Subject: net: dsa: allow the DSA master to be seen and changed through rtnetlink Some DSA switches have multiple CPU ports, which can be used to improve CPU termination throughput, but DSA, through dsa_tree_setup_cpu_ports(), sets up only the first one, leading to suboptimal use of hardware. The desire is to not change the default configuration but to permit the user to create a dynamic mapping between individual user ports and the CPU port that they are served by, configurable through rtnetlink. It is also intended to permit load balancing between CPU ports, and in that case, the foreseen model is for the DSA master to be a bonding interface whose lowers are the physical DSA masters. To that end, we create a struct rtnl_link_ops for DSA user ports with the "dsa" kind. We expose the IFLA_DSA_MASTER link attribute that contains the ifindex of the newly desired DSA master. Signed-off-by: Vladimir Oltean Signed-off-by: Paolo Abeni --- include/net/dsa.h | 8 +++ include/uapi/linux/if_link.h | 10 ++++ net/dsa/Makefile | 10 +++- net/dsa/dsa.c | 9 +++ net/dsa/dsa2.c | 14 +++++ net/dsa/dsa_priv.h | 10 ++++ net/dsa/netlink.c | 62 +++++++++++++++++++++ net/dsa/port.c | 130 +++++++++++++++++++++++++++++++++++++++++++ net/dsa/slave.c | 120 +++++++++++++++++++++++++++++++++++++++ 9 files changed, 372 insertions(+), 1 deletion(-) create mode 100644 net/dsa/netlink.c (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 23eac1bda843..3f717c3fcba0 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -559,6 +559,10 @@ static inline bool dsa_is_user_port(struct dsa_switch *ds, int p) list_for_each_entry((_dp), &(_dst)->ports, list) \ if (dsa_port_is_user((_dp))) +#define dsa_tree_for_each_user_port_continue_reverse(_dp, _dst) \ + list_for_each_entry_continue_reverse((_dp), &(_dst)->ports, list) \ + if (dsa_port_is_user((_dp))) + #define dsa_tree_for_each_cpu_port(_dp, _dst) \ list_for_each_entry((_dp), &(_dst)->ports, list) \ if (dsa_port_is_cpu((_dp))) @@ -830,6 +834,10 @@ struct dsa_switch_ops { int (*connect_tag_protocol)(struct dsa_switch *ds, enum dsa_tag_protocol proto); + int (*port_change_master)(struct dsa_switch *ds, int port, + struct net_device *master, + struct netlink_ext_ack *extack); + /* Optional switch-wide initialization and destruction methods */ int (*setup)(struct dsa_switch *ds); void (*teardown)(struct dsa_switch *ds); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 0bfa9a99ebb6..3d39fb398d65 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1375,4 +1375,14 @@ enum { #define IFLA_MCTP_MAX (__IFLA_MCTP_MAX - 1) +/* DSA section */ + +enum { + IFLA_DSA_UNSPEC, + IFLA_DSA_MASTER, + __IFLA_DSA_MAX, +}; + +#define IFLA_DSA_MAX (__IFLA_DSA_MAX - 1) + #endif /* _UAPI_LINUX_IF_LINK_H */ diff --git a/net/dsa/Makefile b/net/dsa/Makefile index af28c24ead18..bf57ef3bce2a 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -1,7 +1,15 @@ # SPDX-License-Identifier: GPL-2.0 # the core obj-$(CONFIG_NET_DSA) += dsa_core.o -dsa_core-y += dsa.o dsa2.o master.o port.o slave.o switch.o tag_8021q.o +dsa_core-y += \ + dsa.o \ + dsa2.o \ + master.o \ + netlink.o \ + port.o \ + slave.o \ + switch.o \ + tag_8021q.o # tagging formats obj-$(CONFIG_NET_DSA_TAG_AR9331) += tag_ar9331.o diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index be7b320cda76..64b14f655b23 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -536,8 +536,16 @@ static int __init dsa_init_module(void) dsa_tag_driver_register(&DSA_TAG_DRIVER_NAME(none_ops), THIS_MODULE); + rc = rtnl_link_register(&dsa_link_ops); + if (rc) + goto netlink_register_fail; + return 0; +netlink_register_fail: + dsa_tag_driver_unregister(&DSA_TAG_DRIVER_NAME(none_ops)); + dsa_slave_unregister_notifier(); + dev_remove_pack(&dsa_pack_type); register_notifier_fail: destroy_workqueue(dsa_owq); @@ -547,6 +555,7 @@ module_init(dsa_init_module); static void __exit dsa_cleanup_module(void) { + rtnl_link_unregister(&dsa_link_ops); dsa_tag_driver_unregister(&DSA_TAG_DRIVER_NAME(none_ops)); dsa_slave_unregister_notifier(); diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index f1f96e2e56aa..42422ddea59b 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -387,6 +387,20 @@ static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst) return NULL; } +struct net_device *dsa_tree_find_first_master(struct dsa_switch_tree *dst) +{ + struct device_node *ethernet; + struct net_device *master; + struct dsa_port *cpu_dp; + + cpu_dp = dsa_tree_find_first_cpu(dst); + ethernet = of_parse_phandle(cpu_dp->dn, "ethernet", 0); + master = of_find_net_device_by_node(ethernet); + of_node_put(ethernet); + + return master; +} + /* Assign the default CPU port (the first one in the tree) to all ports of the * fabric which don't already have one as part of their own switch. */ diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index c48c5c8ba790..d252a04ed725 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -200,6 +200,9 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev, return NULL; } +/* netlink.c */ +extern struct rtnl_link_ops dsa_link_ops __read_mostly; + /* port.c */ void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp, const struct dsa_device_ops *tag_ops); @@ -292,6 +295,8 @@ void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr); int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, bool broadcast); void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid, bool broadcast); void dsa_port_set_host_flood(struct dsa_port *dp, bool uc, bool mc); +int dsa_port_change_master(struct dsa_port *dp, struct net_device *master, + struct netlink_ext_ack *extack); /* slave.c */ extern const struct dsa_device_ops notag_netdev_ops; @@ -305,8 +310,12 @@ int dsa_slave_suspend(struct net_device *slave_dev); int dsa_slave_resume(struct net_device *slave_dev); int dsa_slave_register_notifier(void); void dsa_slave_unregister_notifier(void); +void dsa_slave_sync_ha(struct net_device *dev); +void dsa_slave_unsync_ha(struct net_device *dev); void dsa_slave_setup_tagger(struct net_device *slave); int dsa_slave_change_mtu(struct net_device *dev, int new_mtu); +int dsa_slave_change_master(struct net_device *dev, struct net_device *master, + struct netlink_ext_ack *extack); int dsa_slave_manage_vlan_filtering(struct net_device *dev, bool vlan_filtering); @@ -542,6 +551,7 @@ void dsa_lag_map(struct dsa_switch_tree *dst, struct dsa_lag *lag); void dsa_lag_unmap(struct dsa_switch_tree *dst, struct dsa_lag *lag); struct dsa_lag *dsa_tree_lag_find(struct dsa_switch_tree *dst, const struct net_device *lag_dev); +struct net_device *dsa_tree_find_first_master(struct dsa_switch_tree *dst); int dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v); int dsa_broadcast(unsigned long e, void *v); int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst, diff --git a/net/dsa/netlink.c b/net/dsa/netlink.c new file mode 100644 index 000000000000..0f43bbb94769 --- /dev/null +++ b/net/dsa/netlink.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2022 NXP + */ +#include +#include + +#include "dsa_priv.h" + +static const struct nla_policy dsa_policy[IFLA_DSA_MAX + 1] = { + [IFLA_DSA_MASTER] = { .type = NLA_U32 }, +}; + +static int dsa_changelink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + int err; + + if (!data) + return 0; + + if (data[IFLA_DSA_MASTER]) { + u32 ifindex = nla_get_u32(data[IFLA_DSA_MASTER]); + struct net_device *master; + + master = __dev_get_by_index(dev_net(dev), ifindex); + if (!master) + return -EINVAL; + + err = dsa_slave_change_master(dev, master, extack); + if (err) + return err; + } + + return 0; +} + +static size_t dsa_get_size(const struct net_device *dev) +{ + return nla_total_size(sizeof(u32)) + /* IFLA_DSA_MASTER */ + 0; +} + +static int dsa_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct net_device *master = dsa_slave_to_master(dev); + + if (nla_put_u32(skb, IFLA_DSA_MASTER, master->ifindex)) + return -EMSGSIZE; + + return 0; +} + +struct rtnl_link_ops dsa_link_ops __read_mostly = { + .kind = "dsa", + .priv_size = sizeof(struct dsa_port), + .maxtype = IFLA_DSA_MAX, + .policy = dsa_policy, + .changelink = dsa_changelink, + .get_size = dsa_get_size, + .fill_info = dsa_fill_info, +}; diff --git a/net/dsa/port.c b/net/dsa/port.c index 4183e60db4f9..e557b42de9f2 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include @@ -1374,6 +1375,135 @@ int dsa_port_mrp_del_ring_role(const struct dsa_port *dp, return ds->ops->port_mrp_del_ring_role(ds, dp->index, mrp); } +static int dsa_port_assign_master(struct dsa_port *dp, + struct net_device *master, + struct netlink_ext_ack *extack, + bool fail_on_err) +{ + struct dsa_switch *ds = dp->ds; + int port = dp->index, err; + + err = ds->ops->port_change_master(ds, port, master, extack); + if (err && !fail_on_err) + dev_err(ds->dev, "port %d failed to assign master %s: %pe\n", + port, master->name, ERR_PTR(err)); + + if (err && fail_on_err) + return err; + + dp->cpu_dp = master->dsa_ptr; + + return 0; +} + +/* Change the dp->cpu_dp affinity for a user port. Note that both cross-chip + * notifiers and drivers have implicit assumptions about user-to-CPU-port + * mappings, so we unfortunately cannot delay the deletion of the objects + * (switchdev, standalone addresses, standalone VLANs) on the old CPU port + * until the new CPU port has been set up. So we need to completely tear down + * the old CPU port before changing it, and restore it on errors during the + * bringup of the new one. + */ +int dsa_port_change_master(struct dsa_port *dp, struct net_device *master, + struct netlink_ext_ack *extack) +{ + struct net_device *bridge_dev = dsa_port_bridge_dev_get(dp); + struct net_device *old_master = dsa_port_to_master(dp); + struct net_device *dev = dp->slave; + struct dsa_switch *ds = dp->ds; + bool vlan_filtering; + int err, tmp; + + /* Bridges may hold host FDB, MDB and VLAN objects. These need to be + * migrated, so dynamically unoffload and later reoffload the bridge + * port. + */ + if (bridge_dev) { + dsa_port_pre_bridge_leave(dp, bridge_dev); + dsa_port_bridge_leave(dp, bridge_dev); + } + + /* The port might still be VLAN filtering even if it's no longer + * under a bridge, either due to ds->vlan_filtering_is_global or + * ds->needs_standalone_vlan_filtering. In turn this means VLANs + * on the CPU port. + */ + vlan_filtering = dsa_port_is_vlan_filtering(dp); + if (vlan_filtering) { + err = dsa_slave_manage_vlan_filtering(dev, false); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to remove standalone VLANs"); + goto rewind_old_bridge; + } + } + + /* Standalone addresses, and addresses of upper interfaces like + * VLAN, LAG, HSR need to be migrated. + */ + dsa_slave_unsync_ha(dev); + + err = dsa_port_assign_master(dp, master, extack, true); + if (err) + goto rewind_old_addrs; + + dsa_slave_sync_ha(dev); + + if (vlan_filtering) { + err = dsa_slave_manage_vlan_filtering(dev, true); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to restore standalone VLANs"); + goto rewind_new_addrs; + } + } + + if (bridge_dev) { + err = dsa_port_bridge_join(dp, bridge_dev, extack); + if (err && err == -EOPNOTSUPP) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to reoffload bridge"); + goto rewind_new_vlan; + } + } + + return 0; + +rewind_new_vlan: + if (vlan_filtering) + dsa_slave_manage_vlan_filtering(dev, false); + +rewind_new_addrs: + dsa_slave_unsync_ha(dev); + + dsa_port_assign_master(dp, old_master, NULL, false); + +/* Restore the objects on the old CPU port */ +rewind_old_addrs: + dsa_slave_sync_ha(dev); + + if (vlan_filtering) { + tmp = dsa_slave_manage_vlan_filtering(dev, true); + if (tmp) { + dev_err(ds->dev, + "port %d failed to restore standalone VLANs: %pe\n", + dp->index, ERR_PTR(tmp)); + } + } + +rewind_old_bridge: + if (bridge_dev) { + tmp = dsa_port_bridge_join(dp, bridge_dev, extack); + if (tmp) { + dev_err(ds->dev, + "port %d failed to rejoin bridge %s: %pe\n", + dp->index, bridge_dev->name, ERR_PTR(tmp)); + } + } + + return err; +} + void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp, const struct dsa_device_ops *tag_ops) { diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 55094b94a5ae..00df6cf07866 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -164,6 +164,48 @@ static int dsa_slave_unsync_mc(struct net_device *dev, return dsa_slave_schedule_standalone_work(dev, DSA_MC_DEL, addr, 0); } +void dsa_slave_sync_ha(struct net_device *dev) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + struct netdev_hw_addr *ha; + + netif_addr_lock_bh(dev); + + netdev_for_each_synced_mc_addr(ha, dev) + dsa_slave_sync_mc(dev, ha->addr); + + netdev_for_each_synced_uc_addr(ha, dev) + dsa_slave_sync_uc(dev, ha->addr); + + netif_addr_unlock_bh(dev); + + if (dsa_switch_supports_uc_filtering(ds) || + dsa_switch_supports_mc_filtering(ds)) + dsa_flush_workqueue(); +} + +void dsa_slave_unsync_ha(struct net_device *dev) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + struct netdev_hw_addr *ha; + + netif_addr_lock_bh(dev); + + netdev_for_each_synced_uc_addr(ha, dev) + dsa_slave_unsync_uc(dev, ha->addr); + + netdev_for_each_synced_mc_addr(ha, dev) + dsa_slave_unsync_mc(dev, ha->addr); + + netif_addr_unlock_bh(dev); + + if (dsa_switch_supports_uc_filtering(ds) || + dsa_switch_supports_mc_filtering(ds)) + dsa_flush_workqueue(); +} + /* slave mii_bus handling ***************************************************/ static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg) { @@ -2346,6 +2388,7 @@ int dsa_slave_create(struct dsa_port *port) if (slave_dev == NULL) return -ENOMEM; + slave_dev->rtnl_link_ops = &dsa_link_ops; slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; #if IS_ENABLED(CONFIG_DCB) slave_dev->dcbnl_ops = &dsa_slave_dcbnl_ops; @@ -2462,6 +2505,83 @@ void dsa_slave_destroy(struct net_device *slave_dev) free_netdev(slave_dev); } +int dsa_slave_change_master(struct net_device *dev, struct net_device *master, + struct netlink_ext_ack *extack) +{ + struct net_device *old_master = dsa_slave_to_master(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + struct net_device *upper; + struct list_head *iter; + int err; + + if (master == old_master) + return 0; + + if (!ds->ops->port_change_master) { + NL_SET_ERR_MSG_MOD(extack, + "Driver does not support changing DSA master"); + return -EOPNOTSUPP; + } + + if (!netdev_uses_dsa(master)) { + NL_SET_ERR_MSG_MOD(extack, + "Interface not eligible as DSA master"); + return -EOPNOTSUPP; + } + + netdev_for_each_upper_dev_rcu(master, upper, iter) { + if (dsa_slave_dev_check(upper)) + continue; + if (netif_is_bridge_master(upper)) + continue; + NL_SET_ERR_MSG_MOD(extack, "Cannot join master with unknown uppers"); + return -EOPNOTSUPP; + } + + /* Since we allow live-changing the DSA master, plus we auto-open the + * DSA master when the user port opens => we need to ensure that the + * new DSA master is open too. + */ + if (dev->flags & IFF_UP) { + err = dev_open(master, extack); + if (err) + return err; + } + + netdev_upper_dev_unlink(old_master, dev); + + err = netdev_upper_dev_link(master, dev, extack); + if (err) + goto out_revert_old_master_unlink; + + err = dsa_port_change_master(dp, master, extack); + if (err) + goto out_revert_master_link; + + /* Update the MTU of the new CPU port through cross-chip notifiers */ + err = dsa_slave_change_mtu(dev, dev->mtu); + if (err && err != -EOPNOTSUPP) { + netdev_warn(dev, + "nonfatal error updating MTU with new master: %pe\n", + ERR_PTR(err)); + } + + /* If the port doesn't have its own MAC address and relies on the DSA + * master's one, inherit it again from the new DSA master. + */ + if (is_zero_ether_addr(dp->mac)) + eth_hw_addr_inherit(dev, master); + + return 0; + +out_revert_master_link: + netdev_upper_dev_unlink(master, dev); +out_revert_old_master_unlink: + netdev_upper_dev_link(old_master, dev, NULL); + return err; +} + bool dsa_slave_dev_check(const struct net_device *dev) { return dev->netdev_ops == &dsa_slave_netdev_ops; -- cgit v1.2.3-70-g09d2 From 2e359b00a11715c7a89d7448e6e4cb4d84543520 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 11 Sep 2022 04:07:03 +0300 Subject: net: dsa: propagate extack to port_lag_join Drivers could refuse to offload a LAG configuration for a variety of reasons, mainly having to do with its TX type. Additionally, since DSA masters may now also be LAG interfaces, and this will translate into a call to port_lag_join on the CPU ports, there may be extra restrictions there. Propagate the netlink extack to this DSA method in order for drivers to give a meaningful error message back to the user. Signed-off-by: Vladimir Oltean Signed-off-by: Paolo Abeni --- drivers/net/dsa/mv88e6xxx/chip.c | 27 +++++++++++++++++++-------- drivers/net/dsa/ocelot/felix.c | 5 +++-- drivers/net/dsa/qca/qca8k-common.c | 23 +++++++++++++++++------ drivers/net/dsa/qca/qca8k.h | 3 ++- drivers/net/ethernet/mscc/ocelot.c | 8 ++++++-- drivers/net/ethernet/mscc/ocelot_net.c | 7 +++---- include/net/dsa.h | 6 ++++-- include/soc/mscc/ocelot.h | 3 ++- net/dsa/dsa_priv.h | 1 + net/dsa/port.c | 1 + net/dsa/switch.c | 4 ++-- 11 files changed, 60 insertions(+), 28 deletions(-) (limited to 'include/net') diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 6f4ea39ab466..5f178faa110f 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -6593,14 +6593,17 @@ out: static bool mv88e6xxx_lag_can_offload(struct dsa_switch *ds, struct dsa_lag lag, - struct netdev_lag_upper_info *info) + struct netdev_lag_upper_info *info, + struct netlink_ext_ack *extack) { struct mv88e6xxx_chip *chip = ds->priv; struct dsa_port *dp; int members = 0; - if (!mv88e6xxx_has_lag(chip)) + if (!mv88e6xxx_has_lag(chip)) { + NL_SET_ERR_MSG_MOD(extack, "Chip does not support LAG offload"); return false; + } if (!lag.id) return false; @@ -6609,14 +6612,20 @@ static bool mv88e6xxx_lag_can_offload(struct dsa_switch *ds, /* Includes the port joining the LAG */ members++; - if (members > 8) + if (members > 8) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot offload more than 8 LAG ports"); return false; + } /* We could potentially relax this to include active * backup in the future. */ - if (info->tx_type != NETDEV_LAG_TX_TYPE_HASH) + if (info->tx_type != NETDEV_LAG_TX_TYPE_HASH) { + NL_SET_ERR_MSG_MOD(extack, + "Can only offload LAG using hash TX type"); return false; + } /* Ideally we would also validate that the hash type matches * the hardware. Alas, this is always set to unknown on team @@ -6769,12 +6778,13 @@ static int mv88e6xxx_port_lag_change(struct dsa_switch *ds, int port) static int mv88e6xxx_port_lag_join(struct dsa_switch *ds, int port, struct dsa_lag lag, - struct netdev_lag_upper_info *info) + struct netdev_lag_upper_info *info, + struct netlink_ext_ack *extack) { struct mv88e6xxx_chip *chip = ds->priv; int err, id; - if (!mv88e6xxx_lag_can_offload(ds, lag, info)) + if (!mv88e6xxx_lag_can_offload(ds, lag, info, extack)) return -EOPNOTSUPP; /* DSA LAG IDs are one-based */ @@ -6827,12 +6837,13 @@ static int mv88e6xxx_crosschip_lag_change(struct dsa_switch *ds, int sw_index, static int mv88e6xxx_crosschip_lag_join(struct dsa_switch *ds, int sw_index, int port, struct dsa_lag lag, - struct netdev_lag_upper_info *info) + struct netdev_lag_upper_info *info, + struct netlink_ext_ack *extack) { struct mv88e6xxx_chip *chip = ds->priv; int err; - if (!mv88e6xxx_lag_can_offload(ds, lag, info)) + if (!mv88e6xxx_lag_can_offload(ds, lag, info, extack)) return -EOPNOTSUPP; mv88e6xxx_reg_lock(chip); diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index c73ef5f7aa64..82dcc21a7172 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -861,11 +861,12 @@ static void felix_bridge_leave(struct dsa_switch *ds, int port, static int felix_lag_join(struct dsa_switch *ds, int port, struct dsa_lag lag, - struct netdev_lag_upper_info *info) + struct netdev_lag_upper_info *info, + struct netlink_ext_ack *extack) { struct ocelot *ocelot = ds->priv; - return ocelot_port_lag_join(ocelot, port, lag.dev, info); + return ocelot_port_lag_join(ocelot, port, lag.dev, info, extack); } static int felix_lag_leave(struct dsa_switch *ds, int port, diff --git a/drivers/net/dsa/qca/qca8k-common.c b/drivers/net/dsa/qca/qca8k-common.c index bba95613e218..fb45b598847b 100644 --- a/drivers/net/dsa/qca/qca8k-common.c +++ b/drivers/net/dsa/qca/qca8k-common.c @@ -1017,7 +1017,8 @@ int qca8k_port_vlan_del(struct dsa_switch *ds, int port, static bool qca8k_lag_can_offload(struct dsa_switch *ds, struct dsa_lag lag, - struct netdev_lag_upper_info *info) + struct netdev_lag_upper_info *info, + struct netlink_ext_ack *extack) { struct dsa_port *dp; int members = 0; @@ -1029,15 +1030,24 @@ static bool qca8k_lag_can_offload(struct dsa_switch *ds, /* Includes the port joining the LAG */ members++; - if (members > QCA8K_NUM_PORTS_FOR_LAG) + if (members > QCA8K_NUM_PORTS_FOR_LAG) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot offload more than 4 LAG ports"); return false; + } - if (info->tx_type != NETDEV_LAG_TX_TYPE_HASH) + if (info->tx_type != NETDEV_LAG_TX_TYPE_HASH) { + NL_SET_ERR_MSG_MOD(extack, + "Can only offload LAG using hash TX type"); return false; + } if (info->hash_type != NETDEV_LAG_HASH_L2 && - info->hash_type != NETDEV_LAG_HASH_L23) + info->hash_type != NETDEV_LAG_HASH_L23) { + NL_SET_ERR_MSG_MOD(extack, + "Can only offload L2 or L2+L3 TX hash"); return false; + } return true; } @@ -1160,11 +1170,12 @@ static int qca8k_lag_refresh_portmap(struct dsa_switch *ds, int port, } int qca8k_port_lag_join(struct dsa_switch *ds, int port, struct dsa_lag lag, - struct netdev_lag_upper_info *info) + struct netdev_lag_upper_info *info, + struct netlink_ext_ack *extack) { int ret; - if (!qca8k_lag_can_offload(ds, lag, info)) + if (!qca8k_lag_can_offload(ds, lag, info, extack)) return -EOPNOTSUPP; ret = qca8k_lag_setup_hash(ds, lag, info); diff --git a/drivers/net/dsa/qca/qca8k.h b/drivers/net/dsa/qca/qca8k.h index e36ecc9777f4..0b7a5cb12321 100644 --- a/drivers/net/dsa/qca/qca8k.h +++ b/drivers/net/dsa/qca/qca8k.h @@ -512,7 +512,8 @@ int qca8k_port_vlan_del(struct dsa_switch *ds, int port, /* Common port LAG function */ int qca8k_port_lag_join(struct dsa_switch *ds, int port, struct dsa_lag lag, - struct netdev_lag_upper_info *info); + struct netdev_lag_upper_info *info, + struct netlink_ext_ack *extack); int qca8k_port_lag_leave(struct dsa_switch *ds, int port, struct dsa_lag lag); diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 874fb2a5874e..5c18f8986975 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -2132,10 +2132,14 @@ static void ocelot_migrate_lag_fdbs(struct ocelot *ocelot, int ocelot_port_lag_join(struct ocelot *ocelot, int port, struct net_device *bond, - struct netdev_lag_upper_info *info) + struct netdev_lag_upper_info *info, + struct netlink_ext_ack *extack) { - if (info->tx_type != NETDEV_LAG_TX_TYPE_HASH) + if (info->tx_type != NETDEV_LAG_TX_TYPE_HASH) { + NL_SET_ERR_MSG_MOD(extack, + "Can only offload LAG using hash TX type"); return -EOPNOTSUPP; + } mutex_lock(&ocelot->fwd_domain_lock); diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index 2979fb1ba0f7..50858cc10fef 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -1412,11 +1412,10 @@ static int ocelot_netdevice_lag_join(struct net_device *dev, int port = priv->port.index; int err; - err = ocelot_port_lag_join(ocelot, port, bond, info); - if (err == -EOPNOTSUPP) { - NL_SET_ERR_MSG_MOD(extack, "Offloading not supported"); + err = ocelot_port_lag_join(ocelot, port, bond, info, extack); + if (err == -EOPNOTSUPP) + /* Offloading not supported, fall back to software LAG */ return 0; - } bridge_dev = netdev_master_upper_dev_get(bond); if (!bridge_dev || !netif_is_bridge_master(bridge_dev)) diff --git a/include/net/dsa.h b/include/net/dsa.h index 3f717c3fcba0..1c80e75b3cad 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -1094,7 +1094,8 @@ struct dsa_switch_ops { int port); int (*crosschip_lag_join)(struct dsa_switch *ds, int sw_index, int port, struct dsa_lag lag, - struct netdev_lag_upper_info *info); + struct netdev_lag_upper_info *info, + struct netlink_ext_ack *extack); int (*crosschip_lag_leave)(struct dsa_switch *ds, int sw_index, int port, struct dsa_lag lag); @@ -1169,7 +1170,8 @@ struct dsa_switch_ops { int (*port_lag_change)(struct dsa_switch *ds, int port); int (*port_lag_join)(struct dsa_switch *ds, int port, struct dsa_lag lag, - struct netdev_lag_upper_info *info); + struct netdev_lag_upper_info *info, + struct netlink_ext_ack *extack); int (*port_lag_leave)(struct dsa_switch *ds, int port, struct dsa_lag lag); diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 355cfdedc43b..ea19e8ef1f61 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -1229,7 +1229,8 @@ int ocelot_port_mdb_del(struct ocelot *ocelot, int port, const struct net_device *bridge); int ocelot_port_lag_join(struct ocelot *ocelot, int port, struct net_device *bond, - struct netdev_lag_upper_info *info); + struct netdev_lag_upper_info *info, + struct netlink_ext_ack *extack); void ocelot_port_lag_leave(struct ocelot *ocelot, int port, struct net_device *bond); void ocelot_port_lag_change(struct ocelot *ocelot, int port, bool lag_tx_active); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index d252a04ed725..f0ae54d0435e 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -88,6 +88,7 @@ struct dsa_notifier_lag_info { const struct dsa_port *dp; struct dsa_lag lag; struct netdev_lag_upper_info *info; + struct netlink_ext_ack *extack; }; /* DSA_NOTIFIER_VLAN_* */ diff --git a/net/dsa/port.c b/net/dsa/port.c index e557b42de9f2..98f7fa0cdd5c 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -635,6 +635,7 @@ int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag_dev, struct dsa_notifier_lag_info info = { .dp = dp, .info = uinfo, + .extack = extack, }; struct net_device *bridge_dev; int err; diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 4dfd68cf61c5..c2cb15e21324 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -507,12 +507,12 @@ static int dsa_switch_lag_join(struct dsa_switch *ds, { if (info->dp->ds == ds && ds->ops->port_lag_join) return ds->ops->port_lag_join(ds, info->dp->index, info->lag, - info->info); + info->info, info->extack); if (info->dp->ds != ds && ds->ops->crosschip_lag_join) return ds->ops->crosschip_lag_join(ds, info->dp->ds->index, info->dp->index, info->lag, - info->info); + info->info, info->extack); return -EOPNOTSUPP; } -- cgit v1.2.3-70-g09d2 From acc43b7bf52a015221d989164c2600c1e1f28790 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 11 Sep 2022 04:07:04 +0300 Subject: net: dsa: allow masters to join a LAG There are 2 ways in which a DSA user port may become handled by 2 CPU ports in a LAG: (1) its current DSA master joins a LAG ip link del bond0 && ip link add bond0 type bond mode 802.3ad ip link set eno2 master bond0 When this happens, all user ports with "eno2" as DSA master get automatically migrated to "bond0" as DSA master. (2) it is explicitly configured as such by the user # Before, the DSA master was eno3 ip link set swp0 type dsa master bond0 The design of this configuration is that the LAG device dynamically becomes a DSA master through dsa_master_setup() when the first physical DSA master becomes a LAG slave, and stops being so through dsa_master_teardown() when the last physical DSA master leaves. A LAG interface is considered as a valid DSA master only if it contains existing DSA masters, and no other lower interfaces. Therefore, we mainly rely on method (1) to enter this configuration. Each physical DSA master (LAG slave) retains its dev->dsa_ptr for when it becomes a standalone DSA master again. But the LAG master also has a dev->dsa_ptr, and this is actually duplicated from one of the physical LAG slaves, and therefore needs to be balanced when LAG slaves come and go. To the switch driver, putting DSA masters in a LAG is seen as putting their associated CPU ports in a LAG. We need to prepare cross-chip host FDB notifiers for CPU ports in a LAG, by calling the driver's ->lag_fdb_add method rather than ->port_fdb_add. Signed-off-by: Vladimir Oltean Signed-off-by: Paolo Abeni --- include/net/dsa.h | 12 +++ net/dsa/dsa_priv.h | 5 ++ net/dsa/master.c | 49 ++++++++++++ net/dsa/port.c | 1 + net/dsa/slave.c | 231 +++++++++++++++++++++++++++++++++++++++++++++++++++-- net/dsa/switch.c | 22 ++++- 6 files changed, 310 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 1c80e75b3cad..d777eac5694f 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -300,6 +300,9 @@ struct dsa_port { u8 master_admin_up:1; u8 master_oper_up:1; + /* Valid only on user ports */ + u8 cpu_port_in_lag:1; + u8 setup:1; struct device_node *dn; @@ -724,6 +727,9 @@ static inline bool dsa_port_offloads_lag(struct dsa_port *dp, static inline struct net_device *dsa_port_to_master(const struct dsa_port *dp) { + if (dp->cpu_port_in_lag) + return dsa_port_lag_dev_get(dp->cpu_dp); + return dp->cpu_dp->master; } @@ -811,6 +817,12 @@ dsa_tree_offloads_bridge_dev(struct dsa_switch_tree *dst, return false; } +static inline bool dsa_port_tree_same(const struct dsa_port *a, + const struct dsa_port *b) +{ + return a->ds->dst == b->ds->dst; +} + typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid, bool is_static, void *data); struct dsa_switch_ops { diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index f0ae54d0435e..129e4a649c7e 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -185,6 +185,11 @@ static inline int dsa_tag_protocol_overhead(const struct dsa_device_ops *ops) /* master.c */ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp); void dsa_master_teardown(struct net_device *dev); +int dsa_master_lag_setup(struct net_device *lag_dev, struct dsa_port *cpu_dp, + struct netdev_lag_upper_info *uinfo, + struct netlink_ext_ack *extack); +void dsa_master_lag_teardown(struct net_device *lag_dev, + struct dsa_port *cpu_dp); static inline struct net_device *dsa_master_find_slave(struct net_device *dev, int device, int port) diff --git a/net/dsa/master.c b/net/dsa/master.c index 2176c14b97a8..40367ab41cf8 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -428,3 +428,52 @@ void dsa_master_teardown(struct net_device *dev) */ wmb(); } + +int dsa_master_lag_setup(struct net_device *lag_dev, struct dsa_port *cpu_dp, + struct netdev_lag_upper_info *uinfo, + struct netlink_ext_ack *extack) +{ + bool master_setup = false; + int err; + + if (!netdev_uses_dsa(lag_dev)) { + err = dsa_master_setup(lag_dev, cpu_dp); + if (err) + return err; + + master_setup = true; + } + + err = dsa_port_lag_join(cpu_dp, lag_dev, uinfo, extack); + if (err) { + if (extack && !extack->_msg) + NL_SET_ERR_MSG_MOD(extack, + "CPU port failed to join LAG"); + goto out_master_teardown; + } + + return 0; + +out_master_teardown: + if (master_setup) + dsa_master_teardown(lag_dev); + return err; +} + +/* Tear down a master if there isn't any other user port on it, + * optionally also destroying LAG information. + */ +void dsa_master_lag_teardown(struct net_device *lag_dev, + struct dsa_port *cpu_dp) +{ + struct net_device *upper; + struct list_head *iter; + + dsa_port_lag_leave(cpu_dp, lag_dev); + + netdev_for_each_upper_dev_rcu(lag_dev, upper, iter) + if (dsa_slave_dev_check(upper)) + return; + + dsa_master_teardown(lag_dev); +} diff --git a/net/dsa/port.c b/net/dsa/port.c index 98f7fa0cdd5c..e6289a1db0a0 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -1393,6 +1393,7 @@ static int dsa_port_assign_master(struct dsa_port *dp, return err; dp->cpu_dp = master->dsa_ptr; + dp->cpu_port_in_lag = netif_is_lag_master(master); return 0; } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 00df6cf07866..aa47ddc19fdf 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -2818,11 +2818,45 @@ dsa_slave_prechangeupper_sanity_check(struct net_device *dev, return NOTIFY_DONE; } +/* To be eligible as a DSA master, a LAG must have all lower interfaces be + * eligible DSA masters. Additionally, all LAG slaves must be DSA masters of + * switches in the same switch tree. + */ +static int dsa_lag_master_validate(struct net_device *lag_dev, + struct netlink_ext_ack *extack) +{ + struct net_device *lower1, *lower2; + struct list_head *iter1, *iter2; + + netdev_for_each_lower_dev(lag_dev, lower1, iter1) { + netdev_for_each_lower_dev(lag_dev, lower2, iter2) { + if (!netdev_uses_dsa(lower1) || + !netdev_uses_dsa(lower2)) { + NL_SET_ERR_MSG_MOD(extack, + "All LAG ports must be eligible as DSA masters"); + return notifier_from_errno(-EINVAL); + } + + if (lower1 == lower2) + continue; + + if (!dsa_port_tree_same(lower1->dsa_ptr, + lower2->dsa_ptr)) { + NL_SET_ERR_MSG_MOD(extack, + "LAG contains DSA masters of disjoint switch trees"); + return notifier_from_errno(-EINVAL); + } + } + } + + return NOTIFY_DONE; +} + static int dsa_master_prechangeupper_sanity_check(struct net_device *master, struct netdev_notifier_changeupper_info *info) { - struct netlink_ext_ack *extack; + struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(&info->info); if (!netdev_uses_dsa(master)) return NOTIFY_DONE; @@ -2840,13 +2874,51 @@ dsa_master_prechangeupper_sanity_check(struct net_device *master, if (netif_is_bridge_master(info->upper_dev)) return NOTIFY_DONE; - extack = netdev_notifier_info_to_extack(&info->info); + /* Allow LAG uppers, subject to further restrictions in + * dsa_lag_master_prechangelower_sanity_check() + */ + if (netif_is_lag_master(info->upper_dev)) + return dsa_lag_master_validate(info->upper_dev, extack); NL_SET_ERR_MSG_MOD(extack, "DSA master cannot join unknown upper interfaces"); return notifier_from_errno(-EBUSY); } +static int +dsa_lag_master_prechangelower_sanity_check(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) +{ + struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(&info->info); + struct net_device *lag_dev = info->upper_dev; + struct net_device *lower; + struct list_head *iter; + + if (!netdev_uses_dsa(lag_dev) || !netif_is_lag_master(lag_dev)) + return NOTIFY_DONE; + + if (!info->linking) + return NOTIFY_DONE; + + if (!netdev_uses_dsa(dev)) { + NL_SET_ERR_MSG(extack, + "Only DSA masters can join a LAG DSA master"); + return notifier_from_errno(-EINVAL); + } + + netdev_for_each_lower_dev(lag_dev, lower, iter) { + if (!dsa_port_tree_same(dev->dsa_ptr, lower->dsa_ptr)) { + NL_SET_ERR_MSG(extack, + "Interface is DSA master for a different switch tree than this LAG"); + return notifier_from_errno(-EINVAL); + } + + break; + } + + return NOTIFY_DONE; +} + /* Don't allow bridging of DSA masters, since the bridge layer rx_handler * prevents the DSA fake ethertype handler to be invoked, so we don't get the * chance to strip off and parse the DSA switch tag protocol header (the bridge @@ -2887,6 +2959,136 @@ dsa_bridge_prechangelower_sanity_check(struct net_device *new_lower, return NOTIFY_DONE; } +static void dsa_tree_migrate_ports_from_lag_master(struct dsa_switch_tree *dst, + struct net_device *lag_dev) +{ + struct net_device *new_master = dsa_tree_find_first_master(dst); + struct dsa_port *dp; + int err; + + dsa_tree_for_each_user_port(dp, dst) { + if (dsa_port_to_master(dp) != lag_dev) + continue; + + err = dsa_slave_change_master(dp->slave, new_master, NULL); + if (err) { + netdev_err(dp->slave, + "failed to restore master to %s: %pe\n", + new_master->name, ERR_PTR(err)); + } + } +} + +static int dsa_master_lag_join(struct net_device *master, + struct net_device *lag_dev, + struct netdev_lag_upper_info *uinfo, + struct netlink_ext_ack *extack) +{ + struct dsa_port *cpu_dp = master->dsa_ptr; + struct dsa_switch_tree *dst = cpu_dp->dst; + struct dsa_port *dp; + int err; + + err = dsa_master_lag_setup(lag_dev, cpu_dp, uinfo, extack); + if (err) + return err; + + dsa_tree_for_each_user_port(dp, dst) { + if (dsa_port_to_master(dp) != master) + continue; + + err = dsa_slave_change_master(dp->slave, lag_dev, extack); + if (err) + goto restore; + } + + return 0; + +restore: + dsa_tree_for_each_user_port_continue_reverse(dp, dst) { + if (dsa_port_to_master(dp) != lag_dev) + continue; + + err = dsa_slave_change_master(dp->slave, master, NULL); + if (err) { + netdev_err(dp->slave, + "failed to restore master to %s: %pe\n", + master->name, ERR_PTR(err)); + } + } + + dsa_master_lag_teardown(lag_dev, master->dsa_ptr); + + return err; +} + +static void dsa_master_lag_leave(struct net_device *master, + struct net_device *lag_dev) +{ + struct dsa_port *dp, *cpu_dp = lag_dev->dsa_ptr; + struct dsa_switch_tree *dst = cpu_dp->dst; + struct dsa_port *new_cpu_dp = NULL; + struct net_device *lower; + struct list_head *iter; + + netdev_for_each_lower_dev(lag_dev, lower, iter) { + if (netdev_uses_dsa(lower)) { + new_cpu_dp = lower->dsa_ptr; + break; + } + } + + if (new_cpu_dp) { + /* Update the CPU port of the user ports still under the LAG + * so that dsa_port_to_master() continues to work properly + */ + dsa_tree_for_each_user_port(dp, dst) + if (dsa_port_to_master(dp) == lag_dev) + dp->cpu_dp = new_cpu_dp; + + /* Update the index of the virtual CPU port to match the lowest + * physical CPU port + */ + lag_dev->dsa_ptr = new_cpu_dp; + wmb(); + } else { + /* If the LAG DSA master has no ports left, migrate back all + * user ports to the first physical CPU port + */ + dsa_tree_migrate_ports_from_lag_master(dst, lag_dev); + } + + /* This DSA master has left its LAG in any case, so let + * the CPU port leave the hardware LAG as well + */ + dsa_master_lag_teardown(lag_dev, master->dsa_ptr); +} + +static int dsa_master_changeupper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) +{ + struct netlink_ext_ack *extack; + int err = NOTIFY_DONE; + + if (!netdev_uses_dsa(dev)) + return err; + + extack = netdev_notifier_info_to_extack(&info->info); + + if (netif_is_lag_master(info->upper_dev)) { + if (info->linking) { + err = dsa_master_lag_join(dev, info->upper_dev, + info->upper_info, extack); + err = notifier_from_errno(err); + } else { + dsa_master_lag_leave(dev, info->upper_dev); + err = NOTIFY_OK; + } + } + + return err; +} + static int dsa_slave_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { @@ -2905,6 +3107,10 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb, if (notifier_to_errno(err)) return err; + err = dsa_lag_master_prechangelower_sanity_check(dev, info); + if (notifier_to_errno(err)) + return err; + err = dsa_bridge_prechangelower_sanity_check(dev, info); if (notifier_to_errno(err)) return err; @@ -2930,6 +3136,10 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb, if (notifier_to_errno(err)) return err; + err = dsa_master_changeupper(dev, ptr); + if (notifier_to_errno(err)) + return err; + break; } case NETDEV_CHANGELOWERSTATE: { @@ -2937,12 +3147,21 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb, struct dsa_port *dp; int err; - if (!dsa_slave_dev_check(dev)) - break; + if (dsa_slave_dev_check(dev)) { + dp = dsa_slave_to_port(dev); + + err = dsa_port_lag_change(dp, info->lower_state_info); + } - dp = dsa_slave_to_port(dev); + /* Mirror LAG port events on DSA masters that are in + * a LAG towards their respective switch CPU ports + */ + if (netdev_uses_dsa(dev)) { + dp = dev->dsa_ptr; + + err = dsa_port_lag_change(dp, info->lower_state_info); + } - err = dsa_port_lag_change(dp, info->lower_state_info); return notifier_from_errno(err); } case NETDEV_CHANGE: diff --git a/net/dsa/switch.c b/net/dsa/switch.c index c2cb15e21324..ce56acdba203 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -398,8 +398,15 @@ static int dsa_switch_host_fdb_add(struct dsa_switch *ds, dsa_switch_for_each_port(dp, ds) { if (dsa_port_host_address_match(dp, info->dp)) { - err = dsa_port_do_fdb_add(dp, info->addr, info->vid, - info->db); + if (dsa_port_is_cpu(dp) && info->dp->cpu_port_in_lag) { + err = dsa_switch_do_lag_fdb_add(ds, dp->lag, + info->addr, + info->vid, + info->db); + } else { + err = dsa_port_do_fdb_add(dp, info->addr, + info->vid, info->db); + } if (err) break; } @@ -419,8 +426,15 @@ static int dsa_switch_host_fdb_del(struct dsa_switch *ds, dsa_switch_for_each_port(dp, ds) { if (dsa_port_host_address_match(dp, info->dp)) { - err = dsa_port_do_fdb_del(dp, info->addr, info->vid, - info->db); + if (dsa_port_is_cpu(dp) && info->dp->cpu_port_in_lag) { + err = dsa_switch_do_lag_fdb_del(ds, dp->lag, + info->addr, + info->vid, + info->db); + } else { + err = dsa_port_do_fdb_del(dp, info->addr, + info->vid, info->db); + } if (err) break; } -- cgit v1.2.3-70-g09d2 From e9bd0cca09d13ac2f08d25e195203e42d4ad1ce8 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 7 Sep 2022 18:10:18 -0700 Subject: tcp: Don't allocate tcp_death_row outside of struct netns_ipv4. We will soon introduce an optional per-netns ehash and access hash tables via net->ipv4.tcp_death_row->hashinfo instead of &tcp_hashinfo in most places. It could harm the fast path because dereferences of two fields in net and tcp_death_row might incur two extra cache line misses. To save one dereference, let's place tcp_death_row back in netns_ipv4 and fetch hashinfo via net->ipv4.tcp_death_row"."hashinfo. Note tcp_death_row was initially placed in netns_ipv4, and commit fbb8295248e1 ("tcp: allocate tcp_death_row outside of struct netns_ipv4") changed it to a pointer so that we can fire TIME_WAIT timers after freeing net. However, we don't do so after commit 04c494e68a13 ("Revert "tcp/dccp: get rid of inet_twsk_purge()""), so we need not define tcp_death_row as a pointer. Also, we move refcount_dec_and_test(&tw_refcount) from tcp_sk_exit() to tcp_sk_exit_batch() as a debug check. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/netns/ipv4.h | 3 ++- net/ipv4/inet_timewait_sock.c | 4 +--- net/ipv4/proc.c | 2 +- net/ipv4/sysctl_net_ipv4.c | 8 ++------ net/ipv4/tcp_ipv4.c | 19 +++++++------------ net/ipv4/tcp_minisocks.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 7 files changed, 15 insertions(+), 25 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 6320a76cefdc..2c7df93e3403 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -34,6 +34,7 @@ struct inet_hashinfo; struct inet_timewait_death_row { refcount_t tw_refcount; + /* Padding to avoid false sharing, tw_refcount can be often written */ struct inet_hashinfo *hashinfo ____cacheline_aligned_in_smp; int sysctl_max_tw_buckets; }; @@ -41,7 +42,7 @@ struct inet_timewait_death_row { struct tcp_fastopen_context; struct netns_ipv4 { - struct inet_timewait_death_row *tcp_death_row; + struct inet_timewait_death_row tcp_death_row; #ifdef CONFIG_SYSCTL struct ctl_table_header *forw_hdr; diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 47ccc343c9fb..71d3bb0abf6c 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -59,9 +59,7 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw) inet_twsk_bind_unhash(tw, hashinfo); spin_unlock(&bhead->lock); - if (refcount_dec_and_test(&tw->tw_dr->tw_refcount)) - kfree(tw->tw_dr); - + refcount_dec(&tw->tw_dr->tw_refcount); inet_twsk_put(tw); } diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 0088a4c64d77..5386f460bd20 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -59,7 +59,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) socket_seq_show(seq); seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n", sock_prot_inuse_get(net, &tcp_prot), orphans, - refcount_read(&net->ipv4.tcp_death_row->tw_refcount) - 1, + refcount_read(&net->ipv4.tcp_death_row.tw_refcount) - 1, sockets, proto_memory_allocated(&tcp_prot)); seq_printf(seq, "UDP: inuse %d mem %ld\n", sock_prot_inuse_get(net, &udp_prot), diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 5490c285668b..4d7c110c772f 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -530,10 +530,9 @@ static struct ctl_table ipv4_table[] = { }; static struct ctl_table ipv4_net_table[] = { - /* tcp_max_tw_buckets must be first in this table. */ { .procname = "tcp_max_tw_buckets", -/* .data = &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets, */ + .data = &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec @@ -1361,8 +1360,7 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) if (!table) goto err_alloc; - /* skip first entry (sysctl_max_tw_buckets) */ - for (i = 1; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) { + for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) { if (table[i].data) { /* Update the variables to point into * the current struct net @@ -1377,8 +1375,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) } } - table[0].data = &net->ipv4.tcp_death_row->sysctl_max_tw_buckets; - net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); if (!net->ipv4.ipv4_hdr) goto err_reg; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a07243f66d4c..3930b6a1e0d6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -292,7 +292,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) * complete initialization after this. */ tcp_set_state(sk, TCP_SYN_SENT); - tcp_death_row = net->ipv4.tcp_death_row; + tcp_death_row = &net->ipv4.tcp_death_row; err = inet_hash_connect(tcp_death_row, sk); if (err) goto failure; @@ -3091,13 +3091,9 @@ EXPORT_SYMBOL(tcp_prot); static void __net_exit tcp_sk_exit(struct net *net) { - struct inet_timewait_death_row *tcp_death_row = net->ipv4.tcp_death_row; - if (net->ipv4.tcp_congestion_control) bpf_module_put(net->ipv4.tcp_congestion_control, net->ipv4.tcp_congestion_control->owner); - if (refcount_dec_and_test(&tcp_death_row->tw_refcount)) - kfree(tcp_death_row); } static int __net_init tcp_sk_init(struct net *net) @@ -3129,13 +3125,10 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_tw_reuse = 2; net->ipv4.sysctl_tcp_no_ssthresh_metrics_save = 1; - net->ipv4.tcp_death_row = kzalloc(sizeof(struct inet_timewait_death_row), GFP_KERNEL); - if (!net->ipv4.tcp_death_row) - return -ENOMEM; - refcount_set(&net->ipv4.tcp_death_row->tw_refcount, 1); + refcount_set(&net->ipv4.tcp_death_row.tw_refcount, 1); cnt = tcp_hashinfo.ehash_mask + 1; - net->ipv4.tcp_death_row->sysctl_max_tw_buckets = cnt / 2; - net->ipv4.tcp_death_row->hashinfo = &tcp_hashinfo; + net->ipv4.tcp_death_row.sysctl_max_tw_buckets = cnt / 2; + net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo; net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 128); net->ipv4.sysctl_tcp_sack = 1; @@ -3201,8 +3194,10 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) inet_twsk_purge(&tcp_hashinfo, AF_INET); - list_for_each_entry(net, net_exit_list, exit_list) + list_for_each_entry(net, net_exit_list, exit_list) { + WARN_ON_ONCE(!refcount_dec_and_test(&net->ipv4.tcp_death_row.tw_refcount)); tcp_fastopen_ctx_destroy(net); + } } static struct pernet_operations __net_initdata tcp_sk_ops = { diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 80ce27f8f77e..8bddb2a78b21 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -250,7 +250,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) struct net *net = sock_net(sk); struct inet_timewait_sock *tw; - tw = inet_twsk_alloc(sk, net->ipv4.tcp_death_row, state); + tw = inet_twsk_alloc(sk, &net->ipv4.tcp_death_row, state); if (tw) { struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5c562d69fddf..eb1da7a63fbb 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -325,7 +325,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, inet->inet_dport = usin->sin6_port; tcp_set_state(sk, TCP_SYN_SENT); - tcp_death_row = net->ipv4.tcp_death_row; + tcp_death_row = &net->ipv4.tcp_death_row; err = inet6_hash_connect(tcp_death_row, sk); if (err) goto late_failure; -- cgit v1.2.3-70-g09d2 From 429e42c1c54e0d9bfe880195f7d4a8fd5a727194 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 7 Sep 2022 18:10:19 -0700 Subject: tcp: Set NULL to sk->sk_prot->h.hashinfo. We will soon introduce an optional per-netns ehash. This means we cannot use the global sk->sk_prot->h.hashinfo to fetch a TCP hashinfo. Instead, set NULL to sk->sk_prot->h.hashinfo for TCP and get a proper hashinfo from net->ipv4.tcp_death_row.hashinfo. Note that we need not use sk->sk_prot->h.hashinfo if DCCP is disabled. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/inet_hashtables.h | 10 ++++++++++ net/ipv4/af_inet.c | 2 +- net/ipv4/inet_connection_sock.c | 9 ++++----- net/ipv4/inet_hashtables.c | 14 +++++++------- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 6 files changed, 24 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 44a419b9e3d5..520dd894b73d 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -170,6 +170,16 @@ struct inet_hashinfo { struct inet_listen_hashbucket *lhash2; }; +static inline struct inet_hashinfo *tcp_or_dccp_get_hashinfo(const struct sock *sk) +{ +#if IS_ENABLED(CONFIG_IP_DCCP) + return sk->sk_prot->h.hashinfo ? : + sock_net(sk)->ipv4.tcp_death_row.hashinfo; +#else + return sock_net(sk)->ipv4.tcp_death_row.hashinfo; +#endif +} + static inline struct inet_listen_hashbucket * inet_lhash2_bucket(struct inet_hashinfo *h, u32 hash) { diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index d3ab1ae32ef5..e2c219382345 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1250,7 +1250,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) } prev_addr_hashbucket = - inet_bhashfn_portaddr(sk->sk_prot->h.hashinfo, sk, + inet_bhashfn_portaddr(tcp_or_dccp_get_hashinfo(sk), sk, sock_net(sk), inet->inet_num); inet->inet_saddr = inet->inet_rcv_saddr = new_saddr; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 8e71d65cfad4..ebca860e113f 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -285,7 +285,7 @@ inet_csk_find_open_port(const struct sock *sk, struct inet_bind_bucket **tb_ret, struct inet_bind2_bucket **tb2_ret, struct inet_bind_hashbucket **head2_ret, int *port_ret) { - struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo; + struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk); int i, low, high, attempt_half, port, l3mdev; struct inet_bind_hashbucket *head, *head2; struct net *net = sock_net(sk); @@ -467,8 +467,8 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, */ int inet_csk_get_port(struct sock *sk, unsigned short snum) { + struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk); bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN; - struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo; bool found_port = false, check_bind_conflict = true; bool bhash_created = false, bhash2_created = false; struct inet_bind_hashbucket *head, *head2; @@ -910,10 +910,9 @@ static bool reqsk_queue_unlink(struct request_sock *req) bool found = false; if (sk_hashed(sk)) { - struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - spinlock_t *lock; + struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk); + spinlock_t *lock = inet_ehash_lockp(hashinfo, req->rsk_hash); - lock = inet_ehash_lockp(hashinfo, req->rsk_hash); spin_lock(lock); found = __sk_nulls_del_node_init_rcu(sk); spin_unlock(lock); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 29dce78de179..bdb5427a7a3d 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -168,7 +168,7 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, */ static void __inet_put_port(struct sock *sk) { - struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; + struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk); struct inet_bind_hashbucket *head, *head2; struct net *net = sock_net(sk); struct inet_bind_bucket *tb; @@ -208,7 +208,7 @@ EXPORT_SYMBOL(inet_put_port); int __inet_inherit_port(const struct sock *sk, struct sock *child) { - struct inet_hashinfo *table = sk->sk_prot->h.hashinfo; + struct inet_hashinfo *table = tcp_or_dccp_get_hashinfo(sk); unsigned short port = inet_sk(child)->inet_num; struct inet_bind_hashbucket *head, *head2; bool created_inet_bind_bucket = false; @@ -629,7 +629,7 @@ static bool inet_ehash_lookup_by_sk(struct sock *sk, */ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) { - struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; + struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk); struct inet_ehash_bucket *head; struct hlist_nulls_head *list; spinlock_t *lock; @@ -701,7 +701,7 @@ static int inet_reuseport_add_sock(struct sock *sk, int __inet_hash(struct sock *sk, struct sock *osk) { - struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; + struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk); struct inet_listen_hashbucket *ilb2; int err = 0; @@ -747,7 +747,7 @@ EXPORT_SYMBOL_GPL(inet_hash); void inet_unhash(struct sock *sk) { - struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; + struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk); if (sk_unhashed(sk)) return; @@ -834,7 +834,7 @@ inet_bind2_bucket_find(const struct inet_bind_hashbucket *head, const struct net struct inet_bind_hashbucket * inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, int port) { - struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo; + struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk); u32 hash; #if IS_ENABLED(CONFIG_IPV6) struct in6_addr addr_any = {}; @@ -850,7 +850,7 @@ inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, in int inet_bhash2_update_saddr(struct inet_bind_hashbucket *prev_saddr, struct sock *sk) { - struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo; + struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk); struct inet_bind2_bucket *tb2, *new_tb2; int l3mdev = inet_sk_bound_l3mdev(sk); struct inet_bind_hashbucket *head2; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3930b6a1e0d6..3bb7da95b757 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -3083,7 +3083,7 @@ struct proto tcp_prot = { .slab_flags = SLAB_TYPESAFE_BY_RCU, .twsk_prot = &tcp_timewait_sock_ops, .rsk_prot = &tcp_request_sock_ops, - .h.hashinfo = &tcp_hashinfo, + .h.hashinfo = NULL, .no_autobind = true, .diag_destroy = tcp_abort, }; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index eb1da7a63fbb..e0b5f5b4d868 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2194,7 +2194,7 @@ struct proto tcpv6_prot = { .slab_flags = SLAB_TYPESAFE_BY_RCU, .twsk_prot = &tcp6_timewait_sock_ops, .rsk_prot = &tcp6_request_sock_ops, - .h.hashinfo = &tcp_hashinfo, + .h.hashinfo = NULL, .no_autobind = true, .diag_destroy = tcp_abort, }; -- cgit v1.2.3-70-g09d2 From edc12f032a5a1633474db566878ce0012e7ca2f5 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 7 Sep 2022 18:10:21 -0700 Subject: tcp: Save unnecessary inet_twsk_purge() calls. While destroying netns, we call inet_twsk_purge() in tcp_sk_exit_batch() and tcpv6_net_exit_batch() for AF_INET and AF_INET6. These commands trigger the kernel to walk through the potentially big ehash twice even though the netns has no TIME_WAIT sockets. # ip netns add test # ip netns del test or # unshare -n /bin/true >/dev/null When tw_refcount is 1, we need not call inet_twsk_purge() at least for the net. We can save such unneeded iterations if all netns in net_exit_list have no TIME_WAIT sockets. This change eliminates the tax by the additional unshare() described in the next patch to guarantee the per-netns ehash size. Tested: # mount -t debugfs none /sys/kernel/debug/ # echo cleanup_net > /sys/kernel/debug/tracing/set_ftrace_filter # echo inet_twsk_purge >> /sys/kernel/debug/tracing/set_ftrace_filter # echo function > /sys/kernel/debug/tracing/current_tracer # cat ./add_del_unshare.sh for i in `seq 1 40` do (for j in `seq 1 100` ; do unshare -n /bin/true >/dev/null ; done) & done wait; # ./add_del_unshare.sh Before the patch: # cat /sys/kernel/debug/tracing/trace_pipe kworker/u128:0-8 [031] ...1. 174.162765: cleanup_net <-process_one_work kworker/u128:0-8 [031] ...1. 174.240796: inet_twsk_purge <-cleanup_net kworker/u128:0-8 [032] ...1. 174.244759: inet_twsk_purge <-tcp_sk_exit_batch kworker/u128:0-8 [034] ...1. 174.290861: cleanup_net <-process_one_work kworker/u128:0-8 [039] ...1. 175.245027: inet_twsk_purge <-cleanup_net kworker/u128:0-8 [046] ...1. 175.290541: inet_twsk_purge <-tcp_sk_exit_batch kworker/u128:0-8 [037] ...1. 175.321046: cleanup_net <-process_one_work kworker/u128:0-8 [024] ...1. 175.941633: inet_twsk_purge <-cleanup_net kworker/u128:0-8 [025] ...1. 176.242539: inet_twsk_purge <-tcp_sk_exit_batch After: # cat /sys/kernel/debug/tracing/trace_pipe kworker/u128:0-8 [038] ...1. 428.116174: cleanup_net <-process_one_work kworker/u128:0-8 [038] ...1. 428.262532: cleanup_net <-process_one_work kworker/u128:0-8 [030] ...1. 429.292645: cleanup_net <-process_one_work Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 1 + net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_minisocks.c | 15 +++++++++++++++ net/ipv6/tcp_ipv6.c | 2 +- 4 files changed, 18 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 735e957f7f4b..27e8d378c70a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -346,6 +346,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb); void tcp_rcv_space_adjust(struct sock *sk); int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp); void tcp_twsk_destructor(struct sock *sk); +void tcp_twsk_purge(struct list_head *net_exit_list, int family); ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 9aa186af6c44..73e6854ea662 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -3206,7 +3206,7 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) { struct net *net; - inet_twsk_purge(&tcp_hashinfo, AF_INET); + tcp_twsk_purge(net_exit_list, AF_INET); list_for_each_entry(net, net_exit_list, exit_list) { WARN_ON_ONCE(!refcount_dec_and_test(&net->ipv4.tcp_death_row.tw_refcount)); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 2e53981252d9..98c576d4b671 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -347,6 +347,21 @@ void tcp_twsk_destructor(struct sock *sk) } EXPORT_SYMBOL_GPL(tcp_twsk_destructor); +void tcp_twsk_purge(struct list_head *net_exit_list, int family) +{ + struct net *net; + + list_for_each_entry(net, net_exit_list, exit_list) { + /* The last refcount is decremented in tcp_sk_exit_batch() */ + if (refcount_read(&net->ipv4.tcp_death_row.tw_refcount) == 1) + continue; + + inet_twsk_purge(&tcp_hashinfo, family); + break; + } +} +EXPORT_SYMBOL_GPL(tcp_twsk_purge); + /* Warning : This function is called without sk_listener being locked. * Be sure to read socket fields once, as their value could change under us. */ diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f9ed47eb9b93..06beed4e23bf 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2229,7 +2229,7 @@ static void __net_exit tcpv6_net_exit(struct net *net) static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list) { - inet_twsk_purge(&tcp_hashinfo, AF_INET6); + tcp_twsk_purge(net_exit_list, AF_INET6); } static struct pernet_operations tcpv6_net_ops = { -- cgit v1.2.3-70-g09d2 From d1e5e6408b305ff78b825d437df8d3f77e82a4be Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 7 Sep 2022 18:10:22 -0700 Subject: tcp: Introduce optional per-netns ehash. The more sockets we have in the hash table, the longer we spend looking up the socket. While running a number of small workloads on the same host, they penalise each other and cause performance degradation. The root cause might be a single workload that consumes much more resources than the others. It often happens on a cloud service where different workloads share the same computing resource. On EC2 c5.24xlarge instance (196 GiB memory and 524288 (1Mi / 2) ehash entries), after running iperf3 in different netns, creating 24Mi sockets without data transfer in the root netns causes about 10% performance regression for the iperf3's connection. thash_entries sockets length Gbps 524288 1 1 50.7 24Mi 48 45.1 It is basically related to the length of the list of each hash bucket. For testing purposes to see how performance drops along the length, I set 131072 (1Mi / 8) to thash_entries, and here's the result. thash_entries sockets length Gbps 131072 1 1 50.7 1Mi 8 49.9 2Mi 16 48.9 4Mi 32 47.3 8Mi 64 44.6 16Mi 128 40.6 24Mi 192 36.3 32Mi 256 32.5 40Mi 320 27.0 48Mi 384 25.0 To resolve the socket lookup degradation, we introduce an optional per-netns hash table for TCP, but it's just ehash, and we still share the global bhash, bhash2 and lhash2. With a smaller ehash, we can look up non-listener sockets faster and isolate such noisy neighbours. In addition, we can reduce lock contention. We can control the ehash size by a new sysctl knob. However, depending on workloads, it will require very sensitive tuning, so we disable the feature by default (net.ipv4.tcp_child_ehash_entries == 0). Moreover, we can fall back to using the global ehash in case we fail to allocate enough memory for a new ehash. The maximum size is 16Mi, which is large enough that even if we have 48Mi sockets, the average list length is 3, and regression would be less than 1%. We can check the current ehash size by another read-only sysctl knob, net.ipv4.tcp_ehash_entries. A negative value means the netns shares the global ehash (per-netns ehash is disabled or failed to allocate memory). # dmesg | cut -d ' ' -f 5- | grep "established hash" TCP established hash table entries: 524288 (order: 10, 4194304 bytes, vmalloc hugepage) # sysctl net.ipv4.tcp_ehash_entries net.ipv4.tcp_ehash_entries = 524288 # can be changed by thash_entries # sysctl net.ipv4.tcp_child_ehash_entries net.ipv4.tcp_child_ehash_entries = 0 # disabled by default # ip netns add test1 # ip netns exec test1 sysctl net.ipv4.tcp_ehash_entries net.ipv4.tcp_ehash_entries = -524288 # share the global ehash # sysctl -w net.ipv4.tcp_child_ehash_entries=100 net.ipv4.tcp_child_ehash_entries = 100 # ip netns add test2 # ip netns exec test2 sysctl net.ipv4.tcp_ehash_entries net.ipv4.tcp_ehash_entries = 128 # own a per-netns ehash with 2^n buckets When more than two processes in the same netns create per-netns ehash concurrently with different sizes, we need to guarantee the size in one of the following ways: 1) Share the global ehash and create per-netns ehash First, unshare() with tcp_child_ehash_entries==0. It creates dedicated netns sysctl knobs where we can safely change tcp_child_ehash_entries and clone()/unshare() to create a per-netns ehash. 2) Control write on sysctl by BPF We can use BPF_PROG_TYPE_CGROUP_SYSCTL to allow/deny read/write on sysctl knobs. Note that the global ehash allocated at the boot time is spread over available NUMA nodes, but inet_pernet_hashinfo_alloc() will allocate pages for each per-netns ehash depending on the current process's NUMA policy. By default, the allocation is done in the local node only, so the per-netns hash table could fully reside on a random node. Thus, depending on the NUMA policy the netns is created with and the CPU the current thread is running on, we could see some performance differences for highly optimised networking applications. Note also that the default values of two sysctl knobs depend on the ehash size and should be tuned carefully: tcp_max_tw_buckets : tcp_child_ehash_entries / 2 tcp_max_syn_backlog : max(128, tcp_child_ehash_entries / 128) As a bonus, we can dismantle netns faster. Currently, while destroying netns, we call inet_twsk_purge(), which walks through the global ehash. It can be potentially big because it can have many sockets other than TIME_WAIT in all netns. Splitting ehash changes that situation, where it's only necessary for inet_twsk_purge() to clean up TIME_WAIT sockets in each netns. With regard to this, we do not free the per-netns ehash in inet_twsk_kill() to avoid UAF while iterating the per-netns ehash in inet_twsk_purge(). Instead, we do it in tcp_sk_exit_batch() after calling tcp_twsk_purge() to keep it protocol-family-independent. In the future, we could optimise ehash lookup/iteration further by removing netns comparison for the per-netns ehash. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- Documentation/networking/ip-sysctl.rst | 29 +++++++++++++++++++++ include/net/inet_hashtables.h | 6 +++++ include/net/netns/ipv4.h | 1 + net/dccp/proto.c | 2 ++ net/ipv4/inet_hashtables.c | 47 ++++++++++++++++++++++++++++++++++ net/ipv4/sysctl_net_ipv4.c | 39 ++++++++++++++++++++++++++++ net/ipv4/tcp.c | 1 + net/ipv4/tcp_ipv4.c | 38 ++++++++++++++++++++++----- net/ipv4/tcp_minisocks.c | 9 +++++-- 9 files changed, 164 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index a759872a2883..e7b3fa7bb3f7 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1040,6 +1040,35 @@ tcp_challenge_ack_limit - INTEGER TCP stack implements per TCP socket limits anyway. Default: INT_MAX (unlimited) +tcp_ehash_entries - INTEGER + Show the number of hash buckets for TCP sockets in the current + networking namespace. + + A negative value means the networking namespace does not own its + hash buckets and shares the initial networking namespace's one. + +tcp_child_ehash_entries - INTEGER + Control the number of hash buckets for TCP sockets in the child + networking namespace, which must be set before clone() or unshare(). + + If the value is not 0, the kernel uses a value rounded up to 2^n + as the actual hash bucket size. 0 is a special value, meaning + the child networking namespace will share the initial networking + namespace's hash buckets. + + Note that the child will use the global one in case the kernel + fails to allocate enough memory. In addition, the global hash + buckets are spread over available NUMA nodes, but the allocation + of the child hash table depends on the current process's NUMA + policy, which could result in performance differences. + + Note also that the default value of tcp_max_tw_buckets and + tcp_max_syn_backlog depend on the hash bucket size. + + Possible values: 0, 2^n (n: 0 - 24 (16Mi)) + + Default: 0 + UDP variables ============= diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 520dd894b73d..9121ccab1fa1 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -168,6 +168,8 @@ struct inet_hashinfo { /* The 2nd listener table hashed by local port and address */ unsigned int lhash2_mask; struct inet_listen_hashbucket *lhash2; + + bool pernet; }; static inline struct inet_hashinfo *tcp_or_dccp_get_hashinfo(const struct sock *sk) @@ -214,6 +216,10 @@ static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo) hashinfo->ehash_locks = NULL; } +struct inet_hashinfo *inet_pernet_hashinfo_alloc(struct inet_hashinfo *hashinfo, + unsigned int ehash_entries); +void inet_pernet_hashinfo_free(struct inet_hashinfo *hashinfo); + struct inet_bind_bucket * inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net, struct inet_bind_hashbucket *head, diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 2c7df93e3403..1b8004679445 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -171,6 +171,7 @@ struct netns_ipv4 { int sysctl_tcp_pacing_ca_ratio; int sysctl_tcp_wmem[3]; int sysctl_tcp_rmem[3]; + unsigned int sysctl_tcp_child_ehash_entries; unsigned long sysctl_tcp_comp_sack_delay_ns; unsigned long sysctl_tcp_comp_sack_slack_ns; int sysctl_max_syn_backlog; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 7cd4a6cc99fc..c548ca3e9b0e 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1197,6 +1197,8 @@ static int __init dccp_init(void) INIT_HLIST_HEAD(&dccp_hashinfo.bhash2[i].chain); } + dccp_hashinfo.pernet = false; + rc = dccp_mib_init(); if (rc) goto out_free_dccp_bhash2; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index c440de998910..74e64aad5114 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -1145,3 +1145,50 @@ int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) return 0; } EXPORT_SYMBOL_GPL(inet_ehash_locks_alloc); + +struct inet_hashinfo *inet_pernet_hashinfo_alloc(struct inet_hashinfo *hashinfo, + unsigned int ehash_entries) +{ + struct inet_hashinfo *new_hashinfo; + int i; + + new_hashinfo = kmemdup(hashinfo, sizeof(*hashinfo), GFP_KERNEL); + if (!new_hashinfo) + goto err; + + new_hashinfo->ehash = vmalloc_huge(ehash_entries * sizeof(struct inet_ehash_bucket), + GFP_KERNEL_ACCOUNT); + if (!new_hashinfo->ehash) + goto free_hashinfo; + + new_hashinfo->ehash_mask = ehash_entries - 1; + + if (inet_ehash_locks_alloc(new_hashinfo)) + goto free_ehash; + + for (i = 0; i < ehash_entries; i++) + INIT_HLIST_NULLS_HEAD(&new_hashinfo->ehash[i].chain, i); + + new_hashinfo->pernet = true; + + return new_hashinfo; + +free_ehash: + vfree(new_hashinfo->ehash); +free_hashinfo: + kfree(new_hashinfo); +err: + return NULL; +} +EXPORT_SYMBOL_GPL(inet_pernet_hashinfo_alloc); + +void inet_pernet_hashinfo_free(struct inet_hashinfo *hashinfo) +{ + if (!hashinfo->pernet) + return; + + inet_ehash_locks_free(hashinfo); + vfree(hashinfo->ehash); + kfree(hashinfo); +} +EXPORT_SYMBOL_GPL(inet_pernet_hashinfo_free); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 4d7c110c772f..9b8a6db7a66b 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -39,6 +39,7 @@ static u32 u32_max_div_HZ = UINT_MAX / HZ; static int one_day_secs = 24 * 3600; static u32 fib_multipath_hash_fields_all_mask __maybe_unused = FIB_MULTIPATH_HASH_FIELD_ALL_MASK; +static unsigned int tcp_child_ehash_entries_max = 16 * 1024 * 1024; /* obsolete */ static int sysctl_tcp_low_latency __read_mostly; @@ -382,6 +383,29 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl, return ret; } +static int proc_tcp_ehash_entries(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct net *net = container_of(table->data, struct net, + ipv4.sysctl_tcp_child_ehash_entries); + struct inet_hashinfo *hinfo = net->ipv4.tcp_death_row.hashinfo; + int tcp_ehash_entries; + struct ctl_table tbl; + + tcp_ehash_entries = hinfo->ehash_mask + 1; + + /* A negative number indicates that the child netns + * shares the global ehash. + */ + if (!net_eq(net, &init_net) && !hinfo->pernet) + tcp_ehash_entries *= -1; + + tbl.data = &tcp_ehash_entries; + tbl.maxlen = sizeof(int); + + return proc_dointvec(&tbl, write, buffer, lenp, ppos); +} + #ifdef CONFIG_IP_ROUTE_MULTIPATH static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write, void *buffer, size_t *lenp, @@ -1320,6 +1344,21 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, + { + .procname = "tcp_ehash_entries", + .data = &init_net.ipv4.sysctl_tcp_child_ehash_entries, + .mode = 0444, + .proc_handler = proc_tcp_ehash_entries, + }, + { + .procname = "tcp_child_ehash_entries", + .data = &init_net.ipv4.sysctl_tcp_child_ehash_entries, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = &tcp_child_ehash_entries_max, + }, { .procname = "udp_rmem_min", .data = &init_net.ipv4.sysctl_udp_rmem_min, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8230be00ecca..829beee3fa32 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4790,6 +4790,7 @@ void __init tcp_init(void) INIT_HLIST_HEAD(&tcp_hashinfo.bhash2[i].chain); } + tcp_hashinfo.pernet = false; cnt = tcp_hashinfo.ehash_mask + 1; sysctl_tcp_max_orphans = cnt / 2; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 73e6854ea662..6376ad915765 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -3110,10 +3110,38 @@ static void __net_exit tcp_sk_exit(struct net *net) net->ipv4.tcp_congestion_control->owner); } -static int __net_init tcp_sk_init(struct net *net) +static void __net_init tcp_set_hashinfo(struct net *net) { - int cnt; + struct inet_hashinfo *hinfo; + unsigned int ehash_entries; + struct net *old_net; + + if (net_eq(net, &init_net)) + goto fallback; + + old_net = current->nsproxy->net_ns; + ehash_entries = READ_ONCE(old_net->ipv4.sysctl_tcp_child_ehash_entries); + if (!ehash_entries) + goto fallback; + + ehash_entries = roundup_pow_of_two(ehash_entries); + hinfo = inet_pernet_hashinfo_alloc(&tcp_hashinfo, ehash_entries); + if (!hinfo) { + pr_warn("Failed to allocate TCP ehash (entries: %u) " + "for a netns, fallback to the global one\n", + ehash_entries); +fallback: + hinfo = &tcp_hashinfo; + ehash_entries = tcp_hashinfo.ehash_mask + 1; + } + + net->ipv4.tcp_death_row.hashinfo = hinfo; + net->ipv4.tcp_death_row.sysctl_max_tw_buckets = ehash_entries / 2; + net->ipv4.sysctl_max_syn_backlog = max(128U, ehash_entries / 128); +} +static int __net_init tcp_sk_init(struct net *net) +{ net->ipv4.sysctl_tcp_ecn = 2; net->ipv4.sysctl_tcp_ecn_fallback = 1; @@ -3140,11 +3168,8 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_no_ssthresh_metrics_save = 1; refcount_set(&net->ipv4.tcp_death_row.tw_refcount, 1); - cnt = tcp_hashinfo.ehash_mask + 1; - net->ipv4.tcp_death_row.sysctl_max_tw_buckets = cnt / 2; - net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo; + tcp_set_hashinfo(net); - net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 128); net->ipv4.sysctl_tcp_sack = 1; net->ipv4.sysctl_tcp_window_scaling = 1; net->ipv4.sysctl_tcp_timestamps = 1; @@ -3209,6 +3234,7 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) tcp_twsk_purge(net_exit_list, AF_INET); list_for_each_entry(net, net_exit_list, exit_list) { + inet_pernet_hashinfo_free(net->ipv4.tcp_death_row.hashinfo); WARN_ON_ONCE(!refcount_dec_and_test(&net->ipv4.tcp_death_row.tw_refcount)); tcp_fastopen_ctx_destroy(net); } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 98c576d4b671..442838ab0253 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -349,6 +349,7 @@ EXPORT_SYMBOL_GPL(tcp_twsk_destructor); void tcp_twsk_purge(struct list_head *net_exit_list, int family) { + bool purged_once = false; struct net *net; list_for_each_entry(net, net_exit_list, exit_list) { @@ -356,8 +357,12 @@ void tcp_twsk_purge(struct list_head *net_exit_list, int family) if (refcount_read(&net->ipv4.tcp_death_row.tw_refcount) == 1) continue; - inet_twsk_purge(&tcp_hashinfo, family); - break; + if (net->ipv4.tcp_death_row.hashinfo->pernet) { + inet_twsk_purge(net->ipv4.tcp_death_row.hashinfo, family); + } else if (!purged_once) { + inet_twsk_purge(&tcp_hashinfo, family); + purged_once = true; + } } } EXPORT_SYMBOL_GPL(tcp_twsk_purge); -- cgit v1.2.3-70-g09d2 From 52bdae37c92ae10d47d54bd7cd39e0a17547ebfa Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Tue, 20 Sep 2022 08:15:22 -0600 Subject: bpf: Remove unused btf_struct_access stub This stub was not being used anywhere. Signed-off-by: Daniel Xu Link: https://lore.kernel.org/r/590e7bd6172ffe0f3d7b51cd40e8ded941aaf7e8.1663683114.git.dxu@dxuuu.xyz Signed-off-by: Martin KaFai Lau --- include/net/netfilter/nf_conntrack_bpf.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_bpf.h b/include/net/netfilter/nf_conntrack_bpf.h index a61a93d1c6dc..9c07d2d59da5 100644 --- a/include/net/netfilter/nf_conntrack_bpf.h +++ b/include/net/netfilter/nf_conntrack_bpf.h @@ -3,8 +3,6 @@ #ifndef _NF_CONNTRACK_BPF_H #define _NF_CONNTRACK_BPF_H -#include -#include #include #include @@ -31,16 +29,6 @@ static inline void cleanup_nf_conntrack_bpf(void) { } -static inline int nf_conntrack_btf_struct_access(struct bpf_verifier_log *log, - const struct btf *btf, - const struct btf_type *t, int off, - int size, enum bpf_access_type atype, - u32 *next_btf_id, - enum bpf_type_flag *flag) -{ - return -EACCES; -} - #endif #endif /* _NF_CONNTRACK_BPF_H */ -- cgit v1.2.3-70-g09d2 From 5a090aa35038e3dad1ee334e3c509c39e7599bb4 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Tue, 20 Sep 2022 08:15:23 -0600 Subject: bpf: Rename nfct_bsa to nfct_btf_struct_access The former name was a little hard to guess. Signed-off-by: Daniel Xu Link: https://lore.kernel.org/r/73adc72385c8b162391fbfb404f0b6d4c5cc55d7.1663683114.git.dxu@dxuuu.xyz Signed-off-by: Martin KaFai Lau --- include/net/netfilter/nf_conntrack_bpf.h | 8 ++++---- net/core/filter.c | 18 +++++++++--------- net/netfilter/nf_conntrack_bpf.c | 4 ++-- 3 files changed, 15 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_bpf.h b/include/net/netfilter/nf_conntrack_bpf.h index 9c07d2d59da5..1199d4f8e019 100644 --- a/include/net/netfilter/nf_conntrack_bpf.h +++ b/include/net/netfilter/nf_conntrack_bpf.h @@ -13,10 +13,10 @@ extern int register_nf_conntrack_bpf(void); extern void cleanup_nf_conntrack_bpf(void); extern struct mutex nf_conn_btf_access_lock; -extern int (*nfct_bsa)(struct bpf_verifier_log *log, const struct btf *btf, - const struct btf_type *t, int off, int size, - enum bpf_access_type atype, u32 *next_btf_id, - enum bpf_type_flag *flag); +extern int (*nfct_btf_struct_access)(struct bpf_verifier_log *log, const struct btf *btf, + const struct btf_type *t, int off, int size, + enum bpf_access_type atype, u32 *next_btf_id, + enum bpf_type_flag *flag); #else diff --git a/net/core/filter.c b/net/core/filter.c index 4b2be211bcbe..2fd9449026aa 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -8608,11 +8608,11 @@ static bool tc_cls_act_is_valid_access(int off, int size, DEFINE_MUTEX(nf_conn_btf_access_lock); EXPORT_SYMBOL_GPL(nf_conn_btf_access_lock); -int (*nfct_bsa)(struct bpf_verifier_log *log, const struct btf *btf, - const struct btf_type *t, int off, int size, - enum bpf_access_type atype, u32 *next_btf_id, - enum bpf_type_flag *flag); -EXPORT_SYMBOL_GPL(nfct_bsa); +int (*nfct_btf_struct_access)(struct bpf_verifier_log *log, const struct btf *btf, + const struct btf_type *t, int off, int size, + enum bpf_access_type atype, u32 *next_btf_id, + enum bpf_type_flag *flag); +EXPORT_SYMBOL_GPL(nfct_btf_struct_access); static int tc_cls_act_btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, @@ -8628,8 +8628,8 @@ static int tc_cls_act_btf_struct_access(struct bpf_verifier_log *log, flag); mutex_lock(&nf_conn_btf_access_lock); - if (nfct_bsa) - ret = nfct_bsa(log, btf, t, off, size, atype, next_btf_id, flag); + if (nfct_btf_struct_access) + ret = nfct_btf_struct_access(log, btf, t, off, size, atype, next_btf_id, flag); mutex_unlock(&nf_conn_btf_access_lock); return ret; @@ -8708,8 +8708,8 @@ static int xdp_btf_struct_access(struct bpf_verifier_log *log, flag); mutex_lock(&nf_conn_btf_access_lock); - if (nfct_bsa) - ret = nfct_bsa(log, btf, t, off, size, atype, next_btf_id, flag); + if (nfct_btf_struct_access) + ret = nfct_btf_struct_access(log, btf, t, off, size, atype, next_btf_id, flag); mutex_unlock(&nf_conn_btf_access_lock); return ret; diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c index 77eb8e959f61..29c4efb3da5e 100644 --- a/net/netfilter/nf_conntrack_bpf.c +++ b/net/netfilter/nf_conntrack_bpf.c @@ -502,7 +502,7 @@ int register_nf_conntrack_bpf(void) ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_kfunc_set); if (!ret) { mutex_lock(&nf_conn_btf_access_lock); - nfct_bsa = _nf_conntrack_btf_struct_access; + nfct_btf_struct_access = _nf_conntrack_btf_struct_access; mutex_unlock(&nf_conn_btf_access_lock); } @@ -512,6 +512,6 @@ int register_nf_conntrack_bpf(void) void cleanup_nf_conntrack_bpf(void) { mutex_lock(&nf_conn_btf_access_lock); - nfct_bsa = NULL; + nfct_btf_struct_access = NULL; mutex_unlock(&nf_conn_btf_access_lock); } -- cgit v1.2.3-70-g09d2 From fdf214978a71b2749d26f6da2b1d51d9ac23831d Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Tue, 20 Sep 2022 08:15:24 -0600 Subject: bpf: Move nf_conn extern declarations to filter.h We're seeing the following new warnings on netdev/build_32bit and netdev/build_allmodconfig_warn CI jobs: ../net/core/filter.c:8608:1: warning: symbol 'nf_conn_btf_access_lock' was not declared. Should it be static? ../net/core/filter.c:8611:5: warning: symbol 'nfct_bsa' was not declared. Should it be static? Fix by ensuring extern declaration is present while compiling filter.o. Fixes: 864b656f82cc ("bpf: Add support for writing to nf_conn:mark") Signed-off-by: Daniel Xu Link: https://lore.kernel.org/r/2bd2e0283df36d8a4119605878edb1838d144174.1663683114.git.dxu@dxuuu.xyz Signed-off-by: Martin KaFai Lau --- include/linux/filter.h | 6 ++++++ include/net/netfilter/nf_conntrack_bpf.h | 7 ------- net/netfilter/nf_conntrack_bpf.c | 1 + 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/linux/filter.h b/include/linux/filter.h index 75335432fcbc..98e28126c24b 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -567,6 +567,12 @@ struct sk_filter { DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key); +extern struct mutex nf_conn_btf_access_lock; +extern int (*nfct_btf_struct_access)(struct bpf_verifier_log *log, const struct btf *btf, + const struct btf_type *t, int off, int size, + enum bpf_access_type atype, u32 *next_btf_id, + enum bpf_type_flag *flag); + typedef unsigned int (*bpf_dispatcher_fn)(const void *ctx, const struct bpf_insn *insnsi, unsigned int (*bpf_func)(const void *, diff --git a/include/net/netfilter/nf_conntrack_bpf.h b/include/net/netfilter/nf_conntrack_bpf.h index 1199d4f8e019..c8b80add1142 100644 --- a/include/net/netfilter/nf_conntrack_bpf.h +++ b/include/net/netfilter/nf_conntrack_bpf.h @@ -4,7 +4,6 @@ #define _NF_CONNTRACK_BPF_H #include -#include #if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \ (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) @@ -12,12 +11,6 @@ extern int register_nf_conntrack_bpf(void); extern void cleanup_nf_conntrack_bpf(void); -extern struct mutex nf_conn_btf_access_lock; -extern int (*nfct_btf_struct_access)(struct bpf_verifier_log *log, const struct btf *btf, - const struct btf_type *t, int off, int size, - enum bpf_access_type atype, u32 *next_btf_id, - enum bpf_type_flag *flag); - #else static inline int register_nf_conntrack_bpf(void) diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c index 29c4efb3da5e..67df64283aef 100644 --- a/net/netfilter/nf_conntrack_bpf.c +++ b/net/netfilter/nf_conntrack_bpf.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3-70-g09d2 From fe0df81df51eb932a83e0c3844106ac6c0f914db Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Fri, 16 Sep 2022 10:02:43 +0800 Subject: net/sched: cls_api: add helper for tc cls walker stats dump The walk implementation of most tc cls modules is basically the same. That is, the values of count and skip are checked first. If count is greater than or equal to skip, the registered fn function is executed. Otherwise, increase the value of count. So we can reconstruct them. Signed-off-by: Zhengchao Shao Reviewed-by: Jamal Hadi Salim Reviewed-by: Victor Nogueira Tested-by: Victor Nogueira Signed-off-by: Jakub Kicinski --- include/net/pkt_cls.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index d9d90e6925e1..d376c995d906 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -81,6 +81,19 @@ int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode); +static inline bool tc_cls_stats_dump(struct tcf_proto *tp, + struct tcf_walker *arg, + void *filter) +{ + if (arg->count >= arg->skip && arg->fn(tp, filter, arg) < 0) { + arg->stop = 1; + return false; + } + + arg->count++; + return true; +} + #else static inline bool tcf_block_shared(struct tcf_block *block) { -- cgit v1.2.3-70-g09d2 From 1d14b30b5a5e52da93467af7c1dca08f124186df Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 19 Sep 2022 13:06:27 +0000 Subject: net: sched: remove unused tcf_result extension Added by: commit e5cf1baf92cb ("act_mirred: use TC_ACT_REINSERT when possible") but no longer useful. Signed-off-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20220919130627.3551233-1-jhs@mojatatu.com Signed-off-by: Jakub Kicinski --- include/net/sch_generic.h | 5 ----- net/sched/act_mirred.c | 3 +-- 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 7dc83400bde4..32819299937d 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -326,11 +326,6 @@ struct tcf_result { }; const struct tcf_proto *goto_tp; - /* used in the skb_tc_reinsert function */ - struct { - bool ingress; - struct gnet_stats_queue *qstats; - }; }; }; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index f9c14d4188d4..b8ad6ae282c0 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -305,8 +305,7 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, /* let's the caller reinsert the packet, if possible */ if (use_reinsert) { - res->ingress = want_ingress; - err = tcf_mirred_forward(res->ingress, skb); + err = tcf_mirred_forward(want_ingress, skb); if (err) tcf_action_inc_overlimit_qstats(&m->common); __this_cpu_dec(mirred_rec_level); -- cgit v1.2.3-70-g09d2 From adb5c33e4d4c83fb848a402e2191fbf3e2bf50d1 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 14 Sep 2022 19:04:03 +0200 Subject: xfrm: add extack support to xfrm_dev_state_add Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 5 +++-- net/xfrm/xfrm_device.c | 20 +++++++++++++++----- net/xfrm/xfrm_user.c | 8 +++++--- 3 files changed, 23 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 28b988577ed2..9c1cccf85f12 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1886,7 +1886,8 @@ void xfrm_dev_resume(struct sk_buff *skb); void xfrm_dev_backlog(struct softnet_data *sd); struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again); int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, - struct xfrm_user_offload *xuo); + struct xfrm_user_offload *xuo, + struct netlink_ext_ack *extack); bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x); static inline void xfrm_dev_state_advance_esn(struct xfrm_state *x) @@ -1949,7 +1950,7 @@ static inline struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_fea return skb; } -static inline int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo) +static inline int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo, struct netlink_ext_ack *extack) { return 0; } diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 637ca8838436..5f5aafd418af 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -207,7 +207,8 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur EXPORT_SYMBOL_GPL(validate_xmit_xfrm); int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, - struct xfrm_user_offload *xuo) + struct xfrm_user_offload *xuo, + struct netlink_ext_ack *extack) { int err; struct dst_entry *dst; @@ -216,15 +217,21 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, xfrm_address_t *saddr; xfrm_address_t *daddr; - if (!x->type_offload) + if (!x->type_offload) { + NL_SET_ERR_MSG(extack, "Type doesn't support offload"); return -EINVAL; + } /* We don't yet support UDP encapsulation and TFC padding. */ - if (x->encap || x->tfcpad) + if (x->encap || x->tfcpad) { + NL_SET_ERR_MSG(extack, "Encapsulation and TFC padding can't be offloaded"); return -EINVAL; + } - if (xuo->flags & ~(XFRM_OFFLOAD_IPV6 | XFRM_OFFLOAD_INBOUND)) + if (xuo->flags & ~(XFRM_OFFLOAD_IPV6 | XFRM_OFFLOAD_INBOUND)) { + NL_SET_ERR_MSG(extack, "Unrecognized flags in offload request"); return -EINVAL; + } dev = dev_get_by_index(net, xuo->ifindex); if (!dev) { @@ -256,6 +263,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, if (x->props.flags & XFRM_STATE_ESN && !dev->xfrmdev_ops->xdo_dev_state_advance_esn) { + NL_SET_ERR_MSG(extack, "Device doesn't support offload with ESN"); xso->dev = NULL; dev_put(dev); return -EINVAL; @@ -277,8 +285,10 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, xso->real_dev = NULL; netdev_put(dev, &xso->dev_tracker); - if (err != -EOPNOTSUPP) + if (err != -EOPNOTSUPP) { + NL_SET_ERR_MSG(extack, "Device failed to offload this state"); return err; + } } return 0; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 3c150e1f8a2a..c56b9442dffe 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -652,7 +652,8 @@ static void xfrm_smark_init(struct nlattr **attrs, struct xfrm_mark *m) static struct xfrm_state *xfrm_state_construct(struct net *net, struct xfrm_usersa_info *p, struct nlattr **attrs, - int *errp) + int *errp, + struct netlink_ext_ack *extack) { struct xfrm_state *x = xfrm_state_alloc(net); int err = -ENOMEM; @@ -735,7 +736,8 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, /* configure the hardware if offload is requested */ if (attrs[XFRMA_OFFLOAD_DEV]) { err = xfrm_dev_state_add(net, x, - nla_data(attrs[XFRMA_OFFLOAD_DEV])); + nla_data(attrs[XFRMA_OFFLOAD_DEV]), + extack); if (err) goto error; } @@ -763,7 +765,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; - x = xfrm_state_construct(net, p, attrs, &err); + x = xfrm_state_construct(net, p, attrs, &err, extack); if (!x) return err; -- cgit v1.2.3-70-g09d2 From 741f9a1064985512567eca1552643738ecfb5cc5 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 14 Sep 2022 19:04:05 +0200 Subject: xfrm: add extack to __xfrm_init_state Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 3 ++- net/xfrm/xfrm_state.c | 26 +++++++++++++++++++------- net/xfrm/xfrm_user.c | 2 +- 3 files changed, 22 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 9c1cccf85f12..f427a74d571b 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1582,7 +1582,8 @@ void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si); u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq); int xfrm_init_replay(struct xfrm_state *x); u32 xfrm_state_mtu(struct xfrm_state *x, int mtu); -int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload); +int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload, + struct netlink_ext_ack *extack); int xfrm_init_state(struct xfrm_state *x); int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); int xfrm_input_resume(struct sk_buff *skb, int nexthdr); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 52e60e607f8a..7470d2474796 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2610,7 +2610,8 @@ u32 xfrm_state_mtu(struct xfrm_state *x, int mtu) } EXPORT_SYMBOL_GPL(xfrm_state_mtu); -int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload) +int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload, + struct netlink_ext_ack *extack) { const struct xfrm_mode *inner_mode; const struct xfrm_mode *outer_mode; @@ -2625,12 +2626,16 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload) if (x->sel.family != AF_UNSPEC) { inner_mode = xfrm_get_mode(x->props.mode, x->sel.family); - if (inner_mode == NULL) + if (inner_mode == NULL) { + NL_SET_ERR_MSG(extack, "Requested mode not found"); goto error; + } if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) && - family != x->sel.family) + family != x->sel.family) { + NL_SET_ERR_MSG(extack, "Only tunnel modes can accommodate a change of family"); goto error; + } x->inner_mode = *inner_mode; } else { @@ -2638,11 +2643,15 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload) int iafamily = AF_INET; inner_mode = xfrm_get_mode(x->props.mode, x->props.family); - if (inner_mode == NULL) + if (inner_mode == NULL) { + NL_SET_ERR_MSG(extack, "Requested mode not found"); goto error; + } - if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL)) + if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL)) { + NL_SET_ERR_MSG(extack, "Only tunnel modes can accommodate an AF_UNSPEC selector"); goto error; + } x->inner_mode = *inner_mode; @@ -2657,8 +2666,10 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload) } x->type = xfrm_get_type(x->id.proto, family); - if (x->type == NULL) + if (x->type == NULL) { + NL_SET_ERR_MSG(extack, "Requested type not found"); goto error; + } x->type_offload = xfrm_get_type_offload(x->id.proto, family, offload); @@ -2668,6 +2679,7 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload) outer_mode = xfrm_get_mode(x->props.mode, family); if (!outer_mode) { + NL_SET_ERR_MSG(extack, "Requested mode not found"); err = -EPROTONOSUPPORT; goto error; } @@ -2689,7 +2701,7 @@ int xfrm_init_state(struct xfrm_state *x) { int err; - err = __xfrm_init_state(x, true, false); + err = __xfrm_init_state(x, true, false, NULL); if (!err) x->km.state = XFRM_STATE_VALID; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 2cf5956b562e..14e9b84f9dad 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -721,7 +721,7 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, if (attrs[XFRMA_IF_ID]) x->if_id = nla_get_u32(attrs[XFRMA_IF_ID]); - err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV]); + err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV], extack); if (err) goto error; -- cgit v1.2.3-70-g09d2 From 1cf9a3ae3e2de359471a7036f48ac59e48b15256 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 14 Sep 2022 19:04:06 +0200 Subject: xfrm: add extack support to xfrm_init_replay Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 2 +- net/xfrm/xfrm_replay.c | 10 +++++++--- net/xfrm/xfrm_state.c | 2 +- net/xfrm/xfrm_user.c | 2 +- 4 files changed, 10 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index f427a74d571b..c504d07bcb7c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1580,7 +1580,7 @@ int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_vali void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si); void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si); u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq); -int xfrm_init_replay(struct xfrm_state *x); +int xfrm_init_replay(struct xfrm_state *x, struct netlink_ext_ack *extack); u32 xfrm_state_mtu(struct xfrm_state *x, int mtu); int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload, struct netlink_ext_ack *extack); diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 9277d81b344c..9f4d42eb090f 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -766,18 +766,22 @@ int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb) } #endif -int xfrm_init_replay(struct xfrm_state *x) +int xfrm_init_replay(struct xfrm_state *x, struct netlink_ext_ack *extack) { struct xfrm_replay_state_esn *replay_esn = x->replay_esn; if (replay_esn) { if (replay_esn->replay_window > - replay_esn->bmp_len * sizeof(__u32) * 8) + replay_esn->bmp_len * sizeof(__u32) * 8) { + NL_SET_ERR_MSG(extack, "ESN replay window is too large for the chosen bitmap size"); return -EINVAL; + } if (x->props.flags & XFRM_STATE_ESN) { - if (replay_esn->replay_window == 0) + if (replay_esn->replay_window == 0) { + NL_SET_ERR_MSG(extack, "ESN replay window must be > 0"); return -EINVAL; + } x->repl_mode = XFRM_REPLAY_MODE_ESN; } else { x->repl_mode = XFRM_REPLAY_MODE_BMP; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 7470d2474796..0b59ff7985e6 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2686,7 +2686,7 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload, x->outer_mode = *outer_mode; if (init_replay) { - err = xfrm_init_replay(x); + err = xfrm_init_replay(x, extack); if (err) goto error; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 14e9b84f9dad..e73f9efc54c1 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -741,7 +741,7 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, /* sysctl_xfrm_aevent_etime is in 100ms units */ x->replay_maxage = (net->xfrm.sysctl_aevent_etime*HZ)/XFRM_AE_ETH_M; - if ((err = xfrm_init_replay(x))) + if ((err = xfrm_init_replay(x, extack))) goto error; /* override default values from above */ -- cgit v1.2.3-70-g09d2 From 77eee32514314209961af5c2982e871ecb364445 Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Tue, 20 Sep 2022 17:52:21 +0800 Subject: net/smc: Introduce a specific sysctl for TEST_LINK time SMC-R tests the viability of link by sending out TEST_LINK LLC messages over RoCE fabric when connections on link have been idle for a time longer than keepalive interval (testlink time). But using tcp_keepalive_time as testlink time maybe not quite suitable because it is default no less than two hours[1], which is too long for single link to find peer dead. The active host will still use peer-dead link (QP) sending messages, and can't find out until get IB_WC_RETRY_EXC_ERR error CQEs, which takes more time than TEST_LINK timeout (SMC_LLC_WAIT_TIME) normally. So this patch introduces a independent sysctl for SMC-R to set link keepalive time, in order to detect link down in time. The default value is 30 seconds. [1] https://www.rfc-editor.org/rfc/rfc1122#page-101 Signed-off-by: Wen Gu Signed-off-by: Paolo Abeni --- Documentation/networking/smc-sysctl.rst | 7 +++++++ include/net/netns/smc.h | 1 + net/smc/smc_llc.c | 2 +- net/smc/smc_llc.h | 1 + net/smc/smc_sysctl.c | 9 +++++++++ 5 files changed, 19 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/Documentation/networking/smc-sysctl.rst b/Documentation/networking/smc-sysctl.rst index 742e90e6d822..45ba1522e189 100644 --- a/Documentation/networking/smc-sysctl.rst +++ b/Documentation/networking/smc-sysctl.rst @@ -34,3 +34,10 @@ smcr_buf_type - INTEGER - 1 - Use virtually contiguous buffers - 2 - Mixed use of the two types. Try physically contiguous buffers first. If not available, use virtually contiguous buffers then. + +smcr_testlink_time - INTEGER + How frequently SMC-R link sends out TEST_LINK LLC messages to confirm + viability, after the last activity of connections on it. Value 0 means + disabling TEST_LINK. + + Default: 30 seconds. diff --git a/include/net/netns/smc.h b/include/net/netns/smc.h index 2adbe2b245df..d295e2c10dca 100644 --- a/include/net/netns/smc.h +++ b/include/net/netns/smc.h @@ -19,5 +19,6 @@ struct netns_smc { #endif unsigned int sysctl_autocorking_size; unsigned int sysctl_smcr_buf_type; + int sysctl_smcr_testlink_time; }; #endif diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 175026ae33ae..524649d0ab65 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -2127,7 +2127,7 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc) init_waitqueue_head(&lgr->llc_flow_waiter); init_waitqueue_head(&lgr->llc_msg_waiter); mutex_init(&lgr->llc_conf_mutex); - lgr->llc_testlink_time = READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time); + lgr->llc_testlink_time = READ_ONCE(net->smc.sysctl_smcr_testlink_time); } /* called after lgr was removed from lgr_list */ diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h index 4404e52b3346..7e7a3162c68b 100644 --- a/net/smc/smc_llc.h +++ b/net/smc/smc_llc.h @@ -19,6 +19,7 @@ #define SMC_LLC_WAIT_FIRST_TIME (5 * HZ) #define SMC_LLC_WAIT_TIME (2 * HZ) +#define SMC_LLC_TESTLINK_DEFAULT_TIME (30 * HZ) enum smc_llc_reqresp { SMC_LLC_REQ, diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c index 0613868fdb97..3224d303cc9d 100644 --- a/net/smc/smc_sysctl.c +++ b/net/smc/smc_sysctl.c @@ -16,6 +16,7 @@ #include "smc.h" #include "smc_core.h" +#include "smc_llc.h" #include "smc_sysctl.h" static struct ctl_table smc_table[] = { @@ -35,6 +36,13 @@ static struct ctl_table smc_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_TWO, }, + { + .procname = "smcr_testlink_time", + .data = &init_net.smc.sysctl_smcr_testlink_time, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, { } }; @@ -60,6 +68,7 @@ int __net_init smc_sysctl_net_init(struct net *net) net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE; net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS; + net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME; return 0; -- cgit v1.2.3-70-g09d2 From 0227f058aa29f5ab6f6ec79c3a36ae41f1e03a13 Mon Sep 17 00:00:00 2001 From: Tony Lu Date: Tue, 20 Sep 2022 17:52:22 +0800 Subject: net/smc: Unbind r/w buffer size from clcsock and make them tunable Currently, SMC uses smc->sk.sk_{rcv|snd}buf to create buffers for send buffer and RMB. And the values of buffer size are from tcp_{w|r}mem in clcsock. The buffer size from TCP socket doesn't fit SMC well. Generally, buffers are usually larger than TCP for SMC-R/-D to get higher performance, for they are different underlay devices and paths. So this patch unbinds buffer size from TCP, and introduces two sysctl knobs to tune them independently. Also, these knobs are per net namespace and work for containers. Signed-off-by: Tony Lu Signed-off-by: Paolo Abeni --- Documentation/networking/smc-sysctl.rst | 18 ++++++++++++++++++ include/net/netns/smc.h | 2 ++ net/smc/af_smc.c | 5 ++--- net/smc/smc_core.c | 8 ++++---- net/smc/smc_sysctl.c | 21 +++++++++++++++++++++ 5 files changed, 47 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/Documentation/networking/smc-sysctl.rst b/Documentation/networking/smc-sysctl.rst index 45ba1522e189..6d8acdbe9be1 100644 --- a/Documentation/networking/smc-sysctl.rst +++ b/Documentation/networking/smc-sysctl.rst @@ -41,3 +41,21 @@ smcr_testlink_time - INTEGER disabling TEST_LINK. Default: 30 seconds. + +wmem - INTEGER + Initial size of send buffer used by SMC sockets. + The default value inherits from net.ipv4.tcp_wmem[1]. + + The minimum value is 16KiB and there is no hard limit for max value, but + only allowed 512KiB for SMC-R and 1MiB for SMC-D. + + Default: 16K + +rmem - INTEGER + Initial size of receive buffer (RMB) used by SMC sockets. + The default value inherits from net.ipv4.tcp_rmem[1]. + + The minimum value is 16KiB and there is no hard limit for max value, but + only allowed 512KiB for SMC-R and 1MiB for SMC-D. + + Default: 128K diff --git a/include/net/netns/smc.h b/include/net/netns/smc.h index d295e2c10dca..582212ada3ba 100644 --- a/include/net/netns/smc.h +++ b/include/net/netns/smc.h @@ -20,5 +20,7 @@ struct netns_smc { unsigned int sysctl_autocorking_size; unsigned int sysctl_smcr_buf_type; int sysctl_smcr_testlink_time; + int sysctl_wmem; + int sysctl_rmem; }; #endif diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 0939cc3b915a..e44ca7009632 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -379,6 +379,8 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock, sk->sk_state = SMC_INIT; sk->sk_destruct = smc_destruct; sk->sk_protocol = protocol; + WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(net->smc.sysctl_wmem)); + WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(net->smc.sysctl_rmem)); smc = smc_sk(sk); INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work); INIT_WORK(&smc->connect_work, smc_connect_work); @@ -3253,9 +3255,6 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol, smc->clcsock = clcsock; } - smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE); - smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE); - out: return rc; } diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index ebf56cdf17db..ea41f224f644 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -2307,10 +2307,10 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) if (is_rmb) /* use socket recv buffer size (w/o overhead) as start value */ - sk_buf_size = smc->sk.sk_rcvbuf / 2; + sk_buf_size = smc->sk.sk_rcvbuf; else /* use socket send buffer size (w/o overhead) as start value */ - sk_buf_size = smc->sk.sk_sndbuf / 2; + sk_buf_size = smc->sk.sk_sndbuf; for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb); bufsize_short >= 0; bufsize_short--) { @@ -2369,7 +2369,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) if (is_rmb) { conn->rmb_desc = buf_desc; conn->rmbe_size_short = bufsize_short; - smc->sk.sk_rcvbuf = bufsize * 2; + smc->sk.sk_rcvbuf = bufsize; atomic_set(&conn->bytes_to_rcv, 0); conn->rmbe_update_limit = smc_rmb_wnd_update_limit(buf_desc->len); @@ -2377,7 +2377,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */ } else { conn->sndbuf_desc = buf_desc; - smc->sk.sk_sndbuf = bufsize * 2; + smc->sk.sk_sndbuf = bufsize; atomic_set(&conn->sndbuf_space, bufsize); } return 0; diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c index 3224d303cc9d..b6f79fabb9d3 100644 --- a/net/smc/smc_sysctl.c +++ b/net/smc/smc_sysctl.c @@ -19,6 +19,9 @@ #include "smc_llc.h" #include "smc_sysctl.h" +static int min_sndbuf = SMC_BUF_MIN_SIZE; +static int min_rcvbuf = SMC_BUF_MIN_SIZE; + static struct ctl_table smc_table[] = { { .procname = "autocorking_size", @@ -43,6 +46,22 @@ static struct ctl_table smc_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, + { + .procname = "wmem", + .data = &init_net.smc.sysctl_wmem, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &min_sndbuf, + }, + { + .procname = "rmem", + .data = &init_net.smc.sysctl_rmem, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &min_rcvbuf, + }, { } }; @@ -69,6 +88,8 @@ int __net_init smc_sysctl_net_init(struct net *net) net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE; net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS; net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME; + WRITE_ONCE(net->smc.sysctl_wmem, READ_ONCE(net->ipv4.sysctl_tcp_wmem[1])); + WRITE_ONCE(net->smc.sysctl_rmem, READ_ONCE(net->ipv4.sysctl_tcp_rmem[1])); return 0; -- cgit v1.2.3-70-g09d2 From 60240bc26114543fcbfcd8a28466e67e77b20388 Mon Sep 17 00:00:00 2001 From: Jalal Mostafa Date: Wed, 21 Sep 2022 13:57:01 +0000 Subject: xsk: Inherit need_wakeup flag for shared sockets The flag for need_wakeup is not set for xsks with `XDP_SHARED_UMEM` flag and of different queue ids and/or devices. They should inherit the flag from the first socket buffer pool since no flags can be specified once `XDP_SHARED_UMEM` is specified. Fixes: b5aea28dca134 ("xsk: Add shared umem support between queue ids") Signed-off-by: Jalal Mostafa Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20220921135701.10199-1-jalal.a.mostapha@gmail.com --- include/net/xsk_buff_pool.h | 2 +- net/xdp/xsk.c | 4 ++-- net/xdp/xsk_buff_pool.c | 5 +++-- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index 647722e847b4..f787c3f524b0 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -95,7 +95,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, struct xdp_umem *umem); int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *dev, u16 queue_id, u16 flags); -int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem, +int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_sock *umem_xs, struct net_device *dev, u16 queue_id); int xp_alloc_tx_descs(struct xsk_buff_pool *pool, struct xdp_sock *xs); void xp_destroy(struct xsk_buff_pool *pool); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 639b2c3beb69..9f0561b67c12 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -954,8 +954,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) goto out_unlock; } - err = xp_assign_dev_shared(xs->pool, umem_xs->umem, - dev, qid); + err = xp_assign_dev_shared(xs->pool, umem_xs, dev, + qid); if (err) { xp_destroy(xs->pool); xs->pool = NULL; diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index a71a8c6edf55..ed6c71826d31 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -212,17 +212,18 @@ err_unreg_pool: return err; } -int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem, +int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_sock *umem_xs, struct net_device *dev, u16 queue_id) { u16 flags; + struct xdp_umem *umem = umem_xs->umem; /* One fill and completion ring required for each queue id. */ if (!pool->fq || !pool->cq) return -EINVAL; flags = umem->zc ? XDP_ZEROCOPY : XDP_COPY; - if (pool->uses_need_wakeup) + if (umem_xs->pool->uses_need_wakeup) flags |= XDP_USE_NEED_WAKEUP; return xp_assign_dev(pool, dev, queue_id, flags); -- cgit v1.2.3-70-g09d2 From 2d2c5ea24243eb3ed12f232b2aef43981fa15360 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Tue, 20 Sep 2022 16:01:47 +0300 Subject: net/tls: Describe ciphers sizes by const structs Introduce cipher sizes descriptor. It helps reducing the amount of code duplications and repeated switch/cases that assigns the proper sizes according to the cipher type. Signed-off-by: Tariq Toukan Signed-off-by: Gal Pressman Signed-off-by: Jakub Kicinski --- include/net/tls.h | 10 ++++++++++ net/tls/tls_main.c | 17 +++++++++++++++++ 2 files changed, 27 insertions(+) (limited to 'include/net') diff --git a/include/net/tls.h b/include/net/tls.h index cb205f9d9473..154949c7b0c8 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -51,6 +51,16 @@ struct tls_rec; +struct tls_cipher_size_desc { + unsigned int iv; + unsigned int key; + unsigned int salt; + unsigned int tag; + unsigned int rec_seq; +}; + +extern const struct tls_cipher_size_desc tls_cipher_size_desc[]; + /* Maximum data size carried in a TLS record */ #define TLS_MAX_PAYLOAD_SIZE ((size_t)1 << 14) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 08ddf9d837ae..5cc6911cc97d 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -58,6 +58,23 @@ enum { TLS_NUM_PROTS, }; +#define CIPHER_SIZE_DESC(cipher) [cipher] = { \ + .iv = cipher ## _IV_SIZE, \ + .key = cipher ## _KEY_SIZE, \ + .salt = cipher ## _SALT_SIZE, \ + .tag = cipher ## _TAG_SIZE, \ + .rec_seq = cipher ## _REC_SEQ_SIZE, \ +} + +const struct tls_cipher_size_desc tls_cipher_size_desc[] = { + CIPHER_SIZE_DESC(TLS_CIPHER_AES_GCM_128), + CIPHER_SIZE_DESC(TLS_CIPHER_AES_GCM_256), + CIPHER_SIZE_DESC(TLS_CIPHER_AES_CCM_128), + CIPHER_SIZE_DESC(TLS_CIPHER_CHACHA20_POLY1305), + CIPHER_SIZE_DESC(TLS_CIPHER_SM4_GCM), + CIPHER_SIZE_DESC(TLS_CIPHER_SM4_CCM), +}; + static const struct proto *saved_tcpv6_prot; static DEFINE_MUTEX(tcpv6_prot_mutex); static const struct proto *saved_tcpv4_prot; -- cgit v1.2.3-70-g09d2 From d7a68e564e29bcd1c70d76d9e2f65591e6183f46 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 21 Sep 2022 10:41:04 +0800 Subject: net/sched: sch_api: add helper for tc qdisc walker stats dump MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The walk implementation of most qdisc class modules is basically the same. That is, the values of count and skip are checked first. If count is greater than or equal to skip, the registered fn function is executed. Otherwise, increase the value of count. So we can reconstruct them. Signed-off-by: Zhengchao Shao Acked-by: Toke Høiland-Jørgensen Signed-off-by: Jakub Kicinski --- include/net/pkt_sched.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/net') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 29f65632ebc5..2ff80cd04c5c 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -222,4 +222,17 @@ static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb) return cb; } +static inline bool tc_qdisc_stats_dump(struct Qdisc *sch, + unsigned long cl, + struct qdisc_walker *arg) +{ + if (arg->count >= arg->skip && arg->fn(sch, cl, arg) < 0) { + arg->stop = 1; + return false; + } + + arg->count++; + return true; +} + #endif -- cgit v1.2.3-70-g09d2 From 99383f1298ee25901b1f6a665bdcc3344acb2382 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Wed, 21 Sep 2022 15:51:18 +0200 Subject: net: macsec: remove the prepare flag from the MACsec offloading context Now that the MACsec offloading preparation phase was removed from the MACsec core implementation as well as from drivers implementing it, we can safely remove the flag representing it. Signed-off-by: Antoine Tenart Signed-off-by: Jakub Kicinski --- drivers/net/macsec.c | 1 - include/net/macsec.h | 2 -- 2 files changed, 3 deletions(-) (limited to 'include/net') diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 160976929dfe..8193ab39206f 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -1663,7 +1663,6 @@ static int macsec_offload(int (* const func)(struct macsec_context *), if (ctx->offload == MACSEC_OFFLOAD_PHY) mutex_lock(&ctx->phydev->lock); - ctx->prepare = false; ret = (*func)(ctx); if (ctx->offload == MACSEC_OFFLOAD_PHY) diff --git a/include/net/macsec.h b/include/net/macsec.h index 871599b11707..5b9c61c4d3a6 100644 --- a/include/net/macsec.h +++ b/include/net/macsec.h @@ -271,8 +271,6 @@ struct macsec_context { struct macsec_rx_sa_stats *rx_sa_stats; struct macsec_dev_stats *dev_stats; } stats; - - u8 prepare:1; }; /** -- cgit v1.2.3-70-g09d2 From c8f01a4a54473f88f8cc0d9046ec9eb5a99815d5 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Thu, 22 Sep 2022 16:38:55 +0800 Subject: neighbour: Remove unused inline function neigh_key_eq16() All uses of neigh_key_eq16() have been removed since commit 1202cdd66531 ("Remove DECnet support from kernel"), so remove it. Signed-off-by: Gaosheng Cui Signed-off-by: Jakub Kicinski --- include/net/neighbour.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/net') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 3827a6b395fd..20745cf7ae1a 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -276,11 +276,6 @@ static inline void *neighbour_priv(const struct neighbour *n) extern const struct nla_policy nda_policy[]; -static inline bool neigh_key_eq16(const struct neighbour *n, const void *pkey) -{ - return *(const u16 *)n->primary_key == *(const u16 *)pkey; -} - static inline bool neigh_key_eq32(const struct neighbour *n, const void *pkey) { return *(const u32 *)n->primary_key == *(const u32 *)pkey; -- cgit v1.2.3-70-g09d2 From d6755f37abfd009e7ca3520c319c93d14cae54b3 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Thu, 22 Sep 2022 16:38:56 +0800 Subject: net: Remove unused inline function sk_nulls_node_init() All uses of sk_nulls_node_init() have been removed since commit dbca1596bbb0 ("ping: convert to RCU lookups, get rid of rwlock"), so remove it. Signed-off-by: Gaosheng Cui Signed-off-by: Jakub Kicinski --- include/net/sock.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 96a31026e35d..08038a385ef2 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -744,11 +744,6 @@ static inline void sk_node_init(struct hlist_node *node) node->pprev = NULL; } -static inline void sk_nulls_node_init(struct hlist_nulls_node *node) -{ - node->pprev = NULL; -} - static inline void __sk_del_node(struct sock *sk) { __hlist_del(&sk->sk_node); -- cgit v1.2.3-70-g09d2 From 0b81882ddf8ac2743f657afb001beec7fc3929af Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Thu, 22 Sep 2022 16:38:57 +0800 Subject: net: Remove unused inline function dst_hold_and_use() All uses of dst_hold_and_use() have been removed since commit 1202cdd66531 ("Remove DECnet support from kernel"), so remove it. Signed-off-by: Gaosheng Cui Signed-off-by: Jakub Kicinski --- include/net/dst.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index 6aa252c3fc55..00b479ce6b99 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -239,12 +239,6 @@ static inline void dst_use_noref(struct dst_entry *dst, unsigned long time) } } -static inline void dst_hold_and_use(struct dst_entry *dst, unsigned long time) -{ - dst_hold(dst); - dst_use_noref(dst, time); -} - static inline struct dst_entry *dst_clone(struct dst_entry *dst) { if (dst) -- cgit v1.2.3-70-g09d2 From b860a1b964be7917ed3dcaa674ec0a2ba0a62aa0 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 23 Sep 2022 14:48:00 +0200 Subject: xdp: Adjust xdp_frame layout to avoid using bitfields Practical experience (and advice from Alexei) tell us that bitfields in structs lead to un-optimized assembly code. I've verified this change does lead to better x86_64 assembly, both via objdump and playing with code snippets in godbolt.org. Using scripts/bloat-o-meter shows the code size is reduced with 24 bytes for xdp_convert_buff_to_frame() that gets inlined e.g. in i40e_xmit_xdp_tx_ring() which were used for microbenchmarking. Microbenchmarking results do show improvements, but very small and varying between 0.5 to 2 nanosec improvement per packet. The member @metasize is changed from u8 to u32. Future users of this area could split this into two u16 fields. I've also benchmarked with two u16 fields showing equal performance gains and code size reduction. The moved member @frame_sz doesn't change sizeof struct due to existing padding. Like xdp_buff member @frame_sz is placed next to @flags, which allows compiler to optimize assignment of these. Signed-off-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/r/166393728005.2213882.4162674859542409548.stgit@firesoul Signed-off-by: Jakub Kicinski --- include/net/xdp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/xdp.h b/include/net/xdp.h index 04c852c7a77f..55dbc68bfffc 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -164,13 +164,13 @@ struct xdp_frame { void *data; u16 len; u16 headroom; - u32 metasize:8; - u32 frame_sz:24; + u32 metasize; /* uses lower 8-bits */ /* Lifetime of xdp_rxq_info is limited to NAPI/enqueue time, * while mem info is valid on remote CPU. */ struct xdp_mem_info mem; struct net_device *dev_rx; /* used by cpumap */ + u32 frame_sz; u32 flags; /* supported values defined in xdp_buff_flags */ }; -- cgit v1.2.3-70-g09d2 From 63a8bf85568b30438431d4d78afb2fde4a280760 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 26 Sep 2022 18:02:20 -0500 Subject: netns: Replace zero-length array with DECLARE_FLEX_ARRAY() helper Zero-length arrays are deprecated and we are moving towards adopting C99 flexible-array members, instead. So, replace zero-length arrays declarations in anonymous union with the new DECLARE_FLEX_ARRAY() helper macro. This helper allows for flexible-array members in unions. Link: https://github.com/KSPP/linux/issues/193 Link: https://github.com/KSPP/linux/issues/225 Link: https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/YzIvfGXxfjdXmIS3@work Signed-off-by: Jakub Kicinski --- include/net/netns/generic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netns/generic.h b/include/net/netns/generic.h index 7ce68183f6e1..00c399edeed1 100644 --- a/include/net/netns/generic.h +++ b/include/net/netns/generic.h @@ -33,7 +33,7 @@ struct net_generic { struct rcu_head rcu; } s; - void *ptr[0]; + DECLARE_FLEX_ARRAY(void *, ptr); }; }; -- cgit v1.2.3-70-g09d2 From 3242abeb8da7071fa40d32346ed36343bee33b80 Mon Sep 17 00:00:00 2001 From: Benjamin Hesmans Date: Mon, 26 Sep 2022 16:27:37 -0700 Subject: tcp: export tcp_sendmsg_fastopen It will be used to support TCP FastOpen with MPTCP in the following commit. Acked-by: Paolo Abeni Reviewed-by: Matthieu Baerts Co-developed-by: Dmytro Shytyi Signed-off-by: Dmytro Shytyi Signed-off-by: Benjamin Hesmans Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 2 ++ net/ipv4/tcp.c | 5 ++--- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 27e8d378c70a..4f71cc15ff8e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -327,6 +327,8 @@ void tcp_remove_empty_skb(struct sock *sk); int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size); +int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied, + size_t size, struct ubuf_info *uarg); int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a109a4cb1e47..648b5c54bb32 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1162,9 +1162,8 @@ void tcp_free_fastopen_req(struct tcp_sock *tp) } } -static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, - int *copied, size_t size, - struct ubuf_info *uarg) +int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied, + size_t size, struct ubuf_info *uarg) { struct tcp_sock *tp = tcp_sk(sk); struct inet_sock *inet = inet_sk(sk); -- cgit v1.2.3-70-g09d2 From 5456262d2baa43c38e0c770543d5a31b0942f41c Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 26 Sep 2022 17:25:44 -0700 Subject: net: Fix incorrect address comparison when searching for a bind2 bucket The v6_rcv_saddr and rcv_saddr are inside a union in the 'struct inet_bind2_bucket'. When searching a bucket by following the bhash2 hashtable chain, eg. inet_bind2_bucket_match, it is only using the sk->sk_family and there is no way to check if the inet_bind2_bucket has a v6 or v4 address in the union. This leads to an uninit-value KMSAN report in [0] and also potentially incorrect matches. This patch fixes it by adding a family member to the inet_bind2_bucket and then tests 'sk->sk_family != tb->family' before matching the sk's address to the tb's address. Cc: Joanne Koong Fixes: 28044fc1d495 ("net: Add a bhash2 table hashed by port and address") Signed-off-by: Martin KaFai Lau Reviewed-by: Eric Dumazet Tested-by: Alexander Potapenko Link: https://lore.kernel.org/r/20220927002544.3381205-1-kafai@fb.com Signed-off-by: Jakub Kicinski --- include/net/inet_hashtables.h | 3 +++ net/ipv4/inet_hashtables.c | 10 ++++++++++ 2 files changed, 13 insertions(+) (limited to 'include/net') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 9121ccab1fa1..3af1e927247d 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -95,6 +95,9 @@ struct inet_bind2_bucket { possible_net_t ib_net; int l3mdev; unsigned short port; +#if IS_ENABLED(CONFIG_IPV6) + unsigned short family; +#endif union { #if IS_ENABLED(CONFIG_IPV6) struct in6_addr v6_rcv_saddr; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 74e64aad5114..49db8c597eea 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -109,6 +109,7 @@ static void inet_bind2_bucket_init(struct inet_bind2_bucket *tb, tb->l3mdev = l3mdev; tb->port = port; #if IS_ENABLED(CONFIG_IPV6) + tb->family = sk->sk_family; if (sk->sk_family == AF_INET6) tb->v6_rcv_saddr = sk->sk_v6_rcv_saddr; else @@ -146,6 +147,9 @@ static bool inet_bind2_bucket_addr_match(const struct inet_bind2_bucket *tb2, const struct sock *sk) { #if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family != tb2->family) + return false; + if (sk->sk_family == AF_INET6) return ipv6_addr_equal(&tb2->v6_rcv_saddr, &sk->sk_v6_rcv_saddr); @@ -791,6 +795,9 @@ static bool inet_bind2_bucket_match(const struct inet_bind2_bucket *tb, int l3mdev, const struct sock *sk) { #if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family != tb->family) + return false; + if (sk->sk_family == AF_INET6) return net_eq(ib2_net(tb), net) && tb->port == port && tb->l3mdev == l3mdev && @@ -807,6 +814,9 @@ bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, const #if IS_ENABLED(CONFIG_IPV6) struct in6_addr addr_any = {}; + if (sk->sk_family != tb->family) + return false; + if (sk->sk_family == AF_INET6) return net_eq(ib2_net(tb), net) && tb->port == port && tb->l3mdev == l3mdev && -- cgit v1.2.3-70-g09d2 From e1e10b44cf284248fb099681f48cc723564a1cc8 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 27 Sep 2022 17:45:29 +0200 Subject: xfrm: pass extack down to xfrm_type ->init_state Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 3 ++- net/ipv4/ah4.c | 2 +- net/ipv4/esp4.c | 2 +- net/ipv4/ipcomp.c | 3 ++- net/ipv4/xfrm4_tunnel.c | 2 +- net/ipv6/ah6.c | 2 +- net/ipv6/esp6.c | 2 +- net/ipv6/ipcomp6.c | 3 ++- net/ipv6/mip6.c | 4 ++-- net/ipv6/xfrm6_tunnel.c | 2 +- net/xfrm/xfrm_state.c | 2 +- 11 files changed, 15 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index c504d07bcb7c..dbc81f5eb553 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -405,7 +405,8 @@ struct xfrm_type { #define XFRM_TYPE_LOCAL_COADDR 4 #define XFRM_TYPE_REMOTE_COADDR 8 - int (*init_state)(struct xfrm_state *x); + int (*init_state)(struct xfrm_state *x, + struct netlink_ext_ack *extack); void (*destructor)(struct xfrm_state *); int (*input)(struct xfrm_state *, struct sk_buff *skb); int (*output)(struct xfrm_state *, struct sk_buff *pskb); diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index f8ad04470d3a..babefff15de3 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -471,7 +471,7 @@ static int ah4_err(struct sk_buff *skb, u32 info) return 0; } -static int ah_init_state(struct xfrm_state *x) +static int ah_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) { struct ah_data *ahp = NULL; struct xfrm_algo_desc *aalg_desc; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 5c03eba787e5..bc2b2c5717b5 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -1131,7 +1131,7 @@ error: return err; } -static int esp_init_state(struct xfrm_state *x) +static int esp_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) { struct crypto_aead *aead; u32 align; diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 366094c1ce6c..230d1120874f 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -117,7 +117,8 @@ out: return err; } -static int ipcomp4_init_state(struct xfrm_state *x) +static int ipcomp4_init_state(struct xfrm_state *x, + struct netlink_ext_ack *extack) { int err = -EINVAL; diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index 9d4f418f1bf8..08826e0d7962 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -22,7 +22,7 @@ static int ipip_xfrm_rcv(struct xfrm_state *x, struct sk_buff *skb) return ip_hdr(skb)->protocol; } -static int ipip_init_state(struct xfrm_state *x) +static int ipip_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) { if (x->props.mode != XFRM_MODE_TUNNEL) return -EINVAL; diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index b5995c1f4d7a..f5bc0d4b37ad 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -666,7 +666,7 @@ static int ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; } -static int ah6_init_state(struct xfrm_state *x) +static int ah6_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) { struct ah_data *ahp = NULL; struct xfrm_algo_desc *aalg_desc; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 8220923a12f7..2ca9b7b7e500 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -1174,7 +1174,7 @@ error: return err; } -static int esp6_init_state(struct xfrm_state *x) +static int esp6_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) { struct crypto_aead *aead; u32 align; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 15f984be3570..7e47009739e9 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -136,7 +136,8 @@ out: return err; } -static int ipcomp6_init_state(struct xfrm_state *x) +static int ipcomp6_init_state(struct xfrm_state *x, + struct netlink_ext_ack *extack) { int err = -EINVAL; diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index aeb35d26e474..3d87ae88ebfd 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -247,7 +247,7 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, return err; } -static int mip6_destopt_init_state(struct xfrm_state *x) +static int mip6_destopt_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) { if (x->id.spi) { pr_info("%s: spi is not 0: %u\n", __func__, x->id.spi); @@ -333,7 +333,7 @@ static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb) return 0; } -static int mip6_rthdr_init_state(struct xfrm_state *x) +static int mip6_rthdr_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) { if (x->id.spi) { pr_info("%s: spi is not 0: %u\n", __func__, x->id.spi); diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 2b31112c0856..dda44b0671ac 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -270,7 +270,7 @@ static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; } -static int xfrm6_tunnel_init_state(struct xfrm_state *x) +static int xfrm6_tunnel_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) { if (x->props.mode != XFRM_MODE_TUNNEL) return -EINVAL; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 0b59ff7985e6..82c571d07836 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2673,7 +2673,7 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload, x->type_offload = xfrm_get_type_offload(x->id.proto, family, offload); - err = x->type->init_state(x); + err = x->type->init_state(x, extack); if (err) goto error; -- cgit v1.2.3-70-g09d2 From 6ee55320520e31f5dae637e928d5792352b22776 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 27 Sep 2022 17:45:33 +0200 Subject: xfrm: ipcomp: add extack to ipcomp{4,6}_init_state And the shared helper ipcomp_init_state. Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- include/net/ipcomp.h | 2 +- net/ipv4/ipcomp.c | 7 +++++-- net/ipv6/ipcomp6.c | 7 +++++-- net/xfrm/xfrm_ipcomp.c | 10 +++++++--- 4 files changed, 18 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/ipcomp.h b/include/net/ipcomp.h index c31108295079..8660a2a6d1fc 100644 --- a/include/net/ipcomp.h +++ b/include/net/ipcomp.h @@ -22,7 +22,7 @@ struct xfrm_state; int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb); int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb); void ipcomp_destroy(struct xfrm_state *x); -int ipcomp_init_state(struct xfrm_state *x); +int ipcomp_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack); static inline struct ip_comp_hdr *ip_comp_hdr(const struct sk_buff *skb) { diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 230d1120874f..5a4fb2539b08 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -130,17 +130,20 @@ static int ipcomp4_init_state(struct xfrm_state *x, x->props.header_len += sizeof(struct iphdr); break; default: + NL_SET_ERR_MSG(extack, "Unsupported XFRM mode for IPcomp"); goto out; } - err = ipcomp_init_state(x); + err = ipcomp_init_state(x, extack); if (err) goto out; if (x->props.mode == XFRM_MODE_TUNNEL) { err = ipcomp_tunnel_attach(x); - if (err) + if (err) { + NL_SET_ERR_MSG(extack, "Kernel error: failed to initialize the associated state"); goto out; + } } err = 0; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 7e47009739e9..72d4858dec18 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -149,17 +149,20 @@ static int ipcomp6_init_state(struct xfrm_state *x, x->props.header_len += sizeof(struct ipv6hdr); break; default: + NL_SET_ERR_MSG(extack, "Unsupported XFRM mode for IPcomp"); goto out; } - err = ipcomp_init_state(x); + err = ipcomp_init_state(x, extack); if (err) goto out; if (x->props.mode == XFRM_MODE_TUNNEL) { err = ipcomp6_tunnel_attach(x); - if (err) + if (err) { + NL_SET_ERR_MSG(extack, "Kernel error: failed to initialize the associated state"); goto out; + } } err = 0; diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index cb40ff0ff28d..656045a87606 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -325,18 +325,22 @@ void ipcomp_destroy(struct xfrm_state *x) } EXPORT_SYMBOL_GPL(ipcomp_destroy); -int ipcomp_init_state(struct xfrm_state *x) +int ipcomp_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) { int err; struct ipcomp_data *ipcd; struct xfrm_algo_desc *calg_desc; err = -EINVAL; - if (!x->calg) + if (!x->calg) { + NL_SET_ERR_MSG(extack, "Missing required compression algorithm"); goto out; + } - if (x->encap) + if (x->encap) { + NL_SET_ERR_MSG(extack, "IPComp is not compatible with encapsulation"); goto out; + } err = -ENOMEM; ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL); -- cgit v1.2.3-70-g09d2 From aac4daa8941ea6566563ac001e9e5d4e54a674e2 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 28 Sep 2022 12:51:57 +0300 Subject: net/sched: query offload capabilities through ndo_setup_tc() When adding optional new features to Qdisc offloads, existing drivers must reject the new configuration until they are coded up to act on it. Since modifying all drivers in lockstep with the changes in the Qdisc can create problems of its own, it would be nice if there existed an automatic opt-in mechanism for offloading optional features. Jakub proposes that we multiplex one more kind of call through ndo_setup_tc(): one where the driver populates a Qdisc-specific capability structure. First user will be taprio in further changes. Here we are introducing the definitions for the base functionality. Link: https://patchwork.kernel.org/project/netdevbpf/patch/20220923163310.3192733-3-vladimir.oltean@nxp.com/ Suggested-by: Jakub Kicinski Co-developed-by: Jakub Kicinski Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 + include/net/pkt_sched.h | 5 +++++ include/net/sch_generic.h | 3 +++ net/sched/sch_api.c | 17 +++++++++++++++++ 4 files changed, 26 insertions(+) (limited to 'include/net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 64c3a5864969..eddf8ee270e7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -940,6 +940,7 @@ struct net_device_path_ctx { }; enum tc_setup_type { + TC_QUERY_CAPS, TC_SETUP_QDISC_MQPRIO, TC_SETUP_CLSU32, TC_SETUP_CLSFLOWER, diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 2ff80cd04c5c..34600292fdfb 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -141,6 +141,11 @@ static inline struct net *qdisc_net(struct Qdisc *q) return dev_net(q->dev_queue->dev); } +struct tc_query_caps_base { + enum tc_setup_type type; + void *caps; +}; + struct tc_cbs_qopt_offload { u8 enable; s32 queue; diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 32819299937d..d5517719af4e 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -677,6 +677,9 @@ qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch, { } #endif +void qdisc_offload_query_caps(struct net_device *dev, + enum tc_setup_type type, + void *caps, size_t caps_len); struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, const struct Qdisc_ops *ops, struct netlink_ext_ack *extack); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index db1569fac57c..7c15f1f3da17 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -868,6 +868,23 @@ void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch, } EXPORT_SYMBOL(qdisc_offload_graft_helper); +void qdisc_offload_query_caps(struct net_device *dev, + enum tc_setup_type type, + void *caps, size_t caps_len) +{ + const struct net_device_ops *ops = dev->netdev_ops; + struct tc_query_caps_base base = { + .type = type, + .caps = caps, + }; + + memset(caps, 0, caps_len); + + if (ops->ndo_setup_tc) + ops->ndo_setup_tc(dev, TC_QUERY_CAPS, &base); +} +EXPORT_SYMBOL(qdisc_offload_query_caps); + static void qdisc_offload_graft_root(struct net_device *dev, struct Qdisc *new, struct Qdisc *old, struct netlink_ext_ack *extack) -- cgit v1.2.3-70-g09d2 From a54fc09e4cba3004443aa05979f8c678196c8226 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 28 Sep 2022 12:51:58 +0300 Subject: net/sched: taprio: allow user input of per-tc max SDU IEEE 802.1Q clause 12.29.1.1 "The queueMaxSDUTable structure and data types" and 8.6.8.4 "Enhancements for scheduled traffic" talk about the existence of a per traffic class limitation of maximum frame sizes, with a fallback on the port-based MTU. As far as I am able to understand, the 802.1Q Service Data Unit (SDU) represents the MAC Service Data Unit (MSDU, i.e. L2 payload), excluding any number of prepended VLAN headers which may be otherwise present in the MSDU. Therefore, the queueMaxSDU is directly comparable to the device MTU (1500 means L2 payload sizes are accepted, or frame sizes of 1518 octets, or 1522 plus one VLAN header). Drivers which offload this are directly responsible of translating into other units of measurement. To keep the fast path checks optimized, we keep 2 arrays in the qdisc, one for max_sdu translated into frame length (so that it's comparable to skb->len), and another for offloading and for dumping back to the user. Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- include/net/pkt_sched.h | 5 ++ include/uapi/linux/pkt_sched.h | 11 +++ net/sched/sch_taprio.c | 152 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 167 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 34600292fdfb..38207873eda6 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -160,6 +160,10 @@ struct tc_etf_qopt_offload { s32 queue; }; +struct tc_taprio_caps { + bool supports_queue_max_sdu:1; +}; + struct tc_taprio_sched_entry { u8 command; /* TC_TAPRIO_CMD_* */ @@ -173,6 +177,7 @@ struct tc_taprio_qopt_offload { ktime_t base_time; u64 cycle_time; u64 cycle_time_extension; + u32 max_sdu[TC_MAX_QUEUE]; size_t num_entries; struct tc_taprio_sched_entry entries[]; diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index f292b467b27f..000eec106856 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -1232,6 +1232,16 @@ enum { #define TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST _BITUL(0) #define TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD _BITUL(1) +enum { + TCA_TAPRIO_TC_ENTRY_UNSPEC, + TCA_TAPRIO_TC_ENTRY_INDEX, /* u32 */ + TCA_TAPRIO_TC_ENTRY_MAX_SDU, /* u32 */ + + /* add new constants above here */ + __TCA_TAPRIO_TC_ENTRY_CNT, + TCA_TAPRIO_TC_ENTRY_MAX = (__TCA_TAPRIO_TC_ENTRY_CNT - 1) +}; + enum { TCA_TAPRIO_ATTR_UNSPEC, TCA_TAPRIO_ATTR_PRIOMAP, /* struct tc_mqprio_qopt */ @@ -1245,6 +1255,7 @@ enum { TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION, /* s64 */ TCA_TAPRIO_ATTR_FLAGS, /* u32 */ TCA_TAPRIO_ATTR_TXTIME_DELAY, /* u32 */ + TCA_TAPRIO_ATTR_TC_ENTRY, /* nest */ __TCA_TAPRIO_ATTR_MAX, }; diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 0bc6d90e1e51..435d866fcfa0 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -78,6 +78,8 @@ struct taprio_sched { struct sched_gate_list __rcu *admin_sched; struct hrtimer advance_timer; struct list_head taprio_list; + u32 max_frm_len[TC_MAX_QUEUE]; /* for the fast path */ + u32 max_sdu[TC_MAX_QUEUE]; /* for dump and offloading */ u32 txtime_delay; }; @@ -415,6 +417,9 @@ static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch, struct Qdisc *child, struct sk_buff **to_free) { struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + int prio = skb->priority; + u8 tc; /* sk_flags are only safe to use on full sockets. */ if (skb->sk && sk_fullsock(skb->sk) && sock_flag(skb->sk, SOCK_TXTIME)) { @@ -426,6 +431,11 @@ static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch, return qdisc_drop(skb, sch, to_free); } + /* Devices with full offload are expected to honor this in hardware */ + tc = netdev_get_prio_tc_map(dev, prio); + if (skb->len > q->max_frm_len[tc]) + return qdisc_drop(skb, sch, to_free); + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; @@ -754,6 +764,11 @@ static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { [TCA_TAPRIO_SCHED_ENTRY_INTERVAL] = { .type = NLA_U32 }, }; +static const struct nla_policy taprio_tc_policy[TCA_TAPRIO_TC_ENTRY_MAX + 1] = { + [TCA_TAPRIO_TC_ENTRY_INDEX] = { .type = NLA_U32 }, + [TCA_TAPRIO_TC_ENTRY_MAX_SDU] = { .type = NLA_U32 }, +}; + static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { [TCA_TAPRIO_ATTR_PRIOMAP] = { .len = sizeof(struct tc_mqprio_qopt) @@ -766,6 +781,7 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 }, [TCA_TAPRIO_ATTR_FLAGS] = { .type = NLA_U32 }, [TCA_TAPRIO_ATTR_TXTIME_DELAY] = { .type = NLA_U32 }, + [TCA_TAPRIO_ATTR_TC_ENTRY] = { .type = NLA_NESTED }, }; static int fill_sched_entry(struct taprio_sched *q, struct nlattr **tb, @@ -1216,7 +1232,8 @@ static int taprio_enable_offload(struct net_device *dev, { const struct net_device_ops *ops = dev->netdev_ops; struct tc_taprio_qopt_offload *offload; - int err = 0; + struct tc_taprio_caps caps; + int tc, err = 0; if (!ops->ndo_setup_tc) { NL_SET_ERR_MSG(extack, @@ -1224,6 +1241,19 @@ static int taprio_enable_offload(struct net_device *dev, return -EOPNOTSUPP; } + qdisc_offload_query_caps(dev, TC_SETUP_QDISC_TAPRIO, + &caps, sizeof(caps)); + + if (!caps.supports_queue_max_sdu) { + for (tc = 0; tc < TC_MAX_QUEUE; tc++) { + if (q->max_sdu[tc]) { + NL_SET_ERR_MSG_MOD(extack, + "Device does not handle queueMaxSDU"); + return -EOPNOTSUPP; + } + } + } + offload = taprio_offload_alloc(sched->num_entries); if (!offload) { NL_SET_ERR_MSG(extack, @@ -1233,6 +1263,9 @@ static int taprio_enable_offload(struct net_device *dev, offload->enable = 1; taprio_sched_to_offload(dev, sched, offload); + for (tc = 0; tc < TC_MAX_QUEUE; tc++) + offload->max_sdu[tc] = q->max_sdu[tc]; + err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload); if (err < 0) { NL_SET_ERR_MSG(extack, @@ -1367,6 +1400,89 @@ out: return err; } +static int taprio_parse_tc_entry(struct Qdisc *sch, + struct nlattr *opt, + u32 max_sdu[TC_QOPT_MAX_QUEUE], + unsigned long *seen_tcs, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[TCA_TAPRIO_TC_ENTRY_MAX + 1] = { }; + struct net_device *dev = qdisc_dev(sch); + u32 val = 0; + int err, tc; + + err = nla_parse_nested(tb, TCA_TAPRIO_TC_ENTRY_MAX, opt, + taprio_tc_policy, extack); + if (err < 0) + return err; + + if (!tb[TCA_TAPRIO_TC_ENTRY_INDEX]) { + NL_SET_ERR_MSG_MOD(extack, "TC entry index missing"); + return -EINVAL; + } + + tc = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_INDEX]); + if (tc >= TC_QOPT_MAX_QUEUE) { + NL_SET_ERR_MSG_MOD(extack, "TC entry index out of range"); + return -ERANGE; + } + + if (*seen_tcs & BIT(tc)) { + NL_SET_ERR_MSG_MOD(extack, "Duplicate TC entry"); + return -EINVAL; + } + + *seen_tcs |= BIT(tc); + + if (tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU]) + val = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU]); + + if (val > dev->max_mtu) { + NL_SET_ERR_MSG_MOD(extack, "TC max SDU exceeds device max MTU"); + return -ERANGE; + } + + max_sdu[tc] = val; + + return 0; +} + +static int taprio_parse_tc_entries(struct Qdisc *sch, + struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + u32 max_sdu[TC_QOPT_MAX_QUEUE]; + unsigned long seen_tcs = 0; + struct nlattr *n; + int tc, rem; + int err = 0; + + for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) + max_sdu[tc] = q->max_sdu[tc]; + + nla_for_each_nested(n, opt, rem) { + if (nla_type(n) != TCA_TAPRIO_ATTR_TC_ENTRY) + continue; + + err = taprio_parse_tc_entry(sch, n, max_sdu, &seen_tcs, extack); + if (err) + goto out; + } + + for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) { + q->max_sdu[tc] = max_sdu[tc]; + if (max_sdu[tc]) + q->max_frm_len[tc] = max_sdu[tc] + dev->hard_header_len; + else + q->max_frm_len[tc] = U32_MAX; /* never oversized */ + } + +out: + return err; +} + static int taprio_mqprio_cmp(const struct net_device *dev, const struct tc_mqprio_qopt *mqprio) { @@ -1445,6 +1561,10 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, if (err < 0) return err; + err = taprio_parse_tc_entries(sch, opt, extack); + if (err) + return err; + new_admin = kzalloc(sizeof(*new_admin), GFP_KERNEL); if (!new_admin) { NL_SET_ERR_MSG(extack, "Not enough memory for a new schedule"); @@ -1825,6 +1945,33 @@ error_nest: return -1; } +static int taprio_dump_tc_entries(struct taprio_sched *q, struct sk_buff *skb) +{ + struct nlattr *n; + int tc; + + for (tc = 0; tc < TC_MAX_QUEUE; tc++) { + n = nla_nest_start(skb, TCA_TAPRIO_ATTR_TC_ENTRY); + if (!n) + return -EMSGSIZE; + + if (nla_put_u32(skb, TCA_TAPRIO_TC_ENTRY_INDEX, tc)) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_TAPRIO_TC_ENTRY_MAX_SDU, + q->max_sdu[tc])) + goto nla_put_failure; + + nla_nest_end(skb, n); + } + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, n); + return -EMSGSIZE; +} + static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) { struct taprio_sched *q = qdisc_priv(sch); @@ -1863,6 +2010,9 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_u32(skb, TCA_TAPRIO_ATTR_TXTIME_DELAY, q->txtime_delay)) goto options_error; + if (taprio_dump_tc_entries(q, skb)) + goto options_error; + if (oper && dump_schedule(skb, oper)) goto options_error; -- cgit v1.2.3-70-g09d2 From f4ce91ce12a7c6ead19b128ffa8cff6e3ded2a14 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Wed, 28 Sep 2022 16:03:31 -0400 Subject: tcp: fix tcp_cwnd_validate() to not forget is_cwnd_limited This commit fixes a bug in the tracking of max_packets_out and is_cwnd_limited. This bug can cause the connection to fail to remember that is_cwnd_limited is true, causing the connection to fail to grow cwnd when it should, causing throughput to be lower than it should be. The following event sequence is an example that triggers the bug: (a) The connection is cwnd_limited, but packets_out is not at its peak due to TSO deferral deciding not to send another skb yet. In such cases the connection can advance max_packets_seq and set tp->is_cwnd_limited to true and max_packets_out to a small number. (b) Then later in the round trip the connection is pacing-limited (not cwnd-limited), and packets_out is larger. In such cases the connection would raise max_packets_out to a bigger number but (unexpectedly) flip tp->is_cwnd_limited from true to false. This commit fixes that bug. One straightforward fix would be to separately track (a) the next window after max_packets_out reaches a maximum, and (b) the next window after tp->is_cwnd_limited is set to true. But this would require consuming an extra u32 sequence number. Instead, to save space we track only the most important information. Specifically, we track the strongest available signal of the degree to which the cwnd is fully utilized: (1) If the connection is cwnd-limited then we remember that fact for the current window. (2) If the connection not cwnd-limited then we track the maximum number of outstanding packets in the current window. In particular, note that the new logic cannot trigger the buggy (a)/(b) sequence above because with the new logic a condition where tp->packets_out > tp->max_packets_out can only trigger an update of tp->is_cwnd_limited if tp->is_cwnd_limited is false. This first showed up in a testing of a BBRv2 dev branch, but this buggy behavior highlighted a general issue with the tcp_cwnd_validate() logic that can cause cwnd to fail to increase at the proper rate for any TCP congestion control, including Reno or CUBIC. Fixes: ca8a22634381 ("tcp: make cwnd-limited checks measurement-based, and gentler") Signed-off-by: Neal Cardwell Signed-off-by: Kevin(Yudong) Yang Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 +- include/net/tcp.h | 5 ++++- net/ipv4/tcp.c | 2 ++ net/ipv4/tcp_output.c | 19 ++++++++++++------- 4 files changed, 19 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index a9fbe22732c3..4791fd801945 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -295,7 +295,7 @@ struct tcp_sock { u32 packets_out; /* Packets which are "in flight" */ u32 retrans_out; /* Retransmitted packets out */ u32 max_packets_out; /* max packets_out in last window */ - u32 max_packets_seq; /* right edge of max_packets_out flight */ + u32 cwnd_usage_seq; /* right edge of cwnd usage tracking flight */ u16 urg_data; /* Saved octet of OOB data and control flags */ u8 ecn_flags; /* ECN status bits. */ diff --git a/include/net/tcp.h b/include/net/tcp.h index d10962b9f0d0..95c1d51393ac 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1295,11 +1295,14 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); + if (tp->is_cwnd_limited) + return true; + /* If in slow start, ensure cwnd grows to twice what was ACKed. */ if (tcp_in_slow_start(tp)) return tcp_snd_cwnd(tp) < 2 * tp->max_packets_out; - return tp->is_cwnd_limited; + return false; } /* BBR congestion control needs pacing. diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e373dde1f46f..997a80ce1e13 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3137,6 +3137,8 @@ int tcp_disconnect(struct sock *sk, int flags) tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tcp_snd_cwnd_set(tp, TCP_INIT_CWND); tp->snd_cwnd_cnt = 0; + tp->is_cwnd_limited = 0; + tp->max_packets_out = 0; tp->window_clamp = 0; tp->delivered = 0; tp->delivered_ce = 0; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 290019de766d..c69f4d966024 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1875,15 +1875,20 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; struct tcp_sock *tp = tcp_sk(sk); - /* Track the maximum number of outstanding packets in each - * window, and remember whether we were cwnd-limited then. + /* Track the strongest available signal of the degree to which the cwnd + * is fully utilized. If cwnd-limited then remember that fact for the + * current window. If not cwnd-limited then track the maximum number of + * outstanding packets in the current window. (If cwnd-limited then we + * chose to not update tp->max_packets_out to avoid an extra else + * clause with no functional impact.) */ - if (!before(tp->snd_una, tp->max_packets_seq) || - tp->packets_out > tp->max_packets_out || - is_cwnd_limited) { - tp->max_packets_out = tp->packets_out; - tp->max_packets_seq = tp->snd_nxt; + if (!before(tp->snd_una, tp->cwnd_usage_seq) || + is_cwnd_limited || + (!tp->is_cwnd_limited && + tp->packets_out > tp->max_packets_out)) { tp->is_cwnd_limited = is_cwnd_limited; + tp->max_packets_out = tp->packets_out; + tp->cwnd_usage_seq = tp->snd_nxt; } if (tcp_is_cwnd_limited(sk)) { -- cgit v1.2.3-70-g09d2 From 9ca66afe73da16cd2c529e6770cc5aea0c4454df Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Thu, 29 Sep 2022 00:21:41 -0700 Subject: xsk: Expose min chunk size to drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drivers should be aware of the range of valid UMEM chunk sizes to be able to allocate their internal structures of an appropriate size. It will be used by mlx5e in the following patches. Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan CC: "Björn Töpel" CC: Magnus Karlsson CC: Maciej Fijalkowski Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- include/net/xdp_sock_drv.h | 3 +++ net/xdp/xdp_umem.c | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 0e58c38ce0c1..6406faa3d57d 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -9,6 +9,9 @@ #include #include +#define XDP_UMEM_MIN_CHUNK_SHIFT 11 +#define XDP_UMEM_MIN_CHUNK_SIZE (1 << XDP_UMEM_MIN_CHUNK_SHIFT) + #ifdef CONFIG_XDP_SOCKETS void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries); diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 869b9b9b9fad..4681e8e8ad94 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -19,8 +19,6 @@ #include "xdp_umem.h" #include "xsk_queue.h" -#define XDP_UMEM_MIN_CHUNK_SIZE 2048 - static DEFINE_IDA(umem_ida); static void xdp_umem_unpin_pages(struct xdp_umem *umem) -- cgit v1.2.3-70-g09d2 From f2f1675836015e79ba9720d4f7f02441fd0bb5e5 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Thu, 29 Sep 2022 00:21:47 -0700 Subject: xsk: Remove unused xsk_buff_discard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous commit removed the last usage of xsk_buff_discard in mlx5e, so the function that is no longer used can be removed. Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan CC: "Björn Töpel" CC: Magnus Karlsson CC: Maciej Fijalkowski Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- include/net/xdp_sock_drv.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/net') diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 6406faa3d57d..9c0d860609ba 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -107,13 +107,6 @@ static inline void xsk_buff_free(struct xdp_buff *xdp) xp_free(xskb); } -static inline void xsk_buff_discard(struct xdp_buff *xdp) -{ - struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp); - - xp_release(xskb); -} - static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size) { xdp->data = xdp->data_hard_start + XDP_PACKET_HEADROOM; -- cgit v1.2.3-70-g09d2 From 081adcfe930e4b01a55eaa329b2e453a442f35a9 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 29 Sep 2022 09:28:57 +0200 Subject: net: devlink: introduce a flag to indicate devlink port being registered Instead of relying on devlink pointer not being initialized, introduce an extra flag to indicate if devlink port is registered. This is needed as later on devlink pointer is going to be initialized even in case devlink port is not registered yet. Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 3 ++- net/core/devlink.c | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/devlink.h b/include/net/devlink.h index 264aa98e6da6..bcacd8dab297 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -129,7 +129,8 @@ struct devlink_port { void *type_dev; struct devlink_port_attrs attrs; u8 attrs_set:1, - switch_port:1; + switch_port:1, + registered:1; struct delayed_work type_warn_dw; struct list_head reporter_list; struct mutex reporters_lock; /* Protects reporter_list */ diff --git a/net/core/devlink.c b/net/core/devlink.c index f5bfbdb0301e..17529e6b2bbf 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -372,9 +372,9 @@ static struct devlink *devlink_get_from_attrs(struct net *net, } #define ASSERT_DEVLINK_PORT_REGISTERED(devlink_port) \ - WARN_ON_ONCE(!(devlink_port)->devlink) + WARN_ON_ONCE(!(devlink_port)->registered) #define ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port) \ - WARN_ON_ONCE((devlink_port)->devlink) + WARN_ON_ONCE((devlink_port)->registered) static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink, unsigned int port_index) @@ -9876,6 +9876,7 @@ int devl_port_register(struct devlink *devlink, ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port); + devlink_port->registered = true; devlink_port->devlink = devlink; devlink_port->index = port_index; spin_lock_init(&devlink_port->type_lock); @@ -9934,6 +9935,7 @@ void devl_port_unregister(struct devlink_port *devlink_port) WARN_ON(!list_empty(&devlink_port->reporter_list)); WARN_ON(!list_empty(&devlink_port->region_list)); mutex_destroy(&devlink_port->reporters_lock); + devlink_port->registered = false; } EXPORT_SYMBOL_GPL(devl_port_unregister); -- cgit v1.2.3-70-g09d2 From ae3bbc04d4bfef5d0332cd4edda3ac8f714cea23 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 29 Sep 2022 09:28:58 +0200 Subject: net: devlink: add port_init/fini() helpers to allow pre-register/post-unregister functions Lifetime of some of the devlink objects, like regions, is currently forced to be different for devlink instance and devlink port instance (per-port regions). The reason is that for devlink ports, the internal structures initialization happens only after devlink_port_register() is called. To resolve this inconsistency, introduce new set of helpers to allow driver to initialize devlink pointer and region list before devlink_register() is called. That allows port regions to be created before devlink port registration and destroyed after devlink port unregistration. Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 6 +++++- net/core/devlink.c | 46 +++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 48 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/devlink.h b/include/net/devlink.h index bcacd8dab297..ba6b8b094943 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -130,7 +130,8 @@ struct devlink_port { struct devlink_port_attrs attrs; u8 attrs_set:1, switch_port:1, - registered:1; + registered:1, + initialized:1; struct delayed_work type_warn_dw; struct list_head reporter_list; struct mutex reporters_lock; /* Protects reporter_list */ @@ -1563,6 +1564,9 @@ void devlink_set_features(struct devlink *devlink, u64 features); void devlink_register(struct devlink *devlink); void devlink_unregister(struct devlink *devlink); void devlink_free(struct devlink *devlink); +void devlink_port_init(struct devlink *devlink, + struct devlink_port *devlink_port); +void devlink_port_fini(struct devlink_port *devlink_port); int devl_port_register(struct devlink *devlink, struct devlink_port *devlink_port, unsigned int port_index); diff --git a/net/core/devlink.c b/net/core/devlink.c index 17529e6b2bbf..89baa7c0938b 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -375,6 +375,8 @@ static struct devlink *devlink_get_from_attrs(struct net *net, WARN_ON_ONCE(!(devlink_port)->registered) #define ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port) \ WARN_ON_ONCE((devlink_port)->registered) +#define ASSERT_DEVLINK_PORT_INITIALIZED(devlink_port) \ + WARN_ON_ONCE(!(devlink_port)->initialized) static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink, unsigned int port_index) @@ -9852,6 +9854,44 @@ static void devlink_port_type_warn_cancel(struct devlink_port *devlink_port) cancel_delayed_work_sync(&devlink_port->type_warn_dw); } +/** + * devlink_port_init() - Init devlink port + * + * @devlink: devlink + * @devlink_port: devlink port + * + * Initialize essencial stuff that is needed for functions + * that may be called before devlink port registration. + * Call to this function is optional and not needed + * in case the driver does not use such functions. + */ +void devlink_port_init(struct devlink *devlink, + struct devlink_port *devlink_port) +{ + if (devlink_port->initialized) + return; + devlink_port->devlink = devlink; + INIT_LIST_HEAD(&devlink_port->region_list); + devlink_port->initialized = true; +} +EXPORT_SYMBOL_GPL(devlink_port_init); + +/** + * devlink_port_fini() - Deinitialize devlink port + * + * @devlink_port: devlink port + * + * Deinitialize essencial stuff that is in use for functions + * that may be called after devlink port unregistration. + * Call to this function is optional and not needed + * in case the driver does not use such functions. + */ +void devlink_port_fini(struct devlink_port *devlink_port) +{ + WARN_ON(!list_empty(&devlink_port->region_list)); +} +EXPORT_SYMBOL_GPL(devlink_port_fini); + /** * devl_port_register() - Register devlink port * @@ -9876,14 +9916,13 @@ int devl_port_register(struct devlink *devlink, ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port); + devlink_port_init(devlink, devlink_port); devlink_port->registered = true; - devlink_port->devlink = devlink; devlink_port->index = port_index; spin_lock_init(&devlink_port->type_lock); INIT_LIST_HEAD(&devlink_port->reporter_list); mutex_init(&devlink_port->reporters_lock); list_add_tail(&devlink_port->list, &devlink->port_list); - INIT_LIST_HEAD(&devlink_port->region_list); INIT_DELAYED_WORK(&devlink_port->type_warn_dw, &devlink_port_type_warn); devlink_port_type_warn_schedule(devlink_port); @@ -9933,7 +9972,6 @@ void devl_port_unregister(struct devlink_port *devlink_port) devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL); list_del(&devlink_port->list); WARN_ON(!list_empty(&devlink_port->reporter_list)); - WARN_ON(!list_empty(&devlink_port->region_list)); mutex_destroy(&devlink_port->reporters_lock); devlink_port->registered = false; } @@ -11347,6 +11385,8 @@ devlink_port_region_create(struct devlink_port *port, struct devlink_region *region; int err = 0; + ASSERT_DEVLINK_PORT_INITIALIZED(port); + if (WARN_ON(!ops) || WARN_ON(!ops->destructor)) return ERR_PTR(-EINVAL); -- cgit v1.2.3-70-g09d2 From 61e4a51621587c939672d6a9354f6d0aa3d4e131 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 29 Sep 2022 09:29:02 +0200 Subject: net: dsa: remove bool devlink_port_setup Since dsa_port_devlink_setup() and dsa_port_devlink_teardown() are already called from code paths which only execute once per port (due to the existing bool dp->setup), keeping another dp->devlink_port_setup is redundant, because we can already manage to balance the calls properly (and not call teardown when setup was never called, or call setup twice, or things like that). Signed-off-by: Vladimir Oltean Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 2 -- net/dsa/dsa2.c | 14 ++++++-------- 2 files changed, 6 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index d777eac5694f..ee369670e20e 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -294,8 +294,6 @@ struct dsa_port { u8 lag_tx_enabled:1; - u8 devlink_port_setup:1; - /* Master state bits, valid only on CPU ports */ u8 master_admin_up:1; u8 master_oper_up:1; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 747c0364fb0f..af0e2c0394ac 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -510,7 +510,6 @@ static int dsa_port_devlink_setup(struct dsa_port *dp) ds->ops->port_teardown(ds, dp->index); return err; } - dp->devlink_port_setup = true; return 0; } @@ -520,13 +519,12 @@ static void dsa_port_devlink_teardown(struct dsa_port *dp) struct devlink_port *dlp = &dp->devlink_port; struct dsa_switch *ds = dp->ds; - if (dp->devlink_port_setup) { - devlink_port_unregister(dlp); - if (ds->ops->port_teardown) - ds->ops->port_teardown(ds, dp->index); - devlink_port_fini(dlp); - } - dp->devlink_port_setup = false; + devlink_port_unregister(dlp); + + if (ds->ops->port_teardown) + ds->ops->port_teardown(ds, dp->index); + + devlink_port_fini(dlp); } static int dsa_port_setup(struct dsa_port *dp) -- cgit v1.2.3-70-g09d2 From 402963e34a707e4a8f1854ed86437bc375d65766 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Tue, 27 Sep 2022 20:48:54 +0800 Subject: net: sched: cls_api: introduce tc_cls_bind_class() helper All the bind_class callback duplicate the same logic, this patch introduces tc_cls_bind_class() helper for common usage. Signed-off-by: Zhengchao Shao Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index d376c995d906..4cabb32a2ad9 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -210,6 +210,18 @@ tcf_unbind_filter(struct tcf_proto *tp, struct tcf_result *r) __tcf_unbind_filter(q, r); } +static inline void tc_cls_bind_class(u32 classid, unsigned long cl, + void *q, struct tcf_result *res, + unsigned long base) +{ + if (res->classid == classid) { + if (cl) + __tcf_bind_filter(q, res, base); + else + __tcf_unbind_filter(q, res); + } +} + struct tcf_exts { #ifdef CONFIG_NET_CLS_ACT __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ -- cgit v1.2.3-70-g09d2 From 537dd2d9fb9f4aa7939fb4fcf552ebe4f497bd7e Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Thu, 29 Sep 2022 21:52:02 +0800 Subject: net: Add helper function to parse netlink msg of ip_tunnel_encap Add ip_tunnel_netlink_encap_parms to parse netlink msg of ip_tunnel_encap. Reduces duplicate code, no actual functional changes. Signed-off-by: Liu Jian Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 3 +++ net/ipv4/ip_tunnel_core.c | 35 +++++++++++++++++++++++++++++++++++ net/ipv4/ipip.c | 38 ++------------------------------------ net/ipv6/ip6_tunnel.c | 37 ++----------------------------------- net/ipv6/sit.c | 38 ++------------------------------------ 5 files changed, 44 insertions(+), 107 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index ced80e2f8b58..51da2957cf48 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -302,6 +302,9 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm *p, __u32 fwmark); void ip_tunnel_setup(struct net_device *dev, unsigned int net_id); +bool ip_tunnel_netlink_encap_parms(struct nlattr *data[], + struct ip_tunnel_encap *encap); + extern const struct header_ops ip_tunnel_header_ops; __be16 ip_tunnel_parse_protocol(const struct sk_buff *skb); diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index cc1caab4a654..6d08f7e39191 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -1079,3 +1079,38 @@ EXPORT_SYMBOL(ip_tunnel_parse_protocol); const struct header_ops ip_tunnel_header_ops = { .parse_protocol = ip_tunnel_parse_protocol }; EXPORT_SYMBOL(ip_tunnel_header_ops); + +/* This function returns true when ENCAP attributes are present in the nl msg */ +bool ip_tunnel_netlink_encap_parms(struct nlattr *data[], + struct ip_tunnel_encap *encap) +{ + bool ret = false; + + memset(encap, 0, sizeof(*encap)); + + if (!data) + return ret; + + if (data[IFLA_IPTUN_ENCAP_TYPE]) { + ret = true; + encap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]); + } + + if (data[IFLA_IPTUN_ENCAP_FLAGS]) { + ret = true; + encap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]); + } + + if (data[IFLA_IPTUN_ENCAP_SPORT]) { + ret = true; + encap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]); + } + + if (data[IFLA_IPTUN_ENCAP_DPORT]) { + ret = true; + encap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]); + } + + return ret; +} +EXPORT_SYMBOL_GPL(ip_tunnel_netlink_encap_parms); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 123ea63a04cb..7c64ca06adf3 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -448,40 +448,6 @@ static void ipip_netlink_parms(struct nlattr *data[], *fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]); } -/* This function returns true when ENCAP attributes are present in the nl msg */ -static bool ipip_netlink_encap_parms(struct nlattr *data[], - struct ip_tunnel_encap *ipencap) -{ - bool ret = false; - - memset(ipencap, 0, sizeof(*ipencap)); - - if (!data) - return ret; - - if (data[IFLA_IPTUN_ENCAP_TYPE]) { - ret = true; - ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]); - } - - if (data[IFLA_IPTUN_ENCAP_FLAGS]) { - ret = true; - ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]); - } - - if (data[IFLA_IPTUN_ENCAP_SPORT]) { - ret = true; - ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]); - } - - if (data[IFLA_IPTUN_ENCAP_DPORT]) { - ret = true; - ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]); - } - - return ret; -} - static int ipip_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) @@ -491,7 +457,7 @@ static int ipip_newlink(struct net *src_net, struct net_device *dev, struct ip_tunnel_encap ipencap; __u32 fwmark = 0; - if (ipip_netlink_encap_parms(data, &ipencap)) { + if (ip_tunnel_netlink_encap_parms(data, &ipencap)) { int err = ip_tunnel_encap_setup(t, &ipencap); if (err < 0) @@ -512,7 +478,7 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[], bool collect_md; __u32 fwmark = t->fwmark; - if (ipip_netlink_encap_parms(data, &ipencap)) { + if (ip_tunnel_netlink_encap_parms(data, &ipencap)) { int err = ip_tunnel_encap_setup(t, &ipencap); if (err < 0) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 9e97f3b4c7e8..cc5d5e75b658 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1988,39 +1988,6 @@ static void ip6_tnl_netlink_parms(struct nlattr *data[], parms->fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]); } -static bool ip6_tnl_netlink_encap_parms(struct nlattr *data[], - struct ip_tunnel_encap *ipencap) -{ - bool ret = false; - - memset(ipencap, 0, sizeof(*ipencap)); - - if (!data) - return ret; - - if (data[IFLA_IPTUN_ENCAP_TYPE]) { - ret = true; - ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]); - } - - if (data[IFLA_IPTUN_ENCAP_FLAGS]) { - ret = true; - ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]); - } - - if (data[IFLA_IPTUN_ENCAP_SPORT]) { - ret = true; - ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]); - } - - if (data[IFLA_IPTUN_ENCAP_DPORT]) { - ret = true; - ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]); - } - - return ret; -} - static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) @@ -2033,7 +2000,7 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev, nt = netdev_priv(dev); - if (ip6_tnl_netlink_encap_parms(data, &ipencap)) { + if (ip_tunnel_netlink_encap_parms(data, &ipencap)) { err = ip6_tnl_encap_setup(nt, &ipencap); if (err < 0) return err; @@ -2070,7 +2037,7 @@ static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[], if (dev == ip6n->fb_tnl_dev) return -EINVAL; - if (ip6_tnl_netlink_encap_parms(data, &ipencap)) { + if (ip_tunnel_netlink_encap_parms(data, &ipencap)) { int err = ip6_tnl_encap_setup(t, &ipencap); if (err < 0) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 98f1cf40746f..a8a258f672fa 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1534,40 +1534,6 @@ static void ipip6_netlink_parms(struct nlattr *data[], *fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]); } -/* This function returns true when ENCAP attributes are present in the nl msg */ -static bool ipip6_netlink_encap_parms(struct nlattr *data[], - struct ip_tunnel_encap *ipencap) -{ - bool ret = false; - - memset(ipencap, 0, sizeof(*ipencap)); - - if (!data) - return ret; - - if (data[IFLA_IPTUN_ENCAP_TYPE]) { - ret = true; - ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]); - } - - if (data[IFLA_IPTUN_ENCAP_FLAGS]) { - ret = true; - ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]); - } - - if (data[IFLA_IPTUN_ENCAP_SPORT]) { - ret = true; - ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]); - } - - if (data[IFLA_IPTUN_ENCAP_DPORT]) { - ret = true; - ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]); - } - - return ret; -} - #ifdef CONFIG_IPV6_SIT_6RD /* This function returns true when 6RD attributes are present in the nl msg */ static bool ipip6_netlink_6rd_parms(struct nlattr *data[], @@ -1619,7 +1585,7 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev, nt = netdev_priv(dev); - if (ipip6_netlink_encap_parms(data, &ipencap)) { + if (ip_tunnel_netlink_encap_parms(data, &ipencap)) { err = ip_tunnel_encap_setup(nt, &ipencap); if (err < 0) return err; @@ -1671,7 +1637,7 @@ static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[], if (dev == sitn->fb_tunnel_dev) return -EINVAL; - if (ipip6_netlink_encap_parms(data, &ipencap)) { + if (ip_tunnel_netlink_encap_parms(data, &ipencap)) { err = ip_tunnel_encap_setup(t, &ipencap); if (err < 0) return err; -- cgit v1.2.3-70-g09d2 From b86fca800a6a3d439c454b462f7f067a18234e60 Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Thu, 29 Sep 2022 21:52:03 +0800 Subject: net: Add helper function to parse netlink msg of ip_tunnel_parm Add ip_tunnel_netlink_parms to parse netlink msg of ip_tunnel_parm. Reduces duplicate code, no actual functional changes. Signed-off-by: Liu Jian Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 3 +++ net/ipv4/ip_tunnel_core.c | 32 ++++++++++++++++++++++++++++++++ net/ipv4/ipip.c | 24 +----------------------- net/ipv6/sit.c | 27 +-------------------------- 4 files changed, 37 insertions(+), 49 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 51da2957cf48..fca357679816 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -305,6 +305,9 @@ void ip_tunnel_setup(struct net_device *dev, unsigned int net_id); bool ip_tunnel_netlink_encap_parms(struct nlattr *data[], struct ip_tunnel_encap *encap); +void ip_tunnel_netlink_parms(struct nlattr *data[], + struct ip_tunnel_parm *parms); + extern const struct header_ops ip_tunnel_header_ops; __be16 ip_tunnel_parse_protocol(const struct sk_buff *skb); diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 6d08f7e39191..92c02c886fe7 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -1114,3 +1114,35 @@ bool ip_tunnel_netlink_encap_parms(struct nlattr *data[], return ret; } EXPORT_SYMBOL_GPL(ip_tunnel_netlink_encap_parms); + +void ip_tunnel_netlink_parms(struct nlattr *data[], + struct ip_tunnel_parm *parms) +{ + if (data[IFLA_IPTUN_LINK]) + parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]); + + if (data[IFLA_IPTUN_LOCAL]) + parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]); + + if (data[IFLA_IPTUN_REMOTE]) + parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]); + + if (data[IFLA_IPTUN_TTL]) { + parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]); + if (parms->iph.ttl) + parms->iph.frag_off = htons(IP_DF); + } + + if (data[IFLA_IPTUN_TOS]) + parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]); + + if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC])) + parms->iph.frag_off = htons(IP_DF); + + if (data[IFLA_IPTUN_FLAGS]) + parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]); + + if (data[IFLA_IPTUN_PROTO]) + parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]); +} +EXPORT_SYMBOL_GPL(ip_tunnel_netlink_parms); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 7c64ca06adf3..180f9daf5bec 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -417,29 +417,7 @@ static void ipip_netlink_parms(struct nlattr *data[], if (!data) return; - if (data[IFLA_IPTUN_LINK]) - parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]); - - if (data[IFLA_IPTUN_LOCAL]) - parms->iph.saddr = nla_get_in_addr(data[IFLA_IPTUN_LOCAL]); - - if (data[IFLA_IPTUN_REMOTE]) - parms->iph.daddr = nla_get_in_addr(data[IFLA_IPTUN_REMOTE]); - - if (data[IFLA_IPTUN_TTL]) { - parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]); - if (parms->iph.ttl) - parms->iph.frag_off = htons(IP_DF); - } - - if (data[IFLA_IPTUN_TOS]) - parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]); - - if (data[IFLA_IPTUN_PROTO]) - parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]); - - if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC])) - parms->iph.frag_off = htons(IP_DF); + ip_tunnel_netlink_parms(data, parms); if (data[IFLA_IPTUN_COLLECT_METADATA]) *collect_md = true; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index a8a258f672fa..d27683e3fc97 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1503,32 +1503,7 @@ static void ipip6_netlink_parms(struct nlattr *data[], if (!data) return; - if (data[IFLA_IPTUN_LINK]) - parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]); - - if (data[IFLA_IPTUN_LOCAL]) - parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]); - - if (data[IFLA_IPTUN_REMOTE]) - parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]); - - if (data[IFLA_IPTUN_TTL]) { - parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]); - if (parms->iph.ttl) - parms->iph.frag_off = htons(IP_DF); - } - - if (data[IFLA_IPTUN_TOS]) - parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]); - - if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC])) - parms->iph.frag_off = htons(IP_DF); - - if (data[IFLA_IPTUN_FLAGS]) - parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]); - - if (data[IFLA_IPTUN_PROTO]) - parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]); + ip_tunnel_netlink_parms(data, parms); if (data[IFLA_IPTUN_FWMARK]) *fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]); -- cgit v1.2.3-70-g09d2 From 9bc61c04ff6cce6a3756b86e6b34914f7b39d734 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 30 Sep 2022 16:37:30 +0200 Subject: net: Remove DECnet leftovers from flow.h. DECnet was removed by commit 1202cdd66531 ("Remove DECnet support from kernel"). Let's also revome its flow structure. Compile-tested only (allmodconfig). Signed-off-by: Guillaume Nault Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/flow.h | 26 -------------------------- 1 file changed, 26 deletions(-) (limited to 'include/net') diff --git a/include/net/flow.h b/include/net/flow.h index 987bd511d652..2f0da4f0318b 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -54,11 +54,6 @@ union flowi_uli { __u8 code; } icmpt; - struct { - __le16 dport; - __le16 sport; - } dnports; - __be32 gre_key; struct { @@ -156,27 +151,11 @@ struct flowi6 { __u32 mp_hash; } __attribute__((__aligned__(BITS_PER_LONG/8))); -struct flowidn { - struct flowi_common __fl_common; -#define flowidn_oif __fl_common.flowic_oif -#define flowidn_iif __fl_common.flowic_iif -#define flowidn_mark __fl_common.flowic_mark -#define flowidn_scope __fl_common.flowic_scope -#define flowidn_proto __fl_common.flowic_proto -#define flowidn_flags __fl_common.flowic_flags - __le16 daddr; - __le16 saddr; - union flowi_uli uli; -#define fld_sport uli.ports.sport -#define fld_dport uli.ports.dport -} __attribute__((__aligned__(BITS_PER_LONG/8))); - struct flowi { union { struct flowi_common __fl_common; struct flowi4 ip4; struct flowi6 ip6; - struct flowidn dn; } u; #define flowi_oif u.__fl_common.flowic_oif #define flowi_iif u.__fl_common.flowic_iif @@ -211,11 +190,6 @@ static inline struct flowi_common *flowi6_to_flowi_common(struct flowi6 *fl6) return &(fl6->__fl_common); } -static inline struct flowi *flowidn_to_flowi(struct flowidn *fldn) -{ - return container_of(fldn, struct flowi, u.dn); -} - __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys); #endif -- cgit v1.2.3-70-g09d2 From 820dc0523e05c12810bb6bf4e56ce26e4c1948a2 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 30 Sep 2022 00:38:43 +0200 Subject: net: netfilter: move bpf_ct_set_nat_info kfunc in nf_nat_bpf.c Remove circular dependency between nf_nat module and nf_conntrack one moving bpf_ct_set_nat_info kfunc in nf_nat_bpf.c Fixes: 0fabd2aa199f ("net: netfilter: add bpf_ct_set_nat_info kfunc helper") Suggested-by: Kumar Kartikeya Dwivedi Tested-by: Nathan Chancellor Tested-by: Yauheni Kaliuta Signed-off-by: Lorenzo Bianconi Acked-by: John Fastabend Link: https://lore.kernel.org/r/51a65513d2cda3eeb0754842e8025ab3966068d8.1664490511.git.lorenzo@kernel.org Signed-off-by: Alexei Starovoitov --- include/net/netfilter/nf_conntrack_bpf.h | 19 ++++++++ net/netfilter/Makefile | 6 +++ net/netfilter/nf_conntrack_bpf.c | 50 -------------------- net/netfilter/nf_nat_bpf.c | 79 ++++++++++++++++++++++++++++++++ net/netfilter/nf_nat_core.c | 4 +- 5 files changed, 106 insertions(+), 52 deletions(-) create mode 100644 net/netfilter/nf_nat_bpf.c (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_bpf.h b/include/net/netfilter/nf_conntrack_bpf.h index c8b80add1142..2d0da478c8e0 100644 --- a/include/net/netfilter/nf_conntrack_bpf.h +++ b/include/net/netfilter/nf_conntrack_bpf.h @@ -4,6 +4,11 @@ #define _NF_CONNTRACK_BPF_H #include +#include + +struct nf_conn___init { + struct nf_conn ct; +}; #if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \ (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) @@ -24,4 +29,18 @@ static inline void cleanup_nf_conntrack_bpf(void) #endif +#if (IS_BUILTIN(CONFIG_NF_NAT) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \ + (IS_MODULE(CONFIG_NF_NAT) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) + +extern int register_nf_nat_bpf(void); + +#else + +static inline int register_nf_nat_bpf(void) +{ + return 0; +} + +#endif + #endif /* _NF_CONNTRACK_BPF_H */ diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 06df49ea6329..0f060d100880 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -60,6 +60,12 @@ obj-$(CONFIG_NF_NAT) += nf_nat.o nf_nat-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o nf_nat-$(CONFIG_NF_NAT_MASQUERADE) += nf_nat_masquerade.o +ifeq ($(CONFIG_NF_NAT),m) +nf_nat-$(CONFIG_DEBUG_INFO_BTF_MODULES) += nf_nat_bpf.o +else ifeq ($(CONFIG_NF_NAT),y) +nf_nat-$(CONFIG_DEBUG_INFO_BTF) += nf_nat_bpf.o +endif + # NAT helpers obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c index 756ea818574e..8639e7efd0e2 100644 --- a/net/netfilter/nf_conntrack_bpf.c +++ b/net/netfilter/nf_conntrack_bpf.c @@ -14,10 +14,8 @@ #include #include #include -#include #include #include -#include /* bpf_ct_opts - Options for CT lookup helpers * @@ -239,10 +237,6 @@ __diag_push(); __diag_ignore_all("-Wmissing-prototypes", "Global functions as their definitions will be in nf_conntrack BTF"); -struct nf_conn___init { - struct nf_conn ct; -}; - /* bpf_xdp_ct_alloc - Allocate a new CT entry * * Parameters: @@ -476,49 +470,6 @@ int bpf_ct_change_status(struct nf_conn *nfct, u32 status) return nf_ct_change_status_common(nfct, status); } -/* bpf_ct_set_nat_info - Set source or destination nat address - * - * Set source or destination nat address of the newly allocated - * nf_conn before insertion. This must be invoked for referenced - * PTR_TO_BTF_ID to nf_conn___init. - * - * Parameters: - * @nfct - Pointer to referenced nf_conn object, obtained using - * bpf_xdp_ct_alloc or bpf_skb_ct_alloc. - * @addr - Nat source/destination address - * @port - Nat source/destination port. Non-positive values are - * interpreted as select a random port. - * @manip - NF_NAT_MANIP_SRC or NF_NAT_MANIP_DST - */ -int bpf_ct_set_nat_info(struct nf_conn___init *nfct, - union nf_inet_addr *addr, int port, - enum nf_nat_manip_type manip) -{ -#if ((IS_MODULE(CONFIG_NF_NAT) && IS_MODULE(CONFIG_NF_CONNTRACK)) || \ - IS_BUILTIN(CONFIG_NF_NAT)) - struct nf_conn *ct = (struct nf_conn *)nfct; - u16 proto = nf_ct_l3num(ct); - struct nf_nat_range2 range; - - if (proto != NFPROTO_IPV4 && proto != NFPROTO_IPV6) - return -EINVAL; - - memset(&range, 0, sizeof(struct nf_nat_range2)); - range.flags = NF_NAT_RANGE_MAP_IPS; - range.min_addr = *addr; - range.max_addr = range.min_addr; - if (port > 0) { - range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; - range.min_proto.all = cpu_to_be16(port); - range.max_proto.all = range.min_proto.all; - } - - return nf_nat_setup_info(ct, &range, manip) == NF_DROP ? -ENOMEM : 0; -#else - return -EOPNOTSUPP; -#endif -} - __diag_pop() BTF_SET8_START(nf_ct_kfunc_set) @@ -532,7 +483,6 @@ BTF_ID_FLAGS(func, bpf_ct_set_timeout, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_ct_change_timeout, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_ct_set_status, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_ct_change_status, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_ct_set_nat_info, KF_TRUSTED_ARGS) BTF_SET8_END(nf_ct_kfunc_set) static const struct btf_kfunc_id_set nf_conntrack_kfunc_set = { diff --git a/net/netfilter/nf_nat_bpf.c b/net/netfilter/nf_nat_bpf.c new file mode 100644 index 000000000000..0fa5a0bbb0ff --- /dev/null +++ b/net/netfilter/nf_nat_bpf.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Unstable NAT Helpers for XDP and TC-BPF hook + * + * These are called from the XDP and SCHED_CLS BPF programs. Note that it is + * allowed to break compatibility for these functions since the interface they + * are exposed through to BPF programs is explicitly unstable. + */ + +#include +#include +#include +#include +#include + +__diag_push(); +__diag_ignore_all("-Wmissing-prototypes", + "Global functions as their definitions will be in nf_nat BTF"); + +/* bpf_ct_set_nat_info - Set source or destination nat address + * + * Set source or destination nat address of the newly allocated + * nf_conn before insertion. This must be invoked for referenced + * PTR_TO_BTF_ID to nf_conn___init. + * + * Parameters: + * @nfct - Pointer to referenced nf_conn object, obtained using + * bpf_xdp_ct_alloc or bpf_skb_ct_alloc. + * @addr - Nat source/destination address + * @port - Nat source/destination port. Non-positive values are + * interpreted as select a random port. + * @manip - NF_NAT_MANIP_SRC or NF_NAT_MANIP_DST + */ +int bpf_ct_set_nat_info(struct nf_conn___init *nfct, + union nf_inet_addr *addr, int port, + enum nf_nat_manip_type manip) +{ + struct nf_conn *ct = (struct nf_conn *)nfct; + u16 proto = nf_ct_l3num(ct); + struct nf_nat_range2 range; + + if (proto != NFPROTO_IPV4 && proto != NFPROTO_IPV6) + return -EINVAL; + + memset(&range, 0, sizeof(struct nf_nat_range2)); + range.flags = NF_NAT_RANGE_MAP_IPS; + range.min_addr = *addr; + range.max_addr = range.min_addr; + if (port > 0) { + range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + range.min_proto.all = cpu_to_be16(port); + range.max_proto.all = range.min_proto.all; + } + + return nf_nat_setup_info(ct, &range, manip) == NF_DROP ? -ENOMEM : 0; +} + +__diag_pop() + +BTF_SET8_START(nf_nat_kfunc_set) +BTF_ID_FLAGS(func, bpf_ct_set_nat_info, KF_TRUSTED_ARGS) +BTF_SET8_END(nf_nat_kfunc_set) + +static const struct btf_kfunc_id_set nf_bpf_nat_kfunc_set = { + .owner = THIS_MODULE, + .set = &nf_nat_kfunc_set, +}; + +int register_nf_nat_bpf(void) +{ + int ret; + + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, + &nf_bpf_nat_kfunc_set); + if (ret) + return ret; + + return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, + &nf_bpf_nat_kfunc_set); +} diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 7981be526f26..d8e6380f6337 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include #include @@ -1152,7 +1152,7 @@ static int __init nf_nat_init(void) WARN_ON(nf_nat_hook != NULL); RCU_INIT_POINTER(nf_nat_hook, &nat_hook); - return 0; + return register_nf_nat_bpf(); } static void __exit nf_nat_cleanup(void) -- cgit v1.2.3-70-g09d2