From abc17a11ed29b0471e428d86189acca8d1a213c6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2023 15:31:55 +0000 Subject: inet: preserve const qualifier in inet_sk() We can change inet_sk() to propagate const qualifier of its argument. This should avoid some potential errors caused by accidental (const -> not_const) promotion. Other helpers like tcp_sk(), udp_sk(), raw_sk() will be handled in separate patch series. v2: use container_of_const() as advised by Jakub and Linus Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/netdev/20230315142841.3a2ac99a@kernel.org/ Link: https://lore.kernel.org/netdev/CAHk-=wiOf12nrYEF2vJMcucKjWPN-Ns_SW9fA7LwST_2Dzp7rw@mail.gmail.com/ Signed-off-by: David S. Miller --- include/trace/events/sock.h | 4 ++-- include/trace/events/tcp.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h index 03d19fc562f8..fd206a6ab5b8 100644 --- a/include/trace/events/sock.h +++ b/include/trace/events/sock.h @@ -158,7 +158,7 @@ TRACE_EVENT(inet_sock_set_state, ), TP_fast_assign( - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); struct in6_addr *pin6; __be32 *p32; @@ -222,7 +222,7 @@ TRACE_EVENT(inet_sk_error_report, ), TP_fast_assign( - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); struct in6_addr *pin6; __be32 *p32; diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 901b440238d5..bf06db8d2046 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -67,7 +67,7 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb, ), TP_fast_assign( - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); __be32 *p32; __entry->skbaddr = skb; -- cgit v1.3.1 From 8cdc3223e78c43e1b60ea1c536a103e32fdca3c5 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 27 Mar 2023 16:54:54 -0700 Subject: ipv6: Remove in6addr_any alternatives. Some code defines the IPv6 wildcard address as a local variable and use it with memcmp() or ipv6_addr_equal(). Let's use in6addr_any and ipv6_addr_any() instead. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Mark Bloch Reviewed-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c | 5 ++--- include/net/ip6_fib.h | 9 +++------ include/trace/events/fib.h | 5 ++--- include/trace/events/fib6.h | 5 +---- net/ethtool/ioctl.c | 10 +++++----- net/ipv4/inet_hashtables.c | 11 ++++------- 6 files changed, 17 insertions(+), 28 deletions(-) (limited to 'include/trace') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index a108e73c9f66..20c2d2ecaf93 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -98,7 +98,6 @@ int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow, #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) else if (ip_version == 6) { int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6); - struct in6_addr zerov6 = {}; daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6); @@ -106,8 +105,8 @@ int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow, outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6); memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size); memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size); - if (!memcmp(&tun_attr->dst_ip.v6, &zerov6, sizeof(zerov6)) || - !memcmp(&tun_attr->src_ip.v6, &zerov6, sizeof(zerov6))) + if (ipv6_addr_any(&tun_attr->dst_ip.v6) || + ipv6_addr_any(&tun_attr->src_ip.v6)) return 0; } #endif diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 79570cb4ea9c..05e6f756feaf 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -469,13 +469,10 @@ void rt6_get_prefsrc(const struct rt6_info *rt, struct in6_addr *addr) rcu_read_lock(); from = rcu_dereference(rt->from); - if (from) { + if (from) *addr = from->fib6_prefsrc.addr; - } else { - struct in6_addr in6_zero = {}; - - *addr = in6_zero; - } + else + *addr = in6addr_any; rcu_read_unlock(); } diff --git a/include/trace/events/fib.h b/include/trace/events/fib.h index c2300c407f58..76297ecd4935 100644 --- a/include/trace/events/fib.h +++ b/include/trace/events/fib.h @@ -36,7 +36,6 @@ TRACE_EVENT(fib_table_lookup, ), TP_fast_assign( - struct in6_addr in6_zero = {}; struct net_device *dev; struct in6_addr *in6; __be32 *p32; @@ -74,7 +73,7 @@ TRACE_EVENT(fib_table_lookup, *p32 = nhc->nhc_gw.ipv4; in6 = (struct in6_addr *)__entry->gw6; - *in6 = in6_zero; + *in6 = in6addr_any; } else if (nhc->nhc_gw_family == AF_INET6) { p32 = (__be32 *) __entry->gw4; *p32 = 0; @@ -87,7 +86,7 @@ TRACE_EVENT(fib_table_lookup, *p32 = 0; in6 = (struct in6_addr *)__entry->gw6; - *in6 = in6_zero; + *in6 = in6addr_any; } ), diff --git a/include/trace/events/fib6.h b/include/trace/events/fib6.h index 6e821eb79450..4d3e607b3cde 100644 --- a/include/trace/events/fib6.h +++ b/include/trace/events/fib6.h @@ -68,11 +68,8 @@ TRACE_EVENT(fib6_table_lookup, strcpy(__entry->name, "-"); } if (res->f6i == net->ipv6.fib6_null_entry) { - struct in6_addr in6_zero = {}; - in6 = (struct in6_addr *)__entry->gw; - *in6 = in6_zero; - + *in6 = in6addr_any; } else if (res->nh) { in6 = (struct in6_addr *)__entry->gw; *in6 = res->nh->fib_nh_gw6; diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 646b3e490c71..59adc4e6e9ee 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -3127,7 +3128,6 @@ struct ethtool_rx_flow_rule * ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input) { const struct ethtool_rx_flow_spec *fs = input->fs; - static struct in6_addr zero_addr = {}; struct ethtool_rx_flow_match *match; struct ethtool_rx_flow_rule *flow; struct flow_action_entry *act; @@ -3233,20 +3233,20 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input) v6_spec = &fs->h_u.tcp_ip6_spec; v6_m_spec = &fs->m_u.tcp_ip6_spec; - if (memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr))) { + if (!ipv6_addr_any((struct in6_addr *)v6_m_spec->ip6src)) { memcpy(&match->key.ipv6.src, v6_spec->ip6src, sizeof(match->key.ipv6.src)); memcpy(&match->mask.ipv6.src, v6_m_spec->ip6src, sizeof(match->mask.ipv6.src)); } - if (memcmp(v6_m_spec->ip6dst, &zero_addr, sizeof(zero_addr))) { + if (!ipv6_addr_any((struct in6_addr *)v6_m_spec->ip6dst)) { memcpy(&match->key.ipv6.dst, v6_spec->ip6dst, sizeof(match->key.ipv6.dst)); memcpy(&match->mask.ipv6.dst, v6_m_spec->ip6dst, sizeof(match->mask.ipv6.dst)); } - if (memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr)) || - memcmp(v6_m_spec->ip6dst, &zero_addr, sizeof(zero_addr))) { + if (!ipv6_addr_any((struct in6_addr *)v6_m_spec->ip6src) || + !ipv6_addr_any((struct in6_addr *)v6_m_spec->ip6dst)) { match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS); match->dissector.offset[FLOW_DISSECTOR_KEY_IPV6_ADDRS] = diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 6edae3886885..e7391bf310a7 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -826,13 +826,11 @@ bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, const unsigned short port, int l3mdev, const struct sock *sk) { #if IS_ENABLED(CONFIG_IPV6) - struct in6_addr addr_any = {}; - if (sk->sk_family != tb->family) { if (sk->sk_family == AF_INET) return net_eq(ib2_net(tb), net) && tb->port == port && tb->l3mdev == l3mdev && - ipv6_addr_equal(&tb->v6_rcv_saddr, &addr_any); + ipv6_addr_any(&tb->v6_rcv_saddr); return false; } @@ -840,7 +838,7 @@ bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, const if (sk->sk_family == AF_INET6) return net_eq(ib2_net(tb), net) && tb->port == port && tb->l3mdev == l3mdev && - ipv6_addr_equal(&tb->v6_rcv_saddr, &addr_any); + ipv6_addr_any(&tb->v6_rcv_saddr); else #endif return net_eq(ib2_net(tb), net) && tb->port == port && @@ -866,11 +864,10 @@ inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, in { struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk); u32 hash; -#if IS_ENABLED(CONFIG_IPV6) - struct in6_addr addr_any = {}; +#if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) - hash = ipv6_portaddr_hash(net, &addr_any, port); + hash = ipv6_portaddr_hash(net, &in6addr_any, port); else #endif hash = ipv4_portaddr_hash(net, 0, port); -- cgit v1.3.1 From 054fbf7ff8143d35ca7d3bb5414bb44ee1574194 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 3 Apr 2023 17:43:16 +0200 Subject: net: qrtr: correct types of trace event parameters The arguments passed to the trace events are of type unsigned int, however the signature of the events used __le32 parameters. I may be missing the point here, but sparse flagged this and it does seem incorrect to me. net/qrtr/ns.c: note: in included file (through include/trace/trace_events.h, include/trace/define_trace.h, include/trace/events/qrtr.h): ./include/trace/events/qrtr.h:11:1: warning: cast to restricted __le32 ./include/trace/events/qrtr.h:11:1: warning: restricted __le32 degrades to integer ./include/trace/events/qrtr.h:11:1: warning: restricted __le32 degrades to integer ... (a lot more similar warnings) net/qrtr/ns.c:115:47: expected restricted __le32 [usertype] service net/qrtr/ns.c:115:47: got unsigned int service net/qrtr/ns.c:115:61: warning: incorrect type in argument 2 (different base types) ... (a lot more similar warnings) Fixes: dfddb54043f0 ("net: qrtr: Add tracepoint support") Reviewed-by: Manivannan Sadhasivam Signed-off-by: Simon Horman Link: https://lore.kernel.org/r/20230402-qrtr-trace-types-v1-1-92ad55008dd3@kernel.org Signed-off-by: Jakub Kicinski --- include/trace/events/qrtr.h | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/qrtr.h b/include/trace/events/qrtr.h index b1de14c3bb93..441132c67133 100644 --- a/include/trace/events/qrtr.h +++ b/include/trace/events/qrtr.h @@ -10,15 +10,16 @@ TRACE_EVENT(qrtr_ns_service_announce_new, - TP_PROTO(__le32 service, __le32 instance, __le32 node, __le32 port), + TP_PROTO(unsigned int service, unsigned int instance, + unsigned int node, unsigned int port), TP_ARGS(service, instance, node, port), TP_STRUCT__entry( - __field(__le32, service) - __field(__le32, instance) - __field(__le32, node) - __field(__le32, port) + __field(unsigned int, service) + __field(unsigned int, instance) + __field(unsigned int, node) + __field(unsigned int, port) ), TP_fast_assign( @@ -36,15 +37,16 @@ TRACE_EVENT(qrtr_ns_service_announce_new, TRACE_EVENT(qrtr_ns_service_announce_del, - TP_PROTO(__le32 service, __le32 instance, __le32 node, __le32 port), + TP_PROTO(unsigned int service, unsigned int instance, + unsigned int node, unsigned int port), TP_ARGS(service, instance, node, port), TP_STRUCT__entry( - __field(__le32, service) - __field(__le32, instance) - __field(__le32, node) - __field(__le32, port) + __field(unsigned int, service) + __field(unsigned int, instance) + __field(unsigned int, node) + __field(unsigned int, port) ), TP_fast_assign( @@ -62,15 +64,16 @@ TRACE_EVENT(qrtr_ns_service_announce_del, TRACE_EVENT(qrtr_ns_server_add, - TP_PROTO(__le32 service, __le32 instance, __le32 node, __le32 port), + TP_PROTO(unsigned int service, unsigned int instance, + unsigned int node, unsigned int port), TP_ARGS(service, instance, node, port), TP_STRUCT__entry( - __field(__le32, service) - __field(__le32, instance) - __field(__le32, node) - __field(__le32, port) + __field(unsigned int, service) + __field(unsigned int, instance) + __field(unsigned int, node) + __field(unsigned int, port) ), TP_fast_assign( -- cgit v1.3.1 From 3b3009ea8abb713b022d94fba95ec270cf6e7eae Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 17 Apr 2023 10:32:26 -0400 Subject: net/handshake: Create a NETLINK service for handling handshake requests When a kernel consumer needs a transport layer security session, it first needs a handshake to negotiate and establish a session. This negotiation can be done in user space via one of the several existing library implementations, or it can be done in the kernel. No in-kernel handshake implementations yet exist. In their absence, we add a netlink service that can: a. Notify a user space daemon that a handshake is needed. b. Once notified, the daemon calls the kernel back via this netlink service to get the handshake parameters, including an open socket on which to establish the session. c. Once the handshake is complete, the daemon reports the session status and other information via a second netlink operation. This operation marks that it is safe for the kernel to use the open socket and the security session established there. The notification service uses a multicast group. Each handshake mechanism (eg, tlshd) adopts its own group number so that the handshake services are completely independent of one another. The kernel can then tell via netlink_has_listeners() whether a handshake service is active and prepared to handle a handshake request. A new netlink operation, ACCEPT, acts like accept(2) in that it instantiates a file descriptor in the user space daemon's fd table. If this operation is successful, the reply carries the fd number, which can be treated as an open and ready file descriptor. While user space is performing the handshake, the kernel keeps its muddy paws off the open socket. A second new netlink operation, DONE, indicates that the user space daemon is finished with the socket and it is safe for the kernel to use again. The operation also indicates whether a session was established successfully. Signed-off-by: Chuck Lever Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/handshake.yaml | 122 +++++++++++ MAINTAINERS | 9 + include/trace/events/handshake.h | 159 ++++++++++++++ include/uapi/linux/handshake.h | 71 ++++++ net/Kconfig | 5 + net/Makefile | 1 + net/handshake/Makefile | 11 + net/handshake/genl.c | 57 +++++ net/handshake/genl.h | 23 ++ net/handshake/handshake.h | 82 +++++++ net/handshake/netlink.c | 312 ++++++++++++++++++++++++++ net/handshake/request.c | 339 +++++++++++++++++++++++++++++ net/handshake/trace.c | 20 ++ 13 files changed, 1211 insertions(+) create mode 100644 Documentation/netlink/specs/handshake.yaml create mode 100644 include/trace/events/handshake.h create mode 100644 include/uapi/linux/handshake.h create mode 100644 net/handshake/Makefile create mode 100644 net/handshake/genl.c create mode 100644 net/handshake/genl.h create mode 100644 net/handshake/handshake.h create mode 100644 net/handshake/netlink.c create mode 100644 net/handshake/request.c create mode 100644 net/handshake/trace.c (limited to 'include/trace') diff --git a/Documentation/netlink/specs/handshake.yaml b/Documentation/netlink/specs/handshake.yaml new file mode 100644 index 000000000000..0333d92b1438 --- /dev/null +++ b/Documentation/netlink/specs/handshake.yaml @@ -0,0 +1,122 @@ +# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) +# +# Author: Chuck Lever +# +# Copyright (c) 2023, Oracle and/or its affiliates. +# + +name: handshake + +protocol: genetlink + +doc: Netlink protocol to request a transport layer security handshake. + +definitions: + - + type: enum + name: handler-class + value-start: 0 + entries: [ none, max ] + - + type: enum + name: msg-type + value-start: 0 + entries: [ unspec, clienthello, serverhello ] + - + type: enum + name: auth + value-start: 0 + entries: [ unspec, unauth, psk, x509 ] + +attribute-sets: + - + name: x509 + attributes: + - + name: cert + type: u32 + - + name: privkey + type: u32 + - + name: accept + attributes: + - + name: sockfd + type: u32 + - + name: handler-class + type: u32 + enum: handler-class + - + name: message-type + type: u32 + enum: msg-type + - + name: timeout + type: u32 + - + name: auth-mode + type: u32 + enum: auth + - + name: peer-identity + type: u32 + multi-attr: true + - + name: certificate + type: nest + nested-attributes: x509 + multi-attr: true + - + name: done + attributes: + - + name: status + type: u32 + - + name: sockfd + type: u32 + - + name: remote-auth + type: u32 + multi-attr: true + +operations: + list: + - + name: ready + doc: Notify handlers that a new handshake request is waiting + notify: accept + - + name: accept + doc: Handler retrieves next queued handshake request + attribute-set: accept + flags: [ admin-perm ] + do: + request: + attributes: + - handler-class + reply: + attributes: + - sockfd + - message-type + - timeout + - auth-mode + - peer-identity + - certificate + - + name: done + doc: Handler reports handshake completion + attribute-set: done + do: + request: + attributes: + - status + - sockfd + - remote-auth + +mcast-groups: + list: + - + name: none diff --git a/MAINTAINERS b/MAINTAINERS index 4fc57dfd5fd0..cdc7748d15b8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8947,6 +8947,15 @@ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/usb/hackrf/ +HANDSHAKE UPCALL FOR TRANSPORT LAYER SECURITY +M: Chuck Lever +L: kernel-tls-handshake@lists.linux.dev +L: netdev@vger.kernel.org +S: Maintained +F: Documentation/netlink/specs/handshake.yaml +F: include/trace/events/handshake.h +F: net/handshake/ + HANTRO VPU CODEC DRIVER M: Ezequiel Garcia M: Philipp Zabel diff --git a/include/trace/events/handshake.h b/include/trace/events/handshake.h new file mode 100644 index 000000000000..8dadcab5f12a --- /dev/null +++ b/include/trace/events/handshake.h @@ -0,0 +1,159 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM handshake + +#if !defined(_TRACE_HANDSHAKE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_HANDSHAKE_H + +#include +#include + +DECLARE_EVENT_CLASS(handshake_event_class, + TP_PROTO( + const struct net *net, + const struct handshake_req *req, + const struct sock *sk + ), + TP_ARGS(net, req, sk), + TP_STRUCT__entry( + __field(const void *, req) + __field(const void *, sk) + __field(unsigned int, netns_ino) + ), + TP_fast_assign( + __entry->req = req; + __entry->sk = sk; + __entry->netns_ino = net->ns.inum; + ), + TP_printk("req=%p sk=%p", + __entry->req, __entry->sk + ) +); +#define DEFINE_HANDSHAKE_EVENT(name) \ + DEFINE_EVENT(handshake_event_class, name, \ + TP_PROTO( \ + const struct net *net, \ + const struct handshake_req *req, \ + const struct sock *sk \ + ), \ + TP_ARGS(net, req, sk)) + +DECLARE_EVENT_CLASS(handshake_fd_class, + TP_PROTO( + const struct net *net, + const struct handshake_req *req, + const struct sock *sk, + int fd + ), + TP_ARGS(net, req, sk, fd), + TP_STRUCT__entry( + __field(const void *, req) + __field(const void *, sk) + __field(int, fd) + __field(unsigned int, netns_ino) + ), + TP_fast_assign( + __entry->req = req; + __entry->sk = req->hr_sk; + __entry->fd = fd; + __entry->netns_ino = net->ns.inum; + ), + TP_printk("req=%p sk=%p fd=%d", + __entry->req, __entry->sk, __entry->fd + ) +); +#define DEFINE_HANDSHAKE_FD_EVENT(name) \ + DEFINE_EVENT(handshake_fd_class, name, \ + TP_PROTO( \ + const struct net *net, \ + const struct handshake_req *req, \ + const struct sock *sk, \ + int fd \ + ), \ + TP_ARGS(net, req, sk, fd)) + +DECLARE_EVENT_CLASS(handshake_error_class, + TP_PROTO( + const struct net *net, + const struct handshake_req *req, + const struct sock *sk, + int err + ), + TP_ARGS(net, req, sk, err), + TP_STRUCT__entry( + __field(const void *, req) + __field(const void *, sk) + __field(int, err) + __field(unsigned int, netns_ino) + ), + TP_fast_assign( + __entry->req = req; + __entry->sk = sk; + __entry->err = err; + __entry->netns_ino = net->ns.inum; + ), + TP_printk("req=%p sk=%p err=%d", + __entry->req, __entry->sk, __entry->err + ) +); +#define DEFINE_HANDSHAKE_ERROR(name) \ + DEFINE_EVENT(handshake_error_class, name, \ + TP_PROTO( \ + const struct net *net, \ + const struct handshake_req *req, \ + const struct sock *sk, \ + int err \ + ), \ + TP_ARGS(net, req, sk, err)) + + +/* + * Request lifetime events + */ + +DEFINE_HANDSHAKE_EVENT(handshake_submit); +DEFINE_HANDSHAKE_ERROR(handshake_submit_err); +DEFINE_HANDSHAKE_EVENT(handshake_cancel); +DEFINE_HANDSHAKE_EVENT(handshake_cancel_none); +DEFINE_HANDSHAKE_EVENT(handshake_cancel_busy); +DEFINE_HANDSHAKE_EVENT(handshake_destruct); + + +TRACE_EVENT(handshake_complete, + TP_PROTO( + const struct net *net, + const struct handshake_req *req, + const struct sock *sk, + int status + ), + TP_ARGS(net, req, sk, status), + TP_STRUCT__entry( + __field(const void *, req) + __field(const void *, sk) + __field(int, status) + __field(unsigned int, netns_ino) + ), + TP_fast_assign( + __entry->req = req; + __entry->sk = sk; + __entry->status = status; + __entry->netns_ino = net->ns.inum; + ), + TP_printk("req=%p sk=%p status=%d", + __entry->req, __entry->sk, __entry->status + ) +); + +/* + * Netlink events + */ + +DEFINE_HANDSHAKE_ERROR(handshake_notify_err); +DEFINE_HANDSHAKE_FD_EVENT(handshake_cmd_accept); +DEFINE_HANDSHAKE_ERROR(handshake_cmd_accept_err); +DEFINE_HANDSHAKE_FD_EVENT(handshake_cmd_done); +DEFINE_HANDSHAKE_ERROR(handshake_cmd_done_err); + +#endif /* _TRACE_HANDSHAKE_H */ + +#include diff --git a/include/uapi/linux/handshake.h b/include/uapi/linux/handshake.h new file mode 100644 index 000000000000..7f66ff489b87 --- /dev/null +++ b/include/uapi/linux/handshake.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/handshake.yaml */ +/* YNL-GEN uapi header */ + +#ifndef _UAPI_LINUX_HANDSHAKE_H +#define _UAPI_LINUX_HANDSHAKE_H + +#define HANDSHAKE_FAMILY_NAME "handshake" +#define HANDSHAKE_FAMILY_VERSION 1 + +enum handshake_handler_class { + HANDSHAKE_HANDLER_CLASS_NONE, + HANDSHAKE_HANDLER_CLASS_MAX, +}; + +enum handshake_msg_type { + HANDSHAKE_MSG_TYPE_UNSPEC, + HANDSHAKE_MSG_TYPE_CLIENTHELLO, + HANDSHAKE_MSG_TYPE_SERVERHELLO, +}; + +enum handshake_auth { + HANDSHAKE_AUTH_UNSPEC, + HANDSHAKE_AUTH_UNAUTH, + HANDSHAKE_AUTH_PSK, + HANDSHAKE_AUTH_X509, +}; + +enum { + HANDSHAKE_A_X509_CERT = 1, + HANDSHAKE_A_X509_PRIVKEY, + + __HANDSHAKE_A_X509_MAX, + HANDSHAKE_A_X509_MAX = (__HANDSHAKE_A_X509_MAX - 1) +}; + +enum { + HANDSHAKE_A_ACCEPT_SOCKFD = 1, + HANDSHAKE_A_ACCEPT_HANDLER_CLASS, + HANDSHAKE_A_ACCEPT_MESSAGE_TYPE, + HANDSHAKE_A_ACCEPT_TIMEOUT, + HANDSHAKE_A_ACCEPT_AUTH_MODE, + HANDSHAKE_A_ACCEPT_PEER_IDENTITY, + HANDSHAKE_A_ACCEPT_CERTIFICATE, + + __HANDSHAKE_A_ACCEPT_MAX, + HANDSHAKE_A_ACCEPT_MAX = (__HANDSHAKE_A_ACCEPT_MAX - 1) +}; + +enum { + HANDSHAKE_A_DONE_STATUS = 1, + HANDSHAKE_A_DONE_SOCKFD, + HANDSHAKE_A_DONE_REMOTE_AUTH, + + __HANDSHAKE_A_DONE_MAX, + HANDSHAKE_A_DONE_MAX = (__HANDSHAKE_A_DONE_MAX - 1) +}; + +enum { + HANDSHAKE_CMD_READY = 1, + HANDSHAKE_CMD_ACCEPT, + HANDSHAKE_CMD_DONE, + + __HANDSHAKE_CMD_MAX, + HANDSHAKE_CMD_MAX = (__HANDSHAKE_CMD_MAX - 1) +}; + +#define HANDSHAKE_MCGRP_NONE "none" + +#endif /* _UAPI_LINUX_HANDSHAKE_H */ diff --git a/net/Kconfig b/net/Kconfig index f806722bccf4..4b800706cc76 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -68,6 +68,11 @@ source "net/iucv/Kconfig" source "net/smc/Kconfig" source "net/xdp/Kconfig" +config NET_HANDSHAKE + bool + depends on SUNRPC || NVME_TARGET_TCP || NVME_TCP + default y + config INET bool "TCP/IP networking" help diff --git a/net/Makefile b/net/Makefile index 87592009366f..4c4dc535453d 100644 --- a/net/Makefile +++ b/net/Makefile @@ -79,3 +79,4 @@ obj-$(CONFIG_NET_NCSI) += ncsi/ obj-$(CONFIG_XDP_SOCKETS) += xdp/ obj-$(CONFIG_MPTCP) += mptcp/ obj-$(CONFIG_MCTP) += mctp/ +obj-$(CONFIG_NET_HANDSHAKE) += handshake/ diff --git a/net/handshake/Makefile b/net/handshake/Makefile new file mode 100644 index 000000000000..d38736de45da --- /dev/null +++ b/net/handshake/Makefile @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Makefile for the Generic HANDSHAKE service +# +# Author: Chuck Lever +# +# Copyright (c) 2023, Oracle and/or its affiliates. +# + +obj-y += handshake.o +handshake-y := genl.o netlink.o request.o trace.o diff --git a/net/handshake/genl.c b/net/handshake/genl.c new file mode 100644 index 000000000000..652f37d19bd6 --- /dev/null +++ b/net/handshake/genl.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/handshake.yaml */ +/* YNL-GEN kernel source */ + +#include +#include + +#include "genl.h" + +#include + +/* HANDSHAKE_CMD_ACCEPT - do */ +static const struct nla_policy handshake_accept_nl_policy[HANDSHAKE_A_ACCEPT_HANDLER_CLASS + 1] = { + [HANDSHAKE_A_ACCEPT_HANDLER_CLASS] = NLA_POLICY_MAX(NLA_U32, 1), +}; + +/* HANDSHAKE_CMD_DONE - do */ +static const struct nla_policy handshake_done_nl_policy[HANDSHAKE_A_DONE_REMOTE_AUTH + 1] = { + [HANDSHAKE_A_DONE_STATUS] = { .type = NLA_U32, }, + [HANDSHAKE_A_DONE_SOCKFD] = { .type = NLA_U32, }, + [HANDSHAKE_A_DONE_REMOTE_AUTH] = { .type = NLA_U32, }, +}; + +/* Ops table for handshake */ +static const struct genl_split_ops handshake_nl_ops[] = { + { + .cmd = HANDSHAKE_CMD_ACCEPT, + .doit = handshake_nl_accept_doit, + .policy = handshake_accept_nl_policy, + .maxattr = HANDSHAKE_A_ACCEPT_HANDLER_CLASS, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = HANDSHAKE_CMD_DONE, + .doit = handshake_nl_done_doit, + .policy = handshake_done_nl_policy, + .maxattr = HANDSHAKE_A_DONE_REMOTE_AUTH, + .flags = GENL_CMD_CAP_DO, + }, +}; + +static const struct genl_multicast_group handshake_nl_mcgrps[] = { + [HANDSHAKE_NLGRP_NONE] = { "none", }, +}; + +struct genl_family handshake_nl_family __ro_after_init = { + .name = HANDSHAKE_FAMILY_NAME, + .version = HANDSHAKE_FAMILY_VERSION, + .netnsok = true, + .parallel_ops = true, + .module = THIS_MODULE, + .split_ops = handshake_nl_ops, + .n_split_ops = ARRAY_SIZE(handshake_nl_ops), + .mcgrps = handshake_nl_mcgrps, + .n_mcgrps = ARRAY_SIZE(handshake_nl_mcgrps), +}; diff --git a/net/handshake/genl.h b/net/handshake/genl.h new file mode 100644 index 000000000000..a1eb7ccccc7f --- /dev/null +++ b/net/handshake/genl.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/handshake.yaml */ +/* YNL-GEN kernel header */ + +#ifndef _LINUX_HANDSHAKE_GEN_H +#define _LINUX_HANDSHAKE_GEN_H + +#include +#include + +#include + +int handshake_nl_accept_doit(struct sk_buff *skb, struct genl_info *info); +int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info); + +enum { + HANDSHAKE_NLGRP_NONE, +}; + +extern struct genl_family handshake_nl_family; + +#endif /* _LINUX_HANDSHAKE_GEN_H */ diff --git a/net/handshake/handshake.h b/net/handshake/handshake.h new file mode 100644 index 000000000000..52568dbe24f1 --- /dev/null +++ b/net/handshake/handshake.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Generic netlink handshake service + * + * Author: Chuck Lever + * + * Copyright (c) 2023, Oracle and/or its affiliates. + */ + +#ifndef _INTERNAL_HANDSHAKE_H +#define _INTERNAL_HANDSHAKE_H + +/* Per-net namespace context */ +struct handshake_net { + spinlock_t hn_lock; /* protects next 3 fields */ + int hn_pending; + int hn_pending_max; + struct list_head hn_requests; + + unsigned long hn_flags; +}; + +enum hn_flags_bits { + HANDSHAKE_F_NET_DRAINING, +}; + +struct handshake_proto; + +/* One handshake request */ +struct handshake_req { + struct list_head hr_list; + struct rhash_head hr_rhash; + unsigned long hr_flags; + const struct handshake_proto *hr_proto; + struct sock *hr_sk; + void (*hr_odestruct)(struct sock *sk); + + /* Always the last field */ + char hr_priv[]; +}; + +enum hr_flags_bits { + HANDSHAKE_F_REQ_COMPLETED, +}; + +/* Invariants for all handshake requests for one transport layer + * security protocol + */ +struct handshake_proto { + int hp_handler_class; + size_t hp_privsize; + + int (*hp_accept)(struct handshake_req *req, + struct genl_info *info, int fd); + void (*hp_done)(struct handshake_req *req, + unsigned int status, + struct genl_info *info); + void (*hp_destroy)(struct handshake_req *req); +}; + +/* netlink.c */ +int handshake_genl_notify(struct net *net, const struct handshake_proto *proto, + gfp_t flags); +struct nlmsghdr *handshake_genl_put(struct sk_buff *msg, + struct genl_info *info); +struct handshake_net *handshake_pernet(struct net *net); + +/* request.c */ +struct handshake_req *handshake_req_alloc(const struct handshake_proto *proto, + gfp_t flags); +int handshake_req_hash_init(void); +void handshake_req_hash_destroy(void); +void *handshake_req_private(struct handshake_req *req); +struct handshake_req *handshake_req_hash_lookup(struct sock *sk); +struct handshake_req *handshake_req_next(struct handshake_net *hn, int class); +int handshake_req_submit(struct socket *sock, struct handshake_req *req, + gfp_t flags); +void handshake_complete(struct handshake_req *req, unsigned int status, + struct genl_info *info); +bool handshake_req_cancel(struct sock *sk); + +#endif /* _INTERNAL_HANDSHAKE_H */ diff --git a/net/handshake/netlink.c b/net/handshake/netlink.c new file mode 100644 index 000000000000..7264cac04047 --- /dev/null +++ b/net/handshake/netlink.c @@ -0,0 +1,312 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Generic netlink handshake service + * + * Author: Chuck Lever + * + * Copyright (c) 2023, Oracle and/or its affiliates. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include "handshake.h" +#include "genl.h" + +#include + +/** + * handshake_genl_notify - Notify handlers that a request is waiting + * @net: target network namespace + * @proto: handshake protocol + * @flags: memory allocation control flags + * + * Returns zero on success or a negative errno if notification failed. + */ +int handshake_genl_notify(struct net *net, const struct handshake_proto *proto, + gfp_t flags) +{ + struct sk_buff *msg; + void *hdr; + + if (!genl_has_listeners(&handshake_nl_family, net, + proto->hp_handler_class)) + return -ESRCH; + + msg = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &handshake_nl_family, 0, + HANDSHAKE_CMD_READY); + if (!hdr) + goto out_free; + + if (nla_put_u32(msg, HANDSHAKE_A_ACCEPT_HANDLER_CLASS, + proto->hp_handler_class) < 0) { + genlmsg_cancel(msg, hdr); + goto out_free; + } + + genlmsg_end(msg, hdr); + return genlmsg_multicast_netns(&handshake_nl_family, net, msg, + 0, proto->hp_handler_class, flags); + +out_free: + nlmsg_free(msg); + return -EMSGSIZE; +} + +/** + * handshake_genl_put - Create a generic netlink message header + * @msg: buffer in which to create the header + * @info: generic netlink message context + * + * Returns a ready-to-use header, or NULL. + */ +struct nlmsghdr *handshake_genl_put(struct sk_buff *msg, + struct genl_info *info) +{ + return genlmsg_put(msg, info->snd_portid, info->snd_seq, + &handshake_nl_family, 0, info->genlhdr->cmd); +} +EXPORT_SYMBOL(handshake_genl_put); + +/* + * dup() a kernel socket for use as a user space file descriptor + * in the current process. The kernel socket must have an + * instatiated struct file. + * + * Implicit argument: "current()" + */ +static int handshake_dup(struct socket *sock) +{ + struct file *file; + int newfd; + + if (!sock->file) + return -EBADF; + + file = get_file(sock->file); + newfd = get_unused_fd_flags(O_CLOEXEC); + if (newfd < 0) { + fput(file); + return newfd; + } + + fd_install(newfd, file); + return newfd; +} + +int handshake_nl_accept_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = sock_net(skb->sk); + struct handshake_net *hn = handshake_pernet(net); + struct handshake_req *req = NULL; + struct socket *sock; + int class, fd, err; + + err = -EOPNOTSUPP; + if (!hn) + goto out_status; + + err = -EINVAL; + if (GENL_REQ_ATTR_CHECK(info, HANDSHAKE_A_ACCEPT_HANDLER_CLASS)) + goto out_status; + class = nla_get_u32(info->attrs[HANDSHAKE_A_ACCEPT_HANDLER_CLASS]); + + err = -EAGAIN; + req = handshake_req_next(hn, class); + if (!req) + goto out_status; + + sock = req->hr_sk->sk_socket; + fd = handshake_dup(sock); + if (fd < 0) { + err = fd; + goto out_complete; + } + err = req->hr_proto->hp_accept(req, info, fd); + if (err) + goto out_complete; + + trace_handshake_cmd_accept(net, req, req->hr_sk, fd); + return 0; + +out_complete: + handshake_complete(req, -EIO, NULL); + fput(sock->file); +out_status: + trace_handshake_cmd_accept_err(net, req, NULL, err); + return err; +} + +int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = sock_net(skb->sk); + struct socket *sock = NULL; + struct handshake_req *req; + int fd, status, err; + + if (GENL_REQ_ATTR_CHECK(info, HANDSHAKE_A_DONE_SOCKFD)) + return -EINVAL; + fd = nla_get_u32(info->attrs[HANDSHAKE_A_DONE_SOCKFD]); + + err = 0; + sock = sockfd_lookup(fd, &err); + if (err) { + err = -EBADF; + goto out_status; + } + + req = handshake_req_hash_lookup(sock->sk); + if (!req) { + err = -EBUSY; + fput(sock->file); + goto out_status; + } + + trace_handshake_cmd_done(net, req, sock->sk, fd); + + status = -EIO; + if (info->attrs[HANDSHAKE_A_DONE_STATUS]) + status = nla_get_u32(info->attrs[HANDSHAKE_A_DONE_STATUS]); + + handshake_complete(req, status, info); + fput(sock->file); + return 0; + +out_status: + trace_handshake_cmd_done_err(net, req, sock->sk, err); + return err; +} + +static unsigned int handshake_net_id; + +static int __net_init handshake_net_init(struct net *net) +{ + struct handshake_net *hn = net_generic(net, handshake_net_id); + unsigned long tmp; + struct sysinfo si; + + /* + * Arbitrary limit to prevent handshakes that do not make + * progress from clogging up the system. The cap scales up + * with the amount of physical memory on the system. + */ + si_meminfo(&si); + tmp = si.totalram / (25 * si.mem_unit); + hn->hn_pending_max = clamp(tmp, 3UL, 50UL); + + spin_lock_init(&hn->hn_lock); + hn->hn_pending = 0; + hn->hn_flags = 0; + INIT_LIST_HEAD(&hn->hn_requests); + return 0; +} + +static void __net_exit handshake_net_exit(struct net *net) +{ + struct handshake_net *hn = net_generic(net, handshake_net_id); + struct handshake_req *req; + LIST_HEAD(requests); + + /* + * Drain the net's pending list. Requests that have been + * accepted and are in progress will be destroyed when + * the socket is closed. + */ + spin_lock(&hn->hn_lock); + set_bit(HANDSHAKE_F_NET_DRAINING, &hn->hn_flags); + list_splice_init(&requests, &hn->hn_requests); + spin_unlock(&hn->hn_lock); + + while (!list_empty(&requests)) { + req = list_first_entry(&requests, struct handshake_req, hr_list); + list_del(&req->hr_list); + + /* + * Requests on this list have not yet been + * accepted, so they do not have an fd to put. + */ + + handshake_complete(req, -ETIMEDOUT, NULL); + } +} + +static struct pernet_operations __net_initdata handshake_genl_net_ops = { + .init = handshake_net_init, + .exit = handshake_net_exit, + .id = &handshake_net_id, + .size = sizeof(struct handshake_net), +}; + +/** + * handshake_pernet - Get the handshake private per-net structure + * @net: network namespace + * + * Returns a pointer to the net's private per-net structure for the + * handshake module, or NULL if handshake_init() failed. + */ +struct handshake_net *handshake_pernet(struct net *net) +{ + return handshake_net_id ? + net_generic(net, handshake_net_id) : NULL; +} + +static int __init handshake_init(void) +{ + int ret; + + ret = handshake_req_hash_init(); + if (ret) { + pr_warn("handshake: hash initialization failed (%d)\n", ret); + return ret; + } + + ret = genl_register_family(&handshake_nl_family); + if (ret) { + pr_warn("handshake: netlink registration failed (%d)\n", ret); + handshake_req_hash_destroy(); + return ret; + } + + /* + * ORDER: register_pernet_subsys must be done last. + * + * If initialization does not make it past pernet_subsys + * registration, then handshake_net_id will remain 0. That + * shunts the handshake consumer API to return ENOTSUPP + * to prevent it from dereferencing something that hasn't + * been allocated. + */ + ret = register_pernet_subsys(&handshake_genl_net_ops); + if (ret) { + pr_warn("handshake: pernet registration failed (%d)\n", ret); + genl_unregister_family(&handshake_nl_family); + handshake_req_hash_destroy(); + } + + return ret; +} + +static void __exit handshake_exit(void) +{ + unregister_pernet_subsys(&handshake_genl_net_ops); + handshake_net_id = 0; + + handshake_req_hash_destroy(); + genl_unregister_family(&handshake_nl_family); +} + +module_init(handshake_init); +module_exit(handshake_exit); diff --git a/net/handshake/request.c b/net/handshake/request.c new file mode 100644 index 000000000000..d5b2bc6de057 --- /dev/null +++ b/net/handshake/request.c @@ -0,0 +1,339 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Handshake request lifetime events + * + * Author: Chuck Lever + * + * Copyright (c) 2023, Oracle and/or its affiliates. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include "handshake.h" + +#include + +/* + * We need both a handshake_req -> sock mapping, and a sock -> + * handshake_req mapping. Both are one-to-one. + * + * To avoid adding another pointer field to struct sock, net/handshake + * maintains a hash table, indexed by the memory address of @sock, to + * find the struct handshake_req outstanding for that socket. The + * reverse direction uses a simple pointer field in the handshake_req + * struct. + */ + +static struct rhashtable handshake_rhashtbl ____cacheline_aligned_in_smp; + +static const struct rhashtable_params handshake_rhash_params = { + .key_len = sizeof_field(struct handshake_req, hr_sk), + .key_offset = offsetof(struct handshake_req, hr_sk), + .head_offset = offsetof(struct handshake_req, hr_rhash), + .automatic_shrinking = true, +}; + +int handshake_req_hash_init(void) +{ + return rhashtable_init(&handshake_rhashtbl, &handshake_rhash_params); +} + +void handshake_req_hash_destroy(void) +{ + rhashtable_destroy(&handshake_rhashtbl); +} + +struct handshake_req *handshake_req_hash_lookup(struct sock *sk) +{ + return rhashtable_lookup_fast(&handshake_rhashtbl, &sk, + handshake_rhash_params); +} + +static bool handshake_req_hash_add(struct handshake_req *req) +{ + int ret; + + ret = rhashtable_lookup_insert_fast(&handshake_rhashtbl, + &req->hr_rhash, + handshake_rhash_params); + return ret == 0; +} + +static void handshake_req_destroy(struct handshake_req *req) +{ + if (req->hr_proto->hp_destroy) + req->hr_proto->hp_destroy(req); + rhashtable_remove_fast(&handshake_rhashtbl, &req->hr_rhash, + handshake_rhash_params); + kfree(req); +} + +static void handshake_sk_destruct(struct sock *sk) +{ + void (*sk_destruct)(struct sock *sk); + struct handshake_req *req; + + req = handshake_req_hash_lookup(sk); + if (!req) + return; + + trace_handshake_destruct(sock_net(sk), req, sk); + sk_destruct = req->hr_odestruct; + handshake_req_destroy(req); + if (sk_destruct) + sk_destruct(sk); +} + +/** + * handshake_req_alloc - Allocate a handshake request + * @proto: security protocol + * @flags: memory allocation flags + * + * Returns an initialized handshake_req or NULL. + */ +struct handshake_req *handshake_req_alloc(const struct handshake_proto *proto, + gfp_t flags) +{ + struct handshake_req *req; + + if (!proto) + return NULL; + if (proto->hp_handler_class <= HANDSHAKE_HANDLER_CLASS_NONE) + return NULL; + if (proto->hp_handler_class >= HANDSHAKE_HANDLER_CLASS_MAX) + return NULL; + if (!proto->hp_accept || !proto->hp_done) + return NULL; + + req = kzalloc(struct_size(req, hr_priv, proto->hp_privsize), flags); + if (!req) + return NULL; + + INIT_LIST_HEAD(&req->hr_list); + req->hr_proto = proto; + return req; +} +EXPORT_SYMBOL(handshake_req_alloc); + +/** + * handshake_req_private - Get per-handshake private data + * @req: handshake arguments + * + */ +void *handshake_req_private(struct handshake_req *req) +{ + return (void *)&req->hr_priv; +} +EXPORT_SYMBOL(handshake_req_private); + +static bool __add_pending_locked(struct handshake_net *hn, + struct handshake_req *req) +{ + if (WARN_ON_ONCE(!list_empty(&req->hr_list))) + return false; + hn->hn_pending++; + list_add_tail(&req->hr_list, &hn->hn_requests); + return true; +} + +static void __remove_pending_locked(struct handshake_net *hn, + struct handshake_req *req) +{ + hn->hn_pending--; + list_del_init(&req->hr_list); +} + +/* + * Returns %true if the request was found on @net's pending list, + * otherwise %false. + * + * If @req was on a pending list, it has not yet been accepted. + */ +static bool remove_pending(struct handshake_net *hn, struct handshake_req *req) +{ + bool ret = false; + + spin_lock(&hn->hn_lock); + if (!list_empty(&req->hr_list)) { + __remove_pending_locked(hn, req); + ret = true; + } + spin_unlock(&hn->hn_lock); + + return ret; +} + +struct handshake_req *handshake_req_next(struct handshake_net *hn, int class) +{ + struct handshake_req *req, *pos; + + req = NULL; + spin_lock(&hn->hn_lock); + list_for_each_entry(pos, &hn->hn_requests, hr_list) { + if (pos->hr_proto->hp_handler_class != class) + continue; + __remove_pending_locked(hn, pos); + req = pos; + break; + } + spin_unlock(&hn->hn_lock); + + return req; +} + +/** + * handshake_req_submit - Submit a handshake request + * @sock: open socket on which to perform the handshake + * @req: handshake arguments + * @flags: memory allocation flags + * + * Return values: + * %0: Request queued + * %-EINVAL: Invalid argument + * %-EBUSY: A handshake is already under way for this socket + * %-ESRCH: No handshake agent is available + * %-EAGAIN: Too many pending handshake requests + * %-ENOMEM: Failed to allocate memory + * %-EMSGSIZE: Failed to construct notification message + * %-EOPNOTSUPP: Handshake module not initialized + * + * A zero return value from handshake_req_submit() means that + * exactly one subsequent completion callback is guaranteed. + * + * A negative return value from handshake_req_submit() means that + * no completion callback will be done and that @req has been + * destroyed. + */ +int handshake_req_submit(struct socket *sock, struct handshake_req *req, + gfp_t flags) +{ + struct handshake_net *hn; + struct net *net; + int ret; + + if (!sock || !req || !sock->file) { + kfree(req); + return -EINVAL; + } + + req->hr_sk = sock->sk; + if (!req->hr_sk) { + kfree(req); + return -EINVAL; + } + req->hr_odestruct = req->hr_sk->sk_destruct; + req->hr_sk->sk_destruct = handshake_sk_destruct; + + ret = -EOPNOTSUPP; + net = sock_net(req->hr_sk); + hn = handshake_pernet(net); + if (!hn) + goto out_err; + + ret = -EAGAIN; + if (READ_ONCE(hn->hn_pending) >= hn->hn_pending_max) + goto out_err; + + spin_lock(&hn->hn_lock); + ret = -EOPNOTSUPP; + if (test_bit(HANDSHAKE_F_NET_DRAINING, &hn->hn_flags)) + goto out_unlock; + ret = -EBUSY; + if (!handshake_req_hash_add(req)) + goto out_unlock; + if (!__add_pending_locked(hn, req)) + goto out_unlock; + spin_unlock(&hn->hn_lock); + + ret = handshake_genl_notify(net, req->hr_proto, flags); + if (ret) { + trace_handshake_notify_err(net, req, req->hr_sk, ret); + if (remove_pending(hn, req)) + goto out_err; + } + + /* Prevent socket release while a handshake request is pending */ + sock_hold(req->hr_sk); + + trace_handshake_submit(net, req, req->hr_sk); + return 0; + +out_unlock: + spin_unlock(&hn->hn_lock); +out_err: + trace_handshake_submit_err(net, req, req->hr_sk, ret); + handshake_req_destroy(req); + return ret; +} +EXPORT_SYMBOL(handshake_req_submit); + +void handshake_complete(struct handshake_req *req, unsigned int status, + struct genl_info *info) +{ + struct sock *sk = req->hr_sk; + struct net *net = sock_net(sk); + + if (!test_and_set_bit(HANDSHAKE_F_REQ_COMPLETED, &req->hr_flags)) { + trace_handshake_complete(net, req, sk, status); + req->hr_proto->hp_done(req, status, info); + + /* Handshake request is no longer pending */ + sock_put(sk); + } +} + +/** + * handshake_req_cancel - Cancel an in-progress handshake + * @sk: socket on which there is an ongoing handshake + * + * Request cancellation races with request completion. To determine + * who won, callers examine the return value from this function. + * + * Return values: + * %true - Uncompleted handshake request was canceled + * %false - Handshake request already completed or not found + */ +bool handshake_req_cancel(struct sock *sk) +{ + struct handshake_req *req; + struct handshake_net *hn; + struct net *net; + + net = sock_net(sk); + req = handshake_req_hash_lookup(sk); + if (!req) { + trace_handshake_cancel_none(net, req, sk); + return false; + } + + hn = handshake_pernet(net); + if (hn && remove_pending(hn, req)) { + /* Request hadn't been accepted */ + goto out_true; + } + if (test_and_set_bit(HANDSHAKE_F_REQ_COMPLETED, &req->hr_flags)) { + /* Request already completed */ + trace_handshake_cancel_busy(net, req, sk); + return false; + } + +out_true: + trace_handshake_cancel(net, req, sk); + + /* Handshake request is no longer pending */ + sock_put(sk); + return true; +} +EXPORT_SYMBOL(handshake_req_cancel); diff --git a/net/handshake/trace.c b/net/handshake/trace.c new file mode 100644 index 000000000000..1c4d8e27e17a --- /dev/null +++ b/net/handshake/trace.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Trace points for transport security layer handshakes. + * + * Author: Chuck Lever + * + * Copyright (c) 2023, Oracle and/or its affiliates. + */ + +#include + +#include +#include +#include + +#include "handshake.h" + +#define CREATE_TRACE_POINTS + +#include -- cgit v1.3.1