diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-06-30 15:51:09 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-06-30 15:51:09 -0700 |
commit | dbe69e43372212527abf48609aba7fc39a6daa27 (patch) | |
tree | 96cfafdf70f5325ceeac1054daf7deca339c9730 /net/netfilter/nfnetlink_hook.c | |
parent | a6eaf3850cb171c328a8b0db6d3c79286a1eba9d (diff) | |
parent | b6df00789e2831fff7a2c65aa7164b2a4dcbe599 (diff) |
Merge tag 'net-next-5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski:
"Core:
- BPF:
- add syscall program type and libbpf support for generating
instructions and bindings for in-kernel BPF loaders (BPF loaders
for BPF), this is a stepping stone for signed BPF programs
- infrastructure to migrate TCP child sockets from one listener to
another in the same reuseport group/map to improve flexibility
of service hand-off/restart
- add broadcast support to XDP redirect
- allow bypass of the lockless qdisc to improving performance (for
pktgen: +23% with one thread, +44% with 2 threads)
- add a simpler version of "DO_ONCE()" which does not require jump
labels, intended for slow-path usage
- virtio/vsock: introduce SOCK_SEQPACKET support
- add getsocketopt to retrieve netns cookie
- ip: treat lowest address of a IPv4 subnet as ordinary unicast
address allowing reclaiming of precious IPv4 addresses
- ipv6: use prandom_u32() for ID generation
- ip: add support for more flexible field selection for hashing
across multi-path routes (w/ offload to mlxsw)
- icmp: add support for extended RFC 8335 PROBE (ping)
- seg6: add support for SRv6 End.DT46 behavior
- mptcp:
- DSS checksum support (RFC 8684) to detect middlebox meddling
- support Connection-time 'C' flag
- time stamping support
- sctp: packetization Layer Path MTU Discovery (RFC 8899)
- xfrm: speed up state addition with seq set
- WiFi:
- hidden AP discovery on 6 GHz and other HE 6 GHz improvements
- aggregation handling improvements for some drivers
- minstrel improvements for no-ack frames
- deferred rate control for TXQs to improve reaction times
- switch from round robin to virtual time-based airtime scheduler
- add trace points:
- tcp checksum errors
- openvswitch - action execution, upcalls
- socket errors via sk_error_report
Device APIs:
- devlink: add rate API for hierarchical control of max egress rate
of virtual devices (VFs, SFs etc.)
- don't require RCU read lock to be held around BPF hooks in NAPI
context
- page_pool: generic buffer recycling
New hardware/drivers:
- mobile:
- iosm: PCIe Driver for Intel M.2 Modem
- support for Qualcomm MSM8998 (ipa)
- WiFi: Qualcomm QCN9074 and WCN6855 PCI devices
- sparx5: Microchip SparX-5 family of Enterprise Ethernet switches
- Mellanox BlueField Gigabit Ethernet (control NIC of the DPU)
- NXP SJA1110 Automotive Ethernet 10-port switch
- Qualcomm QCA8327 switch support (qca8k)
- Mikrotik 10/25G NIC (atl1c)
Driver changes:
- ACPI support for some MDIO, MAC and PHY devices from Marvell and
NXP (our first foray into MAC/PHY description via ACPI)
- HW timestamping (PTP) support: bnxt_en, ice, sja1105, hns3, tja11xx
- Mellanox/Nvidia NIC (mlx5)
- NIC VF offload of L2 bridging
- support IRQ distribution to Sub-functions
- Marvell (prestera):
- add flower and match all
- devlink trap
- link aggregation
- Netronome (nfp): connection tracking offload
- Intel 1GE (igc): add AF_XDP support
- Marvell DPU (octeontx2): ingress ratelimit offload
- Google vNIC (gve): new ring/descriptor format support
- Qualcomm mobile (rmnet & ipa): inline checksum offload support
- MediaTek WiFi (mt76)
- mt7915 MSI support
- mt7915 Tx status reporting
- mt7915 thermal sensors support
- mt7921 decapsulation offload
- mt7921 enable runtime pm and deep sleep
- Realtek WiFi (rtw88)
- beacon filter support
- Tx antenna path diversity support
- firmware crash information via devcoredump
- Qualcomm WiFi (wcn36xx)
- Wake-on-WLAN support with magic packets and GTK rekeying
- Micrel PHY (ksz886x/ksz8081): add cable test support"
* tag 'net-next-5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2168 commits)
tcp: change ICSK_CA_PRIV_SIZE definition
tcp_yeah: check struct yeah size at compile time
gve: DQO: Fix off by one in gve_rx_dqo()
stmmac: intel: set PCI_D3hot in suspend
stmmac: intel: Enable PHY WOL option in EHL
net: stmmac: option to enable PHY WOL with PMT enabled
net: say "local" instead of "static" addresses in ndo_dflt_fdb_{add,del}
net: use netdev_info in ndo_dflt_fdb_{add,del}
ptp: Set lookup cookie when creating a PTP PPS source.
net: sock: add trace for socket errors
net: sock: introduce sk_error_report
net: dsa: replay the local bridge FDB entries pointing to the bridge dev too
net: dsa: ensure during dsa_fdb_offload_notify that dev_hold and dev_put are on the same dev
net: dsa: include fdb entries pointing to bridge in the host fdb list
net: dsa: include bridge addresses which are local in the host fdb list
net: dsa: sync static FDB entries on foreign interfaces to hardware
net: dsa: install the host MDB and FDB entries in the master's RX filter
net: dsa: reference count the FDB addresses at the cross-chip notifier level
net: dsa: introduce a separate cross-chip notifier type for host FDBs
net: dsa: reference count the MDB entries at the cross-chip notifier level
...
Diffstat (limited to 'net/netfilter/nfnetlink_hook.c')
-rw-r--r-- | net/netfilter/nfnetlink_hook.c | 377 |
1 files changed, 377 insertions, 0 deletions
diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c new file mode 100644 index 000000000000..50b4e3c9347a --- /dev/null +++ b/net/netfilter/nfnetlink_hook.c @@ -0,0 +1,377 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2021 Red Hat GmbH + * + * Author: Florian Westphal <fw@strlen.de> + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/skbuff.h> +#include <linux/errno.h> +#include <linux/netlink.h> +#include <linux/slab.h> + +#include <linux/netfilter.h> + +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nfnetlink_hook.h> + +#include <net/netfilter/nf_tables.h> +#include <net/sock.h> + +static const struct nla_policy nfnl_hook_nla_policy[NFNLA_HOOK_MAX + 1] = { + [NFNLA_HOOK_HOOKNUM] = { .type = NLA_U32 }, + [NFNLA_HOOK_PRIORITY] = { .type = NLA_U32 }, + [NFNLA_HOOK_DEV] = { .type = NLA_STRING, + .len = IFNAMSIZ - 1 }, + [NFNLA_HOOK_FUNCTION_NAME] = { .type = NLA_NUL_STRING, + .len = KSYM_NAME_LEN, }, + [NFNLA_HOOK_MODULE_NAME] = { .type = NLA_NUL_STRING, + .len = MODULE_NAME_LEN, }, + [NFNLA_HOOK_CHAIN_INFO] = { .type = NLA_NESTED, }, +}; + +static int nf_netlink_dump_start_rcu(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct netlink_dump_control *c) +{ + int err; + + if (!try_module_get(THIS_MODULE)) + return -EINVAL; + + rcu_read_unlock(); + err = netlink_dump_start(nlsk, skb, nlh, c); + rcu_read_lock(); + module_put(THIS_MODULE); + + return err; +} + +struct nfnl_dump_hook_data { + char devname[IFNAMSIZ]; + unsigned long headv; + u8 hook; +}; + +static int nfnl_hook_put_nft_chain_info(struct sk_buff *nlskb, + const struct nfnl_dump_hook_data *ctx, + unsigned int seq, + const struct nf_hook_ops *ops) +{ + struct net *net = sock_net(nlskb->sk); + struct nlattr *nest, *nest2; + struct nft_chain *chain; + int ret = 0; + + if (ops->hook_ops_type != NF_HOOK_OP_NF_TABLES) + return 0; + + chain = ops->priv; + if (WARN_ON_ONCE(!chain)) + return 0; + + if (!nft_is_active(net, chain)) + return 0; + + nest = nla_nest_start(nlskb, NFNLA_HOOK_CHAIN_INFO); + if (!nest) + return -EMSGSIZE; + + ret = nla_put_be32(nlskb, NFNLA_HOOK_INFO_TYPE, + htonl(NFNL_HOOK_TYPE_NFTABLES)); + if (ret) + goto cancel_nest; + + nest2 = nla_nest_start(nlskb, NFNLA_HOOK_INFO_DESC); + if (!nest2) + goto cancel_nest; + + ret = nla_put_string(nlskb, NFTA_CHAIN_TABLE, chain->table->name); + if (ret) + goto cancel_nest; + + ret = nla_put_string(nlskb, NFTA_CHAIN_NAME, chain->name); + if (ret) + goto cancel_nest; + + nla_nest_end(nlskb, nest2); + nla_nest_end(nlskb, nest); + return ret; + +cancel_nest: + nla_nest_cancel(nlskb, nest); + return -EMSGSIZE; +} + +static int nfnl_hook_dump_one(struct sk_buff *nlskb, + const struct nfnl_dump_hook_data *ctx, + const struct nf_hook_ops *ops, + unsigned int seq) +{ + u16 event = nfnl_msg_type(NFNL_SUBSYS_HOOK, NFNL_MSG_HOOK_GET); + unsigned int portid = NETLINK_CB(nlskb).portid; + struct nlmsghdr *nlh; + int ret = -EMSGSIZE; +#ifdef CONFIG_KALLSYMS + char sym[KSYM_SYMBOL_LEN]; + char *module_name; +#endif + nlh = nfnl_msg_put(nlskb, portid, seq, event, + NLM_F_MULTI, ops->pf, NFNETLINK_V0, 0); + if (!nlh) + goto nla_put_failure; + +#ifdef CONFIG_KALLSYMS + ret = snprintf(sym, sizeof(sym), "%ps", ops->hook); + if (ret >= sizeof(sym)) { + ret = -EINVAL; + goto nla_put_failure; + } + + module_name = strstr(sym, " ["); + if (module_name) { + char *end; + + module_name += 2; + end = strchr(module_name, ']'); + if (end) { + *end = 0; + + ret = nla_put_string(nlskb, NFNLA_HOOK_MODULE_NAME, module_name); + if (ret) + goto nla_put_failure; + } + } + + ret = nla_put_string(nlskb, NFNLA_HOOK_FUNCTION_NAME, sym); + if (ret) + goto nla_put_failure; +#endif + + ret = nla_put_be32(nlskb, NFNLA_HOOK_HOOKNUM, htonl(ops->hooknum)); + if (ret) + goto nla_put_failure; + + ret = nla_put_be32(nlskb, NFNLA_HOOK_PRIORITY, htonl(ops->priority)); + if (ret) + goto nla_put_failure; + + ret = nfnl_hook_put_nft_chain_info(nlskb, ctx, seq, ops); + if (ret) + goto nla_put_failure; + + nlmsg_end(nlskb, nlh); + return 0; +nla_put_failure: + nlmsg_trim(nlskb, nlh); + return ret; +} + +static const struct nf_hook_entries * +nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *dev) +{ + const struct nf_hook_entries *hook_head = NULL; + struct net_device *netdev; + + switch (pf) { + case NFPROTO_IPV4: + if (hook >= ARRAY_SIZE(net->nf.hooks_ipv4)) + return ERR_PTR(-EINVAL); + hook_head = rcu_dereference(net->nf.hooks_ipv4[hook]); + break; + case NFPROTO_IPV6: + if (hook >= ARRAY_SIZE(net->nf.hooks_ipv6)) + return ERR_PTR(-EINVAL); + hook_head = rcu_dereference(net->nf.hooks_ipv6[hook]); + break; + case NFPROTO_ARP: +#ifdef CONFIG_NETFILTER_FAMILY_ARP + if (hook >= ARRAY_SIZE(net->nf.hooks_arp)) + return ERR_PTR(-EINVAL); + hook_head = rcu_dereference(net->nf.hooks_arp[hook]); +#endif + break; + case NFPROTO_BRIDGE: +#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE + if (hook >= ARRAY_SIZE(net->nf.hooks_bridge)) + return ERR_PTR(-EINVAL); + hook_head = rcu_dereference(net->nf.hooks_bridge[hook]); +#endif + break; +#if IS_ENABLED(CONFIG_DECNET) + case NFPROTO_DECNET: + if (hook >= ARRAY_SIZE(net->nf.hooks_decnet)) + return ERR_PTR(-EINVAL); + hook_head = rcu_dereference(net->nf.hooks_decnet[hook]); + break; +#endif +#ifdef CONFIG_NETFILTER_INGRESS + case NFPROTO_NETDEV: + if (hook != NF_NETDEV_INGRESS) + return ERR_PTR(-EOPNOTSUPP); + + if (!dev) + return ERR_PTR(-ENODEV); + + netdev = dev_get_by_name_rcu(net, dev); + if (!netdev) + return ERR_PTR(-ENODEV); + + return rcu_dereference(netdev->nf_hooks_ingress); +#endif + default: + return ERR_PTR(-EPROTONOSUPPORT); + } + + return hook_head; +} + +static int nfnl_hook_dump(struct sk_buff *nlskb, + struct netlink_callback *cb) +{ + struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + struct nfnl_dump_hook_data *ctx = cb->data; + int err, family = nfmsg->nfgen_family; + struct net *net = sock_net(nlskb->sk); + struct nf_hook_ops * const *ops; + const struct nf_hook_entries *e; + unsigned int i = cb->args[0]; + + rcu_read_lock(); + + e = nfnl_hook_entries_head(family, ctx->hook, net, ctx->devname); + if (!e) + goto done; + + if (IS_ERR(e)) { + cb->seq++; + goto done; + } + + if ((unsigned long)e != ctx->headv || i >= e->num_hook_entries) + cb->seq++; + + ops = nf_hook_entries_get_hook_ops(e); + + for (; i < e->num_hook_entries; i++) { + err = nfnl_hook_dump_one(nlskb, ctx, ops[i], cb->seq); + if (err) + break; + } + +done: + nl_dump_check_consistent(cb, nlmsg_hdr(nlskb)); + rcu_read_unlock(); + cb->args[0] = i; + return nlskb->len; +} + +static int nfnl_hook_dump_start(struct netlink_callback *cb) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + const struct nlattr * const *nla = cb->data; + struct nfnl_dump_hook_data *ctx = NULL; + struct net *net = sock_net(cb->skb->sk); + u8 family = nfmsg->nfgen_family; + char name[IFNAMSIZ] = ""; + const void *head; + u32 hooknum; + + hooknum = ntohl(nla_get_be32(nla[NFNLA_HOOK_HOOKNUM])); + if (hooknum > 255) + return -EINVAL; + + if (family == NFPROTO_NETDEV) { + if (!nla[NFNLA_HOOK_DEV]) + return -EINVAL; + + nla_strscpy(name, nla[NFNLA_HOOK_DEV], sizeof(name)); + } + + rcu_read_lock(); + /* Not dereferenced; for consistency check only */ + head = nfnl_hook_entries_head(family, hooknum, net, name); + rcu_read_unlock(); + + if (head && IS_ERR(head)) + return PTR_ERR(head); + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + strscpy(ctx->devname, name, sizeof(ctx->devname)); + ctx->headv = (unsigned long)head; + ctx->hook = hooknum; + + cb->seq = 1; + cb->data = ctx; + + return 0; +} + +static int nfnl_hook_dump_stop(struct netlink_callback *cb) +{ + kfree(cb->data); + return 0; +} + +static int nfnl_hook_get(struct sk_buff *skb, + const struct nfnl_info *info, + const struct nlattr * const nla[]) +{ + if (!nla[NFNLA_HOOK_HOOKNUM]) + return -EINVAL; + + if (info->nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .start = nfnl_hook_dump_start, + .done = nfnl_hook_dump_stop, + .dump = nfnl_hook_dump, + .module = THIS_MODULE, + .data = (void *)nla, + }; + + return nf_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); + } + + return -EOPNOTSUPP; +} + +static const struct nfnl_callback nfnl_hook_cb[NFNL_MSG_HOOK_MAX] = { + [NFNL_MSG_HOOK_GET] = { + .call = nfnl_hook_get, + .type = NFNL_CB_RCU, + .attr_count = NFNLA_HOOK_MAX, + .policy = nfnl_hook_nla_policy + }, +}; + +static const struct nfnetlink_subsystem nfhook_subsys = { + .name = "nfhook", + .subsys_id = NFNL_SUBSYS_HOOK, + .cb_count = NFNL_MSG_HOOK_MAX, + .cb = nfnl_hook_cb, +}; + +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_HOOK); + +static int __init nfnetlink_hook_init(void) +{ + return nfnetlink_subsys_register(&nfhook_subsys); +} + +static void __exit nfnetlink_hook_exit(void) +{ + nfnetlink_subsys_unregister(&nfhook_subsys); +} + +module_init(nfnetlink_hook_init); +module_exit(nfnetlink_hook_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); +MODULE_DESCRIPTION("nfnetlink_hook: list registered netfilter hooks"); |