From 49b02a19c23a6541026ae8d36fee85ce8af11b60 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Fri, 27 Oct 2023 08:50:43 -0700 Subject: net: sched: Fill in MODULE_DESCRIPTION for act_gate W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Gate is the only TC action that is lacking such description. Fill MODULE_DESCRIPTION for Gate TC ACTION. Signed-off-by: Victor Nogueira Acked-by: Jamal Hadi Salim Reviewed-by: Vinicius Costa Gomes Link: https://lore.kernel.org/r/20231027155045.46291-2-victor@mojatatu.com Signed-off-by: Jakub Kicinski --- net/sched/act_gate.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index c9a811f4c7ee..393b78729216 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -677,4 +677,5 @@ static void __exit gate_cleanup_module(void) module_init(gate_init_module); module_exit(gate_cleanup_module); +MODULE_DESCRIPTION("TC gate action"); MODULE_LICENSE("GPL v2"); -- cgit v1.2.3-70-g09d2 From a9c92771fa23b263ac747fc5a12e0e233aed23d5 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Fri, 27 Oct 2023 08:50:44 -0700 Subject: net: sched: Fill in missing MODULE_DESCRIPTION for classifiers W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Fill in missing MODULE_DESCRIPTIONs for TC classifiers. 
Signed-off-by: Victor Nogueira Acked-by: Jamal Hadi Salim Reviewed-by: Vinicius Costa Gomes Link: https://lore.kernel.org/r/20231027155045.46291-3-victor@mojatatu.com Signed-off-by: Jakub Kicinski --- net/sched/cls_basic.c | 1 + net/sched/cls_cgroup.c | 1 + net/sched/cls_fw.c | 1 + net/sched/cls_route.c | 1 + net/sched/cls_u32.c | 1 + 5 files changed, 5 insertions(+) (limited to 'net') diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 1b92c33b5f81..a1f56931330c 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -341,4 +341,5 @@ static void __exit exit_basic(void) module_init(init_basic) module_exit(exit_basic) +MODULE_DESCRIPTION("TC basic classifier"); MODULE_LICENSE("GPL"); diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index bd9322d71910..7ee8dbf49ed0 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -222,4 +222,5 @@ static void __exit exit_cgroup_cls(void) module_init(init_cgroup_cls); module_exit(exit_cgroup_cls); +MODULE_DESCRIPTION("TC cgroup classifier"); MODULE_LICENSE("GPL"); diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index c49d6af0e048..afc534ee0a18 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -446,4 +446,5 @@ static void __exit exit_fw(void) module_init(init_fw) module_exit(exit_fw) +MODULE_DESCRIPTION("SKB mark based TC classifier"); MODULE_LICENSE("GPL"); diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 1424bfeaca73..12a505db4183 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -684,4 +684,5 @@ static void __exit exit_route4(void) module_init(init_route4) module_exit(exit_route4) +MODULE_DESCRIPTION("Routing table realm based TC classifier"); MODULE_LICENSE("GPL"); diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 6663e971a13e..d5bdfd4a7655 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -1489,4 +1489,5 @@ static void __exit exit_u32(void) module_init(init_u32) module_exit(exit_u32) 
+MODULE_DESCRIPTION("Universal 32bit based TC Classifier"); MODULE_LICENSE("GPL"); -- cgit v1.2.3-70-g09d2 From f96118c5d86f03d81bc24c7941f133ae5dd56a7b Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Fri, 27 Oct 2023 08:50:45 -0700 Subject: net: sched: Fill in missing MODULE_DESCRIPTION for qdiscs W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Fill in missing MODULE_DESCRIPTIONs for TC qdiscs. Signed-off-by: Victor Nogueira Acked-by: Jamal Hadi Salim Reviewed-by: Vinicius Costa Gomes Link: https://lore.kernel.org/r/20231027155045.46291-4-victor@mojatatu.com Signed-off-by: Jakub Kicinski --- net/sched/sch_cbs.c | 1 + net/sched/sch_choke.c | 1 + net/sched/sch_drr.c | 1 + net/sched/sch_etf.c | 1 + net/sched/sch_ets.c | 1 + net/sched/sch_fifo.c | 1 + net/sched/sch_gred.c | 1 + net/sched/sch_hfsc.c | 1 + net/sched/sch_htb.c | 1 + net/sched/sch_ingress.c | 1 + net/sched/sch_mqprio.c | 1 + net/sched/sch_mqprio_lib.c | 1 + net/sched/sch_multiq.c | 1 + net/sched/sch_netem.c | 1 + net/sched/sch_plug.c | 1 + net/sched/sch_prio.c | 1 + net/sched/sch_qfq.c | 1 + net/sched/sch_red.c | 1 + net/sched/sch_sfq.c | 1 + net/sched/sch_skbprio.c | 1 + net/sched/sch_taprio.c | 1 + net/sched/sch_tbf.c | 1 + net/sched/sch_teql.c | 1 + 23 files changed, 23 insertions(+) (limited to 'net') diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index cac870eb7897..9a0b85190a2c 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -574,3 +574,4 @@ static void __exit cbs_module_exit(void) module_init(cbs_module_init) module_exit(cbs_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Credit Based shaper"); diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 19c851125901..ae1da08e268f 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -513,3 +513,4 @@ module_init(choke_module_init) module_exit(choke_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Choose and keep responsive flows scheduler"); diff --git 
a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 19901e77cd3b..097740a9afea 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -495,3 +495,4 @@ static void __exit drr_exit(void) module_init(drr_init); module_exit(drr_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Deficit Round Robin scheduler"); diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c index 61d1f0e32cf3..4808159a5466 100644 --- a/net/sched/sch_etf.c +++ b/net/sched/sch_etf.c @@ -513,3 +513,4 @@ static void __exit etf_module_exit(void) module_init(etf_module_init) module_exit(etf_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Earliest TxTime First (ETF) qdisc"); diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index b10efeaf0629..f7c88495946b 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -826,3 +826,4 @@ static void __exit ets_exit(void) module_init(ets_init); module_exit(ets_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Enhanced Transmission Selection(ETS) scheduler"); diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index e1040421b797..450f5c67ac49 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -269,3 +269,4 @@ struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops, return q ? 
: ERR_PTR(err); } EXPORT_SYMBOL(fifo_create_dflt); +MODULE_DESCRIPTION("Single queue packet and byte based First In First Out(P/BFIFO) scheduler"); diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 872d127c9db4..8c61eb3dc943 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -945,3 +945,4 @@ module_init(gred_module_init) module_exit(gred_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Generic Random Early Detection qdisc"); diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 880c5f16b29c..16c45da4036a 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1693,5 +1693,6 @@ hfsc_cleanup(void) } MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Hierarchical Fair Service Curve scheduler"); module_init(hfsc_init); module_exit(hfsc_cleanup); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 0d947414e616..7349233eaa9b 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -2179,3 +2179,4 @@ static void __exit htb_module_exit(void) module_init(htb_module_init) module_exit(htb_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Hierarchical Token Bucket scheduler"); diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index a463a63192c3..5fa9eaa79bfc 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -370,3 +370,4 @@ module_exit(ingress_module_exit); MODULE_ALIAS("sch_clsact"); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Ingress and clsact based ingress and egress qdiscs"); diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 793009f445c0..43e53ee00a56 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -789,3 +789,4 @@ module_init(mqprio_module_init); module_exit(mqprio_module_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Classful multiqueue prio qdisc"); diff --git a/net/sched/sch_mqprio_lib.c b/net/sched/sch_mqprio_lib.c index 83b3793c4012..b3a5572c167b 100644 --- a/net/sched/sch_mqprio_lib.c +++ b/net/sched/sch_mqprio_lib.c @@ -129,3 
+129,4 @@ void mqprio_fp_to_offload(u32 fp[TC_QOPT_MAX_QUEUE], EXPORT_SYMBOL_GPL(mqprio_fp_to_offload); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Shared mqprio qdisc code currently between taprio and mqprio"); diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 75c9c860182b..d66d5f0ec080 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -410,3 +410,4 @@ module_init(multiq_module_init) module_exit(multiq_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Multi queue to hardware queue mapping qdisc"); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 6ba2dc191ed9..fa678eb88528 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -1307,3 +1307,4 @@ static void __exit netem_module_exit(void) module_init(netem_module_init) module_exit(netem_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Network characteristics emulator qdisc"); diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c index 35f49edf63db..992f0c8d7988 100644 --- a/net/sched/sch_plug.c +++ b/net/sched/sch_plug.c @@ -226,3 +226,4 @@ static void __exit plug_module_exit(void) module_init(plug_module_init) module_exit(plug_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Qdisc to plug and unplug traffic via netlink control"); diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index fdc5ef52c3ee..8ecdd3ef6f8e 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -433,3 +433,4 @@ module_init(prio_module_init) module_exit(prio_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Simple 3-band priority qdisc"); diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 28315166fe8e..48a604c320c7 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1535,3 +1535,4 @@ static void __exit qfq_exit(void) module_init(qfq_init); module_exit(qfq_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Quick Fair Queueing Plus qdisc"); diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 
16277b6a0238..607b6c8b3a9b 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -563,3 +563,4 @@ module_init(red_module_init) module_exit(red_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Random Early Detection qdisc"); diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 66dcb18638fe..eb77558fa367 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -937,3 +937,4 @@ static void __exit sfq_module_exit(void) module_init(sfq_module_init) module_exit(sfq_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Stochastic Fairness qdisc"); diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c index 5df2dacb7b1a..28beb11762d8 100644 --- a/net/sched/sch_skbprio.c +++ b/net/sched/sch_skbprio.c @@ -307,3 +307,4 @@ module_init(skbprio_module_init) module_exit(skbprio_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SKB priority based scheduling qdisc"); diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 2e1949de4171..31a8252bd09c 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -2572,3 +2572,4 @@ static void __exit taprio_module_exit(void) module_init(taprio_module_init); module_exit(taprio_module_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Time Aware Priority qdisc"); diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 17d2d00ddb18..dd6b1a723bf7 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -621,3 +621,4 @@ static void __exit tbf_module_exit(void) module_init(tbf_module_init) module_exit(tbf_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Token Bucket Filter qdisc"); diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 7721239c185f..59304611dc00 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -523,3 +523,4 @@ module_init(teql_init); module_exit(teql_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("True (or trivial) link equalizer qdisc"); -- cgit v1.2.3-70-g09d2 From 05f0431bb90f2ee3657e7fc2678f11a1f9b778b7 Mon Sep 17 
00:00:00 2001 From: Jiri Pirko Date: Mon, 30 Oct 2023 17:17:50 +0100 Subject: netlink: specs: devlink: add forgotten port function caps enum values Add two enum values that the blamed commit omitted. Fixes: f2f9dd164db0 ("netlink: specs: devlink: add the remaining command to generate complete split_ops") Signed-off-by: Jiri Pirko Link: https://lore.kernel.org/r/20231030161750.110420-1-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/devlink.yaml | 4 ++++ net/devlink/netlink_gen.c | 2 +- tools/net/ynl/generated/devlink-user.c | 2 ++ 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml index c6ba4889575a..572d83a414d0 100644 --- a/Documentation/netlink/specs/devlink.yaml +++ b/Documentation/netlink/specs/devlink.yaml @@ -71,6 +71,10 @@ definitions: name: roce-bit - name: migratable-bit + - + name: ipsec-crypto-bit + - + name: ipsec-packet-bit - type: enum name: sb-threshold-type diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c index 9cbae0169249..788dfdc498a9 100644 --- a/net/devlink/netlink_gen.c +++ b/net/devlink/netlink_gen.c @@ -15,7 +15,7 @@ const struct nla_policy devlink_dl_port_function_nl_policy[DEVLINK_PORT_FN_ATTR_ [DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .type = NLA_BINARY, }, [DEVLINK_PORT_FN_ATTR_STATE] = NLA_POLICY_MAX(NLA_U8, 1), [DEVLINK_PORT_FN_ATTR_OPSTATE] = NLA_POLICY_MAX(NLA_U8, 1), - [DEVLINK_PORT_FN_ATTR_CAPS] = NLA_POLICY_BITFIELD32(3), + [DEVLINK_PORT_FN_ATTR_CAPS] = NLA_POLICY_BITFIELD32(15), }; const struct nla_policy devlink_dl_selftest_id_nl_policy[DEVLINK_ATTR_SELFTEST_ID_FLASH + 1] = { diff --git a/tools/net/ynl/generated/devlink-user.c b/tools/net/ynl/generated/devlink-user.c index 75b744b47986..bc5065bd99b2 100644 --- a/tools/net/ynl/generated/devlink-user.c +++ b/tools/net/ynl/generated/devlink-user.c @@ -121,6 +121,8 @@ const char *devlink_port_fn_opstate_str(enum 
devlink_port_fn_opstate value) static const char * const devlink_port_fn_attr_cap_strmap[] = { [0] = "roce-bit", [1] = "migratable-bit", + [2] = "ipsec-crypto-bit", + [3] = "ipsec-packet-bit", }; const char *devlink_port_fn_attr_cap_str(enum devlink_port_fn_attr_cap value) -- cgit v1.2.3-70-g09d2 From 7b3ba18703a63f6fd487183b9262b08e5632da1b Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 25 Oct 2023 19:42:38 -0400 Subject: llc: verify mac len before reading mac header LLC reads the mac header with eth_hdr without verifying that the skb has an Ethernet header. Syzbot was able to enter llc_rcv on a tun device. Tun can insert packets without mac len and with user configurable skb->protocol (passing a tun_pi header when not configuring IFF_NO_PI). BUG: KMSAN: uninit-value in llc_station_ac_send_test_r net/llc/llc_station.c:81 [inline] BUG: KMSAN: uninit-value in llc_station_rcv+0x6fb/0x1290 net/llc/llc_station.c:111 llc_station_ac_send_test_r net/llc/llc_station.c:81 [inline] llc_station_rcv+0x6fb/0x1290 net/llc/llc_station.c:111 llc_rcv+0xc5d/0x14a0 net/llc/llc_input.c:218 __netif_receive_skb_one_core net/core/dev.c:5523 [inline] __netif_receive_skb+0x1a6/0x5a0 net/core/dev.c:5637 netif_receive_skb_internal net/core/dev.c:5723 [inline] netif_receive_skb+0x58/0x660 net/core/dev.c:5782 tun_rx_batched+0x3ee/0x980 drivers/net/tun.c:1555 tun_get_user+0x54c5/0x69c0 drivers/net/tun.c:2002 Add a mac_len test before all three eth_hdr(skb) calls under net/llc. There are further uses in include/net/llc_pdu.h. All these are protected by a test skb->protocol == ETH_P_802_2. Which does not protect against this tun scenario. But the mac_len test added in this patch in llc_fixup_skb will indirectly protect those too. That is called from llc_rcv before any other LLC code. It is tempting to just add a blanket mac_len check in llc_rcv, but not sure whether that could break valid LLC paths that do not assume an Ethernet header. 
802.2 LLC may be used on top of non-802.3 protocols in principle. The commit referenced below shows that it used to be, on top of Token Ring. At least one of the three eth_hdr uses goes back to before the start of git history. But the one that syzbot exercises is introduced in this commit. That commit is old enough (2008) that effectively all stable kernels should receive this. Fixes: f83f1768f833 ("[LLC]: skb allocation size for responses") Reported-by: syzbot+a8c7be6dee0de1b669cc@syzkaller.appspotmail.com Signed-off-by: Willem de Bruijn Link: https://lore.kernel.org/r/20231025234251.3796495-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- net/llc/llc_input.c | 10 ++++++++-- net/llc/llc_s_ac.c | 3 +++ net/llc/llc_station.c | 3 +++ 3 files changed, 14 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index 7cac441862e2..51bccfb00a9c 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -127,8 +127,14 @@ static inline int llc_fixup_skb(struct sk_buff *skb) skb->transport_header += llc_len; skb_pull(skb, llc_len); if (skb->protocol == htons(ETH_P_802_2)) { - __be16 pdulen = eth_hdr(skb)->h_proto; - s32 data_size = ntohs(pdulen) - llc_len; + __be16 pdulen; + s32 data_size; + + if (skb->mac_len < ETH_HLEN) + return 0; + + pdulen = eth_hdr(skb)->h_proto; + data_size = ntohs(pdulen) - llc_len; if (data_size < 0 || !pskb_may_pull(skb, data_size)) diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c index 79d1cef8f15a..06fb8e6944b0 100644 --- a/net/llc/llc_s_ac.c +++ b/net/llc/llc_s_ac.c @@ -153,6 +153,9 @@ int llc_sap_action_send_test_r(struct llc_sap *sap, struct sk_buff *skb) int rc = 1; u32 data_size; + if (skb->mac_len < ETH_HLEN) + return 1; + llc_pdu_decode_sa(skb, mac_da); llc_pdu_decode_da(skb, mac_sa); llc_pdu_decode_ssap(skb, &dsap); diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index 05c6ae092053..f50654292510 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c 
@@ -76,6 +76,9 @@ static int llc_station_ac_send_test_r(struct sk_buff *skb) u32 data_size; struct sk_buff *nskb; + if (skb->mac_len < ETH_HLEN) + goto out; + /* The test request command is type U (llc_len = 3) */ data_size = ntohs(eth_hdr(skb)->h_proto) - 3; nskb = llc_alloc_frame(NULL, skb->dev, LLC_PDU_TYPE_U, data_size); -- cgit v1.2.3-70-g09d2 From 876f8ab52363f649bcc74072157dfd7adfbabc0d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 27 Oct 2023 15:19:01 +0300 Subject: hsr: Prevent use after free in prp_create_tagged_frame() The prp_fill_rct() function can fail. In that situation, it frees the skb and returns NULL. Meanwhile on the success path, it returns the original skb. So it's straightforward to fix the bug by using the returned value. Fixes: 451d8123f897 ("net: prp: add packet handling support") Signed-off-by: Dan Carpenter Acked-by: Paolo Abeni Link: https://lore.kernel.org/r/57af1f28-7f57-4a96-bcd3-b7a0f2340845@moroto.mountain Signed-off-by: Jakub Kicinski --- net/hsr/hsr_forward.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index b71dab630a87..80cdc6f6b34c 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -342,9 +342,7 @@ struct sk_buff *prp_create_tagged_frame(struct hsr_frame_info *frame, skb = skb_copy_expand(frame->skb_std, 0, skb_tailroom(frame->skb_std) + HSR_HLEN, GFP_ATOMIC); - prp_fill_rct(skb, frame, port); - - return skb; + return prp_fill_rct(skb, frame, port); } static void hsr_deliver_master(struct sk_buff *skb, struct net_device *dev, -- cgit v1.2.3-70-g09d2 From 19b3f72a41a8751e26bffc093bb7e1cef29ad579 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Mon, 30 Oct 2023 16:55:40 +0900 Subject: tipc: Change nla_policy for bearer-related names to NLA_NUL_STRING syzbot reported the following uninit-value access issue [1]: ===================================================== BUG: KMSAN: uninit-value in strlen lib/string.c:418 
[inline] BUG: KMSAN: uninit-value in strstr+0xb8/0x2f0 lib/string.c:756 strlen lib/string.c:418 [inline] strstr+0xb8/0x2f0 lib/string.c:756 tipc_nl_node_reset_link_stats+0x3ea/0xb50 net/tipc/node.c:2595 genl_family_rcv_msg_doit net/netlink/genetlink.c:971 [inline] genl_family_rcv_msg net/netlink/genetlink.c:1051 [inline] genl_rcv_msg+0x11ec/0x1290 net/netlink/genetlink.c:1066 netlink_rcv_skb+0x371/0x650 net/netlink/af_netlink.c:2545 genl_rcv+0x40/0x60 net/netlink/genetlink.c:1075 netlink_unicast_kernel net/netlink/af_netlink.c:1342 [inline] netlink_unicast+0xf47/0x1250 net/netlink/af_netlink.c:1368 netlink_sendmsg+0x1238/0x13d0 net/netlink/af_netlink.c:1910 sock_sendmsg_nosec net/socket.c:730 [inline] sock_sendmsg net/socket.c:753 [inline] ____sys_sendmsg+0x9c2/0xd60 net/socket.c:2541 ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2595 __sys_sendmsg net/socket.c:2624 [inline] __do_sys_sendmsg net/socket.c:2633 [inline] __se_sys_sendmsg net/socket.c:2631 [inline] __x64_sys_sendmsg+0x307/0x490 net/socket.c:2631 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Uninit was created at: slab_post_alloc_hook+0x12f/0xb70 mm/slab.h:767 slab_alloc_node mm/slub.c:3478 [inline] kmem_cache_alloc_node+0x577/0xa80 mm/slub.c:3523 kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:559 __alloc_skb+0x318/0x740 net/core/skbuff.c:650 alloc_skb include/linux/skbuff.h:1286 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1214 [inline] netlink_sendmsg+0xb34/0x13d0 net/netlink/af_netlink.c:1885 sock_sendmsg_nosec net/socket.c:730 [inline] sock_sendmsg net/socket.c:753 [inline] ____sys_sendmsg+0x9c2/0xd60 net/socket.c:2541 ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2595 __sys_sendmsg net/socket.c:2624 [inline] __do_sys_sendmsg net/socket.c:2633 [inline] __se_sys_sendmsg net/socket.c:2631 [inline] __x64_sys_sendmsg+0x307/0x490 net/socket.c:2631 do_syscall_x64 arch/x86/entry/common.c:50 [inline] 
do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd TIPC bearer-related names including link names must be null-terminated strings. If a link name which is not null-terminated is passed through netlink, strstr() and similar functions can cause buffer overrun. This causes the above issue. This patch changes the nla_policy for bearer-related names from NLA_STRING to NLA_NUL_STRING. This resolves the issue by ensuring that only null-terminated strings are accepted as bearer-related names. syzbot reported similar uninit-value issue related to bearer names [2]. The root cause of this issue is that a non-null-terminated bearer name was passed. This patch also resolved this issue. Fixes: 7be57fc69184 ("tipc: add link get/dump to new netlink api") Fixes: 0655f6a8635b ("tipc: add bearer disable/enable to new netlink api") Reported-and-tested-by: syzbot+5138ca807af9d2b42574@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=5138ca807af9d2b42574 [1] Reported-and-tested-by: syzbot+9425c47dccbcb4c17d51@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=9425c47dccbcb4c17d51 [2] Signed-off-by: Shigeru Yoshida Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20231030075540.3784537-1-syoshida@redhat.com Signed-off-by: Jakub Kicinski --- net/tipc/netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index e8fd257c0e68..1a9a5bdaccf4 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -88,7 +88,7 @@ const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = { const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = { [TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC }, - [TIPC_NLA_LINK_NAME] = { .type = NLA_STRING, + [TIPC_NLA_LINK_NAME] = { .type = NLA_NUL_STRING, .len = TIPC_MAX_LINK_NAME }, [TIPC_NLA_LINK_MTU] = { .type = NLA_U32 }, [TIPC_NLA_LINK_BROADCAST] = { .type = NLA_FLAG }, 
@@ -125,7 +125,7 @@ const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { const struct nla_policy tipc_nl_bearer_policy[TIPC_NLA_BEARER_MAX + 1] = { [TIPC_NLA_BEARER_UNSPEC] = { .type = NLA_UNSPEC }, - [TIPC_NLA_BEARER_NAME] = { .type = NLA_STRING, + [TIPC_NLA_BEARER_NAME] = { .type = NLA_NUL_STRING, .len = TIPC_MAX_BEARER_NAME }, [TIPC_NLA_BEARER_PROP] = { .type = NLA_NESTED }, [TIPC_NLA_BEARER_DOMAIN] = { .type = NLA_U32 } -- cgit v1.2.3-70-g09d2 From 74da77921333171766031ea213b11f1e650814f9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 31 Oct 2023 12:51:09 +0300 Subject: net/tcp_sigpool: Fix some off by one bugs The "cpool_populated" variable is the number of elements in the cpool[] array that have been populated. It is incremented in tcp_sigpool_alloc_ahash() every time we populate a new element. Unpopulated elements are NULL but if we have populated every element then this code will read one element beyond the end of the array. Fixes: 8c73b26315aa ("net/tcp: Prepare tcp_md5sig_pool for TCP-AO") Signed-off-by: Dan Carpenter Reviewed-by: Dmitry Safonov Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/ce915d61-04bc-44fb-b450-35fcc9fc8831@moroto.mountain Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_sigpool.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_sigpool.c b/net/ipv4/tcp_sigpool.c index 65a8eaae2fec..55b310a722c7 100644 --- a/net/ipv4/tcp_sigpool.c +++ b/net/ipv4/tcp_sigpool.c @@ -231,7 +231,7 @@ static void cpool_schedule_cleanup(struct kref *kref) */ void tcp_sigpool_release(unsigned int id) { - if (WARN_ON_ONCE(id > cpool_populated || !cpool[id].alg)) + if (WARN_ON_ONCE(id >= cpool_populated || !cpool[id].alg)) return; /* slow-path */ @@ -245,7 +245,7 @@ EXPORT_SYMBOL_GPL(tcp_sigpool_release); */ void tcp_sigpool_get(unsigned int id) { - if (WARN_ON_ONCE(id > cpool_populated || !cpool[id].alg)) + if (WARN_ON_ONCE(id >= cpool_populated || !cpool[id].alg)) 
return; kref_get(&cpool[id].kref); } @@ -256,7 +256,7 @@ int tcp_sigpool_start(unsigned int id, struct tcp_sigpool *c) __cond_acquires(RC struct crypto_ahash *hash; rcu_read_lock_bh(); - if (WARN_ON_ONCE(id > cpool_populated || !cpool[id].alg)) { + if (WARN_ON_ONCE(id >= cpool_populated || !cpool[id].alg)) { rcu_read_unlock_bh(); return -EINVAL; } @@ -301,7 +301,7 @@ EXPORT_SYMBOL_GPL(tcp_sigpool_end); */ size_t tcp_sigpool_algo(unsigned int id, char *buf, size_t buf_len) { - if (WARN_ON_ONCE(id > cpool_populated || !cpool[id].alg)) + if (WARN_ON_ONCE(id >= cpool_populated || !cpool[id].alg)) return -EINVAL; return strscpy(buf, cpool[id].alg, buf_len); -- cgit v1.2.3-70-g09d2 From 61e4a86600029e6e8d468d1fad6b6c749bebed19 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 27 Oct 2023 00:49:34 +0100 Subject: rxrpc: Fix two connection reaping bugs Fix two connection reaping bugs: (1) rxrpc_connection_expiry is in units of seconds, so rxrpc_disconnect_call() needs to multiply it by HZ when adding it to jiffies. (2) rxrpc_client_conn_reap_timeout() should set RXRPC_CLIENT_REAP_TIMER if local->kill_all_client_conns is clear, not if it is set (in which case we don't need the timer). Without this, old client connections don't get cleaned up until the local endpoint is cleaned up. 
Fixes: 5040011d073d ("rxrpc: Make the local endpoint hold a ref on a connected call") Fixes: 0d6bf319bc5a ("rxrpc: Move the client conn cache management to the I/O thread") Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/783911.1698364174@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski --- net/rxrpc/conn_object.c | 2 +- net/rxrpc/local_object.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index ac85d4644a3c..df8a271948a1 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -212,7 +212,7 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) conn->idle_timestamp = jiffies; if (atomic_dec_and_test(&conn->active)) rxrpc_set_service_reap_timer(conn->rxnet, - jiffies + rxrpc_connection_expiry); + jiffies + rxrpc_connection_expiry * HZ); } rxrpc_put_call(call, rxrpc_call_put_io_thread); diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 7d910aee4f8c..c553a30e9c83 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -87,7 +87,7 @@ static void rxrpc_client_conn_reap_timeout(struct timer_list *timer) struct rxrpc_local *local = container_of(timer, struct rxrpc_local, client_conn_reap_timer); - if (local->kill_all_client_conns && + if (!local->kill_all_client_conns && test_and_set_bit(RXRPC_CLIENT_CONN_REAP_TIMER, &local->client_conn_flags)) rxrpc_wake_up_io_thread(local); } -- cgit v1.2.3-70-g09d2 From 391145ba2accc48b596f3d438af1a6255b62a555 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Tue, 31 Oct 2023 14:56:24 -0700 Subject: bpf: Add __bpf_kfunc_{start,end}_defs macros BPF kfuncs are meant to be called from BPF programs. Accordingly, most kfuncs are not called from anywhere in the kernel, which the -Wmissing-prototypes warning is unhappy about. We've peppered __diag_ignore_all("-Wmissing-prototypes", ... 
everywhere kfuncs are defined in the codebase to suppress this warning. This patch adds two macros meant to bound one or many kfunc definitions. All existing kfunc definitions which use these __diag calls to suppress -Wmissing-prototypes are migrated to use the newly-introduced macros. A new __diag_ignore_all - for "-Wmissing-declarations" - is added to the __bpf_kfunc_start_defs macro based on feedback from Andrii on an earlier version of this patch [0] and another recent mailing list thread [1]. In the future we might need to ignore different warnings or do other kfunc-specific things. This change will make it easier to make such modifications for all kfunc defs. [0]: https://lore.kernel.org/bpf/CAEf4BzaE5dRWtK6RPLnjTW-MW9sx9K3Fn6uwqCTChK2Dcb1Xig@mail.gmail.com/ [1]: https://lore.kernel.org/bpf/ZT+2qCc%2FaXep0%2FLf@krava/ Signed-off-by: Dave Marchevsky Suggested-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Cc: Jiri Olsa Acked-by: Jiri Olsa Acked-by: David Vernet Acked-by: Yafang Shao Link: https://lore.kernel.org/r/20231031215625.2343848-1-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- Documentation/bpf/kfuncs.rst | 6 ++---- include/linux/btf.h | 9 +++++++++ kernel/bpf/bpf_iter.c | 6 ++---- kernel/bpf/cgroup_iter.c | 6 ++---- kernel/bpf/cpumask.c | 6 ++---- kernel/bpf/helpers.c | 6 ++---- kernel/bpf/map_iter.c | 6 ++---- kernel/bpf/task_iter.c | 18 ++++++------------ kernel/trace/bpf_trace.c | 6 ++---- net/bpf/test_run.c | 7 +++---- net/core/filter.c | 13 ++++--------- net/core/xdp.c | 6 ++---- net/ipv4/fou_bpf.c | 6 ++---- net/netfilter/nf_conntrack_bpf.c | 6 ++---- net/netfilter/nf_nat_bpf.c | 6 ++---- net/xfrm/xfrm_interface_bpf.c | 6 ++---- 16 files changed, 46 insertions(+), 73 deletions(-) (limited to 'net') diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst index 0d2647fb358d..723408e399ab 100644 --- a/Documentation/bpf/kfuncs.rst +++ b/Documentation/bpf/kfuncs.rst @@ -37,16 +37,14 @@ prototype in a header for the 
wrapper kfunc. An example is given below:: /* Disables missing prototype warnings */ - __diag_push(); - __diag_ignore_all("-Wmissing-prototypes", - "Global kfuncs as their definitions will be in BTF"); + __bpf_kfunc_start_defs(); __bpf_kfunc struct task_struct *bpf_find_get_task_by_vpid(pid_t nr) { return find_get_task_by_vpid(nr); } - __diag_pop(); + __bpf_kfunc_end_defs(); A wrapper kfunc is often needed when we need to annotate parameters of the kfunc. Otherwise one may directly make the kfunc visible to the BPF program by diff --git a/include/linux/btf.h b/include/linux/btf.h index c2231c64d60b..dc5ce962f600 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -84,6 +84,15 @@ */ #define __bpf_kfunc __used noinline +#define __bpf_kfunc_start_defs() \ + __diag_push(); \ + __diag_ignore_all("-Wmissing-declarations", \ + "Global kfuncs as their definitions will be in BTF");\ + __diag_ignore_all("-Wmissing-prototypes", \ + "Global kfuncs as their definitions will be in BTF") + +#define __bpf_kfunc_end_defs() __diag_pop() + /* * Return the name of the passed struct, if exists, or halt the build if for * example the structure gets renamed. 
In this way, developers have to revisit diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index 833faa04461b..0fae79164187 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -782,9 +782,7 @@ struct bpf_iter_num_kern { int end; /* final value, exclusive */ } __aligned(8); -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc_start_defs(); __bpf_kfunc int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) { @@ -843,4 +841,4 @@ __bpf_kfunc void bpf_iter_num_destroy(struct bpf_iter_num *it) s->cur = s->end = 0; } -__diag_pop(); +__bpf_kfunc_end_defs(); diff --git a/kernel/bpf/cgroup_iter.c b/kernel/bpf/cgroup_iter.c index 209e5135f9fb..d1b5c5618dd7 100644 --- a/kernel/bpf/cgroup_iter.c +++ b/kernel/bpf/cgroup_iter.c @@ -305,9 +305,7 @@ struct bpf_iter_css_kern { unsigned int flags; } __attribute__((aligned(8))); -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc_start_defs(); __bpf_kfunc int bpf_iter_css_new(struct bpf_iter_css *it, struct cgroup_subsys_state *start, unsigned int flags) @@ -358,4 +356,4 @@ __bpf_kfunc void bpf_iter_css_destroy(struct bpf_iter_css *it) { } -__diag_pop(); \ No newline at end of file +__bpf_kfunc_end_defs(); diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c index 6983af8e093c..e01c741e54e7 100644 --- a/kernel/bpf/cpumask.c +++ b/kernel/bpf/cpumask.c @@ -34,9 +34,7 @@ static bool cpu_valid(u32 cpu) return cpu < nr_cpu_ids; } -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global kfuncs as their definitions will be in BTF"); +__bpf_kfunc_start_defs(); /** * bpf_cpumask_create() - Create a mutable BPF cpumask. 
@@ -407,7 +405,7 @@ __bpf_kfunc u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1, return cpumask_any_and_distribute(src1, src2); } -__diag_pop(); +__bpf_kfunc_end_defs(); BTF_SET8_START(cpumask_kfunc_btf_ids) BTF_ID_FLAGS(func, bpf_cpumask_create, KF_ACQUIRE | KF_RET_NULL) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 95449ea7cc1b..abe82105e33e 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1886,9 +1886,7 @@ void bpf_rb_root_free(const struct btf_field *field, void *rb_root, } } -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc_start_defs(); __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign) { @@ -2505,7 +2503,7 @@ __bpf_kfunc void bpf_throw(u64 cookie) WARN(1, "A call to BPF exception callback should never return\n"); } -__diag_pop(); +__bpf_kfunc_end_defs(); BTF_SET8_START(generic_btf_ids) #ifdef CONFIG_KEXEC_CORE diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c index 6fc9dae9edc8..6abd7c5df4b3 100644 --- a/kernel/bpf/map_iter.c +++ b/kernel/bpf/map_iter.c @@ -193,9 +193,7 @@ static int __init bpf_map_iter_init(void) late_initcall(bpf_map_iter_init); -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc_start_defs(); __bpf_kfunc s64 bpf_map_sum_elem_count(const struct bpf_map *map) { @@ -213,7 +211,7 @@ __bpf_kfunc s64 bpf_map_sum_elem_count(const struct bpf_map *map) return ret; } -__diag_pop(); +__bpf_kfunc_end_defs(); BTF_SET8_START(bpf_map_iter_kfunc_ids) BTF_ID_FLAGS(func, bpf_map_sum_elem_count, KF_TRUSTED_ARGS) diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 8967d1ac7551..4e156dca48de 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -822,9 +822,7 @@ struct bpf_iter_task_vma_kern { struct bpf_iter_task_vma_kern_data *data; } __attribute__((aligned(8))); -__diag_push(); 
-__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc_start_defs(); __bpf_kfunc int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it, struct task_struct *task, u64 addr) @@ -890,7 +888,7 @@ __bpf_kfunc void bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it) } } -__diag_pop(); +__bpf_kfunc_end_defs(); #ifdef CONFIG_CGROUPS @@ -902,9 +900,7 @@ struct bpf_iter_css_task_kern { struct css_task_iter *css_it; } __attribute__((aligned(8))); -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc_start_defs(); __bpf_kfunc int bpf_iter_css_task_new(struct bpf_iter_css_task *it, struct cgroup_subsys_state *css, unsigned int flags) @@ -950,7 +946,7 @@ __bpf_kfunc void bpf_iter_css_task_destroy(struct bpf_iter_css_task *it) bpf_mem_free(&bpf_global_ma, kit->css_it); } -__diag_pop(); +__bpf_kfunc_end_defs(); #endif /* CONFIG_CGROUPS */ @@ -973,9 +969,7 @@ enum { BPF_TASK_ITER_PROC_THREADS }; -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc_start_defs(); __bpf_kfunc int bpf_iter_task_new(struct bpf_iter_task *it, struct task_struct *task__nullable, unsigned int flags) @@ -1045,7 +1039,7 @@ __bpf_kfunc void bpf_iter_task_destroy(struct bpf_iter_task *it) { } -__diag_pop(); +__bpf_kfunc_end_defs(); DEFINE_PER_CPU(struct mmap_unlock_irq_work, mmap_unlock_work); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index df697c74d519..84e8a0f6e4e0 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1252,9 +1252,7 @@ static const struct bpf_func_proto bpf_get_func_arg_cnt_proto = { }; #ifdef CONFIG_KEYS -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "kfuncs which will be used in BPF programs"); +__bpf_kfunc_start_defs(); /** * bpf_lookup_user_key - lookup a key by its serial @@ -1404,7 +1402,7 
@@ __bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr, } #endif /* CONFIG_SYSTEM_DATA_VERIFICATION */ -__diag_pop(); +__bpf_kfunc_end_defs(); BTF_SET8_START(key_sig_kfunc_set) BTF_ID_FLAGS(func, bpf_lookup_user_key, KF_ACQUIRE | KF_RET_NULL | KF_SLEEPABLE) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 0841f8d82419..c9fdcc5cdce1 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -503,9 +503,8 @@ out: * architecture dependent calling conventions. 7+ can be supported in the * future. */ -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc_start_defs(); + __bpf_kfunc int bpf_fentry_test1(int a) { return a + 1; @@ -605,7 +604,7 @@ __bpf_kfunc void bpf_kfunc_call_memb_release(struct prog_test_member *p) { } -__diag_pop(); +__bpf_kfunc_end_defs(); BTF_SET8_START(bpf_test_modify_return_ids) BTF_ID_FLAGS(func, bpf_modify_return_test) diff --git a/net/core/filter.c b/net/core/filter.c index 21d75108c2e9..383f96b0a1c7 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -11767,9 +11767,7 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id) return func; } -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc_start_defs(); __bpf_kfunc int bpf_dynptr_from_skb(struct sk_buff *skb, u64 flags, struct bpf_dynptr_kern *ptr__uninit) { @@ -11816,7 +11814,7 @@ __bpf_kfunc int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern, return 0; } -__diag_pop(); +__bpf_kfunc_end_defs(); int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, struct bpf_dynptr_kern *ptr__uninit) @@ -11879,10 +11877,7 @@ static int __init bpf_kfunc_init(void) } late_initcall(bpf_kfunc_init); -/* Disables missing prototype warnings */ -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); 
+__bpf_kfunc_start_defs(); /* bpf_sock_destroy: Destroy the given socket with ECONNABORTED error code. * @@ -11916,7 +11911,7 @@ __bpf_kfunc int bpf_sock_destroy(struct sock_common *sock) return sk->sk_prot->diag_destroy(sk, ECONNABORTED); } -__diag_pop() +__bpf_kfunc_end_defs(); BTF_SET8_START(bpf_sk_iter_kfunc_ids) BTF_ID_FLAGS(func, bpf_sock_destroy, KF_TRUSTED_ARGS) diff --git a/net/core/xdp.c b/net/core/xdp.c index df4789ab512d..b6f1d6dab3f2 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -696,9 +696,7 @@ struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf) return nxdpf; } -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc_start_defs(); /** * bpf_xdp_metadata_rx_timestamp - Read XDP frame RX timestamp. @@ -738,7 +736,7 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash, return -EOPNOTSUPP; } -__diag_pop(); +__bpf_kfunc_end_defs(); BTF_SET8_START(xdp_metadata_kfunc_ids) #define XDP_METADATA_KFUNC(_, __, name, ___) BTF_ID_FLAGS(func, name, KF_TRUSTED_ARGS) diff --git a/net/ipv4/fou_bpf.c b/net/ipv4/fou_bpf.c index 3760a14b6b57..4da03bf45c9b 100644 --- a/net/ipv4/fou_bpf.c +++ b/net/ipv4/fou_bpf.c @@ -22,9 +22,7 @@ enum bpf_fou_encap_type { FOU_BPF_ENCAP_GUE, }; -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in BTF"); +__bpf_kfunc_start_defs(); /* bpf_skb_set_fou_encap - Set FOU encap parameters * @@ -100,7 +98,7 @@ __bpf_kfunc int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx, return 0; } -__diag_pop() +__bpf_kfunc_end_defs(); BTF_SET8_START(fou_kfunc_set) BTF_ID_FLAGS(func, bpf_skb_set_fou_encap) diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c index b21799d468d2..475358ec8212 100644 --- a/net/netfilter/nf_conntrack_bpf.c +++ b/net/netfilter/nf_conntrack_bpf.c @@ -230,9 +230,7 @@ static int _nf_conntrack_btf_struct_access(struct bpf_verifier_log 
*log, return 0; } -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in nf_conntrack BTF"); +__bpf_kfunc_start_defs(); /* bpf_xdp_ct_alloc - Allocate a new CT entry * @@ -467,7 +465,7 @@ __bpf_kfunc int bpf_ct_change_status(struct nf_conn *nfct, u32 status) return nf_ct_change_status_common(nfct, status); } -__diag_pop() +__bpf_kfunc_end_defs(); BTF_SET8_START(nf_ct_kfunc_set) BTF_ID_FLAGS(func, bpf_xdp_ct_alloc, KF_ACQUIRE | KF_RET_NULL) diff --git a/net/netfilter/nf_nat_bpf.c b/net/netfilter/nf_nat_bpf.c index 141ee7783223..6e3b2f58855f 100644 --- a/net/netfilter/nf_nat_bpf.c +++ b/net/netfilter/nf_nat_bpf.c @@ -12,9 +12,7 @@ #include #include -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in nf_nat BTF"); +__bpf_kfunc_start_defs(); /* bpf_ct_set_nat_info - Set source or destination nat address * @@ -54,7 +52,7 @@ __bpf_kfunc int bpf_ct_set_nat_info(struct nf_conn___init *nfct, return nf_nat_setup_info(ct, &range, manip) == NF_DROP ? 
-ENOMEM : 0; } -__diag_pop() +__bpf_kfunc_end_defs(); BTF_SET8_START(nf_nat_kfunc_set) BTF_ID_FLAGS(func, bpf_ct_set_nat_info, KF_TRUSTED_ARGS) diff --git a/net/xfrm/xfrm_interface_bpf.c b/net/xfrm/xfrm_interface_bpf.c index d74f3fd20f2b..7d5e920141e9 100644 --- a/net/xfrm/xfrm_interface_bpf.c +++ b/net/xfrm/xfrm_interface_bpf.c @@ -27,9 +27,7 @@ struct bpf_xfrm_info { int link; }; -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in xfrm_interface BTF"); +__bpf_kfunc_start_defs(); /* bpf_skb_get_xfrm_info - Get XFRM metadata * @@ -93,7 +91,7 @@ __bpf_kfunc int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx, const struct bp return 0; } -__diag_pop() +__bpf_kfunc_end_defs(); BTF_SET8_START(xfrm_ifc_kfunc_set) BTF_ID_FLAGS(func, bpf_skb_get_xfrm_info) -- cgit v1.2.3-70-g09d2 From 15fb6f2b6c4c3c129adc2412ae12ec15e60a6adb Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Tue, 31 Oct 2023 14:56:25 -0700 Subject: bpf: Add __bpf_hook_{start,end} macros Not all uses of __diag_ignore_all(...) in BPF-related code in order to suppress warnings are wrapping kfunc definitions. Some "hook point" definitions - small functions meant to be used as attach points for fentry and similar BPF progs - need to suppress -Wmissing-declarations. We could use __bpf_kfunc_{start,end}_defs added in the previous patch in such cases, but this might be confusing to someone unfamiliar with BPF internals. Instead, this patch adds __bpf_hook_{start,end} macros, currently having the same effect as __bpf_kfunc_{start,end}_defs, then uses them to suppress warnings for two hook points in the kernel itself and some bpf_testmod hook points as well. 
Signed-off-by: Dave Marchevsky Cc: Yafang Shao Acked-by: Jiri Olsa Acked-by: Yafang Shao Link: https://lore.kernel.org/r/20231031215625.2343848-2-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/btf.h | 2 ++ kernel/cgroup/rstat.c | 9 +++------ net/socket.c | 8 ++------ tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c | 6 ++---- 4 files changed, 9 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/include/linux/btf.h b/include/linux/btf.h index dc5ce962f600..59d404e22814 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -92,6 +92,8 @@ "Global kfuncs as their definitions will be in BTF") #define __bpf_kfunc_end_defs() __diag_pop() +#define __bpf_hook_start() __bpf_kfunc_start_defs() +#define __bpf_hook_end() __bpf_kfunc_end_defs() /* * Return the name of the passed struct, if exists, or halt the build if for diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index d80d7a608141..c0adb7254b45 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -156,19 +156,16 @@ static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos, * optimize away the callsite. Therefore, __weak is needed to ensure that the * call is still emitted, by telling the compiler that we don't know what the * function might eventually be. - * - * __diag_* below are needed to dismiss the missing prototype warning. 
*/ -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "kfuncs which will be used in BPF programs"); + +__bpf_hook_start(); __weak noinline void bpf_rstat_flush(struct cgroup *cgrp, struct cgroup *parent, int cpu) { } -__diag_pop(); +__bpf_hook_end(); /* see cgroup_rstat_flush() */ static void cgroup_rstat_flush_locked(struct cgroup *cgrp) diff --git a/net/socket.c b/net/socket.c index 0d1c4e78fc7f..3379c64217a4 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1685,20 +1685,16 @@ struct file *__sys_socket_file(int family, int type, int protocol) * Therefore, __weak is needed to ensure that the call is still * emitted, by telling the compiler that we don't know what the * function might eventually be. - * - * __diag_* below are needed to dismiss the missing prototype warning. */ -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "A fmod_ret entry point for BPF programs"); +__bpf_hook_start(); __weak noinline int update_socket_protocol(int family, int type, int protocol) { return protocol; } -__diag_pop(); +__bpf_hook_end(); int __sys_socket(int family, int type, int protocol) { diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index a5e246f7b202..91907b321f91 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -39,9 +39,7 @@ struct bpf_testmod_struct_arg_4 { int b; }; -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in bpf_testmod.ko BTF"); +__bpf_hook_start(); noinline int bpf_testmod_test_struct_arg_1(struct bpf_testmod_struct_arg_2 a, int b, int c) { @@ -335,7 +333,7 @@ noinline int bpf_fentry_shadow_test(int a) } EXPORT_SYMBOL_GPL(bpf_fentry_shadow_test); -__diag_pop(); +__bpf_hook_end(); static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { .attr = { .name = "bpf_testmod", .mode = 0666, }, -- cgit v1.2.3-70-g09d2 
From 8ffbd1669ed1d58939d6e878dffaa2f60bf961a4 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Mon, 30 Oct 2023 17:12:56 +0800 Subject: net: page_pool: add missing free_percpu when page_pool_init fail When ptr_ring_init() returns failure in page_pool_init(), free_percpu() is not called to free pool->recycle_stats, which may cause memory leak. Fixes: ad6fa1e1ab1b ("page_pool: Add recycle stats") Signed-off-by: Jian Shen Signed-off-by: Jijie Shao Reviewed-by: Yunsheng Lin Reviewed-by: Jiri Pirko Reviewed-by: Somnath Kotur Reviewed-by: Ilias Apalodimas Link: https://lore.kernel.org/r/20231030091256.2915394-1-shaojijie@huawei.com Signed-off-by: Paolo Abeni --- net/core/page_pool.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 5e409b98aba0..dec544337236 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -217,8 +217,12 @@ static int page_pool_init(struct page_pool *pool, return -ENOMEM; #endif - if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) + if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) { +#ifdef CONFIG_PAGE_POOL_STATS + free_percpu(pool->recycle_stats); +#endif return -ENOMEM; + } atomic_set(&pool->pages_state_release_cnt, 0); -- cgit v1.2.3-70-g09d2 From fa2df45af13091f76b89adb84a28f13818d5d631 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 30 Oct 2023 13:10:41 -0700 Subject: dccp: Call security_inet_conn_request() after setting IPv4 addresses. Initially, commit 4237c75c0a35 ("[MLSXFRM]: Auto-labeling of child sockets") introduced security_inet_conn_request() in some functions where reqsk is allocated. The hook is added just after the allocation, so reqsk's IPv4 remote address was not initialised then. However, SELinux/Smack started to read it in netlbl_req_setattr() after the cited commits. This bug was partially fixed by commit 284904aa7946 ("lsm: Relocate the IPv4 security_inet_conn_request() hooks"). 
This patch fixes the last bug in DCCPv4. Fixes: 389fb800ac8b ("netlabel: Label incoming TCP connections correctly in SELinux") Fixes: 07feee8f812f ("netlabel: Cleanup the Smack/NetLabel code to fix incoming TCP connections") Signed-off-by: Kuniyuki Iwashima Acked-by: Paul Moore Signed-off-by: Paolo Abeni --- net/dccp/ipv4.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 1b8cbfda6e5d..44b033fe1ef6 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -629,9 +629,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (dccp_parse_options(sk, dreq, skb)) goto drop_and_free; - if (security_inet_conn_request(sk, skb, req)) - goto drop_and_free; - ireq = inet_rsk(req); sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); @@ -639,6 +636,9 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ireq->ireq_family = AF_INET; ireq->ir_iif = READ_ONCE(sk->sk_bound_dev_if); + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + /* * Step 3: Process LISTEN state * -- cgit v1.2.3-70-g09d2 From 23be1e0e2a83a8543214d2599a31d9a2185a796b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 30 Oct 2023 13:10:42 -0700 Subject: dccp/tcp: Call security_inet_conn_request() after setting IPv6 addresses. Initially, commit 4237c75c0a35 ("[MLSXFRM]: Auto-labeling of child sockets") introduced security_inet_conn_request() in some functions where reqsk is allocated. The hook is added just after the allocation, so reqsk's IPv6 remote address was not initialised then. However, SELinux/Smack started to read it in netlbl_req_setattr() after commit e1adea927080 ("calipso: Allow request sockets to be relabelled by the lsm."). Commit 284904aa7946 ("lsm: Relocate the IPv4 security_inet_conn_request() hooks") fixed that kind of issue only in TCPv4 because IPv6 labeling was not supported at that time. 
Finally, the same issue was introduced again in IPv6. Let's apply the same fix on DCCPv6 and TCPv6. Fixes: e1adea927080 ("calipso: Allow request sockets to be relabelled by the lsm.") Signed-off-by: Kuniyuki Iwashima Acked-by: Paul Moore Signed-off-by: Paolo Abeni --- net/dccp/ipv6.c | 6 +++--- net/ipv6/syncookies.c | 7 ++++--- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 8d344b219f84..4550b680665a 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -360,15 +360,15 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (dccp_parse_options(sk, dreq, skb)) goto drop_and_free; - if (security_inet_conn_request(sk, skb, req)) - goto drop_and_free; - ireq = inet_rsk(req); ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; ireq->ireq_family = AF_INET6; ireq->ir_mark = inet_request_mark(sk, skb); + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 500f6ed3b8cf..12eedc6ca2cc 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -181,14 +181,15 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) treq = tcp_rsk(req); treq->tfo_listener = false; - if (security_inet_conn_request(sk, skb, req)) - goto out_free; - req->mss = mss; ireq->ir_rmt_port = th->source; ireq->ir_num = ntohs(th->dest); ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; + + if (security_inet_conn_request(sk, skb, req)) + goto out_free; + if (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { -- cgit v1.2.3-70-g09d2 From cdbab6236605dc11780779d9af689aea7d58cab1 Mon Sep 
17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 Oct 2023 06:19:45 +0000 Subject: tcp: fix fastopen code vs usec TS After the blamed commit, the TFO client-ack-dropped-then-recovery-ms-timestamps packetdrill test failed. David Morley and Neal Cardwell started investigating and Neal pointed out that we had: tcp_conn_request() tcp_try_fastopen() -> tcp_fastopen_create_child -> child = inet_csk(sk)->icsk_af_ops->syn_recv_sock() -> tcp_create_openreq_child() -> copy req_usec_ts from req: newtp->tcp_usec_ts = treq->req_usec_ts; // now the new TFO server socket always does usec TS, no matter // what the route options are... send_synack() -> tcp_make_synack() // disable tcp_rsk(req)->req_usec_ts if route option is not present: if (tcp_rsk(req)->req_usec_ts < 0) tcp_rsk(req)->req_usec_ts = dst_tcp_usec_ts(dst); tcp_conn_request() has the initial dst, we can initialize tcp_rsk(req)->req_usec_ts there instead of later in send_synack(); This means tcp_rsk(req)->req_usec_ts can be a boolean. Many thanks to David and Neal for their help. Fixes: 614e8316aa4c ("tcp: add support for usec resolution in TCP TS values") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202310302216.f79d78bc-oliver.sang@intel.com Suggested-by: Neal Cardwell Signed-off-by: Eric Dumazet Cc: David Morley Acked-by: Neal Cardwell Signed-off-by: David S.
Miller --- include/linux/tcp.h | 2 +- net/ipv4/syncookies.c | 2 +- net/ipv4/tcp_input.c | 7 ++++--- net/ipv4/tcp_output.c | 2 -- 4 files changed, 6 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index ec4e9367f5b0..68f3d315d2e1 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -152,7 +152,7 @@ struct tcp_request_sock { u64 snt_synack; /* first SYNACK sent time */ bool tfo_listener; bool is_mptcp; - s8 req_usec_ts; + bool req_usec_ts; #if IS_ENABLED(CONFIG_MPTCP) bool drop_req; #endif diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 98b25e5d147b..d37282c06e3d 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -306,7 +306,7 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, treq->af_specific = af_ops; treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield; - treq->req_usec_ts = -1; + treq->req_usec_ts = false; #if IS_ENABLED(CONFIG_MPTCP) treq->is_mptcp = sk_is_mptcp(sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 50aaa1527150..bcb55d98004c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -7115,7 +7115,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, req->syncookie = want_cookie; tcp_rsk(req)->af_specific = af_ops; tcp_rsk(req)->ts_off = 0; - tcp_rsk(req)->req_usec_ts = -1; + tcp_rsk(req)->req_usec_ts = false; #if IS_ENABLED(CONFIG_MPTCP) tcp_rsk(req)->is_mptcp = 0; #endif @@ -7143,9 +7143,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (!dst) goto drop_and_free; - if (tmp_opt.tstamp_ok) + if (tmp_opt.tstamp_ok) { + tcp_rsk(req)->req_usec_ts = dst_tcp_usec_ts(dst); tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb); - + } if (!want_cookie && !isn) { int max_syn_backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f558c054cf6e..0d8dd5b7e2e5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3693,8 +3693,6 
@@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, mss = tcp_mss_clamp(tp, dst_metric_advmss(dst)); memset(&opts, 0, sizeof(opts)); - if (tcp_rsk(req)->req_usec_ts < 0) - tcp_rsk(req)->req_usec_ts = dst_tcp_usec_ts(dst); now = tcp_clock_ns(); #ifdef CONFIG_SYN_COOKIES if (unlikely(synack_type == TCP_SYNACK_COOKIE && ireq->tstamp_ok)) -- cgit v1.2.3-70-g09d2 From 016b9332a3346e97a6cacffea0f9dc10e1235a75 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 1 Nov 2023 21:57:24 -0700 Subject: netlink: fill in missing MODULE_DESCRIPTION() W=1 builds now warn if a module is built without a MODULE_DESCRIPTION(). Fill it in for sock_diag. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/netlink/diag.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netlink/diag.c b/net/netlink/diag.c index 9c4f231be275..1eeff9422856 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -257,5 +257,6 @@ static void __exit netlink_diag_exit(void) module_init(netlink_diag_init); module_exit(netlink_diag_exit); +MODULE_DESCRIPTION("Netlink-based socket monitoring/diagnostic interface (sock_diag)"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 16 /* AF_NETLINK */); -- cgit v1.2.3-70-g09d2 From 40cb2fdfed342e7e578d551a073687789f698d89 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sat, 28 Oct 2023 13:16:10 -0400 Subject: net, sched: Fix SKB_NOT_DROPPED_YET splat under debug config Getting the following splat [1] with CONFIG_DEBUG_NET=y and this reproducer [2]. Problem seems to be that classifiers clear 'struct tcf_result::drop_reason', thereby triggering the warning in __kfree_skb_reason() due to reason being 'SKB_NOT_DROPPED_YET' (0). 
Fixed by disambiguating a legit error from a verdict with a bogus drop_reason [1] WARNING: CPU: 0 PID: 181 at net/core/skbuff.c:1082 kfree_skb_reason+0x38/0x130 Modules linked in: CPU: 0 PID: 181 Comm: mausezahn Not tainted 6.6.0-rc6-custom-ge43e6d9582e0 #682 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-1.fc37 04/01/2014 RIP: 0010:kfree_skb_reason+0x38/0x130 [...] Call Trace: __netif_receive_skb_core.constprop.0+0x837/0xdb0 __netif_receive_skb_one_core+0x3c/0x70 process_backlog+0x95/0x130 __napi_poll+0x25/0x1b0 net_rx_action+0x29b/0x310 __do_softirq+0xc0/0x29b do_softirq+0x43/0x60 [2] ip link add name veth0 type veth peer name veth1 ip link set dev veth0 up ip link set dev veth1 up tc qdisc add dev veth1 clsact tc filter add dev veth1 ingress pref 1 proto all flower dst_mac 00:11:22:33:44:55 action drop mausezahn veth0 -a own -b 00:11:22:33:44:55 -q -c 1 Ido reported: [...] getting the following splat [1] with CONFIG_DEBUG_NET=y and this reproducer [2]. Problem seems to be that classifiers clear 'struct tcf_result::drop_reason', thereby triggering the warning in __kfree_skb_reason() due to reason being 'SKB_NOT_DROPPED_YET' (0). [...] [1] WARNING: CPU: 0 PID: 181 at net/core/skbuff.c:1082 kfree_skb_reason+0x38/0x130 Modules linked in: CPU: 0 PID: 181 Comm: mausezahn Not tainted 6.6.0-rc6-custom-ge43e6d9582e0 #682 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-1.fc37 04/01/2014 RIP: 0010:kfree_skb_reason+0x38/0x130 [...] 
Call Trace: __netif_receive_skb_core.constprop.0+0x837/0xdb0 __netif_receive_skb_one_core+0x3c/0x70 process_backlog+0x95/0x130 __napi_poll+0x25/0x1b0 net_rx_action+0x29b/0x310 __do_softirq+0xc0/0x29b do_softirq+0x43/0x60 [2] #!/bin/bash ip link add name veth0 type veth peer name veth1 ip link set dev veth0 up ip link set dev veth1 up tc qdisc add dev veth1 clsact tc filter add dev veth1 ingress pref 1 proto all flower dst_mac 00:11:22:33:44:55 action drop mausezahn veth0 -a own -b 00:11:22:33:44:55 -q -c 1 What happens is that inside most classifiers the tcf_result is copied over from a filter template e.g. *res = f->res which then implicitly overrides the prior SKB_DROP_REASON_TC_{INGRESS,EGRESS} default drop code which was set via sch_handle_{ingress,egress}() for kfree_skb_reason(). Commit text above copied verbatim from Daniel. The general idea of the patch is not very different from what Ido originally posted but instead done at the cls_api codepath. Fixes: 54a59aed395c ("net, sched: Make tc-related drop reason more flexible") Reported-by: Ido Schimmel Signed-off-by: Jamal Hadi Salim Link: https://lore.kernel.org/netdev/ZTjY959R+AFXf3Xy@shredder Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- net/sched/act_api.c | 2 +- net/sched/cls_api.c | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 9d3f26bf0440..c39252d61ebb 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1098,7 +1098,7 @@ repeat: } } else if (TC_ACT_EXT_CMP(ret, TC_ACT_GOTO_CHAIN)) { if (unlikely(!rcu_access_pointer(a->goto_chain))) { - net_warn_ratelimited("can't go to NULL chain!\n"); + tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); return TC_ACT_SHOT; } tcf_action_goto_chain_exec(a, res); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 1daeb2182b70..1976bd163986 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1658,6 +1658,7 @@ static inline int __tcf_classify(struct sk_buff *skb, int act_index, u32 *last_executed_chain) { + u32 orig_reason = res->drop_reason; #ifdef CONFIG_NET_CLS_ACT const int max_reclassify_loop = 16; const struct tcf_proto *first_tp; @@ -1712,8 +1713,14 @@ reclassify: goto reset; } #endif - if (err >= 0) + if (err >= 0) { + /* Policy drop or drop reason is over-written by + * classifiers with a bogus value(0) */ + if (err == TC_ACT_SHOT && + res->drop_reason == SKB_NOT_DROPPED_YET) + tcf_set_drop_reason(res, orig_reason); return err; + } } if (unlikely(n)) { -- cgit v1.2.3-70-g09d2 From 0a8e987dcc13244b5a5bc90cb1b184f813104d87 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 2 Nov 2023 14:05:48 -0700 Subject: tcp: Fix SYN option room calculation for TCP-AO. When building SYN packet in tcp_syn_options(), MSS, TS, WS, and SACKPERM are used without checking the remaining bytes in the options area. To keep that logic as is, we limit the TCP-AO MAC length in tcp_ao_parse_crypto(). Currently, the limit is calculated as below. 
MAX_TCP_OPTION_SPACE - TCPOLEN_TSTAMP_ALIGNED - TCPOLEN_WSCALE_ALIGNED - TCPOLEN_SACKPERM_ALIGNED This looks confusing as (1) we pack SACKPERM into the leading 2-bytes of the aligned 12-bytes of TS and (2) TCPOLEN_MSS_ALIGNED is not used. Fortunately, the calculated limit is not wrong as TCPOLEN_SACKPERM_ALIGNED and TCPOLEN_MSS_ALIGNED are the same value. However, we should use the proper constant in the formula. MAX_TCP_OPTION_SPACE - TCPOLEN_MSS_ALIGNED - TCPOLEN_TSTAMP_ALIGNED - TCPOLEN_WSCALE_ALIGNED Fixes: 4954f17ddefc ("net/tcp: Introduce TCP_AO setsockopt()s") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Dmitry Safonov Signed-off-by: David S. Miller --- net/ipv4/tcp_ao.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index ef5472ed6158..7696417d0640 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -1315,7 +1315,8 @@ static int tcp_ao_parse_crypto(struct tcp_ao_add *cmd, struct tcp_ao_key *key) key->maclen = cmd->maclen ?: 12; /* 12 is the default in RFC5925 */ /* Check: maclen + tcp-ao header <= (MAX_TCP_OPTION_SPACE - mss - * - tstamp - wscale - sackperm), + * - tstamp (including sackperm) + * - wscale), * see tcp_syn_options(), tcp_synack_options(), commit 33ad798c924b. * * In order to allow D-SACK with TCP-AO, the header size should be: @@ -1342,9 +1343,9 @@ static int tcp_ao_parse_crypto(struct tcp_ao_add *cmd, struct tcp_ao_key *key) * large to leave sufficient option space. */ syn_tcp_option_space = MAX_TCP_OPTION_SPACE; + syn_tcp_option_space -= TCPOLEN_MSS_ALIGNED; syn_tcp_option_space -= TCPOLEN_TSTAMP_ALIGNED; syn_tcp_option_space -= TCPOLEN_WSCALE_ALIGNED; - syn_tcp_option_space -= TCPOLEN_SACKPERM_ALIGNED; if (tcp_ao_len(key) > syn_tcp_option_space) { err = -EMSGSIZE; goto err_kfree; -- cgit v1.2.3-70-g09d2 From 5211c9729484c923f8d2e06bd29f9322cc42bb8f Mon Sep 17 00:00:00 2001 From: "D. 
Wythe" Date: Fri, 3 Nov 2023 14:07:38 +0800 Subject: net/smc: fix dangling sock under state SMC_APPFINCLOSEWAIT Considering scenario: smc_cdc_rx_handler __smc_release sock_set_flag smc_close_active() sock_set_flag __set_bit(DEAD) __set_bit(DONE) Because __set_bit is not atomic, the DEAD or DONE flag might be lost. If the DEAD flag is lost, the state SMC_CLOSED will never be reached in smc_close_passive_work: if (sock_flag(sk, SOCK_DEAD) && smc_close_sent_any_close(conn)) { sk->sk_state = SMC_CLOSED; } else { /* just shutdown, but not yet closed locally */ sk->sk_state = SMC_APPFINCLOSEWAIT; } Replacing sock_set_flag()/__set_bit() with set_bit() fixes this problem, since set_bit is atomic. Fixes: b38d732477e4 ("smc: socket closing and linkgroup cleanup") Signed-off-by: D. Wythe Reviewed-by: Dust Li Signed-off-by: David S. Miller --- net/smc/af_smc.c | 4 ++-- net/smc/smc.h | 5 +++++ net/smc/smc_cdc.c | 2 +- net/smc/smc_close.c | 2 +- 4 files changed, 9 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index abd2667734d4..da97f946b79b 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -275,7 +275,7 @@ static int __smc_release(struct smc_sock *smc) if (!smc->use_fallback) { rc = smc_close_active(smc); - sock_set_flag(sk, SOCK_DEAD); + smc_sock_set_flag(sk, SOCK_DEAD); sk->sk_shutdown |= SHUTDOWN_MASK; } else { if (sk->sk_state != SMC_CLOSED) { @@ -1743,7 +1743,7 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) if (new_clcsock) sock_release(new_clcsock); new_sk->sk_state = SMC_CLOSED; - sock_set_flag(new_sk, SOCK_DEAD); + smc_sock_set_flag(new_sk, SOCK_DEAD); sock_put(new_sk); /* final */ *new_smc = NULL; goto out; diff --git a/net/smc/smc.h b/net/smc/smc.h index 24745fde4ac2..e377980b8414 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -377,4 +377,9 @@ int smc_nl_dump_hs_limitation(struct sk_buff *skb, struct netlink_callback *cb); int smc_nl_enable_hs_limitation(struct sk_buff *skb, 
struct genl_info *info); int smc_nl_disable_hs_limitation(struct sk_buff *skb, struct genl_info *info); +static inline void smc_sock_set_flag(struct sock *sk, enum sock_flags flag) +{ + set_bit(flag, &sk->sk_flags); +} + #endif /* __SMC_H */ diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 89105e95b452..01bdb7909a14 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -385,7 +385,7 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, smc->sk.sk_shutdown |= RCV_SHUTDOWN; if (smc->clcsock && smc->clcsock->sk) smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN; - sock_set_flag(&smc->sk, SOCK_DONE); + smc_sock_set_flag(&smc->sk, SOCK_DONE); sock_hold(&smc->sk); /* sock_put in close_work */ if (!queue_work(smc_close_wq, &conn->close_work)) sock_put(&smc->sk); diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index dbdf03e8aa5b..449ef454b53b 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -173,7 +173,7 @@ void smc_close_active_abort(struct smc_sock *smc) break; } - sock_set_flag(sk, SOCK_DEAD); + smc_sock_set_flag(sk, SOCK_DEAD); sk->sk_state_change(sk); if (release_clcsock) { -- cgit v1.2.3-70-g09d2 From c5bf605ba4f9d6fbbb120595ab95002f4716edcb Mon Sep 17 00:00:00 2001 From: "D. Wythe" Date: Fri, 3 Nov 2023 14:07:39 +0800 Subject: net/smc: allow cdc msg send rather than drop it with NULL sndbuf_desc This patch re-fixes the issues mentioned by commit 22a825c541d7 ("net/smc: fix NULL sndbuf_desc in smc_cdc_tx_handler()"). Blocking message sending does solve the issues, but it also prevents the peer from receiving the final message. Besides, logically, whether sndbuf_desc is NULL or not has no impact on the processing of cdc message sending. Hence, this patch allows the cdc message to be sent but checks sndbuf_desc with care in smc_cdc_tx_handler(). Fixes: 22a825c541d7 ("net/smc: fix NULL sndbuf_desc in smc_cdc_tx_handler()") Signed-off-by: D. Wythe Reviewed-by: Dust Li Signed-off-by: David S.
Miller --- net/smc/smc_cdc.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 01bdb7909a14..3c06625ceb20 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -28,13 +28,15 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, { struct smc_cdc_tx_pend *cdcpend = (struct smc_cdc_tx_pend *)pnd_snd; struct smc_connection *conn = cdcpend->conn; + struct smc_buf_desc *sndbuf_desc; struct smc_sock *smc; int diff; + sndbuf_desc = conn->sndbuf_desc; smc = container_of(conn, struct smc_sock, conn); bh_lock_sock(&smc->sk); - if (!wc_status) { - diff = smc_curs_diff(cdcpend->conn->sndbuf_desc->len, + if (!wc_status && sndbuf_desc) { + diff = smc_curs_diff(sndbuf_desc->len, &cdcpend->conn->tx_curs_fin, &cdcpend->cursor); /* sndbuf_space is decreased in smc_sendmsg */ @@ -114,9 +116,6 @@ int smc_cdc_msg_send(struct smc_connection *conn, union smc_host_cursor cfed; int rc; - if (unlikely(!READ_ONCE(conn->sndbuf_desc))) - return -ENOBUFS; - smc_cdc_add_pending_send(conn, pend); conn->tx_cdc_seq++; -- cgit v1.2.3-70-g09d2 From aa96fbd6d78d9770323b21e2c92bd38821be8852 Mon Sep 17 00:00:00 2001 From: "D. Wythe" Date: Fri, 3 Nov 2023 14:07:40 +0800 Subject: net/smc: put sk reference if close work was canceled Note that we always hold a reference to sock when attempting to submit close_work. Therefore, if we have successfully canceled close_work from pending, we MUST release that reference to avoid potential leaks. Fixes: 42bfba9eaa33 ("net/smc: immediate termination for SMCD link groups") Signed-off-by: D. Wythe Reviewed-by: Dust Li Signed-off-by: David S. 
Miller --- net/smc/smc_close.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 449ef454b53b..10219f55aad1 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -116,7 +116,8 @@ static void smc_close_cancel_work(struct smc_sock *smc) struct sock *sk = &smc->sk; release_sock(sk); - cancel_work_sync(&smc->conn.close_work); + if (cancel_work_sync(&smc->conn.close_work)) + sock_put(sk); cancel_delayed_work_sync(&smc->conn.tx_work); lock_sock(sk); } -- cgit v1.2.3-70-g09d2 From 7425627b2b2cd671d5bf6541ce50f7cba8a76ad6 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 6 Nov 2023 14:14:16 -0700 Subject: tcp: Fix -Wc23-extensions in tcp_options_write() Clang warns (or errors with CONFIG_WERROR=y) when CONFIG_TCP_AO is set: net/ipv4/tcp_output.c:663:2: error: label at end of compound statement is a C23 extension [-Werror,-Wc23-extensions] 663 | } | ^ 1 error generated. On earlier releases (such as clang-11, the current minimum supported version for building the kernel) that do not support C23, this was a hard error unconditionally: net/ipv4/tcp_output.c:663:2: error: expected statement } ^ 1 error generated. While adding a semicolon after the label would resolve this, it is more in line with the kernel as a whole to refactor this block into a standalone function, which means the goto a label construct can just be replaced with a return statement. Do so to resolve the warning. Closes: https://github.com/ClangBuiltLinux/linux/issues/1953 Fixes: 1e03d32bea8e ("net/tcp: Add TCP-AO sign to outgoing packets") Signed-off-by: Nathan Chancellor Reviewed-by: Dmitry Safonov Reviewed-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/tcp_output.c | 70 ++++++++++++++++++++++++++++----------------------- 1 file changed, 39 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 0d8dd5b7e2e5..eb13a55d660c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -601,6 +601,44 @@ static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb, } #endif +static __be32 *process_tcp_ao_options(struct tcp_sock *tp, + const struct tcp_request_sock *tcprsk, + struct tcp_out_options *opts, + struct tcp_key *key, __be32 *ptr) +{ +#ifdef CONFIG_TCP_AO + u8 maclen = tcp_ao_maclen(key->ao_key); + + if (tcprsk) { + u8 aolen = maclen + sizeof(struct tcp_ao_hdr); + + *ptr++ = htonl((TCPOPT_AO << 24) | (aolen << 16) | + (tcprsk->ao_keyid << 8) | + (tcprsk->ao_rcv_next)); + } else { + struct tcp_ao_key *rnext_key; + struct tcp_ao_info *ao_info; + + ao_info = rcu_dereference_check(tp->ao_info, + lockdep_sock_is_held(&tp->inet_conn.icsk_inet.sk)); + rnext_key = READ_ONCE(ao_info->rnext_key); + if (WARN_ON_ONCE(!rnext_key)) + return ptr; + *ptr++ = htonl((TCPOPT_AO << 24) | + (tcp_ao_len(key->ao_key) << 16) | + (key->ao_key->sndid << 8) | + (rnext_key->rcvid)); + } + opts->hash_location = (__u8 *)ptr; + ptr += maclen / sizeof(*ptr); + if (unlikely(maclen % sizeof(*ptr))) { + memset(ptr, TCPOPT_NOP, sizeof(*ptr)); + ptr++; + } +#endif + return ptr; +} + /* Write previously computed TCP options to the packet. 
* * Beware: Something in the Internet is very sensitive to the ordering of @@ -629,37 +667,7 @@ static void tcp_options_write(struct tcphdr *th, struct tcp_sock *tp, opts->hash_location = (__u8 *)ptr; ptr += 4; } else if (tcp_key_is_ao(key)) { -#ifdef CONFIG_TCP_AO - u8 maclen = tcp_ao_maclen(key->ao_key); - - if (tcprsk) { - u8 aolen = maclen + sizeof(struct tcp_ao_hdr); - - *ptr++ = htonl((TCPOPT_AO << 24) | (aolen << 16) | - (tcprsk->ao_keyid << 8) | - (tcprsk->ao_rcv_next)); - } else { - struct tcp_ao_key *rnext_key; - struct tcp_ao_info *ao_info; - - ao_info = rcu_dereference_check(tp->ao_info, - lockdep_sock_is_held(&tp->inet_conn.icsk_inet.sk)); - rnext_key = READ_ONCE(ao_info->rnext_key); - if (WARN_ON_ONCE(!rnext_key)) - goto out_ao; - *ptr++ = htonl((TCPOPT_AO << 24) | - (tcp_ao_len(key->ao_key) << 16) | - (key->ao_key->sndid << 8) | - (rnext_key->rcvid)); - } - opts->hash_location = (__u8 *)ptr; - ptr += maclen / sizeof(*ptr); - if (unlikely(maclen % sizeof(*ptr))) { - memset(ptr, TCPOPT_NOP, sizeof(*ptr)); - ptr++; - } -out_ao: -#endif + ptr = process_tcp_ao_options(tp, tcprsk, opts, key, ptr); } if (unlikely(opts->mss)) { *ptr++ = htonl((TCPOPT_MSS << 24) | -- cgit v1.2.3-70-g09d2 From 3a5cc90a4d1756072619fe511d07621bdef7f120 Mon Sep 17 00:00:00 2001 From: Filippo Storniolo Date: Fri, 3 Nov 2023 18:55:48 +0100 Subject: vsock/virtio: remove socket from connected/bound list on shutdown If the same remote peer, using the same port, tries to connect to a server on a listening port more than once, the server will reject the connection, causing a "connection reset by peer" error on the remote peer. This is due to the presence of a dangling socket from a previous connection in both the connected and bound socket lists. The inconsistency of the above lists only occurs when the remote peer disconnects and the server remains active. 
This bug does not occur when the server socket is closed: virtio_transport_release() will eventually schedule a call to virtio_transport_do_close() and the latter will remove the socket from the bound and connected socket lists and clear the sk_buff. However, virtio_transport_do_close() will only perform the above actions if it has been scheduled, and this will not happen if the server is processing the shutdown message from a remote peer. To fix this, introduce a call to vsock_remove_sock() when the server is handling a client disconnect. This is to remove the socket from the bound and connected socket lists without clearing the sk_buff. Fixes: 06a8fc78367d ("VSOCK: Introduce virtio_vsock_common.ko") Reported-by: Daan De Meyer Tested-by: Daan De Meyer Co-developed-by: Luigi Leonardi Signed-off-by: Luigi Leonardi Signed-off-by: Filippo Storniolo Reviewed-by: Stefano Garzarella Signed-off-by: David S. Miller --- net/vmw_vsock/virtio_transport_common.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index e22c81435ef7..4c595dd1fd64 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -1369,11 +1369,17 @@ virtio_transport_recv_connected(struct sock *sk, vsk->peer_shutdown |= RCV_SHUTDOWN; if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_SEND) vsk->peer_shutdown |= SEND_SHUTDOWN; - if (vsk->peer_shutdown == SHUTDOWN_MASK && - vsock_stream_has_data(vsk) <= 0 && - !sock_flag(sk, SOCK_DONE)) { - (void)virtio_transport_reset(vsk, NULL); - virtio_transport_do_close(vsk, true); + if (vsk->peer_shutdown == SHUTDOWN_MASK) { + if (vsock_stream_has_data(vsk) <= 0 && !sock_flag(sk, SOCK_DONE)) { + (void)virtio_transport_reset(vsk, NULL); + virtio_transport_do_close(vsk, true); + } + /* Remove this socket anyway because the remote peer sent + * the shutdown. 
This way a new connection will succeed + * if the remote peer uses the same source port, + * even if the old socket is still unreleased, but now disconnected. + */ + vsock_remove_sock(vsk); } if (le32_to_cpu(virtio_vsock_hdr(skb)->flags)) sk->sk_state_change(sk); -- cgit v1.2.3-70-g09d2 From 34c4effacfc329aeca5635a69fd9e0f6c90b4101 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Sun, 5 Nov 2023 00:05:31 +0900 Subject: virtio/vsock: Fix uninit-value in virtio_transport_recv_pkt() KMSAN reported the following uninit-value access issue: ===================================================== BUG: KMSAN: uninit-value in virtio_transport_recv_pkt+0x1dfb/0x26a0 net/vmw_vsock/virtio_transport_common.c:1421 virtio_transport_recv_pkt+0x1dfb/0x26a0 net/vmw_vsock/virtio_transport_common.c:1421 vsock_loopback_work+0x3bb/0x5a0 net/vmw_vsock/vsock_loopback.c:120 process_one_work kernel/workqueue.c:2630 [inline] process_scheduled_works+0xff6/0x1e60 kernel/workqueue.c:2703 worker_thread+0xeca/0x14d0 kernel/workqueue.c:2784 kthread+0x3cc/0x520 kernel/kthread.c:388 ret_from_fork+0x66/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304 Uninit was stored to memory at: virtio_transport_space_update net/vmw_vsock/virtio_transport_common.c:1274 [inline] virtio_transport_recv_pkt+0x1ee8/0x26a0 net/vmw_vsock/virtio_transport_common.c:1415 vsock_loopback_work+0x3bb/0x5a0 net/vmw_vsock/vsock_loopback.c:120 process_one_work kernel/workqueue.c:2630 [inline] process_scheduled_works+0xff6/0x1e60 kernel/workqueue.c:2703 worker_thread+0xeca/0x14d0 kernel/workqueue.c:2784 kthread+0x3cc/0x520 kernel/kthread.c:388 ret_from_fork+0x66/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304 Uninit was created at: slab_post_alloc_hook+0x105/0xad0 mm/slab.h:767 slab_alloc_node mm/slub.c:3478 [inline] kmem_cache_alloc_node+0x5a2/0xaf0 mm/slub.c:3523 kmalloc_reserve+0x13c/0x4a0 net/core/skbuff.c:559 __alloc_skb+0x2fd/0x770 
net/core/skbuff.c:650 alloc_skb include/linux/skbuff.h:1286 [inline] virtio_vsock_alloc_skb include/linux/virtio_vsock.h:66 [inline] virtio_transport_alloc_skb+0x90/0x11e0 net/vmw_vsock/virtio_transport_common.c:58 virtio_transport_reset_no_sock net/vmw_vsock/virtio_transport_common.c:957 [inline] virtio_transport_recv_pkt+0x1279/0x26a0 net/vmw_vsock/virtio_transport_common.c:1387 vsock_loopback_work+0x3bb/0x5a0 net/vmw_vsock/vsock_loopback.c:120 process_one_work kernel/workqueue.c:2630 [inline] process_scheduled_works+0xff6/0x1e60 kernel/workqueue.c:2703 worker_thread+0xeca/0x14d0 kernel/workqueue.c:2784 kthread+0x3cc/0x520 kernel/kthread.c:388 ret_from_fork+0x66/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304 CPU: 1 PID: 10664 Comm: kworker/1:5 Not tainted 6.6.0-rc3-00146-g9f3ebbef746f #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-1.fc38 04/01/2014 Workqueue: vsock-loopback vsock_loopback_work ===================================================== The following simple reproducer can cause the issue described above: int main(void) { int sock; struct sockaddr_vm addr = { .svm_family = AF_VSOCK, .svm_cid = VMADDR_CID_ANY, .svm_port = 1234, }; sock = socket(AF_VSOCK, SOCK_STREAM, 0); connect(sock, (struct sockaddr *)&addr, sizeof(addr)); return 0; } This issue occurs because the `buf_alloc` and `fwd_cnt` fields of the `struct virtio_vsock_hdr` are not initialized when a new skb is allocated in `virtio_transport_init_hdr()`. This patch resolves the issue by initializing these fields during allocation. 
Fixes: 71dc9ec9ac7d ("virtio/vsock: replace virtio_vsock_pkt with sk_buff") Reported-and-tested-by: syzbot+0c8ce1da0ac31abbadcd@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=0c8ce1da0ac31abbadcd Signed-off-by: Shigeru Yoshida Reviewed-by: Stefano Garzarella Link: https://lore.kernel.org/r/20231104150531.257952-1-syoshida@redhat.com Signed-off-by: Jakub Kicinski --- net/vmw_vsock/virtio_transport_common.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 4c595dd1fd64..f6dc896bf44c 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -130,6 +130,8 @@ static void virtio_transport_init_hdr(struct sk_buff *skb, hdr->dst_port = cpu_to_le32(dst_port); hdr->flags = cpu_to_le32(info->flags); hdr->len = cpu_to_le32(payload_len); + hdr->buf_alloc = cpu_to_le32(0); + hdr->fwd_cnt = cpu_to_le32(0); } static void virtio_transport_copy_nonlinear_skb(const struct sk_buff *skb, -- cgit v1.2.3-70-g09d2 From 94090b23f3f71c150359a2e0716855a4037ad45a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 4 Nov 2023 11:14:05 +0100 Subject: netfilter: add missing module descriptions W=1 builds warn on missing MODULE_DESCRIPTION, add them. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebtable_broute.c | 1 + net/bridge/netfilter/ebtable_filter.c | 1 + net/bridge/netfilter/ebtable_nat.c | 1 + net/bridge/netfilter/ebtables.c | 1 + net/bridge/netfilter/nf_conntrack_bridge.c | 1 + net/ipv4/netfilter/iptable_nat.c | 1 + net/ipv4/netfilter/iptable_raw.c | 1 + net/ipv4/netfilter/nf_defrag_ipv4.c | 1 + net/ipv4/netfilter/nf_reject_ipv4.c | 1 + net/ipv6/netfilter/ip6table_nat.c | 1 + net/ipv6/netfilter/ip6table_raw.c | 1 + net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 1 + net/ipv6/netfilter/nf_reject_ipv6.c | 1 + net/netfilter/nf_conntrack_broadcast.c | 1 + net/netfilter/nf_conntrack_netlink.c | 1 + net/netfilter/nf_conntrack_proto.c | 1 + net/netfilter/nf_nat_core.c | 1 + net/netfilter/nf_tables_api.c | 1 + net/netfilter/nfnetlink_osf.c | 1 + net/netfilter/nft_chain_nat.c | 1 + net/netfilter/nft_fib.c | 1 + net/netfilter/nft_fwd_netdev.c | 1 + 22 files changed, 22 insertions(+) (limited to 'net') diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 8f19253024b0..741360219552 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -135,3 +135,4 @@ static void __exit ebtable_broute_fini(void) module_init(ebtable_broute_init); module_exit(ebtable_broute_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Force packets to be routed instead of bridged"); diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 278f324e6752..dacd81b12e62 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -116,3 +116,4 @@ static void __exit ebtable_filter_fini(void) module_init(ebtable_filter_init); module_exit(ebtable_filter_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ebtables legacy filter table"); diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 9066f7f376d5..0f2a8c6118d4 
100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -116,3 +116,4 @@ static void __exit ebtable_nat_fini(void) module_init(ebtable_nat_init); module_exit(ebtable_nat_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ebtables legacy stateless nat table"); diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index aa23479b20b2..99d82676f780 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -2595,3 +2595,4 @@ EXPORT_SYMBOL(ebt_do_table); module_init(ebtables_init); module_exit(ebtables_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ebtables legacy core"); diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index 71056ee84773..b5c406a6e765 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -416,3 +416,4 @@ module_exit(nf_conntrack_l3proto_bridge_fini); MODULE_ALIAS("nf_conntrack-" __stringify(AF_BRIDGE)); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Bridge IPv4 and IPv6 connection tracking"); diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index 56f6ecc43451..4d42d0756fd7 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -170,3 +170,4 @@ module_init(iptable_nat_init); module_exit(iptable_nat_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("iptables legacy nat table"); diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index ca5e5b21587c..0e7f53964d0a 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -108,3 +108,4 @@ static void __exit iptable_raw_fini(void) module_init(iptable_raw_init); module_exit(iptable_raw_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("iptables legacy raw table"); diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 265b39bc435b..482e733c3375 100644 --- 
a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -186,3 +186,4 @@ module_init(nf_defrag_init); module_exit(nf_defrag_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IPv4 defragmentation support"); diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index f33aeab9424f..f01b038fc1cd 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -336,3 +336,4 @@ void nf_send_unreach(struct sk_buff *skb_in, int code, int hook) EXPORT_SYMBOL_GPL(nf_send_unreach); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IPv4 packet rejection core"); diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index bf3cb3a13600..52cf104e3478 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -170,3 +170,4 @@ module_init(ip6table_nat_init); module_exit(ip6table_nat_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Ip6tables legacy nat table"); diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 08861d5d1f4d..fc9f6754028f 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -106,3 +106,4 @@ static void __exit ip6table_raw_fini(void) module_init(ip6table_raw_init); module_exit(ip6table_raw_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Ip6tables legacy raw table"); diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index d59b296b4f51..be7817fbc024 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -182,3 +182,4 @@ module_init(nf_defrag_init); module_exit(nf_defrag_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IPv6 defragmentation support"); diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 58ccdb08c0fd..d45bc54b7ea5 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ 
b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -413,3 +413,4 @@ void nf_send_unreach6(struct net *net, struct sk_buff *skb_in, EXPORT_SYMBOL_GPL(nf_send_unreach6); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IPv6 packet rejection core"); diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c index 9fb9b8031298..cfa0fe0356de 100644 --- a/net/netfilter/nf_conntrack_broadcast.c +++ b/net/netfilter/nf_conntrack_broadcast.c @@ -82,3 +82,4 @@ out: EXPORT_SYMBOL_GPL(nf_conntrack_broadcast_help); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Broadcast connection tracking helper"); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 334db22199c1..fb0ae15e96df 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -57,6 +57,7 @@ #include "nf_internals.h" MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("List and change connection tracking table"); struct ctnetlink_list_dump_ctx { struct nf_conn *last; diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index c928ff63b10e..f36727ed91e1 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -699,3 +699,4 @@ MODULE_ALIAS("ip_conntrack"); MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET)); MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6)); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IPv4 and IPv6 connection tracking"); diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index c4e0516a8dfa..c3d7ecbc777c 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -1263,6 +1263,7 @@ static void __exit nf_nat_cleanup(void) } MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Network address translation core"); module_init(nf_nat_init); module_exit(nf_nat_cleanup); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3c1fd8283bf4..146b7447a969 100644 --- a/net/netfilter/nf_tables_api.c +++ 
b/net/netfilter/nf_tables_api.c @@ -11386,4 +11386,5 @@ module_exit(nf_tables_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy "); +MODULE_DESCRIPTION("Framework for packet filtering and classification"); MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFTABLES); diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c index 50723ba08289..c0fc431991e8 100644 --- a/net/netfilter/nfnetlink_osf.c +++ b/net/netfilter/nfnetlink_osf.c @@ -447,4 +447,5 @@ module_init(nfnl_osf_init); module_exit(nfnl_osf_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Passive OS fingerprint matching"); MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF); diff --git a/net/netfilter/nft_chain_nat.c b/net/netfilter/nft_chain_nat.c index 98e4946100c5..40e230d8b712 100644 --- a/net/netfilter/nft_chain_nat.c +++ b/net/netfilter/nft_chain_nat.c @@ -137,6 +137,7 @@ module_init(nft_chain_nat_init); module_exit(nft_chain_nat_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("nftables network address translation support"); #ifdef CONFIG_NF_TABLES_IPV4 MODULE_ALIAS_NFT_CHAIN(AF_INET, "nat"); #endif diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c index 04b51f285332..1bfe258018da 100644 --- a/net/netfilter/nft_fib.c +++ b/net/netfilter/nft_fib.c @@ -204,4 +204,5 @@ bool nft_fib_reduce(struct nft_regs_track *track, EXPORT_SYMBOL_GPL(nft_fib_reduce); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Query routing table from nftables"); MODULE_AUTHOR("Florian Westphal "); diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index a5268e6dd32f..358e742afad7 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -270,4 +270,5 @@ module_exit(nft_fwd_netdev_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso "); +MODULE_DESCRIPTION("nftables netdev packet forwarding support"); MODULE_ALIAS_NFT_AF_EXPR(5, "fwd"); -- cgit v1.2.3-70-g09d2 From 93995bf4af2c5a99e2a87f0cd5ce547d31eb7630 Mon Sep 17 00:00:00 2001 From: 
Pablo Neira Ayuso Date: Mon, 6 Nov 2023 10:53:09 +0100 Subject: netfilter: nf_tables: remove catchall element in GC sync path The expired catchall element is not deactivated and removed from GC sync path. This path holds mutex so just call nft_setelem_data_deactivate() and nft_setelem_catchall_remove() before queueing the GC work. Fixes: 4a9e12ea7e70 ("netfilter: nft_set_pipapo: call nft_trans_gc_queue_sync() in catchall GC") Reported-by: lonial con Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 146b7447a969..a761ee6796f6 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -6520,6 +6520,12 @@ static int nft_setelem_deactivate(const struct net *net, return ret; } +static void nft_setelem_catchall_destroy(struct nft_set_elem_catchall *catchall) +{ + list_del_rcu(&catchall->list); + kfree_rcu(catchall, rcu); +} + static void nft_setelem_catchall_remove(const struct net *net, const struct nft_set *set, struct nft_elem_priv *elem_priv) @@ -6528,8 +6534,7 @@ static void nft_setelem_catchall_remove(const struct net *net, list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { if (catchall->elem == elem_priv) { - list_del_rcu(&catchall->list); - kfree_rcu(catchall, rcu); + nft_setelem_catchall_destroy(catchall); break; } } @@ -9678,11 +9683,12 @@ static struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc, unsigned int gc_seq, bool sync) { - struct nft_set_elem_catchall *catchall; + struct nft_set_elem_catchall *catchall, *next; const struct nft_set *set = gc->set; + struct nft_elem_priv *elem_priv; struct nft_set_ext *ext; - list_for_each_entry_rcu(catchall, &set->catchall_list, list) { + list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { ext = nft_set_elem_ext(set, catchall->elem); if 
(!nft_set_elem_expired(ext)) @@ -9700,7 +9706,13 @@ dead_elem: if (!gc) return NULL; - nft_trans_gc_elem_add(gc, catchall->elem); + elem_priv = catchall->elem; + if (sync) { + nft_setelem_data_deactivate(gc->net, gc->set, elem_priv); + nft_setelem_catchall_destroy(catchall); + } + + nft_trans_gc_elem_add(gc, elem_priv); } return gc; -- cgit v1.2.3-70-g09d2 From 17cd01e4d1e37e2c8051bbc0ca1ecca4cb001198 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 7 Nov 2023 10:48:04 +0100 Subject: ipvs: add missing module descriptions W=1 builds warn on missing MODULE_DESCRIPTION, add them. Signed-off-by: Florian Westphal Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_core.c | 1 + net/netfilter/ipvs/ip_vs_dh.c | 1 + net/netfilter/ipvs/ip_vs_fo.c | 1 + net/netfilter/ipvs/ip_vs_ftp.c | 1 + net/netfilter/ipvs/ip_vs_lblc.c | 1 + net/netfilter/ipvs/ip_vs_lblcr.c | 1 + net/netfilter/ipvs/ip_vs_lc.c | 1 + net/netfilter/ipvs/ip_vs_nq.c | 1 + net/netfilter/ipvs/ip_vs_ovf.c | 1 + net/netfilter/ipvs/ip_vs_pe_sip.c | 1 + net/netfilter/ipvs/ip_vs_rr.c | 1 + net/netfilter/ipvs/ip_vs_sed.c | 1 + net/netfilter/ipvs/ip_vs_sh.c | 1 + net/netfilter/ipvs/ip_vs_twos.c | 1 + net/netfilter/ipvs/ip_vs_wlc.c | 1 + net/netfilter/ipvs/ip_vs_wrr.c | 1 + 16 files changed, 16 insertions(+) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 3230506ae3ff..a2c16b501087 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -2450,3 +2450,4 @@ static void __exit ip_vs_cleanup(void) module_init(ip_vs_init); module_exit(ip_vs_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IP Virtual Server"); diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c index 5e6ec32aff2b..75f4c231f4a0 100644 --- a/net/netfilter/ipvs/ip_vs_dh.c +++ b/net/netfilter/ipvs/ip_vs_dh.c @@ -270,3 +270,4 @@ static void __exit ip_vs_dh_cleanup(void) module_init(ip_vs_dh_init); 
module_exit(ip_vs_dh_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs destination hashing scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_fo.c b/net/netfilter/ipvs/ip_vs_fo.c index b846cc385279..ab117e5bc34e 100644 --- a/net/netfilter/ipvs/ip_vs_fo.c +++ b/net/netfilter/ipvs/ip_vs_fo.c @@ -72,3 +72,4 @@ static void __exit ip_vs_fo_cleanup(void) module_init(ip_vs_fo_init); module_exit(ip_vs_fo_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs weighted failover scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index ef1f45e43b63..f53899d12416 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -635,3 +635,4 @@ static void __exit ip_vs_ftp_exit(void) module_init(ip_vs_ftp_init); module_exit(ip_vs_ftp_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs ftp helper"); diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index cf78ba4ce5ff..8ceec7a2fa8f 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -632,3 +632,4 @@ static void __exit ip_vs_lblc_cleanup(void) module_init(ip_vs_lblc_init); module_exit(ip_vs_lblc_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs locality-based least-connection scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 9eddf118b40e..0fb64707213f 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -817,3 +817,4 @@ static void __exit ip_vs_lblcr_cleanup(void) module_init(ip_vs_lblcr_init); module_exit(ip_vs_lblcr_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs locality-based least-connection with replication scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c index 9d34d81fc6f1..c2764505e380 100644 --- a/net/netfilter/ipvs/ip_vs_lc.c +++ b/net/netfilter/ipvs/ip_vs_lc.c @@ -86,3 +86,4 @@ static void __exit ip_vs_lc_cleanup(void) 
module_init(ip_vs_lc_init); module_exit(ip_vs_lc_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs least connection scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c index f56862a87518..ed7f5c889b41 100644 --- a/net/netfilter/ipvs/ip_vs_nq.c +++ b/net/netfilter/ipvs/ip_vs_nq.c @@ -136,3 +136,4 @@ static void __exit ip_vs_nq_cleanup(void) module_init(ip_vs_nq_init); module_exit(ip_vs_nq_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs never queue scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_ovf.c b/net/netfilter/ipvs/ip_vs_ovf.c index c03066fdd5ca..c7708b809700 100644 --- a/net/netfilter/ipvs/ip_vs_ovf.c +++ b/net/netfilter/ipvs/ip_vs_ovf.c @@ -79,3 +79,4 @@ static void __exit ip_vs_ovf_cleanup(void) module_init(ip_vs_ovf_init); module_exit(ip_vs_ovf_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs overflow connection scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c index 0ac6705a61d3..e4ce1d9a63f9 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c @@ -185,3 +185,4 @@ static void __exit ip_vs_sip_cleanup(void) module_init(ip_vs_sip_init); module_exit(ip_vs_sip_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs sip helper"); diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c index 38495c6f6c7c..6baa34dff9f0 100644 --- a/net/netfilter/ipvs/ip_vs_rr.c +++ b/net/netfilter/ipvs/ip_vs_rr.c @@ -122,4 +122,5 @@ static void __exit ip_vs_rr_cleanup(void) module_init(ip_vs_rr_init); module_exit(ip_vs_rr_cleanup); +MODULE_DESCRIPTION("ipvs round-robin scheduler"); MODULE_LICENSE("GPL"); diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c index 7663288e5358..a46f99a56618 100644 --- a/net/netfilter/ipvs/ip_vs_sed.c +++ b/net/netfilter/ipvs/ip_vs_sed.c @@ -137,3 +137,4 @@ static void __exit ip_vs_sed_cleanup(void) module_init(ip_vs_sed_init); 
module_exit(ip_vs_sed_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs shortest expected delay scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index c2028e412092..92e77d7a6b50 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -376,3 +376,4 @@ static void __exit ip_vs_sh_cleanup(void) module_init(ip_vs_sh_init); module_exit(ip_vs_sh_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs source hashing scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_twos.c b/net/netfilter/ipvs/ip_vs_twos.c index 3308e4cc740a..8d5419edde50 100644 --- a/net/netfilter/ipvs/ip_vs_twos.c +++ b/net/netfilter/ipvs/ip_vs_twos.c @@ -137,3 +137,4 @@ static void __exit ip_vs_twos_cleanup(void) module_init(ip_vs_twos_init); module_exit(ip_vs_twos_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs power of twos choice scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c index 09f584b564a0..9fa500927c0a 100644 --- a/net/netfilter/ipvs/ip_vs_wlc.c +++ b/net/netfilter/ipvs/ip_vs_wlc.c @@ -109,3 +109,4 @@ static void __exit ip_vs_wlc_cleanup(void) module_init(ip_vs_wlc_init); module_exit(ip_vs_wlc_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs weighted least connection scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c index 1bc7a0789d85..85ce0d04afac 100644 --- a/net/netfilter/ipvs/ip_vs_wrr.c +++ b/net/netfilter/ipvs/ip_vs_wrr.c @@ -263,3 +263,4 @@ static void __exit ip_vs_wrr_cleanup(void) module_init(ip_vs_wrr_init); module_exit(ip_vs_wrr_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs weighted round-robin scheduler"); -- cgit v1.2.3-70-g09d2 From 7b308feb4fd2d1c06919445c65c8fbf8e9fd1781 Mon Sep 17 00:00:00 2001 From: Maciej Żenczykowski Date: Sun, 5 Nov 2023 11:56:00 -0800 Subject: netfilter: xt_recent: fix (increase) ipv6 literal buffer length MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit in6_pton() supports 'low-32-bit dot-decimal representation' (this is useful with DNS64/NAT64 networks for example): # echo +aaaa:bbbb:cccc:dddd:eeee:ffff:1.2.3.4 > /proc/self/net/xt_recent/DEFAULT # cat /proc/self/net/xt_recent/DEFAULT src=aaaa:bbbb:cccc:dddd:eeee:ffff:0102:0304 ttl: 0 last_seen: 9733848829 oldest_pkt: 1 9733848829 but the provided buffer is too short: # echo +aaaa:bbbb:cccc:dddd:eeee:ffff:255.255.255.255 > /proc/self/net/xt_recent/DEFAULT -bash: echo: write error: Invalid argument Fixes: 079aa88fe717 ("netfilter: xt_recent: IPv6 support") Signed-off-by: Maciej Żenczykowski Reviewed-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_recent.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 7ddb9a78e3fc..ef93e0d3bee0 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -561,7 +561,7 @@ recent_mt_proc_write(struct file *file, const char __user *input, { struct recent_table *t = pde_data(file_inode(file)); struct recent_entry *e; - char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:5afe:c0de")]; + char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:255.255.255.255")]; const char *c = buf; union nf_inet_addr addr = {}; u_int16_t family; -- cgit v1.2.3-70-g09d2 From 80abbe8a8263106fe45a4f293b92b5c74cc9cc8a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 8 Nov 2023 13:18:53 +0100 Subject: netfilter: nat: fix ipv6 nat redirect with mapped and scoped addresses The ipv6 redirect target was derived from the ipv4 one, i.e. it's identical to a 'dnat' with the first (primary) address assigned to the network interface. The code has been moved around to make it usable from nf_tables too, but it's still the same as it was back when this was added in 2012. IPv6, however, has different types of addresses; if the 'wrong' address comes first, the redirection does not work.
In Daniel's case, the addresses are: inet6 ::ffff:192 ... inet6 2a01: ... ... so the function attempts to redirect to the mapped address. Add more checks before the address is deemed correct: 1. If the packet's daddr is scoped, search for a scoped address too 2. skip tentative addresses 3. skip mapped addresses Use the first address that appears to match our needs. Reported-by: Daniel Huhardeaux Closes: https://lore.kernel.org/netfilter/71be06b8-6aa0-4cf9-9e0b-e2839b01b22f@tootai.net/ Fixes: 115e23ac78f8 ("netfilter: ip6tables: add REDIRECT target") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_redirect.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c index 6616ba5d0b04..5b37487d9d11 100644 --- a/net/netfilter/nf_nat_redirect.c +++ b/net/netfilter/nf_nat_redirect.c @@ -80,6 +80,26 @@ EXPORT_SYMBOL_GPL(nf_nat_redirect_ipv4); static const struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT; +static bool nf_nat_redirect_ipv6_usable(const struct inet6_ifaddr *ifa, unsigned int scope) +{ + unsigned int ifa_addr_type = ipv6_addr_type(&ifa->addr); + + if (ifa_addr_type & IPV6_ADDR_MAPPED) + return false; + + if ((ifa->flags & IFA_F_TENTATIVE) && (!(ifa->flags & IFA_F_OPTIMISTIC))) + return false; + + if (scope) { + unsigned int ifa_scope = ifa_addr_type & IPV6_ADDR_SCOPE_MASK; + + if (!(scope & ifa_scope)) + return false; + } + + return true; +} + unsigned int nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, unsigned int hooknum) @@ -89,14 +109,19 @@ nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, if (hooknum == NF_INET_LOCAL_OUT) { newdst.in6 = loopback_addr; } else { + unsigned int scope = ipv6_addr_scope(&ipv6_hdr(skb)->daddr); struct inet6_dev *idev; - struct inet6_ifaddr *ifa; bool addr = false; idev = __in6_dev_get(skb->dev); if (idev
!= NULL) { + const struct inet6_ifaddr *ifa; + read_lock_bh(&idev->lock); list_for_each_entry(ifa, &idev->addr_list, if_list) { + if (!nf_nat_redirect_ipv6_usable(ifa, scope)) + continue; + newdst.in6 = ifa->addr; addr = true; break; -- cgit v1.2.3-70-g09d2 From 9bc64bd0cd765f696fcd40fc98909b1f7c73b2ba Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Fri, 3 Nov 2023 16:14:10 +0100 Subject: net/sched: act_ct: Always fill offloading tuple iifidx Referenced commit doesn't always set iifidx when offloading the flow to hardware. Fix the following cases: - nf_conn_act_ct_ext_fill() is called before the extension is created with nf_conn_act_ct_ext_add() in tcf_ct_act(). This can cause rule offload with unspecified iifidx when the connection is offloaded after only a single original-direction packet has been processed by tc data path. Always fill the new nf_conn_act_ct_ext instance after creating it in nf_conn_act_ct_ext_add(). - Offloading of unidirectional UDP NEW connections is now supported, but ct flow iifidx field is not updated when connection is promoted to bidirectional, which can result in the reply-direction iifidx being zero when refreshing the connection. Fill in the extension and update flow iifidx before calling flow_offload_refresh().
Fixes: 9795ded7f924 ("net/sched: act_ct: Fill offloading tuple iifidx") Reviewed-by: Paul Blakey Signed-off-by: Vlad Buslov Reviewed-by: Simon Horman Fixes: 6a9bad0069cf ("net/sched: act_ct: offload UDP NEW connections") Link: https://lore.kernel.org/r/20231103151410.764271-1-vladbu@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/netfilter/nf_conntrack_act_ct.h | 30 ++++++++++++++++------------- net/openvswitch/conntrack.c | 2 +- net/sched/act_ct.c | 15 ++++++++++++++- 3 files changed, 32 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_conntrack_act_ct.h b/include/net/netfilter/nf_conntrack_act_ct.h index 078d3c52c03f..e5f2f0b73a9a 100644 --- a/include/net/netfilter/nf_conntrack_act_ct.h +++ b/include/net/netfilter/nf_conntrack_act_ct.h @@ -20,7 +20,22 @@ static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_find(const struct nf #endif } -static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_add(struct nf_conn *ct) +static inline void nf_conn_act_ct_ext_fill(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ +#if IS_ENABLED(CONFIG_NET_ACT_CT) + struct nf_conn_act_ct_ext *act_ct_ext; + + act_ct_ext = nf_conn_act_ct_ext_find(ct); + if (dev_net(skb->dev) == &init_net && act_ct_ext) + act_ct_ext->ifindex[CTINFO2DIR(ctinfo)] = skb->dev->ifindex; +#endif +} + +static inline struct +nf_conn_act_ct_ext *nf_conn_act_ct_ext_add(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) { #if IS_ENABLED(CONFIG_NET_ACT_CT) struct nf_conn_act_ct_ext *act_ct = nf_ct_ext_find(ct, NF_CT_EXT_ACT_CT); @@ -29,22 +44,11 @@ static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_add(struct nf_conn * return act_ct; act_ct = nf_ct_ext_add(ct, NF_CT_EXT_ACT_CT, GFP_ATOMIC); + nf_conn_act_ct_ext_fill(skb, ct, ctinfo); return act_ct; #else return NULL; #endif } -static inline void nf_conn_act_ct_ext_fill(struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo) -{ -#if 
IS_ENABLED(CONFIG_NET_ACT_CT) - struct nf_conn_act_ct_ext *act_ct_ext; - - act_ct_ext = nf_conn_act_ct_ext_find(ct); - if (dev_net(skb->dev) == &init_net && act_ct_ext) - act_ct_ext->ifindex[CTINFO2DIR(ctinfo)] = skb->dev->ifindex; -#endif -} - #endif /* _NF_CONNTRACK_ACT_CT_H */ diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 0b9a785dea45..3019a4406ca4 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -985,7 +985,7 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, if (err) return err; - nf_conn_act_ct_ext_add(ct); + nf_conn_act_ct_ext_add(skb, ct, ctinfo); } else if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && labels_nonzero(&info->labels.mask)) { err = ovs_ct_set_labels(ct, key, &info->labels.value, diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 9583645e86c2..0db0ecf1d110 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -376,6 +376,17 @@ static void tcf_ct_flow_tc_ifidx(struct flow_offload *entry, entry->tuplehash[dir].tuple.tc.iifidx = act_ct_ext->ifindex[dir]; } +static void tcf_ct_flow_ct_ext_ifidx_update(struct flow_offload *entry) +{ + struct nf_conn_act_ct_ext *act_ct_ext; + + act_ct_ext = nf_conn_act_ct_ext_find(entry->ct); + if (act_ct_ext) { + tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_ORIGINAL); + tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_REPLY); + } +} + static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft, struct nf_conn *ct, bool tcp, bool bidirectional) @@ -671,6 +682,8 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, else ctinfo = IP_CT_ESTABLISHED_REPLY; + nf_conn_act_ct_ext_fill(skb, ct, ctinfo); + tcf_ct_flow_ct_ext_ifidx_update(flow); flow_offload_refresh(nf_ft, flow, force_refresh); if (!test_bit(IPS_ASSURED_BIT, &ct->status)) { /* Process this flow in SW to allow promoting to ASSURED */ @@ -1034,7 +1047,7 @@ do_nat: tcf_ct_act_set_labels(ct, p->labels, p->labels_mask); if 
(!nf_ct_is_confirmed(ct)) - nf_conn_act_ct_ext_add(ct); + nf_conn_act_ct_ext_add(skb, ct, ctinfo); /* This will take care of sending queued events * even if the connection is already confirmed. -- cgit v1.2.3-70-g09d2 From 31356547e3316829f15a98ecf9a2096cf3e228d2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 7 Nov 2023 18:03:05 -0800 Subject: net: kcm: fill in MODULE_DESCRIPTION() W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Link: https://lore.kernel.org/r/20231108020305.537293-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/kcm/kcmsock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index dd1d8ffd5f59..65d1f6755f98 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1946,4 +1946,5 @@ module_init(kcm_init); module_exit(kcm_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("KCM (Kernel Connection Multiplexor) sockets"); MODULE_ALIAS_NETPROTO(PF_KCM); -- cgit v1.2.3-70-g09d2 From f1a3b283f852c613fae004f87bbbacc8cef5a061 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 7 Nov 2023 16:04:40 +0000 Subject: net_sched: sch_fq: better validate TCA_FQ_WEIGHTS and TCA_FQ_PRIOMAP syzbot was able to trigger the following report while providing too small TCA_FQ_WEIGHTS attribute [1] Fix is to use NLA_POLICY_EXACT_LEN() to ensure user space provided correct sizes. Apply the same fix to TCA_FQ_PRIOMAP. 
[1] BUG: KMSAN: uninit-value in fq_load_weights net/sched/sch_fq.c:960 [inline] BUG: KMSAN: uninit-value in fq_change+0x1348/0x2fe0 net/sched/sch_fq.c:1071 fq_load_weights net/sched/sch_fq.c:960 [inline] fq_change+0x1348/0x2fe0 net/sched/sch_fq.c:1071 fq_init+0x68e/0x780 net/sched/sch_fq.c:1159 qdisc_create+0x12f3/0x1be0 net/sched/sch_api.c:1326 tc_modify_qdisc+0x11ef/0x2c20 rtnetlink_rcv_msg+0x16a6/0x1840 net/core/rtnetlink.c:6558 netlink_rcv_skb+0x371/0x650 net/netlink/af_netlink.c:2545 rtnetlink_rcv+0x34/0x40 net/core/rtnetlink.c:6576 netlink_unicast_kernel net/netlink/af_netlink.c:1342 [inline] netlink_unicast+0xf47/0x1250 net/netlink/af_netlink.c:1368 netlink_sendmsg+0x1238/0x13d0 net/netlink/af_netlink.c:1910 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] ____sys_sendmsg+0x9c2/0xd60 net/socket.c:2588 ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2642 __sys_sendmsg net/socket.c:2671 [inline] __do_sys_sendmsg net/socket.c:2680 [inline] __se_sys_sendmsg net/socket.c:2678 [inline] __x64_sys_sendmsg+0x307/0x490 net/socket.c:2678 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x44/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b Uninit was created at: slab_post_alloc_hook+0x129/0xa70 mm/slab.h:768 slab_alloc_node mm/slub.c:3478 [inline] kmem_cache_alloc_node+0x5e9/0xb10 mm/slub.c:3523 kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:560 __alloc_skb+0x318/0x740 net/core/skbuff.c:651 alloc_skb include/linux/skbuff.h:1286 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1214 [inline] netlink_sendmsg+0xb34/0x13d0 net/netlink/af_netlink.c:1885 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] ____sys_sendmsg+0x9c2/0xd60 net/socket.c:2588 ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2642 __sys_sendmsg net/socket.c:2671 [inline] __do_sys_sendmsg net/socket.c:2680 [inline] __se_sys_sendmsg net/socket.c:2678 [inline] __x64_sys_sendmsg+0x307/0x490 
net/socket.c:2678 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x44/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b CPU: 1 PID: 5001 Comm: syz-executor300 Not tainted 6.6.0-syzkaller-12401-g8f6f76a6a29f #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/09/2023 Fixes: 29f834aa326e ("net_sched: sch_fq: add 3 bands and WRR scheduling") Fixes: 49e7265fd098 ("net_sched: sch_fq: add TCA_FQ_WEIGHTS attribute") Reported-by: syzbot Signed-off-by: Eric Dumazet Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231107160440.1992526-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sched/sch_fq.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 0fd18c344ab5..3a31c47fea9b 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -919,14 +919,8 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = { [TCA_FQ_TIMER_SLACK] = { .type = NLA_U32 }, [TCA_FQ_HORIZON] = { .type = NLA_U32 }, [TCA_FQ_HORIZON_DROP] = { .type = NLA_U8 }, - [TCA_FQ_PRIOMAP] = { - .type = NLA_BINARY, - .len = sizeof(struct tc_prio_qopt), - }, - [TCA_FQ_WEIGHTS] = { - .type = NLA_BINARY, - .len = FQ_BANDS * sizeof(s32), - }, + [TCA_FQ_PRIOMAP] = NLA_POLICY_EXACT_LEN(sizeof(struct tc_prio_qopt)), + [TCA_FQ_WEIGHTS] = NLA_POLICY_EXACT_LEN(FQ_BANDS * sizeof(s32)), }; /* compress a u8 array with all elems <= 3 to an array of 2-bit fields */ -- cgit v1.2.3-70-g09d2