diff options
-rw-r--r-- | Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst | 62 | ||||
-rw-r--r-- | drivers/net/dsa/lantiq_gswip.c | 7 | ||||
-rw-r--r-- | drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c | 71 | ||||
-rw-r--r-- | drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 14 | ||||
-rw-r--r-- | drivers/s390/net/qeth_core.h | 3 | ||||
-rw-r--r-- | drivers/s390/net/qeth_core_main.c | 38 | ||||
-rw-r--r-- | drivers/s390/net/qeth_l2_main.c | 2 | ||||
-rw-r--r-- | drivers/s390/net/qeth_l3_main.c | 4 | ||||
-rw-r--r-- | kernel/bpf/task_iter.c | 1 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 2 | ||||
-rw-r--r-- | net/ipv4/ip_tunnel.c | 11 | ||||
-rw-r--r-- | net/ipv4/nexthop.c | 6 | ||||
-rw-r--r-- | tools/bpf/bpftool/net.c | 1 | ||||
-rw-r--r-- | tools/bpf/resolve_btfids/main.c | 17 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/progs/bprm_opts.c | 2 | ||||
-rwxr-xr-x | tools/testing/selftests/net/fib_nexthops.sh | 2 | ||||
-rw-r--r-- | tools/testing/selftests/netfilter/Makefile | 3 | ||||
-rwxr-xr-x | tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh | 206 |
18 files changed, 338 insertions, 114 deletions
diff --git a/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst b/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst index d3fcf536d14e..61e850460e18 100644 --- a/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst +++ b/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst @@ -164,46 +164,56 @@ Devlink health reporters NPA Reporters ------------- -The NPA reporters are responsible for reporting and recovering the following group of errors +The NPA reporters are responsible for reporting and recovering the following group of errors: + 1. GENERAL events + - Error due to operation of unmapped PF. - Error due to disabled alloc/free for other HW blocks (NIX, SSO, TIM, DPI and AURA). + 2. ERROR events + - Fault due to NPA_AQ_INST_S read or NPA_AQ_RES_S write. - AQ Doorbell Error. + 3. RAS events + - RAS Error Reporting for NPA_AQ_INST_S/NPA_AQ_RES_S. + 4. RVU events + - Error due to unmapped slot. -Sample Output -------------- -~# devlink health -pci/0002:01:00.0: - reporter hw_npa_intr - state healthy error 2872 recover 2872 last_dump_date 2020-12-10 last_dump_time 09:39:09 grace_period 0 auto_recover true auto_dump true - reporter hw_npa_gen - state healthy error 2872 recover 2872 last_dump_date 2020-12-11 last_dump_time 04:43:04 grace_period 0 auto_recover true auto_dump true - reporter hw_npa_err - state healthy error 2871 recover 2871 last_dump_date 2020-12-10 last_dump_time 09:39:17 grace_period 0 auto_recover true auto_dump true - reporter hw_npa_ras - state healthy error 0 recover 0 last_dump_date 2020-12-10 last_dump_time 09:32:40 grace_period 0 auto_recover true auto_dump true +Sample Output:: + + ~# devlink health + pci/0002:01:00.0: + reporter hw_npa_intr + state healthy error 2872 recover 2872 last_dump_date 2020-12-10 last_dump_time 09:39:09 grace_period 0 auto_recover true auto_dump true + reporter hw_npa_gen + state healthy error 2872 recover 2872 last_dump_date 2020-12-11 last_dump_time 04:43:04 grace_period 0 auto_recover true auto_dump true + reporter hw_npa_err + state healthy error 2871 recover 2871 last_dump_date 2020-12-10 last_dump_time 09:39:17 grace_period 0 auto_recover true auto_dump true + reporter hw_npa_ras + state healthy error 0 recover 0 last_dump_date 2020-12-10 last_dump_time 09:32:40 grace_period 0 auto_recover true auto_dump true Each reporter dumps the + - Error Type - Error Register value - Reason in words -For eg: -~# devlink health dump show pci/0002:01:00.0 reporter hw_npa_gen - NPA_AF_GENERAL: - NPA General Interrupt Reg : 1 - NIX0: free disabled RX -~# devlink health dump show pci/0002:01:00.0 reporter hw_npa_intr - NPA_AF_RVU: - NPA RVU Interrupt Reg : 1 - Unmap Slot Error -~# devlink health dump show pci/0002:01:00.0 reporter hw_npa_err - NPA_AF_ERR: - NPA Error Interrupt Reg : 4096 - AQ Doorbell Error +For example:: + + ~# devlink health dump show pci/0002:01:00.0 reporter hw_npa_gen + NPA_AF_GENERAL: + NPA General Interrupt Reg : 1 + NIX0: free disabled RX + ~# devlink health dump show pci/0002:01:00.0 reporter hw_npa_intr + NPA_AF_RVU: + NPA RVU Interrupt Reg : 1 + Unmap Slot Error + ~# devlink health dump show pci/0002:01:00.0 reporter hw_npa_err + NPA_AF_ERR: + NPA Error Interrupt Reg : 4096 + AQ Doorbell Error diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c index 4b36d89bec06..662e68a0e7e6 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@ -1436,11 +1436,12 @@ static void gswip_phylink_validate(struct dsa_switch *ds, int port, phylink_set(mask, Pause); phylink_set(mask, Asym_Pause); - /* With the exclusion of MII and Reverse MII, we support Gigabit, - * including Half duplex + /* With the exclusion of MII, Reverse MII and Reduced MII, we + * support Gigabit, including Half duplex */ if (state->interface != PHY_INTERFACE_MODE_MII && - state->interface != PHY_INTERFACE_MODE_REVMII) { + state->interface != PHY_INTERFACE_MODE_REVMII && + state->interface != PHY_INTERFACE_MODE_RMII) { phylink_set(mask, 1000baseT_Full); phylink_set(mask, 1000baseT_Half); } diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index a0e0d8a83681..51dd030b3b36 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -621,7 +621,7 @@ static void chtls_reset_synq(struct listen_ctx *listen_ctx) while (!skb_queue_empty(&listen_ctx->synq)) { struct chtls_sock *csk = - container_of((struct synq *)__skb_dequeue + container_of((struct synq *)skb_peek (&listen_ctx->synq), struct chtls_sock, synq); struct sock *child = csk->sk; @@ -1109,6 +1109,7 @@ static struct sock *chtls_recv_sock(struct sock *lsk, const struct cpl_pass_accept_req *req, struct chtls_dev *cdev) { + struct adapter *adap = pci_get_drvdata(cdev->pdev); struct neighbour *n = NULL; struct inet_sock *newinet; const struct iphdr *iph; @@ -1118,9 +1119,10 @@ static struct sock *chtls_recv_sock(struct sock *lsk, struct dst_entry *dst; struct tcp_sock *tp; struct sock *newsk; + bool found = false; u16 port_id; int rxq_idx; - int step; + int step, i; iph = (const struct iphdr *)network_hdr; newsk = tcp_create_openreq_child(lsk, oreq, cdev->askb); @@ -1152,7 +1154,7 @@ static struct sock *chtls_recv_sock(struct sock *lsk, n = dst_neigh_lookup(dst, &ip6h->saddr); #endif } - if (!n) + if (!n || !n->dev) goto free_sk; ndev = n->dev; @@ -1161,6 +1163,13 @@ static struct sock *chtls_recv_sock(struct sock *lsk, if (is_vlan_dev(ndev)) ndev = vlan_dev_real_dev(ndev); + for_each_port(adap, i) + if (cdev->ports[i] == ndev) + found = true; + + if (!found) + goto free_dst; + port_id = cxgb4_port_idx(ndev); csk = chtls_sock_create(cdev); @@ -1238,6 +1247,7 @@ static struct sock *chtls_recv_sock(struct sock *lsk, free_csk: chtls_sock_release(&csk->kref); free_dst: + neigh_release(n); dst_release(dst); free_sk: inet_csk_prepare_forced_close(newsk); @@ -1387,7 +1397,7 @@ static void chtls_pass_accept_request(struct sock *sk, newsk = chtls_recv_sock(sk, oreq, network_hdr, req, cdev); if (!newsk) - goto free_oreq; + goto reject; if (chtls_get_module(newsk)) goto reject; @@ -1403,8 +1413,6 @@ static void chtls_pass_accept_request(struct sock *sk, kfree_skb(skb); return; -free_oreq: - chtls_reqsk_free(oreq); reject: mk_tid_release(reply_skb, 0, tid); cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb); @@ -1589,6 +1597,11 @@ static int chtls_pass_establish(struct chtls_dev *cdev, struct sk_buff *skb) sk_wake_async(sk, 0, POLL_OUT); data = lookup_stid(cdev->tids, stid); + if (!data) { + /* listening server close */ + kfree_skb(skb); + goto unlock; + } lsk = ((struct listen_ctx *)data)->lsk; bh_lock_sock(lsk); @@ -1997,39 +2010,6 @@ static void t4_defer_reply(struct sk_buff *skb, struct chtls_dev *cdev, spin_unlock_bh(&cdev->deferq.lock); } -static void send_abort_rpl(struct sock *sk, struct sk_buff *skb, - struct chtls_dev *cdev, int status, int queue) -{ - struct cpl_abort_req_rss *req = cplhdr(skb); - struct sk_buff *reply_skb; - struct chtls_sock *csk; - - csk = rcu_dereference_sk_user_data(sk); - - reply_skb = alloc_skb(sizeof(struct cpl_abort_rpl), - GFP_KERNEL); - - if (!reply_skb) { - req->status = (queue << 1); - t4_defer_reply(skb, cdev, send_defer_abort_rpl); - return; - } - - set_abort_rpl_wr(reply_skb, GET_TID(req), status); - kfree_skb(skb); - - set_wr_txq(reply_skb, CPL_PRIORITY_DATA, queue); - if (csk_conn_inline(csk)) { - struct l2t_entry *e = csk->l2t_entry; - - if (e && sk->sk_state != TCP_SYN_RECV) { - cxgb4_l2t_send(csk->egress_dev, reply_skb, e); - return; - } - } - cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb); -} - static void chtls_send_abort_rpl(struct sock *sk, struct sk_buff *skb, struct chtls_dev *cdev, int status, int queue) @@ -2078,9 +2058,9 @@ static void bl_abort_syn_rcv(struct sock *lsk, struct sk_buff *skb) queue = csk->txq_idx; skb->sk = NULL; + chtls_send_abort_rpl(child, skb, BLOG_SKB_CB(skb)->cdev, + CPL_ABORT_NO_RST, queue); do_abort_syn_rcv(child, lsk); - send_abort_rpl(child, skb, BLOG_SKB_CB(skb)->cdev, - CPL_ABORT_NO_RST, queue); } static int abort_syn_rcv(struct sock *sk, struct sk_buff *skb) @@ -2110,8 +2090,8 @@ static int abort_syn_rcv(struct sock *sk, struct sk_buff *skb) if (!sock_owned_by_user(psk)) { int queue = csk->txq_idx; + chtls_send_abort_rpl(sk, skb, cdev, CPL_ABORT_NO_RST, queue); do_abort_syn_rcv(sk, psk); - send_abort_rpl(sk, skb, cdev, CPL_ABORT_NO_RST, queue); } else { skb->sk = sk; BLOG_SKB_CB(skb)->backlog_rcv = bl_abort_syn_rcv; @@ -2129,9 +2109,6 @@ static void chtls_abort_req_rss(struct sock *sk, struct sk_buff *skb) int queue = csk->txq_idx; if (is_neg_adv(req->status)) { - if (sk->sk_state == TCP_SYN_RECV) - chtls_set_tcb_tflag(sk, 0, 0); - kfree_skb(skb); return; } @@ -2158,12 +2135,12 @@ static void chtls_abort_req_rss(struct sock *sk, struct sk_buff *skb) if (sk->sk_state == TCP_SYN_RECV && !abort_syn_rcv(sk, skb)) return; - chtls_release_resources(sk); - chtls_conn_done(sk); } chtls_send_abort_rpl(sk, skb, BLOG_SKB_CB(skb)->cdev, rst_status, queue); + chtls_release_resources(sk); + chtls_conn_done(sk); } static void chtls_abort_rpl_rss(struct sock *sk, struct sk_buff *skb) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 7d0f96290943..1a8f5a039d50 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -871,8 +871,10 @@ static int cgx_lmac_init(struct cgx *cgx) if (!lmac) return -ENOMEM; lmac->name = kcalloc(1, sizeof("cgx_fwi_xxx_yyy"), GFP_KERNEL); - if (!lmac->name) - return -ENOMEM; + if (!lmac->name) { + err = -ENOMEM; + goto err_lmac_free; + } sprintf(lmac->name, "cgx_fwi_%d_%d", cgx->cgx_id, i); lmac->lmac_id = i; lmac->cgx = cgx; @@ -883,7 +885,7 @@ static int cgx_lmac_init(struct cgx *cgx) CGX_LMAC_FWI + i * 9), cgx_fwi_event_handler, 0, lmac->name, lmac); if (err) - return err; + goto err_irq; /* Enable interrupt */ cgx_write(cgx, lmac->lmac_id, CGXX_CMRX_INT_ENA_W1S, @@ -895,6 +897,12 @@ static int cgx_lmac_init(struct cgx *cgx) } return cgx_lmac_verify_fwi_version(cgx); + +err_irq: + kfree(lmac->name); +err_lmac_free: + kfree(lmac); + return err; } static int cgx_lmac_exit(struct cgx *cgx) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 6f5ddc3eab8c..28f637042d44 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -1079,7 +1079,8 @@ struct qeth_card *qeth_get_card_by_busid(char *bus_id); void qeth_set_allowed_threads(struct qeth_card *card, unsigned long threads, int clear_start_mask); int qeth_threads_running(struct qeth_card *, unsigned long); -int qeth_set_offline(struct qeth_card *card, bool resetting); +int qeth_set_offline(struct qeth_card *card, const struct qeth_discipline *disc, + bool resetting); int qeth_send_ipa_cmd(struct qeth_card *, struct qeth_cmd_buffer *, int (*reply_cb) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index f4b60294a969..cf18d87da41e 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -5507,12 +5507,12 @@ out: return rc; } -static int qeth_set_online(struct qeth_card *card) +static int qeth_set_online(struct qeth_card *card, + const struct qeth_discipline *disc) { bool carrier_ok; int rc; - mutex_lock(&card->discipline_mutex); mutex_lock(&card->conf_mutex); QETH_CARD_TEXT(card, 2, "setonlin"); @@ -5529,7 +5529,7 @@ static int qeth_set_online(struct qeth_card *card) /* no need for locking / error handling at this early stage: */ qeth_set_real_num_tx_queues(card, qeth_tx_actual_queues(card)); - rc = card->discipline->set_online(card, carrier_ok); + rc = disc->set_online(card, carrier_ok); if (rc) goto err_online; @@ -5537,7 +5537,6 @@ static int qeth_set_online(struct qeth_card *card) kobject_uevent(&card->gdev->dev.kobj, KOBJ_CHANGE); mutex_unlock(&card->conf_mutex); - mutex_unlock(&card->discipline_mutex); return 0; err_online: @@ -5552,15 +5551,14 @@ err_hardsetup: qdio_free(CARD_DDEV(card)); mutex_unlock(&card->conf_mutex); - mutex_unlock(&card->discipline_mutex); return rc; } -int qeth_set_offline(struct qeth_card *card, bool resetting) +int qeth_set_offline(struct qeth_card *card, const struct qeth_discipline *disc, + bool resetting) { int rc, rc2, rc3; - mutex_lock(&card->discipline_mutex); mutex_lock(&card->conf_mutex); QETH_CARD_TEXT(card, 3, "setoffl"); @@ -5581,7 +5579,7 @@ int qeth_set_offline(struct qeth_card *card, bool resetting) cancel_work_sync(&card->rx_mode_work); - card->discipline->set_offline(card); + disc->set_offline(card); qeth_qdio_clear_card(card, 0); qeth_drain_output_queues(card); @@ -5602,16 +5600,19 @@ int qeth_set_offline(struct qeth_card *card, bool resetting) kobject_uevent(&card->gdev->dev.kobj, KOBJ_CHANGE); mutex_unlock(&card->conf_mutex); - mutex_unlock(&card->discipline_mutex); return 0; } EXPORT_SYMBOL_GPL(qeth_set_offline); static int qeth_do_reset(void *data) { + const struct qeth_discipline *disc; struct qeth_card *card = data; int rc; + /* Lock-free, other users will block until we are done. */ + disc = card->discipline; + QETH_CARD_TEXT(card, 2, "recover1"); if (!qeth_do_run_thread(card, QETH_RECOVER_THREAD)) return 0; @@ -5619,8 +5620,8 @@ static int qeth_do_reset(void *data) dev_warn(&card->gdev->dev, "A recovery process has been started for the device\n"); - qeth_set_offline(card, true); - rc = qeth_set_online(card); + qeth_set_offline(card, disc, true); + rc = qeth_set_online(card, disc); if (!rc) { dev_info(&card->gdev->dev, "Device successfully recovered!\n"); @@ -6584,6 +6585,7 @@ static int qeth_core_probe_device(struct ccwgroup_device *gdev) break; default: card->info.layer_enforced = true; + /* It's so early that we don't need the discipline_mutex yet. */ rc = qeth_core_load_discipline(card, enforced_disc); if (rc) goto err_load; @@ -6616,10 +6618,12 @@ static void qeth_core_remove_device(struct ccwgroup_device *gdev) QETH_CARD_TEXT(card, 2, "removedv"); + mutex_lock(&card->discipline_mutex); if (card->discipline) { card->discipline->remove(gdev); qeth_core_free_discipline(card); } + mutex_unlock(&card->discipline_mutex); qeth_free_qdio_queues(card); @@ -6634,6 +6638,7 @@ static int qeth_core_set_online(struct ccwgroup_device *gdev) int rc = 0; enum qeth_discipline_id def_discipline; + mutex_lock(&card->discipline_mutex); if (!card->discipline) { def_discipline = IS_IQD(card) ? QETH_DISCIPLINE_LAYER3 : QETH_DISCIPLINE_LAYER2; @@ -6647,16 +6652,23 @@ static int qeth_core_set_online(struct ccwgroup_device *gdev) } } - rc = qeth_set_online(card); + rc = qeth_set_online(card, card->discipline); + err: + mutex_unlock(&card->discipline_mutex); return rc; } static int qeth_core_set_offline(struct ccwgroup_device *gdev) { struct qeth_card *card = dev_get_drvdata(&gdev->dev); + int rc; - return qeth_set_offline(card, false); + mutex_lock(&card->discipline_mutex); + rc = qeth_set_offline(card, card->discipline, false); + mutex_unlock(&card->discipline_mutex); + + return rc; } static void qeth_core_shutdown(struct ccwgroup_device *gdev) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 4ed0fb0705a5..4254caf1d9b6 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -2208,7 +2208,7 @@ static void qeth_l2_remove_device(struct ccwgroup_device *gdev) wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0); if (gdev->state == CCWGROUP_ONLINE) - qeth_set_offline(card, false); + qeth_set_offline(card, card->discipline, false); cancel_work_sync(&card->close_dev_work); if (card->dev->reg_state == NETREG_REGISTERED) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index d138ac432d01..4c2cae7ae9a7 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1813,7 +1813,7 @@ static netdev_features_t qeth_l3_osa_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features) { - if (qeth_get_ip_version(skb) != 4) + if (vlan_get_protocol(skb) != htons(ETH_P_IP)) features &= ~NETIF_F_HW_VLAN_CTAG_TX; return qeth_features_check(skb, dev, features); } @@ -1971,7 +1971,7 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev) wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0); if (cgdev->state == CCWGROUP_ONLINE) - qeth_set_offline(card, false); + qeth_set_offline(card, card->discipline, false); cancel_work_sync(&card->close_dev_work); if (card->dev->reg_state == NETREG_REGISTERED) diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 3efe38191d1c..175b7b42bfc4 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -159,6 +159,7 @@ again: } /* set info->task and info->tid */ + info->task = curr_task; if (curr_tid == info->tid) { curr_fd = info->fd; } else { diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 89fff5f59eea..2ed0b01f72f0 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -302,7 +302,7 @@ static int __ip_finish_output(struct net *net, struct sock *sk, struct sk_buff * if (skb_is_gso(skb)) return ip_finish_output_gso(net, sk, skb, mtu); - if (skb->len > mtu || (IPCB(skb)->flags & IPSKB_FRAG_PMTU)) + if (skb->len > mtu || IPCB(skb)->frag_max_size) return ip_fragment(net, sk, skb, mtu, ip_finish_output2); return ip_finish_output2(net, sk, skb); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index ee65c9225178..64594aa755f0 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -759,8 +759,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, goto tx_error; } - if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph, - 0, 0, false)) { + df = tnl_params->frag_off; + if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df) + df |= (inner_iph->frag_off & htons(IP_DF)); + + if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) { ip_rt_put(rt); goto tx_error; } @@ -788,10 +791,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, ttl = ip4_dst_hoplimit(&rt->dst); } - df = tnl_params->frag_off; - if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df) - df |= (inner_iph->frag_off&htons(IP_DF)); - max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) + rt->dst.header_len + ip_encap_hlen(&tunnel->encap); if (max_headroom > dev->needed_headroom) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 5e1b22d4f939..e53e43aef785 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -627,7 +627,7 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[], for (i = NHA_GROUP_TYPE + 1; i < __NHA_MAX; ++i) { if (!tb[i]) continue; - if (tb[NHA_FDB]) + if (i == NHA_FDB) continue; NL_SET_ERR_MSG(extack, "No other attributes can be set in nexthop groups"); @@ -1459,8 +1459,10 @@ static struct nexthop *nexthop_create_group(struct net *net, return nh; out_no_nh: - for (; i >= 0; --i) + for (i--; i >= 0; --i) { + list_del(&nhg->nh_entries[i].nh_list); nexthop_put(nhg->nh_entries[i].nh); + } kfree(nhg->spare); kfree(nhg); diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index 3fae61ef6339..ff3aa0cf3997 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -11,7 +11,6 @@ #include <bpf/bpf.h> #include <bpf/libbpf.h> #include <net/if.h> -#include <linux/if.h> #include <linux/rtnetlink.h> #include <linux/socket.h> #include <linux/tc_act/tc_bpf.h> diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index e3ea569ee125..7409d7860aa6 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -139,6 +139,8 @@ int eprintf(int level, int var, const char *fmt, ...) #define pr_debug2(fmt, ...) pr_debugN(2, pr_fmt(fmt), ##__VA_ARGS__) #define pr_err(fmt, ...) \ eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__) +#define pr_info(fmt, ...) \ + eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__) static bool is_btf_id(const char *name) { @@ -472,7 +474,7 @@ static int symbols_resolve(struct object *obj) int nr_funcs = obj->nr_funcs; int err, type_id; struct btf *btf; - __u32 nr; + __u32 nr_types; btf = btf__parse(obj->btf ?: obj->path, NULL); err = libbpf_get_error(btf); @@ -483,12 +485,12 @@ static int symbols_resolve(struct object *obj) } err = -1; - nr = btf__get_nr_types(btf); + nr_types = btf__get_nr_types(btf); /* * Iterate all the BTF types and search for collected symbol IDs. */ - for (type_id = 1; type_id <= nr; type_id++) { + for (type_id = 1; type_id <= nr_types; type_id++) { const struct btf_type *type; struct rb_root *root; struct btf_id *id; @@ -526,8 +528,13 @@ static int symbols_resolve(struct object *obj) id = btf_id__find(root, str); if (id) { - id->id = type_id; - (*nr)--; + if (id->id) { + pr_info("WARN: multiple IDs found for '%s': %d, %d - using %d\n", + str, id->id, type_id, id->id); + } else { + id->id = type_id; + (*nr)--; + } } } diff --git a/tools/testing/selftests/bpf/progs/bprm_opts.c b/tools/testing/selftests/bpf/progs/bprm_opts.c index 5bfef2887e70..418d9c6d4952 100644 --- a/tools/testing/selftests/bpf/progs/bprm_opts.c +++ b/tools/testing/selftests/bpf/progs/bprm_opts.c @@ -4,7 +4,7 @@ * Copyright 2020 Google LLC. */ -#include "vmlinux.h" +#include <linux/bpf.h> #include <errno.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index eb693a3b7b4a..4c7d33618437 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -869,7 +869,7 @@ ipv6_torture() pid3=$! ip netns exec me ping -f 2001:db8:101::2 >/dev/null 2>&1 & pid4=$! - ip netns exec me mausezahn veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & + ip netns exec me mausezahn -6 veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & pid5=$! sleep 300 diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index a374e10ef506..3006a8e5b41a 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -4,7 +4,8 @@ TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \ conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \ nft_concat_range.sh nft_conntrack_helper.sh \ - nft_queue.sh nft_meta.sh + nft_queue.sh nft_meta.sh \ + ipip-conntrack-mtu.sh LDLIBS = -lmnl TEST_GEN_FILES = nf-queue diff --git a/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh b/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh new file mode 100755 index 000000000000..4a6f5c3b3215 --- /dev/null +++ b/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh @@ -0,0 +1,206 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +# Conntrack needs to reassemble fragments in order to have complete +# packets for rule matching. Reassembly can lead to packet loss. + +# Consider the following setup: +# +--------+ +---------+ +--------+ +# |Router A|-------|Wanrouter|-------|Router B| +# | |.IPIP..| |..IPIP.| | +# +--------+ +---------+ +--------+ +# / mtu 1400 \ +# / \ +#+--------+ +--------+ +#|Client A| |Client B| +#| | | | +#+--------+ +--------+ + +# Router A and Router B use IPIP tunnel interfaces to tunnel traffic +# between Client A and Client B over WAN. Wanrouter has MTU 1400 set +# on its interfaces. + +rnd=$(mktemp -u XXXXXXXX) +rx=$(mktemp) + +r_a="ns-ra-$rnd" +r_b="ns-rb-$rnd" +r_w="ns-rw-$rnd" +c_a="ns-ca-$rnd" +c_b="ns-cb-$rnd" + +checktool (){ + if ! $1 > /dev/null 2>&1; then + echo "SKIP: Could not $2" + exit $ksft_skip + fi +} + +checktool "iptables --version" "run test without iptables" +checktool "ip -Version" "run test without ip tool" +checktool "which nc" "run test without nc (netcat)" +checktool "ip netns add ${r_a}" "create net namespace" + +for n in ${r_b} ${r_w} ${c_a} ${c_b};do + ip netns add ${n} +done + +cleanup() { + for n in ${r_a} ${r_b} ${r_w} ${c_a} ${c_b};do + ip netns del ${n} + done + rm -f ${rx} +} + +trap cleanup EXIT + +test_path() { + msg="$1" + + ip netns exec ${c_b} nc -n -w 3 -q 3 -u -l -p 5000 > ${rx} < /dev/null & + + sleep 1 + for i in 1 2 3; do + head -c1400 /dev/zero | tr "\000" "a" | ip netns exec ${c_a} nc -n -w 1 -u 192.168.20.2 5000 + done + + wait + + bytes=$(wc -c < ${rx}) + + if [ $bytes -eq 1400 ];then + echo "OK: PMTU $msg connection tracking" + else + echo "FAIL: PMTU $msg connection tracking: got $bytes, expected 1400" + exit 1 + fi +} + +# Detailed setup for Router A +# --------------------------- +# Interfaces: +# eth0: 10.2.2.1/24 +# eth1: 192.168.10.1/24 +# ipip0: No IP address, local 10.2.2.1 remote 10.4.4.1 +# Routes: +# 192.168.20.0/24 dev ipip0 (192.168.20.0/24 is subnet of Client B) +# 10.4.4.1 via 10.2.2.254 (Router B via Wanrouter) +# No iptables rules at all. + +ip link add veth0 netns ${r_a} type veth peer name veth0 netns ${r_w} +ip link add veth1 netns ${r_a} type veth peer name veth0 netns ${c_a} + +l_addr="10.2.2.1" +r_addr="10.4.4.1" +ip netns exec ${r_a} ip link add ipip0 type ipip local ${l_addr} remote ${r_addr} mode ipip || exit $ksft_skip + +for dev in lo veth0 veth1 ipip0; do + ip -net ${r_a} link set $dev up +done + +ip -net ${r_a} addr add 10.2.2.1/24 dev veth0 +ip -net ${r_a} addr add 192.168.10.1/24 dev veth1 + +ip -net ${r_a} route add 192.168.20.0/24 dev ipip0 +ip -net ${r_a} route add 10.4.4.0/24 via 10.2.2.254 + +ip netns exec ${r_a} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null + +# Detailed setup for Router B +# --------------------------- +# Interfaces: +# eth0: 10.4.4.1/24 +# eth1: 192.168.20.1/24 +# ipip0: No IP address, local 10.4.4.1 remote 10.2.2.1 +# Routes: +# 192.168.10.0/24 dev ipip0 (192.168.10.0/24 is subnet of Client A) +# 10.2.2.1 via 10.4.4.254 (Router A via Wanrouter) +# No iptables rules at all. + +ip link add veth0 netns ${r_b} type veth peer name veth1 netns ${r_w} +ip link add veth1 netns ${r_b} type veth peer name veth0 netns ${c_b} + +l_addr="10.4.4.1" +r_addr="10.2.2.1" + +ip netns exec ${r_b} ip link add ipip0 type ipip local ${l_addr} remote ${r_addr} mode ipip || exit $ksft_skip + +for dev in lo veth0 veth1 ipip0; do + ip -net ${r_b} link set $dev up +done + +ip -net ${r_b} addr add 10.4.4.1/24 dev veth0 +ip -net ${r_b} addr add 192.168.20.1/24 dev veth1 + +ip -net ${r_b} route add 192.168.10.0/24 dev ipip0 +ip -net ${r_b} route add 10.2.2.0/24 via 10.4.4.254 +ip netns exec ${r_b} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null + +# Client A +ip -net ${c_a} addr add 192.168.10.2/24 dev veth0 +ip -net ${c_a} link set dev lo up +ip -net ${c_a} link set dev veth0 up +ip -net ${c_a} route add default via 192.168.10.1 + +# Client A +ip -net ${c_b} addr add 192.168.20.2/24 dev veth0 +ip -net ${c_b} link set dev veth0 up +ip -net ${c_b} link set dev lo up +ip -net ${c_b} route add default via 192.168.20.1 + +# Wan +ip -net ${r_w} addr add 10.2.2.254/24 dev veth0 +ip -net ${r_w} addr add 10.4.4.254/24 dev veth1 + +ip -net ${r_w} link set dev lo up +ip -net ${r_w} link set dev veth0 up mtu 1400 +ip -net ${r_w} link set dev veth1 up mtu 1400 + +ip -net ${r_a} link set dev veth0 mtu 1400 +ip -net ${r_b} link set dev veth0 mtu 1400 + +ip netns exec ${r_w} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null + +# Path MTU discovery +# ------------------ +# Running tracepath from Client A to Client B shows PMTU discovery is working +# as expected: +# +# clienta:~# tracepath 192.168.20.2 +# 1?: [LOCALHOST] pmtu 1500 +# 1: 192.168.10.1 0.867ms +# 1: 192.168.10.1 0.302ms +# 2: 192.168.10.1 0.312ms pmtu 1480 +# 2: no reply +# 3: 192.168.10.1 0.510ms pmtu 1380 +# 3: 192.168.20.2 2.320ms reached +# Resume: pmtu 1380 hops 3 back 3 + +# ip netns exec ${c_a} traceroute --mtu 192.168.20.2 + +# Router A has learned PMTU (1400) to Router B from Wanrouter. +# Client A has learned PMTU (1400 - IPIP overhead = 1380) to Client B +# from Router A. + +#Send large UDP packet +#--------------------- +#Now we send a 1400 bytes UDP packet from Client A to Client B: + +# clienta:~# head -c1400 /dev/zero | tr "\000" "a" | nc -u 192.168.20.2 5000 +test_path "without" + +# The IPv4 stack on Client A already knows the PMTU to Client B, so the +# UDP packet is sent as two fragments (1380 + 20). Router A forwards the +# fragments between eth1 and ipip0. The fragments fit into the tunnel and +# reach their destination. + +#When sending the large UDP packet again, Router A now reassembles the +#fragments before routing the packet over ipip0. The resulting IPIP +#packet is too big (1400) for the tunnel PMTU (1380) to Router B, it is +#dropped on Router A before sending. + +ip netns exec ${r_a} iptables -A FORWARD -m conntrack --ctstate NEW +test_path "with" |