summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst62
-rw-r--r--drivers/net/dsa/lantiq_gswip.c7
-rw-r--r--drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c71
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/cgx.c14
-rw-r--r--drivers/s390/net/qeth_core.h3
-rw-r--r--drivers/s390/net/qeth_core_main.c38
-rw-r--r--drivers/s390/net/qeth_l2_main.c2
-rw-r--r--drivers/s390/net/qeth_l3_main.c4
-rw-r--r--kernel/bpf/task_iter.c1
-rw-r--r--net/ipv4/ip_output.c2
-rw-r--r--net/ipv4/ip_tunnel.c11
-rw-r--r--net/ipv4/nexthop.c6
-rw-r--r--tools/bpf/bpftool/net.c1
-rw-r--r--tools/bpf/resolve_btfids/main.c17
-rw-r--r--tools/testing/selftests/bpf/progs/bprm_opts.c2
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh2
-rw-r--r--tools/testing/selftests/netfilter/Makefile3
-rwxr-xr-xtools/testing/selftests/netfilter/ipip-conntrack-mtu.sh206
18 files changed, 338 insertions, 114 deletions
diff --git a/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst b/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst
index d3fcf536d14e..61e850460e18 100644
--- a/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst
+++ b/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst
@@ -164,46 +164,56 @@ Devlink health reporters
NPA Reporters
-------------
-The NPA reporters are responsible for reporting and recovering the following group of errors
+The NPA reporters are responsible for reporting and recovering the following group of errors:
+
1. GENERAL events
+
- Error due to operation of unmapped PF.
- Error due to disabled alloc/free for other HW blocks (NIX, SSO, TIM, DPI and AURA).
+
2. ERROR events
+
- Fault due to NPA_AQ_INST_S read or NPA_AQ_RES_S write.
- AQ Doorbell Error.
+
3. RAS events
+
- RAS Error Reporting for NPA_AQ_INST_S/NPA_AQ_RES_S.
+
4. RVU events
+
- Error due to unmapped slot.
-Sample Output
--------------
-~# devlink health
-pci/0002:01:00.0:
- reporter hw_npa_intr
- state healthy error 2872 recover 2872 last_dump_date 2020-12-10 last_dump_time 09:39:09 grace_period 0 auto_recover true auto_dump true
- reporter hw_npa_gen
- state healthy error 2872 recover 2872 last_dump_date 2020-12-11 last_dump_time 04:43:04 grace_period 0 auto_recover true auto_dump true
- reporter hw_npa_err
- state healthy error 2871 recover 2871 last_dump_date 2020-12-10 last_dump_time 09:39:17 grace_period 0 auto_recover true auto_dump true
- reporter hw_npa_ras
- state healthy error 0 recover 0 last_dump_date 2020-12-10 last_dump_time 09:32:40 grace_period 0 auto_recover true auto_dump true
+Sample Output::
+
+ ~# devlink health
+ pci/0002:01:00.0:
+ reporter hw_npa_intr
+ state healthy error 2872 recover 2872 last_dump_date 2020-12-10 last_dump_time 09:39:09 grace_period 0 auto_recover true auto_dump true
+ reporter hw_npa_gen
+ state healthy error 2872 recover 2872 last_dump_date 2020-12-11 last_dump_time 04:43:04 grace_period 0 auto_recover true auto_dump true
+ reporter hw_npa_err
+ state healthy error 2871 recover 2871 last_dump_date 2020-12-10 last_dump_time 09:39:17 grace_period 0 auto_recover true auto_dump true
+ reporter hw_npa_ras
+ state healthy error 0 recover 0 last_dump_date 2020-12-10 last_dump_time 09:32:40 grace_period 0 auto_recover true auto_dump true
Each reporter dumps the
+
- Error Type
- Error Register value
- Reason in words
-For eg:
-~# devlink health dump show pci/0002:01:00.0 reporter hw_npa_gen
- NPA_AF_GENERAL:
- NPA General Interrupt Reg : 1
- NIX0: free disabled RX
-~# devlink health dump show pci/0002:01:00.0 reporter hw_npa_intr
- NPA_AF_RVU:
- NPA RVU Interrupt Reg : 1
- Unmap Slot Error
-~# devlink health dump show pci/0002:01:00.0 reporter hw_npa_err
- NPA_AF_ERR:
- NPA Error Interrupt Reg : 4096
- AQ Doorbell Error
+For example::
+
+ ~# devlink health dump show pci/0002:01:00.0 reporter hw_npa_gen
+ NPA_AF_GENERAL:
+ NPA General Interrupt Reg : 1
+ NIX0: free disabled RX
+ ~# devlink health dump show pci/0002:01:00.0 reporter hw_npa_intr
+ NPA_AF_RVU:
+ NPA RVU Interrupt Reg : 1
+ Unmap Slot Error
+ ~# devlink health dump show pci/0002:01:00.0 reporter hw_npa_err
+ NPA_AF_ERR:
+ NPA Error Interrupt Reg : 4096
+ AQ Doorbell Error
diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c
index 4b36d89bec06..662e68a0e7e6 100644
--- a/drivers/net/dsa/lantiq_gswip.c
+++ b/drivers/net/dsa/lantiq_gswip.c
@@ -1436,11 +1436,12 @@ static void gswip_phylink_validate(struct dsa_switch *ds, int port,
phylink_set(mask, Pause);
phylink_set(mask, Asym_Pause);
- /* With the exclusion of MII and Reverse MII, we support Gigabit,
- * including Half duplex
+ /* With the exclusion of MII, Reverse MII and Reduced MII, we
+ * support Gigabit, including Half duplex
*/
if (state->interface != PHY_INTERFACE_MODE_MII &&
- state->interface != PHY_INTERFACE_MODE_REVMII) {
+ state->interface != PHY_INTERFACE_MODE_REVMII &&
+ state->interface != PHY_INTERFACE_MODE_RMII) {
phylink_set(mask, 1000baseT_Full);
phylink_set(mask, 1000baseT_Half);
}
diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
index a0e0d8a83681..51dd030b3b36 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
+++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
@@ -621,7 +621,7 @@ static void chtls_reset_synq(struct listen_ctx *listen_ctx)
while (!skb_queue_empty(&listen_ctx->synq)) {
struct chtls_sock *csk =
- container_of((struct synq *)__skb_dequeue
+ container_of((struct synq *)skb_peek
(&listen_ctx->synq), struct chtls_sock, synq);
struct sock *child = csk->sk;
@@ -1109,6 +1109,7 @@ static struct sock *chtls_recv_sock(struct sock *lsk,
const struct cpl_pass_accept_req *req,
struct chtls_dev *cdev)
{
+ struct adapter *adap = pci_get_drvdata(cdev->pdev);
struct neighbour *n = NULL;
struct inet_sock *newinet;
const struct iphdr *iph;
@@ -1118,9 +1119,10 @@ static struct sock *chtls_recv_sock(struct sock *lsk,
struct dst_entry *dst;
struct tcp_sock *tp;
struct sock *newsk;
+ bool found = false;
u16 port_id;
int rxq_idx;
- int step;
+ int step, i;
iph = (const struct iphdr *)network_hdr;
newsk = tcp_create_openreq_child(lsk, oreq, cdev->askb);
@@ -1152,7 +1154,7 @@ static struct sock *chtls_recv_sock(struct sock *lsk,
n = dst_neigh_lookup(dst, &ip6h->saddr);
#endif
}
- if (!n)
+ if (!n || !n->dev)
goto free_sk;
ndev = n->dev;
@@ -1161,6 +1163,13 @@ static struct sock *chtls_recv_sock(struct sock *lsk,
if (is_vlan_dev(ndev))
ndev = vlan_dev_real_dev(ndev);
+ for_each_port(adap, i)
+ if (cdev->ports[i] == ndev)
+ found = true;
+
+ if (!found)
+ goto free_dst;
+
port_id = cxgb4_port_idx(ndev);
csk = chtls_sock_create(cdev);
@@ -1238,6 +1247,7 @@ static struct sock *chtls_recv_sock(struct sock *lsk,
free_csk:
chtls_sock_release(&csk->kref);
free_dst:
+ neigh_release(n);
dst_release(dst);
free_sk:
inet_csk_prepare_forced_close(newsk);
@@ -1387,7 +1397,7 @@ static void chtls_pass_accept_request(struct sock *sk,
newsk = chtls_recv_sock(sk, oreq, network_hdr, req, cdev);
if (!newsk)
- goto free_oreq;
+ goto reject;
if (chtls_get_module(newsk))
goto reject;
@@ -1403,8 +1413,6 @@ static void chtls_pass_accept_request(struct sock *sk,
kfree_skb(skb);
return;
-free_oreq:
- chtls_reqsk_free(oreq);
reject:
mk_tid_release(reply_skb, 0, tid);
cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb);
@@ -1589,6 +1597,11 @@ static int chtls_pass_establish(struct chtls_dev *cdev, struct sk_buff *skb)
sk_wake_async(sk, 0, POLL_OUT);
data = lookup_stid(cdev->tids, stid);
+ if (!data) {
+ /* listening server close */
+ kfree_skb(skb);
+ goto unlock;
+ }
lsk = ((struct listen_ctx *)data)->lsk;
bh_lock_sock(lsk);
@@ -1997,39 +2010,6 @@ static void t4_defer_reply(struct sk_buff *skb, struct chtls_dev *cdev,
spin_unlock_bh(&cdev->deferq.lock);
}
-static void send_abort_rpl(struct sock *sk, struct sk_buff *skb,
- struct chtls_dev *cdev, int status, int queue)
-{
- struct cpl_abort_req_rss *req = cplhdr(skb);
- struct sk_buff *reply_skb;
- struct chtls_sock *csk;
-
- csk = rcu_dereference_sk_user_data(sk);
-
- reply_skb = alloc_skb(sizeof(struct cpl_abort_rpl),
- GFP_KERNEL);
-
- if (!reply_skb) {
- req->status = (queue << 1);
- t4_defer_reply(skb, cdev, send_defer_abort_rpl);
- return;
- }
-
- set_abort_rpl_wr(reply_skb, GET_TID(req), status);
- kfree_skb(skb);
-
- set_wr_txq(reply_skb, CPL_PRIORITY_DATA, queue);
- if (csk_conn_inline(csk)) {
- struct l2t_entry *e = csk->l2t_entry;
-
- if (e && sk->sk_state != TCP_SYN_RECV) {
- cxgb4_l2t_send(csk->egress_dev, reply_skb, e);
- return;
- }
- }
- cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb);
-}
-
static void chtls_send_abort_rpl(struct sock *sk, struct sk_buff *skb,
struct chtls_dev *cdev,
int status, int queue)
@@ -2078,9 +2058,9 @@ static void bl_abort_syn_rcv(struct sock *lsk, struct sk_buff *skb)
queue = csk->txq_idx;
skb->sk = NULL;
+ chtls_send_abort_rpl(child, skb, BLOG_SKB_CB(skb)->cdev,
+ CPL_ABORT_NO_RST, queue);
do_abort_syn_rcv(child, lsk);
- send_abort_rpl(child, skb, BLOG_SKB_CB(skb)->cdev,
- CPL_ABORT_NO_RST, queue);
}
static int abort_syn_rcv(struct sock *sk, struct sk_buff *skb)
@@ -2110,8 +2090,8 @@ static int abort_syn_rcv(struct sock *sk, struct sk_buff *skb)
if (!sock_owned_by_user(psk)) {
int queue = csk->txq_idx;
+ chtls_send_abort_rpl(sk, skb, cdev, CPL_ABORT_NO_RST, queue);
do_abort_syn_rcv(sk, psk);
- send_abort_rpl(sk, skb, cdev, CPL_ABORT_NO_RST, queue);
} else {
skb->sk = sk;
BLOG_SKB_CB(skb)->backlog_rcv = bl_abort_syn_rcv;
@@ -2129,9 +2109,6 @@ static void chtls_abort_req_rss(struct sock *sk, struct sk_buff *skb)
int queue = csk->txq_idx;
if (is_neg_adv(req->status)) {
- if (sk->sk_state == TCP_SYN_RECV)
- chtls_set_tcb_tflag(sk, 0, 0);
-
kfree_skb(skb);
return;
}
@@ -2158,12 +2135,12 @@ static void chtls_abort_req_rss(struct sock *sk, struct sk_buff *skb)
if (sk->sk_state == TCP_SYN_RECV && !abort_syn_rcv(sk, skb))
return;
- chtls_release_resources(sk);
- chtls_conn_done(sk);
}
chtls_send_abort_rpl(sk, skb, BLOG_SKB_CB(skb)->cdev,
rst_status, queue);
+ chtls_release_resources(sk);
+ chtls_conn_done(sk);
}
static void chtls_abort_rpl_rss(struct sock *sk, struct sk_buff *skb)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
index 7d0f96290943..1a8f5a039d50 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
@@ -871,8 +871,10 @@ static int cgx_lmac_init(struct cgx *cgx)
if (!lmac)
return -ENOMEM;
lmac->name = kcalloc(1, sizeof("cgx_fwi_xxx_yyy"), GFP_KERNEL);
- if (!lmac->name)
- return -ENOMEM;
+ if (!lmac->name) {
+ err = -ENOMEM;
+ goto err_lmac_free;
+ }
sprintf(lmac->name, "cgx_fwi_%d_%d", cgx->cgx_id, i);
lmac->lmac_id = i;
lmac->cgx = cgx;
@@ -883,7 +885,7 @@ static int cgx_lmac_init(struct cgx *cgx)
CGX_LMAC_FWI + i * 9),
cgx_fwi_event_handler, 0, lmac->name, lmac);
if (err)
- return err;
+ goto err_irq;
/* Enable interrupt */
cgx_write(cgx, lmac->lmac_id, CGXX_CMRX_INT_ENA_W1S,
@@ -895,6 +897,12 @@ static int cgx_lmac_init(struct cgx *cgx)
}
return cgx_lmac_verify_fwi_version(cgx);
+
+err_irq:
+ kfree(lmac->name);
+err_lmac_free:
+ kfree(lmac);
+ return err;
}
static int cgx_lmac_exit(struct cgx *cgx)
diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 6f5ddc3eab8c..28f637042d44 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -1079,7 +1079,8 @@ struct qeth_card *qeth_get_card_by_busid(char *bus_id);
void qeth_set_allowed_threads(struct qeth_card *card, unsigned long threads,
int clear_start_mask);
int qeth_threads_running(struct qeth_card *, unsigned long);
-int qeth_set_offline(struct qeth_card *card, bool resetting);
+int qeth_set_offline(struct qeth_card *card, const struct qeth_discipline *disc,
+ bool resetting);
int qeth_send_ipa_cmd(struct qeth_card *, struct qeth_cmd_buffer *,
int (*reply_cb)
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index f4b60294a969..cf18d87da41e 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -5507,12 +5507,12 @@ out:
return rc;
}
-static int qeth_set_online(struct qeth_card *card)
+static int qeth_set_online(struct qeth_card *card,
+ const struct qeth_discipline *disc)
{
bool carrier_ok;
int rc;
- mutex_lock(&card->discipline_mutex);
mutex_lock(&card->conf_mutex);
QETH_CARD_TEXT(card, 2, "setonlin");
@@ -5529,7 +5529,7 @@ static int qeth_set_online(struct qeth_card *card)
/* no need for locking / error handling at this early stage: */
qeth_set_real_num_tx_queues(card, qeth_tx_actual_queues(card));
- rc = card->discipline->set_online(card, carrier_ok);
+ rc = disc->set_online(card, carrier_ok);
if (rc)
goto err_online;
@@ -5537,7 +5537,6 @@ static int qeth_set_online(struct qeth_card *card)
kobject_uevent(&card->gdev->dev.kobj, KOBJ_CHANGE);
mutex_unlock(&card->conf_mutex);
- mutex_unlock(&card->discipline_mutex);
return 0;
err_online:
@@ -5552,15 +5551,14 @@ err_hardsetup:
qdio_free(CARD_DDEV(card));
mutex_unlock(&card->conf_mutex);
- mutex_unlock(&card->discipline_mutex);
return rc;
}
-int qeth_set_offline(struct qeth_card *card, bool resetting)
+int qeth_set_offline(struct qeth_card *card, const struct qeth_discipline *disc,
+ bool resetting)
{
int rc, rc2, rc3;
- mutex_lock(&card->discipline_mutex);
mutex_lock(&card->conf_mutex);
QETH_CARD_TEXT(card, 3, "setoffl");
@@ -5581,7 +5579,7 @@ int qeth_set_offline(struct qeth_card *card, bool resetting)
cancel_work_sync(&card->rx_mode_work);
- card->discipline->set_offline(card);
+ disc->set_offline(card);
qeth_qdio_clear_card(card, 0);
qeth_drain_output_queues(card);
@@ -5602,16 +5600,19 @@ int qeth_set_offline(struct qeth_card *card, bool resetting)
kobject_uevent(&card->gdev->dev.kobj, KOBJ_CHANGE);
mutex_unlock(&card->conf_mutex);
- mutex_unlock(&card->discipline_mutex);
return 0;
}
EXPORT_SYMBOL_GPL(qeth_set_offline);
static int qeth_do_reset(void *data)
{
+ const struct qeth_discipline *disc;
struct qeth_card *card = data;
int rc;
+ /* Lock-free, other users will block until we are done. */
+ disc = card->discipline;
+
QETH_CARD_TEXT(card, 2, "recover1");
if (!qeth_do_run_thread(card, QETH_RECOVER_THREAD))
return 0;
@@ -5619,8 +5620,8 @@ static int qeth_do_reset(void *data)
dev_warn(&card->gdev->dev,
"A recovery process has been started for the device\n");
- qeth_set_offline(card, true);
- rc = qeth_set_online(card);
+ qeth_set_offline(card, disc, true);
+ rc = qeth_set_online(card, disc);
if (!rc) {
dev_info(&card->gdev->dev,
"Device successfully recovered!\n");
@@ -6584,6 +6585,7 @@ static int qeth_core_probe_device(struct ccwgroup_device *gdev)
break;
default:
card->info.layer_enforced = true;
+ /* It's so early that we don't need the discipline_mutex yet. */
rc = qeth_core_load_discipline(card, enforced_disc);
if (rc)
goto err_load;
@@ -6616,10 +6618,12 @@ static void qeth_core_remove_device(struct ccwgroup_device *gdev)
QETH_CARD_TEXT(card, 2, "removedv");
+ mutex_lock(&card->discipline_mutex);
if (card->discipline) {
card->discipline->remove(gdev);
qeth_core_free_discipline(card);
}
+ mutex_unlock(&card->discipline_mutex);
qeth_free_qdio_queues(card);
@@ -6634,6 +6638,7 @@ static int qeth_core_set_online(struct ccwgroup_device *gdev)
int rc = 0;
enum qeth_discipline_id def_discipline;
+ mutex_lock(&card->discipline_mutex);
if (!card->discipline) {
def_discipline = IS_IQD(card) ? QETH_DISCIPLINE_LAYER3 :
QETH_DISCIPLINE_LAYER2;
@@ -6647,16 +6652,23 @@ static int qeth_core_set_online(struct ccwgroup_device *gdev)
}
}
- rc = qeth_set_online(card);
+ rc = qeth_set_online(card, card->discipline);
+
err:
+ mutex_unlock(&card->discipline_mutex);
return rc;
}
static int qeth_core_set_offline(struct ccwgroup_device *gdev)
{
struct qeth_card *card = dev_get_drvdata(&gdev->dev);
+ int rc;
- return qeth_set_offline(card, false);
+ mutex_lock(&card->discipline_mutex);
+ rc = qeth_set_offline(card, card->discipline, false);
+ mutex_unlock(&card->discipline_mutex);
+
+ return rc;
}
static void qeth_core_shutdown(struct ccwgroup_device *gdev)
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 4ed0fb0705a5..4254caf1d9b6 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -2208,7 +2208,7 @@ static void qeth_l2_remove_device(struct ccwgroup_device *gdev)
wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0);
if (gdev->state == CCWGROUP_ONLINE)
- qeth_set_offline(card, false);
+ qeth_set_offline(card, card->discipline, false);
cancel_work_sync(&card->close_dev_work);
if (card->dev->reg_state == NETREG_REGISTERED)
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index d138ac432d01..4c2cae7ae9a7 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1813,7 +1813,7 @@ static netdev_features_t qeth_l3_osa_features_check(struct sk_buff *skb,
struct net_device *dev,
netdev_features_t features)
{
- if (qeth_get_ip_version(skb) != 4)
+ if (vlan_get_protocol(skb) != htons(ETH_P_IP))
features &= ~NETIF_F_HW_VLAN_CTAG_TX;
return qeth_features_check(skb, dev, features);
}
@@ -1971,7 +1971,7 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev)
wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0);
if (cgdev->state == CCWGROUP_ONLINE)
- qeth_set_offline(card, false);
+ qeth_set_offline(card, card->discipline, false);
cancel_work_sync(&card->close_dev_work);
if (card->dev->reg_state == NETREG_REGISTERED)
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 3efe38191d1c..175b7b42bfc4 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -159,6 +159,7 @@ again:
}
/* set info->task and info->tid */
+ info->task = curr_task;
if (curr_tid == info->tid) {
curr_fd = info->fd;
} else {
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 89fff5f59eea..2ed0b01f72f0 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -302,7 +302,7 @@ static int __ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *
if (skb_is_gso(skb))
return ip_finish_output_gso(net, sk, skb, mtu);
- if (skb->len > mtu || (IPCB(skb)->flags & IPSKB_FRAG_PMTU))
+ if (skb->len > mtu || IPCB(skb)->frag_max_size)
return ip_fragment(net, sk, skb, mtu, ip_finish_output2);
return ip_finish_output2(net, sk, skb);
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index ee65c9225178..64594aa755f0 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -759,8 +759,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
goto tx_error;
}
- if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph,
- 0, 0, false)) {
+ df = tnl_params->frag_off;
+ if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
+ df |= (inner_iph->frag_off & htons(IP_DF));
+
+ if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) {
ip_rt_put(rt);
goto tx_error;
}
@@ -788,10 +791,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
ttl = ip4_dst_hoplimit(&rt->dst);
}
- df = tnl_params->frag_off;
- if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
- df |= (inner_iph->frag_off&htons(IP_DF));
-
max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
if (max_headroom > dev->needed_headroom)
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 5e1b22d4f939..e53e43aef785 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -627,7 +627,7 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[],
for (i = NHA_GROUP_TYPE + 1; i < __NHA_MAX; ++i) {
if (!tb[i])
continue;
- if (tb[NHA_FDB])
+ if (i == NHA_FDB)
continue;
NL_SET_ERR_MSG(extack,
"No other attributes can be set in nexthop groups");
@@ -1459,8 +1459,10 @@ static struct nexthop *nexthop_create_group(struct net *net,
return nh;
out_no_nh:
- for (; i >= 0; --i)
+ for (i--; i >= 0; --i) {
+ list_del(&nhg->nh_entries[i].nh_list);
nexthop_put(nhg->nh_entries[i].nh);
+ }
kfree(nhg->spare);
kfree(nhg);
diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c
index 3fae61ef6339..ff3aa0cf3997 100644
--- a/tools/bpf/bpftool/net.c
+++ b/tools/bpf/bpftool/net.c
@@ -11,7 +11,6 @@
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include <net/if.h>
-#include <linux/if.h>
#include <linux/rtnetlink.h>
#include <linux/socket.h>
#include <linux/tc_act/tc_bpf.h>
diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
index e3ea569ee125..7409d7860aa6 100644
--- a/tools/bpf/resolve_btfids/main.c
+++ b/tools/bpf/resolve_btfids/main.c
@@ -139,6 +139,8 @@ int eprintf(int level, int var, const char *fmt, ...)
#define pr_debug2(fmt, ...) pr_debugN(2, pr_fmt(fmt), ##__VA_ARGS__)
#define pr_err(fmt, ...) \
eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_info(fmt, ...) \
+ eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__)
static bool is_btf_id(const char *name)
{
@@ -472,7 +474,7 @@ static int symbols_resolve(struct object *obj)
int nr_funcs = obj->nr_funcs;
int err, type_id;
struct btf *btf;
- __u32 nr;
+ __u32 nr_types;
btf = btf__parse(obj->btf ?: obj->path, NULL);
err = libbpf_get_error(btf);
@@ -483,12 +485,12 @@ static int symbols_resolve(struct object *obj)
}
err = -1;
- nr = btf__get_nr_types(btf);
+ nr_types = btf__get_nr_types(btf);
/*
* Iterate all the BTF types and search for collected symbol IDs.
*/
- for (type_id = 1; type_id <= nr; type_id++) {
+ for (type_id = 1; type_id <= nr_types; type_id++) {
const struct btf_type *type;
struct rb_root *root;
struct btf_id *id;
@@ -526,8 +528,13 @@ static int symbols_resolve(struct object *obj)
id = btf_id__find(root, str);
if (id) {
- id->id = type_id;
- (*nr)--;
+ if (id->id) {
+ pr_info("WARN: multiple IDs found for '%s': %d, %d - using %d\n",
+ str, id->id, type_id, id->id);
+ } else {
+ id->id = type_id;
+ (*nr)--;
+ }
}
}
diff --git a/tools/testing/selftests/bpf/progs/bprm_opts.c b/tools/testing/selftests/bpf/progs/bprm_opts.c
index 5bfef2887e70..418d9c6d4952 100644
--- a/tools/testing/selftests/bpf/progs/bprm_opts.c
+++ b/tools/testing/selftests/bpf/progs/bprm_opts.c
@@ -4,7 +4,7 @@
* Copyright 2020 Google LLC.
*/
-#include "vmlinux.h"
+#include <linux/bpf.h>
#include <errno.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index eb693a3b7b4a..4c7d33618437 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -869,7 +869,7 @@ ipv6_torture()
pid3=$!
ip netns exec me ping -f 2001:db8:101::2 >/dev/null 2>&1 &
pid4=$!
- ip netns exec me mausezahn veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
+ ip netns exec me mausezahn -6 veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
pid5=$!
sleep 300
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
index a374e10ef506..3006a8e5b41a 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -4,7 +4,8 @@
TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \
nft_concat_range.sh nft_conntrack_helper.sh \
- nft_queue.sh nft_meta.sh
+ nft_queue.sh nft_meta.sh \
+ ipip-conntrack-mtu.sh
LDLIBS = -lmnl
TEST_GEN_FILES = nf-queue
diff --git a/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh b/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh
new file mode 100755
index 000000000000..4a6f5c3b3215
--- /dev/null
+++ b/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh
@@ -0,0 +1,206 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# Conntrack needs to reassemble fragments in order to have complete
+# packets for rule matching. Reassembly can lead to packet loss.
+
+# Consider the following setup:
+# +--------+ +---------+ +--------+
+# |Router A|-------|Wanrouter|-------|Router B|
+# | |.IPIP..| |..IPIP.| |
+# +--------+ +---------+ +--------+
+# / mtu 1400 \
+# / \
+#+--------+ +--------+
+#|Client A| |Client B|
+#| | | |
+#+--------+ +--------+
+
+# Router A and Router B use IPIP tunnel interfaces to tunnel traffic
+# between Client A and Client B over WAN. Wanrouter has MTU 1400 set
+# on its interfaces.
+
+rnd=$(mktemp -u XXXXXXXX)
+rx=$(mktemp)
+
+r_a="ns-ra-$rnd"
+r_b="ns-rb-$rnd"
+r_w="ns-rw-$rnd"
+c_a="ns-ca-$rnd"
+c_b="ns-cb-$rnd"
+
+checktool (){
+ if ! $1 > /dev/null 2>&1; then
+ echo "SKIP: Could not $2"
+ exit $ksft_skip
+ fi
+}
+
+checktool "iptables --version" "run test without iptables"
+checktool "ip -Version" "run test without ip tool"
+checktool "which nc" "run test without nc (netcat)"
+checktool "ip netns add ${r_a}" "create net namespace"
+
+for n in ${r_b} ${r_w} ${c_a} ${c_b};do
+ ip netns add ${n}
+done
+
+cleanup() {
+ for n in ${r_a} ${r_b} ${r_w} ${c_a} ${c_b};do
+ ip netns del ${n}
+ done
+ rm -f ${rx}
+}
+
+trap cleanup EXIT
+
+test_path() {
+ msg="$1"
+
+ ip netns exec ${c_b} nc -n -w 3 -q 3 -u -l -p 5000 > ${rx} < /dev/null &
+
+ sleep 1
+ for i in 1 2 3; do
+ head -c1400 /dev/zero | tr "\000" "a" | ip netns exec ${c_a} nc -n -w 1 -u 192.168.20.2 5000
+ done
+
+ wait
+
+ bytes=$(wc -c < ${rx})
+
+ if [ $bytes -eq 1400 ];then
+ echo "OK: PMTU $msg connection tracking"
+ else
+ echo "FAIL: PMTU $msg connection tracking: got $bytes, expected 1400"
+ exit 1
+ fi
+}
+
+# Detailed setup for Router A
+# ---------------------------
+# Interfaces:
+# eth0: 10.2.2.1/24
+# eth1: 192.168.10.1/24
+# ipip0: No IP address, local 10.2.2.1 remote 10.4.4.1
+# Routes:
+# 192.168.20.0/24 dev ipip0 (192.168.20.0/24 is subnet of Client B)
+# 10.4.4.1 via 10.2.2.254 (Router B via Wanrouter)
+# No iptables rules at all.
+
+ip link add veth0 netns ${r_a} type veth peer name veth0 netns ${r_w}
+ip link add veth1 netns ${r_a} type veth peer name veth0 netns ${c_a}
+
+l_addr="10.2.2.1"
+r_addr="10.4.4.1"
+ip netns exec ${r_a} ip link add ipip0 type ipip local ${l_addr} remote ${r_addr} mode ipip || exit $ksft_skip
+
+for dev in lo veth0 veth1 ipip0; do
+ ip -net ${r_a} link set $dev up
+done
+
+ip -net ${r_a} addr add 10.2.2.1/24 dev veth0
+ip -net ${r_a} addr add 192.168.10.1/24 dev veth1
+
+ip -net ${r_a} route add 192.168.20.0/24 dev ipip0
+ip -net ${r_a} route add 10.4.4.0/24 via 10.2.2.254
+
+ip netns exec ${r_a} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+
+# Detailed setup for Router B
+# ---------------------------
+# Interfaces:
+# eth0: 10.4.4.1/24
+# eth1: 192.168.20.1/24
+# ipip0: No IP address, local 10.4.4.1 remote 10.2.2.1
+# Routes:
+# 192.168.10.0/24 dev ipip0 (192.168.10.0/24 is subnet of Client A)
+# 10.2.2.1 via 10.4.4.254 (Router A via Wanrouter)
+# No iptables rules at all.
+
+ip link add veth0 netns ${r_b} type veth peer name veth1 netns ${r_w}
+ip link add veth1 netns ${r_b} type veth peer name veth0 netns ${c_b}
+
+l_addr="10.4.4.1"
+r_addr="10.2.2.1"
+
+ip netns exec ${r_b} ip link add ipip0 type ipip local ${l_addr} remote ${r_addr} mode ipip || exit $ksft_skip
+
+for dev in lo veth0 veth1 ipip0; do
+ ip -net ${r_b} link set $dev up
+done
+
+ip -net ${r_b} addr add 10.4.4.1/24 dev veth0
+ip -net ${r_b} addr add 192.168.20.1/24 dev veth1
+
+ip -net ${r_b} route add 192.168.10.0/24 dev ipip0
+ip -net ${r_b} route add 10.2.2.0/24 via 10.4.4.254
+ip netns exec ${r_b} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+
+# Client A
+ip -net ${c_a} addr add 192.168.10.2/24 dev veth0
+ip -net ${c_a} link set dev lo up
+ip -net ${c_a} link set dev veth0 up
+ip -net ${c_a} route add default via 192.168.10.1
+
+# Client A
+ip -net ${c_b} addr add 192.168.20.2/24 dev veth0
+ip -net ${c_b} link set dev veth0 up
+ip -net ${c_b} link set dev lo up
+ip -net ${c_b} route add default via 192.168.20.1
+
+# Wan
+ip -net ${r_w} addr add 10.2.2.254/24 dev veth0
+ip -net ${r_w} addr add 10.4.4.254/24 dev veth1
+
+ip -net ${r_w} link set dev lo up
+ip -net ${r_w} link set dev veth0 up mtu 1400
+ip -net ${r_w} link set dev veth1 up mtu 1400
+
+ip -net ${r_a} link set dev veth0 mtu 1400
+ip -net ${r_b} link set dev veth0 mtu 1400
+
+ip netns exec ${r_w} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+
+# Path MTU discovery
+# ------------------
+# Running tracepath from Client A to Client B shows PMTU discovery is working
+# as expected:
+#
+# clienta:~# tracepath 192.168.20.2
+# 1?: [LOCALHOST] pmtu 1500
+# 1: 192.168.10.1 0.867ms
+# 1: 192.168.10.1 0.302ms
+# 2: 192.168.10.1 0.312ms pmtu 1480
+# 2: no reply
+# 3: 192.168.10.1 0.510ms pmtu 1380
+# 3: 192.168.20.2 2.320ms reached
+# Resume: pmtu 1380 hops 3 back 3
+
+# ip netns exec ${c_a} traceroute --mtu 192.168.20.2
+
+# Router A has learned PMTU (1400) to Router B from Wanrouter.
+# Client A has learned PMTU (1400 - IPIP overhead = 1380) to Client B
+# from Router A.
+
+#Send large UDP packet
+#---------------------
+#Now we send a 1400 bytes UDP packet from Client A to Client B:
+
+# clienta:~# head -c1400 /dev/zero | tr "\000" "a" | nc -u 192.168.20.2 5000
+test_path "without"
+
+# The IPv4 stack on Client A already knows the PMTU to Client B, so the
+# UDP packet is sent as two fragments (1380 + 20). Router A forwards the
+# fragments between eth1 and ipip0. The fragments fit into the tunnel and
+# reach their destination.
+
+#When sending the large UDP packet again, Router A now reassembles the
+#fragments before routing the packet over ipip0. The resulting IPIP
+#packet is too big (1400) for the tunnel PMTU (1380) to Router B, it is
+#dropped on Router A before sending.
+
+ip netns exec ${r_a} iptables -A FORWARD -m conntrack --ctstate NEW
+test_path "with"