diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-03-17 13:31:16 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-03-17 13:31:16 -0700 |
commit | 478a351ce0d69cef2d2bf2a686a09b356b63a66c (patch) | |
tree | 42a6b550a0ae321799fc81f68e6548c532283ccc /drivers/net/virtio_net.c | |
parent | 8d3c682a5e3d9dfc2448ecbb22f4cd48359b9e21 (diff) | |
parent | f5e305e63b035a1782a666a6535765f80bb2dca3 (diff) |
Merge tag 'net-6.3-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Jakub Kicinski:
"Including fixes from netfilter, wifi and ipsec.
A little more changes than usual, but it's pretty normal for us that
the rc3/rc4 PRs are oversized as people start testing in earnest.
Possibly an extra boost from people deploying the 6.1 LTS but that's
more of an unscientific hunch.
Current release - regressions:
- phy: mscc: fix deadlock in phy_ethtool_{get,set}_wol()
- virtio: vsock: don't use skbuff state to account credit
- virtio: vsock: don't drop skbuff on copy failure
- virtio_net: fix page_to_skb() miscalculating the memory size
Current release - new code bugs:
- eth: correct xdp_features after device reconfig
- wifi: nl80211: fix the puncturing bitmap policy
- net/mlx5e: flower:
- fix raw counter initialization
- fix missing error code
- fix cloned flow attribute
- ipa:
- fix some register validity checks
- fix a surprising number of bad offsets
- kill FILT_ROUT_CACHE_CFG IPA register
Previous releases - regressions:
- tcp: fix bind() conflict check for dual-stack wildcard address
- veth: fix use after free in XDP_REDIRECT when skb headroom is small
- ipv4: fix incorrect table ID in IOCTL path
- ipvlan: make skb->skb_iif track skb->dev for l3s mode
- mptcp:
- fix possible deadlock in subflow_error_report
- fix UaFs when destroying unaccepted and listening sockets
- dsa: mv88e6xxx: fix max_mtu of 1492 on 6165, 6191, 6220, 6250, 6290
Previous releases - always broken:
- tcp: tcp_make_synack() can be called from process context, don't
assume preemption is disabled when updating stats
- netfilter: correct length for loading protocol registers
- virtio_net: add checking sq is full inside xdp xmit
- bonding: restore IFF_MASTER/SLAVE flags on bond enslave Ethertype
change
- phy: nxp-c45-tja11xx: fix MII_BASIC_CONFIG_REV bit number
- eth: i40e: fix crash during reboot when adapter is in recovery mode
- eth: ice: avoid deadlock on rtnl lock when auxiliary device
plug/unplug meets bonding
- dsa: mt7530:
- remove now incorrect comment regarding port 5
- set PLL frequency and trgmii only when trgmii is used
- eth: mtk_eth_soc: reset PCS state when changing interface types
Misc:
- ynl: another license adjustment
- move the TCA_EXT_WARN_MSG attribute for tc action"
* tag 'net-6.3-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (108 commits)
selftests: bonding: add tests for ether type changes
bonding: restore bond's IFF_SLAVE flag if a non-eth dev enslave fails
bonding: restore IFF_MASTER/SLAVE flags on bond enslave ether type change
net: renesas: rswitch: Fix GWTSDIE register handling
net: renesas: rswitch: Fix the output value of quote from rswitch_rx()
ethernet: sun: add check for the mdesc_grab()
net: ipa: fix some register validity checks
net: ipa: kill FILT_ROUT_CACHE_CFG IPA register
net: ipa: add two missing declarations
net: ipa: reg: include <linux/bug.h>
net: xdp: don't call notifiers during driver init
net/sched: act_api: add specific EXT_WARN_MSG for tc action
Revert "net/sched: act_api: move TCA_EXT_WARN_MSG to the correct hierarchy"
net: dsa: microchip: fix RGMII delay configuration on KSZ8765/KSZ8794/KSZ8795
ynl: make the tooling check the license
ynl: broaden the license even more
tools: ynl: make definitions optional again
hsr: ratelimit only when errors are printed
qed/qed_mng_tlv: correctly zero out ->min instead of ->hour
selftests: net: devlink_port_split.py: skip test if no suitable device available
...
Diffstat (limited to 'drivers/net/virtio_net.c')
-rw-r--r-- | drivers/net/virtio_net.c | 171 |
1 files changed, 95 insertions, 76 deletions
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index fb5e68ed3ec2..2396c28c0122 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -446,7 +446,8 @@ static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx) static struct sk_buff *page_to_skb(struct virtnet_info *vi, struct receive_queue *rq, struct page *page, unsigned int offset, - unsigned int len, unsigned int truesize) + unsigned int len, unsigned int truesize, + unsigned int headroom) { struct sk_buff *skb; struct virtio_net_hdr_mrg_rxbuf *hdr; @@ -464,11 +465,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, else hdr_padded_len = sizeof(struct padded_vnet_hdr); - buf = p; + buf = p - headroom; len -= hdr_len; offset += hdr_padded_len; p += hdr_padded_len; - tailroom = truesize - hdr_padded_len - len; + tailroom = truesize - headroom - hdr_padded_len - len; shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); @@ -545,6 +546,87 @@ ok: return skb; } +static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi) +{ + unsigned int len; + unsigned int packets = 0; + unsigned int bytes = 0; + void *ptr; + + while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { + if (likely(!is_xdp_frame(ptr))) { + struct sk_buff *skb = ptr; + + pr_debug("Sent skb %p\n", skb); + + bytes += skb->len; + napi_consume_skb(skb, in_napi); + } else { + struct xdp_frame *frame = ptr_to_xdp(ptr); + + bytes += xdp_get_frame_len(frame); + xdp_return_frame(frame); + } + packets++; + } + + /* Avoid overhead when no packets have been processed + * happens when called speculatively from start_xmit. + */ + if (!packets) + return; + + u64_stats_update_begin(&sq->stats.syncp); + sq->stats.bytes += bytes; + sq->stats.packets += packets; + u64_stats_update_end(&sq->stats.syncp); +} + +static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) +{ + if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) + return false; + else if (q < vi->curr_queue_pairs) + return true; + else + return false; +} + +static void check_sq_full_and_disable(struct virtnet_info *vi, + struct net_device *dev, + struct send_queue *sq) +{ + bool use_napi = sq->napi.weight; + int qnum; + + qnum = sq - vi->sq; + + /* If running out of space, stop queue to avoid getting packets that we + * are then unable to transmit. + * An alternative would be to force queuing layer to requeue the skb by + * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be + * returned in a normal path of operation: it means that driver is not + * maintaining the TX queue stop/start state properly, and causes + * the stack to do a non-trivial amount of useless work. + * Since most packets only take 1 or 2 ring slots, stopping the queue + * early means 16 slots are typically wasted. + */ + if (sq->vq->num_free < 2+MAX_SKB_FRAGS) { + netif_stop_subqueue(dev, qnum); + if (use_napi) { + if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) + virtqueue_napi_schedule(&sq->napi, sq->vq); + } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { + /* More just got used, free them then recheck. */ + free_old_xmit_skbs(sq, false); + if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) { + netif_start_subqueue(dev, qnum); + virtqueue_disable_cb(sq->vq); + } + } + } +} + static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, struct send_queue *sq, struct xdp_frame *xdpf) @@ -686,6 +768,9 @@ static int virtnet_xdp_xmit(struct net_device *dev, } ret = nxmit; + if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq)) + check_sq_full_and_disable(vi, dev, sq); + if (flags & XDP_XMIT_FLUSH) { if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) kicks = 1; @@ -925,7 +1010,7 @@ static struct sk_buff *receive_big(struct net_device *dev, { struct page *page = buf; struct sk_buff *skb = - page_to_skb(vi, rq, page, 0, len, PAGE_SIZE); + page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0); stats->bytes += len - vi->hdr_len; if (unlikely(!skb)) @@ -1188,9 +1273,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, switch (act) { case XDP_PASS: + head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz); + if (unlikely(!head_skb)) + goto err_xdp_frags; + if (unlikely(xdp_page != page)) put_page(page); - head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz); rcu_read_unlock(); return head_skb; case XDP_TX: @@ -1248,7 +1336,7 @@ err_xdp_frags: rcu_read_unlock(); skip_xdp: - head_skb = page_to_skb(vi, rq, page, offset, len, truesize); + head_skb = page_to_skb(vi, rq, page, offset, len, truesize, headroom); curr_skb = head_skb; if (unlikely(!curr_skb)) @@ -1714,52 +1802,6 @@ static int virtnet_receive(struct receive_queue *rq, int budget, return stats.packets; } -static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi) -{ - unsigned int len; - unsigned int packets = 0; - unsigned int bytes = 0; - void *ptr; - - while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { - if (likely(!is_xdp_frame(ptr))) { - struct sk_buff *skb = ptr; - - pr_debug("Sent skb %p\n", skb); - - bytes += skb->len; - napi_consume_skb(skb, in_napi); - } else { - struct xdp_frame *frame = ptr_to_xdp(ptr); - - bytes += xdp_get_frame_len(frame); - xdp_return_frame(frame); - } - packets++; - } - - /* Avoid overhead when no packets have been processed - * happens when called speculatively from start_xmit. - */ - if (!packets) - return; - - u64_stats_update_begin(&sq->stats.syncp); - sq->stats.bytes += bytes; - sq->stats.packets += packets; - u64_stats_update_end(&sq->stats.syncp); -} - -static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) -{ - if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) - return false; - else if (q < vi->curr_queue_pairs) - return true; - else - return false; -} - static void virtnet_poll_cleantx(struct receive_queue *rq) { struct virtnet_info *vi = rq->vq->vdev->priv; @@ -1989,30 +2031,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) nf_reset_ct(skb); } - /* If running out of space, stop queue to avoid getting packets that we - * are then unable to transmit. - * An alternative would be to force queuing layer to requeue the skb by - * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be - * returned in a normal path of operation: it means that driver is not - * maintaining the TX queue stop/start state properly, and causes - * the stack to do a non-trivial amount of useless work. - * Since most packets only take 1 or 2 ring slots, stopping the queue - * early means 16 slots are typically wasted. - */ - if (sq->vq->num_free < 2+MAX_SKB_FRAGS) { - netif_stop_subqueue(dev, qnum); - if (use_napi) { - if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) - virtqueue_napi_schedule(&sq->napi, sq->vq); - } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { - /* More just got used, free them then recheck. */ - free_old_xmit_skbs(sq, false); - if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) { - netif_start_subqueue(dev, qnum); - virtqueue_disable_cb(sq->vq); - } - } - } + check_sq_full_and_disable(vi, dev, sq); if (kick || netif_xmit_stopped(txq)) { if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { |