diff options
author | David S. Miller <davem@davemloft.net> | 2019-11-15 12:25:42 -0800 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2019-11-15 12:25:42 -0800 |
commit | 43da44c87678612f301d6ef6314064a6b52aad60 (patch) | |
tree | dcf587aa085a7e71ef483f6beba17147163e2e64 | |
parent | 983db6198f0d0ee406f365069901387e7834c3c0 (diff) | |
parent | 8d07a7930434afd7ca4faa90ed478552d4a84b17 (diff) |
Merge branch 'net-stmmac-CPU-Performance-Improvements'
Jose Abreu says:
====================
net: stmmac: CPU Performance Improvements
CPU Performance improvements for stmmac. Please check below for results
before and after the series.
Patch 1/7, allows RX Interrupt on Completion to be disabled and only use the
RX HW Watchdog.
Patch 2/7, sets up the default RX coalesce settings instead of using the
minimum value.
Patch 3/7 and 4/7, remove the unneeded computations for RX Flow Control
activation/de-activation, in some cases.
Patch 5/7, tunes-up the default coalesce settings.
Patch 6/7, re-works the TX coalesce timer activation logic.
Patch 7/7, removes the now unneeded TBU interrupt.
NetPerf UDP Results:
--------------------
Socket Message Elapsed Messages CPU Service
Size Size Time Okay Errors Throughput Util Demand
bytes bytes secs # # 10^6bits/sec % SS us/KB
--- XGMAC@2.5G: Before
212992 1400 10.00 2100620 0 2351.7 36.69 5.112
212992 10.00 2100539 2351.6 26.18 3.648
--- XGMAC@2.5G: After
212992 1400 10.00 2108972 0 2361.5 21.73 3.015
212992 10.00 2097038 2348.1 19.21 2.666
--- GMAC5@1G: Before
212992 1400 10.00 786000 0 880.2 34.71 12.923
212992 10.00 786000 880.2 23.42 8.719
--- GMAC5@1G: After
212992 1400 10.00 842648 0 943.7 14.12 4.903
212992 10.00 842648 943.7 12.73 4.418
Perf TCP Results on RX Path:
----------------------------
--- XGMAC@2.5G: Before
22.51% swapper [stmmac] [k] dwxgmac2_dma_interrupt
10.82% swapper [stmmac] [k] dwxgmac2_host_mtl_irq_status
5.21% swapper [stmmac] [k] dwxgmac2_host_irq_status
4.67% swapper [stmmac] [k] dwxgmac3_safety_feat_irq_status
3.63% swapper [kernel.kallsyms] [k] stack_trace_consume_entry
2.74% iperf3 [kernel.kallsyms] [k] copy_user_enhanced_fast_string
2.52% swapper [kernel.kallsyms] [k] update_stack_state
1.94% ksoftirqd/0 [stmmac] [k] dwxgmac2_dma_interrupt
1.45% iperf3 [kernel.kallsyms] [k] queued_spin_lock_slowpath
1.26% swapper [kernel.kallsyms] [k] create_object
--- XGMAC@2.5G: After
7.43% swapper [kernel.kallsyms] [k] stack_trace_consume_entry
5.86% swapper [stmmac] [k] dwxgmac2_dma_interrupt
5.68% swapper [kernel.kallsyms] [k] update_stack_state
4.71% iperf3 [kernel.kallsyms] [k] copy_user_enhanced_fast_string
2.88% swapper [kernel.kallsyms] [k] create_object
2.69% swapper [stmmac] [k] dwxgmac2_host_mtl_irq_status
2.61% swapper [stmmac] [k] stmmac_napi_poll_rx
2.52% swapper [kernel.kallsyms] [k] unwind_next_frame.part.4
1.48% swapper [kernel.kallsyms] [k] unwind_get_return_address
1.38% swapper [kernel.kallsyms] [k] arch_stack_walk
--- GMAC5@1G: Before
31.29% swapper [stmmac] [k] dwmac4_dma_interrupt
14.57% swapper [stmmac] [k] dwmac4_irq_mtl_status
10.66% swapper [stmmac] [k] dwmac4_irq_status
1.97% swapper [kernel.kallsyms] [k] stack_trace_consume_entry
1.73% iperf3 [kernel.kallsyms] [k] copy_user_enhanced_fast_string
1.59% swapper [kernel.kallsyms] [k] update_stack_state
1.15% iperf3 [kernel.kallsyms] [k] do_syscall_64
1.01% ksoftirqd/0 [stmmac] [k] dwmac4_dma_interrupt
0.89% swapper [kernel.kallsyms] [k] __default_send_IPI_dest_field
0.75% swapper [stmmac] [k] stmmac_napi_poll_rx
--- GMAC5@1G: After
6.70% swapper [kernel.kallsyms] [k] stack_trace_consume_entry
5.79% swapper [stmmac] [k] dwmac4_dma_interrupt
5.29% swapper [kernel.kallsyms] [k] update_stack_state
3.52% iperf3 [kernel.kallsyms] [k] copy_user_enhanced_fast_string
2.83% swapper [stmmac] [k] dwmac4_irq_mtl_status
2.62% swapper [kernel.kallsyms] [k] create_object
2.46% swapper [stmmac] [k] stmmac_napi_poll_rx
2.32% swapper [kernel.kallsyms] [k] unwind_next_frame.part.4
2.19% swapper [stmmac] [k] dwmac4_irq_status
1.39% swapper [kernel.kallsyms] [k] unwind_get_return_address
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/ethernet/stmicro/stmmac/common.h | 5 | ||||
-rw-r--r-- | drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c | 14 | ||||
-rw-r--r-- | drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c | 14 | ||||
-rw-r--r-- | drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 74 |
5 files changed, 59 insertions, 50 deletions
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 912bbb6515b2..b210e987a1db 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -248,12 +248,13 @@ struct stmmac_safety_stats { /* Max/Min RI Watchdog Timer count value */ #define MAX_DMA_RIWT 0xff #define MIN_DMA_RIWT 0x10 +#define DEF_DMA_RIWT 0xa0 /* Tx coalesce parameters */ #define STMMAC_COAL_TX_TIMER 1000 #define STMMAC_MAX_COAL_TX_TICK 100000 #define STMMAC_TX_MAX_FRAMES 256 -#define STMMAC_TX_FRAMES 1 -#define STMMAC_RX_FRAMES 25 +#define STMMAC_TX_FRAMES 25 +#define STMMAC_RX_FRAMES 0 /* Packets types */ enum packets_types { diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c index 36a0af8bf89f..c15409030710 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c @@ -252,19 +252,9 @@ static void dwmac4_dma_rx_chan_op_mode(void __iomem *ioaddr, int mode, rfa = 0x01; /* Full-1.5K */ break; - case 8192: - rfd = 0x06; /* Full-4K */ - rfa = 0x0a; /* Full-6K */ - break; - - case 16384: - rfd = 0x06; /* Full-4K */ - rfa = 0x12; /* Full-10K */ - break; - default: - rfd = 0x06; /* Full-4K */ - rfa = 0x1e; /* Full-16K */ + rfd = 0x07; /* Full-4.5K */ + rfa = 0x04; /* Full-3K */ break; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h index 99037386080a..e908d80a1d6f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h @@ -360,7 +360,7 @@ #define XGMAC_TBUE BIT(2) #define XGMAC_TIE BIT(0) #define XGMAC_DMA_INT_DEFAULT_EN (XGMAC_NIE | XGMAC_AIE | XGMAC_RBUE | \ - XGMAC_RIE | XGMAC_TBUE | XGMAC_TIE) + XGMAC_RIE | XGMAC_TIE) #define XGMAC_DMA_CH_Rx_WATCHDOG(x) (0x0000313c + (0x80 * (x))) #define XGMAC_RWT GENMASK(7, 0) #define XGMAC_DMA_CH_STATUS(x) (0x00003160 + 
(0x80 * (x))) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index f148cb2061d8..22a7f0cc1b90 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -183,19 +183,9 @@ static void dwxgmac2_dma_rx_mode(void __iomem *ioaddr, int mode, rfa = 0x01; /* Full-1.5K */ break; - case 8192: - rfd = 0x06; /* Full-4K */ - rfa = 0x0a; /* Full-6K */ - break; - - case 16384: - rfd = 0x06; /* Full-4K */ - rfa = 0x12; /* Full-10K */ - break; - default: - rfd = 0x06; /* Full-4K */ - rfa = 0x1e; /* Full-16K */ + rfd = 0x07; /* Full-4.5K */ + rfa = 0x04; /* Full-3K */ break; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 39b4efd521f9..4ba250a9008f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2605,9 +2605,10 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) priv->tx_lpi_timer = STMMAC_DEFAULT_TWT_LS; if (priv->use_riwt) { - ret = stmmac_rx_watchdog(priv, priv->ioaddr, MIN_DMA_RIWT, rx_cnt); - if (!ret) - priv->rx_riwt = MIN_DMA_RIWT; + if (!priv->rx_riwt) + priv->rx_riwt = DEF_DMA_RIWT; + + ret = stmmac_rx_watchdog(priv, priv->ioaddr, priv->rx_riwt, rx_cnt); } if (priv->hw->pcs) @@ -2915,16 +2916,17 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) struct stmmac_priv *priv = netdev_priv(dev); int nfrags = skb_shinfo(skb)->nr_frags; u32 queue = skb_get_queue_mapping(skb); + unsigned int first_entry, tx_packets; + int tmp_pay_len = 0, first_tx; struct stmmac_tx_queue *tx_q; - unsigned int first_entry; u8 proto_hdr_len, hdr; - int tmp_pay_len = 0; + bool has_vlan, set_ic; u32 pay_len, mss; dma_addr_t des; - bool has_vlan; int i; tx_q = &priv->tx_queue[queue]; + first_tx = tx_q->cur_tx; /* Compute header lengths */ if (skb_shinfo(skb)->gso_type & 
SKB_GSO_UDP_L4) { @@ -3032,16 +3034,27 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) tx_q->tx_skbuff[tx_q->cur_tx] = skb; /* Manage tx mitigation */ - tx_q->tx_count_frames += nfrags + 1; - if (likely(priv->tx_coal_frames > tx_q->tx_count_frames) && - !((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && - priv->hwts_tx_en)) { - stmmac_tx_timer_arm(priv, queue); - } else { + tx_packets = (tx_q->cur_tx + 1) - first_tx; + tx_q->tx_count_frames += tx_packets; + + if ((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && priv->hwts_tx_en) + set_ic = true; + else if (!priv->tx_coal_frames) + set_ic = false; + else if (tx_packets > priv->tx_coal_frames) + set_ic = true; + else if ((tx_q->tx_count_frames % priv->tx_coal_frames) < tx_packets) + set_ic = true; + else + set_ic = false; + + if (set_ic) { desc = &tx_q->dma_tx[tx_q->cur_tx]; tx_q->tx_count_frames = 0; stmmac_set_tx_ic(priv, desc); priv->xstats.tx_set_ic_bit++; + } else { + stmmac_tx_timer_arm(priv, queue); } /* We've used all descriptors we need for this skb, however, @@ -3132,6 +3145,7 @@ dma_map_err: */ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) { + unsigned int first_entry, tx_packets, enh_desc; struct stmmac_priv *priv = netdev_priv(dev); unsigned int nopaged_len = skb_headlen(skb); int i, csum_insertion = 0, is_jumbo = 0; @@ -3140,13 +3154,12 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) int gso = skb_shinfo(skb)->gso_type; struct dma_desc *desc, *first; struct stmmac_tx_queue *tx_q; - unsigned int first_entry; - unsigned int enh_desc; + bool has_vlan, set_ic; + int entry, first_tx; dma_addr_t des; - bool has_vlan; - int entry; tx_q = &priv->tx_queue[queue]; + first_tx = tx_q->cur_tx; if (priv->tx_path_in_lpi_mode) stmmac_disable_eee_mode(priv); @@ -3240,12 +3253,21 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) * This approach takes care about the fragments: desc is the first * 
element in case of no SG. */ - tx_q->tx_count_frames += nfrags + 1; - if (likely(priv->tx_coal_frames > tx_q->tx_count_frames) && - !((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && - priv->hwts_tx_en)) { - stmmac_tx_timer_arm(priv, queue); - } else { + tx_packets = (entry + 1) - first_tx; + tx_q->tx_count_frames += tx_packets; + + if ((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && priv->hwts_tx_en) + set_ic = true; + else if (!priv->tx_coal_frames) + set_ic = false; + else if (tx_packets > priv->tx_coal_frames) + set_ic = true; + else if ((tx_q->tx_count_frames % priv->tx_coal_frames) < tx_packets) + set_ic = true; + else + set_ic = false; + + if (set_ic) { if (likely(priv->extend_desc)) desc = &tx_q->dma_etx[entry].basic; else @@ -3254,6 +3276,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) tx_q->tx_count_frames = 0; stmmac_set_tx_ic(priv, desc); priv->xstats.tx_set_ic_bit++; + } else { + stmmac_tx_timer_arm(priv, queue); } /* We've used all descriptors we need for this skb, however, @@ -3440,7 +3464,11 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue) rx_q->rx_count_frames += priv->rx_coal_frames; if (rx_q->rx_count_frames > priv->rx_coal_frames) rx_q->rx_count_frames = 0; - use_rx_wd = priv->use_riwt && rx_q->rx_count_frames; + + use_rx_wd = !priv->rx_coal_frames; + use_rx_wd |= rx_q->rx_count_frames > 0; + if (!priv->use_riwt) + use_rx_wd = false; dma_wmb(); stmmac_set_rx_owner(priv, p, use_rx_wd); |